From 1b6d9bd3a5ac7f2910c1acbb801e02fc4bf0f0e2 Mon Sep 17 00:00:00 2001 From: Nicholas Ade <90573287+Nadir-Lafayette@users.noreply.github.com> Date: Wed, 12 Apr 2023 15:01:09 -0400 Subject: [PATCH 001/407] Making the bfloat files --- sim/common/bfloat.cpp | 0 sim/common/bfloat.hpp | 0 2 files changed, 0 insertions(+), 0 deletions(-) create mode 100644 sim/common/bfloat.cpp create mode 100644 sim/common/bfloat.hpp diff --git a/sim/common/bfloat.cpp b/sim/common/bfloat.cpp new file mode 100644 index 0000000000..e69de29bb2 diff --git a/sim/common/bfloat.hpp b/sim/common/bfloat.hpp new file mode 100644 index 0000000000..e69de29bb2 From afa9e4003c6800d3c9a842d40ca10bcf2fd3d8ad Mon Sep 17 00:00:00 2001 From: Nicholas Ade <90573287+Nadir-Lafayette@users.noreply.github.com> Date: Thu, 13 Apr 2023 04:20:23 -0400 Subject: [PATCH 002/407] adding mul and divide to bfloat --- sim/common/bfloat.cpp | 221 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 221 insertions(+) diff --git a/sim/common/bfloat.cpp b/sim/common/bfloat.cpp index e69de29bb2..e44f81b8b9 100644 --- a/sim/common/bfloat.cpp +++ b/sim/common/bfloat.cpp @@ -0,0 +1,221 @@ +#include +#include + +#include +#include + +// get float "in-memory" to exploit iee754 binary representation of floating point values +// use a u to trick compiler into letting you access float's bits directly +// bitwise operations cannot be done directly on iee754 representations per compiler settings +// ordering of the fields is important here +class MyFloat +{ +private: + void printBinary(int n, int i) + { + // Prints the binary representation + // of a number n up to i-bits. 
+ int k; + for (k = i - 1; k >= 0; k--) + { + + if ((n >> k) & 1) + std::cout << "1"; + else + std::cout << "0"; + } + } + +public: + union BFloat_t + { + float f; + int i; + struct + { + uint32_t dead : 16; // don't use these, just place-holders + uint32_t mantissa : 7; // Mantissa (fractional part) of the number + uint32_t exponent : 8; // Exponent (power of 2) of the number + uint32_t sign : 1; + } parts; + }; + + void printBFloat(BFloat_t b) + { + std::cout << b.parts.sign << " | "; + printBinary(b.parts.exponent, 8); + std::cout << " | "; + printBinary(b.parts.mantissa, 7); + std::cout << std::endl; + } + + BFloat_t in_mem; + + MyFloat(float x) + { + in_mem.f = x; + printBFloat(in_mem); + } + + MyFloat(uint8_t mantissa, uint8_t exponent, bool sign) + { + in_mem.parts.mantissa = mantissa & 0x7F; + in_mem.parts.exponent = exponent; + in_mem.parts.sign = (int)sign; + + std::cout << "inside constructor" << std::endl; + std::cout << "bfloat:" << in_mem.f << std::endl; + printBFloat(in_mem); + } + + friend MyFloat operator+(const MyFloat &a, const MyFloat &b) + { + // get fields + bool a_sign = (bool)a.in_mem.parts.sign; + uint8_t a_exp = a.in_mem.parts.exponent - 127; + uint8_t a_mantissa = a.in_mem.parts.mantissa | 0x80; // add in the implicit bit + + bool b_sign = (bool)b.in_mem.parts.sign; + uint8_t b_exp = b.in_mem.parts.exponent - 127; + uint8_t b_mantissa = b.in_mem.parts.mantissa | 0x80; // add in the implicit bit + + // align mantissas by shifting the smaller exponent to the larger exponent + if (a_exp < b_exp) + { + a_mantissa >>= (b_exp - a_exp); + a_exp = b_exp; + } + else + { + b_mantissa >>= (a_exp - b_exp); + b_exp = a_exp; + } + + // add mantissas and adjust exponent if necessary + int sum_mantissa = a_mantissa + b_mantissa; + if (sum_mantissa & 0x100) + { // this val check might be wrong + sum_mantissa >>= 1; + a_exp++; + } + + // build binary representation of result + return MyFloat(sum_mantissa, a_exp, a_sign); + } + + friend MyFloat 
operator*(const MyFloat &a, const MyFloat &b) + { + uint16_t a_exp = a.in_mem.parts.exponent; + uint16_t b_exp = b.in_mem.parts.exponent; + uint16_t a_mantissa = a.in_mem.parts.mantissa | 0x0080; // Add implicit bit + uint16_t b_mantissa = b.in_mem.parts.mantissa | 0x0080; // Add implicit bi + + std::bitset<8> bits(a_exp); + std::cout << "Binary a exp: " << bits << std::endl; + + bool product_sign = a.in_mem.parts.sign ^ b.in_mem.parts.sign; + + if (a_exp == 0xFF || b_exp == 0xff) + { + return MyFloat(0, 0xFF, product_sign); + } + // Multiply mantissas + uint32_t product_mantissa = static_cast(a_mantissa) * static_cast(b_mantissa); + + // Add exponents + int product_exp = a_exp + b_exp - 127; + + product_mantissa = (product_mantissa + 0x40) >> 7; + + // Round to nearest even (round half to even) + if ((product_mantissa & 0x7F) == 0x40 && (product_mantissa & 0x1) != 0) + { + product_mantissa++; + } + if (product_mantissa & 0x0100) + { // Check if the implicit bit shifted to the left + product_mantissa >>= 1; + product_exp++; + } + else + { + product_mantissa &= 0x7F; // Remove the implicit bit + } + return MyFloat(product_mantissa, product_exp, product_sign); + } + + friend MyFloat operator/(const MyFloat &a, const MyFloat &b) + { + uint16_t a_exp = a.in_mem.parts.exponent; + uint16_t b_exp = b.in_mem.parts.exponent; + std::bitset<8> bits(b_exp); + std::cout << "Binary b exp: " << bits << std::endl; + uint16_t a_mantissa = a.in_mem.parts.mantissa | 0x0080; // Add implicit bit + uint16_t b_mantissa = b.in_mem.parts.mantissa | 0x0080; // Add implicit bit + + bool quotient_sign = a.in_mem.parts.sign ^ b.in_mem.parts.sign; + + // Check if divisor is zero + if (b_exp == 0 && b_mantissa == 0) + { + std::cout << "HERE" << std::endl; + return MyFloat(0, 0xFF, quotient_sign); // Return infinity with the appropriate sign + } + + // Check for infinity or zero in dividend + if (a_exp == 0xFF || a_exp == 0) + { + return MyFloat(0, a_exp, quotient_sign); + } + + // Subtract 
exponents + int quotient_exp = a_exp - b_exp + 127; + + // Divide mantissas + uint32_t quotient_mantissa = (static_cast(a_mantissa) << 8) / static_cast(b_mantissa); + + quotient_mantissa = (quotient_mantissa + 0x40) >> 8; + + // Round to nearest even (round half to even) + if ((quotient_mantissa & 0x1) != 0 && (quotient_mantissa & 0x7F) == 0x40) + { + quotient_mantissa--; + } + else if ((quotient_mantissa & 0x7F) == 0x40) + { + quotient_mantissa++; + } + + if (quotient_mantissa & 0x0100) + { // Check if the implicit bit shifted to the left + quotient_mantissa >>= 1; + quotient_exp++; + } + else + { + quotient_mantissa &= 0x7F; // Remove the implicit bit + } + return MyFloat(quotient_mantissa, quotient_exp, quotient_sign); + } +}; + +int main() +{ + float a = 8; + float b = 0; + std::cout << a << std::endl; + + std::bitset bits(*reinterpret_cast(&a)); + std::cout << "Binary representation of " << a << " is \n" + << bits << std::endl; + std::cout << "Binary representation of " << b << " is \n" + << bits << std::endl; + + MyFloat bfloat_version_of_a(a); + MyFloat bfloat_version_of_b(b); + MyFloat c = bfloat_version_of_a / bfloat_version_of_b; + + // You can now print the result stored in c or perform other operations with it. 
+ + return 0; +} From 99c6a1af5a58cbd915f2a0ed9fe279d77d99498a Mon Sep 17 00:00:00 2001 From: Varsha Singhania Date: Mon, 17 Jun 2024 04:28:51 -0400 Subject: [PATCH 003/407] Tensor cores in Vortex --- ci/blackbox.sh | 12 +- hw/rtl/VX_config.vh | 18 ++ hw/rtl/VX_types.vh | 3 + kernel/include/vx_intrinsics.h | 20 ++ runtime/include/vortex.h | 2 + runtime/simx/vortex.cpp | 8 +- sim/simx/arch.h | 15 +- sim/simx/core.cpp | 4 +- sim/simx/core.h | 1 + sim/simx/decode.cpp | 19 ++ sim/simx/emulator.cpp | 18 +- sim/simx/emulator.h | 4 + sim/simx/execute.cpp | 179 +++++++++++++++ sim/simx/func_unit.cpp | 91 ++++++-- sim/simx/func_unit.h | 9 + sim/simx/instr.h | 2 +- sim/simx/instr_trace.h | 1 + sim/simx/main.cpp | 4 +- sim/simx/types.h | 23 +- tests/regression/matmul/Makefile | 14 ++ tests/regression/matmul/common.h | 17 ++ tests/regression/matmul/kernel.cpp | 124 +++++++++++ tests/regression/matmul/main.cpp | 343 +++++++++++++++++++++++++++++ 23 files changed, 899 insertions(+), 32 deletions(-) create mode 100644 tests/regression/matmul/Makefile create mode 100644 tests/regression/matmul/common.h create mode 100644 tests/regression/matmul/kernel.cpp create mode 100644 tests/regression/matmul/main.cpp diff --git a/ci/blackbox.sh b/ci/blackbox.sh index fe94677aa2..8a04133f97 100755 --- a/ci/blackbox.sh +++ b/ci/blackbox.sh @@ -48,6 +48,8 @@ PERF_CLASS=0 REBUILD=2 TEMPBUILD=0 LOGFILE=run.log +TC_SIZE=567 +TC_NUM=123 for i in "$@" do @@ -112,6 +114,14 @@ case $i in LOGFILE=${i#*=} shift ;; + --tc_size=*) + TC_SIZE=${i#*=} + shift + ;; + --tc_num=*) + TC_NUM=${i#*=} + shift + ;; --help) show_help exit 0 @@ -180,7 +190,7 @@ then fi CONFIGS="-DNUM_CLUSTERS=$CLUSTERS -DNUM_CORES=$CORES -DNUM_WARPS=$WARPS -DNUM_THREADS=$THREADS $L2 $L3 $PERF_FLAG $CONFIGS" - +CONFIGS="-DNUM_CLUSTERS=$CLUSTERS -DNUM_CORES=$CORES -DNUM_WARPS=$WARPS -DNUM_THREADS=$THREADS -DTC_NUM=$TC_NUM -DTC_SIZE=$TC_SIZE $L2 $L3 $PERF_FLAG $CONFIGS" echo "CONFIGS=$CONFIGS" if [ $REBUILD -ne 0 ] diff --git 
a/hw/rtl/VX_config.vh b/hw/rtl/VX_config.vh index 7fc8d14641..651234768c 100644 --- a/hw/rtl/VX_config.vh +++ b/hw/rtl/VX_config.vh @@ -111,6 +111,24 @@ `endif `define NUM_SOCKETS `UP(`NUM_CORES / `SOCKET_SIZE) +`ifndef TC_SIZE +`define TC_SIZE 4 +`endif + +`ifndef TC_NUM +`define TC_NUM 1 +`endif + +// Number of TCU units +`ifndef NUM_TCU_LANES +`define NUM_TCU_LANES `TC_NUM +`endif + +// Number of TCU units +`ifndef NUM_TCU_BLOCKS +`define NUM_TCU_BLOCKS `ISSUE_WIDTH +`endif + `ifdef L2_ENABLE `define L2_ENABLED 1 `else diff --git a/hw/rtl/VX_types.vh b/hw/rtl/VX_types.vh index e744a26f99..06929b0587 100644 --- a/hw/rtl/VX_types.vh +++ b/hw/rtl/VX_types.vh @@ -196,4 +196,7 @@ `define VX_CSR_NUM_CORES 12'hFC2 `define VX_CSR_LOCAL_MEM_BASE 12'hFC3 +`define VX_MAT_MUL_SIZE 12'hFC4 + + `endif // VX_TYPES_VH diff --git a/kernel/include/vx_intrinsics.h b/kernel/include/vx_intrinsics.h index 6000065e98..b67a770da1 100644 --- a/kernel/include/vx_intrinsics.h +++ b/kernel/include/vx_intrinsics.h @@ -221,6 +221,26 @@ inline void vx_fence() { __asm__ volatile ("fence iorw, iorw"); } +//Matrix load +//Converted instruction type cause destination registers were not getiing blocked otherwise +inline void mload(unsigned dest, unsigned addr) +{ + asm volatile (".insn i 0x7b, 0, x0, %0(%1)" :: "i"(dest), "r"(addr)); +} + +//mat store +inline void ms(unsigned addr) +{ + asm volatile (".insn i 0x7b, 1, x0, 0(%0)" :: "r"(addr)); +} + +//mat mul +//num tiles along reduced K dimension of matmul as imm value (can use rd,rs field to expand range of n_tiles from 12 bits) +inline void mm() +{ + asm volatile (".insn i 0x7b, 2, x0, 0(x0)"); +} + #ifdef __cplusplus } #endif diff --git a/runtime/include/vortex.h b/runtime/include/vortex.h index c9dd6ec365..f1a412b81b 100644 --- a/runtime/include/vortex.h +++ b/runtime/include/vortex.h @@ -34,6 +34,8 @@ typedef void* vx_buffer_h; #define VX_CAPS_GLOBAL_MEM_SIZE 0x5 #define VX_CAPS_LOCAL_MEM_SIZE 0x6 #define VX_CAPS_ISA_FLAGS 0x7 +#define 
VX_CAPS_TC_SIZE 0x8 +#define VX_CAPS_TC_NUM 0x9 // device isa flags #define VX_ISA_STD_A (1ull << 0) diff --git a/runtime/simx/vortex.cpp b/runtime/simx/vortex.cpp index 89856f3a0c..f65d7b385d 100644 --- a/runtime/simx/vortex.cpp +++ b/runtime/simx/vortex.cpp @@ -32,7 +32,7 @@ using namespace vortex; class vx_device { public: vx_device() - : arch_(NUM_THREADS, NUM_WARPS, NUM_CORES) + : arch_(NUM_THREADS, NUM_WARPS, NUM_CORES, TC_SIZE, TC_NUM) , ram_(0, RAM_PAGE_SIZE) , processor_(arch_) , global_mem_(ALLOC_BASE_ADDR, @@ -69,6 +69,12 @@ class vx_device { case VX_CAPS_NUM_CORES: _value = NUM_CORES * NUM_CLUSTERS; break; + case VX_CAPS_TC_SIZE: + _value = TC_SIZE; + break; + case VX_CAPS_TC_NUM: + _value = TC_NUM; + break; case VX_CAPS_CACHE_LINE_SIZE: _value = CACHE_BLOCK_SIZE; break; diff --git a/sim/simx/arch.h b/sim/simx/arch.h index 2507bf28fa..e35687dbd9 100644 --- a/sim/simx/arch.h +++ b/sim/simx/arch.h @@ -35,9 +35,11 @@ class Arch { uint16_t num_barriers_; uint16_t ipdom_size_; uint64_t local_mem_base_; + uint16_t tc_size_; + uint16_t tc_num_; public: - Arch(uint16_t num_threads, uint16_t num_warps, uint16_t num_cores) + Arch(uint16_t num_threads, uint16_t num_warps, uint16_t num_cores, uint64_t tc_size, uint64_t tc_num) : num_threads_(num_threads) , num_warps_(num_warps) , num_cores_(num_cores) @@ -49,6 +51,8 @@ class Arch { , num_barriers_(NUM_BARRIERS) , ipdom_size_((num_threads-1) * 2) , local_mem_base_(LMEM_BASE_ADDR) + , tc_size_ (tc_size) + , tc_num_ (tc_num) {} uint16_t vsize() const { @@ -94,6 +98,15 @@ class Arch { uint16_t socket_size() const { return socket_size_; } + + uint16_t tc_size() const { + return tc_size_; + } + + uint16_t tc_num() const { + return tc_num_; + } + }; } \ No newline at end of file diff --git a/sim/simx/core.cpp b/sim/simx/core.cpp index 0bd72524dd..7020cf8ffb 100644 --- a/sim/simx/core.cpp +++ b/sim/simx/core.cpp @@ -105,12 +105,14 @@ Core::Core(const SimContext& ctx, dispatchers_.at((int)FUType::FPU) = 
SimPlatform::instance().create_object(arch, 2, NUM_FPU_BLOCKS, NUM_FPU_LANES); dispatchers_.at((int)FUType::LSU) = SimPlatform::instance().create_object(arch, 2, NUM_LSU_BLOCKS, NUM_LSU_LANES); dispatchers_.at((int)FUType::SFU) = SimPlatform::instance().create_object(arch, 2, NUM_SFU_BLOCKS, NUM_SFU_LANES); - + dispatchers_.at((int)FUType::TCU) = SimPlatform::instance().create_object(arch, 2, NUM_TCU_BLOCKS, NUM_TCU_LANES); + // initialize execute units func_units_.at((int)FUType::ALU) = SimPlatform::instance().create_object(this); func_units_.at((int)FUType::FPU) = SimPlatform::instance().create_object(this); func_units_.at((int)FUType::LSU) = SimPlatform::instance().create_object(this); func_units_.at((int)FUType::SFU) = SimPlatform::instance().create_object(this); + func_units_.at((int)FUType::TCU) = SimPlatform::instance().create_object(this); // bind commit arbiters for (uint32_t i = 0; i < ISSUE_WIDTH; ++i) { diff --git a/sim/simx/core.h b/sim/simx/core.h index cc0e46c8cc..0b82de84ad 100644 --- a/sim/simx/core.h +++ b/sim/simx/core.h @@ -170,6 +170,7 @@ class Core : public SimObject { friend class AluUnit; friend class FpuUnit; friend class SfuUnit; + friend class TcuUnit; }; } // namespace vortex diff --git a/sim/simx/decode.cpp b/sim/simx/decode.cpp index f934524c3a..4d8d0a1054 100644 --- a/sim/simx/decode.cpp +++ b/sim/simx/decode.cpp @@ -51,6 +51,7 @@ static const std::unordered_map sc_instTable = { {Opcode::EXT2, InstType::R4}, {Opcode::R_W, InstType::R}, {Opcode::I_W, InstType::I}, + {Opcode::TCU, InstType::I}, }; enum Constants { @@ -405,6 +406,16 @@ static const char* op_string(const Instr &instr) { default: std::abort(); } + + case Opcode::TCU: + switch(func3) + { + case 0: return "ML"; // + case 1: return "MS"; // + case 2: return "MATMUL"; + default: + std::abort(); + } default: std::abort(); } @@ -543,6 +554,14 @@ std::shared_ptr Emulator::decode(uint32_t code) const { case InstType::I: { switch (op) { + case Opcode::TCU: { + 
instr->setDestReg(rs1, RegType::Integer); + instr->addSrcReg(rs1, RegType::Integer); + instr->setFunc3(func3); + instr->setFunc7(func7); + auto imm = code >> shift_rs2; + instr->setImm(sext(imm, width_i_imm)); + } break; case Opcode::I: case Opcode::I_W: case Opcode::JALR: diff --git a/sim/simx/emulator.cpp b/sim/simx/emulator.cpp index 5850bfd563..ea5f72c429 100644 --- a/sim/simx/emulator.cpp +++ b/sim/simx/emulator.cpp @@ -74,6 +74,7 @@ Emulator::Emulator(const Arch &arch, const DCRS &dcrs, Core* core) , core_(core) , warps_(arch.num_warps(), arch) , barriers_(arch.num_barriers(), 0) + , scratchpad(std::vector(core->arch().tc_size() * core->arch().tc_size() * 32768)) //Fix this { this->clear(); } @@ -110,6 +111,11 @@ void Emulator::clear() { active_warps_.set(0); warps_[0].tmask.set(0); wspawn_.valid = false; + + for (auto& reg : scratchpad) + { + reg = 0; + } } void Emulator::attach_ram(RAM* ram) { @@ -344,6 +350,11 @@ void Emulator::cout_flush() { case (addr + (VX_CSR_MPM_BASE_H-VX_CSR_MPM_BASE)) : return ((value >> 32) & 0xFFFFFFFF) #endif +Word Emulator::get_tiles() +{ + return mat_size; +} + Word Emulator::get_csr(uint32_t addr, uint32_t tid, uint32_t wid) { auto core_perf = core_->perf_stats(); switch (addr) { @@ -375,6 +386,8 @@ Word Emulator::get_csr(uint32_t addr, uint32_t tid, uint32_t wid) { case VX_CSR_NUM_CORES: return uint32_t(arch_.num_cores()) * arch_.num_clusters(); case VX_CSR_LOCAL_MEM_BASE: return arch_.local_mem_base(); case VX_CSR_MSCRATCH: return csr_mscratch_; + case VX_MAT_MUL_SIZE: return mat_size; + CSR_READ_64(VX_CSR_MCYCLE, core_perf.cycles); CSR_READ_64(VX_CSR_MINSTRET, core_perf.instrs); default: @@ -484,6 +497,9 @@ void Emulator::set_csr(uint32_t addr, Word value, uint32_t tid, uint32_t wid) { case VX_CSR_MNSTATUS: case VX_CSR_MCAUSE: break; + case VX_MAT_MUL_SIZE: + mat_size = value; + break; default: { std::cout << std::hex << "Error: invalid CSR write addr=0x" << addr << ", value=0x" << value << std::endl; std::abort(); @@ 
-500,4 +516,4 @@ void Emulator::update_fcrs(uint32_t fflags, uint32_t tid, uint32_t wid) { this->set_csr(VX_CSR_FCSR, this->get_csr(VX_CSR_FCSR, tid, wid) | fflags, tid, wid); this->set_csr(VX_CSR_FFLAGS, this->get_csr(VX_CSR_FFLAGS, tid, wid) | fflags, tid, wid); } -} \ No newline at end of file +} diff --git a/sim/simx/emulator.h b/sim/simx/emulator.h index 81dcecd832..82b5bc98bb 100644 --- a/sim/simx/emulator.h +++ b/sim/simx/emulator.h @@ -53,6 +53,8 @@ class Emulator { bool wspawn(uint32_t num_warps, Word nextPC); int get_exitcode() const; + + Word get_tiles(); private: @@ -121,6 +123,8 @@ class Emulator { MemoryUnit mmu_; Word csr_mscratch_; wspawn_t wspawn_; + std::vector scratchpad; + uint32_t mat_size; }; } diff --git a/sim/simx/execute.cpp b/sim/simx/execute.cpp index e0fc2b94a2..d522145db0 100644 --- a/sim/simx/execute.cpp +++ b/sim/simx/execute.cpp @@ -25,6 +25,7 @@ #include "emulator.h" #include "instr.h" #include "core.h" +#include "VX_types.h" using namespace vortex; @@ -1414,6 +1415,184 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) { std::abort(); } } break; + case Opcode::TCU: + { //TODO - make it data-type flexible + uint32_t mem_bytes = 1; + DP(3, "mem_bytes=" << mem_bytes << std::endl); + uint16_t tc_size = core_->arch().tc_size(); + uint32_t TC_per_warp = core_->arch().tc_num(); + + //Number of loads - dependant on the thread config + uint32_t n_tiles = this->get_csr(VX_MAT_MUL_SIZE, 0, wid); //CSR instruction before MLOAD will ensure that this csr has value + int num_data_per_thread; + int num_data_per_thread_st; + int num_threads_actv; + int num_threads_actv_st; + uint32_t data_bytes_load; + uint32_t data_bytes_store; + uint32_t num_threads_per_tc = MAX (1, num_threads/TC_per_warp); + + //int num_warps = MIN() + //int active_tcs = MIN (TC_per_warp, num_output_tiles/num_warps) + //LOAD + if(num_threads > tc_size*tc_size*n_tiles*TC_per_warp) + { + num_threads_actv = tc_size*tc_size*n_tiles*TC_per_warp; + 
num_data_per_thread = 1; + } + else + { + num_threads_actv = num_threads; + num_data_per_thread = (tc_size*tc_size*n_tiles)/num_threads_per_tc; + } + data_bytes_load = mem_bytes*num_data_per_thread; + + //STORE + + // DP(3, "DEBUG :: num_threads = " << num_threads); + // DP(3, "DEBUG :: tc_size*tc_size = " << tc_size*tc_size); + //DP(3, "imm = " << immsrc); + + if(num_threads > tc_size*tc_size*TC_per_warp) + { + num_threads_actv_st = tc_size*tc_size*TC_per_warp; + num_data_per_thread_st = 1; + } + else + { + num_threads_actv_st = num_threads; + num_data_per_thread_st = (tc_size*tc_size)/num_threads_per_tc; + } + data_bytes_store = mem_bytes*num_data_per_thread_st; + + DP(3, "Num Tiles=" << n_tiles << std::endl); + + switch (func3) { + case 0: + { //Matrix Load + + DP (4, "TCU LOAD"); + trace->fu_type = FUType::LSU; + trace->lsu_type = LsuType::TCU_LOAD; + + trace->used_iregs.set(rsrc0); + auto trace_data = std::make_shared(num_threads); + trace->data = trace_data; + + for (uint32_t t = thread_start; t < num_threads_actv; ++t) + { + if (!warp.tmask.test(t)) + continue; + DP(3, "Thread ID" << t); + + uint32_t base_addr = rsdata[t][0].i ; + trace_data->mem_addrs.at(t) = {base_addr, data_bytes_load}; + + //Load A or B (depends on immsrc) + int loop_offset = 0; + DP(3, "n_tiles = " << n_tiles << "; num_data_per_thread = " << num_data_per_thread <dcache_read(temp_ref, (base_addr+(n*mem_bytes)+(loop_offset*mem_bytes)), mem_bytes); + + scratchpad[loop_offset + (immsrc*(n_tiles)*tc_size*tc_size) + (t*num_data_per_thread) + n] = *temp_ref; + DP(3, "Scratchpad Index: " << loop_offset + (immsrc*(n_tiles)*tc_size*tc_size) + (t*num_data_per_thread) + n << ", Value: " << scratchpad[loop_offset + (immsrc*(n_tiles)*tc_size*tc_size) + (t*num_data_per_thread) + n]); + } + //loop_offset += tc_size*tc_size; + //} + } + rd_write = true; + } break; + case 1: + { + DP(4, "TCU STORE"); + trace->fu_type = FUType::LSU; + trace->lsu_type = LsuType::TCU_STORE; + + auto trace_data = 
std::make_shared(num_threads); + trace->data = trace_data; + uint32_t accu_offset = (n_tiles)*(n_tiles)*(n_tiles)*tc_size*tc_size*2; + + for (uint32_t t = thread_start; t < num_threads_actv_st; ++t) + { + if (!warp.tmask.test(t)) + continue; + + DP(3, "Thread ID" << t); + uint32_t base_addr = rsdata[t][0].i ; + + trace_data->mem_addrs.at(t) = {base_addr, data_bytes_store}; + + //Store C + for (int n=0; n csr (TODO :: can intermediate step of moving to CSR be skipped?) + //core_->set_csr(csr_addr[(2*num_data_per_thread) + n], scratchpad[(n_tiles*tc_size*tc_size*2) + (t*num_data_per_thread) + n], t, warp_id_); + Word* temp_ref = &(warp.ireg_file.at(t).at(rsrc0)); + *temp_ref = scratchpad[(n_tiles*tc_size*tc_size*2) + (t*num_data_per_thread_st) + n]; + + this->dcache_write(temp_ref, base_addr+(n*mem_bytes), mem_bytes); + } + } + //Clear the scratchpad + for(int i =0 ; i < scratchpad.size(); i++) + { + scratchpad[i] = 0; + } + } + break; + case 2: + { //Matrix Multiply + DP(4, "TCU MULTIPLY MAT"); + trace->fu_type = FUType::TCU; + trace->tcu_type = TCUType::TCU_MUL; + uint32_t accu_offset = (n_tiles)*(n_tiles)*(n_tiles)*tc_size*tc_size*2; + uint32_t threads_per_tc = MAX (1, num_threads/TC_per_warp); + for (uint32_t t = thread_start; t < num_threads_actv; ++t) + { + if (!warp.tmask.test(t)) + continue; + + DP(3, "Thread ID" << t); + //TC operation [only 1 thread in 1 warp needs to do this] + if (t%threads_per_tc == 0) + { + //TODO - change to systolic array implementation + uint32_t thread_offset = t*(tc_size*tc_size); + int loop_offset = 0; + int offset_b = n_tiles*n_tiles*n_tiles*tc_size*tc_size; + // Loop over all tiles - output stationary + //for(int tiles = 0 ; tiles < n_tiles ; tiles++) //What's the HW implication of this?? A counter implementation? 
+ //{ + /* + for (int i = 0; i < tc_size; i++) { //ROW-1 + for (int j = 0; j < tc_size; j++) { //COL-2 + int sum = 0; + for (int k = 0; k < tc_size; k++) + { //COL-1 + sum = sum + scratchpad[loop_offset + thread_offset*n_tiles + i * tc_size + k] *scratchpad[loop_offset + thread_offset*n_tiles + offset_b + (k * tc_size + j)]; + } + scratchpad[accu_offset + thread_offset +(i * tc_size + j)] += sum; //[i * col2 + j] = sum + DP(3, "Scratchpad Index: " << accu_offset + (i * tc_size + j) << " , Value=" << scratchpad[accu_offset + (i * tc_size + j)]); + + } + } + */ + //loop_offset += tc_size*tc_size; //Move to the next tiled matmul fragment + //} + } + } + + }break; + default: + std::abort(); + } + } break; default: std::abort(); } diff --git a/sim/simx/func_unit.cpp b/sim/simx/func_unit.cpp index c9a3f0fc71..3991a17e75 100644 --- a/sim/simx/func_unit.cpp +++ b/sim/simx/func_unit.cpp @@ -21,6 +21,7 @@ #include "core.h" #include "constants.h" #include "cache_sim.h" +#include "VX_types.h" using namespace vortex; @@ -162,7 +163,7 @@ void LsuUnit::tick() { continue; } - bool is_write = (trace->lsu_type == LsuType::STORE); + bool is_write = ((trace->lsu_type == LsuType::STORE) || (trace->lsu_type == LsuType::TCU_STORE)); // check pending queue capacity if (!is_write && state.pending_rd_reqs.full()) { @@ -175,13 +176,14 @@ void LsuUnit::tick() { } uint32_t tag = 0; + if (!is_write) { tag = state.pending_rd_reqs.allocate({trace, 0}); } // send memory request auto num_reqs = this->send_requests(trace, block_idx, tag); - + if (!is_write) { state.pending_rd_reqs.at(tag).count = num_reqs; } @@ -200,7 +202,14 @@ int LsuUnit::send_requests(instr_trace_t* trace, int block_idx, int tag) { int count = 0; auto trace_data = std::dynamic_pointer_cast(trace->data); - bool is_write = (trace->lsu_type == LsuType::STORE); + bool is_write = ((trace->lsu_type == LsuType::STORE) || (trace->lsu_type == LsuType::TCU_STORE)); + + uint16_t req_per_thread = 1; + if ((trace->lsu_type == 
LsuType::TCU_LOAD) || (trace->lsu_type == LsuType::TCU_STORE)) + { + req_per_thread= (1>(trace_data->mem_addrs.at(0).size)/4)? 1: ((trace_data->mem_addrs.at(0).size)/4); + } + auto t0 = trace->pid * NUM_LSU_LANES; for (uint32_t i = 0; i < NUM_LSU_LANES; ++i) { @@ -213,33 +222,69 @@ int LsuUnit::send_requests(instr_trace_t* trace, int block_idx, int tag) { auto mem_addr = trace_data->mem_addrs.at(t); auto type = get_addr_type(mem_addr.addr); - - MemReq mem_req; - mem_req.addr = mem_addr.addr; - mem_req.write = is_write; - mem_req.type = type; - mem_req.tag = tag; - mem_req.cid = trace->cid; - mem_req.uuid = trace->uuid; - - dcache_req_port.push(mem_req, 1); - DT(3, "mem-req: addr=0x" << std::hex << mem_req.addr << ", tag=" << tag - << ", lsu_type=" << trace->lsu_type << ", rid=" << req_idx << ", addr_type=" << mem_req.type << ", " << *trace); - - if (is_write) { - ++core_->perf_stats_.stores; - } else { - ++core_->perf_stats_.loads; - ++pending_loads_; + // DT(3, "addr_type = " << type << ", " << *trace); + uint32_t mem_bytes = 1; + for (int i = 0; i < req_per_thread; i++) + { + MemReq mem_req; + mem_req.addr = mem_addr.addr + (i*mem_bytes); + mem_req.write = is_write; + mem_req.type = type; + mem_req.tag = tag; + mem_req.cid = trace->cid; + mem_req.uuid = trace->uuid; + + dcache_req_port.push(mem_req, 1); + DT(3, "mem-req: addr=0x" << std::hex << mem_req.addr << ", tag=" << tag + << ", lsu_type=" << trace->lsu_type << ", rid=" << req_idx << ", addr_type=" << mem_req.type << ", " << *trace); + + if (is_write) { + ++core_->perf_stats_.stores; + } else { + ++core_->perf_stats_.loads; + ++pending_loads_; + } + + ++count; } - - ++count; } return count; } /////////////////////////////////////////////////////////////////////////////// +TcuUnit::TcuUnit(const SimContext& ctx, Core* core) + : FuncUnit(ctx, core, "TCU") + , tc_size (core_->arch().tc_size()) + {} + +void TcuUnit::tick() { + + for (uint32_t i = 0; i < ISSUE_WIDTH; ++i) { + auto& input = Inputs.at(i); + if 
(input.empty()) + continue; + auto& output = Outputs.at(i); + auto trace = input.front(); + uint32_t n_tiles = core_->emulator_.get_tiles(); + switch (trace->tcu_type) { + case TCUType::TCU_MUL: + { //mat size = n_tiles * tc_size + int matmul_latency = (n_tiles * tc_size) + tc_size + tc_size; + output.push(trace, matmul_latency); + DT(3, "matmul_latency = " << matmul_latency << ", " << *trace); + break; + } + default: + std::abort(); + } + DT(3, "pipeline-execute: op=" << trace->tcu_type << ", " << *trace); + input.pop(); + } +} + +/////////////////////////////////////////////////////////////////////////////// + SfuUnit::SfuUnit(const SimContext& ctx, Core* core) : FuncUnit(ctx, core, "SFU") {} diff --git a/sim/simx/func_unit.h b/sim/simx/func_unit.h index 45f0152ffd..5fc922991c 100644 --- a/sim/simx/func_unit.h +++ b/sim/simx/func_unit.h @@ -100,6 +100,15 @@ class LsuUnit : public FuncUnit { /////////////////////////////////////////////////////////////////////////////// +class TcuUnit : public FuncUnit { +public: + TcuUnit(const SimContext& ctx, Core*); + uint64_t tc_size; + void tick(); +}; + +/////////////////////////////////////////////////////////////////////////////// + class SfuUnit : public FuncUnit { public: SfuUnit(const SimContext& ctx, Core*); diff --git a/sim/simx/instr.h b/sim/simx/instr.h index f97a19eacf..061b4deb09 100644 --- a/sim/simx/instr.h +++ b/sim/simx/instr.h @@ -46,7 +46,7 @@ enum class Opcode { EXT1 = 0x0b, EXT2 = 0x2b, EXT3 = 0x5b, - EXT4 = 0x7b + TCU = 0x7b }; enum class InstType { diff --git a/sim/simx/instr_trace.h b/sim/simx/instr_trace.h index 532b736f51..9d6859fb73 100644 --- a/sim/simx/instr_trace.h +++ b/sim/simx/instr_trace.h @@ -75,6 +75,7 @@ struct instr_trace_t { AluType alu_type; FpuType fpu_type; SfuType sfu_type; + TCUType tcu_type; }; ITraceData::Ptr data; diff --git a/sim/simx/main.cpp b/sim/simx/main.cpp index 0f61de6f4d..58eb96d612 100644 --- a/sim/simx/main.cpp +++ b/sim/simx/main.cpp @@ -35,6 +35,8 @@ static void 
show_usage() { uint32_t num_threads = NUM_THREADS; uint32_t num_warps = NUM_WARPS; uint32_t num_cores = NUM_CORES; +uint32_t tc_size = TC_SIZE; +uint32_t tc_num = TC_NUM; bool showStats = false; const char* program = nullptr; @@ -81,7 +83,7 @@ int main(int argc, char **argv) { { // create processor configuation - Arch arch(num_threads, num_warps, num_cores); + Arch arch(num_threads, num_warps, num_cores, tc_size, tc_num); // create memory module RAM ram(0, RAM_PAGE_SIZE); diff --git a/sim/simx/types.h b/sim/simx/types.h index a84216ae12..15623ce399 100644 --- a/sim/simx/types.h +++ b/sim/simx/types.h @@ -23,6 +23,7 @@ #include #include #include "debug.h" +#include namespace vortex { @@ -78,6 +79,7 @@ enum class FUType { LSU, FPU, SFU, + TCU, Count }; @@ -87,6 +89,7 @@ inline std::ostream &operator<<(std::ostream &os, const FUType& type) { case FUType::LSU: os << "LSU"; break; case FUType::FPU: os << "FPU"; break; case FUType::SFU: os << "SFU"; break; + case FUType::TCU: os << "TCU"; break; default: assert(false); } return os; @@ -118,14 +121,30 @@ inline std::ostream &operator<<(std::ostream &os, const AluType& type) { enum class LsuType { LOAD, + TCU_LOAD, STORE, + TCU_STORE, FENCE }; +enum class TCUType { + TCU_MUL +}; + +inline std::ostream &operator<<(std::ostream &os, const TCUType& type) { + switch (type) { + case TCUType::TCU_MUL: os << "TCU MUL"; break; + default: assert(false); + } + return os; +} + inline std::ostream &operator<<(std::ostream &os, const LsuType& type) { switch (type) { case LsuType::LOAD: os << "LOAD"; break; + case LsuType::TCU_LOAD: os << "TCU_LOAD"; break; case LsuType::STORE: os << "STORE"; break; + case LsuType::TCU_STORE: os << "TCU_STORE"; break; case LsuType::FENCE: os << "FENCE"; break; default: assert(false); } @@ -383,7 +402,7 @@ class Mux : public SimObject> { , type_(type) , delay_(delay) , cursors_(num_outputs, 0) - , num_reqs_(num_inputs / num_outputs) + , num_reqs_(log2ceil(num_inputs / num_outputs)) { assert(delay != 0); 
assert(num_inputs <= 32); @@ -407,7 +426,7 @@ class Mux : public SimObject> { void tick() { uint32_t I = Inputs.size(); uint32_t O = Outputs.size(); - uint32_t R = num_reqs_; + uint32_t R = 1 << num_reqs_; // skip bypass mode if (I == O) diff --git a/tests/regression/matmul/Makefile b/tests/regression/matmul/Makefile new file mode 100644 index 0000000000..7f1c485239 --- /dev/null +++ b/tests/regression/matmul/Makefile @@ -0,0 +1,14 @@ +ROOT_DIR := $(realpath ../../..) +include $(ROOT_DIR)/config.mk + +PROJECT := matmul + +SRC_DIR := $(VORTEX_HOME)/tests/regression/$(PROJECT) + +SRCS := $(SRC_DIR)/main.cpp + +VX_SRCS := $(SRC_DIR)/kernel.cpp + +OPTS ?= -n128 -d1 + +include ../common.mk diff --git a/tests/regression/matmul/common.h b/tests/regression/matmul/common.h new file mode 100644 index 0000000000..a9aa5de6c8 --- /dev/null +++ b/tests/regression/matmul/common.h @@ -0,0 +1,17 @@ +#ifndef _COMMON_H_ +#define _COMMON_H_ + +typedef struct { + uint32_t num_tasks; + uint32_t num_warps; + uint32_t num_threads; + uint32_t TC_per_warp; + uint32_t matrix_size; + uint32_t data_size; + uint64_t tc_size; + uint64_t src0_addr; + uint64_t src1_addr; + uint64_t dst_addr; +} kernel_arg_t; + +#endif \ No newline at end of file diff --git a/tests/regression/matmul/kernel.cpp b/tests/regression/matmul/kernel.cpp new file mode 100644 index 0000000000..eeb902acb4 --- /dev/null +++ b/tests/regression/matmul/kernel.cpp @@ -0,0 +1,124 @@ +#include +#include +#include +#include "common.h" + +void kernel_body(kernel_arg_t* __UNIFORM__ arg) { + uint32_t task_id = blockIdx.x; + int32_t* src0_ptr = (int32_t*)arg->src0_addr; + int32_t* src1_ptr = (int32_t*)arg->src1_addr; + int32_t* dst_ptr = (int32_t*)arg->dst_addr; + unsigned a_addr = reinterpret_cast(src0_ptr); + unsigned b_addr = reinterpret_cast(src1_ptr); + unsigned c_addr = reinterpret_cast(dst_ptr); + + uint32_t tc_size = arg->tc_size; + int TC_per_warp = arg->TC_per_warp; + unsigned num_threads = arg->num_threads; + int num_warps = 
arg->num_warps; + uint32_t matrix_size = arg->matrix_size; + + int n_tiles = matrix_size/tc_size; + int num_output_tiles = (matrix_size*matrix_size)/(tc_size*tc_size); + + int num_tasks = arg->num_tasks; + + //Assuming matrix size always > tensor core size + int warps_actual; + if (TC_per_warp > num_output_tiles) + warps_actual = 1; + else + warps_actual = num_output_tiles/TC_per_warp; + + int num_warps_actual = (warps_actual < num_warps)? warps_actual: num_warps; + int num_threads_per_tc = (1> num_threads/TC_per_warp)? 1: num_threads/TC_per_warp; + + int num_tasks_per_thread = (1> (num_tasks/(num_threads*num_warps_actual)))? 1: (num_tasks/(num_threads*num_warps_actual)); + int num_tasks_per_warp = (1 > num_tasks/num_warps_actual)? 1:num_tasks/num_warps_actual; + int task_id_first_warp = task_id%num_tasks_per_warp; + + //A&B + int num_data_per_op_tile = tc_size*tc_size*n_tiles; + int num_data_per_warp = num_data_per_op_tile*((1> (num_output_tiles/num_warps_actual))?1:(num_output_tiles/num_warps_actual)); + + int addr_shift; + if (((tc_size*tc_size*n_tiles)/(num_threads)) > 1) + addr_shift = (tc_size*tc_size*n_tiles)/(num_threads); + else + addr_shift = 1; + //Offset for 1st warp + int offset = ((task_id_first_warp/num_tasks_per_thread)*addr_shift) + ((task_id_first_warp%num_tasks_per_thread)*num_data_per_op_tile); + offset = offset + (num_data_per_warp*(task_id/num_tasks_per_warp)); + + //C + int num_data_per_op_tile_c = tc_size*tc_size; + int num_data_per_warp_c = num_data_per_warp/n_tiles; + + int addr_shift_c; + if (((tc_size*tc_size)/(num_threads)) > 1) + addr_shift_c = tc_size; + else + addr_shift_c = 1; + //Offset for 1st warp + int offset_c = ((task_id_first_warp/num_tasks_per_thread)*addr_shift_c) + ((task_id_first_warp%num_tasks_per_thread)*num_data_per_op_tile_c); + offset_c = offset_c + (num_data_per_warp_c*(task_id/num_tasks_per_warp)); + + int thread_limit = (num_threads < tc_size*tc_size*n_tiles*TC_per_warp)? 
num_threads : tc_size*tc_size*n_tiles*TC_per_warp; + int thread_limit_c = (num_threads 64 tasks => 32 tasks/warp => 8 tasks/thread + /*task0->thread0, warp0 + task1->thread0 , warp0 + task2->thread0 , warp0 + . + task7->thread0 + task8->thread1 + task9->thread1 + . + . + ------ + task32 -> thread0, warp1 + task33 -> thread1, warp1 + . + */ + + //NEW TASK DISTRIBUTION // For 8x8 matrix, 2x2 tc_size, 1 tc_num, 4threads, 2warps => 64 tasks => 32 tasks/warp => 8 tasks/thread + /*task0->thread0, warp0 + task1->thread1 , warp0 + task2->thread2 , warp0 + task3->thread3 ,... + task4->thread0 + task5->thread1 + . + . + ------ + task32 -> thread0, warp1 + task33 -> thread1, warp1 + . + .*/ + + //TODO :: change this for new task->thread distribution + if (((task_id%num_tasks_per_warp)/num_tasks_per_thread) < thread_limit) + { + unsigned a_addr_base = a_addr + offset*arg->data_size; + unsigned b_addr_base = b_addr + offset*arg->data_size; + unsigned c_addr_base = c_addr + offset_c*arg->data_size; + csr_write(VX_MAT_MUL_SIZE,n_tiles); + mload (0, a_addr_base); + mload (1, b_addr_base); + //In case of multiple threads - sync load + vx_fence(); + + mm(); //Assuming padding to ensure matrix size is a multiple of tc_size + vx_fence(); + if (((task_id%num_tasks_per_warp)/num_tasks_per_thread) < thread_limit_c) + ms(c_addr_base); + //In case of multiple threads - sync store + vx_fence(); + } +} + +int main() { + kernel_arg_t* arg = (kernel_arg_t*)csr_read(VX_CSR_MSCRATCH); + return vx_spawn_threads(1, &arg->num_tasks, nullptr, (vx_kernel_func_cb)kernel_body, arg); +} diff --git a/tests/regression/matmul/main.cpp b/tests/regression/matmul/main.cpp new file mode 100644 index 0000000000..6a86712aef --- /dev/null +++ b/tests/regression/matmul/main.cpp @@ -0,0 +1,343 @@ +#include +#include +#include +#include +#include +#include +#include +#include "common.h" + +#define RT_CHECK(_expr) \ + do { \ + int _ret = _expr; \ + if (0 == _ret) \ + break; \ + printf("Error: '%s' returned %d!\n", 
#_expr, (int)_ret); \ + cleanup(); \ + exit(-1); \ + } while (false) + +/////////////////////////////////////////////////////////////////////////////// + +const char* kernel_file = "kernel.vxbin"; +uint32_t matrix_size = 0; +vx_device_h device = nullptr; +vx_buffer_h A_buffer = nullptr; +vx_buffer_h B_buffer = nullptr; +vx_buffer_h C_buffer = nullptr; +vx_buffer_h krnl_buffer = nullptr; +vx_buffer_h args_buffer = nullptr; + +std::vector staging_buf; +kernel_arg_t kernel_arg = {}; + +static void show_usage() { + std::cout << "Vortex Test." << std::endl; + std::cout << "Usage: [-k: kernel] [-n words] [-h: help]" << std::endl; +} + +static void parse_args(int argc, char **argv, uint32_t &data_size) { + int c; + while ((c = getopt(argc, argv, "n:k:d:h?")) != -1) { + switch (c) { + case 'n': + matrix_size = atoi(optarg); + break; + case 'k': + kernel_file = optarg; + break; + case 'd': + data_size = atoi(optarg); + break; + case 'h': + case '?': { + show_usage(); + exit(0); + } break; + default: + show_usage(); + exit(-1); + } + } +} + +void cleanup() { + if (device) { + vx_mem_free(A_buffer); + vx_mem_free(B_buffer); + vx_mem_free(C_buffer); + vx_mem_free(krnl_buffer); + vx_mem_free(args_buffer); + vx_dev_close(device); + } +} + +template +class mainVariables +{ + public: + // Constructor + mainVariables(uint32_t bufSize, uint32_t dataSize, uint32_t matrixSize) + : buf_size(bufSize), data_size(dataSize), matrix_size(matrixSize) + { + // Resize vectors to specified sizes + src_A.resize(buf_size/data_size); + src_B.resize(buf_size/data_size); + refs.resize(buf_size/data_size); + } + + void init_inputs () + { + std::cout << "inside init" << std::endl; + for (uint32_t i = 0; i < matrix_size*matrix_size; ++i) + { + auto a = static_cast(std::rand()) / RAND_MAX; + auto b = static_cast(std::rand()) / RAND_MAX; + src_A[i] = static_cast(a * matrix_size); + src_B[i] = static_cast(b * matrix_size); + } + } + + void matmul_cpu() + { + for (uint32_t row = 0; row < matrix_size; 
++row) + { + for (uint32_t col = 0; col < matrix_size; ++col) + { + TYPE sum(0); + for (uint32_t e = 0; e < matrix_size; ++e) { + sum += src_A[row * matrix_size + e] * src_B[e * matrix_size + col]; + } + refs[row * matrix_size + col] = sum; + } + } + } + + //Public variables + std::vector src_A; + std::vector src_B; + std::vector refs; + + std::vector A_mat; + std::vector B_mat; + + private: + uint32_t buf_size; + uint32_t data_size; + uint32_t matrix_size; +}; + + + +int main(int argc, char *argv[]) { + // parse command arguments + uint32_t data_size = 0; + parse_args(argc, argv, data_size); + if (matrix_size == 0) { + matrix_size = 2; + } + + // open device connection + std::cout << "open device connection" << std::endl; + RT_CHECK(vx_dev_open(&device)); + + uint64_t num_cores, num_warps, num_threads, tc_size, TC_per_warp; + RT_CHECK(vx_dev_caps(device, VX_CAPS_NUM_CORES, &num_cores)); + RT_CHECK(vx_dev_caps(device, VX_CAPS_NUM_WARPS, &num_warps)); + RT_CHECK(vx_dev_caps(device, VX_CAPS_NUM_THREADS, &num_threads)); + RT_CHECK(vx_dev_caps(device, VX_CAPS_TC_SIZE, &tc_size)); + RT_CHECK(vx_dev_caps(device, VX_CAPS_TC_NUM, &TC_per_warp)); + + std::cout << "Debug :: tc_size = " << tc_size << std::endl; + std::cout << "Debug :: tc_num = " << TC_per_warp << std::endl; + + int threads_per_tc; + //TODO - can be changed + //Number of output tiles * number of threads + if (TC_per_warp > num_threads) + threads_per_tc = 1; + else + threads_per_tc = num_threads/TC_per_warp; + + uint32_t num_tasks = ((matrix_size*matrix_size)/(tc_size*tc_size))*threads_per_tc; + + //size of each operand + uint32_t buf_size = ((matrix_size*matrix_size)/(tc_size*tc_size))*(matrix_size/(tc_size))*(tc_size*tc_size)*data_size; + + //256 + std::cout << "Debug :: buf_size: " << buf_size << " bytes" << std::endl; + + // allocate device memory + std::cout << "allocate device memory" << std::endl; + + RT_CHECK(vx_mem_alloc(device, buf_size, VX_MEM_READ, &A_buffer)); + RT_CHECK(vx_mem_address(A_buffer, 
&kernel_arg.src0_addr)); + RT_CHECK(vx_mem_alloc(device, buf_size, VX_MEM_READ, &B_buffer)); + RT_CHECK(vx_mem_address(B_buffer, &kernel_arg.src1_addr)); + RT_CHECK(vx_mem_alloc(device, buf_size, VX_MEM_WRITE, &C_buffer)); + RT_CHECK(vx_mem_address(C_buffer, &kernel_arg.dst_addr)); + + std::cout << "A_addr=0x" << std::hex << kernel_arg.src0_addr << std::endl; + std::cout << "B_addr=0x" << std::hex << kernel_arg.src1_addr << std::endl; + std::cout << "C_addr=0x" << std::hex << kernel_arg.dst_addr << std::endl; + + mainVariables variables (buf_size, data_size, matrix_size); + variables.init_inputs(); + + ////////////////////////////////////////////////// + // generate source data + ////////////////////////////////////////////////// + variables.matmul_cpu(); + + uint32_t tc_size_f = tc_size*tc_size; + uint32_t n_tiles = matrix_size/tc_size; + + variables.A_mat.resize(buf_size); + variables.B_mat.resize(buf_size); + + //Demand matrix creation for A / traverse through the rows + for(uint32_t k=0; k(time_end - time_start).count(); + printf("Elapsed time: %lg ms\n", elapsed); + + // download destination buffer + std::cout << "download destination buffer" << std::endl; + RT_CHECK(vx_copy_from_dev((int8_t*)variables.B_mat.data(), C_buffer, 0, buf_size)); + + // verify result (TODO : needs to be fixed for for functional correctness) + /* + std::cout << "verify result" << std::endl; + { + int errors = 0; + auto buf_ptr = (int8_t*)staging_buf.data(); + uint64_t tc_size = kernel_arg.tc_size; + std::cout << "tc_size = " << tc_size << std::endl; + int Result[matrix_size*matrix_size]; + int n_tiles = (matrix_size/tc_size); + int tc_size_f = tc_size*tc_size; + + //converting buf ptr (tile by tile) to CPU style linear (row by row) + for(int k = 0; k < matrix_size/tc_size; k+= 1) + { + for(int j = 0; j < matrix_size; j+= tc_size) + { + for(int i =0; i < tc_size*tc_size; i++) + { + Result[ tc_size*matrix_size*k +j+ (i/tc_size)*matrix_size +i%(tc_size)] = 
buf_ptr[matrix_size*tc_size*k+tc_size*j+i]; + } + } + } + + for (uint32_t i = 0; i < matrix_size*matrix_size; ++i) { + //int ref = i + i; + int cur = Result[i]; + if (cur != refs[i]) { + ++errors; + } + } + if (errors != 0) { + std::cout << "Found " << std::dec << errors << " errors!" << std::endl; + std::cout << "FAILED!" << std::endl; + return 1; + } + else + { + std::cout << "CONDITIONALLY PASSED!" << std::endl; + } + } + */ + + // cleanup + std::cout << "cleanup" << std::endl; + cleanup(); + + std::cout << "PASSED!" << std::endl; + + return 0; +} \ No newline at end of file From 0e3badf723c9e2d03013237433aa0838b21c7f99 Mon Sep 17 00:00:00 2001 From: Varsha Singhania Date: Tue, 18 Jun 2024 02:19:57 -0400 Subject: [PATCH 004/407] Script checkin and code cleanup --- run_final.sh | 22 ++++++++++++++++++++++ sim/simx/execute.cpp | 28 +++++++++------------------- 2 files changed, 31 insertions(+), 19 deletions(-) create mode 100755 run_final.sh diff --git a/run_final.sh b/run_final.sh new file mode 100755 index 0000000000..5f618dc64c --- /dev/null +++ b/run_final.sh @@ -0,0 +1,22 @@ +# Define arrays for threads, warps, and matrix sizes +matrix_sizes=(16 32 64 128 256 512) +tcsizes=(8 16 32) +tcnums=(4 8 16 32) +#lsulanes=(4 16) +#cores=(32) + + +# Loop through each combination of threads and warps +for size in "${matrix_sizes[@]}"; do + sed -i "s/OPTS ?= -n[0-9]\+/OPTS ?= -n${size}/" ../tests/regression/matmul/Makefile + sed -i "s/OPTS ?= -n[0-9]\+/OPTS ?= -n${size}/" tests/regression/matmul/Makefile + echo "Matrix size changed to ${size} in Makefile" + for tcsize in "${tcsizes[@]}"; do + for tcnum in "${tcnums[@]}"; do + log_name="sim_final/mat${size}/tcsize${tcsize}_tcnum${tcnum}_32w32t" + command="./ci/blackbox.sh --cores=4 --app=matmul --driver=simx --threads=32 --warps=32 --tc_size=${tcsize} --tc_num=${tcnum} --rebuild=1 --perf=1 > ${log_name} 2>&1" + echo "$command" + eval "$command" + done + done +done diff --git a/sim/simx/execute.cpp b/sim/simx/execute.cpp 
index d522145db0..e13df18b9d 100644 --- a/sim/simx/execute.cpp +++ b/sim/simx/execute.cpp @@ -1432,8 +1432,6 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) { uint32_t data_bytes_store; uint32_t num_threads_per_tc = MAX (1, num_threads/TC_per_warp); - //int num_warps = MIN() - //int active_tcs = MIN (TC_per_warp, num_output_tiles/num_warps) //LOAD if(num_threads > tc_size*tc_size*n_tiles*TC_per_warp) { @@ -1448,11 +1446,6 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) { data_bytes_load = mem_bytes*num_data_per_thread; //STORE - - // DP(3, "DEBUG :: num_threads = " << num_threads); - // DP(3, "DEBUG :: tc_size*tc_size = " << tc_size*tc_size); - //DP(3, "imm = " << immsrc); - if(num_threads > tc_size*tc_size*TC_per_warp) { num_threads_actv_st = tc_size*tc_size*TC_per_warp; @@ -1499,8 +1492,6 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) { scratchpad[loop_offset + (immsrc*(n_tiles)*tc_size*tc_size) + (t*num_data_per_thread) + n] = *temp_ref; DP(3, "Scratchpad Index: " << loop_offset + (immsrc*(n_tiles)*tc_size*tc_size) + (t*num_data_per_thread) + n << ", Value: " << scratchpad[loop_offset + (immsrc*(n_tiles)*tc_size*tc_size) + (t*num_data_per_thread) + n]); } - //loop_offset += tc_size*tc_size; - //} } rd_write = true; } break; @@ -1531,7 +1522,7 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) { uint32_t csr_index = (2*num_data_per_thread_st) + n; uint32_t scratchpad_index = (tc_size*tc_size*2) + (t*num_data_per_thread) + n; - //scratchpad -> csr (TODO :: can intermediate step of moving to CSR be skipped?) 
+ //scratchpad -> csr (TODO :: removed intermediate CSR stage ; incorporate limited scratchmad implementation) //core_->set_csr(csr_addr[(2*num_data_per_thread) + n], scratchpad[(n_tiles*tc_size*tc_size*2) + (t*num_data_per_thread) + n], t, warp_id_); Word* temp_ref = &(warp.ireg_file.at(t).at(rsrc0)); *temp_ref = scratchpad[(n_tiles*tc_size*tc_size*2) + (t*num_data_per_thread_st) + n]; @@ -1562,14 +1553,14 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) { //TC operation [only 1 thread in 1 warp needs to do this] if (t%threads_per_tc == 0) { - //TODO - change to systolic array implementation + //TODO : change to systolic array implementation uint32_t thread_offset = t*(tc_size*tc_size); int loop_offset = 0; int offset_b = n_tiles*n_tiles*n_tiles*tc_size*tc_size; - // Loop over all tiles - output stationary - //for(int tiles = 0 ; tiles < n_tiles ; tiles++) //What's the HW implication of this?? A counter implementation? - //{ - /* + /* + // TODO : Fix needed for functional correctness + for(int tiles = 0 ; tiles < n_tiles ; tiles++) //What's the HW implication of this?? A counter implementation? 
+ { for (int i = 0; i < tc_size; i++) { //ROW-1 for (int j = 0; j < tc_size; j++) { //COL-2 int sum = 0; @@ -1579,12 +1570,11 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) { } scratchpad[accu_offset + thread_offset +(i * tc_size + j)] += sum; //[i * col2 + j] = sum DP(3, "Scratchpad Index: " << accu_offset + (i * tc_size + j) << " , Value=" << scratchpad[accu_offset + (i * tc_size + j)]); - } } - */ - //loop_offset += tc_size*tc_size; //Move to the next tiled matmul fragment - //} + loop_offset += tc_size*tc_size; //Move to the next tiled matmul fragment + } + */ } } From a378aed67cc7d891201124eb5b51059ec3989252 Mon Sep 17 00:00:00 2001 From: Nayan Sivakumar Nair Date: Fri, 21 Jun 2024 22:23:24 -0400 Subject: [PATCH 005/407] Moved tc_num, tc_size param to makefile args --- ci/blackbox.sh | 10 +--------- ci/regression.sh.in | 1 + hw/rtl/VX_types.vh | 3 +++ runtime/simx/vortex.cpp | 14 +++++++------- sim/simx/arch.h | 14 +------------- sim/simx/emulator.cpp | 16 +++++++++++++++- sim/simx/emulator.h | 3 +++ sim/simx/execute.cpp | 7 +++++-- sim/simx/func_unit.cpp | 4 +++- sim/simx/func_unit.h | 2 +- sim/simx/main.cpp | 2 +- tests/regression/matmul/Makefile | 2 +- tests/regression/matmul/kernel.cpp | 5 ++++- tests/regression/matmul/main.cpp | 29 ++++++++++++++++++++++++----- 14 files changed, 70 insertions(+), 42 deletions(-) diff --git a/ci/blackbox.sh b/ci/blackbox.sh index 8a04133f97..defad4c059 100755 --- a/ci/blackbox.sh +++ b/ci/blackbox.sh @@ -114,14 +114,6 @@ case $i in LOGFILE=${i#*=} shift ;; - --tc_size=*) - TC_SIZE=${i#*=} - shift - ;; - --tc_num=*) - TC_NUM=${i#*=} - shift - ;; --help) show_help exit 0 @@ -190,7 +182,7 @@ then fi CONFIGS="-DNUM_CLUSTERS=$CLUSTERS -DNUM_CORES=$CORES -DNUM_WARPS=$WARPS -DNUM_THREADS=$THREADS $L2 $L3 $PERF_FLAG $CONFIGS" -CONFIGS="-DNUM_CLUSTERS=$CLUSTERS -DNUM_CORES=$CORES -DNUM_WARPS=$WARPS -DNUM_THREADS=$THREADS -DTC_NUM=$TC_NUM -DTC_SIZE=$TC_SIZE $L2 $L3 $PERF_FLAG $CONFIGS" +# 
CONFIGS="-DNUM_CLUSTERS=$CLUSTERS -DNUM_CORES=$CORES -DNUM_WARPS=$WARPS -DNUM_THREADS=$THREADS -DTC_NUM=$TC_NUM -DTC_SIZE=$TC_SIZE $L2 $L3 $PERF_FLAG $CONFIGS" echo "CONFIGS=$CONFIGS" if [ $REBUILD -ne 0 ] diff --git a/ci/regression.sh.in b/ci/regression.sh.in index a5f1bffdb0..50d309af6c 100755 --- a/ci/regression.sh.in +++ b/ci/regression.sh.in @@ -124,6 +124,7 @@ regression() # test local barrier ./ci/blackbox.sh --driver=simx --app=dogfood --args="-n1 -tbar" ./ci/blackbox.sh --driver=rtlsim --app=dogfood --args="-n1 -tbar" + echo "regression tests done!" } diff --git a/hw/rtl/VX_types.vh b/hw/rtl/VX_types.vh index 06929b0587..9a8f932349 100644 --- a/hw/rtl/VX_types.vh +++ b/hw/rtl/VX_types.vh @@ -197,6 +197,9 @@ `define VX_CSR_LOCAL_MEM_BASE 12'hFC3 `define VX_MAT_MUL_SIZE 12'hFC4 +`define VX_TC_NUM 12'hFC5 +`define VX_TC_SIZE 12'hFC6 + `endif // VX_TYPES_VH diff --git a/runtime/simx/vortex.cpp b/runtime/simx/vortex.cpp index f65d7b385d..4210ab0b6e 100644 --- a/runtime/simx/vortex.cpp +++ b/runtime/simx/vortex.cpp @@ -32,7 +32,7 @@ using namespace vortex; class vx_device { public: vx_device() - : arch_(NUM_THREADS, NUM_WARPS, NUM_CORES, TC_SIZE, TC_NUM) + : arch_(NUM_THREADS, NUM_WARPS, NUM_CORES) , ram_(0, RAM_PAGE_SIZE) , processor_(arch_) , global_mem_(ALLOC_BASE_ADDR, @@ -69,12 +69,12 @@ class vx_device { case VX_CAPS_NUM_CORES: _value = NUM_CORES * NUM_CLUSTERS; break; - case VX_CAPS_TC_SIZE: - _value = TC_SIZE; - break; - case VX_CAPS_TC_NUM: - _value = TC_NUM; - break; + // case VX_CAPS_TC_SIZE: + // _value = TC_SIZE; + // break; + // case VX_CAPS_TC_NUM: + // _value = TC_NUM; + // break; case VX_CAPS_CACHE_LINE_SIZE: _value = CACHE_BLOCK_SIZE; break; diff --git a/sim/simx/arch.h b/sim/simx/arch.h index e35687dbd9..9af266d7ad 100644 --- a/sim/simx/arch.h +++ b/sim/simx/arch.h @@ -35,11 +35,9 @@ class Arch { uint16_t num_barriers_; uint16_t ipdom_size_; uint64_t local_mem_base_; - uint16_t tc_size_; - uint16_t tc_num_; public: - Arch(uint16_t num_threads, 
uint16_t num_warps, uint16_t num_cores, uint64_t tc_size, uint64_t tc_num) + Arch(uint16_t num_threads, uint16_t num_warps, uint16_t num_cores) : num_threads_(num_threads) , num_warps_(num_warps) , num_cores_(num_cores) @@ -51,8 +49,6 @@ class Arch { , num_barriers_(NUM_BARRIERS) , ipdom_size_((num_threads-1) * 2) , local_mem_base_(LMEM_BASE_ADDR) - , tc_size_ (tc_size) - , tc_num_ (tc_num) {} uint16_t vsize() const { @@ -98,14 +94,6 @@ class Arch { uint16_t socket_size() const { return socket_size_; } - - uint16_t tc_size() const { - return tc_size_; - } - - uint16_t tc_num() const { - return tc_num_; - } }; diff --git a/sim/simx/emulator.cpp b/sim/simx/emulator.cpp index ea5f72c429..d2faf7f98d 100644 --- a/sim/simx/emulator.cpp +++ b/sim/simx/emulator.cpp @@ -74,7 +74,7 @@ Emulator::Emulator(const Arch &arch, const DCRS &dcrs, Core* core) , core_(core) , warps_(arch.num_warps(), arch) , barriers_(arch.num_barriers(), 0) - , scratchpad(std::vector(core->arch().tc_size() * core->arch().tc_size() * 32768)) //Fix this + , scratchpad(std::vector(32 * 32 * 32768)) //Fix this : Max TC_SIZE = 32 { this->clear(); } @@ -355,6 +355,11 @@ Word Emulator::get_tiles() return mat_size; } +Word Emulator::get_tc_size() +{ + return tc_size; +} + Word Emulator::get_csr(uint32_t addr, uint32_t tid, uint32_t wid) { auto core_perf = core_->perf_stats(); switch (addr) { @@ -387,6 +392,8 @@ Word Emulator::get_csr(uint32_t addr, uint32_t tid, uint32_t wid) { case VX_CSR_LOCAL_MEM_BASE: return arch_.local_mem_base(); case VX_CSR_MSCRATCH: return csr_mscratch_; case VX_MAT_MUL_SIZE: return mat_size; + case VX_TC_NUM: return tc_num; + case VX_TC_SIZE: return tc_size; CSR_READ_64(VX_CSR_MCYCLE, core_perf.cycles); CSR_READ_64(VX_CSR_MINSTRET, core_perf.instrs); @@ -500,6 +507,13 @@ void Emulator::set_csr(uint32_t addr, Word value, uint32_t tid, uint32_t wid) { case VX_MAT_MUL_SIZE: mat_size = value; break; + case VX_TC_NUM: + tc_num = value; + break; + case VX_TC_SIZE: + tc_size = value; + 
break; + default: { std::cout << std::hex << "Error: invalid CSR write addr=0x" << addr << ", value=0x" << value << std::endl; std::abort(); diff --git a/sim/simx/emulator.h b/sim/simx/emulator.h index 82b5bc98bb..743c2786e6 100644 --- a/sim/simx/emulator.h +++ b/sim/simx/emulator.h @@ -55,6 +55,7 @@ class Emulator { int get_exitcode() const; Word get_tiles(); + Word get_tc_size(); private: @@ -125,6 +126,8 @@ class Emulator { wspawn_t wspawn_; std::vector scratchpad; uint32_t mat_size; + uint32_t tc_size; + uint32_t tc_num; }; } diff --git a/sim/simx/execute.cpp b/sim/simx/execute.cpp index e13df18b9d..0dfd72a0f1 100644 --- a/sim/simx/execute.cpp +++ b/sim/simx/execute.cpp @@ -1419,8 +1419,11 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) { { //TODO - make it data-type flexible uint32_t mem_bytes = 1; DP(3, "mem_bytes=" << mem_bytes << std::endl); - uint16_t tc_size = core_->arch().tc_size(); - uint32_t TC_per_warp = core_->arch().tc_num(); + uint16_t tc_size = this->get_csr(VX_TC_SIZE, 0, wid); + uint32_t TC_per_warp = this->get_csr(VX_TC_NUM, 0, wid); + + DP(3, "tc_size=" << tc_size << std::endl); + DP(3, "TC_per_warp=" << TC_per_warp << std::endl); //Number of loads - dependant on the thread config uint32_t n_tiles = this->get_csr(VX_MAT_MUL_SIZE, 0, wid); //CSR instruction before MLOAD will ensure that this csr has value diff --git a/sim/simx/func_unit.cpp b/sim/simx/func_unit.cpp index 3991a17e75..f53a1fb223 100644 --- a/sim/simx/func_unit.cpp +++ b/sim/simx/func_unit.cpp @@ -255,7 +255,7 @@ int LsuUnit::send_requests(instr_trace_t* trace, int block_idx, int tag) { TcuUnit::TcuUnit(const SimContext& ctx, Core* core) : FuncUnit(ctx, core, "TCU") - , tc_size (core_->arch().tc_size()) + // , tc_size (core_->arch().tc_size()) {} void TcuUnit::tick() { @@ -267,6 +267,8 @@ void TcuUnit::tick() { auto& output = Outputs.at(i); auto trace = input.front(); uint32_t n_tiles = core_->emulator_.get_tiles(); + uint32_t tc_size = 
core_->emulator_.get_tc_size(); + switch (trace->tcu_type) { case TCUType::TCU_MUL: { //mat size = n_tiles * tc_size diff --git a/sim/simx/func_unit.h b/sim/simx/func_unit.h index 5fc922991c..a7f182efee 100644 --- a/sim/simx/func_unit.h +++ b/sim/simx/func_unit.h @@ -103,7 +103,7 @@ class LsuUnit : public FuncUnit { class TcuUnit : public FuncUnit { public: TcuUnit(const SimContext& ctx, Core*); - uint64_t tc_size; + // uint64_t tc_size; void tick(); }; diff --git a/sim/simx/main.cpp b/sim/simx/main.cpp index 58eb96d612..9031a0a029 100644 --- a/sim/simx/main.cpp +++ b/sim/simx/main.cpp @@ -83,7 +83,7 @@ int main(int argc, char **argv) { { // create processor configuation - Arch arch(num_threads, num_warps, num_cores, tc_size, tc_num); + Arch arch(num_threads, num_warps, num_cores); // create memory module RAM ram(0, RAM_PAGE_SIZE); diff --git a/tests/regression/matmul/Makefile b/tests/regression/matmul/Makefile index 7f1c485239..0ef2071944 100644 --- a/tests/regression/matmul/Makefile +++ b/tests/regression/matmul/Makefile @@ -9,6 +9,6 @@ SRCS := $(SRC_DIR)/main.cpp VX_SRCS := $(SRC_DIR)/kernel.cpp -OPTS ?= -n128 -d1 +OPTS ?= -n512 -d1 -s4 -t4 include ../common.mk diff --git a/tests/regression/matmul/kernel.cpp b/tests/regression/matmul/kernel.cpp index eeb902acb4..a4585fb539 100644 --- a/tests/regression/matmul/kernel.cpp +++ b/tests/regression/matmul/kernel.cpp @@ -13,7 +13,7 @@ void kernel_body(kernel_arg_t* __UNIFORM__ arg) { unsigned c_addr = reinterpret_cast(dst_ptr); uint32_t tc_size = arg->tc_size; - int TC_per_warp = arg->TC_per_warp; + uint32_t TC_per_warp = arg->TC_per_warp; unsigned num_threads = arg->num_threads; int num_warps = arg->num_warps; uint32_t matrix_size = arg->matrix_size; @@ -104,6 +104,9 @@ void kernel_body(kernel_arg_t* __UNIFORM__ arg) { unsigned b_addr_base = b_addr + offset*arg->data_size; unsigned c_addr_base = c_addr + offset_c*arg->data_size; csr_write(VX_MAT_MUL_SIZE,n_tiles); + csr_write(VX_TC_NUM,TC_per_warp); + 
csr_write(VX_TC_SIZE,tc_size); + mload (0, a_addr_base); mload (1, b_addr_base); //In case of multiple threads - sync load diff --git a/tests/regression/matmul/main.cpp b/tests/regression/matmul/main.cpp index 6a86712aef..b2238bf5a4 100644 --- a/tests/regression/matmul/main.cpp +++ b/tests/regression/matmul/main.cpp @@ -21,6 +21,9 @@ const char* kernel_file = "kernel.vxbin"; uint32_t matrix_size = 0; +uint32_t tc_num = 4; +uint32_t TC_size = 8; + vx_device_h device = nullptr; vx_buffer_h A_buffer = nullptr; vx_buffer_h B_buffer = nullptr; @@ -38,7 +41,7 @@ static void show_usage() { static void parse_args(int argc, char **argv, uint32_t &data_size) { int c; - while ((c = getopt(argc, argv, "n:k:d:h?")) != -1) { + while ((c = getopt(argc, argv, "n:k:d:t:s:h?")) != -1) { switch (c) { case 'n': matrix_size = atoi(optarg); @@ -48,7 +51,13 @@ static void parse_args(int argc, char **argv, uint32_t &data_size) { break; case 'd': data_size = atoi(optarg); - break; + break; + case 't': + tc_num = atoi(optarg); + break; + case 's': + TC_size = atoi(optarg); + break; case 'h': case '?': { show_usage(); @@ -141,12 +150,22 @@ int main(int argc, char *argv[]) { std::cout << "open device connection" << std::endl; RT_CHECK(vx_dev_open(&device)); - uint64_t num_cores, num_warps, num_threads, tc_size, TC_per_warp; + uint64_t num_cores, num_warps, num_threads; + uint32_t tc_size, TC_per_warp; + RT_CHECK(vx_dev_caps(device, VX_CAPS_NUM_CORES, &num_cores)); RT_CHECK(vx_dev_caps(device, VX_CAPS_NUM_WARPS, &num_warps)); RT_CHECK(vx_dev_caps(device, VX_CAPS_NUM_THREADS, &num_threads)); - RT_CHECK(vx_dev_caps(device, VX_CAPS_TC_SIZE, &tc_size)); - RT_CHECK(vx_dev_caps(device, VX_CAPS_TC_NUM, &TC_per_warp)); + + std::cout << "Debug :: tc_size (optarg) = " << TC_size << std::endl; + std::cout << "Debug :: tc_num (optarg) = " << tc_num << std::endl; + + //Add assert/knob + tc_size = TC_size; + TC_per_warp = tc_num; + + // RT_CHECK(vx_dev_caps(device, VX_CAPS_TC_SIZE, &tc_size)); + // 
RT_CHECK(vx_dev_caps(device, VX_CAPS_TC_NUM, &TC_per_warp)); std::cout << "Debug :: tc_size = " << tc_size << std::endl; std::cout << "Debug :: tc_num = " << TC_per_warp << std::endl; From 5b0fc8cbd43c813ced9790b287e8444ab619606b Mon Sep 17 00:00:00 2001 From: Nayan Sivakumar Nair Date: Tue, 25 Jun 2024 03:18:50 -0400 Subject: [PATCH 006/407] Fixes for PR --- ci/blackbox.sh | 3 --- ci/regression.sh.in | 4 ++- hw/rtl/VX_config.vh | 8 +++--- hw/rtl/VX_types.vh | 2 +- kernel/include/vx_intrinsics.h | 12 ++++----- run_final.sh | 22 ----------------- runtime/simx/vortex.cpp | 12 ++++----- sim/simx/decode.cpp | 6 ++--- sim/simx/emulator.cpp | 10 +++++++- sim/simx/emulator.h | 3 ++- sim/simx/execute.cpp | 22 ++++++----------- sim/simx/func_unit.cpp | 1 - sim/simx/func_unit.h | 1 - sim/simx/main.cpp | 2 -- tests/regression/matmul/Makefile | 2 +- tests/regression/matmul/kernel.cpp | 8 +++--- tests/regression/matmul/main.cpp | 22 +++-------------- tests/regression/matmul/matmul_regression.sh | 26 ++++++++++++++++++++ 18 files changed, 76 insertions(+), 90 deletions(-) delete mode 100755 run_final.sh create mode 100755 tests/regression/matmul/matmul_regression.sh diff --git a/ci/blackbox.sh b/ci/blackbox.sh index defad4c059..8bcb120f3d 100755 --- a/ci/blackbox.sh +++ b/ci/blackbox.sh @@ -48,8 +48,6 @@ PERF_CLASS=0 REBUILD=2 TEMPBUILD=0 LOGFILE=run.log -TC_SIZE=567 -TC_NUM=123 for i in "$@" do @@ -182,7 +180,6 @@ then fi CONFIGS="-DNUM_CLUSTERS=$CLUSTERS -DNUM_CORES=$CORES -DNUM_WARPS=$WARPS -DNUM_THREADS=$THREADS $L2 $L3 $PERF_FLAG $CONFIGS" -# CONFIGS="-DNUM_CLUSTERS=$CLUSTERS -DNUM_CORES=$CORES -DNUM_WARPS=$WARPS -DNUM_THREADS=$THREADS -DTC_NUM=$TC_NUM -DTC_SIZE=$TC_SIZE $L2 $L3 $PERF_FLAG $CONFIGS" echo "CONFIGS=$CONFIGS" if [ $REBUILD -ne 0 ] diff --git a/ci/regression.sh.in b/ci/regression.sh.in index 50d309af6c..3c89ac9965 100755 --- a/ci/regression.sh.in +++ b/ci/regression.sh.in @@ -124,7 +124,9 @@ regression() # test local barrier ./ci/blackbox.sh --driver=simx 
--app=dogfood --args="-n1 -tbar" ./ci/blackbox.sh --driver=rtlsim --app=dogfood --args="-n1 -tbar" - + + # test for matmul + CONFIGS="-DTC_NUM=4 -DTC_SIZE=8" ./ci/blackbox.sh --cores=4 --app=matmul --driver=simx --threads=32 --warps=32 --args="-n128 -d1" echo "regression tests done!" } diff --git a/hw/rtl/VX_config.vh b/hw/rtl/VX_config.vh index 651234768c..ef93065038 100644 --- a/hw/rtl/VX_config.vh +++ b/hw/rtl/VX_config.vh @@ -111,20 +111,20 @@ `endif `define NUM_SOCKETS `UP(`NUM_CORES / `SOCKET_SIZE) +// Size of Tensor Core `ifndef TC_SIZE -`define TC_SIZE 4 +`define TC_SIZE 8 `endif +// Number of TCs per Warp `ifndef TC_NUM -`define TC_NUM 1 +`define TC_NUM 4 `endif -// Number of TCU units `ifndef NUM_TCU_LANES `define NUM_TCU_LANES `TC_NUM `endif -// Number of TCU units `ifndef NUM_TCU_BLOCKS `define NUM_TCU_BLOCKS `ISSUE_WIDTH `endif diff --git a/hw/rtl/VX_types.vh b/hw/rtl/VX_types.vh index 9a8f932349..23fb16904e 100644 --- a/hw/rtl/VX_types.vh +++ b/hw/rtl/VX_types.vh @@ -196,7 +196,7 @@ `define VX_CSR_NUM_CORES 12'hFC2 `define VX_CSR_LOCAL_MEM_BASE 12'hFC3 -`define VX_MAT_MUL_SIZE 12'hFC4 +`define VX_MAT_MUL_SIZE 12'hFC4 // VX_MAT_MUL_SIZE = Matrix Size / TC Size `define VX_TC_NUM 12'hFC5 `define VX_TC_SIZE 12'hFC6 diff --git a/kernel/include/vx_intrinsics.h b/kernel/include/vx_intrinsics.h index b67a770da1..5d16d44dac 100644 --- a/kernel/include/vx_intrinsics.h +++ b/kernel/include/vx_intrinsics.h @@ -222,21 +222,19 @@ inline void vx_fence() { } //Matrix load -//Converted instruction type cause destination registers were not getiing blocked otherwise -inline void mload(unsigned dest, unsigned addr) +inline void vx_matrix_load(unsigned dest, unsigned addr) { asm volatile (".insn i 0x7b, 0, x0, %0(%1)" :: "i"(dest), "r"(addr)); } -//mat store -inline void ms(unsigned addr) +//Matrix Store +inline void vx_matrix_store(unsigned addr) { asm volatile (".insn i 0x7b, 1, x0, 0(%0)" :: "r"(addr)); } -//mat mul -//num tiles along reduced K dimension of matmul as 
imm value (can use rd,rs field to expand range of n_tiles from 12 bits) -inline void mm() +//Matrix Mul +inline void vx_matrix_mul() { asm volatile (".insn i 0x7b, 2, x0, 0(x0)"); } diff --git a/run_final.sh b/run_final.sh deleted file mode 100755 index 5f618dc64c..0000000000 --- a/run_final.sh +++ /dev/null @@ -1,22 +0,0 @@ -# Define arrays for threads, warps, and matrix sizes -matrix_sizes=(16 32 64 128 256 512) -tcsizes=(8 16 32) -tcnums=(4 8 16 32) -#lsulanes=(4 16) -#cores=(32) - - -# Loop through each combination of threads and warps -for size in "${matrix_sizes[@]}"; do - sed -i "s/OPTS ?= -n[0-9]\+/OPTS ?= -n${size}/" ../tests/regression/matmul/Makefile - sed -i "s/OPTS ?= -n[0-9]\+/OPTS ?= -n${size}/" tests/regression/matmul/Makefile - echo "Matrix size changed to ${size} in Makefile" - for tcsize in "${tcsizes[@]}"; do - for tcnum in "${tcnums[@]}"; do - log_name="sim_final/mat${size}/tcsize${tcsize}_tcnum${tcnum}_32w32t" - command="./ci/blackbox.sh --cores=4 --app=matmul --driver=simx --threads=32 --warps=32 --tc_size=${tcsize} --tc_num=${tcnum} --rebuild=1 --perf=1 > ${log_name} 2>&1" - echo "$command" - eval "$command" - done - done -done diff --git a/runtime/simx/vortex.cpp b/runtime/simx/vortex.cpp index 4210ab0b6e..5ab5e14f51 100644 --- a/runtime/simx/vortex.cpp +++ b/runtime/simx/vortex.cpp @@ -69,12 +69,12 @@ class vx_device { case VX_CAPS_NUM_CORES: _value = NUM_CORES * NUM_CLUSTERS; break; - // case VX_CAPS_TC_SIZE: - // _value = TC_SIZE; - // break; - // case VX_CAPS_TC_NUM: - // _value = TC_NUM; - // break; + case VX_CAPS_TC_SIZE: + _value = TC_SIZE; + break; + case VX_CAPS_TC_NUM: + _value = TC_NUM; + break; case VX_CAPS_CACHE_LINE_SIZE: _value = CACHE_BLOCK_SIZE; break; diff --git a/sim/simx/decode.cpp b/sim/simx/decode.cpp index 4d8d0a1054..21d0e61ddb 100644 --- a/sim/simx/decode.cpp +++ b/sim/simx/decode.cpp @@ -410,9 +410,9 @@ static const char* op_string(const Instr &instr) { case Opcode::TCU: switch(func3) { - case 0: return "ML"; // - 
case 1: return "MS"; // - case 2: return "MATMUL"; + case 0: return "ML"; // Matrix Load + case 1: return "MS"; // Matrix Store + case 2: return "MATMUL"; // Matrix Multiply default: std::abort(); } diff --git a/sim/simx/emulator.cpp b/sim/simx/emulator.cpp index d2faf7f98d..0dc8a06c41 100644 --- a/sim/simx/emulator.cpp +++ b/sim/simx/emulator.cpp @@ -74,7 +74,10 @@ Emulator::Emulator(const Arch &arch, const DCRS &dcrs, Core* core) , core_(core) , warps_(arch.num_warps(), arch) , barriers_(arch.num_barriers(), 0) - , scratchpad(std::vector(32 * 32 * 32768)) //Fix this : Max TC_SIZE = 32 + // Currently, tradeoff between scratchpad size & performance has not been evaluated. Scratchpad is + // considered to be big enough to hold input tiles for one output tile. + // In future versions, scratchpad size should be fixed to an appropriate value. + , scratchpad(std::vector(32 * 32 * 32768)) { this->clear(); } @@ -360,6 +363,11 @@ Word Emulator::get_tc_size() return tc_size; } +Word Emulator::get_tc_num() +{ + return tc_num; +} + Word Emulator::get_csr(uint32_t addr, uint32_t tid, uint32_t wid) { auto core_perf = core_->perf_stats(); switch (addr) { diff --git a/sim/simx/emulator.h b/sim/simx/emulator.h index 743c2786e6..fe3aadf810 100644 --- a/sim/simx/emulator.h +++ b/sim/simx/emulator.h @@ -56,7 +56,8 @@ class Emulator { Word get_tiles(); Word get_tc_size(); - + Word get_tc_num(); + private: struct ipdom_entry_t { diff --git a/sim/simx/execute.cpp b/sim/simx/execute.cpp index 0dfd72a0f1..20025f40b9 100644 --- a/sim/simx/execute.cpp +++ b/sim/simx/execute.cpp @@ -1429,8 +1429,8 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) { uint32_t n_tiles = this->get_csr(VX_MAT_MUL_SIZE, 0, wid); //CSR instruction before MLOAD will ensure that this csr has value int num_data_per_thread; int num_data_per_thread_st; - int num_threads_actv; - int num_threads_actv_st; + uint32_t num_threads_actv; + uint32_t num_threads_actv_st; uint32_t data_bytes_load; 
uint32_t data_bytes_store; uint32_t num_threads_per_tc = MAX (1, num_threads/TC_per_warp); @@ -1506,7 +1506,6 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) { auto trace_data = std::make_shared(num_threads); trace->data = trace_data; - uint32_t accu_offset = (n_tiles)*(n_tiles)*(n_tiles)*tc_size*tc_size*2; for (uint32_t t = thread_start; t < num_threads_actv_st; ++t) { @@ -1521,12 +1520,6 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) { //Store C for (int n=0; n csr (TODO :: removed intermediate CSR stage ; incorporate limited scratchmad implementation) - //core_->set_csr(csr_addr[(2*num_data_per_thread) + n], scratchpad[(n_tiles*tc_size*tc_size*2) + (t*num_data_per_thread) + n], t, warp_id_); Word* temp_ref = &(warp.ireg_file.at(t).at(rsrc0)); *temp_ref = scratchpad[(n_tiles*tc_size*tc_size*2) + (t*num_data_per_thread_st) + n]; @@ -1534,7 +1527,7 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) { } } //Clear the scratchpad - for(int i =0 ; i < scratchpad.size(); i++) + for(long unsigned int i=0 ; i < scratchpad.size(); i++) { scratchpad[i] = 0; } @@ -1545,7 +1538,6 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) { DP(4, "TCU MULTIPLY MAT"); trace->fu_type = FUType::TCU; trace->tcu_type = TCUType::TCU_MUL; - uint32_t accu_offset = (n_tiles)*(n_tiles)*(n_tiles)*tc_size*tc_size*2; uint32_t threads_per_tc = MAX (1, num_threads/TC_per_warp); for (uint32_t t = thread_start; t < num_threads_actv; ++t) { @@ -1556,12 +1548,14 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) { //TC operation [only 1 thread in 1 warp needs to do this] if (t%threads_per_tc == 0) { - //TODO : change to systolic array implementation + /* + // TODO : Fix needed for functional correctness + // TODO : change to systolic array implementation uint32_t thread_offset = t*(tc_size*tc_size); + int loop_offset = 0; int offset_b = 
n_tiles*n_tiles*n_tiles*tc_size*tc_size; - /* - // TODO : Fix needed for functional correctness + uint32_t accu_offset = (n_tiles)*(n_tiles)*(n_tiles)*tc_size*tc_size*2; for(int tiles = 0 ; tiles < n_tiles ; tiles++) //What's the HW implication of this?? A counter implementation? { for (int i = 0; i < tc_size; i++) { //ROW-1 diff --git a/sim/simx/func_unit.cpp b/sim/simx/func_unit.cpp index f53a1fb223..8acbfddebe 100644 --- a/sim/simx/func_unit.cpp +++ b/sim/simx/func_unit.cpp @@ -255,7 +255,6 @@ int LsuUnit::send_requests(instr_trace_t* trace, int block_idx, int tag) { TcuUnit::TcuUnit(const SimContext& ctx, Core* core) : FuncUnit(ctx, core, "TCU") - // , tc_size (core_->arch().tc_size()) {} void TcuUnit::tick() { diff --git a/sim/simx/func_unit.h b/sim/simx/func_unit.h index a7f182efee..cf119a5c3a 100644 --- a/sim/simx/func_unit.h +++ b/sim/simx/func_unit.h @@ -103,7 +103,6 @@ class LsuUnit : public FuncUnit { class TcuUnit : public FuncUnit { public: TcuUnit(const SimContext& ctx, Core*); - // uint64_t tc_size; void tick(); }; diff --git a/sim/simx/main.cpp b/sim/simx/main.cpp index 9031a0a029..0f61de6f4d 100644 --- a/sim/simx/main.cpp +++ b/sim/simx/main.cpp @@ -35,8 +35,6 @@ static void show_usage() { uint32_t num_threads = NUM_THREADS; uint32_t num_warps = NUM_WARPS; uint32_t num_cores = NUM_CORES; -uint32_t tc_size = TC_SIZE; -uint32_t tc_num = TC_NUM; bool showStats = false; const char* program = nullptr; diff --git a/tests/regression/matmul/Makefile b/tests/regression/matmul/Makefile index 0ef2071944..7f1c485239 100644 --- a/tests/regression/matmul/Makefile +++ b/tests/regression/matmul/Makefile @@ -9,6 +9,6 @@ SRCS := $(SRC_DIR)/main.cpp VX_SRCS := $(SRC_DIR)/kernel.cpp -OPTS ?= -n512 -d1 -s4 -t4 +OPTS ?= -n128 -d1 include ../common.mk diff --git a/tests/regression/matmul/kernel.cpp b/tests/regression/matmul/kernel.cpp index a4585fb539..b0b4753c79 100644 --- a/tests/regression/matmul/kernel.cpp +++ b/tests/regression/matmul/kernel.cpp @@ -107,15 +107,15 
@@ void kernel_body(kernel_arg_t* __UNIFORM__ arg) { csr_write(VX_TC_NUM,TC_per_warp); csr_write(VX_TC_SIZE,tc_size); - mload (0, a_addr_base); - mload (1, b_addr_base); + vx_matrix_load (0, a_addr_base); + vx_matrix_load (1, b_addr_base); //In case of multiple threads - sync load vx_fence(); - mm(); //Assuming padding to ensure matrix size is a multiple of tc_size + vx_matrix_mul(); //Assuming padding to ensure matrix size is a multiple of tc_size vx_fence(); if (((task_id%num_tasks_per_warp)/num_tasks_per_thread) < thread_limit_c) - ms(c_addr_base); + vx_matrix_store(c_addr_base); //In case of multiple threads - sync store vx_fence(); } diff --git a/tests/regression/matmul/main.cpp b/tests/regression/matmul/main.cpp index b2238bf5a4..9b3465c52c 100644 --- a/tests/regression/matmul/main.cpp +++ b/tests/regression/matmul/main.cpp @@ -21,8 +21,6 @@ const char* kernel_file = "kernel.vxbin"; uint32_t matrix_size = 0; -uint32_t tc_num = 4; -uint32_t TC_size = 8; vx_device_h device = nullptr; vx_buffer_h A_buffer = nullptr; @@ -41,7 +39,7 @@ static void show_usage() { static void parse_args(int argc, char **argv, uint32_t &data_size) { int c; - while ((c = getopt(argc, argv, "n:k:d:t:s:h?")) != -1) { + while ((c = getopt(argc, argv, "n:k:d:h?")) != -1) { switch (c) { case 'n': matrix_size = atoi(optarg); @@ -52,12 +50,6 @@ static void parse_args(int argc, char **argv, uint32_t &data_size) { case 'd': data_size = atoi(optarg); break; - case 't': - tc_num = atoi(optarg); - break; - case 's': - TC_size = atoi(optarg); - break; case 'h': case '?': { show_usage(); @@ -151,21 +143,15 @@ int main(int argc, char *argv[]) { RT_CHECK(vx_dev_open(&device)); uint64_t num_cores, num_warps, num_threads; - uint32_t tc_size, TC_per_warp; + uint64_t tc_size, TC_per_warp; RT_CHECK(vx_dev_caps(device, VX_CAPS_NUM_CORES, &num_cores)); RT_CHECK(vx_dev_caps(device, VX_CAPS_NUM_WARPS, &num_warps)); RT_CHECK(vx_dev_caps(device, VX_CAPS_NUM_THREADS, &num_threads)); - std::cout << "Debug :: 
tc_size (optarg) = " << TC_size << std::endl; - std::cout << "Debug :: tc_num (optarg) = " << tc_num << std::endl; - //Add assert/knob - tc_size = TC_size; - TC_per_warp = tc_num; - - // RT_CHECK(vx_dev_caps(device, VX_CAPS_TC_SIZE, &tc_size)); - // RT_CHECK(vx_dev_caps(device, VX_CAPS_TC_NUM, &TC_per_warp)); + RT_CHECK(vx_dev_caps(device, VX_CAPS_TC_SIZE, &tc_size)); + RT_CHECK(vx_dev_caps(device, VX_CAPS_TC_NUM, &TC_per_warp)); std::cout << "Debug :: tc_size = " << tc_size << std::endl; std::cout << "Debug :: tc_num = " << TC_per_warp << std::endl; diff --git a/tests/regression/matmul/matmul_regression.sh b/tests/regression/matmul/matmul_regression.sh new file mode 100755 index 0000000000..8d35fcfd3a --- /dev/null +++ b/tests/regression/matmul/matmul_regression.sh @@ -0,0 +1,26 @@ +#!/bin/bash + +# README: +# This script launches a sweep of TC_SIZE, TC_NUM and MATRIX SIZES +# default values of NUM_WARPS=32, NUM_THREADS=32, NUM_CORES=4, DATA_SIZE=1 +# Edit matrix_sizes, tcsizes & tcnums variables to vary the sweep limits + +# Define arrays for tc_size,tc_num and matrix sizes +matrix_sizes=(16 32 64 128 256 512) +tcsizes=(8 16 32) +tcnums=(4 8 16 32) + +cd ../../../build/ + +# Loop through each combination of above configs +for size in "${matrix_sizes[@]}"; do + for tcsize in "${tcsizes[@]}"; do + for tcnum in "${tcnums[@]}"; do + mkdir -p sim_final/mat${size} + log_name="sim_final/mat${size}/tcsize${tcsize}_tcnum${tcnum}_32w32t" + cmd="CONFIGS=\"-DTC_NUM=${tcnum} -DTC_SIZE=${tcsize}\" ./ci/blackbox.sh --cores=4 --app=matmul --driver=simx --threads=32 --warps=32 --args=\"-n${size} -d1\" --rebuild=1 --perf=1 > ${log_name} 2>&1" + echo $cmd + eval $cmd + done + done +done From 5e63b8f35ac3b695d574fde4ad0a280ecbe1b83a Mon Sep 17 00:00:00 2001 From: Nayan Sivakumar Nair Date: Tue, 25 Jun 2024 23:27:18 -0400 Subject: [PATCH 007/407] dummy commit --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 6eeb1ccfa5..a1593a67a4 100644 --- 
a/README.md +++ b/README.md @@ -7,6 +7,7 @@ Vortex is a full-stack open-source RISC-V GPGPU. ## Specifications - Support RISC-V RV32IMAF and RV64IMAFD + - Microarchitecture: - configurable number of cores, warps, and threads. - configurable number of ALU, FPU, LSU, and SFU units per core. From bddf276335bf6671a688e4150457e2f5eed6e231 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Tue, 6 Aug 2024 19:05:22 -0700 Subject: [PATCH 008/407] memory request flags refactoring --- hw/rtl/VX_define.vh | 10 ++++---- hw/rtl/Vortex.sv | 2 +- hw/rtl/afu/opae/vortex_afu.sv | 12 ++++----- hw/rtl/cache/VX_cache.sv | 4 +-- hw/rtl/cache/VX_cache_bypass.sv | 20 +++++++-------- hw/rtl/cache/VX_cache_flush.sv | 2 +- hw/rtl/cache/VX_cache_top.sv | 6 ++--- hw/rtl/core/VX_core.sv | 6 ++--- hw/rtl/core/VX_core_top.sv | 6 ++--- hw/rtl/core/VX_dispatch.sv | 3 ++- hw/rtl/core/VX_fetch.sv | 2 +- hw/rtl/core/VX_lmem_unit.sv | 12 ++++----- hw/rtl/core/VX_lsu_adapter.sv | 6 ++--- hw/rtl/core/VX_lsu_slice.sv | 26 +++++++++---------- hw/rtl/core/VX_operands.sv | 6 +++-- hw/rtl/interfaces/VX_lsu_mem_if.sv | 10 ++++---- hw/rtl/libs/VX_mem_coalescer.sv | 32 ++++++++++++------------ hw/rtl/libs/VX_mem_scheduler.sv | 40 +++++++++++++++--------------- hw/rtl/mem/VX_local_mem.sv | 2 +- hw/rtl/mem/VX_local_mem_top.sv | 20 +++++++-------- hw/rtl/mem/VX_mem_arb.sv | 40 +++++++++++++++--------------- hw/rtl/mem/VX_mem_bus_if.sv | 8 +++--- hw/rtl/mem/VX_mem_switch.sv | 2 +- 23 files changed, 140 insertions(+), 137 deletions(-) diff --git a/hw/rtl/VX_define.vh b/hw/rtl/VX_define.vh index 686124c162..4384660165 100644 --- a/hw/rtl/VX_define.vh +++ b/hw/rtl/VX_define.vh @@ -303,10 +303,10 @@ `define L1_ENABLE `endif -`define ADDR_TYPE_FLUSH 0 -`define ADDR_TYPE_IO 1 -`define ADDR_TYPE_LOCAL 2 // shoud be last since optional -`define ADDR_TYPE_WIDTH (`ADDR_TYPE_LOCAL + `LMEM_ENABLED) +`define MEM_REQ_FLAG_FLUSH 0 +`define MEM_REQ_FLAG_IO 1 +`define MEM_REQ_FLAG_LOCAL 2 // shoud be last since optional +`define 
MEM_REQ_FLAGS_WIDTH (`MEM_REQ_FLAG_LOCAL + `LMEM_ENABLED) `define VX_MEM_BYTEEN_WIDTH `L3_LINE_SIZE `define VX_MEM_ADDR_WIDTH (`MEM_ADDR_WIDTH - `CLOG2(`L3_LINE_SIZE)) @@ -364,7 +364,7 @@ assign dst.req_data.rw = src.req_data.rw; \ assign dst.req_data.byteen = src.req_data.byteen; \ assign dst.req_data.addr = src.req_data.addr; \ - assign dst.req_data.atype = src.req_data.atype; \ + assign dst.req_data.flags = src.req_data.flags; \ assign dst.req_data.data = src.req_data.data; \ if (TD != TS) \ assign dst.req_data.tag = {src.req_data.tag, {(TD-TS){1'b0}}}; \ diff --git a/hw/rtl/Vortex.sv b/hw/rtl/Vortex.sv index 978259101e..b496120478 100644 --- a/hw/rtl/Vortex.sv +++ b/hw/rtl/Vortex.sv @@ -109,7 +109,7 @@ module Vortex import VX_gpu_pkg::*; ( assign mem_req_data = mem_bus_if.req_data.data; assign mem_req_tag = mem_bus_if.req_data.tag; assign mem_bus_if.req_ready = mem_req_ready; - `UNUSED_VAR (mem_bus_if.req_data.atype) + `UNUSED_VAR (mem_bus_if.req_data.flags) assign mem_bus_if.rsp_valid = mem_rsp_valid; assign mem_bus_if.rsp_data.data = mem_rsp_data; diff --git a/hw/rtl/afu/opae/vortex_afu.sv b/hw/rtl/afu/opae/vortex_afu.sv index 93f63c48d8..b67cae3a59 100644 --- a/hw/rtl/afu/opae/vortex_afu.sv +++ b/hw/rtl/afu/opae/vortex_afu.sv @@ -517,8 +517,8 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ .mem_rsp_ready_out (cci_vx_mem_bus_if[1].rsp_ready) ); - assign cci_vx_mem_bus_if[1].req_data.atype = '0; - `UNUSED_VAR (cci_vx_mem_bus_if[1].req_data.atype) + assign cci_vx_mem_bus_if[1].req_data.flags = '0; + `UNUSED_VAR (cci_vx_mem_bus_if[1].req_data.flags) //-- @@ -570,8 +570,8 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ .mem_rsp_ready_out (cci_vx_mem_bus_if[0].rsp_ready) ); - assign cci_vx_mem_bus_if[0].req_data.atype = '0; - `UNUSED_VAR (cci_vx_mem_bus_if[0].req_data.atype) + assign cci_vx_mem_bus_if[0].req_data.flags = '0; + `UNUSED_VAR (cci_vx_mem_bus_if[0].req_data.flags) //-- 
VX_mem_bus_if #( @@ -639,8 +639,8 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ .avs_readdatavalid(avs_readdatavalid) ); - assign mem_bus_if[0].req_data.atype = '0; - `UNUSED_VAR (mem_bus_if[0].req_data.atype) + assign mem_bus_if[0].req_data.flags = '0; + `UNUSED_VAR (mem_bus_if[0].req_data.flags) // CCI-P Read Request /////////////////////////////////////////////////////////// diff --git a/hw/rtl/cache/VX_cache.sv b/hw/rtl/cache/VX_cache.sv index ae0747690a..1131791bbc 100644 --- a/hw/rtl/cache/VX_cache.sv +++ b/hw/rtl/cache/VX_cache.sv @@ -185,7 +185,7 @@ module VX_cache import VX_gpu_pkg::*; #( .ready_out (mem_bus_if.req_ready) ); - assign mem_bus_if.req_data.atype = mem_bus_if_flush ? `ADDR_TYPE_WIDTH'(1 << `ADDR_TYPE_FLUSH) : '0; + assign mem_bus_if.req_data.flags = mem_bus_if_flush ? `MEM_REQ_FLAGS_WIDTH'(1 << `MEM_REQ_FLAG_FLUSH) : '0; /////////////////////////////////////////////////////////////////////////// @@ -273,7 +273,7 @@ module VX_cache import VX_gpu_pkg::*; #( assign core_req_addr[i] = core_bus2_if[i].req_data.addr; assign core_req_data[i] = core_bus2_if[i].req_data.data; assign core_req_tag[i] = core_bus2_if[i].req_data.tag; - assign core_req_flush[i] = core_bus2_if[i].req_data.atype[`ADDR_TYPE_FLUSH]; + assign core_req_flush[i] = core_bus2_if[i].req_data.flags[`MEM_REQ_FLAG_FLUSH]; assign core_bus2_if[i].req_ready = core_req_ready[i]; end diff --git a/hw/rtl/cache/VX_cache_bypass.sv b/hw/rtl/cache/VX_cache_bypass.sv index 379d33e8a9..53d847c4e2 100644 --- a/hw/rtl/cache/VX_cache_bypass.sv +++ b/hw/rtl/cache/VX_cache_bypass.sv @@ -56,7 +56,7 @@ module VX_cache_bypass #( localparam DIRECT_PASSTHRU = PASSTHRU && (`CS_WORD_SEL_BITS == 0) && (NUM_REQS == 1); localparam REQ_SEL_BITS = `CLOG2(NUM_REQS); - localparam MUX_DATAW = 1 + WORD_SIZE + CORE_ADDR_WIDTH + `ADDR_TYPE_WIDTH + CORE_DATA_WIDTH + CORE_TAG_WIDTH; + localparam MUX_DATAW = 1 + WORD_SIZE + CORE_ADDR_WIDTH + `MEM_REQ_FLAGS_WIDTH + CORE_DATA_WIDTH + 
CORE_TAG_WIDTH; localparam WORDS_PER_LINE = LINE_SIZE / WORD_SIZE; localparam WSEL_BITS = `CLOG2(WORDS_PER_LINE); @@ -80,7 +80,7 @@ module VX_cache_bypass #( if (PASSTHRU != 0) begin assign core_req_nc_idxs[i] = 1'b1; end else if (NC_ENABLE) begin - assign core_req_nc_idxs[i] = core_bus_in_if[i].req_data.atype[`ADDR_TYPE_IO]; + assign core_req_nc_idxs[i] = core_bus_in_if[i].req_data.flags[`MEM_REQ_FLAG_IO]; end else begin assign core_req_nc_idxs[i] = 1'b0; end @@ -113,7 +113,7 @@ module VX_cache_bypass #( wire mem_req_out_rw; wire [LINE_SIZE-1:0] mem_req_out_byteen; wire [`CS_MEM_ADDR_WIDTH-1:0] mem_req_out_addr; - wire [`ADDR_TYPE_WIDTH-1:0] mem_req_out_atype; + wire [`MEM_REQ_FLAGS_WIDTH-1:0] mem_req_out_flags; wire [`CS_LINE_WIDTH-1:0] mem_req_out_data; wire [MEM_TAG_OUT_WIDTH-1:0] mem_req_out_tag; wire mem_req_out_ready; @@ -121,7 +121,7 @@ module VX_cache_bypass #( wire core_req_nc_sel_rw; wire [WORD_SIZE-1:0] core_req_nc_sel_byteen; wire [CORE_ADDR_WIDTH-1:0] core_req_nc_sel_addr; - wire [`ADDR_TYPE_WIDTH-1:0] core_req_nc_sel_atype; + wire [`MEM_REQ_FLAGS_WIDTH-1:0] core_req_nc_sel_flags; wire [CORE_DATA_WIDTH-1:0] core_req_nc_sel_data; wire [CORE_TAG_WIDTH-1:0] core_req_nc_sel_tag; @@ -131,7 +131,7 @@ module VX_cache_bypass #( core_bus_in_if[i].req_data.rw, core_bus_in_if[i].req_data.byteen, core_bus_in_if[i].req_data.addr, - core_bus_in_if[i].req_data.atype, + core_bus_in_if[i].req_data.flags, core_bus_in_if[i].req_data.data, core_bus_in_if[i].req_data.tag }; @@ -141,7 +141,7 @@ module VX_cache_bypass #( core_req_nc_sel_rw, core_req_nc_sel_byteen, core_req_nc_sel_addr, - core_req_nc_sel_atype, + core_req_nc_sel_flags, core_req_nc_sel_data, core_req_nc_sel_tag } = core_req_nc_mux_in[core_req_nc_idx]; @@ -151,7 +151,7 @@ module VX_cache_bypass #( assign mem_req_out_valid = mem_bus_in_if.req_valid || core_req_nc_valid; assign mem_req_out_rw = mem_bus_in_if.req_valid ? 
mem_bus_in_if.req_data.rw : core_req_nc_sel_rw; assign mem_req_out_addr = mem_bus_in_if.req_valid ? mem_bus_in_if.req_data.addr : core_req_nc_sel_addr[WSEL_BITS +: MEM_ADDR_WIDTH]; - assign mem_req_out_atype = mem_bus_in_if.req_valid ? mem_bus_in_if.req_data.atype : core_req_nc_sel_atype; + assign mem_req_out_flags = mem_bus_in_if.req_valid ? mem_bus_in_if.req_data.flags : core_req_nc_sel_flags; wire [MEM_TAG_ID_BITS-1:0] mem_req_tag_id_bypass; @@ -218,7 +218,7 @@ module VX_cache_bypass #( assign mem_bus_in_if.req_ready = mem_req_out_ready; VX_elastic_buffer #( - .DATAW (1 + LINE_SIZE + `CS_MEM_ADDR_WIDTH + `ADDR_TYPE_WIDTH + `CS_LINE_WIDTH + MEM_TAG_OUT_WIDTH), + .DATAW (1 + LINE_SIZE + `CS_MEM_ADDR_WIDTH + `MEM_REQ_FLAGS_WIDTH + `CS_LINE_WIDTH + MEM_TAG_OUT_WIDTH), .SIZE ((!DIRECT_PASSTHRU) ? `TO_OUT_BUF_SIZE(MEM_OUT_BUF) : 0), .OUT_REG (`TO_OUT_BUF_REG(MEM_OUT_BUF)) ) mem_req_buf ( @@ -226,8 +226,8 @@ module VX_cache_bypass #( .reset (reset), .valid_in (mem_req_out_valid), .ready_in (mem_req_out_ready), - .data_in ({mem_req_out_rw, mem_req_out_byteen, mem_req_out_addr, mem_req_out_atype, mem_req_out_data, mem_req_out_tag}), - .data_out ({mem_bus_out_if.req_data.rw, mem_bus_out_if.req_data.byteen, mem_bus_out_if.req_data.addr, mem_bus_out_if.req_data.atype, mem_bus_out_if.req_data.data, mem_bus_out_if.req_data.tag}), + .data_in ({mem_req_out_rw, mem_req_out_byteen, mem_req_out_addr, mem_req_out_flags, mem_req_out_data, mem_req_out_tag}), + .data_out ({mem_bus_out_if.req_data.rw, mem_bus_out_if.req_data.byteen, mem_bus_out_if.req_data.addr, mem_bus_out_if.req_data.flags, mem_bus_out_if.req_data.data, mem_bus_out_if.req_data.tag}), .valid_out (mem_bus_out_if.req_valid), .ready_out (mem_bus_out_if.req_ready) ); diff --git a/hw/rtl/cache/VX_cache_flush.sv b/hw/rtl/cache/VX_cache_flush.sv index 7a33565fc1..648fbebb30 100644 --- a/hw/rtl/cache/VX_cache_flush.sv +++ b/hw/rtl/cache/VX_cache_flush.sv @@ -83,7 +83,7 @@ module VX_cache_flush #( wire [NUM_REQS-1:0] 
flush_req_mask; for (genvar i = 0; i < NUM_REQS; ++i) begin - assign flush_req_mask[i] = core_bus_in_if[i].req_valid && core_bus_in_if[i].req_data.atype[`ADDR_TYPE_FLUSH]; + assign flush_req_mask[i] = core_bus_in_if[i].req_valid && core_bus_in_if[i].req_data.flags[`MEM_REQ_FLAG_FLUSH]; end wire flush_req_enable = (| flush_req_mask); diff --git a/hw/rtl/cache/VX_cache_top.sv b/hw/rtl/cache/VX_cache_top.sv index 0959701aa1..3fa0e5d65f 100644 --- a/hw/rtl/cache/VX_cache_top.sv +++ b/hw/rtl/cache/VX_cache_top.sv @@ -75,7 +75,7 @@ module VX_cache_top import VX_gpu_pkg::*; #( input wire [NUM_REQS-1:0] core_req_rw, input wire [NUM_REQS-1:0][WORD_SIZE-1:0] core_req_byteen, input wire [NUM_REQS-1:0][`CS_WORD_ADDR_WIDTH-1:0] core_req_addr, - input wire [NUM_REQS-1:0][`ADDR_TYPE_WIDTH-1:0] core_req_atype, + input wire [NUM_REQS-1:0][`MEM_REQ_FLAGS_WIDTH-1:0] core_req_flags, input wire [NUM_REQS-1:0][`CS_WORD_WIDTH-1:0] core_req_data, input wire [NUM_REQS-1:0][TAG_WIDTH-1:0] core_req_tag, output wire [NUM_REQS-1:0] core_req_ready, @@ -117,7 +117,7 @@ module VX_cache_top import VX_gpu_pkg::*; #( assign core_bus_if[i].req_data.rw = core_req_rw[i]; assign core_bus_if[i].req_data.byteen = core_req_byteen[i]; assign core_bus_if[i].req_data.addr = core_req_addr[i]; - assign core_bus_if[i].req_data.atype = core_req_atype[i]; + assign core_bus_if[i].req_data.flags = core_req_flags[i]; assign core_bus_if[i].req_data.data = core_req_data[i]; assign core_bus_if[i].req_data.tag = core_req_tag[i]; assign core_req_ready[i] = core_bus_if[i].req_ready; @@ -139,7 +139,7 @@ module VX_cache_top import VX_gpu_pkg::*; #( assign mem_req_data = mem_bus_if.req_data.data; assign mem_req_tag = mem_bus_if.req_data.tag; assign mem_bus_if.req_ready = mem_req_ready; - `UNUSED_VAR (mem_bus_if.req_data.atype) + `UNUSED_VAR (mem_bus_if.req_data.flags) // Memory response assign mem_bus_if.rsp_valid = mem_rsp_valid; diff --git a/hw/rtl/core/VX_core.sv b/hw/rtl/core/VX_core.sv index 4c82db8127..83af50f16c 100644 
--- a/hw/rtl/core/VX_core.sv +++ b/hw/rtl/core/VX_core.sv @@ -250,7 +250,7 @@ module VX_core import VX_gpu_pkg::*; #( .DATA_IN_SIZE (LSU_WORD_SIZE), .DATA_OUT_SIZE (DCACHE_WORD_SIZE), .ADDR_WIDTH (LSU_ADDR_WIDTH), - .ATYPE_WIDTH (`ADDR_TYPE_WIDTH), + .FLAGS_WIDTH (`MEM_REQ_FLAGS_WIDTH), .TAG_WIDTH (LSU_TAG_WIDTH), .UUID_WIDTH (`UUID_WIDTH), .QUEUE_SIZE (`LSUQ_OUT_SIZE) @@ -264,7 +264,7 @@ module VX_core import VX_gpu_pkg::*; #( .in_req_rw (lsu_dcache_if[i].req_data.rw), .in_req_byteen (lsu_dcache_if[i].req_data.byteen), .in_req_addr (lsu_dcache_if[i].req_data.addr), - .in_req_atype (lsu_dcache_if[i].req_data.atype), + .in_req_flags (lsu_dcache_if[i].req_data.flags), .in_req_data (lsu_dcache_if[i].req_data.data), .in_req_tag (lsu_dcache_if[i].req_data.tag), .in_req_ready (lsu_dcache_if[i].req_ready), @@ -282,7 +282,7 @@ module VX_core import VX_gpu_pkg::*; #( .out_req_rw (dcache_coalesced_if.req_data.rw), .out_req_byteen (dcache_coalesced_if.req_data.byteen), .out_req_addr (dcache_coalesced_if.req_data.addr), - .out_req_atype (dcache_coalesced_if.req_data.atype), + .out_req_flags (dcache_coalesced_if.req_data.flags), .out_req_data (dcache_coalesced_if.req_data.data), .out_req_tag (dcache_coalesced_if.req_data.tag), .out_req_ready (dcache_coalesced_if.req_ready), diff --git a/hw/rtl/core/VX_core_top.sv b/hw/rtl/core/VX_core_top.sv index 420ae7b67f..9ade1c28b0 100644 --- a/hw/rtl/core/VX_core_top.sv +++ b/hw/rtl/core/VX_core_top.sv @@ -32,7 +32,7 @@ module VX_core_top import VX_gpu_pkg::*; #( output wire [DCACHE_NUM_REQS-1:0] dcache_req_rw, output wire [DCACHE_NUM_REQS-1:0][DCACHE_WORD_SIZE-1:0] dcache_req_byteen, output wire [DCACHE_NUM_REQS-1:0][DCACHE_ADDR_WIDTH-1:0] dcache_req_addr, - output wire [DCACHE_NUM_REQS-1:0][`ADDR_TYPE_WIDTH-1:0] dcache_req_atype, + output wire [DCACHE_NUM_REQS-1:0][`MEM_REQ_FLAGS_WIDTH-1:0] dcache_req_flags, output wire [DCACHE_NUM_REQS-1:0][DCACHE_WORD_SIZE*8-1:0] dcache_req_data, output wire [DCACHE_NUM_REQS-1:0][DCACHE_TAG_WIDTH-1:0] 
dcache_req_tag, input wire [DCACHE_NUM_REQS-1:0] dcache_req_ready, @@ -96,7 +96,7 @@ module VX_core_top import VX_gpu_pkg::*; #( assign dcache_req_rw[i] = dcache_bus_if[i].req_data.rw; assign dcache_req_byteen[i] = dcache_bus_if[i].req_data.byteen; assign dcache_req_addr[i] = dcache_bus_if[i].req_data.addr; - assign dcache_req_atype[i] = dcache_bus_if[i].req_data.atype; + assign dcache_req_flags[i] = dcache_bus_if[i].req_data.flags; assign dcache_req_data[i] = dcache_bus_if[i].req_data.data; assign dcache_req_tag[i] = dcache_bus_if[i].req_data.tag; assign dcache_bus_if[i].req_ready = dcache_req_ready[i]; @@ -119,7 +119,7 @@ module VX_core_top import VX_gpu_pkg::*; #( assign icache_req_data = icache_bus_if.req_data.data; assign icache_req_tag = icache_bus_if.req_data.tag; assign icache_bus_if.req_ready = icache_req_ready; - `UNUSED_VAR (icache_bus_if.req_data.atype) + `UNUSED_VAR (icache_bus_if.req_data.flags) assign icache_bus_if.rsp_valid = icache_rsp_valid; assign icache_bus_if.rsp_data.tag = icache_rsp_tag; diff --git a/hw/rtl/core/VX_dispatch.sv b/hw/rtl/core/VX_dispatch.sv index 8ea3a61250..96c947d1ea 100644 --- a/hw/rtl/core/VX_dispatch.sv +++ b/hw/rtl/core/VX_dispatch.sv @@ -61,7 +61,8 @@ module VX_dispatch import VX_gpu_pkg::*; #( .DATAW (DATAW), .SIZE (2), .OUT_REG (2), // 2-cycle EB for area reduction - .LUTRAM (1) + .LUTRAM (1), + .MAX_FANOUT (`MAX_FANOUT * 64) ) buffer ( .clk (clk), .reset (buffer_reset), diff --git a/hw/rtl/core/VX_fetch.sv b/hw/rtl/core/VX_fetch.sv index 043a879391..de622bd1d8 100644 --- a/hw/rtl/core/VX_fetch.sv +++ b/hw/rtl/core/VX_fetch.sv @@ -116,7 +116,7 @@ module VX_fetch import VX_gpu_pkg::*; #( .ready_out (icache_bus_if.req_ready) ); - assign icache_bus_if.req_data.atype = '0; + assign icache_bus_if.req_data.flags = '0; assign icache_bus_if.req_data.rw = 0; assign icache_bus_if.req_data.byteen = 4'b1111; assign icache_bus_if.req_data.data = '0; diff --git a/hw/rtl/core/VX_lmem_unit.sv b/hw/rtl/core/VX_lmem_unit.sv index 
accb7a586c..74da1e1148 100644 --- a/hw/rtl/core/VX_lmem_unit.sv +++ b/hw/rtl/core/VX_lmem_unit.sv @@ -29,7 +29,7 @@ module VX_lmem_unit import VX_gpu_pkg::*; #( `STATIC_ASSERT(`IS_DIVISBLE((1 << `LMEM_LOG_SIZE), `MEM_BLOCK_SIZE), ("invalid parameter")) `STATIC_ASSERT(0 == (`LMEM_BASE_ADDR % (1 << `LMEM_LOG_SIZE)), ("invalid parameter")) - localparam REQ_DATAW = `NUM_LSU_LANES + 1 + `NUM_LSU_LANES * (LSU_WORD_SIZE + LSU_ADDR_WIDTH + `ADDR_TYPE_WIDTH + LSU_WORD_SIZE * 8) + LSU_TAG_WIDTH; + localparam REQ_DATAW = `NUM_LSU_LANES + 1 + `NUM_LSU_LANES * (LSU_WORD_SIZE + LSU_ADDR_WIDTH + `MEM_REQ_FLAGS_WIDTH + LSU_WORD_SIZE * 8) + LSU_TAG_WIDTH; localparam RSP_DATAW = `NUM_LSU_LANES + `NUM_LSU_LANES * (LSU_WORD_SIZE * 8) + LSU_TAG_WIDTH; localparam LMEM_ADDR_WIDTH = `LMEM_LOG_SIZE - `CLOG2(LSU_WORD_SIZE); @@ -45,7 +45,7 @@ module VX_lmem_unit import VX_gpu_pkg::*; #( wire [`NUM_LSU_LANES-1:0] is_addr_local_mask; for (genvar j = 0; j < `NUM_LSU_LANES; ++j) begin - assign is_addr_local_mask[j] = lsu_mem_in_if[i].req_data.atype[j][`ADDR_TYPE_LOCAL]; + assign is_addr_local_mask[j] = lsu_mem_in_if[i].req_data.flags[j][`MEM_REQ_FLAG_LOCAL]; end wire is_addr_global = | (lsu_mem_in_if[i].req_data.mask & ~is_addr_local_mask); @@ -67,7 +67,7 @@ module VX_lmem_unit import VX_gpu_pkg::*; #( lsu_mem_in_if[i].req_data.rw, lsu_mem_in_if[i].req_data.byteen, lsu_mem_in_if[i].req_data.addr, - lsu_mem_in_if[i].req_data.atype, + lsu_mem_in_if[i].req_data.flags, lsu_mem_in_if[i].req_data.data, lsu_mem_in_if[i].req_data.tag }), @@ -78,7 +78,7 @@ module VX_lmem_unit import VX_gpu_pkg::*; #( lsu_mem_out_if[i].req_data.rw, lsu_mem_out_if[i].req_data.byteen, lsu_mem_out_if[i].req_data.addr, - lsu_mem_out_if[i].req_data.atype, + lsu_mem_out_if[i].req_data.flags, lsu_mem_out_if[i].req_data.data, lsu_mem_out_if[i].req_data.tag }), @@ -98,7 +98,7 @@ module VX_lmem_unit import VX_gpu_pkg::*; #( lsu_mem_in_if[i].req_data.rw, lsu_mem_in_if[i].req_data.byteen, lsu_mem_in_if[i].req_data.addr, -
lsu_mem_in_if[i].req_data.atype, + lsu_mem_in_if[i].req_data.flags, lsu_mem_in_if[i].req_data.data, lsu_mem_in_if[i].req_data.tag }), @@ -109,7 +109,7 @@ module VX_lmem_unit import VX_gpu_pkg::*; #( lsu_switch_if[i].req_data.rw, lsu_switch_if[i].req_data.byteen, lsu_switch_if[i].req_data.addr, - lsu_switch_if[i].req_data.atype, + lsu_switch_if[i].req_data.flags, lsu_switch_if[i].req_data.data, lsu_switch_if[i].req_data.tag }), diff --git a/hw/rtl/core/VX_lsu_adapter.sv b/hw/rtl/core/VX_lsu_adapter.sv index 21d43d2808..48ef231635 100644 --- a/hw/rtl/core/VX_lsu_adapter.sv +++ b/hw/rtl/core/VX_lsu_adapter.sv @@ -29,7 +29,7 @@ module VX_lsu_adapter import VX_gpu_pkg::*; #( VX_mem_bus_if.master mem_bus_if [NUM_LANES] ); localparam REQ_ADDR_WIDTH = `MEM_ADDR_WIDTH - `CLOG2(DATA_SIZE); - localparam REQ_DATA_WIDTH = 1 + DATA_SIZE + REQ_ADDR_WIDTH + `ADDR_TYPE_WIDTH + DATA_SIZE * 8; + localparam REQ_DATA_WIDTH = 1 + DATA_SIZE + REQ_ADDR_WIDTH + `MEM_REQ_FLAGS_WIDTH + DATA_SIZE * 8; localparam RSP_DATA_WIDTH = DATA_SIZE * 8; // handle request unpacking @@ -46,7 +46,7 @@ module VX_lsu_adapter import VX_gpu_pkg::*; #( lsu_mem_if.req_data.rw, lsu_mem_if.req_data.byteen[i], lsu_mem_if.req_data.addr[i], - lsu_mem_if.req_data.atype[i], + lsu_mem_if.req_data.flags[i], lsu_mem_if.req_data.data[i] }; end @@ -57,7 +57,7 @@ module VX_lsu_adapter import VX_gpu_pkg::*; #( mem_bus_if[i].req_data.rw, mem_bus_if[i].req_data.byteen, mem_bus_if[i].req_data.addr, - mem_bus_if[i].req_data.atype, + mem_bus_if[i].req_data.flags, mem_bus_if[i].req_data.data } = req_data_out[i]; assign mem_bus_if[i].req_data.tag = req_tag_out[i]; diff --git a/hw/rtl/core/VX_lsu_slice.sv b/hw/rtl/core/VX_lsu_slice.sv index 8c685fca29..6de9011821 100644 --- a/hw/rtl/core/VX_lsu_slice.sv +++ b/hw/rtl/core/VX_lsu_slice.sv @@ -65,19 +65,19 @@ module VX_lsu_slice import VX_gpu_pkg::*, VX_trace_pkg::*; #( // address type calculation - wire [NUM_LANES-1:0][`ADDR_TYPE_WIDTH-1:0] mem_req_atype; + wire 
[NUM_LANES-1:0][`MEM_REQ_FLAGS_WIDTH-1:0] mem_req_flags; for (genvar i = 0; i < NUM_LANES; ++i) begin wire [MEM_ADDRW-1:0] block_addr = full_addr[i][MEM_ASHIFT +: MEM_ADDRW]; // is I/O address wire [MEM_ADDRW-1:0] io_addr_start = MEM_ADDRW'(`XLEN'(`IO_BASE_ADDR) >> MEM_ASHIFT); wire [MEM_ADDRW-1:0] io_addr_end = MEM_ADDRW'(`XLEN'(`IO_END_ADDR) >> MEM_ASHIFT); - assign mem_req_atype[i][`ADDR_TYPE_FLUSH] = req_is_fence; - assign mem_req_atype[i][`ADDR_TYPE_IO] = (block_addr >= io_addr_start) && (block_addr < io_addr_end); + assign mem_req_flags[i][`MEM_REQ_FLAG_FLUSH] = req_is_fence; + assign mem_req_flags[i][`MEM_REQ_FLAG_IO] = (block_addr >= io_addr_start) && (block_addr < io_addr_end); `ifdef LMEM_ENABLE // is local memory address wire [MEM_ADDRW-1:0] lmem_addr_start = MEM_ADDRW'(`XLEN'(`LMEM_BASE_ADDR) >> MEM_ASHIFT); wire [MEM_ADDRW-1:0] lmem_addr_end = MEM_ADDRW'((`XLEN'(`LMEM_BASE_ADDR) + `XLEN'(1 << `LMEM_LOG_SIZE)) >> MEM_ASHIFT); - assign mem_req_atype[i][`ADDR_TYPE_LOCAL] = (block_addr >= lmem_addr_start) && (block_addr < lmem_addr_end); + assign mem_req_flags[i][`MEM_REQ_FLAG_LOCAL] = (block_addr >= lmem_addr_start) && (block_addr < lmem_addr_end); `endif end @@ -300,7 +300,7 @@ module VX_lsu_slice import VX_gpu_pkg::*, VX_trace_pkg::*; #( wire [NUM_LANES-1:0] lsu_mem_req_mask; wire [NUM_LANES-1:0][LSU_WORD_SIZE-1:0] lsu_mem_req_byteen; wire [NUM_LANES-1:0][LSU_ADDR_WIDTH-1:0] lsu_mem_req_addr; - wire [NUM_LANES-1:0][`ADDR_TYPE_WIDTH-1:0] lsu_mem_req_atype; + wire [NUM_LANES-1:0][`MEM_REQ_FLAGS_WIDTH-1:0] lsu_mem_req_flags; wire [NUM_LANES-1:0][(LSU_WORD_SIZE*8)-1:0] lsu_mem_req_data; wire [LSU_TAG_WIDTH-1:0] lsu_mem_req_tag; wire lsu_mem_req_ready; @@ -320,7 +320,7 @@ module VX_lsu_slice import VX_gpu_pkg::*, VX_trace_pkg::*; #( .WORD_SIZE (LSU_WORD_SIZE), .LINE_SIZE (LSU_WORD_SIZE), .ADDR_WIDTH (LSU_ADDR_WIDTH), - .ATYPE_WIDTH (`ADDR_TYPE_WIDTH), + .FLAGS_WIDTH (`MEM_REQ_FLAGS_WIDTH), .TAG_WIDTH (TAG_WIDTH), .CORE_QUEUE_SIZE (`LSUQ_IN_SIZE), 
.MEM_QUEUE_SIZE (`LSUQ_OUT_SIZE), @@ -338,7 +338,7 @@ module VX_lsu_slice import VX_gpu_pkg::*, VX_trace_pkg::*; #( .core_req_mask (mem_req_mask), .core_req_byteen(mem_req_byteen), .core_req_addr (mem_req_addr), - .core_req_atype (mem_req_atype), + .core_req_flags (mem_req_flags), .core_req_data (mem_req_data), .core_req_tag (mem_req_tag), .core_req_ready (mem_req_ready), @@ -360,7 +360,7 @@ module VX_lsu_slice import VX_gpu_pkg::*, VX_trace_pkg::*; #( .mem_req_mask (lsu_mem_req_mask), .mem_req_byteen (lsu_mem_req_byteen), .mem_req_addr (lsu_mem_req_addr), - .mem_req_atype (lsu_mem_req_atype), + .mem_req_flags (lsu_mem_req_flags), .mem_req_data (lsu_mem_req_data), .mem_req_tag (lsu_mem_req_tag), .mem_req_ready (lsu_mem_req_ready), @@ -378,7 +378,7 @@ module VX_lsu_slice import VX_gpu_pkg::*, VX_trace_pkg::*; #( assign lsu_mem_if.req_data.rw = lsu_mem_req_rw; assign lsu_mem_if.req_data.byteen = lsu_mem_req_byteen; assign lsu_mem_if.req_data.addr = lsu_mem_req_addr; - assign lsu_mem_if.req_data.atype = lsu_mem_req_atype; + assign lsu_mem_if.req_data.flags = lsu_mem_req_flags; assign lsu_mem_if.req_data.data = lsu_mem_req_data; assign lsu_mem_if.req_data.tag = lsu_mem_req_tag; assign lsu_mem_req_ready = lsu_mem_if.req_ready; @@ -513,16 +513,16 @@ module VX_lsu_slice import VX_gpu_pkg::*, VX_trace_pkg::*; #( if (mem_req_rw) begin `TRACE(1, ("%d: %s Wr Req: wid=%0d, PC=0x%0h, tmask=%b, addr=", $time, INSTANCE_ID, execute_if.data.wid, {execute_if.data.PC, 1'b0}, mem_req_mask)); `TRACE_ARRAY1D(1, "0x%h", full_addr, NUM_LANES); - `TRACE(1, (", atype=")); - `TRACE_ARRAY1D(1, "%b", mem_req_atype, NUM_LANES); + `TRACE(1, (", flags=")); + `TRACE_ARRAY1D(1, "%b", mem_req_flags, NUM_LANES); `TRACE(1, (", byteen=0x%0h, data=", mem_req_byteen)); `TRACE_ARRAY1D(1, "0x%0h", mem_req_data, NUM_LANES); `TRACE(1, (", tag=0x%0h (#%0d)\n", mem_req_tag, execute_if.data.uuid)); end else begin `TRACE(1, ("%d: %s Rd Req: wid=%0d, PC=0x%0h, tmask=%b, addr=", $time, INSTANCE_ID, 
execute_if.data.wid, {execute_if.data.PC, 1'b0}, mem_req_mask)); `TRACE_ARRAY1D(1, "0x%h", full_addr, NUM_LANES); - `TRACE(1, (", atype=")); - `TRACE_ARRAY1D(1, "%b", mem_req_atype, NUM_LANES); + `TRACE(1, (", flags=")); + `TRACE_ARRAY1D(1, "%b", mem_req_flags, NUM_LANES); `TRACE(1, (", byteen=0x%0h, rd=%0d, tag=0x%0h (#%0d)\n", mem_req_byteen, execute_if.data.rd, mem_req_tag, execute_if.data.uuid)); end end diff --git a/hw/rtl/core/VX_operands.sv b/hw/rtl/core/VX_operands.sv index e3df0c1fad..bd0d122ebf 100644 --- a/hw/rtl/core/VX_operands.sv +++ b/hw/rtl/core/VX_operands.sv @@ -183,7 +183,8 @@ module VX_operands import VX_gpu_pkg::*; #( VX_pipe_register #( .DATAW (1 + NUM_SRC_REGS * REGS_DATAW + NUM_BANKS + NUM_BANKS * REGS_DATAW + META_DATAW + NUM_BANKS * REQ_SEL_WIDTH), - .RESETW (1 + NUM_SRC_REGS * REGS_DATAW) + .RESETW (1 + NUM_SRC_REGS * REGS_DATAW), + .MAX_FANOUT (`MAX_FANOUT * 64) ) pipe_reg2 ( .clk (clk), .reset (pipe2_reset), @@ -205,7 +206,8 @@ module VX_operands import VX_gpu_pkg::*; #( .DATAW (DATAW), .SIZE (`TO_OUT_BUF_SIZE(OUT_BUF)), .OUT_REG (`TO_OUT_BUF_REG(OUT_BUF)), - .LUTRAM (1) + .LUTRAM (1), + .MAX_FANOUT (`MAX_FANOUT * 64) ) out_buf ( .clk (clk), .reset (reset), diff --git a/hw/rtl/interfaces/VX_lsu_mem_if.sv b/hw/rtl/interfaces/VX_lsu_mem_if.sv index 661071eb6e..4b2c6d4afa 100644 --- a/hw/rtl/interfaces/VX_lsu_mem_if.sv +++ b/hw/rtl/interfaces/VX_lsu_mem_if.sv @@ -1,10 +1,10 @@ // Copyright © 2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -16,18 +16,18 @@ interface VX_lsu_mem_if #( parameter NUM_LANES = 1, parameter DATA_SIZE = 1, - parameter ATYPE_WIDTH= `ADDR_TYPE_WIDTH, + parameter FLAGS_WIDTH= `MEM_REQ_FLAGS_WIDTH, parameter TAG_WIDTH = 1, parameter MEM_ADDR_WIDTH = `MEM_ADDR_WIDTH, parameter ADDR_WIDTH = MEM_ADDR_WIDTH - `CLOG2(DATA_SIZE) ) (); typedef struct packed { - logic rw; + logic rw; logic [NUM_LANES-1:0] mask; logic [NUM_LANES-1:0][DATA_SIZE-1:0] byteen; logic [NUM_LANES-1:0][ADDR_WIDTH-1:0] addr; - logic [NUM_LANES-1:0][ATYPE_WIDTH-1:0] atype; + logic [NUM_LANES-1:0][FLAGS_WIDTH-1:0] flags; logic [NUM_LANES-1:0][DATA_SIZE*8-1:0] data; logic [TAG_WIDTH-1:0] tag; } req_data_t; diff --git a/hw/rtl/libs/VX_mem_coalescer.sv b/hw/rtl/libs/VX_mem_coalescer.sv index d1ffde09af..db36ac7815 100644 --- a/hw/rtl/libs/VX_mem_coalescer.sv +++ b/hw/rtl/libs/VX_mem_coalescer.sv @@ -18,7 +18,7 @@ module VX_mem_coalescer #( parameter `STRING INSTANCE_ID = "", parameter NUM_REQS = 1, parameter ADDR_WIDTH = 32, - parameter ATYPE_WIDTH = 1, + parameter FLAGS_WIDTH = 1, parameter DATA_IN_SIZE = 4, parameter DATA_OUT_SIZE = 64, parameter TAG_WIDTH = 8, @@ -43,7 +43,7 @@ module VX_mem_coalescer #( input wire [NUM_REQS-1:0] in_req_mask, input wire [NUM_REQS-1:0][DATA_IN_SIZE-1:0] in_req_byteen, input wire [NUM_REQS-1:0][ADDR_WIDTH-1:0] in_req_addr, - input wire [NUM_REQS-1:0][ATYPE_WIDTH-1:0] in_req_atype, + input wire [NUM_REQS-1:0][FLAGS_WIDTH-1:0] in_req_flags, input wire [NUM_REQS-1:0][DATA_IN_WIDTH-1:0] in_req_data, input wire [TAG_WIDTH-1:0] in_req_tag, output wire in_req_ready, @@ -61,7 +61,7 @@ module VX_mem_coalescer #( output wire [OUT_REQS-1:0] out_req_mask, output wire [OUT_REQS-1:0][DATA_OUT_SIZE-1:0] out_req_byteen, output wire [OUT_REQS-1:0][OUT_ADDR_WIDTH-1:0] out_req_addr, - output wire [OUT_REQS-1:0][ATYPE_WIDTH-1:0] out_req_atype, + output wire [OUT_REQS-1:0][FLAGS_WIDTH-1:0] out_req_flags, output wire [OUT_REQS-1:0][DATA_OUT_WIDTH-1:0] out_req_data, output wire [OUT_TAG_WIDTH-1:0] 
out_req_tag, input wire out_req_ready, @@ -93,7 +93,7 @@ module VX_mem_coalescer #( logic out_req_rw_r, out_req_rw_n; logic [OUT_REQS-1:0] out_req_mask_r, out_req_mask_n; logic [OUT_REQS-1:0][OUT_ADDR_WIDTH-1:0] out_req_addr_r, out_req_addr_n; - logic [OUT_REQS-1:0][ATYPE_WIDTH-1:0] out_req_atype_r, out_req_atype_n; + logic [OUT_REQS-1:0][FLAGS_WIDTH-1:0] out_req_flags_r, out_req_flags_n; logic [OUT_REQS-1:0][DATA_RATIO-1:0][DATA_IN_SIZE-1:0] out_req_byteen_r, out_req_byteen_n; logic [OUT_REQS-1:0][DATA_RATIO-1:0][DATA_IN_WIDTH-1:0] out_req_data_r, out_req_data_n; logic [OUT_TAG_WIDTH-1:0] out_req_tag_r, out_req_tag_n; @@ -111,7 +111,7 @@ module VX_mem_coalescer #( logic [OUT_REQS-1:0] batch_valid_r, batch_valid_n; logic [OUT_REQS-1:0][OUT_ADDR_WIDTH-1:0] seed_addr_r, seed_addr_n; - logic [OUT_REQS-1:0][ATYPE_WIDTH-1:0] seed_atype_r, seed_atype_n; + logic [OUT_REQS-1:0][FLAGS_WIDTH-1:0] seed_flags_r, seed_flags_n; logic [NUM_REQS-1:0] addr_matches_r, addr_matches_n; logic [NUM_REQS-1:0] processed_mask_r, processed_mask_n; @@ -144,7 +144,7 @@ module VX_mem_coalescer #( for (genvar i = 0; i < OUT_REQS; ++i) begin assign seed_addr_n[i] = in_addr_base[seed_idx[i]]; - assign seed_atype_n[i] = in_req_atype[seed_idx[i]]; + assign seed_flags_n[i] = in_req_flags[seed_idx[i]]; end for (genvar i = 0; i < OUT_REQS; ++i) begin @@ -188,7 +188,7 @@ module VX_mem_coalescer #( out_req_mask_n = out_req_mask_r; out_req_rw_n = out_req_rw_r; out_req_addr_n = out_req_addr_r; - out_req_atype_n = out_req_atype_r; + out_req_flags_n = out_req_flags_r; out_req_byteen_n = out_req_byteen_r; out_req_data_n = out_req_data_r; out_req_tag_n = out_req_tag_r; @@ -211,7 +211,7 @@ module VX_mem_coalescer #( out_req_mask_n = batch_valid_r; out_req_rw_n = in_req_rw; out_req_addr_n = seed_addr_r; - out_req_atype_n = seed_atype_r; + out_req_flags_n = seed_flags_r; out_req_byteen_n= req_byteen_merged; out_req_data_n = req_data_merged; out_req_tag_n = {in_req_tag[TAG_WIDTH-1 -: UUID_WIDTH], ibuf_waddr}; @@ 
-230,14 +230,14 @@ module VX_mem_coalescer #( end VX_pipe_register #( - .DATAW (1 + NUM_REQS + 1 + 1 + NUM_REQS + OUT_REQS * (1 + 1 + OUT_ADDR_WIDTH + ATYPE_WIDTH + OUT_ADDR_WIDTH + ATYPE_WIDTH + DATA_OUT_SIZE + DATA_OUT_WIDTH) + OUT_TAG_WIDTH), + .DATAW (1 + NUM_REQS + 1 + 1 + NUM_REQS + OUT_REQS * (1 + 1 + OUT_ADDR_WIDTH + FLAGS_WIDTH + OUT_ADDR_WIDTH + FLAGS_WIDTH + DATA_OUT_SIZE + DATA_OUT_WIDTH) + OUT_TAG_WIDTH), .RESETW (1 + NUM_REQS + 1) ) pipe_reg ( .clk (clk), .reset (reset), .enable (1'b1), - .data_in ({state_n, processed_mask_n, out_req_valid_n, out_req_rw_n, addr_matches_n, batch_valid_n, out_req_mask_n, seed_addr_n, seed_atype_n, out_req_addr_n, out_req_atype_n, out_req_byteen_n, out_req_data_n, out_req_tag_n}), - .data_out ({state_r, processed_mask_r, out_req_valid_r, out_req_rw_r, addr_matches_r, batch_valid_r, out_req_mask_r, seed_addr_r, seed_atype_r, out_req_addr_r, out_req_atype_r, out_req_byteen_r, out_req_data_r, out_req_tag_r}) + .data_in ({state_n, processed_mask_n, out_req_valid_n, out_req_rw_n, addr_matches_n, batch_valid_n, out_req_mask_n, seed_addr_n, seed_flags_n, out_req_addr_n, out_req_flags_n, out_req_byteen_n, out_req_data_n, out_req_tag_n}), + .data_out ({state_r, processed_mask_r, out_req_valid_r, out_req_rw_r, addr_matches_r, batch_valid_r, out_req_mask_r, seed_addr_r, seed_flags_r, out_req_addr_r, out_req_flags_r, out_req_byteen_r, out_req_data_r, out_req_tag_r}) ); wire out_rsp_fire = out_rsp_valid && out_rsp_ready; @@ -278,7 +278,7 @@ module VX_mem_coalescer #( assign out_req_mask = out_req_mask_r; assign out_req_byteen = out_req_byteen_r; assign out_req_addr = out_req_addr_r; - assign out_req_atype = out_req_atype_r; + assign out_req_flags = out_req_flags_r; assign out_req_data = out_req_data_r; assign out_req_tag = out_req_tag_r; @@ -350,8 +350,8 @@ module VX_mem_coalescer #( if (out_req_rw) begin `TRACE(1, ("%d: %s-out-req-wr: valid=%b, addr=", $time, INSTANCE_ID, out_req_mask)); `TRACE_ARRAY1D(1, "0x%h", out_req_addr, 
OUT_REQS); - `TRACE(1, (", atype=")); - `TRACE_ARRAY1D(1, "%b", out_req_atype, OUT_REQS); + `TRACE(1, (", flags=")); + `TRACE_ARRAY1D(1, "%b", out_req_flags, OUT_REQS); `TRACE(1, (", byteen=")); `TRACE_ARRAY1D(1, "0x%h", out_req_byteen, OUT_REQS); `TRACE(1, (", data=")); @@ -359,8 +359,8 @@ module VX_mem_coalescer #( end else begin `TRACE(1, ("%d: %s-out-req-rd: valid=%b, addr=", $time, INSTANCE_ID, out_req_mask)); `TRACE_ARRAY1D(1, "0x%h", out_req_addr, OUT_REQS); - `TRACE(1, (", atype=")); - `TRACE_ARRAY1D(1, "%b", out_req_atype, OUT_REQS); + `TRACE(1, (", flags=")); + `TRACE_ARRAY1D(1, "%b", out_req_flags, OUT_REQS); end `TRACE(1, (", offset=")); `TRACE_ARRAY1D(1, "%0d", out_req_offset, NUM_REQS); diff --git a/hw/rtl/libs/VX_mem_scheduler.sv b/hw/rtl/libs/VX_mem_scheduler.sv index aa3ef9b2fb..f173d7d0ad 100644 --- a/hw/rtl/libs/VX_mem_scheduler.sv +++ b/hw/rtl/libs/VX_mem_scheduler.sv @@ -21,7 +21,7 @@ module VX_mem_scheduler #( parameter WORD_SIZE = 4, parameter LINE_SIZE = WORD_SIZE, parameter ADDR_WIDTH = 32 - `CLOG2(WORD_SIZE), - parameter ATYPE_WIDTH = 1, + parameter FLAGS_WIDTH = 1, parameter TAG_WIDTH = 8, parameter UUID_WIDTH = 0, // upper section of the request tag contains the UUID parameter CORE_QUEUE_SIZE= 8, @@ -50,7 +50,7 @@ module VX_mem_scheduler #( input wire [CORE_REQS-1:0] core_req_mask, input wire [CORE_REQS-1:0][WORD_SIZE-1:0] core_req_byteen, input wire [CORE_REQS-1:0][ADDR_WIDTH-1:0] core_req_addr, - input wire [CORE_REQS-1:0][ATYPE_WIDTH-1:0] core_req_atype, + input wire [CORE_REQS-1:0][FLAGS_WIDTH-1:0] core_req_flags, input wire [CORE_REQS-1:0][WORD_WIDTH-1:0] core_req_data, input wire [TAG_WIDTH-1:0] core_req_tag, output wire core_req_ready, @@ -72,7 +72,7 @@ module VX_mem_scheduler #( output wire [MEM_CHANNELS-1:0] mem_req_mask, output wire [MEM_CHANNELS-1:0][LINE_SIZE-1:0] mem_req_byteen, output wire [MEM_CHANNELS-1:0][MEM_ADDR_WIDTH-1:0] mem_req_addr, - output wire [MEM_CHANNELS-1:0][ATYPE_WIDTH-1:0] mem_req_atype, + output wire 
[MEM_CHANNELS-1:0][FLAGS_WIDTH-1:0] mem_req_flags, output wire [MEM_CHANNELS-1:0][LINE_WIDTH-1:0] mem_req_data, output wire [MEM_TAG_WIDTH-1:0] mem_req_tag, input wire mem_req_ready, @@ -113,7 +113,7 @@ module VX_mem_scheduler #( wire reqq_rw; wire [CORE_REQS-1:0][WORD_SIZE-1:0] reqq_byteen; wire [CORE_REQS-1:0][ADDR_WIDTH-1:0] reqq_addr; - wire [CORE_REQS-1:0][ATYPE_WIDTH-1:0] reqq_atype; + wire [CORE_REQS-1:0][FLAGS_WIDTH-1:0] reqq_flags; wire [CORE_REQS-1:0][WORD_WIDTH-1:0] reqq_data; wire [REQQ_TAG_WIDTH-1:0] reqq_tag; wire reqq_ready; @@ -123,7 +123,7 @@ module VX_mem_scheduler #( wire reqq_rw_s; wire [MERGED_REQS-1:0][LINE_SIZE-1:0] reqq_byteen_s; wire [MERGED_REQS-1:0][MEM_ADDR_WIDTH-1:0] reqq_addr_s; - wire [MERGED_REQS-1:0][ATYPE_WIDTH-1:0] reqq_atype_s; + wire [MERGED_REQS-1:0][FLAGS_WIDTH-1:0] reqq_flags_s; wire [MERGED_REQS-1:0][LINE_WIDTH-1:0] reqq_data_s; wire [MERGED_TAG_WIDTH-1:0] reqq_tag_s; wire reqq_ready_s; @@ -133,7 +133,7 @@ module VX_mem_scheduler #( wire mem_req_rw_s; wire [MEM_CHANNELS-1:0][LINE_SIZE-1:0] mem_req_byteen_s; wire [MEM_CHANNELS-1:0][MEM_ADDR_WIDTH-1:0] mem_req_addr_s; - wire [MEM_CHANNELS-1:0][ATYPE_WIDTH-1:0] mem_req_atype_s; + wire [MEM_CHANNELS-1:0][FLAGS_WIDTH-1:0] mem_req_flags_s; wire [MEM_CHANNELS-1:0][LINE_WIDTH-1:0] mem_req_data_s; wire [MEM_TAG_WIDTH-1:0] mem_req_tag_s; wire mem_req_ready_s; @@ -168,7 +168,7 @@ module VX_mem_scheduler #( end VX_elastic_buffer #( - .DATAW (1 + CORE_REQS * (1 + WORD_SIZE + ADDR_WIDTH + ATYPE_WIDTH + WORD_WIDTH) + REQQ_TAG_WIDTH), + .DATAW (1 + CORE_REQS * (1 + WORD_SIZE + ADDR_WIDTH + FLAGS_WIDTH + WORD_WIDTH) + REQQ_TAG_WIDTH), .SIZE (CORE_QUEUE_SIZE), .OUT_REG (1) ) req_queue ( @@ -176,8 +176,8 @@ module VX_mem_scheduler #( .reset (reset), .valid_in (reqq_valid_in), .ready_in (reqq_ready_in), - .data_in ({core_req_rw, core_req_mask, core_req_byteen, core_req_addr, core_req_atype, core_req_data, reqq_tag_u}), - .data_out ({reqq_rw, reqq_mask, reqq_byteen, reqq_addr, reqq_atype, 
reqq_data, reqq_tag}), + .data_in ({core_req_rw, core_req_mask, core_req_byteen, core_req_addr, core_req_flags, core_req_data, reqq_tag_u}), + .data_out ({reqq_rw, reqq_mask, reqq_byteen, reqq_addr, reqq_flags, reqq_data, reqq_tag}), .valid_out(reqq_valid), .ready_out(reqq_ready) ); @@ -231,7 +231,7 @@ module VX_mem_scheduler #( .DATA_IN_SIZE (WORD_SIZE), .DATA_OUT_SIZE (LINE_SIZE), .ADDR_WIDTH (ADDR_WIDTH), - .ATYPE_WIDTH (ATYPE_WIDTH), + .FLAGS_WIDTH (FLAGS_WIDTH), .TAG_WIDTH (REQQ_TAG_WIDTH), .UUID_WIDTH (UUID_WIDTH), .QUEUE_SIZE (MEM_QUEUE_SIZE) @@ -245,7 +245,7 @@ module VX_mem_scheduler #( .in_req_rw (reqq_rw), .in_req_byteen (reqq_byteen), .in_req_addr (reqq_addr), - .in_req_atype (reqq_atype), + .in_req_flags (reqq_flags), .in_req_data (reqq_data), .in_req_tag (reqq_tag), .in_req_ready (reqq_ready), @@ -263,7 +263,7 @@ module VX_mem_scheduler #( .out_req_rw (reqq_rw_s), .out_req_byteen (reqq_byteen_s), .out_req_addr (reqq_addr_s), - .out_req_atype (reqq_atype_s), + .out_req_flags (reqq_flags_s), .out_req_data (reqq_data_s), .out_req_tag (reqq_tag_s), .out_req_ready (reqq_ready_s), @@ -283,7 +283,7 @@ module VX_mem_scheduler #( assign reqq_rw_s = reqq_rw; assign reqq_byteen_s= reqq_byteen; assign reqq_addr_s = reqq_addr; - assign reqq_atype_s = reqq_atype; + assign reqq_flags_s = reqq_flags; assign reqq_data_s = reqq_data; assign reqq_tag_s = reqq_tag; assign reqq_ready = reqq_ready_s; @@ -301,7 +301,7 @@ module VX_mem_scheduler #( wire [MEM_BATCHES-1:0][MEM_CHANNELS-1:0] mem_req_mask_b; wire [MEM_BATCHES-1:0][MEM_CHANNELS-1:0][LINE_SIZE-1:0] mem_req_byteen_b; wire [MEM_BATCHES-1:0][MEM_CHANNELS-1:0][MEM_ADDR_WIDTH-1:0] mem_req_addr_b; - wire [MEM_BATCHES-1:0][MEM_CHANNELS-1:0][ATYPE_WIDTH-1:0] mem_req_atype_b; + wire [MEM_BATCHES-1:0][MEM_CHANNELS-1:0][FLAGS_WIDTH-1:0] mem_req_flags_b; wire [MEM_BATCHES-1:0][MEM_CHANNELS-1:0][LINE_WIDTH-1:0] mem_req_data_b; wire [BATCH_SEL_WIDTH-1:0] req_batch_idx; @@ -313,13 +313,13 @@ module VX_mem_scheduler #( assign 
mem_req_mask_b[i][j] = reqq_mask_s[r]; assign mem_req_byteen_b[i][j] = reqq_byteen_s[r]; assign mem_req_addr_b[i][j] = reqq_addr_s[r]; - assign mem_req_atype_b[i][j] = reqq_atype_s[r]; + assign mem_req_flags_b[i][j] = reqq_flags_s[r]; assign mem_req_data_b[i][j] = reqq_data_s[r]; end else begin assign mem_req_mask_b[i][j] = 0; assign mem_req_byteen_b[i][j] = '0; assign mem_req_addr_b[i][j] = '0; - assign mem_req_atype_b[i][j] = '0; + assign mem_req_flags_b[i][j] = '0; assign mem_req_data_b[i][j] = '0; end end @@ -329,7 +329,7 @@ module VX_mem_scheduler #( assign mem_req_rw_s = reqq_rw_s; assign mem_req_byteen_s = mem_req_byteen_b[req_batch_idx]; assign mem_req_addr_s = mem_req_addr_b[req_batch_idx]; - assign mem_req_atype_s = mem_req_atype_b[req_batch_idx]; + assign mem_req_flags_s = mem_req_flags_b[req_batch_idx]; assign mem_req_data_s = mem_req_data_b[req_batch_idx]; if (MEM_BATCHES != 1) begin @@ -390,7 +390,7 @@ module VX_mem_scheduler #( assign reqq_ready_s = req_sent_all; VX_elastic_buffer #( - .DATAW (MEM_CHANNELS + 1 + MEM_CHANNELS * (LINE_SIZE + MEM_ADDR_WIDTH + ATYPE_WIDTH + LINE_WIDTH) + MEM_TAG_WIDTH), + .DATAW (MEM_CHANNELS + 1 + MEM_CHANNELS * (LINE_SIZE + MEM_ADDR_WIDTH + FLAGS_WIDTH + LINE_WIDTH) + MEM_TAG_WIDTH), .SIZE (`TO_OUT_BUF_SIZE(MEM_OUT_BUF)), .OUT_REG (`TO_OUT_BUF_REG(MEM_OUT_BUF)) ) mem_req_buf ( @@ -398,8 +398,8 @@ module VX_mem_scheduler #( .reset (reset), .valid_in (mem_req_valid_s), .ready_in (mem_req_ready_s), - .data_in ({mem_req_mask_s, mem_req_rw_s, mem_req_byteen_s, mem_req_addr_s, mem_req_atype_s, mem_req_data_s, mem_req_tag_s}), - .data_out ({mem_req_mask, mem_req_rw, mem_req_byteen, mem_req_addr, mem_req_atype, mem_req_data, mem_req_tag}), + .data_in ({mem_req_mask_s, mem_req_rw_s, mem_req_byteen_s, mem_req_addr_s, mem_req_flags_s, mem_req_data_s, mem_req_tag_s}), + .data_out ({mem_req_mask, mem_req_rw, mem_req_byteen, mem_req_addr, mem_req_flags, mem_req_data, mem_req_tag}), .valid_out (mem_req_valid), .ready_out 
(mem_req_ready) ); diff --git a/hw/rtl/mem/VX_local_mem.sv b/hw/rtl/mem/VX_local_mem.sv index 3dce0ec432..5d095b0838 100644 --- a/hw/rtl/mem/VX_local_mem.sv +++ b/hw/rtl/mem/VX_local_mem.sv @@ -80,7 +80,7 @@ module VX_local_mem import VX_gpu_pkg::*; #( wire [NUM_REQS-1:0][BANK_ADDR_WIDTH-1:0] req_bank_addr; for (genvar i = 0; i < NUM_REQS; ++i) begin assign req_bank_addr[i] = mem_bus_if[i].req_data.addr[BANK_SEL_BITS +: BANK_ADDR_WIDTH]; - `UNUSED_VAR (mem_bus_if[i].req_data.atype) + `UNUSED_VAR (mem_bus_if[i].req_data.flags) end // bank requests dispatch diff --git a/hw/rtl/mem/VX_local_mem_top.sv b/hw/rtl/mem/VX_local_mem_top.sv index e576d32ec4..d1cac7ebfc 100644 --- a/hw/rtl/mem/VX_local_mem_top.sv +++ b/hw/rtl/mem/VX_local_mem_top.sv @@ -1,10 +1,10 @@ // Copyright © 2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -17,10 +17,10 @@ module VX_local_mem_top import VX_gpu_pkg::*; #( parameter `STRING INSTANCE_ID = "", // Size of cache in bytes - parameter SIZE = (1024*16*8), - + parameter SIZE = (1024*16*8), + // Number of Word requests per cycle - parameter NUM_REQS = 4, + parameter NUM_REQS = 4, // Number of banks parameter NUM_BANKS = 4, @@ -34,7 +34,7 @@ module VX_local_mem_top import VX_gpu_pkg::*; #( // Request tag size parameter TAG_WIDTH = 16 - ) ( + ) ( input wire clk, input wire reset, @@ -43,7 +43,7 @@ module VX_local_mem_top import VX_gpu_pkg::*; #( input wire [NUM_REQS-1:0] mem_req_rw, input wire [NUM_REQS-1:0][WORD_SIZE-1:0] mem_req_byteen, input wire [NUM_REQS-1:0][ADDR_WIDTH-1:0] mem_req_addr, - input wire [NUM_REQS-1:0][`ADDR_TYPE_WIDTH-1:0] mem_req_atype, + input wire [NUM_REQS-1:0][`MEM_REQ_FLAGS_WIDTH-1:0] mem_req_flags, input wire [NUM_REQS-1:0][WORD_SIZE*8-1:0] mem_req_data, input wire [NUM_REQS-1:0][TAG_WIDTH-1:0] mem_req_tag, output wire [NUM_REQS-1:0] mem_req_ready, @@ -65,7 +65,7 @@ module VX_local_mem_top import VX_gpu_pkg::*; #( assign mem_bus_if[i].req_data.rw = mem_req_rw[i]; assign mem_bus_if[i].req_data.byteen = mem_req_byteen[i]; assign mem_bus_if[i].req_data.addr = mem_req_addr[i]; - assign mem_bus_if[i].req_data.atype = mem_req_atype[i]; + assign mem_bus_if[i].req_data.flags = mem_req_flags[i]; assign mem_bus_if[i].req_data.data = mem_req_data[i]; assign mem_bus_if[i].req_data.tag = mem_req_tag[i]; assign mem_req_ready[i] = mem_bus_if[i].req_ready; @@ -86,9 +86,9 @@ module VX_local_mem_top import VX_gpu_pkg::*; #( .NUM_BANKS (NUM_BANKS), .WORD_SIZE (WORD_SIZE), .ADDR_WIDTH (ADDR_WIDTH), - .UUID_WIDTH (UUID_WIDTH), + .UUID_WIDTH (UUID_WIDTH), .TAG_WIDTH (TAG_WIDTH) - ) local_mem ( + ) local_mem ( .clk (clk), .reset (reset), .mem_bus_if (mem_bus_if) diff --git a/hw/rtl/mem/VX_mem_arb.sv b/hw/rtl/mem/VX_mem_arb.sv index ef51e23871..f45a7ea75f 100644 --- a/hw/rtl/mem/VX_mem_arb.sv +++ b/hw/rtl/mem/VX_mem_arb.sv @@ -1,10 +1,10 @@ // Copyright © 
2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -13,14 +13,14 @@ `include "VX_define.vh" -module VX_mem_arb #( - parameter NUM_INPUTS = 1, +module VX_mem_arb #( + parameter NUM_INPUTS = 1, parameter NUM_OUTPUTS = 1, parameter DATA_SIZE = 1, - parameter MEM_ADDR_WIDTH = `MEM_ADDR_WIDTH, + parameter MEM_ADDR_WIDTH = `MEM_ADDR_WIDTH, parameter ADDR_WIDTH = (MEM_ADDR_WIDTH-`CLOG2(DATA_SIZE)), - parameter TAG_WIDTH = 1, - parameter TAG_SEL_IDX = 0, + parameter TAG_WIDTH = 1, + parameter TAG_SEL_IDX = 0, parameter REQ_OUT_BUF = 0, parameter RSP_OUT_BUF = 0, parameter `STRING ARBITER = "R" @@ -30,10 +30,10 @@ module VX_mem_arb #( VX_mem_bus_if.slave bus_in_if [NUM_INPUTS], VX_mem_bus_if.master bus_out_if [NUM_OUTPUTS] -); +); localparam DATA_WIDTH = (8 * DATA_SIZE); localparam LOG_NUM_REQS = `ARB_SEL_BITS(NUM_INPUTS, NUM_OUTPUTS); - localparam REQ_DATAW = TAG_WIDTH + ADDR_WIDTH + `ADDR_TYPE_WIDTH + 1 + DATA_SIZE + DATA_WIDTH; + localparam REQ_DATAW = TAG_WIDTH + ADDR_WIDTH + `MEM_REQ_FLAGS_WIDTH + 1 + DATA_SIZE + DATA_WIDTH; localparam RSP_DATAW = TAG_WIDTH + DATA_WIDTH; `STATIC_ASSERT ((NUM_INPUTS >= NUM_OUTPUTS), ("invalid parameter")) @@ -53,14 +53,14 @@ module VX_mem_arb #( bus_in_if[i].req_data.rw, bus_in_if[i].req_data.byteen, bus_in_if[i].req_data.addr, - bus_in_if[i].req_data.atype, + bus_in_if[i].req_data.flags, bus_in_if[i].req_data.data, bus_in_if[i].req_data.tag }; assign bus_in_if[i].req_ready = req_ready_in[i]; end - VX_stream_arb #( + VX_stream_arb #( .NUM_INPUTS (NUM_INPUTS), .NUM_OUTPUTS (NUM_OUTPUTS), .DATAW (REQ_DATAW), @@ -80,7 +80,7 @@ 
module VX_mem_arb #( for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin wire [TAG_WIDTH-1:0] req_tag_out; - VX_bits_insert #( + VX_bits_insert #( .N (TAG_WIDTH), .S (LOG_NUM_REQS), .POS (TAG_SEL_IDX) @@ -94,8 +94,8 @@ module VX_mem_arb #( bus_out_if[i].req_data.rw, bus_out_if[i].req_data.byteen, bus_out_if[i].req_data.addr, - bus_out_if[i].req_data.atype, - bus_out_if[i].req_data.data, + bus_out_if[i].req_data.flags, + bus_out_if[i].req_data.data, req_tag_out } = req_data_out[i]; assign req_ready_out[i] = bus_out_if[i].req_ready; @@ -117,7 +117,7 @@ module VX_mem_arb #( for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin wire [TAG_WIDTH-1:0] rsp_tag_out; - VX_bits_remove #( + VX_bits_remove #( .N (TAG_WIDTH + LOG_NUM_REQS), .S (LOG_NUM_REQS), .POS (TAG_SEL_IDX) @@ -135,7 +135,7 @@ module VX_mem_arb #( end else begin assign rsp_sel_in[i] = '0; end - end + end VX_stream_switch #( .NUM_INPUTS (NUM_OUTPUTS), @@ -155,11 +155,11 @@ module VX_mem_arb #( ); end else begin - + for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin assign rsp_valid_in[i] = bus_out_if[i].rsp_valid; assign rsp_data_in[i] = { - bus_out_if[i].rsp_data.tag, + bus_out_if[i].rsp_data.tag, bus_out_if[i].rsp_data.data }; assign bus_out_if[i].rsp_ready = rsp_ready_in[i]; @@ -184,11 +184,11 @@ module VX_mem_arb #( ); end - + for (genvar i = 0; i < NUM_INPUTS; ++i) begin assign bus_in_if[i].rsp_valid = rsp_valid_out[i]; assign { - bus_in_if[i].rsp_data.tag, + bus_in_if[i].rsp_data.tag, bus_in_if[i].rsp_data.data } = rsp_data_out[i]; assign rsp_ready_out[i] = bus_in_if[i].rsp_ready; diff --git a/hw/rtl/mem/VX_mem_bus_if.sv b/hw/rtl/mem/VX_mem_bus_if.sv index 1b7fca7772..5f341904c6 100644 --- a/hw/rtl/mem/VX_mem_bus_if.sv +++ b/hw/rtl/mem/VX_mem_bus_if.sv @@ -1,10 +1,10 @@ // Copyright © 2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
// You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -15,7 +15,7 @@ interface VX_mem_bus_if #( parameter DATA_SIZE = 1, - parameter ATYPE_WIDTH= `ADDR_TYPE_WIDTH, + parameter FLAGS_WIDTH= `MEM_REQ_FLAGS_WIDTH, parameter TAG_WIDTH = 1, parameter MEM_ADDR_WIDTH = `MEM_ADDR_WIDTH, parameter ADDR_WIDTH = MEM_ADDR_WIDTH - `CLOG2(DATA_SIZE) @@ -25,7 +25,7 @@ interface VX_mem_bus_if #( logic rw; logic [DATA_SIZE-1:0] byteen; logic [ADDR_WIDTH-1:0] addr; - logic [ATYPE_WIDTH-1:0] atype; + logic [FLAGS_WIDTH-1:0] flags; logic [DATA_SIZE*8-1:0] data; logic [TAG_WIDTH-1:0] tag; } req_data_t; diff --git a/hw/rtl/mem/VX_mem_switch.sv b/hw/rtl/mem/VX_mem_switch.sv index fd26c2aa8f..21ec7278a8 100644 --- a/hw/rtl/mem/VX_mem_switch.sv +++ b/hw/rtl/mem/VX_mem_switch.sv @@ -31,7 +31,7 @@ module VX_mem_switch import VX_gpu_pkg::*; #( VX_mem_bus_if.master bus_out_if [NUM_REQS] ); localparam DATA_WIDTH = (8 * DATA_SIZE); - localparam REQ_DATAW = TAG_WIDTH + ADDR_WIDTH + `ADDR_TYPE_WIDTH + 1 + DATA_SIZE + DATA_WIDTH; + localparam REQ_DATAW = TAG_WIDTH + ADDR_WIDTH + `MEM_REQ_FLAGS_WIDTH + 1 + DATA_SIZE + DATA_WIDTH; localparam RSP_DATAW = TAG_WIDTH + DATA_WIDTH; // handle requests //////////////////////////////////////////////////////// From 0d7012e69e36b0b144fa44f231351a1e8a6784b0 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Tue, 6 Aug 2024 21:27:08 -0700 Subject: [PATCH 009/407] minor update --- hw/rtl/core/VX_lmem_unit.sv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/rtl/core/VX_lmem_unit.sv b/hw/rtl/core/VX_lmem_unit.sv index 74da1e1148..d93befda7e 100644 --- a/hw/rtl/core/VX_lmem_unit.sv +++ b/hw/rtl/core/VX_lmem_unit.sv @@ -45,7 +45,7 @@ module VX_lmem_unit import VX_gpu_pkg::*; #( wire 
[`NUM_LSU_LANES-1:0] is_addr_local_mask; for (genvar j = 0; j < `NUM_LSU_LANES; ++j) begin - assign is_addr_local_mask[j] = lsu_mem_in_if[i].req_data.flags[j][`MEM_REQ_FLAGE_LOCAL]; + assign is_addr_local_mask[j] = lsu_mem_in_if[i].req_data.flags[j][`MEM_REQ_FLAG_LOCAL]; end wire is_addr_global = | (lsu_mem_in_if[i].req_data.mask & ~is_addr_local_mask); From 30ebb65fc336ac59323791f35a9e43a071e8d62e Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Tue, 6 Aug 2024 23:36:37 -0700 Subject: [PATCH 010/407] minor update --- hw/rtl/core/VX_dispatch.sv | 3 +- hw/rtl/core/VX_operands.sv | 6 +-- hw/rtl/libs/VX_elastic_buffer.sv | 40 +-------------- hw/rtl/libs/VX_pipe_register.sv | 85 ++++++++++++-------------------- 4 files changed, 35 insertions(+), 99 deletions(-) diff --git a/hw/rtl/core/VX_dispatch.sv b/hw/rtl/core/VX_dispatch.sv index 96c947d1ea..8ea3a61250 100644 --- a/hw/rtl/core/VX_dispatch.sv +++ b/hw/rtl/core/VX_dispatch.sv @@ -61,8 +61,7 @@ module VX_dispatch import VX_gpu_pkg::*; #( .DATAW (DATAW), .SIZE (2), .OUT_REG (2), // 2-cycle EB for area reduction - .LUTRAM (1), - .MAX_FANOUT (`MAX_FANOUT * 64) + .LUTRAM (1) ) buffer ( .clk (clk), .reset (buffer_reset), diff --git a/hw/rtl/core/VX_operands.sv b/hw/rtl/core/VX_operands.sv index bd0d122ebf..e3df0c1fad 100644 --- a/hw/rtl/core/VX_operands.sv +++ b/hw/rtl/core/VX_operands.sv @@ -183,8 +183,7 @@ module VX_operands import VX_gpu_pkg::*; #( VX_pipe_register #( .DATAW (1 + NUM_SRC_REGS * REGS_DATAW + NUM_BANKS + NUM_BANKS * REGS_DATAW + META_DATAW + NUM_BANKS * REQ_SEL_WIDTH), - .RESETW (1 + NUM_SRC_REGS * REGS_DATAW), - .MAX_FANOUT (`MAX_FANOUT * 64) + .RESETW (1 + NUM_SRC_REGS * REGS_DATAW) ) pipe_reg2 ( .clk (clk), .reset (pipe2_reset), @@ -206,8 +205,7 @@ module VX_operands import VX_gpu_pkg::*; #( .DATAW (DATAW), .SIZE (`TO_OUT_BUF_SIZE(OUT_BUF)), .OUT_REG (`TO_OUT_BUF_REG(OUT_BUF)), - .LUTRAM (1), - .MAX_FANOUT (`MAX_FANOUT * 64) + .LUTRAM (1) ) out_buf ( .clk (clk), .reset (reset), diff --git 
a/hw/rtl/libs/VX_elastic_buffer.sv b/hw/rtl/libs/VX_elastic_buffer.sv index 9213572d31..ee6f31b58b 100644 --- a/hw/rtl/libs/VX_elastic_buffer.sv +++ b/hw/rtl/libs/VX_elastic_buffer.sv @@ -18,8 +18,7 @@ module VX_elastic_buffer #( parameter DATAW = 1, parameter SIZE = 1, parameter OUT_REG = 0, - parameter LUTRAM = 0, - parameter MAX_FANOUT = 0 + parameter LUTRAM = 0 ) ( input wire clk, input wire reset, @@ -41,43 +40,6 @@ module VX_elastic_buffer #( assign data_out = data_in; assign ready_in = ready_out; - end else if (MAX_FANOUT != 0 && (DATAW > (MAX_FANOUT + MAX_FANOUT/2))) begin - - localparam NUM_SLICES = `CDIV(DATAW, MAX_FANOUT); - localparam N_DATAW = DATAW / NUM_SLICES; - - for (genvar i = 0; i < NUM_SLICES; ++i) begin - - localparam S_DATAW = (i == NUM_SLICES-1) ? (DATAW - i * N_DATAW) : N_DATAW; - - wire valid_out_t, ready_in_t; - `UNUSED_VAR (valid_out_t) - `UNUSED_VAR (ready_in_t) - - `RESET_RELAY (slice_reset, reset); - - VX_elastic_buffer #( - .DATAW (S_DATAW), - .SIZE (SIZE), - .OUT_REG (OUT_REG), - .LUTRAM (LUTRAM) - ) buffer_slice ( - .clk (clk), - .reset (slice_reset), - .valid_in (valid_in), - .data_in (data_in[i * N_DATAW +: S_DATAW]), - .ready_in (ready_in_t), - .valid_out (valid_out_t), - .data_out (data_out[i * N_DATAW +: S_DATAW]), - .ready_out (ready_out) - ); - - if (i == 0) begin - assign ready_in = ready_in_t; - assign valid_out = valid_out_t; - end - end - end else if (SIZE == 1) begin VX_pipe_buffer #( diff --git a/hw/rtl/libs/VX_pipe_register.sv b/hw/rtl/libs/VX_pipe_register.sv index 707438abdb..2c1cddfd64 100644 --- a/hw/rtl/libs/VX_pipe_register.sv +++ b/hw/rtl/libs/VX_pipe_register.sv @@ -17,8 +17,7 @@ module VX_pipe_register #( parameter DATAW = 1, parameter RESETW = 0, - parameter DEPTH = 1, - parameter MAX_FANOUT = 0 + parameter DEPTH = 1 ) ( input wire clk, input wire reset, @@ -32,67 +31,45 @@ module VX_pipe_register #( `UNUSED_VAR (enable) assign data_out = data_in; end else if (DEPTH == 1) begin - if (MAX_FANOUT != 0 && 
(DATAW > (MAX_FANOUT + MAX_FANOUT/2))) begin - localparam NUM_SLICES = `CDIV(DATAW, MAX_FANOUT); - localparam N_DATAW = DATAW / NUM_SLICES; - for (genvar i = 0; i < NUM_SLICES; ++i) begin - localparam SLICE_START = i * N_DATAW; - localparam SLICE_END = SLICE_START + S_DATAW - 1; - localparam S_DATAW = (i == NUM_SLICES-1) ? (DATAW - SLICE_START) : N_DATAW; - localparam S_RESETW = (SLICE_END >= (DATAW - RESETW)) ? - ((SLICE_START >= (DATAW - RESETW)) ? S_DATAW : (SLICE_END - (DATAW - RESETW) + 1)) : 0; - VX_pipe_register #( - .DATAW (S_DATAW), - .RESETW (S_RESETW) - ) pipe_register_slice ( - .clk (clk), - .reset (reset), - .enable (enable), - .data_in (data_in[i * N_DATAW +: S_DATAW]), - .data_out (data_out[i * N_DATAW +: S_DATAW]) - ); - end - end else begin - if (RESETW == 0) begin - `UNUSED_VAR (reset) - reg [DATAW-1:0] value; + if (RESETW == 0) begin + `UNUSED_VAR (reset) + reg [DATAW-1:0] value; - always @(posedge clk) begin - if (enable) begin - value <= data_in; - end + always @(posedge clk) begin + if (enable) begin + value <= data_in; end - assign data_out = value; - end else if (RESETW == DATAW) begin - reg [DATAW-1:0] value; + end + assign data_out = value; + end else if (RESETW == DATAW) begin + reg [DATAW-1:0] value; - always @(posedge clk) begin - if (reset) begin - value <= RESETW'(0); - end else if (enable) begin - value <= data_in; - end + always @(posedge clk) begin + if (reset) begin + value <= RESETW'(0); + end else if (enable) begin + value <= data_in; end - assign data_out = value; - end else begin - reg [DATAW-RESETW-1:0] value_d; - reg [RESETW-1:0] value_r; + end + assign data_out = value; + end else begin + reg [DATAW-RESETW-1:0] value_d; + reg [RESETW-1:0] value_r; - always @(posedge clk) begin - if (reset) begin - value_r <= RESETW'(0); - end else if (enable) begin - value_r <= data_in[DATAW-1:DATAW-RESETW]; - end + always @(posedge clk) begin + if (reset) begin + value_r <= RESETW'(0); + end else if (enable) begin + value_r <= 
data_in[DATAW-1:DATAW-RESETW]; end + end - always @(posedge clk) begin - if (enable) begin - value_d <= data_in[DATAW-RESETW-1:0]; - end + always @(posedge clk) begin + if (enable) begin + value_d <= data_in[DATAW-RESETW-1:0]; end - assign data_out = {value_r, value_d}; end + assign data_out = {value_r, value_d}; end end else begin wire [DEPTH:0][DATAW-1:0] data_delayed; From f1e79f4c0f8e4d140e2b88ee9371f07fbf251472 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Wed, 7 Aug 2024 19:44:04 -0700 Subject: [PATCH 011/407] fixed toolchain install on centos/7 --- ci/toolchain_install.sh.in | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ci/toolchain_install.sh.in b/ci/toolchain_install.sh.in index 935568ff07..73e27eb552 100755 --- a/ci/toolchain_install.sh.in +++ b/ci/toolchain_install.sh.in @@ -23,7 +23,7 @@ OSVERSION=${OSVERSION:=@OSVERSION@} riscv32() { case $OSVERSION in - "centos/7") parts=$(eval echo {a..h}) ;; + "centos/7") parts=$(eval echo {a..l}) ;; "ubuntu/focal") parts=$(eval echo {a..k}) ;; *) parts=$(eval echo {a..j}) ;; esac @@ -41,7 +41,7 @@ riscv32() riscv64() { case $OSVERSION in - "centos/7") parts=$(eval echo {a..h}) ;; + "centos/7") parts=$(eval echo {a..l}) ;; *) parts=$(eval echo {a..j}) ;; esac rm -f riscv64-gnu-toolchain.tar.bz2.parta* From ab21f76aed67afd251bfc3b6d9b960170d738454 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Wed, 7 Aug 2024 19:44:24 -0700 Subject: [PATCH 012/407] minor update --- tests/Makefile | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/Makefile b/tests/Makefile index b141fd41df..1068da2abd 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -13,9 +13,7 @@ regression: $(MAKE) -C regression opencl: -ifneq ($(XLEN),64) $(MAKE) -C opencl -endif riscv: $(MAKE) -C riscv From 455fc8389c029b741c041b5245b9d6feb006a417 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Fri, 9 Aug 2024 13:58:19 -0700 Subject: [PATCH 013/407] refactoring priority encoder --- hw/rtl/libs/VX_mem_coalescer.sv | 14 
+++++--- hw/rtl/libs/VX_priority_arbiter.sv | 8 ++--- hw/rtl/libs/VX_priority_encoder.sv | 54 +++++++++++++++--------------- 3 files changed, 40 insertions(+), 36 deletions(-) diff --git a/hw/rtl/libs/VX_mem_coalescer.sv b/hw/rtl/libs/VX_mem_coalescer.sv index db36ac7815..dbc53336bf 100644 --- a/hw/rtl/libs/VX_mem_coalescer.sv +++ b/hw/rtl/libs/VX_mem_coalescer.sv @@ -125,16 +125,20 @@ module VX_mem_coalescer #( end for (genvar i = 0; i < OUT_REQS; ++i) begin - wire [DATA_RATIO-1:0] batch_mask = in_req_mask[i * DATA_RATIO +: DATA_RATIO] & ~processed_mask_r[i * DATA_RATIO +: DATA_RATIO]; + wire [DATA_RATIO-1:0] batch_mask; wire [DATA_RATIO_W-1:0] batch_idx; + + assign batch_mask = in_req_mask[i * DATA_RATIO +: DATA_RATIO] & ~processed_mask_r[i * DATA_RATIO +: DATA_RATIO]; + VX_priority_encoder #( .N (DATA_RATIO) ) priority_encoder ( - .data_in (batch_mask), - .index (batch_idx), - `UNUSED_PIN (onehot), - .valid_out (batch_valid_n[i]) + .data_in (batch_mask), + .index_out (batch_idx), + `UNUSED_PIN (onehot_out), + .valid_out (batch_valid_n[i]) ); + if (OUT_REQS > 1) begin assign seed_idx[i] = {(NUM_REQS_W-DATA_RATIO_W)'(i), batch_idx}; end else begin diff --git a/hw/rtl/libs/VX_priority_arbiter.sv b/hw/rtl/libs/VX_priority_arbiter.sv index cd4844d251..13a9401780 100644 --- a/hw/rtl/libs/VX_priority_arbiter.sv +++ b/hw/rtl/libs/VX_priority_arbiter.sv @@ -34,10 +34,10 @@ module VX_priority_arbiter #( VX_priority_encoder #( .N (NUM_REQS) ) priority_encoder ( - .data_in (requests), - .index (grant_index), - .onehot (grant_onehot), - .valid_out (grant_valid) + .data_in (requests), + .index_out (grant_index), + .onehot_out (grant_onehot), + .valid_out (grant_valid) ); end diff --git a/hw/rtl/libs/VX_priority_encoder.sv b/hw/rtl/libs/VX_priority_encoder.sv index 5a08e34122..8bba538b1b 100644 --- a/hw/rtl/libs/VX_priority_encoder.sv +++ b/hw/rtl/libs/VX_priority_encoder.sv @@ -1,10 +1,10 @@ // Copyright © 2019-2023 -// +// // Licensed under the Apache License, Version 2.0 
(the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -14,38 +14,38 @@ `include "VX_platform.vh" `TRACING_OFF -module VX_priority_encoder #( - parameter N = 1, +module VX_priority_encoder #( + parameter N = 1, parameter REVERSE = 0, parameter MODEL = 1, parameter LN = `LOG2UP(N) ) ( - input wire [N-1:0] data_in, - output wire [N-1:0] onehot, - output wire [LN-1:0] index, + input wire [N-1:0] data_in, + output wire [N-1:0] onehot_out, + output wire [LN-1:0] index_out, output wire valid_out ); - wire [N-1:0] reversed; + wire [N-1:0] reversed; if (REVERSE != 0) begin for (genvar i = 0; i < N; ++i) begin assign reversed[N-i-1] = data_in[i]; - end + end end else begin assign reversed = data_in; end if (N == 1) begin - assign onehot = reversed; - assign index = '0; - assign valid_out = reversed; + assign onehot_out = reversed; + assign index_out = '0; + assign valid_out = reversed; end else if (N == 2) begin - assign onehot = {~reversed[0], reversed[0]}; - assign index = ~reversed[0]; - assign valid_out = (| reversed); + assign onehot_out = {~reversed[0], reversed[0]}; + assign index_out = ~reversed[0]; + assign valid_out = (| reversed); end else if (MODEL == 1) begin @@ -64,12 +64,12 @@ module VX_priority_encoder #( .REVERSE (1) ) lzc ( .data_in (reversed), - .data_out (index), + .data_out (index_out), `UNUSED_PIN (valid_out) ); - assign onehot = scan_lo & {(~scan_lo[N-2:0]), 1'b1}; - assign valid_out = scan_lo[N-1]; + assign onehot_out = scan_lo & {(~scan_lo[N-2:0]), 1'b1}; + assign valid_out = scan_lo[N-1]; end else if (MODEL == 2) begin @@ -78,27 +78,27 @@ module VX_priority_encoder #( `IGNORE_WARNINGS_END assign 
higher_pri_regs[N-1:1] = higher_pri_regs[N-2:0] | reversed[N-2:0]; assign higher_pri_regs[0] = 1'b0; - assign onehot[N-1:0] = reversed[N-1:0] & ~higher_pri_regs[N-1:0]; + assign onehot_out[N-1:0] = reversed[N-1:0] & ~higher_pri_regs[N-1:0]; VX_lzc #( .N (N), .REVERSE (1) ) lzc ( .data_in (reversed), - .data_out (index), + .data_out (index_out), .valid_out (valid_out) ); end else if (MODEL == 3) begin - assign onehot = reversed & -reversed; + assign onehot_out = reversed & -reversed; VX_lzc #( .N (N), .REVERSE (1) ) lzc ( .data_in (reversed), - .data_out (index), + .data_out (index_out), .valid_out (valid_out) ); @@ -117,13 +117,13 @@ module VX_priority_encoder #( onehot_r[i] = 1'b1; end end - end + end - assign index = index_r; - assign onehot = onehot_r; - assign valid_out = (| reversed); + assign index_out = index_r; + assign onehot_out = onehot_r; + assign valid_out = (| reversed); - end + end endmodule `TRACING_ON From 42afa2472f9296a680c6752b49a2942182433edc Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Fri, 9 Aug 2024 18:11:12 -0700 Subject: [PATCH 014/407] cdiv --- hw/rtl/core/VX_issue.sv | 1 + 1 file changed, 1 insertion(+) diff --git a/hw/rtl/core/VX_issue.sv b/hw/rtl/core/VX_issue.sv index 1480e6649c..45e414865e 100644 --- a/hw/rtl/core/VX_issue.sv +++ b/hw/rtl/core/VX_issue.sv @@ -29,6 +29,7 @@ module VX_issue import VX_gpu_pkg::*; #( VX_writeback_if.slave writeback_if [`ISSUE_WIDTH], VX_dispatch_if.master dispatch_if [`NUM_EX_UNITS * `ISSUE_WIDTH] ); + `STATIC_ASSERT (`ISSUE_WIDTH <= `NUM_WARPS, "invalid parameter"); `ifdef PERF_ENABLE issue_perf_t per_issue_perf [`ISSUE_WIDTH]; From 229641441f0ba5afcc47aa84a5cb740ff0922f8d Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Fri, 9 Aug 2024 18:13:52 -0700 Subject: [PATCH 015/407] adding static assertion --- hw/rtl/core/VX_issue.sv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/rtl/core/VX_issue.sv b/hw/rtl/core/VX_issue.sv index 45e414865e..e77a3633a0 100644 --- 
a/hw/rtl/core/VX_issue.sv +++ b/hw/rtl/core/VX_issue.sv @@ -29,7 +29,7 @@ module VX_issue import VX_gpu_pkg::*; #( VX_writeback_if.slave writeback_if [`ISSUE_WIDTH], VX_dispatch_if.master dispatch_if [`NUM_EX_UNITS * `ISSUE_WIDTH] ); - `STATIC_ASSERT (`ISSUE_WIDTH <= `NUM_WARPS, "invalid parameter"); + `STATIC_ASSERT ((`ISSUE_WIDTH <= `NUM_WARPS), ("invalid parameter")) `ifdef PERF_ENABLE issue_perf_t per_issue_perf [`ISSUE_WIDTH]; From c8d0357ac655e06f9036e22ace309f8592db795f Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sat, 10 Aug 2024 00:37:56 -0700 Subject: [PATCH 016/407] rtl arbiter fixes --- hw/rtl/libs/VX_fair_arbiter.sv | 25 +++++++-------- hw/rtl/libs/VX_matrix_arbiter.sv | 52 ++++++++++++++------------------ hw/rtl/libs/VX_rr_arbiter.sv | 4 +-- 3 files changed, 36 insertions(+), 45 deletions(-) diff --git a/hw/rtl/libs/VX_fair_arbiter.sv b/hw/rtl/libs/VX_fair_arbiter.sv index 82bcfc5c68..d4dba9a3f6 100644 --- a/hw/rtl/libs/VX_fair_arbiter.sv +++ b/hw/rtl/libs/VX_fair_arbiter.sv @@ -38,26 +38,27 @@ module VX_fair_arbiter #( end else begin - reg [NUM_REQS-1:0] requests_r; + reg [NUM_REQS-1:0] grant_hist; - wire [NUM_REQS-1:0] requests_sel = requests_r & requests; - wire [NUM_REQS-1:0] requests_qual = (| requests_sel) ? requests_sel : requests; + wire [NUM_REQS-1:0] requests_sel = requests & ~grant_hist; + wire rem_valid = (| requests_sel); + wire [NUM_REQS-1:0] requests_qual = rem_valid ? requests_sel : requests; always @(posedge clk) begin if (reset) begin - requests_r <= '0; + grant_hist <= '0; end else if (grant_ready) begin - requests_r <= requests_qual & ~grant_onehot; + grant_hist <= rem_valid ? 
(grant_hist | grant_onehot) : grant_onehot; end end - VX_priority_arbiter #( - .NUM_REQS (NUM_REQS) - ) priority_arbiter ( - .requests (requests_qual), - .grant_index (grant_index), - .grant_onehot (grant_onehot), - .grant_valid (grant_valid) + VX_priority_encoder #( + .N (NUM_REQS) + ) priority_enc ( + .data_in (requests_qual), + .index_out (grant_index), + .onehot_out (grant_onehot), + .valid_out (grant_valid) ); end diff --git a/hw/rtl/libs/VX_matrix_arbiter.sv b/hw/rtl/libs/VX_matrix_arbiter.sv index 23f9ea2a01..9f0ead356e 100644 --- a/hw/rtl/libs/VX_matrix_arbiter.sv +++ b/hw/rtl/libs/VX_matrix_arbiter.sv @@ -38,57 +38,49 @@ module VX_matrix_arbiter #( end else begin - reg [NUM_REQS-1:1] state [NUM_REQS-1:0]; + reg [NUM_REQS-1:1] state [NUM_REQS-1:0]; wire [NUM_REQS-1:0] pri [NUM_REQS-1:0]; - wire [NUM_REQS-1:0] grant_unqual; + wire [NUM_REQS-1:0] grant; - for (genvar i = 0; i < NUM_REQS; ++i) begin - for (genvar j = 0; j < NUM_REQS; ++j) begin - if (j > i) begin - assign pri[j][i] = requests[i] && state[i][j]; + for (genvar r = 0; r < NUM_REQS; ++r) begin + for (genvar c = 0; c < NUM_REQS; ++c) begin + if (r > c) begin + assign pri[r][c] = requests[c] && state[c][r]; end - else if (j < i) begin - assign pri[j][i] = requests[i] && !state[j][i]; + else if (r < c) begin + assign pri[r][c] = requests[c] && !state[r][c]; end else begin - assign pri[j][i] = 0; + assign pri[r][c] = 0; end end - assign grant_unqual[i] = requests[i] && !(| pri[i]); end - for (genvar i = 0; i < NUM_REQS; ++i) begin - for (genvar j = i + 1; j < NUM_REQS; ++j) begin + for (genvar r = 0; r < NUM_REQS; ++r) begin + assign grant[r] = requests[r] && ~(| pri[r]); + end + + for (genvar r = 0; r < NUM_REQS; ++r) begin + for (genvar c = r + 1; c < NUM_REQS; ++c) begin always @(posedge clk) begin if (reset) begin - state[i][j] <= '0; - end else begin - state[i][j] <= (state[i][j] || grant_unqual[j]) && !grant_unqual[i]; + state[r][c] <= '0; + end else if (grant_ready) begin + state[r][c] <= 
(state[r][c] || grant[c]) && ~grant[r]; end end end end - reg [NUM_REQS-1:0] grant_unqual_prev; - always @(posedge clk) begin - if (reset) begin - grant_unqual_prev <= '0; - end else if (grant_ready) begin - grant_unqual_prev <= grant_unqual; - end - end - assign grant_onehot = grant_ready ? grant_unqual : grant_unqual_prev; + assign grant_onehot = grant; VX_onehot_encoder #( .N (NUM_REQS) ) encoder ( - .data_in (grant_unqual), - .data_out (grant_index), - `UNUSED_PIN (valid_out) + .data_in (grant_onehot), + .data_out (grant_index), + .valid_out (grant_valid) ); - - assign grant_valid = (| requests); - end endmodule diff --git a/hw/rtl/libs/VX_rr_arbiter.sv b/hw/rtl/libs/VX_rr_arbiter.sv index 52a9811846..adb7c3bebc 100644 --- a/hw/rtl/libs/VX_rr_arbiter.sv +++ b/hw/rtl/libs/VX_rr_arbiter.sv @@ -416,14 +416,12 @@ module VX_rr_arbiter #( end end - assign grant_valid = (| requests); - VX_onehot_encoder #( .N (NUM_REQS) ) onehot_encoder ( .data_in (grant_onehot), .data_out (grant_index), - `UNUSED_PIN (valid_out) + .valid_out(grant_valid) ); end else begin From eaa7ed7fe206721111fd03033e19ffcf632f088d Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sat, 10 Aug 2024 02:38:54 -0700 Subject: [PATCH 017/407] rtl arbiter update --- hw/rtl/libs/VX_fair_arbiter.sv | 2 +- hw/rtl/libs/VX_rr_arbiter.sv | 71 ++++++++++++++++++---------------- 2 files changed, 38 insertions(+), 35 deletions(-) diff --git a/hw/rtl/libs/VX_fair_arbiter.sv b/hw/rtl/libs/VX_fair_arbiter.sv index d4dba9a3f6..430966aee7 100644 --- a/hw/rtl/libs/VX_fair_arbiter.sv +++ b/hw/rtl/libs/VX_fair_arbiter.sv @@ -47,7 +47,7 @@ module VX_fair_arbiter #( always @(posedge clk) begin if (reset) begin grant_hist <= '0; - end else if (grant_ready) begin + end else if (grant_valid && grant_ready) begin grant_hist <= rem_valid ? 
(grant_hist | grant_onehot) : grant_onehot; end end diff --git a/hw/rtl/libs/VX_rr_arbiter.sv b/hw/rtl/libs/VX_rr_arbiter.sv index adb7c3bebc..60cc4813f1 100644 --- a/hw/rtl/libs/VX_rr_arbiter.sv +++ b/hw/rtl/libs/VX_rr_arbiter.sv @@ -377,41 +377,38 @@ module VX_rr_arbiter #( end else if (MODEL == 1) begin `IGNORE_UNOPTFLAT_BEGIN - wire [NUM_REQS-1:0] mask_higher_pri_regs, unmask_higher_pri_regs; + wire [NUM_REQS-1:0] masked_pri_reqs, unmasked_pri_reqs; `IGNORE_UNOPTFLAT_END - wire [NUM_REQS-1:0] grant_masked, grant_unmasked; + reg [NUM_REQS-1:0] reqs_mask; - reg [NUM_REQS-1:0] pointer_reg; + wire [NUM_REQS-1:0] masked_reqs = requests & reqs_mask; - wire [NUM_REQS-1:0] req_masked = requests & pointer_reg; - - assign mask_higher_pri_regs[0] = 1'b0; + assign masked_pri_reqs[0] = 1'b0; for (genvar i = 1; i < NUM_REQS; ++i) begin - assign mask_higher_pri_regs[i] = mask_higher_pri_regs[i-1] | req_masked[i-1]; + assign masked_pri_reqs[i] = masked_pri_reqs[i-1] | masked_reqs[i-1]; end - assign grant_masked[NUM_REQS-1:0] = req_masked[NUM_REQS-1:0] & ~mask_higher_pri_regs[NUM_REQS-1:0]; - - assign unmask_higher_pri_regs[0] = 1'b0; + assign unmasked_pri_reqs[0] = 1'b0; for (genvar i = 1; i < NUM_REQS; ++i) begin - assign unmask_higher_pri_regs[i] = unmask_higher_pri_regs[i-1] | requests[i-1]; + assign unmasked_pri_reqs[i] = unmasked_pri_reqs[i-1] | requests[i-1]; end - assign grant_unmasked[NUM_REQS-1:0] = requests[NUM_REQS-1:0] & ~unmask_higher_pri_regs[NUM_REQS-1:0]; + wire [NUM_REQS-1:0] grant_masked = masked_reqs & ~masked_pri_reqs[NUM_REQS-1:0]; + wire [NUM_REQS-1:0] grant_unmasked = requests & ~unmasked_pri_reqs[NUM_REQS-1:0]; + + wire has_masked_reqs = (| masked_reqs); + wire has_unmasked_reqs = (| requests); - wire no_req_masked = ~(|req_masked); - assign grant_onehot = ({NUM_REQS{no_req_masked}} & grant_unmasked) | grant_masked; + assign grant_onehot = ({NUM_REQS{~has_masked_reqs}} & grant_unmasked) | grant_masked; always @(posedge clk) begin if (reset) begin - 
pointer_reg <= {NUM_REQS{1'b1}}; + reqs_mask <= {NUM_REQS{1'b1}}; end else if (grant_ready) begin - if (|req_masked) begin - pointer_reg <= mask_higher_pri_regs; - end else if (|requests) begin - pointer_reg <= unmask_higher_pri_regs; - end else begin - pointer_reg <= pointer_reg; + if (has_masked_reqs) begin + reqs_mask <= masked_pri_reqs; + end else if (has_unmasked_reqs) begin + reqs_mask <= unmasked_pri_reqs; end end end @@ -426,35 +423,41 @@ module VX_rr_arbiter #( end else begin - reg [LOG_NUM_REQS-1:0] grant_index_r; - reg [NUM_REQS-1:0] grant_onehot_r; - reg [NUM_REQS-1:0] state; + reg grant_valid_r; + reg [LOG_NUM_REQS-1:0] grant_index_r; + reg [NUM_REQS-1:0] grant_onehot_r; + reg [LOG_NUM_REQS-1:0] next_grant_index; + + wire [NUM_REQS-1:0][LOG_NUM_REQS-1:0] next_grant_index_qual; + for (genvar i = 0; i < NUM_REQS; ++i) begin + assign next_grant_index_qual[i] = LOG_NUM_REQS'(i) + next_grant_index; + end always @(*) begin grant_index_r = 'x; grant_onehot_r = 'x; + grant_valid_r = 0; for (integer i = 0; i < NUM_REQS; ++i) begin - for (integer j = 0; j < NUM_REQS; ++j) begin - if (state[i] && requests[(j + 1) % NUM_REQS]) begin - grant_index_r = LOG_NUM_REQS'((j + 1) % NUM_REQS); - grant_onehot_r = '0; - grant_onehot_r[(j + 1) % NUM_REQS] = 1; - end + if (requests[next_grant_index_qual[i]]) begin + grant_valid_r = 1; + grant_index_r = next_grant_index_qual[i]; + grant_onehot_r = NUM_REQS'(1) << next_grant_index_qual[i]; + break; end end end always @(posedge clk) begin if (reset) begin - state <= '0; - end else if (grant_ready) begin - state <= grant_index_r; + next_grant_index <= '0; + end else if (grant_valid && grant_ready) begin + next_grant_index <= grant_index_r + LOG_NUM_REQS'(1); end end assign grant_index = grant_index_r; assign grant_onehot = grant_onehot_r; - assign grant_valid = (| requests); + assign grant_valid = grant_valid_r; end endmodule From 32a882e26fc20afb5047f833526a64c04d7929c3 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sat, 10 Aug 
2024 18:41:10 -0700 Subject: [PATCH 018/407] arbiters optimization --- hw/rtl/libs/VX_fair_arbiter.sv | 12 ++++++------ hw/rtl/libs/VX_onehot_mux.sv | 10 ++++++++++ hw/rtl/libs/VX_onehot_shift.sv | 30 ++++++++++++++++++++++++++++++ hw/rtl/libs/VX_rr_arbiter.sv | 26 ++++++++++++-------------- hw/rtl/libs/VX_transpose.sv | 29 +++++++++++++++++++++++++++++ 5 files changed, 87 insertions(+), 20 deletions(-) create mode 100644 hw/rtl/libs/VX_onehot_shift.sv create mode 100644 hw/rtl/libs/VX_transpose.sv diff --git a/hw/rtl/libs/VX_fair_arbiter.sv b/hw/rtl/libs/VX_fair_arbiter.sv index 430966aee7..9a6ca84597 100644 --- a/hw/rtl/libs/VX_fair_arbiter.sv +++ b/hw/rtl/libs/VX_fair_arbiter.sv @@ -38,17 +38,17 @@ module VX_fair_arbiter #( end else begin - reg [NUM_REQS-1:0] grant_hist; + reg [NUM_REQS-1:0] reqs_mask; - wire [NUM_REQS-1:0] requests_sel = requests & ~grant_hist; - wire rem_valid = (| requests_sel); - wire [NUM_REQS-1:0] requests_qual = rem_valid ? requests_sel : requests; + wire [NUM_REQS-1:0] requests_rem = requests & reqs_mask; + wire rem_valid = (| requests_rem); + wire [NUM_REQS-1:0] requests_qual = rem_valid ? requests_rem : requests; always @(posedge clk) begin if (reset) begin - grant_hist <= '0; + reqs_mask <= '1; end else if (grant_valid && grant_ready) begin - grant_hist <= rem_valid ? (grant_hist | grant_onehot) : grant_onehot; + reqs_mask <= rem_valid ? 
(reqs_mask & ~grant_onehot) : ~grant_onehot; end end diff --git a/hw/rtl/libs/VX_onehot_mux.sv b/hw/rtl/libs/VX_onehot_mux.sv index cc0fffaa6c..74e19a41b5 100644 --- a/hw/rtl/libs/VX_onehot_mux.sv +++ b/hw/rtl/libs/VX_onehot_mux.sv @@ -124,6 +124,16 @@ module VX_onehot_mux #( assign data_out[i] = (| gather); end end else if (MODEL == 2) begin + VX_find_first #( + .N (N), + .DATAW (DATAW) + ) find_first ( + .valid_in (sel_in), + .data_in (data_in), + .data_out (data_out), + `UNUSED_PIN (valid_out) + ); + end else if (MODEL == 3) begin reg [DATAW-1:0] data_out_r; always @(*) begin data_out_r = 'x; diff --git a/hw/rtl/libs/VX_onehot_shift.sv b/hw/rtl/libs/VX_onehot_shift.sv new file mode 100644 index 0000000000..950d1f380e --- /dev/null +++ b/hw/rtl/libs/VX_onehot_shift.sv @@ -0,0 +1,30 @@ +// Copyright © 2019-2023 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +`include "VX_platform.vh" + +module VX_onehot_shift #( + parameter N = 1, + parameter M = 1 +) ( + input wire [N-1:0] data_in0, + input wire [M-1:0] data_in1, + output wire [N*M-1:0] data_out +); + for (genvar i = 0; i < M; ++i) begin + for (genvar j = 0; j < N; ++j) begin + assign data_out[i*N + j] = data_in1[i] & data_in0[j]; + end + end + +endmodule diff --git a/hw/rtl/libs/VX_rr_arbiter.sv b/hw/rtl/libs/VX_rr_arbiter.sv index 60cc4813f1..d9f5b767f2 100644 --- a/hw/rtl/libs/VX_rr_arbiter.sv +++ b/hw/rtl/libs/VX_rr_arbiter.sv @@ -426,32 +426,30 @@ module VX_rr_arbiter #( reg grant_valid_r; reg [LOG_NUM_REQS-1:0] grant_index_r; reg [NUM_REQS-1:0] grant_onehot_r; - reg [LOG_NUM_REQS-1:0] next_grant_index; - - wire [NUM_REQS-1:0][LOG_NUM_REQS-1:0] next_grant_index_qual; - for (genvar i = 0; i < NUM_REQS; ++i) begin - assign next_grant_index_qual[i] = LOG_NUM_REQS'(i) + next_grant_index; - end + reg [NUM_REQS-1:0][LOG_NUM_REQS-1:0] next_grant_index; always @(*) begin grant_index_r = 'x; grant_onehot_r = 'x; grant_valid_r = 0; - for (integer i = 0; i < NUM_REQS; ++i) begin - if (requests[next_grant_index_qual[i]]) begin - grant_valid_r = 1; - grant_index_r = next_grant_index_qual[i]; - grant_onehot_r = NUM_REQS'(1) << next_grant_index_qual[i]; - break; + for (integer i = NUM_REQS-1; i >= 0; --i) begin + if (requests[next_grant_index[i]]) begin + grant_valid_r = 1; + grant_index_r = next_grant_index[i]; + grant_onehot_r = NUM_REQS'(1) << next_grant_index[i]; end end end always @(posedge clk) begin if (reset) begin - next_grant_index <= '0; + for (integer i = 0; i < NUM_REQS; ++i) begin + next_grant_index[i] <= LOG_NUM_REQS'(i); + end end else if (grant_valid && grant_ready) begin - next_grant_index <= grant_index_r + LOG_NUM_REQS'(1); + for (integer i = 0; i < NUM_REQS; ++i) begin + next_grant_index[i] <= grant_index_r + LOG_NUM_REQS'(i + 1); + end end end diff --git a/hw/rtl/libs/VX_transpose.sv b/hw/rtl/libs/VX_transpose.sv new file mode 100644 index 
0000000000..93a8c16835 --- /dev/null +++ b/hw/rtl/libs/VX_transpose.sv @@ -0,0 +1,29 @@ +// Copyright © 2019-2023 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +`include "VX_platform.vh" + +module VX_transpose #( + parameter N = 1, + parameter M = 1 +) ( + input wire [N-1:0][M-1:0] data_in, + output wire [M-1:0][N-1:0] data_out +); + for (genvar i = 0; i < N; ++i) begin + for (genvar j = 0; j < M; ++j) begin + assign data_out[j][i] = data_in[i][j]; + end + end + +endmodule From 8fb73b6da7e5bc0824d6e69a91bf4687c9187614 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sat, 10 Aug 2024 22:11:49 -0700 Subject: [PATCH 019/407] fair arbiter optimization --- hw/rtl/libs/VX_fair_arbiter.sv | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/hw/rtl/libs/VX_fair_arbiter.sv b/hw/rtl/libs/VX_fair_arbiter.sv index 9a6ca84597..3503ea21ed 100644 --- a/hw/rtl/libs/VX_fair_arbiter.sv +++ b/hw/rtl/libs/VX_fair_arbiter.sv @@ -40,15 +40,14 @@ module VX_fair_arbiter #( reg [NUM_REQS-1:0] reqs_mask; - wire [NUM_REQS-1:0] requests_rem = requests & reqs_mask; - wire rem_valid = (| requests_rem); - wire [NUM_REQS-1:0] requests_qual = rem_valid ? requests_rem : requests; + wire [NUM_REQS-1:0] masked_reqs = requests & reqs_mask; + wire [NUM_REQS-1:0] requests_qual = (| masked_reqs) ? masked_reqs : requests; always @(posedge clk) begin if (reset) begin reqs_mask <= '1; end else if (grant_valid && grant_ready) begin - reqs_mask <= rem_valid ? 
(reqs_mask & ~grant_onehot) : ~grant_onehot; + reqs_mask <= (| reqs_mask) ? (reqs_mask & ~grant_onehot) : ~grant_onehot; end end From 1fb0691bc74e0909413908cf1b5cfe262c9c3514 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sun, 11 Aug 2024 19:50:31 -0700 Subject: [PATCH 020/407] minor update --- hw/rtl/mem/VX_local_mem_top.sv | 3 ++- hw/syn/xilinx/xrt/Makefile | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/hw/rtl/mem/VX_local_mem_top.sv b/hw/rtl/mem/VX_local_mem_top.sv index d1cac7ebfc..5f9b17da01 100644 --- a/hw/rtl/mem/VX_local_mem_top.sv +++ b/hw/rtl/mem/VX_local_mem_top.sv @@ -87,7 +87,8 @@ module VX_local_mem_top import VX_gpu_pkg::*; #( .WORD_SIZE (WORD_SIZE), .ADDR_WIDTH (ADDR_WIDTH), .UUID_WIDTH (UUID_WIDTH), - .TAG_WIDTH (TAG_WIDTH) + .TAG_WIDTH (TAG_WIDTH), + .OUT_BUF (3) ) local_mem ( .clk (clk), .reset (reset), diff --git a/hw/syn/xilinx/xrt/Makefile b/hw/syn/xilinx/xrt/Makefile index 38ae29f369..e1acce8d65 100644 --- a/hw/syn/xilinx/xrt/Makefile +++ b/hw/syn/xilinx/xrt/Makefile @@ -4,7 +4,7 @@ include $(ROOT_DIR)/config.mk ifneq ($(findstring Makefile, $(MAKEFILE_LIST)), Makefile) help: $(ECHO) "Makefile Usage:" - $(ECHO) " make all TARGET= PLATFORM=" + $(ECHO) " make all TARGET= PLATFORM=" $(ECHO) " Command to generate the design for specified Target and Device." 
$(ECHO) "" $(ECHO) " make clean" From 6f3add273dacf5a49843ec34264a4a64856b4239 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sun, 11 Aug 2024 20:28:39 -0700 Subject: [PATCH 021/407] elastic buffer lutram refactoring --- hw/rtl/VX_platform.vh | 10 ++++++++-- hw/rtl/core/VX_dispatch.sv | 2 +- hw/rtl/core/VX_operands.sv | 5 ++--- hw/rtl/core/VX_scoreboard.sv | 3 +-- hw/rtl/libs/VX_elastic_buffer.sv | 30 ++++++++++++++++++++++++------ hw/rtl/libs/VX_stream_arb.sv | 25 +++++++++---------------- hw/rtl/libs/VX_stream_xbar.sv | 11 ++++------- 7 files changed, 49 insertions(+), 37 deletions(-) diff --git a/hw/rtl/VX_platform.vh b/hw/rtl/VX_platform.vh index 59f5ef0f5b..cd0550efac 100644 --- a/hw/rtl/VX_platform.vh +++ b/hw/rtl/VX_platform.vh @@ -239,10 +239,16 @@ `RESET_RELAY_EX (dst, src, 1, 0) // size(x): 0 -> 0, 1 -> 1, 2 -> 2, 3 -> 2, 4-> 2, 5 -> 2 -`define TO_OUT_BUF_SIZE(s) `MIN(s, 2) +`define TO_OUT_BUF_SIZE(s) `MIN(s & 7, 2) // reg(x): 0 -> 0, 1 -> 1, 2 -> 0, 3 -> 1, 4 -> 2, 5 > 3 -`define TO_OUT_BUF_REG(s) ((s < 2) ? s : (s - 2)) +`define TO_OUT_BUF_REG(s) (((s & 7) < 2) ? (s & 7) : ((s & 7) - 2)) + +// lut(x): (x & 8) != 0 +`define TO_OUT_BUF_LUTRAM(s) ((s & 8) != 0) + +// rbuf(x): (x <= 2) ? 
3 : x +`define TO_OUT_RBUF(s) ((s & 8) | `MAX(s & 7, 3)) `define REPEAT(n,f,s) `_REPEAT_``n(f,s) `define _REPEAT_0(f,s) diff --git a/hw/rtl/core/VX_dispatch.sv b/hw/rtl/core/VX_dispatch.sv index 8ea3a61250..04c3d92bf5 100644 --- a/hw/rtl/core/VX_dispatch.sv +++ b/hw/rtl/core/VX_dispatch.sv @@ -60,7 +60,7 @@ module VX_dispatch import VX_gpu_pkg::*; #( VX_elastic_buffer #( .DATAW (DATAW), .SIZE (2), - .OUT_REG (2), // 2-cycle EB for area reduction + .OUT_REG (2), // 2-cycle LUT EB for area reduction .LUTRAM (1) ) buffer ( .clk (clk), diff --git a/hw/rtl/core/VX_operands.sv b/hw/rtl/core/VX_operands.sv index e3df0c1fad..bdf8d2cdf4 100644 --- a/hw/rtl/core/VX_operands.sv +++ b/hw/rtl/core/VX_operands.sv @@ -23,7 +23,7 @@ module VX_operands import VX_gpu_pkg::*; #( parameter `STRING INSTANCE_ID = "", parameter NUM_BANKS = 4, - parameter OUT_BUF = 4 // using 2-cycle EB for area reduction + parameter OUT_BUF = 8+4 // using 2-cycle LUT EB for area reduction ) ( input wire clk, input wire reset, @@ -204,8 +204,7 @@ module VX_operands import VX_gpu_pkg::*; #( VX_elastic_buffer #( .DATAW (DATAW), .SIZE (`TO_OUT_BUF_SIZE(OUT_BUF)), - .OUT_REG (`TO_OUT_BUF_REG(OUT_BUF)), - .LUTRAM (1) + .OUT_REG (`TO_OUT_BUF_REG(OUT_BUF)) ) out_buf ( .clk (clk), .reset (reset), diff --git a/hw/rtl/core/VX_scoreboard.sv b/hw/rtl/core/VX_scoreboard.sv index 9b3a146c69..056e1c1652 100644 --- a/hw/rtl/core/VX_scoreboard.sv +++ b/hw/rtl/core/VX_scoreboard.sv @@ -290,8 +290,7 @@ module VX_scoreboard import VX_gpu_pkg::*; #( .NUM_INPUTS (PER_ISSUE_WARPS), .DATAW (DATAW), .ARBITER ("F"), - .LUTRAM (1), - .OUT_BUF (4) // using 2-cycle EB for area reduction + .OUT_BUF (8+4) // using 2-cycle LUT EB for area reduction ) out_arb ( .clk (clk), .reset (arb_reset), diff --git a/hw/rtl/libs/VX_elastic_buffer.sv b/hw/rtl/libs/VX_elastic_buffer.sv index ee6f31b58b..3bfcdeb9cb 100644 --- a/hw/rtl/libs/VX_elastic_buffer.sv +++ b/hw/rtl/libs/VX_elastic_buffer.sv @@ -43,7 +43,8 @@ module VX_elastic_buffer #( end else 
if (SIZE == 1) begin VX_pipe_buffer #( - .DATAW (DATAW) + .DATAW (DATAW), + .DEPTH (`MAX(OUT_REG, 1)) ) pipe_buffer ( .clk (clk), .reset (reset), @@ -57,16 +58,33 @@ module VX_elastic_buffer #( end else if (SIZE == 2 && LUTRAM == 0) begin - VX_skid_buffer #( + wire valid_out_t; + wire [DATAW-1:0] data_out_t; + wire ready_out_t; + + VX_stream_buffer #( .DATAW (DATAW), - .HALF_BW (OUT_REG == 2), - .OUT_REG (OUT_REG) - ) skid_buffer ( + .OUT_REG (OUT_REG == 1) + ) stream_buffer ( .clk (clk), .reset (reset), .valid_in (valid_in), .data_in (data_in), .ready_in (ready_in), + .valid_out (valid_out_t), + .data_out (data_out_t), + .ready_out (ready_out_t) + ); + + VX_pipe_buffer #( + .DATAW (DATAW), + .DEPTH ((OUT_REG > 1) ? (OUT_REG-1) : 0) + ) out_buf ( + .clk (clk), + .reset (reset), + .valid_in (valid_out_t), + .data_in (data_out_t), + .ready_in (ready_out_t), .valid_out (valid_out), .data_out (data_out), .ready_out (ready_out) @@ -105,7 +123,7 @@ module VX_elastic_buffer #( VX_pipe_buffer #( .DATAW (DATAW), - .DEPTH ((OUT_REG > 0) ? (OUT_REG-1) : 0) + .DEPTH ((OUT_REG > 1) ? 
(OUT_REG-1) : 0) ) out_buf ( .clk (clk), .reset (reset), diff --git a/hw/rtl/libs/VX_stream_arb.sv b/hw/rtl/libs/VX_stream_arb.sv index 98fed58592..d5157a8dd1 100644 --- a/hw/rtl/libs/VX_stream_arb.sv +++ b/hw/rtl/libs/VX_stream_arb.sv @@ -21,7 +21,6 @@ module VX_stream_arb #( parameter `STRING ARBITER = "R", parameter MAX_FANOUT = `MAX_FANOUT, parameter OUT_BUF = 0, - parameter LUTRAM = 0, parameter NUM_REQS = `CDIV(NUM_INPUTS, NUM_OUTPUTS), parameter LOG_NUM_REQS = `CLOG2(NUM_REQS), parameter NUM_REQS_W = `UP(LOG_NUM_REQS) @@ -58,8 +57,7 @@ module VX_stream_arb #( .DATAW (DATAW), .ARBITER (ARBITER), .MAX_FANOUT (MAX_FANOUT), - .OUT_BUF (OUT_BUF), - .LUTRAM (LUTRAM) + .OUT_BUF (OUT_BUF) ) arb_slice ( .clk (clk), .reset (slice_reset), @@ -103,8 +101,7 @@ module VX_stream_arb #( .DATAW (DATAW), .ARBITER (ARBITER), .MAX_FANOUT (MAX_FANOUT), - .OUT_BUF (3), // registered output - .LUTRAM (LUTRAM) + .OUT_BUF (`TO_OUT_RBUF(OUT_BUF)) // to registered output ) fanout_slice_arb ( .clk (clk), .reset (slice_reset), @@ -130,8 +127,7 @@ module VX_stream_arb #( .DATAW (DATAW + LOG_NUM_REQS2), .ARBITER (ARBITER), .MAX_FANOUT (MAX_FANOUT), - .OUT_BUF (OUT_BUF), - .LUTRAM (LUTRAM) + .OUT_BUF (OUT_BUF) ) fanout_join_arb ( .clk (clk), .reset (reset), @@ -185,7 +181,7 @@ module VX_stream_arb #( .DATAW (LOG_NUM_REQS + DATAW), .SIZE (`TO_OUT_BUF_SIZE(OUT_BUF)), .OUT_REG (`TO_OUT_BUF_REG(OUT_BUF)), - .LUTRAM (LUTRAM) + .LUTRAM (`TO_OUT_BUF_LUTRAM(OUT_BUF)) ) out_buf ( .clk (clk), .reset (reset), @@ -218,8 +214,7 @@ module VX_stream_arb #( .DATAW (DATAW), .ARBITER (ARBITER), .MAX_FANOUT (MAX_FANOUT), - .OUT_BUF (OUT_BUF), - .LUTRAM (LUTRAM) + .OUT_BUF (OUT_BUF) ) arb_slice ( .clk (clk), .reset (slice_reset), @@ -253,8 +248,7 @@ module VX_stream_arb #( .DATAW (DATAW), .ARBITER (ARBITER), .MAX_FANOUT (MAX_FANOUT), - .OUT_BUF (3), // registered output - .LUTRAM (LUTRAM) + .OUT_BUF (`TO_OUT_RBUF(OUT_BUF)) // to registered output ) fanout_fork_arb ( .clk (clk), .reset (reset), @@ -281,8 
+275,7 @@ module VX_stream_arb #( .DATAW (DATAW), .ARBITER (ARBITER), .MAX_FANOUT (MAX_FANOUT), - .OUT_BUF (OUT_BUF), - .LUTRAM (LUTRAM) + .OUT_BUF (OUT_BUF) ) fanout_slice_arb ( .clk (clk), .reset (slice_reset), @@ -329,7 +322,7 @@ module VX_stream_arb #( .DATAW (DATAW), .SIZE (`TO_OUT_BUF_SIZE(OUT_BUF)), .OUT_REG (`TO_OUT_BUF_REG(OUT_BUF)), - .LUTRAM (LUTRAM) + .LUTRAM (`TO_OUT_BUF_LUTRAM(OUT_BUF)) ) out_buf ( .clk (clk), .reset (reset), @@ -357,7 +350,7 @@ module VX_stream_arb #( .DATAW (DATAW), .SIZE (`TO_OUT_BUF_SIZE(OUT_BUF)), .OUT_REG (`TO_OUT_BUF_REG(OUT_BUF)), - .LUTRAM (LUTRAM) + .LUTRAM (`TO_OUT_BUF_LUTRAM(OUT_BUF)) ) out_buf ( .clk (clk), .reset (out_buf_reset[i]), diff --git a/hw/rtl/libs/VX_stream_xbar.sv b/hw/rtl/libs/VX_stream_xbar.sv index b7bdcbf5e8..8cdb9ced6d 100644 --- a/hw/rtl/libs/VX_stream_xbar.sv +++ b/hw/rtl/libs/VX_stream_xbar.sv @@ -22,7 +22,6 @@ module VX_stream_xbar #( parameter OUT_WIDTH = `LOG2UP(NUM_OUTPUTS), parameter ARBITER = "R", parameter OUT_BUF = 0, - parameter LUTRAM = 0, parameter MAX_FANOUT = `MAX_FANOUT, parameter PERF_CTR_BITS = `CLOG2(NUM_INPUTS+1) ) ( @@ -67,8 +66,7 @@ module VX_stream_xbar #( .DATAW (DATAW), .ARBITER (ARBITER), .MAX_FANOUT (MAX_FANOUT), - .OUT_BUF (OUT_BUF), - .LUTRAM (LUTRAM) + .OUT_BUF (OUT_BUF) ) xbar_arb ( .clk (clk), .reset (slice_reset), @@ -96,8 +94,7 @@ module VX_stream_xbar #( .DATAW (DATAW), .ARBITER (ARBITER), .MAX_FANOUT (MAX_FANOUT), - .OUT_BUF (OUT_BUF), - .LUTRAM (LUTRAM) + .OUT_BUF (OUT_BUF) ) xbar_arb ( .clk (clk), .reset (reset), @@ -133,7 +130,7 @@ module VX_stream_xbar #( .DATAW (DATAW), .SIZE (`TO_OUT_BUF_SIZE(OUT_BUF)), .OUT_REG (`TO_OUT_BUF_REG(OUT_BUF)), - .LUTRAM (LUTRAM) + .LUTRAM (`TO_OUT_BUF_LUTRAM(OUT_BUF)) ) out_buf ( .clk (clk), .reset (out_buf_reset[i]), @@ -156,7 +153,7 @@ module VX_stream_xbar #( .DATAW (DATAW), .SIZE (`TO_OUT_BUF_SIZE(OUT_BUF)), .OUT_REG (`TO_OUT_BUF_REG(OUT_BUF)), - .LUTRAM (LUTRAM) + .LUTRAM (`TO_OUT_BUF_LUTRAM(OUT_BUF)) ) out_buf ( .clk (clk), 
.reset (reset), From ed66ee2806726d31a7773d90f059aae405b679e1 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Mon, 12 Aug 2024 04:09:56 -0700 Subject: [PATCH 022/407] arbitration update --- hw/rtl/cache/VX_cache.sv | 6 +- hw/rtl/core/VX_alu_muldiv.sv | 2 +- hw/rtl/core/VX_alu_unit.sv | 2 +- hw/rtl/core/VX_operands.sv | 2 +- hw/rtl/core/VX_scoreboard.sv | 4 +- hw/rtl/fpu/VX_fpu_dpi.sv | 2 +- hw/rtl/fpu/VX_fpu_dsp.sv | 2 +- hw/rtl/libs/VX_avs_adapter.sv | 2 +- hw/rtl/libs/VX_axi_adapter.sv | 4 +- hw/rtl/libs/VX_fair_arbiter.sv | 66 ----------- hw/rtl/libs/VX_generic_arbiter.sv | 14 --- hw/rtl/libs/VX_rr_arbiter.sv | 183 +++++++++++++++++------------- hw/rtl/mem/VX_local_mem.sv | 2 +- 13 files changed, 116 insertions(+), 175 deletions(-) delete mode 100644 hw/rtl/libs/VX_fair_arbiter.sv diff --git a/hw/rtl/cache/VX_cache.sv b/hw/rtl/cache/VX_cache.sv index 1131791bbc..8221c284cb 100644 --- a/hw/rtl/cache/VX_cache.sv +++ b/hw/rtl/cache/VX_cache.sv @@ -317,7 +317,7 @@ module VX_cache import VX_gpu_pkg::*; #( .NUM_OUTPUTS (NUM_BANKS), .DATAW (CORE_REQ_DATAW), .PERF_CTR_BITS (`PERF_CTR_BITS), - .ARBITER ("F"), + .ARBITER ("R"), .OUT_BUF (REQ_XBAR_BUF) ) req_xbar ( .clk (clk), @@ -452,7 +452,7 @@ module VX_cache import VX_gpu_pkg::*; #( .NUM_INPUTS (NUM_BANKS), .NUM_OUTPUTS (NUM_REQS), .DATAW (CORE_RSP_DATAW), - .ARBITER ("F") + .ARBITER ("R") ) rsp_xbar ( .clk (clk), .reset (rsp_xbar_reset), @@ -501,7 +501,7 @@ module VX_cache import VX_gpu_pkg::*; #( VX_stream_arb #( .NUM_INPUTS (NUM_BANKS), .DATAW (`CS_MEM_ADDR_WIDTH + 1 + LINE_SIZE + `CS_LINE_WIDTH + MSHR_ADDR_WIDTH + 1), - .ARBITER ("F") + .ARBITER ("R") ) mem_req_arb ( .clk (clk), .reset (reset), diff --git a/hw/rtl/core/VX_alu_muldiv.sv b/hw/rtl/core/VX_alu_muldiv.sv index 3beb035f45..650c278336 100644 --- a/hw/rtl/core/VX_alu_muldiv.sv +++ b/hw/rtl/core/VX_alu_muldiv.sv @@ -324,7 +324,7 @@ module VX_alu_muldiv #( VX_stream_arb #( .NUM_INPUTS (2), .DATAW (TAG_WIDTH + (NUM_LANES * `XLEN)), - .ARBITER ("F"), + 
.ARBITER ("R"), .OUT_BUF (1) ) rsp_buf ( .clk (clk), diff --git a/hw/rtl/core/VX_alu_unit.sv b/hw/rtl/core/VX_alu_unit.sv index 86bcaf05e0..70eab1529a 100644 --- a/hw/rtl/core/VX_alu_unit.sv +++ b/hw/rtl/core/VX_alu_unit.sv @@ -121,7 +121,7 @@ module VX_alu_unit #( .NUM_INPUTS (RSP_ARB_SIZE), .DATAW (RSP_ARB_DATAW), .OUT_BUF (PARTIAL_BW ? 1 : 3), - .ARBITER ("F") + .ARBITER ("R") ) rsp_arb ( .clk (clk), .reset (block_reset), diff --git a/hw/rtl/core/VX_operands.sv b/hw/rtl/core/VX_operands.sv index bdf8d2cdf4..5dbb73791f 100644 --- a/hw/rtl/core/VX_operands.sv +++ b/hw/rtl/core/VX_operands.sv @@ -23,7 +23,7 @@ module VX_operands import VX_gpu_pkg::*; #( parameter `STRING INSTANCE_ID = "", parameter NUM_BANKS = 4, - parameter OUT_BUF = 8+4 // using 2-cycle LUT EB for area reduction + parameter OUT_BUF = 3 ) ( input wire clk, input wire reset, diff --git a/hw/rtl/core/VX_scoreboard.sv b/hw/rtl/core/VX_scoreboard.sv index 056e1c1652..df25aff268 100644 --- a/hw/rtl/core/VX_scoreboard.sv +++ b/hw/rtl/core/VX_scoreboard.sv @@ -289,8 +289,8 @@ module VX_scoreboard import VX_gpu_pkg::*; #( VX_stream_arb #( .NUM_INPUTS (PER_ISSUE_WARPS), .DATAW (DATAW), - .ARBITER ("F"), - .OUT_BUF (8+4) // using 2-cycle LUT EB for area reduction + .ARBITER ("R"), + .OUT_BUF (3) ) out_arb ( .clk (clk), .reset (arb_reset), diff --git a/hw/rtl/fpu/VX_fpu_dpi.sv b/hw/rtl/fpu/VX_fpu_dpi.sv index 781b5b88e3..67022e8fd6 100644 --- a/hw/rtl/fpu/VX_fpu_dpi.sv +++ b/hw/rtl/fpu/VX_fpu_dpi.sv @@ -470,7 +470,7 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #( VX_stream_arb #( .NUM_INPUTS (NUM_FPC), .DATAW (RSP_DATAW), - .ARBITER ("F"), + .ARBITER ("R"), .OUT_BUF (OUT_BUF) ) rsp_arb ( .clk (clk), diff --git a/hw/rtl/fpu/VX_fpu_dsp.sv b/hw/rtl/fpu/VX_fpu_dsp.sv index ad398dcd78..967bbbc29f 100644 --- a/hw/rtl/fpu/VX_fpu_dsp.sv +++ b/hw/rtl/fpu/VX_fpu_dsp.sv @@ -296,7 +296,7 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #( VX_stream_arb #( .NUM_INPUTS (NUM_FPC), .DATAW (RSP_DATAW + 2), - .ARBITER ("F"), + 
.ARBITER ("R"), .OUT_BUF (OUT_BUF) ) rsp_arb ( .clk (clk), diff --git a/hw/rtl/libs/VX_avs_adapter.sv b/hw/rtl/libs/VX_avs_adapter.sv index 35d329c7bf..659114c8de 100644 --- a/hw/rtl/libs/VX_avs_adapter.sv +++ b/hw/rtl/libs/VX_avs_adapter.sv @@ -199,7 +199,7 @@ module VX_avs_adapter #( VX_stream_arb #( .NUM_INPUTS (NUM_BANKS), .DATAW (DATA_WIDTH + TAG_WIDTH), - .ARBITER ("F"), + .ARBITER ("R"), .OUT_BUF (RSP_OUT_BUF) ) rsp_arb ( .clk (clk), diff --git a/hw/rtl/libs/VX_axi_adapter.sv b/hw/rtl/libs/VX_axi_adapter.sv index 7fffb9be23..9cd8625606 100644 --- a/hw/rtl/libs/VX_axi_adapter.sv +++ b/hw/rtl/libs/VX_axi_adapter.sv @@ -203,11 +203,11 @@ module VX_axi_adapter #( `RUNTIME_ASSERT(~m_axi_rvalid[i] || m_axi_rlast[i] == 1, ("%t: *** AXI response error", $time)); `RUNTIME_ASSERT(~m_axi_rvalid[i] || m_axi_rresp[i] == 0, ("%t: *** AXI response error", $time)); end - + VX_stream_arb #( .NUM_INPUTS (NUM_BANKS), .DATAW (DATA_WIDTH + TAG_WIDTH), - .ARBITER ("F"), + .ARBITER ("R"), .OUT_BUF (RSP_OUT_BUF) ) rsp_arb ( .clk (clk), diff --git a/hw/rtl/libs/VX_fair_arbiter.sv b/hw/rtl/libs/VX_fair_arbiter.sv deleted file mode 100644 index 3503ea21ed..0000000000 --- a/hw/rtl/libs/VX_fair_arbiter.sv +++ /dev/null @@ -1,66 +0,0 @@ -// Copyright © 2019-2023 -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -`include "VX_platform.vh" - -`TRACING_OFF -module VX_fair_arbiter #( - parameter NUM_REQS = 1, - parameter LOG_NUM_REQS = `LOG2UP(NUM_REQS) -) ( - input wire clk, - input wire reset, - input wire [NUM_REQS-1:0] requests, - output wire [LOG_NUM_REQS-1:0] grant_index, - output wire [NUM_REQS-1:0] grant_onehot, - output wire grant_valid, - input wire grant_ready -); - if (NUM_REQS == 1) begin - - `UNUSED_VAR (clk) - `UNUSED_VAR (reset) - `UNUSED_VAR (grant_ready) - - assign grant_index = '0; - assign grant_onehot = requests; - assign grant_valid = requests[0]; - - end else begin - - reg [NUM_REQS-1:0] reqs_mask; - - wire [NUM_REQS-1:0] masked_reqs = requests & reqs_mask; - wire [NUM_REQS-1:0] requests_qual = (| masked_reqs) ? masked_reqs : requests; - - always @(posedge clk) begin - if (reset) begin - reqs_mask <= '1; - end else if (grant_valid && grant_ready) begin - reqs_mask <= (| reqs_mask) ? (reqs_mask & ~grant_onehot) : ~grant_onehot; - end - end - - VX_priority_encoder #( - .N (NUM_REQS) - ) priority_enc ( - .data_in (requests_qual), - .index_out (grant_index), - .onehot_out (grant_onehot), - .valid_out (grant_valid) - ); - - end - -endmodule -`TRACING_ON diff --git a/hw/rtl/libs/VX_generic_arbiter.sv b/hw/rtl/libs/VX_generic_arbiter.sv index a1f7be4a0a..a3c4b71ddc 100644 --- a/hw/rtl/libs/VX_generic_arbiter.sv +++ b/hw/rtl/libs/VX_generic_arbiter.sv @@ -56,20 +56,6 @@ module VX_generic_arbiter #( .grant_ready (grant_ready) ); - end else if (TYPE == "F") begin - - VX_fair_arbiter #( - .NUM_REQS (NUM_REQS) - ) fair_arbiter ( - .clk (clk), - .reset (reset), - .requests (requests), - .grant_valid (grant_valid), - .grant_index (grant_index), - .grant_onehot (grant_onehot), - .grant_ready (grant_ready) - ); - end else if (TYPE == "M") begin VX_matrix_arbiter #( diff --git a/hw/rtl/libs/VX_rr_arbiter.sv b/hw/rtl/libs/VX_rr_arbiter.sv index d9f5b767f2..a222022bf8 100644 --- a/hw/rtl/libs/VX_rr_arbiter.sv +++ b/hw/rtl/libs/VX_rr_arbiter.sv @@ -16,7 +16,7 @@ 
`TRACING_OFF module VX_rr_arbiter #( parameter NUM_REQS = 1, - parameter MODEL = 1, + parameter MODEL = 2, parameter LOG_NUM_REQS = `LOG2UP(NUM_REQS), parameter LUT_OPT = 0 ) ( @@ -41,14 +41,15 @@ module VX_rr_arbiter #( end else if (LUT_OPT && NUM_REQS == 2) begin reg [LOG_NUM_REQS-1:0] grant_index_r; - reg [NUM_REQS-1:0] grant_onehot_r; reg [LOG_NUM_REQS-1:0] state; always @(*) begin casez ({state, requests}) 3'b0_01, - 3'b1_?1: begin grant_onehot_r = 2'b01; grant_index_r = LOG_NUM_REQS'(0); end - default: begin grant_onehot_r = 2'b10; grant_index_r = LOG_NUM_REQS'(1); end + 3'b1_?1: begin grant_index_r = LOG_NUM_REQS'(0); end + 3'b0_1?, + 3'b1_10: begin grant_index_r = LOG_NUM_REQS'(1); end + default: begin grant_index_r = 'x; end endcase end @@ -61,24 +62,26 @@ module VX_rr_arbiter #( end assign grant_index = grant_index_r; - assign grant_onehot = grant_onehot_r; + assign grant_onehot = NUM_REQS'(1) << grant_index_r; assign grant_valid = (| requests); end else if (LUT_OPT && NUM_REQS == 3) begin reg [LOG_NUM_REQS-1:0] grant_index_r; - reg [NUM_REQS-1:0] grant_onehot_r; reg [LOG_NUM_REQS-1:0] state; always @(*) begin casez ({state, requests}) 5'b00_001, 5'b01_0?1, - 5'b10_??1: begin grant_onehot_r = 3'b001; grant_index_r = LOG_NUM_REQS'(0); end + 5'b10_??1: begin grant_index_r = LOG_NUM_REQS'(0); end 5'b00_?1?, 5'b01_010, - 5'b10_?10: begin grant_onehot_r = 3'b010; grant_index_r = LOG_NUM_REQS'(1); end - default: begin grant_onehot_r = 3'b100; grant_index_r = LOG_NUM_REQS'(2); end + 5'b10_?10: begin grant_index_r = LOG_NUM_REQS'(1); end + 5'b00_10?, + 5'b01_1??, + 5'b10_100: begin grant_index_r = LOG_NUM_REQS'(2); end + default: begin grant_index_r = 'x; end endcase end @@ -91,13 +94,12 @@ module VX_rr_arbiter #( end assign grant_index = grant_index_r; - assign grant_onehot = grant_onehot_r; + assign grant_onehot = NUM_REQS'(1) << grant_index_r; assign grant_valid = (| requests); end else if (LUT_OPT && NUM_REQS == 4) begin reg [LOG_NUM_REQS-1:0] grant_index_r; 
- reg [NUM_REQS-1:0] grant_onehot_r; reg [LOG_NUM_REQS-1:0] state; always @(*) begin @@ -105,16 +107,20 @@ module VX_rr_arbiter #( 6'b00_0001, 6'b01_00?1, 6'b10_0??1, - 6'b11_???1: begin grant_onehot_r = 4'b0001; grant_index_r = LOG_NUM_REQS'(0); end + 6'b11_???1: begin grant_index_r = LOG_NUM_REQS'(0); end 6'b00_??1?, 6'b01_0010, 6'b10_0?10, - 6'b11_??10: begin grant_onehot_r = 4'b0010; grant_index_r = LOG_NUM_REQS'(1); end + 6'b11_??10: begin grant_index_r = LOG_NUM_REQS'(1); end 6'b00_?10?, 6'b01_?1??, 6'b10_0100, - 6'b11_?100: begin grant_onehot_r = 4'b0100; grant_index_r = LOG_NUM_REQS'(2); end - default: begin grant_onehot_r = 4'b1000; grant_index_r = LOG_NUM_REQS'(3); end + 6'b11_?100: begin grant_index_r = LOG_NUM_REQS'(2); end + 6'b00_100?, + 6'b01_10??, + 6'b10_1???, + 6'b11_1000: begin grant_index_r = LOG_NUM_REQS'(3); end + default: begin grant_index_r = 'x; end endcase end @@ -127,13 +133,12 @@ module VX_rr_arbiter #( end assign grant_index = grant_index_r; - assign grant_onehot = grant_onehot_r; + assign grant_onehot = NUM_REQS'(1) << grant_index_r; assign grant_valid = (| requests); end else if (LUT_OPT && NUM_REQS == 5) begin reg [LOG_NUM_REQS-1:0] grant_index_r; - reg [NUM_REQS-1:0] grant_onehot_r; reg [LOG_NUM_REQS-1:0] state; always @(*) begin @@ -142,23 +147,28 @@ module VX_rr_arbiter #( 8'b001_000?1, 8'b010_00??1, 8'b011_0???1, - 8'b100_????1: begin grant_onehot_r = 5'b00001; grant_index_r = LOG_NUM_REQS'(0); end + 8'b100_????1: begin grant_index_r = LOG_NUM_REQS'(0); end 8'b000_???1?, 8'b001_00010, 8'b010_00?10, 8'b011_0??10, - 8'b100_???10: begin grant_onehot_r = 5'b00010; grant_index_r = LOG_NUM_REQS'(1); end + 8'b100_???10: begin grant_index_r = LOG_NUM_REQS'(1); end 8'b000_??10?, 8'b001_??1??, 8'b010_00100, 8'b011_0?100, - 8'b100_??100: begin grant_onehot_r = 5'b00100; grant_index_r = LOG_NUM_REQS'(2); end + 8'b100_??100: begin grant_index_r = LOG_NUM_REQS'(2); end 8'b000_?100?, 8'b001_?10??, 8'b010_?1???, 8'b011_01000, - 8'b100_?1000: 
begin grant_onehot_r = 5'b01000; grant_index_r = LOG_NUM_REQS'(3); end - default: begin grant_onehot_r = 5'b10000; grant_index_r = LOG_NUM_REQS'(4); end + 8'b100_?1000: begin grant_index_r = LOG_NUM_REQS'(3); end + 8'b000_1000?, + 8'b001_100??, + 8'b010_10???, + 8'b011_1????, + 8'b100_10000: begin grant_index_r = LOG_NUM_REQS'(4); end + default: begin grant_index_r = 'x; end endcase end @@ -171,13 +181,12 @@ module VX_rr_arbiter #( end assign grant_index = grant_index_r; - assign grant_onehot = grant_onehot_r; + assign grant_onehot = NUM_REQS'(1) << grant_index_r; assign grant_valid = (| requests); end else if (LUT_OPT && NUM_REQS == 6) begin reg [LOG_NUM_REQS-1:0] grant_index_r; - reg [NUM_REQS-1:0] grant_onehot_r; reg [LOG_NUM_REQS-1:0] state; always @(*) begin @@ -187,32 +196,38 @@ module VX_rr_arbiter #( 9'b010_000??1, 9'b011_00???1, 9'b100_0????1, - 9'b101_?????1: begin grant_onehot_r = 6'b000001; grant_index_r = LOG_NUM_REQS'(0); end + 9'b101_?????1: begin grant_index_r = LOG_NUM_REQS'(0); end 9'b000_????1?, 9'b001_000010, 9'b010_000?10, 9'b011_00??10, 9'b100_0???10, - 9'b101_????10: begin grant_onehot_r = 6'b000010; grant_index_r = LOG_NUM_REQS'(1); end + 9'b101_????10: begin grant_index_r = LOG_NUM_REQS'(1); end 9'b000_???10?, 9'b001_???1??, 9'b010_000100, 9'b011_00?100, 9'b100_0??100, - 9'b101_???100: begin grant_onehot_r = 6'b000100; grant_index_r = LOG_NUM_REQS'(2); end + 9'b101_???100: begin grant_index_r = LOG_NUM_REQS'(2); end 9'b000_??100?, 9'b001_??10??, 9'b010_??1???, 9'b011_001000, 9'b100_0?1000, - 9'b101_??1000: begin grant_onehot_r = 6'b001000; grant_index_r = LOG_NUM_REQS'(3); end + 9'b101_??1000: begin grant_index_r = LOG_NUM_REQS'(3); end 9'b000_?1000?, 9'b001_?100??, 9'b010_?10???, 9'b011_?1????, 9'b100_010000, - 9'b101_?10000: begin grant_onehot_r = 6'b010000; grant_index_r = LOG_NUM_REQS'(4); end - default: begin grant_onehot_r = 6'b100000; grant_index_r = LOG_NUM_REQS'(5); end + 9'b101_?10000: begin grant_index_r = LOG_NUM_REQS'(4); end + 
9'b000_10000?, + 9'b001_1000??, + 9'b010_100???, + 9'b011_10????, + 9'b100_1?????, + 9'b101_100000: begin grant_index_r = LOG_NUM_REQS'(5); end + default: begin grant_index_r = 'x; end endcase end @@ -225,60 +240,66 @@ module VX_rr_arbiter #( end assign grant_index = grant_index_r; - assign grant_onehot = grant_onehot_r; + assign grant_onehot = NUM_REQS'(1) << grant_index_r; assign grant_valid = (| requests); end else if (LUT_OPT && NUM_REQS == 7) begin reg [LOG_NUM_REQS-1:0] grant_index_r; - reg [NUM_REQS-1:0] grant_onehot_r; reg [LOG_NUM_REQS-1:0] state; always @(*) begin casez ({state, requests}) - 10'b000_000001, - 10'b001_0000?1, - 10'b010_000??1, - 10'b011_00???1, - 10'b100_00???1, - 10'b101_0????1, - 10'b110_?????1: begin grant_onehot_r = 7'b0000001; grant_index_r = LOG_NUM_REQS'(0); end + 10'b000_0000001, + 10'b001_00000?1, + 10'b010_0000??1, + 10'b011_000???1, + 10'b100_000???1, + 10'b101_00????1, + 10'b110_??????1: begin grant_index_r = LOG_NUM_REQS'(0); end 10'b000_?????1?, 10'b001_0000010, 10'b010_0000?10, 10'b011_000??10, 10'b100_00???10, 10'b101_0????10, - 10'b110_?????10: begin grant_onehot_r = 7'b0000010; grant_index_r = LOG_NUM_REQS'(1); end + 10'b110_?????10: begin grant_index_r = LOG_NUM_REQS'(1); end 10'b000_????10?, 10'b001_????1??, 10'b010_0000100, 10'b011_000?100, 10'b100_00??100, 10'b101_0???100, - 10'b110_????100: begin grant_onehot_r = 7'b0000100; grant_index_r = LOG_NUM_REQS'(2); end + 10'b110_????100: begin grant_index_r = LOG_NUM_REQS'(2); end 10'b000_???100?, 10'b001_???10??, 10'b010_???1???, 10'b011_0001000, 10'b100_00?1000, 10'b101_0??1000, - 10'b110_???1000: begin grant_onehot_r = 7'b0001000; grant_index_r = LOG_NUM_REQS'(3); end + 10'b110_???1000: begin grant_index_r = LOG_NUM_REQS'(3); end 10'b000_??1000?, 10'b001_??100??, 10'b010_??10???, 10'b011_??1????, 10'b100_0010000, 10'b101_0?10000, - 10'b110_??10000: begin grant_onehot_r = 7'b0010000; grant_index_r = LOG_NUM_REQS'(4); end + 10'b110_??10000: begin grant_index_r = 
LOG_NUM_REQS'(4); end 10'b000_?10000?, 10'b001_?1000??, 10'b010_?100???, 10'b011_?10????, 10'b100_?1?????, 10'b101_0100000, - 10'b110_?100000: begin grant_onehot_r = 7'b0100000; grant_index_r = LOG_NUM_REQS'(5); end - default: begin grant_onehot_r = 7'b1000000; grant_index_r = LOG_NUM_REQS'(6); end + 10'b110_?100000: begin grant_index_r = LOG_NUM_REQS'(5); end + 10'b000_100000?, + 10'b001_10000??, + 10'b010_1000???, + 10'b011_100????, + 10'b100_10?????, + 10'b101_1??????, + 10'b110_1000000: begin grant_index_r = LOG_NUM_REQS'(6); end + default: begin grant_index_r = 'x; end endcase end @@ -291,13 +312,12 @@ module VX_rr_arbiter #( end assign grant_index = grant_index_r; - assign grant_onehot = grant_onehot_r; + assign grant_onehot = NUM_REQS'(1) << grant_index_r; assign grant_valid = (| requests); end else if (LUT_OPT && NUM_REQS == 8) begin reg [LOG_NUM_REQS-1:0] grant_index_r; - reg [NUM_REQS-1:0] grant_onehot_r; reg [LOG_NUM_REQS-1:0] state; always @(*) begin @@ -309,7 +329,7 @@ module VX_rr_arbiter #( 11'b100_000????1, 11'b101_00?????1, 11'b110_0??????1, - 11'b111_???????1: begin grant_onehot_r = 8'b00000001; grant_index_r = LOG_NUM_REQS'(0); end + 11'b111_???????1: begin grant_index_r = LOG_NUM_REQS'(0); end 11'b000_??????1?, 11'b001_00000010, 11'b010_00000?10, @@ -317,7 +337,7 @@ module VX_rr_arbiter #( 11'b100_000???10, 11'b101_00????10, 11'b110_0?????10, - 11'b111_??????10: begin grant_onehot_r = 8'b00000010; grant_index_r = LOG_NUM_REQS'(1); end + 11'b111_??????10: begin grant_index_r = LOG_NUM_REQS'(1); end 11'b000_?????10?, 11'b001_?????1??, 11'b010_00000100, @@ -325,7 +345,7 @@ module VX_rr_arbiter #( 11'b100_000??100, 11'b101_00???100, 11'b110_0????100, - 11'b111_?????100: begin grant_onehot_r = 8'b00000100; grant_index_r = LOG_NUM_REQS'(2); end + 11'b111_?????100: begin grant_index_r = LOG_NUM_REQS'(2); end 11'b000_????100?, 11'b001_????10??, 11'b010_????1???, @@ -333,7 +353,7 @@ module VX_rr_arbiter #( 11'b100_000?1000, 11'b101_00??1000, 
11'b110_0???1000, - 11'b111_????1000: begin grant_onehot_r = 8'b00001000; grant_index_r = LOG_NUM_REQS'(3); end + 11'b111_????1000: begin grant_index_r = LOG_NUM_REQS'(3); end 11'b000_???1000?, 11'b001_???100??, 11'b010_???10???, @@ -341,7 +361,7 @@ module VX_rr_arbiter #( 11'b100_00010000, 11'b101_00?10000, 11'b110_0??10000, - 11'b111_???10000: begin grant_onehot_r = 8'b00010000; grant_index_r = LOG_NUM_REQS'(4); end + 11'b111_???10000: begin grant_index_r = LOG_NUM_REQS'(4); end 11'b000_??10000?, 11'b001_??1000??, 11'b010_??100???, @@ -349,7 +369,7 @@ module VX_rr_arbiter #( 11'b100_??1?????, 11'b101_00100000, 11'b110_0?100000, - 11'b111_??100000: begin grant_onehot_r = 8'b00100000; grant_index_r = LOG_NUM_REQS'(5); end + 11'b111_??100000: begin grant_index_r = LOG_NUM_REQS'(5); end 11'b000_?100000?, 11'b001_?10000??, 11'b010_?1000???, @@ -357,8 +377,16 @@ module VX_rr_arbiter #( 11'b100_?10?????, 11'b101_?1??????, 11'b110_01000000, - 11'b111_?1000000: begin grant_onehot_r = 8'b01000000; grant_index_r = LOG_NUM_REQS'(6); end - default: begin grant_onehot_r = 8'b10000000; grant_index_r = LOG_NUM_REQS'(7); end + 11'b111_?1000000: begin grant_index_r = LOG_NUM_REQS'(6); end + 11'b000_1000000?, + 11'b001_100000??, + 11'b010_10000???, + 11'b011_1000????, + 11'b100_100?????, + 11'b101_10??????, + 11'b110_1???????, + 11'b111_10000000: begin grant_index_r = LOG_NUM_REQS'(7); end + default: begin grant_index_r = 'x; end endcase end @@ -371,7 +399,7 @@ module VX_rr_arbiter #( end assign grant_index = grant_index_r; - assign grant_onehot = grant_onehot_r; + assign grant_onehot = NUM_REQS'(1) << grant_index_r; assign grant_valid = (| requests); end else if (MODEL == 1) begin @@ -393,8 +421,8 @@ module VX_rr_arbiter #( assign unmasked_pri_reqs[i] = unmasked_pri_reqs[i-1] | requests[i-1]; end - wire [NUM_REQS-1:0] grant_masked = masked_reqs & ~masked_pri_reqs[NUM_REQS-1:0]; - wire [NUM_REQS-1:0] grant_unmasked = requests & ~unmasked_pri_reqs[NUM_REQS-1:0]; + wire 
[NUM_REQS-1:0] grant_masked = masked_reqs & ~masked_pri_reqs; + wire [NUM_REQS-1:0] grant_unmasked = requests & ~unmasked_pri_reqs; wire has_masked_reqs = (| masked_reqs); wire has_unmasked_reqs = (| requests); @@ -421,41 +449,34 @@ module VX_rr_arbiter #( .valid_out(grant_valid) ); - end else begin + end else if (MODEL == 2) begin - reg grant_valid_r; - reg [LOG_NUM_REQS-1:0] grant_index_r; - reg [NUM_REQS-1:0] grant_onehot_r; - reg [NUM_REQS-1:0][LOG_NUM_REQS-1:0] next_grant_index; + reg [LOG_NUM_REQS-1:0] grant_table [NUM_REQS-1:0]; + reg [LOG_NUM_REQS-1:0] state; - always @(*) begin - grant_index_r = 'x; - grant_onehot_r = 'x; - grant_valid_r = 0; - for (integer i = NUM_REQS-1; i >= 0; --i) begin - if (requests[next_grant_index[i]]) begin - grant_valid_r = 1; - grant_index_r = next_grant_index[i]; - grant_onehot_r = NUM_REQS'(1) << next_grant_index[i]; + for (genvar i = 0; i < NUM_REQS; ++i) begin + always @(*) begin + grant_table[i] = 'x; + for (integer j = NUM_REQS-1; j >= 0; --j) begin + if (requests[(i+j+1) % NUM_REQS]) begin + grant_table[i] = LOG_NUM_REQS'(i+j+1); + end end end end always @(posedge clk) begin if (reset) begin - for (integer i = 0; i < NUM_REQS; ++i) begin - next_grant_index[i] <= LOG_NUM_REQS'(i); - end + state <= 0; end else if (grant_valid && grant_ready) begin - for (integer i = 0; i < NUM_REQS; ++i) begin - next_grant_index[i] <= grant_index_r + LOG_NUM_REQS'(i + 1); - end + state <= grant_index; end end - assign grant_index = grant_index_r; - assign grant_onehot = grant_onehot_r; - assign grant_valid = grant_valid_r; + assign grant_index = grant_table[state]; + assign grant_onehot = NUM_REQS'(1) << grant_index; + assign grant_valid = (| requests); + end endmodule diff --git a/hw/rtl/mem/VX_local_mem.sv b/hw/rtl/mem/VX_local_mem.sv index 5d095b0838..abd44b5648 100644 --- a/hw/rtl/mem/VX_local_mem.sv +++ b/hw/rtl/mem/VX_local_mem.sv @@ -121,7 +121,7 @@ module VX_local_mem import VX_gpu_pkg::*; #( .NUM_OUTPUTS (NUM_BANKS), .DATAW 
(REQ_DATAW), .PERF_CTR_BITS (`PERF_CTR_BITS), - .ARBITER ("F"), + .ARBITER ("R"), .OUT_BUF (3) // output should be registered for the data_store addressing ) req_xbar ( .clk (clk), From 9053919e92e36a09a0bb0f3a310c0398fa30e914 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Mon, 12 Aug 2024 05:24:46 -0700 Subject: [PATCH 023/407] fixed synthesis warning --- hw/rtl/libs/VX_rr_arbiter.sv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/rtl/libs/VX_rr_arbiter.sv b/hw/rtl/libs/VX_rr_arbiter.sv index a222022bf8..85cf96f9ab 100644 --- a/hw/rtl/libs/VX_rr_arbiter.sv +++ b/hw/rtl/libs/VX_rr_arbiter.sv @@ -451,7 +451,7 @@ module VX_rr_arbiter #( end else if (MODEL == 2) begin - reg [LOG_NUM_REQS-1:0] grant_table [NUM_REQS-1:0]; + reg [NUM_REQS-1:0][LOG_NUM_REQS-1:0] grant_table; reg [LOG_NUM_REQS-1:0] state; for (genvar i = 0; i < NUM_REQS; ++i) begin From 79362dea4b7e2e8779224338d1dc8f2ac5308439 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Mon, 12 Aug 2024 14:01:11 -0700 Subject: [PATCH 024/407] minor update --- hw/rtl/libs/VX_mem_coalescer.sv | 25 ++++++++++--------------- hw/rtl/libs/VX_stream_unpack.sv | 30 +++++++++++++++--------------- 2 files changed, 25 insertions(+), 30 deletions(-) diff --git a/hw/rtl/libs/VX_mem_coalescer.sv b/hw/rtl/libs/VX_mem_coalescer.sv index dbc53336bf..e6ca41e4b0 100644 --- a/hw/rtl/libs/VX_mem_coalescer.sv +++ b/hw/rtl/libs/VX_mem_coalescer.sv @@ -84,8 +84,8 @@ module VX_mem_coalescer #( // tag + mask + offest localparam IBUF_DATA_WIDTH = TAG_ID_WIDTH + NUM_REQS + (NUM_REQS * DATA_RATIO_W); - localparam STATE_SETUP = 0; - localparam STATE_SEND = 1; + localparam STATE_WAIT = 0; + localparam STATE_SEND = 1; logic state_r, state_n; @@ -179,11 +179,9 @@ module VX_mem_coalescer #( end end - wire [OUT_REQS * DATA_RATIO - 1:0] pending_mask; - for (genvar i = 0; i < OUT_REQS * DATA_RATIO; ++i) begin - assign pending_mask[i] = in_req_mask[i] && ~addr_matches_r[i] && ~processed_mask_r[i]; - end - wire batch_completed = ~(| 
pending_mask); + wire is_last_batch = ~(| (in_req_mask & ~addr_matches_r & ~processed_mask_r)); + + wire out_req_fire = out_req_valid && out_req_ready; always @(*) begin state_n = state_r; @@ -201,9 +199,9 @@ module VX_mem_coalescer #( in_req_ready_n = 0; case (state_r) - STATE_SETUP: begin + STATE_WAIT: begin // wait for pending outgoing request to submit - if (out_req_valid && out_req_ready) begin + if (out_req_fire) begin out_req_valid_n = 0; end if (in_req_valid && ~out_req_valid_n && ~ibuf_full) begin @@ -220,15 +218,14 @@ module VX_mem_coalescer #( out_req_data_n = req_data_merged; out_req_tag_n = {in_req_tag[TAG_WIDTH-1 -: UUID_WIDTH], ibuf_waddr}; - in_req_ready_n = batch_completed; + in_req_ready_n = is_last_batch; - if (batch_completed) begin + if (is_last_batch) begin processed_mask_n = '0; end else begin processed_mask_n = processed_mask_r | current_pmask; end - - state_n = STATE_SETUP; + state_n = STATE_WAIT; end endcase end @@ -347,8 +344,6 @@ module VX_mem_coalescer #( end end - wire out_req_fire = out_req_valid && out_req_ready; - always @(posedge clk) begin if (out_req_fire) begin if (out_req_rw) begin diff --git a/hw/rtl/libs/VX_stream_unpack.sv b/hw/rtl/libs/VX_stream_unpack.sv index e8b905cdf0..6a6aa0e9e7 100644 --- a/hw/rtl/libs/VX_stream_unpack.sv +++ b/hw/rtl/libs/VX_stream_unpack.sv @@ -1,10 +1,10 @@ // Copyright © 2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -15,8 +15,8 @@ `TRACING_OFF module VX_stream_unpack #( - parameter NUM_REQS = 1, - parameter DATA_WIDTH = 1, + parameter NUM_REQS = 1, + parameter DATA_WIDTH = 1, parameter TAG_WIDTH = 1, parameter OUT_BUF = 0 ) ( @@ -31,28 +31,28 @@ module VX_stream_unpack #( output wire ready_in, // output - output wire [NUM_REQS-1:0] valid_out, + output wire [NUM_REQS-1:0] valid_out, output wire [NUM_REQS-1:0][DATA_WIDTH-1:0] data_out, output wire [NUM_REQS-1:0][TAG_WIDTH-1:0] tag_out, input wire [NUM_REQS-1:0] ready_out ); if (NUM_REQS > 1) begin - reg [NUM_REQS-1:0] sent_mask; + reg [NUM_REQS-1:0] rem_mask; wire [NUM_REQS-1:0] ready_out_r; - wire [NUM_REQS-1:0] sent_mask_n = sent_mask | ready_out_r; - wire sent_all = ~(| (mask_in & ~sent_mask_n)); + wire [NUM_REQS-1:0] rem_mask_n = rem_mask & ~ready_out_r; + wire sent_all = ~(| (mask_in & rem_mask_n)); always @(posedge clk) begin if (reset) begin - sent_mask <= '0; + rem_mask <= '1; end else begin if (valid_in) begin if (sent_all) begin - sent_mask <= '0; + rem_mask <= '1; end else begin - sent_mask <= sent_mask_n; + rem_mask <= rem_mask_n; end end end @@ -68,7 +68,7 @@ module VX_stream_unpack #( ) out_buf ( .clk (clk), .reset (reset), - .valid_in (valid_in && mask_in[i] && ~sent_mask[i]), + .valid_in (valid_in && mask_in[i] && rem_mask[i]), .ready_in (ready_out_r[i]), .data_in ({data_in[i], tag_in}), .data_out ({data_out[i], tag_out[i]}), @@ -76,13 +76,13 @@ module VX_stream_unpack #( .ready_out (ready_out[i]) ); end - + end else begin - + `UNUSED_VAR (clk) `UNUSED_VAR (reset) `UNUSED_VAR (mask_in) - assign valid_out = valid_in; + assign valid_out = valid_in; assign data_out = data_in; assign tag_out = tag_in; assign ready_in = ready_out; From d74ee43a662fbd94a91d45a9836f90bba2e3f061 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Mon, 12 Aug 2024 14:19:09 -0700 Subject: [PATCH 025/407] minor update --- hw/rtl/core/VX_dispatch.sv | 3 +-- hw/rtl/core/VX_lmem_unit.sv | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) 
diff --git a/hw/rtl/core/VX_dispatch.sv b/hw/rtl/core/VX_dispatch.sv index 04c3d92bf5..dcc15d5e3d 100644 --- a/hw/rtl/core/VX_dispatch.sv +++ b/hw/rtl/core/VX_dispatch.sv @@ -60,8 +60,7 @@ module VX_dispatch import VX_gpu_pkg::*; #( VX_elastic_buffer #( .DATAW (DATAW), .SIZE (2), - .OUT_REG (2), // 2-cycle LUT EB for area reduction - .LUTRAM (1) + .OUT_REG (1) ) buffer ( .clk (clk), .reset (buffer_reset), diff --git a/hw/rtl/core/VX_lmem_unit.sv b/hw/rtl/core/VX_lmem_unit.sv index d93befda7e..6b53a7d7dd 100644 --- a/hw/rtl/core/VX_lmem_unit.sv +++ b/hw/rtl/core/VX_lmem_unit.sv @@ -57,7 +57,7 @@ module VX_lmem_unit import VX_gpu_pkg::*; #( VX_elastic_buffer #( .DATAW (REQ_DATAW), .SIZE (2), - .OUT_REG (1) + .OUT_REG (3) ) req_global_buf ( .clk (clk), .reset (block_reset[i]), From 2edda834c39dd7f0ce393d6c6e964d9025fe00d2 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Mon, 12 Aug 2024 18:11:21 -0700 Subject: [PATCH 026/407] minor update --- hw/rtl/libs/VX_mem_coalescer.sv | 12 ++---------- hw/rtl/libs/VX_stream_unpack.sv | 6 +----- 2 files changed, 3 insertions(+), 15 deletions(-) diff --git a/hw/rtl/libs/VX_mem_coalescer.sv b/hw/rtl/libs/VX_mem_coalescer.sv index e6ca41e4b0..5b646dcb08 100644 --- a/hw/rtl/libs/VX_mem_coalescer.sv +++ b/hw/rtl/libs/VX_mem_coalescer.sv @@ -185,7 +185,6 @@ module VX_mem_coalescer #( always @(*) begin state_n = state_r; - out_req_valid_n = out_req_valid_r; out_req_mask_n = out_req_mask_r; out_req_rw_n = out_req_rw_r; @@ -194,7 +193,6 @@ module VX_mem_coalescer #( out_req_byteen_n = out_req_byteen_r; out_req_data_n = out_req_data_r; out_req_tag_n = out_req_tag_r; - processed_mask_n = processed_mask_r; in_req_ready_n = 0; @@ -209,6 +207,7 @@ module VX_mem_coalescer #( end end default/*STATE_SEND*/: begin + state_n = STATE_WAIT; out_req_valid_n = 1; out_req_mask_n = batch_valid_r; out_req_rw_n = in_req_rw; @@ -217,15 +216,8 @@ module VX_mem_coalescer #( out_req_byteen_n= req_byteen_merged; out_req_data_n = req_data_merged; out_req_tag_n = 
{in_req_tag[TAG_WIDTH-1 -: UUID_WIDTH], ibuf_waddr}; - + processed_mask_n= is_last_batch ? '0 (processed_mask_r | current_pmask); in_req_ready_n = is_last_batch; - - if (is_last_batch) begin - processed_mask_n = '0; - end else begin - processed_mask_n = processed_mask_r | current_pmask; - end - state_n = STATE_WAIT; end endcase end diff --git a/hw/rtl/libs/VX_stream_unpack.sv b/hw/rtl/libs/VX_stream_unpack.sv index 6a6aa0e9e7..c81b300998 100644 --- a/hw/rtl/libs/VX_stream_unpack.sv +++ b/hw/rtl/libs/VX_stream_unpack.sv @@ -49,11 +49,7 @@ module VX_stream_unpack #( rem_mask <= '1; end else begin if (valid_in) begin - if (sent_all) begin - rem_mask <= '1; - end else begin - rem_mask <= rem_mask_n; - end + rem_mask <= sent_all ? '1 : rem_mask_n; end end end From 14ae4b8c13ac225d5ee69904169d761996316f36 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Mon, 12 Aug 2024 20:07:50 -0700 Subject: [PATCH 027/407] minor update --- hw/rtl/cache/VX_bank_flush.sv | 7 +------ hw/rtl/libs/VX_mem_coalescer.sv | 10 +++------- hw/rtl/libs/VX_priority_encoder.sv | 11 +++++++---- 3 files changed, 11 insertions(+), 17 deletions(-) diff --git a/hw/rtl/cache/VX_bank_flush.sv b/hw/rtl/cache/VX_bank_flush.sv index 6c02c1e135..2d62e354cc 100644 --- a/hw/rtl/cache/VX_bank_flush.sv +++ b/hw/rtl/cache/VX_bank_flush.sv @@ -114,12 +114,7 @@ module VX_bank_flush #( assign flush_line = counter_r[`CS_LINE_SEL_BITS-1:0]; if (WRITEBACK && `CS_WAY_SEL_BITS > 0) begin - reg [NUM_WAYS-1:0] flush_way_r; - always @(*) begin - flush_way_r = '0; - flush_way_r[counter_r[`CS_LINE_SEL_BITS +: `CS_WAY_SEL_BITS]] = 1; - end - assign flush_way = flush_way_r; + assign flush_way = NUM_WAYS'(1) << counter_r[`CS_LINE_SEL_BITS +: `CS_WAY_SEL_BITS]; end else begin assign flush_way = {NUM_WAYS{1'b1}}; end diff --git a/hw/rtl/libs/VX_mem_coalescer.sv b/hw/rtl/libs/VX_mem_coalescer.sv index 5b646dcb08..cd6bcb904f 100644 --- a/hw/rtl/libs/VX_mem_coalescer.sv +++ b/hw/rtl/libs/VX_mem_coalescer.sv @@ -168,12 +168,8 @@ module 
VX_mem_coalescer #( for (integer i = 0; i < OUT_REQS; ++i) begin for (integer j = 0; j < DATA_RATIO; ++j) begin if (current_pmask[i * DATA_RATIO + j]) begin - for (integer k = 0; k < DATA_IN_SIZE; ++k) begin - if (in_req_byteen[DATA_RATIO * i + j][k]) begin - req_byteen_merged[i][in_addr_offset[DATA_RATIO * i + j]][k] = 1'b1; - req_data_merged[i][in_addr_offset[DATA_RATIO * i + j]][k * 8 +: 8] = in_req_data[DATA_RATIO * i + j][k * 8 +: 8]; - end - end + req_byteen_merged[i][in_addr_offset[DATA_RATIO * i + j]] = in_req_byteen[DATA_RATIO * i + j]; + req_data_merged[i][in_addr_offset[DATA_RATIO * i + j]] = in_req_data[DATA_RATIO * i + j]; end end end @@ -216,7 +212,7 @@ module VX_mem_coalescer #( out_req_byteen_n= req_byteen_merged; out_req_data_n = req_data_merged; out_req_tag_n = {in_req_tag[TAG_WIDTH-1 -: UUID_WIDTH], ibuf_waddr}; - processed_mask_n= is_last_batch ? '0 (processed_mask_r | current_pmask); + processed_mask_n= is_last_batch ? '0 : (processed_mask_r | current_pmask); in_req_ready_n = is_last_batch; end endcase diff --git a/hw/rtl/libs/VX_priority_encoder.sv b/hw/rtl/libs/VX_priority_encoder.sv index 8bba538b1b..43d7d80ba9 100644 --- a/hw/rtl/libs/VX_priority_encoder.sv +++ b/hw/rtl/libs/VX_priority_encoder.sv @@ -73,11 +73,14 @@ module VX_priority_encoder #( end else if (MODEL == 2) begin - `IGNORE_WARNINGS_BEGIN + `IGNORE_UNOPTFLAT_BEGIN wire [N-1:0] higher_pri_regs; - `IGNORE_WARNINGS_END - assign higher_pri_regs[N-1:1] = higher_pri_regs[N-2:0] | reversed[N-2:0]; - assign higher_pri_regs[0] = 1'b0; + `IGNORE_UNOPTFLAT_END + + assign higher_pri_regs[0] = 1'b0; + for (genvar i = 1; i < N; ++i) begin + assign higher_pri_regs[i] = higher_pri_regs[i-1] | reversed[i-1]; + end assign onehot_out[N-1:0] = reversed[N-1:0] & ~higher_pri_regs[N-1:0]; VX_lzc #( From 6c1ee9bfea39505bec258b0f705aa4f79fb0dbf9 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Mon, 12 Aug 2024 20:08:08 -0700 Subject: [PATCH 028/407] arbiter fixes --- hw/rtl/libs/VX_cyclic_arbiter.sv | 
31 +++++++++++++++++++------------ hw/rtl/libs/VX_generic_arbiter.sv | 2 ++ hw/rtl/libs/VX_rr_arbiter.sv | 2 +- 3 files changed, 22 insertions(+), 13 deletions(-) diff --git a/hw/rtl/libs/VX_cyclic_arbiter.sv b/hw/rtl/libs/VX_cyclic_arbiter.sv index c4a42da14b..d721e51305 100644 --- a/hw/rtl/libs/VX_cyclic_arbiter.sv +++ b/hw/rtl/libs/VX_cyclic_arbiter.sv @@ -30,6 +30,7 @@ module VX_cyclic_arbiter #( `UNUSED_VAR (clk) `UNUSED_VAR (reset) + `UNUSED_VAR (grant_ready) assign grant_index = '0; assign grant_onehot = requests; @@ -39,29 +40,35 @@ module VX_cyclic_arbiter #( localparam IS_POW2 = (1 << LOG_NUM_REQS) == NUM_REQS; + wire [LOG_NUM_REQS-1:0] grant_index_um, grant_index_ql; reg [LOG_NUM_REQS-1:0] grant_index_r; always @(posedge clk) begin if (reset) begin grant_index_r <= '0; - end else begin - if (!IS_POW2 && grant_index_r == LOG_NUM_REQS'(NUM_REQS-1)) begin + end else if (grant_valid && grant_ready) begin + if (!IS_POW2 && grant_index_ql == LOG_NUM_REQS'(NUM_REQS-1)) begin grant_index_r <= '0; - end else if (~grant_valid || grant_ready) begin - grant_index_r <= grant_index_r + LOG_NUM_REQS'(1); + end else begin + grant_index_r <= grant_index_ql + LOG_NUM_REQS'(1); end end end - reg [NUM_REQS-1:0] grant_onehot_r; - always @(*) begin - grant_onehot_r = '0; - grant_onehot_r[grant_index_r] = 1'b1; - end + VX_priority_encoder #( + .N (NUM_REQS) + ) priority_encoder ( + .data_in (requests), + `UNUSED_PIN (onehot_out), + .index_out (grant_index_um), + `UNUSED_PIN (valid_out) + ); + + assign grant_index_ql = requests[grant_index_r] ? 
grant_index_r : grant_index_um; - assign grant_index = grant_index_r; - assign grant_onehot = grant_onehot_r; - assign grant_valid = requests[grant_index_r]; + assign grant_index = grant_index_ql; + assign grant_onehot = NUM_REQS'(1) << grant_index_ql; + assign grant_valid = (| requests); end diff --git a/hw/rtl/libs/VX_generic_arbiter.sv b/hw/rtl/libs/VX_generic_arbiter.sv index a3c4b71ddc..f55b866f8c 100644 --- a/hw/rtl/libs/VX_generic_arbiter.sv +++ b/hw/rtl/libs/VX_generic_arbiter.sv @@ -90,5 +90,7 @@ module VX_generic_arbiter #( end + `RUNTIME_ASSERT ((~grant_valid || (requests[grant_index] != 0)), ("invalid arbiter grant!")) + endmodule `TRACING_ON diff --git a/hw/rtl/libs/VX_rr_arbiter.sv b/hw/rtl/libs/VX_rr_arbiter.sv index 85cf96f9ab..8c0fa0558d 100644 --- a/hw/rtl/libs/VX_rr_arbiter.sv +++ b/hw/rtl/libs/VX_rr_arbiter.sv @@ -459,7 +459,7 @@ module VX_rr_arbiter #( grant_table[i] = 'x; for (integer j = NUM_REQS-1; j >= 0; --j) begin if (requests[(i+j+1) % NUM_REQS]) begin - grant_table[i] = LOG_NUM_REQS'(i+j+1); + grant_table[i] = LOG_NUM_REQS'((i+j+1) % NUM_REQS); end end end From 5126a7c472aecbc6be0a472b486aa6e694e9e63a Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Mon, 12 Aug 2024 21:32:20 -0700 Subject: [PATCH 029/407] minor update --- hw/rtl/VX_platform.vh | 7 +++++ hw/rtl/cache/VX_cache_data.sv | 2 +- hw/rtl/cache/VX_cache_tags.sv | 2 +- hw/rtl/cache/VX_cache_wrap.sv | 52 +++++++++++++++++------------------ hw/rtl/core/VX_alu_unit.sv | 2 +- hw/rtl/core/VX_core.sv | 4 +-- hw/rtl/core/VX_fpu_unit.sv | 2 +- hw/rtl/core/VX_ibuffer.sv | 2 +- hw/rtl/core/VX_lmem_unit.sv | 4 +-- hw/rtl/core/VX_lsu_unit.sv | 2 +- hw/rtl/core/VX_schedule.sv | 1 - hw/rtl/core/VX_split_join.sv | 2 +- 12 files changed, 44 insertions(+), 38 deletions(-) diff --git a/hw/rtl/VX_platform.vh b/hw/rtl/VX_platform.vh index cd0550efac..730b3cd7d8 100644 --- a/hw/rtl/VX_platform.vh +++ b/hw/rtl/VX_platform.vh @@ -50,8 +50,15 @@ `define TRACE(level, args) if (level <= `DEBUG_LEVEL) 
$write args `else `ifdef VERILATOR + +`ifndef TRACING_ALL `define TRACING_ON /* verilator tracing_on */ `define TRACING_OFF /* verilator tracing_off */ +`else +`define TRACING_ON +`define TRACING_OFF +`endif + `ifndef NDEBUG `define DEBUG_BLOCK(x) /* verilator lint_off UNUSED */ \ x \ diff --git a/hw/rtl/cache/VX_cache_data.sv b/hw/rtl/cache/VX_cache_data.sv index a114e1689f..efc873f411 100644 --- a/hw/rtl/cache/VX_cache_data.sv +++ b/hw/rtl/cache/VX_cache_data.sv @@ -75,7 +75,7 @@ module VX_cache_data #( wire [`CS_WORDS_PER_LINE-1:0][NUM_WAYS-1:0][`CS_WORD_WIDTH-1:0] line_rdata; wire [`LOG2UP(NUM_WAYS)-1:0] way_idx; - if (WRITEBACK) begin + if (WRITEBACK) begin : dirty_bytes if (DIRTY_BYTES) begin wire [NUM_WAYS-1:0][LINE_SIZE-1:0] bs_rdata; wire [NUM_WAYS-1:0][LINE_SIZE-1:0] bs_wdata; diff --git a/hw/rtl/cache/VX_cache_tags.sv b/hw/rtl/cache/VX_cache_tags.sv index 7fef69be69..6c6ac92f2c 100644 --- a/hw/rtl/cache/VX_cache_tags.sv +++ b/hw/rtl/cache/VX_cache_tags.sv @@ -100,7 +100,7 @@ module VX_cache_tags #( wire fill_s = fill && (!WRITEBACK || ~stall); wire flush_s = flush && (!WRITEBACK || ~stall); - for (genvar i = 0; i < NUM_WAYS; ++i) begin + for (genvar i = 0; i < NUM_WAYS; ++i) begin : ways wire do_fill = fill_s && evict_way[i]; wire do_flush = flush_s && (!WRITEBACK || way_sel[i]); // flush the whole line in writethrough mode diff --git a/hw/rtl/cache/VX_cache_wrap.sv b/hw/rtl/cache/VX_cache_wrap.sv index 37940297f6..afae06181c 100644 --- a/hw/rtl/cache/VX_cache_wrap.sv +++ b/hw/rtl/cache/VX_cache_wrap.sv @@ -103,7 +103,7 @@ module VX_cache_wrap import VX_gpu_pkg::*; #( .TAG_WIDTH (CACHE_MEM_TAG_WIDTH) ) mem_bus_cache_if(); - if (NC_OR_BYPASS) begin + if (NC_OR_BYPASS) begin : bypass_if `RESET_RELAY (nc_bypass_reset, reset); @@ -148,31 +148,7 @@ module VX_cache_wrap import VX_gpu_pkg::*; #( `ASSIGN_VX_MEM_BUS_IF (mem_bus_if, mem_bus_cache_if); end - if (PASSTHRU != 0) begin - - for (genvar i = 0; i < NUM_REQS; ++i) begin - `UNUSED_VAR 
(core_bus_cache_if[i].req_valid) - `UNUSED_VAR (core_bus_cache_if[i].req_data) - assign core_bus_cache_if[i].req_ready = 0; - - assign core_bus_cache_if[i].rsp_valid = 0; - assign core_bus_cache_if[i].rsp_data = '0; - `UNUSED_VAR (core_bus_cache_if[i].rsp_ready) - end - - assign mem_bus_cache_if.req_valid = 0; - assign mem_bus_cache_if.req_data = '0; - `UNUSED_VAR (mem_bus_cache_if.req_ready) - - `UNUSED_VAR (mem_bus_cache_if.rsp_valid) - `UNUSED_VAR (mem_bus_cache_if.rsp_data) - assign mem_bus_cache_if.rsp_ready = 0; - - `ifdef PERF_ENABLE - assign cache_perf = '0; - `endif - - end else begin + if (PASSTHRU == 0) begin : cache_if `RESET_RELAY (cache_reset, reset); @@ -205,6 +181,30 @@ module VX_cache_wrap import VX_gpu_pkg::*; #( .mem_bus_if (mem_bus_cache_if) ); + end else begin + + for (genvar i = 0; i < NUM_REQS; ++i) begin + `UNUSED_VAR (core_bus_cache_if[i].req_valid) + `UNUSED_VAR (core_bus_cache_if[i].req_data) + assign core_bus_cache_if[i].req_ready = 0; + + assign core_bus_cache_if[i].rsp_valid = 0; + assign core_bus_cache_if[i].rsp_data = '0; + `UNUSED_VAR (core_bus_cache_if[i].rsp_ready) + end + + assign mem_bus_cache_if.req_valid = 0; + assign mem_bus_cache_if.req_data = '0; + `UNUSED_VAR (mem_bus_cache_if.req_ready) + + `UNUSED_VAR (mem_bus_cache_if.rsp_valid) + `UNUSED_VAR (mem_bus_cache_if.rsp_data) + assign mem_bus_cache_if.rsp_ready = 0; + + `ifdef PERF_ENABLE + assign cache_perf = '0; + `endif + end `ifdef DBG_TRACE_CACHE diff --git a/hw/rtl/core/VX_alu_unit.sv b/hw/rtl/core/VX_alu_unit.sv index 70eab1529a..72ef74b9c9 100644 --- a/hw/rtl/core/VX_alu_unit.sv +++ b/hw/rtl/core/VX_alu_unit.sv @@ -55,7 +55,7 @@ module VX_alu_unit #( .NUM_LANES (NUM_LANES) ) per_block_commit_if[BLOCK_SIZE](); - for (genvar block_idx = 0; block_idx < BLOCK_SIZE; ++block_idx) begin + for (genvar block_idx = 0; block_idx < BLOCK_SIZE; ++block_idx) begin : alu_blocks `RESET_RELAY_EN (block_reset, reset,(BLOCK_SIZE > 1)); diff --git a/hw/rtl/core/VX_core.sv 
b/hw/rtl/core/VX_core.sv index 83af50f16c..d8cd804f9a 100644 --- a/hw/rtl/core/VX_core.sv +++ b/hw/rtl/core/VX_core.sv @@ -232,7 +232,7 @@ module VX_core import VX_gpu_pkg::*; #( `endif - for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin + for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin : coalescer_blocks VX_lsu_mem_if #( .NUM_LANES (DCACHE_CHANNELS), @@ -240,7 +240,7 @@ module VX_core import VX_gpu_pkg::*; #( .TAG_WIDTH (DCACHE_TAG_WIDTH) ) dcache_coalesced_if(); - if (LSU_WORD_SIZE != DCACHE_WORD_SIZE) begin + if (LSU_WORD_SIZE != DCACHE_WORD_SIZE) begin : coalescer_if `RESET_RELAY (mem_coalescer_reset, reset); diff --git a/hw/rtl/core/VX_fpu_unit.sv b/hw/rtl/core/VX_fpu_unit.sv index 496b24e295..127ba97555 100644 --- a/hw/rtl/core/VX_fpu_unit.sv +++ b/hw/rtl/core/VX_fpu_unit.sv @@ -53,7 +53,7 @@ module VX_fpu_unit import VX_fpu_pkg::*; #( .NUM_LANES (NUM_LANES) ) per_block_commit_if[BLOCK_SIZE](); - for (genvar block_idx = 0; block_idx < BLOCK_SIZE; ++block_idx) begin + for (genvar block_idx = 0; block_idx < BLOCK_SIZE; ++block_idx) begin : fpu_blocks `UNUSED_VAR (per_block_execute_if[block_idx].data.tid) `UNUSED_VAR (per_block_execute_if[block_idx].data.wb) diff --git a/hw/rtl/core/VX_ibuffer.sv b/hw/rtl/core/VX_ibuffer.sv index e8edf64c78..6f068d45ff 100644 --- a/hw/rtl/core/VX_ibuffer.sv +++ b/hw/rtl/core/VX_ibuffer.sv @@ -35,7 +35,7 @@ module VX_ibuffer import VX_gpu_pkg::*; #( wire [PER_ISSUE_WARPS-1:0] ibuf_ready_in; assign decode_if.ready = ibuf_ready_in[decode_if.data.wid]; - for (genvar w = 0; w < PER_ISSUE_WARPS; ++w) begin + for (genvar w = 0; w < PER_ISSUE_WARPS; ++w) begin : ibuf_slices VX_elastic_buffer #( .DATAW (DATAW), .SIZE (`IBUF_SIZE), diff --git a/hw/rtl/core/VX_lmem_unit.sv b/hw/rtl/core/VX_lmem_unit.sv index 6b53a7d7dd..0b524c5401 100644 --- a/hw/rtl/core/VX_lmem_unit.sv +++ b/hw/rtl/core/VX_lmem_unit.sv @@ -41,7 +41,7 @@ module VX_lmem_unit import VX_gpu_pkg::*; #( `RESET_RELAY_EX (block_reset, reset, `NUM_LSU_BLOCKS, 1); - for 
(genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin + for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin : demux_slices wire [`NUM_LSU_LANES-1:0] is_addr_local_mask; for (genvar j = 0; j < `NUM_LSU_LANES; ++j) begin @@ -151,7 +151,7 @@ module VX_lmem_unit import VX_gpu_pkg::*; #( .TAG_WIDTH (LSU_TAG_WIDTH) ) lmem_bus_if[LSU_NUM_REQS](); - for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin + for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin : adapter_slices VX_mem_bus_if #( .DATA_SIZE (LSU_WORD_SIZE), .TAG_WIDTH (LSU_TAG_WIDTH) diff --git a/hw/rtl/core/VX_lsu_unit.sv b/hw/rtl/core/VX_lsu_unit.sv index d40f5fcfbf..5e280e48f0 100644 --- a/hw/rtl/core/VX_lsu_unit.sv +++ b/hw/rtl/core/VX_lsu_unit.sv @@ -54,7 +54,7 @@ module VX_lsu_unit import VX_gpu_pkg::*; #( .NUM_LANES (NUM_LANES) ) per_block_commit_if[BLOCK_SIZE](); - for (genvar block_idx = 0; block_idx < BLOCK_SIZE; ++block_idx) begin : lsu_slices + for (genvar block_idx = 0; block_idx < BLOCK_SIZE; ++block_idx) begin : lsu_blocks `RESET_RELAY (slice_reset, reset); diff --git a/hw/rtl/core/VX_schedule.sv b/hw/rtl/core/VX_schedule.sv index 71a74c6ac3..11a62469b9 100644 --- a/hw/rtl/core/VX_schedule.sv +++ b/hw/rtl/core/VX_schedule.sv @@ -386,7 +386,6 @@ module VX_schedule import VX_gpu_pkg::*; #( `RESET_RELAY_EX (pending_instr_reset, reset, `NUM_WARPS, `MAX_FANOUT); for (genvar i = 0; i < `NUM_WARPS; ++i) begin - VX_pending_size #( .SIZE (4096), .ALM_EMPTY (1) diff --git a/hw/rtl/core/VX_split_join.sv b/hw/rtl/core/VX_split_join.sv index 7f887e602c..9f47023b08 100644 --- a/hw/rtl/core/VX_split_join.sv +++ b/hw/rtl/core/VX_split_join.sv @@ -45,7 +45,7 @@ module VX_split_join import VX_gpu_pkg::*; #( wire ipdom_push = valid && split.valid && split.is_dvg; wire ipdom_pop = valid && sjoin.valid && sjoin_is_dvg; - for (genvar i = 0; i < `NUM_WARPS; ++i) begin + for (genvar i = 0; i < `NUM_WARPS; ++i) begin : ipdom_slices `RESET_RELAY (ipdom_reset, reset); From 3ae3afc59be69a04b3c3cfae1c26207cd95a7c29 Mon Sep 17 00:00:00 2001 
From: Blaise Tine Date: Mon, 12 Aug 2024 21:34:41 -0700 Subject: [PATCH 030/407] minor update --- hw/rtl/libs/VX_mem_coalescer.sv | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/hw/rtl/libs/VX_mem_coalescer.sv b/hw/rtl/libs/VX_mem_coalescer.sv index cd6bcb904f..9ef462223b 100644 --- a/hw/rtl/libs/VX_mem_coalescer.sv +++ b/hw/rtl/libs/VX_mem_coalescer.sv @@ -168,8 +168,13 @@ module VX_mem_coalescer #( for (integer i = 0; i < OUT_REQS; ++i) begin for (integer j = 0; j < DATA_RATIO; ++j) begin if (current_pmask[i * DATA_RATIO + j]) begin - req_byteen_merged[i][in_addr_offset[DATA_RATIO * i + j]] = in_req_byteen[DATA_RATIO * i + j]; - req_data_merged[i][in_addr_offset[DATA_RATIO * i + j]] = in_req_data[DATA_RATIO * i + j]; + for (integer k = 0; k < DATA_IN_SIZE; ++k) begin + // perform byte-level merge since each thread may have different bytes enabled + if (in_req_byteen[DATA_RATIO * i + j][k]) begin + req_byteen_merged[i][in_addr_offset[DATA_RATIO * i + j]][k] = 1'b1; + req_data_merged[i][in_addr_offset[DATA_RATIO * i + j]][k * 8 +: 8] = in_req_data[DATA_RATIO * i + j][k * 8 +: 8]; + end + end end end end From 76f4cd66d3e3d9fe60b13c929c8287738e4605cc Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Tue, 13 Aug 2024 03:08:48 -0700 Subject: [PATCH 031/407] minor update --- hw/rtl/core/VX_schedule.sv | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/hw/rtl/core/VX_schedule.sv b/hw/rtl/core/VX_schedule.sv index 11a62469b9..46fad97be6 100644 --- a/hw/rtl/core/VX_schedule.sv +++ b/hw/rtl/core/VX_schedule.sv @@ -356,7 +356,9 @@ module VX_schedule import VX_gpu_pkg::*; #( `endif VX_elastic_buffer #( - .DATAW (`NUM_THREADS + `PC_BITS + `NW_WIDTH) + .DATAW (`NUM_THREADS + `PC_BITS + `NW_WIDTH), + .SIZE (2), // need a skid buffer to buffer out schedule_ready + .OUT_REG (1) // should be registered for BRAM acces in fetch unit ) out_buf ( .clk (clk), .reset (reset), From ee39da74b4951d829a7bde787bcd23d2b49be948 Mon Sep 17 00:00:00 
2001 From: Blaise Tine Date: Tue, 13 Aug 2024 04:14:02 -0700 Subject: [PATCH 032/407] increasing reset delay --- hw/rtl/VX_config.vh | 2 +- hw/rtl/core/VX_lmem_unit.sv | 28 ++++++++++++++-------------- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/hw/rtl/VX_config.vh b/hw/rtl/VX_config.vh index 8d1c280fd8..d46c679e9a 100644 --- a/hw/rtl/VX_config.vh +++ b/hw/rtl/VX_config.vh @@ -214,7 +214,7 @@ `endif `define STACK_SIZE (1 << `STACK_LOG2_SIZE) -`define RESET_DELAY 8 +`define RESET_DELAY 16 `ifndef STALL_TIMEOUT `define STALL_TIMEOUT (100000 * (1 ** (`L2_ENABLED + `L3_ENABLED))) diff --git a/hw/rtl/core/VX_lmem_unit.sv b/hw/rtl/core/VX_lmem_unit.sv index 0b524c5401..988133cc12 100644 --- a/hw/rtl/core/VX_lmem_unit.sv +++ b/hw/rtl/core/VX_lmem_unit.sv @@ -37,7 +37,7 @@ module VX_lmem_unit import VX_gpu_pkg::*; #( .NUM_LANES (`NUM_LSU_LANES), .DATA_SIZE (LSU_WORD_SIZE), .TAG_WIDTH (LSU_TAG_WIDTH) - ) lsu_switch_if[`NUM_LSU_BLOCKS](); + ) lsu_lmem_if[`NUM_LSU_BLOCKS](); `RESET_RELAY_EX (block_reset, reset, `NUM_LSU_BLOCKS, 1); @@ -103,17 +103,17 @@ module VX_lmem_unit import VX_gpu_pkg::*; #( lsu_mem_in_if[i].req_data.tag }), .ready_in (req_local_ready), - .valid_out (lsu_switch_if[i].req_valid), + .valid_out (lsu_lmem_if[i].req_valid), .data_out ({ - lsu_switch_if[i].req_data.mask, - lsu_switch_if[i].req_data.rw, - lsu_switch_if[i].req_data.byteen, - lsu_switch_if[i].req_data.addr, - lsu_switch_if[i].req_data.flags, - lsu_switch_if[i].req_data.data, - lsu_switch_if[i].req_data.tag + lsu_lmem_if[i].req_data.mask, + lsu_lmem_if[i].req_data.rw, + lsu_lmem_if[i].req_data.byteen, + lsu_lmem_if[i].req_data.addr, + lsu_lmem_if[i].req_data.flags, + lsu_lmem_if[i].req_data.data, + lsu_lmem_if[i].req_data.tag }), - .ready_out (lsu_switch_if[i].req_ready) + .ready_out (lsu_lmem_if[i].req_ready) ); assign lsu_mem_in_if[i].req_ready = (req_global_ready && is_addr_global) @@ -128,15 +128,15 @@ module VX_lmem_unit import VX_gpu_pkg::*; #( .clk (clk), .reset 
(block_reset[i]), .valid_in ({ - lsu_switch_if[i].rsp_valid, + lsu_lmem_if[i].rsp_valid, lsu_mem_out_if[i].rsp_valid }), .ready_in ({ - lsu_switch_if[i].rsp_ready, + lsu_lmem_if[i].rsp_ready, lsu_mem_out_if[i].rsp_ready }), .data_in ({ - lsu_switch_if[i].rsp_data, + lsu_lmem_if[i].rsp_data, lsu_mem_out_if[i].rsp_data }), .data_out (lsu_mem_in_if[i].rsp_data), @@ -168,7 +168,7 @@ module VX_lmem_unit import VX_gpu_pkg::*; #( ) lsu_adapter ( .clk (clk), .reset (block_reset[i]), - .lsu_mem_if (lsu_switch_if[i]), + .lsu_mem_if (lsu_lmem_if[i]), .mem_bus_if (lmem_bus_tmp_if) ); From d6f1393627daad324c9ebe1e01fa07899bc6b763 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Tue, 13 Aug 2024 18:34:06 -0700 Subject: [PATCH 033/407] memory coalescer timing optimization --- hw/rtl/libs/VX_mem_coalescer.sv | 18 ++++++++++-------- hw/rtl/libs/VX_pipe_register.sv | 9 ++++++--- 2 files changed, 16 insertions(+), 11 deletions(-) diff --git a/hw/rtl/libs/VX_mem_coalescer.sv b/hw/rtl/libs/VX_mem_coalescer.sv index 9ef462223b..17e5923bdf 100644 --- a/hw/rtl/libs/VX_mem_coalescer.sv +++ b/hw/rtl/libs/VX_mem_coalescer.sv @@ -113,12 +113,13 @@ module VX_mem_coalescer #( logic [OUT_REQS-1:0][OUT_ADDR_WIDTH-1:0] seed_addr_r, seed_addr_n; logic [OUT_REQS-1:0][FLAGS_WIDTH-1:0] seed_flags_r, seed_flags_n; logic [NUM_REQS-1:0] addr_matches_r, addr_matches_n; - logic [NUM_REQS-1:0] processed_mask_r, processed_mask_n; + logic [NUM_REQS-1:0] req_rem_mask_r, req_rem_mask_n; wire [OUT_REQS-1:0][NUM_REQS_W-1:0] seed_idx; wire [NUM_REQS-1:0][OUT_ADDR_WIDTH-1:0] in_addr_base; wire [NUM_REQS-1:0][DATA_RATIO_W-1:0] in_addr_offset; + for (genvar i = 0; i < NUM_REQS; i++) begin assign in_addr_base[i] = in_req_addr[i][ADDR_WIDTH-1:DATA_RATIO_W]; assign in_addr_offset[i] = in_req_addr[i][DATA_RATIO_W-1:0]; @@ -128,7 +129,7 @@ module VX_mem_coalescer #( wire [DATA_RATIO-1:0] batch_mask; wire [DATA_RATIO_W-1:0] batch_idx; - assign batch_mask = in_req_mask[i * DATA_RATIO +: DATA_RATIO] & ~processed_mask_r[i * 
DATA_RATIO +: DATA_RATIO]; + assign batch_mask = in_req_mask[i * DATA_RATIO +: DATA_RATIO] & req_rem_mask_r[i * DATA_RATIO +: DATA_RATIO]; VX_priority_encoder #( .N (DATA_RATIO) @@ -180,7 +181,7 @@ module VX_mem_coalescer #( end end - wire is_last_batch = ~(| (in_req_mask & ~addr_matches_r & ~processed_mask_r)); + wire is_last_batch = ~(| (in_req_mask & ~addr_matches_r & req_rem_mask_r)); wire out_req_fire = out_req_valid && out_req_ready; @@ -194,7 +195,7 @@ module VX_mem_coalescer #( out_req_byteen_n = out_req_byteen_r; out_req_data_n = out_req_data_r; out_req_tag_n = out_req_tag_r; - processed_mask_n = processed_mask_r; + req_rem_mask_n = req_rem_mask_r; in_req_ready_n = 0; case (state_r) @@ -217,7 +218,7 @@ module VX_mem_coalescer #( out_req_byteen_n= req_byteen_merged; out_req_data_n = req_data_merged; out_req_tag_n = {in_req_tag[TAG_WIDTH-1 -: UUID_WIDTH], ibuf_waddr}; - processed_mask_n= is_last_batch ? '0 : (processed_mask_r | current_pmask); + req_rem_mask_n = is_last_batch ? '1 : (req_rem_mask_r & ~current_pmask); in_req_ready_n = is_last_batch; end endcase @@ -225,13 +226,14 @@ module VX_mem_coalescer #( VX_pipe_register #( .DATAW (1 + NUM_REQS + 1 + 1 + NUM_REQS + OUT_REQS * (1 + 1 + OUT_ADDR_WIDTH + FLAGS_WIDTH + OUT_ADDR_WIDTH + FLAGS_WIDTH + DATA_OUT_SIZE + DATA_OUT_WIDTH) + OUT_TAG_WIDTH), - .RESETW (1 + NUM_REQS + 1) + .RESETW (1 + NUM_REQS + 1), + .INIT_VALUE ({1'b0, {NUM_REQS{1'b1}}, 1'b0}) ) pipe_reg ( .clk (clk), .reset (reset), .enable (1'b1), - .data_in ({state_n, processed_mask_n, out_req_valid_n, out_req_rw_n, addr_matches_n, batch_valid_n, out_req_mask_n, seed_addr_n, seed_flags_n, out_req_addr_n, out_req_flags_n, out_req_byteen_n, out_req_data_n, out_req_tag_n}), - .data_out ({state_r, processed_mask_r, out_req_valid_r, out_req_rw_r, addr_matches_r, batch_valid_r, out_req_mask_r, seed_addr_r, seed_flags_r, out_req_addr_r, out_req_flags_r, out_req_byteen_r, out_req_data_r, out_req_tag_r}) + .data_in ({state_n, req_rem_mask_n, 
out_req_valid_n, out_req_rw_n, addr_matches_n, batch_valid_n, out_req_mask_n, seed_addr_n, seed_flags_n, out_req_addr_n, out_req_flags_n, out_req_byteen_n, out_req_data_n, out_req_tag_n}), + .data_out ({state_r, req_rem_mask_r, out_req_valid_r, out_req_rw_r, addr_matches_r, batch_valid_r, out_req_mask_r, seed_addr_r, seed_flags_r, out_req_addr_r, out_req_flags_r, out_req_byteen_r, out_req_data_r, out_req_tag_r}) ); wire out_rsp_fire = out_rsp_valid && out_rsp_ready; diff --git a/hw/rtl/libs/VX_pipe_register.sv b/hw/rtl/libs/VX_pipe_register.sv index 2c1cddfd64..69184898fb 100644 --- a/hw/rtl/libs/VX_pipe_register.sv +++ b/hw/rtl/libs/VX_pipe_register.sv @@ -17,6 +17,7 @@ module VX_pipe_register #( parameter DATAW = 1, parameter RESETW = 0, + parameter [`UP(RESETW)-1:0] INIT_VALUE = {`UP(RESETW){1'b0}}, parameter DEPTH = 1 ) ( input wire clk, @@ -46,7 +47,7 @@ module VX_pipe_register #( always @(posedge clk) begin if (reset) begin - value <= RESETW'(0); + value <= INIT_VALUE; end else if (enable) begin value <= data_in; end @@ -58,7 +59,7 @@ module VX_pipe_register #( always @(posedge clk) begin if (reset) begin - value_r <= RESETW'(0); + value_r <= INIT_VALUE; end else if (enable) begin value_r <= data_in[DATAW-1:DATAW-RESETW]; end @@ -74,10 +75,12 @@ module VX_pipe_register #( end else begin wire [DEPTH:0][DATAW-1:0] data_delayed; assign data_delayed[0] = data_in; + for (genvar i = 1; i <= DEPTH; ++i) begin VX_pipe_register #( .DATAW (DATAW), - .RESETW (RESETW) + .RESETW (RESETW), + .INIT_VALUE (INIT_VALUE) ) pipe_reg ( .clk (clk), .reset (reset), From aef1411af5e6f028d090852c108dfdd679888b2c Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Tue, 13 Aug 2024 21:38:33 -0700 Subject: [PATCH 034/407] scoreboard timing optimization --- hw/rtl/core/VX_operands.sv | 55 +++++++++--------- hw/rtl/core/VX_schedule.sv | 2 +- hw/rtl/core/VX_scoreboard.sv | 105 ++++++++++------------------------- 3 files changed, 58 insertions(+), 104 deletions(-) diff --git 
a/hw/rtl/core/VX_operands.sv b/hw/rtl/core/VX_operands.sv index 5dbb73791f..3f64caf77e 100644 --- a/hw/rtl/core/VX_operands.sv +++ b/hw/rtl/core/VX_operands.sv @@ -37,15 +37,15 @@ module VX_operands import VX_gpu_pkg::*; #( VX_operands_if.master operands_if ); `UNUSED_SPARAM (INSTANCE_ID) - localparam NUM_SRC_REGS = 3; - localparam REQ_SEL_BITS = `CLOG2(NUM_SRC_REGS); + localparam NUM_SRC_OPDS = 3; + localparam REQ_SEL_BITS = `CLOG2(NUM_SRC_OPDS); localparam REQ_SEL_WIDTH = `UP(REQ_SEL_BITS); localparam BANK_SEL_BITS = `CLOG2(NUM_BANKS); localparam BANK_SEL_WIDTH = `UP(BANK_SEL_BITS); localparam PER_BANK_REGS = `NUM_REGS / NUM_BANKS; localparam META_DATAW = ISSUE_WIS_W + `NUM_THREADS + `PC_BITS + 1 + `EX_BITS + `INST_OP_BITS + `INST_ARGS_BITS + `NR_BITS + `UUID_WIDTH; localparam REGS_DATAW = `XLEN * `NUM_THREADS; - localparam DATAW = META_DATAW + NUM_SRC_REGS * REGS_DATAW; + localparam DATAW = META_DATAW + NUM_SRC_OPDS * REGS_DATAW; localparam RAM_ADDRW = `LOG2UP(`NUM_REGS * PER_ISSUE_WARPS); localparam PER_BANK_ADDRW = RAM_ADDRW - BANK_SEL_BITS; localparam XLEN_SIZE = `XLEN / 8; @@ -53,10 +53,10 @@ module VX_operands import VX_gpu_pkg::*; #( `UNUSED_VAR (writeback_if.data.sop) - wire [NUM_SRC_REGS-1:0] src_valid; - wire [NUM_SRC_REGS-1:0] req_in_valid, req_in_ready; - wire [NUM_SRC_REGS-1:0][PER_BANK_ADDRW-1:0] req_in_data; - wire [NUM_SRC_REGS-1:0][BANK_SEL_WIDTH-1:0] req_bank_idx; + wire [NUM_SRC_OPDS-1:0] src_valid; + wire [NUM_SRC_OPDS-1:0] req_in_valid, req_in_ready; + wire [NUM_SRC_OPDS-1:0][PER_BANK_ADDRW-1:0] req_in_data; + wire [NUM_SRC_OPDS-1:0][BANK_SEL_WIDTH-1:0] req_bank_idx; wire [NUM_BANKS-1:0] gpr_rd_valid, gpr_rd_ready; wire [NUM_BANKS-1:0] gpr_rd_valid_st1, gpr_rd_valid_st2; @@ -68,40 +68,39 @@ module VX_operands import VX_gpu_pkg::*; #( wire pipe_valid_st2, pipe_ready_st2; wire [META_DATAW-1:0] pipe_data, pipe_data_st1, pipe_data_st2; - reg [NUM_SRC_REGS-1:0][`NUM_THREADS-1:0][`XLEN-1:0] src_data_n; - wire 
[NUM_SRC_REGS-1:0][`NUM_THREADS-1:0][`XLEN-1:0] src_data_st1, src_data_st2; + reg [NUM_SRC_OPDS-1:0][`NUM_THREADS-1:0][`XLEN-1:0] src_data_n; + wire [NUM_SRC_OPDS-1:0][`NUM_THREADS-1:0][`XLEN-1:0] src_data_st1, src_data_st2; - reg [NUM_SRC_REGS-1:0] data_fetched_n; - wire [NUM_SRC_REGS-1:0] data_fetched_st1; + reg [NUM_SRC_OPDS-1:0] data_fetched_n; + wire [NUM_SRC_OPDS-1:0] data_fetched_st1; reg has_collision_n; wire has_collision_st1; - wire [NUM_SRC_REGS-1:0][`NR_BITS-1:0] src_regs = {scoreboard_if.data.rs3, - scoreboard_if.data.rs2, - scoreboard_if.data.rs1}; + wire [NUM_SRC_OPDS-1:0][`NR_BITS-1:0] src_opds; + assign src_opds = {scoreboard_if.data.rs3, scoreboard_if.data.rs2, scoreboard_if.data.rs1}; - for (genvar i = 0; i < NUM_SRC_REGS; ++i) begin + for (genvar i = 0; i < NUM_SRC_OPDS; ++i) begin if (ISSUE_WIS != 0) begin - assign req_in_data[i] = {src_regs[i][`NR_BITS-1:BANK_SEL_BITS], scoreboard_if.data.wis}; + assign req_in_data[i] = {src_opds[i][`NR_BITS-1:BANK_SEL_BITS], scoreboard_if.data.wis}; end else begin - assign req_in_data[i] = src_regs[i][`NR_BITS-1:BANK_SEL_BITS]; + assign req_in_data[i] = src_opds[i][`NR_BITS-1:BANK_SEL_BITS]; end if (NUM_BANKS != 1) begin - assign req_bank_idx[i] = src_regs[i][BANK_SEL_BITS-1:0]; + assign req_bank_idx[i] = src_opds[i][BANK_SEL_BITS-1:0]; end else begin assign req_bank_idx[i] = '0; end end - for (genvar i = 0; i < NUM_SRC_REGS; ++i) begin - assign src_valid[i] = (src_regs[i] != 0) && ~data_fetched_st1[i]; + for (genvar i = 0; i < NUM_SRC_OPDS; ++i) begin + assign src_valid[i] = (src_opds[i] != 0) && ~data_fetched_st1[i]; end - assign req_in_valid = {NUM_SRC_REGS{scoreboard_if.valid}} & src_valid; + assign req_in_valid = {NUM_SRC_OPDS{scoreboard_if.valid}} & src_valid; VX_stream_xbar #( - .NUM_INPUTS (NUM_SRC_REGS), + .NUM_INPUTS (NUM_SRC_OPDS), .NUM_OUTPUTS (NUM_BANKS), .DATAW (PER_BANK_ADDRW), .ARBITER ("P"), // use priority arbiter @@ -132,8 +131,8 @@ module VX_operands import VX_gpu_pkg::*; #( always @(*) 
begin has_collision_n = 0; - for (integer i = 0; i < NUM_SRC_REGS; ++i) begin - for (integer j = 1; j < (NUM_SRC_REGS-i); ++j) begin + for (integer i = 0; i < NUM_SRC_OPDS; ++i) begin + for (integer j = 1; j < (NUM_SRC_OPDS-i); ++j) begin has_collision_n |= src_valid[i] && src_valid[j+i] && (req_bank_idx[i] == req_bank_idx[j+i]); @@ -163,8 +162,8 @@ module VX_operands import VX_gpu_pkg::*; #( }; VX_pipe_register #( - .DATAW (1 + NUM_SRC_REGS + NUM_BANKS + META_DATAW + 1 + NUM_BANKS * (PER_BANK_ADDRW + REQ_SEL_WIDTH)), - .RESETW (1 + NUM_SRC_REGS) + .DATAW (1 + NUM_SRC_OPDS + NUM_BANKS + META_DATAW + 1 + NUM_BANKS * (PER_BANK_ADDRW + REQ_SEL_WIDTH)), + .RESETW (1 + NUM_SRC_OPDS) ) pipe_reg1 ( .clk (clk), .reset (reset), @@ -182,8 +181,8 @@ module VX_operands import VX_gpu_pkg::*; #( `RESET_RELAY (pipe2_reset, reset); // needed for pipe_reg2's wide RESETW VX_pipe_register #( - .DATAW (1 + NUM_SRC_REGS * REGS_DATAW + NUM_BANKS + NUM_BANKS * REGS_DATAW + META_DATAW + NUM_BANKS * REQ_SEL_WIDTH), - .RESETW (1 + NUM_SRC_REGS * REGS_DATAW) + .DATAW (1 + NUM_SRC_OPDS * REGS_DATAW + NUM_BANKS + NUM_BANKS * REGS_DATAW + META_DATAW + NUM_BANKS * REQ_SEL_WIDTH), + .RESETW (1 + NUM_SRC_OPDS * REGS_DATAW) ) pipe_reg2 ( .clk (clk), .reset (pipe2_reset), diff --git a/hw/rtl/core/VX_schedule.sv b/hw/rtl/core/VX_schedule.sv index 46fad97be6..5fe81a7184 100644 --- a/hw/rtl/core/VX_schedule.sv +++ b/hw/rtl/core/VX_schedule.sv @@ -357,7 +357,7 @@ module VX_schedule import VX_gpu_pkg::*; #( VX_elastic_buffer #( .DATAW (`NUM_THREADS + `PC_BITS + `NW_WIDTH), - .SIZE (2), // need a skid buffer to buffer out schedule_ready + .SIZE (2), // need to buffer out ready_in .OUT_REG (1) // should be registered for BRAM acces in fetch unit ) out_buf ( .clk (clk), diff --git a/hw/rtl/core/VX_scoreboard.sv b/hw/rtl/core/VX_scoreboard.sv index df25aff268..cd9f3093d7 100644 --- a/hw/rtl/core/VX_scoreboard.sv +++ b/hw/rtl/core/VX_scoreboard.sv @@ -30,6 +30,8 @@ module VX_scoreboard import VX_gpu_pkg::*; 
#( VX_scoreboard_if.master scoreboard_if ); `UNUSED_SPARAM (INSTANCE_ID) + localparam NUM_SRC_OPDS = 3; + localparam NUM_OPDS = NUM_SRC_OPDS + 1; localparam DATAW = `UUID_WIDTH + `NUM_THREADS + `PC_BITS + `EX_BITS + `INST_OP_BITS + `INST_ARGS_BITS + (`NR_BITS * 4) + 1; VX_ibuffer_if staging_if [PER_ISSUE_WARPS](); @@ -100,9 +102,8 @@ module VX_scoreboard import VX_gpu_pkg::*; #( `endif for (genvar w = 0; w < PER_ISSUE_WARPS; ++w) begin - VX_elastic_buffer #( - .DATAW (DATAW), - .SIZE (1) + VX_pipe_buffer #( + .DATAW (DATAW) ) stanging_buf ( .clk (clk), .reset (reset), @@ -118,7 +119,7 @@ module VX_scoreboard import VX_gpu_pkg::*; #( for (genvar w = 0; w < PER_ISSUE_WARPS; ++w) begin reg [`NUM_REGS-1:0] inuse_regs; - reg [3:0] operands_busy, operands_busy_n; + reg [NUM_OPDS-1:0] operands_busy, operands_busy_n; wire ibuffer_fire = ibuffer_if[w].valid && ibuffer_if[w].ready; @@ -128,6 +129,10 @@ module VX_scoreboard import VX_gpu_pkg::*; #( && (writeback_if.data.wis == ISSUE_WIS_W'(w)) && writeback_if.data.eop; + wire [NUM_OPDS-1:0][`NR_BITS-1:0] ibuf_opds, stg_opds; + assign ibuf_opds = {ibuffer_if[w].data.rs3, ibuffer_if[w].data.rs2, ibuffer_if[w].data.rs1, ibuffer_if[w].data.rd}; + assign stg_opds = {staging_if[w].data.rs3, staging_if[w].data.rs2, staging_if[w].data.rs1, staging_if[w].data.rd}; + `ifdef PERF_ENABLE reg [`NUM_REGS-1:0][`EX_WIDTH-1:0] inuse_units; reg [`NUM_REGS-1:0][`SFU_WIDTH-1:0] inuse_sfu; @@ -135,29 +140,11 @@ module VX_scoreboard import VX_gpu_pkg::*; #( always @(*) begin perf_inuse_units_per_cycle[w] = '0; perf_inuse_sfu_per_cycle[w] = '0; - if (staging_if[w].valid) begin - if (operands_busy[0]) begin - perf_inuse_units_per_cycle[w][inuse_units[staging_if[w].data.rd]] = 1; - if (inuse_units[staging_if[w].data.rd] == `EX_SFU) begin - perf_inuse_sfu_per_cycle[w][inuse_sfu[staging_if[w].data.rd]] = 1; - end - end - if (operands_busy[1]) begin - perf_inuse_units_per_cycle[w][inuse_units[staging_if[w].data.rs1]] = 1; - if 
(inuse_units[staging_if[w].data.rs1] == `EX_SFU) begin - perf_inuse_sfu_per_cycle[w][inuse_sfu[staging_if[w].data.rs1]] = 1; - end - end - if (operands_busy[2]) begin - perf_inuse_units_per_cycle[w][inuse_units[staging_if[w].data.rs2]] = 1; - if (inuse_units[staging_if[w].data.rs2] == `EX_SFU) begin - perf_inuse_sfu_per_cycle[w][inuse_sfu[staging_if[w].data.rs2]] = 1; - end - end - if (operands_busy[3]) begin - perf_inuse_units_per_cycle[w][inuse_units[staging_if[w].data.rs3]] = 1; - if (inuse_units[staging_if[w].data.rs3] == `EX_SFU) begin - perf_inuse_sfu_per_cycle[w][inuse_sfu[staging_if[w].data.rs3]] = 1; + for (integer i = 0; i < NUM_OPDS; ++i) begin + if (staging_if[w].valid && operands_busy[i]) begin + perf_inuse_units_per_cycle[w][inuse_units[stg_opds[i]]] = 1; + if (inuse_units[stg_opds[i]] == `EX_SFU) begin + perf_inuse_sfu_per_cycle[w][inuse_sfu[stg_opds[i]]] = 1; end end end @@ -165,56 +152,24 @@ module VX_scoreboard import VX_gpu_pkg::*; #( `endif always @(*) begin - operands_busy_n = operands_busy; - if (ibuffer_fire) begin - operands_busy_n = { - inuse_regs[ibuffer_if[w].data.rs3], - inuse_regs[ibuffer_if[w].data.rs2], - inuse_regs[ibuffer_if[w].data.rs1], - inuse_regs[ibuffer_if[w].data.rd] - }; - end - if (writeback_fire) begin + for (integer i = 0; i < NUM_OPDS; ++i) begin + operands_busy_n[i] = operands_busy[i]; if (ibuffer_fire) begin - if (writeback_if.data.rd == ibuffer_if[w].data.rd) begin - operands_busy_n[0] = 0; - end - if (writeback_if.data.rd == ibuffer_if[w].data.rs1) begin - operands_busy_n[1] = 0; - end - if (writeback_if.data.rd == ibuffer_if[w].data.rs2) begin - operands_busy_n[2] = 0; - end - if (writeback_if.data.rd == ibuffer_if[w].data.rs3) begin - operands_busy_n[3] = 0; - end - end else begin - if (writeback_if.data.rd == staging_if[w].data.rd) begin - operands_busy_n[0] = 0; - end - if (writeback_if.data.rd == staging_if[w].data.rs1) begin - operands_busy_n[1] = 0; - end - if (writeback_if.data.rd == staging_if[w].data.rs2) 
begin - operands_busy_n[2] = 0; - end - if (writeback_if.data.rd == staging_if[w].data.rs3) begin - operands_busy_n[3] = 0; - end + operands_busy_n[i] = inuse_regs[ibuf_opds[i]]; end - end - if (staging_fire && staging_if[w].data.wb) begin - if (staging_if[w].data.rd == ibuffer_if[w].data.rd) begin - operands_busy_n[0] = 1; - end - if (staging_if[w].data.rd == ibuffer_if[w].data.rs1) begin - operands_busy_n[1] = 1; - end - if (staging_if[w].data.rd == ibuffer_if[w].data.rs2) begin - operands_busy_n[2] = 1; + if (writeback_fire) begin + if (ibuffer_fire) begin + if (writeback_if.data.rd == ibuf_opds[i]) begin + operands_busy_n[i] = 0; + end + end else begin + if (writeback_if.data.rd == stg_opds[i]) begin + operands_busy_n[i] = 0; + end + end end - if (staging_if[w].data.rd == ibuffer_if[w].data.rs3) begin - operands_busy_n[3] = 1; + if (staging_fire && staging_if[w].data.wb && staging_if[w].data.rd == ibuf_opds[i]) begin + operands_busy_n[i] = 1; end end end @@ -289,7 +244,7 @@ module VX_scoreboard import VX_gpu_pkg::*; #( VX_stream_arb #( .NUM_INPUTS (PER_ISSUE_WARPS), .DATAW (DATAW), - .ARBITER ("R"), + .ARBITER ("C"), .OUT_BUF (3) ) out_arb ( .clk (clk), From cfb5cd5326af65ff6eb3e551e28c6f6b034d4422 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Tue, 13 Aug 2024 21:39:08 -0700 Subject: [PATCH 035/407] arbiter runtime assertion --- hw/rtl/libs/VX_generic_arbiter.sv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/rtl/libs/VX_generic_arbiter.sv b/hw/rtl/libs/VX_generic_arbiter.sv index f55b866f8c..db0173349b 100644 --- a/hw/rtl/libs/VX_generic_arbiter.sv +++ b/hw/rtl/libs/VX_generic_arbiter.sv @@ -90,7 +90,7 @@ module VX_generic_arbiter #( end - `RUNTIME_ASSERT ((~grant_valid || (requests[grant_index] != 0)), ("invalid arbiter grant!")) + `RUNTIME_ASSERT ((~(| requests) || (grant_valid && (requests[grant_index] != 0) && (grant_onehot == (NUM_REQS'(1) << grant_index)))), ("invalid arbiter grant!")) endmodule `TRACING_ON From 
58e5435f0fbf569bc4a48c2d156e68db109b1301 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Tue, 13 Aug 2024 22:30:54 -0700 Subject: [PATCH 036/407] a priority arbiter performs better than round-robin during commit arbitration --- hw/rtl/core/VX_commit.sv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/rtl/core/VX_commit.sv b/hw/rtl/core/VX_commit.sv index d78c2ec891..ff3039484f 100644 --- a/hw/rtl/core/VX_commit.sv +++ b/hw/rtl/core/VX_commit.sv @@ -58,7 +58,7 @@ module VX_commit import VX_gpu_pkg::*, VX_trace_pkg::*; #( VX_stream_arb #( .NUM_INPUTS (`NUM_EX_UNITS), .DATAW (DATAW), - .ARBITER ("R"), + .ARBITER ("P"), .OUT_BUF (1) ) commit_arb ( .clk (clk), From 9c346dee86798c9d922ef786f44103523b8fcfd7 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Thu, 15 Aug 2024 01:55:22 -0700 Subject: [PATCH 037/407] read-only cache optimization --- hw/rtl/VX_define.vh | 14 ++++ hw/rtl/cache/VX_cache.sv | 109 ++++++++++++------------------- hw/rtl/cache/VX_cache_cluster.sv | 6 +- hw/rtl/cache/VX_cache_wrap.sv | 15 ++++- 4 files changed, 75 insertions(+), 69 deletions(-) diff --git a/hw/rtl/VX_define.vh b/hw/rtl/VX_define.vh index 4384660165..7d5dbb3424 100644 --- a/hw/rtl/VX_define.vh +++ b/hw/rtl/VX_define.vh @@ -359,6 +359,20 @@ assign src.rsp_data = dst.rsp_data; \ assign dst.rsp_ready = src.rsp_ready +`define ASSIGN_VX_MEM_BUS_RO_IF(dst, src) \ + assign dst.req_valid = src.req_valid; \ + assign dst.req_data.rw = 0; \ + assign dst.req_data.byteen = '0; \ + assign dst.req_data.addr = src.req_data.addr; \ + assign dst.req_data.flags = src.req_data.flags; \ + assign dst.req_data.data = '0; \ + assign dst.req_data.tag = src.req_data.tag; \ + assign src.req_ready = dst.req_ready; \ + assign src.rsp_valid = dst.rsp_valid; \ + assign src.rsp_data.data = dst.rsp_data.data; \ + assign src.rsp_data.tag = dst.rsp_data.tag; \ + assign dst.rsp_ready = src.rsp_ready + `define ASSIGN_VX_MEM_BUS_IF_X(dst, src, TD, TS) \ assign dst.req_valid = src.req_valid; \ assign 
dst.req_data.rw = src.req_data.rw; \ diff --git a/hw/rtl/cache/VX_cache.sv b/hw/rtl/cache/VX_cache.sv index 8221c284cb..ea34beeaa0 100644 --- a/hw/rtl/cache/VX_cache.sv +++ b/hw/rtl/cache/VX_cache.sv @@ -158,37 +158,6 @@ module VX_cache import VX_gpu_pkg::*; #( /////////////////////////////////////////////////////////////////////////// - // Memory request buffering - wire mem_req_valid_s; - wire [`CS_MEM_ADDR_WIDTH-1:0] mem_req_addr_s; - wire mem_req_rw_s; - wire [LINE_SIZE-1:0] mem_req_byteen_s; - wire [`CS_LINE_WIDTH-1:0] mem_req_data_s; - wire [MEM_TAG_WIDTH-1:0] mem_req_tag_s; - wire mem_req_flush_s; - wire mem_req_ready_s; - - wire mem_bus_if_flush; - - VX_elastic_buffer #( - .DATAW (1 + LINE_SIZE + `CS_MEM_ADDR_WIDTH + `CS_LINE_WIDTH + MEM_TAG_WIDTH + 1), - .SIZE (MEM_REQ_BUF_ENABLE ? `TO_OUT_BUF_SIZE(MEM_OUT_BUF) : 0), - .OUT_REG (`TO_OUT_BUF_REG(MEM_OUT_BUF)) - ) mem_req_buf ( - .clk (clk), - .reset (reset), - .valid_in (mem_req_valid_s), - .ready_in (mem_req_ready_s), - .data_in ({mem_req_rw_s, mem_req_byteen_s, mem_req_addr_s, mem_req_data_s, mem_req_tag_s, mem_req_flush_s}), - .data_out ({mem_bus_if.req_data.rw, mem_bus_if.req_data.byteen, mem_bus_if.req_data.addr, mem_bus_if.req_data.data, mem_bus_if.req_data.tag, mem_bus_if_flush}), - .valid_out (mem_bus_if.req_valid), - .ready_out (mem_bus_if.req_ready) - ); - - assign mem_bus_if.req_data.flags = mem_bus_if_flush ? 
`MEM_REQ_FLAGS_WIDTH'(1 << `MEM_REQ_FLAG_FLUSH) : '0; - - /////////////////////////////////////////////////////////////////////////// - // Memory response buffering wire mem_rsp_valid_s; wire [`CS_LINE_WIDTH-1:0] mem_rsp_data_s; @@ -471,20 +440,18 @@ module VX_cache import VX_gpu_pkg::*; #( assign {core_rsp_data_s[i], core_rsp_tag_s[i]} = core_rsp_data_out[i]; end - /////////////////////////////////////////////////////////////////////////// - - wire mem_req_valid_p; - wire [`CS_MEM_ADDR_WIDTH-1:0] mem_req_addr_p; - wire mem_req_rw_p; - wire [LINE_SIZE-1:0] mem_req_byteen_p; - wire [`CS_LINE_WIDTH-1:0] mem_req_data_p; - wire [MEM_TAG_WIDTH-1:0] mem_req_tag_p; - wire [MSHR_ADDR_WIDTH-1:0] mem_req_id_p; - wire mem_req_flush_p; - wire mem_req_ready_p; - // Memory request arbitration + wire mem_req_valid; + wire [`CS_MEM_ADDR_WIDTH-1:0] mem_req_addr; + wire mem_req_rw; + wire [LINE_SIZE-1:0] mem_req_byteen; + wire [`CS_LINE_WIDTH-1:0] mem_req_data; + wire [MEM_TAG_WIDTH-1:0] mem_req_tag; + wire [MSHR_ADDR_WIDTH-1:0] mem_req_id; + wire mem_req_flush; + wire mem_req_ready; + wire [NUM_BANKS-1:0][(`CS_MEM_ADDR_WIDTH + MSHR_ADDR_WIDTH + 1 + LINE_SIZE + `CS_LINE_WIDTH + 1)-1:0] data_in; for (genvar i = 0; i < NUM_BANKS; ++i) begin @@ -508,39 +475,49 @@ module VX_cache import VX_gpu_pkg::*; #( .valid_in (per_bank_mem_req_valid), .ready_in (per_bank_mem_req_ready), .data_in (data_in), - .data_out ({mem_req_addr_p, mem_req_rw_p, mem_req_byteen_p, mem_req_data_p, mem_req_id_p, mem_req_flush_p}), - .valid_out (mem_req_valid_p), - .ready_out (mem_req_ready_p), + .data_out ({mem_req_addr, mem_req_rw, mem_req_byteen, mem_req_data, mem_req_id, mem_req_flush}), + .valid_out (mem_req_valid), + .ready_out (mem_req_ready), `UNUSED_PIN (sel_out) ); if (NUM_BANKS > 1) begin - wire [`CS_BANK_SEL_BITS-1:0] mem_req_bank_id = `CS_MEM_ADDR_TO_BANK_ID(mem_req_addr_p); - assign mem_req_tag_p = MEM_TAG_WIDTH'({mem_req_bank_id, mem_req_id_p}); + wire [`CS_BANK_SEL_BITS-1:0] mem_req_bank_id = 
`CS_MEM_ADDR_TO_BANK_ID(mem_req_addr); + assign mem_req_tag = MEM_TAG_WIDTH'({mem_req_bank_id, mem_req_id}); end else begin - assign mem_req_tag_p = MEM_TAG_WIDTH'(mem_req_id_p); + assign mem_req_tag = MEM_TAG_WIDTH'(mem_req_id); end - // Memory request multi-port handling + // Memory request buffering - assign mem_req_valid_s = mem_req_valid_p; - assign mem_req_addr_s = mem_req_addr_p; - assign mem_req_tag_s = mem_req_tag_p; - assign mem_req_flush_s = mem_req_flush_p; - assign mem_req_ready_p = mem_req_ready_s; + wire mem_req_flush_b; - if (WRITE_ENABLE != 0) begin - assign mem_req_rw_s = mem_req_rw_p; - assign mem_req_byteen_s = mem_req_byteen_p; - assign mem_req_data_s = mem_req_data_p; - end else begin - `UNUSED_VAR (mem_req_byteen_p) - `UNUSED_VAR (mem_req_data_p) - `UNUSED_VAR (mem_req_rw_p) + VX_mem_bus_if #( + .DATA_SIZE (LINE_SIZE), + .TAG_WIDTH (MEM_TAG_WIDTH) + ) mem_bus_tmp_if(); + + VX_elastic_buffer #( + .DATAW (1 + LINE_SIZE + `CS_MEM_ADDR_WIDTH + `CS_LINE_WIDTH + MEM_TAG_WIDTH + 1), + .SIZE (MEM_REQ_BUF_ENABLE ? `TO_OUT_BUF_SIZE(MEM_OUT_BUF) : 0), + .OUT_REG (`TO_OUT_BUF_REG(MEM_OUT_BUF)) + ) mem_req_buf ( + .clk (clk), + .reset (reset), + .valid_in (mem_req_valid), + .ready_in (mem_req_ready), + .data_in ({mem_req_rw, mem_req_byteen, mem_req_addr, mem_req_data, mem_req_tag, mem_req_flush}), + .data_out ({mem_bus_tmp_if.req_data.rw, mem_bus_tmp_if.req_data.byteen, mem_bus_tmp_if.req_data.addr, mem_bus_tmp_if.req_data.data, mem_bus_tmp_if.req_data.tag, mem_req_flush_b}), + .valid_out (mem_bus_tmp_if.req_valid), + .ready_out (mem_bus_tmp_if.req_ready) + ); - assign mem_req_rw_s = 0; - assign mem_req_byteen_s = {LINE_SIZE{1'b1}}; - assign mem_req_data_s = '0; + assign mem_bus_tmp_if.req_data.flags = mem_req_flush_b ? 
`MEM_REQ_FLAGS_WIDTH'(1 << `MEM_REQ_FLAG_FLUSH) : '0; + + if (WRITE_ENABLE) begin + `ASSIGN_VX_MEM_BUS_IF (mem_bus_if, mem_bus_tmp_if); + end else begin + `ASSIGN_VX_MEM_BUS_RO_IF (mem_bus_if, mem_bus_tmp_if); end `ifdef PERF_ENABLE diff --git a/hw/rtl/cache/VX_cache_cluster.sv b/hw/rtl/cache/VX_cache_cluster.sv index 939768b63a..17b9b45083 100644 --- a/hw/rtl/cache/VX_cache_cluster.sv +++ b/hw/rtl/cache/VX_cache_cluster.sv @@ -197,6 +197,10 @@ module VX_cache_cluster import VX_gpu_pkg::*; #( .bus_out_if (mem_bus_tmp_if) ); - `ASSIGN_VX_MEM_BUS_IF (mem_bus_if, mem_bus_tmp_if[0]); + if (WRITE_ENABLE) begin + `ASSIGN_VX_MEM_BUS_IF (mem_bus_if, mem_bus_tmp_if[0]); + end else begin + `ASSIGN_VX_MEM_BUS_RO_IF (mem_bus_if, mem_bus_tmp_if[0]); + end endmodule diff --git a/hw/rtl/cache/VX_cache_wrap.sv b/hw/rtl/cache/VX_cache_wrap.sv index afae06181c..153b68e7de 100644 --- a/hw/rtl/cache/VX_cache_wrap.sv +++ b/hw/rtl/cache/VX_cache_wrap.sv @@ -103,6 +103,11 @@ module VX_cache_wrap import VX_gpu_pkg::*; #( .TAG_WIDTH (CACHE_MEM_TAG_WIDTH) ) mem_bus_cache_if(); + VX_mem_bus_if #( + .DATA_SIZE (LINE_SIZE), + .TAG_WIDTH (MEM_TAG_WIDTH) + ) mem_bus_tmp_if(); + if (NC_OR_BYPASS) begin : bypass_if `RESET_RELAY (nc_bypass_reset, reset); @@ -136,7 +141,7 @@ module VX_cache_wrap import VX_gpu_pkg::*; #( .core_bus_out_if(core_bus_cache_if), .mem_bus_in_if (mem_bus_cache_if), - .mem_bus_out_if (mem_bus_if) + .mem_bus_out_if (mem_bus_tmp_if) ); end else begin @@ -145,7 +150,13 @@ module VX_cache_wrap import VX_gpu_pkg::*; #( `ASSIGN_VX_MEM_BUS_IF (core_bus_cache_if[i], core_bus_if[i]); end - `ASSIGN_VX_MEM_BUS_IF (mem_bus_if, mem_bus_cache_if); + `ASSIGN_VX_MEM_BUS_IF (mem_bus_tmp_if, mem_bus_cache_if); + end + + if (WRITE_ENABLE) begin + `ASSIGN_VX_MEM_BUS_IF (mem_bus_if, mem_bus_tmp_if); + end else begin + `ASSIGN_VX_MEM_BUS_RO_IF (mem_bus_if, mem_bus_tmp_if); end if (PASSTHRU == 0) begin : cache_if From 98db24950096a1b3609c003f3a5de810f1b2cc46 Mon Sep 17 00:00:00 2001 From: Blaise 
Tine Date: Thu, 15 Aug 2024 01:56:31 -0700 Subject: [PATCH 038/407] minor updates --- hw/rtl/cache/VX_cache_bank.sv | 10 +++++----- hw/rtl/cache/VX_cache_bypass.sv | 6 +----- hw/rtl/core/VX_schedule.sv | 10 +--------- hw/rtl/libs/VX_priority_encoder.sv | 5 ++--- 4 files changed, 9 insertions(+), 22 deletions(-) diff --git a/hw/rtl/cache/VX_cache_bank.sv b/hw/rtl/cache/VX_cache_bank.sv index dbbb4aba31..3dede22d56 100644 --- a/hw/rtl/cache/VX_cache_bank.sv +++ b/hw/rtl/cache/VX_cache_bank.sv @@ -266,7 +266,7 @@ module VX_cache_bank #( if (UUID_WIDTH != 0) begin assign req_uuid_sel = tag_sel[TAG_WIDTH-1 -: UUID_WIDTH]; end else begin - assign req_uuid_sel = 0; + assign req_uuid_sel = '0; end VX_pipe_register #( @@ -283,7 +283,7 @@ module VX_cache_bank #( if (UUID_WIDTH != 0) begin assign req_uuid_st0 = tag_st0[TAG_WIDTH-1 -: UUID_WIDTH]; end else begin - assign req_uuid_st0 = 0; + assign req_uuid_st0 = '0; end wire do_init_st0 = valid_st0 && is_init_st0; @@ -365,7 +365,7 @@ module VX_cache_bank #( if (UUID_WIDTH != 0) begin assign req_uuid_st1 = tag_st1[TAG_WIDTH-1 -: UUID_WIDTH]; end else begin - assign req_uuid_st1 = 0; + assign req_uuid_st1 = '0; end wire is_read_st1 = is_creq_st1 && ~rw_st1; @@ -622,8 +622,8 @@ module VX_cache_bank #( assign mreq_queue_byteen = WRITEBACK ? 
dirty_byteen_st1 : write_byteen_st1; end else begin assign mreq_queue_rw = 0; - assign mreq_queue_data = 0; - assign mreq_queue_byteen = 0; + assign mreq_queue_data = '0; + assign mreq_queue_byteen = '0; `UNUSED_VAR (dirty_data_st1) `UNUSED_VAR (dirty_byteen_st1) end diff --git a/hw/rtl/cache/VX_cache_bypass.sv b/hw/rtl/cache/VX_cache_bypass.sv index 53d847c4e2..f36d542b35 100644 --- a/hw/rtl/cache/VX_cache_bypass.sv +++ b/hw/rtl/cache/VX_cache_bypass.sv @@ -268,11 +268,7 @@ module VX_cache_bypass #( assign rsp_idx = 1'b0; end - reg [NUM_REQS-1:0] rsp_nc_valid_r; - always @(*) begin - rsp_nc_valid_r = '0; - rsp_nc_valid_r[rsp_idx] = is_mem_rsp_nc; - end + reg [NUM_REQS-1:0] rsp_nc_valid_r = NUM_REQS'(is_mem_rsp_nc) << rsp_idx; for (genvar i = 0; i < NUM_REQS; ++i) begin assign core_rsp_in_valid[i] = core_bus_out_if[i].rsp_valid || rsp_nc_valid_r[i]; diff --git a/hw/rtl/core/VX_schedule.sv b/hw/rtl/core/VX_schedule.sv index 5fe81a7184..4454280c4c 100644 --- a/hw/rtl/core/VX_schedule.sv +++ b/hw/rtl/core/VX_schedule.sv @@ -374,14 +374,6 @@ module VX_schedule import VX_gpu_pkg::*; #( // Track pending instructions per warp - reg [`NUM_WARPS-1:0] per_warp_incr; - always @(*) begin - per_warp_incr = 0; - if (schedule_if_fire) begin - per_warp_incr[schedule_if.data.wid] = 1; - end - end - wire [`NUM_WARPS-1:0] pending_warp_empty; wire [`NUM_WARPS-1:0] pending_warp_alm_empty; @@ -394,7 +386,7 @@ module VX_schedule import VX_gpu_pkg::*; #( ) counter ( .clk (clk), .reset (pending_instr_reset[i]), - .incr (per_warp_incr[i]), + .incr (schedule_if_fire && (schedule_if.data.wid == `NW_WIDTH'(i))), .decr (commit_sched_if.committed_warps[i]), .empty (pending_warp_empty[i]), .alm_empty (pending_warp_alm_empty[i]), diff --git a/hw/rtl/libs/VX_priority_encoder.sv b/hw/rtl/libs/VX_priority_encoder.sv index 43d7d80ba9..27465b414d 100644 --- a/hw/rtl/libs/VX_priority_encoder.sv +++ b/hw/rtl/libs/VX_priority_encoder.sv @@ -115,9 +115,8 @@ module VX_priority_encoder #( onehot_r = 'x; for 
(integer i = N-1; i >= 0; --i) begin if (reversed[i]) begin - index_r = LN'(i); - onehot_r = '0; - onehot_r[i] = 1'b1; + index_r = LN'(i); + onehot_r = N'(1) << i; end end end From 2b22d47dd99f52c97bb7fb8e5a86b69c54a02850 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Thu, 15 Aug 2024 05:11:19 -0700 Subject: [PATCH 039/407] minor update --- hw/rtl/VX_define.vh | 20 ++++++++++---------- hw/rtl/cache/VX_cache_bypass.sv | 8 ++++---- hw/rtl/core/VX_lmem_unit.sv | 16 ++++++++-------- hw/rtl/core/VX_lsu_adapter.sv | 12 ++++++------ hw/rtl/interfaces/VX_lsu_mem_if.sv | 4 ++-- hw/rtl/mem/VX_mem_bus_if.sv | 4 ++-- 6 files changed, 32 insertions(+), 32 deletions(-) diff --git a/hw/rtl/VX_define.vh b/hw/rtl/VX_define.vh index 7d5dbb3424..8050ad6fcb 100644 --- a/hw/rtl/VX_define.vh +++ b/hw/rtl/VX_define.vh @@ -362,10 +362,10 @@ `define ASSIGN_VX_MEM_BUS_RO_IF(dst, src) \ assign dst.req_valid = src.req_valid; \ assign dst.req_data.rw = 0; \ - assign dst.req_data.byteen = '0; \ assign dst.req_data.addr = src.req_data.addr; \ - assign dst.req_data.flags = src.req_data.flags; \ assign dst.req_data.data = '0; \ + assign dst.req_data.byteen = '0; \ + assign dst.req_data.flags = src.req_data.flags; \ assign dst.req_data.tag = src.req_data.tag; \ assign src.req_ready = dst.req_ready; \ assign src.rsp_valid = dst.rsp_valid; \ @@ -376,10 +376,10 @@ `define ASSIGN_VX_MEM_BUS_IF_X(dst, src, TD, TS) \ assign dst.req_valid = src.req_valid; \ assign dst.req_data.rw = src.req_data.rw; \ - assign dst.req_data.byteen = src.req_data.byteen; \ assign dst.req_data.addr = src.req_data.addr; \ - assign dst.req_data.flags = src.req_data.flags; \ assign dst.req_data.data = src.req_data.data; \ + assign dst.req_data.byteen = src.req_data.byteen; \ + assign dst.req_data.flags = src.req_data.flags; \ if (TD != TS) \ assign dst.req_data.tag = {src.req_data.tag, {(TD-TS){1'b0}}}; \ else \ @@ -391,12 +391,12 @@ assign dst.rsp_ready = src.rsp_ready `define ASSIGN_VX_LSU_MEM_IF(dst, src) \ - assign 
dst.req_valid = src.req_valid; \ - assign dst.req_data = src.req_data; \ - assign src.req_ready = dst.req_ready; \ - assign src.rsp_valid = dst.rsp_valid; \ - assign src.rsp_data = dst.rsp_data; \ - assign dst.rsp_ready = src.rsp_ready + assign dst.req_valid = src.req_valid; \ + assign dst.req_data = src.req_data; \ + assign src.req_ready = dst.req_ready; \ + assign src.rsp_valid = dst.rsp_valid; \ + assign src.rsp_data = dst.rsp_data; \ + assign dst.rsp_ready = src.rsp_ready `define BUFFER_DCR_BUS_IF(dst, src, enable) \ if (enable) begin \ diff --git a/hw/rtl/cache/VX_cache_bypass.sv b/hw/rtl/cache/VX_cache_bypass.sv index f36d542b35..5c1a123ef0 100644 --- a/hw/rtl/cache/VX_cache_bypass.sv +++ b/hw/rtl/cache/VX_cache_bypass.sv @@ -129,20 +129,20 @@ module VX_cache_bypass #( for (genvar i = 0; i < NUM_REQS; ++i) begin assign core_req_nc_mux_in[i] = { core_bus_in_if[i].req_data.rw, - core_bus_in_if[i].req_data.byteen, core_bus_in_if[i].req_data.addr, - core_bus_in_if[i].req_data.flags, core_bus_in_if[i].req_data.data, + core_bus_in_if[i].req_data.byteen, + core_bus_in_if[i].req_data.flags, core_bus_in_if[i].req_data.tag }; end assign { core_req_nc_sel_rw, - core_req_nc_sel_byteen, core_req_nc_sel_addr, - core_req_nc_sel_flags, core_req_nc_sel_data, + core_req_nc_sel_byteen, + core_req_nc_sel_flags, core_req_nc_sel_tag } = core_req_nc_mux_in[core_req_nc_idx]; diff --git a/hw/rtl/core/VX_lmem_unit.sv b/hw/rtl/core/VX_lmem_unit.sv index 988133cc12..01462dd653 100644 --- a/hw/rtl/core/VX_lmem_unit.sv +++ b/hw/rtl/core/VX_lmem_unit.sv @@ -65,10 +65,10 @@ module VX_lmem_unit import VX_gpu_pkg::*; #( .data_in ({ lsu_mem_in_if[i].req_data.mask & ~is_addr_local_mask, lsu_mem_in_if[i].req_data.rw, - lsu_mem_in_if[i].req_data.byteen, lsu_mem_in_if[i].req_data.addr, - lsu_mem_in_if[i].req_data.flags, lsu_mem_in_if[i].req_data.data, + lsu_mem_in_if[i].req_data.byteen, + lsu_mem_in_if[i].req_data.flags, lsu_mem_in_if[i].req_data.tag }), .ready_in (req_global_ready), @@ -76,10 
+76,10 @@ module VX_lmem_unit import VX_gpu_pkg::*; #( .data_out ({ lsu_mem_out_if[i].req_data.mask, lsu_mem_out_if[i].req_data.rw, - lsu_mem_out_if[i].req_data.byteen, lsu_mem_out_if[i].req_data.addr, - lsu_mem_out_if[i].req_data.flags, lsu_mem_out_if[i].req_data.data, + lsu_mem_out_if[i].req_data.byteen, + lsu_mem_out_if[i].req_data.flags, lsu_mem_out_if[i].req_data.tag }), .ready_out (lsu_mem_out_if[i].req_ready) @@ -96,10 +96,10 @@ module VX_lmem_unit import VX_gpu_pkg::*; #( .data_in ({ lsu_mem_in_if[i].req_data.mask & is_addr_local_mask, lsu_mem_in_if[i].req_data.rw, - lsu_mem_in_if[i].req_data.byteen, lsu_mem_in_if[i].req_data.addr, - lsu_mem_in_if[i].req_data.flags, lsu_mem_in_if[i].req_data.data, + lsu_mem_in_if[i].req_data.byteen, + lsu_mem_in_if[i].req_data.flags, lsu_mem_in_if[i].req_data.tag }), .ready_in (req_local_ready), @@ -107,10 +107,10 @@ module VX_lmem_unit import VX_gpu_pkg::*; #( .data_out ({ lsu_lmem_if[i].req_data.mask, lsu_lmem_if[i].req_data.rw, - lsu_lmem_if[i].req_data.byteen, lsu_lmem_if[i].req_data.addr, - lsu_lmem_if[i].req_data.flags, lsu_lmem_if[i].req_data.data, + lsu_lmem_if[i].req_data.byteen, + lsu_lmem_if[i].req_data.flags, lsu_lmem_if[i].req_data.tag }), .ready_out (lsu_lmem_if[i].req_ready) diff --git a/hw/rtl/core/VX_lsu_adapter.sv b/hw/rtl/core/VX_lsu_adapter.sv index 48ef231635..8223416926 100644 --- a/hw/rtl/core/VX_lsu_adapter.sv +++ b/hw/rtl/core/VX_lsu_adapter.sv @@ -44,10 +44,10 @@ module VX_lsu_adapter import VX_gpu_pkg::*; #( for (genvar i = 0; i < NUM_LANES; ++i) begin assign req_data_in[i] = { lsu_mem_if.req_data.rw, - lsu_mem_if.req_data.byteen[i], lsu_mem_if.req_data.addr[i], - lsu_mem_if.req_data.flags[i], - lsu_mem_if.req_data.data[i] + lsu_mem_if.req_data.data[i], + lsu_mem_if.req_data.byteen[i], + lsu_mem_if.req_data.flags[i] }; end @@ -55,10 +55,10 @@ module VX_lsu_adapter import VX_gpu_pkg::*; #( assign mem_bus_if[i].req_valid = req_valid_out[i]; assign { mem_bus_if[i].req_data.rw, - 
mem_bus_if[i].req_data.byteen, mem_bus_if[i].req_data.addr, - mem_bus_if[i].req_data.flags, - mem_bus_if[i].req_data.data + mem_bus_if[i].req_data.data, + mem_bus_if[i].req_data.byteen, + mem_bus_if[i].req_data.flags } = req_data_out[i]; assign mem_bus_if[i].req_data.tag = req_tag_out[i]; assign req_ready_out[i] = mem_bus_if[i].req_ready; diff --git a/hw/rtl/interfaces/VX_lsu_mem_if.sv b/hw/rtl/interfaces/VX_lsu_mem_if.sv index 4b2c6d4afa..0789bcb134 100644 --- a/hw/rtl/interfaces/VX_lsu_mem_if.sv +++ b/hw/rtl/interfaces/VX_lsu_mem_if.sv @@ -25,10 +25,10 @@ interface VX_lsu_mem_if #( typedef struct packed { logic rw; logic [NUM_LANES-1:0] mask; - logic [NUM_LANES-1:0][DATA_SIZE-1:0] byteen; logic [NUM_LANES-1:0][ADDR_WIDTH-1:0] addr; - logic [NUM_LANES-1:0][FLAGS_WIDTH-1:0] flags; logic [NUM_LANES-1:0][DATA_SIZE*8-1:0] data; + logic [NUM_LANES-1:0][DATA_SIZE-1:0] byteen; + logic [NUM_LANES-1:0][FLAGS_WIDTH-1:0] flags; logic [TAG_WIDTH-1:0] tag; } req_data_t; diff --git a/hw/rtl/mem/VX_mem_bus_if.sv b/hw/rtl/mem/VX_mem_bus_if.sv index 5f341904c6..15f2266902 100644 --- a/hw/rtl/mem/VX_mem_bus_if.sv +++ b/hw/rtl/mem/VX_mem_bus_if.sv @@ -23,10 +23,10 @@ interface VX_mem_bus_if #( typedef struct packed { logic rw; - logic [DATA_SIZE-1:0] byteen; logic [ADDR_WIDTH-1:0] addr; - logic [FLAGS_WIDTH-1:0] flags; logic [DATA_SIZE*8-1:0] data; + logic [DATA_SIZE-1:0] byteen; + logic [FLAGS_WIDTH-1:0] flags; logic [TAG_WIDTH-1:0] tag; } req_data_t; From aaff18cca259f00ed50ccbb69748f6ef33d2b322 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Thu, 15 Aug 2024 05:11:51 -0700 Subject: [PATCH 040/407] bug fix --- hw/rtl/cache/VX_cache_bypass.sv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/rtl/cache/VX_cache_bypass.sv b/hw/rtl/cache/VX_cache_bypass.sv index 5c1a123ef0..b2aeb87911 100644 --- a/hw/rtl/cache/VX_cache_bypass.sv +++ b/hw/rtl/cache/VX_cache_bypass.sv @@ -268,7 +268,7 @@ module VX_cache_bypass #( assign rsp_idx = 1'b0; end - reg [NUM_REQS-1:0] 
rsp_nc_valid_r = NUM_REQS'(is_mem_rsp_nc) << rsp_idx; + wire [NUM_REQS-1:0] rsp_nc_valid_r = NUM_REQS'(is_mem_rsp_nc) << rsp_idx; for (genvar i = 0; i < NUM_REQS; ++i) begin assign core_rsp_in_valid[i] = core_bus_out_if[i].rsp_valid || rsp_nc_valid_r[i]; From 49738672ec812653cbcdf26df4a536bc7a6500ae Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Thu, 15 Aug 2024 19:34:50 -0700 Subject: [PATCH 041/407] minor update --- hw/rtl/VX_cluster.sv | 4 ++-- hw/rtl/VX_socket.sv | 8 ++++---- hw/rtl/Vortex.sv | 4 ++-- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/hw/rtl/VX_cluster.sv b/hw/rtl/VX_cluster.sv index 714e69dd43..b9a43f8457 100644 --- a/hw/rtl/VX_cluster.sv +++ b/hw/rtl/VX_cluster.sv @@ -102,8 +102,8 @@ module VX_cluster import VX_gpu_pkg::*; #( .WRITEBACK (`L2_WRITEBACK), .DIRTY_BYTES (`L2_WRITEBACK), .UUID_WIDTH (`UUID_WIDTH), - .CORE_OUT_BUF (2), - .MEM_OUT_BUF (2), + .CORE_OUT_BUF (3), + .MEM_OUT_BUF (3), .NC_ENABLE (1), .PASSTHRU (!`L2_ENABLED) ) l2cache ( diff --git a/hw/rtl/VX_socket.sv b/hw/rtl/VX_socket.sv index 694edfe9cd..33c29e5150 100644 --- a/hw/rtl/VX_socket.sv +++ b/hw/rtl/VX_socket.sv @@ -106,7 +106,7 @@ module VX_socket import VX_gpu_pkg::*; #( .WRITE_ENABLE (0), .NC_ENABLE (0), .CORE_OUT_BUF (2), - .MEM_OUT_BUF (2) + .MEM_OUT_BUF (0) ) icache ( `ifdef PERF_ENABLE .cache_perf (mem_perf_tmp_if.icache), @@ -153,7 +153,7 @@ module VX_socket import VX_gpu_pkg::*; #( .DIRTY_BYTES (`DCACHE_WRITEBACK), .NC_ENABLE (1), .CORE_OUT_BUF (2), - .MEM_OUT_BUF (2) + .MEM_OUT_BUF (0) ) dcache ( `ifdef PERF_ENABLE .cache_perf (mem_perf_tmp_if.dcache), @@ -185,8 +185,8 @@ module VX_socket import VX_gpu_pkg::*; #( .TAG_WIDTH (L1_MEM_TAG_WIDTH), .TAG_SEL_IDX (0), .ARBITER ("R"), - .REQ_OUT_BUF (2), - .RSP_OUT_BUF (2) + .REQ_OUT_BUF (0), + .RSP_OUT_BUF (3) ) mem_arb ( .clk (clk), .reset (reset), diff --git a/hw/rtl/Vortex.sv b/hw/rtl/Vortex.sv index b496120478..875faf47ea 100644 --- a/hw/rtl/Vortex.sv +++ b/hw/rtl/Vortex.sv @@ -86,8 +86,8 @@ module 
Vortex import VX_gpu_pkg::*; ( .WRITEBACK (`L3_WRITEBACK), .DIRTY_BYTES (`L3_WRITEBACK), .UUID_WIDTH (`UUID_WIDTH), - .CORE_OUT_BUF (2), - .MEM_OUT_BUF (2), + .CORE_OUT_BUF (3), + .MEM_OUT_BUF (3), .NC_ENABLE (1), .PASSTHRU (!`L3_ENABLED) ) l3cache ( From 65bd9afabb50bb27d642bfe996df8b70a9ec460e Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Thu, 15 Aug 2024 20:35:07 -0700 Subject: [PATCH 042/407] reset relay cleanup --- hw/rtl/cache/VX_cache.sv | 5 +---- hw/rtl/cache/VX_cache_cluster.sv | 4 +--- hw/rtl/cache/VX_cache_wrap.sv | 10 +++------- hw/rtl/core/VX_alu_unit.sv | 2 +- hw/rtl/core/VX_commit.sv | 4 +--- hw/rtl/core/VX_core.sv | 4 +--- hw/rtl/core/VX_dispatch.sv | 5 +---- hw/rtl/core/VX_dispatch_unit.sv | 6 ++---- hw/rtl/core/VX_issue.sv | 2 +- hw/rtl/core/VX_lmem_unit.sv | 10 ++++------ hw/rtl/core/VX_lsu_unit.sv | 2 +- hw/rtl/core/VX_operands.sv | 20 +++++--------------- hw/rtl/core/VX_schedule.sv | 4 ++-- hw/rtl/core/VX_scoreboard.sv | 4 +--- hw/rtl/core/VX_split_join.sv | 5 +---- hw/rtl/libs/VX_avs_adapter.sv | 10 ++++------ hw/rtl/libs/VX_stream_arb.sv | 20 +++++--------------- hw/rtl/libs/VX_stream_switch.sv | 14 ++++---------- hw/rtl/libs/VX_stream_xbar.sv | 8 ++------ hw/rtl/mem/VX_local_mem.sv | 16 +++++++++------- 20 files changed, 50 insertions(+), 105 deletions(-) diff --git a/hw/rtl/cache/VX_cache.sv b/hw/rtl/cache/VX_cache.sv index ea34beeaa0..bc55718509 100644 --- a/hw/rtl/cache/VX_cache.sv +++ b/hw/rtl/cache/VX_cache.sv @@ -136,17 +136,14 @@ module VX_cache import VX_gpu_pkg::*; #( wire [NUM_REQS-1:0][TAG_WIDTH-1:0] core_rsp_tag_s; wire [NUM_REQS-1:0] core_rsp_ready_s; - `RESET_RELAY_EX (core_rsp_reset, reset, NUM_REQS, `MAX_FANOUT); - for (genvar i = 0; i < NUM_REQS; ++i) begin - VX_elastic_buffer #( .DATAW (`CS_WORD_WIDTH + TAG_WIDTH), .SIZE (CORE_REQ_BUF_ENABLE ? 
`TO_OUT_BUF_SIZE(CORE_OUT_BUF) : 0), .OUT_REG (`TO_OUT_BUF_REG(CORE_OUT_BUF)) ) core_rsp_buf ( .clk (clk), - .reset (core_rsp_reset[i]), + .reset (reset), .valid_in (core_rsp_valid_s[i]), .ready_in (core_rsp_ready_s[i]), .data_in ({core_rsp_data_s[i], core_rsp_tag_s[i]}), diff --git a/hw/rtl/cache/VX_cache_cluster.sv b/hw/rtl/cache/VX_cache_cluster.sv index 17b9b45083..5e0010a8c8 100644 --- a/hw/rtl/cache/VX_cache_cluster.sv +++ b/hw/rtl/cache/VX_cache_cluster.sv @@ -102,8 +102,6 @@ module VX_cache_cluster import VX_gpu_pkg::*; #( .TAG_WIDTH (ARB_TAG_WIDTH) ) arb_core_bus_if[NUM_CACHES * NUM_REQS](); - `RESET_RELAY_EX (cache_arb_reset, reset, NUM_REQS, `MAX_FANOUT); - for (genvar i = 0; i < NUM_REQS; ++i) begin VX_mem_bus_if #( .DATA_SIZE (WORD_SIZE), @@ -130,7 +128,7 @@ module VX_cache_cluster import VX_gpu_pkg::*; #( .RSP_OUT_BUF ((NUM_INPUTS != NUM_CACHES) ? 2 : 0) ) cache_arb ( .clk (clk), - .reset (cache_arb_reset[i]), + .reset (reset), .bus_in_if (core_bus_tmp_if), .bus_out_if (arb_core_bus_tmp_if) ); diff --git a/hw/rtl/cache/VX_cache_wrap.sv b/hw/rtl/cache/VX_cache_wrap.sv index 153b68e7de..3b1076d46f 100644 --- a/hw/rtl/cache/VX_cache_wrap.sv +++ b/hw/rtl/cache/VX_cache_wrap.sv @@ -110,8 +110,6 @@ module VX_cache_wrap import VX_gpu_pkg::*; #( if (NC_OR_BYPASS) begin : bypass_if - `RESET_RELAY (nc_bypass_reset, reset); - VX_cache_bypass #( .NUM_REQS (NUM_REQS), .TAG_SEL_IDX (TAG_SEL_IDX), @@ -135,7 +133,7 @@ module VX_cache_wrap import VX_gpu_pkg::*; #( .MEM_OUT_BUF (MEM_OUT_BUF) ) cache_bypass ( .clk (clk), - .reset (nc_bypass_reset), + .reset (reset), .core_bus_in_if (core_bus_if), .core_bus_out_if(core_bus_cache_if), @@ -160,9 +158,7 @@ module VX_cache_wrap import VX_gpu_pkg::*; #( end if (PASSTHRU == 0) begin : cache_if - - `RESET_RELAY (cache_reset, reset); - + VX_cache #( .INSTANCE_ID (INSTANCE_ID), .CACHE_SIZE (CACHE_SIZE), @@ -184,7 +180,7 @@ module VX_cache_wrap import VX_gpu_pkg::*; #( .MEM_OUT_BUF (NC_OR_BYPASS ? 
1 : MEM_OUT_BUF) ) cache ( .clk (clk), - .reset (cache_reset), + .reset (reset), `ifdef PERF_ENABLE .cache_perf (cache_perf), `endif diff --git a/hw/rtl/core/VX_alu_unit.sv b/hw/rtl/core/VX_alu_unit.sv index 72ef74b9c9..adbc7898b7 100644 --- a/hw/rtl/core/VX_alu_unit.sv +++ b/hw/rtl/core/VX_alu_unit.sv @@ -57,7 +57,7 @@ module VX_alu_unit #( for (genvar block_idx = 0; block_idx < BLOCK_SIZE; ++block_idx) begin : alu_blocks - `RESET_RELAY_EN (block_reset, reset,(BLOCK_SIZE > 1)); + `RESET_RELAY_EN (block_reset, reset, (BLOCK_SIZE > 1)); wire is_muldiv_op = `EXT_M_ENABLED && (per_block_execute_if[block_idx].data.op_args.alu.xtype == `ALU_TYPE_MULDIV); diff --git a/hw/rtl/core/VX_commit.sv b/hw/rtl/core/VX_commit.sv index ff3039484f..7106cc65f0 100644 --- a/hw/rtl/core/VX_commit.sv +++ b/hw/rtl/core/VX_commit.sv @@ -53,8 +53,6 @@ module VX_commit import VX_gpu_pkg::*, VX_trace_pkg::*; #( assign commit_if[j * `ISSUE_WIDTH + i].ready = ready_in[j]; end - `RESET_RELAY (arb_reset, reset); - VX_stream_arb #( .NUM_INPUTS (`NUM_EX_UNITS), .DATAW (DATAW), @@ -62,7 +60,7 @@ module VX_commit import VX_gpu_pkg::*, VX_trace_pkg::*; #( .OUT_BUF (1) ) commit_arb ( .clk (clk), - .reset (arb_reset), + .reset (reset), .valid_in (valid_in), .ready_in (ready_in), .data_in (data_in), diff --git a/hw/rtl/core/VX_core.sv b/hw/rtl/core/VX_core.sv index d8cd804f9a..35758824d4 100644 --- a/hw/rtl/core/VX_core.sv +++ b/hw/rtl/core/VX_core.sv @@ -306,8 +306,6 @@ module VX_core import VX_gpu_pkg::*; #( .TAG_WIDTH (DCACHE_TAG_WIDTH) ) dcache_bus_tmp_if[DCACHE_CHANNELS](); - `RESET_RELAY (lsu_adapter_reset, reset); - VX_lsu_adapter #( .NUM_LANES (DCACHE_CHANNELS), .DATA_SIZE (DCACHE_WORD_SIZE), @@ -318,7 +316,7 @@ module VX_core import VX_gpu_pkg::*; #( .RSP_OUT_BUF (0) ) lsu_adapter ( .clk (clk), - .reset (lsu_adapter_reset), + .reset (reset), .lsu_mem_if (dcache_coalesced_if), .mem_bus_if (dcache_bus_tmp_if) ); diff --git a/hw/rtl/core/VX_dispatch.sv b/hw/rtl/core/VX_dispatch.sv index 
dcc15d5e3d..0766fd83fd 100644 --- a/hw/rtl/core/VX_dispatch.sv +++ b/hw/rtl/core/VX_dispatch.sv @@ -54,16 +54,13 @@ module VX_dispatch import VX_gpu_pkg::*; #( assign operands_if.ready = operands_reset[operands_if.data.ex_type]; for (genvar i = 0; i < `NUM_EX_UNITS; ++i) begin - - `RESET_RELAY (buffer_reset, reset); - VX_elastic_buffer #( .DATAW (DATAW), .SIZE (2), .OUT_REG (1) ) buffer ( .clk (clk), - .reset (buffer_reset), + .reset (reset), .valid_in (operands_if.valid && (operands_if.data.ex_type == `EX_BITS'(i))), .ready_in (operands_reset[i]), .data_in ({ diff --git a/hw/rtl/core/VX_dispatch_unit.sv b/hw/rtl/core/VX_dispatch_unit.sv index 618ea12214..3c84649bd0 100644 --- a/hw/rtl/core/VX_dispatch_unit.sv +++ b/hw/rtl/core/VX_dispatch_unit.sv @@ -85,8 +85,6 @@ module VX_dispatch_unit import VX_gpu_pkg::*; #( wire [ISSUE_W-1:0] issue_idx = ISSUE_W'(batch_idx * BLOCK_SIZE) + ISSUE_W'(block_idx); assign issue_indices[block_idx] = issue_idx; - `RESET_RELAY_EN (block_reset, reset, (BLOCK_SIZE > 1)); - wire valid_p, ready_p; if (`NUM_THREADS != NUM_LANES) begin @@ -102,7 +100,7 @@ module VX_dispatch_unit import VX_gpu_pkg::*; #( wire fire_eop = fire_p && is_last_p; always @(posedge clk) begin - if (block_reset) begin + if (reset) begin sent_mask_p <= '0; is_first_p <= 1; end else begin @@ -225,7 +223,7 @@ module VX_dispatch_unit import VX_gpu_pkg::*; #( .OUT_REG (`TO_OUT_BUF_REG(OUT_BUF)) ) buf_out ( .clk (clk), - .reset (block_reset), + .reset (reset), .valid_in (valid_p), .ready_in (ready_p), .data_in ({ diff --git a/hw/rtl/core/VX_issue.sv b/hw/rtl/core/VX_issue.sv index e77a3633a0..5d5af64d94 100644 --- a/hw/rtl/core/VX_issue.sv +++ b/hw/rtl/core/VX_issue.sv @@ -77,7 +77,7 @@ module VX_issue import VX_gpu_pkg::*; #( assign decode_if.ibuf_pop[issue_id * PER_ISSUE_WARPS +: PER_ISSUE_WARPS] = per_issue_decode_if.ibuf_pop; `endif - `RESET_RELAY (slice_reset, reset); + `RESET_RELAY_EN (slice_reset, reset, (`ISSUE_WIDTH > 1)); VX_issue_slice #( .INSTANCE_ID 
($sformatf("%s%0d", INSTANCE_ID, issue_id)), diff --git a/hw/rtl/core/VX_lmem_unit.sv b/hw/rtl/core/VX_lmem_unit.sv index 01462dd653..e61f626fdc 100644 --- a/hw/rtl/core/VX_lmem_unit.sv +++ b/hw/rtl/core/VX_lmem_unit.sv @@ -39,8 +39,6 @@ module VX_lmem_unit import VX_gpu_pkg::*; #( .TAG_WIDTH (LSU_TAG_WIDTH) ) lsu_lmem_if[`NUM_LSU_BLOCKS](); - `RESET_RELAY_EX (block_reset, reset, `NUM_LSU_BLOCKS, 1); - for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin : demux_slices wire [`NUM_LSU_LANES-1:0] is_addr_local_mask; @@ -60,7 +58,7 @@ module VX_lmem_unit import VX_gpu_pkg::*; #( .OUT_REG (3) ) req_global_buf ( .clk (clk), - .reset (block_reset[i]), + .reset (reset), .valid_in (lsu_mem_in_if[i].req_valid && is_addr_global), .data_in ({ lsu_mem_in_if[i].req_data.mask & ~is_addr_local_mask, @@ -91,7 +89,7 @@ module VX_lmem_unit import VX_gpu_pkg::*; #( .OUT_REG (0) ) req_local_buf ( .clk (clk), - .reset (block_reset[i]), + .reset (reset), .valid_in (lsu_mem_in_if[i].req_valid && is_addr_local), .data_in ({ lsu_mem_in_if[i].req_data.mask & is_addr_local_mask, @@ -126,7 +124,7 @@ module VX_lmem_unit import VX_gpu_pkg::*; #( .OUT_BUF (1) ) rsp_arb ( .clk (clk), - .reset (block_reset[i]), + .reset (reset), .valid_in ({ lsu_lmem_if[i].rsp_valid, lsu_mem_out_if[i].rsp_valid @@ -167,7 +165,7 @@ module VX_lmem_unit import VX_gpu_pkg::*; #( .RSP_OUT_BUF (0) ) lsu_adapter ( .clk (clk), - .reset (block_reset[i]), + .reset (reset), .lsu_mem_if (lsu_lmem_if[i]), .mem_bus_if (lmem_bus_tmp_if) ); diff --git a/hw/rtl/core/VX_lsu_unit.sv b/hw/rtl/core/VX_lsu_unit.sv index 5e280e48f0..febaec5aa0 100644 --- a/hw/rtl/core/VX_lsu_unit.sv +++ b/hw/rtl/core/VX_lsu_unit.sv @@ -56,7 +56,7 @@ module VX_lsu_unit import VX_gpu_pkg::*; #( for (genvar block_idx = 0; block_idx < BLOCK_SIZE; ++block_idx) begin : lsu_blocks - `RESET_RELAY (slice_reset, reset); + `RESET_RELAY_EN (slice_reset, reset, (BLOCK_SIZE > 1)); VX_lsu_slice #( .INSTANCE_ID ($sformatf("%s%0d", INSTANCE_ID, block_idx)) diff --git 
a/hw/rtl/core/VX_operands.sv b/hw/rtl/core/VX_operands.sv index 3f64caf77e..62e2bb8831 100644 --- a/hw/rtl/core/VX_operands.sv +++ b/hw/rtl/core/VX_operands.sv @@ -99,6 +99,8 @@ module VX_operands import VX_gpu_pkg::*; #( assign req_in_valid = {NUM_SRC_OPDS{scoreboard_if.valid}} & src_valid; + `RESET_RELAY (req_xbar_reset, reset); + VX_stream_xbar #( .NUM_INPUTS (NUM_SRC_OPDS), .NUM_OUTPUTS (NUM_BANKS), @@ -108,7 +110,7 @@ module VX_operands import VX_gpu_pkg::*; #( .OUT_BUF (0) // no output buffering ) req_xbar ( .clk (clk), - .reset (reset), + .reset (req_xbar_reset), `UNUSED_PIN(collisions), .valid_in (req_in_valid), .data_in (req_in_data), @@ -247,25 +249,13 @@ module VX_operands import VX_gpu_pkg::*; #( assign gpr_wr_bank_idx = '0; end - `ifdef GPR_RESET - reg wr_enabled = 0; - always @(posedge clk) begin - if (reset) begin - wr_enabled <= 1; - end - end - `else - wire wr_enabled = 1; - `endif - for (genvar b = 0; b < NUM_BANKS; ++b) begin wire gpr_wr_enabled; if (BANK_SEL_BITS != 0) begin - assign gpr_wr_enabled = wr_enabled - && writeback_if.valid + assign gpr_wr_enabled = writeback_if.valid && (gpr_wr_bank_idx == BANK_SEL_BITS'(b)); end else begin - assign gpr_wr_enabled = wr_enabled && writeback_if.valid; + assign gpr_wr_enabled = writeback_if.valid; end wire [BYTEENW-1:0] wren; diff --git a/hw/rtl/core/VX_schedule.sv b/hw/rtl/core/VX_schedule.sv index 4454280c4c..9cdf879eb8 100644 --- a/hw/rtl/core/VX_schedule.sv +++ b/hw/rtl/core/VX_schedule.sv @@ -377,7 +377,7 @@ module VX_schedule import VX_gpu_pkg::*; #( wire [`NUM_WARPS-1:0] pending_warp_empty; wire [`NUM_WARPS-1:0] pending_warp_alm_empty; - `RESET_RELAY_EX (pending_instr_reset, reset, `NUM_WARPS, `MAX_FANOUT); + `RESET_RELAY (pending_instr_reset, reset); for (genvar i = 0; i < `NUM_WARPS; ++i) begin VX_pending_size #( @@ -385,7 +385,7 @@ module VX_schedule import VX_gpu_pkg::*; #( .ALM_EMPTY (1) ) counter ( .clk (clk), - .reset (pending_instr_reset[i]), + .reset (pending_instr_reset), .incr 
(schedule_if_fire && (schedule_if.data.wid == `NW_WIDTH'(i))), .decr (commit_sched_if.committed_warps[i]), .empty (pending_warp_empty[i]), diff --git a/hw/rtl/core/VX_scoreboard.sv b/hw/rtl/core/VX_scoreboard.sv index cd9f3093d7..503cc22c8f 100644 --- a/hw/rtl/core/VX_scoreboard.sv +++ b/hw/rtl/core/VX_scoreboard.sv @@ -239,8 +239,6 @@ module VX_scoreboard import VX_gpu_pkg::*; #( assign staging_if[w].ready = arb_ready_in[w] && operands_ready[w]; end - `RESET_RELAY (arb_reset, reset); - VX_stream_arb #( .NUM_INPUTS (PER_ISSUE_WARPS), .DATAW (DATAW), @@ -248,7 +246,7 @@ module VX_scoreboard import VX_gpu_pkg::*; #( .OUT_BUF (3) ) out_arb ( .clk (clk), - .reset (arb_reset), + .reset (reset), .valid_in (arb_valid_in), .ready_in (arb_ready_in), .data_in (arb_data_in), diff --git a/hw/rtl/core/VX_split_join.sv b/hw/rtl/core/VX_split_join.sv index 9f47023b08..4b58ebc265 100644 --- a/hw/rtl/core/VX_split_join.sv +++ b/hw/rtl/core/VX_split_join.sv @@ -46,15 +46,12 @@ module VX_split_join import VX_gpu_pkg::*; #( wire ipdom_pop = valid && sjoin.valid && sjoin_is_dvg; for (genvar i = 0; i < `NUM_WARPS; ++i) begin : ipdom_slices - - `RESET_RELAY (ipdom_reset, reset); - VX_ipdom_stack #( .WIDTH (`NUM_THREADS+`PC_BITS), .DEPTH (`DV_STACK_SIZE) ) ipdom_stack ( .clk (clk), - .reset (ipdom_reset), + .reset (reset), .q0 (ipdom_q0), .q1 (ipdom_q1), .d (ipdom_data[i]), diff --git a/hw/rtl/libs/VX_avs_adapter.sv b/hw/rtl/libs/VX_avs_adapter.sv index 659114c8de..f0941b0285 100644 --- a/hw/rtl/libs/VX_avs_adapter.sv +++ b/hw/rtl/libs/VX_avs_adapter.sv @@ -81,15 +81,13 @@ module VX_avs_adapter #( assign req_queue_push[i] = mem_req_valid && ~mem_req_rw && bank_req_ready[i] && (req_bank_sel == i); end - `RESET_RELAY_EX (bank_reset, reset, NUM_BANKS, 1); - for (genvar i = 0; i < NUM_BANKS; ++i) begin VX_pending_size #( .SIZE (RD_QUEUE_SIZE) ) pending_size ( .clk (clk), - .reset (bank_reset[i]), + .reset (reset), .incr (req_queue_push[i]), .decr (req_queue_pop[i]), `UNUSED_PIN (empty), @@ 
-105,7 +103,7 @@ module VX_avs_adapter #( .DEPTH (RD_QUEUE_SIZE) ) rd_req_queue ( .clk (clk), - .reset (bank_reset[i]), + .reset (reset), .push (req_queue_push[i]), .pop (req_queue_pop[i]), .data_in (mem_req_tag), @@ -135,7 +133,7 @@ module VX_avs_adapter #( .OUT_REG (`TO_OUT_BUF_REG(REQ_OUT_BUF)) ) req_out_buf ( .clk (clk), - .reset (bank_reset[i]), + .reset (reset), .valid_in (valid_out_w), .ready_in (ready_out_w), .data_in ({mem_req_rw, mem_req_byteen, req_bank_off, mem_req_data}), @@ -177,7 +175,7 @@ module VX_avs_adapter #( .DEPTH (RD_QUEUE_SIZE) ) rd_rsp_queue ( .clk (clk), - .reset (bank_reset[i]), + .reset (reset), .push (avs_readdatavalid[i]), .pop (req_queue_pop[i]), .data_in (avs_readdata[i]), diff --git a/hw/rtl/libs/VX_stream_arb.sv b/hw/rtl/libs/VX_stream_arb.sv index d5157a8dd1..ffb56eb260 100644 --- a/hw/rtl/libs/VX_stream_arb.sv +++ b/hw/rtl/libs/VX_stream_arb.sv @@ -49,8 +49,6 @@ module VX_stream_arb #( localparam SLICE_END = `MIN(SLICE_BEGIN + NUM_REQS, NUM_INPUTS); localparam SLICE_SIZE = SLICE_END - SLICE_BEGIN; - `RESET_RELAY (slice_reset, reset); - VX_stream_arb #( .NUM_INPUTS (SLICE_SIZE), .NUM_OUTPUTS (1), @@ -60,7 +58,7 @@ module VX_stream_arb #( .OUT_BUF (OUT_BUF) ) arb_slice ( .clk (clk), - .reset (slice_reset), + .reset (reset), .valid_in (valid_in[SLICE_END-1: SLICE_BEGIN]), .ready_in (ready_in[SLICE_END-1: SLICE_BEGIN]), .data_in (data_in[SLICE_END-1: SLICE_BEGIN]), @@ -92,8 +90,6 @@ module VX_stream_arb #( wire [DATAW-1:0] data_tmp_u; wire [`LOG2UP(SLICE_SIZE)-1:0] sel_tmp_u; - `RESET_RELAY (slice_reset, reset); - if (MAX_FANOUT != 1) begin VX_stream_arb #( .NUM_INPUTS (SLICE_SIZE), @@ -104,7 +100,7 @@ module VX_stream_arb #( .OUT_BUF (`TO_OUT_RBUF(OUT_BUF)) // to registered output ) fanout_slice_arb ( .clk (clk), - .reset (slice_reset), + .reset (reset), .valid_in (valid_in[SLICE_END-1: SLICE_BEGIN]), .data_in (data_in[SLICE_END-1: SLICE_BEGIN]), .ready_in (ready_in[SLICE_END-1: SLICE_BEGIN]), @@ -206,8 +202,6 @@ module 
VX_stream_arb #( localparam SLICE_END = `MIN(SLICE_BEGIN + NUM_REQS, NUM_OUTPUTS); localparam SLICE_SIZE = SLICE_END - SLICE_BEGIN; - `RESET_RELAY (slice_reset, reset); - VX_stream_arb #( .NUM_INPUTS (1), .NUM_OUTPUTS (SLICE_SIZE), @@ -217,7 +211,7 @@ module VX_stream_arb #( .OUT_BUF (OUT_BUF) ) arb_slice ( .clk (clk), - .reset (slice_reset), + .reset (reset), .valid_in (valid_in[i]), .ready_in (ready_in[i]), .data_in (data_in[i]), @@ -267,8 +261,6 @@ module VX_stream_arb #( localparam SLICE_END = `MIN(SLICE_BEGIN + MAX_FANOUT, NUM_OUTPUTS); localparam SLICE_SIZE = SLICE_END - SLICE_BEGIN; - `RESET_RELAY (slice_reset, reset); - VX_stream_arb #( .NUM_INPUTS (1), .NUM_OUTPUTS (SLICE_SIZE), @@ -278,7 +270,7 @@ module VX_stream_arb #( .OUT_BUF (OUT_BUF) ) fanout_slice_arb ( .clk (clk), - .reset (slice_reset), + .reset (reset), .valid_in (valid_tmp[i]), .ready_in (ready_tmp[i]), .data_in (data_tmp[i]), @@ -342,8 +334,6 @@ module VX_stream_arb #( // #Inputs == #Outputs - `RESET_RELAY_EX (out_buf_reset, reset, NUM_OUTPUTS, `MAX_FANOUT); - for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin VX_elastic_buffer #( @@ -353,7 +343,7 @@ module VX_stream_arb #( .LUTRAM (`TO_OUT_BUF_LUTRAM(OUT_BUF)) ) out_buf ( .clk (clk), - .reset (out_buf_reset[i]), + .reset (reset), .valid_in (valid_in[i]), .ready_in (ready_in[i]), .data_in (data_in[i]), diff --git a/hw/rtl/libs/VX_stream_switch.sv b/hw/rtl/libs/VX_stream_switch.sv index 3a905cb1dd..c379dd7c0b 100644 --- a/hw/rtl/libs/VX_stream_switch.sv +++ b/hw/rtl/libs/VX_stream_switch.sv @@ -72,8 +72,6 @@ module VX_stream_switch #( end end - `RESET_RELAY_EX (out_buf_reset, reset, NUM_OUTPUTS, `MAX_FANOUT); - for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin VX_elastic_buffer #( .DATAW (DATAW), @@ -81,7 +79,7 @@ module VX_stream_switch #( .OUT_REG (`TO_OUT_BUF_REG(OUT_BUF)) ) out_buf ( .clk (clk), - .reset (out_buf_reset[i]), + .reset (reset), .valid_in (valid_out_r[i]), .ready_in (ready_out_r[i]), .data_in (data_out_r[i]), @@ -103,8 +101,6 @@ 
module VX_stream_switch #( assign ready_in[i] = ready_out_r[i][sel_in[i]]; end - `RESET_RELAY_EX (out_buf_reset, reset, NUM_OUTPUTS, `MAX_FANOUT); - for (genvar i = 0; i < NUM_INPUTS; ++i) begin for (genvar j = 0; j < NUM_REQS; ++j) begin localparam ii = i * NUM_REQS + j; @@ -115,7 +111,7 @@ module VX_stream_switch #( .OUT_REG (`TO_OUT_BUF_REG(OUT_BUF)) ) out_buf ( .clk (clk), - .reset (out_buf_reset[ii]), + .reset (reset), .valid_in (valid_out_r[i][j]), .ready_in (ready_out_r[i][j]), .data_in (data_in[i]), @@ -124,7 +120,7 @@ module VX_stream_switch #( .ready_out (ready_out[ii]) ); end else begin - `UNUSED_VAR (out_buf_reset[ii]) + `UNUSED_VAR (reset) `UNUSED_VAR (valid_out_r[i][j]) assign ready_out_r[i][j] = '0; end @@ -137,8 +133,6 @@ module VX_stream_switch #( `UNUSED_VAR (sel_in) - `RESET_RELAY_EX (out_buf_reset, reset, NUM_OUTPUTS, `MAX_FANOUT); - for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin VX_elastic_buffer #( .DATAW (DATAW), @@ -146,7 +140,7 @@ module VX_stream_switch #( .OUT_REG (`TO_OUT_BUF_REG(OUT_BUF)) ) out_buf ( .clk (clk), - .reset (out_buf_reset[i]), + .reset (reset), .valid_in (valid_in[i]), .ready_in (ready_in[i]), .data_in (data_in[i]), diff --git a/hw/rtl/libs/VX_stream_xbar.sv b/hw/rtl/libs/VX_stream_xbar.sv index 8cdb9ced6d..b37c9b6760 100644 --- a/hw/rtl/libs/VX_stream_xbar.sv +++ b/hw/rtl/libs/VX_stream_xbar.sv @@ -58,8 +58,6 @@ module VX_stream_xbar #( assign valid_in_q[j] = valid_in[j] && (sel_in[j] == i); end - `RESET_RELAY (slice_reset, reset); - VX_stream_arb #( .NUM_INPUTS (NUM_INPUTS), .NUM_OUTPUTS (1), @@ -69,7 +67,7 @@ module VX_stream_xbar #( .OUT_BUF (OUT_BUF) ) xbar_arb ( .clk (clk), - .reset (slice_reset), + .reset (reset), .valid_in (valid_in_q), .data_in (data_in), .ready_in (per_output_ready_in[i]), @@ -123,8 +121,6 @@ module VX_stream_xbar #( assign data_out_r = {NUM_OUTPUTS{data_in}}; assign ready_in = ready_out_r[sel_in]; - `RESET_RELAY_EX (out_buf_reset, reset, NUM_OUTPUTS, `MAX_FANOUT); - for (genvar i = 0; i < 
NUM_OUTPUTS; ++i) begin VX_elastic_buffer #( .DATAW (DATAW), @@ -133,7 +129,7 @@ module VX_stream_xbar #( .LUTRAM (`TO_OUT_BUF_LUTRAM(OUT_BUF)) ) out_buf ( .clk (clk), - .reset (out_buf_reset[i]), + .reset (reset), .valid_in (valid_out_r[i]), .ready_in (ready_out_r[i]), .data_in (data_out_r[i]), diff --git a/hw/rtl/mem/VX_local_mem.sv b/hw/rtl/mem/VX_local_mem.sv index abd44b5648..67a6dfc48b 100644 --- a/hw/rtl/mem/VX_local_mem.sv +++ b/hw/rtl/mem/VX_local_mem.sv @@ -116,6 +116,8 @@ module VX_local_mem import VX_gpu_pkg::*; #( assign mem_bus_if[i].req_ready = req_ready_in[i]; end + `RESET_RELAY (req_xbar_reset, reset); + VX_stream_xbar #( .NUM_INPUTS (NUM_REQS), .NUM_OUTPUTS (NUM_BANKS), @@ -125,7 +127,7 @@ module VX_local_mem import VX_gpu_pkg::*; #( .OUT_BUF (3) // output should be registered for the data_store addressing ) req_xbar ( .clk (clk), - .reset (reset), + .reset (req_xbar_reset), `ifdef PERF_ENABLE .collisions (perf_collisions), `else @@ -163,8 +165,6 @@ module VX_local_mem import VX_gpu_pkg::*; #( wire bank_rsp_valid, bank_rsp_ready; wire [WORD_WIDTH-1:0] bank_rsp_data; - `RESET_RELAY_EN (bram_reset, reset, (NUM_BANKS > 1)); - VX_sp_ram #( .DATAW (WORD_WIDTH), .SIZE (WORDS_PER_BANK), @@ -172,7 +172,7 @@ module VX_local_mem import VX_gpu_pkg::*; #( .NO_RWCHECK (1) ) data_store ( .clk (clk), - .reset (bram_reset), + .reset (reset), .read (per_bank_req_valid[i] && per_bank_req_ready[i] && ~per_bank_req_rw[i]), .write (per_bank_req_valid[i] && per_bank_req_ready[i] && per_bank_req_rw[i]), .wren (per_bank_req_byteen[i]), @@ -185,7 +185,7 @@ module VX_local_mem import VX_gpu_pkg::*; #( reg [BANK_ADDR_WIDTH-1:0] last_wr_addr; reg last_wr_valid; always @(posedge clk) begin - if (bram_reset) begin + if (reset) begin last_wr_valid <= 0; end else begin last_wr_valid <= per_bank_req_valid[i] && per_bank_req_ready[i] && per_bank_req_rw[i]; @@ -203,7 +203,7 @@ module VX_local_mem import VX_gpu_pkg::*; #( .DATAW (REQ_SEL_WIDTH + WORD_WIDTH + TAG_WIDTH) ) bram_buf ( 
.clk (clk), - .reset (bram_reset), + .reset (reset), .valid_in (bank_rsp_valid), .ready_in (bank_rsp_ready), .data_in ({per_bank_req_idx[i], bank_rsp_data, per_bank_req_tag[i]}), @@ -225,6 +225,8 @@ module VX_local_mem import VX_gpu_pkg::*; #( wire [NUM_REQS-1:0][RSP_DATAW-1:0] rsp_data_out; wire [NUM_REQS-1:0] rsp_ready_out; + `RESET_RELAY (rsp_xbar_reset, reset); + VX_stream_xbar #( .NUM_INPUTS (NUM_BANKS), .NUM_OUTPUTS (NUM_REQS), @@ -233,7 +235,7 @@ module VX_local_mem import VX_gpu_pkg::*; #( .OUT_BUF (OUT_BUF) ) rsp_xbar ( .clk (clk), - .reset (reset), + .reset (rsp_xbar_reset), `UNUSED_PIN (collisions), .sel_in (per_bank_rsp_idx), .valid_in (per_bank_rsp_valid), From f4983cb380c4b3634cc85f77bd9dcda9979f3e9e Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Thu, 15 Aug 2024 21:12:28 -0700 Subject: [PATCH 043/407] core memory unit refactoring --- hw/rtl/core/VX_core.sv | 126 ++------------------- hw/rtl/core/VX_lmem_unit.sv | 199 --------------------------------- hw/rtl/core/VX_mem_unit.sv | 217 ++++++++++++++++++++++++++++++++++++ hw/rtl/mem/VX_lmem_demux.sv | 132 ++++++++++++++++++++++ 4 files changed, 356 insertions(+), 318 deletions(-) delete mode 100644 hw/rtl/core/VX_lmem_unit.sv create mode 100644 hw/rtl/core/VX_mem_unit.sv create mode 100644 hw/rtl/mem/VX_lmem_demux.sv diff --git a/hw/rtl/core/VX_core.sv b/hw/rtl/core/VX_core.sv index 35758824d4..65ad65c75b 100644 --- a/hw/rtl/core/VX_core.sv +++ b/hw/rtl/core/VX_core.sv @@ -202,132 +202,20 @@ module VX_core import VX_gpu_pkg::*; #( .commit_sched_if(commit_sched_if) ); - VX_lsu_mem_if #( - .NUM_LANES (`NUM_LSU_LANES), - .DATA_SIZE (LSU_WORD_SIZE), - .TAG_WIDTH (LSU_TAG_WIDTH) - ) lsu_dcache_if[`NUM_LSU_BLOCKS](); - -`ifdef LMEM_ENABLE - `RESET_RELAY (lmem_unit_reset, reset); - VX_lmem_unit #( + VX_mem_unit #( .INSTANCE_ID (INSTANCE_ID) - ) lmem_unit ( - .clk (clk), - .reset (lmem_unit_reset), + ) mem_unit ( + .clk (clk), + .reset (lmem_unit_reset), `ifdef PERF_ENABLE - .cache_perf 
(mem_perf_tmp_if.lmem), + .cache_perf (mem_perf_tmp_if.lmem), `endif - .lsu_mem_in_if (lsu_mem_if), - .lsu_mem_out_if (lsu_dcache_if) + .lsu_mem_in_if (lsu_mem_if), + .dcache_bus_if (dcache_bus_if) ); -`else - - for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin - `ASSIGN_VX_LSU_MEM_IF (lsu_dcache_if[i], lsu_mem_if[i]); - end - -`endif - - for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin : coalescer_blocks - - VX_lsu_mem_if #( - .NUM_LANES (DCACHE_CHANNELS), - .DATA_SIZE (DCACHE_WORD_SIZE), - .TAG_WIDTH (DCACHE_TAG_WIDTH) - ) dcache_coalesced_if(); - - if (LSU_WORD_SIZE != DCACHE_WORD_SIZE) begin : coalescer_if - - `RESET_RELAY (mem_coalescer_reset, reset); - - VX_mem_coalescer #( - .INSTANCE_ID ($sformatf("%s-coalescer%0d", INSTANCE_ID, i)), - .NUM_REQS (`NUM_LSU_LANES), - .DATA_IN_SIZE (LSU_WORD_SIZE), - .DATA_OUT_SIZE (DCACHE_WORD_SIZE), - .ADDR_WIDTH (LSU_ADDR_WIDTH), - .FLAGS_WIDTH (`MEM_REQ_FLAGS_WIDTH), - .TAG_WIDTH (LSU_TAG_WIDTH), - .UUID_WIDTH (`UUID_WIDTH), - .QUEUE_SIZE (`LSUQ_OUT_SIZE) - ) mem_coalescer ( - .clk (clk), - .reset (mem_coalescer_reset), - - // Input request - .in_req_valid (lsu_dcache_if[i].req_valid), - .in_req_mask (lsu_dcache_if[i].req_data.mask), - .in_req_rw (lsu_dcache_if[i].req_data.rw), - .in_req_byteen (lsu_dcache_if[i].req_data.byteen), - .in_req_addr (lsu_dcache_if[i].req_data.addr), - .in_req_flags (lsu_dcache_if[i].req_data.flags), - .in_req_data (lsu_dcache_if[i].req_data.data), - .in_req_tag (lsu_dcache_if[i].req_data.tag), - .in_req_ready (lsu_dcache_if[i].req_ready), - - // Input response - .in_rsp_valid (lsu_dcache_if[i].rsp_valid), - .in_rsp_mask (lsu_dcache_if[i].rsp_data.mask), - .in_rsp_data (lsu_dcache_if[i].rsp_data.data), - .in_rsp_tag (lsu_dcache_if[i].rsp_data.tag), - .in_rsp_ready (lsu_dcache_if[i].rsp_ready), - - // Output request - .out_req_valid (dcache_coalesced_if.req_valid), - .out_req_mask (dcache_coalesced_if.req_data.mask), - .out_req_rw (dcache_coalesced_if.req_data.rw), - .out_req_byteen 
(dcache_coalesced_if.req_data.byteen), - .out_req_addr (dcache_coalesced_if.req_data.addr), - .out_req_flags (dcache_coalesced_if.req_data.flags), - .out_req_data (dcache_coalesced_if.req_data.data), - .out_req_tag (dcache_coalesced_if.req_data.tag), - .out_req_ready (dcache_coalesced_if.req_ready), - - // Output response - .out_rsp_valid (dcache_coalesced_if.rsp_valid), - .out_rsp_mask (dcache_coalesced_if.rsp_data.mask), - .out_rsp_data (dcache_coalesced_if.rsp_data.data), - .out_rsp_tag (dcache_coalesced_if.rsp_data.tag), - .out_rsp_ready (dcache_coalesced_if.rsp_ready) - ); - - end else begin - - `ASSIGN_VX_LSU_MEM_IF (dcache_coalesced_if, lsu_dcache_if[i]); - - end - - VX_mem_bus_if #( - .DATA_SIZE (DCACHE_WORD_SIZE), - .TAG_WIDTH (DCACHE_TAG_WIDTH) - ) dcache_bus_tmp_if[DCACHE_CHANNELS](); - - VX_lsu_adapter #( - .NUM_LANES (DCACHE_CHANNELS), - .DATA_SIZE (DCACHE_WORD_SIZE), - .TAG_WIDTH (DCACHE_TAG_WIDTH), - .TAG_SEL_BITS (DCACHE_TAG_WIDTH - `UUID_WIDTH), - .ARBITER ("P"), - .REQ_OUT_BUF (0), - .RSP_OUT_BUF (0) - ) lsu_adapter ( - .clk (clk), - .reset (reset), - .lsu_mem_if (dcache_coalesced_if), - .mem_bus_if (dcache_bus_tmp_if) - ); - - for (genvar j = 0; j < DCACHE_CHANNELS; ++j) begin - `ASSIGN_VX_MEM_BUS_IF (dcache_bus_if[i * DCACHE_CHANNELS + j], dcache_bus_tmp_if[j]); - end - - end - - `ifdef PERF_ENABLE wire [`CLOG2(LSU_NUM_REQS+1)-1:0] perf_dcache_rd_req_per_cycle; diff --git a/hw/rtl/core/VX_lmem_unit.sv b/hw/rtl/core/VX_lmem_unit.sv deleted file mode 100644 index e61f626fdc..0000000000 --- a/hw/rtl/core/VX_lmem_unit.sv +++ /dev/null @@ -1,199 +0,0 @@ -// Copyright © 2019-2023 -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -`include "VX_define.vh" - -module VX_lmem_unit import VX_gpu_pkg::*; #( - parameter `STRING INSTANCE_ID = "" -) ( - input wire clk, - input wire reset, - -`ifdef PERF_ENABLE - output cache_perf_t cache_perf, -`endif - - VX_lsu_mem_if.slave lsu_mem_in_if [`NUM_LSU_BLOCKS], - VX_lsu_mem_if.master lsu_mem_out_if [`NUM_LSU_BLOCKS] -); - `STATIC_ASSERT(`IS_DIVISBLE((1 << `LMEM_LOG_SIZE), `MEM_BLOCK_SIZE), ("invalid parameter")) - `STATIC_ASSERT(0 == (`LMEM_BASE_ADDR % (1 << `LMEM_LOG_SIZE)), ("invalid parameter")) - - localparam REQ_DATAW = `NUM_LSU_LANES + 1 + `NUM_LSU_LANES * (LSU_WORD_SIZE + LSU_ADDR_WIDTH + `MEM_REQ_FLAGS_WIDTH + LSU_WORD_SIZE * 8) + LSU_TAG_WIDTH; - localparam RSP_DATAW = `NUM_LSU_LANES + `NUM_LSU_LANES * (LSU_WORD_SIZE * 8) + LSU_TAG_WIDTH; - localparam LMEM_ADDR_WIDTH = `LMEM_LOG_SIZE - `CLOG2(LSU_WORD_SIZE); - - VX_lsu_mem_if #( - .NUM_LANES (`NUM_LSU_LANES), - .DATA_SIZE (LSU_WORD_SIZE), - .TAG_WIDTH (LSU_TAG_WIDTH) - ) lsu_lmem_if[`NUM_LSU_BLOCKS](); - - for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin : demux_slices - - wire [`NUM_LSU_LANES-1:0] is_addr_local_mask; - for (genvar j = 0; j < `NUM_LSU_LANES; ++j) begin - assign is_addr_local_mask[j] = lsu_mem_in_if[i].req_data.flags[j][`MEM_REQ_FLAG_LOCAL]; - end - - wire is_addr_global = | (lsu_mem_in_if[i].req_data.mask & ~is_addr_local_mask); - wire is_addr_local = | (lsu_mem_in_if[i].req_data.mask & is_addr_local_mask); - - wire req_global_ready; - wire req_local_ready; - - VX_elastic_buffer #( - .DATAW (REQ_DATAW), - .SIZE (2), - .OUT_REG (3) - ) req_global_buf ( - .clk 
(clk), - .reset (reset), - .valid_in (lsu_mem_in_if[i].req_valid && is_addr_global), - .data_in ({ - lsu_mem_in_if[i].req_data.mask & ~is_addr_local_mask, - lsu_mem_in_if[i].req_data.rw, - lsu_mem_in_if[i].req_data.addr, - lsu_mem_in_if[i].req_data.data, - lsu_mem_in_if[i].req_data.byteen, - lsu_mem_in_if[i].req_data.flags, - lsu_mem_in_if[i].req_data.tag - }), - .ready_in (req_global_ready), - .valid_out (lsu_mem_out_if[i].req_valid), - .data_out ({ - lsu_mem_out_if[i].req_data.mask, - lsu_mem_out_if[i].req_data.rw, - lsu_mem_out_if[i].req_data.addr, - lsu_mem_out_if[i].req_data.data, - lsu_mem_out_if[i].req_data.byteen, - lsu_mem_out_if[i].req_data.flags, - lsu_mem_out_if[i].req_data.tag - }), - .ready_out (lsu_mem_out_if[i].req_ready) - ); - - VX_elastic_buffer #( - .DATAW (REQ_DATAW), - .SIZE (0), - .OUT_REG (0) - ) req_local_buf ( - .clk (clk), - .reset (reset), - .valid_in (lsu_mem_in_if[i].req_valid && is_addr_local), - .data_in ({ - lsu_mem_in_if[i].req_data.mask & is_addr_local_mask, - lsu_mem_in_if[i].req_data.rw, - lsu_mem_in_if[i].req_data.addr, - lsu_mem_in_if[i].req_data.data, - lsu_mem_in_if[i].req_data.byteen, - lsu_mem_in_if[i].req_data.flags, - lsu_mem_in_if[i].req_data.tag - }), - .ready_in (req_local_ready), - .valid_out (lsu_lmem_if[i].req_valid), - .data_out ({ - lsu_lmem_if[i].req_data.mask, - lsu_lmem_if[i].req_data.rw, - lsu_lmem_if[i].req_data.addr, - lsu_lmem_if[i].req_data.data, - lsu_lmem_if[i].req_data.byteen, - lsu_lmem_if[i].req_data.flags, - lsu_lmem_if[i].req_data.tag - }), - .ready_out (lsu_lmem_if[i].req_ready) - ); - - assign lsu_mem_in_if[i].req_ready = (req_global_ready && is_addr_global) - || (req_local_ready && is_addr_local); - - VX_stream_arb #( - .NUM_INPUTS (2), - .DATAW (RSP_DATAW), - .ARBITER ("R"), - .OUT_BUF (1) - ) rsp_arb ( - .clk (clk), - .reset (reset), - .valid_in ({ - lsu_lmem_if[i].rsp_valid, - lsu_mem_out_if[i].rsp_valid - }), - .ready_in ({ - lsu_lmem_if[i].rsp_ready, - lsu_mem_out_if[i].rsp_ready - }), - 
.data_in ({ - lsu_lmem_if[i].rsp_data, - lsu_mem_out_if[i].rsp_data - }), - .data_out (lsu_mem_in_if[i].rsp_data), - .valid_out (lsu_mem_in_if[i].rsp_valid), - .ready_out (lsu_mem_in_if[i].rsp_ready), - `UNUSED_PIN (sel_out) - ); - end - - VX_mem_bus_if #( - .DATA_SIZE (LSU_WORD_SIZE), - .TAG_WIDTH (LSU_TAG_WIDTH) - ) lmem_bus_if[LSU_NUM_REQS](); - - for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin : adapter_slices - VX_mem_bus_if #( - .DATA_SIZE (LSU_WORD_SIZE), - .TAG_WIDTH (LSU_TAG_WIDTH) - ) lmem_bus_tmp_if[`NUM_LSU_LANES](); - - VX_lsu_adapter #( - .NUM_LANES (`NUM_LSU_LANES), - .DATA_SIZE (LSU_WORD_SIZE), - .TAG_WIDTH (LSU_TAG_WIDTH), - .TAG_SEL_BITS (LSU_TAG_WIDTH - `UUID_WIDTH), - .ARBITER ("P"), - .REQ_OUT_BUF (3), - .RSP_OUT_BUF (0) - ) lsu_adapter ( - .clk (clk), - .reset (reset), - .lsu_mem_if (lsu_lmem_if[i]), - .mem_bus_if (lmem_bus_tmp_if) - ); - - for (genvar j = 0; j < `NUM_LSU_LANES; ++j) begin - `ASSIGN_VX_MEM_BUS_IF (lmem_bus_if[i * `NUM_LSU_LANES + j], lmem_bus_tmp_if[j]); - end - end - - `RESET_RELAY (lmem_reset, reset); - - VX_local_mem #( - .INSTANCE_ID($sformatf("%s-lmem", INSTANCE_ID)), - .SIZE (1 << `LMEM_LOG_SIZE), - .NUM_REQS (LSU_NUM_REQS), - .NUM_BANKS (`LMEM_NUM_BANKS), - .WORD_SIZE (LSU_WORD_SIZE), - .ADDR_WIDTH (LMEM_ADDR_WIDTH), - .UUID_WIDTH (`UUID_WIDTH), - .TAG_WIDTH (LSU_TAG_WIDTH), - .OUT_BUF (3) - ) local_mem ( - .clk (clk), - .reset (lmem_reset), - `ifdef PERF_ENABLE - .cache_perf (cache_perf), - `endif - .mem_bus_if (lmem_bus_if) - ); - -endmodule diff --git a/hw/rtl/core/VX_mem_unit.sv b/hw/rtl/core/VX_mem_unit.sv new file mode 100644 index 0000000000..b960d5ff24 --- /dev/null +++ b/hw/rtl/core/VX_mem_unit.sv @@ -0,0 +1,217 @@ +// Copyright © 2019-2023 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +`include "VX_define.vh" + +module VX_mem_unit import VX_gpu_pkg::*; #( + parameter `STRING INSTANCE_ID = "" +) ( + input wire clk, + input wire reset, + +`ifdef PERF_ENABLE + output cache_perf_t cache_perf, +`endif + + VX_lsu_mem_if.slave lsu_mem_in_if [`NUM_LSU_BLOCKS], + VX_mem_bus_if.master dcache_bus_if [DCACHE_NUM_REQS] +); + VX_lsu_mem_if #( + .NUM_LANES (`NUM_LSU_LANES), + .DATA_SIZE (LSU_WORD_SIZE), + .TAG_WIDTH (LSU_TAG_WIDTH) + ) lsu_dcache_if[`NUM_LSU_BLOCKS](); + +`ifdef LMEM_ENABLE + + `STATIC_ASSERT(`IS_DIVISBLE((1 << `LMEM_LOG_SIZE), `MEM_BLOCK_SIZE), ("invalid parameter")) + `STATIC_ASSERT(0 == (`LMEM_BASE_ADDR % (1 << `LMEM_LOG_SIZE)), ("invalid parameter")) + + localparam LMEM_ADDR_WIDTH = `LMEM_LOG_SIZE - `CLOG2(LSU_WORD_SIZE); + + VX_lsu_mem_if #( + .NUM_LANES (`NUM_LSU_LANES), + .DATA_SIZE (LSU_WORD_SIZE), + .TAG_WIDTH (LSU_TAG_WIDTH) + ) lsu_lmem_if[`NUM_LSU_BLOCKS](); + + for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin : demux_slices + VX_lmem_demux #( + .REQ0_OUT_BUF (3), + .REQ1_OUT_BUF (0), + .RSP_OUT_BUF (1) + ) lmem_demux ( + .clk (clk), + .reset (reset), + .lsu_in_if (lsu_mem_in_if[i]), + .cache_out_if (lsu_dcache_if[i]), + .lmem_out_if (lsu_lmem_if[i]) + ); + end + + VX_mem_bus_if #( + .DATA_SIZE (LSU_WORD_SIZE), + .TAG_WIDTH (LSU_TAG_WIDTH) + ) lmem_bus_if[LSU_NUM_REQS](); + + for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin : adapter_slices + VX_mem_bus_if #( + .DATA_SIZE (LSU_WORD_SIZE), + .TAG_WIDTH (LSU_TAG_WIDTH) + ) lmem_bus_tmp_if[`NUM_LSU_LANES](); + + VX_lsu_adapter #( + .NUM_LANES (`NUM_LSU_LANES), + 
.DATA_SIZE (LSU_WORD_SIZE), + .TAG_WIDTH (LSU_TAG_WIDTH), + .TAG_SEL_BITS (LSU_TAG_WIDTH - `UUID_WIDTH), + .ARBITER ("P"), + .REQ_OUT_BUF (3), + .RSP_OUT_BUF (0) + ) lmem_adapter ( + .clk (clk), + .reset (reset), + .lsu_mem_if (lsu_lmem_if[i]), + .mem_bus_if (lmem_bus_tmp_if) + ); + + for (genvar j = 0; j < `NUM_LSU_LANES; ++j) begin + `ASSIGN_VX_MEM_BUS_IF (lmem_bus_if[i * `NUM_LSU_LANES + j], lmem_bus_tmp_if[j]); + end + end + + `RESET_RELAY (lmem_reset, reset); + + VX_local_mem #( + .INSTANCE_ID($sformatf("%s-lmem", INSTANCE_ID)), + .SIZE (1 << `LMEM_LOG_SIZE), + .NUM_REQS (LSU_NUM_REQS), + .NUM_BANKS (`LMEM_NUM_BANKS), + .WORD_SIZE (LSU_WORD_SIZE), + .ADDR_WIDTH (LMEM_ADDR_WIDTH), + .UUID_WIDTH (`UUID_WIDTH), + .TAG_WIDTH (LSU_TAG_WIDTH), + .OUT_BUF (3) + ) local_mem ( + .clk (clk), + .reset (lmem_reset), + `ifdef PERF_ENABLE + .cache_perf (cache_perf), + `endif + .mem_bus_if (lmem_bus_if) + ); + +`else + + for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin + `ASSIGN_VX_LSU_MEM_IF (lsu_dcache_if[i], lsu_mem_if[i]); + end + +`endif + + for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin : coalescer_blocks + + VX_lsu_mem_if #( + .NUM_LANES (DCACHE_CHANNELS), + .DATA_SIZE (DCACHE_WORD_SIZE), + .TAG_WIDTH (DCACHE_TAG_WIDTH) + ) dcache_coalesced_if(); + + if (LSU_WORD_SIZE != DCACHE_WORD_SIZE) begin : coalescer_if + + `RESET_RELAY (mem_coalescer_reset, reset); + + VX_mem_coalescer #( + .INSTANCE_ID ($sformatf("%s-coalescer%0d", INSTANCE_ID, i)), + .NUM_REQS (`NUM_LSU_LANES), + .DATA_IN_SIZE (LSU_WORD_SIZE), + .DATA_OUT_SIZE (DCACHE_WORD_SIZE), + .ADDR_WIDTH (LSU_ADDR_WIDTH), + .FLAGS_WIDTH (`MEM_REQ_FLAGS_WIDTH), + .TAG_WIDTH (LSU_TAG_WIDTH), + .UUID_WIDTH (`UUID_WIDTH), + .QUEUE_SIZE (`LSUQ_OUT_SIZE) + ) mem_coalescer ( + .clk (clk), + .reset (mem_coalescer_reset), + + // Input request + .in_req_valid (lsu_dcache_if[i].req_valid), + .in_req_mask (lsu_dcache_if[i].req_data.mask), + .in_req_rw (lsu_dcache_if[i].req_data.rw), + .in_req_byteen 
(lsu_dcache_if[i].req_data.byteen), + .in_req_addr (lsu_dcache_if[i].req_data.addr), + .in_req_flags (lsu_dcache_if[i].req_data.flags), + .in_req_data (lsu_dcache_if[i].req_data.data), + .in_req_tag (lsu_dcache_if[i].req_data.tag), + .in_req_ready (lsu_dcache_if[i].req_ready), + + // Input response + .in_rsp_valid (lsu_dcache_if[i].rsp_valid), + .in_rsp_mask (lsu_dcache_if[i].rsp_data.mask), + .in_rsp_data (lsu_dcache_if[i].rsp_data.data), + .in_rsp_tag (lsu_dcache_if[i].rsp_data.tag), + .in_rsp_ready (lsu_dcache_if[i].rsp_ready), + + // Output request + .out_req_valid (dcache_coalesced_if.req_valid), + .out_req_mask (dcache_coalesced_if.req_data.mask), + .out_req_rw (dcache_coalesced_if.req_data.rw), + .out_req_byteen (dcache_coalesced_if.req_data.byteen), + .out_req_addr (dcache_coalesced_if.req_data.addr), + .out_req_flags (dcache_coalesced_if.req_data.flags), + .out_req_data (dcache_coalesced_if.req_data.data), + .out_req_tag (dcache_coalesced_if.req_data.tag), + .out_req_ready (dcache_coalesced_if.req_ready), + + // Output response + .out_rsp_valid (dcache_coalesced_if.rsp_valid), + .out_rsp_mask (dcache_coalesced_if.rsp_data.mask), + .out_rsp_data (dcache_coalesced_if.rsp_data.data), + .out_rsp_tag (dcache_coalesced_if.rsp_data.tag), + .out_rsp_ready (dcache_coalesced_if.rsp_ready) + ); + + end else begin + + `ASSIGN_VX_LSU_MEM_IF (dcache_coalesced_if, lsu_dcache_if[i]); + + end + + VX_mem_bus_if #( + .DATA_SIZE (DCACHE_WORD_SIZE), + .TAG_WIDTH (DCACHE_TAG_WIDTH) + ) dcache_bus_tmp_if[DCACHE_CHANNELS](); + + VX_lsu_adapter #( + .NUM_LANES (DCACHE_CHANNELS), + .DATA_SIZE (DCACHE_WORD_SIZE), + .TAG_WIDTH (DCACHE_TAG_WIDTH), + .TAG_SEL_BITS (DCACHE_TAG_WIDTH - `UUID_WIDTH), + .ARBITER ("P"), + .REQ_OUT_BUF (0), + .RSP_OUT_BUF (0) + ) lsu_adapter ( + .clk (clk), + .reset (reset), + .lsu_mem_if (dcache_coalesced_if), + .mem_bus_if (dcache_bus_tmp_if) + ); + + for (genvar j = 0; j < DCACHE_CHANNELS; ++j) begin + `ASSIGN_VX_MEM_BUS_IF (dcache_bus_if[i * 
DCACHE_CHANNELS + j], dcache_bus_tmp_if[j]); + end + + end + +endmodule diff --git a/hw/rtl/mem/VX_lmem_demux.sv b/hw/rtl/mem/VX_lmem_demux.sv new file mode 100644 index 0000000000..47a3912a5b --- /dev/null +++ b/hw/rtl/mem/VX_lmem_demux.sv @@ -0,0 +1,132 @@ +// Copyright © 2019-2023 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +`include "VX_define.vh" + +module VX_lmem_demux import VX_gpu_pkg::*; #( + parameter REQ0_OUT_BUF = 0, + parameter REQ1_OUT_BUF = 0, + parameter RSP_OUT_BUF = 0 +) ( + input wire clk, + input wire reset, + VX_lsu_mem_if.slave lsu_in_if, + VX_lsu_mem_if.master cache_out_if, + VX_lsu_mem_if.master lmem_out_if +); + localparam REQ_DATAW = `NUM_LSU_LANES + 1 + `NUM_LSU_LANES * (LSU_WORD_SIZE + LSU_ADDR_WIDTH + `MEM_REQ_FLAGS_WIDTH + LSU_WORD_SIZE * 8) + LSU_TAG_WIDTH; + localparam RSP_DATAW = `NUM_LSU_LANES + `NUM_LSU_LANES * (LSU_WORD_SIZE * 8) + LSU_TAG_WIDTH; + + wire [`NUM_LSU_LANES-1:0] is_addr_local_mask; + for (genvar i = 0; i < `NUM_LSU_LANES; ++i) begin + assign is_addr_local_mask[i] = lsu_in_if.req_data.flags[i][`MEM_REQ_FLAG_LOCAL]; + end + + wire is_addr_global = | (lsu_in_if.req_data.mask & ~is_addr_local_mask); + wire is_addr_local = | (lsu_in_if.req_data.mask & is_addr_local_mask); + + wire req_global_ready; + wire req_local_ready; + + VX_elastic_buffer #( + .DATAW (REQ_DATAW), + .SIZE (2), + .OUT_REG (REQ0_OUT_BUF) + ) req_global_buf ( + .clk (clk), + .reset (reset), + .valid_in (lsu_in_if.req_valid && is_addr_global), 
+ .data_in ({ + lsu_in_if.req_data.mask & ~is_addr_local_mask, + lsu_in_if.req_data.rw, + lsu_in_if.req_data.addr, + lsu_in_if.req_data.data, + lsu_in_if.req_data.byteen, + lsu_in_if.req_data.flags, + lsu_in_if.req_data.tag + }), + .ready_in (req_global_ready), + .valid_out (cache_out_if.req_valid), + .data_out ({ + cache_out_if.req_data.mask, + cache_out_if.req_data.rw, + cache_out_if.req_data.addr, + cache_out_if.req_data.data, + cache_out_if.req_data.byteen, + cache_out_if.req_data.flags, + cache_out_if.req_data.tag + }), + .ready_out (cache_out_if.req_ready) + ); + + VX_elastic_buffer #( + .DATAW (REQ_DATAW), + .SIZE (0), + .OUT_REG (REQ1_OUT_BUF) + ) req_local_buf ( + .clk (clk), + .reset (reset), + .valid_in (lsu_in_if.req_valid && is_addr_local), + .data_in ({ + lsu_in_if.req_data.mask & is_addr_local_mask, + lsu_in_if.req_data.rw, + lsu_in_if.req_data.addr, + lsu_in_if.req_data.data, + lsu_in_if.req_data.byteen, + lsu_in_if.req_data.flags, + lsu_in_if.req_data.tag + }), + .ready_in (req_local_ready), + .valid_out (lmem_out_if.req_valid), + .data_out ({ + lmem_out_if.req_data.mask, + lmem_out_if.req_data.rw, + lmem_out_if.req_data.addr, + lmem_out_if.req_data.data, + lmem_out_if.req_data.byteen, + lmem_out_if.req_data.flags, + lmem_out_if.req_data.tag + }), + .ready_out (lmem_out_if.req_ready) + ); + + assign lsu_in_if.req_ready = (req_global_ready && is_addr_global) + || (req_local_ready && is_addr_local); + + VX_stream_arb #( + .NUM_INPUTS (2), + .DATAW (RSP_DATAW), + .ARBITER ("R"), + .OUT_BUF (RSP_OUT_BUF) + ) rsp_arb ( + .clk (clk), + .reset (reset), + .valid_in ({ + lmem_out_if.rsp_valid, + cache_out_if.rsp_valid + }), + .ready_in ({ + lmem_out_if.rsp_ready, + cache_out_if.rsp_ready + }), + .data_in ({ + lmem_out_if.rsp_data, + cache_out_if.rsp_data + }), + .data_out (lsu_in_if.rsp_data), + .valid_out (lsu_in_if.rsp_valid), + .ready_out (lsu_in_if.rsp_ready), + `UNUSED_PIN (sel_out) + ); + +endmodule From b83190c6e13bae20adda82c76a123964be6c8332 Mon 
Sep 17 00:00:00 2001 From: Blaise Tine Date: Thu, 15 Aug 2024 21:29:06 -0700 Subject: [PATCH 044/407] minor update --- hw/rtl/core/VX_core.sv | 2 +- hw/rtl/core/VX_mem_unit.sv | 6 ++++-- hw/rtl/mem/VX_local_mem.sv | 18 +++++++++--------- 3 files changed, 14 insertions(+), 12 deletions(-) diff --git a/hw/rtl/core/VX_core.sv b/hw/rtl/core/VX_core.sv index 65ad65c75b..30a774ee5f 100644 --- a/hw/rtl/core/VX_core.sv +++ b/hw/rtl/core/VX_core.sv @@ -210,7 +210,7 @@ module VX_core import VX_gpu_pkg::*; #( .clk (clk), .reset (lmem_unit_reset), `ifdef PERF_ENABLE - .cache_perf (mem_perf_tmp_if.lmem), + .lmem_perf (mem_perf_tmp_if.lmem), `endif .lsu_mem_in_if (lsu_mem_if), .dcache_bus_if (dcache_bus_if) diff --git a/hw/rtl/core/VX_mem_unit.sv b/hw/rtl/core/VX_mem_unit.sv index b960d5ff24..fef21a81f9 100644 --- a/hw/rtl/core/VX_mem_unit.sv +++ b/hw/rtl/core/VX_mem_unit.sv @@ -20,7 +20,7 @@ module VX_mem_unit import VX_gpu_pkg::*; #( input wire reset, `ifdef PERF_ENABLE - output cache_perf_t cache_perf, + output cache_perf_t lmem_perf, `endif VX_lsu_mem_if.slave lsu_mem_in_if [`NUM_LSU_BLOCKS], @@ -106,13 +106,15 @@ module VX_mem_unit import VX_gpu_pkg::*; #( .clk (clk), .reset (lmem_reset), `ifdef PERF_ENABLE - .cache_perf (cache_perf), + .lmem_perf (lmem_perf), `endif .mem_bus_if (lmem_bus_if) ); `else + assign lmem_perf = '0; + for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin `ASSIGN_VX_LSU_MEM_IF (lsu_dcache_if[i], lsu_mem_if[i]); end diff --git a/hw/rtl/mem/VX_local_mem.sv b/hw/rtl/mem/VX_local_mem.sv index 67a6dfc48b..aff058cb96 100644 --- a/hw/rtl/mem/VX_local_mem.sv +++ b/hw/rtl/mem/VX_local_mem.sv @@ -43,7 +43,7 @@ module VX_local_mem import VX_gpu_pkg::*; #( // PERF `ifdef PERF_ENABLE - output cache_perf_t cache_perf, + output cache_perf_t lmem_perf, `endif VX_mem_bus_if.slave mem_bus_if [NUM_REQS] @@ -290,14 +290,14 @@ module VX_local_mem import VX_gpu_pkg::*; #( end end - assign cache_perf.reads = perf_reads; - assign cache_perf.writes = perf_writes; - assign 
cache_perf.read_misses = '0; - assign cache_perf.write_misses = '0; - assign cache_perf.bank_stalls = perf_collisions; - assign cache_perf.mshr_stalls = '0; - assign cache_perf.mem_stalls = '0; - assign cache_perf.crsp_stalls = perf_crsp_stalls; + assign lmem_perf.reads = perf_reads; + assign lmem_perf.writes = perf_writes; + assign lmem_perf.read_misses = '0; + assign lmem_perf.write_misses = '0; + assign lmem_perf.bank_stalls = perf_collisions; + assign lmem_perf.mshr_stalls = '0; + assign lmem_perf.mem_stalls = '0; + assign lmem_perf.crsp_stalls = perf_crsp_stalls; `endif From d5fa26350c5dfdc5252d67eee8b85df82fe0bae6 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Fri, 16 Aug 2024 01:35:20 -0700 Subject: [PATCH 045/407] minor update --- hw/rtl/core/VX_mem_unit.sv | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/hw/rtl/core/VX_mem_unit.sv b/hw/rtl/core/VX_mem_unit.sv index fef21a81f9..8df2724393 100644 --- a/hw/rtl/core/VX_mem_unit.sv +++ b/hw/rtl/core/VX_mem_unit.sv @@ -113,8 +113,9 @@ module VX_mem_unit import VX_gpu_pkg::*; #( `else +`ifdef PERF_ENABLE assign lmem_perf = '0; - +`endif for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin `ASSIGN_VX_LSU_MEM_IF (lsu_dcache_if[i], lsu_mem_if[i]); end From f6ed49f19c92cf66c6d3a32401ea9bb1789d5643 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Fri, 16 Aug 2024 08:19:55 -0700 Subject: [PATCH 046/407] minor update --- hw/rtl/cache/VX_cache.sv | 91 +++++++++++---------- hw/rtl/mem/VX_lmem_demux.sv | 10 +-- hw/rtl/{core => mem}/VX_lsu_adapter.sv | 0 hw/rtl/{interfaces => mem}/VX_lsu_mem_if.sv | 0 4 files changed, 51 insertions(+), 50 deletions(-) rename hw/rtl/{core => mem}/VX_lsu_adapter.sv (100%) rename hw/rtl/{interfaces => mem}/VX_lsu_mem_if.sv (100%) diff --git a/hw/rtl/cache/VX_cache.sv b/hw/rtl/cache/VX_cache.sv index bc55718509..60493665b4 100644 --- a/hw/rtl/cache/VX_cache.sv +++ b/hw/rtl/cache/VX_cache.sv @@ -155,7 +155,13 @@ module VX_cache import VX_gpu_pkg::*; #( 
/////////////////////////////////////////////////////////////////////////// + VX_mem_bus_if #( + .DATA_SIZE (LINE_SIZE), + .TAG_WIDTH (MEM_TAG_WIDTH) + ) mem_bus_tmp_if(); + // Memory response buffering + wire mem_rsp_valid_s; wire [`CS_LINE_WIDTH-1:0] mem_rsp_data_s; wire [MEM_TAG_WIDTH-1:0] mem_rsp_tag_s; @@ -168,14 +174,51 @@ module VX_cache import VX_gpu_pkg::*; #( ) mem_rsp_queue ( .clk (clk), .reset (reset), - .valid_in (mem_bus_if.rsp_valid), - .ready_in (mem_bus_if.rsp_ready), - .data_in ({mem_bus_if.rsp_data.tag, mem_bus_if.rsp_data.data}), + .valid_in (mem_bus_tmp_if.rsp_valid), + .ready_in (mem_bus_tmp_if.rsp_ready), + .data_in ({mem_bus_tmp_if.rsp_data.tag, mem_bus_tmp_if.rsp_data.data}), .data_out ({mem_rsp_tag_s, mem_rsp_data_s}), .valid_out (mem_rsp_valid_s), .ready_out (mem_rsp_ready_s) ); + // Memory request buffering + + wire mem_req_valid; + wire [`CS_MEM_ADDR_WIDTH-1:0] mem_req_addr; + wire mem_req_rw; + wire [LINE_SIZE-1:0] mem_req_byteen; + wire [`CS_LINE_WIDTH-1:0] mem_req_data; + wire [MEM_TAG_WIDTH-1:0] mem_req_tag; + wire [MSHR_ADDR_WIDTH-1:0] mem_req_id; + wire mem_req_flush; + wire mem_req_ready; + + wire mem_req_flush_b; + + VX_elastic_buffer #( + .DATAW (1 + LINE_SIZE + `CS_MEM_ADDR_WIDTH + `CS_LINE_WIDTH + MEM_TAG_WIDTH + 1), + .SIZE (MEM_REQ_BUF_ENABLE ? `TO_OUT_BUF_SIZE(MEM_OUT_BUF) : 0), + .OUT_REG (`TO_OUT_BUF_REG(MEM_OUT_BUF)) + ) mem_req_buf ( + .clk (clk), + .reset (reset), + .valid_in (mem_req_valid), + .ready_in (mem_req_ready), + .data_in ({mem_req_rw, mem_req_byteen, mem_req_addr, mem_req_data, mem_req_tag, mem_req_flush}), + .data_out ({mem_bus_tmp_if.req_data.rw, mem_bus_tmp_if.req_data.byteen, mem_bus_tmp_if.req_data.addr, mem_bus_tmp_if.req_data.data, mem_bus_tmp_if.req_data.tag, mem_req_flush_b}), + .valid_out (mem_bus_tmp_if.req_valid), + .ready_out (mem_bus_tmp_if.req_ready) + ); + + assign mem_bus_tmp_if.req_data.flags = mem_req_flush_b ? 
`MEM_REQ_FLAGS_WIDTH'(1 << `MEM_REQ_FLAG_FLUSH) : '0; + + if (WRITE_ENABLE) begin + `ASSIGN_VX_MEM_BUS_IF (mem_bus_if, mem_bus_tmp_if); + end else begin + `ASSIGN_VX_MEM_BUS_RO_IF (mem_bus_if, mem_bus_tmp_if); + end + /////////////////////////////////////////////////////////////////////////// wire [NUM_BANKS-1:0] per_bank_core_req_valid; @@ -439,16 +482,6 @@ module VX_cache import VX_gpu_pkg::*; #( // Memory request arbitration - wire mem_req_valid; - wire [`CS_MEM_ADDR_WIDTH-1:0] mem_req_addr; - wire mem_req_rw; - wire [LINE_SIZE-1:0] mem_req_byteen; - wire [`CS_LINE_WIDTH-1:0] mem_req_data; - wire [MEM_TAG_WIDTH-1:0] mem_req_tag; - wire [MSHR_ADDR_WIDTH-1:0] mem_req_id; - wire mem_req_flush; - wire mem_req_ready; - wire [NUM_BANKS-1:0][(`CS_MEM_ADDR_WIDTH + MSHR_ADDR_WIDTH + 1 + LINE_SIZE + `CS_LINE_WIDTH + 1)-1:0] data_in; for (genvar i = 0; i < NUM_BANKS; ++i) begin @@ -485,38 +518,6 @@ module VX_cache import VX_gpu_pkg::*; #( assign mem_req_tag = MEM_TAG_WIDTH'(mem_req_id); end - // Memory request buffering - - wire mem_req_flush_b; - - VX_mem_bus_if #( - .DATA_SIZE (LINE_SIZE), - .TAG_WIDTH (MEM_TAG_WIDTH) - ) mem_bus_tmp_if(); - - VX_elastic_buffer #( - .DATAW (1 + LINE_SIZE + `CS_MEM_ADDR_WIDTH + `CS_LINE_WIDTH + MEM_TAG_WIDTH + 1), - .SIZE (MEM_REQ_BUF_ENABLE ? `TO_OUT_BUF_SIZE(MEM_OUT_BUF) : 0), - .OUT_REG (`TO_OUT_BUF_REG(MEM_OUT_BUF)) - ) mem_req_buf ( - .clk (clk), - .reset (reset), - .valid_in (mem_req_valid), - .ready_in (mem_req_ready), - .data_in ({mem_req_rw, mem_req_byteen, mem_req_addr, mem_req_data, mem_req_tag, mem_req_flush}), - .data_out ({mem_bus_tmp_if.req_data.rw, mem_bus_tmp_if.req_data.byteen, mem_bus_tmp_if.req_data.addr, mem_bus_tmp_if.req_data.data, mem_bus_tmp_if.req_data.tag, mem_req_flush_b}), - .valid_out (mem_bus_tmp_if.req_valid), - .ready_out (mem_bus_tmp_if.req_ready) - ); - - assign mem_bus_tmp_if.req_data.flags = mem_req_flush_b ? 
`MEM_REQ_FLAGS_WIDTH'(1 << `MEM_REQ_FLAG_FLUSH) : '0; - - if (WRITE_ENABLE) begin - `ASSIGN_VX_MEM_BUS_IF (mem_bus_if, mem_bus_tmp_if); - end else begin - `ASSIGN_VX_MEM_BUS_RO_IF (mem_bus_if, mem_bus_tmp_if); - end - `ifdef PERF_ENABLE // per cycle: core_reads, core_writes wire [`CLOG2(NUM_REQS+1)-1:0] perf_core_reads_per_cycle; diff --git a/hw/rtl/mem/VX_lmem_demux.sv b/hw/rtl/mem/VX_lmem_demux.sv index 47a3912a5b..b3158ad8ad 100644 --- a/hw/rtl/mem/VX_lmem_demux.sv +++ b/hw/rtl/mem/VX_lmem_demux.sv @@ -28,6 +28,9 @@ module VX_lmem_demux import VX_gpu_pkg::*; #( localparam RSP_DATAW = `NUM_LSU_LANES + `NUM_LSU_LANES * (LSU_WORD_SIZE * 8) + LSU_TAG_WIDTH; wire [`NUM_LSU_LANES-1:0] is_addr_local_mask; + wire req_global_ready; + wire req_local_ready; + for (genvar i = 0; i < `NUM_LSU_LANES; ++i) begin assign is_addr_local_mask[i] = lsu_in_if.req_data.flags[i][`MEM_REQ_FLAG_LOCAL]; end @@ -35,8 +38,8 @@ module VX_lmem_demux import VX_gpu_pkg::*; #( wire is_addr_global = | (lsu_in_if.req_data.mask & ~is_addr_local_mask); wire is_addr_local = | (lsu_in_if.req_data.mask & is_addr_local_mask); - wire req_global_ready; - wire req_local_ready; + assign lsu_in_if.req_ready = (req_global_ready && is_addr_global) + || (req_local_ready && is_addr_local); VX_elastic_buffer #( .DATAW (REQ_DATAW), @@ -100,9 +103,6 @@ module VX_lmem_demux import VX_gpu_pkg::*; #( .ready_out (lmem_out_if.req_ready) ); - assign lsu_in_if.req_ready = (req_global_ready && is_addr_global) - || (req_local_ready && is_addr_local); - VX_stream_arb #( .NUM_INPUTS (2), .DATAW (RSP_DATAW), diff --git a/hw/rtl/core/VX_lsu_adapter.sv b/hw/rtl/mem/VX_lsu_adapter.sv similarity index 100% rename from hw/rtl/core/VX_lsu_adapter.sv rename to hw/rtl/mem/VX_lsu_adapter.sv diff --git a/hw/rtl/interfaces/VX_lsu_mem_if.sv b/hw/rtl/mem/VX_lsu_mem_if.sv similarity index 100% rename from hw/rtl/interfaces/VX_lsu_mem_if.sv rename to hw/rtl/mem/VX_lsu_mem_if.sv From 304761c6fc12a0651485880babfd61f41b48ff3c Mon Sep 17 
00:00:00 2001 From: Blaise Tine Date: Fri, 16 Aug 2024 22:32:35 -0700 Subject: [PATCH 047/407] fixed blackbox temp driver mode with --rebuild=3 --- ci/blackbox.sh | 408 ++++++++++------------------- ci/regression.sh.in | 3 + config.mk.in | 3 - hw/syn/xilinx/test/kernel/Makefile | 6 +- tests/kernel/common.mk | 6 +- tests/opencl/common.mk | 19 +- tests/regression/basic/Makefile | 2 +- tests/regression/common.mk | 21 +- tests/unittest/common.mk | 4 +- 9 files changed, 176 insertions(+), 296 deletions(-) diff --git a/ci/blackbox.sh b/ci/blackbox.sh index fe94677aa2..5c0dfbde15 100755 --- a/ci/blackbox.sh +++ b/ci/blackbox.sh @@ -13,6 +13,9 @@ # See the License for the specific language governing permissions and # limitations under the License. +SCRIPT_DIR=$(dirname "$0") +ROOT_DIR=$SCRIPT_DIR/.. + show_usage() { echo "Vortex BlackBox Test Driver v1.0" @@ -29,302 +32,169 @@ show_help() echo "--rebuild: 0=disable, 1=force, 2=auto, 3=temp" } -SCRIPT_DIR=$(dirname "$0") -ROOT_DIR=$SCRIPT_DIR/.. 
- -DRIVER=simx -APP=sgemm -CLUSTERS=1 -CORES=1 -WARPS=4 -THREADS=4 -L2= -L3= -DEBUG=0 -DEBUG_LEVEL=0 -SCOPE=0 -HAS_ARGS=0 -PERF_CLASS=0 -REBUILD=2 -TEMPBUILD=0 -LOGFILE=run.log - -for i in "$@" -do -case $i in - --driver=*) - DRIVER=${i#*=} - shift - ;; - --app=*) - APP=${i#*=} - shift - ;; - --clusters=*) - CLUSTERS=${i#*=} - shift - ;; - --cores=*) - CORES=${i#*=} - shift - ;; - --warps=*) - WARPS=${i#*=} - shift - ;; - --threads=*) - THREADS=${i#*=} - shift - ;; - --l2cache) - L2=-DL2_ENABLE - shift - ;; - --l3cache) - L3=-DL3_ENABLE - shift - ;; - --debug=*) - DEBUG_LEVEL=${i#*=} - DEBUG=1 - shift - ;; - --scope) - SCOPE=1 - CORES=1 - shift - ;; - --perf=*) - PERF_FLAG=-DPERF_ENABLE - PERF_CLASS=${i#*=} - shift - ;; - --args=*) - ARGS=${i#*=} - HAS_ARGS=1 - shift - ;; - --rebuild=*) - REBUILD=${i#*=} - shift - ;; - --log=*) - LOGFILE=${i#*=} - shift - ;; - --help) - show_help - exit 0 - ;; - *) - show_usage - exit -1 - ;; -esac -done - -if [ $REBUILD -eq 3 ]; -then - REBUILD=1 - TEMPBUILD=1 -fi - -case $DRIVER in - gpu) - DRIVER_PATH= - ;; - simx) - DRIVER_PATH=$ROOT_DIR/runtime/simx - ;; - rtlsim) - DRIVER_PATH=$ROOT_DIR/runtime/rtlsim - ;; - opae) - DRIVER_PATH=$ROOT_DIR/runtime/opae - ;; - xrt) - DRIVER_PATH=$ROOT_DIR/runtime/xrt - ;; - *) - echo "invalid driver: $DRIVER" - exit -1 - ;; -esac - -if [ -d "$ROOT_DIR/tests/opencl/$APP" ]; -then - APP_PATH=$ROOT_DIR/tests/opencl/$APP -elif [ -d "$ROOT_DIR/tests/regression/$APP" ]; -then - APP_PATH=$ROOT_DIR/tests/regression/$APP -else - echo "Application folder not found: $APP" - exit -1 -fi - -if [ "$DRIVER" = "gpu" ]; -then - # running application - if [ $HAS_ARGS -eq 1 ] - then - echo "running: OPTS=$ARGS make -C $APP_PATH run-$DRIVER" - OPTS=$ARGS make -C $APP_PATH run-$DRIVER - status=$? +add_option() { + if [ -n "$1" ]; then + echo "$1 $2" else - echo "running: make -C $APP_PATH run-$DRIVER" - make -C $APP_PATH run-$DRIVER - status=$? 
+ echo "$2" fi +} - exit $status -fi - -CONFIGS="-DNUM_CLUSTERS=$CLUSTERS -DNUM_CORES=$CORES -DNUM_WARPS=$WARPS -DNUM_THREADS=$THREADS $L2 $L3 $PERF_FLAG $CONFIGS" - -echo "CONFIGS=$CONFIGS" - -if [ $REBUILD -ne 0 ] -then - BLACKBOX_CACHE=blackbox.$DRIVER.cache - if [ -f "$BLACKBOX_CACHE" ] - then - LAST_CONFIGS=`cat $BLACKBOX_CACHE` - fi +DEFAULTS() { + DRIVER=simx + APP=sgemm + DEBUG=0 + DEBUG_LEVEL=0 + SCOPE=0 + HAS_ARGS=0 + PERF_CLASS=0 + CONFIGS="$CONFIGS" + REBUILD=2 + TEMPBUILD=0 + LOGFILE=run.log +} - if [ $REBUILD -eq 1 ] || [ "$CONFIGS+$DEBUG+$SCOPE" != "$LAST_CONFIGS" ]; +parse_args() { + DEFAULTS + for i in "$@"; do + case $i in + --driver=*) DRIVER=${i#*=} ;; + --app=*) APP=${i#*=} ;; + --clusters=*) CONFIGS=$(add_option "$CONFIGS" "-DNUM_CLUSTERS=${i#*=}") ;; + --cores=*) CONFIGS=$(add_option "$CONFIGS" "-DNUM_CORES=${i#*=}") ;; + --warps=*) CONFIGS=$(add_option "$CONFIGS" "-DNUM_WARPS=${i#*=}") ;; + --threads=*) CONFIGS=$(add_option "$CONFIGS" "-DNUM_THREADS=${i#*=}") ;; + --l2cache) CONFIGS=$(add_option "$CONFIGS" "-DL2_ENABLE") ;; + --l3cache) CONFIGS=$(add_option "$CONFIGS" "-DL3_ENABLE") ;; + --perf=*) CONFIGS=$(add_option "$CONFIGS" "-DPERF_ENABLE"); PERF_CLASS=${i#*=} ;; + --debug=*) DEBUG=1; DEBUG_LEVEL=${i#*=} ;; + --scope) SCOPE=1; ;; + --args=*) HAS_ARGS=1; ARGS=${i#*=} ;; + --rebuild=*) REBUILD=${i#*=} ;; + --log=*) LOGFILE=${i#*=} ;; + --help) show_help; exit 0 ;; + *) show_usage; exit 1 ;; + esac + done + + if [ $REBUILD -eq 3 ]; then - make -C $DRIVER_PATH clean-driver > /dev/null - echo "$CONFIGS+$DEBUG+$SCOPE" > $BLACKBOX_CACHE + REBUILD=1 + TEMPBUILD=1 fi -fi - -# export performance monitor class identifier -export VORTEX_PROFILING=$PERF_CLASS +} -status=0 +set_driver_path() { + case $DRIVER in + gpu) DRIVER_PATH="" ;; + simx|rtlsim|opae|xrt) DRIVER_PATH="$ROOT_DIR/runtime/$DRIVER" ;; + *) echo "Invalid driver: $DRIVER"; exit 1 ;; + esac +} -# ensure config update -make -C $ROOT_DIR/hw config > /dev/null +set_app_path() { + if [ -d 
"$ROOT_DIR/tests/opencl/$APP" ]; then + APP_PATH="$ROOT_DIR/tests/opencl/$APP" + elif [ -d "$ROOT_DIR/tests/regression/$APP" ]; then + APP_PATH="$ROOT_DIR/tests/regression/$APP" + else + echo "Application folder not found: $APP" + exit 1 + fi +} -# ensure the stub driver is present -make -C $ROOT_DIR/runtime/stub > /dev/null +build_driver() { + local cmd_opts="" + [ $DEBUG -ne 0 ] && cmd_opts=$(add_option "$cmd_opts" "DEBUG=$DEBUG_LEVEL") + [ $SCOPE -eq 1 ] && cmd_opts=$(add_option "$cmd_opts" "SCOPE=1") + [ $TEMPBUILD -eq 1 ] && cmd_opts=$(add_option "$cmd_opts" "DESTDIR=\"$TEMPDIR\"") + [ -n "$CONFIGS" ] && cmd_opts=$(add_option "$cmd_opts" "CONFIGS=\"$CONFIGS\"") -if [ $DEBUG -ne 0 ] -then - # running application - if [ $TEMPBUILD -eq 1 ] - then - # setup temp directory - TEMPDIR=$(mktemp -d) - mkdir -p "$TEMPDIR/$DRIVER" + if [ -n "$cmd_opts" ]; then + echo "Running: $cmd_opts make -C $DRIVER_PATH > /dev/null" + eval "$cmd_opts make -C $DRIVER_PATH > /dev/null" + else + echo "Running: make -C $DRIVER_PATH > /dev/null" + make -C $DRIVER_PATH > /dev/null + fi +} - # driver initialization - if [ $SCOPE -eq 1 ] - then - echo "running: DESTDIR=$TEMPDIR/$DRIVER DEBUG=$DEBUG_LEVEL SCOPE=1 CONFIGS=$CONFIGS make -C $DRIVER_PATH" - DESTDIR="$TEMPDIR/$DRIVER" DEBUG=$DEBUG_LEVEL SCOPE=1 CONFIGS="$CONFIGS" make -C $DRIVER_PATH > /dev/null - else - echo "running: DESTDIR=$TEMPDIR/$DRIVER DEBUG=$DEBUG_LEVEL CONFIGS=$CONFIGS make -C $DRIVER_PATH" - DESTDIR="$TEMPDIR/$DRIVER" DEBUG=$DEBUG_LEVEL CONFIGS="$CONFIGS" make -C $DRIVER_PATH > /dev/null - fi +run_app() { + local cmd_opts="" + [ $DEBUG -eq 1 ] && cmd_opts=$(add_option "$cmd_opts" "DEBUG=1") + [ $TEMPBUILD -eq 1 ] && cmd_opts=$(add_option "$cmd_opts" "VORTEX_RT_PATH=\"$TEMPDIR\"") + [ $HAS_ARGS -eq 1 ] && cmd_opts=$(add_option "$cmd_opts" "OPTS=\"$ARGS\"") - # running application - if [ $HAS_ARGS -eq 1 ] - then - echo "running: VORTEX_RT_PATH=$TEMPDIR OPTS=$ARGS make -C $APP_PATH run-$DRIVER > $LOGFILE 2>&1" - DEBUG=1 
VORTEX_RT_PATH=$TEMPDIR OPTS=$ARGS make -C $APP_PATH run-$DRIVER > $LOGFILE 2>&1 - status=$? + if [ $DEBUG -ne 0 ]; then + if [ -n "$cmd_opts" ]; then + echo "Running: $cmd_opts make -C $APP_PATH run-$DRIVER > $LOGFILE 2>&1" + eval "$cmd_opts make -C $APP_PATH run-$DRIVER > $LOGFILE 2>&1" else - echo "running: VORTEX_RT_PATH=$TEMPDIR make -C $APP_PATH run-$DRIVER > $LOGFILE 2>&1" - DEBUG=1 VORTEX_RT_PATH=$TEMPDIR make -C $APP_PATH run-$DRIVER > $LOGFILE 2>&1 - status=$? + echo "Running: make -C $APP_PATH run-$DRIVER > $LOGFILE 2>&1" + make -C $APP_PATH run-$DRIVER > $LOGFILE 2>&1 fi - - # cleanup temp directory - trap "rm -rf $TEMPDIR" EXIT else - # driver initialization - if [ $SCOPE -eq 1 ] - then - echo "running: DEBUG=$DEBUG_LEVEL SCOPE=1 CONFIGS=$CONFIGS make -C $DRIVER_PATH" - DEBUG=$DEBUG_LEVEL SCOPE=1 CONFIGS="$CONFIGS" make -C $DRIVER_PATH > /dev/null + if [ -n "$cmd_opts" ]; then + echo "Running: $cmd_opts make -C $APP_PATH run-$DRIVER" + eval "$cmd_opts make -C $APP_PATH run-$DRIVER" else - echo "running: DEBUG=$DEBUG_LEVEL CONFIGS=$CONFIGS make -C $DRIVER_PATH" - DEBUG=$DEBUG_LEVEL CONFIGS="$CONFIGS" make -C $DRIVER_PATH > /dev/null + echo "Running: make -C $APP_PATH run-$DRIVER" + make -C $APP_PATH run-$DRIVER fi + fi + status=$? + exit $status +} - # running application - if [ $HAS_ARGS -eq 1 ] - then - echo "running: OPTS=$ARGS make -C $APP_PATH run-$DRIVER > $LOGFILE 2>&1" - DEBUG=1 OPTS=$ARGS make -C $APP_PATH run-$DRIVER > $LOGFILE 2>&1 - status=$? - else - echo "running: make -C $APP_PATH run-$DRIVER > $LOGFILE 2>&1" - DEBUG=1 make -C $APP_PATH run-$DRIVER > $LOGFILE 2>&1 - status=$? - fi +main() { + parse_args "$@" + set_driver_path + set_app_path + + # execute on default installed GPU + if [ "$DRIVER" = "gpu" ]; then + run_app + exit $status fi - if [ -f "$APP_PATH/trace.vcd" ] - then - mv -f $APP_PATH/trace.vcd . 
+ if [ -n "$CONFIGS" ]; then + echo "CONFIGS=$CONFIGS" fi -else - if [ $TEMPBUILD -eq 1 ] - then - # setup temp directory - TEMPDIR=$(mktemp -d) - mkdir -p "$TEMPDIR/$DRIVER" - # driver initialization - if [ $SCOPE -eq 1 ] - then - echo "running: DESTDIR=$TEMPDIR/$DRIVER SCOPE=1 CONFIGS=$CONFIGS make -C $DRIVER_PATH" - DESTDIR="$TEMPDIR/$DRIVER" SCOPE=1 CONFIGS="$CONFIGS" make -C $DRIVER_PATH > /dev/null - else - echo "running: DESTDIR=$TEMPDIR/$DRIVER CONFIGS=$CONFIGS make -C $DRIVER_PATH" - DESTDIR="$TEMPDIR/$DRIVER" CONFIGS="$CONFIGS" make -C $DRIVER_PATH > /dev/null - fi + if [ $REBUILD -ne 0 ]; then + BLACKBOX_CACHE=blackbox.$DRIVER.cache + LAST_CONFIGS=$(cat "$BLACKBOX_CACHE" 2>/dev/null || echo "") - # running application - if [ $HAS_ARGS -eq 1 ] - then - echo "running: VORTEX_RT_PATH=$TEMPDIR OPTS=$ARGS make -C $APP_PATH run-$DRIVER" - VORTEX_RT_PATH=$TEMPDIR OPTS=$ARGS make -C $APP_PATH run-$DRIVER - status=$? - else - echo "running: VORTEX_RT_PATH=$TEMPDIR make -C $APP_PATH run-$DRIVER" - VORTEX_RT_PATH=$TEMPDIR make -C $APP_PATH run-$DRIVER - status=$? 
+ if [ $REBUILD -eq 1 ] || [ "$CONFIGS+$DEBUG+$SCOPE" != "$LAST_CONFIGS" ]; then + make -C $DRIVER_PATH clean-driver > /dev/null + echo "$CONFIGS+$DEBUG+$SCOPE" > "$BLACKBOX_CACHE" fi + fi + + export VORTEX_PROFILING=$PERF_CLASS - # cleanup temp directory + make -C "$ROOT_DIR/hw" config > /dev/null + make -C "$ROOT_DIR/runtime/stub" > /dev/null + + if [ $TEMPBUILD -eq 1 ]; then + # setup temp directory + TEMPDIR=$(mktemp -d) + mkdir -p "$TEMPDIR" + # build stub driver + echo "running: DESTDIR=$TEMPDIR make -C $ROOT_DIR/runtime/stub" + DESTDIR="$TEMPDIR" make -C $ROOT_DIR/runtime/stub > /dev/null + # register tempdir cleanup on exit trap "rm -rf $TEMPDIR" EXIT - else + fi - # driver initialization - if [ $SCOPE -eq 1 ] - then - echo "running: SCOPE=1 CONFIGS=$CONFIGS make -C $DRIVER_PATH" - SCOPE=1 CONFIGS="$CONFIGS" make -C $DRIVER_PATH > /dev/null - else - echo "running: CONFIGS=$CONFIGS make -C $DRIVER_PATH" - CONFIGS="$CONFIGS" make -C $DRIVER_PATH > /dev/null - fi + build_driver + run_app - # running application - if [ $HAS_ARGS -eq 1 ] - then - echo "running: OPTS=$ARGS make -C $APP_PATH run-$DRIVER" - OPTS=$ARGS make -C $APP_PATH run-$DRIVER - status=$? - else - echo "running: make -C $APP_PATH run-$DRIVER" - make -C $APP_PATH run-$DRIVER - status=$? - fi + if [ $DEBUG -eq 1 ] && [ -f "$APP_PATH/trace.vcd" ]; then + mv -f $APP_PATH/trace.vcd . fi -fi -exit $status + exit $status +} + +main "$@" \ No newline at end of file diff --git a/ci/regression.sh.in b/ci/regression.sh.in index 3cd46a463b..e0da29e20e 100755 --- a/ci/regression.sh.in +++ b/ci/regression.sh.in @@ -105,6 +105,9 @@ regression() ./ci/blackbox.sh --driver=simx --app=dogfood --args="-n1 -tbar" ./ci/blackbox.sh --driver=opae --app=dogfood --args="-n1 -tbar" + # test temp driver mode for + ./ci/blackbox.sh --driver=simx --app=vecadd --rebuild=3 + echo "regression tests done!" 
} diff --git a/config.mk.in b/config.mk.in index 81339f195f..12593924ff 100644 --- a/config.mk.in +++ b/config.mk.in @@ -31,7 +31,4 @@ RISCV_TOOLCHAIN_PATH ?= $(TOOLDIR)/riscv$(XLEN)-gnu-toolchain RISCV_PREFIX ?= riscv$(XLEN)-unknown-elf RISCV_SYSROOT ?= $(RISCV_TOOLCHAIN_PATH)/$(RISCV_PREFIX) -VORTEX_RT_PATH ?= $(VORTEX_HOME)/runtime -VORTEX_KN_PATH ?= $(VORTEX_HOME)/kernel - THIRD_PARTY_DIR ?= $(VORTEX_HOME)/third_party \ No newline at end of file diff --git a/hw/syn/xilinx/test/kernel/Makefile b/hw/syn/xilinx/test/kernel/Makefile index 515533689a..9f3b95c1a1 100644 --- a/hw/syn/xilinx/test/kernel/Makefile +++ b/hw/syn/xilinx/test/kernel/Makefile @@ -19,9 +19,9 @@ DP = $(RISCV_TOOLCHAIN_PATH)/bin/$(RISCV_PREFIX)-objdump CP = $(RISCV_TOOLCHAIN_PATH)/bin/$(RISCV_PREFIX)-objcopy CFLAGS += -O3 -Wstack-usage=1024 -ffreestanding -nostartfiles -fdata-sections -ffunction-sections -CFLAGS += -I$(VORTEX_RT_PATH)/include -I$(VORTEX_HOME)/hw +CFLAGS += -I$(VORTEX_HOME)/runtime/include -I$(VORTEX_HOME)/hw -LDFLAGS += -lm -Wl,-Bstatic,-T,$(VORTEX_RT_PATH)/scripts/link$(XLEN).ld,--defsym=STARTUP_ADDR=0x80000000 +LDFLAGS += -lm -Wl,-Bstatic,-T,$(VORTEX_HOME)/kernel/scripts/link$(XLEN).ld,--defsym=STARTUP_ADDR=0x80000000 PROJECT = kernel @@ -48,4 +48,4 @@ $(PROJECT).elf: $(SRCS) $(CC) $(CFLAGS) -MM $^ > .depend; clean: - rm -rf *.bin *.elf *.hex *.dump *.coe .depend + rm -rf *.bin *.elf *.hex *.dump *.coe .depend diff --git a/tests/kernel/common.mk b/tests/kernel/common.mk index e3f6b472b6..050b1b48d2 100644 --- a/tests/kernel/common.mk +++ b/tests/kernel/common.mk @@ -6,6 +6,8 @@ else CFLAGS += -march=rv32imaf -mabi=ilp32f endif +VORTEX_KN_PATH ?= $(ROOT_DIR)/kernel + LLVM_CFLAGS += --sysroot=$(RISCV_SYSROOT) LLVM_CFLAGS += --gcc-toolchain=$(RISCV_TOOLCHAIN_PATH) LLVM_CFLAGS += -Xclang -target-feature -Xclang +vortex -mllvm -vortex-branch-divergence=0 @@ -23,13 +25,13 @@ DP = $(RISCV_TOOLCHAIN_PATH)/bin/$(RISCV_PREFIX)-objdump CP = 
$(RISCV_TOOLCHAIN_PATH)/bin/$(RISCV_PREFIX)-objcopy CFLAGS += -O3 -mcmodel=medany -fno-exceptions -nostartfiles -nostdlib -fdata-sections -ffunction-sections -CFLAGS += -I$(VORTEX_KN_PATH)/include -I$(ROOT_DIR)/hw +CFLAGS += -I$(VORTEX_HOME)/kernel/include -I$(ROOT_DIR)/hw CFLAGS += -DXLEN_$(XLEN) -DNDEBUG LIBC_LIB += -L$(LIBC_VORTEX)/lib -lm -lc LIBC_LIB += $(LIBCRT_VORTEX)/lib/baremetal/libclang_rt.builtins-riscv$(XLEN).a -LDFLAGS += -Wl,-Bstatic,--gc-sections,-T,$(VORTEX_KN_PATH)/scripts/link$(XLEN).ld,--defsym=STARTUP_ADDR=0x80000000 $(ROOT_DIR)/kernel/libvortex.a $(LIBC_LIB) +LDFLAGS += -Wl,-Bstatic,--gc-sections,-T,$(VORTEX_HOME)/kernel/scripts/link$(XLEN).ld,--defsym=STARTUP_ADDR=0x80000000 $(VORTEX_KN_PATH)/libvortex.a $(LIBC_LIB) all: $(PROJECT).elf $(PROJECT).bin $(PROJECT).dump diff --git a/tests/opencl/common.mk b/tests/opencl/common.mk index 2e287a9449..dd5af90db6 100644 --- a/tests/opencl/common.mk +++ b/tests/opencl/common.mk @@ -15,6 +15,9 @@ STARTUP_ADDR ?= 0x80000000 POCL_CC_FLAGS += POCL_VORTEX_XLEN=32 endif +VORTEX_RT_PATH ?= $(ROOT_DIR)/runtime +VORTEX_KN_PATH ?= $(ROOT_DIR)/kernel + POCL_PATH ?= $(TOOLDIR)/pocl LLVM_POCL ?= $(TOOLDIR)/llvm-vortex @@ -26,14 +29,14 @@ VX_LIBS += $(LIBCRT_VORTEX)/lib/baremetal/libclang_rt.builtins-riscv$(XLEN).a VX_CFLAGS += -O3 -mcmodel=medany --sysroot=$(RISCV_SYSROOT) --gcc-toolchain=$(RISCV_TOOLCHAIN_PATH) VX_CFLAGS += -fno-rtti -fno-exceptions -nostartfiles -nostdlib -fdata-sections -ffunction-sections -VX_CFLAGS += -I$(ROOT_DIR)/hw -I$(VORTEX_KN_PATH)/include -DXLEN_$(XLEN) -DNDEBUG +VX_CFLAGS += -I$(ROOT_DIR)/hw -I$(VORTEX_HOME)/kernel/include -DXLEN_$(XLEN) -DNDEBUG VX_CFLAGS += -Xclang -target-feature -Xclang +vortex VX_CFLAGS += -Xclang -target-feature -Xclang +zicond VX_CFLAGS += -mllvm -disable-loop-idiom-all #VX_CFLAGS += -mllvm -vortex-branch-divergence=0 #VX_CFLAGS += -mllvm -print-after-all -VX_LDFLAGS += 
-Wl,-Bstatic,--gc-sections,-T$(VORTEX_KN_PATH)/scripts/link$(XLEN).ld,--defsym=STARTUP_ADDR=$(STARTUP_ADDR) $(ROOT_DIR)/kernel/libvortex.a $(VX_LIBS) +VX_LDFLAGS += -Wl,-Bstatic,--gc-sections,-T$(VORTEX_HOME)/kernel/scripts/link$(XLEN).ld,--defsym=STARTUP_ADDR=$(STARTUP_ADDR) $(VORTEX_KN_PATH)/libvortex.a $(VX_LIBS) VX_BINTOOL += OBJCOPY=$(LLVM_VORTEX)/bin/llvm-objcopy $(VORTEX_HOME)/kernel/scripts/vxbin.py @@ -80,7 +83,7 @@ all: $(PROJECT) $(CC) $(CXXFLAGS) -c $< -o $@ $(PROJECT): $(OBJS) - $(CXX) $(CXXFLAGS) $(OBJS) $(LDFLAGS) -L$(ROOT_DIR)/runtime -lvortex -L$(POCL_PATH)/lib -lOpenCL -o $@ + $(CXX) $(CXXFLAGS) $(OBJS) $(LDFLAGS) -L$(VORTEX_RT_PATH) -lvortex -L$(POCL_PATH)/lib -lOpenCL -o $@ $(PROJECT).host: $(OBJS) $(CXX) $(CXXFLAGS) $(OBJS) $(LDFLAGS) -lOpenCL -o $@ @@ -89,19 +92,19 @@ run-gpu: $(PROJECT).host $(KERNEL_SRCS) ./$(PROJECT).host $(OPTS) run-simx: $(PROJECT) $(KERNEL_SRCS) - LD_LIBRARY_PATH=$(POCL_PATH)/lib:$(ROOT_DIR)/runtime:$(LLVM_VORTEX)/lib:$(LD_LIBRARY_PATH) $(POCL_CC_FLAGS) VORTEX_DRIVER=simx ./$(PROJECT) $(OPTS) + LD_LIBRARY_PATH=$(POCL_PATH)/lib:$(VORTEX_RT_PATH):$(LLVM_VORTEX)/lib:$(LD_LIBRARY_PATH) $(POCL_CC_FLAGS) VORTEX_DRIVER=simx ./$(PROJECT) $(OPTS) run-rtlsim: $(PROJECT) $(KERNEL_SRCS) - LD_LIBRARY_PATH=$(POCL_PATH)/lib:$(ROOT_DIR)/runtime:$(LLVM_VORTEX)/lib:$(LD_LIBRARY_PATH) $(POCL_CC_FLAGS) VORTEX_DRIVER=rtlsim ./$(PROJECT) $(OPTS) + LD_LIBRARY_PATH=$(POCL_PATH)/lib:$(VORTEX_RT_PATH):$(LLVM_VORTEX)/lib:$(LD_LIBRARY_PATH) $(POCL_CC_FLAGS) VORTEX_DRIVER=rtlsim ./$(PROJECT) $(OPTS) run-opae: $(PROJECT) $(KERNEL_SRCS) - SCOPE_JSON_PATH=$(ROOT_DIR)/runtime/scope.json OPAE_DRV_PATHS=$(OPAE_DRV_PATHS) LD_LIBRARY_PATH=$(POCL_PATH)/lib:$(ROOT_DIR)/runtime:$(LLVM_VORTEX)/lib:$(LD_LIBRARY_PATH) $(POCL_CC_FLAGS) VORTEX_DRIVER=opae ./$(PROJECT) $(OPTS) + SCOPE_JSON_PATH=$(VORTEX_RT_PATH)/scope.json OPAE_DRV_PATHS=$(OPAE_DRV_PATHS) LD_LIBRARY_PATH=$(POCL_PATH)/lib:$(VORTEX_RT_PATH):$(LLVM_VORTEX)/lib:$(LD_LIBRARY_PATH) $(POCL_CC_FLAGS) 
VORTEX_DRIVER=opae ./$(PROJECT) $(OPTS) run-xrt: $(PROJECT) $(KERNEL_SRCS) ifeq ($(TARGET), hw) - XRT_INI_PATH=$(XRT_SYN_DIR)/xrt.ini EMCONFIG_PATH=$(FPGA_BIN_DIR) XRT_DEVICE_INDEX=$(XRT_DEVICE_INDEX) XRT_XCLBIN_PATH=$(FPGA_BIN_DIR)/vortex_afu.xclbin LD_LIBRARY_PATH=$(XILINX_XRT)/lib:$(POCL_PATH)/lib:$(ROOT_DIR)/runtime:$(LLVM_VORTEX)/lib:$(LD_LIBRARY_PATH) $(POCL_CC_FLAGS) VORTEX_DRIVER=xrt ./$(PROJECT) $(OPTS) + XRT_INI_PATH=$(XRT_SYN_DIR)/xrt.ini EMCONFIG_PATH=$(FPGA_BIN_DIR) XRT_DEVICE_INDEX=$(XRT_DEVICE_INDEX) XRT_XCLBIN_PATH=$(FPGA_BIN_DIR)/vortex_afu.xclbin LD_LIBRARY_PATH=$(XILINX_XRT)/lib:$(POCL_PATH)/lib:$(VORTEX_RT_PATH):$(LLVM_VORTEX)/lib:$(LD_LIBRARY_PATH) $(POCL_CC_FLAGS) VORTEX_DRIVER=xrt ./$(PROJECT) $(OPTS) else - XCL_EMULATION_MODE=$(TARGET) XRT_INI_PATH=$(XRT_SYN_DIR)/xrt.ini EMCONFIG_PATH=$(FPGA_BIN_DIR) XRT_DEVICE_INDEX=$(XRT_DEVICE_INDEX) XRT_XCLBIN_PATH=$(FPGA_BIN_DIR)/vortex_afu.xclbin LD_LIBRARY_PATH=$(XILINX_XRT)/lib:$(POCL_PATH)/lib:$(ROOT_DIR)/runtime:$(LLVM_VORTEX)/lib:$(LD_LIBRARY_PATH) $(POCL_CC_FLAGS) VORTEX_DRIVER=xrt ./$(PROJECT) $(OPTS) + XCL_EMULATION_MODE=$(TARGET) XRT_INI_PATH=$(XRT_SYN_DIR)/xrt.ini EMCONFIG_PATH=$(FPGA_BIN_DIR) XRT_DEVICE_INDEX=$(XRT_DEVICE_INDEX) XRT_XCLBIN_PATH=$(FPGA_BIN_DIR)/vortex_afu.xclbin LD_LIBRARY_PATH=$(XILINX_XRT)/lib:$(POCL_PATH)/lib:$(VORTEX_RT_PATH):$(LLVM_VORTEX)/lib:$(LD_LIBRARY_PATH) $(POCL_CC_FLAGS) VORTEX_DRIVER=xrt ./$(PROJECT) $(OPTS) endif .depend: $(SRCS) diff --git a/tests/regression/basic/Makefile b/tests/regression/basic/Makefile index a8e86cc179..5940ca65cc 100644 --- a/tests/regression/basic/Makefile +++ b/tests/regression/basic/Makefile @@ -13,7 +13,7 @@ OPTS ?= -n256 include ../common.mk -VX_LDFLAGS = -Wl,-Bstatic,--gc-sections,-T,$(VORTEX_KN_PATH)/scripts/link$(XLEN).ld,--defsym=STARTUP_ADDR=$(STARTUP_ADDR) +VX_LDFLAGS = -Wl,-Bstatic,--gc-sections,-T,$(VORTEX_HOME)/kernel/scripts/link$(XLEN).ld,--defsym=STARTUP_ADDR=$(STARTUP_ADDR) VX_CC = 
$(RISCV_TOOLCHAIN_PATH)/bin/$(RISCV_PREFIX)-gcc VX_CXX = $(RISCV_TOOLCHAIN_PATH)/bin/$(RISCV_PREFIX)-g++ diff --git a/tests/regression/common.mk b/tests/regression/common.mk index 12b45e8486..c4a00bc135 100644 --- a/tests/regression/common.mk +++ b/tests/regression/common.mk @@ -5,6 +5,9 @@ TARGET ?= opaesim XRT_SYN_DIR ?= $(VORTEX_HOME)/hw/syn/xilinx/xrt XRT_DEVICE_INDEX ?= 0 +VORTEX_RT_PATH ?= $(ROOT_DIR)/runtime +VORTEX_KN_PATH ?= $(ROOT_DIR)/kernel + ifeq ($(XLEN),64) VX_CFLAGS += -march=rv64imafd -mabi=lp64d STARTUP_ADDR ?= 0x180000000 @@ -36,7 +39,7 @@ VX_CP = $(LLVM_VORTEX)/bin/llvm-objcopy #VX_CP = $(RISCV_TOOLCHAIN_PATH)/bin/$(RISCV_PREFIX)-objcopy VX_CFLAGS += -O3 -mcmodel=medany -fno-rtti -fno-exceptions -nostartfiles -nostdlib -fdata-sections -ffunction-sections -VX_CFLAGS += -I$(VORTEX_KN_PATH)/include -I$(ROOT_DIR)/hw +VX_CFLAGS += -I$(VORTEX_HOME)/kernel/include -I$(ROOT_DIR)/hw VX_CFLAGS += -DXLEN_$(XLEN) VX_CFLAGS += -DNDEBUG @@ -45,12 +48,12 @@ VX_LIBS += -L$(LIBC_VORTEX)/lib -lm -lc VX_LIBS += $(LIBCRT_VORTEX)/lib/baremetal/libclang_rt.builtins-riscv$(XLEN).a #VX_LIBS += -lgcc -VX_LDFLAGS += -Wl,-Bstatic,--gc-sections,-T,$(VORTEX_KN_PATH)/scripts/link$(XLEN).ld,--defsym=STARTUP_ADDR=$(STARTUP_ADDR) $(ROOT_DIR)/kernel/libvortex.a $(VX_LIBS) +VX_LDFLAGS += -Wl,-Bstatic,--gc-sections,-T,$(VORTEX_HOME)/kernel/scripts/link$(XLEN).ld,--defsym=STARTUP_ADDR=$(STARTUP_ADDR) $(VORTEX_KN_PATH)/libvortex.a $(VX_LIBS) CXXFLAGS += -std=c++11 -Wall -Wextra -pedantic -Wfatal-errors -CXXFLAGS += -I$(VORTEX_RT_PATH)/include -I$(ROOT_DIR)/hw +CXXFLAGS += -I$(VORTEX_HOME)/runtime/include -I$(ROOT_DIR)/hw -LDFLAGS += -L$(ROOT_DIR)/runtime -lvortex +LDFLAGS += -L$(VORTEX_RT_PATH) -lvortex # Debugging ifdef DEBUG @@ -86,19 +89,19 @@ $(PROJECT): $(SRCS) $(CXX) $(CXXFLAGS) $^ $(LDFLAGS) -o $@ run-simx: $(PROJECT) kernel.vxbin - LD_LIBRARY_PATH=$(ROOT_DIR)/runtime:$(LD_LIBRARY_PATH) VORTEX_DRIVER=simx ./$(PROJECT) $(OPTS) + 
LD_LIBRARY_PATH=$(VORTEX_RT_PATH):$(LD_LIBRARY_PATH) VORTEX_DRIVER=simx ./$(PROJECT) $(OPTS) run-rtlsim: $(PROJECT) kernel.vxbin - LD_LIBRARY_PATH=$(ROOT_DIR)/runtime:$(LD_LIBRARY_PATH) VORTEX_DRIVER=rtlsim ./$(PROJECT) $(OPTS) + LD_LIBRARY_PATH=$(VORTEX_RT_PATH):$(LD_LIBRARY_PATH) VORTEX_DRIVER=rtlsim ./$(PROJECT) $(OPTS) run-opae: $(PROJECT) kernel.vxbin - SCOPE_JSON_PATH=$(ROOT_DIR)/runtime/scope.json OPAE_DRV_PATHS=$(OPAE_DRV_PATHS) LD_LIBRARY_PATH=$(ROOT_DIR)/runtime:$(LD_LIBRARY_PATH) VORTEX_DRIVER=opae ./$(PROJECT) $(OPTS) + SCOPE_JSON_PATH=$(VORTEX_RT_PATH)/scope.json OPAE_DRV_PATHS=$(OPAE_DRV_PATHS) LD_LIBRARY_PATH=$(VORTEX_RT_PATH):$(LD_LIBRARY_PATH) VORTEX_DRIVER=opae ./$(PROJECT) $(OPTS) run-xrt: $(PROJECT) kernel.vxbin ifeq ($(TARGET), hw) - XRT_INI_PATH=$(XRT_SYN_DIR)/xrt.ini EMCONFIG_PATH=$(FPGA_BIN_DIR) XRT_DEVICE_INDEX=$(XRT_DEVICE_INDEX) XRT_XCLBIN_PATH=$(FPGA_BIN_DIR)/vortex_afu.xclbin LD_LIBRARY_PATH=$(XILINX_XRT)/lib:$(ROOT_DIR)/runtime:$(LD_LIBRARY_PATH) VORTEX_DRIVER=xrt ./$(PROJECT) $(OPTS) + XRT_INI_PATH=$(XRT_SYN_DIR)/xrt.ini EMCONFIG_PATH=$(FPGA_BIN_DIR) XRT_DEVICE_INDEX=$(XRT_DEVICE_INDEX) XRT_XCLBIN_PATH=$(FPGA_BIN_DIR)/vortex_afu.xclbin LD_LIBRARY_PATH=$(XILINX_XRT)/lib:$(VORTEX_RT_PATH):$(LD_LIBRARY_PATH) VORTEX_DRIVER=xrt ./$(PROJECT) $(OPTS) else - XCL_EMULATION_MODE=$(TARGET) XRT_INI_PATH=$(XRT_SYN_DIR)/xrt.ini EMCONFIG_PATH=$(FPGA_BIN_DIR) XRT_DEVICE_INDEX=$(XRT_DEVICE_INDEX) XRT_XCLBIN_PATH=$(FPGA_BIN_DIR)/vortex_afu.xclbin LD_LIBRARY_PATH=$(XILINX_XRT)/lib:$(ROOT_DIR)/runtime:$(LD_LIBRARY_PATH) VORTEX_DRIVER=xrt ./$(PROJECT) $(OPTS) + XCL_EMULATION_MODE=$(TARGET) XRT_INI_PATH=$(XRT_SYN_DIR)/xrt.ini EMCONFIG_PATH=$(FPGA_BIN_DIR) XRT_DEVICE_INDEX=$(XRT_DEVICE_INDEX) XRT_XCLBIN_PATH=$(FPGA_BIN_DIR)/vortex_afu.xclbin LD_LIBRARY_PATH=$(XILINX_XRT)/lib:$(VORTEX_RT_PATH):$(LD_LIBRARY_PATH) VORTEX_DRIVER=xrt ./$(PROJECT) $(OPTS) endif .depend: $(SRCS) diff --git a/tests/unittest/common.mk b/tests/unittest/common.mk index 
a6f6b2794c..384a2f02c8 100644 --- a/tests/unittest/common.mk +++ b/tests/unittest/common.mk @@ -1,6 +1,8 @@ +ROOT_DIR := $(realpath ../../..) + CXXFLAGS += -std=c++11 -Wall -Wextra -pedantic -Wfatal-errors -CXXFLAGS += -I$(VORTEX_RT_PATH)/common +CXXFLAGS += -I$(VORTEX_HOME)/runtime/common # Debugging ifdef DEBUG From 9fc9b433073e54b7d4da1a6cadba8786fbe8e27e Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sat, 17 Aug 2024 02:18:04 -0700 Subject: [PATCH 048/407] OPAE runtime bug fix --- runtime/opae/Makefile | 5 +++-- runtime/opae/driver.h | 4 ++++ 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/runtime/opae/Makefile b/runtime/opae/Makefile index 1a9810eca8..9650915ea4 100644 --- a/runtime/opae/Makefile +++ b/runtime/opae/Makefile @@ -1,3 +1,4 @@ +ROOT_DIR := $(realpath ../..) include ../common.mk TARGET ?= opaesim @@ -25,9 +26,9 @@ SRCS = $(SRC_DIR)/vortex.cpp $(SRC_DIR)/driver.cpp # set up target types ifeq ($(TARGET), opaesim) OPAESIM = $(DESTDIR)/libopae-c-sim.so - CXXFLAGS += -I$(SIM_DIR)/opaesim + CXXFLAGS += -DOPAESIM -I$(SIM_DIR)/opaesim else - CXXFLAGS += -I$(SYN_DIR) + CXXFLAGS += -I$(SYN_DIR) -I$(ROOT_DIR)/hw/syn/altera/opae endif # Debugging diff --git a/runtime/opae/driver.h b/runtime/opae/driver.h index 0d1d4daa77..0a45b6f678 100644 --- a/runtime/opae/driver.h +++ b/runtime/opae/driver.h @@ -13,7 +13,11 @@ #pragma once +#ifdef OPAESIM #include +#else +#include +#endif typedef fpga_result (*pfn_fpgaGetProperties)(fpga_token token, fpga_properties *prop); typedef fpga_result (*pfn_fpgaPropertiesSetObjectType)(fpga_properties prop, fpga_objtype objtype); From 4b6f8efeaa2178627b390ababcc4d7952c219c11 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sat, 17 Aug 2024 04:07:10 -0700 Subject: [PATCH 049/407] removing trace_pkg to fix unsupported package dependencies --- hw/rtl/VX_gpu_pkg.sv | 376 +++++++++++++++++++++++++++++++ hw/rtl/afu/opae/vortex_afu.sv | 3 - hw/rtl/core/VX_commit.sv | 2 +- hw/rtl/core/VX_dcr_data.sv | 2 +- 
hw/rtl/core/VX_decode.sv | 2 +- hw/rtl/core/VX_issue_slice.sv | 2 +- hw/rtl/core/VX_lsu_slice.sv | 2 +- hw/rtl/core/VX_trace_pkg.sv | 399 --------------------------------- hw/unittest/core_top/Makefile | 2 +- hw/unittest/issue_top/Makefile | 2 +- sim/opaesim/Makefile | 2 +- sim/rtlsim/Makefile | 2 +- sim/xrtsim/Makefile | 2 +- 13 files changed, 386 insertions(+), 412 deletions(-) delete mode 100644 hw/rtl/core/VX_trace_pkg.sv diff --git a/hw/rtl/VX_gpu_pkg.sv b/hw/rtl/VX_gpu_pkg.sv index 393f2a66f2..f290678559 100644 --- a/hw/rtl/VX_gpu_pkg.sv +++ b/hw/rtl/VX_gpu_pkg.sv @@ -308,6 +308,382 @@ package VX_gpu_pkg; `IGNORE_UNUSED_END +////////////////////////////////// Tracing //////////////////////////////////// + +`ifdef SIMULATION + +`ifdef SV_DPI + import "DPI-C" function void dpi_trace(input int level, input string format /*verilator sformat*/); +`endif + + task trace_ex_type(input int level, input [`EX_BITS-1:0] ex_type); + case (ex_type) + `EX_ALU: `TRACE(level, ("ALU")); + `EX_LSU: `TRACE(level, ("LSU")); + `EX_FPU: `TRACE(level, ("FPU")); + `EX_SFU: `TRACE(level, ("SFU")); + default: `TRACE(level, ("?")); + endcase + endtask + + task trace_ex_op(input int level, + input [`EX_BITS-1:0] ex_type, + input [`INST_OP_BITS-1:0] op_type, + input VX_gpu_pkg::op_args_t op_args + ); + case (ex_type) + `EX_ALU: begin + case (op_args.alu.xtype) + `ALU_TYPE_ARITH: begin + if (op_args.alu.is_w) begin + if (op_args.alu.use_imm) begin + case (`INST_ALU_BITS'(op_type)) + `INST_ALU_ADD: `TRACE(level, ("ADDIW")); + `INST_ALU_SLL: `TRACE(level, ("SLLIW")); + `INST_ALU_SRL: `TRACE(level, ("SRLIW")); + `INST_ALU_SRA: `TRACE(level, ("SRAIW")); + default: `TRACE(level, ("?")); + endcase + end else begin + case (`INST_ALU_BITS'(op_type)) + `INST_ALU_ADD: `TRACE(level, ("ADDW")); + `INST_ALU_SUB: `TRACE(level, ("SUBW")); + `INST_ALU_SLL: `TRACE(level, ("SLLW")); + `INST_ALU_SRL: `TRACE(level, ("SRLW")); + `INST_ALU_SRA: `TRACE(level, ("SRAW")); + default: `TRACE(level, ("?")); + endcase 
+ end + end else begin + if (op_args.alu.use_imm) begin + case (`INST_ALU_BITS'(op_type)) + `INST_ALU_ADD: `TRACE(level, ("ADDI")); + `INST_ALU_SLL: `TRACE(level, ("SLLI")); + `INST_ALU_SRL: `TRACE(level, ("SRLI")); + `INST_ALU_SRA: `TRACE(level, ("SRAI")); + `INST_ALU_SLT: `TRACE(level, ("SLTI")); + `INST_ALU_SLTU: `TRACE(level, ("SLTIU")); + `INST_ALU_XOR: `TRACE(level, ("XORI")); + `INST_ALU_OR: `TRACE(level, ("ORI")); + `INST_ALU_AND: `TRACE(level, ("ANDI")); + `INST_ALU_LUI: `TRACE(level, ("LUI")); + `INST_ALU_AUIPC: `TRACE(level, ("AUIPC")); + default: `TRACE(level, ("?")); + endcase + end else begin + case (`INST_ALU_BITS'(op_type)) + `INST_ALU_ADD: `TRACE(level, ("ADD")); + `INST_ALU_SUB: `TRACE(level, ("SUB")); + `INST_ALU_SLL: `TRACE(level, ("SLL")); + `INST_ALU_SRL: `TRACE(level, ("SRL")); + `INST_ALU_SRA: `TRACE(level, ("SRA")); + `INST_ALU_SLT: `TRACE(level, ("SLT")); + `INST_ALU_SLTU: `TRACE(level, ("SLTU")); + `INST_ALU_XOR: `TRACE(level, ("XOR")); + `INST_ALU_OR: `TRACE(level, ("OR")); + `INST_ALU_AND: `TRACE(level, ("AND")); + `INST_ALU_CZEQ: `TRACE(level, ("CZERO.EQZ")); + `INST_ALU_CZNE: `TRACE(level, ("CZERO.NEZ")); + default: `TRACE(level, ("?")); + endcase + end + end + end + `ALU_TYPE_BRANCH: begin + case (`INST_BR_BITS'(op_type)) + `INST_BR_EQ: `TRACE(level, ("BEQ")); + `INST_BR_NE: `TRACE(level, ("BNE")); + `INST_BR_LT: `TRACE(level, ("BLT")); + `INST_BR_GE: `TRACE(level, ("BGE")); + `INST_BR_LTU: `TRACE(level, ("BLTU")); + `INST_BR_GEU: `TRACE(level, ("BGEU")); + `INST_BR_JAL: `TRACE(level, ("JAL")); + `INST_BR_JALR: `TRACE(level, ("JALR")); + `INST_BR_ECALL: `TRACE(level, ("ECALL")); + `INST_BR_EBREAK:`TRACE(level, ("EBREAK")); + `INST_BR_URET: `TRACE(level, ("URET")); + `INST_BR_SRET: `TRACE(level, ("SRET")); + `INST_BR_MRET: `TRACE(level, ("MRET")); + default: `TRACE(level, ("?")); + endcase + end + `ALU_TYPE_MULDIV: begin + if (op_args.alu.is_w) begin + case (`INST_M_BITS'(op_type)) + `INST_M_MUL: `TRACE(level, ("MULW")); + 
`INST_M_DIV: `TRACE(level, ("DIVW")); + `INST_M_DIVU: `TRACE(level, ("DIVUW")); + `INST_M_REM: `TRACE(level, ("REMW")); + `INST_M_REMU: `TRACE(level, ("REMUW")); + default: `TRACE(level, ("?")); + endcase + end else begin + case (`INST_M_BITS'(op_type)) + `INST_M_MUL: `TRACE(level, ("MUL")); + `INST_M_MULH: `TRACE(level, ("MULH")); + `INST_M_MULHSU:`TRACE(level, ("MULHSU")); + `INST_M_MULHU: `TRACE(level, ("MULHU")); + `INST_M_DIV: `TRACE(level, ("DIV")); + `INST_M_DIVU: `TRACE(level, ("DIVU")); + `INST_M_REM: `TRACE(level, ("REM")); + `INST_M_REMU: `TRACE(level, ("REMU")); + default: `TRACE(level, ("?")); + endcase + end + end + default: `TRACE(level, ("?")); + endcase + end + `EX_LSU: begin + if (op_args.lsu.is_float) begin + case (`INST_LSU_BITS'(op_type)) + `INST_LSU_LW: `TRACE(level, ("FLW")); + `INST_LSU_LD: `TRACE(level, ("FLD")); + `INST_LSU_SW: `TRACE(level, ("FSW")); + `INST_LSU_SD: `TRACE(level, ("FSD")); + default: `TRACE(level, ("?")); + endcase + end else begin + case (`INST_LSU_BITS'(op_type)) + `INST_LSU_LB: `TRACE(level, ("LB")); + `INST_LSU_LH: `TRACE(level, ("LH")); + `INST_LSU_LW: `TRACE(level, ("LW")); + `INST_LSU_LD: `TRACE(level, ("LD")); + `INST_LSU_LBU:`TRACE(level, ("LBU")); + `INST_LSU_LHU:`TRACE(level, ("LHU")); + `INST_LSU_LWU:`TRACE(level, ("LWU")); + `INST_LSU_SB: `TRACE(level, ("SB")); + `INST_LSU_SH: `TRACE(level, ("SH")); + `INST_LSU_SW: `TRACE(level, ("SW")); + `INST_LSU_SD: `TRACE(level, ("SD")); + `INST_LSU_FENCE:`TRACE(level,("FENCE")); + default: `TRACE(level, ("?")); + endcase + end + end + `EX_FPU: begin + case (`INST_FPU_BITS'(op_type)) + `INST_FPU_ADD: begin + if (op_args.fpu.fmt[0]) + `TRACE(level, ("FADD.D")); + else + `TRACE(level, ("FADD.S")); + end + `INST_FPU_SUB: begin + if (op_args.fpu.fmt[0]) + `TRACE(level, ("FSUB.D")); + else + `TRACE(level, ("FSUB.S")); + end + `INST_FPU_MUL: begin + if (op_args.fpu.fmt[0]) + `TRACE(level, ("FMUL.D")); + else + `TRACE(level, ("FMUL.S")); + end + `INST_FPU_DIV: begin + if 
(op_args.fpu.fmt[0]) + `TRACE(level, ("FDIV.D")); + else + `TRACE(level, ("FDIV.S")); + end + `INST_FPU_SQRT: begin + if (op_args.fpu.fmt[0]) + `TRACE(level, ("FSQRT.D")); + else + `TRACE(level, ("FSQRT.S")); + end + `INST_FPU_MADD: begin + if (op_args.fpu.fmt[0]) + `TRACE(level, ("FMADD.D")); + else + `TRACE(level, ("FMADD.S")); + end + `INST_FPU_MSUB: begin + if (op_args.fpu.fmt[0]) + `TRACE(level, ("FMSUB.D")); + else + `TRACE(level, ("FMSUB.S")); + end + `INST_FPU_NMADD: begin + if (op_args.fpu.fmt[0]) + `TRACE(level, ("FNMADD.D")); + else + `TRACE(level, ("FNMADD.S")); + end + `INST_FPU_NMSUB: begin + if (op_args.fpu.fmt[0]) + `TRACE(level, ("FNMSUB.D")); + else + `TRACE(level, ("FNMSUB.S")); + end + `INST_FPU_CMP: begin + if (op_args.fpu.fmt[0]) begin + case (op_args.fpu.frm[1:0]) + 0: `TRACE(level, ("FLE.D")); + 1: `TRACE(level, ("FLT.D")); + 2: `TRACE(level, ("FEQ.D")); + default: `TRACE(level, ("?")); + endcase + end else begin + case (op_args.fpu.frm[1:0]) + 0: `TRACE(level, ("FLE.S")); + 1: `TRACE(level, ("FLT.S")); + 2: `TRACE(level, ("FEQ.S")); + default: `TRACE(level, ("?")); + endcase + end + end + `INST_FPU_F2F: begin + if (op_args.fpu.fmt[0]) begin + `TRACE(level, ("FCVT.D.S")); + end else begin + `TRACE(level, ("FCVT.S.D")); + end + end + `INST_FPU_F2I: begin + if (op_args.fpu.fmt[0]) begin + if (op_args.fpu.fmt[1]) begin + `TRACE(level, ("FCVT.L.D")); + end else begin + `TRACE(level, ("FCVT.W.D")); + end + end else begin + if (op_args.fpu.fmt[1]) begin + `TRACE(level, ("FCVT.L.S")); + end else begin + `TRACE(level, ("FCVT.W.S")); + end + end + end + `INST_FPU_F2U: begin + if (op_args.fpu.fmt[0]) begin + if (op_args.fpu.fmt[1]) begin + `TRACE(level, ("FCVT.LU.D")); + end else begin + `TRACE(level, ("FCVT.WU.D")); + end + end else begin + if (op_args.fpu.fmt[1]) begin + `TRACE(level, ("FCVT.LU.S")); + end else begin + `TRACE(level, ("FCVT.WU.S")); + end + end + end + `INST_FPU_I2F: begin + if (op_args.fpu.fmt[0]) begin + if (op_args.fpu.fmt[1]) 
begin + `TRACE(level, ("FCVT.D.L")); + end else begin + `TRACE(level, ("FCVT.D.W")); + end + end else begin + if (op_args.fpu.fmt[1]) begin + `TRACE(level, ("FCVT.S.L")); + end else begin + `TRACE(level, ("FCVT.S.W")); + end + end + end + `INST_FPU_U2F: begin + if (op_args.fpu.fmt[0]) begin + if (op_args.fpu.fmt[1]) begin + `TRACE(level, ("FCVT.D.LU")); + end else begin + `TRACE(level, ("FCVT.D.WU")); + end + end else begin + if (op_args.fpu.fmt[1]) begin + `TRACE(level, ("FCVT.S.LU")); + end else begin + `TRACE(level, ("FCVT.S.WU")); + end + end + end + `INST_FPU_MISC: begin + if (op_args.fpu.fmt[0]) begin + case (op_args.fpu.frm) + 0: `TRACE(level, ("FSGNJ.D")); + 1: `TRACE(level, ("FSGNJN.D")); + 2: `TRACE(level, ("FSGNJX.D")); + 3: `TRACE(level, ("FCLASS.D")); + 4: `TRACE(level, ("FMV.X.D")); + 5: `TRACE(level, ("FMV.D.X")); + 6: `TRACE(level, ("FMIN.D")); + 7: `TRACE(level, ("FMAX.D")); + endcase + end else begin + case (op_args.fpu.frm) + 0: `TRACE(level, ("FSGNJ.S")); + 1: `TRACE(level, ("FSGNJN.S")); + 2: `TRACE(level, ("FSGNJX.S")); + 3: `TRACE(level, ("FCLASS.S")); + 4: `TRACE(level, ("FMV.X.S")); + 5: `TRACE(level, ("FMV.S.X")); + 6: `TRACE(level, ("FMIN.S")); + 7: `TRACE(level, ("FMAX.S")); + endcase + end + end + default: `TRACE(level, ("?")); + endcase + end + `EX_SFU: begin + case (`INST_SFU_BITS'(op_type)) + `INST_SFU_TMC: `TRACE(level, ("TMC")); + `INST_SFU_WSPAWN:`TRACE(level, ("WSPAWN")); + `INST_SFU_SPLIT: begin if (op_args.wctl.is_neg) `TRACE(level, ("SPLIT.N")); else `TRACE(level, ("SPLIT")); end + `INST_SFU_JOIN: `TRACE(level, ("JOIN")); + `INST_SFU_BAR: `TRACE(level, ("BAR")); + `INST_SFU_PRED: begin if (op_args.wctl.is_neg) `TRACE(level, ("PRED.N")); else `TRACE(level, ("PRED")); end + `INST_SFU_CSRRW: begin if (op_args.csr.use_imm) `TRACE(level, ("CSRRWI")); else `TRACE(level, ("CSRRW")); end + `INST_SFU_CSRRS: begin if (op_args.csr.use_imm) `TRACE(level, ("CSRRSI")); else `TRACE(level, ("CSRRS")); end + `INST_SFU_CSRRC: begin if 
(op_args.csr.use_imm) `TRACE(level, ("CSRRCI")); else `TRACE(level, ("CSRRC")); end + default: `TRACE(level, ("?")); + endcase + end + default: `TRACE(level, ("?")); + endcase + endtask + + task trace_op_args(input int level, + input [`EX_BITS-1:0] ex_type, + input [`INST_OP_BITS-1:0] op_type, + input VX_gpu_pkg::op_args_t op_args + ); + case (ex_type) + `EX_ALU: begin + `TRACE(level, (", use_PC=%b, use_imm=%b, imm=0x%0h", op_args.alu.use_PC, op_args.alu.use_imm, op_args.alu.imm)); + end + `EX_LSU: begin + `TRACE(level, (", offset=0x%0h", op_args.lsu.offset)); + end + `EX_FPU: begin + `TRACE(level, (", fmt=0x%0h, frm=0x%0h", op_args.fpu.fmt, op_args.fpu.frm)); + end + `EX_SFU: begin + if (`INST_SFU_IS_CSR(op_type)) begin + `TRACE(level, (", addr=0x%0h, use_imm=%b, imm=0x%0h", op_args.csr.addr, op_args.csr.use_imm, op_args.csr.imm)); + end + end + default:; + endcase + endtask + + task trace_base_dcr(input int level, input [`VX_DCR_ADDR_WIDTH-1:0] addr); + case (addr) + `VX_DCR_BASE_STARTUP_ADDR0: `TRACE(level, ("STARTUP_ADDR0")); + `VX_DCR_BASE_STARTUP_ADDR1: `TRACE(level, ("STARTUP_ADDR1")); + `VX_DCR_BASE_STARTUP_ARG0: `TRACE(level, ("STARTUP_ARG0")); + `VX_DCR_BASE_STARTUP_ARG1: `TRACE(level, ("STARTUP_ARG1")); + `VX_DCR_BASE_MPM_CLASS: `TRACE(level, ("MPM_CLASS")); + default: `TRACE(level, ("?")); + endcase + endtask + +`endif + endpackage `endif // VX_GPU_PKG_VH diff --git a/hw/rtl/afu/opae/vortex_afu.sv b/hw/rtl/afu/opae/vortex_afu.sv index b67cae3a59..cb5725e783 100644 --- a/hw/rtl/afu/opae/vortex_afu.sv +++ b/hw/rtl/afu/opae/vortex_afu.sv @@ -518,7 +518,6 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ ); assign cci_vx_mem_bus_if[1].req_data.flags = '0; - `UNUSED_VAR (cci_vx_mem_bus_if[1].req_data.flags) //-- @@ -571,7 +570,6 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ ); assign cci_vx_mem_bus_if[0].req_data.flags = '0; - `UNUSED_VAR (cci_vx_mem_bus_if[0].req_data.flags) //-- 
VX_mem_bus_if #( @@ -639,7 +637,6 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ .avs_readdatavalid(avs_readdatavalid) ); - assign mem_bus_if[0].req_data.flags = '0; `UNUSED_VAR (mem_bus_if[0].req_data.flags) // CCI-P Read Request /////////////////////////////////////////////////////////// diff --git a/hw/rtl/core/VX_commit.sv b/hw/rtl/core/VX_commit.sv index 7106cc65f0..f945c79039 100644 --- a/hw/rtl/core/VX_commit.sv +++ b/hw/rtl/core/VX_commit.sv @@ -13,7 +13,7 @@ `include "VX_define.vh" -module VX_commit import VX_gpu_pkg::*, VX_trace_pkg::*; #( +module VX_commit import VX_gpu_pkg::*; #( parameter `STRING INSTANCE_ID = "" ) ( input wire clk, diff --git a/hw/rtl/core/VX_dcr_data.sv b/hw/rtl/core/VX_dcr_data.sv index 4ac137547c..b20d95fc7b 100644 --- a/hw/rtl/core/VX_dcr_data.sv +++ b/hw/rtl/core/VX_dcr_data.sv @@ -13,7 +13,7 @@ `include "VX_define.vh" -module VX_dcr_data import VX_gpu_pkg::*, VX_trace_pkg::*; ( +module VX_dcr_data import VX_gpu_pkg::*; ( input wire clk, input wire reset, diff --git a/hw/rtl/core/VX_decode.sv b/hw/rtl/core/VX_decode.sv index 9660859ce7..4f6ffe100f 100644 --- a/hw/rtl/core/VX_decode.sv +++ b/hw/rtl/core/VX_decode.sv @@ -27,7 +27,7 @@ use_``x = 1 `endif -module VX_decode import VX_gpu_pkg::*, VX_trace_pkg::*; #( +module VX_decode import VX_gpu_pkg::*; #( parameter `STRING INSTANCE_ID = "" ) ( input wire clk, diff --git a/hw/rtl/core/VX_issue_slice.sv b/hw/rtl/core/VX_issue_slice.sv index 03b91b5fe8..4b4e168a29 100644 --- a/hw/rtl/core/VX_issue_slice.sv +++ b/hw/rtl/core/VX_issue_slice.sv @@ -13,7 +13,7 @@ `include "VX_define.vh" -module VX_issue_slice import VX_gpu_pkg::*, VX_trace_pkg::*; #( +module VX_issue_slice import VX_gpu_pkg::*; #( parameter `STRING INSTANCE_ID = "", parameter ISSUE_ID = 0 ) ( diff --git a/hw/rtl/core/VX_lsu_slice.sv b/hw/rtl/core/VX_lsu_slice.sv index 6de9011821..f83b23fb3c 100644 --- a/hw/rtl/core/VX_lsu_slice.sv +++ b/hw/rtl/core/VX_lsu_slice.sv @@ -13,7 +13,7 @@ 
`include "VX_define.vh" -module VX_lsu_slice import VX_gpu_pkg::*, VX_trace_pkg::*; #( +module VX_lsu_slice import VX_gpu_pkg::*; #( parameter `STRING INSTANCE_ID = "" ) ( `SCOPE_IO_DECL diff --git a/hw/rtl/core/VX_trace_pkg.sv b/hw/rtl/core/VX_trace_pkg.sv deleted file mode 100644 index b4eae96fed..0000000000 --- a/hw/rtl/core/VX_trace_pkg.sv +++ /dev/null @@ -1,399 +0,0 @@ -// Copyright © 2019-2023 -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -`ifndef VX_TRACE_PKG_VH -`define VX_TRACE_PKG_VH - -`include "VX_define.vh" - -package VX_trace_pkg; - -`ifdef SIMULATION - -`ifdef SV_DPI - import "DPI-C" function void dpi_trace(input int level, input string format /*verilator sformat*/); -`endif - - import VX_gpu_pkg::*; - - task trace_ex_type(input int level, input [`EX_BITS-1:0] ex_type); - case (ex_type) - `EX_ALU: `TRACE(level, ("ALU")); - `EX_LSU: `TRACE(level, ("LSU")); - `EX_FPU: `TRACE(level, ("FPU")); - `EX_SFU: `TRACE(level, ("SFU")); - default: `TRACE(level, ("?")); - endcase - endtask - - task trace_ex_op(input int level, - input [`EX_BITS-1:0] ex_type, - input [`INST_OP_BITS-1:0] op_type, - input VX_gpu_pkg::op_args_t op_args - ); - case (ex_type) - `EX_ALU: begin - case (op_args.alu.xtype) - `ALU_TYPE_ARITH: begin - if (op_args.alu.is_w) begin - if (op_args.alu.use_imm) begin - case (`INST_ALU_BITS'(op_type)) - `INST_ALU_ADD: `TRACE(level, ("ADDIW")); - `INST_ALU_SLL: `TRACE(level, ("SLLIW")); - `INST_ALU_SRL: `TRACE(level, ("SRLIW")); - 
`INST_ALU_SRA: `TRACE(level, ("SRAIW")); - default: `TRACE(level, ("?")); - endcase - end else begin - case (`INST_ALU_BITS'(op_type)) - `INST_ALU_ADD: `TRACE(level, ("ADDW")); - `INST_ALU_SUB: `TRACE(level, ("SUBW")); - `INST_ALU_SLL: `TRACE(level, ("SLLW")); - `INST_ALU_SRL: `TRACE(level, ("SRLW")); - `INST_ALU_SRA: `TRACE(level, ("SRAW")); - default: `TRACE(level, ("?")); - endcase - end - end else begin - if (op_args.alu.use_imm) begin - case (`INST_ALU_BITS'(op_type)) - `INST_ALU_ADD: `TRACE(level, ("ADDI")); - `INST_ALU_SLL: `TRACE(level, ("SLLI")); - `INST_ALU_SRL: `TRACE(level, ("SRLI")); - `INST_ALU_SRA: `TRACE(level, ("SRAI")); - `INST_ALU_SLT: `TRACE(level, ("SLTI")); - `INST_ALU_SLTU: `TRACE(level, ("SLTIU")); - `INST_ALU_XOR: `TRACE(level, ("XORI")); - `INST_ALU_OR: `TRACE(level, ("ORI")); - `INST_ALU_AND: `TRACE(level, ("ANDI")); - `INST_ALU_LUI: `TRACE(level, ("LUI")); - `INST_ALU_AUIPC: `TRACE(level, ("AUIPC")); - default: `TRACE(level, ("?")); - endcase - end else begin - case (`INST_ALU_BITS'(op_type)) - `INST_ALU_ADD: `TRACE(level, ("ADD")); - `INST_ALU_SUB: `TRACE(level, ("SUB")); - `INST_ALU_SLL: `TRACE(level, ("SLL")); - `INST_ALU_SRL: `TRACE(level, ("SRL")); - `INST_ALU_SRA: `TRACE(level, ("SRA")); - `INST_ALU_SLT: `TRACE(level, ("SLT")); - `INST_ALU_SLTU: `TRACE(level, ("SLTU")); - `INST_ALU_XOR: `TRACE(level, ("XOR")); - `INST_ALU_OR: `TRACE(level, ("OR")); - `INST_ALU_AND: `TRACE(level, ("AND")); - `INST_ALU_CZEQ: `TRACE(level, ("CZERO.EQZ")); - `INST_ALU_CZNE: `TRACE(level, ("CZERO.NEZ")); - default: `TRACE(level, ("?")); - endcase - end - end - end - `ALU_TYPE_BRANCH: begin - case (`INST_BR_BITS'(op_type)) - `INST_BR_EQ: `TRACE(level, ("BEQ")); - `INST_BR_NE: `TRACE(level, ("BNE")); - `INST_BR_LT: `TRACE(level, ("BLT")); - `INST_BR_GE: `TRACE(level, ("BGE")); - `INST_BR_LTU: `TRACE(level, ("BLTU")); - `INST_BR_GEU: `TRACE(level, ("BGEU")); - `INST_BR_JAL: `TRACE(level, ("JAL")); - `INST_BR_JALR: `TRACE(level, ("JALR")); - `INST_BR_ECALL: 
`TRACE(level, ("ECALL")); - `INST_BR_EBREAK:`TRACE(level, ("EBREAK")); - `INST_BR_URET: `TRACE(level, ("URET")); - `INST_BR_SRET: `TRACE(level, ("SRET")); - `INST_BR_MRET: `TRACE(level, ("MRET")); - default: `TRACE(level, ("?")); - endcase - end - `ALU_TYPE_MULDIV: begin - if (op_args.alu.is_w) begin - case (`INST_M_BITS'(op_type)) - `INST_M_MUL: `TRACE(level, ("MULW")); - `INST_M_DIV: `TRACE(level, ("DIVW")); - `INST_M_DIVU: `TRACE(level, ("DIVUW")); - `INST_M_REM: `TRACE(level, ("REMW")); - `INST_M_REMU: `TRACE(level, ("REMUW")); - default: `TRACE(level, ("?")); - endcase - end else begin - case (`INST_M_BITS'(op_type)) - `INST_M_MUL: `TRACE(level, ("MUL")); - `INST_M_MULH: `TRACE(level, ("MULH")); - `INST_M_MULHSU:`TRACE(level, ("MULHSU")); - `INST_M_MULHU: `TRACE(level, ("MULHU")); - `INST_M_DIV: `TRACE(level, ("DIV")); - `INST_M_DIVU: `TRACE(level, ("DIVU")); - `INST_M_REM: `TRACE(level, ("REM")); - `INST_M_REMU: `TRACE(level, ("REMU")); - default: `TRACE(level, ("?")); - endcase - end - end - default: `TRACE(level, ("?")); - endcase - end - `EX_LSU: begin - if (op_args.lsu.is_float) begin - case (`INST_LSU_BITS'(op_type)) - `INST_LSU_LW: `TRACE(level, ("FLW")); - `INST_LSU_LD: `TRACE(level, ("FLD")); - `INST_LSU_SW: `TRACE(level, ("FSW")); - `INST_LSU_SD: `TRACE(level, ("FSD")); - default: `TRACE(level, ("?")); - endcase - end else begin - case (`INST_LSU_BITS'(op_type)) - `INST_LSU_LB: `TRACE(level, ("LB")); - `INST_LSU_LH: `TRACE(level, ("LH")); - `INST_LSU_LW: `TRACE(level, ("LW")); - `INST_LSU_LD: `TRACE(level, ("LD")); - `INST_LSU_LBU:`TRACE(level, ("LBU")); - `INST_LSU_LHU:`TRACE(level, ("LHU")); - `INST_LSU_LWU:`TRACE(level, ("LWU")); - `INST_LSU_SB: `TRACE(level, ("SB")); - `INST_LSU_SH: `TRACE(level, ("SH")); - `INST_LSU_SW: `TRACE(level, ("SW")); - `INST_LSU_SD: `TRACE(level, ("SD")); - `INST_LSU_FENCE:`TRACE(level,("FENCE")); - default: `TRACE(level, ("?")); - endcase - end - end - `EX_FPU: begin - case (`INST_FPU_BITS'(op_type)) - `INST_FPU_ADD: 
begin - if (op_args.fpu.fmt[0]) - `TRACE(level, ("FADD.D")); - else - `TRACE(level, ("FADD.S")); - end - `INST_FPU_SUB: begin - if (op_args.fpu.fmt[0]) - `TRACE(level, ("FSUB.D")); - else - `TRACE(level, ("FSUB.S")); - end - `INST_FPU_MUL: begin - if (op_args.fpu.fmt[0]) - `TRACE(level, ("FMUL.D")); - else - `TRACE(level, ("FMUL.S")); - end - `INST_FPU_DIV: begin - if (op_args.fpu.fmt[0]) - `TRACE(level, ("FDIV.D")); - else - `TRACE(level, ("FDIV.S")); - end - `INST_FPU_SQRT: begin - if (op_args.fpu.fmt[0]) - `TRACE(level, ("FSQRT.D")); - else - `TRACE(level, ("FSQRT.S")); - end - `INST_FPU_MADD: begin - if (op_args.fpu.fmt[0]) - `TRACE(level, ("FMADD.D")); - else - `TRACE(level, ("FMADD.S")); - end - `INST_FPU_MSUB: begin - if (op_args.fpu.fmt[0]) - `TRACE(level, ("FMSUB.D")); - else - `TRACE(level, ("FMSUB.S")); - end - `INST_FPU_NMADD: begin - if (op_args.fpu.fmt[0]) - `TRACE(level, ("FNMADD.D")); - else - `TRACE(level, ("FNMADD.S")); - end - `INST_FPU_NMSUB: begin - if (op_args.fpu.fmt[0]) - `TRACE(level, ("FNMSUB.D")); - else - `TRACE(level, ("FNMSUB.S")); - end - `INST_FPU_CMP: begin - if (op_args.fpu.fmt[0]) begin - case (op_args.fpu.frm[1:0]) - 0: `TRACE(level, ("FLE.D")); - 1: `TRACE(level, ("FLT.D")); - 2: `TRACE(level, ("FEQ.D")); - default: `TRACE(level, ("?")); - endcase - end else begin - case (op_args.fpu.frm[1:0]) - 0: `TRACE(level, ("FLE.S")); - 1: `TRACE(level, ("FLT.S")); - 2: `TRACE(level, ("FEQ.S")); - default: `TRACE(level, ("?")); - endcase - end - end - `INST_FPU_F2F: begin - if (op_args.fpu.fmt[0]) begin - `TRACE(level, ("FCVT.D.S")); - end else begin - `TRACE(level, ("FCVT.S.D")); - end - end - `INST_FPU_F2I: begin - if (op_args.fpu.fmt[0]) begin - if (op_args.fpu.fmt[1]) begin - `TRACE(level, ("FCVT.L.D")); - end else begin - `TRACE(level, ("FCVT.W.D")); - end - end else begin - if (op_args.fpu.fmt[1]) begin - `TRACE(level, ("FCVT.L.S")); - end else begin - `TRACE(level, ("FCVT.W.S")); - end - end - end - `INST_FPU_F2U: begin - if 
(op_args.fpu.fmt[0]) begin - if (op_args.fpu.fmt[1]) begin - `TRACE(level, ("FCVT.LU.D")); - end else begin - `TRACE(level, ("FCVT.WU.D")); - end - end else begin - if (op_args.fpu.fmt[1]) begin - `TRACE(level, ("FCVT.LU.S")); - end else begin - `TRACE(level, ("FCVT.WU.S")); - end - end - end - `INST_FPU_I2F: begin - if (op_args.fpu.fmt[0]) begin - if (op_args.fpu.fmt[1]) begin - `TRACE(level, ("FCVT.D.L")); - end else begin - `TRACE(level, ("FCVT.D.W")); - end - end else begin - if (op_args.fpu.fmt[1]) begin - `TRACE(level, ("FCVT.S.L")); - end else begin - `TRACE(level, ("FCVT.S.W")); - end - end - end - `INST_FPU_U2F: begin - if (op_args.fpu.fmt[0]) begin - if (op_args.fpu.fmt[1]) begin - `TRACE(level, ("FCVT.D.LU")); - end else begin - `TRACE(level, ("FCVT.D.WU")); - end - end else begin - if (op_args.fpu.fmt[1]) begin - `TRACE(level, ("FCVT.S.LU")); - end else begin - `TRACE(level, ("FCVT.S.WU")); - end - end - end - `INST_FPU_MISC: begin - if (op_args.fpu.fmt[0]) begin - case (op_args.fpu.frm) - 0: `TRACE(level, ("FSGNJ.D")); - 1: `TRACE(level, ("FSGNJN.D")); - 2: `TRACE(level, ("FSGNJX.D")); - 3: `TRACE(level, ("FCLASS.D")); - 4: `TRACE(level, ("FMV.X.D")); - 5: `TRACE(level, ("FMV.D.X")); - 6: `TRACE(level, ("FMIN.D")); - 7: `TRACE(level, ("FMAX.D")); - endcase - end else begin - case (op_args.fpu.frm) - 0: `TRACE(level, ("FSGNJ.S")); - 1: `TRACE(level, ("FSGNJN.S")); - 2: `TRACE(level, ("FSGNJX.S")); - 3: `TRACE(level, ("FCLASS.S")); - 4: `TRACE(level, ("FMV.X.S")); - 5: `TRACE(level, ("FMV.S.X")); - 6: `TRACE(level, ("FMIN.S")); - 7: `TRACE(level, ("FMAX.S")); - endcase - end - end - default: `TRACE(level, ("?")); - endcase - end - `EX_SFU: begin - case (`INST_SFU_BITS'(op_type)) - `INST_SFU_TMC: `TRACE(level, ("TMC")); - `INST_SFU_WSPAWN:`TRACE(level, ("WSPAWN")); - `INST_SFU_SPLIT: begin if (op_args.wctl.is_neg) `TRACE(level, ("SPLIT.N")); else `TRACE(level, ("SPLIT")); end - `INST_SFU_JOIN: `TRACE(level, ("JOIN")); - `INST_SFU_BAR: `TRACE(level, 
("BAR")); - `INST_SFU_PRED: begin if (op_args.wctl.is_neg) `TRACE(level, ("PRED.N")); else `TRACE(level, ("PRED")); end - `INST_SFU_CSRRW: begin if (op_args.csr.use_imm) `TRACE(level, ("CSRRWI")); else `TRACE(level, ("CSRRW")); end - `INST_SFU_CSRRS: begin if (op_args.csr.use_imm) `TRACE(level, ("CSRRSI")); else `TRACE(level, ("CSRRS")); end - `INST_SFU_CSRRC: begin if (op_args.csr.use_imm) `TRACE(level, ("CSRRCI")); else `TRACE(level, ("CSRRC")); end - default: `TRACE(level, ("?")); - endcase - end - default: `TRACE(level, ("?")); - endcase - endtask - - task trace_op_args(input int level, - input [`EX_BITS-1:0] ex_type, - input [`INST_OP_BITS-1:0] op_type, - input VX_gpu_pkg::op_args_t op_args - ); - case (ex_type) - `EX_ALU: begin - `TRACE(level, (", use_PC=%b, use_imm=%b, imm=0x%0h", op_args.alu.use_PC, op_args.alu.use_imm, op_args.alu.imm)); - end - `EX_LSU: begin - `TRACE(level, (", offset=0x%0h", op_args.lsu.offset)); - end - `EX_FPU: begin - `TRACE(level, (", fmt=0x%0h, frm=0x%0h", op_args.fpu.fmt, op_args.fpu.frm)); - end - `EX_SFU: begin - if (`INST_SFU_IS_CSR(op_type)) begin - `TRACE(level, (", addr=0x%0h, use_imm=%b, imm=0x%0h", op_args.csr.addr, op_args.csr.use_imm, op_args.csr.imm)); - end - end - default:; - endcase - endtask - - task trace_base_dcr(input int level, input [`VX_DCR_ADDR_WIDTH-1:0] addr); - case (addr) - `VX_DCR_BASE_STARTUP_ADDR0: `TRACE(level, ("STARTUP_ADDR0")); - `VX_DCR_BASE_STARTUP_ADDR1: `TRACE(level, ("STARTUP_ADDR1")); - `VX_DCR_BASE_STARTUP_ARG0: `TRACE(level, ("STARTUP_ARG0")); - `VX_DCR_BASE_STARTUP_ARG1: `TRACE(level, ("STARTUP_ARG1")); - `VX_DCR_BASE_MPM_CLASS: `TRACE(level, ("MPM_CLASS")); - default: `TRACE(level, ("?")); - endcase - endtask - -`endif - -endpackage - -`endif // VX_TRACE_PKG_VH diff --git a/hw/unittest/core_top/Makefile b/hw/unittest/core_top/Makefile index d9fbf40f6f..f9d037999d 100644 --- a/hw/unittest/core_top/Makefile +++ b/hw/unittest/core_top/Makefile @@ -16,7 +16,7 @@ SRCS += $(SRC_DIR)/main.cpp 
DBG_TRACE_FLAGS := -DDBG_TRACE_CACHE -RTL_PKGS := $(RTL_DIR)/VX_gpu_pkg.sv $(RTL_DIR)/fpu/VX_fpu_pkg.sv $(RTL_DIR)/core/VX_trace_pkg.sv +RTL_PKGS := $(RTL_DIR)/VX_gpu_pkg.sv $(RTL_DIR)/fpu/VX_fpu_pkg.sv RTL_INCLUDE := -I$(RTL_DIR) -I$(DPI_DIR) -I$(RTL_DIR)/libs RTL_INCLUDE += -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/mem -I$(RTL_DIR)/fpu -I$(RTL_DIR)/core diff --git a/hw/unittest/issue_top/Makefile b/hw/unittest/issue_top/Makefile index 7e298849c2..b6a8b05273 100644 --- a/hw/unittest/issue_top/Makefile +++ b/hw/unittest/issue_top/Makefile @@ -16,7 +16,7 @@ SRCS += $(SRC_DIR)/main.cpp DBG_TRACE_FLAGS := -DDBG_TRACE_CACHE -RTL_PKGS := $(RTL_DIR)/VX_gpu_pkg.sv $(RTL_DIR)/core/VX_trace_pkg.sv +RTL_PKGS := $(RTL_DIR)/VX_gpu_pkg.sv RTL_INCLUDE := -I$(RTL_DIR) -I$(DPI_DIR) -I$(RTL_DIR)/libs RTL_INCLUDE += -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/mem -I$(RTL_DIR)/core diff --git a/sim/opaesim/Makefile b/sim/opaesim/Makefile index 2e549ca74d..9c6314ecfd 100644 --- a/sim/opaesim/Makefile +++ b/sim/opaesim/Makefile @@ -54,7 +54,7 @@ SRCS += $(DPI_DIR)/util_dpi.cpp $(DPI_DIR)/float_dpi.cpp SRCS += $(SRC_DIR)/fpga.cpp $(SRC_DIR)/opae_sim.cpp RTL_PKGS = $(AFU_DIR)/local_mem_cfg_pkg.sv $(AFU_DIR)/ccip/ccip_if_pkg.sv -RTL_PKGS += $(RTL_DIR)/VX_gpu_pkg.sv $(RTL_DIR)/fpu/VX_fpu_pkg.sv $(RTL_DIR)/core/VX_trace_pkg.sv +RTL_PKGS += $(RTL_DIR)/VX_gpu_pkg.sv $(RTL_DIR)/fpu/VX_fpu_pkg.sv FPU_INCLUDE = -I$(RTL_DIR)/fpu ifneq (,$(findstring FPU_FPNEW,$(CONFIGS))) diff --git a/sim/rtlsim/Makefile b/sim/rtlsim/Makefile index 3deffc759d..638d7403f9 100644 --- a/sim/rtlsim/Makefile +++ b/sim/rtlsim/Makefile @@ -26,7 +26,7 @@ DBG_TRACE_FLAGS += -DDBG_TRACE_GBAR DBG_FLAGS += -DDEBUG_LEVEL=$(DEBUG) -DVCD_OUTPUT $(DBG_TRACE_FLAGS) -RTL_PKGS = $(RTL_DIR)/VX_gpu_pkg.sv $(RTL_DIR)/fpu/VX_fpu_pkg.sv $(RTL_DIR)/core/VX_trace_pkg.sv +RTL_PKGS = $(RTL_DIR)/VX_gpu_pkg.sv $(RTL_DIR)/fpu/VX_fpu_pkg.sv FPU_INCLUDE = -I$(RTL_DIR)/fpu ifneq (,$(findstring FPU_FPNEW,$(CONFIGS))) diff --git a/sim/xrtsim/Makefile 
b/sim/xrtsim/Makefile index 765e3e2682..1e0d11b664 100644 --- a/sim/xrtsim/Makefile +++ b/sim/xrtsim/Makefile @@ -53,7 +53,7 @@ SRCS = $(COMMON_DIR)/util.cpp $(COMMON_DIR)/mem.cpp $(COMMON_DIR)/rvfloats.cpp $ SRCS += $(DPI_DIR)/util_dpi.cpp $(DPI_DIR)/float_dpi.cpp SRCS += $(SRC_DIR)/fpga.cpp $(SRC_DIR)/xrt_sim.cpp -RTL_PKGS += $(RTL_DIR)/VX_gpu_pkg.sv $(RTL_DIR)/fpu/VX_fpu_pkg.sv $(RTL_DIR)/core/VX_trace_pkg.sv +RTL_PKGS += $(RTL_DIR)/VX_gpu_pkg.sv $(RTL_DIR)/fpu/VX_fpu_pkg.sv FPU_INCLUDE = -I$(RTL_DIR)/fpu ifneq (,$(findstring FPU_FPNEW,$(CONFIGS))) From 20b82fd34d24675debc14a5eda0baebba45331a0 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sat, 17 Aug 2024 04:09:50 -0700 Subject: [PATCH 050/407] update configure to deep-copy syn directory tree --- configure | 2 +- hw/syn/altera/opae/Makefile | 10 ++++------ hw/syn/altera/quartus/common.mk | 4 +--- hw/syn/xilinx/xrt/Makefile | 6 ++---- hw/syn/yosys/Makefile | 8 +++----- 5 files changed, 11 insertions(+), 19 deletions(-) diff --git a/configure b/configure index 62975784b7..37e95a2bd1 100755 --- a/configure +++ b/configure @@ -164,7 +164,7 @@ if [ "$OSVERSION" == "unsupported" ]; then fi # project subdirectories to build -SUBDIRS=("." "!ci" "!perf" "hw*" "kernel*" "runtime*" "sim*" "tests*") +SUBDIRS=("." 
"!ci" "!perf" "hw*" "!hw/syn*" "kernel*" "runtime*" "sim*" "tests*") # Get the directory of the script SCRIPT_DIR="$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )" diff --git a/hw/syn/altera/opae/Makefile b/hw/syn/altera/opae/Makefile index 62a9bb72c1..4e031ea69a 100644 --- a/hw/syn/altera/opae/Makefile +++ b/hw/syn/altera/opae/Makefile @@ -7,8 +7,6 @@ PREFIX ?= build$(XLEN) TARGET ?= fpga NUM_CORES ?= 1 -SRC_DIR := $(VORTEX_HOME)/hw/syn/altera/opae - RTL_DIR := $(VORTEX_HOME)/hw/rtl DPI_DIR := $(VORTEX_HOME)/hw/dpi AFU_DIR := $(RTL_DIR)/afu/opae @@ -105,17 +103,17 @@ $(IP_CACHE_DIR)/ip-gen.log: $(SCRIPT_DIR)/ip_gen.sh $(IP_CACHE_DIR) swconfig: vortex_afu.h -vortex_afu.h: $(SRC_DIR)/vortex_afu.json +vortex_afu.h: vortex_afu.json afu_json_mgr json-info --afu-json=$^ --c-hdr=$@ $(BUILD_DIR)/setup.cfg: - mkdir -p $(BUILD_DIR); cp $(SRC_DIR)/setup.cfg $(BUILD_DIR)/setup.cfg + mkdir -p $(BUILD_DIR); cp setup.cfg $(BUILD_DIR)/setup.cfg $(BUILD_DIR)/vortex_afu.qsf: - mkdir -p $(BUILD_DIR); cp $(SRC_DIR)/vortex_afu.qsf $(BUILD_DIR)/vortex_afu.qsf + mkdir -p $(BUILD_DIR); cp vortex_afu.qsf $(BUILD_DIR)/vortex_afu.qsf $(BUILD_DIR)/vortex_afu.json: - mkdir -p $(BUILD_DIR); cp $(SRC_DIR)/vortex_afu.json $(BUILD_DIR)/vortex_afu.json + mkdir -p $(BUILD_DIR); cp vortex_afu.json $(BUILD_DIR)/vortex_afu.json gen-sources: $(BUILD_DIR)/sources.txt $(BUILD_DIR)/sources.txt: diff --git a/hw/syn/altera/quartus/common.mk b/hw/syn/altera/quartus/common.mk index 3890dcfe87..d84797d5ac 100644 --- a/hw/syn/altera/quartus/common.mk +++ b/hw/syn/altera/quartus/common.mk @@ -1,8 +1,6 @@ ROOT_DIR := $(realpath ../../../../../..) 
include $(ROOT_DIR)/config.mk -SRC_DIR := $(VORTEX_HOME)/hw/syn/altera/quartus - RTL_DIR := $(VORTEX_HOME)/hw/rtl AFU_DIR := $(RTL_DIR)/afu/opae SCRIPT_DIR := $(VORTEX_HOME)/hw/scripts @@ -79,7 +77,7 @@ smart.log: $(PROJECT_FILES) # Project initialization $(PROJECT_FILES): gen-sources - quartus_sh -t $(SRC_DIR)/project.tcl -project $(PROJECT) -family $(FAMILY) -device $(DEVICE) -top $(TOP_LEVEL_ENTITY) -src "$(SRC_FILE)" -sdc $(SRC_DIR)/project.sdc -inc "src" + quartus_sh -t project.tcl -project $(PROJECT) -family $(FAMILY) -device $(DEVICE) -top $(TOP_LEVEL_ENTITY) -src "$(SRC_FILE)" -sdc project.sdc -inc "src" syn.chg: $(STAMP) syn.chg diff --git a/hw/syn/xilinx/xrt/Makefile b/hw/syn/xilinx/xrt/Makefile index e1acce8d65..e5cab8a081 100644 --- a/hw/syn/xilinx/xrt/Makefile +++ b/hw/syn/xilinx/xrt/Makefile @@ -19,8 +19,6 @@ NUM_CORES ?= 1 PREFIX ?= build$(XLEN) MAX_JOBS ?= 8 -SRC_DIR := $(VORTEX_HOME)/hw/syn/xilinx/xrt - RTL_DIR := $(VORTEX_HOME)/hw/rtl DPI_DIR := $(VORTEX_HOME)/hw/dpi AFU_DIR := $(RTL_DIR)/afu/xrt @@ -94,7 +92,7 @@ VPP_FLAGS += --connectivity.sp vortex_afu_1.m_axi_mem_0:HBM[0:15] endif VPP_FLAGS += --report_level 2 -VPP_FLAGS += --config $(SRC_DIR)/vitis.ini +VPP_FLAGS += --config vitis.ini # Enable perf counters ifdef PERF @@ -163,7 +161,7 @@ $(BUILD_DIR)/scope.json: $(BUILD_DIR)/vortex.xml gen-xo: $(XO_CONTAINER) $(XO_CONTAINER): $(BUILD_DIR)/sources.txt - mkdir -p $(BUILD_DIR); cd $(BUILD_DIR); $(VIVADO) -mode batch -source $(SRC_DIR)/scripts/gen_xo.tcl -tclargs ../$(XO_CONTAINER) vortex_afu sources.txt $(SCRIPT_DIR) ../$(BUILD_DIR) + mkdir -p $(BUILD_DIR); cd $(BUILD_DIR); $(VIVADO) -mode batch -source scripts/gen_xo.tcl -tclargs ../$(XO_CONTAINER) vortex_afu sources.txt $(SCRIPT_DIR) ../$(BUILD_DIR) gen-bin: $(XCLBIN_CONTAINER) $(XCLBIN_CONTAINER): $(XO_CONTAINER) $(SCOPE_JSON) diff --git a/hw/syn/yosys/Makefile b/hw/syn/yosys/Makefile index 80bfdae02b..493c7ba6b9 100644 --- a/hw/syn/yosys/Makefile +++ b/hw/syn/yosys/Makefile @@ -1,8 +1,6 @@ 
ROOT_DIR := $(realpath ../../..) include $(ROOT_DIR)/config.mk -SRC_DIR := $(VORTEX_HOME)/hw/syn/yosys - TOP_LEVEL_ENTITY ?= Vortex PREFIX ?= build NUM_CORES ?= 1 @@ -84,13 +82,13 @@ $(BUILD_DIR)/project.v: gen-sources cd $(BUILD_DIR); $(SCRIPT_DIR)/sv2v.sh -t$(TOP_LEVEL_ENTITY) -Isrc -oproject.v build: $(BUILD_DIR)/project.v - cd $(BUILD_DIR); $(SRC_DIR)/synth.sh -t$(TOP_LEVEL_ENTITY) -sproject.v + cd $(BUILD_DIR); synth.sh -t$(TOP_LEVEL_ENTITY) -sproject.v elaborate: $(BUILD_DIR)/project.v - cd $(BUILD_DIR); $(SRC_DIR)/synth.sh -t$(TOP_LEVEL_ENTITY) -sproject.v -P="elaborate" + cd $(BUILD_DIR); synth.sh -t$(TOP_LEVEL_ENTITY) -sproject.v -P="elaborate" synthesis: $(BUILD_DIR)/project.v - cd $(BUILD_DIR); $(SRC_DIR)/synth.sh -t$(TOP_LEVEL_ENTITY) -sproject.v -P="synthesis" + cd $(BUILD_DIR); synth.sh -t$(TOP_LEVEL_ENTITY) -sproject.v -P="synthesis" clean: $(RMDIR) $(BUILD_DIR) From 8fe02093e2b3f01e360b15506df40316cab8f0a5 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sat, 17 Aug 2024 04:11:16 -0700 Subject: [PATCH 051/407] minor update --- hw/scripts/gen_sources.sh | 84 +++++++++++++++++++++------------------ 1 file changed, 46 insertions(+), 38 deletions(-) diff --git a/hw/scripts/gen_sources.sh b/hw/scripts/gen_sources.sh index 0748b36320..8a12a6c563 100755 --- a/hw/scripts/gen_sources.sh +++ b/hw/scripts/gen_sources.sh @@ -1,18 +1,20 @@ #!/bin/bash # Copyright © 2019-2023 -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License.
+SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )" + defines=() includes=() externs=() @@ -21,40 +23,47 @@ output_file="" define_header="" top_module="" copy_folder="" -prepropressor=0 +preprocessor=0 defines_str="" params_str="" includes_str="" -script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )" +# Helper function to append options +add_option() { + if [ -n "$1" ]; then + echo "$1 $2" + else + echo "$2" + fi +} -# parse command arguments +# Parse command arguments while getopts D:G:T:I:J:O:H:C:Ph flag do case "${flag}" in D) defines+=( ${OPTARG} ) - defines_str+="-D${OPTARG} " + defines_str=$(add_option "$defines_str" "-D${OPTARG}") ;; - G) params_str+="-G${OPTARG} " + G) params_str=$(add_option "$params_str" "-G${OPTARG}") ;; - T) top_module=( ${OPTARG} ) + T) top_module="${OPTARG}" ;; I) includes+=( ${OPTARG} ) - includes_str+="-I${OPTARG} " + includes_str=$(add_option "$includes_str" "-I${OPTARG}") ;; J) externs+=( ${OPTARG} ) - includes_str+="-I${OPTARG} " + includes_str=$(add_option "$includes_str" "-I${OPTARG}") ;; - O) output_file=( ${OPTARG} ) + O) output_file="${OPTARG}" ;; - H) define_header=( ${OPTARG} ) + H) define_header="${OPTARG}" ;; - C) copy_folder=( ${OPTARG} ) + C) copy_folder="${OPTARG}" ;; - P) prepropressor=1 + P) preprocessor=1 ;; - h) echo "Usage: [-D] [-G=] [-T] [-I] [-J] [-O] [-C: copy to] [-H] [-P: macro prepropressing] [-h help]" + h) echo "Usage: [-D] [-G=] [-T] [-I] [-J] [-O] [-C: copy to] [-H] [-P: macro preprocessing] [-h help]" exit 0 ;; \?) 
echo "Invalid option: -$OPTARG" 1>&2 @@ -70,33 +79,32 @@ if [ "$define_header" != "" ]; then # dump defines into a header file for value in ${defines[@]}; do arrNV=(${value//=/ }) - if (( ${#arrNV[@]} > 1 )); - then + if (( ${#arrNV[@]} > 1 )); then echo "\`define ${arrNV[0]} ${arrNV[1]}" else echo "\`define $value" - fi + fi done - } > $define_header + } > "$define_header" fi if [ "$copy_folder" != "" ]; then - # copy source files - mkdir -p $copy_folder + # copy source files + mkdir -p "$copy_folder" for dir in ${includes[@]}; do find "$dir" -maxdepth 1 -type f | while read -r file; do file_ext="${file##*.}" - file_name=$(basename -- $file) - if [ $prepropressor != 0 ] && { [ "$file_ext" == "v" ] || [ "$file_ext" == "sv" ]; }; then + file_name=$(basename -- "$file") + if [ $preprocessor != 0 ] && { [ "$file_ext" == "v" ] || [ "$file_ext" == "sv" ]; }; then if [[ -n "$params_str" && $file_name == "$top_module."* ]]; then temp_file=$(mktemp) - $script_dir/repl_params.py $params_str -T$top_module $file > $temp_file - verilator $defines_str $includes_str -E -P $temp_file > $copy_folder/$file_name + "$SCRIPT_DIR/repl_params.py" "$params_str" -T"$top_module" "$file" > "$temp_file" + verilator "$defines_str" "$includes_str" -E -P "$temp_file" > "$copy_folder/$file_name" else - verilator $defines_str $includes_str -E -P $file > $copy_folder/$file_name - fi + verilator "$defines_str" "$includes_str" -E -P "$file" > "$copy_folder/$file_name" + fi else - cp $file $copy_folder + cp "$file" "$copy_folder" fi done done @@ -112,7 +120,7 @@ if [ "$output_file" != "" ]; then fi for dir in ${externs[@]}; do - echo "+incdir+$(realpath $dir)" + echo "+incdir+$(realpath "$dir")" done for dir in ${externs[@]}; do @@ -124,24 +132,24 @@ if [ "$output_file" != "" ]; then if [ "$copy_folder" != "" ]; then # dump include directories - echo "+incdir+$(realpath $copy_folder)" + echo "+incdir+$(realpath "$copy_folder")" # dump source files - find "$(realpath $copy_folder)" -maxdepth 1 -type f 
-name "*_pkg.sv" -print - find "$(realpath $copy_folder)" -maxdepth 1 -type f \( -name "*.v" -o -name "*.sv" \) ! -name "*_pkg.sv" -print + find "$(realpath "$copy_folder")" -maxdepth 1 -type f -name "*_pkg.sv" -print + find "$(realpath "$copy_folder")" -maxdepth 1 -type f \( -name "*.v" -o -name "*.sv" \) ! -name "*_pkg.sv" -print else # dump include directories for dir in ${includes[@]}; do - echo "+incdir+$(realpath $dir)" + echo "+incdir+$(realpath "$dir")" done - + # dump source files for dir in ${includes[@]}; do - find "$(realpath $dir)" -maxdepth 1 -type f -name "*_pkg.sv" -print + find "$(realpath "$dir")" -maxdepth 1 -type f -name "*_pkg.sv" -print done for dir in ${includes[@]}; do - find "$(realpath $dir)" -maxdepth 1 -type f \( -name "*.v" -o -name "*.sv" \) ! -name "*_pkg.sv" -print + find "$(realpath "$dir")" -maxdepth 1 -type f \( -name "*.v" -o -name "*.sv" \) ! -name "*_pkg.sv" -print done fi - } > $output_file -fi + } > "$output_file" +fi \ No newline at end of file From 1f43d4a2fce89ba012cbae8c4f8fecb2c421b021 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sat, 17 Aug 2024 04:55:32 -0700 Subject: [PATCH 052/407] ASE simulation fixes + docs update --- docs/altera_fpga_guide.md | 19 ++++++++++-- hw/syn/altera/README | 16 ++++------ .../altera/opae/{run_ase.sh => start_ase.sh} | 18 ----------- hw/syn/altera/opae/stop_ase.sh | 31 +++++++++++++++++++ 4 files changed, 53 insertions(+), 31 deletions(-) rename hw/syn/altera/opae/{run_ase.sh => start_ase.sh} (74%) create mode 100755 hw/syn/altera/opae/stop_ase.sh diff --git a/docs/altera_fpga_guide.md b/docs/altera_fpga_guide.md index 61d1ae26e3..e8070beb25 100644 --- a/docs/altera_fpga_guide.md +++ b/docs/altera_fpga_guide.md @@ -34,7 +34,7 @@ The hardware configuration file `/hw/rtl/VX_config.vh` defines all the hardware - `NUM_THREADS`: Number of threads per warps - `PERF_ENABLE`: enable the use of all profile counters -You configure the syntesis build from the command line: +You can configure the 
synthesis build from the command line: $ CONFIGS="-DPERF_ENABLE -DNUM_THREADS=8" make @@ -43,7 +43,7 @@ OPAE Build Progress You could check the last 10 lines in the build log for possible errors until build completion. - $ tail -n 10 /build.log + $ tail -n 10 /synth/build.log Check if the build is still running by looking for quartus_sh, quartus_syn, or quartus_fit programs. @@ -70,10 +70,23 @@ Sample FPGA Run Test Ensure you have the correct opae runtime for the FPGA target - $ make -C runtime/opae clean $ TARGET=FPGA make -C runtime/opae Run the following from your Vortex build directory $ TARGET=fpga ./ci/blackbox.sh --driver=opae --app=sgemm --args="-n128" +Testing OPAE Synthesis using Intel ASE Simulation +------------------------------------------------- + +Building ASE synthesis + + $ TARGET=asesim make -C runtime/opae + +Building ASE runtime + + $ TARGET=asesim make -C runtime/opae + +Running ASE simulation + + $ ASE_LOG=0 ASE_WORKDIR=/synth/work TARGET=asesim ./ci/blackbox.sh --driver=opae --app=sgemm --args="-n16" \ No newline at end of file diff --git a/hw/syn/altera/README b/hw/syn/altera/README index 11d048442f..3f9168d5c6 100644 --- a/hw/syn/altera/README +++ b/hw/syn/altera/README @@ -10,10 +10,10 @@ cd build_fpga && qsub-synth # check last 10 lines in build log for possible errors tail -n 10 ./build_arria10_fpga_1c/build.log -# Check if the job is submitted to the queue and running. Status should be R +# Check if the job is submitted to the queue and running. Status should be R qstat | grep -# Constantly monitoring the job submitted to the queue. Stop this using Ctrl+C +# Constantly monitoring the job submitted to the queue. 
Stop this using Ctrl+C watch ‘qstat | grep ’ # @@ -35,7 +35,7 @@ fpgaconf --bus 0xaf /synth/vortex_afu.gbs # get portid fpgainfo port -# Running the Test case +# Running the Test case cd /driver/tests/basic make run-fpga @@ -54,13 +54,9 @@ TARGET=asesim make -C runtime/opae PREFIX=build_base CONFIGS="-DEXT_F_DISABLE -DL1_DISABLE -DSM_DISABLE -DNUM_WARPS=2 -DNUM_THREADS=2" TARGET=asesim make # ASE test runs -./run_ase.sh build_base_arria10_asesim_1c ../../../../tests/regression/basic/basic -n1 -t0 -./run_ase.sh build_base_arria10_asesim_1c ../../../../tests/regression/basic/basic -n1 -t1 -./run_ase.sh build_base_arria10_asesim_1c ../../../../tests/regression/basic/basic -n16 -./run_ase.sh build_base_arria10_asesim_1c ../../../../tests/regression/demo/demo -n16 -./run_ase.sh build_base_arria10_asesim_1c ../../../../tests/regression/dogfood/dogfood -n16 -./run_ase.sh build_base_arria10_asesim_1c ../../../../tests/opencl/vecadd/vecadd -./run_ase.sh build_base_arria10_asesim_1c ../../../../tests/opencl/sgemm/sgemm -n4 +start_ase.sh +ASE_LOG=0 ASE_WORKDIR=/synth/work TARGET=asesim ./ci/blackbox.sh --driver=opae --app=vecadd +stop_ase.sh # modify "vsim_run.tcl" to dump VCD trace vcd file trace.vcd diff --git a/hw/syn/altera/opae/run_ase.sh b/hw/syn/altera/opae/start_ase.sh similarity index 74% rename from hw/syn/altera/opae/run_ase.sh rename to hw/syn/altera/opae/start_ase.sh index 04fd275400..d408b21703 100755 --- a/hw/syn/altera/opae/run_ase.sh +++ b/hw/syn/altera/opae/start_ase.sh @@ -17,12 +17,6 @@ SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" BUILD_DIR=$(realpath $1) -PROGRAM=$(basename "$2") -PROGRAM_DIR=`dirname $2` - -POCL_PATH=$TOOLDIR/pocl -VORTEX_RT_PATH=$SCRIPT_DIR/../../../../runtime - # Export ASE_WORKDIR variable export ASE_WORKDIR=$BUILD_DIR/synth/work @@ -35,7 +29,6 @@ rm -f $BUILD_DIR/synth/nohup.out pushd $BUILD_DIR/synth echo " [DBG] starting ASE simnulator (stdout saved to '$BUILD_DIR/synth/nohup.out')" setsid make sim 
&> /dev/null & -SIM_PID=$! popd # Wait for simulator readiness @@ -44,14 +37,3 @@ while [ ! -f $ASE_WORKDIR/.ase_ready.pid ] do sleep 1 done - -# run application -pushd $PROGRAM_DIR -shift 2 -echo " [DBG] running ./$PROGRAM $*" -ASE_LOG=0 LD_LIBRARY_PATH=$POCL_PATH/lib:$VORTEX_RT_PATH/opae:$LD_LIBRARY_PATH ./$PROGRAM $* -popd - -# stop the simulator (kill process group) -kill -- -$(ps -o pgid= $SIM_PID | grep -o '[0-9]*') -wait $SIM_PID 2> /dev/null \ No newline at end of file diff --git a/hw/syn/altera/opae/stop_ase.sh b/hw/syn/altera/opae/stop_ase.sh new file mode 100755 index 0000000000..caee290db1 --- /dev/null +++ b/hw/syn/altera/opae/stop_ase.sh @@ -0,0 +1,31 @@ +#!/bin/bash + +# Copyright © 2019-2023 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" + +BUILD_DIR=$(realpath $1) + +# Export ASE_WORKDIR variable +export ASE_WORKDIR=$BUILD_DIR/synth/work + +# stop the simulator (kill process group) +if [ -f "$ASE_WORKDIR/.ase_ready.pid" ]; then + SIM_PID=$(grep '^pid' "$ASE_WORKDIR/.ase_ready.pid" | cut -d'=' -f2 | tr -d ' ') + echo " [DBG] stopping ASE simulator (pid=$SIM_PID)" + kill -- -$(ps -o pgid= $SIM_PID | grep -o '[0-9]*') + wait $SIM_PID 2> /dev/null +else + echo "ASE PID file does not exist." 
+fi \ No newline at end of file From 62a4ee7a3e75ed6ea05816df07f6673a69a77ece Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sat, 17 Aug 2024 05:32:21 -0700 Subject: [PATCH 053/407] minor update --- hw/scripts/gen_sources.sh | 6 +++--- hw/syn/yosys/Makefile | 8 +++++--- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/hw/scripts/gen_sources.sh b/hw/scripts/gen_sources.sh index 8a12a6c563..ed9143eb3e 100755 --- a/hw/scripts/gen_sources.sh +++ b/hw/scripts/gen_sources.sh @@ -98,10 +98,10 @@ if [ "$copy_folder" != "" ]; then if [ $preprocessor != 0 ] && { [ "$file_ext" == "v" ] || [ "$file_ext" == "sv" ]; }; then if [[ -n "$params_str" && $file_name == "$top_module."* ]]; then temp_file=$(mktemp) - "$SCRIPT_DIR/repl_params.py" "$params_str" -T"$top_module" "$file" > "$temp_file" - verilator "$defines_str" "$includes_str" -E -P "$temp_file" > "$copy_folder/$file_name" + $script_dir/repl_params.py $params_str -T$top_module "$file" > "$temp_file" + verilator $defines_str $includes_str -E -P "$temp_file" > "$copy_folder/$file_name" else - verilator "$defines_str" "$includes_str" -E -P "$file" > "$copy_folder/$file_name" + verilator $defines_str $includes_str -E -P "$file" > "$copy_folder/$file_name" fi else cp "$file" "$copy_folder" diff --git a/hw/syn/yosys/Makefile b/hw/syn/yosys/Makefile index 493c7ba6b9..80bfdae02b 100644 --- a/hw/syn/yosys/Makefile +++ b/hw/syn/yosys/Makefile @@ -1,6 +1,8 @@ ROOT_DIR := $(realpath ../../..) 
include $(ROOT_DIR)/config.mk +SRC_DIR := $(VORTEX_HOME)/hw/syn/yosys + TOP_LEVEL_ENTITY ?= Vortex PREFIX ?= build NUM_CORES ?= 1 @@ -82,13 +84,13 @@ $(BUILD_DIR)/project.v: gen-sources cd $(BUILD_DIR); $(SCRIPT_DIR)/sv2v.sh -t$(TOP_LEVEL_ENTITY) -Isrc -oproject.v build: $(BUILD_DIR)/project.v - cd $(BUILD_DIR); synth.sh -t$(TOP_LEVEL_ENTITY) -sproject.v + cd $(BUILD_DIR); $(SRC_DIR)/synth.sh -t$(TOP_LEVEL_ENTITY) -sproject.v elaborate: $(BUILD_DIR)/project.v - cd $(BUILD_DIR); synth.sh -t$(TOP_LEVEL_ENTITY) -sproject.v -P="elaborate" + cd $(BUILD_DIR); $(SRC_DIR)/synth.sh -t$(TOP_LEVEL_ENTITY) -sproject.v -P="elaborate" synthesis: $(BUILD_DIR)/project.v - cd $(BUILD_DIR); synth.sh -t$(TOP_LEVEL_ENTITY) -sproject.v -P="synthesis" + cd $(BUILD_DIR); $(SRC_DIR)/synth.sh -t$(TOP_LEVEL_ENTITY) -sproject.v -P="synthesis" clean: $(RMDIR) $(BUILD_DIR) From 9638f5a6e63c742ef28df490b67363615d0fe60e Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sat, 17 Aug 2024 06:05:26 -0700 Subject: [PATCH 054/407] minor update --- hw/syn/altera/opae/Makefile | 10 ++++++---- hw/syn/altera/quartus/common.mk | 4 +++- hw/syn/xilinx/xrt/Makefile | 6 ++++-- 3 files changed, 13 insertions(+), 7 deletions(-) diff --git a/hw/syn/altera/opae/Makefile b/hw/syn/altera/opae/Makefile index 4e031ea69a..62a9bb72c1 100644 --- a/hw/syn/altera/opae/Makefile +++ b/hw/syn/altera/opae/Makefile @@ -7,6 +7,8 @@ PREFIX ?= build$(XLEN) TARGET ?= fpga NUM_CORES ?= 1 +SRC_DIR := $(VORTEX_HOME)/hw/syn/altera/opae + RTL_DIR := $(VORTEX_HOME)/hw/rtl DPI_DIR := $(VORTEX_HOME)/hw/dpi AFU_DIR := $(RTL_DIR)/afu/opae @@ -103,17 +105,17 @@ $(IP_CACHE_DIR)/ip-gen.log: $(SCRIPT_DIR)/ip_gen.sh $(IP_CACHE_DIR) swconfig: vortex_afu.h -vortex_afu.h: vortex_afu.json +vortex_afu.h: $(SRC_DIR)/vortex_afu.json afu_json_mgr json-info --afu-json=$^ --c-hdr=$@ $(BUILD_DIR)/setup.cfg: - mkdir -p $(BUILD_DIR); cp setup.cfg $(BUILD_DIR)/setup.cfg + mkdir -p $(BUILD_DIR); cp $(SRC_DIR)/setup.cfg $(BUILD_DIR)/setup.cfg 
$(BUILD_DIR)/vortex_afu.qsf: - mkdir -p $(BUILD_DIR); cp vortex_afu.qsf $(BUILD_DIR)/vortex_afu.qsf + mkdir -p $(BUILD_DIR); cp $(SRC_DIR)/vortex_afu.qsf $(BUILD_DIR)/vortex_afu.qsf $(BUILD_DIR)/vortex_afu.json: - mkdir -p $(BUILD_DIR); cp vortex_afu.json $(BUILD_DIR)/vortex_afu.json + mkdir -p $(BUILD_DIR); cp $(SRC_DIR)/vortex_afu.json $(BUILD_DIR)/vortex_afu.json gen-sources: $(BUILD_DIR)/sources.txt $(BUILD_DIR)/sources.txt: diff --git a/hw/syn/altera/quartus/common.mk b/hw/syn/altera/quartus/common.mk index d84797d5ac..3890dcfe87 100644 --- a/hw/syn/altera/quartus/common.mk +++ b/hw/syn/altera/quartus/common.mk @@ -1,6 +1,8 @@ ROOT_DIR := $(realpath ../../../../../..) include $(ROOT_DIR)/config.mk +SRC_DIR := $(VORTEX_HOME)/hw/syn/altera/quartus + RTL_DIR := $(VORTEX_HOME)/hw/rtl AFU_DIR := $(RTL_DIR)/afu/opae SCRIPT_DIR := $(VORTEX_HOME)/hw/scripts @@ -77,7 +79,7 @@ smart.log: $(PROJECT_FILES) # Project initialization $(PROJECT_FILES): gen-sources - quartus_sh -t project.tcl -project $(PROJECT) -family $(FAMILY) -device $(DEVICE) -top $(TOP_LEVEL_ENTITY) -src "$(SRC_FILE)" -sdc project.sdc -inc "src" + quartus_sh -t $(SRC_DIR)/project.tcl -project $(PROJECT) -family $(FAMILY) -device $(DEVICE) -top $(TOP_LEVEL_ENTITY) -src "$(SRC_FILE)" -sdc $(SRC_DIR)/project.sdc -inc "src" syn.chg: $(STAMP) syn.chg diff --git a/hw/syn/xilinx/xrt/Makefile b/hw/syn/xilinx/xrt/Makefile index e5cab8a081..e1acce8d65 100644 --- a/hw/syn/xilinx/xrt/Makefile +++ b/hw/syn/xilinx/xrt/Makefile @@ -19,6 +19,8 @@ NUM_CORES ?= 1 PREFIX ?= build$(XLEN) MAX_JOBS ?= 8 +SRC_DIR := $(VORTEX_HOME)/hw/syn/xilinx/xrt + RTL_DIR := $(VORTEX_HOME)/hw/rtl DPI_DIR := $(VORTEX_HOME)/hw/dpi AFU_DIR := $(RTL_DIR)/afu/xrt @@ -92,7 +94,7 @@ VPP_FLAGS += --connectivity.sp vortex_afu_1.m_axi_mem_0:HBM[0:15] endif VPP_FLAGS += --report_level 2 -VPP_FLAGS += --config vitis.ini +VPP_FLAGS += --config $(SRC_DIR)/vitis.ini # Enable perf counters ifdef PERF @@ -161,7 +163,7 @@ $(BUILD_DIR)/scope.json: 
$(BUILD_DIR)/vortex.xml gen-xo: $(XO_CONTAINER) $(XO_CONTAINER): $(BUILD_DIR)/sources.txt - mkdir -p $(BUILD_DIR); cd $(BUILD_DIR); $(VIVADO) -mode batch -source scripts/gen_xo.tcl -tclargs ../$(XO_CONTAINER) vortex_afu sources.txt $(SCRIPT_DIR) ../$(BUILD_DIR) + mkdir -p $(BUILD_DIR); cd $(BUILD_DIR); $(VIVADO) -mode batch -source $(SRC_DIR)/scripts/gen_xo.tcl -tclargs ../$(XO_CONTAINER) vortex_afu sources.txt $(SCRIPT_DIR) ../$(BUILD_DIR) gen-bin: $(XCLBIN_CONTAINER) $(XCLBIN_CONTAINER): $(XO_CONTAINER) $(SCOPE_JSON) From a03471837cbec94917f804efdb2dec716baa593a Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sat, 17 Aug 2024 15:21:13 -0700 Subject: [PATCH 055/407] minor update --- hw/rtl/VX_config.vh | 2 +- hw/rtl/core/VX_mem_unit.sv | 4 ++-- hw/rtl/mem/{VX_lmem_demux.sv => VX_lmem_switch.sv} | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) rename hw/rtl/mem/{VX_lmem_demux.sv => VX_lmem_switch.sv} (98%) diff --git a/hw/rtl/VX_config.vh b/hw/rtl/VX_config.vh index d46c679e9a..ea036959d5 100644 --- a/hw/rtl/VX_config.vh +++ b/hw/rtl/VX_config.vh @@ -214,7 +214,7 @@ `endif `define STACK_SIZE (1 << `STACK_LOG2_SIZE) -`define RESET_DELAY 16 +`define RESET_DELAY 8 `ifndef STALL_TIMEOUT `define STALL_TIMEOUT (100000 * (1 ** (`L2_ENABLED + `L3_ENABLED))) diff --git a/hw/rtl/core/VX_mem_unit.sv b/hw/rtl/core/VX_mem_unit.sv index 8df2724393..5bfbf311fe 100644 --- a/hw/rtl/core/VX_mem_unit.sv +++ b/hw/rtl/core/VX_mem_unit.sv @@ -46,11 +46,11 @@ module VX_mem_unit import VX_gpu_pkg::*; #( ) lsu_lmem_if[`NUM_LSU_BLOCKS](); for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin : demux_slices - VX_lmem_demux #( + VX_lmem_switch #( .REQ0_OUT_BUF (3), .REQ1_OUT_BUF (0), .RSP_OUT_BUF (1) - ) lmem_demux ( + ) lmem_switch ( .clk (clk), .reset (reset), .lsu_in_if (lsu_mem_in_if[i]), diff --git a/hw/rtl/mem/VX_lmem_demux.sv b/hw/rtl/mem/VX_lmem_switch.sv similarity index 98% rename from hw/rtl/mem/VX_lmem_demux.sv rename to hw/rtl/mem/VX_lmem_switch.sv index 
b3158ad8ad..da2a190a2b 100644 --- a/hw/rtl/mem/VX_lmem_demux.sv +++ b/hw/rtl/mem/VX_lmem_switch.sv @@ -13,7 +13,7 @@ `include "VX_define.vh" -module VX_lmem_demux import VX_gpu_pkg::*; #( +module VX_lmem_switch import VX_gpu_pkg::*; #( parameter REQ0_OUT_BUF = 0, parameter REQ1_OUT_BUF = 0, parameter RSP_OUT_BUF = 0 From b6663eaff907994ffb286952dd806cda2081c3d4 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sat, 17 Aug 2024 15:49:49 -0700 Subject: [PATCH 056/407] output register fix --- hw/rtl/core/VX_mem_unit.sv | 3 ++- hw/rtl/mem/VX_lmem_switch.sv | 13 +++++++------ 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/hw/rtl/core/VX_mem_unit.sv b/hw/rtl/core/VX_mem_unit.sv index 5bfbf311fe..841707da1e 100644 --- a/hw/rtl/core/VX_mem_unit.sv +++ b/hw/rtl/core/VX_mem_unit.sv @@ -49,7 +49,8 @@ module VX_mem_unit import VX_gpu_pkg::*; #( VX_lmem_switch #( .REQ0_OUT_BUF (3), .REQ1_OUT_BUF (0), - .RSP_OUT_BUF (1) + .RSP_OUT_BUF (1), + .ARBITER ("R") ) lmem_switch ( .clk (clk), .reset (reset), diff --git a/hw/rtl/mem/VX_lmem_switch.sv b/hw/rtl/mem/VX_lmem_switch.sv index da2a190a2b..628190a8dd 100644 --- a/hw/rtl/mem/VX_lmem_switch.sv +++ b/hw/rtl/mem/VX_lmem_switch.sv @@ -16,7 +16,8 @@ module VX_lmem_switch import VX_gpu_pkg::*; #( parameter REQ0_OUT_BUF = 0, parameter REQ1_OUT_BUF = 0, - parameter RSP_OUT_BUF = 0 + parameter RSP_OUT_BUF = 0, + parameter `STRING ARBITER = "R" ) ( input wire clk, input wire reset, @@ -43,8 +44,8 @@ module VX_lmem_switch import VX_gpu_pkg::*; #( VX_elastic_buffer #( .DATAW (REQ_DATAW), - .SIZE (2), - .OUT_REG (REQ0_OUT_BUF) + .SIZE (`TO_OUT_BUF_SIZE(REQ0_OUT_BUF)), + .OUT_REG (`TO_OUT_BUF_REG(REQ0_OUT_BUF)) ) req_global_buf ( .clk (clk), .reset (reset), @@ -74,8 +75,8 @@ module VX_lmem_switch import VX_gpu_pkg::*; #( VX_elastic_buffer #( .DATAW (REQ_DATAW), - .SIZE (0), - .OUT_REG (REQ1_OUT_BUF) + .SIZE (`TO_OUT_BUF_SIZE(REQ1_OUT_BUF)), + .OUT_REG (`TO_OUT_BUF_REG(REQ1_OUT_BUF)) ) req_local_buf ( .clk (clk), .reset (reset), @@ 
-106,7 +107,7 @@ module VX_lmem_switch import VX_gpu_pkg::*; #( VX_stream_arb #( .NUM_INPUTS (2), .DATAW (RSP_DATAW), - .ARBITER ("R"), + .ARBITER (ARBITER), .OUT_BUF (RSP_OUT_BUF) ) rsp_arb ( .clk (clk), From 9d3d35c6b4541fa550e68d00f29526f93e3fd56e Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sat, 17 Aug 2024 16:03:02 -0700 Subject: [PATCH 057/407] operands timing optimization --- hw/rtl/core/VX_operands.sv | 52 ++++++++++++++++++-------------------- 1 file changed, 25 insertions(+), 27 deletions(-) diff --git a/hw/rtl/core/VX_operands.sv b/hw/rtl/core/VX_operands.sv index 62e2bb8831..1b9c6f0109 100644 --- a/hw/rtl/core/VX_operands.sv +++ b/hw/rtl/core/VX_operands.sv @@ -59,17 +59,18 @@ module VX_operands import VX_gpu_pkg::*; #( wire [NUM_SRC_OPDS-1:0][BANK_SEL_WIDTH-1:0] req_bank_idx; wire [NUM_BANKS-1:0] gpr_rd_valid, gpr_rd_ready; - wire [NUM_BANKS-1:0] gpr_rd_valid_st1, gpr_rd_valid_st2; + wire [NUM_BANKS-1:0] gpr_rd_valid_st1; wire [NUM_BANKS-1:0][PER_BANK_ADDRW-1:0] gpr_rd_addr, gpr_rd_addr_st1; - wire [NUM_BANKS-1:0][`NUM_THREADS-1:0][`XLEN-1:0] gpr_rd_data_st1, gpr_rd_data_st2; - wire [NUM_BANKS-1:0][REQ_SEL_WIDTH-1:0] gpr_rd_req_idx, gpr_rd_req_idx_st1, gpr_rd_req_idx_st2; + wire [NUM_BANKS-1:0][`NUM_THREADS-1:0][`XLEN-1:0] gpr_rd_data; + wire [NUM_BANKS-1:0][REQ_SEL_WIDTH-1:0] gpr_rd_req_idx, gpr_rd_req_idx_st1; wire pipe_valid_st1, pipe_ready_st1; wire pipe_valid_st2, pipe_ready_st2; wire [META_DATAW-1:0] pipe_data, pipe_data_st1, pipe_data_st2; - reg [NUM_SRC_OPDS-1:0][`NUM_THREADS-1:0][`XLEN-1:0] src_data_n; - wire [NUM_SRC_OPDS-1:0][`NUM_THREADS-1:0][`XLEN-1:0] src_data_st1, src_data_st2; + reg [NUM_SRC_OPDS-1:0][`NUM_THREADS-1:0][`XLEN-1:0] gpr_rd_data_st1; + wire [NUM_SRC_OPDS-1:0][`NUM_THREADS-1:0][`XLEN-1:0] gpr_rd_data_st2; + wire [NUM_SRC_OPDS-1:0][`NUM_THREADS-1:0][`XLEN-1:0] src_data_st1, src_data_st2, src_data_m_st2; reg [NUM_SRC_OPDS-1:0] data_fetched_n; wire [NUM_SRC_OPDS-1:0] data_fetched_st1; @@ -176,32 +177,34 @@ module 
VX_operands import VX_gpu_pkg::*; #( assign pipe_ready_st1 = pipe_ready_st2 || ~pipe_valid_st2; - assign src_data_st1 = pipe_fire_st2 ? '0 : src_data_n; + always @(*) begin + gpr_rd_data_st1 = '0; + for (integer b = 0; b < NUM_BANKS; ++b) begin + if (gpr_rd_valid_st1[b]) begin + gpr_rd_data_st1[gpr_rd_req_idx_st1[b]] = gpr_rd_data[b]; + end + end + end + + assign src_data_m_st2 = src_data_st2 | gpr_rd_data_st2; + + assign src_data_st1 = pipe_fire_st2 ? '0 : src_data_m_st2; wire pipe_valid2_st1 = pipe_valid_st1 && ~has_collision_st1; `RESET_RELAY (pipe2_reset, reset); // needed for pipe_reg2's wide RESETW VX_pipe_register #( - .DATAW (1 + NUM_SRC_OPDS * REGS_DATAW + NUM_BANKS + NUM_BANKS * REGS_DATAW + META_DATAW + NUM_BANKS * REQ_SEL_WIDTH), + .DATAW (1 + NUM_SRC_OPDS * REGS_DATAW + NUM_SRC_OPDS * REGS_DATAW + META_DATAW), .RESETW (1 + NUM_SRC_OPDS * REGS_DATAW) ) pipe_reg2 ( .clk (clk), .reset (pipe2_reset), .enable (pipe_ready_st1), - .data_in ({pipe_valid2_st1, src_data_st1, gpr_rd_valid_st1, gpr_rd_data_st1, pipe_data_st1, gpr_rd_req_idx_st1}), - .data_out ({pipe_valid_st2, src_data_st2, gpr_rd_valid_st2, gpr_rd_data_st2, pipe_data_st2, gpr_rd_req_idx_st2}) + .data_in ({pipe_valid2_st1, src_data_st1, gpr_rd_data_st1, pipe_data_st1}), + .data_out ({pipe_valid_st2, src_data_st2, gpr_rd_data_st2, pipe_data_st2}) ); - always @(*) begin - src_data_n = src_data_st2; - for (integer b = 0; b < NUM_BANKS; ++b) begin - if (gpr_rd_valid_st2[b]) begin - src_data_n[gpr_rd_req_idx_st2[b]] = gpr_rd_data_st2[b]; - end - end - end - VX_elastic_buffer #( .DATAW (DATAW), .SIZE (`TO_OUT_BUF_SIZE(OUT_BUF)), @@ -211,12 +214,7 @@ module VX_operands import VX_gpu_pkg::*; #( .reset (reset), .valid_in (pipe_valid_st2), .ready_in (pipe_ready_st2), - .data_in ({ - pipe_data_st2, - src_data_n[0], - src_data_n[1], - src_data_n[2] - }), + .data_in ({pipe_data_st2, src_data_m_st2}), .data_out ({ operands_if.data.wis, operands_if.data.tmask, @@ -227,9 +225,9 @@ module VX_operands import 
VX_gpu_pkg::*; #( operands_if.data.op_args, operands_if.data.rd, operands_if.data.uuid, - operands_if.data.rs1_data, + operands_if.data.rs3_data, operands_if.data.rs2_data, - operands_if.data.rs3_data + operands_if.data.rs1_data }), .valid_out (operands_if.valid), .ready_out (operands_if.ready) @@ -280,7 +278,7 @@ module VX_operands import VX_gpu_pkg::*; #( .waddr (gpr_wr_addr), .wdata (writeback_if.data.data), .raddr (gpr_rd_addr_st1[b]), - .rdata (gpr_rd_data_st1[b]) + .rdata (gpr_rd_data[b]) ); end From 51862dbc06ddd8f3a6f24a611f1420c234d09eea Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sat, 17 Aug 2024 19:05:47 -0700 Subject: [PATCH 058/407] doc update --- README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 7cafd498d5..22635bab31 100644 --- a/README.md +++ b/README.md @@ -88,19 +88,19 @@ make -s make -s make install ``` -- Building Vortex 64-bit simply requires using --xlen=64 configure option. +- Building Vortex 64-bit requires setting --xlen=64 configure option. ```sh -../configure --xlen=32 --tooldir=$HOME/tools +../configure --xlen=64 --tooldir=$HOME/tools ``` - Sourcing "./ci/toolchain_env.sh" is required everytime you start a new terminal. we recommend adding "source /ci/toolchain_env.sh" to your ~/.bashrc file to automate the process at login. ```sh echo "source /ci/toolchain_env.sh" >> ~/.bashrc ``` -- Making changes to Makefiles in your source tree or adding new folders will require executing the "configure" script again to get it propagated into your build folder. +- Making changes to Makefiles in your source tree or adding new folders will require executing the "configure" script again without any options to get changes propagated to your build folder. ```sh ../configure ``` -- To debug the GPU, you can generate a "run.log" trace. see /docs/debugging.md for more information. +- To debug the GPU, the simulation can generate a runtime trace for analysis. 
See /docs/debugging.md for more information. ```sh ./ci/blackbox.sh --app=demo --debug=3 ``` From adcad92a7332260f88a178e9a1504bcb6c39c770 Mon Sep 17 00:00:00 2001 From: tinebp Date: Sat, 17 Aug 2024 19:09:02 -0700 Subject: [PATCH 059/407] extending OS support --- README.md | 2 +- ci/toolchain_install.sh.in | 4 ++-- configure | 2 ++ 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 7cafd498d5..40446187c5 100644 --- a/README.md +++ b/README.md @@ -32,7 +32,7 @@ Vortex is a full-stack open-source RISC-V GPGPU. ## Build Instructions More detailed build instructions can be found [here](docs/install_vortex.md). ### Supported OS Platforms -- Ubuntu 18.04, 20.04 +- Ubuntu 18.04, 20.04, 22.04, 24.04 - Centos 7 ### Toolchain Dependencies - [POCL](http://portablecl.org/) diff --git a/ci/toolchain_install.sh.in b/ci/toolchain_install.sh.in index 73e27eb552..01ebe889ba 100755 --- a/ci/toolchain_install.sh.in +++ b/ci/toolchain_install.sh.in @@ -24,8 +24,8 @@ riscv32() { case $OSVERSION in "centos/7") parts=$(eval echo {a..l}) ;; - "ubuntu/focal") parts=$(eval echo {a..k}) ;; - *) parts=$(eval echo {a..j}) ;; + "ubuntu/bionic") parts=$(eval echo {a..j}) ;; + *) parts=$(eval echo {a..k}) ;; esac rm -f riscv32-gnu-toolchain.tar.bz2.parta* for x in $parts diff --git a/configure b/configure index 37e95a2bd1..de04b648b7 100755 --- a/configure +++ b/configure @@ -26,6 +26,8 @@ detect_osversion() { case "$VERSION_CODENAME" in bionic) osversion="ubuntu/bionic";; focal) osversion="ubuntu/focal";; + jammy) osversion="ubuntu/focal";; + noble) osversion="ubuntu/focal";; # Add new versions as needed esac ;; From 06ef53025dd6bb8e62da8f17df7eb8d56316c979 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sat, 17 Aug 2024 21:19:10 -0700 Subject: [PATCH 060/407] minor update --- docs/altera_fpga_guide.md | 4 ++-- docs/xilinx_fpga_guide.md | 18 +++++++++++++++++- hw/rtl/libs/VX_mem_scheduler.sv | 6 ++---- 3 files changed, 21 insertions(+), 7 deletions(-) 
diff --git a/docs/altera_fpga_guide.md b/docs/altera_fpga_guide.md index e8070beb25..ba95d942a3 100644 --- a/docs/altera_fpga_guide.md +++ b/docs/altera_fpga_guide.md @@ -76,8 +76,8 @@ Run the following from your Vortex build directory $ TARGET=fpga ./ci/blackbox.sh --driver=opae --app=sgemm --args="-n128" -Testing OPAE Synthesis using Intel ASE Simulation -------------------------------------------------- +Testing Vortex using OPAE with Intel ASE Simulation +--------------------------------------------------- Building ASE synthesis diff --git a/docs/xilinx_fpga_guide.md b/docs/xilinx_fpga_guide.md index f2960deb6c..959ca67737 100644 --- a/docs/xilinx_fpga_guide.md +++ b/docs/xilinx_fpga_guide.md @@ -33,4 +33,20 @@ Ensure you have the correct opae runtime for the FPGA target Run the following from your Vortex build directory - $ TARGET=hw FPGA_BIN_DIR=/bin ./ci/blackbox.sh --driver=xrt --app=sgemm --args="-n128" \ No newline at end of file + $ TARGET=hw FPGA_BIN_DIR=/bin ./ci/blackbox.sh --driver=xrt --app=sgemm --args="-n128" + +Testing Vortex using XRT Hardware Emulation +------------------------------------------- + +Building XRT's hw_emu target + + $ cd hw/syn/xilinx/xrt + $ PREFIX=test2 PLATFORM=xilinx_u50_gen3x16_xdma_5_202210_1 TARGET=hw_emu make + +Building XRT hw_emu runtime + + $ TARGET=hw_emu make -C runtime/xrt + +Running XRT hw_emu simulation + + $ TARGET=hw_emu FPGA_BIN_DIR=/bin ./ci/blackbox.sh --driver=xrt --app=sgemm \ No newline at end of file diff --git a/hw/rtl/libs/VX_mem_scheduler.sv b/hw/rtl/libs/VX_mem_scheduler.sv index f173d7d0ad..5324d7ffa7 100644 --- a/hw/rtl/libs/VX_mem_scheduler.sv +++ b/hw/rtl/libs/VX_mem_scheduler.sv @@ -223,8 +223,6 @@ module VX_mem_scheduler #( if (COALESCE_ENABLE) begin - `RESET_RELAY (coalescer_reset, reset); - VX_mem_coalescer #( .INSTANCE_ID ($sformatf("%s-coalescer", INSTANCE_ID)), .NUM_REQS (CORE_REQS), @@ -236,8 +234,8 @@ module VX_mem_scheduler #( .UUID_WIDTH (UUID_WIDTH), .QUEUE_SIZE (MEM_QUEUE_SIZE) ) 
coalescer ( - .clk (clk), - .reset (coalescer_reset), + .clk (clk), + .reset (reset), // Input request .in_req_valid (reqq_valid), From de47307428f1e1c5c195bed067788687ec31f07b Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sun, 18 Aug 2024 01:57:36 -0700 Subject: [PATCH 061/407] minor update --- hw/rtl/libs/VX_rr_arbiter.sv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/rtl/libs/VX_rr_arbiter.sv b/hw/rtl/libs/VX_rr_arbiter.sv index 8c0fa0558d..bbfd8269d8 100644 --- a/hw/rtl/libs/VX_rr_arbiter.sv +++ b/hw/rtl/libs/VX_rr_arbiter.sv @@ -16,7 +16,7 @@ `TRACING_OFF module VX_rr_arbiter #( parameter NUM_REQS = 1, - parameter MODEL = 2, + parameter MODEL = 1, parameter LOG_NUM_REQS = `LOG2UP(NUM_REQS), parameter LUT_OPT = 0 ) ( From a2b24b4ed0d8bf3c35eb7f557be1544cf0b28bf3 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sun, 18 Aug 2024 02:10:34 -0700 Subject: [PATCH 062/407] xilinx non-xrt synthesis fixes --- hw/syn/xilinx/test/Makefile | 10 ++----- hw/syn/xilinx/test/project.tcl.in | 43 +++++++++++++++---------------- 2 files changed, 23 insertions(+), 30 deletions(-) diff --git a/hw/syn/xilinx/test/Makefile b/hw/syn/xilinx/test/Makefile index e15789516e..bf950b4ed2 100644 --- a/hw/syn/xilinx/test/Makefile +++ b/hw/syn/xilinx/test/Makefile @@ -28,10 +28,7 @@ CFLAGS += -DEXT_F_DISABLE # update memory layout for 2MB RAM CFLAGS += -DSTARTUP_ADDR=32\'h80000 -CFLAGS += -DIO_BASE_ADDR=32\'hFF000 - -COE_FILE := $(SRC_DIR)/project_1_files/kernel.bin.coe -ESCAPED_COE_FILE := $(shell echo "$(COE_FILE)" | sed -e 's/[\/&]/\\&/g') +CFLAGS += -DSTACK_BASE_ADDR=32\'hFF000 all: build @@ -40,9 +37,6 @@ project_1/sources.txt: mkdir -p project_1 $(SCRIPT_DIR)/gen_sources.sh $(CFLAGS) -P -Cproject_1/src -Oproject_1/sources.txt -project.tcl: project.tcl.in - sed -e 's/%COE_FILE%/$(ESCAPED_COE_FILE)/g' < $< > $@ - build: project_1/vortex.xpr project_1/vortex.xpr: project_1/sources.txt project.tcl $(VIVADO) -mode batch -source project.tcl -tclargs 
project_1/sources.txt project_1/src $(SCRIPT_DIR) @@ -51,4 +45,4 @@ run: project_1/vortex.xpr $(VIVADO) project_1/vortex.xpr & clean: - rm -rf project_1 project.tcl + rm -rf project_1 diff --git a/hw/syn/xilinx/test/project.tcl.in b/hw/syn/xilinx/test/project.tcl.in index a2692f637b..61ee634648 100644 --- a/hw/syn/xilinx/test/project.tcl.in +++ b/hw/syn/xilinx/test/project.tcl.in @@ -46,7 +46,6 @@ set proj_dir [get_property directory [current_project]] # Set project properties set obj [current_project] -set_property -name "board_part" -value "xilinx.com:au280:part0:1.1" -objects $obj set_property -name "compxlib.activehdl_compiled_library_dir" -value "$proj_dir/${project_name}.cache/compile_simlib/activehdl" -objects $obj set_property -name "compxlib.funcsim" -value "1" -objects $obj set_property -name "compxlib.ies_compiled_library_dir" -value "$proj_dir/${project_name}.cache/compile_simlib/ies" -objects $obj @@ -260,7 +259,7 @@ set_property -name "name" -value "utils_1" -objects $obj # Proc to create BD design_1 proc cr_bd_design_1 { parentCell } { -# The design that will be created by this Tcl proc contains the following +# The design that will be created by this Tcl proc contains the following # module references: # Vortex_top @@ -277,7 +276,7 @@ set bCheckIPsPassed 1 ################################################################## set bCheckIPs 1 if { $bCheckIPs == 1 } { - set list_check_ips "\ + set list_check_ips "\ xilinx.com:ip:axi_bram_ctrl:4.1\ xilinx.com:ip:blk_mem_gen:8.4\ " @@ -304,7 +303,7 @@ if { $bCheckIPs == 1 } { ################################################################## set bCheckModules 1 if { $bCheckModules == 1 } { - set list_check_mods "\ + set list_check_mods "\ Vortex_top\ " @@ -369,7 +368,7 @@ set vx_reset [ create_bd_port -dir I -type rst vx_reset ] set_property -dict [ list \ CONFIG.POLARITY {ACTIVE_HIGH} \ ] $vx_reset - + set dcr_wr_valid [ create_bd_port -dir I dcr_wr_valid ] set dcr_wr_addr [ create_bd_port -dir I -from 11 
-to 0 dcr_wr_addr ] set dcr_wr_data [ create_bd_port -dir I -from 31 -to 0 dcr_wr_data ] @@ -384,7 +383,7 @@ if { [catch {set Vortex_top_0 [create_bd_cell -type module -reference $block_nam catch {common::send_gid_msg -ssname BD::TCL -id 2096 -severity "ERROR" "Unable to referenced block <$block_name>. Please add the files for ${block_name}'s definition into the project."} return 1 } - + # Create instance: axi_bram_ctrl_0, and set properties set axi_bram_ctrl_0 [ create_bd_cell -type ip -vlnv xilinx.com:ip:axi_bram_ctrl:4.1 axi_bram_ctrl_0 ] set_property -dict [ list \ @@ -399,7 +398,7 @@ set_property -dict [ list \ CONFIG.Assume_Synchronous_Clk {true} \ CONFIG.Byte_Size {8} \ CONFIG.Load_Init_File {true} \ - CONFIG.Coe_File {%COE_FILE%} \ + CONFIG.Coe_File {@VORTEX_HOME@/hw/syn/xilinx/test/project_1_files/kernel.bin.coe} \ CONFIG.EN_SAFETY_CKT {true} \ CONFIG.Enable_32bit_Address {true} \ CONFIG.Fill_Remaining_Memory_Locations {false} \ @@ -475,24 +474,24 @@ pagesize -pg 1 -db -bbox -sgen -180 0 1060 240 validate_bd_design save_bd_design - close_bd_design $design_name + close_bd_design $design_name } # End of cr_bd_design_1() cr_bd_design_1 "" -set_property EXCLUDE_DEBUG_LOGIC "0" [get_files design_1.bd ] -set_property GENERATE_SYNTH_CHECKPOINT "1" [get_files design_1.bd ] -set_property IS_ENABLED "1" [get_files design_1.bd ] -set_property IS_GLOBAL_INCLUDE "0" [get_files design_1.bd ] -#set_property IS_LOCKED "0" [get_files design_1.bd ] -set_property LIBRARY "xil_defaultlib" [get_files design_1.bd ] -set_property PATH_MODE "RelativeFirst" [get_files design_1.bd ] -set_property PFM_NAME "" [get_files design_1.bd ] -set_property REGISTERED_WITH_MANAGER "1" [get_files design_1.bd ] -set_property SYNTH_CHECKPOINT_MODE "Hierarchical" [get_files design_1.bd ] -set_property USED_IN "synthesis implementation simulation" [get_files design_1.bd ] -set_property USED_IN_IMPLEMENTATION "1" [get_files design_1.bd ] -set_property USED_IN_SIMULATION "1" [get_files design_1.bd ] 
-set_property USED_IN_SYNTHESIS "1" [get_files design_1.bd ] +set_property EXCLUDE_DEBUG_LOGIC "0" [get_files design_1.bd ] +set_property GENERATE_SYNTH_CHECKPOINT "1" [get_files design_1.bd ] +set_property IS_ENABLED "1" [get_files design_1.bd ] +set_property IS_GLOBAL_INCLUDE "0" [get_files design_1.bd ] +#set_property IS_LOCKED "0" [get_files design_1.bd ] +set_property LIBRARY "xil_defaultlib" [get_files design_1.bd ] +set_property PATH_MODE "RelativeFirst" [get_files design_1.bd ] +set_property PFM_NAME "" [get_files design_1.bd ] +set_property REGISTERED_WITH_MANAGER "1" [get_files design_1.bd ] +set_property SYNTH_CHECKPOINT_MODE "Hierarchical" [get_files design_1.bd ] +set_property USED_IN "synthesis implementation simulation" [get_files design_1.bd ] +set_property USED_IN_IMPLEMENTATION "1" [get_files design_1.bd ] +set_property USED_IN_SIMULATION "1" [get_files design_1.bd ] +set_property USED_IN_SYNTHESIS "1" [get_files design_1.bd ] #call make_wrapper to create wrapper files set wrapper_path [make_wrapper -fileset sources_1 -files [ get_files -norecurse design_1.bd] -top] From 3612ceda805fe7381574fd33cc539f3b0b608719 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sun, 18 Aug 2024 02:13:43 -0700 Subject: [PATCH 063/407] minor update --- hw/rtl/libs/VX_cyclic_arbiter.sv | 15 ++++++--------- hw/rtl/libs/VX_priority_encoder.sv | 2 +- hw/rtl/libs/VX_scan.sv | 26 +++++++++++++------------- 3 files changed, 20 insertions(+), 23 deletions(-) diff --git a/hw/rtl/libs/VX_cyclic_arbiter.sv b/hw/rtl/libs/VX_cyclic_arbiter.sv index d721e51305..dc4de1300f 100644 --- a/hw/rtl/libs/VX_cyclic_arbiter.sv +++ b/hw/rtl/libs/VX_cyclic_arbiter.sv @@ -40,17 +40,17 @@ module VX_cyclic_arbiter #( localparam IS_POW2 = (1 << LOG_NUM_REQS) == NUM_REQS; - wire [LOG_NUM_REQS-1:0] grant_index_um, grant_index_ql; + wire [LOG_NUM_REQS-1:0] grant_index_um; reg [LOG_NUM_REQS-1:0] grant_index_r; always @(posedge clk) begin if (reset) begin grant_index_r <= '0; end else if (grant_valid 
&& grant_ready) begin - if (!IS_POW2 && grant_index_ql == LOG_NUM_REQS'(NUM_REQS-1)) begin + if (!IS_POW2 && grant_index == LOG_NUM_REQS'(NUM_REQS-1)) begin grant_index_r <= '0; end else begin - grant_index_r <= grant_index_ql + LOG_NUM_REQS'(1); + grant_index_r <= grant_index + LOG_NUM_REQS'(1); end end end @@ -61,14 +61,11 @@ module VX_cyclic_arbiter #( .data_in (requests), `UNUSED_PIN (onehot_out), .index_out (grant_index_um), - `UNUSED_PIN (valid_out) + .valid_out (grant_valid) ); - assign grant_index_ql = requests[grant_index_r] ? grant_index_r : grant_index_um; - - assign grant_index = grant_index_ql; - assign grant_onehot = NUM_REQS'(1) << grant_index_ql; - assign grant_valid = (| requests); + assign grant_index = requests[grant_index_r] ? grant_index_r : grant_index_um; + assign grant_onehot = NUM_REQS'(1) << grant_index; end diff --git a/hw/rtl/libs/VX_priority_encoder.sv b/hw/rtl/libs/VX_priority_encoder.sv index 27465b414d..1d34f0e511 100644 --- a/hw/rtl/libs/VX_priority_encoder.sv +++ b/hw/rtl/libs/VX_priority_encoder.sv @@ -53,7 +53,7 @@ module VX_priority_encoder #( VX_scan #( .N (N), - .OP (2) + .OP ("|") ) scan ( .data_in (reversed), .data_out (scan_lo) diff --git a/hw/rtl/libs/VX_scan.sv b/hw/rtl/libs/VX_scan.sv index f263dd218b..48de2964a9 100644 --- a/hw/rtl/libs/VX_scan.sv +++ b/hw/rtl/libs/VX_scan.sv @@ -1,10 +1,10 @@ // Copyright © 2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -19,8 +19,8 @@ `TRACING_OFF module VX_scan #( parameter N = 1, - parameter OP = 0, // 0: XOR, 1: AND, 2: OR - parameter REVERSE = 0 // 0: LO->HI, 1: HI->LO + parameter `STRING OP = "^", // ^: XOR, &: AND, |: OR + parameter REVERSE = 0 // 0: LO->HI, 1: HI->LO ) ( input wire [N-1:0] data_in, output wire [N-1:0] data_out @@ -28,7 +28,7 @@ module VX_scan #( localparam LOGN = `CLOG2(N); `IGNORE_UNOPTFLAT_BEGIN - wire [LOGN:0][N-1:0] t; + wire [LOGN:0][N-1:0] t; `IGNORE_UNOPTFLAT_END // reverses bits @@ -39,29 +39,29 @@ module VX_scan #( end // optimize for the common case of small and-scans - if ((N == 2) && (OP == 1)) begin + if ((N == 2) && (OP == "&")) begin assign t[LOGN] = {t[0][1], &t[0][1:0]}; - end else if ((N == 3) && (OP == 1)) begin + end else if ((N == 3) && (OP == "&")) begin assign t[LOGN] = {t[0][2], &t[0][2:1], &t[0][2:0]}; - end else if ((N == 4) && (OP == 1)) begin + end else if ((N == 4) && (OP == "&")) begin assign t[LOGN] = {t[0][3], &t[0][3:2], &t[0][3:1], &t[0][3:0]}; end else begin // general case wire [N-1:0] fill; for (genvar i = 0; i < LOGN; ++i) begin wire [N-1:0] shifted = N'({fill, t[i]} >> (1< Date: Sun, 18 Aug 2024 16:03:59 -0700 Subject: [PATCH 064/407] synthesis of the memory unit and local memory --- hw/rtl/core/VX_mem_unit_top.sv | 123 ++++++++++++++++++++++++ hw/rtl/mem/VX_local_mem_top.sv | 13 ++- hw/syn/altera/quartus/Makefile | 7 +- hw/syn/altera/quartus/mem_unit/Makefile | 7 ++ hw/unittest/Makefile | 8 +- hw/unittest/local_mem_top/Makefile | 26 +++++ hw/unittest/local_mem_top/main.cpp | 49 ++++++++++ hw/unittest/mem_unit_top/Makefile | 26 +++++ hw/unittest/mem_unit_top/main.cpp | 49 ++++++++++ 9 files changed, 302 insertions(+), 6 deletions(-) create mode 100644 hw/rtl/core/VX_mem_unit_top.sv create mode 100755 hw/syn/altera/quartus/mem_unit/Makefile create mode 100644 hw/unittest/local_mem_top/Makefile create mode 100644 hw/unittest/local_mem_top/main.cpp create mode 100644 hw/unittest/mem_unit_top/Makefile create mode 100644 
hw/unittest/mem_unit_top/main.cpp diff --git a/hw/rtl/core/VX_mem_unit_top.sv b/hw/rtl/core/VX_mem_unit_top.sv new file mode 100644 index 0000000000..6c7e2ff66d --- /dev/null +++ b/hw/rtl/core/VX_mem_unit_top.sv @@ -0,0 +1,123 @@ +// Copyright © 2019-2023 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +`include "VX_define.vh" + +module VX_mem_unit_top import VX_gpu_pkg::*; #( + parameter `STRING INSTANCE_ID = "", + parameter LSU_WORD_WIDTH = LSU_WORD_SIZE * 8 +) ( + // Clock + input wire clk, + input wire reset, + + // LSU memory request + input wire [`NUM_LSU_BLOCKS-1:0] lsu_req_valid, + input wire [`NUM_LSU_BLOCKS-1:0] lsu_req_rw, + input wire [`NUM_LSU_BLOCKS-1:0][`NUM_LSU_LANES-1:0][LSU_WORD_SIZE-1:0] lsu_req_byteen, + input wire [`NUM_LSU_BLOCKS-1:0][`NUM_LSU_LANES-1:0][LSU_ADDR_WIDTH-1:0] lsu_req_addr, + input wire [`NUM_LSU_BLOCKS-1:0][`NUM_LSU_LANES-1:0][`MEM_REQ_FLAGS_WIDTH-1:0] lsu_req_flags, + input wire [`NUM_LSU_BLOCKS-1:0][`NUM_LSU_LANES-1:0][LSU_WORD_WIDTH-1:0] lsu_req_data, + input wire [`NUM_LSU_BLOCKS-1:0][LSU_TAG_WIDTH-1:0] lsu_req_tag, + output wire [`NUM_LSU_BLOCKS-1:0] lsu_req_ready, + + // LSU memory response + output wire [`NUM_LSU_BLOCKS-1:0] lsu_rsp_valid, + output wire [`NUM_LSU_BLOCKS-1:0][`NUM_LSU_LANES-1:0][LSU_WORD_WIDTH-1:0] lsu_rsp_data, + output wire [`NUM_LSU_BLOCKS-1:0][LSU_TAG_WIDTH-1:0] lsu_rsp_tag, + input wire [`NUM_LSU_BLOCKS-1:0] lsu_rsp_ready, + + // Memory request + output wire [DCACHE_NUM_REQS-1:0] mem_req_valid, 
+ output wire [DCACHE_NUM_REQS-1:0] mem_req_rw, + output wire [DCACHE_NUM_REQS-1:0][DCACHE_WORD_SIZE-1:0] mem_req_byteen, + output wire [DCACHE_NUM_REQS-1:0][DCACHE_ADDR_WIDTH-1:0] mem_req_addr, + output wire [DCACHE_NUM_REQS-1:0][`MEM_REQ_FLAGS_WIDTH-1:0] mem_req_flags, + output wire [DCACHE_NUM_REQS-1:0][DCACHE_WORD_SIZE*8-1:0] mem_req_data, + output wire [DCACHE_NUM_REQS-1:0][DCACHE_TAG_WIDTH-1:0] mem_req_tag, + input wire [DCACHE_NUM_REQS-1:0] mem_req_ready, + + // Memory response + input wire [DCACHE_NUM_REQS-1:0] mem_rsp_valid, + input wire [DCACHE_NUM_REQS-1:0][DCACHE_WORD_SIZE*8-1:0] mem_rsp_data, + input wire [DCACHE_NUM_REQS-1:0][DCACHE_TAG_WIDTH-1:0] mem_rsp_tag, + output wire [DCACHE_NUM_REQS-1:0] mem_rsp_ready +); + VX_lsu_mem_if #( + .NUM_LANES (`NUM_LSU_LANES), + .DATA_SIZE (LSU_WORD_SIZE), + .TAG_WIDTH (LSU_TAG_WIDTH) + ) lsu_mem_if[`NUM_LSU_BLOCKS](); + + // LSU memory request + for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin + assign lsu_mem_if[i].req_valid = lsu_req_valid[i]; + assign lsu_mem_if[i].req_data.rw = lsu_req_rw[i]; + assign lsu_mem_if[i].req_data.byteen = lsu_req_byteen[i]; + assign lsu_mem_if[i].req_data.addr = lsu_req_addr[i]; + assign lsu_mem_if[i].req_data.flags = lsu_req_flags[i]; + assign lsu_mem_if[i].req_data.data = lsu_req_data[i]; + assign lsu_mem_if[i].req_data.tag = lsu_req_tag[i]; + assign lsu_req_ready[i] = lsu_mem_if[i].req_ready; + end + + // LSU memory response + for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin + assign lsu_rsp_valid[i] = lsu_mem_if[i].rsp_valid; + assign lsu_rsp_data[i] = lsu_mem_if[i].rsp_data.data; + assign lsu_rsp_tag[i] = lsu_mem_if[i].rsp_data.tag; + assign lsu_mem_if[i].rsp_ready = lsu_rsp_ready[i]; + end + + VX_mem_bus_if #( + .DATA_SIZE (DCACHE_WORD_SIZE), + .TAG_WIDTH (DCACHE_TAG_WIDTH) + ) mem_bus_if[DCACHE_NUM_REQS](); + + // memory request + for (genvar i = 0; i < DCACHE_NUM_REQS; ++i) begin + assign mem_req_valid[i] = mem_bus_if[i].req_valid; + assign mem_req_rw[i] = 
mem_bus_if[i].req_data.rw; + assign mem_req_byteen[i] = mem_bus_if[i].req_data.byteen; + assign mem_req_addr[i] = mem_bus_if[i].req_data.addr; + assign mem_req_flags[i] = mem_bus_if[i].req_data.flags; + assign mem_req_data[i] = mem_bus_if[i].req_data.data; + assign mem_req_tag[i] = mem_bus_if[i].req_data.tag; + assign mem_bus_if[i].req_ready = mem_req_ready[i]; + end + + // memory response + for (genvar i = 0; i < DCACHE_NUM_REQS; ++i) begin + assign mem_bus_if[i].rsp_valid = mem_rsp_valid[i]; + assign mem_bus_if[i].rsp_data.tag = mem_rsp_tag[i]; + assign mem_bus_if[i].rsp_data.data = mem_rsp_data[i]; + assign mem_rsp_ready[i] = mem_bus_if[i].rsp_ready; + end + +`ifdef PERF_ENABLE + cache_perf_t lmem_perf = '0; +`endif + + VX_mem_unit #( + .INSTANCE_ID (INSTANCE_ID) + ) mem_unit ( + .clk (clk), + .reset (reset), + `ifdef PERF_ENABLE + .lmem_perf (lmem_perf), + `endif + .lsu_mem_in_if (lsu_mem_if), + .dcache_bus_if (mem_bus_if) + ); + +endmodule diff --git a/hw/rtl/mem/VX_local_mem_top.sv b/hw/rtl/mem/VX_local_mem_top.sv index 5f9b17da01..fda15cde20 100644 --- a/hw/rtl/mem/VX_local_mem_top.sv +++ b/hw/rtl/mem/VX_local_mem_top.sv @@ -24,8 +24,6 @@ module VX_local_mem_top import VX_gpu_pkg::*; #( // Number of banks parameter NUM_BANKS = 4, - // Address width - parameter ADDR_WIDTH = `CLOG2(SIZE), // Size of a word in bytes parameter WORD_SIZE = `XLEN/8, @@ -33,7 +31,13 @@ module VX_local_mem_top import VX_gpu_pkg::*; #( parameter UUID_WIDTH = 0, // Request tag size - parameter TAG_WIDTH = 16 + parameter TAG_WIDTH = 16, + + // Address width + parameter NUM_WORDS = SIZE / WORD_SIZE, + parameter WORDS_PER_BANK = NUM_WORDS / NUM_BANKS, + parameter BANK_ADDR_WIDTH = `CLOG2(WORDS_PER_BANK), + parameter ADDR_WIDTH = BANK_ADDR_WIDTH + `CLOG2(NUM_BANKS) ) ( input wire clk, input wire reset, @@ -56,7 +60,8 @@ module VX_local_mem_top import VX_gpu_pkg::*; #( ); VX_mem_bus_if #( .DATA_SIZE (WORD_SIZE), - .TAG_WIDTH (TAG_WIDTH) + .TAG_WIDTH (TAG_WIDTH), + .ADDR_WIDTH(ADDR_WIDTH) ) 
mem_bus_if[NUM_REQS](); // memory request diff --git a/hw/syn/altera/quartus/Makefile b/hw/syn/altera/quartus/Makefile index d0a2999bd9..f8993bf871 100644 --- a/hw/syn/altera/quartus/Makefile +++ b/hw/syn/altera/quartus/Makefile @@ -9,7 +9,7 @@ SCRIPT_DIR := $(VORTEX_HOME)/hw/scripts IP_CACHE_DIR := $(ROOT_DIR)/hw/syn/altera/ip_cache/$(DEVICE_FAMILY) -.PHONY: dogfood unittest pipeline lmem cache fpu core issue vortex top test +.PHONY: dogfood unittest pipeline mem_unit lmem cache fpu core issue vortex top test ip-gen: $(IP_CACHE_DIR)/ip_gen.log $(IP_CACHE_DIR)/ip_gen.log: @@ -30,6 +30,11 @@ pipeline: cp pipeline/Makefile pipeline/$(BUILD_DIR) $(MAKE) -C pipeline/$(BUILD_DIR) clean && $(MAKE) -C pipeline/$(BUILD_DIR) > pipeline/$(BUILD_DIR)/build.log 2>&1 & +mem_unit: + mkdir -p mem_unit/$(BUILD_DIR) + cp mem_unit/Makefile mem_unit/$(BUILD_DIR) + $(MAKE) -C mem_unit/$(BUILD_DIR) clean && $(MAKE) -C mem_unit/$(BUILD_DIR) > mem_unit/$(BUILD_DIR)/build.log 2>&1 & + lmem: mkdir -p lmem/$(BUILD_DIR) cp lmem/Makefile lmem/$(BUILD_DIR) diff --git a/hw/syn/altera/quartus/mem_unit/Makefile b/hw/syn/altera/quartus/mem_unit/Makefile new file mode 100755 index 0000000000..585e5fc34d --- /dev/null +++ b/hw/syn/altera/quartus/mem_unit/Makefile @@ -0,0 +1,7 @@ +PROJECT = VX_mem_init_top +TOP_LEVEL_ENTITY = $(PROJECT) +SRC_FILE = $(PROJECT).sv + +include ../../common.mk + +RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/mem -I$(RTL_DIR)/core diff --git a/hw/unittest/Makefile b/hw/unittest/Makefile index 5722ec9bce..f37d6ae1b3 100644 --- a/hw/unittest/Makefile +++ b/hw/unittest/Makefile @@ -5,6 +5,8 @@ all: $(MAKE) -C cache_top $(MAKE) -C core_top $(MAKE) -C issue_top + $(MAKE) -C local_mem_top + $(MAKE) -C mem_unit_top run: $(MAKE) -C cache run @@ -13,6 +15,8 @@ run: $(MAKE) -C cache_top run $(MAKE) -C core_top run $(MAKE) -C issue_top run + $(MAKE) -C local_mem_top run + $(MAKE) -C mem_unit_top run clean: $(MAKE) -C cache clean @@ -20,4 +24,6 @@ 
clean: $(MAKE) -C mem_streamer clean $(MAKE) -C cache_top clean $(MAKE) -C core_top clean - $(MAKE) -C issue_top clean \ No newline at end of file + $(MAKE) -C issue_top clean + $(MAKE) -C local_mem_top clean + $(MAKE) -C mem_unit_top clean \ No newline at end of file diff --git a/hw/unittest/local_mem_top/Makefile b/hw/unittest/local_mem_top/Makefile new file mode 100644 index 0000000000..22a8adfae9 --- /dev/null +++ b/hw/unittest/local_mem_top/Makefile @@ -0,0 +1,26 @@ +ROOT_DIR := $(realpath ../../..) +include $(ROOT_DIR)/config.mk + +PROJECT := local_mem_top + +RTL_DIR := $(VORTEX_HOME)/hw/rtl +DPI_DIR := $(VORTEX_HOME)/hw/dpi + +SRC_DIR := $(VORTEX_HOME)/hw/unittest/$(PROJECT) + +CXXFLAGS := -I$(SRC_DIR) -I$(VORTEX_HOME)/hw/unittest/common -I$(VORTEX_HOME)/sim/common +CXXFLAGS += -I$(ROOT_DIR)/hw + +SRCS := $(DPI_DIR)/util_dpi.cpp +SRCS += $(SRC_DIR)/main.cpp + +DBG_TRACE_FLAGS := + +RTL_PKGS := $(RTL_DIR)/VX_gpu_pkg.sv + +RTL_INCLUDE := -I$(RTL_DIR) -I$(DPI_DIR) -I$(RTL_DIR)/libs +RTL_INCLUDE += -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/mem + +TOP := VX_local_mem_top + +include ../common.mk \ No newline at end of file diff --git a/hw/unittest/local_mem_top/main.cpp b/hw/unittest/local_mem_top/main.cpp new file mode 100644 index 0000000000..5191b44337 --- /dev/null +++ b/hw/unittest/local_mem_top/main.cpp @@ -0,0 +1,49 @@ +// Copyright © 2019-2023 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "vl_simulator.h" + +#ifndef TRACE_START_TIME +#define TRACE_START_TIME 0ull +#endif + +#ifndef TRACE_STOP_TIME +#define TRACE_STOP_TIME -1ull +#endif + +static uint64_t timestamp = 0; +static bool trace_enabled = false; +static uint64_t trace_start_time = TRACE_START_TIME; +static uint64_t trace_stop_time = TRACE_STOP_TIME; + +double sc_time_stamp() { + return timestamp; +} + +bool sim_trace_enabled() { + if (timestamp >= trace_start_time + && timestamp < trace_stop_time) + return true; + return trace_enabled; +} + +void sim_trace_enable(bool enable) { + trace_enabled = enable; +} + +int main(int argc, char **argv) { + // Initialize Verilators variables + Verilated::commandArgs(argc, argv); + + return 0; +} \ No newline at end of file diff --git a/hw/unittest/mem_unit_top/Makefile b/hw/unittest/mem_unit_top/Makefile new file mode 100644 index 0000000000..a44befbce7 --- /dev/null +++ b/hw/unittest/mem_unit_top/Makefile @@ -0,0 +1,26 @@ +ROOT_DIR := $(realpath ../../..) +include $(ROOT_DIR)/config.mk + +PROJECT := mem_unit_top + +RTL_DIR := $(VORTEX_HOME)/hw/rtl +DPI_DIR := $(VORTEX_HOME)/hw/dpi + +SRC_DIR := $(VORTEX_HOME)/hw/unittest/$(PROJECT) + +CXXFLAGS := -I$(SRC_DIR) -I$(VORTEX_HOME)/hw/unittest/common -I$(VORTEX_HOME)/sim/common +CXXFLAGS += -I$(ROOT_DIR)/hw + +SRCS := $(DPI_DIR)/util_dpi.cpp +SRCS += $(SRC_DIR)/main.cpp + +DBG_TRACE_FLAGS := + +RTL_PKGS := $(RTL_DIR)/VX_gpu_pkg.sv + +RTL_INCLUDE := -I$(RTL_DIR) -I$(DPI_DIR) -I$(RTL_DIR)/libs +RTL_INCLUDE += -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/mem -I$(RTL_DIR)/core + +TOP := VX_mem_unit_top + +include ../common.mk \ No newline at end of file diff --git a/hw/unittest/mem_unit_top/main.cpp b/hw/unittest/mem_unit_top/main.cpp new file mode 100644 index 0000000000..5191b44337 --- /dev/null +++ b/hw/unittest/mem_unit_top/main.cpp @@ -0,0 +1,49 @@ +// Copyright © 2019-2023 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the 
License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "vl_simulator.h" + +#ifndef TRACE_START_TIME +#define TRACE_START_TIME 0ull +#endif + +#ifndef TRACE_STOP_TIME +#define TRACE_STOP_TIME -1ull +#endif + +static uint64_t timestamp = 0; +static bool trace_enabled = false; +static uint64_t trace_start_time = TRACE_START_TIME; +static uint64_t trace_stop_time = TRACE_STOP_TIME; + +double sc_time_stamp() { + return timestamp; +} + +bool sim_trace_enabled() { + if (timestamp >= trace_start_time + && timestamp < trace_stop_time) + return true; + return trace_enabled; +} + +void sim_trace_enable(bool enable) { + trace_enabled = enable; +} + +int main(int argc, char **argv) { + // Initialize Verilators variables + Verilated::commandArgs(argc, argv); + + return 0; +} \ No newline at end of file From 2762bd53ff60e086177081face5e039382f13af5 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sun, 18 Aug 2024 18:56:17 -0700 Subject: [PATCH 065/407] minor updates --- hw/rtl/core/VX_mem_unit.sv | 2 +- hw/rtl/core/VX_mem_unit_top.sv | 4 ++++ hw/syn/altera/quartus/mem_unit/Makefile | 4 ++-- hw/unittest/mem_unit_top/Makefile | 2 +- 4 files changed, 8 insertions(+), 4 deletions(-) diff --git a/hw/rtl/core/VX_mem_unit.sv b/hw/rtl/core/VX_mem_unit.sv index 841707da1e..4f94c2765b 100644 --- a/hw/rtl/core/VX_mem_unit.sv +++ b/hw/rtl/core/VX_mem_unit.sv @@ -205,7 +205,7 @@ module VX_mem_unit import VX_gpu_pkg::*; #( .ARBITER ("P"), .REQ_OUT_BUF (0), .RSP_OUT_BUF (0) - ) lsu_adapter ( + ) dcache_adapter ( .clk (clk), .reset (reset), .lsu_mem_if (dcache_coalesced_if), diff --git 
a/hw/rtl/core/VX_mem_unit_top.sv b/hw/rtl/core/VX_mem_unit_top.sv index 6c7e2ff66d..c1acb63825 100644 --- a/hw/rtl/core/VX_mem_unit_top.sv +++ b/hw/rtl/core/VX_mem_unit_top.sv @@ -24,6 +24,7 @@ module VX_mem_unit_top import VX_gpu_pkg::*; #( // LSU memory request input wire [`NUM_LSU_BLOCKS-1:0] lsu_req_valid, input wire [`NUM_LSU_BLOCKS-1:0] lsu_req_rw, + input wire [`NUM_LSU_BLOCKS-1:0][`NUM_LSU_LANES-1:0] lsu_req_mask, input wire [`NUM_LSU_BLOCKS-1:0][`NUM_LSU_LANES-1:0][LSU_WORD_SIZE-1:0] lsu_req_byteen, input wire [`NUM_LSU_BLOCKS-1:0][`NUM_LSU_LANES-1:0][LSU_ADDR_WIDTH-1:0] lsu_req_addr, input wire [`NUM_LSU_BLOCKS-1:0][`NUM_LSU_LANES-1:0][`MEM_REQ_FLAGS_WIDTH-1:0] lsu_req_flags, @@ -33,6 +34,7 @@ module VX_mem_unit_top import VX_gpu_pkg::*; #( // LSU memory response output wire [`NUM_LSU_BLOCKS-1:0] lsu_rsp_valid, + output wire [`NUM_LSU_BLOCKS-1:0][`NUM_LSU_LANES-1:0] lsu_rsp_mask, output wire [`NUM_LSU_BLOCKS-1:0][`NUM_LSU_LANES-1:0][LSU_WORD_WIDTH-1:0] lsu_rsp_data, output wire [`NUM_LSU_BLOCKS-1:0][LSU_TAG_WIDTH-1:0] lsu_rsp_tag, input wire [`NUM_LSU_BLOCKS-1:0] lsu_rsp_ready, @@ -63,6 +65,7 @@ module VX_mem_unit_top import VX_gpu_pkg::*; #( for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin assign lsu_mem_if[i].req_valid = lsu_req_valid[i]; assign lsu_mem_if[i].req_data.rw = lsu_req_rw[i]; + assign lsu_mem_if[i].req_data.mask = lsu_req_mask[i]; assign lsu_mem_if[i].req_data.byteen = lsu_req_byteen[i]; assign lsu_mem_if[i].req_data.addr = lsu_req_addr[i]; assign lsu_mem_if[i].req_data.flags = lsu_req_flags[i]; @@ -74,6 +77,7 @@ module VX_mem_unit_top import VX_gpu_pkg::*; #( // LSU memory response for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin assign lsu_rsp_valid[i] = lsu_mem_if[i].rsp_valid; + assign lsu_rsp_mask[i] = lsu_mem_if[i].rsp_data.mask; assign lsu_rsp_data[i] = lsu_mem_if[i].rsp_data.data; assign lsu_rsp_tag[i] = lsu_mem_if[i].rsp_data.tag; assign lsu_mem_if[i].rsp_ready = lsu_rsp_ready[i]; diff --git 
a/hw/syn/altera/quartus/mem_unit/Makefile b/hw/syn/altera/quartus/mem_unit/Makefile index 585e5fc34d..209492265a 100755 --- a/hw/syn/altera/quartus/mem_unit/Makefile +++ b/hw/syn/altera/quartus/mem_unit/Makefile @@ -1,7 +1,7 @@ -PROJECT = VX_mem_init_top +PROJECT = VX_mem_unit_top TOP_LEVEL_ENTITY = $(PROJECT) SRC_FILE = $(PROJECT).sv include ../../common.mk -RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/mem -I$(RTL_DIR)/core +RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/mem -I$(RTL_DIR)/core -I$(RTL_DIR)/fpu diff --git a/hw/unittest/mem_unit_top/Makefile b/hw/unittest/mem_unit_top/Makefile index a44befbce7..8809551f4b 100644 --- a/hw/unittest/mem_unit_top/Makefile +++ b/hw/unittest/mem_unit_top/Makefile @@ -19,7 +19,7 @@ DBG_TRACE_FLAGS := RTL_PKGS := $(RTL_DIR)/VX_gpu_pkg.sv RTL_INCLUDE := -I$(RTL_DIR) -I$(DPI_DIR) -I$(RTL_DIR)/libs -RTL_INCLUDE += -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/mem -I$(RTL_DIR)/core +RTL_INCLUDE += -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/mem -I$(RTL_DIR)/core -I$(RTL_DIR)/fpu TOP := VX_mem_unit_top From 1814ff6d403568b39963853e7f7a5b9f033c0c6b Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sun, 18 Aug 2024 22:02:37 -0700 Subject: [PATCH 066/407] xilinx standalone synthesis fixes --- hw/scripts/bin2coe.py | 91 ++++++++ hw/syn/xilinx/test/Makefile | 22 +- hw/syn/xilinx/test/kernel/Makefile | 51 ----- hw/syn/xilinx/test/kernel/kernel.dat | 3 - hw/syn/xilinx/test/kernel/main.c | 36 --- hw/syn/xilinx/test/kernel/start.S | 23 -- hw/syn/xilinx/test/project.tcl.in | 2 +- .../xilinx/test/project_1_files/Vortex_top.v | 192 +++++----------- .../test/project_1_files/Vortex_wrap.sv | 208 ++++++++++++++++++ tests/kernel/common.mk | 4 +- 10 files changed, 373 insertions(+), 259 deletions(-) create mode 100755 hw/scripts/bin2coe.py delete mode 100644 hw/syn/xilinx/test/kernel/Makefile delete mode 100644 hw/syn/xilinx/test/kernel/kernel.dat delete mode 100644 
hw/syn/xilinx/test/kernel/main.c delete mode 100644 hw/syn/xilinx/test/kernel/start.S create mode 100644 hw/syn/xilinx/test/project_1_files/Vortex_wrap.sv diff --git a/hw/scripts/bin2coe.py b/hw/scripts/bin2coe.py new file mode 100755 index 0000000000..95b3bcbeb1 --- /dev/null +++ b/hw/scripts/bin2coe.py @@ -0,0 +1,91 @@ +#!/usr/bin/env python3 + +# Copyright © 2019-2023 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import argparse + +g_memory = {} + +def hex2bin(ch): + return int(ch, 16) if ch.isdigit() or ch in 'abcdefABCDEF' else 0 + +def process_binary(binfname, wordsize, binaddr): + with open(binfname, 'rb') as f: + buffer = list(f.read()) + g_memory[binaddr] = buffer + return (len(buffer) + wordsize - 1) // wordsize + +def process_data(datfname, wordsize): + offset, buffer = 0, [] + with open(datfname, 'r') as f: + for line in f: + line = line.strip() + if line.startswith("#"): + continue + if line.startswith("@"): + if buffer: + g_memory[offset] = buffer + offset = int(line[1:], 16) + buffer = [] + else: + for i in range(0, len(line), 2): + byte = hex2bin(line[i]) << 4 | hex2bin(line[i+1]) + buffer.append(byte) + if len(buffer) % wordsize: + buffer.extend([0] * (wordsize - len(buffer) % wordsize)) + offset += 1 + if buffer: + g_memory[offset] = buffer + return offset + +def write_coe(outfname, wordsize, depth, defval): + with open(outfname, 'w') as f: + f.write("MEMORY_INITIALIZATION_RADIX=16;\nMEMORY_INITIALIZATION_VECTOR=\n") + i = 0 + for addr in 
sorted(g_memory): + while i < addr: + f.write(f"{defval},\n") + i += 1 + data = g_memory[addr] + for j in range(0, len(data), wordsize): + f.write(",".join([f"{byte:02x}" for byte in data[j:j+wordsize][::-1]]) + ",\n") + i += 1 + while i < depth: + f.write(f"{defval},\n") + i += 1 + f.seek(f.tell() - 2, 0) # Remove the last comma + f.write(";\n") + +def main(): + parser = argparse.ArgumentParser(description="Binary to Xilinx COE File Converter") + parser.add_argument("--binary", help="Input binary file.") + parser.add_argument("--data", help="Input data file.") + parser.add_argument("--out", default="output.coe", help="Output file (optional).") + parser.add_argument("--wordsize", type=int, default=4, help="Word size in bytes (default 4).") + parser.add_argument("--depth", type=int, default=0, help="Address size (optional).") + parser.add_argument("--binaddr", type=int, default=0, help="Binary address (optional).") + parser.add_argument("--default", default="00", help="Default hex value as string (optional).") + + args = parser.parse_args() + + depth = max( + process_binary(args.binary, args.wordsize, args.binaddr) if args.binary else 0, + process_data(args.data, args.wordsize) if args.data else 0, + args.depth + ) + + write_coe(args.out, args.wordsize, depth, args.default) + +if __name__ == "__main__": + main() diff --git a/hw/syn/xilinx/test/Makefile b/hw/syn/xilinx/test/Makefile index bf950b4ed2..5b6a76da36 100644 --- a/hw/syn/xilinx/test/Makefile +++ b/hw/syn/xilinx/test/Makefile @@ -10,6 +10,8 @@ DPI_DIR := $(VORTEX_HOME)/hw/dpi AFU_DIR := $(RTL_DIR)/afu/opae SCRIPT_DIR := $(VORTEX_HOME)/hw/scripts +KERNEL ?= fibonacci + # include paths FPU_INCLUDE = -I$(RTL_DIR)/fpu ifneq (,$(findstring FPU_FPNEW,$(CONFIGS))) @@ -32,17 +34,27 @@ CFLAGS += -DSTACK_BASE_ADDR=32\'hFF000 all: build +$(KERNEL).bin: + $(MAKE) -C $(ROOT_DIR)/kernel clean + STACK_BASE_ADDR=0xFF000 $(MAKE) -C $(ROOT_DIR)/kernel + $(MAKE) -C $(ROOT_DIR)/tests/kernel/$(KERNEL) clean + STARTUP_ADDR=0x8000 
$(MAKE) -C $(ROOT_DIR)/tests/kernel/$(KERNEL) + cp $(ROOT_DIR)/tests/kernel/$(KERNEL)/$(KERNEL).bin $(KERNEL).bin + +kernel.bin.coe: $(KERNEL).bin + $(SCRIPT_DIR)/bin2coe.py --out=$@ --binary=$(KERNEL).bin --binaddr=8192 --depth=16384 --wordsize=64 + gen-sources: project_1/sources.txt project_1/sources.txt: mkdir -p project_1 $(SCRIPT_DIR)/gen_sources.sh $(CFLAGS) -P -Cproject_1/src -Oproject_1/sources.txt -build: project_1/vortex.xpr -project_1/vortex.xpr: project_1/sources.txt project.tcl +build: project_1/project_1.xpr +project_1/project_1.xpr: project_1/sources.txt kernel.bin.coe project.tcl $(VIVADO) -mode batch -source project.tcl -tclargs project_1/sources.txt project_1/src $(SCRIPT_DIR) -run: project_1/vortex.xpr - $(VIVADO) project_1/vortex.xpr & +run: project_1/project_1.xpr + $(VIVADO) project_1/project_1.xpr & clean: - rm -rf project_1 + rm -rf project_1 $(KERNEL).bin kernel.bin.coe diff --git a/hw/syn/xilinx/test/kernel/Makefile b/hw/syn/xilinx/test/kernel/Makefile deleted file mode 100644 index 9f3b95c1a1..0000000000 --- a/hw/syn/xilinx/test/kernel/Makefile +++ /dev/null @@ -1,51 +0,0 @@ -ROOT_DIR := $(realpath ../../../../..) 
-include $(ROOT_DIR)/config.mk - -ifeq ($(XLEN),64) -CFLAGS += -march=rv64imafd -mabi=lp64d -else -CFLAGS += -march=rv32imaf -mabi=ilp32f -endif - -SRC_DIR := $(VORTEX_HOME)/hw/syn/xilinx/test/kernel - -SCRIPT_DIR := $(VORTEX_HOME)/hw/scripts - -BIN2COE_PATH ?= $(SCRIPT_DIR)/bin2coe - -CC = $(RISCV_TOOLCHAIN_PATH)/bin/$(RISCV_PREFIX)-gcc -AR = $(RISCV_TOOLCHAIN_PATH)/bin/$(RISCV_PREFIX)-gcc-ar -DP = $(RISCV_TOOLCHAIN_PATH)/bin/$(RISCV_PREFIX)-objdump -CP = $(RISCV_TOOLCHAIN_PATH)/bin/$(RISCV_PREFIX)-objcopy - -CFLAGS += -O3 -Wstack-usage=1024 -ffreestanding -nostartfiles -fdata-sections -ffunction-sections -CFLAGS += -I$(VORTEX_HOME)/runtime/include -I$(VORTEX_HOME)/hw - -LDFLAGS += -lm -Wl,-Bstatic,-T,$(VORTEX_HOME)/kernel/scripts/link$(XLEN).ld,--defsym=STARTUP_ADDR=0x80000000 - -PROJECT = kernel - -SRCS = $(SRC_DIR)/main.c $(SRC_DIR)/start.S - -all: $(PROJECT).elf $(PROJECT).hex $(PROJECT).bin $(PROJECT).dump $(PROJECT).bin.coe - -$(PROJECT).dump: $(PROJECT).elf - $(DP) -D $< > $@ - -$(PROJECT).hex: $(PROJECT).elf - $(CP) -O ihex $< $@ - -$(PROJECT).bin: $(PROJECT).elf - $(CP) -O binary $< $@ - -$(PROJECT).bin.coe: $(PROJECT).bin - $(BIN2COE_PATH)/bin2coe $< --out=$@ --binary=$(PROJECT).bin --data=$(PROJECT).dat --binaddr=8192 --depth=16384 --wordsize=64 - -$(PROJECT).elf: $(SRCS) - $(CC) $(CFLAGS) $^ $(LDFLAGS) -o $@ - -.depend: $(SRCS) - $(CC) $(CFLAGS) -MM $^ > .depend; - -clean: - rm -rf *.bin *.elf *.hex *.dump *.coe .depend diff --git a/hw/syn/xilinx/test/kernel/kernel.dat b/hw/syn/xilinx/test/kernel/kernel.dat deleted file mode 100644 index 6e197b719f..0000000000 --- a/hw/syn/xilinx/test/kernel/kernel.dat +++ /dev/null @@ -1,3 +0,0 @@ -@1 -000000C00000008000000002, -00000003000000020000000100000000, \ No newline at end of file diff --git a/hw/syn/xilinx/test/kernel/main.c b/hw/syn/xilinx/test/kernel/main.c deleted file mode 100644 index 4fcfd99c0a..0000000000 --- a/hw/syn/xilinx/test/kernel/main.c +++ /dev/null @@ -1,36 +0,0 @@ -// Copyright © 2019-2023 
-// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include -#include - -typedef struct { - uint32_t count; - uint32_t src_addr; - uint32_t dst_addr; -} kernel_arg_t; - -int main() { - kernel_arg_t* arg = (kernel_arg_t*)csr_read(VX_CSR_MSCRATCH); - uint32_t count = arg->count; - int32_t* src_ptr = (int32_t*)arg->src_addr; - int32_t* dst_ptr = (int32_t*)arg->dst_addr; - - uint32_t offset = vx_core_id() * count; - - for (uint32_t i = 0; i < count; ++i) { - dst_ptr[offset + i] = src_ptr[offset + i]; - } - - return 0; -} diff --git a/hw/syn/xilinx/test/kernel/start.S b/hw/syn/xilinx/test/kernel/start.S deleted file mode 100644 index e9295d6433..0000000000 --- a/hw/syn/xilinx/test/kernel/start.S +++ /dev/null @@ -1,23 +0,0 @@ -// Copyright © 2019-2023 -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -.section .init, "ax" -.global _start -.type _start, @function -_start: - # call main routine - call main - - # end execution - .insn r 0x0b, 0, 0, x0, x0, x0 -.size _start, .-_start \ No newline at end of file diff --git a/hw/syn/xilinx/test/project.tcl.in b/hw/syn/xilinx/test/project.tcl.in index 61ee634648..45f9a9104c 100644 --- a/hw/syn/xilinx/test/project.tcl.in +++ b/hw/syn/xilinx/test/project.tcl.in @@ -398,7 +398,7 @@ set_property -dict [ list \ CONFIG.Assume_Synchronous_Clk {true} \ CONFIG.Byte_Size {8} \ CONFIG.Load_Init_File {true} \ - CONFIG.Coe_File {@VORTEX_HOME@/hw/syn/xilinx/test/project_1_files/kernel.bin.coe} \ + CONFIG.Coe_File {@VORTEX_HOME@/hw/syn/xilinx/test/kernel.bin.coe} \ CONFIG.EN_SAFETY_CKT {true} \ CONFIG.Enable_32bit_Address {true} \ CONFIG.Fill_Remaining_Memory_Locations {false} \ diff --git a/hw/syn/xilinx/test/project_1_files/Vortex_top.v b/hw/syn/xilinx/test/project_1_files/Vortex_top.v index a7adf71bc6..cd634b9b67 100644 --- a/hw/syn/xilinx/test/project_1_files/Vortex_top.v +++ b/hw/syn/xilinx/test/project_1_files/Vortex_top.v @@ -1,10 +1,10 @@ // Copyright © 2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -22,7 +22,7 @@ module Vortex_top #( input wire clk, input wire reset, - // AXI4 memory interface + // AXI4 memory interface output wire m_axi_mem_awvalid, input wire m_axi_mem_awready, output wire [C_M_AXI_GMEM_ADDR_WIDTH-1:0] m_axi_mem_awaddr, @@ -68,141 +68,55 @@ module Vortex_top #( output wire busy ); - wire m_axi_mem_awvalid_a [C_M_AXI_MEM_NUM_BANKS]; - wire m_axi_mem_awready_a [C_M_AXI_MEM_NUM_BANKS]; - wire [C_M_AXI_GMEM_ADDR_WIDTH-1:0] m_axi_mem_awaddr_a [C_M_AXI_MEM_NUM_BANKS]; - wire [C_M_AXI_GMEM_ID_WIDTH - 1:0] m_axi_mem_awid_a [C_M_AXI_MEM_NUM_BANKS]; - wire [7:0] m_axi_mem_awlen_a [C_M_AXI_MEM_NUM_BANKS]; - wire [2:0] m_axi_mem_awsize_a [C_M_AXI_MEM_NUM_BANKS]; - wire [1:0] m_axi_mem_awburst_a [C_M_AXI_MEM_NUM_BANKS]; - wire [1:0] m_axi_mem_awlock_a [C_M_AXI_MEM_NUM_BANKS]; - wire [3:0] m_axi_mem_awcache_a [C_M_AXI_MEM_NUM_BANKS]; - wire [2:0] m_axi_mem_awprot_a [C_M_AXI_MEM_NUM_BANKS]; - wire [3:0] m_axi_mem_awqos_a [C_M_AXI_MEM_NUM_BANKS]; - wire m_axi_mem_wvalid_a [C_M_AXI_MEM_NUM_BANKS]; - wire m_axi_mem_wready_a [C_M_AXI_MEM_NUM_BANKS]; - wire [C_M_AXI_GMEM_DATA_WIDTH-1:0] m_axi_mem_wdata_a [C_M_AXI_MEM_NUM_BANKS]; - wire [C_M_AXI_GMEM_DATA_WIDTH/8-1:0] m_axi_mem_wstrb_a [C_M_AXI_MEM_NUM_BANKS]; - wire m_axi_mem_wlast_a [C_M_AXI_MEM_NUM_BANKS]; - wire m_axi_mem_arvalid_a [C_M_AXI_MEM_NUM_BANKS]; - wire m_axi_mem_arready_a [C_M_AXI_MEM_NUM_BANKS]; - wire [C_M_AXI_GMEM_ADDR_WIDTH-1:0] m_axi_mem_araddr_a [C_M_AXI_MEM_NUM_BANKS]; - wire [C_M_AXI_GMEM_ID_WIDTH-1:0] m_axi_mem_arid_a [C_M_AXI_MEM_NUM_BANKS]; - wire [7:0] m_axi_mem_arlen_a [C_M_AXI_MEM_NUM_BANKS]; - wire [2:0] m_axi_mem_arsize_a [C_M_AXI_MEM_NUM_BANKS]; - wire [1:0] m_axi_mem_arburst_a [C_M_AXI_MEM_NUM_BANKS]; - wire [1:0] m_axi_mem_arlock_a [C_M_AXI_MEM_NUM_BANKS]; - wire [3:0] m_axi_mem_arcache_a [C_M_AXI_MEM_NUM_BANKS]; - wire [2:0] m_axi_mem_arprot_a [C_M_AXI_MEM_NUM_BANKS]; - wire [3:0] m_axi_mem_arqos_a [C_M_AXI_MEM_NUM_BANKS]; - wire m_axi_mem_rvalid_a [C_M_AXI_MEM_NUM_BANKS]; 
- wire m_axi_mem_rready_a [C_M_AXI_MEM_NUM_BANKS]; - wire [C_M_AXI_GMEM_DATA_WIDTH - 1:0] m_axi_mem_rdata_a [C_M_AXI_MEM_NUM_BANKS]; - wire m_axi_mem_rlast_a [C_M_AXI_MEM_NUM_BANKS]; - wire [C_M_AXI_GMEM_ID_WIDTH - 1:0] m_axi_mem_rid_a [C_M_AXI_MEM_NUM_BANKS]; - wire [1:0] m_axi_mem_rresp_a [C_M_AXI_MEM_NUM_BANKS]; - wire m_axi_mem_bvalid_a [C_M_AXI_MEM_NUM_BANKS]; - wire m_axi_mem_bready_a [C_M_AXI_MEM_NUM_BANKS]; - wire [1:0] m_axi_mem_bresp_a [C_M_AXI_MEM_NUM_BANKS]; - wire [C_M_AXI_GMEM_ID_WIDTH - 1:0] m_axi_mem_bid_a [C_M_AXI_MEM_NUM_BANKS]; - - assign m_axi_mem_awvalid = m_axi_mem_awvalid_a[0]; - assign m_axi_mem_awready_a[0] = m_axi_mem_awready; - assign m_axi_mem_awaddr = m_axi_mem_awaddr_a[0]; - assign m_axi_mem_awid = m_axi_mem_awid_a[0]; - assign m_axi_mem_awlen = m_axi_mem_awlen_a[0]; - assign m_axi_mem_awsize = m_axi_mem_awsize_a[0]; - assign m_axi_mem_awburst = m_axi_mem_awburst_a[0]; - assign m_axi_mem_awlock = m_axi_mem_awlock_a[0]; - assign m_axi_mem_awcache = m_axi_mem_awcache_a[0]; - assign m_axi_mem_awprot = m_axi_mem_awprot_a[0]; - assign m_axi_mem_awqos = m_axi_mem_awqos_a[0]; - - assign m_axi_mem_wvalid = m_axi_mem_wvalid_a[0]; - assign m_axi_mem_wready_a[0] = m_axi_mem_wready; - assign m_axi_mem_wdata = m_axi_mem_wdata_a[0]; - assign m_axi_mem_wstrb = m_axi_mem_wstrb_a[0]; - assign m_axi_mem_wlast = m_axi_mem_wlast_a[0]; - - assign m_axi_mem_arvalid = m_axi_mem_arvalid_a[0]; - assign m_axi_mem_arready_a[0] = m_axi_mem_arready; - assign m_axi_mem_araddr = m_axi_mem_araddr_a[0]; - assign m_axi_mem_arid = m_axi_mem_arid_a[0]; - assign m_axi_mem_arlen = m_axi_mem_arlen_a[0]; - assign m_axi_mem_arsize = m_axi_mem_arsize_a[0]; - assign m_axi_mem_arburst = m_axi_mem_arburst_a[0]; - assign m_axi_mem_arlock = m_axi_mem_arlock_a[0]; - assign m_axi_mem_arcache = m_axi_mem_arcache_a[0]; - assign m_axi_mem_arprot = m_axi_mem_arprot_a[0]; - assign m_axi_mem_arqos = m_axi_mem_arqos_a[0]; - - assign m_axi_mem_rvalid_a[0] = m_axi_mem_rvalid; - assign 
m_axi_mem_rready = m_axi_mem_rready_a[0]; - assign m_axi_mem_rdata_a[0] = m_axi_mem_rdata; - assign m_axi_mem_rlast_a[0] = m_axi_mem_rlast; - assign m_axi_mem_rid_a[0] = m_axi_mem_rid; - assign m_axi_mem_rresp_a[0] = m_axi_mem_rresp; - - assign m_axi_mem_bvalid_a[0] = m_axi_mem_bvalid; - assign m_axi_mem_bready = m_axi_mem_bready_a[0]; - assign m_axi_mem_bresp_a[0] = m_axi_mem_bresp; - assign m_axi_mem_bid_a[0] = m_axi_mem_bid; - - Vortex_axi #( - .AXI_DATA_WIDTH (C_M_AXI_GMEM_DATA_WIDTH), - .AXI_ADDR_WIDTH (C_M_AXI_GMEM_ADDR_WIDTH), - .AXI_TID_WIDTH (C_M_AXI_GMEM_ID_WIDTH) - ) inst ( - .clk (clk), - .reset (reset), - - .m_axi_awvalid (m_axi_mem_awvalid_a), - .m_axi_awready (m_axi_mem_awready_a), - .m_axi_awaddr (m_axi_mem_awaddr_a), - .m_axi_awid (m_axi_mem_awid_a), - .m_axi_awlen (m_axi_mem_awlen_a), - .m_axi_awsize (m_axi_mem_awsize_a), - .m_axi_awburst (m_axi_mem_awburst_a), - .m_axi_awlock (m_axi_mem_awlock_a), - .m_axi_awcache (m_axi_mem_awcache_a), - .m_axi_awprot (m_axi_mem_awprot_a), - .m_axi_awqos (m_axi_mem_awqos_a), - - .m_axi_wvalid (m_axi_mem_wvalid_a), - .m_axi_wready (m_axi_mem_wready_a), - .m_axi_wdata (m_axi_mem_wdata_a), - .m_axi_wstrb (m_axi_mem_wstrb_a), - .m_axi_wlast (m_axi_mem_wlast_a), - - .m_axi_bvalid (m_axi_mem_bvalid_a), - .m_axi_bready (m_axi_mem_bready_a), - .m_axi_bid (m_axi_mem_bid_a), - .m_axi_bresp (m_axi_mem_bresp_a), - - .m_axi_arvalid (m_axi_mem_arvalid_a), - .m_axi_arready (m_axi_mem_arready_a), - .m_axi_araddr (m_axi_mem_araddr_a), - .m_axi_arid (m_axi_mem_arid_a), - .m_axi_arlen (m_axi_mem_arlen_a), - .m_axi_arsize (m_axi_mem_arsize_a), - .m_axi_arburst (m_axi_mem_arburst_a), - .m_axi_arlock (m_axi_mem_arlock_a), - .m_axi_arcache (m_axi_mem_arcache_a), - .m_axi_arprot (m_axi_mem_arprot_a), - .m_axi_arqos (m_axi_mem_arqos_a), - - .m_axi_rvalid (m_axi_mem_rvalid_a), - .m_axi_rready (m_axi_mem_rready_a), - .m_axi_rdata (m_axi_mem_rdata_a), - .m_axi_rid (m_axi_mem_rid_a), - .m_axi_rresp (m_axi_mem_rresp_a), - .m_axi_rlast 
(m_axi_mem_rlast_a), - - .dcr_wr_valid (dcr_wr_valid), - .dcr_wr_addr (dcr_wr_addr), - .dcr_wr_data (dcr_wr_data), - - .busy (busy) + Vortex_wrap #( + .C_M_AXI_GMEM_DATA_WIDTH(C_M_AXI_GMEM_DATA_WIDTH), + .C_M_AXI_GMEM_ADDR_WIDTH(C_M_AXI_GMEM_ADDR_WIDTH), + .C_M_AXI_GMEM_ID_WIDTH(C_M_AXI_GMEM_ID_WIDTH), + .C_M_AXI_MEM_NUM_BANKS(C_M_AXI_MEM_NUM_BANKS) + ) wrapper ( + .clk(clk), + .reset(reset), + .m_axi_mem_awvalid(m_axi_mem_awvalid), + .m_axi_mem_awready(m_axi_mem_awready), + .m_axi_mem_awaddr(m_axi_mem_awaddr), + .m_axi_mem_awid(m_axi_mem_awid), + .m_axi_mem_awlen(m_axi_mem_awlen), + .m_axi_mem_awsize(m_axi_mem_awsize), + .m_axi_mem_awburst(m_axi_mem_awburst), + .m_axi_mem_awlock(m_axi_mem_awlock), + .m_axi_mem_awcache(m_axi_mem_awcache), + .m_axi_mem_awprot(m_axi_mem_awprot), + .m_axi_mem_awqos(m_axi_mem_awqos), + .m_axi_mem_wvalid(m_axi_mem_wvalid), + .m_axi_mem_wready(m_axi_mem_wready), + .m_axi_mem_wdata(m_axi_mem_wdata), + .m_axi_mem_wstrb(m_axi_mem_wstrb), + .m_axi_mem_wlast(m_axi_mem_wlast), + .m_axi_mem_arvalid(m_axi_mem_arvalid), + .m_axi_mem_arready(m_axi_mem_arready), + .m_axi_mem_araddr(m_axi_mem_araddr), + .m_axi_mem_arid(m_axi_mem_arid), + .m_axi_mem_arlen(m_axi_mem_arlen), + .m_axi_mem_arsize(m_axi_mem_arsize), + .m_axi_mem_arburst(m_axi_mem_arburst), + .m_axi_mem_arlock(m_axi_mem_arlock), + .m_axi_mem_arcache(m_axi_mem_arcache), + .m_axi_mem_arprot(m_axi_mem_arprot), + .m_axi_mem_arqos(m_axi_mem_arqos), + .m_axi_mem_rvalid(m_axi_mem_rvalid), + .m_axi_mem_rready(m_axi_mem_rready), + .m_axi_mem_rdata(m_axi_mem_rdata), + .m_axi_mem_rlast(m_axi_mem_rlast), + .m_axi_mem_rid(m_axi_mem_rid), + .m_axi_mem_rresp(m_axi_mem_rresp), + .m_axi_mem_bvalid(m_axi_mem_bvalid), + .m_axi_mem_bready(m_axi_mem_bready), + .m_axi_mem_bresp(m_axi_mem_bresp), + .m_axi_mem_bid(m_axi_mem_bid), + .dcr_wr_valid(dcr_wr_valid), + .dcr_wr_addr(dcr_wr_addr), + .dcr_wr_data(dcr_wr_data), + .busy(busy) ); - + endmodule diff --git a/hw/syn/xilinx/test/project_1_files/Vortex_wrap.sv 
b/hw/syn/xilinx/test/project_1_files/Vortex_wrap.sv new file mode 100644 index 0000000000..5ec7a868e0 --- /dev/null +++ b/hw/syn/xilinx/test/project_1_files/Vortex_wrap.sv @@ -0,0 +1,208 @@ +// Copyright © 2019-2023 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +`include "VX_define.vh" + +module Vortex_wrap #( + parameter C_M_AXI_GMEM_DATA_WIDTH = 512, + parameter C_M_AXI_GMEM_ADDR_WIDTH = `XLEN, + parameter C_M_AXI_GMEM_ID_WIDTH = 32, + parameter C_M_AXI_MEM_NUM_BANKS = 1 +) ( + input wire clk, + input wire reset, + + // AXI4 memory interface + output wire m_axi_mem_awvalid, + input wire m_axi_mem_awready, + output wire [C_M_AXI_GMEM_ADDR_WIDTH-1:0] m_axi_mem_awaddr, + output wire [C_M_AXI_GMEM_ID_WIDTH - 1:0] m_axi_mem_awid, + output wire [7:0] m_axi_mem_awlen, + output wire [2:0] m_axi_mem_awsize, + output wire [1:0] m_axi_mem_awburst, + output wire [1:0] m_axi_mem_awlock, + output wire [3:0] m_axi_mem_awcache, + output wire [2:0] m_axi_mem_awprot, + output wire [3:0] m_axi_mem_awqos, + output wire m_axi_mem_wvalid, + input wire m_axi_mem_wready, + output wire [C_M_AXI_GMEM_DATA_WIDTH-1:0] m_axi_mem_wdata, + output wire [C_M_AXI_GMEM_DATA_WIDTH/8-1:0] m_axi_mem_wstrb, + output wire m_axi_mem_wlast, + output wire m_axi_mem_arvalid, + input wire m_axi_mem_arready, + output wire [C_M_AXI_GMEM_ADDR_WIDTH-1:0] m_axi_mem_araddr, + output wire [C_M_AXI_GMEM_ID_WIDTH-1:0] m_axi_mem_arid, + output wire [7:0] m_axi_mem_arlen, + output wire [2:0] m_axi_mem_arsize, 
+ output wire [1:0] m_axi_mem_arburst, + output wire [1:0] m_axi_mem_arlock, + output wire [3:0] m_axi_mem_arcache, + output wire [2:0] m_axi_mem_arprot, + output wire [3:0] m_axi_mem_arqos, + input wire m_axi_mem_rvalid, + output wire m_axi_mem_rready, + input wire [C_M_AXI_GMEM_DATA_WIDTH - 1:0] m_axi_mem_rdata, + input wire m_axi_mem_rlast, + input wire [C_M_AXI_GMEM_ID_WIDTH - 1:0] m_axi_mem_rid, + input wire [1:0] m_axi_mem_rresp, + input wire m_axi_mem_bvalid, + output wire m_axi_mem_bready, + input wire [1:0] m_axi_mem_bresp, + input wire [C_M_AXI_GMEM_ID_WIDTH - 1:0] m_axi_mem_bid, + + input wire dcr_wr_valid, + input wire [`VX_DCR_ADDR_WIDTH-1:0] dcr_wr_addr, + input wire [`VX_DCR_DATA_WIDTH-1:0] dcr_wr_data, + + output wire busy +); + + wire m_axi_mem_awvalid_a [C_M_AXI_MEM_NUM_BANKS]; + wire m_axi_mem_awready_a [C_M_AXI_MEM_NUM_BANKS]; + wire [C_M_AXI_GMEM_ADDR_WIDTH-1:0] m_axi_mem_awaddr_a [C_M_AXI_MEM_NUM_BANKS]; + wire [C_M_AXI_GMEM_ID_WIDTH - 1:0] m_axi_mem_awid_a [C_M_AXI_MEM_NUM_BANKS]; + wire [7:0] m_axi_mem_awlen_a [C_M_AXI_MEM_NUM_BANKS]; + wire [2:0] m_axi_mem_awsize_a [C_M_AXI_MEM_NUM_BANKS]; + wire [1:0] m_axi_mem_awburst_a [C_M_AXI_MEM_NUM_BANKS]; + wire [1:0] m_axi_mem_awlock_a [C_M_AXI_MEM_NUM_BANKS]; + wire [3:0] m_axi_mem_awcache_a [C_M_AXI_MEM_NUM_BANKS]; + wire [2:0] m_axi_mem_awprot_a [C_M_AXI_MEM_NUM_BANKS]; + wire [3:0] m_axi_mem_awqos_a [C_M_AXI_MEM_NUM_BANKS]; + wire m_axi_mem_wvalid_a [C_M_AXI_MEM_NUM_BANKS]; + wire m_axi_mem_wready_a [C_M_AXI_MEM_NUM_BANKS]; + wire [C_M_AXI_GMEM_DATA_WIDTH-1:0] m_axi_mem_wdata_a [C_M_AXI_MEM_NUM_BANKS]; + wire [C_M_AXI_GMEM_DATA_WIDTH/8-1:0] m_axi_mem_wstrb_a [C_M_AXI_MEM_NUM_BANKS]; + wire m_axi_mem_wlast_a [C_M_AXI_MEM_NUM_BANKS]; + wire m_axi_mem_arvalid_a [C_M_AXI_MEM_NUM_BANKS]; + wire m_axi_mem_arready_a [C_M_AXI_MEM_NUM_BANKS]; + wire [C_M_AXI_GMEM_ADDR_WIDTH-1:0] m_axi_mem_araddr_a [C_M_AXI_MEM_NUM_BANKS]; + wire [C_M_AXI_GMEM_ID_WIDTH-1:0] m_axi_mem_arid_a [C_M_AXI_MEM_NUM_BANKS]; + 
wire [7:0] m_axi_mem_arlen_a [C_M_AXI_MEM_NUM_BANKS]; + wire [2:0] m_axi_mem_arsize_a [C_M_AXI_MEM_NUM_BANKS]; + wire [1:0] m_axi_mem_arburst_a [C_M_AXI_MEM_NUM_BANKS]; + wire [1:0] m_axi_mem_arlock_a [C_M_AXI_MEM_NUM_BANKS]; + wire [3:0] m_axi_mem_arcache_a [C_M_AXI_MEM_NUM_BANKS]; + wire [2:0] m_axi_mem_arprot_a [C_M_AXI_MEM_NUM_BANKS]; + wire [3:0] m_axi_mem_arqos_a [C_M_AXI_MEM_NUM_BANKS]; + wire m_axi_mem_rvalid_a [C_M_AXI_MEM_NUM_BANKS]; + wire m_axi_mem_rready_a [C_M_AXI_MEM_NUM_BANKS]; + wire [C_M_AXI_GMEM_DATA_WIDTH - 1:0] m_axi_mem_rdata_a [C_M_AXI_MEM_NUM_BANKS]; + wire m_axi_mem_rlast_a [C_M_AXI_MEM_NUM_BANKS]; + wire [C_M_AXI_GMEM_ID_WIDTH - 1:0] m_axi_mem_rid_a [C_M_AXI_MEM_NUM_BANKS]; + wire [1:0] m_axi_mem_rresp_a [C_M_AXI_MEM_NUM_BANKS]; + wire m_axi_mem_bvalid_a [C_M_AXI_MEM_NUM_BANKS]; + wire m_axi_mem_bready_a [C_M_AXI_MEM_NUM_BANKS]; + wire [1:0] m_axi_mem_bresp_a [C_M_AXI_MEM_NUM_BANKS]; + wire [C_M_AXI_GMEM_ID_WIDTH - 1:0] m_axi_mem_bid_a [C_M_AXI_MEM_NUM_BANKS]; + + assign m_axi_mem_awvalid = m_axi_mem_awvalid_a[0]; + assign m_axi_mem_awready_a[0] = m_axi_mem_awready; + assign m_axi_mem_awaddr = m_axi_mem_awaddr_a[0]; + assign m_axi_mem_awid = m_axi_mem_awid_a[0]; + assign m_axi_mem_awlen = m_axi_mem_awlen_a[0]; + assign m_axi_mem_awsize = m_axi_mem_awsize_a[0]; + assign m_axi_mem_awburst = m_axi_mem_awburst_a[0]; + assign m_axi_mem_awlock = m_axi_mem_awlock_a[0]; + assign m_axi_mem_awcache = m_axi_mem_awcache_a[0]; + assign m_axi_mem_awprot = m_axi_mem_awprot_a[0]; + assign m_axi_mem_awqos = m_axi_mem_awqos_a[0]; + + assign m_axi_mem_wvalid = m_axi_mem_wvalid_a[0]; + assign m_axi_mem_wready_a[0] = m_axi_mem_wready; + assign m_axi_mem_wdata = m_axi_mem_wdata_a[0]; + assign m_axi_mem_wstrb = m_axi_mem_wstrb_a[0]; + assign m_axi_mem_wlast = m_axi_mem_wlast_a[0]; + + assign m_axi_mem_arvalid = m_axi_mem_arvalid_a[0]; + assign m_axi_mem_arready_a[0] = m_axi_mem_arready; + assign m_axi_mem_araddr = m_axi_mem_araddr_a[0]; + assign m_axi_mem_arid = 
m_axi_mem_arid_a[0]; + assign m_axi_mem_arlen = m_axi_mem_arlen_a[0]; + assign m_axi_mem_arsize = m_axi_mem_arsize_a[0]; + assign m_axi_mem_arburst = m_axi_mem_arburst_a[0]; + assign m_axi_mem_arlock = m_axi_mem_arlock_a[0]; + assign m_axi_mem_arcache = m_axi_mem_arcache_a[0]; + assign m_axi_mem_arprot = m_axi_mem_arprot_a[0]; + assign m_axi_mem_arqos = m_axi_mem_arqos_a[0]; + + assign m_axi_mem_rvalid_a[0] = m_axi_mem_rvalid; + assign m_axi_mem_rready = m_axi_mem_rready_a[0]; + assign m_axi_mem_rdata_a[0] = m_axi_mem_rdata; + assign m_axi_mem_rlast_a[0] = m_axi_mem_rlast; + assign m_axi_mem_rid_a[0] = m_axi_mem_rid; + assign m_axi_mem_rresp_a[0] = m_axi_mem_rresp; + + assign m_axi_mem_bvalid_a[0] = m_axi_mem_bvalid; + assign m_axi_mem_bready = m_axi_mem_bready_a[0]; + assign m_axi_mem_bresp_a[0] = m_axi_mem_bresp; + assign m_axi_mem_bid_a[0] = m_axi_mem_bid; + + Vortex_axi #( + .AXI_DATA_WIDTH (C_M_AXI_GMEM_DATA_WIDTH), + .AXI_ADDR_WIDTH (C_M_AXI_GMEM_ADDR_WIDTH), + .AXI_TID_WIDTH (C_M_AXI_GMEM_ID_WIDTH) + ) inst ( + .clk (clk), + .reset (reset), + + .m_axi_awvalid (m_axi_mem_awvalid_a), + .m_axi_awready (m_axi_mem_awready_a), + .m_axi_awaddr (m_axi_mem_awaddr_a), + .m_axi_awid (m_axi_mem_awid_a), + .m_axi_awlen (m_axi_mem_awlen_a), + .m_axi_awsize (m_axi_mem_awsize_a), + .m_axi_awburst (m_axi_mem_awburst_a), + .m_axi_awlock (m_axi_mem_awlock_a), + .m_axi_awcache (m_axi_mem_awcache_a), + .m_axi_awprot (m_axi_mem_awprot_a), + .m_axi_awqos (m_axi_mem_awqos_a), + + .m_axi_wvalid (m_axi_mem_wvalid_a), + .m_axi_wready (m_axi_mem_wready_a), + .m_axi_wdata (m_axi_mem_wdata_a), + .m_axi_wstrb (m_axi_mem_wstrb_a), + .m_axi_wlast (m_axi_mem_wlast_a), + + .m_axi_bvalid (m_axi_mem_bvalid_a), + .m_axi_bready (m_axi_mem_bready_a), + .m_axi_bid (m_axi_mem_bid_a), + .m_axi_bresp (m_axi_mem_bresp_a), + + .m_axi_arvalid (m_axi_mem_arvalid_a), + .m_axi_arready (m_axi_mem_arready_a), + .m_axi_araddr (m_axi_mem_araddr_a), + .m_axi_arid (m_axi_mem_arid_a), + .m_axi_arlen 
(m_axi_mem_arlen_a), + .m_axi_arsize (m_axi_mem_arsize_a), + .m_axi_arburst (m_axi_mem_arburst_a), + .m_axi_arlock (m_axi_mem_arlock_a), + .m_axi_arcache (m_axi_mem_arcache_a), + .m_axi_arprot (m_axi_mem_arprot_a), + .m_axi_arqos (m_axi_mem_arqos_a), + + .m_axi_rvalid (m_axi_mem_rvalid_a), + .m_axi_rready (m_axi_mem_rready_a), + .m_axi_rdata (m_axi_mem_rdata_a), + .m_axi_rid (m_axi_mem_rid_a), + .m_axi_rresp (m_axi_mem_rresp_a), + .m_axi_rlast (m_axi_mem_rlast_a), + + .dcr_wr_valid (dcr_wr_valid), + .dcr_wr_addr (dcr_wr_addr), + .dcr_wr_data (dcr_wr_data), + + .busy (busy) + ); + +endmodule diff --git a/tests/kernel/common.mk b/tests/kernel/common.mk index 050b1b48d2..b02454412a 100644 --- a/tests/kernel/common.mk +++ b/tests/kernel/common.mk @@ -8,6 +8,8 @@ endif VORTEX_KN_PATH ?= $(ROOT_DIR)/kernel +STARTUP_ADDR ?= 0x80000000 + LLVM_CFLAGS += --sysroot=$(RISCV_SYSROOT) LLVM_CFLAGS += --gcc-toolchain=$(RISCV_TOOLCHAIN_PATH) LLVM_CFLAGS += -Xclang -target-feature -Xclang +vortex -mllvm -vortex-branch-divergence=0 @@ -31,7 +33,7 @@ CFLAGS += -DXLEN_$(XLEN) -DNDEBUG LIBC_LIB += -L$(LIBC_VORTEX)/lib -lm -lc LIBC_LIB += $(LIBCRT_VORTEX)/lib/baremetal/libclang_rt.builtins-riscv$(XLEN).a -LDFLAGS += -Wl,-Bstatic,--gc-sections,-T,$(VORTEX_HOME)/kernel/scripts/link$(XLEN).ld,--defsym=STARTUP_ADDR=0x80000000 $(VORTEX_KN_PATH)/libvortex.a $(LIBC_LIB) +LDFLAGS += -Wl,-Bstatic,--gc-sections,-T,$(VORTEX_HOME)/kernel/scripts/link$(XLEN).ld,--defsym=STARTUP_ADDR=$(STARTUP_ADDR) $(VORTEX_KN_PATH)/libvortex.a $(LIBC_LIB) all: $(PROJECT).elf $(PROJECT).bin $(PROJECT).dump From 693a9f648d41a5ee76755f924e0576b215816f7d Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Mon, 19 Aug 2024 18:25:38 -0700 Subject: [PATCH 067/407] Ci script update --- ci/system_updates.sh | 31 +++++++++++++++++++++++++------ 1 file changed, 25 insertions(+), 6 deletions(-) diff --git a/ci/system_updates.sh b/ci/system_updates.sh index 43abbe5ab4..a62ed253be 100755 --- a/ci/system_updates.sh +++ 
b/ci/system_updates.sh @@ -16,12 +16,31 @@ set -e +# Function to check if GCC version is less than 11 +check_gcc_version() { + local gcc_version + gcc_version=$(gcc -dumpversion) + if dpkg --compare-versions "$gcc_version" lt 11; then + return 0 # GCC version is less than 11 + else + return 1 # GCC version is 11 or greater + fi +} + +# Update package list apt-get update -y -add-apt-repository -y ppa:ubuntu-toolchain-r/test -apt-get update -apt-get install -y g++-11 gcc-11 -update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-11 100 -update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-11 100 +# install system dependencies +apt-get install -y build-essential valgrind libstdc++6 binutils python3 uuid-dev ccache -apt-get install -y build-essential valgrind libstdc++6 binutils python uuid-dev ccache +# Check and install GCC 11 if necessary +if check_gcc_version; then + echo "GCC version is less than 11. Installing GCC 11..." + add-apt-repository -y ppa:ubuntu-toolchain-r/test + apt-get update + apt-get install -y g++-11 gcc-11 + update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-11 100 + update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-11 100 +else + echo "GCC version is 11 or greater. No need to install GCC 11." 
+fi From 5e241c153c9e0d7e3e29eae596c47ed5805c7fbd Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Mon, 19 Aug 2024 18:36:37 -0700 Subject: [PATCH 068/407] Ci script update --- .github/workflows/ci.yml | 6 +++--- README.md | 13 +++++-------- ci/{system_updates.sh => install_dependencies.sh} | 0 miscs/docker/Dockerfile.ubuntu | 2 +- 4 files changed, 9 insertions(+), 12 deletions(-) rename ci/{system_updates.sh => install_dependencies.sh} (100%) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f49dd42bf4..724ec2a138 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -46,7 +46,7 @@ jobs: - name: Install Dependencies if: steps.cache-toolchain.outputs.cache-hit != 'true' || steps.cache-thirdparty.outputs.cache-hit != 'true' run: | - sudo bash ./ci/system_updates.sh + sudo bash ./ci/install_dependencies.sh - name: Setup Toolchain if: steps.cache-toolchain.outputs.cache-hit != 'true' @@ -75,7 +75,7 @@ jobs: - name: Install Dependencies run: | - sudo bash ./ci/system_updates.sh + sudo bash ./ci/install_dependencies.sh - name: Cache Toolchain Directory id: cache-toolchain @@ -126,7 +126,7 @@ jobs: - name: Install Dependencies run: | - sudo bash ./ci/system_updates.sh + sudo bash ./ci/install_dependencies.sh - name: Cache Toolchain Directory id: cache-toolchain diff --git a/README.md b/README.md index d789d00bd9..d4ed68a590 100644 --- a/README.md +++ b/README.md @@ -44,19 +44,16 @@ More detailed build instructions can be found [here](docs/install_vortex.md). 
- [Ramulator](https://github.com/CMU-SAFARI/ramulator.git) - [Yosys](https://github.com/YosysHQ/yosys) - [Sv2v](https://github.com/zachjs/sv2v) -### Install development tools -```sh -sudo apt-get install build-essential -sudo apt-get install binutils -sudo apt-get install python -sudo apt-get install uuid-dev -sudo apt-get install git -``` ### Install Vortex codebase ```sh git clone --depth=1 --recursive https://github.com/vortexgpgpu/vortex.git cd vortex ``` +### Install system dependencies +```sh +# ensure dependent libraries are present +sudo ./ci/install_dependencies.sh +``` ### Configure your build folder ```sh mkdir build diff --git a/ci/system_updates.sh b/ci/install_dependencies.sh similarity index 100% rename from ci/system_updates.sh rename to ci/install_dependencies.sh diff --git a/miscs/docker/Dockerfile.ubuntu b/miscs/docker/Dockerfile.ubuntu index c3e72a0f47..f3a864ce53 100644 --- a/miscs/docker/Dockerfile.ubuntu +++ b/miscs/docker/Dockerfile.ubuntu @@ -39,7 +39,7 @@ RUN git clone --depth=1 --recursive https://github.com/vortexgpgpu/vortex.git /v WORKDIR /vortex # install system dependencies -RUN ./ci/system_updates.sh +RUN ./ci/install_dependencies.sh # Configure the build folder RUN mkdir build && cd build && ../configure From 005d480bb459a6dda8419918caf921a49ac3a07e Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Tue, 20 Aug 2024 23:30:44 -0700 Subject: [PATCH 069/407] minor updates --- hw/rtl/fpu/VX_fpu_dsp.sv | 32 ++++++++++---------- hw/rtl/libs/VX_pe_serializer.sv | 2 +- hw/rtl/libs/VX_pipe_buffer.sv | 7 +++-- hw/rtl/libs/VX_sp_ram.sv | 2 ++ hw/rtl/mem/VX_lmem_switch.sv | 52 ++++++++++++++++----------------- 5 files changed, 49 insertions(+), 46 deletions(-) diff --git a/hw/rtl/fpu/VX_fpu_dsp.sv b/hw/rtl/fpu/VX_fpu_dsp.sv index 967bbbc29f..2e479976a1 100644 --- a/hw/rtl/fpu/VX_fpu_dsp.sv +++ b/hw/rtl/fpu/VX_fpu_dsp.sv @@ -51,20 +51,20 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #( localparam FPU_DIVSQRT = 1; localparam FPU_CVT = 2; localparam 
FPU_NCP = 3; - localparam NUM_FPC = 4; - localparam FPC_BITS = `LOG2UP(NUM_FPC); + localparam NUM_FPCORES = 4; + localparam FPCORES_BITS = `LOG2UP(NUM_FPCORES); localparam RSP_DATAW = (NUM_LANES * 32) + 1 + $bits(fflags_t) + TAG_WIDTH; `UNUSED_VAR (fmt) - wire [NUM_FPC-1:0] per_core_ready_in; - wire [NUM_FPC-1:0][NUM_LANES-1:0][31:0] per_core_result; - wire [NUM_FPC-1:0][TAG_WIDTH-1:0] per_core_tag_out; - wire [NUM_FPC-1:0] per_core_ready_out; - wire [NUM_FPC-1:0] per_core_valid_out; - wire [NUM_FPC-1:0] per_core_has_fflags; - fflags_t [NUM_FPC-1:0] per_core_fflags; + wire [NUM_FPCORES-1:0] per_core_ready_in; + wire [NUM_FPCORES-1:0][NUM_LANES-1:0][31:0] per_core_result; + wire [NUM_FPCORES-1:0][TAG_WIDTH-1:0] per_core_tag_out; + wire [NUM_FPCORES-1:0] per_core_ready_out; + wire [NUM_FPCORES-1:0] per_core_valid_out; + wire [NUM_FPCORES-1:0] per_core_has_fflags; + fflags_t [NUM_FPCORES-1:0] per_core_fflags; wire div_ready_in, sqrt_ready_in; wire [NUM_LANES-1:0][31:0] div_result, sqrt_result; @@ -74,7 +74,7 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #( wire div_has_fflags, sqrt_has_fflags; fflags_t div_fflags, sqrt_fflags; - reg [FPC_BITS-1:0] core_select; + reg [FPCORES_BITS-1:0] core_select; reg is_madd, is_sub, is_neg, is_div, is_itof, is_signed; always @(*) begin @@ -122,6 +122,9 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #( `UNUSED_VAR (datab) `UNUSED_VAR (datac) + // can accept new request? 
+ assign ready_in = per_core_ready_in[core_select]; + VX_fpu_fma #( .NUM_LANES (NUM_LANES), .TAG_WIDTH (TAG_WIDTH) @@ -272,10 +275,10 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #( /////////////////////////////////////////////////////////////////////////// - reg [NUM_FPC-1:0][RSP_DATAW+2-1:0] per_core_data_out; + reg [NUM_FPCORES-1:0][RSP_DATAW+2-1:0] per_core_data_out; always @(*) begin - for (integer i = 0; i < NUM_FPC; ++i) begin + for (integer i = 0; i < NUM_FPCORES; ++i) begin per_core_data_out[i][RSP_DATAW+1:2] = { per_core_result[i], per_core_has_fflags[i], @@ -294,7 +297,7 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #( `UNUSED_VAR (op_ret_int_out) VX_stream_arb #( - .NUM_INPUTS (NUM_FPC), + .NUM_INPUTS (NUM_FPCORES), .DATAW (RSP_DATAW + 2), .ARBITER ("R"), .OUT_BUF (OUT_BUF) @@ -326,9 +329,6 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #( `endif end - // can accept new request? - assign ready_in = per_core_ready_in[core_select]; - endmodule `endif diff --git a/hw/rtl/libs/VX_pe_serializer.sv b/hw/rtl/libs/VX_pe_serializer.sv index eac1eddcb7..4e3a291328 100644 --- a/hw/rtl/libs/VX_pe_serializer.sv +++ b/hw/rtl/libs/VX_pe_serializer.sv @@ -137,7 +137,7 @@ module VX_pe_serializer #( assign pe_data_in_s = data_in; - assign enable = ready_out_u || ~valid_out_u; + assign enable = ready_out_u || ~valid_out_s; assign ready_in = enable; assign pe_enable = enable; diff --git a/hw/rtl/libs/VX_pipe_buffer.sv b/hw/rtl/libs/VX_pipe_buffer.sv index 167235c178..6ed6cf8eca 100644 --- a/hw/rtl/libs/VX_pipe_buffer.sv +++ b/hw/rtl/libs/VX_pipe_buffer.sv @@ -24,8 +24,9 @@ `TRACING_OFF module VX_pipe_buffer #( - parameter DATAW = 1, - parameter DEPTH = 1 + parameter DATAW = 1, + parameter RESETW = 0, + parameter DEPTH = 1 ) ( input wire clk, input wire reset, @@ -57,7 +58,7 @@ module VX_pipe_buffer #( assign ready[i] = (ready[i+1] || ~valid[i+1]); VX_pipe_register #( .DATAW (1 + DATAW), - .RESETW (1) + .RESETW (1 + RESETW) ) pipe_register ( .clk (clk), .reset (reset), diff --git 
a/hw/rtl/libs/VX_sp_ram.sv b/hw/rtl/libs/VX_sp_ram.sv index 4ab2a9b7a4..3e73a013fd 100644 --- a/hw/rtl/libs/VX_sp_ram.sv +++ b/hw/rtl/libs/VX_sp_ram.sv @@ -24,6 +24,7 @@ module VX_sp_ram #( parameter RW_ASSERT = 0, parameter LUTRAM = 0, parameter RESET_RAM = 0, + parameter READ_ENABLE = 0, parameter INIT_ENABLE = 0, parameter INIT_FILE = "", parameter [DATAW-1:0] INIT_VALUE = 0, @@ -48,6 +49,7 @@ module VX_sp_ram #( .RW_ASSERT (RW_ASSERT), .LUTRAM (LUTRAM), .RESET_RAM (RESET_RAM), + .READ_ENABLE (READ_ENABLE), .INIT_ENABLE (INIT_ENABLE), .INIT_FILE (INIT_FILE), .INIT_VALUE (INIT_VALUE), diff --git a/hw/rtl/mem/VX_lmem_switch.sv b/hw/rtl/mem/VX_lmem_switch.sv index 628190a8dd..6429077857 100644 --- a/hw/rtl/mem/VX_lmem_switch.sv +++ b/hw/rtl/mem/VX_lmem_switch.sv @@ -22,8 +22,8 @@ module VX_lmem_switch import VX_gpu_pkg::*; #( input wire clk, input wire reset, VX_lsu_mem_if.slave lsu_in_if, - VX_lsu_mem_if.master cache_out_if, - VX_lsu_mem_if.master lmem_out_if + VX_lsu_mem_if.master global_out_if, + VX_lsu_mem_if.master local_out_if ); localparam REQ_DATAW = `NUM_LSU_LANES + 1 + `NUM_LSU_LANES * (LSU_WORD_SIZE + LSU_ADDR_WIDTH + `MEM_REQ_FLAGS_WIDTH + LSU_WORD_SIZE * 8) + LSU_TAG_WIDTH; localparam RSP_DATAW = `NUM_LSU_LANES + `NUM_LSU_LANES * (LSU_WORD_SIZE * 8) + LSU_TAG_WIDTH; @@ -60,17 +60,17 @@ module VX_lmem_switch import VX_gpu_pkg::*; #( lsu_in_if.req_data.tag }), .ready_in (req_global_ready), - .valid_out (cache_out_if.req_valid), + .valid_out (global_out_if.req_valid), .data_out ({ - cache_out_if.req_data.mask, - cache_out_if.req_data.rw, - cache_out_if.req_data.addr, - cache_out_if.req_data.data, - cache_out_if.req_data.byteen, - cache_out_if.req_data.flags, - cache_out_if.req_data.tag + global_out_if.req_data.mask, + global_out_if.req_data.rw, + global_out_if.req_data.addr, + global_out_if.req_data.data, + global_out_if.req_data.byteen, + global_out_if.req_data.flags, + global_out_if.req_data.tag }), - .ready_out (cache_out_if.req_ready) + .ready_out 
(global_out_if.req_ready) ); VX_elastic_buffer #( @@ -91,17 +91,17 @@ module VX_lmem_switch import VX_gpu_pkg::*; #( lsu_in_if.req_data.tag }), .ready_in (req_local_ready), - .valid_out (lmem_out_if.req_valid), + .valid_out (local_out_if.req_valid), .data_out ({ - lmem_out_if.req_data.mask, - lmem_out_if.req_data.rw, - lmem_out_if.req_data.addr, - lmem_out_if.req_data.data, - lmem_out_if.req_data.byteen, - lmem_out_if.req_data.flags, - lmem_out_if.req_data.tag + local_out_if.req_data.mask, + local_out_if.req_data.rw, + local_out_if.req_data.addr, + local_out_if.req_data.data, + local_out_if.req_data.byteen, + local_out_if.req_data.flags, + local_out_if.req_data.tag }), - .ready_out (lmem_out_if.req_ready) + .ready_out (local_out_if.req_ready) ); VX_stream_arb #( @@ -113,16 +113,16 @@ module VX_lmem_switch import VX_gpu_pkg::*; #( .clk (clk), .reset (reset), .valid_in ({ - lmem_out_if.rsp_valid, - cache_out_if.rsp_valid + local_out_if.rsp_valid, + global_out_if.rsp_valid }), .ready_in ({ - lmem_out_if.rsp_ready, - cache_out_if.rsp_ready + local_out_if.rsp_ready, + global_out_if.rsp_ready }), .data_in ({ - lmem_out_if.rsp_data, - cache_out_if.rsp_data + local_out_if.rsp_data, + global_out_if.rsp_data }), .data_out (lsu_in_if.rsp_data), .valid_out (lsu_in_if.rsp_valid), From 771a10ea0ccef0090a0d223fc73ad38a4cdfc30d Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Tue, 20 Aug 2024 23:31:16 -0700 Subject: [PATCH 070/407] minor update --- hw/rtl/core/VX_mem_unit.sv | 61 +++++++++++++++++++++----------------- 1 file changed, 33 insertions(+), 28 deletions(-) diff --git a/hw/rtl/core/VX_mem_unit.sv b/hw/rtl/core/VX_mem_unit.sv index 4f94c2765b..6569c1d472 100644 --- a/hw/rtl/core/VX_mem_unit.sv +++ b/hw/rtl/core/VX_mem_unit.sv @@ -55,8 +55,8 @@ module VX_mem_unit import VX_gpu_pkg::*; #( .clk (clk), .reset (reset), .lsu_in_if (lsu_mem_in_if[i]), - .cache_out_if (lsu_dcache_if[i]), - .lmem_out_if (lsu_lmem_if[i]) + .global_out_if(lsu_dcache_if[i]), + .local_out_if 
(lsu_lmem_if[i]) ); end @@ -65,7 +65,7 @@ module VX_mem_unit import VX_gpu_pkg::*; #( .TAG_WIDTH (LSU_TAG_WIDTH) ) lmem_bus_if[LSU_NUM_REQS](); - for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin : adapter_slices + for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin : lmem_adapter_slices VX_mem_bus_if #( .DATA_SIZE (LSU_WORD_SIZE), .TAG_WIDTH (LSU_TAG_WIDTH) @@ -123,15 +123,15 @@ module VX_mem_unit import VX_gpu_pkg::*; #( `endif - for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin : coalescer_blocks + VX_lsu_mem_if #( + .NUM_LANES (DCACHE_CHANNELS), + .DATA_SIZE (DCACHE_WORD_SIZE), + .TAG_WIDTH (DCACHE_TAG_WIDTH) + ) dcache_coalesced_if[`NUM_LSU_BLOCKS](); - VX_lsu_mem_if #( - .NUM_LANES (DCACHE_CHANNELS), - .DATA_SIZE (DCACHE_WORD_SIZE), - .TAG_WIDTH (DCACHE_TAG_WIDTH) - ) dcache_coalesced_if(); + if (LSU_WORD_SIZE != DCACHE_WORD_SIZE) begin : coalescer_if - if (LSU_WORD_SIZE != DCACHE_WORD_SIZE) begin : coalescer_if + for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin : coalescer_blocks `RESET_RELAY (mem_coalescer_reset, reset); @@ -168,30 +168,35 @@ module VX_mem_unit import VX_gpu_pkg::*; #( .in_rsp_ready (lsu_dcache_if[i].rsp_ready), // Output request - .out_req_valid (dcache_coalesced_if.req_valid), - .out_req_mask (dcache_coalesced_if.req_data.mask), - .out_req_rw (dcache_coalesced_if.req_data.rw), - .out_req_byteen (dcache_coalesced_if.req_data.byteen), - .out_req_addr (dcache_coalesced_if.req_data.addr), - .out_req_flags (dcache_coalesced_if.req_data.flags), - .out_req_data (dcache_coalesced_if.req_data.data), - .out_req_tag (dcache_coalesced_if.req_data.tag), - .out_req_ready (dcache_coalesced_if.req_ready), + .out_req_valid (dcache_coalesced_if[i].req_valid), + .out_req_mask (dcache_coalesced_if[i].req_data.mask), + .out_req_rw (dcache_coalesced_if[i].req_data.rw), + .out_req_byteen (dcache_coalesced_if[i].req_data.byteen), + .out_req_addr (dcache_coalesced_if[i].req_data.addr), + .out_req_flags (dcache_coalesced_if[i].req_data.flags), + .out_req_data 
(dcache_coalesced_if[i].req_data.data), + .out_req_tag (dcache_coalesced_if[i].req_data.tag), + .out_req_ready (dcache_coalesced_if[i].req_ready), // Output response - .out_rsp_valid (dcache_coalesced_if.rsp_valid), - .out_rsp_mask (dcache_coalesced_if.rsp_data.mask), - .out_rsp_data (dcache_coalesced_if.rsp_data.data), - .out_rsp_tag (dcache_coalesced_if.rsp_data.tag), - .out_rsp_ready (dcache_coalesced_if.rsp_ready) + .out_rsp_valid (dcache_coalesced_if[i].rsp_valid), + .out_rsp_mask (dcache_coalesced_if[i].rsp_data.mask), + .out_rsp_data (dcache_coalesced_if[i].rsp_data.data), + .out_rsp_tag (dcache_coalesced_if[i].rsp_data.tag), + .out_rsp_ready (dcache_coalesced_if[i].rsp_ready) ); + end - end else begin - - `ASSIGN_VX_LSU_MEM_IF (dcache_coalesced_if, lsu_dcache_if[i]); + end else begin + for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin + `ASSIGN_VX_LSU_MEM_IF (dcache_coalesced_if[i], lsu_dcache_if[i]); end + end + + for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin : dcache_adapter_slices + VX_mem_bus_if #( .DATA_SIZE (DCACHE_WORD_SIZE), .TAG_WIDTH (DCACHE_TAG_WIDTH) @@ -208,7 +213,7 @@ module VX_mem_unit import VX_gpu_pkg::*; #( ) dcache_adapter ( .clk (clk), .reset (reset), - .lsu_mem_if (dcache_coalesced_if), + .lsu_mem_if (dcache_coalesced_if[i]), .mem_bus_if (dcache_bus_tmp_if) ); From 9797c6c48aee4496498252426a1e79a02d8ac127 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Wed, 21 Aug 2024 03:38:15 -0700 Subject: [PATCH 071/407] minor udpate --- hw/rtl/libs/VX_cyclic_arbiter.sv | 12 +++++++----- hw/rtl/libs/VX_priority_encoder.sv | 3 +-- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/hw/rtl/libs/VX_cyclic_arbiter.sv b/hw/rtl/libs/VX_cyclic_arbiter.sv index dc4de1300f..0b8fcedfee 100644 --- a/hw/rtl/libs/VX_cyclic_arbiter.sv +++ b/hw/rtl/libs/VX_cyclic_arbiter.sv @@ -40,17 +40,17 @@ module VX_cyclic_arbiter #( localparam IS_POW2 = (1 << LOG_NUM_REQS) == NUM_REQS; - wire [LOG_NUM_REQS-1:0] grant_index_um; + wire [LOG_NUM_REQS-1:0] 
grant_index_um, grant_index_ql; reg [LOG_NUM_REQS-1:0] grant_index_r; always @(posedge clk) begin if (reset) begin grant_index_r <= '0; end else if (grant_valid && grant_ready) begin - if (!IS_POW2 && grant_index == LOG_NUM_REQS'(NUM_REQS-1)) begin + if (!IS_POW2 && grant_index_ql == LOG_NUM_REQS'(NUM_REQS-1)) begin grant_index_r <= '0; end else begin - grant_index_r <= grant_index + LOG_NUM_REQS'(1); + grant_index_r <= grant_index_ql + LOG_NUM_REQS'(1); end end end @@ -64,8 +64,10 @@ module VX_cyclic_arbiter #( .valid_out (grant_valid) ); - assign grant_index = requests[grant_index_r] ? grant_index_r : grant_index_um; - assign grant_onehot = NUM_REQS'(1) << grant_index; + assign grant_index_ql = requests[grant_index_r] ? grant_index_r : grant_index_um; + + assign grant_index = grant_index_ql; + assign grant_onehot = NUM_REQS'(1) << grant_index_ql; end diff --git a/hw/rtl/libs/VX_priority_encoder.sv b/hw/rtl/libs/VX_priority_encoder.sv index 1d34f0e511..f96a07bb73 100644 --- a/hw/rtl/libs/VX_priority_encoder.sv +++ b/hw/rtl/libs/VX_priority_encoder.sv @@ -65,11 +65,10 @@ module VX_priority_encoder #( ) lzc ( .data_in (reversed), .data_out (index_out), - `UNUSED_PIN (valid_out) + .valid_out(valid_out) ); assign onehot_out = scan_lo & {(~scan_lo[N-2:0]), 1'b1}; - assign valid_out = scan_lo[N-1]; end else if (MODEL == 2) begin From 177f0efc597850e229ffe0f3ca9c120463a27770 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Wed, 21 Aug 2024 03:39:09 -0700 Subject: [PATCH 072/407] minor update --- hw/rtl/core/VX_operands.sv | 81 +++++++++++++++++++------------------- hw/rtl/mem/VX_local_mem.sv | 13 +++--- 2 files changed, 48 insertions(+), 46 deletions(-) diff --git a/hw/rtl/core/VX_operands.sv b/hw/rtl/core/VX_operands.sv index 1b9c6f0109..f47b4964f3 100644 --- a/hw/rtl/core/VX_operands.sv +++ b/hw/rtl/core/VX_operands.sv @@ -59,18 +59,17 @@ module VX_operands import VX_gpu_pkg::*; #( wire [NUM_SRC_OPDS-1:0][BANK_SEL_WIDTH-1:0] req_bank_idx; wire [NUM_BANKS-1:0] 
gpr_rd_valid, gpr_rd_ready; - wire [NUM_BANKS-1:0] gpr_rd_valid_st1; + wire [NUM_BANKS-1:0] gpr_rd_valid_st1, gpr_rd_valid_st2; wire [NUM_BANKS-1:0][PER_BANK_ADDRW-1:0] gpr_rd_addr, gpr_rd_addr_st1; - wire [NUM_BANKS-1:0][`NUM_THREADS-1:0][`XLEN-1:0] gpr_rd_data; - wire [NUM_BANKS-1:0][REQ_SEL_WIDTH-1:0] gpr_rd_req_idx, gpr_rd_req_idx_st1; + wire [NUM_BANKS-1:0][`NUM_THREADS-1:0][`XLEN-1:0] gpr_rd_data_st2; + wire [NUM_BANKS-1:0][REQ_SEL_WIDTH-1:0] gpr_rd_req_idx, gpr_rd_req_idx_st1, gpr_rd_req_idx_st2; - wire pipe_valid_st1, pipe_ready_st1; + wire pipe_valid_st1, pipe_ready_st1, pipe_in_ready; wire pipe_valid_st2, pipe_ready_st2; wire [META_DATAW-1:0] pipe_data, pipe_data_st1, pipe_data_st2; - reg [NUM_SRC_OPDS-1:0][`NUM_THREADS-1:0][`XLEN-1:0] gpr_rd_data_st1; - wire [NUM_SRC_OPDS-1:0][`NUM_THREADS-1:0][`XLEN-1:0] gpr_rd_data_st2; - wire [NUM_SRC_OPDS-1:0][`NUM_THREADS-1:0][`XLEN-1:0] src_data_st1, src_data_st2, src_data_m_st2; + reg [NUM_SRC_OPDS-1:0][`NUM_THREADS-1:0][`XLEN-1:0] src_data_m_st2; + wire [NUM_SRC_OPDS-1:0][`NUM_THREADS-1:0][`XLEN-1:0] src_data_st1, src_data_st2; reg [NUM_SRC_OPDS-1:0] data_fetched_n; wire [NUM_SRC_OPDS-1:0] data_fetched_st1; @@ -123,15 +122,8 @@ module VX_operands import VX_gpu_pkg::*; #( .ready_out (gpr_rd_ready) ); - wire pipe_in_ready = pipe_ready_st1 || ~pipe_valid_st1; - assign gpr_rd_ready = {NUM_BANKS{pipe_in_ready}}; - assign scoreboard_if.ready = pipe_in_ready && ~has_collision_n; - - wire pipe_fire_st1 = pipe_valid_st1 && pipe_ready_st1; - wire pipe_fire_st2 = pipe_valid_st2 && pipe_ready_st2; - always @(*) begin has_collision_n = 0; for (integer i = 0; i < NUM_SRC_OPDS; ++i) begin @@ -164,47 +156,54 @@ module VX_operands import VX_gpu_pkg::*; #( scoreboard_if.data.uuid }; - VX_pipe_register #( - .DATAW (1 + NUM_SRC_OPDS + NUM_BANKS + META_DATAW + 1 + NUM_BANKS * (PER_BANK_ADDRW + REQ_SEL_WIDTH)), - .RESETW (1 + NUM_SRC_OPDS) + assign scoreboard_if.ready = pipe_in_ready && ~has_collision_n; + + wire pipe_fire_st1 = 
pipe_valid_st1 && pipe_ready_st1; + wire pipe_fire_st2 = pipe_valid_st2 && pipe_ready_st2; + + VX_pipe_buffer #( + .DATAW (NUM_SRC_OPDS + NUM_BANKS + META_DATAW + 1 + NUM_BANKS * (PER_BANK_ADDRW + REQ_SEL_WIDTH)), + .RESETW (NUM_SRC_OPDS) ) pipe_reg1 ( .clk (clk), .reset (reset), - .enable (pipe_in_ready), - .data_in ({scoreboard_if.valid, data_fetched_n, gpr_rd_valid, pipe_data, has_collision_n, gpr_rd_addr, gpr_rd_req_idx}), - .data_out ({pipe_valid_st1, data_fetched_st1, gpr_rd_valid_st1, pipe_data_st1, has_collision_st1, gpr_rd_addr_st1, gpr_rd_req_idx_st1}) + .valid_in (scoreboard_if.valid), + .ready_in (pipe_in_ready), + .data_in ({data_fetched_n, gpr_rd_valid, pipe_data, has_collision_n, gpr_rd_addr, gpr_rd_req_idx}), + .data_out ({data_fetched_st1, gpr_rd_valid_st1, pipe_data_st1, has_collision_st1, gpr_rd_addr_st1, gpr_rd_req_idx_st1}), + .valid_out(pipe_valid_st1), + .ready_out(pipe_ready_st1) ); - assign pipe_ready_st1 = pipe_ready_st2 || ~pipe_valid_st2; - - always @(*) begin - gpr_rd_data_st1 = '0; - for (integer b = 0; b < NUM_BANKS; ++b) begin - if (gpr_rd_valid_st1[b]) begin - gpr_rd_data_st1[gpr_rd_req_idx_st1[b]] = gpr_rd_data[b]; - end - end - end - - assign src_data_m_st2 = src_data_st2 | gpr_rd_data_st2; - assign src_data_st1 = pipe_fire_st2 ? 
'0 : src_data_m_st2; wire pipe_valid2_st1 = pipe_valid_st1 && ~has_collision_st1; `RESET_RELAY (pipe2_reset, reset); // needed for pipe_reg2's wide RESETW - VX_pipe_register #( - .DATAW (1 + NUM_SRC_OPDS * REGS_DATAW + NUM_SRC_OPDS * REGS_DATAW + META_DATAW), - .RESETW (1 + NUM_SRC_OPDS * REGS_DATAW) + VX_pipe_buffer #( + .DATAW (NUM_SRC_OPDS * REGS_DATAW + NUM_BANKS + META_DATAW + NUM_BANKS * REQ_SEL_WIDTH), + .RESETW (NUM_SRC_OPDS * REGS_DATAW) ) pipe_reg2 ( .clk (clk), .reset (pipe2_reset), - .enable (pipe_ready_st1), - .data_in ({pipe_valid2_st1, src_data_st1, gpr_rd_data_st1, pipe_data_st1}), - .data_out ({pipe_valid_st2, src_data_st2, gpr_rd_data_st2, pipe_data_st2}) + .valid_in (pipe_valid2_st1), + .ready_in (pipe_ready_st1), + .data_in ({src_data_st1, gpr_rd_valid_st1, pipe_data_st1, gpr_rd_req_idx_st1}), + .data_out ({src_data_st2, gpr_rd_valid_st2, pipe_data_st2, gpr_rd_req_idx_st2}), + .valid_out(pipe_valid_st2), + .ready_out(pipe_ready_st2) ); + always @(*) begin + src_data_m_st2 = src_data_st2; + for (integer b = 0; b < NUM_BANKS; ++b) begin + if (gpr_rd_valid_st2[b]) begin + src_data_m_st2[gpr_rd_req_idx_st2[b]] = gpr_rd_data_st2[b]; + end + end + end + VX_elastic_buffer #( .DATAW (DATAW), .SIZE (`TO_OUT_BUF_SIZE(OUT_BUF)), @@ -264,6 +263,8 @@ module VX_operands import VX_gpu_pkg::*; #( VX_dp_ram #( .DATAW (REGS_DATAW), .SIZE (PER_BANK_REGS * PER_ISSUE_WARPS), + .READ_ENABLE (1), + .OUT_REG (1), .WRENW (BYTEENW), `ifdef GPR_RESET .RESET_RAM (1), @@ -278,7 +279,7 @@ module VX_operands import VX_gpu_pkg::*; #( .waddr (gpr_wr_addr), .wdata (writeback_if.data.data), .raddr (gpr_rd_addr_st1[b]), - .rdata (gpr_rd_data[b]) + .rdata (gpr_rd_data_st2[b]) ); end diff --git a/hw/rtl/mem/VX_local_mem.sv b/hw/rtl/mem/VX_local_mem.sv index aff058cb96..72e55fe8be 100644 --- a/hw/rtl/mem/VX_local_mem.sv +++ b/hw/rtl/mem/VX_local_mem.sv @@ -163,12 +163,13 @@ module VX_local_mem import VX_gpu_pkg::*; #( for (genvar i = 0; i < NUM_BANKS; ++i) begin wire bank_rsp_valid, 
bank_rsp_ready; - wire [WORD_WIDTH-1:0] bank_rsp_data; VX_sp_ram #( .DATAW (WORD_WIDTH), .SIZE (WORDS_PER_BANK), .WRENW (WORD_SIZE), + .READ_ENABLE (1), + .OUT_REG (1), .NO_RWCHECK (1) ) data_store ( .clk (clk), @@ -178,7 +179,7 @@ module VX_local_mem import VX_gpu_pkg::*; #( .wren (per_bank_req_byteen[i]), .addr (per_bank_req_addr[i]), .wdata (per_bank_req_data[i]), - .rdata (bank_rsp_data) + .rdata (per_bank_rsp_data[i]) ); // read-during-write hazard detection @@ -194,20 +195,20 @@ module VX_local_mem import VX_gpu_pkg::*; #( end wire is_rdw_hazard = last_wr_valid && ~per_bank_req_rw[i] && (per_bank_req_addr[i] == last_wr_addr); - // drop write response and stall on read-during-write hazard + // drop write response assign bank_rsp_valid = per_bank_req_valid[i] && ~per_bank_req_rw[i] && ~is_rdw_hazard; assign per_bank_req_ready[i] = (bank_rsp_ready || per_bank_req_rw[i]) && ~is_rdw_hazard; // register BRAM output VX_pipe_buffer #( - .DATAW (REQ_SEL_WIDTH + WORD_WIDTH + TAG_WIDTH) + .DATAW (REQ_SEL_WIDTH + TAG_WIDTH) ) bram_buf ( .clk (clk), .reset (reset), .valid_in (bank_rsp_valid), .ready_in (bank_rsp_ready), - .data_in ({per_bank_req_idx[i], bank_rsp_data, per_bank_req_tag[i]}), - .data_out ({per_bank_rsp_idx[i], per_bank_rsp_data[i], per_bank_rsp_tag[i]}), + .data_in ({per_bank_req_idx[i], per_bank_req_tag[i]}), + .data_out ({per_bank_rsp_idx[i], per_bank_rsp_tag[i]}), .valid_out (per_bank_rsp_valid[i]), .ready_out (per_bank_rsp_ready[i]) ); From 811ceb5dc086fc0041dd13ec7a0afcc37550810e Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Wed, 21 Aug 2024 13:00:05 -0700 Subject: [PATCH 073/407] minor update --- hw/rtl/libs/VX_priority_encoder.sv | 42 +++++++++++++++--------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/hw/rtl/libs/VX_priority_encoder.sv b/hw/rtl/libs/VX_priority_encoder.sv index f96a07bb73..2138ea457c 100644 --- a/hw/rtl/libs/VX_priority_encoder.sv +++ b/hw/rtl/libs/VX_priority_encoder.sv @@ -49,6 +49,27 @@ module 
VX_priority_encoder #( end else if (MODEL == 1) begin + `IGNORE_UNOPTFLAT_BEGIN + wire [N-1:0] higher_pri_regs; + `IGNORE_UNOPTFLAT_END + + assign higher_pri_regs[0] = 1'b0; + for (genvar i = 1; i < N; ++i) begin + assign higher_pri_regs[i] = higher_pri_regs[i-1] | reversed[i-1]; + end + assign onehot_out[N-1:0] = reversed[N-1:0] & ~higher_pri_regs[N-1:0]; + + VX_lzc #( + .N (N), + .REVERSE (1) + ) lzc ( + .data_in (reversed), + .data_out (index_out), + .valid_out (valid_out) + ); + + end else if (MODEL == 2) begin + wire [N-1:0] scan_lo; VX_scan #( @@ -70,27 +91,6 @@ module VX_priority_encoder #( assign onehot_out = scan_lo & {(~scan_lo[N-2:0]), 1'b1}; - end else if (MODEL == 2) begin - - `IGNORE_UNOPTFLAT_BEGIN - wire [N-1:0] higher_pri_regs; - `IGNORE_UNOPTFLAT_END - - assign higher_pri_regs[0] = 1'b0; - for (genvar i = 1; i < N; ++i) begin - assign higher_pri_regs[i] = higher_pri_regs[i-1] | reversed[i-1]; - end - assign onehot_out[N-1:0] = reversed[N-1:0] & ~higher_pri_regs[N-1:0]; - - VX_lzc #( - .N (N), - .REVERSE (1) - ) lzc ( - .data_in (reversed), - .data_out (index_out), - .valid_out (valid_out) - ); - end else if (MODEL == 3) begin assign onehot_out = reversed & -reversed; From ca3499f3dfe3864c8cdc13119989b4f085565cab Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Wed, 21 Aug 2024 17:54:30 -0700 Subject: [PATCH 074/407] minor update --- hw/syn/altera/{quartus => dut}/Makefile | 0 hw/syn/altera/{quartus => dut}/cache/Makefile | 0 hw/syn/altera/{quartus => dut}/common.mk | 0 hw/syn/altera/{quartus => dut}/core/Makefile | 0 hw/syn/altera/{quartus => dut}/fpu/Makefile | 0 hw/syn/altera/{quartus => dut}/issue/Makefile | 0 hw/syn/altera/{quartus => dut}/lmem/Makefile | 0 hw/syn/altera/{quartus => dut}/mem_unit/Makefile | 0 hw/syn/altera/{quartus => dut}/project.sdc | 0 hw/syn/altera/{quartus => dut}/project.tcl | 0 hw/syn/altera/{quartus => dut}/test/Makefile | 0 hw/syn/altera/{quartus => dut}/timing-html.tcl | 0 hw/syn/altera/{quartus => dut}/top/Makefile 
| 0 hw/syn/altera/{quartus => dut}/unittest/Makefile | 0 hw/syn/altera/{quartus => dut}/vortex/Makefile | 0 hw/syn/xilinx/{test => sandbox}/Makefile | 0 hw/syn/xilinx/{test => sandbox}/project.tcl.in | 0 hw/syn/xilinx/{test => sandbox}/project_1_files/Vortex_top.v | 0 hw/syn/xilinx/{test => sandbox}/project_1_files/Vortex_wrap.sv | 0 hw/syn/xilinx/{test => sandbox}/project_1_files/kernel.bin.coe | 0 hw/syn/xilinx/{test => sandbox}/project_1_files/testbench.v | 0 21 files changed, 0 insertions(+), 0 deletions(-) rename hw/syn/altera/{quartus => dut}/Makefile (100%) rename hw/syn/altera/{quartus => dut}/cache/Makefile (100%) rename hw/syn/altera/{quartus => dut}/common.mk (100%) rename hw/syn/altera/{quartus => dut}/core/Makefile (100%) rename hw/syn/altera/{quartus => dut}/fpu/Makefile (100%) rename hw/syn/altera/{quartus => dut}/issue/Makefile (100%) rename hw/syn/altera/{quartus => dut}/lmem/Makefile (100%) rename hw/syn/altera/{quartus => dut}/mem_unit/Makefile (100%) rename hw/syn/altera/{quartus => dut}/project.sdc (100%) rename hw/syn/altera/{quartus => dut}/project.tcl (100%) rename hw/syn/altera/{quartus => dut}/test/Makefile (100%) rename hw/syn/altera/{quartus => dut}/timing-html.tcl (100%) rename hw/syn/altera/{quartus => dut}/top/Makefile (100%) rename hw/syn/altera/{quartus => dut}/unittest/Makefile (100%) rename hw/syn/altera/{quartus => dut}/vortex/Makefile (100%) rename hw/syn/xilinx/{test => sandbox}/Makefile (100%) rename hw/syn/xilinx/{test => sandbox}/project.tcl.in (100%) rename hw/syn/xilinx/{test => sandbox}/project_1_files/Vortex_top.v (100%) rename hw/syn/xilinx/{test => sandbox}/project_1_files/Vortex_wrap.sv (100%) rename hw/syn/xilinx/{test => sandbox}/project_1_files/kernel.bin.coe (100%) rename hw/syn/xilinx/{test => sandbox}/project_1_files/testbench.v (100%) diff --git a/hw/syn/altera/quartus/Makefile b/hw/syn/altera/dut/Makefile similarity index 100% rename from hw/syn/altera/quartus/Makefile rename to hw/syn/altera/dut/Makefile diff 
--git a/hw/syn/altera/quartus/cache/Makefile b/hw/syn/altera/dut/cache/Makefile similarity index 100% rename from hw/syn/altera/quartus/cache/Makefile rename to hw/syn/altera/dut/cache/Makefile diff --git a/hw/syn/altera/quartus/common.mk b/hw/syn/altera/dut/common.mk similarity index 100% rename from hw/syn/altera/quartus/common.mk rename to hw/syn/altera/dut/common.mk diff --git a/hw/syn/altera/quartus/core/Makefile b/hw/syn/altera/dut/core/Makefile similarity index 100% rename from hw/syn/altera/quartus/core/Makefile rename to hw/syn/altera/dut/core/Makefile diff --git a/hw/syn/altera/quartus/fpu/Makefile b/hw/syn/altera/dut/fpu/Makefile similarity index 100% rename from hw/syn/altera/quartus/fpu/Makefile rename to hw/syn/altera/dut/fpu/Makefile diff --git a/hw/syn/altera/quartus/issue/Makefile b/hw/syn/altera/dut/issue/Makefile similarity index 100% rename from hw/syn/altera/quartus/issue/Makefile rename to hw/syn/altera/dut/issue/Makefile diff --git a/hw/syn/altera/quartus/lmem/Makefile b/hw/syn/altera/dut/lmem/Makefile similarity index 100% rename from hw/syn/altera/quartus/lmem/Makefile rename to hw/syn/altera/dut/lmem/Makefile diff --git a/hw/syn/altera/quartus/mem_unit/Makefile b/hw/syn/altera/dut/mem_unit/Makefile similarity index 100% rename from hw/syn/altera/quartus/mem_unit/Makefile rename to hw/syn/altera/dut/mem_unit/Makefile diff --git a/hw/syn/altera/quartus/project.sdc b/hw/syn/altera/dut/project.sdc similarity index 100% rename from hw/syn/altera/quartus/project.sdc rename to hw/syn/altera/dut/project.sdc diff --git a/hw/syn/altera/quartus/project.tcl b/hw/syn/altera/dut/project.tcl similarity index 100% rename from hw/syn/altera/quartus/project.tcl rename to hw/syn/altera/dut/project.tcl diff --git a/hw/syn/altera/quartus/test/Makefile b/hw/syn/altera/dut/test/Makefile similarity index 100% rename from hw/syn/altera/quartus/test/Makefile rename to hw/syn/altera/dut/test/Makefile diff --git a/hw/syn/altera/quartus/timing-html.tcl 
b/hw/syn/altera/dut/timing-html.tcl similarity index 100% rename from hw/syn/altera/quartus/timing-html.tcl rename to hw/syn/altera/dut/timing-html.tcl diff --git a/hw/syn/altera/quartus/top/Makefile b/hw/syn/altera/dut/top/Makefile similarity index 100% rename from hw/syn/altera/quartus/top/Makefile rename to hw/syn/altera/dut/top/Makefile diff --git a/hw/syn/altera/quartus/unittest/Makefile b/hw/syn/altera/dut/unittest/Makefile similarity index 100% rename from hw/syn/altera/quartus/unittest/Makefile rename to hw/syn/altera/dut/unittest/Makefile diff --git a/hw/syn/altera/quartus/vortex/Makefile b/hw/syn/altera/dut/vortex/Makefile similarity index 100% rename from hw/syn/altera/quartus/vortex/Makefile rename to hw/syn/altera/dut/vortex/Makefile diff --git a/hw/syn/xilinx/test/Makefile b/hw/syn/xilinx/sandbox/Makefile similarity index 100% rename from hw/syn/xilinx/test/Makefile rename to hw/syn/xilinx/sandbox/Makefile diff --git a/hw/syn/xilinx/test/project.tcl.in b/hw/syn/xilinx/sandbox/project.tcl.in similarity index 100% rename from hw/syn/xilinx/test/project.tcl.in rename to hw/syn/xilinx/sandbox/project.tcl.in diff --git a/hw/syn/xilinx/test/project_1_files/Vortex_top.v b/hw/syn/xilinx/sandbox/project_1_files/Vortex_top.v similarity index 100% rename from hw/syn/xilinx/test/project_1_files/Vortex_top.v rename to hw/syn/xilinx/sandbox/project_1_files/Vortex_top.v diff --git a/hw/syn/xilinx/test/project_1_files/Vortex_wrap.sv b/hw/syn/xilinx/sandbox/project_1_files/Vortex_wrap.sv similarity index 100% rename from hw/syn/xilinx/test/project_1_files/Vortex_wrap.sv rename to hw/syn/xilinx/sandbox/project_1_files/Vortex_wrap.sv diff --git a/hw/syn/xilinx/test/project_1_files/kernel.bin.coe b/hw/syn/xilinx/sandbox/project_1_files/kernel.bin.coe similarity index 100% rename from hw/syn/xilinx/test/project_1_files/kernel.bin.coe rename to hw/syn/xilinx/sandbox/project_1_files/kernel.bin.coe diff --git a/hw/syn/xilinx/test/project_1_files/testbench.v 
b/hw/syn/xilinx/sandbox/project_1_files/testbench.v similarity index 100% rename from hw/syn/xilinx/test/project_1_files/testbench.v rename to hw/syn/xilinx/sandbox/project_1_files/testbench.v From e4bfa47895964cbbcb50bb7f9bae9239f0877594 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Thu, 22 Aug 2024 02:51:17 -0700 Subject: [PATCH 075/407] adding test coverage for xilinx synthesis --- hw/scripts/bin2coe.py | 127 +- hw/syn/altera/dut/Makefile | 9 +- hw/syn/altera/dut/common.mk | 3 +- hw/syn/xilinx/dut/Makefile | 63 + hw/syn/xilinx/dut/cache/Makefile | 7 + hw/syn/xilinx/dut/common.mk | 37 + hw/syn/xilinx/dut/core/Makefile | 14 + hw/syn/xilinx/dut/fpu/Makefile | 11 + hw/syn/xilinx/dut/issue/Makefile | 14 + hw/syn/xilinx/dut/lmem/Makefile | 7 + hw/syn/xilinx/dut/mem_unit/Makefile | 7 + hw/syn/xilinx/dut/project.tcl | 82 + hw/syn/xilinx/dut/project.xdc | 1 + hw/syn/xilinx/dut/top/Makefile | 32 + .../dut/test => xilinx/dut/unittest}/Makefile | 4 +- hw/syn/xilinx/dut/vortex/Makefile | 16 + hw/syn/xilinx/sandbox/Makefile | 24 +- .../{project_1_files => }/Vortex_top.v | 0 .../{project_1_files => }/Vortex_wrap.sv | 0 hw/syn/xilinx/sandbox/project.tcl.in | 1899 +- .../sandbox/project_1_files/kernel.bin.coe | 16386 ---------------- .../sandbox/{project_1_files => }/testbench.v | 0 22 files changed, 419 insertions(+), 18324 deletions(-) create mode 100644 hw/syn/xilinx/dut/Makefile create mode 100644 hw/syn/xilinx/dut/cache/Makefile create mode 100644 hw/syn/xilinx/dut/common.mk create mode 100644 hw/syn/xilinx/dut/core/Makefile create mode 100644 hw/syn/xilinx/dut/fpu/Makefile create mode 100644 hw/syn/xilinx/dut/issue/Makefile create mode 100644 hw/syn/xilinx/dut/lmem/Makefile create mode 100644 hw/syn/xilinx/dut/mem_unit/Makefile create mode 100644 hw/syn/xilinx/dut/project.tcl create mode 100644 hw/syn/xilinx/dut/project.xdc create mode 100644 hw/syn/xilinx/dut/top/Makefile rename hw/syn/{altera/dut/test => xilinx/dut/unittest}/Makefile (79%) create mode 100644 
hw/syn/xilinx/dut/vortex/Makefile rename hw/syn/xilinx/sandbox/{project_1_files => }/Vortex_top.v (100%) rename hw/syn/xilinx/sandbox/{project_1_files => }/Vortex_wrap.sv (100%) delete mode 100644 hw/syn/xilinx/sandbox/project_1_files/kernel.bin.coe rename hw/syn/xilinx/sandbox/{project_1_files => }/testbench.v (100%) diff --git a/hw/scripts/bin2coe.py b/hw/scripts/bin2coe.py index 95b3bcbeb1..eaaa3619ee 100755 --- a/hw/scripts/bin2coe.py +++ b/hw/scripts/bin2coe.py @@ -14,78 +14,83 @@ # limitations under the License. import argparse +import os -g_memory = {} - -def hex2bin(ch): - return int(ch, 16) if ch.isdigit() or ch in 'abcdefABCDEF' else 0 - -def process_binary(binfname, wordsize, binaddr): - with open(binfname, 'rb') as f: - buffer = list(f.read()) - g_memory[binaddr] = buffer - return (len(buffer) + wordsize - 1) // wordsize - -def process_data(datfname, wordsize): - offset, buffer = 0, [] - with open(datfname, 'r') as f: - for line in f: - line = line.strip() - if line.startswith("#"): - continue - if line.startswith("@"): - if buffer: - g_memory[offset] = buffer - offset = int(line[1:], 16) - buffer = [] - else: - for i in range(0, len(line), 2): - byte = hex2bin(line[i]) << 4 | hex2bin(line[i+1]) - buffer.append(byte) - if len(buffer) % wordsize: - buffer.extend([0] * (wordsize - len(buffer) % wordsize)) - offset += 1 - if buffer: - g_memory[offset] = buffer - return offset - -def write_coe(outfname, wordsize, depth, defval): - with open(outfname, 'w') as f: - f.write("MEMORY_INITIALIZATION_RADIX=16;\nMEMORY_INITIALIZATION_VECTOR=\n") - i = 0 - for addr in sorted(g_memory): - while i < addr: - f.write(f"{defval},\n") - i += 1 - data = g_memory[addr] - for j in range(0, len(data), wordsize): - f.write(",".join([f"{byte:02x}" for byte in data[j:j+wordsize][::-1]]) + ",\n") - i += 1 - while i < depth: - f.write(f"{defval},\n") - i += 1 - f.seek(f.tell() - 2, 0) # Remove the last comma - f.write(";\n") +def parse_binfile_option(option): + addr, path = 
option.split(':') + return int(addr, 0), path + +def parse_value_option(option): + addr, value = option.split(':') + return int(addr, 0), value + +def load_binary_data(addr, path, word_size, memory, little_endian): + with open(path, 'rb') as f: + binary_data = f.read() + + word_count = len(binary_data) // word_size + if len(binary_data) % word_size != 0: + word_count += 1 + + for i in range(word_count): + word_data = binary_data[i * word_size: (i + 1) * word_size] + if little_endian: + word_data = word_data[::-1] # Reverse the byte order for little-endian + hex_value = word_data.hex().zfill(word_size * 2) + memory[addr + i] = hex_value + +def add_value_data(addr, value, memory, word_size): + value = value.zfill(word_size * 2) + memory[addr] = value + +def binary_to_coe(output_file, word_size, depth, default_value, memory): + if depth == 0: + depth = max(memory.keys()) + 1 + + with open(output_file, 'w') as coe_file: + coe_file.write("; This file was generated from binary blobs and/or values\n") + coe_file.write("memory_initialization_radix=16;\n") + coe_file.write("memory_initialization_vector=\n") + + for addr in range(depth): + hex_value = memory.get(addr, default_value) + coe_file.write(f"{hex_value},\n") + + coe_file.seek(coe_file.tell() - 2) + coe_file.write(";\n") def main(): - parser = argparse.ArgumentParser(description="Binary to Xilinx COE File Converter") - parser.add_argument("--binary", help="Input binary file.") - parser.add_argument("--data", help="Input data file.") + parser = argparse.ArgumentParser(description="Convert binaries and values to a Xilinx COE file.") + parser.add_argument("--binfile", action='append', help="Binary file with starting address in the format :") + parser.add_argument("--value", action='append', help="Hex value with starting address in the format :") parser.add_argument("--out", default="output.coe", help="Output file (optional).") parser.add_argument("--wordsize", type=int, default=4, help="Word size in bytes (default 
4).") parser.add_argument("--depth", type=int, default=0, help="Address size (optional).") - parser.add_argument("--binaddr", type=int, default=0, help="Binary address (optional).") parser.add_argument("--default", default="00", help="Default hex value as string (optional).") + parser.add_argument("--little_endian", action='store_true', help="Interpret binary files as little-endian (default is big-endian).") args = parser.parse_args() - depth = max( - process_binary(args.binary, args.wordsize, args.binaddr) if args.binary else 0, - process_data(args.data, args.wordsize) if args.data else 0, - args.depth - ) + if args.binfile is None and args.value is None: + raise ValueError("At least one --binfile or --value must be provided.") + + # Initialize memory dictionary + memory = {} + + # Process binary files + if args.binfile: + for option in args.binfile: + addr, path = parse_binfile_option(option) + load_binary_data(addr, path, args.wordsize, memory, args.little_endian) + + # Process individual values + if args.value: + for option in args.value: + addr, value = parse_value_option(option) + add_value_data(addr, value, memory, args.wordsize) - write_coe(args.out, args.wordsize, depth, args.default) + # Generate the COE file + binary_to_coe(args.out, args.wordsize, args.depth, args.default.zfill(args.wordsize * 2), memory) if __name__ == "__main__": main() diff --git a/hw/syn/altera/dut/Makefile b/hw/syn/altera/dut/Makefile index f8993bf871..924b7602b4 100644 --- a/hw/syn/altera/dut/Makefile +++ b/hw/syn/altera/dut/Makefile @@ -9,7 +9,7 @@ SCRIPT_DIR := $(VORTEX_HOME)/hw/scripts IP_CACHE_DIR := $(ROOT_DIR)/hw/syn/altera/ip_cache/$(DEVICE_FAMILY) -.PHONY: dogfood unittest pipeline mem_unit lmem cache fpu core issue vortex top test +.PHONY: dogfood unittest pipeline mem_unit lmem cache fpu core issue vortex top ip-gen: $(IP_CACHE_DIR)/ip_gen.log $(IP_CACHE_DIR)/ip_gen.log: @@ -68,9 +68,4 @@ vortex: ip-gen top: ip-gen mkdir -p top/$(BUILD_DIR) cp top/Makefile 
top/$(BUILD_DIR) - $(MAKE) -C top/$(BUILD_DIR) clean && $(MAKE) -C top/$(BUILD_DIR) > top/$(BUILD_DIR)/build.log 2>&1 & - -test: ip-gen - mkdir -p test/$(BUILD_DIR) - cp test/Makefile test/$(BUILD_DIR) - $(MAKE) -C test/$(BUILD_DIR) clean && $(MAKE) -C test/$(BUILD_DIR) > test/$(BUILD_DIR)/build.log 2>&1 & + $(MAKE) -C top/$(BUILD_DIR) clean && $(MAKE) -C top/$(BUILD_DIR) > top/$(BUILD_DIR)/build.log 2>&1 & \ No newline at end of file diff --git a/hw/syn/altera/dut/common.mk b/hw/syn/altera/dut/common.mk index 3890dcfe87..1adcb3d49e 100644 --- a/hw/syn/altera/dut/common.mk +++ b/hw/syn/altera/dut/common.mk @@ -1,7 +1,7 @@ ROOT_DIR := $(realpath ../../../../../..) include $(ROOT_DIR)/config.mk -SRC_DIR := $(VORTEX_HOME)/hw/syn/altera/quartus +SRC_DIR := $(VORTEX_HOME)/hw/syn/altera/dut RTL_DIR := $(VORTEX_HOME)/hw/rtl AFU_DIR := $(RTL_DIR)/afu/opae @@ -21,7 +21,6 @@ endif CONFIGS += -DNDEBUG CONFIGS += -DQUARTUS CONFIGS += -DSYNTHESIS -CONFIGS += -DNOGLOBALS PROJECT_FILES = $(PROJECT).qpf $(PROJECT).qsf diff --git a/hw/syn/xilinx/dut/Makefile b/hw/syn/xilinx/dut/Makefile new file mode 100644 index 0000000000..b8f67b8a57 --- /dev/null +++ b/hw/syn/xilinx/dut/Makefile @@ -0,0 +1,63 @@ +ROOT_DIR := $(realpath ../../../..) 
+include $(ROOT_DIR)/config.mk + +PREFIX ?= build + +BUILD_DIR := $(PREFIX) + +.PHONY: dogfood unittest pipeline mem_unit lmem cache fpu core issue vortex top + +dogfood: + mkdir -p dogfood/$(BUILD_DIR) + cp dogfood/Makefile dogfood/$(BUILD_DIR) + $(MAKE) -C dogfood/$(BUILD_DIR) clean && $(MAKE) -C dogfood/$(BUILD_DIR) > dogfood/$(BUILD_DIR)/build.log 2>&1 & + +unittest: + mkdir -p unittest/$(BUILD_DIR) + cp unittest/Makefile unittest/$(BUILD_DIR) + $(MAKE) -C unittest/$(BUILD_DIR) clean && $(MAKE) -C unittest/$(BUILD_DIR) > unittest/$(BUILD_DIR)/build.log 2>&1 & + +pipeline: + mkdir -p pipeline/$(BUILD_DIR) + cp pipeline/Makefile pipeline/$(BUILD_DIR) + $(MAKE) -C pipeline/$(BUILD_DIR) clean && $(MAKE) -C pipeline/$(BUILD_DIR) > pipeline/$(BUILD_DIR)/build.log 2>&1 & + +mem_unit: + mkdir -p mem_unit/$(BUILD_DIR) + cp mem_unit/Makefile mem_unit/$(BUILD_DIR) + $(MAKE) -C mem_unit/$(BUILD_DIR) clean && $(MAKE) -C mem_unit/$(BUILD_DIR) > mem_unit/$(BUILD_DIR)/build.log 2>&1 & + +lmem: + mkdir -p lmem/$(BUILD_DIR) + cp lmem/Makefile lmem/$(BUILD_DIR) + $(MAKE) -C lmem/$(BUILD_DIR) clean && $(MAKE) -C lmem/$(BUILD_DIR) > lmem/$(BUILD_DIR)/build.log 2>&1 & + +cache: + mkdir -p cache/$(BUILD_DIR) + cp cache/Makefile cache/$(BUILD_DIR) + $(MAKE) -C cache/$(BUILD_DIR) clean && $(MAKE) -C cache/$(BUILD_DIR) > cache/$(BUILD_DIR)/build.log 2>&1 & + +fpu: + mkdir -p fpu/$(BUILD_DIR) + cp fpu/Makefile fpu/$(BUILD_DIR) + $(MAKE) -C fpu/$(BUILD_DIR) clean && $(MAKE) -C fpu/$(BUILD_DIR) > fpu/$(BUILD_DIR)/build.log 2>&1 & + +core: + mkdir -p core/$(BUILD_DIR) + cp core/Makefile core/$(BUILD_DIR) + $(MAKE) -C core/$(BUILD_DIR) clean && $(MAKE) -C core/$(BUILD_DIR) > core/$(BUILD_DIR)/build.log 2>&1 & + +issue: + mkdir -p issue/$(BUILD_DIR) + cp issue/Makefile issue/$(BUILD_DIR) + $(MAKE) -C issue/$(BUILD_DIR) clean && $(MAKE) -C issue/$(BUILD_DIR) > issue/$(BUILD_DIR)/build.log 2>&1 & + +vortex: + mkdir -p vortex/$(BUILD_DIR) + cp vortex/Makefile vortex/$(BUILD_DIR) + $(MAKE) -C 
vortex/$(BUILD_DIR) clean && $(MAKE) -C vortex/$(BUILD_DIR) > vortex/$(BUILD_DIR)/build.log 2>&1 & + +top: + mkdir -p top/$(BUILD_DIR) + cp top/Makefile top/$(BUILD_DIR) + $(MAKE) -C top/$(BUILD_DIR) clean && $(MAKE) -C top/$(BUILD_DIR) > top/$(BUILD_DIR)/build.log 2>&1 & \ No newline at end of file diff --git a/hw/syn/xilinx/dut/cache/Makefile b/hw/syn/xilinx/dut/cache/Makefile new file mode 100644 index 0000000000..f96a761423 --- /dev/null +++ b/hw/syn/xilinx/dut/cache/Makefile @@ -0,0 +1,7 @@ +PROJECT = VX_cache_top +TOP_LEVEL_ENTITY = $(PROJECT) +SRC_FILE = $(PROJECT).sv + +include ../../common.mk + +RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache diff --git a/hw/syn/xilinx/dut/common.mk b/hw/syn/xilinx/dut/common.mk new file mode 100644 index 0000000000..b435b14090 --- /dev/null +++ b/hw/syn/xilinx/dut/common.mk @@ -0,0 +1,37 @@ +ROOT_DIR := $(realpath ../../../../../..) +include $(ROOT_DIR)/config.mk + +DEVICE ?= xcu55c-fsvh2892-2L-e + +VIVADO := $(XILINX_VIVADO)/bin/vivado + +SRC_DIR := $(VORTEX_HOME)/hw/syn/xilinx/dut + +RTL_DIR := $(VORTEX_HOME)/hw/rtl +AFU_DIR := $(RTL_DIR)/afu/xrt +SCRIPT_DIR := $(VORTEX_HOME)/hw/scripts + +CONFIGS += -DNDEBUG +CONFIGS += -DVIVADO +CONFIGS += -DSYNTHESIS + +# Build targets +all: $(PROJECT).xpr + +gen-sources: project_1/sources.txt +project_1/sources.txt: + mkdir -p project_1 + $(SCRIPT_DIR)/gen_sources.sh $(CONFIGS) $(RTL_INCLUDE) -T$(TOP_LEVEL_ENTITY) -P -Cproject_1/src -Oproject_1/sources.txt + +build: $(PROJECT).xpr +$(PROJECT).xpr: project_1/sources.txt + $(VIVADO) -mode batch -source $(SRC_DIR)/project.tcl -tclargs $(TOP_LEVEL_ENTITY) $(DEVICE) project_1/sources.txt $(SRC_DIR)/project.xdc $(SCRIPT_DIR) + +clean: + rm -rf project_1 + rm -rf .Xil + rm -f *.rpt + rm -f vivado*.log + rm -f vivado*.jou + +.PHONY: all gen-sources build clean \ No newline at end of file diff --git a/hw/syn/xilinx/dut/core/Makefile b/hw/syn/xilinx/dut/core/Makefile new file mode 
100644 index 0000000000..eeeaa52338 --- /dev/null +++ b/hw/syn/xilinx/dut/core/Makefile @@ -0,0 +1,14 @@ +PROJECT = VX_core_top +TOP_LEVEL_ENTITY = $(PROJECT) +SRC_FILE = $(PROJECT).sv + +include ../../common.mk + +#CONFIGS += -DNUM_WARPS=32 +#CONFIGS += -DNUM_THREADS=32 + +FPU_INCLUDE = -I$(RTL_DIR)/fpu +ifneq (,$(findstring FPU_FPNEW,$(CONFIGS))) + FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/fpnew/src/common_cells/include -J$(THIRD_PARTY_DIR)/fpnew/src/common_cells/src -J$(THIRD_PARTY_DIR)/fpnew/src/fpu_div_sqrt_mvp/hdl -J$(THIRD_PARTY_DIR)/fpnew/src +endif +RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache -I$(IP_CACHE_DIR) $(FPU_INCLUDE) \ No newline at end of file diff --git a/hw/syn/xilinx/dut/fpu/Makefile b/hw/syn/xilinx/dut/fpu/Makefile new file mode 100644 index 0000000000..b7826dc689 --- /dev/null +++ b/hw/syn/xilinx/dut/fpu/Makefile @@ -0,0 +1,11 @@ +PROJECT = VX_fpu_dsp +TOP_LEVEL_ENTITY = $(PROJECT) +SRC_FILE = $(PROJECT).sv + +include ../../common.mk + +FPU_INCLUDE = -I$(RTL_DIR)/fpu +ifneq (,$(findstring FPU_FPNEW,$(CONFIGS))) + FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/fpnew/src/common_cells/include -J$(THIRD_PARTY_DIR)/fpnew/src/common_cells/src -J$(THIRD_PARTY_DIR)/fpnew/src/fpu_div_sqrt_mvp/hdl -J$(THIRD_PARTY_DIR)/fpnew/src +endif +RTL_INCLUDE = $(FPU_INCLUDE) -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(IP_CACHE_DIR) diff --git a/hw/syn/xilinx/dut/issue/Makefile b/hw/syn/xilinx/dut/issue/Makefile new file mode 100644 index 0000000000..c1804a3989 --- /dev/null +++ b/hw/syn/xilinx/dut/issue/Makefile @@ -0,0 +1,14 @@ +PROJECT = VX_issue_top +TOP_LEVEL_ENTITY = $(PROJECT) +SRC_FILE = $(PROJECT).sv + +include ../../common.mk + +#CONFIGS += -DNUM_WARPS=32 +#CONFIGS += -DNUM_THREADS=32 + +FPU_INCLUDE = -I$(RTL_DIR)/fpu +ifneq (,$(findstring FPU_FPNEW,$(CONFIGS))) + FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/fpnew/src/common_cells/include -J$(THIRD_PARTY_DIR)/fpnew/src/common_cells/src 
-J$(THIRD_PARTY_DIR)/fpnew/src/fpu_div_sqrt_mvp/hdl -J$(THIRD_PARTY_DIR)/fpnew/src +endif +RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem $(FPU_INCLUDE) -I$(IP_CACHE_DIR) $(FPU_INCLUDE) \ No newline at end of file diff --git a/hw/syn/xilinx/dut/lmem/Makefile b/hw/syn/xilinx/dut/lmem/Makefile new file mode 100644 index 0000000000..b3ba57c8d1 --- /dev/null +++ b/hw/syn/xilinx/dut/lmem/Makefile @@ -0,0 +1,7 @@ +PROJECT = VX_local_mem_top +TOP_LEVEL_ENTITY = $(PROJECT) +SRC_FILE = $(PROJECT).sv + +include ../../common.mk + +RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/mem diff --git a/hw/syn/xilinx/dut/mem_unit/Makefile b/hw/syn/xilinx/dut/mem_unit/Makefile new file mode 100644 index 0000000000..209492265a --- /dev/null +++ b/hw/syn/xilinx/dut/mem_unit/Makefile @@ -0,0 +1,7 @@ +PROJECT = VX_mem_unit_top +TOP_LEVEL_ENTITY = $(PROJECT) +SRC_FILE = $(PROJECT).sv + +include ../../common.mk + +RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/mem -I$(RTL_DIR)/core -I$(RTL_DIR)/fpu diff --git a/hw/syn/xilinx/dut/project.tcl b/hw/syn/xilinx/dut/project.tcl new file mode 100644 index 0000000000..bee841d79e --- /dev/null +++ b/hw/syn/xilinx/dut/project.tcl @@ -0,0 +1,82 @@ +# Copyright © 2019-2023 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +if { $::argc != 5 } { + puts "ERROR: Program \"$::argv0\" requires 5 arguments!\n" + puts "Usage: $::argv0 \n" + exit +} + +# Set the project name +set project_name "project_1" + +set top_module [lindex $::argv 0] +set device_part [lindex $::argv 1] +set vcs_file [lindex $::argv 2] +set xdc_file [lindex $::argv 3] +set tool_dir [lindex $::argv 4] + +#puts top_module +#puts $device_part +#puts $vcs_file +#puts xdc_file +#puts $tool_dir + +source "${tool_dir}/parse_vcs_list.tcl" +set vlist [parse_vcs_list "${vcs_file}"] + +set vsources_list [lindex $vlist 0] +set vincludes_list [lindex $vlist 1] +set vdefines_list [lindex $vlist 2] + +#puts $vsources_list +#puts $vincludes_list +#puts $vdefines_list + +# Create project +create_project $project_name $project_name -force -part $device_part + +# Add constrains file +read_xdc $xdc_file + +# Add the design sources +add_files -norecurse -verbose $vsources_list + +# process defines +set obj [current_fileset] +foreach def $vdefines_list { + set_property verilog_define $def $obj +} + +# Synthesis +synth_design -top $top_module -include_dirs $vincludes_list -flatten_hierarchy none +write_checkpoint -force post_synth.dcp +report_utilization -file utilization.rpt -hierarchical -hierarchical_percentages + +# Optimize +opt_design + +# Place +place_design +write_checkpoint -force post_place.dcp +report_place_status -file place.rpt + +# Route +route_design +write_checkpoint -force post_route.dcp +report_route_status -file route.rpt + +# Generate the synthesis report +report_timing -file timing.rpt +report_power -file power.rpt +report_drc -file drc.rpt \ No newline at end of file diff --git a/hw/syn/xilinx/dut/project.xdc b/hw/syn/xilinx/dut/project.xdc new file mode 100644 index 0000000000..8c74ebb4a9 --- /dev/null +++ b/hw/syn/xilinx/dut/project.xdc @@ -0,0 +1 @@ +## empty \ No newline at end of file diff --git a/hw/syn/xilinx/dut/top/Makefile b/hw/syn/xilinx/dut/top/Makefile new file mode 100644 index 0000000000..341690206d 
--- /dev/null +++ b/hw/syn/xilinx/dut/top/Makefile @@ -0,0 +1,32 @@ +PROJECT = vortex_afu +TOP_LEVEL_ENTITY = $(PROJECT) +SRC_FILE = $(PROJECT).sv + +include ../../common.mk + +# AFU parameters +CONFIGS += -DNOPAE +CONFIGS += -DPLATFORM_PROVIDES_LOCAL_MEMORY +ifeq (,$(findstring PLATFORM_PARAM_LOCAL_MEMORY_BANKS,$(CONFIGS))) + CONFIGS += -DPLATFORM_PARAM_LOCAL_MEMORY_BANKS=2 +endif +ifeq (,$(findstring PLATFORM_PARAM_LOCAL_MEMORY_ADDR_WIDTH,$(CONFIGS))) + CONFIGS += -DPLATFORM_PARAM_LOCAL_MEMORY_ADDR_WIDTH=26 +endif +ifeq (,$(findstring PLATFORM_PARAM_LOCAL_MEMORY_DATA_WIDTH,$(CONFIGS))) + CONFIGS += -DPLATFORM_PARAM_LOCAL_MEMORY_DATA_WIDTH=512 +endif +ifeq (,$(findstring PLATFORM_PARAM_LOCAL_MEMORY_BURST_CNT_WIDTH,$(CONFIGS))) + CONFIGS += -DPLATFORM_PARAM_LOCAL_MEMORY_BURST_CNT_WIDTH=4 +endif + +#CONFIGS += -DNUM_CORES=2 +#CONFIGS += -DNUM_WARPS=32 +#CONFIGS += -DNUM_THREADS=32 +#CONFIGS += -DL2_ENABLE + +FPU_INCLUDE = -I$(RTL_DIR)/fpu +ifneq (,$(findstring FPU_FPNEW,$(CONFIGS))) + FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/fpnew/src/common_cells/include -J$(THIRD_PARTY_DIR)/fpnew/src/common_cells/src -J$(THIRD_PARTY_DIR)/fpnew/src/fpu_div_sqrt_mvp/hdl -J$(THIRD_PARTY_DIR)/fpnew/src +endif +RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache -I$(AFU_DIR) -I$(AFU_DIR)/ccip -I$(IP_CACHE_DIR) $(FPU_INCLUDE) diff --git a/hw/syn/altera/dut/test/Makefile b/hw/syn/xilinx/dut/unittest/Makefile similarity index 79% rename from hw/syn/altera/dut/test/Makefile rename to hw/syn/xilinx/dut/unittest/Makefile index 0c4a7ae4e8..2bfb18e4e4 100644 --- a/hw/syn/altera/dut/test/Makefile +++ b/hw/syn/xilinx/dut/unittest/Makefile @@ -1,4 +1,4 @@ -PROJECT = Vortex +PROJECT = Unittest TOP_LEVEL_ENTITY = $(PROJECT) SRC_FILE = $(PROJECT).sv @@ -8,4 +8,4 @@ FPU_INCLUDE = -I$(RTL_DIR)/fpu ifneq (,$(findstring FPU_FPNEW,$(CONFIGS))) FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/fpnew/src/common_cells/include 
-J$(THIRD_PARTY_DIR)/fpnew/src/common_cells/src -J$(THIRD_PARTY_DIR)/fpnew/src/fpu_div_sqrt_mvp/hdl -J$(THIRD_PARTY_DIR)/fpnew/src endif -RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/cache -I$(IP_CACHE_DIR) $(FPU_INCLUDE) +RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache -I$(IP_CACHE_DIR) $(FPU_INCLUDE) \ No newline at end of file diff --git a/hw/syn/xilinx/dut/vortex/Makefile b/hw/syn/xilinx/dut/vortex/Makefile new file mode 100644 index 0000000000..7429df414e --- /dev/null +++ b/hw/syn/xilinx/dut/vortex/Makefile @@ -0,0 +1,16 @@ +PROJECT = Vortex +TOP_LEVEL_ENTITY = $(PROJECT) +SRC_FILE = $(PROJECT).sv + +include ../../common.mk + +#CONFIGS += -DNUM_CORES=2 +#CONFIGS += -DNUM_WARPS=32 +#CONFIGS += -DNUM_THREADS=32 +#CONFIGS += -DL2_ENABLE + +FPU_INCLUDE = -I$(RTL_DIR)/fpu +ifneq (,$(findstring FPU_FPNEW,$(CONFIGS))) + FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/fpnew/src/common_cells/include -J$(THIRD_PARTY_DIR)/fpnew/src/common_cells/src -J$(THIRD_PARTY_DIR)/fpnew/src/fpu_div_sqrt_mvp/hdl -J$(THIRD_PARTY_DIR)/fpnew/src +endif +RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache -I$(IP_CACHE_DIR) $(FPU_INCLUDE) diff --git a/hw/syn/xilinx/sandbox/Makefile b/hw/syn/xilinx/sandbox/Makefile index 5b6a76da36..bcfd91f9c4 100644 --- a/hw/syn/xilinx/sandbox/Makefile +++ b/hw/syn/xilinx/sandbox/Makefile @@ -1,17 +1,22 @@ ROOT_DIR := $(realpath ../../../..) 
include $(ROOT_DIR)/config.mk +DEVICE ?= xcu55c-fsvh2892-2L-e + VIVADO := $(XILINX_VIVADO)/bin/vivado -SRC_DIR := $(VORTEX_HOME)/hw/syn/xilinx/test +SRC_DIR := $(VORTEX_HOME)/hw/syn/xilinx/sandbox RTL_DIR := $(VORTEX_HOME)/hw/rtl DPI_DIR := $(VORTEX_HOME)/hw/dpi -AFU_DIR := $(RTL_DIR)/afu/opae +AFU_DIR := $(RTL_DIR)/afu/xrt SCRIPT_DIR := $(VORTEX_HOME)/hw/scripts KERNEL ?= fibonacci +COE_FILE := $(shell realpath kernel.bin.coe) +ESCAPED_COE_FILE := $(shell echo "$(COE_FILE)" | sed -e 's/[\/&]/\\&/g') + # include paths FPU_INCLUDE = -I$(RTL_DIR)/fpu ifneq (,$(findstring FPU_FPNEW,$(CONFIGS))) @@ -19,14 +24,13 @@ ifneq (,$(findstring FPU_FPNEW,$(CONFIGS))) endif RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache RTL_INCLUDE += $(FPU_INCLUDE) -RTL_INCLUDE += -I$(SRC_DIR)/project_1_files +RTL_INCLUDE += -I$(SRC_DIR) # compilation flags CFLAGS += -DNDEBUG -DSYNTHESIS -DVIVADO CFLAGS += $(CONFIGS) CFLAGS += $(RTL_INCLUDE) CFLAGS += -DEXT_F_DISABLE -#CFLAGS += -DNUM_CORES 4 # update memory layout for 2MB RAM CFLAGS += -DSTARTUP_ADDR=32\'h80000 @@ -34,6 +38,9 @@ CFLAGS += -DSTACK_BASE_ADDR=32\'hFF000 all: build +project2.tcl: project.tcl + @sed -e "s/@COE_FILE@/$(ESCAPED_COE_FILE)/g" $< > $@ + $(KERNEL).bin: $(MAKE) -C $(ROOT_DIR)/kernel clean STACK_BASE_ADDR=0xFF000 $(MAKE) -C $(ROOT_DIR)/kernel @@ -42,7 +49,7 @@ $(KERNEL).bin: cp $(ROOT_DIR)/tests/kernel/$(KERNEL)/$(KERNEL).bin $(KERNEL).bin kernel.bin.coe: $(KERNEL).bin - $(SCRIPT_DIR)/bin2coe.py --out=$@ --binary=$(KERNEL).bin --binaddr=8192 --depth=16384 --wordsize=64 + $(SCRIPT_DIR)/bin2coe.py --out=$@ --binfile=8192:$(KERNEL).bin --depth=16384 --wordsize=64 --little_endian gen-sources: project_1/sources.txt project_1/sources.txt: @@ -50,11 +57,12 @@ project_1/sources.txt: $(SCRIPT_DIR)/gen_sources.sh $(CFLAGS) -P -Cproject_1/src -Oproject_1/sources.txt build: project_1/project_1.xpr -project_1/project_1.xpr: project_1/sources.txt kernel.bin.coe 
project.tcl - $(VIVADO) -mode batch -source project.tcl -tclargs project_1/sources.txt project_1/src $(SCRIPT_DIR) +project_1/project_1.xpr: project_1/sources.txt kernel.bin.coe project2.tcl + $(VIVADO) -mode batch -source project2.tcl -tclargs $(DEVICE) project_1/sources.txt $(SCRIPT_DIR) run: project_1/project_1.xpr $(VIVADO) project_1/project_1.xpr & clean: - rm -rf project_1 $(KERNEL).bin kernel.bin.coe + rm -rf project_1 project2.tcl $(KERNEL).bin kernel.bin.coe + rm -rf .Xil *.log *.jou diff --git a/hw/syn/xilinx/sandbox/project_1_files/Vortex_top.v b/hw/syn/xilinx/sandbox/Vortex_top.v similarity index 100% rename from hw/syn/xilinx/sandbox/project_1_files/Vortex_top.v rename to hw/syn/xilinx/sandbox/Vortex_top.v diff --git a/hw/syn/xilinx/sandbox/project_1_files/Vortex_wrap.sv b/hw/syn/xilinx/sandbox/Vortex_wrap.sv similarity index 100% rename from hw/syn/xilinx/sandbox/project_1_files/Vortex_wrap.sv rename to hw/syn/xilinx/sandbox/Vortex_wrap.sv diff --git a/hw/syn/xilinx/sandbox/project.tcl.in b/hw/syn/xilinx/sandbox/project.tcl.in index 45f9a9104c..e92e31a446 100644 --- a/hw/syn/xilinx/sandbox/project.tcl.in +++ b/hw/syn/xilinx/sandbox/project.tcl.in @@ -1,15 +1,28 @@ +# Copyright © 2019-2023 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ if { $::argc != 3 } { puts "ERROR: Program \"$::argv0\" requires 3 arguments!\n" - puts "Usage: $::argv0 \n" + puts "Usage: $::argv0 \n" exit } -set vcs_file [lindex $::argv 0] -set files_dir [lindex $::argv 1] +set device_part [lindex $::argv 0] +set vcs_file [lindex $::argv 1] set tool_dir [lindex $::argv 2] +#puts $device_part #puts $vcs_file -#puts $files_dir #puts $tool_dir set origin_dir [file normalize "."] @@ -39,81 +52,11 @@ set vdefines_list [lindex $vlist 2] #puts ${vdefines_list} # Create project -create_project ${project_name} ./${project_name} -force -part xcu280-fsvh2892-2L-e +create_project $project_name $project_name -force -part $device_part # Set the directory path for the new project set proj_dir [get_property directory [current_project]] -# Set project properties -set obj [current_project] -set_property -name "compxlib.activehdl_compiled_library_dir" -value "$proj_dir/${project_name}.cache/compile_simlib/activehdl" -objects $obj -set_property -name "compxlib.funcsim" -value "1" -objects $obj -set_property -name "compxlib.ies_compiled_library_dir" -value "$proj_dir/${project_name}.cache/compile_simlib/ies" -objects $obj -set_property -name "compxlib.modelsim_compiled_library_dir" -value "$proj_dir/${project_name}.cache/compile_simlib/modelsim" -objects $obj -set_property -name "compxlib.overwrite_libs" -value "0" -objects $obj -set_property -name "compxlib.questa_compiled_library_dir" -value "$proj_dir/${project_name}.cache/compile_simlib/questa" -objects $obj -set_property -name "compxlib.riviera_compiled_library_dir" -value "$proj_dir/${project_name}.cache/compile_simlib/riviera" -objects $obj -set_property -name "compxlib.timesim" -value "1" -objects $obj -set_property -name "compxlib.vcs_compiled_library_dir" -value "$proj_dir/${project_name}.cache/compile_simlib/vcs" -objects $obj -set_property -name "compxlib.xsim_compiled_library_dir" -value "" -objects $obj -set_property -name "corecontainer.enable" -value "0" -objects $obj 
-set_property -name "default_lib" -value "xil_defaultlib" -objects $obj -set_property -name "enable_optional_runs_sta" -value "0" -objects $obj -set_property -name "enable_vhdl_2008" -value "1" -objects $obj -set_property -name "generate_ip_upgrade_log" -value "1" -objects $obj -set_property -name "ip_cache_permissions" -value "read write" -objects $obj -set_property -name "ip_interface_inference_priority" -value "" -objects $obj -set_property -name "ip_output_repo" -value "$proj_dir/${project_name}.cache/ip" -objects $obj -set_property -name "legacy_ip_repo_paths" -value "" -objects $obj -set_property -name "mem.enable_memory_map_generation" -value "1" -objects $obj -set_property -name "platform.board_id" -value "au280" -objects $obj -set_property -name "platform.default_output_type" -value "undefined" -objects $obj -set_property -name "platform.design_intent.datacenter" -value "undefined" -objects $obj -set_property -name "platform.design_intent.embedded" -value "undefined" -objects $obj -set_property -name "platform.design_intent.external_host" -value "undefined" -objects $obj -set_property -name "platform.design_intent.server_managed" -value "undefined" -objects $obj -set_property -name "platform.rom.debug_type" -value "0" -objects $obj -set_property -name "platform.rom.prom_type" -value "0" -objects $obj -set_property -name "platform.slrconstraintmode" -value "0" -objects $obj -set_property -name "preferred_sim_model" -value "rtl" -objects $obj -set_property -name "project_type" -value "Default" -objects $obj -set_property -name "pr_flow" -value "0" -objects $obj -set_property -name "sim.central_dir" -value "$proj_dir/${project_name}.ip_user_files" -objects $obj -set_property -name "sim.ip.auto_export_scripts" -value "1" -objects $obj -set_property -name "sim.use_ip_compiled_libs" -value "1" -objects $obj -set_property -name "simulator.activehdl_gcc_install_dir" -value "" -objects $obj -set_property -name "simulator.activehdl_install_dir" -value "" -objects 
$obj -set_property -name "simulator.ies_gcc_install_dir" -value "" -objects $obj -set_property -name "simulator.ies_install_dir" -value "" -objects $obj -set_property -name "simulator.modelsim_gcc_install_dir" -value "" -objects $obj -set_property -name "simulator.modelsim_install_dir" -value "" -objects $obj -set_property -name "simulator.questa_gcc_install_dir" -value "" -objects $obj -set_property -name "simulator.riviera_gcc_install_dir" -value "" -objects $obj -set_property -name "simulator.riviera_install_dir" -value "" -objects $obj -set_property -name "simulator.vcs_gcc_install_dir" -value "" -objects $obj -set_property -name "simulator.vcs_install_dir" -value "" -objects $obj -set_property -name "simulator.xcelium_gcc_install_dir" -value "" -objects $obj -set_property -name "simulator.xcelium_install_dir" -value "" -objects $obj -set_property -name "simulator_language" -value "Verilog" -objects $obj -set_property -name "source_mgmt_mode" -value "All" -objects $obj -set_property -name "target_language" -value "Verilog" -objects $obj -set_property -name "target_simulator" -value "XSim" -objects $obj -set_property -name "tool_flow" -value "Vivado" -objects $obj -set_property -name "webtalk.activehdl_export_sim" -value "27" -objects $obj -set_property -name "webtalk.ies_export_sim" -value "27" -objects $obj -set_property -name "webtalk.modelsim_export_sim" -value "27" -objects $obj -set_property -name "webtalk.questa_export_sim" -value "27" -objects $obj -set_property -name "webtalk.riviera_export_sim" -value "27" -objects $obj -set_property -name "webtalk.vcs_export_sim" -value "27" -objects $obj -set_property -name "webtalk.xcelium_export_sim" -value "5" -objects $obj -set_property -name "webtalk.xsim_export_sim" -value "27" -objects $obj -set_property -name "webtalk.xsim_launch_sim" -value "91" -objects $obj -set_property -name "xpm_libraries" -value "XPM_CDC XPM_MEMORY" -objects $obj -set_property -name "xsim.array_display_limit" -value "1024" -objects 
$obj -set_property -name "xsim.radix" -value "hex" -objects $obj -set_property -name "xsim.time_unit" -value "ns" -objects $obj -set_property -name "xsim.trace_limit" -value "65536" -objects $obj - # Create 'sources_1' fileset (if not found) if {[string equal [get_filesets -quiet sources_1] ""]} { create_fileset -srcset sources_1 @@ -131,21 +74,8 @@ foreach def $vdefines_list { # Set 'sources_1' fileset properties set obj [get_filesets sources_1] -set_property -name "design_mode" -value "RTL" -objects $obj -set_property -name "edif_extra_search_paths" -value "" -objects $obj -set_property -name "elab_link_dcps" -value "1" -objects $obj -set_property -name "elab_load_timing_constraints" -value "1" -objects $obj -set_property -name "generic" -value "" -objects $obj -set_property -name "include_dirs" -value "" -objects $obj -set_property -name "lib_map_file" -value "" -objects $obj -set_property -name "loop_count" -value "1000" -objects $obj set_property -name "name" -value "sources_1" -objects $obj set_property -name "top" -value "design_1_wrapper" -objects $obj -set_property -name "top_auto_set" -value "0" -objects $obj -set_property -name "verilog_define" -value "" -objects $obj -set_property -name "verilog_uppercase" -value "1" -objects $obj -set_property -name "verilog_version" -value "verilog_2001" -objects $obj -set_property -name "vhdl_version" -value "vhdl_2k" -objects $obj # Create 'constrs_1' fileset (if not found) if {[string equal [get_filesets -quiet constrs_1] ""]} { @@ -172,7 +102,7 @@ if {[string equal [get_filesets -quiet sim_1] ""]} { set obj [get_filesets sim_1] # Import local files from the original project set files [list \ - [file normalize "$files_dir/testbench.v" ]\ + [file normalize "testbench.v" ]\ ] set imported_files [import_files -fileset sim_1 $files] @@ -202,52 +132,14 @@ set_property -name "hbs.configure_design_for_hier_access" -value "1" -objects $o set_property -name "include_dirs" -value "" -objects $obj set_property -name 
"incremental" -value "1" -objects $obj set_property -name "name" -value "sim_1" -objects $obj -set_property -name "nl.cell" -value "" -objects $obj -set_property -name "nl.incl_unisim_models" -value "0" -objects $obj -set_property -name "nl.mode" -value "funcsim" -objects $obj -set_property -name "nl.process_corner" -value "slow" -objects $obj -set_property -name "nl.rename_top" -value "" -objects $obj -set_property -name "nl.sdf_anno" -value "1" -objects $obj -set_property -name "nl.write_all_overrides" -value "0" -objects $obj set_property -name "source_set" -value "sources_1" -objects $obj set_property -name "systemc_include_dirs" -value "" -objects $obj set_property -name "top" -value "testbench" -objects $obj set_property -name "top_auto_set" -value "0" -objects $obj set_property -name "top_lib" -value "xil_defaultlib" -objects $obj -set_property -name "transport_int_delay" -value "0" -objects $obj -set_property -name "transport_path_delay" -value "0" -objects $obj -set_property -name "unifast" -value "0" -objects $obj set_property -name "verilog_define" -value "" -objects $obj set_property -name "verilog_uppercase" -value "0" -objects $obj -set_property -name "xelab.dll" -value "0" -objects $obj -set_property -name "xsim.compile.tcl.pre" -value "" -objects $obj -set_property -name "xsim.compile.xsc.more_options" -value "" -objects $obj -set_property -name "xsim.compile.xvhdl.more_options" -value "" -objects $obj -set_property -name "xsim.compile.xvhdl.nosort" -value "1" -objects $obj -set_property -name "xsim.compile.xvhdl.relax" -value "1" -objects $obj -set_property -name "xsim.compile.xvlog.more_options" -value "" -objects $obj -set_property -name "xsim.compile.xvlog.nosort" -value "1" -objects $obj -set_property -name "xsim.compile.xvlog.relax" -value "1" -objects $obj -set_property -name "xsim.elaborate.debug_level" -value "typical" -objects $obj -set_property -name "xsim.elaborate.load_glbl" -value "1" -objects $obj -set_property -name 
"xsim.elaborate.mt_level" -value "auto" -objects $obj -set_property -name "xsim.elaborate.rangecheck" -value "0" -objects $obj -set_property -name "xsim.elaborate.relax" -value "1" -objects $obj -set_property -name "xsim.elaborate.sdf_delay" -value "sdfmax" -objects $obj -set_property -name "xsim.elaborate.snapshot" -value "" -objects $obj -set_property -name "xsim.elaborate.xelab.more_options" -value "" -objects $obj -set_property -name "xsim.elaborate.xsc.more_options" -value "" -objects $obj -set_property -name "xsim.simulate.add_positional" -value "0" -objects $obj -set_property -name "xsim.simulate.custom_tcl" -value "" -objects $obj -set_property -name "xsim.simulate.log_all_signals" -value "0" -objects $obj -set_property -name "xsim.simulate.no_quit" -value "0" -objects $obj -set_property -name "xsim.simulate.runtime" -value "4000ns" -objects $obj -set_property -name "xsim.simulate.saif" -value "" -objects $obj -set_property -name "xsim.simulate.saif_all_signals" -value "0" -objects $obj -set_property -name "xsim.simulate.saif_scope" -value "" -objects $obj -set_property -name "xsim.simulate.tcl.post" -value "" -objects $obj -set_property -name "xsim.simulate.wdb" -value "" -objects $obj -set_property -name "xsim.simulate.xsim.more_options" -value "" -objects $obj + # Set 'utils_1' fileset object set obj [get_filesets utils_1] @@ -398,7 +290,7 @@ set_property -dict [ list \ CONFIG.Assume_Synchronous_Clk {true} \ CONFIG.Byte_Size {8} \ CONFIG.Load_Init_File {true} \ - CONFIG.Coe_File {@VORTEX_HOME@/hw/syn/xilinx/test/kernel.bin.coe} \ + CONFIG.Coe_File {@COE_FILE@} \ CONFIG.EN_SAFETY_CKT {true} \ CONFIG.Enable_32bit_Address {true} \ CONFIG.Fill_Remaining_Memory_Locations {false} \ @@ -493,1735 +385,26 @@ set_property USED_IN_IMPLEMENTATION "1" [get_files design_1.bd ] set_property USED_IN_SIMULATION "1" [get_files design_1.bd ] set_property USED_IN_SYNTHESIS "1" [get_files design_1.bd ] -#call make_wrapper to create wrapper files +# Call make_wrapper to 
create wrapper files set wrapper_path [make_wrapper -fileset sources_1 -files [ get_files -norecurse design_1.bd] -top] add_files -norecurse -fileset sources_1 $wrapper_path -# Create 'synth_1' run (if not found) -if {[string equal [get_runs -quiet synth_1] ""]} { - create_run -name synth_1 -part xcu280-fsvh2892-2L-e -flow {Vivado Synthesis 2020} -strategy "Vivado Synthesis Defaults" -report_strategy {No Reports} -constrset constrs_1 -} else { - set_property strategy "Vivado Synthesis Defaults" [get_runs synth_1] - set_property flow "Vivado Synthesis 2020" [get_runs synth_1] -} -set obj [get_runs synth_1] -set_property set_report_strategy_name 1 $obj -set_property report_strategy {Vivado Synthesis Default Reports} $obj -set_property set_report_strategy_name 0 $obj -# Create 'synth_1_synth_report_utilization_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs synth_1] synth_1_synth_report_utilization_0] "" ] } { - create_report_config -report_name synth_1_synth_report_utilization_0 -report_type report_utilization:1.0 -steps synth_design -runs synth_1 -} -set obj [get_report_configs -of_objects [get_runs synth_1] synth_1_synth_report_utilization_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "1" -objects $obj -set_property -name "display_name" -value "Utilization - Synth Design" -objects $obj -set_property -name "options.pblocks" -value "" -objects $obj -set_property -name "options.cells" -value "" -objects $obj -set_property -name "options.slr" -value "0" -objects $obj -set_property -name "options.packthru" -value "0" -objects $obj -set_property -name "options.hierarchical" -value "0" -objects $obj -set_property -name "options.hierarchical_depth" -value "" -objects $obj -set_property -name "options.hierarchical_percentages" -value "0" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -set obj [get_runs synth_1] -set_property -name "constrset" -value "constrs_1" -objects $obj 
-set_property -name "description" -value "Vivado Synthesis Defaults" -objects $obj -set_property -name "flow" -value "Vivado Synthesis 2020" -objects $obj -set_property -name "name" -value "synth_1" -objects $obj -set_property -name "needs_refresh" -value "0" -objects $obj -set_property -name "srcset" -value "sources_1" -objects $obj -set_property -name "incremental_checkpoint" -value "" -objects $obj -set_property -name "auto_incremental_checkpoint" -value "0" -objects $obj -set_property -name "rqs_files" -value "" -objects $obj -set_property -name "incremental_checkpoint.more_options" -value "" -objects $obj -set_property -name "include_in_archive" -value "1" -objects $obj -set_property -name "gen_full_bitstream" -value "1" -objects $obj -set_property -name "write_incremental_synth_checkpoint" -value "0" -objects $obj -set_property -name "auto_incremental_checkpoint.directory" -value "$proj_dir/project_1.srcs/utils_1/imports/synth_1" -objects $obj -set_property -name "strategy" -value "Vivado Synthesis Defaults" -objects $obj -set_property -name "steps.synth_design.tcl.pre" -value "" -objects $obj -set_property -name "steps.synth_design.tcl.post" -value "" -objects $obj -set_property -name "steps.synth_design.args.flatten_hierarchy" -value "rebuilt" -objects $obj -set_property -name "steps.synth_design.args.gated_clock_conversion" -value "off" -objects $obj -set_property -name "steps.synth_design.args.bufg" -value "12" -objects $obj -set_property -name "steps.synth_design.args.directive" -value "Default" -objects $obj -set_property -name "steps.synth_design.args.retiming" -value "0" -objects $obj -set_property -name "steps.synth_design.args.fsm_extraction" -value "auto" -objects $obj -set_property -name "steps.synth_design.args.keep_equivalent_registers" -value "0" -objects $obj -set_property -name "steps.synth_design.args.resource_sharing" -value "auto" -objects $obj -set_property -name "steps.synth_design.args.control_set_opt_threshold" -value "auto" -objects 
$obj -set_property -name "steps.synth_design.args.no_lc" -value "0" -objects $obj -set_property -name "steps.synth_design.args.no_srlextract" -value "0" -objects $obj -set_property -name "steps.synth_design.args.shreg_min_size" -value "3" -objects $obj -set_property -name "steps.synth_design.args.max_bram" -value "-1" -objects $obj -set_property -name "steps.synth_design.args.max_uram" -value "-1" -objects $obj -set_property -name "steps.synth_design.args.max_dsp" -value "-1" -objects $obj -set_property -name "steps.synth_design.args.max_bram_cascade_height" -value "-1" -objects $obj -set_property -name "steps.synth_design.args.max_uram_cascade_height" -value "-1" -objects $obj -set_property -name "steps.synth_design.args.cascade_dsp" -value "auto" -objects $obj -set_property -name "steps.synth_design.args.assert" -value "0" -objects $obj -set_property -name "steps.synth_design.args.more options" -value "" -objects $obj - -# Create 'synth_1_copy_1' run (if not found) -if {[string equal [get_runs -quiet synth_1_copy_1] ""]} { - create_run -name synth_1_copy_1 -part xcu280-fsvh2892-2L-e -flow {Vivado Synthesis 2020} -strategy "Vivado Synthesis Defaults" -report_strategy {No Reports} -constrset constrs_1 -} else { - set_property strategy "Vivado Synthesis Defaults" [get_runs synth_1_copy_1] - set_property flow "Vivado Synthesis 2020" [get_runs synth_1_copy_1] -} -set obj [get_runs synth_1_copy_1] -set_property set_report_strategy_name 1 $obj -set_property report_strategy {Vivado Synthesis Default Reports} $obj -set_property set_report_strategy_name 0 $obj -# Create 'synth_1_copy_1_synth_report_utilization_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs synth_1_copy_1] synth_1_copy_1_synth_report_utilization_0] "" ] } { - create_report_config -report_name synth_1_copy_1_synth_report_utilization_0 -report_type report_utilization:1.0 -steps synth_design -runs synth_1_copy_1 -} -set obj [get_report_configs -of_objects [get_runs 
synth_1_copy_1] synth_1_copy_1_synth_report_utilization_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "1" -objects $obj -set_property -name "display_name" -value "Utilization - Synth Design" -objects $obj -set_property -name "options.pblocks" -value "" -objects $obj -set_property -name "options.cells" -value "" -objects $obj -set_property -name "options.slr" -value "0" -objects $obj -set_property -name "options.packthru" -value "0" -objects $obj -set_property -name "options.hierarchical" -value "0" -objects $obj -set_property -name "options.hierarchical_depth" -value "" -objects $obj -set_property -name "options.hierarchical_percentages" -value "0" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -set obj [get_runs synth_1_copy_1] -set_property -name "constrset" -value "constrs_1" -objects $obj -set_property -name "description" -value "Vivado Synthesis Defaults" -objects $obj -set_property -name "flow" -value "Vivado Synthesis 2020" -objects $obj -set_property -name "name" -value "synth_1_copy_1" -objects $obj -set_property -name "needs_refresh" -value "0" -objects $obj -set_property -name "srcset" -value "sources_1" -objects $obj -set_property -name "incremental_checkpoint" -value "" -objects $obj -set_property -name "auto_incremental_checkpoint" -value "0" -objects $obj -set_property -name "rqs_files" -value "" -objects $obj -set_property -name "incremental_checkpoint.more_options" -value "" -objects $obj -set_property -name "include_in_archive" -value "1" -objects $obj -set_property -name "gen_full_bitstream" -value "1" -objects $obj -set_property -name "write_incremental_synth_checkpoint" -value "0" -objects $obj -set_property -name "auto_incremental_checkpoint.directory" -value "$proj_dir/project_1.srcs/utils_1/imports/synth_1" -objects $obj -set_property -name "strategy" -value "Vivado Synthesis Defaults" -objects $obj -set_property -name "steps.synth_design.tcl.pre" -value "" -objects $obj -set_property 
-name "steps.synth_design.tcl.post" -value "" -objects $obj -set_property -name "steps.synth_design.args.flatten_hierarchy" -value "rebuilt" -objects $obj -set_property -name "steps.synth_design.args.gated_clock_conversion" -value "off" -objects $obj -set_property -name "steps.synth_design.args.bufg" -value "12" -objects $obj -set_property -name "steps.synth_design.args.directive" -value "Default" -objects $obj -set_property -name "steps.synth_design.args.retiming" -value "0" -objects $obj -set_property -name "steps.synth_design.args.fsm_extraction" -value "auto" -objects $obj -set_property -name "steps.synth_design.args.keep_equivalent_registers" -value "0" -objects $obj -set_property -name "steps.synth_design.args.resource_sharing" -value "auto" -objects $obj -set_property -name "steps.synth_design.args.control_set_opt_threshold" -value "auto" -objects $obj -set_property -name "steps.synth_design.args.no_lc" -value "0" -objects $obj -set_property -name "steps.synth_design.args.no_srlextract" -value "0" -objects $obj -set_property -name "steps.synth_design.args.shreg_min_size" -value "3" -objects $obj -set_property -name "steps.synth_design.args.max_bram" -value "-1" -objects $obj -set_property -name "steps.synth_design.args.max_uram" -value "-1" -objects $obj -set_property -name "steps.synth_design.args.max_dsp" -value "-1" -objects $obj -set_property -name "steps.synth_design.args.max_bram_cascade_height" -value "-1" -objects $obj -set_property -name "steps.synth_design.args.max_uram_cascade_height" -value "-1" -objects $obj -set_property -name "steps.synth_design.args.cascade_dsp" -value "auto" -objects $obj -set_property -name "steps.synth_design.args.assert" -value "0" -objects $obj -set_property -name "steps.synth_design.args.more options" -value "" -objects $obj - -# set the current synth run -current_run -synthesis [get_runs synth_1] - -# preserve signal names -set_property STEPS.SYNTH_DESIGN.ARGS.FLATTEN_HIERARCHY none [get_runs synth_1] - -# Create 
'impl_1' run (if not found) -if {[string equal [get_runs -quiet impl_1] ""]} { - create_run -name impl_1 -part xcu280-fsvh2892-2L-e -flow {Vivado Implementation 2020} -strategy "Vivado Implementation Defaults" -report_strategy {No Reports} -constrset constrs_1 -parent_run synth_1 -} else { - set_property strategy "Vivado Implementation Defaults" [get_runs impl_1] - set_property flow "Vivado Implementation 2020" [get_runs impl_1] -} -set obj [get_runs impl_1] -set_property set_report_strategy_name 1 $obj -set_property report_strategy {Vivado Implementation Default Reports} $obj -set_property set_report_strategy_name 0 $obj -# Create 'impl_1_init_report_timing_summary_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1] impl_1_init_report_timing_summary_0] "" ] } { - create_report_config -report_name impl_1_init_report_timing_summary_0 -report_type report_timing_summary:1.0 -steps init_design -runs impl_1 -} -set obj [get_report_configs -of_objects [get_runs impl_1] impl_1_init_report_timing_summary_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "0" -objects $obj -set_property -name "display_name" -value "Timing Summary - Design Initialization" -objects $obj -set_property -name "options.check_timing_verbose" -value "0" -objects $obj -set_property -name "options.delay_type" -value "" -objects $obj -set_property -name "options.setup" -value "0" -objects $obj -set_property -name "options.hold" -value "0" -objects $obj -set_property -name "options.max_paths" -value "10" -objects $obj -set_property -name "options.nworst" -value "" -objects $obj -set_property -name "options.unique_pins" -value "0" -objects $obj -set_property -name "options.path_type" -value "" -objects $obj -set_property -name "options.slack_lesser_than" -value "" -objects $obj -set_property -name "options.report_unconstrained" -value "0" -objects $obj -set_property -name "options.warn_on_violation" -value "0" -objects $obj -set_property -name 
"options.significant_digits" -value "" -objects $obj -set_property -name "options.cell" -value "" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_opt_report_drc_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1] impl_1_opt_report_drc_0] "" ] } { - create_report_config -report_name impl_1_opt_report_drc_0 -report_type report_drc:1.0 -steps opt_design -runs impl_1 -} -set obj [get_report_configs -of_objects [get_runs impl_1] impl_1_opt_report_drc_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "1" -objects $obj -set_property -name "display_name" -value "DRC - Opt Design" -objects $obj -set_property -name "options.upgrade_cw" -value "0" -objects $obj -set_property -name "options.checks" -value "" -objects $obj -set_property -name "options.ruledecks" -value "" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_opt_report_timing_summary_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1] impl_1_opt_report_timing_summary_0] "" ] } { - create_report_config -report_name impl_1_opt_report_timing_summary_0 -report_type report_timing_summary:1.0 -steps opt_design -runs impl_1 -} -set obj [get_report_configs -of_objects [get_runs impl_1] impl_1_opt_report_timing_summary_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "0" -objects $obj -set_property -name "display_name" -value "Timing Summary - Opt Design" -objects $obj -set_property -name "options.check_timing_verbose" -value "0" -objects $obj -set_property -name "options.delay_type" -value "" -objects $obj -set_property -name "options.setup" -value "0" -objects $obj -set_property -name "options.hold" -value "0" -objects $obj -set_property -name "options.max_paths" -value "10" -objects $obj -set_property -name "options.nworst" -value "" -objects $obj -set_property -name "options.unique_pins" -value 
"0" -objects $obj -set_property -name "options.path_type" -value "" -objects $obj -set_property -name "options.slack_lesser_than" -value "" -objects $obj -set_property -name "options.report_unconstrained" -value "0" -objects $obj -set_property -name "options.warn_on_violation" -value "0" -objects $obj -set_property -name "options.significant_digits" -value "" -objects $obj -set_property -name "options.cell" -value "" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_power_opt_report_timing_summary_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1] impl_1_power_opt_report_timing_summary_0] "" ] } { - create_report_config -report_name impl_1_power_opt_report_timing_summary_0 -report_type report_timing_summary:1.0 -steps power_opt_design -runs impl_1 -} -set obj [get_report_configs -of_objects [get_runs impl_1] impl_1_power_opt_report_timing_summary_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "0" -objects $obj -set_property -name "display_name" -value "Timing Summary - Power Opt Design" -objects $obj -set_property -name "options.check_timing_verbose" -value "0" -objects $obj -set_property -name "options.delay_type" -value "" -objects $obj -set_property -name "options.setup" -value "0" -objects $obj -set_property -name "options.hold" -value "0" -objects $obj -set_property -name "options.max_paths" -value "10" -objects $obj -set_property -name "options.nworst" -value "" -objects $obj -set_property -name "options.unique_pins" -value "0" -objects $obj -set_property -name "options.path_type" -value "" -objects $obj -set_property -name "options.slack_lesser_than" -value "" -objects $obj -set_property -name "options.report_unconstrained" -value "0" -objects $obj -set_property -name "options.warn_on_violation" -value "0" -objects $obj -set_property -name "options.significant_digits" -value "" -objects $obj -set_property -name "options.cell" -value "" -objects 
$obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_place_report_io_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1] impl_1_place_report_io_0] "" ] } { - create_report_config -report_name impl_1_place_report_io_0 -report_type report_io:1.0 -steps place_design -runs impl_1 -} -set obj [get_report_configs -of_objects [get_runs impl_1] impl_1_place_report_io_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "1" -objects $obj -set_property -name "display_name" -value "IO - Place Design" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_place_report_utilization_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1] impl_1_place_report_utilization_0] "" ] } { - create_report_config -report_name impl_1_place_report_utilization_0 -report_type report_utilization:1.0 -steps place_design -runs impl_1 -} -set obj [get_report_configs -of_objects [get_runs impl_1] impl_1_place_report_utilization_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "1" -objects $obj -set_property -name "display_name" -value "Utilization - Place Design" -objects $obj -set_property -name "options.pblocks" -value "" -objects $obj -set_property -name "options.cells" -value "" -objects $obj -set_property -name "options.slr" -value "0" -objects $obj -set_property -name "options.packthru" -value "0" -objects $obj -set_property -name "options.hierarchical" -value "0" -objects $obj -set_property -name "options.hierarchical_depth" -value "" -objects $obj -set_property -name "options.hierarchical_percentages" -value "0" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_place_report_control_sets_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1] impl_1_place_report_control_sets_0] "" ] } { - 
create_report_config -report_name impl_1_place_report_control_sets_0 -report_type report_control_sets:1.0 -steps place_design -runs impl_1 -} -set obj [get_report_configs -of_objects [get_runs impl_1] impl_1_place_report_control_sets_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "1" -objects $obj -set_property -name "display_name" -value "Control Sets - Place Design" -objects $obj -set_property -name "options.verbose" -value "1" -objects $obj -set_property -name "options.cells" -value "" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_place_report_incremental_reuse_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1] impl_1_place_report_incremental_reuse_0] "" ] } { - create_report_config -report_name impl_1_place_report_incremental_reuse_0 -report_type report_incremental_reuse:1.0 -steps place_design -runs impl_1 -} -set obj [get_report_configs -of_objects [get_runs impl_1] impl_1_place_report_incremental_reuse_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "0" -objects $obj -set_property -name "display_name" -value "Incremental Reuse - Place Design" -objects $obj -set_property -name "options.cells" -value "" -objects $obj -set_property -name "options.hierarchical" -value "0" -objects $obj -set_property -name "options.hierarchical_depth" -value "" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_place_report_incremental_reuse_1' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1] impl_1_place_report_incremental_reuse_1] "" ] } { - create_report_config -report_name impl_1_place_report_incremental_reuse_1 -report_type report_incremental_reuse:1.0 -steps place_design -runs impl_1 -} -set obj [get_report_configs -of_objects [get_runs impl_1] impl_1_place_report_incremental_reuse_1] -if { $obj != "" } { -set_property -name "is_enabled" -value 
"0" -objects $obj -set_property -name "display_name" -value "Incremental Reuse - Place Design" -objects $obj -set_property -name "options.cells" -value "" -objects $obj -set_property -name "options.hierarchical" -value "0" -objects $obj -set_property -name "options.hierarchical_depth" -value "" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_place_report_timing_summary_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1] impl_1_place_report_timing_summary_0] "" ] } { - create_report_config -report_name impl_1_place_report_timing_summary_0 -report_type report_timing_summary:1.0 -steps place_design -runs impl_1 -} -set obj [get_report_configs -of_objects [get_runs impl_1] impl_1_place_report_timing_summary_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "0" -objects $obj -set_property -name "display_name" -value "Timing Summary - Place Design" -objects $obj -set_property -name "options.check_timing_verbose" -value "0" -objects $obj -set_property -name "options.delay_type" -value "" -objects $obj -set_property -name "options.setup" -value "0" -objects $obj -set_property -name "options.hold" -value "0" -objects $obj -set_property -name "options.max_paths" -value "10" -objects $obj -set_property -name "options.nworst" -value "" -objects $obj -set_property -name "options.unique_pins" -value "0" -objects $obj -set_property -name "options.path_type" -value "" -objects $obj -set_property -name "options.slack_lesser_than" -value "" -objects $obj -set_property -name "options.report_unconstrained" -value "0" -objects $obj -set_property -name "options.warn_on_violation" -value "0" -objects $obj -set_property -name "options.significant_digits" -value "" -objects $obj -set_property -name "options.cell" -value "" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_post_place_power_opt_report_timing_summary_0' report 
(if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1] impl_1_post_place_power_opt_report_timing_summary_0] "" ] } { - create_report_config -report_name impl_1_post_place_power_opt_report_timing_summary_0 -report_type report_timing_summary:1.0 -steps post_place_power_opt_design -runs impl_1 -} -set obj [get_report_configs -of_objects [get_runs impl_1] impl_1_post_place_power_opt_report_timing_summary_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "0" -objects $obj -set_property -name "display_name" -value "Timing Summary - Post-Place Power Opt Design" -objects $obj -set_property -name "options.check_timing_verbose" -value "0" -objects $obj -set_property -name "options.delay_type" -value "" -objects $obj -set_property -name "options.setup" -value "0" -objects $obj -set_property -name "options.hold" -value "0" -objects $obj -set_property -name "options.max_paths" -value "10" -objects $obj -set_property -name "options.nworst" -value "" -objects $obj -set_property -name "options.unique_pins" -value "0" -objects $obj -set_property -name "options.path_type" -value "" -objects $obj -set_property -name "options.slack_lesser_than" -value "" -objects $obj -set_property -name "options.report_unconstrained" -value "0" -objects $obj -set_property -name "options.warn_on_violation" -value "0" -objects $obj -set_property -name "options.significant_digits" -value "" -objects $obj -set_property -name "options.cell" -value "" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_phys_opt_report_timing_summary_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1] impl_1_phys_opt_report_timing_summary_0] "" ] } { - create_report_config -report_name impl_1_phys_opt_report_timing_summary_0 -report_type report_timing_summary:1.0 -steps phys_opt_design -runs impl_1 -} -set obj [get_report_configs -of_objects [get_runs impl_1] 
impl_1_phys_opt_report_timing_summary_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "0" -objects $obj -set_property -name "display_name" -value "Timing Summary - Post-Place Phys Opt Design" -objects $obj -set_property -name "options.check_timing_verbose" -value "0" -objects $obj -set_property -name "options.delay_type" -value "" -objects $obj -set_property -name "options.setup" -value "0" -objects $obj -set_property -name "options.hold" -value "0" -objects $obj -set_property -name "options.max_paths" -value "10" -objects $obj -set_property -name "options.nworst" -value "" -objects $obj -set_property -name "options.unique_pins" -value "0" -objects $obj -set_property -name "options.path_type" -value "" -objects $obj -set_property -name "options.slack_lesser_than" -value "" -objects $obj -set_property -name "options.report_unconstrained" -value "0" -objects $obj -set_property -name "options.warn_on_violation" -value "0" -objects $obj -set_property -name "options.significant_digits" -value "" -objects $obj -set_property -name "options.cell" -value "" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_route_report_drc_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1] impl_1_route_report_drc_0] "" ] } { - create_report_config -report_name impl_1_route_report_drc_0 -report_type report_drc:1.0 -steps route_design -runs impl_1 -} -set obj [get_report_configs -of_objects [get_runs impl_1] impl_1_route_report_drc_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "1" -objects $obj -set_property -name "display_name" -value "DRC - Route Design" -objects $obj -set_property -name "options.upgrade_cw" -value "0" -objects $obj -set_property -name "options.checks" -value "" -objects $obj -set_property -name "options.ruledecks" -value "" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 
'impl_1_route_report_methodology_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1] impl_1_route_report_methodology_0] "" ] } { - create_report_config -report_name impl_1_route_report_methodology_0 -report_type report_methodology:1.0 -steps route_design -runs impl_1 -} -set obj [get_report_configs -of_objects [get_runs impl_1] impl_1_route_report_methodology_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "1" -objects $obj -set_property -name "display_name" -value "Methodology - Route Design" -objects $obj -set_property -name "options.checks" -value "" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_route_report_power_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1] impl_1_route_report_power_0] "" ] } { - create_report_config -report_name impl_1_route_report_power_0 -report_type report_power:1.0 -steps route_design -runs impl_1 -} -set obj [get_report_configs -of_objects [get_runs impl_1] impl_1_route_report_power_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "1" -objects $obj -set_property -name "display_name" -value "Power - Route Design" -objects $obj -set_property -name "options.advisory" -value "0" -objects $obj -set_property -name "options.xpe" -value "" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_route_report_route_status_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1] impl_1_route_report_route_status_0] "" ] } { - create_report_config -report_name impl_1_route_report_route_status_0 -report_type report_route_status:1.0 -steps route_design -runs impl_1 -} -set obj [get_report_configs -of_objects [get_runs impl_1] impl_1_route_report_route_status_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "1" -objects $obj -set_property -name "display_name" -value "Route 
Status - Route Design" -objects $obj -set_property -name "options.of_objects" -value "" -objects $obj -set_property -name "options.route_type" -value "" -objects $obj -set_property -name "options.list_all_nets" -value "0" -objects $obj -set_property -name "options.show_all" -value "0" -objects $obj -set_property -name "options.has_routing" -value "0" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_route_report_timing_summary_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1] impl_1_route_report_timing_summary_0] "" ] } { - create_report_config -report_name impl_1_route_report_timing_summary_0 -report_type report_timing_summary:1.0 -steps route_design -runs impl_1 -} -set obj [get_report_configs -of_objects [get_runs impl_1] impl_1_route_report_timing_summary_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "1" -objects $obj -set_property -name "display_name" -value "Timing Summary - Route Design" -objects $obj -set_property -name "options.check_timing_verbose" -value "0" -objects $obj -set_property -name "options.delay_type" -value "" -objects $obj -set_property -name "options.setup" -value "0" -objects $obj -set_property -name "options.hold" -value "0" -objects $obj -set_property -name "options.max_paths" -value "10" -objects $obj -set_property -name "options.nworst" -value "" -objects $obj -set_property -name "options.unique_pins" -value "0" -objects $obj -set_property -name "options.path_type" -value "" -objects $obj -set_property -name "options.slack_lesser_than" -value "" -objects $obj -set_property -name "options.report_unconstrained" -value "0" -objects $obj -set_property -name "options.warn_on_violation" -value "0" -objects $obj -set_property -name "options.significant_digits" -value "" -objects $obj -set_property -name "options.cell" -value "" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 
'impl_1_route_report_incremental_reuse_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1] impl_1_route_report_incremental_reuse_0] "" ] } { - create_report_config -report_name impl_1_route_report_incremental_reuse_0 -report_type report_incremental_reuse:1.0 -steps route_design -runs impl_1 -} -set obj [get_report_configs -of_objects [get_runs impl_1] impl_1_route_report_incremental_reuse_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "1" -objects $obj -set_property -name "display_name" -value "Incremental Reuse - Route Design" -objects $obj -set_property -name "options.cells" -value "" -objects $obj -set_property -name "options.hierarchical" -value "0" -objects $obj -set_property -name "options.hierarchical_depth" -value "" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_route_report_clock_utilization_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1] impl_1_route_report_clock_utilization_0] "" ] } { - create_report_config -report_name impl_1_route_report_clock_utilization_0 -report_type report_clock_utilization:1.0 -steps route_design -runs impl_1 -} -set obj [get_report_configs -of_objects [get_runs impl_1] impl_1_route_report_clock_utilization_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "1" -objects $obj -set_property -name "display_name" -value "Clock Utilization - Route Design" -objects $obj -set_property -name "options.write_xdc" -value "0" -objects $obj -set_property -name "options.clock_roots_only" -value "0" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_route_report_bus_skew_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1] impl_1_route_report_bus_skew_0] "" ] } { - create_report_config -report_name impl_1_route_report_bus_skew_0 -report_type report_bus_skew:1.1 -steps 
route_design -runs impl_1 -} -set obj [get_report_configs -of_objects [get_runs impl_1] impl_1_route_report_bus_skew_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "1" -objects $obj -set_property -name "display_name" -value "Bus Skew - Route Design" -objects $obj -set_property -name "options.delay_type" -value "" -objects $obj -set_property -name "options.setup" -value "0" -objects $obj -set_property -name "options.hold" -value "0" -objects $obj -set_property -name "options.max_paths" -value "" -objects $obj -set_property -name "options.nworst" -value "" -objects $obj -set_property -name "options.unique_pins" -value "0" -objects $obj -set_property -name "options.path_type" -value "" -objects $obj -set_property -name "options.slack_lesser_than" -value "" -objects $obj -set_property -name "options.slack_greater_than" -value "" -objects $obj -set_property -name "options.significant_digits" -value "" -objects $obj -set_property -name "options.warn_on_violation" -value "1" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_post_route_phys_opt_report_timing_summary_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1] impl_1_post_route_phys_opt_report_timing_summary_0] "" ] } { - create_report_config -report_name impl_1_post_route_phys_opt_report_timing_summary_0 -report_type report_timing_summary:1.0 -steps post_route_phys_opt_design -runs impl_1 -} -set obj [get_report_configs -of_objects [get_runs impl_1] impl_1_post_route_phys_opt_report_timing_summary_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "1" -objects $obj -set_property -name "display_name" -value "Timing Summary - Post-Route Phys Opt Design" -objects $obj -set_property -name "options.check_timing_verbose" -value "0" -objects $obj -set_property -name "options.delay_type" -value "" -objects $obj -set_property -name "options.setup" -value "0" -objects $obj -set_property -name 
"options.hold" -value "0" -objects $obj -set_property -name "options.max_paths" -value "10" -objects $obj -set_property -name "options.nworst" -value "" -objects $obj -set_property -name "options.unique_pins" -value "0" -objects $obj -set_property -name "options.path_type" -value "" -objects $obj -set_property -name "options.slack_lesser_than" -value "" -objects $obj -set_property -name "options.report_unconstrained" -value "0" -objects $obj -set_property -name "options.warn_on_violation" -value "1" -objects $obj -set_property -name "options.significant_digits" -value "" -objects $obj -set_property -name "options.cell" -value "" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_post_route_phys_opt_report_bus_skew_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1] impl_1_post_route_phys_opt_report_bus_skew_0] "" ] } { - create_report_config -report_name impl_1_post_route_phys_opt_report_bus_skew_0 -report_type report_bus_skew:1.1 -steps post_route_phys_opt_design -runs impl_1 -} -set obj [get_report_configs -of_objects [get_runs impl_1] impl_1_post_route_phys_opt_report_bus_skew_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "1" -objects $obj -set_property -name "display_name" -value "Bus Skew - Post-Route Phys Opt Design" -objects $obj -set_property -name "options.delay_type" -value "" -objects $obj -set_property -name "options.setup" -value "0" -objects $obj -set_property -name "options.hold" -value "0" -objects $obj -set_property -name "options.max_paths" -value "" -objects $obj -set_property -name "options.nworst" -value "" -objects $obj -set_property -name "options.unique_pins" -value "0" -objects $obj -set_property -name "options.path_type" -value "" -objects $obj -set_property -name "options.slack_lesser_than" -value "" -objects $obj -set_property -name "options.slack_greater_than" -value "" -objects $obj -set_property -name 
"options.significant_digits" -value "" -objects $obj -set_property -name "options.warn_on_violation" -value "1" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -set obj [get_runs impl_1] -set_property -name "constrset" -value "constrs_1" -objects $obj -set_property -name "description" -value "Default settings for Implementation." -objects $obj -set_property -name "flow" -value "Vivado Implementation 2020" -objects $obj -set_property -name "name" -value "impl_1" -objects $obj -set_property -name "needs_refresh" -value "0" -objects $obj -set_property -name "pr_configuration" -value "" -objects $obj -set_property -name "srcset" -value "sources_1" -objects $obj -set_property -name "incremental_checkpoint" -value "" -objects $obj -set_property -name "auto_incremental_checkpoint" -value "0" -objects $obj -set_property -name "rqs_files" -value "" -objects $obj -set_property -name "incremental_checkpoint.more_options" -value "" -objects $obj -set_property -name "include_in_archive" -value "1" -objects $obj -set_property -name "gen_full_bitstream" -value "1" -objects $obj -set_property -name "auto_incremental_checkpoint.directory" -value "$proj_dir/project_1.srcs/utils_1/imports/impl_1" -objects $obj -set_property -name "strategy" -value "Vivado Implementation Defaults" -objects $obj -set_property -name "steps.init_design.tcl.pre" -value "" -objects $obj -set_property -name "steps.init_design.tcl.post" -value "" -objects $obj -set_property -name "steps.opt_design.is_enabled" -value "1" -objects $obj -set_property -name "steps.opt_design.tcl.pre" -value "" -objects $obj -set_property -name "steps.opt_design.tcl.post" -value "" -objects $obj -set_property -name "steps.opt_design.args.verbose" -value "0" -objects $obj -set_property -name "steps.opt_design.args.directive" -value "Default" -objects $obj -set_property -name "steps.opt_design.args.more options" -value "" -objects $obj -set_property -name "steps.power_opt_design.is_enabled" 
-value "0" -objects $obj -set_property -name "steps.power_opt_design.tcl.pre" -value "" -objects $obj -set_property -name "steps.power_opt_design.tcl.post" -value "" -objects $obj -set_property -name "steps.power_opt_design.args.more options" -value "" -objects $obj -set_property -name "steps.place_design.tcl.pre" -value "" -objects $obj -set_property -name "steps.place_design.tcl.post" -value "" -objects $obj -set_property -name "steps.place_design.args.directive" -value "Default" -objects $obj -set_property -name "steps.place_design.args.more options" -value "" -objects $obj -set_property -name "steps.post_place_power_opt_design.is_enabled" -value "0" -objects $obj -set_property -name "steps.post_place_power_opt_design.tcl.pre" -value "" -objects $obj -set_property -name "steps.post_place_power_opt_design.tcl.post" -value "" -objects $obj -set_property -name "steps.post_place_power_opt_design.args.more options" -value "" -objects $obj -set_property -name "steps.phys_opt_design.is_enabled" -value "1" -objects $obj -set_property -name "steps.phys_opt_design.tcl.pre" -value "" -objects $obj -set_property -name "steps.phys_opt_design.tcl.post" -value "" -objects $obj -set_property -name "steps.phys_opt_design.args.directive" -value "Default" -objects $obj -set_property -name "steps.phys_opt_design.args.more options" -value "" -objects $obj -set_property -name "steps.route_design.tcl.pre" -value "" -objects $obj -set_property -name "steps.route_design.tcl.post" -value "" -objects $obj -set_property -name "steps.route_design.args.directive" -value "Default" -objects $obj -set_property -name "steps.route_design.args.more options" -value "" -objects $obj -set_property -name "steps.post_route_phys_opt_design.is_enabled" -value "0" -objects $obj -set_property -name "steps.post_route_phys_opt_design.tcl.pre" -value "" -objects $obj -set_property -name "steps.post_route_phys_opt_design.tcl.post" -value "" -objects $obj -set_property -name 
"steps.post_route_phys_opt_design.args.directive" -value "Default" -objects $obj -set_property -name "steps.post_route_phys_opt_design.args.more options" -value "" -objects $obj -set_property -name "steps.write_bitstream.tcl.pre" -value "" -objects $obj -set_property -name "steps.write_bitstream.tcl.post" -value "" -objects $obj -set_property -name "steps.write_bitstream.args.raw_bitfile" -value "0" -objects $obj -set_property -name "steps.write_bitstream.args.mask_file" -value "0" -objects $obj -set_property -name "steps.write_bitstream.args.no_binary_bitfile" -value "0" -objects $obj -set_property -name "steps.write_bitstream.args.bin_file" -value "0" -objects $obj -set_property -name "steps.write_bitstream.args.readback_file" -value "0" -objects $obj -set_property -name "steps.write_bitstream.args.logic_location_file" -value "0" -objects $obj -set_property -name "steps.write_bitstream.args.verbose" -value "0" -objects $obj -set_property -name "steps.write_bitstream.args.more options" -value "" -objects $obj - -# Create 'impl_1_copy_1' run (if not found) -if {[string equal [get_runs -quiet impl_1_copy_1] ""]} { - create_run -name impl_1_copy_1 -part xcu280-fsvh2892-2L-e -flow {Vivado Implementation 2020} -strategy "Vivado Implementation Defaults" -report_strategy {No Reports} -constrset constrs_1 -parent_run synth_1 -} else { - set_property strategy "Vivado Implementation Defaults" [get_runs impl_1_copy_1] - set_property flow "Vivado Implementation 2020" [get_runs impl_1_copy_1] -} -set obj [get_runs impl_1_copy_1] -set_property set_report_strategy_name 1 $obj -set_property report_strategy {Vivado Implementation Default Reports} $obj -set_property set_report_strategy_name 0 $obj -# Create 'impl_1_copy_1_init_report_timing_summary_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1_copy_1] impl_1_copy_1_init_report_timing_summary_0] "" ] } { - create_report_config -report_name impl_1_copy_1_init_report_timing_summary_0 
-report_type report_timing_summary:1.0 -steps init_design -runs impl_1_copy_1 -} -set obj [get_report_configs -of_objects [get_runs impl_1_copy_1] impl_1_copy_1_init_report_timing_summary_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "0" -objects $obj -set_property -name "display_name" -value "Timing Summary - Design Initialization" -objects $obj -set_property -name "options.check_timing_verbose" -value "0" -objects $obj -set_property -name "options.delay_type" -value "" -objects $obj -set_property -name "options.setup" -value "0" -objects $obj -set_property -name "options.hold" -value "0" -objects $obj -set_property -name "options.max_paths" -value "10" -objects $obj -set_property -name "options.nworst" -value "" -objects $obj -set_property -name "options.unique_pins" -value "0" -objects $obj -set_property -name "options.path_type" -value "" -objects $obj -set_property -name "options.slack_lesser_than" -value "" -objects $obj -set_property -name "options.report_unconstrained" -value "0" -objects $obj -set_property -name "options.warn_on_violation" -value "0" -objects $obj -set_property -name "options.significant_digits" -value "" -objects $obj -set_property -name "options.cell" -value "" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_copy_1_opt_report_drc_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1_copy_1] impl_1_copy_1_opt_report_drc_0] "" ] } { - create_report_config -report_name impl_1_copy_1_opt_report_drc_0 -report_type report_drc:1.0 -steps opt_design -runs impl_1_copy_1 -} -set obj [get_report_configs -of_objects [get_runs impl_1_copy_1] impl_1_copy_1_opt_report_drc_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "1" -objects $obj -set_property -name "display_name" -value "DRC - Opt Design" -objects $obj -set_property -name "options.upgrade_cw" -value "0" -objects $obj -set_property -name "options.checks" -value "" 
-objects $obj -set_property -name "options.ruledecks" -value "" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_copy_1_opt_report_timing_summary_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1_copy_1] impl_1_copy_1_opt_report_timing_summary_0] "" ] } { - create_report_config -report_name impl_1_copy_1_opt_report_timing_summary_0 -report_type report_timing_summary:1.0 -steps opt_design -runs impl_1_copy_1 -} -set obj [get_report_configs -of_objects [get_runs impl_1_copy_1] impl_1_copy_1_opt_report_timing_summary_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "0" -objects $obj -set_property -name "display_name" -value "Timing Summary - Opt Design" -objects $obj -set_property -name "options.check_timing_verbose" -value "0" -objects $obj -set_property -name "options.delay_type" -value "" -objects $obj -set_property -name "options.setup" -value "0" -objects $obj -set_property -name "options.hold" -value "0" -objects $obj -set_property -name "options.max_paths" -value "10" -objects $obj -set_property -name "options.nworst" -value "" -objects $obj -set_property -name "options.unique_pins" -value "0" -objects $obj -set_property -name "options.path_type" -value "" -objects $obj -set_property -name "options.slack_lesser_than" -value "" -objects $obj -set_property -name "options.report_unconstrained" -value "0" -objects $obj -set_property -name "options.warn_on_violation" -value "0" -objects $obj -set_property -name "options.significant_digits" -value "" -objects $obj -set_property -name "options.cell" -value "" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_copy_1_power_opt_report_timing_summary_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1_copy_1] impl_1_copy_1_power_opt_report_timing_summary_0] "" ] } { - create_report_config -report_name 
impl_1_copy_1_power_opt_report_timing_summary_0 -report_type report_timing_summary:1.0 -steps power_opt_design -runs impl_1_copy_1 -} -set obj [get_report_configs -of_objects [get_runs impl_1_copy_1] impl_1_copy_1_power_opt_report_timing_summary_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "0" -objects $obj -set_property -name "display_name" -value "Timing Summary - Power Opt Design" -objects $obj -set_property -name "options.check_timing_verbose" -value "0" -objects $obj -set_property -name "options.delay_type" -value "" -objects $obj -set_property -name "options.setup" -value "0" -objects $obj -set_property -name "options.hold" -value "0" -objects $obj -set_property -name "options.max_paths" -value "10" -objects $obj -set_property -name "options.nworst" -value "" -objects $obj -set_property -name "options.unique_pins" -value "0" -objects $obj -set_property -name "options.path_type" -value "" -objects $obj -set_property -name "options.slack_lesser_than" -value "" -objects $obj -set_property -name "options.report_unconstrained" -value "0" -objects $obj -set_property -name "options.warn_on_violation" -value "0" -objects $obj -set_property -name "options.significant_digits" -value "" -objects $obj -set_property -name "options.cell" -value "" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_copy_1_place_report_io_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1_copy_1] impl_1_copy_1_place_report_io_0] "" ] } { - create_report_config -report_name impl_1_copy_1_place_report_io_0 -report_type report_io:1.0 -steps place_design -runs impl_1_copy_1 -} -set obj [get_report_configs -of_objects [get_runs impl_1_copy_1] impl_1_copy_1_place_report_io_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "1" -objects $obj -set_property -name "display_name" -value "IO - Place Design" -objects $obj -set_property -name "options.more_options" -value "" 
-objects $obj - -} -# Create 'impl_1_copy_1_place_report_utilization_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1_copy_1] impl_1_copy_1_place_report_utilization_0] "" ] } { - create_report_config -report_name impl_1_copy_1_place_report_utilization_0 -report_type report_utilization:1.0 -steps place_design -runs impl_1_copy_1 -} -set obj [get_report_configs -of_objects [get_runs impl_1_copy_1] impl_1_copy_1_place_report_utilization_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "1" -objects $obj -set_property -name "display_name" -value "Utilization - Place Design" -objects $obj -set_property -name "options.pblocks" -value "" -objects $obj -set_property -name "options.cells" -value "" -objects $obj -set_property -name "options.slr" -value "0" -objects $obj -set_property -name "options.packthru" -value "0" -objects $obj -set_property -name "options.hierarchical" -value "0" -objects $obj -set_property -name "options.hierarchical_depth" -value "" -objects $obj -set_property -name "options.hierarchical_percentages" -value "0" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_copy_1_place_report_control_sets_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1_copy_1] impl_1_copy_1_place_report_control_sets_0] "" ] } { - create_report_config -report_name impl_1_copy_1_place_report_control_sets_0 -report_type report_control_sets:1.0 -steps place_design -runs impl_1_copy_1 -} -set obj [get_report_configs -of_objects [get_runs impl_1_copy_1] impl_1_copy_1_place_report_control_sets_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "1" -objects $obj -set_property -name "display_name" -value "Control Sets - Place Design" -objects $obj -set_property -name "options.verbose" -value "1" -objects $obj -set_property -name "options.cells" -value "" -objects $obj -set_property -name "options.more_options" -value 
"" -objects $obj - -} -# Create 'impl_1_copy_1_place_report_incremental_reuse_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1_copy_1] impl_1_copy_1_place_report_incremental_reuse_0] "" ] } { - create_report_config -report_name impl_1_copy_1_place_report_incremental_reuse_0 -report_type report_incremental_reuse:1.0 -steps place_design -runs impl_1_copy_1 -} -set obj [get_report_configs -of_objects [get_runs impl_1_copy_1] impl_1_copy_1_place_report_incremental_reuse_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "0" -objects $obj -set_property -name "display_name" -value "Incremental Reuse - Place Design" -objects $obj -set_property -name "options.cells" -value "" -objects $obj -set_property -name "options.hierarchical" -value "0" -objects $obj -set_property -name "options.hierarchical_depth" -value "" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_copy_1_place_report_incremental_reuse_1' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1_copy_1] impl_1_copy_1_place_report_incremental_reuse_1] "" ] } { - create_report_config -report_name impl_1_copy_1_place_report_incremental_reuse_1 -report_type report_incremental_reuse:1.0 -steps place_design -runs impl_1_copy_1 -} -set obj [get_report_configs -of_objects [get_runs impl_1_copy_1] impl_1_copy_1_place_report_incremental_reuse_1] -if { $obj != "" } { -set_property -name "is_enabled" -value "0" -objects $obj -set_property -name "display_name" -value "Incremental Reuse - Place Design" -objects $obj -set_property -name "options.cells" -value "" -objects $obj -set_property -name "options.hierarchical" -value "0" -objects $obj -set_property -name "options.hierarchical_depth" -value "" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_copy_1_place_report_timing_summary_0' report (if not found) -if { [ string 
equal [get_report_configs -of_objects [get_runs impl_1_copy_1] impl_1_copy_1_place_report_timing_summary_0] "" ] } { - create_report_config -report_name impl_1_copy_1_place_report_timing_summary_0 -report_type report_timing_summary:1.0 -steps place_design -runs impl_1_copy_1 -} -set obj [get_report_configs -of_objects [get_runs impl_1_copy_1] impl_1_copy_1_place_report_timing_summary_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "0" -objects $obj -set_property -name "display_name" -value "Timing Summary - Place Design" -objects $obj -set_property -name "options.check_timing_verbose" -value "0" -objects $obj -set_property -name "options.delay_type" -value "" -objects $obj -set_property -name "options.setup" -value "0" -objects $obj -set_property -name "options.hold" -value "0" -objects $obj -set_property -name "options.max_paths" -value "10" -objects $obj -set_property -name "options.nworst" -value "" -objects $obj -set_property -name "options.unique_pins" -value "0" -objects $obj -set_property -name "options.path_type" -value "" -objects $obj -set_property -name "options.slack_lesser_than" -value "" -objects $obj -set_property -name "options.report_unconstrained" -value "0" -objects $obj -set_property -name "options.warn_on_violation" -value "0" -objects $obj -set_property -name "options.significant_digits" -value "" -objects $obj -set_property -name "options.cell" -value "" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_copy_1_post_place_power_opt_report_timing_summary_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1_copy_1] impl_1_copy_1_post_place_power_opt_report_timing_summary_0] "" ] } { - create_report_config -report_name impl_1_copy_1_post_place_power_opt_report_timing_summary_0 -report_type report_timing_summary:1.0 -steps post_place_power_opt_design -runs impl_1_copy_1 -} -set obj [get_report_configs -of_objects [get_runs impl_1_copy_1] 
impl_1_copy_1_post_place_power_opt_report_timing_summary_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "0" -objects $obj -set_property -name "display_name" -value "Timing Summary - Post-Place Power Opt Design" -objects $obj -set_property -name "options.check_timing_verbose" -value "0" -objects $obj -set_property -name "options.delay_type" -value "" -objects $obj -set_property -name "options.setup" -value "0" -objects $obj -set_property -name "options.hold" -value "0" -objects $obj -set_property -name "options.max_paths" -value "10" -objects $obj -set_property -name "options.nworst" -value "" -objects $obj -set_property -name "options.unique_pins" -value "0" -objects $obj -set_property -name "options.path_type" -value "" -objects $obj -set_property -name "options.slack_lesser_than" -value "" -objects $obj -set_property -name "options.report_unconstrained" -value "0" -objects $obj -set_property -name "options.warn_on_violation" -value "0" -objects $obj -set_property -name "options.significant_digits" -value "" -objects $obj -set_property -name "options.cell" -value "" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_copy_1_phys_opt_report_timing_summary_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1_copy_1] impl_1_copy_1_phys_opt_report_timing_summary_0] "" ] } { - create_report_config -report_name impl_1_copy_1_phys_opt_report_timing_summary_0 -report_type report_timing_summary:1.0 -steps phys_opt_design -runs impl_1_copy_1 -} -set obj [get_report_configs -of_objects [get_runs impl_1_copy_1] impl_1_copy_1_phys_opt_report_timing_summary_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "0" -objects $obj -set_property -name "display_name" -value "Timing Summary - Post-Place Phys Opt Design" -objects $obj -set_property -name "options.check_timing_verbose" -value "0" -objects $obj -set_property -name "options.delay_type" -value "" 
-objects $obj -set_property -name "options.setup" -value "0" -objects $obj -set_property -name "options.hold" -value "0" -objects $obj -set_property -name "options.max_paths" -value "10" -objects $obj -set_property -name "options.nworst" -value "" -objects $obj -set_property -name "options.unique_pins" -value "0" -objects $obj -set_property -name "options.path_type" -value "" -objects $obj -set_property -name "options.slack_lesser_than" -value "" -objects $obj -set_property -name "options.report_unconstrained" -value "0" -objects $obj -set_property -name "options.warn_on_violation" -value "0" -objects $obj -set_property -name "options.significant_digits" -value "" -objects $obj -set_property -name "options.cell" -value "" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_copy_1_route_report_drc_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1_copy_1] impl_1_copy_1_route_report_drc_0] "" ] } { - create_report_config -report_name impl_1_copy_1_route_report_drc_0 -report_type report_drc:1.0 -steps route_design -runs impl_1_copy_1 -} -set obj [get_report_configs -of_objects [get_runs impl_1_copy_1] impl_1_copy_1_route_report_drc_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "1" -objects $obj -set_property -name "display_name" -value "DRC - Route Design" -objects $obj -set_property -name "options.upgrade_cw" -value "0" -objects $obj -set_property -name "options.checks" -value "" -objects $obj -set_property -name "options.ruledecks" -value "" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_copy_1_route_report_methodology_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1_copy_1] impl_1_copy_1_route_report_methodology_0] "" ] } { - create_report_config -report_name impl_1_copy_1_route_report_methodology_0 -report_type report_methodology:1.0 -steps 
route_design -runs impl_1_copy_1 -} -set obj [get_report_configs -of_objects [get_runs impl_1_copy_1] impl_1_copy_1_route_report_methodology_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "1" -objects $obj -set_property -name "display_name" -value "Methodology - Route Design" -objects $obj -set_property -name "options.checks" -value "" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_copy_1_route_report_power_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1_copy_1] impl_1_copy_1_route_report_power_0] "" ] } { - create_report_config -report_name impl_1_copy_1_route_report_power_0 -report_type report_power:1.0 -steps route_design -runs impl_1_copy_1 -} -set obj [get_report_configs -of_objects [get_runs impl_1_copy_1] impl_1_copy_1_route_report_power_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "1" -objects $obj -set_property -name "display_name" -value "Power - Route Design" -objects $obj -set_property -name "options.advisory" -value "0" -objects $obj -set_property -name "options.xpe" -value "" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_copy_1_route_report_route_status_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1_copy_1] impl_1_copy_1_route_report_route_status_0] "" ] } { - create_report_config -report_name impl_1_copy_1_route_report_route_status_0 -report_type report_route_status:1.0 -steps route_design -runs impl_1_copy_1 -} -set obj [get_report_configs -of_objects [get_runs impl_1_copy_1] impl_1_copy_1_route_report_route_status_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "1" -objects $obj -set_property -name "display_name" -value "Route Status - Route Design" -objects $obj -set_property -name "options.of_objects" -value "" -objects $obj -set_property -name "options.route_type" -value "" -objects $obj 
-set_property -name "options.list_all_nets" -value "0" -objects $obj -set_property -name "options.show_all" -value "0" -objects $obj -set_property -name "options.has_routing" -value "0" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_copy_1_route_report_timing_summary_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1_copy_1] impl_1_copy_1_route_report_timing_summary_0] "" ] } { - create_report_config -report_name impl_1_copy_1_route_report_timing_summary_0 -report_type report_timing_summary:1.0 -steps route_design -runs impl_1_copy_1 -} -set obj [get_report_configs -of_objects [get_runs impl_1_copy_1] impl_1_copy_1_route_report_timing_summary_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "1" -objects $obj -set_property -name "display_name" -value "Timing Summary - Route Design" -objects $obj -set_property -name "options.check_timing_verbose" -value "0" -objects $obj -set_property -name "options.delay_type" -value "" -objects $obj -set_property -name "options.setup" -value "0" -objects $obj -set_property -name "options.hold" -value "0" -objects $obj -set_property -name "options.max_paths" -value "10" -objects $obj -set_property -name "options.nworst" -value "" -objects $obj -set_property -name "options.unique_pins" -value "0" -objects $obj -set_property -name "options.path_type" -value "" -objects $obj -set_property -name "options.slack_lesser_than" -value "" -objects $obj -set_property -name "options.report_unconstrained" -value "0" -objects $obj -set_property -name "options.warn_on_violation" -value "0" -objects $obj -set_property -name "options.significant_digits" -value "" -objects $obj -set_property -name "options.cell" -value "" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_copy_1_route_report_incremental_reuse_0' report (if not found) -if { [ string equal [get_report_configs 
-of_objects [get_runs impl_1_copy_1] impl_1_copy_1_route_report_incremental_reuse_0] "" ] } { - create_report_config -report_name impl_1_copy_1_route_report_incremental_reuse_0 -report_type report_incremental_reuse:1.0 -steps route_design -runs impl_1_copy_1 -} -set obj [get_report_configs -of_objects [get_runs impl_1_copy_1] impl_1_copy_1_route_report_incremental_reuse_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "1" -objects $obj -set_property -name "display_name" -value "Incremental Reuse - Route Design" -objects $obj -set_property -name "options.cells" -value "" -objects $obj -set_property -name "options.hierarchical" -value "0" -objects $obj -set_property -name "options.hierarchical_depth" -value "" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_copy_1_route_report_clock_utilization_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1_copy_1] impl_1_copy_1_route_report_clock_utilization_0] "" ] } { - create_report_config -report_name impl_1_copy_1_route_report_clock_utilization_0 -report_type report_clock_utilization:1.0 -steps route_design -runs impl_1_copy_1 -} -set obj [get_report_configs -of_objects [get_runs impl_1_copy_1] impl_1_copy_1_route_report_clock_utilization_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "1" -objects $obj -set_property -name "display_name" -value "Clock Utilization - Route Design" -objects $obj -set_property -name "options.write_xdc" -value "0" -objects $obj -set_property -name "options.clock_roots_only" -value "0" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_copy_1_route_report_bus_skew_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1_copy_1] impl_1_copy_1_route_report_bus_skew_0] "" ] } { - create_report_config -report_name impl_1_copy_1_route_report_bus_skew_0 -report_type 
report_bus_skew:1.1 -steps route_design -runs impl_1_copy_1 -} -set obj [get_report_configs -of_objects [get_runs impl_1_copy_1] impl_1_copy_1_route_report_bus_skew_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "1" -objects $obj -set_property -name "display_name" -value "Bus Skew - Route Design" -objects $obj -set_property -name "options.delay_type" -value "" -objects $obj -set_property -name "options.setup" -value "0" -objects $obj -set_property -name "options.hold" -value "0" -objects $obj -set_property -name "options.max_paths" -value "" -objects $obj -set_property -name "options.nworst" -value "" -objects $obj -set_property -name "options.unique_pins" -value "0" -objects $obj -set_property -name "options.path_type" -value "" -objects $obj -set_property -name "options.slack_lesser_than" -value "" -objects $obj -set_property -name "options.slack_greater_than" -value "" -objects $obj -set_property -name "options.significant_digits" -value "" -objects $obj -set_property -name "options.warn_on_violation" -value "1" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_copy_1_post_route_phys_opt_report_timing_summary_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1_copy_1] impl_1_copy_1_post_route_phys_opt_report_timing_summary_0] "" ] } { - create_report_config -report_name impl_1_copy_1_post_route_phys_opt_report_timing_summary_0 -report_type report_timing_summary:1.0 -steps post_route_phys_opt_design -runs impl_1_copy_1 -} -set obj [get_report_configs -of_objects [get_runs impl_1_copy_1] impl_1_copy_1_post_route_phys_opt_report_timing_summary_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "1" -objects $obj -set_property -name "display_name" -value "Timing Summary - Post-Route Phys Opt Design" -objects $obj -set_property -name "options.check_timing_verbose" -value "0" -objects $obj -set_property -name "options.delay_type" -value "" 
-objects $obj -set_property -name "options.setup" -value "0" -objects $obj -set_property -name "options.hold" -value "0" -objects $obj -set_property -name "options.max_paths" -value "10" -objects $obj -set_property -name "options.nworst" -value "" -objects $obj -set_property -name "options.unique_pins" -value "0" -objects $obj -set_property -name "options.path_type" -value "" -objects $obj -set_property -name "options.slack_lesser_than" -value "" -objects $obj -set_property -name "options.report_unconstrained" -value "0" -objects $obj -set_property -name "options.warn_on_violation" -value "1" -objects $obj -set_property -name "options.significant_digits" -value "" -objects $obj -set_property -name "options.cell" -value "" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_copy_1_post_route_phys_opt_report_bus_skew_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1_copy_1] impl_1_copy_1_post_route_phys_opt_report_bus_skew_0] "" ] } { - create_report_config -report_name impl_1_copy_1_post_route_phys_opt_report_bus_skew_0 -report_type report_bus_skew:1.1 -steps post_route_phys_opt_design -runs impl_1_copy_1 -} -set obj [get_report_configs -of_objects [get_runs impl_1_copy_1] impl_1_copy_1_post_route_phys_opt_report_bus_skew_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "1" -objects $obj -set_property -name "display_name" -value "Bus Skew - Post-Route Phys Opt Design" -objects $obj -set_property -name "options.delay_type" -value "" -objects $obj -set_property -name "options.setup" -value "0" -objects $obj -set_property -name "options.hold" -value "0" -objects $obj -set_property -name "options.max_paths" -value "" -objects $obj -set_property -name "options.nworst" -value "" -objects $obj -set_property -name "options.unique_pins" -value "0" -objects $obj -set_property -name "options.path_type" -value "" -objects $obj -set_property -name 
"options.slack_lesser_than" -value "" -objects $obj -set_property -name "options.slack_greater_than" -value "" -objects $obj -set_property -name "options.significant_digits" -value "" -objects $obj -set_property -name "options.warn_on_violation" -value "1" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -set obj [get_runs impl_1_copy_1] -set_property -name "constrset" -value "constrs_1" -objects $obj -set_property -name "description" -value "Default settings for Implementation." -objects $obj -set_property -name "flow" -value "Vivado Implementation 2020" -objects $obj -set_property -name "name" -value "impl_1_copy_1" -objects $obj -set_property -name "needs_refresh" -value "0" -objects $obj -set_property -name "pr_configuration" -value "" -objects $obj -set_property -name "srcset" -value "sources_1" -objects $obj -set_property -name "incremental_checkpoint" -value "" -objects $obj -set_property -name "auto_incremental_checkpoint" -value "0" -objects $obj -set_property -name "rqs_files" -value "" -objects $obj -set_property -name "incremental_checkpoint.more_options" -value "" -objects $obj -set_property -name "include_in_archive" -value "1" -objects $obj -set_property -name "gen_full_bitstream" -value "1" -objects $obj -set_property -name "auto_incremental_checkpoint.directory" -value "$proj_dir/project_1.srcs/utils_1/imports/impl_1" -objects $obj -set_property -name "strategy" -value "Vivado Implementation Defaults" -objects $obj -set_property -name "steps.init_design.tcl.pre" -value "" -objects $obj -set_property -name "steps.init_design.tcl.post" -value "" -objects $obj -set_property -name "steps.opt_design.is_enabled" -value "1" -objects $obj -set_property -name "steps.opt_design.tcl.pre" -value "" -objects $obj -set_property -name "steps.opt_design.tcl.post" -value "" -objects $obj -set_property -name "steps.opt_design.args.verbose" -value "0" -objects $obj -set_property -name "steps.opt_design.args.directive" -value 
"Default" -objects $obj -set_property -name "steps.opt_design.args.more options" -value "" -objects $obj -set_property -name "steps.power_opt_design.is_enabled" -value "0" -objects $obj -set_property -name "steps.power_opt_design.tcl.pre" -value "" -objects $obj -set_property -name "steps.power_opt_design.tcl.post" -value "" -objects $obj -set_property -name "steps.power_opt_design.args.more options" -value "" -objects $obj -set_property -name "steps.place_design.tcl.pre" -value "" -objects $obj -set_property -name "steps.place_design.tcl.post" -value "" -objects $obj -set_property -name "steps.place_design.args.directive" -value "Default" -objects $obj -set_property -name "steps.place_design.args.more options" -value "" -objects $obj -set_property -name "steps.post_place_power_opt_design.is_enabled" -value "0" -objects $obj -set_property -name "steps.post_place_power_opt_design.tcl.pre" -value "" -objects $obj -set_property -name "steps.post_place_power_opt_design.tcl.post" -value "" -objects $obj -set_property -name "steps.post_place_power_opt_design.args.more options" -value "" -objects $obj -set_property -name "steps.phys_opt_design.is_enabled" -value "1" -objects $obj -set_property -name "steps.phys_opt_design.tcl.pre" -value "" -objects $obj -set_property -name "steps.phys_opt_design.tcl.post" -value "" -objects $obj -set_property -name "steps.phys_opt_design.args.directive" -value "Default" -objects $obj -set_property -name "steps.phys_opt_design.args.more options" -value "" -objects $obj -set_property -name "steps.route_design.tcl.pre" -value "" -objects $obj -set_property -name "steps.route_design.tcl.post" -value "" -objects $obj -set_property -name "steps.route_design.args.directive" -value "Default" -objects $obj -set_property -name "steps.route_design.args.more options" -value "" -objects $obj -set_property -name "steps.post_route_phys_opt_design.is_enabled" -value "0" -objects $obj -set_property -name "steps.post_route_phys_opt_design.tcl.pre" -value 
"" -objects $obj -set_property -name "steps.post_route_phys_opt_design.tcl.post" -value "" -objects $obj -set_property -name "steps.post_route_phys_opt_design.args.directive" -value "Default" -objects $obj -set_property -name "steps.post_route_phys_opt_design.args.more options" -value "" -objects $obj -set_property -name "steps.write_bitstream.tcl.pre" -value "" -objects $obj -set_property -name "steps.write_bitstream.tcl.post" -value "" -objects $obj -set_property -name "steps.write_bitstream.args.raw_bitfile" -value "0" -objects $obj -set_property -name "steps.write_bitstream.args.mask_file" -value "0" -objects $obj -set_property -name "steps.write_bitstream.args.no_binary_bitfile" -value "0" -objects $obj -set_property -name "steps.write_bitstream.args.bin_file" -value "0" -objects $obj -set_property -name "steps.write_bitstream.args.readback_file" -value "0" -objects $obj -set_property -name "steps.write_bitstream.args.logic_location_file" -value "0" -objects $obj -set_property -name "steps.write_bitstream.args.verbose" -value "0" -objects $obj -set_property -name "steps.write_bitstream.args.more options" -value "" -objects $obj - -# Create 'impl_1_copy_2' run (if not found) -if {[string equal [get_runs -quiet impl_1_copy_2] ""]} { - create_run -name impl_1_copy_2 -part xcu280-fsvh2892-2L-e -flow {Vivado Implementation 2020} -strategy "Vivado Implementation Defaults" -report_strategy {No Reports} -constrset constrs_1 -parent_run synth_1 -} else { - set_property strategy "Vivado Implementation Defaults" [get_runs impl_1_copy_2] - set_property flow "Vivado Implementation 2020" [get_runs impl_1_copy_2] -} -set obj [get_runs impl_1_copy_2] -set_property set_report_strategy_name 1 $obj -set_property report_strategy {Vivado Implementation Default Reports} $obj -set_property set_report_strategy_name 0 $obj -# Create 'impl_1_copy_2_init_report_timing_summary_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1_copy_2] 
impl_1_copy_2_init_report_timing_summary_0] "" ] } { - create_report_config -report_name impl_1_copy_2_init_report_timing_summary_0 -report_type report_timing_summary:1.0 -steps init_design -runs impl_1_copy_2 -} -set obj [get_report_configs -of_objects [get_runs impl_1_copy_2] impl_1_copy_2_init_report_timing_summary_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "0" -objects $obj -set_property -name "display_name" -value "Timing Summary - Design Initialization" -objects $obj -set_property -name "options.check_timing_verbose" -value "0" -objects $obj -set_property -name "options.delay_type" -value "" -objects $obj -set_property -name "options.setup" -value "0" -objects $obj -set_property -name "options.hold" -value "0" -objects $obj -set_property -name "options.max_paths" -value "10" -objects $obj -set_property -name "options.nworst" -value "" -objects $obj -set_property -name "options.unique_pins" -value "0" -objects $obj -set_property -name "options.path_type" -value "" -objects $obj -set_property -name "options.slack_lesser_than" -value "" -objects $obj -set_property -name "options.report_unconstrained" -value "0" -objects $obj -set_property -name "options.warn_on_violation" -value "0" -objects $obj -set_property -name "options.significant_digits" -value "" -objects $obj -set_property -name "options.cell" -value "" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_copy_2_opt_report_drc_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1_copy_2] impl_1_copy_2_opt_report_drc_0] "" ] } { - create_report_config -report_name impl_1_copy_2_opt_report_drc_0 -report_type report_drc:1.0 -steps opt_design -runs impl_1_copy_2 -} -set obj [get_report_configs -of_objects [get_runs impl_1_copy_2] impl_1_copy_2_opt_report_drc_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "1" -objects $obj -set_property -name "display_name" -value "DRC - Opt 
Design" -objects $obj -set_property -name "options.upgrade_cw" -value "0" -objects $obj -set_property -name "options.checks" -value "" -objects $obj -set_property -name "options.ruledecks" -value "" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_copy_2_opt_report_timing_summary_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1_copy_2] impl_1_copy_2_opt_report_timing_summary_0] "" ] } { - create_report_config -report_name impl_1_copy_2_opt_report_timing_summary_0 -report_type report_timing_summary:1.0 -steps opt_design -runs impl_1_copy_2 -} -set obj [get_report_configs -of_objects [get_runs impl_1_copy_2] impl_1_copy_2_opt_report_timing_summary_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "0" -objects $obj -set_property -name "display_name" -value "Timing Summary - Opt Design" -objects $obj -set_property -name "options.check_timing_verbose" -value "0" -objects $obj -set_property -name "options.delay_type" -value "" -objects $obj -set_property -name "options.setup" -value "0" -objects $obj -set_property -name "options.hold" -value "0" -objects $obj -set_property -name "options.max_paths" -value "10" -objects $obj -set_property -name "options.nworst" -value "" -objects $obj -set_property -name "options.unique_pins" -value "0" -objects $obj -set_property -name "options.path_type" -value "" -objects $obj -set_property -name "options.slack_lesser_than" -value "" -objects $obj -set_property -name "options.report_unconstrained" -value "0" -objects $obj -set_property -name "options.warn_on_violation" -value "0" -objects $obj -set_property -name "options.significant_digits" -value "" -objects $obj -set_property -name "options.cell" -value "" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_copy_2_power_opt_report_timing_summary_0' report (if not found) -if { [ string equal [get_report_configs 
-of_objects [get_runs impl_1_copy_2] impl_1_copy_2_power_opt_report_timing_summary_0] "" ] } { - create_report_config -report_name impl_1_copy_2_power_opt_report_timing_summary_0 -report_type report_timing_summary:1.0 -steps power_opt_design -runs impl_1_copy_2 -} -set obj [get_report_configs -of_objects [get_runs impl_1_copy_2] impl_1_copy_2_power_opt_report_timing_summary_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "0" -objects $obj -set_property -name "display_name" -value "Timing Summary - Power Opt Design" -objects $obj -set_property -name "options.check_timing_verbose" -value "0" -objects $obj -set_property -name "options.delay_type" -value "" -objects $obj -set_property -name "options.setup" -value "0" -objects $obj -set_property -name "options.hold" -value "0" -objects $obj -set_property -name "options.max_paths" -value "10" -objects $obj -set_property -name "options.nworst" -value "" -objects $obj -set_property -name "options.unique_pins" -value "0" -objects $obj -set_property -name "options.path_type" -value "" -objects $obj -set_property -name "options.slack_lesser_than" -value "" -objects $obj -set_property -name "options.report_unconstrained" -value "0" -objects $obj -set_property -name "options.warn_on_violation" -value "0" -objects $obj -set_property -name "options.significant_digits" -value "" -objects $obj -set_property -name "options.cell" -value "" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_copy_2_place_report_io_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1_copy_2] impl_1_copy_2_place_report_io_0] "" ] } { - create_report_config -report_name impl_1_copy_2_place_report_io_0 -report_type report_io:1.0 -steps place_design -runs impl_1_copy_2 -} -set obj [get_report_configs -of_objects [get_runs impl_1_copy_2] impl_1_copy_2_place_report_io_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "1" -objects $obj 
-set_property -name "display_name" -value "IO - Place Design" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_copy_2_place_report_utilization_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1_copy_2] impl_1_copy_2_place_report_utilization_0] "" ] } { - create_report_config -report_name impl_1_copy_2_place_report_utilization_0 -report_type report_utilization:1.0 -steps place_design -runs impl_1_copy_2 -} -set obj [get_report_configs -of_objects [get_runs impl_1_copy_2] impl_1_copy_2_place_report_utilization_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "1" -objects $obj -set_property -name "display_name" -value "Utilization - Place Design" -objects $obj -set_property -name "options.pblocks" -value "" -objects $obj -set_property -name "options.cells" -value "" -objects $obj -set_property -name "options.slr" -value "0" -objects $obj -set_property -name "options.packthru" -value "0" -objects $obj -set_property -name "options.hierarchical" -value "0" -objects $obj -set_property -name "options.hierarchical_depth" -value "" -objects $obj -set_property -name "options.hierarchical_percentages" -value "0" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_copy_2_place_report_control_sets_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1_copy_2] impl_1_copy_2_place_report_control_sets_0] "" ] } { - create_report_config -report_name impl_1_copy_2_place_report_control_sets_0 -report_type report_control_sets:1.0 -steps place_design -runs impl_1_copy_2 -} -set obj [get_report_configs -of_objects [get_runs impl_1_copy_2] impl_1_copy_2_place_report_control_sets_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "1" -objects $obj -set_property -name "display_name" -value "Control Sets - Place Design" -objects $obj -set_property -name "options.verbose" 
-value "1" -objects $obj -set_property -name "options.cells" -value "" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_copy_2_place_report_incremental_reuse_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1_copy_2] impl_1_copy_2_place_report_incremental_reuse_0] "" ] } { - create_report_config -report_name impl_1_copy_2_place_report_incremental_reuse_0 -report_type report_incremental_reuse:1.0 -steps place_design -runs impl_1_copy_2 -} -set obj [get_report_configs -of_objects [get_runs impl_1_copy_2] impl_1_copy_2_place_report_incremental_reuse_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "0" -objects $obj -set_property -name "display_name" -value "Incremental Reuse - Place Design" -objects $obj -set_property -name "options.cells" -value "" -objects $obj -set_property -name "options.hierarchical" -value "0" -objects $obj -set_property -name "options.hierarchical_depth" -value "" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_copy_2_place_report_incremental_reuse_1' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1_copy_2] impl_1_copy_2_place_report_incremental_reuse_1] "" ] } { - create_report_config -report_name impl_1_copy_2_place_report_incremental_reuse_1 -report_type report_incremental_reuse:1.0 -steps place_design -runs impl_1_copy_2 -} -set obj [get_report_configs -of_objects [get_runs impl_1_copy_2] impl_1_copy_2_place_report_incremental_reuse_1] -if { $obj != "" } { -set_property -name "is_enabled" -value "0" -objects $obj -set_property -name "display_name" -value "Incremental Reuse - Place Design" -objects $obj -set_property -name "options.cells" -value "" -objects $obj -set_property -name "options.hierarchical" -value "0" -objects $obj -set_property -name "options.hierarchical_depth" -value "" -objects $obj -set_property -name 
"options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_copy_2_place_report_timing_summary_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1_copy_2] impl_1_copy_2_place_report_timing_summary_0] "" ] } { - create_report_config -report_name impl_1_copy_2_place_report_timing_summary_0 -report_type report_timing_summary:1.0 -steps place_design -runs impl_1_copy_2 -} -set obj [get_report_configs -of_objects [get_runs impl_1_copy_2] impl_1_copy_2_place_report_timing_summary_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "0" -objects $obj -set_property -name "display_name" -value "Timing Summary - Place Design" -objects $obj -set_property -name "options.check_timing_verbose" -value "0" -objects $obj -set_property -name "options.delay_type" -value "" -objects $obj -set_property -name "options.setup" -value "0" -objects $obj -set_property -name "options.hold" -value "0" -objects $obj -set_property -name "options.max_paths" -value "10" -objects $obj -set_property -name "options.nworst" -value "" -objects $obj -set_property -name "options.unique_pins" -value "0" -objects $obj -set_property -name "options.path_type" -value "" -objects $obj -set_property -name "options.slack_lesser_than" -value "" -objects $obj -set_property -name "options.report_unconstrained" -value "0" -objects $obj -set_property -name "options.warn_on_violation" -value "0" -objects $obj -set_property -name "options.significant_digits" -value "" -objects $obj -set_property -name "options.cell" -value "" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_copy_2_post_place_power_opt_report_timing_summary_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1_copy_2] impl_1_copy_2_post_place_power_opt_report_timing_summary_0] "" ] } { - create_report_config -report_name impl_1_copy_2_post_place_power_opt_report_timing_summary_0 -report_type 
report_timing_summary:1.0 -steps post_place_power_opt_design -runs impl_1_copy_2 -} -set obj [get_report_configs -of_objects [get_runs impl_1_copy_2] impl_1_copy_2_post_place_power_opt_report_timing_summary_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "0" -objects $obj -set_property -name "display_name" -value "Timing Summary - Post-Place Power Opt Design" -objects $obj -set_property -name "options.check_timing_verbose" -value "0" -objects $obj -set_property -name "options.delay_type" -value "" -objects $obj -set_property -name "options.setup" -value "0" -objects $obj -set_property -name "options.hold" -value "0" -objects $obj -set_property -name "options.max_paths" -value "10" -objects $obj -set_property -name "options.nworst" -value "" -objects $obj -set_property -name "options.unique_pins" -value "0" -objects $obj -set_property -name "options.path_type" -value "" -objects $obj -set_property -name "options.slack_lesser_than" -value "" -objects $obj -set_property -name "options.report_unconstrained" -value "0" -objects $obj -set_property -name "options.warn_on_violation" -value "0" -objects $obj -set_property -name "options.significant_digits" -value "" -objects $obj -set_property -name "options.cell" -value "" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_copy_2_phys_opt_report_timing_summary_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1_copy_2] impl_1_copy_2_phys_opt_report_timing_summary_0] "" ] } { - create_report_config -report_name impl_1_copy_2_phys_opt_report_timing_summary_0 -report_type report_timing_summary:1.0 -steps phys_opt_design -runs impl_1_copy_2 -} -set obj [get_report_configs -of_objects [get_runs impl_1_copy_2] impl_1_copy_2_phys_opt_report_timing_summary_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "0" -objects $obj -set_property -name "display_name" -value "Timing Summary - Post-Place Phys Opt 
Design" -objects $obj -set_property -name "options.check_timing_verbose" -value "0" -objects $obj -set_property -name "options.delay_type" -value "" -objects $obj -set_property -name "options.setup" -value "0" -objects $obj -set_property -name "options.hold" -value "0" -objects $obj -set_property -name "options.max_paths" -value "10" -objects $obj -set_property -name "options.nworst" -value "" -objects $obj -set_property -name "options.unique_pins" -value "0" -objects $obj -set_property -name "options.path_type" -value "" -objects $obj -set_property -name "options.slack_lesser_than" -value "" -objects $obj -set_property -name "options.report_unconstrained" -value "0" -objects $obj -set_property -name "options.warn_on_violation" -value "0" -objects $obj -set_property -name "options.significant_digits" -value "" -objects $obj -set_property -name "options.cell" -value "" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_copy_2_route_report_drc_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1_copy_2] impl_1_copy_2_route_report_drc_0] "" ] } { - create_report_config -report_name impl_1_copy_2_route_report_drc_0 -report_type report_drc:1.0 -steps route_design -runs impl_1_copy_2 -} -set obj [get_report_configs -of_objects [get_runs impl_1_copy_2] impl_1_copy_2_route_report_drc_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "1" -objects $obj -set_property -name "display_name" -value "DRC - Route Design" -objects $obj -set_property -name "options.upgrade_cw" -value "0" -objects $obj -set_property -name "options.checks" -value "" -objects $obj -set_property -name "options.ruledecks" -value "" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_copy_2_route_report_methodology_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1_copy_2] 
impl_1_copy_2_route_report_methodology_0] "" ] } { - create_report_config -report_name impl_1_copy_2_route_report_methodology_0 -report_type report_methodology:1.0 -steps route_design -runs impl_1_copy_2 -} -set obj [get_report_configs -of_objects [get_runs impl_1_copy_2] impl_1_copy_2_route_report_methodology_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "1" -objects $obj -set_property -name "display_name" -value "Methodology - Route Design" -objects $obj -set_property -name "options.checks" -value "" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_copy_2_route_report_power_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1_copy_2] impl_1_copy_2_route_report_power_0] "" ] } { - create_report_config -report_name impl_1_copy_2_route_report_power_0 -report_type report_power:1.0 -steps route_design -runs impl_1_copy_2 -} -set obj [get_report_configs -of_objects [get_runs impl_1_copy_2] impl_1_copy_2_route_report_power_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "1" -objects $obj -set_property -name "display_name" -value "Power - Route Design" -objects $obj -set_property -name "options.advisory" -value "0" -objects $obj -set_property -name "options.xpe" -value "" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_copy_2_route_report_route_status_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1_copy_2] impl_1_copy_2_route_report_route_status_0] "" ] } { - create_report_config -report_name impl_1_copy_2_route_report_route_status_0 -report_type report_route_status:1.0 -steps route_design -runs impl_1_copy_2 -} -set obj [get_report_configs -of_objects [get_runs impl_1_copy_2] impl_1_copy_2_route_report_route_status_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "1" -objects $obj -set_property -name "display_name" -value 
"Route Status - Route Design" -objects $obj -set_property -name "options.of_objects" -value "" -objects $obj -set_property -name "options.route_type" -value "" -objects $obj -set_property -name "options.list_all_nets" -value "0" -objects $obj -set_property -name "options.show_all" -value "0" -objects $obj -set_property -name "options.has_routing" -value "0" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_copy_2_route_report_timing_summary_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1_copy_2] impl_1_copy_2_route_report_timing_summary_0] "" ] } { - create_report_config -report_name impl_1_copy_2_route_report_timing_summary_0 -report_type report_timing_summary:1.0 -steps route_design -runs impl_1_copy_2 -} -set obj [get_report_configs -of_objects [get_runs impl_1_copy_2] impl_1_copy_2_route_report_timing_summary_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "1" -objects $obj -set_property -name "display_name" -value "Timing Summary - Route Design" -objects $obj -set_property -name "options.check_timing_verbose" -value "0" -objects $obj -set_property -name "options.delay_type" -value "" -objects $obj -set_property -name "options.setup" -value "0" -objects $obj -set_property -name "options.hold" -value "0" -objects $obj -set_property -name "options.max_paths" -value "10" -objects $obj -set_property -name "options.nworst" -value "" -objects $obj -set_property -name "options.unique_pins" -value "0" -objects $obj -set_property -name "options.path_type" -value "" -objects $obj -set_property -name "options.slack_lesser_than" -value "" -objects $obj -set_property -name "options.report_unconstrained" -value "0" -objects $obj -set_property -name "options.warn_on_violation" -value "0" -objects $obj -set_property -name "options.significant_digits" -value "" -objects $obj -set_property -name "options.cell" -value "" -objects $obj -set_property -name 
"options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_copy_2_route_report_incremental_reuse_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1_copy_2] impl_1_copy_2_route_report_incremental_reuse_0] "" ] } { - create_report_config -report_name impl_1_copy_2_route_report_incremental_reuse_0 -report_type report_incremental_reuse:1.0 -steps route_design -runs impl_1_copy_2 -} -set obj [get_report_configs -of_objects [get_runs impl_1_copy_2] impl_1_copy_2_route_report_incremental_reuse_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "1" -objects $obj -set_property -name "display_name" -value "Incremental Reuse - Route Design" -objects $obj -set_property -name "options.cells" -value "" -objects $obj -set_property -name "options.hierarchical" -value "0" -objects $obj -set_property -name "options.hierarchical_depth" -value "" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_copy_2_route_report_clock_utilization_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1_copy_2] impl_1_copy_2_route_report_clock_utilization_0] "" ] } { - create_report_config -report_name impl_1_copy_2_route_report_clock_utilization_0 -report_type report_clock_utilization:1.0 -steps route_design -runs impl_1_copy_2 -} -set obj [get_report_configs -of_objects [get_runs impl_1_copy_2] impl_1_copy_2_route_report_clock_utilization_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "1" -objects $obj -set_property -name "display_name" -value "Clock Utilization - Route Design" -objects $obj -set_property -name "options.write_xdc" -value "0" -objects $obj -set_property -name "options.clock_roots_only" -value "0" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_copy_2_route_report_bus_skew_0' report (if not found) -if { [ string equal [get_report_configs -of_objects 
[get_runs impl_1_copy_2] impl_1_copy_2_route_report_bus_skew_0] "" ] } { - create_report_config -report_name impl_1_copy_2_route_report_bus_skew_0 -report_type report_bus_skew:1.1 -steps route_design -runs impl_1_copy_2 -} -set obj [get_report_configs -of_objects [get_runs impl_1_copy_2] impl_1_copy_2_route_report_bus_skew_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "1" -objects $obj -set_property -name "display_name" -value "Bus Skew - Route Design" -objects $obj -set_property -name "options.delay_type" -value "" -objects $obj -set_property -name "options.setup" -value "0" -objects $obj -set_property -name "options.hold" -value "0" -objects $obj -set_property -name "options.max_paths" -value "" -objects $obj -set_property -name "options.nworst" -value "" -objects $obj -set_property -name "options.unique_pins" -value "0" -objects $obj -set_property -name "options.path_type" -value "" -objects $obj -set_property -name "options.slack_lesser_than" -value "" -objects $obj -set_property -name "options.slack_greater_than" -value "" -objects $obj -set_property -name "options.significant_digits" -value "" -objects $obj -set_property -name "options.warn_on_violation" -value "1" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_copy_2_post_route_phys_opt_report_timing_summary_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1_copy_2] impl_1_copy_2_post_route_phys_opt_report_timing_summary_0] "" ] } { - create_report_config -report_name impl_1_copy_2_post_route_phys_opt_report_timing_summary_0 -report_type report_timing_summary:1.0 -steps post_route_phys_opt_design -runs impl_1_copy_2 -} -set obj [get_report_configs -of_objects [get_runs impl_1_copy_2] impl_1_copy_2_post_route_phys_opt_report_timing_summary_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "1" -objects $obj -set_property -name "display_name" -value "Timing Summary - Post-Route 
Phys Opt Design" -objects $obj -set_property -name "options.check_timing_verbose" -value "0" -objects $obj -set_property -name "options.delay_type" -value "" -objects $obj -set_property -name "options.setup" -value "0" -objects $obj -set_property -name "options.hold" -value "0" -objects $obj -set_property -name "options.max_paths" -value "10" -objects $obj -set_property -name "options.nworst" -value "" -objects $obj -set_property -name "options.unique_pins" -value "0" -objects $obj -set_property -name "options.path_type" -value "" -objects $obj -set_property -name "options.slack_lesser_than" -value "" -objects $obj -set_property -name "options.report_unconstrained" -value "0" -objects $obj -set_property -name "options.warn_on_violation" -value "1" -objects $obj -set_property -name "options.significant_digits" -value "" -objects $obj -set_property -name "options.cell" -value "" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_copy_2_post_route_phys_opt_report_bus_skew_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1_copy_2] impl_1_copy_2_post_route_phys_opt_report_bus_skew_0] "" ] } { - create_report_config -report_name impl_1_copy_2_post_route_phys_opt_report_bus_skew_0 -report_type report_bus_skew:1.1 -steps post_route_phys_opt_design -runs impl_1_copy_2 -} -set obj [get_report_configs -of_objects [get_runs impl_1_copy_2] impl_1_copy_2_post_route_phys_opt_report_bus_skew_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "1" -objects $obj -set_property -name "display_name" -value "Bus Skew - Post-Route Phys Opt Design" -objects $obj -set_property -name "options.delay_type" -value "" -objects $obj -set_property -name "options.setup" -value "0" -objects $obj -set_property -name "options.hold" -value "0" -objects $obj -set_property -name "options.max_paths" -value "" -objects $obj -set_property -name "options.nworst" -value "" -objects $obj -set_property 
-name "options.unique_pins" -value "0" -objects $obj -set_property -name "options.path_type" -value "" -objects $obj -set_property -name "options.slack_lesser_than" -value "" -objects $obj -set_property -name "options.slack_greater_than" -value "" -objects $obj -set_property -name "options.significant_digits" -value "" -objects $obj -set_property -name "options.warn_on_violation" -value "1" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -set obj [get_runs impl_1_copy_2] -set_property -name "constrset" -value "constrs_1" -objects $obj -set_property -name "description" -value "Default settings for Implementation." -objects $obj -set_property -name "flow" -value "Vivado Implementation 2020" -objects $obj -set_property -name "name" -value "impl_1_copy_2" -objects $obj -set_property -name "needs_refresh" -value "0" -objects $obj -set_property -name "pr_configuration" -value "" -objects $obj -set_property -name "srcset" -value "sources_1" -objects $obj -set_property -name "incremental_checkpoint" -value "" -objects $obj -set_property -name "auto_incremental_checkpoint" -value "0" -objects $obj -set_property -name "rqs_files" -value "" -objects $obj -set_property -name "incremental_checkpoint.more_options" -value "" -objects $obj -set_property -name "include_in_archive" -value "1" -objects $obj -set_property -name "gen_full_bitstream" -value "1" -objects $obj -set_property -name "auto_incremental_checkpoint.directory" -value "$proj_dir/project_1.srcs/utils_1/imports/impl_1" -objects $obj -set_property -name "strategy" -value "Vivado Implementation Defaults" -objects $obj -set_property -name "steps.init_design.tcl.pre" -value "" -objects $obj -set_property -name "steps.init_design.tcl.post" -value "" -objects $obj -set_property -name "steps.opt_design.is_enabled" -value "1" -objects $obj -set_property -name "steps.opt_design.tcl.pre" -value "" -objects $obj -set_property -name "steps.opt_design.tcl.post" -value "" -objects $obj 
-set_property -name "steps.opt_design.args.verbose" -value "0" -objects $obj -set_property -name "steps.opt_design.args.directive" -value "Default" -objects $obj -set_property -name "steps.opt_design.args.more options" -value "" -objects $obj -set_property -name "steps.power_opt_design.is_enabled" -value "0" -objects $obj -set_property -name "steps.power_opt_design.tcl.pre" -value "" -objects $obj -set_property -name "steps.power_opt_design.tcl.post" -value "" -objects $obj -set_property -name "steps.power_opt_design.args.more options" -value "" -objects $obj -set_property -name "steps.place_design.tcl.pre" -value "" -objects $obj -set_property -name "steps.place_design.tcl.post" -value "" -objects $obj -set_property -name "steps.place_design.args.directive" -value "Default" -objects $obj -set_property -name "steps.place_design.args.more options" -value "" -objects $obj -set_property -name "steps.post_place_power_opt_design.is_enabled" -value "0" -objects $obj -set_property -name "steps.post_place_power_opt_design.tcl.pre" -value "" -objects $obj -set_property -name "steps.post_place_power_opt_design.tcl.post" -value "" -objects $obj -set_property -name "steps.post_place_power_opt_design.args.more options" -value "" -objects $obj -set_property -name "steps.phys_opt_design.is_enabled" -value "1" -objects $obj -set_property -name "steps.phys_opt_design.tcl.pre" -value "" -objects $obj -set_property -name "steps.phys_opt_design.tcl.post" -value "" -objects $obj -set_property -name "steps.phys_opt_design.args.directive" -value "Default" -objects $obj -set_property -name "steps.phys_opt_design.args.more options" -value "" -objects $obj -set_property -name "steps.route_design.tcl.pre" -value "" -objects $obj -set_property -name "steps.route_design.tcl.post" -value "" -objects $obj -set_property -name "steps.route_design.args.directive" -value "Default" -objects $obj -set_property -name "steps.route_design.args.more options" -value "" -objects $obj -set_property -name 
"steps.post_route_phys_opt_design.is_enabled" -value "0" -objects $obj -set_property -name "steps.post_route_phys_opt_design.tcl.pre" -value "" -objects $obj -set_property -name "steps.post_route_phys_opt_design.tcl.post" -value "" -objects $obj -set_property -name "steps.post_route_phys_opt_design.args.directive" -value "Default" -objects $obj -set_property -name "steps.post_route_phys_opt_design.args.more options" -value "" -objects $obj -set_property -name "steps.write_bitstream.tcl.pre" -value "" -objects $obj -set_property -name "steps.write_bitstream.tcl.post" -value "" -objects $obj -set_property -name "steps.write_bitstream.args.raw_bitfile" -value "0" -objects $obj -set_property -name "steps.write_bitstream.args.mask_file" -value "0" -objects $obj -set_property -name "steps.write_bitstream.args.no_binary_bitfile" -value "0" -objects $obj -set_property -name "steps.write_bitstream.args.bin_file" -value "0" -objects $obj -set_property -name "steps.write_bitstream.args.readback_file" -value "0" -objects $obj -set_property -name "steps.write_bitstream.args.logic_location_file" -value "0" -objects $obj -set_property -name "steps.write_bitstream.args.verbose" -value "0" -objects $obj -set_property -name "steps.write_bitstream.args.more options" -value "" -objects $obj - -# set the current impl run -current_run -implementation [get_runs impl_1] - -puts "INFO: Project created:${project_name}" -# Create 'drc_1' gadget (if not found) -if {[string equal [get_dashboard_gadgets [ list "drc_1" ] ] ""]} { -create_dashboard_gadget -name {drc_1} -type drc -} -set obj [get_dashboard_gadgets [ list "drc_1" ] ] -set_property -name "active_reports" -value "" -objects $obj -set_property -name "active_reports_invalid" -value "" -objects $obj -set_property -name "active_run" -value "0" -objects $obj -set_property -name "hide_unused_data" -value "1" -objects $obj -set_property -name "incl_new_reports" -value "0" -objects $obj -set_property -name "reports" -value 
"impl_1#impl_1_route_report_drc_0" -objects $obj -set_property -name "run.step" -value "route_design" -objects $obj -set_property -name "run.type" -value "implementation" -objects $obj -set_property -name "statistics.critical_warning" -value "1" -objects $obj -set_property -name "statistics.error" -value "1" -objects $obj -set_property -name "statistics.info" -value "1" -objects $obj -set_property -name "statistics.warning" -value "1" -objects $obj -set_property -name "view.orientation" -value "Horizontal" -objects $obj -set_property -name "view.type" -value "Graph" -objects $obj - -# Create 'methodology_1' gadget (if not found) -if {[string equal [get_dashboard_gadgets [ list "methodology_1" ] ] ""]} { -create_dashboard_gadget -name {methodology_1} -type methodology -} -set obj [get_dashboard_gadgets [ list "methodology_1" ] ] -set_property -name "active_reports" -value "" -objects $obj -set_property -name "active_reports_invalid" -value "" -objects $obj -set_property -name "active_run" -value "0" -objects $obj -set_property -name "hide_unused_data" -value "1" -objects $obj -set_property -name "incl_new_reports" -value "0" -objects $obj -set_property -name "reports" -value "impl_1#impl_1_route_report_methodology_0" -objects $obj -set_property -name "run.step" -value "route_design" -objects $obj -set_property -name "run.type" -value "implementation" -objects $obj -set_property -name "statistics.critical_warning" -value "1" -objects $obj -set_property -name "statistics.error" -value "1" -objects $obj -set_property -name "statistics.info" -value "1" -objects $obj -set_property -name "statistics.warning" -value "1" -objects $obj -set_property -name "view.orientation" -value "Horizontal" -objects $obj -set_property -name "view.type" -value "Graph" -objects $obj - -# Create 'power_1' gadget (if not found) -if {[string equal [get_dashboard_gadgets [ list "power_1" ] ] ""]} { -create_dashboard_gadget -name {power_1} -type power -} -set obj [get_dashboard_gadgets [ list 
"power_1" ] ] -set_property -name "active_reports" -value "" -objects $obj -set_property -name "active_reports_invalid" -value "" -objects $obj -set_property -name "active_run" -value "0" -objects $obj -set_property -name "hide_unused_data" -value "1" -objects $obj -set_property -name "incl_new_reports" -value "0" -objects $obj -set_property -name "reports" -value "impl_1#impl_1_route_report_power_0" -objects $obj -set_property -name "run.step" -value "route_design" -objects $obj -set_property -name "run.type" -value "implementation" -objects $obj -set_property -name "statistics.bram" -value "1" -objects $obj -set_property -name "statistics.clocks" -value "1" -objects $obj -set_property -name "statistics.dsp" -value "1" -objects $obj -set_property -name "statistics.gth" -value "1" -objects $obj -set_property -name "statistics.gtp" -value "1" -objects $obj -set_property -name "statistics.gtx" -value "1" -objects $obj -set_property -name "statistics.gtz" -value "1" -objects $obj -set_property -name "statistics.io" -value "1" -objects $obj -set_property -name "statistics.logic" -value "1" -objects $obj -set_property -name "statistics.mmcm" -value "1" -objects $obj -set_property -name "statistics.pcie" -value "1" -objects $obj -set_property -name "statistics.phaser" -value "1" -objects $obj -set_property -name "statistics.pll" -value "1" -objects $obj -set_property -name "statistics.pl_static" -value "1" -objects $obj -set_property -name "statistics.ps7" -value "1" -objects $obj -set_property -name "statistics.ps" -value "1" -objects $obj -set_property -name "statistics.ps_static" -value "1" -objects $obj -set_property -name "statistics.signals" -value "1" -objects $obj -set_property -name "statistics.total_power" -value "1" -objects $obj -set_property -name "statistics.transceiver" -value "1" -objects $obj -set_property -name "statistics.xadc" -value "1" -objects $obj -set_property -name "view.orientation" -value "Horizontal" -objects $obj -set_property -name 
"view.type" -value "Graph" -objects $obj - -# Create 'timing_1' gadget (if not found) -if {[string equal [get_dashboard_gadgets [ list "timing_1" ] ] ""]} { -create_dashboard_gadget -name {timing_1} -type timing -} -set obj [get_dashboard_gadgets [ list "timing_1" ] ] -set_property -name "active_reports" -value "" -objects $obj -set_property -name "active_reports_invalid" -value "" -objects $obj -set_property -name "active_run" -value "0" -objects $obj -set_property -name "hide_unused_data" -value "1" -objects $obj -set_property -name "incl_new_reports" -value "0" -objects $obj -set_property -name "reports" -value "impl_1#impl_1_route_report_timing_summary_0" -objects $obj -set_property -name "run.step" -value "route_design" -objects $obj -set_property -name "run.type" -value "implementation" -objects $obj -set_property -name "statistics.ths" -value "1" -objects $obj -set_property -name "statistics.tns" -value "1" -objects $obj -set_property -name "statistics.tpws" -value "1" -objects $obj -set_property -name "statistics.whs" -value "1" -objects $obj -set_property -name "statistics.wns" -value "1" -objects $obj -set_property -name "view.orientation" -value "Horizontal" -objects $obj -set_property -name "view.type" -value "Table" -objects $obj - -# Create 'utilization_1' gadget (if not found) -if {[string equal [get_dashboard_gadgets [ list "utilization_1" ] ] ""]} { -create_dashboard_gadget -name {utilization_1} -type utilization -} -set obj [get_dashboard_gadgets [ list "utilization_1" ] ] -set_property -name "active_reports" -value "" -objects $obj -set_property -name "active_reports_invalid" -value "" -objects $obj -set_property -name "active_run" -value "0" -objects $obj -set_property -name "hide_unused_data" -value "1" -objects $obj -set_property -name "incl_new_reports" -value "0" -objects $obj -set_property -name "reports" -value "synth_1#synth_1_synth_report_utilization_0" -objects $obj -set_property -name "run.step" -value "synth_design" -objects $obj 
-set_property -name "run.type" -value "synthesis" -objects $obj -set_property -name "statistics.bram" -value "1" -objects $obj -set_property -name "statistics.bufg" -value "1" -objects $obj -set_property -name "statistics.dsp" -value "1" -objects $obj -set_property -name "statistics.ff" -value "1" -objects $obj -set_property -name "statistics.gt" -value "1" -objects $obj -set_property -name "statistics.io" -value "1" -objects $obj -set_property -name "statistics.lut" -value "1" -objects $obj -set_property -name "statistics.lutram" -value "1" -objects $obj -set_property -name "statistics.mmcm" -value "1" -objects $obj -set_property -name "statistics.pcie" -value "1" -objects $obj -set_property -name "statistics.pll" -value "1" -objects $obj -set_property -name "statistics.uram" -value "1" -objects $obj -set_property -name "view.orientation" -value "Horizontal" -objects $obj -set_property -name "view.type" -value "Graph" -objects $obj - -# Create 'utilization_2' gadget (if not found) -if {[string equal [get_dashboard_gadgets [ list "utilization_2" ] ] ""]} { -create_dashboard_gadget -name {utilization_2} -type utilization -} -set obj [get_dashboard_gadgets [ list "utilization_2" ] ] -set_property -name "active_reports" -value "" -objects $obj -set_property -name "active_reports_invalid" -value "" -objects $obj -set_property -name "active_run" -value "0" -objects $obj -set_property -name "hide_unused_data" -value "1" -objects $obj -set_property -name "incl_new_reports" -value "0" -objects $obj -set_property -name "reports" -value "impl_1#impl_1_place_report_utilization_0" -objects $obj -set_property -name "run.step" -value "place_design" -objects $obj -set_property -name "run.type" -value "implementation" -objects $obj -set_property -name "statistics.bram" -value "1" -objects $obj -set_property -name "statistics.bufg" -value "1" -objects $obj -set_property -name "statistics.dsp" -value "1" -objects $obj -set_property -name "statistics.ff" -value "1" -objects $obj 
-set_property -name "statistics.gt" -value "1" -objects $obj -set_property -name "statistics.io" -value "1" -objects $obj -set_property -name "statistics.lut" -value "1" -objects $obj -set_property -name "statistics.lutram" -value "1" -objects $obj -set_property -name "statistics.mmcm" -value "1" -objects $obj -set_property -name "statistics.pcie" -value "1" -objects $obj -set_property -name "statistics.pll" -value "1" -objects $obj -set_property -name "statistics.uram" -value "1" -objects $obj -set_property -name "view.orientation" -value "Horizontal" -objects $obj -set_property -name "view.type" -value "Graph" -objects $obj - -move_dashboard_gadget -name {utilization_1} -row 0 -col 0 -move_dashboard_gadget -name {power_1} -row 1 -col 0 -move_dashboard_gadget -name {drc_1} -row 2 -col 0 -move_dashboard_gadget -name {timing_1} -row 0 -col 1 -move_dashboard_gadget -name {utilization_2} -row 1 -col 1 -move_dashboard_gadget -name {methodology_1} -row 2 -col 1 +update_compile_order -fileset sources_1 + +# Synthesis +launch_runs synth_1 +wait_on_run synth_1 +open_run synth_1 + +# Implementation +launch_runs impl_1 +wait_on_run impl_1 +open_run impl_1 + +# Generate reports +report_utilization -file utilization.rpt -hierarchical -hierarchical_percentages +report_place_status -file place.rpt +report_route_status -file route.rpt +report_timing -file timing.rpt +report_power -file power.rpt +report_drc -file drc.rpt \ No newline at end of file diff --git a/hw/syn/xilinx/sandbox/project_1_files/kernel.bin.coe b/hw/syn/xilinx/sandbox/project_1_files/kernel.bin.coe deleted file mode 100644 index a316d82b5f..0000000000 --- a/hw/syn/xilinx/sandbox/project_1_files/kernel.bin.coe +++ /dev/null @@ -1,16386 +0,0 @@ -MEMORY_INITIALIZATION_RADIX=16; -MEMORY_INITIALIZATION_VECTOR= -0, -000000C00000008000000002, -00000003000000020000000100000000, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, 
-0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, 
-0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, 
-0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, 
-0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, 
-0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, 
-0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, 
-0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, 
-0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, 
-0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, 
-0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, 
-0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, 
-0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, 
-0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, 
-0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, 
-0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, 
-0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, 
-0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -00f586b30007a60340d585b300d7073300d787b3002797930027171300f707330207086302e787b3cc5027f30480258304402683040027030000000b008000ef, -00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000008067fef718e300c6a02300478793, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, 
-0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, 
-0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, 
-0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, 
-0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, 
-0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, 
-0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, 
-0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, 
-0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, 
-0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, 
-0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, 
-0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, 
-0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, 
-0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, 
-0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, 
-0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, 
-0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0; diff --git a/hw/syn/xilinx/sandbox/project_1_files/testbench.v b/hw/syn/xilinx/sandbox/testbench.v similarity index 100% rename from hw/syn/xilinx/sandbox/project_1_files/testbench.v rename to hw/syn/xilinx/sandbox/testbench.v From 7ae7ffa007e6c83207e44905f77a7f1495c20477 Mon Sep 17 00:00:00 2001 From: sij814 Date: Thu, 22 Aug 2024 18:37:34 +0200 Subject: 
[PATCH 076/407] pulled master and made initial changes --- hw/rtl/Vortex_hbm.sv | 229 +++++++++++++++++++++++++ hw/rtl/cache/VX_cache_wrap_l3.sv | 286 +++++++++++++++++++++++++++++++ sim/rtlsim/Makefile | 2 +- sim/rtlsim/processor.cpp | 246 ++++++++++++++------------ 4 files changed, 651 insertions(+), 112 deletions(-) create mode 100644 hw/rtl/Vortex_hbm.sv create mode 100644 hw/rtl/cache/VX_cache_wrap_l3.sv diff --git a/hw/rtl/Vortex_hbm.sv b/hw/rtl/Vortex_hbm.sv new file mode 100644 index 0000000000..253c325bb8 --- /dev/null +++ b/hw/rtl/Vortex_hbm.sv @@ -0,0 +1,229 @@ +// Copyright © 2019-2023 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +`include "VX_define.vh" + +module Vortex_hbm import VX_gpu_pkg::*; ( + `SCOPE_IO_DECL + + // Clock + input wire clk, + input wire reset, + + // Memory request + output wire mem_req_valid [`NUM_MEM_PORTS], + output wire mem_req_rw [`NUM_MEM_PORTS], + output wire [`VX_MEM_BYTEEN_WIDTH-1:0] mem_req_byteen [`NUM_MEM_PORTS], + output wire [`VX_MEM_ADDR_WIDTH-1:0] mem_req_addr [`NUM_MEM_PORTS], + output wire [`VX_MEM_DATA_WIDTH-1:0] mem_req_data [`NUM_MEM_PORTS], + output wire [`VX_MEM_TAG_WIDTH-1:0] mem_req_tag [`NUM_MEM_PORTS], + input wire mem_req_ready [`NUM_MEM_PORTS], + + // Memory response + input wire mem_rsp_valid [`NUM_MEM_PORTS], + input wire [`VX_MEM_DATA_WIDTH-1:0] mem_rsp_data [`NUM_MEM_PORTS], + input wire [`VX_MEM_TAG_WIDTH-1:0] mem_rsp_tag [`NUM_MEM_PORTS], + output wire mem_rsp_ready [`NUM_MEM_PORTS], + + // DCR write request + input wire dcr_wr_valid, + input wire [`VX_DCR_ADDR_WIDTH-1:0] dcr_wr_addr, + input wire [`VX_DCR_DATA_WIDTH-1:0] dcr_wr_data, + + // Status + output wire busy +); + +`ifdef SCOPE + localparam scope_cluster = 0; + `SCOPE_IO_SWITCH (`NUM_CLUSTERS); +`endif + +`ifdef PERF_ENABLE + VX_mem_perf_if mem_perf_if(); + assign mem_perf_if.icache = 'x; + assign mem_perf_if.dcache = 'x; + assign mem_perf_if.l2cache = 'x; + assign mem_perf_if.lmem = 'x; +`endif + + VX_mem_bus_if #( + .DATA_SIZE (`L2_LINE_SIZE), + .TAG_WIDTH (L2_MEM_TAG_WIDTH) + ) per_cluster_mem_bus_if[`NUM_CLUSTERS](); + + VX_mem_bus_if #( + .DATA_SIZE (`L3_LINE_SIZE), + .TAG_WIDTH (L3_MEM_TAG_WIDTH) + ) mem_bus_if[`NUM_MEM_PORTS](); + + `RESET_RELAY (l3_reset, reset); + + VX_cache_wrap_l3 #( + .INSTANCE_ID ("l3cache"), + .CACHE_SIZE (`L3_CACHE_SIZE), + .LINE_SIZE (`L3_LINE_SIZE), + .NUM_BANKS (`L3_NUM_BANKS), + .NUM_WAYS (`L3_NUM_WAYS), + .WORD_SIZE (L3_WORD_SIZE), + .NUM_MEM_PORTS (`NUM_MEM_PORTS), + .NUM_REQS (L3_NUM_REQS), + .CRSQ_SIZE (`L3_CRSQ_SIZE), + .MSHR_SIZE (`L3_MSHR_SIZE), + .MRSQ_SIZE (`L3_MRSQ_SIZE), + .MREQ_SIZE (`L3_WRITEBACK ? 
`L3_MSHR_SIZE : `L3_MREQ_SIZE), + .TAG_WIDTH (L2_MEM_TAG_WIDTH), + .WRITE_ENABLE (1), + .WRITEBACK (`L3_WRITEBACK), + .DIRTY_BYTES (`L3_WRITEBACK), + .UUID_WIDTH (`UUID_WIDTH), + .CORE_OUT_BUF (2), + .MEM_OUT_BUF (2), + .NC_ENABLE (1), + .PASSTHRU (!`L3_ENABLED) + ) l3cache ( + .clk (clk), + .reset (l3_reset), + + `ifdef PERF_ENABLE + .cache_perf (mem_perf_if.l3cache), + `endif + + .core_bus_if (per_cluster_mem_bus_if), + .mem_bus_if (mem_bus_if) + ); + + wire mem_req_fire[`NUM_MEM_PORTS-1:0]; + wire mem_rsp_fire[`NUM_MEM_PORTS-1:0]; + + for (genvar i = 0; i < `NUM_MEM_PORTS; ++i) begin + assign mem_req_valid[i] = mem_bus_if[i].req_valid; + assign mem_req_rw[i] = mem_bus_if[i].req_data.rw; + assign mem_req_byteen[i]= mem_bus_if[i].req_data.byteen; + assign mem_req_addr[i] = mem_bus_if[i].req_data.addr; + assign mem_req_data[i] = mem_bus_if[i].req_data.data; + assign mem_req_tag[i] = mem_bus_if[i].req_data.tag; + assign mem_bus_if[i].req_ready = mem_req_ready[i]; + `UNUSED_VAR (mem_bus_if[i].req_data.atype) + + assign mem_bus_if[i].rsp_valid = mem_rsp_valid[i]; + assign mem_bus_if[i].rsp_data.data = mem_rsp_data[i]; + assign mem_bus_if[i].rsp_data.tag = mem_rsp_tag[i]; + assign mem_rsp_ready[i] = mem_bus_if[i].rsp_ready; + + assign mem_req_fire[i] = mem_req_valid[i] && mem_req_ready[i]; + assign mem_rsp_fire[i] = mem_rsp_valid[i] && mem_rsp_ready[i]; + `UNUSED_VAR (mem_req_fire[i]) + `UNUSED_VAR (mem_rsp_fire[i]) + end + + VX_dcr_bus_if dcr_bus_if(); + assign dcr_bus_if.write_valid = dcr_wr_valid; + assign dcr_bus_if.write_addr = dcr_wr_addr; + assign dcr_bus_if.write_data = dcr_wr_data; + + wire [`NUM_CLUSTERS-1:0] per_cluster_busy; + + // Generate all clusters + for (genvar cluster_id = 0; cluster_id < `NUM_CLUSTERS; ++cluster_id) begin : clusters + + `RESET_RELAY (cluster_reset, reset); + + VX_dcr_bus_if cluster_dcr_bus_if(); + `BUFFER_DCR_BUS_IF (cluster_dcr_bus_if, dcr_bus_if, (`NUM_CLUSTERS > 1)); + + VX_cluster #( + .CLUSTER_ID (cluster_id), + .INSTANCE_ID 
($sformatf("cluster%0d", cluster_id)) + ) cluster ( + `SCOPE_IO_BIND (scope_cluster + cluster_id) + + .clk (clk), + .reset (cluster_reset), + + `ifdef PERF_ENABLE + .mem_perf_if (mem_perf_if), + `endif + + .dcr_bus_if (cluster_dcr_bus_if), + + .mem_bus_if (per_cluster_mem_bus_if[cluster_id]), + + .busy (per_cluster_busy[cluster_id]) + ); + end + + `BUFFER_EX(busy, (| per_cluster_busy), 1'b1, (`NUM_CLUSTERS > 1)); + +`ifdef PERF_ENABLE + + reg [`PERF_CTR_BITS-1:0] perf_mem_pending_reads; + mem_perf_t mem_perf; + + for (genvar i = 0; i < `NUM_MEM_PORTS; ++i) begin + always @(posedge clk) begin + if (reset) begin + perf_mem_pending_reads <= '0; + end else begin + perf_mem_pending_reads <= $signed(perf_mem_pending_reads) + + `PERF_CTR_BITS'($signed(2'(mem_req_fire[i] && ~mem_bus_if[i].req_data.rw) - 2'(mem_rsp_fire[i]))); + end + end + end + + wire mem_rd_req_fire[`NUM_MEM_PORTS-1:0]; + wire mem_wr_req_fire[`NUM_MEM_PORTS-1:0]; + + for (genvar i = 0; i < `NUM_MEM_PORTS; ++i) begin + assign mem_rd_req_fire[i] = mem_req_fire[i] && ~mem_bus_if[i].req_data.rw; + assign mem_wr_req_fire[i] = mem_req_fire[i] && mem_bus_if[i].req_data.rw; + end + + always @(posedge clk) begin + if (reset) begin + mem_perf <= '0; + end else begin + for (int i = 0; i < `NUM_MEM_PORTS; ++i) begin + mem_perf.reads <= mem_perf.reads + `PERF_CTR_BITS'(mem_rd_req_fire[i]); + mem_perf.writes <= mem_perf.writes + `PERF_CTR_BITS'(mem_wr_req_fire[i]); + end + mem_perf.latency <= mem_perf.latency + perf_mem_pending_reads; + end + end + assign mem_perf_if.mem = mem_perf; + +`endif + +`ifdef DBG_TRACE_MEM + always @(posedge clk) begin + for (int i = 0; i < `NUM_MEM_PORTS; ++i) begin + if (mem_req_fire[i]) begin + if (mem_req_rw[i]) + `TRACE(1, ("%d: MEM Wr Req: addr=0x%0h, tag=0x%0h, byteen=0x%0h data=0x%0h\n", $time, `TO_FULL_ADDR(mem_req_addr[i]), mem_req_tag[i], mem_req_byteen[i], mem_req_data[i])); + else + `TRACE(1, ("%d: MEM Rd Req: addr=0x%0h, tag=0x%0h, byteen=0x%0h\n", $time, 
`TO_FULL_ADDR(mem_req_addr[i]), mem_req_tag[i], mem_req_byteen[i])); + end + if (mem_rsp_fire[i]) begin + `TRACE(1, ("%d: MEM Rsp: tag=0x%0h, data=0x%0h\n", $time, mem_rsp_tag[i], mem_rsp_data[i])); + end + end + end +`endif + +`ifdef SIMULATION + always @(posedge clk) begin + $fflush(); // flush stdout buffer + end +`endif + +endmodule diff --git a/hw/rtl/cache/VX_cache_wrap_l3.sv b/hw/rtl/cache/VX_cache_wrap_l3.sv new file mode 100644 index 0000000000..9a8f1688f1 --- /dev/null +++ b/hw/rtl/cache/VX_cache_wrap_l3.sv @@ -0,0 +1,286 @@ +// Copyright © 2019-2023 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +`include "VX_cache_define.vh" + +module VX_cache_wrap_l3 import VX_gpu_pkg::*; #( + parameter `STRING INSTANCE_ID = "", + + parameter TAG_SEL_IDX = 0, + + // Number of Word requests per cycle + parameter NUM_REQS = 4, + + + // Size of cache in bytes + parameter CACHE_SIZE = 4096, + // Size of line inside a bank in bytes + parameter LINE_SIZE = 64, + // Number of banks + parameter NUM_BANKS = 1, + // Number of associative ways + parameter NUM_WAYS = 1, + // Size of a word in bytes + parameter WORD_SIZE = 4, + // Number of memory ports + parameter NUM_MEM_PORTS = 4, + + // Core Response Queue Size + parameter CRSQ_SIZE = 2, + // Miss Reserv Queue Knob + parameter MSHR_SIZE = 8, + // Memory Response Queue Size + parameter MRSQ_SIZE = 0, + // Memory Request Queue Size + parameter MREQ_SIZE = 4, + + // Enable cache writeable + parameter WRITE_ENABLE = 1, + + // Enable cache writeback + parameter WRITEBACK = 0, + + // Enable dirty bytes on writeback + parameter DIRTY_BYTES = 0, + + // Request debug identifier + parameter UUID_WIDTH = 0, + + // core request tag size + parameter TAG_WIDTH = UUID_WIDTH + 1, + + // enable bypass for non-cacheable addresses + parameter NC_ENABLE = 0, + + // Force bypass for all requests + parameter PASSTHRU = 0, + + // Core response output buffer + parameter CORE_OUT_BUF = 0, + + // Memory request output buffer + parameter MEM_OUT_BUF = 0 + ) ( + + input wire clk, + input wire reset, + + // PERF +`ifdef PERF_ENABLE + output cache_perf_t cache_perf, +`endif + + VX_mem_bus_if.slave core_bus_if [NUM_REQS], + VX_mem_bus_if.master mem_bus_if [NUM_MEM_PORTS] +); + + `STATIC_ASSERT(NUM_BANKS == (1 << `CLOG2(NUM_BANKS)), ("invalid parameter")) + + localparam MSHR_ADDR_WIDTH = `LOG2UP(MSHR_SIZE); + localparam CACHE_MEM_TAG_WIDTH = MSHR_ADDR_WIDTH + `CS_BANK_SEL_BITS; + + localparam MEM_TAG_WIDTH = PASSTHRU ? `CACHE_BYPASS_TAG_WIDTH(NUM_REQS, LINE_SIZE, WORD_SIZE, TAG_WIDTH) : + (NC_ENABLE ? 
`CACHE_NC_MEM_TAG_WIDTH(MSHR_SIZE, NUM_BANKS, NUM_REQS, LINE_SIZE, WORD_SIZE, TAG_WIDTH) : + `CACHE_MEM_TAG_WIDTH(MSHR_SIZE, NUM_BANKS)); + + localparam NC_OR_BYPASS = (NC_ENABLE || PASSTHRU); + + VX_mem_bus_if #( + .DATA_SIZE (WORD_SIZE), + .TAG_WIDTH (TAG_WIDTH) + ) core_bus_cache_if[NUM_REQS](); + + VX_mem_bus_if #( + .DATA_SIZE (LINE_SIZE), + .TAG_WIDTH (CACHE_MEM_TAG_WIDTH) + ) mem_bus_cache_if[NUM_MEM_PORTS](); + + if (NC_OR_BYPASS) begin + + `RESET_RELAY (nc_bypass_reset, reset); + for (genvar i = 0; i < NUM_MEM_PORTS; ++i) begin + VX_cache_bypass #( + .NUM_REQS (NUM_REQS), + .TAG_SEL_IDX (TAG_SEL_IDX), + + .PASSTHRU (PASSTHRU), + .NC_ENABLE (PASSTHRU ? 0 : NC_ENABLE), + + .WORD_SIZE (WORD_SIZE), + .LINE_SIZE (LINE_SIZE), + + .CORE_ADDR_WIDTH (`CS_WORD_ADDR_WIDTH), + .CORE_TAG_WIDTH (TAG_WIDTH), + + .MEM_ADDR_WIDTH (`CS_MEM_ADDR_WIDTH), + .MEM_TAG_IN_WIDTH (CACHE_MEM_TAG_WIDTH), + .MEM_TAG_OUT_WIDTH (MEM_TAG_WIDTH), + + .UUID_WIDTH (UUID_WIDTH), + + .CORE_OUT_BUF (CORE_OUT_BUF), + .MEM_OUT_BUF (MEM_OUT_BUF) + ) cache_bypass ( + .clk (clk), + .reset (nc_bypass_reset), + + .core_bus_in_if (core_bus_if), + .core_bus_out_if(core_bus_cache_if), + + .mem_bus_in_if (mem_bus_cache_if[i]), + .mem_bus_out_if (mem_bus_if[i]) + ); + end + + end else begin + + for (genvar i = 0; i < NUM_REQS; ++i) begin + `ASSIGN_VX_MEM_BUS_IF (core_bus_cache_if[i], core_bus_if[i]); + end + + for (genvar i = 0; i < NUM_MEM_PORTS; ++i) begin + `ASSIGN_VX_MEM_BUS_IF (mem_bus_if[i], mem_bus_cache_if[i]); + end + end + + if (PASSTHRU != 0) begin + + for (genvar i = 0; i < NUM_REQS; ++i) begin + `UNUSED_VAR (core_bus_cache_if[i].req_valid) + `UNUSED_VAR (core_bus_cache_if[i].req_data) + assign core_bus_cache_if[i].req_ready = 0; + + assign core_bus_cache_if[i].rsp_valid = 0; + assign core_bus_cache_if[i].rsp_data = '0; + `UNUSED_VAR (core_bus_cache_if[i].rsp_ready) + end + + for (genvar i = 0; i < NUM_MEM_PORTS; ++i) begin + assign mem_bus_cache_if[i].req_valid = 0; + assign 
mem_bus_cache_if[i].req_data = '0; + `UNUSED_VAR (mem_bus_cache_if[i].req_ready) + + `UNUSED_VAR (mem_bus_cache_if[i].rsp_valid) + `UNUSED_VAR (mem_bus_cache_if[i].rsp_data) + assign mem_bus_cache_if[i].rsp_ready = 0; + end + + `ifdef PERF_ENABLE + assign cache_perf = '0; + `endif + + end else begin + + `RESET_RELAY (cache_reset, reset); + + VX_cache #( + .INSTANCE_ID (INSTANCE_ID), + .CACHE_SIZE (CACHE_SIZE), + .LINE_SIZE (LINE_SIZE), + .NUM_BANKS (NUM_BANKS), + .NUM_WAYS (NUM_WAYS), + .WORD_SIZE (WORD_SIZE), + .NUM_REQS (NUM_REQS), + .CRSQ_SIZE (CRSQ_SIZE), + .MSHR_SIZE (MSHR_SIZE), + .MRSQ_SIZE (MRSQ_SIZE), + .MREQ_SIZE (MREQ_SIZE), + .WRITE_ENABLE (WRITE_ENABLE), + .WRITEBACK (WRITEBACK), + .DIRTY_BYTES (DIRTY_BYTES), + .UUID_WIDTH (UUID_WIDTH), + .TAG_WIDTH (TAG_WIDTH), + .CORE_OUT_BUF (NC_OR_BYPASS ? 1 : CORE_OUT_BUF), + .MEM_OUT_BUF (NC_OR_BYPASS ? 1 : MEM_OUT_BUF) + ) cache ( + .clk (clk), + .reset (cache_reset), + `ifdef PERF_ENABLE + .cache_perf (cache_perf), + `endif + .core_bus_if (core_bus_cache_if), + .mem_bus_if (mem_bus_cache_if[0]) + ); + + end + +`ifdef DBG_TRACE_CACHE + + for (genvar i = 0; i < NUM_REQS; ++i) begin + wire [`UP(UUID_WIDTH)-1:0] core_req_uuid; + wire [`UP(UUID_WIDTH)-1:0] core_rsp_uuid; + + if (UUID_WIDTH != 0) begin + assign core_req_uuid = core_bus_if[i].req_data.tag[TAG_WIDTH-1 -: UUID_WIDTH]; + assign core_rsp_uuid = core_bus_if[i].rsp_data.tag[TAG_WIDTH-1 -: UUID_WIDTH]; + end else begin + assign core_req_uuid = 0; + assign core_rsp_uuid = 0; + end + + wire core_req_fire = core_bus_if[i].req_valid && core_bus_if[i].req_ready; + wire core_rsp_fire = core_bus_if[i].rsp_valid && core_bus_if[i].rsp_ready; + + always @(posedge clk) begin + if (core_req_fire) begin + if (core_bus_if[i].req_data.rw) + `TRACE(1, ("%d: %s core-wr-req: addr=0x%0h, tag=0x%0h, req_idx=%0d, byteen=%b, data=0x%0h (#%0d)\n", $time, INSTANCE_ID, `TO_FULL_ADDR(core_bus_if[i].req_data.addr), core_bus_if[i].req_data.tag, i, core_bus_if[i].req_data.byteen, 
core_bus_if[i].req_data.data, core_req_uuid)); + else + `TRACE(1, ("%d: %s core-rd-req: addr=0x%0h, tag=0x%0h, req_idx=%0d (#%0d)\n", $time, INSTANCE_ID, `TO_FULL_ADDR(core_bus_if[i].req_data.addr), core_bus_if[i].req_data.tag, i, core_req_uuid)); + end + if (core_rsp_fire) begin + `TRACE(1, ("%d: %s core-rd-rsp: tag=0x%0h, req_idx=%0d, data=0x%0h (#%0d)\n", $time, INSTANCE_ID, core_bus_if[i].rsp_data.tag, i, core_bus_if[i].rsp_data.data, core_rsp_uuid)); + end + end + end + + wire [NUM_MEM_PORTS-1:0][`UP(UUID_WIDTH)-1:0] mem_req_uuid; + wire [NUM_MEM_PORTS-1:0][`UP(UUID_WIDTH)-1:0] mem_rsp_uuid; + + for (genvar i = 0; i < NUM_MEM_PORTS; ++i) begin + if ((UUID_WIDTH != 0) && (NC_OR_BYPASS != 0)) begin + assign mem_req_uuid[i] = mem_bus_if[i].req_data.tag[MEM_TAG_WIDTH-1 -: UUID_WIDTH]; + assign mem_rsp_uuid[i] = mem_bus_if[i].rsp_data.tag[MEM_TAG_WIDTH-1 -: UUID_WIDTH]; + end else begin + assign mem_req_uuid[i] = 0; + assign mem_rsp_uuid[i] = 0; + end + end + + wire mem_req_fire [NUM_MEM_PORTS-1:0]; + wire mem_rsp_fire [NUM_MEM_PORTS-1:0]; + + for (genvar i = 0; i < NUM_MEM_PORTS; ++i) begin + assign mem_req_fire[i] = mem_bus_if[i].req_valid && mem_bus_if[i].req_ready; + assign mem_rsp_fire[i] = mem_bus_if[i].rsp_valid && mem_bus_if[i].rsp_ready; + end + + for (genvar i = 0; i < NUM_MEM_PORTS; ++i) begin + always @(posedge clk) begin + if (mem_req_fire[i]) begin + if (mem_bus_if[i].req_data.rw) + `TRACE(1, ("%d: %s mem-wr-req: addr=0x%0h, tag=0x%0h, byteen=%b, data=0x%0h (#%0d) bank=%d\n", + $time, INSTANCE_ID, `TO_FULL_ADDR(mem_bus_if[i].req_data.addr), mem_bus_if[i].req_data.tag, mem_bus_if[i].req_data.byteen, mem_bus_if[i].req_data.data, mem_req_uuid[i], i)); + else + `TRACE(1, ("%d: %s mem-rd-req: addr=0x%0h, tag=0x%0h (#%0d) bank=%d\n", + $time, INSTANCE_ID, `TO_FULL_ADDR(mem_bus_if[i].req_data.addr), mem_bus_if[i].req_data.tag, mem_req_uuid[i], i)); + end + if (mem_rsp_fire[i]) begin + `TRACE(1, ("%d: %s mem-rd-rsp: tag=0x%0h, data=0x%0h (#%0d)\n", + $time, 
INSTANCE_ID, mem_bus_if[i].rsp_data.tag, mem_bus_if[i].rsp_data.data, mem_rsp_uuid[i])); + end + end + end +`endif + +endmodule diff --git a/sim/rtlsim/Makefile b/sim/rtlsim/Makefile index 3deffc759d..1970788131 100644 --- a/sim/rtlsim/Makefile +++ b/sim/rtlsim/Makefile @@ -43,7 +43,7 @@ ifdef AXI_BUS TOP = Vortex_axi CXXFLAGS += -DAXI_BUS else - TOP = Vortex + TOP = Vortex_hbm endif VL_FLAGS = --exe diff --git a/sim/rtlsim/processor.cpp b/sim/rtlsim/processor.cpp index e5e00f49eb..7c812f7e88 100644 --- a/sim/rtlsim/processor.cpp +++ b/sim/rtlsim/processor.cpp @@ -17,8 +17,8 @@ #include "VVortex_axi.h" typedef VVortex_axi Device; #else -#include "VVortex.h" -typedef VVortex Device; +#include "VVortex_hbm.h" +typedef VVortex_hbm Device; #endif #ifdef VCD_OUTPUT @@ -123,6 +123,15 @@ class Processor::Impl { tfp_->open("trace.vcd"); #endif + pending_mem_reqs_.resize(NUM_MEM_PORTS); + dram_queue_.resize(NUM_MEM_PORTS); + + mem_rd_rsp_active_.resize(NUM_MEM_PORTS); + mem_rd_rsp_ready_.resize(NUM_MEM_PORTS); + + mem_wr_rsp_active_.resize(NUM_MEM_PORTS); + mem_wr_rsp_ready_.resize(NUM_MEM_PORTS); + ram_ = nullptr; #ifndef NDEBUG @@ -210,15 +219,18 @@ class Processor::Impl { print_bufs_.clear(); - pending_mem_reqs_.clear(); + for (int i = 0; i < NUM_MEM_PORTS; ++i) { - { - std::queue empty; - std::swap(dram_queue_, empty); - } + pending_mem_reqs_.at(i).clear(); - mem_rd_rsp_active_ = false; - mem_wr_rsp_active_ = false; + { + std::queue empty; + std::swap(dram_queue_.at(i), empty); + } + + mem_rd_rsp_active_.at(i) = false; + mem_wr_rsp_active_.at(i) = false; + } this->mem_bus_reset(); @@ -250,17 +262,19 @@ class Processor::Impl { dram_sim_.tick(); - if (!dram_queue_.empty()) { - auto mem_req = dram_queue_.front(); - if (dram_sim_.send_request(mem_req->write, mem_req->addr, 0, [](void* arg) { - auto orig_req = reinterpret_cast(arg); - if (orig_req->ready) { - delete orig_req; - } else { - orig_req->ready = true; + for (int i = 0; i < NUM_MEM_PORTS; ++i) { + if 
(!dram_queue_.at(i).empty()) { + auto mem_req = dram_queue_.at(i).front(); + if (dram_sim_.send_request(mem_req->write, mem_req->addr, 0, [](void* arg) { + auto orig_req = reinterpret_cast(arg); + if (orig_req->ready) { + delete orig_req; + } else { + orig_req->ready = true; + } + }, mem_req)) { + dram_queue_.at(i).pop(); } - }, mem_req)) { - dram_queue_.pop(); } } @@ -437,116 +451,126 @@ class Processor::Impl { #else void mem_bus_reset() { - device_->mem_req_ready = 0; - device_->mem_rsp_valid = 0; + for (int i = 0; i < NUM_MEM_PORTS; ++i) { + device_->mem_req_ready[i] = 0; + device_->mem_rsp_valid[i] = 0; + } } void mem_bus_eval(bool clk) { - if (!clk) { - mem_rd_rsp_ready_ = device_->mem_rsp_ready; - return; + for (int i = 0; i < NUM_MEM_PORTS; ++i) { + if (!clk) { + mem_rd_rsp_ready_.at(i) = device_->mem_rsp_ready[i]; + return; + } } - if (ram_ == nullptr) { - device_->mem_req_ready = 0; - return; + for (int i = 0; i < NUM_MEM_PORTS; ++i) { + if (ram_ == nullptr) { + device_->mem_req_ready[i] = 0; + return; + } } // process memory read responses - if (mem_rd_rsp_active_ - && device_->mem_rsp_valid && mem_rd_rsp_ready_) { - mem_rd_rsp_active_ = false; - } - if (!mem_rd_rsp_active_) { - if (!pending_mem_reqs_.empty() - && (*pending_mem_reqs_.begin())->ready) { - device_->mem_rsp_valid = 1; - auto mem_rsp_it = pending_mem_reqs_.begin(); - auto mem_rsp = *mem_rsp_it; - /* - printf("%0ld: [sim] MEM Rd Rsp: tag=0x%0lx, addr=0x%0lx, data=0x", timestamp, mem_rsp->tag, mem_rsp->addr); - for (int i = MEM_BLOCK_SIZE-1; i >= 0; --i) { - printf("%02x", mem_rsp->block[i]); + for (int i = 0; i < NUM_MEM_PORTS; ++i) { + if (mem_rd_rsp_active_.at(i) + && device_->mem_rsp_valid[i] && mem_rd_rsp_ready_.at(i)) { + mem_rd_rsp_active_.at(i) = false; + } + if (!mem_rd_rsp_active_.at(i)) { + if (!pending_mem_reqs_.at(i).empty() + && (*pending_mem_reqs_.at(i).begin())->ready) { + device_->mem_rsp_valid[i] = 1; + auto mem_rsp_it = pending_mem_reqs_.at(i).begin(); + auto mem_rsp = 
*mem_rsp_it; + /* + printf("%0ld: [sim] MEM Rd Rsp: tag=0x%0lx, addr=0x%0lx, data=0x", timestamp, mem_rsp->tag, mem_rsp->addr); + for (int i = MEM_BLOCK_SIZE-1; i >= 0; --i) { + printf("%02x", mem_rsp->block[i]); + } + printf("\n"); + */ + memcpy(VDataCast::get(device_->mem_rsp_data[i]), mem_rsp->block.data(), MEM_BLOCK_SIZE); + device_->mem_rsp_tag[i] = mem_rsp->tag; + pending_mem_reqs_.at(i).erase(mem_rsp_it); + mem_rd_rsp_active_.at(i) = true; + delete mem_rsp; + } else { + device_->mem_rsp_valid[i] = 0; } - printf("\n"); - */ - memcpy(VDataCast::get(device_->mem_rsp_data), mem_rsp->block.data(), MEM_BLOCK_SIZE); - device_->mem_rsp_tag = mem_rsp->tag; - pending_mem_reqs_.erase(mem_rsp_it); - mem_rd_rsp_active_ = true; - delete mem_rsp; - } else { - device_->mem_rsp_valid = 0; } } // process memory requests - if (device_->mem_req_valid && running_) { - uint64_t byte_addr = (device_->mem_req_addr * MEM_BLOCK_SIZE); - if (device_->mem_req_rw) { - auto byteen = device_->mem_req_byteen; - auto data = VDataCast::get(device_->mem_req_data); - - if (byte_addr >= uint64_t(IO_COUT_ADDR) - && byte_addr < (uint64_t(IO_COUT_ADDR) + IO_COUT_SIZE)) { - // process console output - for (int i = 0; i < IO_COUT_SIZE; i++) { - if ((byteen >> i) & 0x1) { - auto& ss_buf = print_bufs_[i]; - char c = data[i]; - ss_buf << c; - if (c == '\n') { - std::cout << std::dec << "#" << i << ": " << ss_buf.str() << std::flush; - ss_buf.str(""); + for (int j = 0; j < NUM_MEM_PORTS; ++j) { + if (device_->mem_req_valid[j] && running_) { + uint64_t byte_addr = (device_->mem_req_addr[j] * MEM_BLOCK_SIZE); + if (device_->mem_req_rw[j]) { + auto byteen = device_->mem_req_byteen[j]; + auto data = VDataCast::get(device_->mem_req_data[j]); + + if (byte_addr >= uint64_t(IO_COUT_ADDR) + && byte_addr < (uint64_t(IO_COUT_ADDR) + IO_COUT_SIZE)) { + // process console output + for (int i = 0; i < IO_COUT_SIZE; i++) { + if ((byteen >> i) & 0x1) { + auto& ss_buf = print_bufs_[i]; + char c = data[i]; + ss_buf << c; 
+ if (c == '\n') { + std::cout << std::dec << "#" << i << ": " << ss_buf.str() << std::flush; + ss_buf.str(""); + } } } - } - } else { - // process writes - /* - printf("%0ld: [sim] MEM Wr Req: tag=0x%0lx, addr=0x%0lx, byteen=0x", timestamp, device_->mem_req_tag, byte_addr); - for (int i = (MEM_BLOCK_SIZE/4)-1; i >= 0; --i) { - printf("%x", (int)((byteen >> (4 * i)) & 0xf)); - } - printf(", data=0x"); - for (int i = MEM_BLOCK_SIZE-1; i >= 0; --i) { - printf("%d=%02x,", i, data[i]); - } - printf("\n"); - */ - for (int i = 0; i < MEM_BLOCK_SIZE; i++) { - if ((byteen >> i) & 0x1) { - (*ram_)[byte_addr + i] = data[i]; + } else { + // process writes + /* + printf("%0ld: [sim] MEM Wr Req: tag=0x%0lx, addr=0x%0lx, byteen=0x", timestamp, device_->mem_req_tag, byte_addr); + for (int i = (MEM_BLOCK_SIZE/4)-1; i >= 0; --i) { + printf("%x", (int)((byteen >> (4 * i)) & 0xf)); } - } + printf(", data=0x"); + for (int i = MEM_BLOCK_SIZE-1; i >= 0; --i) { + printf("%d=%02x,", i, data[i]); + } + printf("\n"); + */ + for (int i = 0; i < MEM_BLOCK_SIZE; i++) { + if ((byteen >> i) & 0x1) { + (*ram_)[byte_addr + i] = data[i]; + } + } + + auto mem_req = new mem_req_t(); + mem_req->tag = device_->mem_req_tag[j]; + mem_req->addr = byte_addr; + mem_req->write = true; + mem_req->ready = true; + // send dram request + dram_queue_.at(j).push(mem_req); + } + } else { + // process reads auto mem_req = new mem_req_t(); - mem_req->tag = device_->mem_req_tag; + mem_req->tag = device_->mem_req_tag[j]; mem_req->addr = byte_addr; - mem_req->write = true; - mem_req->ready = true; + mem_req->write = false; + mem_req->ready = false; + ram_->read(mem_req->block.data(), byte_addr, MEM_BLOCK_SIZE); + pending_mem_reqs_.at(j).emplace_back(mem_req); + + //printf("%0ld: [sim] MEM Rd Req: addr=0x%0lx, tag=0x%0lx\n", timestamp, byte_addr, device_->mem_req_tag); // send dram request - dram_queue_.push(mem_req); + dram_queue_.at(j).push(mem_req); } - } else { - // process reads - auto mem_req = new mem_req_t(); - 
mem_req->tag = device_->mem_req_tag; - mem_req->addr = byte_addr; - mem_req->write = false; - mem_req->ready = false; - ram_->read(mem_req->block.data(), byte_addr, MEM_BLOCK_SIZE); - pending_mem_reqs_.emplace_back(mem_req); - - //printf("%0ld: [sim] MEM Rd Req: addr=0x%0lx, tag=0x%0lx\n", timestamp, byte_addr, device_->mem_req_tag); - - // send dram request - dram_queue_.push(mem_req); } - } - device_->mem_req_ready = running_; + device_->mem_req_ready[j] = running_; + } } #endif @@ -583,9 +607,9 @@ class Processor::Impl { std::unordered_map print_bufs_; - std::list pending_mem_reqs_; + std::vector> pending_mem_reqs_; - std::queue dram_queue_; + std::vector> dram_queue_; DramSim dram_sim_; @@ -597,11 +621,11 @@ class Processor::Impl { RAM* ram_; - bool mem_rd_rsp_active_; - bool mem_rd_rsp_ready_; + std::vector mem_rd_rsp_active_; + std::vector mem_rd_rsp_ready_; - bool mem_wr_rsp_active_; - bool mem_wr_rsp_ready_; + std::vector mem_wr_rsp_active_; + std::vector mem_wr_rsp_ready_; bool running_; }; From df99b9da0e28dd07ed9c7f47d939d052b0a9fed8 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Thu, 22 Aug 2024 16:29:27 -0700 Subject: [PATCH 077/407] minor update --- hw/syn/xilinx/dut/project.tcl | 2 +- hw/syn/yosys/synth.sh | 27 +++++++++++++++++++++++++-- 2 files changed, 26 insertions(+), 3 deletions(-) diff --git a/hw/syn/xilinx/dut/project.tcl b/hw/syn/xilinx/dut/project.tcl index bee841d79e..05b76d21f4 100644 --- a/hw/syn/xilinx/dut/project.tcl +++ b/hw/syn/xilinx/dut/project.tcl @@ -59,7 +59,7 @@ foreach def $vdefines_list { } # Synthesis -synth_design -top $top_module -include_dirs $vincludes_list -flatten_hierarchy none +synth_design -top $top_module -include_dirs $vincludes_list -mode out_of_context -flatten_hierarchy none write_checkpoint -force post_synth.dcp report_utilization -file utilization.rpt -hierarchical -hierarchical_percentages diff --git a/hw/syn/yosys/synth.sh b/hw/syn/yosys/synth.sh index 79708b1890..b44f16e6b7 100755 --- 
a/hw/syn/yosys/synth.sh +++ b/hw/syn/yosys/synth.sh @@ -20,13 +20,15 @@ # exit when any command fails set -e +library="" +sdc_file="" source="" top_level="" dir_list=() inc_args="" macro_args="" no_warnings=1 -process="elaborate,netlist,techmap,verilog" +process="elaborate,netlist,techmap,verilog,link" declare -a excluded_warnings=("Resizing cell port") @@ -66,8 +68,14 @@ checkErrors() usage() { echo "$0 usage:" && grep " .)\ #" $0; exit 0; } [ $# -eq 0 ] && usage -while getopts "s:t:I:D:P:Wh" arg; do +while getopts "c:l:s:t:I:D:P:Wh" arg; do case $arg in + l) # library + library=${OPTARG} + ;; + c) # SDC constraints + sdc_file=${OPTARG} + ;; s) # source source=${OPTARG} ;; @@ -95,6 +103,16 @@ while getopts "s:t:I:D:P:Wh" arg; do done { + # read device library + if [ -n "$library" ]; then + echo "read_liberty $library" + fi + + # read design constraints + if [ -n "$sdc_file" ]; then + echo "read_sdc $sdc_file" + fi + # read design sources for dir in "${dir_list[@]}" do @@ -117,6 +135,11 @@ done echo "synth -top $top_level" fi + # link design + if echo "$process" | grep -q "link"; then + echo "link_design -top $top_level" + fi + # convert to netlist if echo "$process" | grep -q "netlist"; then echo "proc; opt" From 6eeb8eac0f3ed3a8242bf7b76feb19ec7b6402b2 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Fri, 23 Aug 2024 00:54:48 -0700 Subject: [PATCH 078/407] minor update --- hw/rtl/libs/VX_stream_buffer.sv | 84 ++++++++++++++--------------- hw/syn/xilinx/dut/core/Makefile | 2 +- hw/syn/xilinx/dut/fpu/Makefile | 2 +- hw/syn/xilinx/dut/issue/Makefile | 2 +- hw/syn/xilinx/dut/project.xdc | 5 +- hw/syn/xilinx/dut/top/Makefile | 2 +- hw/syn/xilinx/dut/unittest/Makefile | 2 +- hw/syn/xilinx/dut/vortex/Makefile | 2 +- 8 files changed, 49 insertions(+), 52 deletions(-) diff --git a/hw/rtl/libs/VX_stream_buffer.sv b/hw/rtl/libs/VX_stream_buffer.sv index bebe8ec71f..ea45619333 100644 --- a/hw/rtl/libs/VX_stream_buffer.sv +++ b/hw/rtl/libs/VX_stream_buffer.sv @@ -1,18 +1,18 
@@ // Copyright 2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at -// +// // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. -// A stream elastic buffer operates at full-bandwidth where push and pop can happen simultaneously +// A stream elastic buffer operates at full-bandwidth where fire_in and fire_out can happen simultaneously // It has the following benefits: // + full-bandwidth throughput // + ready_in and ready_out are decoupled @@ -27,21 +27,21 @@ module VX_stream_buffer #( parameter DATAW = 1, parameter OUT_REG = 0, parameter PASSTHRU = 0 -) ( +) ( input wire clk, input wire reset, input wire valid_in, - output wire ready_in, + output wire ready_in, input wire [DATAW-1:0] data_in, output wire [DATAW-1:0] data_out, input wire ready_out, output wire valid_out -); +); if (PASSTHRU != 0) begin `UNUSED_VAR (clk) `UNUSED_VAR (reset) assign ready_in = ready_out; - assign valid_out = valid_in; + assign valid_out = valid_in; assign data_out = data_in; end else begin if (OUT_REG != 0) begin @@ -49,77 +49,71 @@ module VX_stream_buffer #( reg [DATAW-1:0] data_out_r; reg [DATAW-1:0] buffer; reg valid_out_r; - reg use_buffer; - - wire push = valid_in && ready_in; - wire stall_out = valid_out_r && ~ready_out; - + reg no_buffer; + + wire fire_in = valid_in && ready_in; + wire flow_out = ready_out || ~valid_out_r; + always @(posedge clk) begin if (reset) begin - valid_out_r <= 0; - use_buffer <= 0; + valid_out_r <= 0; + no_buffer <= 1; end else begin if (ready_out) begin - use_buffer <= 0; + no_buffer <= 1; end 
else if (valid_in && valid_out) begin - use_buffer <= 1; + no_buffer <= 0; end - if (~stall_out) begin - valid_out_r <= valid_in || use_buffer; + if (flow_out) begin + valid_out_r <= valid_in || ~no_buffer; end end end always @(posedge clk) begin - if (push) begin + if (fire_in) begin buffer <= data_in; end - if (~stall_out) begin - data_out_r <= use_buffer ? buffer : data_in; + if (flow_out) begin + data_out_r <= no_buffer ? data_in : buffer; end end - assign ready_in = ~use_buffer; + assign ready_in = no_buffer; assign valid_out = valid_out_r; assign data_out = data_out_r; end else begin - reg [1:0][DATAW-1:0] shift_reg; - reg valid_out_r, ready_in_r, rd_ptr_r; + reg [DATAW-1:0] shift_reg [1:0]; + reg [1:0] fifo_state; - wire push = valid_in && ready_in; - wire pop = valid_out_r && ready_out; + wire fire_in = valid_in && ready_in; + wire fire_out = valid_out && ready_out; always @(posedge clk) begin if (reset) begin - valid_out_r <= 0; - ready_in_r <= 1; - rd_ptr_r <= 1; + fifo_state <= 2'b00; end else begin - if (push) begin - if (!pop) begin - ready_in_r <= rd_ptr_r; - valid_out_r <= 1; - end - end else if (pop) begin - ready_in_r <= 1; - valid_out_r <= rd_ptr_r; - end - rd_ptr_r <= rd_ptr_r ^ (push ^ pop); - end + case ({fire_in, fire_out}) + 2'b10: fifo_state <= {fifo_state[0], 1'b1}; // 00 -> 01, 01 -> 10 + 2'b01: fifo_state <= {1'b0, fifo_state[1]}; // 10 -> 01, 01 -> 00 + default: fifo_state <= fifo_state; + endcase + end end always @(posedge clk) begin - if (push) begin + if (fire_in) begin shift_reg[1] <= shift_reg[0]; shift_reg[0] <= data_in; end end - assign ready_in = ready_in_r; - assign valid_out = valid_out_r; - assign data_out = shift_reg[rd_ptr_r]; + assign ready_in = ~fifo_state[1]; + assign valid_out = fifo_state[0]; + assign data_out = shift_reg[fifo_state[1]]; + end end diff --git a/hw/syn/xilinx/dut/core/Makefile b/hw/syn/xilinx/dut/core/Makefile index eeeaa52338..86bb0b53c2 100644 --- a/hw/syn/xilinx/dut/core/Makefile +++ 
b/hw/syn/xilinx/dut/core/Makefile @@ -11,4 +11,4 @@ FPU_INCLUDE = -I$(RTL_DIR)/fpu ifneq (,$(findstring FPU_FPNEW,$(CONFIGS))) FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/fpnew/src/common_cells/include -J$(THIRD_PARTY_DIR)/fpnew/src/common_cells/src -J$(THIRD_PARTY_DIR)/fpnew/src/fpu_div_sqrt_mvp/hdl -J$(THIRD_PARTY_DIR)/fpnew/src endif -RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache -I$(IP_CACHE_DIR) $(FPU_INCLUDE) \ No newline at end of file +RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache $(FPU_INCLUDE) \ No newline at end of file diff --git a/hw/syn/xilinx/dut/fpu/Makefile b/hw/syn/xilinx/dut/fpu/Makefile index b7826dc689..133a8a4e93 100644 --- a/hw/syn/xilinx/dut/fpu/Makefile +++ b/hw/syn/xilinx/dut/fpu/Makefile @@ -8,4 +8,4 @@ FPU_INCLUDE = -I$(RTL_DIR)/fpu ifneq (,$(findstring FPU_FPNEW,$(CONFIGS))) FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/fpnew/src/common_cells/include -J$(THIRD_PARTY_DIR)/fpnew/src/common_cells/src -J$(THIRD_PARTY_DIR)/fpnew/src/fpu_div_sqrt_mvp/hdl -J$(THIRD_PARTY_DIR)/fpnew/src endif -RTL_INCLUDE = $(FPU_INCLUDE) -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(IP_CACHE_DIR) +RTL_INCLUDE = $(FPU_INCLUDE) -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces diff --git a/hw/syn/xilinx/dut/issue/Makefile b/hw/syn/xilinx/dut/issue/Makefile index c1804a3989..bb93f44d27 100644 --- a/hw/syn/xilinx/dut/issue/Makefile +++ b/hw/syn/xilinx/dut/issue/Makefile @@ -11,4 +11,4 @@ FPU_INCLUDE = -I$(RTL_DIR)/fpu ifneq (,$(findstring FPU_FPNEW,$(CONFIGS))) FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/fpnew/src/common_cells/include -J$(THIRD_PARTY_DIR)/fpnew/src/common_cells/src -J$(THIRD_PARTY_DIR)/fpnew/src/fpu_div_sqrt_mvp/hdl -J$(THIRD_PARTY_DIR)/fpnew/src endif -RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem $(FPU_INCLUDE) -I$(IP_CACHE_DIR) $(FPU_INCLUDE) \ 
No newline at end of file +RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem $(FPU_INCLUDE) $(FPU_INCLUDE) \ No newline at end of file diff --git a/hw/syn/xilinx/dut/project.xdc b/hw/syn/xilinx/dut/project.xdc index 8c74ebb4a9..f786e78373 100644 --- a/hw/syn/xilinx/dut/project.xdc +++ b/hw/syn/xilinx/dut/project.xdc @@ -1 +1,4 @@ -## empty \ No newline at end of file +set CLK_FREQ_MHZ 300 +set clk_port_name clk +set clk_port [get_ports $clk_port_name] +create_clock -name core_clock -period [expr 1000.0 / $CLK_FREQ_MHZ] $clk_port \ No newline at end of file diff --git a/hw/syn/xilinx/dut/top/Makefile b/hw/syn/xilinx/dut/top/Makefile index 341690206d..bc55224f64 100644 --- a/hw/syn/xilinx/dut/top/Makefile +++ b/hw/syn/xilinx/dut/top/Makefile @@ -29,4 +29,4 @@ FPU_INCLUDE = -I$(RTL_DIR)/fpu ifneq (,$(findstring FPU_FPNEW,$(CONFIGS))) FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/fpnew/src/common_cells/include -J$(THIRD_PARTY_DIR)/fpnew/src/common_cells/src -J$(THIRD_PARTY_DIR)/fpnew/src/fpu_div_sqrt_mvp/hdl -J$(THIRD_PARTY_DIR)/fpnew/src endif -RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache -I$(AFU_DIR) -I$(AFU_DIR)/ccip -I$(IP_CACHE_DIR) $(FPU_INCLUDE) +RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache -I$(AFU_DIR) -I$(AFU_DIR)/ccip $(FPU_INCLUDE) diff --git a/hw/syn/xilinx/dut/unittest/Makefile b/hw/syn/xilinx/dut/unittest/Makefile index 2bfb18e4e4..061e754419 100644 --- a/hw/syn/xilinx/dut/unittest/Makefile +++ b/hw/syn/xilinx/dut/unittest/Makefile @@ -8,4 +8,4 @@ FPU_INCLUDE = -I$(RTL_DIR)/fpu ifneq (,$(findstring FPU_FPNEW,$(CONFIGS))) FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/fpnew/src/common_cells/include -J$(THIRD_PARTY_DIR)/fpnew/src/common_cells/src -J$(THIRD_PARTY_DIR)/fpnew/src/fpu_div_sqrt_mvp/hdl -J$(THIRD_PARTY_DIR)/fpnew/src endif -RTL_INCLUDE = -I$(RTL_DIR) 
-I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache -I$(IP_CACHE_DIR) $(FPU_INCLUDE) \ No newline at end of file +RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache $(FPU_INCLUDE) \ No newline at end of file diff --git a/hw/syn/xilinx/dut/vortex/Makefile b/hw/syn/xilinx/dut/vortex/Makefile index 7429df414e..ee49be4367 100644 --- a/hw/syn/xilinx/dut/vortex/Makefile +++ b/hw/syn/xilinx/dut/vortex/Makefile @@ -13,4 +13,4 @@ FPU_INCLUDE = -I$(RTL_DIR)/fpu ifneq (,$(findstring FPU_FPNEW,$(CONFIGS))) FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/fpnew/src/common_cells/include -J$(THIRD_PARTY_DIR)/fpnew/src/common_cells/src -J$(THIRD_PARTY_DIR)/fpnew/src/fpu_div_sqrt_mvp/hdl -J$(THIRD_PARTY_DIR)/fpnew/src endif -RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache -I$(IP_CACHE_DIR) $(FPU_INCLUDE) +RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache $(FPU_INCLUDE) From 4f9b15d96d8a72743074200d8bda1be487d67d85 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sat, 24 Aug 2024 01:54:17 -0700 Subject: [PATCH 079/407] minor update --- hw/syn/xilinx/dut/project.tcl | 16 ++++++++++++++-- hw/syn/xilinx/sandbox/project.tcl.in | 17 ++++++++++++++--- 2 files changed, 28 insertions(+), 5 deletions(-) diff --git a/hw/syn/xilinx/dut/project.tcl b/hw/syn/xilinx/dut/project.tcl index 05b76d21f4..c3e7e431c4 100644 --- a/hw/syn/xilinx/dut/project.tcl +++ b/hw/syn/xilinx/dut/project.tcl @@ -11,6 +11,9 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+# Start time +set start_time [clock seconds] + if { $::argc != 5 } { puts "ERROR: Program \"$::argv0\" requires 5 arguments!\n" puts "Usage: $::argv0 \n" @@ -77,6 +80,15 @@ write_checkpoint -force post_route.dcp report_route_status -file route.rpt # Generate the synthesis report -report_timing -file timing.rpt +report_timing_summary -file timing.rpt report_power -file power.rpt -report_drc -file drc.rpt \ No newline at end of file +report_drc -file drc.rpt + +# End time and calculation +set elapsed_time [expr {[clock seconds] - $start_time}] + +# Display elapsed time +set hours [format "%02d" [expr {$elapsed_time / 3600}]] +set minutes [format "%02d" [expr {($elapsed_time % 3600) / 60}]] +set seconds [format "%02d" [expr {$elapsed_time % 60}]] +puts "Total elapsed time: ${hours}h ${minutes}m ${seconds}s" \ No newline at end of file diff --git a/hw/syn/xilinx/sandbox/project.tcl.in b/hw/syn/xilinx/sandbox/project.tcl.in index e92e31a446..7a25f6278b 100644 --- a/hw/syn/xilinx/sandbox/project.tcl.in +++ b/hw/syn/xilinx/sandbox/project.tcl.in @@ -11,6 +11,9 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+# Start time +set start_time [clock seconds] + if { $::argc != 3 } { puts "ERROR: Program \"$::argv0\" requires 3 arguments!\n" puts "Usage: $::argv0 \n" @@ -140,7 +143,6 @@ set_property -name "top_lib" -value "xil_defaultlib" -objects $obj set_property -name "verilog_define" -value "" -objects $obj set_property -name "verilog_uppercase" -value "0" -objects $obj - # Set 'utils_1' fileset object set obj [get_filesets utils_1] # Empty (no sources present) @@ -405,6 +407,15 @@ open_run impl_1 report_utilization -file utilization.rpt -hierarchical -hierarchical_percentages report_place_status -file place.rpt report_route_status -file route.rpt -report_timing -file timing.rpt +report_timing_summary -file timing.rpt report_power -file power.rpt -report_drc -file drc.rpt \ No newline at end of file +report_drc -file drc.rpt + +# End time and calculation +set elapsed_time [expr {[clock seconds] - $start_time}] + +# Display elapsed time +set hours [format "%02d" [expr {$elapsed_time / 3600}]] +set minutes [format "%02d" [expr {($elapsed_time % 3600) / 60}]] +set seconds [format "%02d" [expr {$elapsed_time % 60}]] +puts "Total elapsed time: ${hours}h ${minutes}m ${seconds}s" \ No newline at end of file From ade6b2c9856e0334fadf35f699f74fe7023d977c Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sat, 24 Aug 2024 01:55:25 -0700 Subject: [PATCH 080/407] timing optimization --- hw/rtl/VX_socket.sv | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/hw/rtl/VX_socket.sv b/hw/rtl/VX_socket.sv index 33c29e5150..9ed76814bc 100644 --- a/hw/rtl/VX_socket.sv +++ b/hw/rtl/VX_socket.sv @@ -105,7 +105,7 @@ module VX_socket import VX_gpu_pkg::*; #( .UUID_WIDTH (`UUID_WIDTH), .WRITE_ENABLE (0), .NC_ENABLE (0), - .CORE_OUT_BUF (2), + .CORE_OUT_BUF (3), .MEM_OUT_BUF (0) ) icache ( `ifdef PERF_ENABLE @@ -152,7 +152,7 @@ module VX_socket import VX_gpu_pkg::*; #( .WRITEBACK (`DCACHE_WRITEBACK), .DIRTY_BYTES (`DCACHE_WRITEBACK), .NC_ENABLE (1), - .CORE_OUT_BUF (2), + 
.CORE_OUT_BUF (3), .MEM_OUT_BUF (0) ) dcache ( `ifdef PERF_ENABLE @@ -185,7 +185,7 @@ module VX_socket import VX_gpu_pkg::*; #( .TAG_WIDTH (L1_MEM_TAG_WIDTH), .TAG_SEL_IDX (0), .ARBITER ("R"), - .REQ_OUT_BUF (0), + .REQ_OUT_BUF (3), .RSP_OUT_BUF (3) ) mem_arb ( .clk (clk), From bcf7d9f9606944b90a8012a15be5a3e677b20650 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sat, 24 Aug 2024 01:56:14 -0700 Subject: [PATCH 081/407] timing optimization --- hw/rtl/fpu/VX_fcvt_unit.sv | 73 +++++++++++++++++++------------------- hw/rtl/fpu/VX_fncp_unit.sv | 67 +++++++++++++++++----------------- hw/rtl/fpu/VX_fpu_cvt.sv | 5 +-- hw/rtl/fpu/VX_fpu_div.sv | 2 +- hw/rtl/fpu/VX_fpu_fma.sv | 2 +- hw/rtl/fpu/VX_fpu_ncp.sv | 5 +-- hw/rtl/fpu/VX_fpu_sqrt.sv | 2 +- 7 files changed, 80 insertions(+), 76 deletions(-) diff --git a/hw/rtl/fpu/VX_fcvt_unit.sv b/hw/rtl/fpu/VX_fcvt_unit.sv index b5b7b1690c..5756a25eda 100644 --- a/hw/rtl/fpu/VX_fcvt_unit.sv +++ b/hw/rtl/fpu/VX_fcvt_unit.sv @@ -1,17 +1,17 @@ // Copyright © 2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
-// Modified port of cast module from fpnew Libray +// Modified port of cast module from fpnew Libray // reference: https://github.com/pulp-platform/fpnew `include "VX_fpu_define.vh" @@ -22,7 +22,8 @@ module VX_fcvt_unit import VX_fpu_pkg::*; #( parameter LATENCY = 1, parameter INT_WIDTH = 32, parameter MAN_BITS = 23, - parameter EXP_BITS = 8 + parameter EXP_BITS = 8, + parameter OUT_REG = 0 ) ( input wire clk, input wire reset, @@ -35,10 +36,10 @@ module VX_fcvt_unit import VX_fpu_pkg::*; #( input wire is_signed, input wire [31:0] dataa, - output wire [31:0] result, + output wire [31:0] result, output wire [`FP_FLAGS_BITS-1:0] fflags -); +); // Constants localparam EXP_BIAS = 2**(EXP_BITS-1)-1; @@ -55,11 +56,11 @@ module VX_fcvt_unit import VX_fpu_pkg::*; #( localparam FMT_SHIFT_COMPENSATION = S_MAN_WIDTH - 1 - MAN_BITS; localparam NUM_FP_STICKY = 2 * S_MAN_WIDTH - MAN_BITS - 1; // removed mantissa, 1. and R localparam NUM_INT_STICKY = 2 * S_MAN_WIDTH - INT_WIDTH; // removed int and R - + // Input processing - - fclass_t fclass; - VX_fp_classifier #( + + fclass_t fclass; + VX_fp_classifier #( .EXP_BITS (EXP_BITS), .MAN_BITS (MAN_BITS) ) fp_classifier ( @@ -69,9 +70,9 @@ module VX_fcvt_unit import VX_fpu_pkg::*; #( ); wire [S_MAN_WIDTH-1:0] input_mant; - wire [S_EXP_WIDTH-1:0] input_exp; + wire [S_EXP_WIDTH-1:0] input_exp; wire input_sign; - + wire i2f_sign = dataa[INT_WIDTH-1]; wire f2i_sign = dataa[INT_WIDTH-1] && is_signed; wire [S_MAN_WIDTH-1:0] f2i_mantissa = f2i_sign ? (-dataa) : dataa; @@ -81,7 +82,7 @@ module VX_fcvt_unit import VX_fpu_pkg::*; #( assign input_sign = is_itof ? 
f2i_sign : i2f_sign; // Pipeline stage0 - + wire is_itof_s0; wire is_signed_s0; wire [2:0] rnd_mode_s0; @@ -92,7 +93,7 @@ module VX_fcvt_unit import VX_fpu_pkg::*; #( VX_pipe_register #( .DATAW (1 + `INST_FRM_BITS + 1 + $bits(fclass_t) + 1 + S_EXP_WIDTH + S_MAN_WIDTH), - .DEPTH (LATENCY > 2) + .DEPTH (LATENCY > 1) ) pipe_reg0 ( .clk (clk), .reset (reset), @@ -100,7 +101,7 @@ module VX_fcvt_unit import VX_fpu_pkg::*; #( .data_in ({is_itof, is_signed, frm, fclass, input_sign, input_exp, input_mant}), .data_out ({is_itof_s0, is_signed_s0, rnd_mode_s0, fclass_s0, input_sign_s0, fmt_exponent_s0, encoded_mant_s0}) ); - + // Normalization wire [LZC_RESULT_WIDTH-1:0] renorm_shamt_s0; // renormalization shift amount @@ -113,12 +114,12 @@ module VX_fcvt_unit import VX_fpu_pkg::*; #( .data_out (renorm_shamt_s0), .valid_out (mant_is_nonzero_s0) ); - + wire mant_is_zero_s0 = ~mant_is_nonzero_s0; - wire [S_MAN_WIDTH-1:0] input_mant_n_s0; // normalized input mantissa + wire [S_MAN_WIDTH-1:0] input_mant_n_s0; // normalized input mantissa wire [S_EXP_WIDTH-1:0] input_exp_n_s0; // unbiased true exponent - + // Realign input mantissa, append zeroes if destination is wider assign input_mant_n_s0 = encoded_mant_s0 << renorm_shamt_s0; @@ -140,7 +141,7 @@ module VX_fcvt_unit import VX_fpu_pkg::*; #( VX_pipe_register #( .DATAW (1 + `INST_FRM_BITS + 1 + $bits(fclass_t) + 1 + 1 + S_MAN_WIDTH + S_EXP_WIDTH), - .DEPTH (LATENCY > 1) + .DEPTH (LATENCY > 2) ) pipe_reg1 ( .clk (clk), .reset (reset), @@ -169,30 +170,30 @@ module VX_fcvt_unit import VX_fpu_pkg::*; #( wire of_before_round_s1 = overflow; // Pipeline stage2 - + wire is_itof_s2; wire is_signed_s2; wire [2:0] rnd_mode_s2; - fclass_t fclass_s2; + fclass_t fclass_s2; wire mant_is_zero_s2; wire input_sign_s2; wire [2*S_MAN_WIDTH:0] destination_mant_s2; wire [EXP_BITS-1:0] final_exp_s2; wire of_before_round_s2; - + VX_pipe_register #( .DATAW (1 + 1 + `INST_FRM_BITS + $bits(fclass_t) + 1 + 1 + (2*S_MAN_WIDTH+1) + EXP_BITS + 1), - .DEPTH 
(LATENCY > 3) + .DEPTH (LATENCY > 0) ) pipe_reg2 ( .clk (clk), .reset (reset), .enable (enable), .data_in ({is_itof_s1, is_signed_s1, rnd_mode_s1, fclass_s1, mant_is_zero_s1, input_sign_s1, destination_mant_s1, final_exp_s1, of_before_round_s1}), .data_out ({is_itof_s2, is_signed_s2, rnd_mode_s2, fclass_s2, mant_is_zero_s2, input_sign_s2, destination_mant_s2, final_exp_s2, of_before_round_s2}) - ); - + ); + // Rouding and classification - + wire [MAN_BITS-1:0] final_mant_s2; // mantissa after adjustments wire [INT_WIDTH-1:0] final_int_s2; // integer shifted in position wire [1:0] f2i_round_sticky_bits_s2, i2f_round_sticky_bits_s2; @@ -237,20 +238,20 @@ module VX_fcvt_unit import VX_fpu_pkg::*; #( wire is_itof_s3; wire is_signed_s3; - fclass_t fclass_s3; + fclass_t fclass_s3; wire mant_is_zero_s3; wire input_sign_s3; wire rounded_sign_s3; wire [INT_WIDTH-1:0] rounded_abs_s3; - wire of_before_round_s3; + wire of_before_round_s3; wire f2i_round_has_sticky_s3; wire i2f_round_has_sticky_s3; - `UNUSED_VAR (fclass_s3) + `UNUSED_VAR (fclass_s3) VX_pipe_register #( .DATAW (1 + 1 + $bits(fclass_t) + 1 + 1 + 32 + 1 + 1 + 1 + 1), - .DEPTH (LATENCY > 4) + .DEPTH (LATENCY > 3) ) pipe_reg3 ( .clk (clk), .reset (reset), @@ -258,7 +259,7 @@ module VX_fcvt_unit import VX_fpu_pkg::*; #( .data_in ({is_itof_s2, is_signed_s2, fclass_s2, mant_is_zero_s2, input_sign_s2, rounded_abs_s2, rounded_sign_s2, of_before_round_s2, f2i_round_has_sticky_s2, i2f_round_has_sticky_s2}), .data_out ({is_itof_s3, is_signed_s3, fclass_s3, mant_is_zero_s3, input_sign_s3, rounded_abs_s3, rounded_sign_s3, of_before_round_s3, f2i_round_has_sticky_s3, i2f_round_has_sticky_s3}) ); - + // Assemble regular result, nan box short ones. Int zeroes need to be detected wire [INT_WIDTH-1:0] fmt_result_s3 = mant_is_zero_s3 ? 
0 : {rounded_sign_s3, rounded_abs_s3[EXP_BITS+MAN_BITS-1:0]}; @@ -278,18 +279,18 @@ module VX_fcvt_unit import VX_fpu_pkg::*; #( f2i_special_result_s3[INT_WIDTH-2:0] = 2**(INT_WIDTH-1) - 1; // alone yields 2**(31)-1 f2i_special_result_s3[INT_WIDTH-1] = ~is_signed_s3; // for unsigned casts yields 2**31 end - end + end // Detect special case from source format (inf, nan, overflow, nan-boxing or negative unsigned) - wire f2i_result_is_special_s3 = fclass_s3.is_nan + wire f2i_result_is_special_s3 = fclass_s3.is_nan | fclass_s3.is_inf | of_before_round_s3 | (input_sign_s3 & ~is_signed_s3 & ~rounded_int_res_zero_s3); - + fflags_t f2i_special_status_s3; fflags_t i2f_status_s3, f2i_status_s3; fflags_t tmp_fflags_s3; - + // All integer special cases are invalid assign f2i_special_status_s3 = {1'b1, 4'h0}; @@ -306,7 +307,7 @@ module VX_fcvt_unit import VX_fpu_pkg::*; #( VX_pipe_register #( .DATAW (32 + `FP_FLAGS_BITS), - .DEPTH (LATENCY > 0) + .DEPTH (OUT_REG) ) pipe_reg4 ( .clk (clk), .reset (reset), diff --git a/hw/rtl/fpu/VX_fncp_unit.sv b/hw/rtl/fpu/VX_fncp_unit.sv index a0876dcd70..27836fcbcf 100644 --- a/hw/rtl/fpu/VX_fncp_unit.sv +++ b/hw/rtl/fpu/VX_fncp_unit.sv @@ -1,17 +1,17 @@ // Copyright © 2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
-// Modified port of noncomp module from fpnew Libray +// Modified port of noncomp module from fpnew Libray // reference: https://github.com/pulp-platform/fpnew `include "VX_fpu_define.vh" @@ -19,9 +19,10 @@ `ifdef FPU_DSP module VX_fncp_unit import VX_fpu_pkg::*; #( - parameter LATENCY = 2, + parameter LATENCY = 1, parameter EXP_BITS = 8, - parameter MAN_BITS = 23 + parameter MAN_BITS = 23, + parameter OUT_REG = 0 ) ( input wire clk, input wire reset, @@ -33,10 +34,10 @@ module VX_fncp_unit import VX_fpu_pkg::*; #( input wire [31:0] dataa, input wire [31:0] datab, - output wire [31:0] result, + output wire [31:0] result, output wire [`FP_FLAGS_BITS-1:0] fflags -); +); localparam NEG_INF = 32'h00000001, NEG_NORM = 32'h00000002, NEG_SUBNORM = 32'h00000004, @@ -55,15 +56,15 @@ module VX_fncp_unit import VX_fpu_pkg::*; #( wire a_smaller, ab_equal; // Setup - assign a_sign = dataa[31]; + assign a_sign = dataa[31]; assign a_exponent = dataa[30:23]; assign a_mantissa = dataa[22:0]; - assign b_sign = datab[31]; + assign b_sign = datab[31]; assign b_exponent = datab[30:23]; assign b_mantissa = datab[22:0]; - VX_fp_classifier #( + VX_fp_classifier #( .EXP_BITS (EXP_BITS), .MAN_BITS (MAN_BITS) ) fp_class_a ( @@ -72,7 +73,7 @@ module VX_fncp_unit import VX_fpu_pkg::*; #( .clss_o (a_fclass) ); - VX_fp_classifier #( + VX_fp_classifier #( .EXP_BITS (EXP_BITS), .MAN_BITS (MAN_BITS) ) fp_class_b ( @@ -82,7 +83,7 @@ module VX_fncp_unit import VX_fpu_pkg::*; #( ); assign a_smaller = (dataa < datab) ^ (a_sign || b_sign); - assign ab_equal = (dataa == datab) + assign ab_equal = (dataa == datab) || (a_fclass.is_zero && b_fclass.is_zero); // +0 == -0 // Pipeline stage0 @@ -101,54 +102,54 @@ module VX_fncp_unit import VX_fpu_pkg::*; #( VX_pipe_register #( .DATAW (4 + 2 * 32 + 1 + 1 + 8 + 23 + 2 * $bits(fclass_t) + 1 + 1), - .DEPTH (LATENCY > 1) + .DEPTH (LATENCY > 0) ) pipe_reg0 ( .clk (clk), .reset (reset), .enable (enable), .data_in ({op_mod, dataa, datab, a_sign, b_sign, a_exponent, 
a_mantissa, a_fclass, b_fclass, a_smaller, ab_equal}), .data_out ({op_mod_s0, dataa_s0, datab_s0, a_sign_s0, b_sign_s0, a_exponent_s0, a_mantissa_s0, a_fclass_s0, b_fclass_s0, a_smaller_s0, ab_equal_s0}) - ); + ); // FCLASS reg [31:0] fclass_mask_s0; // generate a 10-bit mask for integer reg - always @(*) begin + always @(*) begin if (a_fclass_s0.is_normal) begin fclass_mask_s0 = a_sign_s0 ? NEG_NORM : POS_NORM; - end + end else if (a_fclass_s0.is_inf) begin fclass_mask_s0 = a_sign_s0 ? NEG_INF : POS_INF; - end + end else if (a_fclass_s0.is_zero) begin fclass_mask_s0 = a_sign_s0 ? NEG_ZERO : POS_ZERO; - end + end else if (a_fclass_s0.is_subnormal) begin fclass_mask_s0 = a_sign_s0 ? NEG_SUBNORM : POS_SUBNORM; - end + end else if (a_fclass_s0.is_nan) begin fclass_mask_s0 = {22'h0, a_fclass_s0.is_quiet, a_fclass_s0.is_signaling, 8'h0}; - end - else begin + end + else begin fclass_mask_s0 = QUT_NAN; end end - // Min/Max + // Min/Max reg [31:0] fminmax_res_s0; always @(*) begin if (a_fclass_s0.is_nan && b_fclass_s0.is_nan) fminmax_res_s0 = {1'b0, 8'hff, 1'b1, 22'd0}; // canonical qNaN - else if (a_fclass_s0.is_nan) + else if (a_fclass_s0.is_nan) fminmax_res_s0 = datab_s0; - else if (b_fclass_s0.is_nan) + else if (b_fclass_s0.is_nan) fminmax_res_s0 = dataa_s0; - else begin + else begin // FMIN, FMAX fminmax_res_s0 = (op_mod_s0[0] ^ a_smaller_s0) ? 
dataa_s0 : datab_s0; end end - // Sign injection + // Sign injection reg [31:0] fsgnj_res_s0; // result of sign injection always @(*) begin case (op_mod_s0[1:0]) @@ -158,12 +159,12 @@ module VX_fncp_unit import VX_fpu_pkg::*; #( endcase end - // Comparison + // Comparison reg fcmp_res_s0; // result of comparison reg fcmp_fflags_NV_s0; // comparison fflags always @(*) begin case (op_mod_s0[1:0]) - 0: begin // LE + 0: begin // LE if (a_fclass_s0.is_nan || b_fclass_s0.is_nan) begin fcmp_res_s0 = 0; fcmp_fflags_NV_s0 = 1; @@ -179,12 +180,12 @@ module VX_fncp_unit import VX_fpu_pkg::*; #( end else begin fcmp_res_s0 = (a_smaller_s0 & ~ab_equal_s0); fcmp_fflags_NV_s0 = 0; - end + end end 2: begin // EQ if (a_fclass_s0.is_nan || b_fclass_s0.is_nan) begin fcmp_res_s0 = 0; - fcmp_fflags_NV_s0 = a_fclass_s0.is_signaling | b_fclass_s0.is_signaling; + fcmp_fflags_NV_s0 = a_fclass_s0.is_signaling | b_fclass_s0.is_signaling; end else begin fcmp_res_s0 = ab_equal_s0; fcmp_fflags_NV_s0 = 0; @@ -192,7 +193,7 @@ module VX_fncp_unit import VX_fpu_pkg::*; #( end default: begin fcmp_res_s0 = 'x; - fcmp_fflags_NV_s0 = 'x; + fcmp_fflags_NV_s0 = 'x; end endcase end @@ -216,7 +217,7 @@ module VX_fncp_unit import VX_fpu_pkg::*; #( // FMV result_s0 = dataa_s0; fflags_NV_s0 = 0; - end + end 6,7: begin // MIN/MAX result_s0 = fminmax_res_s0; @@ -229,7 +230,7 @@ module VX_fncp_unit import VX_fpu_pkg::*; #( VX_pipe_register #( .DATAW (32 + 1), - .DEPTH (LATENCY > 0) + .DEPTH (OUT_REG) ) pipe_reg1 ( .clk (clk), .reset (reset), diff --git a/hw/rtl/fpu/VX_fpu_cvt.sv b/hw/rtl/fpu/VX_fpu_cvt.sv index 37a2ab4194..fe99f1ea16 100644 --- a/hw/rtl/fpu/VX_fpu_cvt.sv +++ b/hw/rtl/fpu/VX_fpu_cvt.sv @@ -64,7 +64,7 @@ module VX_fpu_cvt import VX_fpu_pkg::*; #( .DATA_OUT_WIDTH(`FP_FLAGS_BITS + 32), .TAG_WIDTH (NUM_LANES + TAG_WIDTH), .PE_REG (0), - .OUT_BUF (((NUM_LANES / NUM_PES) > 2) ? 
1 : 0) + .OUT_BUF (2) ) pe_serializer ( .clk (clk), .reset (reset), @@ -88,7 +88,8 @@ module VX_fpu_cvt import VX_fpu_pkg::*; #( for (genvar i = 0; i < NUM_PES; ++i) begin VX_fcvt_unit #( - .LATENCY (`LATENCY_FCVT) + .LATENCY (`LATENCY_FCVT), + .OUT_REG (((NUM_LANES / NUM_PES) > 2) ? 1 : 0) ) fcvt_unit ( .clk (clk), .reset (reset), diff --git a/hw/rtl/fpu/VX_fpu_div.sv b/hw/rtl/fpu/VX_fpu_div.sv index 81fc8f022c..44b5bedfb3 100644 --- a/hw/rtl/fpu/VX_fpu_div.sv +++ b/hw/rtl/fpu/VX_fpu_div.sv @@ -68,7 +68,7 @@ module VX_fpu_div import VX_fpu_pkg::*; #( .DATA_OUT_WIDTH(`FP_FLAGS_BITS + 32), .TAG_WIDTH (NUM_LANES + TAG_WIDTH), .PE_REG (0), - .OUT_BUF (((NUM_LANES / NUM_PES) > 2) ? 1 : 0) + .OUT_BUF (((NUM_LANES / NUM_PES) > 2) ? 2 : 0) ) pe_serializer ( .clk (clk), .reset (reset), diff --git a/hw/rtl/fpu/VX_fpu_fma.sv b/hw/rtl/fpu/VX_fpu_fma.sv index 3522d8a1e5..a5cb89a1a9 100644 --- a/hw/rtl/fpu/VX_fpu_fma.sv +++ b/hw/rtl/fpu/VX_fpu_fma.sv @@ -99,7 +99,7 @@ module VX_fpu_fma import VX_fpu_pkg::*; #( .DATA_OUT_WIDTH(`FP_FLAGS_BITS + 32), .TAG_WIDTH (NUM_LANES + TAG_WIDTH), .PE_REG ((NUM_LANES != NUM_PES) ? 1 : 0), // must be registered for DSPs - .OUT_BUF (((NUM_LANES / NUM_PES) > 2) ? 1 : 0) + .OUT_BUF (((NUM_LANES / NUM_PES) > 2) ? 2 : 0) ) pe_serializer ( .clk (clk), .reset (reset), diff --git a/hw/rtl/fpu/VX_fpu_ncp.sv b/hw/rtl/fpu/VX_fpu_ncp.sv index 34b822d89f..a7057455ba 100644 --- a/hw/rtl/fpu/VX_fpu_ncp.sv +++ b/hw/rtl/fpu/VX_fpu_ncp.sv @@ -69,7 +69,7 @@ module VX_fpu_ncp import VX_fpu_pkg::*; #( .DATA_OUT_WIDTH(`FP_FLAGS_BITS + 32), .TAG_WIDTH (NUM_LANES + TAG_WIDTH), .PE_REG (0), - .OUT_BUF (((NUM_LANES / NUM_PES) > 2) ? 1 : 0) + .OUT_BUF (2) ) pe_serializer ( .clk (clk), .reset (reset), @@ -93,7 +93,8 @@ module VX_fpu_ncp import VX_fpu_pkg::*; #( for (genvar i = 0; i < NUM_PES; ++i) begin VX_fncp_unit #( - .LATENCY (`LATENCY_FNCP) + .LATENCY (`LATENCY_FNCP), + .OUT_REG (((NUM_LANES / NUM_PES) > 2) ? 
1 : 0) ) fncp_unit ( .clk (clk), .reset (reset), diff --git a/hw/rtl/fpu/VX_fpu_sqrt.sv b/hw/rtl/fpu/VX_fpu_sqrt.sv index a6e6dda9a6..5aacf2d29c 100644 --- a/hw/rtl/fpu/VX_fpu_sqrt.sv +++ b/hw/rtl/fpu/VX_fpu_sqrt.sv @@ -62,7 +62,7 @@ module VX_fpu_sqrt import VX_fpu_pkg::*; #( .DATA_OUT_WIDTH(`FP_FLAGS_BITS + 32), .TAG_WIDTH (NUM_LANES + TAG_WIDTH), .PE_REG (0), - .OUT_BUF (((NUM_LANES / NUM_PES) > 2) ? 1 : 0) + .OUT_BUF (((NUM_LANES / NUM_PES) > 2) ? 2 : 0) ) pe_serializer ( .clk (clk), .reset (reset), From 370daf1025d27ac0436aaf70d918205507070dbf Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sat, 24 Aug 2024 01:56:56 -0700 Subject: [PATCH 082/407] fifo refactoring --- hw/rtl/libs/VX_fifo_queue.sv | 277 +++++++++++---------------------- hw/rtl/libs/VX_pending_size.sv | 184 +++++++++++++--------- 2 files changed, 207 insertions(+), 254 deletions(-) diff --git a/hw/rtl/libs/VX_fifo_queue.sv b/hw/rtl/libs/VX_fifo_queue.sv index ea00d67c70..201a45aa9e 100644 --- a/hw/rtl/libs/VX_fifo_queue.sv +++ b/hw/rtl/libs/VX_fifo_queue.sv @@ -36,225 +36,134 @@ module VX_fifo_queue #( output wire [SIZEW-1:0] size ); - localparam ADDRW = `CLOG2(DEPTH); - `STATIC_ASSERT(ALM_FULL > 0, ("alm_full must be greater than 0!")) `STATIC_ASSERT(ALM_FULL < DEPTH, ("alm_full must be smaller than size!")) `STATIC_ASSERT(ALM_EMPTY > 0, ("alm_empty must be greater than 0!")) `STATIC_ASSERT(ALM_EMPTY < DEPTH, ("alm_empty must be smaller than size!")) `STATIC_ASSERT(`IS_POW2(DEPTH), ("size must be a power of 2!")) + VX_pending_size #( + .SIZE (DEPTH), + .ALM_EMPTY (ALM_EMPTY), + .ALM_FULL (ALM_FULL) + ) pending_size ( + .clk (clk), + .reset (reset), + .incr (push), + .decr (pop), + .empty (empty), + .full (full), + .alm_empty(alm_empty), + .alm_full(alm_full), + .size (size) + ); + if (DEPTH == 1) begin reg [DATAW-1:0] head_r; - reg size_r; always @(posedge clk) begin - if (reset) begin - head_r <= '0; - size_r <= '0; - end else begin - `ASSERT(~push || ~full, ("runtime error: writing to a full 
queue")); - `ASSERT(~pop || ~empty, ("runtime error: reading an empty queue")); - if (push) begin - if (~pop) begin - size_r <= 1; - end - end else if (pop) begin - size_r <= '0; - end - if (push) begin - head_r <= data_in; - end + if (push) begin + head_r <= data_in; end end - assign data_out = head_r; - assign empty = (size_r == 0); - assign alm_empty = 1'b1; - assign full = (size_r != 0); - assign alm_full = 1'b1; - assign size = size_r; + assign data_out = head_r; end else begin - reg empty_r, alm_empty_r; - reg full_r, alm_full_r; - reg [ADDRW-1:0] used_r; - wire [ADDRW-1:0] used_n; + localparam ADDRW = `CLOG2(DEPTH); - always @(posedge clk) begin - if (reset) begin - empty_r <= 1; - alm_empty_r <= 1; - full_r <= 0; - alm_full_r <= 0; - used_r <= '0; - end else begin - `ASSERT(~(push && ~pop) || ~full, ("runtime error: incrementing full queue")); - `ASSERT(~(pop && ~push) || ~empty, ("runtime error: decrementing empty queue")); - if (push) begin - if (~pop) begin - empty_r <= 0; - if (used_r == ADDRW'(ALM_EMPTY)) - alm_empty_r <= 0; - if (used_r == ADDRW'(DEPTH-1)) - full_r <= 1; - if (used_r == ADDRW'(ALM_FULL-1)) - alm_full_r <= 1; - end - end else if (pop) begin - full_r <= 0; - if (used_r == ADDRW'(ALM_FULL)) - alm_full_r <= 0; - if (used_r == ADDRW'(1)) - empty_r <= 1; - if (used_r == ADDRW'(ALM_EMPTY+1)) - alm_empty_r <= 1; - end - used_r <= used_n; - end - end + if (OUT_REG != 0) begin - if (DEPTH == 2 && LUTRAM == 0) begin + wire [DATAW-1:0] dout; + reg [DATAW-1:0] dout_r; + reg [ADDRW-1:0] wr_ptr_r; + reg [ADDRW-1:0] rd_ptr_r; + reg [ADDRW-1:0] rd_ptr_n_r; - assign used_n = used_r ^ (push ^ pop); - - if (0 == OUT_REG) begin - - reg [1:0][DATAW-1:0] shift_reg; - - always @(posedge clk) begin - if (push) begin - shift_reg[1] <= shift_reg[0]; - shift_reg[0] <= data_in; - end - end - - assign data_out = shift_reg[!used_r[0]]; - - end else begin - - reg [DATAW-1:0] data_out_r; - reg [DATAW-1:0] buffer; - - always @(posedge clk) begin - if (push) begin - 
buffer <= data_in; - end - if (push && (empty_r || (used_r && pop))) begin - data_out_r <= data_in; - end else if (pop) begin - data_out_r <= buffer; + always @(posedge clk) begin + if (reset) begin + wr_ptr_r <= '0; + rd_ptr_r <= '0; + rd_ptr_n_r <= 1; + end else begin + wr_ptr_r <= wr_ptr_r + ADDRW'(push); + if (pop) begin + rd_ptr_r <= rd_ptr_n_r; + if (DEPTH > 2) begin + rd_ptr_n_r <= rd_ptr_r + ADDRW'(2); + end else begin // (DEPTH == 2); + rd_ptr_n_r <= ~rd_ptr_n_r; + end end end - - assign data_out = data_out_r; - end - end else begin - - assign used_n = $signed(used_r) + ADDRW'($signed(2'(push) - 2'(pop))); - - if (0 == OUT_REG) begin - - reg [ADDRW-1:0] rd_ptr_r; - reg [ADDRW-1:0] wr_ptr_r; - - always @(posedge clk) begin - if (reset) begin - rd_ptr_r <= '0; - wr_ptr_r <= '0; - end else begin - wr_ptr_r <= wr_ptr_r + ADDRW'(push); - rd_ptr_r <= rd_ptr_r + ADDRW'(pop); - end + VX_dp_ram #( + .DATAW (DATAW), + .SIZE (DEPTH), + .LUTRAM (LUTRAM) + ) dp_ram ( + .clk (clk), + .reset (reset), + .read (1'b1), + .write (push), + .wren (1'b1), + .waddr (wr_ptr_r), + .wdata (data_in), + .raddr (rd_ptr_n_r), + .rdata (dout) + ); + + wire going_empty = (ALM_EMPTY == 1) ? 
alm_empty : (size[ADDRW] == ADDRW'(1)); + + always @(posedge clk) begin + if (push && (empty || (going_empty && pop))) begin + dout_r <= data_in; + end else if (pop) begin + dout_r <= dout; end + end - VX_dp_ram #( - .DATAW (DATAW), - .SIZE (DEPTH), - .LUTRAM (LUTRAM) - ) dp_ram ( - .clk (clk), - .reset (reset), - .read (1'b1), - .write (push), - .wren (1'b1), - .waddr (wr_ptr_r), - .wdata (data_in), - .raddr (rd_ptr_r), - .rdata (data_out) - ); - - end else begin + assign data_out = dout_r; - wire [DATAW-1:0] dout; - reg [DATAW-1:0] dout_r; - reg [ADDRW-1:0] wr_ptr_r; - reg [ADDRW-1:0] rd_ptr_r; - reg [ADDRW-1:0] rd_ptr_n_r; + end else begin - always @(posedge clk) begin - if (reset) begin - wr_ptr_r <= '0; - rd_ptr_r <= '0; - rd_ptr_n_r <= 1; - end else begin - wr_ptr_r <= wr_ptr_r + ADDRW'(push); - if (pop) begin - rd_ptr_r <= rd_ptr_n_r; - if (DEPTH > 2) begin - rd_ptr_n_r <= rd_ptr_r + ADDRW'(2); - end else begin // (DEPTH == 2); - rd_ptr_n_r <= ~rd_ptr_n_r; - end - end - end - end + reg [ADDRW-1:0] rd_ptr_r; + reg [ADDRW-1:0] wr_ptr_r; - wire going_empty; - if (ALM_EMPTY == 1) begin - assign going_empty = alm_empty_r; + always @(posedge clk) begin + if (reset) begin + rd_ptr_r <= '0; + wr_ptr_r <= '0; end else begin - assign going_empty = (used_r == ADDRW'(1)); + wr_ptr_r <= wr_ptr_r + ADDRW'(push); + rd_ptr_r <= rd_ptr_r + ADDRW'(pop); end + end - VX_dp_ram #( - .DATAW (DATAW), - .SIZE (DEPTH), - .LUTRAM (LUTRAM) - ) dp_ram ( - .clk (clk), - .reset (reset), - .read (1'b1), - .write (push), - .wren (1'b1), - .waddr (wr_ptr_r), - .wdata (data_in), - .raddr (rd_ptr_n_r), - .rdata (dout) - ); + VX_dp_ram #( + .DATAW (DATAW), + .SIZE (DEPTH), + .LUTRAM (LUTRAM) + ) dp_ram ( + .clk (clk), + .reset (reset), + .read (1'b1), + .write (push), + .wren (1'b1), + .waddr (wr_ptr_r), + .wdata (data_in), + .raddr (rd_ptr_r), + .rdata (data_out) + ); - always @(posedge clk) begin - if (push && (empty_r || (going_empty && pop))) begin - dout_r <= data_in; - end else if (pop) 
begin - dout_r <= dout; - end - end - - assign data_out = dout_r; - end end - - assign empty = empty_r; - assign alm_empty = alm_empty_r; - assign full = full_r; - assign alm_full = alm_full_r; - assign size = {full_r, used_r}; end + `RUNTIME_ASSERT(~(push && ~pop) || ~full, ("runtime error: incrementing full queue")); + `RUNTIME_ASSERT(~(pop && ~push) || ~empty, ("runtime error: decrementing empty queue")); + endmodule `TRACING_ON diff --git a/hw/rtl/libs/VX_pending_size.sv b/hw/rtl/libs/VX_pending_size.sv index 031e576953..3d593156fe 100644 --- a/hw/rtl/libs/VX_pending_size.sv +++ b/hw/rtl/libs/VX_pending_size.sv @@ -13,7 +13,7 @@ `include "VX_platform.vh" -//`TRACING_OFF +`TRACING_OFF module VX_pending_size #( parameter SIZE = 1, parameter INCRW = 1, @@ -34,97 +34,141 @@ module VX_pending_size #( ); `STATIC_ASSERT(INCRW <= SIZEW, ("invalid parameter: %d vs %d", INCRW, SIZEW)) `STATIC_ASSERT(DECRW <= SIZEW, ("invalid parameter: %d vs %d", DECRW, SIZEW)) - localparam ADDRW = `LOG2UP(SIZE); - reg empty_r, alm_empty_r; - reg full_r, alm_full_r; + if (SIZE == 1) begin - if (INCRW != 1 || DECRW != 1) begin - - reg [SIZEW-1:0] size_r; - - wire [SIZEW-1:0] size_n = size_r + SIZEW'(incr) - SIZEW'(decr); + reg size_r; always @(posedge clk) begin if (reset) begin - empty_r <= 1; - alm_empty_r <= 1; - alm_full_r <= 0; - full_r <= 0; - size_r <= '0; + size_r <= '0; end else begin - `ASSERT((SIZEW'(incr) >= SIZEW'(decr)) || (size_n >= size_r), ("runtime error: counter overflow")); - `ASSERT((SIZEW'(incr) <= SIZEW'(decr)) || (size_n <= size_r), ("runtime error: counter underflow")); - size_r <= size_n; - empty_r <= (size_n == SIZEW'(0)); - alm_empty_r <= (size_n == SIZEW'(ALM_EMPTY)); - full_r <= (size_n == SIZEW'(SIZE)); - alm_full_r <= (size_n == SIZEW'(ALM_FULL)); + if (incr) begin + if (~decr) begin + size_r <= 1; + end + end else if (decr) begin + size_r <= '0; + end end end - assign size = size_r; + assign empty = (size_r == 0); + assign full = (size_r != 0); + assign 
alm_empty = 1'b1; + assign alm_full = 1'b1; + assign size = size_r; end else begin - reg [ADDRW-1:0] used_r; - wire [ADDRW-1:0] used_n; + logic empty_r, alm_empty_r; + logic full_r, alm_full_r; - always @(posedge clk) begin - if (reset) begin - empty_r <= 1; - alm_empty_r <= 1; - full_r <= 0; - alm_full_r <= 0; - used_r <= '0; - end else begin - `ASSERT(~(incr && ~decr) || ~full, ("runtime error: counter overflow")); - `ASSERT(~(decr && ~incr) || ~empty, ("runtime error: counter underflow")); - if (incr) begin - if (~decr) begin - empty_r <= 0; - if (used_r == ADDRW'(ALM_EMPTY)) - alm_empty_r <= 0; - if (used_r == ADDRW'(SIZE-1)) - full_r <= 1; - if (used_r == ADDRW'(ALM_FULL-1)) - alm_full_r <= 1; - end - end else if (decr) begin - if (used_r == ADDRW'(1)) - empty_r <= 1; - if (used_r == ADDRW'(ALM_EMPTY+1)) - alm_empty_r <= 1; - full_r <= 0; - if (used_r == ADDRW'(ALM_FULL)) - alm_full_r <= 0; + if (INCRW != 1 || DECRW != 1) begin + + localparam SUBW = `MIN(SIZEW, `MAX(INCRW, DECRW)+1); + + logic [SIZEW-1:0] size_n, size_r; + + assign size_n = $signed(size_r) + SIZEW'($signed(SUBW'(incr) - SUBW'(decr))); + + always @(posedge clk) begin + if (reset) begin + empty_r <= 1; + full_r <= 0; + alm_empty_r <= 1; + alm_full_r <= 0; + size_r <= '0; + end else begin + `ASSERT((SIZEW'(incr) >= SIZEW'(decr)) || (size_n >= size_r), ("runtime error: counter overflow")); + `ASSERT((SIZEW'(incr) <= SIZEW'(decr)) || (size_n <= size_r), ("runtime error: counter underflow")); + empty_r <= (size_n == SIZEW'(0)); + full_r <= (size_n == SIZEW'(SIZE)); + alm_empty_r <= (size_n <= SIZEW'(ALM_EMPTY)); + alm_full_r <= (size_n >= SIZEW'(ALM_FULL)); + size_r <= size_n; end - used_r <= used_n; end - end - if (SIZE == 2) begin - assign used_n = used_r ^ (incr ^ decr); + assign size = size_r; + end else begin - assign used_n = $signed(used_r) + ADDRW'($signed(2'(incr) - 2'(decr))); - end - if (SIZE > 1) begin - if (SIZEW > ADDRW) begin - assign size = {full_r, used_r}; + localparam ADDRW = 
`LOG2UP(SIZE); + + reg [ADDRW-1:0] used_r; + + wire is_empty_n = (used_r == ADDRW'(1)); + wire is_full_n = (used_r == ADDRW'(SIZE-1)); + + if (SIZE > 2) begin + + wire is_alm_empty = (used_r == ADDRW'(ALM_EMPTY)); + wire is_alm_empty_n= (used_r == ADDRW'(ALM_EMPTY+1)); + wire is_alm_full = (used_r == ADDRW'(ALM_FULL)); + wire is_alm_full_n = (used_r == ADDRW'(ALM_FULL-1)); + + wire [1:0] push_minus_pop = {~incr & decr, incr ^ decr}; + + always @(posedge clk) begin + if (reset) begin + empty_r <= 1; + full_r <= 0; + alm_empty_r <= 0; + alm_full_r <= 0; + used_r <= '0; + end else begin + if (incr) begin + if (~decr) begin + empty_r <= 0; + if (is_alm_empty) + alm_empty_r <= 0; + if (is_full_n) + full_r <= 1; + if (is_alm_full_n) + alm_full_r <= 1; + end + end else if (decr) begin + full_r <= 0; + if (is_alm_full) + alm_full_r <= 0; + if (is_empty_n) + empty_r <= 1; + if (is_alm_empty_n) + alm_empty_r <= 1; + end + used_r <= $signed(used_r) + ADDRW'($signed(push_minus_pop)); + end + end + end else begin - assign size = used_r; + + always @(posedge clk) begin + if (reset) begin + empty_r <= 1; + full_r <= 0; + used_r <= '0; + end else begin + empty_r <= (empty_r & ~incr) | (~full_r & decr & ~incr); + full_r <= (~empty_r & incr & ~decr) | (full_r & ~(decr ^ incr)); + used_r <= used_r ^ (incr ^ decr); + end + end + + assign alm_empty_r = used_r; + assign alm_full_r = used_r; end - end else begin - assign size = full_r; + + assign size = {full_r, used_r}; + end - end + assign empty = empty_r; + assign full = full_r; + assign alm_empty = alm_empty_r; + assign alm_full = alm_full_r; - assign empty = empty_r; - assign alm_empty = alm_empty_r; - assign alm_full = alm_full_r; - assign full = full_r; + end endmodule -//`TRACING_ON +`TRACING_ON From 31a5ab714ef0ef7bb2cd5dc9ad73fcac8db9bb1d Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sat, 24 Aug 2024 01:57:45 -0700 Subject: [PATCH 083/407] xbar timing optimitzaion --- hw/rtl/libs/VX_stream_xbar.sv | 56 
++++++++++++++++++++++------------- 1 file changed, 35 insertions(+), 21 deletions(-) diff --git a/hw/rtl/libs/VX_stream_xbar.sv b/hw/rtl/libs/VX_stream_xbar.sv index b37c9b6760..3dd30bc86a 100644 --- a/hw/rtl/libs/VX_stream_xbar.sv +++ b/hw/rtl/libs/VX_stream_xbar.sv @@ -49,15 +49,35 @@ module VX_stream_xbar #( // (#inputs > 1) and (#outputs > 1) + wire [NUM_INPUTS-1:0][NUM_OUTPUTS-1:0] per_output_valid_in; + wire [NUM_OUTPUTS-1:0][NUM_INPUTS-1:0] per_output_valid_in_w; + wire [NUM_OUTPUTS-1:0][NUM_INPUTS-1:0] per_output_ready_in; + wire [NUM_INPUTS-1:0][NUM_OUTPUTS-1:0] per_output_ready_in_w; + + VX_transpose #( + .N (NUM_OUTPUTS), + .M (NUM_INPUTS) + ) rdy_in_transpose ( + .data_in (per_output_ready_in), + .data_out (per_output_ready_in_w) + ); - for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin + VX_transpose #( + .N (NUM_INPUTS), + .M (NUM_OUTPUTS) + ) val_in_transpose ( + .data_in (per_output_valid_in), + .data_out (per_output_valid_in_w) + ); - wire [NUM_INPUTS-1:0] valid_in_q; - for (genvar j = 0; j < NUM_INPUTS; ++j) begin - assign valid_in_q[j] = valid_in[j] && (sel_in[j] == i); - end + for (genvar i = 0; i < NUM_INPUTS; ++i) begin + assign per_output_valid_in[i] = NUM_OUTPUTS'(valid_in[i]) << sel_in[i]; + assign ready_in[i] = | per_output_ready_in_w[i]; + end + for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin + VX_stream_arb #( .NUM_INPUTS (NUM_INPUTS), .NUM_OUTPUTS (1), @@ -68,7 +88,7 @@ module VX_stream_xbar #( ) xbar_arb ( .clk (clk), .reset (reset), - .valid_in (valid_in_q), + .valid_in (per_output_valid_in_w[i]), .data_in (data_in), .ready_in (per_output_ready_in[i]), .valid_out (valid_out[i]), @@ -78,10 +98,6 @@ module VX_stream_xbar #( ); end - for (genvar i = 0; i < NUM_INPUTS; ++i) begin - assign ready_in[i] = per_output_ready_in[sel_in[i]][i]; - end - end else begin // (#inputs >= 1) and (#outputs == 1) @@ -112,14 +128,12 @@ module VX_stream_xbar #( // (#inputs == 1) and (#outputs > 1) - logic [NUM_OUTPUTS-1:0] valid_out_r, ready_out_r; - logic 
[NUM_OUTPUTS-1:0][DATAW-1:0] data_out_r; - always @(*) begin - valid_out_r = '0; - valid_out_r[sel_in] = valid_in; - end - assign data_out_r = {NUM_OUTPUTS{data_in}}; - assign ready_in = ready_out_r[sel_in]; + wire [NUM_OUTPUTS-1:0] valid_out_w, ready_out_w; + wire [NUM_OUTPUTS-1:0][DATAW-1:0] data_out_w; + + assign ready_in[0] = ready_out_w[sel_in[0]]; + assign valid_out_w = NUM_OUTPUTS'(valid_in[0]) << sel_in[0]; + assign data_out_w = {NUM_OUTPUTS{data_in[0]}}; for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin VX_elastic_buffer #( @@ -130,9 +144,9 @@ module VX_stream_xbar #( ) out_buf ( .clk (clk), .reset (reset), - .valid_in (valid_out_r[i]), - .ready_in (ready_out_r[i]), - .data_in (data_out_r[i]), + .valid_in (valid_out_w[i]), + .ready_in (ready_out_w[i]), + .data_in (data_out_w[i]), .data_out (data_out[i]), .valid_out (valid_out[i]), .ready_out (ready_out[i]) From cd97945d0d62a51707dab82f702adbf70fab0d96 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sat, 24 Aug 2024 04:51:27 -0700 Subject: [PATCH 084/407] minor update --- hw/rtl/libs/VX_pending_size.sv | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/hw/rtl/libs/VX_pending_size.sv b/hw/rtl/libs/VX_pending_size.sv index 3d593156fe..b456239da4 100644 --- a/hw/rtl/libs/VX_pending_size.sv +++ b/hw/rtl/libs/VX_pending_size.sv @@ -98,11 +98,10 @@ module VX_pending_size #( reg [ADDRW-1:0] used_r; - wire is_empty_n = (used_r == ADDRW'(1)); - wire is_full_n = (used_r == ADDRW'(SIZE-1)); - if (SIZE > 2) begin + wire is_empty_n = (used_r == ADDRW'(1)); + wire is_full_n = (used_r == ADDRW'(SIZE-1)); wire is_alm_empty = (used_r == ADDRW'(ALM_EMPTY)); wire is_alm_empty_n= (used_r == ADDRW'(ALM_EMPTY+1)); wire is_alm_full = (used_r == ADDRW'(ALM_FULL)); From 0ed589a3bfb5e49965c4dab3c74ee5ce898701cf Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sat, 24 Aug 2024 07:49:08 -0700 Subject: [PATCH 085/407] minor update --- hw/rtl/libs/VX_fifo_queue.sv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) 
diff --git a/hw/rtl/libs/VX_fifo_queue.sv b/hw/rtl/libs/VX_fifo_queue.sv index 201a45aa9e..e6f94b3b29 100644 --- a/hw/rtl/libs/VX_fifo_queue.sv +++ b/hw/rtl/libs/VX_fifo_queue.sv @@ -116,7 +116,7 @@ module VX_fifo_queue #( .rdata (dout) ); - wire going_empty = (ALM_EMPTY == 1) ? alm_empty : (size[ADDRW] == ADDRW'(1)); + wire going_empty = (ALM_EMPTY == 1) ? alm_empty : (size[ADDRW-1:0] == ADDRW'(1)); always @(posedge clk) begin if (push && (empty || (going_empty && pop))) begin From 1f5cc5343415aef138e9d1a2f5acb91544584144 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sat, 24 Aug 2024 09:16:23 -0700 Subject: [PATCH 086/407] minor update --- hw/rtl/fpu/VX_fpu_cvt.sv | 4 ++-- hw/rtl/fpu/VX_fpu_ncp.sv | 4 ++-- hw/rtl/libs/VX_stream_arb.sv | 26 +++++++++++++------------- 3 files changed, 17 insertions(+), 17 deletions(-) diff --git a/hw/rtl/fpu/VX_fpu_cvt.sv b/hw/rtl/fpu/VX_fpu_cvt.sv index fe99f1ea16..1b6617c600 100644 --- a/hw/rtl/fpu/VX_fpu_cvt.sv +++ b/hw/rtl/fpu/VX_fpu_cvt.sv @@ -64,7 +64,7 @@ module VX_fpu_cvt import VX_fpu_pkg::*; #( .DATA_OUT_WIDTH(`FP_FLAGS_BITS + 32), .TAG_WIDTH (NUM_LANES + TAG_WIDTH), .PE_REG (0), - .OUT_BUF (2) + .OUT_BUF (((NUM_LANES / NUM_PES) > 2) ? 2 : 0) ) pe_serializer ( .clk (clk), .reset (reset), @@ -89,7 +89,7 @@ module VX_fpu_cvt import VX_fpu_pkg::*; #( for (genvar i = 0; i < NUM_PES; ++i) begin VX_fcvt_unit #( .LATENCY (`LATENCY_FCVT), - .OUT_REG (((NUM_LANES / NUM_PES) > 2) ? 1 : 0) + .OUT_REG (1) ) fcvt_unit ( .clk (clk), .reset (reset), diff --git a/hw/rtl/fpu/VX_fpu_ncp.sv b/hw/rtl/fpu/VX_fpu_ncp.sv index a7057455ba..16c0df7580 100644 --- a/hw/rtl/fpu/VX_fpu_ncp.sv +++ b/hw/rtl/fpu/VX_fpu_ncp.sv @@ -69,7 +69,7 @@ module VX_fpu_ncp import VX_fpu_pkg::*; #( .DATA_OUT_WIDTH(`FP_FLAGS_BITS + 32), .TAG_WIDTH (NUM_LANES + TAG_WIDTH), .PE_REG (0), - .OUT_BUF (2) + .OUT_BUF (((NUM_LANES / NUM_PES) > 2) ? 
2 : 0) ) pe_serializer ( .clk (clk), .reset (reset), @@ -94,7 +94,7 @@ module VX_fpu_ncp import VX_fpu_pkg::*; #( for (genvar i = 0; i < NUM_PES; ++i) begin VX_fncp_unit #( .LATENCY (`LATENCY_FNCP), - .OUT_REG (((NUM_LANES / NUM_PES) > 2) ? 1 : 0) + .OUT_REG (1) ) fncp_unit ( .clk (clk), .reset (reset), diff --git a/hw/rtl/libs/VX_stream_arb.sv b/hw/rtl/libs/VX_stream_arb.sv index ffb56eb260..413da98f04 100644 --- a/hw/rtl/libs/VX_stream_arb.sv +++ b/hw/rtl/libs/VX_stream_arb.sv @@ -143,9 +143,9 @@ module VX_stream_arb #( // (#inputs <= max_fanout) and (#outputs == 1) - wire valid_in_r; - wire [DATAW-1:0] data_in_r; - wire ready_in_r; + wire valid_in_w; + wire [DATAW-1:0] data_in_w; + wire ready_in_w; wire arb_valid; wire [NUM_REQS_W-1:0] arb_index; @@ -165,12 +165,12 @@ module VX_stream_arb #( .grant_ready (arb_ready) ); - assign valid_in_r = arb_valid; - assign data_in_r = data_in[arb_index]; - assign arb_ready = ready_in_r; + assign valid_in_w = arb_valid; + assign data_in_w = data_in[arb_index]; + assign arb_ready = ready_in_w; for (genvar i = 0; i < NUM_REQS; ++i) begin - assign ready_in[i] = ready_in_r && arb_onehot[i]; + assign ready_in[i] = ready_in_w && arb_onehot[i]; end VX_elastic_buffer #( @@ -181,9 +181,9 @@ module VX_stream_arb #( ) out_buf ( .clk (clk), .reset (reset), - .valid_in (valid_in_r), - .ready_in (ready_in_r), - .data_in ({arb_index, data_in_r}), + .valid_in (valid_in_w), + .ready_in (ready_in_w), + .data_in ({arb_index, data_in_w}), .data_out ({sel_out, data_out}), .valid_out (valid_out), .ready_out (ready_out) @@ -285,7 +285,7 @@ module VX_stream_arb #( // (#inputs == 1) and (#outputs <= max_fanout) - wire [NUM_OUTPUTS-1:0] ready_in_r; + wire [NUM_OUTPUTS-1:0] ready_in_w; wire [NUM_OUTPUTS-1:0] arb_requests; wire arb_valid; @@ -305,7 +305,7 @@ module VX_stream_arb #( .grant_ready (arb_ready) ); - assign arb_requests = ready_in_r; + assign arb_requests = ready_in_w; assign arb_ready = valid_in[0]; assign ready_in = arb_valid; @@ -319,7 
+319,7 @@ module VX_stream_arb #( .clk (clk), .reset (reset), .valid_in (valid_in && arb_onehot[i]), - .ready_in (ready_in_r[i]), + .ready_in (ready_in_w[i]), .data_in (data_in), .data_out (data_out[i]), .valid_out (valid_out[i]), From 10a870516151992ada988f7278c26a63576a514b Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sat, 24 Aug 2024 10:42:48 -0700 Subject: [PATCH 087/407] minor update --- hw/rtl/libs/VX_pending_size.sv | 57 ++++++++++++++++++++-------------- 1 file changed, 33 insertions(+), 24 deletions(-) diff --git a/hw/rtl/libs/VX_pending_size.sv b/hw/rtl/libs/VX_pending_size.sv index b456239da4..610c2bc04f 100644 --- a/hw/rtl/libs/VX_pending_size.sv +++ b/hw/rtl/libs/VX_pending_size.sv @@ -61,8 +61,8 @@ module VX_pending_size #( end else begin - logic empty_r, alm_empty_r; - logic full_r, alm_full_r; + reg empty_r, alm_empty_r; + reg full_r, alm_full_r; if (INCRW != 1 || DECRW != 1) begin @@ -98,43 +98,55 @@ module VX_pending_size #( reg [ADDRW-1:0] used_r; + wire is_alm_empty = (used_r == ADDRW'(ALM_EMPTY)); + wire is_alm_empty_n = (used_r == ADDRW'(ALM_EMPTY+1)); + wire is_alm_full = (used_r == ADDRW'(ALM_FULL)); + wire is_alm_full_n = (used_r == ADDRW'(ALM_FULL-1)); + + always @(posedge clk) begin + if (reset) begin + alm_empty_r <= 1; + alm_full_r <= 0; + end else begin + if (incr) begin + if (~decr) begin + if (is_alm_empty) + alm_empty_r <= 0; + if (is_alm_full_n) + alm_full_r <= 1; + end + end else if (decr) begin + if (is_alm_full) + alm_full_r <= 0; + if (is_alm_empty_n) + alm_empty_r <= 1; + end + end + end + if (SIZE > 2) begin - wire is_empty_n = (used_r == ADDRW'(1)); - wire is_full_n = (used_r == ADDRW'(SIZE-1)); - wire is_alm_empty = (used_r == ADDRW'(ALM_EMPTY)); - wire is_alm_empty_n= (used_r == ADDRW'(ALM_EMPTY+1)); - wire is_alm_full = (used_r == ADDRW'(ALM_FULL)); - wire is_alm_full_n = (used_r == ADDRW'(ALM_FULL-1)); + wire is_empty_n = (used_r == ADDRW'(1)); + wire is_full_n = (used_r == ADDRW'(SIZE-1)); wire [1:0] push_minus_pop 
= {~incr & decr, incr ^ decr}; always @(posedge clk) begin if (reset) begin - empty_r <= 1; - full_r <= 0; - alm_empty_r <= 0; - alm_full_r <= 0; - used_r <= '0; + empty_r <= 1; + full_r <= 0; + used_r <= '0; end else begin if (incr) begin if (~decr) begin empty_r <= 0; - if (is_alm_empty) - alm_empty_r <= 0; if (is_full_n) full_r <= 1; - if (is_alm_full_n) - alm_full_r <= 1; end end else if (decr) begin full_r <= 0; - if (is_alm_full) - alm_full_r <= 0; if (is_empty_n) empty_r <= 1; - if (is_alm_empty_n) - alm_empty_r <= 1; end used_r <= $signed(used_r) + ADDRW'($signed(push_minus_pop)); end @@ -153,9 +165,6 @@ module VX_pending_size #( used_r <= used_r ^ (incr ^ decr); end end - - assign alm_empty_r = used_r; - assign alm_full_r = used_r; end assign size = {full_r, used_r}; From 4570a20eee56931c6b7b320fdf8b9dbaa86d86a4 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sat, 24 Aug 2024 12:15:12 -0700 Subject: [PATCH 088/407] minor update --- hw/rtl/libs/VX_stream_buffer.sv | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hw/rtl/libs/VX_stream_buffer.sv b/hw/rtl/libs/VX_stream_buffer.sv index ea45619333..81978b7350 100644 --- a/hw/rtl/libs/VX_stream_buffer.sv +++ b/hw/rtl/libs/VX_stream_buffer.sv @@ -85,8 +85,8 @@ module VX_stream_buffer #( end else begin - reg [DATAW-1:0] shift_reg [1:0]; - reg [1:0] fifo_state; + reg [1:0][DATAW-1:0] shift_reg; + reg [1:0] fifo_state; wire fire_in = valid_in && ready_in; wire fire_out = valid_out && ready_out; From 3b336d7fb3ef638b141934bf02a8ad15b25d8671 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sat, 24 Aug 2024 16:59:18 -0700 Subject: [PATCH 089/407] register vs combinational signals naming consistency --- hw/rtl/cache/VX_cache_bank.sv | 8 +- hw/rtl/cache/VX_cache_bypass.sv | 20 ++-- hw/rtl/core/VX_csr_data.sv | 152 ++++++++++++++-------------- hw/rtl/core/VX_decode.sv | 20 ++-- hw/rtl/core/VX_gather_unit.sv | 20 ++-- hw/rtl/core/VX_lsu_slice.sv | 22 ++--- hw/rtl/fpu/VX_fpu_dsp.sv | 10 +- 
hw/rtl/libs/VX_onehot_encoder.sv | 12 +-- hw/rtl/libs/VX_onehot_mux.sv | 110 ++++++++++----------- hw/rtl/libs/VX_popcount.sv | 60 +++++------ hw/rtl/libs/VX_priority_encoder.sv | 16 +-- hw/rtl/libs/VX_rr_arbiter.sv | 154 ++++++++++++++--------------- 12 files changed, 302 insertions(+), 302 deletions(-) diff --git a/hw/rtl/cache/VX_cache_bank.sv b/hw/rtl/cache/VX_cache_bank.sv index 3dede22d56..22d956dba8 100644 --- a/hw/rtl/cache/VX_cache_bank.sv +++ b/hw/rtl/cache/VX_cache_bank.sv @@ -414,12 +414,12 @@ module VX_cache_bank #( wire [LINE_SIZE-1:0] dirty_byteen_st1; if (`CS_WORDS_PER_LINE > 1) begin - reg [LINE_SIZE-1:0] write_byteen_r; + reg [`CS_WORDS_PER_LINE-1:0][WORD_SIZE-1:0] write_byteen_w; always @(*) begin - write_byteen_r = '0; - write_byteen_r[wsel_st1 * WORD_SIZE +: WORD_SIZE] = byteen_st1; + write_byteen_w = '0; + write_byteen_w[wsel_st1] = byteen_st1; end - assign write_byteen_st1 = write_byteen_r; + assign write_byteen_st1 = write_byteen_w; end else begin assign write_byteen_st1 = byteen_st1; end diff --git a/hw/rtl/cache/VX_cache_bypass.sv b/hw/rtl/cache/VX_cache_bypass.sv index b2aeb87911..dc88c6c1fd 100644 --- a/hw/rtl/cache/VX_cache_bypass.sv +++ b/hw/rtl/cache/VX_cache_bypass.sv @@ -158,21 +158,21 @@ module VX_cache_bypass #( wire [CORE_TAG_ID_BITS-1:0] core_req_in_id = core_req_nc_sel_tag[CORE_TAG_ID_BITS-1:0]; if (WORDS_PER_LINE > 1) begin - reg [WORDS_PER_LINE-1:0][WORD_SIZE-1:0] mem_req_byteen_in_r; - reg [WORDS_PER_LINE-1:0][CORE_DATA_WIDTH-1:0] mem_req_data_in_r; + reg [WORDS_PER_LINE-1:0][WORD_SIZE-1:0] mem_req_byteen_in_w; + reg [WORDS_PER_LINE-1:0][CORE_DATA_WIDTH-1:0] mem_req_data_in_w; wire [WSEL_BITS-1:0] req_wsel = core_req_nc_sel_addr[WSEL_BITS-1:0]; always @(*) begin - mem_req_byteen_in_r = '0; - mem_req_byteen_in_r[req_wsel] = core_req_nc_sel_byteen; + mem_req_byteen_in_w = '0; + mem_req_byteen_in_w[req_wsel] = core_req_nc_sel_byteen; - mem_req_data_in_r = 'x; - mem_req_data_in_r[req_wsel] = core_req_nc_sel_data; + 
mem_req_data_in_w = 'x; + mem_req_data_in_w[req_wsel] = core_req_nc_sel_data; end - assign mem_req_out_byteen = mem_bus_in_if.req_valid ? mem_bus_in_if.req_data.byteen : mem_req_byteen_in_r; - assign mem_req_out_data = mem_bus_in_if.req_valid ? mem_bus_in_if.req_data.data : mem_req_data_in_r; + assign mem_req_out_byteen = mem_bus_in_if.req_valid ? mem_bus_in_if.req_data.byteen : mem_req_byteen_in_w; + assign mem_req_out_data = mem_bus_in_if.req_valid ? mem_bus_in_if.req_data.data : mem_req_data_in_w; if (NUM_REQS > 1) begin assign mem_req_tag_id_bypass = MEM_TAG_ID_BITS'({core_req_nc_idx, req_wsel, core_req_in_id}); end else begin @@ -268,10 +268,10 @@ module VX_cache_bypass #( assign rsp_idx = 1'b0; end - wire [NUM_REQS-1:0] rsp_nc_valid_r = NUM_REQS'(is_mem_rsp_nc) << rsp_idx; + wire [NUM_REQS-1:0] rsp_nc_valid = NUM_REQS'(is_mem_rsp_nc) << rsp_idx; for (genvar i = 0; i < NUM_REQS; ++i) begin - assign core_rsp_in_valid[i] = core_bus_out_if[i].rsp_valid || rsp_nc_valid_r[i]; + assign core_rsp_in_valid[i] = core_bus_out_if[i].rsp_valid || rsp_nc_valid[i]; assign core_bus_out_if[i].rsp_ready = core_rsp_in_ready[i]; end diff --git a/hw/rtl/core/VX_csr_data.sv b/hw/rtl/core/VX_csr_data.sv index a2b0741add..aa9b30e05c 100644 --- a/hw/rtl/core/VX_csr_data.sv +++ b/hw/rtl/core/VX_csr_data.sv @@ -155,41 +155,41 @@ import VX_fpu_pkg::*; // CSRs read ////////////////////////////////////////////////////////////// - reg [`XLEN-1:0] read_data_ro_r; - reg [`XLEN-1:0] read_data_rw_r; - reg read_addr_valid_r; + reg [`XLEN-1:0] read_data_ro_w; + reg [`XLEN-1:0] read_data_rw_w; + reg read_addr_valid_w; always @(*) begin - read_data_ro_r = '0; - read_data_rw_r = '0; - read_addr_valid_r = 1; + read_data_ro_w = '0; + read_data_rw_w = '0; + read_addr_valid_w = 1; case (read_addr) - `VX_CSR_MVENDORID : read_data_ro_r = `XLEN'(`VENDOR_ID); - `VX_CSR_MARCHID : read_data_ro_r = `XLEN'(`ARCHITECTURE_ID); - `VX_CSR_MIMPID : read_data_ro_r = `XLEN'(`IMPLEMENTATION_ID); - `VX_CSR_MISA : 
read_data_ro_r = `XLEN'({2'(`CLOG2(`XLEN/16)), 30'(`MISA_STD)}); + `VX_CSR_MVENDORID : read_data_ro_w = `XLEN'(`VENDOR_ID); + `VX_CSR_MARCHID : read_data_ro_w = `XLEN'(`ARCHITECTURE_ID); + `VX_CSR_MIMPID : read_data_ro_w = `XLEN'(`IMPLEMENTATION_ID); + `VX_CSR_MISA : read_data_ro_w = `XLEN'({2'(`CLOG2(`XLEN/16)), 30'(`MISA_STD)}); `ifdef EXT_F_ENABLE - `VX_CSR_FFLAGS : read_data_rw_r = `XLEN'(fcsr[read_wid][`FP_FLAGS_BITS-1:0]); - `VX_CSR_FRM : read_data_rw_r = `XLEN'(fcsr[read_wid][`INST_FRM_BITS+`FP_FLAGS_BITS-1:`FP_FLAGS_BITS]); - `VX_CSR_FCSR : read_data_rw_r = `XLEN'(fcsr[read_wid]); + `VX_CSR_FFLAGS : read_data_rw_w = `XLEN'(fcsr[read_wid][`FP_FLAGS_BITS-1:0]); + `VX_CSR_FRM : read_data_rw_w = `XLEN'(fcsr[read_wid][`INST_FRM_BITS+`FP_FLAGS_BITS-1:`FP_FLAGS_BITS]); + `VX_CSR_FCSR : read_data_rw_w = `XLEN'(fcsr[read_wid]); `endif - `VX_CSR_MSCRATCH : read_data_rw_r = mscratch; + `VX_CSR_MSCRATCH : read_data_rw_w = mscratch; - `VX_CSR_WARP_ID : read_data_ro_r = `XLEN'(read_wid); - `VX_CSR_CORE_ID : read_data_ro_r = `XLEN'(CORE_ID); - `VX_CSR_ACTIVE_THREADS: read_data_ro_r = `XLEN'(thread_masks[read_wid]); - `VX_CSR_ACTIVE_WARPS: read_data_ro_r = `XLEN'(active_warps); - `VX_CSR_NUM_THREADS: read_data_ro_r = `XLEN'(`NUM_THREADS); - `VX_CSR_NUM_WARPS : read_data_ro_r = `XLEN'(`NUM_WARPS); - `VX_CSR_NUM_CORES : read_data_ro_r = `XLEN'(`NUM_CORES * `NUM_CLUSTERS); - `VX_CSR_LOCAL_MEM_BASE: read_data_ro_r = `XLEN'(`LMEM_BASE_ADDR); + `VX_CSR_WARP_ID : read_data_ro_w = `XLEN'(read_wid); + `VX_CSR_CORE_ID : read_data_ro_w = `XLEN'(CORE_ID); + `VX_CSR_ACTIVE_THREADS: read_data_ro_w = `XLEN'(thread_masks[read_wid]); + `VX_CSR_ACTIVE_WARPS: read_data_ro_w = `XLEN'(active_warps); + `VX_CSR_NUM_THREADS: read_data_ro_w = `XLEN'(`NUM_THREADS); + `VX_CSR_NUM_WARPS : read_data_ro_w = `XLEN'(`NUM_WARPS); + `VX_CSR_NUM_CORES : read_data_ro_w = `XLEN'(`NUM_CORES * `NUM_CLUSTERS); + `VX_CSR_LOCAL_MEM_BASE: read_data_ro_w = `XLEN'(`LMEM_BASE_ADDR); - `CSR_READ_64(`VX_CSR_MCYCLE, 
read_data_ro_r, cycles); + `CSR_READ_64(`VX_CSR_MCYCLE, read_data_ro_w, cycles); - `VX_CSR_MPM_RESERVED : read_data_ro_r = 'x; - `VX_CSR_MPM_RESERVED_H : read_data_ro_r = 'x; + `VX_CSR_MPM_RESERVED : read_data_ro_w = 'x; + `VX_CSR_MPM_RESERVED_H : read_data_ro_w = 'x; - `CSR_READ_64(`VX_CSR_MINSTRET, read_data_ro_r, commit_csr_if.instret); + `CSR_READ_64(`VX_CSR_MINSTRET, read_data_ro_w, commit_csr_if.instret); `VX_CSR_SATP, `VX_CSR_MSTATUS, @@ -200,77 +200,77 @@ import VX_fpu_pkg::*; `VX_CSR_MTVEC, `VX_CSR_MEPC, `VX_CSR_PMPCFG0, - `VX_CSR_PMPADDR0 : read_data_ro_r = `XLEN'(0); + `VX_CSR_PMPADDR0 : read_data_ro_w = `XLEN'(0); default: begin - read_addr_valid_r = 0; + read_addr_valid_w = 0; if ((read_addr >= `VX_CSR_MPM_USER && read_addr < (`VX_CSR_MPM_USER + 32)) || (read_addr >= `VX_CSR_MPM_USER_H && read_addr < (`VX_CSR_MPM_USER_H + 32))) begin - read_addr_valid_r = 1; + read_addr_valid_w = 1; `ifdef PERF_ENABLE case (base_dcrs.mpm_class) `VX_DCR_MPM_CLASS_CORE: begin case (read_addr) // PERF: pipeline - `CSR_READ_64(`VX_CSR_MPM_SCHED_ID, read_data_ro_r, pipeline_perf_if.sched.idles); - `CSR_READ_64(`VX_CSR_MPM_SCHED_ST, read_data_ro_r, pipeline_perf_if.sched.stalls); - `CSR_READ_64(`VX_CSR_MPM_IBUF_ST, read_data_ro_r, pipeline_perf_if.issue.ibf_stalls); - `CSR_READ_64(`VX_CSR_MPM_SCRB_ST, read_data_ro_r, pipeline_perf_if.issue.scb_stalls); - `CSR_READ_64(`VX_CSR_MPM_OPDS_ST, read_data_ro_r, pipeline_perf_if.issue.opd_stalls); - `CSR_READ_64(`VX_CSR_MPM_SCRB_ALU, read_data_ro_r, pipeline_perf_if.issue.units_uses[`EX_ALU]); + `CSR_READ_64(`VX_CSR_MPM_SCHED_ID, read_data_ro_w, pipeline_perf_if.sched.idles); + `CSR_READ_64(`VX_CSR_MPM_SCHED_ST, read_data_ro_w, pipeline_perf_if.sched.stalls); + `CSR_READ_64(`VX_CSR_MPM_IBUF_ST, read_data_ro_w, pipeline_perf_if.issue.ibf_stalls); + `CSR_READ_64(`VX_CSR_MPM_SCRB_ST, read_data_ro_w, pipeline_perf_if.issue.scb_stalls); + `CSR_READ_64(`VX_CSR_MPM_OPDS_ST, read_data_ro_w, pipeline_perf_if.issue.opd_stalls); + 
`CSR_READ_64(`VX_CSR_MPM_SCRB_ALU, read_data_ro_w, pipeline_perf_if.issue.units_uses[`EX_ALU]); `ifdef EXT_F_ENABLE - `CSR_READ_64(`VX_CSR_MPM_SCRB_FPU, read_data_ro_r, pipeline_perf_if.issue.units_uses[`EX_FPU]); + `CSR_READ_64(`VX_CSR_MPM_SCRB_FPU, read_data_ro_w, pipeline_perf_if.issue.units_uses[`EX_FPU]); `else - `CSR_READ_64(`VX_CSR_MPM_SCRB_FPU, read_data_ro_r, `PERF_CTR_BITS'(0)); + `CSR_READ_64(`VX_CSR_MPM_SCRB_FPU, read_data_ro_w, `PERF_CTR_BITS'(0)); `endif - `CSR_READ_64(`VX_CSR_MPM_SCRB_LSU, read_data_ro_r, pipeline_perf_if.issue.units_uses[`EX_LSU]); - `CSR_READ_64(`VX_CSR_MPM_SCRB_SFU, read_data_ro_r, pipeline_perf_if.issue.units_uses[`EX_SFU]); - `CSR_READ_64(`VX_CSR_MPM_SCRB_CSRS, read_data_ro_r, pipeline_perf_if.issue.sfu_uses[`SFU_CSRS]); - `CSR_READ_64(`VX_CSR_MPM_SCRB_WCTL, read_data_ro_r, pipeline_perf_if.issue.sfu_uses[`SFU_WCTL]); + `CSR_READ_64(`VX_CSR_MPM_SCRB_LSU, read_data_ro_w, pipeline_perf_if.issue.units_uses[`EX_LSU]); + `CSR_READ_64(`VX_CSR_MPM_SCRB_SFU, read_data_ro_w, pipeline_perf_if.issue.units_uses[`EX_SFU]); + `CSR_READ_64(`VX_CSR_MPM_SCRB_CSRS, read_data_ro_w, pipeline_perf_if.issue.sfu_uses[`SFU_CSRS]); + `CSR_READ_64(`VX_CSR_MPM_SCRB_WCTL, read_data_ro_w, pipeline_perf_if.issue.sfu_uses[`SFU_WCTL]); // PERF: memory - `CSR_READ_64(`VX_CSR_MPM_IFETCHES, read_data_ro_r, pipeline_perf_if.ifetches); - `CSR_READ_64(`VX_CSR_MPM_LOADS, read_data_ro_r, pipeline_perf_if.loads); - `CSR_READ_64(`VX_CSR_MPM_STORES, read_data_ro_r, pipeline_perf_if.stores); - `CSR_READ_64(`VX_CSR_MPM_IFETCH_LT, read_data_ro_r, pipeline_perf_if.ifetch_latency); - `CSR_READ_64(`VX_CSR_MPM_LOAD_LT, read_data_ro_r, pipeline_perf_if.load_latency); + `CSR_READ_64(`VX_CSR_MPM_IFETCHES, read_data_ro_w, pipeline_perf_if.ifetches); + `CSR_READ_64(`VX_CSR_MPM_LOADS, read_data_ro_w, pipeline_perf_if.loads); + `CSR_READ_64(`VX_CSR_MPM_STORES, read_data_ro_w, pipeline_perf_if.stores); + `CSR_READ_64(`VX_CSR_MPM_IFETCH_LT, read_data_ro_w, 
pipeline_perf_if.ifetch_latency); + `CSR_READ_64(`VX_CSR_MPM_LOAD_LT, read_data_ro_w, pipeline_perf_if.load_latency); default:; endcase end `VX_DCR_MPM_CLASS_MEM: begin case (read_addr) // PERF: icache - `CSR_READ_64(`VX_CSR_MPM_ICACHE_READS, read_data_ro_r, mem_perf_if.icache.reads); - `CSR_READ_64(`VX_CSR_MPM_ICACHE_MISS_R, read_data_ro_r, mem_perf_if.icache.read_misses); - `CSR_READ_64(`VX_CSR_MPM_ICACHE_MSHR_ST, read_data_ro_r, mem_perf_if.icache.mshr_stalls); + `CSR_READ_64(`VX_CSR_MPM_ICACHE_READS, read_data_ro_w, mem_perf_if.icache.reads); + `CSR_READ_64(`VX_CSR_MPM_ICACHE_MISS_R, read_data_ro_w, mem_perf_if.icache.read_misses); + `CSR_READ_64(`VX_CSR_MPM_ICACHE_MSHR_ST, read_data_ro_w, mem_perf_if.icache.mshr_stalls); // PERF: dcache - `CSR_READ_64(`VX_CSR_MPM_DCACHE_READS, read_data_ro_r, mem_perf_if.dcache.reads); - `CSR_READ_64(`VX_CSR_MPM_DCACHE_WRITES, read_data_ro_r, mem_perf_if.dcache.writes); - `CSR_READ_64(`VX_CSR_MPM_DCACHE_MISS_R, read_data_ro_r, mem_perf_if.dcache.read_misses); - `CSR_READ_64(`VX_CSR_MPM_DCACHE_MISS_W, read_data_ro_r, mem_perf_if.dcache.write_misses); - `CSR_READ_64(`VX_CSR_MPM_DCACHE_BANK_ST, read_data_ro_r, mem_perf_if.dcache.bank_stalls); - `CSR_READ_64(`VX_CSR_MPM_DCACHE_MSHR_ST, read_data_ro_r, mem_perf_if.dcache.mshr_stalls); + `CSR_READ_64(`VX_CSR_MPM_DCACHE_READS, read_data_ro_w, mem_perf_if.dcache.reads); + `CSR_READ_64(`VX_CSR_MPM_DCACHE_WRITES, read_data_ro_w, mem_perf_if.dcache.writes); + `CSR_READ_64(`VX_CSR_MPM_DCACHE_MISS_R, read_data_ro_w, mem_perf_if.dcache.read_misses); + `CSR_READ_64(`VX_CSR_MPM_DCACHE_MISS_W, read_data_ro_w, mem_perf_if.dcache.write_misses); + `CSR_READ_64(`VX_CSR_MPM_DCACHE_BANK_ST, read_data_ro_w, mem_perf_if.dcache.bank_stalls); + `CSR_READ_64(`VX_CSR_MPM_DCACHE_MSHR_ST, read_data_ro_w, mem_perf_if.dcache.mshr_stalls); // PERF: lmem - `CSR_READ_64(`VX_CSR_MPM_LMEM_READS, read_data_ro_r, mem_perf_if.lmem.reads); - `CSR_READ_64(`VX_CSR_MPM_LMEM_WRITES, read_data_ro_r, 
mem_perf_if.lmem.writes); - `CSR_READ_64(`VX_CSR_MPM_LMEM_BANK_ST, read_data_ro_r, mem_perf_if.lmem.bank_stalls); + `CSR_READ_64(`VX_CSR_MPM_LMEM_READS, read_data_ro_w, mem_perf_if.lmem.reads); + `CSR_READ_64(`VX_CSR_MPM_LMEM_WRITES, read_data_ro_w, mem_perf_if.lmem.writes); + `CSR_READ_64(`VX_CSR_MPM_LMEM_BANK_ST, read_data_ro_w, mem_perf_if.lmem.bank_stalls); // PERF: l2cache - `CSR_READ_64(`VX_CSR_MPM_L2CACHE_READS, read_data_ro_r, mem_perf_if.l2cache.reads); - `CSR_READ_64(`VX_CSR_MPM_L2CACHE_WRITES, read_data_ro_r, mem_perf_if.l2cache.writes); - `CSR_READ_64(`VX_CSR_MPM_L2CACHE_MISS_R, read_data_ro_r, mem_perf_if.l2cache.read_misses); - `CSR_READ_64(`VX_CSR_MPM_L2CACHE_MISS_W, read_data_ro_r, mem_perf_if.l2cache.write_misses); - `CSR_READ_64(`VX_CSR_MPM_L2CACHE_BANK_ST, read_data_ro_r, mem_perf_if.l2cache.bank_stalls); - `CSR_READ_64(`VX_CSR_MPM_L2CACHE_MSHR_ST, read_data_ro_r, mem_perf_if.l2cache.mshr_stalls); + `CSR_READ_64(`VX_CSR_MPM_L2CACHE_READS, read_data_ro_w, mem_perf_if.l2cache.reads); + `CSR_READ_64(`VX_CSR_MPM_L2CACHE_WRITES, read_data_ro_w, mem_perf_if.l2cache.writes); + `CSR_READ_64(`VX_CSR_MPM_L2CACHE_MISS_R, read_data_ro_w, mem_perf_if.l2cache.read_misses); + `CSR_READ_64(`VX_CSR_MPM_L2CACHE_MISS_W, read_data_ro_w, mem_perf_if.l2cache.write_misses); + `CSR_READ_64(`VX_CSR_MPM_L2CACHE_BANK_ST, read_data_ro_w, mem_perf_if.l2cache.bank_stalls); + `CSR_READ_64(`VX_CSR_MPM_L2CACHE_MSHR_ST, read_data_ro_w, mem_perf_if.l2cache.mshr_stalls); // PERF: l3cache - `CSR_READ_64(`VX_CSR_MPM_L3CACHE_READS, read_data_ro_r, mem_perf_if.l3cache.reads); - `CSR_READ_64(`VX_CSR_MPM_L3CACHE_WRITES, read_data_ro_r, mem_perf_if.l3cache.writes); - `CSR_READ_64(`VX_CSR_MPM_L3CACHE_MISS_R, read_data_ro_r, mem_perf_if.l3cache.read_misses); - `CSR_READ_64(`VX_CSR_MPM_L3CACHE_MISS_W, read_data_ro_r, mem_perf_if.l3cache.write_misses); - `CSR_READ_64(`VX_CSR_MPM_L3CACHE_BANK_ST, read_data_ro_r, mem_perf_if.l3cache.bank_stalls); - `CSR_READ_64(`VX_CSR_MPM_L3CACHE_MSHR_ST, 
read_data_ro_r, mem_perf_if.l3cache.mshr_stalls); + `CSR_READ_64(`VX_CSR_MPM_L3CACHE_READS, read_data_ro_w, mem_perf_if.l3cache.reads); + `CSR_READ_64(`VX_CSR_MPM_L3CACHE_WRITES, read_data_ro_w, mem_perf_if.l3cache.writes); + `CSR_READ_64(`VX_CSR_MPM_L3CACHE_MISS_R, read_data_ro_w, mem_perf_if.l3cache.read_misses); + `CSR_READ_64(`VX_CSR_MPM_L3CACHE_MISS_W, read_data_ro_w, mem_perf_if.l3cache.write_misses); + `CSR_READ_64(`VX_CSR_MPM_L3CACHE_BANK_ST, read_data_ro_w, mem_perf_if.l3cache.bank_stalls); + `CSR_READ_64(`VX_CSR_MPM_L3CACHE_MSHR_ST, read_data_ro_w, mem_perf_if.l3cache.mshr_stalls); // PERF: memory - `CSR_READ_64(`VX_CSR_MPM_MEM_READS, read_data_ro_r, mem_perf_if.mem.reads); - `CSR_READ_64(`VX_CSR_MPM_MEM_WRITES, read_data_ro_r, mem_perf_if.mem.writes); - `CSR_READ_64(`VX_CSR_MPM_MEM_LT, read_data_ro_r, mem_perf_if.mem.latency); + `CSR_READ_64(`VX_CSR_MPM_MEM_READS, read_data_ro_w, mem_perf_if.mem.reads); + `CSR_READ_64(`VX_CSR_MPM_MEM_WRITES, read_data_ro_w, mem_perf_if.mem.writes); + `CSR_READ_64(`VX_CSR_MPM_MEM_LT, read_data_ro_w, mem_perf_if.mem.latency); default:; endcase end @@ -282,12 +282,12 @@ import VX_fpu_pkg::*; endcase end - assign read_data_ro = read_data_ro_r; - assign read_data_rw = read_data_rw_r; + assign read_data_ro = read_data_ro_w; + assign read_data_rw = read_data_rw_w; `UNUSED_VAR (base_dcrs) - `RUNTIME_ASSERT(~read_enable || read_addr_valid_r, ("%t: *** invalid CSR read address: 0x%0h (#%0d)", $time, read_addr, read_uuid)) + `RUNTIME_ASSERT(~read_enable || read_addr_valid_w, ("%t: *** invalid CSR read address: 0x%0h (#%0d)", $time, read_addr, read_uuid)) `ifdef PERF_ENABLE `UNUSED_VAR (mem_perf_if.icache); diff --git a/hw/rtl/core/VX_decode.sv b/hw/rtl/core/VX_decode.sv index 4f6ffe100f..de317d4978 100644 --- a/hw/rtl/core/VX_decode.sv +++ b/hw/rtl/core/VX_decode.sv @@ -15,15 +15,15 @@ `ifdef EXT_F_ENABLE `define USED_IREG(x) \ - x``_r = {1'b0, ``x}; \ + x``_v = {1'b0, ``x}; \ use_``x = 1 `define USED_FREG(x) \ - x``_r = {1'b1, 
``x}; \ + x``_v = {1'b1, ``x}; \ use_``x = 1 `else `define USED_IREG(x) \ - x``_r = ``x; \ + x``_v = ``x; \ use_``x = 1 `endif @@ -50,7 +50,7 @@ module VX_decode import VX_gpu_pkg::*; #( reg [`EX_BITS-1:0] ex_type; reg [`INST_OP_BITS-1:0] op_type; op_args_t op_args; - reg [`NR_BITS-1:0] rd_r, rs1_r, rs2_r, rs3_r; + reg [`NR_BITS-1:0] rd_v, rs1_v, rs2_v, rs3_v; reg use_rd, use_rs1, use_rs2, use_rs3; reg is_wstall; @@ -155,10 +155,10 @@ module VX_decode import VX_gpu_pkg::*; #( ex_type = '0; op_type = 'x; op_args = 'x; - rd_r = '0; - rs1_r = '0; - rs2_r = '0; - rs3_r = '0; + rd_v = '0; + rs1_v = '0; + rs2_v = '0; + rs3_v = '0; use_rd = 0; use_rs1 = 0; use_rs2 = 0; @@ -527,7 +527,7 @@ module VX_decode import VX_gpu_pkg::*; #( end // disable write to integer register r0 - wire wb = use_rd && (rd_r != 0); + wire wb = use_rd && (rd_v != 0); VX_elastic_buffer #( .DATAW (DATAW), @@ -537,7 +537,7 @@ module VX_decode import VX_gpu_pkg::*; #( .reset (reset), .valid_in (fetch_if.valid), .ready_in (fetch_if.ready), - .data_in ({fetch_if.data.uuid, fetch_if.data.wid, fetch_if.data.tmask, fetch_if.data.PC, ex_type, op_type, op_args, wb, rd_r, rs1_r, rs2_r, rs3_r}), + .data_in ({fetch_if.data.uuid, fetch_if.data.wid, fetch_if.data.tmask, fetch_if.data.PC, ex_type, op_type, op_args, wb, rd_v, rs1_v, rs2_v, rs3_v}), .data_out ({decode_if.data.uuid, decode_if.data.wid, decode_if.data.tmask, decode_if.data.PC, decode_if.data.ex_type, decode_if.data.op_type, decode_if.data.op_args, decode_if.data.wb, decode_if.data.rd, decode_if.data.rs1, decode_if.data.rs2, decode_if.data.rs3}), .valid_out (decode_if.valid), .ready_out (decode_if.ready) diff --git a/hw/rtl/core/VX_gather_unit.sv b/hw/rtl/core/VX_gather_unit.sv index 293495ebaf..402824dacb 100644 --- a/hw/rtl/core/VX_gather_unit.sv +++ b/hw/rtl/core/VX_gather_unit.sv @@ -94,31 +94,31 @@ module VX_gather_unit import VX_gpu_pkg::*; #( .ready_out (commit_tmp_if.ready) ); - logic [`NUM_THREADS-1:0] commit_tmask_r; - logic 
[`NUM_THREADS-1:0][`XLEN-1:0] commit_data_r; + logic [`NUM_THREADS-1:0] commit_tmask_w; + logic [`NUM_THREADS-1:0][`XLEN-1:0] commit_data_w; if (PID_BITS != 0) begin always @(*) begin - commit_tmask_r = '0; - commit_data_r = 'x; + commit_tmask_w = '0; + commit_data_w = 'x; for (integer j = 0; j < NUM_LANES; ++j) begin - commit_tmask_r[commit_tmp_if.data.pid * NUM_LANES + j] = commit_tmp_if.data.tmask[j]; - commit_data_r[commit_tmp_if.data.pid * NUM_LANES + j] = commit_tmp_if.data.data[j]; + commit_tmask_w[commit_tmp_if.data.pid * NUM_LANES + j] = commit_tmp_if.data.tmask[j]; + commit_data_w[commit_tmp_if.data.pid * NUM_LANES + j] = commit_tmp_if.data.data[j]; end end end else begin - assign commit_tmask_r = commit_tmp_if.data.tmask; - assign commit_data_r = commit_tmp_if.data.data; + assign commit_tmask_w = commit_tmp_if.data.tmask; + assign commit_data_w = commit_tmp_if.data.data; end assign commit_out_if[i].valid = commit_tmp_if.valid; assign commit_out_if[i].data = { commit_tmp_if.data.uuid, commit_tmp_if.data.wid, - commit_tmask_r, + commit_tmask_w, commit_tmp_if.data.PC, commit_tmp_if.data.wb, commit_tmp_if.data.rd, - commit_data_r, + commit_data_w, 1'b0, // PID commit_tmp_if.data.sop, commit_tmp_if.data.eop diff --git a/hw/rtl/core/VX_lsu_slice.sv b/hw/rtl/core/VX_lsu_slice.sv index f83b23fb3c..8c277f3e93 100644 --- a/hw/rtl/core/VX_lsu_slice.sv +++ b/hw/rtl/core/VX_lsu_slice.sv @@ -158,30 +158,30 @@ module VX_lsu_slice import VX_gpu_pkg::*; #( // byte enable formatting for (genvar i = 0; i < NUM_LANES; ++i) begin - reg [LSU_WORD_SIZE-1:0] mem_req_byteen_r; + reg [LSU_WORD_SIZE-1:0] mem_req_byteen_w; always @(*) begin - mem_req_byteen_r = '0; + mem_req_byteen_w = '0; case (`INST_LSU_WSIZE(execute_if.data.op_type)) 0: begin // 8-bit - mem_req_byteen_r[req_align[i]] = 1'b1; + mem_req_byteen_w[req_align[i]] = 1'b1; end 1: begin // 16 bit - mem_req_byteen_r[{req_align[i][REQ_ASHIFT-1:1], 1'b0}] = 1'b1; - mem_req_byteen_r[{req_align[i][REQ_ASHIFT-1:1], 1'b1}] = 
1'b1; + mem_req_byteen_w[{req_align[i][REQ_ASHIFT-1:1], 1'b0}] = 1'b1; + mem_req_byteen_w[{req_align[i][REQ_ASHIFT-1:1], 1'b1}] = 1'b1; end `ifdef XLEN_64 2: begin // 32 bit - mem_req_byteen_r[{req_align[i][REQ_ASHIFT-1:2], 2'b00}] = 1'b1; - mem_req_byteen_r[{req_align[i][REQ_ASHIFT-1:2], 2'b01}] = 1'b1; - mem_req_byteen_r[{req_align[i][REQ_ASHIFT-1:2], 2'b10}] = 1'b1; - mem_req_byteen_r[{req_align[i][REQ_ASHIFT-1:2], 2'b11}] = 1'b1; + mem_req_byteen_w[{req_align[i][REQ_ASHIFT-1:2], 2'b00}] = 1'b1; + mem_req_byteen_w[{req_align[i][REQ_ASHIFT-1:2], 2'b01}] = 1'b1; + mem_req_byteen_w[{req_align[i][REQ_ASHIFT-1:2], 2'b10}] = 1'b1; + mem_req_byteen_w[{req_align[i][REQ_ASHIFT-1:2], 2'b11}] = 1'b1; end `endif // 3: 64 bit - default : mem_req_byteen_r = {LSU_WORD_SIZE{1'b1}}; + default : mem_req_byteen_w = {LSU_WORD_SIZE{1'b1}}; endcase end - assign mem_req_byteen[i] = mem_req_byteen_r; + assign mem_req_byteen[i] = mem_req_byteen_w; end // memory misalignment not supported! diff --git a/hw/rtl/fpu/VX_fpu_dsp.sv b/hw/rtl/fpu/VX_fpu_dsp.sv index 2e479976a1..c75e3e3fdc 100644 --- a/hw/rtl/fpu/VX_fpu_dsp.sv +++ b/hw/rtl/fpu/VX_fpu_dsp.sv @@ -315,15 +315,15 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #( for (genvar i = 0; i < NUM_LANES; ++i) begin `ifdef FPU_RV64F - reg [`XLEN-1:0] result_r; + reg [`XLEN-1:0] result_w; always @(*) begin case (op_ret_int_out) - 2'b11: result_r = `XLEN'($signed(result_s[i])); - 2'b01: result_r = {32'h00000000, result_s[i]}; - default: result_r = {32'hffffffff, result_s[i]}; + 2'b11: result_w = `XLEN'($signed(result_s[i])); + 2'b01: result_w = {32'h00000000, result_s[i]}; + default: result_w = {32'hffffffff, result_s[i]}; endcase end - assign result[i] = result_r; + assign result[i] = result_w; `else assign result[i] = result_s[i]; `endif diff --git a/hw/rtl/libs/VX_onehot_encoder.sv b/hw/rtl/libs/VX_onehot_encoder.sv index 8f7ada2579..6246a673c5 100644 --- a/hw/rtl/libs/VX_onehot_encoder.sv +++ b/hw/rtl/libs/VX_onehot_encoder.sv @@ -87,29 +87,29 
@@ module VX_onehot_encoder #( end else begin - reg [LN-1:0] index_r; + reg [LN-1:0] index_w; if (REVERSE != 0) begin always @(*) begin - index_r = 'x; + index_w = 'x; for (integer i = N-1; i >= 0; --i) begin if (data_in[i]) begin - index_r = LN'(N-1-i); + index_w = LN'(N-1-i); end end end end else begin always @(*) begin - index_r = 'x; + index_w = 'x; for (integer i = 0; i < N; ++i) begin if (data_in[i]) begin - index_r = LN'(i); + index_w = LN'(i); end end end end - assign data_out = index_r; + assign data_out = index_w; assign valid_out = (| data_in); end diff --git a/hw/rtl/libs/VX_onehot_mux.sv b/hw/rtl/libs/VX_onehot_mux.sv index 74e19a41b5..e13186015a 100644 --- a/hw/rtl/libs/VX_onehot_mux.sv +++ b/hw/rtl/libs/VX_onehot_mux.sv @@ -31,86 +31,86 @@ module VX_onehot_mux #( `UNUSED_VAR (sel_in) assign data_out = sel_in[0] ? data_in[0] : data_in[1]; end else if (LUT_OPT && N == 3) begin - reg [DATAW-1:0] data_out_r; + reg [DATAW-1:0] data_out_w; always @(*) begin case (sel_in) - 3'b001: data_out_r = data_in[0]; - 3'b010: data_out_r = data_in[1]; - 3'b100: data_out_r = data_in[2]; - default: data_out_r = 'x; + 3'b001: data_out_w = data_in[0]; + 3'b010: data_out_w = data_in[1]; + 3'b100: data_out_w = data_in[2]; + default: data_out_w = 'x; endcase end - assign data_out = data_out_r; + assign data_out = data_out_w; end else if (LUT_OPT && N == 4) begin - reg [DATAW-1:0] data_out_r; + reg [DATAW-1:0] data_out_w; always @(*) begin case (sel_in) - 4'b0001: data_out_r = data_in[0]; - 4'b0010: data_out_r = data_in[1]; - 4'b0100: data_out_r = data_in[2]; - 4'b1000: data_out_r = data_in[3]; - default: data_out_r = 'x; + 4'b0001: data_out_w = data_in[0]; + 4'b0010: data_out_w = data_in[1]; + 4'b0100: data_out_w = data_in[2]; + 4'b1000: data_out_w = data_in[3]; + default: data_out_w = 'x; endcase end - assign data_out = data_out_r; + assign data_out = data_out_w; end else if (LUT_OPT && N == 5) begin - reg [DATAW-1:0] data_out_r; + reg [DATAW-1:0] data_out_w; always @(*) 
begin case (sel_in) - 5'b00001: data_out_r = data_in[0]; - 5'b00010: data_out_r = data_in[1]; - 5'b00100: data_out_r = data_in[2]; - 5'b01000: data_out_r = data_in[3]; - 5'b10000: data_out_r = data_in[4]; - default: data_out_r = 'x; + 5'b00001: data_out_w = data_in[0]; + 5'b00010: data_out_w = data_in[1]; + 5'b00100: data_out_w = data_in[2]; + 5'b01000: data_out_w = data_in[3]; + 5'b10000: data_out_w = data_in[4]; + default: data_out_w = 'x; endcase end - assign data_out = data_out_r; + assign data_out = data_out_w; end else if (LUT_OPT && N == 6) begin - reg [DATAW-1:0] data_out_r; + reg [DATAW-1:0] data_out_w; always @(*) begin case (sel_in) - 6'b000001: data_out_r = data_in[0]; - 6'b000010: data_out_r = data_in[1]; - 6'b000100: data_out_r = data_in[2]; - 6'b001000: data_out_r = data_in[3]; - 6'b010000: data_out_r = data_in[4]; - 6'b100000: data_out_r = data_in[5]; - default: data_out_r = 'x; + 6'b000001: data_out_w = data_in[0]; + 6'b000010: data_out_w = data_in[1]; + 6'b000100: data_out_w = data_in[2]; + 6'b001000: data_out_w = data_in[3]; + 6'b010000: data_out_w = data_in[4]; + 6'b100000: data_out_w = data_in[5]; + default: data_out_w = 'x; endcase end - assign data_out = data_out_r; + assign data_out = data_out_w; end else if (LUT_OPT && N == 7) begin - reg [DATAW-1:0] data_out_r; + reg [DATAW-1:0] data_out_w; always @(*) begin case (sel_in) - 7'b0000001: data_out_r = data_in[0]; - 7'b0000010: data_out_r = data_in[1]; - 7'b0000100: data_out_r = data_in[2]; - 7'b0001000: data_out_r = data_in[3]; - 7'b0010000: data_out_r = data_in[4]; - 7'b0100000: data_out_r = data_in[5]; - 7'b1000000: data_out_r = data_in[6]; - default: data_out_r = 'x; + 7'b0000001: data_out_w = data_in[0]; + 7'b0000010: data_out_w = data_in[1]; + 7'b0000100: data_out_w = data_in[2]; + 7'b0001000: data_out_w = data_in[3]; + 7'b0010000: data_out_w = data_in[4]; + 7'b0100000: data_out_w = data_in[5]; + 7'b1000000: data_out_w = data_in[6]; + default: data_out_w = 'x; endcase end - assign 
data_out = data_out_r; + assign data_out = data_out_w; end else if (LUT_OPT && N == 8) begin - reg [DATAW-1:0] data_out_r; + reg [DATAW-1:0] data_out_w; always @(*) begin case (sel_in) - 8'b00000001: data_out_r = data_in[0]; - 8'b00000010: data_out_r = data_in[1]; - 8'b00000100: data_out_r = data_in[2]; - 8'b00001000: data_out_r = data_in[3]; - 8'b00010000: data_out_r = data_in[4]; - 8'b00100000: data_out_r = data_in[5]; - 8'b01000000: data_out_r = data_in[6]; - 8'b10000000: data_out_r = data_in[7]; - default: data_out_r = 'x; + 8'b00000001: data_out_w = data_in[0]; + 8'b00000010: data_out_w = data_in[1]; + 8'b00000100: data_out_w = data_in[2]; + 8'b00001000: data_out_w = data_in[3]; + 8'b00010000: data_out_w = data_in[4]; + 8'b00100000: data_out_w = data_in[5]; + 8'b01000000: data_out_w = data_in[6]; + 8'b10000000: data_out_w = data_in[7]; + default: data_out_w = 'x; endcase end - assign data_out = data_out_r; + assign data_out = data_out_w; end else if (MODEL == 1) begin wire [N-1:0][DATAW-1:0] mask; for (genvar i = 0; i < N; ++i) begin @@ -134,16 +134,16 @@ module VX_onehot_mux #( `UNUSED_PIN (valid_out) ); end else if (MODEL == 3) begin - reg [DATAW-1:0] data_out_r; + reg [DATAW-1:0] data_out_w; always @(*) begin - data_out_r = 'x; + data_out_w = 'x; for (integer i = 0; i < N; ++i) begin if (sel_in[i]) begin - data_out_r = data_in[i]; + data_out_w = data_in[i]; end end end - assign data_out = data_out_r; + assign data_out = data_out_w; end endmodule diff --git a/hw/rtl/libs/VX_popcount.sv b/hw/rtl/libs/VX_popcount.sv index eaec78789e..3d94dd00f8 100644 --- a/hw/rtl/libs/VX_popcount.sv +++ b/hw/rtl/libs/VX_popcount.sv @@ -1,10 +1,10 @@ // Copyright © 2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
// You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -21,21 +21,21 @@ module VX_popcount63( reg [2:0] sum; always @(*) begin case (data_in) - 6'd0: sum=3'd0; 6'd1: sum=3'd1; 6'd2: sum=3'd1; 6'd3: sum=3'd2; + 6'd0: sum=3'd0; 6'd1: sum=3'd1; 6'd2: sum=3'd1; 6'd3: sum=3'd2; 6'd4: sum=3'd1; 6'd5: sum=3'd2; 6'd6: sum=3'd2; 6'd7: sum=3'd3; - 6'd8: sum=3'd1; 6'd9: sum=3'd2; 6'd10: sum=3'd2; 6'd11: sum=3'd3; + 6'd8: sum=3'd1; 6'd9: sum=3'd2; 6'd10: sum=3'd2; 6'd11: sum=3'd3; 6'd12: sum=3'd2; 6'd13: sum=3'd3; 6'd14: sum=3'd3; 6'd15: sum=3'd4; - 6'd16: sum=3'd1; 6'd17: sum=3'd2; 6'd18: sum=3'd2; 6'd19: sum=3'd3; + 6'd16: sum=3'd1; 6'd17: sum=3'd2; 6'd18: sum=3'd2; 6'd19: sum=3'd3; 6'd20: sum=3'd2; 6'd21: sum=3'd3; 6'd22: sum=3'd3; 6'd23: sum=3'd4; - 6'd24: sum=3'd2; 6'd25: sum=3'd3; 6'd26: sum=3'd3; 6'd27: sum=3'd4; + 6'd24: sum=3'd2; 6'd25: sum=3'd3; 6'd26: sum=3'd3; 6'd27: sum=3'd4; 6'd28: sum=3'd3; 6'd29: sum=3'd4; 6'd30: sum=3'd4; 6'd31: sum=3'd5; - 6'd32: sum=3'd1; 6'd33: sum=3'd2; 6'd34: sum=3'd2; 6'd35: sum=3'd3; + 6'd32: sum=3'd1; 6'd33: sum=3'd2; 6'd34: sum=3'd2; 6'd35: sum=3'd3; 6'd36: sum=3'd2; 6'd37: sum=3'd3; 6'd38: sum=3'd3; 6'd39: sum=3'd4; - 6'd40: sum=3'd2; 6'd41: sum=3'd3; 6'd42: sum=3'd3; 6'd43: sum=3'd4; + 6'd40: sum=3'd2; 6'd41: sum=3'd3; 6'd42: sum=3'd3; 6'd43: sum=3'd4; 6'd44: sum=3'd3; 6'd45: sum=3'd4; 6'd46: sum=3'd4; 6'd47: sum=3'd5; - 6'd48: sum=3'd2; 6'd49: sum=3'd3; 6'd50: sum=3'd3; 6'd51: sum=3'd4; + 6'd48: sum=3'd2; 6'd49: sum=3'd3; 6'd50: sum=3'd3; 6'd51: sum=3'd4; 6'd52: sum=3'd3; 6'd53: sum=3'd4; 6'd54: sum=3'd4; 6'd55: sum=3'd5; - 6'd56: sum=3'd3; 6'd57: sum=3'd4; 6'd58: sum=3'd4; 6'd59: sum=3'd5; + 6'd56: sum=3'd3; 6'd57: sum=3'd4; 6'd58: sum=3'd4; 6'd59: sum=3'd5; 6'd60: sum=3'd4; 
6'd61: sum=3'd5; 6'd62: sum=3'd5; 6'd63: sum=3'd6; endcase end @@ -49,7 +49,7 @@ module VX_popcount32( reg [1:0] sum; always @(*) begin case (data_in) - 3'd0: sum=2'd0; 3'd1: sum=2'd1; 3'd2: sum=2'd1; 3'd3: sum=2'd2; + 3'd0: sum=2'd0; 3'd1: sum=2'd1; 3'd2: sum=2'd1; 3'd3: sum=2'd2; 3'd4: sum=2'd1; 3'd5: sum=2'd2; 3'd6: sum=2'd2; 3'd7: sum=2'd3; endcase end @@ -88,12 +88,12 @@ endmodule module VX_popcount #( parameter MODEL = 1, parameter N = 1, - parameter M = `CLOG2(N+1) + parameter M = `CLOG2(N+1) ) ( input wire [N-1:0] data_in, output wire [M-1:0] data_out ); - `UNUSED_PARAM (MODEL) + `UNUSED_PARAM (MODEL) `ifndef SYNTHESIS assign data_out = $countones(data_in); @@ -113,10 +113,10 @@ module VX_popcount #( t_in[N-1:0] = data_in; end VX_popcount32 pc32(t_in, t_out); - assign data_out = t_out[M-1:0]; - + assign data_out = t_out[M-1:0]; + end else if (N <= 6) begin - + reg [5:0] t_in; wire [2:0] t_out; always @(*) begin @@ -125,9 +125,9 @@ module VX_popcount #( end VX_popcount63 pc63(t_in, t_out); assign data_out = t_out[M-1:0]; - + end else if (N <= 9) begin - + reg [8:0] t_in; wire [4:0] t1_out; wire [3:0] t2_out; @@ -141,7 +141,7 @@ module VX_popcount #( assign data_out = t2_out[M-1:0]; end else if (N <= 12) begin - + reg [11:0] t_in; wire [5:0] t1_out; wire [3:0] t2_out; @@ -155,7 +155,7 @@ module VX_popcount #( assign data_out = t2_out[M-1:0]; end else if (N <= 18) begin - + reg [17:0] t_in; wire [8:0] t1_out; wire [5:0] t2_out; @@ -177,17 +177,17 @@ module VX_popcount #( localparam LOGPN = `CLOG2(PN); `IGNORE_UNOPTFLAT_BEGIN - wire [M-1:0] tmp [LOGPN-1:0][PN-1:0]; + wire [M-1:0] tmp [LOGPN-1:0][PN-1:0]; `IGNORE_UNOPTFLAT_END for (genvar j = 0; j < LOGPN; ++j) begin localparam D = j + 1; localparam Q = (D < LOGPN) ? 
(D + 1) : M; - for (genvar i = 0; i < (1 << (LOGPN-j-1)); ++i) begin + for (genvar i = 0; i < (1 << (LOGPN-j-1)); ++i) begin localparam l = i * 2; localparam r = i * 2 + 1; - wire [Q-1:0] res; - if (j == 0) begin + wire [Q-1:0] res; + if (j == 0) begin if (r < N) begin assign res = data_in[l] + data_in[r]; end else if (l < N) begin @@ -203,20 +203,20 @@ module VX_popcount #( end assign data_out = tmp[LOGPN-1][0]; - + end else begin - reg [M-1:0] cnt_r; + reg [M-1:0] cnt_w; always @(*) begin - cnt_r = '0; + cnt_w = '0; for (integer i = 0; i < N; ++i) begin - cnt_r = cnt_r + M'(data_in[i]); + cnt_w = cnt_w + M'(data_in[i]); end end - assign data_out = cnt_r; - + assign data_out = cnt_w; + end `endif diff --git a/hw/rtl/libs/VX_priority_encoder.sv b/hw/rtl/libs/VX_priority_encoder.sv index 2138ea457c..3dc5291ee2 100644 --- a/hw/rtl/libs/VX_priority_encoder.sv +++ b/hw/rtl/libs/VX_priority_encoder.sv @@ -106,22 +106,22 @@ module VX_priority_encoder #( end else begin - reg [LN-1:0] index_r; - reg [N-1:0] onehot_r; + reg [LN-1:0] index_w; + reg [N-1:0] onehot_w; always @(*) begin - index_r = 'x; - onehot_r = 'x; + index_w = 'x; + onehot_w = 'x; for (integer i = N-1; i >= 0; --i) begin if (reversed[i]) begin - index_r = LN'(i); - onehot_r = N'(1) << i; + index_w = LN'(i); + onehot_w = N'(1) << i; end end end - assign index_out = index_r; - assign onehot_out = onehot_r; + assign index_out = index_w; + assign onehot_out = onehot_w; assign valid_out = (| reversed); end diff --git a/hw/rtl/libs/VX_rr_arbiter.sv b/hw/rtl/libs/VX_rr_arbiter.sv index bbfd8269d8..6199d5794c 100644 --- a/hw/rtl/libs/VX_rr_arbiter.sv +++ b/hw/rtl/libs/VX_rr_arbiter.sv @@ -40,16 +40,16 @@ module VX_rr_arbiter #( end else if (LUT_OPT && NUM_REQS == 2) begin - reg [LOG_NUM_REQS-1:0] grant_index_r; - reg [LOG_NUM_REQS-1:0] state; + reg [LOG_NUM_REQS-1:0] grant_index_w; + reg [LOG_NUM_REQS-1:0] state; always @(*) begin casez ({state, requests}) 3'b0_01, - 3'b1_?1: begin grant_index_r = LOG_NUM_REQS'(0); 
end + 3'b1_?1: begin grant_index_w = LOG_NUM_REQS'(0); end 3'b0_1?, - 3'b1_10: begin grant_index_r = LOG_NUM_REQS'(1); end - default: begin grant_index_r = 'x; end + 3'b1_10: begin grant_index_w = LOG_NUM_REQS'(1); end + default: begin grant_index_w = 'x; end endcase end @@ -57,31 +57,31 @@ module VX_rr_arbiter #( if (reset) begin state <= '0; end else if (grant_ready) begin - state <= grant_index_r; + state <= grant_index_w; end end - assign grant_index = grant_index_r; - assign grant_onehot = NUM_REQS'(1) << grant_index_r; + assign grant_index = grant_index_w; + assign grant_onehot = NUM_REQS'(1) << grant_index_w; assign grant_valid = (| requests); end else if (LUT_OPT && NUM_REQS == 3) begin - reg [LOG_NUM_REQS-1:0] grant_index_r; - reg [LOG_NUM_REQS-1:0] state; + reg [LOG_NUM_REQS-1:0] grant_index_w; + reg [LOG_NUM_REQS-1:0] state; always @(*) begin casez ({state, requests}) 5'b00_001, 5'b01_0?1, - 5'b10_??1: begin grant_index_r = LOG_NUM_REQS'(0); end + 5'b10_??1: begin grant_index_w = LOG_NUM_REQS'(0); end 5'b00_?1?, 5'b01_010, - 5'b10_?10: begin grant_index_r = LOG_NUM_REQS'(1); end + 5'b10_?10: begin grant_index_w = LOG_NUM_REQS'(1); end 5'b00_10?, 5'b01_1??, - 5'b10_100: begin grant_index_r = LOG_NUM_REQS'(2); end - default: begin grant_index_r = 'x; end + 5'b10_100: begin grant_index_w = LOG_NUM_REQS'(2); end + default: begin grant_index_w = 'x; end endcase end @@ -89,38 +89,38 @@ module VX_rr_arbiter #( if (reset) begin state <= '0; end else if (grant_ready) begin - state <= grant_index_r; + state <= grant_index_w; end end - assign grant_index = grant_index_r; - assign grant_onehot = NUM_REQS'(1) << grant_index_r; + assign grant_index = grant_index_w; + assign grant_onehot = NUM_REQS'(1) << grant_index_w; assign grant_valid = (| requests); end else if (LUT_OPT && NUM_REQS == 4) begin - reg [LOG_NUM_REQS-1:0] grant_index_r; - reg [LOG_NUM_REQS-1:0] state; + reg [LOG_NUM_REQS-1:0] grant_index_w; + reg [LOG_NUM_REQS-1:0] state; always @(*) begin casez 
({state, requests}) 6'b00_0001, 6'b01_00?1, 6'b10_0??1, - 6'b11_???1: begin grant_index_r = LOG_NUM_REQS'(0); end + 6'b11_???1: begin grant_index_w = LOG_NUM_REQS'(0); end 6'b00_??1?, 6'b01_0010, 6'b10_0?10, - 6'b11_??10: begin grant_index_r = LOG_NUM_REQS'(1); end + 6'b11_??10: begin grant_index_w = LOG_NUM_REQS'(1); end 6'b00_?10?, 6'b01_?1??, 6'b10_0100, - 6'b11_?100: begin grant_index_r = LOG_NUM_REQS'(2); end + 6'b11_?100: begin grant_index_w = LOG_NUM_REQS'(2); end 6'b00_100?, 6'b01_10??, 6'b10_1???, - 6'b11_1000: begin grant_index_r = LOG_NUM_REQS'(3); end - default: begin grant_index_r = 'x; end + 6'b11_1000: begin grant_index_w = LOG_NUM_REQS'(3); end + default: begin grant_index_w = 'x; end endcase end @@ -128,18 +128,18 @@ module VX_rr_arbiter #( if (reset) begin state <= '0; end else if (grant_ready) begin - state <= grant_index_r; + state <= grant_index_w; end end - assign grant_index = grant_index_r; - assign grant_onehot = NUM_REQS'(1) << grant_index_r; + assign grant_index = grant_index_w; + assign grant_onehot = NUM_REQS'(1) << grant_index_w; assign grant_valid = (| requests); end else if (LUT_OPT && NUM_REQS == 5) begin - reg [LOG_NUM_REQS-1:0] grant_index_r; - reg [LOG_NUM_REQS-1:0] state; + reg [LOG_NUM_REQS-1:0] grant_index_w; + reg [LOG_NUM_REQS-1:0] state; always @(*) begin casez ({state, requests}) @@ -147,28 +147,28 @@ module VX_rr_arbiter #( 8'b001_000?1, 8'b010_00??1, 8'b011_0???1, - 8'b100_????1: begin grant_index_r = LOG_NUM_REQS'(0); end + 8'b100_????1: begin grant_index_w = LOG_NUM_REQS'(0); end 8'b000_???1?, 8'b001_00010, 8'b010_00?10, 8'b011_0??10, - 8'b100_???10: begin grant_index_r = LOG_NUM_REQS'(1); end + 8'b100_???10: begin grant_index_w = LOG_NUM_REQS'(1); end 8'b000_??10?, 8'b001_??1??, 8'b010_00100, 8'b011_0?100, - 8'b100_??100: begin grant_index_r = LOG_NUM_REQS'(2); end + 8'b100_??100: begin grant_index_w = LOG_NUM_REQS'(2); end 8'b000_?100?, 8'b001_?10??, 8'b010_?1???, 8'b011_01000, - 8'b100_?1000: begin grant_index_r = 
LOG_NUM_REQS'(3); end + 8'b100_?1000: begin grant_index_w = LOG_NUM_REQS'(3); end 8'b000_1000?, 8'b001_100??, 8'b010_10???, 8'b011_1????, - 8'b100_10000: begin grant_index_r = LOG_NUM_REQS'(4); end - default: begin grant_index_r = 'x; end + 8'b100_10000: begin grant_index_w = LOG_NUM_REQS'(4); end + default: begin grant_index_w = 'x; end endcase end @@ -176,18 +176,18 @@ module VX_rr_arbiter #( if (reset) begin state <= '0; end else if (grant_ready) begin - state <= grant_index_r; + state <= grant_index_w; end end - assign grant_index = grant_index_r; - assign grant_onehot = NUM_REQS'(1) << grant_index_r; + assign grant_index = grant_index_w; + assign grant_onehot = NUM_REQS'(1) << grant_index_w; assign grant_valid = (| requests); end else if (LUT_OPT && NUM_REQS == 6) begin - reg [LOG_NUM_REQS-1:0] grant_index_r; - reg [LOG_NUM_REQS-1:0] state; + reg [LOG_NUM_REQS-1:0] grant_index_w; + reg [LOG_NUM_REQS-1:0] state; always @(*) begin casez ({state, requests}) @@ -196,38 +196,38 @@ module VX_rr_arbiter #( 9'b010_000??1, 9'b011_00???1, 9'b100_0????1, - 9'b101_?????1: begin grant_index_r = LOG_NUM_REQS'(0); end + 9'b101_?????1: begin grant_index_w = LOG_NUM_REQS'(0); end 9'b000_????1?, 9'b001_000010, 9'b010_000?10, 9'b011_00??10, 9'b100_0???10, - 9'b101_????10: begin grant_index_r = LOG_NUM_REQS'(1); end + 9'b101_????10: begin grant_index_w = LOG_NUM_REQS'(1); end 9'b000_???10?, 9'b001_???1??, 9'b010_000100, 9'b011_00?100, 9'b100_0??100, - 9'b101_???100: begin grant_index_r = LOG_NUM_REQS'(2); end + 9'b101_???100: begin grant_index_w = LOG_NUM_REQS'(2); end 9'b000_??100?, 9'b001_??10??, 9'b010_??1???, 9'b011_001000, 9'b100_0?1000, - 9'b101_??1000: begin grant_index_r = LOG_NUM_REQS'(3); end + 9'b101_??1000: begin grant_index_w = LOG_NUM_REQS'(3); end 9'b000_?1000?, 9'b001_?100??, 9'b010_?10???, 9'b011_?1????, 9'b100_010000, - 9'b101_?10000: begin grant_index_r = LOG_NUM_REQS'(4); end + 9'b101_?10000: begin grant_index_w = LOG_NUM_REQS'(4); end 9'b000_10000?, 
9'b001_1000??, 9'b010_100???, 9'b011_10????, 9'b100_1?????, - 9'b101_100000: begin grant_index_r = LOG_NUM_REQS'(5); end - default: begin grant_index_r = 'x; end + 9'b101_100000: begin grant_index_w = LOG_NUM_REQS'(5); end + default: begin grant_index_w = 'x; end endcase end @@ -235,18 +235,18 @@ module VX_rr_arbiter #( if (reset) begin state <= '0; end else if (grant_ready) begin - state <= grant_index_r; + state <= grant_index_w; end end - assign grant_index = grant_index_r; - assign grant_onehot = NUM_REQS'(1) << grant_index_r; + assign grant_index = grant_index_w; + assign grant_onehot = NUM_REQS'(1) << grant_index_w; assign grant_valid = (| requests); end else if (LUT_OPT && NUM_REQS == 7) begin - reg [LOG_NUM_REQS-1:0] grant_index_r; - reg [LOG_NUM_REQS-1:0] state; + reg [LOG_NUM_REQS-1:0] grant_index_w; + reg [LOG_NUM_REQS-1:0] state; always @(*) begin casez ({state, requests}) @@ -256,50 +256,50 @@ module VX_rr_arbiter #( 10'b011_000???1, 10'b100_000???1, 10'b101_00????1, - 10'b110_??????1: begin grant_index_r = LOG_NUM_REQS'(0); end + 10'b110_??????1: begin grant_index_w = LOG_NUM_REQS'(0); end 10'b000_?????1?, 10'b001_0000010, 10'b010_0000?10, 10'b011_000??10, 10'b100_00???10, 10'b101_0????10, - 10'b110_?????10: begin grant_index_r = LOG_NUM_REQS'(1); end + 10'b110_?????10: begin grant_index_w = LOG_NUM_REQS'(1); end 10'b000_????10?, 10'b001_????1??, 10'b010_0000100, 10'b011_000?100, 10'b100_00??100, 10'b101_0???100, - 10'b110_????100: begin grant_index_r = LOG_NUM_REQS'(2); end + 10'b110_????100: begin grant_index_w = LOG_NUM_REQS'(2); end 10'b000_???100?, 10'b001_???10??, 10'b010_???1???, 10'b011_0001000, 10'b100_00?1000, 10'b101_0??1000, - 10'b110_???1000: begin grant_index_r = LOG_NUM_REQS'(3); end + 10'b110_???1000: begin grant_index_w = LOG_NUM_REQS'(3); end 10'b000_??1000?, 10'b001_??100??, 10'b010_??10???, 10'b011_??1????, 10'b100_0010000, 10'b101_0?10000, - 10'b110_??10000: begin grant_index_r = LOG_NUM_REQS'(4); end + 10'b110_??10000: begin 
grant_index_w = LOG_NUM_REQS'(4); end 10'b000_?10000?, 10'b001_?1000??, 10'b010_?100???, 10'b011_?10????, 10'b100_?1?????, 10'b101_0100000, - 10'b110_?100000: begin grant_index_r = LOG_NUM_REQS'(5); end + 10'b110_?100000: begin grant_index_w = LOG_NUM_REQS'(5); end 10'b000_100000?, 10'b001_10000??, 10'b010_1000???, 10'b011_100????, 10'b100_10?????, 10'b101_1??????, - 10'b110_1000000: begin grant_index_r = LOG_NUM_REQS'(6); end - default: begin grant_index_r = 'x; end + 10'b110_1000000: begin grant_index_w = LOG_NUM_REQS'(6); end + default: begin grant_index_w = 'x; end endcase end @@ -307,18 +307,18 @@ module VX_rr_arbiter #( if (reset) begin state <= '0; end else if (grant_ready) begin - state <= grant_index_r; + state <= grant_index_w; end end - assign grant_index = grant_index_r; - assign grant_onehot = NUM_REQS'(1) << grant_index_r; + assign grant_index = grant_index_w; + assign grant_onehot = NUM_REQS'(1) << grant_index_w; assign grant_valid = (| requests); end else if (LUT_OPT && NUM_REQS == 8) begin - reg [LOG_NUM_REQS-1:0] grant_index_r; - reg [LOG_NUM_REQS-1:0] state; + reg [LOG_NUM_REQS-1:0] grant_index_w; + reg [LOG_NUM_REQS-1:0] state; always @(*) begin casez ({state, requests}) @@ -329,7 +329,7 @@ module VX_rr_arbiter #( 11'b100_000????1, 11'b101_00?????1, 11'b110_0??????1, - 11'b111_???????1: begin grant_index_r = LOG_NUM_REQS'(0); end + 11'b111_???????1: begin grant_index_w = LOG_NUM_REQS'(0); end 11'b000_??????1?, 11'b001_00000010, 11'b010_00000?10, @@ -337,7 +337,7 @@ module VX_rr_arbiter #( 11'b100_000???10, 11'b101_00????10, 11'b110_0?????10, - 11'b111_??????10: begin grant_index_r = LOG_NUM_REQS'(1); end + 11'b111_??????10: begin grant_index_w = LOG_NUM_REQS'(1); end 11'b000_?????10?, 11'b001_?????1??, 11'b010_00000100, @@ -345,7 +345,7 @@ module VX_rr_arbiter #( 11'b100_000??100, 11'b101_00???100, 11'b110_0????100, - 11'b111_?????100: begin grant_index_r = LOG_NUM_REQS'(2); end + 11'b111_?????100: begin grant_index_w = LOG_NUM_REQS'(2); end 
11'b000_????100?, 11'b001_????10??, 11'b010_????1???, @@ -353,7 +353,7 @@ module VX_rr_arbiter #( 11'b100_000?1000, 11'b101_00??1000, 11'b110_0???1000, - 11'b111_????1000: begin grant_index_r = LOG_NUM_REQS'(3); end + 11'b111_????1000: begin grant_index_w = LOG_NUM_REQS'(3); end 11'b000_???1000?, 11'b001_???100??, 11'b010_???10???, @@ -361,7 +361,7 @@ module VX_rr_arbiter #( 11'b100_00010000, 11'b101_00?10000, 11'b110_0??10000, - 11'b111_???10000: begin grant_index_r = LOG_NUM_REQS'(4); end + 11'b111_???10000: begin grant_index_w = LOG_NUM_REQS'(4); end 11'b000_??10000?, 11'b001_??1000??, 11'b010_??100???, @@ -369,7 +369,7 @@ module VX_rr_arbiter #( 11'b100_??1?????, 11'b101_00100000, 11'b110_0?100000, - 11'b111_??100000: begin grant_index_r = LOG_NUM_REQS'(5); end + 11'b111_??100000: begin grant_index_w = LOG_NUM_REQS'(5); end 11'b000_?100000?, 11'b001_?10000??, 11'b010_?1000???, @@ -377,7 +377,7 @@ module VX_rr_arbiter #( 11'b100_?10?????, 11'b101_?1??????, 11'b110_01000000, - 11'b111_?1000000: begin grant_index_r = LOG_NUM_REQS'(6); end + 11'b111_?1000000: begin grant_index_w = LOG_NUM_REQS'(6); end 11'b000_1000000?, 11'b001_100000??, 11'b010_10000???, @@ -385,8 +385,8 @@ module VX_rr_arbiter #( 11'b100_100?????, 11'b101_10??????, 11'b110_1???????, - 11'b111_10000000: begin grant_index_r = LOG_NUM_REQS'(7); end - default: begin grant_index_r = 'x; end + 11'b111_10000000: begin grant_index_w = LOG_NUM_REQS'(7); end + default: begin grant_index_w = 'x; end endcase end @@ -394,12 +394,12 @@ module VX_rr_arbiter #( if (reset) begin state <= '0; end else if (grant_ready) begin - state <= grant_index_r; + state <= grant_index_w; end end - assign grant_index = grant_index_r; - assign grant_onehot = NUM_REQS'(1) << grant_index_r; + assign grant_index = grant_index_w; + assign grant_onehot = NUM_REQS'(1) << grant_index_w; assign grant_valid = (| requests); end else if (MODEL == 1) begin From 383dc1f6b8d0b87e665b50865acb6f2cf3525d12 Mon Sep 17 00:00:00 2001 From: Blaise 
Tine Date: Sat, 24 Aug 2024 17:38:01 -0700 Subject: [PATCH 090/407] timing optimization --- hw/rtl/core/VX_alu_unit.sv | 2 +- hw/rtl/core/VX_fpu_unit.sv | 2 +- hw/rtl/core/VX_lsu_unit.sv | 2 +- hw/rtl/core/VX_sfu_unit.sv | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/hw/rtl/core/VX_alu_unit.sv b/hw/rtl/core/VX_alu_unit.sv index adbc7898b7..7ab808c701 100644 --- a/hw/rtl/core/VX_alu_unit.sv +++ b/hw/rtl/core/VX_alu_unit.sv @@ -43,7 +43,7 @@ module VX_alu_unit #( VX_dispatch_unit #( .BLOCK_SIZE (BLOCK_SIZE), .NUM_LANES (NUM_LANES), - .OUT_BUF (PARTIAL_BW ? 1 : 0) + .OUT_BUF (PARTIAL_BW ? 3 : 0) ) dispatch_unit ( .clk (clk), .reset (reset), diff --git a/hw/rtl/core/VX_fpu_unit.sv b/hw/rtl/core/VX_fpu_unit.sv index 127ba97555..c13055ab7f 100644 --- a/hw/rtl/core/VX_fpu_unit.sv +++ b/hw/rtl/core/VX_fpu_unit.sv @@ -41,7 +41,7 @@ module VX_fpu_unit import VX_fpu_pkg::*; #( VX_dispatch_unit #( .BLOCK_SIZE (BLOCK_SIZE), .NUM_LANES (NUM_LANES), - .OUT_BUF (PARTIAL_BW ? 1 : 0) + .OUT_BUF (PARTIAL_BW ? 
3 : 0) ) dispatch_unit ( .clk (clk), .reset (reset), diff --git a/hw/rtl/core/VX_lsu_unit.sv b/hw/rtl/core/VX_lsu_unit.sv index febaec5aa0..425f1aeeeb 100644 --- a/hw/rtl/core/VX_lsu_unit.sv +++ b/hw/rtl/core/VX_lsu_unit.sv @@ -42,7 +42,7 @@ module VX_lsu_unit import VX_gpu_pkg::*; #( VX_dispatch_unit #( .BLOCK_SIZE (BLOCK_SIZE), .NUM_LANES (NUM_LANES), - .OUT_BUF (1) + .OUT_BUF (3) ) dispatch_unit ( .clk (clk), .reset (reset), diff --git a/hw/rtl/core/VX_sfu_unit.sv b/hw/rtl/core/VX_sfu_unit.sv index 5ef4211d0f..a77520866d 100644 --- a/hw/rtl/core/VX_sfu_unit.sv +++ b/hw/rtl/core/VX_sfu_unit.sv @@ -58,7 +58,7 @@ module VX_sfu_unit import VX_gpu_pkg::*; #( VX_dispatch_unit #( .BLOCK_SIZE (BLOCK_SIZE), .NUM_LANES (NUM_LANES), - .OUT_BUF (1) + .OUT_BUF (3) ) dispatch_unit ( .clk (clk), .reset (reset), From e05fe0d75bcb90e80de6c6b0b4955e6f54e5f6b4 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sat, 24 Aug 2024 18:11:06 -0700 Subject: [PATCH 091/407] dispatch_unit speed up --- hw/rtl/core/VX_dispatch_unit.sv | 38 +++++++++++++++++++++++---------- 1 file changed, 27 insertions(+), 11 deletions(-) diff --git a/hw/rtl/core/VX_dispatch_unit.sv b/hw/rtl/core/VX_dispatch_unit.sv index 3c84649bd0..3281dd9f94 100644 --- a/hw/rtl/core/VX_dispatch_unit.sv +++ b/hw/rtl/core/VX_dispatch_unit.sv @@ -55,7 +55,6 @@ module VX_dispatch_unit import VX_gpu_pkg::*; #( assign dispatch_if[i].ready = dispatch_ready[i]; end - wire [BLOCK_SIZE-1:0][ISSUE_W-1:0] issue_indices; wire [BLOCK_SIZE-1:0] block_ready; wire [BLOCK_SIZE-1:0][NUM_LANES-1:0] block_tmask; wire [BLOCK_SIZE-1:0][2:0][NUM_LANES-1:0][`XLEN-1:0] block_regs; @@ -66,25 +65,42 @@ module VX_dispatch_unit import VX_gpu_pkg::*; #( wire batch_done = (& block_done); + // batch select logic + logic [BATCH_COUNT_W-1:0] batch_idx; if (BATCH_COUNT != 1) begin - always @(posedge clk) begin - if (reset) begin - batch_idx <= '0; - end else begin - batch_idx <= batch_idx + BATCH_COUNT_W'(batch_done); - end + + wire [BATCH_COUNT-1:0] 
valid_batches; + for (genvar i = 0; i < BATCH_COUNT; ++i) begin + assign valid_batches[i] = | dispatch_valid[i * BLOCK_SIZE +: BLOCK_SIZE]; end + + VX_generic_arbiter #( + .NUM_REQS (BATCH_COUNT), + .TYPE ("P") + ) batch_sel ( + .clk (clk), + .reset (reset), + .requests (valid_batches), + .grant_index (batch_idx), + `UNUSED_PIN (grant_onehot), + `UNUSED_PIN (grant_valid), + .grant_ready (batch_done) + ); + end else begin assign batch_idx = 0; `UNUSED_VAR (batch_done) end + wire [BLOCK_SIZE-1:0][ISSUE_W-1:0] issue_indices; for (genvar block_idx = 0; block_idx < BLOCK_SIZE; ++block_idx) begin + assign issue_indices[block_idx] = ISSUE_W'(batch_idx * BLOCK_SIZE) + ISSUE_W'(block_idx); + end - wire [ISSUE_W-1:0] issue_idx = ISSUE_W'(batch_idx * BLOCK_SIZE) + ISSUE_W'(block_idx); - assign issue_indices[block_idx] = issue_idx; + for (genvar block_idx = 0; block_idx < BLOCK_SIZE; ++block_idx) begin + wire [ISSUE_W-1:0] issue_idx = issue_indices[block_idx]; wire valid_p, ready_p; if (`NUM_THREADS != NUM_LANES) begin @@ -246,8 +262,8 @@ module VX_dispatch_unit import VX_gpu_pkg::*; #( reg [`ISSUE_WIDTH-1:0] ready_in; always @(*) begin ready_in = 0; - for (integer i = 0; i < BLOCK_SIZE; ++i) begin - ready_in[issue_indices[i]] = block_ready[i] && block_eop[i]; + for (integer block_idx = 0; block_idx < BLOCK_SIZE; ++block_idx) begin + ready_in[issue_indices[block_idx]] = block_ready[block_idx] && block_eop[block_idx]; end end assign dispatch_ready = ready_in; From e538dfa3164523c59a018afed04aef2e2dd21e4e Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sat, 24 Aug 2024 19:11:06 -0700 Subject: [PATCH 092/407] minor update --- hw/rtl/VX_define.vh | 8 -------- hw/rtl/core/VX_mem_unit.sv | 4 ++-- 2 files changed, 2 insertions(+), 10 deletions(-) diff --git a/hw/rtl/VX_define.vh b/hw/rtl/VX_define.vh index 8050ad6fcb..9a8d81c677 100644 --- a/hw/rtl/VX_define.vh +++ b/hw/rtl/VX_define.vh @@ -390,14 +390,6 @@ assign src.rsp_data.tag = dst.rsp_data.tag[TD-1 -: TS]; \ assign 
dst.rsp_ready = src.rsp_ready -`define ASSIGN_VX_LSU_MEM_IF(dst, src) \ - assign dst.req_valid = src.req_valid; \ - assign dst.req_data = src.req_data; \ - assign src.req_ready = dst.req_ready; \ - assign src.rsp_valid = dst.rsp_valid; \ - assign src.rsp_data = dst.rsp_data; \ - assign dst.rsp_ready = src.rsp_ready - `define BUFFER_DCR_BUS_IF(dst, src, enable) \ if (enable) begin \ reg [(1 + `VX_DCR_ADDR_WIDTH + `VX_DCR_DATA_WIDTH)-1:0] __dst; \ diff --git a/hw/rtl/core/VX_mem_unit.sv b/hw/rtl/core/VX_mem_unit.sv index 6569c1d472..7a7e9e2db4 100644 --- a/hw/rtl/core/VX_mem_unit.sv +++ b/hw/rtl/core/VX_mem_unit.sv @@ -118,7 +118,7 @@ module VX_mem_unit import VX_gpu_pkg::*; #( assign lmem_perf = '0; `endif for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin - `ASSIGN_VX_LSU_MEM_IF (lsu_dcache_if[i], lsu_mem_if[i]); + `ASSIGN_VX_MEM_BUS_IF (lsu_dcache_if[i], lsu_mem_if[i]); end `endif @@ -190,7 +190,7 @@ module VX_mem_unit import VX_gpu_pkg::*; #( end else begin for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin - `ASSIGN_VX_LSU_MEM_IF (dcache_coalesced_if[i], lsu_dcache_if[i]); + `ASSIGN_VX_MEM_BUS_IF (dcache_coalesced_if[i], lsu_dcache_if[i]); end end From 592297582e7786da4981e32ee2ba579991c5c9f6 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sat, 24 Aug 2024 19:44:03 -0700 Subject: [PATCH 093/407] fpu_unit timing optimization --- hw/rtl/core/VX_dispatch_unit.sv | 14 ++++++++++- hw/rtl/core/VX_fpu_unit.sv | 42 ++++++++++++++++++++++++++------- 2 files changed, 46 insertions(+), 10 deletions(-) diff --git a/hw/rtl/core/VX_dispatch_unit.sv b/hw/rtl/core/VX_dispatch_unit.sv index 3281dd9f94..5e6893e97a 100644 --- a/hw/rtl/core/VX_dispatch_unit.sv +++ b/hw/rtl/core/VX_dispatch_unit.sv @@ -233,6 +233,8 @@ module VX_dispatch_unit import VX_gpu_pkg::*; #( wire [`NW_WIDTH-1:0] block_wid = wis_to_wid(dispatch_data[issue_idx][DATA_TMASK_OFF+`NUM_THREADS +: ISSUE_WIS_W], isw); + logic [OUT_DATAW-1:0] execute_data, execute_data_w; + VX_elastic_buffer #( .DATAW (OUT_DATAW), 
.SIZE (`TO_OUT_BUF_SIZE(OUT_BUF)), @@ -253,10 +255,20 @@ module VX_dispatch_unit import VX_gpu_pkg::*; #( block_pid[block_idx], block_sop[block_idx], block_eop[block_idx]}), - .data_out (execute_if[block_idx].data), + .data_out (execute_data), .valid_out (execute_if[block_idx].valid), .ready_out (execute_if[block_idx].ready) ); + + if (`NUM_THREADS != NUM_LANES) begin + assign execute_data_w = execute_data; + end else begin + always @(*) begin + execute_data_w = execute_data; + execute_data_w[2:0] = {1'b0, 1'b1, 1'b1}; // default pid, sop, and eop + end + end + assign execute_if[block_idx].data = execute_data_w; end reg [`ISSUE_WIDTH-1:0] ready_in; diff --git a/hw/rtl/core/VX_fpu_unit.sv b/hw/rtl/core/VX_fpu_unit.sv index c13055ab7f..ae36e4b22b 100644 --- a/hw/rtl/core/VX_fpu_unit.sv +++ b/hw/rtl/core/VX_fpu_unit.sv @@ -71,9 +71,9 @@ module VX_fpu_unit import VX_fpu_pkg::*; #( wire [NUM_LANES-1:0] fpu_rsp_tmask; wire [`PC_BITS-1:0] fpu_rsp_PC; wire [`NR_BITS-1:0] fpu_rsp_rd; - wire [PID_WIDTH-1:0] fpu_rsp_pid; - wire fpu_rsp_sop; - wire fpu_rsp_eop; + wire [PID_WIDTH-1:0] fpu_rsp_pid, fpu_rsp_pid_u; + wire fpu_rsp_sop, fpu_rsp_sop_u; + wire fpu_rsp_eop, fpu_rsp_eop_u; wire [TAG_WIDTH-1:0] fpu_req_tag, fpu_rsp_tag; wire mdata_full; @@ -93,13 +93,26 @@ module VX_fpu_unit import VX_fpu_pkg::*; #( .acquire_en (execute_fire), .write_addr (fpu_req_tag), .write_data ({per_block_execute_if[block_idx].data.uuid, per_block_execute_if[block_idx].data.wid, per_block_execute_if[block_idx].data.tmask, per_block_execute_if[block_idx].data.PC, per_block_execute_if[block_idx].data.rd, per_block_execute_if[block_idx].data.pid, per_block_execute_if[block_idx].data.sop, per_block_execute_if[block_idx].data.eop}), - .read_data ({fpu_rsp_uuid, fpu_rsp_wid, fpu_rsp_tmask, fpu_rsp_PC, fpu_rsp_rd, fpu_rsp_pid, fpu_rsp_sop, fpu_rsp_eop}), + .read_data ({fpu_rsp_uuid, fpu_rsp_wid, fpu_rsp_tmask, fpu_rsp_PC, fpu_rsp_rd, fpu_rsp_pid_u, fpu_rsp_sop_u, fpu_rsp_eop_u}), .read_addr (fpu_rsp_tag), 
.release_en (fpu_rsp_fire), .full (mdata_full), `UNUSED_PIN (empty) ); + if (PID_BITS != 0) begin + assign fpu_rsp_pid = fpu_rsp_pid_u; + assign fpu_rsp_sop = fpu_rsp_sop_u; + assign fpu_rsp_eop = fpu_rsp_eop_u; + end else begin + `UNUSED_VAR (fpu_rsp_pid_u) + `UNUSED_VAR (fpu_rsp_sop_u) + `UNUSED_VAR (fpu_rsp_eop_u) + assign fpu_rsp_pid = 0; + assign fpu_rsp_sop = 1; + assign fpu_rsp_eop = 1; + end + // resolve dynamic FRM from CSR wire [`INST_FRM_BITS-1:0] fpu_req_frm; `ASSIGN_BLOCKED_WID (fpu_csr_if[block_idx].read_wid, per_block_execute_if[block_idx].data.wid, block_idx, `NUM_FPU_BLOCKS) @@ -200,8 +213,7 @@ module VX_fpu_unit import VX_fpu_pkg::*; #( `endif - // handle FPU response - + // handle CSR update fflags_t fpu_rsp_fflags_q; if (PID_BITS != 0) begin @@ -218,9 +230,21 @@ module VX_fpu_unit import VX_fpu_pkg::*; #( assign fpu_rsp_fflags_q = fpu_rsp_fflags; end - assign fpu_csr_if[block_idx].write_enable = fpu_rsp_fire && fpu_rsp_eop && fpu_rsp_has_fflags; - `ASSIGN_BLOCKED_WID (fpu_csr_if[block_idx].write_wid, fpu_rsp_wid, block_idx, `NUM_FPU_BLOCKS) - assign fpu_csr_if[block_idx].write_fflags = fpu_rsp_fflags_q; + VX_fpu_csr_if fpu_csr_tmp_if(); + assign fpu_csr_tmp_if.write_enable = fpu_rsp_fire && fpu_rsp_eop && fpu_rsp_has_fflags; + `ASSIGN_BLOCKED_WID (fpu_csr_tmp_if.write_wid, fpu_rsp_wid, block_idx, `NUM_FPU_BLOCKS) + assign fpu_csr_tmp_if.write_fflags = fpu_rsp_fflags_q; + + VX_pipe_register #( + .DATAW (1 + `NW_WIDTH + $bits(fflags_t)), + .RESETW (1) + ) fpu_csr_reg ( + .clk (clk), + .reset (reset), + .enable (1'b1), + .data_in ({fpu_csr_tmp_if.write_enable, fpu_csr_tmp_if.write_wid, fpu_csr_tmp_if.write_fflags}), + .data_out ({fpu_csr_if[block_idx].write_enable, fpu_csr_if[block_idx].write_wid, fpu_csr_if[block_idx].write_fflags}) + ); // send response From b6879b25e33a45c747b9888d8ebbb1d927634046 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sat, 24 Aug 2024 20:46:25 -0700 Subject: [PATCH 094/407] switching to python3 dependency --- 
ci/travis_run.py | 2 +- hw/unittest/common.mk | 2 +- kernel/scripts/vxbin.py | 2 +- miscs/docker/Dockerfile.ubuntu | 1 - sim/opaesim/Makefile | 2 +- sim/rtlsim/Makefile | 2 +- sim/xrtsim/Makefile | 2 +- 7 files changed, 6 insertions(+), 7 deletions(-) diff --git a/ci/travis_run.py b/ci/travis_run.py index 907cf5ce4d..70459cbeed 100755 --- a/ci/travis_run.py +++ b/ci/travis_run.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # Copyright 2019-2023 # diff --git a/hw/unittest/common.mk b/hw/unittest/common.mk index 48aefd4159..71f6914bfc 100644 --- a/hw/unittest/common.mk +++ b/hw/unittest/common.mk @@ -25,7 +25,7 @@ VL_FLAGS += $(RTL_PKGS) VL_FLAGS += --cc $(TOP) --top-module $(TOP) # Enable Verilator multithreaded simulation -THREADS ?= $(shell python -c 'import multiprocessing as mp; print(mp.cpu_count())') +THREADS ?= $(shell python3 -c 'import multiprocessing as mp; print(mp.cpu_count())') VL_FLAGS += -j $(THREADS) #VL_FLAGS += --threads $(THREADS) diff --git a/kernel/scripts/vxbin.py b/kernel/scripts/vxbin.py index 501d8949a3..1dcd6a099a 100755 --- a/kernel/scripts/vxbin.py +++ b/kernel/scripts/vxbin.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # Copyright 2019-2023 # diff --git a/miscs/docker/Dockerfile.ubuntu b/miscs/docker/Dockerfile.ubuntu index f3a864ce53..64bb5813de 100644 --- a/miscs/docker/Dockerfile.ubuntu +++ b/miscs/docker/Dockerfile.ubuntu @@ -21,7 +21,6 @@ ARG DEBIAN_FRONTEND=noninteractive RUN apt-get update && apt-get install -y \ software-properties-common \ build-essential \ - python \ python3 \ git \ wget \ diff --git a/sim/opaesim/Makefile b/sim/opaesim/Makefile index 9c6314ecfd..32182d5a8c 100644 --- a/sim/opaesim/Makefile +++ b/sim/opaesim/Makefile @@ -79,7 +79,7 @@ VL_FLAGS += $(RTL_PKGS) CXXFLAGS += $(CONFIGS) # Enable Verilator multithreaded simulation -THREADS ?= $(shell python -c 'import multiprocessing as mp; print(mp.cpu_count())') +THREADS ?= $(shell python3 -c 'import multiprocessing as mp; 
print(mp.cpu_count())') VL_FLAGS += -j $(THREADS) #VL_FLAGS += --threads $(THREADS) diff --git a/sim/rtlsim/Makefile b/sim/rtlsim/Makefile index 638d7403f9..2f38ae1f25 100644 --- a/sim/rtlsim/Makefile +++ b/sim/rtlsim/Makefile @@ -61,7 +61,7 @@ VL_FLAGS += --cc $(TOP) --top-module $(TOP) CXXFLAGS += $(CONFIGS) # Enable Verilator multithreaded simulation -THREADS ?= $(shell python -c 'import multiprocessing as mp; print(mp.cpu_count())') +THREADS ?= $(shell python3 -c 'import multiprocessing as mp; print(mp.cpu_count())') VL_FLAGS += -j $(THREADS) #VL_FLAGS += --threads $(THREADS) diff --git a/sim/xrtsim/Makefile b/sim/xrtsim/Makefile index 1e0d11b664..c63fe3d569 100644 --- a/sim/xrtsim/Makefile +++ b/sim/xrtsim/Makefile @@ -78,7 +78,7 @@ VL_FLAGS += $(RTL_PKGS) CXXFLAGS += $(CONFIGS) # Enable Verilator multithreaded simulation -THREADS ?= $(shell python -c 'import multiprocessing as mp; print(mp.cpu_count())') +THREADS ?= $(shell python3 -c 'import multiprocessing as mp; print(mp.cpu_count())') VL_FLAGS += -j $(THREADS) #VL_FLAGS += --threads $(THREADS) From bdcc5f59913e6b8bbdd682223a24bb5f584012fe Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sun, 25 Aug 2024 05:11:48 -0700 Subject: [PATCH 095/407] FPU decode optimization --- hw/rtl/VX_define.vh | 29 +++++++------- hw/rtl/VX_gpu_pkg.sv | 79 ++++++++++++++++++++------------------ hw/rtl/core/VX_decode.sv | 26 +++++++++---- hw/rtl/fpu/VX_fpu_cvt.sv | 4 +- hw/rtl/fpu/VX_fpu_div.sv | 4 +- hw/rtl/fpu/VX_fpu_dpi.sv | 74 +++++++++++++++-------------------- hw/rtl/fpu/VX_fpu_dsp.sv | 40 +++++++++---------- hw/rtl/fpu/VX_fpu_fma.sv | 4 +- hw/rtl/fpu/VX_fpu_fpnew.sv | 13 ++----- hw/rtl/fpu/VX_fpu_ncp.sv | 4 +- hw/rtl/fpu/VX_fpu_sqrt.sv | 4 +- 11 files changed, 134 insertions(+), 147 deletions(-) diff --git a/hw/rtl/VX_define.vh b/hw/rtl/VX_define.vh index 9a8d81c677..861d9f28cf 100644 --- a/hw/rtl/VX_define.vh +++ b/hw/rtl/VX_define.vh @@ -227,22 +227,19 @@ `define INST_FENCE_D 1'h0 `define INST_FENCE_I 1'h1 -`define 
INST_FPU_ADD 4'b0000 -`define INST_FPU_SUB 4'b0001 -`define INST_FPU_MUL 4'b0010 -`define INST_FPU_DIV 4'b0011 -`define INST_FPU_SQRT 4'b0100 -`define INST_FPU_CMP 4'b0101 // frm: LE=0, LT=1, EQ=2 -`define INST_FPU_F2F 4'b0110 -`define INST_FPU_MISC 4'b0111 // frm: SGNJ=0, SGNJN=1, SGNJX=2, CLASS=3, MVXW=4, MVWX=5, FMIN=6, FMAX=7 -`define INST_FPU_F2I 4'b1000 -`define INST_FPU_F2U 4'b1001 -`define INST_FPU_I2F 4'b1010 -`define INST_FPU_U2F 4'b1011 -`define INST_FPU_MADD 4'b1100 -`define INST_FPU_MSUB 4'b1101 -`define INST_FPU_NMSUB 4'b1110 -`define INST_FPU_NMADD 4'b1111 +`define INST_FPU_ADD 4'b0000 // SUB=fmt[1] +`define INST_FPU_MUL 4'b0001 +`define INST_FPU_MADD 4'b0010 // SUB=fmt[1] +`define INST_FPU_NMADD 4'b0011 // SUB=fmt[1] +`define INST_FPU_DIV 4'b0100 +`define INST_FPU_SQRT 4'b0101 +`define INST_FPU_F2I 4'b1000 // fmt[0]: F32=0, F64=1, fmt[1]: I32=0, I64=1 +`define INST_FPU_F2U 4'b1001 // fmt[0]: F32=0, F64=1, fmt[1]: I32=0, I64=1 +`define INST_FPU_I2F 4'b1010 // fmt[0]: F32=0, F64=1, fmt[1]: I32=0, I64=1 +`define INST_FPU_U2F 4'b1011 // fmt[0]: F32=0, F64=1, fmt[1]: I32=0, I64=1 +`define INST_FPU_CMP 4'b1100 // frm: LE=0, LT=1, EQ=2 +`define INST_FPU_F2F 4'b1101 // fmt[0]: F32=0, F64=1 +`define INST_FPU_MISC 4'b1110 // frm: SGNJ=0, SGNJN=1, SGNJX=2, CLASS=3, MVXW=4, MVWX=5, FMIN=6, FMAX=7 `define INST_FPU_BITS 4 `define INST_FPU_IS_CLASS(op, frm) (op == `INST_FPU_MISC && frm == 3) `define INST_FPU_IS_MVXW(op, frm) (op == `INST_FPU_MISC && frm == 4) diff --git a/hw/rtl/VX_gpu_pkg.sv b/hw/rtl/VX_gpu_pkg.sv index f290678559..f94714d06a 100644 --- a/hw/rtl/VX_gpu_pkg.sv +++ b/hw/rtl/VX_gpu_pkg.sv @@ -464,61 +464,64 @@ package VX_gpu_pkg; `EX_FPU: begin case (`INST_FPU_BITS'(op_type)) `INST_FPU_ADD: begin - if (op_args.fpu.fmt[0]) - `TRACE(level, ("FADD.D")); - else - `TRACE(level, ("FADD.S")); + if (op_args.fpu.fmt[1]) begin + if (op_args.fpu.fmt[0]) + `TRACE(level, ("FSUB.D")); + else + `TRACE(level, ("FSUB.S")); + end else begin + if (op_args.fpu.fmt[0]) 
+ `TRACE(level, ("FADD.D")); + else + `TRACE(level, ("FADD.S")); + end end - `INST_FPU_SUB: begin - if (op_args.fpu.fmt[0]) - `TRACE(level, ("FSUB.D")); - else - `TRACE(level, ("FSUB.S")); + `INST_FPU_MADD: begin + if (op_args.fpu.fmt[1]) begin + if (op_args.fpu.fmt[0]) + `TRACE(level, ("FMSUB.D")); + else + `TRACE(level, ("FMSUB.S")); + end else begin + if (op_args.fpu.fmt[0]) + `TRACE(level, ("FMADD.D")); + else + `TRACE(level, ("FMADD.S")); + end + end + `INST_FPU_NMADD: begin + if (op_args.fpu.fmt[1]) begin + if (op_args.fpu.fmt[0]) + `TRACE(level, ("FNMSUB.D")); + else + `TRACE(level, ("FNMSUB.S")); + end else begin + if (op_args.fpu.fmt[0]) + `TRACE(level, ("FNMADD.D")); + else + `TRACE(level, ("FNMADD.S")); + end end `INST_FPU_MUL: begin - if (op_args.fpu.fmt[0]) + if (op_args.fpu.fmt[0]) `TRACE(level, ("FMUL.D")); else `TRACE(level, ("FMUL.S")); end `INST_FPU_DIV: begin - if (op_args.fpu.fmt[0]) + if (op_args.fpu.fmt[0]) `TRACE(level, ("FDIV.D")); else `TRACE(level, ("FDIV.S")); end `INST_FPU_SQRT: begin - if (op_args.fpu.fmt[0]) + if (op_args.fpu.fmt[0]) `TRACE(level, ("FSQRT.D")); else `TRACE(level, ("FSQRT.S")); end - `INST_FPU_MADD: begin - if (op_args.fpu.fmt[0]) - `TRACE(level, ("FMADD.D")); - else - `TRACE(level, ("FMADD.S")); - end - `INST_FPU_MSUB: begin - if (op_args.fpu.fmt[0]) - `TRACE(level, ("FMSUB.D")); - else - `TRACE(level, ("FMSUB.S")); - end - `INST_FPU_NMADD: begin - if (op_args.fpu.fmt[0]) - `TRACE(level, ("FNMADD.D")); - else - `TRACE(level, ("FNMADD.S")); - end - `INST_FPU_NMSUB: begin - if (op_args.fpu.fmt[0]) - `TRACE(level, ("FNMSUB.D")); - else - `TRACE(level, ("FNMSUB.S")); - end `INST_FPU_CMP: begin - if (op_args.fpu.fmt[0]) begin + if (op_args.fpu.fmt[0]) begin case (op_args.fpu.frm[1:0]) 0: `TRACE(level, ("FLE.D")); 1: `TRACE(level, ("FLT.D")); @@ -602,7 +605,7 @@ package VX_gpu_pkg; end end `INST_FPU_MISC: begin - if (op_args.fpu.fmt[0]) begin + if (op_args.fpu.fmt[0]) begin case (op_args.fpu.frm) 0: `TRACE(level, 
("FSGNJ.D")); 1: `TRACE(level, ("FSGNJN.D")); diff --git a/hw/rtl/core/VX_decode.sv b/hw/rtl/core/VX_decode.sv index de317d4978..d3ca4d6e4a 100644 --- a/hw/rtl/core/VX_decode.sv +++ b/hw/rtl/core/VX_decode.sv @@ -376,14 +376,16 @@ module VX_decode import VX_gpu_pkg::*; #( `USED_IREG (rs2); end `ifdef EXT_F_ENABLE - `INST_FMADD, - `INST_FMSUB, - `INST_FNMSUB, - `INST_FNMADD: begin + `INST_FMADD, // 7'b1000011 + `INST_FMSUB, // 7'b1000111 + `INST_FNMSUB, // 7'b1001011 + `INST_FNMADD: // 7'b1001111 + begin ex_type = `EX_FPU; - op_type = `INST_OP_BITS'({2'b11, opcode[3:2]}); + op_type = `INST_OP_BITS'({2'b00, 1'b1, opcode[3]}); op_args.fpu.frm = func3; op_args.fpu.fmt[0] = func2[0]; // float / double + op_args.fpu.fmt[1] = opcode[3] ^ opcode[2]; // SUB use_rd = 1; `USED_FREG (rd); `USED_FREG (rs1); @@ -399,9 +401,10 @@ module VX_decode import VX_gpu_pkg::*; #( case (func5) 5'b00000, // FADD 5'b00001, // FSUB - 5'b00010, // FMUL - 5'b00011: begin // FDIV - op_type = `INST_OP_BITS'(func5[1:0]); + 5'b00010: // FMUL + begin + op_type = `INST_OP_BITS'({2'b00, 1'b0, func5[1]}); + op_args.fpu.fmt[1] = func5[0]; // SUB `USED_FREG (rd); `USED_FREG (rs1); `USED_FREG (rs2); @@ -430,6 +433,13 @@ module VX_decode import VX_gpu_pkg::*; #( `USED_FREG (rs1); end `endif + 5'b00011: begin + // FDIV + op_type = `INST_OP_BITS'(`INST_FPU_DIV); + `USED_FREG (rd); + `USED_FREG (rs1); + `USED_FREG (rs2); + end 5'b01011: begin // FSQRT op_type = `INST_OP_BITS'(`INST_FPU_SQRT); diff --git a/hw/rtl/fpu/VX_fpu_cvt.sv b/hw/rtl/fpu/VX_fpu_cvt.sv index 1b6617c600..7587f8342a 100644 --- a/hw/rtl/fpu/VX_fpu_cvt.sv +++ b/hw/rtl/fpu/VX_fpu_cvt.sv @@ -73,8 +73,8 @@ module VX_fpu_cvt import VX_fpu_pkg::*; #( .tag_in ({mask_in, tag_in}), .ready_in (ready_in), .pe_enable (pe_enable), - .pe_data_in (pe_data_in), - .pe_data_out(pe_data_out), + .pe_data_out(pe_data_in), + .pe_data_in (pe_data_out), .valid_out (valid_out), .data_out (data_out), .tag_out ({mask_out, tag_out}), diff --git 
a/hw/rtl/fpu/VX_fpu_div.sv b/hw/rtl/fpu/VX_fpu_div.sv index 44b5bedfb3..68138bb7cf 100644 --- a/hw/rtl/fpu/VX_fpu_div.sv +++ b/hw/rtl/fpu/VX_fpu_div.sv @@ -77,8 +77,8 @@ module VX_fpu_div import VX_fpu_pkg::*; #( .tag_in ({mask_in, tag_in}), .ready_in (ready_in), .pe_enable (pe_enable), - .pe_data_in (pe_data_in), - .pe_data_out(pe_data_out), + .pe_data_out(pe_data_in), + .pe_data_in (pe_data_out), .valid_out (valid_out), .data_out (data_out), .tag_out ({mask_out, tag_out}), diff --git a/hw/rtl/fpu/VX_fpu_dpi.sv b/hw/rtl/fpu/VX_fpu_dpi.sv index 67022e8fd6..0ba7d54f37 100644 --- a/hw/rtl/fpu/VX_fpu_dpi.sv +++ b/hw/rtl/fpu/VX_fpu_dpi.sv @@ -76,7 +76,6 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #( reg is_fadd, is_fsub, is_fmul, is_fmadd, is_fmsub, is_fnmadd, is_fnmsub; reg is_div, is_fcmp, is_itof, is_utof, is_ftoi, is_ftou, is_f2f; - reg dst_fmt, int_fmt; reg [NUM_LANES-1:0][63:0] operands [3]; @@ -88,7 +87,8 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #( end end - `UNUSED_VAR (fmt) + wire f_fmt = fmt[0]; + wire i_fmt = fmt[1]; always @(*) begin is_fadd = 0; @@ -106,25 +106,11 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #( is_ftou = 0; is_f2f = 0; - dst_fmt = 0; - int_fmt = 0; - - `ifdef FLEN_64 - dst_fmt = fmt[0]; - `endif - - `ifdef XLEN_64 - int_fmt = fmt[1]; - `endif - case (op_type) - `INST_FPU_ADD: begin core_select = FPU_FMA; is_fadd = 1; end - `INST_FPU_SUB: begin core_select = FPU_FMA; is_fsub = 1; end + `INST_FPU_ADD: begin core_select = FPU_FMA; is_fadd = ~i_fmt; is_fsub = i_fmt; end + `INST_FPU_MADD: begin core_select = FPU_FMA; is_fmadd = ~i_fmt; is_fmsub = i_fmt; end + `INST_FPU_NMADD: begin core_select = FPU_FMA; is_fnmadd = ~i_fmt; is_fnmsub = i_fmt; end `INST_FPU_MUL: begin core_select = FPU_FMA; is_fmul = 1; end - `INST_FPU_MADD: begin core_select = FPU_FMA; is_fmadd = 1; end - `INST_FPU_MSUB: begin core_select = FPU_FMA; is_fmsub = 1; end - `INST_FPU_NMADD: begin core_select = FPU_FMA; is_fnmadd = 1; end - `INST_FPU_NMSUB: begin core_select = 
FPU_FMA; is_fnmsub = 1; end `INST_FPU_DIV: begin core_select = FPU_DIVSQRT; is_div = 1; end `INST_FPU_SQRT: begin core_select = FPU_DIVSQRT; end `INST_FPU_CMP: begin core_select = FPU_NCP; is_fcmp = 1; end @@ -164,13 +150,13 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #( always @(*) begin for (integer i = 0; i < NUM_LANES; ++i) begin - dpi_fadd (fma_fire, int'(dst_fmt), operands[0][i], operands[1][i], frm, result_fadd[i], fflags_fadd[i]); - dpi_fsub (fma_fire, int'(dst_fmt), operands[0][i], operands[1][i], frm, result_fsub[i], fflags_fsub[i]); - dpi_fmul (fma_fire, int'(dst_fmt), operands[0][i], operands[1][i], frm, result_fmul[i], fflags_fmul[i]); - dpi_fmadd (fma_fire, int'(dst_fmt), operands[0][i], operands[1][i], operands[2][i], frm, result_fmadd[i], fflags_fmadd[i]); - dpi_fmsub (fma_fire, int'(dst_fmt), operands[0][i], operands[1][i], operands[2][i], frm, result_fmsub[i], fflags_fmsub[i]); - dpi_fnmadd (fma_fire, int'(dst_fmt), operands[0][i], operands[1][i], operands[2][i], frm, result_fnmadd[i], fflags_fnmadd[i]); - dpi_fnmsub (fma_fire, int'(dst_fmt), operands[0][i], operands[1][i], operands[2][i], frm, result_fnmsub[i], fflags_fnmsub[i]); + dpi_fadd (fma_fire, int'(f_fmt), operands[0][i], operands[1][i], frm, result_fadd[i], fflags_fadd[i]); + dpi_fsub (fma_fire, int'(f_fmt), operands[0][i], operands[1][i], frm, result_fsub[i], fflags_fsub[i]); + dpi_fmul (fma_fire, int'(f_fmt), operands[0][i], operands[1][i], frm, result_fmul[i], fflags_fmul[i]); + dpi_fmadd (fma_fire, int'(f_fmt), operands[0][i], operands[1][i], operands[2][i], frm, result_fmadd[i], fflags_fmadd[i]); + dpi_fmsub (fma_fire, int'(f_fmt), operands[0][i], operands[1][i], operands[2][i], frm, result_fmsub[i], fflags_fmsub[i]); + dpi_fnmadd (fma_fire, int'(f_fmt), operands[0][i], operands[1][i], operands[2][i], frm, result_fnmadd[i], fflags_fnmadd[i]); + dpi_fnmsub (fma_fire, int'(f_fmt), operands[0][i], operands[1][i], operands[2][i], frm, result_fnmsub[i], fflags_fnmsub[i]); result_fma[i] = 
is_fadd ? result_fadd[i][`XLEN-1:0] : is_fsub ? result_fsub[i][`XLEN-1:0] : @@ -226,7 +212,7 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #( always @(*) begin for (integer i = 0; i < NUM_LANES; ++i) begin - dpi_fdiv (fdiv_fire, int'(dst_fmt), operands[0][i], operands[1][i], frm, result_fdiv[i], fflags_fdiv[i]); + dpi_fdiv (fdiv_fire, int'(f_fmt), operands[0][i], operands[1][i], frm, result_fdiv[i], fflags_fdiv[i]); result_fdiv_r[i] = result_fdiv[i][`XLEN-1:0]; end end @@ -265,7 +251,7 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #( always @(*) begin for (integer i = 0; i < NUM_LANES; ++i) begin - dpi_fsqrt (fsqrt_fire, int'(dst_fmt), operands[0][i], frm, result_fsqrt[i], fflags_fsqrt[i]); + dpi_fsqrt (fsqrt_fire, int'(f_fmt), operands[0][i], frm, result_fsqrt[i], fflags_fsqrt[i]); result_fsqrt_r[i] = result_fsqrt[i][`XLEN-1:0]; end end @@ -313,11 +299,11 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #( always @(*) begin for (integer i = 0; i < NUM_LANES; ++i) begin - dpi_itof (fcvt_fire, int'(dst_fmt), int'(int_fmt), operands[0][i], frm, result_itof[i], fflags_itof[i]); - dpi_utof (fcvt_fire, int'(dst_fmt), int'(int_fmt), operands[0][i], frm, result_utof[i], fflags_utof[i]); - dpi_ftoi (fcvt_fire, int'(int_fmt), int'(dst_fmt), operands[0][i], frm, result_ftoi[i], fflags_ftoi[i]); - dpi_ftou (fcvt_fire, int'(int_fmt), int'(dst_fmt), operands[0][i], frm, result_ftou[i], fflags_ftou[i]); - dpi_f2f (fcvt_fire, int'(dst_fmt), operands[0][i], result_f2f[i]); + dpi_itof (fcvt_fire, int'(f_fmt), int'(i_fmt), operands[0][i], frm, result_itof[i], fflags_itof[i]); + dpi_utof (fcvt_fire, int'(f_fmt), int'(i_fmt), operands[0][i], frm, result_utof[i], fflags_utof[i]); + dpi_ftoi (fcvt_fire, int'(i_fmt), int'(f_fmt), operands[0][i], frm, result_ftoi[i], fflags_ftoi[i]); + dpi_ftou (fcvt_fire, int'(i_fmt), int'(f_fmt), operands[0][i], frm, result_ftou[i], fflags_ftou[i]); + dpi_f2f (fcvt_fire, int'(f_fmt), operands[0][i], result_f2f[i]); result_fcvt[i] = is_itof ? 
result_itof[i][`XLEN-1:0] : is_utof ? result_utof[i][`XLEN-1:0] : @@ -384,17 +370,17 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #( always @(*) begin for (integer i = 0; i < NUM_LANES; ++i) begin - dpi_fclss (fncp_fire, int'(dst_fmt), operands[0][i], result_fclss[i]); - dpi_fle (fncp_fire, int'(dst_fmt), operands[0][i], operands[1][i], result_fle[i], fflags_fle[i]); - dpi_flt (fncp_fire, int'(dst_fmt), operands[0][i], operands[1][i], result_flt[i], fflags_flt[i]); - dpi_feq (fncp_fire, int'(dst_fmt), operands[0][i], operands[1][i], result_feq[i], fflags_feq[i]); - dpi_fmin (fncp_fire, int'(dst_fmt), operands[0][i], operands[1][i], result_fmin[i], fflags_fmin[i]); - dpi_fmax (fncp_fire, int'(dst_fmt), operands[0][i], operands[1][i], result_fmax[i], fflags_fmax[i]); - dpi_fsgnj (fncp_fire, int'(dst_fmt), operands[0][i], operands[1][i], result_fsgnj[i]); - dpi_fsgnjn (fncp_fire, int'(dst_fmt), operands[0][i], operands[1][i], result_fsgnjn[i]); - dpi_fsgnjx (fncp_fire, int'(dst_fmt), operands[0][i], operands[1][i], result_fsgnjx[i]); - result_fmvx[i] = dst_fmt ? operands[0][i] : 64'($signed(operands[0][i][31:0])); // sign-extension - result_fmvf[i] = dst_fmt ? 
operands[0][i] : (operands[0][i] | 64'hffffffff00000000); // nan-boxing + dpi_fclss (fncp_fire, int'(f_fmt), operands[0][i], result_fclss[i]); + dpi_fle (fncp_fire, int'(f_fmt), operands[0][i], operands[1][i], result_fle[i], fflags_fle[i]); + dpi_flt (fncp_fire, int'(f_fmt), operands[0][i], operands[1][i], result_flt[i], fflags_flt[i]); + dpi_feq (fncp_fire, int'(f_fmt), operands[0][i], operands[1][i], result_feq[i], fflags_feq[i]); + dpi_fmin (fncp_fire, int'(f_fmt), operands[0][i], operands[1][i], result_fmin[i], fflags_fmin[i]); + dpi_fmax (fncp_fire, int'(f_fmt), operands[0][i], operands[1][i], result_fmax[i], fflags_fmax[i]); + dpi_fsgnj (fncp_fire, int'(f_fmt), operands[0][i], operands[1][i], result_fsgnj[i]); + dpi_fsgnjn (fncp_fire, int'(f_fmt), operands[0][i], operands[1][i], result_fsgnjn[i]); + dpi_fsgnjx (fncp_fire, int'(f_fmt), operands[0][i], operands[1][i], result_fsgnjx[i]); + result_fmvx[i] = f_fmt ? operands[0][i] : 64'($signed(operands[0][i][31:0])); // sign-extension + result_fmvf[i] = f_fmt ? 
operands[0][i] : (operands[0][i] | 64'hffffffff00000000); // nan-boxing end end diff --git a/hw/rtl/fpu/VX_fpu_dsp.sv b/hw/rtl/fpu/VX_fpu_dsp.sv index c75e3e3fdc..9e8edef095 100644 --- a/hw/rtl/fpu/VX_fpu_dsp.sv +++ b/hw/rtl/fpu/VX_fpu_dsp.sv @@ -74,31 +74,29 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #( wire div_has_fflags, sqrt_has_fflags; fflags_t div_fflags, sqrt_fflags; - reg [FPCORES_BITS-1:0] core_select; reg is_madd, is_sub, is_neg, is_div, is_itof, is_signed; + wire [FPCORES_BITS-1:0] core_select = op_type[3:2]; + always @(*) begin - is_madd = 0; - is_sub = 0; - is_neg = 0; - is_div = 0; - is_itof = 0; - is_signed = 0; + is_madd = 'x; + is_sub = 'x; + is_neg = 'x; + is_div = 'x; + is_itof = 'x; + is_signed = 'x; case (op_type) - `INST_FPU_ADD: begin core_select = FPU_FMA; end - `INST_FPU_SUB: begin core_select = FPU_FMA; is_sub = 1; end - `INST_FPU_MUL: begin core_select = FPU_FMA; is_neg = 1; end - `INST_FPU_MADD: begin core_select = FPU_FMA; is_madd = 1; end - `INST_FPU_MSUB: begin core_select = FPU_FMA; is_madd = 1; is_sub = 1; end - `INST_FPU_NMADD: begin core_select = FPU_FMA; is_madd = 1; is_neg = 1; end - `INST_FPU_NMSUB: begin core_select = FPU_FMA; is_madd = 1; is_sub = 1; is_neg = 1; end - `INST_FPU_DIV: begin core_select = FPU_DIVSQRT; is_div = 1; end - `INST_FPU_SQRT: begin core_select = FPU_DIVSQRT; end - `INST_FPU_F2I: begin core_select = FPU_CVT; is_signed = 1; end - `INST_FPU_F2U: begin core_select = FPU_CVT; end - `INST_FPU_I2F: begin core_select = FPU_CVT; is_itof = 1; is_signed = 1; end - `INST_FPU_U2F: begin core_select = FPU_CVT; is_itof = 1; end - default: begin core_select = FPU_NCP; end + `INST_FPU_ADD: begin is_madd = 0; is_neg = 0; is_sub = fmt[1]; end + `INST_FPU_MUL: begin is_madd = 0; is_neg = 1; is_sub = 0; end + `INST_FPU_MADD: begin is_madd = 1; is_neg = 0; is_sub = fmt[1]; end + `INST_FPU_NMADD: begin is_madd = 1; is_neg = 1; is_sub = fmt[1]; end + `INST_FPU_DIV: begin is_div = 1; end + `INST_FPU_SQRT: begin is_div = 0; end 
+ `INST_FPU_F2I: begin is_itof = 0; is_signed = 1; end + `INST_FPU_F2U: begin is_itof = 0; is_signed = 0; end + `INST_FPU_I2F: begin is_itof = 1; is_signed = 1; end + `INST_FPU_U2F: begin is_itof = 1; is_signed = 0; end + default: begin end endcase end diff --git a/hw/rtl/fpu/VX_fpu_fma.sv b/hw/rtl/fpu/VX_fpu_fma.sv index a5cb89a1a9..ce99138cbe 100644 --- a/hw/rtl/fpu/VX_fpu_fma.sv +++ b/hw/rtl/fpu/VX_fpu_fma.sv @@ -108,8 +108,8 @@ module VX_fpu_fma import VX_fpu_pkg::*; #( .tag_in ({mask_in, tag_in}), .ready_in (ready_in), .pe_enable (pe_enable), - .pe_data_in (pe_data_in), - .pe_data_out(pe_data_out), + .pe_data_out(pe_data_in), + .pe_data_in (pe_data_out), .valid_out (valid_out), .data_out (data_out), .tag_out ({mask_out, tag_out}), diff --git a/hw/rtl/fpu/VX_fpu_fpnew.sv b/hw/rtl/fpu/VX_fpu_fpnew.sv index 9ee7f1a2c5..ad95f0347e 100644 --- a/hw/rtl/fpu/VX_fpu_fpnew.sv +++ b/hw/rtl/fpu/VX_fpu_fpnew.sv @@ -134,20 +134,13 @@ module VX_fpu_fpnew fpu_op = fpnew_pkg::ADD; fpu_operands[1] = dataa; fpu_operands[2] = datab; - end - `INST_FPU_SUB: begin - fpu_op = fpnew_pkg::ADD; - fpu_operands[1] = dataa; - fpu_operands[2] = datab; - fpu_op_mod = 1; + fpu_op_mod = fmt[1]; // FADD or FSUB end `INST_FPU_MUL: begin fpu_op = fpnew_pkg::MUL; end + `INST_FPU_MADD: begin fpu_op = fpnew_pkg::FMADD; fpu_op_mod = fmt[1]; end + `INST_FPU_NMADD: begin fpu_op = fpnew_pkg::FNMSUB; fpu_op_mod = ~fmt[1]; end `INST_FPU_DIV: begin fpu_op = fpnew_pkg::DIV; end `INST_FPU_SQRT: begin fpu_op = fpnew_pkg::SQRT; end - `INST_FPU_MADD: begin fpu_op = fpnew_pkg::FMADD; end - `INST_FPU_MSUB: begin fpu_op = fpnew_pkg::FMADD; fpu_op_mod = 1; end - `INST_FPU_NMADD: begin fpu_op = fpnew_pkg::FNMSUB; fpu_op_mod = 1; end - `INST_FPU_NMSUB: begin fpu_op = fpnew_pkg::FNMSUB; end `ifdef FLEN_64 `INST_FPU_F2F: begin fpu_op = fpnew_pkg::F2F; fpu_src_fmt = fmt[0] ? 
fpnew_pkg::FP32 : fpnew_pkg::FP64; end `endif diff --git a/hw/rtl/fpu/VX_fpu_ncp.sv b/hw/rtl/fpu/VX_fpu_ncp.sv index 16c0df7580..bfc69316b9 100644 --- a/hw/rtl/fpu/VX_fpu_ncp.sv +++ b/hw/rtl/fpu/VX_fpu_ncp.sv @@ -78,8 +78,8 @@ module VX_fpu_ncp import VX_fpu_pkg::*; #( .tag_in ({mask_in, tag_in}), .ready_in (ready_in), .pe_enable (pe_enable), - .pe_data_in (pe_data_in), - .pe_data_out(pe_data_out), + .pe_data_out(pe_data_in), + .pe_data_in (pe_data_out), .valid_out (valid_out), .data_out (data_out), .tag_out ({mask_out, tag_out}), diff --git a/hw/rtl/fpu/VX_fpu_sqrt.sv b/hw/rtl/fpu/VX_fpu_sqrt.sv index 5aacf2d29c..425f43d6e2 100644 --- a/hw/rtl/fpu/VX_fpu_sqrt.sv +++ b/hw/rtl/fpu/VX_fpu_sqrt.sv @@ -71,8 +71,8 @@ module VX_fpu_sqrt import VX_fpu_pkg::*; #( .tag_in ({mask_in, tag_in}), .ready_in (ready_in), .pe_enable (pe_enable), - .pe_data_in (pe_data_in), - .pe_data_out(pe_data_out), + .pe_data_out(pe_data_in), + .pe_data_in (pe_data_out), .valid_out (valid_out), .data_out (data_out), .tag_out ({mask_out, tag_out}), From b40441b68f5ecbe6f4eb33b26f1d64568581dc21 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sun, 25 Aug 2024 05:12:44 -0700 Subject: [PATCH 096/407] minor update --- hw/rtl/libs/VX_pe_serializer.sv | 83 ++++++++++++++++++--------------- 1 file changed, 45 insertions(+), 38 deletions(-) diff --git a/hw/rtl/libs/VX_pe_serializer.sv b/hw/rtl/libs/VX_pe_serializer.sv index 4e3a291328..8ae7900b50 100644 --- a/hw/rtl/libs/VX_pe_serializer.sv +++ b/hw/rtl/libs/VX_pe_serializer.sv @@ -35,8 +35,8 @@ module VX_pe_serializer #( // PE output wire pe_enable, - output wire [NUM_PES-1:0][DATA_IN_WIDTH-1:0] pe_data_in, - input wire [NUM_PES-1:0][DATA_OUT_WIDTH-1:0] pe_data_out, + output wire [NUM_PES-1:0][DATA_IN_WIDTH-1:0] pe_data_out, + input wire [NUM_PES-1:0][DATA_OUT_WIDTH-1:0] pe_data_in, // output output wire valid_out, @@ -49,32 +49,44 @@ module VX_pe_serializer #( wire [TAG_WIDTH-1:0] tag_out_u; wire ready_out_u; - wire [NUM_PES-1:0][DATA_IN_WIDTH-1:0] 
pe_data_in_s; - wire valid_out_s; - wire [TAG_WIDTH-1:0] tag_out_s; + wire [NUM_PES-1:0][DATA_IN_WIDTH-1:0] pe_data_out_w; + wire pe_valid_in; + wire [TAG_WIDTH-1:0] pe_tag_in; wire enable; VX_shift_register #( .DATAW (1 + TAG_WIDTH), - .DEPTH (LATENCY + PE_REG), + .DEPTH (PE_REG + LATENCY), .RESETW (1) ) shift_reg ( .clk (clk), .reset (reset), .enable (enable), - .data_in ({valid_in, tag_in}), - .data_out ({valid_out_s, tag_out_s}) + .data_in ({valid_in, tag_in}), + .data_out ({pe_valid_in, pe_tag_in}) ); VX_pipe_register #( - .DATAW (NUM_PES * DATA_IN_WIDTH), - .DEPTH (PE_REG) - ) pe_reg ( + .DATAW (NUM_PES * DATA_IN_WIDTH), + .DEPTH (PE_REG) + ) pe_data_reg ( .clk (clk), .reset (reset), .enable (enable), - .data_in (pe_data_in_s), - .data_out (pe_data_in) + .data_in (pe_data_out_w), + .data_out (pe_data_out) + ); + + VX_pipe_register #( + .DATAW (1), + .RESETW (1), + .DEPTH (PE_REG) + ) pe_en_reg ( + .clk (clk), + .reset (reset), + .enable (1'b1), + .data_in (enable), + .data_out (pe_enable) ); if (NUM_LANES != NUM_PES) begin @@ -82,35 +94,32 @@ module VX_pe_serializer #( localparam BATCH_SIZE = NUM_LANES / NUM_PES; localparam BATCH_SIZEW = `LOG2UP(BATCH_SIZE); - reg [BATCH_SIZEW-1:0] batch_in_idx; - reg [BATCH_SIZEW-1:0] batch_out_idx; + reg [BATCH_SIZEW-1:0] batch_in_idx, batch_out_idx; + reg batch_in_done, batch_out_done; for (genvar i = 0; i < NUM_PES; ++i) begin - assign pe_data_in_s[i] = data_in[batch_in_idx * NUM_PES + i]; + assign pe_data_out_w[i] = data_in[batch_in_idx * NUM_PES + i]; end always @(posedge clk) begin if (reset) begin - batch_in_idx <= '0; - batch_out_idx <= '0; + batch_in_idx <= '0; + batch_out_idx <= '0; + batch_in_done <= 0; + batch_out_done <= 0; end else if (enable) begin - if (valid_in) begin - batch_in_idx <= batch_in_idx + BATCH_SIZEW'(1); - end - if (valid_out_s) begin - batch_out_idx <= batch_out_idx + BATCH_SIZEW'(1); - end + batch_in_idx <= batch_in_idx + BATCH_SIZEW'(valid_in); + batch_out_idx <= batch_out_idx + 
BATCH_SIZEW'(pe_valid_in); + batch_in_done <= valid_in && (batch_in_idx == BATCH_SIZEW'(BATCH_SIZE-2)); + batch_out_done <= pe_valid_in && (batch_out_idx == BATCH_SIZEW'(BATCH_SIZE-2)); end end - wire batch_in_done = (batch_in_idx == BATCH_SIZEW'(BATCH_SIZE-1)); - wire batch_out_done = (batch_out_idx == BATCH_SIZEW'(BATCH_SIZE-1)); - - reg valid_out_r; reg [BATCH_SIZE-1:0][NUM_PES-1:0][DATA_OUT_WIDTH-1:0] data_out_r; reg [TAG_WIDTH-1:0] tag_out_r; + reg valid_out_r; - wire valid_out_b = valid_out_s && batch_out_done; + wire valid_out_b = pe_valid_in && batch_out_done; wire ready_out_b = ready_out_u || ~valid_out_u; always @(posedge clk) begin @@ -120,14 +129,13 @@ module VX_pe_serializer #( valid_out_r <= valid_out_b; end if (ready_out_b) begin - data_out_r[batch_out_idx] <= pe_data_out; - tag_out_r <= tag_out_s; + data_out_r[batch_out_idx] <= pe_data_in; + tag_out_r <= pe_tag_in; end end assign enable = ready_out_b || ~valid_out_b; assign ready_in = enable && batch_in_done; - assign pe_enable = enable; assign valid_out_u = valid_out_r; assign data_out_u = data_out_r; @@ -135,15 +143,14 @@ module VX_pe_serializer #( end else begin - assign pe_data_in_s = data_in; + assign pe_data_out_w = data_in; - assign enable = ready_out_u || ~valid_out_s; + assign enable = ready_out_u || ~pe_valid_in; assign ready_in = enable; - assign pe_enable = enable; - assign valid_out_u = valid_out_s; - assign data_out_u = pe_data_out; - assign tag_out_u = tag_out_s; + assign valid_out_u = pe_valid_in; + assign data_out_u = pe_data_in; + assign tag_out_u = pe_tag_in; end From df3fc150f4af27f5cbe50c3a7fe06a0428b48070 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sun, 25 Aug 2024 06:06:52 -0700 Subject: [PATCH 097/407] minor update --- hw/rtl/libs/VX_cyclic_arbiter.sv | 12 +++++------- hw/rtl/libs/VX_rr_arbiter.sv | 16 ++++++++-------- 2 files changed, 13 insertions(+), 15 deletions(-) diff --git a/hw/rtl/libs/VX_cyclic_arbiter.sv b/hw/rtl/libs/VX_cyclic_arbiter.sv index 
0b8fcedfee..e134bea912 100644 --- a/hw/rtl/libs/VX_cyclic_arbiter.sv +++ b/hw/rtl/libs/VX_cyclic_arbiter.sv @@ -40,17 +40,17 @@ module VX_cyclic_arbiter #( localparam IS_POW2 = (1 << LOG_NUM_REQS) == NUM_REQS; - wire [LOG_NUM_REQS-1:0] grant_index_um, grant_index_ql; + wire [LOG_NUM_REQS-1:0] grant_index_um; reg [LOG_NUM_REQS-1:0] grant_index_r; always @(posedge clk) begin if (reset) begin grant_index_r <= '0; end else if (grant_valid && grant_ready) begin - if (!IS_POW2 && grant_index_ql == LOG_NUM_REQS'(NUM_REQS-1)) begin + if (!IS_POW2 && grant_index == LOG_NUM_REQS'(NUM_REQS-1)) begin grant_index_r <= '0; end else begin - grant_index_r <= grant_index_ql + LOG_NUM_REQS'(1); + grant_index_r <= grant_index + LOG_NUM_REQS'(1); end end end @@ -64,10 +64,8 @@ module VX_cyclic_arbiter #( .valid_out (grant_valid) ); - assign grant_index_ql = requests[grant_index_r] ? grant_index_r : grant_index_um; - - assign grant_index = grant_index_ql; - assign grant_onehot = NUM_REQS'(1) << grant_index_ql; + assign grant_index = requests[grant_index_r] ? 
grant_index_r : grant_index_um; + assign grant_onehot = NUM_REQS'(grant_valid) << grant_index; end diff --git a/hw/rtl/libs/VX_rr_arbiter.sv b/hw/rtl/libs/VX_rr_arbiter.sv index 6199d5794c..e0af433f5d 100644 --- a/hw/rtl/libs/VX_rr_arbiter.sv +++ b/hw/rtl/libs/VX_rr_arbiter.sv @@ -62,7 +62,7 @@ module VX_rr_arbiter #( end assign grant_index = grant_index_w; - assign grant_onehot = NUM_REQS'(1) << grant_index_w; + assign grant_onehot = NUM_REQS'(grant_valid) << grant_index_w; assign grant_valid = (| requests); end else if (LUT_OPT && NUM_REQS == 3) begin @@ -94,7 +94,7 @@ module VX_rr_arbiter #( end assign grant_index = grant_index_w; - assign grant_onehot = NUM_REQS'(1) << grant_index_w; + assign grant_onehot = NUM_REQS'(grant_valid) << grant_index_w; assign grant_valid = (| requests); end else if (LUT_OPT && NUM_REQS == 4) begin @@ -133,7 +133,7 @@ module VX_rr_arbiter #( end assign grant_index = grant_index_w; - assign grant_onehot = NUM_REQS'(1) << grant_index_w; + assign grant_onehot = NUM_REQS'(grant_valid) << grant_index_w; assign grant_valid = (| requests); end else if (LUT_OPT && NUM_REQS == 5) begin @@ -181,7 +181,7 @@ module VX_rr_arbiter #( end assign grant_index = grant_index_w; - assign grant_onehot = NUM_REQS'(1) << grant_index_w; + assign grant_onehot = NUM_REQS'(grant_valid) << grant_index_w; assign grant_valid = (| requests); end else if (LUT_OPT && NUM_REQS == 6) begin @@ -240,7 +240,7 @@ module VX_rr_arbiter #( end assign grant_index = grant_index_w; - assign grant_onehot = NUM_REQS'(1) << grant_index_w; + assign grant_onehot = NUM_REQS'(grant_valid) << grant_index_w; assign grant_valid = (| requests); end else if (LUT_OPT && NUM_REQS == 7) begin @@ -312,7 +312,7 @@ module VX_rr_arbiter #( end assign grant_index = grant_index_w; - assign grant_onehot = NUM_REQS'(1) << grant_index_w; + assign grant_onehot = NUM_REQS'(grant_valid) << grant_index_w; assign grant_valid = (| requests); end else if (LUT_OPT && NUM_REQS == 8) begin @@ -399,7 +399,7 @@ 
module VX_rr_arbiter #( end assign grant_index = grant_index_w; - assign grant_onehot = NUM_REQS'(1) << grant_index_w; + assign grant_onehot = NUM_REQS'(grant_valid) << grant_index_w; assign grant_valid = (| requests); end else if (MODEL == 1) begin @@ -474,7 +474,7 @@ module VX_rr_arbiter #( end assign grant_index = grant_table[state]; - assign grant_onehot = NUM_REQS'(1) << grant_index; + assign grant_onehot = NUM_REQS'(grant_valid) << grant_index; assign grant_valid = (| requests); end From 088aed022ff239a5f4b901a2915d9f928536d4a4 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sun, 25 Aug 2024 15:52:17 -0700 Subject: [PATCH 098/407] minor update --- hw/rtl/core/VX_dispatch.sv | 6 +- hw/rtl/libs/VX_rr_arbiter.sv | 105 +++++++++++++++++++---------------- 2 files changed, 59 insertions(+), 52 deletions(-) diff --git a/hw/rtl/core/VX_dispatch.sv b/hw/rtl/core/VX_dispatch.sv index 0766fd83fd..3fe98ba938 100644 --- a/hw/rtl/core/VX_dispatch.sv +++ b/hw/rtl/core/VX_dispatch.sv @@ -50,8 +50,8 @@ module VX_dispatch import VX_gpu_pkg::*; #( `UNUSED_PIN (valid_out) ); - wire [`NUM_EX_UNITS-1:0] operands_reset; - assign operands_if.ready = operands_reset[operands_if.data.ex_type]; + wire [`NUM_EX_UNITS-1:0] operands_ready_in; + assign operands_if.ready = operands_ready_in[operands_if.data.ex_type]; for (genvar i = 0; i < `NUM_EX_UNITS; ++i) begin VX_elastic_buffer #( @@ -62,7 +62,7 @@ module VX_dispatch import VX_gpu_pkg::*; #( .clk (clk), .reset (reset), .valid_in (operands_if.valid && (operands_if.data.ex_type == `EX_BITS'(i))), - .ready_in (operands_reset[i]), + .ready_in (operands_ready_in[i]), .data_in ({ operands_if.data.uuid, operands_if.data.wis, diff --git a/hw/rtl/libs/VX_rr_arbiter.sv b/hw/rtl/libs/VX_rr_arbiter.sv index e0af433f5d..5c279989ba 100644 --- a/hw/rtl/libs/VX_rr_arbiter.sv +++ b/hw/rtl/libs/VX_rr_arbiter.sv @@ -41,15 +41,16 @@ module VX_rr_arbiter #( end else if (LUT_OPT && NUM_REQS == 2) begin reg [LOG_NUM_REQS-1:0] grant_index_w; + reg 
[NUM_REQS-1:0] grant_onehot_w; reg [LOG_NUM_REQS-1:0] state; always @(*) begin casez ({state, requests}) 3'b0_01, - 3'b1_?1: begin grant_index_w = LOG_NUM_REQS'(0); end + 3'b1_?1: begin grant_onehot_w = 2'b01; grant_index_w = LOG_NUM_REQS'(0); end 3'b0_1?, - 3'b1_10: begin grant_index_w = LOG_NUM_REQS'(1); end - default: begin grant_index_w = 'x; end + 3'b1_10: begin grant_onehot_w = 2'b10; grant_index_w = LOG_NUM_REQS'(1); end + default: begin grant_onehot_w = 2'b00; grant_index_w = 'x; end endcase end @@ -62,26 +63,27 @@ module VX_rr_arbiter #( end assign grant_index = grant_index_w; - assign grant_onehot = NUM_REQS'(grant_valid) << grant_index_w; + assign grant_onehot = grant_onehot_w; assign grant_valid = (| requests); end else if (LUT_OPT && NUM_REQS == 3) begin reg [LOG_NUM_REQS-1:0] grant_index_w; + reg [NUM_REQS-1:0] grant_onehot_w; reg [LOG_NUM_REQS-1:0] state; always @(*) begin casez ({state, requests}) 5'b00_001, 5'b01_0?1, - 5'b10_??1: begin grant_index_w = LOG_NUM_REQS'(0); end + 5'b10_??1: begin grant_onehot_w = 3'b001; grant_index_w = LOG_NUM_REQS'(0); end 5'b00_?1?, 5'b01_010, - 5'b10_?10: begin grant_index_w = LOG_NUM_REQS'(1); end + 5'b10_?10: begin grant_onehot_w = 3'b010; grant_index_w = LOG_NUM_REQS'(1); end 5'b00_10?, 5'b01_1??, - 5'b10_100: begin grant_index_w = LOG_NUM_REQS'(2); end - default: begin grant_index_w = 'x; end + 5'b10_100: begin grant_onehot_w = 3'b100; grant_index_w = LOG_NUM_REQS'(2); end + default: begin grant_onehot_w = 3'b000; grant_index_w = 'x; end endcase end @@ -94,12 +96,13 @@ module VX_rr_arbiter #( end assign grant_index = grant_index_w; - assign grant_onehot = NUM_REQS'(grant_valid) << grant_index_w; + assign grant_onehot = grant_onehot_w; assign grant_valid = (| requests); end else if (LUT_OPT && NUM_REQS == 4) begin reg [LOG_NUM_REQS-1:0] grant_index_w; + reg [NUM_REQS-1:0] grant_onehot_w; reg [LOG_NUM_REQS-1:0] state; always @(*) begin @@ -107,20 +110,20 @@ module VX_rr_arbiter #( 6'b00_0001, 6'b01_00?1, 
6'b10_0??1, - 6'b11_???1: begin grant_index_w = LOG_NUM_REQS'(0); end + 6'b11_???1: begin grant_onehot_w = 4'b0001; grant_index_w = LOG_NUM_REQS'(0); end 6'b00_??1?, 6'b01_0010, 6'b10_0?10, - 6'b11_??10: begin grant_index_w = LOG_NUM_REQS'(1); end + 6'b11_??10: begin grant_onehot_w = 4'b0010; grant_index_w = LOG_NUM_REQS'(1); end 6'b00_?10?, 6'b01_?1??, 6'b10_0100, - 6'b11_?100: begin grant_index_w = LOG_NUM_REQS'(2); end + 6'b11_?100: begin grant_onehot_w = 4'b0100; grant_index_w = LOG_NUM_REQS'(2); end 6'b00_100?, 6'b01_10??, 6'b10_1???, - 6'b11_1000: begin grant_index_w = LOG_NUM_REQS'(3); end - default: begin grant_index_w = 'x; end + 6'b11_1000: begin grant_onehot_w = 4'b1000; grant_index_w = LOG_NUM_REQS'(3); end + default: begin grant_onehot_w = 4'b0000; grant_index_w = 'x; end endcase end @@ -133,12 +136,13 @@ module VX_rr_arbiter #( end assign grant_index = grant_index_w; - assign grant_onehot = NUM_REQS'(grant_valid) << grant_index_w; + assign grant_onehot = grant_onehot_w; assign grant_valid = (| requests); end else if (LUT_OPT && NUM_REQS == 5) begin reg [LOG_NUM_REQS-1:0] grant_index_w; + reg [NUM_REQS-1:0] grant_onehot_w; reg [LOG_NUM_REQS-1:0] state; always @(*) begin @@ -147,28 +151,28 @@ module VX_rr_arbiter #( 8'b001_000?1, 8'b010_00??1, 8'b011_0???1, - 8'b100_????1: begin grant_index_w = LOG_NUM_REQS'(0); end + 8'b100_????1: begin grant_onehot_w = 5'b00001; grant_index_w = LOG_NUM_REQS'(0); end 8'b000_???1?, 8'b001_00010, 8'b010_00?10, 8'b011_0??10, - 8'b100_???10: begin grant_index_w = LOG_NUM_REQS'(1); end + 8'b100_???10: begin grant_onehot_w = 5'b00010; grant_index_w = LOG_NUM_REQS'(1); end 8'b000_??10?, 8'b001_??1??, 8'b010_00100, 8'b011_0?100, - 8'b100_??100: begin grant_index_w = LOG_NUM_REQS'(2); end + 8'b100_??100: begin grant_onehot_w = 5'b00100; grant_index_w = LOG_NUM_REQS'(2); end 8'b000_?100?, 8'b001_?10??, 8'b010_?1???, 8'b011_01000, - 8'b100_?1000: begin grant_index_w = LOG_NUM_REQS'(3); end + 8'b100_?1000: begin grant_onehot_w = 
5'b01000; grant_index_w = LOG_NUM_REQS'(3); end 8'b000_1000?, 8'b001_100??, 8'b010_10???, 8'b011_1????, - 8'b100_10000: begin grant_index_w = LOG_NUM_REQS'(4); end - default: begin grant_index_w = 'x; end + 8'b100_10000: begin grant_onehot_w = 5'b10000; grant_index_w = LOG_NUM_REQS'(4); end + default: begin grant_onehot_w = 5'b00000; grant_index_w = 'x; end endcase end @@ -181,12 +185,13 @@ module VX_rr_arbiter #( end assign grant_index = grant_index_w; - assign grant_onehot = NUM_REQS'(grant_valid) << grant_index_w; + assign grant_onehot = grant_onehot_w; assign grant_valid = (| requests); end else if (LUT_OPT && NUM_REQS == 6) begin reg [LOG_NUM_REQS-1:0] grant_index_w; + reg [NUM_REQS-1:0] grant_onehot_w; reg [LOG_NUM_REQS-1:0] state; always @(*) begin @@ -196,38 +201,38 @@ module VX_rr_arbiter #( 9'b010_000??1, 9'b011_00???1, 9'b100_0????1, - 9'b101_?????1: begin grant_index_w = LOG_NUM_REQS'(0); end + 9'b101_?????1: begin grant_onehot_w = 6'b000001; grant_index_w = LOG_NUM_REQS'(0); end 9'b000_????1?, 9'b001_000010, 9'b010_000?10, 9'b011_00??10, 9'b100_0???10, - 9'b101_????10: begin grant_index_w = LOG_NUM_REQS'(1); end + 9'b101_????10: begin grant_onehot_w = 6'b000010; grant_index_w = LOG_NUM_REQS'(1); end 9'b000_???10?, 9'b001_???1??, 9'b010_000100, 9'b011_00?100, 9'b100_0??100, - 9'b101_???100: begin grant_index_w = LOG_NUM_REQS'(2); end + 9'b101_???100: begin grant_onehot_w = 6'b000100; grant_index_w = LOG_NUM_REQS'(2); end 9'b000_??100?, 9'b001_??10??, 9'b010_??1???, 9'b011_001000, 9'b100_0?1000, - 9'b101_??1000: begin grant_index_w = LOG_NUM_REQS'(3); end + 9'b101_??1000: begin grant_onehot_w = 6'b001000; grant_index_w = LOG_NUM_REQS'(3); end 9'b000_?1000?, 9'b001_?100??, 9'b010_?10???, 9'b011_?1????, 9'b100_010000, - 9'b101_?10000: begin grant_index_w = LOG_NUM_REQS'(4); end + 9'b101_?10000: begin grant_onehot_w = 6'b010000; grant_index_w = LOG_NUM_REQS'(4); end 9'b000_10000?, 9'b001_1000??, 9'b010_100???, 9'b011_10????, 9'b100_1?????, - 9'b101_100000: 
begin grant_index_w = LOG_NUM_REQS'(5); end - default: begin grant_index_w = 'x; end + 9'b101_100000: begin grant_onehot_w = 6'b100000; grant_index_w = LOG_NUM_REQS'(5); end + default: begin grant_onehot_w = 6'b000000; grant_index_w = 'x; end endcase end @@ -240,12 +245,13 @@ module VX_rr_arbiter #( end assign grant_index = grant_index_w; - assign grant_onehot = NUM_REQS'(grant_valid) << grant_index_w; + assign grant_onehot = grant_onehot_w; assign grant_valid = (| requests); end else if (LUT_OPT && NUM_REQS == 7) begin reg [LOG_NUM_REQS-1:0] grant_index_w; + reg [NUM_REQS-1:0] grant_onehot_w; reg [LOG_NUM_REQS-1:0] state; always @(*) begin @@ -256,50 +262,50 @@ module VX_rr_arbiter #( 10'b011_000???1, 10'b100_000???1, 10'b101_00????1, - 10'b110_??????1: begin grant_index_w = LOG_NUM_REQS'(0); end + 10'b110_??????1: begin grant_onehot_w = 7'b0000001; grant_index_w = LOG_NUM_REQS'(0); end 10'b000_?????1?, 10'b001_0000010, 10'b010_0000?10, 10'b011_000??10, 10'b100_00???10, 10'b101_0????10, - 10'b110_?????10: begin grant_index_w = LOG_NUM_REQS'(1); end + 10'b110_?????10: begin grant_onehot_w = 7'b0000010; grant_index_w = LOG_NUM_REQS'(1); end 10'b000_????10?, 10'b001_????1??, 10'b010_0000100, 10'b011_000?100, 10'b100_00??100, 10'b101_0???100, - 10'b110_????100: begin grant_index_w = LOG_NUM_REQS'(2); end + 10'b110_????100: begin grant_onehot_w = 7'b0000100; grant_index_w = LOG_NUM_REQS'(2); end 10'b000_???100?, 10'b001_???10??, 10'b010_???1???, 10'b011_0001000, 10'b100_00?1000, 10'b101_0??1000, - 10'b110_???1000: begin grant_index_w = LOG_NUM_REQS'(3); end + 10'b110_???1000: begin grant_onehot_w = 7'b0001000; grant_index_w = LOG_NUM_REQS'(3); end 10'b000_??1000?, 10'b001_??100??, 10'b010_??10???, 10'b011_??1????, 10'b100_0010000, 10'b101_0?10000, - 10'b110_??10000: begin grant_index_w = LOG_NUM_REQS'(4); end + 10'b110_??10000: begin grant_onehot_w = 7'b0010000; grant_index_w = LOG_NUM_REQS'(4); end 10'b000_?10000?, 10'b001_?1000??, 10'b010_?100???, 10'b011_?10????, 
10'b100_?1?????, 10'b101_0100000, - 10'b110_?100000: begin grant_index_w = LOG_NUM_REQS'(5); end + 10'b110_?100000: begin grant_onehot_w = 7'b0100000; grant_index_w = LOG_NUM_REQS'(5); end 10'b000_100000?, 10'b001_10000??, 10'b010_1000???, 10'b011_100????, 10'b100_10?????, 10'b101_1??????, - 10'b110_1000000: begin grant_index_w = LOG_NUM_REQS'(6); end - default: begin grant_index_w = 'x; end + 10'b110_1000000: begin grant_onehot_w = 7'b1000000; grant_index_w = LOG_NUM_REQS'(6); end + default: begin grant_onehot_w = 7'b0000000; grant_index_w = 'x; end endcase end @@ -312,12 +318,13 @@ module VX_rr_arbiter #( end assign grant_index = grant_index_w; - assign grant_onehot = NUM_REQS'(grant_valid) << grant_index_w; + assign grant_onehot = grant_onehot_w; assign grant_valid = (| requests); end else if (LUT_OPT && NUM_REQS == 8) begin reg [LOG_NUM_REQS-1:0] grant_index_w; + reg [NUM_REQS-1:0] grant_onehot_w; reg [LOG_NUM_REQS-1:0] state; always @(*) begin @@ -329,7 +336,7 @@ module VX_rr_arbiter #( 11'b100_000????1, 11'b101_00?????1, 11'b110_0??????1, - 11'b111_???????1: begin grant_index_w = LOG_NUM_REQS'(0); end + 11'b111_???????1: begin grant_onehot_w = 8'b00000001; grant_index_w = LOG_NUM_REQS'(0); end 11'b000_??????1?, 11'b001_00000010, 11'b010_00000?10, @@ -337,7 +344,7 @@ module VX_rr_arbiter #( 11'b100_000???10, 11'b101_00????10, 11'b110_0?????10, - 11'b111_??????10: begin grant_index_w = LOG_NUM_REQS'(1); end + 11'b111_??????10: begin grant_onehot_w = 8'b00000010; grant_index_w = LOG_NUM_REQS'(1); end 11'b000_?????10?, 11'b001_?????1??, 11'b010_00000100, @@ -345,7 +352,7 @@ module VX_rr_arbiter #( 11'b100_000??100, 11'b101_00???100, 11'b110_0????100, - 11'b111_?????100: begin grant_index_w = LOG_NUM_REQS'(2); end + 11'b111_?????100: begin grant_onehot_w = 8'b00000100; grant_index_w = LOG_NUM_REQS'(2); end 11'b000_????100?, 11'b001_????10??, 11'b010_????1???, @@ -353,7 +360,7 @@ module VX_rr_arbiter #( 11'b100_000?1000, 11'b101_00??1000, 11'b110_0???1000, - 
11'b111_????1000: begin grant_index_w = LOG_NUM_REQS'(3); end + 11'b111_????1000: begin grant_onehot_w = 8'b00001000; grant_index_w = LOG_NUM_REQS'(3); end 11'b000_???1000?, 11'b001_???100??, 11'b010_???10???, @@ -361,7 +368,7 @@ module VX_rr_arbiter #( 11'b100_00010000, 11'b101_00?10000, 11'b110_0??10000, - 11'b111_???10000: begin grant_index_w = LOG_NUM_REQS'(4); end + 11'b111_???10000: begin grant_onehot_w = 8'b00010000; grant_index_w = LOG_NUM_REQS'(4); end 11'b000_??10000?, 11'b001_??1000??, 11'b010_??100???, @@ -369,7 +376,7 @@ module VX_rr_arbiter #( 11'b100_??1?????, 11'b101_00100000, 11'b110_0?100000, - 11'b111_??100000: begin grant_index_w = LOG_NUM_REQS'(5); end + 11'b111_??100000: begin grant_onehot_w = 8'b00100000; grant_index_w = LOG_NUM_REQS'(5); end 11'b000_?100000?, 11'b001_?10000??, 11'b010_?1000???, @@ -377,7 +384,7 @@ module VX_rr_arbiter #( 11'b100_?10?????, 11'b101_?1??????, 11'b110_01000000, - 11'b111_?1000000: begin grant_index_w = LOG_NUM_REQS'(6); end + 11'b111_?1000000: begin grant_onehot_w = 8'b01000000; grant_index_w = LOG_NUM_REQS'(6); end 11'b000_1000000?, 11'b001_100000??, 11'b010_10000???, @@ -385,8 +392,8 @@ module VX_rr_arbiter #( 11'b100_100?????, 11'b101_10??????, 11'b110_1???????, - 11'b111_10000000: begin grant_index_w = LOG_NUM_REQS'(7); end - default: begin grant_index_w = 'x; end + 11'b111_10000000: begin grant_onehot_w = 8'b10000000; grant_index_w = LOG_NUM_REQS'(7); end + default: begin grant_onehot_w = 8'b00000000; grant_index_w = 'x; end endcase end @@ -399,7 +406,7 @@ module VX_rr_arbiter #( end assign grant_index = grant_index_w; - assign grant_onehot = NUM_REQS'(grant_valid) << grant_index_w; + assign grant_onehot = grant_onehot_w; assign grant_valid = (| requests); end else if (MODEL == 1) begin From 2ca343910969dafea2f011d6cbb5d650fa9048e7 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sun, 25 Aug 2024 15:52:27 -0700 Subject: [PATCH 099/407] xrt runtime update --- runtime/xrt/vortex.cpp | 59 
+++++++++++++++++++++++++++--------------- 1 file changed, 38 insertions(+), 21 deletions(-) diff --git a/runtime/xrt/vortex.cpp b/runtime/xrt/vortex.cpp index 408bf23edd..8c273cf7f8 100644 --- a/runtime/xrt/vortex.cpp +++ b/runtime/xrt/vortex.cpp @@ -66,11 +66,14 @@ struct platform_info_t { }; static const platform_info_t g_platforms[] = { - {"vortex_xrtsim", 4, 0x10, 0x0}, // 64 KB banks - {"xilinx_u50", 4, 0x1C, 0x0}, // 16 MB banks - {"xilinx_u200", 4, 0x1C, 0x0}, // 16 MB banks - {"xilinx_u280", 4, 0x1C, 0x0}, // 16 MB banks - {"xilinx_vck5000", 0, 0x21, 0xC000000000}, + {"vortex_xrtsim", 4, 16, 0x0}, // 16 x 64 KB = 1 MB + {"xilinx_u200", 2, 34, 0x0}, // 4 x 16 GB = 64 GB DDR4 + {"xilinx_u250", 2, 34, 0x0}, // 4 x 16 GB = 64 GB DDR4 + {"xilinx_u50", 5, 28, 0x0}, // 32 x 256 MB = 8 GB HBM2 + {"xilinx_u280", 5, 28, 0x0}, // 32 x 256 MB = 8 GB HBM2 + {"xilinx_u55c", 5, 29, 0x0}, // 32 x 512 MB = 16 GB HBM2 + {"xilinx_vck5000", 0, 33, 0xC000000000}, // 1 x 8 GB = 8 GB DDR4 + {"xilinx_kv260", 0, 32, 0x0}, // 1 x 4 GB = 4 GB DDR4 }; #ifdef CPP_API @@ -277,6 +280,8 @@ class vx_device { xrtDevice_ = xrtDevice; xrtKernel_ = xrtKernel; + printf("info: device name=%s.\n", device_name.c_str()); + CHECK_ERR(get_platform_info(device_name, &platform_), { fprintf(stderr, "[VXDRV] Error: platform not supported: %s\n", device_name.c_str()); return err; @@ -286,9 +291,37 @@ class vx_device { return err; }); + CHECK_ERR(this->read_register(MMIO_DEV_ADDR, (uint32_t *)&dev_caps_), { + return err; + }); + + CHECK_ERR(this->read_register(MMIO_DEV_ADDR + 4, (uint32_t *)&dev_caps_ + 1), { + return err; + }); + + CHECK_ERR(this->read_register(MMIO_ISA_ADDR, (uint32_t *)&isa_caps_), { + return err; + }); + + CHECK_ERR(this->read_register(MMIO_ISA_ADDR + 4, (uint32_t *)&isa_caps_ + 1), { + return err; + }); + uint32_t num_banks = 1 << platform_.lg2_num_banks; uint64_t bank_size = 1ull << platform_.lg2_bank_size; + // adjust memory bank size to architecture limit + int isa_arch = 
VX_ISA_ARCH(isa_caps_); + if (isa_arch == 32) { + uint64_t max_mem_size = 1ull << 32; + uint64_t need_bank_size = max_mem_size / num_banks; + if (bank_size > need_bank_size) { + printf("info: adjusted bank size from 0x%lx to 0x%lx bytes.\n", bank_size, need_bank_size); + bank_size = need_bank_size; + platform_.lg2_bank_size = log2ceil(bank_size); + } + } + for (uint32_t i = 0; i < num_banks; ++i) { uint32_t reg_addr = MMIO_MEM_ADDR + (i * 12); uint64_t reg_value = platform_.mem_base + i * bank_size; @@ -305,22 +338,6 @@ class vx_device { #endif } - CHECK_ERR(this->read_register(MMIO_DEV_ADDR, (uint32_t *)&dev_caps_), { - return err; - }); - - CHECK_ERR(this->read_register(MMIO_DEV_ADDR + 4, (uint32_t *)&dev_caps_ + 1), { - return err; - }); - - CHECK_ERR(this->read_register(MMIO_ISA_ADDR, (uint32_t *)&isa_caps_), { - return err; - }); - - CHECK_ERR(this->read_register(MMIO_ISA_ADDR + 4, (uint32_t *)&isa_caps_ + 1), { - return err; - }); - global_mem_size_ = num_banks * bank_size; #ifdef BANK_INTERLEAVE From 51719f69bb127ab08892941020e3e0ad1a65b6df Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sun, 25 Aug 2024 16:51:00 -0700 Subject: [PATCH 100/407] minor update --- hw/rtl/cache/VX_cache.sv | 12 ++++++------ hw/rtl/cache/VX_cache_bank.sv | 12 ++++++------ hw/rtl/cache/VX_cache_bypass.sv | 4 ++-- hw/rtl/cache/VX_cache_cluster.sv | 6 +++--- hw/rtl/libs/VX_stream_arb.sv | 1 - 5 files changed, 17 insertions(+), 18 deletions(-) diff --git a/hw/rtl/cache/VX_cache.sv b/hw/rtl/cache/VX_cache.sv index 60493665b4..90b34a1e41 100644 --- a/hw/rtl/cache/VX_cache.sv +++ b/hw/rtl/cache/VX_cache.sv @@ -93,8 +93,8 @@ module VX_cache import VX_gpu_pkg::*; #( localparam CORE_REQ_DATAW = LINE_ADDR_WIDTH + 1 + WORD_SEL_WIDTH + WORD_SIZE + WORD_WIDTH + TAG_WIDTH + 1; localparam CORE_RSP_DATAW = WORD_WIDTH + TAG_WIDTH; - localparam CORE_REQ_BUF_ENABLE = (NUM_BANKS != 1) || (NUM_REQS != 1); - localparam MEM_REQ_BUF_ENABLE = (NUM_BANKS != 1); + localparam CORE_RSP_REG_DISABLE = 
(NUM_BANKS != 1) || (NUM_REQS != 1); + localparam MEM_REQ_REG_DISABLE = (NUM_BANKS != 1); localparam REQ_XBAR_BUF = (NUM_REQS > 4) ? 2 : 0; @@ -139,7 +139,7 @@ module VX_cache import VX_gpu_pkg::*; #( for (genvar i = 0; i < NUM_REQS; ++i) begin VX_elastic_buffer #( .DATAW (`CS_WORD_WIDTH + TAG_WIDTH), - .SIZE (CORE_REQ_BUF_ENABLE ? `TO_OUT_BUF_SIZE(CORE_OUT_BUF) : 0), + .SIZE (CORE_RSP_REG_DISABLE ? `TO_OUT_BUF_SIZE(CORE_OUT_BUF) : 0), .OUT_REG (`TO_OUT_BUF_REG(CORE_OUT_BUF)) ) core_rsp_buf ( .clk (clk), @@ -198,7 +198,7 @@ module VX_cache import VX_gpu_pkg::*; #( VX_elastic_buffer #( .DATAW (1 + LINE_SIZE + `CS_MEM_ADDR_WIDTH + `CS_LINE_WIDTH + MEM_TAG_WIDTH + 1), - .SIZE (MEM_REQ_BUF_ENABLE ? `TO_OUT_BUF_SIZE(MEM_OUT_BUF) : 0), + .SIZE (MEM_REQ_REG_DISABLE ? `TO_OUT_BUF_SIZE(MEM_OUT_BUF) : 0), .OUT_REG (`TO_OUT_BUF_REG(MEM_OUT_BUF)) ) mem_req_buf ( .clk (clk), @@ -388,8 +388,8 @@ module VX_cache import VX_gpu_pkg::*; #( .WRITEBACK (WRITEBACK), .UUID_WIDTH (UUID_WIDTH), .TAG_WIDTH (TAG_WIDTH), - .CORE_OUT_BUF (CORE_REQ_BUF_ENABLE ? 0 : CORE_OUT_BUF), - .MEM_OUT_BUF (MEM_REQ_BUF_ENABLE ? 0 : MEM_OUT_BUF) + .CORE_OUT_REG (CORE_RSP_REG_DISABLE ? 0 : `TO_OUT_BUF_REG(CORE_OUT_BUF)), + .MEM_OUT_REG (MEM_REQ_REG_DISABLE ? 
0 : `TO_OUT_BUF_REG(MEM_OUT_BUF)) ) bank ( .clk (clk), .reset (bank_reset), diff --git a/hw/rtl/cache/VX_cache_bank.sv b/hw/rtl/cache/VX_cache_bank.sv index 22d956dba8..19c24ad5d3 100644 --- a/hw/rtl/cache/VX_cache_bank.sv +++ b/hw/rtl/cache/VX_cache_bank.sv @@ -53,11 +53,11 @@ module VX_cache_bank #( // core request tag size parameter TAG_WIDTH = UUID_WIDTH + 1, - // Core response output buffer - parameter CORE_OUT_BUF = 0, + // Core response output register + parameter CORE_OUT_REG = 0, - // Memory request output buffer - parameter MEM_OUT_BUF = 0, + // Memory request output register + parameter MEM_OUT_REG = 0, parameter MSHR_ADDR_WIDTH = `LOG2UP(MSHR_SIZE), parameter REQ_SEL_WIDTH = `UP(`CS_REQ_SEL_BITS), @@ -567,7 +567,7 @@ module VX_cache_bank #( VX_elastic_buffer #( .DATAW (TAG_WIDTH + `CS_WORD_WIDTH + REQ_SEL_WIDTH), .SIZE (CRSQ_SIZE), - .OUT_REG (`TO_OUT_BUF_REG(CORE_OUT_BUF)) + .OUT_REG (CORE_OUT_REG) ) core_rsp_queue ( .clk (clk), .reset (reset), @@ -632,7 +632,7 @@ module VX_cache_bank #( .DATAW (1 + `CS_LINE_ADDR_WIDTH + MSHR_ADDR_WIDTH + LINE_SIZE + `CS_LINE_WIDTH + 1), .DEPTH (MREQ_SIZE), .ALM_FULL (MREQ_SIZE-PIPELINE_STAGES), - .OUT_REG (`TO_OUT_BUF_REG(MEM_OUT_BUF)) + .OUT_REG (MEM_OUT_REG) ) mem_req_queue ( .clk (clk), .reset (reset), diff --git a/hw/rtl/cache/VX_cache_bypass.sv b/hw/rtl/cache/VX_cache_bypass.sv index dc88c6c1fd..7992ec9e8d 100644 --- a/hw/rtl/cache/VX_cache_bypass.sv +++ b/hw/rtl/cache/VX_cache_bypass.sv @@ -219,7 +219,7 @@ module VX_cache_bypass #( VX_elastic_buffer #( .DATAW (1 + LINE_SIZE + `CS_MEM_ADDR_WIDTH + `MEM_REQ_FLAGS_WIDTH + `CS_LINE_WIDTH + MEM_TAG_OUT_WIDTH), - .SIZE ((!DIRECT_PASSTHRU) ? `TO_OUT_BUF_SIZE(MEM_OUT_BUF) : 0), + .SIZE (DIRECT_PASSTHRU ? 
0 : `TO_OUT_BUF_SIZE(MEM_OUT_BUF)), .OUT_REG (`TO_OUT_BUF_REG(MEM_OUT_BUF)) ) mem_req_buf ( .clk (clk), @@ -307,7 +307,7 @@ module VX_cache_bypass #( for (genvar i = 0; i < NUM_REQS; ++i) begin VX_elastic_buffer #( .DATAW (`CS_WORD_WIDTH + CORE_TAG_WIDTH), - .SIZE ((!DIRECT_PASSTHRU) ? `TO_OUT_BUF_SIZE(CORE_OUT_BUF) : 0), + .SIZE (DIRECT_PASSTHRU ? 0 : `TO_OUT_BUF_SIZE(CORE_OUT_BUF)), .OUT_REG (`TO_OUT_BUF_REG(CORE_OUT_BUF)) ) core_rsp_buf ( .clk (clk), diff --git a/hw/rtl/cache/VX_cache_cluster.sv b/hw/rtl/cache/VX_cache_cluster.sv index 5e0010a8c8..a56c9a8173 100644 --- a/hw/rtl/cache/VX_cache_cluster.sv +++ b/hw/rtl/cache/VX_cache_cluster.sv @@ -125,8 +125,8 @@ module VX_cache_cluster import VX_gpu_pkg::*; #( .TAG_SEL_IDX (TAG_SEL_IDX), .ARBITER ("R"), .REQ_OUT_BUF ((NUM_INPUTS != NUM_CACHES) ? 2 : 0), - .RSP_OUT_BUF ((NUM_INPUTS != NUM_CACHES) ? 2 : 0) - ) cache_arb ( + .RSP_OUT_BUF ((NUM_INPUTS != NUM_CACHES) ? CORE_OUT_BUF : 0) + ) core_arb ( .clk (clk), .reset (reset), .bus_in_if (core_bus_tmp_if), @@ -186,7 +186,7 @@ module VX_cache_cluster import VX_gpu_pkg::*; #( .TAG_WIDTH (MEM_TAG_WIDTH), .TAG_SEL_IDX (TAG_SEL_IDX), .ARBITER ("R"), - .REQ_OUT_BUF ((NUM_CACHES > 1) ? 2 : 0), + .REQ_OUT_BUF ((NUM_CACHES > 1) ? MEM_OUT_BUF : 0), .RSP_OUT_BUF ((NUM_CACHES > 1) ? 
2 : 0) ) mem_arb ( .clk (clk), diff --git a/hw/rtl/libs/VX_stream_arb.sv b/hw/rtl/libs/VX_stream_arb.sv index 413da98f04..13cde1cd91 100644 --- a/hw/rtl/libs/VX_stream_arb.sv +++ b/hw/rtl/libs/VX_stream_arb.sv @@ -335,7 +335,6 @@ module VX_stream_arb #( // #Inputs == #Outputs for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin - VX_elastic_buffer #( .DATAW (DATAW), .SIZE (`TO_OUT_BUF_SIZE(OUT_BUF)), From 9718a5b405e644238edd876635c8d3ebd77929b2 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sun, 25 Aug 2024 19:20:07 -0700 Subject: [PATCH 101/407] fpu timing optimization --- hw/rtl/fpu/VX_fpu_dsp.sv | 103 +++++++++++++++------------------ hw/rtl/libs/VX_onehot_shift.sv | 2 + hw/rtl/libs/VX_transpose.sv | 2 + 3 files changed, 51 insertions(+), 56 deletions(-) diff --git a/hw/rtl/fpu/VX_fpu_dsp.sv b/hw/rtl/fpu/VX_fpu_dsp.sv index 9e8edef095..5e37387857 100644 --- a/hw/rtl/fpu/VX_fpu_dsp.sv +++ b/hw/rtl/fpu/VX_fpu_dsp.sv @@ -54,51 +54,25 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #( localparam NUM_FPCORES = 4; localparam FPCORES_BITS = `LOG2UP(NUM_FPCORES); - localparam RSP_DATAW = (NUM_LANES * 32) + 1 + $bits(fflags_t) + TAG_WIDTH; + localparam RSP_DATAW = (NUM_LANES * `XLEN) + 1 + $bits(fflags_t) + TAG_WIDTH; `UNUSED_VAR (fmt) wire [NUM_FPCORES-1:0] per_core_ready_in; - wire [NUM_FPCORES-1:0][NUM_LANES-1:0][31:0] per_core_result; + wire [NUM_FPCORES-1:0][NUM_LANES-1:0][`XLEN-1:0] per_core_result; wire [NUM_FPCORES-1:0][TAG_WIDTH-1:0] per_core_tag_out; wire [NUM_FPCORES-1:0] per_core_ready_out; wire [NUM_FPCORES-1:0] per_core_valid_out; wire [NUM_FPCORES-1:0] per_core_has_fflags; fflags_t [NUM_FPCORES-1:0] per_core_fflags; - wire div_ready_in, sqrt_ready_in; - wire [NUM_LANES-1:0][31:0] div_result, sqrt_result; - wire [TAG_WIDTH-1:0] div_tag_out, sqrt_tag_out; - wire div_ready_out, sqrt_ready_out; - wire div_valid_out, sqrt_valid_out; - wire div_has_fflags, sqrt_has_fflags; - fflags_t div_fflags, sqrt_fflags; - - reg is_madd, is_sub, is_neg, is_div, is_itof, is_signed; - 
- wire [FPCORES_BITS-1:0] core_select = op_type[3:2]; - - always @(*) begin - is_madd = 'x; - is_sub = 'x; - is_neg = 'x; - is_div = 'x; - is_itof = 'x; - is_signed = 'x; - case (op_type) - `INST_FPU_ADD: begin is_madd = 0; is_neg = 0; is_sub = fmt[1]; end - `INST_FPU_MUL: begin is_madd = 0; is_neg = 1; is_sub = 0; end - `INST_FPU_MADD: begin is_madd = 1; is_neg = 0; is_sub = fmt[1]; end - `INST_FPU_NMADD: begin is_madd = 1; is_neg = 1; is_sub = fmt[1]; end - `INST_FPU_DIV: begin is_div = 1; end - `INST_FPU_SQRT: begin is_div = 0; end - `INST_FPU_F2I: begin is_itof = 0; is_signed = 1; end - `INST_FPU_F2U: begin is_itof = 0; is_signed = 0; end - `INST_FPU_I2F: begin is_itof = 1; is_signed = 1; end - `INST_FPU_U2F: begin is_itof = 1; is_signed = 0; end - default: begin end - endcase - end + wire [1:0] div_sqrt_ready_in; + wire [1:0][NUM_LANES*`XLEN-1:0] div_sqrt_result; + wire [1:0][TAG_WIDTH-1:0] div_sqrt_tag_out; + wire [1:0] div_sqrt_ready_out; + wire [1:0] div_sqrt_valid_out; + wire [1:0] div_sqrt_has_fflags; + fflags_t [1:0] div_sqrt_fflags; `RESET_RELAY (fma_reset, reset); `RESET_RELAY (div_reset, reset); @@ -120,7 +94,17 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #( `UNUSED_VAR (datab) `UNUSED_VAR (datac) + // Decode instruction type + wire [FPCORES_BITS-1:0] core_select = op_type[3:2]; + wire is_sqrt = op_type[0]; + wire is_itof = op_type[1]; + wire is_signed = ~op_type[0]; + wire is_madd = op_type[1]; + wire is_neg = op_type[0]; + wire is_sub = fmt[1]; + // can accept new request? 
+ assign per_core_ready_in[FPU_DIVSQRT] = div_sqrt_ready_in[is_sqrt]; assign ready_in = per_core_ready_in[core_select]; VX_fpu_fma #( @@ -154,19 +138,19 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #( ) fpu_div ( .clk (clk), .reset (div_reset), - .valid_in (valid_in && (core_select == FPU_DIVSQRT) && is_div), - .ready_in (div_ready_in), + .valid_in (valid_in && (core_select == FPU_DIVSQRT) && ~is_sqrt), + .ready_in (div_sqrt_ready_in[0]), .mask_in (mask_in), .tag_in (tag_in), .frm (frm), .dataa (dataa_s), .datab (datab_s), - .has_fflags (div_has_fflags), - .fflags (div_fflags), - .result (div_result), - .tag_out (div_tag_out), - .valid_out (div_valid_out), - .ready_out (div_ready_out) + .has_fflags (div_sqrt_has_fflags[0]), + .fflags (div_sqrt_fflags[0]), + .result (div_sqrt_result[0]), + .tag_out (div_sqrt_tag_out[0]), + .valid_out (div_sqrt_valid_out[0]), + .ready_out (div_sqrt_ready_out[0]) ); VX_fpu_sqrt #( @@ -175,18 +159,18 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #( ) fpu_sqrt ( .clk (clk), .reset (sqrt_reset), - .valid_in (valid_in && (core_select == FPU_DIVSQRT) && ~is_div), - .ready_in (sqrt_ready_in), + .valid_in (valid_in && (core_select == FPU_DIVSQRT) && is_sqrt), + .ready_in (div_sqrt_ready_in[1]), .mask_in (mask_in), .tag_in (tag_in), .frm (frm), .dataa (dataa_s), - .has_fflags (sqrt_has_fflags), - .fflags (sqrt_fflags), - .result (sqrt_result), - .tag_out (sqrt_tag_out), - .valid_out (sqrt_valid_out), - .ready_out (sqrt_ready_out) + .has_fflags (div_sqrt_has_fflags[1]), + .fflags (div_sqrt_fflags[1]), + .result (div_sqrt_result[1]), + .tag_out (div_sqrt_tag_out[1]), + .valid_out (div_sqrt_valid_out[1]), + .ready_out (div_sqrt_ready_out[1]) ); wire cvt_ret_int_in = ~is_itof; @@ -246,7 +230,15 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #( /////////////////////////////////////////////////////////////////////////// - assign per_core_ready_in[FPU_DIVSQRT] = is_div ? 
div_ready_in : sqrt_ready_in; + wire [1:0][RSP_DATAW-1:0] div_sqrt_arb_data_in; + for (genvar i = 0; i < 2; ++i) begin + assign div_sqrt_arb_data_in[i] = { + div_sqrt_result[i], + div_sqrt_has_fflags[i], + div_sqrt_fflags[i], + div_sqrt_tag_out[i] + }; + end VX_stream_arb #( .NUM_INPUTS (2), @@ -256,10 +248,9 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #( ) div_sqrt_arb ( .clk (clk), .reset (reset), - .valid_in ({sqrt_valid_out, div_valid_out}), - .ready_in ({sqrt_ready_out, div_ready_out}), - .data_in ({{sqrt_result, sqrt_has_fflags, sqrt_fflags, sqrt_tag_out}, - {div_result, div_has_fflags, div_fflags, div_tag_out}}), + .valid_in (div_sqrt_valid_out), + .ready_in (div_sqrt_ready_out), + .data_in (div_sqrt_arb_data_in), .data_out ({ per_core_result[FPU_DIVSQRT], per_core_has_fflags[FPU_DIVSQRT], diff --git a/hw/rtl/libs/VX_onehot_shift.sv b/hw/rtl/libs/VX_onehot_shift.sv index 950d1f380e..5ab5712a21 100644 --- a/hw/rtl/libs/VX_onehot_shift.sv +++ b/hw/rtl/libs/VX_onehot_shift.sv @@ -13,6 +13,7 @@ `include "VX_platform.vh" +`TRACING_OFF module VX_onehot_shift #( parameter N = 1, parameter M = 1 @@ -28,3 +29,4 @@ module VX_onehot_shift #( end endmodule +`TRACING_ON diff --git a/hw/rtl/libs/VX_transpose.sv b/hw/rtl/libs/VX_transpose.sv index 93a8c16835..7b2c273ef0 100644 --- a/hw/rtl/libs/VX_transpose.sv +++ b/hw/rtl/libs/VX_transpose.sv @@ -13,6 +13,7 @@ `include "VX_platform.vh" +`TRACING_OFF module VX_transpose #( parameter N = 1, parameter M = 1 @@ -27,3 +28,4 @@ module VX_transpose #( end endmodule +`TRACING_ON From 6d5e71a062424f07a3ca80fff4900c17e779b9f0 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sun, 25 Aug 2024 20:12:05 -0700 Subject: [PATCH 102/407] minor update --- hw/rtl/core/VX_decode.sv | 7 ++++--- hw/rtl/core/VX_schedule.sv | 4 ++-- hw/rtl/interfaces/VX_decode_sched_if.sv | 10 +++++----- 3 files changed, 11 insertions(+), 10 deletions(-) diff --git a/hw/rtl/core/VX_decode.sv b/hw/rtl/core/VX_decode.sv index d3ca4d6e4a..897dfcc11f 100644 --- 
a/hw/rtl/core/VX_decode.sv +++ b/hw/rtl/core/VX_decode.sv @@ -557,9 +557,10 @@ module VX_decode import VX_gpu_pkg::*; #( wire fetch_fire = fetch_if.valid && fetch_if.ready; - assign decode_sched_if.valid = fetch_fire; - assign decode_sched_if.wid = fetch_if.data.wid; - assign decode_sched_if.is_wstall = is_wstall; + assign decode_sched_if.valid = fetch_fire; + assign decode_sched_if.wid = fetch_if.data.wid; + assign decode_sched_if.unlock = ~is_wstall; + `ifndef L1_ENABLE assign fetch_if.ibuf_pop = decode_if.ibuf_pop; `endif diff --git a/hw/rtl/core/VX_schedule.sv b/hw/rtl/core/VX_schedule.sv index 9cdf879eb8..fbe0bd9599 100644 --- a/hw/rtl/core/VX_schedule.sv +++ b/hw/rtl/core/VX_schedule.sv @@ -189,7 +189,7 @@ module VX_schedule import VX_gpu_pkg::*; #( end // decode unlock - if (decode_sched_if.valid && ~decode_sched_if.is_wstall) begin + if (decode_sched_if.valid && decode_sched_if.unlock) begin stalled_warps_n[decode_sched_if.wid] = 0; end @@ -415,7 +415,7 @@ module VX_schedule import VX_gpu_pkg::*; #( timeout_ctr <= '0; timeout_enable <= 0; end else begin - if (decode_sched_if.valid && ~decode_sched_if.is_wstall) begin + if (decode_sched_if.valid && decode_sched_if.unlock) begin timeout_enable <= 1; end if (timeout_enable && active_warps !=0 && active_warps == stalled_warps) begin diff --git a/hw/rtl/interfaces/VX_decode_sched_if.sv b/hw/rtl/interfaces/VX_decode_sched_if.sv index b82aafb557..1f47c30e94 100644 --- a/hw/rtl/interfaces/VX_decode_sched_if.sv +++ b/hw/rtl/interfaces/VX_decode_sched_if.sv @@ -1,10 +1,10 @@ // Copyright © 2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
// You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -16,18 +16,18 @@ interface VX_decode_sched_if (); wire valid; - wire is_wstall; + wire unlock; wire [`NW_WIDTH-1:0] wid; modport master ( output valid, - output is_wstall, + output unlock, output wid ); modport slave ( input valid, - input is_wstall, + input unlock, input wid ); From 5adfd5ec68a4851121d68a1ac82d8d940f5ee9a3 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Mon, 26 Aug 2024 23:45:00 -0700 Subject: [PATCH 103/407] minor update --- hw/rtl/VX_socket.sv | 2 +- hw/rtl/core/VX_alu_muldiv.sv | 2 +- hw/rtl/core/VX_mem_unit.sv | 2 +- hw/rtl/fpu/VX_fpu_dpi.sv | 2 +- hw/rtl/fpu/VX_fpu_dsp.sv | 2 +- hw/rtl/libs/VX_priority_encoder.sv | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/hw/rtl/VX_socket.sv b/hw/rtl/VX_socket.sv index 9ed76814bc..54822176da 100644 --- a/hw/rtl/VX_socket.sv +++ b/hw/rtl/VX_socket.sv @@ -184,7 +184,7 @@ module VX_socket import VX_gpu_pkg::*; #( .DATA_SIZE (`L1_LINE_SIZE), .TAG_WIDTH (L1_MEM_TAG_WIDTH), .TAG_SEL_IDX (0), - .ARBITER ("R"), + .ARBITER ("P"), // prioritize the icache .REQ_OUT_BUF (3), .RSP_OUT_BUF (3) ) mem_arb ( diff --git a/hw/rtl/core/VX_alu_muldiv.sv b/hw/rtl/core/VX_alu_muldiv.sv index 650c278336..8e3a1ba4fb 100644 --- a/hw/rtl/core/VX_alu_muldiv.sv +++ b/hw/rtl/core/VX_alu_muldiv.sv @@ -324,7 +324,7 @@ module VX_alu_muldiv #( VX_stream_arb #( .NUM_INPUTS (2), .DATAW (TAG_WIDTH + (NUM_LANES * `XLEN)), - .ARBITER ("R"), + .ARBITER ("P"), .OUT_BUF (1) ) rsp_buf ( .clk (clk), diff --git a/hw/rtl/core/VX_mem_unit.sv b/hw/rtl/core/VX_mem_unit.sv index 7a7e9e2db4..9f1695a288 100644 --- a/hw/rtl/core/VX_mem_unit.sv +++ b/hw/rtl/core/VX_mem_unit.sv @@ -50,7 +50,7 @@ module VX_mem_unit import 
VX_gpu_pkg::*; #( .REQ0_OUT_BUF (3), .REQ1_OUT_BUF (0), .RSP_OUT_BUF (1), - .ARBITER ("R") + .ARBITER ("P") ) lmem_switch ( .clk (clk), .reset (reset), diff --git a/hw/rtl/fpu/VX_fpu_dpi.sv b/hw/rtl/fpu/VX_fpu_dpi.sv index 0ba7d54f37..9670241b3b 100644 --- a/hw/rtl/fpu/VX_fpu_dpi.sv +++ b/hw/rtl/fpu/VX_fpu_dpi.sv @@ -430,7 +430,7 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #( VX_stream_arb #( .NUM_INPUTS (2), .DATAW (RSP_DATAW), - .ARBITER ("R"), + .ARBITER ("P"), .OUT_BUF (0) ) div_sqrt_arb ( .clk (clk), diff --git a/hw/rtl/fpu/VX_fpu_dsp.sv b/hw/rtl/fpu/VX_fpu_dsp.sv index 5e37387857..bfe0baa054 100644 --- a/hw/rtl/fpu/VX_fpu_dsp.sv +++ b/hw/rtl/fpu/VX_fpu_dsp.sv @@ -243,7 +243,7 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #( VX_stream_arb #( .NUM_INPUTS (2), .DATAW (RSP_DATAW), - .ARBITER ("R"), + .ARBITER ("P"), .OUT_BUF (0) ) div_sqrt_arb ( .clk (clk), diff --git a/hw/rtl/libs/VX_priority_encoder.sv b/hw/rtl/libs/VX_priority_encoder.sv index 3dc5291ee2..a3928492a9 100644 --- a/hw/rtl/libs/VX_priority_encoder.sv +++ b/hw/rtl/libs/VX_priority_encoder.sv @@ -43,7 +43,7 @@ module VX_priority_encoder #( end else if (N == 2) begin - assign onehot_out = {~reversed[0], reversed[0]}; + assign onehot_out = {reversed[1] && ~reversed[0], reversed[0]}; assign index_out = ~reversed[0]; assign valid_out = (| reversed); From 4480ed8b0e3e03a3081e0e6afdcabad22f3de8f7 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Tue, 27 Aug 2024 01:19:02 -0700 Subject: [PATCH 104/407] minor update --- hw/rtl/libs/VX_cyclic_arbiter.sv | 9 ++++++--- hw/rtl/mem/VX_local_mem.sv | 2 +- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/hw/rtl/libs/VX_cyclic_arbiter.sv b/hw/rtl/libs/VX_cyclic_arbiter.sv index e134bea912..a6673c8b7e 100644 --- a/hw/rtl/libs/VX_cyclic_arbiter.sv +++ b/hw/rtl/libs/VX_cyclic_arbiter.sv @@ -41,6 +41,7 @@ module VX_cyclic_arbiter #( localparam IS_POW2 = (1 << LOG_NUM_REQS) == NUM_REQS; wire [LOG_NUM_REQS-1:0] grant_index_um; + wire [NUM_REQS-1:0] 
grant_onehot_um; reg [LOG_NUM_REQS-1:0] grant_index_r; always @(posedge clk) begin @@ -59,13 +60,15 @@ module VX_cyclic_arbiter #( .N (NUM_REQS) ) priority_encoder ( .data_in (requests), - `UNUSED_PIN (onehot_out), + .onehot_out (grant_onehot_um), .index_out (grant_index_um), .valid_out (grant_valid) ); - assign grant_index = requests[grant_index_r] ? grant_index_r : grant_index_um; - assign grant_onehot = NUM_REQS'(grant_valid) << grant_index; + wire is_hit = requests[grant_index_r]; + + assign grant_index = is_hit ? grant_index_r : grant_index_um; + assign grant_onehot = is_hit ? (NUM_REQS'(1) << grant_index) : grant_onehot_um; end diff --git a/hw/rtl/mem/VX_local_mem.sv b/hw/rtl/mem/VX_local_mem.sv index 72e55fe8be..6f0c1270e5 100644 --- a/hw/rtl/mem/VX_local_mem.sv +++ b/hw/rtl/mem/VX_local_mem.sv @@ -123,7 +123,7 @@ module VX_local_mem import VX_gpu_pkg::*; #( .NUM_OUTPUTS (NUM_BANKS), .DATAW (REQ_DATAW), .PERF_CTR_BITS (`PERF_CTR_BITS), - .ARBITER ("R"), + .ARBITER ("C"), .OUT_BUF (3) // output should be registered for the data_store addressing ) req_xbar ( .clk (clk), From c162d04b8fe7604d6672af535a78561ec5dcb21d Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Tue, 27 Aug 2024 03:17:01 -0700 Subject: [PATCH 105/407] minor update --- hw/rtl/fpu/VX_fpu_dsp.sv | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/hw/rtl/fpu/VX_fpu_dsp.sv b/hw/rtl/fpu/VX_fpu_dsp.sv index bfe0baa054..fcf94591cc 100644 --- a/hw/rtl/fpu/VX_fpu_dsp.sv +++ b/hw/rtl/fpu/VX_fpu_dsp.sv @@ -54,12 +54,12 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #( localparam NUM_FPCORES = 4; localparam FPCORES_BITS = `LOG2UP(NUM_FPCORES); - localparam RSP_DATAW = (NUM_LANES * `XLEN) + 1 + $bits(fflags_t) + TAG_WIDTH; + localparam RSP_DATAW = (NUM_LANES * 32) + 1 + $bits(fflags_t) + TAG_WIDTH; `UNUSED_VAR (fmt) wire [NUM_FPCORES-1:0] per_core_ready_in; - wire [NUM_FPCORES-1:0][NUM_LANES-1:0][`XLEN-1:0] per_core_result; + wire [NUM_FPCORES-1:0][NUM_LANES-1:0][31:0] per_core_result; wire 
[NUM_FPCORES-1:0][TAG_WIDTH-1:0] per_core_tag_out; wire [NUM_FPCORES-1:0] per_core_ready_out; wire [NUM_FPCORES-1:0] per_core_valid_out; @@ -67,7 +67,7 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #( fflags_t [NUM_FPCORES-1:0] per_core_fflags; wire [1:0] div_sqrt_ready_in; - wire [1:0][NUM_LANES*`XLEN-1:0] div_sqrt_result; + wire [1:0][NUM_LANES*32:0] div_sqrt_result; wire [1:0][TAG_WIDTH-1:0] div_sqrt_tag_out; wire [1:0] div_sqrt_ready_out; wire [1:0] div_sqrt_valid_out; From 91b8c6e67a0634b313d6d4af1cf111e91f925b1e Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Wed, 28 Aug 2024 00:40:28 -0700 Subject: [PATCH 106/407] fixed xilinx fpu ip dut synthesis --- hw/syn/xilinx/dut/common.mk | 6 ++- hw/syn/xilinx/dut/core/Makefile | 1 + hw/syn/xilinx/dut/fpu/Makefile | 1 + hw/syn/xilinx/dut/project.tcl | 50 +++++++++++++------ hw/syn/xilinx/dut/top/Makefile | 1 + hw/syn/xilinx/dut/vortex/Makefile | 1 + hw/syn/xilinx/sandbox/project.tcl.in | 4 +- hw/syn/xilinx/{xrt => }/scripts/gen_ip.tcl | 23 +++++---- hw/syn/xilinx/{xrt => }/scripts/gen_xo.tcl | 0 hw/syn/xilinx/{xrt => scripts}/kill_build.sh | 0 .../xilinx/{xrt => scripts}/kill_hwserver.sh | 0 hw/syn/xilinx/{xrt => scripts}/kill_sim.sh | 0 .../{xrt => }/scripts/package_kernel.tcl | 12 ++--- hw/syn/xilinx/{xrt => }/scripts/xsim.tcl | 0 hw/syn/xilinx/xrt/Makefile | 2 +- 15 files changed, 69 insertions(+), 32 deletions(-) rename hw/syn/xilinx/{xrt => }/scripts/gen_ip.tcl (86%) rename hw/syn/xilinx/{xrt => }/scripts/gen_xo.tcl (100%) rename hw/syn/xilinx/{xrt => scripts}/kill_build.sh (100%) rename hw/syn/xilinx/{xrt => scripts}/kill_hwserver.sh (100%) rename hw/syn/xilinx/{xrt => scripts}/kill_sim.sh (100%) rename hw/syn/xilinx/{xrt => }/scripts/package_kernel.tcl (98%) rename hw/syn/xilinx/{xrt => }/scripts/xsim.tcl (100%) diff --git a/hw/syn/xilinx/dut/common.mk b/hw/syn/xilinx/dut/common.mk index b435b14090..f0588ede80 100644 --- a/hw/syn/xilinx/dut/common.mk +++ b/hw/syn/xilinx/dut/common.mk @@ -25,7 +25,11 @@ 
project_1/sources.txt: build: $(PROJECT).xpr $(PROJECT).xpr: project_1/sources.txt - $(VIVADO) -mode batch -source $(SRC_DIR)/project.tcl -tclargs $(TOP_LEVEL_ENTITY) $(DEVICE) project_1/sources.txt $(SRC_DIR)/project.xdc $(SCRIPT_DIR) +ifdef FPU_IP + FPU_IP=project_1/ip $(VIVADO) -mode batch -source $(SRC_DIR)/project.tcl -tclargs $(TOP_LEVEL_ENTITY) $(DEVICE) project_1/sources.txt $(SRC_DIR)/project.xdc $(SCRIPT_DIR) $(SRC_DIR)/../scripts +else + $(VIVADO) -mode batch -source $(SRC_DIR)/project.tcl -tclargs $(TOP_LEVEL_ENTITY) $(DEVICE) project_1/sources.txt $(SRC_DIR)/project.xdc $(SCRIPT_DIR) $(SRC_DIR)/../scripts +endif clean: rm -rf project_1 diff --git a/hw/syn/xilinx/dut/core/Makefile b/hw/syn/xilinx/dut/core/Makefile index 86bb0b53c2..deda5cce9e 100644 --- a/hw/syn/xilinx/dut/core/Makefile +++ b/hw/syn/xilinx/dut/core/Makefile @@ -1,6 +1,7 @@ PROJECT = VX_core_top TOP_LEVEL_ENTITY = $(PROJECT) SRC_FILE = $(PROJECT).sv +FPU_IP = 1 include ../../common.mk diff --git a/hw/syn/xilinx/dut/fpu/Makefile b/hw/syn/xilinx/dut/fpu/Makefile index 133a8a4e93..bb66103752 100644 --- a/hw/syn/xilinx/dut/fpu/Makefile +++ b/hw/syn/xilinx/dut/fpu/Makefile @@ -1,6 +1,7 @@ PROJECT = VX_fpu_dsp TOP_LEVEL_ENTITY = $(PROJECT) SRC_FILE = $(PROJECT).sv +FPU_IP = 1 include ../../common.mk diff --git a/hw/syn/xilinx/dut/project.tcl b/hw/syn/xilinx/dut/project.tcl index c3e7e431c4..bd9cb02e70 100644 --- a/hw/syn/xilinx/dut/project.tcl +++ b/hw/syn/xilinx/dut/project.tcl @@ -14,9 +14,9 @@ # Start time set start_time [clock seconds] -if { $::argc != 5 } { +if { $::argc != 6 } { puts "ERROR: Program \"$::argv0\" requires 5 arguments!\n" - puts "Usage: $::argv0 \n" + puts "Usage: $::argv0 \n" exit } @@ -28,6 +28,7 @@ set device_part [lindex $::argv 1] set vcs_file [lindex $::argv 2] set xdc_file [lindex $::argv 3] set tool_dir [lindex $::argv 4] +set script_dir [lindex $::argv 5] #puts top_module #puts $device_part @@ -35,6 +36,14 @@ set tool_dir [lindex $::argv 4] #puts xdc_file #puts 
$tool_dir +# create fpu ip +if {[info exists ::env(FPU_IP)]} { + set ip_dir $::env(FPU_IP) + set argv [list $ip_dir $device_part] + set argc 2 + source ${script_dir}/gen_ip.tcl +} + source "${tool_dir}/parse_vcs_list.tcl" set vlist [parse_vcs_list "${vcs_file}"] @@ -61,25 +70,38 @@ foreach def $vdefines_list { set_property verilog_define $def $obj } +# add fpu ip +if {[info exists ::env(FPU_IP)]} { + set ip_dir $::env(FPU_IP) + add_files -norecurse -verbose ${ip_dir}/xil_fma/xil_fma.xci + add_files -norecurse -verbose ${ip_dir}/xil_fdiv/xil_fdiv.xci + add_files -norecurse -verbose ${ip_dir}/xil_fsqrt/xil_fsqrt.xci +} + +update_compile_order -fileset sources_1 + +set_property top $top_module [current_fileset] +set_property \ + -name {STEPS.SYNTH_DESIGN.ARGS.MORE OPTIONS} \ + -value {-mode out_of_context -flatten_hierarchy "rebuilt"} \ + -objects [get_runs synth_1] + # Synthesis -synth_design -top $top_module -include_dirs $vincludes_list -mode out_of_context -flatten_hierarchy none +launch_runs synth_1 +wait_on_run synth_1 +open_run synth_1 write_checkpoint -force post_synth.dcp report_utilization -file utilization.rpt -hierarchical -hierarchical_percentages -# Optimize -opt_design +# Implementation +launch_runs impl_1 +wait_on_run impl_1 +open_run impl_1 +write_checkpoint -force post_impl.dcp -# Place -place_design -write_checkpoint -force post_place.dcp +# Generate the synthesis report report_place_status -file place.rpt - -# Route -route_design -write_checkpoint -force post_route.dcp report_route_status -file route.rpt - -# Generate the synthesis report report_timing_summary -file timing.rpt report_power -file power.rpt report_drc -file drc.rpt diff --git a/hw/syn/xilinx/dut/top/Makefile b/hw/syn/xilinx/dut/top/Makefile index bc55224f64..0480b08e52 100644 --- a/hw/syn/xilinx/dut/top/Makefile +++ b/hw/syn/xilinx/dut/top/Makefile @@ -1,6 +1,7 @@ PROJECT = vortex_afu TOP_LEVEL_ENTITY = $(PROJECT) SRC_FILE = $(PROJECT).sv +FPU_IP = 1 include ../../common.mk diff 
--git a/hw/syn/xilinx/dut/vortex/Makefile b/hw/syn/xilinx/dut/vortex/Makefile index ee49be4367..e2525fae23 100644 --- a/hw/syn/xilinx/dut/vortex/Makefile +++ b/hw/syn/xilinx/dut/vortex/Makefile @@ -1,6 +1,7 @@ PROJECT = Vortex TOP_LEVEL_ENTITY = $(PROJECT) SRC_FILE = $(PROJECT).sv +FPU_IP = 1 include ../../common.mk diff --git a/hw/syn/xilinx/sandbox/project.tcl.in b/hw/syn/xilinx/sandbox/project.tcl.in index 7a25f6278b..0e9a23f0a9 100644 --- a/hw/syn/xilinx/sandbox/project.tcl.in +++ b/hw/syn/xilinx/sandbox/project.tcl.in @@ -397,14 +397,16 @@ update_compile_order -fileset sources_1 launch_runs synth_1 wait_on_run synth_1 open_run synth_1 +write_checkpoint -force post_synth.dcp +report_utilization -file utilization.rpt -hierarchical -hierarchical_percentages # Implementation launch_runs impl_1 wait_on_run impl_1 open_run impl_1 +write_checkpoint -force post_impl.dcp # Generate reports -report_utilization -file utilization.rpt -hierarchical -hierarchical_percentages report_place_status -file place.rpt report_route_status -file route.rpt report_timing_summary -file timing.rpt diff --git a/hw/syn/xilinx/xrt/scripts/gen_ip.tcl b/hw/syn/xilinx/scripts/gen_ip.tcl similarity index 86% rename from hw/syn/xilinx/xrt/scripts/gen_ip.tcl rename to hw/syn/xilinx/scripts/gen_ip.tcl index 5aae6db74c..a1048fc77e 100644 --- a/hw/syn/xilinx/xrt/scripts/gen_ip.tcl +++ b/hw/syn/xilinx/scripts/gen_ip.tcl @@ -1,31 +1,36 @@ # Copyright © 2019-2023 -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-if { $::argc != 1 } { - puts "ERROR: Program \"$::argv0\" requires 1 arguments!\n" - puts "Usage: $::argv0 \n" +if { $::argc < 1 || $::argc > 2 } { + puts "ERROR: Program \"$::argv0\" requires 1 or 2 arguments!\n" + puts "Usage: $::argv0 []\n" exit } set ip_dir [lindex $::argv 0] +# create_ip requires that a project is open in memory. +if { $::argc == 2 } { + set device_part [lindex $::argv 1] + create_project -in_memory -part $device_part +} else { + # Create project without specifying a device part + create_project -in_memory +} + # IP folder does not exist. Create IP folder file mkdir ${ip_dir} -# create_ip requires that a project is open in memory. -# Create project but don't do anything with it -create_project -in_memory - create_ip -name floating_point -vendor xilinx.com -library ip -version 7.1 -module_name xil_fdiv -dir ${ip_dir} set_property -dict [list CONFIG.Component_Name {xil_fdiv} CONFIG.Operation_Type {Divide} CONFIG.Flow_Control {NonBlocking} CONFIG.Has_ACLKEN {true} CONFIG.C_Has_UNDERFLOW {true} CONFIG.C_Has_OVERFLOW {true} CONFIG.C_Has_INVALID_OP {true} CONFIG.C_Has_DIVIDE_BY_ZERO {true} CONFIG.A_Precision_Type {Single} CONFIG.C_A_Exponent_Width {8} CONFIG.C_A_Fraction_Width {24} CONFIG.Result_Precision_Type {Single} CONFIG.C_Result_Exponent_Width {8} CONFIG.C_Result_Fraction_Width {24} CONFIG.C_Mult_Usage {No_Usage} CONFIG.Has_RESULT_TREADY {false} CONFIG.C_Latency {28} CONFIG.C_Rate {1}] [get_ips xil_fdiv] diff --git a/hw/syn/xilinx/xrt/scripts/gen_xo.tcl b/hw/syn/xilinx/scripts/gen_xo.tcl similarity index 100% rename from hw/syn/xilinx/xrt/scripts/gen_xo.tcl rename to hw/syn/xilinx/scripts/gen_xo.tcl diff --git a/hw/syn/xilinx/xrt/kill_build.sh b/hw/syn/xilinx/scripts/kill_build.sh similarity index 100% rename from hw/syn/xilinx/xrt/kill_build.sh rename to hw/syn/xilinx/scripts/kill_build.sh diff --git a/hw/syn/xilinx/xrt/kill_hwserver.sh b/hw/syn/xilinx/scripts/kill_hwserver.sh similarity index 100% rename from 
hw/syn/xilinx/xrt/kill_hwserver.sh rename to hw/syn/xilinx/scripts/kill_hwserver.sh diff --git a/hw/syn/xilinx/xrt/kill_sim.sh b/hw/syn/xilinx/scripts/kill_sim.sh similarity index 100% rename from hw/syn/xilinx/xrt/kill_sim.sh rename to hw/syn/xilinx/scripts/kill_sim.sh diff --git a/hw/syn/xilinx/xrt/scripts/package_kernel.tcl b/hw/syn/xilinx/scripts/package_kernel.tcl similarity index 98% rename from hw/syn/xilinx/xrt/scripts/package_kernel.tcl rename to hw/syn/xilinx/scripts/package_kernel.tcl index 607e7955dd..c88bca2296 100644 --- a/hw/syn/xilinx/xrt/scripts/package_kernel.tcl +++ b/hw/syn/xilinx/scripts/package_kernel.tcl @@ -1,10 +1,10 @@ # Copyright © 2019-2023 -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -51,15 +51,15 @@ create_project -force kernel_pack $path_to_tmp_project add_files -norecurse ${vsources_list} set obj [get_filesets sources_1] -set files [list \ +set ip_files [list \ [file normalize "${build_dir}/ip/xil_fdiv/xil_fdiv.xci"] \ [file normalize "${build_dir}/ip/xil_fma/xil_fma.xci"] \ [file normalize "${build_dir}/ip/xil_fsqrt/xil_fsqrt.xci"] \ ] -add_files -verbose -norecurse -fileset $obj $files +add_files -verbose -norecurse -fileset $obj $ip_files set_property include_dirs ${vincludes_list} [current_fileset] -#set_property verilog_define ${vdefines_list} [current_fileset] +set_property verilog_define ${vdefines_list} [current_fileset] set obj [get_filesets sources_1] set_property -verbose -name "top" -value ${krnl_name} -objects $obj @@ -238,7 +238,7 @@ for {set i 0} {$i < 1} {incr i} { set reg [::ipx::add_register -quiet "MEM_$i" $addr_block] set_property address_offset [expr {0x040 + $i * 12}] $reg set_property size [expr {8*8}] $reg - set regparam [::ipx::add_register_parameter -quiet {ASSOCIATED_BUSIF} $reg] + set regparam [::ipx::add_register_parameter -quiet {ASSOCIATED_BUSIF} $reg] set_property value m_axi_mem_$i $regparam } diff --git a/hw/syn/xilinx/xrt/scripts/xsim.tcl b/hw/syn/xilinx/scripts/xsim.tcl similarity index 100% rename from hw/syn/xilinx/xrt/scripts/xsim.tcl rename to hw/syn/xilinx/scripts/xsim.tcl diff --git a/hw/syn/xilinx/xrt/Makefile b/hw/syn/xilinx/xrt/Makefile index e1acce8d65..9e86bd1a5d 100644 --- a/hw/syn/xilinx/xrt/Makefile +++ b/hw/syn/xilinx/xrt/Makefile @@ -163,7 +163,7 @@ $(BUILD_DIR)/scope.json: $(BUILD_DIR)/vortex.xml gen-xo: $(XO_CONTAINER) $(XO_CONTAINER): $(BUILD_DIR)/sources.txt - mkdir -p $(BUILD_DIR); cd $(BUILD_DIR); $(VIVADO) -mode batch -source $(SRC_DIR)/scripts/gen_xo.tcl -tclargs ../$(XO_CONTAINER) vortex_afu sources.txt $(SCRIPT_DIR) ../$(BUILD_DIR) + mkdir -p $(BUILD_DIR); cd $(BUILD_DIR); $(VIVADO) -mode batch -source $(SRC_DIR)/../scripts/gen_xo.tcl -tclargs ../$(XO_CONTAINER) vortex_afu 
sources.txt $(SCRIPT_DIR) ../$(BUILD_DIR) gen-bin: $(XCLBIN_CONTAINER) $(XCLBIN_CONTAINER): $(XO_CONTAINER) $(SCOPE_JSON) From f4426e012704082f069b8ee6e2e50d0150edef32 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Wed, 28 Aug 2024 01:27:51 -0700 Subject: [PATCH 107/407] fpu timing optimization --- hw/rtl/fpu/VX_fpu_cvt.sv | 2 +- hw/rtl/fpu/VX_fpu_div.sv | 4 ++-- hw/rtl/fpu/VX_fpu_fma.sv | 2 +- hw/rtl/fpu/VX_fpu_ncp.sv | 2 +- hw/rtl/fpu/VX_fpu_sqrt.sv | 4 ++-- hw/rtl/libs/VX_pe_serializer.sv | 30 ++++++++++++------------------ 6 files changed, 19 insertions(+), 25 deletions(-) diff --git a/hw/rtl/fpu/VX_fpu_cvt.sv b/hw/rtl/fpu/VX_fpu_cvt.sv index 7587f8342a..a2bf939882 100644 --- a/hw/rtl/fpu/VX_fpu_cvt.sv +++ b/hw/rtl/fpu/VX_fpu_cvt.sv @@ -64,7 +64,7 @@ module VX_fpu_cvt import VX_fpu_pkg::*; #( .DATA_OUT_WIDTH(`FP_FLAGS_BITS + 32), .TAG_WIDTH (NUM_LANES + TAG_WIDTH), .PE_REG (0), - .OUT_BUF (((NUM_LANES / NUM_PES) > 2) ? 2 : 0) + .OUT_BUF ((NUM_LANES != NUM_PES) ? 2 : 0) ) pe_serializer ( .clk (clk), .reset (reset), diff --git a/hw/rtl/fpu/VX_fpu_div.sv b/hw/rtl/fpu/VX_fpu_div.sv index 68138bb7cf..9cdea7867d 100644 --- a/hw/rtl/fpu/VX_fpu_div.sv +++ b/hw/rtl/fpu/VX_fpu_div.sv @@ -67,8 +67,8 @@ module VX_fpu_div import VX_fpu_pkg::*; #( .DATA_IN_WIDTH(2*32), .DATA_OUT_WIDTH(`FP_FLAGS_BITS + 32), .TAG_WIDTH (NUM_LANES + TAG_WIDTH), - .PE_REG (0), - .OUT_BUF (((NUM_LANES / NUM_PES) > 2) ? 2 : 0) + .PE_REG ((NUM_LANES != NUM_PES) ? 1 : 0), // must be registered for DSPs + .OUT_BUF ((NUM_LANES != NUM_PES) ? 2 : 0) ) pe_serializer ( .clk (clk), .reset (reset), diff --git a/hw/rtl/fpu/VX_fpu_fma.sv b/hw/rtl/fpu/VX_fpu_fma.sv index ce99138cbe..c42de701c1 100644 --- a/hw/rtl/fpu/VX_fpu_fma.sv +++ b/hw/rtl/fpu/VX_fpu_fma.sv @@ -99,7 +99,7 @@ module VX_fpu_fma import VX_fpu_pkg::*; #( .DATA_OUT_WIDTH(`FP_FLAGS_BITS + 32), .TAG_WIDTH (NUM_LANES + TAG_WIDTH), .PE_REG ((NUM_LANES != NUM_PES) ? 1 : 0), // must be registered for DSPs - .OUT_BUF (((NUM_LANES / NUM_PES) > 2) ? 
2 : 0) + .OUT_BUF ((NUM_LANES != NUM_PES) ? 2 : 0) ) pe_serializer ( .clk (clk), .reset (reset), diff --git a/hw/rtl/fpu/VX_fpu_ncp.sv b/hw/rtl/fpu/VX_fpu_ncp.sv index bfc69316b9..225033e1e4 100644 --- a/hw/rtl/fpu/VX_fpu_ncp.sv +++ b/hw/rtl/fpu/VX_fpu_ncp.sv @@ -69,7 +69,7 @@ module VX_fpu_ncp import VX_fpu_pkg::*; #( .DATA_OUT_WIDTH(`FP_FLAGS_BITS + 32), .TAG_WIDTH (NUM_LANES + TAG_WIDTH), .PE_REG (0), - .OUT_BUF (((NUM_LANES / NUM_PES) > 2) ? 2 : 0) + .OUT_BUF ((NUM_LANES != NUM_PES) ? 2 : 0) ) pe_serializer ( .clk (clk), .reset (reset), diff --git a/hw/rtl/fpu/VX_fpu_sqrt.sv b/hw/rtl/fpu/VX_fpu_sqrt.sv index 425f43d6e2..c6961e1dbb 100644 --- a/hw/rtl/fpu/VX_fpu_sqrt.sv +++ b/hw/rtl/fpu/VX_fpu_sqrt.sv @@ -61,8 +61,8 @@ module VX_fpu_sqrt import VX_fpu_pkg::*; #( .DATA_IN_WIDTH(32), .DATA_OUT_WIDTH(`FP_FLAGS_BITS + 32), .TAG_WIDTH (NUM_LANES + TAG_WIDTH), - .PE_REG (0), - .OUT_BUF (((NUM_LANES / NUM_PES) > 2) ? 2 : 0) + .PE_REG ((NUM_LANES != NUM_PES) ? 1 : 0), // must be registered for DSPs + .OUT_BUF ((NUM_LANES != NUM_PES) ? 
2 : 0) ) pe_serializer ( .clk (clk), .reset (reset), diff --git a/hw/rtl/libs/VX_pe_serializer.sv b/hw/rtl/libs/VX_pe_serializer.sv index 8ae7900b50..7a891cfc77 100644 --- a/hw/rtl/libs/VX_pe_serializer.sv +++ b/hw/rtl/libs/VX_pe_serializer.sv @@ -115,31 +115,25 @@ module VX_pe_serializer #( end end - reg [BATCH_SIZE-1:0][NUM_PES-1:0][DATA_OUT_WIDTH-1:0] data_out_r; - reg [TAG_WIDTH-1:0] tag_out_r; - reg valid_out_r; + reg [BATCH_SIZE-1:0][NUM_PES-1:0][DATA_OUT_WIDTH-1:0] data_out_r, data_out_n; - wire valid_out_b = pe_valid_in && batch_out_done; - wire ready_out_b = ready_out_u || ~valid_out_u; + always @(*) begin + data_out_n = data_out_r; + if (pe_valid_in) begin + data_out_n[batch_out_idx] = pe_data_in; + end + end always @(posedge clk) begin - if (reset) begin - valid_out_r <= 1'b0; - end else if (ready_out_b) begin - valid_out_r <= valid_out_b; - end - if (ready_out_b) begin - data_out_r[batch_out_idx] <= pe_data_in; - tag_out_r <= pe_tag_in; - end + data_out_r <= data_out_n; end - assign enable = ready_out_b || ~valid_out_b; + assign enable = ready_out_u || ~batch_out_done; assign ready_in = enable && batch_in_done; - assign valid_out_u = valid_out_r; - assign data_out_u = data_out_r; - assign tag_out_u = tag_out_r; + assign valid_out_u = batch_out_done; + assign data_out_u = data_out_n; + assign tag_out_u = pe_tag_in; end else begin From cf42025c205a71864e0f993aadb05dbdef448476 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Wed, 28 Aug 2024 01:35:55 -0700 Subject: [PATCH 108/407] minor update --- hw/rtl/fpu/VX_fpu_dsp.sv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/rtl/fpu/VX_fpu_dsp.sv b/hw/rtl/fpu/VX_fpu_dsp.sv index fcf94591cc..b692d2cda1 100644 --- a/hw/rtl/fpu/VX_fpu_dsp.sv +++ b/hw/rtl/fpu/VX_fpu_dsp.sv @@ -67,7 +67,7 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #( fflags_t [NUM_FPCORES-1:0] per_core_fflags; wire [1:0] div_sqrt_ready_in; - wire [1:0][NUM_LANES*32:0] div_sqrt_result; + wire [1:0][NUM_LANES*32-1:0] 
div_sqrt_result; wire [1:0][TAG_WIDTH-1:0] div_sqrt_tag_out; wire [1:0] div_sqrt_ready_out; wire [1:0] div_sqrt_valid_out; From 4cc7426c441c6552736223dfa1f2973f74e0326c Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Wed, 28 Aug 2024 02:52:20 -0700 Subject: [PATCH 109/407] minor update --- hw/rtl/cache/VX_cache_bank.sv | 6 +++--- hw/rtl/cache/VX_cache_data.sv | 4 ++-- hw/rtl/cache/VX_cache_wrap.sv | 6 +++--- hw/rtl/libs/VX_mem_adapter.sv | 2 +- hw/rtl/libs/VX_mem_coalescer.sv | 2 +- hw/rtl/mem/VX_local_mem.sv | 4 ++-- 6 files changed, 12 insertions(+), 12 deletions(-) diff --git a/hw/rtl/cache/VX_cache_bank.sv b/hw/rtl/cache/VX_cache_bank.sv index 19c24ad5d3..883a561a11 100644 --- a/hw/rtl/cache/VX_cache_bank.sv +++ b/hw/rtl/cache/VX_cache_bank.sv @@ -673,7 +673,7 @@ module VX_cache_bank #( end if (core_req_fire) begin if (core_req_rw) - `TRACE(2, ("%d: %s core-wr-req: addr=0x%0h, tag=0x%0h, req_idx=%0d, byteen=%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(core_req_addr, BANK_ID), core_req_tag, core_req_idx, core_req_byteen, core_req_data, req_uuid_sel)); + `TRACE(2, ("%d: %s core-wr-req: addr=0x%0h, tag=0x%0h, req_idx=%0d, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(core_req_addr, BANK_ID), core_req_tag, core_req_idx, core_req_byteen, core_req_data, req_uuid_sel)); else `TRACE(2, ("%d: %s core-rd-req: addr=0x%0h, tag=0x%0h, req_idx=%0d (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(core_req_addr, BANK_ID), core_req_tag, core_req_idx, req_uuid_sel)); end @@ -682,9 +682,9 @@ module VX_cache_bank #( end if (mreq_queue_push) begin if (do_creq_wr_st1 && !WRITEBACK) - `TRACE(2, ("%d: %s writethrough: addr=0x%0h, byteen=%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mreq_queue_addr, BANK_ID), mreq_queue_byteen, mreq_queue_data, req_uuid_st1)); + `TRACE(2, ("%d: %s writethrough: addr=0x%0h, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mreq_queue_addr, BANK_ID), 
mreq_queue_byteen, mreq_queue_data, req_uuid_st1)); else if (do_writeback_st1) - `TRACE(2, ("%d: %s writeback: addr=0x%0h, byteen=%h, data=0x%h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mreq_queue_addr, BANK_ID), mreq_queue_byteen, mreq_queue_data)); + `TRACE(2, ("%d: %s writeback: addr=0x%0h, byteen=0x%h, data=0x%h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mreq_queue_addr, BANK_ID), mreq_queue_byteen, mreq_queue_data)); else `TRACE(2, ("%d: %s fill-req: addr=0x%0h, mshr_id=%0d (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mreq_queue_addr, BANK_ID), mreq_queue_id, req_uuid_st1)); end diff --git a/hw/rtl/cache/VX_cache_data.sv b/hw/rtl/cache/VX_cache_data.sv index efc873f411..318463f76a 100644 --- a/hw/rtl/cache/VX_cache_data.sv +++ b/hw/rtl/cache/VX_cache_data.sv @@ -185,13 +185,13 @@ module VX_cache_data #( `TRACE(3, ("%d: %s fill: addr=0x%0h, way=%b, blk_addr=%0d, data=0x%h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, fill_data)); end if (flush && ~stall) begin - `TRACE(3, ("%d: %s flush: addr=0x%0h, way=%b, blk_addr=%0d, byteen=%h, data=0x%h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, dirty_byteen, dirty_data)); + `TRACE(3, ("%d: %s flush: addr=0x%0h, way=%b, blk_addr=%0d, byteen=0x%h, data=0x%h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, dirty_byteen, dirty_data)); end if (read && ~stall) begin `TRACE(3, ("%d: %s read: addr=0x%0h, way=%b, blk_addr=%0d, wsel=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, wsel, read_data, req_uuid)); end if (write && ~stall) begin - `TRACE(3, ("%d: %s write: addr=0x%0h, way=%b, blk_addr=%0d, wsel=%0d, byteen=%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, wsel, write_byteen[wsel], write_data[wsel], req_uuid)); + `TRACE(3, ("%d: %s write: addr=0x%0h, way=%b, blk_addr=%0d, 
wsel=%0d, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, wsel, write_byteen[wsel], write_data[wsel], req_uuid)); end end `endif diff --git a/hw/rtl/cache/VX_cache_wrap.sv b/hw/rtl/cache/VX_cache_wrap.sv index 3b1076d46f..513c29b5d1 100644 --- a/hw/rtl/cache/VX_cache_wrap.sv +++ b/hw/rtl/cache/VX_cache_wrap.sv @@ -158,7 +158,7 @@ module VX_cache_wrap import VX_gpu_pkg::*; #( end if (PASSTHRU == 0) begin : cache_if - + VX_cache #( .INSTANCE_ID (INSTANCE_ID), .CACHE_SIZE (CACHE_SIZE), @@ -234,7 +234,7 @@ module VX_cache_wrap import VX_gpu_pkg::*; #( always @(posedge clk) begin if (core_req_fire) begin if (core_bus_if[i].req_data.rw) - `TRACE(1, ("%d: %s core-wr-req: addr=0x%0h, tag=0x%0h, req_idx=%0d, byteen=%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `TO_FULL_ADDR(core_bus_if[i].req_data.addr), core_bus_if[i].req_data.tag, i, core_bus_if[i].req_data.byteen, core_bus_if[i].req_data.data, core_req_uuid)); + `TRACE(1, ("%d: %s core-wr-req: addr=0x%0h, tag=0x%0h, req_idx=%0d, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `TO_FULL_ADDR(core_bus_if[i].req_data.addr), core_bus_if[i].req_data.tag, i, core_bus_if[i].req_data.byteen, core_bus_if[i].req_data.data, core_req_uuid)); else `TRACE(1, ("%d: %s core-rd-req: addr=0x%0h, tag=0x%0h, req_idx=%0d (#%0d)\n", $time, INSTANCE_ID, `TO_FULL_ADDR(core_bus_if[i].req_data.addr), core_bus_if[i].req_data.tag, i, core_req_uuid)); end @@ -261,7 +261,7 @@ module VX_cache_wrap import VX_gpu_pkg::*; #( always @(posedge clk) begin if (mem_req_fire) begin if (mem_bus_if.req_data.rw) - `TRACE(1, ("%d: %s mem-wr-req: addr=0x%0h, tag=0x%0h, byteen=%h, data=0x%h (#%0d)\n", + `TRACE(1, ("%d: %s mem-wr-req: addr=0x%0h, tag=0x%0h, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `TO_FULL_ADDR(mem_bus_if.req_data.addr), mem_bus_if.req_data.tag, mem_bus_if.req_data.byteen, mem_bus_if.req_data.data, mem_req_uuid)); else `TRACE(1, ("%d: %s mem-rd-req: addr=0x%0h, 
tag=0x%0h (#%0d)\n", diff --git a/hw/rtl/libs/VX_mem_adapter.sv b/hw/rtl/libs/VX_mem_adapter.sv index 263df0159f..068628be28 100644 --- a/hw/rtl/libs/VX_mem_adapter.sv +++ b/hw/rtl/libs/VX_mem_adapter.sv @@ -153,7 +153,7 @@ module VX_mem_adapter #( end assign mem_rsp_tag_in_x = (rsp_ctr != 0) ? mem_rsp_tag_in_r : mem_rsp_tag_out; `RUNTIME_ASSERT(!mem_rsp_in_fire || (mem_rsp_tag_in_x == mem_rsp_tag_out), - ("%t: *** out-of-order memory reponse! cur=%d, expected=%d", $time, mem_rsp_tag_in_x, mem_rsp_tag_out)) + ("%t: *** out-of-order memory reponse! cur=0x%0h, expected=0x%0h", $time, mem_rsp_tag_in_x, mem_rsp_tag_out)) wire [SRC_ADDR_WIDTH+D-1:0] mem_req_addr_in_qual = {mem_req_addr_in, req_ctr}; diff --git a/hw/rtl/libs/VX_mem_coalescer.sv b/hw/rtl/libs/VX_mem_coalescer.sv index 17e5923bdf..75563b71b5 100644 --- a/hw/rtl/libs/VX_mem_coalescer.sv +++ b/hw/rtl/libs/VX_mem_coalescer.sv @@ -360,7 +360,7 @@ module VX_mem_coalescer #( `TRACE_ARRAY1D(1, "%0d", out_req_offset, NUM_REQS); `TRACE(1, (", pmask=%b, tag=0x%0h (#%0d)\n", out_req_pmask, out_req_tag, out_req_uuid)); if ($countones(out_req_pmask) > 1) begin - `TRACE(1, ("%t: *** %s: coalesced=%d (#%0d)\n", $time, INSTANCE_ID, $countones(out_req_pmask), out_req_uuid)); + `TRACE(1, ("%t: *** %s: coalesced=%0d (#%0d)\n", $time, INSTANCE_ID, $countones(out_req_pmask), out_req_uuid)); end end if (out_rsp_fire) begin diff --git a/hw/rtl/mem/VX_local_mem.sv b/hw/rtl/mem/VX_local_mem.sv index 6f0c1270e5..2ba09fd611 100644 --- a/hw/rtl/mem/VX_local_mem.sv +++ b/hw/rtl/mem/VX_local_mem.sv @@ -334,7 +334,7 @@ module VX_local_mem import VX_gpu_pkg::*; #( always @(posedge clk) begin if (mem_bus_if[i].req_valid && mem_bus_if[i].req_ready) begin if (mem_bus_if[i].req_data.rw) begin - `TRACE(1, ("%d: %s wr-req: req_idx=%0d, addr=0x%0h, tag=0x%0h, byteen=%h, data=0x%h (#%0d)\n", + `TRACE(1, ("%d: %s wr-req: req_idx=%0d, addr=0x%0h, tag=0x%0h, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, i, mem_bus_if[i].req_data.addr, 
mem_bus_if[i].req_data.tag, mem_bus_if[i].req_data.byteen, mem_bus_if[i].req_data.data, req_uuid[i])); end else begin `TRACE(1, ("%d: %s rd-req: req_idx=%0d, addr=0x%0h, tag=0x%0h (#%0d)\n", @@ -352,7 +352,7 @@ module VX_local_mem import VX_gpu_pkg::*; #( always @(posedge clk) begin if (per_bank_req_valid[i] && per_bank_req_ready[i]) begin if (per_bank_req_rw[i]) begin - `TRACE(2, ("%d: %s-bank%0d wr-req: addr=0x%0h, tag=0x%0h, byteen=%h, data=0x%h (#%0d)\n", + `TRACE(2, ("%d: %s-bank%0d wr-req: addr=0x%0h, tag=0x%0h, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, i, per_bank_req_addr[i], per_bank_req_tag[i], per_bank_req_byteen[i], per_bank_req_data[i], per_bank_req_uuid[i])); end else begin `TRACE(2, ("%d: %s-bank%0d rd-req: addr=0x%0h, tag=0x%0h (#%0d)\n", From 6c1e7850046c606aac2e1b1d6b56ea31141e2def Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Wed, 28 Aug 2024 03:08:08 -0700 Subject: [PATCH 110/407] minor update --- hw/rtl/core/VX_alu_int.sv | 2 +- hw/rtl/libs/VX_mem_coalescer.sv | 11 ++++------- hw/rtl/libs/VX_mem_scheduler.sv | 12 ++++++------ hw/rtl/mem/VX_gbar_unit.sv | 14 +++++++------- 4 files changed, 18 insertions(+), 21 deletions(-) diff --git a/hw/rtl/core/VX_alu_int.sv b/hw/rtl/core/VX_alu_int.sv index 47bfcc6bf6..06acfde397 100644 --- a/hw/rtl/core/VX_alu_int.sv +++ b/hw/rtl/core/VX_alu_int.sv @@ -194,7 +194,7 @@ module VX_alu_int #( `ifdef DBG_TRACE_PIPELINE always @(posedge clk) begin if (br_enable) begin - `TRACE(1, ("%d: %s-branch: wid=%0d, PC=0x%0h, taken=%b, dest=0x%0h (#%0d)\n", + `TRACE(1, ("%d: %s branch: wid=%0d, PC=0x%0h, taken=%b, dest=0x%0h (#%0d)\n", $time, INSTANCE_ID, br_wid, {commit_if.data.PC, 1'b0}, br_taken, {br_dest, 1'b0}, commit_if.data.uuid)); end end diff --git a/hw/rtl/libs/VX_mem_coalescer.sv b/hw/rtl/libs/VX_mem_coalescer.sv index 75563b71b5..e15d065642 100644 --- a/hw/rtl/libs/VX_mem_coalescer.sv +++ b/hw/rtl/libs/VX_mem_coalescer.sv @@ -342,7 +342,7 @@ module VX_mem_coalescer #( always @(posedge clk) begin 
if (out_req_fire) begin if (out_req_rw) begin - `TRACE(1, ("%d: %s-out-req-wr: valid=%b, addr=", $time, INSTANCE_ID, out_req_mask)); + `TRACE(1, ("%d: %s out-req-wr: valid=%b, addr=", $time, INSTANCE_ID, out_req_mask)); `TRACE_ARRAY1D(1, "0x%h", out_req_addr, OUT_REQS); `TRACE(1, (", flags=")); `TRACE_ARRAY1D(1, "%b", out_req_flags, OUT_REQS); @@ -351,20 +351,17 @@ module VX_mem_coalescer #( `TRACE(1, (", data=")); `TRACE_ARRAY1D(1, "0x%0h", out_req_data, OUT_REQS); end else begin - `TRACE(1, ("%d: %s-out-req-rd: valid=%b, addr=", $time, INSTANCE_ID, out_req_mask)); + `TRACE(1, ("%d: %s out-req-rd: valid=%b, addr=", $time, INSTANCE_ID, out_req_mask)); `TRACE_ARRAY1D(1, "0x%h", out_req_addr, OUT_REQS); `TRACE(1, (", flags=")); `TRACE_ARRAY1D(1, "%b", out_req_flags, OUT_REQS); end `TRACE(1, (", offset=")); `TRACE_ARRAY1D(1, "%0d", out_req_offset, NUM_REQS); - `TRACE(1, (", pmask=%b, tag=0x%0h (#%0d)\n", out_req_pmask, out_req_tag, out_req_uuid)); - if ($countones(out_req_pmask) > 1) begin - `TRACE(1, ("%t: *** %s: coalesced=%0d (#%0d)\n", $time, INSTANCE_ID, $countones(out_req_pmask), out_req_uuid)); - end + `TRACE(1, (", pmask=%b, coalesced=%0d, tag=0x%0h (#%0d)\n", out_req_pmask, $countones(out_req_pmask), out_req_tag, out_req_uuid)); end if (out_rsp_fire) begin - `TRACE(1, ("%d: %s-out-rsp: valid=%b, data=", $time, INSTANCE_ID, out_rsp_mask)); + `TRACE(1, ("%d: %s out-rsp: valid=%b, data=", $time, INSTANCE_ID, out_rsp_mask)); `TRACE_ARRAY1D(1, "0x%0h", out_rsp_data, OUT_REQS); `TRACE(1, (", offset=")); `TRACE_ARRAY1D(1, "%0d", ibuf_dout_offset, NUM_REQS); diff --git a/hw/rtl/libs/VX_mem_scheduler.sv b/hw/rtl/libs/VX_mem_scheduler.sv index 5324d7ffa7..c5b3021776 100644 --- a/hw/rtl/libs/VX_mem_scheduler.sv +++ b/hw/rtl/libs/VX_mem_scheduler.sv @@ -584,39 +584,39 @@ module VX_mem_scheduler #( always @(posedge clk) begin if (core_req_fire) begin if (core_req_rw) begin - `TRACE(1, ("%d: %s-core-req-wr: valid=%b, addr=", $time, INSTANCE_ID, core_req_mask)); + `TRACE(1, 
("%d: %s core-req-wr: valid=%b, addr=", $time, INSTANCE_ID, core_req_mask)); `TRACE_ARRAY1D(1, "0x%h", core_req_addr, CORE_REQS); `TRACE(1, (", byteen=")); `TRACE_ARRAY1D(1, "0x%h", core_req_byteen, CORE_REQS); `TRACE(1, (", data=")); `TRACE_ARRAY1D(1, "0x%0h", core_req_data, CORE_REQS); end else begin - `TRACE(1, ("%d: %s-core-req-rd: valid=%b, addr=", $time, INSTANCE_ID, core_req_mask)); + `TRACE(1, ("%d: %s core-req-rd: valid=%b, addr=", $time, INSTANCE_ID, core_req_mask)); `TRACE_ARRAY1D(1, "0x%h", core_req_addr, CORE_REQS); end `TRACE(1, (", tag=0x%0h (#%0d)\n", core_req_tag, req_dbg_uuid)); end if (core_rsp_valid && core_rsp_ready) begin - `TRACE(1, ("%d: %s-core-rsp: valid=%b, sop=%b, eop=%b, data=", $time, INSTANCE_ID, core_rsp_mask, core_rsp_sop, core_rsp_eop)); + `TRACE(1, ("%d: %s core-rsp: valid=%b, sop=%b, eop=%b, data=", $time, INSTANCE_ID, core_rsp_mask, core_rsp_sop, core_rsp_eop)); `TRACE_ARRAY1D(1, "0x%0h", core_rsp_data, CORE_REQS); `TRACE(1, (", tag=0x%0h (#%0d)\n", core_rsp_tag, rsp_dbg_uuid)); end if (| mem_req_fire_s) begin if (| mem_req_rw_s) begin - `TRACE(1, ("%d: %s-mem-req-wr: valid=%b, addr=", $time, INSTANCE_ID, mem_req_mask_s)); + `TRACE(1, ("%d: %s mem-req-wr: valid=%b, addr=", $time, INSTANCE_ID, mem_req_mask_s)); `TRACE_ARRAY1D(1, "0x%h", mem_req_addr_s, CORE_CHANNELS); `TRACE(1, (", byteen=")); `TRACE_ARRAY1D(1, "0x%h", mem_req_byteen_s, CORE_CHANNELS); `TRACE(1, (", data=")); `TRACE_ARRAY1D(1, "0x%0h", mem_req_data_s, CORE_CHANNELS); end else begin - `TRACE(1, ("%d: %s-mem-req-rd: valid=%b, addr=", $time, INSTANCE_ID, mem_req_mask_s)); + `TRACE(1, ("%d: %s mem-req-rd: valid=%b, addr=", $time, INSTANCE_ID, mem_req_mask_s)); `TRACE_ARRAY1D(1, "0x%h", mem_req_addr_s, CORE_CHANNELS); end `TRACE(1, (", ibuf_idx=%0d, batch_idx=%0d (#%0d)\n", ibuf_waddr_s, req_batch_idx, mem_req_dbg_uuid)); end if (mem_rsp_fire_s) begin - `TRACE(1, ("%d: %s-mem-rsp: valid=%b, data=", $time, INSTANCE_ID, mem_rsp_mask_s)); + `TRACE(1, ("%d: %s mem-rsp: 
valid=%b, data=", $time, INSTANCE_ID, mem_rsp_mask_s)); `TRACE_ARRAY1D(1, "0x%0h", mem_rsp_data_s, CORE_CHANNELS); `TRACE(1, (", ibuf_idx=%0d, batch_idx=%0d (#%0d)\n", ibuf_raddr, rsp_batch_idx, mem_rsp_dbg_uuid)); end diff --git a/hw/rtl/mem/VX_gbar_unit.sv b/hw/rtl/mem/VX_gbar_unit.sv index a6e5d9baab..3e5bbebcb5 100644 --- a/hw/rtl/mem/VX_gbar_unit.sv +++ b/hw/rtl/mem/VX_gbar_unit.sv @@ -1,10 +1,10 @@ // Copyright © 2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -13,7 +13,7 @@ `include "VX_define.vh" -module VX_gbar_unit #( +module VX_gbar_unit #( parameter `STRING INSTANCE_ID = "" ) ( input wire clk, @@ -26,7 +26,7 @@ module VX_gbar_unit #( reg [`NB_WIDTH-1:0][`NUM_CORES-1:0] barrier_masks; wire [`CLOG2(`NUM_CORES+1)-1:0] active_barrier_count; wire [`NUM_CORES-1:0] curr_barrier_mask = barrier_masks[gbar_bus_if.req_id]; - + `POP_COUNT(active_barrier_count, curr_barrier_mask); `UNUSED_VAR (active_barrier_count) @@ -56,15 +56,15 @@ module VX_gbar_unit #( assign gbar_bus_if.rsp_valid = rsp_valid; assign gbar_bus_if.rsp_id = rsp_bar_id; assign gbar_bus_if.req_ready = 1; // global barrier unit is always ready (no dependencies) - + `ifdef DBG_TRACE_GBAR always @(posedge clk) begin if (gbar_bus_if.req_valid && gbar_bus_if.req_ready) begin - `TRACE(1, ("%d: %s-acquire: bar_id=%0d, size=%0d, core_id=%0d\n", + `TRACE(1, ("%d: %s acquire: bar_id=%0d, size=%0d, core_id=%0d\n", $time, INSTANCE_ID, gbar_bus_if.req_id, gbar_bus_if.req_size_m1, gbar_bus_if.req_core_id)); end if (gbar_bus_if.rsp_valid) begin - `TRACE(1, ("%d: %s-release: bar_id=%0d\n", $time, 
INSTANCE_ID, gbar_bus_if.rsp_id)); + `TRACE(1, ("%d: %s release: bar_id=%0d\n", $time, INSTANCE_ID, gbar_bus_if.rsp_id)); end end `endif From 74a47ebbe473b80f9eb0f191944b1eb949804aaf Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Wed, 28 Aug 2024 04:36:13 -0700 Subject: [PATCH 111/407] displatch unit fix --- hw/rtl/core/VX_dispatch_unit.sv | 20 ++++++++++++++------ hw/rtl/core/VX_lsu_slice.sv | 4 ++-- hw/rtl/fpu/VX_fpu_dsp.sv | 2 +- 3 files changed, 17 insertions(+), 9 deletions(-) diff --git a/hw/rtl/core/VX_dispatch_unit.sv b/hw/rtl/core/VX_dispatch_unit.sv index 5e6893e97a..0bd4b45c4d 100644 --- a/hw/rtl/core/VX_dispatch_unit.sv +++ b/hw/rtl/core/VX_dispatch_unit.sv @@ -68,8 +68,9 @@ module VX_dispatch_unit import VX_gpu_pkg::*; #( // batch select logic logic [BATCH_COUNT_W-1:0] batch_idx; - if (BATCH_COUNT != 1) begin + if (BATCH_COUNT != 1) begin + wire [BATCH_COUNT_W-1:0] batch_idx_n; wire [BATCH_COUNT-1:0] valid_batches; for (genvar i = 0; i < BATCH_COUNT; ++i) begin assign valid_batches[i] = | dispatch_valid[i * BLOCK_SIZE +: BLOCK_SIZE]; @@ -82,12 +83,19 @@ module VX_dispatch_unit import VX_gpu_pkg::*; #( .clk (clk), .reset (reset), .requests (valid_batches), - .grant_index (batch_idx), + .grant_index (batch_idx_n), `UNUSED_PIN (grant_onehot), `UNUSED_PIN (grant_valid), .grant_ready (batch_done) ); + always @(posedge clk) begin + if (reset) begin + batch_idx <= '0; + end else if (batch_done) begin + batch_idx <= batch_idx_n; + end + end end else begin assign batch_idx = 0; `UNUSED_VAR (batch_done) @@ -98,12 +106,12 @@ module VX_dispatch_unit import VX_gpu_pkg::*; #( assign issue_indices[block_idx] = ISSUE_W'(batch_idx * BLOCK_SIZE) + ISSUE_W'(block_idx); end - for (genvar block_idx = 0; block_idx < BLOCK_SIZE; ++block_idx) begin + for (genvar block_idx = 0; block_idx < BLOCK_SIZE; ++block_idx) begin : blocks wire [ISSUE_W-1:0] issue_idx = issue_indices[block_idx]; wire valid_p, ready_p; - if (`NUM_THREADS != NUM_LANES) begin + if (`NUM_THREADS != 
NUM_LANES) begin : threads_split reg [NUM_PACKETS-1:0] sent_mask_p; wire [PID_WIDTH-1:0] start_p_n, start_p, end_p; wire dispatch_valid_r; @@ -206,7 +214,7 @@ module VX_dispatch_unit import VX_gpu_pkg::*; #( end else begin assign block_ready[block_idx] = ready_p && block_enable; end - assign block_done[block_idx] = ~dispatch_valid[issue_idx] || fire_eop; + assign block_done[block_idx] = fire_eop || ~dispatch_valid[issue_idx]; end else begin assign valid_p = dispatch_valid[issue_idx]; assign block_tmask[block_idx] = dispatch_data[issue_idx][DATA_TMASK_OFF +: `NUM_THREADS]; @@ -217,7 +225,7 @@ module VX_dispatch_unit import VX_gpu_pkg::*; #( assign block_sop[block_idx] = 1'b1; assign block_eop[block_idx] = 1'b1; assign block_ready[block_idx] = ready_p; - assign block_done[block_idx] = ~valid_p || ready_p; + assign block_done[block_idx] = ready_p || ~valid_p; end wire [ISSUE_ISW_W-1:0] isw; diff --git a/hw/rtl/core/VX_lsu_slice.sv b/hw/rtl/core/VX_lsu_slice.sv index 8c277f3e93..25a8223a80 100644 --- a/hw/rtl/core/VX_lsu_slice.sv +++ b/hw/rtl/core/VX_lsu_slice.sv @@ -517,13 +517,13 @@ module VX_lsu_slice import VX_gpu_pkg::*; #( `TRACE_ARRAY1D(1, "%b", mem_req_flags, NUM_LANES); `TRACE(1, (", byteen=0x%0h, data=", mem_req_byteen)); `TRACE_ARRAY1D(1, "0x%0h", mem_req_data, NUM_LANES); - `TRACE(1, (", tag=0x%0h (#%0d)\n", mem_req_tag, execute_if.data.uuid)); + `TRACE(1, (", sop=%b, eop=%b, tag=0x%0h (#%0d)\n", execute_if.data.sop, execute_if.data.eop, mem_req_tag, execute_if.data.uuid)); end else begin `TRACE(1, ("%d: %s Rd Req: wid=%0d, PC=0x%0h, tmask=%b, addr=", $time, INSTANCE_ID, execute_if.data.wid, {execute_if.data.PC, 1'b0}, mem_req_mask)); `TRACE_ARRAY1D(1, "0x%h", full_addr, NUM_LANES); `TRACE(1, (", flags=")); `TRACE_ARRAY1D(1, "%b", mem_req_flags, NUM_LANES); - `TRACE(1, (", byteen=0x%0h, rd=%0d, tag=0x%0h (#%0d)\n", mem_req_byteen, execute_if.data.rd, mem_req_tag, execute_if.data.uuid)); + `TRACE(1, (", byteen=0x%0h, rd=%0d, sop=%b, eop=%b, tag=0x%0h 
(#%0d)\n", mem_req_byteen, execute_if.data.rd, execute_if.data.sop, execute_if.data.eop, mem_req_tag, execute_if.data.uuid)); end end if (mem_rsp_fire) begin diff --git a/hw/rtl/fpu/VX_fpu_dsp.sv b/hw/rtl/fpu/VX_fpu_dsp.sv index b692d2cda1..0f0e551b78 100644 --- a/hw/rtl/fpu/VX_fpu_dsp.sv +++ b/hw/rtl/fpu/VX_fpu_dsp.sv @@ -67,7 +67,7 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #( fflags_t [NUM_FPCORES-1:0] per_core_fflags; wire [1:0] div_sqrt_ready_in; - wire [1:0][NUM_LANES*32-1:0] div_sqrt_result; + wire [1:0][NUM_LANES-1:0][31:0] div_sqrt_result; wire [1:0][TAG_WIDTH-1:0] div_sqrt_tag_out; wire [1:0] div_sqrt_ready_out; wire [1:0] div_sqrt_valid_out; From 41e41c9688ee04e7af107a96836f6469d22b15c1 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Wed, 28 Aug 2024 18:46:30 -0700 Subject: [PATCH 112/407] adjust SimX's split/join to match RTL. --- sim/simx/emulator.cpp | 13 +------------ sim/simx/emulator.h | 13 +++++++++---- sim/simx/execute.cpp | 15 ++++++++------- tests/regression/dogfood/main.cpp | 2 +- 4 files changed, 19 insertions(+), 24 deletions(-) diff --git a/sim/simx/emulator.cpp b/sim/simx/emulator.cpp index 7ed9a10f9a..14e213ba68 100644 --- a/sim/simx/emulator.cpp +++ b/sim/simx/emulator.cpp @@ -30,17 +30,6 @@ using namespace vortex; -Emulator::ipdom_entry_t::ipdom_entry_t(const ThreadMask &tmask, Word PC) - : tmask(tmask) - , PC(PC) - , fallthrough(false) -{} - -Emulator::ipdom_entry_t::ipdom_entry_t(const ThreadMask &tmask) - : tmask(tmask) - , fallthrough(true) -{} - Emulator::warp_t::warp_t(const Arch& arch) : ireg_file(arch.num_threads(), std::vector(MAX_NUM_REGS)) , freg_file(arch.num_threads(), std::vector(MAX_NUM_REGS)) @@ -85,7 +74,7 @@ Emulator::Emulator(const Arch &arch, const DCRS &dcrs, Core* core) , core_(core) , warps_(arch.num_warps(), arch) , barriers_(arch.num_barriers(), 0) - , ipdom_size_((arch.num_threads()-1) * 2) + , ipdom_size_(arch.num_threads()-1) { this->clear(); } diff --git a/sim/simx/emulator.h b/sim/simx/emulator.h index 
de466d3520..c37bbd68be 100644 --- a/sim/simx/emulator.h +++ b/sim/simx/emulator.h @@ -57,10 +57,15 @@ class Emulator { private: struct ipdom_entry_t { - ipdom_entry_t(const ThreadMask &tmask, Word PC); - ipdom_entry_t(const ThreadMask &tmask); - - ThreadMask tmask; + ipdom_entry_t(const ThreadMask &orig_tmask, const ThreadMask &else_tmask, Word PC) + : orig_tmask (orig_tmask) + , else_tmask (else_tmask) + , PC (PC) + , fallthrough(false) + {} + + ThreadMask orig_tmask; + ThreadMask else_tmask; Word PC; bool fallthrough; }; diff --git a/sim/simx/execute.cpp b/sim/simx/execute.cpp index db098726b7..9f7e6a74a0 100644 --- a/sim/simx/execute.cpp +++ b/sim/simx/execute.cpp @@ -1347,11 +1347,9 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) { } else { next_tmask = else_tmask; } - // push reconvergence thread mask onto the stack - warp.ipdom_stack.emplace(warp.tmask); - // push not taken thread mask onto the stack + // push reconvergence and not-taken thread mask onto the stack auto ntaken_tmask = ~next_tmask & warp.tmask; - warp.ipdom_stack.emplace(ntaken_tmask, next_pc); + warp.ipdom_stack.emplace(warp.tmask, ntaken_tmask, next_pc); } // return divergent state for (uint32_t t = thread_start; t < num_threads; ++t) { @@ -1372,11 +1370,14 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) { std::cout << "IPDOM stack is empty!\n" << std::flush; std::abort(); } - next_tmask = warp.ipdom_stack.top().tmask; - if (!warp.ipdom_stack.top().fallthrough) { + if (warp.ipdom_stack.top().fallthrough) { + next_tmask = warp.ipdom_stack.top().orig_tmask; + warp.ipdom_stack.pop(); + } else { + next_tmask = warp.ipdom_stack.top().else_tmask; next_pc = warp.ipdom_stack.top().PC; + warp.ipdom_stack.top().fallthrough = true; } - warp.ipdom_stack.pop(); } } break; case 4: { diff --git a/tests/regression/dogfood/main.cpp b/tests/regression/dogfood/main.cpp index 1fcf9d5111..d308821f0a 100644 --- a/tests/regression/dogfood/main.cpp 
+++ b/tests/regression/dogfood/main.cpp @@ -12,7 +12,7 @@ TestSuite* testSuite = nullptr; const char* kernel_file = "kernel.vxbin"; -int count = 1; +int count = 64; std::unordered_set selected; std::unordered_set excluded; int testid_s = 0; From 0f41774fea57748fda1bcea2ea02fe88f19f946a Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Wed, 28 Aug 2024 19:07:15 -0700 Subject: [PATCH 113/407] SimX's decode minor fix --- sim/simx/decode.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/sim/simx/decode.cpp b/sim/simx/decode.cpp index dba57c4ef2..c8af41b265 100644 --- a/sim/simx/decode.cpp +++ b/sim/simx/decode.cpp @@ -460,6 +460,11 @@ std::shared_ptr Emulator::decode(uint32_t code) const { switch (op) { case Opcode::FCI: switch (func7) { + case 0x20: // FCVT.S.D + case 0x21: // FCVT.D.S + instr->setDestReg(rd, RegType::Float); + instr->addSrcReg(rs1, RegType::Float); + break; case 0x2c: // FSQRT.S case 0x2d: // FSQRT.D instr->setDestReg(rd, RegType::Float); From a38960674ef2dca89f3d802bf532a03d0daca11f Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Wed, 28 Aug 2024 21:10:05 -0700 Subject: [PATCH 114/407] SimX split.N fix --- sim/simx/decode.cpp | 12 ++++++------ sim/simx/emulator.cpp | 6 ++---- sim/simx/execute.cpp | 7 ++----- sim/simx/types.h | 21 +++++++++++++++++++++ 4 files changed, 31 insertions(+), 15 deletions(-) diff --git a/sim/simx/decode.cpp b/sim/simx/decode.cpp index c8af41b265..795a05eed0 100644 --- a/sim/simx/decode.cpp +++ b/sim/simx/decode.cpp @@ -86,7 +86,7 @@ static const char* op_string(const Instr &instr) { auto func3 = instr.getFunc3(); auto func7 = instr.getFunc7(); auto rd = instr.getRDest(); - auto rs2 = instr.getRSrc(1); + auto rs1 = instr.getRSrc(1); auto imm = instr.getImm(); switch (opcode) { @@ -343,7 +343,7 @@ static const char* op_string(const Instr &instr) { std::abort(); } case 0x60: - switch (rs2) { + switch (rs1) { case 0: return "FCVT.W.S"; case 1: return "FCVT.WU.S"; case 2: return "FCVT.L.S"; @@ -352,7 +352,7 @@ static 
const char* op_string(const Instr &instr) { std::abort(); } case 0x61: - switch (rs2) { + switch (rs1) { case 0: return "FCVT.W.D"; case 1: return "FCVT.WU.D"; case 2: return "FCVT.L.D"; @@ -361,7 +361,7 @@ static const char* op_string(const Instr &instr) { std::abort(); } case 0x68: - switch (rs2) { + switch (rs1) { case 0: return "FCVT.S.W"; case 1: return "FCVT.S.WU"; case 2: return "FCVT.S.L"; @@ -370,7 +370,7 @@ static const char* op_string(const Instr &instr) { std::abort(); } case 0x69: - switch (rs2) { + switch (rs1) { case 0: return "FCVT.D.W"; case 1: return "FCVT.D.WU"; case 2: return "FCVT.D.L"; @@ -395,7 +395,7 @@ static const char* op_string(const Instr &instr) { switch (func3) { case 0: return "TMC"; case 1: return "WSPAWN"; - case 2: return rs2 ? "SPLIT.N" : "SPLIT"; + case 2: return rs1 ? "SPLIT.N" : "SPLIT"; case 3: return "JOIN"; case 4: return "BAR"; case 5: return rd ? "PRED.N" : "PRED"; diff --git a/sim/simx/emulator.cpp b/sim/simx/emulator.cpp index 14e213ba68..88a0ecff3b 100644 --- a/sim/simx/emulator.cpp +++ b/sim/simx/emulator.cpp @@ -162,10 +162,8 @@ instr_trace_t* Emulator::step() { uint64_t uuid = 0; #endif - DPH(1, "Fetch: cid=" << core_->id() << ", wid=" << scheduled_warp << ", tmask="); - for (uint32_t i = 0, n = arch_.num_threads(); i < n; ++i) - DPN(1, warp.tmask.test(i)); - DPN(1, ", PC=0x" << std::hex << warp.PC << " (#" << std::dec << uuid << ")" << std::endl); + DP(1, "Fetch: cid=" << core_->id() << ", wid=" << scheduled_warp << ", tmask=" << ThreadMaskOS(warp.tmask, arch_.num_threads()) + << ", PC=0x" << std::hex << warp.PC << " (#" << std::dec << uuid << ")"); // Fetch uint32_t instr_code = 0; diff --git a/sim/simx/execute.cpp b/sim/simx/execute.cpp index 9f7e6a74a0..ca4dac8d47 100644 --- a/sim/simx/execute.cpp +++ b/sim/simx/execute.cpp @@ -1328,7 +1328,7 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) { auto stack_size = warp.ipdom_stack.size(); ThreadMask then_tmask, else_tmask; - auto 
not_pred = rsrc2 & 0x1; + auto not_pred = (rsrc1 != 0); for (uint32_t t = 0; t < num_threads; ++t) { auto cond = (warp.ireg_file.at(t).at(rsrc0) & 0x1) ^ not_pred; then_tmask[t] = warp.tmask.test(t) && cond; @@ -1472,10 +1472,7 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) { } if (warp.tmask != next_tmask) { - DPH(3, "*** New Tmask="); - for (uint32_t i = 0; i < num_threads; ++i) - DPN(3, next_tmask.test(i)); - DPN(3, std::endl); + DP(3, "*** New Tmask=" << ThreadMaskOS(next_tmask, num_threads)); warp.tmask = next_tmask; if (!next_tmask.any()) { active_warps_.reset(wid); diff --git a/sim/simx/types.h b/sim/simx/types.h index b452dd379a..17cf1685ff 100644 --- a/sim/simx/types.h +++ b/sim/simx/types.h @@ -58,6 +58,27 @@ typedef std::bitset WarpMask; /////////////////////////////////////////////////////////////////////////////// +class ThreadMaskOS { +public: + ThreadMaskOS(const ThreadMask& mask, int size) + : mask_(mask) + , size_(size) + {} + + friend std::ostream& operator<<(std::ostream& os, const ThreadMaskOS& wrapper) { + for (int i = 0; i < wrapper.size_; ++i) { + os << wrapper.mask_[i]; + } + return os; + } + +private: + const ThreadMask& mask_; + int size_; +}; + +/////////////////////////////////////////////////////////////////////////////// + enum class RegType { None, Integer, From fa1fd396456aec14602437b1f394ed74440a8c20 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Wed, 28 Aug 2024 21:31:09 -0700 Subject: [PATCH 115/407] minor updates --- hw/rtl/cache/VX_cache.sv | 2 +- hw/rtl/cache/VX_cache_cluster.sv | 2 +- hw/rtl/cache/VX_cache_tags.sv | 2 +- hw/rtl/core/VX_alu_unit.sv | 2 +- hw/rtl/core/VX_commit.sv | 2 +- hw/rtl/core/VX_dispatch.sv | 2 +- hw/rtl/core/VX_fetch.sv | 2 +- hw/rtl/core/VX_fpu_unit.sv | 2 +- hw/rtl/core/VX_gather_unit.sv | 2 +- hw/rtl/core/VX_ibuffer.sv | 2 +- hw/rtl/core/VX_lsu_unit.sv | 2 +- hw/rtl/core/VX_mem_unit.sv | 8 ++++---- hw/rtl/core/VX_operands.sv | 2 +- hw/rtl/core/VX_schedule.sv | 2 +- 
hw/rtl/core/VX_scoreboard.sv | 2 +- hw/rtl/core/VX_split_join.sv | 2 +- hw/rtl/fpu/VX_fpu_cvt.sv | 2 +- hw/rtl/fpu/VX_fpu_div.sv | 6 +++--- hw/rtl/fpu/VX_fpu_fma.sv | 6 +++--- hw/rtl/fpu/VX_fpu_fpnew.sv | 2 +- hw/rtl/fpu/VX_fpu_ncp.sv | 2 +- hw/rtl/fpu/VX_fpu_sqrt.sv | 6 +++--- hw/rtl/libs/VX_avs_adapter.sv | 14 ++++++-------- 23 files changed, 37 insertions(+), 39 deletions(-) diff --git a/hw/rtl/cache/VX_cache.sv b/hw/rtl/cache/VX_cache.sv index 90b34a1e41..6d3e1351ea 100644 --- a/hw/rtl/cache/VX_cache.sv +++ b/hw/rtl/cache/VX_cache.sv @@ -136,7 +136,7 @@ module VX_cache import VX_gpu_pkg::*; #( wire [NUM_REQS-1:0][TAG_WIDTH-1:0] core_rsp_tag_s; wire [NUM_REQS-1:0] core_rsp_ready_s; - for (genvar i = 0; i < NUM_REQS; ++i) begin + for (genvar i = 0; i < NUM_REQS; ++i) begin : core_rsp_bufs VX_elastic_buffer #( .DATAW (`CS_WORD_WIDTH + TAG_WIDTH), .SIZE (CORE_RSP_REG_DISABLE ? `TO_OUT_BUF_SIZE(CORE_OUT_BUF) : 0), diff --git a/hw/rtl/cache/VX_cache_cluster.sv b/hw/rtl/cache/VX_cache_cluster.sv index a56c9a8173..dbf4ffec7a 100644 --- a/hw/rtl/cache/VX_cache_cluster.sv +++ b/hw/rtl/cache/VX_cache_cluster.sv @@ -102,7 +102,7 @@ module VX_cache_cluster import VX_gpu_pkg::*; #( .TAG_WIDTH (ARB_TAG_WIDTH) ) arb_core_bus_if[NUM_CACHES * NUM_REQS](); - for (genvar i = 0; i < NUM_REQS; ++i) begin + for (genvar i = 0; i < NUM_REQS; ++i) begin : core_arbs VX_mem_bus_if #( .DATA_SIZE (WORD_SIZE), .TAG_WIDTH (TAG_WIDTH) diff --git a/hw/rtl/cache/VX_cache_tags.sv b/hw/rtl/cache/VX_cache_tags.sv index 6c6ac92f2c..4d5b0bcd37 100644 --- a/hw/rtl/cache/VX_cache_tags.sv +++ b/hw/rtl/cache/VX_cache_tags.sv @@ -100,7 +100,7 @@ module VX_cache_tags #( wire fill_s = fill && (!WRITEBACK || ~stall); wire flush_s = flush && (!WRITEBACK || ~stall); - for (genvar i = 0; i < NUM_WAYS; ++i) begin : ways + for (genvar i = 0; i < NUM_WAYS; ++i) begin : tag_stores wire do_fill = fill_s && evict_way[i]; wire do_flush = flush_s && (!WRITEBACK || way_sel[i]); // flush the whole line in writethrough 
mode diff --git a/hw/rtl/core/VX_alu_unit.sv b/hw/rtl/core/VX_alu_unit.sv index 7ab808c701..8b2bf7363c 100644 --- a/hw/rtl/core/VX_alu_unit.sv +++ b/hw/rtl/core/VX_alu_unit.sv @@ -55,7 +55,7 @@ module VX_alu_unit #( .NUM_LANES (NUM_LANES) ) per_block_commit_if[BLOCK_SIZE](); - for (genvar block_idx = 0; block_idx < BLOCK_SIZE; ++block_idx) begin : alu_blocks + for (genvar block_idx = 0; block_idx < BLOCK_SIZE; ++block_idx) begin : alus `RESET_RELAY_EN (block_reset, reset, (BLOCK_SIZE > 1)); diff --git a/hw/rtl/core/VX_commit.sv b/hw/rtl/core/VX_commit.sv index f945c79039..160bcf4d4a 100644 --- a/hw/rtl/core/VX_commit.sv +++ b/hw/rtl/core/VX_commit.sv @@ -41,7 +41,7 @@ module VX_commit import VX_gpu_pkg::*; #( wire [`ISSUE_WIDTH-1:0][`NUM_THREADS-1:0] per_issue_commit_tmask; wire [`ISSUE_WIDTH-1:0] per_issue_commit_eop; - for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin + for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin : commit_arbs wire [`NUM_EX_UNITS-1:0] valid_in; wire [`NUM_EX_UNITS-1:0][DATAW-1:0] data_in; diff --git a/hw/rtl/core/VX_dispatch.sv b/hw/rtl/core/VX_dispatch.sv index 3fe98ba938..4326298a11 100644 --- a/hw/rtl/core/VX_dispatch.sv +++ b/hw/rtl/core/VX_dispatch.sv @@ -53,7 +53,7 @@ module VX_dispatch import VX_gpu_pkg::*; #( wire [`NUM_EX_UNITS-1:0] operands_ready_in; assign operands_if.ready = operands_ready_in[operands_if.data.ex_type]; - for (genvar i = 0; i < `NUM_EX_UNITS; ++i) begin + for (genvar i = 0; i < `NUM_EX_UNITS; ++i) begin : buffers VX_elastic_buffer #( .DATAW (DATAW), .SIZE (2), diff --git a/hw/rtl/core/VX_fetch.sv b/hw/rtl/core/VX_fetch.sv index de622bd1d8..44f3e51da1 100644 --- a/hw/rtl/core/VX_fetch.sv +++ b/hw/rtl/core/VX_fetch.sv @@ -71,7 +71,7 @@ module VX_fetch import VX_gpu_pkg::*; #( // This resolves potential deadlock if ibuffer fills and the LSU stalls the execute stage due to pending dcache requests. // This issue is particularly prevalent when the icache and dcache are disabled and both requests share the same bus. 
wire [`NUM_WARPS-1:0] pending_ibuf_full; - for (genvar i = 0; i < `NUM_WARPS; ++i) begin + for (genvar i = 0; i < `NUM_WARPS; ++i) begin : pending_reads VX_pending_size #( .SIZE (`IBUF_SIZE) ) pending_reads ( diff --git a/hw/rtl/core/VX_fpu_unit.sv b/hw/rtl/core/VX_fpu_unit.sv index ae36e4b22b..0d7f02311e 100644 --- a/hw/rtl/core/VX_fpu_unit.sv +++ b/hw/rtl/core/VX_fpu_unit.sv @@ -53,7 +53,7 @@ module VX_fpu_unit import VX_fpu_pkg::*; #( .NUM_LANES (NUM_LANES) ) per_block_commit_if[BLOCK_SIZE](); - for (genvar block_idx = 0; block_idx < BLOCK_SIZE; ++block_idx) begin : fpu_blocks + for (genvar block_idx = 0; block_idx < BLOCK_SIZE; ++block_idx) begin : fpus `UNUSED_VAR (per_block_execute_if[block_idx].data.tid) `UNUSED_VAR (per_block_execute_if[block_idx].data.wb) diff --git a/hw/rtl/core/VX_gather_unit.sv b/hw/rtl/core/VX_gather_unit.sv index 402824dacb..69295321b6 100644 --- a/hw/rtl/core/VX_gather_unit.sv +++ b/hw/rtl/core/VX_gather_unit.sv @@ -74,7 +74,7 @@ module VX_gather_unit import VX_gpu_pkg::*; #( assign commit_in_ready[i] = commit_out_ready[commit_in_isw[i]]; end - for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin + for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin: out_bufs VX_commit_if #( .NUM_LANES (NUM_LANES) ) commit_tmp_if(); diff --git a/hw/rtl/core/VX_ibuffer.sv b/hw/rtl/core/VX_ibuffer.sv index 6f068d45ff..f5d879f334 100644 --- a/hw/rtl/core/VX_ibuffer.sv +++ b/hw/rtl/core/VX_ibuffer.sv @@ -35,7 +35,7 @@ module VX_ibuffer import VX_gpu_pkg::*; #( wire [PER_ISSUE_WARPS-1:0] ibuf_ready_in; assign decode_if.ready = ibuf_ready_in[decode_if.data.wid]; - for (genvar w = 0; w < PER_ISSUE_WARPS; ++w) begin : ibuf_slices + for (genvar w = 0; w < PER_ISSUE_WARPS; ++w) begin : instr_bufs VX_elastic_buffer #( .DATAW (DATAW), .SIZE (`IBUF_SIZE), diff --git a/hw/rtl/core/VX_lsu_unit.sv b/hw/rtl/core/VX_lsu_unit.sv index 425f1aeeeb..b155ed0d73 100644 --- a/hw/rtl/core/VX_lsu_unit.sv +++ b/hw/rtl/core/VX_lsu_unit.sv @@ -54,7 +54,7 @@ module VX_lsu_unit import 
VX_gpu_pkg::*; #( .NUM_LANES (NUM_LANES) ) per_block_commit_if[BLOCK_SIZE](); - for (genvar block_idx = 0; block_idx < BLOCK_SIZE; ++block_idx) begin : lsu_blocks + for (genvar block_idx = 0; block_idx < BLOCK_SIZE; ++block_idx) begin : lsus `RESET_RELAY_EN (slice_reset, reset, (BLOCK_SIZE > 1)); diff --git a/hw/rtl/core/VX_mem_unit.sv b/hw/rtl/core/VX_mem_unit.sv index 9f1695a288..bb00df0b5a 100644 --- a/hw/rtl/core/VX_mem_unit.sv +++ b/hw/rtl/core/VX_mem_unit.sv @@ -45,7 +45,7 @@ module VX_mem_unit import VX_gpu_pkg::*; #( .TAG_WIDTH (LSU_TAG_WIDTH) ) lsu_lmem_if[`NUM_LSU_BLOCKS](); - for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin : demux_slices + for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin : lmem_switches VX_lmem_switch #( .REQ0_OUT_BUF (3), .REQ1_OUT_BUF (0), @@ -65,7 +65,7 @@ module VX_mem_unit import VX_gpu_pkg::*; #( .TAG_WIDTH (LSU_TAG_WIDTH) ) lmem_bus_if[LSU_NUM_REQS](); - for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin : lmem_adapter_slices + for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin : lmem_adapters VX_mem_bus_if #( .DATA_SIZE (LSU_WORD_SIZE), .TAG_WIDTH (LSU_TAG_WIDTH) @@ -131,7 +131,7 @@ module VX_mem_unit import VX_gpu_pkg::*; #( if (LSU_WORD_SIZE != DCACHE_WORD_SIZE) begin : coalescer_if - for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin : coalescer_blocks + for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin : coalescers `RESET_RELAY (mem_coalescer_reset, reset); @@ -195,7 +195,7 @@ module VX_mem_unit import VX_gpu_pkg::*; #( end - for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin : dcache_adapter_slices + for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin : dcache_adapters VX_mem_bus_if #( .DATA_SIZE (DCACHE_WORD_SIZE), diff --git a/hw/rtl/core/VX_operands.sv b/hw/rtl/core/VX_operands.sv index f47b4964f3..d84c1a0727 100644 --- a/hw/rtl/core/VX_operands.sv +++ b/hw/rtl/core/VX_operands.sv @@ -246,7 +246,7 @@ module VX_operands import VX_gpu_pkg::*; #( assign gpr_wr_bank_idx = '0; end - for (genvar b = 0; b < NUM_BANKS; ++b) 
begin + for (genvar b = 0; b < NUM_BANKS; ++b) begin : gpr_rams wire gpr_wr_enabled; if (BANK_SEL_BITS != 0) begin assign gpr_wr_enabled = writeback_if.valid diff --git a/hw/rtl/core/VX_schedule.sv b/hw/rtl/core/VX_schedule.sv index fbe0bd9599..b1b855aaf3 100644 --- a/hw/rtl/core/VX_schedule.sv +++ b/hw/rtl/core/VX_schedule.sv @@ -379,7 +379,7 @@ module VX_schedule import VX_gpu_pkg::*; #( `RESET_RELAY (pending_instr_reset, reset); - for (genvar i = 0; i < `NUM_WARPS; ++i) begin + for (genvar i = 0; i < `NUM_WARPS; ++i) begin : pending_sizes VX_pending_size #( .SIZE (4096), .ALM_EMPTY (1) diff --git a/hw/rtl/core/VX_scoreboard.sv b/hw/rtl/core/VX_scoreboard.sv index 503cc22c8f..b2d9ff2bec 100644 --- a/hw/rtl/core/VX_scoreboard.sv +++ b/hw/rtl/core/VX_scoreboard.sv @@ -101,7 +101,7 @@ module VX_scoreboard import VX_gpu_pkg::*; #( end `endif - for (genvar w = 0; w < PER_ISSUE_WARPS; ++w) begin + for (genvar w = 0; w < PER_ISSUE_WARPS; ++w) begin : stanging_bufs VX_pipe_buffer #( .DATAW (DATAW) ) stanging_buf ( diff --git a/hw/rtl/core/VX_split_join.sv b/hw/rtl/core/VX_split_join.sv index 4b58ebc265..8689d216d3 100644 --- a/hw/rtl/core/VX_split_join.sv +++ b/hw/rtl/core/VX_split_join.sv @@ -45,7 +45,7 @@ module VX_split_join import VX_gpu_pkg::*; #( wire ipdom_push = valid && split.valid && split.is_dvg; wire ipdom_pop = valid && sjoin.valid && sjoin_is_dvg; - for (genvar i = 0; i < `NUM_WARPS; ++i) begin : ipdom_slices + for (genvar i = 0; i < `NUM_WARPS; ++i) begin : ipdom_stacks VX_ipdom_stack #( .WIDTH (`NUM_THREADS+`PC_BITS), .DEPTH (`DV_STACK_SIZE) diff --git a/hw/rtl/fpu/VX_fpu_cvt.sv b/hw/rtl/fpu/VX_fpu_cvt.sv index a2bf939882..94dee73160 100644 --- a/hw/rtl/fpu/VX_fpu_cvt.sv +++ b/hw/rtl/fpu/VX_fpu_cvt.sv @@ -86,7 +86,7 @@ module VX_fpu_cvt import VX_fpu_pkg::*; #( assign fflags_out[i] = data_out[i][32 +: `FP_FLAGS_BITS]; end - for (genvar i = 0; i < NUM_PES; ++i) begin + for (genvar i = 0; i < NUM_PES; ++i) begin : fcvt_units VX_fcvt_unit #( .LATENCY 
(`LATENCY_FCVT), .OUT_REG (1) diff --git a/hw/rtl/fpu/VX_fpu_div.sv b/hw/rtl/fpu/VX_fpu_div.sv index 9cdea7867d..ea63387d7d 100644 --- a/hw/rtl/fpu/VX_fpu_div.sv +++ b/hw/rtl/fpu/VX_fpu_div.sv @@ -94,7 +94,7 @@ module VX_fpu_div import VX_fpu_pkg::*; #( `ifdef QUARTUS - for (genvar i = 0; i < NUM_PES; ++i) begin + for (genvar i = 0; i < NUM_PES; ++i) begin : fdivs acl_fdiv fdiv ( .clk (clk), .areset (1'b0), @@ -112,7 +112,7 @@ module VX_fpu_div import VX_fpu_pkg::*; #( `elsif VIVADO - for (genvar i = 0; i < NUM_PES; ++i) begin + for (genvar i = 0; i < NUM_PES; ++i) begin : fdivs wire [3:0] tuser; xil_fdiv fdiv ( .aclk (clk), @@ -134,7 +134,7 @@ module VX_fpu_div import VX_fpu_pkg::*; #( `else - for (genvar i = 0; i < NUM_PES; ++i) begin + for (genvar i = 0; i < NUM_PES; ++i) begin fdivs reg [63:0] r; `UNUSED_VAR (r) fflags_t f; diff --git a/hw/rtl/fpu/VX_fpu_fma.sv b/hw/rtl/fpu/VX_fpu_fma.sv index c42de701c1..331074cf07 100644 --- a/hw/rtl/fpu/VX_fpu_fma.sv +++ b/hw/rtl/fpu/VX_fpu_fma.sv @@ -125,7 +125,7 @@ module VX_fpu_fma import VX_fpu_pkg::*; #( `ifdef QUARTUS - for (genvar i = 0; i < NUM_PES; ++i) begin + for (genvar i = 0; i < NUM_PES; ++i) begin : fmadds acl_fmadd fmadd ( .clk (clk), .areset (1'b0), @@ -143,7 +143,7 @@ module VX_fpu_fma import VX_fpu_pkg::*; #( `elsif VIVADO - for (genvar i = 0; i < NUM_PES; ++i) begin + for (genvar i = 0; i < NUM_PES; ++i) begin : fmas wire [2:0] tuser; xil_fma fma ( @@ -168,7 +168,7 @@ module VX_fpu_fma import VX_fpu_pkg::*; #( `else - for (genvar i = 0; i < NUM_PES; ++i) begin + for (genvar i = 0; i < NUM_PES; ++i) begin : fmas reg [63:0] r; `UNUSED_VAR (r) fflags_t f; diff --git a/hw/rtl/fpu/VX_fpu_fpnew.sv b/hw/rtl/fpu/VX_fpu_fpnew.sv index ad95f0347e..85e790996e 100644 --- a/hw/rtl/fpu/VX_fpu_fpnew.sv +++ b/hw/rtl/fpu/VX_fpu_fpnew.sv @@ -162,7 +162,7 @@ module VX_fpu_fpnew end `UNUSED_VAR (mask_in) - for (genvar i = 0; i < NUM_LANES; ++i) begin + for (genvar i = 0; i < NUM_LANES; ++i) begin : fpnew_cores wire 
[(TAG_WIDTH+1)-1:0] fpu_tag; wire fpu_valid_out_uq; wire fpu_ready_in_uq; diff --git a/hw/rtl/fpu/VX_fpu_ncp.sv b/hw/rtl/fpu/VX_fpu_ncp.sv index 225033e1e4..52b2979b6d 100644 --- a/hw/rtl/fpu/VX_fpu_ncp.sv +++ b/hw/rtl/fpu/VX_fpu_ncp.sv @@ -91,7 +91,7 @@ module VX_fpu_ncp import VX_fpu_pkg::*; #( assign fflags_out[i] = data_out[i][32 +: `FP_FLAGS_BITS]; end - for (genvar i = 0; i < NUM_PES; ++i) begin + for (genvar i = 0; i < NUM_PES; ++i) begin : fncp_units VX_fncp_unit #( .LATENCY (`LATENCY_FNCP), .OUT_REG (1) diff --git a/hw/rtl/fpu/VX_fpu_sqrt.sv b/hw/rtl/fpu/VX_fpu_sqrt.sv index c6961e1dbb..f6c542fc38 100644 --- a/hw/rtl/fpu/VX_fpu_sqrt.sv +++ b/hw/rtl/fpu/VX_fpu_sqrt.sv @@ -88,7 +88,7 @@ module VX_fpu_sqrt import VX_fpu_pkg::*; #( `ifdef QUARTUS - for (genvar i = 0; i < NUM_PES; ++i) begin + for (genvar i = 0; i < NUM_PES; ++i) begin : fsqrts acl_fsqrt fsqrt ( .clk (clk), .areset (1'b0), @@ -105,7 +105,7 @@ module VX_fpu_sqrt import VX_fpu_pkg::*; #( `elsif VIVADO - for (genvar i = 0; i < NUM_PES; ++i) begin + for (genvar i = 0; i < NUM_PES; ++i) begin : fsqrts wire tuser; xil_fsqrt fsqrt ( @@ -126,7 +126,7 @@ module VX_fpu_sqrt import VX_fpu_pkg::*; #( `else - for (genvar i = 0; i < NUM_PES; ++i) begin + for (genvar i = 0; i < NUM_PES; ++i) begin : fsqrts reg [63:0] r; `UNUSED_VAR (r) fflags_t f; diff --git a/hw/rtl/libs/VX_avs_adapter.sv b/hw/rtl/libs/VX_avs_adapter.sv index f0941b0285..046c32bfc7 100644 --- a/hw/rtl/libs/VX_avs_adapter.sv +++ b/hw/rtl/libs/VX_avs_adapter.sv @@ -64,7 +64,6 @@ module VX_avs_adapter #( wire [NUM_BANKS-1:0] req_queue_push, req_queue_pop; wire [NUM_BANKS-1:0][TAG_WIDTH-1:0] req_queue_tag_out; wire [NUM_BANKS-1:0] req_queue_going_full; - wire [NUM_BANKS-1:0][RD_QUEUE_ADDR_WIDTH-1:0] req_queue_size; wire [BANK_ADDRW-1:0] req_bank_sel; wire [BANK_OFFSETW-1:0] req_bank_off; wire [NUM_BANKS-1:0] bank_req_ready; @@ -81,8 +80,7 @@ module VX_avs_adapter #( assign req_queue_push[i] = mem_req_valid && ~mem_req_rw && bank_req_ready[i] && 
(req_bank_sel == i); end - for (genvar i = 0; i < NUM_BANKS; ++i) begin - + for (genvar i = 0; i < NUM_BANKS; ++i) begin : pending_sizes VX_pending_size #( .SIZE (RD_QUEUE_SIZE) ) pending_size ( @@ -94,10 +92,11 @@ module VX_avs_adapter #( `UNUSED_PIN (alm_empty), .full (req_queue_going_full[i]), `UNUSED_PIN (alm_full), - .size (req_queue_size[i]) + `UNUSED_PIN (size) ); - `UNUSED_VAR (req_queue_size) + end + for (genvar i = 0; i < NUM_BANKS; ++i) begin : rd_req_queues VX_fifo_queue #( .DATAW (TAG_WIDTH), .DEPTH (RD_QUEUE_SIZE) @@ -116,7 +115,7 @@ module VX_avs_adapter #( ); end - for (genvar i = 0; i < NUM_BANKS; ++i) begin + for (genvar i = 0; i < NUM_BANKS; ++i) begin : req_out_bufs wire valid_out; wire rw_out; wire [DATA_SIZE-1:0] byteen_out; @@ -168,8 +167,7 @@ module VX_avs_adapter #( wire [NUM_BANKS-1:0][DATA_WIDTH-1:0] rsp_queue_data_out; wire [NUM_BANKS-1:0] rsp_queue_empty; - for (genvar i = 0; i < NUM_BANKS; ++i) begin - + for (genvar i = 0; i < NUM_BANKS; ++i) begin : rd_rsp_queues VX_fifo_queue #( .DATAW (DATA_WIDTH), .DEPTH (RD_QUEUE_SIZE) From 105f8841291aab0235410b13e8dcc3752f9403d8 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Thu, 29 Aug 2024 00:48:51 -0700 Subject: [PATCH 116/407] migration from fpnew to latest cvfpu core to resolve fpnew bugs and feature limitations --- .gitmodules | 6 +++--- hw/rtl/fpu/VX_fpu_fpnew.sv | 2 +- hw/rtl/libs/VX_avs_adapter.sv | 1 - hw/syn/altera/dut/core/Makefile | 3 ++- hw/syn/altera/dut/fpu/Makefile | 3 ++- hw/syn/altera/dut/issue/Makefile | 3 ++- hw/syn/altera/dut/top/Makefile | 3 ++- hw/syn/altera/dut/unittest/Makefile | 3 ++- hw/syn/altera/dut/vortex/Makefile | 3 ++- hw/syn/altera/opae/Makefile | 3 ++- hw/syn/xilinx/dut/core/Makefile | 3 ++- hw/syn/xilinx/dut/fpu/Makefile | 3 ++- hw/syn/xilinx/dut/issue/Makefile | 3 ++- hw/syn/xilinx/dut/top/Makefile | 3 ++- hw/syn/xilinx/dut/unittest/Makefile | 3 ++- hw/syn/xilinx/dut/vortex/Makefile | 3 ++- hw/syn/xilinx/sandbox/Makefile | 3 ++- hw/syn/xilinx/xrt/Makefile 
| 3 ++- hw/syn/yosys/Makefile | 3 ++- sim/opaesim/Makefile | 7 ++++--- sim/opaesim/verilator.vlt | 8 -------- sim/opaesim/verilator.vlt.in | 8 ++++++++ sim/rtlsim/Makefile | 7 ++++--- sim/rtlsim/verilator.vlt | 5 ----- sim/rtlsim/verilator.vlt.in | 5 +++++ sim/xrtsim/Makefile | 5 +++-- sim/xrtsim/verilator.vlt | 5 ----- sim/xrtsim/verilator.vlt.in | 5 +++++ third_party/cvfpu | 1 + third_party/fpnew | 1 - 30 files changed, 66 insertions(+), 48 deletions(-) delete mode 100644 sim/opaesim/verilator.vlt create mode 100644 sim/opaesim/verilator.vlt.in delete mode 100644 sim/rtlsim/verilator.vlt create mode 100644 sim/rtlsim/verilator.vlt.in delete mode 100644 sim/xrtsim/verilator.vlt create mode 100644 sim/xrtsim/verilator.vlt.in create mode 160000 third_party/cvfpu delete mode 160000 third_party/fpnew diff --git a/.gitmodules b/.gitmodules index df3ca47e28..32abfe9cb0 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,9 +1,9 @@ -[submodule "third_party/fpnew"] - path = third_party/fpnew - url = https://github.com/pulp-platform/fpnew.git [submodule "third_party/softfloat"] path = third_party/softfloat url = https://github.com/ucb-bar/berkeley-softfloat-3.git [submodule "third_party/ramulator"] path = third_party/ramulator url = https://github.com/CMU-SAFARI/ramulator2.git +[submodule "third_party/cvfpu"] + path = third_party/cvfpu + url = https://github.com/openhwgroup/cvfpu.git diff --git a/hw/rtl/fpu/VX_fpu_fpnew.sv b/hw/rtl/fpu/VX_fpu_fpnew.sv index 85e790996e..a2b0e170a9 100644 --- a/hw/rtl/fpu/VX_fpu_fpnew.sv +++ b/hw/rtl/fpu/VX_fpu_fpnew.sv @@ -193,7 +193,7 @@ module VX_fpu_fpnew .tag_i ({fpu_tag_in, fpu_has_fflags}), .in_valid_i (fpu_valid_in), .in_ready_o (fpu_ready_in_uq), - .flush_i (reset), + .flush_i (1'b0), .result_o (fpu_result[i]), .status_o (fpu_status_uq), .tag_o (fpu_tag), diff --git a/hw/rtl/libs/VX_avs_adapter.sv b/hw/rtl/libs/VX_avs_adapter.sv index 046c32bfc7..61322f673a 100644 --- a/hw/rtl/libs/VX_avs_adapter.sv +++ b/hw/rtl/libs/VX_avs_adapter.sv 
@@ -54,7 +54,6 @@ module VX_avs_adapter #( input wire avs_readdatavalid [NUM_BANKS] ); localparam DATA_SIZE = DATA_WIDTH/8; - localparam RD_QUEUE_ADDR_WIDTH = `CLOG2(RD_QUEUE_SIZE+1); localparam BANK_ADDRW = `LOG2UP(NUM_BANKS); localparam LOG2_NUM_BANKS = `CLOG2(NUM_BANKS); localparam BANK_OFFSETW = ADDR_WIDTH - LOG2_NUM_BANKS; diff --git a/hw/syn/altera/dut/core/Makefile b/hw/syn/altera/dut/core/Makefile index eeeaa52338..0a3b19285f 100644 --- a/hw/syn/altera/dut/core/Makefile +++ b/hw/syn/altera/dut/core/Makefile @@ -9,6 +9,7 @@ include ../../common.mk FPU_INCLUDE = -I$(RTL_DIR)/fpu ifneq (,$(findstring FPU_FPNEW,$(CONFIGS))) - FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/fpnew/src/common_cells/include -J$(THIRD_PARTY_DIR)/fpnew/src/common_cells/src -J$(THIRD_PARTY_DIR)/fpnew/src/fpu_div_sqrt_mvp/hdl -J$(THIRD_PARTY_DIR)/fpnew/src + FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/include -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/src -J$(THIRD_PARTY_DIR)/cvfpu/src/fpu_div_sqrt_mvp/hdl -J$(THIRD_PARTY_DIR)/cvfpu/src + FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/cvfpu/vendor/openc910/C910_RTL_FACTORY/gen_rtl/clk/rtl -J$(THIRD_PARTY_DIR)/cvfpu/vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl endif RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache -I$(IP_CACHE_DIR) $(FPU_INCLUDE) \ No newline at end of file diff --git a/hw/syn/altera/dut/fpu/Makefile b/hw/syn/altera/dut/fpu/Makefile index b7826dc689..e3cb9445b0 100644 --- a/hw/syn/altera/dut/fpu/Makefile +++ b/hw/syn/altera/dut/fpu/Makefile @@ -6,6 +6,7 @@ include ../../common.mk FPU_INCLUDE = -I$(RTL_DIR)/fpu ifneq (,$(findstring FPU_FPNEW,$(CONFIGS))) - FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/fpnew/src/common_cells/include -J$(THIRD_PARTY_DIR)/fpnew/src/common_cells/src -J$(THIRD_PARTY_DIR)/fpnew/src/fpu_div_sqrt_mvp/hdl -J$(THIRD_PARTY_DIR)/fpnew/src + FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/include 
-J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/src -J$(THIRD_PARTY_DIR)/cvfpu/src/fpu_div_sqrt_mvp/hdl -J$(THIRD_PARTY_DIR)/cvfpu/src + FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/cvfpu/vendor/openc910/C910_RTL_FACTORY/gen_rtl/clk/rtl -J$(THIRD_PARTY_DIR)/cvfpu/vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl endif RTL_INCLUDE = $(FPU_INCLUDE) -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(IP_CACHE_DIR) diff --git a/hw/syn/altera/dut/issue/Makefile b/hw/syn/altera/dut/issue/Makefile index c1804a3989..8e3bead119 100644 --- a/hw/syn/altera/dut/issue/Makefile +++ b/hw/syn/altera/dut/issue/Makefile @@ -9,6 +9,7 @@ include ../../common.mk FPU_INCLUDE = -I$(RTL_DIR)/fpu ifneq (,$(findstring FPU_FPNEW,$(CONFIGS))) - FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/fpnew/src/common_cells/include -J$(THIRD_PARTY_DIR)/fpnew/src/common_cells/src -J$(THIRD_PARTY_DIR)/fpnew/src/fpu_div_sqrt_mvp/hdl -J$(THIRD_PARTY_DIR)/fpnew/src + FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/include -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/src -J$(THIRD_PARTY_DIR)/cvfpu/src/fpu_div_sqrt_mvp/hdl -J$(THIRD_PARTY_DIR)/cvfpu/src + FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/cvfpu/vendor/openc910/C910_RTL_FACTORY/gen_rtl/clk/rtl -J$(THIRD_PARTY_DIR)/cvfpu/vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl endif RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem $(FPU_INCLUDE) -I$(IP_CACHE_DIR) $(FPU_INCLUDE) \ No newline at end of file diff --git a/hw/syn/altera/dut/top/Makefile b/hw/syn/altera/dut/top/Makefile index 341690206d..2249392360 100644 --- a/hw/syn/altera/dut/top/Makefile +++ b/hw/syn/altera/dut/top/Makefile @@ -27,6 +27,7 @@ endif FPU_INCLUDE = -I$(RTL_DIR)/fpu ifneq (,$(findstring FPU_FPNEW,$(CONFIGS))) - FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/fpnew/src/common_cells/include -J$(THIRD_PARTY_DIR)/fpnew/src/common_cells/src -J$(THIRD_PARTY_DIR)/fpnew/src/fpu_div_sqrt_mvp/hdl -J$(THIRD_PARTY_DIR)/fpnew/src + FPU_INCLUDE += 
-J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/include -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/src -J$(THIRD_PARTY_DIR)/cvfpu/src/fpu_div_sqrt_mvp/hdl -J$(THIRD_PARTY_DIR)/cvfpu/src + FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/cvfpu/vendor/openc910/C910_RTL_FACTORY/gen_rtl/clk/rtl -J$(THIRD_PARTY_DIR)/cvfpu/vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl endif RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache -I$(AFU_DIR) -I$(AFU_DIR)/ccip -I$(IP_CACHE_DIR) $(FPU_INCLUDE) diff --git a/hw/syn/altera/dut/unittest/Makefile b/hw/syn/altera/dut/unittest/Makefile index 2bfb18e4e4..d26eabe3c7 100644 --- a/hw/syn/altera/dut/unittest/Makefile +++ b/hw/syn/altera/dut/unittest/Makefile @@ -6,6 +6,7 @@ include ../../common.mk FPU_INCLUDE = -I$(RTL_DIR)/fpu ifneq (,$(findstring FPU_FPNEW,$(CONFIGS))) - FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/fpnew/src/common_cells/include -J$(THIRD_PARTY_DIR)/fpnew/src/common_cells/src -J$(THIRD_PARTY_DIR)/fpnew/src/fpu_div_sqrt_mvp/hdl -J$(THIRD_PARTY_DIR)/fpnew/src + FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/include -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/src -J$(THIRD_PARTY_DIR)/cvfpu/src/fpu_div_sqrt_mvp/hdl -J$(THIRD_PARTY_DIR)/cvfpu/src + FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/cvfpu/vendor/openc910/C910_RTL_FACTORY/gen_rtl/clk/rtl -J$(THIRD_PARTY_DIR)/cvfpu/vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl endif RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache -I$(IP_CACHE_DIR) $(FPU_INCLUDE) \ No newline at end of file diff --git a/hw/syn/altera/dut/vortex/Makefile b/hw/syn/altera/dut/vortex/Makefile index 7429df414e..64fb051e0e 100644 --- a/hw/syn/altera/dut/vortex/Makefile +++ b/hw/syn/altera/dut/vortex/Makefile @@ -11,6 +11,7 @@ include ../../common.mk FPU_INCLUDE = -I$(RTL_DIR)/fpu ifneq (,$(findstring FPU_FPNEW,$(CONFIGS))) - FPU_INCLUDE += 
-J$(THIRD_PARTY_DIR)/fpnew/src/common_cells/include -J$(THIRD_PARTY_DIR)/fpnew/src/common_cells/src -J$(THIRD_PARTY_DIR)/fpnew/src/fpu_div_sqrt_mvp/hdl -J$(THIRD_PARTY_DIR)/fpnew/src + FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/include -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/src -J$(THIRD_PARTY_DIR)/cvfpu/src/fpu_div_sqrt_mvp/hdl -J$(THIRD_PARTY_DIR)/cvfpu/src + FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/cvfpu/vendor/openc910/C910_RTL_FACTORY/gen_rtl/clk/rtl -J$(THIRD_PARTY_DIR)/cvfpu/vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl endif RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache -I$(IP_CACHE_DIR) $(FPU_INCLUDE) diff --git a/hw/syn/altera/opae/Makefile b/hw/syn/altera/opae/Makefile index 62a9bb72c1..5ec7a7ff8a 100644 --- a/hw/syn/altera/opae/Makefile +++ b/hw/syn/altera/opae/Makefile @@ -58,7 +58,8 @@ CONFIGS += $(CONFIGS_$(NUM_CORES)c) # include paths FPU_INCLUDE = -I$(RTL_DIR)/fpu ifneq (,$(findstring FPU_FPNEW,$(CONFIGS))) - FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/fpnew/src/common_cells/include -J$(THIRD_PARTY_DIR)/fpnew/src/common_cells/src -J$(THIRD_PARTY_DIR)/fpnew/src/fpu_div_sqrt_mvp/hdl -J$(THIRD_PARTY_DIR)/fpnew/src + FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/include -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/src -J$(THIRD_PARTY_DIR)/cvfpu/src/fpu_div_sqrt_mvp/hdl -J$(THIRD_PARTY_DIR)/cvfpu/src + FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/cvfpu/vendor/openc910/C910_RTL_FACTORY/gen_rtl/clk/rtl -J$(THIRD_PARTY_DIR)/cvfpu/vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl endif RTL_INCLUDE = -I$(RTL_DIR) -I$(DPI_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache -I$(AFU_DIR) -I$(IP_CACHE_DIR) RTL_INCLUDE += $(FPU_INCLUDE) diff --git a/hw/syn/xilinx/dut/core/Makefile b/hw/syn/xilinx/dut/core/Makefile index deda5cce9e..c94fd8637f 100644 --- a/hw/syn/xilinx/dut/core/Makefile +++ 
b/hw/syn/xilinx/dut/core/Makefile @@ -10,6 +10,7 @@ include ../../common.mk FPU_INCLUDE = -I$(RTL_DIR)/fpu ifneq (,$(findstring FPU_FPNEW,$(CONFIGS))) - FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/fpnew/src/common_cells/include -J$(THIRD_PARTY_DIR)/fpnew/src/common_cells/src -J$(THIRD_PARTY_DIR)/fpnew/src/fpu_div_sqrt_mvp/hdl -J$(THIRD_PARTY_DIR)/fpnew/src + FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/include -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/src -J$(THIRD_PARTY_DIR)/cvfpu/src/fpu_div_sqrt_mvp/hdl -J$(THIRD_PARTY_DIR)/cvfpu/src + FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/cvfpu/vendor/openc910/C910_RTL_FACTORY/gen_rtl/clk/rtl -J$(THIRD_PARTY_DIR)/cvfpu/vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl endif RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache $(FPU_INCLUDE) \ No newline at end of file diff --git a/hw/syn/xilinx/dut/fpu/Makefile b/hw/syn/xilinx/dut/fpu/Makefile index bb66103752..ba76e8eb81 100644 --- a/hw/syn/xilinx/dut/fpu/Makefile +++ b/hw/syn/xilinx/dut/fpu/Makefile @@ -7,6 +7,7 @@ include ../../common.mk FPU_INCLUDE = -I$(RTL_DIR)/fpu ifneq (,$(findstring FPU_FPNEW,$(CONFIGS))) - FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/fpnew/src/common_cells/include -J$(THIRD_PARTY_DIR)/fpnew/src/common_cells/src -J$(THIRD_PARTY_DIR)/fpnew/src/fpu_div_sqrt_mvp/hdl -J$(THIRD_PARTY_DIR)/fpnew/src + FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/include -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/src -J$(THIRD_PARTY_DIR)/cvfpu/src/fpu_div_sqrt_mvp/hdl -J$(THIRD_PARTY_DIR)/cvfpu/src + FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/cvfpu/vendor/openc910/C910_RTL_FACTORY/gen_rtl/clk/rtl -J$(THIRD_PARTY_DIR)/cvfpu/vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl endif RTL_INCLUDE = $(FPU_INCLUDE) -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces diff --git a/hw/syn/xilinx/dut/issue/Makefile b/hw/syn/xilinx/dut/issue/Makefile index bb93f44d27..b5690ca01e 100644 --- 
a/hw/syn/xilinx/dut/issue/Makefile +++ b/hw/syn/xilinx/dut/issue/Makefile @@ -9,6 +9,7 @@ include ../../common.mk FPU_INCLUDE = -I$(RTL_DIR)/fpu ifneq (,$(findstring FPU_FPNEW,$(CONFIGS))) - FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/fpnew/src/common_cells/include -J$(THIRD_PARTY_DIR)/fpnew/src/common_cells/src -J$(THIRD_PARTY_DIR)/fpnew/src/fpu_div_sqrt_mvp/hdl -J$(THIRD_PARTY_DIR)/fpnew/src + FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/include -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/src -J$(THIRD_PARTY_DIR)/cvfpu/src/fpu_div_sqrt_mvp/hdl -J$(THIRD_PARTY_DIR)/cvfpu/src + FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/cvfpu/vendor/openc910/C910_RTL_FACTORY/gen_rtl/clk/rtl -J$(THIRD_PARTY_DIR)/cvfpu/vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl endif RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem $(FPU_INCLUDE) $(FPU_INCLUDE) \ No newline at end of file diff --git a/hw/syn/xilinx/dut/top/Makefile b/hw/syn/xilinx/dut/top/Makefile index 0480b08e52..ab7a18162f 100644 --- a/hw/syn/xilinx/dut/top/Makefile +++ b/hw/syn/xilinx/dut/top/Makefile @@ -28,6 +28,7 @@ endif FPU_INCLUDE = -I$(RTL_DIR)/fpu ifneq (,$(findstring FPU_FPNEW,$(CONFIGS))) - FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/fpnew/src/common_cells/include -J$(THIRD_PARTY_DIR)/fpnew/src/common_cells/src -J$(THIRD_PARTY_DIR)/fpnew/src/fpu_div_sqrt_mvp/hdl -J$(THIRD_PARTY_DIR)/fpnew/src + FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/include -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/src -J$(THIRD_PARTY_DIR)/cvfpu/src/fpu_div_sqrt_mvp/hdl -J$(THIRD_PARTY_DIR)/cvfpu/src + FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/cvfpu/vendor/openc910/C910_RTL_FACTORY/gen_rtl/clk/rtl -J$(THIRD_PARTY_DIR)/cvfpu/vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl endif RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache -I$(AFU_DIR) -I$(AFU_DIR)/ccip $(FPU_INCLUDE) diff --git 
a/hw/syn/xilinx/dut/unittest/Makefile b/hw/syn/xilinx/dut/unittest/Makefile index 061e754419..7f4dfd3a3e 100644 --- a/hw/syn/xilinx/dut/unittest/Makefile +++ b/hw/syn/xilinx/dut/unittest/Makefile @@ -6,6 +6,7 @@ include ../../common.mk FPU_INCLUDE = -I$(RTL_DIR)/fpu ifneq (,$(findstring FPU_FPNEW,$(CONFIGS))) - FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/fpnew/src/common_cells/include -J$(THIRD_PARTY_DIR)/fpnew/src/common_cells/src -J$(THIRD_PARTY_DIR)/fpnew/src/fpu_div_sqrt_mvp/hdl -J$(THIRD_PARTY_DIR)/fpnew/src + FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/include -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/src -J$(THIRD_PARTY_DIR)/cvfpu/src/fpu_div_sqrt_mvp/hdl -J$(THIRD_PARTY_DIR)/cvfpu/src + FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/cvfpu/vendor/openc910/C910_RTL_FACTORY/gen_rtl/clk/rtl -J$(THIRD_PARTY_DIR)/cvfpu/vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl endif RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache $(FPU_INCLUDE) \ No newline at end of file diff --git a/hw/syn/xilinx/dut/vortex/Makefile b/hw/syn/xilinx/dut/vortex/Makefile index e2525fae23..45423f7ae7 100644 --- a/hw/syn/xilinx/dut/vortex/Makefile +++ b/hw/syn/xilinx/dut/vortex/Makefile @@ -12,6 +12,7 @@ include ../../common.mk FPU_INCLUDE = -I$(RTL_DIR)/fpu ifneq (,$(findstring FPU_FPNEW,$(CONFIGS))) - FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/fpnew/src/common_cells/include -J$(THIRD_PARTY_DIR)/fpnew/src/common_cells/src -J$(THIRD_PARTY_DIR)/fpnew/src/fpu_div_sqrt_mvp/hdl -J$(THIRD_PARTY_DIR)/fpnew/src + FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/include -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/src -J$(THIRD_PARTY_DIR)/cvfpu/src/fpu_div_sqrt_mvp/hdl -J$(THIRD_PARTY_DIR)/cvfpu/src + FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/cvfpu/vendor/openc910/C910_RTL_FACTORY/gen_rtl/clk/rtl -J$(THIRD_PARTY_DIR)/cvfpu/vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl endif RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs 
-I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache $(FPU_INCLUDE) diff --git a/hw/syn/xilinx/sandbox/Makefile b/hw/syn/xilinx/sandbox/Makefile index bcfd91f9c4..c4e4db43cc 100644 --- a/hw/syn/xilinx/sandbox/Makefile +++ b/hw/syn/xilinx/sandbox/Makefile @@ -20,7 +20,8 @@ ESCAPED_COE_FILE := $(shell echo "$(COE_FILE)" | sed -e 's/[\/&]/\\&/g') # include paths FPU_INCLUDE = -I$(RTL_DIR)/fpu ifneq (,$(findstring FPU_FPNEW,$(CONFIGS))) - FPU_INCLUDE += -I$(THIRD_PARTY_DIR)/fpnew/src/common_cells/include -I$(THIRD_PARTY_DIR)/fpnew/src/common_cells/src -I$(THIRD_PARTY_DIR)/fpnew/src/fpu_div_sqrt_mvp/hdl -I$(THIRD_PARTY_DIR)/fpnew/src + FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/include -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/src -J$(THIRD_PARTY_DIR)/cvfpu/src/fpu_div_sqrt_mvp/hdl -J$(THIRD_PARTY_DIR)/cvfpu/src + FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/cvfpu/vendor/openc910/C910_RTL_FACTORY/gen_rtl/clk/rtl -J$(THIRD_PARTY_DIR)/cvfpu/vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl endif RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache RTL_INCLUDE += $(FPU_INCLUDE) diff --git a/hw/syn/xilinx/xrt/Makefile b/hw/syn/xilinx/xrt/Makefile index 9e86bd1a5d..1a7589f56c 100644 --- a/hw/syn/xilinx/xrt/Makefile +++ b/hw/syn/xilinx/xrt/Makefile @@ -75,7 +75,8 @@ CONFIGS += $(CONFIGS_$(NUM_CORES)c) # include paths FPU_INCLUDE = -I$(RTL_DIR)/fpu ifneq (,$(findstring FPU_FPNEW,$(CONFIGS))) - FPU_INCLUDE += -I$(THIRD_PARTY_DIR)/fpnew/src/common_cells/include -I$(THIRD_PARTY_DIR)/fpnew/src/common_cells/src -I$(THIRD_PARTY_DIR)/fpnew/src/fpu_div_sqrt_mvp/hdl -I$(THIRD_PARTY_DIR)/fpnew/src + FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/include -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/src -J$(THIRD_PARTY_DIR)/cvfpu/src/fpu_div_sqrt_mvp/hdl -J$(THIRD_PARTY_DIR)/cvfpu/src + FPU_INCLUDE += 
-J$(THIRD_PARTY_DIR)/cvfpu/vendor/openc910/C910_RTL_FACTORY/gen_rtl/clk/rtl -J$(THIRD_PARTY_DIR)/cvfpu/vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl endif RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache -I$(AFU_DIR) RTL_INCLUDE += $(FPU_INCLUDE) diff --git a/hw/syn/yosys/Makefile b/hw/syn/yosys/Makefile index 80bfdae02b..911361df84 100644 --- a/hw/syn/yosys/Makefile +++ b/hw/syn/yosys/Makefile @@ -44,7 +44,8 @@ CONFIGS += $(CONFIGS_$(NUM_CORES)c) # include paths FPU_INCLUDE = -I$(RTL_DIR)/fpu ifneq (,$(findstring FPU_FPNEW,$(CONFIGS))) - FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/fpnew/src/common_cells/include -J$(THIRD_PARTY_DIR)/fpnew/src/common_cells/src -J$(THIRD_PARTY_DIR)/fpnew/src/fpu_div_sqrt_mvp/hdl -J$(THIRD_PARTY_DIR)/fpnew/src + FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/include -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/src -J$(THIRD_PARTY_DIR)/cvfpu/src/fpu_div_sqrt_mvp/hdl -J$(THIRD_PARTY_DIR)/cvfpu/src + FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/cvfpu/vendor/openc910/C910_RTL_FACTORY/gen_rtl/clk/rtl -J$(THIRD_PARTY_DIR)/cvfpu/vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl endif RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache RTL_INCLUDE += $(FPU_INCLUDE) diff --git a/sim/opaesim/Makefile b/sim/opaesim/Makefile index 32182d5a8c..6402fb4756 100644 --- a/sim/opaesim/Makefile +++ b/sim/opaesim/Makefile @@ -58,8 +58,9 @@ RTL_PKGS += $(RTL_DIR)/VX_gpu_pkg.sv $(RTL_DIR)/fpu/VX_fpu_pkg.sv FPU_INCLUDE = -I$(RTL_DIR)/fpu ifneq (,$(findstring FPU_FPNEW,$(CONFIGS))) - RTL_PKGS += $(THIRD_PARTY_DIR)/fpnew/src/fpnew_pkg.sv $(THIRD_PARTY_DIR)/fpnew/src/common_cells/src/cf_math_pkg $(THIRD_PARTY_DIR)/fpnew/src/fpu_div_sqrt_mvp/hdl/defs_div_sqrt_mvp.sv - FPU_INCLUDE += -I$(THIRD_PARTY_DIR)/fpnew/src/common_cells/include -I$(THIRD_PARTY_DIR)/fpnew/src/common_cells/src 
-I$(THIRD_PARTY_DIR)/fpnew/src/fpu_div_sqrt_mvp/hdl -I$(THIRD_PARTY_DIR)/fpnew/src + RTL_PKGS += $(THIRD_PARTY_DIR)/cvfpu/src/fpnew_pkg.sv $(THIRD_PARTY_DIR)/cvfpu/src/common_cells/src/cf_math_pkg $(THIRD_PARTY_DIR)/cvfpu/src/fpu_div_sqrt_mvp/hdl/defs_div_sqrt_mvp.sv + FPU_INCLUDE += -I$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/include -I$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/src -I$(THIRD_PARTY_DIR)/cvfpu/src/fpu_div_sqrt_mvp/hdl -I$(THIRD_PARTY_DIR)/cvfpu/src + FPU_INCLUDE += -I$(THIRD_PARTY_DIR)/cvfpu/vendor/openc910/C910_RTL_FACTORY/gen_rtl/clk/rtl -I$(THIRD_PARTY_DIR)/cvfpu/vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl endif RTL_INCLUDE = -I$(SRC_DIR) -I$(RTL_DIR) -I$(DPI_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache $(FPU_INCLUDE) RTL_INCLUDE += -I$(AFU_DIR) -I$(AFU_DIR)/ccip @@ -72,7 +73,7 @@ VL_FLAGS += --x-initial unique --x-assign unique VL_FLAGS += -DSIMULATION -DSV_DPI VL_FLAGS += -DXLEN_$(XLEN) VL_FLAGS += $(CONFIGS) -VL_FLAGS += $(SRC_DIR)/verilator.vlt +VL_FLAGS += verilator.vlt VL_FLAGS += $(RTL_INCLUDE) VL_FLAGS += $(RTL_PKGS) diff --git a/sim/opaesim/verilator.vlt b/sim/opaesim/verilator.vlt deleted file mode 100644 index 66a59bd12f..0000000000 --- a/sim/opaesim/verilator.vlt +++ /dev/null @@ -1,8 +0,0 @@ -`verilator_config - -lint_off -rule BLKANDNBLK -file "*/fpnew/src/*" -lint_off -rule UNOPTFLAT -file "*/fpnew/src/*" -lint_off -file "*/fpnew/src/*" - -lint_off -file "*/afu/opae/ccip/ccip_if_pkg.sv" -lint_off -file "*/afu/opae/local_mem_cfg_pkg.sv" diff --git a/sim/opaesim/verilator.vlt.in b/sim/opaesim/verilator.vlt.in new file mode 100644 index 0000000000..0b118e05e5 --- /dev/null +++ b/sim/opaesim/verilator.vlt.in @@ -0,0 +1,8 @@ +`verilator_config + +lint_off -rule BLKANDNBLK -file "@VORTEX_HOME@/third_party/cvfpu/*" +lint_off -rule UNOPTFLAT -file "@VORTEX_HOME@/third_party/cvfpu/*" +lint_off -file "@VORTEX_HOME@/third_party/cvfpu/*" + +lint_off -file 
"@VORTEX_HOME@/hw/rtl/afu/opae/ccip/ccip_if_pkg.sv" +lint_off -file "@VORTEX_HOME@/hw/rtl/afu/opae/local_mem_cfg_pkg.sv" diff --git a/sim/rtlsim/Makefile b/sim/rtlsim/Makefile index 2f38ae1f25..89ba412f57 100644 --- a/sim/rtlsim/Makefile +++ b/sim/rtlsim/Makefile @@ -30,8 +30,9 @@ RTL_PKGS = $(RTL_DIR)/VX_gpu_pkg.sv $(RTL_DIR)/fpu/VX_fpu_pkg.sv FPU_INCLUDE = -I$(RTL_DIR)/fpu ifneq (,$(findstring FPU_FPNEW,$(CONFIGS))) - RTL_PKGS += $(THIRD_PARTY_DIR)/fpnew/src/fpnew_pkg.sv $(THIRD_PARTY_DIR)/fpnew/src/common_cells/src/cf_math_pkg $(THIRD_PARTY_DIR)/fpnew/src/fpu_div_sqrt_mvp/hdl/defs_div_sqrt_mvp.sv - FPU_INCLUDE += -I$(THIRD_PARTY_DIR)/fpnew/src/common_cells/include -I$(THIRD_PARTY_DIR)/fpnew/src/common_cells/src -I$(THIRD_PARTY_DIR)/fpnew/src/fpu_div_sqrt_mvp/hdl -I$(THIRD_PARTY_DIR)/fpnew/src + RTL_PKGS += $(THIRD_PARTY_DIR)/cvfpu/src/fpnew_pkg.sv $(THIRD_PARTY_DIR)/cvfpu/src/common_cells/src/cf_math_pkg $(THIRD_PARTY_DIR)/cvfpu/src/fpu_div_sqrt_mvp/hdl/defs_div_sqrt_mvp.sv + FPU_INCLUDE += -I$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/include -I$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/src -I$(THIRD_PARTY_DIR)/cvfpu/src/fpu_div_sqrt_mvp/hdl -I$(THIRD_PARTY_DIR)/cvfpu/src + FPU_INCLUDE += -I$(THIRD_PARTY_DIR)/cvfpu/vendor/openc910/C910_RTL_FACTORY/gen_rtl/clk/rtl -I$(THIRD_PARTY_DIR)/cvfpu/vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl endif RTL_INCLUDE = -I$(RTL_DIR) -I$(DPI_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache $(FPU_INCLUDE) @@ -50,7 +51,7 @@ VL_FLAGS = --exe VL_FLAGS += --language 1800-2009 --assert -Wall -Wpedantic VL_FLAGS += -Wno-DECLFILENAME -Wno-REDEFMACRO -Wno-GENUNNAMED VL_FLAGS += --x-initial unique --x-assign unique -VL_FLAGS += $(SRC_DIR)/verilator.vlt +VL_FLAGS += verilator.vlt VL_FLAGS += -DSIMULATION -DSV_DPI VL_FLAGS += -DXLEN_$(XLEN) VL_FLAGS += $(CONFIGS) diff --git a/sim/rtlsim/verilator.vlt b/sim/rtlsim/verilator.vlt deleted file mode 100644 index 9cfccbeb4b..0000000000 --- 
a/sim/rtlsim/verilator.vlt +++ /dev/null @@ -1,5 +0,0 @@ -`verilator_config - -lint_off -rule BLKANDNBLK -file "*/fpnew/src/*" -lint_off -rule UNOPTFLAT -file "*/fpnew/src/*" -lint_off -file "*/fpnew/src/*" diff --git a/sim/rtlsim/verilator.vlt.in b/sim/rtlsim/verilator.vlt.in new file mode 100644 index 0000000000..56de6b2cf8 --- /dev/null +++ b/sim/rtlsim/verilator.vlt.in @@ -0,0 +1,5 @@ +`verilator_config + +lint_off -rule BLKANDNBLK -file "@VORTEX_HOME@/third_party/cvfpu/*" +lint_off -rule UNOPTFLAT -file "@VORTEX_HOME@/third_party/cvfpu/*" +lint_off -file "@VORTEX_HOME@/third_party/cvfpu/*" diff --git a/sim/xrtsim/Makefile b/sim/xrtsim/Makefile index c63fe3d569..88dc930b78 100644 --- a/sim/xrtsim/Makefile +++ b/sim/xrtsim/Makefile @@ -57,8 +57,9 @@ RTL_PKGS += $(RTL_DIR)/VX_gpu_pkg.sv $(RTL_DIR)/fpu/VX_fpu_pkg.sv FPU_INCLUDE = -I$(RTL_DIR)/fpu ifneq (,$(findstring FPU_FPNEW,$(CONFIGS))) - RTL_PKGS += $(THIRD_PARTY_DIR)/fpnew/src/fpnew_pkg.sv $(THIRD_PARTY_DIR)/fpnew/src/common_cells/src/cf_math_pkg $(THIRD_PARTY_DIR)/fpnew/src/fpu_div_sqrt_mvp/hdl/defs_div_sqrt_mvp.sv - FPU_INCLUDE += -I$(THIRD_PARTY_DIR)/fpnew/src/common_cells/include -I$(THIRD_PARTY_DIR)/fpnew/src/common_cells/src -I$(THIRD_PARTY_DIR)/fpnew/src/fpu_div_sqrt_mvp/hdl -I$(THIRD_PARTY_DIR)/fpnew/src + RTL_PKGS += $(THIRD_PARTY_DIR)/cvfpu/src/fpnew_pkg.sv $(THIRD_PARTY_DIR)/cvfpu/src/common_cells/src/cf_math_pkg $(THIRD_PARTY_DIR)/cvfpu/src/fpu_div_sqrt_mvp/hdl/defs_div_sqrt_mvp.sv + FPU_INCLUDE += -I$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/include -I$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/src -I$(THIRD_PARTY_DIR)/cvfpu/src/fpu_div_sqrt_mvp/hdl -I$(THIRD_PARTY_DIR)/cvfpu/src + FPU_INCLUDE += -I$(THIRD_PARTY_DIR)/cvfpu/vendor/openc910/C910_RTL_FACTORY/gen_rtl/clk/rtl -I$(THIRD_PARTY_DIR)/cvfpu/vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl endif RTL_INCLUDE = -I$(SRC_DIR) -I$(RTL_DIR) -I$(DPI_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache 
$(FPU_INCLUDE) RTL_INCLUDE += -I$(AFU_DIR) diff --git a/sim/xrtsim/verilator.vlt b/sim/xrtsim/verilator.vlt deleted file mode 100644 index 9cfccbeb4b..0000000000 --- a/sim/xrtsim/verilator.vlt +++ /dev/null @@ -1,5 +0,0 @@ -`verilator_config - -lint_off -rule BLKANDNBLK -file "*/fpnew/src/*" -lint_off -rule UNOPTFLAT -file "*/fpnew/src/*" -lint_off -file "*/fpnew/src/*" diff --git a/sim/xrtsim/verilator.vlt.in b/sim/xrtsim/verilator.vlt.in new file mode 100644 index 0000000000..893ecbbd33 --- /dev/null +++ b/sim/xrtsim/verilator.vlt.in @@ -0,0 +1,5 @@ +`verilator_config + +lint_off -rule BLKANDNBLK -file "@VORTEX_HOME@/third_party/cvfpu/*" +lint_off -rule UNOPTFLAT -file "@VORTEX_HOME@/third_party/cvfpu/*" +lint_off -file "@VORTEX_HOME@/third_party/cvfpu/*" \ No newline at end of file diff --git a/third_party/cvfpu b/third_party/cvfpu new file mode 160000 index 0000000000..a6af691551 --- /dev/null +++ b/third_party/cvfpu @@ -0,0 +1 @@ +Subproject commit a6af691551ffbd76d5d9cf30774d3295a41615e4 diff --git a/third_party/fpnew b/third_party/fpnew deleted file mode 160000 index 79e4531390..0000000000 --- a/third_party/fpnew +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 79e453139072df42c9ec8f697132ba485d74e23d From 847dee347389193e4f6f9e30257c75aabfdc633c Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Thu, 29 Aug 2024 01:30:54 -0700 Subject: [PATCH 117/407] minor update --- hw/rtl/fpu/VX_fpu_div.sv | 2 +- hw/rtl/fpu/VX_fpu_fma.sv | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/hw/rtl/fpu/VX_fpu_div.sv b/hw/rtl/fpu/VX_fpu_div.sv index ea63387d7d..79b91a1f5a 100644 --- a/hw/rtl/fpu/VX_fpu_div.sv +++ b/hw/rtl/fpu/VX_fpu_div.sv @@ -134,7 +134,7 @@ module VX_fpu_div import VX_fpu_pkg::*; #( `else - for (genvar i = 0; i < NUM_PES; ++i) begin fdivs + for (genvar i = 0; i < NUM_PES; ++i) begin : fdivs reg [63:0] r; `UNUSED_VAR (r) fflags_t f; diff --git a/hw/rtl/fpu/VX_fpu_fma.sv b/hw/rtl/fpu/VX_fpu_fma.sv index 331074cf07..3095846c1c 100644 --- 
a/hw/rtl/fpu/VX_fpu_fma.sv +++ b/hw/rtl/fpu/VX_fpu_fma.sv @@ -125,7 +125,7 @@ module VX_fpu_fma import VX_fpu_pkg::*; #( `ifdef QUARTUS - for (genvar i = 0; i < NUM_PES; ++i) begin : fmadds + for (genvar i = 0; i < NUM_PES; ++i) begin : fmas acl_fmadd fmadd ( .clk (clk), .areset (1'b0), From 5f2bf2418b42883e045b5d3b8f8b342f423367a9 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Thu, 29 Aug 2024 02:40:54 -0700 Subject: [PATCH 118/407] minor update --- hw/rtl/fpu/VX_fpu_fpnew.sv | 5 ++--- hw/syn/altera/dut/core/Makefile | 1 - hw/syn/altera/dut/fpu/Makefile | 1 - hw/syn/altera/dut/issue/Makefile | 1 - hw/syn/altera/dut/top/Makefile | 1 - hw/syn/altera/dut/unittest/Makefile | 1 - hw/syn/altera/dut/vortex/Makefile | 1 - hw/syn/altera/opae/Makefile | 1 - hw/syn/xilinx/dut/core/Makefile | 1 - hw/syn/xilinx/dut/fpu/Makefile | 1 - hw/syn/xilinx/dut/issue/Makefile | 1 - hw/syn/xilinx/dut/top/Makefile | 1 - hw/syn/xilinx/dut/unittest/Makefile | 1 - hw/syn/xilinx/dut/vortex/Makefile | 1 - hw/syn/xilinx/sandbox/Makefile | 1 - hw/syn/xilinx/xrt/Makefile | 1 - hw/syn/yosys/Makefile | 1 - 17 files changed, 2 insertions(+), 19 deletions(-) diff --git a/hw/rtl/fpu/VX_fpu_fpnew.sv b/hw/rtl/fpu/VX_fpu_fpnew.sv index a2b0e170a9..030ae35573 100644 --- a/hw/rtl/fpu/VX_fpu_fpnew.sv +++ b/hw/rtl/fpu/VX_fpu_fpnew.sv @@ -176,8 +176,7 @@ module VX_fpu_fpnew .Features (FPU_FEATURES), .Implementation (FPU_IMPLEMENTATION), .TagType (logic[(TAG_WIDTH+1)-1:0]), - .TrueSIMDClass (1), - .EnableSIMDMask (1) + .DivSqrtSel (fpnew_pkg::PULP) ) fpnew_core ( .clk_i (clk), .rst_ni (~reset), @@ -189,7 +188,7 @@ module VX_fpu_fpnew .dst_fmt_i (fpu_dst_fmt), .int_fmt_i (fpu_int_fmt), .vectorial_op_i (1'b0), - .simd_mask_i (mask_in[i]), + .simd_mask_i (1'b1), .tag_i ({fpu_tag_in, fpu_has_fflags}), .in_valid_i (fpu_valid_in), .in_ready_o (fpu_ready_in_uq), diff --git a/hw/syn/altera/dut/core/Makefile b/hw/syn/altera/dut/core/Makefile index 0a3b19285f..c78c4a6519 100644 --- a/hw/syn/altera/dut/core/Makefile 
+++ b/hw/syn/altera/dut/core/Makefile @@ -10,6 +10,5 @@ include ../../common.mk FPU_INCLUDE = -I$(RTL_DIR)/fpu ifneq (,$(findstring FPU_FPNEW,$(CONFIGS))) FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/include -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/src -J$(THIRD_PARTY_DIR)/cvfpu/src/fpu_div_sqrt_mvp/hdl -J$(THIRD_PARTY_DIR)/cvfpu/src - FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/cvfpu/vendor/openc910/C910_RTL_FACTORY/gen_rtl/clk/rtl -J$(THIRD_PARTY_DIR)/cvfpu/vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl endif RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache -I$(IP_CACHE_DIR) $(FPU_INCLUDE) \ No newline at end of file diff --git a/hw/syn/altera/dut/fpu/Makefile b/hw/syn/altera/dut/fpu/Makefile index e3cb9445b0..38d5c718ca 100644 --- a/hw/syn/altera/dut/fpu/Makefile +++ b/hw/syn/altera/dut/fpu/Makefile @@ -7,6 +7,5 @@ include ../../common.mk FPU_INCLUDE = -I$(RTL_DIR)/fpu ifneq (,$(findstring FPU_FPNEW,$(CONFIGS))) FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/include -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/src -J$(THIRD_PARTY_DIR)/cvfpu/src/fpu_div_sqrt_mvp/hdl -J$(THIRD_PARTY_DIR)/cvfpu/src - FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/cvfpu/vendor/openc910/C910_RTL_FACTORY/gen_rtl/clk/rtl -J$(THIRD_PARTY_DIR)/cvfpu/vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl endif RTL_INCLUDE = $(FPU_INCLUDE) -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(IP_CACHE_DIR) diff --git a/hw/syn/altera/dut/issue/Makefile b/hw/syn/altera/dut/issue/Makefile index 8e3bead119..45f6981d67 100644 --- a/hw/syn/altera/dut/issue/Makefile +++ b/hw/syn/altera/dut/issue/Makefile @@ -10,6 +10,5 @@ include ../../common.mk FPU_INCLUDE = -I$(RTL_DIR)/fpu ifneq (,$(findstring FPU_FPNEW,$(CONFIGS))) FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/include -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/src -J$(THIRD_PARTY_DIR)/cvfpu/src/fpu_div_sqrt_mvp/hdl 
-J$(THIRD_PARTY_DIR)/cvfpu/src - FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/cvfpu/vendor/openc910/C910_RTL_FACTORY/gen_rtl/clk/rtl -J$(THIRD_PARTY_DIR)/cvfpu/vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl endif RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem $(FPU_INCLUDE) -I$(IP_CACHE_DIR) $(FPU_INCLUDE) \ No newline at end of file diff --git a/hw/syn/altera/dut/top/Makefile b/hw/syn/altera/dut/top/Makefile index 2249392360..99889f4ae1 100644 --- a/hw/syn/altera/dut/top/Makefile +++ b/hw/syn/altera/dut/top/Makefile @@ -28,6 +28,5 @@ endif FPU_INCLUDE = -I$(RTL_DIR)/fpu ifneq (,$(findstring FPU_FPNEW,$(CONFIGS))) FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/include -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/src -J$(THIRD_PARTY_DIR)/cvfpu/src/fpu_div_sqrt_mvp/hdl -J$(THIRD_PARTY_DIR)/cvfpu/src - FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/cvfpu/vendor/openc910/C910_RTL_FACTORY/gen_rtl/clk/rtl -J$(THIRD_PARTY_DIR)/cvfpu/vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl endif RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache -I$(AFU_DIR) -I$(AFU_DIR)/ccip -I$(IP_CACHE_DIR) $(FPU_INCLUDE) diff --git a/hw/syn/altera/dut/unittest/Makefile b/hw/syn/altera/dut/unittest/Makefile index d26eabe3c7..c4479f1542 100644 --- a/hw/syn/altera/dut/unittest/Makefile +++ b/hw/syn/altera/dut/unittest/Makefile @@ -7,6 +7,5 @@ include ../../common.mk FPU_INCLUDE = -I$(RTL_DIR)/fpu ifneq (,$(findstring FPU_FPNEW,$(CONFIGS))) FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/include -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/src -J$(THIRD_PARTY_DIR)/cvfpu/src/fpu_div_sqrt_mvp/hdl -J$(THIRD_PARTY_DIR)/cvfpu/src - FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/cvfpu/vendor/openc910/C910_RTL_FACTORY/gen_rtl/clk/rtl -J$(THIRD_PARTY_DIR)/cvfpu/vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl endif RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs 
-I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache -I$(IP_CACHE_DIR) $(FPU_INCLUDE) \ No newline at end of file diff --git a/hw/syn/altera/dut/vortex/Makefile b/hw/syn/altera/dut/vortex/Makefile index 64fb051e0e..80c2560219 100644 --- a/hw/syn/altera/dut/vortex/Makefile +++ b/hw/syn/altera/dut/vortex/Makefile @@ -12,6 +12,5 @@ include ../../common.mk FPU_INCLUDE = -I$(RTL_DIR)/fpu ifneq (,$(findstring FPU_FPNEW,$(CONFIGS))) FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/include -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/src -J$(THIRD_PARTY_DIR)/cvfpu/src/fpu_div_sqrt_mvp/hdl -J$(THIRD_PARTY_DIR)/cvfpu/src - FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/cvfpu/vendor/openc910/C910_RTL_FACTORY/gen_rtl/clk/rtl -J$(THIRD_PARTY_DIR)/cvfpu/vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl endif RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache -I$(IP_CACHE_DIR) $(FPU_INCLUDE) diff --git a/hw/syn/altera/opae/Makefile b/hw/syn/altera/opae/Makefile index 5ec7a7ff8a..53b1210d83 100644 --- a/hw/syn/altera/opae/Makefile +++ b/hw/syn/altera/opae/Makefile @@ -59,7 +59,6 @@ CONFIGS += $(CONFIGS_$(NUM_CORES)c) FPU_INCLUDE = -I$(RTL_DIR)/fpu ifneq (,$(findstring FPU_FPNEW,$(CONFIGS))) FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/include -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/src -J$(THIRD_PARTY_DIR)/cvfpu/src/fpu_div_sqrt_mvp/hdl -J$(THIRD_PARTY_DIR)/cvfpu/src - FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/cvfpu/vendor/openc910/C910_RTL_FACTORY/gen_rtl/clk/rtl -J$(THIRD_PARTY_DIR)/cvfpu/vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl endif RTL_INCLUDE = -I$(RTL_DIR) -I$(DPI_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache -I$(AFU_DIR) -I$(IP_CACHE_DIR) RTL_INCLUDE += $(FPU_INCLUDE) diff --git a/hw/syn/xilinx/dut/core/Makefile b/hw/syn/xilinx/dut/core/Makefile index c94fd8637f..2ce824a3f1 100644 --- 
a/hw/syn/xilinx/dut/core/Makefile +++ b/hw/syn/xilinx/dut/core/Makefile @@ -11,6 +11,5 @@ include ../../common.mk FPU_INCLUDE = -I$(RTL_DIR)/fpu ifneq (,$(findstring FPU_FPNEW,$(CONFIGS))) FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/include -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/src -J$(THIRD_PARTY_DIR)/cvfpu/src/fpu_div_sqrt_mvp/hdl -J$(THIRD_PARTY_DIR)/cvfpu/src - FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/cvfpu/vendor/openc910/C910_RTL_FACTORY/gen_rtl/clk/rtl -J$(THIRD_PARTY_DIR)/cvfpu/vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl endif RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache $(FPU_INCLUDE) \ No newline at end of file diff --git a/hw/syn/xilinx/dut/fpu/Makefile b/hw/syn/xilinx/dut/fpu/Makefile index ba76e8eb81..c3d3fd99f1 100644 --- a/hw/syn/xilinx/dut/fpu/Makefile +++ b/hw/syn/xilinx/dut/fpu/Makefile @@ -8,6 +8,5 @@ include ../../common.mk FPU_INCLUDE = -I$(RTL_DIR)/fpu ifneq (,$(findstring FPU_FPNEW,$(CONFIGS))) FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/include -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/src -J$(THIRD_PARTY_DIR)/cvfpu/src/fpu_div_sqrt_mvp/hdl -J$(THIRD_PARTY_DIR)/cvfpu/src - FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/cvfpu/vendor/openc910/C910_RTL_FACTORY/gen_rtl/clk/rtl -J$(THIRD_PARTY_DIR)/cvfpu/vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl endif RTL_INCLUDE = $(FPU_INCLUDE) -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces diff --git a/hw/syn/xilinx/dut/issue/Makefile b/hw/syn/xilinx/dut/issue/Makefile index b5690ca01e..07e8f343d0 100644 --- a/hw/syn/xilinx/dut/issue/Makefile +++ b/hw/syn/xilinx/dut/issue/Makefile @@ -10,6 +10,5 @@ include ../../common.mk FPU_INCLUDE = -I$(RTL_DIR)/fpu ifneq (,$(findstring FPU_FPNEW,$(CONFIGS))) FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/include -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/src -J$(THIRD_PARTY_DIR)/cvfpu/src/fpu_div_sqrt_mvp/hdl 
-J$(THIRD_PARTY_DIR)/cvfpu/src - FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/cvfpu/vendor/openc910/C910_RTL_FACTORY/gen_rtl/clk/rtl -J$(THIRD_PARTY_DIR)/cvfpu/vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl endif RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem $(FPU_INCLUDE) $(FPU_INCLUDE) \ No newline at end of file diff --git a/hw/syn/xilinx/dut/top/Makefile b/hw/syn/xilinx/dut/top/Makefile index ab7a18162f..3a06715b53 100644 --- a/hw/syn/xilinx/dut/top/Makefile +++ b/hw/syn/xilinx/dut/top/Makefile @@ -29,6 +29,5 @@ endif FPU_INCLUDE = -I$(RTL_DIR)/fpu ifneq (,$(findstring FPU_FPNEW,$(CONFIGS))) FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/include -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/src -J$(THIRD_PARTY_DIR)/cvfpu/src/fpu_div_sqrt_mvp/hdl -J$(THIRD_PARTY_DIR)/cvfpu/src - FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/cvfpu/vendor/openc910/C910_RTL_FACTORY/gen_rtl/clk/rtl -J$(THIRD_PARTY_DIR)/cvfpu/vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl endif RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache -I$(AFU_DIR) -I$(AFU_DIR)/ccip $(FPU_INCLUDE) diff --git a/hw/syn/xilinx/dut/unittest/Makefile b/hw/syn/xilinx/dut/unittest/Makefile index 7f4dfd3a3e..1bc66aa388 100644 --- a/hw/syn/xilinx/dut/unittest/Makefile +++ b/hw/syn/xilinx/dut/unittest/Makefile @@ -7,6 +7,5 @@ include ../../common.mk FPU_INCLUDE = -I$(RTL_DIR)/fpu ifneq (,$(findstring FPU_FPNEW,$(CONFIGS))) FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/include -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/src -J$(THIRD_PARTY_DIR)/cvfpu/src/fpu_div_sqrt_mvp/hdl -J$(THIRD_PARTY_DIR)/cvfpu/src - FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/cvfpu/vendor/openc910/C910_RTL_FACTORY/gen_rtl/clk/rtl -J$(THIRD_PARTY_DIR)/cvfpu/vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl endif RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core 
-I$(RTL_DIR)/mem -I$(RTL_DIR)/cache $(FPU_INCLUDE) \ No newline at end of file diff --git a/hw/syn/xilinx/dut/vortex/Makefile b/hw/syn/xilinx/dut/vortex/Makefile index 45423f7ae7..eb6d45a88d 100644 --- a/hw/syn/xilinx/dut/vortex/Makefile +++ b/hw/syn/xilinx/dut/vortex/Makefile @@ -13,6 +13,5 @@ include ../../common.mk FPU_INCLUDE = -I$(RTL_DIR)/fpu ifneq (,$(findstring FPU_FPNEW,$(CONFIGS))) FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/include -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/src -J$(THIRD_PARTY_DIR)/cvfpu/src/fpu_div_sqrt_mvp/hdl -J$(THIRD_PARTY_DIR)/cvfpu/src - FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/cvfpu/vendor/openc910/C910_RTL_FACTORY/gen_rtl/clk/rtl -J$(THIRD_PARTY_DIR)/cvfpu/vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl endif RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache $(FPU_INCLUDE) diff --git a/hw/syn/xilinx/sandbox/Makefile b/hw/syn/xilinx/sandbox/Makefile index c4e4db43cc..94c054b57c 100644 --- a/hw/syn/xilinx/sandbox/Makefile +++ b/hw/syn/xilinx/sandbox/Makefile @@ -21,7 +21,6 @@ ESCAPED_COE_FILE := $(shell echo "$(COE_FILE)" | sed -e 's/[\/&]/\\&/g') FPU_INCLUDE = -I$(RTL_DIR)/fpu ifneq (,$(findstring FPU_FPNEW,$(CONFIGS))) FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/include -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/src -J$(THIRD_PARTY_DIR)/cvfpu/src/fpu_div_sqrt_mvp/hdl -J$(THIRD_PARTY_DIR)/cvfpu/src - FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/cvfpu/vendor/openc910/C910_RTL_FACTORY/gen_rtl/clk/rtl -J$(THIRD_PARTY_DIR)/cvfpu/vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl endif RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache RTL_INCLUDE += $(FPU_INCLUDE) diff --git a/hw/syn/xilinx/xrt/Makefile b/hw/syn/xilinx/xrt/Makefile index 1a7589f56c..6368441e06 100644 --- a/hw/syn/xilinx/xrt/Makefile +++ b/hw/syn/xilinx/xrt/Makefile @@ -76,7 +76,6 @@ CONFIGS += 
$(CONFIGS_$(NUM_CORES)c) FPU_INCLUDE = -I$(RTL_DIR)/fpu ifneq (,$(findstring FPU_FPNEW,$(CONFIGS))) FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/include -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/src -J$(THIRD_PARTY_DIR)/cvfpu/src/fpu_div_sqrt_mvp/hdl -J$(THIRD_PARTY_DIR)/cvfpu/src - FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/cvfpu/vendor/openc910/C910_RTL_FACTORY/gen_rtl/clk/rtl -J$(THIRD_PARTY_DIR)/cvfpu/vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl endif RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache -I$(AFU_DIR) RTL_INCLUDE += $(FPU_INCLUDE) diff --git a/hw/syn/yosys/Makefile b/hw/syn/yosys/Makefile index 911361df84..cba0137a3e 100644 --- a/hw/syn/yosys/Makefile +++ b/hw/syn/yosys/Makefile @@ -45,7 +45,6 @@ CONFIGS += $(CONFIGS_$(NUM_CORES)c) FPU_INCLUDE = -I$(RTL_DIR)/fpu ifneq (,$(findstring FPU_FPNEW,$(CONFIGS))) FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/include -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/src -J$(THIRD_PARTY_DIR)/cvfpu/src/fpu_div_sqrt_mvp/hdl -J$(THIRD_PARTY_DIR)/cvfpu/src - FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/cvfpu/vendor/openc910/C910_RTL_FACTORY/gen_rtl/clk/rtl -J$(THIRD_PARTY_DIR)/cvfpu/vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl endif RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache RTL_INCLUDE += $(FPU_INCLUDE) From 961b9c3d635bfc92bf866ea80d4c4ddfecfee96d Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Thu, 29 Aug 2024 02:41:36 -0700 Subject: [PATCH 119/407] minor update --- sim/opaesim/Makefile | 1 - sim/rtlsim/Makefile | 1 - sim/xrtsim/Makefile | 1 - 3 files changed, 3 deletions(-) diff --git a/sim/opaesim/Makefile b/sim/opaesim/Makefile index 6402fb4756..984686d3b8 100644 --- a/sim/opaesim/Makefile +++ b/sim/opaesim/Makefile @@ -60,7 +60,6 @@ FPU_INCLUDE = -I$(RTL_DIR)/fpu ifneq (,$(findstring FPU_FPNEW,$(CONFIGS))) RTL_PKGS += 
$(THIRD_PARTY_DIR)/cvfpu/src/fpnew_pkg.sv $(THIRD_PARTY_DIR)/cvfpu/src/common_cells/src/cf_math_pkg $(THIRD_PARTY_DIR)/cvfpu/src/fpu_div_sqrt_mvp/hdl/defs_div_sqrt_mvp.sv FPU_INCLUDE += -I$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/include -I$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/src -I$(THIRD_PARTY_DIR)/cvfpu/src/fpu_div_sqrt_mvp/hdl -I$(THIRD_PARTY_DIR)/cvfpu/src - FPU_INCLUDE += -I$(THIRD_PARTY_DIR)/cvfpu/vendor/openc910/C910_RTL_FACTORY/gen_rtl/clk/rtl -I$(THIRD_PARTY_DIR)/cvfpu/vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl endif RTL_INCLUDE = -I$(SRC_DIR) -I$(RTL_DIR) -I$(DPI_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache $(FPU_INCLUDE) RTL_INCLUDE += -I$(AFU_DIR) -I$(AFU_DIR)/ccip diff --git a/sim/rtlsim/Makefile b/sim/rtlsim/Makefile index 89ba412f57..591a2c2260 100644 --- a/sim/rtlsim/Makefile +++ b/sim/rtlsim/Makefile @@ -32,7 +32,6 @@ FPU_INCLUDE = -I$(RTL_DIR)/fpu ifneq (,$(findstring FPU_FPNEW,$(CONFIGS))) RTL_PKGS += $(THIRD_PARTY_DIR)/cvfpu/src/fpnew_pkg.sv $(THIRD_PARTY_DIR)/cvfpu/src/common_cells/src/cf_math_pkg $(THIRD_PARTY_DIR)/cvfpu/src/fpu_div_sqrt_mvp/hdl/defs_div_sqrt_mvp.sv FPU_INCLUDE += -I$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/include -I$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/src -I$(THIRD_PARTY_DIR)/cvfpu/src/fpu_div_sqrt_mvp/hdl -I$(THIRD_PARTY_DIR)/cvfpu/src - FPU_INCLUDE += -I$(THIRD_PARTY_DIR)/cvfpu/vendor/openc910/C910_RTL_FACTORY/gen_rtl/clk/rtl -I$(THIRD_PARTY_DIR)/cvfpu/vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl endif RTL_INCLUDE = -I$(RTL_DIR) -I$(DPI_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache $(FPU_INCLUDE) diff --git a/sim/xrtsim/Makefile b/sim/xrtsim/Makefile index 88dc930b78..81f8f28b5d 100644 --- a/sim/xrtsim/Makefile +++ b/sim/xrtsim/Makefile @@ -59,7 +59,6 @@ FPU_INCLUDE = -I$(RTL_DIR)/fpu ifneq (,$(findstring FPU_FPNEW,$(CONFIGS))) RTL_PKGS += $(THIRD_PARTY_DIR)/cvfpu/src/fpnew_pkg.sv 
$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/src/cf_math_pkg $(THIRD_PARTY_DIR)/cvfpu/src/fpu_div_sqrt_mvp/hdl/defs_div_sqrt_mvp.sv FPU_INCLUDE += -I$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/include -I$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/src -I$(THIRD_PARTY_DIR)/cvfpu/src/fpu_div_sqrt_mvp/hdl -I$(THIRD_PARTY_DIR)/cvfpu/src - FPU_INCLUDE += -I$(THIRD_PARTY_DIR)/cvfpu/vendor/openc910/C910_RTL_FACTORY/gen_rtl/clk/rtl -I$(THIRD_PARTY_DIR)/cvfpu/vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl endif RTL_INCLUDE = -I$(SRC_DIR) -I$(RTL_DIR) -I$(DPI_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache $(FPU_INCLUDE) RTL_INCLUDE += -I$(AFU_DIR) From fc5bb387a25d0e98530a3b0ace6de9cf7a2d5d14 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Thu, 29 Aug 2024 03:02:50 -0700 Subject: [PATCH 120/407] minor update --- ci/regression.sh.in | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/ci/regression.sh.in b/ci/regression.sh.in index e0da29e20e..aee991cd4a 100755 --- a/ci/regression.sh.in +++ b/ci/regression.sh.in @@ -240,8 +240,14 @@ config2() ./ci/blackbox.sh --driver=opae --app=diverge # disable DPI - CONFIGS="-DDPI_DISABLE -DFPU_FPNEW" ./ci/blackbox.sh --driver=rtlsim --app=dogfood - CONFIGS="-DDPI_DISABLE -DFPU_FPNEW" ./ci/blackbox.sh --driver=opae --app=dogfood + if [ "$XLEN" == "64" ]; then + # need to disable trig on 64-bit due to a bug inside fpnew's sqrt core. 
+ CONFIGS="-DDPI_DISABLE -DFPU_FPNEW" ./ci/blackbox.sh --driver=rtlsim --app=dogfood --args="-xtrig -xbar -xgbar" + CONFIGS="-DDPI_DISABLE -DFPU_FPNEW" ./ci/blackbox.sh --driver=opae --app=dogfood --args="-xtrig -xbar -xgbar" + else + CONFIGS="-DDPI_DISABLE -DFPU_FPNEW" ./ci/blackbox.sh --driver=rtlsim --app=dogfood + CONFIGS="-DDPI_DISABLE -DFPU_FPNEW" ./ci/blackbox.sh --driver=opae --app=dogfood + fi # custom program startup address make -C tests/regression/dogfood clean-kernel From 6eee0728fbe5eb67af87f7cacccb0a02dbb87c72 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Thu, 29 Aug 2024 03:22:09 -0700 Subject: [PATCH 121/407] minor update --- hw/syn/xilinx/dut/project.tcl | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/hw/syn/xilinx/dut/project.tcl b/hw/syn/xilinx/dut/project.tcl index bd9cb02e70..c89645c5f7 100644 --- a/hw/syn/xilinx/dut/project.tcl +++ b/hw/syn/xilinx/dut/project.tcl @@ -65,10 +65,7 @@ read_xdc $xdc_file add_files -norecurse -verbose $vsources_list # process defines -set obj [current_fileset] -foreach def $vdefines_list { - set_property verilog_define $def $obj -} +set_property verilog_define ${vdefines_list} [current_fileset] # add fpu ip if {[info exists ::env(FPU_IP)]} { From 7d0c1411297d99a34c49ed58a60fde63077d8b83 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sat, 31 Aug 2024 01:44:41 -0700 Subject: [PATCH 122/407] minor updates --- hw/rtl/VX_platform.vh | 3 - hw/rtl/core/VX_alu_muldiv.sv | 2 +- hw/rtl/core/VX_alu_unit.sv | 1 + hw/rtl/core/VX_core.sv | 2 +- hw/rtl/core/VX_mem_unit.sv | 4 +- hw/rtl/fpu/VX_fpu_dsp.sv | 141 +++++++++++++++++++++----------- hw/rtl/libs/VX_mem_coalescer.sv | 25 ++---- hw/rtl/libs/VX_pe_serializer.sv | 2 +- hw/rtl/libs/VX_stream_arb.sv | 4 +- hw/rtl/libs/VX_stream_switch.sv | 47 ++++++----- hw/rtl/libs/VX_stream_unpack.sv | 6 +- hw/syn/xilinx/xrt/Makefile | 6 +- sim/xrtsim/Makefile | 2 +- sim/xrtsim/verilator.vlt.in | 2 +- 14 files changed, 144 insertions(+), 103 deletions(-) diff --git 
a/hw/rtl/VX_platform.vh b/hw/rtl/VX_platform.vh index 730b3cd7d8..74907ad4c3 100644 --- a/hw/rtl/VX_platform.vh +++ b/hw/rtl/VX_platform.vh @@ -254,9 +254,6 @@ // lut(x): (x & 8) != 0 `define TO_OUT_BUF_LUTRAM(s) ((s & 8) != 0) -// rbuf(x): (x <= 2) ? 3 : x -`define TO_OUT_RBUF(s) ((s & 8) | `MAX(s & 7, 3)) - `define REPEAT(n,f,s) `_REPEAT_``n(f,s) `define _REPEAT_0(f,s) `define _REPEAT_1(f,s) `f(0) diff --git a/hw/rtl/core/VX_alu_muldiv.sv b/hw/rtl/core/VX_alu_muldiv.sv index 8e3a1ba4fb..bd498a0bba 100644 --- a/hw/rtl/core/VX_alu_muldiv.sv +++ b/hw/rtl/core/VX_alu_muldiv.sv @@ -325,7 +325,7 @@ module VX_alu_muldiv #( .NUM_INPUTS (2), .DATAW (TAG_WIDTH + (NUM_LANES * `XLEN)), .ARBITER ("P"), - .OUT_BUF (1) + .OUT_BUF (2) ) rsp_buf ( .clk (clk), .reset (reset), diff --git a/hw/rtl/core/VX_alu_unit.sv b/hw/rtl/core/VX_alu_unit.sv index 8b2bf7363c..9b3d6deea7 100644 --- a/hw/rtl/core/VX_alu_unit.sv +++ b/hw/rtl/core/VX_alu_unit.sv @@ -109,6 +109,7 @@ module VX_alu_unit #( `endif + // can accept new request? assign per_block_execute_if[block_idx].ready = `ifdef EXT_M_ENABLE is_muldiv_op ? 
muldiv_execute_if.ready : diff --git a/hw/rtl/core/VX_core.sv b/hw/rtl/core/VX_core.sv index 30a774ee5f..f306c5d232 100644 --- a/hw/rtl/core/VX_core.sv +++ b/hw/rtl/core/VX_core.sv @@ -212,7 +212,7 @@ module VX_core import VX_gpu_pkg::*; #( `ifdef PERF_ENABLE .lmem_perf (mem_perf_tmp_if.lmem), `endif - .lsu_mem_in_if (lsu_mem_if), + .lsu_mem_if (lsu_mem_if), .dcache_bus_if (dcache_bus_if) ); diff --git a/hw/rtl/core/VX_mem_unit.sv b/hw/rtl/core/VX_mem_unit.sv index bb00df0b5a..cd901f8ace 100644 --- a/hw/rtl/core/VX_mem_unit.sv +++ b/hw/rtl/core/VX_mem_unit.sv @@ -23,7 +23,7 @@ module VX_mem_unit import VX_gpu_pkg::*; #( output cache_perf_t lmem_perf, `endif - VX_lsu_mem_if.slave lsu_mem_in_if [`NUM_LSU_BLOCKS], + VX_lsu_mem_if.slave lsu_mem_if [`NUM_LSU_BLOCKS], VX_mem_bus_if.master dcache_bus_if [DCACHE_NUM_REQS] ); VX_lsu_mem_if #( @@ -54,7 +54,7 @@ module VX_mem_unit import VX_gpu_pkg::*; #( ) lmem_switch ( .clk (clk), .reset (reset), - .lsu_in_if (lsu_mem_in_if[i]), + .lsu_in_if (lsu_mem_if[i]), .global_out_if(lsu_dcache_if[i]), .local_out_if (lsu_lmem_if[i]) ); diff --git a/hw/rtl/fpu/VX_fpu_dsp.sv b/hw/rtl/fpu/VX_fpu_dsp.sv index 0f0e551b78..b1f115155a 100644 --- a/hw/rtl/fpu/VX_fpu_dsp.sv +++ b/hw/rtl/fpu/VX_fpu_dsp.sv @@ -54,11 +54,23 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #( localparam NUM_FPCORES = 4; localparam FPCORES_BITS = `LOG2UP(NUM_FPCORES); + localparam REQ_DATAW = NUM_LANES + TAG_WIDTH + `INST_FPU_BITS + `INST_FMT_BITS + `INST_FRM_BITS + 3 * (NUM_LANES * `XLEN); localparam RSP_DATAW = (NUM_LANES * 32) + 1 + $bits(fflags_t) + TAG_WIDTH; `UNUSED_VAR (fmt) + wire [NUM_FPCORES-1:0] per_core_valid_in; + wire [NUM_FPCORES-1:0][REQ_DATAW-1:0] per_core_data_in; + wire [NUM_FPCORES-1:0][NUM_LANES-1:0] per_core_mask_in; + wire [NUM_FPCORES-1:0][TAG_WIDTH-1:0] per_core_tag_in; + wire [NUM_FPCORES-1:0][`INST_FPU_BITS-1:0] per_core_op_type; + wire [NUM_FPCORES-1:0][`INST_FMT_BITS-1:0] per_core_fmt; + wire [NUM_FPCORES-1:0][`INST_FRM_BITS-1:0] 
per_core_frm; + wire [NUM_FPCORES-1:0][NUM_LANES-1:0][31:0] per_core_dataa; + wire [NUM_FPCORES-1:0][NUM_LANES-1:0][31:0] per_core_datab; + wire [NUM_FPCORES-1:0][NUM_LANES-1:0][31:0] per_core_datac; wire [NUM_FPCORES-1:0] per_core_ready_in; + wire [NUM_FPCORES-1:0][NUM_LANES-1:0][31:0] per_core_result; wire [NUM_FPCORES-1:0][TAG_WIDTH-1:0] per_core_tag_out; wire [NUM_FPCORES-1:0] per_core_ready_out; @@ -94,18 +106,44 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #( `UNUSED_VAR (datab) `UNUSED_VAR (datac) - // Decode instruction type + // Decode fpu core type wire [FPCORES_BITS-1:0] core_select = op_type[3:2]; - wire is_sqrt = op_type[0]; - wire is_itof = op_type[1]; - wire is_signed = ~op_type[0]; - wire is_madd = op_type[1]; - wire is_neg = op_type[0]; - wire is_sub = fmt[1]; - - // can accept new request? - assign per_core_ready_in[FPU_DIVSQRT] = div_sqrt_ready_in[is_sqrt]; - assign ready_in = per_core_ready_in[core_select]; + + VX_stream_switch #( + .DATAW (REQ_DATAW), + .NUM_INPUTS (1), + .NUM_OUTPUTS (NUM_FPCORES), + .OUT_BUF (0) + ) req_switch ( + .clk (clk), + .reset (reset), + .sel_in (core_select), + .valid_in (valid_in), + .ready_in (ready_in), + .data_in ({mask_in, tag_in, op_type, fmt, frm, dataa_s, datab_s, datac_s}), + .data_out (per_core_data_in), + .valid_out (per_core_valid_in), + .ready_out (per_core_ready_in) + ); + + for (genvar i = 0; i < NUM_FPCORES; ++i) begin + assign { + per_core_mask_in[i], + per_core_tag_in[i], + per_core_op_type[i], + per_core_fmt[i], + per_core_frm[i], + per_core_dataa[i], + per_core_datab[i], + per_core_datac[i] + } = per_core_data_in[i]; + end + + // FMA core + + wire is_madd = per_core_op_type[FPU_FMA][1]; + wire is_neg = per_core_op_type[FPU_FMA][0]; + wire is_sub = per_core_fmt[FPU_FMA][1]; VX_fpu_fma #( .NUM_LANES (NUM_LANES), @@ -113,17 +151,17 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #( ) fpu_fma ( .clk (clk), .reset (fma_reset), - .valid_in (valid_in && (core_select == FPU_FMA)), + .valid_in 
(per_core_valid_in[FPU_FMA]), .ready_in (per_core_ready_in[FPU_FMA]), - .mask_in (mask_in), - .tag_in (tag_in), - .frm (frm), + .mask_in (per_core_mask_in[FPU_FMA]), + .tag_in (per_core_tag_in[FPU_FMA]), + .frm (per_core_frm[FPU_FMA]), .is_madd (is_madd), .is_sub (is_sub), .is_neg (is_neg), - .dataa (dataa_s), - .datab (datab_s), - .datac (datac_s), + .dataa (per_core_dataa[FPU_FMA]), + .datab (per_core_datab[FPU_FMA]), + .datac (per_core_datac[FPU_FMA]), .has_fflags (per_core_has_fflags[FPU_FMA]), .fflags (per_core_fflags[FPU_FMA]), .result (per_core_result[FPU_FMA]), @@ -132,19 +170,24 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #( .valid_out (per_core_valid_out[FPU_FMA]) ); + // Div/Sqrt cores + + wire is_sqrt = per_core_op_type[FPU_DIVSQRT][0]; + assign per_core_ready_in[FPU_DIVSQRT] = div_sqrt_ready_in[is_sqrt]; + VX_fpu_div #( .NUM_LANES (NUM_LANES), .TAG_WIDTH (TAG_WIDTH) ) fpu_div ( .clk (clk), .reset (div_reset), - .valid_in (valid_in && (core_select == FPU_DIVSQRT) && ~is_sqrt), + .valid_in (per_core_valid_in[FPU_DIVSQRT] && ~is_sqrt), .ready_in (div_sqrt_ready_in[0]), - .mask_in (mask_in), - .tag_in (tag_in), - .frm (frm), - .dataa (dataa_s), - .datab (datab_s), + .mask_in (per_core_mask_in[FPU_DIVSQRT]), + .tag_in (per_core_tag_in[FPU_DIVSQRT]), + .frm (per_core_frm[FPU_DIVSQRT]), + .dataa (per_core_dataa[FPU_DIVSQRT]), + .datab (per_core_datab[FPU_DIVSQRT]), .has_fflags (div_sqrt_has_fflags[0]), .fflags (div_sqrt_fflags[0]), .result (div_sqrt_result[0]), @@ -159,12 +202,12 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #( ) fpu_sqrt ( .clk (clk), .reset (sqrt_reset), - .valid_in (valid_in && (core_select == FPU_DIVSQRT) && is_sqrt), + .valid_in (per_core_valid_in[FPU_DIVSQRT] && is_sqrt), .ready_in (div_sqrt_ready_in[1]), - .mask_in (mask_in), - .tag_in (tag_in), - .frm (frm), - .dataa (dataa_s), + .mask_in (per_core_mask_in[FPU_DIVSQRT]), + .tag_in (per_core_tag_in[FPU_DIVSQRT]), + .frm (per_core_frm[FPU_DIVSQRT]), + .dataa (per_core_dataa[FPU_DIVSQRT]), 
.has_fflags (div_sqrt_has_fflags[1]), .fflags (div_sqrt_fflags[1]), .result (div_sqrt_result[1]), @@ -173,23 +216,27 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #( .ready_out (div_sqrt_ready_out[1]) ); + // CVT core + + wire is_itof = per_core_op_type[FPU_CVT][1]; + wire is_signed = ~per_core_op_type[FPU_CVT][0]; wire cvt_ret_int_in = ~is_itof; wire cvt_ret_int_out; VX_fpu_cvt #( .NUM_LANES (NUM_LANES), - .TAG_WIDTH (TAG_WIDTH+1) + .TAG_WIDTH (1+TAG_WIDTH) ) fpu_cvt ( .clk (clk), .reset (cvt_reset), - .valid_in (valid_in && (core_select == FPU_CVT)), + .valid_in (per_core_valid_in[FPU_CVT]), .ready_in (per_core_ready_in[FPU_CVT]), - .mask_in (mask_in), - .tag_in ({cvt_ret_int_in, tag_in}), - .frm (frm), + .mask_in (per_core_mask_in[FPU_CVT]), + .tag_in ({cvt_ret_int_in, per_core_tag_in[FPU_CVT]}), + .frm (per_core_frm[FPU_CVT]), .is_itof (is_itof), .is_signed (is_signed), - .dataa (dataa_s), + .dataa (per_core_dataa[FPU_CVT]), .has_fflags (per_core_has_fflags[FPU_CVT]), .fflags (per_core_fflags[FPU_CVT]), .result (per_core_result[FPU_CVT]), @@ -198,12 +245,14 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #( .ready_out (per_core_ready_out[FPU_CVT]) ); - wire ncp_ret_int_in = (op_type == `INST_FPU_CMP) - || `INST_FPU_IS_CLASS(op_type, frm) - || `INST_FPU_IS_MVXW(op_type, frm); + // NCP core + + wire ncp_ret_int_in = (per_core_op_type[FPU_NCP] == `INST_FPU_CMP) + || `INST_FPU_IS_CLASS(per_core_op_type[FPU_NCP], per_core_frm[FPU_NCP]) + || `INST_FPU_IS_MVXW(per_core_op_type[FPU_NCP], per_core_frm[FPU_NCP]); wire ncp_ret_int_out; - wire ncp_ret_sext_in = `INST_FPU_IS_MVXW(op_type, frm); + wire ncp_ret_sext_in = `INST_FPU_IS_MVXW(per_core_op_type[FPU_NCP], per_core_frm[FPU_NCP]); wire ncp_ret_sext_out; VX_fpu_ncp #( @@ -212,14 +261,14 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #( ) fpu_ncp ( .clk (clk), .reset (ncp_reset), - .valid_in (valid_in && (core_select == FPU_NCP)), + .valid_in (per_core_valid_in[FPU_NCP]), .ready_in (per_core_ready_in[FPU_NCP]), - .mask_in 
(mask_in), - .tag_in ({ncp_ret_sext_in, ncp_ret_int_in, tag_in}), - .op_type (op_type), - .frm (frm), - .dataa (dataa_s), - .datab (datab_s), + .mask_in (per_core_mask_in[FPU_NCP]), + .tag_in ({ncp_ret_sext_in, ncp_ret_int_in, per_core_tag_in[FPU_NCP]}), + .op_type (per_core_op_type[FPU_NCP]), + .frm (per_core_frm[FPU_NCP]), + .dataa (per_core_dataa[FPU_NCP]), + .datab (per_core_datab[FPU_NCP]), .result (per_core_result[FPU_NCP]), .has_fflags (per_core_has_fflags[FPU_NCP]), .fflags (per_core_fflags[FPU_NCP]), diff --git a/hw/rtl/libs/VX_mem_coalescer.sv b/hw/rtl/libs/VX_mem_coalescer.sv index e15d065642..5c283e06c2 100644 --- a/hw/rtl/libs/VX_mem_coalescer.sv +++ b/hw/rtl/libs/VX_mem_coalescer.sv @@ -80,7 +80,6 @@ module VX_mem_coalescer #( `RUNTIME_ASSERT ((~out_rsp_valid || out_rsp_mask != 0), ("invalid request mask")); localparam TAG_ID_WIDTH = TAG_WIDTH - UUID_WIDTH; - localparam NUM_REQS_W = `LOG2UP(NUM_REQS); // tag + mask + offest localparam IBUF_DATA_WIDTH = TAG_ID_WIDTH + NUM_REQS + (NUM_REQS * DATA_RATIO_W); @@ -115,13 +114,8 @@ module VX_mem_coalescer #( logic [NUM_REQS-1:0] addr_matches_r, addr_matches_n; logic [NUM_REQS-1:0] req_rem_mask_r, req_rem_mask_n; - wire [OUT_REQS-1:0][NUM_REQS_W-1:0] seed_idx; - - wire [NUM_REQS-1:0][OUT_ADDR_WIDTH-1:0] in_addr_base; wire [NUM_REQS-1:0][DATA_RATIO_W-1:0] in_addr_offset; - for (genvar i = 0; i < NUM_REQS; i++) begin - assign in_addr_base[i] = in_req_addr[i][ADDR_WIDTH-1:DATA_RATIO_W]; assign in_addr_offset[i] = in_req_addr[i][DATA_RATIO_W-1:0]; end @@ -140,21 +134,18 @@ module VX_mem_coalescer #( .valid_out (batch_valid_n[i]) ); - if (OUT_REQS > 1) begin - assign seed_idx[i] = {(NUM_REQS_W-DATA_RATIO_W)'(i), batch_idx}; - end else begin - assign seed_idx[i] = batch_idx; + wire [DATA_RATIO-1:0][OUT_ADDR_WIDTH-1:0] addr_base; + wire [DATA_RATIO-1:0][FLAGS_WIDTH-1:0] req_flags; + for (genvar j = 0; j < DATA_RATIO; ++j) begin + assign addr_base[j] = in_req_addr[DATA_RATIO * i + j][ADDR_WIDTH-1:DATA_RATIO_W]; + 
assign req_flags[j] = in_req_flags[DATA_RATIO * i + j]; end - end - for (genvar i = 0; i < OUT_REQS; ++i) begin - assign seed_addr_n[i] = in_addr_base[seed_idx[i]]; - assign seed_flags_n[i] = in_req_flags[seed_idx[i]]; - end + assign seed_addr_n[i] = addr_base[batch_idx]; + assign seed_flags_n[i] = req_flags[batch_idx]; - for (genvar i = 0; i < OUT_REQS; ++i) begin for (genvar j = 0; j < DATA_RATIO; ++j) begin - assign addr_matches_n[i * DATA_RATIO + j] = (in_addr_base[i * DATA_RATIO + j] == seed_addr_n[i]); + assign addr_matches_n[i * DATA_RATIO + j] = (addr_base[j] == seed_addr_n[i]); end end diff --git a/hw/rtl/libs/VX_pe_serializer.sv b/hw/rtl/libs/VX_pe_serializer.sv index 7a891cfc77..2f9c83483b 100644 --- a/hw/rtl/libs/VX_pe_serializer.sv +++ b/hw/rtl/libs/VX_pe_serializer.sv @@ -128,7 +128,7 @@ module VX_pe_serializer #( data_out_r <= data_out_n; end - assign enable = ready_out_u || ~batch_out_done; + assign enable = ready_out_u || ~valid_out_u; assign ready_in = enable && batch_in_done; assign valid_out_u = batch_out_done; diff --git a/hw/rtl/libs/VX_stream_arb.sv b/hw/rtl/libs/VX_stream_arb.sv index 13cde1cd91..3a457f8b89 100644 --- a/hw/rtl/libs/VX_stream_arb.sv +++ b/hw/rtl/libs/VX_stream_arb.sv @@ -97,7 +97,7 @@ module VX_stream_arb #( .DATAW (DATAW), .ARBITER (ARBITER), .MAX_FANOUT (MAX_FANOUT), - .OUT_BUF (`TO_OUT_RBUF(OUT_BUF)) // to registered output + .OUT_BUF (3) ) fanout_slice_arb ( .clk (clk), .reset (reset), @@ -242,7 +242,7 @@ module VX_stream_arb #( .DATAW (DATAW), .ARBITER (ARBITER), .MAX_FANOUT (MAX_FANOUT), - .OUT_BUF (`TO_OUT_RBUF(OUT_BUF)) // to registered output + .OUT_BUF (3) ) fanout_fork_arb ( .clk (clk), .reset (reset), diff --git a/hw/rtl/libs/VX_stream_switch.sv b/hw/rtl/libs/VX_stream_switch.sv index c379dd7c0b..f3723ebb01 100644 --- a/hw/rtl/libs/VX_stream_switch.sv +++ b/hw/rtl/libs/VX_stream_switch.sv @@ -38,36 +38,36 @@ module VX_stream_switch #( ); if (NUM_INPUTS > NUM_OUTPUTS) begin - wire [NUM_OUTPUTS-1:0][NUM_REQS-1:0] 
valid_in_r; - wire [NUM_OUTPUTS-1:0][NUM_REQS-1:0][DATAW-1:0] data_in_r; + wire [NUM_OUTPUTS-1:0][NUM_REQS-1:0] valid_in_w; + wire [NUM_OUTPUTS-1:0][NUM_REQS-1:0][DATAW-1:0] data_in_w; for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin for (genvar j = 0; j < NUM_REQS; ++j) begin localparam ii = i * NUM_REQS + j; if (ii < NUM_INPUTS) begin - assign valid_in_r[i][j] = valid_in[ii]; - assign data_in_r[i][j] = data_in[ii]; + assign valid_in_w[i][j] = valid_in[ii]; + assign data_in_w[i][j] = data_in[ii]; end else begin - assign valid_in_r[i][j] = 0; - assign data_in_r[i][j] = '0; + assign valid_in_w[i][j] = 0; + assign data_in_w[i][j] = '0; end end end - wire [NUM_OUTPUTS-1:0] valid_out_r; - wire [NUM_OUTPUTS-1:0][DATAW-1:0] data_out_r; - wire [NUM_OUTPUTS-1:0] ready_out_r; + wire [NUM_OUTPUTS-1:0] valid_out_w; + wire [NUM_OUTPUTS-1:0][DATAW-1:0] data_out_w; + wire [NUM_OUTPUTS-1:0] ready_out_w; for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin - assign valid_out_r[i] = valid_in_r[i][sel_in[i]]; - assign data_out_r[i] = data_in_r[i][sel_in[i]]; + assign valid_out_w[i] = valid_in_w[i][sel_in[i]]; + assign data_out_w[i] = data_in_w[i][sel_in[i]]; end for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin for (genvar j = 0; j < NUM_REQS; ++j) begin localparam ii = i * NUM_REQS + j; if (ii < NUM_INPUTS) begin - assign ready_in[ii] = ready_out_r[i] & (sel_in[i] == LOG_NUM_REQS'(j)); + assign ready_in[ii] = ready_out_w[i] && (sel_in[i] == LOG_NUM_REQS'(j)); end end end @@ -80,9 +80,9 @@ module VX_stream_switch #( ) out_buf ( .clk (clk), .reset (reset), - .valid_in (valid_out_r[i]), - .ready_in (ready_out_r[i]), - .data_in (data_out_r[i]), + .valid_in (valid_out_w[i]), + .ready_in (ready_out_w[i]), + .data_in (data_out_w[i]), .data_out (data_out[i]), .valid_out (valid_out[i]), .ready_out (ready_out[i]) @@ -91,14 +91,14 @@ module VX_stream_switch #( end else if (NUM_OUTPUTS > NUM_INPUTS) begin - wire [NUM_INPUTS-1:0][NUM_REQS-1:0] valid_out_r; - wire [NUM_INPUTS-1:0][NUM_REQS-1:0] 
ready_out_r; + wire [NUM_INPUTS-1:0][NUM_REQS-1:0] valid_out_w; + wire [NUM_INPUTS-1:0][NUM_REQS-1:0] ready_out_w; for (genvar i = 0; i < NUM_INPUTS; ++i) begin for (genvar j = 0; j < NUM_REQS; ++j) begin - assign valid_out_r[i][j] = valid_in[i] & (sel_in[i] == LOG_NUM_REQS'(j)); + assign valid_out_w[i][j] = valid_in[i] && (sel_in[i] == LOG_NUM_REQS'(j)); end - assign ready_in[i] = ready_out_r[i][sel_in[i]]; + assign ready_in[i] = ready_out_w[i][sel_in[i]]; end for (genvar i = 0; i < NUM_INPUTS; ++i) begin @@ -112,17 +112,16 @@ module VX_stream_switch #( ) out_buf ( .clk (clk), .reset (reset), - .valid_in (valid_out_r[i][j]), - .ready_in (ready_out_r[i][j]), + .valid_in (valid_out_w[i][j]), + .ready_in (ready_out_w[i][j]), .data_in (data_in[i]), .data_out (data_out[ii]), .valid_out (valid_out[ii]), .ready_out (ready_out[ii]) ); end else begin - `UNUSED_VAR (reset) - `UNUSED_VAR (valid_out_r[i][j]) - assign ready_out_r[i][j] = '0; + `UNUSED_VAR (valid_out_w[i][j]) + assign ready_out_w[i][j] = '0; end end end diff --git a/hw/rtl/libs/VX_stream_unpack.sv b/hw/rtl/libs/VX_stream_unpack.sv index c81b300998..cb85d4804f 100644 --- a/hw/rtl/libs/VX_stream_unpack.sv +++ b/hw/rtl/libs/VX_stream_unpack.sv @@ -39,9 +39,9 @@ module VX_stream_unpack #( if (NUM_REQS > 1) begin reg [NUM_REQS-1:0] rem_mask; - wire [NUM_REQS-1:0] ready_out_r; + wire [NUM_REQS-1:0] ready_out_w; - wire [NUM_REQS-1:0] rem_mask_n = rem_mask & ~ready_out_r; + wire [NUM_REQS-1:0] rem_mask_n = rem_mask & ~ready_out_w; wire sent_all = ~(| (mask_in & rem_mask_n)); always @(posedge clk) begin @@ -65,7 +65,7 @@ module VX_stream_unpack #( .clk (clk), .reset (reset), .valid_in (valid_in && mask_in[i] && rem_mask[i]), - .ready_in (ready_out_r[i]), + .ready_in (ready_out_w[i]), .data_in ({data_in[i], tag_in}), .data_out ({data_out[i], tag_out[i]}), .valid_out (valid_out[i]), diff --git a/hw/syn/xilinx/xrt/Makefile b/hw/syn/xilinx/xrt/Makefile index 6368441e06..4e3259f340 100644 --- a/hw/syn/xilinx/xrt/Makefile +++ 
b/hw/syn/xilinx/xrt/Makefile @@ -90,7 +90,11 @@ else ifeq ($(DEV_ARCH), versal) # versal else # alveo -VPP_FLAGS += --connectivity.sp vortex_afu_1.m_axi_mem_0:HBM[0:15] +ifneq ($(findstring xilinx_u55c,$(XSA)),) + VPP_FLAGS += --connectivity.sp vortex_afu_1.m_axi_mem_0:HBM[0:31] +else + VPP_FLAGS += --connectivity.sp vortex_afu_1.m_axi_mem_0:HBM[0:15] +endif endif VPP_FLAGS += --report_level 2 diff --git a/sim/xrtsim/Makefile b/sim/xrtsim/Makefile index 81f8f28b5d..e45b0bfa24 100644 --- a/sim/xrtsim/Makefile +++ b/sim/xrtsim/Makefile @@ -71,7 +71,7 @@ VL_FLAGS += --x-initial unique --x-assign unique VL_FLAGS += -DSIMULATION -DSV_DPI VL_FLAGS += -DXLEN_$(XLEN) VL_FLAGS += $(CONFIGS) -VL_FLAGS += $(SRC_DIR)/verilator.vlt +VL_FLAGS += verilator.vlt VL_FLAGS += $(RTL_INCLUDE) VL_FLAGS += $(RTL_PKGS) diff --git a/sim/xrtsim/verilator.vlt.in b/sim/xrtsim/verilator.vlt.in index 893ecbbd33..56de6b2cf8 100644 --- a/sim/xrtsim/verilator.vlt.in +++ b/sim/xrtsim/verilator.vlt.in @@ -2,4 +2,4 @@ lint_off -rule BLKANDNBLK -file "@VORTEX_HOME@/third_party/cvfpu/*" lint_off -rule UNOPTFLAT -file "@VORTEX_HOME@/third_party/cvfpu/*" -lint_off -file "@VORTEX_HOME@/third_party/cvfpu/*" \ No newline at end of file +lint_off -file "@VORTEX_HOME@/third_party/cvfpu/*" From 01fedb066c4f602a7f8433cd4e4a1ce0b98cf332 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sat, 31 Aug 2024 01:57:08 -0700 Subject: [PATCH 123/407] minor updates --- hw/syn/xilinx/dut/common.mk | 9 ++++-- hw/syn/xilinx/dut/project.tcl | 37 +++++++++++++++++------- hw/syn/xilinx/sandbox/Makefile | 7 ++++- hw/syn/xilinx/sandbox/project.tcl.in | 27 +++++++++++++---- hw/syn/xilinx/scripts/package_kernel.tcl | 5 ++++ 5 files changed, 67 insertions(+), 18 deletions(-) diff --git a/hw/syn/xilinx/dut/common.mk b/hw/syn/xilinx/dut/common.mk index f0588ede80..b2a8e71c75 100644 --- a/hw/syn/xilinx/dut/common.mk +++ b/hw/syn/xilinx/dut/common.mk @@ -3,6 +3,8 @@ include $(ROOT_DIR)/config.mk DEVICE ?= xcu55c-fsvh2892-2L-e +MAX_JOBS 
?= 8 + VIVADO := $(XILINX_VIVADO)/bin/vivado SRC_DIR := $(VORTEX_HOME)/hw/syn/xilinx/dut @@ -11,6 +13,9 @@ RTL_DIR := $(VORTEX_HOME)/hw/rtl AFU_DIR := $(RTL_DIR)/afu/xrt SCRIPT_DIR := $(VORTEX_HOME)/hw/scripts +NCPUS := $(shell lscpu | grep "^Core(s) per socket:" | awk '{print $$4}') +JOBS ?= $(shell echo $$(( $(NCPUS) > $(MAX_JOBS) ? $(MAX_JOBS) : $(NCPUS) ))) + CONFIGS += -DNDEBUG CONFIGS += -DVIVADO CONFIGS += -DSYNTHESIS @@ -26,9 +31,9 @@ project_1/sources.txt: build: $(PROJECT).xpr $(PROJECT).xpr: project_1/sources.txt ifdef FPU_IP - FPU_IP=project_1/ip $(VIVADO) -mode batch -source $(SRC_DIR)/project.tcl -tclargs $(TOP_LEVEL_ENTITY) $(DEVICE) project_1/sources.txt $(SRC_DIR)/project.xdc $(SCRIPT_DIR) $(SRC_DIR)/../scripts + MAX_JOBS=$(JOBS) FPU_IP=project_1/ip $(VIVADO) -mode batch -source $(SRC_DIR)/project.tcl -tclargs $(TOP_LEVEL_ENTITY) $(DEVICE) project_1/sources.txt $(SRC_DIR)/project.xdc $(SCRIPT_DIR) $(SRC_DIR)/../scripts else - $(VIVADO) -mode batch -source $(SRC_DIR)/project.tcl -tclargs $(TOP_LEVEL_ENTITY) $(DEVICE) project_1/sources.txt $(SRC_DIR)/project.xdc $(SCRIPT_DIR) $(SRC_DIR)/../scripts + MAX_JOBS=$(JOBS) $(VIVADO) -mode batch -source $(SRC_DIR)/project.tcl -tclargs $(TOP_LEVEL_ENTITY) $(DEVICE) project_1/sources.txt $(SRC_DIR)/project.xdc $(SCRIPT_DIR) $(SRC_DIR)/../scripts endif clean: diff --git a/hw/syn/xilinx/dut/project.tcl b/hw/syn/xilinx/dut/project.tcl index c89645c5f7..e23ce2997a 100644 --- a/hw/syn/xilinx/dut/project.tcl +++ b/hw/syn/xilinx/dut/project.tcl @@ -15,9 +15,9 @@ set start_time [clock seconds] if { $::argc != 6 } { - puts "ERROR: Program \"$::argv0\" requires 5 arguments!\n" - puts "Usage: $::argv0 \n" - exit + puts "ERROR: Program \"$::argv0\" requires 5 arguments!\n" + puts "Usage: $::argv0 \n" + exit } # Set the project name @@ -30,11 +30,20 @@ set xdc_file [lindex $::argv 3] set tool_dir [lindex $::argv 4] set script_dir [lindex $::argv 5] -#puts top_module -#puts $device_part -#puts $vcs_file -#puts xdc_file 
-#puts $tool_dir +puts "Using top_module=$top_module" +puts "Using device_part=$device_part" +puts "Using vcs_file=$vcs_file" +puts "Using xdc_file=$xdc_file" +puts "Using tool_dir=$tool_dir" +puts "Using script_dir=$script_dir" + +# Set the number of jobs based on MAX_JOBS environment variable +if {[info exists ::env(MAX_JOBS)]} { + set num_jobs $::env(MAX_JOBS) + puts "using num_jobs=$num_jobs" +} else { + set num_jobs 0 +} # create fpu ip if {[info exists ::env(FPU_IP)]} { @@ -84,14 +93,22 @@ set_property \ -objects [get_runs synth_1] # Synthesis -launch_runs synth_1 +if {$num_jobs != 0} { + launch_runs synth_1 -jobs $num_jobs +} else { + launch_runs synth_1 +} wait_on_run synth_1 open_run synth_1 write_checkpoint -force post_synth.dcp report_utilization -file utilization.rpt -hierarchical -hierarchical_percentages # Implementation -launch_runs impl_1 +if {$num_jobs != 0} { + launch_runs impl_1 -jobs $num_jobs +} else { + launch_runs impl_1 +} wait_on_run impl_1 open_run impl_1 write_checkpoint -force post_impl.dcp diff --git a/hw/syn/xilinx/sandbox/Makefile b/hw/syn/xilinx/sandbox/Makefile index 94c054b57c..d1ebf9afaa 100644 --- a/hw/syn/xilinx/sandbox/Makefile +++ b/hw/syn/xilinx/sandbox/Makefile @@ -3,6 +3,8 @@ include $(ROOT_DIR)/config.mk DEVICE ?= xcu55c-fsvh2892-2L-e +MAX_JOBS ?= 8 + VIVADO := $(XILINX_VIVADO)/bin/vivado SRC_DIR := $(VORTEX_HOME)/hw/syn/xilinx/sandbox @@ -14,6 +16,9 @@ SCRIPT_DIR := $(VORTEX_HOME)/hw/scripts KERNEL ?= fibonacci +NCPUS := $(shell lscpu | grep "^Core(s) per socket:" | awk '{print $$4}') +JOBS ?= $(shell echo $$(( $(NCPUS) > $(MAX_JOBS) ? 
$(MAX_JOBS) : $(NCPUS) ))) + COE_FILE := $(shell realpath kernel.bin.coe) ESCAPED_COE_FILE := $(shell echo "$(COE_FILE)" | sed -e 's/[\/&]/\\&/g') @@ -58,7 +63,7 @@ project_1/sources.txt: build: project_1/project_1.xpr project_1/project_1.xpr: project_1/sources.txt kernel.bin.coe project2.tcl - $(VIVADO) -mode batch -source project2.tcl -tclargs $(DEVICE) project_1/sources.txt $(SCRIPT_DIR) + MAX_JOBS=$(JOBS) $(VIVADO) -mode batch -source project2.tcl -tclargs $(DEVICE) project_1/sources.txt $(SCRIPT_DIR) run: project_1/project_1.xpr $(VIVADO) project_1/project_1.xpr & diff --git a/hw/syn/xilinx/sandbox/project.tcl.in b/hw/syn/xilinx/sandbox/project.tcl.in index 0e9a23f0a9..d4fa45581f 100644 --- a/hw/syn/xilinx/sandbox/project.tcl.in +++ b/hw/syn/xilinx/sandbox/project.tcl.in @@ -24,9 +24,18 @@ set device_part [lindex $::argv 0] set vcs_file [lindex $::argv 1] set tool_dir [lindex $::argv 2] -#puts $device_part -#puts $vcs_file -#puts $tool_dir +uuts "Using device_part=$device_part" +puts "Using vcs_file=$vcs_file" +puts "Using tool_dir=$tool_dir" + +# Set the number of jobs based on MAX_JOBS environment variable +if {[info exists ::env(MAX_JOBS)]} { + set num_jobs $::env(MAX_JOBS) + puts "using num_jobs=$num_jobs" + #puts $num_jobs +} else { + set num_jobs 0 +} set origin_dir [file normalize "."] @@ -394,14 +403,22 @@ add_files -norecurse -fileset sources_1 $wrapper_path update_compile_order -fileset sources_1 # Synthesis -launch_runs synth_1 +if {$num_jobs != 0} { + launch_runs synth_1 -jobs $num_jobs +} else { + launch_runs synth_1 +} wait_on_run synth_1 open_run synth_1 write_checkpoint -force post_synth.dcp report_utilization -file utilization.rpt -hierarchical -hierarchical_percentages # Implementation -launch_runs impl_1 +if {$num_jobs != 0} { + launch_runs impl_1 -jobs $num_jobs +} else { + launch_runs impl_1 +} wait_on_run impl_1 open_run impl_1 write_checkpoint -force post_impl.dcp diff --git a/hw/syn/xilinx/scripts/package_kernel.tcl 
b/hw/syn/xilinx/scripts/package_kernel.tcl index c88bca2296..ed8a683acc 100644 --- a/hw/syn/xilinx/scripts/package_kernel.tcl +++ b/hw/syn/xilinx/scripts/package_kernel.tcl @@ -22,6 +22,11 @@ set vcs_file [lindex $::argv 1] set tool_dir [lindex $::argv 2] set build_dir [lindex $::argv 3] +puts "Using krnl_name=$krnl_name" +puts "Using vcs_file=$vcs_file" +puts "Using tool_dir=$tool_dir" +puts "Using build_dir=$build_dir" + set path_to_packaged "${build_dir}/xo/packaged_kernel" set path_to_tmp_project "${build_dir}/xo/project" From 83ea236b840ecd83e55a22e28d439f6d67edde29 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sat, 31 Aug 2024 01:58:21 -0700 Subject: [PATCH 124/407] minor update --- hw/syn/altera/dut/Makefile | 7 +------ hw/syn/xilinx/dut/Makefile | 7 +------ 2 files changed, 2 insertions(+), 12 deletions(-) diff --git a/hw/syn/altera/dut/Makefile b/hw/syn/altera/dut/Makefile index 924b7602b4..5f1dd62fe5 100644 --- a/hw/syn/altera/dut/Makefile +++ b/hw/syn/altera/dut/Makefile @@ -9,17 +9,12 @@ SCRIPT_DIR := $(VORTEX_HOME)/hw/scripts IP_CACHE_DIR := $(ROOT_DIR)/hw/syn/altera/ip_cache/$(DEVICE_FAMILY) -.PHONY: dogfood unittest pipeline mem_unit lmem cache fpu core issue vortex top +.PHONY: unittest pipeline mem_unit lmem cache fpu core issue vortex top ip-gen: $(IP_CACHE_DIR)/ip_gen.log $(IP_CACHE_DIR)/ip_gen.log: $(SCRIPT_DIR)/ip_gen.sh $(IP_CACHE_DIR) -dogfood: - mkdir -p dogfood/$(BUILD_DIR) - cp dogfood/Makefile dogfood/$(BUILD_DIR) - $(MAKE) -C dogfood/$(BUILD_DIR) clean && $(MAKE) -C dogfood/$(BUILD_DIR) > dogfood/$(BUILD_DIR)/build.log 2>&1 & - unittest: mkdir -p unittest/$(BUILD_DIR) cp unittest/Makefile unittest/$(BUILD_DIR) diff --git a/hw/syn/xilinx/dut/Makefile b/hw/syn/xilinx/dut/Makefile index b8f67b8a57..0255287fbd 100644 --- a/hw/syn/xilinx/dut/Makefile +++ b/hw/syn/xilinx/dut/Makefile @@ -5,12 +5,7 @@ PREFIX ?= build BUILD_DIR := $(PREFIX) -.PHONY: dogfood unittest pipeline mem_unit lmem cache fpu core issue vortex top - -dogfood: - mkdir -p 
dogfood/$(BUILD_DIR) - cp dogfood/Makefile dogfood/$(BUILD_DIR) - $(MAKE) -C dogfood/$(BUILD_DIR) clean && $(MAKE) -C dogfood/$(BUILD_DIR) > dogfood/$(BUILD_DIR)/build.log 2>&1 & +.PHONY: unittest pipeline mem_unit lmem cache fpu core issue vortex top unittest: mkdir -p unittest/$(BUILD_DIR) From 431c0cfc46d6ac55b0550f9daeb8190ee9029a8a Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sat, 31 Aug 2024 02:14:08 -0700 Subject: [PATCH 125/407] minor update --- hw/rtl/VX_socket.sv | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hw/rtl/VX_socket.sv b/hw/rtl/VX_socket.sv index 54822176da..833ba49d7c 100644 --- a/hw/rtl/VX_socket.sv +++ b/hw/rtl/VX_socket.sv @@ -106,7 +106,7 @@ module VX_socket import VX_gpu_pkg::*; #( .WRITE_ENABLE (0), .NC_ENABLE (0), .CORE_OUT_BUF (3), - .MEM_OUT_BUF (0) + .MEM_OUT_BUF (2) ) icache ( `ifdef PERF_ENABLE .cache_perf (mem_perf_tmp_if.icache), @@ -153,7 +153,7 @@ module VX_socket import VX_gpu_pkg::*; #( .DIRTY_BYTES (`DCACHE_WRITEBACK), .NC_ENABLE (1), .CORE_OUT_BUF (3), - .MEM_OUT_BUF (0) + .MEM_OUT_BUF (2) ) dcache ( `ifdef PERF_ENABLE .cache_perf (mem_perf_tmp_if.dcache), From 72c63a47f366704acfe936c861f0613b9a361e0a Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sun, 1 Sep 2024 01:19:24 -0700 Subject: [PATCH 126/407] adding read-first mode support to block ram --- hw/rtl/VX_platform.vh | 2 +- hw/rtl/core/VX_mem_unit_top.sv | 2 +- hw/rtl/core/VX_operands.sv | 2 +- hw/rtl/core/VX_split_join.sv | 3 +- hw/rtl/libs/VX_dp_ram.sv | 358 ++++++++++++++++++++++----------- hw/rtl/libs/VX_sp_ram.sv | 28 +-- hw/rtl/mem/VX_local_mem.sv | 2 +- 7 files changed, 259 insertions(+), 138 deletions(-) diff --git a/hw/rtl/VX_platform.vh b/hw/rtl/VX_platform.vh index 74907ad4c3..e15758d273 100644 --- a/hw/rtl/VX_platform.vh +++ b/hw/rtl/VX_platform.vh @@ -158,7 +158,7 @@ `define MAX_FANOUT 8 `define IF_DATA_SIZE(x) $bits(x.data) `define USE_FAST_BRAM (* ramstyle = "MLAB, no_rw_check" *) -`define NO_RW_RAM_CHECK (* altera_attribute = 
"-name add_pass_through_logic_to_inferred_rams off" *) +`define NO_RW_RAM_CHECK (* ramstyle = "no_rw_check" *) `define DISABLE_BRAM (* ramstyle = "logic" *) `define PRESERVE_NET (* preserve *) `elsif VIVADO diff --git a/hw/rtl/core/VX_mem_unit_top.sv b/hw/rtl/core/VX_mem_unit_top.sv index c1acb63825..1eac9da103 100644 --- a/hw/rtl/core/VX_mem_unit_top.sv +++ b/hw/rtl/core/VX_mem_unit_top.sv @@ -120,7 +120,7 @@ module VX_mem_unit_top import VX_gpu_pkg::*; #( `ifdef PERF_ENABLE .lmem_perf (lmem_perf), `endif - .lsu_mem_in_if (lsu_mem_if), + .lsu_mem_if (lsu_mem_if), .dcache_bus_if (mem_bus_if) ); diff --git a/hw/rtl/core/VX_operands.sv b/hw/rtl/core/VX_operands.sv index d84c1a0727..3025b9dab1 100644 --- a/hw/rtl/core/VX_operands.sv +++ b/hw/rtl/core/VX_operands.sv @@ -263,8 +263,8 @@ module VX_operands import VX_gpu_pkg::*; #( VX_dp_ram #( .DATAW (REGS_DATAW), .SIZE (PER_BANK_REGS * PER_ISSUE_WARPS), - .READ_ENABLE (1), .OUT_REG (1), + .READ_ENABLE (1), .WRENW (BYTEENW), `ifdef GPR_RESET .RESET_RAM (1), diff --git a/hw/rtl/core/VX_split_join.sv b/hw/rtl/core/VX_split_join.sv index 8689d216d3..c5542e1375 100644 --- a/hw/rtl/core/VX_split_join.sv +++ b/hw/rtl/core/VX_split_join.sv @@ -48,7 +48,8 @@ module VX_split_join import VX_gpu_pkg::*; #( for (genvar i = 0; i < `NUM_WARPS; ++i) begin : ipdom_stacks VX_ipdom_stack #( .WIDTH (`NUM_THREADS+`PC_BITS), - .DEPTH (`DV_STACK_SIZE) + .DEPTH (`DV_STACK_SIZE), + .OUT_REG (0) ) ipdom_stack ( .clk (clk), .reset (reset), diff --git a/hw/rtl/libs/VX_dp_ram.sv b/hw/rtl/libs/VX_dp_ram.sv index 6683eaecc0..70df4f6888 100644 --- a/hw/rtl/libs/VX_dp_ram.sv +++ b/hw/rtl/libs/VX_dp_ram.sv @@ -17,13 +17,13 @@ module VX_dp_ram #( parameter DATAW = 1, parameter SIZE = 1, - parameter ADDR_MIN = 0, parameter WRENW = 1, parameter OUT_REG = 0, - parameter NO_RWCHECK = 0, parameter LUTRAM = 0, + parameter NO_RWCHECK = 0, parameter RW_ASSERT = 0, parameter RESET_RAM = 0, + parameter RESET_OUT = 0, parameter READ_ENABLE = 0, parameter 
INIT_ENABLE = 0, parameter INIT_FILE = "", @@ -48,9 +48,10 @@ module VX_dp_ram #( if (INIT_FILE != "") begin \ initial $readmemh(INIT_FILE, ram); \ end else begin \ - initial \ + initial begin \ for (integer i = 0; i < SIZE; ++i) \ ram[i] = INIT_VALUE; \ + end \ end \ end @@ -61,79 +62,171 @@ module VX_dp_ram #( `RUNTIME_ASSERT(~write || (| wren), ("invalid write enable mask")); end - wire [DATAW-1:0] rdata_w; - -`ifdef SYNTHESIS - if (WRENW > 1) begin - `ifdef QUARTUS - if (LUTRAM != 0) begin - `USE_FAST_BRAM reg [WRENW-1:0][WSELW-1:0] ram [ADDR_MIN:SIZE-1]; - `RAM_INITIALIZATION - always @(posedge clk) begin - if (write) begin - for (integer i = 0; i < WRENW; ++i) begin - if (wren[i]) - ram[waddr][i] <= wdata[i * WSELW +: WSELW]; + if (OUT_REG && !READ_ENABLE) begin + `UNUSED_PARAM (NO_RWCHECK) + reg [DATAW-1:0] rdata_r; + wire cs = read || write; + if (WRENW != 1) begin + `ifdef QUARTUS + if (LUTRAM != 0) begin + `USE_FAST_BRAM reg [WRENW-1:0][WSELW-1:0] ram [0:SIZE-1]; + `RAM_INITIALIZATION + always @(posedge clk) begin + if (cs) begin + if (write) begin + for (integer i = 0; i < WRENW; ++i) begin + if (wren[i]) + ram[waddr][i] <= wdata[i * WSELW +: WSELW]; + end + end + if (RESET_OUT && reset) begin + rdata_r <= '0; + end else begin + rdata_r <= ram[raddr]; + end + end + end + end else begin + reg [WRENW-1:0][WSELW-1:0] ram [0:SIZE-1]; + `RAM_INITIALIZATION + always @(posedge clk) begin + if (cs) begin + if (write) begin + for (integer i = 0; i < WRENW; ++i) begin + if (wren[i]) + ram[waddr][i] <= wdata[i * WSELW +: WSELW]; + end + end + if (RESET_OUT && reset) begin + rdata_r <= '0; + end else begin + rdata_r <= ram[raddr]; + end end end end - assign rdata_w = ram[raddr]; - end else begin - if (NO_RWCHECK != 0) begin - `NO_RW_RAM_CHECK reg [WRENW-1:0][WSELW-1:0] ram [ADDR_MIN:SIZE-1]; + `else + // default synthesis + if (LUTRAM != 0) begin + `USE_FAST_BRAM reg [DATAW-1:0] ram [0:SIZE-1]; `RAM_INITIALIZATION always @(posedge clk) begin - if (write) begin - for 
(integer i = 0; i < WRENW; ++i) begin - if (wren[i]) - ram[waddr][i] <= wdata[i * WSELW +: WSELW]; + if (cs) begin + if (write) begin + for (integer i = 0; i < WRENW; ++i) begin + if (wren[i]) + ram[waddr][i * WSELW +: WSELW] <= wdata[i * WSELW +: WSELW]; + end + end + if (RESET_OUT && reset) begin + rdata_r <= '0; + end else begin + rdata_r <= ram[raddr]; end end end - assign rdata_w = ram[raddr]; end else begin - reg [WRENW-1:0][WSELW-1:0] ram [ADDR_MIN:SIZE-1]; + reg [DATAW-1:0] ram [0:SIZE-1]; `RAM_INITIALIZATION always @(posedge clk) begin - if (write) begin - for (integer i = 0; i < WRENW; ++i) begin - if (wren[i]) - ram[waddr][i] <= wdata[i * WSELW +: WSELW]; + if (cs) begin + if (write) begin + for (integer i = 0; i < WRENW; ++i) begin + if (wren[i]) + ram[waddr][i * WSELW +: WSELW] <= wdata[i * WSELW +: WSELW]; + end + end + if (RESET_OUT && reset) begin + rdata_r <= '0; + end else begin + rdata_r <= ram[raddr]; end end end - assign rdata_w = ram[raddr]; end - end - `else - // default synthesis - if (LUTRAM != 0) begin - `USE_FAST_BRAM reg [DATAW-1:0] ram [ADDR_MIN:SIZE-1]; - `RAM_INITIALIZATION - always @(posedge clk) begin - if (write) begin - for (integer i = 0; i < WRENW; ++i) begin - if (wren[i]) - ram[waddr][i * WSELW +: WSELW] <= wdata[i * WSELW +: WSELW]; + `endif + end else begin + if (LUTRAM != 0) begin + `USE_FAST_BRAM reg [DATAW-1:0] ram [0:SIZE-1]; + `RAM_INITIALIZATION + always @(posedge clk) begin + if (cs) begin + if (write) + ram[waddr] <= wdata; + if (RESET_OUT && reset) begin + rdata_r <= '0; + end else begin + rdata_r <= ram[raddr]; + end + end + end + + end else begin + reg [DATAW-1:0] ram [0:SIZE-1]; + `RAM_INITIALIZATION + always @(posedge clk) begin + if (cs) begin + if (write) + ram[waddr] <= wdata; + if (RESET_OUT && reset) begin + rdata_r <= '0; + end else begin + rdata_r <= ram[raddr]; + end end end end - assign rdata_w = ram[raddr]; - end else begin - if (NO_RWCHECK != 0) begin - `NO_RW_RAM_CHECK reg [DATAW-1:0] ram 
[ADDR_MIN:SIZE-1]; + end + assign rdata = rdata_r; + end else begin + // OUT_REG==0 || READ_ENABLE=1 + wire [DATAW-1:0] rdata_w; + `ifdef SYNTHESIS + if (WRENW > 1) begin + `ifdef QUARTUS + if (LUTRAM != 0) begin + `USE_FAST_BRAM reg [WRENW-1:0][WSELW-1:0] ram [0:SIZE-1]; `RAM_INITIALIZATION always @(posedge clk) begin if (write) begin for (integer i = 0; i < WRENW; ++i) begin if (wren[i]) - ram[waddr][i * WSELW +: WSELW] <= wdata[i * WSELW +: WSELW]; + ram[waddr][i] <= wdata[i * WSELW +: WSELW]; end end end assign rdata_w = ram[raddr]; end else begin - reg [DATAW-1:0] ram [ADDR_MIN:SIZE-1]; + if (NO_RWCHECK != 0) begin + `NO_RW_RAM_CHECK reg [WRENW-1:0][WSELW-1:0] ram [0:SIZE-1]; + `RAM_INITIALIZATION + always @(posedge clk) begin + if (write) begin + for (integer i = 0; i < WRENW; ++i) begin + if (wren[i]) + ram[waddr][i] <= wdata[i * WSELW +: WSELW]; + end + end + end + assign rdata_w = ram[raddr]; + end else begin + reg [WRENW-1:0][WSELW-1:0] ram [0:SIZE-1]; + `RAM_INITIALIZATION + always @(posedge clk) begin + if (write) begin + for (integer i = 0; i < WRENW; ++i) begin + if (wren[i]) + ram[waddr][i] <= wdata[i * WSELW +: WSELW]; + end + end + end + assign rdata_w = ram[raddr]; + end + end + `else + // default synthesis + if (LUTRAM != 0) begin + `USE_FAST_BRAM reg [DATAW-1:0] ram [0:SIZE-1]; `RAM_INITIALIZATION always @(posedge clk) begin if (write) begin @@ -144,23 +237,38 @@ module VX_dp_ram #( end end assign rdata_w = ram[raddr]; - end - end - `endif - end else begin - // (WRENW == 1) - if (LUTRAM != 0) begin - `USE_FAST_BRAM reg [DATAW-1:0] ram [ADDR_MIN:SIZE-1]; - `RAM_INITIALIZATION - always @(posedge clk) begin - if (write) begin - ram[waddr] <= wdata; + end else begin + if (NO_RWCHECK != 0) begin + `NO_RW_RAM_CHECK reg [DATAW-1:0] ram [0:SIZE-1]; + `RAM_INITIALIZATION + always @(posedge clk) begin + if (write) begin + for (integer i = 0; i < WRENW; ++i) begin + if (wren[i]) + ram[waddr][i * WSELW +: WSELW] <= wdata[i * WSELW +: WSELW]; + end + end + 
end + assign rdata_w = ram[raddr]; + end else begin + reg [DATAW-1:0] ram [0:SIZE-1]; + `RAM_INITIALIZATION + always @(posedge clk) begin + if (write) begin + for (integer i = 0; i < WRENW; ++i) begin + if (wren[i]) + ram[waddr][i * WSELW +: WSELW] <= wdata[i * WSELW +: WSELW]; + end + end + end + assign rdata_w = ram[raddr]; end end - assign rdata_w = ram[raddr]; + `endif end else begin - if (NO_RWCHECK != 0) begin - `NO_RW_RAM_CHECK reg [DATAW-1:0] ram [ADDR_MIN:SIZE-1]; + // (WRENW == 1) + if (LUTRAM != 0) begin + `USE_FAST_BRAM reg [DATAW-1:0] ram [0:SIZE-1]; `RAM_INITIALIZATION always @(posedge clk) begin if (write) begin @@ -169,77 +277,89 @@ module VX_dp_ram #( end assign rdata_w = ram[raddr]; end else begin - reg [DATAW-1:0] ram [ADDR_MIN:SIZE-1]; - `RAM_INITIALIZATION - always @(posedge clk) begin - if (write) begin - ram[waddr] <= wdata; + if (NO_RWCHECK != 0) begin + `NO_RW_RAM_CHECK reg [DATAW-1:0] ram [0:SIZE-1]; + `RAM_INITIALIZATION + always @(posedge clk) begin + if (write) begin + ram[waddr] <= wdata; + end end + assign rdata_w = ram[raddr]; + end else begin + reg [DATAW-1:0] ram [0:SIZE-1]; + `RAM_INITIALIZATION + always @(posedge clk) begin + if (write) begin + ram[waddr] <= wdata; + end + end + assign rdata_w = ram[raddr]; end - assign rdata_w = ram[raddr]; end end - end -`else - // simulation - reg [DATAW-1:0] ram [ADDR_MIN:SIZE-1]; - `RAM_INITIALIZATION - - wire [DATAW-1:0] ram_n; - for (genvar i = 0; i < WRENW; ++i) begin - assign ram_n[i * WSELW +: WSELW] = ((WRENW == 1) | wren[i]) ? wdata[i * WSELW +: WSELW] : ram[waddr][i * WSELW +: WSELW]; - end + `else + // simulation + reg [DATAW-1:0] ram [0:SIZE-1]; + `RAM_INITIALIZATION - reg [DATAW-1:0] prev_data; - reg [ADDRW-1:0] prev_waddr; - reg prev_write; + wire [DATAW-1:0] ram_n; + for (genvar i = 0; i < WRENW; ++i) begin + assign ram_n[i * WSELW +: WSELW] = ((WRENW == 1) | wren[i]) ? 
wdata[i * WSELW +: WSELW] : ram[waddr][i * WSELW +: WSELW]; + end - always @(posedge clk) begin - if (RESET_RAM && reset) begin - for (integer i = 0; i < SIZE; ++i) begin - ram[i] <= DATAW'(INIT_VALUE); - end - end else begin - if (write) begin - ram[waddr] <= ram_n; + always @(posedge clk) begin + if (RESET_RAM && reset) begin + for (integer i = 0; i < SIZE; ++i) begin + ram[i] <= DATAW'(INIT_VALUE); + end + end else begin + if (write) begin + ram[waddr] <= ram_n; + end end end - if (reset) begin - prev_write <= 0; - prev_data <= '0; - prev_waddr <= '0; + + if (LUTRAM || !NO_RWCHECK) begin + assign rdata_w = ram[raddr]; end else begin - prev_write <= write; - prev_data <= ram[waddr]; - prev_waddr <= waddr; - end - end + reg [DATAW-1:0] prev_data; + reg [ADDRW-1:0] prev_waddr; + reg prev_write; - if (LUTRAM || !NO_RWCHECK) begin - `UNUSED_VAR (prev_write) - `UNUSED_VAR (prev_data) - `UNUSED_VAR (prev_waddr) - assign rdata_w = ram[raddr]; - end else begin - assign rdata_w = (prev_write && (prev_waddr == raddr)) ? prev_data : ram[raddr]; - if (RW_ASSERT) begin - `RUNTIME_ASSERT(~read || (rdata_w == ram[raddr]), ("read after write hazard")); + always @(posedge clk) begin + if (reset) begin + prev_write <= 0; + prev_data <= '0; + prev_waddr <= '0; + end else begin + prev_write <= write; + prev_data <= ram[waddr]; + prev_waddr <= waddr; + end + end + + assign rdata_w = (prev_write && (prev_waddr == raddr)) ? 
prev_data : ram[raddr]; + if (RW_ASSERT) begin + `RUNTIME_ASSERT(~read || (rdata_w == ram[raddr]), ("read after write hazard")); + end end - end -`endif + `endif - if (OUT_REG != 0) begin - reg [DATAW-1:0] rdata_r; - always @(posedge clk) begin - if (READ_ENABLE && reset) begin - rdata_r <= '0; - end else if (!READ_ENABLE || read) begin - rdata_r <= rdata_w; + if (OUT_REG != 0) begin + reg [DATAW-1:0] rdata_r; + always @(posedge clk) begin + if (READ_ENABLE && reset) begin + rdata_r <= '0; + end else if (!READ_ENABLE || read) begin + rdata_r <= rdata_w; + end end + assign rdata = rdata_r; + end else begin + assign rdata = rdata_w; end - assign rdata = rdata_r; - end else begin - assign rdata = rdata_w; + end endmodule diff --git a/hw/rtl/libs/VX_sp_ram.sv b/hw/rtl/libs/VX_sp_ram.sv index 3e73a013fd..efce4b5f2f 100644 --- a/hw/rtl/libs/VX_sp_ram.sv +++ b/hw/rtl/libs/VX_sp_ram.sv @@ -17,13 +17,13 @@ module VX_sp_ram #( parameter DATAW = 1, parameter SIZE = 1, - parameter ADDR_MIN = 0, parameter WRENW = 1, parameter OUT_REG = 0, + parameter LUTRAM = 0, parameter NO_RWCHECK = 0, parameter RW_ASSERT = 0, - parameter LUTRAM = 0, parameter RESET_RAM = 0, + parameter RESET_OUT = 0, parameter READ_ENABLE = 0, parameter INIT_ENABLE = 0, parameter INIT_FILE = "", @@ -40,20 +40,20 @@ module VX_sp_ram #( output wire [DATAW-1:0] rdata ); VX_dp_ram #( - .DATAW (DATAW), - .SIZE (SIZE), - .ADDR_MIN (ADDR_MIN), - .WRENW (WRENW), - .OUT_REG (OUT_REG), + .DATAW (DATAW), + .SIZE (SIZE), + .WRENW (WRENW), + .OUT_REG (OUT_REG), + .LUTRAM (LUTRAM), .NO_RWCHECK (NO_RWCHECK), - .RW_ASSERT (RW_ASSERT), - .LUTRAM (LUTRAM), - .RESET_RAM (RESET_RAM), - .READ_ENABLE (READ_ENABLE), - .INIT_ENABLE (INIT_ENABLE), - .INIT_FILE (INIT_FILE), + .RW_ASSERT (RW_ASSERT), + .RESET_RAM (RESET_RAM), + .RESET_OUT (RESET_OUT), + .READ_ENABLE(READ_ENABLE), + .INIT_ENABLE(INIT_ENABLE), + .INIT_FILE (INIT_FILE), .INIT_VALUE (INIT_VALUE), - .ADDRW (ADDRW) + .ADDRW (ADDRW) ) dp_ram ( .clk (clk), .reset (reset), 
diff --git a/hw/rtl/mem/VX_local_mem.sv b/hw/rtl/mem/VX_local_mem.sv index 2ba09fd611..462103c09f 100644 --- a/hw/rtl/mem/VX_local_mem.sv +++ b/hw/rtl/mem/VX_local_mem.sv @@ -168,8 +168,8 @@ module VX_local_mem import VX_gpu_pkg::*; #( .DATAW (WORD_WIDTH), .SIZE (WORDS_PER_BANK), .WRENW (WORD_SIZE), - .READ_ENABLE (1), .OUT_REG (1), + .READ_ENABLE (0), .NO_RWCHECK (1) ) data_store ( .clk (clk), From d979cf277fef9ad3f19ec7ff296a290e5b422070 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sun, 1 Sep 2024 04:00:57 -0700 Subject: [PATCH 127/407] decoder logic specialization --- hw/rtl/afu/opae/vortex_afu.sv | 2 +- hw/rtl/cache/VX_bank_flush.sv | 8 +++- hw/rtl/cache/VX_cache_bypass.sv | 9 ++-- hw/rtl/cache/VX_cache_data.sv | 2 +- hw/rtl/cache/VX_cache_mshr.sv | 2 +- hw/rtl/libs/VX_cyclic_arbiter.sv | 12 ++++- hw/rtl/libs/VX_decoder.sv | 46 +++++++++++++++++++ .../{VX_onehot_encoder.sv => VX_encoder.sv} | 2 +- hw/rtl/libs/VX_matrix_arbiter.sv | 2 +- hw/rtl/libs/VX_mem_adapter.sv | 20 +++++++- hw/rtl/libs/VX_rr_arbiter.sv | 15 ++++-- hw/rtl/libs/VX_stream_xbar.sv | 18 ++++++-- 12 files changed, 116 insertions(+), 22 deletions(-) create mode 100644 hw/rtl/libs/VX_decoder.sv rename hw/rtl/libs/{VX_onehot_encoder.sv => VX_encoder.sv} (99%) diff --git a/hw/rtl/afu/opae/vortex_afu.sv b/hw/rtl/afu/opae/vortex_afu.sv index cb5725e783..61465103eb 100644 --- a/hw/rtl/afu/opae/vortex_afu.sv +++ b/hw/rtl/afu/opae/vortex_afu.sv @@ -963,7 +963,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ wire [COUT_TID_WIDTH-1:0] cout_tid; - VX_onehot_encoder #( + VX_encoder #( .N (`VX_MEM_BYTEEN_WIDTH) ) cout_tid_enc ( .data_in (vx_mem_req_byteen), diff --git a/hw/rtl/cache/VX_bank_flush.sv b/hw/rtl/cache/VX_bank_flush.sv index 2d62e354cc..608eefa7d2 100644 --- a/hw/rtl/cache/VX_bank_flush.sv +++ b/hw/rtl/cache/VX_bank_flush.sv @@ -114,7 +114,13 @@ module VX_bank_flush #( assign flush_line = counter_r[`CS_LINE_SEL_BITS-1:0]; if (WRITEBACK && `CS_WAY_SEL_BITS 
> 0) begin - assign flush_way = NUM_WAYS'(1) << counter_r[`CS_LINE_SEL_BITS +: `CS_WAY_SEL_BITS]; + VX_decoder #( + .N (`CS_WAY_SEL_BITS) + ) ctr_decoder ( + .shift_in (counter_r[`CS_LINE_SEL_BITS +: `CS_WAY_SEL_BITS]), + .data_in (1'b1), + .data_out (flush_way) + ); end else begin assign flush_way = {NUM_WAYS{1'b1}}; end diff --git a/hw/rtl/cache/VX_cache_bypass.sv b/hw/rtl/cache/VX_cache_bypass.sv index 7992ec9e8d..a3d872d7fd 100644 --- a/hw/rtl/cache/VX_cache_bypass.sv +++ b/hw/rtl/cache/VX_cache_bypass.sv @@ -56,6 +56,7 @@ module VX_cache_bypass #( localparam DIRECT_PASSTHRU = PASSTHRU && (`CS_WORD_SEL_BITS == 0) && (NUM_REQS == 1); localparam REQ_SEL_BITS = `CLOG2(NUM_REQS); + localparam REQ_SEL_WIDTH = `UP(REQ_SEL_BITS); localparam MUX_DATAW = 1 + WORD_SIZE + CORE_ADDR_WIDTH + `MEM_REQ_FLAGS_WIDTH + CORE_DATA_WIDTH + CORE_TAG_WIDTH; localparam WORDS_PER_LINE = LINE_SIZE / WORD_SIZE; @@ -72,7 +73,7 @@ module VX_cache_bypass #( wire core_req_nc_valid; wire [NUM_REQS-1:0] core_req_nc_valids; wire [NUM_REQS-1:0] core_req_nc_idxs; - wire [`UP(REQ_SEL_BITS)-1:0] core_req_nc_idx; + wire [REQ_SEL_WIDTH-1:0] core_req_nc_idx; wire [NUM_REQS-1:0] core_req_nc_sel; wire core_req_nc_ready; @@ -261,17 +262,15 @@ module VX_cache_bypass #( .data_out (mem_rsp_tag_id_nc) ); - wire [`UP(REQ_SEL_BITS)-1:0] rsp_idx; + wire [REQ_SEL_WIDTH-1:0] rsp_idx; if (NUM_REQS > 1) begin assign rsp_idx = mem_rsp_tag_id_nc[(CORE_TAG_ID_BITS + WSEL_BITS) +: REQ_SEL_BITS]; end else begin assign rsp_idx = 1'b0; end - wire [NUM_REQS-1:0] rsp_nc_valid = NUM_REQS'(is_mem_rsp_nc) << rsp_idx; - for (genvar i = 0; i < NUM_REQS; ++i) begin - assign core_rsp_in_valid[i] = core_bus_out_if[i].rsp_valid || rsp_nc_valid[i]; + assign core_rsp_in_valid[i] = core_bus_out_if[i].rsp_valid || (is_mem_rsp_nc && rsp_idx == REQ_SEL_WIDTH'(i)); assign core_bus_out_if[i].rsp_ready = core_rsp_in_ready[i]; end diff --git a/hw/rtl/cache/VX_cache_data.sv b/hw/rtl/cache/VX_cache_data.sv index 318463f76a..18d44b6dbf 100644 
--- a/hw/rtl/cache/VX_cache_data.sv +++ b/hw/rtl/cache/VX_cache_data.sv @@ -140,7 +140,7 @@ module VX_cache_data #( assign line_wren = fill; end - VX_onehot_encoder #( + VX_encoder #( .N (NUM_WAYS) ) way_enc ( .data_in (way_sel), diff --git a/hw/rtl/cache/VX_cache_mshr.sv b/hw/rtl/cache/VX_cache_mshr.sv index 4f81632699..0ca67d1598 100644 --- a/hw/rtl/cache/VX_cache_mshr.sv +++ b/hw/rtl/cache/VX_cache_mshr.sv @@ -148,7 +148,7 @@ module VX_cache_mshr #( .valid_out (allocate_rdy_n) ); - VX_onehot_encoder #( + VX_encoder #( .N (MSHR_SIZE) ) prev_sel ( .data_in (addr_matches & ~next_table_x), diff --git a/hw/rtl/libs/VX_cyclic_arbiter.sv b/hw/rtl/libs/VX_cyclic_arbiter.sv index a6673c8b7e..592b7a03bb 100644 --- a/hw/rtl/libs/VX_cyclic_arbiter.sv +++ b/hw/rtl/libs/VX_cyclic_arbiter.sv @@ -41,7 +41,7 @@ module VX_cyclic_arbiter #( localparam IS_POW2 = (1 << LOG_NUM_REQS) == NUM_REQS; wire [LOG_NUM_REQS-1:0] grant_index_um; - wire [NUM_REQS-1:0] grant_onehot_um; + wire [NUM_REQS-1:0] grant_onehot_w, grant_onehot_um; reg [LOG_NUM_REQS-1:0] grant_index_r; always @(posedge clk) begin @@ -65,10 +65,18 @@ module VX_cyclic_arbiter #( .valid_out (grant_valid) ); + VX_decoder #( + .N (LOG_NUM_REQS) + ) grant_decoder ( + .shift_in (grant_index), + .data_in (1'b1), + .data_out (grant_onehot_w) + ); + wire is_hit = requests[grant_index_r]; assign grant_index = is_hit ? grant_index_r : grant_index_um; - assign grant_onehot = is_hit ? (NUM_REQS'(1) << grant_index) : grant_onehot_um; + assign grant_onehot = is_hit ? grant_onehot_w : grant_onehot_um; end diff --git a/hw/rtl/libs/VX_decoder.sv b/hw/rtl/libs/VX_decoder.sv new file mode 100644 index 0000000000..34a378e713 --- /dev/null +++ b/hw/rtl/libs/VX_decoder.sv @@ -0,0 +1,46 @@ +// Copyright © 2019-2023 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +`include "VX_platform.vh" + +// Fast encoder using parallel prefix computation +// Adapted from BaseJump STL: http://bjump.org/data_out.html + +`TRACING_OFF +module VX_decoder #( + parameter N = 1, + parameter M = 1, +`ifdef VIVADO + parameter MODEL = 1, +`else + parameter MODEL = 0, +`endif + parameter D = 1 << N +) ( + input wire [N-1:0] shift_in, + input wire [M-1:0] data_in, + output wire [D-1:0][M-1:0] data_out +); + if (MODEL == 1) begin + reg [D-1:0][M-1:0] data_out_w; + always @(*) begin + data_out_w = '0; + data_out_w[shift_in] = data_in; + end + assign data_out = data_out_w; + end else begin + assign data_out = (D*M)'(data_in) << (shift_in * M); + end + +endmodule +`TRACING_ON diff --git a/hw/rtl/libs/VX_onehot_encoder.sv b/hw/rtl/libs/VX_encoder.sv similarity index 99% rename from hw/rtl/libs/VX_onehot_encoder.sv rename to hw/rtl/libs/VX_encoder.sv index 6246a673c5..85d72ce52b 100644 --- a/hw/rtl/libs/VX_onehot_encoder.sv +++ b/hw/rtl/libs/VX_encoder.sv @@ -17,7 +17,7 @@ // Adapted from BaseJump STL: http://bjump.org/data_out.html `TRACING_OFF -module VX_onehot_encoder #( +module VX_encoder #( parameter N = 1, parameter REVERSE = 0, parameter MODEL = 1, diff --git a/hw/rtl/libs/VX_matrix_arbiter.sv b/hw/rtl/libs/VX_matrix_arbiter.sv index 9f0ead356e..eff4eb7e13 100644 --- a/hw/rtl/libs/VX_matrix_arbiter.sv +++ b/hw/rtl/libs/VX_matrix_arbiter.sv @@ -74,7 +74,7 @@ module VX_matrix_arbiter #( assign grant_onehot = grant; - VX_onehot_encoder #( + VX_encoder #( .N (NUM_REQS) ) encoder ( .data_in (grant_onehot), diff --git 
a/hw/rtl/libs/VX_mem_adapter.sv b/hw/rtl/libs/VX_mem_adapter.sv index 068628be28..3e84a6292b 100644 --- a/hw/rtl/libs/VX_mem_adapter.sv +++ b/hw/rtl/libs/VX_mem_adapter.sv @@ -97,10 +97,26 @@ module VX_mem_adapter #( assign mem_req_addr_out_w = mem_req_addr_in_qual; end + VX_decoder #( + .N (D), + .M (SRC_DATA_WIDTH/8) + ) req_be_dec ( + .shift_in (req_idx), + .data_in (mem_req_byteen_in), + .data_out (mem_req_byteen_out_w) + ); + + VX_decoder #( + .N (D), + .M (SRC_DATA_WIDTH) + ) req_data_dec ( + .shift_in (req_idx), + .data_in (mem_req_data_in), + .data_out (mem_req_data_out_w) + ); + assign mem_req_valid_out_w = mem_req_valid_in; assign mem_req_rw_out_w = mem_req_rw_in; - assign mem_req_byteen_out_w = DST_DATA_SIZE'(mem_req_byteen_in) << ((DST_LDATAW-3)'(req_idx) << (SRC_LDATAW-3)); - assign mem_req_data_out_w = DST_DATA_WIDTH'(mem_req_data_in) << ((DST_LDATAW'(req_idx)) << SRC_LDATAW); assign mem_req_tag_out_w = DST_TAG_WIDTH'({mem_req_tag_in, req_idx}); assign mem_req_ready_in = mem_req_ready_out_w; diff --git a/hw/rtl/libs/VX_rr_arbiter.sv b/hw/rtl/libs/VX_rr_arbiter.sv index 5c279989ba..4b22a40046 100644 --- a/hw/rtl/libs/VX_rr_arbiter.sv +++ b/hw/rtl/libs/VX_rr_arbiter.sv @@ -448,7 +448,7 @@ module VX_rr_arbiter #( end end - VX_onehot_encoder #( + VX_encoder #( .N (NUM_REQS) ) onehot_encoder ( .data_in (grant_onehot), @@ -480,9 +480,16 @@ module VX_rr_arbiter #( end end - assign grant_index = grant_table[state]; - assign grant_onehot = NUM_REQS'(grant_valid) << grant_index; - assign grant_valid = (| requests); + VX_decoder #( + .N (LOG_NUM_REQS) + ) grant_decoder ( + .shift_in (grant_index), + .data_in (grant_valid), + .data_out (grant_onehot) + ); + + assign grant_index = grant_table[state]; + assign grant_valid = (| requests); end diff --git a/hw/rtl/libs/VX_stream_xbar.sv b/hw/rtl/libs/VX_stream_xbar.sv index 3dd30bc86a..5a3b129ea2 100644 --- a/hw/rtl/libs/VX_stream_xbar.sv +++ b/hw/rtl/libs/VX_stream_xbar.sv @@ -72,12 +72,17 @@ module VX_stream_xbar #( 
); for (genvar i = 0; i < NUM_INPUTS; ++i) begin - assign per_output_valid_in[i] = NUM_OUTPUTS'(valid_in[i]) << sel_in[i]; + VX_decoder #( + .N (OUT_WIDTH) + ) sel_in_decoder ( + .shift_in (sel_in[i]), + .data_in (valid_in[i]), + .data_out (per_output_valid_in[i]) + ); assign ready_in[i] = | per_output_ready_in_w[i]; end for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin - VX_stream_arb #( .NUM_INPUTS (NUM_INPUTS), .NUM_OUTPUTS (1), @@ -131,8 +136,15 @@ module VX_stream_xbar #( wire [NUM_OUTPUTS-1:0] valid_out_w, ready_out_w; wire [NUM_OUTPUTS-1:0][DATAW-1:0] data_out_w; + VX_decoder #( + .N (OUT_WIDTH) + ) sel_in_decoder ( + .shift_in (sel_in[0]), + .data_in (valid_in[0]), + .data_out (valid_out_w) + ); + assign ready_in[0] = ready_out_w[sel_in[0]]; - assign valid_out_w = NUM_OUTPUTS'(valid_in[0]) << sel_in[0]; assign data_out_w = {NUM_OUTPUTS{data_in[0]}}; for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin From 82150891944a2dcb46a12557671e23d6830da3aa Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sun, 1 Sep 2024 04:03:46 -0700 Subject: [PATCH 128/407] minor update --- hw/rtl/fpu/VX_fpu_dsp.sv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/rtl/fpu/VX_fpu_dsp.sv b/hw/rtl/fpu/VX_fpu_dsp.sv index b1f115155a..00b79ba218 100644 --- a/hw/rtl/fpu/VX_fpu_dsp.sv +++ b/hw/rtl/fpu/VX_fpu_dsp.sv @@ -54,7 +54,7 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #( localparam NUM_FPCORES = 4; localparam FPCORES_BITS = `LOG2UP(NUM_FPCORES); - localparam REQ_DATAW = NUM_LANES + TAG_WIDTH + `INST_FPU_BITS + `INST_FMT_BITS + `INST_FRM_BITS + 3 * (NUM_LANES * `XLEN); + localparam REQ_DATAW = NUM_LANES + TAG_WIDTH + `INST_FPU_BITS + `INST_FMT_BITS + `INST_FRM_BITS + 3 * (NUM_LANES * 32); localparam RSP_DATAW = (NUM_LANES * 32) + 1 + $bits(fflags_t) + TAG_WIDTH; `UNUSED_VAR (fmt) From 32636fac70c011d494da5931ed618f32794bfdcc Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sun, 1 Sep 2024 10:15:02 -0700 Subject: [PATCH 129/407] minor update --- runtime/opae/vortex.cpp | 3 +-- 
runtime/xrt/vortex.cpp | 14 +++++++------- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/runtime/opae/vortex.cpp b/runtime/opae/vortex.cpp index 390d5acc4e..970ccb12af 100755 --- a/runtime/opae/vortex.cpp +++ b/runtime/opae/vortex.cpp @@ -206,7 +206,6 @@ class vx_device { int get_caps(uint32_t caps_id, uint64_t * value) { uint64_t _value; - switch (caps_id) { case VX_CAPS_VERSION: _value = (dev_caps_ >> 0) & 0xff; @@ -227,7 +226,7 @@ class vx_device { _value = global_mem_size_; break; case VX_CAPS_LOCAL_MEM_SIZE: - _value = 1ull << ((dev_caps_ >> 48) & 0xff); + _value = 1ull << ((dev_caps_ >> 40) & 0xff); break; case VX_CAPS_ISA_FLAGS: _value = isa_caps_; diff --git a/runtime/xrt/vortex.cpp b/runtime/xrt/vortex.cpp index 8c273cf7f8..0ee9653df9 100644 --- a/runtime/xrt/vortex.cpp +++ b/runtime/xrt/vortex.cpp @@ -310,15 +310,15 @@ class vx_device { uint32_t num_banks = 1 << platform_.lg2_num_banks; uint64_t bank_size = 1ull << platform_.lg2_bank_size; - // adjust memory bank size to architecture limit + // adjust memory banks allocation to architecture limit int isa_arch = VX_ISA_ARCH(isa_caps_); if (isa_arch == 32) { uint64_t max_mem_size = 1ull << 32; - uint64_t need_bank_size = max_mem_size / num_banks; - if (bank_size > need_bank_size) { - printf("info: adjusted bank size from 0x%lx to 0x%lx bytes.\n", bank_size, need_bank_size); - bank_size = need_bank_size; - platform_.lg2_bank_size = log2ceil(bank_size); + uint32_t need_num_banks = max_mem_size / bank_size; + if (num_banks > need_num_banks) { + printf("info: adjusted number of banks from %d to %d.\n", num_banks, need_num_banks); + num_banks = need_num_banks; + platform_.lg2_num_banks = log2ceil(num_banks); } } @@ -416,7 +416,7 @@ class vx_device { _value = global_mem_size_; break; case VX_CAPS_LOCAL_MEM_SIZE: - _value = 1ull << ((dev_caps_ >> 48) & 0xff); + _value = 1ull << ((dev_caps_ >> 40) & 0xff); break; case VX_CAPS_ISA_FLAGS: _value = isa_caps_; From 
d7eae0c8862469e42c4d58647e720cc4b404f9bc Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Mon, 2 Sep 2024 02:33:30 -0700 Subject: [PATCH 130/407] minor update --- hw/rtl/VX_platform.vh | 2 +- hw/rtl/cache/VX_bank_flush.sv | 4 ++-- hw/rtl/libs/VX_cyclic_arbiter.sv | 4 ++-- hw/rtl/libs/VX_decoder.sv | 12 ++++-------- hw/rtl/libs/VX_mem_adapter.sv | 8 ++++---- hw/rtl/libs/VX_rr_arbiter.sv | 4 ++-- hw/rtl/libs/VX_stream_xbar.sv | 8 ++++---- 7 files changed, 19 insertions(+), 23 deletions(-) diff --git a/hw/rtl/VX_platform.vh b/hw/rtl/VX_platform.vh index e15758d273..74907ad4c3 100644 --- a/hw/rtl/VX_platform.vh +++ b/hw/rtl/VX_platform.vh @@ -158,7 +158,7 @@ `define MAX_FANOUT 8 `define IF_DATA_SIZE(x) $bits(x.data) `define USE_FAST_BRAM (* ramstyle = "MLAB, no_rw_check" *) -`define NO_RW_RAM_CHECK (* ramstyle = "no_rw_check" *) +`define NO_RW_RAM_CHECK (* altera_attribute = "-name add_pass_through_logic_to_inferred_rams off" *) `define DISABLE_BRAM (* ramstyle = "logic" *) `define PRESERVE_NET (* preserve *) `elsif VIVADO diff --git a/hw/rtl/cache/VX_bank_flush.sv b/hw/rtl/cache/VX_bank_flush.sv index 608eefa7d2..3ceffaa6bc 100644 --- a/hw/rtl/cache/VX_bank_flush.sv +++ b/hw/rtl/cache/VX_bank_flush.sv @@ -117,8 +117,8 @@ module VX_bank_flush #( VX_decoder #( .N (`CS_WAY_SEL_BITS) ) ctr_decoder ( - .shift_in (counter_r[`CS_LINE_SEL_BITS +: `CS_WAY_SEL_BITS]), - .data_in (1'b1), + .data_in (counter_r[`CS_LINE_SEL_BITS +: `CS_WAY_SEL_BITS]), + .valid_in (1'b1), .data_out (flush_way) ); end else begin diff --git a/hw/rtl/libs/VX_cyclic_arbiter.sv b/hw/rtl/libs/VX_cyclic_arbiter.sv index 592b7a03bb..167042a3a8 100644 --- a/hw/rtl/libs/VX_cyclic_arbiter.sv +++ b/hw/rtl/libs/VX_cyclic_arbiter.sv @@ -68,8 +68,8 @@ module VX_cyclic_arbiter #( VX_decoder #( .N (LOG_NUM_REQS) ) grant_decoder ( - .shift_in (grant_index), - .data_in (1'b1), + .data_in (grant_index), + .valid_in (1'b1), .data_out (grant_onehot_w) ); diff --git a/hw/rtl/libs/VX_decoder.sv b/hw/rtl/libs/VX_decoder.sv 
index 34a378e713..45b37b1dbb 100644 --- a/hw/rtl/libs/VX_decoder.sv +++ b/hw/rtl/libs/VX_decoder.sv @@ -20,26 +20,22 @@ module VX_decoder #( parameter N = 1, parameter M = 1, -`ifdef VIVADO - parameter MODEL = 1, -`else parameter MODEL = 0, -`endif parameter D = 1 << N ) ( - input wire [N-1:0] shift_in, - input wire [M-1:0] data_in, + input wire [N-1:0] data_in, + input wire [M-1:0] valid_in, output wire [D-1:0][M-1:0] data_out ); if (MODEL == 1) begin reg [D-1:0][M-1:0] data_out_w; always @(*) begin data_out_w = '0; - data_out_w[shift_in] = data_in; + data_out_w[data_in] = valid_in; end assign data_out = data_out_w; end else begin - assign data_out = (D*M)'(data_in) << (shift_in * M); + assign data_out = (D*M)'(valid_in) << (data_in * M); end endmodule diff --git a/hw/rtl/libs/VX_mem_adapter.sv b/hw/rtl/libs/VX_mem_adapter.sv index 3e84a6292b..6ee6060b83 100644 --- a/hw/rtl/libs/VX_mem_adapter.sv +++ b/hw/rtl/libs/VX_mem_adapter.sv @@ -101,8 +101,8 @@ module VX_mem_adapter #( .N (D), .M (SRC_DATA_WIDTH/8) ) req_be_dec ( - .shift_in (req_idx), - .data_in (mem_req_byteen_in), + .data_in (req_idx), + .valid_in (mem_req_byteen_in), .data_out (mem_req_byteen_out_w) ); @@ -110,8 +110,8 @@ module VX_mem_adapter #( .N (D), .M (SRC_DATA_WIDTH) ) req_data_dec ( - .shift_in (req_idx), - .data_in (mem_req_data_in), + .data_in (req_idx), + .valid_in (mem_req_data_in), .data_out (mem_req_data_out_w) ); diff --git a/hw/rtl/libs/VX_rr_arbiter.sv b/hw/rtl/libs/VX_rr_arbiter.sv index 4b22a40046..a2a9a9654a 100644 --- a/hw/rtl/libs/VX_rr_arbiter.sv +++ b/hw/rtl/libs/VX_rr_arbiter.sv @@ -483,8 +483,8 @@ module VX_rr_arbiter #( VX_decoder #( .N (LOG_NUM_REQS) ) grant_decoder ( - .shift_in (grant_index), - .data_in (grant_valid), + .data_in (grant_index), + .valid_in (grant_valid), .data_out (grant_onehot) ); diff --git a/hw/rtl/libs/VX_stream_xbar.sv b/hw/rtl/libs/VX_stream_xbar.sv index 5a3b129ea2..d1b01125f9 100644 --- a/hw/rtl/libs/VX_stream_xbar.sv +++ 
b/hw/rtl/libs/VX_stream_xbar.sv @@ -75,8 +75,8 @@ module VX_stream_xbar #( VX_decoder #( .N (OUT_WIDTH) ) sel_in_decoder ( - .shift_in (sel_in[i]), - .data_in (valid_in[i]), + .data_in (sel_in[i]), + .valid_in (valid_in[i]), .data_out (per_output_valid_in[i]) ); assign ready_in[i] = | per_output_ready_in_w[i]; @@ -139,8 +139,8 @@ module VX_stream_xbar #( VX_decoder #( .N (OUT_WIDTH) ) sel_in_decoder ( - .shift_in (sel_in[0]), - .data_in (valid_in[0]), + .data_in (sel_in[0]), + .valid_in (valid_in[0]), .data_out (valid_out_w) ); From 40e04a409e57798c07896f838c244a8018dcb436 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Mon, 2 Sep 2024 02:34:08 -0700 Subject: [PATCH 131/407] adding PE switch --- hw/rtl/core/VX_alu_unit.sv | 113 ++++++++++++--------------------- hw/rtl/core/VX_pe_switch.sv | 92 +++++++++++++++++++++++++++ hw/rtl/core/VX_sfu_unit.sv | 123 +++++++++++++----------------------- 3 files changed, 174 insertions(+), 154 deletions(-) create mode 100644 hw/rtl/core/VX_pe_switch.sv diff --git a/hw/rtl/core/VX_alu_unit.sv b/hw/rtl/core/VX_alu_unit.sv index 9b3d6deea7..120ecd5f0b 100644 --- a/hw/rtl/core/VX_alu_unit.sv +++ b/hw/rtl/core/VX_alu_unit.sv @@ -30,16 +30,20 @@ module VX_alu_unit #( `UNUSED_SPARAM (INSTANCE_ID) localparam BLOCK_SIZE = `NUM_ALU_BLOCKS; localparam NUM_LANES = `NUM_ALU_LANES; - localparam PID_BITS = `CLOG2(`NUM_THREADS / NUM_LANES); - localparam PID_WIDTH = `UP(PID_BITS); - localparam RSP_ARB_DATAW= `UUID_WIDTH + `NW_WIDTH + NUM_LANES + `PC_BITS + `NR_BITS + 1 + NUM_LANES * `XLEN + PID_WIDTH + 1 + 1; - localparam RSP_ARB_SIZE = 1 + `EXT_M_ENABLED; localparam PARTIAL_BW = (BLOCK_SIZE != `ISSUE_WIDTH) || (NUM_LANES != `NUM_THREADS); + localparam PE_COUNT = 1 + `EXT_M_ENABLED; + localparam PE_SEL_BITS = `CLOG2(PE_COUNT); + localparam PE_IDX_INT = 0; + localparam PE_IDX_MDV = PE_IDX_INT + `EXT_M_ENABLED; VX_execute_if #( .NUM_LANES (NUM_LANES) ) per_block_execute_if[BLOCK_SIZE](); + VX_commit_if #( + .NUM_LANES (NUM_LANES) + ) 
per_block_commit_if[BLOCK_SIZE](); + VX_dispatch_unit #( .BLOCK_SIZE (BLOCK_SIZE), .NUM_LANES (NUM_LANES), @@ -51,26 +55,41 @@ module VX_alu_unit #( .execute_if (per_block_execute_if) ); - VX_commit_if #( - .NUM_LANES (NUM_LANES) - ) per_block_commit_if[BLOCK_SIZE](); - for (genvar block_idx = 0; block_idx < BLOCK_SIZE; ++block_idx) begin : alus `RESET_RELAY_EN (block_reset, reset, (BLOCK_SIZE > 1)); - wire is_muldiv_op = `EXT_M_ENABLED && (per_block_execute_if[block_idx].data.op_args.alu.xtype == `ALU_TYPE_MULDIV); - VX_execute_if #( .NUM_LANES (NUM_LANES) - ) int_execute_if(); + ) pe_execute_if[PE_COUNT](); - VX_commit_if #( + VX_commit_if#( .NUM_LANES (NUM_LANES) - ) int_commit_if(); - - assign int_execute_if.valid = per_block_execute_if[block_idx].valid && ~is_muldiv_op; - assign int_execute_if.data = per_block_execute_if[block_idx].data; + ) pe_commit_if[PE_COUNT](); + + reg [PE_SEL_BITS-1:0] pe_select; + always @(*) begin + if (`EXT_M_ENABLED && (per_block_execute_if[block_idx].data.op_args.alu.xtype == `ALU_TYPE_MULDIV)) + pe_select = PE_IDX_MDV; + else + pe_select = PE_IDX_INT; + end + + VX_pe_switch #( + .PE_COUNT (PE_COUNT), + .NUM_LANES (NUM_LANES), + .ARBITER ("R"), + .REQ_OUT_BUF (0), + .RSP_OUT_BUF (PARTIAL_BW ? 
1 : 3) + ) pe_switch ( + .clk (clk), + .reset (block_reset), + .pe_sel (pe_select), + .execute_in_if (per_block_execute_if[block_idx]), + .commit_out_if (per_block_commit_if[block_idx]), + .execute_out_if (pe_execute_if), + .commit_in_if (pe_commit_if) + ); VX_alu_int #( .INSTANCE_ID ($sformatf("%s-int%0d", INSTANCE_ID, block_idx)), @@ -79,76 +98,22 @@ module VX_alu_unit #( ) alu_int ( .clk (clk), .reset (block_reset), - .execute_if (int_execute_if), + .execute_if (pe_execute_if[PE_IDX_INT]), .branch_ctl_if (branch_ctl_if[block_idx]), - .commit_if (int_commit_if) + .commit_if (pe_commit_if[PE_IDX_INT]) ); `ifdef EXT_M_ENABLE - - VX_execute_if #( - .NUM_LANES (NUM_LANES) - ) muldiv_execute_if(); - - VX_commit_if #( - .NUM_LANES (NUM_LANES) - ) muldiv_commit_if(); - - assign muldiv_execute_if.valid = per_block_execute_if[block_idx].valid && is_muldiv_op; - assign muldiv_execute_if.data = per_block_execute_if[block_idx].data; - VX_alu_muldiv #( .INSTANCE_ID ($sformatf("%s-muldiv%0d", INSTANCE_ID, block_idx)), .NUM_LANES (NUM_LANES) ) muldiv_unit ( .clk (clk), .reset (block_reset), - .execute_if (muldiv_execute_if), - .commit_if (muldiv_commit_if) + .execute_if (pe_execute_if[PE_IDX_MDV]), + .commit_if (pe_commit_if[PE_IDX_MDV]) ); - `endif - - // can accept new request? - assign per_block_execute_if[block_idx].ready = - `ifdef EXT_M_ENABLE - is_muldiv_op ? muldiv_execute_if.ready : - `endif - int_execute_if.ready; - - // send response - - VX_stream_arb #( - .NUM_INPUTS (RSP_ARB_SIZE), - .DATAW (RSP_ARB_DATAW), - .OUT_BUF (PARTIAL_BW ? 
1 : 3), - .ARBITER ("R") - ) rsp_arb ( - .clk (clk), - .reset (block_reset), - .valid_in ({ - `ifdef EXT_M_ENABLE - muldiv_commit_if.valid, - `endif - int_commit_if.valid - }), - .ready_in ({ - `ifdef EXT_M_ENABLE - muldiv_commit_if.ready, - `endif - int_commit_if.ready - }), - .data_in ({ - `ifdef EXT_M_ENABLE - muldiv_commit_if.data, - `endif - int_commit_if.data - }), - .data_out (per_block_commit_if[block_idx].data), - .valid_out (per_block_commit_if[block_idx].valid), - .ready_out (per_block_commit_if[block_idx].ready), - `UNUSED_PIN (sel_out) - ); end VX_gather_unit #( diff --git a/hw/rtl/core/VX_pe_switch.sv b/hw/rtl/core/VX_pe_switch.sv new file mode 100644 index 0000000000..9c8d7a7b3d --- /dev/null +++ b/hw/rtl/core/VX_pe_switch.sv @@ -0,0 +1,92 @@ +// Copyright © 2019-2023 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +`include "VX_define.vh" + +module VX_pe_switch import VX_gpu_pkg::*; #( + parameter PE_COUNT = 0, + parameter NUM_LANES = 0, + parameter REQ_OUT_BUF = 0, + parameter RSP_OUT_BUF = 0, + parameter `STRING ARBITER = "R" +) ( + input wire clk, + input wire reset, + input wire [PE_SEL_BITS-1:0] pe_sel, + VX_execute_if.slave execute_in_if, + VX_commit_if.master commit_out_if, + VX_execute_if.master execute_out_if[PE_COUNT], + VX_commit_if .slave commit_in_if[PE_COUNT] +); + localparam PID_BITS = `CLOG2(`NUM_THREADS / NUM_LANES); + localparam PID_WIDTH = `UP(PID_BITS); + localparam REQ_DATAW = `UUID_WIDTH + `NW_WIDTH + NUM_LANES + `PC_BITS + `INST_ALU_BITS + $bits(op_args_t) + 1 + `NR_BITS + `NT_WIDTH + (3 * NUM_LANES * `XLEN) + PID_WIDTH + 1 + 1; + localparam RSP_DATAW = `UUID_WIDTH + `NW_WIDTH + NUM_LANES + `PC_BITS + `NR_BITS + 1 + NUM_LANES * `XLEN + PID_WIDTH + 1 + 1; + localparam PE_SEL_BITS = `CLOG2(PE_COUNT); + + wire [PE_COUNT-1:0] pe_req_valid; + wire [PE_COUNT-1:0][REQ_DATAW-1:0] pe_req_data; + wire [PE_COUNT-1:0] pe_req_ready; + + VX_stream_switch #( + .DATAW (REQ_DATAW), + .NUM_OUTPUTS (PE_COUNT), + .OUT_BUF (REQ_OUT_BUF) + ) req_switch ( + .clk (clk), + .reset (reset), + .sel_in (pe_sel), + .valid_in (execute_in_if.valid), + .ready_in (execute_in_if.ready), + .data_in (execute_in_if.data), + .data_out (pe_req_data), + .valid_out (pe_req_valid), + .ready_out (pe_req_ready) + ); + + for (genvar i = 0; i < PE_COUNT; ++i) begin + assign execute_out_if[i].valid = pe_req_valid[i]; + assign execute_out_if[i].data = pe_req_data[i]; + assign pe_req_ready[i] = execute_out_if[i].ready; + end + + /////////////////////////////////////////////////////////////////////////// + + wire [PE_COUNT-1:0] pe_rsp_valid; + wire [PE_COUNT-1:0][RSP_DATAW-1:0] pe_rsp_data; + wire [PE_COUNT-1:0] pe_rsp_ready; + + for (genvar i = 0; i < PE_COUNT; ++i) begin + assign pe_rsp_valid[i] = commit_in_if[i].valid; + assign pe_rsp_data[i] = commit_in_if[i].data; + assign commit_in_if[i].ready 
= pe_rsp_ready[i]; + end + + VX_stream_arb #( + .NUM_INPUTS (PE_COUNT), + .DATAW (RSP_DATAW), + .ARBITER (ARBITER), + .OUT_BUF (RSP_OUT_BUF) + ) rsp_arb ( + .clk (clk), + .reset (reset), + .valid_in (pe_rsp_valid), + .ready_in (pe_rsp_ready), + .data_in (pe_rsp_data), + .data_out (commit_out_if.data), + .valid_out (commit_out_if.valid), + .ready_out (commit_out_if.ready), + `UNUSED_PIN (sel_out) + ); + +endmodule diff --git a/hw/rtl/core/VX_sfu_unit.sv b/hw/rtl/core/VX_sfu_unit.sv index a77520866d..de0ce9fc42 100644 --- a/hw/rtl/core/VX_sfu_unit.sv +++ b/hw/rtl/core/VX_sfu_unit.sv @@ -41,20 +41,21 @@ module VX_sfu_unit import VX_gpu_pkg::*; #( VX_warp_ctl_if.master warp_ctl_if ); `UNUSED_SPARAM (INSTANCE_ID) - localparam BLOCK_SIZE = 1; - localparam NUM_LANES = `NUM_SFU_LANES; - localparam PID_BITS = `CLOG2(`NUM_THREADS / NUM_LANES); - localparam PID_WIDTH = `UP(PID_BITS); - - localparam RSP_ARB_DATAW = `UUID_WIDTH + `NW_WIDTH + NUM_LANES + (NUM_LANES * `XLEN) + `NR_BITS + 1 + `PC_BITS + PID_WIDTH + 1 + 1; - localparam RSP_ARB_SIZE = 1 + 1; - localparam RSP_ARB_IDX_WCTL = 0; - localparam RSP_ARB_IDX_CSRS = 1; + localparam BLOCK_SIZE = 1; + localparam NUM_LANES = `NUM_SFU_LANES; + localparam PE_COUNT = 2; + localparam PE_SEL_BITS = `CLOG2(PE_COUNT); + localparam PE_IDX_WCTL = 0; + localparam PE_IDX_CSRS = 1; VX_execute_if #( .NUM_LANES (NUM_LANES) ) per_block_execute_if[BLOCK_SIZE](); + VX_commit_if #( + .NUM_LANES (NUM_LANES) + ) per_block_commit_if[BLOCK_SIZE](); + VX_dispatch_unit #( .BLOCK_SIZE (BLOCK_SIZE), .NUM_LANES (NUM_LANES), @@ -66,20 +67,37 @@ module VX_sfu_unit import VX_gpu_pkg::*; #( .execute_if (per_block_execute_if) ); - wire [RSP_ARB_SIZE-1:0] rsp_arb_valid_in; - wire [RSP_ARB_SIZE-1:0] rsp_arb_ready_in; - wire [RSP_ARB_SIZE-1:0][RSP_ARB_DATAW-1:0] rsp_arb_data_in; - - // Warp control block VX_execute_if #( .NUM_LANES (NUM_LANES) - ) wctl_execute_if(); + ) pe_execute_if[PE_COUNT](); + VX_commit_if#( .NUM_LANES (NUM_LANES) - ) wctl_commit_if(); + ) 
pe_commit_if[PE_COUNT](); + + reg [PE_SEL_BITS-1:0] pe_select; + always @(*) begin + if (`INST_SFU_IS_CSR(per_block_execute_if[0].data.op_type)) + pe_select = PE_IDX_CSRS; + else + pe_select = PE_IDX_WCTL; + end - assign wctl_execute_if.valid = per_block_execute_if[0].valid && `INST_SFU_IS_WCTL(per_block_execute_if[0].data.op_type); - assign wctl_execute_if.data = per_block_execute_if[0].data; + VX_pe_switch #( + .PE_COUNT (PE_COUNT), + .NUM_LANES (NUM_LANES), + .ARBITER ("R"), + .REQ_OUT_BUF(0), + .RSP_OUT_BUF(3) + ) pe_switch ( + .clk (clk), + .reset (reset), + .pe_sel (pe_select), + .execute_in_if (per_block_execute_if[0]), + .commit_out_if (per_block_commit_if[0]), + .execute_out_if (pe_execute_if), + .commit_in_if (pe_commit_if) + ); `RESET_RELAY (wctl_reset, reset); @@ -89,26 +107,11 @@ module VX_sfu_unit import VX_gpu_pkg::*; #( ) wctl_unit ( .clk (clk), .reset (wctl_reset), - .execute_if (wctl_execute_if), + .execute_if (pe_execute_if[PE_IDX_WCTL]), .warp_ctl_if(warp_ctl_if), - .commit_if (wctl_commit_if) + .commit_if (pe_commit_if[PE_IDX_WCTL]) ); - assign rsp_arb_valid_in[RSP_ARB_IDX_WCTL] = wctl_commit_if.valid; - assign rsp_arb_data_in[RSP_ARB_IDX_WCTL] = wctl_commit_if.data; - assign wctl_commit_if.ready = rsp_arb_ready_in[RSP_ARB_IDX_WCTL]; - - // CSR unit - VX_execute_if #( - .NUM_LANES (NUM_LANES) - ) csr_execute_if(); - VX_commit_if #( - .NUM_LANES (NUM_LANES) - ) csr_commit_if(); - - assign csr_execute_if.valid = per_block_execute_if[0].valid && `INST_SFU_IS_CSR(per_block_execute_if[0].data.op_type); - assign csr_execute_if.data = per_block_execute_if[0].data; - `RESET_RELAY (csr_reset, reset); VX_csr_unit #( @@ -120,7 +123,7 @@ module VX_sfu_unit import VX_gpu_pkg::*; #( .reset (csr_reset), .base_dcrs (base_dcrs), - .execute_if (csr_execute_if), + .execute_if (pe_execute_if[PE_IDX_CSRS]), `ifdef PERF_ENABLE .mem_perf_if (mem_perf_if), @@ -133,47 +136,7 @@ module VX_sfu_unit import VX_gpu_pkg::*; #( .sched_csr_if (sched_csr_if), .commit_csr_if 
(commit_csr_if), - .commit_if (csr_commit_if) - ); - - assign rsp_arb_valid_in[RSP_ARB_IDX_CSRS] = csr_commit_if.valid; - assign rsp_arb_data_in[RSP_ARB_IDX_CSRS] = csr_commit_if.data; - assign csr_commit_if.ready = rsp_arb_ready_in[RSP_ARB_IDX_CSRS]; - - // can accept new request? - - reg sfu_req_ready; - always @(*) begin - case (per_block_execute_if[0].data.op_type) - `INST_SFU_CSRRW, - `INST_SFU_CSRRS, - `INST_SFU_CSRRC: sfu_req_ready = csr_execute_if.ready; - default: sfu_req_ready = wctl_execute_if.ready; - endcase - end - assign per_block_execute_if[0].ready = sfu_req_ready; - - // response arbitration - - VX_commit_if #( - .NUM_LANES (NUM_LANES) - ) arb_commit_if[BLOCK_SIZE](); - - VX_stream_arb #( - .NUM_INPUTS (RSP_ARB_SIZE), - .DATAW (RSP_ARB_DATAW), - .ARBITER ("R"), - .OUT_BUF (3) - ) rsp_arb ( - .clk (clk), - .reset (reset), - .valid_in (rsp_arb_valid_in), - .ready_in (rsp_arb_ready_in), - .data_in (rsp_arb_data_in), - .data_out (arb_commit_if[0].data), - .valid_out (arb_commit_if[0].valid), - .ready_out (arb_commit_if[0].ready), - `UNUSED_PIN (sel_out) + .commit_if (pe_commit_if[PE_IDX_CSRS]) ); VX_gather_unit #( @@ -181,9 +144,9 @@ module VX_sfu_unit import VX_gpu_pkg::*; #( .NUM_LANES (NUM_LANES), .OUT_BUF (3) ) gather_unit ( - .clk (clk), - .reset (reset), - .commit_in_if (arb_commit_if), + .clk (clk), + .reset (reset), + .commit_in_if (per_block_commit_if), .commit_out_if (commit_if) ); From a17580375bcc8f82c01376a11be98823b3399565 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Mon, 2 Sep 2024 03:11:26 -0700 Subject: [PATCH 132/407] fpu timing optimization --- hw/rtl/fpu/VX_fpu_cvt.sv | 2 +- hw/rtl/fpu/VX_fpu_div.sv | 4 ++-- hw/rtl/fpu/VX_fpu_dsp.sv | 4 +--- hw/rtl/fpu/VX_fpu_fma.sv | 4 ++-- hw/rtl/fpu/VX_fpu_ncp.sv | 2 +- hw/rtl/fpu/VX_fpu_sqrt.sv | 4 ++-- 6 files changed, 9 insertions(+), 11 deletions(-) diff --git a/hw/rtl/fpu/VX_fpu_cvt.sv b/hw/rtl/fpu/VX_fpu_cvt.sv index 94dee73160..b622f5153f 100644 --- a/hw/rtl/fpu/VX_fpu_cvt.sv +++ 
b/hw/rtl/fpu/VX_fpu_cvt.sv @@ -64,7 +64,7 @@ module VX_fpu_cvt import VX_fpu_pkg::*; #( .DATA_OUT_WIDTH(`FP_FLAGS_BITS + 32), .TAG_WIDTH (NUM_LANES + TAG_WIDTH), .PE_REG (0), - .OUT_BUF ((NUM_LANES != NUM_PES) ? 2 : 0) + .OUT_BUF (2) ) pe_serializer ( .clk (clk), .reset (reset), diff --git a/hw/rtl/fpu/VX_fpu_div.sv b/hw/rtl/fpu/VX_fpu_div.sv index 79b91a1f5a..b6cfeb632c 100644 --- a/hw/rtl/fpu/VX_fpu_div.sv +++ b/hw/rtl/fpu/VX_fpu_div.sv @@ -67,8 +67,8 @@ module VX_fpu_div import VX_fpu_pkg::*; #( .DATA_IN_WIDTH(2*32), .DATA_OUT_WIDTH(`FP_FLAGS_BITS + 32), .TAG_WIDTH (NUM_LANES + TAG_WIDTH), - .PE_REG ((NUM_LANES != NUM_PES) ? 1 : 0), // must be registered for DSPs - .OUT_BUF ((NUM_LANES != NUM_PES) ? 2 : 0) + .PE_REG (0), + .OUT_BUF (2) ) pe_serializer ( .clk (clk), .reset (reset), diff --git a/hw/rtl/fpu/VX_fpu_dsp.sv b/hw/rtl/fpu/VX_fpu_dsp.sv index 00b79ba218..1a6e944e2a 100644 --- a/hw/rtl/fpu/VX_fpu_dsp.sv +++ b/hw/rtl/fpu/VX_fpu_dsp.sv @@ -111,9 +111,7 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #( VX_stream_switch #( .DATAW (REQ_DATAW), - .NUM_INPUTS (1), - .NUM_OUTPUTS (NUM_FPCORES), - .OUT_BUF (0) + .NUM_OUTPUTS (NUM_FPCORES) ) req_switch ( .clk (clk), .reset (reset), diff --git a/hw/rtl/fpu/VX_fpu_fma.sv b/hw/rtl/fpu/VX_fpu_fma.sv index 3095846c1c..30939be45d 100644 --- a/hw/rtl/fpu/VX_fpu_fma.sv +++ b/hw/rtl/fpu/VX_fpu_fma.sv @@ -98,8 +98,8 @@ module VX_fpu_fma import VX_fpu_pkg::*; #( .DATA_IN_WIDTH(3*32), .DATA_OUT_WIDTH(`FP_FLAGS_BITS + 32), .TAG_WIDTH (NUM_LANES + TAG_WIDTH), - .PE_REG ((NUM_LANES != NUM_PES) ? 1 : 0), // must be registered for DSPs - .OUT_BUF ((NUM_LANES != NUM_PES) ? 
2 : 0) + .PE_REG (1), // must be registered for DSPs + .OUT_BUF (2) ) pe_serializer ( .clk (clk), .reset (reset), diff --git a/hw/rtl/fpu/VX_fpu_ncp.sv b/hw/rtl/fpu/VX_fpu_ncp.sv index 52b2979b6d..cccc09b978 100644 --- a/hw/rtl/fpu/VX_fpu_ncp.sv +++ b/hw/rtl/fpu/VX_fpu_ncp.sv @@ -69,7 +69,7 @@ module VX_fpu_ncp import VX_fpu_pkg::*; #( .DATA_OUT_WIDTH(`FP_FLAGS_BITS + 32), .TAG_WIDTH (NUM_LANES + TAG_WIDTH), .PE_REG (0), - .OUT_BUF ((NUM_LANES != NUM_PES) ? 2 : 0) + .OUT_BUF (2) ) pe_serializer ( .clk (clk), .reset (reset), diff --git a/hw/rtl/fpu/VX_fpu_sqrt.sv b/hw/rtl/fpu/VX_fpu_sqrt.sv index f6c542fc38..0ca7a02dfa 100644 --- a/hw/rtl/fpu/VX_fpu_sqrt.sv +++ b/hw/rtl/fpu/VX_fpu_sqrt.sv @@ -61,8 +61,8 @@ module VX_fpu_sqrt import VX_fpu_pkg::*; #( .DATA_IN_WIDTH(32), .DATA_OUT_WIDTH(`FP_FLAGS_BITS + 32), .TAG_WIDTH (NUM_LANES + TAG_WIDTH), - .PE_REG ((NUM_LANES != NUM_PES) ? 1 : 0), // must be registered for DSPs - .OUT_BUF ((NUM_LANES != NUM_PES) ? 2 : 0) + .PE_REG (0), + .OUT_BUF (2) ) pe_serializer ( .clk (clk), .reset (reset), From 33bec667c2c1214ddda910fd1d6e3f69eff20e63 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Mon, 2 Sep 2024 04:12:58 -0700 Subject: [PATCH 133/407] minor update --- hw/rtl/core/VX_alu_unit.sv | 2 +- hw/rtl/core/VX_pe_switch.sv | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/hw/rtl/core/VX_alu_unit.sv b/hw/rtl/core/VX_alu_unit.sv index 120ecd5f0b..c853a5d60b 100644 --- a/hw/rtl/core/VX_alu_unit.sv +++ b/hw/rtl/core/VX_alu_unit.sv @@ -67,7 +67,7 @@ module VX_alu_unit #( .NUM_LANES (NUM_LANES) ) pe_commit_if[PE_COUNT](); - reg [PE_SEL_BITS-1:0] pe_select; + reg [`UP(PE_SEL_BITS)-1:0] pe_select; always @(*) begin if (`EXT_M_ENABLED && (per_block_execute_if[block_idx].data.op_args.alu.xtype == `ALU_TYPE_MULDIV)) pe_select = PE_IDX_MDV; diff --git a/hw/rtl/core/VX_pe_switch.sv b/hw/rtl/core/VX_pe_switch.sv index 9c8d7a7b3d..384fce3292 100644 --- a/hw/rtl/core/VX_pe_switch.sv +++ b/hw/rtl/core/VX_pe_switch.sv @@ 
-18,11 +18,12 @@ module VX_pe_switch import VX_gpu_pkg::*; #( parameter NUM_LANES = 0, parameter REQ_OUT_BUF = 0, parameter RSP_OUT_BUF = 0, - parameter `STRING ARBITER = "R" + parameter `STRING ARBITER = "R", + parameter PE_SEL_BITS = `CLOG2(PE_COUNT) ) ( input wire clk, input wire reset, - input wire [PE_SEL_BITS-1:0] pe_sel, + input wire [`UP(PE_SEL_BITS)-1:0] pe_sel, VX_execute_if.slave execute_in_if, VX_commit_if.master commit_out_if, VX_execute_if.master execute_out_if[PE_COUNT], @@ -32,7 +33,6 @@ module VX_pe_switch import VX_gpu_pkg::*; #( localparam PID_WIDTH = `UP(PID_BITS); localparam REQ_DATAW = `UUID_WIDTH + `NW_WIDTH + NUM_LANES + `PC_BITS + `INST_ALU_BITS + $bits(op_args_t) + 1 + `NR_BITS + `NT_WIDTH + (3 * NUM_LANES * `XLEN) + PID_WIDTH + 1 + 1; localparam RSP_DATAW = `UUID_WIDTH + `NW_WIDTH + NUM_LANES + `PC_BITS + `NR_BITS + 1 + NUM_LANES * `XLEN + PID_WIDTH + 1 + 1; - localparam PE_SEL_BITS = `CLOG2(PE_COUNT); wire [PE_COUNT-1:0] pe_req_valid; wire [PE_COUNT-1:0][REQ_DATAW-1:0] pe_req_data; From d16aee3ecd75f6636852a478856fa97f92426ba2 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Mon, 2 Sep 2024 10:37:51 -0700 Subject: [PATCH 134/407] minor update --- hw/rtl/core/VX_alu_unit.sv | 3 +-- hw/rtl/core/VX_sfu_unit.sv | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/hw/rtl/core/VX_alu_unit.sv b/hw/rtl/core/VX_alu_unit.sv index c853a5d60b..f3e0b19e7f 100644 --- a/hw/rtl/core/VX_alu_unit.sv +++ b/hw/rtl/core/VX_alu_unit.sv @@ -69,10 +69,9 @@ module VX_alu_unit #( reg [`UP(PE_SEL_BITS)-1:0] pe_select; always @(*) begin + pe_select = PE_IDX_INT; if (`EXT_M_ENABLED && (per_block_execute_if[block_idx].data.op_args.alu.xtype == `ALU_TYPE_MULDIV)) pe_select = PE_IDX_MDV; - else - pe_select = PE_IDX_INT; end VX_pe_switch #( diff --git a/hw/rtl/core/VX_sfu_unit.sv b/hw/rtl/core/VX_sfu_unit.sv index de0ce9fc42..93686ca557 100644 --- a/hw/rtl/core/VX_sfu_unit.sv +++ b/hw/rtl/core/VX_sfu_unit.sv @@ -77,10 +77,9 @@ module VX_sfu_unit import 
VX_gpu_pkg::*; #( reg [PE_SEL_BITS-1:0] pe_select; always @(*) begin + pe_select = PE_IDX_WCTL; if (`INST_SFU_IS_CSR(per_block_execute_if[0].data.op_type)) pe_select = PE_IDX_CSRS; - else - pe_select = PE_IDX_WCTL; end VX_pe_switch #( From 45ed8abf22657e4750f5d48ff5d19de6fbd2f36c Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Mon, 2 Sep 2024 19:39:28 -0700 Subject: [PATCH 135/407] minor update --- hw/rtl/libs/VX_stream_xbar.sv | 16 ++++++++-------- third_party/Makefile | 6 +++--- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/hw/rtl/libs/VX_stream_xbar.sv b/hw/rtl/libs/VX_stream_xbar.sv index d1b01125f9..f2d9aa856e 100644 --- a/hw/rtl/libs/VX_stream_xbar.sv +++ b/hw/rtl/libs/VX_stream_xbar.sv @@ -63,14 +63,6 @@ module VX_stream_xbar #( .data_out (per_output_ready_in_w) ); - VX_transpose #( - .N (NUM_INPUTS), - .M (NUM_OUTPUTS) - ) val_in_transpose ( - .data_in (per_output_valid_in), - .data_out (per_output_valid_in_w) - ); - for (genvar i = 0; i < NUM_INPUTS; ++i) begin VX_decoder #( .N (OUT_WIDTH) @@ -82,6 +74,14 @@ module VX_stream_xbar #( assign ready_in[i] = | per_output_ready_in_w[i]; end + VX_transpose #( + .N (NUM_INPUTS), + .M (NUM_OUTPUTS) + ) val_in_transpose ( + .data_in (per_output_valid_in), + .data_out (per_output_valid_in_w) + ); + for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin VX_stream_arb #( .NUM_INPUTS (NUM_INPUTS), diff --git a/third_party/Makefile b/third_party/Makefile index a2f74264e3..24905e58c1 100644 --- a/third_party/Makefile +++ b/third_party/Makefile @@ -1,6 +1,6 @@ -all: fpnew softfloat ramulator +all: cvfpu softfloat ramulator -fpnew: +cvfpu: softfloat: SPECIALIZE_TYPE=RISCV SOFTFLOAT_OPTS="-fPIC -DSOFTFLOAT_ROUND_ODD -DINLINE_LEVEL=5 -DSOFTFLOAT_FAST_DIV32TO16 -DSOFTFLOAT_FAST_DIV64TO32" $(MAKE) -C softfloat/build/Linux-x86_64-GCC @@ -13,4 +13,4 @@ clean: $(MAKE) -C softfloat/build/Linux-x86_64-GCC clean rm -rf ramulator/build ramulator/libramulator.so -.PHONY: all fpnew softfloat ramulator \ No newline at end of 
file +.PHONY: all cvfpu softfloat ramulator \ No newline at end of file From c28449f51500388515c7458f8aa60f2dcba99651 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Mon, 2 Sep 2024 21:58:12 -0700 Subject: [PATCH 136/407] minor update --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index d4ed68a590..5f61d06fdf 100644 --- a/README.md +++ b/README.md @@ -39,7 +39,7 @@ More detailed build instructions can be found [here](docs/install_vortex.md). - [LLVM](https://llvm.org/) - [RISCV-GNU-TOOLCHAIN](https://github.com/riscv-collab/riscv-gnu-toolchain) - [Verilator](https://www.veripool.org/verilator) -- [FpNew](https://github.com/pulp-platform/fpnew.git) +- [cvfpu](https://github.com/openhwgroup/cvfpu.git) - [SoftFloat](https://github.com/ucb-bar/berkeley-softfloat-3.git) - [Ramulator](https://github.com/CMU-SAFARI/ramulator.git) - [Yosys](https://github.com/YosysHQ/yosys) From 19d614202327e5b4f610911f82e2ea293a6b1cb7 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Tue, 3 Sep 2024 04:54:29 -0700 Subject: [PATCH 137/407] fixed fpu serialization --- ci/regression.sh.in | 27 +++++++++------------------ hw/rtl/fpu/VX_fpu_cvt.sv | 24 +++++++++++++++++------- hw/rtl/fpu/VX_fpu_div.sv | 8 +++++--- hw/rtl/fpu/VX_fpu_fma.sv | 29 ++++++++++++++++------------- hw/rtl/fpu/VX_fpu_ncp.sv | 16 ++++++++++------ hw/rtl/fpu/VX_fpu_sqrt.sv | 6 ++++-- hw/rtl/libs/VX_pe_serializer.sv | 12 +----------- 7 files changed, 62 insertions(+), 60 deletions(-) diff --git a/ci/regression.sh.in b/ci/regression.sh.in index aee991cd4a..32e479c1e2 100755 --- a/ci/regression.sh.in +++ b/ci/regression.sh.in @@ -41,31 +41,23 @@ isa() make -C tests/riscv/isa run-simx make -C tests/riscv/isa run-rtlsim - make -C sim/rtlsim clean && CONFIGS="-DFPU_FPNEW" make -C sim/rtlsim > /dev/null - make -C tests/riscv/isa run-rtlsim-32f + make -C sim/rtlsim clean && CONFIGS="-DFPU_FPNEW" make -C sim/rtlsim > /dev/null && make -C tests/riscv/isa run-rtlsim-32f - make 
-C sim/rtlsim clean && CONFIGS="-DFPU_DPI" make -C sim/rtlsim > /dev/null - make -C tests/riscv/isa run-rtlsim-32f + make -C sim/rtlsim clean && CONFIGS="-DFPU_DPI" make -C sim/rtlsim > /dev/null && make -C tests/riscv/isa run-rtlsim-32f - make -C sim/rtlsim clean && CONFIGS="-DFPU_DSP" make -C sim/rtlsim > /dev/null - make -C tests/riscv/isa run-rtlsim-32f + make -C sim/rtlsim clean && CONFIGS="-DFPU_DSP" make -C sim/rtlsim > /dev/null && make -C tests/riscv/isa run-rtlsim-32f if [ "$XLEN" == "64" ] then - make -C sim/rtlsim clean && CONFIGS="-DFPU_FPNEW" make -C sim/rtlsim > /dev/null - make -C tests/riscv/isa run-rtlsim-64d + make -C sim/rtlsim clean && CONFIGS="-DFPU_FPNEW" make -C sim/rtlsim > /dev/null && make -C tests/riscv/isa run-rtlsim-64d - make -C sim/rtlsim clean && CONFIGS="-DFPU_DPI" make -C sim/rtlsim > /dev/null - make -C tests/riscv/isa run-rtlsim-64d + make -C sim/rtlsim clean && CONFIGS="-DFPU_DPI" make -C sim/rtlsim > /dev/null && make -C tests/riscv/isa run-rtlsim-64d - make -C sim/rtlsim clean && CONFIGS="-DFPU_DPI -DEXT_D_DISABLE" make -C sim/rtlsim > /dev/null - make -C tests/riscv/isa run-rtlsim-64f + make -C sim/rtlsim clean && CONFIGS="-DFPU_DPI -DEXT_D_DISABLE" make -C sim/rtlsim > /dev/null && make -C tests/riscv/isa run-rtlsim-64f - make -C sim/rtlsim clean && CONFIGS="-DFPU_FPNEW -DEXT_D_DISABLE" make -C sim/rtlsim > /dev/null - make -C tests/riscv/isa run-rtlsim-64f + make -C sim/rtlsim clean && CONFIGS="-DFPU_FPNEW -DEXT_D_DISABLE" make -C sim/rtlsim > /dev/null && make -C tests/riscv/isa run-rtlsim-64f - make -C sim/rtlsim clean && CONFIGS="-DFPU_DSP -DEXT_D_DISABLE" make -C sim/rtlsim > /dev/null - make -C tests/riscv/isa run-rtlsim-64fx + make -C sim/rtlsim clean && CONFIGS="-DFPU_DSP -DEXT_D_DISABLE" make -C sim/rtlsim > /dev/null && make -C tests/riscv/isa run-rtlsim-64fx fi # clean build @@ -257,8 +249,7 @@ config2() make -C tests/regression/dogfood clean-kernel # disabling M & F extensions - make -C sim/rtlsim clean && 
CONFIGS="-DEXT_M_DISABLE -DEXT_F_DISABLE" make -C sim/rtlsim > /dev/null - make -C tests/riscv/isa run-rtlsim-32i + make -C sim/rtlsim clean && CONFIGS="-DEXT_M_DISABLE -DEXT_F_DISABLE" make -C sim/rtlsim > /dev/null && make -C tests/riscv/isa run-rtlsim-32i make -C sim/rtlsim clean # disabling ZICOND extension diff --git a/hw/rtl/fpu/VX_fpu_cvt.sv b/hw/rtl/fpu/VX_fpu_cvt.sv index b622f5153f..5f9dc944c5 100644 --- a/hw/rtl/fpu/VX_fpu_cvt.sv +++ b/hw/rtl/fpu/VX_fpu_cvt.sv @@ -46,21 +46,29 @@ module VX_fpu_cvt import VX_fpu_pkg::*; #( input wire ready_out, output wire valid_out ); - `UNUSED_VAR (frm) + localparam DATAW = 32 + `INST_FRM_BITS + 1 + 1; + wire [NUM_LANES-1:0][DATAW-1:0] data_in; wire [NUM_LANES-1:0] mask_out; wire [NUM_LANES-1:0][(`FP_FLAGS_BITS+32)-1:0] data_out; fflags_t [NUM_LANES-1:0] fflags_out; wire pe_enable; - wire [NUM_PES-1:0][31:0] pe_data_in; + wire [NUM_PES-1:0][DATAW-1:0] pe_data_in; wire [NUM_PES-1:0][(`FP_FLAGS_BITS+32)-1:0] pe_data_out; + for (genvar i = 0; i < NUM_LANES; ++i) begin + assign data_in[i][0 +: 32] = dataa[i]; + assign data_in[i][32 +: `INST_FRM_BITS] = frm; + assign data_in[i][32 + `INST_FRM_BITS +: 1] = is_itof; + assign data_in[i][32 + `INST_FRM_BITS + 1 +: 1] = is_signed; + end + VX_pe_serializer #( .NUM_LANES (NUM_LANES), .NUM_PES (NUM_PES), .LATENCY (`LATENCY_FCVT), - .DATA_IN_WIDTH(32), + .DATA_IN_WIDTH(DATAW), .DATA_OUT_WIDTH(`FP_FLAGS_BITS + 32), .TAG_WIDTH (NUM_LANES + TAG_WIDTH), .PE_REG (0), @@ -69,7 +77,7 @@ module VX_fpu_cvt import VX_fpu_pkg::*; #( .clk (clk), .reset (reset), .valid_in (valid_in), - .data_in (dataa), + .data_in (data_in), .tag_in ({mask_in, tag_in}), .ready_in (ready_in), .pe_enable (pe_enable), @@ -81,6 +89,8 @@ module VX_fpu_cvt import VX_fpu_pkg::*; #( .ready_out (ready_out) ); + `UNUSED_VAR (pe_data_in) + for (genvar i = 0; i < NUM_LANES; ++i) begin assign result[i] = data_out[i][0 +: 32]; assign fflags_out[i] = data_out[i][32 +: `FP_FLAGS_BITS]; @@ -94,9 +104,9 @@ module VX_fpu_cvt import 
VX_fpu_pkg::*; #( .clk (clk), .reset (reset), .enable (pe_enable), - .frm (frm), - .is_itof (is_itof), - .is_signed (is_signed), + .frm (pe_data_in[0][32 +: `INST_FRM_BITS]), + .is_itof (pe_data_in[0][32 + `INST_FRM_BITS +: 1]), + .is_signed (pe_data_in[0][32 + `INST_FRM_BITS + 1 +: 1]), .dataa (pe_data_in[i][0 +: 32]), .result (pe_data_out[i][0 +: 32]), .fflags (pe_data_out[i][32 +: `FP_FLAGS_BITS]) diff --git a/hw/rtl/fpu/VX_fpu_div.sv b/hw/rtl/fpu/VX_fpu_div.sv index b6cfeb632c..6108b25801 100644 --- a/hw/rtl/fpu/VX_fpu_div.sv +++ b/hw/rtl/fpu/VX_fpu_div.sv @@ -46,13 +46,15 @@ module VX_fpu_div import VX_fpu_pkg::*; #( ); `UNUSED_VAR (frm) - wire [NUM_LANES-1:0][2*32-1:0] data_in; + localparam DATAW = 2 * 32; + + wire [NUM_LANES-1:0][DATAW-1:0] data_in; wire [NUM_LANES-1:0] mask_out; wire [NUM_LANES-1:0][(`FP_FLAGS_BITS+32)-1:0] data_out; wire [NUM_LANES-1:0][`FP_FLAGS_BITS-1:0] fflags_out; wire pe_enable; - wire [NUM_PES-1:0][2*32-1:0] pe_data_in; + wire [NUM_PES-1:0][DATAW-1:0] pe_data_in; wire [NUM_PES-1:0][(`FP_FLAGS_BITS+32)-1:0] pe_data_out; for (genvar i = 0; i < NUM_LANES; ++i) begin @@ -64,7 +66,7 @@ module VX_fpu_div import VX_fpu_pkg::*; #( .NUM_LANES (NUM_LANES), .NUM_PES (NUM_PES), .LATENCY (`LATENCY_FDIV), - .DATA_IN_WIDTH(2*32), + .DATA_IN_WIDTH(DATAW), .DATA_OUT_WIDTH(`FP_FLAGS_BITS + 32), .TAG_WIDTH (NUM_LANES + TAG_WIDTH), .PE_REG (0), diff --git a/hw/rtl/fpu/VX_fpu_fma.sv b/hw/rtl/fpu/VX_fpu_fma.sv index 30939be45d..1bcc5d0089 100644 --- a/hw/rtl/fpu/VX_fpu_fma.sv +++ b/hw/rtl/fpu/VX_fpu_fma.sv @@ -49,15 +49,15 @@ module VX_fpu_fma import VX_fpu_pkg::*; #( input wire ready_out, output wire valid_out ); - `UNUSED_VAR (frm) + localparam DATAW = 3 * 32 + `INST_FRM_BITS; - wire [NUM_LANES-1:0][3*32-1:0] data_in; + wire [NUM_LANES-1:0][DATAW-1:0] data_in; wire [NUM_LANES-1:0] mask_out; wire [NUM_LANES-1:0][(`FP_FLAGS_BITS+32)-1:0] data_out; wire [NUM_LANES-1:0][`FP_FLAGS_BITS-1:0] fflags_out; wire pe_enable; - wire [NUM_PES-1:0][3*32-1:0] 
pe_data_in; + wire [NUM_PES-1:0][DATAW-1:0] pe_data_in; wire [NUM_PES-1:0][(`FP_FLAGS_BITS+32)-1:0] pe_data_out; reg [NUM_LANES-1:0][31:0] a, b, c; @@ -66,9 +66,9 @@ module VX_fpu_fma import VX_fpu_pkg::*; #( always @(*) begin if (is_madd) begin // MADD / MSUB / NMADD / NMSUB - a[i] = is_neg ? {~dataa[i][31], dataa[i][30:0]} : dataa[i]; + a[i] = {is_neg ^ dataa[i][31], dataa[i][30:0]}; b[i] = datab[i]; - c[i] = (is_neg ^ is_sub) ? {~datac[i][31], datac[i][30:0]} : datac[i]; + c[i] = {is_neg ^ is_sub ^ datac[i][31], datac[i][30:0]}; end else begin if (is_neg) begin // MUL @@ -77,9 +77,9 @@ module VX_fpu_fma import VX_fpu_pkg::*; #( c[i] = '0; end else begin // ADD / SUB - a[i] = 32'h3f800000; // 1.0f - b[i] = dataa[i]; - c[i] = is_sub ? {~datab[i][31], datab[i][30:0]} : datab[i]; + a[i] = dataa[i]; + b[i] = 32'h3f800000; // 1.0f + c[i] = {is_sub ^ datab[i][31], datab[i][30:0]}; end end end @@ -89,13 +89,14 @@ module VX_fpu_fma import VX_fpu_pkg::*; #( assign data_in[i][0 +: 32] = a[i]; assign data_in[i][32 +: 32] = b[i]; assign data_in[i][64 +: 32] = c[i]; + assign data_in[i][96 +: `INST_FRM_BITS] = frm; end VX_pe_serializer #( .NUM_LANES (NUM_LANES), .NUM_PES (NUM_PES), .LATENCY (`LATENCY_FMA), - .DATA_IN_WIDTH(3*32), + .DATA_IN_WIDTH(DATAW), .DATA_OUT_WIDTH(`FP_FLAGS_BITS + 32), .TAG_WIDTH (NUM_LANES + TAG_WIDTH), .PE_REG (1), // must be registered for DSPs @@ -116,6 +117,8 @@ module VX_fpu_fma import VX_fpu_pkg::*; #( .ready_out (ready_out) ); + `UNUSED_VAR (pe_data_in) + for (genvar i = 0; i < NUM_LANES; ++i) begin assign result[i] = data_out[i][0 +: 32]; assign fflags_out[i] = data_out[i][32 +: `FP_FLAGS_BITS]; @@ -177,10 +180,10 @@ module VX_fpu_fma import VX_fpu_pkg::*; #( dpi_fmadd ( pe_enable, int'(0), - {32'hffffffff, pe_data_in[i][0 +: 32]}, - {32'hffffffff, pe_data_in[i][32 +: 32]}, - {32'hffffffff, pe_data_in[i][64 +: 32]}, - frm, + {32'hffffffff, pe_data_in[i][0 +: 32]}, // a + {32'hffffffff, pe_data_in[i][32 +: 32]}, // b + {32'hffffffff, 
pe_data_in[i][64 +: 32]}, // c + pe_data_in[0][96 +: `INST_FRM_BITS], // frm r, f ); diff --git a/hw/rtl/fpu/VX_fpu_ncp.sv b/hw/rtl/fpu/VX_fpu_ncp.sv index cccc09b978..3728c2932e 100644 --- a/hw/rtl/fpu/VX_fpu_ncp.sv +++ b/hw/rtl/fpu/VX_fpu_ncp.sv @@ -45,27 +45,29 @@ module VX_fpu_ncp import VX_fpu_pkg::*; #( input wire ready_out, output wire valid_out ); - `UNUSED_VAR (frm) + localparam DATAW = 2 * 32 + `INST_FRM_BITS + `INST_FPU_BITS; - wire [NUM_LANES-1:0][2*32-1:0] data_in; + wire [NUM_LANES-1:0][DATAW-1:0] data_in; wire [NUM_LANES-1:0] mask_out; wire [NUM_LANES-1:0][(`FP_FLAGS_BITS+32)-1:0] data_out; fflags_t [NUM_LANES-1:0] fflags_out; wire pe_enable; - wire [NUM_PES-1:0][2*32-1:0] pe_data_in; + wire [NUM_PES-1:0][DATAW-1:0] pe_data_in; wire [NUM_PES-1:0][(`FP_FLAGS_BITS+32)-1:0] pe_data_out; for (genvar i = 0; i < NUM_LANES; ++i) begin assign data_in[i][0 +: 32] = dataa[i]; assign data_in[i][32 +: 32] = datab[i]; + assign data_in[i][64 +: `INST_FRM_BITS] = frm; + assign data_in[i][64 + `INST_FRM_BITS +: `INST_FPU_BITS] = op_type; end VX_pe_serializer #( .NUM_LANES (NUM_LANES), .NUM_PES (NUM_PES), .LATENCY (`LATENCY_FNCP), - .DATA_IN_WIDTH(2*32), + .DATA_IN_WIDTH(DATAW), .DATA_OUT_WIDTH(`FP_FLAGS_BITS + 32), .TAG_WIDTH (NUM_LANES + TAG_WIDTH), .PE_REG (0), @@ -86,6 +88,8 @@ module VX_fpu_ncp import VX_fpu_pkg::*; #( .ready_out (ready_out) ); + `UNUSED_VAR (pe_data_in) + for (genvar i = 0; i < NUM_LANES; ++i) begin assign result[i] = data_out[i][0 +: 32]; assign fflags_out[i] = data_out[i][32 +: `FP_FLAGS_BITS]; @@ -99,8 +103,8 @@ module VX_fpu_ncp import VX_fpu_pkg::*; #( .clk (clk), .reset (reset), .enable (pe_enable), - .frm (frm), - .op_type (op_type), + .frm (pe_data_in[0][64 +: `INST_FRM_BITS]), + .op_type (pe_data_in[0][64 + `INST_FRM_BITS +: `INST_FPU_BITS]), .dataa (pe_data_in[i][0 +: 32]), .datab (pe_data_in[i][32 +: 32]), .result (pe_data_out[i][0 +: 32]), diff --git a/hw/rtl/fpu/VX_fpu_sqrt.sv b/hw/rtl/fpu/VX_fpu_sqrt.sv index 
0ca7a02dfa..c9d97af7f8 100644 --- a/hw/rtl/fpu/VX_fpu_sqrt.sv +++ b/hw/rtl/fpu/VX_fpu_sqrt.sv @@ -46,19 +46,21 @@ module VX_fpu_sqrt import VX_fpu_pkg::*; #( `UNUSED_VAR (frm) + localparam DATAW = 32; + wire [NUM_LANES-1:0] mask_out; wire [NUM_LANES-1:0][(`FP_FLAGS_BITS+32)-1:0] data_out; wire [NUM_LANES-1:0][`FP_FLAGS_BITS-1:0] fflags_out; wire pe_enable; - wire [NUM_PES-1:0][31:0] pe_data_in; + wire [NUM_PES-1:0][DATAW-1:0] pe_data_in; wire [NUM_PES-1:0][(`FP_FLAGS_BITS+32)-1:0] pe_data_out; VX_pe_serializer #( .NUM_LANES (NUM_LANES), .NUM_PES (NUM_PES), .LATENCY (`LATENCY_FSQRT), - .DATA_IN_WIDTH(32), + .DATA_IN_WIDTH(DATAW), .DATA_OUT_WIDTH(`FP_FLAGS_BITS + 32), .TAG_WIDTH (NUM_LANES + TAG_WIDTH), .PE_REG (0), diff --git a/hw/rtl/libs/VX_pe_serializer.sv b/hw/rtl/libs/VX_pe_serializer.sv index 2f9c83483b..d96db52f02 100644 --- a/hw/rtl/libs/VX_pe_serializer.sv +++ b/hw/rtl/libs/VX_pe_serializer.sv @@ -77,17 +77,7 @@ module VX_pe_serializer #( .data_out (pe_data_out) ); - VX_pipe_register #( - .DATAW (1), - .RESETW (1), - .DEPTH (PE_REG) - ) pe_en_reg ( - .clk (clk), - .reset (reset), - .enable (1'b1), - .data_in (enable), - .data_out (pe_enable) - ); + assign pe_enable = enable; if (NUM_LANES != NUM_PES) begin From f9230bdac3f0095974b3876936259721991f4ec7 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Tue, 3 Sep 2024 06:14:09 -0700 Subject: [PATCH 138/407] minor update --- hw/rtl/fpu/VX_fpu_cvt.sv | 5 +++-- hw/rtl/fpu/VX_fpu_div.sv | 18 ++++++++++-------- hw/rtl/fpu/VX_fpu_fma.sv | 5 +++-- hw/rtl/fpu/VX_fpu_ncp.sv | 5 +++-- hw/rtl/fpu/VX_fpu_sqrt.sv | 22 ++++++++++++++-------- 5 files changed, 33 insertions(+), 22 deletions(-) diff --git a/hw/rtl/fpu/VX_fpu_cvt.sv b/hw/rtl/fpu/VX_fpu_cvt.sv index 5f9dc944c5..b3d1e099a1 100644 --- a/hw/rtl/fpu/VX_fpu_cvt.sv +++ b/hw/rtl/fpu/VX_fpu_cvt.sv @@ -49,6 +49,7 @@ module VX_fpu_cvt import VX_fpu_pkg::*; #( localparam DATAW = 32 + `INST_FRM_BITS + 1 + 1; wire [NUM_LANES-1:0][DATAW-1:0] data_in; + wire [NUM_LANES-1:0] 
mask_out; wire [NUM_LANES-1:0][(`FP_FLAGS_BITS+32)-1:0] data_out; fflags_t [NUM_LANES-1:0] fflags_out; @@ -68,8 +69,8 @@ module VX_fpu_cvt import VX_fpu_pkg::*; #( .NUM_LANES (NUM_LANES), .NUM_PES (NUM_PES), .LATENCY (`LATENCY_FCVT), - .DATA_IN_WIDTH(DATAW), - .DATA_OUT_WIDTH(`FP_FLAGS_BITS + 32), + .DATA_IN_WIDTH (DATAW), + .DATA_OUT_WIDTH (`FP_FLAGS_BITS + 32), .TAG_WIDTH (NUM_LANES + TAG_WIDTH), .PE_REG (0), .OUT_BUF (2) diff --git a/hw/rtl/fpu/VX_fpu_div.sv b/hw/rtl/fpu/VX_fpu_div.sv index 6108b25801..1a1da2758d 100644 --- a/hw/rtl/fpu/VX_fpu_div.sv +++ b/hw/rtl/fpu/VX_fpu_div.sv @@ -44,11 +44,10 @@ module VX_fpu_div import VX_fpu_pkg::*; #( output wire valid_out, input wire ready_out ); - `UNUSED_VAR (frm) - - localparam DATAW = 2 * 32; + localparam DATAW = 2 * 32 + `INST_FRM_BITS; wire [NUM_LANES-1:0][DATAW-1:0] data_in; + wire [NUM_LANES-1:0] mask_out; wire [NUM_LANES-1:0][(`FP_FLAGS_BITS+32)-1:0] data_out; wire [NUM_LANES-1:0][`FP_FLAGS_BITS-1:0] fflags_out; @@ -60,14 +59,15 @@ module VX_fpu_div import VX_fpu_pkg::*; #( for (genvar i = 0; i < NUM_LANES; ++i) begin assign data_in[i][0 +: 32] = dataa[i]; assign data_in[i][32 +: 32] = datab[i]; + assign data_in[i][64 +: `INST_FRM_BITS] = frm; end VX_pe_serializer #( .NUM_LANES (NUM_LANES), .NUM_PES (NUM_PES), .LATENCY (`LATENCY_FDIV), - .DATA_IN_WIDTH(DATAW), - .DATA_OUT_WIDTH(`FP_FLAGS_BITS + 32), + .DATA_IN_WIDTH (DATAW), + .DATA_OUT_WIDTH (`FP_FLAGS_BITS + 32), .TAG_WIDTH (NUM_LANES + TAG_WIDTH), .PE_REG (0), .OUT_BUF (2) @@ -87,6 +87,8 @@ module VX_fpu_div import VX_fpu_pkg::*; #( .ready_out (ready_out) ); + `UNUSED_VAR (pe_data_in) + for (genvar i = 0; i < NUM_LANES; ++i) begin assign result[i] = data_out[i][0 +: 32]; assign fflags_out[i] = data_out[i][32 +: `FP_FLAGS_BITS]; @@ -145,9 +147,9 @@ module VX_fpu_div import VX_fpu_pkg::*; #( dpi_fdiv ( pe_enable, int'(0), - {32'hffffffff, pe_data_in[i][0 +: 32]}, - {32'hffffffff, pe_data_in[i][32 +: 32]}, - frm, + {32'hffffffff, pe_data_in[i][0 +: 32]}, // a + 
{32'hffffffff, pe_data_in[i][32 +: 32]}, // b + pe_data_in[0][64 +: `INST_FRM_BITS], // frm r, f ); diff --git a/hw/rtl/fpu/VX_fpu_fma.sv b/hw/rtl/fpu/VX_fpu_fma.sv index 1bcc5d0089..ce09830d02 100644 --- a/hw/rtl/fpu/VX_fpu_fma.sv +++ b/hw/rtl/fpu/VX_fpu_fma.sv @@ -52,6 +52,7 @@ module VX_fpu_fma import VX_fpu_pkg::*; #( localparam DATAW = 3 * 32 + `INST_FRM_BITS; wire [NUM_LANES-1:0][DATAW-1:0] data_in; + wire [NUM_LANES-1:0] mask_out; wire [NUM_LANES-1:0][(`FP_FLAGS_BITS+32)-1:0] data_out; wire [NUM_LANES-1:0][`FP_FLAGS_BITS-1:0] fflags_out; @@ -96,8 +97,8 @@ module VX_fpu_fma import VX_fpu_pkg::*; #( .NUM_LANES (NUM_LANES), .NUM_PES (NUM_PES), .LATENCY (`LATENCY_FMA), - .DATA_IN_WIDTH(DATAW), - .DATA_OUT_WIDTH(`FP_FLAGS_BITS + 32), + .DATA_IN_WIDTH (DATAW), + .DATA_OUT_WIDTH (`FP_FLAGS_BITS + 32), .TAG_WIDTH (NUM_LANES + TAG_WIDTH), .PE_REG (1), // must be registered for DSPs .OUT_BUF (2) diff --git a/hw/rtl/fpu/VX_fpu_ncp.sv b/hw/rtl/fpu/VX_fpu_ncp.sv index 3728c2932e..e39af42963 100644 --- a/hw/rtl/fpu/VX_fpu_ncp.sv +++ b/hw/rtl/fpu/VX_fpu_ncp.sv @@ -48,6 +48,7 @@ module VX_fpu_ncp import VX_fpu_pkg::*; #( localparam DATAW = 2 * 32 + `INST_FRM_BITS + `INST_FPU_BITS; wire [NUM_LANES-1:0][DATAW-1:0] data_in; + wire [NUM_LANES-1:0] mask_out; wire [NUM_LANES-1:0][(`FP_FLAGS_BITS+32)-1:0] data_out; fflags_t [NUM_LANES-1:0] fflags_out; @@ -67,8 +68,8 @@ module VX_fpu_ncp import VX_fpu_pkg::*; #( .NUM_LANES (NUM_LANES), .NUM_PES (NUM_PES), .LATENCY (`LATENCY_FNCP), - .DATA_IN_WIDTH(DATAW), - .DATA_OUT_WIDTH(`FP_FLAGS_BITS + 32), + .DATA_IN_WIDTH (DATAW), + .DATA_OUT_WIDTH (`FP_FLAGS_BITS + 32), .TAG_WIDTH (NUM_LANES + TAG_WIDTH), .PE_REG (0), .OUT_BUF (2) diff --git a/hw/rtl/fpu/VX_fpu_sqrt.sv b/hw/rtl/fpu/VX_fpu_sqrt.sv index c9d97af7f8..557e21f203 100644 --- a/hw/rtl/fpu/VX_fpu_sqrt.sv +++ b/hw/rtl/fpu/VX_fpu_sqrt.sv @@ -43,10 +43,9 @@ module VX_fpu_sqrt import VX_fpu_pkg::*; #( input wire ready_out, output wire valid_out ); + localparam DATAW = 32 + 
`INST_FRM_BITS; - `UNUSED_VAR (frm) - - localparam DATAW = 32; + wire [NUM_LANES-1:0][DATAW-1:0] data_in; wire [NUM_LANES-1:0] mask_out; wire [NUM_LANES-1:0][(`FP_FLAGS_BITS+32)-1:0] data_out; @@ -56,12 +55,17 @@ module VX_fpu_sqrt import VX_fpu_pkg::*; #( wire [NUM_PES-1:0][DATAW-1:0] pe_data_in; wire [NUM_PES-1:0][(`FP_FLAGS_BITS+32)-1:0] pe_data_out; + for (genvar i = 0; i < NUM_LANES; ++i) begin + assign data_in[i][0 +: 32] = dataa[i]; + assign data_in[i][32 +: `INST_FRM_BITS] = frm; + end + VX_pe_serializer #( .NUM_LANES (NUM_LANES), .NUM_PES (NUM_PES), .LATENCY (`LATENCY_FSQRT), - .DATA_IN_WIDTH(DATAW), - .DATA_OUT_WIDTH(`FP_FLAGS_BITS + 32), + .DATA_IN_WIDTH (DATAW), + .DATA_OUT_WIDTH (`FP_FLAGS_BITS + 32), .TAG_WIDTH (NUM_LANES + TAG_WIDTH), .PE_REG (0), .OUT_BUF (2) @@ -69,7 +73,7 @@ module VX_fpu_sqrt import VX_fpu_pkg::*; #( .clk (clk), .reset (reset), .valid_in (valid_in), - .data_in (dataa), + .data_in (data_in), .tag_in ({mask_in, tag_in}), .ready_in (ready_in), .pe_enable (pe_enable), @@ -81,6 +85,8 @@ module VX_fpu_sqrt import VX_fpu_pkg::*; #( .ready_out (ready_out) ); + `UNUSED_VAR (pe_data_in) + for (genvar i = 0; i < NUM_LANES; ++i) begin assign result[i] = data_out[i][0 +: 32]; assign fflags_out[i] = data_out[i][32 +: `FP_FLAGS_BITS]; @@ -137,8 +143,8 @@ module VX_fpu_sqrt import VX_fpu_pkg::*; #( dpi_fsqrt ( pe_enable, int'(0), - {32'hffffffff, pe_data_in[i]}, - frm, + {32'hffffffff, pe_data_in[i][0 +: 32]}, // a + pe_data_in[0][32 +: `INST_FRM_BITS], // frm r, f ); From 335b53475a4cd37151d3e06884445f556fa7bc69 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Wed, 4 Sep 2024 02:01:59 -0700 Subject: [PATCH 139/407] minor updates --- hw/rtl/fpu/VX_fpu_dsp.sv | 208 +++++++++++++++++++++----------- hw/rtl/fpu/VX_fpu_fma.sv | 2 +- hw/rtl/libs/VX_mem_coalescer.sv | 12 +- hw/rtl/libs/VX_rr_arbiter.sv | 2 +- hw/rtl/libs/VX_stream_buffer.sv | 6 +- hw/rtl/libs/VX_stream_unpack.sv | 12 +- hw/rtl/mem/VX_local_mem.sv | 2 +- 7 files changed, 152 
insertions(+), 92 deletions(-) diff --git a/hw/rtl/fpu/VX_fpu_dsp.sv b/hw/rtl/fpu/VX_fpu_dsp.sv index 1a6e944e2a..c12c82d87a 100644 --- a/hw/rtl/fpu/VX_fpu_dsp.sv +++ b/hw/rtl/fpu/VX_fpu_dsp.sv @@ -61,6 +61,8 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #( wire [NUM_FPCORES-1:0] per_core_valid_in; wire [NUM_FPCORES-1:0][REQ_DATAW-1:0] per_core_data_in; + wire [NUM_FPCORES-1:0] per_core_ready_in; + wire [NUM_FPCORES-1:0][NUM_LANES-1:0] per_core_mask_in; wire [NUM_FPCORES-1:0][TAG_WIDTH-1:0] per_core_tag_in; wire [NUM_FPCORES-1:0][`INST_FPU_BITS-1:0] per_core_op_type; @@ -69,28 +71,13 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #( wire [NUM_FPCORES-1:0][NUM_LANES-1:0][31:0] per_core_dataa; wire [NUM_FPCORES-1:0][NUM_LANES-1:0][31:0] per_core_datab; wire [NUM_FPCORES-1:0][NUM_LANES-1:0][31:0] per_core_datac; - wire [NUM_FPCORES-1:0] per_core_ready_in; + wire [NUM_FPCORES-1:0] per_core_valid_out; wire [NUM_FPCORES-1:0][NUM_LANES-1:0][31:0] per_core_result; wire [NUM_FPCORES-1:0][TAG_WIDTH-1:0] per_core_tag_out; - wire [NUM_FPCORES-1:0] per_core_ready_out; - wire [NUM_FPCORES-1:0] per_core_valid_out; wire [NUM_FPCORES-1:0] per_core_has_fflags; fflags_t [NUM_FPCORES-1:0] per_core_fflags; - - wire [1:0] div_sqrt_ready_in; - wire [1:0][NUM_LANES-1:0][31:0] div_sqrt_result; - wire [1:0][TAG_WIDTH-1:0] div_sqrt_tag_out; - wire [1:0] div_sqrt_ready_out; - wire [1:0] div_sqrt_valid_out; - wire [1:0] div_sqrt_has_fflags; - fflags_t [1:0] div_sqrt_fflags; - - `RESET_RELAY (fma_reset, reset); - `RESET_RELAY (div_reset, reset); - `RESET_RELAY (sqrt_reset, reset); - `RESET_RELAY (cvt_reset, reset); - `RESET_RELAY (ncp_reset, reset); + wire [NUM_FPCORES-1:0] per_core_ready_out; wire [NUM_LANES-1:0][31:0] dataa_s; wire [NUM_LANES-1:0][31:0] datab_s; @@ -118,7 +105,7 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #( .sel_in (core_select), .valid_in (valid_in), .ready_in (ready_in), - .data_in ({mask_in, tag_in, op_type, fmt, frm, dataa_s, datab_s, datac_s}), + .data_in ({mask_in, tag_in, 
fmt, frm, dataa_s, datab_s, datac_s, op_type}), .data_out (per_core_data_in), .valid_out (per_core_valid_in), .ready_out (per_core_ready_in) @@ -128,21 +115,23 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #( assign { per_core_mask_in[i], per_core_tag_in[i], - per_core_op_type[i], per_core_fmt[i], per_core_frm[i], per_core_dataa[i], per_core_datab[i], - per_core_datac[i] + per_core_datac[i], + per_core_op_type[i] } = per_core_data_in[i]; end - // FMA core + // FMA core /////////////////////////////////////////////////////////////// wire is_madd = per_core_op_type[FPU_FMA][1]; wire is_neg = per_core_op_type[FPU_FMA][0]; wire is_sub = per_core_fmt[FPU_FMA][1]; + `RESET_RELAY (fma_reset, reset); + VX_fpu_fma #( .NUM_LANES (NUM_LANES), .TAG_WIDTH (TAG_WIDTH) @@ -168,24 +157,95 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #( .valid_out (per_core_valid_out[FPU_FMA]) ); - // Div/Sqrt cores + // Div/Sqrt cores ///////////////////////////////////////////////////////// + + wire [1:0] div_sqrt_valid_in; + wire [1:0][REQ_DATAW-1:0] div_sqrt_data_in; + wire [1:0] div_sqrt_ready_in; + + wire [1:0][NUM_LANES-1:0] div_sqrt_mask_in; + wire [1:0][TAG_WIDTH-1:0] div_sqrt_tag_in; + wire [1:0][`INST_FPU_BITS-1:0] div_sqrt_op_type; + wire [1:0][`INST_FMT_BITS-1:0] div_sqrt_fmt; + wire [1:0][`INST_FRM_BITS-1:0] div_sqrt_frm; + wire [1:0][NUM_LANES-1:0][31:0] div_sqrt_dataa; + wire [1:0][NUM_LANES-1:0][31:0] div_sqrt_datab; + wire [1:0][NUM_LANES-1:0][31:0] div_sqrt_datac; + + wire [1:0] div_sqrt_valid_out; + wire [1:0][NUM_LANES-1:0][31:0] div_sqrt_result; + wire [1:0][TAG_WIDTH-1:0] div_sqrt_tag_out; + wire [1:0] div_sqrt_has_fflags; + fflags_t [1:0] div_sqrt_fflags; + wire [1:0] div_sqrt_ready_out; + + wire div_sqrt_valid_tmp_in; + wire [REQ_DATAW-1:0] div_sqrt_data_tmp_in; + wire div_sqrt_ready_tmp_in; + + VX_elastic_buffer #( + .DATAW (REQ_DATAW) + ) div_sqrt_req_buffer ( + .clk (clk), + .reset (reset), + .valid_in (per_core_valid_in[FPU_DIVSQRT]), + .ready_in 
(per_core_ready_in[FPU_DIVSQRT]), + .data_in (per_core_data_in[FPU_DIVSQRT]), + .data_out (div_sqrt_data_tmp_in), + .valid_out (div_sqrt_valid_tmp_in), + .ready_out (div_sqrt_ready_tmp_in) + ); + + wire is_sqrt = div_sqrt_data_tmp_in[0]; // op_type[0] - wire is_sqrt = per_core_op_type[FPU_DIVSQRT][0]; - assign per_core_ready_in[FPU_DIVSQRT] = div_sqrt_ready_in[is_sqrt]; + VX_stream_switch #( + .DATAW (REQ_DATAW), + .NUM_OUTPUTS (2) + ) div_sqrt_req_switch ( + .clk (clk), + .reset (reset), + .sel_in (is_sqrt), + .valid_in (div_sqrt_valid_tmp_in), + .ready_in (div_sqrt_ready_tmp_in), + .data_in (div_sqrt_data_tmp_in), + .data_out (div_sqrt_data_in), + .valid_out (div_sqrt_valid_in), + .ready_out (div_sqrt_ready_in) + ); + + for (genvar i = 0; i < 2; ++i) begin + assign { + div_sqrt_mask_in[i], + div_sqrt_tag_in[i], + div_sqrt_fmt[i], + div_sqrt_frm[i], + div_sqrt_dataa[i], + div_sqrt_datab[i], + div_sqrt_datac[i], + div_sqrt_op_type[i] + } = div_sqrt_data_in[i]; + end + + `UNUSED_VAR (div_sqrt_op_type) + `UNUSED_VAR (div_sqrt_fmt) + `UNUSED_VAR (div_sqrt_datab) + `UNUSED_VAR (div_sqrt_datac) + + `RESET_RELAY (div_sqrt_reset, reset); VX_fpu_div #( .NUM_LANES (NUM_LANES), .TAG_WIDTH (TAG_WIDTH) ) fpu_div ( .clk (clk), - .reset (div_reset), - .valid_in (per_core_valid_in[FPU_DIVSQRT] && ~is_sqrt), + .reset (div_sqrt_reset), + .valid_in (div_sqrt_valid_in[0]), .ready_in (div_sqrt_ready_in[0]), - .mask_in (per_core_mask_in[FPU_DIVSQRT]), - .tag_in (per_core_tag_in[FPU_DIVSQRT]), - .frm (per_core_frm[FPU_DIVSQRT]), - .dataa (per_core_dataa[FPU_DIVSQRT]), - .datab (per_core_datab[FPU_DIVSQRT]), + .mask_in (div_sqrt_mask_in[0]), + .tag_in (div_sqrt_tag_in[0]), + .frm (div_sqrt_frm[0]), + .dataa (div_sqrt_dataa[0]), + .datab (div_sqrt_datab[0]), .has_fflags (div_sqrt_has_fflags[0]), .fflags (div_sqrt_fflags[0]), .result (div_sqrt_result[0]), @@ -199,13 +259,13 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #( .TAG_WIDTH (TAG_WIDTH) ) fpu_sqrt ( .clk (clk), - .reset (sqrt_reset), 
- .valid_in (per_core_valid_in[FPU_DIVSQRT] && is_sqrt), + .reset (div_sqrt_reset), + .valid_in (div_sqrt_valid_in[1]), .ready_in (div_sqrt_ready_in[1]), - .mask_in (per_core_mask_in[FPU_DIVSQRT]), - .tag_in (per_core_tag_in[FPU_DIVSQRT]), - .frm (per_core_frm[FPU_DIVSQRT]), - .dataa (per_core_dataa[FPU_DIVSQRT]), + .mask_in (div_sqrt_mask_in[1]), + .tag_in (div_sqrt_tag_in[1]), + .frm (div_sqrt_frm[1]), + .dataa (div_sqrt_dataa[1]), .has_fflags (div_sqrt_has_fflags[1]), .fflags (div_sqrt_fflags[1]), .result (div_sqrt_result[1]), @@ -214,13 +274,47 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #( .ready_out (div_sqrt_ready_out[1]) ); - // CVT core + wire [1:0][RSP_DATAW-1:0] div_sqrt_arb_data_in; + for (genvar i = 0; i < 2; ++i) begin + assign div_sqrt_arb_data_in[i] = { + div_sqrt_result[i], + div_sqrt_has_fflags[i], + div_sqrt_fflags[i], + div_sqrt_tag_out[i] + }; + end + + VX_stream_arb #( + .NUM_INPUTS (2), + .DATAW (RSP_DATAW), + .ARBITER ("P"), + .OUT_BUF (0) + ) div_sqrt_rsp_arb ( + .clk (clk), + .reset (reset), + .valid_in (div_sqrt_valid_out), + .ready_in (div_sqrt_ready_out), + .data_in (div_sqrt_arb_data_in), + .data_out ({ + per_core_result[FPU_DIVSQRT], + per_core_has_fflags[FPU_DIVSQRT], + per_core_fflags[FPU_DIVSQRT], + per_core_tag_out[FPU_DIVSQRT] + }), + .valid_out (per_core_valid_out[FPU_DIVSQRT]), + .ready_out (per_core_ready_out[FPU_DIVSQRT]), + `UNUSED_PIN (sel_out) + ); + + // CVT core /////////////////////////////////////////////////////////////// wire is_itof = per_core_op_type[FPU_CVT][1]; wire is_signed = ~per_core_op_type[FPU_CVT][0]; wire cvt_ret_int_in = ~is_itof; wire cvt_ret_int_out; + `RESET_RELAY (cvt_reset, reset); + VX_fpu_cvt #( .NUM_LANES (NUM_LANES), .TAG_WIDTH (1+TAG_WIDTH) @@ -243,7 +337,7 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #( .ready_out (per_core_ready_out[FPU_CVT]) ); - // NCP core + // NCP core /////////////////////////////////////////////////////////////// wire ncp_ret_int_in = (per_core_op_type[FPU_NCP] == 
`INST_FPU_CMP) || `INST_FPU_IS_CLASS(per_core_op_type[FPU_NCP], per_core_frm[FPU_NCP]) @@ -253,6 +347,8 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #( wire ncp_ret_sext_in = `INST_FPU_IS_MVXW(per_core_op_type[FPU_NCP], per_core_frm[FPU_NCP]); wire ncp_ret_sext_out; + `RESET_RELAY (ncp_reset, reset); + VX_fpu_ncp #( .NUM_LANES (NUM_LANES), .TAG_WIDTH (TAG_WIDTH+2) @@ -277,40 +373,6 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #( /////////////////////////////////////////////////////////////////////////// - wire [1:0][RSP_DATAW-1:0] div_sqrt_arb_data_in; - for (genvar i = 0; i < 2; ++i) begin - assign div_sqrt_arb_data_in[i] = { - div_sqrt_result[i], - div_sqrt_has_fflags[i], - div_sqrt_fflags[i], - div_sqrt_tag_out[i] - }; - end - - VX_stream_arb #( - .NUM_INPUTS (2), - .DATAW (RSP_DATAW), - .ARBITER ("P"), - .OUT_BUF (0) - ) div_sqrt_arb ( - .clk (clk), - .reset (reset), - .valid_in (div_sqrt_valid_out), - .ready_in (div_sqrt_ready_out), - .data_in (div_sqrt_arb_data_in), - .data_out ({ - per_core_result[FPU_DIVSQRT], - per_core_has_fflags[FPU_DIVSQRT], - per_core_fflags[FPU_DIVSQRT], - per_core_tag_out[FPU_DIVSQRT] - }), - .valid_out (per_core_valid_out[FPU_DIVSQRT]), - .ready_out (per_core_ready_out[FPU_DIVSQRT]), - `UNUSED_PIN (sel_out) - ); - - /////////////////////////////////////////////////////////////////////////// - reg [NUM_FPCORES-1:0][RSP_DATAW+2-1:0] per_core_data_out; always @(*) begin diff --git a/hw/rtl/fpu/VX_fpu_fma.sv b/hw/rtl/fpu/VX_fpu_fma.sv index ce09830d02..8ab5b10b34 100644 --- a/hw/rtl/fpu/VX_fpu_fma.sv +++ b/hw/rtl/fpu/VX_fpu_fma.sv @@ -100,7 +100,7 @@ module VX_fpu_fma import VX_fpu_pkg::*; #( .DATA_IN_WIDTH (DATAW), .DATA_OUT_WIDTH (`FP_FLAGS_BITS + 32), .TAG_WIDTH (NUM_LANES + TAG_WIDTH), - .PE_REG (1), // must be registered for DSPs + .PE_REG (0), .OUT_BUF (2) ) pe_serializer ( .clk (clk), diff --git a/hw/rtl/libs/VX_mem_coalescer.sv b/hw/rtl/libs/VX_mem_coalescer.sv index 5c283e06c2..32ad728b84 100644 --- 
a/hw/rtl/libs/VX_mem_coalescer.sv +++ b/hw/rtl/libs/VX_mem_coalescer.sv @@ -159,13 +159,11 @@ module VX_mem_coalescer #( req_data_merged = 'x; for (integer i = 0; i < OUT_REQS; ++i) begin for (integer j = 0; j < DATA_RATIO; ++j) begin - if (current_pmask[i * DATA_RATIO + j]) begin - for (integer k = 0; k < DATA_IN_SIZE; ++k) begin - // perform byte-level merge since each thread may have different bytes enabled - if (in_req_byteen[DATA_RATIO * i + j][k]) begin - req_byteen_merged[i][in_addr_offset[DATA_RATIO * i + j]][k] = 1'b1; - req_data_merged[i][in_addr_offset[DATA_RATIO * i + j]][k * 8 +: 8] = in_req_data[DATA_RATIO * i + j][k * 8 +: 8]; - end + for (integer k = 0; k < DATA_IN_SIZE; ++k) begin + // perform byte-level merge since each thread may have different bytes enabled + if (current_pmask[i * DATA_RATIO + j] && in_req_byteen[DATA_RATIO * i + j][k]) begin + req_byteen_merged[i][in_addr_offset[DATA_RATIO * i + j]][k] = 1'b1; + req_data_merged[i][in_addr_offset[DATA_RATIO * i + j]][k * 8 +: 8] = in_req_data[DATA_RATIO * i + j][k * 8 +: 8]; end end end diff --git a/hw/rtl/libs/VX_rr_arbiter.sv b/hw/rtl/libs/VX_rr_arbiter.sv index a2a9a9654a..894f4e3120 100644 --- a/hw/rtl/libs/VX_rr_arbiter.sv +++ b/hw/rtl/libs/VX_rr_arbiter.sv @@ -434,7 +434,7 @@ module VX_rr_arbiter #( wire has_masked_reqs = (| masked_reqs); wire has_unmasked_reqs = (| requests); - assign grant_onehot = ({NUM_REQS{~has_masked_reqs}} & grant_unmasked) | grant_masked; + assign grant_onehot = has_masked_reqs ? 
grant_masked : grant_unmasked; always @(posedge clk) begin if (reset) begin diff --git a/hw/rtl/libs/VX_stream_buffer.sv b/hw/rtl/libs/VX_stream_buffer.sv index 81978b7350..5e8297f7a1 100644 --- a/hw/rtl/libs/VX_stream_buffer.sv +++ b/hw/rtl/libs/VX_stream_buffer.sv @@ -52,16 +52,16 @@ module VX_stream_buffer #( reg no_buffer; wire fire_in = valid_in && ready_in; - wire flow_out = ready_out || ~valid_out_r; + wire flow_out = ready_out || ~valid_out; always @(posedge clk) begin if (reset) begin valid_out_r <= 0; no_buffer <= 1; end else begin - if (ready_out) begin + if (flow_out) begin no_buffer <= 1; - end else if (valid_in && valid_out) begin + end else if (valid_in) begin no_buffer <= 0; end if (flow_out) begin diff --git a/hw/rtl/libs/VX_stream_unpack.sv b/hw/rtl/libs/VX_stream_unpack.sv index cb85d4804f..30e2a444f5 100644 --- a/hw/rtl/libs/VX_stream_unpack.sv +++ b/hw/rtl/libs/VX_stream_unpack.sv @@ -38,18 +38,18 @@ module VX_stream_unpack #( ); if (NUM_REQS > 1) begin - reg [NUM_REQS-1:0] rem_mask; + reg [NUM_REQS-1:0] rem_mask_r; wire [NUM_REQS-1:0] ready_out_w; - wire [NUM_REQS-1:0] rem_mask_n = rem_mask & ~ready_out_w; - wire sent_all = ~(| (mask_in & rem_mask_n)); + wire [NUM_REQS-1:0] rem_mask_n = rem_mask_r & ~ready_out_w; + wire sent_all = (mask_in & rem_mask_n) == '0; always @(posedge clk) begin if (reset) begin - rem_mask <= '1; + rem_mask_r <= {NUM_REQS{1'b1}}; end else begin if (valid_in) begin - rem_mask <= sent_all ? 
'1 : rem_mask_n; + rem_mask_r <= {NUM_REQS{sent_all}} | rem_mask_n; end end end @@ -64,7 +64,7 @@ module VX_stream_unpack #( ) out_buf ( .clk (clk), .reset (reset), - .valid_in (valid_in && mask_in[i] && rem_mask[i]), + .valid_in (valid_in && mask_in[i] && rem_mask_r[i]), .ready_in (ready_out_w[i]), .data_in ({data_in[i], tag_in}), .data_out ({data_out[i], tag_out[i]}), diff --git a/hw/rtl/mem/VX_local_mem.sv b/hw/rtl/mem/VX_local_mem.sv index 462103c09f..6dbe283255 100644 --- a/hw/rtl/mem/VX_local_mem.sv +++ b/hw/rtl/mem/VX_local_mem.sv @@ -123,7 +123,7 @@ module VX_local_mem import VX_gpu_pkg::*; #( .NUM_OUTPUTS (NUM_BANKS), .DATAW (REQ_DATAW), .PERF_CTR_BITS (`PERF_CTR_BITS), - .ARBITER ("C"), + .ARBITER ("P"), .OUT_BUF (3) // output should be registered for the data_store addressing ) req_xbar ( .clk (clk), From fd5903fef1c79165cf9102af9d9d678f51724261 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Wed, 4 Sep 2024 03:34:25 -0700 Subject: [PATCH 140/407] minor update --- hw/rtl/afu/opae/vortex_afu.sv | 2 +- hw/syn/xilinx/dut/top/Makefile | 16 ---------------- sim/opaesim/opae_sim.cpp | 19 ++++--------------- sim/rtlsim/processor.cpp | 8 -------- sim/xrtsim/Makefile | 15 --------------- sim/xrtsim/xrt_sim.cpp | 11 ----------- 6 files changed, 5 insertions(+), 66 deletions(-) diff --git a/hw/rtl/afu/opae/vortex_afu.sv b/hw/rtl/afu/opae/vortex_afu.sv index 61465103eb..5dcb9a4307 100644 --- a/hw/rtl/afu/opae/vortex_afu.sv +++ b/hw/rtl/afu/opae/vortex_afu.sv @@ -1011,7 +1011,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ wire mem_rsp_fire = mem_bus_if[0].rsp_valid && mem_bus_if[0].rsp_ready; wire avs_write_fire = avs_write[0] && ~avs_waitrequest[0]; wire avs_read_fire = avs_read[0] && ~avs_waitrequest[0]; - wire [$bits(t_local_mem_addr)-1:0] mem_bus_if_addr = mem_bus_if[0].req_data.addr; + wire [LMEM_ADDR_WIDTH-1:0] mem_bus_if_addr = mem_bus_if[0].req_data.addr; reg [STATE_WIDTH-1:0] state_prev; always @(posedge clk) begin 
diff --git a/hw/syn/xilinx/dut/top/Makefile b/hw/syn/xilinx/dut/top/Makefile index 3a06715b53..c471b78075 100644 --- a/hw/syn/xilinx/dut/top/Makefile +++ b/hw/syn/xilinx/dut/top/Makefile @@ -5,22 +5,6 @@ FPU_IP = 1 include ../../common.mk -# AFU parameters -CONFIGS += -DNOPAE -CONFIGS += -DPLATFORM_PROVIDES_LOCAL_MEMORY -ifeq (,$(findstring PLATFORM_PARAM_LOCAL_MEMORY_BANKS,$(CONFIGS))) - CONFIGS += -DPLATFORM_PARAM_LOCAL_MEMORY_BANKS=2 -endif -ifeq (,$(findstring PLATFORM_PARAM_LOCAL_MEMORY_ADDR_WIDTH,$(CONFIGS))) - CONFIGS += -DPLATFORM_PARAM_LOCAL_MEMORY_ADDR_WIDTH=26 -endif -ifeq (,$(findstring PLATFORM_PARAM_LOCAL_MEMORY_DATA_WIDTH,$(CONFIGS))) - CONFIGS += -DPLATFORM_PARAM_LOCAL_MEMORY_DATA_WIDTH=512 -endif -ifeq (,$(findstring PLATFORM_PARAM_LOCAL_MEMORY_BURST_CNT_WIDTH,$(CONFIGS))) - CONFIGS += -DPLATFORM_PARAM_LOCAL_MEMORY_BURST_CNT_WIDTH=4 -endif - #CONFIGS += -DNUM_CORES=2 #CONFIGS += -DNUM_WARPS=32 #CONFIGS += -DNUM_THREADS=32 diff --git a/sim/opaesim/opae_sim.cpp b/sim/opaesim/opae_sim.cpp index 7a1bae3e4f..9cf185abf8 100644 --- a/sim/opaesim/opae_sim.cpp +++ b/sim/opaesim/opae_sim.cpp @@ -35,21 +35,10 @@ #include #include -//#ifndef MEMORY_BANKS - #ifdef PLATFORM_PARAM_LOCAL_MEMORY_BANKS - #define MEMORY_BANKS PLATFORM_PARAM_LOCAL_MEMORY_BANKS - #else - #define MEMORY_BANKS 2 - #endif -//#endif - #ifndef MEM_CLOCK_RATIO #define MEM_CLOCK_RATIO 1 #endif -#undef MEM_BLOCK_SIZE -#define MEM_BLOCK_SIZE (PLATFORM_PARAM_LOCAL_MEMORY_DATA_WIDTH / 8) - #define CACHE_BLOCK_SIZE 64 #define CCI_LATENCY 8 @@ -419,7 +408,7 @@ class opae_sim::Impl { } void avs_bus_reset() { - for (int b = 0; b < MEMORY_BANKS; ++b) { + for (int b = 0; b < PLATFORM_PARAM_LOCAL_MEMORY_BANKS; ++b) { pending_mem_reqs_[b].clear(); device_->avs_readdatavalid[b] = 0; device_->avs_waitrequest[b] = 0; @@ -427,7 +416,7 @@ class opae_sim::Impl { } void avs_bus_eval() { - for (int b = 0; b < MEMORY_BANKS; ++b) { + for (int b = 0; b < PLATFORM_PARAM_LOCAL_MEMORY_BANKS; ++b) { // process memory 
responses device_->avs_readdatavalid[b] = 0; if (!pending_mem_reqs_[b].empty() @@ -443,7 +432,7 @@ class opae_sim::Impl { // process memory requests assert(!device_->avs_read[b] || !device_->avs_write[b]); - unsigned byte_addr = (device_->avs_address[b] * MEMORY_BANKS + b) * MEM_BLOCK_SIZE; + unsigned byte_addr = (device_->avs_address[b] * PLATFORM_PARAM_LOCAL_MEMORY_BANKS + b) * MEM_BLOCK_SIZE; if (device_->avs_write[b]) { uint64_t byteen = device_->avs_byteenable[b]; uint8_t* data = (uint8_t*)(device_->avs_writedata[b].data()); @@ -530,7 +519,7 @@ class opae_sim::Impl { std::unordered_map host_buffers_; int64_t host_buffer_ids_; - std::list pending_mem_reqs_[MEMORY_BANKS]; + std::list pending_mem_reqs_[PLATFORM_PARAM_LOCAL_MEMORY_BANKS]; std::list cci_reads_; std::list cci_writes_; diff --git a/sim/rtlsim/processor.cpp b/sim/rtlsim/processor.cpp index e5e00f49eb..d964a3d5a1 100644 --- a/sim/rtlsim/processor.cpp +++ b/sim/rtlsim/processor.cpp @@ -41,14 +41,6 @@ typedef VVortex Device; #include #include -#ifndef MEMORY_BANKS - #ifdef PLATFORM_PARAM_LOCAL_MEMORY_BANKS - #define MEMORY_BANKS PLATFORM_PARAM_LOCAL_MEMORY_BANKS - #else - #define MEMORY_BANKS 2 - #endif -#endif - #ifndef MEM_CLOCK_RATIO #define MEM_CLOCK_RATIO 1 #endif diff --git a/sim/xrtsim/Makefile b/sim/xrtsim/Makefile index e45b0bfa24..63787e5b67 100644 --- a/sim/xrtsim/Makefile +++ b/sim/xrtsim/Makefile @@ -32,21 +32,6 @@ DBG_SCOPE_FLAGS += -DDBG_SCOPE_FETCH DBG_SCOPE_FLAGS += -DDBG_SCOPE_LSU DBG_SCOPE_FLAGS += -DDBG_SCOPE_MSCHED -# AFU parameters -CONFIGS += -DPLATFORM_PROVIDES_LOCAL_MEMORY -ifeq (,$(findstring PLATFORM_PARAM_LOCAL_MEMORY_BANKS,$(CONFIGS))) - CONFIGS += -DPLATFORM_PARAM_LOCAL_MEMORY_BANKS=2 -endif -ifeq (,$(findstring PLATFORM_PARAM_LOCAL_MEMORY_ADDR_WIDTH,$(CONFIGS))) - CONFIGS += -DPLATFORM_PARAM_LOCAL_MEMORY_ADDR_WIDTH=26 -endif -ifeq (,$(findstring PLATFORM_PARAM_LOCAL_MEMORY_DATA_WIDTH,$(CONFIGS))) - CONFIGS += -DPLATFORM_PARAM_LOCAL_MEMORY_DATA_WIDTH=512 -endif -ifeq 
(,$(findstring PLATFORM_PARAM_LOCAL_MEMORY_BURST_CNT_WIDTH,$(CONFIGS))) - CONFIGS += -DPLATFORM_PARAM_LOCAL_MEMORY_BURST_CNT_WIDTH=4 -endif - DBG_FLAGS += -DDEBUG_LEVEL=$(DEBUG) -DVCD_OUTPUT $(DBG_TRACE_FLAGS) SRCS = $(COMMON_DIR)/util.cpp $(COMMON_DIR)/mem.cpp $(COMMON_DIR)/rvfloats.cpp $(COMMON_DIR)/dram_sim.cpp diff --git a/sim/xrtsim/xrt_sim.cpp b/sim/xrtsim/xrt_sim.cpp index 880983bf10..80aed7f7da 100644 --- a/sim/xrtsim/xrt_sim.cpp +++ b/sim/xrtsim/xrt_sim.cpp @@ -33,21 +33,10 @@ #include #include -#ifndef MEMORY_BANKS - #ifdef PLATFORM_PARAM_LOCAL_MEMORY_BANKS - #define MEMORY_BANKS PLATFORM_PARAM_LOCAL_MEMORY_BANKS - #else - #define MEMORY_BANKS 2 - #endif -#endif - #ifndef MEM_CLOCK_RATIO #define MEM_CLOCK_RATIO 1 #endif -#undef MEM_BLOCK_SIZE -#define MEM_BLOCK_SIZE (PLATFORM_PARAM_LOCAL_MEMORY_DATA_WIDTH / 8) - #define CACHE_BLOCK_SIZE 64 #ifndef TRACE_START_TIME From 32738e0b74e203827d3de1731e353c428bc42a28 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Wed, 4 Sep 2024 03:39:29 -0700 Subject: [PATCH 141/407] CI script update --- .github/workflows/ci.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 724ec2a138..64317337bb 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -106,7 +106,7 @@ jobs: make tests -s > /dev/null - name: Upload Build Artifact - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: build-${{ matrix.xlen }} path: build${{ matrix.xlen }} @@ -147,7 +147,7 @@ jobs: ${{ runner.os }}-thirdparty- - name: Download Build Artifact - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: build-${{ matrix.xlen }} path: build${{ matrix.xlen }} From 039e5e2ffce77ee473a05f14bc35fa8c486d6e23 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Wed, 4 Sep 2024 03:52:55 -0700 Subject: [PATCH 142/407] minor update --- runtime/opae/Makefile | 8 ++++++-- 1 file changed, 6 insertions(+), 2 
deletions(-) diff --git a/runtime/opae/Makefile b/runtime/opae/Makefile index 9650915ea4..56355890db 100644 --- a/runtime/opae/Makefile +++ b/runtime/opae/Makefile @@ -25,9 +25,10 @@ SRCS = $(SRC_DIR)/vortex.cpp $(SRC_DIR)/driver.cpp # set up target types ifeq ($(TARGET), opaesim) - OPAESIM = $(DESTDIR)/libopae-c-sim.so + BUILD_DEPS = $(DESTDIR)/libopae-c-sim.so CXXFLAGS += -DOPAESIM -I$(SIM_DIR)/opaesim else + BUILD_DEPS = $(ROOT_DIR)/hw/syn/altera/opae/vortex_afu.h CXXFLAGS += -I$(SYN_DIR) -I$(ROOT_DIR)/hw/syn/altera/opae endif @@ -48,12 +49,15 @@ PROJECT := libvortex-opae.so all: $(DESTDIR)/$(PROJECT) +$(ROOT_DIR)/hw/syn/altera/opae/vortex_afu.h: + $(MAKE) -C $(ROOT_DIR)/hw/syn/altera/opae swconfig + driver: $(DESTDIR)/libopae-c-sim.so $(DESTDIR)/libopae-c-sim.so: DESTDIR=$(DESTDIR) $(MAKE) -C $(ROOT_DIR)/sim/opaesim $(DESTDIR)/libopae-c-sim.so -$(DESTDIR)/$(PROJECT): $(SRCS) $(OPAESIM) +$(DESTDIR)/$(PROJECT): $(SRCS) $(BUILD_DEPS) $(CXX) $(CXXFLAGS) $(SRCS) $(LDFLAGS) -o $@ clean-driver: From 7ca9a5e87e3b9b1a46f7a994ee56fd82d8c6b3b9 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Wed, 4 Sep 2024 13:39:51 -0700 Subject: [PATCH 143/407] reset relay refactory --- hw/rtl/afu/opae/vortex_afu.sv | 12 +++--------- hw/rtl/cache/VX_cache.sv | 12 +++--------- hw/rtl/cache/VX_cache_cluster.sv | 5 +---- hw/rtl/core/VX_alu_unit.sv | 8 +++----- hw/rtl/core/VX_fpu_unit.sv | 14 ++++++-------- hw/rtl/core/VX_issue.sv | 4 +--- hw/rtl/core/VX_issue_slice.sv | 15 +++++---------- hw/rtl/core/VX_lsu_slice.sv | 4 +--- hw/rtl/core/VX_lsu_unit.sv | 5 +---- hw/rtl/core/VX_mem_unit.sv | 11 +++-------- hw/rtl/core/VX_operands.sv | 8 ++------ hw/rtl/core/VX_schedule.sv | 8 ++------ hw/rtl/core/VX_sfu_unit.sv | 8 ++------ hw/rtl/fpu/VX_fpu_dsp.sv | 16 ++++------------ hw/rtl/libs/VX_stream_unpack.sv | 6 +++--- hw/rtl/mem/VX_local_mem.sv | 8 ++------ 16 files changed, 42 insertions(+), 102 deletions(-) diff --git a/hw/rtl/afu/opae/vortex_afu.sv b/hw/rtl/afu/opae/vortex_afu.sv index 
5dcb9a4307..e5ff16483c 100644 --- a/hw/rtl/afu/opae/vortex_afu.sv +++ b/hw/rtl/afu/opae/vortex_afu.sv @@ -475,8 +475,6 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ .TAG_WIDTH (AVS_REQ_TAGW) ) cci_vx_mem_bus_if[2](); - `RESET_RELAY (cci_adapter_reset, reset); - VX_mem_adapter #( .SRC_DATA_WIDTH (CCI_DATA_WIDTH), .DST_DATA_WIDTH (LMEM_DATA_WIDTH), @@ -488,7 +486,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ .RSP_OUT_BUF (0) ) cci_mem_adapter ( .clk (clk), - .reset (cci_adapter_reset), + .reset (reset), .mem_req_valid_in (cci_mem_req_valid), .mem_req_addr_in (cci_mem_req_addr), @@ -527,8 +525,6 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ assign vx_mem_req_valid_qual = vx_mem_req_valid && ~vx_mem_is_cout; - `RESET_RELAY (vx_adapter_reset, reset); - VX_mem_adapter #( .SRC_DATA_WIDTH (`VX_MEM_DATA_WIDTH), .DST_DATA_WIDTH (LMEM_DATA_WIDTH), @@ -540,7 +536,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ .RSP_OUT_BUF (2) ) vx_mem_adapter ( .clk (clk), - .reset (vx_adapter_reset), + .reset (reset), .mem_req_valid_in (vx_mem_req_valid_qual), .mem_req_addr_in (vx_mem_req_addr), @@ -595,8 +591,6 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ //-- - `RESET_RELAY (avs_adapter_reset, reset); - VX_avs_adapter #( .DATA_WIDTH (LMEM_DATA_WIDTH), .ADDR_WIDTH (LMEM_ADDR_WIDTH), @@ -608,7 +602,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ .RSP_OUT_BUF (0) ) avs_adapter ( .clk (clk), - .reset (avs_adapter_reset), + .reset (reset), // Memory request .mem_req_valid (mem_bus_if[0].req_valid), diff --git a/hw/rtl/cache/VX_cache.sv b/hw/rtl/cache/VX_cache.sv index 6d3e1351ea..ebb5d15193 100644 --- a/hw/rtl/cache/VX_cache.sv +++ b/hw/rtl/cache/VX_cache.sv @@ -319,8 +319,6 @@ module VX_cache import VX_gpu_pkg::*; #( wire [`PERF_CTR_BITS-1:0] perf_collisions; `endif 
- `RESET_RELAY (req_xbar_reset, reset); - VX_stream_xbar #( .NUM_INPUTS (NUM_REQS), .NUM_OUTPUTS (NUM_BANKS), @@ -330,7 +328,7 @@ module VX_cache import VX_gpu_pkg::*; #( .OUT_BUF (REQ_XBAR_BUF) ) req_xbar ( .clk (clk), - .reset (req_xbar_reset), + .reset (reset), `ifdef PERF_ENABLE .collisions(perf_collisions), `else @@ -369,8 +367,6 @@ module VX_cache import VX_gpu_pkg::*; #( assign curr_bank_mem_rsp_valid = mem_rsp_valid_s && (`CS_MEM_TAG_TO_BANK_ID(mem_rsp_tag_s) == bank_id); end - `RESET_RELAY (bank_reset, reset); - VX_cache_bank #( .BANK_ID (bank_id), .INSTANCE_ID ($sformatf("%s-bank%0d", INSTANCE_ID, bank_id)), @@ -392,7 +388,7 @@ module VX_cache import VX_gpu_pkg::*; #( .MEM_OUT_REG (MEM_REQ_REG_DISABLE ? 0 : `TO_OUT_BUF_REG(MEM_OUT_BUF)) ) bank ( .clk (clk), - .reset (bank_reset), + .reset (reset), `ifdef PERF_ENABLE .perf_read_misses (perf_read_miss_per_bank[bank_id]), @@ -455,8 +451,6 @@ module VX_cache import VX_gpu_pkg::*; #( assign core_rsp_data_in[i] = {per_bank_core_rsp_data[i], per_bank_core_rsp_tag[i]}; end - `RESET_RELAY (rsp_xbar_reset, reset); - VX_stream_xbar #( .NUM_INPUTS (NUM_BANKS), .NUM_OUTPUTS (NUM_REQS), @@ -464,7 +458,7 @@ module VX_cache import VX_gpu_pkg::*; #( .ARBITER ("R") ) rsp_xbar ( .clk (clk), - .reset (rsp_xbar_reset), + .reset (reset), `UNUSED_PIN (collisions), .valid_in (per_bank_core_rsp_valid), .data_in (core_rsp_data_in), diff --git a/hw/rtl/cache/VX_cache_cluster.sv b/hw/rtl/cache/VX_cache_cluster.sv index dbf4ffec7a..7173444ec6 100644 --- a/hw/rtl/cache/VX_cache_cluster.sv +++ b/hw/rtl/cache/VX_cache_cluster.sv @@ -139,9 +139,6 @@ module VX_cache_cluster import VX_gpu_pkg::*; #( end for (genvar i = 0; i < NUM_CACHES; ++i) begin : caches - - `RESET_RELAY (cache_reset, reset); - VX_cache_wrap #( .INSTANCE_ID ($sformatf("%s%0d", INSTANCE_ID, i)), .CACHE_SIZE (CACHE_SIZE), @@ -169,7 +166,7 @@ module VX_cache_cluster import VX_gpu_pkg::*; #( .cache_perf (perf_cache_unit[i]), `endif .clk (clk), - .reset (cache_reset), + 
.reset (reset), .core_bus_if (arb_core_bus_if[i * NUM_REQS +: NUM_REQS]), .mem_bus_if (cache_mem_bus_if[i]) ); diff --git a/hw/rtl/core/VX_alu_unit.sv b/hw/rtl/core/VX_alu_unit.sv index f3e0b19e7f..8ec044eeb4 100644 --- a/hw/rtl/core/VX_alu_unit.sv +++ b/hw/rtl/core/VX_alu_unit.sv @@ -57,8 +57,6 @@ module VX_alu_unit #( for (genvar block_idx = 0; block_idx < BLOCK_SIZE; ++block_idx) begin : alus - `RESET_RELAY_EN (block_reset, reset, (BLOCK_SIZE > 1)); - VX_execute_if #( .NUM_LANES (NUM_LANES) ) pe_execute_if[PE_COUNT](); @@ -82,7 +80,7 @@ module VX_alu_unit #( .RSP_OUT_BUF (PARTIAL_BW ? 1 : 3) ) pe_switch ( .clk (clk), - .reset (block_reset), + .reset (reset), .pe_sel (pe_select), .execute_in_if (per_block_execute_if[block_idx]), .commit_out_if (per_block_commit_if[block_idx]), @@ -96,7 +94,7 @@ module VX_alu_unit #( .NUM_LANES (NUM_LANES) ) alu_int ( .clk (clk), - .reset (block_reset), + .reset (reset), .execute_if (pe_execute_if[PE_IDX_INT]), .branch_ctl_if (branch_ctl_if[block_idx]), .commit_if (pe_commit_if[PE_IDX_INT]) @@ -108,7 +106,7 @@ module VX_alu_unit #( .NUM_LANES (NUM_LANES) ) muldiv_unit ( .clk (clk), - .reset (block_reset), + .reset (reset), .execute_if (pe_execute_if[PE_IDX_MDV]), .commit_if (pe_commit_if[PE_IDX_MDV]) ); diff --git a/hw/rtl/core/VX_fpu_unit.sv b/hw/rtl/core/VX_fpu_unit.sv index 0d7f02311e..10e5c236bf 100644 --- a/hw/rtl/core/VX_fpu_unit.sv +++ b/hw/rtl/core/VX_fpu_unit.sv @@ -57,8 +57,6 @@ module VX_fpu_unit import VX_fpu_pkg::*; #( `UNUSED_VAR (per_block_execute_if[block_idx].data.tid) `UNUSED_VAR (per_block_execute_if[block_idx].data.wb) - `RESET_RELAY_EN (block_reset, reset, (BLOCK_SIZE > 1)); - // Store request info wire fpu_req_valid, fpu_req_ready; wire fpu_rsp_valid, fpu_rsp_ready; @@ -89,7 +87,7 @@ module VX_fpu_unit import VX_fpu_pkg::*; #( .SIZE (`FPUQ_SIZE) ) tag_store ( .clk (clk), - .reset (block_reset), + .reset (reset), .acquire_en (execute_fire), .write_addr (fpu_req_tag), .write_data 
({per_block_execute_if[block_idx].data.uuid, per_block_execute_if[block_idx].data.wid, per_block_execute_if[block_idx].data.tmask, per_block_execute_if[block_idx].data.PC, per_block_execute_if[block_idx].data.rd, per_block_execute_if[block_idx].data.pid, per_block_execute_if[block_idx].data.sop, per_block_execute_if[block_idx].data.eop}), @@ -132,7 +130,7 @@ module VX_fpu_unit import VX_fpu_pkg::*; #( .OUT_BUF (PARTIAL_BW ? 1 : 3) ) fpu_dpi ( .clk (clk), - .reset (block_reset), + .reset (reset), .valid_in (fpu_req_valid), .mask_in (per_block_execute_if[block_idx].data.tmask), @@ -161,7 +159,7 @@ module VX_fpu_unit import VX_fpu_pkg::*; #( .OUT_BUF (PARTIAL_BW ? 1 : 3) ) fpu_fpnew ( .clk (clk), - .reset (block_reset), + .reset (reset), .valid_in (fpu_req_valid), .mask_in (per_block_execute_if[block_idx].data.tmask), @@ -190,7 +188,7 @@ module VX_fpu_unit import VX_fpu_pkg::*; #( .OUT_BUF (PARTIAL_BW ? 1 : 3) ) fpu_dsp ( .clk (clk), - .reset (block_reset), + .reset (reset), .valid_in (fpu_req_valid), .mask_in (per_block_execute_if[block_idx].data.tmask), @@ -219,7 +217,7 @@ module VX_fpu_unit import VX_fpu_pkg::*; #( if (PID_BITS != 0) begin fflags_t fpu_rsp_fflags_r; always @(posedge clk) begin - if (block_reset) begin + if (reset) begin fpu_rsp_fflags_r <= '0; end else if (fpu_rsp_fire) begin fpu_rsp_fflags_r <= fpu_rsp_eop ? 
'0 : (fpu_rsp_fflags_r | fpu_rsp_fflags); @@ -253,7 +251,7 @@ module VX_fpu_unit import VX_fpu_pkg::*; #( .SIZE (0) ) rsp_buf ( .clk (clk), - .reset (block_reset), + .reset (reset), .valid_in (fpu_rsp_valid), .ready_in (fpu_rsp_ready), .data_in ({fpu_rsp_uuid, fpu_rsp_wid, fpu_rsp_tmask, fpu_rsp_PC, fpu_rsp_rd, fpu_rsp_result, fpu_rsp_pid, fpu_rsp_sop, fpu_rsp_eop}), diff --git a/hw/rtl/core/VX_issue.sv b/hw/rtl/core/VX_issue.sv index 5d5af64d94..a0f223ff5b 100644 --- a/hw/rtl/core/VX_issue.sv +++ b/hw/rtl/core/VX_issue.sv @@ -77,15 +77,13 @@ module VX_issue import VX_gpu_pkg::*; #( assign decode_if.ibuf_pop[issue_id * PER_ISSUE_WARPS +: PER_ISSUE_WARPS] = per_issue_decode_if.ibuf_pop; `endif - `RESET_RELAY_EN (slice_reset, reset, (`ISSUE_WIDTH > 1)); - VX_issue_slice #( .INSTANCE_ID ($sformatf("%s%0d", INSTANCE_ID, issue_id)), .ISSUE_ID (issue_id) ) issue_slice ( `SCOPE_IO_BIND(issue_id) .clk (clk), - .reset (slice_reset), + .reset (reset), `ifdef PERF_ENABLE .issue_perf (per_issue_perf[issue_id]), `endif diff --git a/hw/rtl/core/VX_issue_slice.sv b/hw/rtl/core/VX_issue_slice.sv index 4b4e168a29..24430a53f9 100644 --- a/hw/rtl/core/VX_issue_slice.sv +++ b/hw/rtl/core/VX_issue_slice.sv @@ -36,16 +36,11 @@ module VX_issue_slice import VX_gpu_pkg::*; #( VX_scoreboard_if scoreboard_if(); VX_operands_if operands_if(); - `RESET_RELAY (ibuf_reset, reset); - `RESET_RELAY (scoreboard_reset, reset); - `RESET_RELAY (operands_reset, reset); - `RESET_RELAY (dispatch_reset, reset); - VX_ibuffer #( .INSTANCE_ID ($sformatf("%s-ibuffer", INSTANCE_ID)) ) ibuffer ( .clk (clk), - .reset (ibuf_reset), + .reset (reset), `ifdef PERF_ENABLE .perf_stalls (issue_perf.ibf_stalls), `endif @@ -57,7 +52,7 @@ module VX_issue_slice import VX_gpu_pkg::*; #( .INSTANCE_ID ($sformatf("%s-scoreboard", INSTANCE_ID)) ) scoreboard ( .clk (clk), - .reset (scoreboard_reset), + .reset (reset), `ifdef PERF_ENABLE .perf_stalls (issue_perf.scb_stalls), .perf_units_uses(issue_perf.units_uses), @@ -72,7 +67,7 
@@ module VX_issue_slice import VX_gpu_pkg::*; #( .INSTANCE_ID ($sformatf("%s-operands", INSTANCE_ID)) ) operands ( .clk (clk), - .reset (operands_reset), + .reset (reset), `ifdef PERF_ENABLE .perf_stalls (issue_perf.opd_stalls), `endif @@ -85,7 +80,7 @@ module VX_issue_slice import VX_gpu_pkg::*; #( .INSTANCE_ID ($sformatf("%s-dispatch", INSTANCE_ID)) ) dispatch ( .clk (clk), - .reset (dispatch_reset), + .reset (reset), `ifdef PERF_ENABLE `UNUSED_PIN (perf_stalls), `endif @@ -105,7 +100,7 @@ module VX_issue_slice import VX_gpu_pkg::*; #( `UUID_WIDTH + `NUM_THREADS + `NR_BITS + (`NUM_THREADS*`XLEN) + 1) ) scope_tap ( .clk (clk), - .reset (scope_reset), + .reset (reset), .start (1'b0), .stop (1'b0), .triggers ({ diff --git a/hw/rtl/core/VX_lsu_slice.sv b/hw/rtl/core/VX_lsu_slice.sv index 25a8223a80..49195eee6b 100644 --- a/hw/rtl/core/VX_lsu_slice.sv +++ b/hw/rtl/core/VX_lsu_slice.sv @@ -311,8 +311,6 @@ module VX_lsu_slice import VX_gpu_pkg::*; #( wire [LSU_TAG_WIDTH-1:0] lsu_mem_rsp_tag; wire lsu_mem_rsp_ready; - `RESET_RELAY (mem_scheduler_reset, reset); - VX_mem_scheduler #( .INSTANCE_ID ($sformatf("%s-scheduler", INSTANCE_ID)), .CORE_REQS (NUM_LANES), @@ -330,7 +328,7 @@ module VX_lsu_slice import VX_gpu_pkg::*; #( .CORE_OUT_BUF(0) ) mem_scheduler ( .clk (clk), - .reset (mem_scheduler_reset), + .reset (reset), // Input request .core_req_valid (mem_req_valid), diff --git a/hw/rtl/core/VX_lsu_unit.sv b/hw/rtl/core/VX_lsu_unit.sv index b155ed0d73..8c594f5331 100644 --- a/hw/rtl/core/VX_lsu_unit.sv +++ b/hw/rtl/core/VX_lsu_unit.sv @@ -55,15 +55,12 @@ module VX_lsu_unit import VX_gpu_pkg::*; #( ) per_block_commit_if[BLOCK_SIZE](); for (genvar block_idx = 0; block_idx < BLOCK_SIZE; ++block_idx) begin : lsus - - `RESET_RELAY_EN (slice_reset, reset, (BLOCK_SIZE > 1)); - VX_lsu_slice #( .INSTANCE_ID ($sformatf("%s%0d", INSTANCE_ID, block_idx)) ) lsu_slice( `SCOPE_IO_BIND (block_idx) .clk (clk), - .reset (slice_reset), + .reset (reset), .execute_if 
(per_block_execute_if[block_idx]), .commit_if (per_block_commit_if[block_idx]), .lsu_mem_if (lsu_mem_if[block_idx]) diff --git a/hw/rtl/core/VX_mem_unit.sv b/hw/rtl/core/VX_mem_unit.sv index cd901f8ace..75f60e63cf 100644 --- a/hw/rtl/core/VX_mem_unit.sv +++ b/hw/rtl/core/VX_mem_unit.sv @@ -91,8 +91,6 @@ module VX_mem_unit import VX_gpu_pkg::*; #( end end - `RESET_RELAY (lmem_reset, reset); - VX_local_mem #( .INSTANCE_ID($sformatf("%s-lmem", INSTANCE_ID)), .SIZE (1 << `LMEM_LOG_SIZE), @@ -105,7 +103,7 @@ module VX_mem_unit import VX_gpu_pkg::*; #( .OUT_BUF (3) ) local_mem ( .clk (clk), - .reset (lmem_reset), + .reset (reset), `ifdef PERF_ENABLE .lmem_perf (lmem_perf), `endif @@ -132,9 +130,6 @@ module VX_mem_unit import VX_gpu_pkg::*; #( if (LSU_WORD_SIZE != DCACHE_WORD_SIZE) begin : coalescer_if for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin : coalescers - - `RESET_RELAY (mem_coalescer_reset, reset); - VX_mem_coalescer #( .INSTANCE_ID ($sformatf("%s-coalescer%0d", INSTANCE_ID, i)), .NUM_REQS (`NUM_LSU_LANES), @@ -146,8 +141,8 @@ module VX_mem_unit import VX_gpu_pkg::*; #( .UUID_WIDTH (`UUID_WIDTH), .QUEUE_SIZE (`LSUQ_OUT_SIZE) ) mem_coalescer ( - .clk (clk), - .reset (mem_coalescer_reset), + .clk (clk), + .reset (reset), // Input request .in_req_valid (lsu_dcache_if[i].req_valid), diff --git a/hw/rtl/core/VX_operands.sv b/hw/rtl/core/VX_operands.sv index 3025b9dab1..a88522ee72 100644 --- a/hw/rtl/core/VX_operands.sv +++ b/hw/rtl/core/VX_operands.sv @@ -99,8 +99,6 @@ module VX_operands import VX_gpu_pkg::*; #( assign req_in_valid = {NUM_SRC_OPDS{scoreboard_if.valid}} & src_valid; - `RESET_RELAY (req_xbar_reset, reset); - VX_stream_xbar #( .NUM_INPUTS (NUM_SRC_OPDS), .NUM_OUTPUTS (NUM_BANKS), @@ -110,7 +108,7 @@ module VX_operands import VX_gpu_pkg::*; #( .OUT_BUF (0) // no output buffering ) req_xbar ( .clk (clk), - .reset (req_xbar_reset), + .reset (reset), `UNUSED_PIN(collisions), .valid_in (req_in_valid), .data_in (req_in_data), @@ -179,14 +177,12 @@ module 
VX_operands import VX_gpu_pkg::*; #( wire pipe_valid2_st1 = pipe_valid_st1 && ~has_collision_st1; - `RESET_RELAY (pipe2_reset, reset); // needed for pipe_reg2's wide RESETW - VX_pipe_buffer #( .DATAW (NUM_SRC_OPDS * REGS_DATAW + NUM_BANKS + META_DATAW + NUM_BANKS * REQ_SEL_WIDTH), .RESETW (NUM_SRC_OPDS * REGS_DATAW) ) pipe_reg2 ( .clk (clk), - .reset (pipe2_reset), + .reset (reset), .valid_in (pipe_valid2_st1), .ready_in (pipe_ready_st1), .data_in ({src_data_st1, gpr_rd_valid_st1, pipe_data_st1, gpr_rd_req_idx_st1}), diff --git a/hw/rtl/core/VX_schedule.sv b/hw/rtl/core/VX_schedule.sv index b1b855aaf3..af0ee56212 100644 --- a/hw/rtl/core/VX_schedule.sv +++ b/hw/rtl/core/VX_schedule.sv @@ -289,13 +289,11 @@ module VX_schedule import VX_gpu_pkg::*; #( // split/join handling - `RESET_RELAY (split_join_reset, reset); - VX_split_join #( .INSTANCE_ID ($sformatf("%s-splitjoin", INSTANCE_ID)) ) split_join ( .clk (clk), - .reset (split_join_reset), + .reset (reset), .valid (warp_ctl_if.valid), .wid (warp_ctl_if.wid), .split (warp_ctl_if.split), @@ -377,15 +375,13 @@ module VX_schedule import VX_gpu_pkg::*; #( wire [`NUM_WARPS-1:0] pending_warp_empty; wire [`NUM_WARPS-1:0] pending_warp_alm_empty; - `RESET_RELAY (pending_instr_reset, reset); - for (genvar i = 0; i < `NUM_WARPS; ++i) begin : pending_sizes VX_pending_size #( .SIZE (4096), .ALM_EMPTY (1) ) counter ( .clk (clk), - .reset (pending_instr_reset), + .reset (reset), .incr (schedule_if_fire && (schedule_if.data.wid == `NW_WIDTH'(i))), .decr (commit_sched_if.committed_warps[i]), .empty (pending_warp_empty[i]), diff --git a/hw/rtl/core/VX_sfu_unit.sv b/hw/rtl/core/VX_sfu_unit.sv index 93686ca557..5af6211f65 100644 --- a/hw/rtl/core/VX_sfu_unit.sv +++ b/hw/rtl/core/VX_sfu_unit.sv @@ -98,28 +98,24 @@ module VX_sfu_unit import VX_gpu_pkg::*; #( .commit_in_if (pe_commit_if) ); - `RESET_RELAY (wctl_reset, reset); - VX_wctl_unit #( .INSTANCE_ID ($sformatf("%s-wctl", INSTANCE_ID)), .NUM_LANES (NUM_LANES) ) wctl_unit ( .clk 
(clk), - .reset (wctl_reset), + .reset (reset), .execute_if (pe_execute_if[PE_IDX_WCTL]), .warp_ctl_if(warp_ctl_if), .commit_if (pe_commit_if[PE_IDX_WCTL]) ); - `RESET_RELAY (csr_reset, reset); - VX_csr_unit #( .INSTANCE_ID ($sformatf("%s-csr", INSTANCE_ID)), .CORE_ID (CORE_ID), .NUM_LANES (NUM_LANES) ) csr_unit ( .clk (clk), - .reset (csr_reset), + .reset (reset), .base_dcrs (base_dcrs), .execute_if (pe_execute_if[PE_IDX_CSRS]), diff --git a/hw/rtl/fpu/VX_fpu_dsp.sv b/hw/rtl/fpu/VX_fpu_dsp.sv index c12c82d87a..22e2b652dc 100644 --- a/hw/rtl/fpu/VX_fpu_dsp.sv +++ b/hw/rtl/fpu/VX_fpu_dsp.sv @@ -130,14 +130,12 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #( wire is_neg = per_core_op_type[FPU_FMA][0]; wire is_sub = per_core_fmt[FPU_FMA][1]; - `RESET_RELAY (fma_reset, reset); - VX_fpu_fma #( .NUM_LANES (NUM_LANES), .TAG_WIDTH (TAG_WIDTH) ) fpu_fma ( .clk (clk), - .reset (fma_reset), + .reset (reset), .valid_in (per_core_valid_in[FPU_FMA]), .ready_in (per_core_ready_in[FPU_FMA]), .mask_in (per_core_mask_in[FPU_FMA]), @@ -231,14 +229,12 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #( `UNUSED_VAR (div_sqrt_datab) `UNUSED_VAR (div_sqrt_datac) - `RESET_RELAY (div_sqrt_reset, reset); - VX_fpu_div #( .NUM_LANES (NUM_LANES), .TAG_WIDTH (TAG_WIDTH) ) fpu_div ( .clk (clk), - .reset (div_sqrt_reset), + .reset (reset), .valid_in (div_sqrt_valid_in[0]), .ready_in (div_sqrt_ready_in[0]), .mask_in (div_sqrt_mask_in[0]), @@ -313,14 +309,12 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #( wire cvt_ret_int_in = ~is_itof; wire cvt_ret_int_out; - `RESET_RELAY (cvt_reset, reset); - VX_fpu_cvt #( .NUM_LANES (NUM_LANES), .TAG_WIDTH (1+TAG_WIDTH) ) fpu_cvt ( .clk (clk), - .reset (cvt_reset), + .reset (reset), .valid_in (per_core_valid_in[FPU_CVT]), .ready_in (per_core_ready_in[FPU_CVT]), .mask_in (per_core_mask_in[FPU_CVT]), @@ -347,14 +341,12 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #( wire ncp_ret_sext_in = `INST_FPU_IS_MVXW(per_core_op_type[FPU_NCP], per_core_frm[FPU_NCP]); wire 
ncp_ret_sext_out; - `RESET_RELAY (ncp_reset, reset); - VX_fpu_ncp #( .NUM_LANES (NUM_LANES), .TAG_WIDTH (TAG_WIDTH+2) ) fpu_ncp ( .clk (clk), - .reset (ncp_reset), + .reset (reset), .valid_in (per_core_valid_in[FPU_NCP]), .ready_in (per_core_ready_in[FPU_NCP]), .mask_in (per_core_mask_in[FPU_NCP]), diff --git a/hw/rtl/libs/VX_stream_unpack.sv b/hw/rtl/libs/VX_stream_unpack.sv index 30e2a444f5..37c238a77c 100644 --- a/hw/rtl/libs/VX_stream_unpack.sv +++ b/hw/rtl/libs/VX_stream_unpack.sv @@ -42,14 +42,14 @@ module VX_stream_unpack #( wire [NUM_REQS-1:0] ready_out_w; wire [NUM_REQS-1:0] rem_mask_n = rem_mask_r & ~ready_out_w; - wire sent_all = (mask_in & rem_mask_n) == '0; + wire sent_all = ~(| (mask_in & rem_mask_n)); always @(posedge clk) begin if (reset) begin - rem_mask_r <= {NUM_REQS{1'b1}}; + rem_mask_r <= '1; end else begin if (valid_in) begin - rem_mask_r <= {NUM_REQS{sent_all}} | rem_mask_n; + rem_mask_r <= sent_all ? '1 : rem_mask_n; end end end diff --git a/hw/rtl/mem/VX_local_mem.sv b/hw/rtl/mem/VX_local_mem.sv index 6dbe283255..700bcb48c4 100644 --- a/hw/rtl/mem/VX_local_mem.sv +++ b/hw/rtl/mem/VX_local_mem.sv @@ -116,8 +116,6 @@ module VX_local_mem import VX_gpu_pkg::*; #( assign mem_bus_if[i].req_ready = req_ready_in[i]; end - `RESET_RELAY (req_xbar_reset, reset); - VX_stream_xbar #( .NUM_INPUTS (NUM_REQS), .NUM_OUTPUTS (NUM_BANKS), @@ -127,7 +125,7 @@ module VX_local_mem import VX_gpu_pkg::*; #( .OUT_BUF (3) // output should be registered for the data_store addressing ) req_xbar ( .clk (clk), - .reset (req_xbar_reset), + .reset (reset), `ifdef PERF_ENABLE .collisions (perf_collisions), `else @@ -226,8 +224,6 @@ module VX_local_mem import VX_gpu_pkg::*; #( wire [NUM_REQS-1:0][RSP_DATAW-1:0] rsp_data_out; wire [NUM_REQS-1:0] rsp_ready_out; - `RESET_RELAY (rsp_xbar_reset, reset); - VX_stream_xbar #( .NUM_INPUTS (NUM_BANKS), .NUM_OUTPUTS (NUM_REQS), @@ -236,7 +232,7 @@ module VX_local_mem import VX_gpu_pkg::*; #( .OUT_BUF (OUT_BUF) ) rsp_xbar ( .clk (clk), 
- .reset (rsp_xbar_reset), + .reset (reset), `UNUSED_PIN (collisions), .sel_in (per_bank_rsp_idx), .valid_in (per_bank_rsp_valid), From 96fb3566a94291050795d17911eef7f3a9716eca Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Wed, 4 Sep 2024 13:44:23 -0700 Subject: [PATCH 144/407] minor update --- hw/rtl/VX_socket.sv | 4 +--- hw/rtl/core/VX_core.sv | 3 +-- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/hw/rtl/VX_socket.sv b/hw/rtl/VX_socket.sv index 833ba49d7c..a6e58ebd18 100644 --- a/hw/rtl/VX_socket.sv +++ b/hw/rtl/VX_socket.sv @@ -49,14 +49,12 @@ module VX_socket import VX_gpu_pkg::*; #( `ifdef GBAR_ENABLE VX_gbar_bus_if per_core_gbar_bus_if[`SOCKET_SIZE](); - `RESET_RELAY (gbar_arb_reset, reset); - VX_gbar_arb #( .NUM_REQS (`SOCKET_SIZE), .OUT_BUF ((`SOCKET_SIZE > 1) ? 2 : 0) ) gbar_arb ( .clk (clk), - .reset (gbar_arb_reset), + .reset (reset), .bus_in_if (per_core_gbar_bus_if), .bus_out_if (gbar_bus_if) ); diff --git a/hw/rtl/core/VX_core.sv b/hw/rtl/core/VX_core.sv index f306c5d232..f97370e892 100644 --- a/hw/rtl/core/VX_core.sv +++ b/hw/rtl/core/VX_core.sv @@ -75,7 +75,6 @@ module VX_core import VX_gpu_pkg::*; #( assign mem_perf_tmp_if.mem = mem_perf_if.mem; `endif - `RESET_RELAY (dcr_data_reset, reset); `RESET_RELAY (schedule_reset, reset); `RESET_RELAY (fetch_reset, reset); `RESET_RELAY (decode_reset, reset); @@ -87,7 +86,7 @@ module VX_core import VX_gpu_pkg::*; #( VX_dcr_data dcr_data ( .clk (clk), - .reset (dcr_data_reset), + .reset (reset), .dcr_bus_if (dcr_bus_if), .base_dcrs (base_dcrs) ); From 37555b12086612bd46649a57c01a62226dcc86df Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Wed, 4 Sep 2024 15:18:39 -0700 Subject: [PATCH 145/407] minor update --- ci/regression.sh.in | 2 +- hw/rtl/VX_config.vh | 2 +- hw/rtl/VX_define.vh | 18 ++++++++++++++++++ sim/opaesim/opae_sim.cpp | 22 ++++++++++++---------- 4 files changed, 32 insertions(+), 12 deletions(-) diff --git a/ci/regression.sh.in b/ci/regression.sh.in index 32e479c1e2..2c56377c04 
100755 --- a/ci/regression.sh.in +++ b/ci/regression.sh.in @@ -259,7 +259,7 @@ config2() AXI_BUS=1 ./ci/blackbox.sh --driver=rtlsim --app=mstress # test 128-bit MEM block - CONFIGS="-DMEM_BLOCK_SIZE=16" ./ci/blackbox.sh --driver=opae --app=mstress + CONFIGS="-DPLATFORM_PARAM_LOCAL_MEMORY_DATA_WIDTH=128" ./ci/blackbox.sh --driver=opae --app=mstress # test XLEN-bit MEM block CONFIGS="-DMEM_BLOCK_SIZE=$XSIZE" ./ci/blackbox.sh --driver=opae --app=mstress diff --git a/hw/rtl/VX_config.vh b/hw/rtl/VX_config.vh index 9a95fb0be8..a93b73b305 100644 --- a/hw/rtl/VX_config.vh +++ b/hw/rtl/VX_config.vh @@ -651,7 +651,7 @@ `endif `ifndef MEMORY_BANKS -`define MEMORY_BANKS 8 +`define MEMORY_BANKS 2 `endif // Number of Memory Ports from LLC diff --git a/hw/rtl/VX_define.vh b/hw/rtl/VX_define.vh index 861d9f28cf..5ef9a46d2a 100644 --- a/hw/rtl/VX_define.vh +++ b/hw/rtl/VX_define.vh @@ -14,6 +14,24 @@ `ifndef VX_DEFINE_VH `define VX_DEFINE_VH +`ifndef MEM_BLOCK_SIZE +`ifdef PLATFORM_PARAM_LOCAL_MEMORY_DATA_WIDTH +`define MEM_BLOCK_SIZE (`PLATFORM_PARAM_LOCAL_MEMORY_DATA_WIDTH/8) +`endif +`endif + +`ifndef MEM_ADDR_WIDTH +`ifdef PLATFORM_PARAM_LOCAL_MEMORY_ADDR_WIDTH +`define MEM_ADDR_WIDTH `PLATFORM_PARAM_LOCAL_MEMORY_ADDR_WIDTH +`endif +`endif + +`ifndef MEMORY_BANKS +`ifdef PLATFORM_PARAM_LOCAL_MEMORY_BANKS +`define MEMORY_BANKS `PLATFORM_PARAM_LOCAL_MEMORY_BANKS +`endif +`endif + `include "VX_platform.vh" `include "VX_config.vh" `include "VX_types.vh" diff --git a/sim/opaesim/opae_sim.cpp b/sim/opaesim/opae_sim.cpp index 9cf185abf8..2f847dc207 100644 --- a/sim/opaesim/opae_sim.cpp +++ b/sim/opaesim/opae_sim.cpp @@ -35,6 +35,8 @@ #include #include +#define PLATFORM_PARAM_LOCAL_MEMORY_DATA_SIZE (PLATFORM_PARAM_LOCAL_MEMORY_DATA_WIDTH/8) + #ifndef MEM_CLOCK_RATIO #define MEM_CLOCK_RATIO 1 #endif @@ -424,7 +426,7 @@ class opae_sim::Impl { auto mem_rd_it = pending_mem_reqs_[b].begin(); auto mem_req = *mem_rd_it; device_->avs_readdatavalid[b] = 1; - memcpy(device_->avs_readdata[b], 
mem_req->data.data(), MEM_BLOCK_SIZE); + memcpy(device_->avs_readdata[b], mem_req->data.data(), PLATFORM_PARAM_LOCAL_MEMORY_DATA_SIZE); uint32_t addr = mem_req->addr; pending_mem_reqs_[b].erase(mem_rd_it); delete mem_req; @@ -432,19 +434,19 @@ class opae_sim::Impl { // process memory requests assert(!device_->avs_read[b] || !device_->avs_write[b]); - unsigned byte_addr = (device_->avs_address[b] * PLATFORM_PARAM_LOCAL_MEMORY_BANKS + b) * MEM_BLOCK_SIZE; + unsigned byte_addr = (device_->avs_address[b] * PLATFORM_PARAM_LOCAL_MEMORY_BANKS + b) * PLATFORM_PARAM_LOCAL_MEMORY_DATA_SIZE; if (device_->avs_write[b]) { uint64_t byteen = device_->avs_byteenable[b]; uint8_t* data = (uint8_t*)(device_->avs_writedata[b].data()); - for (int i = 0; i < MEM_BLOCK_SIZE; i++) { + for (int i = 0; i < PLATFORM_PARAM_LOCAL_MEMORY_DATA_SIZE; i++) { if ((byteen >> i) & 0x1) { (*ram_)[byte_addr + i] = data[i]; } } /*printf("%0ld: [sim] MEM Wr Req: bank=%d, 0x%x, data=0x", timestamp, b, byte_addr); - for (int i = 0; i < MEM_BLOCK_SIZE; i++) { - printf("%02x", data[(MEM_BLOCK_SIZE-1)-i]); + for (int i = 0; i < PLATFORM_PARAM_LOCAL_MEMORY_DATA_SIZE; i++) { + printf("%02x", data[(PLATFORM_PARAM_LOCAL_MEMORY_DATA_SIZE-1)-i]); } printf("\n");*/ @@ -461,17 +463,17 @@ class opae_sim::Impl { auto mem_req = new mem_req_t(); mem_req->addr = device_->avs_address[b]; mem_req->bank_id = b; - ram_->read(mem_req->data.data(), byte_addr, MEM_BLOCK_SIZE); + ram_->read(mem_req->data.data(), byte_addr, PLATFORM_PARAM_LOCAL_MEMORY_DATA_SIZE); mem_req->write = false; mem_req->ready = false; pending_mem_reqs_[b].emplace_back(mem_req); - /*printf("%0ld: [sim] MEM Rd Req: bank=%d, addr=%x, pending={", timestamp, b, mem_req.addr * MEM_BLOCK_SIZE); + /*printf("%0ld: [sim] MEM Rd Req: bank=%d, addr=%x, pending={", timestamp, b, mem_req.addr * PLATFORM_PARAM_LOCAL_MEMORY_DATA_SIZE); for (auto& req : pending_mem_reqs_[b]) { if (req.cycles_left != 0) - printf(" !%0x", req.addr * MEM_BLOCK_SIZE); + printf(" !%0x", 
req.addr * PLATFORM_PARAM_LOCAL_MEMORY_DATA_SIZE); else - printf(" %0x", req.addr * MEM_BLOCK_SIZE); + printf(" %0x", req.addr * PLATFORM_PARAM_LOCAL_MEMORY_DATA_SIZE); } printf("}\n");*/ @@ -484,7 +486,7 @@ class opae_sim::Impl { } typedef struct { - std::array data; + std::array data; uint32_t addr; uint32_t bank_id; bool write; From 8d1baf677d538e1a56a801d4310a6aeb9204fa34 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Wed, 4 Sep 2024 18:17:27 -0700 Subject: [PATCH 146/407] minor update --- ci/regression.sh.in | 2 +- sim/opaesim/vortex_afu_shim.sv | 62 ++++++++++++++++------------------ 2 files changed, 31 insertions(+), 33 deletions(-) diff --git a/ci/regression.sh.in b/ci/regression.sh.in index 2c56377c04..32e479c1e2 100755 --- a/ci/regression.sh.in +++ b/ci/regression.sh.in @@ -259,7 +259,7 @@ config2() AXI_BUS=1 ./ci/blackbox.sh --driver=rtlsim --app=mstress # test 128-bit MEM block - CONFIGS="-DPLATFORM_PARAM_LOCAL_MEMORY_DATA_WIDTH=128" ./ci/blackbox.sh --driver=opae --app=mstress + CONFIGS="-DMEM_BLOCK_SIZE=16" ./ci/blackbox.sh --driver=opae --app=mstress # test XLEN-bit MEM block CONFIGS="-DMEM_BLOCK_SIZE=$XSIZE" ./ci/blackbox.sh --driver=opae --app=mstress diff --git a/sim/opaesim/vortex_afu_shim.sv b/sim/opaesim/vortex_afu_shim.sv index 8c64c83325..2a0d63e42e 100644 --- a/sim/opaesim/vortex_afu_shim.sv +++ b/sim/opaesim/vortex_afu_shim.sv @@ -1,30 +1,28 @@ // Copyright © 2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
-`include "VX_platform.vh" +`include "VX_define.vh" `IGNORE_WARNINGS_BEGIN `include "vortex_afu.vh" `IGNORE_WARNINGS_END -`include "VX_define.vh" - module vortex_afu_shim import local_mem_cfg_pkg::*; import ccip_if_pkg::*; ( // global signals input clk, input reset, // IF signals between CCI and AFU - input logic vcp2af_sRxPort_c0_TxAlmFull, + input logic vcp2af_sRxPort_c0_TxAlmFull, input logic vcp2af_sRxPort_c1_TxAlmFull, input t_ccip_vc vcp2af_sRxPort_c0_hdr_vc_used, @@ -35,15 +33,15 @@ module vortex_afu_shim import local_mem_cfg_pkg::*; import ccip_if_pkg::*; ( input t_ccip_c0_rsp vcp2af_sRxPort_c0_hdr_resp_type, input t_ccip_mdata vcp2af_sRxPort_c0_hdr_mdata, input t_ccip_clData vcp2af_sRxPort_c0_data, - input logic vcp2af_sRxPort_c0_rspValid, - input logic vcp2af_sRxPort_c0_mmioRdValid, - input logic vcp2af_sRxPort_c0_mmioWrValid, + input logic vcp2af_sRxPort_c0_rspValid, + input logic vcp2af_sRxPort_c0_mmioRdValid, + input logic vcp2af_sRxPort_c0_mmioWrValid, input t_ccip_mmioAddr vcp2af_sRxPort_c0_ReqMmioHdr_address, - input logic [1:0] vcp2af_sRxPort_c0_ReqMmioHdr_length, + input logic [1:0] vcp2af_sRxPort_c0_ReqMmioHdr_length, input logic vcp2af_sRxPort_c0_ReqMmioHdr_rsvd, - input t_ccip_tid vcp2af_sRxPort_c0_ReqMmioHdr_tid, - + input t_ccip_tid vcp2af_sRxPort_c0_ReqMmioHdr_tid, + input t_ccip_vc vcp2af_sRxPort_c1_hdr_vc_used, input logic vcp2af_sRxPort_c1_hdr_rsvd1, input logic vcp2af_sRxPort_c1_hdr_hit_miss, @@ -51,34 +49,34 @@ module vortex_afu_shim import local_mem_cfg_pkg::*; import ccip_if_pkg::*; ( input logic vcp2af_sRxPort_c1_hdr_rsvd0, input t_ccip_clNum vcp2af_sRxPort_c1_hdr_cl_num, input t_ccip_c1_rsp vcp2af_sRxPort_c1_hdr_resp_type, - input t_ccip_mdata vcp2af_sRxPort_c1_hdr_mdata, - input logic vcp2af_sRxPort_c1_rspValid, - + input t_ccip_mdata vcp2af_sRxPort_c1_hdr_mdata, + input logic vcp2af_sRxPort_c1_rspValid, + output t_ccip_vc af2cp_sTxPort_c0_hdr_vc_sel, - output logic [1:0] af2cp_sTxPort_c0_hdr_rsvd1, + output logic [1:0] 
af2cp_sTxPort_c0_hdr_rsvd1, output t_ccip_clLen af2cp_sTxPort_c0_hdr_cl_len, output t_ccip_c0_req af2cp_sTxPort_c0_hdr_req_type, - output logic [5:0] af2cp_sTxPort_c0_hdr_rsvd0, + output logic [5:0] af2cp_sTxPort_c0_hdr_rsvd0, output t_ccip_clAddr af2cp_sTxPort_c0_hdr_address, output t_ccip_mdata af2cp_sTxPort_c0_hdr_mdata, - output logic af2cp_sTxPort_c0_valid, + output logic af2cp_sTxPort_c0_valid, output logic [5:0] af2cp_sTxPort_c1_hdr_rsvd2, output t_ccip_vc af2cp_sTxPort_c1_hdr_vc_sel, output logic af2cp_sTxPort_c1_hdr_sop, - output logic af2cp_sTxPort_c1_hdr_rsvd1, + output logic af2cp_sTxPort_c1_hdr_rsvd1, output t_ccip_clLen af2cp_sTxPort_c1_hdr_cl_len, output t_ccip_c1_req af2cp_sTxPort_c1_hdr_req_type, - output logic [5:0] af2cp_sTxPort_c1_hdr_rsvd0, + output logic [5:0] af2cp_sTxPort_c1_hdr_rsvd0, output t_ccip_clAddr af2cp_sTxPort_c1_hdr_address, output t_ccip_mdata af2cp_sTxPort_c1_hdr_mdata, - output t_ccip_clData af2cp_sTxPort_c1_data, - output logic af2cp_sTxPort_c1_valid, + output t_ccip_clData af2cp_sTxPort_c1_data, + output logic af2cp_sTxPort_c1_valid, output t_ccip_tid af2cp_sTxPort_c2_hdr_tid, - output logic af2cp_sTxPort_c2_mmioRdValid, - output t_ccip_mmioData af2cp_sTxPort_c2_data, - + output logic af2cp_sTxPort_c2_mmioRdValid, + output t_ccip_mmioData af2cp_sTxPort_c2_data, + // Avalon signals for local memory access output t_local_mem_data avs_writedata [`PLATFORM_PARAM_LOCAL_MEMORY_BANKS], input t_local_mem_data avs_readdata [`PLATFORM_PARAM_LOCAL_MEMORY_BANKS], @@ -119,7 +117,7 @@ always @ (*) begin c0_RxHdr.reqMmioHdr.address = vcp2af_sRxPort_c0_ReqMmioHdr_address; c0_RxHdr.reqMmioHdr.length = vcp2af_sRxPort_c0_ReqMmioHdr_length; c0_RxHdr.reqMmioHdr.rsvd = vcp2af_sRxPort_c0_ReqMmioHdr_rsvd; - c0_RxHdr.reqMmioHdr.tid = vcp2af_sRxPort_c0_ReqMmioHdr_tid; + c0_RxHdr.reqMmioHdr.tid = vcp2af_sRxPort_c0_ReqMmioHdr_tid; end else begin c0_RxHdr.rspMemHdr.vc_used = vcp2af_sRxPort_c0_hdr_vc_used; c0_RxHdr.rspMemHdr.rsvd1 = 
vcp2af_sRxPort_c0_hdr_rsvd1; @@ -134,7 +132,7 @@ end assign cp2af_sRxPort.c0TxAlmFull = vcp2af_sRxPort_c0_TxAlmFull; assign cp2af_sRxPort.c1TxAlmFull = vcp2af_sRxPort_c1_TxAlmFull; -assign cp2af_sRxPort.c0.hdr = c0_RxHdr; +assign cp2af_sRxPort.c0.hdr = c0_RxHdr; assign cp2af_sRxPort.c0.data = vcp2af_sRxPort_c0_data; assign cp2af_sRxPort.c0.rspValid = vcp2af_sRxPort_c0_rspValid; assign cp2af_sRxPort.c0.mmioRdValid = vcp2af_sRxPort_c0_mmioRdValid; @@ -147,8 +145,8 @@ assign cp2af_sRxPort.c1.hdr.format = vcp2af_sRxPort_c1_hdr_format; assign cp2af_sRxPort.c1.hdr.rsvd0 = vcp2af_sRxPort_c1_hdr_rsvd0; assign cp2af_sRxPort.c1.hdr.cl_num = vcp2af_sRxPort_c1_hdr_cl_num; assign cp2af_sRxPort.c1.hdr.resp_type = vcp2af_sRxPort_c1_hdr_resp_type; -assign cp2af_sRxPort.c1.hdr.mdata = vcp2af_sRxPort_c1_hdr_mdata; -assign cp2af_sRxPort.c1.rspValid = vcp2af_sRxPort_c1_rspValid; +assign cp2af_sRxPort.c1.hdr.mdata = vcp2af_sRxPort_c1_hdr_mdata; +assign cp2af_sRxPort.c1.rspValid = vcp2af_sRxPort_c1_rspValid; assign af2cp_sTxPort_c0_hdr_vc_sel = af2cp_sTxPort.c0.hdr.vc_sel; assign af2cp_sTxPort_c0_hdr_rsvd1 = af2cp_sTxPort.c0.hdr.rsvd1; @@ -168,11 +166,11 @@ assign af2cp_sTxPort_c1_hdr_req_type = af2cp_sTxPort.c1.hdr.req_type; assign af2cp_sTxPort_c1_hdr_rsvd0 = af2cp_sTxPort.c1.hdr.rsvd0; assign af2cp_sTxPort_c1_hdr_address = af2cp_sTxPort.c1.hdr.address; assign af2cp_sTxPort_c1_hdr_mdata = af2cp_sTxPort.c1.hdr.mdata; -assign af2cp_sTxPort_c1_data = af2cp_sTxPort.c1.data; +assign af2cp_sTxPort_c1_data = af2cp_sTxPort.c1.data; assign af2cp_sTxPort_c1_valid = af2cp_sTxPort.c1.valid; -assign af2cp_sTxPort_c2_hdr_tid = af2cp_sTxPort.c2.hdr.tid; -assign af2cp_sTxPort_c2_mmioRdValid = af2cp_sTxPort.c2.mmioRdValid; +assign af2cp_sTxPort_c2_hdr_tid = af2cp_sTxPort.c2.hdr.tid; +assign af2cp_sTxPort_c2_mmioRdValid = af2cp_sTxPort.c2.mmioRdValid; assign af2cp_sTxPort_c2_data = af2cp_sTxPort.c2.data; endmodule From 0aaca84016bfa6ed328439f134190a373ed070a8 Mon Sep 17 00:00:00 2001 From: Blaise Tine 
Date: Wed, 4 Sep 2024 18:22:37 -0700 Subject: [PATCH 147/407] minor update --- hw/rtl/core/VX_issue_slice.sv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/rtl/core/VX_issue_slice.sv b/hw/rtl/core/VX_issue_slice.sv index 24430a53f9..0d7fdea534 100644 --- a/hw/rtl/core/VX_issue_slice.sv +++ b/hw/rtl/core/VX_issue_slice.sv @@ -100,7 +100,7 @@ module VX_issue_slice import VX_gpu_pkg::*; #( `UUID_WIDTH + `NUM_THREADS + `NR_BITS + (`NUM_THREADS*`XLEN) + 1) ) scope_tap ( .clk (clk), - .reset (reset), + .reset (scope_reset), .start (1'b0), .stop (1'b0), .triggers ({ From fb0cd1c2724c028ced53891121d824f5571eabd7 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Wed, 4 Sep 2024 18:24:42 -0700 Subject: [PATCH 148/407] minor update --- hw/rtl/VX_define.vh | 18 ------------------ 1 file changed, 18 deletions(-) diff --git a/hw/rtl/VX_define.vh b/hw/rtl/VX_define.vh index 5ef9a46d2a..861d9f28cf 100644 --- a/hw/rtl/VX_define.vh +++ b/hw/rtl/VX_define.vh @@ -14,24 +14,6 @@ `ifndef VX_DEFINE_VH `define VX_DEFINE_VH -`ifndef MEM_BLOCK_SIZE -`ifdef PLATFORM_PARAM_LOCAL_MEMORY_DATA_WIDTH -`define MEM_BLOCK_SIZE (`PLATFORM_PARAM_LOCAL_MEMORY_DATA_WIDTH/8) -`endif -`endif - -`ifndef MEM_ADDR_WIDTH -`ifdef PLATFORM_PARAM_LOCAL_MEMORY_ADDR_WIDTH -`define MEM_ADDR_WIDTH `PLATFORM_PARAM_LOCAL_MEMORY_ADDR_WIDTH -`endif -`endif - -`ifndef MEMORY_BANKS -`ifdef PLATFORM_PARAM_LOCAL_MEMORY_BANKS -`define MEMORY_BANKS `PLATFORM_PARAM_LOCAL_MEMORY_BANKS -`endif -`endif - `include "VX_platform.vh" `include "VX_config.vh" `include "VX_types.vh" From cf9172b8fcf53fc4f83443de4e912f85700cef58 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Wed, 4 Sep 2024 20:16:54 -0700 Subject: [PATCH 149/407] minor update --- hw/rtl/cache/VX_cache_bank.sv | 4 ++-- hw/rtl/core/VX_lsu_slice.sv | 8 ++++---- hw/rtl/libs/VX_dp_ram.sv | 4 ++-- hw/rtl/libs/VX_fifo_queue.sv | 4 ++-- hw/rtl/libs/VX_generic_arbiter.sv | 2 +- hw/rtl/libs/VX_mem_coalescer.sv | 4 ++-- hw/rtl/libs/VX_mem_scheduler.sv | 2 +- 
7 files changed, 14 insertions(+), 14 deletions(-) diff --git a/hw/rtl/cache/VX_cache_bank.sv b/hw/rtl/cache/VX_cache_bank.sv index 883a561a11..4dff675bd6 100644 --- a/hw/rtl/cache/VX_cache_bank.sv +++ b/hw/rtl/cache/VX_cache_bank.sv @@ -394,7 +394,7 @@ module VX_cache_bank #( `UNUSED_VAR (do_write_miss_st1) // ensure mshr replay always get a hit - `RUNTIME_ASSERT (~(valid_st1 && is_replay_st1) || is_hit_st1, ("missed mshr replay")); + `RUNTIME_ASSERT (~(valid_st1 && is_replay_st1) || is_hit_st1, ("%t: missed mshr replay", $time)); // both tag and data stores use BRAM with no read-during-write protection. // we ned to stall the pipeline to prevent read-after-write hazards. @@ -599,7 +599,7 @@ module VX_cache_bank #( if (DIRTY_BYTES) begin // ensure dirty bytes match the tag info wire has_dirty_bytes = (| dirty_byteen_st1); - `RUNTIME_ASSERT (~do_fill_or_flush_st1 || (evict_dirty_st1 == has_dirty_bytes), ("missmatch dirty bytes: dirty_line=%b, dirty_bytes=%b, addr=0x%0h", evict_dirty_st1, has_dirty_bytes, `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID))); + `RUNTIME_ASSERT (~do_fill_or_flush_st1 || (evict_dirty_st1 == has_dirty_bytes), ("%t: missmatch dirty bytes: dirty_line=%b, dirty_bytes=%b, addr=0x%0h", $time, evict_dirty_st1, has_dirty_bytes, `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID))); end assign mreq_queue_push = (((do_read_miss_st1 || do_write_miss_st1) && ~mshr_pending_st1) || do_writeback_st1) diff --git a/hw/rtl/core/VX_lsu_slice.sv b/hw/rtl/core/VX_lsu_slice.sv index 49195eee6b..7ee15bb143 100644 --- a/hw/rtl/core/VX_lsu_slice.sv +++ b/hw/rtl/core/VX_lsu_slice.sv @@ -188,8 +188,8 @@ module VX_lsu_slice import VX_gpu_pkg::*; #( for (genvar i = 0; i < NUM_LANES; ++i) begin wire lsu_req_fire = execute_if.valid && execute_if.ready; `RUNTIME_ASSERT((~lsu_req_fire || ~execute_if.data.tmask[i] || req_is_fence || (full_addr[i] % (1 << `INST_LSU_WSIZE(execute_if.data.op_type))) == 0), - ("misaligned memory access, wid=%0d, PC=0x%0h, addr=0x%0h, wsize=%0d! 
(#%0d)", - execute_if.data.wid, {execute_if.data.PC, 1'b0}, full_addr[i], `INST_LSU_WSIZE(execute_if.data.op_type), execute_if.data.uuid)); + ("%t: misaligned memory access, wid=%0d, PC=0x%0h, addr=0x%0h, wsize=%0d! (#%0d)", + $time, execute_if.data.wid, {execute_if.data.PC, 1'b0}, full_addr[i], `INST_LSU_WSIZE(execute_if.data.op_type), execute_if.data.uuid)); end // store data formatting @@ -271,8 +271,8 @@ module VX_lsu_slice import VX_gpu_pkg::*; #( assign mem_rsp_sop_pkt = pkt_sop[pkt_raddr]; assign mem_rsp_eop_pkt = mem_rsp_eop_fire && pkt_eop[pkt_raddr] && (pkt_ctr[pkt_raddr] == 1); - `RUNTIME_ASSERT(~(mem_req_rd_fire && full), ("allocator full!")) - `RUNTIME_ASSERT(~mem_req_rd_sop_fire || 0 == pkt_ctr[pkt_waddr], ("Oops!")) + `RUNTIME_ASSERT(~(mem_req_rd_fire && full), ("%t: allocator full!", $time)) + `RUNTIME_ASSERT(~mem_req_rd_sop_fire || 0 == pkt_ctr[pkt_waddr], ("%t: oops! broken sop request!", $time)) `UNUSED_VAR (mem_rsp_sop) end else begin assign pkt_waddr = 0; diff --git a/hw/rtl/libs/VX_dp_ram.sv b/hw/rtl/libs/VX_dp_ram.sv index 70df4f6888..64b22150cf 100644 --- a/hw/rtl/libs/VX_dp_ram.sv +++ b/hw/rtl/libs/VX_dp_ram.sv @@ -59,7 +59,7 @@ module VX_dp_ram #( `UNUSED_VAR (read) if (WRENW > 1) begin - `RUNTIME_ASSERT(~write || (| wren), ("invalid write enable mask")); + `RUNTIME_ASSERT(~write || (| wren), ("%t: invalid write enable mask", $time)); end if (OUT_REG && !READ_ENABLE) begin @@ -341,7 +341,7 @@ module VX_dp_ram #( assign rdata_w = (prev_write && (prev_waddr == raddr)) ? 
prev_data : ram[raddr]; if (RW_ASSERT) begin - `RUNTIME_ASSERT(~read || (rdata_w == ram[raddr]), ("read after write hazard")); + `RUNTIME_ASSERT(~read || (rdata_w == ram[raddr]), ("%t: read after write hazard", $time)); end end `endif diff --git a/hw/rtl/libs/VX_fifo_queue.sv b/hw/rtl/libs/VX_fifo_queue.sv index e6f94b3b29..eba9532f42 100644 --- a/hw/rtl/libs/VX_fifo_queue.sv +++ b/hw/rtl/libs/VX_fifo_queue.sv @@ -162,8 +162,8 @@ module VX_fifo_queue #( end end - `RUNTIME_ASSERT(~(push && ~pop) || ~full, ("runtime error: incrementing full queue")); - `RUNTIME_ASSERT(~(pop && ~push) || ~empty, ("runtime error: decrementing empty queue")); + `RUNTIME_ASSERT(~(push && ~pop) || ~full, ("%t: runtime error: incrementing full queue", $time)); + `RUNTIME_ASSERT(~(pop && ~push) || ~empty, ("%t: runtime error: decrementing empty queue", $time)); endmodule `TRACING_ON diff --git a/hw/rtl/libs/VX_generic_arbiter.sv b/hw/rtl/libs/VX_generic_arbiter.sv index db0173349b..3a3737d04c 100644 --- a/hw/rtl/libs/VX_generic_arbiter.sv +++ b/hw/rtl/libs/VX_generic_arbiter.sv @@ -90,7 +90,7 @@ module VX_generic_arbiter #( end - `RUNTIME_ASSERT ((~(| requests) || (grant_valid && (requests[grant_index] != 0) && (grant_onehot == (NUM_REQS'(1) << grant_index)))), ("invalid arbiter grant!")) + `RUNTIME_ASSERT ((~(| requests) || (grant_valid && (requests[grant_index] != 0) && (grant_onehot == (NUM_REQS'(1) << grant_index)))), ("%t: invalid arbiter grant!", $time)) endmodule `TRACING_ON diff --git a/hw/rtl/libs/VX_mem_coalescer.sv b/hw/rtl/libs/VX_mem_coalescer.sv index 32ad728b84..b284a64490 100644 --- a/hw/rtl/libs/VX_mem_coalescer.sv +++ b/hw/rtl/libs/VX_mem_coalescer.sv @@ -76,8 +76,8 @@ module VX_mem_coalescer #( `UNUSED_SPARAM (INSTANCE_ID) `STATIC_ASSERT (`IS_DIVISBLE(NUM_REQS * DATA_IN_WIDTH, DATA_OUT_WIDTH), ("invalid parameter")) `STATIC_ASSERT ((NUM_REQS * DATA_IN_WIDTH >= DATA_OUT_WIDTH), ("invalid parameter")) - `RUNTIME_ASSERT ((~in_req_valid || in_req_mask != 0), ("invalid request 
mask")); - `RUNTIME_ASSERT ((~out_rsp_valid || out_rsp_mask != 0), ("invalid request mask")); + `RUNTIME_ASSERT ((~in_req_valid || in_req_mask != 0), ("%t: invalid request mask", $time)); + `RUNTIME_ASSERT ((~out_rsp_valid || out_rsp_mask != 0), ("%t: invalid request mask", $time)); localparam TAG_ID_WIDTH = TAG_WIDTH - UUID_WIDTH; // tag + mask + offest diff --git a/hw/rtl/libs/VX_mem_scheduler.sv b/hw/rtl/libs/VX_mem_scheduler.sv index c5b3021776..9599adf13f 100644 --- a/hw/rtl/libs/VX_mem_scheduler.sv +++ b/hw/rtl/libs/VX_mem_scheduler.sv @@ -97,7 +97,7 @@ module VX_mem_scheduler #( `STATIC_ASSERT (`IS_DIVISBLE(CORE_REQS * WORD_SIZE, LINE_SIZE), ("invalid parameter")) `STATIC_ASSERT ((TAG_WIDTH >= UUID_WIDTH), ("invalid parameter")) `STATIC_ASSERT ((0 == RSP_PARTIAL) || (1 == RSP_PARTIAL), ("invalid parameter")) - `RUNTIME_ASSERT((~core_req_valid || core_req_mask != 0), ("invalid request mask")); + `RUNTIME_ASSERT((~core_req_valid || core_req_mask != 0), ("%t: invalid request mask", $time)); wire ibuf_push; wire ibuf_pop; From 8db77ea1cd4b394f9b558e38cf10a059117bcf9d Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Thu, 5 Sep 2024 21:29:01 -0700 Subject: [PATCH 150/407] minor updates --- hw/rtl/VX_cluster.sv | 3 ++- hw/syn/xilinx/scripts/xsim.tcl | 13 +++++-------- hw/syn/xilinx/xrt/Makefile | 2 +- hw/syn/xilinx/xrt/{xrt.ini => xrt.ini.in} | 6 +++--- sim/opaesim/opae_sim.cpp | 2 +- sim/rtlsim/processor.cpp | 2 +- sim/xrtsim/xrt_sim.cpp | 2 +- 7 files changed, 14 insertions(+), 16 deletions(-) rename hw/syn/xilinx/xrt/{xrt.ini => xrt.ini.in} (51%) diff --git a/hw/rtl/VX_cluster.sv b/hw/rtl/VX_cluster.sv index b9a43f8457..ef845ae07f 100644 --- a/hw/rtl/VX_cluster.sv +++ b/hw/rtl/VX_cluster.sv @@ -119,7 +119,8 @@ module VX_cluster import VX_gpu_pkg::*; #( /////////////////////////////////////////////////////////////////////////// VX_dcr_bus_if socket_dcr_bus_tmp_if(); - assign socket_dcr_bus_tmp_if.write_valid = dcr_bus_if.write_valid && (dcr_bus_if.write_addr >= 
`VX_DCR_BASE_STATE_BEGIN && dcr_bus_if.write_addr < `VX_DCR_BASE_STATE_END); + wire is_dcr_base_addr = (dcr_bus_if.write_addr >= `VX_DCR_BASE_STATE_BEGIN && dcr_bus_if.write_addr < `VX_DCR_BASE_STATE_END); + assign socket_dcr_bus_tmp_if.write_valid = dcr_bus_if.write_valid && is_dcr_base_addr; assign socket_dcr_bus_tmp_if.write_addr = dcr_bus_if.write_addr; assign socket_dcr_bus_tmp_if.write_data = dcr_bus_if.write_data; diff --git a/hw/syn/xilinx/scripts/xsim.tcl b/hw/syn/xilinx/scripts/xsim.tcl index 061bc17ab4..ccdc1262f8 100644 --- a/hw/syn/xilinx/scripts/xsim.tcl +++ b/hw/syn/xilinx/scripts/xsim.tcl @@ -14,12 +14,9 @@ # limitations under the License. # -#log_wave -r * -#run all -#exit +log_wave -r * -open_vcd xsim_dump.vcd -log_vcd /* -run all -close_vcd -exit +#open_vcd xsim_dump.vcd +#log_vcd /* +#run all +#close_vcd \ No newline at end of file diff --git a/hw/syn/xilinx/xrt/Makefile b/hw/syn/xilinx/xrt/Makefile index 4e3259f340..fe9a56dc8a 100644 --- a/hw/syn/xilinx/xrt/Makefile +++ b/hw/syn/xilinx/xrt/Makefile @@ -117,7 +117,7 @@ endif # Debugging ifdef DEBUG - VPP_FLAGS += -g --debug.protocol all + VPP_FLAGS += -g --optimize 0 --debug.protocol all ifneq ($(TARGET), hw) VPP_FLAGS += --vivado.prop fileset.sim_1.xsim.elaborate.debug_level=all CFLAGS += -DDEBUG_LEVEL=$(DEBUG) $(DBG_TRACE_FLAGS) diff --git a/hw/syn/xilinx/xrt/xrt.ini b/hw/syn/xilinx/xrt/xrt.ini.in similarity index 51% rename from hw/syn/xilinx/xrt/xrt.ini rename to hw/syn/xilinx/xrt/xrt.ini.in index 0942191123..99511f884c 100644 --- a/hw/syn/xilinx/xrt/xrt.ini +++ b/hw/syn/xilinx/xrt/xrt.ini.in @@ -1,9 +1,9 @@ -[Runtime] +[Runtime] runtime_log=console [Emulation] -#debug_mode=batch -#user_pre_sim_script=xsim.tcl +debug_mode=batch +user_pre_sim_script=@VORTEX_HOME@/hw/syn/xilinx/scripts/xsim.tcl [Debug] profile=true diff --git a/sim/opaesim/opae_sim.cpp b/sim/opaesim/opae_sim.cpp index 2f847dc207..8e9e8c4d89 100644 --- a/sim/opaesim/opae_sim.cpp +++ b/sim/opaesim/opae_sim.cpp @@ -125,7 +125,7 
@@ class opae_sim::Impl { } int init() { - // force random values for unitialized signals + // force random values for uninitialized signals Verilated::randReset(VERILATOR_RESET_VALUE); Verilated::randSeed(50); diff --git a/sim/rtlsim/processor.cpp b/sim/rtlsim/processor.cpp index d964a3d5a1..25d219fcf0 100644 --- a/sim/rtlsim/processor.cpp +++ b/sim/rtlsim/processor.cpp @@ -98,7 +98,7 @@ void sim_trace_enable(bool enable) { class Processor::Impl { public: Impl() : dram_sim_(MEM_CLOCK_RATIO) { - // force random values for unitialized signals + // force random values for uninitialized signals Verilated::randReset(VERILATOR_RESET_VALUE); Verilated::randSeed(50); diff --git a/sim/xrtsim/xrt_sim.cpp b/sim/xrtsim/xrt_sim.cpp index 80aed7f7da..12a78c23d3 100644 --- a/sim/xrtsim/xrt_sim.cpp +++ b/sim/xrtsim/xrt_sim.cpp @@ -112,7 +112,7 @@ class xrt_sim::Impl { } int init() { - // force random values for unitialized signals + // force random values for uninitialized signals Verilated::randReset(VERILATOR_RESET_VALUE); Verilated::randSeed(50); From efc8834c750d47b24cd2ed717fc26cb139007dd8 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Thu, 5 Sep 2024 21:32:25 -0700 Subject: [PATCH 151/407] xilinx afu reset refactoring --- hw/rtl/afu/xrt/VX_afu_ctrl.sv | 104 ++++++++++++++++------------------ hw/rtl/afu/xrt/VX_afu_wrap.sv | 89 +++++++++++++---------------- 2 files changed, 90 insertions(+), 103 deletions(-) diff --git a/hw/rtl/afu/xrt/VX_afu_ctrl.sv b/hw/rtl/afu/xrt/VX_afu_ctrl.sv index 687b55a8cd..a6cd31b05b 100644 --- a/hw/rtl/afu/xrt/VX_afu_ctrl.sv +++ b/hw/rtl/afu/xrt/VX_afu_ctrl.sv @@ -21,7 +21,6 @@ module VX_afu_ctrl #( // axi4 lite slave signals input wire clk, input wire reset, - input wire clk_en, input wire s_axi_awvalid, input wire [AXI_ADDR_WIDTH-1:0] s_axi_awaddr, @@ -191,7 +190,7 @@ module VX_afu_ctrl #( cmd_scope_writing <= 0; scope_bus_ctr <= '0; scope_bus_out_r <= 0; - end else if (clk_en) begin + end else begin if (s_axi_w_fire && waddr == ADDR_SCP_0) 
begin scope_bus_wdata[31:0] <= (s_axi_wdata & wmask) | (scope_bus_wdata[31:0] & ~wmask); end @@ -244,7 +243,7 @@ module VX_afu_ctrl #( always @(posedge clk) begin if (reset) begin wstate <= WSTATE_IDLE; - end else if (clk_en) begin + end else begin case (wstate) WSTATE_IDLE: wstate <= s_axi_awvalid ? WSTATE_DATA : WSTATE_IDLE; WSTATE_DATA: wstate <= s_axi_wvalid ? WSTATE_RESP : WSTATE_DATA; @@ -256,10 +255,8 @@ module VX_afu_ctrl #( // waddr always @(posedge clk) begin - if (clk_en) begin - if (s_axi_aw_fire) - waddr <= s_axi_awaddr[ADDR_BITS-1:0]; - end + if (s_axi_aw_fire) + waddr <= s_axi_awaddr[ADDR_BITS-1:0]; end // wdata @@ -280,12 +277,13 @@ module VX_afu_ctrl #( for (integer i = 0; i < AXI_NUM_BANKS; ++i) begin mem_r[i] <= '0; end - end else if (clk_en) begin + end else begin + dcr_wr_valid_r <= 0; + ap_reset_r <= 0; + if (ap_ready) ap_start_r <= auto_restart_r; - dcr_wr_valid_r <= 0; - if (s_axi_w_fire) begin case (waddr) ADDR_AP_CTRL: begin @@ -351,7 +349,7 @@ module VX_afu_ctrl #( always @(posedge clk) begin if (reset) begin rstate <= RSTATE_IDLE; - end else if (clk_en) begin + end else begin case (rstate) RSTATE_IDLE: rstate <= s_axi_arvalid ? RSTATE_DATA : RSTATE_IDLE; RSTATE_DATA: rstate <= (s_axi_rready & s_axi_rvalid) ? 
RSTATE_IDLE : RSTATE_DATA; @@ -362,49 +360,47 @@ module VX_afu_ctrl #( // rdata always @(posedge clk) begin - if (clk_en) begin - if (s_axi_ar_fire) begin - rdata <= '0; - case (raddr) - ADDR_AP_CTRL: begin - rdata[0] <= ap_start_r; - rdata[1] <= ap_done; - rdata[2] <= ap_idle; - rdata[3] <= ap_ready; - rdata[7] <= auto_restart_r; - end - ADDR_GIE: begin - rdata <= 32'(gie_r); - end - ADDR_IER: begin - rdata <= 32'(ier_r); - end - ADDR_ISR: begin - rdata <= 32'(isr_r); - end - ADDR_DEV_0: begin - rdata <= dev_caps[31:0]; - end - ADDR_DEV_1: begin - rdata <= dev_caps[63:32]; - end - ADDR_ISA_0: begin - rdata <= isa_caps[31:0]; - end - ADDR_ISA_1: begin - rdata <= isa_caps[63:32]; - end - `ifdef SCOPE - ADDR_SCP_0: begin - rdata <= scope_bus_rdata[31:0]; - end - ADDR_SCP_1: begin - rdata <= scope_bus_rdata[63:32]; - end - `endif - default:; - endcase - end + if (s_axi_ar_fire) begin + rdata <= '0; + case (raddr) + ADDR_AP_CTRL: begin + rdata[0] <= ap_start_r; + rdata[1] <= ap_done; + rdata[2] <= ap_idle; + rdata[3] <= ap_ready; + rdata[7] <= auto_restart_r; + end + ADDR_GIE: begin + rdata <= 32'(gie_r); + end + ADDR_IER: begin + rdata <= 32'(ier_r); + end + ADDR_ISR: begin + rdata <= 32'(isr_r); + end + ADDR_DEV_0: begin + rdata <= dev_caps[31:0]; + end + ADDR_DEV_1: begin + rdata <= dev_caps[63:32]; + end + ADDR_ISA_0: begin + rdata <= isa_caps[31:0]; + end + ADDR_ISA_1: begin + rdata <= isa_caps[63:32]; + end + `ifdef SCOPE + ADDR_SCP_0: begin + rdata <= scope_bus_rdata[31:0]; + end + ADDR_SCP_1: begin + rdata <= scope_bus_rdata[63:32]; + end + `endif + default:; + endcase end end diff --git a/hw/rtl/afu/xrt/VX_afu_wrap.sv b/hw/rtl/afu/xrt/VX_afu_wrap.sv index a844802e9b..e1ba821263 100644 --- a/hw/rtl/afu/xrt/VX_afu_wrap.sv +++ b/hw/rtl/afu/xrt/VX_afu_wrap.sv @@ -87,8 +87,7 @@ module VX_afu_wrap #( reg [`CLOG2(`RESET_DELAY+1)-1:0] vx_reset_ctr; reg [15:0] vx_pending_writes; reg vx_busy_wait; - reg vx_running; - + reg vx_reset = 1; // asserted at initialization 
wire vx_busy; wire [63:0] mem_base [C_M_AXI_MEM_NUM_BANKS]; @@ -101,8 +100,8 @@ module VX_afu_wrap #( wire ap_reset; wire ap_start; - wire ap_idle = ~vx_running; - wire ap_done = ~(state == STATE_RUN || vx_pending_writes != 0); + wire ap_idle = vx_reset; + wire ap_done = (state == STATE_IDLE) && (vx_pending_writes == '0); wire ap_ready = 1'b1; `ifdef SCOPE @@ -111,11 +110,24 @@ module VX_afu_wrap #( wire scope_reset = reset; `endif + reg m_axi_mem_wfire; + reg m_axi_mem_bfire; + + always @(*) begin + m_axi_mem_wfire = 0; + m_axi_mem_bfire = 0; + for (integer i = 0; i < C_M_AXI_MEM_NUM_BANKS; ++i) begin + m_axi_mem_wfire |= m_axi_mem_wvalid_a[i] && m_axi_mem_wready_a[i]; + m_axi_mem_bfire |= m_axi_mem_bvalid_a[i] && m_axi_mem_bready_a[i]; + end + end + always @(posedge ap_clk) begin if (reset || ap_reset) begin - state <= STATE_IDLE; - vx_busy_wait <= 0; - vx_running <= 0; + state <= STATE_IDLE; + vx_pending_writes <= '0; + vx_reset_ctr <= (`RESET_DELAY-1); + vx_reset <= 1; end else begin case (state) STATE_IDLE: begin @@ -124,11 +136,21 @@ module VX_afu_wrap #( `TRACE(2, ("%d: STATE RUN\n", $time)); `endif state <= STATE_RUN; - vx_running <= 0; + vx_reset_ctr <= 0; + vx_reset <= 1; end end STATE_RUN: begin - if (vx_running) begin + if (vx_reset) begin + // wait until the reset network is ready + if (vx_reset_ctr == 0) begin + `ifdef DBG_TRACE_AFU + `TRACE(2, ("%d: AFU: Begin execution\n", $time)); + `endif + vx_busy_wait <= 1; + vx_reset <= 0; + end + end else begin if (vx_busy_wait) begin // wait until processor goes busy if (vx_busy) begin @@ -137,44 +159,22 @@ module VX_afu_wrap #( end else begin // wait until the processor is not busy if (~vx_busy) begin - state <= STATE_IDLE; `ifdef DBG_TRACE_AFU `TRACE(2, ("%d: AFU: End execution\n", $time)); - `TRACE(2, ("%d: STATE IDLE\n", $time)); `endif + state <= STATE_IDLE; end end - end else begin - // wait until the reset sequence is complete - if (vx_reset_ctr == (`RESET_DELAY-1)) begin - `ifdef DBG_TRACE_AFU - 
`TRACE(2, ("%d: AFU: Begin execution\n", $time)); - `endif - vx_running <= 1; - vx_busy_wait <= 1; - end end end endcase - end - end - - reg m_axi_mem_wfire; - reg m_axi_mem_bfire; - always @(*) begin - m_axi_mem_wfire = 0; - m_axi_mem_bfire = 0; - for (integer i = 0; i < C_M_AXI_MEM_NUM_BANKS; ++i) begin - m_axi_mem_wfire |= m_axi_mem_wvalid_a[i] && m_axi_mem_wready_a[i]; - m_axi_mem_bfire |= m_axi_mem_bvalid_a[i] && m_axi_mem_bready_a[i]; - end - end + // ensure reset network initialization + if (vx_reset_ctr != 0) begin + vx_reset_ctr <= vx_reset_ctr - 1; + end - always @(posedge ap_clk) begin - if (reset || ap_reset) begin - vx_pending_writes <= '0; - end else begin + // track pending writes if (m_axi_mem_wfire && ~m_axi_mem_bfire) vx_pending_writes <= vx_pending_writes + 1; if (~m_axi_mem_wfire && m_axi_mem_bfire) @@ -182,22 +182,13 @@ module VX_afu_wrap #( end end - always @(posedge ap_clk) begin - if (state == STATE_RUN) begin - vx_reset_ctr <= vx_reset_ctr + 1; - end else begin - vx_reset_ctr <= '0; - end - end - VX_afu_ctrl #( .AXI_ADDR_WIDTH (C_S_AXI_CTRL_ADDR_WIDTH), .AXI_DATA_WIDTH (C_S_AXI_CTRL_DATA_WIDTH), .AXI_NUM_BANKS (C_M_AXI_MEM_NUM_BANKS) ) afu_ctrl ( .clk (ap_clk), - .reset (reset || ap_reset), - .clk_en (1'b1), + .reset (reset), .s_axi_awvalid (s_axi_ctrl_awvalid), .s_axi_awready (s_axi_ctrl_awready), @@ -255,7 +246,7 @@ module VX_afu_wrap #( `SCOPE_IO_BIND (1) .clk (ap_clk), - .reset (reset || ap_reset || ~vx_running), + .reset (vx_reset), .m_axi_awvalid (m_axi_mem_awvalid_a), .m_axi_awready (m_axi_mem_awready_a), @@ -319,7 +310,7 @@ module VX_afu_wrap #( interrupt, \ vx_busy_wait, \ vx_busy, \ - vx_running \ + vx_reset \ } `define PROBES { \ From 7cbb026a12ea414eb25195d67b92100cd660ca39 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Thu, 5 Sep 2024 21:34:44 -0700 Subject: [PATCH 152/407] minor update --- hw/rtl/core/VX_operands.sv | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git 
a/hw/rtl/core/VX_operands.sv b/hw/rtl/core/VX_operands.sv index a88522ee72..e4bad5ced6 100644 --- a/hw/rtl/core/VX_operands.sv +++ b/hw/rtl/core/VX_operands.sv @@ -54,8 +54,8 @@ module VX_operands import VX_gpu_pkg::*; #( `UNUSED_VAR (writeback_if.data.sop) wire [NUM_SRC_OPDS-1:0] src_valid; - wire [NUM_SRC_OPDS-1:0] req_in_valid, req_in_ready; - wire [NUM_SRC_OPDS-1:0][PER_BANK_ADDRW-1:0] req_in_data; + wire [NUM_SRC_OPDS-1:0] req_valid_in, req_ready_in; + wire [NUM_SRC_OPDS-1:0][PER_BANK_ADDRW-1:0] req_data_in; wire [NUM_SRC_OPDS-1:0][BANK_SEL_WIDTH-1:0] req_bank_idx; wire [NUM_BANKS-1:0] gpr_rd_valid, gpr_rd_ready; @@ -64,7 +64,8 @@ module VX_operands import VX_gpu_pkg::*; #( wire [NUM_BANKS-1:0][`NUM_THREADS-1:0][`XLEN-1:0] gpr_rd_data_st2; wire [NUM_BANKS-1:0][REQ_SEL_WIDTH-1:0] gpr_rd_req_idx, gpr_rd_req_idx_st1, gpr_rd_req_idx_st2; - wire pipe_valid_st1, pipe_ready_st1, pipe_in_ready; + wire pipe_ready_in; + wire pipe_valid_st1, pipe_ready_st1; wire pipe_valid_st2, pipe_ready_st2; wire [META_DATAW-1:0] pipe_data, pipe_data_st1, pipe_data_st2; @@ -82,9 +83,9 @@ module VX_operands import VX_gpu_pkg::*; #( for (genvar i = 0; i < NUM_SRC_OPDS; ++i) begin if (ISSUE_WIS != 0) begin - assign req_in_data[i] = {src_opds[i][`NR_BITS-1:BANK_SEL_BITS], scoreboard_if.data.wis}; + assign req_data_in[i] = {src_opds[i][`NR_BITS-1:BANK_SEL_BITS], scoreboard_if.data.wis}; end else begin - assign req_in_data[i] = src_opds[i][`NR_BITS-1:BANK_SEL_BITS]; + assign req_data_in[i] = src_opds[i][`NR_BITS-1:BANK_SEL_BITS]; end if (NUM_BANKS != 1) begin assign req_bank_idx[i] = src_opds[i][BANK_SEL_BITS-1:0]; @@ -97,7 +98,7 @@ module VX_operands import VX_gpu_pkg::*; #( assign src_valid[i] = (src_opds[i] != 0) && ~data_fetched_st1[i]; end - assign req_in_valid = {NUM_SRC_OPDS{scoreboard_if.valid}} & src_valid; + assign req_valid_in = {NUM_SRC_OPDS{scoreboard_if.valid}} & src_valid; VX_stream_xbar #( .NUM_INPUTS (NUM_SRC_OPDS), @@ -110,17 +111,17 @@ module VX_operands import 
VX_gpu_pkg::*; #( .clk (clk), .reset (reset), `UNUSED_PIN(collisions), - .valid_in (req_in_valid), - .data_in (req_in_data), + .valid_in (req_valid_in), + .data_in (req_data_in), .sel_in (req_bank_idx), - .ready_in (req_in_ready), + .ready_in (req_ready_in), .valid_out (gpr_rd_valid), .data_out (gpr_rd_addr), .sel_out (gpr_rd_req_idx), .ready_out (gpr_rd_ready) ); - assign gpr_rd_ready = {NUM_BANKS{pipe_in_ready}}; + assign gpr_rd_ready = {NUM_BANKS{pipe_ready_in}}; always @(*) begin has_collision_n = 0; @@ -138,7 +139,7 @@ module VX_operands import VX_gpu_pkg::*; #( if (scoreboard_if.ready) begin data_fetched_n = '0; end else begin - data_fetched_n = data_fetched_st1 | req_in_ready; + data_fetched_n = data_fetched_st1 | req_ready_in; end end @@ -154,7 +155,7 @@ module VX_operands import VX_gpu_pkg::*; #( scoreboard_if.data.uuid }; - assign scoreboard_if.ready = pipe_in_ready && ~has_collision_n; + assign scoreboard_if.ready = pipe_ready_in && ~has_collision_n; wire pipe_fire_st1 = pipe_valid_st1 && pipe_ready_st1; wire pipe_fire_st2 = pipe_valid_st2 && pipe_ready_st2; @@ -166,7 +167,7 @@ module VX_operands import VX_gpu_pkg::*; #( .clk (clk), .reset (reset), .valid_in (scoreboard_if.valid), - .ready_in (pipe_in_ready), + .ready_in (pipe_ready_in), .data_in ({data_fetched_n, gpr_rd_valid, pipe_data, has_collision_n, gpr_rd_addr, gpr_rd_req_idx}), .data_out ({data_fetched_st1, gpr_rd_valid_st1, pipe_data_st1, has_collision_st1, gpr_rd_addr_st1, gpr_rd_req_idx_st1}), .valid_out(pipe_valid_st1), @@ -285,7 +286,7 @@ module VX_operands import VX_gpu_pkg::*; #( if (reset) begin collisions_r <= '0; end else begin - collisions_r <= collisions_r + `PERF_CTR_BITS'(scoreboard_if.valid && pipe_in_ready && has_collision_n); + collisions_r <= collisions_r + `PERF_CTR_BITS'(scoreboard_if.valid && pipe_ready_in && has_collision_n); end end assign perf_stalls = collisions_r; From e178eb13300ba92a57099abdcac488f778be90b9 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Thu, 5 Sep 
2024 21:35:10 -0700 Subject: [PATCH 153/407] operands's x-propagation bug fix (caught using vivado simulator) --- hw/rtl/core/VX_operands.sv | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/hw/rtl/core/VX_operands.sv b/hw/rtl/core/VX_operands.sv index e4bad5ced6..ef98ea79ef 100644 --- a/hw/rtl/core/VX_operands.sv +++ b/hw/rtl/core/VX_operands.sv @@ -134,12 +134,14 @@ module VX_operands import VX_gpu_pkg::*; #( end end + wire [NUM_SRC_OPDS-1:0] req_fire_in = req_valid_in & req_ready_in; + always @(*) begin data_fetched_n = data_fetched_st1; if (scoreboard_if.ready) begin data_fetched_n = '0; end else begin - data_fetched_n = data_fetched_st1 | req_ready_in; + data_fetched_n = data_fetched_st1 | req_fire_in; end end From fdc62c5f98e43ec5974824362d7970b5fd3d05da Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Fri, 6 Sep 2024 01:27:54 -0700 Subject: [PATCH 154/407] minor update --- tests/regression/common.mk | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/regression/common.mk b/tests/regression/common.mk index c4a00bc135..317e94b092 100644 --- a/tests/regression/common.mk +++ b/tests/regression/common.mk @@ -99,9 +99,9 @@ run-opae: $(PROJECT) kernel.vxbin run-xrt: $(PROJECT) kernel.vxbin ifeq ($(TARGET), hw) - XRT_INI_PATH=$(XRT_SYN_DIR)/xrt.ini EMCONFIG_PATH=$(FPGA_BIN_DIR) XRT_DEVICE_INDEX=$(XRT_DEVICE_INDEX) XRT_XCLBIN_PATH=$(FPGA_BIN_DIR)/vortex_afu.xclbin LD_LIBRARY_PATH=$(XILINX_XRT)/lib:$(VORTEX_RT_PATH):$(LD_LIBRARY_PATH) VORTEX_DRIVER=xrt ./$(PROJECT) $(OPTS) + XRT_INI_PATH=$(ROOT_DIR)/hw/syn/xilinx/xrt/xrt.ini EMCONFIG_PATH=$(FPGA_BIN_DIR) XRT_DEVICE_INDEX=$(XRT_DEVICE_INDEX) XRT_XCLBIN_PATH=$(FPGA_BIN_DIR)/vortex_afu.xclbin LD_LIBRARY_PATH=$(XILINX_XRT)/lib:$(VORTEX_RT_PATH):$(LD_LIBRARY_PATH) VORTEX_DRIVER=xrt ./$(PROJECT) $(OPTS) else - XCL_EMULATION_MODE=$(TARGET) XRT_INI_PATH=$(XRT_SYN_DIR)/xrt.ini EMCONFIG_PATH=$(FPGA_BIN_DIR) XRT_DEVICE_INDEX=$(XRT_DEVICE_INDEX) 
XRT_XCLBIN_PATH=$(FPGA_BIN_DIR)/vortex_afu.xclbin LD_LIBRARY_PATH=$(XILINX_XRT)/lib:$(VORTEX_RT_PATH):$(LD_LIBRARY_PATH) VORTEX_DRIVER=xrt ./$(PROJECT) $(OPTS) + XCL_EMULATION_MODE=$(TARGET) XRT_INI_PATH=$(ROOT_DIR)/hw/syn/xilinx/xrt/xrt.ini EMCONFIG_PATH=$(FPGA_BIN_DIR) XRT_DEVICE_INDEX=$(XRT_DEVICE_INDEX) XRT_XCLBIN_PATH=$(FPGA_BIN_DIR)/vortex_afu.xclbin LD_LIBRARY_PATH=$(XILINX_XRT)/lib:$(VORTEX_RT_PATH):$(LD_LIBRARY_PATH) VORTEX_DRIVER=xrt ./$(PROJECT) $(OPTS) endif .depend: $(SRCS) From bfbe642170790fa488adb863c58dfff34007f524 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sat, 7 Sep 2024 01:36:17 -0700 Subject: [PATCH 155/407] adding RTL uuigen --- hw/dpi/util_dpi.cpp | 16 -------------- hw/dpi/util_dpi.vh | 2 -- hw/rtl/core/VX_schedule.sv | 37 +++++++++++++------------------- hw/rtl/core/VX_uuid_gen.sv | 43 ++++++++++++++++++++++++++++++++++++++ 4 files changed, 57 insertions(+), 41 deletions(-) create mode 100644 hw/rtl/core/VX_uuid_gen.sv diff --git a/hw/dpi/util_dpi.cpp b/hw/dpi/util_dpi.cpp index 020816b0b2..d804d4885f 100644 --- a/hw/dpi/util_dpi.cpp +++ b/hw/dpi/util_dpi.cpp @@ -47,8 +47,6 @@ extern "C" { void dpi_trace(int level, const char* format, ...); void dpi_trace_start(); void dpi_trace_stop(); - - uint64_t dpi_uuid_gen(bool reset, int wid); } bool sim_trace_enabled(); @@ -204,17 +202,3 @@ void dpi_trace_start() { void dpi_trace_stop() { sim_trace_enable(false); } - -/////////////////////////////////////////////////////////////////////////////// - -std::unordered_map g_uuid_gens; - -uint64_t dpi_uuid_gen(bool reset, int wid) { - if (reset) { - g_uuid_gens.clear(); - return 0; - } - uint32_t instr_uuid = g_uuid_gens[wid]++; - uint64_t uuid = (uint64_t(wid) << 32) | instr_uuid; - return uuid; -} \ No newline at end of file diff --git a/hw/dpi/util_dpi.vh b/hw/dpi/util_dpi.vh index 0da62b0410..74b095af10 100644 --- a/hw/dpi/util_dpi.vh +++ b/hw/dpi/util_dpi.vh @@ -30,6 +30,4 @@ import "DPI-C" function void dpi_trace(input int level, input string 
format /*ve import "DPI-C" function void dpi_trace_start(); import "DPI-C" function void dpi_trace_stop(); -import "DPI-C" function longint dpi_uuid_gen(input logic reset, input int wid); - `endif diff --git a/hw/rtl/core/VX_schedule.sv b/hw/rtl/core/VX_schedule.sv index af0ee56212..6916d3e00a 100644 --- a/hw/rtl/core/VX_schedule.sv +++ b/hw/rtl/core/VX_schedule.sv @@ -331,30 +331,23 @@ module VX_schedule import VX_gpu_pkg::*; #( schedule_data[schedule_wid][(`NUM_THREADS + `PC_BITS)-5:0] }; + wire [`UUID_WIDTH-1:0] instr_uuid; `ifndef NDEBUG - localparam GNW_WIDTH = `LOG2UP(`NUM_CLUSTERS * `NUM_CORES * `NUM_WARPS); - reg [`UUID_WIDTH-1:0] instr_uuid; - wire [GNW_WIDTH-1:0] g_wid = (GNW_WIDTH'(CORE_ID) << `NW_BITS) + GNW_WIDTH'(schedule_wid); -`ifdef SV_DPI - always @(posedge clk) begin - if (reset) begin - instr_uuid <= `UUID_WIDTH'(dpi_uuid_gen(1, 32'd0)); - end else if (schedule_fire) begin - instr_uuid <= `UUID_WIDTH'(dpi_uuid_gen(0, 32'(g_wid))); - end - end -`else - wire [GNW_WIDTH+16-1:0] w_uuid = {g_wid, 16'(schedule_pc)}; - always @(*) begin - instr_uuid = `UUID_WIDTH'(w_uuid); - end -`endif + VX_uuid_gen #( + .CORE_ID (CORE_ID) + ) uuid_gen ( + .clk (clk), + .reset (reset), + .incr (schedule_fire), + .wid (schedule_wid), + .uuid (instr_uuid) + ); `else - wire [`UUID_WIDTH-1:0] instr_uuid = '0; + assign instr_uuid = '0; `endif VX_elastic_buffer #( - .DATAW (`NUM_THREADS + `PC_BITS + `NW_WIDTH), + .DATAW (`NUM_THREADS + `PC_BITS + `NW_WIDTH + `UUID_WIDTH), .SIZE (2), // need to buffer out ready_in .OUT_REG (1) // should be registered for BRAM acces in fetch unit ) out_buf ( @@ -362,14 +355,12 @@ module VX_schedule import VX_gpu_pkg::*; #( .reset (reset), .valid_in (schedule_valid), .ready_in (schedule_ready), - .data_in ({schedule_tmask, schedule_pc, schedule_wid}), - .data_out ({schedule_if.data.tmask, schedule_if.data.PC, schedule_if.data.wid}), + .data_in ({schedule_tmask, schedule_pc, schedule_wid, instr_uuid}), + .data_out ({schedule_if.data.tmask, 
schedule_if.data.PC, schedule_if.data.wid, schedule_if.data.uuid}), .valid_out (schedule_if.valid), .ready_out (schedule_if.ready) ); - assign schedule_if.data.uuid = instr_uuid; - // Track pending instructions per warp wire [`NUM_WARPS-1:0] pending_warp_empty; diff --git a/hw/rtl/core/VX_uuid_gen.sv b/hw/rtl/core/VX_uuid_gen.sv new file mode 100644 index 0000000000..8dca50e91f --- /dev/null +++ b/hw/rtl/core/VX_uuid_gen.sv @@ -0,0 +1,43 @@ +// Copyright © 2019-2023 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +`include "VX_define.vh" + +module VX_uuid_gen import VX_gpu_pkg::*; #( + parameter CORE_ID = 0 +) ( + input wire clk, + input wire reset, + input wire incr, + input wire [`NW_WIDTH-1:0] wid, + output wire [`UUID_WIDTH-1:0] uuid +); + localparam GNW_WIDTH = `UUID_WIDTH - 32; + reg [31:0] uuid_cntrs [0:`NUM_WARPS-1]; + reg [`NUM_WARPS-1:0] has_uuid_cntrs; + + always @(posedge clk) begin + if (reset) begin + has_uuid_cntrs <= '0; + end else if (incr) begin + has_uuid_cntrs[wid] <= 1; + end + if (incr) begin + uuid_cntrs[wid] <= has_uuid_cntrs[wid] ? (uuid_cntrs[wid] + 1) : 1; + end + end + + wire [GNW_WIDTH-1:0] g_wid = (GNW_WIDTH'(CORE_ID) << `NW_BITS) + GNW_WIDTH'(wid); + assign uuid = {g_wid, (has_uuid_cntrs[wid] ? 
uuid_cntrs[wid] : 0)}; + +endmodule From 2041a4ad4ad78baa5afc587a34c8f41f1ea15105 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sat, 7 Sep 2024 01:43:30 -0700 Subject: [PATCH 156/407] xrt.ini update --- {hw/syn/xilinx => runtime}/xrt/xrt.ini.in | 2 +- {hw/syn/xilinx/scripts => runtime/xrt}/xsim.tcl | 0 tests/opencl/common.mk | 4 ++-- tests/regression/common.mk | 4 ++-- 4 files changed, 5 insertions(+), 5 deletions(-) rename {hw/syn/xilinx => runtime}/xrt/xrt.ini.in (65%) rename {hw/syn/xilinx/scripts => runtime/xrt}/xsim.tcl (100%) diff --git a/hw/syn/xilinx/xrt/xrt.ini.in b/runtime/xrt/xrt.ini.in similarity index 65% rename from hw/syn/xilinx/xrt/xrt.ini.in rename to runtime/xrt/xrt.ini.in index 99511f884c..90affb447d 100644 --- a/hw/syn/xilinx/xrt/xrt.ini.in +++ b/runtime/xrt/xrt.ini.in @@ -3,7 +3,7 @@ runtime_log=console [Emulation] debug_mode=batch -user_pre_sim_script=@VORTEX_HOME@/hw/syn/xilinx/scripts/xsim.tcl +user_pre_sim_script=@VORTEX_HOME@/runtime/xrt/xsim.tcl [Debug] profile=true diff --git a/hw/syn/xilinx/scripts/xsim.tcl b/runtime/xrt/xsim.tcl similarity index 100% rename from hw/syn/xilinx/scripts/xsim.tcl rename to runtime/xrt/xsim.tcl diff --git a/tests/opencl/common.mk b/tests/opencl/common.mk index dd5af90db6..8173a2535f 100644 --- a/tests/opencl/common.mk +++ b/tests/opencl/common.mk @@ -102,9 +102,9 @@ run-opae: $(PROJECT) $(KERNEL_SRCS) run-xrt: $(PROJECT) $(KERNEL_SRCS) ifeq ($(TARGET), hw) - XRT_INI_PATH=$(XRT_SYN_DIR)/xrt.ini EMCONFIG_PATH=$(FPGA_BIN_DIR) XRT_DEVICE_INDEX=$(XRT_DEVICE_INDEX) XRT_XCLBIN_PATH=$(FPGA_BIN_DIR)/vortex_afu.xclbin LD_LIBRARY_PATH=$(XILINX_XRT)/lib:$(POCL_PATH)/lib:$(VORTEX_RT_PATH):$(LLVM_VORTEX)/lib:$(LD_LIBRARY_PATH) $(POCL_CC_FLAGS) VORTEX_DRIVER=xrt ./$(PROJECT) $(OPTS) + XRT_INI_PATH=$(VORTEX_RT_PATH)/xrt/xrt.ini EMCONFIG_PATH=$(FPGA_BIN_DIR) XRT_DEVICE_INDEX=$(XRT_DEVICE_INDEX) XRT_XCLBIN_PATH=$(FPGA_BIN_DIR)/vortex_afu.xclbin 
LD_LIBRARY_PATH=$(XILINX_XRT)/lib:$(POCL_PATH)/lib:$(VORTEX_RT_PATH):$(LLVM_VORTEX)/lib:$(LD_LIBRARY_PATH) $(POCL_CC_FLAGS) VORTEX_DRIVER=xrt ./$(PROJECT) $(OPTS) else - XCL_EMULATION_MODE=$(TARGET) XRT_INI_PATH=$(XRT_SYN_DIR)/xrt.ini EMCONFIG_PATH=$(FPGA_BIN_DIR) XRT_DEVICE_INDEX=$(XRT_DEVICE_INDEX) XRT_XCLBIN_PATH=$(FPGA_BIN_DIR)/vortex_afu.xclbin LD_LIBRARY_PATH=$(XILINX_XRT)/lib:$(POCL_PATH)/lib:$(VORTEX_RT_PATH):$(LLVM_VORTEX)/lib:$(LD_LIBRARY_PATH) $(POCL_CC_FLAGS) VORTEX_DRIVER=xrt ./$(PROJECT) $(OPTS) + XCL_EMULATION_MODE=$(TARGET) XRT_INI_PATH=$(VORTEX_RT_PATH)/xrt/xrt.ini EMCONFIG_PATH=$(FPGA_BIN_DIR) XRT_DEVICE_INDEX=$(XRT_DEVICE_INDEX) XRT_XCLBIN_PATH=$(FPGA_BIN_DIR)/vortex_afu.xclbin LD_LIBRARY_PATH=$(XILINX_XRT)/lib:$(POCL_PATH)/lib:$(VORTEX_RT_PATH):$(LLVM_VORTEX)/lib:$(LD_LIBRARY_PATH) $(POCL_CC_FLAGS) VORTEX_DRIVER=xrt ./$(PROJECT) $(OPTS) endif .depend: $(SRCS) diff --git a/tests/regression/common.mk b/tests/regression/common.mk index 317e94b092..4edc5c8592 100644 --- a/tests/regression/common.mk +++ b/tests/regression/common.mk @@ -99,9 +99,9 @@ run-opae: $(PROJECT) kernel.vxbin run-xrt: $(PROJECT) kernel.vxbin ifeq ($(TARGET), hw) - XRT_INI_PATH=$(ROOT_DIR)/hw/syn/xilinx/xrt/xrt.ini EMCONFIG_PATH=$(FPGA_BIN_DIR) XRT_DEVICE_INDEX=$(XRT_DEVICE_INDEX) XRT_XCLBIN_PATH=$(FPGA_BIN_DIR)/vortex_afu.xclbin LD_LIBRARY_PATH=$(XILINX_XRT)/lib:$(VORTEX_RT_PATH):$(LD_LIBRARY_PATH) VORTEX_DRIVER=xrt ./$(PROJECT) $(OPTS) + XRT_INI_PATH=$(VORTEX_RT_PATH)/xrt/xrt.ini EMCONFIG_PATH=$(FPGA_BIN_DIR) XRT_DEVICE_INDEX=$(XRT_DEVICE_INDEX) XRT_XCLBIN_PATH=$(FPGA_BIN_DIR)/vortex_afu.xclbin LD_LIBRARY_PATH=$(XILINX_XRT)/lib:$(VORTEX_RT_PATH):$(LD_LIBRARY_PATH) VORTEX_DRIVER=xrt ./$(PROJECT) $(OPTS) else - XCL_EMULATION_MODE=$(TARGET) XRT_INI_PATH=$(ROOT_DIR)/hw/syn/xilinx/xrt/xrt.ini EMCONFIG_PATH=$(FPGA_BIN_DIR) XRT_DEVICE_INDEX=$(XRT_DEVICE_INDEX) XRT_XCLBIN_PATH=$(FPGA_BIN_DIR)/vortex_afu.xclbin LD_LIBRARY_PATH=$(XILINX_XRT)/lib:$(VORTEX_RT_PATH):$(LD_LIBRARY_PATH) 
VORTEX_DRIVER=xrt ./$(PROJECT) $(OPTS) + XCL_EMULATION_MODE=$(TARGET) XRT_INI_PATH=$(VORTEX_RT_PATH)/xrt/xrt.ini EMCONFIG_PATH=$(FPGA_BIN_DIR) XRT_DEVICE_INDEX=$(XRT_DEVICE_INDEX) XRT_XCLBIN_PATH=$(FPGA_BIN_DIR)/vortex_afu.xclbin LD_LIBRARY_PATH=$(XILINX_XRT)/lib:$(VORTEX_RT_PATH):$(LD_LIBRARY_PATH) VORTEX_DRIVER=xrt ./$(PROJECT) $(OPTS) endif .depend: $(SRCS) From a75ed78bf2274d5ec64ef049803a8d4a74bcf086 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sat, 7 Sep 2024 03:42:46 -0700 Subject: [PATCH 157/407] fixed getopt exitcode with invalid parameters --- sim/rtlsim/main.cpp | 8 +++----- sim/simx/main.cpp | 11 +++++------ tests/opencl/conv3/main.cc | 5 ++--- tests/opencl/oclprintf/main.cc | 5 ++--- tests/opencl/psort/main.cc | 5 ++--- tests/opencl/psum/main.cc | 5 ++--- tests/opencl/saxpy/main.cc | 5 ++--- tests/opencl/sfilter/main.cc | 5 ++--- tests/opencl/sgemm/main.cc | 5 ++--- tests/opencl/sgemm2/main.cc | 5 ++--- tests/opencl/sgemm3/main.cc | 5 ++--- tests/opencl/vecadd/main.cc | 5 ++--- tests/regression/basic/main.cpp | 5 ++--- tests/regression/conv3x/main.cpp | 5 ++--- tests/regression/demo/main.cpp | 5 ++--- tests/regression/diverge/main.cpp | 5 ++--- tests/regression/dogfood/main.cpp | 5 ++--- tests/regression/fence/main.cpp | 5 ++--- tests/regression/io_addr/main.cpp | 5 ++--- tests/regression/mstress/main.cpp | 5 ++--- tests/regression/printf/main.cpp | 5 ++--- tests/regression/sgemm2x/main.cpp | 5 ++--- tests/regression/sgemmx/main.cpp | 5 ++--- tests/regression/sort/main.cpp | 5 ++--- tests/regression/stencil3d/main.cpp | 9 +++------ tests/regression/vecaddx/main.cpp | 5 ++--- 26 files changed, 57 insertions(+), 86 deletions(-) diff --git a/sim/rtlsim/main.cpp b/sim/rtlsim/main.cpp index ea0ba9b95a..16ce795505 100644 --- a/sim/rtlsim/main.cpp +++ b/sim/rtlsim/main.cpp @@ -33,13 +33,11 @@ const char* program = nullptr; static void parse_args(int argc, char **argv) { int c; - while ((c = getopt(argc, argv, "rh?")) != -1) { + while ((c = getopt(argc, 
argv, "rh")) != -1) { switch (c) { case 'h': - case '?': - show_usage(); - exit(0); - break; + show_usage(); + exit(0); default: show_usage(); exit(-1); diff --git a/sim/simx/main.cpp b/sim/simx/main.cpp index a8883c696c..6e4c96a827 100644 --- a/sim/simx/main.cpp +++ b/sim/simx/main.cpp @@ -40,7 +40,7 @@ const char* program = nullptr; static void parse_args(int argc, char **argv) { int c; - while ((c = getopt(argc, argv, "t:w:c:rsh?")) != -1) { + while ((c = getopt(argc, argv, "t:w:c:rsh")) != -1) { switch (c) { case 't': num_threads = atoi(optarg); @@ -55,13 +55,12 @@ static void parse_args(int argc, char **argv) { showStats = true; break; case 'h': - case '?': - show_usage(); - exit(0); + show_usage(); + exit(0); break; default: - show_usage(); - exit(-1); + show_usage(); + exit(-1); } } diff --git a/tests/opencl/conv3/main.cc b/tests/opencl/conv3/main.cc index cda8e74ace..1220dabdb2 100644 --- a/tests/opencl/conv3/main.cc +++ b/tests/opencl/conv3/main.cc @@ -116,16 +116,15 @@ static void show_usage() { static void parse_args(int argc, char **argv) { int c; - while ((c = getopt(argc, argv, "n:h?")) != -1) { + while ((c = getopt(argc, argv, "n:h")) != -1) { switch (c) { case 'n': size = atoi(optarg); break; case 'h': - case '?': { show_usage(); exit(0); - } break; + break; default: show_usage(); exit(-1); diff --git a/tests/opencl/oclprintf/main.cc b/tests/opencl/oclprintf/main.cc index ef82a33e59..c23e6dec08 100644 --- a/tests/opencl/oclprintf/main.cc +++ b/tests/opencl/oclprintf/main.cc @@ -81,16 +81,15 @@ static void show_usage() { static void parse_args(int argc, char **argv) { int c; - while ((c = getopt(argc, argv, "n:h?")) != -1) { + while ((c = getopt(argc, argv, "n:h")) != -1) { switch (c) { case 'n': size = atoi(optarg); break; case 'h': - case '?': { show_usage(); exit(0); - } break; + break; default: show_usage(); exit(-1); diff --git a/tests/opencl/psort/main.cc b/tests/opencl/psort/main.cc index e0bd49b8ed..8ecfdc5239 100644 --- 
a/tests/opencl/psort/main.cc +++ b/tests/opencl/psort/main.cc @@ -87,7 +87,7 @@ static void show_usage() { static void parse_args(int argc, char **argv) { int c; - while ((c = getopt(argc, argv, "fn:h?")) != -1) { + while ((c = getopt(argc, argv, "fn:h")) != -1) { switch (c) { case 'f': float_enable = 1; @@ -96,10 +96,9 @@ static void parse_args(int argc, char **argv) { size = atoi(optarg); break; case 'h': - case '?': { show_usage(); exit(0); - } break; + break; default: show_usage(); exit(-1); diff --git a/tests/opencl/psum/main.cc b/tests/opencl/psum/main.cc index 749d406195..5606de8c5b 100644 --- a/tests/opencl/psum/main.cc +++ b/tests/opencl/psum/main.cc @@ -104,7 +104,7 @@ static void show_usage() { static void parse_args(int argc, char **argv) { int c; - while ((c = getopt(argc, argv, "n:l:h?")) != -1) { + while ((c = getopt(argc, argv, "n:l:h")) != -1) { switch (c) { case 'n': size = atoi(optarg); @@ -113,10 +113,9 @@ static void parse_args(int argc, char **argv) { local_size = atoi(optarg); break; case 'h': - case '?': { show_usage(); exit(0); - } break; + break; default: show_usage(); exit(-1); diff --git a/tests/opencl/saxpy/main.cc b/tests/opencl/saxpy/main.cc index 2d896e6a9f..2397c720e4 100644 --- a/tests/opencl/saxpy/main.cc +++ b/tests/opencl/saxpy/main.cc @@ -126,13 +126,12 @@ static void show_usage() { static void parse_args(int argc, char **argv) { int c; - while ((c = getopt(argc, argv, "n:h?")) != -1) { + while ((c = getopt(argc, argv, "n:h")) != -1) { switch (c) { case 'n': size = atoi(optarg); break; - case 'h': - case '?': { + case 'h':{ show_usage(); exit(0); } break; diff --git a/tests/opencl/sfilter/main.cc b/tests/opencl/sfilter/main.cc index b9d2356b28..97cfb689e7 100644 --- a/tests/opencl/sfilter/main.cc +++ b/tests/opencl/sfilter/main.cc @@ -124,16 +124,15 @@ int size = 16; static void parse_args(int argc, char **argv) { int c; - while ((c = getopt(argc, argv, "n:h?")) != -1) { + while ((c = getopt(argc, argv, "n:h")) != -1) { switch 
(c) { case 'n': size = atoi(optarg); break; case 'h': - case '?': { show_usage(); exit(0); - } break; + break; default: show_usage(); exit(-1); diff --git a/tests/opencl/sgemm/main.cc b/tests/opencl/sgemm/main.cc index 41c1bc5e86..31f99d2e4c 100644 --- a/tests/opencl/sgemm/main.cc +++ b/tests/opencl/sgemm/main.cc @@ -147,16 +147,15 @@ static void show_usage() { static void parse_args(int argc, char **argv) { int c; - while ((c = getopt(argc, argv, "n:h?")) != -1) { + while ((c = getopt(argc, argv, "n:h")) != -1) { switch (c) { case 'n': size = atoi(optarg); break; case 'h': - case '?': { show_usage(); exit(0); - } break; + break; default: show_usage(); exit(-1); diff --git a/tests/opencl/sgemm2/main.cc b/tests/opencl/sgemm2/main.cc index 595a9fc515..c4ca06fdbc 100644 --- a/tests/opencl/sgemm2/main.cc +++ b/tests/opencl/sgemm2/main.cc @@ -147,16 +147,15 @@ static void show_usage() { static void parse_args(int argc, char **argv) { int c; - while ((c = getopt(argc, argv, "n:h?")) != -1) { + while ((c = getopt(argc, argv, "n:h")) != -1) { switch (c) { case 'n': size = atoi(optarg); break; case 'h': - case '?': { show_usage(); exit(0); - } break; + break; default: show_usage(); exit(-1); diff --git a/tests/opencl/sgemm3/main.cc b/tests/opencl/sgemm3/main.cc index 570cee9aef..24dd397528 100644 --- a/tests/opencl/sgemm3/main.cc +++ b/tests/opencl/sgemm3/main.cc @@ -148,7 +148,7 @@ static void show_usage() { static void parse_args(int argc, char **argv) { int c; - while ((c = getopt(argc, argv, "n:t:h?")) != -1) { + while ((c = getopt(argc, argv, "n:t:h")) != -1) { switch (c) { case 'n': size = atoi(optarg); @@ -157,10 +157,9 @@ static void parse_args(int argc, char **argv) { tile_size = atoi(optarg); break; case 'h': - case '?': { show_usage(); exit(0); - } break; + break; default: show_usage(); exit(-1); diff --git a/tests/opencl/vecadd/main.cc b/tests/opencl/vecadd/main.cc index e1316ad3f9..190d29450f 100644 --- a/tests/opencl/vecadd/main.cc +++ 
b/tests/opencl/vecadd/main.cc @@ -141,16 +141,15 @@ static void show_usage() { static void parse_args(int argc, char **argv) { int c; - while ((c = getopt(argc, argv, "n:h?")) != -1) { + while ((c = getopt(argc, argv, "n:h")) != -1) { switch (c) { case 'n': size = atoi(optarg); break; case 'h': - case '?': { show_usage(); exit(0); - } break; + break; default: show_usage(); exit(-1); diff --git a/tests/regression/basic/main.cpp b/tests/regression/basic/main.cpp index 73f3e29a26..575333c4bb 100755 --- a/tests/regression/basic/main.cpp +++ b/tests/regression/basic/main.cpp @@ -38,7 +38,7 @@ static void show_usage() { static void parse_args(int argc, char **argv) { int c; - while ((c = getopt(argc, argv, "n:t:k:h?")) != -1) { + while ((c = getopt(argc, argv, "n:t:k:h")) != -1) { switch (c) { case 'n': count = atoi(optarg); @@ -50,10 +50,9 @@ static void parse_args(int argc, char **argv) { kernel_file = optarg; break; case 'h': - case '?': { show_usage(); exit(0); - } break; + break; default: show_usage(); exit(-1); diff --git a/tests/regression/conv3x/main.cpp b/tests/regression/conv3x/main.cpp index d5f8b4e81c..3a0e192fbd 100644 --- a/tests/regression/conv3x/main.cpp +++ b/tests/regression/conv3x/main.cpp @@ -109,7 +109,7 @@ static void show_usage() { static void parse_args(int argc, char **argv) { int c; - while ((c = getopt(argc, argv, "n:k:lh?")) != -1) { + while ((c = getopt(argc, argv, "n:k:lh")) != -1) { switch (c) { case 'n': size = atoi(optarg); @@ -121,10 +121,9 @@ static void parse_args(int argc, char **argv) { kernel_file = optarg; break; case 'h': - case '?': { show_usage(); exit(0); - } break; + break; default: show_usage(); exit(-1); diff --git a/tests/regression/demo/main.cpp b/tests/regression/demo/main.cpp index 4947cb64f9..3fdd036011 100644 --- a/tests/regression/demo/main.cpp +++ b/tests/regression/demo/main.cpp @@ -87,7 +87,7 @@ static void show_usage() { static void parse_args(int argc, char **argv) { int c; - while ((c = getopt(argc, argv, 
"n:k:h?")) != -1) { + while ((c = getopt(argc, argv, "n:k:h")) != -1) { switch (c) { case 'n': count = atoi(optarg); @@ -95,8 +95,7 @@ static void parse_args(int argc, char **argv) { case 'k': kernel_file = optarg; break; - case 'h': - case '?': { + case 'h':{ show_usage(); exit(0); } break; diff --git a/tests/regression/diverge/main.cpp b/tests/regression/diverge/main.cpp index fc43846109..d858b17295 100644 --- a/tests/regression/diverge/main.cpp +++ b/tests/regression/diverge/main.cpp @@ -35,7 +35,7 @@ static void show_usage() { static void parse_args(int argc, char **argv) { int c; - while ((c = getopt(argc, argv, "n:k:h?")) != -1) { + while ((c = getopt(argc, argv, "n:k:h")) != -1) { switch (c) { case 'n': count = atoi(optarg); @@ -44,10 +44,9 @@ static void parse_args(int argc, char **argv) { kernel_file = optarg; break; case 'h': - case '?': { show_usage(); exit(0); - } break; + break; default: show_usage(); exit(-1); diff --git a/tests/regression/dogfood/main.cpp b/tests/regression/dogfood/main.cpp index d308821f0a..f2922c6325 100644 --- a/tests/regression/dogfood/main.cpp +++ b/tests/regression/dogfood/main.cpp @@ -35,7 +35,7 @@ static void show_usage() { static void parse_args(int argc, char **argv) { int c; - while ((c = getopt(argc, argv, "n:t:x:s:e:k:ch?")) != -1) { + while ((c = getopt(argc, argv, "n:t:x:s:e:k:ch")) != -1) { switch (c) { case 'n': count = atoi(optarg); @@ -59,10 +59,9 @@ static void parse_args(int argc, char **argv) { stop_on_error = false; break; case 'h': - case '?': { show_usage(); exit(0); - } break; + break; default: show_usage(); exit(-1); diff --git a/tests/regression/fence/main.cpp b/tests/regression/fence/main.cpp index ead4ad5513..716036b113 100644 --- a/tests/regression/fence/main.cpp +++ b/tests/regression/fence/main.cpp @@ -35,7 +35,7 @@ static void show_usage() { static void parse_args(int argc, char **argv) { int c; - while ((c = getopt(argc, argv, "n:k:h?")) != -1) { + while ((c = getopt(argc, argv, "n:k:h")) != -1) { 
switch (c) { case 'n': count = atoi(optarg); @@ -44,10 +44,9 @@ static void parse_args(int argc, char **argv) { kernel_file = optarg; break; case 'h': - case '?': { show_usage(); exit(0); - } break; + break; default: show_usage(); exit(-1); diff --git a/tests/regression/io_addr/main.cpp b/tests/regression/io_addr/main.cpp index 602064ffef..78d7cf56f9 100644 --- a/tests/regression/io_addr/main.cpp +++ b/tests/regression/io_addr/main.cpp @@ -42,7 +42,7 @@ static void show_usage() { static void parse_args(int argc, char **argv) { int c; - while ((c = getopt(argc, argv, "n:k:h?")) != -1) { + while ((c = getopt(argc, argv, "n:k:h")) != -1) { switch (c) { case 'n': count = atoi(optarg); @@ -51,10 +51,9 @@ static void parse_args(int argc, char **argv) { kernel_file = optarg; break; case 'h': - case '?': { show_usage(); exit(0); - } break; + break; default: show_usage(); exit(-1); diff --git a/tests/regression/mstress/main.cpp b/tests/regression/mstress/main.cpp index 7bf0dbe0e3..5a1f0d300d 100644 --- a/tests/regression/mstress/main.cpp +++ b/tests/regression/mstress/main.cpp @@ -83,7 +83,7 @@ static void show_usage() { static void parse_args(int argc, char **argv) { int c; - while ((c = getopt(argc, argv, "n:k:h?")) != -1) { + while ((c = getopt(argc, argv, "n:k:h")) != -1) { switch (c) { case 'n': count = atoi(optarg); @@ -92,10 +92,9 @@ static void parse_args(int argc, char **argv) { kernel_file = optarg; break; case 'h': - case '?': { show_usage(); exit(0); - } break; + break; default: show_usage(); exit(-1); diff --git a/tests/regression/printf/main.cpp b/tests/regression/printf/main.cpp index 18d778c4b5..eefa325927 100644 --- a/tests/regression/printf/main.cpp +++ b/tests/regression/printf/main.cpp @@ -33,7 +33,7 @@ static void show_usage() { static void parse_args(int argc, char **argv) { int c; - while ((c = getopt(argc, argv, "n:k:h?")) != -1) { + while ((c = getopt(argc, argv, "n:k:h")) != -1) { switch (c) { case 'n': count = atoi(optarg); @@ -42,10 +42,9 @@ 
static void parse_args(int argc, char **argv) { kernel_file = optarg; break; case 'h': - case '?': { show_usage(); exit(0); - } break; + break; default: show_usage(); exit(-1); diff --git a/tests/regression/sgemm2x/main.cpp b/tests/regression/sgemm2x/main.cpp index 3da359ee5e..f10f8fcd15 100644 --- a/tests/regression/sgemm2x/main.cpp +++ b/tests/regression/sgemm2x/main.cpp @@ -103,7 +103,7 @@ static void show_usage() { static void parse_args(int argc, char **argv) { int c; - while ((c = getopt(argc, argv, "n:t:k:h?")) != -1) { + while ((c = getopt(argc, argv, "n:t:k:h")) != -1) { switch (c) { case 'n': size = atoi(optarg); @@ -115,10 +115,9 @@ static void parse_args(int argc, char **argv) { kernel_file = optarg; break; case 'h': - case '?': { show_usage(); exit(0); - } break; + break; default: show_usage(); exit(-1); diff --git a/tests/regression/sgemmx/main.cpp b/tests/regression/sgemmx/main.cpp index 4c2b18c302..b31af9b046 100644 --- a/tests/regression/sgemmx/main.cpp +++ b/tests/regression/sgemmx/main.cpp @@ -99,7 +99,7 @@ static void show_usage() { static void parse_args(int argc, char **argv) { int c; - while ((c = getopt(argc, argv, "n:k:h?")) != -1) { + while ((c = getopt(argc, argv, "n:k:h")) != -1) { switch (c) { case 'n': size = atoi(optarg); @@ -108,10 +108,9 @@ static void parse_args(int argc, char **argv) { kernel_file = optarg; break; case 'h': - case '?': { show_usage(); exit(0); - } break; + break; default: show_usage(); exit(-1); diff --git a/tests/regression/sort/main.cpp b/tests/regression/sort/main.cpp index 19e9aee50a..032ce18df9 100644 --- a/tests/regression/sort/main.cpp +++ b/tests/regression/sort/main.cpp @@ -34,7 +34,7 @@ static void show_usage() { static void parse_args(int argc, char **argv) { int c; - while ((c = getopt(argc, argv, "n:k:h?")) != -1) { + while ((c = getopt(argc, argv, "n:k:h")) != -1) { switch (c) { case 'n': count = atoi(optarg); @@ -43,10 +43,9 @@ static void parse_args(int argc, char **argv) { kernel_file = optarg; 
break; case 'h': - case '?': { show_usage(); exit(0); - } break; + break; default: show_usage(); exit(-1); diff --git a/tests/regression/stencil3d/main.cpp b/tests/regression/stencil3d/main.cpp index 0536effc08..5a5fcc716e 100644 --- a/tests/regression/stencil3d/main.cpp +++ b/tests/regression/stencil3d/main.cpp @@ -128,7 +128,7 @@ static void stencil_cpu(TYPE *out, const TYPE *in, uint32_t width, uint32_t heig {ny = 0;} else if (ny >= (int)height) {ny = height - 1;} - + if (nz < 0) {nz = 0;} else if (nz >= (int)depth) @@ -168,7 +168,7 @@ static void show_usage() static void parse_args(int argc, char **argv) { int c; - while ((c = getopt(argc, argv, "n:t:k:h?")) != -1) + while ((c = getopt(argc, argv, "n:t:k:h")) != -1) { switch (c) { @@ -182,12 +182,9 @@ static void parse_args(int argc, char **argv) kernel_file = optarg; break; case 'h': - case '?': - { show_usage(); exit(0); - } - break; + break; default: show_usage(); exit(-1); diff --git a/tests/regression/vecaddx/main.cpp b/tests/regression/vecaddx/main.cpp index d80e2fdc1a..4a79861d3f 100644 --- a/tests/regression/vecaddx/main.cpp +++ b/tests/regression/vecaddx/main.cpp @@ -87,7 +87,7 @@ static void show_usage() { static void parse_args(int argc, char **argv) { int c; - while ((c = getopt(argc, argv, "n:k:h?")) != -1) { + while ((c = getopt(argc, argv, "n:k:h")) != -1) { switch (c) { case 'n': size = atoi(optarg); @@ -96,10 +96,9 @@ static void parse_args(int argc, char **argv) { kernel_file = optarg; break; case 'h': - case '?': { show_usage(); exit(0); - } break; + break; default: show_usage(); exit(-1); From aa1489d8ebbae963f74339a2c53c5c74082ae328 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sat, 7 Sep 2024 03:45:23 -0700 Subject: [PATCH 158/407] fixed trace.vcd copy --- ci/blackbox.sh | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/ci/blackbox.sh b/ci/blackbox.sh index 5c0dfbde15..51639b201f 100755 --- a/ci/blackbox.sh +++ b/ci/blackbox.sh @@ -61,11 +61,11 @@ 
parse_args() { --driver=*) DRIVER=${i#*=} ;; --app=*) APP=${i#*=} ;; --clusters=*) CONFIGS=$(add_option "$CONFIGS" "-DNUM_CLUSTERS=${i#*=}") ;; - --cores=*) CONFIGS=$(add_option "$CONFIGS" "-DNUM_CORES=${i#*=}") ;; - --warps=*) CONFIGS=$(add_option "$CONFIGS" "-DNUM_WARPS=${i#*=}") ;; + --cores=*) CONFIGS=$(add_option "$CONFIGS" "-DNUM_CORES=${i#*=}") ;; + --warps=*) CONFIGS=$(add_option "$CONFIGS" "-DNUM_WARPS=${i#*=}") ;; --threads=*) CONFIGS=$(add_option "$CONFIGS" "-DNUM_THREADS=${i#*=}") ;; - --l2cache) CONFIGS=$(add_option "$CONFIGS" "-DL2_ENABLE") ;; - --l3cache) CONFIGS=$(add_option "$CONFIGS" "-DL3_ENABLE") ;; + --l2cache) CONFIGS=$(add_option "$CONFIGS" "-DL2_ENABLE") ;; + --l3cache) CONFIGS=$(add_option "$CONFIGS" "-DL3_ENABLE") ;; --perf=*) CONFIGS=$(add_option "$CONFIGS" "-DPERF_ENABLE"); PERF_CLASS=${i#*=} ;; --debug=*) DEBUG=1; DEBUG_LEVEL=${i#*=} ;; --scope) SCOPE=1; ;; @@ -143,7 +143,7 @@ run_app() { fi fi status=$? - exit $status + return $status } main() { @@ -154,7 +154,7 @@ main() { # execute on default installed GPU if [ "$DRIVER" = "gpu" ]; then run_app - exit $status + exit $? fi if [ -n "$CONFIGS" ]; then @@ -189,6 +189,7 @@ main() { build_driver run_app + status=$? if [ $DEBUG -eq 1 ] && [ -f "$APP_PATH/trace.vcd" ]; then mv -f $APP_PATH/trace.vcd . 
From 0cbdc3be9e654226f3d506d08b0312ebcb356355 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sat, 7 Sep 2024 21:32:11 -0700 Subject: [PATCH 159/407] opae afu x warning fixes --- hw/rtl/afu/opae/vortex_afu.sv | 152 ++++++++++++++++++---------------- 1 file changed, 79 insertions(+), 73 deletions(-) diff --git a/hw/rtl/afu/opae/vortex_afu.sv b/hw/rtl/afu/opae/vortex_afu.sv index e5ff16483c..ffc0af282a 100644 --- a/hw/rtl/afu/opae/vortex_afu.sv +++ b/hw/rtl/afu/opae/vortex_afu.sv @@ -64,6 +64,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ localparam AFU_ID_L = 16'h0002; // AFU ID Lower localparam AFU_ID_H = 16'h0004; // AFU ID Higher + localparam CMD_IDLE = 0; localparam CMD_MEM_READ = `AFU_IMAGE_CMD_MEM_READ; localparam CMD_MEM_WRITE = `AFU_IMAGE_CMD_MEM_WRITE; localparam CMD_DCR_WRITE = `AFU_IMAGE_CMD_DCR_WRITE; @@ -139,14 +140,12 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ // MMIO controller //////////////////////////////////////////////////////////// - t_ccip_c0_ReqMmioHdr mmio_hdr; - assign mmio_hdr = t_ccip_c0_ReqMmioHdr'(cp2af_sRxPort.c0.hdr); - `UNUSED_VAR (mmio_hdr) + t_ccip_c0_ReqMmioHdr mmio_req_hdr; + assign mmio_req_hdr = t_ccip_c0_ReqMmioHdr'(cp2af_sRxPort.c0.hdr[$bits(t_ccip_c0_ReqMmioHdr)-1:0]); + `UNUSED_VAR (mmio_req_hdr) - `STATIC_ASSERT(($bits(t_ccip_c0_ReqMmioHdr)-$bits(mmio_hdr.address)) == 12, ("Oops!")) - - t_if_ccip_c2_Tx mmio_tx; - assign af2cp_sTxPort.c2 = mmio_tx; + t_if_ccip_c2_Tx mmio_rsp; + assign af2cp_sTxPort.c2 = mmio_rsp; `ifdef SCOPE @@ -178,7 +177,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ end scope_bus_in <= 0; if (cp2af_sRxPort.c0.mmioWrValid - && (MMIO_SCOPE_WRITE == mmio_hdr.address)) begin + && (MMIO_SCOPE_WRITE == mmio_req_hdr.address)) begin cmd_scope_wdata <= 64'(cp2af_sRxPort.c0.data); cmd_scope_writing <= 1; scope_bus_ctr <= 63; @@ -206,6 +205,8 @@ module vortex_afu import ccip_if_pkg::*; import 
local_mem_cfg_pkg::*; import VX_ wire [COUT_QUEUE_DATAW-1:0] cout_q_dout; wire cout_q_full, cout_q_empty; + wire [COUT_QUEUE_DATAW-1:0] cout_q_dout_s = cout_q_dout & {COUT_QUEUE_DATAW{!cout_q_empty}}; + `ifdef SIMULATION `ifndef VERILATOR // disable assertions until full reset @@ -226,60 +227,22 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ `endif `endif + // MMIO controller //////////////////////////////////////////////////////////// + + // Handle MMIO read requests always @(posedge clk) begin if (reset) begin - mmio_tx.mmioRdValid <= 0; - mmio_tx.hdr <= '0; + mmio_rsp.mmioRdValid <= 0; end else begin - mmio_tx.mmioRdValid <= cp2af_sRxPort.c0.mmioRdValid; - mmio_tx.hdr.tid <= mmio_hdr.tid; - end - // serve MMIO write request - if (cp2af_sRxPort.c0.mmioWrValid) begin - case (mmio_hdr.address) - MMIO_CMD_ARG0: begin - cmd_args[0] <= 64'(cp2af_sRxPort.c0.data); - `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: MMIO_CMD_ARG0: data=0x%h\n", $time, 64'(cp2af_sRxPort.c0.data))); - `endif - end - MMIO_CMD_ARG1: begin - cmd_args[1] <= 64'(cp2af_sRxPort.c0.data); - `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: MMIO_CMD_ARG1: data=0x%h\n", $time, 64'(cp2af_sRxPort.c0.data))); - `endif - end - MMIO_CMD_ARG2: begin - cmd_args[2] <= 64'(cp2af_sRxPort.c0.data); - `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: MMIO_CMD_ARG2: data=%0d\n", $time, 64'(cp2af_sRxPort.c0.data))); - `endif - end - MMIO_CMD_TYPE: begin - `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: MMIO_CMD_TYPE: data=%0d\n", $time, 64'(cp2af_sRxPort.c0.data))); - `endif - end - `ifdef SCOPE - MMIO_SCOPE_WRITE: begin - `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: MMIO_SCOPE_WRITE: data=0x%h\n", $time, cmd_scope_wdata)); - `endif - end - `endif - default: begin - `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: Unknown MMIO Wr: addr=0x%0h, data=0x%h\n", $time, mmio_hdr.address, 64'(cp2af_sRxPort.c0.data))); - `endif - end - endcase + mmio_rsp.mmioRdValid <= cp2af_sRxPort.c0.mmioRdValid; end - // serve MMIO read requests + 
mmio_rsp.hdr.tid <= mmio_req_hdr.tid; + if (cp2af_sRxPort.c0.mmioRdValid) begin - case (mmio_hdr.address) + case (mmio_req_hdr.address) // AFU header - 16'h0000: mmio_tx.data <= { + 16'h0000: mmio_rsp.data <= { 4'b0001, // Feature type = AFU 8'b0, // reserved 4'b0, // afu minor revision = 0 @@ -289,50 +252,93 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ 4'b0, // afu major revision = 0 12'b0 // feature ID = 0 }; - AFU_ID_L: mmio_tx.data <= afu_id[63:0]; // afu id low - AFU_ID_H: mmio_tx.data <= afu_id[127:64]; // afu id hi - 16'h0006: mmio_tx.data <= 64'h0; // next AFU - 16'h0008: mmio_tx.data <= 64'h0; // reserved + AFU_ID_L: mmio_rsp.data <= afu_id[63:0]; // afu id low + AFU_ID_H: mmio_rsp.data <= afu_id[127:64]; // afu id hi + 16'h0006: mmio_rsp.data <= 64'h0; // next AFU + 16'h0008: mmio_rsp.data <= 64'h0; // reserved MMIO_STATUS: begin - mmio_tx.data <= 64'({cout_q_dout, !cout_q_empty, 8'(state)}); + mmio_rsp.data <= 64'({cout_q_dout_s, !cout_q_empty, 8'(state)}); `ifdef DBG_TRACE_AFU - if (state != STATE_WIDTH'(mmio_tx.data)) begin - `TRACE(2, ("%d: MMIO_STATUS: addr=0x%0h, state=%0d\n", $time, mmio_hdr.address, state)); + if (state != STATE_WIDTH'(mmio_rsp.data)) begin + `TRACE(2, ("%d: MMIO_STATUS: addr=0x%0h, state=%0d\n", $time, mmio_req_hdr.address, state)); end `endif end `ifdef SCOPE MMIO_SCOPE_READ: begin - mmio_tx.data <= cmd_scope_rdata; + mmio_rsp.data <= cmd_scope_rdata; `ifdef DBG_TRACE_AFU `TRACE(2, ("%d: MMIO_SCOPE_READ: data=0x%h\n", $time, cmd_scope_rdata)); `endif end `endif MMIO_DEV_CAPS: begin - mmio_tx.data <= dev_caps; + mmio_rsp.data <= dev_caps; `ifdef DBG_TRACE_AFU `TRACE(2, ("%d: MMIO_DEV_CAPS: data=0x%h\n", $time, dev_caps)); `endif end MMIO_ISA_CAPS: begin - mmio_tx.data <= isa_caps; + mmio_rsp.data <= isa_caps; `ifdef DBG_TRACE_AFU - if (state != STATE_WIDTH'(mmio_tx.data)) begin + if (state != STATE_WIDTH'(mmio_rsp.data)) begin `TRACE(2, ("%d: MMIO_ISA_CAPS: data=%0d\n", $time, isa_caps)); 
end `endif end default: begin - mmio_tx.data <= 64'h0; + mmio_rsp.data <= 64'h0; `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: Unknown MMIO Rd: addr=0x%0h\n", $time, mmio_hdr.address)); + `TRACE(2, ("%d: Unknown MMIO Rd: addr=0x%0h\n", $time, mmio_req_hdr.address)); `endif end endcase end end + // Handle MMIO write requests + always @(posedge clk) begin + if (cp2af_sRxPort.c0.mmioWrValid) begin + case (mmio_req_hdr.address) + MMIO_CMD_ARG0: begin + cmd_args[0] <= 64'(cp2af_sRxPort.c0.data); + `ifdef DBG_TRACE_AFU + `TRACE(2, ("%d: MMIO_CMD_ARG0: data=0x%h\n", $time, 64'(cp2af_sRxPort.c0.data))); + `endif + end + MMIO_CMD_ARG1: begin + cmd_args[1] <= 64'(cp2af_sRxPort.c0.data); + `ifdef DBG_TRACE_AFU + `TRACE(2, ("%d: MMIO_CMD_ARG1: data=0x%h\n", $time, 64'(cp2af_sRxPort.c0.data))); + `endif + end + MMIO_CMD_ARG2: begin + cmd_args[2] <= 64'(cp2af_sRxPort.c0.data); + `ifdef DBG_TRACE_AFU + `TRACE(2, ("%d: MMIO_CMD_ARG2: data=%0d\n", $time, 64'(cp2af_sRxPort.c0.data))); + `endif + end + MMIO_CMD_TYPE: begin + `ifdef DBG_TRACE_AFU + `TRACE(2, ("%d: MMIO_CMD_TYPE: data=%0d\n", $time, 64'(cp2af_sRxPort.c0.data))); + `endif + end + `ifdef SCOPE + MMIO_SCOPE_WRITE: begin + `ifdef DBG_TRACE_AFU + `TRACE(2, ("%d: MMIO_SCOPE_WRITE: data=0x%h\n", $time, cmd_scope_wdata)); + `endif + end + `endif + default: begin + `ifdef DBG_TRACE_AFU + `TRACE(2, ("%d: Unknown MMIO Wr: addr=0x%0h, data=0x%h\n", $time, mmio_req_hdr.address, 64'(cp2af_sRxPort.c0.data))); + `endif + end + endcase + end + end + // COMMAND FSM //////////////////////////////////////////////////////////////// wire cmd_mem_rd_done; @@ -351,9 +357,9 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ end end - wire is_mmio_wr_cmd = cp2af_sRxPort.c0.mmioWrValid && (MMIO_CMD_TYPE == mmio_hdr.address); + wire is_mmio_wr_cmd = cp2af_sRxPort.c0.mmioWrValid && (MMIO_CMD_TYPE == mmio_req_hdr.address); wire [CMD_TYPE_WIDTH-1:0] cmd_type = is_mmio_wr_cmd ? 
- CMD_TYPE_WIDTH'(cp2af_sRxPort.c0.data) : CMD_TYPE_WIDTH'(0); + CMD_TYPE_WIDTH'(cp2af_sRxPort.c0.data) : CMD_TYPE_WIDTH'(CMD_IDLE); always @(posedge clk) begin if (reset) begin @@ -978,7 +984,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ wire cout_q_push = vx_mem_req_valid && vx_mem_is_cout && ~cout_q_full; wire cout_q_pop = cp2af_sRxPort.c0.mmioRdValid - && (mmio_hdr.address == MMIO_STATUS) + && (mmio_req_hdr.address == MMIO_STATUS) && ~cout_q_empty; VX_fifo_queue #( @@ -1051,8 +1057,8 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ .probes({ cmd_type, state, - mmio_hdr.address, - mmio_hdr.length, + mmio_req_hdr.address, + mmio_req_hdr.length, cp2af_sRxPort.c0.hdr.mdata, af2cp_sTxPort.c0.hdr.address, af2cp_sTxPort.c0.hdr.mdata, From 1a35d3fed120c3b035a07372d2d8f2f608cac7f9 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sat, 7 Sep 2024 21:33:45 -0700 Subject: [PATCH 160/407] fixed byteen signal on memory read --- hw/rtl/VX_define.vh | 2 +- hw/rtl/cache/VX_cache_bank.sv | 2 +- hw/rtl/cache/VX_cache_data.sv | 4 ++-- hw/rtl/core/VX_fetch.sv | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/hw/rtl/VX_define.vh b/hw/rtl/VX_define.vh index 861d9f28cf..69b14c7480 100644 --- a/hw/rtl/VX_define.vh +++ b/hw/rtl/VX_define.vh @@ -361,7 +361,7 @@ assign dst.req_data.rw = 0; \ assign dst.req_data.addr = src.req_data.addr; \ assign dst.req_data.data = '0; \ - assign dst.req_data.byteen = '0; \ + assign dst.req_data.byteen = '1; \ assign dst.req_data.flags = src.req_data.flags; \ assign dst.req_data.tag = src.req_data.tag; \ assign src.req_ready = dst.req_ready; \ diff --git a/hw/rtl/cache/VX_cache_bank.sv b/hw/rtl/cache/VX_cache_bank.sv index 4dff675bd6..a8f8dbdf2b 100644 --- a/hw/rtl/cache/VX_cache_bank.sv +++ b/hw/rtl/cache/VX_cache_bank.sv @@ -623,7 +623,7 @@ module VX_cache_bank #( end else begin assign mreq_queue_rw = 0; assign mreq_queue_data = '0; - assign mreq_queue_byteen = 
'0; + assign mreq_queue_byteen = '1; `UNUSED_VAR (dirty_data_st1) `UNUSED_VAR (dirty_byteen_st1) end diff --git a/hw/rtl/cache/VX_cache_data.sv b/hw/rtl/cache/VX_cache_data.sv index 18d44b6dbf..302a99e5eb 100644 --- a/hw/rtl/cache/VX_cache_data.sv +++ b/hw/rtl/cache/VX_cache_data.sv @@ -101,7 +101,7 @@ module VX_cache_data #( assign dirty_byteen = bs_rdata[way_idx]; end else begin - assign dirty_byteen = {LINE_SIZE{1'b1}}; + assign dirty_byteen = '1; end wire [NUM_WAYS-1:0][`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] flipped_rdata; @@ -112,7 +112,7 @@ module VX_cache_data #( end assign dirty_data = flipped_rdata[way_idx]; end else begin - assign dirty_byteen = '0; + assign dirty_byteen = '1; assign dirty_data = '0; end diff --git a/hw/rtl/core/VX_fetch.sv b/hw/rtl/core/VX_fetch.sv index 44f3e51da1..1da184288e 100644 --- a/hw/rtl/core/VX_fetch.sv +++ b/hw/rtl/core/VX_fetch.sv @@ -118,7 +118,7 @@ module VX_fetch import VX_gpu_pkg::*; #( assign icache_bus_if.req_data.flags = '0; assign icache_bus_if.req_data.rw = 0; - assign icache_bus_if.req_data.byteen = 4'b1111; + assign icache_bus_if.req_data.byteen = '1; assign icache_bus_if.req_data.data = '0; // Icache Response From 7bef62aef81c522477a08bd899fe41f0c388899b Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sun, 8 Sep 2024 01:37:20 -0700 Subject: [PATCH 161/407] minor update --- hw/rtl/VX_gpu_pkg.sv | 30 ++++++++++++++++-------------- hw/rtl/Vortex.sv | 6 ++++++ hw/rtl/afu/opae/vortex_afu.vh | 8 ++++---- tests/regression/demo/common.h | 4 ++-- 4 files changed, 28 insertions(+), 20 deletions(-) diff --git a/hw/rtl/VX_gpu_pkg.sv b/hw/rtl/VX_gpu_pkg.sv index f94714d06a..1a55a18fe7 100644 --- a/hw/rtl/VX_gpu_pkg.sv +++ b/hw/rtl/VX_gpu_pkg.sv @@ -461,6 +461,21 @@ package VX_gpu_pkg; endcase end end + `EX_SFU: begin + case (`INST_SFU_BITS'(op_type)) + `INST_SFU_TMC: `TRACE(level, ("TMC")); + `INST_SFU_WSPAWN:`TRACE(level, ("WSPAWN")); + `INST_SFU_SPLIT: begin if (op_args.wctl.is_neg) `TRACE(level, ("SPLIT.N")); else 
`TRACE(level, ("SPLIT")); end + `INST_SFU_JOIN: `TRACE(level, ("JOIN")); + `INST_SFU_BAR: `TRACE(level, ("BAR")); + `INST_SFU_PRED: begin if (op_args.wctl.is_neg) `TRACE(level, ("PRED.N")); else `TRACE(level, ("PRED")); end + `INST_SFU_CSRRW: begin if (op_args.csr.use_imm) `TRACE(level, ("CSRRWI")); else `TRACE(level, ("CSRRW")); end + `INST_SFU_CSRRS: begin if (op_args.csr.use_imm) `TRACE(level, ("CSRRSI")); else `TRACE(level, ("CSRRS")); end + `INST_SFU_CSRRC: begin if (op_args.csr.use_imm) `TRACE(level, ("CSRRCI")); else `TRACE(level, ("CSRRC")); end + default: `TRACE(level, ("?")); + endcase + end + `ifdef EXT_F_ENABLE `EX_FPU: begin case (`INST_FPU_BITS'(op_type)) `INST_FPU_ADD: begin @@ -632,20 +647,7 @@ package VX_gpu_pkg; default: `TRACE(level, ("?")); endcase end - `EX_SFU: begin - case (`INST_SFU_BITS'(op_type)) - `INST_SFU_TMC: `TRACE(level, ("TMC")); - `INST_SFU_WSPAWN:`TRACE(level, ("WSPAWN")); - `INST_SFU_SPLIT: begin if (op_args.wctl.is_neg) `TRACE(level, ("SPLIT.N")); else `TRACE(level, ("SPLIT")); end - `INST_SFU_JOIN: `TRACE(level, ("JOIN")); - `INST_SFU_BAR: `TRACE(level, ("BAR")); - `INST_SFU_PRED: begin if (op_args.wctl.is_neg) `TRACE(level, ("PRED.N")); else `TRACE(level, ("PRED")); end - `INST_SFU_CSRRW: begin if (op_args.csr.use_imm) `TRACE(level, ("CSRRWI")); else `TRACE(level, ("CSRRW")); end - `INST_SFU_CSRRS: begin if (op_args.csr.use_imm) `TRACE(level, ("CSRRSI")); else `TRACE(level, ("CSRRS")); end - `INST_SFU_CSRRC: begin if (op_args.csr.use_imm) `TRACE(level, ("CSRRCI")); else `TRACE(level, ("CSRRC")); end - default: `TRACE(level, ("?")); - endcase - end + `endif default: `TRACE(level, ("?")); endcase endtask diff --git a/hw/rtl/Vortex.sv b/hw/rtl/Vortex.sv index 875faf47ea..8f171a486a 100644 --- a/hw/rtl/Vortex.sv +++ b/hw/rtl/Vortex.sv @@ -189,6 +189,12 @@ module Vortex import VX_gpu_pkg::*; ( `endif + // dump device configuration + initial begin + `TRACE(0, ("CONFIGS: num_threads=%0d, num_warps=%0d, num_cores=%0d, 
num_clusters=%0d, socket_size=%0d, local_mem_base=0x%0h, num_barriers=%0d\n", + `NUM_THREADS, `NUM_WARPS, `NUM_CORES, `NUM_CLUSTERS, `SOCKET_SIZE, `LMEM_BASE_ADDR, `NUM_BARRIERS)); + end + `ifdef DBG_TRACE_MEM always @(posedge clk) begin if (mem_req_fire) begin diff --git a/hw/rtl/afu/opae/vortex_afu.vh b/hw/rtl/afu/opae/vortex_afu.vh index 6aa532983a..31f09ae900 100644 --- a/hw/rtl/afu/opae/vortex_afu.vh +++ b/hw/rtl/afu/opae/vortex_afu.vh @@ -1,10 +1,10 @@ // Copyright © 2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -17,9 +17,9 @@ `define AFU_ACCEL_NAME "vortex_afu" `define AFU_ACCEL_UUID 128'h35F9452B_25C2_434C_93D5_6F8C60DB361C -`define AFU_IMAGE_CMD_MEM_READ 1 +`define AFU_IMAGE_CMD_MEM_READ 1 `define AFU_IMAGE_CMD_MEM_WRITE 2 -`define AFU_IMAGE_CMD_RUN 3 +`define AFU_IMAGE_CMD_RUN 3 `define AFU_IMAGE_CMD_DCR_WRITE 4 `define AFU_IMAGE_CMD_MAX_VALUE 4 diff --git a/tests/regression/demo/common.h b/tests/regression/demo/common.h index 98b8ff587e..be200ec045 100644 --- a/tests/regression/demo/common.h +++ b/tests/regression/demo/common.h @@ -2,7 +2,7 @@ #define _COMMON_H_ #ifndef TYPE -#define TYPE float +#define TYPE int #endif typedef struct { @@ -10,7 +10,7 @@ typedef struct { uint32_t task_size; uint64_t src0_addr; uint64_t src1_addr; - uint64_t dst_addr; + uint64_t dst_addr; } kernel_arg_t; #endif From 7823f5529cf7c677232000af9947880a2c9af4cd Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sun, 8 Sep 2024 01:38:48 -0700 Subject: [PATCH 162/407] minor update --- sim/opaesim/opae_sim.cpp | 14 +------------- sim/rtlsim/processor.cpp | 14 
+------------- sim/xrtsim/xrt_sim.cpp | 14 +------------- 3 files changed, 3 insertions(+), 39 deletions(-) diff --git a/sim/opaesim/opae_sim.cpp b/sim/opaesim/opae_sim.cpp index 8e9e8c4d89..f5acc3d215 100644 --- a/sim/opaesim/opae_sim.cpp +++ b/sim/opaesim/opae_sim.cpp @@ -143,19 +143,7 @@ class opae_sim::Impl { #endif ram_ = new RAM(0, RAM_PAGE_SIZE); - - #ifndef NDEBUG - // dump device configuration - std::cout << "CONFIGS:" - << " num_threads=" << NUM_THREADS - << ", num_warps=" << NUM_WARPS - << ", num_cores=" << NUM_CORES - << ", num_clusters=" << NUM_CLUSTERS - << ", socket_size=" << SOCKET_SIZE - << ", local_mem_base=0x" << std::hex << LMEM_BASE_ADDR << std::dec - << ", num_barriers=" << NUM_BARRIERS - << std::endl; - #endif + // reset the device this->reset(); diff --git a/sim/rtlsim/processor.cpp b/sim/rtlsim/processor.cpp index 25d219fcf0..f52e7c8da5 100644 --- a/sim/rtlsim/processor.cpp +++ b/sim/rtlsim/processor.cpp @@ -116,19 +116,7 @@ class Processor::Impl { #endif ram_ = nullptr; - - #ifndef NDEBUG - // dump device configuration - std::cout << "CONFIGS:" - << " num_threads=" << NUM_THREADS - << ", num_warps=" << NUM_WARPS - << ", num_cores=" << NUM_CORES - << ", num_clusters=" << NUM_CLUSTERS - << ", socket_size=" << SOCKET_SIZE - << ", local_mem_base=0x" << std::hex << LMEM_BASE_ADDR << std::dec - << ", num_barriers=" << NUM_BARRIERS - << std::endl; - #endif + // reset the device this->reset(); diff --git a/sim/xrtsim/xrt_sim.cpp b/sim/xrtsim/xrt_sim.cpp index 12a78c23d3..21961e5ddc 100644 --- a/sim/xrtsim/xrt_sim.cpp +++ b/sim/xrtsim/xrt_sim.cpp @@ -130,19 +130,7 @@ class xrt_sim::Impl { #endif ram_ = new RAM(0, RAM_PAGE_SIZE); - - #ifndef NDEBUG - // dump device configuration - std::cout << "CONFIGS:" - << " num_threads=" << NUM_THREADS - << ", num_warps=" << NUM_WARPS - << ", num_cores=" << NUM_CORES - << ", num_clusters=" << NUM_CLUSTERS - << ", socket_size=" << SOCKET_SIZE - << ", local_mem_base=0x" << std::hex << LMEM_BASE_ADDR << std::dec - 
<< ", num_barriers=" << NUM_BARRIERS - << std::endl; - #endif + // reset the device this->reset(); From 6626f9201c2820f78e12a2152df16e26f5143d0a Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sun, 8 Sep 2024 02:46:32 -0700 Subject: [PATCH 163/407] minor update --- hw/rtl/VX_gpu_pkg.sv | 12 ++++++++---- hw/rtl/core/VX_issue_slice.sv | 2 +- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/hw/rtl/VX_gpu_pkg.sv b/hw/rtl/VX_gpu_pkg.sv index 1a55a18fe7..67ff2176b4 100644 --- a/hw/rtl/VX_gpu_pkg.sv +++ b/hw/rtl/VX_gpu_pkg.sv @@ -320,8 +320,10 @@ package VX_gpu_pkg; case (ex_type) `EX_ALU: `TRACE(level, ("ALU")); `EX_LSU: `TRACE(level, ("LSU")); - `EX_FPU: `TRACE(level, ("FPU")); `EX_SFU: `TRACE(level, ("SFU")); + `ifdef EXT_F_ENABLE + `EX_FPU: `TRACE(level, ("FPU")); + `endif default: `TRACE(level, ("?")); endcase endtask @@ -664,14 +666,16 @@ package VX_gpu_pkg; `EX_LSU: begin `TRACE(level, (", offset=0x%0h", op_args.lsu.offset)); end - `EX_FPU: begin - `TRACE(level, (", fmt=0x%0h, frm=0x%0h", op_args.fpu.fmt, op_args.fpu.frm)); - end `EX_SFU: begin if (`INST_SFU_IS_CSR(op_type)) begin `TRACE(level, (", addr=0x%0h, use_imm=%b, imm=0x%0h", op_args.csr.addr, op_args.csr.use_imm, op_args.csr.imm)); end end + `ifdef EXT_F_ENABLE + `EX_FPU: begin + `TRACE(level, (", fmt=0x%0h, frm=0x%0h", op_args.fpu.fmt, op_args.fpu.frm)); + end + `endif default:; endcase endtask diff --git a/hw/rtl/core/VX_issue_slice.sv b/hw/rtl/core/VX_issue_slice.sv index 0d7fdea534..18dd41cd7d 100644 --- a/hw/rtl/core/VX_issue_slice.sv +++ b/hw/rtl/core/VX_issue_slice.sv @@ -135,7 +135,7 @@ module VX_issue_slice import VX_gpu_pkg::*; #( `ifdef DBG_TRACE_PIPELINE always @(posedge clk) begin if (operands_if.valid && operands_if.ready) begin - `TRACE(1, ("%d: %s wid=%0d, PC=0x%0h, ex=", $time, INSTANCE_ID, wis_to_wid(operands_if.data.wis, ISSUE_ID), {operands_if.data.PC, 1'b0})); + `TRACE(1, ("%d: %s: wid=%0d, PC=0x%0h, ex=", $time, INSTANCE_ID, wis_to_wid(operands_if.data.wis, ISSUE_ID), 
{operands_if.data.PC, 1'b0})); trace_ex_type(1, operands_if.data.ex_type); `TRACE(1, (", op=")); trace_ex_op(1, operands_if.data.ex_type, operands_if.data.op_type, operands_if.data.op_args); From fa11d4c5022393e374f989623649be866fe8a19c Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sun, 8 Sep 2024 05:26:00 -0700 Subject: [PATCH 164/407] TRACING refactoring to support vivado/quartus simulators --- ci/trace_csv.py | 5 +- hw/rtl/VX_gpu_pkg.sv | 342 ++++++++++++++++---------------- hw/rtl/VX_platform.vh | 32 +-- hw/rtl/Vortex.sv | 8 +- hw/rtl/afu/opae/vortex_afu.sv | 58 +++--- hw/rtl/afu/xrt/VX_afu_wrap.sv | 14 +- hw/rtl/cache/VX_cache_bank.sv | 22 +- hw/rtl/cache/VX_cache_data.sv | 8 +- hw/rtl/cache/VX_cache_mshr.sv | 22 +- hw/rtl/cache/VX_cache_tags.sv | 14 +- hw/rtl/cache/VX_cache_wrap.sv | 12 +- hw/rtl/core/VX_alu_int.sv | 2 +- hw/rtl/core/VX_commit.sv | 8 +- hw/rtl/core/VX_dcr_data.sv | 4 +- hw/rtl/core/VX_decode.sv | 8 +- hw/rtl/core/VX_fetch.sv | 4 +- hw/rtl/core/VX_issue_slice.sv | 18 +- hw/rtl/core/VX_lsu_slice.sv | 34 ++-- hw/rtl/core/VX_scoreboard.sv | 6 +- hw/rtl/libs/VX_axi_adapter.sv | 6 +- hw/rtl/libs/VX_dp_ram.sv | 4 +- hw/rtl/libs/VX_fifo_queue.sv | 4 +- hw/rtl/libs/VX_index_queue.sv | 24 +-- hw/rtl/libs/VX_mem_coalescer.sv | 44 ++-- hw/rtl/libs/VX_mem_scheduler.sv | 52 ++--- hw/rtl/libs/VX_scope_tap.sv | 102 +++++----- hw/rtl/mem/VX_gbar_unit.sv | 4 +- hw/rtl/mem/VX_local_mem.sv | 12 +- 28 files changed, 441 insertions(+), 432 deletions(-) diff --git a/ci/trace_csv.py b/ci/trace_csv.py index 4a36f5f6a7..077f8027ed 100755 --- a/ci/trace_csv.py +++ b/ci/trace_csv.py @@ -44,7 +44,8 @@ def load_config(filename): 'num_barriers': int(config_match.group(7)), } return config - return None + print("Error: missing CONFIGS: header") + sys.exit(1) def parse_simx(log_lines): pc_pattern = r"PC=(0x[0-9a-fA-F]+)" @@ -274,6 +275,8 @@ def split_log_file(log_filename): if current_sublog is not None: sublogs.append(current_sublog) + else: + sublogs.append(log_lines) 
return sublogs diff --git a/hw/rtl/VX_gpu_pkg.sv b/hw/rtl/VX_gpu_pkg.sv index 67ff2176b4..7748b8eec6 100644 --- a/hw/rtl/VX_gpu_pkg.sv +++ b/hw/rtl/VX_gpu_pkg.sv @@ -318,13 +318,13 @@ package VX_gpu_pkg; task trace_ex_type(input int level, input [`EX_BITS-1:0] ex_type); case (ex_type) - `EX_ALU: `TRACE(level, ("ALU")); - `EX_LSU: `TRACE(level, ("LSU")); - `EX_SFU: `TRACE(level, ("SFU")); + `EX_ALU: `TRACE(level, ("ALU")) + `EX_LSU: `TRACE(level, ("LSU")) + `EX_SFU: `TRACE(level, ("SFU")) `ifdef EXT_F_ENABLE - `EX_FPU: `TRACE(level, ("FPU")); + `EX_FPU: `TRACE(level, ("FPU")) `endif - default: `TRACE(level, ("?")); + default: `TRACE(level, ("?")) endcase endtask @@ -340,141 +340,141 @@ package VX_gpu_pkg; if (op_args.alu.is_w) begin if (op_args.alu.use_imm) begin case (`INST_ALU_BITS'(op_type)) - `INST_ALU_ADD: `TRACE(level, ("ADDIW")); - `INST_ALU_SLL: `TRACE(level, ("SLLIW")); - `INST_ALU_SRL: `TRACE(level, ("SRLIW")); - `INST_ALU_SRA: `TRACE(level, ("SRAIW")); - default: `TRACE(level, ("?")); + `INST_ALU_ADD: `TRACE(level, ("ADDIW")) + `INST_ALU_SLL: `TRACE(level, ("SLLIW")) + `INST_ALU_SRL: `TRACE(level, ("SRLIW")) + `INST_ALU_SRA: `TRACE(level, ("SRAIW")) + default: `TRACE(level, ("?")) endcase end else begin case (`INST_ALU_BITS'(op_type)) - `INST_ALU_ADD: `TRACE(level, ("ADDW")); - `INST_ALU_SUB: `TRACE(level, ("SUBW")); - `INST_ALU_SLL: `TRACE(level, ("SLLW")); - `INST_ALU_SRL: `TRACE(level, ("SRLW")); - `INST_ALU_SRA: `TRACE(level, ("SRAW")); - default: `TRACE(level, ("?")); + `INST_ALU_ADD: `TRACE(level, ("ADDW")) + `INST_ALU_SUB: `TRACE(level, ("SUBW")) + `INST_ALU_SLL: `TRACE(level, ("SLLW")) + `INST_ALU_SRL: `TRACE(level, ("SRLW")) + `INST_ALU_SRA: `TRACE(level, ("SRAW")) + default: `TRACE(level, ("?")) endcase end end else begin if (op_args.alu.use_imm) begin case (`INST_ALU_BITS'(op_type)) - `INST_ALU_ADD: `TRACE(level, ("ADDI")); - `INST_ALU_SLL: `TRACE(level, ("SLLI")); - `INST_ALU_SRL: `TRACE(level, ("SRLI")); - `INST_ALU_SRA: `TRACE(level, 
("SRAI")); - `INST_ALU_SLT: `TRACE(level, ("SLTI")); - `INST_ALU_SLTU: `TRACE(level, ("SLTIU")); - `INST_ALU_XOR: `TRACE(level, ("XORI")); - `INST_ALU_OR: `TRACE(level, ("ORI")); - `INST_ALU_AND: `TRACE(level, ("ANDI")); - `INST_ALU_LUI: `TRACE(level, ("LUI")); - `INST_ALU_AUIPC: `TRACE(level, ("AUIPC")); - default: `TRACE(level, ("?")); + `INST_ALU_ADD: `TRACE(level, ("ADDI")) + `INST_ALU_SLL: `TRACE(level, ("SLLI")) + `INST_ALU_SRL: `TRACE(level, ("SRLI")) + `INST_ALU_SRA: `TRACE(level, ("SRAI")) + `INST_ALU_SLT: `TRACE(level, ("SLTI")) + `INST_ALU_SLTU: `TRACE(level, ("SLTIU")) + `INST_ALU_XOR: `TRACE(level, ("XORI")) + `INST_ALU_OR: `TRACE(level, ("ORI")) + `INST_ALU_AND: `TRACE(level, ("ANDI")) + `INST_ALU_LUI: `TRACE(level, ("LUI")) + `INST_ALU_AUIPC: `TRACE(level, ("AUIPC")) + default: `TRACE(level, ("?")) endcase end else begin case (`INST_ALU_BITS'(op_type)) - `INST_ALU_ADD: `TRACE(level, ("ADD")); - `INST_ALU_SUB: `TRACE(level, ("SUB")); - `INST_ALU_SLL: `TRACE(level, ("SLL")); - `INST_ALU_SRL: `TRACE(level, ("SRL")); - `INST_ALU_SRA: `TRACE(level, ("SRA")); - `INST_ALU_SLT: `TRACE(level, ("SLT")); - `INST_ALU_SLTU: `TRACE(level, ("SLTU")); - `INST_ALU_XOR: `TRACE(level, ("XOR")); - `INST_ALU_OR: `TRACE(level, ("OR")); - `INST_ALU_AND: `TRACE(level, ("AND")); - `INST_ALU_CZEQ: `TRACE(level, ("CZERO.EQZ")); - `INST_ALU_CZNE: `TRACE(level, ("CZERO.NEZ")); - default: `TRACE(level, ("?")); + `INST_ALU_ADD: `TRACE(level, ("ADD")) + `INST_ALU_SUB: `TRACE(level, ("SUB")) + `INST_ALU_SLL: `TRACE(level, ("SLL")) + `INST_ALU_SRL: `TRACE(level, ("SRL")) + `INST_ALU_SRA: `TRACE(level, ("SRA")) + `INST_ALU_SLT: `TRACE(level, ("SLT")) + `INST_ALU_SLTU: `TRACE(level, ("SLTU")) + `INST_ALU_XOR: `TRACE(level, ("XOR")) + `INST_ALU_OR: `TRACE(level, ("OR")) + `INST_ALU_AND: `TRACE(level, ("AND")) + `INST_ALU_CZEQ: `TRACE(level, ("CZERO.EQZ")) + `INST_ALU_CZNE: `TRACE(level, ("CZERO.NEZ")) + default: `TRACE(level, ("?")) endcase end end end `ALU_TYPE_BRANCH: begin case 
(`INST_BR_BITS'(op_type)) - `INST_BR_EQ: `TRACE(level, ("BEQ")); - `INST_BR_NE: `TRACE(level, ("BNE")); - `INST_BR_LT: `TRACE(level, ("BLT")); - `INST_BR_GE: `TRACE(level, ("BGE")); - `INST_BR_LTU: `TRACE(level, ("BLTU")); - `INST_BR_GEU: `TRACE(level, ("BGEU")); - `INST_BR_JAL: `TRACE(level, ("JAL")); - `INST_BR_JALR: `TRACE(level, ("JALR")); - `INST_BR_ECALL: `TRACE(level, ("ECALL")); - `INST_BR_EBREAK:`TRACE(level, ("EBREAK")); - `INST_BR_URET: `TRACE(level, ("URET")); - `INST_BR_SRET: `TRACE(level, ("SRET")); - `INST_BR_MRET: `TRACE(level, ("MRET")); - default: `TRACE(level, ("?")); + `INST_BR_EQ: `TRACE(level, ("BEQ")) + `INST_BR_NE: `TRACE(level, ("BNE")) + `INST_BR_LT: `TRACE(level, ("BLT")) + `INST_BR_GE: `TRACE(level, ("BGE")) + `INST_BR_LTU: `TRACE(level, ("BLTU")) + `INST_BR_GEU: `TRACE(level, ("BGEU")) + `INST_BR_JAL: `TRACE(level, ("JAL")) + `INST_BR_JALR: `TRACE(level, ("JALR")) + `INST_BR_ECALL: `TRACE(level, ("ECALL")) + `INST_BR_EBREAK:`TRACE(level, ("EBREAK")) + `INST_BR_URET: `TRACE(level, ("URET")) + `INST_BR_SRET: `TRACE(level, ("SRET")) + `INST_BR_MRET: `TRACE(level, ("MRET")) + default: `TRACE(level, ("?")) endcase end `ALU_TYPE_MULDIV: begin if (op_args.alu.is_w) begin case (`INST_M_BITS'(op_type)) - `INST_M_MUL: `TRACE(level, ("MULW")); - `INST_M_DIV: `TRACE(level, ("DIVW")); - `INST_M_DIVU: `TRACE(level, ("DIVUW")); - `INST_M_REM: `TRACE(level, ("REMW")); - `INST_M_REMU: `TRACE(level, ("REMUW")); - default: `TRACE(level, ("?")); + `INST_M_MUL: `TRACE(level, ("MULW")) + `INST_M_DIV: `TRACE(level, ("DIVW")) + `INST_M_DIVU: `TRACE(level, ("DIVUW")) + `INST_M_REM: `TRACE(level, ("REMW")) + `INST_M_REMU: `TRACE(level, ("REMUW")) + default: `TRACE(level, ("?")) endcase end else begin case (`INST_M_BITS'(op_type)) - `INST_M_MUL: `TRACE(level, ("MUL")); - `INST_M_MULH: `TRACE(level, ("MULH")); - `INST_M_MULHSU:`TRACE(level, ("MULHSU")); - `INST_M_MULHU: `TRACE(level, ("MULHU")); - `INST_M_DIV: `TRACE(level, ("DIV")); - `INST_M_DIVU: `TRACE(level, 
("DIVU")); - `INST_M_REM: `TRACE(level, ("REM")); - `INST_M_REMU: `TRACE(level, ("REMU")); - default: `TRACE(level, ("?")); + `INST_M_MUL: `TRACE(level, ("MUL")) + `INST_M_MULH: `TRACE(level, ("MULH")) + `INST_M_MULHSU:`TRACE(level, ("MULHSU")) + `INST_M_MULHU: `TRACE(level, ("MULHU")) + `INST_M_DIV: `TRACE(level, ("DIV")) + `INST_M_DIVU: `TRACE(level, ("DIVU")) + `INST_M_REM: `TRACE(level, ("REM")) + `INST_M_REMU: `TRACE(level, ("REMU")) + default: `TRACE(level, ("?")) endcase end end - default: `TRACE(level, ("?")); + default: `TRACE(level, ("?")) endcase end `EX_LSU: begin if (op_args.lsu.is_float) begin case (`INST_LSU_BITS'(op_type)) - `INST_LSU_LW: `TRACE(level, ("FLW")); - `INST_LSU_LD: `TRACE(level, ("FLD")); - `INST_LSU_SW: `TRACE(level, ("FSW")); - `INST_LSU_SD: `TRACE(level, ("FSD")); - default: `TRACE(level, ("?")); + `INST_LSU_LW: `TRACE(level, ("FLW")) + `INST_LSU_LD: `TRACE(level, ("FLD")) + `INST_LSU_SW: `TRACE(level, ("FSW")) + `INST_LSU_SD: `TRACE(level, ("FSD")) + default: `TRACE(level, ("?")) endcase end else begin case (`INST_LSU_BITS'(op_type)) - `INST_LSU_LB: `TRACE(level, ("LB")); - `INST_LSU_LH: `TRACE(level, ("LH")); - `INST_LSU_LW: `TRACE(level, ("LW")); - `INST_LSU_LD: `TRACE(level, ("LD")); - `INST_LSU_LBU:`TRACE(level, ("LBU")); - `INST_LSU_LHU:`TRACE(level, ("LHU")); - `INST_LSU_LWU:`TRACE(level, ("LWU")); - `INST_LSU_SB: `TRACE(level, ("SB")); - `INST_LSU_SH: `TRACE(level, ("SH")); - `INST_LSU_SW: `TRACE(level, ("SW")); - `INST_LSU_SD: `TRACE(level, ("SD")); - `INST_LSU_FENCE:`TRACE(level,("FENCE")); - default: `TRACE(level, ("?")); + `INST_LSU_LB: `TRACE(level, ("LB")) + `INST_LSU_LH: `TRACE(level, ("LH")) + `INST_LSU_LW: `TRACE(level, ("LW")) + `INST_LSU_LD: `TRACE(level, ("LD")) + `INST_LSU_LBU:`TRACE(level, ("LBU")) + `INST_LSU_LHU:`TRACE(level, ("LHU")) + `INST_LSU_LWU:`TRACE(level, ("LWU")) + `INST_LSU_SB: `TRACE(level, ("SB")) + `INST_LSU_SH: `TRACE(level, ("SH")) + `INST_LSU_SW: `TRACE(level, ("SW")) + `INST_LSU_SD: 
`TRACE(level, ("SD")) + `INST_LSU_FENCE:`TRACE(level,("FENCE")) + default: `TRACE(level, ("?")) endcase end end `EX_SFU: begin case (`INST_SFU_BITS'(op_type)) - `INST_SFU_TMC: `TRACE(level, ("TMC")); - `INST_SFU_WSPAWN:`TRACE(level, ("WSPAWN")); - `INST_SFU_SPLIT: begin if (op_args.wctl.is_neg) `TRACE(level, ("SPLIT.N")); else `TRACE(level, ("SPLIT")); end - `INST_SFU_JOIN: `TRACE(level, ("JOIN")); - `INST_SFU_BAR: `TRACE(level, ("BAR")); - `INST_SFU_PRED: begin if (op_args.wctl.is_neg) `TRACE(level, ("PRED.N")); else `TRACE(level, ("PRED")); end - `INST_SFU_CSRRW: begin if (op_args.csr.use_imm) `TRACE(level, ("CSRRWI")); else `TRACE(level, ("CSRRW")); end - `INST_SFU_CSRRS: begin if (op_args.csr.use_imm) `TRACE(level, ("CSRRSI")); else `TRACE(level, ("CSRRS")); end - `INST_SFU_CSRRC: begin if (op_args.csr.use_imm) `TRACE(level, ("CSRRCI")); else `TRACE(level, ("CSRRC")); end - default: `TRACE(level, ("?")); + `INST_SFU_TMC: `TRACE(level, ("TMC")) + `INST_SFU_WSPAWN:`TRACE(level, ("WSPAWN")) + `INST_SFU_SPLIT: begin if (op_args.wctl.is_neg) `TRACE(level, ("SPLIT.N")) else `TRACE(level, ("SPLIT")) end + `INST_SFU_JOIN: `TRACE(level, ("JOIN")) + `INST_SFU_BAR: `TRACE(level, ("BAR")) + `INST_SFU_PRED: begin if (op_args.wctl.is_neg) `TRACE(level, ("PRED.N")) else `TRACE(level, ("PRED")) end + `INST_SFU_CSRRW: begin if (op_args.csr.use_imm) `TRACE(level, ("CSRRWI")) else `TRACE(level, ("CSRRW")) end + `INST_SFU_CSRRS: begin if (op_args.csr.use_imm) `TRACE(level, ("CSRRSI")) else `TRACE(level, ("CSRRS")) end + `INST_SFU_CSRRC: begin if (op_args.csr.use_imm) `TRACE(level, ("CSRRCI")) else `TRACE(level, ("CSRRC")) end + default: `TRACE(level, ("?")) endcase end `ifdef EXT_F_ENABLE @@ -483,174 +483,174 @@ package VX_gpu_pkg; `INST_FPU_ADD: begin if (op_args.fpu.fmt[1]) begin if (op_args.fpu.fmt[0]) - `TRACE(level, ("FSUB.D")); + `TRACE(level, ("FSUB.D")) else - `TRACE(level, ("FSUB.S")); + `TRACE(level, ("FSUB.S")) end else begin if (op_args.fpu.fmt[0]) - `TRACE(level, 
("FADD.D")); + `TRACE(level, ("FADD.D")) else - `TRACE(level, ("FADD.S")); + `TRACE(level, ("FADD.S")) end end `INST_FPU_MADD: begin if (op_args.fpu.fmt[1]) begin if (op_args.fpu.fmt[0]) - `TRACE(level, ("FMSUB.D")); + `TRACE(level, ("FMSUB.D")) else - `TRACE(level, ("FMSUB.S")); + `TRACE(level, ("FMSUB.S")) end else begin if (op_args.fpu.fmt[0]) - `TRACE(level, ("FMADD.D")); + `TRACE(level, ("FMADD.D")) else - `TRACE(level, ("FMADD.S")); + `TRACE(level, ("FMADD.S")) end end `INST_FPU_NMADD: begin if (op_args.fpu.fmt[1]) begin if (op_args.fpu.fmt[0]) - `TRACE(level, ("FNMSUB.D")); + `TRACE(level, ("FNMSUB.D")) else - `TRACE(level, ("FNMSUB.S")); + `TRACE(level, ("FNMSUB.S")) end else begin if (op_args.fpu.fmt[0]) - `TRACE(level, ("FNMADD.D")); + `TRACE(level, ("FNMADD.D")) else - `TRACE(level, ("FNMADD.S")); + `TRACE(level, ("FNMADD.S")) end end `INST_FPU_MUL: begin if (op_args.fpu.fmt[0]) - `TRACE(level, ("FMUL.D")); + `TRACE(level, ("FMUL.D")) else - `TRACE(level, ("FMUL.S")); + `TRACE(level, ("FMUL.S")) end `INST_FPU_DIV: begin if (op_args.fpu.fmt[0]) - `TRACE(level, ("FDIV.D")); + `TRACE(level, ("FDIV.D")) else - `TRACE(level, ("FDIV.S")); + `TRACE(level, ("FDIV.S")) end `INST_FPU_SQRT: begin if (op_args.fpu.fmt[0]) - `TRACE(level, ("FSQRT.D")); + `TRACE(level, ("FSQRT.D")) else - `TRACE(level, ("FSQRT.S")); + `TRACE(level, ("FSQRT.S")) end `INST_FPU_CMP: begin if (op_args.fpu.fmt[0]) begin case (op_args.fpu.frm[1:0]) - 0: `TRACE(level, ("FLE.D")); - 1: `TRACE(level, ("FLT.D")); - 2: `TRACE(level, ("FEQ.D")); - default: `TRACE(level, ("?")); + 0: `TRACE(level, ("FLE.D")) + 1: `TRACE(level, ("FLT.D")) + 2: `TRACE(level, ("FEQ.D")) + default: `TRACE(level, ("?")) endcase end else begin case (op_args.fpu.frm[1:0]) - 0: `TRACE(level, ("FLE.S")); - 1: `TRACE(level, ("FLT.S")); - 2: `TRACE(level, ("FEQ.S")); - default: `TRACE(level, ("?")); + 0: `TRACE(level, ("FLE.S")) + 1: `TRACE(level, ("FLT.S")) + 2: `TRACE(level, ("FEQ.S")) + default: `TRACE(level, ("?")) 
endcase end end `INST_FPU_F2F: begin if (op_args.fpu.fmt[0]) begin - `TRACE(level, ("FCVT.D.S")); + `TRACE(level, ("FCVT.D.S")) end else begin - `TRACE(level, ("FCVT.S.D")); + `TRACE(level, ("FCVT.S.D")) end end `INST_FPU_F2I: begin if (op_args.fpu.fmt[0]) begin if (op_args.fpu.fmt[1]) begin - `TRACE(level, ("FCVT.L.D")); + `TRACE(level, ("FCVT.L.D")) end else begin - `TRACE(level, ("FCVT.W.D")); + `TRACE(level, ("FCVT.W.D")) end end else begin if (op_args.fpu.fmt[1]) begin - `TRACE(level, ("FCVT.L.S")); + `TRACE(level, ("FCVT.L.S")) end else begin - `TRACE(level, ("FCVT.W.S")); + `TRACE(level, ("FCVT.W.S")) end end end `INST_FPU_F2U: begin if (op_args.fpu.fmt[0]) begin if (op_args.fpu.fmt[1]) begin - `TRACE(level, ("FCVT.LU.D")); + `TRACE(level, ("FCVT.LU.D")) end else begin - `TRACE(level, ("FCVT.WU.D")); + `TRACE(level, ("FCVT.WU.D")) end end else begin if (op_args.fpu.fmt[1]) begin - `TRACE(level, ("FCVT.LU.S")); + `TRACE(level, ("FCVT.LU.S")) end else begin - `TRACE(level, ("FCVT.WU.S")); + `TRACE(level, ("FCVT.WU.S")) end end end `INST_FPU_I2F: begin if (op_args.fpu.fmt[0]) begin if (op_args.fpu.fmt[1]) begin - `TRACE(level, ("FCVT.D.L")); + `TRACE(level, ("FCVT.D.L")) end else begin - `TRACE(level, ("FCVT.D.W")); + `TRACE(level, ("FCVT.D.W")) end end else begin if (op_args.fpu.fmt[1]) begin - `TRACE(level, ("FCVT.S.L")); + `TRACE(level, ("FCVT.S.L")) end else begin - `TRACE(level, ("FCVT.S.W")); + `TRACE(level, ("FCVT.S.W")) end end end `INST_FPU_U2F: begin if (op_args.fpu.fmt[0]) begin if (op_args.fpu.fmt[1]) begin - `TRACE(level, ("FCVT.D.LU")); + `TRACE(level, ("FCVT.D.LU")) end else begin - `TRACE(level, ("FCVT.D.WU")); + `TRACE(level, ("FCVT.D.WU")) end end else begin if (op_args.fpu.fmt[1]) begin - `TRACE(level, ("FCVT.S.LU")); + `TRACE(level, ("FCVT.S.LU")) end else begin - `TRACE(level, ("FCVT.S.WU")); + `TRACE(level, ("FCVT.S.WU")) end end end `INST_FPU_MISC: begin if (op_args.fpu.fmt[0]) begin case (op_args.fpu.frm) - 0: `TRACE(level, ("FSGNJ.D")); 
- 1: `TRACE(level, ("FSGNJN.D")); - 2: `TRACE(level, ("FSGNJX.D")); - 3: `TRACE(level, ("FCLASS.D")); - 4: `TRACE(level, ("FMV.X.D")); - 5: `TRACE(level, ("FMV.D.X")); - 6: `TRACE(level, ("FMIN.D")); - 7: `TRACE(level, ("FMAX.D")); + 0: `TRACE(level, ("FSGNJ.D")) + 1: `TRACE(level, ("FSGNJN.D")) + 2: `TRACE(level, ("FSGNJX.D")) + 3: `TRACE(level, ("FCLASS.D")) + 4: `TRACE(level, ("FMV.X.D")) + 5: `TRACE(level, ("FMV.D.X")) + 6: `TRACE(level, ("FMIN.D")) + 7: `TRACE(level, ("FMAX.D")) endcase end else begin case (op_args.fpu.frm) - 0: `TRACE(level, ("FSGNJ.S")); - 1: `TRACE(level, ("FSGNJN.S")); - 2: `TRACE(level, ("FSGNJX.S")); - 3: `TRACE(level, ("FCLASS.S")); - 4: `TRACE(level, ("FMV.X.S")); - 5: `TRACE(level, ("FMV.S.X")); - 6: `TRACE(level, ("FMIN.S")); - 7: `TRACE(level, ("FMAX.S")); + 0: `TRACE(level, ("FSGNJ.S")) + 1: `TRACE(level, ("FSGNJN.S")) + 2: `TRACE(level, ("FSGNJX.S")) + 3: `TRACE(level, ("FCLASS.S")) + 4: `TRACE(level, ("FMV.X.S")) + 5: `TRACE(level, ("FMV.S.X")) + 6: `TRACE(level, ("FMIN.S")) + 7: `TRACE(level, ("FMAX.S")) endcase end end - default: `TRACE(level, ("?")); + default: `TRACE(level, ("?")) endcase end `endif - default: `TRACE(level, ("?")); + default: `TRACE(level, ("?")) endcase endtask @@ -661,19 +661,19 @@ package VX_gpu_pkg; ); case (ex_type) `EX_ALU: begin - `TRACE(level, (", use_PC=%b, use_imm=%b, imm=0x%0h", op_args.alu.use_PC, op_args.alu.use_imm, op_args.alu.imm)); + `TRACE(level, (", use_PC=%b, use_imm=%b, imm=0x%0h", op_args.alu.use_PC, op_args.alu.use_imm, op_args.alu.imm)) end `EX_LSU: begin - `TRACE(level, (", offset=0x%0h", op_args.lsu.offset)); + `TRACE(level, (", offset=0x%0h", op_args.lsu.offset)) end `EX_SFU: begin if (`INST_SFU_IS_CSR(op_type)) begin - `TRACE(level, (", addr=0x%0h, use_imm=%b, imm=0x%0h", op_args.csr.addr, op_args.csr.use_imm, op_args.csr.imm)); + `TRACE(level, (", addr=0x%0h, use_imm=%b, imm=0x%0h", op_args.csr.addr, op_args.csr.use_imm, op_args.csr.imm)) end end `ifdef EXT_F_ENABLE `EX_FPU: begin 
- `TRACE(level, (", fmt=0x%0h, frm=0x%0h", op_args.fpu.fmt, op_args.fpu.frm)); + `TRACE(level, (", fmt=0x%0h, frm=0x%0h", op_args.fpu.fmt, op_args.fpu.frm)) end `endif default:; @@ -682,12 +682,12 @@ package VX_gpu_pkg; task trace_base_dcr(input int level, input [`VX_DCR_ADDR_WIDTH-1:0] addr); case (addr) - `VX_DCR_BASE_STARTUP_ADDR0: `TRACE(level, ("STARTUP_ADDR0")); - `VX_DCR_BASE_STARTUP_ADDR1: `TRACE(level, ("STARTUP_ADDR1")); - `VX_DCR_BASE_STARTUP_ARG0: `TRACE(level, ("STARTUP_ARG0")); - `VX_DCR_BASE_STARTUP_ARG1: `TRACE(level, ("STARTUP_ARG1")); - `VX_DCR_BASE_MPM_CLASS: `TRACE(level, ("MPM_CLASS")); - default: `TRACE(level, ("?")); + `VX_DCR_BASE_STARTUP_ADDR0: `TRACE(level, ("STARTUP_ADDR0")) + `VX_DCR_BASE_STARTUP_ADDR1: `TRACE(level, ("STARTUP_ADDR1")) + `VX_DCR_BASE_STARTUP_ARG0: `TRACE(level, ("STARTUP_ARG0")) + `VX_DCR_BASE_STARTUP_ARG1: `TRACE(level, ("STARTUP_ARG1")) + `VX_DCR_BASE_MPM_CLASS: `TRACE(level, ("MPM_CLASS")) + default: `TRACE(level, ("?")) endcase endtask diff --git a/hw/rtl/VX_platform.vh b/hw/rtl/VX_platform.vh index 74907ad4c3..5a4426b285 100644 --- a/hw/rtl/VX_platform.vh +++ b/hw/rtl/VX_platform.vh @@ -47,7 +47,10 @@ `define UNUSED_VAR(x) `define UNUSED_PIN(x) . 
x () `define UNUSED_ARG(x) x -`define TRACE(level, args) if (level <= `DEBUG_LEVEL) $write args +`define TRACE(level, args) \ + if (level <= `DEBUG_LEVEL) begin \ + $write args; \ + end `else `ifdef VERILATOR @@ -122,9 +125,12 @@ `endif `ifdef SV_DPI -`define TRACE(level, args) dpi_trace(level, $sformatf args) +`define TRACE(level, args) dpi_trace(level, $sformatf args); `else -`define TRACE(level, args) if (level <= `DEBUG_LEVEL) $write args +`define TRACE(level, args) \ + if (level <= `DEBUG_LEVEL) begin \ + $write args; \ + end `endif `endif @@ -211,23 +217,23 @@ `define SEXT(len, x) {{(len-$bits(x)+1){x[$bits(x)-1]}}, x[$bits(x)-2:0]} `define TRACE_ARRAY1D(lvl, fmt, arr, n) \ - `TRACE(lvl, ("{")); \ + `TRACE(lvl, ("{")) \ for (integer __i = (n-1); __i >= 0; --__i) begin \ - if (__i != (n-1)) `TRACE(lvl, (", ")); \ - `TRACE(lvl, (fmt, arr[__i])); \ + if (__i != (n-1)) `TRACE(lvl, (", ")) \ + `TRACE(lvl, (fmt, arr[__i])) \ end \ - `TRACE(lvl, ("}")); + `TRACE(lvl, ("}")) `define TRACE_ARRAY2D(lvl, fmt, arr, m, n) \ - `TRACE(lvl, ("{")); \ + `TRACE(lvl, ("{")) \ for (integer __i = n-1; __i >= 0; --__i) begin \ - if (__i != (n-1)) `TRACE(lvl, (", ")); \ - `TRACE(lvl, ("{")); \ + if (__i != (n-1)) `TRACE(lvl, (", ")) \ + `TRACE(lvl, ("{")) \ for (integer __j = (m-1); __j >= 0; --__j) begin \ - if (__j != (m-1)) `TRACE(lvl, (", "));\ - `TRACE(lvl, (fmt, arr[__i][__j])); \ + if (__j != (m-1)) `TRACE(lvl, (", "))\ + `TRACE(lvl, (fmt, arr[__i][__j])) \ end \ - `TRACE(lvl, ("}")); \ + `TRACE(lvl, ("}")) \ end \ `TRACE(lvl, ("}")) diff --git a/hw/rtl/Vortex.sv b/hw/rtl/Vortex.sv index 8f171a486a..dc9f6f0344 100644 --- a/hw/rtl/Vortex.sv +++ b/hw/rtl/Vortex.sv @@ -192,19 +192,19 @@ module Vortex import VX_gpu_pkg::*; ( // dump device configuration initial begin `TRACE(0, ("CONFIGS: num_threads=%0d, num_warps=%0d, num_cores=%0d, num_clusters=%0d, socket_size=%0d, local_mem_base=0x%0h, num_barriers=%0d\n", - `NUM_THREADS, `NUM_WARPS, `NUM_CORES, `NUM_CLUSTERS, `SOCKET_SIZE, 
`LMEM_BASE_ADDR, `NUM_BARRIERS)); + `NUM_THREADS, `NUM_WARPS, `NUM_CORES, `NUM_CLUSTERS, `SOCKET_SIZE, `LMEM_BASE_ADDR, `NUM_BARRIERS)) end `ifdef DBG_TRACE_MEM always @(posedge clk) begin if (mem_req_fire) begin if (mem_req_rw) - `TRACE(1, ("%d: MEM Wr Req: addr=0x%0h, tag=0x%0h, byteen=0x%h data=0x%h\n", $time, `TO_FULL_ADDR(mem_req_addr), mem_req_tag, mem_req_byteen, mem_req_data)); + `TRACE(1, ("%d: MEM Wr Req: addr=0x%0h, tag=0x%0h, byteen=0x%h data=0x%h\n", $time, `TO_FULL_ADDR(mem_req_addr), mem_req_tag, mem_req_byteen, mem_req_data)) else - `TRACE(1, ("%d: MEM Rd Req: addr=0x%0h, tag=0x%0h, byteen=0x%h\n", $time, `TO_FULL_ADDR(mem_req_addr), mem_req_tag, mem_req_byteen)); + `TRACE(1, ("%d: MEM Rd Req: addr=0x%0h, tag=0x%0h, byteen=0x%h\n", $time, `TO_FULL_ADDR(mem_req_addr), mem_req_tag, mem_req_byteen)) end if (mem_rsp_fire) begin - `TRACE(1, ("%d: MEM Rd Rsp: tag=0x%0h, data=0x%h\n", $time, mem_rsp_tag, mem_rsp_data)); + `TRACE(1, ("%d: MEM Rd Rsp: tag=0x%0h, data=0x%h\n", $time, mem_rsp_tag, mem_rsp_data)) end end `endif diff --git a/hw/rtl/afu/opae/vortex_afu.sv b/hw/rtl/afu/opae/vortex_afu.sv index ffc0af282a..2ebd66fcfd 100644 --- a/hw/rtl/afu/opae/vortex_afu.sv +++ b/hw/rtl/afu/opae/vortex_afu.sv @@ -260,7 +260,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ mmio_rsp.data <= 64'({cout_q_dout_s, !cout_q_empty, 8'(state)}); `ifdef DBG_TRACE_AFU if (state != STATE_WIDTH'(mmio_rsp.data)) begin - `TRACE(2, ("%d: MMIO_STATUS: addr=0x%0h, state=%0d\n", $time, mmio_req_hdr.address, state)); + `TRACE(2, ("%d: MMIO_STATUS: addr=0x%0h, state=%0d\n", $time, mmio_req_hdr.address, state)) end `endif end @@ -268,28 +268,28 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ MMIO_SCOPE_READ: begin mmio_rsp.data <= cmd_scope_rdata; `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: MMIO_SCOPE_READ: data=0x%h\n", $time, cmd_scope_rdata)); + `TRACE(2, ("%d: MMIO_SCOPE_READ: data=0x%h\n", $time, cmd_scope_rdata)) 
`endif end `endif MMIO_DEV_CAPS: begin mmio_rsp.data <= dev_caps; `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: MMIO_DEV_CAPS: data=0x%h\n", $time, dev_caps)); + `TRACE(2, ("%d: MMIO_DEV_CAPS: data=0x%h\n", $time, dev_caps)) `endif end MMIO_ISA_CAPS: begin mmio_rsp.data <= isa_caps; `ifdef DBG_TRACE_AFU if (state != STATE_WIDTH'(mmio_rsp.data)) begin - `TRACE(2, ("%d: MMIO_ISA_CAPS: data=%0d\n", $time, isa_caps)); + `TRACE(2, ("%d: MMIO_ISA_CAPS: data=%0d\n", $time, isa_caps)) end `endif end default: begin mmio_rsp.data <= 64'h0; `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: Unknown MMIO Rd: addr=0x%0h\n", $time, mmio_req_hdr.address)); + `TRACE(2, ("%d: Unknown MMIO Rd: addr=0x%0h\n", $time, mmio_req_hdr.address)) `endif end endcase @@ -303,36 +303,36 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ MMIO_CMD_ARG0: begin cmd_args[0] <= 64'(cp2af_sRxPort.c0.data); `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: MMIO_CMD_ARG0: data=0x%h\n", $time, 64'(cp2af_sRxPort.c0.data))); + `TRACE(2, ("%d: MMIO_CMD_ARG0: data=0x%h\n", $time, 64'(cp2af_sRxPort.c0.data))) `endif end MMIO_CMD_ARG1: begin cmd_args[1] <= 64'(cp2af_sRxPort.c0.data); `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: MMIO_CMD_ARG1: data=0x%h\n", $time, 64'(cp2af_sRxPort.c0.data))); + `TRACE(2, ("%d: MMIO_CMD_ARG1: data=0x%h\n", $time, 64'(cp2af_sRxPort.c0.data))) `endif end MMIO_CMD_ARG2: begin cmd_args[2] <= 64'(cp2af_sRxPort.c0.data); `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: MMIO_CMD_ARG2: data=%0d\n", $time, 64'(cp2af_sRxPort.c0.data))); + `TRACE(2, ("%d: MMIO_CMD_ARG2: data=%0d\n", $time, 64'(cp2af_sRxPort.c0.data))) `endif end MMIO_CMD_TYPE: begin `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: MMIO_CMD_TYPE: data=%0d\n", $time, 64'(cp2af_sRxPort.c0.data))); + `TRACE(2, ("%d: MMIO_CMD_TYPE: data=%0d\n", $time, 64'(cp2af_sRxPort.c0.data))) `endif end `ifdef SCOPE MMIO_SCOPE_WRITE: begin `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: MMIO_SCOPE_WRITE: data=0x%h\n", $time, cmd_scope_wdata)); + `TRACE(2, ("%d: 
MMIO_SCOPE_WRITE: data=0x%h\n", $time, cmd_scope_wdata)) `endif end `endif default: begin `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: Unknown MMIO Wr: addr=0x%0h, data=0x%h\n", $time, mmio_req_hdr.address, 64'(cp2af_sRxPort.c0.data))); + `TRACE(2, ("%d: Unknown MMIO Wr: addr=0x%0h, data=0x%h\n", $time, mmio_req_hdr.address, 64'(cp2af_sRxPort.c0.data))) `endif end endcase @@ -372,25 +372,25 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ case (cmd_type) CMD_MEM_READ: begin `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: STATE MEM_READ: ia=0x%0h addr=0x%0h size=%0d\n", $time, cmd_io_addr, cmd_mem_addr, cmd_data_size)); + `TRACE(2, ("%d: STATE MEM_READ: ia=0x%0h addr=0x%0h size=%0d\n", $time, cmd_io_addr, cmd_mem_addr, cmd_data_size)) `endif state <= STATE_MEM_READ; end CMD_MEM_WRITE: begin `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: STATE MEM_WRITE: ia=0x%0h addr=0x%0h size=%0d\n", $time, cmd_io_addr, cmd_mem_addr, cmd_data_size)); + `TRACE(2, ("%d: STATE MEM_WRITE: ia=0x%0h addr=0x%0h size=%0d\n", $time, cmd_io_addr, cmd_mem_addr, cmd_data_size)) `endif state <= STATE_MEM_WRITE; end CMD_DCR_WRITE: begin `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: STATE DCR_WRITE: addr=0x%0h data=%0d\n", $time, cmd_dcr_addr, cmd_dcr_data)); + `TRACE(2, ("%d: STATE DCR_WRITE: addr=0x%0h data=%0d\n", $time, cmd_dcr_addr, cmd_dcr_data)) `endif state <= STATE_DCR_WRITE; end CMD_RUN: begin `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: STATE RUN\n", $time)); + `TRACE(2, ("%d: STATE RUN\n", $time)) `endif state <= STATE_RUN; vx_running <= 0; @@ -404,7 +404,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ if (cmd_mem_rd_done) begin state <= STATE_IDLE; `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: STATE IDLE\n", $time)); + `TRACE(2, ("%d: STATE IDLE\n", $time)) `endif end end @@ -412,14 +412,14 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ if (cmd_mem_wr_done) begin state <= STATE_IDLE; `ifdef DBG_TRACE_AFU - `TRACE(2, 
("%d: STATE IDLE\n", $time)); + `TRACE(2, ("%d: STATE IDLE\n", $time)) `endif end end STATE_DCR_WRITE: begin state <= STATE_IDLE; `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: STATE IDLE\n", $time)); + `TRACE(2, ("%d: STATE IDLE\n", $time)) `endif end STATE_RUN: begin @@ -434,8 +434,8 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ if (~vx_busy) begin state <= STATE_IDLE; `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: AFU: End execution\n", $time)); - `TRACE(2, ("%d: STATE IDLE\n", $time)); + `TRACE(2, ("%d: AFU: End execution\n", $time)) + `TRACE(2, ("%d: STATE IDLE\n", $time)) `endif end end @@ -443,7 +443,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ // wait until the reset sequence is complete if (vx_reset_ctr == (`RESET_DELAY-1)) begin `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: AFU: Begin execution\n", $time)); + `TRACE(2, ("%d: AFU: Begin execution\n", $time)) `endif vx_running <= 1; vx_busy_wait <= 1; @@ -745,7 +745,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ cci_rd_req_addr <= cci_rd_req_addr + 1; cci_rd_req_ctr <= cci_rd_req_ctr + $bits(cci_rd_req_ctr)'(1); `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: CCI Rd Req: addr=0x%0h, tag=0x%0h, rem=%0d, pending=%0d\n", $time, cci_rd_req_addr, cci_rd_req_tag, (cmd_data_size - cci_rd_req_ctr - 1), cci_pending_reads)); + `TRACE(2, ("%d: CCI Rd Req: addr=0x%0h, tag=0x%0h, rem=%0d, pending=%0d\n", $time, cci_rd_req_addr, cci_rd_req_tag, (cmd_data_size - cci_rd_req_ctr - 1), cci_pending_reads)) `endif end @@ -755,13 +755,13 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ cci_mem_wr_req_addr_base <= cci_mem_wr_req_addr_base + CCI_ADDR_WIDTH'(CCI_RD_WINDOW_SIZE); end `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: CCI Rd Rsp: idx=%0d, ctr=%0d, data=0x%h\n", $time, cci_rd_rsp_tag, cci_rd_rsp_ctr, cp2af_sRxPort.c0.data)); + `TRACE(2, ("%d: CCI Rd Rsp: idx=%0d, ctr=%0d, data=0x%h\n", $time, cci_rd_rsp_tag, 
cci_rd_rsp_ctr, cp2af_sRxPort.c0.data)) `endif end if (cci_rdq_pop) begin `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: CCI Rd Queue Pop: pending=%0d\n", $time, cci_pending_reads)); + `TRACE(2, ("%d: CCI Rd Queue Pop: pending=%0d\n", $time, cci_pending_reads)) `endif end @@ -899,13 +899,13 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ cci_wr_req_done <= 1; end `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: CCI Wr Req: addr=0x%0h, rem=%0d, pending=%0d, data=0x%h\n", $time, cci_wr_req_addr, (cci_wr_req_ctr - 1), cci_pending_writes, af2cp_sTxPort.c1.data)); + `TRACE(2, ("%d: CCI Wr Req: addr=0x%0h, rem=%0d, pending=%0d, data=0x%h\n", $time, cci_wr_req_addr, (cci_wr_req_ctr - 1), cci_pending_writes, af2cp_sTxPort.c1.data)) `endif end if (cci_wr_rsp_fire) begin `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: CCI Wr Rsp: pending=%0d\n", $time, cci_pending_writes)); + `TRACE(2, ("%d: CCI Wr Rsp: pending=%0d\n", $time, cci_pending_writes)) `endif end end @@ -1086,13 +1086,13 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ always @(posedge clk) begin for (integer i = 0; i < NUM_LOCAL_MEM_BANKS; ++i) begin if (avs_write[i] && ~avs_waitrequest[i]) begin - `TRACE(2, ("%d: AVS Wr Req [%0d]: addr=0x%0h, byteen=0x%0h, burst=0x%0h, data=0x%h\n", $time, i, `TO_FULL_ADDR(avs_address[i]), avs_byteenable[i], avs_burstcount[i], avs_writedata[i])); + `TRACE(2, ("%d: AVS Wr Req [%0d]: addr=0x%0h, byteen=0x%0h, burst=0x%0h, data=0x%h\n", $time, i, `TO_FULL_ADDR(avs_address[i]), avs_byteenable[i], avs_burstcount[i], avs_writedata[i])) end if (avs_read[i] && ~avs_waitrequest[i]) begin - `TRACE(2, ("%d: AVS Rd Req [%0d]: addr=0x%0h, byteen=0x%0h, burst=0x%0h\n", $time, i, `TO_FULL_ADDR(avs_address[i]), avs_byteenable[i], avs_burstcount[i])); + `TRACE(2, ("%d: AVS Rd Req [%0d]: addr=0x%0h, byteen=0x%0h, burst=0x%0h\n", $time, i, `TO_FULL_ADDR(avs_address[i]), avs_byteenable[i], avs_burstcount[i])) end if (avs_readdatavalid[i]) begin - 
`TRACE(2, ("%d: AVS Rd Rsp [%0d]: data=0x%h\n", $time, i, avs_readdata[i])); + `TRACE(2, ("%d: AVS Rd Rsp [%0d]: data=0x%h\n", $time, i, avs_readdata[i])) end end end diff --git a/hw/rtl/afu/xrt/VX_afu_wrap.sv b/hw/rtl/afu/xrt/VX_afu_wrap.sv index e1ba821263..1efda8029e 100644 --- a/hw/rtl/afu/xrt/VX_afu_wrap.sv +++ b/hw/rtl/afu/xrt/VX_afu_wrap.sv @@ -133,7 +133,7 @@ module VX_afu_wrap #( STATE_IDLE: begin if (ap_start) begin `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: STATE RUN\n", $time)); + `TRACE(2, ("%d: STATE RUN\n", $time)) `endif state <= STATE_RUN; vx_reset_ctr <= 0; @@ -145,7 +145,7 @@ module VX_afu_wrap #( // wait until the reset network is ready if (vx_reset_ctr == 0) begin `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: AFU: Begin execution\n", $time)); + `TRACE(2, ("%d: AFU: Begin execution\n", $time)) `endif vx_busy_wait <= 1; vx_reset <= 0; @@ -160,7 +160,7 @@ module VX_afu_wrap #( // wait until the processor is not busy if (~vx_busy) begin `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: AFU: End execution\n", $time)); + `TRACE(2, ("%d: AFU: End execution\n", $time)) `endif state <= STATE_IDLE; end @@ -365,16 +365,16 @@ module VX_afu_wrap #( always @(posedge ap_clk) begin for (integer i = 0; i < C_M_AXI_MEM_NUM_BANKS; ++i) begin if (m_axi_mem_awvalid_a[i] && m_axi_mem_awready_a[i]) begin - `TRACE(2, ("%d: AFU Wr Req [%0d]: addr=0x%0h, tag=0x%0h\n", $time, i, m_axi_mem_awaddr_a[i], m_axi_mem_awid_a[i])); + `TRACE(2, ("%d: AFU Wr Req [%0d]: addr=0x%0h, tag=0x%0h\n", $time, i, m_axi_mem_awaddr_a[i], m_axi_mem_awid_a[i])) end if (m_axi_mem_wvalid_a[i] && m_axi_mem_wready_a[i]) begin - `TRACE(2, ("%d: AFU Wr Req [%0d]: data=0x%h\n", $time, i, m_axi_mem_wdata_a[i])); + `TRACE(2, ("%d: AFU Wr Req [%0d]: data=0x%h\n", $time, i, m_axi_mem_wdata_a[i])) end if (m_axi_mem_arvalid_a[i] && m_axi_mem_arready_a[i]) begin - `TRACE(2, ("%d: AFU Rd Req [%0d]: addr=0x%0h, tag=0x%0h\n", $time, i, m_axi_mem_araddr_a[i], m_axi_mem_arid_a[i])); + `TRACE(2, ("%d: AFU Rd Req [%0d]: addr=0x%0h, 
tag=0x%0h\n", $time, i, m_axi_mem_araddr_a[i], m_axi_mem_arid_a[i])) end if (m_axi_mem_rvalid_a[i] && m_axi_mem_rready_a[i]) begin - `TRACE(2, ("%d: AVS Rd Rsp [%0d]: data=0x%h, tag=0x%0h\n", $time, i, m_axi_mem_rdata_a[i], m_axi_mem_rid_a[i])); + `TRACE(2, ("%d: AVS Rd Rsp [%0d]: data=0x%h, tag=0x%0h\n", $time, i, m_axi_mem_rdata_a[i], m_axi_mem_rid_a[i])) end end end diff --git a/hw/rtl/cache/VX_cache_bank.sv b/hw/rtl/cache/VX_cache_bank.sv index a8f8dbdf2b..e18be4b665 100644 --- a/hw/rtl/cache/VX_cache_bank.sv +++ b/hw/rtl/cache/VX_cache_bank.sv @@ -394,7 +394,7 @@ module VX_cache_bank #( `UNUSED_VAR (do_write_miss_st1) // ensure mshr replay always get a hit - `RUNTIME_ASSERT (~(valid_st1 && is_replay_st1) || is_hit_st1, ("%t: missed mshr replay", $time)); + `RUNTIME_ASSERT (~(valid_st1 && is_replay_st1) || is_hit_st1, ("%t: missed mshr replay", $time)) // both tag and data stores use BRAM with no read-during-write protection. // we ned to stall the pipeline to prevent read-after-write hazards. 
@@ -599,7 +599,7 @@ module VX_cache_bank #( if (DIRTY_BYTES) begin // ensure dirty bytes match the tag info wire has_dirty_bytes = (| dirty_byteen_st1); - `RUNTIME_ASSERT (~do_fill_or_flush_st1 || (evict_dirty_st1 == has_dirty_bytes), ("%t: missmatch dirty bytes: dirty_line=%b, dirty_bytes=%b, addr=0x%0h", $time, evict_dirty_st1, has_dirty_bytes, `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID))); + `RUNTIME_ASSERT (~do_fill_or_flush_st1 || (evict_dirty_st1 == has_dirty_bytes), ("%t: missmatch dirty bytes: dirty_line=%b, dirty_bytes=%b, addr=0x%0h", $time, evict_dirty_st1, has_dirty_bytes, `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID))) end assign mreq_queue_push = (((do_read_miss_st1 || do_write_miss_st1) && ~mshr_pending_st1) || do_writeback_st1) @@ -663,30 +663,30 @@ module VX_cache_bank #( && ~(replay_fire || mem_rsp_fire || core_req_fire || flush_fire); always @(posedge clk) begin if (input_stall || pipe_stall) begin - `TRACE(3, ("%d: *** %s stall: crsq=%b, mreq=%b, mshr=%b, rdw1=%b, rdw2=%b, rdw3=%b\n", $time, INSTANCE_ID, crsp_queue_stall, mreq_queue_alm_full, mshr_alm_full, rdw_hazard1_sel, rdw_hazard2_sel, rdw_hazard3_st1)); + `TRACE(3, ("%d: *** %s stall: crsq=%b, mreq=%b, mshr=%b, rdw1=%b, rdw2=%b, rdw3=%b\n", $time, INSTANCE_ID, crsp_queue_stall, mreq_queue_alm_full, mshr_alm_full, rdw_hazard1_sel, rdw_hazard2_sel, rdw_hazard3_st1)) end if (mem_rsp_fire) begin - `TRACE(2, ("%d: %s fill-rsp: addr=0x%0h, mshr_id=%0d, data=0x%h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mem_rsp_addr, BANK_ID), mem_rsp_id, mem_rsp_data)); + `TRACE(2, ("%d: %s fill-rsp: addr=0x%0h, mshr_id=%0d, data=0x%h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mem_rsp_addr, BANK_ID), mem_rsp_id, mem_rsp_data)) end if (replay_fire) begin - `TRACE(2, ("%d: %s mshr-pop: addr=0x%0h, tag=0x%0h, req_idx=%0d (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(replay_addr, BANK_ID), replay_tag, replay_idx, req_uuid_sel)); + `TRACE(2, ("%d: %s mshr-pop: addr=0x%0h, tag=0x%0h, req_idx=%0d (#%0d)\n", 
$time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(replay_addr, BANK_ID), replay_tag, replay_idx, req_uuid_sel)) end if (core_req_fire) begin if (core_req_rw) - `TRACE(2, ("%d: %s core-wr-req: addr=0x%0h, tag=0x%0h, req_idx=%0d, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(core_req_addr, BANK_ID), core_req_tag, core_req_idx, core_req_byteen, core_req_data, req_uuid_sel)); + `TRACE(2, ("%d: %s core-wr-req: addr=0x%0h, tag=0x%0h, req_idx=%0d, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(core_req_addr, BANK_ID), core_req_tag, core_req_idx, core_req_byteen, core_req_data, req_uuid_sel)) else - `TRACE(2, ("%d: %s core-rd-req: addr=0x%0h, tag=0x%0h, req_idx=%0d (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(core_req_addr, BANK_ID), core_req_tag, core_req_idx, req_uuid_sel)); + `TRACE(2, ("%d: %s core-rd-req: addr=0x%0h, tag=0x%0h, req_idx=%0d (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(core_req_addr, BANK_ID), core_req_tag, core_req_idx, req_uuid_sel)) end if (crsp_queue_fire) begin - `TRACE(2, ("%d: %s core-rd-rsp: addr=0x%0h, tag=0x%0h, req_idx=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID), crsp_queue_tag, crsp_queue_idx, crsp_queue_data, req_uuid_st1)); + `TRACE(2, ("%d: %s core-rd-rsp: addr=0x%0h, tag=0x%0h, req_idx=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID), crsp_queue_tag, crsp_queue_idx, crsp_queue_data, req_uuid_st1)) end if (mreq_queue_push) begin if (do_creq_wr_st1 && !WRITEBACK) - `TRACE(2, ("%d: %s writethrough: addr=0x%0h, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mreq_queue_addr, BANK_ID), mreq_queue_byteen, mreq_queue_data, req_uuid_st1)); + `TRACE(2, ("%d: %s writethrough: addr=0x%0h, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mreq_queue_addr, BANK_ID), mreq_queue_byteen, mreq_queue_data, req_uuid_st1)) else if (do_writeback_st1) - 
`TRACE(2, ("%d: %s writeback: addr=0x%0h, byteen=0x%h, data=0x%h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mreq_queue_addr, BANK_ID), mreq_queue_byteen, mreq_queue_data)); + `TRACE(2, ("%d: %s writeback: addr=0x%0h, byteen=0x%h, data=0x%h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mreq_queue_addr, BANK_ID), mreq_queue_byteen, mreq_queue_data)) else - `TRACE(2, ("%d: %s fill-req: addr=0x%0h, mshr_id=%0d (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mreq_queue_addr, BANK_ID), mreq_queue_id, req_uuid_st1)); + `TRACE(2, ("%d: %s fill-req: addr=0x%0h, mshr_id=%0d (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mreq_queue_addr, BANK_ID), mreq_queue_id, req_uuid_st1)) end end `endif diff --git a/hw/rtl/cache/VX_cache_data.sv b/hw/rtl/cache/VX_cache_data.sv index 302a99e5eb..12e0e1ca3d 100644 --- a/hw/rtl/cache/VX_cache_data.sv +++ b/hw/rtl/cache/VX_cache_data.sv @@ -182,16 +182,16 @@ module VX_cache_data #( `ifdef DBG_TRACE_CACHE always @(posedge clk) begin if (fill && ~stall) begin - `TRACE(3, ("%d: %s fill: addr=0x%0h, way=%b, blk_addr=%0d, data=0x%h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, fill_data)); + `TRACE(3, ("%d: %s fill: addr=0x%0h, way=%b, blk_addr=%0d, data=0x%h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, fill_data)) end if (flush && ~stall) begin - `TRACE(3, ("%d: %s flush: addr=0x%0h, way=%b, blk_addr=%0d, byteen=0x%h, data=0x%h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, dirty_byteen, dirty_data)); + `TRACE(3, ("%d: %s flush: addr=0x%0h, way=%b, blk_addr=%0d, byteen=0x%h, data=0x%h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, dirty_byteen, dirty_data)) end if (read && ~stall) begin - `TRACE(3, ("%d: %s read: addr=0x%0h, way=%b, blk_addr=%0d, wsel=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, wsel, read_data, 
req_uuid)); + `TRACE(3, ("%d: %s read: addr=0x%0h, way=%b, blk_addr=%0d, wsel=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, wsel, read_data, req_uuid)) end if (write && ~stall) begin - `TRACE(3, ("%d: %s write: addr=0x%0h, way=%b, blk_addr=%0d, wsel=%0d, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, wsel, write_byteen[wsel], write_data[wsel], req_uuid)); + `TRACE(3, ("%d: %s write: addr=0x%0h, way=%b, blk_addr=%0d, wsel=%0d, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, wsel, write_byteen[wsel], write_data[wsel], req_uuid)) end end `endif diff --git a/hw/rtl/cache/VX_cache_mshr.sv b/hw/rtl/cache/VX_cache_mshr.sv index 0ca67d1598..855b953243 100644 --- a/hw/rtl/cache/VX_cache_mshr.sv +++ b/hw/rtl/cache/VX_cache_mshr.sv @@ -269,33 +269,33 @@ module VX_cache_mshr #( end if (allocate_fire) `TRACE(3, ("%d: %s allocate: addr=0x%0h, prev=%0d, id=%0d (#%0d)\n", $time, INSTANCE_ID, - `CS_LINE_TO_FULL_ADDR(allocate_addr, BANK_ID), allocate_prev, allocate_id, lkp_req_uuid)); + `CS_LINE_TO_FULL_ADDR(allocate_addr, BANK_ID), allocate_prev, allocate_id, lkp_req_uuid)) if (lookup_valid) `TRACE(3, ("%d: %s lookup: addr=0x%0h, matches=%b (#%0d)\n", $time, INSTANCE_ID, - `CS_LINE_TO_FULL_ADDR(lookup_addr, BANK_ID), lookup_pending, lkp_req_uuid)); + `CS_LINE_TO_FULL_ADDR(lookup_addr, BANK_ID), lookup_pending, lkp_req_uuid)) if (finalize_valid) `TRACE(3, ("%d: %s finalize release=%b, pending=%b, prev=%0d, id=%0d (#%0d)\n", $time, INSTANCE_ID, - finalize_release, finalize_pending, finalize_prev, finalize_id, fin_req_uuid)); + finalize_release, finalize_pending, finalize_prev, finalize_id, fin_req_uuid)) if (fill_valid) `TRACE(3, ("%d: %s fill: addr=0x%0h, addr=0x%0h, id=%0d\n", $time, INSTANCE_ID, - `CS_LINE_TO_FULL_ADDR(addr_table[fill_id], BANK_ID), `CS_LINE_TO_FULL_ADDR(fill_addr, 
BANK_ID), fill_id)); + `CS_LINE_TO_FULL_ADDR(addr_table[fill_id], BANK_ID), `CS_LINE_TO_FULL_ADDR(fill_addr, BANK_ID), fill_id)) if (dequeue_fire) `TRACE(3, ("%d: %s dequeue: addr=0x%0h, id=%0d (#%0d)\n", $time, INSTANCE_ID, - `CS_LINE_TO_FULL_ADDR(dequeue_addr, BANK_ID), dequeue_id_r, deq_req_uuid)); + `CS_LINE_TO_FULL_ADDR(dequeue_addr, BANK_ID), dequeue_id_r, deq_req_uuid)) if (show_table) begin - `TRACE(3, ("%d: %s table", $time, INSTANCE_ID)); + `TRACE(3, ("%d: %s table", $time, INSTANCE_ID)) for (integer i = 0; i < MSHR_SIZE; ++i) begin if (valid_table[i]) begin - `TRACE(3, (" %0d=0x%0h", i, `CS_LINE_TO_FULL_ADDR(addr_table[i], BANK_ID))); + `TRACE(3, (" %0d=0x%0h", i, `CS_LINE_TO_FULL_ADDR(addr_table[i], BANK_ID))) if (write_table[i]) - `TRACE(3, ("(w)")); + `TRACE(3, ("(w)")) else - `TRACE(3, ("(r)")); + `TRACE(3, ("(r)")) if (next_table[i]) - `TRACE(3, ("->%0d", next_index[i])); + `TRACE(3, ("->%0d", next_index[i])) end end - `TRACE(3, ("\n")); + `TRACE(3, ("\n")) end end `endif diff --git a/hw/rtl/cache/VX_cache_tags.sv b/hw/rtl/cache/VX_cache_tags.sv index 4d5b0bcd37..dc2e77092f 100644 --- a/hw/rtl/cache/VX_cache_tags.sv +++ b/hw/rtl/cache/VX_cache_tags.sv @@ -149,25 +149,25 @@ module VX_cache_tags #( wire [`CS_LINE_ADDR_WIDTH-1:0] evict_line_addr = {evict_tag, line_sel}; always @(posedge clk) begin if (fill && ~stall) begin - `TRACE(3, ("%d: %s fill: addr=0x%0h, way=%b, blk_addr=%0d, tag_id=0x%0h, dirty=%b, evict_addr=0x%0h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), evict_way, line_sel, line_tag, evict_dirty, `CS_LINE_TO_FULL_ADDR(evict_line_addr, BANK_ID))); + `TRACE(3, ("%d: %s fill: addr=0x%0h, way=%b, blk_addr=%0d, tag_id=0x%0h, dirty=%b, evict_addr=0x%0h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), evict_way, line_sel, line_tag, evict_dirty, `CS_LINE_TO_FULL_ADDR(evict_line_addr, BANK_ID))) end if (init) begin - `TRACE(3, ("%d: %s init: addr=0x%0h, blk_addr=%0d\n", $time, INSTANCE_ID, 
`CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), line_sel)); + `TRACE(3, ("%d: %s init: addr=0x%0h, blk_addr=%0d\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), line_sel)) end if (flush && ~stall) begin - `TRACE(3, ("%d: %s flush: addr=0x%0h, way=%b, blk_addr=%0d, dirty=%b\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(evict_line_addr, BANK_ID), way_sel, line_sel, evict_dirty)); + `TRACE(3, ("%d: %s flush: addr=0x%0h, way=%b, blk_addr=%0d, dirty=%b\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(evict_line_addr, BANK_ID), way_sel, line_sel, evict_dirty)) end if (lookup && ~stall) begin if (tag_matches != 0) begin if (write) - `TRACE(3, ("%d: %s write-hit: addr=0x%0h, way=%b, blk_addr=%0d, tag_id=0x%0h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), tag_matches, line_sel, line_tag, req_uuid)); + `TRACE(3, ("%d: %s write-hit: addr=0x%0h, way=%b, blk_addr=%0d, tag_id=0x%0h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), tag_matches, line_sel, line_tag, req_uuid)) else - `TRACE(3, ("%d: %s read-hit: addr=0x%0h, way=%b, blk_addr=%0d, tag_id=0x%0h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), tag_matches, line_sel, line_tag, req_uuid)); + `TRACE(3, ("%d: %s read-hit: addr=0x%0h, way=%b, blk_addr=%0d, tag_id=0x%0h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), tag_matches, line_sel, line_tag, req_uuid)) end else begin if (write) - `TRACE(3, ("%d: %s write-miss: addr=0x%0h, blk_addr=%0d, tag_id=0x%0h, (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), line_sel, line_tag, req_uuid)); + `TRACE(3, ("%d: %s write-miss: addr=0x%0h, blk_addr=%0d, tag_id=0x%0h, (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), line_sel, line_tag, req_uuid)) else - `TRACE(3, ("%d: %s read-miss: addr=0x%0h, blk_addr=%0d, tag_id=0x%0h, (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), line_sel, line_tag, 
req_uuid)); + `TRACE(3, ("%d: %s read-miss: addr=0x%0h, blk_addr=%0d, tag_id=0x%0h, (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), line_sel, line_tag, req_uuid)) end end end diff --git a/hw/rtl/cache/VX_cache_wrap.sv b/hw/rtl/cache/VX_cache_wrap.sv index 513c29b5d1..6210c313e0 100644 --- a/hw/rtl/cache/VX_cache_wrap.sv +++ b/hw/rtl/cache/VX_cache_wrap.sv @@ -234,12 +234,12 @@ module VX_cache_wrap import VX_gpu_pkg::*; #( always @(posedge clk) begin if (core_req_fire) begin if (core_bus_if[i].req_data.rw) - `TRACE(1, ("%d: %s core-wr-req: addr=0x%0h, tag=0x%0h, req_idx=%0d, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `TO_FULL_ADDR(core_bus_if[i].req_data.addr), core_bus_if[i].req_data.tag, i, core_bus_if[i].req_data.byteen, core_bus_if[i].req_data.data, core_req_uuid)); + `TRACE(1, ("%d: %s core-wr-req: addr=0x%0h, tag=0x%0h, req_idx=%0d, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `TO_FULL_ADDR(core_bus_if[i].req_data.addr), core_bus_if[i].req_data.tag, i, core_bus_if[i].req_data.byteen, core_bus_if[i].req_data.data, core_req_uuid)) else - `TRACE(1, ("%d: %s core-rd-req: addr=0x%0h, tag=0x%0h, req_idx=%0d (#%0d)\n", $time, INSTANCE_ID, `TO_FULL_ADDR(core_bus_if[i].req_data.addr), core_bus_if[i].req_data.tag, i, core_req_uuid)); + `TRACE(1, ("%d: %s core-rd-req: addr=0x%0h, tag=0x%0h, req_idx=%0d (#%0d)\n", $time, INSTANCE_ID, `TO_FULL_ADDR(core_bus_if[i].req_data.addr), core_bus_if[i].req_data.tag, i, core_req_uuid)) end if (core_rsp_fire) begin - `TRACE(1, ("%d: %s core-rd-rsp: tag=0x%0h, req_idx=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID, core_bus_if[i].rsp_data.tag, i, core_bus_if[i].rsp_data.data, core_rsp_uuid)); + `TRACE(1, ("%d: %s core-rd-rsp: tag=0x%0h, req_idx=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID, core_bus_if[i].rsp_data.tag, i, core_bus_if[i].rsp_data.data, core_rsp_uuid)) end end end @@ -262,14 +262,14 @@ module VX_cache_wrap import VX_gpu_pkg::*; #( if (mem_req_fire) begin if 
(mem_bus_if.req_data.rw) `TRACE(1, ("%d: %s mem-wr-req: addr=0x%0h, tag=0x%0h, byteen=0x%h, data=0x%h (#%0d)\n", - $time, INSTANCE_ID, `TO_FULL_ADDR(mem_bus_if.req_data.addr), mem_bus_if.req_data.tag, mem_bus_if.req_data.byteen, mem_bus_if.req_data.data, mem_req_uuid)); + $time, INSTANCE_ID, `TO_FULL_ADDR(mem_bus_if.req_data.addr), mem_bus_if.req_data.tag, mem_bus_if.req_data.byteen, mem_bus_if.req_data.data, mem_req_uuid)) else `TRACE(1, ("%d: %s mem-rd-req: addr=0x%0h, tag=0x%0h (#%0d)\n", - $time, INSTANCE_ID, `TO_FULL_ADDR(mem_bus_if.req_data.addr), mem_bus_if.req_data.tag, mem_req_uuid)); + $time, INSTANCE_ID, `TO_FULL_ADDR(mem_bus_if.req_data.addr), mem_bus_if.req_data.tag, mem_req_uuid)) end if (mem_rsp_fire) begin `TRACE(1, ("%d: %s mem-rd-rsp: tag=0x%0h, data=0x%h (#%0d)\n", - $time, INSTANCE_ID, mem_bus_if.rsp_data.tag, mem_bus_if.rsp_data.data, mem_rsp_uuid)); + $time, INSTANCE_ID, mem_bus_if.rsp_data.tag, mem_bus_if.rsp_data.data, mem_rsp_uuid)) end end `endif diff --git a/hw/rtl/core/VX_alu_int.sv b/hw/rtl/core/VX_alu_int.sv index 06acfde397..083438e883 100644 --- a/hw/rtl/core/VX_alu_int.sv +++ b/hw/rtl/core/VX_alu_int.sv @@ -195,7 +195,7 @@ module VX_alu_int #( always @(posedge clk) begin if (br_enable) begin `TRACE(1, ("%d: %s branch: wid=%0d, PC=0x%0h, taken=%b, dest=0x%0h (#%0d)\n", - $time, INSTANCE_ID, br_wid, {commit_if.data.PC, 1'b0}, br_taken, {br_dest, 1'b0}, commit_if.data.uuid)); + $time, INSTANCE_ID, br_wid, {commit_if.data.PC, 1'b0}, br_taken, {br_dest, 1'b0}, commit_if.data.uuid)) end end `endif diff --git a/hw/rtl/core/VX_commit.sv b/hw/rtl/core/VX_commit.sv index 160bcf4d4a..f993c9648e 100644 --- a/hw/rtl/core/VX_commit.sv +++ b/hw/rtl/core/VX_commit.sv @@ -178,11 +178,11 @@ module VX_commit import VX_gpu_pkg::*; #( for (genvar j = 0; j < `NUM_EX_UNITS; ++j) begin always @(posedge clk) begin if (commit_if[j * `ISSUE_WIDTH + i].valid && commit_if[j * `ISSUE_WIDTH + i].ready) begin - `TRACE(1, ("%d: %s: wid=%0d, PC=0x%0h, ex=", $time, 
INSTANCE_ID, commit_if[j * `ISSUE_WIDTH + i].data.wid, {commit_if[j * `ISSUE_WIDTH + i].data.PC, 1'b0})); + `TRACE(1, ("%d: %s: wid=%0d, PC=0x%0h, ex=", $time, INSTANCE_ID, commit_if[j * `ISSUE_WIDTH + i].data.wid, {commit_if[j * `ISSUE_WIDTH + i].data.PC, 1'b0})) trace_ex_type(1, j); - `TRACE(1, (", tmask=%b, wb=%0d, rd=%0d, sop=%b, eop=%b, data=", commit_if[j * `ISSUE_WIDTH + i].data.tmask, commit_if[j * `ISSUE_WIDTH + i].data.wb, commit_if[j * `ISSUE_WIDTH + i].data.rd, commit_if[j * `ISSUE_WIDTH + i].data.sop, commit_if[j * `ISSUE_WIDTH + i].data.eop)); - `TRACE_ARRAY1D(1, "0x%0h", commit_if[j * `ISSUE_WIDTH + i].data.data, `NUM_THREADS); - `TRACE(1, (" (#%0d)\n", commit_if[j * `ISSUE_WIDTH + i].data.uuid)); + `TRACE(1, (", tmask=%b, wb=%0d, rd=%0d, sop=%b, eop=%b, data=", commit_if[j * `ISSUE_WIDTH + i].data.tmask, commit_if[j * `ISSUE_WIDTH + i].data.wb, commit_if[j * `ISSUE_WIDTH + i].data.rd, commit_if[j * `ISSUE_WIDTH + i].data.sop, commit_if[j * `ISSUE_WIDTH + i].data.eop)) + `TRACE_ARRAY1D(1, "0x%0h", commit_if[j * `ISSUE_WIDTH + i].data.data, `NUM_THREADS) + `TRACE(1, (" (#%0d)\n", commit_if[j * `ISSUE_WIDTH + i].data.uuid)) end end end diff --git a/hw/rtl/core/VX_dcr_data.sv b/hw/rtl/core/VX_dcr_data.sv index b20d95fc7b..03c5be61f1 100644 --- a/hw/rtl/core/VX_dcr_data.sv +++ b/hw/rtl/core/VX_dcr_data.sv @@ -50,9 +50,9 @@ module VX_dcr_data import VX_gpu_pkg::*; ( `ifdef DBG_TRACE_PIPELINE always @(posedge clk) begin if (dcr_bus_if.write_valid) begin - `TRACE(1, ("%d: base-dcr: state=", $time)); + `TRACE(1, ("%d: base-dcr: state=", $time)) trace_base_dcr(1, dcr_bus_if.write_addr); - `TRACE(1, (", data=0x%h\n", dcr_bus_if.write_data)); + `TRACE(1, (", data=0x%h\n", dcr_bus_if.write_data)) end end `endif diff --git a/hw/rtl/core/VX_decode.sv b/hw/rtl/core/VX_decode.sv index 897dfcc11f..28d27a2993 100644 --- a/hw/rtl/core/VX_decode.sv +++ b/hw/rtl/core/VX_decode.sv @@ -568,14 +568,14 @@ module VX_decode import VX_gpu_pkg::*; #( `ifdef DBG_TRACE_PIPELINE 
always @(posedge clk) begin if (decode_if.valid && decode_if.ready) begin - `TRACE(1, ("%d: %s: wid=%0d, PC=0x%0h, instr=0x%0h, ex=", $time, INSTANCE_ID, decode_if.data.wid, {decode_if.data.PC, 1'd0}, instr)); + `TRACE(1, ("%d: %s: wid=%0d, PC=0x%0h, instr=0x%0h, ex=", $time, INSTANCE_ID, decode_if.data.wid, {decode_if.data.PC, 1'd0}, instr)) trace_ex_type(1, decode_if.data.ex_type); - `TRACE(1, (", op=")); + `TRACE(1, (", op=")) trace_ex_op(1, decode_if.data.ex_type, decode_if.data.op_type, decode_if.data.op_args); `TRACE(1, (", tmask=%b, wb=%b, rd=%0d, rs1=%0d, rs2=%0d, rs3=%0d, opds=%b%b%b%b", - decode_if.data.tmask, decode_if.data.wb, decode_if.data.rd, decode_if.data.rs1, decode_if.data.rs2, decode_if.data.rs3, use_rd, use_rs1, use_rs2, use_rs3)); + decode_if.data.tmask, decode_if.data.wb, decode_if.data.rd, decode_if.data.rs1, decode_if.data.rs2, decode_if.data.rs3, use_rd, use_rs1, use_rs2, use_rs3)) trace_op_args(1, decode_if.data.ex_type, decode_if.data.op_type, decode_if.data.op_args); - `TRACE(1, (" (#%0d)\n", decode_if.data.uuid)); + `TRACE(1, (" (#%0d)\n", decode_if.data.uuid)) end end `endif diff --git a/hw/rtl/core/VX_fetch.sv b/hw/rtl/core/VX_fetch.sv index 1da184288e..46283818ae 100644 --- a/hw/rtl/core/VX_fetch.sv +++ b/hw/rtl/core/VX_fetch.sv @@ -168,10 +168,10 @@ module VX_fetch import VX_gpu_pkg::*; #( wire fetch_fire = fetch_if.valid && fetch_if.ready; always @(posedge clk) begin if (schedule_fire) begin - `TRACE(1, ("%d: %s req: wid=%0d, PC=0x%0h, tmask=%b (#%0d)\n", $time, INSTANCE_ID, schedule_if.data.wid, {schedule_if.data.PC, 1'b0}, schedule_if.data.tmask, schedule_if.data.uuid)); + `TRACE(1, ("%d: %s req: wid=%0d, PC=0x%0h, tmask=%b (#%0d)\n", $time, INSTANCE_ID, schedule_if.data.wid, {schedule_if.data.PC, 1'b0}, schedule_if.data.tmask, schedule_if.data.uuid)) end if (fetch_fire) begin - `TRACE(1, ("%d: %s rsp: wid=%0d, PC=0x%0h, tmask=%b, instr=0x%0h (#%0d)\n", $time, INSTANCE_ID, fetch_if.data.wid, {fetch_if.data.PC, 1'b0}, 
fetch_if.data.tmask, fetch_if.data.instr, fetch_if.data.uuid)); + `TRACE(1, ("%d: %s rsp: wid=%0d, PC=0x%0h, tmask=%b, instr=0x%0h (#%0d)\n", $time, INSTANCE_ID, fetch_if.data.wid, {fetch_if.data.PC, 1'b0}, fetch_if.data.tmask, fetch_if.data.instr, fetch_if.data.uuid)) end end `endif diff --git a/hw/rtl/core/VX_issue_slice.sv b/hw/rtl/core/VX_issue_slice.sv index 18dd41cd7d..a99bf2c8ff 100644 --- a/hw/rtl/core/VX_issue_slice.sv +++ b/hw/rtl/core/VX_issue_slice.sv @@ -135,18 +135,18 @@ module VX_issue_slice import VX_gpu_pkg::*; #( `ifdef DBG_TRACE_PIPELINE always @(posedge clk) begin if (operands_if.valid && operands_if.ready) begin - `TRACE(1, ("%d: %s: wid=%0d, PC=0x%0h, ex=", $time, INSTANCE_ID, wis_to_wid(operands_if.data.wis, ISSUE_ID), {operands_if.data.PC, 1'b0})); + `TRACE(1, ("%d: %s: wid=%0d, PC=0x%0h, ex=", $time, INSTANCE_ID, wis_to_wid(operands_if.data.wis, ISSUE_ID), {operands_if.data.PC, 1'b0})) trace_ex_type(1, operands_if.data.ex_type); - `TRACE(1, (", op=")); + `TRACE(1, (", op=")) trace_ex_op(1, operands_if.data.ex_type, operands_if.data.op_type, operands_if.data.op_args); - `TRACE(1, (", tmask=%b, wb=%b, rd=%0d, rs1_data=", operands_if.data.tmask, operands_if.data.wb, operands_if.data.rd)); - `TRACE_ARRAY1D(1, "0x%0h", operands_if.data.rs1_data, `NUM_THREADS); - `TRACE(1, (", rs2_data=")); - `TRACE_ARRAY1D(1, "0x%0h", operands_if.data.rs2_data, `NUM_THREADS); - `TRACE(1, (", rs3_data=")); - `TRACE_ARRAY1D(1, "0x%0h", operands_if.data.rs3_data, `NUM_THREADS); + `TRACE(1, (", tmask=%b, wb=%b, rd=%0d, rs1_data=", operands_if.data.tmask, operands_if.data.wb, operands_if.data.rd)) + `TRACE_ARRAY1D(1, "0x%0h", operands_if.data.rs1_data, `NUM_THREADS) + `TRACE(1, (", rs2_data=")) + `TRACE_ARRAY1D(1, "0x%0h", operands_if.data.rs2_data, `NUM_THREADS) + `TRACE(1, (", rs3_data=")) + `TRACE_ARRAY1D(1, "0x%0h", operands_if.data.rs3_data, `NUM_THREADS) trace_op_args(1, operands_if.data.ex_type, operands_if.data.op_type, operands_if.data.op_args); - `TRACE(1, 
(" (#%0d)\n", operands_if.data.uuid)); + `TRACE(1, (" (#%0d)\n", operands_if.data.uuid)) end end `endif diff --git a/hw/rtl/core/VX_lsu_slice.sv b/hw/rtl/core/VX_lsu_slice.sv index 7ee15bb143..bd82aee31e 100644 --- a/hw/rtl/core/VX_lsu_slice.sv +++ b/hw/rtl/core/VX_lsu_slice.sv @@ -189,7 +189,7 @@ module VX_lsu_slice import VX_gpu_pkg::*; #( wire lsu_req_fire = execute_if.valid && execute_if.ready; `RUNTIME_ASSERT((~lsu_req_fire || ~execute_if.data.tmask[i] || req_is_fence || (full_addr[i] % (1 << `INST_LSU_WSIZE(execute_if.data.op_type))) == 0), ("%t: misaligned memory access, wid=%0d, PC=0x%0h, addr=0x%0h, wsize=%0d! (#%0d)", - $time, execute_if.data.wid, {execute_if.data.PC, 1'b0}, full_addr[i], `INST_LSU_WSIZE(execute_if.data.op_type), execute_if.data.uuid)); + $time, execute_if.data.wid, {execute_if.data.PC, 1'b0}, full_addr[i], `INST_LSU_WSIZE(execute_if.data.op_type), execute_if.data.uuid)) end // store data formatting @@ -505,30 +505,30 @@ module VX_lsu_slice import VX_gpu_pkg::*; #( `ifdef DBG_TRACE_MEM always @(posedge clk) begin if (execute_if.valid && fence_lock) begin - `TRACE(1, ("%d: *** %s fence wait\n", $time, INSTANCE_ID)); + `TRACE(1, ("%d: *** %s fence wait\n", $time, INSTANCE_ID)) end if (mem_req_fire) begin if (mem_req_rw) begin - `TRACE(1, ("%d: %s Wr Req: wid=%0d, PC=0x%0h, tmask=%b, addr=", $time, INSTANCE_ID, execute_if.data.wid, {execute_if.data.PC, 1'b0}, mem_req_mask)); - `TRACE_ARRAY1D(1, "0x%h", full_addr, NUM_LANES); - `TRACE(1, (", flags=")); - `TRACE_ARRAY1D(1, "%b", mem_req_flags, NUM_LANES); - `TRACE(1, (", byteen=0x%0h, data=", mem_req_byteen)); - `TRACE_ARRAY1D(1, "0x%0h", mem_req_data, NUM_LANES); - `TRACE(1, (", sop=%b, eop=%b, tag=0x%0h (#%0d)\n", execute_if.data.sop, execute_if.data.eop, mem_req_tag, execute_if.data.uuid)); + `TRACE(1, ("%d: %s Wr Req: wid=%0d, PC=0x%0h, tmask=%b, addr=", $time, INSTANCE_ID, execute_if.data.wid, {execute_if.data.PC, 1'b0}, mem_req_mask)) + `TRACE_ARRAY1D(1, "0x%h", full_addr, NUM_LANES) + 
`TRACE(1, (", flags=")) + `TRACE_ARRAY1D(1, "%b", mem_req_flags, NUM_LANES) + `TRACE(1, (", byteen=0x%0h, data=", mem_req_byteen)) + `TRACE_ARRAY1D(1, "0x%0h", mem_req_data, NUM_LANES) + `TRACE(1, (", sop=%b, eop=%b, tag=0x%0h (#%0d)\n", execute_if.data.sop, execute_if.data.eop, mem_req_tag, execute_if.data.uuid)) end else begin - `TRACE(1, ("%d: %s Rd Req: wid=%0d, PC=0x%0h, tmask=%b, addr=", $time, INSTANCE_ID, execute_if.data.wid, {execute_if.data.PC, 1'b0}, mem_req_mask)); - `TRACE_ARRAY1D(1, "0x%h", full_addr, NUM_LANES); - `TRACE(1, (", flags=")); - `TRACE_ARRAY1D(1, "%b", mem_req_flags, NUM_LANES); - `TRACE(1, (", byteen=0x%0h, rd=%0d, sop=%b, eop=%b, tag=0x%0h (#%0d)\n", mem_req_byteen, execute_if.data.rd, execute_if.data.sop, execute_if.data.eop, mem_req_tag, execute_if.data.uuid)); + `TRACE(1, ("%d: %s Rd Req: wid=%0d, PC=0x%0h, tmask=%b, addr=", $time, INSTANCE_ID, execute_if.data.wid, {execute_if.data.PC, 1'b0}, mem_req_mask)) + `TRACE_ARRAY1D(1, "0x%h", full_addr, NUM_LANES) + `TRACE(1, (", flags=")) + `TRACE_ARRAY1D(1, "%b", mem_req_flags, NUM_LANES) + `TRACE(1, (", byteen=0x%0h, rd=%0d, sop=%b, eop=%b, tag=0x%0h (#%0d)\n", mem_req_byteen, execute_if.data.rd, execute_if.data.sop, execute_if.data.eop, mem_req_tag, execute_if.data.uuid)) end end if (mem_rsp_fire) begin `TRACE(1, ("%d: %s Rsp: wid=%0d, PC=0x%0h, tmask=%b, rd=%0d, sop=%b, eop=%b, data=", - $time, INSTANCE_ID, rsp_wid, {rsp_pc, 1'b0}, mem_rsp_mask, rsp_rd, mem_rsp_sop, mem_rsp_eop)); - `TRACE_ARRAY1D(1, "0x%0h", mem_rsp_data, NUM_LANES); - `TRACE(1, (", tag=0x%0h (#%0d)\n", mem_rsp_tag, rsp_uuid)); + $time, INSTANCE_ID, rsp_wid, {rsp_pc, 1'b0}, mem_rsp_mask, rsp_rd, mem_rsp_sop, mem_rsp_eop)) + `TRACE_ARRAY1D(1, "0x%0h", mem_rsp_data, NUM_LANES) + `TRACE(1, (", tag=0x%0h (#%0d)\n", mem_rsp_tag, rsp_uuid)) end end `endif diff --git a/hw/rtl/core/VX_scoreboard.sv b/hw/rtl/core/VX_scoreboard.sv index b2d9ff2bec..14d88b8b12 100644 --- a/hw/rtl/core/VX_scoreboard.sv +++ 
b/hw/rtl/core/VX_scoreboard.sv @@ -208,7 +208,7 @@ module VX_scoreboard import VX_gpu_pkg::*; #( `ifdef DBG_TRACE_PIPELINE `TRACE(3, ("%d: *** %s-stall: wid=%0d, PC=0x%0h, tmask=%b, cycles=%0d, inuse=%b (#%0d)\n", $time, INSTANCE_ID, w, {staging_if[w].data.PC, 1'b0}, staging_if[w].data.tmask, timeout_ctr, - operands_busy, staging_if[w].data.uuid)); + operands_busy, staging_if[w].data.uuid)) `endif timeout_ctr <= timeout_ctr + 1; end else if (ibuffer_fire) begin @@ -220,11 +220,11 @@ module VX_scoreboard import VX_gpu_pkg::*; #( `RUNTIME_ASSERT((timeout_ctr < `STALL_TIMEOUT), ("%t: *** %s timeout: wid=%0d, PC=0x%0h, tmask=%b, cycles=%0d, inuse=%b (#%0d)", $time, INSTANCE_ID, w, {staging_if[w].data.PC, 1'b0}, staging_if[w].data.tmask, timeout_ctr, - operands_busy, staging_if[w].data.uuid)); + operands_busy, staging_if[w].data.uuid)) `RUNTIME_ASSERT(~writeback_fire || inuse_regs[writeback_if.data.rd] != 0, ("%t: *** %s invalid writeback register: wid=%0d, PC=0x%0h, tmask=%b, rd=%0d (#%0d)", - $time, INSTANCE_ID, w, {writeback_if.data.PC, 1'b0}, writeback_if.data.tmask, writeback_if.data.rd, writeback_if.data.uuid)); + $time, INSTANCE_ID, w, {writeback_if.data.PC, 1'b0}, writeback_if.data.tmask, writeback_if.data.rd, writeback_if.data.uuid)) `endif end diff --git a/hw/rtl/libs/VX_axi_adapter.sv b/hw/rtl/libs/VX_axi_adapter.sv index 9cd8625606..25ce1081bf 100644 --- a/hw/rtl/libs/VX_axi_adapter.sv +++ b/hw/rtl/libs/VX_axi_adapter.sv @@ -170,7 +170,7 @@ module VX_axi_adapter #( `UNUSED_VAR (m_axi_bid[i]) `UNUSED_VAR (m_axi_bresp[i]) assign m_axi_bready[i] = 1'b1; - `RUNTIME_ASSERT(~m_axi_bvalid[i] || m_axi_bresp[i] == 0, ("%t: *** AXI response error", $time)); + `RUNTIME_ASSERT(~m_axi_bvalid[i] || m_axi_bresp[i] == 0, ("%t: *** AXI response error", $time)) end // AXI read request channel @@ -200,8 +200,8 @@ module VX_axi_adapter #( assign rsp_arb_valid_in[i] = m_axi_rvalid[i]; assign rsp_arb_data_in[i] = {m_axi_rdata[i], m_axi_rid[i]}; assign m_axi_rready[i] = 
rsp_arb_ready_in[i]; - `RUNTIME_ASSERT(~m_axi_rvalid[i] || m_axi_rlast[i] == 1, ("%t: *** AXI response error", $time)); - `RUNTIME_ASSERT(~m_axi_rvalid[i] || m_axi_rresp[i] == 0, ("%t: *** AXI response error", $time)); + `RUNTIME_ASSERT(~m_axi_rvalid[i] || m_axi_rlast[i] == 1, ("%t: *** AXI response error", $time)) + `RUNTIME_ASSERT(~m_axi_rvalid[i] || m_axi_rresp[i] == 0, ("%t: *** AXI response error", $time)) end VX_stream_arb #( diff --git a/hw/rtl/libs/VX_dp_ram.sv b/hw/rtl/libs/VX_dp_ram.sv index 64b22150cf..49f37caff8 100644 --- a/hw/rtl/libs/VX_dp_ram.sv +++ b/hw/rtl/libs/VX_dp_ram.sv @@ -59,7 +59,7 @@ module VX_dp_ram #( `UNUSED_VAR (read) if (WRENW > 1) begin - `RUNTIME_ASSERT(~write || (| wren), ("%t: invalid write enable mask", $time)); + `RUNTIME_ASSERT(~write || (| wren), ("%t: invalid write enable mask", $time)) end if (OUT_REG && !READ_ENABLE) begin @@ -341,7 +341,7 @@ module VX_dp_ram #( assign rdata_w = (prev_write && (prev_waddr == raddr)) ? prev_data : ram[raddr]; if (RW_ASSERT) begin - `RUNTIME_ASSERT(~read || (rdata_w == ram[raddr]), ("%t: read after write hazard", $time)); + `RUNTIME_ASSERT(~read || (rdata_w == ram[raddr]), ("%t: read after write hazard", $time)) end end `endif diff --git a/hw/rtl/libs/VX_fifo_queue.sv b/hw/rtl/libs/VX_fifo_queue.sv index eba9532f42..dd772ea731 100644 --- a/hw/rtl/libs/VX_fifo_queue.sv +++ b/hw/rtl/libs/VX_fifo_queue.sv @@ -162,8 +162,8 @@ module VX_fifo_queue #( end end - `RUNTIME_ASSERT(~(push && ~pop) || ~full, ("%t: runtime error: incrementing full queue", $time)); - `RUNTIME_ASSERT(~(pop && ~push) || ~empty, ("%t: runtime error: decrementing empty queue", $time)); + `RUNTIME_ASSERT(~(push && ~pop) || ~full, ("%t: runtime error: incrementing full queue", $time)) + `RUNTIME_ASSERT(~(pop && ~push) || ~empty, ("%t: runtime error: decrementing empty queue", $time)) endmodule `TRACING_ON diff --git a/hw/rtl/libs/VX_index_queue.sv b/hw/rtl/libs/VX_index_queue.sv index 23ec6ed835..e73db0ff94 100644 --- 
a/hw/rtl/libs/VX_index_queue.sv +++ b/hw/rtl/libs/VX_index_queue.sv @@ -1,10 +1,10 @@ // Copyright © 2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -20,9 +20,9 @@ module VX_index_queue #( ) ( input wire clk, input wire reset, - input wire [DATAW-1:0] write_data, + input wire [DATAW-1:0] write_data, output wire [`LOG2UP(SIZE)-1:0] write_addr, - input wire push, + input wire push, input wire pop, output wire full, output wire empty, @@ -30,33 +30,33 @@ module VX_index_queue #( output wire [DATAW-1:0] read_data ); reg [DATAW-1:0] entries [SIZE-1:0]; - reg [SIZE-1:0] valid; + reg [SIZE-1:0] valid; reg [`LOG2UP(SIZE):0] rd_ptr, wr_ptr; wire [`LOG2UP(SIZE)-1:0] rd_a, wr_a; wire enqueue, dequeue; assign rd_a = rd_ptr[`LOG2UP(SIZE)-1:0]; - assign wr_a = wr_ptr[`LOG2UP(SIZE)-1:0]; + assign wr_a = wr_ptr[`LOG2UP(SIZE)-1:0]; assign empty = (wr_ptr == rd_ptr); assign full = (wr_a == rd_a) && (wr_ptr[`LOG2UP(SIZE)] != rd_ptr[`LOG2UP(SIZE)]); - assign enqueue = push; + assign enqueue = push; assign dequeue = !empty && !valid[rd_a]; // auto-remove when head is invalid - `RUNTIME_ASSERT(!push || !full, ("%t: *** invalid inputs", $time)); - + `RUNTIME_ASSERT(!push || !full, ("%t: *** invalid inputs", $time)) + always @(posedge clk) begin if (reset) begin rd_ptr <= '0; wr_ptr <= '0; - valid <= '0; + valid <= '0; end else begin if (enqueue) begin valid[wr_a] <= 1; wr_ptr <= wr_ptr + 1; - end + end if (dequeue) begin rd_ptr <= rd_ptr + 1; end @@ -67,7 +67,7 @@ module VX_index_queue #( if (enqueue) begin entries[wr_a] <= write_data; - end + end end assign write_addr = 
wr_a; diff --git a/hw/rtl/libs/VX_mem_coalescer.sv b/hw/rtl/libs/VX_mem_coalescer.sv index b284a64490..e56d802e19 100644 --- a/hw/rtl/libs/VX_mem_coalescer.sv +++ b/hw/rtl/libs/VX_mem_coalescer.sv @@ -76,8 +76,8 @@ module VX_mem_coalescer #( `UNUSED_SPARAM (INSTANCE_ID) `STATIC_ASSERT (`IS_DIVISBLE(NUM_REQS * DATA_IN_WIDTH, DATA_OUT_WIDTH), ("invalid parameter")) `STATIC_ASSERT ((NUM_REQS * DATA_IN_WIDTH >= DATA_OUT_WIDTH), ("invalid parameter")) - `RUNTIME_ASSERT ((~in_req_valid || in_req_mask != 0), ("%t: invalid request mask", $time)); - `RUNTIME_ASSERT ((~out_rsp_valid || out_rsp_mask != 0), ("%t: invalid request mask", $time)); + `RUNTIME_ASSERT ((~in_req_valid || in_req_mask != 0), ("%t: invalid request mask", $time)) + `RUNTIME_ASSERT ((~out_rsp_valid || out_rsp_mask != 0), ("%t: invalid request mask", $time)) localparam TAG_ID_WIDTH = TAG_WIDTH - UUID_WIDTH; // tag + mask + offest @@ -331,30 +331,30 @@ module VX_mem_coalescer #( always @(posedge clk) begin if (out_req_fire) begin if (out_req_rw) begin - `TRACE(1, ("%d: %s out-req-wr: valid=%b, addr=", $time, INSTANCE_ID, out_req_mask)); - `TRACE_ARRAY1D(1, "0x%h", out_req_addr, OUT_REQS); - `TRACE(1, (", flags=")); - `TRACE_ARRAY1D(1, "%b", out_req_flags, OUT_REQS); - `TRACE(1, (", byteen=")); - `TRACE_ARRAY1D(1, "0x%h", out_req_byteen, OUT_REQS); - `TRACE(1, (", data=")); - `TRACE_ARRAY1D(1, "0x%0h", out_req_data, OUT_REQS); + `TRACE(1, ("%d: %s out-req-wr: valid=%b, addr=", $time, INSTANCE_ID, out_req_mask)) + `TRACE_ARRAY1D(1, "0x%h", out_req_addr, OUT_REQS) + `TRACE(1, (", flags=")) + `TRACE_ARRAY1D(1, "%b", out_req_flags, OUT_REQS) + `TRACE(1, (", byteen=")) + `TRACE_ARRAY1D(1, "0x%h", out_req_byteen, OUT_REQS) + `TRACE(1, (", data=")) + `TRACE_ARRAY1D(1, "0x%0h", out_req_data, OUT_REQS) end else begin - `TRACE(1, ("%d: %s out-req-rd: valid=%b, addr=", $time, INSTANCE_ID, out_req_mask)); - `TRACE_ARRAY1D(1, "0x%h", out_req_addr, OUT_REQS); - `TRACE(1, (", flags=")); - `TRACE_ARRAY1D(1, "%b", 
out_req_flags, OUT_REQS); + `TRACE(1, ("%d: %s out-req-rd: valid=%b, addr=", $time, INSTANCE_ID, out_req_mask)) + `TRACE_ARRAY1D(1, "0x%h", out_req_addr, OUT_REQS) + `TRACE(1, (", flags=")) + `TRACE_ARRAY1D(1, "%b", out_req_flags, OUT_REQS) end - `TRACE(1, (", offset=")); - `TRACE_ARRAY1D(1, "%0d", out_req_offset, NUM_REQS); - `TRACE(1, (", pmask=%b, coalesced=%0d, tag=0x%0h (#%0d)\n", out_req_pmask, $countones(out_req_pmask), out_req_tag, out_req_uuid)); + `TRACE(1, (", offset=")) + `TRACE_ARRAY1D(1, "%0d", out_req_offset, NUM_REQS) + `TRACE(1, (", pmask=%b, coalesced=%0d, tag=0x%0h (#%0d)\n", out_req_pmask, $countones(out_req_pmask), out_req_tag, out_req_uuid)) end if (out_rsp_fire) begin - `TRACE(1, ("%d: %s out-rsp: valid=%b, data=", $time, INSTANCE_ID, out_rsp_mask)); - `TRACE_ARRAY1D(1, "0x%0h", out_rsp_data, OUT_REQS); - `TRACE(1, (", offset=")); - `TRACE_ARRAY1D(1, "%0d", ibuf_dout_offset, NUM_REQS); - `TRACE(1, (", eop=%b, pmask=%b, tag=0x%0h (#%0d)\n", out_rsp_eop, ibuf_dout_pmask, out_rsp_tag, out_rsp_uuid)); + `TRACE(1, ("%d: %s out-rsp: valid=%b, data=", $time, INSTANCE_ID, out_rsp_mask)) + `TRACE_ARRAY1D(1, "0x%0h", out_rsp_data, OUT_REQS) + `TRACE(1, (", offset=")) + `TRACE_ARRAY1D(1, "%0d", ibuf_dout_offset, NUM_REQS) + `TRACE(1, (", eop=%b, pmask=%b, tag=0x%0h (#%0d)\n", out_rsp_eop, ibuf_dout_pmask, out_rsp_tag, out_rsp_uuid)) end end `endif diff --git a/hw/rtl/libs/VX_mem_scheduler.sv b/hw/rtl/libs/VX_mem_scheduler.sv index 9599adf13f..b0d8704e3c 100644 --- a/hw/rtl/libs/VX_mem_scheduler.sv +++ b/hw/rtl/libs/VX_mem_scheduler.sv @@ -97,8 +97,8 @@ module VX_mem_scheduler #( `STATIC_ASSERT (`IS_DIVISBLE(CORE_REQS * WORD_SIZE, LINE_SIZE), ("invalid parameter")) `STATIC_ASSERT ((TAG_WIDTH >= UUID_WIDTH), ("invalid parameter")) `STATIC_ASSERT ((0 == RSP_PARTIAL) || (1 == RSP_PARTIAL), ("invalid parameter")) - `RUNTIME_ASSERT((~core_req_valid || core_req_mask != 0), ("%t: invalid request mask", $time)); - + `RUNTIME_ASSERT((~core_req_valid || 
core_req_mask != 0), ("%t: invalid request mask", $time)) + wire ibuf_push; wire ibuf_pop; wire [CORE_QUEUE_ADDRW-1:0] ibuf_waddr; @@ -584,41 +584,41 @@ module VX_mem_scheduler #( always @(posedge clk) begin if (core_req_fire) begin if (core_req_rw) begin - `TRACE(1, ("%d: %s core-req-wr: valid=%b, addr=", $time, INSTANCE_ID, core_req_mask)); - `TRACE_ARRAY1D(1, "0x%h", core_req_addr, CORE_REQS); - `TRACE(1, (", byteen=")); - `TRACE_ARRAY1D(1, "0x%h", core_req_byteen, CORE_REQS); - `TRACE(1, (", data=")); - `TRACE_ARRAY1D(1, "0x%0h", core_req_data, CORE_REQS); + `TRACE(1, ("%d: %s core-req-wr: valid=%b, addr=", $time, INSTANCE_ID, core_req_mask)) + `TRACE_ARRAY1D(1, "0x%h", core_req_addr, CORE_REQS) + `TRACE(1, (", byteen=")) + `TRACE_ARRAY1D(1, "0x%h", core_req_byteen, CORE_REQS) + `TRACE(1, (", data=")) + `TRACE_ARRAY1D(1, "0x%0h", core_req_data, CORE_REQS) end else begin - `TRACE(1, ("%d: %s core-req-rd: valid=%b, addr=", $time, INSTANCE_ID, core_req_mask)); - `TRACE_ARRAY1D(1, "0x%h", core_req_addr, CORE_REQS); + `TRACE(1, ("%d: %s core-req-rd: valid=%b, addr=", $time, INSTANCE_ID, core_req_mask)) + `TRACE_ARRAY1D(1, "0x%h", core_req_addr, CORE_REQS) end - `TRACE(1, (", tag=0x%0h (#%0d)\n", core_req_tag, req_dbg_uuid)); + `TRACE(1, (", tag=0x%0h (#%0d)\n", core_req_tag, req_dbg_uuid)) end if (core_rsp_valid && core_rsp_ready) begin - `TRACE(1, ("%d: %s core-rsp: valid=%b, sop=%b, eop=%b, data=", $time, INSTANCE_ID, core_rsp_mask, core_rsp_sop, core_rsp_eop)); - `TRACE_ARRAY1D(1, "0x%0h", core_rsp_data, CORE_REQS); - `TRACE(1, (", tag=0x%0h (#%0d)\n", core_rsp_tag, rsp_dbg_uuid)); + `TRACE(1, ("%d: %s core-rsp: valid=%b, sop=%b, eop=%b, data=", $time, INSTANCE_ID, core_rsp_mask, core_rsp_sop, core_rsp_eop)) + `TRACE_ARRAY1D(1, "0x%0h", core_rsp_data, CORE_REQS) + `TRACE(1, (", tag=0x%0h (#%0d)\n", core_rsp_tag, rsp_dbg_uuid)) end if (| mem_req_fire_s) begin if (| mem_req_rw_s) begin - `TRACE(1, ("%d: %s mem-req-wr: valid=%b, addr=", $time, INSTANCE_ID, 
mem_req_mask_s)); - `TRACE_ARRAY1D(1, "0x%h", mem_req_addr_s, CORE_CHANNELS); - `TRACE(1, (", byteen=")); - `TRACE_ARRAY1D(1, "0x%h", mem_req_byteen_s, CORE_CHANNELS); - `TRACE(1, (", data=")); - `TRACE_ARRAY1D(1, "0x%0h", mem_req_data_s, CORE_CHANNELS); + `TRACE(1, ("%d: %s mem-req-wr: valid=%b, addr=", $time, INSTANCE_ID, mem_req_mask_s)) + `TRACE_ARRAY1D(1, "0x%h", mem_req_addr_s, CORE_CHANNELS) + `TRACE(1, (", byteen=")) + `TRACE_ARRAY1D(1, "0x%h", mem_req_byteen_s, CORE_CHANNELS) + `TRACE(1, (", data=")) + `TRACE_ARRAY1D(1, "0x%0h", mem_req_data_s, CORE_CHANNELS) end else begin - `TRACE(1, ("%d: %s mem-req-rd: valid=%b, addr=", $time, INSTANCE_ID, mem_req_mask_s)); - `TRACE_ARRAY1D(1, "0x%h", mem_req_addr_s, CORE_CHANNELS); + `TRACE(1, ("%d: %s mem-req-rd: valid=%b, addr=", $time, INSTANCE_ID, mem_req_mask_s)) + `TRACE_ARRAY1D(1, "0x%h", mem_req_addr_s, CORE_CHANNELS) end - `TRACE(1, (", ibuf_idx=%0d, batch_idx=%0d (#%0d)\n", ibuf_waddr_s, req_batch_idx, mem_req_dbg_uuid)); + `TRACE(1, (", ibuf_idx=%0d, batch_idx=%0d (#%0d)\n", ibuf_waddr_s, req_batch_idx, mem_req_dbg_uuid)) end if (mem_rsp_fire_s) begin - `TRACE(1, ("%d: %s mem-rsp: valid=%b, data=", $time, INSTANCE_ID, mem_rsp_mask_s)); - `TRACE_ARRAY1D(1, "0x%0h", mem_rsp_data_s, CORE_CHANNELS); - `TRACE(1, (", ibuf_idx=%0d, batch_idx=%0d (#%0d)\n", ibuf_raddr, rsp_batch_idx, mem_rsp_dbg_uuid)); + `TRACE(1, ("%d: %s mem-rsp: valid=%b, data=", $time, INSTANCE_ID, mem_rsp_mask_s)) + `TRACE_ARRAY1D(1, "0x%0h", mem_rsp_data_s, CORE_CHANNELS) + `TRACE(1, (", ibuf_idx=%0d, batch_idx=%0d (#%0d)\n", ibuf_raddr, rsp_batch_idx, mem_rsp_dbg_uuid)) end end `endif diff --git a/hw/rtl/libs/VX_scope_tap.sv b/hw/rtl/libs/VX_scope_tap.sv index c5ba778a2d..5ec39438cc 100644 --- a/hw/rtl/libs/VX_scope_tap.sv +++ b/hw/rtl/libs/VX_scope_tap.sv @@ -1,10 +1,10 @@ // Copyright © 2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
// You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -14,14 +14,14 @@ `include "VX_platform.vh" `TRACING_OFF -module VX_scope_tap #( +module VX_scope_tap #( parameter SCOPE_ID = 0, // scope identifier parameter SCOPE_IDW = 8, // scope identifier width parameter TRIGGERW = 0, // trigger signals width parameter PROBEW = 0, // probe signal width parameter SIZE = 256, // trace buffer size parameter IDLE_CTRW = 16 // idle time between triggers counter width -) ( +) ( input wire clk, input wire reset, input wire start, @@ -29,16 +29,16 @@ module VX_scope_tap #( input wire [TRIGGERW-1:0] triggers, input wire [PROBEW-1:0] probes, input wire bus_in, - output wire bus_out + output wire bus_out ); localparam TX_DATAW = 64; localparam TX_DATA_BITS = `LOG2UP(TX_DATAW); - localparam DATAW = PROBEW + TRIGGERW; + localparam DATAW = PROBEW + TRIGGERW; localparam DATA_BITS = `LOG2UP(DATAW); localparam ADDRW = `CLOG2(SIZE); localparam TRIGGER_ENABLE = (TRIGGERW != 0); - localparam MAX_IDLE_CTR = (2 ** IDLE_CTRW) - 1; - + localparam MAX_IDLE_CTR = (2 ** IDLE_CTRW) - 1; + localparam CTRL_STATE_IDLE = 2'd0; localparam CTRL_STATE_RECV = 2'd1; localparam CTRL_STATE_CMD = 2'd2; @@ -80,7 +80,7 @@ module VX_scope_tap #( reg [TAP_STATE_BITS-1:0] tap_state; reg [CTRL_STATE_BITS-1:0] ctrl_state; reg [GET_TYPE_BITS-1:0] get_type; - + reg [TX_DATA_BITS-1:0] ser_tx_ctr; reg [DATA_BITS-1:0] read_offset; reg [ADDRW-1:0] raddr; @@ -109,20 +109,20 @@ module VX_scope_tap #( case (tap_state) TAP_STATE_IDLE: begin - if (start || cmd_start) begin + if (start || cmd_start) begin delta <= '0; - delta_flush <= 1; + delta_flush <= 1; if (0 == start_delay) begin tap_state <= TAP_STATE_RUN; start_time <= timestamp; `ifdef DBG_TRACE_SCOPE - `TRACE(2, 
("%d: *** scope #%0d: recording start - time=%0d\n", $time, SCOPE_ID, timestamp)); + `TRACE(2, ("%d: *** scope #%0d: recording start - time=%0d\n", $time, SCOPE_ID, timestamp)) `endif end else begin tap_state <= TAP_STATE_WAIT; - delay_cntr <= start_delay; + delay_cntr <= start_delay; `ifdef DBG_TRACE_SCOPE - `TRACE(2, ("%d: *** scope #%0d: delayed start - time=%0d\n", $time, SCOPE_ID, start_delay)); + `TRACE(2, ("%d: *** scope #%0d: delayed start - time=%0d\n", $time, SCOPE_ID, start_delay)) `endif end end @@ -133,13 +133,13 @@ module VX_scope_tap #( tap_state <= TAP_STATE_RUN; start_time <= timestamp; `ifdef DBG_TRACE_SCOPE - `TRACE(2, ("%d: *** scope #%0d: recording start - time=%0d\n", $time, SCOPE_ID, timestamp)); + `TRACE(2, ("%d: *** scope #%0d: recording start - time=%0d\n", $time, SCOPE_ID, timestamp)) `endif end end TAP_STATE_RUN: begin if (TRIGGER_ENABLE != 0) begin - if (delta_flush || (triggers != prev_triggers)) begin + if (delta_flush || (triggers != prev_triggers)) begin data_store[waddr] <= {probes, triggers}; delta_store[waddr] <= delta; waddr <= waddr + 1; @@ -150,7 +150,7 @@ module VX_scope_tap #( delta_flush <= (delta == (MAX_IDLE_CTR-1)); end prev_triggers <= triggers; - end else begin + end else begin data_store[waddr] <= {probes, triggers}; delta_store[waddr] <= '0; waddr <= waddr + 1; @@ -158,26 +158,26 @@ module VX_scope_tap #( if (stop || (waddr >= waddr_end)) begin waddr <= waddr; `ifdef DBG_TRACE_SCOPE - `TRACE(2, ("%d: *** scope #%0d: recording stop - waddr=(%0d, %0d)\n", $time, SCOPE_ID, waddr, waddr_end)); + `TRACE(2, ("%d: *** scope #%0d: recording stop - waddr=(%0d, %0d)\n", $time, SCOPE_ID, waddr, waddr_end)) `endif - tap_state <= TAP_STATE_IDLE; + tap_state <= TAP_STATE_IDLE; end end default:; endcase - - if (ctrl_state == CTRL_STATE_SEND + + if (ctrl_state == CTRL_STATE_SEND && get_type == GET_TYPE_DATA && ser_tx_ctr == 0) begin if (~read_data) begin read_data <= 1; end else begin if (DATAW > TX_DATAW) begin - 
`IGNORE_WARNINGS_BEGIN + `IGNORE_WARNINGS_BEGIN if (read_offset < DATA_BITS'(DATAW-TX_DATAW)) begin read_offset <= read_offset + DATA_BITS'(TX_DATAW); end else begin - raddr <= raddr_n; + raddr <= raddr_n; read_data <= 0; read_offset <= '0; end @@ -185,7 +185,7 @@ module VX_scope_tap #( end else begin raddr <= raddr_n; read_data <= 0; - end + end if (raddr_n == waddr) begin raddr <= 0; end @@ -197,9 +197,9 @@ module VX_scope_tap #( // // command controller // - + reg bus_out_r; - + reg [TX_DATAW-1:0] ser_buf_in; wire [TX_DATAW-1:0] ser_buf_in_n = {ser_buf_in[TX_DATAW-2:0], bus_in}; `UNUSED_VAR (ser_buf_in) @@ -210,16 +210,16 @@ module VX_scope_tap #( wire [TX_DATAW-1:0] data_chunk = TX_DATAW'(DATAW'(data_store[raddr] >> read_offset)); wire [TX_DATAW-1:0] get_data = read_data ? data_chunk : TX_DATAW'(delta_store[raddr]); - + always @(posedge clk) begin if (reset) begin ctrl_state <= CTRL_STATE_IDLE; cmd_start <= 0; start_delay <= '0; - waddr_end <= ADDRW'(SIZE-1); - bus_out_r <= 0; + waddr_end <= ADDRW'(SIZE-1); + bus_out_r <= 0; end else begin - bus_out_r <= 0; + bus_out_r <= 0; cmd_start <= 0; case (ctrl_state) @@ -236,9 +236,9 @@ module VX_scope_tap #( ctrl_state <= (cmd_scope_id == SCOPE_ID) ? 
CTRL_STATE_CMD : CTRL_STATE_IDLE; end end - CTRL_STATE_CMD: begin + CTRL_STATE_CMD: begin ctrl_state <= CTRL_STATE_IDLE; - case (cmd_type) + case (cmd_type) CMD_SET_START: begin start_delay <= 64'(cmd_data); cmd_start <= 1; @@ -249,16 +249,16 @@ module VX_scope_tap #( CMD_GET_WIDTH, CMD_GET_START, CMD_GET_COUNT, - CMD_GET_DATA: begin - ctrl_state <= CTRL_STATE_SEND; + CMD_GET_DATA: begin + ctrl_state <= CTRL_STATE_SEND; get_type <= GET_TYPE_BITS'(cmd_type); ser_tx_ctr <= TX_DATA_BITS'(TX_DATAW-1); bus_out_r <= 1; end default:; - endcase + endcase `ifdef DBG_TRACE_SCOPE - `TRACE(2, ("%d: *** scope #%0d: CMD: type=%0d\n", $time, SCOPE_ID, cmd_type)); + `TRACE(2, ("%d: *** scope #%0d: CMD: type=%0d\n", $time, SCOPE_ID, cmd_type)) `endif end CTRL_STATE_SEND: begin @@ -268,43 +268,43 @@ module VX_scope_tap #( bus_out_r <= 1'(DATAW >> ser_tx_ctr); `ifdef DBG_TRACE_SCOPE if (ser_tx_ctr == 0) begin - `TRACE(2, ("%d: *** scope #%0d: SEND width=%0d\n", $time, SCOPE_ID, DATAW)); - end - `endif + `TRACE(2, ("%d: *** scope #%0d: SEND width=%0d\n", $time, SCOPE_ID, DATAW)) + end + `endif end GET_TYPE_COUNT: begin bus_out_r <= 1'(count >> ser_tx_ctr); `ifdef DBG_TRACE_SCOPE if (ser_tx_ctr == 0) begin - `TRACE(2, ("%d: *** scope #%0d: SEND count=%0d\n", $time, SCOPE_ID, count)); - end - `endif + `TRACE(2, ("%d: *** scope #%0d: SEND count=%0d\n", $time, SCOPE_ID, count)) + end + `endif end GET_TYPE_START: begin - bus_out_r <= 1'(start_time >> ser_tx_ctr); + bus_out_r <= 1'(start_time >> ser_tx_ctr); `ifdef DBG_TRACE_SCOPE if (ser_tx_ctr == 0) begin - `TRACE(2, ("%d: *** scope #%0d: SEND start=%0d\n", $time, SCOPE_ID, start_time)); - end - `endif + `TRACE(2, ("%d: *** scope #%0d: SEND start=%0d\n", $time, SCOPE_ID, start_time)) + end + `endif end GET_TYPE_DATA: begin bus_out_r <= 1'(get_data >> ser_tx_ctr); `ifdef DBG_TRACE_SCOPE if (ser_tx_ctr == 0) begin - `TRACE(2, ("%d: *** scope #%0d: SEND data=%0d\n", $time, SCOPE_ID, get_data)); - end - `endif + `TRACE(2, ("%d: *** scope 
#%0d: SEND data=%0d\n", $time, SCOPE_ID, get_data)) + end + `endif end default:; endcase if (ser_tx_ctr == 0) begin ctrl_state <= CTRL_STATE_IDLE; - end + end end default:; endcase - end + end end assign bus_out = bus_out_r; diff --git a/hw/rtl/mem/VX_gbar_unit.sv b/hw/rtl/mem/VX_gbar_unit.sv index 3e5bbebcb5..7e03c13784 100644 --- a/hw/rtl/mem/VX_gbar_unit.sv +++ b/hw/rtl/mem/VX_gbar_unit.sv @@ -61,10 +61,10 @@ module VX_gbar_unit #( always @(posedge clk) begin if (gbar_bus_if.req_valid && gbar_bus_if.req_ready) begin `TRACE(1, ("%d: %s acquire: bar_id=%0d, size=%0d, core_id=%0d\n", - $time, INSTANCE_ID, gbar_bus_if.req_id, gbar_bus_if.req_size_m1, gbar_bus_if.req_core_id)); + $time, INSTANCE_ID, gbar_bus_if.req_id, gbar_bus_if.req_size_m1, gbar_bus_if.req_core_id)) end if (gbar_bus_if.rsp_valid) begin - `TRACE(1, ("%d: %s release: bar_id=%0d\n", $time, INSTANCE_ID, gbar_bus_if.rsp_id)); + `TRACE(1, ("%d: %s release: bar_id=%0d\n", $time, INSTANCE_ID, gbar_bus_if.rsp_id)) end end `endif diff --git a/hw/rtl/mem/VX_local_mem.sv b/hw/rtl/mem/VX_local_mem.sv index 700bcb48c4..1c03b03870 100644 --- a/hw/rtl/mem/VX_local_mem.sv +++ b/hw/rtl/mem/VX_local_mem.sv @@ -331,15 +331,15 @@ module VX_local_mem import VX_gpu_pkg::*; #( if (mem_bus_if[i].req_valid && mem_bus_if[i].req_ready) begin if (mem_bus_if[i].req_data.rw) begin `TRACE(1, ("%d: %s wr-req: req_idx=%0d, addr=0x%0h, tag=0x%0h, byteen=0x%h, data=0x%h (#%0d)\n", - $time, INSTANCE_ID, i, mem_bus_if[i].req_data.addr, mem_bus_if[i].req_data.tag, mem_bus_if[i].req_data.byteen, mem_bus_if[i].req_data.data, req_uuid[i])); + $time, INSTANCE_ID, i, mem_bus_if[i].req_data.addr, mem_bus_if[i].req_data.tag, mem_bus_if[i].req_data.byteen, mem_bus_if[i].req_data.data, req_uuid[i])) end else begin `TRACE(1, ("%d: %s rd-req: req_idx=%0d, addr=0x%0h, tag=0x%0h (#%0d)\n", - $time, INSTANCE_ID, i, mem_bus_if[i].req_data.addr, mem_bus_if[i].req_data.tag, req_uuid[i])); + $time, INSTANCE_ID, i, mem_bus_if[i].req_data.addr, 
mem_bus_if[i].req_data.tag, req_uuid[i])) end end if (mem_bus_if[i].rsp_valid && mem_bus_if[i].rsp_ready) begin `TRACE(1, ("%d: %s rd-rsp: req_idx=%0d, tag=0x%0h, data=0x%h (#%0d)\n", - $time, INSTANCE_ID, i, mem_bus_if[i].rsp_data.tag, mem_bus_if[i].rsp_data.data[i], rsp_uuid[i])); + $time, INSTANCE_ID, i, mem_bus_if[i].rsp_data.tag, mem_bus_if[i].rsp_data.data[i], rsp_uuid[i])) end end end @@ -349,15 +349,15 @@ module VX_local_mem import VX_gpu_pkg::*; #( if (per_bank_req_valid[i] && per_bank_req_ready[i]) begin if (per_bank_req_rw[i]) begin `TRACE(2, ("%d: %s-bank%0d wr-req: addr=0x%0h, tag=0x%0h, byteen=0x%h, data=0x%h (#%0d)\n", - $time, INSTANCE_ID, i, per_bank_req_addr[i], per_bank_req_tag[i], per_bank_req_byteen[i], per_bank_req_data[i], per_bank_req_uuid[i])); + $time, INSTANCE_ID, i, per_bank_req_addr[i], per_bank_req_tag[i], per_bank_req_byteen[i], per_bank_req_data[i], per_bank_req_uuid[i])) end else begin `TRACE(2, ("%d: %s-bank%0d rd-req: addr=0x%0h, tag=0x%0h (#%0d)\n", - $time, INSTANCE_ID, i, per_bank_req_addr[i], per_bank_req_tag[i], per_bank_req_uuid[i])); + $time, INSTANCE_ID, i, per_bank_req_addr[i], per_bank_req_tag[i], per_bank_req_uuid[i])) end end if (per_bank_rsp_valid[i] && per_bank_rsp_ready[i]) begin `TRACE(2, ("%d: %s-bank%0d rd-rsp: tag=0x%0h, data=0x%h (#%0d)\n", - $time, INSTANCE_ID, i, per_bank_rsp_tag[i], per_bank_rsp_data[i], per_bank_rsp_uuid[i])); + $time, INSTANCE_ID, i, per_bank_rsp_tag[i], per_bank_rsp_data[i], per_bank_rsp_uuid[i])) end end end From cc105eaea96f69bb8ff913893aa13cf1f03eb579 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sun, 8 Sep 2024 14:54:04 -0700 Subject: [PATCH 165/407] tracing refactoring --- hw/rtl/VX_gpu_pkg.sv | 85 +++++++++++++++++++++++++---------- hw/rtl/Vortex.sv | 5 ++- hw/rtl/cache/VX_cache_bank.sv | 12 ++--- hw/rtl/cache/VX_cache_mshr.sv | 23 ++++++---- hw/rtl/cache/VX_cache_tags.sv | 10 +++-- hw/rtl/cache/VX_cache_wrap.sv | 10 +++-- 6 files changed, 99 insertions(+), 46 deletions(-) diff 
--git a/hw/rtl/VX_gpu_pkg.sv b/hw/rtl/VX_gpu_pkg.sv index 7748b8eec6..e5afefe8e3 100644 --- a/hw/rtl/VX_gpu_pkg.sv +++ b/hw/rtl/VX_gpu_pkg.sv @@ -467,13 +467,43 @@ package VX_gpu_pkg; case (`INST_SFU_BITS'(op_type)) `INST_SFU_TMC: `TRACE(level, ("TMC")) `INST_SFU_WSPAWN:`TRACE(level, ("WSPAWN")) - `INST_SFU_SPLIT: begin if (op_args.wctl.is_neg) `TRACE(level, ("SPLIT.N")) else `TRACE(level, ("SPLIT")) end + `INST_SFU_SPLIT: begin + if (op_args.wctl.is_neg) begin + `TRACE(level, ("SPLIT.N")) + end else begin + `TRACE(level, ("SPLIT")) + end + end `INST_SFU_JOIN: `TRACE(level, ("JOIN")) `INST_SFU_BAR: `TRACE(level, ("BAR")) - `INST_SFU_PRED: begin if (op_args.wctl.is_neg) `TRACE(level, ("PRED.N")) else `TRACE(level, ("PRED")) end - `INST_SFU_CSRRW: begin if (op_args.csr.use_imm) `TRACE(level, ("CSRRWI")) else `TRACE(level, ("CSRRW")) end - `INST_SFU_CSRRS: begin if (op_args.csr.use_imm) `TRACE(level, ("CSRRSI")) else `TRACE(level, ("CSRRS")) end - `INST_SFU_CSRRC: begin if (op_args.csr.use_imm) `TRACE(level, ("CSRRCI")) else `TRACE(level, ("CSRRC")) end + `INST_SFU_PRED: begin + if (op_args.wctl.is_neg) begin + `TRACE(level, ("PRED.N")) + end else begin + `TRACE(level, ("PRED")) + end + end + `INST_SFU_CSRRW: begin + if (op_args.csr.use_imm) begin + `TRACE(level, ("CSRRWI")) + end else begin + `TRACE(level, ("CSRRW")) + end + end + `INST_SFU_CSRRS: begin + if (op_args.csr.use_imm) begin + `TRACE(level, ("CSRRSI")) + end else begin + `TRACE(level, ("CSRRS")) + end + end + `INST_SFU_CSRRC: begin + if (op_args.csr.use_imm) begin + `TRACE(level, ("CSRRCI")) + end else begin + `TRACE(level, ("CSRRC")) + end + end default: `TRACE(level, ("?")) endcase end @@ -482,60 +512,69 @@ package VX_gpu_pkg; case (`INST_FPU_BITS'(op_type)) `INST_FPU_ADD: begin if (op_args.fpu.fmt[1]) begin - if (op_args.fpu.fmt[0]) + if (op_args.fpu.fmt[0]) begin `TRACE(level, ("FSUB.D")) - else + end else begin `TRACE(level, ("FSUB.S")) + end end else begin - if (op_args.fpu.fmt[0]) + if 
(op_args.fpu.fmt[0]) begin `TRACE(level, ("FADD.D")) - else + end else begin `TRACE(level, ("FADD.S")) + end end end `INST_FPU_MADD: begin if (op_args.fpu.fmt[1]) begin - if (op_args.fpu.fmt[0]) + if (op_args.fpu.fmt[0]) begin `TRACE(level, ("FMSUB.D")) - else + end else begin `TRACE(level, ("FMSUB.S")) + end end else begin - if (op_args.fpu.fmt[0]) + if (op_args.fpu.fmt[0]) begin `TRACE(level, ("FMADD.D")) - else + end else begin `TRACE(level, ("FMADD.S")) + end end end `INST_FPU_NMADD: begin if (op_args.fpu.fmt[1]) begin - if (op_args.fpu.fmt[0]) + if (op_args.fpu.fmt[0]) begin `TRACE(level, ("FNMSUB.D")) - else + end else begin `TRACE(level, ("FNMSUB.S")) + end end else begin - if (op_args.fpu.fmt[0]) + if (op_args.fpu.fmt[0]) begin `TRACE(level, ("FNMADD.D")) - else + end else begin `TRACE(level, ("FNMADD.S")) + end end end `INST_FPU_MUL: begin - if (op_args.fpu.fmt[0]) + if (op_args.fpu.fmt[0]) begin `TRACE(level, ("FMUL.D")) - else + end else begin `TRACE(level, ("FMUL.S")) + end end `INST_FPU_DIV: begin - if (op_args.fpu.fmt[0]) + if (op_args.fpu.fmt[0]) begin `TRACE(level, ("FDIV.D")) - else + end else begin `TRACE(level, ("FDIV.S")) + end end `INST_FPU_SQRT: begin - if (op_args.fpu.fmt[0]) + if (op_args.fpu.fmt[0]) begin `TRACE(level, ("FSQRT.D")) - else + end else begin `TRACE(level, ("FSQRT.S")) + end end `INST_FPU_CMP: begin if (op_args.fpu.fmt[0]) begin diff --git a/hw/rtl/Vortex.sv b/hw/rtl/Vortex.sv index dc9f6f0344..0263e17904 100644 --- a/hw/rtl/Vortex.sv +++ b/hw/rtl/Vortex.sv @@ -198,10 +198,11 @@ module Vortex import VX_gpu_pkg::*; ( `ifdef DBG_TRACE_MEM always @(posedge clk) begin if (mem_req_fire) begin - if (mem_req_rw) + if (mem_req_rw) begin `TRACE(1, ("%d: MEM Wr Req: addr=0x%0h, tag=0x%0h, byteen=0x%h data=0x%h\n", $time, `TO_FULL_ADDR(mem_req_addr), mem_req_tag, mem_req_byteen, mem_req_data)) - else + end else begin `TRACE(1, ("%d: MEM Rd Req: addr=0x%0h, tag=0x%0h, byteen=0x%h\n", $time, `TO_FULL_ADDR(mem_req_addr), mem_req_tag, 
mem_req_byteen)) + end end if (mem_rsp_fire) begin `TRACE(1, ("%d: MEM Rd Rsp: tag=0x%0h, data=0x%h\n", $time, mem_rsp_tag, mem_rsp_data)) diff --git a/hw/rtl/cache/VX_cache_bank.sv b/hw/rtl/cache/VX_cache_bank.sv index e18be4b665..a8355ac76d 100644 --- a/hw/rtl/cache/VX_cache_bank.sv +++ b/hw/rtl/cache/VX_cache_bank.sv @@ -672,21 +672,23 @@ module VX_cache_bank #( `TRACE(2, ("%d: %s mshr-pop: addr=0x%0h, tag=0x%0h, req_idx=%0d (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(replay_addr, BANK_ID), replay_tag, replay_idx, req_uuid_sel)) end if (core_req_fire) begin - if (core_req_rw) + if (core_req_rw) begin `TRACE(2, ("%d: %s core-wr-req: addr=0x%0h, tag=0x%0h, req_idx=%0d, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(core_req_addr, BANK_ID), core_req_tag, core_req_idx, core_req_byteen, core_req_data, req_uuid_sel)) - else + end else begin `TRACE(2, ("%d: %s core-rd-req: addr=0x%0h, tag=0x%0h, req_idx=%0d (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(core_req_addr, BANK_ID), core_req_tag, core_req_idx, req_uuid_sel)) + end end if (crsp_queue_fire) begin `TRACE(2, ("%d: %s core-rd-rsp: addr=0x%0h, tag=0x%0h, req_idx=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID), crsp_queue_tag, crsp_queue_idx, crsp_queue_data, req_uuid_st1)) end if (mreq_queue_push) begin - if (do_creq_wr_st1 && !WRITEBACK) + if (do_creq_wr_st1 && !WRITEBACK) begin `TRACE(2, ("%d: %s writethrough: addr=0x%0h, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mreq_queue_addr, BANK_ID), mreq_queue_byteen, mreq_queue_data, req_uuid_st1)) - else if (do_writeback_st1) + end else if (do_writeback_st1) begin `TRACE(2, ("%d: %s writeback: addr=0x%0h, byteen=0x%h, data=0x%h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mreq_queue_addr, BANK_ID), mreq_queue_byteen, mreq_queue_data)) - else + end else begin `TRACE(2, ("%d: %s fill-req: addr=0x%0h, mshr_id=%0d (#%0d)\n", $time, INSTANCE_ID, 
`CS_LINE_TO_FULL_ADDR(mreq_queue_addr, BANK_ID), mreq_queue_id, req_uuid_st1)) + end end end `endif diff --git a/hw/rtl/cache/VX_cache_mshr.sv b/hw/rtl/cache/VX_cache_mshr.sv index 855b953243..d771a20e0f 100644 --- a/hw/rtl/cache/VX_cache_mshr.sv +++ b/hw/rtl/cache/VX_cache_mshr.sv @@ -267,32 +267,39 @@ module VX_cache_mshr #( end else begin show_table <= allocate_fire || lookup_valid || finalize_valid || fill_valid || dequeue_fire; end - if (allocate_fire) + if (allocate_fire) begin `TRACE(3, ("%d: %s allocate: addr=0x%0h, prev=%0d, id=%0d (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(allocate_addr, BANK_ID), allocate_prev, allocate_id, lkp_req_uuid)) - if (lookup_valid) + end + if (lookup_valid) begin `TRACE(3, ("%d: %s lookup: addr=0x%0h, matches=%b (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(lookup_addr, BANK_ID), lookup_pending, lkp_req_uuid)) - if (finalize_valid) + end + if (finalize_valid) begin `TRACE(3, ("%d: %s finalize release=%b, pending=%b, prev=%0d, id=%0d (#%0d)\n", $time, INSTANCE_ID, finalize_release, finalize_pending, finalize_prev, finalize_id, fin_req_uuid)) - if (fill_valid) + end + if (fill_valid) begin `TRACE(3, ("%d: %s fill: addr=0x%0h, addr=0x%0h, id=%0d\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(addr_table[fill_id], BANK_ID), `CS_LINE_TO_FULL_ADDR(fill_addr, BANK_ID), fill_id)) - if (dequeue_fire) + end + if (dequeue_fire) begin `TRACE(3, ("%d: %s dequeue: addr=0x%0h, id=%0d (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(dequeue_addr, BANK_ID), dequeue_id_r, deq_req_uuid)) + end if (show_table) begin `TRACE(3, ("%d: %s table", $time, INSTANCE_ID)) for (integer i = 0; i < MSHR_SIZE; ++i) begin if (valid_table[i]) begin `TRACE(3, (" %0d=0x%0h", i, `CS_LINE_TO_FULL_ADDR(addr_table[i], BANK_ID))) - if (write_table[i]) + if (write_table[i]) begin `TRACE(3, ("(w)")) - else + end else begin `TRACE(3, ("(r)")) - if (next_table[i]) + end + if (next_table[i]) begin `TRACE(3, ("->%0d", next_index[i])) + end end end 
`TRACE(3, ("\n")) diff --git a/hw/rtl/cache/VX_cache_tags.sv b/hw/rtl/cache/VX_cache_tags.sv index dc2e77092f..b6c3735b5d 100644 --- a/hw/rtl/cache/VX_cache_tags.sv +++ b/hw/rtl/cache/VX_cache_tags.sv @@ -159,15 +159,17 @@ module VX_cache_tags #( end if (lookup && ~stall) begin if (tag_matches != 0) begin - if (write) + if (write) begin `TRACE(3, ("%d: %s write-hit: addr=0x%0h, way=%b, blk_addr=%0d, tag_id=0x%0h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), tag_matches, line_sel, line_tag, req_uuid)) - else + end else begin `TRACE(3, ("%d: %s read-hit: addr=0x%0h, way=%b, blk_addr=%0d, tag_id=0x%0h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), tag_matches, line_sel, line_tag, req_uuid)) + end end else begin - if (write) + if (write) begin `TRACE(3, ("%d: %s write-miss: addr=0x%0h, blk_addr=%0d, tag_id=0x%0h, (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), line_sel, line_tag, req_uuid)) - else + end else begin `TRACE(3, ("%d: %s read-miss: addr=0x%0h, blk_addr=%0d, tag_id=0x%0h, (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), line_sel, line_tag, req_uuid)) + end end end end diff --git a/hw/rtl/cache/VX_cache_wrap.sv b/hw/rtl/cache/VX_cache_wrap.sv index 6210c313e0..5787689818 100644 --- a/hw/rtl/cache/VX_cache_wrap.sv +++ b/hw/rtl/cache/VX_cache_wrap.sv @@ -233,10 +233,11 @@ module VX_cache_wrap import VX_gpu_pkg::*; #( always @(posedge clk) begin if (core_req_fire) begin - if (core_bus_if[i].req_data.rw) + if (core_bus_if[i].req_data.rw) begin `TRACE(1, ("%d: %s core-wr-req: addr=0x%0h, tag=0x%0h, req_idx=%0d, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `TO_FULL_ADDR(core_bus_if[i].req_data.addr), core_bus_if[i].req_data.tag, i, core_bus_if[i].req_data.byteen, core_bus_if[i].req_data.data, core_req_uuid)) - else + end else begin `TRACE(1, ("%d: %s core-rd-req: addr=0x%0h, tag=0x%0h, req_idx=%0d (#%0d)\n", $time, INSTANCE_ID, 
`TO_FULL_ADDR(core_bus_if[i].req_data.addr), core_bus_if[i].req_data.tag, i, core_req_uuid)) + end end if (core_rsp_fire) begin `TRACE(1, ("%d: %s core-rd-rsp: tag=0x%0h, req_idx=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID, core_bus_if[i].rsp_data.tag, i, core_bus_if[i].rsp_data.data, core_rsp_uuid)) @@ -260,12 +261,13 @@ module VX_cache_wrap import VX_gpu_pkg::*; #( always @(posedge clk) begin if (mem_req_fire) begin - if (mem_bus_if.req_data.rw) + if (mem_bus_if.req_data.rw) begin `TRACE(1, ("%d: %s mem-wr-req: addr=0x%0h, tag=0x%0h, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `TO_FULL_ADDR(mem_bus_if.req_data.addr), mem_bus_if.req_data.tag, mem_bus_if.req_data.byteen, mem_bus_if.req_data.data, mem_req_uuid)) - else + end else begin `TRACE(1, ("%d: %s mem-rd-req: addr=0x%0h, tag=0x%0h (#%0d)\n", $time, INSTANCE_ID, `TO_FULL_ADDR(mem_bus_if.req_data.addr), mem_bus_if.req_data.tag, mem_req_uuid)) + end end if (mem_rsp_fire) begin `TRACE(1, ("%d: %s mem-rd-rsp: tag=0x%0h, data=0x%h (#%0d)\n", From b1dc2fba42709ab2f3b4c335f4d490b9444131b8 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sun, 8 Sep 2024 17:47:17 -0700 Subject: [PATCH 166/407] cache read byteenable bug fix --- hw/rtl/cache/VX_cache_bank.sv | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/hw/rtl/cache/VX_cache_bank.sv b/hw/rtl/cache/VX_cache_bank.sv index a8355ac76d..4abd7bad93 100644 --- a/hw/rtl/cache/VX_cache_bank.sv +++ b/hw/rtl/cache/VX_cache_bank.sv @@ -617,9 +617,18 @@ module VX_cache_bank #( assign mreq_queue_flush = creq_flush_st1; if (WRITE_ENABLE) begin - assign mreq_queue_rw = WRITEBACK ? is_fill_or_flush_st1 : rw_st1; - assign mreq_queue_data = WRITEBACK ? dirty_data_st1 : write_data_st1; - assign mreq_queue_byteen = WRITEBACK ? dirty_byteen_st1 : write_byteen_st1; + if (WRITEBACK) begin + assign mreq_queue_rw = is_fill_or_flush_st1; + assign mreq_queue_data = dirty_data_st1; + assign mreq_queue_byteen = is_fill_or_flush_st1 ? 
dirty_byteen_st1 : '1; + end else begin + assign mreq_queue_rw = rw_st1; + assign mreq_queue_data = write_data_st1; + assign mreq_queue_byteen = rw_st1 ? write_byteen_st1 : '1; + `UNUSED_VAR (is_fill_or_flush_st1) + `UNUSED_VAR (dirty_data_st1) + `UNUSED_VAR (dirty_byteen_st1) + end end else begin assign mreq_queue_rw = 0; assign mreq_queue_data = '0; From 207840a97e36197e3ff16cf5120b2ec4e2c3ad53 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sun, 8 Sep 2024 17:49:28 -0700 Subject: [PATCH 167/407] minor update --- hw/rtl/afu/opae/vortex_afu.sv | 120 ++++++++++++++++------------------ hw/rtl/afu/xrt/VX_afu_wrap.sv | 8 +-- 2 files changed, 61 insertions(+), 67 deletions(-) diff --git a/hw/rtl/afu/opae/vortex_afu.sv b/hw/rtl/afu/opae/vortex_afu.sv index 2ebd66fcfd..1fbb9d1b74 100644 --- a/hw/rtl/afu/opae/vortex_afu.sv +++ b/hw/rtl/afu/opae/vortex_afu.sv @@ -260,7 +260,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ mmio_rsp.data <= 64'({cout_q_dout_s, !cout_q_empty, 8'(state)}); `ifdef DBG_TRACE_AFU if (state != STATE_WIDTH'(mmio_rsp.data)) begin - `TRACE(2, ("%d: MMIO_STATUS: addr=0x%0h, state=%0d\n", $time, mmio_req_hdr.address, state)) + `TRACE(2, ("%d: AFU: MMIO_STATUS: addr=0x%0h, state=%0d\n", $time, mmio_req_hdr.address, state)) end `endif end @@ -268,28 +268,28 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ MMIO_SCOPE_READ: begin mmio_rsp.data <= cmd_scope_rdata; `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: MMIO_SCOPE_READ: data=0x%h\n", $time, cmd_scope_rdata)) + `TRACE(2, ("%d: AFU: MMIO_SCOPE_READ: data=0x%h\n", $time, cmd_scope_rdata)) `endif end `endif MMIO_DEV_CAPS: begin mmio_rsp.data <= dev_caps; `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: MMIO_DEV_CAPS: data=0x%h\n", $time, dev_caps)) + `TRACE(2, ("%d: AFU: MMIO_DEV_CAPS: data=0x%h\n", $time, dev_caps)) `endif end MMIO_ISA_CAPS: begin mmio_rsp.data <= isa_caps; `ifdef DBG_TRACE_AFU if (state != STATE_WIDTH'(mmio_rsp.data)) begin - 
`TRACE(2, ("%d: MMIO_ISA_CAPS: data=%0d\n", $time, isa_caps)) + `TRACE(2, ("%d: AFU: MMIO_ISA_CAPS: data=%0d\n", $time, isa_caps)) end `endif end default: begin mmio_rsp.data <= 64'h0; `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: Unknown MMIO Rd: addr=0x%0h\n", $time, mmio_req_hdr.address)) + `TRACE(2, ("%d: AFU: Unknown MMIO Rd: addr=0x%0h\n", $time, mmio_req_hdr.address)) `endif end endcase @@ -303,30 +303,30 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ MMIO_CMD_ARG0: begin cmd_args[0] <= 64'(cp2af_sRxPort.c0.data); `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: MMIO_CMD_ARG0: data=0x%h\n", $time, 64'(cp2af_sRxPort.c0.data))) + `TRACE(2, ("%d: AFU: MMIO_CMD_ARG0: data=0x%h\n", $time, 64'(cp2af_sRxPort.c0.data))) `endif end MMIO_CMD_ARG1: begin cmd_args[1] <= 64'(cp2af_sRxPort.c0.data); `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: MMIO_CMD_ARG1: data=0x%h\n", $time, 64'(cp2af_sRxPort.c0.data))) + `TRACE(2, ("%d: AFU: MMIO_CMD_ARG1: data=0x%h\n", $time, 64'(cp2af_sRxPort.c0.data))) `endif end MMIO_CMD_ARG2: begin cmd_args[2] <= 64'(cp2af_sRxPort.c0.data); `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: MMIO_CMD_ARG2: data=%0d\n", $time, 64'(cp2af_sRxPort.c0.data))) + `TRACE(2, ("%d: AFU: MMIO_CMD_ARG2: data=%0d\n", $time, 64'(cp2af_sRxPort.c0.data))) `endif end MMIO_CMD_TYPE: begin `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: MMIO_CMD_TYPE: data=%0d\n", $time, 64'(cp2af_sRxPort.c0.data))) + `TRACE(2, ("%d: AFU: MMIO_CMD_TYPE: data=%0d\n", $time, 64'(cp2af_sRxPort.c0.data))) `endif end `ifdef SCOPE MMIO_SCOPE_WRITE: begin `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: MMIO_SCOPE_WRITE: data=0x%h\n", $time, cmd_scope_wdata)) + `TRACE(2, ("%d: AFU: MMIO_SCOPE_WRITE: data=0x%h\n", $time, cmd_scope_wdata)) `endif end `endif @@ -344,56 +344,48 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ wire cmd_mem_rd_done; reg cmd_mem_wr_done; + reg [`CLOG2(`RESET_DELAY+1)-1:0] vx_reset_ctr; reg vx_busy_wait; - reg vx_running; + reg vx_reset = 1; // 
asserted at initialization wire vx_busy; - reg [`CLOG2(`RESET_DELAY+1)-1:0] vx_reset_ctr; - always @(posedge clk) begin - if (state == STATE_RUN) begin - vx_reset_ctr <= vx_reset_ctr + $bits(vx_reset_ctr)'(1); - end else begin - vx_reset_ctr <= '0; - end - end - wire is_mmio_wr_cmd = cp2af_sRxPort.c0.mmioWrValid && (MMIO_CMD_TYPE == mmio_req_hdr.address); wire [CMD_TYPE_WIDTH-1:0] cmd_type = is_mmio_wr_cmd ? CMD_TYPE_WIDTH'(cp2af_sRxPort.c0.data) : CMD_TYPE_WIDTH'(CMD_IDLE); always @(posedge clk) begin if (reset) begin - state <= STATE_IDLE; - vx_busy_wait <= 0; - vx_running <= 0; + state <= STATE_IDLE; + vx_reset <= 1; end else begin case (state) STATE_IDLE: begin case (cmd_type) CMD_MEM_READ: begin `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: STATE MEM_READ: ia=0x%0h addr=0x%0h size=%0d\n", $time, cmd_io_addr, cmd_mem_addr, cmd_data_size)) + `TRACE(2, ("%d: AFU: Goto STATE MEM_READ: ia=0x%0h addr=0x%0h size=%0d\n", $time, cmd_io_addr, cmd_mem_addr, cmd_data_size)) `endif state <= STATE_MEM_READ; end CMD_MEM_WRITE: begin `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: STATE MEM_WRITE: ia=0x%0h addr=0x%0h size=%0d\n", $time, cmd_io_addr, cmd_mem_addr, cmd_data_size)) + `TRACE(2, ("%d: AFU: Goto STATE MEM_WRITE: ia=0x%0h addr=0x%0h size=%0d\n", $time, cmd_io_addr, cmd_mem_addr, cmd_data_size)) `endif state <= STATE_MEM_WRITE; end CMD_DCR_WRITE: begin `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: STATE DCR_WRITE: addr=0x%0h data=%0d\n", $time, cmd_dcr_addr, cmd_dcr_data)) + `TRACE(2, ("%d: AFU: Goto STATE DCR_WRITE: addr=0x%0h data=%0d\n", $time, cmd_dcr_addr, cmd_dcr_data)) `endif state <= STATE_DCR_WRITE; end CMD_RUN: begin `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: STATE RUN\n", $time)) + `TRACE(2, ("%d: AFU: Goto STATE RUN\n", $time)) `endif state <= STATE_RUN; - vx_running <= 0; + vx_reset_ctr <= (`RESET_DELAY-1); + vx_reset <= 1; end default: begin state <= state; @@ -404,54 +396,56 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ if (cmd_mem_rd_done) 
begin state <= STATE_IDLE; `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: STATE IDLE\n", $time)) + `TRACE(2, ("%d: AFU: Goto STATE IDLE\n", $time)) `endif end end STATE_MEM_WRITE: begin if (cmd_mem_wr_done) begin state <= STATE_IDLE; - `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: STATE IDLE\n", $time)) - `endif end end STATE_DCR_WRITE: begin state <= STATE_IDLE; `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: STATE IDLE\n", $time)) + `TRACE(2, ("%d: AFU: Goto STATE IDLE\n", $time)) `endif end STATE_RUN: begin - if (vx_running) begin - if (vx_busy_wait) begin - // wait until the gpu goes busy - if (vx_busy) begin - vx_busy_wait <= 0; - end - end else begin - // wait until the gpu is not busy - if (~vx_busy) begin - state <= STATE_IDLE; - `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: AFU: End execution\n", $time)) - `TRACE(2, ("%d: STATE IDLE\n", $time)) - `endif - end - end + if (vx_reset) begin + // wait until the reset network is ready + if (vx_reset_ctr == 0) begin + `ifdef DBG_TRACE_AFU + `TRACE(2, ("%d: AFU: Begin execution\n", $time)) + `endif + vx_busy_wait <= 1; + vx_reset <= 0; + end end else begin - // wait until the reset sequence is complete - if (vx_reset_ctr == (`RESET_DELAY-1)) begin - `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: AFU: Begin execution\n", $time)) - `endif - vx_running <= 1; - vx_busy_wait <= 1; - end + if (vx_busy_wait) begin + // wait until processor goes busy + if (vx_busy) begin + vx_busy_wait <= 0; + end + end else begin + // wait until the processor is not busy + if (~vx_busy) begin + `ifdef DBG_TRACE_AFU + `TRACE(2, ("%d: AFU: End execution\n", $time)) + `TRACE(2, ("%d: AFU: Goto STATE IDLE\n", $time)) + `endif + state <= STATE_IDLE; + end + end end end default:; endcase + + // ensure reset network initialization + if (vx_reset_ctr != '0) begin + vx_reset_ctr <= vx_reset_ctr - 1; + end end end @@ -745,7 +739,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ cci_rd_req_addr <= cci_rd_req_addr + 1; cci_rd_req_ctr <= cci_rd_req_ctr 
+ $bits(cci_rd_req_ctr)'(1); `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: CCI Rd Req: addr=0x%0h, tag=0x%0h, rem=%0d, pending=%0d\n", $time, cci_rd_req_addr, cci_rd_req_tag, (cmd_data_size - cci_rd_req_ctr - 1), cci_pending_reads)) + `TRACE(2, ("%d: AFU: CCI Rd Req: addr=0x%0h, tag=0x%0h, rem=%0d, pending=%0d\n", $time, cci_rd_req_addr, cci_rd_req_tag, (cmd_data_size - cci_rd_req_ctr - 1), cci_pending_reads)) `endif end @@ -755,13 +749,13 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ cci_mem_wr_req_addr_base <= cci_mem_wr_req_addr_base + CCI_ADDR_WIDTH'(CCI_RD_WINDOW_SIZE); end `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: CCI Rd Rsp: idx=%0d, ctr=%0d, data=0x%h\n", $time, cci_rd_rsp_tag, cci_rd_rsp_ctr, cp2af_sRxPort.c0.data)) + `TRACE(2, ("%d: AFU: CCI Rd Rsp: idx=%0d, ctr=%0d, data=0x%h\n", $time, cci_rd_rsp_tag, cci_rd_rsp_ctr, cp2af_sRxPort.c0.data)) `endif end if (cci_rdq_pop) begin `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: CCI Rd Queue Pop: pending=%0d\n", $time, cci_pending_reads)) + `TRACE(2, ("%d: AFU: CCI Rd Queue Pop: pending=%0d\n", $time, cci_pending_reads)) `endif end @@ -899,13 +893,13 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ cci_wr_req_done <= 1; end `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: CCI Wr Req: addr=0x%0h, rem=%0d, pending=%0d, data=0x%h\n", $time, cci_wr_req_addr, (cci_wr_req_ctr - 1), cci_pending_writes, af2cp_sTxPort.c1.data)) + `TRACE(2, ("%d: AFU: CCI Wr Req: addr=0x%0h, rem=%0d, pending=%0d, data=0x%h\n", $time, cci_wr_req_addr, (cci_wr_req_ctr - 1), cci_pending_writes, af2cp_sTxPort.c1.data)) `endif end if (cci_wr_rsp_fire) begin `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: CCI Wr Rsp: pending=%0d\n", $time, cci_pending_writes)) + `TRACE(2, ("%d: AFU: CCI Wr Rsp: pending=%0d\n", $time, cci_pending_writes)) `endif end end @@ -933,7 +927,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ `SCOPE_IO_BIND (1) .clk (clk), - .reset (reset || 
~vx_running), + .reset (vx_reset), // Memory request .mem_req_valid (vx_mem_req_valid), diff --git a/hw/rtl/afu/xrt/VX_afu_wrap.sv b/hw/rtl/afu/xrt/VX_afu_wrap.sv index 1efda8029e..d2a3f4c515 100644 --- a/hw/rtl/afu/xrt/VX_afu_wrap.sv +++ b/hw/rtl/afu/xrt/VX_afu_wrap.sv @@ -126,17 +126,16 @@ module VX_afu_wrap #( if (reset || ap_reset) begin state <= STATE_IDLE; vx_pending_writes <= '0; - vx_reset_ctr <= (`RESET_DELAY-1); vx_reset <= 1; end else begin case (state) STATE_IDLE: begin if (ap_start) begin `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: STATE RUN\n", $time)) + `TRACE(2, ("%d: AFU: Goto STATE RUN\n", $time)) `endif state <= STATE_RUN; - vx_reset_ctr <= 0; + vx_reset_ctr <= (`RESET_DELAY-1); vx_reset <= 1; end end @@ -161,6 +160,7 @@ module VX_afu_wrap #( if (~vx_busy) begin `ifdef DBG_TRACE_AFU `TRACE(2, ("%d: AFU: End execution\n", $time)) + `TRACE(2, ("%d: AFU: Goto STATE IDLE\n", $time)) `endif state <= STATE_IDLE; end @@ -170,7 +170,7 @@ module VX_afu_wrap #( endcase // ensure reset network initialization - if (vx_reset_ctr != 0) begin + if (vx_reset_ctr != '0) begin vx_reset_ctr <= vx_reset_ctr - 1; end From 202af1e783362f5feab4932eccf64d4d15ecb6c4 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sun, 8 Sep 2024 20:33:27 -0700 Subject: [PATCH 168/407] rtl bug fix --- hw/rtl/fpu/VX_fpu_dsp.sv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/rtl/fpu/VX_fpu_dsp.sv b/hw/rtl/fpu/VX_fpu_dsp.sv index 22e2b652dc..a04f96c3b8 100644 --- a/hw/rtl/fpu/VX_fpu_dsp.sv +++ b/hw/rtl/fpu/VX_fpu_dsp.sv @@ -255,7 +255,7 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #( .TAG_WIDTH (TAG_WIDTH) ) fpu_sqrt ( .clk (clk), - .reset (div_sqrt_reset), + .reset (reset), .valid_in (div_sqrt_valid_in[1]), .ready_in (div_sqrt_ready_in[1]), .mask_in (div_sqrt_mask_in[1]), From b56aa00f4f282b39fc5df11929b19f11a7b84a99 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sun, 8 Sep 2024 20:37:28 -0700 Subject: [PATCH 169/407] reset cleanup --- hw/rtl/VX_cluster.sv | 6 ++---- 
hw/rtl/core/VX_core.sv | 23 +++++++---------------- hw/rtl/core/VX_execute.sv | 14 ++++---------- 3 files changed, 13 insertions(+), 30 deletions(-) diff --git a/hw/rtl/VX_cluster.sv b/hw/rtl/VX_cluster.sv index ef845ae07f..3e93244378 100644 --- a/hw/rtl/VX_cluster.sv +++ b/hw/rtl/VX_cluster.sv @@ -56,14 +56,12 @@ module VX_cluster import VX_gpu_pkg::*; #( VX_gbar_bus_if per_socket_gbar_bus_if[`NUM_SOCKETS](); VX_gbar_bus_if gbar_bus_if(); - `RESET_RELAY (gbar_reset, reset); - VX_gbar_arb #( .NUM_REQS (`NUM_SOCKETS), .OUT_BUF ((`NUM_SOCKETS > 2) ? 1 : 0) // bgar_unit has no backpressure ) gbar_arb ( .clk (clk), - .reset (gbar_reset), + .reset (reset), .bus_in_if (per_socket_gbar_bus_if), .bus_out_if (gbar_bus_if) ); @@ -72,7 +70,7 @@ module VX_cluster import VX_gpu_pkg::*; #( .INSTANCE_ID ($sformatf("gbar%0d", CLUSTER_ID)) ) gbar_unit ( .clk (clk), - .reset (gbar_reset), + .reset (reset), .gbar_bus_if (gbar_bus_if) ); diff --git a/hw/rtl/core/VX_core.sv b/hw/rtl/core/VX_core.sv index f97370e892..d9f3de6876 100644 --- a/hw/rtl/core/VX_core.sv +++ b/hw/rtl/core/VX_core.sv @@ -75,13 +75,6 @@ module VX_core import VX_gpu_pkg::*; #( assign mem_perf_tmp_if.mem = mem_perf_if.mem; `endif - `RESET_RELAY (schedule_reset, reset); - `RESET_RELAY (fetch_reset, reset); - `RESET_RELAY (decode_reset, reset); - `RESET_RELAY (issue_reset, reset); - `RESET_RELAY (execute_reset, reset); - `RESET_RELAY (commit_reset, reset); - base_dcrs_t base_dcrs; VX_dcr_data dcr_data ( @@ -98,7 +91,7 @@ module VX_core import VX_gpu_pkg::*; #( .CORE_ID (CORE_ID) ) schedule ( .clk (clk), - .reset (schedule_reset), + .reset (reset), `ifdef PERF_ENABLE .sched_perf (pipeline_perf_if.sched), @@ -126,7 +119,7 @@ module VX_core import VX_gpu_pkg::*; #( ) fetch ( `SCOPE_IO_BIND (0) .clk (clk), - .reset (fetch_reset), + .reset (reset), .icache_bus_if (icache_bus_if), .schedule_if (schedule_if), .fetch_if (fetch_if) @@ -136,7 +129,7 @@ module VX_core import VX_gpu_pkg::*; #( .INSTANCE_ID 
($sformatf("%s-decode", INSTANCE_ID)) ) decode ( .clk (clk), - .reset (decode_reset), + .reset (reset), .fetch_if (fetch_if), .decode_if (decode_if), .decode_sched_if(decode_sched_if) @@ -148,7 +141,7 @@ module VX_core import VX_gpu_pkg::*; #( `SCOPE_IO_BIND (1) .clk (clk), - .reset (issue_reset), + .reset (reset), `ifdef PERF_ENABLE .issue_perf (pipeline_perf_if.issue), @@ -166,7 +159,7 @@ module VX_core import VX_gpu_pkg::*; #( `SCOPE_IO_BIND (2) .clk (clk), - .reset (execute_reset), + .reset (reset), `ifdef PERF_ENABLE .mem_perf_if (mem_perf_tmp_if), @@ -191,7 +184,7 @@ module VX_core import VX_gpu_pkg::*; #( .INSTANCE_ID ($sformatf("%s-commit", INSTANCE_ID)) ) commit ( .clk (clk), - .reset (commit_reset), + .reset (reset), .commit_if (commit_if), @@ -201,13 +194,11 @@ module VX_core import VX_gpu_pkg::*; #( .commit_sched_if(commit_sched_if) ); - `RESET_RELAY (lmem_unit_reset, reset); - VX_mem_unit #( .INSTANCE_ID (INSTANCE_ID) ) mem_unit ( .clk (clk), - .reset (lmem_unit_reset), + .reset (reset), `ifdef PERF_ENABLE .lmem_perf (mem_perf_tmp_if.lmem), `endif diff --git a/hw/rtl/core/VX_execute.sv b/hw/rtl/core/VX_execute.sv index ded25918c6..6c148649b0 100644 --- a/hw/rtl/core/VX_execute.sv +++ b/hw/rtl/core/VX_execute.sv @@ -51,15 +51,11 @@ module VX_execute import VX_gpu_pkg::*; #( VX_fpu_csr_if fpu_csr_if[`NUM_FPU_BLOCKS](); `endif - `RESET_RELAY (alu_reset, reset); - `RESET_RELAY (lsu_reset, reset); - `RESET_RELAY (sfu_reset, reset); - VX_alu_unit #( .INSTANCE_ID ($sformatf("%s-alu", INSTANCE_ID)) ) alu_unit ( .clk (clk), - .reset (alu_reset), + .reset (reset), .dispatch_if (dispatch_if[`EX_ALU * `ISSUE_WIDTH +: `ISSUE_WIDTH]), .commit_if (commit_if[`EX_ALU * `ISSUE_WIDTH +: `ISSUE_WIDTH]), .branch_ctl_if (branch_ctl_if) @@ -72,20 +68,18 @@ module VX_execute import VX_gpu_pkg::*; #( ) lsu_unit ( `SCOPE_IO_BIND (0) .clk (clk), - .reset (lsu_reset), + .reset (reset), .dispatch_if (dispatch_if[`EX_LSU * `ISSUE_WIDTH +: `ISSUE_WIDTH]), .commit_if 
(commit_if[`EX_LSU * `ISSUE_WIDTH +: `ISSUE_WIDTH]), .lsu_mem_if (lsu_mem_if) ); `ifdef EXT_F_ENABLE - `RESET_RELAY (fpu_reset, reset); - VX_fpu_unit #( .INSTANCE_ID ($sformatf("%s-fpu", INSTANCE_ID)) ) fpu_unit ( .clk (clk), - .reset (fpu_reset), + .reset (reset), .dispatch_if (dispatch_if[`EX_FPU * `ISSUE_WIDTH +: `ISSUE_WIDTH]), .commit_if (commit_if[`EX_FPU * `ISSUE_WIDTH +: `ISSUE_WIDTH]), .fpu_csr_if (fpu_csr_if) @@ -97,7 +91,7 @@ module VX_execute import VX_gpu_pkg::*; #( .CORE_ID (CORE_ID) ) sfu_unit ( .clk (clk), - .reset (sfu_reset), + .reset (reset), `ifdef PERF_ENABLE .mem_perf_if (mem_perf_if), .pipeline_perf_if (pipeline_perf_if), From 63840a20da52a951ec21d8902e59519b40cdc40b Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Mon, 9 Sep 2024 06:10:56 -0700 Subject: [PATCH 170/407] minor update --- hw/rtl/core/VX_schedule.sv | 3 ++- hw/rtl/core/VX_uuid_gen.sv | 7 ++++--- hw/rtl/libs/VX_generic_arbiter.sv | 2 +- hw/rtl/libs/VX_mem_scheduler.sv | 11 +++++------ 4 files changed, 12 insertions(+), 11 deletions(-) diff --git a/hw/rtl/core/VX_schedule.sv b/hw/rtl/core/VX_schedule.sv index 6916d3e00a..77e00156bf 100644 --- a/hw/rtl/core/VX_schedule.sv +++ b/hw/rtl/core/VX_schedule.sv @@ -334,7 +334,8 @@ module VX_schedule import VX_gpu_pkg::*; #( wire [`UUID_WIDTH-1:0] instr_uuid; `ifndef NDEBUG VX_uuid_gen #( - .CORE_ID (CORE_ID) + .CORE_ID (CORE_ID), + .UUID_WIDTH (`UUID_WIDTH) ) uuid_gen ( .clk (clk), .reset (reset), diff --git a/hw/rtl/core/VX_uuid_gen.sv b/hw/rtl/core/VX_uuid_gen.sv index 8dca50e91f..cbde9091d3 100644 --- a/hw/rtl/core/VX_uuid_gen.sv +++ b/hw/rtl/core/VX_uuid_gen.sv @@ -14,15 +14,16 @@ `include "VX_define.vh" module VX_uuid_gen import VX_gpu_pkg::*; #( - parameter CORE_ID = 0 + parameter CORE_ID = 0, + parameter UUID_WIDTH = 48 ) ( input wire clk, input wire reset, input wire incr, input wire [`NW_WIDTH-1:0] wid, - output wire [`UUID_WIDTH-1:0] uuid + output wire [UUID_WIDTH-1:0] uuid ); - localparam GNW_WIDTH = `UUID_WIDTH - 32; + 
localparam GNW_WIDTH = UUID_WIDTH - 32; reg [31:0] uuid_cntrs [0:`NUM_WARPS-1]; reg [`NUM_WARPS-1:0] has_uuid_cntrs; diff --git a/hw/rtl/libs/VX_generic_arbiter.sv b/hw/rtl/libs/VX_generic_arbiter.sv index 3a3737d04c..5cc9a9aab4 100644 --- a/hw/rtl/libs/VX_generic_arbiter.sv +++ b/hw/rtl/libs/VX_generic_arbiter.sv @@ -90,7 +90,7 @@ module VX_generic_arbiter #( end - `RUNTIME_ASSERT ((~(| requests) || (grant_valid && (requests[grant_index] != 0) && (grant_onehot == (NUM_REQS'(1) << grant_index)))), ("%t: invalid arbiter grant!", $time)) + `RUNTIME_ASSERT (((~(| requests) != 1) || (grant_valid && (requests[grant_index] != 0) && (grant_onehot == (NUM_REQS'(1) << grant_index)))), ("%t: invalid arbiter grant!", $time)) endmodule `TRACING_ON diff --git a/hw/rtl/libs/VX_mem_scheduler.sv b/hw/rtl/libs/VX_mem_scheduler.sv index b0d8704e3c..24ad5cdf18 100644 --- a/hw/rtl/libs/VX_mem_scheduler.sv +++ b/hw/rtl/libs/VX_mem_scheduler.sv @@ -96,9 +96,8 @@ module VX_mem_scheduler #( `STATIC_ASSERT (`IS_DIVISBLE(CORE_REQS * WORD_SIZE, LINE_SIZE), ("invalid parameter")) `STATIC_ASSERT ((TAG_WIDTH >= UUID_WIDTH), ("invalid parameter")) - `STATIC_ASSERT ((0 == RSP_PARTIAL) || (1 == RSP_PARTIAL), ("invalid parameter")) `RUNTIME_ASSERT((~core_req_valid || core_req_mask != 0), ("%t: invalid request mask", $time)) - + wire ibuf_push; wire ibuf_pop; wire [CORE_QUEUE_ADDRW-1:0] ibuf_waddr; @@ -435,7 +434,7 @@ module VX_mem_scheduler #( end end - if (RSP_PARTIAL == 1) begin + if (RSP_PARTIAL != 0) begin reg [CORE_QUEUE_SIZE-1:0] rsp_sop_r; @@ -462,14 +461,14 @@ module VX_mem_scheduler #( end else begin reg [CORE_BATCHES*CORE_CHANNELS*WORD_WIDTH-1:0] rsp_store [CORE_QUEUE_SIZE-1:0]; - reg [CORE_BATCHES*CORE_CHANNELS*WORD_WIDTH-1:0] rsp_store_n; + reg [CORE_BATCHES-1:00][CORE_CHANNELS-1:0][WORD_WIDTH-1:0] rsp_store_n; reg [CORE_REQS-1:0] rsp_orig_mask [CORE_QUEUE_SIZE-1:0]; always @(*) begin rsp_store_n = rsp_store[ibuf_raddr]; for (integer i = 0; i < CORE_CHANNELS; ++i) begin if 
((CORE_CHANNELS == 1) || mem_rsp_mask_s[i]) begin - rsp_store_n[(rsp_batch_idx * CORE_CHANNELS + i) * WORD_WIDTH +: WORD_WIDTH] = mem_rsp_data_s[i]; + rsp_store_n[rsp_batch_idx][i] = mem_rsp_data_s[i]; end end end @@ -490,7 +489,7 @@ module VX_mem_scheduler #( for (genvar r = 0; r < CORE_REQS; ++r) begin localparam i = r / CORE_CHANNELS; localparam j = r % CORE_CHANNELS; - assign crsp_data[r] = rsp_store_n[(i * CORE_CHANNELS + j) * WORD_WIDTH +: WORD_WIDTH]; + assign crsp_data[r] = rsp_store_n[i][j]; end assign mem_rsp_ready_s = crsp_ready || ~rsp_complete; From 83d65e2cf16c913a7242cb8318231bed5138da35 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Tue, 10 Sep 2024 16:22:34 -0700 Subject: [PATCH 171/407] tracing update --- hw/rtl/Vortex.sv | 6 ++-- hw/rtl/afu/opae/vortex_afu.sv | 56 +++++++++++++++++------------------ hw/rtl/afu/xrt/VX_afu_wrap.sv | 16 +++++----- hw/rtl/cache/VX_cache_bank.sv | 18 +++++------ hw/rtl/cache/VX_cache_data.sv | 8 ++--- hw/rtl/cache/VX_cache_mshr.sv | 12 ++++---- hw/rtl/cache/VX_cache_tags.sv | 14 ++++----- hw/rtl/cache/VX_cache_wrap.sv | 12 ++++---- hw/rtl/core/VX_alu_int.sv | 2 +- hw/rtl/core/VX_commit.sv | 2 +- hw/rtl/core/VX_dcr_data.sv | 2 +- hw/rtl/core/VX_decode.sv | 2 +- hw/rtl/core/VX_fetch.sv | 4 +-- hw/rtl/core/VX_issue_slice.sv | 2 +- hw/rtl/core/VX_lsu_slice.sv | 8 ++--- hw/rtl/core/VX_scoreboard.sv | 2 +- 16 files changed, 83 insertions(+), 83 deletions(-) diff --git a/hw/rtl/Vortex.sv b/hw/rtl/Vortex.sv index 0263e17904..fd7ef02677 100644 --- a/hw/rtl/Vortex.sv +++ b/hw/rtl/Vortex.sv @@ -199,13 +199,13 @@ module Vortex import VX_gpu_pkg::*; ( always @(posedge clk) begin if (mem_req_fire) begin if (mem_req_rw) begin - `TRACE(1, ("%d: MEM Wr Req: addr=0x%0h, tag=0x%0h, byteen=0x%h data=0x%h\n", $time, `TO_FULL_ADDR(mem_req_addr), mem_req_tag, mem_req_byteen, mem_req_data)) + `TRACE(1, ("%t: MEM Wr Req: addr=0x%0h, tag=0x%0h, byteen=0x%h data=0x%h\n", $time, `TO_FULL_ADDR(mem_req_addr), mem_req_tag, mem_req_byteen, 
mem_req_data)) end else begin - `TRACE(1, ("%d: MEM Rd Req: addr=0x%0h, tag=0x%0h, byteen=0x%h\n", $time, `TO_FULL_ADDR(mem_req_addr), mem_req_tag, mem_req_byteen)) + `TRACE(1, ("%t: MEM Rd Req: addr=0x%0h, tag=0x%0h, byteen=0x%h\n", $time, `TO_FULL_ADDR(mem_req_addr), mem_req_tag, mem_req_byteen)) end end if (mem_rsp_fire) begin - `TRACE(1, ("%d: MEM Rd Rsp: tag=0x%0h, data=0x%h\n", $time, mem_rsp_tag, mem_rsp_data)) + `TRACE(1, ("%t: MEM Rd Rsp: tag=0x%0h, data=0x%h\n", $time, mem_rsp_tag, mem_rsp_data)) end end `endif diff --git a/hw/rtl/afu/opae/vortex_afu.sv b/hw/rtl/afu/opae/vortex_afu.sv index 1fbb9d1b74..d97be483dd 100644 --- a/hw/rtl/afu/opae/vortex_afu.sv +++ b/hw/rtl/afu/opae/vortex_afu.sv @@ -260,7 +260,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ mmio_rsp.data <= 64'({cout_q_dout_s, !cout_q_empty, 8'(state)}); `ifdef DBG_TRACE_AFU if (state != STATE_WIDTH'(mmio_rsp.data)) begin - `TRACE(2, ("%d: AFU: MMIO_STATUS: addr=0x%0h, state=%0d\n", $time, mmio_req_hdr.address, state)) + `TRACE(2, ("%t: AFU: MMIO_STATUS: addr=0x%0h, state=%0d\n", $time, mmio_req_hdr.address, state)) end `endif end @@ -268,28 +268,28 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ MMIO_SCOPE_READ: begin mmio_rsp.data <= cmd_scope_rdata; `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: AFU: MMIO_SCOPE_READ: data=0x%h\n", $time, cmd_scope_rdata)) + `TRACE(2, ("%t: AFU: MMIO_SCOPE_READ: data=0x%h\n", $time, cmd_scope_rdata)) `endif end `endif MMIO_DEV_CAPS: begin mmio_rsp.data <= dev_caps; `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: AFU: MMIO_DEV_CAPS: data=0x%h\n", $time, dev_caps)) + `TRACE(2, ("%t: AFU: MMIO_DEV_CAPS: data=0x%h\n", $time, dev_caps)) `endif end MMIO_ISA_CAPS: begin mmio_rsp.data <= isa_caps; `ifdef DBG_TRACE_AFU if (state != STATE_WIDTH'(mmio_rsp.data)) begin - `TRACE(2, ("%d: AFU: MMIO_ISA_CAPS: data=%0d\n", $time, isa_caps)) + `TRACE(2, ("%t: AFU: MMIO_ISA_CAPS: data=%0d\n", $time, isa_caps)) end 
`endif end default: begin mmio_rsp.data <= 64'h0; `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: AFU: Unknown MMIO Rd: addr=0x%0h\n", $time, mmio_req_hdr.address)) + `TRACE(2, ("%t: AFU: Unknown MMIO Rd: addr=0x%0h\n", $time, mmio_req_hdr.address)) `endif end endcase @@ -303,36 +303,36 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ MMIO_CMD_ARG0: begin cmd_args[0] <= 64'(cp2af_sRxPort.c0.data); `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: AFU: MMIO_CMD_ARG0: data=0x%h\n", $time, 64'(cp2af_sRxPort.c0.data))) + `TRACE(2, ("%t: AFU: MMIO_CMD_ARG0: data=0x%h\n", $time, 64'(cp2af_sRxPort.c0.data))) `endif end MMIO_CMD_ARG1: begin cmd_args[1] <= 64'(cp2af_sRxPort.c0.data); `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: AFU: MMIO_CMD_ARG1: data=0x%h\n", $time, 64'(cp2af_sRxPort.c0.data))) + `TRACE(2, ("%t: AFU: MMIO_CMD_ARG1: data=0x%h\n", $time, 64'(cp2af_sRxPort.c0.data))) `endif end MMIO_CMD_ARG2: begin cmd_args[2] <= 64'(cp2af_sRxPort.c0.data); `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: AFU: MMIO_CMD_ARG2: data=%0d\n", $time, 64'(cp2af_sRxPort.c0.data))) + `TRACE(2, ("%t: AFU: MMIO_CMD_ARG2: data=%0d\n", $time, 64'(cp2af_sRxPort.c0.data))) `endif end MMIO_CMD_TYPE: begin `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: AFU: MMIO_CMD_TYPE: data=%0d\n", $time, 64'(cp2af_sRxPort.c0.data))) + `TRACE(2, ("%t: AFU: MMIO_CMD_TYPE: data=%0d\n", $time, 64'(cp2af_sRxPort.c0.data))) `endif end `ifdef SCOPE MMIO_SCOPE_WRITE: begin `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: AFU: MMIO_SCOPE_WRITE: data=0x%h\n", $time, cmd_scope_wdata)) + `TRACE(2, ("%t: AFU: MMIO_SCOPE_WRITE: data=0x%h\n", $time, cmd_scope_wdata)) `endif end `endif default: begin `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: Unknown MMIO Wr: addr=0x%0h, data=0x%h\n", $time, mmio_req_hdr.address, 64'(cp2af_sRxPort.c0.data))) + `TRACE(2, ("%t: Unknown MMIO Wr: addr=0x%0h, data=0x%h\n", $time, mmio_req_hdr.address, 64'(cp2af_sRxPort.c0.data))) `endif end endcase @@ -363,25 +363,25 @@ module vortex_afu import ccip_if_pkg::*; 
import local_mem_cfg_pkg::*; import VX_ case (cmd_type) CMD_MEM_READ: begin `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: AFU: Goto STATE MEM_READ: ia=0x%0h addr=0x%0h size=%0d\n", $time, cmd_io_addr, cmd_mem_addr, cmd_data_size)) + `TRACE(2, ("%t: AFU: Goto STATE MEM_READ: ia=0x%0h addr=0x%0h size=%0d\n", $time, cmd_io_addr, cmd_mem_addr, cmd_data_size)) `endif state <= STATE_MEM_READ; end CMD_MEM_WRITE: begin `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: AFU: Goto STATE MEM_WRITE: ia=0x%0h addr=0x%0h size=%0d\n", $time, cmd_io_addr, cmd_mem_addr, cmd_data_size)) + `TRACE(2, ("%t: AFU: Goto STATE MEM_WRITE: ia=0x%0h addr=0x%0h size=%0d\n", $time, cmd_io_addr, cmd_mem_addr, cmd_data_size)) `endif state <= STATE_MEM_WRITE; end CMD_DCR_WRITE: begin `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: AFU: Goto STATE DCR_WRITE: addr=0x%0h data=%0d\n", $time, cmd_dcr_addr, cmd_dcr_data)) + `TRACE(2, ("%t: AFU: Goto STATE DCR_WRITE: addr=0x%0h data=%0d\n", $time, cmd_dcr_addr, cmd_dcr_data)) `endif state <= STATE_DCR_WRITE; end CMD_RUN: begin `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: AFU: Goto STATE RUN\n", $time)) + `TRACE(2, ("%t: AFU: Goto STATE RUN\n", $time)) `endif state <= STATE_RUN; vx_reset_ctr <= (`RESET_DELAY-1); @@ -396,7 +396,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ if (cmd_mem_rd_done) begin state <= STATE_IDLE; `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: AFU: Goto STATE IDLE\n", $time)) + `TRACE(2, ("%t: AFU: Goto STATE IDLE\n", $time)) `endif end end @@ -408,7 +408,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ STATE_DCR_WRITE: begin state <= STATE_IDLE; `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: AFU: Goto STATE IDLE\n", $time)) + `TRACE(2, ("%t: AFU: Goto STATE IDLE\n", $time)) `endif end STATE_RUN: begin @@ -416,7 +416,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ // wait until the reset network is ready if (vx_reset_ctr == 0) begin `ifdef DBG_TRACE_AFU - `TRACE(2, 
("%d: AFU: Begin execution\n", $time)) + `TRACE(2, ("%t: AFU: Begin execution\n", $time)) `endif vx_busy_wait <= 1; vx_reset <= 0; @@ -431,8 +431,8 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ // wait until the processor is not busy if (~vx_busy) begin `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: AFU: End execution\n", $time)) - `TRACE(2, ("%d: AFU: Goto STATE IDLE\n", $time)) + `TRACE(2, ("%t: AFU: End execution\n", $time)) + `TRACE(2, ("%t: AFU: Goto STATE IDLE\n", $time)) `endif state <= STATE_IDLE; end @@ -739,7 +739,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ cci_rd_req_addr <= cci_rd_req_addr + 1; cci_rd_req_ctr <= cci_rd_req_ctr + $bits(cci_rd_req_ctr)'(1); `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: AFU: CCI Rd Req: addr=0x%0h, tag=0x%0h, rem=%0d, pending=%0d\n", $time, cci_rd_req_addr, cci_rd_req_tag, (cmd_data_size - cci_rd_req_ctr - 1), cci_pending_reads)) + `TRACE(2, ("%t: AFU: CCI Rd Req: addr=0x%0h, tag=0x%0h, rem=%0d, pending=%0d\n", $time, cci_rd_req_addr, cci_rd_req_tag, (cmd_data_size - cci_rd_req_ctr - 1), cci_pending_reads)) `endif end @@ -749,13 +749,13 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ cci_mem_wr_req_addr_base <= cci_mem_wr_req_addr_base + CCI_ADDR_WIDTH'(CCI_RD_WINDOW_SIZE); end `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: AFU: CCI Rd Rsp: idx=%0d, ctr=%0d, data=0x%h\n", $time, cci_rd_rsp_tag, cci_rd_rsp_ctr, cp2af_sRxPort.c0.data)) + `TRACE(2, ("%t: AFU: CCI Rd Rsp: idx=%0d, ctr=%0d, data=0x%h\n", $time, cci_rd_rsp_tag, cci_rd_rsp_ctr, cp2af_sRxPort.c0.data)) `endif end if (cci_rdq_pop) begin `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: AFU: CCI Rd Queue Pop: pending=%0d\n", $time, cci_pending_reads)) + `TRACE(2, ("%t: AFU: CCI Rd Queue Pop: pending=%0d\n", $time, cci_pending_reads)) `endif end @@ -893,13 +893,13 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ cci_wr_req_done <= 1; end `ifdef 
DBG_TRACE_AFU - `TRACE(2, ("%d: AFU: CCI Wr Req: addr=0x%0h, rem=%0d, pending=%0d, data=0x%h\n", $time, cci_wr_req_addr, (cci_wr_req_ctr - 1), cci_pending_writes, af2cp_sTxPort.c1.data)) + `TRACE(2, ("%t: AFU: CCI Wr Req: addr=0x%0h, rem=%0d, pending=%0d, data=0x%h\n", $time, cci_wr_req_addr, (cci_wr_req_ctr - 1), cci_pending_writes, af2cp_sTxPort.c1.data)) `endif end if (cci_wr_rsp_fire) begin `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: AFU: CCI Wr Rsp: pending=%0d\n", $time, cci_pending_writes)) + `TRACE(2, ("%t: AFU: CCI Wr Rsp: pending=%0d\n", $time, cci_pending_writes)) `endif end end @@ -1080,13 +1080,13 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ always @(posedge clk) begin for (integer i = 0; i < NUM_LOCAL_MEM_BANKS; ++i) begin if (avs_write[i] && ~avs_waitrequest[i]) begin - `TRACE(2, ("%d: AVS Wr Req [%0d]: addr=0x%0h, byteen=0x%0h, burst=0x%0h, data=0x%h\n", $time, i, `TO_FULL_ADDR(avs_address[i]), avs_byteenable[i], avs_burstcount[i], avs_writedata[i])) + `TRACE(2, ("%t: AVS Wr Req [%0d]: addr=0x%0h, byteen=0x%0h, burst=0x%0h, data=0x%h\n", $time, i, `TO_FULL_ADDR(avs_address[i]), avs_byteenable[i], avs_burstcount[i], avs_writedata[i])) end if (avs_read[i] && ~avs_waitrequest[i]) begin - `TRACE(2, ("%d: AVS Rd Req [%0d]: addr=0x%0h, byteen=0x%0h, burst=0x%0h\n", $time, i, `TO_FULL_ADDR(avs_address[i]), avs_byteenable[i], avs_burstcount[i])) + `TRACE(2, ("%t: AVS Rd Req [%0d]: addr=0x%0h, byteen=0x%0h, burst=0x%0h\n", $time, i, `TO_FULL_ADDR(avs_address[i]), avs_byteenable[i], avs_burstcount[i])) end if (avs_readdatavalid[i]) begin - `TRACE(2, ("%d: AVS Rd Rsp [%0d]: data=0x%h\n", $time, i, avs_readdata[i])) + `TRACE(2, ("%t: AVS Rd Rsp [%0d]: data=0x%h\n", $time, i, avs_readdata[i])) end end end diff --git a/hw/rtl/afu/xrt/VX_afu_wrap.sv b/hw/rtl/afu/xrt/VX_afu_wrap.sv index d2a3f4c515..ff07cc09a1 100644 --- a/hw/rtl/afu/xrt/VX_afu_wrap.sv +++ b/hw/rtl/afu/xrt/VX_afu_wrap.sv @@ -132,7 +132,7 @@ module VX_afu_wrap #( 
STATE_IDLE: begin if (ap_start) begin `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: AFU: Goto STATE RUN\n", $time)) + `TRACE(2, ("%t: AFU: Goto STATE RUN\n", $time)) `endif state <= STATE_RUN; vx_reset_ctr <= (`RESET_DELAY-1); @@ -144,7 +144,7 @@ module VX_afu_wrap #( // wait until the reset network is ready if (vx_reset_ctr == 0) begin `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: AFU: Begin execution\n", $time)) + `TRACE(2, ("%t: AFU: Begin execution\n", $time)) `endif vx_busy_wait <= 1; vx_reset <= 0; @@ -159,8 +159,8 @@ module VX_afu_wrap #( // wait until the processor is not busy if (~vx_busy) begin `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: AFU: End execution\n", $time)) - `TRACE(2, ("%d: AFU: Goto STATE IDLE\n", $time)) + `TRACE(2, ("%t: AFU: End execution\n", $time)) + `TRACE(2, ("%t: AFU: Goto STATE IDLE\n", $time)) `endif state <= STATE_IDLE; end @@ -365,16 +365,16 @@ module VX_afu_wrap #( always @(posedge ap_clk) begin for (integer i = 0; i < C_M_AXI_MEM_NUM_BANKS; ++i) begin if (m_axi_mem_awvalid_a[i] && m_axi_mem_awready_a[i]) begin - `TRACE(2, ("%d: AFU Wr Req [%0d]: addr=0x%0h, tag=0x%0h\n", $time, i, m_axi_mem_awaddr_a[i], m_axi_mem_awid_a[i])) + `TRACE(2, ("%t: AFU Wr Req [%0d]: addr=0x%0h, tag=0x%0h\n", $time, i, m_axi_mem_awaddr_a[i], m_axi_mem_awid_a[i])) end if (m_axi_mem_wvalid_a[i] && m_axi_mem_wready_a[i]) begin - `TRACE(2, ("%d: AFU Wr Req [%0d]: data=0x%h\n", $time, i, m_axi_mem_wdata_a[i])) + `TRACE(2, ("%t: AFU Wr Req [%0d]: data=0x%h\n", $time, i, m_axi_mem_wdata_a[i])) end if (m_axi_mem_arvalid_a[i] && m_axi_mem_arready_a[i]) begin - `TRACE(2, ("%d: AFU Rd Req [%0d]: addr=0x%0h, tag=0x%0h\n", $time, i, m_axi_mem_araddr_a[i], m_axi_mem_arid_a[i])) + `TRACE(2, ("%t: AFU Rd Req [%0d]: addr=0x%0h, tag=0x%0h\n", $time, i, m_axi_mem_araddr_a[i], m_axi_mem_arid_a[i])) end if (m_axi_mem_rvalid_a[i] && m_axi_mem_rready_a[i]) begin - `TRACE(2, ("%d: AVS Rd Rsp [%0d]: data=0x%h, tag=0x%0h\n", $time, i, m_axi_mem_rdata_a[i], m_axi_mem_rid_a[i])) + `TRACE(2, ("%t: 
AVS Rd Rsp [%0d]: data=0x%h, tag=0x%0h\n", $time, i, m_axi_mem_rdata_a[i], m_axi_mem_rid_a[i])) end end end diff --git a/hw/rtl/cache/VX_cache_bank.sv b/hw/rtl/cache/VX_cache_bank.sv index 4abd7bad93..181f63fbff 100644 --- a/hw/rtl/cache/VX_cache_bank.sv +++ b/hw/rtl/cache/VX_cache_bank.sv @@ -672,31 +672,31 @@ module VX_cache_bank #( && ~(replay_fire || mem_rsp_fire || core_req_fire || flush_fire); always @(posedge clk) begin if (input_stall || pipe_stall) begin - `TRACE(3, ("%d: *** %s stall: crsq=%b, mreq=%b, mshr=%b, rdw1=%b, rdw2=%b, rdw3=%b\n", $time, INSTANCE_ID, crsp_queue_stall, mreq_queue_alm_full, mshr_alm_full, rdw_hazard1_sel, rdw_hazard2_sel, rdw_hazard3_st1)) + `TRACE(3, ("%t: *** %s stall: crsq=%b, mreq=%b, mshr=%b, rdw1=%b, rdw2=%b, rdw3=%b\n", $time, INSTANCE_ID, crsp_queue_stall, mreq_queue_alm_full, mshr_alm_full, rdw_hazard1_sel, rdw_hazard2_sel, rdw_hazard3_st1)) end if (mem_rsp_fire) begin - `TRACE(2, ("%d: %s fill-rsp: addr=0x%0h, mshr_id=%0d, data=0x%h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mem_rsp_addr, BANK_ID), mem_rsp_id, mem_rsp_data)) + `TRACE(2, ("%t: %s fill-rsp: addr=0x%0h, mshr_id=%0d, data=0x%h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mem_rsp_addr, BANK_ID), mem_rsp_id, mem_rsp_data)) end if (replay_fire) begin - `TRACE(2, ("%d: %s mshr-pop: addr=0x%0h, tag=0x%0h, req_idx=%0d (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(replay_addr, BANK_ID), replay_tag, replay_idx, req_uuid_sel)) + `TRACE(2, ("%t: %s mshr-pop: addr=0x%0h, tag=0x%0h, req_idx=%0d (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(replay_addr, BANK_ID), replay_tag, replay_idx, req_uuid_sel)) end if (core_req_fire) begin if (core_req_rw) begin - `TRACE(2, ("%d: %s core-wr-req: addr=0x%0h, tag=0x%0h, req_idx=%0d, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(core_req_addr, BANK_ID), core_req_tag, core_req_idx, core_req_byteen, core_req_data, req_uuid_sel)) + `TRACE(2, ("%t: %s core-wr-req: addr=0x%0h, tag=0x%0h, 
req_idx=%0d, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(core_req_addr, BANK_ID), core_req_tag, core_req_idx, core_req_byteen, core_req_data, req_uuid_sel)) end else begin - `TRACE(2, ("%d: %s core-rd-req: addr=0x%0h, tag=0x%0h, req_idx=%0d (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(core_req_addr, BANK_ID), core_req_tag, core_req_idx, req_uuid_sel)) + `TRACE(2, ("%t: %s core-rd-req: addr=0x%0h, tag=0x%0h, req_idx=%0d (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(core_req_addr, BANK_ID), core_req_tag, core_req_idx, req_uuid_sel)) end end if (crsp_queue_fire) begin - `TRACE(2, ("%d: %s core-rd-rsp: addr=0x%0h, tag=0x%0h, req_idx=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID), crsp_queue_tag, crsp_queue_idx, crsp_queue_data, req_uuid_st1)) + `TRACE(2, ("%t: %s core-rd-rsp: addr=0x%0h, tag=0x%0h, req_idx=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID), crsp_queue_tag, crsp_queue_idx, crsp_queue_data, req_uuid_st1)) end if (mreq_queue_push) begin if (do_creq_wr_st1 && !WRITEBACK) begin - `TRACE(2, ("%d: %s writethrough: addr=0x%0h, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mreq_queue_addr, BANK_ID), mreq_queue_byteen, mreq_queue_data, req_uuid_st1)) + `TRACE(2, ("%t: %s writethrough: addr=0x%0h, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mreq_queue_addr, BANK_ID), mreq_queue_byteen, mreq_queue_data, req_uuid_st1)) end else if (do_writeback_st1) begin - `TRACE(2, ("%d: %s writeback: addr=0x%0h, byteen=0x%h, data=0x%h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mreq_queue_addr, BANK_ID), mreq_queue_byteen, mreq_queue_data)) + `TRACE(2, ("%t: %s writeback: addr=0x%0h, byteen=0x%h, data=0x%h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mreq_queue_addr, BANK_ID), mreq_queue_byteen, mreq_queue_data)) end else begin - `TRACE(2, ("%d: %s fill-req: addr=0x%0h, mshr_id=%0d (#%0d)\n", 
$time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mreq_queue_addr, BANK_ID), mreq_queue_id, req_uuid_st1)) + `TRACE(2, ("%t: %s fill-req: addr=0x%0h, mshr_id=%0d (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mreq_queue_addr, BANK_ID), mreq_queue_id, req_uuid_st1)) end end end diff --git a/hw/rtl/cache/VX_cache_data.sv b/hw/rtl/cache/VX_cache_data.sv index 12e0e1ca3d..c4713f813a 100644 --- a/hw/rtl/cache/VX_cache_data.sv +++ b/hw/rtl/cache/VX_cache_data.sv @@ -182,16 +182,16 @@ module VX_cache_data #( `ifdef DBG_TRACE_CACHE always @(posedge clk) begin if (fill && ~stall) begin - `TRACE(3, ("%d: %s fill: addr=0x%0h, way=%b, blk_addr=%0d, data=0x%h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, fill_data)) + `TRACE(3, ("%t: %s fill: addr=0x%0h, way=%b, blk_addr=%0d, data=0x%h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, fill_data)) end if (flush && ~stall) begin - `TRACE(3, ("%d: %s flush: addr=0x%0h, way=%b, blk_addr=%0d, byteen=0x%h, data=0x%h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, dirty_byteen, dirty_data)) + `TRACE(3, ("%t: %s flush: addr=0x%0h, way=%b, blk_addr=%0d, byteen=0x%h, data=0x%h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, dirty_byteen, dirty_data)) end if (read && ~stall) begin - `TRACE(3, ("%d: %s read: addr=0x%0h, way=%b, blk_addr=%0d, wsel=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, wsel, read_data, req_uuid)) + `TRACE(3, ("%t: %s read: addr=0x%0h, way=%b, blk_addr=%0d, wsel=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, wsel, read_data, req_uuid)) end if (write && ~stall) begin - `TRACE(3, ("%d: %s write: addr=0x%0h, way=%b, blk_addr=%0d, wsel=%0d, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, 
wsel, write_byteen[wsel], write_data[wsel], req_uuid)) + `TRACE(3, ("%t: %s write: addr=0x%0h, way=%b, blk_addr=%0d, wsel=%0d, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, wsel, write_byteen[wsel], write_data[wsel], req_uuid)) end end `endif diff --git a/hw/rtl/cache/VX_cache_mshr.sv b/hw/rtl/cache/VX_cache_mshr.sv index d771a20e0f..d51d0f0d40 100644 --- a/hw/rtl/cache/VX_cache_mshr.sv +++ b/hw/rtl/cache/VX_cache_mshr.sv @@ -268,27 +268,27 @@ module VX_cache_mshr #( show_table <= allocate_fire || lookup_valid || finalize_valid || fill_valid || dequeue_fire; end if (allocate_fire) begin - `TRACE(3, ("%d: %s allocate: addr=0x%0h, prev=%0d, id=%0d (#%0d)\n", $time, INSTANCE_ID, + `TRACE(3, ("%t: %s allocate: addr=0x%0h, prev=%0d, id=%0d (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(allocate_addr, BANK_ID), allocate_prev, allocate_id, lkp_req_uuid)) end if (lookup_valid) begin - `TRACE(3, ("%d: %s lookup: addr=0x%0h, matches=%b (#%0d)\n", $time, INSTANCE_ID, + `TRACE(3, ("%t: %s lookup: addr=0x%0h, matches=%b (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(lookup_addr, BANK_ID), lookup_pending, lkp_req_uuid)) end if (finalize_valid) begin - `TRACE(3, ("%d: %s finalize release=%b, pending=%b, prev=%0d, id=%0d (#%0d)\n", $time, INSTANCE_ID, + `TRACE(3, ("%t: %s finalize release=%b, pending=%b, prev=%0d, id=%0d (#%0d)\n", $time, INSTANCE_ID, finalize_release, finalize_pending, finalize_prev, finalize_id, fin_req_uuid)) end if (fill_valid) begin - `TRACE(3, ("%d: %s fill: addr=0x%0h, addr=0x%0h, id=%0d\n", $time, INSTANCE_ID, + `TRACE(3, ("%t: %s fill: addr=0x%0h, addr=0x%0h, id=%0d\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(addr_table[fill_id], BANK_ID), `CS_LINE_TO_FULL_ADDR(fill_addr, BANK_ID), fill_id)) end if (dequeue_fire) begin - `TRACE(3, ("%d: %s dequeue: addr=0x%0h, id=%0d (#%0d)\n", $time, INSTANCE_ID, + `TRACE(3, ("%t: %s dequeue: addr=0x%0h, id=%0d (#%0d)\n", $time, 
INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(dequeue_addr, BANK_ID), dequeue_id_r, deq_req_uuid)) end if (show_table) begin - `TRACE(3, ("%d: %s table", $time, INSTANCE_ID)) + `TRACE(3, ("%t: %s table", $time, INSTANCE_ID)) for (integer i = 0; i < MSHR_SIZE; ++i) begin if (valid_table[i]) begin `TRACE(3, (" %0d=0x%0h", i, `CS_LINE_TO_FULL_ADDR(addr_table[i], BANK_ID))) diff --git a/hw/rtl/cache/VX_cache_tags.sv b/hw/rtl/cache/VX_cache_tags.sv index b6c3735b5d..4d9fc81deb 100644 --- a/hw/rtl/cache/VX_cache_tags.sv +++ b/hw/rtl/cache/VX_cache_tags.sv @@ -149,26 +149,26 @@ module VX_cache_tags #( wire [`CS_LINE_ADDR_WIDTH-1:0] evict_line_addr = {evict_tag, line_sel}; always @(posedge clk) begin if (fill && ~stall) begin - `TRACE(3, ("%d: %s fill: addr=0x%0h, way=%b, blk_addr=%0d, tag_id=0x%0h, dirty=%b, evict_addr=0x%0h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), evict_way, line_sel, line_tag, evict_dirty, `CS_LINE_TO_FULL_ADDR(evict_line_addr, BANK_ID))) + `TRACE(3, ("%t: %s fill: addr=0x%0h, way=%b, blk_addr=%0d, tag_id=0x%0h, dirty=%b, evict_addr=0x%0h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), evict_way, line_sel, line_tag, evict_dirty, `CS_LINE_TO_FULL_ADDR(evict_line_addr, BANK_ID))) end if (init) begin - `TRACE(3, ("%d: %s init: addr=0x%0h, blk_addr=%0d\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), line_sel)) + `TRACE(3, ("%t: %s init: addr=0x%0h, blk_addr=%0d\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), line_sel)) end if (flush && ~stall) begin - `TRACE(3, ("%d: %s flush: addr=0x%0h, way=%b, blk_addr=%0d, dirty=%b\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(evict_line_addr, BANK_ID), way_sel, line_sel, evict_dirty)) + `TRACE(3, ("%t: %s flush: addr=0x%0h, way=%b, blk_addr=%0d, dirty=%b\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(evict_line_addr, BANK_ID), way_sel, line_sel, evict_dirty)) end if (lookup && ~stall) begin if (tag_matches != 0) begin if (write) begin - `TRACE(3, 
("%d: %s write-hit: addr=0x%0h, way=%b, blk_addr=%0d, tag_id=0x%0h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), tag_matches, line_sel, line_tag, req_uuid)) + `TRACE(3, ("%t: %s write-hit: addr=0x%0h, way=%b, blk_addr=%0d, tag_id=0x%0h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), tag_matches, line_sel, line_tag, req_uuid)) end else begin - `TRACE(3, ("%d: %s read-hit: addr=0x%0h, way=%b, blk_addr=%0d, tag_id=0x%0h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), tag_matches, line_sel, line_tag, req_uuid)) + `TRACE(3, ("%t: %s read-hit: addr=0x%0h, way=%b, blk_addr=%0d, tag_id=0x%0h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), tag_matches, line_sel, line_tag, req_uuid)) end end else begin if (write) begin - `TRACE(3, ("%d: %s write-miss: addr=0x%0h, blk_addr=%0d, tag_id=0x%0h, (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), line_sel, line_tag, req_uuid)) + `TRACE(3, ("%t: %s write-miss: addr=0x%0h, blk_addr=%0d, tag_id=0x%0h, (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), line_sel, line_tag, req_uuid)) end else begin - `TRACE(3, ("%d: %s read-miss: addr=0x%0h, blk_addr=%0d, tag_id=0x%0h, (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), line_sel, line_tag, req_uuid)) + `TRACE(3, ("%t: %s read-miss: addr=0x%0h, blk_addr=%0d, tag_id=0x%0h, (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), line_sel, line_tag, req_uuid)) end end end diff --git a/hw/rtl/cache/VX_cache_wrap.sv b/hw/rtl/cache/VX_cache_wrap.sv index 5787689818..502e5b9d46 100644 --- a/hw/rtl/cache/VX_cache_wrap.sv +++ b/hw/rtl/cache/VX_cache_wrap.sv @@ -234,13 +234,13 @@ module VX_cache_wrap import VX_gpu_pkg::*; #( always @(posedge clk) begin if (core_req_fire) begin if (core_bus_if[i].req_data.rw) begin - `TRACE(1, ("%d: %s core-wr-req: addr=0x%0h, tag=0x%0h, req_idx=%0d, byteen=0x%h, data=0x%h 
(#%0d)\n", $time, INSTANCE_ID, `TO_FULL_ADDR(core_bus_if[i].req_data.addr), core_bus_if[i].req_data.tag, i, core_bus_if[i].req_data.byteen, core_bus_if[i].req_data.data, core_req_uuid)) + `TRACE(1, ("%t: %s core-wr-req: addr=0x%0h, tag=0x%0h, req_idx=%0d, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `TO_FULL_ADDR(core_bus_if[i].req_data.addr), core_bus_if[i].req_data.tag, i, core_bus_if[i].req_data.byteen, core_bus_if[i].req_data.data, core_req_uuid)) end else begin - `TRACE(1, ("%d: %s core-rd-req: addr=0x%0h, tag=0x%0h, req_idx=%0d (#%0d)\n", $time, INSTANCE_ID, `TO_FULL_ADDR(core_bus_if[i].req_data.addr), core_bus_if[i].req_data.tag, i, core_req_uuid)) + `TRACE(1, ("%t: %s core-rd-req: addr=0x%0h, tag=0x%0h, req_idx=%0d (#%0d)\n", $time, INSTANCE_ID, `TO_FULL_ADDR(core_bus_if[i].req_data.addr), core_bus_if[i].req_data.tag, i, core_req_uuid)) end end if (core_rsp_fire) begin - `TRACE(1, ("%d: %s core-rd-rsp: tag=0x%0h, req_idx=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID, core_bus_if[i].rsp_data.tag, i, core_bus_if[i].rsp_data.data, core_rsp_uuid)) + `TRACE(1, ("%t: %s core-rd-rsp: tag=0x%0h, req_idx=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID, core_bus_if[i].rsp_data.tag, i, core_bus_if[i].rsp_data.data, core_rsp_uuid)) end end end @@ -262,15 +262,15 @@ module VX_cache_wrap import VX_gpu_pkg::*; #( always @(posedge clk) begin if (mem_req_fire) begin if (mem_bus_if.req_data.rw) begin - `TRACE(1, ("%d: %s mem-wr-req: addr=0x%0h, tag=0x%0h, byteen=0x%h, data=0x%h (#%0d)\n", + `TRACE(1, ("%t: %s mem-wr-req: addr=0x%0h, tag=0x%0h, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `TO_FULL_ADDR(mem_bus_if.req_data.addr), mem_bus_if.req_data.tag, mem_bus_if.req_data.byteen, mem_bus_if.req_data.data, mem_req_uuid)) end else begin - `TRACE(1, ("%d: %s mem-rd-req: addr=0x%0h, tag=0x%0h (#%0d)\n", + `TRACE(1, ("%t: %s mem-rd-req: addr=0x%0h, tag=0x%0h (#%0d)\n", $time, INSTANCE_ID, `TO_FULL_ADDR(mem_bus_if.req_data.addr), mem_bus_if.req_data.tag, 
mem_req_uuid)) end end if (mem_rsp_fire) begin - `TRACE(1, ("%d: %s mem-rd-rsp: tag=0x%0h, data=0x%h (#%0d)\n", + `TRACE(1, ("%t: %s mem-rd-rsp: tag=0x%0h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, mem_bus_if.rsp_data.tag, mem_bus_if.rsp_data.data, mem_rsp_uuid)) end end diff --git a/hw/rtl/core/VX_alu_int.sv b/hw/rtl/core/VX_alu_int.sv index 083438e883..04d123860b 100644 --- a/hw/rtl/core/VX_alu_int.sv +++ b/hw/rtl/core/VX_alu_int.sv @@ -194,7 +194,7 @@ module VX_alu_int #( `ifdef DBG_TRACE_PIPELINE always @(posedge clk) begin if (br_enable) begin - `TRACE(1, ("%d: %s branch: wid=%0d, PC=0x%0h, taken=%b, dest=0x%0h (#%0d)\n", + `TRACE(1, ("%t: %s branch: wid=%0d, PC=0x%0h, taken=%b, dest=0x%0h (#%0d)\n", $time, INSTANCE_ID, br_wid, {commit_if.data.PC, 1'b0}, br_taken, {br_dest, 1'b0}, commit_if.data.uuid)) end end diff --git a/hw/rtl/core/VX_commit.sv b/hw/rtl/core/VX_commit.sv index f993c9648e..acfae9e4df 100644 --- a/hw/rtl/core/VX_commit.sv +++ b/hw/rtl/core/VX_commit.sv @@ -178,7 +178,7 @@ module VX_commit import VX_gpu_pkg::*; #( for (genvar j = 0; j < `NUM_EX_UNITS; ++j) begin always @(posedge clk) begin if (commit_if[j * `ISSUE_WIDTH + i].valid && commit_if[j * `ISSUE_WIDTH + i].ready) begin - `TRACE(1, ("%d: %s: wid=%0d, PC=0x%0h, ex=", $time, INSTANCE_ID, commit_if[j * `ISSUE_WIDTH + i].data.wid, {commit_if[j * `ISSUE_WIDTH + i].data.PC, 1'b0})) + `TRACE(1, ("%t: %s: wid=%0d, PC=0x%0h, ex=", $time, INSTANCE_ID, commit_if[j * `ISSUE_WIDTH + i].data.wid, {commit_if[j * `ISSUE_WIDTH + i].data.PC, 1'b0})) trace_ex_type(1, j); `TRACE(1, (", tmask=%b, wb=%0d, rd=%0d, sop=%b, eop=%b, data=", commit_if[j * `ISSUE_WIDTH + i].data.tmask, commit_if[j * `ISSUE_WIDTH + i].data.wb, commit_if[j * `ISSUE_WIDTH + i].data.rd, commit_if[j * `ISSUE_WIDTH + i].data.sop, commit_if[j * `ISSUE_WIDTH + i].data.eop)) `TRACE_ARRAY1D(1, "0x%0h", commit_if[j * `ISSUE_WIDTH + i].data.data, `NUM_THREADS) diff --git a/hw/rtl/core/VX_dcr_data.sv b/hw/rtl/core/VX_dcr_data.sv index 
03c5be61f1..042c87e552 100644 --- a/hw/rtl/core/VX_dcr_data.sv +++ b/hw/rtl/core/VX_dcr_data.sv @@ -50,7 +50,7 @@ module VX_dcr_data import VX_gpu_pkg::*; ( `ifdef DBG_TRACE_PIPELINE always @(posedge clk) begin if (dcr_bus_if.write_valid) begin - `TRACE(1, ("%d: base-dcr: state=", $time)) + `TRACE(1, ("%t: base-dcr: state=", $time)) trace_base_dcr(1, dcr_bus_if.write_addr); `TRACE(1, (", data=0x%h\n", dcr_bus_if.write_data)) end diff --git a/hw/rtl/core/VX_decode.sv b/hw/rtl/core/VX_decode.sv index 28d27a2993..79a8d9c3dc 100644 --- a/hw/rtl/core/VX_decode.sv +++ b/hw/rtl/core/VX_decode.sv @@ -568,7 +568,7 @@ module VX_decode import VX_gpu_pkg::*; #( `ifdef DBG_TRACE_PIPELINE always @(posedge clk) begin if (decode_if.valid && decode_if.ready) begin - `TRACE(1, ("%d: %s: wid=%0d, PC=0x%0h, instr=0x%0h, ex=", $time, INSTANCE_ID, decode_if.data.wid, {decode_if.data.PC, 1'd0}, instr)) + `TRACE(1, ("%t: %s: wid=%0d, PC=0x%0h, instr=0x%0h, ex=", $time, INSTANCE_ID, decode_if.data.wid, {decode_if.data.PC, 1'd0}, instr)) trace_ex_type(1, decode_if.data.ex_type); `TRACE(1, (", op=")) trace_ex_op(1, decode_if.data.ex_type, decode_if.data.op_type, decode_if.data.op_args); diff --git a/hw/rtl/core/VX_fetch.sv b/hw/rtl/core/VX_fetch.sv index 46283818ae..dab4772db9 100644 --- a/hw/rtl/core/VX_fetch.sv +++ b/hw/rtl/core/VX_fetch.sv @@ -168,10 +168,10 @@ module VX_fetch import VX_gpu_pkg::*; #( wire fetch_fire = fetch_if.valid && fetch_if.ready; always @(posedge clk) begin if (schedule_fire) begin - `TRACE(1, ("%d: %s req: wid=%0d, PC=0x%0h, tmask=%b (#%0d)\n", $time, INSTANCE_ID, schedule_if.data.wid, {schedule_if.data.PC, 1'b0}, schedule_if.data.tmask, schedule_if.data.uuid)) + `TRACE(1, ("%t: %s req: wid=%0d, PC=0x%0h, tmask=%b (#%0d)\n", $time, INSTANCE_ID, schedule_if.data.wid, {schedule_if.data.PC, 1'b0}, schedule_if.data.tmask, schedule_if.data.uuid)) end if (fetch_fire) begin - `TRACE(1, ("%d: %s rsp: wid=%0d, PC=0x%0h, tmask=%b, instr=0x%0h (#%0d)\n", $time, INSTANCE_ID, 
fetch_if.data.wid, {fetch_if.data.PC, 1'b0}, fetch_if.data.tmask, fetch_if.data.instr, fetch_if.data.uuid)) + `TRACE(1, ("%t: %s rsp: wid=%0d, PC=0x%0h, tmask=%b, instr=0x%0h (#%0d)\n", $time, INSTANCE_ID, fetch_if.data.wid, {fetch_if.data.PC, 1'b0}, fetch_if.data.tmask, fetch_if.data.instr, fetch_if.data.uuid)) end end `endif diff --git a/hw/rtl/core/VX_issue_slice.sv b/hw/rtl/core/VX_issue_slice.sv index a99bf2c8ff..63d811328c 100644 --- a/hw/rtl/core/VX_issue_slice.sv +++ b/hw/rtl/core/VX_issue_slice.sv @@ -135,7 +135,7 @@ module VX_issue_slice import VX_gpu_pkg::*; #( `ifdef DBG_TRACE_PIPELINE always @(posedge clk) begin if (operands_if.valid && operands_if.ready) begin - `TRACE(1, ("%d: %s: wid=%0d, PC=0x%0h, ex=", $time, INSTANCE_ID, wis_to_wid(operands_if.data.wis, ISSUE_ID), {operands_if.data.PC, 1'b0})) + `TRACE(1, ("%t: %s: wid=%0d, PC=0x%0h, ex=", $time, INSTANCE_ID, wis_to_wid(operands_if.data.wis, ISSUE_ID), {operands_if.data.PC, 1'b0})) trace_ex_type(1, operands_if.data.ex_type); `TRACE(1, (", op=")) trace_ex_op(1, operands_if.data.ex_type, operands_if.data.op_type, operands_if.data.op_args); diff --git a/hw/rtl/core/VX_lsu_slice.sv b/hw/rtl/core/VX_lsu_slice.sv index bd82aee31e..b880eee2e3 100644 --- a/hw/rtl/core/VX_lsu_slice.sv +++ b/hw/rtl/core/VX_lsu_slice.sv @@ -505,11 +505,11 @@ module VX_lsu_slice import VX_gpu_pkg::*; #( `ifdef DBG_TRACE_MEM always @(posedge clk) begin if (execute_if.valid && fence_lock) begin - `TRACE(1, ("%d: *** %s fence wait\n", $time, INSTANCE_ID)) + `TRACE(1, ("%t: *** %s fence wait\n", $time, INSTANCE_ID)) end if (mem_req_fire) begin if (mem_req_rw) begin - `TRACE(1, ("%d: %s Wr Req: wid=%0d, PC=0x%0h, tmask=%b, addr=", $time, INSTANCE_ID, execute_if.data.wid, {execute_if.data.PC, 1'b0}, mem_req_mask)) + `TRACE(1, ("%t: %s Wr Req: wid=%0d, PC=0x%0h, tmask=%b, addr=", $time, INSTANCE_ID, execute_if.data.wid, {execute_if.data.PC, 1'b0}, mem_req_mask)) `TRACE_ARRAY1D(1, "0x%h", full_addr, NUM_LANES) `TRACE(1, (", 
flags=")) `TRACE_ARRAY1D(1, "%b", mem_req_flags, NUM_LANES) @@ -517,7 +517,7 @@ module VX_lsu_slice import VX_gpu_pkg::*; #( `TRACE_ARRAY1D(1, "0x%0h", mem_req_data, NUM_LANES) `TRACE(1, (", sop=%b, eop=%b, tag=0x%0h (#%0d)\n", execute_if.data.sop, execute_if.data.eop, mem_req_tag, execute_if.data.uuid)) end else begin - `TRACE(1, ("%d: %s Rd Req: wid=%0d, PC=0x%0h, tmask=%b, addr=", $time, INSTANCE_ID, execute_if.data.wid, {execute_if.data.PC, 1'b0}, mem_req_mask)) + `TRACE(1, ("%t: %s Rd Req: wid=%0d, PC=0x%0h, tmask=%b, addr=", $time, INSTANCE_ID, execute_if.data.wid, {execute_if.data.PC, 1'b0}, mem_req_mask)) `TRACE_ARRAY1D(1, "0x%h", full_addr, NUM_LANES) `TRACE(1, (", flags=")) `TRACE_ARRAY1D(1, "%b", mem_req_flags, NUM_LANES) @@ -525,7 +525,7 @@ module VX_lsu_slice import VX_gpu_pkg::*; #( end end if (mem_rsp_fire) begin - `TRACE(1, ("%d: %s Rsp: wid=%0d, PC=0x%0h, tmask=%b, rd=%0d, sop=%b, eop=%b, data=", + `TRACE(1, ("%t: %s Rsp: wid=%0d, PC=0x%0h, tmask=%b, rd=%0d, sop=%b, eop=%b, data=", $time, INSTANCE_ID, rsp_wid, {rsp_pc, 1'b0}, mem_rsp_mask, rsp_rd, mem_rsp_sop, mem_rsp_eop)) `TRACE_ARRAY1D(1, "0x%0h", mem_rsp_data, NUM_LANES) `TRACE(1, (", tag=0x%0h (#%0d)\n", mem_rsp_tag, rsp_uuid)) diff --git a/hw/rtl/core/VX_scoreboard.sv b/hw/rtl/core/VX_scoreboard.sv index 14d88b8b12..b4fd5c08c9 100644 --- a/hw/rtl/core/VX_scoreboard.sv +++ b/hw/rtl/core/VX_scoreboard.sv @@ -206,7 +206,7 @@ module VX_scoreboard import VX_gpu_pkg::*; #( end else begin if (staging_if[w].valid && ~staging_if[w].ready) begin `ifdef DBG_TRACE_PIPELINE - `TRACE(3, ("%d: *** %s-stall: wid=%0d, PC=0x%0h, tmask=%b, cycles=%0d, inuse=%b (#%0d)\n", + `TRACE(3, ("%t: *** %s-stall: wid=%0d, PC=0x%0h, tmask=%b, cycles=%0d, inuse=%b (#%0d)\n", $time, INSTANCE_ID, w, {staging_if[w].data.PC, 1'b0}, staging_if[w].data.tmask, timeout_ctr, operands_busy, staging_if[w].data.uuid)) `endif From ae24264a2a39a42e4b746a14d88dac1032da1c6e Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Wed, 11 Sep 2024 
05:40:05 -0700 Subject: [PATCH 172/407] minor update --- hw/rtl/VX_socket.sv | 14 +++++++------- hw/rtl/core/VX_ipdom_stack.sv | 6 +++--- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/hw/rtl/VX_socket.sv b/hw/rtl/VX_socket.sv index a6e58ebd18..17a027c956 100644 --- a/hw/rtl/VX_socket.sv +++ b/hw/rtl/VX_socket.sv @@ -178,13 +178,13 @@ module VX_socket import VX_gpu_pkg::*; #( `ASSIGN_VX_MEM_BUS_IF_X (l1_mem_bus_if[1], dcache_mem_bus_if, L1_MEM_TAG_WIDTH, DCACHE_MEM_TAG_WIDTH); VX_mem_arb #( - .NUM_INPUTS (2), - .DATA_SIZE (`L1_LINE_SIZE), - .TAG_WIDTH (L1_MEM_TAG_WIDTH), - .TAG_SEL_IDX (0), - .ARBITER ("P"), // prioritize the icache - .REQ_OUT_BUF (3), - .RSP_OUT_BUF (3) + .NUM_INPUTS (2), + .DATA_SIZE (`L1_LINE_SIZE), + .TAG_WIDTH (L1_MEM_TAG_WIDTH), + .TAG_SEL_IDX(0), + .ARBITER ("P"), // prioritize the icache + .REQ_OUT_BUF(3), + .RSP_OUT_BUF(3) ) mem_arb ( .clk (clk), .reset (reset), diff --git a/hw/rtl/core/VX_ipdom_stack.sv b/hw/rtl/core/VX_ipdom_stack.sv index 0ec05cbae0..ded232f300 100644 --- a/hw/rtl/core/VX_ipdom_stack.sv +++ b/hw/rtl/core/VX_ipdom_stack.sv @@ -48,9 +48,9 @@ module VX_ipdom_stack #( empty_r <= 1; full_r <= 0; end else begin - `ASSERT(~push || ~full, ("runtime error: writing to a full stack!")); - `ASSERT(~pop || ~empty, ("runtime error: reading an empty stack!")); - `ASSERT(~push || ~pop, ("runtime error: push and pop in same cycle not supported!")); + `ASSERT(~push || ~full, ("%t: runtime error: writing to a full stack!", $time)); + `ASSERT(~pop || ~empty, ("%t: runtime error: reading an empty stack!", $time)); + `ASSERT(~push || ~pop, ("%t: runtime error: push and pop in same cycle not supported!", $time)); if (push) begin rd_ptr <= wr_ptr; wr_ptr <= wr_ptr + ADDRW'(1); From bb9ae8576dba625758376d9cd155ede3c8a44bcc Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Wed, 11 Sep 2024 06:47:33 -0700 Subject: [PATCH 173/407] adding uuid support to memory transactions --- hw/rtl/VX_define.vh | 16 +++++----- 
hw/rtl/VX_gpu_pkg.sv | 8 ++--- hw/rtl/Vortex.sv | 9 ++++-- hw/rtl/Vortex_axi.sv | 4 ++- hw/rtl/afu/xrt/VX_afu_wrap.sv | 12 +++---- hw/rtl/cache/VX_cache.sv | 54 +++++++++++++++++++------------- hw/rtl/cache/VX_cache_bank.sv | 47 ++++++++++++++++++++------- hw/rtl/cache/VX_cache_cluster.sv | 4 +-- hw/rtl/cache/VX_cache_define.vh | 1 - hw/rtl/cache/VX_cache_flush.sv | 20 ++++++++++++ hw/rtl/cache/VX_cache_wrap.sv | 9 +++--- 11 files changed, 121 insertions(+), 63 deletions(-) diff --git a/hw/rtl/VX_define.vh b/hw/rtl/VX_define.vh index 69b14c7480..f42f0b0184 100644 --- a/hw/rtl/VX_define.vh +++ b/hw/rtl/VX_define.vh @@ -264,14 +264,14 @@ /////////////////////////////////////////////////////////////////////////////// -`define CACHE_MEM_TAG_WIDTH(mshr_size, num_banks) \ - (`CLOG2(mshr_size) + `CLOG2(num_banks)) +`define CACHE_MEM_TAG_WIDTH(mshr_size, num_banks, uuid_width) \ + (uuid_width + `CLOG2(mshr_size) + `CLOG2(num_banks)) `define CACHE_BYPASS_TAG_WIDTH(num_reqs, line_size, word_size, tag_width) \ (`CLOG2(num_reqs) + `CLOG2(line_size / word_size) + tag_width) -`define CACHE_NC_MEM_TAG_WIDTH(mshr_size, num_banks, num_reqs, line_size, word_size, tag_width) \ - (`MAX(`CACHE_MEM_TAG_WIDTH(mshr_size, num_banks), `CACHE_BYPASS_TAG_WIDTH(num_reqs, line_size, word_size, tag_width)) + 1) +`define CACHE_NC_MEM_TAG_WIDTH(mshr_size, num_banks, num_reqs, line_size, word_size, tag_width, uuid_width) \ + (`MAX(`CACHE_MEM_TAG_WIDTH(mshr_size, num_banks, uuid_width), `CACHE_BYPASS_TAG_WIDTH(num_reqs, line_size, word_size, tag_width)) + 1) /////////////////////////////////////////////////////////////////////////////// @@ -281,14 +281,14 @@ `define CACHE_CLUSTER_MEM_ARB_TAG(tag_width, num_caches) \ (tag_width + `ARB_SEL_BITS(`UP(num_caches), 1)) -`define CACHE_CLUSTER_MEM_TAG_WIDTH(mshr_size, num_banks, num_caches) \ - `CACHE_CLUSTER_MEM_ARB_TAG(`CACHE_MEM_TAG_WIDTH(mshr_size, num_banks), num_caches) +`define CACHE_CLUSTER_MEM_TAG_WIDTH(mshr_size, num_banks, num_caches, uuid_width) 
\ + `CACHE_CLUSTER_MEM_ARB_TAG(`CACHE_MEM_TAG_WIDTH(mshr_size, num_banks, uuid_width), num_caches) `define CACHE_CLUSTER_BYPASS_MEM_TAG_WIDTH(num_reqs, line_size, word_size, tag_width, num_inputs, num_caches) \ `CACHE_CLUSTER_MEM_ARB_TAG(`CACHE_BYPASS_TAG_WIDTH(num_reqs, line_size, word_size, `CACHE_CLUSTER_CORE_ARB_TAG(tag_width, num_inputs, num_caches)), num_caches) -`define CACHE_CLUSTER_NC_MEM_TAG_WIDTH(mshr_size, num_banks, num_reqs, line_size, word_size, tag_width, num_inputs, num_caches) \ - `CACHE_CLUSTER_MEM_ARB_TAG(`CACHE_NC_MEM_TAG_WIDTH(mshr_size, num_banks, num_reqs, line_size, word_size, `CACHE_CLUSTER_CORE_ARB_TAG(tag_width, num_inputs, num_caches)), num_caches) +`define CACHE_CLUSTER_NC_MEM_TAG_WIDTH(mshr_size, num_banks, num_reqs, line_size, word_size, tag_width, num_inputs, num_caches, uuid_width) \ + `CACHE_CLUSTER_MEM_ARB_TAG(`CACHE_NC_MEM_TAG_WIDTH(mshr_size, num_banks, num_reqs, line_size, word_size, `CACHE_CLUSTER_CORE_ARB_TAG(tag_width, num_inputs, num_caches), uuid_width), num_caches) /////////////////////////////////////////////////////////////////////////////// diff --git a/hw/rtl/VX_gpu_pkg.sv b/hw/rtl/VX_gpu_pkg.sv index e5afefe8e3..fe35fb3919 100644 --- a/hw/rtl/VX_gpu_pkg.sv +++ b/hw/rtl/VX_gpu_pkg.sv @@ -166,7 +166,7 @@ package VX_gpu_pkg; // Memory request tag bits `ifdef ICACHE_ENABLE - localparam ICACHE_MEM_TAG_WIDTH = `CACHE_CLUSTER_MEM_TAG_WIDTH(`ICACHE_MSHR_SIZE, 1, `NUM_ICACHES); + localparam ICACHE_MEM_TAG_WIDTH = `CACHE_CLUSTER_MEM_TAG_WIDTH(`ICACHE_MSHR_SIZE, 1, `NUM_ICACHES, `UUID_WIDTH); `else localparam ICACHE_MEM_TAG_WIDTH = `CACHE_CLUSTER_BYPASS_MEM_TAG_WIDTH(1, ICACHE_LINE_SIZE, ICACHE_WORD_SIZE, ICACHE_TAG_WIDTH, `SOCKET_SIZE, `NUM_ICACHES); `endif @@ -197,7 +197,7 @@ package VX_gpu_pkg; // Memory request tag bits `ifdef DCACHE_ENABLE - localparam DCACHE_MEM_TAG_WIDTH = `CACHE_CLUSTER_NC_MEM_TAG_WIDTH(`DCACHE_MSHR_SIZE, `DCACHE_NUM_BANKS, DCACHE_NUM_REQS, DCACHE_LINE_SIZE, DCACHE_WORD_SIZE, DCACHE_TAG_WIDTH, 
`SOCKET_SIZE, `NUM_DCACHES); + localparam DCACHE_MEM_TAG_WIDTH = `CACHE_CLUSTER_NC_MEM_TAG_WIDTH(`DCACHE_MSHR_SIZE, `DCACHE_NUM_BANKS, DCACHE_NUM_REQS, DCACHE_LINE_SIZE, DCACHE_WORD_SIZE, DCACHE_TAG_WIDTH, `SOCKET_SIZE, `NUM_DCACHES, `UUID_WIDTH); `else localparam DCACHE_MEM_TAG_WIDTH = `CACHE_CLUSTER_BYPASS_MEM_TAG_WIDTH(DCACHE_NUM_REQS, DCACHE_LINE_SIZE, DCACHE_WORD_SIZE, DCACHE_TAG_WIDTH, `SOCKET_SIZE, `NUM_DCACHES); `endif @@ -226,7 +226,7 @@ package VX_gpu_pkg; // Memory request tag bits `ifdef L2_ENABLE - localparam L2_MEM_TAG_WIDTH = `CACHE_NC_MEM_TAG_WIDTH(`L2_MSHR_SIZE, `L2_NUM_BANKS, L2_NUM_REQS, `L2_LINE_SIZE, L2_WORD_SIZE, L2_TAG_WIDTH); + localparam L2_MEM_TAG_WIDTH = `CACHE_NC_MEM_TAG_WIDTH(`L2_MSHR_SIZE, `L2_NUM_BANKS, L2_NUM_REQS, `L2_LINE_SIZE, L2_WORD_SIZE, L2_TAG_WIDTH, `UUID_WIDTH); `else localparam L2_MEM_TAG_WIDTH = `CACHE_BYPASS_TAG_WIDTH(L2_NUM_REQS, `L2_LINE_SIZE, L2_WORD_SIZE, L2_TAG_WIDTH); `endif @@ -247,7 +247,7 @@ package VX_gpu_pkg; // Memory request tag bits `ifdef L3_ENABLE - localparam L3_MEM_TAG_WIDTH = `CACHE_NC_MEM_TAG_WIDTH(`L3_MSHR_SIZE, `L3_NUM_BANKS, L3_NUM_REQS, `L3_LINE_SIZE, L3_WORD_SIZE, L3_TAG_WIDTH); + localparam L3_MEM_TAG_WIDTH = `CACHE_NC_MEM_TAG_WIDTH(`L3_MSHR_SIZE, `L3_NUM_BANKS, L3_NUM_REQS, `L3_LINE_SIZE, L3_WORD_SIZE, L3_TAG_WIDTH, `UUID_WIDTH); `else localparam L3_MEM_TAG_WIDTH = `CACHE_BYPASS_TAG_WIDTH(L3_NUM_REQS, `L3_LINE_SIZE, L3_WORD_SIZE, L3_TAG_WIDTH); `endif diff --git a/hw/rtl/Vortex.sv b/hw/rtl/Vortex.sv index fd7ef02677..6dc59cad2e 100644 --- a/hw/rtl/Vortex.sv +++ b/hw/rtl/Vortex.sv @@ -196,16 +196,19 @@ module Vortex import VX_gpu_pkg::*; ( end `ifdef DBG_TRACE_MEM + wire [`UUID_WIDTH-1:0] mem_req_uuid = mem_req_tag[`VX_MEM_TAG_WIDTH-1 -: `UUID_WIDTH]; + wire [`UUID_WIDTH-1:0] mem_rsp_uuid = mem_rsp_tag[`VX_MEM_TAG_WIDTH-1 -: `UUID_WIDTH]; + always @(posedge clk) begin if (mem_req_fire) begin if (mem_req_rw) begin - `TRACE(1, ("%t: MEM Wr Req: addr=0x%0h, tag=0x%0h, byteen=0x%h data=0x%h\n", 
$time, `TO_FULL_ADDR(mem_req_addr), mem_req_tag, mem_req_byteen, mem_req_data)) + `TRACE(1, ("%t: MEM Wr Req: addr=0x%0h, tag=0x%0h, byteen=0x%h data=0x%h (#%0d)\n", $time, `TO_FULL_ADDR(mem_req_addr), mem_req_tag, mem_req_byteen, mem_req_data, mem_req_uuid)) end else begin - `TRACE(1, ("%t: MEM Rd Req: addr=0x%0h, tag=0x%0h, byteen=0x%h\n", $time, `TO_FULL_ADDR(mem_req_addr), mem_req_tag, mem_req_byteen)) + `TRACE(1, ("%t: MEM Rd Req: addr=0x%0h, tag=0x%0h, byteen=0x%h (#%0d)\n", $time, `TO_FULL_ADDR(mem_req_addr), mem_req_tag, mem_req_byteen, mem_req_uuid)) end end if (mem_rsp_fire) begin - `TRACE(1, ("%t: MEM Rd Rsp: tag=0x%0h, data=0x%h\n", $time, mem_rsp_tag, mem_rsp_data)) + `TRACE(1, ("%t: MEM Rd Rsp: tag=0x%0h, data=0x%h (#%0d)\n", $time, mem_rsp_tag, mem_rsp_data, mem_rsp_uuid)) end end `endif diff --git a/hw/rtl/Vortex_axi.sv b/hw/rtl/Vortex_axi.sv index 5d2f5b0a77..c5aa655c5d 100644 --- a/hw/rtl/Vortex_axi.sv +++ b/hw/rtl/Vortex_axi.sv @@ -82,9 +82,11 @@ module Vortex_axi import VX_gpu_pkg::*; #( // Status output wire busy ); + localparam MIN_TAG_WIDTH = `VX_MEM_TAG_WIDTH - `UUID_WIDTH; + `STATIC_ASSERT((AXI_DATA_WIDTH == `VX_MEM_DATA_WIDTH), ("invalid memory data size: current=%0d, expected=%0d", AXI_DATA_WIDTH, `VX_MEM_DATA_WIDTH)) `STATIC_ASSERT((AXI_ADDR_WIDTH >= `MEM_ADDR_WIDTH), ("invalid memory address size: current=%0d, expected=%0d", AXI_ADDR_WIDTH, `VX_MEM_ADDR_WIDTH)) - //`STATIC_ASSERT((AXI_TID_WIDTH >= `VX_MEM_TAG_WIDTH), ("invalid memory tag size: current=%0d, expected=%0d", AXI_TID_WIDTH, `VX_MEM_TAG_WIDTH)) + `STATIC_ASSERT((AXI_TID_WIDTH >= MIN_TAG_WIDTH), ("invalid memory tag size: current=%0d, expected=%0d", AXI_TID_WIDTH, MIN_TAG_WIDTH)) wire mem_req_valid; wire mem_req_rw; diff --git a/hw/rtl/afu/xrt/VX_afu_wrap.sv b/hw/rtl/afu/xrt/VX_afu_wrap.sv index ff07cc09a1..191fbe0784 100644 --- a/hw/rtl/afu/xrt/VX_afu_wrap.sv +++ b/hw/rtl/afu/xrt/VX_afu_wrap.sv @@ -227,12 +227,12 @@ module VX_afu_wrap #( .dcr_wr_data (dcr_wr_data) ); - wire 
[`MEM_ADDR_WIDTH-1:0] m_axi_mem_awaddr_w [C_M_AXI_MEM_NUM_BANKS]; - wire [`MEM_ADDR_WIDTH-1:0] m_axi_mem_araddr_w [C_M_AXI_MEM_NUM_BANKS]; + wire [`MEM_ADDR_WIDTH-1:0] m_axi_mem_awaddr_u [C_M_AXI_MEM_NUM_BANKS]; + wire [`MEM_ADDR_WIDTH-1:0] m_axi_mem_araddr_u [C_M_AXI_MEM_NUM_BANKS]; for (genvar i = 0; i < C_M_AXI_MEM_NUM_BANKS; ++i) begin - assign m_axi_mem_awaddr_a[i] = C_M_AXI_MEM_ADDR_WIDTH'(m_axi_mem_awaddr_w[i]) + C_M_AXI_MEM_ADDR_WIDTH'(mem_base[i]); - assign m_axi_mem_araddr_a[i] = C_M_AXI_MEM_ADDR_WIDTH'(m_axi_mem_araddr_w[i]) + C_M_AXI_MEM_ADDR_WIDTH'(mem_base[i]); + assign m_axi_mem_awaddr_a[i] = C_M_AXI_MEM_ADDR_WIDTH'(m_axi_mem_awaddr_u[i]) + C_M_AXI_MEM_ADDR_WIDTH'(mem_base[i]); + assign m_axi_mem_araddr_a[i] = C_M_AXI_MEM_ADDR_WIDTH'(m_axi_mem_araddr_u[i]) + C_M_AXI_MEM_ADDR_WIDTH'(mem_base[i]); end `SCOPE_IO_SWITCH (2) @@ -250,7 +250,7 @@ module VX_afu_wrap #( .m_axi_awvalid (m_axi_mem_awvalid_a), .m_axi_awready (m_axi_mem_awready_a), - .m_axi_awaddr (m_axi_mem_awaddr_w), + .m_axi_awaddr (m_axi_mem_awaddr_u), .m_axi_awid (m_axi_mem_awid_a), .m_axi_awlen (m_axi_mem_awlen_a), `UNUSED_PIN (m_axi_awsize), @@ -274,7 +274,7 @@ module VX_afu_wrap #( .m_axi_arvalid (m_axi_mem_arvalid_a), .m_axi_arready (m_axi_mem_arready_a), - .m_axi_araddr (m_axi_mem_araddr_w), + .m_axi_araddr (m_axi_mem_araddr_u), .m_axi_arid (m_axi_mem_arid_a), .m_axi_arlen (m_axi_mem_arlen_a), `UNUSED_PIN (m_axi_arsize), diff --git a/hw/rtl/cache/VX_cache.sv b/hw/rtl/cache/VX_cache.sv index ebb5d15193..3e5a486c6b 100644 --- a/hw/rtl/cache/VX_cache.sv +++ b/hw/rtl/cache/VX_cache.sv @@ -83,7 +83,7 @@ module VX_cache import VX_gpu_pkg::*; #( localparam REQ_SEL_WIDTH = `UP(`CS_REQ_SEL_BITS); localparam WORD_SEL_WIDTH = `UP(`CS_WORD_SEL_BITS); localparam MSHR_ADDR_WIDTH = `LOG2UP(MSHR_SIZE); - localparam MEM_TAG_WIDTH = MSHR_ADDR_WIDTH + `CS_BANK_SEL_BITS; + localparam MEM_TAG_WIDTH = `CACHE_MEM_TAG_WIDTH(MSHR_SIZE, NUM_BANKS, UUID_WIDTH); localparam WORDS_PER_LINE = LINE_SIZE / WORD_SIZE; 
localparam WORD_WIDTH = WORD_SIZE * 8; localparam WORD_SEL_BITS = `CLOG2(WORDS_PER_LINE); @@ -92,6 +92,7 @@ module VX_cache import VX_gpu_pkg::*; #( localparam LINE_ADDR_WIDTH = (`CS_WORD_ADDR_WIDTH - BANK_SEL_BITS - WORD_SEL_BITS); localparam CORE_REQ_DATAW = LINE_ADDR_WIDTH + 1 + WORD_SEL_WIDTH + WORD_SIZE + WORD_WIDTH + TAG_WIDTH + 1; localparam CORE_RSP_DATAW = WORD_WIDTH + TAG_WIDTH; + localparam BANK_MEM_TAG_WIDTH = UUID_WIDTH + MSHR_ADDR_WIDTH; localparam CORE_RSP_REG_DISABLE = (NUM_BANKS != 1) || (NUM_REQS != 1); localparam MEM_REQ_REG_DISABLE = (NUM_BANKS != 1); @@ -110,6 +111,7 @@ module VX_cache import VX_gpu_pkg::*; #( ) core_bus2_if[NUM_REQS](); wire [NUM_BANKS-1:0] per_bank_flush_begin; + wire [`UP(UUID_WIDTH)-1:0] flush_uuid; wire [NUM_BANKS-1:0] per_bank_flush_end; wire [NUM_BANKS-1:0] per_bank_core_req_fire; @@ -117,6 +119,8 @@ module VX_cache import VX_gpu_pkg::*; #( VX_cache_flush #( .NUM_REQS (NUM_REQS), .NUM_BANKS (NUM_BANKS), + .UUID_WIDTH(UUID_WIDTH), + .TAG_WIDTH (TAG_WIDTH), .BANK_SEL_LATENCY (`TO_OUT_BUF_REG(REQ_XBAR_BUF)) // bank xbar latency ) flush_unit ( .clk (clk), @@ -125,6 +129,7 @@ module VX_cache import VX_gpu_pkg::*; #( .core_bus_out_if (core_bus2_if), .bank_req_fire (per_bank_core_req_fire), .flush_begin (per_bank_flush_begin), + .flush_uuid (flush_uuid), .flush_end (per_bank_flush_end) ); @@ -182,6 +187,17 @@ module VX_cache import VX_gpu_pkg::*; #( .ready_out (mem_rsp_ready_s) ); + wire [BANK_MEM_TAG_WIDTH-1:0] bank_mem_rsp_tag; + wire [`UP(`CS_BANK_SEL_BITS)-1:0] mem_rsp_bank_id; + + if (NUM_BANKS > 1) begin + assign bank_mem_rsp_tag = mem_rsp_tag_s[MEM_TAG_WIDTH-1:`CS_BANK_SEL_BITS]; + assign mem_rsp_bank_id = mem_rsp_tag_s[`CS_BANK_SEL_BITS-1:0]; + end else begin + assign bank_mem_rsp_tag = mem_rsp_tag_s; + assign mem_rsp_bank_id = 0; + end + // Memory request buffering wire mem_req_valid; @@ -190,7 +206,6 @@ module VX_cache import VX_gpu_pkg::*; #( wire [LINE_SIZE-1:0] mem_req_byteen; wire [`CS_LINE_WIDTH-1:0] 
mem_req_data; wire [MEM_TAG_WIDTH-1:0] mem_req_tag; - wire [MSHR_ADDR_WIDTH-1:0] mem_req_id; wire mem_req_flush; wire mem_req_ready; @@ -243,7 +258,7 @@ module VX_cache import VX_gpu_pkg::*; #( wire [NUM_BANKS-1:0] per_bank_mem_req_rw; wire [NUM_BANKS-1:0][LINE_SIZE-1:0] per_bank_mem_req_byteen; wire [NUM_BANKS-1:0][`CS_LINE_WIDTH-1:0] per_bank_mem_req_data; - wire [NUM_BANKS-1:0][MSHR_ADDR_WIDTH-1:0] per_bank_mem_req_id; + wire [NUM_BANKS-1:0][BANK_MEM_TAG_WIDTH-1:0] per_bank_mem_req_tag; wire [NUM_BANKS-1:0] per_bank_mem_req_flush; wire [NUM_BANKS-1:0] per_bank_mem_req_ready; @@ -251,11 +266,7 @@ module VX_cache import VX_gpu_pkg::*; #( assign per_bank_core_req_fire = per_bank_core_req_valid & per_bank_mem_req_ready; - if (NUM_BANKS == 1) begin - assign mem_rsp_ready_s = per_bank_mem_rsp_ready; - end else begin - assign mem_rsp_ready_s = per_bank_mem_rsp_ready[`CS_MEM_TAG_TO_BANK_ID(mem_rsp_tag_s)]; - end + assign mem_rsp_ready_s = per_bank_mem_rsp_ready[mem_rsp_bank_id]; // Bank requests dispatch @@ -359,13 +370,8 @@ module VX_cache import VX_gpu_pkg::*; #( // Banks access for (genvar bank_id = 0; bank_id < NUM_BANKS; ++bank_id) begin : banks wire [`CS_LINE_ADDR_WIDTH-1:0] curr_bank_mem_req_addr; - wire curr_bank_mem_rsp_valid; - if (NUM_BANKS == 1) begin - assign curr_bank_mem_rsp_valid = mem_rsp_valid_s; - end else begin - assign curr_bank_mem_rsp_valid = mem_rsp_valid_s && (`CS_MEM_TAG_TO_BANK_ID(mem_rsp_tag_s) == bank_id); - end + wire curr_bank_mem_rsp_valid = mem_rsp_valid_s && (mem_rsp_bank_id == bank_id); VX_cache_bank #( .BANK_ID (bank_id), @@ -421,17 +427,19 @@ module VX_cache import VX_gpu_pkg::*; #( .mem_req_rw (per_bank_mem_req_rw[bank_id]), .mem_req_byteen (per_bank_mem_req_byteen[bank_id]), .mem_req_data (per_bank_mem_req_data[bank_id]), - .mem_req_id (per_bank_mem_req_id[bank_id]), + .mem_req_tag (per_bank_mem_req_tag[bank_id]), .mem_req_flush (per_bank_mem_req_flush[bank_id]), .mem_req_ready (per_bank_mem_req_ready[bank_id]), // Memory response 
.mem_rsp_valid (curr_bank_mem_rsp_valid), .mem_rsp_data (mem_rsp_data_s), - .mem_rsp_id (`CS_MEM_TAG_TO_REQ_ID(mem_rsp_tag_s)), + .mem_rsp_tag (bank_mem_rsp_tag), .mem_rsp_ready (per_bank_mem_rsp_ready[bank_id]), + // Flush request .flush_begin (per_bank_flush_begin[bank_id]), + .flush_uuid (flush_uuid), .flush_end (per_bank_flush_end[bank_id]) ); @@ -476,7 +484,7 @@ module VX_cache import VX_gpu_pkg::*; #( // Memory request arbitration - wire [NUM_BANKS-1:0][(`CS_MEM_ADDR_WIDTH + MSHR_ADDR_WIDTH + 1 + LINE_SIZE + `CS_LINE_WIDTH + 1)-1:0] data_in; + wire [NUM_BANKS-1:0][(`CS_MEM_ADDR_WIDTH + 1 + LINE_SIZE + `CS_LINE_WIDTH + BANK_MEM_TAG_WIDTH + 1)-1:0] data_in; for (genvar i = 0; i < NUM_BANKS; ++i) begin assign data_in[i] = { @@ -484,14 +492,16 @@ module VX_cache import VX_gpu_pkg::*; #( per_bank_mem_req_rw[i], per_bank_mem_req_byteen[i], per_bank_mem_req_data[i], - per_bank_mem_req_id[i], + per_bank_mem_req_tag[i], per_bank_mem_req_flush[i] }; end + wire [BANK_MEM_TAG_WIDTH-1:0] bank_mem_req_tag; + VX_stream_arb #( .NUM_INPUTS (NUM_BANKS), - .DATAW (`CS_MEM_ADDR_WIDTH + 1 + LINE_SIZE + `CS_LINE_WIDTH + MSHR_ADDR_WIDTH + 1), + .DATAW (`CS_MEM_ADDR_WIDTH + 1 + LINE_SIZE + `CS_LINE_WIDTH + BANK_MEM_TAG_WIDTH + 1), .ARBITER ("R") ) mem_req_arb ( .clk (clk), @@ -499,7 +509,7 @@ module VX_cache import VX_gpu_pkg::*; #( .valid_in (per_bank_mem_req_valid), .ready_in (per_bank_mem_req_ready), .data_in (data_in), - .data_out ({mem_req_addr, mem_req_rw, mem_req_byteen, mem_req_data, mem_req_id, mem_req_flush}), + .data_out ({mem_req_addr, mem_req_rw, mem_req_byteen, mem_req_data, bank_mem_req_tag, mem_req_flush}), .valid_out (mem_req_valid), .ready_out (mem_req_ready), `UNUSED_PIN (sel_out) @@ -507,9 +517,9 @@ module VX_cache import VX_gpu_pkg::*; #( if (NUM_BANKS > 1) begin wire [`CS_BANK_SEL_BITS-1:0] mem_req_bank_id = `CS_MEM_ADDR_TO_BANK_ID(mem_req_addr); - assign mem_req_tag = MEM_TAG_WIDTH'({mem_req_bank_id, mem_req_id}); + assign mem_req_tag = 
MEM_TAG_WIDTH'({bank_mem_req_tag, mem_req_bank_id}); end else begin - assign mem_req_tag = MEM_TAG_WIDTH'(mem_req_id); + assign mem_req_tag = MEM_TAG_WIDTH'(bank_mem_req_tag); end `ifdef PERF_ENABLE diff --git a/hw/rtl/cache/VX_cache_bank.sv b/hw/rtl/cache/VX_cache_bank.sv index 181f63fbff..3e51052ef6 100644 --- a/hw/rtl/cache/VX_cache_bank.sv +++ b/hw/rtl/cache/VX_cache_bank.sv @@ -60,6 +60,7 @@ module VX_cache_bank #( parameter MEM_OUT_REG = 0, parameter MSHR_ADDR_WIDTH = `LOG2UP(MSHR_SIZE), + parameter MEM_TAG_WIDTH = UUID_WIDTH + MSHR_ADDR_WIDTH, parameter REQ_SEL_WIDTH = `UP(`CS_REQ_SEL_BITS), parameter WORD_SEL_WIDTH = `UP(`CS_WORD_SEL_BITS) ) ( @@ -97,18 +98,19 @@ module VX_cache_bank #( output wire mem_req_rw, output wire [LINE_SIZE-1:0] mem_req_byteen, output wire [`CS_LINE_WIDTH-1:0] mem_req_data, - output wire [MSHR_ADDR_WIDTH-1:0] mem_req_id, // index of the head entry in the mshr + output wire [MEM_TAG_WIDTH-1:0] mem_req_tag, output wire mem_req_flush, input wire mem_req_ready, // Memory response input wire mem_rsp_valid, input wire [`CS_LINE_WIDTH-1:0] mem_rsp_data, - input wire [MSHR_ADDR_WIDTH-1:0] mem_rsp_id, + input wire [MEM_TAG_WIDTH-1:0] mem_rsp_tag, output wire mem_rsp_ready, // flush input wire flush_begin, + input wire [`UP(UUID_WIDTH)-1:0] flush_uuid, output wire flush_end ); @@ -241,12 +243,30 @@ module VX_cache_bank #( wire flush_fire = flush_valid && flush_ready; wire core_req_fire = core_req_valid && core_req_ready; + wire [MSHR_ADDR_WIDTH-1:0] mem_rsp_id = mem_rsp_tag[MSHR_ADDR_WIDTH-1:0]; + + wire [TAG_WIDTH-1:0] mem_rsp_tag_s; + if (TAG_WIDTH > MEM_TAG_WIDTH) begin + assign mem_rsp_tag_s = {mem_rsp_tag, (TAG_WIDTH-MEM_TAG_WIDTH)'(1'b0)}; + end else begin + assign mem_rsp_tag_s = mem_rsp_tag[MEM_TAG_WIDTH-1 -: TAG_WIDTH]; + `UNUSED_VAR (mem_rsp_tag) + end + + wire [TAG_WIDTH-1:0] flush_tag; + if (UUID_WIDTH != 0) begin + assign flush_tag = {flush_uuid, (TAG_WIDTH-UUID_WIDTH)'(1'b0)}; + end else begin + assign flush_tag = '0; + end + 
assign valid_sel = init_fire || replay_fire || mem_rsp_fire || flush_fire || core_req_fire; assign rw_sel = replay_valid ? replay_rw : core_req_rw; assign byteen_sel = replay_valid ? replay_byteen : core_req_byteen; assign wsel_sel = replay_valid ? replay_wsel : core_req_wsel; assign req_idx_sel = replay_valid ? replay_idx : core_req_idx; - assign tag_sel = replay_valid ? replay_tag : core_req_tag; + assign tag_sel = (init_valid | flush_valid) ? (flush_valid ? flush_tag : '0) : + (replay_valid ? replay_tag : (mem_rsp_valid ? mem_rsp_tag_s : core_req_tag)); assign creq_flush_sel = core_req_valid && core_req_flush; assign addr_sel = (init_valid | flush_valid) ? `CS_LINE_ADDR_WIDTH'(flush_sel) : @@ -587,7 +607,7 @@ module VX_cache_bank #( wire [`CS_LINE_WIDTH-1:0] mreq_queue_data; wire [LINE_SIZE-1:0] mreq_queue_byteen; wire [`CS_LINE_ADDR_WIDTH-1:0] mreq_queue_addr; - wire [MSHR_ADDR_WIDTH-1:0] mreq_queue_id; + wire [MEM_TAG_WIDTH-1:0] mreq_queue_tag; wire mreq_queue_rw; wire mreq_queue_flush; @@ -613,7 +633,6 @@ module VX_cache_bank #( assign mreq_queue_pop = mem_req_valid && mem_req_ready; assign mreq_queue_addr = addr_st1; - assign mreq_queue_id = mshr_id_st1; assign mreq_queue_flush = creq_flush_st1; if (WRITE_ENABLE) begin @@ -637,8 +656,14 @@ module VX_cache_bank #( `UNUSED_VAR (dirty_byteen_st1) end + if (UUID_WIDTH != 0) begin + assign mreq_queue_tag = {req_uuid_st1, mshr_id_st1}; + end else begin + assign mreq_queue_tag = mshr_id_st1; + end + VX_fifo_queue #( - .DATAW (1 + `CS_LINE_ADDR_WIDTH + MSHR_ADDR_WIDTH + LINE_SIZE + `CS_LINE_WIDTH + 1), + .DATAW (1 + `CS_LINE_ADDR_WIDTH + LINE_SIZE + `CS_LINE_WIDTH + MEM_TAG_WIDTH + 1), .DEPTH (MREQ_SIZE), .ALM_FULL (MREQ_SIZE-PIPELINE_STAGES), .OUT_REG (MEM_OUT_REG) @@ -647,8 +672,8 @@ module VX_cache_bank #( .reset (reset), .push (mreq_queue_push), .pop (mreq_queue_pop), - .data_in ({mreq_queue_rw, mreq_queue_addr, mreq_queue_id, mreq_queue_byteen, mreq_queue_data, mreq_queue_flush}), - .data_out ({mem_req_rw, 
mem_req_addr, mem_req_id, mem_req_byteen, mem_req_data, mem_req_flush}), + .data_in ({mreq_queue_rw, mreq_queue_addr, mreq_queue_byteen, mreq_queue_data, mreq_queue_tag, mreq_queue_flush}), + .data_out ({mem_req_rw, mem_req_addr, mem_req_byteen, mem_req_data, mem_req_tag, mem_req_flush}), .empty (mreq_queue_empty), .alm_full (mreq_queue_alm_full), `UNUSED_PIN (full), @@ -675,7 +700,7 @@ module VX_cache_bank #( `TRACE(3, ("%t: *** %s stall: crsq=%b, mreq=%b, mshr=%b, rdw1=%b, rdw2=%b, rdw3=%b\n", $time, INSTANCE_ID, crsp_queue_stall, mreq_queue_alm_full, mshr_alm_full, rdw_hazard1_sel, rdw_hazard2_sel, rdw_hazard3_st1)) end if (mem_rsp_fire) begin - `TRACE(2, ("%t: %s fill-rsp: addr=0x%0h, mshr_id=%0d, data=0x%h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mem_rsp_addr, BANK_ID), mem_rsp_id, mem_rsp_data)) + `TRACE(2, ("%t: %s fill-rsp: addr=0x%0h, mshr_id=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mem_rsp_addr, BANK_ID), mem_rsp_id, mem_rsp_data, req_uuid_sel)) end if (replay_fire) begin `TRACE(2, ("%t: %s mshr-pop: addr=0x%0h, tag=0x%0h, req_idx=%0d (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(replay_addr, BANK_ID), replay_tag, replay_idx, req_uuid_sel)) @@ -694,9 +719,9 @@ module VX_cache_bank #( if (do_creq_wr_st1 && !WRITEBACK) begin `TRACE(2, ("%t: %s writethrough: addr=0x%0h, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mreq_queue_addr, BANK_ID), mreq_queue_byteen, mreq_queue_data, req_uuid_st1)) end else if (do_writeback_st1) begin - `TRACE(2, ("%t: %s writeback: addr=0x%0h, byteen=0x%h, data=0x%h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mreq_queue_addr, BANK_ID), mreq_queue_byteen, mreq_queue_data)) + `TRACE(2, ("%t: %s writeback: addr=0x%0h, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mreq_queue_addr, BANK_ID), mreq_queue_byteen, mreq_queue_data, req_uuid_st1)) end else begin - `TRACE(2, ("%t: %s fill-req: addr=0x%0h, mshr_id=%0d (#%0d)\n", $time, 
INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mreq_queue_addr, BANK_ID), mreq_queue_id, req_uuid_st1)) + `TRACE(2, ("%t: %s fill-req: addr=0x%0h, mshr_id=%0d (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mreq_queue_addr, BANK_ID), mshr_id_st1, req_uuid_st1)) end end end diff --git a/hw/rtl/cache/VX_cache_cluster.sv b/hw/rtl/cache/VX_cache_cluster.sv index 7173444ec6..91055a548d 100644 --- a/hw/rtl/cache/VX_cache_cluster.sv +++ b/hw/rtl/cache/VX_cache_cluster.sv @@ -82,8 +82,8 @@ module VX_cache_cluster import VX_gpu_pkg::*; #( localparam PASSTHRU = (NUM_UNITS == 0); localparam ARB_TAG_WIDTH = TAG_WIDTH + `ARB_SEL_BITS(NUM_INPUTS, NUM_CACHES); localparam MEM_TAG_WIDTH = PASSTHRU ? `CACHE_BYPASS_TAG_WIDTH(NUM_REQS, LINE_SIZE, WORD_SIZE, ARB_TAG_WIDTH) : - (NC_ENABLE ? `CACHE_NC_MEM_TAG_WIDTH(MSHR_SIZE, NUM_BANKS, NUM_REQS, LINE_SIZE, WORD_SIZE, ARB_TAG_WIDTH) : - `CACHE_MEM_TAG_WIDTH(MSHR_SIZE, NUM_BANKS)); + (NC_ENABLE ? `CACHE_NC_MEM_TAG_WIDTH(MSHR_SIZE, NUM_BANKS, NUM_REQS, LINE_SIZE, WORD_SIZE, ARB_TAG_WIDTH, UUID_WIDTH) : + `CACHE_MEM_TAG_WIDTH(MSHR_SIZE, NUM_BANKS, UUID_WIDTH)); `STATIC_ASSERT(NUM_INPUTS >= NUM_CACHES, ("invalid parameter")) diff --git a/hw/rtl/cache/VX_cache_define.vh b/hw/rtl/cache/VX_cache_define.vh index e6d7da167f..342a40a1bd 100644 --- a/hw/rtl/cache/VX_cache_define.vh +++ b/hw/rtl/cache/VX_cache_define.vh @@ -57,7 +57,6 @@ `define CS_LINE_TO_MEM_ADDR(x, i) {x, `CS_BANK_SEL_BITS'(i)} `define CS_MEM_ADDR_TO_BANK_ID(x) x[0 +: `CS_BANK_SEL_BITS] `define CS_MEM_TAG_TO_REQ_ID(x) x[MSHR_ADDR_WIDTH-1:0] -`define CS_MEM_TAG_TO_BANK_ID(x) x[MSHR_ADDR_WIDTH +: `CS_BANK_SEL_BITS] `define CS_LINE_TO_FULL_ADDR(x, i) {x, (`XLEN-$bits(x))'(i << (`XLEN-$bits(x)-`CS_BANK_SEL_BITS))} `define CS_MEM_TO_FULL_ADDR(x) {x, (`XLEN-$bits(x))'(0)} diff --git a/hw/rtl/cache/VX_cache_flush.sv b/hw/rtl/cache/VX_cache_flush.sv index 648fbebb30..a841f3ebcf 100644 --- a/hw/rtl/cache/VX_cache_flush.sv +++ b/hw/rtl/cache/VX_cache_flush.sv @@ -18,6 +18,10 @@ module 
VX_cache_flush #( parameter NUM_REQS = 4, // Number of banks parameter NUM_BANKS = 1, + // Request debug identifier + parameter UUID_WIDTH = 0, + // core request tag size + parameter TAG_WIDTH = UUID_WIDTH + 1, // Bank select latency parameter BANK_SEL_LATENCY = 1 ) ( @@ -27,6 +31,7 @@ module VX_cache_flush #( VX_mem_bus_if.master core_bus_out_if [NUM_REQS], input wire [NUM_BANKS-1:0] bank_req_fire, output wire [NUM_BANKS-1:0] flush_begin, + output wire [`UP(UUID_WIDTH)-1:0] flush_uuid, input wire [NUM_BANKS-1:0] flush_end ); localparam STATE_IDLE = 0; @@ -88,6 +93,7 @@ module VX_cache_flush #( wire flush_req_enable = (| flush_req_mask); reg [NUM_REQS-1:0] lock_released, lock_released_n; + reg [`UP(UUID_WIDTH)-1:0] flush_uuid_r, flush_uuid_n; for (genvar i = 0; i < NUM_REQS; ++i) begin wire input_enable = ~flush_req_enable || lock_released[i]; @@ -102,8 +108,14 @@ module VX_cache_flush #( assign core_bus_out_if[i].rsp_ready = core_bus_in_if[i].rsp_ready; end + reg [NUM_REQS-1:0][`UP(UUID_WIDTH)-1:0] core_bus_out_uuid; wire [NUM_REQS-1:0] core_bus_out_ready; for (genvar i = 0; i < NUM_REQS; ++i) begin + if (UUID_WIDTH != 0) begin + assign core_bus_out_uuid[i] = core_bus_in_if[i].req_data.tag[TAG_WIDTH-1 -: UUID_WIDTH]; + end else begin + assign core_bus_out_uuid[i] = 0; + end assign core_bus_out_ready[i] = core_bus_out_if[i].req_ready; end @@ -111,10 +123,16 @@ module VX_cache_flush #( state_n = state; flush_done_n = flush_done; lock_released_n = lock_released; + flush_uuid_n = flush_uuid_r; case (state) STATE_IDLE: begin if (flush_req_enable) begin state_n = (BANK_SEL_LATENCY != 0) ? 
STATE_WAIT1 : STATE_FLUSH; + for (integer i = NUM_REQS-1; i >= 0; --i) begin + if (flush_req_mask[i]) begin + flush_uuid_n = core_bus_out_uuid[i]; + end + end end end STATE_WAIT1: begin @@ -158,8 +176,10 @@ module VX_cache_flush #( flush_done <= flush_done_n; lock_released <= lock_released_n; end + flush_uuid_r <= flush_uuid_n; end assign flush_begin = {NUM_BANKS{state == STATE_FLUSH}}; + assign flush_uuid = flush_uuid_r; endmodule diff --git a/hw/rtl/cache/VX_cache_wrap.sv b/hw/rtl/cache/VX_cache_wrap.sv index 502e5b9d46..bf4f6de7ed 100644 --- a/hw/rtl/cache/VX_cache_wrap.sv +++ b/hw/rtl/cache/VX_cache_wrap.sv @@ -84,12 +84,11 @@ module VX_cache_wrap import VX_gpu_pkg::*; #( `STATIC_ASSERT(NUM_BANKS == (1 << `CLOG2(NUM_BANKS)), ("invalid parameter")) - localparam MSHR_ADDR_WIDTH = `LOG2UP(MSHR_SIZE); - localparam CACHE_MEM_TAG_WIDTH = MSHR_ADDR_WIDTH + `CS_BANK_SEL_BITS; + localparam CACHE_MEM_TAG_WIDTH = `CACHE_MEM_TAG_WIDTH(MSHR_SIZE, NUM_BANKS, UUID_WIDTH); - localparam MEM_TAG_WIDTH = PASSTHRU ? `CACHE_BYPASS_TAG_WIDTH(NUM_REQS, LINE_SIZE, WORD_SIZE, TAG_WIDTH) : - (NC_ENABLE ? `CACHE_NC_MEM_TAG_WIDTH(MSHR_SIZE, NUM_BANKS, NUM_REQS, LINE_SIZE, WORD_SIZE, TAG_WIDTH) : - `CACHE_MEM_TAG_WIDTH(MSHR_SIZE, NUM_BANKS)); + localparam MEM_TAG_WIDTH = PASSTHRU ? `CACHE_BYPASS_TAG_WIDTH(NUM_REQS, LINE_SIZE, WORD_SIZE, TAG_WIDTH) : + (NC_ENABLE ? 
`CACHE_NC_MEM_TAG_WIDTH(MSHR_SIZE, NUM_BANKS, NUM_REQS, LINE_SIZE, WORD_SIZE, TAG_WIDTH, UUID_WIDTH) : + CACHE_MEM_TAG_WIDTH); localparam NC_OR_BYPASS = (NC_ENABLE || PASSTHRU); From 230b29de6f1953bfa56e7086a200a35bad5615d4 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Wed, 11 Sep 2024 06:57:43 -0700 Subject: [PATCH 174/407] minor update --- hw/rtl/cache/VX_cache_bank.sv | 1 + hw/rtl/core/VX_decode.sv | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/hw/rtl/cache/VX_cache_bank.sv b/hw/rtl/cache/VX_cache_bank.sv index 3e51052ef6..0c199577b3 100644 --- a/hw/rtl/cache/VX_cache_bank.sv +++ b/hw/rtl/cache/VX_cache_bank.sv @@ -257,6 +257,7 @@ module VX_cache_bank #( if (UUID_WIDTH != 0) begin assign flush_tag = {flush_uuid, (TAG_WIDTH-UUID_WIDTH)'(1'b0)}; end else begin + `UNUSED_VAR (flush_uuid) assign flush_tag = '0; end diff --git a/hw/rtl/core/VX_decode.sv b/hw/rtl/core/VX_decode.sv index 79a8d9c3dc..70bb181a17 100644 --- a/hw/rtl/core/VX_decode.sv +++ b/hw/rtl/core/VX_decode.sv @@ -152,7 +152,7 @@ module VX_decode import VX_gpu_pkg::*; #( always @(*) begin - ex_type = '0; + ex_type = 'x; op_type = 'x; op_args = 'x; rd_v = '0; From f00f96377bc08580194960698e435442851889ef Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Wed, 11 Sep 2024 17:16:34 -0700 Subject: [PATCH 175/407] disable tracing on synthesis mode --- hw/rtl/VX_platform.vh | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/hw/rtl/VX_platform.vh b/hw/rtl/VX_platform.vh index 5a4426b285..aa63255df2 100644 --- a/hw/rtl/VX_platform.vh +++ b/hw/rtl/VX_platform.vh @@ -29,13 +29,21 @@ `endif `ifdef SYNTHESIS + `define TRACING_ON `define TRACING_OFF + `ifndef NDEBUG `define DEBUG_BLOCK(x) x + `define TRACE(level, args) \ + if (level <= `DEBUG_LEVEL) begin \ + $write args; \ + end `else `define DEBUG_BLOCK(x) + `define TRACE(level, args) `endif + `define IGNORE_UNOPTFLAT_BEGIN `define IGNORE_UNOPTFLAT_END `define IGNORE_UNUSED_BEGIN @@ -47,11 +55,9 @@ 
`define UNUSED_VAR(x) `define UNUSED_PIN(x) . x () `define UNUSED_ARG(x) x -`define TRACE(level, args) \ - if (level <= `DEBUG_LEVEL) begin \ - $write args; \ - end -`else + +`else // not SYNTHESIS + `ifdef VERILATOR `ifndef TRACING_ALL @@ -122,7 +128,7 @@ `define UNUSED_ARG(x) /* verilator lint_off UNUSED */ \ x \ /* verilator lint_on UNUSED */ -`endif +`endif // not VERILATOR `ifdef SV_DPI `define TRACE(level, args) dpi_trace(level, $sformatf args); @@ -151,7 +157,7 @@ always @(posedge clk) begin \ assert(cond) else $error msg; \ end -`else +`else // not SIMULATION `define STATIC_ASSERT(cond, msg) `define ERROR(msg) // `define ASSERT(cond, msg) // From 5c726853567bf01006db7cc490ba56bb6c6c67c0 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Wed, 11 Sep 2024 17:27:36 -0700 Subject: [PATCH 176/407] minor update --- hw/rtl/libs/VX_mem_coalescer.sv | 4 ++-- hw/rtl/libs/VX_mem_scheduler.sv | 12 ++++++------ hw/rtl/libs/VX_scope_tap.sv | 18 +++++++++--------- hw/rtl/mem/VX_gbar_unit.sv | 4 ++-- hw/rtl/mem/VX_local_mem.sv | 12 ++++++------ 5 files changed, 25 insertions(+), 25 deletions(-) diff --git a/hw/rtl/libs/VX_mem_coalescer.sv b/hw/rtl/libs/VX_mem_coalescer.sv index e56d802e19..84c417bd3c 100644 --- a/hw/rtl/libs/VX_mem_coalescer.sv +++ b/hw/rtl/libs/VX_mem_coalescer.sv @@ -331,7 +331,7 @@ module VX_mem_coalescer #( always @(posedge clk) begin if (out_req_fire) begin if (out_req_rw) begin - `TRACE(1, ("%d: %s out-req-wr: valid=%b, addr=", $time, INSTANCE_ID, out_req_mask)) + `TRACE(1, ("%t: %s out-req-wr: valid=%b, addr=", $time, INSTANCE_ID, out_req_mask)) `TRACE_ARRAY1D(1, "0x%h", out_req_addr, OUT_REQS) `TRACE(1, (", flags=")) `TRACE_ARRAY1D(1, "%b", out_req_flags, OUT_REQS) @@ -350,7 +350,7 @@ module VX_mem_coalescer #( `TRACE(1, (", pmask=%b, coalesced=%0d, tag=0x%0h (#%0d)\n", out_req_pmask, $countones(out_req_pmask), out_req_tag, out_req_uuid)) end if (out_rsp_fire) begin - `TRACE(1, ("%d: %s out-rsp: valid=%b, data=", $time, INSTANCE_ID, out_rsp_mask)) + 
`TRACE(1, ("%t: %s out-rsp: valid=%b, data=", $time, INSTANCE_ID, out_rsp_mask)) `TRACE_ARRAY1D(1, "0x%0h", out_rsp_data, OUT_REQS) `TRACE(1, (", offset=")) `TRACE_ARRAY1D(1, "%0d", ibuf_dout_offset, NUM_REQS) diff --git a/hw/rtl/libs/VX_mem_scheduler.sv b/hw/rtl/libs/VX_mem_scheduler.sv index 24ad5cdf18..1dddaba111 100644 --- a/hw/rtl/libs/VX_mem_scheduler.sv +++ b/hw/rtl/libs/VX_mem_scheduler.sv @@ -583,39 +583,39 @@ module VX_mem_scheduler #( always @(posedge clk) begin if (core_req_fire) begin if (core_req_rw) begin - `TRACE(1, ("%d: %s core-req-wr: valid=%b, addr=", $time, INSTANCE_ID, core_req_mask)) + `TRACE(1, ("%t: %s core-req-wr: valid=%b, addr=", $time, INSTANCE_ID, core_req_mask)) `TRACE_ARRAY1D(1, "0x%h", core_req_addr, CORE_REQS) `TRACE(1, (", byteen=")) `TRACE_ARRAY1D(1, "0x%h", core_req_byteen, CORE_REQS) `TRACE(1, (", data=")) `TRACE_ARRAY1D(1, "0x%0h", core_req_data, CORE_REQS) end else begin - `TRACE(1, ("%d: %s core-req-rd: valid=%b, addr=", $time, INSTANCE_ID, core_req_mask)) + `TRACE(1, ("%t: %s core-req-rd: valid=%b, addr=", $time, INSTANCE_ID, core_req_mask)) `TRACE_ARRAY1D(1, "0x%h", core_req_addr, CORE_REQS) end `TRACE(1, (", tag=0x%0h (#%0d)\n", core_req_tag, req_dbg_uuid)) end if (core_rsp_valid && core_rsp_ready) begin - `TRACE(1, ("%d: %s core-rsp: valid=%b, sop=%b, eop=%b, data=", $time, INSTANCE_ID, core_rsp_mask, core_rsp_sop, core_rsp_eop)) + `TRACE(1, ("%t: %s core-rsp: valid=%b, sop=%b, eop=%b, data=", $time, INSTANCE_ID, core_rsp_mask, core_rsp_sop, core_rsp_eop)) `TRACE_ARRAY1D(1, "0x%0h", core_rsp_data, CORE_REQS) `TRACE(1, (", tag=0x%0h (#%0d)\n", core_rsp_tag, rsp_dbg_uuid)) end if (| mem_req_fire_s) begin if (| mem_req_rw_s) begin - `TRACE(1, ("%d: %s mem-req-wr: valid=%b, addr=", $time, INSTANCE_ID, mem_req_mask_s)) + `TRACE(1, ("%t: %s mem-req-wr: valid=%b, addr=", $time, INSTANCE_ID, mem_req_mask_s)) `TRACE_ARRAY1D(1, "0x%h", mem_req_addr_s, CORE_CHANNELS) `TRACE(1, (", byteen=")) `TRACE_ARRAY1D(1, "0x%h", 
mem_req_byteen_s, CORE_CHANNELS) `TRACE(1, (", data=")) `TRACE_ARRAY1D(1, "0x%0h", mem_req_data_s, CORE_CHANNELS) end else begin - `TRACE(1, ("%d: %s mem-req-rd: valid=%b, addr=", $time, INSTANCE_ID, mem_req_mask_s)) + `TRACE(1, ("%t: %s mem-req-rd: valid=%b, addr=", $time, INSTANCE_ID, mem_req_mask_s)) `TRACE_ARRAY1D(1, "0x%h", mem_req_addr_s, CORE_CHANNELS) end `TRACE(1, (", ibuf_idx=%0d, batch_idx=%0d (#%0d)\n", ibuf_waddr_s, req_batch_idx, mem_req_dbg_uuid)) end if (mem_rsp_fire_s) begin - `TRACE(1, ("%d: %s mem-rsp: valid=%b, data=", $time, INSTANCE_ID, mem_rsp_mask_s)) + `TRACE(1, ("%t: %s mem-rsp: valid=%b, data=", $time, INSTANCE_ID, mem_rsp_mask_s)) `TRACE_ARRAY1D(1, "0x%0h", mem_rsp_data_s, CORE_CHANNELS) `TRACE(1, (", ibuf_idx=%0d, batch_idx=%0d (#%0d)\n", ibuf_raddr, rsp_batch_idx, mem_rsp_dbg_uuid)) end diff --git a/hw/rtl/libs/VX_scope_tap.sv b/hw/rtl/libs/VX_scope_tap.sv index 5ec39438cc..010b6f2cc6 100644 --- a/hw/rtl/libs/VX_scope_tap.sv +++ b/hw/rtl/libs/VX_scope_tap.sv @@ -116,13 +116,13 @@ module VX_scope_tap #( tap_state <= TAP_STATE_RUN; start_time <= timestamp; `ifdef DBG_TRACE_SCOPE - `TRACE(2, ("%d: *** scope #%0d: recording start - time=%0d\n", $time, SCOPE_ID, timestamp)) + `TRACE(2, ("%t: *** scope #%0d: recording start - time=%0d\n", $time, SCOPE_ID, timestamp)) `endif end else begin tap_state <= TAP_STATE_WAIT; delay_cntr <= start_delay; `ifdef DBG_TRACE_SCOPE - `TRACE(2, ("%d: *** scope #%0d: delayed start - time=%0d\n", $time, SCOPE_ID, start_delay)) + `TRACE(2, ("%t: *** scope #%0d: delayed start - time=%0d\n", $time, SCOPE_ID, start_delay)) `endif end end @@ -133,7 +133,7 @@ module VX_scope_tap #( tap_state <= TAP_STATE_RUN; start_time <= timestamp; `ifdef DBG_TRACE_SCOPE - `TRACE(2, ("%d: *** scope #%0d: recording start - time=%0d\n", $time, SCOPE_ID, timestamp)) + `TRACE(2, ("%t: *** scope #%0d: recording start - time=%0d\n", $time, SCOPE_ID, timestamp)) `endif end end @@ -158,7 +158,7 @@ module VX_scope_tap #( if (stop || (waddr 
>= waddr_end)) begin waddr <= waddr; `ifdef DBG_TRACE_SCOPE - `TRACE(2, ("%d: *** scope #%0d: recording stop - waddr=(%0d, %0d)\n", $time, SCOPE_ID, waddr, waddr_end)) + `TRACE(2, ("%t: *** scope #%0d: recording stop - waddr=(%0d, %0d)\n", $time, SCOPE_ID, waddr, waddr_end)) `endif tap_state <= TAP_STATE_IDLE; end @@ -258,7 +258,7 @@ module VX_scope_tap #( default:; endcase `ifdef DBG_TRACE_SCOPE - `TRACE(2, ("%d: *** scope #%0d: CMD: type=%0d\n", $time, SCOPE_ID, cmd_type)) + `TRACE(2, ("%t: *** scope #%0d: CMD: type=%0d\n", $time, SCOPE_ID, cmd_type)) `endif end CTRL_STATE_SEND: begin @@ -268,7 +268,7 @@ module VX_scope_tap #( bus_out_r <= 1'(DATAW >> ser_tx_ctr); `ifdef DBG_TRACE_SCOPE if (ser_tx_ctr == 0) begin - `TRACE(2, ("%d: *** scope #%0d: SEND width=%0d\n", $time, SCOPE_ID, DATAW)) + `TRACE(2, ("%t: *** scope #%0d: SEND width=%0d\n", $time, SCOPE_ID, DATAW)) end `endif end @@ -276,7 +276,7 @@ module VX_scope_tap #( bus_out_r <= 1'(count >> ser_tx_ctr); `ifdef DBG_TRACE_SCOPE if (ser_tx_ctr == 0) begin - `TRACE(2, ("%d: *** scope #%0d: SEND count=%0d\n", $time, SCOPE_ID, count)) + `TRACE(2, ("%t: *** scope #%0d: SEND count=%0d\n", $time, SCOPE_ID, count)) end `endif end @@ -284,7 +284,7 @@ module VX_scope_tap #( bus_out_r <= 1'(start_time >> ser_tx_ctr); `ifdef DBG_TRACE_SCOPE if (ser_tx_ctr == 0) begin - `TRACE(2, ("%d: *** scope #%0d: SEND start=%0d\n", $time, SCOPE_ID, start_time)) + `TRACE(2, ("%t: *** scope #%0d: SEND start=%0d\n", $time, SCOPE_ID, start_time)) end `endif end @@ -292,7 +292,7 @@ module VX_scope_tap #( bus_out_r <= 1'(get_data >> ser_tx_ctr); `ifdef DBG_TRACE_SCOPE if (ser_tx_ctr == 0) begin - `TRACE(2, ("%d: *** scope #%0d: SEND data=%0d\n", $time, SCOPE_ID, get_data)) + `TRACE(2, ("%t: *** scope #%0d: SEND data=%0d\n", $time, SCOPE_ID, get_data)) end `endif end diff --git a/hw/rtl/mem/VX_gbar_unit.sv b/hw/rtl/mem/VX_gbar_unit.sv index 7e03c13784..c9707748fe 100644 --- a/hw/rtl/mem/VX_gbar_unit.sv +++ b/hw/rtl/mem/VX_gbar_unit.sv @@ 
-60,11 +60,11 @@ module VX_gbar_unit #( `ifdef DBG_TRACE_GBAR always @(posedge clk) begin if (gbar_bus_if.req_valid && gbar_bus_if.req_ready) begin - `TRACE(1, ("%d: %s acquire: bar_id=%0d, size=%0d, core_id=%0d\n", + `TRACE(1, ("%t: %s acquire: bar_id=%0d, size=%0d, core_id=%0d\n", $time, INSTANCE_ID, gbar_bus_if.req_id, gbar_bus_if.req_size_m1, gbar_bus_if.req_core_id)) end if (gbar_bus_if.rsp_valid) begin - `TRACE(1, ("%d: %s release: bar_id=%0d\n", $time, INSTANCE_ID, gbar_bus_if.rsp_id)) + `TRACE(1, ("%t: %s release: bar_id=%0d\n", $time, INSTANCE_ID, gbar_bus_if.rsp_id)) end end `endif diff --git a/hw/rtl/mem/VX_local_mem.sv b/hw/rtl/mem/VX_local_mem.sv index 1c03b03870..578f4552b7 100644 --- a/hw/rtl/mem/VX_local_mem.sv +++ b/hw/rtl/mem/VX_local_mem.sv @@ -330,15 +330,15 @@ module VX_local_mem import VX_gpu_pkg::*; #( always @(posedge clk) begin if (mem_bus_if[i].req_valid && mem_bus_if[i].req_ready) begin if (mem_bus_if[i].req_data.rw) begin - `TRACE(1, ("%d: %s wr-req: req_idx=%0d, addr=0x%0h, tag=0x%0h, byteen=0x%h, data=0x%h (#%0d)\n", + `TRACE(1, ("%t: %s wr-req: req_idx=%0d, addr=0x%0h, tag=0x%0h, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, i, mem_bus_if[i].req_data.addr, mem_bus_if[i].req_data.tag, mem_bus_if[i].req_data.byteen, mem_bus_if[i].req_data.data, req_uuid[i])) end else begin - `TRACE(1, ("%d: %s rd-req: req_idx=%0d, addr=0x%0h, tag=0x%0h (#%0d)\n", + `TRACE(1, ("%t: %s rd-req: req_idx=%0d, addr=0x%0h, tag=0x%0h (#%0d)\n", $time, INSTANCE_ID, i, mem_bus_if[i].req_data.addr, mem_bus_if[i].req_data.tag, req_uuid[i])) end end if (mem_bus_if[i].rsp_valid && mem_bus_if[i].rsp_ready) begin - `TRACE(1, ("%d: %s rd-rsp: req_idx=%0d, tag=0x%0h, data=0x%h (#%0d)\n", + `TRACE(1, ("%t: %s rd-rsp: req_idx=%0d, tag=0x%0h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, i, mem_bus_if[i].rsp_data.tag, mem_bus_if[i].rsp_data.data[i], rsp_uuid[i])) end end @@ -348,15 +348,15 @@ module VX_local_mem import VX_gpu_pkg::*; #( always @(posedge clk) begin if 
(per_bank_req_valid[i] && per_bank_req_ready[i]) begin if (per_bank_req_rw[i]) begin - `TRACE(2, ("%d: %s-bank%0d wr-req: addr=0x%0h, tag=0x%0h, byteen=0x%h, data=0x%h (#%0d)\n", + `TRACE(2, ("%t: %s-bank%0d wr-req: addr=0x%0h, tag=0x%0h, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, i, per_bank_req_addr[i], per_bank_req_tag[i], per_bank_req_byteen[i], per_bank_req_data[i], per_bank_req_uuid[i])) end else begin - `TRACE(2, ("%d: %s-bank%0d rd-req: addr=0x%0h, tag=0x%0h (#%0d)\n", + `TRACE(2, ("%t: %s-bank%0d rd-req: addr=0x%0h, tag=0x%0h (#%0d)\n", $time, INSTANCE_ID, i, per_bank_req_addr[i], per_bank_req_tag[i], per_bank_req_uuid[i])) end end if (per_bank_rsp_valid[i] && per_bank_rsp_ready[i]) begin - `TRACE(2, ("%d: %s-bank%0d rd-rsp: tag=0x%0h, data=0x%h (#%0d)\n", + `TRACE(2, ("%t: %s-bank%0d rd-rsp: tag=0x%0h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, i, per_bank_rsp_tag[i], per_bank_rsp_data[i], per_bank_rsp_uuid[i])) end end From 6cf0d9f7b447995cfb67d8763077ef0e54ad1da9 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Thu, 12 Sep 2024 20:00:50 -0700 Subject: [PATCH 177/407] fixed generate labels lint warnings to improve hardware debugging --- hw/rtl/VX_cluster.sv | 2 +- hw/rtl/VX_define.vh | 19 +++-- hw/rtl/VX_platform.vh | 11 ++- hw/rtl/VX_socket.sv | 2 +- hw/rtl/Vortex.sv | 2 +- hw/rtl/Vortex_axi.sv | 2 +- hw/rtl/afu/xrt/VX_afu_ctrl.sv | 2 +- hw/rtl/afu/xrt/VX_afu_wrap.sv | 2 +- hw/rtl/cache/VX_bank_flush.sv | 4 +- hw/rtl/cache/VX_cache.sv | 51 +++++++------- hw/rtl/cache/VX_cache_bank.sv | 55 +++++++-------- hw/rtl/cache/VX_cache_bypass.sv | 108 ++++++++++++++--------------- hw/rtl/cache/VX_cache_cluster.sv | 12 ++-- hw/rtl/cache/VX_cache_data.sv | 77 ++++++++++---------- hw/rtl/cache/VX_cache_flush.sv | 21 +++--- hw/rtl/cache/VX_cache_mshr.sv | 2 +- hw/rtl/cache/VX_cache_tags.sv | 12 ++-- hw/rtl/cache/VX_cache_wrap.sv | 27 ++++---- hw/rtl/core/VX_alu_int.sv | 16 ++--- hw/rtl/core/VX_alu_muldiv.sv | 14 ++-- hw/rtl/core/VX_alu_unit.sv | 2 +- 
hw/rtl/core/VX_commit.sv | 12 ++-- hw/rtl/core/VX_core.sv | 4 +- hw/rtl/core/VX_csr_data.sv | 4 +- hw/rtl/core/VX_csr_unit.sv | 11 +-- hw/rtl/core/VX_dispatch.sv | 6 +- hw/rtl/core/VX_dispatch_unit.sv | 44 ++++++------ hw/rtl/core/VX_fetch.sv | 8 +-- hw/rtl/core/VX_fpu_unit.sv | 10 +-- hw/rtl/core/VX_gather_unit.sv | 19 ++--- hw/rtl/core/VX_ibuffer.sv | 2 +- hw/rtl/core/VX_issue.sv | 8 +-- hw/rtl/core/VX_issue_top.sv | 4 +- hw/rtl/core/VX_lsu_slice.sv | 19 ++--- hw/rtl/core/VX_lsu_unit.sv | 2 +- hw/rtl/core/VX_mem_unit.sv | 20 +++--- hw/rtl/core/VX_mem_unit_top.sv | 8 +-- hw/rtl/core/VX_operands.sv | 31 +++++---- hw/rtl/core/VX_pe_switch.sv | 4 +- hw/rtl/core/VX_schedule.sv | 6 +- hw/rtl/core/VX_scoreboard.sv | 14 ++-- hw/rtl/core/VX_split_join.sv | 2 +- hw/rtl/core/VX_wctl_unit.sv | 10 +-- hw/rtl/fpu/VX_fpu_cvt.sv | 6 +- hw/rtl/fpu/VX_fpu_div.sv | 10 +-- hw/rtl/fpu/VX_fpu_dpi.sv | 12 ++-- hw/rtl/fpu/VX_fpu_dsp.sv | 10 +-- hw/rtl/fpu/VX_fpu_fma.sv | 12 ++-- hw/rtl/fpu/VX_fpu_fpnew.sv | 4 +- hw/rtl/fpu/VX_fpu_ncp.sv | 6 +- hw/rtl/fpu/VX_fpu_sqrt.sv | 10 +-- hw/rtl/libs/VX_avs_adapter.sv | 24 +++---- hw/rtl/libs/VX_axi_adapter.sv | 26 +++---- hw/rtl/libs/VX_bits_insert.sv | 20 +++--- hw/rtl/libs/VX_bits_remove.sv | 18 ++--- hw/rtl/libs/VX_bypass_buffer.sv | 32 +++++---- hw/rtl/libs/VX_cyclic_arbiter.sv | 4 +- hw/rtl/libs/VX_decoder.sv | 4 +- hw/rtl/libs/VX_divider.sv | 30 ++++---- hw/rtl/libs/VX_dp_ram.sv | 72 ++++++++++--------- hw/rtl/libs/VX_elastic_buffer.sv | 8 +-- hw/rtl/libs/VX_encoder.sv | 28 ++++---- hw/rtl/libs/VX_fifo_queue.sv | 8 +-- hw/rtl/libs/VX_find_first.sv | 10 +-- hw/rtl/libs/VX_generic_arbiter.sv | 10 +-- hw/rtl/libs/VX_lzc.sv | 14 ++-- hw/rtl/libs/VX_matrix_arbiter.sv | 22 +++--- hw/rtl/libs/VX_mem_adapter.sv | 24 +++---- hw/rtl/libs/VX_mem_coalescer.sv | 35 ++++++---- hw/rtl/libs/VX_mem_scheduler.sv | 49 +++++++------ hw/rtl/libs/VX_multiplier.sv | 20 +++--- hw/rtl/libs/VX_mux.sv | 14 ++-- hw/rtl/libs/VX_onehot_mux.sv | 28 ++++---- 
hw/rtl/libs/VX_onehot_shift.sv | 4 +- hw/rtl/libs/VX_pe_serializer.sv | 6 +- hw/rtl/libs/VX_pending_size.sv | 12 ++-- hw/rtl/libs/VX_pipe_buffer.sv | 6 +- hw/rtl/libs/VX_pipe_register.sv | 36 +++++----- hw/rtl/libs/VX_popcount.sv | 16 ++--- hw/rtl/libs/VX_priority_arbiter.sv | 4 +- hw/rtl/libs/VX_priority_encoder.sv | 20 +++--- hw/rtl/libs/VX_reduce.sv | 40 ++++++----- hw/rtl/libs/VX_reset_relay.sv | 8 +-- hw/rtl/libs/VX_rr_arbiter.sv | 26 +++---- hw/rtl/libs/VX_scan.sv | 26 +++---- hw/rtl/libs/VX_scope_switch.sv | 16 ++--- hw/rtl/libs/VX_serial_div.sv | 17 +++-- hw/rtl/libs/VX_serial_mul.sv | 16 ++--- hw/rtl/libs/VX_shift_register.sv | 18 ++--- hw/rtl/libs/VX_skid_buffer.sv | 16 ++--- hw/rtl/libs/VX_stream_arb.sv | 72 ++++++++++--------- hw/rtl/libs/VX_stream_buffer.sv | 18 ++--- hw/rtl/libs/VX_stream_pack.sv | 9 +-- hw/rtl/libs/VX_stream_switch.sv | 42 +++++------ hw/rtl/libs/VX_stream_unpack.sv | 6 +- hw/rtl/libs/VX_stream_xbar.sv | 16 ++--- hw/rtl/libs/VX_toggle_buffer.sv | 23 +++--- hw/rtl/libs/VX_transpose.sv | 4 +- hw/rtl/mem/VX_gbar_arb.sv | 8 +-- hw/rtl/mem/VX_lmem_switch.sv | 2 +- hw/rtl/mem/VX_local_mem.sv | 36 +++++----- hw/rtl/mem/VX_lsu_adapter.sv | 30 ++++---- hw/rtl/mem/VX_mem_arb.sv | 18 ++--- sim/opaesim/Makefile | 2 +- sim/rtlsim/Makefile | 2 +- sim/xrtsim/Makefile | 2 +- 106 files changed, 938 insertions(+), 900 deletions(-) diff --git a/hw/rtl/VX_cluster.sv b/hw/rtl/VX_cluster.sv index 3e93244378..6109e873a3 100644 --- a/hw/rtl/VX_cluster.sv +++ b/hw/rtl/VX_cluster.sv @@ -128,7 +128,7 @@ module VX_cluster import VX_gpu_pkg::*; #( `BUFFER_DCR_BUS_IF (socket_dcr_bus_if, socket_dcr_bus_tmp_if, (`NUM_SOCKETS > 1)); // Generate all sockets - for (genvar socket_id = 0; socket_id < `NUM_SOCKETS; ++socket_id) begin : sockets + for (genvar socket_id = 0; socket_id < `NUM_SOCKETS; ++socket_id) begin : g_sockets `RESET_RELAY (socket_reset, reset); diff --git a/hw/rtl/VX_define.vh b/hw/rtl/VX_define.vh index f42f0b0184..fdd066434c 100644 --- 
a/hw/rtl/VX_define.vh +++ b/hw/rtl/VX_define.vh @@ -377,10 +377,13 @@ assign dst.req_data.data = src.req_data.data; \ assign dst.req_data.byteen = src.req_data.byteen; \ assign dst.req_data.flags = src.req_data.flags; \ - if (TD != TS) \ + /* verilator lint_off GENUNNAMED */ \ + if (TD != TS) begin \ assign dst.req_data.tag = {src.req_data.tag, {(TD-TS){1'b0}}}; \ - else \ + end else begin \ assign dst.req_data.tag = src.req_data.tag; \ + end \ + /* verilator lint_on GENUNNAMED */ \ assign src.req_ready = dst.req_ready; \ assign src.rsp_valid = dst.rsp_valid; \ assign src.rsp_data.data = dst.rsp_data.data; \ @@ -388,6 +391,7 @@ assign dst.rsp_ready = src.rsp_ready `define BUFFER_DCR_BUS_IF(dst, src, enable) \ + /* verilator lint_off GENUNNAMED */ \ if (enable) begin \ reg [(1 + `VX_DCR_ADDR_WIDTH + `VX_DCR_DATA_WIDTH)-1:0] __dst; \ always @(posedge clk) begin \ @@ -396,9 +400,11 @@ assign {dst.write_valid, dst.write_addr, dst.write_data} = __dst; \ end else begin \ assign {dst.write_valid, dst.write_addr, dst.write_data} = {src.write_valid, src.write_addr, src.write_data}; \ - end + end \ + /* verilator lint_on GENUNNAMED */ `define PERF_COUNTER_ADD(dst, src, field, width, count, reg_enable) \ + /* verilator lint_off GENUNNAMED */ \ if (count > 1) begin \ wire [count-1:0][width-1:0] __reduce_add_i_field; \ wire [width-1:0] __reduce_add_o_field; \ @@ -424,9 +430,11 @@ end \ end else begin \ assign dst.``field = src[0].``field; \ - end + end \ + /* verilator lint_on GENUNNAMED */ `define ASSIGN_BLOCKED_WID(dst, src, block_idx, block_size) \ + /* verilator lint_off GENUNNAMED */ \ if (block_size != 1) begin \ if (block_size != `NUM_WARPS) begin \ assign dst = {src[`NW_WIDTH-1:`CLOG2(block_size)], `CLOG2(block_size)'(block_idx)}; \ @@ -435,6 +443,7 @@ end \ end else begin \ assign dst = src; \ - end + end \ + /* verilator lint_on GENUNNAMED */ `endif // VX_DEFINE_VH diff --git a/hw/rtl/VX_platform.vh b/hw/rtl/VX_platform.vh index aa63255df2..f2d0f6a360 100644 --- 
a/hw/rtl/VX_platform.vh +++ b/hw/rtl/VX_platform.vh @@ -116,15 +116,18 @@ localparam `STRING __``x = x; \ /* verilator lint_on UNUSED */ -`define UNUSED_VAR(x) if (1) begin \ +`define UNUSED_VAR(x) /* verilator lint_off GENUNNAMED */ \ + if (1) begin \ /* verilator lint_off UNUSED */ \ wire [$bits(x)-1:0] __x = x; \ /* verilator lint_on UNUSED */ \ - end + end \ + /* verilator lint_on GENUNNAMED */ `define UNUSED_PIN(x) /* verilator lint_off PINCONNECTEMPTY */ \ . x () \ /* verilator lint_on PINCONNECTEMPTY */ + `define UNUSED_ARG(x) /* verilator lint_off UNUSED */ \ x \ /* verilator lint_on UNUSED */ @@ -143,8 +146,10 @@ `ifdef SIMULATION `define STATIC_ASSERT(cond, msg) \ - generate \ + generate \ + /* verilator lint_off GENUNNAMED */ \ if (!(cond)) $error msg; \ + /* verilator lint_on GENUNNAMED */ \ endgenerate `define ERROR(msg) \ diff --git a/hw/rtl/VX_socket.sv b/hw/rtl/VX_socket.sv index 17a027c956..766ff468a8 100644 --- a/hw/rtl/VX_socket.sv +++ b/hw/rtl/VX_socket.sv @@ -202,7 +202,7 @@ module VX_socket import VX_gpu_pkg::*; #( `BUFFER_DCR_BUS_IF (core_dcr_bus_if, dcr_bus_if, (`SOCKET_SIZE > 1)); // Generate all cores - for (genvar core_id = 0; core_id < `SOCKET_SIZE; ++core_id) begin : cores + for (genvar core_id = 0; core_id < `SOCKET_SIZE; ++core_id) begin : g_cores `RESET_RELAY (core_reset, reset); diff --git a/hw/rtl/Vortex.sv b/hw/rtl/Vortex.sv index 6dc59cad2e..0bdbec8435 100644 --- a/hw/rtl/Vortex.sv +++ b/hw/rtl/Vortex.sv @@ -129,7 +129,7 @@ module Vortex import VX_gpu_pkg::*; ( wire [`NUM_CLUSTERS-1:0] per_cluster_busy; // Generate all clusters - for (genvar cluster_id = 0; cluster_id < `NUM_CLUSTERS; ++cluster_id) begin : clusters + for (genvar cluster_id = 0; cluster_id < `NUM_CLUSTERS; ++cluster_id) begin : g_clusters `RESET_RELAY (cluster_reset, reset); diff --git a/hw/rtl/Vortex_axi.sv b/hw/rtl/Vortex_axi.sv index c5aa655c5d..a15a478ee5 100644 --- a/hw/rtl/Vortex_axi.sv +++ b/hw/rtl/Vortex_axi.sv @@ -110,7 +110,7 @@ module Vortex_axi import 
VX_gpu_pkg::*; #( wire [`VX_MEM_TAG_WIDTH-1:0] m_axi_bid_unqual [AXI_NUM_BANKS]; wire [`VX_MEM_TAG_WIDTH-1:0] m_axi_rid_unqual [AXI_NUM_BANKS]; - for (genvar i = 0; i < AXI_NUM_BANKS; ++i) begin + for (genvar i = 0; i < AXI_NUM_BANKS; ++i) begin : g_padding assign m_axi_awaddr[i] = `MEM_ADDR_WIDTH'(m_axi_awaddr_unqual[i]); assign m_axi_araddr[i] = `MEM_ADDR_WIDTH'(m_axi_araddr_unqual[i]); diff --git a/hw/rtl/afu/xrt/VX_afu_ctrl.sv b/hw/rtl/afu/xrt/VX_afu_ctrl.sv index a6cd31b05b..0acf87744b 100644 --- a/hw/rtl/afu/xrt/VX_afu_ctrl.sv +++ b/hw/rtl/afu/xrt/VX_afu_ctrl.sv @@ -235,7 +235,7 @@ module VX_afu_ctrl #( assign s_axi_aw_fire = s_axi_awvalid && s_axi_awready; assign s_axi_w_fire = s_axi_wvalid && s_axi_wready; - for (genvar i = 0; i < 4; ++i) begin + for (genvar i = 0; i < 4; ++i) begin : g_wmask assign wmask[8 * i +: 8] = {8{s_axi_wstrb[i]}}; end diff --git a/hw/rtl/afu/xrt/VX_afu_wrap.sv b/hw/rtl/afu/xrt/VX_afu_wrap.sv index 191fbe0784..c92d94c7cd 100644 --- a/hw/rtl/afu/xrt/VX_afu_wrap.sv +++ b/hw/rtl/afu/xrt/VX_afu_wrap.sv @@ -230,7 +230,7 @@ module VX_afu_wrap #( wire [`MEM_ADDR_WIDTH-1:0] m_axi_mem_awaddr_u [C_M_AXI_MEM_NUM_BANKS]; wire [`MEM_ADDR_WIDTH-1:0] m_axi_mem_araddr_u [C_M_AXI_MEM_NUM_BANKS]; - for (genvar i = 0; i < C_M_AXI_MEM_NUM_BANKS; ++i) begin + for (genvar i = 0; i < C_M_AXI_MEM_NUM_BANKS; ++i) begin : g_addressing assign m_axi_mem_awaddr_a[i] = C_M_AXI_MEM_ADDR_WIDTH'(m_axi_mem_awaddr_u[i]) + C_M_AXI_MEM_ADDR_WIDTH'(mem_base[i]); assign m_axi_mem_araddr_a[i] = C_M_AXI_MEM_ADDR_WIDTH'(m_axi_mem_araddr_u[i]) + C_M_AXI_MEM_ADDR_WIDTH'(mem_base[i]); end diff --git a/hw/rtl/cache/VX_bank_flush.sv b/hw/rtl/cache/VX_bank_flush.sv index 3ceffaa6bc..e90c93cf6a 100644 --- a/hw/rtl/cache/VX_bank_flush.sv +++ b/hw/rtl/cache/VX_bank_flush.sv @@ -113,7 +113,7 @@ module VX_bank_flush #( assign flush_valid = (state_r == STATE_FLUSH); assign flush_line = counter_r[`CS_LINE_SEL_BITS-1:0]; - if (WRITEBACK && `CS_WAY_SEL_BITS > 0) begin + if (WRITEBACK && 
`CS_WAY_SEL_BITS > 0) begin : g_flush_way VX_decoder #( .N (`CS_WAY_SEL_BITS) ) ctr_decoder ( @@ -121,7 +121,7 @@ module VX_bank_flush #( .valid_in (1'b1), .data_out (flush_way) ); - end else begin + end else begin : g_flush_way_all assign flush_way = {NUM_WAYS{1'b1}}; end diff --git a/hw/rtl/cache/VX_cache.sv b/hw/rtl/cache/VX_cache.sv index 3e5a486c6b..b6d3f95529 100644 --- a/hw/rtl/cache/VX_cache.sv +++ b/hw/rtl/cache/VX_cache.sv @@ -141,7 +141,7 @@ module VX_cache import VX_gpu_pkg::*; #( wire [NUM_REQS-1:0][TAG_WIDTH-1:0] core_rsp_tag_s; wire [NUM_REQS-1:0] core_rsp_ready_s; - for (genvar i = 0; i < NUM_REQS; ++i) begin : core_rsp_bufs + for (genvar i = 0; i < NUM_REQS; ++i) begin : g_core_rsp_buf VX_elastic_buffer #( .DATAW (`CS_WORD_WIDTH + TAG_WIDTH), .SIZE (CORE_RSP_REG_DISABLE ? `TO_OUT_BUF_SIZE(CORE_OUT_BUF) : 0), @@ -190,10 +190,10 @@ module VX_cache import VX_gpu_pkg::*; #( wire [BANK_MEM_TAG_WIDTH-1:0] bank_mem_rsp_tag; wire [`UP(`CS_BANK_SEL_BITS)-1:0] mem_rsp_bank_id; - if (NUM_BANKS > 1) begin + if (NUM_BANKS > 1) begin : g_mem_rsp_tag_s_with_banks assign bank_mem_rsp_tag = mem_rsp_tag_s[MEM_TAG_WIDTH-1:`CS_BANK_SEL_BITS]; assign mem_rsp_bank_id = mem_rsp_tag_s[`CS_BANK_SEL_BITS-1:0]; - end else begin + end else begin : g_mem_rsp_tag_s_no_bank assign bank_mem_rsp_tag = mem_rsp_tag_s; assign mem_rsp_bank_id = 0; end @@ -228,9 +228,9 @@ module VX_cache import VX_gpu_pkg::*; #( assign mem_bus_tmp_if.req_data.flags = mem_req_flush_b ? 
`MEM_REQ_FLAGS_WIDTH'(1 << `MEM_REQ_FLAG_FLUSH) : '0; - if (WRITE_ENABLE) begin + if (WRITE_ENABLE) begin : g_mem_bus_if `ASSIGN_VX_MEM_BUS_IF (mem_bus_if, mem_bus_tmp_if); - end else begin + end else begin : g_mem_bus_if_ro `ASSIGN_VX_MEM_BUS_RO_IF (mem_bus_if, mem_bus_tmp_if); end @@ -286,7 +286,7 @@ module VX_cache import VX_gpu_pkg::*; #( wire [NUM_REQS-1:0][CORE_REQ_DATAW-1:0] core_req_data_in; wire [NUM_BANKS-1:0][CORE_REQ_DATAW-1:0] core_req_data_out; - for (genvar i = 0; i < NUM_REQS; ++i) begin + for (genvar i = 0; i < NUM_REQS; ++i) begin : g_core_req assign core_req_valid[i] = core_bus2_if[i].req_valid; assign core_req_rw[i] = core_bus2_if[i].req_data.rw; assign core_req_byteen[i] = core_bus2_if[i].req_data.byteen; @@ -297,24 +297,27 @@ module VX_cache import VX_gpu_pkg::*; #( assign core_bus2_if[i].req_ready = core_req_ready[i]; end - for (genvar i = 0; i < NUM_REQS; ++i) begin - if (WORDS_PER_LINE > 1) begin + for (genvar i = 0; i < NUM_REQS; ++i) begin : g_core_req_wsel + if (WORDS_PER_LINE > 1) begin : g_wsel assign core_req_wsel[i] = core_req_addr[i][0 +: WORD_SEL_BITS]; - end else begin + end else begin : g_no_wsel assign core_req_wsel[i] = '0; end + end + + for (genvar i = 0; i < NUM_REQS; ++i) begin : g_core_req_line_addr assign core_req_line_addr[i] = core_req_addr[i][(BANK_SEL_BITS + WORD_SEL_BITS) +: LINE_ADDR_WIDTH]; end - if (NUM_BANKS > 1) begin - for (genvar i = 0; i < NUM_REQS; ++i) begin + for (genvar i = 0; i < NUM_REQS; ++i) begin : g_core_req_bid + if (NUM_BANKS > 1) begin : g_multibank assign core_req_bid[i] = core_req_addr[i][WORD_SEL_BITS +: BANK_SEL_BITS]; + end else begin : g_singlebank + assign core_req_bid[i] = '0; end - end else begin - assign core_req_bid = '0; end - for (genvar i = 0; i < NUM_REQS; ++i) begin + for (genvar i = 0; i < NUM_REQS; ++i) begin : g_core_req_data_in assign core_req_data_in[i] = { core_req_line_addr[i], core_req_rw[i], @@ -355,7 +358,7 @@ module VX_cache import VX_gpu_pkg::*; #( .ready_out 
(per_bank_core_req_ready) ); - for (genvar i = 0; i < NUM_BANKS; ++i) begin + for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_core_req_data_out assign { per_bank_core_req_addr[i], per_bank_core_req_rw[i], @@ -368,7 +371,7 @@ module VX_cache import VX_gpu_pkg::*; #( end // Banks access - for (genvar bank_id = 0; bank_id < NUM_BANKS; ++bank_id) begin : banks + for (genvar bank_id = 0; bank_id < NUM_BANKS; ++bank_id) begin : g_banks wire [`CS_LINE_ADDR_WIDTH-1:0] curr_bank_mem_req_addr; wire curr_bank_mem_rsp_valid = mem_rsp_valid_s && (mem_rsp_bank_id == bank_id); @@ -443,9 +446,9 @@ module VX_cache import VX_gpu_pkg::*; #( .flush_end (per_bank_flush_end[bank_id]) ); - if (NUM_BANKS == 1) begin + if (NUM_BANKS == 1) begin : g_per_bank_mem_req_addr_multibanks assign per_bank_mem_req_addr[bank_id] = curr_bank_mem_req_addr; - end else begin + end else begin : g_per_bank_mem_req_addr_one_bank assign per_bank_mem_req_addr[bank_id] = `CS_LINE_TO_MEM_ADDR(curr_bank_mem_req_addr, bank_id); end end @@ -455,7 +458,7 @@ module VX_cache import VX_gpu_pkg::*; #( wire [NUM_BANKS-1:0][CORE_RSP_DATAW-1:0] core_rsp_data_in; wire [NUM_REQS-1:0][CORE_RSP_DATAW-1:0] core_rsp_data_out; - for (genvar i = 0; i < NUM_BANKS; ++i) begin + for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_core_rsp_data_in assign core_rsp_data_in[i] = {per_bank_core_rsp_data[i], per_bank_core_rsp_tag[i]}; end @@ -478,7 +481,7 @@ module VX_cache import VX_gpu_pkg::*; #( `UNUSED_PIN (sel_out) ); - for (genvar i = 0; i < NUM_REQS; ++i) begin + for (genvar i = 0; i < NUM_REQS; ++i) begin : g_core_rsp_data_s assign {core_rsp_data_s[i], core_rsp_tag_s[i]} = core_rsp_data_out[i]; end @@ -486,7 +489,7 @@ module VX_cache import VX_gpu_pkg::*; #( wire [NUM_BANKS-1:0][(`CS_MEM_ADDR_WIDTH + 1 + LINE_SIZE + `CS_LINE_WIDTH + BANK_MEM_TAG_WIDTH + 1)-1:0] data_in; - for (genvar i = 0; i < NUM_BANKS; ++i) begin + for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_data_in assign data_in[i] = { per_bank_mem_req_addr[i], 
per_bank_mem_req_rw[i], @@ -515,10 +518,10 @@ module VX_cache import VX_gpu_pkg::*; #( `UNUSED_PIN (sel_out) ); - if (NUM_BANKS > 1) begin + if (NUM_BANKS > 1) begin : g_mem_req_tag_multibanks wire [`CS_BANK_SEL_BITS-1:0] mem_req_bank_id = `CS_MEM_ADDR_TO_BANK_ID(mem_req_addr); assign mem_req_tag = MEM_TAG_WIDTH'({bank_mem_req_tag, mem_req_bank_id}); - end else begin + end else begin : g_mem_req_tag_one_bank assign mem_req_tag = MEM_TAG_WIDTH'(bank_mem_req_tag); end @@ -546,7 +549,7 @@ module VX_cache import VX_gpu_pkg::*; #( `POP_COUNT(perf_mshr_stall_per_cycle, perf_mshr_stall_per_bank); wire [NUM_REQS-1:0] perf_crsp_stall_per_req; - for (genvar i = 0; i < NUM_REQS; ++i) begin + for (genvar i = 0; i < NUM_REQS; ++i) begin : g_perf_crsp_stall_per_req assign perf_crsp_stall_per_req[i] = core_bus2_if[i].rsp_valid && ~core_bus2_if[i].rsp_ready; end diff --git a/hw/rtl/cache/VX_cache_bank.sv b/hw/rtl/cache/VX_cache_bank.sv index 0c199577b3..5054fa333b 100644 --- a/hw/rtl/cache/VX_cache_bank.sv +++ b/hw/rtl/cache/VX_cache_bank.sv @@ -246,17 +246,17 @@ module VX_cache_bank #( wire [MSHR_ADDR_WIDTH-1:0] mem_rsp_id = mem_rsp_tag[MSHR_ADDR_WIDTH-1:0]; wire [TAG_WIDTH-1:0] mem_rsp_tag_s; - if (TAG_WIDTH > MEM_TAG_WIDTH) begin + if (TAG_WIDTH > MEM_TAG_WIDTH) begin : g_mem_rsp_tag_s_pad assign mem_rsp_tag_s = {mem_rsp_tag, (TAG_WIDTH-MEM_TAG_WIDTH)'(1'b0)}; - end else begin + end else begin : g_mem_rsp_tag_s_cut assign mem_rsp_tag_s = mem_rsp_tag[MEM_TAG_WIDTH-1 -: TAG_WIDTH]; `UNUSED_VAR (mem_rsp_tag) end wire [TAG_WIDTH-1:0] flush_tag; - if (UUID_WIDTH != 0) begin + if (UUID_WIDTH != 0) begin : g_flush_tag_uuid assign flush_tag = {flush_uuid, (TAG_WIDTH-UUID_WIDTH)'(1'b0)}; - end else begin + end else begin : g_flush_tag_0 `UNUSED_VAR (flush_uuid) assign flush_tag = '0; end @@ -273,20 +273,21 @@ module VX_cache_bank #( assign addr_sel = (init_valid | flush_valid) ? `CS_LINE_ADDR_WIDTH'(flush_sel) : (replay_valid ? replay_addr : (mem_rsp_valid ? 
mem_rsp_addr : core_req_addr)); - if (WRITE_ENABLE) begin + if (WRITE_ENABLE) begin : g_data_sel assign data_sel[`CS_WORD_WIDTH-1:0] = replay_valid ? replay_data : (mem_rsp_valid ? mem_rsp_data[`CS_WORD_WIDTH-1:0] : core_req_data); - end else begin + end else begin : g_data_sel_ro assign data_sel[`CS_WORD_WIDTH-1:0] = mem_rsp_data[`CS_WORD_WIDTH-1:0]; `UNUSED_VAR (core_req_data) `UNUSED_VAR (replay_data) end - for (genvar i = `CS_WORD_WIDTH; i < `CS_LINE_WIDTH; ++i) begin + + for (genvar i = `CS_WORD_WIDTH; i < `CS_LINE_WIDTH; ++i) begin : g_data_sel assign data_sel[i] = mem_rsp_data[i]; // only the memory response fills the upper words of data_sel end - if (UUID_WIDTH != 0) begin + if (UUID_WIDTH != 0) begin : g_req_uuid_sel assign req_uuid_sel = tag_sel[TAG_WIDTH-1 -: UUID_WIDTH]; - end else begin + end else begin : g_req_uuid_sel_0 assign req_uuid_sel = '0; end @@ -301,9 +302,9 @@ module VX_cache_bank #( .data_out ({valid_st0, is_init_st0, is_replay_st0, is_fill_st0, is_flush_st0, is_creq_st0, creq_flush_st0, flush_way_st0, addr_st0, data_st0, rw_st0, byteen_st0, wsel_st0, req_idx_st0, tag_st0, replay_id_st0}) ); - if (UUID_WIDTH != 0) begin + if (UUID_WIDTH != 0) begin : g_req_uuid_st0 assign req_uuid_st0 = tag_st0[TAG_WIDTH-1 -: UUID_WIDTH]; - end else begin + end else begin : g_req_uuid_st0_0 assign req_uuid_st0 = '0; end @@ -383,9 +384,9 @@ module VX_cache_bank #( // we have a tag hit wire is_hit_st1 = (| way_sel_st1); - if (UUID_WIDTH != 0) begin + if (UUID_WIDTH != 0) begin : g_req_uuid_st1 assign req_uuid_st1 = tag_st1[TAG_WIDTH-1 -: UUID_WIDTH]; - end else begin + end else begin : g_req_uuid_st1_0 assign req_uuid_st1 = '0; end @@ -434,14 +435,14 @@ module VX_cache_bank #( wire [`CS_LINE_WIDTH-1:0] dirty_data_st1; wire [LINE_SIZE-1:0] dirty_byteen_st1; - if (`CS_WORDS_PER_LINE > 1) begin + if (`CS_WORDS_PER_LINE > 1) begin : g_write_byteen_st1_wsel reg [`CS_WORDS_PER_LINE-1:0][WORD_SIZE-1:0] write_byteen_w; always @(*) begin write_byteen_w = '0; 
write_byteen_w[wsel_st1] = byteen_st1; end assign write_byteen_st1 = write_byteen_w; - end else begin + end else begin : g_write_byteen_st1 assign write_byteen_st1 = byteen_st1; end @@ -489,9 +490,9 @@ module VX_cache_bank #( // release allocated mshr entry if we had a hit wire mshr_release_st1; - if (WRITEBACK) begin + if (WRITEBACK) begin : g_mshr_release_st1 assign mshr_release_st1 = is_hit_st1; - end else begin + end else begin : g_mshr_release_st1_ro // we need to keep missed write requests in MSHR if there is already a pending entry to the same address // this ensures that missed write requests are replayed locally in case a pending fill arrives without the write content // this can happen when writes are sent late, when the fill was already in flight. @@ -566,7 +567,7 @@ module VX_cache_bank #( // check if there are pending requests to same line in the MSHR wire [MSHR_SIZE-1:0] lookup_matches; - for (genvar i = 0; i < MSHR_SIZE; ++i) begin + for (genvar i = 0; i < MSHR_SIZE; ++i) begin : g_lookup_matches assign lookup_matches[i] = mshr_lookup_pending_st0[i] && (i != mshr_alloc_id_st0) // exclude current mshr id && (WRITEBACK || ~mshr_lookup_rw_st0[i]); // exclude write requests if writethrough @@ -616,8 +617,8 @@ module VX_cache_bank #( wire do_fill_or_flush_st1 = valid_st1 && is_fill_or_flush_st1; wire do_writeback_st1 = do_fill_or_flush_st1 && evict_dirty_st1; - if (WRITEBACK) begin - if (DIRTY_BYTES) begin + if (WRITEBACK) begin : g_mreq_queue_push + if (DIRTY_BYTES) begin : g_dirty_bytes // ensure dirty bytes match the tag info wire has_dirty_bytes = (| dirty_byteen_st1); `RUNTIME_ASSERT (~do_fill_or_flush_st1 || (evict_dirty_st1 == has_dirty_bytes), ("%t: missmatch dirty bytes: dirty_line=%b, dirty_bytes=%b, addr=0x%0h", $time, evict_dirty_st1, has_dirty_bytes, `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID))) @@ -625,7 +626,7 @@ module VX_cache_bank #( assign mreq_queue_push = (((do_read_miss_st1 || do_write_miss_st1) && ~mshr_pending_st1) || 
do_writeback_st1) && ~rdw_hazard3_st1; - end else begin + end else begin : g_mreq_queue_push_ro `UNUSED_VAR (do_writeback_st1) assign mreq_queue_push = ((do_read_miss_st1 && ~mshr_pending_st1) || do_creq_wr_st1) @@ -636,12 +637,12 @@ module VX_cache_bank #( assign mreq_queue_addr = addr_st1; assign mreq_queue_flush = creq_flush_st1; - if (WRITE_ENABLE) begin - if (WRITEBACK) begin + if (WRITE_ENABLE) begin : g_mreq_queue + if (WRITEBACK) begin : g_writeback assign mreq_queue_rw = is_fill_or_flush_st1; assign mreq_queue_data = dirty_data_st1; assign mreq_queue_byteen = is_fill_or_flush_st1 ? dirty_byteen_st1 : '1; - end else begin + end else begin : g_writethrough assign mreq_queue_rw = rw_st1; assign mreq_queue_data = write_data_st1; assign mreq_queue_byteen = rw_st1 ? write_byteen_st1 : '1; @@ -649,7 +650,7 @@ module VX_cache_bank #( `UNUSED_VAR (dirty_data_st1) `UNUSED_VAR (dirty_byteen_st1) end - end else begin + end else begin : g_mreq_queue_ro assign mreq_queue_rw = 0; assign mreq_queue_data = '0; assign mreq_queue_byteen = '1; @@ -657,9 +658,9 @@ module VX_cache_bank #( `UNUSED_VAR (dirty_byteen_st1) end - if (UUID_WIDTH != 0) begin + if (UUID_WIDTH != 0) begin : g_mreq_queue_tag_uuid assign mreq_queue_tag = {req_uuid_st1, mshr_id_st1}; - end else begin + end else begin : g_mreq_queue_tag assign mreq_queue_tag = mshr_id_st1; end diff --git a/hw/rtl/cache/VX_cache_bypass.sv b/hw/rtl/cache/VX_cache_bypass.sv index a3d872d7fd..a60904d463 100644 --- a/hw/rtl/cache/VX_cache_bypass.sv +++ b/hw/rtl/cache/VX_cache_bypass.sv @@ -77,12 +77,12 @@ module VX_cache_bypass #( wire [NUM_REQS-1:0] core_req_nc_sel; wire core_req_nc_ready; - for (genvar i = 0; i < NUM_REQS; ++i) begin - if (PASSTHRU != 0) begin + for (genvar i = 0; i < NUM_REQS; ++i) begin : g_core_req_nc + if (PASSTHRU != 0) begin : g_passthru assign core_req_nc_idxs[i] = 1'b1; - end else if (NC_ENABLE) begin + end else if (NC_ENABLE) begin : g_nc assign core_req_nc_idxs[i] = 
core_bus_in_if[i].req_data.flags[`MEM_REQ_FLAG_IO]; - end else begin + end else begin : g_no_nc assign core_req_nc_idxs[i] = 1'b0; end assign core_req_nc_valids[i] = core_bus_in_if[i].req_valid && core_req_nc_idxs[i]; @@ -101,7 +101,7 @@ module VX_cache_bypass #( .grant_ready (core_req_nc_ready) ); - for (genvar i = 0; i < NUM_REQS; ++i) begin + for (genvar i = 0; i < NUM_REQS; ++i) begin : g_core_bus_out_if assign core_bus_out_if[i].req_valid = core_bus_in_if[i].req_valid && ~core_req_nc_idxs[i]; assign core_bus_out_if[i].req_data = core_bus_in_if[i].req_data; assign core_bus_in_if[i].req_ready = core_req_nc_valids[i] ? (core_req_nc_ready && core_req_nc_sel[i]) @@ -127,7 +127,7 @@ module VX_cache_bypass #( wire [CORE_TAG_WIDTH-1:0] core_req_nc_sel_tag; wire [NUM_REQS-1:0][MUX_DATAW-1:0] core_req_nc_mux_in; - for (genvar i = 0; i < NUM_REQS; ++i) begin + for (genvar i = 0; i < NUM_REQS; ++i) begin : g_core_req_nc_mux_in assign core_req_nc_mux_in[i] = { core_bus_in_if[i].req_data.rw, core_bus_in_if[i].req_data.addr, @@ -158,7 +158,7 @@ module VX_cache_bypass #( wire [CORE_TAG_ID_BITS-1:0] core_req_in_id = core_req_nc_sel_tag[CORE_TAG_ID_BITS-1:0]; - if (WORDS_PER_LINE > 1) begin + if (WORDS_PER_LINE > 1) begin : g_mem_req_multi_word_line reg [WORDS_PER_LINE-1:0][WORD_SIZE-1:0] mem_req_byteen_in_w; reg [WORDS_PER_LINE-1:0][CORE_DATA_WIDTH-1:0] mem_req_data_in_w; @@ -174,46 +174,44 @@ module VX_cache_bypass #( assign mem_req_out_byteen = mem_bus_in_if.req_valid ? mem_bus_in_if.req_data.byteen : mem_req_byteen_in_w; assign mem_req_out_data = mem_bus_in_if.req_valid ? 
mem_bus_in_if.req_data.data : mem_req_data_in_w; - if (NUM_REQS > 1) begin + if (NUM_REQS > 1) begin : g_multiple_requests assign mem_req_tag_id_bypass = MEM_TAG_ID_BITS'({core_req_nc_idx, req_wsel, core_req_in_id}); - end else begin + end else begin : g_single_request assign mem_req_tag_id_bypass = MEM_TAG_ID_BITS'({req_wsel, core_req_in_id}); end - end else begin + end else begin : g_mem_req_single_word_line assign mem_req_out_byteen = mem_bus_in_if.req_valid ? mem_bus_in_if.req_data.byteen : core_req_nc_sel_byteen; assign mem_req_out_data = mem_bus_in_if.req_valid ? mem_bus_in_if.req_data.data : core_req_nc_sel_data; - if (NUM_REQS > 1) begin + if (NUM_REQS > 1) begin : g_multiple_requests assign mem_req_tag_id_bypass = MEM_TAG_ID_BITS'({core_req_nc_idx, core_req_in_id}); - end else begin + end else begin : g_single_request assign mem_req_tag_id_bypass = MEM_TAG_ID_BITS'({core_req_in_id}); end end wire [MEM_TAG_BYPASS_BITS-1:0] mem_req_tag_bypass; - if (UUID_WIDTH != 0) begin + if (UUID_WIDTH != 0) begin : g_mem_req_tag_bypass_with_uuid assign mem_req_tag_bypass = {core_req_nc_sel_tag[CORE_TAG_ID_BITS +: UUID_WIDTH], mem_req_tag_id_bypass}; - end else begin + end else begin : g_mem_req_tag_bypass assign mem_req_tag_bypass = mem_req_tag_id_bypass; end - if (PASSTHRU != 0) begin + if (PASSTHRU != 0) begin : g_mem_req_out_tag_passthru assign mem_req_out_tag = mem_req_tag_bypass; `UNUSED_VAR (mem_bus_in_if.req_data.tag) - end else begin - if (NC_ENABLE) begin - VX_bits_insert #( - .N (MEM_TAG_OUT_WIDTH-1), - .S (1), - .POS (TAG_SEL_IDX) - ) mem_req_tag_in_nc_insert ( - .data_in (mem_bus_in_if.req_valid ? 
(MEM_TAG_OUT_WIDTH-1)'(mem_bus_in_if.req_data.tag) : (MEM_TAG_OUT_WIDTH-1)'(mem_req_tag_bypass)), - .ins_in (~mem_bus_in_if.req_valid), - .data_out (mem_req_out_tag) - ); - end else begin - assign mem_req_out_tag = mem_bus_in_if.req_data.tag; - end + end else if (NC_ENABLE) begin : g_mem_req_out_tag_nc + VX_bits_insert #( + .N (MEM_TAG_OUT_WIDTH-1), + .S (1), + .POS (TAG_SEL_IDX) + ) mem_req_tag_in_nc_insert ( + .data_in (mem_bus_in_if.req_valid ? (MEM_TAG_OUT_WIDTH-1)'(mem_bus_in_if.req_data.tag) : (MEM_TAG_OUT_WIDTH-1)'(mem_req_tag_bypass)), + .ins_in (~mem_bus_in_if.req_valid), + .data_out (mem_req_out_tag) + ); + end else begin : g_mem_req_out_tag + assign mem_req_out_tag = mem_bus_in_if.req_data.tag; end assign mem_bus_in_if.req_ready = mem_req_out_ready; @@ -241,14 +239,12 @@ module VX_cache_bypass #( wire [NUM_REQS-1:0] core_rsp_in_ready; wire is_mem_rsp_nc; - if (PASSTHRU != 0) begin + if (PASSTHRU != 0) begin : g_is_mem_rsp_nc_passthru assign is_mem_rsp_nc = mem_bus_out_if.rsp_valid; - end else begin - if (NC_ENABLE) begin - assign is_mem_rsp_nc = mem_bus_out_if.rsp_valid && mem_bus_out_if.rsp_data.tag[TAG_SEL_IDX]; - end else begin - assign is_mem_rsp_nc = 1'b0; - end + end else if (NC_ENABLE) begin : g_is_mem_rsp_nc + assign is_mem_rsp_nc = mem_bus_out_if.rsp_valid && mem_bus_out_if.rsp_data.tag[TAG_SEL_IDX]; + end else begin : g_is_no_mem_rsp_nc + assign is_mem_rsp_nc = 1'b0; end wire [(MEM_TAG_OUT_WIDTH - NC_ENABLE)-1:0] mem_rsp_tag_id_nc; @@ -263,47 +259,47 @@ module VX_cache_bypass #( ); wire [REQ_SEL_WIDTH-1:0] rsp_idx; - if (NUM_REQS > 1) begin + if (NUM_REQS > 1) begin : g_rsp_idx assign rsp_idx = mem_rsp_tag_id_nc[(CORE_TAG_ID_BITS + WSEL_BITS) +: REQ_SEL_BITS]; - end else begin + end else begin : g_rsp_idx_0 assign rsp_idx = 1'b0; end - for (genvar i = 0; i < NUM_REQS; ++i) begin + for (genvar i = 0; i < NUM_REQS; ++i) begin : g_core_rsp_in_valid assign core_rsp_in_valid[i] = core_bus_out_if[i].rsp_valid || (is_mem_rsp_nc && rsp_idx == 
REQ_SEL_WIDTH'(i)); + end + for (genvar i = 0; i < NUM_REQS; ++i) begin : g_core_rsp_in_ready assign core_bus_out_if[i].rsp_ready = core_rsp_in_ready[i]; end - if (WORDS_PER_LINE > 1) begin - wire [WSEL_BITS-1:0] rsp_wsel = mem_rsp_tag_id_nc[CORE_TAG_ID_BITS +: WSEL_BITS]; - for (genvar i = 0; i < NUM_REQS; ++i) begin + for (genvar i = 0; i < NUM_REQS; ++i) begin : g_core_rsp_in_data + if (WORDS_PER_LINE > 1) begin : g_wsel + wire [WSEL_BITS-1:0] rsp_wsel = mem_rsp_tag_id_nc[CORE_TAG_ID_BITS +: WSEL_BITS]; assign core_rsp_in_data[i] = core_bus_out_if[i].rsp_valid ? core_bus_out_if[i].rsp_data.data : mem_bus_out_if.rsp_data.data[rsp_wsel * CORE_DATA_WIDTH +: CORE_DATA_WIDTH]; - end - end else begin - for (genvar i = 0; i < NUM_REQS; ++i) begin + end else begin : g_no_wsel assign core_rsp_in_data[i] = core_bus_out_if[i].rsp_valid ? core_bus_out_if[i].rsp_data.data : mem_bus_out_if.rsp_data.data; end end wire [(CORE_TAG_ID_BITS + UUID_WIDTH)-1:0] mem_rsp_tag_in_nc2; - if (UUID_WIDTH != 0) begin + if (UUID_WIDTH != 0) begin : g_mem_rsp_tag_in_nc2_uuid assign mem_rsp_tag_in_nc2 = {mem_rsp_tag_id_nc[(MEM_TAG_OUT_WIDTH - NC_ENABLE)-1 -: UUID_WIDTH], mem_rsp_tag_id_nc[CORE_TAG_ID_BITS-1:0]}; - end else begin + end else begin : g_mem_rsp_tag_in_nc2 assign mem_rsp_tag_in_nc2 = mem_rsp_tag_id_nc[CORE_TAG_ID_BITS-1:0]; end - for (genvar i = 0; i < NUM_REQS; ++i) begin - if (PASSTHRU) begin + for (genvar i = 0; i < NUM_REQS; ++i) begin : g_core_rsp_in_tag + if (PASSTHRU) begin : g_passthru assign core_rsp_in_tag[i] = mem_rsp_tag_in_nc2; - end else if (NC_ENABLE) begin + end else if (NC_ENABLE) begin : g_nc assign core_rsp_in_tag[i] = core_bus_out_if[i].rsp_valid ? 
core_bus_out_if[i].rsp_data.tag : mem_rsp_tag_in_nc2; - end else begin + end else begin : g_no_nc assign core_rsp_in_tag[i] = core_bus_out_if[i].rsp_data.tag; end end - for (genvar i = 0; i < NUM_REQS; ++i) begin + for (genvar i = 0; i < NUM_REQS; ++i) begin : g_core_rsp_buf VX_elastic_buffer #( .DATAW (`CS_WORD_WIDTH + CORE_TAG_WIDTH), .SIZE (DIRECT_PASSTHRU ? 0 : `TO_OUT_BUF_SIZE(CORE_OUT_BUF)), @@ -322,22 +318,22 @@ module VX_cache_bypass #( // handle memory responses //////////////////////////////////////////////// - if (PASSTHRU != 0) begin + if (PASSTHRU != 0) begin : g_mem_bus_in_if_passthru assign mem_bus_in_if.rsp_valid = 1'b0; assign mem_bus_in_if.rsp_data.data = '0; assign mem_bus_in_if.rsp_data.tag = '0; - end else if (NC_ENABLE) begin + end else if (NC_ENABLE) begin : g_mem_bus_in_if_nc assign mem_bus_in_if.rsp_valid = mem_bus_out_if.rsp_valid && ~mem_bus_out_if.rsp_data.tag[TAG_SEL_IDX]; assign mem_bus_in_if.rsp_data.data = mem_bus_out_if.rsp_data.data; assign mem_bus_in_if.rsp_data.tag = mem_rsp_tag_id_nc[MEM_TAG_IN_WIDTH-1:0]; - end else begin + end else begin : g_mem_bus_in_if assign mem_bus_in_if.rsp_valid = mem_bus_out_if.rsp_valid; assign mem_bus_in_if.rsp_data.data = mem_bus_out_if.rsp_data.data; assign mem_bus_in_if.rsp_data.tag = mem_rsp_tag_id_nc; end wire [NUM_REQS-1:0] core_rsp_out_valid; - for (genvar i = 0; i < NUM_REQS; ++i) begin + for (genvar i = 0; i < NUM_REQS; ++i) begin : g_core_rsp_out_valid assign core_rsp_out_valid[i] = core_bus_out_if[i].rsp_valid; end diff --git a/hw/rtl/cache/VX_cache_cluster.sv b/hw/rtl/cache/VX_cache_cluster.sv index 91055a548d..5a8bb98659 100644 --- a/hw/rtl/cache/VX_cache_cluster.sv +++ b/hw/rtl/cache/VX_cache_cluster.sv @@ -102,7 +102,7 @@ module VX_cache_cluster import VX_gpu_pkg::*; #( .TAG_WIDTH (ARB_TAG_WIDTH) ) arb_core_bus_if[NUM_CACHES * NUM_REQS](); - for (genvar i = 0; i < NUM_REQS; ++i) begin : core_arbs + for (genvar i = 0; i < NUM_REQS; ++i) begin : g_core_arb VX_mem_bus_if #( .DATA_SIZE 
(WORD_SIZE), .TAG_WIDTH (TAG_WIDTH) @@ -113,7 +113,7 @@ module VX_cache_cluster import VX_gpu_pkg::*; #( .TAG_WIDTH (ARB_TAG_WIDTH) ) arb_core_bus_tmp_if[NUM_CACHES](); - for (genvar j = 0; j < NUM_INPUTS; ++j) begin + for (genvar j = 0; j < NUM_INPUTS; ++j) begin : g_core_bus_tmp_if `ASSIGN_VX_MEM_BUS_IF (core_bus_tmp_if[j], core_bus_if[j * NUM_REQS + i]); end @@ -133,12 +133,12 @@ module VX_cache_cluster import VX_gpu_pkg::*; #( .bus_out_if (arb_core_bus_tmp_if) ); - for (genvar k = 0; k < NUM_CACHES; ++k) begin + for (genvar k = 0; k < NUM_CACHES; ++k) begin : g_arb_core_bus_if `ASSIGN_VX_MEM_BUS_IF (arb_core_bus_if[k * NUM_REQS + i], arb_core_bus_tmp_if[k]); end end - for (genvar i = 0; i < NUM_CACHES; ++i) begin : caches + for (genvar i = 0; i < NUM_CACHES; ++i) begin : g_cache_wrap VX_cache_wrap #( .INSTANCE_ID ($sformatf("%s%0d", INSTANCE_ID, i)), .CACHE_SIZE (CACHE_SIZE), @@ -192,9 +192,9 @@ module VX_cache_cluster import VX_gpu_pkg::*; #( .bus_out_if (mem_bus_tmp_if) ); - if (WRITE_ENABLE) begin + if (WRITE_ENABLE) begin : g_mem_bus_if `ASSIGN_VX_MEM_BUS_IF (mem_bus_if, mem_bus_tmp_if[0]); - end else begin + end else begin : g_mem_bus_if_ro `ASSIGN_VX_MEM_BUS_RO_IF (mem_bus_if, mem_bus_tmp_if[0]); end diff --git a/hw/rtl/cache/VX_cache_data.sv b/hw/rtl/cache/VX_cache_data.sv index c4713f813a..27844fd6f5 100644 --- a/hw/rtl/cache/VX_cache_data.sv +++ b/hw/rtl/cache/VX_cache_data.sv @@ -75,64 +75,63 @@ module VX_cache_data #( wire [`CS_WORDS_PER_LINE-1:0][NUM_WAYS-1:0][`CS_WORD_WIDTH-1:0] line_rdata; wire [`LOG2UP(NUM_WAYS)-1:0] way_idx; - if (WRITEBACK) begin : dirty_bytes - if (DIRTY_BYTES) begin - wire [NUM_WAYS-1:0][LINE_SIZE-1:0] bs_rdata; - wire [NUM_WAYS-1:0][LINE_SIZE-1:0] bs_wdata; - - for (genvar i = 0; i < NUM_WAYS; ++i) begin - wire [LINE_SIZE-1:0] wdata = write ? (bs_rdata[i] | write_byteen) : ((fill || flush) ? '0 : bs_rdata[i]); - assign bs_wdata[i] = init ? '0 : (way_sel[i] ? 
wdata : bs_rdata[i]); - end - - VX_sp_ram #( - .DATAW (LINE_SIZE * NUM_WAYS), - .SIZE (`CS_LINES_PER_BANK) - ) byteen_store ( - .clk (clk), - .reset (reset), - .read (write || fill || flush), - .write (init || write || fill || flush), - .wren (1'b1), - .addr (line_sel), - .wdata (bs_wdata), - .rdata (bs_rdata) - ); - - assign dirty_byteen = bs_rdata[way_idx]; - end else begin - assign dirty_byteen = '1; - end - + if (WRITEBACK) begin : g_dirty_data wire [NUM_WAYS-1:0][`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] flipped_rdata; - for (genvar i = 0; i < `CS_WORDS_PER_LINE; ++i) begin - for (genvar j = 0; j < NUM_WAYS; ++j) begin + for (genvar i = 0; i < `CS_WORDS_PER_LINE; ++i) begin : g_flipped_rdata + for (genvar j = 0; j < NUM_WAYS; ++j) begin : g_j assign flipped_rdata[j][i] = line_rdata[i][j]; end end assign dirty_data = flipped_rdata[way_idx]; - end else begin - assign dirty_byteen = '1; + end else begin : g_dirty_data_0 assign dirty_data = '0; end + if (DIRTY_BYTES) begin : g_dirty_byteen + wire [NUM_WAYS-1:0][LINE_SIZE-1:0] bs_rdata; + wire [NUM_WAYS-1:0][LINE_SIZE-1:0] bs_wdata; + + for (genvar i = 0; i < NUM_WAYS; ++i) begin : g_bs_wdata + wire [LINE_SIZE-1:0] wdata = write ? (bs_rdata[i] | write_byteen) : ((fill || flush) ? '0 : bs_rdata[i]); + assign bs_wdata[i] = init ? '0 : (way_sel[i] ? wdata : bs_rdata[i]); + end + + VX_sp_ram #( + .DATAW (LINE_SIZE * NUM_WAYS), + .SIZE (`CS_LINES_PER_BANK) + ) byteen_store ( + .clk (clk), + .reset (reset), + .read (write || fill || flush), + .write (init || write || fill || flush), + .wren (1'b1), + .addr (line_sel), + .wdata (bs_wdata), + .rdata (bs_rdata) + ); + + assign dirty_byteen = bs_rdata[way_idx]; + end else begin : g_dirty_byteen_0 + assign dirty_byteen = '1; + end + // order the data layout to perform ways multiplexing last. // this allows converting way index to binary in parallel with BRAM readaccess and way selection. 
wire [`CS_WORDS_PER_LINE-1:0][NUM_WAYS-1:0][`CS_WORD_WIDTH-1:0] line_wdata; wire [BYTEENW-1:0] line_wren; - if (WRITE_ENABLE != 0 || (NUM_WAYS > 1)) begin + if (WRITE_ENABLE != 0 || (NUM_WAYS > 1)) begin : g_line_wdata wire [`CS_WORDS_PER_LINE-1:0][NUM_WAYS-1:0][WORD_SIZE-1:0] wren_w; - for (genvar i = 0; i < `CS_WORDS_PER_LINE; ++i) begin - for (genvar j = 0; j < NUM_WAYS; ++j) begin + for (genvar i = 0; i < `CS_WORDS_PER_LINE; ++i) begin : g_i + for (genvar j = 0; j < NUM_WAYS; ++j) begin : g_j assign line_wdata[i][j] = (fill || !WRITE_ENABLE) ? fill_data[i] : write_data[i]; assign wren_w[i][j] = ((fill || !WRITE_ENABLE) ? {WORD_SIZE{1'b1}} : write_byteen[i]) & {WORD_SIZE{(way_sel[j] || (NUM_WAYS == 1))}}; end end assign line_wren = wren_w; - end else begin + end else begin : g_line_wdata_ro `UNUSED_VAR (write) `UNUSED_VAR (write_byteen) `UNUSED_VAR (write_data) @@ -171,9 +170,9 @@ module VX_cache_data #( ); wire [NUM_WAYS-1:0][`CS_WORD_WIDTH-1:0] per_way_rdata; - if (`CS_WORDS_PER_LINE > 1) begin + if (`CS_WORDS_PER_LINE > 1) begin : g_per_way_rdata_wsel assign per_way_rdata = line_rdata[wsel]; - end else begin + end else begin : g_per_way_rdata `UNUSED_VAR (wsel) assign per_way_rdata = line_rdata; end diff --git a/hw/rtl/cache/VX_cache_flush.sv b/hw/rtl/cache/VX_cache_flush.sv index a841f3ebcf..b318dc5af6 100644 --- a/hw/rtl/cache/VX_cache_flush.sv +++ b/hw/rtl/cache/VX_cache_flush.sv @@ -46,13 +46,13 @@ module VX_cache_flush #( wire no_inflight_reqs; - if (BANK_SEL_LATENCY != 0) begin + if (BANK_SEL_LATENCY != 0) begin : g_bank_sel_latency localparam NUM_REQS_W = `CLOG2(NUM_REQS+1); localparam NUM_BANKS_W = `CLOG2(NUM_BANKS+1); wire [NUM_REQS-1:0] core_bus_out_fire; - for (genvar i = 0; i < NUM_REQS; ++i) begin + for (genvar i = 0; i < NUM_REQS; ++i) begin : g_core_bus_out_fire assign core_bus_out_fire[i] = core_bus_out_if[i].req_valid && core_bus_out_if[i].req_ready; end @@ -79,7 +79,7 @@ module VX_cache_flush #( `UNUSED_PIN (size) ); - end else begin + end 
else begin : g_no_bank_sel_latency assign no_inflight_reqs = 0; `UNUSED_VAR (bank_req_fire) end @@ -87,7 +87,7 @@ module VX_cache_flush #( reg [NUM_BANKS-1:0] flush_done, flush_done_n; wire [NUM_REQS-1:0] flush_req_mask; - for (genvar i = 0; i < NUM_REQS; ++i) begin + for (genvar i = 0; i < NUM_REQS; ++i) begin : g_flush_req_mask assign flush_req_mask[i] = core_bus_in_if[i].req_valid && core_bus_in_if[i].req_data.flags[`MEM_REQ_FLAG_FLUSH]; end wire flush_req_enable = (| flush_req_mask); @@ -95,14 +95,14 @@ module VX_cache_flush #( reg [NUM_REQS-1:0] lock_released, lock_released_n; reg [`UP(UUID_WIDTH)-1:0] flush_uuid_r, flush_uuid_n; - for (genvar i = 0; i < NUM_REQS; ++i) begin + for (genvar i = 0; i < NUM_REQS; ++i) begin : g_core_bus_out_req wire input_enable = ~flush_req_enable || lock_released[i]; assign core_bus_out_if[i].req_valid = core_bus_in_if[i].req_valid && input_enable; assign core_bus_out_if[i].req_data = core_bus_in_if[i].req_data; assign core_bus_in_if[i].req_ready = core_bus_out_if[i].req_ready && input_enable; end - for (genvar i = 0; i < NUM_REQS; ++i) begin + for (genvar i = 0; i < NUM_REQS; ++i) begin : g_core_bus_in_rsp assign core_bus_in_if[i].rsp_valid = core_bus_out_if[i].rsp_valid; assign core_bus_in_if[i].rsp_data = core_bus_out_if[i].rsp_data; assign core_bus_out_if[i].rsp_ready = core_bus_in_if[i].rsp_ready; @@ -110,12 +110,15 @@ module VX_cache_flush #( reg [NUM_REQS-1:0][`UP(UUID_WIDTH)-1:0] core_bus_out_uuid; wire [NUM_REQS-1:0] core_bus_out_ready; - for (genvar i = 0; i < NUM_REQS; ++i) begin - if (UUID_WIDTH != 0) begin + for (genvar i = 0; i < NUM_REQS; ++i) begin : g_core_bus_out_uuid + if (UUID_WIDTH != 0) begin : g_uuid assign core_bus_out_uuid[i] = core_bus_in_if[i].req_data.tag[TAG_WIDTH-1 -: UUID_WIDTH]; - end else begin + end else begin : g_no_uuid assign core_bus_out_uuid[i] = 0; end + end + + for (genvar i = 0; i < NUM_REQS; ++i) begin : g_core_bus_out_ready assign core_bus_out_ready[i] = core_bus_out_if[i].req_ready; 
end diff --git a/hw/rtl/cache/VX_cache_mshr.sv b/hw/rtl/cache/VX_cache_mshr.sv index d51d0f0d40..482c110dcf 100644 --- a/hw/rtl/cache/VX_cache_mshr.sv +++ b/hw/rtl/cache/VX_cache_mshr.sv @@ -135,7 +135,7 @@ module VX_cache_mshr #( wire dequeue_fire = dequeue_valid && dequeue_ready; wire [MSHR_SIZE-1:0] addr_matches; - for (genvar i = 0; i < MSHR_SIZE; ++i) begin + for (genvar i = 0; i < MSHR_SIZE; ++i) begin : g_addr_matches assign addr_matches[i] = valid_table[i] && (addr_table[i] == lookup_addr); end diff --git a/hw/rtl/cache/VX_cache_tags.sv b/hw/rtl/cache/VX_cache_tags.sv index 4d9fc81deb..92497b80bb 100644 --- a/hw/rtl/cache/VX_cache_tags.sv +++ b/hw/rtl/cache/VX_cache_tags.sv @@ -69,7 +69,7 @@ module VX_cache_tags #( wire [NUM_WAYS-1:0] read_valid; wire [NUM_WAYS-1:0] read_dirty; - if (NUM_WAYS > 1) begin + if (NUM_WAYS > 1) begin : g_evict_way reg [NUM_WAYS-1:0] evict_way_r; // cyclic assignment of replacement way always @(posedge clk) begin @@ -90,7 +90,7 @@ module VX_cache_tags #( .sel_in (evict_way), .data_out (evict_tag) ); - end else begin + end else begin : g_evict_way_0 `UNUSED_VAR (stall) assign evict_way = 1'b1; assign evict_tag = read_tag; @@ -100,7 +100,7 @@ module VX_cache_tags #( wire fill_s = fill && (!WRITEBACK || ~stall); wire flush_s = flush && (!WRITEBACK || ~stall); - for (genvar i = 0; i < NUM_WAYS; ++i) begin : tag_stores + for (genvar i = 0; i < NUM_WAYS; ++i) begin : g_tag_store wire do_fill = fill_s && evict_way[i]; wire do_flush = flush_s && (!WRITEBACK || way_sel[i]); // flush the whole line in writethrough mode @@ -113,10 +113,10 @@ module VX_cache_tags #( wire [TAG_WIDTH-1:0] line_wdata; wire [TAG_WIDTH-1:0] line_rdata; - if (WRITEBACK) begin + if (WRITEBACK) begin : g_writeback assign line_wdata = {line_valid, write, line_tag}; assign {read_valid[i], read_dirty[i], read_tag[i]} = line_rdata; - end else begin + end else begin : g_writethrough assign line_wdata = {line_valid, line_tag}; assign {read_valid[i], read_tag[i]} = 
line_rdata; assign read_dirty[i] = 1'b0; @@ -139,7 +139,7 @@ module VX_cache_tags #( ); end - for (genvar i = 0; i < NUM_WAYS; ++i) begin + for (genvar i = 0; i < NUM_WAYS; ++i) begin : g_tag_matches assign tag_matches[i] = read_valid[i] && (line_tag == read_tag[i]); end diff --git a/hw/rtl/cache/VX_cache_wrap.sv b/hw/rtl/cache/VX_cache_wrap.sv index bf4f6de7ed..0b8a1f3c46 100644 --- a/hw/rtl/cache/VX_cache_wrap.sv +++ b/hw/rtl/cache/VX_cache_wrap.sv @@ -107,7 +107,7 @@ module VX_cache_wrap import VX_gpu_pkg::*; #( .TAG_WIDTH (MEM_TAG_WIDTH) ) mem_bus_tmp_if(); - if (NC_OR_BYPASS) begin : bypass_if + if (NC_OR_BYPASS) begin : g_bypass VX_cache_bypass #( .NUM_REQS (NUM_REQS), @@ -141,22 +141,22 @@ module VX_cache_wrap import VX_gpu_pkg::*; #( .mem_bus_out_if (mem_bus_tmp_if) ); - end else begin + end else begin : g_no_bypass - for (genvar i = 0; i < NUM_REQS; ++i) begin + for (genvar i = 0; i < NUM_REQS; ++i) begin : g_core_bus_cache_if `ASSIGN_VX_MEM_BUS_IF (core_bus_cache_if[i], core_bus_if[i]); end `ASSIGN_VX_MEM_BUS_IF (mem_bus_tmp_if, mem_bus_cache_if); end - if (WRITE_ENABLE) begin + if (WRITE_ENABLE) begin : g_mem_bus_if `ASSIGN_VX_MEM_BUS_IF (mem_bus_if, mem_bus_tmp_if); - end else begin + end else begin : g_mem_bus_if_ro `ASSIGN_VX_MEM_BUS_RO_IF (mem_bus_if, mem_bus_tmp_if); end - if (PASSTHRU == 0) begin : cache_if + if (PASSTHRU == 0) begin : g_cache VX_cache #( .INSTANCE_ID (INSTANCE_ID), @@ -187,9 +187,9 @@ module VX_cache_wrap import VX_gpu_pkg::*; #( .mem_bus_if (mem_bus_cache_if) ); - end else begin + end else begin : g_passthru - for (genvar i = 0; i < NUM_REQS; ++i) begin + for (genvar i = 0; i < NUM_REQS; ++i) begin : g_core_bus_cache_if `UNUSED_VAR (core_bus_cache_if[i].req_valid) `UNUSED_VAR (core_bus_cache_if[i].req_data) assign core_bus_cache_if[i].req_ready = 0; @@ -214,15 +214,14 @@ module VX_cache_wrap import VX_gpu_pkg::*; #( end `ifdef DBG_TRACE_CACHE - - for (genvar i = 0; i < NUM_REQS; ++i) begin + for (genvar i = 0; i < NUM_REQS; ++i) 
begin : g_trace wire [`UP(UUID_WIDTH)-1:0] core_req_uuid; wire [`UP(UUID_WIDTH)-1:0] core_rsp_uuid; - if (UUID_WIDTH != 0) begin + if (UUID_WIDTH != 0) begin : g_core_rsp_uuid assign core_req_uuid = core_bus_if[i].req_data.tag[TAG_WIDTH-1 -: UUID_WIDTH]; assign core_rsp_uuid = core_bus_if[i].rsp_data.tag[TAG_WIDTH-1 -: UUID_WIDTH]; - end else begin + end else begin : g_no_core_rsp_uuid assign core_req_uuid = 0; assign core_rsp_uuid = 0; end @@ -247,10 +246,10 @@ module VX_cache_wrap import VX_gpu_pkg::*; #( wire [`UP(UUID_WIDTH)-1:0] mem_req_uuid; wire [`UP(UUID_WIDTH)-1:0] mem_rsp_uuid; - if ((UUID_WIDTH != 0) && (NC_OR_BYPASS != 0)) begin + if ((UUID_WIDTH != 0) && (NC_OR_BYPASS != 0)) begin : g_mem_req_uuid assign mem_req_uuid = mem_bus_if.req_data.tag[MEM_TAG_WIDTH-1 -: UUID_WIDTH]; assign mem_rsp_uuid = mem_bus_if.rsp_data.tag[MEM_TAG_WIDTH-1 -: UUID_WIDTH]; - end else begin + end else begin : g_no_mem_req_uuid assign mem_req_uuid = 0; assign mem_rsp_uuid = 0; end diff --git a/hw/rtl/core/VX_alu_int.sv b/hw/rtl/core/VX_alu_int.sv index 04d123860b..53c7ae57aa 100644 --- a/hw/rtl/core/VX_alu_int.sv +++ b/hw/rtl/core/VX_alu_int.sv @@ -71,19 +71,19 @@ module VX_alu_int #( wire [NUM_LANES-1:0][`XLEN-1:0] alu_in2_imm = execute_if.data.op_args.alu.use_imm ? {NUM_LANES{`SEXT(`XLEN, execute_if.data.op_args.alu.imm)}} : alu_in2; wire [NUM_LANES-1:0][`XLEN-1:0] alu_in2_br = (execute_if.data.op_args.alu.use_imm && ~is_br_op) ? 
{NUM_LANES{`SEXT(`XLEN, execute_if.data.op_args.alu.imm)}} : alu_in2; - for (genvar i = 0; i < NUM_LANES; ++i) begin + for (genvar i = 0; i < NUM_LANES; ++i) begin : g_add_result assign add_result[i] = alu_in1_PC[i] + alu_in2_imm[i]; assign add_result_w[i] = `XLEN'($signed(alu_in1[i][31:0] + alu_in2_imm[i][31:0])); end - for (genvar i = 0; i < NUM_LANES; ++i) begin + for (genvar i = 0; i < NUM_LANES; ++i) begin : g_sub_result wire [`XLEN:0] sub_in1 = {is_signed & alu_in1[i][`XLEN-1], alu_in1[i]}; wire [`XLEN:0] sub_in2 = {is_signed & alu_in2_br[i][`XLEN-1], alu_in2_br[i]}; assign sub_result[i] = sub_in1 - sub_in2; assign sub_result_w[i] = `XLEN'($signed(alu_in1[i][31:0] - alu_in2_imm[i][31:0])); end - for (genvar i = 0; i < NUM_LANES; ++i) begin + for (genvar i = 0; i < NUM_LANES; ++i) begin : g_shr_result wire [`XLEN:0] shr_in1 = {is_signed && alu_in1[i][`XLEN-1], alu_in1[i]}; always @(*) begin case (alu_op[1:0]) @@ -102,7 +102,7 @@ module VX_alu_int #( assign shr_result_w[i] = `XLEN'($signed(shr_res_w)); end - for (genvar i = 0; i < NUM_LANES; ++i) begin + for (genvar i = 0; i < NUM_LANES; ++i) begin : g_msc_result always @(*) begin case (alu_op[1:0]) 2'b00: msc_result[i] = alu_in1[i] & alu_in2_imm[i]; // AND @@ -114,7 +114,7 @@ module VX_alu_int #( assign msc_result_w[i] = `XLEN'($signed(alu_in1[i][31:0] << alu_in2_imm[i][4:0])); // SLLW end - for (genvar i = 0; i < NUM_LANES; ++i) begin + for (genvar i = 0; i < NUM_LANES; ++i) begin : g_alu_result wire [`XLEN-1:0] slt_br_result = `XLEN'({is_br_op && ~(| sub_result[i][`XLEN-1:0]), sub_result[i][`XLEN]}); wire [`XLEN-1:0] sub_slt_br_result = (is_sub_op && ~is_br_op) ? 
sub_result[i][`XLEN-1:0] : slt_br_result; always @(*) begin @@ -141,9 +141,9 @@ module VX_alu_int #( assign cbr_dest = add_result[0][1 +: `PC_BITS]; - if (LANE_BITS != 0) begin + if (LANE_BITS != 0) begin : g_tid assign tid = execute_if.data.tid[0 +: LANE_BITS]; - end else begin + end else begin : g_tid_0 assign tid = 0; end @@ -185,7 +185,7 @@ module VX_alu_int #( .data_out ({branch_ctl_if.valid, branch_ctl_if.wid, branch_ctl_if.taken, branch_ctl_if.dest}) ); - for (genvar i = 0; i < NUM_LANES; ++i) begin + for (genvar i = 0; i < NUM_LANES; ++i) begin : g_commit assign commit_if.data.data[i] = (is_br_op_r && is_br_static) ? {(PC_r + `PC_BITS'(2)), 1'd0} : alu_result_r[i]; end diff --git a/hw/rtl/core/VX_alu_muldiv.sv b/hw/rtl/core/VX_alu_muldiv.sv index bd498a0bba..d374013bc3 100644 --- a/hw/rtl/core/VX_alu_muldiv.sv +++ b/hw/rtl/core/VX_alu_muldiv.sv @@ -68,7 +68,7 @@ module VX_alu_muldiv #( wire mul_fire_in = mul_valid_in && mul_ready_in; - for (genvar i = 0; i < NUM_LANES; ++i) begin + for (genvar i = 0; i < NUM_LANES; ++i) begin : g_mul_result_tmp reg [`XLEN-1:0] mul_resultl, mul_resulth; wire [`XLEN-1:0] mul_in1 = is_alu_w ? (execute_if.data.rs1_data[i] & `XLEN'hFFFFFFFF) : execute_if.data.rs1_data[i]; wire [`XLEN-1:0] mul_in2 = is_alu_w ? (execute_if.data.rs2_data[i] & `XLEN'hFFFFFFFF) : execute_if.data.rs2_data[i]; @@ -103,7 +103,7 @@ module VX_alu_muldiv #( wire [NUM_LANES-1:0][`XLEN:0] mul_in1; wire [NUM_LANES-1:0][`XLEN:0] mul_in2; - for (genvar i = 0; i < NUM_LANES; ++i) begin + for (genvar i = 0; i < NUM_LANES; ++i) begin : g_mul_in assign mul_in1[i] = is_alu_w ? {{(`XLEN-31){execute_if.data.rs1_data[i][31]}}, execute_if.data.rs1_data[i][31:0]} : {is_signed_mul_a && execute_if.data.rs1_data[i][`XLEN-1], execute_if.data.rs1_data[i]}; assign mul_in2[i] = is_alu_w ? 
{{(`XLEN-31){execute_if.data.rs2_data[i][31]}}, execute_if.data.rs2_data[i][31:0]} : {is_signed_mul_b && execute_if.data.rs2_data[i][`XLEN-1], execute_if.data.rs2_data[i]}; end @@ -149,7 +149,7 @@ module VX_alu_muldiv #( `else - for (genvar i = 0; i < NUM_LANES; ++i) begin + for (genvar i = 0; i < NUM_LANES; ++i) begin : g_multiplier wire [`XLEN:0] mul_in1 = {is_signed_mul_a && execute_if.data.rs1_data[i][`XLEN-1], execute_if.data.rs1_data[i]}; wire [`XLEN:0] mul_in2 = {is_signed_mul_b && execute_if.data.rs2_data[i][`XLEN-1], execute_if.data.rs2_data[i]}; @@ -184,7 +184,7 @@ module VX_alu_muldiv #( `endif - for (genvar i = 0; i < NUM_LANES; ++i) begin + for (genvar i = 0; i < NUM_LANES; ++i) begin : g_mul_result_out `ifdef XLEN_64 assign mul_result_out[i] = is_mulh_out ? mul_result_tmp[i][2*(`XLEN)-1:`XLEN] : (is_mul_w_out ? `XLEN'($signed(mul_result_tmp[i][31:0])) : @@ -219,7 +219,7 @@ module VX_alu_muldiv #( wire [NUM_LANES-1:0][`XLEN-1:0] div_in1; wire [NUM_LANES-1:0][`XLEN-1:0] div_in2; - for (genvar i = 0; i < NUM_LANES; ++i) begin + for (genvar i = 0; i < NUM_LANES; ++i) begin : g_div_in `ifdef XLEN_64 assign div_in1[i] = is_alu_w ? {{(`XLEN-32){is_signed_op && execute_if.data.rs1_data[i][31]}}, execute_if.data.rs1_data[i][31:0]}: execute_if.data.rs1_data[i]; assign div_in2[i] = is_alu_w ? 
{{(`XLEN-32){is_signed_op && execute_if.data.rs2_data[i][31]}}, execute_if.data.rs2_data[i][31:0]}: execute_if.data.rs2_data[i]; @@ -234,7 +234,7 @@ module VX_alu_muldiv #( wire [NUM_LANES-1:0][`XLEN-1:0] div_result_in; wire div_fire_in = div_valid_in && div_ready_in; - for (genvar i = 0; i < NUM_LANES; ++i) begin + for (genvar i = 0; i < NUM_LANES; ++i) begin : g_div_result_in reg [`XLEN-1:0] div_quotient, div_remainder; always @(*) begin dpi_idiv (div_fire_in, is_signed_op, div_in1[i], div_in2[i], div_quotient, div_remainder); @@ -306,7 +306,7 @@ module VX_alu_muldiv #( assign {div_uuid_out, div_wid_out, div_tmask_out, div_PC_out, div_rd_out, div_wb_out, is_rem_op_out, is_div_w_out, div_pid_out, div_sop_out, div_eop_out} = div_tag_r; - for (genvar i = 0; i < NUM_LANES; ++i) begin + for (genvar i = 0; i < NUM_LANES; ++i) begin : g_div_result_out `ifdef XLEN_64 assign div_result_out[i] = is_rem_op_out ? (is_div_w_out ? `XLEN'($signed(div_remainder[i][31:0])) : div_remainder[i]) : (is_div_w_out ? 
`XLEN'($signed(div_quotient[i][31:0])) : div_quotient[i]); diff --git a/hw/rtl/core/VX_alu_unit.sv b/hw/rtl/core/VX_alu_unit.sv index 8ec044eeb4..951cd811bf 100644 --- a/hw/rtl/core/VX_alu_unit.sv +++ b/hw/rtl/core/VX_alu_unit.sv @@ -55,7 +55,7 @@ module VX_alu_unit #( .execute_if (per_block_execute_if) ); - for (genvar block_idx = 0; block_idx < BLOCK_SIZE; ++block_idx) begin : alus + for (genvar block_idx = 0; block_idx < BLOCK_SIZE; ++block_idx) begin : g_alus VX_execute_if #( .NUM_LANES (NUM_LANES) diff --git a/hw/rtl/core/VX_commit.sv b/hw/rtl/core/VX_commit.sv index acfae9e4df..d2e7056740 100644 --- a/hw/rtl/core/VX_commit.sv +++ b/hw/rtl/core/VX_commit.sv @@ -41,13 +41,13 @@ module VX_commit import VX_gpu_pkg::*; #( wire [`ISSUE_WIDTH-1:0][`NUM_THREADS-1:0] per_issue_commit_tmask; wire [`ISSUE_WIDTH-1:0] per_issue_commit_eop; - for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin : commit_arbs + for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin : g_commit_arbs wire [`NUM_EX_UNITS-1:0] valid_in; wire [`NUM_EX_UNITS-1:0][DATAW-1:0] data_in; wire [`NUM_EX_UNITS-1:0] ready_in; - for (genvar j = 0; j < `NUM_EX_UNITS; ++j) begin + for (genvar j = 0; j < `NUM_EX_UNITS; ++j) begin : g_data_in assign valid_in[j] = commit_if[j * `ISSUE_WIDTH + i].valid; assign data_in[j] = commit_if[j * `ISSUE_WIDTH + i].data; assign commit_if[j * `ISSUE_WIDTH + i].ready = ready_in[j]; @@ -84,7 +84,7 @@ module VX_commit import VX_gpu_pkg::*; #( assign commit_fire_any = (| per_issue_commit_fire); - for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin + for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin : g_commit_size wire [COMMIT_SIZEW-1:0] count; `POP_COUNT(count, per_issue_commit_tmask[i]); assign commit_size[i] = count; @@ -160,7 +160,7 @@ module VX_commit import VX_gpu_pkg::*; #( // Writeback - for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin + for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin : g_writeback assign writeback_if[i].valid = commit_arb_if[i].valid && commit_arb_if[i].data.wb; assign 
writeback_if[i].data.uuid = commit_arb_if[i].data.uuid; assign writeback_if[i].data.wis = wid_to_wis(commit_arb_if[i].data.wid); @@ -174,8 +174,8 @@ module VX_commit import VX_gpu_pkg::*; #( end `ifdef DBG_TRACE_PIPELINE - for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin - for (genvar j = 0; j < `NUM_EX_UNITS; ++j) begin + for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin : g_trace + for (genvar j = 0; j < `NUM_EX_UNITS; ++j) begin : g_j always @(posedge clk) begin if (commit_if[j * `ISSUE_WIDTH + i].valid && commit_if[j * `ISSUE_WIDTH + i].ready) begin `TRACE(1, ("%t: %s: wid=%0d, PC=0x%0h, ex=", $time, INSTANCE_ID, commit_if[j * `ISSUE_WIDTH + i].data.wid, {commit_if[j * `ISSUE_WIDTH + i].data.PC, 1'b0})) diff --git a/hw/rtl/core/VX_core.sv b/hw/rtl/core/VX_core.sv index d9f3de6876..1d3e126137 100644 --- a/hw/rtl/core/VX_core.sv +++ b/hw/rtl/core/VX_core.sv @@ -229,8 +229,8 @@ module VX_core import VX_gpu_pkg::*; #( wire [LSU_NUM_REQS-1:0] perf_dcache_wr_req_fire, perf_dcache_wr_req_fire_r; wire [LSU_NUM_REQS-1:0] perf_dcache_rsp_fire; - for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin - for (genvar j = 0; j < `NUM_LSU_LANES; ++j) begin + for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin : g_perf_dcache + for (genvar j = 0; j < `NUM_LSU_LANES; ++j) begin : g_j assign perf_dcache_rd_req_fire[i * `NUM_LSU_LANES + j] = lsu_mem_if[i].req_valid && lsu_mem_if[i].req_data.mask[j] && lsu_mem_if[i].req_ready && ~lsu_mem_if[i].req_data.rw; assign perf_dcache_wr_req_fire[i * `NUM_LSU_LANES + j] = lsu_mem_if[i].req_valid && lsu_mem_if[i].req_data.mask[j] && lsu_mem_if[i].req_ready && lsu_mem_if[i].req_data.rw; assign perf_dcache_rsp_fire[i * `NUM_LSU_LANES + j] = lsu_mem_if[i].rsp_valid && lsu_mem_if[i].rsp_data.mask[j] && lsu_mem_if[i].rsp_ready; diff --git a/hw/rtl/core/VX_csr_data.sv b/hw/rtl/core/VX_csr_data.sv index aa9b30e05c..68bf7f7395 100644 --- a/hw/rtl/core/VX_csr_data.sv +++ b/hw/rtl/core/VX_csr_data.sv @@ -83,7 +83,7 @@ import VX_fpu_pkg::*; wire 
[`NUM_FPU_BLOCKS-1:0][`NW_WIDTH-1:0] fpu_write_wid; fflags_t [`NUM_FPU_BLOCKS-1:0] fpu_write_fflags; - for (genvar i = 0; i < `NUM_FPU_BLOCKS; ++i) begin + for (genvar i = 0; i < `NUM_FPU_BLOCKS; ++i) begin : g_fpu_write assign fpu_write_enable[i] = fpu_csr_if[i].write_enable; assign fpu_write_wid[i] = fpu_csr_if[i].write_wid; assign fpu_write_fflags[i] = fpu_csr_if[i].write_fflags; @@ -107,7 +107,7 @@ import VX_fpu_pkg::*; end end - for (genvar i = 0; i < `NUM_FPU_BLOCKS; ++i) begin + for (genvar i = 0; i < `NUM_FPU_BLOCKS; ++i) begin : g_fpu_csr_read_frm assign fpu_csr_if[i].read_frm = fcsr[fpu_csr_if[i].read_wid][`INST_FRM_BITS+`FP_FLAGS_BITS-1:`FP_FLAGS_BITS]; end diff --git a/hw/rtl/core/VX_csr_unit.sv b/hw/rtl/core/VX_csr_unit.sv index 999c9c4162..be4f7321d8 100644 --- a/hw/rtl/core/VX_csr_unit.sv +++ b/hw/rtl/core/VX_csr_unit.sv @@ -66,7 +66,7 @@ module VX_csr_unit import VX_gpu_pkg::*; #( wire [NUM_LANES-1:0][`XLEN-1:0] rs1_data; `UNUSED_VAR (rs1_data) - for (genvar i = 0; i < NUM_LANES; ++i) begin + for (genvar i = 0; i < NUM_LANES; ++i) begin : g_rs1_data assign rs1_data[i] = execute_if.data.rs1_data[i]; end @@ -113,12 +113,15 @@ module VX_csr_unit import VX_gpu_pkg::*; #( wire [NUM_LANES-1:0][`XLEN-1:0] wtid, gtid; - for (genvar i = 0; i < NUM_LANES; ++i) begin - if (PID_BITS != 0) begin + for (genvar i = 0; i < NUM_LANES; ++i) begin : g_wtid + if (PID_BITS != 0) begin : g_pid assign wtid[i] = `XLEN'(execute_if.data.pid * NUM_LANES + i); - end else begin + end else begin : g_no_pid assign wtid[i] = `XLEN'(i); end + end + + for (genvar i = 0; i < NUM_LANES; ++i) begin : g_gtid assign gtid[i] = (`XLEN'(CORE_ID) << (`NW_BITS + `NT_BITS)) + (`XLEN'(execute_if.data.wid) << `NT_BITS) + wtid[i]; end diff --git a/hw/rtl/core/VX_dispatch.sv b/hw/rtl/core/VX_dispatch.sv index 4326298a11..1c24fe46d0 100644 --- a/hw/rtl/core/VX_dispatch.sv +++ b/hw/rtl/core/VX_dispatch.sv @@ -33,7 +33,7 @@ module VX_dispatch import VX_gpu_pkg::*; #( localparam DATAW = `UUID_WIDTH + 
ISSUE_WIS_W + `NUM_THREADS + `PC_BITS + `INST_OP_BITS + `INST_ARGS_BITS + 1 + `NR_BITS + (3 * `NUM_THREADS * `XLEN) + `NT_WIDTH; wire [`NUM_THREADS-1:0][`NT_WIDTH-1:0] tids; - for (genvar i = 0; i < `NUM_THREADS; ++i) begin + for (genvar i = 0; i < `NUM_THREADS; ++i) begin : g_tids assign tids[i] = `NT_WIDTH'(i); end @@ -53,7 +53,7 @@ module VX_dispatch import VX_gpu_pkg::*; #( wire [`NUM_EX_UNITS-1:0] operands_ready_in; assign operands_if.ready = operands_ready_in[operands_if.data.ex_type]; - for (genvar i = 0; i < `NUM_EX_UNITS; ++i) begin : buffers + for (genvar i = 0; i < `NUM_EX_UNITS; ++i) begin : g_buffers VX_elastic_buffer #( .DATAW (DATAW), .SIZE (2), @@ -88,7 +88,7 @@ module VX_dispatch import VX_gpu_pkg::*; #( wire operands_if_stall = operands_if.valid && ~operands_if.ready; - for (genvar i = 0; i < `NUM_EX_UNITS; ++i) begin + for (genvar i = 0; i < `NUM_EX_UNITS; ++i) begin : g_perf_stalls always @(posedge clk) begin if (reset) begin perf_stalls_r[i] <= '0; diff --git a/hw/rtl/core/VX_dispatch_unit.sv b/hw/rtl/core/VX_dispatch_unit.sv index 0bd4b45c4d..5d37d05783 100644 --- a/hw/rtl/core/VX_dispatch_unit.sv +++ b/hw/rtl/core/VX_dispatch_unit.sv @@ -49,7 +49,7 @@ module VX_dispatch_unit import VX_gpu_pkg::*; #( wire [`ISSUE_WIDTH-1:0][IN_DATAW-1:0] dispatch_data; wire [`ISSUE_WIDTH-1:0] dispatch_ready; - for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin + for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin : g_dispatch_data assign dispatch_valid[i] = dispatch_if[i].valid; assign dispatch_data[i] = dispatch_if[i].data; assign dispatch_if[i].ready = dispatch_ready[i]; @@ -69,10 +69,10 @@ module VX_dispatch_unit import VX_gpu_pkg::*; #( logic [BATCH_COUNT_W-1:0] batch_idx; - if (BATCH_COUNT != 1) begin + if (BATCH_COUNT != 1) begin : g_batch_idx wire [BATCH_COUNT_W-1:0] batch_idx_n; wire [BATCH_COUNT-1:0] valid_batches; - for (genvar i = 0; i < BATCH_COUNT; ++i) begin + for (genvar i = 0; i < BATCH_COUNT; ++i) begin : g_valid_batches assign valid_batches[i] = | 
dispatch_valid[i * BLOCK_SIZE +: BLOCK_SIZE]; end @@ -96,22 +96,22 @@ module VX_dispatch_unit import VX_gpu_pkg::*; #( batch_idx <= batch_idx_n; end end - end else begin + end else begin : g_batch_idx_0 assign batch_idx = 0; `UNUSED_VAR (batch_done) end wire [BLOCK_SIZE-1:0][ISSUE_W-1:0] issue_indices; - for (genvar block_idx = 0; block_idx < BLOCK_SIZE; ++block_idx) begin + for (genvar block_idx = 0; block_idx < BLOCK_SIZE; ++block_idx) begin : g_issue_indices assign issue_indices[block_idx] = ISSUE_W'(batch_idx * BLOCK_SIZE) + ISSUE_W'(block_idx); end - for (genvar block_idx = 0; block_idx < BLOCK_SIZE; ++block_idx) begin : blocks + for (genvar block_idx = 0; block_idx < BLOCK_SIZE; ++block_idx) begin : g_blocks wire [ISSUE_W-1:0] issue_idx = issue_indices[block_idx]; wire valid_p, ready_p; - if (`NUM_THREADS != NUM_LANES) begin : threads_split + if (`NUM_THREADS > NUM_LANES) begin : g_partial_threads reg [NUM_PACKETS-1:0] sent_mask_p; wire [PID_WIDTH-1:0] start_p_n, start_p, end_p; wire dispatch_valid_r; @@ -146,8 +146,8 @@ module VX_dispatch_unit import VX_gpu_pkg::*; #( wire [`NUM_THREADS-1:0][`XLEN-1:0] dispatch_rs2_data = dispatch_data[issue_idx][DATA_REGS_OFF + 1 * `NUM_THREADS * `XLEN +: `NUM_THREADS * `XLEN]; wire [`NUM_THREADS-1:0][`XLEN-1:0] dispatch_rs3_data = dispatch_data[issue_idx][DATA_REGS_OFF + 0 * `NUM_THREADS * `XLEN +: `NUM_THREADS * `XLEN]; - for (genvar i = 0; i < NUM_PACKETS; ++i) begin - for (genvar j = 0; j < NUM_LANES; ++j) begin + for (genvar i = 0; i < NUM_PACKETS; ++i) begin : g_per_packet_data + for (genvar j = 0; j < NUM_LANES; ++j) begin : g_j localparam k = i * NUM_LANES + j; assign per_packet_tmask[i][j] = dispatch_tmask[k]; assign per_packet_regs[i][0][j] = dispatch_rs1_data[k]; @@ -157,10 +157,12 @@ module VX_dispatch_unit import VX_gpu_pkg::*; #( end wire [NUM_PACKETS-1:0] packet_valids; - wire [NUM_PACKETS-1:0][PID_WIDTH-1:0] packet_ids; - - for (genvar i = 0; i < NUM_PACKETS; ++i) begin + for (genvar i = 0; i < NUM_PACKETS; 
++i) begin : g_packet_valids assign packet_valids[i] = (| per_packet_tmask[i]); + end + + wire [NUM_PACKETS-1:0][PID_WIDTH-1:0] packet_ids; + for (genvar i = 0; i < NUM_PACKETS; ++i) begin : g_packet_ids assign packet_ids[i] = PID_WIDTH'(i); end @@ -209,13 +211,13 @@ module VX_dispatch_unit import VX_gpu_pkg::*; #( assign block_pid[block_idx] = start_p; assign block_sop[block_idx] = is_first_p; assign block_eop[block_idx] = is_last_p; - if (FANOUT_ENABLE) begin + if (FANOUT_ENABLE) begin : g_block_ready_fanout assign block_ready[block_idx] = dispatch_valid_r && ready_p && block_enable; - end else begin + end else begin : g_block_ready assign block_ready[block_idx] = ready_p && block_enable; end assign block_done[block_idx] = fire_eop || ~dispatch_valid[issue_idx]; - end else begin + end else begin : g_full_threads assign valid_p = dispatch_valid[issue_idx]; assign block_tmask[block_idx] = dispatch_data[issue_idx][DATA_TMASK_OFF +: `NUM_THREADS]; assign block_regs[block_idx][0] = dispatch_data[issue_idx][DATA_REGS_OFF + 2 * `NUM_THREADS * `XLEN +: `NUM_THREADS * `XLEN]; @@ -229,13 +231,13 @@ module VX_dispatch_unit import VX_gpu_pkg::*; #( end wire [ISSUE_ISW_W-1:0] isw; - if (BATCH_COUNT != 1) begin - if (BLOCK_SIZE != 1) begin + if (BATCH_COUNT != 1) begin : g_isw_batch + if (BLOCK_SIZE != 1) begin : g_block assign isw = {batch_idx, BLOCK_SIZE_W'(block_idx)}; - end else begin + end else begin : g_no_block assign isw = batch_idx; end - end else begin + end else begin : g_isw assign isw = block_idx; end @@ -268,9 +270,9 @@ module VX_dispatch_unit import VX_gpu_pkg::*; #( .ready_out (execute_if[block_idx].ready) ); - if (`NUM_THREADS != NUM_LANES) begin + if (`NUM_THREADS != NUM_LANES) begin : g_execute_data_w_partial assign execute_data_w = execute_data; - end else begin + end else begin : g_execute_data_w_full always @(*) begin execute_data_w = execute_data; execute_data_w[2:0] = {1'b0, 1'b1, 1'b1}; // default pid, sop, and eop diff --git a/hw/rtl/core/VX_fetch.sv 
b/hw/rtl/core/VX_fetch.sv index dab4772db9..a2a80ed94b 100644 --- a/hw/rtl/core/VX_fetch.sv +++ b/hw/rtl/core/VX_fetch.sv @@ -71,7 +71,7 @@ module VX_fetch import VX_gpu_pkg::*; #( // This resolves potential deadlock if ibuffer fills and the LSU stalls the execute stage due to pending dcache requests. // This issue is particularly prevalent when the icache and dcache are disabled and both requests share the same bus. wire [`NUM_WARPS-1:0] pending_ibuf_full; - for (genvar i = 0; i < `NUM_WARPS; ++i) begin : pending_reads + for (genvar i = 0; i < `NUM_WARPS; ++i) begin : g_pending_reads VX_pending_size #( .SIZE (`IBUF_SIZE) ) pending_reads ( @@ -164,13 +164,11 @@ module VX_fetch import VX_gpu_pkg::*; #( `endif `ifdef DBG_TRACE_MEM - wire schedule_fire = schedule_if.valid && schedule_if.ready; - wire fetch_fire = fetch_if.valid && fetch_if.ready; always @(posedge clk) begin - if (schedule_fire) begin + if (schedule_if.valid && schedule_if.ready) begin `TRACE(1, ("%t: %s req: wid=%0d, PC=0x%0h, tmask=%b (#%0d)\n", $time, INSTANCE_ID, schedule_if.data.wid, {schedule_if.data.PC, 1'b0}, schedule_if.data.tmask, schedule_if.data.uuid)) end - if (fetch_fire) begin + if (fetch_if.valid && fetch_if.ready) begin `TRACE(1, ("%t: %s rsp: wid=%0d, PC=0x%0h, tmask=%b, instr=0x%0h (#%0d)\n", $time, INSTANCE_ID, fetch_if.data.wid, {fetch_if.data.PC, 1'b0}, fetch_if.data.tmask, fetch_if.data.instr, fetch_if.data.uuid)) end end diff --git a/hw/rtl/core/VX_fpu_unit.sv b/hw/rtl/core/VX_fpu_unit.sv index 10e5c236bf..1565f3728b 100644 --- a/hw/rtl/core/VX_fpu_unit.sv +++ b/hw/rtl/core/VX_fpu_unit.sv @@ -53,7 +53,7 @@ module VX_fpu_unit import VX_fpu_pkg::*; #( .NUM_LANES (NUM_LANES) ) per_block_commit_if[BLOCK_SIZE](); - for (genvar block_idx = 0; block_idx < BLOCK_SIZE; ++block_idx) begin : fpus + for (genvar block_idx = 0; block_idx < BLOCK_SIZE; ++block_idx) begin : g_fpus `UNUSED_VAR (per_block_execute_if[block_idx].data.tid) `UNUSED_VAR (per_block_execute_if[block_idx].data.wb) @@ 
-98,11 +98,11 @@ module VX_fpu_unit import VX_fpu_pkg::*; #( `UNUSED_PIN (empty) ); - if (PID_BITS != 0) begin + if (PID_BITS != 0) begin : g_fpu_rsp_pid assign fpu_rsp_pid = fpu_rsp_pid_u; assign fpu_rsp_sop = fpu_rsp_sop_u; assign fpu_rsp_eop = fpu_rsp_eop_u; - end else begin + end else begin : g_no_fpu_rsp_pid `UNUSED_VAR (fpu_rsp_pid_u) `UNUSED_VAR (fpu_rsp_sop_u) `UNUSED_VAR (fpu_rsp_eop_u) @@ -214,7 +214,7 @@ module VX_fpu_unit import VX_fpu_pkg::*; #( // handle CSR update fflags_t fpu_rsp_fflags_q; - if (PID_BITS != 0) begin + if (PID_BITS != 0) begin : g_pid fflags_t fpu_rsp_fflags_r; always @(posedge clk) begin if (reset) begin @@ -224,7 +224,7 @@ module VX_fpu_unit import VX_fpu_pkg::*; #( end end assign fpu_rsp_fflags_q = fpu_rsp_fflags_r | fpu_rsp_fflags; - end else begin + end else begin : g_no_pid assign fpu_rsp_fflags_q = fpu_rsp_fflags; end diff --git a/hw/rtl/core/VX_gather_unit.sv b/hw/rtl/core/VX_gather_unit.sv index 69295321b6..284d5c167b 100644 --- a/hw/rtl/core/VX_gather_unit.sv +++ b/hw/rtl/core/VX_gather_unit.sv @@ -41,17 +41,17 @@ module VX_gather_unit import VX_gpu_pkg::*; #( wire [BLOCK_SIZE-1:0] commit_in_ready; wire [BLOCK_SIZE-1:0][ISSUE_ISW_W-1:0] commit_in_isw; - for (genvar i = 0; i < BLOCK_SIZE; ++i) begin + for (genvar i = 0; i < BLOCK_SIZE; ++i) begin : g_commit_in assign commit_in_valid[i] = commit_in_if[i].valid; assign commit_in_data[i] = commit_in_if[i].data; assign commit_in_if[i].ready = commit_in_ready[i]; - if (BLOCK_SIZE != `ISSUE_WIDTH) begin - if (BLOCK_SIZE != 1) begin + if (BLOCK_SIZE != `ISSUE_WIDTH) begin : g_commit_in_isw_partial + if (BLOCK_SIZE != 1) begin : g_block assign commit_in_isw[i] = {commit_in_data[i][DATA_WIS_OFF+BLOCK_SIZE_W +: (ISSUE_ISW_W-BLOCK_SIZE_W)], BLOCK_SIZE_W'(i)}; - end else begin + end else begin : g_no_block assign commit_in_isw[i] = commit_in_data[i][DATA_WIS_OFF +: ISSUE_ISW_W]; end - end else begin + end else begin : g_commit_in_isw_full assign commit_in_isw[i] = BLOCK_SIZE_W'(i); end 
end @@ -70,11 +70,12 @@ module VX_gather_unit import VX_gpu_pkg::*; #( commit_out_data[commit_in_isw[i]] = commit_in_data[i]; end end - for (genvar i = 0; i < BLOCK_SIZE; ++i) begin + + for (genvar i = 0; i < BLOCK_SIZE; ++i) begin : g_commit_in_ready assign commit_in_ready[i] = commit_out_ready[commit_in_isw[i]]; end - for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin: out_bufs + for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin: g_out_bufs VX_commit_if #( .NUM_LANES (NUM_LANES) ) commit_tmp_if(); @@ -96,7 +97,7 @@ module VX_gather_unit import VX_gpu_pkg::*; #( logic [`NUM_THREADS-1:0] commit_tmask_w; logic [`NUM_THREADS-1:0][`XLEN-1:0] commit_data_w; - if (PID_BITS != 0) begin + if (PID_BITS != 0) begin : g_commit_data_with_pid always @(*) begin commit_tmask_w = '0; commit_data_w = 'x; @@ -105,7 +106,7 @@ module VX_gather_unit import VX_gpu_pkg::*; #( commit_data_w[commit_tmp_if.data.pid * NUM_LANES + j] = commit_tmp_if.data.data[j]; end end - end else begin + end else begin : g_commit_data_no_pid assign commit_tmask_w = commit_tmp_if.data.tmask; assign commit_data_w = commit_tmp_if.data.data; end diff --git a/hw/rtl/core/VX_ibuffer.sv b/hw/rtl/core/VX_ibuffer.sv index f5d879f334..e1a9457ded 100644 --- a/hw/rtl/core/VX_ibuffer.sv +++ b/hw/rtl/core/VX_ibuffer.sv @@ -35,7 +35,7 @@ module VX_ibuffer import VX_gpu_pkg::*; #( wire [PER_ISSUE_WARPS-1:0] ibuf_ready_in; assign decode_if.ready = ibuf_ready_in[decode_if.data.wid]; - for (genvar w = 0; w < PER_ISSUE_WARPS; ++w) begin : instr_bufs + for (genvar w = 0; w < PER_ISSUE_WARPS; ++w) begin : g_instr_bufs VX_elastic_buffer #( .DATAW (DATAW), .SIZE (`IBUF_SIZE), diff --git a/hw/rtl/core/VX_issue.sv b/hw/rtl/core/VX_issue.sv index a0f223ff5b..a2e689b7c8 100644 --- a/hw/rtl/core/VX_issue.sv +++ b/hw/rtl/core/VX_issue.sv @@ -36,10 +36,10 @@ module VX_issue import VX_gpu_pkg::*; #( `PERF_COUNTER_ADD (issue_perf, per_issue_perf, ibf_stalls, `PERF_CTR_BITS, `ISSUE_WIDTH, (`ISSUE_WIDTH > 2)) `PERF_COUNTER_ADD (issue_perf, 
per_issue_perf, scb_stalls, `PERF_CTR_BITS, `ISSUE_WIDTH, (`ISSUE_WIDTH > 2)) `PERF_COUNTER_ADD (issue_perf, per_issue_perf, opd_stalls, `PERF_CTR_BITS, `ISSUE_WIDTH, (`ISSUE_WIDTH > 2)) - for (genvar i = 0; i < `NUM_EX_UNITS; ++i) begin + for (genvar i = 0; i < `NUM_EX_UNITS; ++i) begin : g_issue_perf_units_uses `PERF_COUNTER_ADD (issue_perf, per_issue_perf, units_uses[i], `PERF_CTR_BITS, `ISSUE_WIDTH, (`ISSUE_WIDTH > 2)) end - for (genvar i = 0; i < `NUM_SFU_UNITS; ++i) begin + for (genvar i = 0; i < `NUM_SFU_UNITS; ++i) begin : g_issue_perf_sfu_uses `PERF_COUNTER_ADD (issue_perf, per_issue_perf, sfu_uses[i], `PERF_CTR_BITS, `ISSUE_WIDTH, (`ISSUE_WIDTH > 2)) end `endif @@ -52,7 +52,7 @@ module VX_issue import VX_gpu_pkg::*; #( `SCOPE_IO_SWITCH (`ISSUE_WIDTH) - for (genvar issue_id = 0; issue_id < `ISSUE_WIDTH; ++issue_id) begin : issue_slices + for (genvar issue_id = 0; issue_id < `ISSUE_WIDTH; ++issue_id) begin : g_issue_slices VX_decode_if #( .NUM_WARPS (PER_ISSUE_WARPS) ) per_issue_decode_if(); @@ -93,7 +93,7 @@ module VX_issue import VX_gpu_pkg::*; #( ); // Assign transposed dispatch_if - for (genvar ex_id = 0; ex_id < `NUM_EX_UNITS; ++ex_id) begin + for (genvar ex_id = 0; ex_id < `NUM_EX_UNITS; ++ex_id) begin : g_dispatch_if `ASSIGN_VX_IF(dispatch_if[ex_id * `ISSUE_WIDTH + issue_id], per_issue_dispatch_if[ex_id]); end end diff --git a/hw/rtl/core/VX_issue_top.sv b/hw/rtl/core/VX_issue_top.sv index 0166cf7703..e148b02f64 100644 --- a/hw/rtl/core/VX_issue_top.sv +++ b/hw/rtl/core/VX_issue_top.sv @@ -80,7 +80,7 @@ module VX_issue_top import VX_gpu_pkg::*; #( assign decode_if.data.rs3 = decode_rs3; assign decode_ready = decode_if.ready; - for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin + for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin : g_writeback_if assign writeback_if[i].valid = writeback_valid[i]; assign writeback_if[i].data.uuid = writeback_uuid[i]; assign writeback_if[i].data.wis = writeback_wis[i]; @@ -92,7 +92,7 @@ module VX_issue_top import 
VX_gpu_pkg::*; #( assign writeback_if[i].data.eop = writeback_eop[i]; end - for (genvar i = 0; i < `NUM_EX_UNITS * `ISSUE_WIDTH; ++i) begin + for (genvar i = 0; i < `NUM_EX_UNITS * `ISSUE_WIDTH; ++i) begin : g_dispatch_if assign dispatch_valid[i] = dispatch_if[i].valid; assign dispatch_uuid[i] = dispatch_if[i].data.uuid; assign dispatch_wis[i] = dispatch_if[i].data.wis; diff --git a/hw/rtl/core/VX_lsu_slice.sv b/hw/rtl/core/VX_lsu_slice.sv index b880eee2e3..43f787ae94 100644 --- a/hw/rtl/core/VX_lsu_slice.sv +++ b/hw/rtl/core/VX_lsu_slice.sv @@ -59,14 +59,14 @@ module VX_lsu_slice import VX_gpu_pkg::*; #( wire req_is_fence, rsp_is_fence; wire [NUM_LANES-1:0][`XLEN-1:0] full_addr; - for (genvar i = 0; i < NUM_LANES; ++i) begin + for (genvar i = 0; i < NUM_LANES; ++i) begin : g_full_addr assign full_addr[i] = execute_if.data.rs1_data[i] + `SEXT(`XLEN, execute_if.data.op_args.lsu.offset); end // address type calculation wire [NUM_LANES-1:0][`MEM_REQ_FLAGS_WIDTH-1:0] mem_req_flags; - for (genvar i = 0; i < NUM_LANES; ++i) begin + for (genvar i = 0; i < NUM_LANES; ++i) begin : g_mem_req_flags wire [MEM_ADDRW-1:0] block_addr = full_addr[i][MEM_ASHIFT +: MEM_ADDRW]; // is I/O address wire [MEM_ADDRW-1:0] io_addr_start = MEM_ADDRW'(`XLEN'(`IO_BASE_ADDR) >> MEM_ASHIFT); @@ -151,13 +151,13 @@ module VX_lsu_slice import VX_gpu_pkg::*; #( wire [NUM_LANES-1:0][REQ_ASHIFT-1:0] req_align; - for (genvar i = 0; i < NUM_LANES; ++i) begin + for (genvar i = 0; i < NUM_LANES; ++i) begin : g_mem_req_addr assign req_align[i] = full_addr[i][REQ_ASHIFT-1:0]; assign mem_req_addr[i] = full_addr[i][`MEM_ADDR_WIDTH-1:REQ_ASHIFT]; end // byte enable formatting - for (genvar i = 0; i < NUM_LANES; ++i) begin + for (genvar i = 0; i < NUM_LANES; ++i) begin : g_mem_req_byteen_w reg [LSU_WORD_SIZE-1:0] mem_req_byteen_w; always @(*) begin mem_req_byteen_w = '0; @@ -185,7 +185,7 @@ module VX_lsu_slice import VX_gpu_pkg::*; #( end // memory misalignment not supported! 
- for (genvar i = 0; i < NUM_LANES; ++i) begin + for (genvar i = 0; i < NUM_LANES; ++i) begin : g_missalign wire lsu_req_fire = execute_if.valid && execute_if.ready; `RUNTIME_ASSERT((~lsu_req_fire || ~execute_if.data.tmask[i] || req_is_fence || (full_addr[i] % (1 << `INST_LSU_WSIZE(execute_if.data.op_type))) == 0), ("%t: misaligned memory access, wid=%0d, PC=0x%0h, addr=0x%0h, wsize=%0d! (#%0d)", @@ -193,7 +193,7 @@ module VX_lsu_slice import VX_gpu_pkg::*; #( end // store data formatting - for (genvar i = 0; i < NUM_LANES; ++i) begin + for (genvar i = 0; i < NUM_LANES; ++i) begin : g_mem_req_data always @(*) begin mem_req_data[i] = execute_if.data.rs2_data[i]; case (req_align[i]) @@ -215,7 +215,7 @@ module VX_lsu_slice import VX_gpu_pkg::*; #( wire [LSUQ_SIZEW-1:0] pkt_waddr, pkt_raddr; - if (PID_BITS != 0) begin + if (PID_BITS != 0) begin : g_pids reg [`LSUQ_IN_SIZE-1:0][PID_BITS:0] pkt_ctr; reg [`LSUQ_IN_SIZE-1:0] pkt_sop, pkt_eop; @@ -274,7 +274,7 @@ module VX_lsu_slice import VX_gpu_pkg::*; #( `RUNTIME_ASSERT(~(mem_req_rd_fire && full), ("%t: allocator full!", $time)) `RUNTIME_ASSERT(~mem_req_rd_sop_fire || 0 == pkt_ctr[pkt_waddr], ("%t: oops! broken sop request!", $time)) `UNUSED_VAR (mem_rsp_sop) - end else begin + end else begin : g_no_pids assign pkt_waddr = 0; assign mem_rsp_sop_pkt = mem_rsp_sop; assign mem_rsp_eop_pkt = mem_rsp_eop; @@ -424,7 +424,7 @@ module VX_lsu_slice import VX_gpu_pkg::*; #( `endif `endif - for (genvar i = 0; i < NUM_LANES; i++) begin + for (genvar i = 0; i < NUM_LANES; i++) begin : g_rsp_data `ifdef XLEN_64 wire [63:0] rsp_data64 = mem_rsp_data[i]; wire [31:0] rsp_data32 = (rsp_align[i][2] ? 
mem_rsp_data[i][63:32] : mem_rsp_data[i][31:0]); @@ -481,6 +481,7 @@ module VX_lsu_slice import VX_gpu_pkg::*; #( .valid_out (commit_no_rsp_if.valid), .ready_out (commit_no_rsp_if.ready) ); + assign commit_no_rsp_if.data.rd = '0; assign commit_no_rsp_if.data.wb = 1'b0; assign commit_no_rsp_if.data.data = commit_rsp_if.data.data; // arbiter MUX optimization diff --git a/hw/rtl/core/VX_lsu_unit.sv b/hw/rtl/core/VX_lsu_unit.sv index 8c594f5331..f4a1fc4ae6 100644 --- a/hw/rtl/core/VX_lsu_unit.sv +++ b/hw/rtl/core/VX_lsu_unit.sv @@ -54,7 +54,7 @@ module VX_lsu_unit import VX_gpu_pkg::*; #( .NUM_LANES (NUM_LANES) ) per_block_commit_if[BLOCK_SIZE](); - for (genvar block_idx = 0; block_idx < BLOCK_SIZE; ++block_idx) begin : lsus + for (genvar block_idx = 0; block_idx < BLOCK_SIZE; ++block_idx) begin : g_lsus VX_lsu_slice #( .INSTANCE_ID ($sformatf("%s%0d", INSTANCE_ID, block_idx)) ) lsu_slice( diff --git a/hw/rtl/core/VX_mem_unit.sv b/hw/rtl/core/VX_mem_unit.sv index 75f60e63cf..c02e99b29a 100644 --- a/hw/rtl/core/VX_mem_unit.sv +++ b/hw/rtl/core/VX_mem_unit.sv @@ -45,7 +45,7 @@ module VX_mem_unit import VX_gpu_pkg::*; #( .TAG_WIDTH (LSU_TAG_WIDTH) ) lsu_lmem_if[`NUM_LSU_BLOCKS](); - for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin : lmem_switches + for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin : g_lmem_switches VX_lmem_switch #( .REQ0_OUT_BUF (3), .REQ1_OUT_BUF (0), @@ -65,7 +65,7 @@ module VX_mem_unit import VX_gpu_pkg::*; #( .TAG_WIDTH (LSU_TAG_WIDTH) ) lmem_bus_if[LSU_NUM_REQS](); - for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin : lmem_adapters + for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin : g_lmem_adapters VX_mem_bus_if #( .DATA_SIZE (LSU_WORD_SIZE), .TAG_WIDTH (LSU_TAG_WIDTH) @@ -86,7 +86,7 @@ module VX_mem_unit import VX_gpu_pkg::*; #( .mem_bus_if (lmem_bus_tmp_if) ); - for (genvar j = 0; j < `NUM_LSU_LANES; ++j) begin + for (genvar j = 0; j < `NUM_LSU_LANES; ++j) begin : g_lmem_bus_if `ASSIGN_VX_MEM_BUS_IF (lmem_bus_if[i * `NUM_LSU_LANES + j], 
lmem_bus_tmp_if[j]); end end @@ -115,7 +115,7 @@ module VX_mem_unit import VX_gpu_pkg::*; #( `ifdef PERF_ENABLE assign lmem_perf = '0; `endif - for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin + for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin : g_lsu_dcache_if `ASSIGN_VX_MEM_BUS_IF (lsu_dcache_if[i], lsu_mem_if[i]); end @@ -127,9 +127,9 @@ module VX_mem_unit import VX_gpu_pkg::*; #( .TAG_WIDTH (DCACHE_TAG_WIDTH) ) dcache_coalesced_if[`NUM_LSU_BLOCKS](); - if (LSU_WORD_SIZE != DCACHE_WORD_SIZE) begin : coalescer_if + if (LSU_WORD_SIZE != DCACHE_WORD_SIZE) begin : g_enabled - for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin : coalescers + for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin : g_coalescers VX_mem_coalescer #( .INSTANCE_ID ($sformatf("%s-coalescer%0d", INSTANCE_ID, i)), .NUM_REQS (`NUM_LSU_LANES), @@ -182,15 +182,15 @@ module VX_mem_unit import VX_gpu_pkg::*; #( ); end - end else begin + end else begin : g_passthru - for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin + for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin : g_dcache_coalesced_if `ASSIGN_VX_MEM_BUS_IF (dcache_coalesced_if[i], lsu_dcache_if[i]); end end - for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin : dcache_adapters + for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin : g_dcache_adapters VX_mem_bus_if #( .DATA_SIZE (DCACHE_WORD_SIZE), @@ -212,7 +212,7 @@ module VX_mem_unit import VX_gpu_pkg::*; #( .mem_bus_if (dcache_bus_tmp_if) ); - for (genvar j = 0; j < DCACHE_CHANNELS; ++j) begin + for (genvar j = 0; j < DCACHE_CHANNELS; ++j) begin : g_dcache_bus_if `ASSIGN_VX_MEM_BUS_IF (dcache_bus_if[i * DCACHE_CHANNELS + j], dcache_bus_tmp_if[j]); end diff --git a/hw/rtl/core/VX_mem_unit_top.sv b/hw/rtl/core/VX_mem_unit_top.sv index 1eac9da103..17786a09bd 100644 --- a/hw/rtl/core/VX_mem_unit_top.sv +++ b/hw/rtl/core/VX_mem_unit_top.sv @@ -62,7 +62,7 @@ module VX_mem_unit_top import VX_gpu_pkg::*; #( ) lsu_mem_if[`NUM_LSU_BLOCKS](); // LSU memory request - for (genvar i = 0; i < 
`NUM_LSU_BLOCKS; ++i) begin + for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin : g_lsu_mem_req assign lsu_mem_if[i].req_valid = lsu_req_valid[i]; assign lsu_mem_if[i].req_data.rw = lsu_req_rw[i]; assign lsu_mem_if[i].req_data.mask = lsu_req_mask[i]; @@ -75,7 +75,7 @@ module VX_mem_unit_top import VX_gpu_pkg::*; #( end // LSU memory response - for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin + for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin : g_lsu_rsp assign lsu_rsp_valid[i] = lsu_mem_if[i].rsp_valid; assign lsu_rsp_mask[i] = lsu_mem_if[i].rsp_data.mask; assign lsu_rsp_data[i] = lsu_mem_if[i].rsp_data.data; @@ -89,7 +89,7 @@ module VX_mem_unit_top import VX_gpu_pkg::*; #( ) mem_bus_if[DCACHE_NUM_REQS](); // memory request - for (genvar i = 0; i < DCACHE_NUM_REQS; ++i) begin + for (genvar i = 0; i < DCACHE_NUM_REQS; ++i) begin : g_mem_req assign mem_req_valid[i] = mem_bus_if[i].req_valid; assign mem_req_rw[i] = mem_bus_if[i].req_data.rw; assign mem_req_byteen[i] = mem_bus_if[i].req_data.byteen; @@ -101,7 +101,7 @@ module VX_mem_unit_top import VX_gpu_pkg::*; #( end // memory response - for (genvar i = 0; i < DCACHE_NUM_REQS; ++i) begin + for (genvar i = 0; i < DCACHE_NUM_REQS; ++i) begin : g_mem_bus_rsp assign mem_bus_if[i].rsp_valid = mem_rsp_valid[i]; assign mem_bus_if[i].rsp_data.tag = mem_rsp_tag[i]; assign mem_bus_if[i].rsp_data.data = mem_rsp_data[i]; diff --git a/hw/rtl/core/VX_operands.sv b/hw/rtl/core/VX_operands.sv index ef98ea79ef..066db15cda 100644 --- a/hw/rtl/core/VX_operands.sv +++ b/hw/rtl/core/VX_operands.sv @@ -81,20 +81,23 @@ module VX_operands import VX_gpu_pkg::*; #( wire [NUM_SRC_OPDS-1:0][`NR_BITS-1:0] src_opds; assign src_opds = {scoreboard_if.data.rs3, scoreboard_if.data.rs2, scoreboard_if.data.rs1}; - for (genvar i = 0; i < NUM_SRC_OPDS; ++i) begin - if (ISSUE_WIS != 0) begin + for (genvar i = 0; i < NUM_SRC_OPDS; ++i) begin : g_req_data_in + if (ISSUE_WIS != 0) begin : g_wis assign req_data_in[i] = 
{src_opds[i][`NR_BITS-1:BANK_SEL_BITS], scoreboard_if.data.wis}; - end else begin + end else begin : g_no_wis assign req_data_in[i] = src_opds[i][`NR_BITS-1:BANK_SEL_BITS]; end - if (NUM_BANKS != 1) begin + end + + for (genvar i = 0; i < NUM_SRC_OPDS; ++i) begin : g_req_bank_idx + if (NUM_BANKS != 1) begin : g_banks assign req_bank_idx[i] = src_opds[i][BANK_SEL_BITS-1:0]; - end else begin + end else begin : g_1bank assign req_bank_idx[i] = '0; end end - for (genvar i = 0; i < NUM_SRC_OPDS; ++i) begin + for (genvar i = 0; i < NUM_SRC_OPDS; ++i) begin : g_src_valid assign src_valid[i] = (src_opds[i] != 0) && ~data_fetched_st1[i]; end @@ -232,30 +235,30 @@ module VX_operands import VX_gpu_pkg::*; #( ); wire [PER_BANK_ADDRW-1:0] gpr_wr_addr; - if (ISSUE_WIS != 0) begin + if (ISSUE_WIS != 0) begin : g_gpr_wr_addr assign gpr_wr_addr = {writeback_if.data.rd[`NR_BITS-1:BANK_SEL_BITS], writeback_if.data.wis}; - end else begin + end else begin : g_gpr_wr_addr_no_wis assign gpr_wr_addr = writeback_if.data.rd[`NR_BITS-1:BANK_SEL_BITS]; end wire [BANK_SEL_WIDTH-1:0] gpr_wr_bank_idx; - if (NUM_BANKS != 1) begin + if (NUM_BANKS != 1) begin : g_gpr_wr_bank_idx assign gpr_wr_bank_idx = writeback_if.data.rd[BANK_SEL_BITS-1:0]; - end else begin + end else begin : g_gpr_wr_bank_idx_0 assign gpr_wr_bank_idx = '0; end - for (genvar b = 0; b < NUM_BANKS; ++b) begin : gpr_rams + for (genvar b = 0; b < NUM_BANKS; ++b) begin : g_gpr_rams wire gpr_wr_enabled; - if (BANK_SEL_BITS != 0) begin + if (BANK_SEL_BITS != 0) begin : g_gpr_wr_enabled assign gpr_wr_enabled = writeback_if.valid && (gpr_wr_bank_idx == BANK_SEL_BITS'(b)); - end else begin + end else begin : g_gpr_wr_enabled_1bank assign gpr_wr_enabled = writeback_if.valid; end wire [BYTEENW-1:0] wren; - for (genvar i = 0; i < `NUM_THREADS; ++i) begin + for (genvar i = 0; i < `NUM_THREADS; ++i) begin : g_wren assign wren[i*XLEN_SIZE+:XLEN_SIZE] = {XLEN_SIZE{writeback_if.data.tmask[i]}}; end diff --git a/hw/rtl/core/VX_pe_switch.sv 
b/hw/rtl/core/VX_pe_switch.sv index 384fce3292..163d76c647 100644 --- a/hw/rtl/core/VX_pe_switch.sv +++ b/hw/rtl/core/VX_pe_switch.sv @@ -54,7 +54,7 @@ module VX_pe_switch import VX_gpu_pkg::*; #( .ready_out (pe_req_ready) ); - for (genvar i = 0; i < PE_COUNT; ++i) begin + for (genvar i = 0; i < PE_COUNT; ++i) begin : g_execute_out_if assign execute_out_if[i].valid = pe_req_valid[i]; assign execute_out_if[i].data = pe_req_data[i]; assign pe_req_ready[i] = execute_out_if[i].ready; @@ -66,7 +66,7 @@ module VX_pe_switch import VX_gpu_pkg::*; #( wire [PE_COUNT-1:0][RSP_DATAW-1:0] pe_rsp_data; wire [PE_COUNT-1:0] pe_rsp_ready; - for (genvar i = 0; i < PE_COUNT; ++i) begin + for (genvar i = 0; i < PE_COUNT; ++i) begin : g_commit_in_if assign pe_rsp_valid[i] = commit_in_if[i].valid; assign pe_rsp_data[i] = commit_in_if[i].data; assign commit_in_if[i].ready = pe_rsp_ready[i]; diff --git a/hw/rtl/core/VX_schedule.sv b/hw/rtl/core/VX_schedule.sv index 77e00156bf..e7937fe493 100644 --- a/hw/rtl/core/VX_schedule.sv +++ b/hw/rtl/core/VX_schedule.sv @@ -78,7 +78,7 @@ module VX_schedule import VX_gpu_pkg::*; #( wire [`NUM_ALU_BLOCKS-1:0][`NW_WIDTH-1:0] branch_wid; wire [`NUM_ALU_BLOCKS-1:0] branch_taken; wire [`NUM_ALU_BLOCKS-1:0][`PC_BITS-1:0] branch_dest; - for (genvar i = 0; i < `NUM_ALU_BLOCKS; ++i) begin + for (genvar i = 0; i < `NUM_ALU_BLOCKS; ++i) begin : g_branch_init assign branch_valid[i] = branch_ctl_if[i].valid; assign branch_wid[i] = branch_ctl_if[i].wid; assign branch_taken[i] = branch_ctl_if[i].taken; @@ -322,7 +322,7 @@ module VX_schedule import VX_gpu_pkg::*; #( ); wire [`NUM_WARPS-1:0][(`NUM_THREADS + `PC_BITS)-1:0] schedule_data; - for (genvar i = 0; i < `NUM_WARPS; ++i) begin + for (genvar i = 0; i < `NUM_WARPS; ++i) begin : g_schedule_data assign schedule_data[i] = {thread_masks[i], warp_pcs[i]}; end @@ -367,7 +367,7 @@ module VX_schedule import VX_gpu_pkg::*; #( wire [`NUM_WARPS-1:0] pending_warp_empty; wire [`NUM_WARPS-1:0] pending_warp_alm_empty; - for 
(genvar i = 0; i < `NUM_WARPS; ++i) begin : pending_sizes + for (genvar i = 0; i < `NUM_WARPS; ++i) begin : g_pending_sizes VX_pending_size #( .SIZE (4096), .ALM_EMPTY (1) diff --git a/hw/rtl/core/VX_scoreboard.sv b/hw/rtl/core/VX_scoreboard.sv index b4fd5c08c9..1fe9a7f44d 100644 --- a/hw/rtl/core/VX_scoreboard.sv +++ b/hw/rtl/core/VX_scoreboard.sv @@ -66,13 +66,13 @@ module VX_scoreboard import VX_gpu_pkg::*; #( `BUFFER_EX(perf_sfu_per_cycle_r, perf_sfu_per_cycle, 1'b1, `CDIV(PER_ISSUE_WARPS, `MAX_FANOUT)); wire [PER_ISSUE_WARPS-1:0] stg_valid_in; - for (genvar w = 0; w < PER_ISSUE_WARPS; ++w) begin + for (genvar w = 0; w < PER_ISSUE_WARPS; ++w) begin : g_stg_valid_in assign stg_valid_in[w] = staging_if[w].valid; end wire perf_stall_per_cycle = (|stg_valid_in) && ~(|(stg_valid_in & operands_ready)); - always @(posedge clk) begin + always @(posedge clk) begin : g_perf_stalls if (reset) begin perf_stalls <= '0; end else begin @@ -80,7 +80,7 @@ module VX_scoreboard import VX_gpu_pkg::*; #( end end - for (genvar i = 0; i < `NUM_EX_UNITS; ++i) begin + for (genvar i = 0; i < `NUM_EX_UNITS; ++i) begin : g_perf_units_uses always @(posedge clk) begin if (reset) begin perf_units_uses[i] <= '0; @@ -90,7 +90,7 @@ module VX_scoreboard import VX_gpu_pkg::*; #( end end - for (genvar i = 0; i < `NUM_SFU_UNITS; ++i) begin + for (genvar i = 0; i < `NUM_SFU_UNITS; ++i) begin : g_perf_sfu_uses always @(posedge clk) begin if (reset) begin perf_sfu_uses[i] <= '0; @@ -101,7 +101,7 @@ module VX_scoreboard import VX_gpu_pkg::*; #( end `endif - for (genvar w = 0; w < PER_ISSUE_WARPS; ++w) begin : stanging_bufs + for (genvar w = 0; w < PER_ISSUE_WARPS; ++w) begin : g_stanging_bufs VX_pipe_buffer #( .DATAW (DATAW) ) stanging_buf ( @@ -116,7 +116,7 @@ module VX_scoreboard import VX_gpu_pkg::*; #( ); end - for (genvar w = 0; w < PER_ISSUE_WARPS; ++w) begin + for (genvar w = 0; w < PER_ISSUE_WARPS; ++w) begin : g_scoreboard reg [`NUM_REGS-1:0] inuse_regs; reg [NUM_OPDS-1:0] operands_busy, 
operands_busy_n; @@ -233,7 +233,7 @@ module VX_scoreboard import VX_gpu_pkg::*; #( wire [PER_ISSUE_WARPS-1:0][DATAW-1:0] arb_data_in; wire [PER_ISSUE_WARPS-1:0] arb_ready_in; - for (genvar w = 0; w < PER_ISSUE_WARPS; ++w) begin + for (genvar w = 0; w < PER_ISSUE_WARPS; ++w) begin : g_arb_data_in assign arb_valid_in[w] = staging_if[w].valid && operands_ready[w]; assign arb_data_in[w] = staging_if[w].data; assign staging_if[w].ready = arb_ready_in[w] && operands_ready[w]; diff --git a/hw/rtl/core/VX_split_join.sv b/hw/rtl/core/VX_split_join.sv index c5542e1375..7955437a68 100644 --- a/hw/rtl/core/VX_split_join.sv +++ b/hw/rtl/core/VX_split_join.sv @@ -45,7 +45,7 @@ module VX_split_join import VX_gpu_pkg::*; #( wire ipdom_push = valid && split.valid && split.is_dvg; wire ipdom_pop = valid && sjoin.valid && sjoin_is_dvg; - for (genvar i = 0; i < `NUM_WARPS; ++i) begin : ipdom_stacks + for (genvar i = 0; i < `NUM_WARPS; ++i) begin : g_ipdom_stacks VX_ipdom_stack #( .WIDTH (`NUM_THREADS+`PC_BITS), .DEPTH (`DV_STACK_SIZE), diff --git a/hw/rtl/core/VX_wctl_unit.sv b/hw/rtl/core/VX_wctl_unit.sv index 132f679d43..bb85b70c92 100644 --- a/hw/rtl/core/VX_wctl_unit.sv +++ b/hw/rtl/core/VX_wctl_unit.sv @@ -50,9 +50,9 @@ module VX_wctl_unit import VX_gpu_pkg::*; #( wire is_bar = (execute_if.data.op_type == `INST_SFU_BAR); wire [`UP(LANE_BITS)-1:0] tid; - if (LANE_BITS != 0) begin + if (LANE_BITS != 0) begin : g_tid assign tid = execute_if.data.tid[0 +: LANE_BITS]; - end else begin + end else begin : g_no_tid assign tid = 0; end @@ -63,7 +63,7 @@ module VX_wctl_unit import VX_gpu_pkg::*; #( wire not_pred = execute_if.data.op_args.wctl.is_neg; wire [NUM_LANES-1:0] taken; - for (genvar i = 0; i < NUM_LANES; ++i) begin + for (genvar i = 0; i < NUM_LANES; ++i) begin : g_taken assign taken[i] = (execute_if.data.rs1_data[i][0] ^ not_pred); end @@ -131,7 +131,7 @@ module VX_wctl_unit import VX_gpu_pkg::*; #( // wspawn wire [`NUM_WARPS-1:0] wspawn_wmask; - for (genvar i = 0; i < 
`NUM_WARPS; ++i) begin + for (genvar i = 0; i < `NUM_WARPS; ++i) begin : g_wspawn_wmask assign wspawn_wmask[i] = (i < rs1_data[`NW_BITS:0]) && (i != execute_if.data.wid); end assign wspawn.valid = is_wspawn; @@ -162,7 +162,7 @@ module VX_wctl_unit import VX_gpu_pkg::*; #( assign warp_ctl_if.sjoin = sjoin_r; assign warp_ctl_if.barrier = barrier_r; - for (genvar i = 0; i < NUM_LANES; ++i) begin + for (genvar i = 0; i < NUM_LANES; ++i) begin : g_commit_if assign commit_if.data.data[i] = `XLEN'(dvstack_ptr); end diff --git a/hw/rtl/fpu/VX_fpu_cvt.sv b/hw/rtl/fpu/VX_fpu_cvt.sv index b3d1e099a1..2d0d527532 100644 --- a/hw/rtl/fpu/VX_fpu_cvt.sv +++ b/hw/rtl/fpu/VX_fpu_cvt.sv @@ -58,7 +58,7 @@ module VX_fpu_cvt import VX_fpu_pkg::*; #( wire [NUM_PES-1:0][DATAW-1:0] pe_data_in; wire [NUM_PES-1:0][(`FP_FLAGS_BITS+32)-1:0] pe_data_out; - for (genvar i = 0; i < NUM_LANES; ++i) begin + for (genvar i = 0; i < NUM_LANES; ++i) begin : g_data_in assign data_in[i][0 +: 32] = dataa[i]; assign data_in[i][32 +: `INST_FRM_BITS] = frm; assign data_in[i][32 + `INST_FRM_BITS +: 1] = is_itof; @@ -92,12 +92,12 @@ module VX_fpu_cvt import VX_fpu_pkg::*; #( `UNUSED_VAR (pe_data_in) - for (genvar i = 0; i < NUM_LANES; ++i) begin + for (genvar i = 0; i < NUM_LANES; ++i) begin : g_result assign result[i] = data_out[i][0 +: 32]; assign fflags_out[i] = data_out[i][32 +: `FP_FLAGS_BITS]; end - for (genvar i = 0; i < NUM_PES; ++i) begin : fcvt_units + for (genvar i = 0; i < NUM_PES; ++i) begin : g_fcvt_units VX_fcvt_unit #( .LATENCY (`LATENCY_FCVT), .OUT_REG (1) diff --git a/hw/rtl/fpu/VX_fpu_div.sv b/hw/rtl/fpu/VX_fpu_div.sv index 1a1da2758d..2238307a6f 100644 --- a/hw/rtl/fpu/VX_fpu_div.sv +++ b/hw/rtl/fpu/VX_fpu_div.sv @@ -56,7 +56,7 @@ module VX_fpu_div import VX_fpu_pkg::*; #( wire [NUM_PES-1:0][DATAW-1:0] pe_data_in; wire [NUM_PES-1:0][(`FP_FLAGS_BITS+32)-1:0] pe_data_out; - for (genvar i = 0; i < NUM_LANES; ++i) begin + for (genvar i = 0; i < NUM_LANES; ++i) begin : g_data_in assign 
data_in[i][0 +: 32] = dataa[i]; assign data_in[i][32 +: 32] = datab[i]; assign data_in[i][64 +: `INST_FRM_BITS] = frm; @@ -89,7 +89,7 @@ module VX_fpu_div import VX_fpu_pkg::*; #( `UNUSED_VAR (pe_data_in) - for (genvar i = 0; i < NUM_LANES; ++i) begin + for (genvar i = 0; i < NUM_LANES; ++i) begin : g_result assign result[i] = data_out[i][0 +: 32]; assign fflags_out[i] = data_out[i][32 +: `FP_FLAGS_BITS]; end @@ -98,7 +98,7 @@ module VX_fpu_div import VX_fpu_pkg::*; #( `ifdef QUARTUS - for (genvar i = 0; i < NUM_PES; ++i) begin : fdivs + for (genvar i = 0; i < NUM_PES; ++i) begin : g_fdivs acl_fdiv fdiv ( .clk (clk), .areset (1'b0), @@ -116,7 +116,7 @@ module VX_fpu_div import VX_fpu_pkg::*; #( `elsif VIVADO - for (genvar i = 0; i < NUM_PES; ++i) begin : fdivs + for (genvar i = 0; i < NUM_PES; ++i) begin : g_fdivs wire [3:0] tuser; xil_fdiv fdiv ( .aclk (clk), @@ -138,7 +138,7 @@ module VX_fpu_div import VX_fpu_pkg::*; #( `else - for (genvar i = 0; i < NUM_PES; ++i) begin : fdivs + for (genvar i = 0; i < NUM_PES; ++i) begin : g_fdivs reg [63:0] r; `UNUSED_VAR (r) fflags_t f; diff --git a/hw/rtl/fpu/VX_fpu_dpi.sv b/hw/rtl/fpu/VX_fpu_dpi.sv index 9670241b3b..e900e105c0 100644 --- a/hw/rtl/fpu/VX_fpu_dpi.sv +++ b/hw/rtl/fpu/VX_fpu_dpi.sv @@ -124,7 +124,7 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #( end generate - begin : fma + begin : g_fma reg [NUM_LANES-1:0][`XLEN-1:0] result_fma; reg [NUM_LANES-1:0][63:0] result_fadd; @@ -200,7 +200,7 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #( endgenerate generate - begin : fdiv + begin : g_fdiv reg [NUM_LANES-1:0][`XLEN-1:0] result_fdiv_r; reg [NUM_LANES-1:0][63:0] result_fdiv; @@ -239,7 +239,7 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #( endgenerate generate - begin : fsqrt + begin : g_fsqrt reg [NUM_LANES-1:0][`XLEN-1:0] result_fsqrt_r; reg [NUM_LANES-1:0][63:0] result_fsqrt; @@ -278,7 +278,7 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #( endgenerate generate - begin : fcvt + begin : g_fcvt reg [NUM_LANES-1:0][`XLEN-1:0] 
result_fcvt; reg [NUM_LANES-1:0][63:0] result_itof; @@ -342,7 +342,7 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #( endgenerate generate - begin : fncp + begin : g_fncp reg [NUM_LANES-1:0][`XLEN-1:0] result_fncp; reg [NUM_LANES-1:0][63:0] result_fclss; @@ -449,7 +449,7 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #( wire [NUM_FPC-1:0][RSP_DATAW-1:0] per_core_data_out; - for (genvar i = 0; i < NUM_FPC; ++i) begin + for (genvar i = 0; i < NUM_FPC; ++i) begin : g_per_core_data_out assign per_core_data_out[i] = {per_core_result[i], per_core_has_fflags[i], per_core_fflags[i], per_core_tag_out[i]}; end diff --git a/hw/rtl/fpu/VX_fpu_dsp.sv b/hw/rtl/fpu/VX_fpu_dsp.sv index a04f96c3b8..af75c8a756 100644 --- a/hw/rtl/fpu/VX_fpu_dsp.sv +++ b/hw/rtl/fpu/VX_fpu_dsp.sv @@ -83,7 +83,7 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #( wire [NUM_LANES-1:0][31:0] datab_s; wire [NUM_LANES-1:0][31:0] datac_s; - for (genvar i = 0; i < NUM_LANES; ++i) begin + for (genvar i = 0; i < NUM_LANES; ++i) begin : g_data assign dataa_s[i] = dataa[i][31:0]; assign datab_s[i] = datab[i][31:0]; assign datac_s[i] = datac[i][31:0]; @@ -111,7 +111,7 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #( .ready_out (per_core_ready_in) ); - for (genvar i = 0; i < NUM_FPCORES; ++i) begin + for (genvar i = 0; i < NUM_FPCORES; ++i) begin : g_per_core_data_in assign { per_core_mask_in[i], per_core_tag_in[i], @@ -211,7 +211,7 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #( .ready_out (div_sqrt_ready_in) ); - for (genvar i = 0; i < 2; ++i) begin + for (genvar i = 0; i < 2; ++i) begin : g_div_sqrt_data_in assign { div_sqrt_mask_in[i], div_sqrt_tag_in[i], @@ -271,7 +271,7 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #( ); wire [1:0][RSP_DATAW-1:0] div_sqrt_arb_data_in; - for (genvar i = 0; i < 2; ++i) begin + for (genvar i = 0; i < 2; ++i) begin : g_div_sqrt_arb_data_in assign div_sqrt_arb_data_in[i] = { div_sqrt_result[i], div_sqrt_has_fflags[i], @@ -403,7 +403,7 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #( `UNUSED_PIN 
(sel_out) ); - for (genvar i = 0; i < NUM_LANES; ++i) begin + for (genvar i = 0; i < NUM_LANES; ++i) begin : g_result `ifdef FPU_RV64F reg [`XLEN-1:0] result_w; always @(*) begin diff --git a/hw/rtl/fpu/VX_fpu_fma.sv b/hw/rtl/fpu/VX_fpu_fma.sv index 8ab5b10b34..e793ff55b2 100644 --- a/hw/rtl/fpu/VX_fpu_fma.sv +++ b/hw/rtl/fpu/VX_fpu_fma.sv @@ -63,7 +63,7 @@ module VX_fpu_fma import VX_fpu_pkg::*; #( reg [NUM_LANES-1:0][31:0] a, b, c; - for (genvar i = 0; i < NUM_LANES; ++i) begin + for (genvar i = 0; i < NUM_LANES; ++i) begin : g_select always @(*) begin if (is_madd) begin // MADD / MSUB / NMADD / NMSUB @@ -86,7 +86,7 @@ module VX_fpu_fma import VX_fpu_pkg::*; #( end end - for (genvar i = 0; i < NUM_LANES; ++i) begin + for (genvar i = 0; i < NUM_LANES; ++i) begin : g_data_in assign data_in[i][0 +: 32] = a[i]; assign data_in[i][32 +: 32] = b[i]; assign data_in[i][64 +: 32] = c[i]; @@ -120,7 +120,7 @@ module VX_fpu_fma import VX_fpu_pkg::*; #( `UNUSED_VAR (pe_data_in) - for (genvar i = 0; i < NUM_LANES; ++i) begin + for (genvar i = 0; i < NUM_LANES; ++i) begin : g_result assign result[i] = data_out[i][0 +: 32]; assign fflags_out[i] = data_out[i][32 +: `FP_FLAGS_BITS]; end @@ -129,7 +129,7 @@ module VX_fpu_fma import VX_fpu_pkg::*; #( `ifdef QUARTUS - for (genvar i = 0; i < NUM_PES; ++i) begin : fmas + for (genvar i = 0; i < NUM_PES; ++i) begin : g_fmas acl_fmadd fmadd ( .clk (clk), .areset (1'b0), @@ -147,7 +147,7 @@ module VX_fpu_fma import VX_fpu_pkg::*; #( `elsif VIVADO - for (genvar i = 0; i < NUM_PES; ++i) begin : fmas + for (genvar i = 0; i < NUM_PES; ++i) begin : g_fmas wire [2:0] tuser; xil_fma fma ( @@ -172,7 +172,7 @@ module VX_fpu_fma import VX_fpu_pkg::*; #( `else - for (genvar i = 0; i < NUM_PES; ++i) begin : fmas + for (genvar i = 0; i < NUM_PES; ++i) begin : g_fmas reg [63:0] r; `UNUSED_VAR (r) fflags_t f; diff --git a/hw/rtl/fpu/VX_fpu_fpnew.sv b/hw/rtl/fpu/VX_fpu_fpnew.sv index 030ae35573..15a6c8d52c 100644 --- a/hw/rtl/fpu/VX_fpu_fpnew.sv +++ 
b/hw/rtl/fpu/VX_fpu_fpnew.sv @@ -162,7 +162,7 @@ module VX_fpu_fpnew end `UNUSED_VAR (mask_in) - for (genvar i = 0; i < NUM_LANES; ++i) begin : fpnew_cores + for (genvar i = 0; i < NUM_LANES; ++i) begin : g_fpnew_coreses wire [(TAG_WIDTH+1)-1:0] fpu_tag; wire fpu_valid_out_uq; wire fpu_ready_in_uq; @@ -201,7 +201,7 @@ module VX_fpu_fpnew `UNUSED_PIN (busy_o) ); - if (i == 0) begin + if (i == 0) begin : g_output_0 assign {fpu_tag_out, fpu_has_fflags_out} = fpu_tag; assign fpu_valid_out = fpu_valid_out_uq; assign fpu_ready_in = fpu_ready_in_uq; diff --git a/hw/rtl/fpu/VX_fpu_ncp.sv b/hw/rtl/fpu/VX_fpu_ncp.sv index e39af42963..21162dd6c9 100644 --- a/hw/rtl/fpu/VX_fpu_ncp.sv +++ b/hw/rtl/fpu/VX_fpu_ncp.sv @@ -57,7 +57,7 @@ module VX_fpu_ncp import VX_fpu_pkg::*; #( wire [NUM_PES-1:0][DATAW-1:0] pe_data_in; wire [NUM_PES-1:0][(`FP_FLAGS_BITS+32)-1:0] pe_data_out; - for (genvar i = 0; i < NUM_LANES; ++i) begin + for (genvar i = 0; i < NUM_LANES; ++i) begin : g_data_in assign data_in[i][0 +: 32] = dataa[i]; assign data_in[i][32 +: 32] = datab[i]; assign data_in[i][64 +: `INST_FRM_BITS] = frm; @@ -91,12 +91,12 @@ module VX_fpu_ncp import VX_fpu_pkg::*; #( `UNUSED_VAR (pe_data_in) - for (genvar i = 0; i < NUM_LANES; ++i) begin + for (genvar i = 0; i < NUM_LANES; ++i) begin : g_result assign result[i] = data_out[i][0 +: 32]; assign fflags_out[i] = data_out[i][32 +: `FP_FLAGS_BITS]; end - for (genvar i = 0; i < NUM_PES; ++i) begin : fncp_units + for (genvar i = 0; i < NUM_PES; ++i) begin : g_fncp_units VX_fncp_unit #( .LATENCY (`LATENCY_FNCP), .OUT_REG (1) diff --git a/hw/rtl/fpu/VX_fpu_sqrt.sv b/hw/rtl/fpu/VX_fpu_sqrt.sv index 557e21f203..fbfb86175d 100644 --- a/hw/rtl/fpu/VX_fpu_sqrt.sv +++ b/hw/rtl/fpu/VX_fpu_sqrt.sv @@ -55,7 +55,7 @@ module VX_fpu_sqrt import VX_fpu_pkg::*; #( wire [NUM_PES-1:0][DATAW-1:0] pe_data_in; wire [NUM_PES-1:0][(`FP_FLAGS_BITS+32)-1:0] pe_data_out; - for (genvar i = 0; i < NUM_LANES; ++i) begin + for (genvar i = 0; i < NUM_LANES; ++i) begin : 
g_data_in assign data_in[i][0 +: 32] = dataa[i]; assign data_in[i][32 +: `INST_FRM_BITS] = frm; end @@ -87,7 +87,7 @@ module VX_fpu_sqrt import VX_fpu_pkg::*; #( `UNUSED_VAR (pe_data_in) - for (genvar i = 0; i < NUM_LANES; ++i) begin + for (genvar i = 0; i < NUM_LANES; ++i) begin : g_result assign result[i] = data_out[i][0 +: 32]; assign fflags_out[i] = data_out[i][32 +: `FP_FLAGS_BITS]; end @@ -96,7 +96,7 @@ module VX_fpu_sqrt import VX_fpu_pkg::*; #( `ifdef QUARTUS - for (genvar i = 0; i < NUM_PES; ++i) begin : fsqrts + for (genvar i = 0; i < NUM_PES; ++i) begin : g_fsqrts acl_fsqrt fsqrt ( .clk (clk), .areset (1'b0), @@ -113,7 +113,7 @@ module VX_fpu_sqrt import VX_fpu_pkg::*; #( `elsif VIVADO - for (genvar i = 0; i < NUM_PES; ++i) begin : fsqrts + for (genvar i = 0; i < NUM_PES; ++i) begin : g_fsqrts wire tuser; xil_fsqrt fsqrt ( @@ -134,7 +134,7 @@ module VX_fpu_sqrt import VX_fpu_pkg::*; #( `else - for (genvar i = 0; i < NUM_PES; ++i) begin : fsqrts + for (genvar i = 0; i < NUM_PES; ++i) begin : g_fsqrts reg [63:0] r; `UNUSED_VAR (r) fflags_t f; diff --git a/hw/rtl/libs/VX_avs_adapter.sv b/hw/rtl/libs/VX_avs_adapter.sv index 61322f673a..8d308ec362 100644 --- a/hw/rtl/libs/VX_avs_adapter.sv +++ b/hw/rtl/libs/VX_avs_adapter.sv @@ -67,19 +67,19 @@ module VX_avs_adapter #( wire [BANK_OFFSETW-1:0] req_bank_off; wire [NUM_BANKS-1:0] bank_req_ready; - if (NUM_BANKS > 1) begin + if (NUM_BANKS > 1) begin : g_bank_sel assign req_bank_sel = mem_req_addr[BANK_ADDRW-1:0]; - end else begin + end else begin : g_bank_sel assign req_bank_sel = '0; end assign req_bank_off = mem_req_addr[ADDR_WIDTH-1:LOG2_NUM_BANKS]; - for (genvar i = 0; i < NUM_BANKS; ++i) begin + for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_req_queue_push assign req_queue_push[i] = mem_req_valid && ~mem_req_rw && bank_req_ready[i] && (req_bank_sel == i); end - for (genvar i = 0; i < NUM_BANKS; ++i) begin : pending_sizes + for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_pending_sizes VX_pending_size #( 
.SIZE (RD_QUEUE_SIZE) ) pending_size ( @@ -95,7 +95,7 @@ module VX_avs_adapter #( ); end - for (genvar i = 0; i < NUM_BANKS; ++i) begin : rd_req_queues + for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_rd_req_queues VX_fifo_queue #( .DATAW (TAG_WIDTH), .DEPTH (RD_QUEUE_SIZE) @@ -114,7 +114,7 @@ module VX_avs_adapter #( ); end - for (genvar i = 0; i < NUM_BANKS; ++i) begin : req_out_bufs + for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_req_out_bufs wire valid_out; wire rw_out; wire [DATA_SIZE-1:0] byteen_out; @@ -151,11 +151,7 @@ module VX_avs_adapter #( assign bank_req_ready[i] = ready_out_w && ~req_queue_going_full[i]; end - if (NUM_BANKS > 1) begin - assign mem_req_ready = bank_req_ready[req_bank_sel]; - end else begin - assign mem_req_ready = bank_req_ready; - end + assign mem_req_ready = bank_req_ready[req_bank_sel]; // Responses handling ///////////////////////////////////////////////////// @@ -166,7 +162,7 @@ module VX_avs_adapter #( wire [NUM_BANKS-1:0][DATA_WIDTH-1:0] rsp_queue_data_out; wire [NUM_BANKS-1:0] rsp_queue_empty; - for (genvar i = 0; i < NUM_BANKS; ++i) begin : rd_rsp_queues + for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_rd_rsp_queues VX_fifo_queue #( .DATAW (DATA_WIDTH), .DEPTH (RD_QUEUE_SIZE) @@ -185,8 +181,8 @@ module VX_avs_adapter #( ); end - for (genvar i = 0; i < NUM_BANKS; ++i) begin - assign rsp_arb_valid_in[i] = !rsp_queue_empty[i]; + for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_rsp_arbs + assign rsp_arb_valid_in[i] = ~rsp_queue_empty[i]; assign rsp_arb_data_in[i] = {rsp_queue_data_out[i], req_queue_tag_out[i]}; assign req_queue_pop[i] = rsp_arb_valid_in[i] && rsp_arb_ready_in[i]; end diff --git a/hw/rtl/libs/VX_axi_adapter.sv b/hw/rtl/libs/VX_axi_adapter.sv index 25ce1081bf..4755764a49 100644 --- a/hw/rtl/libs/VX_axi_adapter.sv +++ b/hw/rtl/libs/VX_axi_adapter.sv @@ -95,9 +95,9 @@ module VX_axi_adapter #( wire [BANK_ADDRW-1:0] req_bank_sel; - if (NUM_BANKS > 1) begin + if (NUM_BANKS > 1) begin : g_req_bank_sel assign 
req_bank_sel = mem_req_addr[BANK_ADDRW-1:0]; - end else begin + end else begin : g_req_bank_sel_0 assign req_bank_sel = '0; end @@ -106,7 +106,7 @@ module VX_axi_adapter #( reg [NUM_BANKS-1:0] m_axi_aw_ack; reg [NUM_BANKS-1:0] m_axi_w_ack; - for (genvar i = 0; i < NUM_BANKS; ++i) begin + for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_m_axi_w wire m_axi_aw_fire = m_axi_awvalid[i] && m_axi_awready[i]; wire m_axi_w_fire = m_axi_wvalid[i] && m_axi_wready[i]; always @(posedge clk) begin @@ -129,20 +129,16 @@ module VX_axi_adapter #( wire axi_write_ready [NUM_BANKS]; - for (genvar i = 0; i < NUM_BANKS; ++i) begin + for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_axi_write_ready assign axi_write_ready[i] = (m_axi_awready[i] || m_axi_aw_ack[i]) && (m_axi_wready[i] || m_axi_w_ack[i]); end - // Vortex request ack - if (NUM_BANKS > 1) begin - assign mem_req_ready = mem_req_rw ? axi_write_ready[req_bank_sel] : m_axi_arready[req_bank_sel]; - end else begin - assign mem_req_ready = mem_req_rw ? axi_write_ready[0] : m_axi_arready[0]; - end + // request ack + assign mem_req_ready = mem_req_rw ? 
axi_write_ready[req_bank_sel] : m_axi_arready[req_bank_sel]; // AXI write request address channel - for (genvar i = 0; i < NUM_BANKS; ++i) begin + for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_axi_write_addr assign m_axi_awvalid[i] = mem_req_valid && mem_req_rw && (req_bank_sel == i) && ~m_axi_aw_ack[i]; assign m_axi_awaddr[i] = (ADDR_WIDTH'(mem_req_addr) >> LOG2_NUM_BANKS) << AXSIZE; assign m_axi_awid[i] = mem_req_tag; @@ -157,7 +153,7 @@ module VX_axi_adapter #( end // AXI write request data channel - for (genvar i = 0; i < NUM_BANKS; ++i) begin + for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_axi_write_data assign m_axi_wvalid[i] = mem_req_valid && mem_req_rw && (req_bank_sel == i) && ~m_axi_w_ack[i]; assign m_axi_wdata[i] = mem_req_data; assign m_axi_wstrb[i] = mem_req_byteen; @@ -165,7 +161,7 @@ module VX_axi_adapter #( end // AXI write response channel (ignore) - for (genvar i = 0; i < NUM_BANKS; ++i) begin + for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_axi_write_rsp `UNUSED_VAR (m_axi_bvalid[i]) `UNUSED_VAR (m_axi_bid[i]) `UNUSED_VAR (m_axi_bresp[i]) @@ -174,7 +170,7 @@ module VX_axi_adapter #( end // AXI read request channel - for (genvar i = 0; i < NUM_BANKS; ++i) begin + for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_axi_read_req assign m_axi_arvalid[i] = mem_req_valid && ~mem_req_rw && (req_bank_sel == i); assign m_axi_araddr[i] = (ADDR_WIDTH'(mem_req_addr) >> LOG2_NUM_BANKS) << AXSIZE; assign m_axi_arid[i] = mem_req_tag; @@ -196,7 +192,7 @@ module VX_axi_adapter #( `UNUSED_VAR (m_axi_rlast) - for (genvar i = 0; i < NUM_BANKS; ++i) begin + for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_axi_read_rsp assign rsp_arb_valid_in[i] = m_axi_rvalid[i]; assign rsp_arb_data_in[i] = {m_axi_rdata[i], m_axi_rid[i]}; assign m_axi_rready[i] = rsp_arb_ready_in[i]; diff --git a/hw/rtl/libs/VX_bits_insert.sv b/hw/rtl/libs/VX_bits_insert.sv index f0f00a2b5e..dee8141bbe 100644 --- a/hw/rtl/libs/VX_bits_insert.sv +++ b/hw/rtl/libs/VX_bits_insert.sv @@ -1,10 +1,10 
@@ // Copyright © 2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -19,19 +19,19 @@ module VX_bits_insert #( parameter S = 1, parameter POS = 0 ) ( - input wire [N-1:0] data_in, - input wire [`UP(S)-1:0] ins_in, + input wire [N-1:0] data_in, + input wire [`UP(S)-1:0] ins_in, output wire [N+S-1:0] data_out -); - if (S == 0) begin +); + if (S == 0) begin : g_passthru `UNUSED_VAR (ins_in) assign data_out = data_in; - end else begin - if (POS == 0) begin + end else begin : g_insert + if (POS == 0) begin : g_pos_0 assign data_out = {data_in, ins_in}; - end else if (POS == N) begin + end else if (POS == N) begin : g_pos_N assign data_out = {ins_in, data_in}; - end else begin + end else begin : g_pos assign data_out = {data_in[N-1:POS], ins_in, data_in[POS-1:0]}; end end diff --git a/hw/rtl/libs/VX_bits_remove.sv b/hw/rtl/libs/VX_bits_remove.sv index bc2f60a705..159bd49930 100644 --- a/hw/rtl/libs/VX_bits_remove.sv +++ b/hw/rtl/libs/VX_bits_remove.sv @@ -1,10 +1,10 @@ // Copyright © 2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -19,17 +19,19 @@ module VX_bits_remove #( parameter S = 1, parameter POS = 0 ) ( - input wire [N-1:0] data_in, + input wire [N-1:0] data_in, output wire [N-S-1:0] data_out ); `STATIC_ASSERT (((0 == S) || ((POS + S) <= N)), ("invalid parameter")) - - if (POS == 0 || S == 0) begin + + if (S == 0) begin : g_passthru + assign data_out = data_in; + end else if (POS == 0) begin : g_pos_0 assign data_out = data_in[N-1:S]; - end else if ((POS + S) < N) begin - assign data_out = {data_in[N-1:(POS+S)], data_in[POS-1:0]}; - end else begin + end else if ((POS + S) == N) begin : g_pos_N assign data_out = data_in[POS-1:0]; + end else begin : g_pos + assign data_out = {data_in[N-1:(POS+S)], data_in[POS-1:0]}; end `UNUSED_VAR (data_in) diff --git a/hw/rtl/libs/VX_bypass_buffer.sv b/hw/rtl/libs/VX_bypass_buffer.sv index 4eefce440b..14079395b7 100644 --- a/hw/rtl/libs/VX_bypass_buffer.sv +++ b/hw/rtl/libs/VX_bypass_buffer.sv @@ -1,10 +1,10 @@ // Copyright © 2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -25,30 +25,25 @@ module VX_bypass_buffer #( parameter DATAW = 1, parameter PASSTHRU = 0 -) ( +) ( input wire clk, input wire reset, input wire valid_in, - output wire ready_in, + output wire ready_in, input wire [DATAW-1:0] data_in, output wire [DATAW-1:0] data_out, input wire ready_out, output wire valid_out -); - if (PASSTHRU != 0) begin - `UNUSED_VAR (clk) - `UNUSED_VAR (reset) - assign ready_in = ready_out; - assign valid_out = valid_in; - assign data_out = data_in; - end else begin +); + if (PASSTHRU == 0) begin : g_buffer + reg [DATAW-1:0] buffer; reg has_data; always @(posedge clk) begin if (reset) begin has_data <= 0; - end else begin + end else begin if (ready_out) begin has_data <= 0; end else if (~has_data) begin @@ -63,7 +58,16 @@ module VX_bypass_buffer #( assign ready_in = ready_out || ~has_data; assign data_out = has_data ? buffer : data_in; assign valid_out = valid_in || has_data; - end + + end else begin : g_passthru + + `UNUSED_VAR (clk) + `UNUSED_VAR (reset) + assign ready_in = ready_out; + assign valid_out = valid_in; + assign data_out = data_in; + + end else endmodule `TRACING_ON diff --git a/hw/rtl/libs/VX_cyclic_arbiter.sv b/hw/rtl/libs/VX_cyclic_arbiter.sv index 167042a3a8..ff803b9108 100644 --- a/hw/rtl/libs/VX_cyclic_arbiter.sv +++ b/hw/rtl/libs/VX_cyclic_arbiter.sv @@ -26,7 +26,7 @@ module VX_cyclic_arbiter #( output wire grant_valid, input wire grant_ready ); - if (NUM_REQS == 1) begin + if (NUM_REQS == 1) begin : g_passthru `UNUSED_VAR (clk) `UNUSED_VAR (reset) @@ -36,7 +36,7 @@ module VX_cyclic_arbiter #( assign grant_onehot = requests; assign grant_valid = requests[0]; - end else begin + end else begin : g_arbiter localparam IS_POW2 = (1 << LOG_NUM_REQS) == NUM_REQS; diff --git a/hw/rtl/libs/VX_decoder.sv b/hw/rtl/libs/VX_decoder.sv index 45b37b1dbb..c5c7b8706e 100644 --- a/hw/rtl/libs/VX_decoder.sv +++ b/hw/rtl/libs/VX_decoder.sv @@ -27,14 +27,14 @@ module VX_decoder #( input wire [M-1:0] valid_in, output wire [D-1:0][M-1:0] 
data_out ); - if (MODEL == 1) begin + if (MODEL == 1) begin : g_model1 reg [D-1:0][M-1:0] data_out_w; always @(*) begin data_out_w = '0; data_out_w[data_in] = valid_in; end assign data_out = data_out_w; - end else begin + end else begin : g_model0 assign data_out = (D*M)'(valid_in) << (data_in * M); end diff --git a/hw/rtl/libs/VX_divider.sv b/hw/rtl/libs/VX_divider.sv index 551940da15..b8424843d7 100644 --- a/hw/rtl/libs/VX_divider.sv +++ b/hw/rtl/libs/VX_divider.sv @@ -1,10 +1,10 @@ // Copyright © 2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -24,7 +24,7 @@ module VX_divider #( parameter LATENCY = 0 ) ( input wire clk, - input wire enable, + input wire enable, input wire [N_WIDTH-1:0] numer, input wire [D_WIDTH-1:0] denom, output wire [Q_WIDTH-1:0] quotient, @@ -37,7 +37,7 @@ module VX_divider #( wire [D_WIDTH-1:0] remainder_unqual; lpm_divide divide ( - .clock (clk), + .clock (clk), .clken (enable), .numer (numer), .denom (denom), @@ -47,7 +47,7 @@ module VX_divider #( defparam divide.lpm_type = "LPM_DIVIDE", - divide.lpm_widthn = N_WIDTH, + divide.lpm_widthn = N_WIDTH, divide.lpm_widthd = D_WIDTH, divide.lpm_nrepresentation = N_SIGNED ? "SIGNED" : "UNSIGNED", divide.lpm_drepresentation = D_SIGNED ? 
"SIGNED" : "UNSIGNED", @@ -62,36 +62,36 @@ module VX_divider #( reg [N_WIDTH-1:0] quotient_unqual; reg [D_WIDTH-1:0] remainder_unqual; - always @(*) begin + always @(*) begin begin if (N_SIGNED && D_SIGNED) begin quotient_unqual = $signed(numer) / $signed(denom); remainder_unqual = $signed(numer) % $signed(denom); - end + end else if (N_SIGNED && !D_SIGNED) begin quotient_unqual = $signed(numer) / denom; remainder_unqual = $signed(numer) % denom; - end + end else if (!N_SIGNED && D_SIGNED) begin quotient_unqual = numer / $signed(denom); remainder_unqual = numer % $signed(denom); - end + end else begin quotient_unqual = numer / denom; - remainder_unqual = numer % denom; + remainder_unqual = numer % denom; end end end - if (LATENCY == 0) begin + if (LATENCY == 0) begin : g_comb assign quotient = quotient_unqual [Q_WIDTH-1:0]; assign remainder = remainder_unqual [R_WIDTH-1:0]; - end else begin + end else begin : g_pipe reg [N_WIDTH-1:0] quotient_pipe [LATENCY-1:0]; reg [D_WIDTH-1:0] remainder_pipe [LATENCY-1:0]; - for (genvar i = 0; i < LATENCY; ++i) begin - always @(posedge clk) begin + for (genvar i = 0; i < LATENCY; ++i) begin : g_reg + always @(posedge clk) begin if (enable) begin quotient_pipe[i] <= (0 == i) ? quotient_unqual : quotient_pipe[i-1]; remainder_pipe[i] <= (0 == i) ? 
remainder_unqual : remainder_pipe[i-1]; @@ -101,7 +101,7 @@ module VX_divider #( assign quotient = quotient_pipe[LATENCY-1][Q_WIDTH-1:0]; assign remainder = remainder_pipe[LATENCY-1][R_WIDTH-1:0]; - end + end `endif diff --git a/hw/rtl/libs/VX_dp_ram.sv b/hw/rtl/libs/VX_dp_ram.sv index 49f37caff8..21ab03ad5e 100644 --- a/hw/rtl/libs/VX_dp_ram.sv +++ b/hw/rtl/libs/VX_dp_ram.sv @@ -44,10 +44,10 @@ module VX_dp_ram #( `STATIC_ASSERT((WRENW * WSELW == DATAW), ("invalid parameter")) `define RAM_INITIALIZATION \ - if (INIT_ENABLE != 0) begin \ - if (INIT_FILE != "") begin \ + if (INIT_ENABLE != 0) begin : g_init \ + if (INIT_FILE != "") begin : g_file \ initial $readmemh(INIT_FILE, ram); \ - end else begin \ + end else begin : g_value \ initial begin \ for (integer i = 0; i < SIZE; ++i) \ ram[i] = INIT_VALUE; \ @@ -58,17 +58,15 @@ module VX_dp_ram #( `UNUSED_PARAM (RW_ASSERT) `UNUSED_VAR (read) - if (WRENW > 1) begin - `RUNTIME_ASSERT(~write || (| wren), ("%t: invalid write enable mask", $time)) - end + `RUNTIME_ASSERT((((WRENW == 1) ) || ~write) || (| wren), ("%t: invalid write enable mask", $time)) - if (OUT_REG && !READ_ENABLE) begin + if (OUT_REG && !READ_ENABLE) begin : g_out_reg `UNUSED_PARAM (NO_RWCHECK) reg [DATAW-1:0] rdata_r; wire cs = read || write; - if (WRENW != 1) begin + if (WRENW != 1) begin : g_writeen `ifdef QUARTUS - if (LUTRAM != 0) begin + if (LUTRAM != 0) begin : g_lutram `USE_FAST_BRAM reg [WRENW-1:0][WSELW-1:0] ram [0:SIZE-1]; `RAM_INITIALIZATION always @(posedge clk) begin @@ -86,7 +84,7 @@ module VX_dp_ram #( end end end - end else begin + end else begin : g_no_lutram reg [WRENW-1:0][WSELW-1:0] ram [0:SIZE-1]; `RAM_INITIALIZATION always @(posedge clk) begin @@ -107,7 +105,7 @@ module VX_dp_ram #( end `else // default synthesis - if (LUTRAM != 0) begin + if (LUTRAM != 0) begin : g_lutram `USE_FAST_BRAM reg [DATAW-1:0] ram [0:SIZE-1]; `RAM_INITIALIZATION always @(posedge clk) begin @@ -125,7 +123,7 @@ module VX_dp_ram #( end end end - end else 
begin + end else begin : g_no_lutram reg [DATAW-1:0] ram [0:SIZE-1]; `RAM_INITIALIZATION always @(posedge clk) begin @@ -145,8 +143,8 @@ module VX_dp_ram #( end end `endif - end else begin - if (LUTRAM != 0) begin + end else begin : g_no_writeen + if (LUTRAM != 0) begin : g_lutram `USE_FAST_BRAM reg [DATAW-1:0] ram [0:SIZE-1]; `RAM_INITIALIZATION always @(posedge clk) begin @@ -161,7 +159,7 @@ module VX_dp_ram #( end end - end else begin + end else begin : g_no_lutram reg [DATAW-1:0] ram [0:SIZE-1]; `RAM_INITIALIZATION always @(posedge clk) begin @@ -178,13 +176,13 @@ module VX_dp_ram #( end end assign rdata = rdata_r; - end else begin + end else begin : g_no_out_reg // OUT_REG==0 || READ_ENABLE=1 wire [DATAW-1:0] rdata_w; `ifdef SYNTHESIS - if (WRENW > 1) begin + if (WRENW > 1) begin : g_writeen `ifdef QUARTUS - if (LUTRAM != 0) begin + if (LUTRAM != 0) begin : g_lutram `USE_FAST_BRAM reg [WRENW-1:0][WSELW-1:0] ram [0:SIZE-1]; `RAM_INITIALIZATION always @(posedge clk) begin @@ -196,8 +194,8 @@ module VX_dp_ram #( end end assign rdata_w = ram[raddr]; - end else begin - if (NO_RWCHECK != 0) begin + end else begin : g_no_lutram + if (NO_RWCHECK != 0) begin : g_no_rwcheck `NO_RW_RAM_CHECK reg [WRENW-1:0][WSELW-1:0] ram [0:SIZE-1]; `RAM_INITIALIZATION always @(posedge clk) begin @@ -209,7 +207,7 @@ module VX_dp_ram #( end end assign rdata_w = ram[raddr]; - end else begin + end else begin : g_rwcheck reg [WRENW-1:0][WSELW-1:0] ram [0:SIZE-1]; `RAM_INITIALIZATION always @(posedge clk) begin @@ -225,7 +223,7 @@ module VX_dp_ram #( end `else // default synthesis - if (LUTRAM != 0) begin + if (LUTRAM != 0) begin : g_lutram `USE_FAST_BRAM reg [DATAW-1:0] ram [0:SIZE-1]; `RAM_INITIALIZATION always @(posedge clk) begin @@ -237,8 +235,8 @@ module VX_dp_ram #( end end assign rdata_w = ram[raddr]; - end else begin - if (NO_RWCHECK != 0) begin + end else begin : g_no_lutram + if (NO_RWCHECK != 0) begin : g_no_rwcheck `NO_RW_RAM_CHECK reg [DATAW-1:0] ram [0:SIZE-1]; 
`RAM_INITIALIZATION always @(posedge clk) begin @@ -250,7 +248,7 @@ module VX_dp_ram #( end end assign rdata_w = ram[raddr]; - end else begin + end else begin : g_rwcheck reg [DATAW-1:0] ram [0:SIZE-1]; `RAM_INITIALIZATION always @(posedge clk) begin @@ -265,9 +263,9 @@ module VX_dp_ram #( end end `endif - end else begin + end else begin : g_no_writeen // (WRENW == 1) - if (LUTRAM != 0) begin + if (LUTRAM != 0) begin : g_lutram `USE_FAST_BRAM reg [DATAW-1:0] ram [0:SIZE-1]; `RAM_INITIALIZATION always @(posedge clk) begin @@ -276,8 +274,8 @@ module VX_dp_ram #( end end assign rdata_w = ram[raddr]; - end else begin - if (NO_RWCHECK != 0) begin + end else begin : g_no_lutram + if (NO_RWCHECK != 0) begin : g_no_rwcheck `NO_RW_RAM_CHECK reg [DATAW-1:0] ram [0:SIZE-1]; `RAM_INITIALIZATION always @(posedge clk) begin @@ -286,7 +284,7 @@ module VX_dp_ram #( end end assign rdata_w = ram[raddr]; - end else begin + end else begin : g_rwcheck reg [DATAW-1:0] ram [0:SIZE-1]; `RAM_INITIALIZATION always @(posedge clk) begin @@ -304,7 +302,7 @@ module VX_dp_ram #( `RAM_INITIALIZATION wire [DATAW-1:0] ram_n; - for (genvar i = 0; i < WRENW; ++i) begin + for (genvar i = 0; i < WRENW; ++i) begin : g_ram_n assign ram_n[i * WSELW +: WSELW] = ((WRENW == 1) | wren[i]) ? wdata[i * WSELW +: WSELW] : ram[waddr][i * WSELW +: WSELW]; end @@ -320,9 +318,7 @@ module VX_dp_ram #( end end - if (LUTRAM || !NO_RWCHECK) begin - assign rdata_w = ram[raddr]; - end else begin + if (!LUTRAM && NO_RWCHECK) begin : g_rdata_no_bypass reg [DATAW-1:0] prev_data; reg [ADDRW-1:0] prev_waddr; reg prev_write; @@ -340,13 +336,15 @@ module VX_dp_ram #( end assign rdata_w = (prev_write && (prev_waddr == raddr)) ? 
prev_data : ram[raddr]; - if (RW_ASSERT) begin + if (RW_ASSERT) begin : g_rw_assert `RUNTIME_ASSERT(~read || (rdata_w == ram[raddr]), ("%t: read after write hazard", $time)) end + end else begin : g_rdata_with_bypass + assign rdata_w = ram[raddr]; end `endif - if (OUT_REG != 0) begin + if (OUT_REG != 0) begin : g_rdata_req reg [DATAW-1:0] rdata_r; always @(posedge clk) begin if (READ_ENABLE && reset) begin @@ -356,7 +354,7 @@ module VX_dp_ram #( end end assign rdata = rdata_r; - end else begin + end else begin : g_rdata_comb assign rdata = rdata_w; end diff --git a/hw/rtl/libs/VX_elastic_buffer.sv b/hw/rtl/libs/VX_elastic_buffer.sv index 3bfcdeb9cb..5067a4dd32 100644 --- a/hw/rtl/libs/VX_elastic_buffer.sv +++ b/hw/rtl/libs/VX_elastic_buffer.sv @@ -31,7 +31,7 @@ module VX_elastic_buffer #( input wire ready_out, output wire valid_out ); - if (SIZE == 0) begin + if (SIZE == 0) begin : g_passthru `UNUSED_VAR (clk) `UNUSED_VAR (reset) @@ -40,7 +40,7 @@ module VX_elastic_buffer #( assign data_out = data_in; assign ready_in = ready_out; - end else if (SIZE == 1) begin + end else if (SIZE == 1) begin : g_eb1 VX_pipe_buffer #( .DATAW (DATAW), @@ -56,7 +56,7 @@ module VX_elastic_buffer #( .ready_out (ready_out) ); - end else if (SIZE == 2 && LUTRAM == 0) begin + end else if (SIZE == 2 && LUTRAM == 0) begin : g_eb2 wire valid_out_t; wire [DATAW-1:0] data_out_t; @@ -90,7 +90,7 @@ module VX_elastic_buffer #( .ready_out (ready_out) ); - end else begin + end else begin : g_ebN wire empty, full; diff --git a/hw/rtl/libs/VX_encoder.sv b/hw/rtl/libs/VX_encoder.sv index 85d72ce52b..ed65ed4f6e 100644 --- a/hw/rtl/libs/VX_encoder.sv +++ b/hw/rtl/libs/VX_encoder.sv @@ -27,17 +27,17 @@ module VX_encoder #( output wire [LN-1:0] data_out, output wire valid_out ); - if (N == 1) begin + if (N == 1) begin : g_n1 assign data_out = 0; assign valid_out = data_in; - end else if (N == 2) begin + end else if (N == 2) begin : g_n2 assign data_out = data_in[!REVERSE]; assign valid_out = (| data_in); 
- end else if (MODEL == 1) begin + end else if (MODEL == 1) begin : g_model1 localparam M = 1 << LN; `IGNORE_UNOPTFLAT_BEGIN wire [LN-1:0][M-1:0] addr; @@ -47,21 +47,19 @@ module VX_encoder #( // base case, also handle padding for non-power of two inputs assign v[0] = REVERSE ? (M'(data_in) << (M - N)) : M'(data_in); - for (genvar lvl = 1; lvl < (LN+1); ++lvl) begin + for (genvar lvl = 1; lvl < (LN+1); ++lvl) begin : g_scan_l localparam SN = 1 << (LN - lvl); localparam SI = M / SN; localparam SW = lvl; - for (genvar s = 0; s < SN; ++s) begin + for (genvar s = 0; s < SN; ++s) begin : g_scan_s `IGNORE_UNOPTFLAT_BEGIN wire [1:0] vs = {v[lvl-1][s*SI+(SI>>1)], v[lvl-1][s*SI]}; `IGNORE_UNOPTFLAT_END - assign v[lvl][s*SI] = (| vs); - - if (lvl == 1) begin + if (lvl == 1) begin : g_lvl_1 assign addr[lvl-1][s*SI +: SW] = vs[!REVERSE]; - end else begin + end else begin : g_lvl_n assign addr[lvl-1][s*SI +: SW] = { vs[!REVERSE], addr[lvl-2][s*SI +: SW-1] | addr[lvl-2][s*SI+(SI>>1) +: SW-1] @@ -73,11 +71,11 @@ module VX_encoder #( assign data_out = addr[LN-1][LN-1:0]; assign valid_out = v[LN][0]; - end else if (MODEL == 2 && REVERSE == 0) begin + end else if (MODEL == 2 && REVERSE == 0) begin : g_model2 - for (genvar j = 0; j < LN; ++j) begin + for (genvar j = 0; j < LN; ++j) begin : g_data_out wire [N-1:0] mask; - for (genvar i = 0; i < N; ++i) begin + for (genvar i = 0; i < N; ++i) begin : g_mask assign mask[i] = i[j]; end assign data_out[j] = | (mask & data_in); @@ -85,11 +83,11 @@ module VX_encoder #( assign valid_out = (| data_in); - end else begin + end else begin : g_model0 reg [LN-1:0] index_w; - if (REVERSE != 0) begin + if (REVERSE != 0) begin : g_msb always @(*) begin index_w = 'x; for (integer i = N-1; i >= 0; --i) begin @@ -98,7 +96,7 @@ module VX_encoder #( end end end - end else begin + end else begin : g_lsb always @(*) begin index_w = 'x; for (integer i = 0; i < N; ++i) begin diff --git a/hw/rtl/libs/VX_fifo_queue.sv b/hw/rtl/libs/VX_fifo_queue.sv index 
dd772ea731..7eb760e6bb 100644 --- a/hw/rtl/libs/VX_fifo_queue.sv +++ b/hw/rtl/libs/VX_fifo_queue.sv @@ -58,7 +58,7 @@ module VX_fifo_queue #( .size (size) ); - if (DEPTH == 1) begin + if (DEPTH == 1) begin : g_depth_1 reg [DATAW-1:0] head_r; @@ -70,11 +70,11 @@ module VX_fifo_queue #( assign data_out = head_r; - end else begin + end else begin : g_depth_n localparam ADDRW = `CLOG2(DEPTH); - if (OUT_REG != 0) begin + if (OUT_REG != 0) begin : g_out_reg wire [DATAW-1:0] dout; reg [DATAW-1:0] dout_r; @@ -128,7 +128,7 @@ module VX_fifo_queue #( assign data_out = dout_r; - end else begin + end else begin : g_no_out_reg reg [ADDRW-1:0] rd_ptr_r; reg [ADDRW-1:0] wr_ptr_r; diff --git a/hw/rtl/libs/VX_find_first.sv b/hw/rtl/libs/VX_find_first.sv index 18f345855f..43666737ce 100644 --- a/hw/rtl/libs/VX_find_first.sv +++ b/hw/rtl/libs/VX_find_first.sv @@ -33,20 +33,20 @@ module VX_find_first #( wire [TN-1:0][DATAW-1:0] d_n; `IGNORE_WARNINGS_END - for (genvar i = 0; i < N; ++i) begin + for (genvar i = 0; i < N; ++i) begin : g_reverse assign s_n[TL+i] = REVERSE ? valid_in[N-1-i] : valid_in[i]; assign d_n[TL+i] = REVERSE ? data_in[N-1-i] : data_in[i]; end - if (TL < (TN-N)) begin - for (genvar i = TL+N; i < TN; ++i) begin + if (TL < (TN-N)) begin : g_fill + for (genvar i = TL+N; i < TN; ++i) begin : g_i assign s_n[i] = 0; assign d_n[i] = '0; end end - for (genvar j = 0; j < LOGN; ++j) begin - for (genvar i = 0; i < (2**j); ++i) begin + for (genvar j = 0; j < LOGN; ++j) begin : g_scan + for (genvar i = 0; i < (2**j); ++i) begin : g_i assign s_n[2**j-1+i] = s_n[2**(j+1)-1+i*2] | s_n[2**(j+1)-1+i*2+1]; assign d_n[2**j-1+i] = s_n[2**(j+1)-1+i*2] ? 
d_n[2**(j+1)-1+i*2] : d_n[2**(j+1)-1+i*2+1]; end diff --git a/hw/rtl/libs/VX_generic_arbiter.sv b/hw/rtl/libs/VX_generic_arbiter.sv index 5cc9a9aab4..5e090ebdda 100644 --- a/hw/rtl/libs/VX_generic_arbiter.sv +++ b/hw/rtl/libs/VX_generic_arbiter.sv @@ -27,7 +27,7 @@ module VX_generic_arbiter #( output wire grant_valid, input wire grant_ready ); - if (TYPE == "P") begin + if (TYPE == "P") begin : g_priority `UNUSED_VAR (clk) `UNUSED_VAR (reset) @@ -42,7 +42,7 @@ module VX_generic_arbiter #( .grant_onehot (grant_onehot) ); - end else if (TYPE == "R") begin + end else if (TYPE == "R") begin : g_round_robin VX_rr_arbiter #( .NUM_REQS (NUM_REQS) @@ -56,7 +56,7 @@ module VX_generic_arbiter #( .grant_ready (grant_ready) ); - end else if (TYPE == "M") begin + end else if (TYPE == "M") begin : g_matrix VX_matrix_arbiter #( .NUM_REQS (NUM_REQS) @@ -70,7 +70,7 @@ module VX_generic_arbiter #( .grant_ready (grant_ready) ); - end else if (TYPE == "C") begin + end else if (TYPE == "C") begin : g_cyclic VX_cyclic_arbiter #( .NUM_REQS (NUM_REQS) @@ -84,7 +84,7 @@ module VX_generic_arbiter #( .grant_ready (grant_ready) ); - end else begin + end else begin : g_invalid `ERROR(("invalid parameter")); diff --git a/hw/rtl/libs/VX_lzc.sv b/hw/rtl/libs/VX_lzc.sv index 2589bf5a76..af2cb650dd 100644 --- a/hw/rtl/libs/VX_lzc.sv +++ b/hw/rtl/libs/VX_lzc.sv @@ -1,10 +1,10 @@ // Copyright © 2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -23,18 +23,18 @@ module VX_lzc #( output wire [LOGN-1:0] data_out, output wire valid_out ); - if (N == 1) begin + if (N == 1) begin : g_passthru `UNUSED_PARAM (REVERSE) assign data_out = '0; assign valid_out = data_in; - end else begin + end else begin : g_lzc wire [N-1:0][LOGN-1:0] indices; - for (genvar i = 0; i < N; ++i) begin + for (genvar i = 0; i < N; ++i) begin : g_indices assign indices[i] = REVERSE ? LOGN'(i) : LOGN'(N-1-i); end @@ -42,7 +42,7 @@ module VX_lzc #( .N (N), .DATAW (LOGN), .REVERSE (!REVERSE) - ) find_first ( + ) find_first ( .data_in (indices), .valid_in (data_in), .data_out (data_out), @@ -50,6 +50,6 @@ module VX_lzc #( ); end - + endmodule `TRACING_ON diff --git a/hw/rtl/libs/VX_matrix_arbiter.sv b/hw/rtl/libs/VX_matrix_arbiter.sv index eff4eb7e13..2840ef43ec 100644 --- a/hw/rtl/libs/VX_matrix_arbiter.sv +++ b/hw/rtl/libs/VX_matrix_arbiter.sv @@ -26,7 +26,7 @@ module VX_matrix_arbiter #( output wire grant_valid, input wire grant_ready ); - if (NUM_REQS == 1) begin + if (NUM_REQS == 1) begin : g_passthru `UNUSED_VAR (clk) `UNUSED_VAR (reset) @@ -36,32 +36,30 @@ module VX_matrix_arbiter #( assign grant_onehot = requests; assign grant_valid = requests[0]; - end else begin + end else begin : g_arbiter reg [NUM_REQS-1:1] state [NUM_REQS-1:0]; wire [NUM_REQS-1:0] pri [NUM_REQS-1:0]; wire [NUM_REQS-1:0] grant; - for (genvar r = 0; r < NUM_REQS; ++r) begin - for (genvar c = 0; c < NUM_REQS; ++c) begin - if (r > c) begin + for (genvar r = 0; r < NUM_REQS; ++r) begin : g_pri_r + for (genvar c = 0; c < NUM_REQS; ++c) begin : g_pri_c + if (r > c) begin : g_row assign pri[r][c] = requests[c] && state[c][r]; - end - else if (r < c) begin + end else if (r < c) begin : g_col assign pri[r][c] = requests[c] && !state[r][c]; - end - else begin + end else begin : g_equal assign pri[r][c] = 0; end end end - for (genvar r = 0; r < NUM_REQS; ++r) begin + for (genvar r = 0; r < NUM_REQS; ++r) begin : g_grant assign grant[r] = requests[r] && ~(| pri[r]); end - 
for (genvar r = 0; r < NUM_REQS; ++r) begin - for (genvar c = r + 1; c < NUM_REQS; ++c) begin + for (genvar r = 0; r < NUM_REQS; ++r) begin : g_state_r + for (genvar c = r + 1; c < NUM_REQS; ++c) begin : g_state_c always @(posedge clk) begin if (reset) begin state[r][c] <= '0; diff --git a/hw/rtl/libs/VX_mem_adapter.sv b/hw/rtl/libs/VX_mem_adapter.sv index 6ee6060b83..5f32e1aa15 100644 --- a/hw/rtl/libs/VX_mem_adapter.sv +++ b/hw/rtl/libs/VX_mem_adapter.sv @@ -76,7 +76,7 @@ module VX_mem_adapter #( `UNUSED_VAR (mem_rsp_tag_out) - if (DST_LDATAW > SRC_LDATAW) begin + if (DST_LDATAW > SRC_LDATAW) begin : g_wider_dst_data `UNUSED_VAR (clk) `UNUSED_VAR (reset) @@ -88,12 +88,12 @@ module VX_mem_adapter #( wire [P-1:0][SRC_DATA_WIDTH-1:0] mem_rsp_data_out_w = mem_rsp_data_out; - if (DST_ADDR_WIDTH < (SRC_ADDR_WIDTH - D)) begin + if (DST_ADDR_WIDTH < (SRC_ADDR_WIDTH - D)) begin : g_mem_req_addr_out_w_src `UNUSED_VAR (mem_req_addr_in_qual) assign mem_req_addr_out_w = mem_req_addr_in_qual[DST_ADDR_WIDTH-1:0]; - end else if (DST_ADDR_WIDTH > (SRC_ADDR_WIDTH - D)) begin + end else if (DST_ADDR_WIDTH > (SRC_ADDR_WIDTH - D)) begin : g_mem_req_addr_out_w_dst assign mem_req_addr_out_w = DST_ADDR_WIDTH'(mem_req_addr_in_qual); - end else begin + end else begin : g_mem_req_addr_out_w assign mem_req_addr_out_w = mem_req_addr_in_qual; end @@ -125,7 +125,7 @@ module VX_mem_adapter #( assign mem_rsp_tag_in_w = SRC_TAG_WIDTH'(mem_rsp_tag_out[SRC_TAG_WIDTH+D-1:D]); assign mem_rsp_ready_out = mem_rsp_ready_in_w; - end else if (DST_LDATAW < SRC_LDATAW) begin + end else if (DST_LDATAW < SRC_LDATAW) begin : g_wider_src_data reg [D-1:0] req_ctr, rsp_ctr; @@ -173,12 +173,12 @@ module VX_mem_adapter #( wire [SRC_ADDR_WIDTH+D-1:0] mem_req_addr_in_qual = {mem_req_addr_in, req_ctr}; - if (DST_ADDR_WIDTH < (SRC_ADDR_WIDTH + D)) begin + if (DST_ADDR_WIDTH < (SRC_ADDR_WIDTH + D)) begin : g_mem_req_addr_out_w_src `UNUSED_VAR (mem_req_addr_in_qual) assign mem_req_addr_out_w = 
mem_req_addr_in_qual[DST_ADDR_WIDTH-1:0]; - end else if (DST_ADDR_WIDTH > (SRC_ADDR_WIDTH + D)) begin + end else if (DST_ADDR_WIDTH > (SRC_ADDR_WIDTH + D)) begin : g_mem_req_addr_out_w_dst assign mem_req_addr_out_w = DST_ADDR_WIDTH'(mem_req_addr_in_qual); - end else begin + end else begin : g_mem_req_addr_out_w assign mem_req_addr_out_w = mem_req_addr_in_qual; end @@ -194,17 +194,17 @@ module VX_mem_adapter #( assign mem_rsp_tag_in_w = SRC_TAG_WIDTH'(mem_rsp_tag_out); assign mem_rsp_ready_out = mem_rsp_ready_in_w; - end else begin + end else begin : g_passthru `UNUSED_VAR (clk) `UNUSED_VAR (reset) - if (DST_ADDR_WIDTH < SRC_ADDR_WIDTH) begin + if (DST_ADDR_WIDTH < SRC_ADDR_WIDTH) begin : g_mem_req_addr_out_w_src `UNUSED_VAR (mem_req_addr_in) assign mem_req_addr_out_w = mem_req_addr_in[DST_ADDR_WIDTH-1:0]; - end else if (DST_ADDR_WIDTH > SRC_ADDR_WIDTH) begin + end else if (DST_ADDR_WIDTH > SRC_ADDR_WIDTH) begin : g_mem_req_addr_out_w_dst assign mem_req_addr_out_w = DST_ADDR_WIDTH'(mem_req_addr_in); - end else begin + end else begin : g_mem_req_addr_out_w assign mem_req_addr_out_w = mem_req_addr_in; end diff --git a/hw/rtl/libs/VX_mem_coalescer.sv b/hw/rtl/libs/VX_mem_coalescer.sv index 84c417bd3c..55cad2df7d 100644 --- a/hw/rtl/libs/VX_mem_coalescer.sv +++ b/hw/rtl/libs/VX_mem_coalescer.sv @@ -115,11 +115,11 @@ module VX_mem_coalescer #( logic [NUM_REQS-1:0] req_rem_mask_r, req_rem_mask_n; wire [NUM_REQS-1:0][DATA_RATIO_W-1:0] in_addr_offset; - for (genvar i = 0; i < NUM_REQS; i++) begin + for (genvar i = 0; i < NUM_REQS; i++) begin : g_in_addr_offset assign in_addr_offset[i] = in_req_addr[i][DATA_RATIO_W-1:0]; end - for (genvar i = 0; i < OUT_REQS; ++i) begin + for (genvar i = 0; i < OUT_REQS; ++i) begin : g_seed_gen wire [DATA_RATIO-1:0] batch_mask; wire [DATA_RATIO_W-1:0] batch_idx; @@ -135,16 +135,19 @@ module VX_mem_coalescer #( ); wire [DATA_RATIO-1:0][OUT_ADDR_WIDTH-1:0] addr_base; - wire [DATA_RATIO-1:0][FLAGS_WIDTH-1:0] req_flags; - for (genvar j = 0; j < 
DATA_RATIO; ++j) begin + for (genvar j = 0; j < DATA_RATIO; ++j) begin : g_addr_base assign addr_base[j] = in_req_addr[DATA_RATIO * i + j][ADDR_WIDTH-1:DATA_RATIO_W]; + end + + wire [DATA_RATIO-1:0][FLAGS_WIDTH-1:0] req_flags; + for (genvar j = 0; j < DATA_RATIO; ++j) begin : g_req_flags assign req_flags[j] = in_req_flags[DATA_RATIO * i + j]; end assign seed_addr_n[i] = addr_base[batch_idx]; assign seed_flags_n[i] = req_flags[batch_idx]; - for (genvar j = 0; j < DATA_RATIO; ++j) begin + for (genvar j = 0; j < DATA_RATIO; ++j) begin : g_addr_matches_n assign addr_matches_n[i * DATA_RATIO + j] = (addr_base[j] == seed_addr_n[i]); end end @@ -291,12 +294,16 @@ module VX_mem_coalescer #( assign {ibuf_dout_tag, ibuf_dout_pmask, ibuf_dout_offset} = ibuf_dout; wire [NUM_REQS-1:0][DATA_IN_WIDTH-1:0] in_rsp_data_n; - wire [NUM_REQS-1:0] in_rsp_mask_n; + for (genvar i = 0; i < OUT_REQS; ++i) begin : g_in_rsp_data_n + for (genvar j = 0; j < DATA_RATIO; ++j) begin : g_j + assign in_rsp_data_n[i * DATA_RATIO + j] = out_rsp_data[i][ibuf_dout_offset[i * DATA_RATIO + j] * DATA_IN_WIDTH +: DATA_IN_WIDTH]; + end + end - for (genvar i = 0; i < OUT_REQS; ++i) begin - for (genvar j = 0; j < DATA_RATIO; ++j) begin + wire [NUM_REQS-1:0] in_rsp_mask_n; + for (genvar i = 0; i < OUT_REQS; ++i) begin : g_in_rsp_mask_n + for (genvar j = 0; j < DATA_RATIO; ++j) begin : g_j assign in_rsp_mask_n[i * DATA_RATIO + j] = out_rsp_mask[i] && ibuf_dout_pmask[i * DATA_RATIO + j]; - assign in_rsp_data_n[i * DATA_RATIO + j] = out_rsp_data[i][ibuf_dout_offset[i * DATA_RATIO + j] * DATA_IN_WIDTH +: DATA_IN_WIDTH]; end end @@ -310,11 +317,15 @@ module VX_mem_coalescer #( wire [`UP(UUID_WIDTH)-1:0] out_req_uuid; wire [`UP(UUID_WIDTH)-1:0] out_rsp_uuid; - if (UUID_WIDTH != 0) begin + if (UUID_WIDTH != 0) begin : g_out_req_uuid assign out_req_uuid = out_req_tag[OUT_TAG_WIDTH-1 -: UUID_WIDTH]; - assign out_rsp_uuid = out_rsp_tag[OUT_TAG_WIDTH-1 -: UUID_WIDTH]; - end else begin + end else begin : g_out_req_uuid_0 
assign out_req_uuid = '0; + end + + if (UUID_WIDTH != 0) begin : g_out_rsp_uuid + assign out_rsp_uuid = out_rsp_tag[OUT_TAG_WIDTH-1 -: UUID_WIDTH]; + end else begin : g_out_rsp_uuid_0 assign out_rsp_uuid = '0; end diff --git a/hw/rtl/libs/VX_mem_scheduler.sv b/hw/rtl/libs/VX_mem_scheduler.sv index 1dddaba111..9dada16bca 100644 --- a/hw/rtl/libs/VX_mem_scheduler.sv +++ b/hw/rtl/libs/VX_mem_scheduler.sv @@ -160,9 +160,9 @@ module VX_mem_scheduler #( wire reqq_ready_in; wire [REQQ_TAG_WIDTH-1:0] reqq_tag_u; - if (UUID_WIDTH != 0) begin + if (UUID_WIDTH != 0) begin : g_reqq_tag_u_uuid assign reqq_tag_u = {core_req_tag[TAG_WIDTH-1 -: UUID_WIDTH], ibuf_waddr}; - end else begin + end else begin : g_reqq_tag_u assign reqq_tag_u = ibuf_waddr; end @@ -220,7 +220,7 @@ module VX_mem_scheduler #( // Handle memory coalescing /////////////////////////////////////////////// - if (COALESCE_ENABLE) begin + if (COALESCE_ENABLE) begin : g_coalescer VX_mem_coalescer #( .INSTANCE_ID ($sformatf("%s-coalescer", INSTANCE_ID)), @@ -273,8 +273,7 @@ module VX_mem_scheduler #( .out_rsp_ready (mem_rsp_ready) ); - end else begin - + end else begin : g_no_coalescer assign reqq_valid_s = reqq_valid; assign reqq_mask_s = reqq_mask; assign reqq_rw_s = reqq_rw; @@ -303,16 +302,16 @@ module VX_mem_scheduler #( wire [BATCH_SEL_WIDTH-1:0] req_batch_idx; - for (genvar i = 0; i < MEM_BATCHES; ++i) begin - for (genvar j = 0; j < MEM_CHANNELS; ++j) begin + for (genvar i = 0; i < MEM_BATCHES; ++i) begin : g_mem_req_data_b + for (genvar j = 0; j < MEM_CHANNELS; ++j) begin : g_j localparam r = i * MEM_CHANNELS + j; - if (r < MERGED_REQS) begin + if (r < MERGED_REQS) begin : g_valid assign mem_req_mask_b[i][j] = reqq_mask_s[r]; assign mem_req_byteen_b[i][j] = reqq_byteen_s[r]; assign mem_req_addr_b[i][j] = reqq_addr_s[r]; assign mem_req_flags_b[i][j] = reqq_flags_s[r]; assign mem_req_data_b[i][j] = reqq_data_s[r]; - end else begin + end else begin : g_extra assign mem_req_mask_b[i][j] = 0; assign 
mem_req_byteen_b[i][j] = '0; assign mem_req_addr_b[i][j] = '0; @@ -329,7 +328,7 @@ module VX_mem_scheduler #( assign mem_req_flags_s = mem_req_flags_b[req_batch_idx]; assign mem_req_data_s = mem_req_data_b[req_batch_idx]; - if (MEM_BATCHES != 1) begin + if (MEM_BATCHES != 1) begin : g_batch reg [MEM_BATCH_BITS-1:0] req_batch_idx_r; wire is_degenerate_batch = ~(| mem_req_mask_s); @@ -354,7 +353,7 @@ module VX_mem_scheduler #( wire [MEM_BATCHES-1:0][MEM_BATCH_BITS-1:0] req_batch_idxs; wire [MEM_BATCH_BITS-1:0] req_batch_idx_last; - for (genvar i = 0; i < MEM_BATCHES; ++i) begin + for (genvar i = 0; i < MEM_BATCHES; ++i) begin : g_req_batch assign req_batch_valids[i] = (| mem_req_mask_b[i]); assign req_batch_idxs[i] = MEM_BATCH_BITS'(i); end @@ -375,7 +374,7 @@ module VX_mem_scheduler #( assign req_sent_all = mem_req_ready_b && (req_batch_idx_r == req_batch_idx_last); assign mem_req_tag_s = {reqq_tag_s, req_batch_idx}; - end else begin + end else begin : g_no_batch assign mem_req_valid_s = reqq_valid_s; assign req_batch_idx = '0; @@ -407,13 +406,13 @@ module VX_mem_scheduler #( wire [CORE_REQS-1:0] rsp_rem_mask_n, curr_mask; wire [BATCH_SEL_WIDTH-1:0] rsp_batch_idx; - if (CORE_BATCHES > 1) begin + if (CORE_BATCHES > 1) begin : g_rsp_batch_idx assign rsp_batch_idx = mem_rsp_tag_s[CORE_BATCH_BITS-1:0]; - end else begin + end else begin : g_rsp_batch_idx_0 assign rsp_batch_idx = '0; end - for (genvar r = 0; r < CORE_REQS; ++r) begin + for (genvar r = 0; r < CORE_REQS; ++r) begin : g_curr_mask localparam i = r / CORE_CHANNELS; localparam j = r % CORE_CHANNELS; assign curr_mask[r] = (BATCH_SEL_WIDTH'(i) == rsp_batch_idx) && mem_rsp_mask_s[j]; @@ -434,7 +433,7 @@ module VX_mem_scheduler #( end end - if (RSP_PARTIAL != 0) begin + if (RSP_PARTIAL != 0) begin : g_rsp_partial reg [CORE_QUEUE_SIZE-1:0] rsp_sop_r; @@ -451,14 +450,14 @@ module VX_mem_scheduler #( assign crsp_mask = curr_mask; assign crsp_sop = rsp_sop_r[ibuf_raddr]; - for (genvar r = 0; r < CORE_REQS; ++r) begin + 
for (genvar r = 0; r < CORE_REQS; ++r) begin : g_crsp_data localparam j = r % CORE_CHANNELS; assign crsp_data[r] = mem_rsp_data_s[j]; end assign mem_rsp_ready_s = crsp_ready; - end else begin + end else begin : g_rsp_full reg [CORE_BATCHES*CORE_CHANNELS*WORD_WIDTH-1:0] rsp_store [CORE_QUEUE_SIZE-1:0]; reg [CORE_BATCHES-1:00][CORE_CHANNELS-1:0][WORD_WIDTH-1:0] rsp_store_n; @@ -486,7 +485,7 @@ module VX_mem_scheduler #( assign crsp_mask = rsp_orig_mask[ibuf_raddr]; assign crsp_sop = 1'b1; - for (genvar r = 0; r < CORE_REQS; ++r) begin + for (genvar r = 0; r < CORE_REQS; ++r) begin : g_crsp_data localparam i = r / CORE_CHANNELS; localparam j = r % CORE_CHANNELS; assign crsp_data[r] = rsp_store_n[i][j]; @@ -496,9 +495,9 @@ module VX_mem_scheduler #( end - if (UUID_WIDTH != 0) begin + if (UUID_WIDTH != 0) begin : g_crsp_tag assign crsp_tag = {mem_rsp_tag_s[MEM_TAG_WIDTH-1 -: UUID_WIDTH], ibuf_dout}; - end else begin + end else begin : g_crsp_tag_0 assign crsp_tag = ibuf_dout; end @@ -524,9 +523,9 @@ module VX_mem_scheduler #( `ifdef SIMULATION wire [`UP(UUID_WIDTH)-1:0] req_dbg_uuid; - if (UUID_WIDTH != 0) begin + if (UUID_WIDTH != 0) begin : g_req_dbg_uuid assign req_dbg_uuid = core_req_tag[TAG_WIDTH-1 -: UUID_WIDTH]; - end else begin + end else begin : g_req_dbg_uuid_0 assign req_dbg_uuid = '0; end @@ -566,11 +565,11 @@ module VX_mem_scheduler #( wire [`UP(UUID_WIDTH)-1:0] mem_rsp_dbg_uuid; wire [`UP(UUID_WIDTH)-1:0] rsp_dbg_uuid; - if (UUID_WIDTH != 0) begin + if (UUID_WIDTH != 0) begin : g_dbg_uuid assign mem_req_dbg_uuid = mem_req_tag_s[MEM_TAG_WIDTH-1 -: UUID_WIDTH]; assign mem_rsp_dbg_uuid = mem_rsp_tag_s[MEM_TAG_WIDTH-1 -: UUID_WIDTH]; assign rsp_dbg_uuid = core_rsp_tag[TAG_WIDTH-1 -: UUID_WIDTH]; - end else begin + end else begin : g_dbg_uuid_0 assign mem_req_dbg_uuid = '0; assign mem_rsp_dbg_uuid = '0; assign rsp_dbg_uuid = '0; diff --git a/hw/rtl/libs/VX_multiplier.sv b/hw/rtl/libs/VX_multiplier.sv index 2f046779fc..11bf13a9f4 100644 --- 
a/hw/rtl/libs/VX_multiplier.sv +++ b/hw/rtl/libs/VX_multiplier.sv @@ -1,10 +1,10 @@ // Copyright © 2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -21,7 +21,7 @@ module VX_multiplier #( parameter SIGNED = 0, parameter LATENCY = 0 ) ( - input wire clk, + input wire clk, input wire enable, input wire [A_WIDTH-1:0] dataa, input wire [B_WIDTH-1:0] datab, @@ -29,15 +29,15 @@ module VX_multiplier #( ); wire [R_WIDTH-1:0] prod_w; - if (SIGNED != 0) begin + if (SIGNED != 0) begin : g_prod_s assign prod_w = R_WIDTH'($signed(dataa) * $signed(datab)); - end else begin + end else begin : g_prod_u assign prod_w = R_WIDTH'(dataa * datab); end - - if (LATENCY == 0) begin + + if (LATENCY == 0) begin : g_passthru assign result = prod_w; - end else begin + end else begin : g_latency reg [LATENCY-1:0][R_WIDTH-1:0] prod_r; always @(posedge clk) begin if (enable) begin @@ -46,8 +46,8 @@ module VX_multiplier #( prod_r[i] <= prod_r[i-1]; end end - end - assign result = prod_r[LATENCY-1]; + end + assign result = prod_r[LATENCY-1]; end endmodule diff --git a/hw/rtl/libs/VX_mux.sv b/hw/rtl/libs/VX_mux.sv index f0bc78cae6..19a06600f0 100644 --- a/hw/rtl/libs/VX_mux.sv +++ b/hw/rtl/libs/VX_mux.sv @@ -1,10 +1,10 @@ // Copyright © 2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
// You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -19,13 +19,13 @@ module VX_mux #( parameter N = 1, parameter LN = `LOG2UP(N) ) ( - input wire [N-1:0][DATAW-1:0] data_in, - input wire [LN-1:0] sel_in, + input wire [N-1:0][DATAW-1:0] data_in, + input wire [LN-1:0] sel_in, output wire [DATAW-1:0] data_out -); - if (N > 1) begin +); + if (N > 1) begin : g_mux assign data_out = data_in[sel_in]; - end else begin + end else begin : g_passthru `UNUSED_VAR (sel_in) assign data_out = data_in; end diff --git a/hw/rtl/libs/VX_onehot_mux.sv b/hw/rtl/libs/VX_onehot_mux.sv index e13186015a..8b97692f5a 100644 --- a/hw/rtl/libs/VX_onehot_mux.sv +++ b/hw/rtl/libs/VX_onehot_mux.sv @@ -24,13 +24,13 @@ module VX_onehot_mux #( input wire [N-1:0] sel_in, output wire [DATAW-1:0] data_out ); - if (N == 1) begin + if (N == 1) begin : g_passthru `UNUSED_VAR (sel_in) assign data_out = data_in; - end else if (LUT_OPT && N == 2) begin + end else if (LUT_OPT && N == 2) begin : g_lut2 `UNUSED_VAR (sel_in) assign data_out = sel_in[0] ? 
data_in[0] : data_in[1]; - end else if (LUT_OPT && N == 3) begin + end else if (LUT_OPT && N == 3) begin : g_lut3 reg [DATAW-1:0] data_out_w; always @(*) begin case (sel_in) @@ -41,7 +41,7 @@ module VX_onehot_mux #( endcase end assign data_out = data_out_w; - end else if (LUT_OPT && N == 4) begin + end else if (LUT_OPT && N == 4) begin : g_lut4 reg [DATAW-1:0] data_out_w; always @(*) begin case (sel_in) @@ -53,7 +53,7 @@ module VX_onehot_mux #( endcase end assign data_out = data_out_w; - end else if (LUT_OPT && N == 5) begin + end else if (LUT_OPT && N == 5) begin : g_lut5 reg [DATAW-1:0] data_out_w; always @(*) begin case (sel_in) @@ -66,7 +66,7 @@ module VX_onehot_mux #( endcase end assign data_out = data_out_w; - end else if (LUT_OPT && N == 6) begin + end else if (LUT_OPT && N == 6) begin : g_lut6 reg [DATAW-1:0] data_out_w; always @(*) begin case (sel_in) @@ -80,7 +80,7 @@ module VX_onehot_mux #( endcase end assign data_out = data_out_w; - end else if (LUT_OPT && N == 7) begin + end else if (LUT_OPT && N == 7) begin : g_lut7 reg [DATAW-1:0] data_out_w; always @(*) begin case (sel_in) @@ -95,7 +95,7 @@ module VX_onehot_mux #( endcase end assign data_out = data_out_w; - end else if (LUT_OPT && N == 8) begin + end else if (LUT_OPT && N == 8) begin : g_lut8 reg [DATAW-1:0] data_out_w; always @(*) begin case (sel_in) @@ -111,19 +111,19 @@ module VX_onehot_mux #( endcase end assign data_out = data_out_w; - end else if (MODEL == 1) begin + end else if (MODEL == 1) begin : g_model1 wire [N-1:0][DATAW-1:0] mask; - for (genvar i = 0; i < N; ++i) begin + for (genvar i = 0; i < N; ++i) begin : g_mask assign mask[i] = {DATAW{sel_in[i]}} & data_in[i]; end - for (genvar i = 0; i < DATAW; ++i) begin + for (genvar i = 0; i < DATAW; ++i) begin : g_data_out wire [N-1:0] gather; - for (genvar j = 0; j < N; ++j) begin + for (genvar j = 0; j < N; ++j) begin : g_gather assign gather[j] = mask[j][i]; end assign data_out[i] = (| gather); end - end else if (MODEL == 2) begin + end else 
if (MODEL == 2) begin : g_model2 VX_find_first #( .N (N), .DATAW (DATAW) @@ -133,7 +133,7 @@ module VX_onehot_mux #( .data_out (data_out), `UNUSED_PIN (valid_out) ); - end else if (MODEL == 3) begin + end else if (MODEL == 3) begin : g_model3 reg [DATAW-1:0] data_out_w; always @(*) begin data_out_w = 'x; diff --git a/hw/rtl/libs/VX_onehot_shift.sv b/hw/rtl/libs/VX_onehot_shift.sv index 5ab5712a21..3222e30671 100644 --- a/hw/rtl/libs/VX_onehot_shift.sv +++ b/hw/rtl/libs/VX_onehot_shift.sv @@ -22,8 +22,8 @@ module VX_onehot_shift #( input wire [M-1:0] data_in1, output wire [N*M-1:0] data_out ); - for (genvar i = 0; i < M; ++i) begin - for (genvar j = 0; j < N; ++j) begin + for (genvar i = 0; i < M; ++i) begin : g_i + for (genvar j = 0; j < N; ++j) begin : g_j assign data_out[i*N + j] = data_in1[i] & data_in0[j]; end end diff --git a/hw/rtl/libs/VX_pe_serializer.sv b/hw/rtl/libs/VX_pe_serializer.sv index d96db52f02..58fced4103 100644 --- a/hw/rtl/libs/VX_pe_serializer.sv +++ b/hw/rtl/libs/VX_pe_serializer.sv @@ -79,7 +79,7 @@ module VX_pe_serializer #( assign pe_enable = enable; - if (NUM_LANES != NUM_PES) begin + if (NUM_LANES != NUM_PES) begin : g_serialize localparam BATCH_SIZE = NUM_LANES / NUM_PES; localparam BATCH_SIZEW = `LOG2UP(BATCH_SIZE); @@ -87,7 +87,7 @@ module VX_pe_serializer #( reg [BATCH_SIZEW-1:0] batch_in_idx, batch_out_idx; reg batch_in_done, batch_out_done; - for (genvar i = 0; i < NUM_PES; ++i) begin + for (genvar i = 0; i < NUM_PES; ++i) begin : g_pe_data_out_w assign pe_data_out_w[i] = data_in[batch_in_idx * NUM_PES + i]; end @@ -125,7 +125,7 @@ module VX_pe_serializer #( assign data_out_u = data_out_n; assign tag_out_u = pe_tag_in; - end else begin + end else begin : g_passthru assign pe_data_out_w = data_in; diff --git a/hw/rtl/libs/VX_pending_size.sv b/hw/rtl/libs/VX_pending_size.sv index 610c2bc04f..475bbb36c1 100644 --- a/hw/rtl/libs/VX_pending_size.sv +++ b/hw/rtl/libs/VX_pending_size.sv @@ -35,7 +35,7 @@ module VX_pending_size #( 
`STATIC_ASSERT(INCRW <= SIZEW, ("invalid parameter: %d vs %d", INCRW, SIZEW)) `STATIC_ASSERT(DECRW <= SIZEW, ("invalid parameter: %d vs %d", DECRW, SIZEW)) - if (SIZE == 1) begin + if (SIZE == 1) begin : g_size1 reg size_r; @@ -59,12 +59,12 @@ module VX_pending_size #( assign alm_full = 1'b1; assign size = size_r; - end else begin + end else begin : g_sizeN reg empty_r, alm_empty_r; reg full_r, alm_full_r; - if (INCRW != 1 || DECRW != 1) begin + if (INCRW != 1 || DECRW != 1) begin : g_wide_step localparam SUBW = `MIN(SIZEW, `MAX(INCRW, DECRW)+1); @@ -92,7 +92,7 @@ module VX_pending_size #( assign size = size_r; - end else begin + end else begin : g_single_step localparam ADDRW = `LOG2UP(SIZE); @@ -124,7 +124,7 @@ module VX_pending_size #( end end - if (SIZE > 2) begin + if (SIZE > 2) begin : g_sizeN wire is_empty_n = (used_r == ADDRW'(1)); wire is_full_n = (used_r == ADDRW'(SIZE-1)); @@ -152,7 +152,7 @@ module VX_pending_size #( end end - end else begin + end else begin : g_size2 always @(posedge clk) begin if (reset) begin diff --git a/hw/rtl/libs/VX_pipe_buffer.sv b/hw/rtl/libs/VX_pipe_buffer.sv index 6ed6cf8eca..d71a78dacb 100644 --- a/hw/rtl/libs/VX_pipe_buffer.sv +++ b/hw/rtl/libs/VX_pipe_buffer.sv @@ -37,13 +37,13 @@ module VX_pipe_buffer #( input wire ready_out, output wire valid_out ); - if (DEPTH == 0) begin + if (DEPTH == 0) begin : g_passthru `UNUSED_VAR (clk) `UNUSED_VAR (reset) assign ready_in = ready_out; assign valid_out = valid_in; assign data_out = data_in; - end else begin + end else begin : g_register wire [DEPTH:0] valid; `IGNORE_UNOPTFLAT_BEGIN wire [DEPTH:0] ready; @@ -54,7 +54,7 @@ module VX_pipe_buffer #( assign data[0] = data_in; assign ready_in = ready[0]; - for (genvar i = 0; i < DEPTH; ++i) begin + for (genvar i = 0; i < DEPTH; ++i) begin : g_pipe_regs assign ready[i] = (ready[i+1] || ~valid[i+1]); VX_pipe_register #( .DATAW (1 + DATAW), diff --git a/hw/rtl/libs/VX_pipe_register.sv b/hw/rtl/libs/VX_pipe_register.sv index 
69184898fb..ef19cb58b9 100644 --- a/hw/rtl/libs/VX_pipe_register.sv +++ b/hw/rtl/libs/VX_pipe_register.sv @@ -26,13 +26,13 @@ module VX_pipe_register #( input wire [DATAW-1:0] data_in, output wire [DATAW-1:0] data_out ); - if (DEPTH == 0) begin + if (DEPTH == 0) begin : g_passthru `UNUSED_VAR (clk) `UNUSED_VAR (reset) `UNUSED_VAR (enable) assign data_out = data_in; - end else if (DEPTH == 1) begin - if (RESETW == 0) begin + end else if (DEPTH == 1) begin : g_depth1 + if (RESETW == 0) begin : g_no_reset `UNUSED_VAR (reset) reg [DATAW-1:0] value; @@ -42,18 +42,7 @@ module VX_pipe_register #( end end assign data_out = value; - end else if (RESETW == DATAW) begin - reg [DATAW-1:0] value; - - always @(posedge clk) begin - if (reset) begin - value <= INIT_VALUE; - end else if (enable) begin - value <= data_in; - end - end - assign data_out = value; - end else begin + end else if (RESETW < DATAW) begin : g_partial_reset reg [DATAW-RESETW-1:0] value_d; reg [RESETW-1:0] value_r; @@ -71,12 +60,23 @@ module VX_pipe_register #( end end assign data_out = {value_r, value_d}; + end else begin : g_full_reset + reg [DATAW-1:0] value; + + always @(posedge clk) begin + if (reset) begin + value <= INIT_VALUE; + end else if (enable) begin + value <= data_in; + end + end + assign data_out = value; end - end else begin + end else begin : g_recursive wire [DEPTH:0][DATAW-1:0] data_delayed; assign data_delayed[0] = data_in; - - for (genvar i = 1; i <= DEPTH; ++i) begin + + for (genvar i = 1; i <= DEPTH; ++i) begin : g_pipe_reg VX_pipe_register #( .DATAW (DATAW), .RESETW (RESETW), diff --git a/hw/rtl/libs/VX_popcount.sv b/hw/rtl/libs/VX_popcount.sv index 3d94dd00f8..fa8c49099e 100644 --- a/hw/rtl/libs/VX_popcount.sv +++ b/hw/rtl/libs/VX_popcount.sv @@ -100,11 +100,11 @@ module VX_popcount #( `elsif QUARTUS assign data_out = $countones(data_in); `else - if (N == 1) begin + if (N == 1) begin : g_passthru assign data_out = data_in; - end else if (N <= 3) begin + end else if (N <= 3) begin : 
g_popcount3 reg [2:0] t_in; wire [1:0] t_out; @@ -115,7 +115,7 @@ module VX_popcount #( VX_popcount32 pc32(t_in, t_out); assign data_out = t_out[M-1:0]; - end else if (N <= 6) begin + end else if (N <= 6) begin : g_popcount6 reg [5:0] t_in; wire [2:0] t_out; @@ -126,7 +126,7 @@ module VX_popcount #( VX_popcount63 pc63(t_in, t_out); assign data_out = t_out[M-1:0]; - end else if (N <= 9) begin + end else if (N <= 9) begin : g_popcount9 reg [8:0] t_in; wire [4:0] t1_out; @@ -140,7 +140,7 @@ module VX_popcount #( VX_sum33 sum33(t1_out[2:0], {1'b0, t1_out[4:3]}, t2_out); assign data_out = t2_out[M-1:0]; - end else if (N <= 12) begin + end else if (N <= 12) begin : g_popcount12 reg [11:0] t_in; wire [5:0] t1_out; @@ -154,7 +154,7 @@ module VX_popcount #( VX_sum33 sum33(t1_out[2:0], t1_out[5:3], t2_out); assign data_out = t2_out[M-1:0]; - end else if (N <= 18) begin + end else if (N <= 18) begin : g_popcount18 reg [17:0] t_in; wire [8:0] t1_out; @@ -171,7 +171,7 @@ module VX_popcount #( VX_popcount32 pc32c({t1_out[2], t1_out[5], t1_out[8]}, t2_out[5:4]); assign data_out = {2'b0,t2_out[1:0]} + {1'b0,t2_out[3:2],1'b0} + {t2_out[5:4],2'b0}; - end else if (MODEL == 1) begin + end else if (MODEL == 1) begin : g_model1 localparam PN = 1 << `CLOG2(N); localparam LOGPN = `CLOG2(PN); @@ -204,7 +204,7 @@ module VX_popcount #( assign data_out = tmp[LOGPN-1][0]; - end else begin + end else begin : g_model2 reg [M-1:0] cnt_w; diff --git a/hw/rtl/libs/VX_priority_arbiter.sv b/hw/rtl/libs/VX_priority_arbiter.sv index 13a9401780..de5a3b3b11 100644 --- a/hw/rtl/libs/VX_priority_arbiter.sv +++ b/hw/rtl/libs/VX_priority_arbiter.sv @@ -23,13 +23,13 @@ module VX_priority_arbiter #( output wire [NUM_REQS-1:0] grant_onehot, output wire grant_valid ); - if (NUM_REQS == 1) begin + if (NUM_REQS == 1) begin : g_passthru assign grant_index = '0; assign grant_onehot = requests; assign grant_valid = requests[0]; - end else begin + end else begin : g_encoder VX_priority_encoder #( .N (NUM_REQS) diff 
--git a/hw/rtl/libs/VX_priority_encoder.sv b/hw/rtl/libs/VX_priority_encoder.sv index a3928492a9..444c406834 100644 --- a/hw/rtl/libs/VX_priority_encoder.sv +++ b/hw/rtl/libs/VX_priority_encoder.sv @@ -27,34 +27,34 @@ module VX_priority_encoder #( ); wire [N-1:0] reversed; - if (REVERSE != 0) begin - for (genvar i = 0; i < N; ++i) begin + if (REVERSE != 0) begin : g_reverse + for (genvar i = 0; i < N; ++i) begin : g_i assign reversed[N-i-1] = data_in[i]; end - end else begin + end else begin : g_no_reverse assign reversed = data_in; end - if (N == 1) begin + if (N == 1) begin : g_n1 assign onehot_out = reversed; assign index_out = '0; assign valid_out = reversed; - end else if (N == 2) begin + end else if (N == 2) begin : g_n2 assign onehot_out = {reversed[1] && ~reversed[0], reversed[0]}; assign index_out = ~reversed[0]; assign valid_out = (| reversed); - end else if (MODEL == 1) begin + end else if (MODEL == 1) begin : g_model1 `IGNORE_UNOPTFLAT_BEGIN wire [N-1:0] higher_pri_regs; `IGNORE_UNOPTFLAT_END assign higher_pri_regs[0] = 1'b0; - for (genvar i = 1; i < N; ++i) begin + for (genvar i = 1; i < N; ++i) begin : g_higher_pri_regs assign higher_pri_regs[i] = higher_pri_regs[i-1] | reversed[i-1]; end assign onehot_out[N-1:0] = reversed[N-1:0] & ~higher_pri_regs[N-1:0]; @@ -68,7 +68,7 @@ module VX_priority_encoder #( .valid_out (valid_out) ); - end else if (MODEL == 2) begin + end else if (MODEL == 2) begin : g_model2 wire [N-1:0] scan_lo; @@ -91,7 +91,7 @@ module VX_priority_encoder #( assign onehot_out = scan_lo & {(~scan_lo[N-2:0]), 1'b1}; - end else if (MODEL == 3) begin + end else if (MODEL == 3) begin : g_model3 assign onehot_out = reversed & -reversed; @@ -104,7 +104,7 @@ module VX_priority_encoder #( .valid_out (valid_out) ); - end else begin + end else begin : g_model0 reg [LN-1:0] index_w; reg [N-1:0] onehot_w; diff --git a/hw/rtl/libs/VX_reduce.sv b/hw/rtl/libs/VX_reduce.sv index ac01175672..15c0f0228d 100644 --- a/hw/rtl/libs/VX_reduce.sv +++ 
b/hw/rtl/libs/VX_reduce.sv @@ -1,10 +1,10 @@ // Copyright © 2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -14,7 +14,7 @@ `include "VX_platform.vh" `TRACING_OFF -module VX_reduce #( +module VX_reduce #( parameter DATAW_IN = 1, parameter DATAW_OUT = DATAW_IN, parameter N = 1, @@ -23,9 +23,9 @@ module VX_reduce #( input wire [N-1:0][DATAW_IN-1:0] data_in, output wire [DATAW_OUT-1:0] data_out ); - if (N == 1) begin + if (N == 1) begin : g_passthru assign data_out = DATAW_OUT'(data_in[0]); - end else begin + end else begin : g_reduce localparam int N_A = N / 2; localparam int N_B = N - N_A; @@ -33,40 +33,46 @@ module VX_reduce #( wire [N_B-1:0][DATAW_IN-1:0] in_B; wire [DATAW_OUT-1:0] out_A, out_B; - for (genvar i = 0; i < N_A; i++) begin + for (genvar i = 0; i < N_A; i++) begin : g_in_A assign in_A[i] = data_in[i]; end - for (genvar i = 0; i < N_B; i++) begin + for (genvar i = 0; i < N_B; i++) begin : g_in_B assign in_B[i] = data_in[N_A + i]; end VX_reduce #( - .DATAW_IN (DATAW_IN), + .DATAW_IN (DATAW_IN), .DATAW_OUT (DATAW_OUT), .N (N_A), .OP (OP) ) reduce_A ( - .data_in (in_A), + .data_in (in_A), .data_out (out_A) ); VX_reduce #( - .DATAW_IN (DATAW_IN), + .DATAW_IN (DATAW_IN), .DATAW_OUT (DATAW_OUT), .N (N_B), .OP (OP) ) reduce_B ( - .data_in (in_B), + .data_in (in_B), .data_out (out_B) ); - if (OP == "+") assign data_out = out_A + out_B; - else if (OP == "^") assign data_out = out_A ^ out_B; - else if (OP == "&") assign data_out = out_A & out_B; - else if (OP == "|") assign data_out = out_A | out_B; - else `ERROR(("invalid parameter")); + if (OP == 
"+") begin : g_plus + assign data_out = out_A + out_B; + end else if (OP == "^") begin : g_xor + assign data_out = out_A ^ out_B; + end else if (OP == "&") begin : g_and + assign data_out = out_A & out_B; + end else if (OP == "|") begin : g_or + assign data_out = out_A | out_B; + end else begin : g_error + `ERROR(("invalid parameter")); + end end - + endmodule `TRACING_ON diff --git a/hw/rtl/libs/VX_reset_relay.sv b/hw/rtl/libs/VX_reset_relay.sv index d7e735c255..0e2a7f4caf 100644 --- a/hw/rtl/libs/VX_reset_relay.sv +++ b/hw/rtl/libs/VX_reset_relay.sv @@ -22,19 +22,19 @@ module VX_reset_relay #( input wire reset, output wire [N-1:0] reset_o ); - if (MAX_FANOUT >= 0 && N > (MAX_FANOUT + MAX_FANOUT/2)) begin + if (MAX_FANOUT >= 0 && N > (MAX_FANOUT + MAX_FANOUT/2)) begin : g_relay localparam F = `UP(MAX_FANOUT); localparam R = N / F; `PRESERVE_NET reg [R-1:0] reset_r; - for (genvar i = 0; i < R; ++i) begin + for (genvar i = 0; i < R; ++i) begin : g_reset_r always @(posedge clk) begin reset_r[i] <= reset; end end - for (genvar i = 0; i < N; ++i) begin + for (genvar i = 0; i < N; ++i) begin : g_reset_o assign reset_o[i] = reset_r[i / F]; end - end else begin + end else begin : g_passthru `UNUSED_VAR (clk) assign reset_o = {N{reset}}; end diff --git a/hw/rtl/libs/VX_rr_arbiter.sv b/hw/rtl/libs/VX_rr_arbiter.sv index 894f4e3120..3831238dc7 100644 --- a/hw/rtl/libs/VX_rr_arbiter.sv +++ b/hw/rtl/libs/VX_rr_arbiter.sv @@ -28,7 +28,7 @@ module VX_rr_arbiter #( output wire grant_valid, input wire grant_ready ); - if (NUM_REQS == 1) begin + if (NUM_REQS == 1) begin : g_passthru `UNUSED_VAR (clk) `UNUSED_VAR (reset) @@ -38,7 +38,7 @@ module VX_rr_arbiter #( assign grant_onehot = requests; assign grant_valid = requests[0]; - end else if (LUT_OPT && NUM_REQS == 2) begin + end else if (LUT_OPT && NUM_REQS == 2) begin : g_lut2 reg [LOG_NUM_REQS-1:0] grant_index_w; reg [NUM_REQS-1:0] grant_onehot_w; @@ -66,7 +66,7 @@ module VX_rr_arbiter #( assign grant_onehot = grant_onehot_w; 
assign grant_valid = (| requests); - end else if (LUT_OPT && NUM_REQS == 3) begin + end else if (LUT_OPT && NUM_REQS == 3) begin : g_lut3 reg [LOG_NUM_REQS-1:0] grant_index_w; reg [NUM_REQS-1:0] grant_onehot_w; @@ -99,7 +99,7 @@ module VX_rr_arbiter #( assign grant_onehot = grant_onehot_w; assign grant_valid = (| requests); - end else if (LUT_OPT && NUM_REQS == 4) begin + end else if (LUT_OPT && NUM_REQS == 4) begin : g_lut4 reg [LOG_NUM_REQS-1:0] grant_index_w; reg [NUM_REQS-1:0] grant_onehot_w; @@ -139,7 +139,7 @@ module VX_rr_arbiter #( assign grant_onehot = grant_onehot_w; assign grant_valid = (| requests); - end else if (LUT_OPT && NUM_REQS == 5) begin + end else if (LUT_OPT && NUM_REQS == 5) begin : g_lut5 reg [LOG_NUM_REQS-1:0] grant_index_w; reg [NUM_REQS-1:0] grant_onehot_w; @@ -188,7 +188,7 @@ module VX_rr_arbiter #( assign grant_onehot = grant_onehot_w; assign grant_valid = (| requests); - end else if (LUT_OPT && NUM_REQS == 6) begin + end else if (LUT_OPT && NUM_REQS == 6) begin : g_lut6 reg [LOG_NUM_REQS-1:0] grant_index_w; reg [NUM_REQS-1:0] grant_onehot_w; @@ -248,7 +248,7 @@ module VX_rr_arbiter #( assign grant_onehot = grant_onehot_w; assign grant_valid = (| requests); - end else if (LUT_OPT && NUM_REQS == 7) begin + end else if (LUT_OPT && NUM_REQS == 7) begin : g_lut7 reg [LOG_NUM_REQS-1:0] grant_index_w; reg [NUM_REQS-1:0] grant_onehot_w; @@ -321,7 +321,7 @@ module VX_rr_arbiter #( assign grant_onehot = grant_onehot_w; assign grant_valid = (| requests); - end else if (LUT_OPT && NUM_REQS == 8) begin + end else if (LUT_OPT && NUM_REQS == 8) begin : g_lut8 reg [LOG_NUM_REQS-1:0] grant_index_w; reg [NUM_REQS-1:0] grant_onehot_w; @@ -409,7 +409,7 @@ module VX_rr_arbiter #( assign grant_onehot = grant_onehot_w; assign grant_valid = (| requests); - end else if (MODEL == 1) begin + end else if (MODEL == 1) begin : g_model1 `IGNORE_UNOPTFLAT_BEGIN wire [NUM_REQS-1:0] masked_pri_reqs, unmasked_pri_reqs; @@ -419,12 +419,12 @@ module VX_rr_arbiter #( wire 
[NUM_REQS-1:0] masked_reqs = requests & reqs_mask; assign masked_pri_reqs[0] = 1'b0; - for (genvar i = 1; i < NUM_REQS; ++i) begin + for (genvar i = 1; i < NUM_REQS; ++i) begin : g_masked_pri_reqs assign masked_pri_reqs[i] = masked_pri_reqs[i-1] | masked_reqs[i-1]; end assign unmasked_pri_reqs[0] = 1'b0; - for (genvar i = 1; i < NUM_REQS; ++i) begin + for (genvar i = 1; i < NUM_REQS; ++i) begin : g_unmasked_pri_reqs assign unmasked_pri_reqs[i] = unmasked_pri_reqs[i-1] | requests[i-1]; end @@ -456,12 +456,12 @@ module VX_rr_arbiter #( .valid_out(grant_valid) ); - end else if (MODEL == 2) begin + end else if (MODEL == 2) begin : g_model2 reg [NUM_REQS-1:0][LOG_NUM_REQS-1:0] grant_table; reg [LOG_NUM_REQS-1:0] state; - for (genvar i = 0; i < NUM_REQS; ++i) begin + for (genvar i = 0; i < NUM_REQS; ++i) begin : g_grant_table always @(*) begin grant_table[i] = 'x; for (integer j = NUM_REQS-1; j >= 0; --j) begin diff --git a/hw/rtl/libs/VX_scan.sv b/hw/rtl/libs/VX_scan.sv index 48de2964a9..6effd58146 100644 --- a/hw/rtl/libs/VX_scan.sv +++ b/hw/rtl/libs/VX_scan.sv @@ -32,31 +32,31 @@ module VX_scan #( `IGNORE_UNOPTFLAT_END // reverses bits - if (REVERSE != 0) begin + if (REVERSE != 0) begin : g_data_in_reverse assign t[0] = data_in; - end else begin + end else begin : g_data_in_no_reverse assign t[0] = {<<{data_in}}; end // optimize for the common case of small and-scans - if ((N == 2) && (OP == "&")) begin + if ((N == 2) && (OP == "&")) begin : g_scan_n2_and assign t[LOGN] = {t[0][1], &t[0][1:0]}; - end else if ((N == 3) && (OP == "&")) begin + end else if ((N == 3) && (OP == "&")) begin : g_scan_n3_and assign t[LOGN] = {t[0][2], &t[0][2:1], &t[0][2:0]}; - end else if ((N == 4) && (OP == "&")) begin + end else if ((N == 4) && (OP == "&")) begin : g_scan_n4_and assign t[LOGN] = {t[0][3], &t[0][3:2], &t[0][3:1], &t[0][3:0]}; - end else begin + end else begin : g_scan // general case wire [N-1:0] fill; - for (genvar i = 0; i < LOGN; ++i) begin + for (genvar i = 0; i < LOGN; 
++i) begin : g_i wire [N-1:0] shifted = N'({fill, t[i]} >> (1< 1) begin + if (N > 1) begin : g_switch reg req_out_r [N]; reg rsp_out_r; @@ -34,7 +34,7 @@ module VX_scope_switch #( req_out_r[i] <= 0; end rsp_out_r <= 0; - end else begin + end else begin for (integer i = 0; i < N; ++i) begin req_out_r[i] <= req_in; end @@ -48,8 +48,8 @@ module VX_scope_switch #( assign req_out = req_out_r; assign rsp_out = rsp_out_r; - - end else begin + + end else begin : g_passthru `UNUSED_VAR (clk) `UNUSED_VAR (reset) diff --git a/hw/rtl/libs/VX_serial_div.sv b/hw/rtl/libs/VX_serial_div.sv index e7af40009c..593be2d9a1 100644 --- a/hw/rtl/libs/VX_serial_div.sv +++ b/hw/rtl/libs/VX_serial_div.sv @@ -1,10 +1,10 @@ // Copyright © 2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -29,7 +29,7 @@ module VX_serial_div #( input wire is_signed, input wire [LANES-1:0][WIDTHN-1:0] numer, - input wire [LANES-1:0][WIDTHD-1:0] denom, + input wire [LANES-1:0][WIDTHD-1:0] denom, output wire [LANES-1:0][WIDTHQ-1:0] quotient, output wire [LANES-1:0][WIDTHR-1:0] remainder @@ -49,14 +49,14 @@ module VX_serial_div #( reg [CNTRW-1:0] cntr; reg busy_r; - for (genvar i = 0; i < LANES; ++i) begin + for (genvar i = 0; i < LANES; ++i) begin : g_setup wire negate_numer = is_signed && numer[i][WIDTHN-1]; wire negate_denom = is_signed && denom[i][WIDTHD-1]; assign numer_qual[i] = negate_numer ? -$signed(numer[i]) : numer[i]; assign denom_qual[i] = negate_denom ? 
-$signed(denom[i]) : denom[i]; assign sub_result[i] = working[i][WIDTHN + MIN_ND : WIDTHN] - denom_r[i]; end - + always @(posedge clk) begin if (reset) begin busy_r <= 0; @@ -74,18 +74,21 @@ module VX_serial_div #( end end - for (genvar i = 0; i < LANES; ++i) begin + for (genvar i = 0; i < LANES; ++i) begin : g_div always @(posedge clk) begin if (strobe) begin working[i] <= {{WIDTHD{1'b0}}, numer_qual[i], 1'b0}; denom_r[i] <= denom_qual[i]; inv_quot[i] <= (denom[i] != 0) && is_signed && (numer[i][31] ^ denom[i][31]); inv_rem[i] <= is_signed && numer[i][31]; - end else if (busy_r) begin + end else if (busy_r) begin working[i] <= sub_result[i][WIDTHD] ? {working[i][WIDTHN+MIN_ND-1:0], 1'b0} : {sub_result[i][WIDTHD-1:0], working[i][WIDTHN-1:0], 1'b1}; end end + end + + for (genvar i = 0; i < LANES; ++i) begin : g_output wire [WIDTHQ-1:0] q = working[i][WIDTHQ-1:0]; wire [WIDTHR-1:0] r = working[i][WIDTHN+WIDTHR:WIDTHN+1]; assign quotient[i] = inv_quot[i] ? -$signed(q) : q; diff --git a/hw/rtl/libs/VX_serial_mul.sv b/hw/rtl/libs/VX_serial_mul.sv index 9369dfd105..d847b7111f 100644 --- a/hw/rtl/libs/VX_serial_mul.sv +++ b/hw/rtl/libs/VX_serial_mul.sv @@ -1,10 +1,10 @@ // Copyright © 2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -13,7 +13,7 @@ `include "VX_platform.vh" -// Iterative integer multiplier +// Iterative integer multiplier // An adaptation of ZipCPU algorithm for a multi-lane elastic architecture. 
// https://zipcpu.com/zipcpu/2021/07/03/slowmpy.html @@ -65,7 +65,7 @@ module VX_serial_mul #( end end - for (genvar i = 0; i < LANES; ++i) begin + for (genvar i = 0; i < LANES; ++i) begin : g_mul wire [X_WIDTH-1:0] axb = b[i][0] ? a[i] : '0; always @(posedge clk) begin @@ -73,12 +73,12 @@ module VX_serial_mul #( if (SIGNED) begin a[i] <= X_WIDTH'($signed(dataa[i])); b[i] <= Y_WIDTH'($signed(datab[i])); - end else begin + end else begin a[i] <= dataa[i]; b[i] <= datab[i]; end p[i] <= 0; - end else if (busy_r) begin + end else if (busy_r) begin b[i] <= (b[i] >> 1); p[i][Y_WIDTH-2:0] <= p[i][Y_WIDTH-1:1]; if (SIGNED) begin @@ -93,9 +93,9 @@ module VX_serial_mul #( end end - if (SIGNED) begin + if (SIGNED) begin : g_signed assign result[i] = R_WIDTH'(p[i][P_WIDTH-1:0] + {1'b1, {(X_WIDTH-2){1'b0}}, 1'b1, {(Y_WIDTH){1'b0}}}); - end else begin + end else begin : g_unsigned assign result[i] = R_WIDTH'(p[i]); end end diff --git a/hw/rtl/libs/VX_shift_register.sv b/hw/rtl/libs/VX_shift_register.sv index 56726d2cbd..b4809fe90f 100644 --- a/hw/rtl/libs/VX_shift_register.sv +++ b/hw/rtl/libs/VX_shift_register.sv @@ -1,10 +1,10 @@ // Copyright © 2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -14,13 +14,13 @@ `include "VX_platform.vh" `TRACING_OFF -module VX_shift_register #( +module VX_shift_register #( parameter DATAW = 1, parameter RESETW = 0, parameter DEPTH = 1, - parameter NUM_TAPS = 1, + parameter NUM_TAPS = 1, parameter TAP_START = 0, - parameter TAP_STRIDE = 1 + parameter TAP_STRIDE = 1 ) ( input wire clk, input wire reset, @@ -28,7 +28,7 @@ module VX_shift_register #( input wire [DATAW-1:0] data_in, output wire [NUM_TAPS-1:0][DATAW-1:0] data_out ); - if (DEPTH != 0) begin + if (DEPTH != 0) begin : g_shift_register reg [DEPTH-1:0][DATAW-1:0] entries; always @(posedge clk) begin @@ -36,7 +36,7 @@ module VX_shift_register #( if ((i >= (DATAW-RESETW)) && reset) begin for (integer j = 0; j < DEPTH; ++j) entries[j][i] <= 0; - end else if (enable) begin + end else if (enable) begin for (integer j = 1; j < DEPTH; ++j) entries[j-1][i] <= entries[j][i]; entries[DEPTH-1][i] <= data_in[i]; @@ -44,10 +44,10 @@ module VX_shift_register #( end end - for (genvar i = 0; i < NUM_TAPS; ++i) begin + for (genvar i = 0; i < NUM_TAPS; ++i) begin : g_data_out assign data_out[i] = entries[i * TAP_STRIDE + TAP_START]; end - end else begin + end else begin : g_passthru `UNUSED_VAR (clk) `UNUSED_VAR (reset) `UNUSED_VAR (enable) diff --git a/hw/rtl/libs/VX_skid_buffer.sv b/hw/rtl/libs/VX_skid_buffer.sv index 53c213622d..b77cce2a47 100644 --- a/hw/rtl/libs/VX_skid_buffer.sv +++ b/hw/rtl/libs/VX_skid_buffer.sv @@ -1,10 +1,10 @@ // Copyright © 2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -19,19 +19,19 @@ module VX_skid_buffer #( parameter PASSTHRU = 0, parameter HALF_BW = 0, parameter OUT_REG = 0 -) ( +) ( input wire clk, input wire reset, - + input wire valid_in, - output wire ready_in, + output wire ready_in, input wire [DATAW-1:0] data_in, output wire [DATAW-1:0] data_out, input wire ready_out, output wire valid_out ); - if (PASSTHRU != 0) begin + if (PASSTHRU != 0) begin : g_passthru `UNUSED_VAR (clk) `UNUSED_VAR (reset) @@ -40,7 +40,7 @@ module VX_skid_buffer #( assign data_out = data_in; assign ready_in = ready_out; - end else if (HALF_BW != 0) begin + end else if (HALF_BW != 0) begin : g_half_bw VX_toggle_buffer #( .DATAW (DATAW) @@ -55,7 +55,7 @@ module VX_skid_buffer #( .ready_out (ready_out) ); - end else begin + end else begin : g_full_bw VX_stream_buffer #( .DATAW (DATAW), diff --git a/hw/rtl/libs/VX_stream_arb.sv b/hw/rtl/libs/VX_stream_arb.sv index 3a457f8b89..ba824236e0 100644 --- a/hw/rtl/libs/VX_stream_arb.sv +++ b/hw/rtl/libs/VX_stream_arb.sv @@ -37,13 +37,13 @@ module VX_stream_arb #( output wire [NUM_OUTPUTS-1:0][NUM_REQS_W-1:0] sel_out, input wire [NUM_OUTPUTS-1:0] ready_out ); - if (NUM_INPUTS > NUM_OUTPUTS) begin + if (NUM_INPUTS > NUM_OUTPUTS) begin : g_more_inputs - if (NUM_OUTPUTS > 1) begin + if (NUM_OUTPUTS > 1) begin : g_multiple_outputs // (#inputs > #outputs) and (#outputs > 1) - for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin + for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin : g_arb_slices localparam SLICE_BEGIN = i * NUM_REQS; localparam SLICE_END = `MIN(SLICE_BEGIN + NUM_REQS, NUM_INPUTS); @@ -69,7 +69,7 @@ module VX_stream_arb #( ); end - end else if (MAX_FANOUT != 0 && (NUM_INPUTS > (MAX_FANOUT + MAX_FANOUT /2))) begin + end else if (MAX_FANOUT != 0 && (NUM_INPUTS > (MAX_FANOUT + MAX_FANOUT /2))) begin : g_fanout // (#inputs > max_fanout) and (#outputs == 1) @@ -81,7 +81,7 @@ module VX_stream_arb #( wire [NUM_SLICES-1:0][DATAW+LOG_NUM_REQS2-1:0] data_tmp; wire [NUM_SLICES-1:0] ready_tmp; - for (genvar i = 0; i < 
NUM_SLICES; ++i) begin + for (genvar i = 0; i < NUM_SLICES; ++i) begin : g_fanout_slice_arbs localparam SLICE_BEGIN = i * MAX_FANOUT; localparam SLICE_END = `MIN(SLICE_BEGIN + MAX_FANOUT, NUM_INPUTS); @@ -90,26 +90,24 @@ module VX_stream_arb #( wire [DATAW-1:0] data_tmp_u; wire [`LOG2UP(SLICE_SIZE)-1:0] sel_tmp_u; - if (MAX_FANOUT != 1) begin - VX_stream_arb #( - .NUM_INPUTS (SLICE_SIZE), - .NUM_OUTPUTS (1), - .DATAW (DATAW), - .ARBITER (ARBITER), - .MAX_FANOUT (MAX_FANOUT), - .OUT_BUF (3) - ) fanout_slice_arb ( - .clk (clk), - .reset (reset), - .valid_in (valid_in[SLICE_END-1: SLICE_BEGIN]), - .data_in (data_in[SLICE_END-1: SLICE_BEGIN]), - .ready_in (ready_in[SLICE_END-1: SLICE_BEGIN]), - .valid_out (valid_tmp[i]), - .data_out (data_tmp_u), - .sel_out (sel_tmp_u), - .ready_out (ready_tmp[i]) - ); - end + VX_stream_arb #( + .NUM_INPUTS (SLICE_SIZE), + .NUM_OUTPUTS (1), + .DATAW (DATAW), + .ARBITER (ARBITER), + .MAX_FANOUT (MAX_FANOUT), + .OUT_BUF (3) + ) fanout_slice_arb ( + .clk (clk), + .reset (reset), + .valid_in (valid_in[SLICE_END-1: SLICE_BEGIN]), + .data_in (data_in[SLICE_END-1: SLICE_BEGIN]), + .ready_in (ready_in[SLICE_END-1: SLICE_BEGIN]), + .valid_out (valid_tmp[i]), + .data_out (data_tmp_u), + .sel_out (sel_tmp_u), + .ready_out (ready_tmp[i]) + ); assign data_tmp[i] = {data_tmp_u, LOG_NUM_REQS2'(sel_tmp_u)}; end @@ -139,7 +137,7 @@ module VX_stream_arb #( assign data_out = data_out_u[LOG_NUM_REQS2 +: DATAW]; assign sel_out = {sel_out_u, data_out_u[0 +: LOG_NUM_REQS2]}; - end else begin + end else begin : g_one_output // (#inputs <= max_fanout) and (#outputs == 1) @@ -169,7 +167,7 @@ module VX_stream_arb #( assign data_in_w = data_in[arb_index]; assign arb_ready = ready_in_w; - for (genvar i = 0; i < NUM_REQS; ++i) begin + for (genvar i = 0; i < NUM_REQS; ++i) begin : g_ready_in assign ready_in[i] = ready_in_w && arb_onehot[i]; end @@ -190,13 +188,13 @@ module VX_stream_arb #( ); end - end else if (NUM_OUTPUTS > NUM_INPUTS) begin + end else if 
(NUM_OUTPUTS > NUM_INPUTS) begin : g_more_outputs - if (NUM_INPUTS > 1) begin + if (NUM_INPUTS > 1) begin : g_multiple_inputs // (#inputs > 1) and (#outputs > #inputs) - for (genvar i = 0; i < NUM_INPUTS; ++i) begin + for (genvar i = 0; i < NUM_INPUTS; ++i) begin : g_arb_slices localparam SLICE_BEGIN = i * NUM_REQS; localparam SLICE_END = `MIN(SLICE_BEGIN + NUM_REQS, NUM_OUTPUTS); @@ -221,12 +219,12 @@ module VX_stream_arb #( `UNUSED_PIN (sel_out) ); - for (genvar j = SLICE_BEGIN; j < SLICE_END; ++j) begin + for (genvar j = SLICE_BEGIN; j < SLICE_END; ++j) begin : g_sel_out assign sel_out[j] = i; end end - end else if (MAX_FANOUT != 0 && (NUM_OUTPUTS > (MAX_FANOUT + MAX_FANOUT /2))) begin + end else if (MAX_FANOUT != 0 && (NUM_OUTPUTS > (MAX_FANOUT + MAX_FANOUT /2))) begin : g_fanout // (#inputs == 1) and (#outputs > max_fanout) @@ -255,7 +253,7 @@ module VX_stream_arb #( `UNUSED_PIN (sel_out) ); - for (genvar i = 0; i < NUM_SLICES; ++i) begin + for (genvar i = 0; i < NUM_SLICES; ++i) begin : g_fanout_slice_arbs localparam SLICE_BEGIN = i * MAX_FANOUT; localparam SLICE_END = `MIN(SLICE_BEGIN + MAX_FANOUT, NUM_OUTPUTS); @@ -281,7 +279,7 @@ module VX_stream_arb #( ); end - end else begin + end else begin : g_one_input // (#inputs == 1) and (#outputs <= max_fanout) @@ -309,7 +307,7 @@ module VX_stream_arb #( assign arb_ready = valid_in[0]; assign ready_in = arb_valid; - for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin + for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin : g_out_buf VX_elastic_buffer #( .DATAW (DATAW), .SIZE (`TO_OUT_BUF_SIZE(OUT_BUF)), @@ -330,11 +328,11 @@ module VX_stream_arb #( assign sel_out = 0; - end else begin + end else begin : g_passthru // #Inputs == #Outputs - for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin + for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin : g_out_buf VX_elastic_buffer #( .DATAW (DATAW), .SIZE (`TO_OUT_BUF_SIZE(OUT_BUF)), diff --git a/hw/rtl/libs/VX_stream_buffer.sv b/hw/rtl/libs/VX_stream_buffer.sv index 5e8297f7a1..7670b40fea 
100644 --- a/hw/rtl/libs/VX_stream_buffer.sv +++ b/hw/rtl/libs/VX_stream_buffer.sv @@ -37,14 +37,8 @@ module VX_stream_buffer #( input wire ready_out, output wire valid_out ); - if (PASSTHRU != 0) begin - `UNUSED_VAR (clk) - `UNUSED_VAR (reset) - assign ready_in = ready_out; - assign valid_out = valid_in; - assign data_out = data_in; - end else begin - if (OUT_REG != 0) begin + if (PASSTHRU == 0) begin : g_buffer + if (OUT_REG != 0) begin : g_with_reg reg [DATAW-1:0] data_out_r; reg [DATAW-1:0] buffer; @@ -83,7 +77,7 @@ module VX_stream_buffer #( assign valid_out = valid_out_r; assign data_out = data_out_r; - end else begin + end else begin : g_no_reg reg [1:0][DATAW-1:0] shift_reg; reg [1:0] fifo_state; @@ -115,6 +109,12 @@ module VX_stream_buffer #( assign data_out = shift_reg[fifo_state[1]]; end + end else begin : g_passthru + `UNUSED_VAR (clk) + `UNUSED_VAR (reset) + assign ready_in = ready_out; + assign valid_out = valid_in; + assign data_out = data_in; end endmodule diff --git a/hw/rtl/libs/VX_stream_pack.sv b/hw/rtl/libs/VX_stream_pack.sv index 7f024b1840..944b120c29 100644 --- a/hw/rtl/libs/VX_stream_pack.sv +++ b/hw/rtl/libs/VX_stream_pack.sv @@ -38,7 +38,8 @@ module VX_stream_pack #( output wire [TAG_WIDTH-1:0] tag_out, input wire ready_out ); - if (NUM_REQS > 1) begin + if (NUM_REQS > 1) begin : g_pack + localparam LOG_NUM_REQS = `CLOG2(NUM_REQS); wire [LOG_NUM_REQS-1:0] grant_index; @@ -62,11 +63,11 @@ module VX_stream_pack #( wire [NUM_REQS-1:0] tag_matches; - for (genvar i = 0; i < NUM_REQS; ++i) begin + for (genvar i = 0; i < NUM_REQS; ++i) begin : g_tag_matches assign tag_matches[i] = (tag_in[i][TAG_SEL_BITS-1:0] == tag_sel[TAG_SEL_BITS-1:0]); end - for (genvar i = 0; i < NUM_REQS; ++i) begin + for (genvar i = 0; i < NUM_REQS; ++i) begin : g_ready_in assign ready_in[i] = grant_ready & tag_matches[i]; end @@ -87,7 +88,7 @@ module VX_stream_pack #( .ready_out (ready_out) ); - end else begin + end else begin : g_passthru `UNUSED_VAR (clk) `UNUSED_VAR 
(reset) diff --git a/hw/rtl/libs/VX_stream_switch.sv b/hw/rtl/libs/VX_stream_switch.sv index f3723ebb01..01217b6684 100644 --- a/hw/rtl/libs/VX_stream_switch.sv +++ b/hw/rtl/libs/VX_stream_switch.sv @@ -36,18 +36,17 @@ module VX_stream_switch #( output wire [NUM_OUTPUTS-1:0][DATAW-1:0] data_out, input wire [NUM_OUTPUTS-1:0] ready_out ); - if (NUM_INPUTS > NUM_OUTPUTS) begin - + if (NUM_INPUTS > NUM_OUTPUTS) begin : g_more_inputs wire [NUM_OUTPUTS-1:0][NUM_REQS-1:0] valid_in_w; wire [NUM_OUTPUTS-1:0][NUM_REQS-1:0][DATAW-1:0] data_in_w; - for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin - for (genvar j = 0; j < NUM_REQS; ++j) begin + for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin : g_data_in + for (genvar j = 0; j < NUM_REQS; ++j) begin : g_j localparam ii = i * NUM_REQS + j; - if (ii < NUM_INPUTS) begin + if (ii < NUM_INPUTS) begin : g_valid assign valid_in_w[i][j] = valid_in[ii]; assign data_in_w[i][j] = data_in[ii]; - end else begin + end else begin : g_extra assign valid_in_w[i][j] = 0; assign data_in_w[i][j] = '0; end @@ -58,21 +57,21 @@ module VX_stream_switch #( wire [NUM_OUTPUTS-1:0][DATAW-1:0] data_out_w; wire [NUM_OUTPUTS-1:0] ready_out_w; - for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin + for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin : g_data_out_w assign valid_out_w[i] = valid_in_w[i][sel_in[i]]; assign data_out_w[i] = data_in_w[i][sel_in[i]]; end - for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin - for (genvar j = 0; j < NUM_REQS; ++j) begin + for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin : g_ready_out_w + for (genvar j = 0; j < NUM_REQS; ++j) begin : g_j localparam ii = i * NUM_REQS + j; - if (ii < NUM_INPUTS) begin + if (ii < NUM_INPUTS) begin : g_valid assign ready_in[ii] = ready_out_w[i] && (sel_in[i] == LOG_NUM_REQS'(j)); end end end - for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin + for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin : g_out_buf VX_elastic_buffer #( .DATAW (DATAW), .SIZE (`TO_OUT_BUF_SIZE(OUT_BUF)), @@ -89,22 +88,25 @@ module 
VX_stream_switch #( ); end - end else if (NUM_OUTPUTS > NUM_INPUTS) begin + end else if (NUM_OUTPUTS > NUM_INPUTS) begin : g_more_outputs wire [NUM_INPUTS-1:0][NUM_REQS-1:0] valid_out_w; wire [NUM_INPUTS-1:0][NUM_REQS-1:0] ready_out_w; - for (genvar i = 0; i < NUM_INPUTS; ++i) begin - for (genvar j = 0; j < NUM_REQS; ++j) begin + for (genvar i = 0; i < NUM_INPUTS; ++i) begin : g_valid_out_w + for (genvar j = 0; j < NUM_REQS; ++j) begin : g_j assign valid_out_w[i][j] = valid_in[i] && (sel_in[i] == LOG_NUM_REQS'(j)); end + end + + for (genvar i = 0; i < NUM_INPUTS; ++i) begin : g_ready_in assign ready_in[i] = ready_out_w[i][sel_in[i]]; end - for (genvar i = 0; i < NUM_INPUTS; ++i) begin - for (genvar j = 0; j < NUM_REQS; ++j) begin + for (genvar i = 0; i < NUM_INPUTS; ++i) begin : g_out_buf + for (genvar j = 0; j < NUM_REQS; ++j) begin : g_j localparam ii = i * NUM_REQS + j; - if (ii < NUM_OUTPUTS) begin + if (ii < NUM_OUTPUTS) begin : g_valid VX_elastic_buffer #( .DATAW (DATAW), .SIZE (`TO_OUT_BUF_SIZE(OUT_BUF)), @@ -119,20 +121,20 @@ module VX_stream_switch #( .valid_out (valid_out[ii]), .ready_out (ready_out[ii]) ); - end else begin + end else begin : g_extra `UNUSED_VAR (valid_out_w[i][j]) assign ready_out_w[i][j] = '0; end end end - end else begin + end else begin : g_passthru // #Inputs == #Outputs `UNUSED_VAR (sel_in) - for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin + for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin : g_out_buf VX_elastic_buffer #( .DATAW (DATAW), .SIZE (`TO_OUT_BUF_SIZE(OUT_BUF)), diff --git a/hw/rtl/libs/VX_stream_unpack.sv b/hw/rtl/libs/VX_stream_unpack.sv index 37c238a77c..b0cca961ab 100644 --- a/hw/rtl/libs/VX_stream_unpack.sv +++ b/hw/rtl/libs/VX_stream_unpack.sv @@ -36,7 +36,7 @@ module VX_stream_unpack #( output wire [NUM_REQS-1:0][TAG_WIDTH-1:0] tag_out, input wire [NUM_REQS-1:0] ready_out ); - if (NUM_REQS > 1) begin + if (NUM_REQS > 1) begin : g_unpack reg [NUM_REQS-1:0] rem_mask_r; wire [NUM_REQS-1:0] ready_out_w; @@ -56,7 +56,7 @@ 
module VX_stream_unpack #( assign ready_in = sent_all; - for (genvar i = 0; i < NUM_REQS; ++i) begin + for (genvar i = 0; i < NUM_REQS; ++i) begin : g_outbuf VX_elastic_buffer #( .DATAW (DATA_WIDTH + TAG_WIDTH), .SIZE (`TO_OUT_BUF_SIZE(OUT_BUF)), @@ -73,7 +73,7 @@ module VX_stream_unpack #( ); end - end else begin + end else begin : g_passthru `UNUSED_VAR (clk) `UNUSED_VAR (reset) diff --git a/hw/rtl/libs/VX_stream_xbar.sv b/hw/rtl/libs/VX_stream_xbar.sv index f2d9aa856e..db59f895eb 100644 --- a/hw/rtl/libs/VX_stream_xbar.sv +++ b/hw/rtl/libs/VX_stream_xbar.sv @@ -43,9 +43,9 @@ module VX_stream_xbar #( `UNUSED_VAR (clk) `UNUSED_VAR (reset) - if (NUM_INPUTS != 1) begin + if (NUM_INPUTS != 1) begin : g_multiple_inputs - if (NUM_OUTPUTS != 1) begin + if (NUM_OUTPUTS != 1) begin : g_multiple_outputs // (#inputs > 1) and (#outputs > 1) @@ -63,7 +63,7 @@ module VX_stream_xbar #( .data_out (per_output_ready_in_w) ); - for (genvar i = 0; i < NUM_INPUTS; ++i) begin + for (genvar i = 0; i < NUM_INPUTS; ++i) begin : g_sel_in_decoders VX_decoder #( .N (OUT_WIDTH) ) sel_in_decoder ( @@ -82,7 +82,7 @@ module VX_stream_xbar #( .data_out (per_output_valid_in_w) ); - for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin + for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin : g_xbar_arbs VX_stream_arb #( .NUM_INPUTS (NUM_INPUTS), .NUM_OUTPUTS (1), @@ -103,7 +103,7 @@ module VX_stream_xbar #( ); end - end else begin + end else begin : g_one_output // (#inputs >= 1) and (#outputs == 1) @@ -129,7 +129,7 @@ module VX_stream_xbar #( `UNUSED_VAR (sel_in) end - end else if (NUM_OUTPUTS != 1) begin + end else if (NUM_OUTPUTS != 1) begin : g_one_input // (#inputs == 1) and (#outputs > 1) @@ -147,7 +147,7 @@ module VX_stream_xbar #( assign ready_in[0] = ready_out_w[sel_in[0]]; assign data_out_w = {NUM_OUTPUTS{data_in[0]}}; - for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin + for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin : g_out_buf VX_elastic_buffer #( .DATAW (DATAW), .SIZE (`TO_OUT_BUF_SIZE(OUT_BUF)), @@ 
-167,7 +167,7 @@ module VX_stream_xbar #( assign sel_out = 0; - end else begin + end else begin : g_passthru // (#inputs == 1) and (#outputs == 1) diff --git a/hw/rtl/libs/VX_toggle_buffer.sv b/hw/rtl/libs/VX_toggle_buffer.sv index fb24a7f792..9d6b42720d 100644 --- a/hw/rtl/libs/VX_toggle_buffer.sv +++ b/hw/rtl/libs/VX_toggle_buffer.sv @@ -1,11 +1,11 @@ // Copyright 2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at -// +// // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -26,23 +26,26 @@ module VX_toggle_buffer #( parameter DATAW = 1, parameter PASSTHRU = 0 -) ( +) ( input wire clk, input wire reset, input wire valid_in, - output wire ready_in, + output wire ready_in, input wire [DATAW-1:0] data_in, output wire [DATAW-1:0] data_out, input wire ready_out, output wire valid_out -); - if (PASSTHRU != 0) begin +); + if (PASSTHRU != 0) begin : g_passthru + `UNUSED_VAR (clk) `UNUSED_VAR (reset) assign ready_in = ready_out; - assign valid_out = valid_in; + assign valid_out = valid_in; assign data_out = data_in; - end else begin + + end else begin : g_buffer + reg [DATAW-1:0] buffer; reg has_data; @@ -54,7 +57,7 @@ module VX_toggle_buffer #( has_data <= valid_in; end else if (ready_out) begin has_data <= 0; - end + end end if (~has_data) begin buffer <= data_in; diff --git a/hw/rtl/libs/VX_transpose.sv b/hw/rtl/libs/VX_transpose.sv index 7b2c273ef0..769a78422a 100644 --- a/hw/rtl/libs/VX_transpose.sv +++ b/hw/rtl/libs/VX_transpose.sv @@ -21,8 +21,8 @@ module VX_transpose #( input wire [N-1:0][M-1:0] data_in, output wire [M-1:0][N-1:0] data_out ); - for (genvar i = 0; i < N; ++i) begin - for (genvar j = 0; j < 
M; ++j) begin + for (genvar i = 0; i < N; ++i) begin : g_i + for (genvar j = 0; j < M; ++j) begin : g_j assign data_out[j][i] = data_in[i][j]; end end diff --git a/hw/rtl/mem/VX_gbar_arb.sv b/hw/rtl/mem/VX_gbar_arb.sv index 9ff761ec25..2b0856980d 100644 --- a/hw/rtl/mem/VX_gbar_arb.sv +++ b/hw/rtl/mem/VX_gbar_arb.sv @@ -1,10 +1,10 @@ // Copyright © 2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -33,7 +33,7 @@ module VX_gbar_arb #( wire [NUM_REQS-1:0][REQ_DATAW-1:0] req_data_in; wire [NUM_REQS-1:0] req_ready_in; - for (genvar i = 0; i < NUM_REQS; ++i) begin + for (genvar i = 0; i < NUM_REQS; ++i) begin : g_req_data_in assign req_valid_in[i] = bus_in_if[i].req_valid; assign req_data_in[i] = {bus_in_if[i].req_id, bus_in_if[i].req_size_m1, bus_in_if[i].req_core_id}; assign bus_in_if[i].req_ready = req_ready_in[i]; @@ -71,7 +71,7 @@ module VX_gbar_arb #( rsp_id <= bus_out_if.rsp_id; end - for (genvar i = 0; i < NUM_REQS; ++i) begin + for (genvar i = 0; i < NUM_REQS; ++i) begin : g_bus_in_if assign bus_in_if[i].rsp_valid = rsp_valid; assign bus_in_if[i].rsp_id = rsp_id; end diff --git a/hw/rtl/mem/VX_lmem_switch.sv b/hw/rtl/mem/VX_lmem_switch.sv index 6429077857..345f357a32 100644 --- a/hw/rtl/mem/VX_lmem_switch.sv +++ b/hw/rtl/mem/VX_lmem_switch.sv @@ -32,7 +32,7 @@ module VX_lmem_switch import VX_gpu_pkg::*; #( wire req_global_ready; wire req_local_ready; - for (genvar i = 0; i < `NUM_LSU_LANES; ++i) begin + for (genvar i = 0; i < `NUM_LSU_LANES; ++i) begin : g_is_addr_local_mask assign is_addr_local_mask[i] = 
lsu_in_if.req_data.flags[i][`MEM_REQ_FLAG_LOCAL]; end diff --git a/hw/rtl/mem/VX_local_mem.sv b/hw/rtl/mem/VX_local_mem.sv index 578f4552b7..7131c3f21e 100644 --- a/hw/rtl/mem/VX_local_mem.sv +++ b/hw/rtl/mem/VX_local_mem.sv @@ -67,18 +67,18 @@ module VX_local_mem import VX_gpu_pkg::*; #( // bank selection wire [NUM_REQS-1:0][BANK_SEL_WIDTH-1:0] req_bank_idx; - if (NUM_BANKS > 1) begin - for (genvar i = 0; i < NUM_REQS; ++i) begin + if (NUM_BANKS > 1) begin : g_req_bank_idx + for (genvar i = 0; i < NUM_REQS; ++i) begin : g_req_bank_idxs assign req_bank_idx[i] = mem_bus_if[i].req_data.addr[0 +: BANK_SEL_BITS]; end - end else begin + end else begin : g_req_bank_idx_0 assign req_bank_idx = 0; end // bank addressing wire [NUM_REQS-1:0][BANK_ADDR_WIDTH-1:0] req_bank_addr; - for (genvar i = 0; i < NUM_REQS; ++i) begin + for (genvar i = 0; i < NUM_REQS; ++i) begin : g_req_bank_addr assign req_bank_addr[i] = mem_bus_if[i].req_data.addr[BANK_SEL_BITS +: BANK_ADDR_WIDTH]; `UNUSED_VAR (mem_bus_if[i].req_data.flags) end @@ -104,7 +104,7 @@ module VX_local_mem import VX_gpu_pkg::*; #( wire [`PERF_CTR_BITS-1:0] perf_collisions; `endif - for (genvar i = 0; i < NUM_REQS; ++i) begin + for (genvar i = 0; i < NUM_REQS; ++i) begin : g_req_data_in assign req_valid_in[i] = mem_bus_if[i].req_valid; assign req_data_in[i] = { mem_bus_if[i].req_data.rw, @@ -141,7 +141,7 @@ module VX_local_mem import VX_gpu_pkg::*; #( .ready_out (per_bank_req_ready) ); - for (genvar i = 0; i < NUM_BANKS; ++i) begin + for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_per_bank_req_data_soa assign { per_bank_req_rw[i], per_bank_req_addr[i], @@ -159,7 +159,7 @@ module VX_local_mem import VX_gpu_pkg::*; #( wire [NUM_BANKS-1:0][TAG_WIDTH-1:0] per_bank_rsp_tag; wire [NUM_BANKS-1:0] per_bank_rsp_ready; - for (genvar i = 0; i < NUM_BANKS; ++i) begin + for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_data_store wire bank_rsp_valid, bank_rsp_ready; VX_sp_ram #( @@ -216,7 +216,7 @@ module VX_local_mem import 
VX_gpu_pkg::*; #( wire [NUM_BANKS-1:0][RSP_DATAW-1:0] per_bank_rsp_data_aos; - for (genvar i = 0; i < NUM_BANKS; ++i) begin + for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_per_bank_rsp_data_aos assign per_bank_rsp_data_aos[i] = {per_bank_rsp_data[i], per_bank_rsp_tag[i]}; end @@ -244,7 +244,7 @@ module VX_local_mem import VX_gpu_pkg::*; #( `UNUSED_PIN (sel_out) ); - for (genvar i = 0; i < NUM_REQS; ++i) begin + for (genvar i = 0; i < NUM_REQS; ++i) begin : g_mem_bus_if assign mem_bus_if[i].rsp_valid = rsp_valid_out[i]; assign mem_bus_if[i].rsp_data = rsp_data_out[i]; assign rsp_ready_out[i] = mem_bus_if[i].rsp_ready; @@ -257,7 +257,7 @@ module VX_local_mem import VX_gpu_pkg::*; #( wire [`CLOG2(NUM_REQS+1)-1:0] perf_crsp_stall_per_cycle; wire [NUM_REQS-1:0] req_rw; - for (genvar i = 0; i < NUM_REQS; ++i) begin + for (genvar i = 0; i < NUM_REQS; ++i) begin : g_req_rw assign req_rw[i] = mem_bus_if[i].req_data.rw; end @@ -303,11 +303,11 @@ module VX_local_mem import VX_gpu_pkg::*; #( wire [NUM_REQS-1:0][`UP(UUID_WIDTH)-1:0] req_uuid; wire [NUM_REQS-1:0][`UP(UUID_WIDTH)-1:0] rsp_uuid; - for (genvar i = 0; i < NUM_REQS; ++i) begin - if (UUID_WIDTH != 0) begin + for (genvar i = 0; i < NUM_REQS; ++i) begin : g_req_uuid + if (UUID_WIDTH != 0) begin : g_uuid assign req_uuid[i] = mem_bus_if[i].req_data.tag[TAG_WIDTH-1 -: UUID_WIDTH]; assign rsp_uuid[i] = mem_bus_if[i].rsp_data.tag[TAG_WIDTH-1 -: UUID_WIDTH]; - end else begin + end else begin : g_no_uuid assign req_uuid[i] = 0; assign rsp_uuid[i] = 0; end @@ -316,17 +316,17 @@ module VX_local_mem import VX_gpu_pkg::*; #( wire [NUM_BANKS-1:0][`UP(UUID_WIDTH)-1:0] per_bank_req_uuid; wire [NUM_BANKS-1:0][`UP(UUID_WIDTH)-1:0] per_bank_rsp_uuid; - for (genvar i = 0; i < NUM_BANKS; ++i) begin - if (UUID_WIDTH != 0) begin + for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_per_bank_req_uuid + if (UUID_WIDTH != 0) begin : g_uuid assign per_bank_req_uuid[i] = per_bank_req_tag[i][TAG_WIDTH-1 -: UUID_WIDTH]; assign per_bank_rsp_uuid[i] 
= per_bank_rsp_tag[i][TAG_WIDTH-1 -: UUID_WIDTH]; - end else begin + end else begin : g_no_uuid assign per_bank_req_uuid[i] = 0; assign per_bank_rsp_uuid[i] = 0; end end - for (genvar i = 0; i < NUM_REQS; ++i) begin + for (genvar i = 0; i < NUM_REQS; ++i) begin : g_req_trace always @(posedge clk) begin if (mem_bus_if[i].req_valid && mem_bus_if[i].req_ready) begin if (mem_bus_if[i].req_data.rw) begin @@ -344,7 +344,7 @@ module VX_local_mem import VX_gpu_pkg::*; #( end end - for (genvar i = 0; i < NUM_BANKS; ++i) begin + for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_bank_trace always @(posedge clk) begin if (per_bank_req_valid[i] && per_bank_req_ready[i]) begin if (per_bank_req_rw[i]) begin diff --git a/hw/rtl/mem/VX_lsu_adapter.sv b/hw/rtl/mem/VX_lsu_adapter.sv index 8223416926..eb5dd102ac 100644 --- a/hw/rtl/mem/VX_lsu_adapter.sv +++ b/hw/rtl/mem/VX_lsu_adapter.sv @@ -41,7 +41,7 @@ module VX_lsu_adapter import VX_gpu_pkg::*; #( wire [NUM_LANES-1:0][TAG_WIDTH-1:0] req_tag_out; wire [NUM_LANES-1:0] req_ready_out; - for (genvar i = 0; i < NUM_LANES; ++i) begin + for (genvar i = 0; i < NUM_LANES; ++i) begin : g_req_data_in assign req_data_in[i] = { lsu_mem_if.req_data.rw, lsu_mem_if.req_data.addr[i], @@ -51,19 +51,6 @@ module VX_lsu_adapter import VX_gpu_pkg::*; #( }; end - for (genvar i = 0; i < NUM_LANES; ++i) begin - assign mem_bus_if[i].req_valid = req_valid_out[i]; - assign { - mem_bus_if[i].req_data.rw, - mem_bus_if[i].req_data.addr, - mem_bus_if[i].req_data.data, - mem_bus_if[i].req_data.byteen, - mem_bus_if[i].req_data.flags - } = req_data_out[i]; - assign mem_bus_if[i].req_data.tag = req_tag_out[i]; - assign req_ready_out[i] = mem_bus_if[i].req_ready; - end - VX_stream_unpack #( .NUM_REQS (NUM_LANES), .DATA_WIDTH (REQ_DATA_WIDTH), @@ -83,6 +70,19 @@ module VX_lsu_adapter import VX_gpu_pkg::*; #( .ready_out (req_ready_out) ); + for (genvar i = 0; i < NUM_LANES; ++i) begin : g_mem_bus_req + assign mem_bus_if[i].req_valid = req_valid_out[i]; + assign { + 
mem_bus_if[i].req_data.rw, + mem_bus_if[i].req_data.addr, + mem_bus_if[i].req_data.data, + mem_bus_if[i].req_data.byteen, + mem_bus_if[i].req_data.flags + } = req_data_out[i]; + assign mem_bus_if[i].req_data.tag = req_tag_out[i]; + assign req_ready_out[i] = mem_bus_if[i].req_ready; + end + // handle response packing wire [NUM_LANES-1:0] rsp_valid_out; @@ -90,7 +90,7 @@ module VX_lsu_adapter import VX_gpu_pkg::*; #( wire [NUM_LANES-1:0][TAG_WIDTH-1:0] rsp_tag_out; wire [NUM_LANES-1:0] rsp_ready_out; - for (genvar i = 0; i < NUM_LANES; ++i) begin + for (genvar i = 0; i < NUM_LANES; ++i) begin : g_mem_bus_rsp assign rsp_valid_out[i] = mem_bus_if[i].rsp_valid; assign rsp_data_out[i] = mem_bus_if[i].rsp_data.data; assign rsp_tag_out[i] = mem_bus_if[i].rsp_data.tag; diff --git a/hw/rtl/mem/VX_mem_arb.sv b/hw/rtl/mem/VX_mem_arb.sv index f45a7ea75f..321bbb270a 100644 --- a/hw/rtl/mem/VX_mem_arb.sv +++ b/hw/rtl/mem/VX_mem_arb.sv @@ -47,7 +47,7 @@ module VX_mem_arb #( wire [NUM_OUTPUTS-1:0][`UP(LOG_NUM_REQS)-1:0] req_sel_out; wire [NUM_OUTPUTS-1:0] req_ready_out; - for (genvar i = 0; i < NUM_INPUTS; ++i) begin + for (genvar i = 0; i < NUM_INPUTS; ++i) begin : g_req_data_in assign req_valid_in[i] = bus_in_if[i].req_valid; assign req_data_in[i] = { bus_in_if[i].req_data.rw, @@ -78,7 +78,7 @@ module VX_mem_arb #( .ready_out (req_ready_out) ); - for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin + for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin : g_bus_out_if wire [TAG_WIDTH-1:0] req_tag_out; VX_bits_insert #( .N (TAG_WIDTH), @@ -111,11 +111,11 @@ module VX_mem_arb #( wire [NUM_OUTPUTS-1:0][RSP_DATAW-1:0] rsp_data_in; wire [NUM_OUTPUTS-1:0] rsp_ready_in; - if (NUM_INPUTS > NUM_OUTPUTS) begin + if (NUM_INPUTS > NUM_OUTPUTS) begin : g_rsp_enabled wire [NUM_OUTPUTS-1:0][LOG_NUM_REQS-1:0] rsp_sel_in; - for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin + for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin : g_rsp_data_in wire [TAG_WIDTH-1:0] rsp_tag_out; VX_bits_remove #( .N (TAG_WIDTH + 
LOG_NUM_REQS), @@ -130,9 +130,9 @@ module VX_mem_arb #( assign rsp_data_in[i] = {rsp_tag_out, bus_out_if[i].rsp_data.data}; assign bus_out_if[i].rsp_ready = rsp_ready_in[i]; - if (NUM_INPUTS > 1) begin + if (NUM_INPUTS > 1) begin : g_rsp_sel_in assign rsp_sel_in[i] = bus_out_if[i].rsp_data.tag[TAG_SEL_IDX +: LOG_NUM_REQS]; - end else begin + end else begin : g_no_rsp_sel_in assign rsp_sel_in[i] = '0; end end @@ -154,9 +154,9 @@ module VX_mem_arb #( .ready_out (rsp_ready_out) ); - end else begin + end else begin : g_passthru - for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin + for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin : g_rsp_data_in assign rsp_valid_in[i] = bus_out_if[i].rsp_valid; assign rsp_data_in[i] = { bus_out_if[i].rsp_data.tag, @@ -185,7 +185,7 @@ module VX_mem_arb #( end - for (genvar i = 0; i < NUM_INPUTS; ++i) begin + for (genvar i = 0; i < NUM_INPUTS; ++i) begin : g_output assign bus_in_if[i].rsp_valid = rsp_valid_out[i]; assign { bus_in_if[i].rsp_data.tag, diff --git a/sim/opaesim/Makefile b/sim/opaesim/Makefile index 984686d3b8..2def887e9e 100644 --- a/sim/opaesim/Makefile +++ b/sim/opaesim/Makefile @@ -67,7 +67,7 @@ RTL_INCLUDE += -I$(AFU_DIR) -I$(AFU_DIR)/ccip TOP = vortex_afu_shim VL_FLAGS += --language 1800-2009 --assert -Wall -Wpedantic -VL_FLAGS += -Wno-DECLFILENAME -Wno-REDEFMACRO -Wno-GENUNNAMED +VL_FLAGS += -Wno-DECLFILENAME -Wno-REDEFMACRO VL_FLAGS += --x-initial unique --x-assign unique VL_FLAGS += -DSIMULATION -DSV_DPI VL_FLAGS += -DXLEN_$(XLEN) diff --git a/sim/rtlsim/Makefile b/sim/rtlsim/Makefile index 591a2c2260..24287aa56e 100644 --- a/sim/rtlsim/Makefile +++ b/sim/rtlsim/Makefile @@ -48,7 +48,7 @@ endif VL_FLAGS = --exe VL_FLAGS += --language 1800-2009 --assert -Wall -Wpedantic -VL_FLAGS += -Wno-DECLFILENAME -Wno-REDEFMACRO -Wno-GENUNNAMED +VL_FLAGS += -Wno-DECLFILENAME -Wno-REDEFMACRO VL_FLAGS += --x-initial unique --x-assign unique VL_FLAGS += verilator.vlt VL_FLAGS += -DSIMULATION -DSV_DPI diff --git a/sim/xrtsim/Makefile 
b/sim/xrtsim/Makefile index 63787e5b67..3e256ffb34 100644 --- a/sim/xrtsim/Makefile +++ b/sim/xrtsim/Makefile @@ -51,7 +51,7 @@ RTL_INCLUDE += -I$(AFU_DIR) TOP = vortex_afu_shim VL_FLAGS += --language 1800-2009 --assert -Wall -Wpedantic -VL_FLAGS += -Wno-DECLFILENAME -Wno-REDEFMACRO -Wno-GENUNNAMED +VL_FLAGS += -Wno-DECLFILENAME -Wno-REDEFMACRO VL_FLAGS += --x-initial unique --x-assign unique VL_FLAGS += -DSIMULATION -DSV_DPI VL_FLAGS += -DXLEN_$(XLEN) From 7208f251b771da6af7ed5f61df954d949009cbc9 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Thu, 12 Sep 2024 20:07:19 -0700 Subject: [PATCH 178/407] minor update --- hw/rtl/VX_cluster.sv | 13 ++++--------- hw/rtl/VX_define.vh | 22 ++++++++++++++-------- hw/rtl/VX_socket.sv | 6 +++--- hw/rtl/Vortex.sv | 2 +- 4 files changed, 22 insertions(+), 21 deletions(-) diff --git a/hw/rtl/VX_cluster.sv b/hw/rtl/VX_cluster.sv index 6109e873a3..73d9b34abc 100644 --- a/hw/rtl/VX_cluster.sv +++ b/hw/rtl/VX_cluster.sv @@ -116,22 +116,17 @@ module VX_cluster import VX_gpu_pkg::*; #( /////////////////////////////////////////////////////////////////////////// - VX_dcr_bus_if socket_dcr_bus_tmp_if(); - wire is_dcr_base_addr = (dcr_bus_if.write_addr >= `VX_DCR_BASE_STATE_BEGIN && dcr_bus_if.write_addr < `VX_DCR_BASE_STATE_END); - assign socket_dcr_bus_tmp_if.write_valid = dcr_bus_if.write_valid && is_dcr_base_addr; - assign socket_dcr_bus_tmp_if.write_addr = dcr_bus_if.write_addr; - assign socket_dcr_bus_tmp_if.write_data = dcr_bus_if.write_data; - wire [`NUM_SOCKETS-1:0] per_socket_busy; - VX_dcr_bus_if socket_dcr_bus_if(); - `BUFFER_DCR_BUS_IF (socket_dcr_bus_if, socket_dcr_bus_tmp_if, (`NUM_SOCKETS > 1)); - // Generate all sockets for (genvar socket_id = 0; socket_id < `NUM_SOCKETS; ++socket_id) begin : g_sockets `RESET_RELAY (socket_reset, reset); + VX_dcr_bus_if socket_dcr_bus_if(); + wire is_base_dcr_addr = (dcr_bus_if.write_addr >= `VX_DCR_BASE_STATE_BEGIN && dcr_bus_if.write_addr < `VX_DCR_BASE_STATE_END); + `BUFFER_DCR_BUS_IF 
(socket_dcr_bus_if, dcr_bus_if, is_base_dcr_addr, (`NUM_SOCKETS > 1)) + VX_socket #( .SOCKET_ID ((CLUSTER_ID * `NUM_SOCKETS) + socket_id), .INSTANCE_ID ($sformatf("%s-socket%0d", INSTANCE_ID, socket_id)) diff --git a/hw/rtl/VX_define.vh b/hw/rtl/VX_define.vh index fdd066434c..502f794bb3 100644 --- a/hw/rtl/VX_define.vh +++ b/hw/rtl/VX_define.vh @@ -390,16 +390,22 @@ assign src.rsp_data.tag = dst.rsp_data.tag[TD-1 -: TS]; \ assign dst.rsp_ready = src.rsp_ready -`define BUFFER_DCR_BUS_IF(dst, src, enable) \ +`define BUFFER_DCR_BUS_IF(dst, src, ena, latency) \ /* verilator lint_off GENUNNAMED */ \ - if (enable) begin \ - reg [(1 + `VX_DCR_ADDR_WIDTH + `VX_DCR_DATA_WIDTH)-1:0] __dst; \ - always @(posedge clk) begin \ - __dst <= {src.write_valid, src.write_addr, src.write_data}; \ - end \ - assign {dst.write_valid, dst.write_addr, dst.write_data} = __dst; \ + if (latency != 0) begin \ + VX_pipe_register #( \ + .DATAW (1 + `VX_DCR_ADDR_WIDTH + `VX_DCR_DATA_WIDTH), \ + .RESETW (1 + `VX_DCR_ADDR_WIDTH + `VX_DCR_DATA_WIDTH), \ + .DEPTH (latency) \ + ) pipe_reg ( \ + .clk (clk), \ + .reset (reset), \ + .enable (1'b1), \ + .data_in ({src.write_valid && ena, src.write_addr, src.write_data}), \ + .data_out ({dst.write_valid, dst.write_addr, dst.write_data}) \ + ); \ end else begin \ - assign {dst.write_valid, dst.write_addr, dst.write_data} = {src.write_valid, src.write_addr, src.write_data}; \ + assign {dst.write_valid, dst.write_addr, dst.write_data} = {src.write_valid && ena, src.write_addr, src.write_data}; \ end \ /* verilator lint_on GENUNNAMED */ diff --git a/hw/rtl/VX_socket.sv b/hw/rtl/VX_socket.sv index 766ff468a8..c2b5746e89 100644 --- a/hw/rtl/VX_socket.sv +++ b/hw/rtl/VX_socket.sv @@ -198,14 +198,14 @@ module VX_socket import VX_gpu_pkg::*; #( wire [`SOCKET_SIZE-1:0] per_core_busy; - VX_dcr_bus_if core_dcr_bus_if(); - `BUFFER_DCR_BUS_IF (core_dcr_bus_if, dcr_bus_if, (`SOCKET_SIZE > 1)); - // Generate all cores for (genvar core_id = 0; core_id < `SOCKET_SIZE; 
++core_id) begin : g_cores `RESET_RELAY (core_reset, reset); + VX_dcr_bus_if core_dcr_bus_if(); + `BUFFER_DCR_BUS_IF (core_dcr_bus_if, dcr_bus_if, 1'b1, (`SOCKET_SIZE > 1)); + VX_core #( .CORE_ID ((SOCKET_ID * `SOCKET_SIZE) + core_id), .INSTANCE_ID ($sformatf("%s-core%0d", INSTANCE_ID, core_id)) diff --git a/hw/rtl/Vortex.sv b/hw/rtl/Vortex.sv index 0bdbec8435..fad67be4cc 100644 --- a/hw/rtl/Vortex.sv +++ b/hw/rtl/Vortex.sv @@ -134,7 +134,7 @@ module Vortex import VX_gpu_pkg::*; ( `RESET_RELAY (cluster_reset, reset); VX_dcr_bus_if cluster_dcr_bus_if(); - `BUFFER_DCR_BUS_IF (cluster_dcr_bus_if, dcr_bus_if, (`NUM_CLUSTERS > 1)); + `BUFFER_DCR_BUS_IF (cluster_dcr_bus_if, dcr_bus_if, 1'b1, (`NUM_CLUSTERS > 1)); VX_cluster #( .CLUSTER_ID (cluster_id), From 49ed88e59f51b73b33728911ac4f679a8e4b9318 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Thu, 12 Sep 2024 20:12:18 -0700 Subject: [PATCH 179/407] minor update --- hw/rtl/VX_socket.sv | 2 +- hw/rtl/Vortex.sv | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/hw/rtl/VX_socket.sv b/hw/rtl/VX_socket.sv index c2b5746e89..69ff88a2ce 100644 --- a/hw/rtl/VX_socket.sv +++ b/hw/rtl/VX_socket.sv @@ -204,7 +204,7 @@ module VX_socket import VX_gpu_pkg::*; #( `RESET_RELAY (core_reset, reset); VX_dcr_bus_if core_dcr_bus_if(); - `BUFFER_DCR_BUS_IF (core_dcr_bus_if, dcr_bus_if, 1'b1, (`SOCKET_SIZE > 1)); + `BUFFER_DCR_BUS_IF (core_dcr_bus_if, dcr_bus_if, 1'b1, (`SOCKET_SIZE > 1)) VX_core #( .CORE_ID ((SOCKET_ID * `SOCKET_SIZE) + core_id), diff --git a/hw/rtl/Vortex.sv b/hw/rtl/Vortex.sv index fad67be4cc..e07aaae4d1 100644 --- a/hw/rtl/Vortex.sv +++ b/hw/rtl/Vortex.sv @@ -134,7 +134,7 @@ module Vortex import VX_gpu_pkg::*; ( `RESET_RELAY (cluster_reset, reset); VX_dcr_bus_if cluster_dcr_bus_if(); - `BUFFER_DCR_BUS_IF (cluster_dcr_bus_if, dcr_bus_if, 1'b1, (`NUM_CLUSTERS > 1)); + `BUFFER_DCR_BUS_IF (cluster_dcr_bus_if, dcr_bus_if, 1'b1, (`NUM_CLUSTERS > 1)) VX_cluster #( .CLUSTER_ID (cluster_id), From 
1ddd1ba1ccf36440f8f768f856d062b69cfbbbb5 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Thu, 12 Sep 2024 20:15:41 -0700 Subject: [PATCH 180/407] minor update --- hw/rtl/libs/VX_decoder.sv | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/hw/rtl/libs/VX_decoder.sv b/hw/rtl/libs/VX_decoder.sv index c5c7b8706e..3e463326c8 100644 --- a/hw/rtl/libs/VX_decoder.sv +++ b/hw/rtl/libs/VX_decoder.sv @@ -27,16 +27,16 @@ module VX_decoder #( input wire [M-1:0] valid_in, output wire [D-1:0][M-1:0] data_out ); + logic [D-1:0][M-1:0] shift; if (MODEL == 1) begin : g_model1 - reg [D-1:0][M-1:0] data_out_w; always @(*) begin - data_out_w = '0; - data_out_w[data_in] = valid_in; + shift = '0; + shift[data_in] = 1'b1; end - assign data_out = data_out_w; end else begin : g_model0 - assign data_out = (D*M)'(valid_in) << (data_in * M); + assign shift = (D*M)'(1'b1) << (data_in * M); end + assign data_out = {D{valid_in}} & shift; endmodule `TRACING_ON From 145eacc451ee552efc261997871ca56d9fb27c4b Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Thu, 12 Sep 2024 21:08:19 -0700 Subject: [PATCH 181/407] minor update --- hw/rtl/libs/VX_pending_size.sv | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/hw/rtl/libs/VX_pending_size.sv b/hw/rtl/libs/VX_pending_size.sv index 475bbb36c1..50737634f2 100644 --- a/hw/rtl/libs/VX_pending_size.sv +++ b/hw/rtl/libs/VX_pending_size.sv @@ -167,7 +167,15 @@ module VX_pending_size #( end end - assign size = {full_r, used_r}; + if (SIZE > 1) begin : g_sizeN + if (SIZEW > ADDRW) begin : g_not_log2 + assign size = {full_r, used_r}; + end else begin : g_log2 + assign size = used_r; + end + end else begin : g_size1 + assign size = full_r; + end end From b77fff764e2c7b1acc3bd8d46e11332b2663da46 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Thu, 12 Sep 2024 22:12:03 -0700 Subject: [PATCH 182/407] minor update --- hw/rtl/libs/VX_bypass_buffer.sv | 20 ++--- hw/rtl/libs/VX_stream_buffer.sv | 129 
++++++++++++++++---------------- 2 files changed, 74 insertions(+), 75 deletions(-) diff --git a/hw/rtl/libs/VX_bypass_buffer.sv b/hw/rtl/libs/VX_bypass_buffer.sv index 14079395b7..7378a4fddd 100644 --- a/hw/rtl/libs/VX_bypass_buffer.sv +++ b/hw/rtl/libs/VX_bypass_buffer.sv @@ -35,7 +35,15 @@ module VX_bypass_buffer #( input wire ready_out, output wire valid_out ); - if (PASSTHRU == 0) begin : g_buffer + if (PASSTHRU != 0) begin : g_passthru + + `UNUSED_VAR (clk) + `UNUSED_VAR (reset) + assign ready_in = ready_out; + assign valid_out = valid_in; + assign data_out = data_in; + + end else begin : g_buffer reg [DATAW-1:0] buffer; reg has_data; @@ -59,15 +67,7 @@ module VX_bypass_buffer #( assign data_out = has_data ? buffer : data_in; assign valid_out = valid_in || has_data; - end else begin : g_passthru - - `UNUSED_VAR (clk) - `UNUSED_VAR (reset) - assign ready_in = ready_out; - assign valid_out = valid_in; - assign data_out = data_in; - - end else + end endmodule `TRACING_ON diff --git a/hw/rtl/libs/VX_stream_buffer.sv b/hw/rtl/libs/VX_stream_buffer.sv index 7670b40fea..51e33db60c 100644 --- a/hw/rtl/libs/VX_stream_buffer.sv +++ b/hw/rtl/libs/VX_stream_buffer.sv @@ -37,86 +37,85 @@ module VX_stream_buffer #( input wire ready_out, output wire valid_out ); - if (PASSTHRU == 0) begin : g_buffer - if (OUT_REG != 0) begin : g_with_reg - - reg [DATAW-1:0] data_out_r; - reg [DATAW-1:0] buffer; - reg valid_out_r; - reg no_buffer; - - wire fire_in = valid_in && ready_in; - wire flow_out = ready_out || ~valid_out; - - always @(posedge clk) begin - if (reset) begin - valid_out_r <= 0; - no_buffer <= 1; - end else begin - if (flow_out) begin - no_buffer <= 1; - end else if (valid_in) begin - no_buffer <= 0; - end - if (flow_out) begin - valid_out_r <= valid_in || ~no_buffer; - end - end - end + if (PASSTHRU != 0) begin : g_passthru + + `UNUSED_VAR (clk) + `UNUSED_VAR (reset) + assign ready_in = ready_out; + assign valid_out = valid_in; + assign data_out = data_in; + + end else 
if (OUT_REG != 0) begin : g_with_reg + + reg [DATAW-1:0] data_out_r; + reg [DATAW-1:0] buffer; + reg valid_out_r; + reg no_buffer; + + wire fire_in = valid_in && ready_in; + wire flow_out = ready_out || ~valid_out; - always @(posedge clk) begin - if (fire_in) begin - buffer <= data_in; + always @(posedge clk) begin + if (reset) begin + valid_out_r <= 0; + no_buffer <= 1; + end else begin + if (flow_out) begin + no_buffer <= 1; + end else if (valid_in) begin + no_buffer <= 0; end if (flow_out) begin - data_out_r <= no_buffer ? data_in : buffer; + valid_out_r <= valid_in || ~no_buffer; end end + end + + always @(posedge clk) begin + if (fire_in) begin + buffer <= data_in; + end + if (flow_out) begin + data_out_r <= no_buffer ? data_in : buffer; + end + end - assign ready_in = no_buffer; - assign valid_out = valid_out_r; - assign data_out = data_out_r; + assign ready_in = no_buffer; + assign valid_out = valid_out_r; + assign data_out = data_out_r; - end else begin : g_no_reg + end else begin : g_no_reg - reg [1:0][DATAW-1:0] shift_reg; - reg [1:0] fifo_state; + reg [1:0][DATAW-1:0] shift_reg; + reg [1:0] fifo_state; - wire fire_in = valid_in && ready_in; - wire fire_out = valid_out && ready_out; + wire fire_in = valid_in && ready_in; + wire fire_out = valid_out && ready_out; - always @(posedge clk) begin - if (reset) begin - fifo_state <= 2'b00; - end else begin - case ({fire_in, fire_out}) - 2'b10: fifo_state <= {fifo_state[0], 1'b1}; // 00 -> 01, 01 -> 10 - 2'b01: fifo_state <= {1'b0, fifo_state[1]}; // 10 -> 01, 01 -> 00 - default: fifo_state <= fifo_state; - endcase - end + always @(posedge clk) begin + if (reset) begin + fifo_state <= 2'b00; + end else begin + case ({fire_in, fire_out}) + 2'b10: fifo_state <= {fifo_state[0], 1'b1}; // 00 -> 01, 01 -> 10 + 2'b01: fifo_state <= {1'b0, fifo_state[1]}; // 10 -> 01, 01 -> 00 + default: fifo_state <= fifo_state; + endcase end + end - always @(posedge clk) begin - if (fire_in) begin - shift_reg[1] <= shift_reg[0]; - 
shift_reg[0] <= data_in; - end + always @(posedge clk) begin + if (fire_in) begin + shift_reg[1] <= shift_reg[0]; + shift_reg[0] <= data_in; end + end - assign ready_in = ~fifo_state[1]; - assign valid_out = fifo_state[0]; - assign data_out = shift_reg[fifo_state[1]]; + assign ready_in = ~fifo_state[1]; + assign valid_out = fifo_state[0]; + assign data_out = shift_reg[fifo_state[1]]; - end - end else begin : g_passthru - `UNUSED_VAR (clk) - `UNUSED_VAR (reset) - assign ready_in = ready_out; - assign valid_out = valid_in; - assign data_out = data_in; - end + end endmodule `TRACING_ON - From 263893eb7c6a9593ffaa9d09085e1c1898790387 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Fri, 13 Sep 2024 00:03:08 -0700 Subject: [PATCH 183/407] minor update --- hw/rtl/VX_define.vh | 3 +-- hw/rtl/cache/VX_cache.sv | 6 +++--- hw/rtl/cache/VX_cache_bypass.sv | 1 + hw/rtl/cache/VX_cache_data.sv | 17 ++++++++++------- hw/rtl/core/VX_operands.sv | 8 ++++---- hw/rtl/libs/VX_transpose.sv | 5 +++-- 6 files changed, 22 insertions(+), 18 deletions(-) diff --git a/hw/rtl/VX_define.vh b/hw/rtl/VX_define.vh index 502f794bb3..85fa40f0d2 100644 --- a/hw/rtl/VX_define.vh +++ b/hw/rtl/VX_define.vh @@ -395,11 +395,10 @@ if (latency != 0) begin \ VX_pipe_register #( \ .DATAW (1 + `VX_DCR_ADDR_WIDTH + `VX_DCR_DATA_WIDTH), \ - .RESETW (1 + `VX_DCR_ADDR_WIDTH + `VX_DCR_DATA_WIDTH), \ .DEPTH (latency) \ ) pipe_reg ( \ .clk (clk), \ - .reset (reset), \ + .reset (1'b0), \ .enable (1'b1), \ .data_in ({src.write_valid && ena, src.write_addr, src.write_data}), \ .data_out ({dst.write_valid, dst.write_addr, dst.write_data}) \ diff --git a/hw/rtl/cache/VX_cache.sv b/hw/rtl/cache/VX_cache.sv index b6d3f95529..06887944ca 100644 --- a/hw/rtl/cache/VX_cache.sv +++ b/hw/rtl/cache/VX_cache.sv @@ -310,7 +310,7 @@ module VX_cache import VX_gpu_pkg::*; #( end for (genvar i = 0; i < NUM_REQS; ++i) begin : g_core_req_bid - if (NUM_BANKS > 1) begin : g_multibank + if (NUM_BANKS > 1) begin : g_multibanks assign 
core_req_bid[i] = core_req_addr[i][WORD_SEL_BITS +: BANK_SEL_BITS]; end else begin : g_singlebank assign core_req_bid[i] = '0; @@ -448,7 +448,7 @@ module VX_cache import VX_gpu_pkg::*; #( if (NUM_BANKS == 1) begin : g_per_bank_mem_req_addr_multibanks assign per_bank_mem_req_addr[bank_id] = curr_bank_mem_req_addr; - end else begin : g_per_bank_mem_req_addr_one_bank + end else begin : g_per_bank_mem_req_addr_singlebank assign per_bank_mem_req_addr[bank_id] = `CS_LINE_TO_MEM_ADDR(curr_bank_mem_req_addr, bank_id); end end @@ -521,7 +521,7 @@ module VX_cache import VX_gpu_pkg::*; #( if (NUM_BANKS > 1) begin : g_mem_req_tag_multibanks wire [`CS_BANK_SEL_BITS-1:0] mem_req_bank_id = `CS_MEM_ADDR_TO_BANK_ID(mem_req_addr); assign mem_req_tag = MEM_TAG_WIDTH'({bank_mem_req_tag, mem_req_bank_id}); - end else begin : g_mem_req_tag_one_bank + end else begin : g_mem_req_tag assign mem_req_tag = MEM_TAG_WIDTH'(bank_mem_req_tag); end diff --git a/hw/rtl/cache/VX_cache_bypass.sv b/hw/rtl/cache/VX_cache_bypass.sv index a60904d463..4b3b3a59ab 100644 --- a/hw/rtl/cache/VX_cache_bypass.sv +++ b/hw/rtl/cache/VX_cache_bypass.sv @@ -268,6 +268,7 @@ module VX_cache_bypass #( for (genvar i = 0; i < NUM_REQS; ++i) begin : g_core_rsp_in_valid assign core_rsp_in_valid[i] = core_bus_out_if[i].rsp_valid || (is_mem_rsp_nc && rsp_idx == REQ_SEL_WIDTH'(i)); end + for (genvar i = 0; i < NUM_REQS; ++i) begin : g_core_rsp_in_ready assign core_bus_out_if[i].rsp_ready = core_rsp_in_ready[i]; end diff --git a/hw/rtl/cache/VX_cache_data.sv b/hw/rtl/cache/VX_cache_data.sv index 27844fd6f5..04b0ff746c 100644 --- a/hw/rtl/cache/VX_cache_data.sv +++ b/hw/rtl/cache/VX_cache_data.sv @@ -76,13 +76,16 @@ module VX_cache_data #( wire [`LOG2UP(NUM_WAYS)-1:0] way_idx; if (WRITEBACK) begin : g_dirty_data - wire [NUM_WAYS-1:0][`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] flipped_rdata; - for (genvar i = 0; i < `CS_WORDS_PER_LINE; ++i) begin : g_flipped_rdata - for (genvar j = 0; j < NUM_WAYS; ++j) begin : g_j - assign 
flipped_rdata[j][i] = line_rdata[i][j]; - end - end - assign dirty_data = flipped_rdata[way_idx]; + wire [NUM_WAYS-1:0][`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] transposed_rdata; + VX_transpose #( + .DATAW (`CS_WORD_WIDTH), + .N (`CS_WORDS_PER_LINE), + .M (NUM_WAYS) + ) transpose ( + .data_in (line_rdata), + .data_out (transposed_rdata) + ); + assign dirty_data = transposed_rdata[way_idx]; end else begin : g_dirty_data_0 assign dirty_data = '0; end diff --git a/hw/rtl/core/VX_operands.sv b/hw/rtl/core/VX_operands.sv index 066db15cda..2ca847394b 100644 --- a/hw/rtl/core/VX_operands.sv +++ b/hw/rtl/core/VX_operands.sv @@ -90,9 +90,9 @@ module VX_operands import VX_gpu_pkg::*; #( end for (genvar i = 0; i < NUM_SRC_OPDS; ++i) begin : g_req_bank_idx - if (NUM_BANKS != 1) begin : g_banks + if (NUM_BANKS != 1) begin : g_multibanks assign req_bank_idx[i] = src_opds[i][BANK_SEL_BITS-1:0]; - end else begin : g_1bank + end else begin : g_singlebank assign req_bank_idx[i] = '0; end end @@ -250,10 +250,10 @@ module VX_operands import VX_gpu_pkg::*; #( for (genvar b = 0; b < NUM_BANKS; ++b) begin : g_gpr_rams wire gpr_wr_enabled; - if (BANK_SEL_BITS != 0) begin : g_gpr_wr_enabled + if (BANK_SEL_BITS != 0) begin : g_gpr_wr_enabled_multibanks assign gpr_wr_enabled = writeback_if.valid && (gpr_wr_bank_idx == BANK_SEL_BITS'(b)); - end else begin : g_gpr_wr_enabled_1bank + end else begin : g_gpr_wr_enabled assign gpr_wr_enabled = writeback_if.valid; end diff --git a/hw/rtl/libs/VX_transpose.sv b/hw/rtl/libs/VX_transpose.sv index 769a78422a..2fc0bd6957 100644 --- a/hw/rtl/libs/VX_transpose.sv +++ b/hw/rtl/libs/VX_transpose.sv @@ -15,11 +15,12 @@ `TRACING_OFF module VX_transpose #( + parameter DATAW = 1, parameter N = 1, parameter M = 1 ) ( - input wire [N-1:0][M-1:0] data_in, - output wire [M-1:0][N-1:0] data_out + input wire [N-1:0][M-1:0][DATAW-1:0] data_in, + output wire [M-1:0][N-1:0][DATAW-1:0] data_out ); for (genvar i = 0; i < N; ++i) begin : g_i for (genvar j = 0; j < M; 
++j) begin : g_j From bbe9c0372fc893197fdd77f0095422a7596159a9 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Fri, 13 Sep 2024 00:35:42 -0700 Subject: [PATCH 184/407] minor update --- hw/rtl/libs/VX_decoder.sv | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hw/rtl/libs/VX_decoder.sv b/hw/rtl/libs/VX_decoder.sv index 3e463326c8..7c0c760e56 100644 --- a/hw/rtl/libs/VX_decoder.sv +++ b/hw/rtl/libs/VX_decoder.sv @@ -31,10 +31,10 @@ module VX_decoder #( if (MODEL == 1) begin : g_model1 always @(*) begin shift = '0; - shift[data_in] = 1'b1; + shift[data_in] = {M{1'b1}}; end end else begin : g_model0 - assign shift = (D*M)'(1'b1) << (data_in * M); + assign shift = ((D*M)'({M{1'b1}})) << (data_in * M); end assign data_out = {D{valid_in}} & shift; From dc7610106873d46e280e74fe406f08d2cf9c6d2d Mon Sep 17 00:00:00 2001 From: Udit Subramanya Date: Fri, 13 Sep 2024 09:09:38 -0400 Subject: [PATCH 185/407] contribution stats --- docs/contributing.md | 36 +++++++++++++++++++++++++++++++----- docs/environment_setup.md | 23 +++++++++++++++-------- 2 files changed, 46 insertions(+), 13 deletions(-) diff --git a/docs/contributing.md b/docs/contributing.md index 14e0ccd0ca..5264454d2b 100644 --- a/docs/contributing.md +++ b/docs/contributing.md @@ -1,11 +1,37 @@ -# Contributing to Vortex on Github +# Contributing to Vortex -## Github Details -- There are two main repos, `vortex` (public, this one) and `vortex-dev` (private) -- todo: Most current development is on `vortex` -- If you have a legacy version of `vortex`, you can use the releases branch or tags to access the repo at that point in time +## Github +Vortex uses Github to host its git repositories. +There are a lot of ways to use the features on Github for collaboration. +Therefore, this documentation details the standard procedure for contributing to Vortex. +Development of Vortex is consolidated to this repo, `vortex` and any associated forks. 
+Previously, there was active work done on a private repo named `vortex-dev`. +`vortex-dev` has officially been deprecated and fully merged into this public repo, `vortex`. +If you are returning to this project and have legacy versions of Vortex, you can use the releases branches to access older versions. ## Contribution Process +In an effort to keep `vortex` organized, permissions to directly create branches and push code has been limited to admins. +However, contributions are strongly encouraged and keep the project moving forward! Here is the procedure for contributing: + +1. Create a fork of `vortex` +2. In your fork, create a branch that briefly explains the work you are adding (ie: `develop-documentation`) branches from `develop` and adds some documentation +3. Make your changes on your new branch in your fork. You may create as many commits as you need, which might be common if you are making multiple iterations +4. Since you are the owner of your fork, you have full permissions to push commits to your fork +4. When you are satisfied with the changes on your fork, you can open a PR from your fork using the online interface +5. If you recently made a push, you will get automatically get a prompt on Github online to create a PR, which you can press +6. Otherwise, you can go to your fork on Github online and manually create a PR (todo) +(todo): how to name and format your PR, what information you should add to the PR, does not need to be too strict if you are attending the weekly meetings* +7. Github uses the following semantics: `base repository` gets the changes from your `head repository` +8. Therefore, you should set the `base repository` to `vortexgpgpu/vortex` and the `base` branch to `develop` since active development should only be added to this branch +9. And you should assign the `head repository` to `/vortex` (which represents your fork of vortex) and the `base` branch to the one created in step 2 +10. 
Now that your intended PR has been specified, you should review the status. Check for merge conflicts, if all your commits are present, and all the modified files make sense +11. You can still make a PR if there are issues in step 10, just make sure the structure is correct according to steps 7-9 +12. Once the PR is made, the CI pipeline will run automatically, testing your changes +13. Remember, a PR is flexible if you need to make changes to the code you can go back to your branch of the fork to commit and push any updates +14. As long as the `head repository`'s `base` branch is the one you edited, the PR will automatically get the most recent changes +15. When all merge conflicts are resolved, changes are made, and tests pass you can have an admin merge your PR + + - You should create a new branch from develop that is clearly named with the feature that you want to add - Avoid pushing directly to the `master` branch instead you will need to make a Pull Request (PR) - There should be protections in place that prevent pushing directly to the main branch, but don't rely on it diff --git a/docs/environment_setup.md b/docs/environment_setup.md index 1c15495faf..be0a1328a5 100644 --- a/docs/environment_setup.md +++ b/docs/environment_setup.md @@ -1,30 +1,35 @@ -# Environment Setup# Vortex Dev Environment Setup -These instructions apply to the development vortex repo using the *updated toolchain*. The updated toolchain is considered to be any commit of `master` pulled from *July 2, 2023* onwards. The toolchain update in question can be viewed in this [commit](https://github.com/vortexgpgpu/vortex-dev/commit/0048496ba28d7b9a209a0e569d52d60f2b68fc04). Therefore, if you are unsure whether you are using the new toolchain or not, then you should check the `ci` folder for the existence of the `toolchain_prebuilt.sh` script. Furthermore, you should notice that the `toolchain_install.sh` script has the legacy `llvm()` split into `llvm-vortex()` and `llvm-pocl()`. 
+# Environment Setup + +These instructions apply to the development vortex repo using the _updated toolchain_. The updated toolchain is considered to be any commit of `master` pulled from _July 2, 2023_ onwards. The toolchain update in question can be viewed in this [commit](https://github.com/vortexgpgpu/vortex-dev/commit/0048496ba28d7b9a209a0e569d52d60f2b68fc04). Therefore, if you are unsure whether you are using the new toolchain or not, then you should check the `ci` folder for the existence of the `toolchain_prebuilt.sh` script. Furthermore, you should notice that the `toolchain_install.sh` script has the legacy `llvm()` split into `llvm-vortex()` and `llvm-pocl()`. > Note: As it stands right now, there a few test suites which are not working due to this toolchain migration. We are working to determine an exact list of which ones are working and which ones are not. For now, if the repo builds at a minimum, then you can consider all these steps to have worked successfully. ## Choosing an Development Environment + There are three primary environments you can use. Each has its own pros and cons. Refer to this section to help you determine which environment best suits your needs. + 1. Volvo 2. Docker 3. Local ### Volvo + Volvo is a server provided by Georgia Tech. As such, it provides high performance compute, but you need valid credentials to access it. If you don't already have credentials, you can get in contact with your mentor to ask about setting your account up. Pros: -1. Native x86_64 architecture, AMD EPYC 7702P 64-Core Processor (*fast*) +1. Native x86_64 architecture, AMD EPYC 7702P 64-Core Processor (_fast_) 2. Packages and difficult configurations are already done for you 3. Consistent environment as others, allowing for easier troubleshooting 4. Just need to SSH into Volvo, minimal impact on local computer resources 5. VScode remote development tools are phenomenal over SSH Cons: + 1. 
Volvo is accessed via gatech vpn, external contributors might encounter issues with it -- especially from other university networks 2. Account creation is not immediate and is subject to processing time -3. Volvo might have outtages (*pretty uncommon*) -5. SSH development requires internet and other remote development tools (*vscode works!*) +3. Volvo might have outtages (_pretty uncommon_) +4. SSH development requires internet and other remote development tools (_vscode works!_) ### Docker @@ -44,18 +49,21 @@ Cons: 2. Limited to your computer's performance, and Vortex is a large repo to build 3. Will utilize a few gigabytes of storage on your computer for saving binaries to run the container - ### Local + You can reverse engineer the Dockerfile and scripts above to get a working environment setup locally. This option is for experienced users, who have already considered the pros and cons of Volvo and Docker. ## Setup on Volvo + 1. Clone Repo Recursively: `git clone --recursive https://github.com/vortexgpgpu/vortex-dev.git` 2. Source `/opt/set_vortex_env_dev.sh` to initialize pre-installed toolchain 3. `make -s` in `vortex-dev` root directory 4. Run a test program: `./ci/blackbox.sh --cores=2 --app=dogfood` ## Setup with Docker + Currently the Dockerfile is not included with the official vortex-dev repository, however you can quickly add it to repo and get started. + 1. Clone repo recursively onto your local machine: `git clone --recursive https://github.com/vortexgpgpu/vortex-dev.git` 2. Download a copy of `Dockerfile.dev` and place it in the root of the repo. 3. Build the Dockerfile into an image: `docker build --platform=linux/amd64 -t vortex-dev -f Dockerfile.dev .` @@ -64,8 +72,7 @@ Currently the Dockerfile is not included with the official vortex-dev repository 6. `make -s` in `vortex-dev` root directory 7. 
Run a test program: `./ci/blackbox.sh --cores=2 --app=dogfood` - ### Additional Docker Commands + - Exit from a container (does not stop or remove it) - Resume a container you have exited or start a second terminal session `docker exec -it bash` - From 0a48d98bc12b60e65a22469cb61d4b428c9e05f3 Mon Sep 17 00:00:00 2001 From: Jaewon Lee Date: Fri, 13 Sep 2024 09:39:28 -0400 Subject: [PATCH 186/407] Update README.md It has the instruction about the other branch(Vortex_vm). --- README.md | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index 4322f06bc1..ec8d10bd56 100644 --- a/README.md +++ b/README.md @@ -59,20 +59,17 @@ sudo apt-get install git ``` ### Configure your build folder ```sh - # - # By default, the toolchain default install location is the /opt folder and can be overridden by setting --tooldir. - # This is the example for volvo server mkdir build - mkdir out - export OUT_DIR=`pwd`/out cd build - # Run the following to disble virtual memory feature in compilation - ../configure --xlen=32 --tooldir=/software/vortex-toolchain-2024-2024-08-09 --prefix=$OUT_DIR - # Run the following instead to enable virtual memory feature in compilation - ../configure --xlen=32 --tooldir=/software/vortex-toolchain-2024-2024-08-09 --prefix=$OUT_DIR --vm_enable=1 + # for 32bit + ../configure --xlen=32 --tooldir=$HOME/tools + # for 64bit + ../configure --xlen=64 --tooldir=$HOME/tools ``` ### Install prebuilt toolchain - # We will use the precomipled tools in volvo toolchanin directory +```sh + ./ci/toolchain_install.sh --all +``` ### set environment variables ```sh # should always run before using the toolchain! 
@@ -82,7 +79,6 @@ sudo apt-get install git ```sh make -s ``` - ### Quick demo running vecadd OpenCL kernel on 2 cores ```sh ./ci/blackbox.sh --cores=2 --app=vecadd From 50458bbae04b73fa0813b92f00bc7f00767517ba Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Tue, 17 Sep 2024 06:22:07 -0700 Subject: [PATCH 187/407] xilinx synthesis debugging foxes --- configure | 2 +- hw/rtl/VX_platform.vh | 17 + hw/rtl/afu/xrt/VX_afu_wrap.sv | 39 +- hw/rtl/afu/xrt/vortex_afu.v | 22 +- hw/scripts/{ip_gen.sh => altera_ip_gen.sh} | 0 hw/scripts/ila_insert.tcl | 231 ++++++ .../gen_ip.tcl => scripts/xilinx_ip_gen.tcl} | 0 hw/syn/altera/dut/Makefile | 2 +- hw/syn/altera/opae/Makefile | 2 +- hw/syn/xilinx/README | 2 +- hw/syn/xilinx/dut/common.mk | 4 +- hw/syn/xilinx/dut/project.tcl | 8 +- hw/syn/xilinx/{scripts => }/gen_xo.tcl | 6 +- hw/syn/xilinx/{scripts => }/kill_build.sh | 0 hw/syn/xilinx/{scripts => }/kill_hwserver.sh | 0 hw/syn/xilinx/{scripts => }/kill_sim.sh | 0 .../xilinx/{scripts => }/package_kernel.tcl | 39 +- hw/syn/xilinx/sandbox/Makefile | 22 +- hw/syn/xilinx/sandbox/project.tcl.in | 739 +++++++++--------- hw/syn/xilinx/xrt/Makefile | 5 +- sim/simx/cache_sim.cpp | 1 + 21 files changed, 719 insertions(+), 422 deletions(-) rename hw/scripts/{ip_gen.sh => altera_ip_gen.sh} (100%) create mode 100644 hw/scripts/ila_insert.tcl rename hw/{syn/xilinx/scripts/gen_ip.tcl => scripts/xilinx_ip_gen.tcl} (100%) rename hw/syn/xilinx/{scripts => }/gen_xo.tcl (94%) rename hw/syn/xilinx/{scripts => }/kill_build.sh (100%) rename hw/syn/xilinx/{scripts => }/kill_hwserver.sh (100%) rename hw/syn/xilinx/{scripts => }/kill_sim.sh (100%) rename hw/syn/xilinx/{scripts => }/package_kernel.tcl (86%) diff --git a/configure b/configure index de04b648b7..bbeda59c92 100755 --- a/configure +++ b/configure @@ -65,7 +65,7 @@ copy_files() { filename_no_ext="${filename%.in}" dest_file="$dest_dir/$filename_no_ext" mkdir -p "$dest_dir" - sed "s|@VORTEX_HOME@|$SCRIPT_DIR|g; s|@XLEN@|$XLEN|g; 
s|@TOOLDIR@|$TOOLDIR|g; s|@OSVERSION@|$OSVERSION|g; s|@INSTALLDIR@|$PREFIX|g" "$file" > "$dest_file" + sed "s|@VORTEX_HOME@|$SCRIPT_DIR|g; s|@XLEN@|$XLEN|g; s|@TOOLDIR@|$TOOLDIR|g; s|@OSVERSION@|$OSVERSION|g; s|@INSTALLDIR@|$PREFIX|g; s|@CURRENTDIR@|$CURRENT_DIR|g" "$file" > "$dest_file" # apply permissions to bash scripts read -r firstline < "$dest_file" if [[ "$firstline" =~ ^#!.*bash ]]; then diff --git a/hw/rtl/VX_platform.vh b/hw/rtl/VX_platform.vh index f2d0f6a360..7f6805c509 100644 --- a/hw/rtl/VX_platform.vh +++ b/hw/rtl/VX_platform.vh @@ -56,8 +56,25 @@ `define UNUSED_PIN(x) . x () `define UNUSED_ARG(x) x +`define __SCOPE (* mark_debug="true" *) + +`define __SCOPE_X + +`define __SCOPE_ON \ + `undef __SCOPE_X \ + `define __SCOPE_X `__SCOPE + +`define __SCOPE_OFF \ + `undef __SCOPE_X \ + `define __SCOPE_X + `else // not SYNTHESIS +`define __SCOPE +`define __SCOPE_X +`define __SCOPE_ON +`define __SCOPE_OFF + `ifdef VERILATOR `ifndef TRACING_ALL diff --git a/hw/rtl/afu/xrt/VX_afu_wrap.sv b/hw/rtl/afu/xrt/VX_afu_wrap.sv index c92d94c7cd..9872ae3c14 100644 --- a/hw/rtl/afu/xrt/VX_afu_wrap.sv +++ b/hw/rtl/afu/xrt/VX_afu_wrap.sv @@ -21,8 +21,8 @@ module VX_afu_wrap #( parameter C_M_AXI_MEM_DATA_WIDTH = `VX_MEM_DATA_WIDTH ) ( // System signals - input wire ap_clk, - input wire ap_rst_n, + input wire clk, + input wire reset, // AXI4 master interface `REPEAT (`M_AXI_MEM_NUM_BANKS, GEN_AXI_MEM, REPEAT_COMMA), @@ -82,8 +82,6 @@ module VX_afu_wrap #( // convert memory interface to array `REPEAT (`M_AXI_MEM_NUM_BANKS, AXI_MEM_TO_ARRAY, REPEAT_SEMICOLON); - wire reset = ~ap_rst_n; - reg [`CLOG2(`RESET_DELAY+1)-1:0] vx_reset_ctr; reg [15:0] vx_pending_writes; reg vx_busy_wait; @@ -122,7 +120,7 @@ module VX_afu_wrap #( end end - always @(posedge ap_clk) begin + always @(posedge clk) begin if (reset || ap_reset) begin state <= STATE_IDLE; vx_pending_writes <= '0; @@ -187,7 +185,7 @@ module VX_afu_wrap #( .AXI_DATA_WIDTH (C_S_AXI_CTRL_DATA_WIDTH), .AXI_NUM_BANKS 
(C_M_AXI_MEM_NUM_BANKS) ) afu_ctrl ( - .clk (ap_clk), + .clk (clk), .reset (reset), .s_axi_awvalid (s_axi_ctrl_awvalid), @@ -245,7 +243,7 @@ module VX_afu_wrap #( ) vortex_axi ( `SCOPE_IO_BIND (1) - .clk (ap_clk), + .clk (clk), .reset (vx_reset), .m_axi_awvalid (m_axi_mem_awvalid_a), @@ -301,9 +299,32 @@ module VX_afu_wrap #( // SCOPE ////////////////////////////////////////////////////////////////////// +`ifdef CHIPSCOPE + ila_afu ila_afu_inst ( + .clk (clk), + .probe0 ({ + ap_reset, + ap_start, + ap_done, + ap_idle, + interrupt + }), + .probe1 ({ + vx_pending_writes, + vx_busy_wait, + vx_busy, + vx_reset, + dcr_wr_valid, + dcr_wr_addr, + dcr_wr_data + }) + ); +`endif + `ifdef DBG_SCOPE_AFU `define TRIGGERS { \ reset, \ + ap_reset, \ ap_start, \ ap_done, \ ap_idle, \ @@ -343,7 +364,7 @@ module VX_afu_wrap #( initial begin $assertoff(0, vortex_axi); end - always @(posedge ap_clk) begin + always @(posedge clk) begin if (reset) begin assert_delay_ctr <= '0; assert_enabled <= 0; @@ -362,7 +383,7 @@ module VX_afu_wrap #( `endif `ifdef DBG_TRACE_AFU - always @(posedge ap_clk) begin + always @(posedge clk) begin for (integer i = 0; i < C_M_AXI_MEM_NUM_BANKS; ++i) begin if (m_axi_mem_awvalid_a[i] && m_axi_mem_awready_a[i]) begin `TRACE(2, ("%t: AFU Wr Req [%0d]: addr=0x%0h, tag=0x%0h\n", $time, i, m_axi_mem_awaddr_a[i], m_axi_mem_awid_a[i])) diff --git a/hw/rtl/afu/xrt/vortex_afu.v b/hw/rtl/afu/xrt/vortex_afu.v index 2c31900cb8..1973ec0aa4 100644 --- a/hw/rtl/afu/xrt/vortex_afu.v +++ b/hw/rtl/afu/xrt/vortex_afu.v @@ -1,10 +1,10 @@ // Copyright © 2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
// You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -18,12 +18,12 @@ module vortex_afu #( parameter C_S_AXI_CTRL_DATA_WIDTH = 32, parameter C_M_AXI_MEM_ID_WIDTH = `M_AXI_MEM_ID_WIDTH, parameter C_M_AXI_MEM_ADDR_WIDTH = 64, - parameter C_M_AXI_MEM_DATA_WIDTH = `VX_MEM_DATA_WIDTH + parameter C_M_AXI_MEM_DATA_WIDTH = `VX_MEM_DATA_WIDTH ) ( // System signals input wire ap_clk, input wire ap_rst_n, - + // AXI4 master interface `REPEAT (`M_AXI_MEM_NUM_BANKS, GEN_AXI_MEM, REPEAT_COMMA), @@ -45,8 +45,8 @@ module vortex_afu #( output wire s_axi_ctrl_bvalid, input wire s_axi_ctrl_bready, output wire [1:0] s_axi_ctrl_bresp, - - output wire interrupt + + output wire interrupt ); VX_afu_wrap #( @@ -56,14 +56,14 @@ module vortex_afu #( .C_M_AXI_MEM_ADDR_WIDTH (C_M_AXI_MEM_ADDR_WIDTH), .C_M_AXI_MEM_DATA_WIDTH (C_M_AXI_MEM_DATA_WIDTH) ) afu_wrap ( - .ap_clk (ap_clk), - .ap_rst_n (ap_rst_n), + .clk (ap_clk), + .reset (~ap_rst_n), `REPEAT (`M_AXI_MEM_NUM_BANKS, AXI_MEM_ARGS, REPEAT_COMMA), - + .s_axi_ctrl_awvalid (s_axi_ctrl_awvalid), .s_axi_ctrl_awready (s_axi_ctrl_awready), - .s_axi_ctrl_awaddr (s_axi_ctrl_awaddr), + .s_axi_ctrl_awaddr (s_axi_ctrl_awaddr), .s_axi_ctrl_wvalid (s_axi_ctrl_wvalid), .s_axi_ctrl_wready (s_axi_ctrl_wready), .s_axi_ctrl_wdata (s_axi_ctrl_wdata), @@ -81,5 +81,5 @@ module vortex_afu #( .interrupt (interrupt) ); - + endmodule diff --git a/hw/scripts/ip_gen.sh b/hw/scripts/altera_ip_gen.sh similarity index 100% rename from hw/scripts/ip_gen.sh rename to hw/scripts/altera_ip_gen.sh diff --git a/hw/scripts/ila_insert.tcl b/hw/scripts/ila_insert.tcl new file mode 100644 index 0000000000..de9f0eec0e --- /dev/null +++ b/hw/scripts/ila_insert.tcl @@ -0,0 +1,231 @@ 
+###################################################################### +# Automatically inserts ILA instances in a batch flow, and calls "implement_debug_core". Can also be used in a GUI flow +# This should ONLY be invoked after synthesis, and before opt_design. If opt_design is called first, marked nets may be missing and not found +# Warning: Currently will skip a net if it has no obvious clock domain on the driver. Nets connected to input buffers will be dropped unless "mark_debug_clock" is attached to the net. +# Nets attached to VIO cores have the "mark_debug" attribute, and will be filtered out unless the "mark_debug_valid" attribute is attached. +# Supports the following additional attributes beyond "mark_debug" +# attribute mark_debug_valid of X : signal is "true"; -- Marks a net for ILA capture, even if net is also attached to a VIO core +# attribute mark_debug_clock of X : signal is "inst1_bufg/clock"; -- Specifies clock net to use for capturing this net. May create a new ILA core for that clock domain +# attribute mark_debug_depth of X : signal is "4096"; -- overrides default depth for this ILA core. valid values: 1024, 2048, ... 132072. Last attribute that is scanned will win. +# attribute mark_debug_adv_trigger of X : signal is "true"; -- specifies that advanced trigger capability will be added to ILA core +# Engineer: J. McCluskey +proc insert_ila { depth } { + # sequence through debug nets and organize them by clock in the + # clock_list array. Also create max and min array for bus indices + set dbgs [get_nets -hierarchical -filter {MARK_DEBUG}] + if {[llength $dbgs] == 0} { + puts "No debug net found. No ILA cores created" + return + } + + # process list of nets to find and reject nets that are attached to VIO cores. + # This has a side effect that VIO nets can't be monitored with an ILA + # This can be overridden by using the attribute "mark_debug_valid" = "true" on a net like this. 
+ set net_list {} + foreach net $dbgs { + if { [get_property -quiet MARK_DEBUG_VALID $net] != "true" } { + set pin_list [get_pins -of_objects [get_nets -segments $net]] + set not_vio_net 1 + foreach pin $pin_list { + if { [get_property IS_DEBUG_CORE [get_cells -of_object $pin]] == 1 } { + # It seems this net is attached to a debug core (i.e. VIO core) already, so we should skip adding it to the netlist + set not_vio_net 0 + break + } + } + if { $not_vio_net == 1 } { lappend net_list $net; } + } else { + lappend net_list $net + } + } + + # check again to see if we have any nets left now + if {[llength $net_list] == 0} { + puts "All nets with MARK_DEBUG are already connected to VIO cores. No ILA cores created" + return + } + + # Now that the netlist has been filtered, determine bus names and clock domains + foreach d $net_list { + # name is root name of a bus, index is the bit index in the bus + set name [regsub {\[[[:digit:]]+\]$} $d {}] + set index [regsub {^.*\[([[:digit:]]+)\]$} $d {\1}] + if {[string is integer -strict $index]} { + if {![info exists max($name)]} { + set max($name) $index + set min($name) $index + } elseif {$index > $max($name)} { + set max($name) $index + } elseif {$index < $min($name)} { + set min($name) $index + } + } else { + set max($name) -1 + } + # Now we search for the local clock net associated with the target net. + # There may be ambiguities or no answer in some cases + if {![info exists clocks($name)]} { + # does MARK_DEBUG_CLOCK decorate this net? If not, then search backwards to the driver cell + set clk_name [get_property -quiet MARK_DEBUG_CLOCK $d] + if { [llength $clk_name] == 0 } { + # trace to the clock net, tracing backwards via the driver pin. 
+ set driver_pin [get_pins -filter {DIRECTION == "OUT" && IS_LEAF == TRUE } -of_objects [ get_nets -segments $d ]] + set driver_cell [get_cells -of_objects $driver_pin] + if { [get_property IS_SEQUENTIAL $driver_cell] == 1 } { + set timing_arc [get_timing_arcs -to $driver_pin] + set cell_clock_pin [get_pins -filter {IS_CLOCK} [get_property FROM_PIN $timing_arc]] + if { [llength $cell_clock_pin] > 1 } { + puts "Error: in insert_ila. Found more than 1 clock pin in driver cell $driver_cell with timing arc $timing_arc for net $d" + continue + } + } else { + # our driver cell is a LUT or LUTMEM in combinatorial mode, we need to trace further. + set paths [get_timing_paths -quiet -through $driver_pin ] + if { [llength $paths] > 0 } { + # note that here we arbitrarily select the start point of the FIRST timing path... there might be multiple clocks with timing paths for this net. + # use MARK_DEBUG_CLOCK to specify another clock in this case. + set cell_clock_pin [get_pins [get_property STARTPOINT_PIN [lindex $paths 0]]] + } else { + # Can't find any timing path, so skip the net, and warn the user. + puts "Critical Warning: from insert_ila.tcl Can't trace any clock domain on driver of net $d" + puts "Please attach the attribute MARK_DEBUG_CLOCK with a string containing the net name of the desired sampling clock, .i.e." + puts "attribute mark_debug_clock of $d : signal is \"inst_bufg/clk\";" + continue + } + } + # clk_net will usually be a list of net segments, which needs filtering to determine the net connected to the driver pin + set clk_net [get_nets -segments -of_objects $cell_clock_pin] + } else { + set clk_net [get_nets -segments $clk_name] + if { [llength $clk_net] == 0 } { puts "MARK_DEBUG_CLOCK attribute on net $d does not match any known net. 
Please fix."; continue; } + } + # trace forward to net actually connected to clock buffer output, not any of the lower level segment names + set clocks($name) [get_nets -of_objects [get_pins -filter {DIRECTION == "OUT" && IS_LEAF == TRUE } -of_objects $clk_net]] + if { [llength $clocks($name)] == 0 } { + puts "Critical Warning: from insert_ila.tcl Can't trace any clock domain on driver of net $d" + puts "Please attach the attribute MARK_DEBUG_CLOCK with a string containing the net name of the desired sampling clock, .i.e." + puts "attribute mark_debug_clock of $d : signal is \"inst_bufg/clk\";" + continue + } + if {![info exists clock_list($clocks($name))]} { + # found a new clock + puts "New clock found is $clocks($name)" + set clock_list($clocks($name)) [list $name] + set ila_depth($clocks($name)) $depth + set ila_adv_trigger($clocks($name)) false + } else { + lappend clock_list($clocks($name)) $name + } + # Does this net have a "MARK_DEBUG_DEPTH" attribute attached? + set clk_depth [get_property -quiet MARK_DEBUG_DEPTH $d] + if { [llength $clk_depth] != 0 } { + set ila_depth($clocks($name)) $clk_depth + } + # Does this net have a "MARK_DEBUG_ADV_TRIGGER" attribute attached? + set trigger [get_property -quiet MARK_DEBUG_ADV_TRIGGER $d] + if { $trigger == "true" } { + set ila_adv_trigger($clocks($name)) true + } + } + } + + set ila_count 0 + set trig_out "" + set trig_out_ack "" + + if { [llength [array names clock_list]] > 1 } { + set enable_trigger true + } else { + set enable_trigger false + } + + foreach c [array names clock_list] { + # Now build and connect an ILA core for each clock domain + [incr ila_count ] + set ila_inst "ila_$ila_count" + # first verify if depth is a member of the set, 1024, 2048, 4096, 8192, ... 131072 + if { $ila_depth($c) < 1024 || [expr $ila_depth($c) & ($ila_depth($c) - 1)] || $ila_depth($c) > 131072 } { + # Depth is not right... 
lets fix it, and continue + if { $ila_depth($c) < 1024 } { + set new_depth 1024 + } elseif { $ila_depth($c) > 131072 } { + set new_depth 131072 + } else { + # round value to next highest power of 2, (in log space) + set new_depth [expr 1 << int( log($ila_depth($c))/log(2) + .9999 )] + } + puts "Can't create ILA core $ila_inst with depth of $ila_depth($c)! Changed capture depth to $new_depth" + set ila_depth($c) $new_depth + } + # create ILA and connect its clock + puts "Creating ILA $ila_inst with clock $c, capture depth $ila_depth($c) and advanced trigger = $ila_adv_trigger($c)" + create_debug_core $ila_inst ila + if { $ila_adv_trigger($c) } { set mu_cnt 4; } else { set mu_cnt 2; } + set_property C_DATA_DEPTH $ila_depth($c) [get_debug_cores $ila_inst] + set_property C_TRIGIN_EN $enable_trigger [get_debug_cores $ila_inst] + set_property C_TRIGOUT_EN $enable_trigger [get_debug_cores $ila_inst] + set_property C_ADV_TRIGGER $ila_adv_trigger($c) [get_debug_cores $ila_inst] + set_property C_INPUT_PIPE_STAGES 1 [get_debug_cores $ila_inst] + set_property C_EN_STRG_QUAL true [get_debug_cores $ila_inst] + set_property ALL_PROBE_SAME_MU true [get_debug_cores $ila_inst] + set_property ALL_PROBE_SAME_MU_CNT $mu_cnt [get_debug_cores $ila_inst] + set_property port_width 1 [get_debug_ports $ila_inst/clk] + connect_debug_port $ila_inst/clk $c + # hookup trigger ports in a circle if more than one ILA is created + if { $enable_trigger == true } { + create_debug_port $ila_inst trig_in + create_debug_port $ila_inst trig_in_ack + create_debug_port $ila_inst trig_out + create_debug_port $ila_inst trig_out_ack + if { $trig_out != "" } { + connect_debug_port $ila_inst/trig_in [get_nets $trig_out] + } + if { $trig_out_ack != "" } { + connect_debug_port $ila_inst/trig_in_ack [get_nets $trig_out_ack] + } + set trig_out ${ila_inst}_trig_out_$ila_count + create_net $trig_out + connect_debug_port $ila_inst/trig_out [get_nets $trig_out] + set trig_out_ack ${ila_inst}_trig_out_ack_$ila_count + 
create_net $trig_out_ack + connect_debug_port $ila_inst/trig_out_ack [get_nets $trig_out_ack] + } + # add probes + set nprobes 0 + foreach n [lsort $clock_list($c)] { + set nets {} + if {$max($n) < 0} { + lappend nets [get_nets $n] + } else { + # n is a bus name + for {set i $min($n)} {$i <= $max($n)} {incr i} { + lappend nets [get_nets $n[$i]] + } + } + set prb probe$nprobes + if {$nprobes > 0} { + create_debug_port $ila_inst probe + } + set_property port_width [llength $nets] [get_debug_ports $ila_inst/$prb] + connect_debug_port $ila_inst/$prb $nets + incr nprobes + } + } + + # at this point, we need to complete the circular connection of trigger outputs and acks + if { $enable_trigger == true } { + connect_debug_port ila_1/trig_in [get_nets $trig_out] + connect_debug_port ila_1/trig_in_ack [get_nets $trig_out_ack] + } + set project_found [get_projects -quiet] + if { $project_found != "New Project" } { + puts "Saving constraints now in project [current_project -quiet]" + save_constraints_as debug_constraints.xdc + } + + # run ILA cores implementation + implement_debug_core + + # write out probe info file + write_debug_probes -force debug_nets.ltx +} \ No newline at end of file diff --git a/hw/syn/xilinx/scripts/gen_ip.tcl b/hw/scripts/xilinx_ip_gen.tcl similarity index 100% rename from hw/syn/xilinx/scripts/gen_ip.tcl rename to hw/scripts/xilinx_ip_gen.tcl diff --git a/hw/syn/altera/dut/Makefile b/hw/syn/altera/dut/Makefile index 5f1dd62fe5..e5655c5fda 100644 --- a/hw/syn/altera/dut/Makefile +++ b/hw/syn/altera/dut/Makefile @@ -13,7 +13,7 @@ IP_CACHE_DIR := $(ROOT_DIR)/hw/syn/altera/ip_cache/$(DEVICE_FAMILY) ip-gen: $(IP_CACHE_DIR)/ip_gen.log $(IP_CACHE_DIR)/ip_gen.log: - $(SCRIPT_DIR)/ip_gen.sh $(IP_CACHE_DIR) + $(SCRIPT_DIR)/altera_ip_gen.sh $(IP_CACHE_DIR) unittest: mkdir -p unittest/$(BUILD_DIR) diff --git a/hw/syn/altera/opae/Makefile b/hw/syn/altera/opae/Makefile index 53b1210d83..a3d373cb09 100644 --- a/hw/syn/altera/opae/Makefile +++ 
b/hw/syn/altera/opae/Makefile @@ -102,7 +102,7 @@ all: swconfig ip-gen setup build ip-gen: $(IP_CACHE_DIR)/ip-gen.log $(IP_CACHE_DIR)/ip-gen.log: - $(SCRIPT_DIR)/ip_gen.sh $(IP_CACHE_DIR) + $(SCRIPT_DIR)/altera_ip_gen.sh $(IP_CACHE_DIR) swconfig: vortex_afu.h vortex_afu.h: $(SRC_DIR)/vortex_afu.json diff --git a/hw/syn/xilinx/README b/hw/syn/xilinx/README index 563c4c17e0..17d398dfa2 100644 --- a/hw/syn/xilinx/README +++ b/hw/syn/xilinx/README @@ -5,7 +5,7 @@ platforminfo -l xbutil validate --device 0000:09:00.1 --verbose # generate FPU IPs -vivado -mode batch -source scripts/gen_ip.tcl -tclargs ip/xilinx_u50_gen3x16_xdma_5_202210_1 +vivado -mode batch -source xilinx_ip_gen.tcl -tclargs ip/xilinx_u50_gen3x16_xdma_5_202210_1 # build FPGA PREFIX=build_base_1c NUM_CORES=1 TARGET=hw_emu PLATFORM=xilinx_u50_gen3x16_xdma_5_202210_1 make > build_u50_hw_emu_base_1c.log 2>&1 & diff --git a/hw/syn/xilinx/dut/common.mk b/hw/syn/xilinx/dut/common.mk index b2a8e71c75..933621bef6 100644 --- a/hw/syn/xilinx/dut/common.mk +++ b/hw/syn/xilinx/dut/common.mk @@ -31,9 +31,9 @@ project_1/sources.txt: build: $(PROJECT).xpr $(PROJECT).xpr: project_1/sources.txt ifdef FPU_IP - MAX_JOBS=$(JOBS) FPU_IP=project_1/ip $(VIVADO) -mode batch -source $(SRC_DIR)/project.tcl -tclargs $(TOP_LEVEL_ENTITY) $(DEVICE) project_1/sources.txt $(SRC_DIR)/project.xdc $(SCRIPT_DIR) $(SRC_DIR)/../scripts + MAX_JOBS=$(JOBS) FPU_IP=project_1/ip $(VIVADO) -mode batch -source $(SRC_DIR)/project.tcl -tclargs $(TOP_LEVEL_ENTITY) $(DEVICE) project_1/sources.txt $(SRC_DIR)/project.xdc $(SCRIPT_DIR) else - MAX_JOBS=$(JOBS) $(VIVADO) -mode batch -source $(SRC_DIR)/project.tcl -tclargs $(TOP_LEVEL_ENTITY) $(DEVICE) project_1/sources.txt $(SRC_DIR)/project.xdc $(SCRIPT_DIR) $(SRC_DIR)/../scripts + MAX_JOBS=$(JOBS) $(VIVADO) -mode batch -source $(SRC_DIR)/project.tcl -tclargs $(TOP_LEVEL_ENTITY) $(DEVICE) project_1/sources.txt $(SRC_DIR)/project.xdc $(SCRIPT_DIR) endif clean: diff --git a/hw/syn/xilinx/dut/project.tcl 
b/hw/syn/xilinx/dut/project.tcl index e23ce2997a..dcaf883fa3 100644 --- a/hw/syn/xilinx/dut/project.tcl +++ b/hw/syn/xilinx/dut/project.tcl @@ -14,9 +14,9 @@ # Start time set start_time [clock seconds] -if { $::argc != 6 } { +if { $::argc != 5 } { puts "ERROR: Program \"$::argv0\" requires 5 arguments!\n" - puts "Usage: $::argv0 \n" + puts "Usage: $::argv0 \n" exit } @@ -28,14 +28,12 @@ set device_part [lindex $::argv 1] set vcs_file [lindex $::argv 2] set xdc_file [lindex $::argv 3] set tool_dir [lindex $::argv 4] -set script_dir [lindex $::argv 5] puts "Using top_module=$top_module" puts "Using device_part=$device_part" puts "Using vcs_file=$vcs_file" puts "Using xdc_file=$xdc_file" puts "Using tool_dir=$tool_dir" -puts "Using script_dir=$script_dir" # Set the number of jobs based on MAX_JOBS environment variable if {[info exists ::env(MAX_JOBS)]} { @@ -50,7 +48,7 @@ if {[info exists ::env(FPU_IP)]} { set ip_dir $::env(FPU_IP) set argv [list $ip_dir $device_part] set argc 2 - source ${script_dir}/gen_ip.tcl + source ${tool_dir}/xilinx_ip_gen.tcl } source "${tool_dir}/parse_vcs_list.tcl" diff --git a/hw/syn/xilinx/scripts/gen_xo.tcl b/hw/syn/xilinx/gen_xo.tcl similarity index 94% rename from hw/syn/xilinx/scripts/gen_xo.tcl rename to hw/syn/xilinx/gen_xo.tcl index 0f95f09be0..7d3342a4cf 100644 --- a/hw/syn/xilinx/scripts/gen_xo.tcl +++ b/hw/syn/xilinx/gen_xo.tcl @@ -1,10 +1,10 @@ # Copyright © 2019-2023 -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -12,7 +12,7 @@ # limitations under the License. 
if { $::argc != 5 } { - puts "ERROR: Program \"$::argv0\" requires 4 arguments!\n" + puts "ERROR: Program \"$::argv0\" requires 5 arguments!\n" puts "Usage: $::argv0 \n" exit } diff --git a/hw/syn/xilinx/scripts/kill_build.sh b/hw/syn/xilinx/kill_build.sh similarity index 100% rename from hw/syn/xilinx/scripts/kill_build.sh rename to hw/syn/xilinx/kill_build.sh diff --git a/hw/syn/xilinx/scripts/kill_hwserver.sh b/hw/syn/xilinx/kill_hwserver.sh similarity index 100% rename from hw/syn/xilinx/scripts/kill_hwserver.sh rename to hw/syn/xilinx/kill_hwserver.sh diff --git a/hw/syn/xilinx/scripts/kill_sim.sh b/hw/syn/xilinx/kill_sim.sh similarity index 100% rename from hw/syn/xilinx/scripts/kill_sim.sh rename to hw/syn/xilinx/kill_sim.sh diff --git a/hw/syn/xilinx/scripts/package_kernel.tcl b/hw/syn/xilinx/package_kernel.tcl similarity index 86% rename from hw/syn/xilinx/scripts/package_kernel.tcl rename to hw/syn/xilinx/package_kernel.tcl index ed8a683acc..2c314754d0 100644 --- a/hw/syn/xilinx/scripts/package_kernel.tcl +++ b/hw/syn/xilinx/package_kernel.tcl @@ -71,65 +71,60 @@ set_property -verbose -name "top" -value ${krnl_name} -objects $obj if { $chipscope == 1 } { # hw debugging - create_ip -name axis_ila -vendor xilinx.com -library ip -version 1.1 -module_name ila_afu + create_ip -name ila -vendor xilinx.com -library ip -version 6.2 -module_name ila_afu set_property -dict [list CONFIG.C_ADV_TRIGGER {true} \ CONFIG.C_EN_STRG_QUAL {1} \ - CONFIG.C_DATA_DEPTH {4096} \ + CONFIG.C_DATA_DEPTH {8192} \ CONFIG.C_NUM_OF_PROBES {2} \ CONFIG.C_PROBE0_WIDTH {8} \ - CONFIG.C_PROBE1_WIDTH {24} \ + CONFIG.C_PROBE1_WIDTH {64} \ + CONFIG.ALL_PROBE_SAME_MU {false} \ + CONFIG.ALL_PROBE_SAME_MU_CNT {2} \ ] [get_ips ila_afu] generate_target {instantiation_template} [get_files ila_afu.xci] set_property generate_synth_checkpoint false [get_files ila_afu.xci] - create_ip -name axis_ila -vendor xilinx.com -library ip -version 1.1 -module_name ila_fetch + create_ip -name ila -vendor 
xilinx.com -library ip -version 6.2 -module_name ila_fetch set_property -dict [list CONFIG.C_ADV_TRIGGER {true} \ CONFIG.C_EN_STRG_QUAL {1} \ - CONFIG.C_DATA_DEPTH {4096} \ + CONFIG.C_DATA_DEPTH {8192} \ CONFIG.C_NUM_OF_PROBES {3} \ CONFIG.C_PROBE0_WIDTH {128} \ CONFIG.C_PROBE1_WIDTH {128} \ CONFIG.C_PROBE2_WIDTH {128} \ + CONFIG.ALL_PROBE_SAME_MU {false} \ + CONFIG.ALL_PROBE_SAME_MU_CNT {2} \ ] [get_ips ila_fetch] generate_target {instantiation_template} [get_files ila_fetch.xci] set_property generate_synth_checkpoint false [get_files ila_fetch.xci] - create_ip -name axis_ila -vendor xilinx.com -library ip -version 1.1 -module_name ila_issue + create_ip -name ila -vendor xilinx.com -library ip -version 6.2 -module_name ila_issue set_property -dict [list CONFIG.C_ADV_TRIGGER {true} \ CONFIG.C_EN_STRG_QUAL {1} \ - CONFIG.C_DATA_DEPTH {4096} \ + CONFIG.C_DATA_DEPTH {8192} \ CONFIG.C_NUM_OF_PROBES {2} \ CONFIG.C_PROBE0_WIDTH {256} \ CONFIG.C_PROBE1_WIDTH {128} \ + CONFIG.ALL_PROBE_SAME_MU {false} \ + CONFIG.ALL_PROBE_SAME_MU_CNT {2} \ ] [get_ips ila_issue] generate_target {instantiation_template} [get_files ila_issue.xci] set_property generate_synth_checkpoint false [get_files ila_issue.xci] - create_ip -name axis_ila -vendor xilinx.com -library ip -version 1.1 -module_name ila_lsu + create_ip -name ila -vendor xilinx.com -library ip -version 6.2 -module_name ila_lsu set_property -dict [list CONFIG.C_ADV_TRIGGER {true} \ CONFIG.C_EN_STRG_QUAL {1} \ - CONFIG.C_DATA_DEPTH {4096} \ + CONFIG.C_DATA_DEPTH {8192} \ CONFIG.C_NUM_OF_PROBES {4} \ CONFIG.C_PROBE0_WIDTH {256} \ CONFIG.C_PROBE1_WIDTH {128} \ CONFIG.C_PROBE2_WIDTH {288} \ CONFIG.C_PROBE3_WIDTH {256} \ + CONFIG.ALL_PROBE_SAME_MU {false} \ + CONFIG.ALL_PROBE_SAME_MU_CNT {2} \ ] [get_ips ila_lsu] generate_target {instantiation_template} [get_files ila_lsu.xci] set_property generate_synth_checkpoint false [get_files ila_lsu.xci] - - create_ip -name axis_ila -vendor xilinx.com -library ip -version 1.1 -module_name 
ila_msched - set_property -dict [list CONFIG.C_ADV_TRIGGER {true} \ - CONFIG.C_EN_STRG_QUAL {1} \ - CONFIG.C_DATA_DEPTH {4096} \ - CONFIG.C_NUM_OF_PROBES {4} \ - CONFIG.C_PROBE0_WIDTH {128} \ - CONFIG.C_PROBE1_WIDTH {128} \ - CONFIG.C_PROBE2_WIDTH {128} \ - CONFIG.C_PROBE3_WIDTH {128} \ - ] [get_ips ila_msched] - generate_target {instantiation_template} [get_files ila_msched.xci] - set_property generate_synth_checkpoint false [get_files ila_msched.xci] } update_compile_order -fileset sources_1 diff --git a/hw/syn/xilinx/sandbox/Makefile b/hw/syn/xilinx/sandbox/Makefile index d1ebf9afaa..e4def9c4e8 100644 --- a/hw/syn/xilinx/sandbox/Makefile +++ b/hw/syn/xilinx/sandbox/Makefile @@ -19,16 +19,16 @@ KERNEL ?= fibonacci NCPUS := $(shell lscpu | grep "^Core(s) per socket:" | awk '{print $$4}') JOBS ?= $(shell echo $$(( $(NCPUS) > $(MAX_JOBS) ? $(MAX_JOBS) : $(NCPUS) ))) -COE_FILE := $(shell realpath kernel.bin.coe) -ESCAPED_COE_FILE := $(shell echo "$(COE_FILE)" | sed -e 's/[\/&]/\\&/g') - # include paths FPU_INCLUDE = -I$(RTL_DIR)/fpu ifneq (,$(findstring FPU_FPNEW,$(CONFIGS))) FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/include -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/src -J$(THIRD_PARTY_DIR)/cvfpu/src/fpu_div_sqrt_mvp/hdl -J$(THIRD_PARTY_DIR)/cvfpu/src endif +TEX_INCLUDE = -I$(RTL_DIR)/tex +RASTER_INCLUDE = -I$(RTL_DIR)/raster +OM_INCLUDE = -I$(RTL_DIR)/om RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache -RTL_INCLUDE += $(FPU_INCLUDE) +RTL_INCLUDE += $(FPU_INCLUDE) $(TEX_INCLUDE) $(RASTER_INCLUDE) $(OM_INCLUDE) RTL_INCLUDE += -I$(SRC_DIR) # compilation flags @@ -43,9 +43,6 @@ CFLAGS += -DSTACK_BASE_ADDR=32\'hFF000 all: build -project2.tcl: project.tcl - @sed -e "s/@COE_FILE@/$(ESCAPED_COE_FILE)/g" $< > $@ - $(KERNEL).bin: $(MAKE) -C $(ROOT_DIR)/kernel clean STACK_BASE_ADDR=0xFF000 $(MAKE) -C $(ROOT_DIR)/kernel @@ -61,13 +58,14 @@ project_1/sources.txt: mkdir -p project_1 
$(SCRIPT_DIR)/gen_sources.sh $(CFLAGS) -P -Cproject_1/src -Oproject_1/sources.txt -build: project_1/project_1.xpr -project_1/project_1.xpr: project_1/sources.txt kernel.bin.coe project2.tcl - MAX_JOBS=$(JOBS) $(VIVADO) -mode batch -source project2.tcl -tclargs $(DEVICE) project_1/sources.txt $(SCRIPT_DIR) +build: done.dcp +done.dcp: project_1/sources.txt kernel.bin.coe project.tcl + MAX_JOBS=$(JOBS) $(VIVADO) -mode batch -source project.tcl -tclargs $(DEVICE) project_1/sources.txt $(SCRIPT_DIR) + echo done > done.dcp run: project_1/project_1.xpr $(VIVADO) project_1/project_1.xpr & clean: - rm -rf project_1 project2.tcl $(KERNEL).bin kernel.bin.coe - rm -rf .Xil *.log *.jou + rm -rf project_1 project1.tcl $(KERNEL).bin kernel.bin.coe + rm -rf .Xil *.log *.jou *.dcp *.rpt diff --git a/hw/syn/xilinx/sandbox/project.tcl.in b/hw/syn/xilinx/sandbox/project.tcl.in index d4fa45581f..8926b43ad0 100644 --- a/hw/syn/xilinx/sandbox/project.tcl.in +++ b/hw/syn/xilinx/sandbox/project.tcl.in @@ -11,9 +11,6 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-# Start time -set start_time [clock seconds] - if { $::argc != 3 } { puts "ERROR: Program \"$::argv0\" requires 3 arguments!\n" puts "Usage: $::argv0 \n" @@ -24,7 +21,7 @@ set device_part [lindex $::argv 0] set vcs_file [lindex $::argv 1] set tool_dir [lindex $::argv 2] -uuts "Using device_part=$device_part" +puts "Using device_part=$device_part" puts "Using vcs_file=$vcs_file" puts "Using tool_dir=$tool_dir" @@ -37,398 +34,436 @@ if {[info exists ::env(MAX_JOBS)]} { set num_jobs 0 } -set origin_dir [file normalize "."] +proc run_setup {} { + global device_part vcs_file tool_dir -# Use origin directory path location variable, if specified in the tcl shell -if { [info exists ::origin_dir_loc] } { - set origin_dir $::origin_dir_loc -} + # Set the project name + set project_name "project_1" -# Set the project name -set project_name "project_1" + # Use project name variable, if specified in the tcl shell + if { [info exists ::user_project_name] } { + set project_name $::user_project_name + } -# Use project name variable, if specified in the tcl shell -if { [info exists ::user_project_name] } { - set project_name $::user_project_name -} + source "${tool_dir}/parse_vcs_list.tcl" + set vlist [parse_vcs_list "${vcs_file}"] -source "${tool_dir}/parse_vcs_list.tcl" -set vlist [parse_vcs_list "${vcs_file}"] + set vsources_list [lindex $vlist 0] + set vincludes_list [lindex $vlist 1] + set vdefines_list [lindex $vlist 2] -set vsources_list [lindex $vlist 0] -set vincludes_list [lindex $vlist 1] -set vdefines_list [lindex $vlist 2] + #puts ${vsources_list} + #puts ${vincludes_list} + #puts ${vdefines_list} -#puts ${vsources_list} -#puts ${vincludes_list} -#puts ${vdefines_list} + # Create project + create_project $project_name $project_name -force -part $device_part -# Create project -create_project $project_name $project_name -force -part $device_part + # Set the directory path for the new project + set proj_dir [get_property directory [current_project]] -# Set the directory 
path for the new project -set proj_dir [get_property directory [current_project]] + # Create 'sources_1' fileset (if not found) + if {[string equal [get_filesets -quiet sources_1] ""]} { + create_fileset -srcset sources_1 + } -# Create 'sources_1' fileset (if not found) -if {[string equal [get_filesets -quiet sources_1] ""]} { - create_fileset -srcset sources_1 -} + # add source files + set obj [get_filesets sources_1] + add_files -norecurse -verbose -fileset $obj ${vsources_list} -# add source files -set obj [get_filesets sources_1] -add_files -norecurse -verbose -fileset $obj ${vsources_list} + # process defines + set obj [get_filesets sources_1] + foreach def $vdefines_list { + set_property -name "verilog_define" -value $def -objects $obj + } -# process defines -set obj [get_filesets sources_1] -foreach def $vdefines_list { - set_property -name "verilog_define" -value $def -objects $obj -} + # Set 'sources_1' fileset properties + set obj [get_filesets sources_1] + set_property -name "name" -value "sources_1" -objects $obj + set_property -name "top" -value "design_1_wrapper" -objects $obj -# Set 'sources_1' fileset properties -set obj [get_filesets sources_1] -set_property -name "name" -value "sources_1" -objects $obj -set_property -name "top" -value "design_1_wrapper" -objects $obj + # Create 'constrs_1' fileset (if not found) + if {[string equal [get_filesets -quiet constrs_1] ""]} { + create_fileset -constrset constrs_1 + } -# Create 'constrs_1' fileset (if not found) -if {[string equal [get_filesets -quiet constrs_1] ""]} { - create_fileset -constrset constrs_1 -} + # Set 'constrs_1' fileset object + set obj [get_filesets constrs_1] -# Set 'constrs_1' fileset object -set obj [get_filesets constrs_1] + # Empty (no sources present) -# Empty (no sources present) + # Set 'constrs_1' fileset properties + set obj [get_filesets constrs_1] + set_property -name "constrs_type" -value "XDC" -objects $obj + set_property -name "name" -value "constrs_1" -objects $obj + 
set_property -name "target_constrs_file" -value "" -objects $obj -# Set 'constrs_1' fileset properties -set obj [get_filesets constrs_1] -set_property -name "constrs_type" -value "XDC" -objects $obj -set_property -name "name" -value "constrs_1" -objects $obj -set_property -name "target_constrs_file" -value "" -objects $obj + # Create 'sim_1' fileset (if not found) + if {[string equal [get_filesets -quiet sim_1] ""]} { + create_fileset -simset sim_1 + } -# Create 'sim_1' fileset (if not found) -if {[string equal [get_filesets -quiet sim_1] ""]} { - create_fileset -simset sim_1 -} + set testbench_file "" + foreach file ${vsources_list} { + if {[string match "*testbench.v" $file]} { + set testbench_file [file normalize $file] + break + } + } -# Set 'sim_1' fileset object -set obj [get_filesets sim_1] -# Import local files from the original project -set files [list \ - [file normalize "testbench.v" ]\ -] -set imported_files [import_files -fileset sim_1 $files] + # Set 'sim_1' fileset object + set obj [get_filesets sim_1] + # Import local files from the original project + set files [list $testbench_file] + set imported_files [import_files -fileset sim_1 $files] -# Set 'sim_1' fileset file properties for remote files -# None + # Set 'sim_1' fileset file properties for remote files + # None -# Set 'sim_1' fileset file properties for local files + # Set 'sim_1' fileset file properties for local files set file "testbench.v" set file_obj [get_files -of_objects [get_filesets sim_1] [list "*$file"]] -set_property -name "file_type" -value "Verilog" -objects $file_obj -set_property -name "is_enabled" -value "1" -objects $file_obj -set_property -name "is_global_include" -value "0" -objects $file_obj -set_property -name "library" -value "xil_defaultlib" -objects $file_obj -set_property -name "path_mode" -value "RelativeFirst" -objects $file_obj -set_property -name "used_in" -value "synthesis implementation simulation" -objects $file_obj -set_property -name "used_in_implementation" 
-value "1" -objects $file_obj -set_property -name "used_in_simulation" -value "1" -objects $file_obj -set_property -name "used_in_synthesis" -value "1" -objects $file_obj - -# Set 'sim_1' fileset properties -set obj [get_filesets sim_1] -set_property -name "32bit" -value "0" -objects $obj -set_property -name "force_compile_glbl" -value "0" -objects $obj -set_property -name "generate_scripts_only" -value "0" -objects $obj -set_property -name "generic" -value "" -objects $obj -set_property -name "hbs.configure_design_for_hier_access" -value "1" -objects $obj -set_property -name "include_dirs" -value "" -objects $obj -set_property -name "incremental" -value "1" -objects $obj -set_property -name "name" -value "sim_1" -objects $obj -set_property -name "source_set" -value "sources_1" -objects $obj -set_property -name "systemc_include_dirs" -value "" -objects $obj -set_property -name "top" -value "testbench" -objects $obj -set_property -name "top_auto_set" -value "0" -objects $obj -set_property -name "top_lib" -value "xil_defaultlib" -objects $obj -set_property -name "verilog_define" -value "" -objects $obj -set_property -name "verilog_uppercase" -value "0" -objects $obj - -# Set 'utils_1' fileset object -set obj [get_filesets utils_1] -# Empty (no sources present) - -# Set 'utils_1' fileset properties -set obj [get_filesets utils_1] -set_property -name "name" -value "utils_1" -objects $obj - -# Proc to create BD design_1 -proc cr_bd_design_1 { parentCell } { -# The design that will be created by this Tcl proc contains the following -# module references: -# Vortex_top - -# CHANGE DESIGN NAME HERE -set design_name design_1 - -common::send_gid_msg -ssname BD::TCL -id 2010 -severity "INFO" "Currently there is no design <$design_name> in project, so creating one..." 
- -create_bd_design $design_name - -set bCheckIPsPassed 1 -################################################################## -# CHECK IPs -################################################################## -set bCheckIPs 1 -if { $bCheckIPs == 1 } { - set list_check_ips "\ - xilinx.com:ip:axi_bram_ctrl:4.1\ - xilinx.com:ip:blk_mem_gen:8.4\ - " - - set list_ips_missing "" - common::send_gid_msg -ssname BD::TCL -id 2011 -severity "INFO" "Checking if the following IPs exist in the project's IP catalog: $list_check_ips ." + set_property -name "file_type" -value "Verilog" -objects $file_obj + set_property -name "is_enabled" -value "1" -objects $file_obj + set_property -name "is_global_include" -value "0" -objects $file_obj + set_property -name "library" -value "xil_defaultlib" -objects $file_obj + set_property -name "path_mode" -value "RelativeFirst" -objects $file_obj + set_property -name "used_in" -value "synthesis implementation simulation" -objects $file_obj + set_property -name "used_in_implementation" -value "1" -objects $file_obj + set_property -name "used_in_simulation" -value "1" -objects $file_obj + set_property -name "used_in_synthesis" -value "1" -objects $file_obj + + # Set 'sim_1' fileset properties + set obj [get_filesets sim_1] + set_property -name "32bit" -value "0" -objects $obj + set_property -name "force_compile_glbl" -value "0" -objects $obj + set_property -name "generate_scripts_only" -value "0" -objects $obj + set_property -name "generic" -value "" -objects $obj + set_property -name "hbs.configure_design_for_hier_access" -value "1" -objects $obj + set_property -name "include_dirs" -value "" -objects $obj + set_property -name "incremental" -value "1" -objects $obj + set_property -name "name" -value "sim_1" -objects $obj + set_property -name "source_set" -value "sources_1" -objects $obj + set_property -name "systemc_include_dirs" -value "" -objects $obj + set_property -name "top" -value "testbench" -objects $obj + set_property -name "top_auto_set" 
-value "0" -objects $obj + set_property -name "top_lib" -value "xil_defaultlib" -objects $obj + set_property -name "verilog_define" -value "" -objects $obj + set_property -name "verilog_uppercase" -value "0" -objects $obj + + # Set 'utils_1' fileset object + set obj [get_filesets utils_1] + # Empty (no sources present) + + # Set 'utils_1' fileset properties + set obj [get_filesets utils_1] + set_property -name "name" -value "utils_1" -objects $obj + + # Proc to create BD design_1 + proc cr_bd_design_1 { parentCell } { + # The design that will be created by this Tcl proc contains the following + # module references: + # Vortex_top + + # CHANGE DESIGN NAME HERE + set design_name design_1 + + common::send_gid_msg -ssname BD::TCL -id 2010 -severity "INFO" "Currently there is no design <$design_name> in project, so creating one..." + + create_bd_design $design_name + + set bCheckIPsPassed 1 + ################################################################## + # CHECK IPs + ################################################################## + set bCheckIPs 1 + if { $bCheckIPs == 1 } { + set list_check_ips "\ + xilinx.com:ip:axi_bram_ctrl:4.1\ + xilinx.com:ip:blk_mem_gen:8.4\ + " + + set list_ips_missing "" + common::send_gid_msg -ssname BD::TCL -id 2011 -severity "INFO" "Checking if the following IPs exist in the project's IP catalog: $list_check_ips ." + + foreach ip_vlnv $list_check_ips { + set ip_obj [get_ipdefs -all $ip_vlnv] + if { $ip_obj eq "" } { + lappend list_ips_missing $ip_vlnv + } + } + + if { $list_ips_missing ne "" } { + catch {common::send_gid_msg -ssname BD::TCL -id 2012 -severity "ERROR" "The following IPs are not found in the IP Catalog:\n $list_ips_missing\n\nResolution: Please add the repository containing the IP(s) to the project." 
} + set bCheckIPsPassed 0 + } + + } + + ################################################################## + # CHECK Modules + ################################################################## + set bCheckModules 1 + if { $bCheckModules == 1 } { + set list_check_mods "\ + Vortex_top\ + " + + set list_mods_missing "" + common::send_gid_msg -ssname BD::TCL -id 2020 -severity "INFO" "Checking if the following modules exist in the project's sources: $list_check_mods ." + + foreach mod_vlnv $list_check_mods { + if { [can_resolve_reference $mod_vlnv] == 0 } { + lappend list_mods_missing $mod_vlnv + } + } + + if { $list_mods_missing ne "" } { + catch {common::send_gid_msg -ssname BD::TCL -id 2021 -severity "ERROR" "The following module(s) are not found in the project: $list_mods_missing" } + common::send_gid_msg -ssname BD::TCL -id 2022 -severity "INFO" "Please add source files for the missing module(s) above." + set bCheckIPsPassed 0 + } + } - foreach ip_vlnv $list_check_ips { - set ip_obj [get_ipdefs -all $ip_vlnv] - if { $ip_obj eq "" } { - lappend list_ips_missing $ip_vlnv - } - } + if { $bCheckIPsPassed != 1 } { + common::send_gid_msg -ssname BD::TCL -id 2023 -severity "WARNING" "Will not continue with creation of design due to the error(s) above." + return 3 + } - if { $list_ips_missing ne "" } { - catch {common::send_gid_msg -ssname BD::TCL -id 2012 -severity "ERROR" "The following IPs are not found in the IP Catalog:\n $list_ips_missing\n\nResolution: Please add the repository containing the IP(s) to the project." 
} - set bCheckIPsPassed 0 - } + variable script_folder + if { $parentCell eq "" } { + set parentCell [get_bd_cells /] } - ################################################################## - # CHECK Modules - ################################################################## - set bCheckModules 1 - if { $bCheckModules == 1 } { - set list_check_mods "\ - Vortex_top\ - " + # Get object for parentCell + set parentObj [get_bd_cells $parentCell] + if { $parentObj == "" } { + catch {common::send_gid_msg -ssname BD::TCL -id 2090 -severity "ERROR" "Unable to find parent cell <$parentCell>!"} + return + } - set list_mods_missing "" - common::send_gid_msg -ssname BD::TCL -id 2020 -severity "INFO" "Checking if the following modules exist in the project's sources: $list_check_mods ." + # Make sure parentObj is hier blk + set parentType [get_property TYPE $parentObj] + if { $parentType ne "hier" } { + catch {common::send_gid_msg -ssname BD::TCL -id 2091 -severity "ERROR" "Parent <$parentObj> has TYPE = <$parentType>. Expected to be ."} + return + } - foreach mod_vlnv $list_check_mods { - if { [can_resolve_reference $mod_vlnv] == 0 } { - lappend list_mods_missing $mod_vlnv - } - } + # Save current instance; Restore later + set oldCurInst [current_bd_instance .] 
+ + # Set parent object as current + current_bd_instance $parentObj + + + # Create interface ports + + # Create ports + set clk_100MHz [ create_bd_port -dir I -type clk -freq_hz 100000000 clk_100MHz ] + set resetn [ create_bd_port -dir I -type rst resetn ] + set_property -dict [ list \ + CONFIG.POLARITY {ACTIVE_LOW} \ + ] $resetn + set vx_busy [ create_bd_port -dir O vx_busy ] + set vx_reset [ create_bd_port -dir I -type rst vx_reset ] + set_property -dict [ list \ + CONFIG.POLARITY {ACTIVE_HIGH} \ + ] $vx_reset + + set dcr_wr_valid [ create_bd_port -dir I dcr_wr_valid ] + set dcr_wr_addr [ create_bd_port -dir I -from 11 -to 0 dcr_wr_addr ] + set dcr_wr_data [ create_bd_port -dir I -from 31 -to 0 dcr_wr_data ] + + # Create instance: Vortex_top_0, and set properties + set block_name Vortex_top + set block_cell_name Vortex_top_0 + if { [catch {set Vortex_top_0 [create_bd_cell -type module -reference $block_name $block_cell_name] } errmsg] } { + catch {common::send_gid_msg -ssname BD::TCL -id 2095 -severity "ERROR" "Unable to add referenced block <$block_name>. Please add the files for ${block_name}'s definition into the project."} + return 1 + } elseif { $Vortex_top_0 eq "" } { + catch {common::send_gid_msg -ssname BD::TCL -id 2096 -severity "ERROR" "Unable to referenced block <$block_name>. 
Please add the files for ${block_name}'s definition into the project."} + return 1 + } + + # Create instance: axi_bram_ctrl_0, and set properties + set axi_bram_ctrl_0 [ create_bd_cell -type ip -vlnv xilinx.com:ip:axi_bram_ctrl:4.1 axi_bram_ctrl_0 ] + set_property -dict [ list \ + CONFIG.DATA_WIDTH {512} \ + CONFIG.ECC_TYPE {0} \ + ] $axi_bram_ctrl_0 + + # Create instance: axi_bram_ctrl_0_bram, and set properties + set axi_bram_ctrl_0_bram [ create_bd_cell -type ip -vlnv xilinx.com:ip:blk_mem_gen:8.4 axi_bram_ctrl_0_bram ] + + set_property -dict [ list \ + CONFIG.Assume_Synchronous_Clk {true} \ + CONFIG.Byte_Size {8} \ + CONFIG.Load_Init_File {true} \ + CONFIG.Coe_File {@CURRENTDIR@/hw/syn/xilinx/sandbox/kernel.bin.coe} \ + CONFIG.EN_SAFETY_CKT {true} \ + CONFIG.Enable_32bit_Address {true} \ + CONFIG.Fill_Remaining_Memory_Locations {false} \ + CONFIG.Memory_Type {Simple_Dual_Port_RAM} \ + CONFIG.Operating_Mode_A {NO_CHANGE} \ + CONFIG.Operating_Mode_B {READ_FIRST} \ + CONFIG.Port_B_Write_Rate {0} \ + CONFIG.Read_Width_A {512} \ + CONFIG.Read_Width_B {512} \ + CONFIG.Register_PortA_Output_of_Memory_Primitives {false} \ + CONFIG.Register_PortB_Output_of_Memory_Primitives {false} \ + CONFIG.Remaining_Memory_Locations {0} \ + CONFIG.Use_Byte_Write_Enable {true} \ + CONFIG.Use_RSTA_Pin {false} \ + CONFIG.Use_RSTB_Pin {true} \ + CONFIG.Write_Width_A {512} \ + CONFIG.Write_Depth_A {16384} \ + CONFIG.use_bram_block {Stand_Alone} \ + ] $axi_bram_ctrl_0_bram + + # Create interface connections + connect_bd_intf_net -intf_net Vortex_top_0_m_axi_mem [get_bd_intf_pins Vortex_top_0/m_axi_mem] [get_bd_intf_pins axi_bram_ctrl_0/S_AXI] + connect_bd_intf_net -intf_net axi_bram_ctrl_0_BRAM_PORTA [get_bd_intf_pins axi_bram_ctrl_0/BRAM_PORTA] [get_bd_intf_pins axi_bram_ctrl_0_bram/BRAM_PORTA] + connect_bd_intf_net -intf_net axi_bram_ctrl_0_BRAM_PORTB [get_bd_intf_pins axi_bram_ctrl_0/BRAM_PORTB] [get_bd_intf_pins axi_bram_ctrl_0_bram/BRAM_PORTB] + + # Create port connections + 
connect_bd_net -net Vortex_top_0_busy [get_bd_ports vx_busy] [get_bd_pins Vortex_top_0/busy] + connect_bd_net -net clk_wiz_clk_out1 [get_bd_ports clk_100MHz] [get_bd_pins Vortex_top_0/clk] [get_bd_pins axi_bram_ctrl_0/s_axi_aclk] + connect_bd_net -net resetn_1 [get_bd_ports resetn] [get_bd_pins axi_bram_ctrl_0/s_axi_aresetn] + connect_bd_net -net vx_reset_1 [get_bd_ports vx_reset] [get_bd_pins Vortex_top_0/reset] + connect_bd_net -net dcr_wr_valid_1 [get_bd_ports dcr_wr_valid] [get_bd_pins Vortex_top_0/dcr_wr_valid] + connect_bd_net -net dcr_wr_addr_1 [get_bd_ports dcr_wr_addr] [get_bd_pins Vortex_top_0/dcr_wr_addr] + connect_bd_net -net dcr_wr_data_1 [get_bd_ports dcr_wr_data] [get_bd_pins Vortex_top_0/dcr_wr_data] + + # Create address segments + assign_bd_address -offset 0x00000000 -range 0x00100000 -target_address_space [get_bd_addr_spaces Vortex_top_0/m_axi_mem] [get_bd_addr_segs axi_bram_ctrl_0/S_AXI/Mem0] -force + + # Perform GUI Layout + regenerate_bd_layout -layout_string { + "ActiveEmotionalView":"Default View", + "Default View_ScaleFactor":"1.0", + "Default View_TopLeft":"-195,-165", + "ExpandedHierarchyInLayout":"", + "guistr":"# # String gsaved with Nlview 7.0r4 2019-12-20 bk=1.5203 VDI=41 GEI=36 GUI=JA:10.0 TLS + # -string -flagsOSRD + preplace port clk_100MHz -pg 1 -lvl 0 -x 0 -y 40 -defaultsOSRD + preplace port resetn -pg 1 -lvl 0 -x 0 -y 20 -defaultsOSRD + preplace port vx_busy -pg 1 -lvl 4 -x 950 -y 220 -defaultsOSRD + preplace port vx_reset -pg 1 -lvl 0 -x 0 -y 110 -defaultsOSRD + preplace port dcr_wr_valid -pg 1 -lvl 0 -x 0 -y 130 -defaultsOSRD + preplace portBus dcr_wr_addr -pg 1 -lvl 0 -x 0 -y 150 -defaultsOSRD + preplace portBus dcr_wr_data -pg 1 -lvl 0 -x 0 -y 170 -defaultsOSRD + preplace inst Vortex_top_0 -pg 1 -lvl 1 -x 190 -y 130 -defaultsOSRD + preplace inst axi_bram_ctrl_0 -pg 1 -lvl 2 -x 520 -y 140 -defaultsOSRD + preplace inst axi_bram_ctrl_0_bram -pg 1 -lvl 3 -x 800 -y 140 -defaultsOSRD + preplace netloc Vortex_top_0_busy 1 1 3 360J 
220 NJ 220 NJ + preplace netloc clk_wiz_clk_out1 1 0 2 20 30 370 + preplace netloc resetn_1 1 0 2 NJ 20 380J + preplace netloc vx_reset_1 1 0 1 NJ 110 + preplace netloc dcr_wr_valid_1 1 0 1 NJ 130 + preplace netloc dcr_wr_addr_1 1 0 1 NJ 150 + preplace netloc dcr_wr_data_1 1 0 1 NJ 170 + preplace netloc axi_bram_ctrl_0_BRAM_PORTB 1 2 1 N 150 + preplace netloc axi_bram_ctrl_0_BRAM_PORTA 1 2 1 N 130 + preplace netloc Vortex_top_0_m_axi_mem 1 1 1 N 120 + levelinfo -pg 1 0 190 520 800 950 + pagesize -pg 1 -db -bbox -sgen -180 0 1060 240 + " + } - if { $list_mods_missing ne "" } { - catch {common::send_gid_msg -ssname BD::TCL -id 2021 -severity "ERROR" "The following module(s) are not found in the project: $list_mods_missing" } - common::send_gid_msg -ssname BD::TCL -id 2022 -severity "INFO" "Please add source files for the missing module(s) above." - set bCheckIPsPassed 0 - } -} + # Restore current instance + current_bd_instance $oldCurInst -if { $bCheckIPsPassed != 1 } { - common::send_gid_msg -ssname BD::TCL -id 2023 -severity "WARNING" "Will not continue with creation of design due to the error(s) above." 
- return 3 + validate_bd_design + save_bd_design + close_bd_design $design_name + } + # End of cr_bd_design_1() + cr_bd_design_1 "" + set_property EXCLUDE_DEBUG_LOGIC "0" [get_files design_1.bd ] + set_property GENERATE_SYNTH_CHECKPOINT "1" [get_files design_1.bd ] + set_property IS_ENABLED "1" [get_files design_1.bd ] + set_property IS_GLOBAL_INCLUDE "0" [get_files design_1.bd ] + #set_property IS_LOCKED "0" [get_files design_1.bd ] + set_property LIBRARY "xil_defaultlib" [get_files design_1.bd ] + set_property PATH_MODE "RelativeFirst" [get_files design_1.bd ] + set_property PFM_NAME "" [get_files design_1.bd ] + set_property REGISTERED_WITH_MANAGER "1" [get_files design_1.bd ] + set_property SYNTH_CHECKPOINT_MODE "Hierarchical" [get_files design_1.bd ] + set_property USED_IN "synthesis implementation simulation" [get_files design_1.bd ] + set_property USED_IN_IMPLEMENTATION "1" [get_files design_1.bd ] + set_property USED_IN_SIMULATION "1" [get_files design_1.bd ] + set_property USED_IN_SYNTHESIS "1" [get_files design_1.bd ] + + # Call make_wrapper to create wrapper files + set wrapper_path [make_wrapper -fileset sources_1 -files [ get_files -norecurse design_1.bd] -top] + add_files -norecurse -fileset sources_1 $wrapper_path + + update_compile_order -fileset sources_1 } -variable script_folder - -if { $parentCell eq "" } { - set parentCell [get_bd_cells /] -} +proc run_synthesis {} { + global num_jobs + # Synthesis + if {$num_jobs != 0} { + launch_runs synth_1 -jobs $num_jobs + } else { + launch_runs synth_1 + } + wait_on_run synth_1 + open_run synth_1 + report_utilization -file utilization.rpt -hierarchical -hierarchical_percentages -# Get object for parentCell -set parentObj [get_bd_cells $parentCell] -if { $parentObj == "" } { - catch {common::send_gid_msg -ssname BD::TCL -id 2090 -severity "ERROR" "Unable to find parent cell <$parentCell>!"} - return + write_checkpoint -force post_synth.dcp } -# Make sure parentObj is hier blk -set parentType [get_property 
TYPE $parentObj] -if { $parentType ne "hier" } { - catch {common::send_gid_msg -ssname BD::TCL -id 2091 -severity "ERROR" "Parent <$parentObj> has TYPE = <$parentType>. Expected to be ."} - return -} +proc run_implementation {} { + global tool_dir num_jobs + source "${tool_dir}/ila_insert.tcl" + insert_ila 8192 -# Save current instance; Restore later -set oldCurInst [current_bd_instance .] - -# Set parent object as current -current_bd_instance $parentObj - - -# Create interface ports - -# Create ports -set clk_100MHz [ create_bd_port -dir I -type clk -freq_hz 100000000 clk_100MHz ] -set resetn [ create_bd_port -dir I -type rst resetn ] -set_property -dict [ list \ - CONFIG.POLARITY {ACTIVE_LOW} \ -] $resetn -set vx_busy [ create_bd_port -dir O vx_busy ] -set vx_reset [ create_bd_port -dir I -type rst vx_reset ] -set_property -dict [ list \ - CONFIG.POLARITY {ACTIVE_HIGH} \ -] $vx_reset - -set dcr_wr_valid [ create_bd_port -dir I dcr_wr_valid ] -set dcr_wr_addr [ create_bd_port -dir I -from 11 -to 0 dcr_wr_addr ] -set dcr_wr_data [ create_bd_port -dir I -from 31 -to 0 dcr_wr_data ] - -# Create instance: Vortex_top_0, and set properties -set block_name Vortex_top -set block_cell_name Vortex_top_0 -if { [catch {set Vortex_top_0 [create_bd_cell -type module -reference $block_name $block_cell_name] } errmsg] } { - catch {common::send_gid_msg -ssname BD::TCL -id 2095 -severity "ERROR" "Unable to add referenced block <$block_name>. Please add the files for ${block_name}'s definition into the project."} - return 1 - } elseif { $Vortex_top_0 eq "" } { - catch {common::send_gid_msg -ssname BD::TCL -id 2096 -severity "ERROR" "Unable to referenced block <$block_name>. 
Please add the files for ${block_name}'s definition into the project."} - return 1 + # Implementation + if {$num_jobs != 0} { + launch_runs impl_1 -jobs $num_jobs + } else { + launch_runs impl_1 } + wait_on_run impl_1 + open_run impl_1 + report_place_status -file place.rpt + report_route_status -file route.rpt + write_checkpoint -force post_impl.dcp +} -# Create instance: axi_bram_ctrl_0, and set properties -set axi_bram_ctrl_0 [ create_bd_cell -type ip -vlnv xilinx.com:ip:axi_bram_ctrl:4.1 axi_bram_ctrl_0 ] -set_property -dict [ list \ - CONFIG.DATA_WIDTH {512} \ - CONFIG.ECC_TYPE {0} \ -] $axi_bram_ctrl_0 - -# Create instance: axi_bram_ctrl_0_bram, and set properties -set axi_bram_ctrl_0_bram [ create_bd_cell -type ip -vlnv xilinx.com:ip:blk_mem_gen:8.4 axi_bram_ctrl_0_bram ] - -set_property -dict [ list \ - CONFIG.Assume_Synchronous_Clk {true} \ - CONFIG.Byte_Size {8} \ - CONFIG.Load_Init_File {true} \ - CONFIG.Coe_File {@COE_FILE@} \ - CONFIG.EN_SAFETY_CKT {true} \ - CONFIG.Enable_32bit_Address {true} \ - CONFIG.Fill_Remaining_Memory_Locations {false} \ - CONFIG.Memory_Type {Simple_Dual_Port_RAM} \ - CONFIG.Operating_Mode_A {NO_CHANGE} \ - CONFIG.Operating_Mode_B {READ_FIRST} \ - CONFIG.Port_B_Write_Rate {0} \ - CONFIG.Read_Width_A {512} \ - CONFIG.Read_Width_B {512} \ - CONFIG.Register_PortA_Output_of_Memory_Primitives {false} \ - CONFIG.Register_PortB_Output_of_Memory_Primitives {false} \ - CONFIG.Remaining_Memory_Locations {0} \ - CONFIG.Use_Byte_Write_Enable {true} \ - CONFIG.Use_RSTA_Pin {false} \ - CONFIG.Use_RSTB_Pin {true} \ - CONFIG.Write_Width_A {512} \ - CONFIG.Write_Depth_A {16384} \ - CONFIG.use_bram_block {Stand_Alone} \ -] $axi_bram_ctrl_0_bram - -# Create interface connections -connect_bd_intf_net -intf_net Vortex_top_0_m_axi_mem [get_bd_intf_pins Vortex_top_0/m_axi_mem] [get_bd_intf_pins axi_bram_ctrl_0/S_AXI] -connect_bd_intf_net -intf_net axi_bram_ctrl_0_BRAM_PORTA [get_bd_intf_pins axi_bram_ctrl_0/BRAM_PORTA] [get_bd_intf_pins 
axi_bram_ctrl_0_bram/BRAM_PORTA] -connect_bd_intf_net -intf_net axi_bram_ctrl_0_BRAM_PORTB [get_bd_intf_pins axi_bram_ctrl_0/BRAM_PORTB] [get_bd_intf_pins axi_bram_ctrl_0_bram/BRAM_PORTB] - -# Create port connections -connect_bd_net -net Vortex_top_0_busy [get_bd_ports vx_busy] [get_bd_pins Vortex_top_0/busy] -connect_bd_net -net clk_wiz_clk_out1 [get_bd_ports clk_100MHz] [get_bd_pins Vortex_top_0/clk] [get_bd_pins axi_bram_ctrl_0/s_axi_aclk] -connect_bd_net -net resetn_1 [get_bd_ports resetn] [get_bd_pins axi_bram_ctrl_0/s_axi_aresetn] -connect_bd_net -net vx_reset_1 [get_bd_ports vx_reset] [get_bd_pins Vortex_top_0/reset] -connect_bd_net -net dcr_wr_valid_1 [get_bd_ports dcr_wr_valid] [get_bd_pins Vortex_top_0/dcr_wr_valid] -connect_bd_net -net dcr_wr_addr_1 [get_bd_ports dcr_wr_addr] [get_bd_pins Vortex_top_0/dcr_wr_addr] -connect_bd_net -net dcr_wr_data_1 [get_bd_ports dcr_wr_data] [get_bd_pins Vortex_top_0/dcr_wr_data] - -# Create address segments -assign_bd_address -offset 0x00000000 -range 0x00100000 -target_address_space [get_bd_addr_spaces Vortex_top_0/m_axi_mem] [get_bd_addr_segs axi_bram_ctrl_0/S_AXI/Mem0] -force - -# Perform GUI Layout -regenerate_bd_layout -layout_string { - "ActiveEmotionalView":"Default View", - "Default View_ScaleFactor":"1.0", - "Default View_TopLeft":"-195,-165", - "ExpandedHierarchyInLayout":"", - "guistr":"# # String gsaved with Nlview 7.0r4 2019-12-20 bk=1.5203 VDI=41 GEI=36 GUI=JA:10.0 TLS -# -string -flagsOSRD -preplace port clk_100MHz -pg 1 -lvl 0 -x 0 -y 40 -defaultsOSRD -preplace port resetn -pg 1 -lvl 0 -x 0 -y 20 -defaultsOSRD -preplace port vx_busy -pg 1 -lvl 4 -x 950 -y 220 -defaultsOSRD -preplace port vx_reset -pg 1 -lvl 0 -x 0 -y 110 -defaultsOSRD -preplace port dcr_wr_valid -pg 1 -lvl 0 -x 0 -y 130 -defaultsOSRD -preplace portBus dcr_wr_addr -pg 1 -lvl 0 -x 0 -y 150 -defaultsOSRD -preplace portBus dcr_wr_data -pg 1 -lvl 0 -x 0 -y 170 -defaultsOSRD -preplace inst Vortex_top_0 -pg 1 -lvl 1 -x 190 -y 130 -defaultsOSRD 
-preplace inst axi_bram_ctrl_0 -pg 1 -lvl 2 -x 520 -y 140 -defaultsOSRD -preplace inst axi_bram_ctrl_0_bram -pg 1 -lvl 3 -x 800 -y 140 -defaultsOSRD -preplace netloc Vortex_top_0_busy 1 1 3 360J 220 NJ 220 NJ -preplace netloc clk_wiz_clk_out1 1 0 2 20 30 370 -preplace netloc resetn_1 1 0 2 NJ 20 380J -preplace netloc vx_reset_1 1 0 1 NJ 110 -preplace netloc dcr_wr_valid_1 1 0 1 NJ 130 -preplace netloc dcr_wr_addr_1 1 0 1 NJ 150 -preplace netloc dcr_wr_data_1 1 0 1 NJ 170 -preplace netloc axi_bram_ctrl_0_BRAM_PORTB 1 2 1 N 150 -preplace netloc axi_bram_ctrl_0_BRAM_PORTA 1 2 1 N 130 -preplace netloc Vortex_top_0_m_axi_mem 1 1 1 N 120 -levelinfo -pg 1 0 190 520 800 950 -pagesize -pg 1 -db -bbox -sgen -180 0 1060 240 -" +proc run_report {} { + # Generate reports + report_timing_summary -file timing.rpt + report_power -file power.rpt + report_drc -file drc.rpt } - # Restore current instance - current_bd_instance $oldCurInst +############################################################################### - validate_bd_design - save_bd_design - close_bd_design $design_name -} -# End of cr_bd_design_1() -cr_bd_design_1 "" -set_property EXCLUDE_DEBUG_LOGIC "0" [get_files design_1.bd ] -set_property GENERATE_SYNTH_CHECKPOINT "1" [get_files design_1.bd ] -set_property IS_ENABLED "1" [get_files design_1.bd ] -set_property IS_GLOBAL_INCLUDE "0" [get_files design_1.bd ] -#set_property IS_LOCKED "0" [get_files design_1.bd ] -set_property LIBRARY "xil_defaultlib" [get_files design_1.bd ] -set_property PATH_MODE "RelativeFirst" [get_files design_1.bd ] -set_property PFM_NAME "" [get_files design_1.bd ] -set_property REGISTERED_WITH_MANAGER "1" [get_files design_1.bd ] -set_property SYNTH_CHECKPOINT_MODE "Hierarchical" [get_files design_1.bd ] -set_property USED_IN "synthesis implementation simulation" [get_files design_1.bd ] -set_property USED_IN_IMPLEMENTATION "1" [get_files design_1.bd ] -set_property USED_IN_SIMULATION "1" [get_files design_1.bd ] -set_property 
USED_IN_SYNTHESIS "1" [get_files design_1.bd ] - -# Call make_wrapper to create wrapper files -set wrapper_path [make_wrapper -fileset sources_1 -files [ get_files -norecurse design_1.bd] -top] -add_files -norecurse -fileset sources_1 $wrapper_path - -update_compile_order -fileset sources_1 - -# Synthesis -if {$num_jobs != 0} { - launch_runs synth_1 -jobs $num_jobs -} else { - launch_runs synth_1 -} -wait_on_run synth_1 -open_run synth_1 -write_checkpoint -force post_synth.dcp -report_utilization -file utilization.rpt -hierarchical -hierarchical_percentages - -# Implementation -if {$num_jobs != 0} { - launch_runs impl_1 -jobs $num_jobs +# Start time +set start_time [clock seconds] + +# Check if the post-implementation checkpoint exists +if { [file exists post_impl.dcp] } { + puts "Resuming from post-implementation checkpoint: post_impl.dcp" + open_checkpoint post_impl.dcp + run_report +} elseif { [file exists post_synth.dcp] } { + puts "Resuming from post-synthesis checkpoint: post_synth.dcp" + open_checkpoint post_synth.dcp + run_implementation + run_report } else { - launch_runs impl_1 + # execute full pipeline + run_setup + run_synthesis + run_implementation + run_report } -wait_on_run impl_1 -open_run impl_1 -write_checkpoint -force post_impl.dcp - -# Generate reports -report_place_status -file place.rpt -report_route_status -file route.rpt -report_timing_summary -file timing.rpt -report_power -file power.rpt -report_drc -file drc.rpt # End time and calculation set elapsed_time [expr {[clock seconds] - $start_time}] diff --git a/hw/syn/xilinx/xrt/Makefile b/hw/syn/xilinx/xrt/Makefile index fe9a56dc8a..44b04c1a28 100644 --- a/hw/syn/xilinx/xrt/Makefile +++ b/hw/syn/xilinx/xrt/Makefile @@ -122,7 +122,8 @@ ifdef DEBUG VPP_FLAGS += --vivado.prop fileset.sim_1.xsim.elaborate.debug_level=all CFLAGS += -DDEBUG_LEVEL=$(DEBUG) $(DBG_TRACE_FLAGS) else - CFLAGS += -DNDEBUG + VPP_FLAGS += --debug.chipscope vortex_afu_1 + CFLAGS += -DNDEBUG -DCHIPSCOPE $(DBG_SCOPE_FLAGS) 
endif else VPP_FLAGS += --optimize 3 @@ -167,7 +168,7 @@ $(BUILD_DIR)/scope.json: $(BUILD_DIR)/vortex.xml gen-xo: $(XO_CONTAINER) $(XO_CONTAINER): $(BUILD_DIR)/sources.txt - mkdir -p $(BUILD_DIR); cd $(BUILD_DIR); $(VIVADO) -mode batch -source $(SRC_DIR)/../scripts/gen_xo.tcl -tclargs ../$(XO_CONTAINER) vortex_afu sources.txt $(SCRIPT_DIR) ../$(BUILD_DIR) + mkdir -p $(BUILD_DIR); cd $(BUILD_DIR); $(VIVADO) -mode batch -source $(SRC_DIR)/gen_xo.tcl -tclargs ../$(XO_CONTAINER) vortex_afu sources.txt $(SCRIPT_DIR) ../$(BUILD_DIR) gen-bin: $(XCLBIN_CONTAINER) $(XCLBIN_CONTAINER): $(XO_CONTAINER) $(SCOPE_JSON) diff --git a/sim/simx/cache_sim.cpp b/sim/simx/cache_sim.cpp index 4f357f195e..71b2f46998 100644 --- a/sim/simx/cache_sim.cpp +++ b/sim/simx/cache_sim.cpp @@ -514,6 +514,7 @@ class CacheSim::Impl { bank_req.type = bank_req_t::Core; bank_req.write = core_req.write; pipeline_req = bank_req; + DT(3, simobject_->name() << " core-req: " << core_req); } if (core_req.write) From 8135f72cc9280a86ee8b4542a2fde589acc6c0b9 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Tue, 17 Sep 2024 06:45:22 -0700 Subject: [PATCH 188/407] configure update --- configure | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configure b/configure index bbeda59c92..d2483a7969 100755 --- a/configure +++ b/configure @@ -166,7 +166,7 @@ if [ "$OSVERSION" == "unsupported" ]; then fi # project subdirectories to build -SUBDIRS=("." "!ci" "!perf" "hw*" "!hw/syn*" "kernel*" "runtime*" "sim*" "tests*") +SUBDIRS=("." 
"!ci" "!perf" "hw*" "kernel*" "runtime*" "sim*" "tests*") # Get the directory of the script SCRIPT_DIR="$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )" From f2c1ad783126898093190b83401f85cff23db9d4 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Tue, 17 Sep 2024 09:56:54 -0700 Subject: [PATCH 189/407] minor update --- hw/syn/xilinx/{ => xrt}/gen_xo.tcl | 0 hw/syn/xilinx/{ => xrt}/package_kernel.tcl | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename hw/syn/xilinx/{ => xrt}/gen_xo.tcl (100%) rename hw/syn/xilinx/{ => xrt}/package_kernel.tcl (100%) diff --git a/hw/syn/xilinx/gen_xo.tcl b/hw/syn/xilinx/xrt/gen_xo.tcl similarity index 100% rename from hw/syn/xilinx/gen_xo.tcl rename to hw/syn/xilinx/xrt/gen_xo.tcl diff --git a/hw/syn/xilinx/package_kernel.tcl b/hw/syn/xilinx/xrt/package_kernel.tcl similarity index 100% rename from hw/syn/xilinx/package_kernel.tcl rename to hw/syn/xilinx/xrt/package_kernel.tcl From 8908f3e006d260c4a51f5692315b4a34d4a236c2 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Tue, 17 Sep 2024 10:05:17 -0700 Subject: [PATCH 190/407] minor update --- hw/syn/xilinx/xrt/gen_xo.tcl | 2 +- runtime/xrt/Makefile | 5 +++++ runtime/xrt/vortex.cpp | 12 +++++------- 3 files changed, 11 insertions(+), 8 deletions(-) diff --git a/hw/syn/xilinx/xrt/gen_xo.tcl b/hw/syn/xilinx/xrt/gen_xo.tcl index 7d3342a4cf..bad41f5cd5 100644 --- a/hw/syn/xilinx/xrt/gen_xo.tcl +++ b/hw/syn/xilinx/xrt/gen_xo.tcl @@ -31,7 +31,7 @@ if {[file exists "${xoname}"]} { set argv [list ${build_dir}/ip] set argc 1 -source ${script_path}/gen_ip.tcl +source ${script_path}/xilinx_ip_gen.tcl set argv [list ${krnl_name} ${vcs_file} ${tool_dir} ${build_dir}] set argc 4 diff --git a/runtime/xrt/Makefile b/runtime/xrt/Makefile index 66d3e481b6..d4fbc51a85 100644 --- a/runtime/xrt/Makefile +++ b/runtime/xrt/Makefile @@ -39,6 +39,11 @@ ifdef SCOPE SRCS += $(COMMON_DIR)/scope.cpp endif +# Enable ILA logic analyzer +ifdef CHIPSCOPE + CXXFLAGS += -DCHIPSCOPE +endif + 
all: $(DESTDIR)/$(PROJECT) driver: $(DESTDIR)/libxrtsim.so diff --git a/runtime/xrt/vortex.cpp b/runtime/xrt/vortex.cpp index b4e6090e17..a02a849905 100644 --- a/runtime/xrt/vortex.cpp +++ b/runtime/xrt/vortex.cpp @@ -125,13 +125,6 @@ static int get_platform_info(const std::string &device_name, return -1; } -/* -static void wait_for_enter(const std::string &msg) { - std::cout << msg << std::endl; - std::cin.ignore(std::numeric_limits::max(), '\n'); -} -*/ - /////////////////////////////////////////////////////////////////////////////// class vx_device { @@ -390,6 +383,11 @@ class vx_device { } #endif + #ifdef CHIPSCOPE + std::cout << "\nPress ENTER to continue after setting up ILA trigger..." << std::endl; + std::cin.ignore(std::numeric_limits::max(), '\n'); + #endif + return 0; } From 992f8d97d3fe821caa083da6f0b8d7fe07fce131 Mon Sep 17 00:00:00 2001 From: sij814 Date: Tue, 17 Sep 2024 19:47:13 -0700 Subject: [PATCH 191/407] sliced the bypass requests --- hw/rtl/Vortex_hbm.sv | 6 +- hw/rtl/cache/VX_cache_bypass.sv | 2 + hw/rtl/cache/VX_cache_bypass_l3.sv | 355 ++++++++++++++++ hw/rtl/cache/VX_cache_l3.sv | 628 +++++++++++++++++++++++++++ hw/rtl/cache/VX_cache_wrap_l3.sv | 21 +- sim/rtlsim/Makefile | 2 +- sim/rtlsim/processor.cpp | 246 +++++------ sim/rtlsim/processor_hbm.cpp | 656 +++++++++++++++++++++++++++++ 8 files changed, 1770 insertions(+), 146 deletions(-) create mode 100644 hw/rtl/cache/VX_cache_bypass_l3.sv create mode 100644 hw/rtl/cache/VX_cache_l3.sv create mode 100644 sim/rtlsim/processor_hbm.cpp diff --git a/hw/rtl/Vortex_hbm.sv b/hw/rtl/Vortex_hbm.sv index 253c325bb8..d2ffc344df 100644 --- a/hw/rtl/Vortex_hbm.sv +++ b/hw/rtl/Vortex_hbm.sv @@ -209,12 +209,12 @@ module Vortex_hbm import VX_gpu_pkg::*; ( for (int i = 0; i < `NUM_MEM_PORTS; ++i) begin if (mem_req_fire[i]) begin if (mem_req_rw[i]) - `TRACE(1, ("%d: MEM Wr Req: addr=0x%0h, tag=0x%0h, byteen=0x%0h data=0x%0h\n", $time, `TO_FULL_ADDR(mem_req_addr[i]), mem_req_tag[i], mem_req_byteen[i], 
mem_req_data[i])); + `TRACE(1, ("%d: MEM Wr Req: addr=0x%0h, tag=0x%0h, byteen=0x%0h data=0x%0h, bank=%d\n", $time, `TO_FULL_ADDR(mem_req_addr[i]), mem_req_tag[i], mem_req_byteen[i], mem_req_data[i], i)); else - `TRACE(1, ("%d: MEM Rd Req: addr=0x%0h, tag=0x%0h, byteen=0x%0h\n", $time, `TO_FULL_ADDR(mem_req_addr[i]), mem_req_tag[i], mem_req_byteen[i])); + `TRACE(1, ("%d: MEM Rd Req: addr=0x%0h, tag=0x%0h, byteen=0x%0h, bank=%d\n", $time, `TO_FULL_ADDR(mem_req_addr[i]), mem_req_tag[i], mem_req_byteen[i], i)); end if (mem_rsp_fire[i]) begin - `TRACE(1, ("%d: MEM Rsp: tag=0x%0h, data=0x%0h\n", $time, mem_rsp_tag[i], mem_rsp_data[i])); + `TRACE(1, ("%d: MEM Rd Rsp: tag=0x%0h, data=0x%0h\n", $time, mem_rsp_tag[i], mem_rsp_data[i])); end end end diff --git a/hw/rtl/cache/VX_cache_bypass.sv b/hw/rtl/cache/VX_cache_bypass.sv index 379d33e8a9..18dfd50ad4 100644 --- a/hw/rtl/cache/VX_cache_bypass.sv +++ b/hw/rtl/cache/VX_cache_bypass.sv @@ -250,7 +250,9 @@ module VX_cache_bypass #( end end + `IGNORE_UNUSED_BEGIN wire [(MEM_TAG_OUT_WIDTH - NC_ENABLE)-1:0] mem_rsp_tag_id_nc; + `IGNORE_UNUSED_END VX_bits_remove #( .N (MEM_TAG_OUT_WIDTH), diff --git a/hw/rtl/cache/VX_cache_bypass_l3.sv b/hw/rtl/cache/VX_cache_bypass_l3.sv new file mode 100644 index 0000000000..69393cfc67 --- /dev/null +++ b/hw/rtl/cache/VX_cache_bypass_l3.sv @@ -0,0 +1,355 @@ +// Copyright © 2019-2023 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +`include "VX_cache_define.vh" + +module VX_cache_bypass_l3 #( + parameter NUM_REQS = 1, + parameter NUM_OUTPUTS = 1, + parameter TAG_SEL_IDX = 0, + + parameter PASSTHRU = 0, + parameter NC_ENABLE = 0, + + parameter WORD_SIZE = 1, + parameter LINE_SIZE = 1, + + parameter CORE_ADDR_WIDTH = 1, + + parameter CORE_TAG_WIDTH = 1, + + parameter MEM_ADDR_WIDTH = 1, + parameter MEM_TAG_IN_WIDTH = 1, + parameter MEM_TAG_OUT_WIDTH = 1, + + parameter UUID_WIDTH = 0, + + parameter CORE_OUT_BUF = 0, + parameter MEM_OUT_BUF = 0, + + parameter CORE_DATA_WIDTH = WORD_SIZE * 8 + ) ( + input wire clk, + input wire reset, + + // Core request in + VX_mem_bus_if.slave core_bus_in_if [NUM_REQS], + + // Core request out + VX_mem_bus_if.master core_bus_out_if [NUM_REQS], + + // Memory request in + VX_mem_bus_if.slave mem_bus_in_if, + + // Memory request out + VX_mem_bus_if.master mem_bus_out_if +); + localparam DIRECT_PASSTHRU = PASSTHRU && (`CS_WORD_SEL_BITS == 0) && (NUM_REQS == 1); + + localparam REQ_SEL_BITS = `CLOG2(NUM_REQS); + localparam MUX_DATAW = 1 + WORD_SIZE + CORE_ADDR_WIDTH + `ADDR_TYPE_WIDTH + CORE_DATA_WIDTH + CORE_TAG_WIDTH; + + localparam WORDS_PER_LINE = LINE_SIZE / WORD_SIZE; + localparam WSEL_BITS = `CLOG2(WORDS_PER_LINE); + + localparam CORE_TAG_ID_BITS = CORE_TAG_WIDTH - UUID_WIDTH; + localparam MEM_TAG_ID_BITS = REQ_SEL_BITS + WSEL_BITS + CORE_TAG_ID_BITS; + localparam MEM_TAG_BYPASS_BITS = UUID_WIDTH + MEM_TAG_ID_BITS; + + `STATIC_ASSERT(0 == (`IO_BASE_ADDR % `MEM_BLOCK_SIZE), ("invalid parameter")) + + // handle core requests /////////////////////////////////////////////////// + + wire core_req_nc_valid; + wire [NUM_REQS-1:0] core_req_nc_valids; + wire [NUM_REQS-1:0] core_req_nc_idxs; + wire [`UP(REQ_SEL_BITS)-1:0] core_req_nc_idx; + wire [NUM_REQS-1:0] core_req_nc_sel; + wire [NUM_REQS-1:0] core_req_nc_ready; + + for (genvar i = 0; i < NUM_REQS; ++i) begin + if (PASSTHRU != 0) begin + assign core_req_nc_idxs[i] = 1'b1; + end else if (NC_ENABLE) begin + assign 
core_req_nc_idxs[i] = core_bus_in_if[i].req_data.atype[`ADDR_TYPE_IO]; + end else begin + assign core_req_nc_idxs[i] = 1'b0; + end + assign core_req_nc_valids[i] = core_bus_in_if[i].req_valid && core_req_nc_idxs[i]; + end + + /* + + VX_generic_arbiter #( + .NUM_REQS (NUM_REQS), + .TYPE (PASSTHRU ? "R" : "P") + ) core_req_nc_arb ( + .clk (clk), + .reset (reset), + .requests (core_req_nc_valids), + .grant_index (core_req_nc_idx), + .grant_onehot (core_req_nc_sel), + .grant_valid (core_req_nc_valid), + .grant_ready (core_req_nc_ready) + ); + */ + + for (genvar i = 0; i < NUM_REQS; ++i) begin + assign core_bus_out_if[i].req_valid = core_bus_in_if[i].req_valid && ~core_req_nc_idxs[i]; + assign core_bus_out_if[i].req_data = core_bus_in_if[i].req_data; + assign core_bus_in_if[i].req_ready = core_req_nc_valids[i] ? (core_req_nc_ready && core_req_nc_sel[i]) + : core_bus_out_if[i].req_ready; + end + + // handle memory requests ///////////////////////////////////////////////// + + wire [NUM_OUTPUTS-1:0] mem_req_out_valid; + wire [NUM_OUTPUTS-1:0] mem_req_out_rw; + wire [NUM_OUTPUTS-1:0][LINE_SIZE-1:0] mem_req_out_byteen; + wire [NUM_OUTPUTS-1:0][`CS_MEM_ADDR_WIDTH-1:0] mem_req_out_addr; + wire [NUM_OUTPUTS-1:0][`ADDR_TYPE_WIDTH-1:0] mem_req_out_atype; + wire [NUM_OUTPUTS-1:0][`CS_LINE_WIDTH-1:0] mem_req_out_data; + wire [NUM_OUTPUTS-1:0][MEM_TAG_OUT_WIDTH-1:0] mem_req_out_tag; + wire [NUM_OUTPUTS-1:0] mem_req_out_ready; + + wire [NUM_REQS-1:0] core_req_nc_sel_rw; + wire [NUM_REQS-1:0][WORD_SIZE-1:0] core_req_nc_sel_byteen; + wire [NUM_REQS-1:0][CORE_ADDR_WIDTH-1:0] core_req_nc_sel_addr; + wire [NUM_REQS-1:0][`ADDR_TYPE_WIDTH-1:0] core_req_nc_sel_atype; + wire [NUM_REQS-1:0][CORE_DATA_WIDTH-1:0] core_req_nc_sel_data; + wire [NUM_REQS-1:0][CORE_TAG_WIDTH-1:0] core_req_nc_sel_tag; + + wire [NUM_REQS-1:0][MUX_DATAW-1:0] core_req_nc_mux_in; + for (genvar i = 0; i < NUM_REQS; ++i) begin + assign core_req_nc_mux_in[i] = { + core_bus_in_if[i].req_data.rw, + 
core_bus_in_if[i].req_data.byteen, + core_bus_in_if[i].req_data.addr, + core_bus_in_if[i].req_data.atype, + core_bus_in_if[i].req_data.data, + core_bus_in_if[i].req_data.tag + }; + end + + assign { + core_req_nc_sel_rw, + core_req_nc_sel_byteen, + core_req_nc_sel_addr, + core_req_nc_sel_atype, + core_req_nc_sel_data, + core_req_nc_sel_tag + } = core_req_nc_mux_in; + + assign core_req_nc_ready = ~mem_bus_in_if.req_valid && mem_req_out_ready; + + assign mem_req_out_valid = mem_bus_in_if.req_valid || core_req_nc_valid; + assign mem_req_out_rw = mem_bus_in_if.req_valid ? mem_bus_in_if.req_data.rw : core_req_nc_sel_rw; + assign mem_req_out_addr = mem_bus_in_if.req_valid ? mem_bus_in_if.req_data.addr : core_req_nc_sel_addr[WSEL_BITS +: MEM_ADDR_WIDTH]; + assign mem_req_out_atype = mem_bus_in_if.req_valid ? mem_bus_in_if.req_data.atype : core_req_nc_sel_atype; + + wire [MEM_TAG_ID_BITS-1:0] mem_req_tag_id_bypass; + + wire [CORE_TAG_ID_BITS-1:0] core_req_in_id = core_req_nc_sel_tag[CORE_TAG_ID_BITS-1:0]; + + if (WORDS_PER_LINE > 1) begin + reg [WORDS_PER_LINE-1:0][WORD_SIZE-1:0] mem_req_byteen_in_r; + reg [WORDS_PER_LINE-1:0][CORE_DATA_WIDTH-1:0] mem_req_data_in_r; + + wire [WSEL_BITS-1:0] req_wsel = core_req_nc_sel_addr[WSEL_BITS-1:0]; + + always @(*) begin + mem_req_byteen_in_r = '0; + mem_req_byteen_in_r[req_wsel] = core_req_nc_sel_byteen; + + mem_req_data_in_r = 'x; + mem_req_data_in_r[req_wsel] = core_req_nc_sel_data; + end + + assign mem_req_out_byteen = mem_bus_in_if.req_valid ? mem_bus_in_if.req_data.byteen : mem_req_byteen_in_r; + assign mem_req_out_data = mem_bus_in_if.req_valid ? mem_bus_in_if.req_data.data : mem_req_data_in_r; + if (NUM_REQS > 1) begin + assign mem_req_tag_id_bypass = MEM_TAG_ID_BITS'({core_req_nc_idx, req_wsel, core_req_in_id}); + end else begin + assign mem_req_tag_id_bypass = MEM_TAG_ID_BITS'({req_wsel, core_req_in_id}); + end + end else begin + assign mem_req_out_byteen = mem_bus_in_if[0].req_valid ? 
mem_bus_in_if[0].req_data.byteen : core_req_nc_sel_byteen; + assign mem_req_out_data = mem_bus_in_if[0].req_valid ? mem_bus_in_if[0].req_data.data : core_req_nc_sel_data; + if (NUM_REQS > 1) begin + assign mem_req_tag_id_bypass = MEM_TAG_ID_BITS'({core_req_nc_idx, core_req_in_id}); + end else begin + assign mem_req_tag_id_bypass = MEM_TAG_ID_BITS'({core_req_in_id}); + end + end + + wire [MEM_TAG_BYPASS_BITS-1:0] mem_req_tag_bypass; + + if (UUID_WIDTH != 0) begin + assign mem_req_tag_bypass = {core_req_nc_sel_tag[CORE_TAG_ID_BITS +: UUID_WIDTH], mem_req_tag_id_bypass}; + end else begin + assign mem_req_tag_bypass = mem_req_tag_id_bypass; + end + + if (PASSTHRU != 0) begin + assign mem_req_out_tag = mem_req_tag_bypass; + `UNUSED_VAR (mem_bus_in_if[0].req_data.tag) + end else begin + if (NC_ENABLE) begin + VX_bits_insert #( + .N (MEM_TAG_OUT_WIDTH-1), + .S (1), + .POS (TAG_SEL_IDX) + ) mem_req_tag_in_nc_insert ( + .data_in (mem_bus_in_if[0].req_valid ? (MEM_TAG_OUT_WIDTH-1)'(mem_bus_in_if[0].req_data.tag) : (MEM_TAG_OUT_WIDTH-1)'(mem_req_tag_bypass)), + .ins_in (~mem_bus_in_if[0].req_valid), + .data_out (mem_req_out_tag) + ); + end else begin + assign mem_req_out_tag = mem_bus_in_if[0].req_data.tag; + end + end + + assign mem_bus_in_if[0].req_ready = mem_req_out_ready; + + VX_elastic_buffer #( + .DATAW (1 + LINE_SIZE + `CS_MEM_ADDR_WIDTH + `ADDR_TYPE_WIDTH + `CS_LINE_WIDTH + MEM_TAG_OUT_WIDTH), + .SIZE ((!DIRECT_PASSTHRU) ? 
`TO_OUT_BUF_SIZE(MEM_OUT_BUF) : 0), + .OUT_REG (`TO_OUT_BUF_REG(MEM_OUT_BUF)) + ) mem_req_buf ( + .clk (clk), + .reset (reset), + .valid_in (mem_req_out_valid), + .ready_in (mem_req_out_ready), + .data_in ({mem_req_out_rw, mem_req_out_byteen, mem_req_out_addr, mem_req_out_atype, mem_req_out_data, mem_req_out_tag}), + .data_out ({mem_bus_out_if[0].req_data.rw, mem_bus_out_if[0].req_data.byteen, mem_bus_out_if[0].req_data.addr, mem_bus_out_if[0].req_data.atype, mem_bus_out_if[0].req_data.data, mem_bus_out_if[0].req_data.tag}), + .valid_out (mem_bus_out_if[0].req_valid), + .ready_out (mem_bus_out_if[0].req_ready) + ); + + // handle core responses ////////////////////////////////////////////////// + + wire [NUM_REQS-1:0] core_rsp_in_valid; + wire [NUM_REQS-1:0][`CS_WORD_WIDTH-1:0] core_rsp_in_data; + wire [NUM_REQS-1:0][CORE_TAG_WIDTH-1:0] core_rsp_in_tag; + wire [NUM_REQS-1:0] core_rsp_in_ready; + + wire is_mem_rsp_nc; + if (PASSTHRU != 0) begin + assign is_mem_rsp_nc = mem_bus_out_if[0].rsp_valid; + end else begin + if (NC_ENABLE) begin + assign is_mem_rsp_nc = mem_bus_out_if[0].rsp_valid && mem_bus_out_if[0].rsp_data.tag[TAG_SEL_IDX]; + end else begin + assign is_mem_rsp_nc = 1'b0; + end + end + + wire [(MEM_TAG_OUT_WIDTH - NC_ENABLE)-1:0] mem_rsp_tag_id_nc; + + VX_bits_remove #( + .N (MEM_TAG_OUT_WIDTH), + .S (NC_ENABLE), + .POS (TAG_SEL_IDX) + ) mem_rsp_tag_in_nc_remove ( + .data_in (mem_bus_out_if[0].rsp_data.tag), + .data_out (mem_rsp_tag_id_nc) + ); + + wire [`UP(REQ_SEL_BITS)-1:0] rsp_idx; + if (NUM_REQS > 1) begin + assign rsp_idx = mem_rsp_tag_id_nc[(CORE_TAG_ID_BITS + WSEL_BITS) +: REQ_SEL_BITS]; + end else begin + assign rsp_idx = 1'b0; + end + + reg [NUM_REQS-1:0] rsp_nc_valid_r; + always @(*) begin + rsp_nc_valid_r = '0; + rsp_nc_valid_r[rsp_idx] = is_mem_rsp_nc; + end + + for (genvar i = 0; i < NUM_REQS; ++i) begin + assign core_rsp_in_valid[i] = core_bus_out_if[i].rsp_valid || rsp_nc_valid_r[i]; + assign core_bus_out_if[i].rsp_ready = 
core_rsp_in_ready[i]; + end + + if (WORDS_PER_LINE > 1) begin + wire [WSEL_BITS-1:0] rsp_wsel = mem_rsp_tag_id_nc[CORE_TAG_ID_BITS +: WSEL_BITS]; + for (genvar i = 0; i < NUM_REQS; ++i) begin + assign core_rsp_in_data[i] = core_bus_out_if[i].rsp_valid ? + core_bus_out_if[i].rsp_data.data : mem_bus_out_if[0].rsp_data.data[rsp_wsel * CORE_DATA_WIDTH +: CORE_DATA_WIDTH]; + end + end else begin + for (genvar i = 0; i < NUM_REQS; ++i) begin + assign core_rsp_in_data[i] = core_bus_out_if[i].rsp_valid ? core_bus_out_if[i].rsp_data.data : mem_bus_out_if[0].rsp_data.data; + end + end + + wire [(CORE_TAG_ID_BITS + UUID_WIDTH)-1:0] mem_rsp_tag_in_nc2; + if (UUID_WIDTH != 0) begin + assign mem_rsp_tag_in_nc2 = {mem_rsp_tag_id_nc[(MEM_TAG_OUT_WIDTH - NC_ENABLE)-1 -: UUID_WIDTH], mem_rsp_tag_id_nc[CORE_TAG_ID_BITS-1:0]}; + end else begin + assign mem_rsp_tag_in_nc2 = mem_rsp_tag_id_nc[CORE_TAG_ID_BITS-1:0]; + end + + for (genvar i = 0; i < NUM_REQS; ++i) begin + if (PASSTHRU) begin + assign core_rsp_in_tag[i] = mem_rsp_tag_in_nc2; + end else if (NC_ENABLE) begin + assign core_rsp_in_tag[i] = core_bus_out_if[i].rsp_valid ? core_bus_out_if[i].rsp_data.tag : mem_rsp_tag_in_nc2; + end else begin + assign core_rsp_in_tag[i] = core_bus_out_if[i].rsp_data.tag; + end + end + + for (genvar i = 0; i < NUM_REQS; ++i) begin + VX_elastic_buffer #( + .DATAW (`CS_WORD_WIDTH + CORE_TAG_WIDTH), + .SIZE ((!DIRECT_PASSTHRU) ? 
`TO_OUT_BUF_SIZE(CORE_OUT_BUF) : 0), + .OUT_REG (`TO_OUT_BUF_REG(CORE_OUT_BUF)) + ) core_rsp_buf ( + .clk (clk), + .reset (reset), + .valid_in (core_rsp_in_valid[i]), + .ready_in (core_rsp_in_ready[i]), + .data_in ({core_rsp_in_data[i], core_rsp_in_tag[i]}), + .data_out ({core_bus_in_if[i].rsp_data.data, core_bus_in_if[i].rsp_data.tag}), + .valid_out (core_bus_in_if[i].rsp_valid), + .ready_out (core_bus_in_if[i].rsp_ready) + ); + end + + // handle memory responses //////////////////////////////////////////////// + + if (PASSTHRU != 0) begin + assign mem_bus_in_if[0].rsp_valid = 1'b0; + assign mem_bus_in_if[0].rsp_data.data = '0; + assign mem_bus_in_if[0].rsp_data.tag = '0; + end else if (NC_ENABLE) begin + assign mem_bus_in_if[0].rsp_valid = mem_bus_out_if[0].rsp_valid && ~mem_bus_out_if[0].rsp_data.tag[TAG_SEL_IDX]; + assign mem_bus_in_if[0].rsp_data.data = mem_bus_out_if[0].rsp_data.data; + assign mem_bus_in_if[0].rsp_data.tag = mem_rsp_tag_id_nc[MEM_TAG_IN_WIDTH-1:0]; + end else begin + assign mem_bus_in_if[0].rsp_valid = mem_bus_out_if[0].rsp_valid; + assign mem_bus_in_if[0].rsp_data.data = mem_bus_out_if[0].rsp_data.data; + assign mem_bus_in_if[0].rsp_data.tag = mem_rsp_tag_id_nc; + end + + wire [NUM_REQS-1:0] core_rsp_out_valid; + for (genvar i = 0; i < NUM_REQS; ++i) begin + assign core_rsp_out_valid[i] = core_bus_out_if[i].rsp_valid; + end + + assign mem_bus_out_if[0].rsp_ready = is_mem_rsp_nc ? (~core_rsp_out_valid[rsp_idx] && core_rsp_in_ready[rsp_idx]) : mem_bus_in_if[0].rsp_ready; + +endmodule diff --git a/hw/rtl/cache/VX_cache_l3.sv b/hw/rtl/cache/VX_cache_l3.sv new file mode 100644 index 0000000000..326a4fc65f --- /dev/null +++ b/hw/rtl/cache/VX_cache_l3.sv @@ -0,0 +1,628 @@ +// Copyright © 2019-2023 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +`include "VX_cache_define.vh" + +module VX_cache_l3 import VX_gpu_pkg::*; #( + parameter `STRING INSTANCE_ID = "", + + // Number of Word requests per cycle + parameter NUM_REQS = 4, + + // Size of cache in bytes + parameter CACHE_SIZE = 4096, + // Size of line inside a bank in bytes + parameter LINE_SIZE = 64, + // Number of banks + parameter NUM_BANKS = 1, + // Number of memory ports + parameter NUM_MEM_PORTS = 1, + // Number of associative ways + parameter NUM_WAYS = 1, + // Size of a word in bytes + parameter WORD_SIZE = `XLEN/8, + + // Core Response Queue Size + parameter CRSQ_SIZE = 2, + // Miss Reserv Queue Knob + parameter MSHR_SIZE = 8, + // Memory Response Queue Size + parameter MRSQ_SIZE = 0, + // Memory Request Queue Size + parameter MREQ_SIZE = 4, + + // Enable cache writeable + parameter WRITE_ENABLE = 1, + + // Enable cache writeback + parameter WRITEBACK = 0, + + // Enable dirty bytes on writeback + parameter DIRTY_BYTES = 0, + + // Request debug identifier + parameter UUID_WIDTH = 0, + + // core request tag size + parameter TAG_WIDTH = UUID_WIDTH + 1, + + // Core response output register + parameter CORE_OUT_BUF = 0, + + // Memory request output register + parameter MEM_OUT_BUF = 0 + ) ( + // PERF +`ifdef PERF_ENABLE + output cache_perf_t cache_perf, +`endif + + input wire clk, + input wire reset, + + VX_mem_bus_if.slave core_bus_if [NUM_REQS], + VX_mem_bus_if.master mem_bus_if [NUM_MEM_PORTS] +); + + `STATIC_ASSERT(NUM_BANKS == (1 << `CLOG2(NUM_BANKS)), ("invalid parameter: number of banks must be power of 2")) + 
`STATIC_ASSERT(WRITE_ENABLE || !WRITEBACK, ("invalid parameter: writeback requires write enable")) + `STATIC_ASSERT(WRITEBACK || !DIRTY_BYTES, ("invalid parameter: dirty bytes require writeback")) + + // In writeback mode, memory fill response may issue a new memory request to handle evicted blocks. + // We need to ensure that the memory request queue never fills up to avoid deadlock. + `STATIC_ASSERT(!WRITEBACK || (MREQ_SIZE >= MSHR_SIZE), ("invalid parameter: writeback requires MREQ_SIZE >= MSHR_SIZE")) + + localparam REQ_SEL_WIDTH = `UP(`CS_REQ_SEL_BITS); + localparam WORD_SEL_WIDTH = `UP(`CS_WORD_SEL_BITS); + localparam MSHR_ADDR_WIDTH = `LOG2UP(MSHR_SIZE); + localparam MEM_TAG_WIDTH = MSHR_ADDR_WIDTH + `CS_BANK_SEL_BITS; + localparam WORDS_PER_LINE = LINE_SIZE / WORD_SIZE; + localparam WORD_WIDTH = WORD_SIZE * 8; + localparam WORD_SEL_BITS = `CLOG2(WORDS_PER_LINE); + localparam BANK_SEL_BITS = `CLOG2(NUM_BANKS); + localparam BANK_SEL_WIDTH = `UP(BANK_SEL_BITS); + localparam LINE_ADDR_WIDTH = (`CS_WORD_ADDR_WIDTH - BANK_SEL_BITS - WORD_SEL_BITS); + localparam CORE_REQ_DATAW = LINE_ADDR_WIDTH + 1 + WORD_SEL_WIDTH + WORD_SIZE + WORD_WIDTH + TAG_WIDTH + 1; + localparam CORE_RSP_DATAW = WORD_WIDTH + TAG_WIDTH; + + localparam CORE_REQ_BUF_ENABLE = (NUM_BANKS != 1) || (NUM_REQS != 1); + localparam MEM_REQ_BUF_ENABLE = (NUM_BANKS != 1); + + localparam REQ_XBAR_BUF = (NUM_REQS > 4) ? 
2 : 0; + +`ifdef PERF_ENABLE + wire [NUM_BANKS-1:0] perf_read_miss_per_bank; + wire [NUM_BANKS-1:0] perf_write_miss_per_bank; + wire [NUM_BANKS-1:0] perf_mshr_stall_per_bank; +`endif + + VX_mem_bus_if #( + .DATA_SIZE (WORD_SIZE), + .TAG_WIDTH (TAG_WIDTH) + ) core_bus2_if[NUM_REQS](); + + wire [NUM_BANKS-1:0] per_bank_flush_begin; + wire [NUM_BANKS-1:0] per_bank_flush_end; + + wire [NUM_BANKS-1:0] per_bank_core_req_fire; + + VX_cache_flush #( + .NUM_REQS (NUM_REQS), + .NUM_BANKS (NUM_BANKS), + .BANK_SEL_LATENCY (`TO_OUT_BUF_REG(REQ_XBAR_BUF)) // bank xbar latency + ) flush_unit ( + .clk (clk), + .reset (reset), + .core_bus_in_if (core_bus_if), + .core_bus_out_if (core_bus2_if), + .bank_req_fire (per_bank_core_req_fire), + .flush_begin (per_bank_flush_begin), + .flush_end (per_bank_flush_end) + ); + + /////////////////////////////////////////////////////////////////////////// + + // Core response buffering + wire [NUM_REQS-1:0] core_rsp_valid_s; + wire [NUM_REQS-1:0][`CS_WORD_WIDTH-1:0] core_rsp_data_s; + wire [NUM_REQS-1:0][TAG_WIDTH-1:0] core_rsp_tag_s; + wire [NUM_REQS-1:0] core_rsp_ready_s; + + `RESET_RELAY_EX (core_rsp_reset, reset, NUM_REQS, `MAX_FANOUT); + + for (genvar i = 0; i < NUM_REQS; ++i) begin + + VX_elastic_buffer #( + .DATAW (`CS_WORD_WIDTH + TAG_WIDTH), + .SIZE (CORE_REQ_BUF_ENABLE ? 
`TO_OUT_BUF_SIZE(CORE_OUT_BUF) : 0), + .OUT_REG (`TO_OUT_BUF_REG(CORE_OUT_BUF)) + ) core_rsp_buf ( + .clk (clk), + .reset (core_rsp_reset[i]), + .valid_in (core_rsp_valid_s[i]), + .ready_in (core_rsp_ready_s[i]), + .data_in ({core_rsp_data_s[i], core_rsp_tag_s[i]}), + .data_out ({core_bus2_if[i].rsp_data.data, core_bus2_if[i].rsp_data.tag}), + .valid_out (core_bus2_if[i].rsp_valid), + .ready_out (core_bus2_if[i].rsp_ready) + ); + end + + /////////////////////////////////////////////////////////////////////////// + + // Memory request buffering + wire [NUM_MEM_PORTS-1:0] mem_req_valid_s; + wire [NUM_MEM_PORTS-1:0][`CS_MEM_ADDR_WIDTH-1:0] mem_req_addr_s; + wire [NUM_MEM_PORTS-1:0] mem_req_rw_s; + wire [NUM_MEM_PORTS-1:0][LINE_SIZE-1:0] mem_req_byteen_s; + wire [NUM_MEM_PORTS-1:0][`CS_LINE_WIDTH-1:0] mem_req_data_s; + wire [NUM_MEM_PORTS-1:0][MEM_TAG_WIDTH-1:0] mem_req_tag_s; + wire [NUM_MEM_PORTS-1:0] mem_req_flush_s; + wire [NUM_MEM_PORTS-1:0] mem_req_ready_s; + + wire [NUM_MEM_PORTS-1:0] mem_bus_if_flush; + + for (genvar i = 0; i < NUM_MEM_PORTS; ++i) begin + VX_elastic_buffer #( + .DATAW (1 + LINE_SIZE + `CS_MEM_ADDR_WIDTH + `CS_LINE_WIDTH + MEM_TAG_WIDTH + 1), + .SIZE (MEM_REQ_BUF_ENABLE ? `TO_OUT_BUF_SIZE(MEM_OUT_BUF) : 0), + .OUT_REG (`TO_OUT_BUF_REG(MEM_OUT_BUF)) + ) mem_req_buf ( + .clk (clk), + .reset (reset), + .valid_in (mem_req_valid_s[i]), + .ready_in (mem_req_ready_s[i]), + .data_in ({mem_req_rw_s[i], mem_req_byteen_s[i], mem_req_addr_s[i], mem_req_data_s[i], mem_req_tag_s[i], mem_req_flush_s[i]}), + .data_out ({mem_bus_if[i].req_data.rw, mem_bus_if[i].req_data.byteen, mem_bus_if[i].req_data.addr, mem_bus_if[i].req_data.data, mem_bus_if[i].req_data.tag, mem_bus_if_flush[i]}), + .valid_out (mem_bus_if[i].req_valid), + .ready_out (mem_bus_if[i].req_ready) + ); + + assign mem_bus_if[i].req_data.atype = mem_bus_if_flush[i] ? 
`ADDR_TYPE_WIDTH'(1 << `ADDR_TYPE_FLUSH) : '0; + + end + + /////////////////////////////////////////////////////////////////////////// + + // Memory response buffering + wire [NUM_MEM_PORTS-1:0] mem_rsp_valid_s; + wire [NUM_MEM_PORTS-1:0][`CS_LINE_WIDTH-1:0] mem_rsp_data_s; + wire [NUM_MEM_PORTS-1:0][MEM_TAG_WIDTH-1:0] mem_rsp_tag_s; + wire [NUM_MEM_PORTS-1:0] mem_rsp_ready_s; + + for (genvar i = 0; i < NUM_MEM_PORTS; ++i) begin + VX_elastic_buffer #( + .DATAW (MEM_TAG_WIDTH + `CS_LINE_WIDTH), + .SIZE (MRSQ_SIZE), + .OUT_REG (MRSQ_SIZE > 2) + ) mem_rsp_queue ( + .clk (clk), + .reset (reset), + .valid_in (mem_bus_if[i].rsp_valid), + .ready_in (mem_bus_if[i].rsp_ready), + .data_in ({mem_bus_if[i].rsp_data.tag, mem_bus_if[i].rsp_data.data}), + .data_out ({mem_rsp_tag_s[i], mem_rsp_data_s[i]}), + .valid_out (mem_rsp_valid_s[i]), + .ready_out (mem_rsp_ready_s[i]) + ); + end + + /////////////////////////////////////////////////////////////////////////// + + wire [NUM_BANKS-1:0] per_bank_core_req_valid; + wire [NUM_BANKS-1:0][`CS_LINE_ADDR_WIDTH-1:0] per_bank_core_req_addr; + wire [NUM_BANKS-1:0] per_bank_core_req_rw; + wire [NUM_BANKS-1:0][WORD_SEL_WIDTH-1:0] per_bank_core_req_wsel; + wire [NUM_BANKS-1:0][WORD_SIZE-1:0] per_bank_core_req_byteen; + wire [NUM_BANKS-1:0][`CS_WORD_WIDTH-1:0] per_bank_core_req_data; + wire [NUM_BANKS-1:0][TAG_WIDTH-1:0] per_bank_core_req_tag; + wire [NUM_BANKS-1:0][REQ_SEL_WIDTH-1:0] per_bank_core_req_idx; + wire [NUM_BANKS-1:0] per_bank_core_req_flush; + wire [NUM_BANKS-1:0] per_bank_core_req_ready; + + wire [NUM_BANKS-1:0] per_bank_core_rsp_valid; + wire [NUM_BANKS-1:0][`CS_WORD_WIDTH-1:0] per_bank_core_rsp_data; + wire [NUM_BANKS-1:0][TAG_WIDTH-1:0] per_bank_core_rsp_tag; + wire [NUM_BANKS-1:0][REQ_SEL_WIDTH-1:0] per_bank_core_rsp_idx; + wire [NUM_BANKS-1:0] per_bank_core_rsp_ready; + + wire [NUM_BANKS-1:0] per_bank_mem_req_valid; + wire [NUM_BANKS-1:0][`CS_MEM_ADDR_WIDTH-1:0] per_bank_mem_req_addr; + wire [NUM_BANKS-1:0] 
per_bank_mem_req_rw; + wire [NUM_BANKS-1:0][LINE_SIZE-1:0] per_bank_mem_req_byteen; + wire [NUM_BANKS-1:0][`CS_LINE_WIDTH-1:0] per_bank_mem_req_data; + wire [NUM_BANKS-1:0][MSHR_ADDR_WIDTH-1:0] per_bank_mem_req_id; + wire [NUM_BANKS-1:0] per_bank_mem_req_flush; + wire [NUM_BANKS-1:0] per_bank_mem_req_ready; + + wire [NUM_BANKS-1:0] per_bank_mem_rsp_ready; + + assign per_bank_core_req_fire = per_bank_core_req_valid & per_bank_mem_req_ready; + + if (NUM_BANKS == 1) begin + assign mem_rsp_ready_s = per_bank_mem_rsp_ready; + end else begin + for (genvar i = 0; i < NUM_MEM_PORTS; ++i) begin + assign mem_rsp_ready_s[i] = per_bank_mem_rsp_ready[`CS_MEM_TAG_TO_BANK_ID(mem_rsp_tag_s[i])]; + end + end + + // Bank requests dispatch + + wire [NUM_REQS-1:0] core_req_valid; + wire [NUM_REQS-1:0][`CS_WORD_ADDR_WIDTH-1:0] core_req_addr; + wire [NUM_REQS-1:0] core_req_rw; + wire [NUM_REQS-1:0][WORD_SIZE-1:0] core_req_byteen; + wire [NUM_REQS-1:0][`CS_WORD_WIDTH-1:0] core_req_data; + wire [NUM_REQS-1:0][TAG_WIDTH-1:0] core_req_tag; + wire [NUM_REQS-1:0] core_req_flush; + wire [NUM_REQS-1:0] core_req_ready; + + wire [NUM_REQS-1:0][LINE_ADDR_WIDTH-1:0] core_req_line_addr; + wire [NUM_REQS-1:0][BANK_SEL_WIDTH-1:0] core_req_bid; + wire [NUM_REQS-1:0][WORD_SEL_WIDTH-1:0] core_req_wsel; + + wire [NUM_REQS-1:0][CORE_REQ_DATAW-1:0] core_req_data_in; + wire [NUM_BANKS-1:0][CORE_REQ_DATAW-1:0] core_req_data_out; + + for (genvar i = 0; i < NUM_REQS; ++i) begin + assign core_req_valid[i] = core_bus2_if[i].req_valid; + assign core_req_rw[i] = core_bus2_if[i].req_data.rw; + assign core_req_byteen[i] = core_bus2_if[i].req_data.byteen; + assign core_req_addr[i] = core_bus2_if[i].req_data.addr; + assign core_req_data[i] = core_bus2_if[i].req_data.data; + assign core_req_tag[i] = core_bus2_if[i].req_data.tag; + assign core_req_flush[i] = core_bus2_if[i].req_data.atype[`ADDR_TYPE_FLUSH]; + assign core_bus2_if[i].req_ready = core_req_ready[i]; + end + + for (genvar i = 0; i < NUM_REQS; ++i) begin + if 
(WORDS_PER_LINE > 1) begin + assign core_req_wsel[i] = core_req_addr[i][0 +: WORD_SEL_BITS]; + end else begin + assign core_req_wsel[i] = '0; + end + assign core_req_line_addr[i] = core_req_addr[i][(BANK_SEL_BITS + WORD_SEL_BITS) +: LINE_ADDR_WIDTH]; + end + + if (NUM_BANKS > 1) begin + for (genvar i = 0; i < NUM_REQS; ++i) begin + assign core_req_bid[i] = core_req_addr[i][WORD_SEL_BITS +: BANK_SEL_BITS]; + end + end else begin + assign core_req_bid = '0; + end + + for (genvar i = 0; i < NUM_REQS; ++i) begin + assign core_req_data_in[i] = { + core_req_line_addr[i], + core_req_rw[i], + core_req_wsel[i], + core_req_byteen[i], + core_req_data[i], + core_req_tag[i], + core_req_flush[i] + }; + end + +`ifdef PERF_ENABLE + wire [`PERF_CTR_BITS-1:0] perf_collisions; +`endif + + `RESET_RELAY (req_xbar_reset, reset); + + VX_stream_xbar #( + .NUM_INPUTS (NUM_REQS), + .NUM_OUTPUTS (NUM_BANKS), + .DATAW (CORE_REQ_DATAW), + .PERF_CTR_BITS (`PERF_CTR_BITS), + .ARBITER ("F"), + .OUT_BUF (REQ_XBAR_BUF) + ) req_xbar ( + .clk (clk), + .reset (req_xbar_reset), + `ifdef PERF_ENABLE + .collisions(perf_collisions), + `else + `UNUSED_PIN(collisions), + `endif + .valid_in (core_req_valid), + .data_in (core_req_data_in), + .sel_in (core_req_bid), + .ready_in (core_req_ready), + .valid_out (per_bank_core_req_valid), + .data_out (core_req_data_out), + .sel_out (per_bank_core_req_idx), + .ready_out (per_bank_core_req_ready) + ); + + for (genvar i = 0; i < NUM_BANKS; ++i) begin + assign { + per_bank_core_req_addr[i], + per_bank_core_req_rw[i], + per_bank_core_req_wsel[i], + per_bank_core_req_byteen[i], + per_bank_core_req_data[i], + per_bank_core_req_tag[i], + per_bank_core_req_flush[i] + } = core_req_data_out[i]; + end + + // Banks access + for (genvar bank_id = 0; bank_id < NUM_BANKS; ++bank_id) begin : banks + wire [`CS_LINE_ADDR_WIDTH-1:0] curr_bank_mem_req_addr; + wire curr_bank_mem_rsp_valid; + + if (NUM_BANKS == 1) begin + assign curr_bank_mem_rsp_valid = mem_rsp_valid_s; + end else 
begin + assign curr_bank_mem_rsp_valid = mem_rsp_valid_s[bank_id] && (`CS_MEM_TAG_TO_BANK_ID(mem_rsp_tag_s[bank_id]) == bank_id); + end + + `RESET_RELAY (bank_reset, reset); + + VX_cache_bank #( + .BANK_ID (bank_id), + .INSTANCE_ID ($sformatf("%s-bank%0d", INSTANCE_ID, bank_id)), + .CACHE_SIZE (CACHE_SIZE), + .LINE_SIZE (LINE_SIZE), + .NUM_BANKS (NUM_BANKS), + .NUM_WAYS (NUM_WAYS), + .WORD_SIZE (WORD_SIZE), + .NUM_REQS (NUM_REQS), + .CRSQ_SIZE (CRSQ_SIZE), + .MSHR_SIZE (MSHR_SIZE), + .MREQ_SIZE (MREQ_SIZE), + .WRITE_ENABLE (WRITE_ENABLE), + .DIRTY_BYTES (DIRTY_BYTES), + .WRITEBACK (WRITEBACK), + .UUID_WIDTH (UUID_WIDTH), + .TAG_WIDTH (TAG_WIDTH), + .CORE_OUT_BUF (CORE_REQ_BUF_ENABLE ? 0 : CORE_OUT_BUF), + .MEM_OUT_BUF (MEM_REQ_BUF_ENABLE ? 0 : MEM_OUT_BUF) + ) bank ( + .clk (clk), + .reset (bank_reset), + + `ifdef PERF_ENABLE + .perf_read_misses (perf_read_miss_per_bank[bank_id]), + .perf_write_misses (perf_write_miss_per_bank[bank_id]), + .perf_mshr_stalls (perf_mshr_stall_per_bank[bank_id]), + `endif + + // Core request + .core_req_valid (per_bank_core_req_valid[bank_id]), + .core_req_addr (per_bank_core_req_addr[bank_id]), + .core_req_rw (per_bank_core_req_rw[bank_id]), + .core_req_wsel (per_bank_core_req_wsel[bank_id]), + .core_req_byteen (per_bank_core_req_byteen[bank_id]), + .core_req_data (per_bank_core_req_data[bank_id]), + .core_req_tag (per_bank_core_req_tag[bank_id]), + .core_req_idx (per_bank_core_req_idx[bank_id]), + .core_req_flush (per_bank_core_req_flush[bank_id]), + .core_req_ready (per_bank_core_req_ready[bank_id]), + + // Core response + .core_rsp_valid (per_bank_core_rsp_valid[bank_id]), + .core_rsp_data (per_bank_core_rsp_data[bank_id]), + .core_rsp_tag (per_bank_core_rsp_tag[bank_id]), + .core_rsp_idx (per_bank_core_rsp_idx[bank_id]), + .core_rsp_ready (per_bank_core_rsp_ready[bank_id]), + + // Memory request + .mem_req_valid (per_bank_mem_req_valid[bank_id]), + .mem_req_addr (curr_bank_mem_req_addr), + .mem_req_rw 
(per_bank_mem_req_rw[bank_id]), + .mem_req_byteen (per_bank_mem_req_byteen[bank_id]), + .mem_req_data (per_bank_mem_req_data[bank_id]), + .mem_req_id (per_bank_mem_req_id[bank_id]), + .mem_req_flush (per_bank_mem_req_flush[bank_id]), + .mem_req_ready (per_bank_mem_req_ready[bank_id]), + + // Memory response + .mem_rsp_valid (curr_bank_mem_rsp_valid), + .mem_rsp_data (mem_rsp_data_s[bank_id]), + .mem_rsp_id (`CS_MEM_TAG_TO_REQ_ID(mem_rsp_tag_s[bank_id])), + .mem_rsp_ready (per_bank_mem_rsp_ready[bank_id]), + + .flush_begin (per_bank_flush_begin[bank_id]), + .flush_end (per_bank_flush_end[bank_id]) + ); + + if (NUM_BANKS == 1) begin + assign per_bank_mem_req_addr[bank_id] = curr_bank_mem_req_addr; + end else begin + assign per_bank_mem_req_addr[bank_id] = `CS_LINE_TO_MEM_ADDR(curr_bank_mem_req_addr, bank_id); + end + end + + // Bank responses gather + + wire [NUM_BANKS-1:0][CORE_RSP_DATAW-1:0] core_rsp_data_in; + wire [NUM_REQS-1:0][CORE_RSP_DATAW-1:0] core_rsp_data_out; + + for (genvar i = 0; i < NUM_BANKS; ++i) begin + assign core_rsp_data_in[i] = {per_bank_core_rsp_data[i], per_bank_core_rsp_tag[i]}; + end + + `RESET_RELAY (rsp_xbar_reset, reset); + + VX_stream_xbar #( + .NUM_INPUTS (NUM_BANKS), + .NUM_OUTPUTS (NUM_REQS), + .DATAW (CORE_RSP_DATAW), + .ARBITER ("F") + ) rsp_xbar ( + .clk (clk), + .reset (rsp_xbar_reset), + `UNUSED_PIN (collisions), + .valid_in (per_bank_core_rsp_valid), + .data_in (core_rsp_data_in), + .sel_in (per_bank_core_rsp_idx), + .ready_in (per_bank_core_rsp_ready), + .valid_out (core_rsp_valid_s), + .data_out (core_rsp_data_out), + .ready_out (core_rsp_ready_s), + `UNUSED_PIN (sel_out) + ); + + for (genvar i = 0; i < NUM_REQS; ++i) begin + assign {core_rsp_data_s[i], core_rsp_tag_s[i]} = core_rsp_data_out[i]; + end + + /////////////////////////////////////////////////////////////////////////// + + wire [NUM_MEM_PORTS-1:0] mem_req_valid_p; + wire [NUM_MEM_PORTS-1:0][`CS_MEM_ADDR_WIDTH-1:0] mem_req_addr_p; + wire [NUM_MEM_PORTS-1:0] 
mem_req_rw_p; + wire [NUM_MEM_PORTS-1:0][LINE_SIZE-1:0] mem_req_byteen_p; + wire [NUM_MEM_PORTS-1:0][`CS_LINE_WIDTH-1:0] mem_req_data_p; + wire [NUM_MEM_PORTS-1:0][MEM_TAG_WIDTH-1:0] mem_req_tag_p; + wire [NUM_MEM_PORTS-1:0][MSHR_ADDR_WIDTH-1:0] mem_req_id_p; + wire [NUM_MEM_PORTS-1:0] mem_req_flush_p; + wire [NUM_MEM_PORTS-1:0] mem_req_ready_p; + + // Memory request arbitration + + wire [NUM_BANKS-1:0][(`CS_MEM_ADDR_WIDTH + MSHR_ADDR_WIDTH + 1 + LINE_SIZE + `CS_LINE_WIDTH + 1)-1:0] data_in; + + for (genvar i = 0; i < NUM_BANKS; ++i) begin + assign data_in[i] = { + per_bank_mem_req_addr[i], + per_bank_mem_req_rw[i], + per_bank_mem_req_byteen[i], + per_bank_mem_req_data[i], + per_bank_mem_req_id[i], + per_bank_mem_req_flush[i] + }; + end + + VX_stream_arb #( + .NUM_INPUTS (NUM_BANKS), + .NUM_OUTPUTS (NUM_MEM_PORTS), + .DATAW (`CS_MEM_ADDR_WIDTH + 1 + LINE_SIZE + `CS_LINE_WIDTH + MSHR_ADDR_WIDTH + 1), + .ARBITER ("F") + ) mem_req_arb ( + .clk (clk), + .reset (reset), + .valid_in (per_bank_mem_req_valid), + .ready_in (per_bank_mem_req_ready), + .data_in (data_in), + .data_out ({mem_req_addr_p, mem_req_rw_p, mem_req_byteen_p, mem_req_data_p, mem_req_id_p, mem_req_flush_p}), + .valid_out (mem_req_valid_p), + .ready_out (mem_req_ready_p), + `UNUSED_PIN (sel_out) + ); + + if (NUM_BANKS > 1) begin + for (genvar i = 0; i < NUM_MEM_PORTS; ++i) begin + wire [`CS_BANK_SEL_BITS-1:0] mem_req_bank_id = `CS_MEM_ADDR_TO_BANK_ID(mem_req_addr_p[i]); + assign mem_req_tag_p[i] = MEM_TAG_WIDTH'({mem_req_bank_id, mem_req_id_p[i]}); + end + end else begin + assign mem_req_tag_p = MEM_TAG_WIDTH'(mem_req_id_p); + end + + // Memory request multi-port handling + + assign mem_req_valid_s = mem_req_valid_p; + assign mem_req_addr_s = mem_req_addr_p; + assign mem_req_tag_s = mem_req_tag_p; + assign mem_req_flush_s = mem_req_flush_p; + assign mem_req_ready_p = mem_req_ready_s; + + if (WRITE_ENABLE != 0) begin + assign mem_req_rw_s = mem_req_rw_p; + assign mem_req_byteen_s = mem_req_byteen_p; + 
assign mem_req_data_s = mem_req_data_p; + end else begin + `UNUSED_VAR (mem_req_byteen_p) + `UNUSED_VAR (mem_req_data_p) + `UNUSED_VAR (mem_req_rw_p) + + assign mem_req_rw_s = 0; + assign mem_req_byteen_s = {LINE_SIZE{1'b1}}; + assign mem_req_data_s = '0; + end + +`ifdef PERF_ENABLE + // per cycle: core_reads, core_writes + wire [`CLOG2(NUM_REQS+1)-1:0] perf_core_reads_per_cycle; + wire [`CLOG2(NUM_REQS+1)-1:0] perf_core_writes_per_cycle; + + wire [NUM_REQS-1:0] perf_core_reads_per_req; + wire [NUM_REQS-1:0] perf_core_writes_per_req; + + // per cycle: read misses, write misses, msrq stalls, pipeline stalls + wire [`CLOG2(NUM_BANKS+1)-1:0] perf_read_miss_per_cycle; + wire [`CLOG2(NUM_BANKS+1)-1:0] perf_write_miss_per_cycle; + wire [`CLOG2(NUM_BANKS+1)-1:0] perf_mshr_stall_per_cycle; + wire [`CLOG2(NUM_REQS+1)-1:0] perf_crsp_stall_per_cycle; + + `BUFFER(perf_core_reads_per_req, core_req_valid & core_req_ready & ~core_req_rw); + `BUFFER(perf_core_writes_per_req, core_req_valid & core_req_ready & core_req_rw); + + `POP_COUNT(perf_core_reads_per_cycle, perf_core_reads_per_req); + `POP_COUNT(perf_core_writes_per_cycle, perf_core_writes_per_req); + `POP_COUNT(perf_read_miss_per_cycle, perf_read_miss_per_bank); + `POP_COUNT(perf_write_miss_per_cycle, perf_write_miss_per_bank); + `POP_COUNT(perf_mshr_stall_per_cycle, perf_mshr_stall_per_bank); + + wire [NUM_REQS-1:0] perf_crsp_stall_per_req; + for (genvar i = 0; i < NUM_REQS; ++i) begin + assign perf_crsp_stall_per_req[i] = core_bus2_if[i].rsp_valid && ~core_bus2_if[i].rsp_ready; + end + + `POP_COUNT(perf_crsp_stall_per_cycle, perf_crsp_stall_per_req); + + wire perf_mem_stall_per_cycle = mem_bus_if[0].req_valid && ~mem_bus_if[0].req_ready; + + reg [`PERF_CTR_BITS-1:0] perf_core_reads; + reg [`PERF_CTR_BITS-1:0] perf_core_writes; + reg [`PERF_CTR_BITS-1:0] perf_read_misses; + reg [`PERF_CTR_BITS-1:0] perf_write_misses; + reg [`PERF_CTR_BITS-1:0] perf_mshr_stalls; + reg [`PERF_CTR_BITS-1:0] perf_mem_stalls; + reg 
[`PERF_CTR_BITS-1:0] perf_crsp_stalls; + + always @(posedge clk) begin + if (reset) begin + perf_core_reads <= '0; + perf_core_writes <= '0; + perf_read_misses <= '0; + perf_write_misses <= '0; + perf_mshr_stalls <= '0; + perf_mem_stalls <= '0; + perf_crsp_stalls <= '0; + end else begin + perf_core_reads <= perf_core_reads + `PERF_CTR_BITS'(perf_core_reads_per_cycle); + perf_core_writes <= perf_core_writes + `PERF_CTR_BITS'(perf_core_writes_per_cycle); + perf_read_misses <= perf_read_misses + `PERF_CTR_BITS'(perf_read_miss_per_cycle); + perf_write_misses <= perf_write_misses + `PERF_CTR_BITS'(perf_write_miss_per_cycle); + perf_mshr_stalls <= perf_mshr_stalls + `PERF_CTR_BITS'(perf_mshr_stall_per_cycle); + perf_mem_stalls <= perf_mem_stalls + `PERF_CTR_BITS'(perf_mem_stall_per_cycle); + perf_crsp_stalls <= perf_crsp_stalls + `PERF_CTR_BITS'(perf_crsp_stall_per_cycle); + end + end + + assign cache_perf.reads = perf_core_reads; + assign cache_perf.writes = perf_core_writes; + assign cache_perf.read_misses = perf_read_misses; + assign cache_perf.write_misses = perf_write_misses; + assign cache_perf.bank_stalls = perf_collisions; + assign cache_perf.mshr_stalls = perf_mshr_stalls; + assign cache_perf.mem_stalls = perf_mem_stalls; + assign cache_perf.crsp_stalls = perf_crsp_stalls; +`endif + +endmodule diff --git a/hw/rtl/cache/VX_cache_wrap_l3.sv b/hw/rtl/cache/VX_cache_wrap_l3.sv index 9a8f1688f1..403edf5545 100644 --- a/hw/rtl/cache/VX_cache_wrap_l3.sv +++ b/hw/rtl/cache/VX_cache_wrap_l3.sv @@ -95,6 +95,8 @@ module VX_cache_wrap_l3 import VX_gpu_pkg::*; #( localparam NC_OR_BYPASS = (NC_ENABLE || PASSTHRU); + localparam NUM_REQS_P = NUM_REQS / NUM_MEM_PORTS; + VX_mem_bus_if #( .DATA_SIZE (WORD_SIZE), .TAG_WIDTH (TAG_WIDTH) @@ -108,9 +110,13 @@ module VX_cache_wrap_l3 import VX_gpu_pkg::*; #( if (NC_OR_BYPASS) begin `RESET_RELAY (nc_bypass_reset, reset); - for (genvar i = 0; i < NUM_MEM_PORTS; ++i) begin + for (genvar i = 0; i < NUM_MEM_PORTS; ++i) begin + + localparam 
SLICE_BEGIN = i * NUM_REQS_P; + localparam SLICE_END = SLICE_BEGIN + NUM_REQS_P; + VX_cache_bypass #( - .NUM_REQS (NUM_REQS), + .NUM_REQS (NUM_REQS_P), .TAG_SEL_IDX (TAG_SEL_IDX), .PASSTHRU (PASSTHRU), @@ -134,13 +140,13 @@ module VX_cache_wrap_l3 import VX_gpu_pkg::*; #( .clk (clk), .reset (nc_bypass_reset), - .core_bus_in_if (core_bus_if), - .core_bus_out_if(core_bus_cache_if), + .core_bus_in_if (core_bus_if[SLICE_END-1:SLICE_BEGIN]), + .core_bus_out_if(core_bus_cache_if[SLICE_END-1:SLICE_BEGIN]), .mem_bus_in_if (mem_bus_cache_if[i]), .mem_bus_out_if (mem_bus_if[i]) ); - end + end end else begin @@ -183,11 +189,12 @@ module VX_cache_wrap_l3 import VX_gpu_pkg::*; #( `RESET_RELAY (cache_reset, reset); - VX_cache #( + VX_cache_l3 #( .INSTANCE_ID (INSTANCE_ID), .CACHE_SIZE (CACHE_SIZE), .LINE_SIZE (LINE_SIZE), .NUM_BANKS (NUM_BANKS), + .NUM_MEM_PORTS (NUM_MEM_PORTS), .NUM_WAYS (NUM_WAYS), .WORD_SIZE (WORD_SIZE), .NUM_REQS (NUM_REQS), @@ -209,7 +216,7 @@ module VX_cache_wrap_l3 import VX_gpu_pkg::*; #( .cache_perf (cache_perf), `endif .core_bus_if (core_bus_cache_if), - .mem_bus_if (mem_bus_cache_if[0]) + .mem_bus_if (mem_bus_cache_if) ); end diff --git a/sim/rtlsim/Makefile b/sim/rtlsim/Makefile index 1970788131..9ddccc19d4 100644 --- a/sim/rtlsim/Makefile +++ b/sim/rtlsim/Makefile @@ -37,7 +37,7 @@ RTL_INCLUDE = -I$(RTL_DIR) -I$(DPI_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interface SRCS = $(COMMON_DIR)/util.cpp $(COMMON_DIR)/mem.cpp $(COMMON_DIR)/rvfloats.cpp $(COMMON_DIR)/dram_sim.cpp SRCS += $(DPI_DIR)/util_dpi.cpp $(DPI_DIR)/float_dpi.cpp -SRCS += $(SRC_DIR)/processor.cpp +SRCS += $(SRC_DIR)/processor_hbm.cpp ifdef AXI_BUS TOP = Vortex_axi diff --git a/sim/rtlsim/processor.cpp b/sim/rtlsim/processor.cpp index 7c812f7e88..e5e00f49eb 100644 --- a/sim/rtlsim/processor.cpp +++ b/sim/rtlsim/processor.cpp @@ -17,8 +17,8 @@ #include "VVortex_axi.h" typedef VVortex_axi Device; #else -#include "VVortex_hbm.h" -typedef VVortex_hbm Device; +#include "VVortex.h" +typedef VVortex 
Device; #endif #ifdef VCD_OUTPUT @@ -123,15 +123,6 @@ class Processor::Impl { tfp_->open("trace.vcd"); #endif - pending_mem_reqs_.resize(NUM_MEM_PORTS); - dram_queue_.resize(NUM_MEM_PORTS); - - mem_rd_rsp_active_.resize(NUM_MEM_PORTS); - mem_rd_rsp_ready_.resize(NUM_MEM_PORTS); - - mem_wr_rsp_active_.resize(NUM_MEM_PORTS); - mem_wr_rsp_ready_.resize(NUM_MEM_PORTS); - ram_ = nullptr; #ifndef NDEBUG @@ -219,19 +210,16 @@ class Processor::Impl { print_bufs_.clear(); - for (int i = 0; i < NUM_MEM_PORTS; ++i) { - - pending_mem_reqs_.at(i).clear(); - - { - std::queue empty; - std::swap(dram_queue_.at(i), empty); - } + pending_mem_reqs_.clear(); - mem_rd_rsp_active_.at(i) = false; - mem_wr_rsp_active_.at(i) = false; + { + std::queue empty; + std::swap(dram_queue_, empty); } + mem_rd_rsp_active_ = false; + mem_wr_rsp_active_ = false; + this->mem_bus_reset(); this->dcr_bus_reset(); @@ -262,19 +250,17 @@ class Processor::Impl { dram_sim_.tick(); - for (int i = 0; i < NUM_MEM_PORTS; ++i) { - if (!dram_queue_.at(i).empty()) { - auto mem_req = dram_queue_.at(i).front(); - if (dram_sim_.send_request(mem_req->write, mem_req->addr, 0, [](void* arg) { - auto orig_req = reinterpret_cast(arg); - if (orig_req->ready) { - delete orig_req; - } else { - orig_req->ready = true; - } - }, mem_req)) { - dram_queue_.at(i).pop(); + if (!dram_queue_.empty()) { + auto mem_req = dram_queue_.front(); + if (dram_sim_.send_request(mem_req->write, mem_req->addr, 0, [](void* arg) { + auto orig_req = reinterpret_cast(arg); + if (orig_req->ready) { + delete orig_req; + } else { + orig_req->ready = true; } + }, mem_req)) { + dram_queue_.pop(); } } @@ -451,126 +437,116 @@ class Processor::Impl { #else void mem_bus_reset() { - for (int i = 0; i < NUM_MEM_PORTS; ++i) { - device_->mem_req_ready[i] = 0; - device_->mem_rsp_valid[i] = 0; - } + device_->mem_req_ready = 0; + device_->mem_rsp_valid = 0; } void mem_bus_eval(bool clk) { - for (int i = 0; i < NUM_MEM_PORTS; ++i) { - if (!clk) { - 
mem_rd_rsp_ready_.at(i) = device_->mem_rsp_ready[i]; - return; - } + if (!clk) { + mem_rd_rsp_ready_ = device_->mem_rsp_ready; + return; } - for (int i = 0; i < NUM_MEM_PORTS; ++i) { - if (ram_ == nullptr) { - device_->mem_req_ready[i] = 0; - return; - } + if (ram_ == nullptr) { + device_->mem_req_ready = 0; + return; } // process memory read responses - for (int i = 0; i < NUM_MEM_PORTS; ++i) { - if (mem_rd_rsp_active_.at(i) - && device_->mem_rsp_valid[i] && mem_rd_rsp_ready_.at(i)) { - mem_rd_rsp_active_.at(i) = false; - } - if (!mem_rd_rsp_active_.at(i)) { - if (!pending_mem_reqs_.at(i).empty() - && (*pending_mem_reqs_.at(i).begin())->ready) { - device_->mem_rsp_valid[i] = 1; - auto mem_rsp_it = pending_mem_reqs_.at(i).begin(); - auto mem_rsp = *mem_rsp_it; - /* - printf("%0ld: [sim] MEM Rd Rsp: tag=0x%0lx, addr=0x%0lx, data=0x", timestamp, mem_rsp->tag, mem_rsp->addr); - for (int i = MEM_BLOCK_SIZE-1; i >= 0; --i) { - printf("%02x", mem_rsp->block[i]); - } - printf("\n"); - */ - memcpy(VDataCast::get(device_->mem_rsp_data[i]), mem_rsp->block.data(), MEM_BLOCK_SIZE); - device_->mem_rsp_tag[i] = mem_rsp->tag; - pending_mem_reqs_.at(i).erase(mem_rsp_it); - mem_rd_rsp_active_.at(i) = true; - delete mem_rsp; - } else { - device_->mem_rsp_valid[i] = 0; + if (mem_rd_rsp_active_ + && device_->mem_rsp_valid && mem_rd_rsp_ready_) { + mem_rd_rsp_active_ = false; + } + if (!mem_rd_rsp_active_) { + if (!pending_mem_reqs_.empty() + && (*pending_mem_reqs_.begin())->ready) { + device_->mem_rsp_valid = 1; + auto mem_rsp_it = pending_mem_reqs_.begin(); + auto mem_rsp = *mem_rsp_it; + /* + printf("%0ld: [sim] MEM Rd Rsp: tag=0x%0lx, addr=0x%0lx, data=0x", timestamp, mem_rsp->tag, mem_rsp->addr); + for (int i = MEM_BLOCK_SIZE-1; i >= 0; --i) { + printf("%02x", mem_rsp->block[i]); } + printf("\n"); + */ + memcpy(VDataCast::get(device_->mem_rsp_data), mem_rsp->block.data(), MEM_BLOCK_SIZE); + device_->mem_rsp_tag = mem_rsp->tag; + pending_mem_reqs_.erase(mem_rsp_it); + 
mem_rd_rsp_active_ = true; + delete mem_rsp; + } else { + device_->mem_rsp_valid = 0; } } // process memory requests - for (int j = 0; j < NUM_MEM_PORTS; ++j) { - if (device_->mem_req_valid[j] && running_) { - uint64_t byte_addr = (device_->mem_req_addr[j] * MEM_BLOCK_SIZE); - if (device_->mem_req_rw[j]) { - auto byteen = device_->mem_req_byteen[j]; - auto data = VDataCast::get(device_->mem_req_data[j]); - - if (byte_addr >= uint64_t(IO_COUT_ADDR) - && byte_addr < (uint64_t(IO_COUT_ADDR) + IO_COUT_SIZE)) { - // process console output - for (int i = 0; i < IO_COUT_SIZE; i++) { - if ((byteen >> i) & 0x1) { - auto& ss_buf = print_bufs_[i]; - char c = data[i]; - ss_buf << c; - if (c == '\n') { - std::cout << std::dec << "#" << i << ": " << ss_buf.str() << std::flush; - ss_buf.str(""); - } - } - } - } else { - // process writes - /* - printf("%0ld: [sim] MEM Wr Req: tag=0x%0lx, addr=0x%0lx, byteen=0x", timestamp, device_->mem_req_tag, byte_addr); - for (int i = (MEM_BLOCK_SIZE/4)-1; i >= 0; --i) { - printf("%x", (int)((byteen >> (4 * i)) & 0xf)); - } - printf(", data=0x"); - for (int i = MEM_BLOCK_SIZE-1; i >= 0; --i) { - printf("%d=%02x,", i, data[i]); - } - printf("\n"); - */ - for (int i = 0; i < MEM_BLOCK_SIZE; i++) { - if ((byteen >> i) & 0x1) { - (*ram_)[byte_addr + i] = data[i]; + if (device_->mem_req_valid && running_) { + uint64_t byte_addr = (device_->mem_req_addr * MEM_BLOCK_SIZE); + if (device_->mem_req_rw) { + auto byteen = device_->mem_req_byteen; + auto data = VDataCast::get(device_->mem_req_data); + + if (byte_addr >= uint64_t(IO_COUT_ADDR) + && byte_addr < (uint64_t(IO_COUT_ADDR) + IO_COUT_SIZE)) { + // process console output + for (int i = 0; i < IO_COUT_SIZE; i++) { + if ((byteen >> i) & 0x1) { + auto& ss_buf = print_bufs_[i]; + char c = data[i]; + ss_buf << c; + if (c == '\n') { + std::cout << std::dec << "#" << i << ": " << ss_buf.str() << std::flush; + ss_buf.str(""); } } - - auto mem_req = new mem_req_t(); - mem_req->tag = device_->mem_req_tag[j]; 
- mem_req->addr = byte_addr; - mem_req->write = true; - mem_req->ready = true; - - // send dram request - dram_queue_.at(j).push(mem_req); } } else { - // process reads + // process writes + /* + printf("%0ld: [sim] MEM Wr Req: tag=0x%0lx, addr=0x%0lx, byteen=0x", timestamp, device_->mem_req_tag, byte_addr); + for (int i = (MEM_BLOCK_SIZE/4)-1; i >= 0; --i) { + printf("%x", (int)((byteen >> (4 * i)) & 0xf)); + } + printf(", data=0x"); + for (int i = MEM_BLOCK_SIZE-1; i >= 0; --i) { + printf("%d=%02x,", i, data[i]); + } + printf("\n"); + */ + for (int i = 0; i < MEM_BLOCK_SIZE; i++) { + if ((byteen >> i) & 0x1) { + (*ram_)[byte_addr + i] = data[i]; + } + } + auto mem_req = new mem_req_t(); - mem_req->tag = device_->mem_req_tag[j]; + mem_req->tag = device_->mem_req_tag; mem_req->addr = byte_addr; - mem_req->write = false; - mem_req->ready = false; - ram_->read(mem_req->block.data(), byte_addr, MEM_BLOCK_SIZE); - pending_mem_reqs_.at(j).emplace_back(mem_req); - - //printf("%0ld: [sim] MEM Rd Req: addr=0x%0lx, tag=0x%0lx\n", timestamp, byte_addr, device_->mem_req_tag); + mem_req->write = true; + mem_req->ready = true; // send dram request - dram_queue_.at(j).push(mem_req); + dram_queue_.push(mem_req); } - } + } else { + // process reads + auto mem_req = new mem_req_t(); + mem_req->tag = device_->mem_req_tag; + mem_req->addr = byte_addr; + mem_req->write = false; + mem_req->ready = false; + ram_->read(mem_req->block.data(), byte_addr, MEM_BLOCK_SIZE); + pending_mem_reqs_.emplace_back(mem_req); - device_->mem_req_ready[j] = running_; + //printf("%0ld: [sim] MEM Rd Req: addr=0x%0lx, tag=0x%0lx\n", timestamp, byte_addr, device_->mem_req_tag); + + // send dram request + dram_queue_.push(mem_req); + } } + + device_->mem_req_ready = running_; } #endif @@ -607,9 +583,9 @@ class Processor::Impl { std::unordered_map print_bufs_; - std::vector> pending_mem_reqs_; + std::list pending_mem_reqs_; - std::vector> dram_queue_; + std::queue dram_queue_; DramSim dram_sim_; @@ -621,11 
+597,11 @@ class Processor::Impl { RAM* ram_; - std::vector mem_rd_rsp_active_; - std::vector mem_rd_rsp_ready_; + bool mem_rd_rsp_active_; + bool mem_rd_rsp_ready_; - std::vector mem_wr_rsp_active_; - std::vector mem_wr_rsp_ready_; + bool mem_wr_rsp_active_; + bool mem_wr_rsp_ready_; bool running_; }; diff --git a/sim/rtlsim/processor_hbm.cpp b/sim/rtlsim/processor_hbm.cpp new file mode 100644 index 0000000000..5f7bee7eee --- /dev/null +++ b/sim/rtlsim/processor_hbm.cpp @@ -0,0 +1,656 @@ +// Copyright © 2019-2023 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "processor.h" + +#ifdef AXI_BUS +#include "VVortex_axi.h" +typedef VVortex_axi Device; +#else +#include "VVortex_hbm.h" +typedef VVortex_hbm Device; +#endif + +#ifdef VCD_OUTPUT +#include +#endif + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#ifndef MEMORY_BANKS + #ifdef PLATFORM_PARAM_LOCAL_MEMORY_BANKS + #define MEMORY_BANKS PLATFORM_PARAM_LOCAL_MEMORY_BANKS + #else + #define MEMORY_BANKS 2 + #endif +#endif + +#ifndef MEM_CLOCK_RATIO +#define MEM_CLOCK_RATIO 1 +#endif + +#ifndef TRACE_START_TIME +#define TRACE_START_TIME 0ull +#endif + +#ifndef TRACE_STOP_TIME +#define TRACE_STOP_TIME -1ull +#endif + +#ifndef VERILATOR_RESET_VALUE +#define VERILATOR_RESET_VALUE 2 +#endif + +#if (XLEN == 32) +typedef uint32_t Word; +#elif (XLEN == 64) +typedef uint64_t Word; +#else +#error unsupported XLEN +#endif + +#define VL_WDATA_GETW(lwp, i, n, w) \ + VL_SEL_IWII(0, n * w, 0, 0, lwp, i * w, w) + +using namespace vortex; + +static uint64_t timestamp = 0; + +double sc_time_stamp() { + return timestamp; +} + +/////////////////////////////////////////////////////////////////////////////// + +static bool trace_enabled = false; +static uint64_t trace_start_time = TRACE_START_TIME; +static uint64_t trace_stop_time = TRACE_STOP_TIME; + +bool sim_trace_enabled() { + if (timestamp >= trace_start_time + && timestamp < trace_stop_time) + return true; + return trace_enabled; +} + +void sim_trace_enable(bool enable) { + trace_enabled = enable; +} + +/////////////////////////////////////////////////////////////////////////////// + +class Processor::Impl { +public: + Impl() : dram_sim_(MEM_CLOCK_RATIO) { + // force random values for unitialized signals + Verilated::randReset(VERILATOR_RESET_VALUE); + Verilated::randSeed(50); + + // turn off assertion before reset + Verilated::assertOn(false); + + // create RTL module instance + device_ = new Device(); + + #ifdef VCD_OUTPUT + 
Verilated::traceEverOn(true); + tfp_ = new VerilatedVcdC(); + device_->trace(tfp_, 99); + tfp_->open("trace.vcd"); + #endif + + pending_mem_reqs_.resize(NUM_MEM_PORTS); + dram_queue_.resize(NUM_MEM_PORTS); + + mem_rd_rsp_active_.resize(NUM_MEM_PORTS); + mem_rd_rsp_ready_.resize(NUM_MEM_PORTS); + + mem_wr_rsp_active_.resize(NUM_MEM_PORTS); + mem_wr_rsp_ready_.resize(NUM_MEM_PORTS); + + ram_ = nullptr; + + #ifndef NDEBUG + // dump device configuration + std::cout << "CONFIGS:" + << " num_threads=" << NUM_THREADS + << ", num_warps=" << NUM_WARPS + << ", num_cores=" << NUM_CORES + << ", num_clusters=" << NUM_CLUSTERS + << ", socket_size=" << SOCKET_SIZE + << ", local_mem_base=0x" << std::hex << LMEM_BASE_ADDR << std::dec + << ", num_barriers=" << NUM_BARRIERS + << std::endl; + #endif + // reset the device + this->reset(); + + // Turn on assertion after reset + Verilated::assertOn(true); + } + + ~Impl() { + this->cout_flush(); + + #ifdef VCD_OUTPUT + tfp_->close(); + delete tfp_; + #endif + + delete device_; + } + + void cout_flush() { + for (auto& buf : print_bufs_) { + auto str = buf.second.str(); + if (!str.empty()) { + std::cout << "#" << buf.first << ": " << str << std::endl; + } + } + } + + void attach_ram(RAM* ram) { + ram_ = ram; + } + + void run() { + + #ifndef NDEBUG + std::cout << std::dec << timestamp << ": [sim] run()" << std::endl; + #endif + + // start execution + running_ = true; + device_->reset = 0; + + /* + device_->mem_req_valid[1] = 0; + device_->mem_req_ready[1] = 0; + device_->mem_rsp_valid[1] = 0; + device_->mem_rsp_ready[1] = 0; + */ + + // wait on device to go busy + while (!device_->busy) { + this->tick(); + } + + // wait on device to go idle + while (device_->busy) { + this->tick(); + } + + // reset device + this->reset(); + + this->cout_flush(); + } + + void dcr_write(uint32_t addr, uint32_t value) { + device_->dcr_wr_valid = 1; + device_->dcr_wr_addr = addr; + device_->dcr_wr_data = value; + while (device_->dcr_wr_valid) { + this->tick(); + 
} + } + +private: + + void reset() { + running_ = false; + + print_bufs_.clear(); + + for (int i = 0; i < NUM_MEM_PORTS; ++i) { + + pending_mem_reqs_.at(i).clear(); + + { + std::queue empty; + std::swap(dram_queue_.at(i), empty); + } + + mem_rd_rsp_active_.at(i) = false; + mem_wr_rsp_active_.at(i) = false; + } + + this->mem_bus_reset(); + + this->dcr_bus_reset(); + + device_->reset = 1; + + for (int i = 0; i < RESET_DELAY; ++i) { + device_->clk = 0; + this->eval(); + device_->clk = 1; + this->eval(); + } + } + + void tick() { + + device_->clk = 0; + this->eval(); + + for (int i = 0; i < NUM_MEM_PORTS; ++i) { + this->mem_bus_eval(0, i); + } + this->dcr_bus_eval(0); + + device_->clk = 1; + this->eval(); + + for (int i = 0; i < NUM_MEM_PORTS; ++i) { + this->mem_bus_eval(1, i); + } + this->dcr_bus_eval(1); + + dram_sim_.tick(); + + for (int i = 0; i < NUM_MEM_PORTS; ++i) { + if (!dram_queue_.at(i).empty()) { + auto mem_req = dram_queue_.at(i).front(); + if (dram_sim_.send_request(mem_req->write, mem_req->addr, 0, [](void* arg) { + auto orig_req = reinterpret_cast(arg); + if (orig_req->ready) { + delete orig_req; + } else { + orig_req->ready = true; + } + }, mem_req)) { + dram_queue_.at(i).pop(); + } + } + } + + #ifndef NDEBUG + fflush(stdout); + #endif + } + + void eval() { + device_->eval(); + #ifdef VCD_OUTPUT + if (sim_trace_enabled()) { + tfp_->dump(timestamp); + } else { + exit(-1); + } + #endif + ++timestamp; + } + +#ifdef AXI_BUS + + void mem_bus_reset() { + device_->m_axi_wready[0] = 0; + device_->m_axi_awready[0] = 0; + device_->m_axi_arready[0] = 0; + device_->m_axi_rvalid[0] = 0; + device_->m_axi_bvalid[0] = 0; + } + + void mem_bus_eval(bool clk) { + if (!clk) { + mem_rd_rsp_ready_ = device_->m_axi_rready[0]; + mem_wr_rsp_ready_ = device_->m_axi_bready[0]; + return; + } + + if (ram_ == nullptr) { + device_->m_axi_wready[0] = 0; + device_->m_axi_awready[0] = 0; + device_->m_axi_arready[0] = 0; + return; + } + + // process memory read responses + if 
(mem_rd_rsp_active_ + && device_->m_axi_rvalid[0] && mem_rd_rsp_ready_) { + mem_rd_rsp_active_ = false; + } + if (!mem_rd_rsp_active_) { + if (!pending_mem_reqs_.empty() + && (*pending_mem_reqs_.begin())->ready + && !(*pending_mem_reqs_.begin())->write) { + auto mem_rsp_it = pending_mem_reqs_.begin(); + auto mem_rsp = *mem_rsp_it; + /* + printf("%0ld: [sim] MEM Rd Rsp: addr=0x%0lx, data=0x", timestamp, mem_rsp->addr); + for (int i = MEM_BLOCK_SIZE-1; i >= 0; --i) { + printf("%02x", mem_rsp->block[i]); + } + printf("\n"); + */ + device_->m_axi_rvalid[0] = 1; + device_->m_axi_rid[0] = mem_rsp->tag; + device_->m_axi_rresp[0] = 0; + device_->m_axi_rlast[0] = 1; + memcpy(device_->m_axi_rdata[0].data(), mem_rsp->block.data(), MEM_BLOCK_SIZE); + pending_mem_reqs_.erase(mem_rsp_it); + mem_rd_rsp_active_ = true; + delete mem_rsp; + } else { + device_->m_axi_rvalid[0] = 0; + } + } + + // process memory write responses + if (mem_wr_rsp_active_ + && device_->m_axi_bvalid[0] && mem_wr_rsp_ready_) { + mem_wr_rsp_active_ = false; + } + if (!mem_wr_rsp_active_) { + if (!pending_mem_reqs_.empty() + && (*pending_mem_reqs_.begin())->ready + && (*pending_mem_reqs_.begin())->write) { + auto mem_rsp_it = pending_mem_reqs_.begin(); + auto mem_rsp = *mem_rsp_it; + /* + printf("%0ld: [sim] MEM Wr Rsp: addr=0x%0lx\n", timestamp, mem_rsp->addr); + */ + device_->m_axi_bvalid[0] = 1; + device_->m_axi_bid[0] = mem_rsp->tag; + device_->m_axi_bresp[0] = 0; + pending_mem_reqs_.erase(mem_rsp_it); + mem_wr_rsp_active_ = true; + delete mem_rsp; + } else { + device_->m_axi_bvalid[0] = 0; + } + } + + // select the memory bank + uint32_t req_addr = device_->m_axi_wvalid[0] ? 
device_->m_axi_awaddr[0] : device_->m_axi_araddr[0]; + + // process memory requests + if ((device_->m_axi_wvalid[0] || device_->m_axi_arvalid[0]) && running_) { + if (device_->m_axi_wvalid[0]) { + auto byteen = device_->m_axi_wstrb[0]; + auto base_addr = device_->m_axi_awaddr[0]; + auto data = (uint8_t*)device_->m_axi_wdata[0].data(); + + if (base_addr >= uint64_t(IO_COUT_ADDR) + && base_addr < (uint64_t(IO_COUT_ADDR) + IO_COUT_SIZE)) { + // process console output + for (int i = 0; i < MEM_BLOCK_SIZE; i++) { + if ((byteen >> i) & 0x1) { + auto& ss_buf = print_bufs_[i]; + char c = data[i]; + ss_buf << c; + if (c == '\n') { + std::cout << std::dec << "#" << i << ": " << ss_buf.str() << std::flush; + ss_buf.str(""); + } + } + } + } else { + // process writes + /* + printf("%0ld: [sim] MEM Wr: addr=0x%0lx, byteen=0x", timestamp, base_addr); + for (int i = (MEM_BLOCK_SIZE/4)-1; i >= 0; --i) { + printf("%x", (int)((byteen >> (4 * i)) & 0xf)); + } + printf(", data=0x"); + for (int i = MEM_BLOCK_SIZE-1; i >= 0; --i) { + printf("%02x", data[i]); + } + printf("\n"); + */ + for (int i = 0; i < MEM_BLOCK_SIZE; i++) { + if ((byteen >> i) & 0x1) { + (*ram_)[base_addr + i] = data[i]; + } + } + + auto mem_req = new mem_req_t(); + mem_req->tag = device_->m_axi_awid[0]; + mem_req->addr = device_->m_axi_awaddr[0]; + mem_req->write = true; + mem_req->ready = false; + pending_mem_reqs_.emplace_back(mem_req); + + // send dram request + dram_queue_.push(mem_req); + } + } else { + // process reads + auto mem_req = new mem_req_t(); + mem_req->tag = device_->m_axi_arid[0]; + mem_req->addr = device_->m_axi_araddr[0]; + ram_->read(mem_req->block.data(), device_->m_axi_araddr[0], MEM_BLOCK_SIZE); + mem_req->write = false; + mem_req->ready = false; + pending_mem_reqs_.emplace_back(mem_req); + + // send dram request + dram_queue_.push(mem_req); + } + } + + device_->m_axi_wready[0] = running_; + device_->m_axi_awready[0] = running_; + device_->m_axi_arready[0] = running_; + } + +#else + + void 
mem_bus_reset() { + for (int i = 0; i < NUM_MEM_PORTS; ++i) { + device_->mem_req_ready[i] = 0; + device_->mem_rsp_valid[i] = 0; + } + } + + void mem_bus_eval(bool clk, int n) { + if (!clk) { + mem_rd_rsp_ready_.at(n) = device_->mem_rsp_ready[n]; + return; + } + + if (ram_ == nullptr) { + device_->mem_req_ready[n] = 0; + return; + } + + // process memory read responses + if (mem_rd_rsp_active_.at(n) + && device_->mem_rsp_valid[n] && mem_rd_rsp_ready_.at(n)) { + mem_rd_rsp_active_.at(n) = false; + } + if (!mem_rd_rsp_active_.at(n)) { + if (!pending_mem_reqs_.at(n).empty() + && (*pending_mem_reqs_.at(n).begin())->ready) { + device_->mem_rsp_valid[n] = 1; + auto mem_rsp_it = pending_mem_reqs_.at(n).begin(); + auto mem_rsp = *mem_rsp_it; + /* + printf("%0ld: [sim] MEM Rd Rsp: tag=0x%0lx, addr=0x%0lx, data=0x", timestamp, mem_rsp->tag, mem_rsp->addr); + for (int i = MEM_BLOCK_SIZE-1; i >= 0; --i) { + printf("%02x", mem_rsp->block[i]); + } + printf("\n"); + */ + memcpy(VDataCast::get(device_->mem_rsp_data[n]), mem_rsp->block.data(), MEM_BLOCK_SIZE); + device_->mem_rsp_tag[n] = mem_rsp->tag; + pending_mem_reqs_.at(n).erase(mem_rsp_it); + mem_rd_rsp_active_.at(n) = true; + delete mem_rsp; + } else { + device_->mem_rsp_valid[n] = 0; + } + } + + // process memory requests + if (device_->mem_req_valid[n] && running_) { + uint64_t byte_addr = (device_->mem_req_addr[n] * MEM_BLOCK_SIZE); + if (device_->mem_req_rw[n]) { + auto byteen = device_->mem_req_byteen[n]; + auto data = VDataCast::get(device_->mem_req_data[n]); + + if (byte_addr >= uint64_t(IO_COUT_ADDR) + && byte_addr < (uint64_t(IO_COUT_ADDR) + IO_COUT_SIZE)) { + // process console output + for (int i = 0; i < IO_COUT_SIZE; i++) { + if ((byteen >> i) & 0x1) { + auto& ss_buf = print_bufs_[i]; + char c = data[i]; + ss_buf << c; + if (c == '\n') { + std::cout << std::dec << "#" << i << ": " << ss_buf.str() << std::flush; + ss_buf.str(""); + } + } + } + } else { + // process writes + /* + printf("%0ld: [sim] MEM Wr Req: 
tag=0x%0lx, addr=0x%0lx, byteen=0x", timestamp, device_->mem_req_tag, byte_addr); + for (int i = (MEM_BLOCK_SIZE/4)-1; i >= 0; --i) { + printf("%x", (int)((byteen >> (4 * i)) & 0xf)); + } + printf(", data=0x"); + for (int i = MEM_BLOCK_SIZE-1; i >= 0; --i) { + printf("%d=%02x,", i, data[i]); + } + printf("\n"); + */ + for (int i = 0; i < MEM_BLOCK_SIZE; i++) { + if ((byteen >> i) & 0x1) { + (*ram_)[byte_addr + i] = data[i]; + } + } + + auto mem_req = new mem_req_t(); + mem_req->tag = device_->mem_req_tag[n]; + mem_req->addr = byte_addr; + mem_req->write = true; + mem_req->ready = true; + + // send dram request + dram_queue_.at(n).push(mem_req); + } + } else { + // process reads + auto mem_req = new mem_req_t(); + mem_req->tag = device_->mem_req_tag[n]; + mem_req->addr = byte_addr; + mem_req->write = false; + mem_req->ready = false; + ram_->read(mem_req->block.data(), byte_addr, MEM_BLOCK_SIZE); + pending_mem_reqs_.at(n).emplace_back(mem_req); + + //printf("%0ld: [sim] MEM Rd Req: addr=0x%0lx, tag=0x%0lx\n", timestamp, byte_addr, device_->mem_req_tag); + + // send dram request + dram_queue_.at(n).push(mem_req); + } + } + + device_->mem_req_ready[n] = running_; + } + +#endif + + void dcr_bus_reset() { + device_->dcr_wr_valid = 0; + } + + void dcr_bus_eval(bool clk) { + if (!clk) { + return; + } + if (device_->dcr_wr_valid) { + device_->dcr_wr_valid = 0; + } + } + + void wait(uint32_t cycles) { + for (int i = 0; i < cycles; ++i) { + this->tick(); + } + } + +private: + + typedef struct { + Device* device; + std::array block; + uint64_t addr; + uint64_t tag; + bool write; + bool ready; + } mem_req_t; + + std::unordered_map print_bufs_; + + std::vector> pending_mem_reqs_; + + std::vector> dram_queue_; + + DramSim dram_sim_; + + Device* device_; + +#ifdef VCD_OUTPUT + VerilatedVcdC *tfp_; +#endif + + RAM* ram_; + + std::vector mem_rd_rsp_active_; + std::vector mem_rd_rsp_ready_; + + std::vector mem_wr_rsp_active_; + std::vector mem_wr_rsp_ready_; + + bool running_; +}; + 
+/////////////////////////////////////////////////////////////////////////////// + +Processor::Processor() + : impl_(new Impl()) +{} + +Processor::~Processor() { + delete impl_; +} + +void Processor::attach_ram(RAM* mem) { + impl_->attach_ram(mem); +} + +void Processor::run() { + impl_->run(); +} + +void Processor::dcr_write(uint32_t addr, uint32_t value) { + return impl_->dcr_write(addr, value); +} \ No newline at end of file From 8e3bd5696b389baf3962b65b60c0b2e210b6fd27 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Tue, 17 Sep 2024 19:52:51 -0700 Subject: [PATCH 192/407] xilinx synthesis debugging fixes --- hw/rtl/afu/xrt/VX_afu_wrap.sv | 47 ++++++++++++++-------------- hw/rtl/core/VX_fetch.sv | 10 ++++++ hw/rtl/core/VX_issue_slice.sv | 11 +++++++ hw/rtl/core/VX_lsu_slice.sv | 12 ++++++- hw/syn/xilinx/README | 20 +++++++++--- hw/syn/xilinx/xrt/Makefile | 18 ++++++++--- hw/syn/xilinx/xrt/gen_xo.tcl | 2 +- hw/syn/xilinx/xrt/package_kernel.tcl | 23 +++++++------- 8 files changed, 97 insertions(+), 46 deletions(-) diff --git a/hw/rtl/afu/xrt/VX_afu_wrap.sv b/hw/rtl/afu/xrt/VX_afu_wrap.sv index 9872ae3c14..c2f865076b 100644 --- a/hw/rtl/afu/xrt/VX_afu_wrap.sv +++ b/hw/rtl/afu/xrt/VX_afu_wrap.sv @@ -299,29 +299,8 @@ module VX_afu_wrap #( // SCOPE ////////////////////////////////////////////////////////////////////// -`ifdef CHIPSCOPE - ila_afu ila_afu_inst ( - .clk (clk), - .probe0 ({ - ap_reset, - ap_start, - ap_done, - ap_idle, - interrupt - }), - .probe1 ({ - vx_pending_writes, - vx_busy_wait, - vx_busy, - vx_reset, - dcr_wr_valid, - dcr_wr_addr, - dcr_wr_data - }) - ); -`endif - `ifdef DBG_SCOPE_AFU +`ifdef SCOPE `define TRIGGERS { \ reset, \ ap_reset, \ @@ -333,11 +312,9 @@ module VX_afu_wrap #( vx_busy, \ vx_reset \ } - `define PROBES { \ vx_pending_writes \ } - VX_scope_tap #( .SCOPE_ID (0), .TRIGGERW ($bits(`TRIGGERS)), @@ -355,6 +332,28 @@ module VX_afu_wrap #( `else `SCOPE_IO_UNUSED_W(0) `endif +`ifdef CHIPSCOPE + ila_afu ila_afu_inst ( + .clk (clk), + 
.probe0 ({ + ap_reset, + ap_start, + ap_done, + ap_idle, + interrupt + }), + .probe1 ({ + vx_pending_writes, + vx_busy_wait, + vx_busy, + vx_reset, + dcr_wr_valid, + dcr_wr_addr, + dcr_wr_data + }) + ); +`endif +`endif `ifdef SIMULATION `ifndef VERILATOR diff --git a/hw/rtl/core/VX_fetch.sv b/hw/rtl/core/VX_fetch.sv index a2a80ed94b..044cd0aba4 100644 --- a/hw/rtl/core/VX_fetch.sv +++ b/hw/rtl/core/VX_fetch.sv @@ -132,6 +132,7 @@ module VX_fetch import VX_gpu_pkg::*; #( assign icache_bus_if.rsp_ready = fetch_if.ready; `ifdef DBG_SCOPE_FETCH +`ifdef SCOPE wire schedule_fire = schedule_if.valid && schedule_if.ready; wire icache_rsp_fire = icache_bus_if.rsp_valid && icache_bus_if.rsp_ready; VX_scope_tap #( @@ -162,6 +163,15 @@ module VX_fetch import VX_gpu_pkg::*; #( `else `SCOPE_IO_UNUSED() `endif +`ifdef CHIPSCOPE + ila_fetch ila_fetch_inst ( + .clk (clk), + .probe0 ({schedule_if.valid, schedule_if.data, schedule_if.ready}), + .probe1 ({icache_bus_if.req_valid, icache_bus_if.req_data, icache_bus_if.req_ready}), + .probe2 ({icache_bus_if.rsp_valid, icache_bus_if.rsp_data, icache_bus_if.rsp_ready}) + ); +`endif +`endif `ifdef DBG_TRACE_MEM always @(posedge clk) begin diff --git a/hw/rtl/core/VX_issue_slice.sv b/hw/rtl/core/VX_issue_slice.sv index 63d811328c..34b60676fb 100644 --- a/hw/rtl/core/VX_issue_slice.sv +++ b/hw/rtl/core/VX_issue_slice.sv @@ -89,6 +89,7 @@ module VX_issue_slice import VX_gpu_pkg::*; #( ); `ifdef DBG_SCOPE_ISSUE +`ifdef SCOPE wire operands_if_fire = operands_if.valid && operands_if.ready; wire operands_if_not_ready = ~operands_if.ready; wire writeback_if_valid = writeback_if.valid; @@ -131,6 +132,16 @@ module VX_issue_slice import VX_gpu_pkg::*; #( `else `SCOPE_IO_UNUSED() `endif +`ifdef CHIPSCOPE + ila_issue ila_issue_inst ( + .clk (clk), + .probe0 ({decode_if.valid, decode_if.data, decode_if.ready}), + .probe1 ({scoreboard_if.valid, scoreboard_if.data, scoreboard_if.ready}), + .probe2 ({operands_if.valid, operands_if.data, 
operands_if.ready}), + .probe3 ({writeback_if.valid, writeback_if.data}) + ); +`endif +`endif `ifdef DBG_TRACE_PIPELINE always @(posedge clk) begin diff --git a/hw/rtl/core/VX_lsu_slice.sv b/hw/rtl/core/VX_lsu_slice.sv index 43f787ae94..2664202e7b 100644 --- a/hw/rtl/core/VX_lsu_slice.sv +++ b/hw/rtl/core/VX_lsu_slice.sv @@ -341,7 +341,7 @@ module VX_lsu_slice import VX_gpu_pkg::*; #( .core_req_tag (mem_req_tag), .core_req_ready (mem_req_ready), `UNUSED_PIN (core_req_empty), - `UNUSED_PIN (core_req_sent), + `UNUSED_PIN (core_write_notify), // Output response .core_rsp_valid (mem_rsp_valid), @@ -535,6 +535,7 @@ module VX_lsu_slice import VX_gpu_pkg::*; #( `endif `ifdef DBG_SCOPE_LSU +`ifdef SCOPE VX_scope_tap #( .SCOPE_ID (3), .TRIGGERW (3), @@ -552,5 +553,14 @@ module VX_lsu_slice import VX_gpu_pkg::*; #( `else `SCOPE_IO_UNUSED() `endif +`ifdef CHIPSCOPE + ila_lsu ila_lsu_inst ( + .clk (clk), + .probe0 ({execute_if.valid, execute_if.data, execute_if.ready}), + .probe1 ({lsu_mem_if.req_valid, lsu_mem_if.req_data, lsu_mem_if.req_ready}), + .probe2 ({lsu_mem_if.rsp_valid, lsu_mem_if.rsp_data, lsu_mem_if.rsp_ready}) + ); +`endif +`endif endmodule diff --git a/hw/syn/xilinx/README b/hw/syn/xilinx/README index 17d398dfa2..0fb83e71b8 100644 --- a/hw/syn/xilinx/README +++ b/hw/syn/xilinx/README @@ -8,6 +8,9 @@ xbutil validate --device 0000:09:00.1 --verbose vivado -mode batch -source xilinx_ip_gen.tcl -tclargs ip/xilinx_u50_gen3x16_xdma_5_202210_1 # build FPGA +PREFIX=build_base_1c NUM_CORES=1 TARGET=hw_emu PLATFORM=xilinx_u55c_gen3x16_xdma_3_202210_1 make > build_u55c_hw_emu_base_1c.log 2>&1 & +PREFIX=build_base_1c NUM_CORES=1 TARGET=hw PLATFORM=xilinx_u55c_gen3x16_xdma_3_202210_1 make > build_u55c_hw_base_1c.log 2>&1 & + PREFIX=build_base_1c NUM_CORES=1 TARGET=hw_emu PLATFORM=xilinx_u50_gen3x16_xdma_5_202210_1 make > build_u50_hw_emu_base_1c.log 2>&1 & PREFIX=build_base_1c NUM_CORES=1 TARGET=hw PLATFORM=xilinx_u50_gen3x16_xdma_5_202210_1 make > build_u50_hw_base_1c.log 
2>&1 & @@ -25,14 +28,21 @@ PREFIX=build TARGET=hw_emu PLATFORM=xilinx_vck5000_gen3x16_xdma_1_202120_1 make # debug hw_emu using xsim xsim --gui xilinx_u50_gen3x16_xdma_5_202210_1-0-vortex_afu.wdb & -# debug hw using ILA +# h/w debugging using ILA +## (1) check for ILA support platforminfo --json="hardwarePlatform.extensions.chipscope_debug" xilinx_u50_gen3x16_xdma_5_202210_1 +## (2) chedk for XVC full path to get device id ls /dev/xfpga/xvc_pub* -ls /dev/xvc_pub* -debug_hw --xvc_pcie /dev/xfpga/xvc_pub.u2305.0 --hw_server -debug_hw --xvc_pcie /dev/xvc_pub.u0 --hw_server +## (3) start h/w server +debug_hw --xvc_pcie /dev/xfpga/xvc_pub. --hw_server +## (4) start application and pause +## (5) start vivado to connect to h/w server and select ILA probes debug_hw --vivado --host localhost --ltx_file ./build_xilinx_u50_gen3x16_xdma_5_202210_1_hw/_x/link/vivado/vpl/prj/prj.runs/impl_1/debug_nets.ltx & -make chipscope TARGET=hw PLATFORM=xilinx_u50_gen3x16_xdma_5_202210_1 +## (6) resume application + +# supported ILA Makefie targets +TARGET=hw PLATFORM=xilinx_u50_gen3x16_xdma_5_202210_1 make hw_server +TARGET=hw PLATFORM=xilinx_u50_gen3x16_xdma_5_202210_1 make chipscope # analyze build report vitis_analyzer build_xilinx_u50_gen3x16_xdma_5_202210_1_hw_4c/bin/vortex_afu.xclbin.link_summary diff --git a/hw/syn/xilinx/xrt/Makefile b/hw/syn/xilinx/xrt/Makefile index 44b04c1a28..0e2aea5a9c 100644 --- a/hw/syn/xilinx/xrt/Makefile +++ b/hw/syn/xilinx/xrt/Makefile @@ -53,6 +53,9 @@ DBG_TRACE_FLAGS += -DDBG_TRACE_PIPELINE DBG_TRACE_FLAGS += -DDBG_TRACE_MEM DBG_TRACE_FLAGS += -DDBG_TRACE_CACHE DBG_TRACE_FLAGS += -DDBG_TRACE_AFU +DBG_TRACE_FLAGS += -DDBG_TRACE_TEX +DBG_TRACE_FLAGS += -DDBG_TRACE_RASTER +DBG_TRACE_FLAGS += -DDBG_TRACE_OM DBG_TRACE_FLAGS += -DDBG_TRACE_GBAR # Control logic analyzer monitors @@ -60,6 +63,9 @@ DBG_SCOPE_FLAGS += -DDBG_SCOPE_AFU DBG_SCOPE_FLAGS += -DDBG_SCOPE_ISSUE DBG_SCOPE_FLAGS += -DDBG_SCOPE_FETCH DBG_SCOPE_FLAGS += -DDBG_SCOPE_LSU +DBG_SCOPE_FLAGS += 
-DDBG_SCOPE_TEX +DBG_SCOPE_FLAGS += -DDBG_SCOPE_OM +DBG_SCOPE_FLAGS += -DDBG_SCOPE_RASTER DBG_SCOPE_FLAGS += -DDBG_SCOPE_MSCHED # cluster configuration @@ -77,8 +83,11 @@ FPU_INCLUDE = -I$(RTL_DIR)/fpu ifneq (,$(findstring FPU_FPNEW,$(CONFIGS))) FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/include -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/src -J$(THIRD_PARTY_DIR)/cvfpu/src/fpu_div_sqrt_mvp/hdl -J$(THIRD_PARTY_DIR)/cvfpu/src endif +TEX_INCLUDE = -I$(RTL_DIR)/tex +RASTER_INCLUDE = -I$(RTL_DIR)/raster +OM_INCLUDE = -I$(RTL_DIR)/om RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache -I$(AFU_DIR) -RTL_INCLUDE += $(FPU_INCLUDE) +RTL_INCLUDE += $(FPU_INCLUDE) $(TEX_INCLUDE) $(RASTER_INCLUDE) $(OM_INCLUDE) # Kernel compiler global settings VPP_FLAGS += --link --target $(TARGET) --platform $(PLATFORM) --save-temps --no_ip_cache @@ -179,17 +188,18 @@ $(BIN_DIR)/emconfig.json: mkdir -p $(BIN_DIR); cd $(BUILD_DIR); emconfigutil --platform $(PLATFORM) --od ../$(BIN_DIR) report: $(XCLBIN_CONTAINER) -ifeq ($(TARGET),$(findstring $(TARGET), hw)) - cp $(BUILD_DIR)/_x/logs/link/syn/ulp_vortex_afu_1_0_synth_1_runme.log $(BUILD_DIR)/bin/runme.log +ifeq ($(TARGET), hw) + cp $(BUILD_DIR)/_x/logs/link/vivado.log $(BUILD_DIR)/bin/vivado.log cp $(BUILD_DIR)/_x/reports/link/imp/impl_1_full_util_routed.rpt $(BUILD_DIR)/bin/synthesis.log cp $(BUILD_DIR)/_x/reports/link/imp/impl_1_hw_bb_locked_timing_summary_routed.rpt $(BUILD_DIR)/bin/timing.log + [ -f "$(BUILD_DIR)/_x/link/vivado/vpl/prj/prj.runs/impl_1/debug_nets.ltx" ] && cp $(BUILD_DIR)/_x/link/vivado/vpl/prj/prj.runs/impl_1/debug_nets.ltx $(BUILD_DIR)/bin/debug_nets.ltx endif hwserver: debug_hw --xvc_pcie /dev/xfpga/xvc_pub.u2305.0 --hw_server & chipscope: - debug_hw --vivado --host localhost --ltx_file $(BUILD_DIR)/_x/link/vivado/vpl/prj/prj.runs/impl_1/debug_nets.ltx & + debug_hw --vivado --host localhost --ltx_file $(BUILD_DIR)/bin/debug_nets.ltx & 
clean: $(RMDIR) $(BUILD_DIR) diff --git a/hw/syn/xilinx/xrt/gen_xo.tcl b/hw/syn/xilinx/xrt/gen_xo.tcl index bad41f5cd5..c36c98e36a 100644 --- a/hw/syn/xilinx/xrt/gen_xo.tcl +++ b/hw/syn/xilinx/xrt/gen_xo.tcl @@ -31,7 +31,7 @@ if {[file exists "${xoname}"]} { set argv [list ${build_dir}/ip] set argc 1 -source ${script_path}/xilinx_ip_gen.tcl +source ${tool_dir}/xilinx_ip_gen.tcl set argv [list ${krnl_name} ${vcs_file} ${tool_dir} ${build_dir}] set argc 4 diff --git a/hw/syn/xilinx/xrt/package_kernel.tcl b/hw/syn/xilinx/xrt/package_kernel.tcl index 2c314754d0..aa7e96f3f9 100644 --- a/hw/syn/xilinx/xrt/package_kernel.tcl +++ b/hw/syn/xilinx/xrt/package_kernel.tcl @@ -89,9 +89,9 @@ if { $chipscope == 1 } { CONFIG.C_EN_STRG_QUAL {1} \ CONFIG.C_DATA_DEPTH {8192} \ CONFIG.C_NUM_OF_PROBES {3} \ - CONFIG.C_PROBE0_WIDTH {128} \ - CONFIG.C_PROBE1_WIDTH {128} \ - CONFIG.C_PROBE2_WIDTH {128} \ + CONFIG.C_PROBE0_WIDTH {40} \ + CONFIG.C_PROBE1_WIDTH {80} \ + CONFIG.C_PROBE2_WIDTH {40} \ CONFIG.ALL_PROBE_SAME_MU {false} \ CONFIG.ALL_PROBE_SAME_MU_CNT {2} \ ] [get_ips ila_fetch] @@ -102,9 +102,11 @@ if { $chipscope == 1 } { set_property -dict [list CONFIG.C_ADV_TRIGGER {true} \ CONFIG.C_EN_STRG_QUAL {1} \ CONFIG.C_DATA_DEPTH {8192} \ - CONFIG.C_NUM_OF_PROBES {2} \ - CONFIG.C_PROBE0_WIDTH {256} \ - CONFIG.C_PROBE1_WIDTH {128} \ + CONFIG.C_NUM_OF_PROBES {4} \ + CONFIG.C_PROBE0_WIDTH {112} \ + CONFIG.C_PROBE1_WIDTH {112} \ + CONFIG.C_PROBE2_WIDTH {280} \ + CONFIG.C_PROBE3_WIDTH {112} \ CONFIG.ALL_PROBE_SAME_MU {false} \ CONFIG.ALL_PROBE_SAME_MU_CNT {2} \ ] [get_ips ila_issue] @@ -115,11 +117,10 @@ if { $chipscope == 1 } { set_property -dict [list CONFIG.C_ADV_TRIGGER {true} \ CONFIG.C_EN_STRG_QUAL {1} \ CONFIG.C_DATA_DEPTH {8192} \ - CONFIG.C_NUM_OF_PROBES {4} \ - CONFIG.C_PROBE0_WIDTH {256} \ - CONFIG.C_PROBE1_WIDTH {128} \ - CONFIG.C_PROBE2_WIDTH {288} \ - CONFIG.C_PROBE3_WIDTH {256} \ + CONFIG.C_NUM_OF_PROBES {3} \ + CONFIG.C_PROBE0_WIDTH {288} \ + CONFIG.C_PROBE1_WIDTH {152} \ + 
CONFIG.C_PROBE2_WIDTH {72} \ CONFIG.ALL_PROBE_SAME_MU {false} \ CONFIG.ALL_PROBE_SAME_MU_CNT {2} \ ] [get_ips ila_lsu] From f0bff2a4a23f21d9de2006add2862662bcfbc539 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Tue, 17 Sep 2024 20:31:12 -0700 Subject: [PATCH 193/407] minor update --- hw/rtl/core/VX_lsu_slice.sv | 2 +- hw/rtl/libs/VX_mem_scheduler.sv | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/hw/rtl/core/VX_lsu_slice.sv b/hw/rtl/core/VX_lsu_slice.sv index 2664202e7b..4a8e79953a 100644 --- a/hw/rtl/core/VX_lsu_slice.sv +++ b/hw/rtl/core/VX_lsu_slice.sv @@ -341,7 +341,7 @@ module VX_lsu_slice import VX_gpu_pkg::*; #( .core_req_tag (mem_req_tag), .core_req_ready (mem_req_ready), `UNUSED_PIN (core_req_empty), - `UNUSED_PIN (core_write_notify), + `UNUSED_PIN (core_req_wr_notify), // Output response .core_rsp_valid (mem_rsp_valid), diff --git a/hw/rtl/libs/VX_mem_scheduler.sv b/hw/rtl/libs/VX_mem_scheduler.sv index 9dada16bca..229ff6cf20 100644 --- a/hw/rtl/libs/VX_mem_scheduler.sv +++ b/hw/rtl/libs/VX_mem_scheduler.sv @@ -55,7 +55,7 @@ module VX_mem_scheduler #( input wire [TAG_WIDTH-1:0] core_req_tag, output wire core_req_ready, output wire core_req_empty, - output wire core_req_sent, + output wire core_req_wr_notify, // Core response output wire core_rsp_valid, @@ -187,8 +187,8 @@ module VX_mem_scheduler #( // no pending requests assign core_req_empty = !reqq_valid && ibuf_empty; - // notify request submisison - assign core_req_sent = reqq_valid && reqq_ready; + // notify write request submisison + assign core_req_wr_notify = reqq_valid && reqq_ready && reqq_rw; // Index buffer /////////////////////////////////////////////////////////// From 48f86a48f60ba699430dad41132ec3ada0d95e8c Mon Sep 17 00:00:00 2001 From: sij814 Date: Wed, 18 Sep 2024 22:05:40 -0700 Subject: [PATCH 194/407] changed mem_req_arb in VX_cache_l3.sv to accept data_out --- hw/rtl/cache/VX_cache_l3.sv | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) 
diff --git a/hw/rtl/cache/VX_cache_l3.sv b/hw/rtl/cache/VX_cache_l3.sv index 326a4fc65f..7eb7556de4 100644 --- a/hw/rtl/cache/VX_cache_l3.sv +++ b/hw/rtl/cache/VX_cache_l3.sv @@ -495,6 +495,7 @@ module VX_cache_l3 import VX_gpu_pkg::*; #( // Memory request arbitration wire [NUM_BANKS-1:0][(`CS_MEM_ADDR_WIDTH + MSHR_ADDR_WIDTH + 1 + LINE_SIZE + `CS_LINE_WIDTH + 1)-1:0] data_in; + wire [NUM_MEM_PORTS-1:0][(`CS_MEM_ADDR_WIDTH + MSHR_ADDR_WIDTH + 1 + LINE_SIZE + `CS_LINE_WIDTH + 1)-1:0] data_out; for (genvar i = 0; i < NUM_BANKS; ++i) begin assign data_in[i] = { @@ -518,12 +519,23 @@ module VX_cache_l3 import VX_gpu_pkg::*; #( .valid_in (per_bank_mem_req_valid), .ready_in (per_bank_mem_req_ready), .data_in (data_in), - .data_out ({mem_req_addr_p, mem_req_rw_p, mem_req_byteen_p, mem_req_data_p, mem_req_id_p, mem_req_flush_p}), + .data_out (data_out), .valid_out (mem_req_valid_p), .ready_out (mem_req_ready_p), `UNUSED_PIN (sel_out) ); + for (genvar i = 0; i < NUM_MEM_PORTS; ++i) begin + assign { + mem_req_addr_p[i], + mem_req_rw_p[i], + mem_req_byteen_p[i], + mem_req_data_p[i], + mem_req_id_p[i], + mem_req_flush_p[i] + } = data_out[i]; + end + if (NUM_BANKS > 1) begin for (genvar i = 0; i < NUM_MEM_PORTS; ++i) begin wire [`CS_BANK_SEL_BITS-1:0] mem_req_bank_id = `CS_MEM_ADDR_TO_BANK_ID(mem_req_addr_p[i]); From a37309c6b001cb25c6d760b606e853a997789652 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Thu, 19 Sep 2024 04:24:20 -0700 Subject: [PATCH 195/407] xrtsim implementation --- ci/regression.sh.in | 18 +- hw/rtl/afu/xrt/vortex_afu.v | 4 +- hw/rtl/afu/xrt/vortex_afu.vh | 14 +- hw/rtl/cache/VX_cache_bank.sv | 6 +- hw/rtl/core/VX_operands.sv | 45 +- hw/rtl/libs/VX_axi_adapter.sv | 40 +- runtime/common/common.h | 3 +- runtime/opae/Makefile | 2 +- runtime/xrt/vortex.cpp | 12 +- .../common/malloc.h => sim/common/mem_alloc.h | 0 sim/common/mp_macros.h | 327 +++++++++++++ sim/opaesim/fpga.cpp | 2 + sim/opaesim/opae_sim.cpp | 37 +- sim/opaesim/opae_sim.h | 2 + 
sim/rtlsim/Makefile | 9 +- sim/rtlsim/processor.cpp | 266 ++-------- sim/xrtsim/Makefile | 13 +- sim/xrtsim/{fpga.cpp => xrt.cpp} | 57 ++- sim/xrtsim/{fpga.h => xrt.h} | 6 +- sim/xrtsim/xrt_sim.cpp | 453 +++++++++++++++--- sim/xrtsim/xrt_sim.h | 14 + 21 files changed, 932 insertions(+), 398 deletions(-) rename runtime/common/malloc.h => sim/common/mem_alloc.h (100%) create mode 100644 sim/common/mp_macros.h rename sim/xrtsim/{fpga.cpp => xrt.cpp} (62%) rename sim/xrtsim/{fpga.h => xrt.h} (98%) diff --git a/ci/regression.sh.in b/ci/regression.sh.in index 32e479c1e2..fb25ef480e 100755 --- a/ci/regression.sh.in +++ b/ci/regression.sh.in @@ -92,10 +92,12 @@ regression() # test global barrier CONFIGS="-DGBAR_ENABLE" ./ci/blackbox.sh --driver=simx --app=dogfood --args="-n1 -tgbar" --cores=2 CONFIGS="-DGBAR_ENABLE" ./ci/blackbox.sh --driver=opae --app=dogfood --args="-n1 -tgbar" --cores=2 + CONFIGS="-DGBAR_ENABLE" ./ci/blackbox.sh --driver=xrt --app=dogfood --args="-n1 -tgbar" --cores=2 # test local barrier ./ci/blackbox.sh --driver=simx --app=dogfood --args="-n1 -tbar" ./ci/blackbox.sh --driver=opae --app=dogfood --args="-n1 -tbar" + ./ci/blackbox.sh --driver=xrt --app=dogfood --args="-n1 -tbar" # test temp driver mode for ./ci/blackbox.sh --driver=simx --app=vecadd --rebuild=3 @@ -230,15 +232,18 @@ config2() # test opaesim ./ci/blackbox.sh --driver=opae --app=printf ./ci/blackbox.sh --driver=opae --app=diverge + ./ci/blackbox.sh --driver=xrt --app=diverge # disable DPI if [ "$XLEN" == "64" ]; then # need to disable trig on 64-bit due to a bug inside fpnew's sqrt core. 
CONFIGS="-DDPI_DISABLE -DFPU_FPNEW" ./ci/blackbox.sh --driver=rtlsim --app=dogfood --args="-xtrig -xbar -xgbar" CONFIGS="-DDPI_DISABLE -DFPU_FPNEW" ./ci/blackbox.sh --driver=opae --app=dogfood --args="-xtrig -xbar -xgbar" + CONFIGS="-DDPI_DISABLE -DFPU_FPNEW" ./ci/blackbox.sh --driver=xrt --app=dogfood --args="-xtrig -xbar -xgbar" else CONFIGS="-DDPI_DISABLE -DFPU_FPNEW" ./ci/blackbox.sh --driver=rtlsim --app=dogfood CONFIGS="-DDPI_DISABLE -DFPU_FPNEW" ./ci/blackbox.sh --driver=opae --app=dogfood + CONFIGS="-DDPI_DISABLE -DFPU_FPNEW" ./ci/blackbox.sh --driver=xrt --app=dogfood fi # custom program startup address @@ -255,11 +260,9 @@ config2() # disabling ZICOND extension CONFIGS="-DEXT_ZICOND_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=demo - # test AXI bus - AXI_BUS=1 ./ci/blackbox.sh --driver=rtlsim --app=mstress - # test 128-bit MEM block CONFIGS="-DMEM_BLOCK_SIZE=16" ./ci/blackbox.sh --driver=opae --app=mstress + CONFIGS="-DMEM_BLOCK_SIZE=16" ./ci/blackbox.sh --driver=xrt --app=mstress # test XLEN-bit MEM block CONFIGS="-DMEM_BLOCK_SIZE=$XSIZE" ./ci/blackbox.sh --driver=opae --app=mstress @@ -299,10 +302,11 @@ debug() test_csv_trace - CONFIGS="-DSOCKET_SIZE=1" ./ci/blackbox.sh --driver=opae --cores=2 --clusters=2 --l2cache --debug=1 --perf=1 --app=demo --args="-n1" + CONFIGS="-O0 -DSOCKET_SIZE=1" ./ci/blackbox.sh --driver=opae --cores=2 --clusters=2 --l2cache --debug=1 --perf=1 --app=demo --args="-n1" + CONFIGS="-O0 -DSOCKET_SIZE=1" ./ci/blackbox.sh --driver=xrt --cores=2 --clusters=2 --l2cache --debug=1 --perf=1 --app=demo --args="-n1" CONFIGS="-DSOCKET_SIZE=1" ./ci/blackbox.sh --driver=simx --cores=2 --clusters=2 --l2cache --debug=1 --perf=1 --app=demo --args="-n1" - ./ci/blackbox.sh --driver=opae --cores=1 --scope --app=demo --args="-n1" - + ./ci/blackbox.sh --driver=opae --scope --app=demo --args="-n1" + echo "debugging tests done!" 
} @@ -312,7 +316,7 @@ stress() # test verilator reset values CONFIGS="-DVERILATOR_RESET_VALUE=1 -DSOCKET_SIZE=1 -DDCACHE_WRITEBACK=1 -DL2_WRITEBACK=1 -DL3_WRITEBACK=1" ./ci/blackbox.sh --driver=opae --cores=2 --clusters=2 --l2cache --l3cache --app=dogfood - CONFIGS="-DVERILATOR_RESET_VALUE=1" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx --args="-n128" --l2cache + CONFIGS="-DVERILATOR_RESET_VALUE=1" ./ci/blackbox.sh --driver=xrt --app=sgemmx --args="-n128" --l2cache echo "stress tests done!" } diff --git a/hw/rtl/afu/xrt/vortex_afu.v b/hw/rtl/afu/xrt/vortex_afu.v index 1973ec0aa4..0e042c32b8 100644 --- a/hw/rtl/afu/xrt/vortex_afu.v +++ b/hw/rtl/afu/xrt/vortex_afu.v @@ -17,8 +17,8 @@ module vortex_afu #( parameter C_S_AXI_CTRL_ADDR_WIDTH = 8, parameter C_S_AXI_CTRL_DATA_WIDTH = 32, parameter C_M_AXI_MEM_ID_WIDTH = `M_AXI_MEM_ID_WIDTH, - parameter C_M_AXI_MEM_ADDR_WIDTH = 64, - parameter C_M_AXI_MEM_DATA_WIDTH = `VX_MEM_DATA_WIDTH + parameter C_M_AXI_MEM_ADDR_WIDTH = `M_AXI_MEM_ADDR_WIDTH, + parameter C_M_AXI_MEM_DATA_WIDTH = `M_AXI_MEM_DATA_WIDTH ) ( // System signals input wire ap_clk, diff --git a/hw/rtl/afu/xrt/vortex_afu.vh b/hw/rtl/afu/xrt/vortex_afu.vh index 3616b07940..1a14e13163 100644 --- a/hw/rtl/afu/xrt/vortex_afu.vh +++ b/hw/rtl/afu/xrt/vortex_afu.vh @@ -1,10 +1,10 @@ // Copyright © 2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -15,7 +15,15 @@ `define VORTEX_AFU_VH `ifndef M_AXI_MEM_NUM_BANKS -`define M_AXI_MEM_NUM_BANKS 1 +`define M_AXI_MEM_NUM_BANKS 4 +`endif + +`ifndef M_AXI_MEM_ADDR_WIDTH +`define M_AXI_MEM_ADDR_WIDTH 30 +`endif + +`ifndef M_AXI_MEM_DATA_WIDTH +`define M_AXI_MEM_DATA_WIDTH 512 `endif `ifndef M_AXI_MEM_ID_WIDTH diff --git a/hw/rtl/cache/VX_cache_bank.sv b/hw/rtl/cache/VX_cache_bank.sv index 5054fa333b..59b4be8713 100644 --- a/hw/rtl/cache/VX_cache_bank.sv +++ b/hw/rtl/cache/VX_cache_bank.sv @@ -273,15 +273,15 @@ module VX_cache_bank #( assign addr_sel = (init_valid | flush_valid) ? `CS_LINE_ADDR_WIDTH'(flush_sel) : (replay_valid ? replay_addr : (mem_rsp_valid ? mem_rsp_addr : core_req_addr)); - if (WRITE_ENABLE) begin : g_data_sel + if (WRITE_ENABLE) begin : g_data_sel_lo assign data_sel[`CS_WORD_WIDTH-1:0] = replay_valid ? replay_data : (mem_rsp_valid ? mem_rsp_data[`CS_WORD_WIDTH-1:0] : core_req_data); - end else begin : g_data_sel_ro + end else begin : g_data_sel_lo_ro assign data_sel[`CS_WORD_WIDTH-1:0] = mem_rsp_data[`CS_WORD_WIDTH-1:0]; `UNUSED_VAR (core_req_data) `UNUSED_VAR (replay_data) end - for (genvar i = `CS_WORD_WIDTH; i < `CS_LINE_WIDTH; ++i) begin : g_data_sel + for (genvar i = `CS_WORD_WIDTH; i < `CS_LINE_WIDTH; ++i) begin : g_data_sel_hi assign data_sel[i] = mem_rsp_data[i]; // only the memory response fills the upper words of data_sel end diff --git a/hw/rtl/core/VX_operands.sv b/hw/rtl/core/VX_operands.sv index 2ca847394b..f306812639 100644 --- a/hw/rtl/core/VX_operands.sv +++ b/hw/rtl/core/VX_operands.sv @@ -69,11 +69,9 @@ module VX_operands import VX_gpu_pkg::*; #( wire pipe_valid_st2, pipe_ready_st2; wire [META_DATAW-1:0] pipe_data, pipe_data_st1, pipe_data_st2; - reg [NUM_SRC_OPDS-1:0][`NUM_THREADS-1:0][`XLEN-1:0] src_data_m_st2; - wire [NUM_SRC_OPDS-1:0][`NUM_THREADS-1:0][`XLEN-1:0] src_data_st1, src_data_st2; + reg [NUM_SRC_OPDS-1:0][`NUM_THREADS-1:0][`XLEN-1:0] src_data_st2, src_data_m_st2; - reg [NUM_SRC_OPDS-1:0] data_fetched_n; - wire 
[NUM_SRC_OPDS-1:0] data_fetched_st1; + reg [NUM_SRC_OPDS-1:0] data_fetched_st1; reg has_collision_n; wire has_collision_st1; @@ -139,15 +137,6 @@ module VX_operands import VX_gpu_pkg::*; #( wire [NUM_SRC_OPDS-1:0] req_fire_in = req_valid_in & req_ready_in; - always @(*) begin - data_fetched_n = data_fetched_st1; - if (scoreboard_if.ready) begin - data_fetched_n = '0; - end else begin - data_fetched_n = data_fetched_st1 | req_fire_in; - end - end - assign pipe_data = { scoreboard_if.data.wis, scoreboard_if.data.tmask, @@ -166,33 +155,37 @@ module VX_operands import VX_gpu_pkg::*; #( wire pipe_fire_st2 = pipe_valid_st2 && pipe_ready_st2; VX_pipe_buffer #( - .DATAW (NUM_SRC_OPDS + NUM_BANKS + META_DATAW + 1 + NUM_BANKS * (PER_BANK_ADDRW + REQ_SEL_WIDTH)), - .RESETW (NUM_SRC_OPDS) + .DATAW (NUM_BANKS + META_DATAW + 1 + NUM_BANKS * (PER_BANK_ADDRW + REQ_SEL_WIDTH)) ) pipe_reg1 ( .clk (clk), .reset (reset), .valid_in (scoreboard_if.valid), .ready_in (pipe_ready_in), - .data_in ({data_fetched_n, gpr_rd_valid, pipe_data, has_collision_n, gpr_rd_addr, gpr_rd_req_idx}), - .data_out ({data_fetched_st1, gpr_rd_valid_st1, pipe_data_st1, has_collision_st1, gpr_rd_addr_st1, gpr_rd_req_idx_st1}), + .data_in ({gpr_rd_valid, pipe_data, has_collision_n, gpr_rd_addr, gpr_rd_req_idx}), + .data_out ({gpr_rd_valid_st1, pipe_data_st1, has_collision_st1, gpr_rd_addr_st1, gpr_rd_req_idx_st1}), .valid_out(pipe_valid_st1), .ready_out(pipe_ready_st1) ); - assign src_data_st1 = pipe_fire_st2 ? 
'0 : src_data_m_st2; + always @(posedge clk) begin + if (reset || scoreboard_if.ready) begin + data_fetched_st1 <= 0; + end else begin + data_fetched_st1 <= data_fetched_st1 | req_fire_in; + end + end wire pipe_valid2_st1 = pipe_valid_st1 && ~has_collision_st1; VX_pipe_buffer #( - .DATAW (NUM_SRC_OPDS * REGS_DATAW + NUM_BANKS + META_DATAW + NUM_BANKS * REQ_SEL_WIDTH), - .RESETW (NUM_SRC_OPDS * REGS_DATAW) + .DATAW (NUM_BANKS + META_DATAW + NUM_BANKS * REQ_SEL_WIDTH) ) pipe_reg2 ( .clk (clk), .reset (reset), .valid_in (pipe_valid2_st1), .ready_in (pipe_ready_st1), - .data_in ({src_data_st1, gpr_rd_valid_st1, pipe_data_st1, gpr_rd_req_idx_st1}), - .data_out ({src_data_st2, gpr_rd_valid_st2, pipe_data_st2, gpr_rd_req_idx_st2}), + .data_in ({gpr_rd_valid_st1, pipe_data_st1, gpr_rd_req_idx_st1}), + .data_out ({gpr_rd_valid_st2, pipe_data_st2, gpr_rd_req_idx_st2}), .valid_out(pipe_valid_st2), .ready_out(pipe_ready_st2) ); @@ -206,6 +199,14 @@ module VX_operands import VX_gpu_pkg::*; #( end end + always @(posedge clk) begin + if (reset || pipe_fire_st2) begin + src_data_st2 <= 0; + end else begin + src_data_st2 <= src_data_m_st2; + end + end + VX_elastic_buffer #( .DATAW (DATAW), .SIZE (`TO_OUT_BUF_SIZE(OUT_BUF)), diff --git a/hw/rtl/libs/VX_axi_adapter.sv b/hw/rtl/libs/VX_axi_adapter.sv index 4755764a49..6c231cb959 100644 --- a/hw/rtl/libs/VX_axi_adapter.sv +++ b/hw/rtl/libs/VX_axi_adapter.sv @@ -94,49 +94,36 @@ module VX_axi_adapter #( localparam LOG2_NUM_BANKS = `CLOG2(NUM_BANKS); wire [BANK_ADDRW-1:0] req_bank_sel; - if (NUM_BANKS > 1) begin : g_req_bank_sel assign req_bank_sel = mem_req_addr[BANK_ADDRW-1:0]; end else begin : g_req_bank_sel_0 assign req_bank_sel = '0; end - wire mem_req_fire = mem_req_valid && mem_req_ready; + wire [NUM_BANKS-1:0] axi_aw_ready, axi_write_ready; + for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_axi_write_ready + assign axi_aw_ready[i] = m_axi_awready[i] || m_axi_aw_ack[i]; + assign axi_write_ready[i] = m_axi_wready[i] && 
axi_aw_ready[i]; + end - reg [NUM_BANKS-1:0] m_axi_aw_ack; - reg [NUM_BANKS-1:0] m_axi_w_ack; + // request ack + assign mem_req_ready = mem_req_rw ? axi_write_ready[req_bank_sel] : m_axi_arready[req_bank_sel]; + reg [NUM_BANKS-1:0] m_axi_aw_ack; for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_m_axi_w - wire m_axi_aw_fire = m_axi_awvalid[i] && m_axi_awready[i]; - wire m_axi_w_fire = m_axi_wvalid[i] && m_axi_wready[i]; always @(posedge clk) begin if (reset) begin m_axi_aw_ack[i] <= 0; - m_axi_w_ack[i] <= 0; end else begin - if (mem_req_fire && (req_bank_sel == i)) begin + if (m_axi_wvalid[i] && m_axi_wready[i]) begin m_axi_aw_ack[i] <= 0; - m_axi_w_ack[i] <= 0; - end else begin - if (m_axi_aw_fire) - m_axi_aw_ack[i] <= 1; - if (m_axi_w_fire) - m_axi_w_ack[i] <= 1; + end else if (m_axi_awvalid[i] && m_axi_awready[i]) begin + m_axi_aw_ack[i] <= 1; end end end end - wire axi_write_ready [NUM_BANKS]; - - for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_axi_write_ready - assign axi_write_ready[i] = (m_axi_awready[i] || m_axi_aw_ack[i]) - && (m_axi_wready[i] || m_axi_w_ack[i]); - end - - // request ack - assign mem_req_ready = mem_req_rw ? 
axi_write_ready[req_bank_sel] : m_axi_arready[req_bank_sel]; - // AXI write request address channel for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_axi_write_addr assign m_axi_awvalid[i] = mem_req_valid && mem_req_rw && (req_bank_sel == i) && ~m_axi_aw_ack[i]; @@ -154,7 +141,7 @@ module VX_axi_adapter #( // AXI write request data channel for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_axi_write_data - assign m_axi_wvalid[i] = mem_req_valid && mem_req_rw && (req_bank_sel == i) && ~m_axi_w_ack[i]; + assign m_axi_wvalid[i] = mem_req_valid && mem_req_rw && (req_bank_sel == i) && axi_aw_ready[i]; assign m_axi_wdata[i] = mem_req_data; assign m_axi_wstrb[i] = mem_req_byteen; assign m_axi_wlast[i] = 1'b1; @@ -190,14 +177,13 @@ module VX_axi_adapter #( wire [NUM_BANKS-1:0][DATA_WIDTH+TAG_WIDTH-1:0] rsp_arb_data_in; wire [NUM_BANKS-1:0] rsp_arb_ready_in; - `UNUSED_VAR (m_axi_rlast) - for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_axi_read_rsp assign rsp_arb_valid_in[i] = m_axi_rvalid[i]; assign rsp_arb_data_in[i] = {m_axi_rdata[i], m_axi_rid[i]}; assign m_axi_rready[i] = rsp_arb_ready_in[i]; `RUNTIME_ASSERT(~m_axi_rvalid[i] || m_axi_rlast[i] == 1, ("%t: *** AXI response error", $time)) `RUNTIME_ASSERT(~m_axi_rvalid[i] || m_axi_rresp[i] == 0, ("%t: *** AXI response error", $time)) + `UNUSED_VAR (m_axi_rlast[i]) end VX_stream_arb #( diff --git a/runtime/common/common.h b/runtime/common/common.h index 1f718f938b..27335455ba 100644 --- a/runtime/common/common.h +++ b/runtime/common/common.h @@ -13,11 +13,12 @@ #pragma once +#include #include #include #include #include -#include +#include #include #include diff --git a/runtime/opae/Makefile b/runtime/opae/Makefile index 56355890db..b002375d9c 100644 --- a/runtime/opae/Makefile +++ b/runtime/opae/Makefile @@ -10,7 +10,7 @@ SYN_DIR := $(HW_DIR)/syn/altera/opae SRC_DIR := $(VORTEX_HOME)/runtime/opae CXXFLAGS += -std=c++11 -Wall -Wextra -pedantic -Wfatal-errors -CXXFLAGS += -I$(INC_DIR) -I$(COMMON_DIR) -I$(ROOT_DIR)/hw 
-I$(DESTDIR) +CXXFLAGS += -I$(INC_DIR) -I$(COMMON_DIR) -I$(ROOT_DIR)/hw -I$(DESTDIR) -I$(SIM_DIR)/common CXXFLAGS += -DXLEN_$(XLEN) # Position independent code diff --git a/runtime/xrt/vortex.cpp b/runtime/xrt/vortex.cpp index a02a849905..de65c1e856 100644 --- a/runtime/xrt/vortex.cpp +++ b/runtime/xrt/vortex.cpp @@ -18,15 +18,15 @@ #endif // XRT includes -#ifndef XRTSIM +#ifdef XRTSIM +#include +#else #include "experimental/xrt_bo.h" #include "experimental/xrt_device.h" #include "experimental/xrt_error.h" #include "experimental/xrt_ip.h" #include "experimental/xrt_kernel.h" #include "experimental/xrt_xclbin.h" -#else -#include #endif #include @@ -66,7 +66,7 @@ struct platform_info_t { }; static const platform_info_t g_platforms[] = { - {"vortex_xrtsim", 4, 16, 0x0}, // 16 x 64 KB = 1 MB + {"vortex_xrtsim", 0, 32, 0x0}, // 16 x 256 MB = 4 GB {"xilinx_u200", 2, 34, 0x0}, // 4 x 16 GB = 64 GB DDR4 {"xilinx_u250", 2, 34, 0x0}, // 4 x 16 GB = 64 GB DDR4 {"xilinx_u50", 5, 28, 0x0}, // 32 x 256 MB = 8 GB HBM2 @@ -258,7 +258,7 @@ class vx_device { return -1; }); #else - xrtKernelHandle xrtKernel = nullptr; + xrtKernelHandle xrtKernel = xrtDevice; #endif // get device name @@ -538,7 +538,6 @@ class vx_device { return err; }); #endif - DBGPRINT("*** write_register: addr=0x%x, value=0x%x\n", addr, value); return 0; } @@ -551,7 +550,6 @@ class vx_device { return err; }); #endif - DBGPRINT("*** read_register: addr=0x%x, value=0x%x\n", addr, *value); return 0; } diff --git a/runtime/common/malloc.h b/sim/common/mem_alloc.h similarity index 100% rename from runtime/common/malloc.h rename to sim/common/mem_alloc.h diff --git a/sim/common/mp_macros.h b/sim/common/mp_macros.h new file mode 100644 index 0000000000..fde5ac79eb --- /dev/null +++ b/sim/common/mp_macros.h @@ -0,0 +1,327 @@ +// Copyright © 2019-2023 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +// macro primitives + +#define MP_COMMA , +#define MP_REM(...) __VA_ARGS__ +#define MP_EAT(...) + +#define MP_STRINGIZE_(x) #x +#define MP_STRINGIZE(x) MP_STRINGIZE_(x) + +#define MP_CONCAT_(x, ...) x ## __VA_ARGS__ +#define MP_CONCAT(x, ...) MP_CONCAT_(x, __VA_ARGS__) + +#define MP_COUNTOF(arr) (sizeof(arr) / sizeof(arr[0])) + +// conditional macro + +#define MP_IIF_0(x, y) y +#define MP_IIF_1(x, y) x +#define MP_IIF(c) MP_CONCAT(MP_IIF_, c) + +#define MP_PAIR_FIRST(a, b) a +#define MP_PAIR_SECOND(a, b) b + +// pair macros + +#define MP_PAIR(x) MP_REM x +#define MP_PAIR_HEAD_(x, ...) MP_PAIR(x) +#define MP_PAIR_PROBE_(...) (__VA_ARGS__), +#define MP_PAIR_L_(...) 
MP_PAIR_HEAD_(__VA_ARGS__) +#define MP_PAIR_L(x) MP_PAIR_L_(MP_PAIR_PROBE_ x,) +#define MP_PAIR_R(x) MP_EAT x + +// separator macros + +#define MP_SEP_COMMA() , +#define MP_SEP_SEMICOLON() ; +#define MP_SEP_PLUS() + +#define MP_SEP_AND() & +#define MP_SEP_OR() | +#define MP_SEP_COLON() : +#define MP_SEP_SPACE() /**/ +#define MP_SEP_LESS() < +#define MP_SEP_GREATER() > +#define MP_SEP_ANDL() && +#define MP_SEP_ORL() || + +// MAKE_UNIQUE macro + +#define MP_MAKE_UNIQUE(x) MP_CONCAT(x, __COUNTER__) + +// increment macro + +#define MP_INC(x) MP_INC_ ## x +#define MP_INC_0 1 +#define MP_INC_1 2 +#define MP_INC_2 3 +#define MP_INC_3 4 +#define MP_INC_4 5 +#define MP_INC_5 6 +#define MP_INC_6 7 +#define MP_INC_7 8 +#define MP_INC_8 9 +#define MP_INC_9 10 +#define MP_INC_10 11 +#define MP_INC_11 12 +#define MP_INC_12 13 +#define MP_INC_13 14 +#define MP_INC_14 15 +#define MP_INC_15 16 +#define MP_INC_16 17 +#define MP_INC_17 18 +#define MP_INC_18 19 +#define MP_INC_19 20 +#define MP_INC_20 21 +#define MP_INC_21 22 +#define MP_INC_22 23 +#define MP_INC_23 24 +#define MP_INC_24 25 +#define MP_INC_25 26 +#define MP_INC_26 27 +#define MP_INC_27 28 +#define MP_INC_28 29 +#define MP_INC_29 30 +#define MP_INC_30 31 +#define MP_INC_31 32 +#define MP_INC_32 33 +#define MP_INC_33 34 +#define MP_INC_34 35 +#define MP_INC_35 36 +#define MP_INC_36 37 +#define MP_INC_37 38 +#define MP_INC_38 39 +#define MP_INC_39 40 +#define MP_INC_40 41 +#define MP_INC_41 42 +#define MP_INC_42 43 +#define MP_INC_43 44 +#define MP_INC_44 45 +#define MP_INC_45 46 +#define MP_INC_46 47 +#define MP_INC_47 48 +#define MP_INC_48 49 +#define MP_INC_49 50 +#define MP_INC_50 51 +#define MP_INC_51 52 +#define MP_INC_52 53 +#define MP_INC_53 54 +#define MP_INC_54 55 +#define MP_INC_55 56 +#define MP_INC_56 57 +#define MP_INC_57 58 +#define MP_INC_58 59 +#define MP_INC_59 60 +#define MP_INC_60 61 +#define MP_INC_61 62 +#define MP_INC_62 63 +#define MP_INC_63 64 + +// NARG macro + +#define MP_NARG_N(_1, _2, _3, _4, 
_5, _6, _7, _8, _9, _10,_11,_12,_13,_14,_15,_16, \ + _17,_18,_19,_20,_21,_22,_23,_24,_25,_26,_27,_28,_29,_30,_31,_32, \ + _33,_34,_35,_36,_37,_38,_39,_40,_41,_42,_43,_44,_45,_46,_47,_48, \ + _49,_50,_51,_52,_53,_54,_55,_56,_57,_58,_59,_60,_61,_62,_63, N, ...) N + +#define MP_NARG_R() 63,62,61,60,59,58,57,56,55,54,53,52,51,50,49,48, \ + 47,46,45,44,43,42,41,40,39,38,37,36,35,34,33,32, \ + 31,30,29,28,27,26,25,24,23,22,21,20,19,18,17,16, \ + 15,14,13,12,11,10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 + +#define MP_NARG_(...) MP_NARG_N(__VA_ARGS__) +#define MP_NARG(...) MP_NARG_(__VA_ARGS__, MP_NARG_R()) + +// FOR_EACH macro + +#define MP_FOR_EACH_1(idx, func, arg, sep, ...) func(arg, idx, __VA_ARGS__) +#define MP_FOR_EACH_2(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_1(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_3(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_2(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_4(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_3(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_5(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_4(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_6(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_5(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_7(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_6(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_8(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_7(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_9(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_8(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_10(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_9(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_11(idx, func, arg, sep, x, ...) 
func(arg, idx, x) sep() MP_FOR_EACH_10(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_12(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_11(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_13(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_12(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_14(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_13(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_15(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_14(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_16(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_15(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_17(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_16(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_18(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_17(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_19(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_18(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_20(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_19(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_21(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_20(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_22(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_21(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_23(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_22(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_24(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_23(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_25(idx, func, arg, sep, x, ...) 
func(arg, idx, x) sep() MP_FOR_EACH_24(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_26(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_25(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_27(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_26(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_28(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_27(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_29(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_28(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_30(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_29(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_31(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_30(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_32(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_31(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_33(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_32(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_34(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_33(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_35(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_34(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_36(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_35(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_37(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_36(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_38(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_37(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_39(idx, func, arg, sep, x, ...) 
func(arg, idx, x) sep() MP_FOR_EACH_38(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_40(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_39(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_41(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_40(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_42(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_41(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_43(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_42(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_44(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_43(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_45(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_44(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_46(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_45(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_47(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_46(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_48(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_47(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_49(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_48(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_50(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_49(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_51(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_50(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_52(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_51(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_53(idx, func, arg, sep, x, ...) 
func(arg, idx, x) sep() MP_FOR_EACH_52(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_54(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_53(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_55(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_54(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_56(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_55(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_57(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_56(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_58(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_57(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_59(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_58(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_60(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_59(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_61(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_60(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_62(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_61(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_63(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_62(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_64(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_63(MP_INC(idx), func, arg, sep, __VA_ARGS__) + +#define MP_FOR_EACH_(N, func, arg, sep, ...) MP_CONCAT(MP_FOR_EACH_, N)(0, func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH(func, arg, sep, ...) MP_FOR_EACH_(MP_NARG(__VA_ARGS__), func, arg, sep, __VA_ARGS__) + +// REVERSE_FOR_EACH macro + +#define MP_REVERSE_FOR_EACH_1(func, arg, sep, ...) func(arg, 0, __VA_ARGS__) +#define MP_REVERSE_FOR_EACH_2(func, arg, sep, x, ...) 
MP_REVERSE_FOR_EACH_1(func, arg, sep, __VA_ARGS__) sep() func(arg, 1, x) +#define MP_REVERSE_FOR_EACH_3(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_2(func, arg, sep, __VA_ARGS__) sep() func(arg, 2, x) +#define MP_REVERSE_FOR_EACH_4(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_3(func, arg, sep, __VA_ARGS__) sep() func(arg, 3, x) +#define MP_REVERSE_FOR_EACH_5(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_4(func, arg, sep, __VA_ARGS__) sep() func(arg, 4, x) +#define MP_REVERSE_FOR_EACH_6(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_5(func, arg, sep, __VA_ARGS__) sep() func(arg, 5, x) +#define MP_REVERSE_FOR_EACH_7(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_6(func, arg, sep, __VA_ARGS__) sep() func(arg, 6, x) +#define MP_REVERSE_FOR_EACH_8(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_7(func, arg, sep, __VA_ARGS__) sep() func(arg, 7, x) +#define MP_REVERSE_FOR_EACH_9(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_8(func, arg, sep, __VA_ARGS__) sep() func(arg, 8, x) +#define MP_REVERSE_FOR_EACH_10(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_9(func, arg, sep, __VA_ARGS__) sep() func(arg, 9, x) +#define MP_REVERSE_FOR_EACH_11(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_10(func, arg, sep, __VA_ARGS__) sep() func(arg, 10, x) +#define MP_REVERSE_FOR_EACH_12(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_11(func, arg, sep, __VA_ARGS__) sep() func(arg, 11, x) +#define MP_REVERSE_FOR_EACH_13(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_12(func, arg, sep, __VA_ARGS__) sep() func(arg, 12, x) +#define MP_REVERSE_FOR_EACH_14(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_13(func, arg, sep, __VA_ARGS__) sep() func(arg, 13, x) +#define MP_REVERSE_FOR_EACH_15(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_14(func, arg, sep, __VA_ARGS__) sep() func(arg, 14, x) +#define MP_REVERSE_FOR_EACH_16(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_15(func, arg, sep, __VA_ARGS__) sep() func(arg, 15, x) +#define MP_REVERSE_FOR_EACH_17(func, arg, sep, x, ...) 
MP_REVERSE_FOR_EACH_16(func, arg, sep, __VA_ARGS__) sep() func(arg, 16, x) +#define MP_REVERSE_FOR_EACH_18(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_17(func, arg, sep, __VA_ARGS__) sep() func(arg, 17, x) +#define MP_REVERSE_FOR_EACH_19(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_18(func, arg, sep, __VA_ARGS__) sep() func(arg, 18, x) +#define MP_REVERSE_FOR_EACH_20(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_19(func, arg, sep, __VA_ARGS__) sep() func(arg, 19, x) +#define MP_REVERSE_FOR_EACH_21(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_20(func, arg, sep, __VA_ARGS__) sep() func(arg, 20, x) +#define MP_REVERSE_FOR_EACH_22(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_21(func, arg, sep, __VA_ARGS__) sep() func(arg, 21, x) +#define MP_REVERSE_FOR_EACH_23(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_22(func, arg, sep, __VA_ARGS__) sep() func(arg, 22, x) +#define MP_REVERSE_FOR_EACH_24(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_23(func, arg, sep, __VA_ARGS__) sep() func(arg, 23, x) +#define MP_REVERSE_FOR_EACH_25(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_24(func, arg, sep, __VA_ARGS__) sep() func(arg, 24, x) +#define MP_REVERSE_FOR_EACH_26(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_25(func, arg, sep, __VA_ARGS__) sep() func(arg, 25, x) +#define MP_REVERSE_FOR_EACH_27(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_26(func, arg, sep, __VA_ARGS__) sep() func(arg, 26, x) +#define MP_REVERSE_FOR_EACH_28(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_27(func, arg, sep, __VA_ARGS__) sep() func(arg, 27, x) +#define MP_REVERSE_FOR_EACH_29(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_28(func, arg, sep, __VA_ARGS__) sep() func(arg, 28, x) +#define MP_REVERSE_FOR_EACH_30(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_29(func, arg, sep, __VA_ARGS__) sep() func(arg, 29, x) +#define MP_REVERSE_FOR_EACH_31(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_30(func, arg, sep, __VA_ARGS__) sep() func(arg, 30, x) +#define MP_REVERSE_FOR_EACH_32(func, arg, sep, x, ...) 
MP_REVERSE_FOR_EACH_31(func, arg, sep, __VA_ARGS__) sep() func(arg, 31, x) +#define MP_REVERSE_FOR_EACH_33(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_32(func, arg, sep, __VA_ARGS__) sep() func(arg, 32, x) +#define MP_REVERSE_FOR_EACH_34(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_33(func, arg, sep, __VA_ARGS__) sep() func(arg, 33, x) +#define MP_REVERSE_FOR_EACH_35(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_34(func, arg, sep, __VA_ARGS__) sep() func(arg, 34, x) +#define MP_REVERSE_FOR_EACH_36(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_35(func, arg, sep, __VA_ARGS__) sep() func(arg, 35, x) +#define MP_REVERSE_FOR_EACH_37(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_36(func, arg, sep, __VA_ARGS__) sep() func(arg, 36, x) +#define MP_REVERSE_FOR_EACH_38(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_37(func, arg, sep, __VA_ARGS__) sep() func(arg, 37, x) +#define MP_REVERSE_FOR_EACH_39(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_38(func, arg, sep, __VA_ARGS__) sep() func(arg, 38, x) +#define MP_REVERSE_FOR_EACH_40(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_39(func, arg, sep, __VA_ARGS__) sep() func(arg, 39, x) +#define MP_REVERSE_FOR_EACH_41(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_40(func, arg, sep, __VA_ARGS__) sep() func(arg, 40, x) +#define MP_REVERSE_FOR_EACH_42(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_41(func, arg, sep, __VA_ARGS__) sep() func(arg, 41, x) +#define MP_REVERSE_FOR_EACH_43(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_42(func, arg, sep, __VA_ARGS__) sep() func(arg, 42, x) +#define MP_REVERSE_FOR_EACH_44(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_43(func, arg, sep, __VA_ARGS__) sep() func(arg, 43, x) +#define MP_REVERSE_FOR_EACH_45(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_44(func, arg, sep, __VA_ARGS__) sep() func(arg, 44, x) +#define MP_REVERSE_FOR_EACH_46(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_45(func, arg, sep, __VA_ARGS__) sep() func(arg, 45, x) +#define MP_REVERSE_FOR_EACH_47(func, arg, sep, x, ...) 
MP_REVERSE_FOR_EACH_46(func, arg, sep, __VA_ARGS__) sep() func(arg, 46, x) +#define MP_REVERSE_FOR_EACH_48(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_47(func, arg, sep, __VA_ARGS__) sep() func(arg, 47, x) +#define MP_REVERSE_FOR_EACH_49(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_48(func, arg, sep, __VA_ARGS__) sep() func(arg, 48, x) +#define MP_REVERSE_FOR_EACH_50(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_49(func, arg, sep, __VA_ARGS__) sep() func(arg, 49, x) +#define MP_REVERSE_FOR_EACH_51(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_50(func, arg, sep, __VA_ARGS__) sep() func(arg, 50, x) +#define MP_REVERSE_FOR_EACH_52(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_51(func, arg, sep, __VA_ARGS__) sep() func(arg, 51, x) +#define MP_REVERSE_FOR_EACH_53(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_52(func, arg, sep, __VA_ARGS__) sep() func(arg, 52, x) +#define MP_REVERSE_FOR_EACH_54(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_53(func, arg, sep, __VA_ARGS__) sep() func(arg, 53, x) +#define MP_REVERSE_FOR_EACH_55(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_54(func, arg, sep, __VA_ARGS__) sep() func(arg, 54, x) +#define MP_REVERSE_FOR_EACH_56(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_55(func, arg, sep, __VA_ARGS__) sep() func(arg, 55, x) +#define MP_REVERSE_FOR_EACH_57(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_56(func, arg, sep, __VA_ARGS__) sep() func(arg, 56, x) +#define MP_REVERSE_FOR_EACH_58(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_57(func, arg, sep, __VA_ARGS__) sep() func(arg, 57, x) +#define MP_REVERSE_FOR_EACH_59(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_58(func, arg, sep, __VA_ARGS__) sep() func(arg, 58, x) +#define MP_REVERSE_FOR_EACH_60(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_59(func, arg, sep, __VA_ARGS__) sep() func(arg, 59, x) +#define MP_REVERSE_FOR_EACH_61(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_60(func, arg, sep, __VA_ARGS__) sep() func(arg, 60, x) +#define MP_REVERSE_FOR_EACH_62(func, arg, sep, x, ...) 
MP_REVERSE_FOR_EACH_61(func, arg, sep, __VA_ARGS__) sep() func(arg, 61, x) +#define MP_REVERSE_FOR_EACH_63(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_62(func, arg, sep, __VA_ARGS__) sep() func(arg, 62, x) +#define MP_REVERSE_FOR_EACH_64(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_63(func, arg, sep, __VA_ARGS__) sep() func(arg, 63, x) + +#define MP_REVERSE_FOR_EACH_(N, func, arg, sep, ...) MP_CONCAT(MP_REVERSE_FOR_EACH_, N)(func, arg, sep, __VA_ARGS__) +#define MP_REVERSE_FOR_EACH(func, arg, sep, ...) MP_REVERSE_FOR_EACH_(MP_NARG(__VA_ARGS__), func, arg, sep, __VA_ARGS__) + +#define MP_FIRST_ARG_(N, ...) N +#define MP_FIRST_ARG(...) MP_FIRST_ARG_(__VA_ARGS__, ignore) + +// MP_REPEAT macro + +#define MP_REPEAT_0(func, sep) +#define MP_REPEAT_1(func, sep) func(0) +#define MP_REPEAT_2(func, sep) MP_REPEAT_1(func, sep) sep func(1) +#define MP_REPEAT_3(func, sep) MP_REPEAT_2(func, sep) sep func(2) +#define MP_REPEAT_4(func, sep) MP_REPEAT_3(func, sep) sep func(3) +#define MP_REPEAT_5(func, sep) MP_REPEAT_4(func, sep) sep func(4) +#define MP_REPEAT_6(func, sep) MP_REPEAT_5(func, sep) sep func(5) +#define MP_REPEAT_7(func, sep) MP_REPEAT_6(func, sep) sep func(6) +#define MP_REPEAT_8(func, sep) MP_REPEAT_7(func, sep) sep func(7) +#define MP_REPEAT_9(func, sep) MP_REPEAT_8(func, sep) sep func(8) +#define MP_REPEAT_10(func, sep) MP_REPEAT_9(func, sep) sep func(9) +#define MP_REPEAT_11(func, sep) MP_REPEAT_10(func, sep) sep func(10) +#define MP_REPEAT_12(func, sep) MP_REPEAT_11(func, sep) sep func(11) +#define MP_REPEAT_13(func, sep) MP_REPEAT_12(func, sep) sep func(12) +#define MP_REPEAT_14(func, sep) MP_REPEAT_13(func, sep) sep func(13) +#define MP_REPEAT_15(func, sep) MP_REPEAT_14(func, sep) sep func(14) +#define MP_REPEAT_16(func, sep) MP_REPEAT_15(func, sep) sep func(15) +#define MP_REPEAT_17(func, sep) MP_REPEAT_16(func, sep) sep func(16) +#define MP_REPEAT_18(func, sep) MP_REPEAT_17(func, sep) sep func(17) +#define MP_REPEAT_19(func, sep) MP_REPEAT_18(func, sep) sep 
func(18) +#define MP_REPEAT_20(func, sep) MP_REPEAT_19(func, sep) sep func(19) +#define MP_REPEAT_21(func, sep) MP_REPEAT_20(func, sep) sep func(20) +#define MP_REPEAT_22(func, sep) MP_REPEAT_21(func, sep) sep func(21) +#define MP_REPEAT_23(func, sep) MP_REPEAT_22(func, sep) sep func(22) +#define MP_REPEAT_24(func, sep) MP_REPEAT_23(func, sep) sep func(23) +#define MP_REPEAT_25(func, sep) MP_REPEAT_24(func, sep) sep func(24) +#define MP_REPEAT_26(func, sep) MP_REPEAT_25(func, sep) sep func(25) +#define MP_REPEAT_27(func, sep) MP_REPEAT_26(func, sep) sep func(26) +#define MP_REPEAT_28(func, sep) MP_REPEAT_27(func, sep) sep func(27) +#define MP_REPEAT_29(func, sep) MP_REPEAT_28(func, sep) sep func(28) +#define MP_REPEAT_30(func, sep) MP_REPEAT_29(func, sep) sep func(29) +#define MP_REPEAT_31(func, sep) MP_REPEAT_30(func, sep) sep func(30) +#define MP_REPEAT_32(func, sep) MP_REPEAT_31(func, sep) sep func(31) +#define MP_REPEAT(N, func, sep) MP_CONCAT(MP_REPEAT_, N)(func, sep) diff --git a/sim/opaesim/fpga.cpp b/sim/opaesim/fpga.cpp index 6c8ce8b2f2..d16ef97a15 100644 --- a/sim/opaesim/fpga.cpp +++ b/sim/opaesim/fpga.cpp @@ -93,6 +93,8 @@ extern fpga_result fpgaClose(fpga_handle handle) { return FPGA_INVALID_PARAM; auto sim = reinterpret_cast(handle); + sim->shutdown(); + delete sim; return FPGA_OK; diff --git a/sim/opaesim/opae_sim.cpp b/sim/opaesim/opae_sim.cpp index f5acc3d215..430e4478bb 100644 --- a/sim/opaesim/opae_sim.cpp +++ b/sim/opaesim/opae_sim.cpp @@ -110,6 +110,9 @@ class opae_sim::Impl { for (auto& buffer : host_buffers_) { aligned_free(buffer.second.data); } + if (ram_) { + delete ram_; + } #ifdef VCD_OUTPUT if (tfp_) { tfp_->close(); @@ -119,9 +122,6 @@ class opae_sim::Impl { if (device_) { delete device_; } - if (ram_) { - delete ram_; - } } int init() { @@ -142,11 +142,15 @@ class opae_sim::Impl { tfp_->open("trace.vcd"); #endif + // allocate RAM ram_ = new RAM(0, RAM_PAGE_SIZE); - + // reset the device this->reset(); + // Turn on assertion after 
reset + Verilated::assertOn(true); + // launch execution thread future_ = std::async(std::launch::async, [&]{ while (!stop_) { @@ -158,6 +162,13 @@ class opae_sim::Impl { return 0; } + void shutdown() { + stop_ = true; + if (future_.valid()) { + future_.wait(); + } + } + int prepare_buffer(uint64_t len, void **buf_addr, uint64_t *wsid, int flags) { auto alloc = aligned_malloc(len, CACHE_BLOCK_SIZE); if (alloc == NULL) @@ -256,9 +267,6 @@ class opae_sim::Impl { device_->clk = 1; this->eval(); } - - // Turn on assertion after reset - Verilated::assertOn(true); } void tick() { @@ -279,13 +287,13 @@ class opae_sim::Impl { } } + dram_sim_.tick(); + device_->clk = 0; this->eval(); device_->clk = 1; this->eval(); - dram_sim_.tick(); - #ifndef NDEBUG fflush(stdout); #endif @@ -399,7 +407,6 @@ class opae_sim::Impl { void avs_bus_reset() { for (int b = 0; b < PLATFORM_PARAM_LOCAL_MEMORY_BANKS; ++b) { - pending_mem_reqs_[b].clear(); device_->avs_readdatavalid[b] = 0; device_->avs_waitrequest[b] = 0; } @@ -422,7 +429,7 @@ class opae_sim::Impl { // process memory requests assert(!device_->avs_read[b] || !device_->avs_write[b]); - unsigned byte_addr = (device_->avs_address[b] * PLATFORM_PARAM_LOCAL_MEMORY_BANKS + b) * PLATFORM_PARAM_LOCAL_MEMORY_DATA_SIZE; + uint64_t byte_addr = (uint64_t(device_->avs_address[b]) * PLATFORM_PARAM_LOCAL_MEMORY_BANKS + b) * PLATFORM_PARAM_LOCAL_MEMORY_DATA_SIZE; if (device_->avs_write[b]) { uint64_t byteen = device_->avs_byteenable[b]; uint8_t* data = (uint8_t*)(device_->avs_writedata[b].data()); @@ -432,7 +439,7 @@ class opae_sim::Impl { } } - /*printf("%0ld: [sim] MEM Wr Req: bank=%d, 0x%x, data=0x", timestamp, b, byte_addr); + /*printf("%0ld: [sim] MEM Wr Req: bank=%d, addr=0x%lx, data=0x", timestamp, b, byte_addr); for (int i = 0; i < PLATFORM_PARAM_LOCAL_MEMORY_DATA_SIZE; i++) { printf("%02x", data[(PLATFORM_PARAM_LOCAL_MEMORY_DATA_SIZE-1)-i]); } @@ -456,7 +463,7 @@ class opae_sim::Impl { mem_req->ready = false; 
pending_mem_reqs_[b].emplace_back(mem_req); - /*printf("%0ld: [sim] MEM Rd Req: bank=%d, addr=%x, pending={", timestamp, b, mem_req.addr * PLATFORM_PARAM_LOCAL_MEMORY_DATA_SIZE); + /*printf("%0ld: [sim] MEM Rd Req: bank=%d, addr=0x%lx, pending={", timestamp, b, mem_req.addr * PLATFORM_PARAM_LOCAL_MEMORY_DATA_SIZE); for (auto& req : pending_mem_reqs_[b]) { if (req.cycles_left != 0) printf(" !%0x", req.addr * PLATFORM_PARAM_LOCAL_MEMORY_DATA_SIZE); @@ -537,6 +544,10 @@ int opae_sim::init() { return impl_->init(); } +void opae_sim::shutdown() { + impl_->shutdown(); +} + int opae_sim::prepare_buffer(uint64_t len, void **buf_addr, uint64_t *wsid, int flags) { return impl_->prepare_buffer(len, buf_addr, wsid, flags); } diff --git a/sim/opaesim/opae_sim.h b/sim/opaesim/opae_sim.h index a04ade0a09..454cc1bf74 100644 --- a/sim/opaesim/opae_sim.h +++ b/sim/opaesim/opae_sim.h @@ -25,6 +25,8 @@ class opae_sim { int init(); + void shutdown(); + int prepare_buffer(uint64_t len, void **buf_addr, uint64_t *wsid, int flags); void release_buffer(uint64_t wsid); diff --git a/sim/rtlsim/Makefile b/sim/rtlsim/Makefile index 24287aa56e..ecaee717b4 100644 --- a/sim/rtlsim/Makefile +++ b/sim/rtlsim/Makefile @@ -39,13 +39,6 @@ SRCS = $(COMMON_DIR)/util.cpp $(COMMON_DIR)/mem.cpp $(COMMON_DIR)/rvfloats.cpp $ SRCS += $(DPI_DIR)/util_dpi.cpp $(DPI_DIR)/float_dpi.cpp SRCS += $(SRC_DIR)/processor.cpp -ifdef AXI_BUS - TOP = Vortex_axi - CXXFLAGS += -DAXI_BUS -else - TOP = Vortex -endif - VL_FLAGS = --exe VL_FLAGS += --language 1800-2009 --assert -Wall -Wpedantic VL_FLAGS += -Wno-DECLFILENAME -Wno-REDEFMACRO @@ -56,7 +49,7 @@ VL_FLAGS += -DXLEN_$(XLEN) VL_FLAGS += $(CONFIGS) VL_FLAGS += $(RTL_INCLUDE) VL_FLAGS += $(RTL_PKGS) -VL_FLAGS += --cc $(TOP) --top-module $(TOP) +VL_FLAGS += --cc Vortex --top-module Vortex CXXFLAGS += $(CONFIGS) diff --git a/sim/rtlsim/processor.cpp b/sim/rtlsim/processor.cpp index f52e7c8da5..1f6af60dd1 100644 --- a/sim/rtlsim/processor.cpp +++ b/sim/rtlsim/processor.cpp 
@@ -13,13 +13,7 @@ #include "processor.h" -#ifdef AXI_BUS -#include "VVortex_axi.h" -typedef VVortex_axi Device; -#else #include "VVortex.h" -typedef VVortex Device; -#endif #ifdef VCD_OUTPUT #include @@ -106,7 +100,7 @@ class Processor::Impl { Verilated::assertOn(false); // create RTL module instance - device_ = new Device(); + device_ = new VVortex(); #ifdef VCD_OUTPUT Verilated::traceEverOn(true); @@ -116,7 +110,7 @@ class Processor::Impl { #endif ram_ = nullptr; - + // reset the device this->reset(); @@ -154,9 +148,11 @@ class Processor::Impl { std::cout << std::dec << timestamp << ": [sim] run()" << std::endl; #endif + // reset device + this->reset(); + // start execution running_ = true; - device_->reset = 0; // wait on device to go busy while (!device_->busy) { @@ -168,9 +164,6 @@ class Processor::Impl { this->tick(); } - // reset device - this->reset(); - this->cout_flush(); } @@ -178,14 +171,16 @@ class Processor::Impl { device_->dcr_wr_valid = 1; device_->dcr_wr_addr = addr; device_->dcr_wr_data = value; - while (device_->dcr_wr_valid) { - this->tick(); - } + this->tick(); + device_->dcr_wr_valid = 0; } private: void reset() { + this->mem_bus_reset(); + this->dcr_bus_reset(); + running_ = false; print_bufs_.clear(); @@ -198,13 +193,17 @@ class Processor::Impl { } mem_rd_rsp_active_ = false; - mem_wr_rsp_active_ = false; - this->mem_bus_reset(); + device_->reset = 1; - this->dcr_bus_reset(); + for (int i = 0; i < RESET_DELAY; ++i) { + device_->clk = 0; + this->eval(); + device_->clk = 1; + this->eval(); + } - device_->reset = 1; + device_->reset = 0; for (int i = 0; i < RESET_DELAY; ++i) { device_->clk = 0; @@ -215,20 +214,7 @@ class Processor::Impl { } void tick() { - - device_->clk = 0; - this->eval(); - - this->mem_bus_eval(0); - this->dcr_bus_eval(0); - - device_->clk = 1; - this->eval(); - - this->mem_bus_eval(1); - this->dcr_bus_eval(1); - - dram_sim_.tick(); + this->mem_bus_eval(); if (!dram_queue_.empty()) { auto mem_req = dram_queue_.front(); @@ 
-244,6 +230,13 @@ class Processor::Impl { } } + dram_sim_.tick(); + + device_->clk = 0; + this->eval(); + device_->clk = 1; + this->eval(); + #ifndef NDEBUG fflush(stdout); #endif @@ -261,207 +254,39 @@ class Processor::Impl { ++timestamp; } -#ifdef AXI_BUS - - void mem_bus_reset() { - device_->m_axi_wready[0] = 0; - device_->m_axi_awready[0] = 0; - device_->m_axi_arready[0] = 0; - device_->m_axi_rvalid[0] = 0; - device_->m_axi_bvalid[0] = 0; - } - - void mem_bus_eval(bool clk) { - if (!clk) { - mem_rd_rsp_ready_ = device_->m_axi_rready[0]; - mem_wr_rsp_ready_ = device_->m_axi_bready[0]; - return; - } - - if (ram_ == nullptr) { - device_->m_axi_wready[0] = 0; - device_->m_axi_awready[0] = 0; - device_->m_axi_arready[0] = 0; - return; - } - - // process memory read responses - if (mem_rd_rsp_active_ - && device_->m_axi_rvalid[0] && mem_rd_rsp_ready_) { - mem_rd_rsp_active_ = false; - } - if (!mem_rd_rsp_active_) { - if (!pending_mem_reqs_.empty() - && (*pending_mem_reqs_.begin())->ready - && !(*pending_mem_reqs_.begin())->write) { - auto mem_rsp_it = pending_mem_reqs_.begin(); - auto mem_rsp = *mem_rsp_it; - /* - printf("%0ld: [sim] MEM Rd Rsp: addr=0x%0lx, data=0x", timestamp, mem_rsp->addr); - for (int i = MEM_BLOCK_SIZE-1; i >= 0; --i) { - printf("%02x", mem_rsp->block[i]); - } - printf("\n"); - */ - device_->m_axi_rvalid[0] = 1; - device_->m_axi_rid[0] = mem_rsp->tag; - device_->m_axi_rresp[0] = 0; - device_->m_axi_rlast[0] = 1; - memcpy(device_->m_axi_rdata[0].data(), mem_rsp->block.data(), MEM_BLOCK_SIZE); - pending_mem_reqs_.erase(mem_rsp_it); - mem_rd_rsp_active_ = true; - delete mem_rsp; - } else { - device_->m_axi_rvalid[0] = 0; - } - } - - // process memory write responses - if (mem_wr_rsp_active_ - && device_->m_axi_bvalid[0] && mem_wr_rsp_ready_) { - mem_wr_rsp_active_ = false; - } - if (!mem_wr_rsp_active_) { - if (!pending_mem_reqs_.empty() - && (*pending_mem_reqs_.begin())->ready - && (*pending_mem_reqs_.begin())->write) { - auto mem_rsp_it = 
pending_mem_reqs_.begin(); - auto mem_rsp = *mem_rsp_it; - /* - printf("%0ld: [sim] MEM Wr Rsp: addr=0x%0lx\n", timestamp, mem_rsp->addr); - */ - device_->m_axi_bvalid[0] = 1; - device_->m_axi_bid[0] = mem_rsp->tag; - device_->m_axi_bresp[0] = 0; - pending_mem_reqs_.erase(mem_rsp_it); - mem_wr_rsp_active_ = true; - delete mem_rsp; - } else { - device_->m_axi_bvalid[0] = 0; - } - } - - // select the memory bank - uint32_t req_addr = device_->m_axi_wvalid[0] ? device_->m_axi_awaddr[0] : device_->m_axi_araddr[0]; - - // process memory requests - if ((device_->m_axi_wvalid[0] || device_->m_axi_arvalid[0]) && running_) { - if (device_->m_axi_wvalid[0]) { - auto byteen = device_->m_axi_wstrb[0]; - auto base_addr = device_->m_axi_awaddr[0]; - auto data = (uint8_t*)device_->m_axi_wdata[0].data(); - - if (base_addr >= uint64_t(IO_COUT_ADDR) - && base_addr < (uint64_t(IO_COUT_ADDR) + IO_COUT_SIZE)) { - // process console output - for (int i = 0; i < MEM_BLOCK_SIZE; i++) { - if ((byteen >> i) & 0x1) { - auto& ss_buf = print_bufs_[i]; - char c = data[i]; - ss_buf << c; - if (c == '\n') { - std::cout << std::dec << "#" << i << ": " << ss_buf.str() << std::flush; - ss_buf.str(""); - } - } - } - } else { - // process writes - /* - printf("%0ld: [sim] MEM Wr: addr=0x%0lx, byteen=0x", timestamp, base_addr); - for (int i = (MEM_BLOCK_SIZE/4)-1; i >= 0; --i) { - printf("%x", (int)((byteen >> (4 * i)) & 0xf)); - } - printf(", data=0x"); - for (int i = MEM_BLOCK_SIZE-1; i >= 0; --i) { - printf("%02x", data[i]); - } - printf("\n"); - */ - for (int i = 0; i < MEM_BLOCK_SIZE; i++) { - if ((byteen >> i) & 0x1) { - (*ram_)[base_addr + i] = data[i]; - } - } - - auto mem_req = new mem_req_t(); - mem_req->tag = device_->m_axi_awid[0]; - mem_req->addr = device_->m_axi_awaddr[0]; - mem_req->write = true; - mem_req->ready = false; - pending_mem_reqs_.emplace_back(mem_req); - - // send dram request - dram_queue_.push(mem_req); - } - } else { - // process reads - auto mem_req = new mem_req_t(); - 
mem_req->tag = device_->m_axi_arid[0]; - mem_req->addr = device_->m_axi_araddr[0]; - ram_->read(mem_req->block.data(), device_->m_axi_araddr[0], MEM_BLOCK_SIZE); - mem_req->write = false; - mem_req->ready = false; - pending_mem_reqs_.emplace_back(mem_req); - - // send dram request - dram_queue_.push(mem_req); - } - } - - device_->m_axi_wready[0] = running_; - device_->m_axi_awready[0] = running_; - device_->m_axi_arready[0] = running_; - } - -#else - void mem_bus_reset() { device_->mem_req_ready = 0; device_->mem_rsp_valid = 0; } - void mem_bus_eval(bool clk) { - if (!clk) { - mem_rd_rsp_ready_ = device_->mem_rsp_ready; - return; - } - - if (ram_ == nullptr) { - device_->mem_req_ready = 0; - return; - } - + void mem_bus_eval() { // process memory read responses - if (mem_rd_rsp_active_ - && device_->mem_rsp_valid && mem_rd_rsp_ready_) { + if (mem_rd_rsp_active_ && device_->mem_rsp_ready) { + device_->mem_rsp_valid = 0; mem_rd_rsp_active_ = false; } if (!mem_rd_rsp_active_) { if (!pending_mem_reqs_.empty() && (*pending_mem_reqs_.begin())->ready) { - device_->mem_rsp_valid = 1; auto mem_rsp_it = pending_mem_reqs_.begin(); auto mem_rsp = *mem_rsp_it; - /* - printf("%0ld: [sim] MEM Rd Rsp: tag=0x%0lx, addr=0x%0lx, data=0x", timestamp, mem_rsp->tag, mem_rsp->addr); + /*printf("%0ld: [sim] MEM Rd Rsp: tag=0x%0lx, addr=0x%0lx, data=0x", timestamp, mem_rsp->tag, mem_rsp->addr); for (int i = MEM_BLOCK_SIZE-1; i >= 0; --i) { - printf("%02x", mem_rsp->block[i]); + printf("%02x", mem_rsp->data[i]); } printf("\n"); */ - memcpy(VDataCast::get(device_->mem_rsp_data), mem_rsp->block.data(), MEM_BLOCK_SIZE); + device_->mem_rsp_valid = 1; + memcpy(VDataCast::get(device_->mem_rsp_data), mem_rsp->data.data(), MEM_BLOCK_SIZE); device_->mem_rsp_tag = mem_rsp->tag; pending_mem_reqs_.erase(mem_rsp_it); mem_rd_rsp_active_ = true; delete mem_rsp; - } else { - device_->mem_rsp_valid = 0; } } // process memory requests - if (device_->mem_req_valid && running_) { + if (device_->mem_req_valid 
&& device_->mem_req_ready) { uint64_t byte_addr = (device_->mem_req_addr * MEM_BLOCK_SIZE); if (device_->mem_req_rw) { auto byteen = device_->mem_req_byteen; @@ -516,7 +341,7 @@ class Processor::Impl { mem_req->addr = byte_addr; mem_req->write = false; mem_req->ready = false; - ram_->read(mem_req->block.data(), byte_addr, MEM_BLOCK_SIZE); + ram_->read(mem_req->data.data(), byte_addr, MEM_BLOCK_SIZE); pending_mem_reqs_.emplace_back(mem_req); //printf("%0ld: [sim] MEM Rd Req: addr=0x%0lx, tag=0x%0lx\n", timestamp, byte_addr, device_->mem_req_tag); @@ -529,21 +354,10 @@ class Processor::Impl { device_->mem_req_ready = running_; } -#endif - void dcr_bus_reset() { device_->dcr_wr_valid = 0; } - void dcr_bus_eval(bool clk) { - if (!clk) { - return; - } - if (device_->dcr_wr_valid) { - device_->dcr_wr_valid = 0; - } - } - void wait(uint32_t cycles) { for (int i = 0; i < cycles; ++i) { this->tick(); @@ -553,8 +367,8 @@ class Processor::Impl { private: typedef struct { - Device* device; - std::array block; + VVortex* device; + std::array data; uint64_t addr; uint64_t tag; bool write; @@ -569,7 +383,7 @@ class Processor::Impl { DramSim dram_sim_; - Device* device_; + VVortex* device_; #ifdef VCD_OUTPUT VerilatedVcdC *tfp_; @@ -578,10 +392,6 @@ class Processor::Impl { RAM* ram_; bool mem_rd_rsp_active_; - bool mem_rd_rsp_ready_; - - bool mem_wr_rsp_active_; - bool mem_wr_rsp_ready_; bool running_; }; diff --git a/sim/xrtsim/Makefile b/sim/xrtsim/Makefile index 3e256ffb34..6296b88ebc 100644 --- a/sim/xrtsim/Makefile +++ b/sim/xrtsim/Makefile @@ -32,11 +32,22 @@ DBG_SCOPE_FLAGS += -DDBG_SCOPE_FETCH DBG_SCOPE_FLAGS += -DDBG_SCOPE_LSU DBG_SCOPE_FLAGS += -DDBG_SCOPE_MSCHED +# AFU parameters +ifeq (,$(findstring M_AXI_MEM_NUM_BANKS,$(CONFIGS))) + CONFIGS += -DM_AXI_MEM_NUM_BANKS=1 +endif +ifeq (,$(findstring M_AXI_MEM_ADDR_WIDTH,$(CONFIGS))) + CONFIGS += -DM_AXI_MEM_ADDR_WIDTH=32 +endif +ifeq (,$(findstring M_AXI_MEM_DATA_WIDTH,$(CONFIGS))) + CONFIGS += -DM_AXI_MEM_DATA_WIDTH=512 
+endif + DBG_FLAGS += -DDEBUG_LEVEL=$(DEBUG) -DVCD_OUTPUT $(DBG_TRACE_FLAGS) SRCS = $(COMMON_DIR)/util.cpp $(COMMON_DIR)/mem.cpp $(COMMON_DIR)/rvfloats.cpp $(COMMON_DIR)/dram_sim.cpp SRCS += $(DPI_DIR)/util_dpi.cpp $(DPI_DIR)/float_dpi.cpp -SRCS += $(SRC_DIR)/fpga.cpp $(SRC_DIR)/xrt_sim.cpp +SRCS += $(SRC_DIR)/xrt.cpp $(SRC_DIR)/xrt_sim.cpp RTL_PKGS += $(RTL_DIR)/VX_gpu_pkg.sv $(RTL_DIR)/fpu/VX_fpu_pkg.sv diff --git a/sim/xrtsim/fpga.cpp b/sim/xrtsim/xrt.cpp similarity index 62% rename from sim/xrtsim/fpga.cpp rename to sim/xrtsim/xrt.cpp index bc1f0cb07b..c0b5aac288 100644 --- a/sim/xrtsim/fpga.cpp +++ b/sim/xrtsim/xrt.cpp @@ -19,7 +19,7 @@ #include #include #include -#include "fpga.h" +#include "xrt.h" #include "xrt_sim.h" #include #include @@ -30,6 +30,13 @@ using namespace vortex; extern "C" { #endif +typedef struct { + size_t size; + xrt_sim* sim; + uint32_t bank; + uint64_t addr; +} buffer_t; + extern xrtDeviceHandle xrtDeviceOpen(unsigned int index) { if (index != 0) return nullptr; @@ -45,6 +52,8 @@ extern xrtDeviceHandle xrtDeviceOpen(unsigned int index) { extern int xrtXclbinGetXSAName(xrtDeviceHandle /*dhdl*/, char* name, int size, int* ret_size) { static const char* deviceName = "vortex_xrtsim"; if (name) { + if (size < strlen(deviceName) + 1) + return -1; memcpy(name, deviceName, size); } if (ret_size) { @@ -54,7 +63,10 @@ extern int xrtXclbinGetXSAName(xrtDeviceHandle /*dhdl*/, char* name, int size, i } extern int xrtDeviceClose(xrtDeviceHandle dhdl) { + if (dhdl == nullptr) + return -1; auto sim = reinterpret_cast(dhdl); + sim->shutdown(); delete sim; return 0; } @@ -64,19 +76,38 @@ extern int xrtKernelClose(xrtKernelHandle /*kernelHandle*/) { } extern xrtBufferHandle xrtBOAlloc(xrtDeviceHandle dhdl, size_t size, xrtBufferFlags flags, xrtMemoryGroup grp) { - return 0; + auto sim = reinterpret_cast(dhdl); + uint64_t addr; + int err = sim->mem_alloc(size, grp, &addr); + if (err != 0) + return nullptr; + auto buffer = new buffer_t(); + buffer->size = 
size; + buffer->bank = grp; + buffer->sim = sim; + buffer->addr = addr; + return buffer; } extern int xrtBOFree(xrtBufferHandle bhdl) { - return 0; + if (bhdl == nullptr) + return -1; + auto buffer = reinterpret_cast(bhdl); + return buffer->sim->mem_free(buffer->bank, buffer->addr); } -extern int xrtBOWrite(xrtBufferHandle bhdl, const void* src, size_t size, size_t seek) { - return 0; +extern int xrtBOWrite(xrtBufferHandle bhdl, const void* src, size_t size, size_t offset) { + if (bhdl == nullptr) + return -1; + auto buffer = reinterpret_cast(bhdl); + return buffer->sim->mem_write(buffer->bank, buffer->addr + offset, size, src); } -extern int xrtBORead(xrtBufferHandle bhdl, void* dst, size_t size, size_t skip) { - return 0; +extern int xrtBORead(xrtBufferHandle bhdl, void* dst, size_t size, size_t offset) { + if (bhdl == nullptr) + return -1; + auto buffer = reinterpret_cast(bhdl); + return buffer->sim->mem_read(buffer->bank, buffer->addr + offset, size, dst); } extern int xrtBOSync(xrtBufferHandle bhdl, enum xclBOSyncDirection dir, size_t size, size_t offset) { @@ -84,11 +115,17 @@ extern int xrtBOSync(xrtBufferHandle bhdl, enum xclBOSyncDirection dir, size_t s } extern int xrtKernelWriteRegister(xrtKernelHandle kernelHandle, uint32_t offset, uint32_t data) { - return 0; + if (kernelHandle == nullptr) + return -1; + auto sim = reinterpret_cast(kernelHandle); + return sim->register_write(offset, data); } -extern int xrtKernelReadRegister(xrtKernelHandle kernelHandle, uint32_t offset, uint32_t* datap) { - return 0; +extern int xrtKernelReadRegister(xrtKernelHandle kernelHandle, uint32_t offset, uint32_t* data) { + if (kernelHandle == nullptr) + return -1; + auto sim = reinterpret_cast(kernelHandle); + return sim->register_read(offset, data); } extern int xrtErrorGetString(xrtDeviceHandle, xrtErrorCode error, char* out, size_t len, size_t* out_len) { diff --git a/sim/xrtsim/fpga.h b/sim/xrtsim/xrt.h similarity index 98% rename from sim/xrtsim/fpga.h rename to 
sim/xrtsim/xrt.h index f36bbadabc..0dbd5cf42d 100644 --- a/sim/xrtsim/fpga.h +++ b/sim/xrtsim/xrt.h @@ -94,15 +94,15 @@ xrtBufferHandle xrtBOAlloc(xrtDeviceHandle dhdl, size_t size, xrtBufferFlags fla int xrtBOFree(xrtBufferHandle bhdl); -int xrtBOWrite(xrtBufferHandle bhdl, const void* src, size_t size, size_t seek); +int xrtBOWrite(xrtBufferHandle bhdl, const void* src, size_t size, size_t offset); -int xrtBORead(xrtBufferHandle bhdl, void* dst, size_t size, size_t skip); +int xrtBORead(xrtBufferHandle bhdl, void* dst, size_t size, size_t offset); int xrtBOSync(xrtBufferHandle bhdl, enum xclBOSyncDirection dir, size_t size, size_t offset); int xrtKernelWriteRegister(xrtKernelHandle kernelHandle, uint32_t offset, uint32_t data); -int xrtKernelReadRegister(xrtKernelHandle kernelHandle, uint32_t offset, uint32_t* datap); +int xrtKernelReadRegister(xrtKernelHandle kernelHandle, uint32_t offset, uint32_t* data); int xrtErrorGetString(xrtDeviceHandle, xrtErrorCode error, char* out, size_t len, size_t* out_len); diff --git a/sim/xrtsim/xrt_sim.cpp b/sim/xrtsim/xrt_sim.cpp index 21961e5ddc..822f91d944 100644 --- a/sim/xrtsim/xrt_sim.cpp +++ b/sim/xrtsim/xrt_sim.cpp @@ -32,6 +32,12 @@ #include #include #include +#include +#include + +#include + +#define M_AXI_MEM_DATA_SIZE (M_AXI_MEM_DATA_WIDTH/8) #ifndef MEM_CLOCK_RATIO #define MEM_CLOCK_RATIO 1 @@ -53,6 +59,8 @@ #define RAM_PAGE_SIZE 4096 +#define MEM_BANK_SIZE (1ull << M_AXI_MEM_ADDR_WIDTH) + #define CPU_GPU_LATENCY 200 using namespace vortex; @@ -80,6 +88,35 @@ void sim_trace_enable(bool enable) { /////////////////////////////////////////////////////////////////////////////// +#define MP_M_AXI_MEM_EACH(i) \ + m_axi_mem_[i].awvalid = &device_->m_axi_mem_##i##_awvalid; \ + m_axi_mem_[i].awready = &device_->m_axi_mem_##i##_awready; \ + m_axi_mem_[i].awaddr = &device_->m_axi_mem_##i##_awaddr; \ + m_axi_mem_[i].awid = &device_->m_axi_mem_##i##_awid; \ + m_axi_mem_[i].awlen = &device_->m_axi_mem_##i##_awlen; \ + 
m_axi_mem_[i].wvalid = &device_->m_axi_mem_##i##_wvalid; \ + m_axi_mem_[i].wready = &device_->m_axi_mem_##i##_wready; \ + m_axi_mem_[i].wdata = &device_->m_axi_mem_##i##_wdata; \ + m_axi_mem_[i].wstrb = &device_->m_axi_mem_##i##_wstrb; \ + m_axi_mem_[i].wlast = &device_->m_axi_mem_##i##_wlast; \ + m_axi_mem_[i].arvalid = &device_->m_axi_mem_##i##_arvalid; \ + m_axi_mem_[i].arready = &device_->m_axi_mem_##i##_arready; \ + m_axi_mem_[i].araddr = &device_->m_axi_mem_##i##_araddr; \ + m_axi_mem_[i].arid = &device_->m_axi_mem_##i##_arid; \ + m_axi_mem_[i].arlen = &device_->m_axi_mem_##i##_arlen; \ + m_axi_mem_[i].rvalid = &device_->m_axi_mem_##i##_rvalid; \ + m_axi_mem_[i].rready = &device_->m_axi_mem_##i##_rready; \ + m_axi_mem_[i].rdata = &device_->m_axi_mem_##i##_rdata; \ + m_axi_mem_[i].rlast = &device_->m_axi_mem_##i##_rlast; \ + m_axi_mem_[i].rid = &device_->m_axi_mem_##i##_rid; \ + m_axi_mem_[i].rresp = &device_->m_axi_mem_##i##_rresp; \ + m_axi_mem_[i].bvalid = &device_->m_axi_mem_##i##_bvalid; \ + m_axi_mem_[i].bready = &device_->m_axi_mem_##i##_bready; \ + m_axi_mem_[i].bresp = &device_->m_axi_mem_##i##_bresp; \ + m_axi_mem_[i].bid = &device_->m_axi_mem_##i##_bid; + +#define MP_M_AXI_MEM(n) MP_REPEAT(n, MP_M_AXI_MEM_EACH, ;) + class xrt_sim::Impl { public: Impl() @@ -97,6 +134,12 @@ class xrt_sim::Impl { if (future_.valid()) { future_.wait(); } + for (int i = 0; i < M_AXI_MEM_NUM_BANKS; ++i) { + delete mem_alloc_[i]; + } + if (ram_) { + delete ram_; + } #ifdef VCD_OUTPUT if (tfp_) { tfp_->close(); @@ -106,9 +149,6 @@ class xrt_sim::Impl { if (device_) { delete device_; } - if (ram_) { - delete ram_; - } } int init() { @@ -129,22 +169,136 @@ class xrt_sim::Impl { tfp_->open("trace.vcd"); #endif + // allocate RAM ram_ = new RAM(0, RAM_PAGE_SIZE); - + + // initialize AXI memory interfaces + MP_M_AXI_MEM(M_AXI_MEM_NUM_BANKS); + + // initialize memory allocator + for (int i = 0; i < M_AXI_MEM_NUM_BANKS; ++i) { + mem_alloc_[i] = new MemoryAllocator(0, MEM_BANK_SIZE, 
4096, 64); + } + // reset the device this->reset(); + // Turn on assertion after reset + Verilated::assertOn(true); + // launch execution thread future_ = std::async(std::launch::async, [&]{ - while (!stop_) { - std::lock_guard guard(mutex_); - this->tick(); - } + while (!stop_) { + std::lock_guard guard(mutex_); + this->tick(); + } }); return 0; } + void shutdown() { + stop_ = true; + if (future_.valid()) { + future_.wait(); + } + } + + int mem_alloc(uint64_t size, uint32_t bank_id, uint64_t* addr) { + if (bank_id >= M_AXI_MEM_NUM_BANKS) + return -1; + return mem_alloc_[bank_id]->allocate(size, addr); + } + + int mem_free(uint32_t bank_id, uint64_t addr) { + if (bank_id >= M_AXI_MEM_NUM_BANKS) + return -1; + return mem_alloc_[bank_id]->release(addr); + } + + int mem_write(uint32_t bank_id, uint64_t addr, uint64_t size, const void* data) { + if (bank_id >= M_AXI_MEM_NUM_BANKS) + return -1; + uint64_t base_addr = uint64_t(bank_id) * MEM_BANK_SIZE + addr; + ram_->write(data, base_addr, size); + /*printf("%0ld: [sim] xrt-mem-write: addr=0x%lx, size=%ld, data=0x", timestamp, base_addr, size); + for (int i = size-1; i >= 0; --i) { + printf("%02x", ((const uint8_t*)data)[i]); + } + printf(")\n");*/ + return 0; + } + + int mem_read(uint32_t bank_id, uint64_t addr, uint64_t size, void* data) { + if (bank_id >= M_AXI_MEM_NUM_BANKS) + return -1; + uint64_t base_addr = uint64_t(bank_id) * MEM_BANK_SIZE + addr; + ram_->read(data, base_addr, size); + /*printf("%0ld: [sim] xrt-mem-read: addr=0x%lx, size=%ld, data=0x", timestamp, base_addr, size); + for (int i = size-1; i >= 0; --i) { + printf("%02x", ((uint8_t*)data)[i]); + } + printf(")\n");*/ + return 0; + } + + int register_write(uint32_t offset, uint32_t value) { + std::lock_guard guard(mutex_); + + // write address + device_->s_axi_ctrl_awvalid = 1; + device_->s_axi_ctrl_awaddr = offset; + auto s_axi_ctrl_awready = device_->s_axi_ctrl_awready; + do { + this->tick(); + } while (!(s_axi_ctrl_awready || 
device_->s_axi_ctrl_awready)); + device_->s_axi_ctrl_awvalid = 0; + + // write data + device_->s_axi_ctrl_wvalid = 1; + device_->s_axi_ctrl_wdata = value; + device_->s_axi_ctrl_wstrb = 0xf; + auto s_axi_ctrl_wready = device_->s_axi_ctrl_wready; + do { + this->tick(); + } while (!(s_axi_ctrl_wready || device_->s_axi_ctrl_wready)); + device_->s_axi_ctrl_wvalid = 0; + + // write response + device_->s_axi_ctrl_bready = 1; + auto s_axi_ctrl_bvalid = device_->s_axi_ctrl_bvalid; + do { + this->tick(); + } while (!(s_axi_ctrl_bvalid || device_->s_axi_ctrl_bvalid)); + device_->s_axi_ctrl_bready = 0; + + return 0; + } + + int register_read(uint32_t offset, uint32_t* value) { + std::lock_guard guard(mutex_); + + // read address + device_->s_axi_ctrl_arvalid = 1; + device_->s_axi_ctrl_araddr = offset; + auto s_axi_ctrl_arready = device_->s_axi_ctrl_arready; + do { + this->tick(); + } while (!(s_axi_ctrl_arready || device_->s_axi_ctrl_arready)); + device_->s_axi_ctrl_arvalid = 0; + + // read data + device_->s_axi_ctrl_rready = 1; + auto s_axi_ctrl_rvalid = device_->s_axi_ctrl_rvalid; + do { + this->tick(); + } while (!(s_axi_ctrl_rvalid || device_->s_axi_ctrl_rvalid)); + *value = device_->s_axi_ctrl_rdata; + device_->s_axi_ctrl_rready = 0; + + return 0; + } + private: void reset() { @@ -155,9 +309,9 @@ class xrt_sim::Impl { reqs.clear(); } - { + for (int i = 0; i < M_AXI_MEM_NUM_BANKS; ++i) { std::queue empty; - std::swap(dram_queue_, empty); + std::swap(dram_queues_[i], empty); } device_->ap_rst_n = 0; @@ -177,36 +331,34 @@ class xrt_sim::Impl { device_->ap_clk = 1; this->eval(); } - - // Turn on assertion after reset - Verilated::assertOn(true); } void tick() { - this->axi_ctrl_bus_eval(); this->axi_mem_bus_eval(); - if (!dram_queue_.empty()) { - auto mem_req = dram_queue_.front(); - if (dram_sim_.send_request(mem_req->write, mem_req->addr, 0, [](void* arg) { - auto orig_req = reinterpret_cast(arg); - if (orig_req->ready) { - delete orig_req; - } else { - orig_req->ready = 
true; + for (int i = 0; i < M_AXI_MEM_NUM_BANKS; ++i) { + if (!dram_queues_[i].empty()) { + auto mem_req = dram_queues_[i].front(); + if (dram_sim_.send_request(mem_req->write, mem_req->addr, i, [](void* arg) { + auto orig_req = reinterpret_cast(arg); + if (orig_req->ready) { + delete orig_req; + } else { + orig_req->ready = true; + } + }, mem_req)) { + dram_queues_[i].pop(); } - }, mem_req)) { - dram_queue_.pop(); } } + dram_sim_.tick(); + device_->ap_clk = 0; this->eval(); device_->ap_clk = 1; this->eval(); - dram_sim_.tick(); - #ifndef NDEBUG fflush(stdout); #endif @@ -223,65 +375,208 @@ class xrt_sim::Impl { } void axi_ctrl_bus_reset() { - // address write request - device_->s_axi_ctrl_awvalid = 0; - //device_->s_axi_ctrl_awaddr = 0; - - // data write request - device_->s_axi_ctrl_wvalid = 0; - //device_->s_axi_ctrl_wdata = 0; - //device_->s_axi_ctrl_wstrb = 0; - // address read request device_->s_axi_ctrl_arvalid = 0; - //device_->s_axi_ctrl_araddr = 0; + device_->s_axi_ctrl_araddr = 0; // data read response device_->s_axi_ctrl_rready = 0; + // address write request + device_->s_axi_ctrl_awvalid = 0; + device_->s_axi_ctrl_awaddr = 0; + + // data write request + device_->s_axi_ctrl_wvalid = 0; + device_->s_axi_ctrl_wdata = 0; + device_->s_axi_ctrl_wstrb = 0; + // data write response device_->s_axi_ctrl_bready = 0; } - void axi_ctrl_bus_eval() { - //-- - } - void axi_mem_bus_reset() { - // address write request - device_->m_axi_mem_0_awready = 0; + for (int i = 0; i < M_AXI_MEM_NUM_BANKS; ++i) { + // address read request + *m_axi_mem_[i].arready = 1; - // data write request - device_->m_axi_mem_0_wready = 0; + // address write request + *m_axi_mem_[i].awready = 1; - // address read request - device_->m_axi_mem_0_arready = 0; + // data write request + *m_axi_mem_[i].wready = 0; - // data read response - device_->m_axi_mem_0_rvalid = 0; - //device_->m_axi_mem_0_rdata = 0; - //device_->m_axi_mem_0_rlast = 0; - //device_->m_axi_mem_0_rid = 0; - 
//device_->m_axi_mem_0_rresp = 0; + // data read response + *m_axi_mem_[i].rvalid = 0; - // data write response - device_->m_axi_mem_0_bvalid = 0; - //device_->m_axi_mem_0_bresp = 0; - //device_->m_axi_mem_0_bid = 0; + // data write response + *m_axi_mem_[i].bvalid = 0; + + // states + m_axi_states_[i].write_req_pending = false; + m_axi_states_[i].write_rsp_pending = false; + m_axi_states_[i].read_rsp_pending = false; + } } void axi_mem_bus_eval() { - //-- + for (int i = 0; i < M_AXI_MEM_NUM_BANKS; ++i) { + // handle read responses + if (m_axi_states_[i].read_rsp_pending + && (*m_axi_mem_[i].rready)) { + *m_axi_mem_[i].rvalid = 0; + m_axi_states_[i].read_rsp_pending = false; + } + } + if (!m_axi_states_[i].read_rsp_pending) { + if (!pending_mem_reqs_[i].empty() + && (*pending_mem_reqs_[i].begin())->ready + && !(*pending_mem_reqs_[i].begin())->write) { + auto mem_rsp_it = pending_mem_reqs_[i].begin(); + auto mem_rsp = *mem_rsp_it; + *m_axi_mem_[i].rvalid = 1; + *m_axi_mem_[i].rid = mem_rsp->tag; + *m_axi_mem_[i].rresp = 0; + *m_axi_mem_[i].rlast = 1; + memcpy(m_axi_mem_[i].rdata->data(), mem_rsp->data.data(), M_AXI_MEM_DATA_SIZE); + pending_mem_reqs_[i].erase(mem_rsp_it); + m_axi_states_[i].read_rsp_pending = true; + delete mem_rsp; + } + } + + // handle write responses + if (m_axi_states_[i].write_rsp_pending) { + if (*m_axi_mem_[i].bready) { + *m_axi_mem_[i].bvalid = 0; + m_axi_states_[i].write_rsp_pending = false; + } + } + if (!m_axi_states_[i].write_rsp_pending) { + if (!pending_mem_reqs_[i].empty() + && (*pending_mem_reqs_[i].begin())->ready + && (*pending_mem_reqs_[i].begin())->write) { + auto mem_rsp_it = pending_mem_reqs_[i].begin(); + auto mem_rsp = *mem_rsp_it; + *m_axi_mem_[i].bvalid = 1; + *m_axi_mem_[i].bid = mem_rsp->tag; + *m_axi_mem_[i].bresp = 0; + pending_mem_reqs_[i].erase(mem_rsp_it); + m_axi_states_[i].write_rsp_pending = true; + delete mem_rsp; + } + } + + // handle read requests + if (*m_axi_mem_[i].arvalid && *m_axi_mem_[i].arready) { + auto 
mem_req = new mem_req_t(); + mem_req->tag = *m_axi_mem_[i].arid; + mem_req->addr = uint64_t(*m_axi_mem_[i].araddr) * M_AXI_MEM_NUM_BANKS + i * M_AXI_MEM_DATA_SIZE; + ram_->read(mem_req->data.data(), mem_req->addr, M_AXI_MEM_DATA_SIZE); + mem_req->write = false; + mem_req->ready = false; + pending_mem_reqs_[i].emplace_back(mem_req); + + /*printf("%0ld: [sim] axi-mem-read: bank=%d, addr=0x%lx, tag=0x%x, data=0x", timestamp, i, mem_req->addr, mem_req->tag); + for (int i = M_AXI_MEM_DATA_SIZE-1; i >= 0; --i) { + printf("%02x", mem_req->data[i]); + } + printf("\n");*/ + + // send dram request + dram_queues_[i].push(mem_req); + } + + // handle address write requests + if (*m_axi_mem_[i].awvalid && *m_axi_mem_[i].awready && !m_axi_states_[i].write_req_pending) { + m_axi_states_[i].write_req_addr = *m_axi_mem_[i].awaddr; + m_axi_states_[i].write_req_tag = *m_axi_mem_[i].awid; + m_axi_states_[i].write_req_pending = true; + } + + // handle data write requests + *m_axi_mem_[i].wready = false; + if (*m_axi_mem_[i].wvalid && m_axi_states_[i].write_req_pending) { + + auto byteen = *m_axi_mem_[i].wstrb; + auto data = (uint8_t*)m_axi_mem_[i].wdata->data(); + auto byte_addr = m_axi_states_[i].write_req_addr * M_AXI_MEM_NUM_BANKS + i * M_AXI_MEM_DATA_SIZE; + + for (int i = 0; i < M_AXI_MEM_DATA_SIZE; i++) { + if ((byteen >> i) & 0x1) { + (*ram_)[byte_addr + i] = data[i]; + } + } + + auto mem_req = new mem_req_t(); + mem_req->tag = m_axi_states_[i].write_req_tag; + mem_req->addr = byte_addr; + mem_req->write = true; + mem_req->ready = false; + pending_mem_reqs_[i].emplace_back(mem_req); + + /*printf("%0ld: [sim] axi-mem-write: bank=%d, addr=0x%lx, byteen=0x%lx, tag=0x%x, data=0x", timestamp, i, mem_req->addr, byteen, mem_req->tag); + for (int i = M_AXI_MEM_DATA_SIZE-1; i >= 0; --i) { + printf("%02x", data[i]); + } + printf("\n");*/ + + // send dram request + dram_queues_[i].push(mem_req); + + m_axi_states_[i].write_req_pending = false; + + // acquire write data + 
*m_axi_mem_[i].wready = true; + } + } } typedef struct { - std::array data; - uint32_t addr; + uint64_t write_req_addr; + uint32_t write_req_tag; + bool write_req_pending; + bool read_rsp_pending; + bool write_rsp_pending; + } m_axi_state_t; + + typedef struct { + std::array data; + uint32_t tag; + uint64_t addr; bool write; bool ready; } mem_req_t; - Vvortex_afu_shim *device_; + typedef struct { + CData* awvalid; + CData* awready; + QData* awaddr; + IData* awid; + CData* awlen; + CData* wvalid; + CData* wready; + VlWide<16>* wdata; + QData* wstrb; + CData* wlast; + CData* arvalid; + CData* arready; + QData* araddr; + IData* arid; + CData* arlen; + CData* rvalid; + CData* rready; + VlWide<16>* rdata; + CData* rlast; + IData* rid; + CData* rresp; + CData* bvalid; + CData* bready; + CData* bresp; + IData* bid; + } m_axi_mem_t; + + Vvortex_afu_shim* device_; RAM* ram_; DramSim dram_sim_; @@ -290,9 +585,15 @@ class xrt_sim::Impl { std::mutex mutex_; - std::list pending_mem_reqs_[MEMORY_BANKS]; + std::list pending_mem_reqs_[M_AXI_MEM_NUM_BANKS]; + + m_axi_mem_t m_axi_mem_[M_AXI_MEM_NUM_BANKS]; - std::queue dram_queue_; + MemoryAllocator* mem_alloc_[M_AXI_MEM_NUM_BANKS]; + + m_axi_state_t m_axi_states_[M_AXI_MEM_NUM_BANKS]; + + std::queue dram_queues_[M_AXI_MEM_NUM_BANKS]; #ifdef VCD_OUTPUT VerilatedVcdC* tfp_; @@ -311,4 +612,32 @@ xrt_sim::~xrt_sim() { int xrt_sim::init() { return impl_->init(); +} + +void xrt_sim::shutdown() { + impl_->shutdown(); +} + +int xrt_sim::mem_alloc(uint64_t size, uint32_t bank_id, uint64_t* addr) { + return impl_->mem_alloc(size, bank_id, addr); +} + +int xrt_sim::mem_free(uint32_t bank_id, uint64_t addr) { + return impl_->mem_free(bank_id, addr); +} + +int xrt_sim::mem_write(uint32_t bank_id, uint64_t addr, uint64_t size, const void* data) { + return impl_->mem_write(bank_id, addr, size, data); +} + +int xrt_sim::mem_read(uint32_t bank_id, uint64_t addr, uint64_t size, void* data) { + return impl_->mem_read(bank_id, addr, size, data); +} + 
+int xrt_sim::register_write(uint32_t offset, uint32_t value) { + return impl_->register_write(offset, value); +} + +int xrt_sim::register_read(uint32_t offset, uint32_t* value) { + return impl_->register_read(offset, value); } \ No newline at end of file diff --git a/sim/xrtsim/xrt_sim.h b/sim/xrtsim/xrt_sim.h index e399c33dec..5823f468fb 100644 --- a/sim/xrtsim/xrt_sim.h +++ b/sim/xrtsim/xrt_sim.h @@ -25,6 +25,20 @@ class xrt_sim { int init(); + void shutdown(); + + int mem_alloc(uint64_t size, uint32_t bank_id, uint64_t* addr); + + int mem_free(uint32_t bank_id, uint64_t addr); + + int mem_write(uint32_t bank_id, uint64_t addr, uint64_t size, const void* value); + + int mem_read(uint32_t bank_id, uint64_t addr, uint64_t size, void* value); + + int register_write(uint32_t offset, uint32_t value); + + int register_read(uint32_t offset, uint32_t* value); + private: class Impl; From 2d7f9eae0a84c3c80d6f516305b97b3401df743c Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Thu, 19 Sep 2024 04:44:00 -0700 Subject: [PATCH 196/407] minor update --- hw/rtl/libs/VX_axi_adapter.sv | 25 ++++++++++++++++--------- sim/xrtsim/xrt_sim.cpp | 16 ++++++---------- 2 files changed, 22 insertions(+), 19 deletions(-) diff --git a/hw/rtl/libs/VX_axi_adapter.sv b/hw/rtl/libs/VX_axi_adapter.sv index 6c231cb959..06216f2ab2 100644 --- a/hw/rtl/libs/VX_axi_adapter.sv +++ b/hw/rtl/libs/VX_axi_adapter.sv @@ -100,26 +100,33 @@ module VX_axi_adapter #( assign req_bank_sel = '0; end - wire [NUM_BANKS-1:0] axi_aw_ready, axi_write_ready; + wire [NUM_BANKS-1:0] axi_write_ready; for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_axi_write_ready - assign axi_aw_ready[i] = m_axi_awready[i] || m_axi_aw_ack[i]; - assign axi_write_ready[i] = m_axi_wready[i] && axi_aw_ready[i]; + assign axi_write_ready[i] = (m_axi_awready[i] || m_axi_aw_ack[i]) + && (m_axi_wready[i] || m_axi_w_ack[i]); end // request ack assign mem_req_ready = mem_req_rw ? 
axi_write_ready[req_bank_sel] : m_axi_arready[req_bank_sel]; - reg [NUM_BANKS-1:0] m_axi_aw_ack; + wire mem_req_fire = mem_req_valid && mem_req_ready; + + // AXi write request synchronization + reg [NUM_BANKS-1:0] m_axi_aw_ack, m_axi_w_ack; for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_m_axi_w + wire m_axi_aw_fire = m_axi_awvalid[i] && m_axi_awready[i]; + wire m_axi_w_fire = m_axi_wvalid[i] && m_axi_wready[i]; always @(posedge clk) begin - if (reset) begin + if (reset || (mem_req_fire && (req_bank_sel == i))) begin m_axi_aw_ack[i] <= 0; + m_axi_w_ack[i] <= 0; end else begin - if (m_axi_wvalid[i] && m_axi_wready[i]) begin - m_axi_aw_ack[i] <= 0; - end else if (m_axi_awvalid[i] && m_axi_awready[i]) begin + if (m_axi_aw_fire) begin m_axi_aw_ack[i] <= 1; end + if (m_axi_w_fire) begin + m_axi_w_ack[i] <= 1; + end end end end @@ -141,7 +148,7 @@ module VX_axi_adapter #( // AXI write request data channel for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_axi_write_data - assign m_axi_wvalid[i] = mem_req_valid && mem_req_rw && (req_bank_sel == i) && axi_aw_ready[i]; + assign m_axi_wvalid[i] = mem_req_valid && mem_req_rw && (req_bank_sel == i) && ~m_axi_w_ack[i]; assign m_axi_wdata[i] = mem_req_data; assign m_axi_wstrb[i] = mem_req_byteen; assign m_axi_wlast[i] = 1'b1; diff --git a/sim/xrtsim/xrt_sim.cpp b/sim/xrtsim/xrt_sim.cpp index 822f91d944..b8af57cfea 100644 --- a/sim/xrtsim/xrt_sim.cpp +++ b/sim/xrtsim/xrt_sim.cpp @@ -422,11 +422,9 @@ class xrt_sim::Impl { void axi_mem_bus_eval() { for (int i = 0; i < M_AXI_MEM_NUM_BANKS; ++i) { // handle read responses - if (m_axi_states_[i].read_rsp_pending - && (*m_axi_mem_[i].rready)) { - *m_axi_mem_[i].rvalid = 0; - m_axi_states_[i].read_rsp_pending = false; - } + if (m_axi_states_[i].read_rsp_pending && (*m_axi_mem_[i].rready)) { + *m_axi_mem_[i].rvalid = 0; + m_axi_states_[i].read_rsp_pending = false; } if (!m_axi_states_[i].read_rsp_pending) { if (!pending_mem_reqs_[i].empty() @@ -446,11 +444,9 @@ class xrt_sim::Impl { } // 
handle write responses - if (m_axi_states_[i].write_rsp_pending) { - if (*m_axi_mem_[i].bready) { - *m_axi_mem_[i].bvalid = 0; - m_axi_states_[i].write_rsp_pending = false; - } + if (m_axi_states_[i].write_rsp_pending && *m_axi_mem_[i].bready) { + *m_axi_mem_[i].bvalid = 0; + m_axi_states_[i].write_rsp_pending = false; } if (!m_axi_states_[i].write_rsp_pending) { if (!pending_mem_reqs_[i].empty() From 4fff940e42647d0546f817bd1cda921495fe3aaa Mon Sep 17 00:00:00 2001 From: sij814 Date: Thu, 19 Sep 2024 13:21:14 -0700 Subject: [PATCH 197/407] two different versions of bypass connection --- hw/rtl/cache/VX_cache_wrap_l3.sv | 40 +++++++++++++++++++++++++++++++- 1 file changed, 39 insertions(+), 1 deletion(-) diff --git a/hw/rtl/cache/VX_cache_wrap_l3.sv b/hw/rtl/cache/VX_cache_wrap_l3.sv index 403edf5545..def7237b1f 100644 --- a/hw/rtl/cache/VX_cache_wrap_l3.sv +++ b/hw/rtl/cache/VX_cache_wrap_l3.sv @@ -108,8 +108,9 @@ module VX_cache_wrap_l3 import VX_gpu_pkg::*; #( ) mem_bus_cache_if[NUM_MEM_PORTS](); if (NC_OR_BYPASS) begin - `RESET_RELAY (nc_bypass_reset, reset); + + // Slicing version for (genvar i = 0; i < NUM_MEM_PORTS; ++i) begin localparam SLICE_BEGIN = i * NUM_REQS_P; @@ -148,6 +149,43 @@ module VX_cache_wrap_l3 import VX_gpu_pkg::*; #( ); end + // Connect everything + /* + for (genvar i = 0; i < NUM_MEM_PORTS; ++i) begin + VX_cache_bypass #( + .NUM_REQS (NUM_REQS), + .TAG_SEL_IDX (TAG_SEL_IDX), + + .PASSTHRU (PASSTHRU), + .NC_ENABLE (PASSTHRU ? 
0 : NC_ENABLE), + + .WORD_SIZE (WORD_SIZE), + .LINE_SIZE (LINE_SIZE), + + .CORE_ADDR_WIDTH (`CS_WORD_ADDR_WIDTH), + .CORE_TAG_WIDTH (TAG_WIDTH), + + .MEM_ADDR_WIDTH (`CS_MEM_ADDR_WIDTH), + .MEM_TAG_IN_WIDTH (CACHE_MEM_TAG_WIDTH), + .MEM_TAG_OUT_WIDTH (MEM_TAG_WIDTH), + + .UUID_WIDTH (UUID_WIDTH), + + .CORE_OUT_BUF (CORE_OUT_BUF), + .MEM_OUT_BUF (MEM_OUT_BUF) + ) cache_bypass ( + .clk (clk), + .reset (nc_bypass_reset), + + .core_bus_in_if (core_bus_if), + .core_bus_out_if(core_bus_cache_if), + + .mem_bus_in_if (mem_bus_cache_if[i]), + .mem_bus_out_if (mem_bus_if[i]) + ); + end + */ + end else begin for (genvar i = 0; i < NUM_REQS; ++i) begin From 380c36d93084a312aac0923a46117e0510bb749c Mon Sep 17 00:00:00 2001 From: sij814 Date: Thu, 19 Sep 2024 13:31:25 -0700 Subject: [PATCH 198/407] merged rtlsim branch --- third_party/fpnew | 2 +- third_party/softfloat | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/third_party/fpnew b/third_party/fpnew index 79e4531390..a6af691551 160000 --- a/third_party/fpnew +++ b/third_party/fpnew @@ -1 +1 @@ -Subproject commit 79e453139072df42c9ec8f697132ba485d74e23d +Subproject commit a6af691551ffbd76d5d9cf30774d3295a41615e4 diff --git a/third_party/softfloat b/third_party/softfloat index b51ef8f320..3b70b5d814 160000 --- a/third_party/softfloat +++ b/third_party/softfloat @@ -1 +1 @@ -Subproject commit b51ef8f3201669b2288104c28546fc72532a1ea4 +Subproject commit 3b70b5d8147675932c38b36cd09af6df4eedd919 From d2db612bb40754c177eb471527e9114996932d99 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Thu, 19 Sep 2024 22:33:28 -0700 Subject: [PATCH 199/407] adding scope support to xrtsim --- hw/rtl/VX_scope.vh | 6 +- hw/rtl/afu/opae/vortex_afu.sv | 2 +- hw/rtl/afu/xrt/VX_afu_ctrl.sv | 188 +++++++++++++++++++++------------- runtime/opae/vortex.cpp | 7 +- runtime/xrt/vortex.cpp | 8 +- sim/xrtsim/xrt_sim.cpp | 43 ++++---- tests/opencl/common.mk | 4 +- tests/regression/common.mk | 4 +- 8 files changed, 154 insertions(+), 108 
deletions(-) diff --git a/hw/rtl/VX_scope.vh b/hw/rtl/VX_scope.vh index a747706404..a677975ced 100644 --- a/hw/rtl/VX_scope.vh +++ b/hw/rtl/VX_scope.vh @@ -1,10 +1,10 @@ // Copyright © 2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -24,7 +24,7 @@ `define SCOPE_IO_SWITCH(__count) \ wire scope_bus_in_w [__count]; \ wire scope_bus_out_w [__count]; \ - `RESET_RELAY_EX(scope_reset_w, scope_reset, __count, 4); \ + `RESET_RELAY_EX(scope_reset_w, scope_reset, __count, `MAX_FANOUT); \ VX_scope_switch #( \ .N (__count) \ ) scope_switch ( \ diff --git a/hw/rtl/afu/opae/vortex_afu.sv b/hw/rtl/afu/opae/vortex_afu.sv index d97be483dd..7d5a10b940 100644 --- a/hw/rtl/afu/opae/vortex_afu.sv +++ b/hw/rtl/afu/opae/vortex_afu.sv @@ -175,7 +175,6 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ cmd_scope_reading <= 1; scope_bus_ctr <= 63; end - scope_bus_in <= 0; if (cp2af_sRxPort.c0.mmioWrValid && (MMIO_SCOPE_WRITE == mmio_req_hdr.address)) begin cmd_scope_wdata <= 64'(cp2af_sRxPort.c0.data); @@ -189,6 +188,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ scope_bus_ctr <= scope_bus_ctr - 1; if (scope_bus_ctr == 0) begin cmd_scope_writing <= 0; + scope_bus_in <= 0; end end if (cmd_scope_reading) begin diff --git a/hw/rtl/afu/xrt/VX_afu_ctrl.sv b/hw/rtl/afu/xrt/VX_afu_ctrl.sv index 0acf87744b..c842e25d52 100644 --- a/hw/rtl/afu/xrt/VX_afu_ctrl.sv +++ b/hw/rtl/afu/xrt/VX_afu_ctrl.sv @@ -132,13 +132,16 @@ module VX_afu_ctrl #( ADDR_BITS = 8; localparam - WSTATE_IDLE = 2'd0, + WSTATE_ADDR = 2'd0, 
WSTATE_DATA = 2'd1, - WSTATE_RESP = 2'd2; + WSTATE_RESP = 2'd2, + WSTATE_WIDTH = 2; localparam - RSTATE_IDLE = 2'd0, - RSTATE_DATA = 2'd1; + RSTATE_ADDR = 2'd0, + RSTATE_DATA = 2'd1, + RSTATE_RESP = 2'd2, + RSTATE_WIDTH = 2; // device caps wire [63:0] dev_caps = {16'b0, @@ -152,16 +155,18 @@ module VX_afu_ctrl #( 2'(`CLOG2(`XLEN)-4), 30'(`MISA_STD)}; - reg [1:0] wstate; + reg [WSTATE_WIDTH-1:0] wstate; reg [ADDR_BITS-1:0] waddr; wire [31:0] wmask; wire s_axi_aw_fire; wire s_axi_w_fire; + wire s_axi_b_fire; - reg [1:0] rstate; + logic [RSTATE_WIDTH-1:0] rstate; reg [31:0] rdata; - wire [ADDR_BITS-1:0] raddr; + reg [ADDR_BITS-1:0] raddr; wire s_axi_ar_fire; + wire s_axi_r_fire; reg ap_reset_r; reg ap_start_r; @@ -174,15 +179,19 @@ module VX_afu_ctrl #( reg [31:0] dcrv_r; reg dcr_wr_valid_r; + logic wready_stall; + logic rvalid_stall; + `ifdef SCOPE - reg [63:0] scope_bus_wdata; - reg [63:0] scope_bus_rdata; + reg [63:0] scope_bus_wdata, scope_bus_rdata; reg [5:0] scope_bus_ctr; - reg cmd_scope_reading; - reg cmd_scope_writing; + reg cmd_scope_writing, cmd_scope_reading; reg scope_bus_out_r; + reg scope_rdata_valid; + + reg is_scope_waddr, is_scope_raddr; always @(posedge clk) begin if (reset) begin @@ -190,18 +199,33 @@ module VX_afu_ctrl #( cmd_scope_writing <= 0; scope_bus_ctr <= '0; scope_bus_out_r <= 0; + is_scope_waddr <= 0; + is_scope_raddr <= 0; + scope_bus_rdata <= '0; + scope_rdata_valid <= 0; end else begin + if (s_axi_aw_fire) begin + is_scope_waddr <= (s_axi_awaddr[ADDR_BITS-1:0] == ADDR_SCP_0) + || (s_axi_awaddr[ADDR_BITS-1:0] == ADDR_SCP_1); + end + if (s_axi_ar_fire) begin + is_scope_raddr <= (s_axi_araddr[ADDR_BITS-1:0] == ADDR_SCP_0) + || (s_axi_araddr[ADDR_BITS-1:0] == ADDR_SCP_1); + end if (s_axi_w_fire && waddr == ADDR_SCP_0) begin scope_bus_wdata[31:0] <= (s_axi_wdata & wmask) | (scope_bus_wdata[31:0] & ~wmask); end if (s_axi_w_fire && waddr == ADDR_SCP_1) begin scope_bus_wdata[63:32] <= (s_axi_wdata & wmask) | (scope_bus_wdata[63:32] & ~wmask); 
cmd_scope_writing <= 1; + scope_rdata_valid <= 0; scope_bus_out_r <= 1; scope_bus_ctr <= 63; + end if (scope_bus_in) begin cmd_scope_reading <= 1; + scope_bus_rdata <= '0; scope_bus_ctr <= 63; end if (cmd_scope_reading) begin @@ -209,6 +233,7 @@ module VX_afu_ctrl #( scope_bus_ctr <= scope_bus_ctr - 1; if (scope_bus_ctr == 0) begin cmd_scope_reading <= 0; + scope_rdata_valid <= 1; end end if (cmd_scope_writing) begin @@ -216,6 +241,7 @@ module VX_afu_ctrl #( scope_bus_ctr <= scope_bus_ctr - 1; if (scope_bus_ctr == 0) begin cmd_scope_writing <= 0; + scope_bus_out_r <= '0; end end end @@ -223,40 +249,51 @@ module VX_afu_ctrl #( assign scope_bus_out = scope_bus_out_r; + assign wready_stall = is_scope_waddr && cmd_scope_writing; + assign rvalid_stall = is_scope_raddr && ~scope_rdata_valid; + +`else + + assign wready_stall = 0; + assign rvalid_stall = 0; + `endif - // AXI Write + // AXI Write Request + assign s_axi_awready = (wstate == WSTATE_ADDR); + assign s_axi_wready = (wstate == WSTATE_DATA) && ~wready_stall; - assign s_axi_awready = (wstate == WSTATE_IDLE); - assign s_axi_wready = (wstate == WSTATE_DATA); + // AXI Write Response assign s_axi_bvalid = (wstate == WSTATE_RESP); assign s_axi_bresp = 2'b00; // OKAY - assign s_axi_aw_fire = s_axi_awvalid && s_axi_awready; - assign s_axi_w_fire = s_axi_wvalid && s_axi_wready; - for (genvar i = 0; i < 4; ++i) begin : g_wmask assign wmask[8 * i +: 8] = {8{s_axi_wstrb[i]}}; end + assign s_axi_aw_fire = s_axi_awvalid && s_axi_awready; + assign s_axi_w_fire = s_axi_wvalid && s_axi_wready; + assign s_axi_b_fire = s_axi_bvalid && s_axi_bready; + // wstate always @(posedge clk) begin if (reset) begin - wstate <= WSTATE_IDLE; + wstate <= WSTATE_ADDR; end else begin case (wstate) - WSTATE_IDLE: wstate <= s_axi_awvalid ? WSTATE_DATA : WSTATE_IDLE; - WSTATE_DATA: wstate <= s_axi_wvalid ? WSTATE_RESP : WSTATE_DATA; - WSTATE_RESP: wstate <= s_axi_bready ? 
WSTATE_IDLE : WSTATE_RESP; - default: wstate <= WSTATE_IDLE; + WSTATE_ADDR: wstate <= s_axi_aw_fire ? WSTATE_DATA : WSTATE_ADDR; + WSTATE_DATA: wstate <= s_axi_w_fire ? WSTATE_RESP : WSTATE_DATA; + WSTATE_RESP: wstate <= s_axi_b_fire ? WSTATE_ADDR : WSTATE_RESP; + default: wstate <= WSTATE_ADDR; endcase end end // waddr always @(posedge clk) begin - if (s_axi_aw_fire) + if (s_axi_aw_fire) begin waddr <= s_axi_awaddr[ADDR_BITS-1:0]; + end end // wdata @@ -335,75 +372,82 @@ module VX_afu_ctrl #( end end - // AXI Read + // AXI Read Request + assign s_axi_arready = (rstate == RSTATE_ADDR); - assign s_axi_arready = (rstate == RSTATE_IDLE); - assign s_axi_rvalid = (rstate == RSTATE_DATA); + // AXI Read Response + assign s_axi_rvalid = (rstate == RSTATE_RESP); assign s_axi_rdata = rdata; assign s_axi_rresp = 2'b00; // OKAY assign s_axi_ar_fire = s_axi_arvalid && s_axi_arready; - assign raddr = s_axi_araddr[ADDR_BITS-1:0]; + assign s_axi_r_fire = s_axi_rvalid && s_axi_rready; // rstate always @(posedge clk) begin if (reset) begin - rstate <= RSTATE_IDLE; + rstate <= RSTATE_ADDR; end else begin case (rstate) - RSTATE_IDLE: rstate <= s_axi_arvalid ? RSTATE_DATA : RSTATE_IDLE; - RSTATE_DATA: rstate <= (s_axi_rready & s_axi_rvalid) ? RSTATE_IDLE : RSTATE_DATA; - default: rstate <= RSTATE_IDLE; + RSTATE_ADDR: rstate <= s_axi_ar_fire ? RSTATE_DATA : RSTATE_ADDR; + RSTATE_DATA: rstate <= (~rvalid_stall) ? RSTATE_RESP : RSTATE_DATA; + RSTATE_RESP: rstate <= s_axi_r_fire ? 
RSTATE_ADDR : RSTATE_RESP; + default: rstate <= RSTATE_ADDR; endcase end end - // rdata + // raddr always @(posedge clk) begin if (s_axi_ar_fire) begin - rdata <= '0; - case (raddr) - ADDR_AP_CTRL: begin - rdata[0] <= ap_start_r; - rdata[1] <= ap_done; - rdata[2] <= ap_idle; - rdata[3] <= ap_ready; - rdata[7] <= auto_restart_r; - end - ADDR_GIE: begin - rdata <= 32'(gie_r); - end - ADDR_IER: begin - rdata <= 32'(ier_r); - end - ADDR_ISR: begin - rdata <= 32'(isr_r); - end - ADDR_DEV_0: begin - rdata <= dev_caps[31:0]; - end - ADDR_DEV_1: begin - rdata <= dev_caps[63:32]; - end - ADDR_ISA_0: begin - rdata <= isa_caps[31:0]; - end - ADDR_ISA_1: begin - rdata <= isa_caps[63:32]; - end - `ifdef SCOPE - ADDR_SCP_0: begin - rdata <= scope_bus_rdata[31:0]; - end - ADDR_SCP_1: begin - rdata <= scope_bus_rdata[63:32]; - end - `endif - default:; - endcase + raddr <= s_axi_araddr[ADDR_BITS-1:0]; end end + // rdata + always @(posedge clk) begin + rdata <= '0; + case (raddr) + ADDR_AP_CTRL: begin + rdata[0] <= ap_start_r; + rdata[1] <= ap_done; + rdata[2] <= ap_idle; + rdata[3] <= ap_ready; + rdata[7] <= auto_restart_r; + end + ADDR_GIE: begin + rdata <= 32'(gie_r); + end + ADDR_IER: begin + rdata <= 32'(ier_r); + end + ADDR_ISR: begin + rdata <= 32'(isr_r); + end + ADDR_DEV_0: begin + rdata <= dev_caps[31:0]; + end + ADDR_DEV_1: begin + rdata <= dev_caps[63:32]; + end + ADDR_ISA_0: begin + rdata <= isa_caps[31:0]; + end + ADDR_ISA_1: begin + rdata <= isa_caps[63:32]; + end + `ifdef SCOPE + ADDR_SCP_0: begin + rdata <= scope_bus_rdata[31:0]; + end + ADDR_SCP_1: begin + rdata <= scope_bus_rdata[63:32]; + end + `endif + default:; + endcase + end + assign ap_reset = ap_reset_r; assign ap_start = ap_start_r; assign interrupt = gie_r & (| isr_r); diff --git a/runtime/opae/vortex.cpp b/runtime/opae/vortex.cpp index 3829abcddf..1bc913cc81 100755 --- a/runtime/opae/vortex.cpp +++ b/runtime/opae/vortex.cpp @@ -194,11 +194,10 @@ class vx_device { return 
device->api_.fpgaReadMMIO64(device->fpga_, 0, MMIO_SCOPE_READ, value); }; - int ret = vx_scope_start(&callback, this, 0, -1); - if (ret != 0) { + CHECK_ERR(vx_scope_start(&callback, this, 0, -1), { api_.fpgaClose(fpga_); - return ret; - } + return err; + }); } #endif return 0; diff --git a/runtime/xrt/vortex.cpp b/runtime/xrt/vortex.cpp index de65c1e856..ae551bfa25 100644 --- a/runtime/xrt/vortex.cpp +++ b/runtime/xrt/vortex.cpp @@ -375,11 +375,9 @@ class vx_device { *value = (((uint64_t)value_hi) << 32) | value_lo; return 0; }; - int ret = vx_scope_start(&callback, device, 0, -1); - if (ret != 0) { - delete device; - return ret; - } + CHECK_ERR(vx_scope_start(&callback, this, 0, -1), { + return err; + }); } #endif diff --git a/sim/xrtsim/xrt_sim.cpp b/sim/xrtsim/xrt_sim.cpp index b8af57cfea..1aaccc3921 100644 --- a/sim/xrtsim/xrt_sim.cpp +++ b/sim/xrtsim/xrt_sim.cpp @@ -217,6 +217,8 @@ class xrt_sim::Impl { } int mem_write(uint32_t bank_id, uint64_t addr, uint64_t size, const void* data) { + std::lock_guard guard(mutex_); + if (bank_id >= M_AXI_MEM_NUM_BANKS) return -1; uint64_t base_addr = uint64_t(bank_id) * MEM_BANK_SIZE + addr; @@ -230,6 +232,8 @@ class xrt_sim::Impl { } int mem_read(uint32_t bank_id, uint64_t addr, uint64_t size, void* data) { + std::lock_guard guard(mutex_); + if (bank_id >= M_AXI_MEM_NUM_BANKS) return -1; uint64_t base_addr = uint64_t(bank_id) * MEM_BANK_SIZE + addr; @@ -246,56 +250,57 @@ class xrt_sim::Impl { std::lock_guard guard(mutex_); // write address + //printf("%0ld: [sim] register_write: address=0x%x\n", timestamp, offset); device_->s_axi_ctrl_awvalid = 1; device_->s_axi_ctrl_awaddr = offset; - auto s_axi_ctrl_awready = device_->s_axi_ctrl_awready; - do { + while (!device_->s_axi_ctrl_awready) this->tick(); - } while (!(s_axi_ctrl_awready || device_->s_axi_ctrl_awready)); + this->tick(); device_->s_axi_ctrl_awvalid = 0; // write data + //printf("%0ld: [sim] register_write: data=0x%x\n", timestamp, value); device_->s_axi_ctrl_wvalid 
= 1; device_->s_axi_ctrl_wdata = value; device_->s_axi_ctrl_wstrb = 0xf; - auto s_axi_ctrl_wready = device_->s_axi_ctrl_wready; - do { + while (!device_->s_axi_ctrl_wready) this->tick(); - } while (!(s_axi_ctrl_wready || device_->s_axi_ctrl_wready)); + this->tick(); device_->s_axi_ctrl_wvalid = 0; // write response - device_->s_axi_ctrl_bready = 1; - auto s_axi_ctrl_bvalid = device_->s_axi_ctrl_bvalid; + //printf("%0ld: [sim] register_write: response\n", timestamp); do { this->tick(); - } while (!(s_axi_ctrl_bvalid || device_->s_axi_ctrl_bvalid)); + } while (!device_->s_axi_ctrl_bvalid); + device_->s_axi_ctrl_bready = 1; + this->tick(); device_->s_axi_ctrl_bready = 0; - + //printf("%0ld: [sim] register_write: done\n", timestamp); return 0; } int register_read(uint32_t offset, uint32_t* value) { std::lock_guard guard(mutex_); - // read address + //printf("%0ld: [sim] register_read: address=0x%x\n", timestamp, offset); device_->s_axi_ctrl_arvalid = 1; device_->s_axi_ctrl_araddr = offset; - auto s_axi_ctrl_arready = device_->s_axi_ctrl_arready; - do { + while (!device_->s_axi_ctrl_arready) this->tick(); - } while (!(s_axi_ctrl_arready || device_->s_axi_ctrl_arready)); + this->tick(); device_->s_axi_ctrl_arvalid = 0; - // read data - device_->s_axi_ctrl_rready = 1; - auto s_axi_ctrl_rvalid = device_->s_axi_ctrl_rvalid; + // read response + //printf("%0ld: [sim] register_read: response\n", timestamp); do { this->tick(); - } while (!(s_axi_ctrl_rvalid || device_->s_axi_ctrl_rvalid)); + } while (!device_->s_axi_ctrl_rvalid); *value = device_->s_axi_ctrl_rdata; + device_->s_axi_ctrl_rready = 1; + this->tick(); device_->s_axi_ctrl_rready = 0; - + //printf("%0ld: [sim] register_read: done (value=0x%x)\n", timestamp, *value); return 0; } diff --git a/tests/opencl/common.mk b/tests/opencl/common.mk index 8173a2535f..53903dd41c 100644 --- a/tests/opencl/common.mk +++ b/tests/opencl/common.mk @@ -102,9 +102,9 @@ run-opae: $(PROJECT) $(KERNEL_SRCS) run-xrt: $(PROJECT) 
$(KERNEL_SRCS) ifeq ($(TARGET), hw) - XRT_INI_PATH=$(VORTEX_RT_PATH)/xrt/xrt.ini EMCONFIG_PATH=$(FPGA_BIN_DIR) XRT_DEVICE_INDEX=$(XRT_DEVICE_INDEX) XRT_XCLBIN_PATH=$(FPGA_BIN_DIR)/vortex_afu.xclbin LD_LIBRARY_PATH=$(XILINX_XRT)/lib:$(POCL_PATH)/lib:$(VORTEX_RT_PATH):$(LLVM_VORTEX)/lib:$(LD_LIBRARY_PATH) $(POCL_CC_FLAGS) VORTEX_DRIVER=xrt ./$(PROJECT) $(OPTS) + SCOPE_JSON_PATH=$(VORTEX_RT_PATH)/scope.json XRT_INI_PATH=$(VORTEX_RT_PATH)/xrt/xrt.ini EMCONFIG_PATH=$(FPGA_BIN_DIR) XRT_DEVICE_INDEX=$(XRT_DEVICE_INDEX) XRT_XCLBIN_PATH=$(FPGA_BIN_DIR)/vortex_afu.xclbin LD_LIBRARY_PATH=$(XILINX_XRT)/lib:$(POCL_PATH)/lib:$(VORTEX_RT_PATH):$(LLVM_VORTEX)/lib:$(LD_LIBRARY_PATH) $(POCL_CC_FLAGS) VORTEX_DRIVER=xrt ./$(PROJECT) $(OPTS) else - XCL_EMULATION_MODE=$(TARGET) XRT_INI_PATH=$(VORTEX_RT_PATH)/xrt/xrt.ini EMCONFIG_PATH=$(FPGA_BIN_DIR) XRT_DEVICE_INDEX=$(XRT_DEVICE_INDEX) XRT_XCLBIN_PATH=$(FPGA_BIN_DIR)/vortex_afu.xclbin LD_LIBRARY_PATH=$(XILINX_XRT)/lib:$(POCL_PATH)/lib:$(VORTEX_RT_PATH):$(LLVM_VORTEX)/lib:$(LD_LIBRARY_PATH) $(POCL_CC_FLAGS) VORTEX_DRIVER=xrt ./$(PROJECT) $(OPTS) + SCOPE_JSON_PATH=$(VORTEX_RT_PATH)/scope.json XCL_EMULATION_MODE=$(TARGET) XRT_INI_PATH=$(VORTEX_RT_PATH)/xrt/xrt.ini EMCONFIG_PATH=$(FPGA_BIN_DIR) XRT_DEVICE_INDEX=$(XRT_DEVICE_INDEX) XRT_XCLBIN_PATH=$(FPGA_BIN_DIR)/vortex_afu.xclbin LD_LIBRARY_PATH=$(XILINX_XRT)/lib:$(POCL_PATH)/lib:$(VORTEX_RT_PATH):$(LLVM_VORTEX)/lib:$(LD_LIBRARY_PATH) $(POCL_CC_FLAGS) VORTEX_DRIVER=xrt ./$(PROJECT) $(OPTS) endif .depend: $(SRCS) diff --git a/tests/regression/common.mk b/tests/regression/common.mk index 4edc5c8592..0f97d4979a 100644 --- a/tests/regression/common.mk +++ b/tests/regression/common.mk @@ -99,9 +99,9 @@ run-opae: $(PROJECT) kernel.vxbin run-xrt: $(PROJECT) kernel.vxbin ifeq ($(TARGET), hw) - XRT_INI_PATH=$(VORTEX_RT_PATH)/xrt/xrt.ini EMCONFIG_PATH=$(FPGA_BIN_DIR) XRT_DEVICE_INDEX=$(XRT_DEVICE_INDEX) XRT_XCLBIN_PATH=$(FPGA_BIN_DIR)/vortex_afu.xclbin 
LD_LIBRARY_PATH=$(XILINX_XRT)/lib:$(VORTEX_RT_PATH):$(LD_LIBRARY_PATH) VORTEX_DRIVER=xrt ./$(PROJECT) $(OPTS) + SCOPE_JSON_PATH=$(VORTEX_RT_PATH)/scope.json XRT_INI_PATH=$(VORTEX_RT_PATH)/xrt/xrt.ini EMCONFIG_PATH=$(FPGA_BIN_DIR) XRT_DEVICE_INDEX=$(XRT_DEVICE_INDEX) XRT_XCLBIN_PATH=$(FPGA_BIN_DIR)/vortex_afu.xclbin LD_LIBRARY_PATH=$(XILINX_XRT)/lib:$(VORTEX_RT_PATH):$(LD_LIBRARY_PATH) VORTEX_DRIVER=xrt ./$(PROJECT) $(OPTS) else - XCL_EMULATION_MODE=$(TARGET) XRT_INI_PATH=$(VORTEX_RT_PATH)/xrt/xrt.ini EMCONFIG_PATH=$(FPGA_BIN_DIR) XRT_DEVICE_INDEX=$(XRT_DEVICE_INDEX) XRT_XCLBIN_PATH=$(FPGA_BIN_DIR)/vortex_afu.xclbin LD_LIBRARY_PATH=$(XILINX_XRT)/lib:$(VORTEX_RT_PATH):$(LD_LIBRARY_PATH) VORTEX_DRIVER=xrt ./$(PROJECT) $(OPTS) + SCOPE_JSON_PATH=$(VORTEX_RT_PATH)/scope.json XCL_EMULATION_MODE=$(TARGET) XRT_INI_PATH=$(VORTEX_RT_PATH)/xrt/xrt.ini EMCONFIG_PATH=$(FPGA_BIN_DIR) XRT_DEVICE_INDEX=$(XRT_DEVICE_INDEX) XRT_XCLBIN_PATH=$(FPGA_BIN_DIR)/vortex_afu.xclbin LD_LIBRARY_PATH=$(XILINX_XRT)/lib:$(VORTEX_RT_PATH):$(LD_LIBRARY_PATH) VORTEX_DRIVER=xrt ./$(PROJECT) $(OPTS) endif .depend: $(SRCS) From 63cce35c1a182a8262704ebd3086d2f5f81c8688 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Thu, 19 Sep 2024 23:33:23 -0700 Subject: [PATCH 200/407] scope taps annotation --- ci/regression.sh.in | 3 ++- hw/rtl/afu/opae/vortex_afu.sv | 3 ++- hw/rtl/afu/xrt/VX_afu_wrap.sv | 26 +++++++++++++++++++++++--- hw/rtl/core/VX_fetch.sv | 11 +++++++---- hw/rtl/core/VX_issue_slice.sv | 19 ++++++++++--------- hw/rtl/core/VX_lsu_slice.sv | 22 ++++++++++++++++++---- hw/rtl/libs/VX_scope_tap.sv | 10 +++++----- runtime/xrt/vortex.cpp | 5 ++--- tests/unittest/common.mk | 2 +- tests/unittest/vx_malloc/main.cpp | 4 ++-- 10 files changed, 72 insertions(+), 33 deletions(-) diff --git a/ci/regression.sh.in b/ci/regression.sh.in index fb25ef480e..8c88c368af 100755 --- a/ci/regression.sh.in +++ b/ci/regression.sh.in @@ -306,7 +306,8 @@ debug() CONFIGS="-O0 -DSOCKET_SIZE=1" ./ci/blackbox.sh --driver=xrt 
--cores=2 --clusters=2 --l2cache --debug=1 --perf=1 --app=demo --args="-n1" CONFIGS="-DSOCKET_SIZE=1" ./ci/blackbox.sh --driver=simx --cores=2 --clusters=2 --l2cache --debug=1 --perf=1 --app=demo --args="-n1" ./ci/blackbox.sh --driver=opae --scope --app=demo --args="-n1" - + ./ci/blackbox.sh --driver=xrt --scope --app=demo --args="-n1" + echo "debugging tests done!" } diff --git a/hw/rtl/afu/opae/vortex_afu.sv b/hw/rtl/afu/opae/vortex_afu.sv index 7d5a10b940..126c14eba8 100644 --- a/hw/rtl/afu/opae/vortex_afu.sv +++ b/hw/rtl/afu/opae/vortex_afu.sv @@ -1016,7 +1016,8 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ VX_scope_tap #( .SCOPE_ID (0), .TRIGGERW (24), - .PROBEW (431) + .PROBEW (431), + .DEPTH (4096) ) scope_tap ( .clk(clk), .reset(scope_reset_w[0]), diff --git a/hw/rtl/afu/xrt/VX_afu_wrap.sv b/hw/rtl/afu/xrt/VX_afu_wrap.sv index c2f865076b..0484f46a7a 100644 --- a/hw/rtl/afu/xrt/VX_afu_wrap.sv +++ b/hw/rtl/afu/xrt/VX_afu_wrap.sv @@ -310,15 +310,35 @@ module VX_afu_wrap #( interrupt, \ vx_busy_wait, \ vx_busy, \ - vx_reset \ + vx_reset, \ + m_axi_mem_awvalid_a, \ + m_axi_mem_awready_a, \ + m_axi_mem_wvalid_a, \ + m_axi_mem_wready_a, \ + m_axi_mem_bvalid_a, \ + m_axi_mem_bready_a, \ + m_axi_mem_arvalid_a, \ + m_axi_mem_arready_a, \ + m_axi_mem_rvalid_a, \ + m_axi_mem_rready_a, \ + dcr_wr_valid \ } `define PROBES { \ - vx_pending_writes \ + vx_pending_writes, \ + m_axi_mem_awaddr_u, \ + m_axi_mem_awid_a, \ + m_axi_mem_bid_a, \ + m_axi_mem_araddr_u, \ + m_axi_mem_arid_a, \ + m_axi_mem_rid_a, \ + dcr_wr_addr, \ + dcr_wr_data \ } VX_scope_tap #( .SCOPE_ID (0), .TRIGGERW ($bits(`TRIGGERS)), - .PROBEW ($bits(`PROBES)) + .PROBEW ($bits(`PROBES)), + .DEPTH (4096) ) scope_tap ( .clk (clk), .reset (scope_reset_w[0]), diff --git a/hw/rtl/core/VX_fetch.sv b/hw/rtl/core/VX_fetch.sv index 044cd0aba4..f07ab39f56 100644 --- a/hw/rtl/core/VX_fetch.sv +++ b/hw/rtl/core/VX_fetch.sv @@ -41,7 +41,11 @@ module VX_fetch import VX_gpu_pkg::*; #( 
wire [`UUID_WIDTH-1:0] rsp_uuid; wire [`NW_WIDTH-1:0] req_tag, rsp_tag; + wire schedule_fire = schedule_if.valid && schedule_if.ready; wire icache_req_fire = icache_req_valid && icache_req_ready; + wire icache_rsp_fire = icache_bus_if.rsp_valid && icache_bus_if.rsp_ready; + `UNUSED_VAR (schedule_fire) + `UNUSED_VAR (icache_rsp_fire) assign req_tag = schedule_if.data.wid; @@ -133,14 +137,13 @@ module VX_fetch import VX_gpu_pkg::*; #( `ifdef DBG_SCOPE_FETCH `ifdef SCOPE - wire schedule_fire = schedule_if.valid && schedule_if.ready; - wire icache_rsp_fire = icache_bus_if.rsp_valid && icache_bus_if.rsp_ready; VX_scope_tap #( .SCOPE_ID (1), .TRIGGERW (4), - .PROBEW (`UUID_WIDTH + `NW_WIDTH + `NUM_THREADS + `PC_BITS + + .PROBEW (`UUID_WIDTH + `NW_WIDTH + `NUM_THREADS + `PC_BITS + ICACHE_TAG_WIDTH + ICACHE_WORD_SIZE + ICACHE_ADDR_WIDTH + - (ICACHE_WORD_SIZE*8) + ICACHE_TAG_WIDTH) + (ICACHE_WORD_SIZE * 8) + ICACHE_TAG_WIDTH), + .DEPTH (4096) ) scope_tap ( .clk (clk), .reset (scope_reset), diff --git a/hw/rtl/core/VX_issue_slice.sv b/hw/rtl/core/VX_issue_slice.sv index 34b60676fb..19b2ba8bbe 100644 --- a/hw/rtl/core/VX_issue_slice.sv +++ b/hw/rtl/core/VX_issue_slice.sv @@ -36,6 +36,11 @@ module VX_issue_slice import VX_gpu_pkg::*; #( VX_scoreboard_if scoreboard_if(); VX_operands_if operands_if(); + wire operands_if_fire = operands_if.valid && operands_if.ready; + wire writeback_if_valid = writeback_if.valid; + `UNUSED_VAR (operands_if_fire) + `UNUSED_VAR (writeback_if_valid) + VX_ibuffer #( .INSTANCE_ID ($sformatf("%s-ibuffer", INSTANCE_ID)) ) ibuffer ( @@ -90,24 +95,20 @@ module VX_issue_slice import VX_gpu_pkg::*; #( `ifdef DBG_SCOPE_ISSUE `ifdef SCOPE - wire operands_if_fire = operands_if.valid && operands_if.ready; - wire operands_if_not_ready = ~operands_if.ready; - wire writeback_if_valid = writeback_if.valid; VX_scope_tap #( .SCOPE_ID (2), - .TRIGGERW (4), - .PROBEW (`UUID_WIDTH + `NUM_THREADS + `EX_BITS + `INST_OP_BITS + + .TRIGGERW (2), + .PROBEW (`UUID_WIDTH + 
`NUM_THREADS + `EX_BITS + `INST_OP_BITS + 1 + `NR_BITS + (`NUM_THREADS * 3 * `XLEN) + - `UUID_WIDTH + `NUM_THREADS + `NR_BITS + (`NUM_THREADS*`XLEN) + 1) + `UUID_WIDTH + `NUM_THREADS + `NR_BITS + (`NUM_THREADS*`XLEN) + 1), + .DEPTH (4096) ) scope_tap ( .clk (clk), .reset (scope_reset), .start (1'b0), .stop (1'b0), .triggers ({ - reset, operands_if_fire, - operands_if_not_ready, writeback_if_valid }), .probes ({ @@ -145,7 +146,7 @@ module VX_issue_slice import VX_gpu_pkg::*; #( `ifdef DBG_TRACE_PIPELINE always @(posedge clk) begin - if (operands_if.valid && operands_if.ready) begin + if (operands_if_fire) begin `TRACE(1, ("%t: %s: wid=%0d, PC=0x%0h, ex=", $time, INSTANCE_ID, wis_to_wid(operands_if.data.wis, ISSUE_ID), {operands_if.data.PC, 1'b0})) trace_ex_type(1, operands_if.data.ex_type); `TRACE(1, (", op=")) diff --git a/hw/rtl/core/VX_lsu_slice.sv b/hw/rtl/core/VX_lsu_slice.sv index 4a8e79953a..d4de245bfc 100644 --- a/hw/rtl/core/VX_lsu_slice.sv +++ b/hw/rtl/core/VX_lsu_slice.sv @@ -536,17 +536,31 @@ module VX_lsu_slice import VX_gpu_pkg::*; #( `ifdef DBG_SCOPE_LSU `ifdef SCOPE + `define TRIGGERS { \ + mem_req_fire, \ + mem_rsp_fire \ + } + `define PROBES { \ + mem_req_rw, \ + full_addr, \ + mem_req_byteen, \ + mem_req_data, \ + execute_if.data.uuid, \ + rsp_data, \ + rsp_uuid \ + } VX_scope_tap #( .SCOPE_ID (3), - .TRIGGERW (3), - .PROBEW (1 + NUM_LANES*(`XLEN + LSU_WORD_SIZE + LSU_WORD_SIZE*8) + `UUID_WIDTH + NUM_LANES*LSU_WORD_SIZE*8 + `UUID_WIDTH) + .TRIGGERW (2), + .PROBEW (1 + NUM_LANES * (`XLEN + LSU_WORD_SIZE + LSU_WORD_SIZE * 8) + `UUID_WIDTH + NUM_LANES * LSU_WORD_SIZE * 8 + `UUID_WIDTH), + .DEPTH (4096) ) scope_tap ( .clk (clk), .reset (scope_reset), .start (1'b0), .stop (1'b0), - .triggers({reset, mem_req_fire, mem_rsp_fire}), - .probes ({mem_req_rw, full_addr, mem_req_byteen, mem_req_data, execute_if.data.uuid, rsp_data, rsp_uuid}), + .triggers(`TRIGGERS), + .probes (`PROBES), .bus_in (scope_bus_in), .bus_out(scope_bus_out) ); diff --git 
a/hw/rtl/libs/VX_scope_tap.sv b/hw/rtl/libs/VX_scope_tap.sv index 010b6f2cc6..88a3e9418c 100644 --- a/hw/rtl/libs/VX_scope_tap.sv +++ b/hw/rtl/libs/VX_scope_tap.sv @@ -19,7 +19,7 @@ module VX_scope_tap #( parameter SCOPE_IDW = 8, // scope identifier width parameter TRIGGERW = 0, // trigger signals width parameter PROBEW = 0, // probe signal width - parameter SIZE = 256, // trace buffer size + parameter DEPTH = 256, // trace buffer depth parameter IDLE_CTRW = 16 // idle time between triggers counter width ) ( input wire clk, @@ -35,7 +35,7 @@ module VX_scope_tap #( localparam TX_DATA_BITS = `LOG2UP(TX_DATAW); localparam DATAW = PROBEW + TRIGGERW; localparam DATA_BITS = `LOG2UP(DATAW); - localparam ADDRW = `CLOG2(SIZE); + localparam ADDRW = `CLOG2(DEPTH); localparam TRIGGER_ENABLE = (TRIGGERW != 0); localparam MAX_IDLE_CTR = (2 ** IDLE_CTRW) - 1; @@ -64,8 +64,8 @@ module VX_scope_tap #( localparam GET_TYPE_DATA = 2'd3; localparam GET_TYPE_BITS = 2; - `NO_RW_RAM_CHECK reg [DATAW-1:0] data_store [SIZE-1:0]; - `NO_RW_RAM_CHECK reg [IDLE_CTRW-1:0] delta_store [SIZE-1:0]; + `NO_RW_RAM_CHECK reg [DATAW-1:0] data_store [DEPTH-1:0]; + `NO_RW_RAM_CHECK reg [IDLE_CTRW-1:0] delta_store [DEPTH-1:0]; reg [TRIGGERW-1:0] prev_triggers; reg [IDLE_CTRW-1:0] delta; @@ -216,7 +216,7 @@ module VX_scope_tap #( ctrl_state <= CTRL_STATE_IDLE; cmd_start <= 0; start_delay <= '0; - waddr_end <= ADDRW'(SIZE-1); + waddr_end <= ADDRW'(DEPTH-1); bus_out_r <= 0; end else begin bus_out_r <= 0; diff --git a/runtime/xrt/vortex.cpp b/runtime/xrt/vortex.cpp index ae551bfa25..511a87be5f 100644 --- a/runtime/xrt/vortex.cpp +++ b/runtime/xrt/vortex.cpp @@ -757,7 +757,7 @@ class vx_device { if (pOff) { *pOff = offset; } - printf("get_bank_info(addr=0x%lx, bank=%d, offset=0x%lx\n", addr, index, offset); + //printf("get_bank_info(addr=0x%lx, bank=%d, offset=0x%lx\n", addr, index, offset); return 0; } @@ -792,8 +792,7 @@ class vx_device { if (pOff) { *pOff = offset; } - printf("get_bank_info(addr=0x%lx, 
bank=%d, offset=0x%lx\n", addr, index, - offset); + //printf("get_bank_info(addr=0x%lx, bank=%d, offset=0x%lx\n", addr, index, offset); return 0; } diff --git a/tests/unittest/common.mk b/tests/unittest/common.mk index 384a2f02c8..9c3e384be6 100644 --- a/tests/unittest/common.mk +++ b/tests/unittest/common.mk @@ -2,7 +2,7 @@ ROOT_DIR := $(realpath ../../..) CXXFLAGS += -std=c++11 -Wall -Wextra -pedantic -Wfatal-errors -CXXFLAGS += -I$(VORTEX_HOME)/runtime/common +CXXFLAGS += -I$(VORTEX_HOME)/sim/common # Debugging ifdef DEBUG diff --git a/tests/unittest/vx_malloc/main.cpp b/tests/unittest/vx_malloc/main.cpp index f10f986cae..d7e20b4394 100644 --- a/tests/unittest/vx_malloc/main.cpp +++ b/tests/unittest/vx_malloc/main.cpp @@ -1,4 +1,4 @@ -#include +#include #include #define RT_CHECK(_expr) \ @@ -12,7 +12,7 @@ static uint64_t minAddress = 0; static uint64_t maxAddress = 0xffffffff; -static uint32_t pageAlign = 4096; +static uint32_t pageAlign = 4096; static uint32_t blockAlign = 64; int main() { From a61f97f6c66e2837392b7b7d89319f3928e0a164 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Fri, 20 Sep 2024 08:09:46 -0700 Subject: [PATCH 201/407] minor update --- hw/rtl/afu/opae/vortex_afu.sv | 2 +- hw/rtl/afu/xrt/vortex_afu.vh | 4 ++-- hw/rtl/libs/VX_axi_adapter.sv | 18 +++++++++--------- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/hw/rtl/afu/opae/vortex_afu.sv b/hw/rtl/afu/opae/vortex_afu.sv index 126c14eba8..4bfacf960b 100644 --- a/hw/rtl/afu/opae/vortex_afu.sv +++ b/hw/rtl/afu/opae/vortex_afu.sv @@ -79,7 +79,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ localparam COUT_TID_WIDTH = `CLOG2(`VX_MEM_BYTEEN_WIDTH); localparam COUT_QUEUE_DATAW = COUT_TID_WIDTH + 8; - localparam COUT_QUEUE_SIZE = 64; + localparam COUT_QUEUE_SIZE = 1024; localparam MMIO_DEV_CAPS = `AFU_IMAGE_MMIO_DEV_CAPS; localparam MMIO_ISA_CAPS = `AFU_IMAGE_MMIO_ISA_CAPS; diff --git a/hw/rtl/afu/xrt/vortex_afu.vh b/hw/rtl/afu/xrt/vortex_afu.vh 
index 1a14e13163..bf70cb8850 100644 --- a/hw/rtl/afu/xrt/vortex_afu.vh +++ b/hw/rtl/afu/xrt/vortex_afu.vh @@ -15,11 +15,11 @@ `define VORTEX_AFU_VH `ifndef M_AXI_MEM_NUM_BANKS -`define M_AXI_MEM_NUM_BANKS 4 +`define M_AXI_MEM_NUM_BANKS 1 `endif `ifndef M_AXI_MEM_ADDR_WIDTH -`define M_AXI_MEM_ADDR_WIDTH 30 +`define M_AXI_MEM_ADDR_WIDTH 34 `endif `ifndef M_AXI_MEM_DATA_WIDTH diff --git a/hw/rtl/libs/VX_axi_adapter.sv b/hw/rtl/libs/VX_axi_adapter.sv index 06216f2ab2..9524971863 100644 --- a/hw/rtl/libs/VX_axi_adapter.sv +++ b/hw/rtl/libs/VX_axi_adapter.sv @@ -100,15 +100,6 @@ module VX_axi_adapter #( assign req_bank_sel = '0; end - wire [NUM_BANKS-1:0] axi_write_ready; - for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_axi_write_ready - assign axi_write_ready[i] = (m_axi_awready[i] || m_axi_aw_ack[i]) - && (m_axi_wready[i] || m_axi_w_ack[i]); - end - - // request ack - assign mem_req_ready = mem_req_rw ? axi_write_ready[req_bank_sel] : m_axi_arready[req_bank_sel]; - wire mem_req_fire = mem_req_valid && mem_req_ready; // AXi write request synchronization @@ -131,6 +122,15 @@ module VX_axi_adapter #( end end + wire [NUM_BANKS-1:0] axi_write_ready; + for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_axi_write_ready + assign axi_write_ready[i] = (m_axi_awready[i] || m_axi_aw_ack[i]) + && (m_axi_wready[i] || m_axi_w_ack[i]); + end + + // request ack + assign mem_req_ready = mem_req_rw ? 
axi_write_ready[req_bank_sel] : m_axi_arready[req_bank_sel]; + // AXI write request address channel for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_axi_write_addr assign m_axi_awvalid[i] = mem_req_valid && mem_req_rw && (req_bank_sel == i) && ~m_axi_aw_ack[i]; From 3bac7eae6aff0dbb5996ee3889630289d2ccd919 Mon Sep 17 00:00:00 2001 From: sij814 Date: Fri, 20 Sep 2024 16:52:12 -0700 Subject: [PATCH 202/407] changed fpnew commit --- third_party/fpnew | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/third_party/fpnew b/third_party/fpnew index a6af691551..79e4531390 160000 --- a/third_party/fpnew +++ b/third_party/fpnew @@ -1 +1 @@ -Subproject commit a6af691551ffbd76d5d9cf30774d3295a41615e4 +Subproject commit 79e453139072df42c9ec8f697132ba485d74e23d From 7938c7be5f92cebdd02defeb1dee55691eef0516 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Fri, 20 Sep 2024 20:35:58 -0700 Subject: [PATCH 203/407] synthesis updates --- hw/rtl/afu/opae/vortex_afu.sv | 114 ++++++++++----------- hw/rtl/afu/xrt/VX_afu_wrap.sv | 44 ++++----- hw/rtl/core/VX_issue_top.sv | 7 ++ hw/rtl/core/VX_lsu_slice.sv | 17 +--- hw/rtl/fpu/VX_fpu_sqrt.sv | 8 +- hw/rtl/libs/VX_scope_tap.sv | 164 ++++++++++++++++++++----------- hw/syn/altera/dut/Makefile | 10 +- hw/syn/altera/dut/scope/Makefile | 7 ++ hw/syn/altera/opae/Makefile | 8 +- hw/syn/xilinx/dut/Makefile | 10 +- hw/syn/xilinx/dut/scope/Makefile | 7 ++ hw/syn/xilinx/xrt/Makefile | 16 +-- hw/syn/yosys/Makefile | 2 +- sim/opaesim/Makefile | 1 - sim/xrtsim/Makefile | 1 - 15 files changed, 237 insertions(+), 179 deletions(-) create mode 100755 hw/syn/altera/dut/scope/Makefile create mode 100644 hw/syn/xilinx/dut/scope/Makefile diff --git a/hw/rtl/afu/opae/vortex_afu.sv b/hw/rtl/afu/opae/vortex_afu.sv index 4bfacf960b..3e605462f7 100644 --- a/hw/rtl/afu/opae/vortex_afu.sv +++ b/hw/rtl/afu/opae/vortex_afu.sv @@ -40,7 +40,6 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ output t_local_mem_burst_cnt 
avs_burstcount [NUM_LOCAL_MEM_BANKS], input wire avs_readdatavalid [NUM_LOCAL_MEM_BANKS] ); - localparam LMEM_DATA_WIDTH = $bits(t_local_mem_data); localparam LMEM_DATA_SIZE = LMEM_DATA_WIDTH / 8; localparam LMEM_ADDR_WIDTH = $bits(t_local_mem_addr); @@ -50,6 +49,8 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ localparam CCI_DATA_SIZE = CCI_DATA_WIDTH / 8; localparam CCI_ADDR_WIDTH = $bits(t_ccip_clAddr); + localparam RESET_CTR_WIDTH = `CLOG2(`RESET_DELAY+1); + localparam AVS_RD_QUEUE_SIZE = 32; localparam _VX_MEM_TAG_WIDTH = `VX_MEM_TAG_WIDTH; localparam _AVS_REQ_TAGW_VX = _VX_MEM_TAG_WIDTH + `CLOG2(LMEM_DATA_WIDTH) - `CLOG2(`VX_MEM_DATA_WIDTH); @@ -185,7 +186,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ end if (cmd_scope_writing) begin scope_bus_in <= 1'(cmd_scope_wdata >> scope_bus_ctr); - scope_bus_ctr <= scope_bus_ctr - 1; + scope_bus_ctr <= scope_bus_ctr - 6'd1; if (scope_bus_ctr == 0) begin cmd_scope_writing <= 0; scope_bus_in <= 0; @@ -193,7 +194,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ end if (cmd_scope_reading) begin cmd_scope_rdata <= {cmd_scope_rdata[62:0], scope_bus_out}; - scope_bus_ctr <= scope_bus_ctr - 1; + scope_bus_ctr <= scope_bus_ctr - 6'd1; if (scope_bus_ctr == 0) begin cmd_scope_reading <= 0; end @@ -344,7 +345,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ wire cmd_mem_rd_done; reg cmd_mem_wr_done; - reg [`CLOG2(`RESET_DELAY+1)-1:0] vx_reset_ctr; + reg [RESET_CTR_WIDTH-1:0] vx_reset_ctr; reg vx_busy_wait; reg vx_reset = 1; // asserted at initialization wire vx_busy; @@ -384,7 +385,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ `TRACE(2, ("%t: AFU: Goto STATE RUN\n", $time)) `endif state <= STATE_RUN; - vx_reset_ctr <= (`RESET_DELAY-1); + vx_reset_ctr <= RESET_CTR_WIDTH'(`RESET_DELAY-1); vx_reset <= 1; end default: begin @@ -414,7 +415,7 @@ 
module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ STATE_RUN: begin if (vx_reset) begin // wait until the reset network is ready - if (vx_reset_ctr == 0) begin + if (vx_reset_ctr == RESET_CTR_WIDTH'(0)) begin `ifdef DBG_TRACE_AFU `TRACE(2, ("%t: AFU: Begin execution\n", $time)) `endif @@ -443,8 +444,8 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ endcase // ensure reset network initialization - if (vx_reset_ctr != '0) begin - vx_reset_ctr <= vx_reset_ctr - 1; + if (vx_reset_ctr != RESET_CTR_WIDTH'(0)) begin + vx_reset_ctr <= vx_reset_ctr - RESET_CTR_WIDTH'(1); end end end @@ -1013,61 +1014,64 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ end wire state_changed = (state != state_prev); + `define AFU_TRIGGERS { \ + reset, \ + state_changed, \ + mem_req_fire, \ + mem_rsp_fire, \ + avs_write_fire, \ + avs_read_fire, \ + avs_waitrequest[0], \ + avs_readdatavalid[0], \ + cp2af_sRxPort.c0.mmioRdValid, \ + cp2af_sRxPort.c0.mmioWrValid, \ + cp2af_sRxPort.c0.rspValid, \ + cp2af_sRxPort.c1.rspValid, \ + af2cp_sTxPort.c0.valid, \ + af2cp_sTxPort.c1.valid, \ + cp2af_sRxPort.c0TxAlmFull, \ + cp2af_sRxPort.c1TxAlmFull, \ + af2cp_sTxPort.c2.mmioRdValid, \ + cci_wr_req_fire, \ + cci_wr_rsp_fire, \ + cci_rd_req_fire, \ + cci_rd_rsp_fire, \ + cci_pending_reads_full, \ + cci_pending_writes_empty, \ + cci_pending_writes_full \ + } + + `define AFU_PROBES { \ + cmd_type, \ + state, \ + mmio_req_hdr.address, \ + cp2af_sRxPort.c0.hdr.mdata, \ + af2cp_sTxPort.c0.hdr.address, \ + af2cp_sTxPort.c0.hdr.mdata, \ + af2cp_sTxPort.c1.hdr.address, \ + avs_address[0], \ + avs_byteenable[0], \ + avs_burstcount[0], \ + cci_mem_rd_req_ctr, \ + cci_mem_wr_req_ctr, \ + cci_rd_req_ctr, \ + cci_rd_rsp_ctr, \ + cci_wr_req_ctr, \ + mem_bus_if_addr \ + } + VX_scope_tap #( .SCOPE_ID (0), - .TRIGGERW (24), - .PROBEW (431), + .TRIGGERW ($bits(`AFU_TRIGGERS)), + .PROBEW ($bits(`AFU_PROBES)), .DEPTH (4096) ) 
scope_tap ( .clk(clk), .reset(scope_reset_w[0]), .start(1'b0), .stop(1'b0), - .triggers({ - reset, - state_changed, - mem_req_fire, - mem_rsp_fire, - avs_write_fire, - avs_read_fire, - avs_waitrequest[0], - avs_readdatavalid[0], - cp2af_sRxPort.c0.mmioRdValid, - cp2af_sRxPort.c0.mmioWrValid, - cp2af_sRxPort.c0.rspValid, - cp2af_sRxPort.c1.rspValid, - af2cp_sTxPort.c0.valid, - af2cp_sTxPort.c1.valid, - cp2af_sRxPort.c0TxAlmFull, - cp2af_sRxPort.c1TxAlmFull, - af2cp_sTxPort.c2.mmioRdValid, - cci_wr_req_fire, - cci_wr_rsp_fire, - cci_rd_req_fire, - cci_rd_rsp_fire, - cci_pending_reads_full, - cci_pending_writes_empty, - cci_pending_writes_full - }), - .probes({ - cmd_type, - state, - mmio_req_hdr.address, - mmio_req_hdr.length, - cp2af_sRxPort.c0.hdr.mdata, - af2cp_sTxPort.c0.hdr.address, - af2cp_sTxPort.c0.hdr.mdata, - af2cp_sTxPort.c1.hdr.address, - avs_address[0], - avs_byteenable[0], - avs_burstcount[0], - cci_mem_rd_req_ctr, - cci_mem_wr_req_ctr, - cci_rd_req_ctr, - cci_rd_rsp_ctr, - cci_wr_req_ctr, - mem_bus_if_addr - }), + .triggers(`AFU_TRIGGERS), + .probes(`AFU_PROBES), .bus_in(scope_bus_in_w[0]), .bus_out(scope_bus_out_w[0]) ); diff --git a/hw/rtl/afu/xrt/VX_afu_wrap.sv b/hw/rtl/afu/xrt/VX_afu_wrap.sv index 0484f46a7a..e51d8f17b8 100644 --- a/hw/rtl/afu/xrt/VX_afu_wrap.sv +++ b/hw/rtl/afu/xrt/VX_afu_wrap.sv @@ -301,7 +301,7 @@ module VX_afu_wrap #( `ifdef DBG_SCOPE_AFU `ifdef SCOPE - `define TRIGGERS { \ + `define AFU_TRIGGERS { \ reset, \ ap_reset, \ ap_start, \ @@ -311,41 +311,41 @@ module VX_afu_wrap #( vx_busy_wait, \ vx_busy, \ vx_reset, \ - m_axi_mem_awvalid_a, \ - m_axi_mem_awready_a, \ - m_axi_mem_wvalid_a, \ - m_axi_mem_wready_a, \ - m_axi_mem_bvalid_a, \ - m_axi_mem_bready_a, \ - m_axi_mem_arvalid_a, \ - m_axi_mem_arready_a, \ - m_axi_mem_rvalid_a, \ - m_axi_mem_rready_a, \ + m_axi_mem_awvalid_a[0], \ + m_axi_mem_awready_a[0], \ + m_axi_mem_wvalid_a[0], \ + m_axi_mem_wready_a[0], \ + m_axi_mem_bvalid_a[0], \ + m_axi_mem_bready_a[0], \ + 
m_axi_mem_arvalid_a[0], \ + m_axi_mem_arready_a[0], \ + m_axi_mem_rvalid_a[0], \ + m_axi_mem_rready_a[0], \ dcr_wr_valid \ } - `define PROBES { \ + `define AFU_PROBES { \ vx_pending_writes, \ - m_axi_mem_awaddr_u, \ - m_axi_mem_awid_a, \ - m_axi_mem_bid_a, \ - m_axi_mem_araddr_u, \ - m_axi_mem_arid_a, \ - m_axi_mem_rid_a, \ + m_axi_mem_awaddr_u[0], \ + m_axi_mem_awid_a[0], \ + m_axi_mem_bid_a[0], \ + m_axi_mem_araddr_u[0], \ + m_axi_mem_arid_a[0], \ + m_axi_mem_rid_a[0], \ dcr_wr_addr, \ dcr_wr_data \ } VX_scope_tap #( .SCOPE_ID (0), - .TRIGGERW ($bits(`TRIGGERS)), - .PROBEW ($bits(`PROBES)), + .TRIGGERW ($bits(`AFU_TRIGGERS)), + .PROBEW ($bits(`AFU_PROBES)), .DEPTH (4096) ) scope_tap ( .clk (clk), .reset (scope_reset_w[0]), .start (1'b0), .stop (1'b0), - .triggers (`TRIGGERS), - .probes (`PROBES), + .triggers (`AFU_TRIGGERS), + .probes (`AFU_PROBES), .bus_in (scope_bus_in_w[0]), .bus_out (scope_bus_out_w[0]) ); diff --git a/hw/rtl/core/VX_issue_top.sv b/hw/rtl/core/VX_issue_top.sv index e148b02f64..2d81ee0440 100644 --- a/hw/rtl/core/VX_issue_top.sv +++ b/hw/rtl/core/VX_issue_top.sv @@ -113,6 +113,13 @@ module VX_issue_top import VX_gpu_pkg::*; #( issue_perf_t issue_perf = '0; `endif +`ifdef SCOPE + wire [0:0] scope_reset_w = 1'b0; + wire [0:0] scope_bus_in_w = 1'b0; + wire [0:0] scope_bus_out_w; + `UNUSED_VAR (scope_bus_out_w) +`endif + VX_issue #( .INSTANCE_ID (INSTANCE_ID) ) issue ( diff --git a/hw/rtl/core/VX_lsu_slice.sv b/hw/rtl/core/VX_lsu_slice.sv index d4de245bfc..0452d0c796 100644 --- a/hw/rtl/core/VX_lsu_slice.sv +++ b/hw/rtl/core/VX_lsu_slice.sv @@ -536,19 +536,6 @@ module VX_lsu_slice import VX_gpu_pkg::*; #( `ifdef DBG_SCOPE_LSU `ifdef SCOPE - `define TRIGGERS { \ - mem_req_fire, \ - mem_rsp_fire \ - } - `define PROBES { \ - mem_req_rw, \ - full_addr, \ - mem_req_byteen, \ - mem_req_data, \ - execute_if.data.uuid, \ - rsp_data, \ - rsp_uuid \ - } VX_scope_tap #( .SCOPE_ID (3), .TRIGGERW (2), @@ -559,8 +546,8 @@ module VX_lsu_slice import 
VX_gpu_pkg::*; #( .reset (scope_reset), .start (1'b0), .stop (1'b0), - .triggers(`TRIGGERS), - .probes (`PROBES), + .triggers({mem_req_fire, mem_rsp_fire}), + .probes ({mem_req_rw, full_addr, mem_req_byteen, mem_req_data, execute_if.data.uuid, rsp_data, rsp_uuid}), .bus_in (scope_bus_in), .bus_out(scope_bus_out) ); diff --git a/hw/rtl/fpu/VX_fpu_sqrt.sv b/hw/rtl/fpu/VX_fpu_sqrt.sv index fbfb86175d..172a42e6fe 100644 --- a/hw/rtl/fpu/VX_fpu_sqrt.sv +++ b/hw/rtl/fpu/VX_fpu_sqrt.sv @@ -101,7 +101,7 @@ module VX_fpu_sqrt import VX_fpu_pkg::*; #( .clk (clk), .areset (1'b0), .en (pe_enable), - .a (pe_data_in[i]), + .a (pe_data_in[i][0 +: 32]), .q (pe_data_out[i][0 +: 32]) ); assign pe_data_out[i][32 +: `FP_FLAGS_BITS] = 'x; @@ -120,7 +120,7 @@ module VX_fpu_sqrt import VX_fpu_pkg::*; #( .aclk (clk), .aclken (pe_enable), .s_axis_a_tvalid (1'b1), - .s_axis_a_tdata (pe_data_in[i]), + .s_axis_a_tdata (pe_data_in[i][0 +: 32]), `UNUSED_PIN (m_axis_result_tvalid), .m_axis_result_tdata (pe_data_out[i][0 +: 32]), .m_axis_result_tuser (tuser) @@ -143,8 +143,8 @@ module VX_fpu_sqrt import VX_fpu_pkg::*; #( dpi_fsqrt ( pe_enable, int'(0), - {32'hffffffff, pe_data_in[i][0 +: 32]}, // a - pe_data_in[0][32 +: `INST_FRM_BITS], // frm + {32'hffffffff, pe_data_in[i][0 +: 32]}, // a + pe_data_in[0][32 +: `INST_FRM_BITS], // frm r, f ); diff --git a/hw/rtl/libs/VX_scope_tap.sv b/hw/rtl/libs/VX_scope_tap.sv index 88a3e9418c..f446780790 100644 --- a/hw/rtl/libs/VX_scope_tap.sv +++ b/hw/rtl/libs/VX_scope_tap.sv @@ -17,82 +17,138 @@ module VX_scope_tap #( parameter SCOPE_ID = 0, // scope identifier parameter SCOPE_IDW = 8, // scope identifier width - parameter TRIGGERW = 0, // trigger signals width - parameter PROBEW = 0, // probe signal width - parameter DEPTH = 256, // trace buffer depth - parameter IDLE_CTRW = 16 // idle time between triggers counter width + parameter TRIGGERW = 16, // trigger signals width + parameter PROBEW = 256, // probe signal width + parameter DEPTH = 1024, // trace 
buffer depth + parameter IDLE_CTRW = 16, // idle time between triggers counter width + parameter TX_DATAW = 64 // transfer data width ) ( input wire clk, input wire reset, input wire start, input wire stop, - input wire [TRIGGERW-1:0] triggers, + input wire [`UP(TRIGGERW)-1:0] triggers, input wire [PROBEW-1:0] probes, input wire bus_in, output wire bus_out ); - localparam TX_DATAW = 64; - localparam TX_DATA_BITS = `LOG2UP(TX_DATAW); - localparam DATAW = PROBEW + TRIGGERW; - localparam DATA_BITS = `LOG2UP(DATAW); - localparam ADDRW = `CLOG2(DEPTH); - localparam TRIGGER_ENABLE = (TRIGGERW != 0); - localparam MAX_IDLE_CTR = (2 ** IDLE_CTRW) - 1; + localparam CTR_WIDTH = 64; + localparam TX_DATA_BITS = `LOG2UP(TX_DATAW); + localparam DATAW = PROBEW + TRIGGERW; + localparam DATA_BITS = `LOG2UP(DATAW); + localparam ADDRW = `CLOG2(DEPTH); + localparam MAX_IDLE_CTR = (2 ** IDLE_CTRW) - 1; - localparam CTRL_STATE_IDLE = 2'd0; - localparam CTRL_STATE_RECV = 2'd1; - localparam CTRL_STATE_CMD = 2'd2; - localparam CTRL_STATE_SEND = 2'd3; - localparam CTRL_STATE_BITS = 2; + localparam CTRL_STATE_IDLE = 2'd0; + localparam CTRL_STATE_RECV = 2'd1; + localparam CTRL_STATE_CMD = 2'd2; + localparam CTRL_STATE_SEND = 2'd3; + localparam CTRL_STATE_BITS = 2; - localparam TAP_STATE_IDLE = 2'd0; - localparam TAP_STATE_WAIT = 2'd1; - localparam TAP_STATE_RUN = 2'd2; - localparam TAP_STATE_BITS = 2; + localparam TAP_STATE_IDLE = 2'd0; + localparam TAP_STATE_WAIT = 2'd1; + localparam TAP_STATE_RUN = 2'd2; + localparam TAP_STATE_BITS = 2; - localparam CMD_GET_WIDTH = 3'd0; - localparam CMD_GET_COUNT = 3'd1; - localparam CMD_GET_START = 3'd2; - localparam CMD_GET_DATA = 3'd3; - localparam CMD_SET_START = 3'd4; - localparam CMD_SET_STOP = 3'd5; - localparam CMD_TYPE_BITS = 3; + localparam CMD_GET_WIDTH = 3'd0; + localparam CMD_GET_COUNT = 3'd1; + localparam CMD_GET_START = 3'd2; + localparam CMD_GET_DATA = 3'd3; + localparam CMD_SET_START = 3'd4; + localparam CMD_SET_STOP = 3'd5; + localparam 
CMD_TYPE_BITS = 3; - localparam GET_TYPE_WIDTH = 2'd0; - localparam GET_TYPE_COUNT = 2'd1; - localparam GET_TYPE_START = 2'd2; - localparam GET_TYPE_DATA = 2'd3; - localparam GET_TYPE_BITS = 2; + localparam GET_TYPE_WIDTH = 2'd0; + localparam GET_TYPE_COUNT = 2'd1; + localparam GET_TYPE_START = 2'd2; + localparam GET_TYPE_DATA = 2'd3; + localparam GET_TYPE_BITS = 2; - `NO_RW_RAM_CHECK reg [DATAW-1:0] data_store [DEPTH-1:0]; - `NO_RW_RAM_CHECK reg [IDLE_CTRW-1:0] delta_store [DEPTH-1:0]; - - reg [TRIGGERW-1:0] prev_triggers; + reg [`UP(TRIGGERW)-1:0] prev_triggers; reg [IDLE_CTRW-1:0] delta; - reg [63:0] timestamp, start_time; + reg [CTR_WIDTH-1:0] timestamp, start_time; reg [ADDRW-1:0] waddr, waddr_end; + reg write_en; reg cmd_start, delta_flush; - reg [63:0] start_delay, delay_cntr; + reg [CTR_WIDTH-1:0] start_delay, delay_cntr; reg [TAP_STATE_BITS-1:0] tap_state; reg [CTRL_STATE_BITS-1:0] ctrl_state; reg [GET_TYPE_BITS-1:0] get_type; + wire [DATAW-1:0] data_value; + wire [IDLE_CTRW-1:0] delta_value; reg [TX_DATA_BITS-1:0] ser_tx_ctr; reg [DATA_BITS-1:0] read_offset; reg [ADDRW-1:0] raddr; reg read_data; + wire [DATAW-1:0] data_in; + if (TRIGGERW != 0) begin + assign data_in = {probes, triggers}; + end else begin + assign data_in = probes; + end + + VX_dp_ram #( + .DATAW (DATAW), + .SIZE (DEPTH), + .NO_RWCHECK (1) + ) data_store ( + .clk (clk), + .reset (reset), + .read (1'b1), + .wren (1'b1), + .write (write_en), + .waddr (waddr), + .wdata (data_in), + .raddr (raddr), + .rdata (data_value) + ); + + if (TRIGGERW != 0) begin + VX_dp_ram #( + .DATAW (IDLE_CTRW), + .SIZE (DEPTH), + .NO_RWCHECK (1) + ) delta_store ( + .clk (clk), + .reset (reset), + .read (1'b1), + .wren (1'b1), + .write (write_en), + .waddr (waddr), + .wdata (delta), + .raddr (raddr), + .rdata (delta_value) + ); + end else begin + assign delta_value = '0; + end + // // trace capture // - wire [ADDRW-1:0] raddr_n = raddr + 1; + wire [ADDRW-1:0] raddr_n = raddr + ADDRW'(1); - wire [ADDRW:0] count = 
(ADDRW+1)'(waddr) + 1; + wire [ADDRW:0] count = (ADDRW+1)'(waddr) + (ADDRW+1)'(1); + + always @(*) begin + write_en = 0; + if (tap_state == TAP_STATE_RUN) begin + if (TRIGGERW != 0) begin + if (delta_flush || (triggers != prev_triggers)) begin + write_en = 1; + end + end else begin + write_en = 1; + end + end + end always @(posedge clk) begin if (reset) begin @@ -105,7 +161,7 @@ module VX_scope_tap #( read_data <= 0; timestamp <= '0; end else begin - timestamp <= timestamp + 1; + timestamp <= timestamp + CTR_WIDTH'(1); case (tap_state) TAP_STATE_IDLE: begin @@ -128,7 +184,7 @@ module VX_scope_tap #( end end TAP_STATE_WAIT: begin - delay_cntr <= delay_cntr - 1; + delay_cntr <= delay_cntr - CTR_WIDTH'(1); if (1 == delay_cntr) begin tap_state <= TAP_STATE_RUN; start_time <= timestamp; @@ -138,22 +194,18 @@ module VX_scope_tap #( end end TAP_STATE_RUN: begin - if (TRIGGER_ENABLE != 0) begin + if (TRIGGERW != 0) begin if (delta_flush || (triggers != prev_triggers)) begin - data_store[waddr] <= {probes, triggers}; - delta_store[waddr] <= delta; - waddr <= waddr + 1; + waddr <= waddr + ADDRW'(1); delta <= '0; delta_flush <= 0; end else begin - delta <= delta + 1; - delta_flush <= (delta == (MAX_IDLE_CTR-1)); + delta <= delta + IDLE_CTRW'(1); + delta_flush <= (delta == IDLE_CTRW'(MAX_IDLE_CTR-1)); end prev_triggers <= triggers; end else begin - data_store[waddr] <= {probes, triggers}; - delta_store[waddr] <= '0; - waddr <= waddr + 1; + waddr <= waddr + ADDRW'(1); end if (stop || (waddr >= waddr_end)) begin waddr <= waddr; @@ -208,8 +260,8 @@ module VX_scope_tap #( wire [SCOPE_IDW-1:0] cmd_scope_id = ser_buf_in_n[CMD_TYPE_BITS +: SCOPE_IDW]; wire [TX_DATAW-CMD_TYPE_BITS-SCOPE_IDW-1:0] cmd_data = ser_buf_in[TX_DATAW-1:CMD_TYPE_BITS+SCOPE_IDW]; - wire [TX_DATAW-1:0] data_chunk = TX_DATAW'(DATAW'(data_store[raddr] >> read_offset)); - wire [TX_DATAW-1:0] get_data = read_data ? 
data_chunk : TX_DATAW'(delta_store[raddr]); + wire [TX_DATAW-1:0] data_chunk = TX_DATAW'(DATAW'(data_value >> read_offset)); + wire [TX_DATAW-1:0] get_data = read_data ? data_chunk : TX_DATAW'(delta_value); always @(posedge clk) begin if (reset) begin @@ -230,7 +282,7 @@ module VX_scope_tap #( ser_tx_ctr <= TX_DATA_BITS'(TX_DATAW-1); end CTRL_STATE_RECV: begin - ser_tx_ctr <= ser_tx_ctr - 1; + ser_tx_ctr <= ser_tx_ctr - TX_DATA_BITS'(1); ser_buf_in <= ser_buf_in_n; if (ser_tx_ctr == 0) begin ctrl_state <= (cmd_scope_id == SCOPE_ID) ? CTRL_STATE_CMD : CTRL_STATE_IDLE; @@ -262,7 +314,7 @@ module VX_scope_tap #( `endif end CTRL_STATE_SEND: begin - ser_tx_ctr <= ser_tx_ctr - 1; + ser_tx_ctr <= ser_tx_ctr - TX_DATA_BITS'(1); case (get_type) GET_TYPE_WIDTH: begin bus_out_r <= 1'(DATAW >> ser_tx_ctr); diff --git a/hw/syn/altera/dut/Makefile b/hw/syn/altera/dut/Makefile index e5655c5fda..173408ecad 100644 --- a/hw/syn/altera/dut/Makefile +++ b/hw/syn/altera/dut/Makefile @@ -9,7 +9,7 @@ SCRIPT_DIR := $(VORTEX_HOME)/hw/scripts IP_CACHE_DIR := $(ROOT_DIR)/hw/syn/altera/ip_cache/$(DEVICE_FAMILY) -.PHONY: unittest pipeline mem_unit lmem cache fpu core issue vortex top +.PHONY: unittest scope mem_unit lmem cache fpu core issue vortex top ip-gen: $(IP_CACHE_DIR)/ip_gen.log $(IP_CACHE_DIR)/ip_gen.log: @@ -20,10 +20,10 @@ unittest: cp unittest/Makefile unittest/$(BUILD_DIR) $(MAKE) -C unittest/$(BUILD_DIR) clean && $(MAKE) -C unittest/$(BUILD_DIR) > unittest/$(BUILD_DIR)/build.log 2>&1 & -pipeline: - mkdir -p pipeline/$(BUILD_DIR) - cp pipeline/Makefile pipeline/$(BUILD_DIR) - $(MAKE) -C pipeline/$(BUILD_DIR) clean && $(MAKE) -C pipeline/$(BUILD_DIR) > pipeline/$(BUILD_DIR)/build.log 2>&1 & +scope: + mkdir -p scope/$(BUILD_DIR) + cp scope/Makefile scope/$(BUILD_DIR) + $(MAKE) -C scope/$(BUILD_DIR) clean && $(MAKE) -C scope/$(BUILD_DIR) > scope/$(BUILD_DIR)/build.log 2>&1 & mem_unit: mkdir -p mem_unit/$(BUILD_DIR) diff --git a/hw/syn/altera/dut/scope/Makefile 
b/hw/syn/altera/dut/scope/Makefile new file mode 100755 index 0000000000..405f05e8a7 --- /dev/null +++ b/hw/syn/altera/dut/scope/Makefile @@ -0,0 +1,7 @@ +PROJECT = VX_scope_tap +TOP_LEVEL_ENTITY = $(PROJECT) +SRC_FILE = $(PROJECT).sv + +include ../../common.mk + +RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs diff --git a/hw/syn/altera/opae/Makefile b/hw/syn/altera/opae/Makefile index a3d373cb09..e961be4532 100644 --- a/hw/syn/altera/opae/Makefile +++ b/hw/syn/altera/opae/Makefile @@ -36,7 +36,6 @@ DBG_SCOPE_FLAGS += -DDBG_SCOPE_AFU DBG_SCOPE_FLAGS += -DDBG_SCOPE_ISSUE DBG_SCOPE_FLAGS += -DDBG_SCOPE_FETCH DBG_SCOPE_FLAGS += -DDBG_SCOPE_LSU -DBG_SCOPE_FLAGS += -DDBG_SCOPE_MSCHED ifeq ($(DEVICE_FAMILY), stratix10) CONFIGS += -DALTERA_S10 @@ -55,9 +54,12 @@ CONFIGS_32c := -DNUM_CLUSTERS=2 -DNUM_CORES=16 CONFIGS_64c := -DNUM_CLUSTERS=4 -DNUM_CORES=16 CONFIGS += $(CONFIGS_$(NUM_CORES)c) -# include paths +# include sources +RTL_PKGS = $(AFU_DIR)/local_mem_cfg_pkg.sv $(AFU_DIR)/ccip/ccip_if_pkg.sv +RTL_PKGS += $(RTL_DIR)/VX_gpu_pkg.sv $(RTL_DIR)/fpu/VX_fpu_pkg.sv FPU_INCLUDE = -I$(RTL_DIR)/fpu ifneq (,$(findstring FPU_FPNEW,$(CONFIGS))) + RTL_PKGS += $(THIRD_PARTY_DIR)/cvfpu/src/fpnew_pkg.sv $(THIRD_PARTY_DIR)/cvfpu/src/common_cells/src/cf_math_pkg $(THIRD_PARTY_DIR)/cvfpu/src/fpu_div_sqrt_mvp/hdl/defs_div_sqrt_mvp.sv FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/include -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/src -J$(THIRD_PARTY_DIR)/cvfpu/src/fpu_div_sqrt_mvp/hdl -J$(THIRD_PARTY_DIR)/cvfpu/src endif RTL_INCLUDE = -I$(RTL_DIR) -I$(DPI_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache -I$(AFU_DIR) -I$(IP_CACHE_DIR) @@ -96,7 +98,7 @@ ifdef PERF endif # ast dump flags -XML_CFLAGS = $(filter-out -DSYNTHESIS -DQUARTUS, $(CFLAGS)) -I$(AFU_DIR)/ccip -I$(DPI_DIR) -DNOPAE +XML_CFLAGS = $(filter-out -DSYNTHESIS -DQUARTUS, $(CFLAGS)) $(RTL_PKGS) -I$(AFU_DIR)/ccip -I$(DPI_DIR) -DPLATFORM_PROVIDES_LOCAL_MEMORY 
-DPLATFORM_PARAM_LOCAL_MEMORY_BANKS=2 -DPLATFORM_PARAM_LOCAL_MEMORY_ADDR_WIDTH=26 -DPLATFORM_PARAM_LOCAL_MEMORY_DATA_WIDTH=512 -DPLATFORM_PARAM_LOCAL_MEMORY_BURST_CNT_WIDTH=4 -DNOPAE -DSV_DPI all: swconfig ip-gen setup build diff --git a/hw/syn/xilinx/dut/Makefile b/hw/syn/xilinx/dut/Makefile index 0255287fbd..fe37eb4b89 100644 --- a/hw/syn/xilinx/dut/Makefile +++ b/hw/syn/xilinx/dut/Makefile @@ -5,17 +5,17 @@ PREFIX ?= build BUILD_DIR := $(PREFIX) -.PHONY: unittest pipeline mem_unit lmem cache fpu core issue vortex top +.PHONY: unittest scope mem_unit lmem cache fpu core issue vortex top unittest: mkdir -p unittest/$(BUILD_DIR) cp unittest/Makefile unittest/$(BUILD_DIR) $(MAKE) -C unittest/$(BUILD_DIR) clean && $(MAKE) -C unittest/$(BUILD_DIR) > unittest/$(BUILD_DIR)/build.log 2>&1 & -pipeline: - mkdir -p pipeline/$(BUILD_DIR) - cp pipeline/Makefile pipeline/$(BUILD_DIR) - $(MAKE) -C pipeline/$(BUILD_DIR) clean && $(MAKE) -C pipeline/$(BUILD_DIR) > pipeline/$(BUILD_DIR)/build.log 2>&1 & +scope: + mkdir -p scope/$(BUILD_DIR) + cp scope/Makefile scope/$(BUILD_DIR) + $(MAKE) -C scope/$(BUILD_DIR) clean && $(MAKE) -C scope/$(BUILD_DIR) > scope/$(BUILD_DIR)/build.log 2>&1 & mem_unit: mkdir -p mem_unit/$(BUILD_DIR) diff --git a/hw/syn/xilinx/dut/scope/Makefile b/hw/syn/xilinx/dut/scope/Makefile new file mode 100644 index 0000000000..405f05e8a7 --- /dev/null +++ b/hw/syn/xilinx/dut/scope/Makefile @@ -0,0 +1,7 @@ +PROJECT = VX_scope_tap +TOP_LEVEL_ENTITY = $(PROJECT) +SRC_FILE = $(PROJECT).sv + +include ../../common.mk + +RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs diff --git a/hw/syn/xilinx/xrt/Makefile b/hw/syn/xilinx/xrt/Makefile index 0e2aea5a9c..fa0a7873ba 100644 --- a/hw/syn/xilinx/xrt/Makefile +++ b/hw/syn/xilinx/xrt/Makefile @@ -63,10 +63,6 @@ DBG_SCOPE_FLAGS += -DDBG_SCOPE_AFU DBG_SCOPE_FLAGS += -DDBG_SCOPE_ISSUE DBG_SCOPE_FLAGS += -DDBG_SCOPE_FETCH DBG_SCOPE_FLAGS += -DDBG_SCOPE_LSU -DBG_SCOPE_FLAGS += -DDBG_SCOPE_TEX -DBG_SCOPE_FLAGS += -DDBG_SCOPE_OM 
-DBG_SCOPE_FLAGS += -DDBG_SCOPE_RASTER -DBG_SCOPE_FLAGS += -DDBG_SCOPE_MSCHED # cluster configuration CONFIGS_1c := -DNUM_CLUSTERS=1 -DNUM_CORES=1 @@ -78,9 +74,11 @@ CONFIGS_32c := -DNUM_CLUSTERS=2 -DNUM_CORES=16 CONFIGS_64c := -DNUM_CLUSTERS=4 -DNUM_CORES=16 CONFIGS += $(CONFIGS_$(NUM_CORES)c) -# include paths +# include sources +RTL_PKGS = $(RTL_DIR)/VX_gpu_pkg.sv $(RTL_DIR)/fpu/VX_fpu_pkg.sv FPU_INCLUDE = -I$(RTL_DIR)/fpu ifneq (,$(findstring FPU_FPNEW,$(CONFIGS))) + RTL_PKGS += $(THIRD_PARTY_DIR)/cvfpu/src/fpnew_pkg.sv $(THIRD_PARTY_DIR)/cvfpu/src/common_cells/src/cf_math_pkg $(THIRD_PARTY_DIR)/cvfpu/src/fpu_div_sqrt_mvp/hdl/defs_div_sqrt_mvp.sv FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/include -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/src -J$(THIRD_PARTY_DIR)/cvfpu/src/fpu_div_sqrt_mvp/hdl -J$(THIRD_PARTY_DIR)/cvfpu/src endif TEX_INCLUDE = -I$(RTL_DIR)/tex @@ -152,7 +150,7 @@ CFLAGS += $(CONFIGS) CFLAGS += $(RTL_INCLUDE) # ast dump flags -XML_CFLAGS = $(filter-out -DSYNTHESIS -DVIVADO, $(CFLAGS)) -I$(DPI_DIR) +XML_CFLAGS = $(filter-out -DSYNTHESIS -DVIVADO, $(CFLAGS)) $(RTL_PKGS) -I$(DPI_DIR) -DSV_DPI # RTL Kernel only supports Hardware and Hardware Emulation. 
ifneq ($(TARGET),$(findstring $(TARGET), hw hw_emu)) @@ -192,14 +190,10 @@ ifeq ($(TARGET), hw) cp $(BUILD_DIR)/_x/logs/link/vivado.log $(BUILD_DIR)/bin/vivado.log cp $(BUILD_DIR)/_x/reports/link/imp/impl_1_full_util_routed.rpt $(BUILD_DIR)/bin/synthesis.log cp $(BUILD_DIR)/_x/reports/link/imp/impl_1_hw_bb_locked_timing_summary_routed.rpt $(BUILD_DIR)/bin/timing.log - [ -f "$(BUILD_DIR)/_x/link/vivado/vpl/prj/prj.runs/impl_1/debug_nets.ltx" ] && cp $(BUILD_DIR)/_x/link/vivado/vpl/prj/prj.runs/impl_1/debug_nets.ltx $(BUILD_DIR)/bin/debug_nets.ltx endif -hwserver: - debug_hw --xvc_pcie /dev/xfpga/xvc_pub.u2305.0 --hw_server & - chipscope: - debug_hw --vivado --host localhost --ltx_file $(BUILD_DIR)/bin/debug_nets.ltx & + debug_hw --vivado --host localhost --ltx_file $(BUILD_DIR)/bin/vortex_afu.ltx & clean: $(RMDIR) $(BUILD_DIR) diff --git a/hw/syn/yosys/Makefile b/hw/syn/yosys/Makefile index cba0137a3e..a09d9198de 100644 --- a/hw/syn/yosys/Makefile +++ b/hw/syn/yosys/Makefile @@ -29,7 +29,7 @@ DBG_SCOPE_FLAGS += -DDBG_SCOPE_AFU DBG_SCOPE_FLAGS += -DDBG_SCOPE_ISSUE DBG_SCOPE_FLAGS += -DDBG_SCOPE_FETCH DBG_SCOPE_FLAGS += -DDBG_SCOPE_LSU -DBG_SCOPE_FLAGS += -DDBG_SCOPE_MSCHED + # cluster configuration CONFIGS_1c := -DNUM_CLUSTERS=1 -DNUM_CORES=1 diff --git a/sim/opaesim/Makefile b/sim/opaesim/Makefile index 2def887e9e..ffbfece13c 100644 --- a/sim/opaesim/Makefile +++ b/sim/opaesim/Makefile @@ -30,7 +30,6 @@ DBG_SCOPE_FLAGS += -DDBG_SCOPE_AFU DBG_SCOPE_FLAGS += -DDBG_SCOPE_ISSUE DBG_SCOPE_FLAGS += -DDBG_SCOPE_FETCH DBG_SCOPE_FLAGS += -DDBG_SCOPE_LSU -DBG_SCOPE_FLAGS += -DDBG_SCOPE_MSCHED # AFU parameters CONFIGS += -DPLATFORM_PROVIDES_LOCAL_MEMORY diff --git a/sim/xrtsim/Makefile b/sim/xrtsim/Makefile index 6296b88ebc..4ac3f6edd9 100644 --- a/sim/xrtsim/Makefile +++ b/sim/xrtsim/Makefile @@ -30,7 +30,6 @@ DBG_SCOPE_FLAGS += -DDBG_SCOPE_AFU DBG_SCOPE_FLAGS += -DDBG_SCOPE_ISSUE DBG_SCOPE_FLAGS += -DDBG_SCOPE_FETCH DBG_SCOPE_FLAGS += -DDBG_SCOPE_LSU -DBG_SCOPE_FLAGS += 
-DDBG_SCOPE_MSCHED # AFU parameters ifeq (,$(findstring M_AXI_MEM_NUM_BANKS,$(CONFIGS))) From 00feb8b424012ee3b765a4042e50eaaa13184be2 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sat, 21 Sep 2024 08:39:20 -0700 Subject: [PATCH 204/407] scope analyzer bug fixes --- hw/rtl/afu/opae/vortex_afu.sv | 74 ++++++----- hw/rtl/afu/xrt/VX_afu_ctrl.sv | 5 +- hw/rtl/afu/xrt/VX_afu_wrap.sv | 29 ++--- hw/rtl/core/VX_fetch.sv | 4 +- hw/rtl/core/VX_issue_slice.sv | 4 +- hw/rtl/core/VX_lsu_slice.sv | 4 +- hw/rtl/core/VX_lsu_unit.sv | 2 - hw/rtl/libs/VX_scope_tap.sv | 236 ++++++++++++++++------------------ runtime/common/scope.cpp | 89 +++++++------ runtime/xrt/vortex.cpp | 16 +-- sim/xrtsim/xrt.cpp | 1 - sim/xrtsim/xrt_sim.cpp | 11 -- sim/xrtsim/xrt_sim.h | 2 - 13 files changed, 238 insertions(+), 239 deletions(-) diff --git a/hw/rtl/afu/opae/vortex_afu.sv b/hw/rtl/afu/opae/vortex_afu.sv index 3e605462f7..b0de60cf3d 100644 --- a/hw/rtl/afu/opae/vortex_afu.sv +++ b/hw/rtl/afu/opae/vortex_afu.sv @@ -170,8 +170,9 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ if (reset) begin cmd_scope_reading <= 0; cmd_scope_writing <= 0; - scope_bus_in <= 0; + scope_bus_in <= 0; end else begin + scope_bus_in <= 0; if (scope_bus_out) begin cmd_scope_reading <= 1; scope_bus_ctr <= 63; @@ -183,20 +184,21 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ scope_bus_ctr <= 63; scope_bus_in <= 1; end - end - if (cmd_scope_writing) begin - scope_bus_in <= 1'(cmd_scope_wdata >> scope_bus_ctr); - scope_bus_ctr <= scope_bus_ctr - 6'd1; - if (scope_bus_ctr == 0) begin - cmd_scope_writing <= 0; - scope_bus_in <= 0; + if (cmd_scope_writing) begin + scope_bus_in <= 1'(cmd_scope_wdata >> scope_bus_ctr); + scope_bus_ctr <= scope_bus_ctr - 6'd1; + if (scope_bus_ctr == 0) begin + cmd_scope_writing <= 0; + scope_bus_ctr <= 0; + end end - end - if (cmd_scope_reading) begin - cmd_scope_rdata <= {cmd_scope_rdata[62:0], scope_bus_out}; - 
scope_bus_ctr <= scope_bus_ctr - 6'd1; - if (scope_bus_ctr == 0) begin - cmd_scope_reading <= 0; + if (cmd_scope_reading) begin + cmd_scope_rdata <= {cmd_scope_rdata[62:0], scope_bus_out}; + scope_bus_ctr <= scope_bus_ctr - 6'd1; + if (scope_bus_ctr == 0) begin + cmd_scope_reading <= 0; + scope_bus_ctr <= 0; + end end end end @@ -327,7 +329,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ `ifdef SCOPE MMIO_SCOPE_WRITE: begin `ifdef DBG_TRACE_AFU - `TRACE(2, ("%t: AFU: MMIO_SCOPE_WRITE: data=0x%h\n", $time, cmd_scope_wdata)) + `TRACE(2, ("%t: AFU: MMIO_SCOPE_WRITE: data=0x%h\n", $time, 64'(cp2af_sRxPort.c0.data))) `endif end `endif @@ -918,7 +920,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ // Vortex /////////////////////////////////////////////////////////////////// - wire vx_dcr_wr_valid = (STATE_DCR_WRITE == state); + wire vx_dcr_wr_valid = (STATE_DCR_WRITE == state); wire [`VX_DCR_ADDR_WIDTH-1:0] vx_dcr_wr_addr = cmd_dcr_addr; wire [`VX_DCR_DATA_WIDTH-1:0] vx_dcr_wr_data = cmd_dcr_data; @@ -1002,11 +1004,10 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ // SCOPE ////////////////////////////////////////////////////////////////////// `ifdef DBG_SCOPE_AFU - wire mem_req_fire = mem_bus_if[0].req_valid && mem_bus_if[0].req_ready; - wire mem_rsp_fire = mem_bus_if[0].rsp_valid && mem_bus_if[0].rsp_ready; - wire avs_write_fire = avs_write[0] && ~avs_waitrequest[0]; - wire avs_read_fire = avs_read[0] && ~avs_waitrequest[0]; - wire [LMEM_ADDR_WIDTH-1:0] mem_bus_if_addr = mem_bus_if[0].req_data.addr; + wire avs_write_fire = avs_write[0] && ~avs_waitrequest[0]; + wire avs_read_fire = avs_read[0] && ~avs_waitrequest[0]; + wire vx_mem_req_fire = vx_mem_req_valid && vx_mem_req_ready; + wire vx_mem_rsp_fire = vx_mem_rsp_valid && vx_mem_rsp_ready; reg [STATE_WIDTH-1:0] state_prev; always @(posedge clk) begin @@ -1016,9 +1017,12 @@ module vortex_afu import 
ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ `define AFU_TRIGGERS { \ reset, \ + vx_reset, \ + vx_busy, \ + vx_mem_req_fire, \ + vx_mem_rsp_fire, \ + vx_dcr_wr_valid, \ state_changed, \ - mem_req_fire, \ - mem_rsp_fire, \ avs_write_fire, \ avs_read_fire, \ avs_waitrequest[0], \ @@ -1044,6 +1048,15 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ `define AFU_PROBES { \ cmd_type, \ state, \ + vx_mem_req_rw, \ + vx_mem_req_byteen, \ + vx_mem_req_addr, \ + vx_mem_req_data, \ + vx_mem_req_tag, \ + vx_mem_rsp_data, \ + vx_mem_rsp_tag, \ + vx_dcr_wr_addr, \ + vx_dcr_wr_data, \ mmio_req_hdr.address, \ cp2af_sRxPort.c0.hdr.mdata, \ af2cp_sTxPort.c0.hdr.address, \ @@ -1056,8 +1069,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ cci_mem_wr_req_ctr, \ cci_rd_req_ctr, \ cci_rd_rsp_ctr, \ - cci_wr_req_ctr, \ - mem_bus_if_addr \ + cci_wr_req_ctr \ } VX_scope_tap #( @@ -1066,13 +1078,13 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ .PROBEW ($bits(`AFU_PROBES)), .DEPTH (4096) ) scope_tap ( - .clk(clk), - .reset(scope_reset_w[0]), - .start(1'b0), - .stop(1'b0), + .clk (clk), + .reset (scope_reset_w[0]), + .start (1'b0), + .stop (1'b0), .triggers(`AFU_TRIGGERS), - .probes(`AFU_PROBES), - .bus_in(scope_bus_in_w[0]), + .probes (`AFU_PROBES), + .bus_in (scope_bus_in_w[0]), .bus_out(scope_bus_out_w[0]) ); `else diff --git a/hw/rtl/afu/xrt/VX_afu_ctrl.sv b/hw/rtl/afu/xrt/VX_afu_ctrl.sv index c842e25d52..12a55ec69d 100644 --- a/hw/rtl/afu/xrt/VX_afu_ctrl.sv +++ b/hw/rtl/afu/xrt/VX_afu_ctrl.sv @@ -204,6 +204,7 @@ module VX_afu_ctrl #( scope_bus_rdata <= '0; scope_rdata_valid <= 0; end else begin + scope_bus_out_r <= 0; if (s_axi_aw_fire) begin is_scope_waddr <= (s_axi_awaddr[ADDR_BITS-1:0] == ADDR_SCP_0) || (s_axi_awaddr[ADDR_BITS-1:0] == ADDR_SCP_1); @@ -221,7 +222,6 @@ module VX_afu_ctrl #( scope_rdata_valid <= 0; scope_bus_out_r <= 1; scope_bus_ctr <= 63; - end if 
(scope_bus_in) begin cmd_scope_reading <= 1; @@ -234,6 +234,7 @@ module VX_afu_ctrl #( if (scope_bus_ctr == 0) begin cmd_scope_reading <= 0; scope_rdata_valid <= 1; + scope_bus_ctr <= 0; end end if (cmd_scope_writing) begin @@ -241,7 +242,7 @@ module VX_afu_ctrl #( scope_bus_ctr <= scope_bus_ctr - 1; if (scope_bus_ctr == 0) begin cmd_scope_writing <= 0; - scope_bus_out_r <= '0; + scope_bus_ctr <= 0; end end end diff --git a/hw/rtl/afu/xrt/VX_afu_wrap.sv b/hw/rtl/afu/xrt/VX_afu_wrap.sv index e51d8f17b8..d5726dc73b 100644 --- a/hw/rtl/afu/xrt/VX_afu_wrap.sv +++ b/hw/rtl/afu/xrt/VX_afu_wrap.sv @@ -299,8 +299,8 @@ module VX_afu_wrap #( // SCOPE ////////////////////////////////////////////////////////////////////// -`ifdef DBG_SCOPE_AFU `ifdef SCOPE +`ifdef DBG_SCOPE_AFU `define AFU_TRIGGERS { \ reset, \ ap_reset, \ @@ -308,9 +308,9 @@ module VX_afu_wrap #( ap_done, \ ap_idle, \ interrupt, \ - vx_busy_wait, \ - vx_busy, \ vx_reset, \ + vx_busy, \ + dcr_wr_valid, \ m_axi_mem_awvalid_a[0], \ m_axi_mem_awready_a[0], \ m_axi_mem_wvalid_a[0], \ @@ -320,19 +320,18 @@ module VX_afu_wrap #( m_axi_mem_arvalid_a[0], \ m_axi_mem_arready_a[0], \ m_axi_mem_rvalid_a[0], \ - m_axi_mem_rready_a[0], \ - dcr_wr_valid \ + m_axi_mem_rready_a[0] \ } `define AFU_PROBES { \ + dcr_wr_addr, \ + dcr_wr_data, \ vx_pending_writes, \ m_axi_mem_awaddr_u[0], \ m_axi_mem_awid_a[0], \ m_axi_mem_bid_a[0], \ m_axi_mem_araddr_u[0], \ m_axi_mem_arid_a[0], \ - m_axi_mem_rid_a[0], \ - dcr_wr_addr, \ - dcr_wr_data \ + m_axi_mem_rid_a[0] \ } VX_scope_tap #( .SCOPE_ID (0), @@ -340,18 +339,19 @@ module VX_afu_wrap #( .PROBEW ($bits(`AFU_PROBES)), .DEPTH (4096) ) scope_tap ( - .clk (clk), - .reset (scope_reset_w[0]), - .start (1'b0), - .stop (1'b0), - .triggers (`AFU_TRIGGERS), + .clk (clk), + .reset (scope_reset_w[0]), + .start (1'b0), + .stop (1'b0), + .triggers(`AFU_TRIGGERS), .probes (`AFU_PROBES), .bus_in (scope_bus_in_w[0]), - .bus_out (scope_bus_out_w[0]) + .bus_out(scope_bus_out_w[0]) ); `else 
`SCOPE_IO_UNUSED_W(0) `endif +`endif `ifdef CHIPSCOPE ila_afu ila_afu_inst ( .clk (clk), @@ -373,7 +373,6 @@ module VX_afu_wrap #( }) ); `endif -`endif `ifdef SIMULATION `ifndef VERILATOR diff --git a/hw/rtl/core/VX_fetch.sv b/hw/rtl/core/VX_fetch.sv index f07ab39f56..c1c0e6a57f 100644 --- a/hw/rtl/core/VX_fetch.sv +++ b/hw/rtl/core/VX_fetch.sv @@ -135,8 +135,8 @@ module VX_fetch import VX_gpu_pkg::*; #( assign fetch_if.data.uuid = rsp_uuid; assign icache_bus_if.rsp_ready = fetch_if.ready; -`ifdef DBG_SCOPE_FETCH `ifdef SCOPE +`ifdef DBG_SCOPE_FETCH VX_scope_tap #( .SCOPE_ID (1), .TRIGGERW (4), @@ -166,6 +166,7 @@ module VX_fetch import VX_gpu_pkg::*; #( `else `SCOPE_IO_UNUSED() `endif +`endif `ifdef CHIPSCOPE ila_fetch ila_fetch_inst ( .clk (clk), @@ -174,7 +175,6 @@ module VX_fetch import VX_gpu_pkg::*; #( .probe2 ({icache_bus_if.rsp_valid, icache_bus_if.rsp_data, icache_bus_if.rsp_ready}) ); `endif -`endif `ifdef DBG_TRACE_MEM always @(posedge clk) begin diff --git a/hw/rtl/core/VX_issue_slice.sv b/hw/rtl/core/VX_issue_slice.sv index 19b2ba8bbe..38e54fcc01 100644 --- a/hw/rtl/core/VX_issue_slice.sv +++ b/hw/rtl/core/VX_issue_slice.sv @@ -93,8 +93,8 @@ module VX_issue_slice import VX_gpu_pkg::*; #( .dispatch_if (dispatch_if) ); -`ifdef DBG_SCOPE_ISSUE `ifdef SCOPE +`ifdef DBG_SCOPE_ISSUE VX_scope_tap #( .SCOPE_ID (2), .TRIGGERW (2), @@ -133,6 +133,7 @@ module VX_issue_slice import VX_gpu_pkg::*; #( `else `SCOPE_IO_UNUSED() `endif +`endif `ifdef CHIPSCOPE ila_issue ila_issue_inst ( .clk (clk), @@ -142,7 +143,6 @@ module VX_issue_slice import VX_gpu_pkg::*; #( .probe3 ({writeback_if.valid, writeback_if.data}) ); `endif -`endif `ifdef DBG_TRACE_PIPELINE always @(posedge clk) begin diff --git a/hw/rtl/core/VX_lsu_slice.sv b/hw/rtl/core/VX_lsu_slice.sv index 0452d0c796..d703291c42 100644 --- a/hw/rtl/core/VX_lsu_slice.sv +++ b/hw/rtl/core/VX_lsu_slice.sv @@ -534,8 +534,8 @@ module VX_lsu_slice import VX_gpu_pkg::*; #( end `endif -`ifdef DBG_SCOPE_LSU `ifdef SCOPE 
+`ifdef DBG_SCOPE_LSU VX_scope_tap #( .SCOPE_ID (3), .TRIGGERW (2), @@ -554,6 +554,7 @@ module VX_lsu_slice import VX_gpu_pkg::*; #( `else `SCOPE_IO_UNUSED() `endif +`endif `ifdef CHIPSCOPE ila_lsu ila_lsu_inst ( .clk (clk), @@ -562,6 +563,5 @@ module VX_lsu_slice import VX_gpu_pkg::*; #( .probe2 ({lsu_mem_if.rsp_valid, lsu_mem_if.rsp_data, lsu_mem_if.rsp_ready}) ); `endif -`endif endmodule diff --git a/hw/rtl/core/VX_lsu_unit.sv b/hw/rtl/core/VX_lsu_unit.sv index f4a1fc4ae6..6e9e2081c7 100644 --- a/hw/rtl/core/VX_lsu_unit.sv +++ b/hw/rtl/core/VX_lsu_unit.sv @@ -31,9 +31,7 @@ module VX_lsu_unit import VX_gpu_pkg::*; #( localparam BLOCK_SIZE = `NUM_LSU_BLOCKS; localparam NUM_LANES = `NUM_LSU_LANES; -`ifdef SCOPE `SCOPE_IO_SWITCH (BLOCK_SIZE); -`endif VX_execute_if #( .NUM_LANES (NUM_LANES) diff --git a/hw/rtl/libs/VX_scope_tap.sv b/hw/rtl/libs/VX_scope_tap.sv index f446780790..f77a4e7447 100644 --- a/hw/rtl/libs/VX_scope_tap.sv +++ b/hw/rtl/libs/VX_scope_tap.sv @@ -20,7 +20,7 @@ module VX_scope_tap #( parameter TRIGGERW = 16, // trigger signals width parameter PROBEW = 256, // probe signal width parameter DEPTH = 1024, // trace buffer depth - parameter IDLE_CTRW = 16, // idle time between triggers counter width + parameter IDLE_CTRW = 32, // idle time between triggers counter width parameter TX_DATAW = 64 // transfer data width ) ( input wire clk, @@ -64,52 +64,33 @@ module VX_scope_tap #( localparam GET_TYPE_DATA = 2'd3; localparam GET_TYPE_BITS = 2; - reg [`UP(TRIGGERW)-1:0] prev_triggers; - reg [IDLE_CTRW-1:0] delta; - reg [CTR_WIDTH-1:0] timestamp, start_time; - - reg [ADDRW-1:0] waddr, waddr_end; - reg write_en; - - reg cmd_start, delta_flush; - - reg [CTR_WIDTH-1:0] start_delay, delay_cntr; + `STATIC_ASSERT ((IDLE_CTRW <= TX_DATAW), ("invalid parameter")) reg [TAP_STATE_BITS-1:0] tap_state; reg [CTRL_STATE_BITS-1:0] ctrl_state; reg [GET_TYPE_BITS-1:0] get_type; + reg [CTR_WIDTH-1:0] timestamp, start_time; + reg [CTR_WIDTH-1:0] start_delay, delay_cntr; + reg 
[`UP(TRIGGERW)-1:0] prev_trig; + reg [IDLE_CTRW-1:0] delta; + reg cmd_start, dflush; + + reg [ADDRW-1:0] waddr, waddr_end; + wire [DATAW-1:0] data_in; + wire write_en; + wire [DATAW-1:0] data_value; wire [IDLE_CTRW-1:0] delta_value; - reg [TX_DATA_BITS-1:0] ser_tx_ctr; - reg [DATA_BITS-1:0] read_offset; reg [ADDRW-1:0] raddr; - reg read_data; - - wire [DATAW-1:0] data_in; - if (TRIGGERW != 0) begin - assign data_in = {probes, triggers}; - end else begin - assign data_in = probes; - end - VX_dp_ram #( - .DATAW (DATAW), - .SIZE (DEPTH), - .NO_RWCHECK (1) - ) data_store ( - .clk (clk), - .reset (reset), - .read (1'b1), - .wren (1'b1), - .write (write_en), - .waddr (waddr), - .wdata (data_in), - .raddr (raddr), - .rdata (data_value) - ); + // + // trace capture + // - if (TRIGGERW != 0) begin + if (TRIGGERW != 0) begin : g_delta_store + assign data_in = {probes, triggers}; + assign write_en = (tap_state == TAP_STATE_RUN) && (dflush || (triggers != prev_trig)); VX_dp_ram #( .DATAW (IDLE_CTRW), .SIZE (DEPTH), @@ -125,60 +106,60 @@ module VX_scope_tap #( .raddr (raddr), .rdata (delta_value) ); - end else begin + end else begin : g_no_delta_store + assign data_in = probes; + assign write_en = (tap_state == TAP_STATE_RUN); assign delta_value = '0; end - // - // trace capture - // - - wire [ADDRW-1:0] raddr_n = raddr + ADDRW'(1); - - wire [ADDRW:0] count = (ADDRW+1)'(waddr) + (ADDRW+1)'(1); + VX_dp_ram #( + .DATAW (DATAW), + .SIZE (DEPTH), + .NO_RWCHECK (1) + ) data_store ( + .clk (clk), + .reset (reset), + .read (1'b1), + .wren (1'b1), + .write (write_en), + .waddr (waddr), + .wdata (data_in), + .raddr (raddr), + .rdata (data_value) + ); - always @(*) begin - write_en = 0; - if (tap_state == TAP_STATE_RUN) begin - if (TRIGGERW != 0) begin - if (delta_flush || (triggers != prev_triggers)) begin - write_en = 1; - end - end else begin - write_en = 1; - end + always @(posedge clk) begin + if (reset) begin + timestamp <= '0; + end else begin + timestamp <= timestamp + 
CTR_WIDTH'(1); end end always @(posedge clk) begin if (reset) begin - tap_state <= TAP_STATE_IDLE; - raddr <= '0; - waddr <= '0; - delta <= '0; - prev_triggers <= '0; - read_offset <= '0; - read_data <= 0; - timestamp <= '0; + tap_state <= TAP_STATE_IDLE; + delta <= '0; + dflush <= 0; + prev_trig <= '0; + waddr <= '0; end else begin - timestamp <= timestamp + CTR_WIDTH'(1); - case (tap_state) TAP_STATE_IDLE: begin if (start || cmd_start) begin - delta <= '0; - delta_flush <= 1; + delta <= '0; + dflush <= 1; if (0 == start_delay) begin tap_state <= TAP_STATE_RUN; start_time <= timestamp; `ifdef DBG_TRACE_SCOPE - `TRACE(2, ("%t: *** scope #%0d: recording start - time=%0d\n", $time, SCOPE_ID, timestamp)) + `TRACE(2, ("%t: scope_tap%0d: recording start - time=%0d\n", $time, SCOPE_ID, timestamp)) `endif end else begin tap_state <= TAP_STATE_WAIT; delay_cntr <= start_delay; `ifdef DBG_TRACE_SCOPE - `TRACE(2, ("%t: *** scope #%0d: delayed start - time=%0d\n", $time, SCOPE_ID, start_delay)) + `TRACE(2, ("%t: scope_tap%0d: delayed start - time=%0d\n", $time, SCOPE_ID, start_delay)) `endif end end @@ -189,65 +170,39 @@ module VX_scope_tap #( tap_state <= TAP_STATE_RUN; start_time <= timestamp; `ifdef DBG_TRACE_SCOPE - `TRACE(2, ("%t: *** scope #%0d: recording start - time=%0d\n", $time, SCOPE_ID, timestamp)) + `TRACE(2, ("%t: scope_tap%0d: recording start - time=%0d\n", $time, SCOPE_ID, timestamp)) `endif end end TAP_STATE_RUN: begin - if (TRIGGERW != 0) begin - if (delta_flush || (triggers != prev_triggers)) begin - waddr <= waddr + ADDRW'(1); - delta <= '0; - delta_flush <= 0; + dflush <= 0; + if (!stop && (waddr < waddr_end)) begin + if (TRIGGERW != 0) begin + if (dflush || (triggers != prev_trig)) begin + waddr <= waddr + ADDRW'(1); + delta <= '0; + end else begin + delta <= delta + IDLE_CTRW'(1); + dflush <= (delta == IDLE_CTRW'(MAX_IDLE_CTR-1)); + end + prev_trig <= triggers; end else begin - delta <= delta + IDLE_CTRW'(1); - delta_flush <= (delta == 
IDLE_CTRW'(MAX_IDLE_CTR-1)); + waddr <= waddr + ADDRW'(1); end - prev_triggers <= triggers; end else begin - waddr <= waddr + ADDRW'(1); - end - if (stop || (waddr >= waddr_end)) begin - waddr <= waddr; + tap_state <= TAP_STATE_IDLE; `ifdef DBG_TRACE_SCOPE - `TRACE(2, ("%t: *** scope #%0d: recording stop - waddr=(%0d, %0d)\n", $time, SCOPE_ID, waddr, waddr_end)) + `TRACE(2, ("%t: scope_tap%0d: recording stop - waddr=(%0d, %0d)\n", $time, SCOPE_ID, waddr, waddr_end)) `endif - tap_state <= TAP_STATE_IDLE; end end default:; endcase - - if (ctrl_state == CTRL_STATE_SEND - && get_type == GET_TYPE_DATA - && ser_tx_ctr == 0) begin - if (~read_data) begin - read_data <= 1; - end else begin - if (DATAW > TX_DATAW) begin - `IGNORE_WARNINGS_BEGIN - if (read_offset < DATA_BITS'(DATAW-TX_DATAW)) begin - read_offset <= read_offset + DATA_BITS'(TX_DATAW); - end else begin - raddr <= raddr_n; - read_data <= 0; - read_offset <= '0; - end - `IGNORE_WARNINGS_END - end else begin - raddr <= raddr_n; - read_data <= 0; - end - if (raddr_n == waddr) begin - raddr <= 0; - end - end - end end end // - // command controller + // trace controller // reg bus_out_r; @@ -256,35 +211,45 @@ module VX_scope_tap #( wire [TX_DATAW-1:0] ser_buf_in_n = {ser_buf_in[TX_DATAW-2:0], bus_in}; `UNUSED_VAR (ser_buf_in) + reg [TX_DATA_BITS-1:0] ser_tx_ctr; + reg [DATA_BITS-1:0] read_offset; + reg is_read_data; + wire [CMD_TYPE_BITS-1:0] cmd_type = ser_buf_in[CMD_TYPE_BITS-1:0]; wire [SCOPE_IDW-1:0] cmd_scope_id = ser_buf_in_n[CMD_TYPE_BITS +: SCOPE_IDW]; wire [TX_DATAW-CMD_TYPE_BITS-SCOPE_IDW-1:0] cmd_data = ser_buf_in[TX_DATAW-1:CMD_TYPE_BITS+SCOPE_IDW]; wire [TX_DATAW-1:0] data_chunk = TX_DATAW'(DATAW'(data_value >> read_offset)); - wire [TX_DATAW-1:0] get_data = read_data ? data_chunk : TX_DATAW'(delta_value); + wire [TX_DATAW-1:0] get_data = is_read_data ? 
data_chunk : TX_DATAW'(delta_value); + + wire [ADDRW-1:0] raddr_n = raddr + ADDRW'(1); always @(posedge clk) begin if (reset) begin ctrl_state <= CTRL_STATE_IDLE; + waddr_end <= ADDRW'(DEPTH-1); cmd_start <= 0; start_delay <= '0; - waddr_end <= ADDRW'(DEPTH-1); bus_out_r <= 0; + read_offset <= '0; + raddr <= '0; + is_read_data<= 0; + ser_tx_ctr <= '0; end else begin bus_out_r <= 0; cmd_start <= 0; - case (ctrl_state) CTRL_STATE_IDLE: begin if (bus_in) begin + ser_tx_ctr <= TX_DATA_BITS'(TX_DATAW-1); ctrl_state <= CTRL_STATE_RECV; end - ser_tx_ctr <= TX_DATA_BITS'(TX_DATAW-1); end CTRL_STATE_RECV: begin ser_tx_ctr <= ser_tx_ctr - TX_DATA_BITS'(1); ser_buf_in <= ser_buf_in_n; if (ser_tx_ctr == 0) begin + // check if command is for this scope ctrl_state <= (cmd_scope_id == SCOPE_ID) ? CTRL_STATE_CMD : CTRL_STATE_IDLE; end end @@ -302,33 +267,32 @@ module VX_scope_tap #( CMD_GET_START, CMD_GET_COUNT, CMD_GET_DATA: begin - ctrl_state <= CTRL_STATE_SEND; get_type <= GET_TYPE_BITS'(cmd_type); ser_tx_ctr <= TX_DATA_BITS'(TX_DATAW-1); bus_out_r <= 1; + ctrl_state <= CTRL_STATE_SEND; end default:; endcase `ifdef DBG_TRACE_SCOPE - `TRACE(2, ("%t: *** scope #%0d: CMD: type=%0d\n", $time, SCOPE_ID, cmd_type)) + `TRACE(2, ("%t: scope_tap%0d: CMD: type=%0d\n", $time, SCOPE_ID, cmd_type)) `endif end CTRL_STATE_SEND: begin - ser_tx_ctr <= ser_tx_ctr - TX_DATA_BITS'(1); case (get_type) GET_TYPE_WIDTH: begin bus_out_r <= 1'(DATAW >> ser_tx_ctr); `ifdef DBG_TRACE_SCOPE if (ser_tx_ctr == 0) begin - `TRACE(2, ("%t: *** scope #%0d: SEND width=%0d\n", $time, SCOPE_ID, DATAW)) + `TRACE(2, ("%t: scope_tap%0d: SEND width=%0d\n", $time, SCOPE_ID, DATAW)) end `endif end GET_TYPE_COUNT: begin - bus_out_r <= 1'(count >> ser_tx_ctr); + bus_out_r <= 1'(waddr >> ser_tx_ctr); `ifdef DBG_TRACE_SCOPE if (ser_tx_ctr == 0) begin - `TRACE(2, ("%t: *** scope #%0d: SEND count=%0d\n", $time, SCOPE_ID, count)) + `TRACE(2, ("%t: scope_tap%0d: SEND count=%0d\n", $time, SCOPE_ID, waddr)) end `endif end @@ 
-336,20 +300,46 @@ module VX_scope_tap #( bus_out_r <= 1'(start_time >> ser_tx_ctr); `ifdef DBG_TRACE_SCOPE if (ser_tx_ctr == 0) begin - `TRACE(2, ("%t: *** scope #%0d: SEND start=%0d\n", $time, SCOPE_ID, start_time)) + `TRACE(2, ("%t: scope_tap%0d: SEND start=%0d\n", $time, SCOPE_ID, start_time)) end `endif end GET_TYPE_DATA: begin bus_out_r <= 1'(get_data >> ser_tx_ctr); + if (ser_tx_ctr == 0) begin + if (is_read_data) begin + if (DATAW > TX_DATAW) begin + if (read_offset < DATA_BITS'(DATAW-TX_DATAW)) begin + read_offset <= read_offset + DATA_BITS'(TX_DATAW); + end else begin + read_offset <= '0; + raddr <= raddr_n; + is_read_data <= 0; // swutch delta mode + end + end else begin + raddr <= raddr_n; + is_read_data <= 0; // swutch delta mode + end + if (raddr_n == waddr) begin + raddr <= 0; // end-of-samples reset + end + end else begin + is_read_data <= 1; // switch to data mode + end + end `ifdef DBG_TRACE_SCOPE if (ser_tx_ctr == 0) begin - `TRACE(2, ("%t: *** scope #%0d: SEND data=%0d\n", $time, SCOPE_ID, get_data)) + if (is_read_data) begin + `TRACE(2, ("%t: scope_tap%0d: SEND data=0x%0h\n", $time, SCOPE_ID, get_data)) + end else begin + `TRACE(2, ("%t: scope_tap%0d: SEND delta=0x%0h\n", $time, SCOPE_ID, get_data)) + end end `endif end default:; endcase + ser_tx_ctr <= ser_tx_ctr - TX_DATA_BITS'(1); if (ser_tx_ctr == 0) begin ctrl_state <= CTRL_STATE_IDLE; end diff --git a/runtime/common/scope.cpp b/runtime/common/scope.cpp index 33b13cab40..7edd67692a 100644 --- a/runtime/common/scope.cpp +++ b/runtime/common/scope.cpp @@ -28,7 +28,7 @@ #include #include -#define FRAME_FLUSH_SIZE 100 +#define SAMPLE_FLUSH_SIZE 100 #define MMIO_SCOPE_READ (AFU_IMAGE_MMIO_SCOPE_READ * 4) #define MMIO_SCOPE_WRITE (AFU_IMAGE_MMIO_SCOPE_WRITE * 4) @@ -58,8 +58,8 @@ struct tap_signal_t { struct tap_t { uint32_t id; uint32_t width; - uint32_t frames; - uint32_t cur_frame; + uint32_t samples; + uint32_t cur_sample; uint64_t cycle_time; std::string path; std::vector signals; @@ 
-135,22 +135,25 @@ static void dump_header(std::ofstream& ofs, std::vector& taps) { ofs << "enddefinitions $end" << std::endl; } -static tap_t* find_nearest_tap(std::vector& taps) { - tap_t* nearest = nullptr; +// return the earliest tap that has data to dump +static tap_t* find_earliest_tap(std::vector& taps) { + tap_t* earliest = nullptr; for (auto& tap : taps) { - if (tap.cur_frame == tap.frames) - continue; - if (nearest != nullptr) { - if (tap.cycle_time < nearest->cycle_time) - nearest = &tap; + if (tap.samples == 0) + continue; // skip empty taps + if (tap.cur_sample == tap.samples) + continue; // skip finished taps + if (earliest != nullptr) { + if (tap.cycle_time < earliest->cycle_time) + earliest = &tap; } else { - nearest = &tap; + earliest = &tap; } } - return nearest; + return earliest; } -static uint64_t advance_time(std::ofstream& ofs, uint64_t next_time, uint64_t cur_time) { +static uint64_t advance_time(std::ofstream& ofs, uint64_t cur_time, uint64_t next_time) { while (cur_time < next_time) { ofs << '#' << (cur_time * 2 + 0) << std::endl; ofs << "b0 0" << std::endl; @@ -163,7 +166,7 @@ static uint64_t advance_time(std::ofstream& ofs, uint64_t next_time, uint64_t cu static int dump_tap(std::ofstream& ofs, tap_t* tap, vx_device_h hdevice) { uint32_t signal_offset = 0; - uint32_t frame_offset = 0; + uint32_t sample_offset = 0; uint64_t word; std::vector signal_data(tap->width); @@ -176,24 +179,24 @@ static int dump_tap(std::ofstream& ofs, tap_t* tap, vx_device_h hdevice) { CHECK_ERR(g_callback.registerWrite(hdevice, cmd_data)); CHECK_ERR(g_callback.registerRead(hdevice, &word)); do { - uint32_t word_offset = frame_offset % 64; + uint32_t word_offset = sample_offset % 64; signal_data[signal_width - signal_offset - 1] = ((word >> word_offset) & 0x1) ? 
'1' : '0'; ++signal_offset; - ++frame_offset; + ++sample_offset; if (signal_offset == signal_width) { signal_data[signal_width] = 0; // string null termination ofs << 'b' << signal_data.data() << ' ' << signal_it->id << std::endl; - if (frame_offset == tap->width) { - // end-of-frame - ++tap->cur_frame; - if (tap->cur_frame != tap->frames) { + if (sample_offset == tap->width) { + // end-of-sample + ++tap->cur_sample; + if (tap->cur_sample != tap->samples) { // read next delta CHECK_ERR(g_callback.registerWrite(hdevice, cmd_data)); CHECK_ERR(g_callback.registerRead(hdevice, &word)); tap->cycle_time += 1 + word; - if (0 == (tap->cur_frame % FRAME_FLUSH_SIZE)) { + if (0 == (tap->cur_sample % SAMPLE_FLUSH_SIZE)) { ofs << std::flush; - std::cout << std::dec << "[SCOPE] flush tap #" << tap->id << ": "<< tap->cur_frame << "/" << tap->frames << " frames, next_time=" << tap->cycle_time << std::endl; + std::cout << std::dec << "[SCOPE] flush tap #" << tap->id << ": "<< tap->cur_sample << "/" << tap->samples << " samples, next_time=" << tap->cycle_time << std::endl; } } break; @@ -202,8 +205,8 @@ static int dump_tap(std::ofstream& ofs, tap_t* tap, vx_device_h hdevice) { ++signal_it; signal_width = signal_it->width; } - } while ((frame_offset % 64) != 0); - } while (frame_offset != tap->width); + } while ((sample_offset % 64) != 0); + } while (sample_offset != tap->width); return 0; } @@ -285,8 +288,8 @@ int vx_scope_stop(vx_device_h hdevice) { _tap.width = tap["width"].get(); _tap.path = tap["path"].get(); _tap.cycle_time = 0; - _tap.frames = 0; - _tap.cur_frame = 0; + _tap.samples = 0; + _tap.cur_sample = 0; for (auto& signal : tap["signals"]) { auto name = signal[0].get(); @@ -299,19 +302,15 @@ int vx_scope_stop(vx_device_h hdevice) { } } - // stop recording + std::cout << "[SCOPE] stop recording..." 
<< std::endl; + for (auto& tap : taps) { uint64_t cmd_stop = (0 << 11) | (tap.id << 3) | CMD_SET_STOP; CHECK_ERR(g_callback.registerWrite(hdevice, cmd_stop)); } - std::cout << "[SCOPE] trace dump begin..." << std::endl; - - std::ofstream ofs("scope.vcd"); - - dump_header(ofs, taps); + std::cout << "[SCOPE] load trace info..." << std::endl; - // load trace info for (auto& tap : taps) { uint64_t count, start, delta; @@ -320,39 +319,53 @@ int vx_scope_stop(vx_device_h hdevice) { CHECK_ERR(g_callback.registerWrite(hdevice, cmd_count)); CHECK_ERR(g_callback.registerRead(hdevice, &count)); + if (count == 0) + continue; + // get start uint64_t cmd_start = (tap.id << 3) | CMD_GET_START; CHECK_ERR(g_callback.registerWrite(hdevice, cmd_start)); CHECK_ERR(g_callback.registerRead(hdevice, &start)); - // get data + // get delta uint64_t cmd_data = (tap.id << 3) | CMD_GET_DATA; CHECK_ERR(g_callback.registerWrite(hdevice, cmd_data)); CHECK_ERR(g_callback.registerRead(hdevice, &delta)); - tap.frames = count; + tap.samples = count; tap.cycle_time = 1 + start + delta; std::cout << std::dec << "[SCOPE] tap #" << tap.id << ": width=" << tap.width - << ", num_frames=" << tap.frames + << ", num_samples=" << tap.samples << ", start_time=" << tap.cycle_time << ", path=" << tap.path << std::endl; } + std::cout << "[SCOPE] dump header..." << std::endl; + + std::ofstream ofs("scope.vcd"); + + dump_header(ofs, taps); + + std::cout << "[SCOPE] dump taps..." << std::endl; + uint64_t cur_time = 0; while (true) { // find the nearest tap - auto tap = find_nearest_tap(taps); + auto tap = find_earliest_tap(taps); if (tap == nullptr) break; // advance clock - cur_time = advance_time(ofs, tap->cycle_time, cur_time); + cur_time = advance_time(ofs, cur_time, tap->cycle_time); // dump tap CHECK_ERR(dump_tap(ofs, tap, hdevice)); }; + // advance clock + advance_time(ofs, cur_time, cur_time + 1); + std::cout << "[SCOPE] trace dump done! 
- " << (cur_time/2) << " cycles" << std::endl; return 0; diff --git a/runtime/xrt/vortex.cpp b/runtime/xrt/vortex.cpp index 511a87be5f..48926e80bf 100644 --- a/runtime/xrt/vortex.cpp +++ b/runtime/xrt/vortex.cpp @@ -581,14 +581,14 @@ class vx_device { return err; }); #ifdef CPP_API - xrtBuffer.write(host_ptr, asize, bo_offset); - xrtBuffer.sync(XCL_BO_SYNC_BO_TO_DEVICE, asize, bo_offset); + xrtBuffer.write(host_ptr, size, bo_offset); + xrtBuffer.sync(XCL_BO_SYNC_BO_TO_DEVICE, size, bo_offset); #else - CHECK_ERR(xrtBOWrite(xrtBuffer, host_ptr, asize, bo_offset), { + CHECK_ERR(xrtBOWrite(xrtBuffer, host_ptr, size, bo_offset), { dump_xrt_error(xrtDevice_, err); return err; }); - CHECK_ERR(xrtBOSync(xrtBuffer, XCL_BO_SYNC_BO_TO_DEVICE, asize, bo_offset), { + CHECK_ERR(xrtBOSync(xrtBuffer, XCL_BO_SYNC_BO_TO_DEVICE, size, bo_offset), { dump_xrt_error(xrtDevice_, err); return err; }); @@ -627,14 +627,14 @@ class vx_device { return err; }); #ifdef CPP_API - xrtBuffer.sync(XCL_BO_SYNC_BO_FROM_DEVICE, asize, bo_offset); - xrtBuffer.read(host_ptr, asize, bo_offset); + xrtBuffer.sync(XCL_BO_SYNC_BO_FROM_DEVICE, size, bo_offset); + xrtBuffer.read(host_ptr, size, bo_offset); #else - CHECK_ERR(xrtBOSync(xrtBuffer, XCL_BO_SYNC_BO_FROM_DEVICE, asize, bo_offset), { + CHECK_ERR(xrtBOSync(xrtBuffer, XCL_BO_SYNC_BO_FROM_DEVICE, size, bo_offset), { dump_xrt_error(xrtDevice_, err); return err; }); - CHECK_ERR(xrtBORead(xrtBuffer, host_ptr, asize, bo_offset), { + CHECK_ERR(xrtBORead(xrtBuffer, host_ptr, size, bo_offset), { dump_xrt_error(xrtDevice_, err); return err; }); diff --git a/sim/xrtsim/xrt.cpp b/sim/xrtsim/xrt.cpp index c0b5aac288..2123358a0a 100644 --- a/sim/xrtsim/xrt.cpp +++ b/sim/xrtsim/xrt.cpp @@ -66,7 +66,6 @@ extern int xrtDeviceClose(xrtDeviceHandle dhdl) { if (dhdl == nullptr) return -1; auto sim = reinterpret_cast(dhdl); - sim->shutdown(); delete sim; return 0; } diff --git a/sim/xrtsim/xrt_sim.cpp b/sim/xrtsim/xrt_sim.cpp index 1aaccc3921..a2725f32db 100644 --- 
a/sim/xrtsim/xrt_sim.cpp +++ b/sim/xrtsim/xrt_sim.cpp @@ -197,13 +197,6 @@ class xrt_sim::Impl { return 0; } - void shutdown() { - stop_ = true; - if (future_.valid()) { - future_.wait(); - } - } - int mem_alloc(uint64_t size, uint32_t bank_id, uint64_t* addr) { if (bank_id >= M_AXI_MEM_NUM_BANKS) return -1; @@ -615,10 +608,6 @@ int xrt_sim::init() { return impl_->init(); } -void xrt_sim::shutdown() { - impl_->shutdown(); -} - int xrt_sim::mem_alloc(uint64_t size, uint32_t bank_id, uint64_t* addr) { return impl_->mem_alloc(size, bank_id, addr); } diff --git a/sim/xrtsim/xrt_sim.h b/sim/xrtsim/xrt_sim.h index 5823f468fb..6a2d5d7da4 100644 --- a/sim/xrtsim/xrt_sim.h +++ b/sim/xrtsim/xrt_sim.h @@ -25,8 +25,6 @@ class xrt_sim { int init(); - void shutdown(); - int mem_alloc(uint64_t size, uint32_t bank_id, uint64_t* addr); int mem_free(uint32_t bank_id, uint64_t addr); From b8199decf47028b1f59cb34f16fd3fcffe50462f Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sun, 22 Sep 2024 03:54:40 -0700 Subject: [PATCH 205/407] opaesim and xrtsim multi-bank memory support --- ci/regression.sh.in | 5 +- hw/rtl/Vortex_axi.sv | 166 ++++++++++++++++----------- hw/rtl/afu/opae/local_mem_cfg_pkg.sv | 14 ++- hw/rtl/afu/opae/vortex_afu.sv | 10 +- hw/rtl/afu/xrt/VX_afu_ctrl.sv | 44 ++----- hw/rtl/afu/xrt/VX_afu_wrap.sv | 32 +++--- hw/rtl/afu/xrt/vortex_afu.v | 14 ++- hw/rtl/afu/xrt/vortex_afu.vh | 20 ++-- hw/rtl/libs/VX_avs_adapter.sv | 31 +++-- hw/rtl/libs/VX_axi_adapter.sv | 46 +++++--- hw/rtl/libs/VX_mem_adapter.sv | 5 +- hw/syn/altera/dut/top/Makefile | 20 ++-- hw/syn/altera/opae/Makefile | 2 +- runtime/include/vortex.h | 1 + runtime/opae/vortex.cpp | 16 ++- runtime/rtlsim/vortex.cpp | 3 + runtime/simx/vortex.cpp | 3 + runtime/xrt/Makefile | 1 + runtime/xrt/vortex.cpp | 144 +++-------------------- sim/common/bitmanip.h | 36 +++++- sim/common/mem_alloc.h | 17 +-- sim/opaesim/Makefile | 21 ++-- sim/opaesim/opae_sim.cpp | 43 +++---- sim/opaesim/vortex_afu_shim.sv | 20 ++-- 
sim/xrtsim/Makefile | 19 ++- sim/xrtsim/vortex_afu_shim.sv | 75 ++++++------ sim/xrtsim/xrt_sim.cpp | 90 +++++++++------ 27 files changed, 461 insertions(+), 437 deletions(-) diff --git a/ci/regression.sh.in b/ci/regression.sh.in index 8c88c368af..37f5d2b208 100755 --- a/ci/regression.sh.in +++ b/ci/regression.sh.in @@ -273,10 +273,11 @@ config2() CONFIGS="-DMEM_BLOCK_SIZE=16" ./ci/blackbox.sh --driver=simx --app=mstress --threads=8 # test single-bank DRAM - CONFIGS="-DPLATFORM_PARAM_LOCAL_MEMORY_BANKS=1" ./ci/blackbox.sh --driver=opae --app=mstress + CONFIGS="-DPLATFORM_MEMORY_BANKS=1" ./ci/blackbox.sh --driver=opae --app=mstress # test 27-bit DRAM address - CONFIGS="-DPLATFORM_PARAM_LOCAL_MEMORY_ADDR_WIDTH=27" ./ci/blackbox.sh --driver=opae --app=mstress + CONFIGS="-DPLATFORM_MEMORY_ADDR_WIDTH=27" ./ci/blackbox.sh --driver=opae --app=mstress + CONFIGS="-DPLATFORM_MEMORY_ADDR_WIDTH=27" ./ci/blackbox.sh --driver=xrt --app=mstress echo "configuration-2 tests done!" } diff --git a/hw/rtl/Vortex_axi.sv b/hw/rtl/Vortex_axi.sv index a15a478ee5..17d5d660e3 100644 --- a/hw/rtl/Vortex_axi.sv +++ b/hw/rtl/Vortex_axi.sv @@ -15,7 +15,7 @@ module Vortex_axi import VX_gpu_pkg::*; #( parameter AXI_DATA_WIDTH = `VX_MEM_DATA_WIDTH, - parameter AXI_ADDR_WIDTH = `MEM_ADDR_WIDTH, + parameter AXI_ADDR_WIDTH = `MEM_ADDR_WIDTH + (`VX_MEM_DATA_WIDTH/8), parameter AXI_TID_WIDTH = `VX_MEM_TAG_WIDTH, parameter AXI_NUM_BANKS = 1 )( @@ -82,11 +82,10 @@ module Vortex_axi import VX_gpu_pkg::*; #( // Status output wire busy ); - localparam MIN_TAG_WIDTH = `VX_MEM_TAG_WIDTH - `UUID_WIDTH; - - `STATIC_ASSERT((AXI_DATA_WIDTH == `VX_MEM_DATA_WIDTH), ("invalid memory data size: current=%0d, expected=%0d", AXI_DATA_WIDTH, `VX_MEM_DATA_WIDTH)) - `STATIC_ASSERT((AXI_ADDR_WIDTH >= `MEM_ADDR_WIDTH), ("invalid memory address size: current=%0d, expected=%0d", AXI_ADDR_WIDTH, `VX_MEM_ADDR_WIDTH)) - `STATIC_ASSERT((AXI_TID_WIDTH >= MIN_TAG_WIDTH), ("invalid memory tag size: current=%0d, expected=%0d", 
AXI_TID_WIDTH, MIN_TAG_WIDTH)) + localparam MIN_TAG_WIDTH = `VX_MEM_TAG_WIDTH - `UUID_WIDTH; + localparam VX_MEM_ADDR_A_WIDTH = `VX_MEM_ADDR_WIDTH + `CLOG2(`VX_MEM_DATA_WIDTH) - `CLOG2(AXI_DATA_WIDTH); + + `STATIC_ASSERT((AXI_TID_WIDTH >= MIN_TAG_WIDTH), ("invalid memory tag width: current=%0d, expected=%0d", AXI_TID_WIDTH, MIN_TAG_WIDTH)) wire mem_req_valid; wire mem_req_rw; @@ -101,33 +100,11 @@ module Vortex_axi import VX_gpu_pkg::*; #( wire [`VX_MEM_TAG_WIDTH-1:0] mem_rsp_tag; wire mem_rsp_ready; - wire [`MEM_ADDR_WIDTH-1:0] m_axi_awaddr_unqual [AXI_NUM_BANKS]; - wire [`MEM_ADDR_WIDTH-1:0] m_axi_araddr_unqual [AXI_NUM_BANKS]; - - wire [`VX_MEM_TAG_WIDTH-1:0] m_axi_awid_unqual [AXI_NUM_BANKS]; - wire [`VX_MEM_TAG_WIDTH-1:0] m_axi_arid_unqual [AXI_NUM_BANKS]; - - wire [`VX_MEM_TAG_WIDTH-1:0] m_axi_bid_unqual [AXI_NUM_BANKS]; - wire [`VX_MEM_TAG_WIDTH-1:0] m_axi_rid_unqual [AXI_NUM_BANKS]; - - for (genvar i = 0; i < AXI_NUM_BANKS; ++i) begin : g_padding - assign m_axi_awaddr[i] = `MEM_ADDR_WIDTH'(m_axi_awaddr_unqual[i]); - assign m_axi_araddr[i] = `MEM_ADDR_WIDTH'(m_axi_araddr_unqual[i]); - - assign m_axi_awid[i] = AXI_TID_WIDTH'(m_axi_awid_unqual[i]); - assign m_axi_arid[i] = AXI_TID_WIDTH'(m_axi_arid_unqual[i]); + `SCOPE_IO_SWITCH (1) - assign m_axi_rid_unqual[i] = `VX_MEM_TAG_WIDTH'(m_axi_rid[i]); - assign m_axi_bid_unqual[i] = `VX_MEM_TAG_WIDTH'(m_axi_bid[i]); - end + Vortex vortex ( + `SCOPE_IO_BIND (0) - VX_axi_adapter #( - .DATA_WIDTH (`VX_MEM_DATA_WIDTH), - .ADDR_WIDTH (`MEM_ADDR_WIDTH), - .TAG_WIDTH (`VX_MEM_TAG_WIDTH), - .NUM_BANKS (AXI_NUM_BANKS), - .RSP_OUT_BUF((AXI_NUM_BANKS > 1) ? 
2 : 0) - ) axi_adapter ( .clk (clk), .reset (reset), @@ -144,10 +121,95 @@ module Vortex_axi import VX_gpu_pkg::*; #( .mem_rsp_tag (mem_rsp_tag), .mem_rsp_ready (mem_rsp_ready), + .dcr_wr_valid (dcr_wr_valid), + .dcr_wr_addr (dcr_wr_addr), + .dcr_wr_data (dcr_wr_data), + + .busy (busy) + ); + + wire mem_req_valid_a; + wire mem_req_rw_a; + wire [(AXI_DATA_WIDTH/8)-1:0] mem_req_byteen_a; + wire [VX_MEM_ADDR_A_WIDTH-1:0] mem_req_addr_a; + wire [AXI_DATA_WIDTH-1:0] mem_req_data_a; + wire [AXI_TID_WIDTH-1:0] mem_req_tag_a; + wire mem_req_ready_a; + + wire mem_rsp_valid_a; + wire [AXI_DATA_WIDTH-1:0] mem_rsp_data_a; + wire [AXI_TID_WIDTH-1:0] mem_rsp_tag_a; + wire mem_rsp_ready_a; + + VX_mem_adapter #( + .SRC_DATA_WIDTH (`VX_MEM_DATA_WIDTH), + .DST_DATA_WIDTH (AXI_DATA_WIDTH), + .SRC_ADDR_WIDTH (`VX_MEM_ADDR_WIDTH), + .DST_ADDR_WIDTH (VX_MEM_ADDR_A_WIDTH), + .SRC_TAG_WIDTH (`VX_MEM_TAG_WIDTH), + .DST_TAG_WIDTH (AXI_TID_WIDTH), + .REQ_OUT_BUF (0), + .RSP_OUT_BUF (0) + ) mem_adapter ( + .clk (clk), + .reset (reset), + + .mem_req_valid_in (mem_req_valid), + .mem_req_addr_in (mem_req_addr), + .mem_req_rw_in (mem_req_rw), + .mem_req_byteen_in (mem_req_byteen), + .mem_req_data_in (mem_req_data), + .mem_req_tag_in (mem_req_tag), + .mem_req_ready_in (mem_req_ready), + + .mem_rsp_valid_in (mem_rsp_valid), + .mem_rsp_data_in (mem_rsp_data), + .mem_rsp_tag_in (mem_rsp_tag), + .mem_rsp_ready_in (mem_rsp_ready), + + .mem_req_valid_out (mem_req_valid_a), + .mem_req_addr_out (mem_req_addr_a), + .mem_req_rw_out (mem_req_rw_a), + .mem_req_byteen_out (mem_req_byteen_a), + .mem_req_data_out (mem_req_data_a), + .mem_req_tag_out (mem_req_tag_a), + .mem_req_ready_out (mem_req_ready_a), + + .mem_rsp_valid_out (mem_rsp_valid_a), + .mem_rsp_data_out (mem_rsp_data_a), + .mem_rsp_tag_out (mem_rsp_tag_a), + .mem_rsp_ready_out (mem_rsp_ready_a) + ); + + VX_axi_adapter #( + .DATA_WIDTH (AXI_DATA_WIDTH), + .ADDR_WIDTH (VX_MEM_ADDR_A_WIDTH), + .TAG_WIDTH (AXI_TID_WIDTH), + .NUM_BANKS (AXI_NUM_BANKS), + 
.AXI_ADDR_WIDTH (AXI_ADDR_WIDTH), + .BANK_INTERLEAVE (0), + .RSP_OUT_BUF((AXI_NUM_BANKS > 1) ? 2 : 0) + ) axi_adapter ( + .clk (clk), + .reset (reset), + + .mem_req_valid (mem_req_valid_a), + .mem_req_rw (mem_req_rw_a), + .mem_req_byteen (mem_req_byteen_a), + .mem_req_addr (mem_req_addr_a), + .mem_req_data (mem_req_data_a), + .mem_req_tag (mem_req_tag_a), + .mem_req_ready (mem_req_ready_a), + + .mem_rsp_valid (mem_rsp_valid_a), + .mem_rsp_data (mem_rsp_data_a), + .mem_rsp_tag (mem_rsp_tag_a), + .mem_rsp_ready (mem_rsp_ready_a), + .m_axi_awvalid (m_axi_awvalid), .m_axi_awready (m_axi_awready), - .m_axi_awaddr (m_axi_awaddr_unqual), - .m_axi_awid (m_axi_awid_unqual), + .m_axi_awaddr (m_axi_awaddr), + .m_axi_awid (m_axi_awid), .m_axi_awlen (m_axi_awlen), .m_axi_awsize (m_axi_awsize), .m_axi_awburst (m_axi_awburst), @@ -165,13 +227,13 @@ module Vortex_axi import VX_gpu_pkg::*; #( .m_axi_bvalid (m_axi_bvalid), .m_axi_bready (m_axi_bready), - .m_axi_bid (m_axi_bid_unqual), + .m_axi_bid (m_axi_bid), .m_axi_bresp (m_axi_bresp), .m_axi_arvalid (m_axi_arvalid), .m_axi_arready (m_axi_arready), - .m_axi_araddr (m_axi_araddr_unqual), - .m_axi_arid (m_axi_arid_unqual), + .m_axi_araddr (m_axi_araddr), + .m_axi_arid (m_axi_arid), .m_axi_arlen (m_axi_arlen), .m_axi_arsize (m_axi_arsize), .m_axi_arburst (m_axi_arburst), @@ -184,37 +246,9 @@ module Vortex_axi import VX_gpu_pkg::*; #( .m_axi_rvalid (m_axi_rvalid), .m_axi_rready (m_axi_rready), .m_axi_rdata (m_axi_rdata), - .m_axi_rlast (m_axi_rlast) , - .m_axi_rid (m_axi_rid_unqual), + .m_axi_rlast (m_axi_rlast), + .m_axi_rid (m_axi_rid), .m_axi_rresp (m_axi_rresp) ); - `SCOPE_IO_SWITCH (1) - - Vortex vortex ( - `SCOPE_IO_BIND (0) - - .clk (clk), - .reset (reset), - - .mem_req_valid (mem_req_valid), - .mem_req_rw (mem_req_rw), - .mem_req_byteen (mem_req_byteen), - .mem_req_addr (mem_req_addr), - .mem_req_data (mem_req_data), - .mem_req_tag (mem_req_tag), - .mem_req_ready (mem_req_ready), - - .mem_rsp_valid (mem_rsp_valid), - 
.mem_rsp_data (mem_rsp_data), - .mem_rsp_tag (mem_rsp_tag), - .mem_rsp_ready (mem_rsp_ready), - - .dcr_wr_valid (dcr_wr_valid), - .dcr_wr_addr (dcr_wr_addr), - .dcr_wr_data (dcr_wr_data), - - .busy (busy) - ); - endmodule diff --git a/hw/rtl/afu/opae/local_mem_cfg_pkg.sv b/hw/rtl/afu/opae/local_mem_cfg_pkg.sv index ef9fae28aa..8b0ebaa0be 100644 --- a/hw/rtl/afu/opae/local_mem_cfg_pkg.sv +++ b/hw/rtl/afu/opae/local_mem_cfg_pkg.sv @@ -30,7 +30,17 @@ //`include "platform_afu_top_config.vh" -`ifdef PLATFORM_PROVIDES_LOCAL_MEMORY +`ifndef PLATFORM_PARAM_LOCAL_MEMORY_ADDR_WIDTH +`define PLATFORM_PARAM_LOCAL_MEMORY_ADDR_WIDTH `PLATFORM_MEMORY_ADDR_WIDTH +`endif + +`ifndef PLATFORM_PARAM_LOCAL_MEMORY_DATA_WIDTH +`define PLATFORM_PARAM_LOCAL_MEMORY_DATA_WIDTH `PLATFORM_MEMORY_DATA_WIDTH +`endif + +`ifndef PLATFORM_PARAM_LOCAL_MEMORY_BURST_CNT_WIDTH +`define PLATFORM_PARAM_LOCAL_MEMORY_BURST_CNT_WIDTH `PLATFORM_MEMORY_BURST_CNT_WIDTH +`endif package local_mem_cfg_pkg; @@ -57,5 +67,3 @@ package local_mem_cfg_pkg; typedef logic [LOCAL_MEM_DATA_N_BYTES-1:0] t_local_mem_byte_mask; endpackage // local_mem_cfg_pkg - -`endif // PLATFORM_PROVIDES_LOCAL_MEMORY diff --git a/hw/rtl/afu/opae/vortex_afu.sv b/hw/rtl/afu/opae/vortex_afu.sv index b0de60cf3d..4060a30110 100644 --- a/hw/rtl/afu/opae/vortex_afu.sv +++ b/hw/rtl/afu/opae/vortex_afu.sv @@ -42,7 +42,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ ); localparam LMEM_DATA_WIDTH = $bits(t_local_mem_data); localparam LMEM_DATA_SIZE = LMEM_DATA_WIDTH / 8; - localparam LMEM_ADDR_WIDTH = $bits(t_local_mem_addr); + localparam LMEM_ADDR_WIDTH = `VX_MEM_ADDR_WIDTH + ($clog2(`VX_MEM_DATA_WIDTH) - $clog2(LMEM_DATA_WIDTH)); localparam LMEM_BURST_CTRW = $bits(t_local_mem_burst_cnt); localparam CCI_DATA_WIDTH = $bits(t_ccip_clData); @@ -96,9 +96,13 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ localparam STATE_DCR_WRITE = 4; localparam STATE_WIDTH = 
`CLOG2(STATE_DCR_WRITE+1); + localparam BANK_BYTE_ADDR_WIDTH = `PLATFORM_MEMORY_ADDR_WIDTH + `CLOG2(`PLATFORM_MEMORY_DATA_WIDTH/8); + wire [127:0] afu_id = `AFU_ACCEL_UUID; - wire [63:0] dev_caps = {16'b0, + wire [63:0] dev_caps = {8'b0, + 5'(BANK_BYTE_ADDR_WIDTH-16), + 3'(`CLOG2(`PLATFORM_MEMORY_BANKS)), 8'(`LMEM_ENABLED ? `LMEM_LOG_SIZE : 0), 16'(`NUM_CORES * `NUM_CLUSTERS), 8'(`NUM_WARPS), @@ -601,6 +605,8 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ .NUM_BANKS (NUM_LOCAL_MEM_BANKS), .TAG_WIDTH (AVS_REQ_TAGW + 1), .RD_QUEUE_SIZE (AVS_RD_QUEUE_SIZE), + .AVS_ADDR_WIDTH($bits(t_local_mem_addr)), + .BANK_INTERLEAVE (1), .REQ_OUT_BUF (2), .RSP_OUT_BUF (0) ) avs_adapter ( diff --git a/hw/rtl/afu/xrt/VX_afu_ctrl.sv b/hw/rtl/afu/xrt/VX_afu_ctrl.sv index 12a55ec69d..e30219270a 100644 --- a/hw/rtl/afu/xrt/VX_afu_ctrl.sv +++ b/hw/rtl/afu/xrt/VX_afu_ctrl.sv @@ -14,21 +14,21 @@ `include "vortex_afu.vh" module VX_afu_ctrl #( - parameter AXI_ADDR_WIDTH = 8, - parameter AXI_DATA_WIDTH = 32, - parameter AXI_NUM_BANKS = 1 + parameter S_AXI_ADDR_WIDTH = 8, + parameter S_AXI_DATA_WIDTH = 32, + parameter M_AXI_ADDR_WIDTH = 25 ) ( // axi4 lite slave signals input wire clk, input wire reset, input wire s_axi_awvalid, - input wire [AXI_ADDR_WIDTH-1:0] s_axi_awaddr, + input wire [S_AXI_ADDR_WIDTH-1:0] s_axi_awaddr, output wire s_axi_awready, input wire s_axi_wvalid, - input wire [AXI_DATA_WIDTH-1:0] s_axi_wdata, - input wire [AXI_DATA_WIDTH/8-1:0] s_axi_wstrb, + input wire [S_AXI_DATA_WIDTH-1:0] s_axi_wdata, + input wire [S_AXI_DATA_WIDTH/8-1:0]s_axi_wstrb, output wire s_axi_wready, output wire s_axi_bvalid, @@ -36,11 +36,11 @@ module VX_afu_ctrl #( input wire s_axi_bready, input wire s_axi_arvalid, - input wire [AXI_ADDR_WIDTH-1:0] s_axi_araddr, + input wire [S_AXI_ADDR_WIDTH-1:0] s_axi_araddr, output wire s_axi_arready, output wire s_axi_rvalid, - output wire [AXI_DATA_WIDTH-1:0] s_axi_rdata, + output wire [S_AXI_DATA_WIDTH-1:0] s_axi_rdata, 
output wire [1:0] s_axi_rresp, input wire s_axi_rready, @@ -56,8 +56,6 @@ module VX_afu_ctrl #( output wire scope_bus_out, `endif - output wire [63:0] mem_base [AXI_NUM_BANKS], - output wire dcr_wr_valid, output wire [`VX_DCR_ADDR_WIDTH-1:0] dcr_wr_addr, output wire [`VX_DCR_DATA_WIDTH-1:0] dcr_wr_data @@ -125,10 +123,6 @@ module VX_afu_ctrl #( //ADDR_SCP_CTRL = 8'h3C, `endif - ADDR_MEM_0 = 8'h40, - ADDR_MEM_1 = 8'h44, - //ADDR_MEM_CTRL = 8'h48, - ADDR_BITS = 8; localparam @@ -144,7 +138,9 @@ module VX_afu_ctrl #( RSTATE_WIDTH = 2; // device caps - wire [63:0] dev_caps = {16'b0, + wire [63:0] dev_caps = {8'b0, + 5'(M_AXI_ADDR_WIDTH-16), + 3'(`CLOG2(`PLATFORM_MEMORY_BANKS)), 8'(`LMEM_ENABLED ? `LMEM_LOG_SIZE : 0), 16'(`NUM_CORES * `NUM_CLUSTERS), 8'(`NUM_WARPS), @@ -174,7 +170,6 @@ module VX_afu_ctrl #( reg gie_r; reg [1:0] ier_r; reg [1:0] isr_r; - reg [63:0] mem_r [AXI_NUM_BANKS]; reg [31:0] dcra_r; reg [31:0] dcrv_r; reg dcr_wr_valid_r; @@ -311,10 +306,6 @@ module VX_afu_ctrl #( dcra_r <= '0; dcrv_r <= '0; dcr_wr_valid_r <= 0; - - for (integer i = 0; i < AXI_NUM_BANKS; ++i) begin - mem_r[i] <= '0; - end end else begin dcr_wr_valid_r <= 0; ap_reset_r <= 0; @@ -353,16 +344,7 @@ module VX_afu_ctrl #( dcrv_r <= (s_axi_wdata & wmask) | (dcrv_r & ~wmask); dcr_wr_valid_r <= 1; end - default: begin - for (integer i = 0; i < AXI_NUM_BANKS; ++i) begin - if (waddr == (ADDR_MEM_0 + 8'(i) * 8'd12)) begin - mem_r[i][31:0] <= (s_axi_wdata & wmask) | (mem_r[i][31:0] & ~wmask); - end - if (waddr == (ADDR_MEM_1 + 8'(i) * 8'd12)) begin - mem_r[i][63:32] <= (s_axi_wdata & wmask) | (mem_r[i][63:32] & ~wmask); - end - end - end + default:; endcase if (ier_r[0] & ap_done) @@ -453,8 +435,6 @@ module VX_afu_ctrl #( assign ap_start = ap_start_r; assign interrupt = gie_r & (| isr_r); - assign mem_base = mem_r; - assign dcr_wr_valid = dcr_wr_valid_r; assign dcr_wr_addr = `VX_DCR_ADDR_WIDTH'(dcra_r); assign dcr_wr_data = `VX_DCR_DATA_WIDTH'(dcrv_r); diff --git a/hw/rtl/afu/xrt/VX_afu_wrap.sv 
b/hw/rtl/afu/xrt/VX_afu_wrap.sv index d5726dc73b..ca6fed1ae4 100644 --- a/hw/rtl/afu/xrt/VX_afu_wrap.sv +++ b/hw/rtl/afu/xrt/VX_afu_wrap.sv @@ -16,16 +16,17 @@ module VX_afu_wrap #( parameter C_S_AXI_CTRL_ADDR_WIDTH = 8, parameter C_S_AXI_CTRL_DATA_WIDTH = 32, - parameter C_M_AXI_MEM_ID_WIDTH = `M_AXI_MEM_ID_WIDTH, - parameter C_M_AXI_MEM_ADDR_WIDTH = `MEM_ADDR_WIDTH, - parameter C_M_AXI_MEM_DATA_WIDTH = `VX_MEM_DATA_WIDTH + parameter C_M_AXI_MEM_ID_WIDTH = 32, + parameter C_M_AXI_MEM_ADDR_WIDTH = 25, + parameter C_M_AXI_MEM_DATA_WIDTH = 512, + parameter C_M_AXI_MEM_NUM_BANKS = 2 ) ( // System signals input wire clk, input wire reset, // AXI4 master interface - `REPEAT (`M_AXI_MEM_NUM_BANKS, GEN_AXI_MEM, REPEAT_COMMA), + `REPEAT (`PLATFORM_MEMORY_BANKS, GEN_AXI_MEM, REPEAT_COMMA), // AXI4-Lite slave interface input wire s_axi_ctrl_awvalid, @@ -48,7 +49,6 @@ module VX_afu_wrap #( output wire interrupt ); - localparam C_M_AXI_MEM_NUM_BANKS = `M_AXI_MEM_NUM_BANKS; localparam STATE_IDLE = 0; localparam STATE_RUN = 1; @@ -80,7 +80,7 @@ module VX_afu_wrap #( wire [1:0] m_axi_mem_rresp_a [C_M_AXI_MEM_NUM_BANKS]; // convert memory interface to array - `REPEAT (`M_AXI_MEM_NUM_BANKS, AXI_MEM_TO_ARRAY, REPEAT_SEMICOLON); + `REPEAT (`PLATFORM_MEMORY_BANKS, AXI_MEM_TO_ARRAY, REPEAT_SEMICOLON); reg [`CLOG2(`RESET_DELAY+1)-1:0] vx_reset_ctr; reg [15:0] vx_pending_writes; @@ -88,8 +88,6 @@ module VX_afu_wrap #( reg vx_reset = 1; // asserted at initialization wire vx_busy; - wire [63:0] mem_base [C_M_AXI_MEM_NUM_BANKS]; - wire dcr_wr_valid; wire [`VX_DCR_ADDR_WIDTH-1:0] dcr_wr_addr; wire [`VX_DCR_DATA_WIDTH-1:0] dcr_wr_data; @@ -181,9 +179,9 @@ module VX_afu_wrap #( end VX_afu_ctrl #( - .AXI_ADDR_WIDTH (C_S_AXI_CTRL_ADDR_WIDTH), - .AXI_DATA_WIDTH (C_S_AXI_CTRL_DATA_WIDTH), - .AXI_NUM_BANKS (C_M_AXI_MEM_NUM_BANKS) + .S_AXI_ADDR_WIDTH (C_S_AXI_CTRL_ADDR_WIDTH), + .S_AXI_DATA_WIDTH (C_S_AXI_CTRL_DATA_WIDTH), + .M_AXI_ADDR_WIDTH (C_M_AXI_MEM_ADDR_WIDTH) ) afu_ctrl ( .clk (clk), .reset 
(reset), @@ -218,26 +216,24 @@ module VX_afu_wrap #( .scope_bus_out (scope_bus_in), `endif - .mem_base (mem_base), - .dcr_wr_valid (dcr_wr_valid), .dcr_wr_addr (dcr_wr_addr), .dcr_wr_data (dcr_wr_data) ); - wire [`MEM_ADDR_WIDTH-1:0] m_axi_mem_awaddr_u [C_M_AXI_MEM_NUM_BANKS]; - wire [`MEM_ADDR_WIDTH-1:0] m_axi_mem_araddr_u [C_M_AXI_MEM_NUM_BANKS]; + wire [C_M_AXI_MEM_ADDR_WIDTH-1:0] m_axi_mem_awaddr_u [C_M_AXI_MEM_NUM_BANKS]; + wire [C_M_AXI_MEM_ADDR_WIDTH-1:0] m_axi_mem_araddr_u [C_M_AXI_MEM_NUM_BANKS]; for (genvar i = 0; i < C_M_AXI_MEM_NUM_BANKS; ++i) begin : g_addressing - assign m_axi_mem_awaddr_a[i] = C_M_AXI_MEM_ADDR_WIDTH'(m_axi_mem_awaddr_u[i]) + C_M_AXI_MEM_ADDR_WIDTH'(mem_base[i]); - assign m_axi_mem_araddr_a[i] = C_M_AXI_MEM_ADDR_WIDTH'(m_axi_mem_araddr_u[i]) + C_M_AXI_MEM_ADDR_WIDTH'(mem_base[i]); + assign m_axi_mem_awaddr_a[i] = m_axi_mem_awaddr_u[i] + C_M_AXI_MEM_ADDR_WIDTH'(`PLATFORM_MEMORY_OFFSET); + assign m_axi_mem_araddr_a[i] = m_axi_mem_araddr_u[i] + C_M_AXI_MEM_ADDR_WIDTH'(`PLATFORM_MEMORY_OFFSET); end `SCOPE_IO_SWITCH (2) Vortex_axi #( .AXI_DATA_WIDTH (C_M_AXI_MEM_DATA_WIDTH), - .AXI_ADDR_WIDTH (`MEM_ADDR_WIDTH), + .AXI_ADDR_WIDTH (C_M_AXI_MEM_ADDR_WIDTH), .AXI_TID_WIDTH (C_M_AXI_MEM_ID_WIDTH), .AXI_NUM_BANKS (C_M_AXI_MEM_NUM_BANKS) ) vortex_axi ( diff --git a/hw/rtl/afu/xrt/vortex_afu.v b/hw/rtl/afu/xrt/vortex_afu.v index 0e042c32b8..985d029cf5 100644 --- a/hw/rtl/afu/xrt/vortex_afu.v +++ b/hw/rtl/afu/xrt/vortex_afu.v @@ -16,16 +16,17 @@ module vortex_afu #( parameter C_S_AXI_CTRL_ADDR_WIDTH = 8, parameter C_S_AXI_CTRL_DATA_WIDTH = 32, - parameter C_M_AXI_MEM_ID_WIDTH = `M_AXI_MEM_ID_WIDTH, - parameter C_M_AXI_MEM_ADDR_WIDTH = `M_AXI_MEM_ADDR_WIDTH, - parameter C_M_AXI_MEM_DATA_WIDTH = `M_AXI_MEM_DATA_WIDTH + parameter C_M_AXI_MEM_ID_WIDTH = `PLATFORM_MEMORY_ID_WIDTH, + parameter C_M_AXI_MEM_ADDR_WIDTH = `PLATFORM_MEMORY_ADDR_WIDTH + $clog2(`PLATFORM_MEMORY_DATA_WIDTH/8), + parameter C_M_AXI_MEM_DATA_WIDTH = `PLATFORM_MEMORY_DATA_WIDTH, + 
parameter C_M_AXI_MEM_NUM_BANKS = `PLATFORM_MEMORY_BANKS ) ( // System signals input wire ap_clk, input wire ap_rst_n, // AXI4 master interface - `REPEAT (`M_AXI_MEM_NUM_BANKS, GEN_AXI_MEM, REPEAT_COMMA), + `REPEAT (`PLATFORM_MEMORY_BANKS, GEN_AXI_MEM, REPEAT_COMMA), // AXI4-Lite slave interface input wire s_axi_ctrl_awvalid, @@ -54,12 +55,13 @@ module vortex_afu #( .C_S_AXI_CTRL_DATA_WIDTH (C_S_AXI_CTRL_DATA_WIDTH), .C_M_AXI_MEM_ID_WIDTH (C_M_AXI_MEM_ID_WIDTH), .C_M_AXI_MEM_ADDR_WIDTH (C_M_AXI_MEM_ADDR_WIDTH), - .C_M_AXI_MEM_DATA_WIDTH (C_M_AXI_MEM_DATA_WIDTH) + .C_M_AXI_MEM_DATA_WIDTH (C_M_AXI_MEM_DATA_WIDTH), + .C_M_AXI_MEM_NUM_BANKS (C_M_AXI_MEM_NUM_BANKS) ) afu_wrap ( .clk (ap_clk), .reset (~ap_rst_n), - `REPEAT (`M_AXI_MEM_NUM_BANKS, AXI_MEM_ARGS, REPEAT_COMMA), + `REPEAT (`PLATFORM_MEMORY_BANKS, AXI_MEM_ARGS, REPEAT_COMMA), .s_axi_ctrl_awvalid (s_axi_ctrl_awvalid), .s_axi_ctrl_awready (s_axi_ctrl_awready), diff --git a/hw/rtl/afu/xrt/vortex_afu.vh b/hw/rtl/afu/xrt/vortex_afu.vh index bf70cb8850..f35980c2ae 100644 --- a/hw/rtl/afu/xrt/vortex_afu.vh +++ b/hw/rtl/afu/xrt/vortex_afu.vh @@ -14,20 +14,24 @@ `ifndef VORTEX_AFU_VH `define VORTEX_AFU_VH -`ifndef M_AXI_MEM_NUM_BANKS -`define M_AXI_MEM_NUM_BANKS 1 +`ifndef PLATFORM_MEMORY_BANKS +`define PLATFORM_MEMORY_BANKS 2 `endif -`ifndef M_AXI_MEM_ADDR_WIDTH -`define M_AXI_MEM_ADDR_WIDTH 34 +`ifndef PLATFORM_MEMORY_ADDR_WIDTH +`define PLATFORM_MEMORY_ADDR_WIDTH 25 `endif -`ifndef M_AXI_MEM_DATA_WIDTH -`define M_AXI_MEM_DATA_WIDTH 512 +`ifndef PLATFORM_MEMORY_DATA_WIDTH +`define PLATFORM_MEMORY_DATA_WIDTH 512 `endif -`ifndef M_AXI_MEM_ID_WIDTH -`define M_AXI_MEM_ID_WIDTH 32 +`ifndef PLATFORM_MEMORY_OFFSET +`define PLATFORM_MEMORY_OFFSET 0 +`endif + +`ifndef PLATFORM_MEMORY_ID_WIDTH +`define PLATFORM_MEMORY_ID_WIDTH 32 `endif `define GEN_AXI_MEM(i) \ diff --git a/hw/rtl/libs/VX_avs_adapter.sv b/hw/rtl/libs/VX_avs_adapter.sv index 8d308ec362..fe9a9a53b2 100644 --- a/hw/rtl/libs/VX_avs_adapter.sv +++ 
b/hw/rtl/libs/VX_avs_adapter.sv @@ -21,6 +21,8 @@ module VX_avs_adapter #( parameter NUM_BANKS = 1, parameter TAG_WIDTH = 1, parameter RD_QUEUE_SIZE = 1, + parameter BANK_INTERLEAVE= 0, + parameter AVS_ADDR_WIDTH = ADDR_WIDTH - `CLOG2(NUM_BANKS), parameter REQ_OUT_BUF = 0, parameter RSP_OUT_BUF = 0 ) ( @@ -45,7 +47,7 @@ module VX_avs_adapter #( // AVS bus output wire [DATA_WIDTH-1:0] avs_writedata [NUM_BANKS], input wire [DATA_WIDTH-1:0] avs_readdata [NUM_BANKS], - output wire [ADDR_WIDTH-1:0] avs_address [NUM_BANKS], + output wire [AVS_ADDR_WIDTH-1:0] avs_address [NUM_BANKS], input wire avs_waitrequest [NUM_BANKS], output wire avs_write [NUM_BANKS], output wire avs_read [NUM_BANKS], @@ -53,28 +55,35 @@ module VX_avs_adapter #( output wire [BURST_WIDTH-1:0] avs_burstcount [NUM_BANKS], input wire avs_readdatavalid [NUM_BANKS] ); - localparam DATA_SIZE = DATA_WIDTH/8; - localparam BANK_ADDRW = `LOG2UP(NUM_BANKS); - localparam LOG2_NUM_BANKS = `CLOG2(NUM_BANKS); - localparam BANK_OFFSETW = ADDR_WIDTH - LOG2_NUM_BANKS; + localparam DATA_SIZE = DATA_WIDTH/8; + localparam BANK_SEL_BITS = `CLOG2(NUM_BANKS); + localparam BANK_SEL_WIDTH = `UP(BANK_SEL_BITS); + localparam BANK_OFFSETW = ADDR_WIDTH - BANK_SEL_BITS; + + `STATIC_ASSERT ((AVS_ADDR_WIDTH >= BANK_OFFSETW), ("invalid parameter")) // Requests handling ////////////////////////////////////////////////////// wire [NUM_BANKS-1:0] req_queue_push, req_queue_pop; wire [NUM_BANKS-1:0][TAG_WIDTH-1:0] req_queue_tag_out; wire [NUM_BANKS-1:0] req_queue_going_full; - wire [BANK_ADDRW-1:0] req_bank_sel; + wire [BANK_SEL_WIDTH-1:0] req_bank_sel; wire [BANK_OFFSETW-1:0] req_bank_off; wire [NUM_BANKS-1:0] bank_req_ready; if (NUM_BANKS > 1) begin : g_bank_sel - assign req_bank_sel = mem_req_addr[BANK_ADDRW-1:0]; - end else begin : g_bank_sel + if (BANK_INTERLEAVE) begin : g_interleave + assign req_bank_sel = mem_req_addr[BANK_SEL_BITS-1:0]; + assign req_bank_off = mem_req_addr[BANK_SEL_BITS +: BANK_OFFSETW]; + end else begin : 
g_no_interleave + assign req_bank_sel = mem_req_addr[BANK_OFFSETW +: BANK_SEL_BITS]; + assign req_bank_off = mem_req_addr[BANK_OFFSETW-1:0]; + end + end else begin : g_no_bank_sel assign req_bank_sel = '0; + assign req_bank_off = mem_req_addr; end - assign req_bank_off = mem_req_addr[ADDR_WIDTH-1:LOG2_NUM_BANKS]; - for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_req_queue_push assign req_queue_push[i] = mem_req_valid && ~mem_req_rw && bank_req_ready[i] && (req_bank_sel == i); end @@ -142,7 +151,7 @@ module VX_avs_adapter #( assign avs_read[i] = valid_out && ~rw_out; assign avs_write[i] = valid_out && rw_out; - assign avs_address[i] = ADDR_WIDTH'(addr_out); + assign avs_address[i] = AVS_ADDR_WIDTH'(addr_out); assign avs_byteenable[i] = byteen_out; assign avs_writedata[i] = data_out; assign avs_burstcount[i] = BURST_WIDTH'(1); diff --git a/hw/rtl/libs/VX_axi_adapter.sv b/hw/rtl/libs/VX_axi_adapter.sv index 9524971863..bdd699053d 100644 --- a/hw/rtl/libs/VX_axi_adapter.sv +++ b/hw/rtl/libs/VX_axi_adapter.sv @@ -19,7 +19,8 @@ module VX_axi_adapter #( parameter ADDR_WIDTH = 32, parameter TAG_WIDTH = 8, parameter NUM_BANKS = 1, - parameter AVS_ADDR_WIDTH = (ADDR_WIDTH - `CLOG2(DATA_WIDTH/8)), + parameter AXI_ADDR_WIDTH = (ADDR_WIDTH - `CLOG2(DATA_WIDTH/8)), + parameter BANK_INTERLEAVE= 0, parameter RSP_OUT_BUF = 0 ) ( input wire clk, @@ -29,7 +30,7 @@ module VX_axi_adapter #( input wire mem_req_valid, input wire mem_req_rw, input wire [DATA_WIDTH/8-1:0] mem_req_byteen, - input wire [AVS_ADDR_WIDTH-1:0] mem_req_addr, + input wire [ADDR_WIDTH-1:0] mem_req_addr, input wire [DATA_WIDTH-1:0] mem_req_data, input wire [TAG_WIDTH-1:0] mem_req_tag, output wire mem_req_ready, @@ -43,7 +44,7 @@ module VX_axi_adapter #( // AXI write request address channel output wire m_axi_awvalid [NUM_BANKS], input wire m_axi_awready [NUM_BANKS], - output wire [ADDR_WIDTH-1:0] m_axi_awaddr [NUM_BANKS], + output wire [AXI_ADDR_WIDTH-1:0] m_axi_awaddr [NUM_BANKS], output wire [TAG_WIDTH-1:0] 
m_axi_awid [NUM_BANKS], output wire [7:0] m_axi_awlen [NUM_BANKS], output wire [2:0] m_axi_awsize [NUM_BANKS], @@ -70,7 +71,7 @@ module VX_axi_adapter #( // AXI read address channel output wire m_axi_arvalid [NUM_BANKS], input wire m_axi_arready [NUM_BANKS], - output wire [ADDR_WIDTH-1:0] m_axi_araddr [NUM_BANKS], + output wire [AXI_ADDR_WIDTH-1:0] m_axi_araddr [NUM_BANKS], output wire [TAG_WIDTH-1:0] m_axi_arid [NUM_BANKS], output wire [7:0] m_axi_arlen [NUM_BANKS], output wire [2:0] m_axi_arsize [NUM_BANKS], @@ -89,15 +90,28 @@ module VX_axi_adapter #( input wire [TAG_WIDTH-1:0] m_axi_rid [NUM_BANKS], input wire [1:0] m_axi_rresp [NUM_BANKS] ); - localparam AXSIZE = `CLOG2(DATA_WIDTH/8); - localparam BANK_ADDRW = `LOG2UP(NUM_BANKS); - localparam LOG2_NUM_BANKS = `CLOG2(NUM_BANKS); - - wire [BANK_ADDRW-1:0] req_bank_sel; - if (NUM_BANKS > 1) begin : g_req_bank_sel - assign req_bank_sel = mem_req_addr[BANK_ADDRW-1:0]; - end else begin : g_req_bank_sel_0 + localparam DATA_SIZE = `CLOG2(DATA_WIDTH/8); + localparam BANK_SEL_BITS = `CLOG2(NUM_BANKS); + localparam BANK_SEL_WIDTH = `UP(BANK_SEL_BITS); + localparam BANK_OFFSETW = ADDR_WIDTH - BANK_SEL_BITS; + localparam DST_ADDR_WDITH = BANK_OFFSETW + `CLOG2(DATA_WIDTH/8); + + `STATIC_ASSERT ((AXI_ADDR_WIDTH >= DST_ADDR_WDITH), ("invalid tag width: current=%0d, expected=%0d", AXI_ADDR_WIDTH, DST_ADDR_WDITH)) + + wire [BANK_SEL_WIDTH-1:0] req_bank_sel; + wire [BANK_OFFSETW-1:0] req_bank_off; + + if (NUM_BANKS > 1) begin : g_bank_sel + if (BANK_INTERLEAVE) begin : g_interleave + assign req_bank_sel = mem_req_addr[BANK_SEL_BITS-1:0]; + assign req_bank_off = mem_req_addr[BANK_SEL_BITS +: BANK_OFFSETW]; + end else begin : g_no_interleave + assign req_bank_sel = mem_req_addr[BANK_OFFSETW +: BANK_SEL_BITS]; + assign req_bank_off = mem_req_addr[BANK_OFFSETW-1:0]; + end + end else begin : g_no_bank_sel assign req_bank_sel = '0; + assign req_bank_off = mem_req_addr; end wire mem_req_fire = mem_req_valid && mem_req_ready; @@ -134,10 
+148,10 @@ module VX_axi_adapter #( // AXI write request address channel for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_axi_write_addr assign m_axi_awvalid[i] = mem_req_valid && mem_req_rw && (req_bank_sel == i) && ~m_axi_aw_ack[i]; - assign m_axi_awaddr[i] = (ADDR_WIDTH'(mem_req_addr) >> LOG2_NUM_BANKS) << AXSIZE; + assign m_axi_awaddr[i] = AXI_ADDR_WIDTH'(req_bank_off); assign m_axi_awid[i] = mem_req_tag; assign m_axi_awlen[i] = 8'b00000000; - assign m_axi_awsize[i] = 3'(AXSIZE); + assign m_axi_awsize[i] = 3'(DATA_SIZE); assign m_axi_awburst[i] = 2'b00; assign m_axi_awlock[i] = 2'b00; assign m_axi_awcache[i] = 4'b0000; @@ -166,10 +180,10 @@ module VX_axi_adapter #( // AXI read request channel for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_axi_read_req assign m_axi_arvalid[i] = mem_req_valid && ~mem_req_rw && (req_bank_sel == i); - assign m_axi_araddr[i] = (ADDR_WIDTH'(mem_req_addr) >> LOG2_NUM_BANKS) << AXSIZE; + assign m_axi_araddr[i] = AXI_ADDR_WIDTH'(req_bank_off); assign m_axi_arid[i] = mem_req_tag; assign m_axi_arlen[i] = 8'b00000000; - assign m_axi_arsize[i] = 3'(AXSIZE); + assign m_axi_arsize[i] = 3'(DATA_SIZE); assign m_axi_arburst[i] = 2'b00; assign m_axi_arlock[i] = 2'b00; assign m_axi_arcache[i] = 4'b0000; diff --git a/hw/rtl/libs/VX_mem_adapter.sv b/hw/rtl/libs/VX_mem_adapter.sv index 5f32e1aa15..066de829f7 100644 --- a/hw/rtl/libs/VX_mem_adapter.sv +++ b/hw/rtl/libs/VX_mem_adapter.sv @@ -53,8 +53,6 @@ module VX_mem_adapter #( input wire [DST_TAG_WIDTH-1:0] mem_rsp_tag_out, output wire mem_rsp_ready_out ); - `STATIC_ASSERT ((DST_TAG_WIDTH >= SRC_TAG_WIDTH), ("oops!")) - localparam DST_DATA_SIZE = (DST_DATA_WIDTH / 8); localparam DST_LDATAW = `CLOG2(DST_DATA_WIDTH); localparam SRC_LDATAW = `CLOG2(SRC_DATA_WIDTH); @@ -74,6 +72,7 @@ module VX_mem_adapter #( wire [SRC_TAG_WIDTH-1:0] mem_rsp_tag_in_w; wire mem_rsp_ready_in_w; + `UNUSED_VAR (mem_req_tag_in) `UNUSED_VAR (mem_rsp_tag_out) if (DST_LDATAW > SRC_LDATAW) begin : g_wider_dst_data @@ -122,7 
+121,7 @@ module VX_mem_adapter #( assign mem_rsp_valid_in_w = mem_rsp_valid_out; assign mem_rsp_data_in_w = mem_rsp_data_out_w[rsp_idx]; - assign mem_rsp_tag_in_w = SRC_TAG_WIDTH'(mem_rsp_tag_out[SRC_TAG_WIDTH+D-1:D]); + assign mem_rsp_tag_in_w = SRC_TAG_WIDTH'(mem_rsp_tag_out[DST_TAG_WIDTH-1:D]); assign mem_rsp_ready_out = mem_rsp_ready_in_w; end else if (DST_LDATAW < SRC_LDATAW) begin : g_wider_src_data diff --git a/hw/syn/altera/dut/top/Makefile b/hw/syn/altera/dut/top/Makefile index 99889f4ae1..e4dfae274d 100644 --- a/hw/syn/altera/dut/top/Makefile +++ b/hw/syn/altera/dut/top/Makefile @@ -7,17 +7,21 @@ include ../../common.mk # AFU parameters CONFIGS += -DNOPAE CONFIGS += -DPLATFORM_PROVIDES_LOCAL_MEMORY -ifeq (,$(findstring PLATFORM_PARAM_LOCAL_MEMORY_BANKS,$(CONFIGS))) - CONFIGS += -DPLATFORM_PARAM_LOCAL_MEMORY_BANKS=2 +ifeq (,$(findstring PLATFORM_MEMORY_BANKS,$(CONFIGS))) + CONFIGS += -DPLATFORM_MEMORY_BANKS=2 endif -ifeq (,$(findstring PLATFORM_PARAM_LOCAL_MEMORY_ADDR_WIDTH,$(CONFIGS))) - CONFIGS += -DPLATFORM_PARAM_LOCAL_MEMORY_ADDR_WIDTH=26 +ifeq (,$(findstring PLATFORM_MEMORY_ADDR_WIDTH,$(CONFIGS))) + ifeq ($(XLEN),64) + CONFIGS += -DPLATFORM_MEMORY_ADDR_WIDTH=41 + else + CONFIGS += -DPLATFORM_MEMORY_ADDR_WIDTH=25 + endif endif -ifeq (,$(findstring PLATFORM_PARAM_LOCAL_MEMORY_DATA_WIDTH,$(CONFIGS))) - CONFIGS += -DPLATFORM_PARAM_LOCAL_MEMORY_DATA_WIDTH=512 +ifeq (,$(findstring PLATFORM_MEMORY_DATA_WIDTH,$(CONFIGS))) + CONFIGS += -DPLATFORM_MEMORY_DATA_WIDTH=512 endif -ifeq (,$(findstring PLATFORM_PARAM_LOCAL_MEMORY_BURST_CNT_WIDTH,$(CONFIGS))) - CONFIGS += -DPLATFORM_PARAM_LOCAL_MEMORY_BURST_CNT_WIDTH=4 +ifeq (,$(findstring PLATFORM_MEMORY_BURST_CNT_WIDTH,$(CONFIGS))) + CONFIGS += -DPLATFORM_MEMORY_BURST_CNT_WIDTH=4 endif #CONFIGS += -DNUM_CORES=2 diff --git a/hw/syn/altera/opae/Makefile b/hw/syn/altera/opae/Makefile index e961be4532..19f9d0836d 100644 --- a/hw/syn/altera/opae/Makefile +++ b/hw/syn/altera/opae/Makefile @@ -98,7 +98,7 @@ ifdef PERF 
endif # ast dump flags -XML_CFLAGS = $(filter-out -DSYNTHESIS -DQUARTUS, $(CFLAGS)) $(RTL_PKGS) -I$(AFU_DIR)/ccip -I$(DPI_DIR) -DPLATFORM_PROVIDES_LOCAL_MEMORY -DPLATFORM_PARAM_LOCAL_MEMORY_BANKS=2 -DPLATFORM_PARAM_LOCAL_MEMORY_ADDR_WIDTH=26 -DPLATFORM_PARAM_LOCAL_MEMORY_DATA_WIDTH=512 -DPLATFORM_PARAM_LOCAL_MEMORY_BURST_CNT_WIDTH=4 -DNOPAE -DSV_DPI +XML_CFLAGS = $(filter-out -DSYNTHESIS -DQUARTUS, $(CFLAGS)) $(RTL_PKGS) -I$(AFU_DIR)/ccip -I$(DPI_DIR) -DPLATFORM_PROVIDES_LOCAL_MEMORY -DPLATFORM_MEMORY_BANKS=1 -DPLATFORM_MEMORY_ADDR_WIDTH=26 -DPLATFORM_MEMORY_DATA_WIDTH=512 -DPLATFORM_MEMORY_BURST_CNT_WIDTH=4 -DNOPAE -DSV_DPI all: swconfig ip-gen setup build diff --git a/runtime/include/vortex.h b/runtime/include/vortex.h index 853da5994d..8fa6c20577 100644 --- a/runtime/include/vortex.h +++ b/runtime/include/vortex.h @@ -35,6 +35,7 @@ typedef void* vx_buffer_h; #define VX_CAPS_LOCAL_MEM_SIZE 0x6 #define VX_CAPS_ISA_FLAGS 0x7 #define VX_CAPS_NUM_MEM_BANKS 0x8 +#define VX_CAPS_MEM_BANK_SIZE 0x9 // device isa flags #define VX_ISA_STD_A (1ull << ISA_STD_A) diff --git a/runtime/opae/vortex.cpp b/runtime/opae/vortex.cpp index 1bc913cc81..f06f34bea8 100755 --- a/runtime/opae/vortex.cpp +++ b/runtime/opae/vortex.cpp @@ -163,11 +163,6 @@ class vx_device { }); { - // retrieve FPGA global memory size - CHECK_FPGA_ERR(api_.fpgaPropertiesGetLocalMemorySize(filter, &global_mem_size_), { - global_mem_size_ = GLOBAL_MEM_SIZE; - }); - // Load ISA CAPS CHECK_FPGA_ERR(api_.fpgaReadMMIO64(fpga_, 0, MMIO_ISA_CAPS, &isa_caps_), { api_.fpgaClose(fpga_); @@ -179,6 +174,12 @@ class vx_device { api_.fpgaClose(fpga_); return -1; }); + + // Determine global memory size + uint64_t num_banks, bank_size; + this->get_caps(VX_CAPS_NUM_MEM_BANKS, &num_banks); + this->get_caps(VX_CAPS_MEM_BANK_SIZE, &bank_size); + global_mem_size_ = num_banks * bank_size; } #ifdef SCOPE @@ -231,7 +232,10 @@ class vx_device { _value = isa_caps_; break; case VX_CAPS_NUM_MEM_BANKS: - _value = MEMORY_BANKS; + _value = 1 
<< ((dev_caps_ >> 48) & 0x7); + break; + case VX_CAPS_MEM_BANK_SIZE: + _value = 1ull << (16 + ((dev_caps_ >> 51) & 0x1f)); break; default: fprintf(stderr, "[VXDRV] Error: invalid caps id: %d\n", caps_id); diff --git a/runtime/rtlsim/vortex.cpp b/runtime/rtlsim/vortex.cpp index 91df7f7e89..7ba7f9471e 100644 --- a/runtime/rtlsim/vortex.cpp +++ b/runtime/rtlsim/vortex.cpp @@ -80,6 +80,9 @@ class vx_device { case VX_CAPS_NUM_MEM_BANKS: _value = MEMORY_BANKS; break; + case VX_CAPS_MEM_BANK_SIZE: + _value = 1ull << (MEM_ADDR_WIDTH / MEMORY_BANKS); + break; default: std::cout << "invalid caps id: " << caps_id << std::endl; std::abort(); diff --git a/runtime/simx/vortex.cpp b/runtime/simx/vortex.cpp index 70ceb7fc48..eb32709ec5 100644 --- a/runtime/simx/vortex.cpp +++ b/runtime/simx/vortex.cpp @@ -84,6 +84,9 @@ class vx_device { case VX_CAPS_NUM_MEM_BANKS: _value = MEMORY_BANKS; break; + case VX_CAPS_MEM_BANK_SIZE: + _value = 1ull << (MEM_ADDR_WIDTH / MEMORY_BANKS); + break; default: std::cout << "invalid caps id: " << caps_id << std::endl; std::abort(); diff --git a/runtime/xrt/Makefile b/runtime/xrt/Makefile index d4fbc51a85..7fadb43fd2 100644 --- a/runtime/xrt/Makefile +++ b/runtime/xrt/Makefile @@ -8,6 +8,7 @@ SRC_DIR := $(VORTEX_HOME)/runtime/xrt CXXFLAGS += -std=c++14 -Wall -Wextra -Wfatal-errors CXXFLAGS += -I$(INC_DIR) -I$(COMMON_DIR) -I$(ROOT_DIR)/hw -I$(XILINX_XRT)/include -I$(SIM_DIR)/common +CXXFLAGS += -DXLEN_$(XLEN) CXXFLAGS += -fPIC LDFLAGS += -shared -pthread diff --git a/runtime/xrt/vortex.cpp b/runtime/xrt/vortex.cpp index 48926e80bf..3acb9b3c67 100644 --- a/runtime/xrt/vortex.cpp +++ b/runtime/xrt/vortex.cpp @@ -49,7 +49,6 @@ using namespace vortex; #define MMIO_ISA_ADDR 0x1C #define MMIO_DCR_ADDR 0x28 #define MMIO_SCP_ADDR 0x34 -#define MMIO_MEM_ADDR 0x40 #define CTL_AP_START (1 << 0) #define CTL_AP_DONE (1 << 1) @@ -58,24 +57,6 @@ using namespace vortex; #define CTL_AP_RESET (1 << 4) #define CTL_AP_RESTART (1 << 7) -struct platform_info_t { - const 
char *prefix_name; - uint8_t lg2_num_banks; - uint8_t lg2_bank_size; - uint64_t mem_base; -}; - -static const platform_info_t g_platforms[] = { - {"vortex_xrtsim", 0, 32, 0x0}, // 16 x 256 MB = 4 GB - {"xilinx_u200", 2, 34, 0x0}, // 4 x 16 GB = 64 GB DDR4 - {"xilinx_u250", 2, 34, 0x0}, // 4 x 16 GB = 64 GB DDR4 - {"xilinx_u50", 5, 28, 0x0}, // 32 x 256 MB = 8 GB HBM2 - {"xilinx_u280", 5, 28, 0x0}, // 32 x 256 MB = 8 GB HBM2 - {"xilinx_u55c", 5, 29, 0x0}, // 32 x 512 MB = 16 GB HBM2 - {"xilinx_vck5000", 0, 33, 0xC000000000}, // 1 x 8 GB = 8 GB DDR4 - {"xilinx_kv260", 0, 32, 0x0}, // 1 x 4 GB = 4 GB DDR4 -}; - #ifdef CPP_API typedef xrt::device xrt_device_t; @@ -113,18 +94,6 @@ static void dump_xrt_error(xrtDeviceHandle xrtDevice, xrtErrorCode err) { } #endif -static int get_platform_info(const std::string &device_name, - platform_info_t *platform_info) { - for (size_t i = 0; i < (sizeof(g_platforms) / sizeof(platform_info_t)); ++i) { - auto &platform = g_platforms[i]; - if (device_name.rfind(platform.prefix_name, 0) == 0) { - *platform_info = platform; - return 0; - } - } - return -1; -} - /////////////////////////////////////////////////////////////////////////////// class vx_device { @@ -181,58 +150,6 @@ class vx_device { auto xclbin = xrt::xclbin(xlbin_path_s); auto device_name = xrtDevice.get_info(); - /*{ - uint32_t num_banks = 0; - uint64_t bank_size = 0; - uint64_t mem_base = 0; - - auto mem_json = - nlohmann::json::parse(xrtDevice.get_info()); if - (!mem_json.is_null()) { uint32_t index = 0; for (auto& mem : - mem_json["board"]["memory"]["memories"]) { auto enabled = - mem["enabled"].get(); if (enabled == "true") { if (index == 0) - { mem_base = std::stoull(mem["base_address"].get(), nullptr, - 16); bank_size = std::stoull(mem["range_bytes"].get(), nullptr, - 16); - } - ++index; - } - } - num_banks = index; - } - - fprintf(stderr, "[VXDRV] memory description: base=0x%lx, size=0x%lx, - count=%d\n", mem_base, bank_size, num_banks); - }*/ - - /*{ - std::cout << 
"Device" << device_index << " : " << - xrtDevice.get_info() << std::endl; std::cout << " - bdf : " << xrtDevice.get_info() << std::endl; - std::cout << " kdma : " << - xrtDevice.get_info() << std::endl; std::cout << " - max_freq : " << - xrtDevice.get_info() << - std::endl; std::cout << " memory : " << - xrtDevice.get_info() << std::endl; std::cout << " - thermal : " << xrtDevice.get_info() << - std::endl; std::cout << " m2m : " << std::boolalpha << - xrtDevice.get_info() << std::dec << std::endl; - std::cout << " nodma : " << std::boolalpha << - xrtDevice.get_info() << std::dec << std::endl; - - std::cout << "Memory info :" << std::endl; - for (const auto& mem_bank : xclbin.get_mems()) { - std::cout << " index : " << mem_bank.get_index() << std::endl; - std::cout << " tag : " << mem_bank.get_tag() << std::endl; - std::cout << " type : " << (int)mem_bank.get_type() << std::endl; - std::cout << " base_address : 0x" << std::hex << - mem_bank.get_base_address() << std::endl; std::cout << " size : 0x" << - (mem_bank.get_size_kb() * 1000) << std::dec << std::endl; std::cout << " - used :" << mem_bank.get_used() << std::endl; - } - }*/ - #else CHECK_HANDLE(xrtDevice, xrtDeviceOpen(device_index), { @@ -275,11 +192,6 @@ class vx_device { printf("info: device name=%s.\n", device_name.c_str()); - CHECK_ERR(get_platform_info(device_name, &platform_), { - fprintf(stderr, "[VXDRV] Error: platform not supported: %s\n", device_name.c_str()); - return err; - }); - CHECK_ERR(this->write_register(MMIO_CTL_ADDR, CTL_AP_RESET), { return err; }); @@ -300,36 +212,13 @@ class vx_device { return err; }); - uint32_t num_banks = 1 << platform_.lg2_num_banks; - uint64_t bank_size = 1ull << platform_.lg2_bank_size; - - // adjust memory banks allocation to architecture limit - int isa_arch = VX_ISA_ARCH(isa_caps_); - if (isa_arch == 32) { - uint64_t max_mem_size = 1ull << 32; - uint32_t need_num_banks = max_mem_size / bank_size; - if (num_banks > need_num_banks) { - printf("info: adjusted 
number of banks from %d to %d.\n", num_banks, need_num_banks); - num_banks = need_num_banks; - platform_.lg2_num_banks = log2ceil(num_banks); - } - } - - for (uint32_t i = 0; i < num_banks; ++i) { - uint32_t reg_addr = MMIO_MEM_ADDR + (i * 12); - uint64_t reg_value = platform_.mem_base + i * bank_size; - - CHECK_ERR(this->write_register(reg_addr, reg_value & 0xffffffff), { - return err; - }); + uint64_t num_banks; + this->get_caps(VX_CAPS_NUM_MEM_BANKS, &num_banks); + lg2_num_banks_ = log2ceil(num_banks); - CHECK_ERR(this->write_register(reg_addr + 4, (reg_value >> 32) & 0xffffffff), { - return err; - }); - #ifndef BANK_INTERLEAVE - break; - #endif - } + uint64_t bank_size; + this->get_caps(VX_CAPS_MEM_BANK_SIZE, &bank_size); + lg2_bank_size_ = log2ceil(bank_size); global_mem_size_ = num_banks * bank_size; @@ -418,7 +307,10 @@ class vx_device { _value = isa_caps_; break; case VX_CAPS_NUM_MEM_BANKS: - _value = MEMORY_BANKS; + _value = 1 << ((dev_caps_ >> 48) & 0x7); + break; + case VX_CAPS_MEM_BANK_SIZE: + _value = 1ull << (16 + ((dev_caps_ >> 51) & 0x1f)); break; default: fprintf(stderr, "[VXDRV] Error: invalid caps id: %d\n", caps_id); @@ -734,23 +626,23 @@ class vx_device { MemoryAllocator global_mem_; xrt_device_t xrtDevice_; xrt_kernel_t xrtKernel_; - platform_info_t platform_; uint64_t dev_caps_; uint64_t isa_caps_; uint64_t global_mem_size_; DeviceConfig dcrs_; std::unordered_map> mpm_cache_; + uint32_t lg2_num_banks_; + uint32_t lg2_bank_size_; #ifdef BANK_INTERLEAVE std::vector xrtBuffers_; int get_bank_info(uint64_t addr, uint32_t *pIdx, uint64_t *pOff) { - uint32_t num_banks = 1 << platform_.lg2_num_banks; + uint32_t num_banks = 1 << lg2_num_banks_; uint64_t block_addr = addr / CACHE_BLOCK_SIZE; uint32_t index = block_addr & (num_banks - 1); - uint64_t offset = - (block_addr >> platform_.lg2_num_banks) * CACHE_BLOCK_SIZE; + uint64_t offset = (block_addr >> lg2_num_banks_) * CACHE_BLOCK_SIZE; if (pIdx) { *pIdx = index; } @@ -778,9 +670,9 @@ class vx_device 
{ std::unordered_map xrtBuffers_; int get_bank_info(uint64_t addr, uint32_t *pIdx, uint64_t *pOff) { - uint32_t num_banks = 1 << platform_.lg2_num_banks; - uint64_t bank_size = 1ull << platform_.lg2_bank_size; - uint32_t index = addr >> platform_.lg2_bank_size; + uint32_t num_banks = 1 << lg2_num_banks_; + uint64_t bank_size = 1ull << lg2_bank_size_; + uint32_t index = addr >> lg2_bank_size_; uint64_t offset = addr & (bank_size - 1); if (index > num_banks) { fprintf(stderr, "[VXDRV] Error: address out of range: 0x%lx\n", addr); @@ -807,7 +699,7 @@ class vx_device { } } else { printf("allocating bank%d...\n", bank_id); - uint64_t bank_size = 1ull << platform_.lg2_bank_size; + uint64_t bank_size = 1ull << lg2_bank_size_; #ifdef CPP_API xrt::bo xrtBuffer(xrtDevice_, bank_size, xrt::bo::flags::normal, bank_id); #else diff --git a/sim/common/bitmanip.h b/sim/common/bitmanip.h index a6cd87ff16..3c58580433 100644 --- a/sim/common/bitmanip.h +++ b/sim/common/bitmanip.h @@ -1,10 +1,10 @@ // Copyright © 2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -20,30 +20,58 @@ constexpr uint32_t count_leading_zeros(uint32_t value) { return value ? __builtin_clz(value) : 32; } +constexpr uint32_t count_leading_zeros(uint64_t value) { + return value ? __builtin_clzll(value) : 64; +} + constexpr uint32_t count_trailing_zeros(uint32_t value) { return value ? __builtin_ctz(value) : 32; } +constexpr uint32_t count_trailing_zeros(uint64_t value) { + return value ? 
__builtin_ctzll(value) : 64; +} + constexpr bool ispow2(uint32_t value) { return value && !(value & (value - 1)); } +constexpr bool ispow2(uint64_t value) { + return value && !(value & (value - 1)); +} + constexpr uint32_t log2ceil(uint32_t value) { return 32 - count_leading_zeros(value - 1); } +constexpr uint32_t log2ceil(uint64_t value) { + return 64 - count_leading_zeros(value - 1); +} + inline unsigned log2up(uint32_t value) { return std::max(1, log2ceil(value)); } +inline unsigned log2up(uint64_t value) { + return std::max(1, log2ceil(value)); +} + constexpr unsigned log2floor(uint32_t value) { return 31 - count_leading_zeros(value); } +constexpr unsigned log2floor(uint64_t value) { + return 63 - count_leading_zeros(value); +} + constexpr unsigned ceil2(uint32_t value) { return 32 - count_leading_zeros(value); } +constexpr unsigned ceil2(uint64_t value) { + return 64 - count_leading_zeros(value); +} + inline uint64_t bit_clr(uint64_t bits, uint32_t index) { assert(index <= 63); return bits & ~(1ull << index); @@ -86,7 +114,7 @@ template T sext(const T& word, uint32_t width) { assert(width > 1); assert(width <= (sizeof(T) * 8)); - if (width == (sizeof(T) * 8)) + if (width == (sizeof(T) * 8)) return word; T mask((static_cast(1) << width) - 1); return ((word >> (width - 1)) & 0x1) ? 
(word | ~mask) : (word & mask); @@ -96,7 +124,7 @@ template T zext(const T& word, uint32_t width) { assert(width > 1); assert(width <= (sizeof(T) * 8)); - if (width == (sizeof(T) * 8)) + if (width == (sizeof(T) * 8)) return word; T mask((static_cast(1) << width) - 1); return word & mask; diff --git a/sim/common/mem_alloc.h b/sim/common/mem_alloc.h index 480c198a6d..9ea6660d98 100644 --- a/sim/common/mem_alloc.h +++ b/sim/common/mem_alloc.h @@ -71,13 +71,14 @@ class MemoryAllocator { // Check if the reservation is within memory capacity bounds if (addr + size > capacity_) { - printf("error: address range out of bounds\n"); + printf("error: address range out of bounds - requested=0x%lx, capacity=0x%lx\n", (addr + size), capacity_); return -1; } // Ensure the reservation does not overlap with existing pages - if (hasPageOverlap(addr, size)) { - printf("error: address range overlaps with existing allocation\n"); + uint64_t overlapStart, overlapEnd; + if (hasPageOverlap(addr, size, &overlapStart, &overlapEnd)) { + printf("error: address range overlaps with existing allocation - requested=[0x%lx-0x%lx], existing=[0x%lx, 0x%lx]\n", addr, addr+size, overlapStart, overlapEnd); return -1; } @@ -509,15 +510,15 @@ class MemoryAllocator { return false; } - bool hasPageOverlap(uint64_t start, uint64_t size) { + bool hasPageOverlap(uint64_t start, uint64_t size, uint64_t* overlapStart, uint64_t* overlapEnd) { page_t* current = pages_; while (current != nullptr) { uint64_t pageStart = current->addr; uint64_t pageEnd = pageStart + current->size; - uint64_t requestEnd = start + size; - if ((start >= pageStart && start < pageEnd) || // Start of request is inside the page - (requestEnd > pageStart && requestEnd <= pageEnd) || // End of request is inside the page - (start <= pageStart && requestEnd >= pageEnd)) { // Request envelops the page + uint64_t end = start + size; + if ((start <= pageEnd) && (end >= pageStart)) { + *overlapStart = pageStart; + *overlapEnd = pageEnd; return 
true; } current = current->next; diff --git a/sim/opaesim/Makefile b/sim/opaesim/Makefile index ffbfece13c..ce8602c186 100644 --- a/sim/opaesim/Makefile +++ b/sim/opaesim/Makefile @@ -32,18 +32,21 @@ DBG_SCOPE_FLAGS += -DDBG_SCOPE_FETCH DBG_SCOPE_FLAGS += -DDBG_SCOPE_LSU # AFU parameters -CONFIGS += -DPLATFORM_PROVIDES_LOCAL_MEMORY -ifeq (,$(findstring PLATFORM_PARAM_LOCAL_MEMORY_BANKS,$(CONFIGS))) - CONFIGS += -DPLATFORM_PARAM_LOCAL_MEMORY_BANKS=2 +ifeq (,$(findstring PLATFORM_MEMORY_BANKS,$(CONFIGS))) + CONFIGS += -DPLATFORM_MEMORY_BANKS=2 endif -ifeq (,$(findstring PLATFORM_PARAM_LOCAL_MEMORY_ADDR_WIDTH,$(CONFIGS))) - CONFIGS += -DPLATFORM_PARAM_LOCAL_MEMORY_ADDR_WIDTH=26 +ifeq (,$(findstring PLATFORM_MEMORY_ADDR_WIDTH,$(CONFIGS))) + ifeq ($(XLEN),64) + CONFIGS += -DPLATFORM_MEMORY_ADDR_WIDTH=41 + else + CONFIGS += -DPLATFORM_MEMORY_ADDR_WIDTH=25 + endif endif -ifeq (,$(findstring PLATFORM_PARAM_LOCAL_MEMORY_DATA_WIDTH,$(CONFIGS))) - CONFIGS += -DPLATFORM_PARAM_LOCAL_MEMORY_DATA_WIDTH=512 +ifeq (,$(findstring PLATFORM_MEMORY_DATA_WIDTH,$(CONFIGS))) + CONFIGS += -DPLATFORM_MEMORY_DATA_WIDTH=512 endif -ifeq (,$(findstring PLATFORM_PARAM_LOCAL_MEMORY_BURST_CNT_WIDTH,$(CONFIGS))) - CONFIGS += -DPLATFORM_PARAM_LOCAL_MEMORY_BURST_CNT_WIDTH=4 +ifeq (,$(findstring PLATFORM_MEMORY_BURST_CNT_WIDTH,$(CONFIGS))) + CONFIGS += -DPLATFORM_MEMORY_BURST_CNT_WIDTH=4 endif DBG_FLAGS += -DDEBUG_LEVEL=$(DEBUG) -DVCD_OUTPUT $(DBG_TRACE_FLAGS) diff --git a/sim/opaesim/opae_sim.cpp b/sim/opaesim/opae_sim.cpp index 430e4478bb..2a06595dfa 100644 --- a/sim/opaesim/opae_sim.cpp +++ b/sim/opaesim/opae_sim.cpp @@ -35,7 +35,7 @@ #include #include -#define PLATFORM_PARAM_LOCAL_MEMORY_DATA_SIZE (PLATFORM_PARAM_LOCAL_MEMORY_DATA_WIDTH/8) +#define PLATFORM_MEMORY_DATA_SIZE (PLATFORM_MEMORY_DATA_WIDTH/8) #ifndef MEM_CLOCK_RATIO #define MEM_CLOCK_RATIO 1 @@ -145,6 +145,9 @@ class opae_sim::Impl { // allocate RAM ram_ = new RAM(0, RAM_PAGE_SIZE); + // calculate memory bank size + mem_bank_size_ = 
(1ull << PLATFORM_MEMORY_ADDR_WIDTH) * PLATFORM_MEMORY_DATA_SIZE; + // reset the device this->reset(); @@ -406,14 +409,14 @@ class opae_sim::Impl { } void avs_bus_reset() { - for (int b = 0; b < PLATFORM_PARAM_LOCAL_MEMORY_BANKS; ++b) { + for (int b = 0; b < PLATFORM_MEMORY_BANKS; ++b) { device_->avs_readdatavalid[b] = 0; device_->avs_waitrequest[b] = 0; } } void avs_bus_eval() { - for (int b = 0; b < PLATFORM_PARAM_LOCAL_MEMORY_BANKS; ++b) { + for (int b = 0; b < PLATFORM_MEMORY_BANKS; ++b) { // process memory responses device_->avs_readdatavalid[b] = 0; if (!pending_mem_reqs_[b].empty() @@ -421,7 +424,7 @@ class opae_sim::Impl { auto mem_rd_it = pending_mem_reqs_[b].begin(); auto mem_req = *mem_rd_it; device_->avs_readdatavalid[b] = 1; - memcpy(device_->avs_readdata[b], mem_req->data.data(), PLATFORM_PARAM_LOCAL_MEMORY_DATA_SIZE); + memcpy(device_->avs_readdata[b], mem_req->data.data(), PLATFORM_MEMORY_DATA_SIZE); uint32_t addr = mem_req->addr; pending_mem_reqs_[b].erase(mem_rd_it); delete mem_req; @@ -429,19 +432,20 @@ class opae_sim::Impl { // process memory requests assert(!device_->avs_read[b] || !device_->avs_write[b]); - uint64_t byte_addr = (uint64_t(device_->avs_address[b]) * PLATFORM_PARAM_LOCAL_MEMORY_BANKS + b) * PLATFORM_PARAM_LOCAL_MEMORY_DATA_SIZE; + uint64_t byte_addr = b * mem_bank_size_ + uint64_t(device_->avs_address[b]) * PLATFORM_MEMORY_DATA_SIZE; if (device_->avs_write[b]) { + // process write request uint64_t byteen = device_->avs_byteenable[b]; uint8_t* data = (uint8_t*)(device_->avs_writedata[b].data()); - for (int i = 0; i < PLATFORM_PARAM_LOCAL_MEMORY_DATA_SIZE; i++) { + for (int i = 0; i < PLATFORM_MEMORY_DATA_SIZE; i++) { if ((byteen >> i) & 0x1) { (*ram_)[byte_addr + i] = data[i]; } } - /*printf("%0ld: [sim] MEM Wr Req: bank=%d, addr=0x%lx, data=0x", timestamp, b, byte_addr); - for (int i = 0; i < PLATFORM_PARAM_LOCAL_MEMORY_DATA_SIZE; i++) { - printf("%02x", data[(PLATFORM_PARAM_LOCAL_MEMORY_DATA_SIZE-1)-i]); + /*printf("%0ld: [sim] 
MEM Wr Req: bank=%d, addr=0x%lx, byteen=0x%lx, data=0x", timestamp, b, byte_addr, byteen); + for (int i = PLATFORM_MEMORY_DATA_SIZE-1; i >= 0; --i) { + printf("%02x", data[i]); } printf("\n");*/ @@ -455,22 +459,20 @@ class opae_sim::Impl { dram_queue_.push(mem_req); } else if (device_->avs_read[b]) { + // process read request auto mem_req = new mem_req_t(); mem_req->addr = device_->avs_address[b]; mem_req->bank_id = b; - ram_->read(mem_req->data.data(), byte_addr, PLATFORM_PARAM_LOCAL_MEMORY_DATA_SIZE); + ram_->read(mem_req->data.data(), byte_addr, PLATFORM_MEMORY_DATA_SIZE); mem_req->write = false; mem_req->ready = false; pending_mem_reqs_[b].emplace_back(mem_req); - /*printf("%0ld: [sim] MEM Rd Req: bank=%d, addr=0x%lx, pending={", timestamp, b, mem_req.addr * PLATFORM_PARAM_LOCAL_MEMORY_DATA_SIZE); - for (auto& req : pending_mem_reqs_[b]) { - if (req.cycles_left != 0) - printf(" !%0x", req.addr * PLATFORM_PARAM_LOCAL_MEMORY_DATA_SIZE); - else - printf(" %0x", req.addr * PLATFORM_PARAM_LOCAL_MEMORY_DATA_SIZE); + /*printf("%0ld: [sim] MEM Rd Req: bank=%d, addr=0x%lx, pending={", timestamp, b, byte_addr); + for (int i = PLATFORM_MEMORY_DATA_SIZE-1; i >= 0; --i) { + printf("%02x", mem_req->data[i]); } - printf("}\n");*/ + printf("\n");*/ // send dram request dram_queue_.push(mem_req); @@ -481,7 +483,7 @@ class opae_sim::Impl { } typedef struct { - std::array data; + std::array data; uint32_t addr; uint32_t bank_id; bool write; @@ -514,9 +516,10 @@ class opae_sim::Impl { bool stop_; std::unordered_map host_buffers_; - int64_t host_buffer_ids_; + uint64_t host_buffer_ids_; + uint64_t mem_bank_size_; - std::list pending_mem_reqs_[PLATFORM_PARAM_LOCAL_MEMORY_BANKS]; + std::list pending_mem_reqs_[PLATFORM_MEMORY_BANKS]; std::list cci_reads_; std::list cci_writes_; diff --git a/sim/opaesim/vortex_afu_shim.sv b/sim/opaesim/vortex_afu_shim.sv index 2a0d63e42e..e494ada8e2 100644 --- a/sim/opaesim/vortex_afu_shim.sv +++ b/sim/opaesim/vortex_afu_shim.sv @@ -78,22 +78,22 @@ 
module vortex_afu_shim import local_mem_cfg_pkg::*; import ccip_if_pkg::*; ( output t_ccip_mmioData af2cp_sTxPort_c2_data, // Avalon signals for local memory access - output t_local_mem_data avs_writedata [`PLATFORM_PARAM_LOCAL_MEMORY_BANKS], - input t_local_mem_data avs_readdata [`PLATFORM_PARAM_LOCAL_MEMORY_BANKS], - output t_local_mem_addr avs_address [`PLATFORM_PARAM_LOCAL_MEMORY_BANKS], - input logic avs_waitrequest [`PLATFORM_PARAM_LOCAL_MEMORY_BANKS], - output logic avs_write [`PLATFORM_PARAM_LOCAL_MEMORY_BANKS], - output logic avs_read [`PLATFORM_PARAM_LOCAL_MEMORY_BANKS], - output t_local_mem_byte_mask avs_byteenable [`PLATFORM_PARAM_LOCAL_MEMORY_BANKS], - output t_local_mem_burst_cnt avs_burstcount [`PLATFORM_PARAM_LOCAL_MEMORY_BANKS], - input avs_readdatavalid [`PLATFORM_PARAM_LOCAL_MEMORY_BANKS] + output t_local_mem_data avs_writedata [`PLATFORM_MEMORY_BANKS], + input t_local_mem_data avs_readdata [`PLATFORM_MEMORY_BANKS], + output t_local_mem_addr avs_address [`PLATFORM_MEMORY_BANKS], + input logic avs_waitrequest [`PLATFORM_MEMORY_BANKS], + output logic avs_write [`PLATFORM_MEMORY_BANKS], + output logic avs_read [`PLATFORM_MEMORY_BANKS], + output t_local_mem_byte_mask avs_byteenable [`PLATFORM_MEMORY_BANKS], + output t_local_mem_burst_cnt avs_burstcount [`PLATFORM_MEMORY_BANKS], + input avs_readdatavalid [`PLATFORM_MEMORY_BANKS] ); t_if_ccip_Rx cp2af_sRxPort; t_if_ccip_Tx af2cp_sTxPort; vortex_afu #( - .NUM_LOCAL_MEM_BANKS(`PLATFORM_PARAM_LOCAL_MEMORY_BANKS) + .NUM_LOCAL_MEM_BANKS(`PLATFORM_MEMORY_BANKS) ) afu ( .clk(clk), .reset(reset), diff --git a/sim/xrtsim/Makefile b/sim/xrtsim/Makefile index 4ac3f6edd9..4b95d55bd2 100644 --- a/sim/xrtsim/Makefile +++ b/sim/xrtsim/Makefile @@ -32,14 +32,21 @@ DBG_SCOPE_FLAGS += -DDBG_SCOPE_FETCH DBG_SCOPE_FLAGS += -DDBG_SCOPE_LSU # AFU parameters -ifeq (,$(findstring M_AXI_MEM_NUM_BANKS,$(CONFIGS))) - CONFIGS += -DM_AXI_MEM_NUM_BANKS=1 +ifeq (,$(findstring PLATFORM_MEMORY_BANKS,$(CONFIGS))) + CONFIGS += 
-DPLATFORM_MEMORY_BANKS=2 endif -ifeq (,$(findstring M_AXI_MEM_ADDR_WIDTH,$(CONFIGS))) - CONFIGS += -DM_AXI_MEM_ADDR_WIDTH=32 +ifeq (,$(findstring PLATFORM_MEMORY_ADDR_WIDTH,$(CONFIGS))) + ifeq ($(XLEN),64) + CONFIGS += -DPLATFORM_MEMORY_ADDR_WIDTH=41 + else + CONFIGS += -DPLATFORM_MEMORY_ADDR_WIDTH=25 + endif endif -ifeq (,$(findstring M_AXI_MEM_DATA_WIDTH,$(CONFIGS))) - CONFIGS += -DM_AXI_MEM_DATA_WIDTH=512 +ifeq (,$(findstring PLATFORM_MEMORY_DATA_WIDTH,$(CONFIGS))) + CONFIGS += -DPLATFORM_MEMORY_DATA_WIDTH=512 +endif +ifeq (,$(findstring PLATFORM_MEMORY_OFFSET,$(CONFIGS))) + CONFIGS += -DPLATFORM_MEMORY_OFFSET=0 endif DBG_FLAGS += -DDEBUG_LEVEL=$(DEBUG) -DVCD_OUTPUT $(DBG_TRACE_FLAGS) diff --git a/sim/xrtsim/vortex_afu_shim.sv b/sim/xrtsim/vortex_afu_shim.sv index 648e25e7aa..04350055b4 100644 --- a/sim/xrtsim/vortex_afu_shim.sv +++ b/sim/xrtsim/vortex_afu_shim.sv @@ -11,22 +11,22 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-`include "VX_platform.vh" `include "vortex_afu.vh" module vortex_afu_shim #( - parameter C_S_AXI_CTRL_ADDR_WIDTH = 8, + parameter C_S_AXI_CTRL_ADDR_WIDTH = 8, parameter C_S_AXI_CTRL_DATA_WIDTH = 32, - parameter C_M_AXI_MEM_ID_WIDTH = `M_AXI_MEM_ID_WIDTH, - parameter C_M_AXI_MEM_ADDR_WIDTH = 64, - parameter C_M_AXI_MEM_DATA_WIDTH = `VX_MEM_DATA_WIDTH + parameter C_M_AXI_MEM_ID_WIDTH = `PLATFORM_MEMORY_ID_WIDTH, + parameter C_M_AXI_MEM_ADDR_WIDTH = `PLATFORM_MEMORY_ADDR_WIDTH + $clog2(`PLATFORM_MEMORY_DATA_WIDTH/8), + parameter C_M_AXI_MEM_DATA_WIDTH = `PLATFORM_MEMORY_DATA_WIDTH, + parameter C_M_AXI_MEM_NUM_BANKS = `PLATFORM_MEMORY_BANKS ) ( // System signals input wire ap_clk, input wire ap_rst_n, // AXI4 master interface - `REPEAT (`M_AXI_MEM_NUM_BANKS, GEN_AXI_MEM, REPEAT_COMMA), + `REPEAT (`PLATFORM_MEMORY_BANKS, GEN_AXI_MEM, REPEAT_COMMA), // AXI4-Lite slave interface input wire s_axi_ctrl_awvalid, @@ -50,35 +50,38 @@ module vortex_afu_shim #( output wire interrupt `IGNORE_WARNINGS_END ); - vortex_afu #( - .C_S_AXI_CTRL_ADDR_WIDTH(C_S_AXI_CTRL_ADDR_WIDTH), - .C_S_AXI_CTRL_DATA_WIDTH(C_S_AXI_CTRL_DATA_WIDTH), - .C_M_AXI_MEM_ID_WIDTH(C_M_AXI_MEM_ID_WIDTH), - .C_M_AXI_MEM_ADDR_WIDTH(C_M_AXI_MEM_ADDR_WIDTH), - .C_M_AXI_MEM_DATA_WIDTH(C_M_AXI_MEM_DATA_WIDTH) - ) afu ( - .ap_clk(ap_clk), - .ap_rst_n(ap_rst_n), - // AXI4 master interface - `REPEAT (`M_AXI_MEM_NUM_BANKS, AXI_MEM_ARGS, REPEAT_COMMA), - .s_axi_ctrl_awvalid(s_axi_ctrl_awvalid), - .s_axi_ctrl_awready(s_axi_ctrl_awready), - .s_axi_ctrl_awaddr(s_axi_ctrl_awaddr), - .s_axi_ctrl_wvalid(s_axi_ctrl_wvalid), - .s_axi_ctrl_wready(s_axi_ctrl_wready), - .s_axi_ctrl_wdata(s_axi_ctrl_wdata), - .s_axi_ctrl_wstrb(s_axi_ctrl_wstrb), - .s_axi_ctrl_arvalid(s_axi_ctrl_arvalid), - .s_axi_ctrl_arready(s_axi_ctrl_arready), - .s_axi_ctrl_araddr(s_axi_ctrl_araddr), - .s_axi_ctrl_rvalid(s_axi_ctrl_rvalid), - .s_axi_ctrl_rready(s_axi_ctrl_rready), - .s_axi_ctrl_rdata(s_axi_ctrl_rdata), - .s_axi_ctrl_rresp(s_axi_ctrl_rresp), - 
.s_axi_ctrl_bvalid(s_axi_ctrl_bvalid), - .s_axi_ctrl_bready(s_axi_ctrl_bready), - .s_axi_ctrl_bresp(s_axi_ctrl_bresp), - .interrupt(interrupt) - ); + VX_afu_wrap #( + .C_S_AXI_CTRL_ADDR_WIDTH (C_S_AXI_CTRL_ADDR_WIDTH), + .C_S_AXI_CTRL_DATA_WIDTH (C_S_AXI_CTRL_DATA_WIDTH), + .C_M_AXI_MEM_ID_WIDTH (C_M_AXI_MEM_ID_WIDTH), + .C_M_AXI_MEM_ADDR_WIDTH (C_M_AXI_MEM_ADDR_WIDTH), + .C_M_AXI_MEM_DATA_WIDTH (C_M_AXI_MEM_DATA_WIDTH), + .C_M_AXI_MEM_NUM_BANKS (C_M_AXI_MEM_NUM_BANKS) + ) afu_wrap ( + .clk (ap_clk), + .reset (~ap_rst_n), + + `REPEAT (`PLATFORM_MEMORY_BANKS, AXI_MEM_ARGS, REPEAT_COMMA), + + .s_axi_ctrl_awvalid (s_axi_ctrl_awvalid), + .s_axi_ctrl_awready (s_axi_ctrl_awready), + .s_axi_ctrl_awaddr (s_axi_ctrl_awaddr), + .s_axi_ctrl_wvalid (s_axi_ctrl_wvalid), + .s_axi_ctrl_wready (s_axi_ctrl_wready), + .s_axi_ctrl_wdata (s_axi_ctrl_wdata), + .s_axi_ctrl_wstrb (s_axi_ctrl_wstrb), + .s_axi_ctrl_arvalid (s_axi_ctrl_arvalid), + .s_axi_ctrl_arready (s_axi_ctrl_arready), + .s_axi_ctrl_araddr (s_axi_ctrl_araddr), + .s_axi_ctrl_rvalid (s_axi_ctrl_rvalid), + .s_axi_ctrl_rready (s_axi_ctrl_rready), + .s_axi_ctrl_rdata (s_axi_ctrl_rdata), + .s_axi_ctrl_rresp (s_axi_ctrl_rresp), + .s_axi_ctrl_bvalid (s_axi_ctrl_bvalid), + .s_axi_ctrl_bready (s_axi_ctrl_bready), + .s_axi_ctrl_bresp (s_axi_ctrl_bresp), + + .interrupt (interrupt) + ); endmodule diff --git a/sim/xrtsim/xrt_sim.cpp b/sim/xrtsim/xrt_sim.cpp index a2725f32db..1a63cdfdcf 100644 --- a/sim/xrtsim/xrt_sim.cpp +++ b/sim/xrtsim/xrt_sim.cpp @@ -37,7 +37,7 @@ #include -#define M_AXI_MEM_DATA_SIZE (M_AXI_MEM_DATA_WIDTH/8) +#define PLATFORM_MEMORY_DATA_SIZE (PLATFORM_MEMORY_DATA_WIDTH/8) #ifndef MEM_CLOCK_RATIO #define MEM_CLOCK_RATIO 1 @@ -59,10 +59,24 @@ #define RAM_PAGE_SIZE 4096 -#define MEM_BANK_SIZE (1ull << M_AXI_MEM_ADDR_WIDTH) - #define CPU_GPU_LATENCY 200 +#if PLATFORM_MEMORY_ADDR_WIDTH > 32 + typedef QData Vl_m_addr_t; +#else + typedef IData Vl_m_addr_t; +#endif + +#if PLATFORM_MEMORY_DATA_WIDTH > 64 + typedef 
VlWide<(PLATFORM_MEMORY_DATA_WIDTH/32)> Vl_m_data_t; +#else +#if PLATFORM_MEMORY_DATA_WIDTH > 32 + typedef QData Vl_m_data_t; +#else + typedef IData Vl_m_data_t; +#endif +#endif + using namespace vortex; static uint64_t timestamp = 0; @@ -134,7 +148,7 @@ class xrt_sim::Impl { if (future_.valid()) { future_.wait(); } - for (int i = 0; i < M_AXI_MEM_NUM_BANKS; ++i) { + for (int i = 0; i < PLATFORM_MEMORY_BANKS; ++i) { delete mem_alloc_[i]; } if (ram_) { @@ -169,15 +183,18 @@ class xrt_sim::Impl { tfp_->open("trace.vcd"); #endif + // calculate memory bank size + mem_bank_size_ = ((1ull << PLATFORM_MEMORY_ADDR_WIDTH) / PLATFORM_MEMORY_BANKS) * PLATFORM_MEMORY_DATA_SIZE; + // allocate RAM ram_ = new RAM(0, RAM_PAGE_SIZE); // initialize AXI memory interfaces - MP_M_AXI_MEM(M_AXI_MEM_NUM_BANKS); + MP_M_AXI_MEM(PLATFORM_MEMORY_BANKS); // initialize memory allocator - for (int i = 0; i < M_AXI_MEM_NUM_BANKS; ++i) { - mem_alloc_[i] = new MemoryAllocator(0, MEM_BANK_SIZE, 4096, 64); + for (int i = 0; i < PLATFORM_MEMORY_BANKS; ++i) { + mem_alloc_[i] = new MemoryAllocator(0, mem_bank_size_, 4096, 64); } // reset the device @@ -198,13 +215,13 @@ class xrt_sim::Impl { } int mem_alloc(uint64_t size, uint32_t bank_id, uint64_t* addr) { - if (bank_id >= M_AXI_MEM_NUM_BANKS) + if (bank_id >= PLATFORM_MEMORY_BANKS) return -1; return mem_alloc_[bank_id]->allocate(size, addr); } int mem_free(uint32_t bank_id, uint64_t addr) { - if (bank_id >= M_AXI_MEM_NUM_BANKS) + if (bank_id >= PLATFORM_MEMORY_BANKS) return -1; return mem_alloc_[bank_id]->release(addr); } @@ -212,11 +229,11 @@ class xrt_sim::Impl { int mem_write(uint32_t bank_id, uint64_t addr, uint64_t size, const void* data) { std::lock_guard guard(mutex_); - if (bank_id >= M_AXI_MEM_NUM_BANKS) + if (bank_id >= PLATFORM_MEMORY_BANKS) return -1; - uint64_t base_addr = uint64_t(bank_id) * MEM_BANK_SIZE + addr; + uint64_t base_addr = bank_id * mem_bank_size_ + addr; ram_->write(data, base_addr, size); - /*printf("%0ld: [sim] 
xrt-mem-write: addr=0x%lx, size=%ld, data=0x", timestamp, base_addr, size); + /*printf("%0ld: [sim] xrt-mem-write: bank_id=%0d, addr=0x%lx, size=%ld, data=0x", timestamp, bank_id, base_addr, size); for (int i = size-1; i >= 0; --i) { printf("%02x", ((const uint8_t*)data)[i]); } @@ -227,11 +244,11 @@ class xrt_sim::Impl { int mem_read(uint32_t bank_id, uint64_t addr, uint64_t size, void* data) { std::lock_guard guard(mutex_); - if (bank_id >= M_AXI_MEM_NUM_BANKS) + if (bank_id >= PLATFORM_MEMORY_BANKS) return -1; - uint64_t base_addr = uint64_t(bank_id) * MEM_BANK_SIZE + addr; + uint64_t base_addr = bank_id * mem_bank_size_ + addr; ram_->read(data, base_addr, size); - /*printf("%0ld: [sim] xrt-mem-read: addr=0x%lx, size=%ld, data=0x", timestamp, base_addr, size); + /*printf("%0ld: [sim] xrt-mem-read: bank_id=%0d, addr=0x%lx, size=%ld, data=0x", timestamp, bank_id, base_addr, size); for (int i = size-1; i >= 0; --i) { printf("%02x", ((uint8_t*)data)[i]); } @@ -307,7 +324,7 @@ class xrt_sim::Impl { reqs.clear(); } - for (int i = 0; i < M_AXI_MEM_NUM_BANKS; ++i) { + for (int i = 0; i < PLATFORM_MEMORY_BANKS; ++i) { std::queue empty; std::swap(dram_queues_[i], empty); } @@ -334,7 +351,7 @@ class xrt_sim::Impl { void tick() { this->axi_mem_bus_eval(); - for (int i = 0; i < M_AXI_MEM_NUM_BANKS; ++i) { + for (int i = 0; i < PLATFORM_MEMORY_BANKS; ++i) { if (!dram_queues_[i].empty()) { auto mem_req = dram_queues_[i].front(); if (dram_sim_.send_request(mem_req->write, mem_req->addr, i, [](void* arg) { @@ -394,7 +411,7 @@ class xrt_sim::Impl { } void axi_mem_bus_reset() { - for (int i = 0; i < M_AXI_MEM_NUM_BANKS; ++i) { + for (int i = 0; i < PLATFORM_MEMORY_BANKS; ++i) { // address read request *m_axi_mem_[i].arready = 1; @@ -418,7 +435,7 @@ class xrt_sim::Impl { } void axi_mem_bus_eval() { - for (int i = 0; i < M_AXI_MEM_NUM_BANKS; ++i) { + for (int i = 0; i < PLATFORM_MEMORY_BANKS; ++i) { // handle read responses if (m_axi_states_[i].read_rsp_pending && 
(*m_axi_mem_[i].rready)) { *m_axi_mem_[i].rvalid = 0; @@ -434,7 +451,7 @@ class xrt_sim::Impl { *m_axi_mem_[i].rid = mem_rsp->tag; *m_axi_mem_[i].rresp = 0; *m_axi_mem_[i].rlast = 1; - memcpy(m_axi_mem_[i].rdata->data(), mem_rsp->data.data(), M_AXI_MEM_DATA_SIZE); + memcpy(m_axi_mem_[i].rdata->data(), mem_rsp->data.data(), PLATFORM_MEMORY_DATA_SIZE); pending_mem_reqs_[i].erase(mem_rsp_it); m_axi_states_[i].read_rsp_pending = true; delete mem_rsp; @@ -465,14 +482,14 @@ class xrt_sim::Impl { if (*m_axi_mem_[i].arvalid && *m_axi_mem_[i].arready) { auto mem_req = new mem_req_t(); mem_req->tag = *m_axi_mem_[i].arid; - mem_req->addr = uint64_t(*m_axi_mem_[i].araddr) * M_AXI_MEM_NUM_BANKS + i * M_AXI_MEM_DATA_SIZE; - ram_->read(mem_req->data.data(), mem_req->addr, M_AXI_MEM_DATA_SIZE); + mem_req->addr = i * mem_bank_size_ + uint64_t(*m_axi_mem_[i].araddr) * PLATFORM_MEMORY_DATA_SIZE; + ram_->read(mem_req->data.data(), mem_req->addr, PLATFORM_MEMORY_DATA_SIZE); mem_req->write = false; mem_req->ready = false; pending_mem_reqs_[i].emplace_back(mem_req); /*printf("%0ld: [sim] axi-mem-read: bank=%d, addr=0x%lx, tag=0x%x, data=0x", timestamp, i, mem_req->addr, mem_req->tag); - for (int i = M_AXI_MEM_DATA_SIZE-1; i >= 0; --i) { + for (int i = PLATFORM_MEMORY_DATA_SIZE-1; i >= 0; --i) { printf("%02x", mem_req->data[i]); } printf("\n");*/ @@ -494,9 +511,9 @@ class xrt_sim::Impl { auto byteen = *m_axi_mem_[i].wstrb; auto data = (uint8_t*)m_axi_mem_[i].wdata->data(); - auto byte_addr = m_axi_states_[i].write_req_addr * M_AXI_MEM_NUM_BANKS + i * M_AXI_MEM_DATA_SIZE; + auto byte_addr = i * mem_bank_size_ + m_axi_states_[i].write_req_addr * PLATFORM_MEMORY_DATA_SIZE; - for (int i = 0; i < M_AXI_MEM_DATA_SIZE; i++) { + for (int i = 0; i < PLATFORM_MEMORY_DATA_SIZE; i++) { if ((byteen >> i) & 0x1) { (*ram_)[byte_addr + i] = data[i]; } @@ -510,7 +527,7 @@ class xrt_sim::Impl { pending_mem_reqs_[i].emplace_back(mem_req); /*printf("%0ld: [sim] axi-mem-write: bank=%d, addr=0x%lx, 
byteen=0x%lx, tag=0x%x, data=0x", timestamp, i, mem_req->addr, byteen, mem_req->tag); - for (int i = M_AXI_MEM_DATA_SIZE-1; i >= 0; --i) { + for (int i = PLATFORM_MEMORY_DATA_SIZE-1; i >= 0; --i) { printf("%02x", data[i]); } printf("\n");*/ @@ -535,7 +552,7 @@ class xrt_sim::Impl { } m_axi_state_t; typedef struct { - std::array data; + std::array data; uint32_t tag; uint64_t addr; bool write; @@ -545,22 +562,22 @@ class xrt_sim::Impl { typedef struct { CData* awvalid; CData* awready; - QData* awaddr; + Vl_m_addr_t* awaddr; IData* awid; CData* awlen; CData* wvalid; CData* wready; - VlWide<16>* wdata; + Vl_m_data_t* wdata; QData* wstrb; CData* wlast; CData* arvalid; CData* arready; - QData* araddr; + Vl_m_addr_t* araddr; IData* arid; CData* arlen; CData* rvalid; CData* rready; - VlWide<16>* rdata; + Vl_m_data_t* rdata; CData* rlast; IData* rid; CData* rresp; @@ -573,21 +590,22 @@ class xrt_sim::Impl { Vvortex_afu_shim* device_; RAM* ram_; DramSim dram_sim_; + uint64_t mem_bank_size_; std::future future_; bool stop_; std::mutex mutex_; - std::list pending_mem_reqs_[M_AXI_MEM_NUM_BANKS]; + std::list pending_mem_reqs_[PLATFORM_MEMORY_BANKS]; - m_axi_mem_t m_axi_mem_[M_AXI_MEM_NUM_BANKS]; + m_axi_mem_t m_axi_mem_[PLATFORM_MEMORY_BANKS]; - MemoryAllocator* mem_alloc_[M_AXI_MEM_NUM_BANKS]; + MemoryAllocator* mem_alloc_[PLATFORM_MEMORY_BANKS]; - m_axi_state_t m_axi_states_[M_AXI_MEM_NUM_BANKS]; + m_axi_state_t m_axi_states_[PLATFORM_MEMORY_BANKS]; - std::queue dram_queues_[M_AXI_MEM_NUM_BANKS]; + std::queue dram_queues_[PLATFORM_MEMORY_BANKS]; #ifdef VCD_OUTPUT VerilatedVcdC* tfp_; From 54f0c8e270c19ae4b554f365fff64a47c395d301 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sun, 22 Sep 2024 22:31:14 -0700 Subject: [PATCH 206/407] scope analyzer optimization --- hw/rtl/afu/opae/vortex_afu.sv | 2 +- hw/rtl/afu/xrt/VX_afu_ctrl.sv | 2 +- hw/rtl/libs/VX_fifo_queue.sv | 2 +- hw/rtl/libs/VX_mem_scheduler.sv | 2 +- hw/rtl/libs/VX_scope_tap.sv | 45 +++++++++++++++++++++++++++------ 
5 files changed, 41 insertions(+), 12 deletions(-) diff --git a/hw/rtl/afu/opae/vortex_afu.sv b/hw/rtl/afu/opae/vortex_afu.sv index 4060a30110..7a9ef4526b 100644 --- a/hw/rtl/afu/opae/vortex_afu.sv +++ b/hw/rtl/afu/opae/vortex_afu.sv @@ -189,7 +189,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ scope_bus_in <= 1; end if (cmd_scope_writing) begin - scope_bus_in <= 1'(cmd_scope_wdata >> scope_bus_ctr); + scope_bus_in <= cmd_scope_wdata[scope_bus_ctr]; scope_bus_ctr <= scope_bus_ctr - 6'd1; if (scope_bus_ctr == 0) begin cmd_scope_writing <= 0; diff --git a/hw/rtl/afu/xrt/VX_afu_ctrl.sv b/hw/rtl/afu/xrt/VX_afu_ctrl.sv index e30219270a..4c8cc95a04 100644 --- a/hw/rtl/afu/xrt/VX_afu_ctrl.sv +++ b/hw/rtl/afu/xrt/VX_afu_ctrl.sv @@ -233,7 +233,7 @@ module VX_afu_ctrl #( end end if (cmd_scope_writing) begin - scope_bus_out_r <= 1'(scope_bus_wdata >> scope_bus_ctr); + scope_bus_out_r <= scope_bus_wdata[scope_bus_ctr]; scope_bus_ctr <= scope_bus_ctr - 1; if (scope_bus_ctr == 0) begin cmd_scope_writing <= 0; diff --git a/hw/rtl/libs/VX_fifo_queue.sv b/hw/rtl/libs/VX_fifo_queue.sv index 7eb760e6bb..c5a4bf32e3 100644 --- a/hw/rtl/libs/VX_fifo_queue.sv +++ b/hw/rtl/libs/VX_fifo_queue.sv @@ -40,7 +40,7 @@ module VX_fifo_queue #( `STATIC_ASSERT(ALM_FULL < DEPTH, ("alm_full must be smaller than size!")) `STATIC_ASSERT(ALM_EMPTY > 0, ("alm_empty must be greater than 0!")) `STATIC_ASSERT(ALM_EMPTY < DEPTH, ("alm_empty must be smaller than size!")) - `STATIC_ASSERT(`IS_POW2(DEPTH), ("size must be a power of 2!")) + `STATIC_ASSERT(`IS_POW2(DEPTH), ("depth must be a power of 2!")) VX_pending_size #( .SIZE (DEPTH), diff --git a/hw/rtl/libs/VX_mem_scheduler.sv b/hw/rtl/libs/VX_mem_scheduler.sv index 229ff6cf20..913656bf8f 100644 --- a/hw/rtl/libs/VX_mem_scheduler.sv +++ b/hw/rtl/libs/VX_mem_scheduler.sv @@ -311,7 +311,7 @@ module VX_mem_scheduler #( assign mem_req_addr_b[i][j] = reqq_addr_s[r]; assign mem_req_flags_b[i][j] = reqq_flags_s[r]; assign 
mem_req_data_b[i][j] = reqq_data_s[r]; - end else begin : g_extra + end else begin : g_padding assign mem_req_mask_b[i][j] = 0; assign mem_req_byteen_b[i][j] = '0; assign mem_req_addr_b[i][j] = '0; diff --git a/hw/rtl/libs/VX_scope_tap.sv b/hw/rtl/libs/VX_scope_tap.sv index f77a4e7447..c3d111c059 100644 --- a/hw/rtl/libs/VX_scope_tap.sv +++ b/hw/rtl/libs/VX_scope_tap.sv @@ -17,9 +17,9 @@ module VX_scope_tap #( parameter SCOPE_ID = 0, // scope identifier parameter SCOPE_IDW = 8, // scope identifier width - parameter TRIGGERW = 16, // trigger signals width - parameter PROBEW = 256, // probe signal width - parameter DEPTH = 1024, // trace buffer depth + parameter TRIGGERW = 32, // trigger signals width + parameter PROBEW = 4999, // probe signal width + parameter DEPTH = 8192, // trace buffer depth parameter IDLE_CTRW = 32, // idle time between triggers counter width parameter TX_DATAW = 64 // transfer data width ) ( @@ -38,6 +38,7 @@ module VX_scope_tap #( localparam DATA_BITS = `LOG2UP(DATAW); localparam ADDRW = `CLOG2(DEPTH); localparam MAX_IDLE_CTR = (2 ** IDLE_CTRW) - 1; + localparam TX_DATA_BLOCKS = `CDIV(DATAW, TX_DATAW); localparam CTRL_STATE_IDLE = 2'd0; localparam CTRL_STATE_RECV = 2'd1; @@ -65,6 +66,7 @@ module VX_scope_tap #( localparam GET_TYPE_BITS = 2; `STATIC_ASSERT ((IDLE_CTRW <= TX_DATAW), ("invalid parameter")) + `STATIC_ASSERT(`IS_POW2(DEPTH), ("depth must be a power of 2!")) reg [TAP_STATE_BITS-1:0] tap_state; reg [CTRL_STATE_BITS-1:0] ctrl_state; @@ -94,6 +96,8 @@ module VX_scope_tap #( VX_dp_ram #( .DATAW (IDLE_CTRW), .SIZE (DEPTH), + .OUT_REG (1), + .READ_ENABLE (0), .NO_RWCHECK (1) ) delta_store ( .clk (clk), @@ -115,6 +119,8 @@ module VX_scope_tap #( VX_dp_ram #( .DATAW (DATAW), .SIZE (DEPTH), + .OUT_REG (1), + .READ_ENABLE (0), .NO_RWCHECK (1) ) data_store ( .clk (clk), @@ -214,14 +220,12 @@ module VX_scope_tap #( reg [TX_DATA_BITS-1:0] ser_tx_ctr; reg [DATA_BITS-1:0] read_offset; reg is_read_data; + reg [1:0] read_en; wire 
[CMD_TYPE_BITS-1:0] cmd_type = ser_buf_in[CMD_TYPE_BITS-1:0]; wire [SCOPE_IDW-1:0] cmd_scope_id = ser_buf_in_n[CMD_TYPE_BITS +: SCOPE_IDW]; wire [TX_DATAW-CMD_TYPE_BITS-SCOPE_IDW-1:0] cmd_data = ser_buf_in[TX_DATAW-1:CMD_TYPE_BITS+SCOPE_IDW]; - wire [TX_DATAW-1:0] data_chunk = TX_DATAW'(DATAW'(data_value >> read_offset)); - wire [TX_DATAW-1:0] get_data = is_read_data ? data_chunk : TX_DATAW'(delta_value); - wire [ADDRW-1:0] raddr_n = raddr + ADDRW'(1); always @(posedge clk) begin @@ -235,9 +239,11 @@ module VX_scope_tap #( raddr <= '0; is_read_data<= 0; ser_tx_ctr <= '0; + read_en <= '0; end else begin bus_out_r <= 0; cmd_start <= 0; + read_en <= '0; case (ctrl_state) CTRL_STATE_IDLE: begin if (bus_in) begin @@ -305,7 +311,7 @@ module VX_scope_tap #( `endif end GET_TYPE_DATA: begin - bus_out_r <= 1'(get_data >> ser_tx_ctr); + read_en <= {is_read_data, 1'b1}; if (ser_tx_ctr == 0) begin if (is_read_data) begin if (DATAW > TX_DATAW) begin @@ -349,7 +355,30 @@ module VX_scope_tap #( end end - assign bus_out = bus_out_r; + wire [TX_DATA_BLOCKS-1:0][TX_DATAW-1:0] data_blocks; + for (genvar i = 0; i < TX_DATA_BLOCKS; ++i) begin : g_data_blocks + for (genvar j = 0; j < TX_DATAW; ++j) begin : g_j + localparam k = i * TX_DATAW + j; + if (k < DATAW) begin : g_valid + assign data_blocks[i][j] = data_value[k]; + end else begin : g_padding + assign data_blocks[i][j] = '0; + end + end + end + + wire [TX_DATAW-1:0] get_data = read_en[1] ? data_blocks[read_offset] : TX_DATAW'(delta_value); + wire bus_out_w = read_en[0] ? 
get_data[ser_tx_ctr] : bus_out_r; + + VX_pipe_register #( + .DATAW (1) + ) buf_out ( + .clk (clk), + .reset (reset), + .enable (1'b1), + .data_in (bus_out_w), + .data_out (bus_out) + ); endmodule `TRACING_ON From 5e123d0507da426e7852e9555a622d1ffe98268a Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sun, 22 Sep 2024 22:31:54 -0700 Subject: [PATCH 207/407] minor update --- hw/rtl/libs/VX_stream_switch.sv | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hw/rtl/libs/VX_stream_switch.sv b/hw/rtl/libs/VX_stream_switch.sv index 01217b6684..e3848e4c3a 100644 --- a/hw/rtl/libs/VX_stream_switch.sv +++ b/hw/rtl/libs/VX_stream_switch.sv @@ -46,7 +46,7 @@ module VX_stream_switch #( if (ii < NUM_INPUTS) begin : g_valid assign valid_in_w[i][j] = valid_in[ii]; assign data_in_w[i][j] = data_in[ii]; - end else begin : g_extra + end else begin : g_padding assign valid_in_w[i][j] = 0; assign data_in_w[i][j] = '0; end @@ -121,7 +121,7 @@ module VX_stream_switch #( .valid_out (valid_out[ii]), .ready_out (ready_out[ii]) ); - end else begin : g_extra + end else begin : g_padding `UNUSED_VAR (valid_out_w[i][j]) assign ready_out_w[i][j] = '0; end From f5eca75311b655a53a8c5e30d25a359e2e52b5f1 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sun, 22 Sep 2024 22:43:48 -0700 Subject: [PATCH 208/407] handling synthesis builds with simulation enabled (e.g xrt with hw_emu) --- hw/rtl/VX_platform.vh | 124 ++++++++++++++++++------------------------ 1 file changed, 54 insertions(+), 70 deletions(-) diff --git a/hw/rtl/VX_platform.vh b/hw/rtl/VX_platform.vh index 7f6805c509..3e9042737d 100644 --- a/hw/rtl/VX_platform.vh +++ b/hw/rtl/VX_platform.vh @@ -22,61 +22,31 @@ /////////////////////////////////////////////////////////////////////////////// -`ifdef VIVADO -`define STRING -`else -`define STRING string -`endif - -`ifdef SYNTHESIS - -`define TRACING_ON -`define TRACING_OFF - -`ifndef NDEBUG - `define DEBUG_BLOCK(x) x - `define TRACE(level, args) \ - if (level <= `DEBUG_LEVEL) 
begin \ - $write args; \ - end -`else - `define DEBUG_BLOCK(x) - `define TRACE(level, args) -`endif - -`define IGNORE_UNOPTFLAT_BEGIN -`define IGNORE_UNOPTFLAT_END -`define IGNORE_UNUSED_BEGIN -`define IGNORE_UNUSED_END -`define IGNORE_WARNINGS_BEGIN -`define IGNORE_WARNINGS_END -`define UNUSED_PARAM(x) -`define UNUSED_SPARAM(x) -`define UNUSED_VAR(x) -`define UNUSED_PIN(x) . x () -`define UNUSED_ARG(x) x - -`define __SCOPE (* mark_debug="true" *) +`ifdef SIMULATION -`define __SCOPE_X +`define STATIC_ASSERT(cond, msg) \ +generate \ + /* verilator lint_off GENUNNAMED */ \ + if (!(cond)) $error msg; \ + /* verilator lint_on GENUNNAMED */ \ +endgenerate -`define __SCOPE_ON \ - `undef __SCOPE_X \ - `define __SCOPE_X `__SCOPE +`define ERROR(msg) \ + $error msg -`define __SCOPE_OFF \ - `undef __SCOPE_X \ - `define __SCOPE_X +`define ASSERT(cond, msg) \ + assert(cond) else $error msg -`else // not SYNTHESIS +`define RUNTIME_ASSERT(cond, msg) \ + always @(posedge clk) begin \ + assert(cond) else $error msg; \ + end `define __SCOPE `define __SCOPE_X `define __SCOPE_ON `define __SCOPE_OFF -`ifdef VERILATOR - `ifndef TRACING_ALL `define TRACING_ON /* verilator tracing_on */ `define TRACING_OFF /* verilator tracing_off */ @@ -148,7 +118,6 @@ `define UNUSED_ARG(x) /* verilator lint_off UNUSED */ \ x \ /* verilator lint_on UNUSED */ -`endif // not VERILATOR `ifdef SV_DPI `define TRACE(level, args) dpi_trace(level, $sformatf args); @@ -159,31 +128,43 @@ end `endif -`endif +`else // SYNTHESIS + +`define STATIC_ASSERT(cond, msg) +`define ERROR(msg) // +`define ASSERT(cond, msg) // +`define RUNTIME_ASSERT(cond, msg) + +`define DEBUG_BLOCK(x) +`define TRACE(level, args) + +`define TRACING_ON +`define TRACING_OFF + +`define IGNORE_UNOPTFLAT_BEGIN +`define IGNORE_UNOPTFLAT_END +`define IGNORE_UNUSED_BEGIN +`define IGNORE_UNUSED_END +`define IGNORE_WARNINGS_BEGIN +`define IGNORE_WARNINGS_END +`define UNUSED_PARAM(x) +`define UNUSED_SPARAM(x) +`define UNUSED_VAR(x) +`define UNUSED_PIN(x) 
. x () +`define UNUSED_ARG(x) x + +`define __SCOPE (* mark_debug="true" *) + +`define __SCOPE_X + +`define __SCOPE_ON \ + `undef __SCOPE_X \ + `define __SCOPE_X `__SCOPE + +`define __SCOPE_OFF \ + `undef __SCOPE_X \ + `define __SCOPE_X -`ifdef SIMULATION - `define STATIC_ASSERT(cond, msg) \ - generate \ - /* verilator lint_off GENUNNAMED */ \ - if (!(cond)) $error msg; \ - /* verilator lint_on GENUNNAMED */ \ - endgenerate - - `define ERROR(msg) \ - $error msg - - `define ASSERT(cond, msg) \ - assert(cond) else $error msg - - `define RUNTIME_ASSERT(cond, msg) \ - always @(posedge clk) begin \ - assert(cond) else $error msg; \ - end -`else // not SIMULATION - `define STATIC_ASSERT(cond, msg) - `define ERROR(msg) // - `define ASSERT(cond, msg) // - `define RUNTIME_ASSERT(cond, msg) `endif /////////////////////////////////////////////////////////////////////////////// @@ -195,6 +176,7 @@ `define NO_RW_RAM_CHECK (* altera_attribute = "-name add_pass_through_logic_to_inferred_rams off" *) `define DISABLE_BRAM (* ramstyle = "logic" *) `define PRESERVE_NET (* preserve *) +`define STRING string `elsif VIVADO `define MAX_FANOUT 8 `define IF_DATA_SIZE(x) $bits(x.data) @@ -202,6 +184,7 @@ `define NO_RW_RAM_CHECK (* rw_addr_collision = "no" *) `define DISABLE_BRAM (* ram_style = "registers" *) `define PRESERVE_NET (* keep = "true" *) +`define STRING `else `define MAX_FANOUT 8 `define IF_DATA_SIZE(x) x.DATA_WIDTH @@ -209,6 +192,7 @@ `define NO_RW_RAM_CHECK `define DISABLE_BRAM `define PRESERVE_NET +`define STRING string `endif /////////////////////////////////////////////////////////////////////////////// From 15ead4acf6887aca5e2a2b02e8bcc1cc1d799fc1 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sun, 22 Sep 2024 22:46:10 -0700 Subject: [PATCH 209/407] xrt with merge memory interface --- hw/rtl/afu/xrt/VX_afu_wrap.sv | 9 ++++++++- hw/rtl/afu/xrt/vortex_afu.v | 17 ++++++++++++++--- 2 files changed, 22 insertions(+), 4 deletions(-) diff --git a/hw/rtl/afu/xrt/VX_afu_wrap.sv 
b/hw/rtl/afu/xrt/VX_afu_wrap.sv index ca6fed1ae4..8530ee97aa 100644 --- a/hw/rtl/afu/xrt/VX_afu_wrap.sv +++ b/hw/rtl/afu/xrt/VX_afu_wrap.sv @@ -26,8 +26,11 @@ module VX_afu_wrap #( input wire reset, // AXI4 master interface +`ifdef PLATFORM_MERGED_MEMORY_INTERFACE + `REPEAT (1, GEN_AXI_MEM, REPEAT_COMMA), +`else `REPEAT (`PLATFORM_MEMORY_BANKS, GEN_AXI_MEM, REPEAT_COMMA), - +`endif // AXI4-Lite slave interface input wire s_axi_ctrl_awvalid, output wire s_axi_ctrl_awready, @@ -80,7 +83,11 @@ module VX_afu_wrap #( wire [1:0] m_axi_mem_rresp_a [C_M_AXI_MEM_NUM_BANKS]; // convert memory interface to array +`ifdef PLATFORM_MERGED_MEMORY_INTERFACE + `REPEAT (1, AXI_MEM_TO_ARRAY, REPEAT_SEMICOLON); +`else `REPEAT (`PLATFORM_MEMORY_BANKS, AXI_MEM_TO_ARRAY, REPEAT_SEMICOLON); +`endif reg [`CLOG2(`RESET_DELAY+1)-1:0] vx_reset_ctr; reg [15:0] vx_pending_writes; diff --git a/hw/rtl/afu/xrt/vortex_afu.v b/hw/rtl/afu/xrt/vortex_afu.v index 985d029cf5..94aced3eca 100644 --- a/hw/rtl/afu/xrt/vortex_afu.v +++ b/hw/rtl/afu/xrt/vortex_afu.v @@ -17,16 +17,25 @@ module vortex_afu #( parameter C_S_AXI_CTRL_ADDR_WIDTH = 8, parameter C_S_AXI_CTRL_DATA_WIDTH = 32, parameter C_M_AXI_MEM_ID_WIDTH = `PLATFORM_MEMORY_ID_WIDTH, - parameter C_M_AXI_MEM_ADDR_WIDTH = `PLATFORM_MEMORY_ADDR_WIDTH + $clog2(`PLATFORM_MEMORY_DATA_WIDTH/8), parameter C_M_AXI_MEM_DATA_WIDTH = `PLATFORM_MEMORY_DATA_WIDTH, +`ifdef SYNTHESIS + parameter C_M_AXI_MEM_ADDR_WIDTH = 64, + parameter C_M_AXI_MEM_NUM_BANKS = 1 +`else + parameter C_M_AXI_MEM_ADDR_WIDTH = `PLATFORM_MEMORY_ADDR_WIDTH + $clog(`PLATFORM_MEMORY_DATA_WIDTH/8), parameter C_M_AXI_MEM_NUM_BANKS = `PLATFORM_MEMORY_BANKS +`endif ) ( // System signals input wire ap_clk, input wire ap_rst_n, // AXI4 master interface +`ifdef PLATFORM_MERGED_MEMORY_INTERFACE + `REPEAT (1, GEN_AXI_MEM, REPEAT_COMMA), +`else `REPEAT (`PLATFORM_MEMORY_BANKS, GEN_AXI_MEM, REPEAT_COMMA), +`endif // AXI4-Lite slave interface input wire s_axi_ctrl_awvalid, @@ -60,9 +69,11 @@ module 
vortex_afu #( ) afu_wrap ( .clk (ap_clk), .reset (~ap_rst_n), - + `ifdef PLATFORM_MERGED_MEMORY_INTERFACE + `REPEAT (1, AXI_MEM_ARGS, REPEAT_COMMA), + `else `REPEAT (`PLATFORM_MEMORY_BANKS, AXI_MEM_ARGS, REPEAT_COMMA), - + `endif .s_axi_ctrl_awvalid (s_axi_ctrl_awvalid), .s_axi_ctrl_awready (s_axi_ctrl_awready), .s_axi_ctrl_awaddr (s_axi_ctrl_awaddr), From b146fab2909add2a29206f19bfce0d1ab0b8fc19 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sun, 22 Sep 2024 22:46:55 -0700 Subject: [PATCH 210/407] xrt kernel registers update --- hw/rtl/afu/xrt/VX_afu_ctrl.sv | 19 +++++++++---------- runtime/xrt/vortex.cpp | 7 ++++--- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/hw/rtl/afu/xrt/VX_afu_ctrl.sv b/hw/rtl/afu/xrt/VX_afu_ctrl.sv index 4c8cc95a04..a544983d62 100644 --- a/hw/rtl/afu/xrt/VX_afu_ctrl.sv +++ b/hw/rtl/afu/xrt/VX_afu_ctrl.sv @@ -107,22 +107,21 @@ module VX_afu_ctrl #( ADDR_DEV_0 = 8'h10, ADDR_DEV_1 = 8'h14, - //ADDR_DEV_CTRL = 8'h18, - ADDR_ISA_0 = 8'h1C, - ADDR_ISA_1 = 8'h20, - //ADDR_ISA_CTRL = 8'h24, + ADDR_ISA_0 = 8'h18, + ADDR_ISA_1 = 8'h1C, - ADDR_DCR_0 = 8'h28, - ADDR_DCR_1 = 8'h2C, - //ADDR_DCR_CTRL = 8'h30, + ADDR_DCR_0 = 8'h20, + ADDR_DCR_1 = 8'h24, `ifdef SCOPE - ADDR_SCP_0 = 8'h34, - ADDR_SCP_1 = 8'h38, - //ADDR_SCP_CTRL = 8'h3C, + ADDR_SCP_0 = 8'h28, + ADDR_SCP_1 = 8'h2C, `endif + ADDR_MEM_0 = 8'h30, + ADDR_MEM_1 = 8'h34, + ADDR_BITS = 8; localparam diff --git a/runtime/xrt/vortex.cpp b/runtime/xrt/vortex.cpp index 3acb9b3c67..9385457f5a 100644 --- a/runtime/xrt/vortex.cpp +++ b/runtime/xrt/vortex.cpp @@ -46,9 +46,10 @@ using namespace vortex; #define MMIO_CTL_ADDR 0x00 #define MMIO_DEV_ADDR 0x10 -#define MMIO_ISA_ADDR 0x1C -#define MMIO_DCR_ADDR 0x28 -#define MMIO_SCP_ADDR 0x34 +#define MMIO_ISA_ADDR 0x18 +#define MMIO_DCR_ADDR 0x20 +#define MMIO_SCP_ADDR 0x28 +#define MMIO_MEM_ADDR 0x30 #define CTL_AP_START (1 << 0) #define CTL_AP_DONE (1 << 1) From 8bb5e5ab8af42136f1372d8e4f1ca3e7edfa6741 Mon Sep 17 00:00:00 2001 From: 
Blaise Tine Date: Sun, 22 Sep 2024 22:47:23 -0700 Subject: [PATCH 211/407] build error fix --- hw/rtl/libs/VX_pending_size.sv | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/hw/rtl/libs/VX_pending_size.sv b/hw/rtl/libs/VX_pending_size.sv index 50737634f2..1e72cef192 100644 --- a/hw/rtl/libs/VX_pending_size.sv +++ b/hw/rtl/libs/VX_pending_size.sv @@ -35,7 +35,7 @@ module VX_pending_size #( `STATIC_ASSERT(INCRW <= SIZEW, ("invalid parameter: %d vs %d", INCRW, SIZEW)) `STATIC_ASSERT(DECRW <= SIZEW, ("invalid parameter: %d vs %d", DECRW, SIZEW)) - if (SIZE == 1) begin : g_size1 + if (SIZE == 1) begin : g_size_eq1 reg size_r; @@ -59,7 +59,7 @@ module VX_pending_size #( assign alm_full = 1'b1; assign size = size_r; - end else begin : g_sizeN + end else begin : g_size_gt1 reg empty_r, alm_empty_r; reg full_r, alm_full_r; @@ -124,7 +124,7 @@ module VX_pending_size #( end end - if (SIZE > 2) begin : g_sizeN + if (SIZE > 2) begin : g_size_gt2 wire is_empty_n = (used_r == ADDRW'(1)); wire is_full_n = (used_r == ADDRW'(SIZE-1)); @@ -152,7 +152,7 @@ module VX_pending_size #( end end - end else begin : g_size2 + end else begin : g_size_eq2 always @(posedge clk) begin if (reset) begin From e38c2c1fbaa2198122251c76355d961e4dae5e1d Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Mon, 23 Sep 2024 02:12:47 -0700 Subject: [PATCH 212/407] xilinx xrt platforms configuration --- ci/regression.sh.in | 6 +- hw/rtl/afu/opae/local_mem_cfg_pkg.sv | 2 +- hw/rtl/afu/opae/vortex_afu.sv | 4 +- hw/rtl/afu/xrt/VX_afu_ctrl.sv | 3 - hw/rtl/afu/xrt/vortex_afu.v | 2 +- hw/rtl/afu/xrt/vortex_afu.vh | 2 +- hw/syn/altera/dut/top/Makefile | 4 +- hw/syn/altera/opae/Makefile | 2 +- hw/syn/xilinx/xrt/Makefile | 21 ++--- hw/syn/xilinx/xrt/gen_xml.py | 75 +++++++++++++++++ hw/syn/xilinx/xrt/gen_xo.tcl | 2 +- hw/syn/xilinx/xrt/package_kernel.tcl | 115 ++++----------------------- hw/syn/xilinx/xrt/platforms.mk | 51 ++++++++++++ sim/opaesim/Makefile | 4 +- sim/opaesim/opae_sim.cpp | 2 +- 
sim/xrtsim/Makefile | 4 +- sim/xrtsim/vortex_afu_shim.sv | 2 +- sim/xrtsim/xrt_sim.cpp | 2 +- 18 files changed, 167 insertions(+), 136 deletions(-) create mode 100644 hw/syn/xilinx/xrt/gen_xml.py create mode 100644 hw/syn/xilinx/xrt/platforms.mk diff --git a/ci/regression.sh.in b/ci/regression.sh.in index 37f5d2b208..c45e8c3fff 100755 --- a/ci/regression.sh.in +++ b/ci/regression.sh.in @@ -275,9 +275,9 @@ config2() # test single-bank DRAM CONFIGS="-DPLATFORM_MEMORY_BANKS=1" ./ci/blackbox.sh --driver=opae --app=mstress - # test 27-bit DRAM address - CONFIGS="-DPLATFORM_MEMORY_ADDR_WIDTH=27" ./ci/blackbox.sh --driver=opae --app=mstress - CONFIGS="-DPLATFORM_MEMORY_ADDR_WIDTH=27" ./ci/blackbox.sh --driver=xrt --app=mstress + # test 33-bit DRAM address + CONFIGS="-DPLATFORM_MEMORY_ADDR_WIDTH=33" ./ci/blackbox.sh --driver=opae --app=mstress + CONFIGS="-DPLATFORM_MEMORY_ADDR_WIDTH=33" ./ci/blackbox.sh --driver=xrt --app=mstress echo "configuration-2 tests done!" } diff --git a/hw/rtl/afu/opae/local_mem_cfg_pkg.sv b/hw/rtl/afu/opae/local_mem_cfg_pkg.sv index 8b0ebaa0be..c63825548d 100644 --- a/hw/rtl/afu/opae/local_mem_cfg_pkg.sv +++ b/hw/rtl/afu/opae/local_mem_cfg_pkg.sv @@ -31,7 +31,7 @@ //`include "platform_afu_top_config.vh" `ifndef PLATFORM_PARAM_LOCAL_MEMORY_ADDR_WIDTH -`define PLATFORM_PARAM_LOCAL_MEMORY_ADDR_WIDTH `PLATFORM_MEMORY_ADDR_WIDTH +`define PLATFORM_PARAM_LOCAL_MEMORY_ADDR_WIDTH (`PLATFORM_MEMORY_ADDR_WIDTH - $clog2(`PLATFORM_MEMORY_DATA_WIDTH/8)) `endif `ifndef PLATFORM_PARAM_LOCAL_MEMORY_DATA_WIDTH diff --git a/hw/rtl/afu/opae/vortex_afu.sv b/hw/rtl/afu/opae/vortex_afu.sv index 7a9ef4526b..57b03cb210 100644 --- a/hw/rtl/afu/opae/vortex_afu.sv +++ b/hw/rtl/afu/opae/vortex_afu.sv @@ -96,12 +96,10 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ localparam STATE_DCR_WRITE = 4; localparam STATE_WIDTH = `CLOG2(STATE_DCR_WRITE+1); - localparam BANK_BYTE_ADDR_WIDTH = `PLATFORM_MEMORY_ADDR_WIDTH + 
`CLOG2(`PLATFORM_MEMORY_DATA_WIDTH/8); - wire [127:0] afu_id = `AFU_ACCEL_UUID; wire [63:0] dev_caps = {8'b0, - 5'(BANK_BYTE_ADDR_WIDTH-16), + 5'(`PLATFORM_MEMORY_ADDR_WIDTH-16), 3'(`CLOG2(`PLATFORM_MEMORY_BANKS)), 8'(`LMEM_ENABLED ? `LMEM_LOG_SIZE : 0), 16'(`NUM_CORES * `NUM_CLUSTERS), diff --git a/hw/rtl/afu/xrt/VX_afu_ctrl.sv b/hw/rtl/afu/xrt/VX_afu_ctrl.sv index a544983d62..1db8cc4e21 100644 --- a/hw/rtl/afu/xrt/VX_afu_ctrl.sv +++ b/hw/rtl/afu/xrt/VX_afu_ctrl.sv @@ -119,9 +119,6 @@ module VX_afu_ctrl #( ADDR_SCP_1 = 8'h2C, `endif - ADDR_MEM_0 = 8'h30, - ADDR_MEM_1 = 8'h34, - ADDR_BITS = 8; localparam diff --git a/hw/rtl/afu/xrt/vortex_afu.v b/hw/rtl/afu/xrt/vortex_afu.v index 94aced3eca..918474d521 100644 --- a/hw/rtl/afu/xrt/vortex_afu.v +++ b/hw/rtl/afu/xrt/vortex_afu.v @@ -22,7 +22,7 @@ module vortex_afu #( parameter C_M_AXI_MEM_ADDR_WIDTH = 64, parameter C_M_AXI_MEM_NUM_BANKS = 1 `else - parameter C_M_AXI_MEM_ADDR_WIDTH = `PLATFORM_MEMORY_ADDR_WIDTH + $clog(`PLATFORM_MEMORY_DATA_WIDTH/8), + parameter C_M_AXI_MEM_ADDR_WIDTH = `PLATFORM_MEMORY_ADDR_WIDTH, parameter C_M_AXI_MEM_NUM_BANKS = `PLATFORM_MEMORY_BANKS `endif ) ( diff --git a/hw/rtl/afu/xrt/vortex_afu.vh b/hw/rtl/afu/xrt/vortex_afu.vh index f35980c2ae..8018171e71 100644 --- a/hw/rtl/afu/xrt/vortex_afu.vh +++ b/hw/rtl/afu/xrt/vortex_afu.vh @@ -19,7 +19,7 @@ `endif `ifndef PLATFORM_MEMORY_ADDR_WIDTH -`define PLATFORM_MEMORY_ADDR_WIDTH 25 +`define PLATFORM_MEMORY_ADDR_WIDTH 31 `endif `ifndef PLATFORM_MEMORY_DATA_WIDTH diff --git a/hw/syn/altera/dut/top/Makefile b/hw/syn/altera/dut/top/Makefile index e4dfae274d..2a273e6981 100644 --- a/hw/syn/altera/dut/top/Makefile +++ b/hw/syn/altera/dut/top/Makefile @@ -12,9 +12,9 @@ ifeq (,$(findstring PLATFORM_MEMORY_BANKS,$(CONFIGS))) endif ifeq (,$(findstring PLATFORM_MEMORY_ADDR_WIDTH,$(CONFIGS))) ifeq ($(XLEN),64) - CONFIGS += -DPLATFORM_MEMORY_ADDR_WIDTH=41 + CONFIGS += -DPLATFORM_MEMORY_ADDR_WIDTH=47 else - CONFIGS += -DPLATFORM_MEMORY_ADDR_WIDTH=25 + CONFIGS 
+= -DPLATFORM_MEMORY_ADDR_WIDTH=31 endif endif ifeq (,$(findstring PLATFORM_MEMORY_DATA_WIDTH,$(CONFIGS))) diff --git a/hw/syn/altera/opae/Makefile b/hw/syn/altera/opae/Makefile index 19f9d0836d..61935f2e4e 100644 --- a/hw/syn/altera/opae/Makefile +++ b/hw/syn/altera/opae/Makefile @@ -98,7 +98,7 @@ ifdef PERF endif # ast dump flags -XML_CFLAGS = $(filter-out -DSYNTHESIS -DQUARTUS, $(CFLAGS)) $(RTL_PKGS) -I$(AFU_DIR)/ccip -I$(DPI_DIR) -DPLATFORM_PROVIDES_LOCAL_MEMORY -DPLATFORM_MEMORY_BANKS=1 -DPLATFORM_MEMORY_ADDR_WIDTH=26 -DPLATFORM_MEMORY_DATA_WIDTH=512 -DPLATFORM_MEMORY_BURST_CNT_WIDTH=4 -DNOPAE -DSV_DPI +XML_CFLAGS = $(filter-out -DSYNTHESIS -DQUARTUS, $(CFLAGS)) $(RTL_PKGS) -I$(AFU_DIR)/ccip -I$(DPI_DIR) -DPLATFORM_PROVIDES_LOCAL_MEMORY -DPLATFORM_MEMORY_BANKS=1 -DPLATFORM_MEMORY_ADDR_WIDTH=32 -DPLATFORM_MEMORY_DATA_WIDTH=512 -DPLATFORM_MEMORY_BURST_CNT_WIDTH=4 -DNOPAE -DSV_DPI all: swconfig ip-gen setup build diff --git a/hw/syn/xilinx/xrt/Makefile b/hw/syn/xilinx/xrt/Makefile index fa0a7873ba..a5a38e281b 100644 --- a/hw/syn/xilinx/xrt/Makefile +++ b/hw/syn/xilinx/xrt/Makefile @@ -76,6 +76,7 @@ CONFIGS += $(CONFIGS_$(NUM_CORES)c) # include sources RTL_PKGS = $(RTL_DIR)/VX_gpu_pkg.sv $(RTL_DIR)/fpu/VX_fpu_pkg.sv +RTL_PKGS += $(RTL_DIR)/tex/VX_tex_pkg.sv $(RTL_DIR)/raster/VX_raster_pkg.sv $(RTL_DIR)/om/VX_om_pkg.sv FPU_INCLUDE = -I$(RTL_DIR)/fpu ifneq (,$(findstring FPU_FPNEW,$(CONFIGS))) RTL_PKGS += $(THIRD_PARTY_DIR)/cvfpu/src/fpnew_pkg.sv $(THIRD_PARTY_DIR)/cvfpu/src/common_cells/src/cf_math_pkg $(THIRD_PARTY_DIR)/cvfpu/src/fpu_div_sqrt_mvp/hdl/defs_div_sqrt_mvp.sv @@ -91,18 +92,8 @@ RTL_INCLUDE += $(FPU_INCLUDE) $(TEX_INCLUDE) $(RASTER_INCLUDE) $(OM_INCLUDE) VPP_FLAGS += --link --target $(TARGET) --platform $(PLATFORM) --save-temps --no_ip_cache VPP_FLAGS += --vivado.synth.jobs $(JOBS) --vivado.impl.jobs $(JOBS) -ifeq ($(DEV_ARCH), zynquplus) -# ztnq -else ifeq ($(DEV_ARCH), versal) -# versal -else -# alveo -ifneq ($(findstring xilinx_u55c,$(XSA)),) - 
VPP_FLAGS += --connectivity.sp vortex_afu_1.m_axi_mem_0:HBM[0:31] -else - VPP_FLAGS += --connectivity.sp vortex_afu_1.m_axi_mem_0:HBM[0:15] -endif -endif +# load platform settings +include $(SRC_DIR)/platforms.mk VPP_FLAGS += --report_level 2 VPP_FLAGS += --config $(SRC_DIR)/vitis.ini @@ -173,8 +164,12 @@ scope-json: $(BUILD_DIR)/scope.json $(BUILD_DIR)/scope.json: $(BUILD_DIR)/vortex.xml mkdir -p $(BUILD_DIR); cd $(BUILD_DIR); $(SCRIPT_DIR)/scope.py vortex.xml -o scope.json +gen-xml: +$(BUILD_DIR)/kernel.xml: + mkdir -p $(BUILD_DIR); cd $(BUILD_DIR); $(SRC_DIR)/gen_xml.py -n $(M_AXI_NUM_BANKS) -d $(M_AXI_DATA_WIDTH) -a $(M_AXI_ADDRESS_WIDTH) -o kernel.xml + gen-xo: $(XO_CONTAINER) -$(XO_CONTAINER): $(BUILD_DIR)/sources.txt +$(XO_CONTAINER): $(BUILD_DIR)/sources.txt $(BUILD_DIR)/kernel.xml mkdir -p $(BUILD_DIR); cd $(BUILD_DIR); $(VIVADO) -mode batch -source $(SRC_DIR)/gen_xo.tcl -tclargs ../$(XO_CONTAINER) vortex_afu sources.txt $(SCRIPT_DIR) ../$(BUILD_DIR) gen-bin: $(XCLBIN_CONTAINER) diff --git a/hw/syn/xilinx/xrt/gen_xml.py b/hw/syn/xilinx/xrt/gen_xml.py new file mode 100644 index 0000000000..4ba906b9a4 --- /dev/null +++ b/hw/syn/xilinx/xrt/gen_xml.py @@ -0,0 +1,75 @@ +#!/usr/bin/env python3 + +# Copyright © 2019-2023 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import argparse +import xml.etree.ElementTree as ET +from xml.dom import minidom + +def prettify(elem): + """Return a pretty-printed XML string for the Element.""" + rough_string = ET.tostring(elem, 'utf-8') + reparsed = minidom.parseString(rough_string) + return reparsed.toprettyxml(indent=" ") + +def generate_xml(numbanks, datawidth, addresswidth, offset, output_file): + root = ET.Element("root", versionMajor="1", versionMinor="6") + kernel = ET.SubElement(root, "kernel", name="vortex_afu", language="ip_c", + vlnv="mycompany.com:kernel:vortex_afu:1.0", + attributes="", preferredWorkGroupSizeMultiple="0", + workGroupSize="1", interrupt="true") + + ports = ET.SubElement(kernel, "ports") + + # control ports + ET.SubElement(ports, "port", name="s_axi_ctrl", mode="slave", range="0x1000", dataWidth="32", portType="addressable", base="0x0") + + # memory ports + for i in range(numbanks): + port_name = f"m_axi_mem_{i}" + ET.SubElement(ports, "port", name=port_name, mode="master", range=f"0x{(1 << addresswidth) - 1:X}", dataWidth=str(datawidth), portType="addressable", base=f"0x0") + + args = ET.SubElement(kernel, "args") + + # control args + ET.SubElement(args, "arg", name="dev", addressQualifier="0", id="0", port="s_axi_ctrl", size="0x4", offset="0x010", type="uint", hostOffset="0x0", hostSize="0x4") + ET.SubElement(args, "arg", name="isa", addressQualifier="0", id="1", port="s_axi_ctrl", size="0x4", offset="0x018", type="uint", hostOffset="0x0", hostSize="0x4") + ET.SubElement(args, "arg", name="dcr", addressQualifier="0", id="2", port="s_axi_ctrl", size="0x4", offset="0x020", type="uint", hostOffset="0x0", hostSize="0x4") + ET.SubElement(args, "arg", name="scp", addressQualifier="0", id="3", port="s_axi_ctrl", size="0x4", offset="0x028", type="uint", hostOffset="0x0", hostSize="0x4") + + # memory args + for i in range(numbanks): + arg_name = f"mem_{i}" + ET.SubElement(args, "arg", name=arg_name, addressQualifier="1", id=str(4 + i), + port=f"m_axi_mem_{i}", 
size="0x8", offset=f"0x{offset + (i * 8):X}", + type="int*", hostOffset="0x0", hostSize="0x8") + + # Pretty-print and write the XML to file + with open(output_file, "w") as f: + f.write(prettify(root)) + +def main(): + parser = argparse.ArgumentParser(description="Kernel Configuration File Generator") + parser.add_argument("-n", "--numbanks", type=int, default=1, help="Number of AXI memory banks") + parser.add_argument("-d", "--datawidth", type=int, default=512, help="Data width of the AXI memory ports") + parser.add_argument("-a", "--addresswidth", type=int, default=28, help="Address width of the AXI memory ports") + parser.add_argument("-x", "--offset", type=lambda x: int(x, 0), default=0x30, help="Starting offset for kernel args (hex)") + parser.add_argument("-o", "--output", type=str, default="kernel.xml", help="Output XML file name") + args = parser.parse_args() + + # Call the generate function + generate_xml(args.numbanks, args.datawidth, args.addresswidth, args.offset, args.output) + +if __name__ == "__main__": + main() diff --git a/hw/syn/xilinx/xrt/gen_xo.tcl b/hw/syn/xilinx/xrt/gen_xo.tcl index c36c98e36a..9301a096ec 100644 --- a/hw/syn/xilinx/xrt/gen_xo.tcl +++ b/hw/syn/xilinx/xrt/gen_xo.tcl @@ -37,4 +37,4 @@ set argv [list ${krnl_name} ${vcs_file} ${tool_dir} ${build_dir}] set argc 4 source ${script_path}/package_kernel.tcl -package_xo -xo_path ${xoname} -kernel_name ${krnl_name} -ip_directory "${build_dir}/xo/packaged_kernel" +package_xo -xo_path ${xoname} -kernel_name ${krnl_name} -ip_directory "${build_dir}/xo/packaged_kernel" -kernel_xml ${build_dir}/kernel.xml diff --git a/hw/syn/xilinx/xrt/package_kernel.tcl b/hw/syn/xilinx/xrt/package_kernel.tcl index aa7e96f3f9..ed09639dde 100644 --- a/hw/syn/xilinx/xrt/package_kernel.tcl +++ b/hw/syn/xilinx/xrt/package_kernel.tcl @@ -41,14 +41,27 @@ set vdefines_list [lindex $vlist 2] #puts ${vincludes_list} #puts ${vdefines_list} -# find if chipscope is enabled set chipscope 0 +set num_banks 1 +set 
merged_mem_if 0 + +# parse vdefines_list for configuration parameters foreach def $vdefines_list { set fields [split $def "="] set name [lindex $fields 0] if { $name == "CHIPSCOPE" } { set chipscope 1 } + if { $name == "PLATFORM_MEMORY_BANKS" } { + set num_banks [lindex $fields 1] + } + if { $name == "PLATFORM_MERGED_MEMORY_INTERFACE" } { + set merged_mem_if 1 + } +} + +if { $merged_mem_if == 1 } { + set num_banks 1 } create_project -force kernel_pack $path_to_tmp_project @@ -143,108 +156,10 @@ foreach up [ipx::get_user_parameters] { ipx::associate_bus_interfaces -busif s_axi_ctrl -clock ap_clk $core -for {set i 0} {$i < 1} {incr i} { +for {set i 0} {$i < $num_banks} {incr i} { ipx::associate_bus_interfaces -busif m_axi_mem_$i -clock ap_clk $core } -set mem_map [::ipx::add_memory_map -quiet "s_axi_ctrl" $core] -set addr_block [::ipx::add_address_block -quiet "reg0" $mem_map] - -set reg [::ipx::add_register "CTRL" $addr_block] - set_property description "Control signals" $reg - set_property address_offset 0x000 $reg - set_property size 32 $reg - -set field [ipx::add_field AP_START $reg] - set_property ACCESS {read-write} $field - set_property BIT_OFFSET {0} $field - set_property BIT_WIDTH {1} $field - set_property DESCRIPTION {Control signal Register for 'ap_start'.} $field - set_property MODIFIED_WRITE_VALUE {modify} $field - -set field [ipx::add_field AP_DONE $reg] - set_property ACCESS {read-only} $field - set_property BIT_OFFSET {1} $field - set_property BIT_WIDTH {1} $field - set_property DESCRIPTION {Control signal Register for 'ap_done'.} $field - set_property READ_ACTION {modify} $field - -set field [ipx::add_field AP_IDLE $reg] - set_property ACCESS {read-only} $field - set_property BIT_OFFSET {2} $field - set_property BIT_WIDTH {1} $field - set_property DESCRIPTION {Control signal Register for 'ap_idle'.} $field - set_property READ_ACTION {modify} $field - -set field [ipx::add_field AP_READY $reg] - set_property ACCESS {read-only} $field - set_property 
BIT_OFFSET {3} $field - set_property BIT_WIDTH {1} $field - set_property DESCRIPTION {Control signal Register for 'ap_ready'.} $field - set_property READ_ACTION {modify} $field - -set field [ipx::add_field RESERVED_1 $reg] - set_property ACCESS {read-only} $field - set_property BIT_OFFSET {4} $field - set_property BIT_WIDTH {3} $field - set_property DESCRIPTION {Reserved. 0s on read.} $field - set_property READ_ACTION {modify} $field - -set field [ipx::add_field AUTO_RESTART $reg] - set_property ACCESS {read-write} $field - set_property BIT_OFFSET {7} $field - set_property BIT_WIDTH {1} $field - set_property DESCRIPTION {Control signal Register for 'auto_restart'.} $field - set_property MODIFIED_WRITE_VALUE {modify} $field - -set field [ipx::add_field RESERVED_2 $reg] - set_property ACCESS {read-only} $field - set_property BIT_OFFSET {8} $field - set_property BIT_WIDTH {24} $field - set_property DESCRIPTION {Reserved. 0s on read.} $field - set_property READ_ACTION {modify} $field - -set reg [::ipx::add_register "GIER" $addr_block] - set_property description "Global Interrupt Enable Register" $reg - set_property address_offset 0x004 $reg - set_property size 32 $reg - -set reg [::ipx::add_register "IP_IER" $addr_block] - set_property description "IP Interrupt Enable Register" $reg - set_property address_offset 0x008 $reg - set_property size 32 $reg - -set reg [::ipx::add_register "IP_ISR" $addr_block] - set_property description "IP Interrupt Status Register" $reg - set_property address_offset 0x00C $reg - set_property size 32 $reg - -set reg [::ipx::add_register -quiet "DEV" $addr_block] - set_property address_offset 0x010 $reg - set_property size [expr {8*8}] $reg - -set reg [::ipx::add_register -quiet "ISA" $addr_block] - set_property address_offset 0x01C $reg - set_property size [expr {8*8}] $reg - -set reg [::ipx::add_register -quiet "DCR" $addr_block] - set_property address_offset 0x028 $reg - set_property size [expr {8*8}] $reg - -set reg [::ipx::add_register 
-quiet "SCP" $addr_block] - set_property address_offset 0x034 $reg - set_property size [expr {8*8}] $reg - -for {set i 0} {$i < 1} {incr i} { - set reg [::ipx::add_register -quiet "MEM_$i" $addr_block] - set_property address_offset [expr {0x040 + $i * 12}] $reg - set_property size [expr {8*8}] $reg - set regparam [::ipx::add_register_parameter -quiet {ASSOCIATED_BUSIF} $reg] - set_property value m_axi_mem_$i $regparam -} - -set_property slave_memory_map_ref "s_axi_ctrl" [::ipx::get_bus_interfaces -of $core "s_axi_ctrl"] - set_property xpm_libraries {XPM_CDC XPM_MEMORY XPM_FIFO} $core set_property sdx_kernel true $core set_property sdx_kernel_type rtl $core diff --git a/hw/syn/xilinx/xrt/platforms.mk b/hw/syn/xilinx/xrt/platforms.mk new file mode 100644 index 0000000000..a3584942c7 --- /dev/null +++ b/hw/syn/xilinx/xrt/platforms.mk @@ -0,0 +1,51 @@ +# Platform specific configurations +# Add your platform specific configurations here + +M_AXI_NUM_BANKS := 1 +M_AXI_DATA_WIDTH := 512 +M_AXI_ADDRESS_WIDTH := 32 + +ifeq ($(DEV_ARCH), zynquplus) +# zynquplus +CONFIGS += -DPLATFORM_MEMORY_BANKS=1 -DPLATFORM_MEMORY_ADDR_WIDTH=32 +else ifeq ($(DEV_ARCH), versal) +# versal +CONFIGS += -DPLATFORM_MEMORY_BANKS=1 -DPLATFORM_MEMORY_ADDR_WIDTH=32 +ifneq ($(findstring xilinx_vck5000,$(XSA)),) + CONFIGS += -DPLATFORM_MEMORY_OFFSET=40'hC000000000 +endif +else +# alveo +ifneq ($(findstring xilinx_u55c,$(XSA)),) + CONFIGS += -DPLATFORM_MEMORY_BANKS=32 -DPLATFORM_MEMORY_ADDR_WIDTH=28 + #VPP_FLAGS += --connectivity.sp vortex_afu_1.m_axi_mem_0:HBM[0:31] + #CONFIGS += -DPLATFORM_MERGED_MEMORY_INTERFACE + VPP_FLAGS += $(foreach i,$(shell seq 0 31), --connectivity.sp vortex_afu_1.m_axi_mem_$(i):HBM[$(i)]) + M_AXI_NUM_BANKS := 32 + M_AXI_ADDRESS_WIDTH := 28 +else ifneq ($(findstring xilinx_u50,$(XSA)),) + CONFIGS += -DPLATFORM_MEMORY_BANKS=16 -DPLATFORM_MEMORY_ADDR_WIDTH=28 + VPP_FLAGS += --connectivity.sp vortex_afu_1.m_axi_mem_0:HBM[0:15] + M_AXI_NUM_BANKS := 16 + M_AXI_ADDRESS_WIDTH := 28 
+else ifneq ($(findstring xilinx_u280,$(XSA)),) + CONFIGS += -DPLATFORM_MEMORY_BANKS=16 -DPLATFORM_MEMORY_ADDR_WIDTH=28 + VPP_FLAGS += --connectivity.sp vortex_afu_1.m_axi_mem_0:HBM[0:15] + M_AXI_NUM_BANKS := 16 + M_AXI_ADDRESS_WIDTH := 28 +else ifneq ($(findstring xilinx_u250,$(XSA)),) + CONFIGS += -DPLATFORM_MEMORY_BANKS=4 -DPLATFORM_MEMORY_ADDR_WIDTH=34 + M_AXI_NUM_BANKS := 4 + M_AXI_ADDRESS_WIDTH := 34 +else ifneq ($(findstring xilinx_u200,$(XSA)),) + CONFIGS += -DPLATFORM_MEMORY_BANKS=4 -DPLATFORM_MEMORY_ADDR_WIDTH=34 + M_AXI_NUM_BANKS := 4 + M_AXI_ADDRESS_WIDTH := 34 +else + CONFIGS += -DPLATFORM_MEMORY_BANKS=1 -DPLATFORM_MEMORY_ADDR_WIDTH=32 + M_AXI_NUM_BANKS := 1 + M_AXI_ADDRESS_WIDTH := 32 +endif +endif + +CONFIGS += -DPLATFORM_MEMORY_DATA_WIDTH=$(M_AXI_DATA_WIDTH) \ No newline at end of file diff --git a/sim/opaesim/Makefile b/sim/opaesim/Makefile index ce8602c186..b04f8ddb47 100644 --- a/sim/opaesim/Makefile +++ b/sim/opaesim/Makefile @@ -37,9 +37,9 @@ ifeq (,$(findstring PLATFORM_MEMORY_BANKS,$(CONFIGS))) endif ifeq (,$(findstring PLATFORM_MEMORY_ADDR_WIDTH,$(CONFIGS))) ifeq ($(XLEN),64) - CONFIGS += -DPLATFORM_MEMORY_ADDR_WIDTH=41 + CONFIGS += -DPLATFORM_MEMORY_ADDR_WIDTH=47 else - CONFIGS += -DPLATFORM_MEMORY_ADDR_WIDTH=25 + CONFIGS += -DPLATFORM_MEMORY_ADDR_WIDTH=31 endif endif ifeq (,$(findstring PLATFORM_MEMORY_DATA_WIDTH,$(CONFIGS))) diff --git a/sim/opaesim/opae_sim.cpp b/sim/opaesim/opae_sim.cpp index 2a06595dfa..0f0d67d9cc 100644 --- a/sim/opaesim/opae_sim.cpp +++ b/sim/opaesim/opae_sim.cpp @@ -146,7 +146,7 @@ class opae_sim::Impl { ram_ = new RAM(0, RAM_PAGE_SIZE); // calculate memory bank size - mem_bank_size_ = (1ull << PLATFORM_MEMORY_ADDR_WIDTH) * PLATFORM_MEMORY_DATA_SIZE; + mem_bank_size_ = 1ull << PLATFORM_MEMORY_ADDR_WIDTH; // reset the device this->reset(); diff --git a/sim/xrtsim/Makefile b/sim/xrtsim/Makefile index 4b95d55bd2..83efa688f4 100644 --- a/sim/xrtsim/Makefile +++ b/sim/xrtsim/Makefile @@ -37,9 +37,9 @@ ifeq (,$(findstring 
PLATFORM_MEMORY_BANKS,$(CONFIGS))) endif ifeq (,$(findstring PLATFORM_MEMORY_ADDR_WIDTH,$(CONFIGS))) ifeq ($(XLEN),64) - CONFIGS += -DPLATFORM_MEMORY_ADDR_WIDTH=41 + CONFIGS += -DPLATFORM_MEMORY_ADDR_WIDTH=47 else - CONFIGS += -DPLATFORM_MEMORY_ADDR_WIDTH=25 + CONFIGS += -DPLATFORM_MEMORY_ADDR_WIDTH=31 endif endif ifeq (,$(findstring PLATFORM_MEMORY_DATA_WIDTH,$(CONFIGS))) diff --git a/sim/xrtsim/vortex_afu_shim.sv b/sim/xrtsim/vortex_afu_shim.sv index 04350055b4..9b3e2e8edf 100644 --- a/sim/xrtsim/vortex_afu_shim.sv +++ b/sim/xrtsim/vortex_afu_shim.sv @@ -17,7 +17,7 @@ module vortex_afu_shim #( parameter C_S_AXI_CTRL_ADDR_WIDTH = 8, parameter C_S_AXI_CTRL_DATA_WIDTH = 32, parameter C_M_AXI_MEM_ID_WIDTH = `PLATFORM_MEMORY_ID_WIDTH, - parameter C_M_AXI_MEM_ADDR_WIDTH = `PLATFORM_MEMORY_ADDR_WIDTH + $clog2(`PLATFORM_MEMORY_DATA_WIDTH/8), + parameter C_M_AXI_MEM_ADDR_WIDTH = `PLATFORM_MEMORY_ADDR_WIDTH, parameter C_M_AXI_MEM_DATA_WIDTH = `PLATFORM_MEMORY_DATA_WIDTH, parameter C_M_AXI_MEM_NUM_BANKS = `PLATFORM_MEMORY_BANKS ) ( diff --git a/sim/xrtsim/xrt_sim.cpp b/sim/xrtsim/xrt_sim.cpp index 1a63cdfdcf..feb13dd1cd 100644 --- a/sim/xrtsim/xrt_sim.cpp +++ b/sim/xrtsim/xrt_sim.cpp @@ -184,7 +184,7 @@ class xrt_sim::Impl { #endif // calculate memory bank size - mem_bank_size_ = ((1ull << PLATFORM_MEMORY_ADDR_WIDTH) / PLATFORM_MEMORY_BANKS) * PLATFORM_MEMORY_DATA_SIZE; + mem_bank_size_ = 1ull << PLATFORM_MEMORY_ADDR_WIDTH; // allocate RAM ram_ = new RAM(0, RAM_PAGE_SIZE); From 923d2bb94c1a569fad413088d2da87bfba252830 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Mon, 23 Sep 2024 02:30:34 -0700 Subject: [PATCH 213/407] mark as executable --- hw/syn/altera/power_play.sh | 0 hw/syn/xilinx/xrt/gen_xml.py | 0 2 files changed, 0 insertions(+), 0 deletions(-) mode change 100644 => 100755 hw/syn/altera/power_play.sh mode change 100644 => 100755 hw/syn/xilinx/xrt/gen_xml.py diff --git a/hw/syn/altera/power_play.sh b/hw/syn/altera/power_play.sh old mode 100644 new mode 100755 
diff --git a/hw/syn/xilinx/xrt/gen_xml.py b/hw/syn/xilinx/xrt/gen_xml.py old mode 100644 new mode 100755 From a80be895baae04383941da222bc7a85247efd76f Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Mon, 23 Sep 2024 03:05:46 -0700 Subject: [PATCH 214/407] fixed compiler errors --- runtime/opae/Makefile | 2 +- runtime/rtlsim/Makefile | 2 +- runtime/simx/Makefile | 2 +- runtime/stub/Makefile | 2 +- runtime/xrt/Makefile | 2 +- sim/common/bitmanip.h | 94 +++++++++++++++++++------------------- tests/opencl/common.mk | 2 +- tests/regression/common.mk | 2 +- tests/unittest/common.mk | 2 +- 9 files changed, 56 insertions(+), 54 deletions(-) diff --git a/runtime/opae/Makefile b/runtime/opae/Makefile index b002375d9c..04545c887f 100644 --- a/runtime/opae/Makefile +++ b/runtime/opae/Makefile @@ -9,7 +9,7 @@ SYN_DIR := $(HW_DIR)/syn/altera/opae SRC_DIR := $(VORTEX_HOME)/runtime/opae -CXXFLAGS += -std=c++11 -Wall -Wextra -pedantic -Wfatal-errors +CXXFLAGS += -std=c++17 -Wall -Wextra -pedantic -Wfatal-errors CXXFLAGS += -I$(INC_DIR) -I$(COMMON_DIR) -I$(ROOT_DIR)/hw -I$(DESTDIR) -I$(SIM_DIR)/common CXXFLAGS += -DXLEN_$(XLEN) diff --git a/runtime/rtlsim/Makefile b/runtime/rtlsim/Makefile index f6adbf8c8c..a7b15d9ac3 100644 --- a/runtime/rtlsim/Makefile +++ b/runtime/rtlsim/Makefile @@ -4,7 +4,7 @@ DESTDIR ?= $(CURDIR)/.. SRC_DIR := $(VORTEX_HOME)/runtime/rtlsim -CXXFLAGS += -std=c++11 -Wall -Wextra -pedantic -Wfatal-errors +CXXFLAGS += -std=c++17 -Wall -Wextra -pedantic -Wfatal-errors CXXFLAGS += -I$(INC_DIR) -I$(COMMON_DIR) -I$(ROOT_DIR)/hw -I$(SIM_DIR)/rtlsim -I$(COMMON_DIR) -I$(SIM_DIR)/common CXXFLAGS += -DXLEN_$(XLEN) diff --git a/runtime/simx/Makefile b/runtime/simx/Makefile index c20e33b535..8eb0e6b441 100644 --- a/runtime/simx/Makefile +++ b/runtime/simx/Makefile @@ -4,7 +4,7 @@ DESTDIR ?= $(CURDIR)/.. 
SRC_DIR := $(VORTEX_HOME)/runtime/simx -CXXFLAGS += -std=c++11 -Wall -Wextra -Wfatal-errors +CXXFLAGS += -std=c++17 -Wall -Wextra -Wfatal-errors CXXFLAGS += -fPIC -Wno-maybe-uninitialized CXXFLAGS += -I$(INC_DIR) -I../common -I$(ROOT_DIR)/hw -I$(SIM_DIR)/simx -I$(COMMON_DIR) -I$(SIM_DIR)/common CXXFLAGS += $(CONFIGS) diff --git a/runtime/stub/Makefile b/runtime/stub/Makefile index ae6e27ed10..8315bd8af2 100644 --- a/runtime/stub/Makefile +++ b/runtime/stub/Makefile @@ -4,7 +4,7 @@ DESTDIR ?= $(CURDIR)/.. SRC_DIR := $(VORTEX_HOME)/runtime/stub -CXXFLAGS += -std=c++11 -Wall -Wextra -pedantic -Wfatal-errors +CXXFLAGS += -std=c++17 -Wall -Wextra -pedantic -Wfatal-errors CXXFLAGS += -I$(INC_DIR) -I$(COMMON_DIR) -I$(ROOT_DIR)/hw -I$(SIM_DIR)/common CXXFLAGS += -fPIC diff --git a/runtime/xrt/Makefile b/runtime/xrt/Makefile index 7fadb43fd2..f255002f28 100644 --- a/runtime/xrt/Makefile +++ b/runtime/xrt/Makefile @@ -6,7 +6,7 @@ DESTDIR ?= $(CURDIR)/.. SRC_DIR := $(VORTEX_HOME)/runtime/xrt -CXXFLAGS += -std=c++14 -Wall -Wextra -Wfatal-errors +CXXFLAGS += -std=c++17 -Wall -Wextra -Wfatal-errors CXXFLAGS += -I$(INC_DIR) -I$(COMMON_DIR) -I$(ROOT_DIR)/hw -I$(XILINX_XRT)/include -I$(SIM_DIR)/common CXXFLAGS += -DXLEN_$(XLEN) CXXFLAGS += -fPIC diff --git a/sim/common/bitmanip.h b/sim/common/bitmanip.h index 3c58580433..89247b89ca 100644 --- a/sim/common/bitmanip.h +++ b/sim/common/bitmanip.h @@ -16,60 +16,62 @@ #include #include -constexpr uint32_t count_leading_zeros(uint32_t value) { - return value ? __builtin_clz(value) : 32; -} - -constexpr uint32_t count_leading_zeros(uint64_t value) { - return value ? __builtin_clzll(value) : 64; -} - -constexpr uint32_t count_trailing_zeros(uint32_t value) { - return value ? __builtin_ctz(value) : 32; -} - -constexpr uint32_t count_trailing_zeros(uint64_t value) { - return value ? 
__builtin_ctzll(value) : 64; -} - -constexpr bool ispow2(uint32_t value) { - return value && !(value & (value - 1)); -} - -constexpr bool ispow2(uint64_t value) { +template +constexpr uint32_t count_leading_zeros(T value) { + static_assert(std::is_integral::value, "invalid data type"); + if constexpr (sizeof(T) > 4) { + return value ? __builtin_clzll(value) : 64; + } else { + return value ? __builtin_clz(value) : 32; + } +} + +template +constexpr uint32_t count_trailing_zeros(T value) { + static_assert(std::is_integral::value, "invalid data type"); + if constexpr (sizeof(T) > 4) { + return value ? __builtin_ctzll(value) : 64; + } else { + return value ? __builtin_ctz(value) : 32; + } +} + +template +constexpr bool ispow2(T value) { + static_assert(std::is_integral::value, "invalid data type"); return value && !(value & (value - 1)); } -constexpr uint32_t log2ceil(uint32_t value) { - return 32 - count_leading_zeros(value - 1); +template +constexpr uint32_t log2ceil(T value) { + static_assert(std::is_integral::value, "invalid data type"); + return (sizeof(T) * 8) - count_leading_zeros(value - 1); } -constexpr uint32_t log2ceil(uint64_t value) { - return 64 - count_leading_zeros(value - 1); -} - -inline unsigned log2up(uint32_t value) { - return std::max(1, log2ceil(value)); -} - -inline unsigned log2up(uint64_t value) { +template +inline unsigned log2up(T value) { + static_assert(std::is_integral::value, "invalid data type"); return std::max(1, log2ceil(value)); } -constexpr unsigned log2floor(uint32_t value) { - return 31 - count_leading_zeros(value); -} - -constexpr unsigned log2floor(uint64_t value) { - return 63 - count_leading_zeros(value); -} - -constexpr unsigned ceil2(uint32_t value) { - return 32 - count_leading_zeros(value); -} - -constexpr unsigned ceil2(uint64_t value) { - return 64 - count_leading_zeros(value); +template +constexpr unsigned log2floor(T value) { + static_assert(std::is_integral::value, "invalid data type"); + if constexpr (sizeof(T) > 4) 
{ + return 63 - count_leading_zeros(value); + } else { + return 31 - count_leading_zeros(value); + } +} + +template +constexpr unsigned ceil2(T value) { + static_assert(std::is_integral::value, "invalid data type"); + if constexpr (sizeof(T) > 4) { + return 64 - count_leading_zeros(value); + } else { + return 32 - count_leading_zeros(value); + } } inline uint64_t bit_clr(uint64_t bits, uint32_t index) { diff --git a/tests/opencl/common.mk b/tests/opencl/common.mk index 53903dd41c..36d2956cbe 100644 --- a/tests/opencl/common.mk +++ b/tests/opencl/common.mk @@ -40,7 +40,7 @@ VX_LDFLAGS += -Wl,-Bstatic,--gc-sections,-T$(VORTEX_HOME)/kernel/scripts/link$(X VX_BINTOOL += OBJCOPY=$(LLVM_VORTEX)/bin/llvm-objcopy $(VORTEX_HOME)/kernel/scripts/vxbin.py -CXXFLAGS += -std=c++11 -Wall -Wextra -Wfatal-errors +CXXFLAGS += -std=c++17 -Wall -Wextra -Wfatal-errors CXXFLAGS += -Wno-deprecated-declarations -Wno-unused-parameter -Wno-narrowing CXXFLAGS += -pthread CXXFLAGS += -I$(POCL_PATH)/include diff --git a/tests/regression/common.mk b/tests/regression/common.mk index 0f97d4979a..142d5cb2ee 100644 --- a/tests/regression/common.mk +++ b/tests/regression/common.mk @@ -50,7 +50,7 @@ VX_LIBS += $(LIBCRT_VORTEX)/lib/baremetal/libclang_rt.builtins-riscv$(XLEN).a VX_LDFLAGS += -Wl,-Bstatic,--gc-sections,-T,$(VORTEX_HOME)/kernel/scripts/link$(XLEN).ld,--defsym=STARTUP_ADDR=$(STARTUP_ADDR) $(VORTEX_KN_PATH)/libvortex.a $(VX_LIBS) -CXXFLAGS += -std=c++11 -Wall -Wextra -pedantic -Wfatal-errors +CXXFLAGS += -std=c++17 -Wall -Wextra -pedantic -Wfatal-errors CXXFLAGS += -I$(VORTEX_HOME)/runtime/include -I$(ROOT_DIR)/hw LDFLAGS += -L$(VORTEX_RT_PATH) -lvortex diff --git a/tests/unittest/common.mk b/tests/unittest/common.mk index 9c3e384be6..c04db4d11d 100644 --- a/tests/unittest/common.mk +++ b/tests/unittest/common.mk @@ -1,7 +1,7 @@ ROOT_DIR := $(realpath ../../..) 
-CXXFLAGS += -std=c++11 -Wall -Wextra -pedantic -Wfatal-errors +CXXFLAGS += -std=c++17 -Wall -Wextra -pedantic -Wfatal-errors CXXFLAGS += -I$(VORTEX_HOME)/sim/common # Debugging From 828b8827e796b731ce883c092734b45f752ca24e Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Mon, 23 Sep 2024 03:36:35 -0700 Subject: [PATCH 215/407] build error fix --- tests/opencl/bfs/CLHelper.h | 24 ++++++++++++------------ tests/opencl/bfs/main.cc | 2 +- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/tests/opencl/bfs/CLHelper.h b/tests/opencl/bfs/CLHelper.h index d485cbc365..e4106845bd 100755 --- a/tests/opencl/bfs/CLHelper.h +++ b/tests/opencl/bfs/CLHelper.h @@ -431,7 +431,7 @@ void _clRelease() { } //-------------------------------------------------------- //--cambine:create buffer and then copy data from host to device -cl_mem _clCreateAndCpyMem(int size, void *h_mem_source) throw(string) { +cl_mem _clCreateAndCpyMem(int size, void *h_mem_source) { cl_mem d_mem; d_mem = clCreateBuffer(oclHandles.context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, size, @@ -445,7 +445,7 @@ cl_mem _clCreateAndCpyMem(int size, void *h_mem_source) throw(string) { //------------------------------------------------------- //--cambine: create read only buffer for devices //--date: 17/01/2011 -cl_mem _clMallocRW(int size, void *h_mem_ptr) throw(string) { +cl_mem _clMallocRW(int size, void *h_mem_ptr) { cl_mem d_mem; d_mem = clCreateBuffer(oclHandles.context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, size, @@ -459,7 +459,7 @@ cl_mem _clMallocRW(int size, void *h_mem_ptr) throw(string) { //------------------------------------------------------- //--cambine: create read and write buffer for devices //--date: 17/01/2011 -cl_mem _clMalloc(int size, void *h_mem_ptr) throw(string) { +cl_mem _clMalloc(int size, void *h_mem_ptr) { cl_mem d_mem; d_mem = clCreateBuffer(oclHandles.context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, size, @@ -474,7 +474,7 @@ cl_mem _clMalloc(int size, void *h_mem_ptr) 
throw(string) { //------------------------------------------------------- //--cambine: transfer data from host to device //--date: 17/01/2011 -void _clMemcpyH2D(cl_mem d_mem, int size, const void *h_mem_ptr) throw(string) { +void _clMemcpyH2D(cl_mem d_mem, int size, const void *h_mem_ptr) { oclHandles.cl_status = clEnqueueWriteBuffer( oclHandles.queue, d_mem, CL_TRUE, 0, size, h_mem_ptr, 0, NULL, NULL); #ifdef ERRMSG @@ -485,7 +485,7 @@ void _clMemcpyH2D(cl_mem d_mem, int size, const void *h_mem_ptr) throw(string) { //-------------------------------------------------------- //--cambine:create buffer and then copy data from host to device with pinned // memory -cl_mem _clCreateAndCpyPinnedMem(int size, float *h_mem_source) throw(string) { +cl_mem _clCreateAndCpyPinnedMem(int size, float *h_mem_source) { cl_mem d_mem, d_mem_pinned; float *h_mem_pinned = NULL; d_mem_pinned = clCreateBuffer(oclHandles.context, @@ -528,7 +528,7 @@ cl_mem _clCreateAndCpyPinnedMem(int size, float *h_mem_source) throw(string) { //-------------------------------------------------------- //--cambine:create write only buffer on device -cl_mem _clMallocWO(int size) throw(string) { +cl_mem _clMallocWO(int size) { cl_mem d_mem; d_mem = clCreateBuffer(oclHandles.context, CL_MEM_WRITE_ONLY, size, 0, &oclHandles.cl_status); @@ -541,7 +541,7 @@ cl_mem _clMallocWO(int size) throw(string) { //-------------------------------------------------------- // transfer data from device to host -void _clMemcpyD2H(cl_mem d_mem, int size, void *h_mem) throw(string) { +void _clMemcpyD2H(cl_mem d_mem, int size, void *h_mem) { oclHandles.cl_status = clEnqueueReadBuffer(oclHandles.queue, d_mem, CL_TRUE, 0, size, h_mem, 0, 0, 0); #ifdef ERRMSG @@ -580,7 +580,7 @@ void _clMemcpyD2H(cl_mem d_mem, int size, void *h_mem) throw(string) { //-------------------------------------------------------- // set kernel arguments void _clSetArgs(int kernel_id, int arg_idx, void *d_mem, - int size = 0) throw(string) { + int size = 0) 
{ if (!size) { oclHandles.cl_status = clSetKernelArg(oclHandles.kernel[kernel_id], arg_idx, sizeof(d_mem), &d_mem); @@ -657,7 +657,7 @@ void _clSetArgs(int kernel_id, int arg_idx, void *d_mem, #endif } } -void _clFinish() throw(string) { +void _clFinish() { oclHandles.cl_status = clFinish(oclHandles.queue); #ifdef ERRMSG oclHandles.error_str = "excpetion in _clFinish"; @@ -683,7 +683,7 @@ void _clFinish() throw(string) { //-------------------------------------------------------- //--cambine:enqueue kernel void _clInvokeKernel(int kernel_id, int work_items, - int work_group_size) throw(string) { + int work_group_size) { cl_uint work_dim = WORK_DIM; //cl_event e[1]; if (work_items % work_group_size != 0) // process situations that work_items @@ -755,7 +755,7 @@ void _clInvokeKernel(int kernel_id, int work_items, // #endif } void _clInvokeKernel2D(int kernel_id, int range_x, int range_y, int group_x, - int group_y) throw(string) { + int group_y) { cl_uint work_dim = WORK_DIM; size_t local_work_size[] = {group_x, group_y}; size_t global_work_size[] = {range_x, range_y}; @@ -832,7 +832,7 @@ void _clInvokeKernel2D(int kernel_id, int range_x, int range_y, int group_x, //-------------------------------------------------------- // release OpenCL objects -void _clFree(cl_mem ob) throw(string) { +void _clFree(cl_mem ob) { if (ob != NULL) oclHandles.cl_status = clReleaseMemObject(ob); #ifdef ERRMSG diff --git a/tests/opencl/bfs/main.cc b/tests/opencl/bfs/main.cc index 5379506030..cd55f5b3fb 100755 --- a/tests/opencl/bfs/main.cc +++ b/tests/opencl/bfs/main.cc @@ -72,7 +72,7 @@ void run_bfs_cpu(int no_of_nodes, Node *h_graph_nodes, int edge_list_size, void run_bfs_gpu(int no_of_nodes, Node *h_graph_nodes, int edge_list_size, int *h_graph_edges, char *h_graph_mask, char *h_updating_graph_mask, char *h_graph_visited, - int *h_cost) throw(std::string) { + int *h_cost) { // int number_elements = height*width; char h_over; From 29ea3041c48c332ba6f2aa364f111a95223d8de5 Mon Sep 17 
00:00:00 2001 From: Blaise Tine Date: Mon, 23 Sep 2024 03:52:03 -0700 Subject: [PATCH 216/407] build fix --- hw/rtl/cache/VX_cache_bank.sv | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/hw/rtl/cache/VX_cache_bank.sv b/hw/rtl/cache/VX_cache_bank.sv index 59b4be8713..054b7c5896 100644 --- a/hw/rtl/cache/VX_cache_bank.sv +++ b/hw/rtl/cache/VX_cache_bank.sv @@ -273,18 +273,20 @@ module VX_cache_bank #( assign addr_sel = (init_valid | flush_valid) ? `CS_LINE_ADDR_WIDTH'(flush_sel) : (replay_valid ? replay_addr : (mem_rsp_valid ? mem_rsp_addr : core_req_addr)); - if (WRITE_ENABLE) begin : g_data_sel_lo - assign data_sel[`CS_WORD_WIDTH-1:0] = replay_valid ? replay_data : (mem_rsp_valid ? mem_rsp_data[`CS_WORD_WIDTH-1:0] : core_req_data); - end else begin : g_data_sel_lo_ro - assign data_sel[`CS_WORD_WIDTH-1:0] = mem_rsp_data[`CS_WORD_WIDTH-1:0]; + if (WRITE_ENABLE) begin : g_data_sel + for (genvar i = 0; i < `CS_LINE_WIDTH; ++i) begin : g_i + if (i < `CS_WORD_WIDTH) begin : g_lo + assign data_sel[i] = replay_valid ? replay_data[i] : (mem_rsp_valid ? 
mem_rsp_data[i] : core_req_data[i]); + end else begin : g_hi + assign data_sel[i] = mem_rsp_data[i]; // only the memory response fills the upper words of data_sel + end + end + end else begin : g_data_sel_ro + assign data_sel = mem_rsp_data; `UNUSED_VAR (core_req_data) `UNUSED_VAR (replay_data) end - for (genvar i = `CS_WORD_WIDTH; i < `CS_LINE_WIDTH; ++i) begin : g_data_sel_hi - assign data_sel[i] = mem_rsp_data[i]; // only the memory response fills the upper words of data_sel - end - if (UUID_WIDTH != 0) begin : g_req_uuid_sel assign req_uuid_sel = tag_sel[TAG_WIDTH-1 -: UUID_WIDTH]; end else begin : g_req_uuid_sel_0 From 406583c0bdc091a4eb25f067a1b81340f49cbed1 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Mon, 23 Sep 2024 04:00:23 -0700 Subject: [PATCH 217/407] build fix --- ci/regression.sh.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/regression.sh.in b/ci/regression.sh.in index c45e8c3fff..6590da3d6e 100755 --- a/ci/regression.sh.in +++ b/ci/regression.sh.in @@ -273,7 +273,7 @@ config2() CONFIGS="-DMEM_BLOCK_SIZE=16" ./ci/blackbox.sh --driver=simx --app=mstress --threads=8 # test single-bank DRAM - CONFIGS="-DPLATFORM_MEMORY_BANKS=1" ./ci/blackbox.sh --driver=opae --app=mstress + CONFIGS="-DPLATFORM_MEMORY_BANKS=1 -DPLATFORM_MEMORY_ADDR_WIDTH=${XLEN}" ./ci/blackbox.sh --driver=opae --app=mstress # test 33-bit DRAM address CONFIGS="-DPLATFORM_MEMORY_ADDR_WIDTH=33" ./ci/blackbox.sh --driver=opae --app=mstress From e5e9a5c2e9fef9390c43f1d0d3a852e86b08956e Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Mon, 23 Sep 2024 04:03:04 -0700 Subject: [PATCH 218/407] build fix --- ci/regression.sh.in | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/ci/regression.sh.in b/ci/regression.sh.in index 6590da3d6e..cead14925b 100755 --- a/ci/regression.sh.in +++ b/ci/regression.sh.in @@ -273,7 +273,13 @@ config2() CONFIGS="-DMEM_BLOCK_SIZE=16" ./ci/blackbox.sh --driver=simx --app=mstress --threads=8 # test single-bank DRAM - 
CONFIGS="-DPLATFORM_MEMORY_BANKS=1 -DPLATFORM_MEMORY_ADDR_WIDTH=${XLEN}" ./ci/blackbox.sh --driver=opae --app=mstress + if [ "$XLEN" == "64" ]; then + CONFIGS="-DPLATFORM_MEMORY_BANKS=1 -DPLATFORM_MEMORY_ADDR_WIDTH=48" ./ci/blackbox.sh --driver=opae --app=mstress + CONFIGS="-DPLATFORM_MEMORY_BANKS=1 -DPLATFORM_MEMORY_ADDR_WIDTH=48" ./ci/blackbox.sh --driver=xrt --app=mstress + else + CONFIGS="-DPLATFORM_MEMORY_BANKS=1 -DPLATFORM_MEMORY_ADDR_WIDTH=32" ./ci/blackbox.sh --driver=opae --app=mstress + CONFIGS="-DPLATFORM_MEMORY_BANKS=1 -DPLATFORM_MEMORY_ADDR_WIDTH=32" ./ci/blackbox.sh --driver=xrt --app=mstress + fi # test 33-bit DRAM address CONFIGS="-DPLATFORM_MEMORY_ADDR_WIDTH=33" ./ci/blackbox.sh --driver=opae --app=mstress From 030071571d45ada0b68fb983f6e12584f64032e1 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Mon, 23 Sep 2024 04:30:28 -0700 Subject: [PATCH 219/407] test memory bank interleaving --- ci/regression.sh.in | 4 ++++ hw/rtl/afu/opae/vortex_afu.sv | 6 +++++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/ci/regression.sh.in b/ci/regression.sh.in index cead14925b..a0506f1175 100755 --- a/ci/regression.sh.in +++ b/ci/regression.sh.in @@ -285,6 +285,10 @@ config2() CONFIGS="-DPLATFORM_MEMORY_ADDR_WIDTH=33" ./ci/blackbox.sh --driver=opae --app=mstress CONFIGS="-DPLATFORM_MEMORY_ADDR_WIDTH=33" ./ci/blackbox.sh --driver=xrt --app=mstress + # test DRAM banks interleaving + CONFIGS="-DPLATFORM_MEMORY_INTERLEAVE=1" ./ci/blackbox.sh --driver=opae --app=mstress + CONFIGS="-DPLATFORM_MEMORY_INTERLEAVE=0" ./ci/blackbox.sh --driver=opae --app=mstress + echo "configuration-2 tests done!" 
} diff --git a/hw/rtl/afu/opae/vortex_afu.sv b/hw/rtl/afu/opae/vortex_afu.sv index 57b03cb210..435455ae04 100644 --- a/hw/rtl/afu/opae/vortex_afu.sv +++ b/hw/rtl/afu/opae/vortex_afu.sv @@ -18,6 +18,10 @@ `endif `include "VX_define.vh" +`ifndef PLATFORM_MEMORY_INTERLEAVE +`define PLATFORM_MEMORY_INTERLEAVE 1 +`endif + module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_gpu_pkg::*; #( parameter NUM_LOCAL_MEM_BANKS = 2 ) ( @@ -604,7 +608,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ .TAG_WIDTH (AVS_REQ_TAGW + 1), .RD_QUEUE_SIZE (AVS_RD_QUEUE_SIZE), .AVS_ADDR_WIDTH($bits(t_local_mem_addr)), - .BANK_INTERLEAVE (1), + .BANK_INTERLEAVE (`PLATFORM_MEMORY_INTERLEAVE), .REQ_OUT_BUF (2), .RSP_OUT_BUF (0) ) avs_adapter ( From 818522f7e401a7251500036661b0209f7a6b7617 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Mon, 23 Sep 2024 05:57:08 -0700 Subject: [PATCH 220/407] CI scripts update --- .github/workflows/ci.yml | 3 +-- ci/regression.sh.in | 6 ++++-- runtime/xrt/vortex.cpp | 4 ++-- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 64317337bb..1676aea4cb 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -117,7 +117,7 @@ jobs: strategy: fail-fast: false matrix: - name: [regression, opencl, cache, config1, config2, debug, stress] + name: [regression, opencl, cache, config1, config2, debug, stress, synthesis] xlen: [32, 64] steps: @@ -161,7 +161,6 @@ jobs: ./ci/regression.sh --unittest ./ci/regression.sh --isa ./ci/regression.sh --kernel - ./ci/regression.sh --synthesis ./ci/regression.sh --regression else ./ci/regression.sh --${{ matrix.name }} diff --git a/ci/regression.sh.in b/ci/regression.sh.in index a0506f1175..ea9aa25609 100755 --- a/ci/regression.sh.in +++ b/ci/regression.sh.in @@ -313,8 +313,10 @@ debug() test_csv_trace - CONFIGS="-O0 -DSOCKET_SIZE=1" ./ci/blackbox.sh --driver=opae --cores=2 --clusters=2 
--l2cache --debug=1 --perf=1 --app=demo --args="-n1" - CONFIGS="-O0 -DSOCKET_SIZE=1" ./ci/blackbox.sh --driver=xrt --cores=2 --clusters=2 --l2cache --debug=1 --perf=1 --app=demo --args="-n1" + CONFIGS="-O0" ./ci/blackbox.sh --driver=opae --app=demo --args="-n1" + CONFIGS="-O0" ./ci/blackbox.sh --driver=xrt --app=demo --args="-n1" + CONFIGS="-DSOCKET_SIZE=1" ./ci/blackbox.sh --driver=opae --cores=2 --clusters=2 --l2cache --debug=1 --perf=1 --app=demo --args="-n1" + CONFIGS="-DSOCKET_SIZE=1" ./ci/blackbox.sh --driver=xrt --cores=2 --clusters=2 --l2cache --debug=1 --perf=1 --app=demo --args="-n1" CONFIGS="-DSOCKET_SIZE=1" ./ci/blackbox.sh --driver=simx --cores=2 --clusters=2 --l2cache --debug=1 --perf=1 --app=demo --args="-n1" ./ci/blackbox.sh --driver=opae --scope --app=demo --args="-n1" ./ci/blackbox.sh --driver=xrt --scope --app=demo --args="-n1" diff --git a/runtime/xrt/vortex.cpp b/runtime/xrt/vortex.cpp index 9385457f5a..d542e72fe2 100644 --- a/runtime/xrt/vortex.cpp +++ b/runtime/xrt/vortex.cpp @@ -191,8 +191,6 @@ class vx_device { xrtDevice_ = xrtDevice; xrtKernel_ = xrtKernel; - printf("info: device name=%s.\n", device_name.c_str()); - CHECK_ERR(this->write_register(MMIO_CTL_ADDR, CTL_AP_RESET), { return err; }); @@ -223,6 +221,8 @@ class vx_device { global_mem_size_ = num_banks * bank_size; + printf("info: device name=%s, memory_capacity=0x%lx bytes, memory_banks=%ld.\n", device_name.c_str(), global_mem_size_, num_banks); + #ifdef BANK_INTERLEAVE xrtBuffers_.reserve(num_banks); for (uint32_t i = 0; i < num_banks; ++i) { From 9a6dbdf1a97c77f4f95a6223e9f6014a561bda45 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Mon, 23 Sep 2024 08:56:57 -0700 Subject: [PATCH 221/407] xrtsim addressing fix --- hw/rtl/Vortex_axi.sv | 16 ++++++++-------- hw/rtl/afu/opae/vortex_afu.sv | 6 +++--- hw/rtl/afu/xrt/VX_afu_ctrl.sv | 5 ++--- hw/rtl/afu/xrt/VX_afu_wrap.sv | 20 ++++++++++++-------- hw/rtl/afu/xrt/vortex_afu.v | 3 +-- hw/rtl/libs/VX_avs_adapter.sv | 32 
++++++++++++++++++-------------- hw/rtl/libs/VX_axi_adapter.sv | 34 ++++++++++++++++++---------------- sim/xrtsim/vortex_afu_shim.sv | 4 ++-- sim/xrtsim/xrt_sim.cpp | 14 ++++---------- 9 files changed, 68 insertions(+), 66 deletions(-) diff --git a/hw/rtl/Vortex_axi.sv b/hw/rtl/Vortex_axi.sv index 17d5d660e3..7582063969 100644 --- a/hw/rtl/Vortex_axi.sv +++ b/hw/rtl/Vortex_axi.sv @@ -84,7 +84,7 @@ module Vortex_axi import VX_gpu_pkg::*; #( ); localparam MIN_TAG_WIDTH = `VX_MEM_TAG_WIDTH - `UUID_WIDTH; localparam VX_MEM_ADDR_A_WIDTH = `VX_MEM_ADDR_WIDTH + `CLOG2(`VX_MEM_DATA_WIDTH) - `CLOG2(AXI_DATA_WIDTH); - + `STATIC_ASSERT((AXI_TID_WIDTH >= MIN_TAG_WIDTH), ("invalid memory tag width: current=%0d, expected=%0d", AXI_TID_WIDTH, MIN_TAG_WIDTH)) wire mem_req_valid; @@ -182,13 +182,13 @@ module Vortex_axi import VX_gpu_pkg::*; #( ); VX_axi_adapter #( - .DATA_WIDTH (AXI_DATA_WIDTH), - .ADDR_WIDTH (VX_MEM_ADDR_A_WIDTH), - .TAG_WIDTH (AXI_TID_WIDTH), - .NUM_BANKS (AXI_NUM_BANKS), - .AXI_ADDR_WIDTH (AXI_ADDR_WIDTH), - .BANK_INTERLEAVE (0), - .RSP_OUT_BUF((AXI_NUM_BANKS > 1) ? 2 : 0) + .DATA_WIDTH (AXI_DATA_WIDTH), + .ADDR_WIDTH_IN (VX_MEM_ADDR_A_WIDTH), + .ADDR_WIDTH_OUT (AXI_ADDR_WIDTH), + .TAG_WIDTH (AXI_TID_WIDTH), + .NUM_BANKS (AXI_NUM_BANKS), + .BANK_INTERLEAVE(0), + .RSP_OUT_BUF ((AXI_NUM_BANKS > 1) ? 
2 : 0) ) axi_adapter ( .clk (clk), .reset (reset), diff --git a/hw/rtl/afu/opae/vortex_afu.sv b/hw/rtl/afu/opae/vortex_afu.sv index 435455ae04..1440b28086 100644 --- a/hw/rtl/afu/opae/vortex_afu.sv +++ b/hw/rtl/afu/opae/vortex_afu.sv @@ -602,13 +602,13 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ VX_avs_adapter #( .DATA_WIDTH (LMEM_DATA_WIDTH), - .ADDR_WIDTH (LMEM_ADDR_WIDTH), + .ADDR_WIDTH_IN (LMEM_ADDR_WIDTH), + .ADDR_WIDTH_OUT($bits(t_local_mem_addr)), .BURST_WIDTH (LMEM_BURST_CTRW), .NUM_BANKS (NUM_LOCAL_MEM_BANKS), .TAG_WIDTH (AVS_REQ_TAGW + 1), .RD_QUEUE_SIZE (AVS_RD_QUEUE_SIZE), - .AVS_ADDR_WIDTH($bits(t_local_mem_addr)), - .BANK_INTERLEAVE (`PLATFORM_MEMORY_INTERLEAVE), + .BANK_INTERLEAVE(`PLATFORM_MEMORY_INTERLEAVE), .REQ_OUT_BUF (2), .RSP_OUT_BUF (0) ) avs_adapter ( diff --git a/hw/rtl/afu/xrt/VX_afu_ctrl.sv b/hw/rtl/afu/xrt/VX_afu_ctrl.sv index 1db8cc4e21..382b31f8aa 100644 --- a/hw/rtl/afu/xrt/VX_afu_ctrl.sv +++ b/hw/rtl/afu/xrt/VX_afu_ctrl.sv @@ -15,8 +15,7 @@ module VX_afu_ctrl #( parameter S_AXI_ADDR_WIDTH = 8, - parameter S_AXI_DATA_WIDTH = 32, - parameter M_AXI_ADDR_WIDTH = 25 + parameter S_AXI_DATA_WIDTH = 32 ) ( // axi4 lite slave signals input wire clk, @@ -135,7 +134,7 @@ module VX_afu_ctrl #( // device caps wire [63:0] dev_caps = {8'b0, - 5'(M_AXI_ADDR_WIDTH-16), + 5'(`PLATFORM_MEMORY_ADDR_WIDTH-16), 3'(`CLOG2(`PLATFORM_MEMORY_BANKS)), 8'(`LMEM_ENABLED ? 
`LMEM_LOG_SIZE : 0), 16'(`NUM_CORES * `NUM_CLUSTERS), diff --git a/hw/rtl/afu/xrt/VX_afu_wrap.sv b/hw/rtl/afu/xrt/VX_afu_wrap.sv index 8530ee97aa..235247177b 100644 --- a/hw/rtl/afu/xrt/VX_afu_wrap.sv +++ b/hw/rtl/afu/xrt/VX_afu_wrap.sv @@ -17,8 +17,8 @@ module VX_afu_wrap #( parameter C_S_AXI_CTRL_ADDR_WIDTH = 8, parameter C_S_AXI_CTRL_DATA_WIDTH = 32, parameter C_M_AXI_MEM_ID_WIDTH = 32, - parameter C_M_AXI_MEM_ADDR_WIDTH = 25, parameter C_M_AXI_MEM_DATA_WIDTH = 512, + parameter C_M_AXI_MEM_ADDR_WIDTH = 25, parameter C_M_AXI_MEM_NUM_BANKS = 2 ) ( // System signals @@ -52,6 +52,11 @@ module VX_afu_wrap #( output wire interrupt ); +`ifdef PLATFORM_MERGED_MEMORY_INTERFACE + localparam M_AXI_MEM_ADDR_WIDTH = `PLATFORM_MEMORY_ADDR_WIDTH + $clog2(`PLATFORM_MEMORY_BANKS); +`else + localparam M_AXI_MEM_ADDR_WIDTH = `PLATFORM_MEMORY_ADDR_WIDTH; +`endif localparam STATE_IDLE = 0; localparam STATE_RUN = 1; @@ -187,8 +192,7 @@ module VX_afu_wrap #( VX_afu_ctrl #( .S_AXI_ADDR_WIDTH (C_S_AXI_CTRL_ADDR_WIDTH), - .S_AXI_DATA_WIDTH (C_S_AXI_CTRL_DATA_WIDTH), - .M_AXI_ADDR_WIDTH (C_M_AXI_MEM_ADDR_WIDTH) + .S_AXI_DATA_WIDTH (C_S_AXI_CTRL_DATA_WIDTH) ) afu_ctrl ( .clk (clk), .reset (reset), @@ -228,19 +232,19 @@ module VX_afu_wrap #( .dcr_wr_data (dcr_wr_data) ); - wire [C_M_AXI_MEM_ADDR_WIDTH-1:0] m_axi_mem_awaddr_u [C_M_AXI_MEM_NUM_BANKS]; - wire [C_M_AXI_MEM_ADDR_WIDTH-1:0] m_axi_mem_araddr_u [C_M_AXI_MEM_NUM_BANKS]; + wire [M_AXI_MEM_ADDR_WIDTH-1:0] m_axi_mem_awaddr_u [C_M_AXI_MEM_NUM_BANKS]; + wire [M_AXI_MEM_ADDR_WIDTH-1:0] m_axi_mem_araddr_u [C_M_AXI_MEM_NUM_BANKS]; for (genvar i = 0; i < C_M_AXI_MEM_NUM_BANKS; ++i) begin : g_addressing - assign m_axi_mem_awaddr_a[i] = m_axi_mem_awaddr_u[i] + C_M_AXI_MEM_ADDR_WIDTH'(`PLATFORM_MEMORY_OFFSET); - assign m_axi_mem_araddr_a[i] = m_axi_mem_araddr_u[i] + C_M_AXI_MEM_ADDR_WIDTH'(`PLATFORM_MEMORY_OFFSET); + assign m_axi_mem_awaddr_a[i] = C_M_AXI_MEM_ADDR_WIDTH'(m_axi_mem_awaddr_u[i]) + C_M_AXI_MEM_ADDR_WIDTH'(`PLATFORM_MEMORY_OFFSET); 
+ assign m_axi_mem_araddr_a[i] = C_M_AXI_MEM_ADDR_WIDTH'(m_axi_mem_araddr_u[i]) + C_M_AXI_MEM_ADDR_WIDTH'(`PLATFORM_MEMORY_OFFSET); end `SCOPE_IO_SWITCH (2) Vortex_axi #( .AXI_DATA_WIDTH (C_M_AXI_MEM_DATA_WIDTH), - .AXI_ADDR_WIDTH (C_M_AXI_MEM_ADDR_WIDTH), + .AXI_ADDR_WIDTH (M_AXI_MEM_ADDR_WIDTH), .AXI_TID_WIDTH (C_M_AXI_MEM_ID_WIDTH), .AXI_NUM_BANKS (C_M_AXI_MEM_NUM_BANKS) ) vortex_axi ( diff --git a/hw/rtl/afu/xrt/vortex_afu.v b/hw/rtl/afu/xrt/vortex_afu.v index 918474d521..afda57f721 100644 --- a/hw/rtl/afu/xrt/vortex_afu.v +++ b/hw/rtl/afu/xrt/vortex_afu.v @@ -18,11 +18,10 @@ module vortex_afu #( parameter C_S_AXI_CTRL_DATA_WIDTH = 32, parameter C_M_AXI_MEM_ID_WIDTH = `PLATFORM_MEMORY_ID_WIDTH, parameter C_M_AXI_MEM_DATA_WIDTH = `PLATFORM_MEMORY_DATA_WIDTH, -`ifdef SYNTHESIS parameter C_M_AXI_MEM_ADDR_WIDTH = 64, +`ifdef PLATFORM_MERGED_MEMORY_INTERFACE parameter C_M_AXI_MEM_NUM_BANKS = 1 `else - parameter C_M_AXI_MEM_ADDR_WIDTH = `PLATFORM_MEMORY_ADDR_WIDTH, parameter C_M_AXI_MEM_NUM_BANKS = `PLATFORM_MEMORY_BANKS `endif ) ( diff --git a/hw/rtl/libs/VX_avs_adapter.sv b/hw/rtl/libs/VX_avs_adapter.sv index fe9a9a53b2..58144e7fe2 100644 --- a/hw/rtl/libs/VX_avs_adapter.sv +++ b/hw/rtl/libs/VX_avs_adapter.sv @@ -16,13 +16,13 @@ `TRACING_OFF module VX_avs_adapter #( parameter DATA_WIDTH = 1, - parameter ADDR_WIDTH = 1, + parameter ADDR_WIDTH_IN = 1, + parameter ADDR_WIDTH_OUT= 32, parameter BURST_WIDTH = 1, parameter NUM_BANKS = 1, parameter TAG_WIDTH = 1, parameter RD_QUEUE_SIZE = 1, parameter BANK_INTERLEAVE= 0, - parameter AVS_ADDR_WIDTH = ADDR_WIDTH - `CLOG2(NUM_BANKS), parameter REQ_OUT_BUF = 0, parameter RSP_OUT_BUF = 0 ) ( @@ -33,7 +33,7 @@ module VX_avs_adapter #( input wire mem_req_valid, input wire mem_req_rw, input wire [DATA_WIDTH/8-1:0] mem_req_byteen, - input wire [ADDR_WIDTH-1:0] mem_req_addr, + input wire [ADDR_WIDTH_IN-1:0] mem_req_addr, input wire [DATA_WIDTH-1:0] mem_req_data, input wire [TAG_WIDTH-1:0] mem_req_tag, output wire mem_req_ready, @@ 
-47,7 +47,7 @@ module VX_avs_adapter #( // AVS bus output wire [DATA_WIDTH-1:0] avs_writedata [NUM_BANKS], input wire [DATA_WIDTH-1:0] avs_readdata [NUM_BANKS], - output wire [AVS_ADDR_WIDTH-1:0] avs_address [NUM_BANKS], + output wire [ADDR_WIDTH_OUT-1:0] avs_address [NUM_BANKS], input wire avs_waitrequest [NUM_BANKS], output wire avs_write [NUM_BANKS], output wire avs_read [NUM_BANKS], @@ -58,30 +58,34 @@ module VX_avs_adapter #( localparam DATA_SIZE = DATA_WIDTH/8; localparam BANK_SEL_BITS = `CLOG2(NUM_BANKS); localparam BANK_SEL_WIDTH = `UP(BANK_SEL_BITS); - localparam BANK_OFFSETW = ADDR_WIDTH - BANK_SEL_BITS; + localparam DST_ADDR_WDITH = ADDR_WIDTH_OUT + BANK_SEL_BITS; // to input space + localparam BANK_OFFSETW = DST_ADDR_WDITH - BANK_SEL_BITS; - `STATIC_ASSERT ((AVS_ADDR_WIDTH >= BANK_OFFSETW), ("invalid parameter")) + `STATIC_ASSERT ((DST_ADDR_WDITH >= ADDR_WIDTH_IN), ("invalid address width: current=%0d, expected=%0d", DST_ADDR_WDITH, ADDR_WIDTH_IN)) // Requests handling ////////////////////////////////////////////////////// wire [NUM_BANKS-1:0] req_queue_push, req_queue_pop; wire [NUM_BANKS-1:0][TAG_WIDTH-1:0] req_queue_tag_out; wire [NUM_BANKS-1:0] req_queue_going_full; - wire [BANK_SEL_WIDTH-1:0] req_bank_sel; - wire [BANK_OFFSETW-1:0] req_bank_off; wire [NUM_BANKS-1:0] bank_req_ready; + wire [BANK_OFFSETW-1:0] req_bank_off; + wire [BANK_SEL_WIDTH-1:0] req_bank_sel; + + wire [DST_ADDR_WDITH-1:0] mem_req_addr_out = DST_ADDR_WDITH'(mem_req_addr); + if (NUM_BANKS > 1) begin : g_bank_sel if (BANK_INTERLEAVE) begin : g_interleave - assign req_bank_sel = mem_req_addr[BANK_SEL_BITS-1:0]; - assign req_bank_off = mem_req_addr[BANK_SEL_BITS +: BANK_OFFSETW]; + assign req_bank_sel = mem_req_addr_out[BANK_SEL_BITS-1:0]; + assign req_bank_off = mem_req_addr_out[BANK_SEL_BITS +: BANK_OFFSETW]; end else begin : g_no_interleave - assign req_bank_sel = mem_req_addr[BANK_OFFSETW +: BANK_SEL_BITS]; - assign req_bank_off = mem_req_addr[BANK_OFFSETW-1:0]; + assign 
req_bank_sel = mem_req_addr_out[BANK_OFFSETW +: BANK_SEL_BITS]; + assign req_bank_off = mem_req_addr_out[BANK_OFFSETW-1:0]; end end else begin : g_no_bank_sel assign req_bank_sel = '0; - assign req_bank_off = mem_req_addr; + assign req_bank_off = mem_req_addr_out; end for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_req_queue_push @@ -151,7 +155,7 @@ module VX_avs_adapter #( assign avs_read[i] = valid_out && ~rw_out; assign avs_write[i] = valid_out && rw_out; - assign avs_address[i] = AVS_ADDR_WIDTH'(addr_out); + assign avs_address[i] = ADDR_WIDTH_OUT'(addr_out); assign avs_byteenable[i] = byteen_out; assign avs_writedata[i] = data_out; assign avs_burstcount[i] = BURST_WIDTH'(1); diff --git a/hw/rtl/libs/VX_axi_adapter.sv b/hw/rtl/libs/VX_axi_adapter.sv index bdd699053d..a21b8554fb 100644 --- a/hw/rtl/libs/VX_axi_adapter.sv +++ b/hw/rtl/libs/VX_axi_adapter.sv @@ -16,10 +16,10 @@ `TRACING_OFF module VX_axi_adapter #( parameter DATA_WIDTH = 512, - parameter ADDR_WIDTH = 32, + parameter ADDR_WIDTH_IN = 1, + parameter ADDR_WIDTH_OUT = 32, parameter TAG_WIDTH = 8, parameter NUM_BANKS = 1, - parameter AXI_ADDR_WIDTH = (ADDR_WIDTH - `CLOG2(DATA_WIDTH/8)), parameter BANK_INTERLEAVE= 0, parameter RSP_OUT_BUF = 0 ) ( @@ -30,7 +30,7 @@ module VX_axi_adapter #( input wire mem_req_valid, input wire mem_req_rw, input wire [DATA_WIDTH/8-1:0] mem_req_byteen, - input wire [ADDR_WIDTH-1:0] mem_req_addr, + input wire [ADDR_WIDTH_IN-1:0] mem_req_addr, input wire [DATA_WIDTH-1:0] mem_req_data, input wire [TAG_WIDTH-1:0] mem_req_tag, output wire mem_req_ready, @@ -44,7 +44,7 @@ module VX_axi_adapter #( // AXI write request address channel output wire m_axi_awvalid [NUM_BANKS], input wire m_axi_awready [NUM_BANKS], - output wire [AXI_ADDR_WIDTH-1:0] m_axi_awaddr [NUM_BANKS], + output wire [ADDR_WIDTH_OUT-1:0] m_axi_awaddr [NUM_BANKS], output wire [TAG_WIDTH-1:0] m_axi_awid [NUM_BANKS], output wire [7:0] m_axi_awlen [NUM_BANKS], output wire [2:0] m_axi_awsize [NUM_BANKS], @@ -71,7 +71,7 
@@ module VX_axi_adapter #( // AXI read address channel output wire m_axi_arvalid [NUM_BANKS], input wire m_axi_arready [NUM_BANKS], - output wire [AXI_ADDR_WIDTH-1:0] m_axi_araddr [NUM_BANKS], + output wire [ADDR_WIDTH_OUT-1:0] m_axi_araddr [NUM_BANKS], output wire [TAG_WIDTH-1:0] m_axi_arid [NUM_BANKS], output wire [7:0] m_axi_arlen [NUM_BANKS], output wire [2:0] m_axi_arsize [NUM_BANKS], @@ -93,25 +93,27 @@ module VX_axi_adapter #( localparam DATA_SIZE = `CLOG2(DATA_WIDTH/8); localparam BANK_SEL_BITS = `CLOG2(NUM_BANKS); localparam BANK_SEL_WIDTH = `UP(BANK_SEL_BITS); - localparam BANK_OFFSETW = ADDR_WIDTH - BANK_SEL_BITS; - localparam DST_ADDR_WDITH = BANK_OFFSETW + `CLOG2(DATA_WIDTH/8); + localparam DST_ADDR_WDITH = ADDR_WIDTH_OUT + BANK_SEL_BITS - `CLOG2(DATA_WIDTH/8); // to input space + localparam BANK_OFFSETW = DST_ADDR_WDITH - BANK_SEL_BITS; - `STATIC_ASSERT ((AXI_ADDR_WIDTH >= DST_ADDR_WDITH), ("invalid tag width: current=%0d, expected=%0d", AXI_ADDR_WIDTH, DST_ADDR_WDITH)) + `STATIC_ASSERT ((DST_ADDR_WDITH >= ADDR_WIDTH_IN), ("invalid address width: current=%0d, expected=%0d", DST_ADDR_WDITH, ADDR_WIDTH_IN)) - wire [BANK_SEL_WIDTH-1:0] req_bank_sel; wire [BANK_OFFSETW-1:0] req_bank_off; + wire [BANK_SEL_WIDTH-1:0] req_bank_sel; + + wire [DST_ADDR_WDITH-1:0] mem_req_addr_out = DST_ADDR_WDITH'(mem_req_addr); if (NUM_BANKS > 1) begin : g_bank_sel if (BANK_INTERLEAVE) begin : g_interleave - assign req_bank_sel = mem_req_addr[BANK_SEL_BITS-1:0]; - assign req_bank_off = mem_req_addr[BANK_SEL_BITS +: BANK_OFFSETW]; + assign req_bank_sel = mem_req_addr_out[BANK_SEL_BITS-1:0]; + assign req_bank_off = mem_req_addr_out[BANK_SEL_BITS +: BANK_OFFSETW]; end else begin : g_no_interleave - assign req_bank_sel = mem_req_addr[BANK_OFFSETW +: BANK_SEL_BITS]; - assign req_bank_off = mem_req_addr[BANK_OFFSETW-1:0]; + assign req_bank_sel = mem_req_addr_out[BANK_OFFSETW +: BANK_SEL_BITS]; + assign req_bank_off = mem_req_addr_out[BANK_OFFSETW-1:0]; end end else begin : 
g_no_bank_sel assign req_bank_sel = '0; - assign req_bank_off = mem_req_addr; + assign req_bank_off = mem_req_addr_out; end wire mem_req_fire = mem_req_valid && mem_req_ready; @@ -148,7 +150,7 @@ module VX_axi_adapter #( // AXI write request address channel for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_axi_write_addr assign m_axi_awvalid[i] = mem_req_valid && mem_req_rw && (req_bank_sel == i) && ~m_axi_aw_ack[i]; - assign m_axi_awaddr[i] = AXI_ADDR_WIDTH'(req_bank_off); + assign m_axi_awaddr[i] = ADDR_WIDTH_OUT'(req_bank_off) << `CLOG2(DATA_WIDTH/8); assign m_axi_awid[i] = mem_req_tag; assign m_axi_awlen[i] = 8'b00000000; assign m_axi_awsize[i] = 3'(DATA_SIZE); @@ -180,7 +182,7 @@ module VX_axi_adapter #( // AXI read request channel for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_axi_read_req assign m_axi_arvalid[i] = mem_req_valid && ~mem_req_rw && (req_bank_sel == i); - assign m_axi_araddr[i] = AXI_ADDR_WIDTH'(req_bank_off); + assign m_axi_araddr[i] = ADDR_WIDTH_OUT'(req_bank_off) << `CLOG2(DATA_WIDTH/8); assign m_axi_arid[i] = mem_req_tag; assign m_axi_arlen[i] = 8'b00000000; assign m_axi_arsize[i] = 3'(DATA_SIZE); diff --git a/sim/xrtsim/vortex_afu_shim.sv b/sim/xrtsim/vortex_afu_shim.sv index 9b3e2e8edf..f94617f1ee 100644 --- a/sim/xrtsim/vortex_afu_shim.sv +++ b/sim/xrtsim/vortex_afu_shim.sv @@ -17,8 +17,8 @@ module vortex_afu_shim #( parameter C_S_AXI_CTRL_ADDR_WIDTH = 8, parameter C_S_AXI_CTRL_DATA_WIDTH = 32, parameter C_M_AXI_MEM_ID_WIDTH = `PLATFORM_MEMORY_ID_WIDTH, - parameter C_M_AXI_MEM_ADDR_WIDTH = `PLATFORM_MEMORY_ADDR_WIDTH, parameter C_M_AXI_MEM_DATA_WIDTH = `PLATFORM_MEMORY_DATA_WIDTH, + parameter C_M_AXI_MEM_ADDR_WIDTH = 64, parameter C_M_AXI_MEM_NUM_BANKS = `PLATFORM_MEMORY_BANKS ) ( // System signals @@ -54,8 +54,8 @@ module vortex_afu_shim #( .C_S_AXI_CTRL_ADDR_WIDTH (C_S_AXI_CTRL_ADDR_WIDTH), .C_S_AXI_CTRL_DATA_WIDTH (C_S_AXI_CTRL_DATA_WIDTH), .C_M_AXI_MEM_ID_WIDTH (C_M_AXI_MEM_ID_WIDTH), - .C_M_AXI_MEM_ADDR_WIDTH 
(C_M_AXI_MEM_ADDR_WIDTH), .C_M_AXI_MEM_DATA_WIDTH (C_M_AXI_MEM_DATA_WIDTH), + .C_M_AXI_MEM_ADDR_WIDTH (C_M_AXI_MEM_ADDR_WIDTH), .C_M_AXI_MEM_NUM_BANKS (C_M_AXI_MEM_NUM_BANKS) ) afu_wrap ( .clk (ap_clk), diff --git a/sim/xrtsim/xrt_sim.cpp b/sim/xrtsim/xrt_sim.cpp index feb13dd1cd..4ee15baa1b 100644 --- a/sim/xrtsim/xrt_sim.cpp +++ b/sim/xrtsim/xrt_sim.cpp @@ -61,12 +61,6 @@ #define CPU_GPU_LATENCY 200 -#if PLATFORM_MEMORY_ADDR_WIDTH > 32 - typedef QData Vl_m_addr_t; -#else - typedef IData Vl_m_addr_t; -#endif - #if PLATFORM_MEMORY_DATA_WIDTH > 64 typedef VlWide<(PLATFORM_MEMORY_DATA_WIDTH/32)> Vl_m_data_t; #else @@ -482,7 +476,7 @@ class xrt_sim::Impl { if (*m_axi_mem_[i].arvalid && *m_axi_mem_[i].arready) { auto mem_req = new mem_req_t(); mem_req->tag = *m_axi_mem_[i].arid; - mem_req->addr = i * mem_bank_size_ + uint64_t(*m_axi_mem_[i].araddr) * PLATFORM_MEMORY_DATA_SIZE; + mem_req->addr = i * mem_bank_size_ + uint64_t(*m_axi_mem_[i].araddr); ram_->read(mem_req->data.data(), mem_req->addr, PLATFORM_MEMORY_DATA_SIZE); mem_req->write = false; mem_req->ready = false; @@ -511,7 +505,7 @@ class xrt_sim::Impl { auto byteen = *m_axi_mem_[i].wstrb; auto data = (uint8_t*)m_axi_mem_[i].wdata->data(); - auto byte_addr = i * mem_bank_size_ + m_axi_states_[i].write_req_addr * PLATFORM_MEMORY_DATA_SIZE; + auto byte_addr = i * mem_bank_size_ + m_axi_states_[i].write_req_addr; for (int i = 0; i < PLATFORM_MEMORY_DATA_SIZE; i++) { if ((byteen >> i) & 0x1) { @@ -562,7 +556,7 @@ class xrt_sim::Impl { typedef struct { CData* awvalid; CData* awready; - Vl_m_addr_t* awaddr; + QData* awaddr; IData* awid; CData* awlen; CData* wvalid; @@ -572,7 +566,7 @@ class xrt_sim::Impl { CData* wlast; CData* arvalid; CData* arready; - Vl_m_addr_t* araddr; + QData* araddr; IData* arid; CData* arlen; CData* rvalid; From 2cf483ddf5755a48ae493e1a78dfffef4986fbee Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Mon, 23 Sep 2024 21:01:24 -0700 Subject: [PATCH 222/407] xrt afu bug fixes --- 
hw/rtl/afu/xrt/VX_afu_wrap.sv | 5 +- hw/syn/xilinx/xrt/Makefile | 6 +- hw/syn/xilinx/xrt/gen_xml.py | 75 -------------------- hw/syn/xilinx/xrt/gen_xo.tcl | 2 +- hw/syn/xilinx/xrt/package_kernel.tcl | 100 +++++++++++++++++++++++++++ hw/syn/xilinx/xrt/platforms.mk | 34 +++------ sim/xrtsim/xrt_sim.cpp | 4 +- 7 files changed, 116 insertions(+), 110 deletions(-) delete mode 100755 hw/syn/xilinx/xrt/gen_xml.py diff --git a/hw/rtl/afu/xrt/VX_afu_wrap.sv b/hw/rtl/afu/xrt/VX_afu_wrap.sv index 235247177b..e515b080ba 100644 --- a/hw/rtl/afu/xrt/VX_afu_wrap.sv +++ b/hw/rtl/afu/xrt/VX_afu_wrap.sv @@ -236,8 +236,9 @@ module VX_afu_wrap #( wire [M_AXI_MEM_ADDR_WIDTH-1:0] m_axi_mem_araddr_u [C_M_AXI_MEM_NUM_BANKS]; for (genvar i = 0; i < C_M_AXI_MEM_NUM_BANKS; ++i) begin : g_addressing - assign m_axi_mem_awaddr_a[i] = C_M_AXI_MEM_ADDR_WIDTH'(m_axi_mem_awaddr_u[i]) + C_M_AXI_MEM_ADDR_WIDTH'(`PLATFORM_MEMORY_OFFSET); - assign m_axi_mem_araddr_a[i] = C_M_AXI_MEM_ADDR_WIDTH'(m_axi_mem_araddr_u[i]) + C_M_AXI_MEM_ADDR_WIDTH'(`PLATFORM_MEMORY_OFFSET); + localparam [C_M_AXI_MEM_ADDR_WIDTH-1:0] BANK_OFFSET = C_M_AXI_MEM_ADDR_WIDTH'(`PLATFORM_MEMORY_OFFSET) + C_M_AXI_MEM_ADDR_WIDTH'(i) << M_AXI_MEM_ADDR_WIDTH; + assign m_axi_mem_awaddr_a[i] = C_M_AXI_MEM_ADDR_WIDTH'(m_axi_mem_awaddr_u[i]) + BANK_OFFSET; + assign m_axi_mem_araddr_a[i] = C_M_AXI_MEM_ADDR_WIDTH'(m_axi_mem_araddr_u[i]) + BANK_OFFSET; end `SCOPE_IO_SWITCH (2) diff --git a/hw/syn/xilinx/xrt/Makefile b/hw/syn/xilinx/xrt/Makefile index a5a38e281b..957940afac 100644 --- a/hw/syn/xilinx/xrt/Makefile +++ b/hw/syn/xilinx/xrt/Makefile @@ -164,12 +164,8 @@ scope-json: $(BUILD_DIR)/scope.json $(BUILD_DIR)/scope.json: $(BUILD_DIR)/vortex.xml mkdir -p $(BUILD_DIR); cd $(BUILD_DIR); $(SCRIPT_DIR)/scope.py vortex.xml -o scope.json -gen-xml: -$(BUILD_DIR)/kernel.xml: - mkdir -p $(BUILD_DIR); cd $(BUILD_DIR); $(SRC_DIR)/gen_xml.py -n $(M_AXI_NUM_BANKS) -d $(M_AXI_DATA_WIDTH) -a $(M_AXI_ADDRESS_WIDTH) -o kernel.xml - gen-xo: $(XO_CONTAINER) 
-$(XO_CONTAINER): $(BUILD_DIR)/sources.txt $(BUILD_DIR)/kernel.xml +$(XO_CONTAINER): $(BUILD_DIR)/sources.txt mkdir -p $(BUILD_DIR); cd $(BUILD_DIR); $(VIVADO) -mode batch -source $(SRC_DIR)/gen_xo.tcl -tclargs ../$(XO_CONTAINER) vortex_afu sources.txt $(SCRIPT_DIR) ../$(BUILD_DIR) gen-bin: $(XCLBIN_CONTAINER) diff --git a/hw/syn/xilinx/xrt/gen_xml.py b/hw/syn/xilinx/xrt/gen_xml.py deleted file mode 100755 index 4ba906b9a4..0000000000 --- a/hw/syn/xilinx/xrt/gen_xml.py +++ /dev/null @@ -1,75 +0,0 @@ -#!/usr/bin/env python3 - -# Copyright © 2019-2023 -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import argparse -import xml.etree.ElementTree as ET -from xml.dom import minidom - -def prettify(elem): - """Return a pretty-printed XML string for the Element.""" - rough_string = ET.tostring(elem, 'utf-8') - reparsed = minidom.parseString(rough_string) - return reparsed.toprettyxml(indent=" ") - -def generate_xml(numbanks, datawidth, addresswidth, offset, output_file): - root = ET.Element("root", versionMajor="1", versionMinor="6") - kernel = ET.SubElement(root, "kernel", name="vortex_afu", language="ip_c", - vlnv="mycompany.com:kernel:vortex_afu:1.0", - attributes="", preferredWorkGroupSizeMultiple="0", - workGroupSize="1", interrupt="true") - - ports = ET.SubElement(kernel, "ports") - - # control ports - ET.SubElement(ports, "port", name="s_axi_ctrl", mode="slave", range="0x1000", dataWidth="32", portType="addressable", base="0x0") - - # memory ports - for i in range(numbanks): - port_name = f"m_axi_mem_{i}" - ET.SubElement(ports, "port", name=port_name, mode="master", range=f"0x{(1 << addresswidth) - 1:X}", dataWidth=str(datawidth), portType="addressable", base=f"0x0") - - args = ET.SubElement(kernel, "args") - - # control args - ET.SubElement(args, "arg", name="dev", addressQualifier="0", id="0", port="s_axi_ctrl", size="0x4", offset="0x010", type="uint", hostOffset="0x0", hostSize="0x4") - ET.SubElement(args, "arg", name="isa", addressQualifier="0", id="1", port="s_axi_ctrl", size="0x4", offset="0x018", type="uint", hostOffset="0x0", hostSize="0x4") - ET.SubElement(args, "arg", name="dcr", addressQualifier="0", id="2", port="s_axi_ctrl", size="0x4", offset="0x020", type="uint", hostOffset="0x0", hostSize="0x4") - ET.SubElement(args, "arg", name="scp", addressQualifier="0", id="3", port="s_axi_ctrl", size="0x4", offset="0x028", type="uint", hostOffset="0x0", hostSize="0x4") - - # memory args - for i in range(numbanks): - arg_name = f"mem_{i}" - ET.SubElement(args, "arg", name=arg_name, addressQualifier="1", id=str(4 + i), - port=f"m_axi_mem_{i}", 
size="0x8", offset=f"0x{offset + (i * 8):X}", - type="int*", hostOffset="0x0", hostSize="0x8") - - # Pretty-print and write the XML to file - with open(output_file, "w") as f: - f.write(prettify(root)) - -def main(): - parser = argparse.ArgumentParser(description="Kernel Configuration File Generator") - parser.add_argument("-n", "--numbanks", type=int, default=1, help="Number of AXI memory banks") - parser.add_argument("-d", "--datawidth", type=int, default=512, help="Data width of the AXI memory ports") - parser.add_argument("-a", "--addresswidth", type=int, default=28, help="Address width of the AXI memory ports") - parser.add_argument("-x", "--offset", type=lambda x: int(x, 0), default=0x30, help="Starting offset for kernel args (hex)") - parser.add_argument("-o", "--output", type=str, default="kernel.xml", help="Output XML file name") - args = parser.parse_args() - - # Call the generate function - generate_xml(args.numbanks, args.datawidth, args.addresswidth, args.offset, args.output) - -if __name__ == "__main__": - main() diff --git a/hw/syn/xilinx/xrt/gen_xo.tcl b/hw/syn/xilinx/xrt/gen_xo.tcl index 9301a096ec..d5b1e41a27 100644 --- a/hw/syn/xilinx/xrt/gen_xo.tcl +++ b/hw/syn/xilinx/xrt/gen_xo.tcl @@ -37,4 +37,4 @@ set argv [list ${krnl_name} ${vcs_file} ${tool_dir} ${build_dir}] set argc 4 source ${script_path}/package_kernel.tcl -package_xo -xo_path ${xoname} -kernel_name ${krnl_name} -ip_directory "${build_dir}/xo/packaged_kernel" -kernel_xml ${build_dir}/kernel.xml +package_xo -xo_path ${xoname} -kernel_name ${krnl_name} -ip_directory "${build_dir}/xo/packaged_kernel" \ No newline at end of file diff --git a/hw/syn/xilinx/xrt/package_kernel.tcl b/hw/syn/xilinx/xrt/package_kernel.tcl index ed09639dde..ebe767c695 100644 --- a/hw/syn/xilinx/xrt/package_kernel.tcl +++ b/hw/syn/xilinx/xrt/package_kernel.tcl @@ -160,6 +160,106 @@ for {set i 0} {$i < $num_banks} {incr i} { ipx::associate_bus_interfaces -busif m_axi_mem_$i -clock ap_clk $core } +set mem_map 
[::ipx::add_memory_map -quiet "s_axi_ctrl" $core] +set addr_block [::ipx::add_address_block -quiet "reg0" $mem_map] + +set reg [::ipx::add_register "CTRL" $addr_block] +set_property description "Control signals" $reg +set_property address_offset 0x000 $reg +set_property size 32 $reg + +set field [ipx::add_field AP_START $reg] +set_property ACCESS {read-write} $field +set_property BIT_OFFSET {0} $field +set_property BIT_WIDTH {1} $field +set_property DESCRIPTION {Control signal Register for 'ap_start'.} $field +set_property MODIFIED_WRITE_VALUE {modify} $field + +set field [ipx::add_field AP_DONE $reg] +set_property ACCESS {read-only} $field +set_property BIT_OFFSET {1} $field +set_property BIT_WIDTH {1} $field +set_property DESCRIPTION {Control signal Register for 'ap_done'.} $field +set_property READ_ACTION {modify} $field + +set field [ipx::add_field AP_IDLE $reg] +set_property ACCESS {read-only} $field +set_property BIT_OFFSET {2} $field +set_property BIT_WIDTH {1} $field +set_property DESCRIPTION {Control signal Register for 'ap_idle'.} $field +set_property READ_ACTION {modify} $field + +set field [ipx::add_field AP_READY $reg] +set_property ACCESS {read-only} $field +set_property BIT_OFFSET {3} $field +set_property BIT_WIDTH {1} $field +set_property DESCRIPTION {Control signal Register for 'ap_ready'.} $field +set_property READ_ACTION {modify} $field + +set field [ipx::add_field RESERVED_1 $reg] +set_property ACCESS {read-only} $field +set_property BIT_OFFSET {4} $field +set_property BIT_WIDTH {3} $field +set_property DESCRIPTION {Reserved. 
0s on read.} $field +set_property READ_ACTION {modify} $field + +set field [ipx::add_field AUTO_RESTART $reg] +set_property ACCESS {read-write} $field +set_property BIT_OFFSET {7} $field +set_property BIT_WIDTH {1} $field +set_property DESCRIPTION {Control signal Register for 'auto_restart'.} $field +set_property MODIFIED_WRITE_VALUE {modify} $field + +set field [ipx::add_field RESERVED_2 $reg] +set_property ACCESS {read-only} $field +set_property BIT_OFFSET {8} $field +set_property BIT_WIDTH {24} $field +set_property DESCRIPTION {Reserved. 0s on read.} $field +set_property READ_ACTION {modify} $field + +set reg [::ipx::add_register "GIER" $addr_block] +set_property description "Global Interrupt Enable Register" $reg +set_property address_offset 0x004 $reg +set_property size 32 $reg + +set reg [::ipx::add_register "IP_IER" $addr_block] +set_property description "IP Interrupt Enable Register" $reg +set_property address_offset 0x008 $reg +set_property size 32 $reg + +set reg [::ipx::add_register "IP_ISR" $addr_block] +set_property description "IP Interrupt Status Register" $reg +set_property address_offset 0x00C $reg +set_property size 32 $reg + +set reg [::ipx::add_register -quiet "DEV" $addr_block] +set_property address_offset 0x010 $reg +set_property size [expr {8*8}] $reg + +set reg [::ipx::add_register -quiet "ISA" $addr_block] +set_property address_offset 0x018 $reg +set_property size [expr {8*8}] $reg + +set reg [::ipx::add_register -quiet "DCR" $addr_block] +set_property address_offset 0x020 $reg +set_property size [expr {8*8}] $reg + +set reg [::ipx::add_register -quiet "SCP" $addr_block] +set_property address_offset 0x028 $reg +set_property size [expr {8*8}] $reg + +for {set i 0} {$i < $num_banks} {incr i} { +# Add register for each memory bank +set reg [::ipx::add_register -quiet "MEM_$i" $addr_block] +set_property address_offset [expr {0x30 + $i * 8}] $reg +set_property size [expr {8*8}] $reg +# Associate the bus interface +set regparam 
[::ipx::add_register_parameter ASSOCIATED_BUSIF $reg] +set_property value m_axi_mem_$i $regparam +} + +set_property slave_memory_map_ref "s_axi_ctrl" [::ipx::get_bus_interfaces -of $core "s_axi_ctrl"] + set_property xpm_libraries {XPM_CDC XPM_MEMORY XPM_FIFO} $core set_property sdx_kernel true $core set_property sdx_kernel_type rtl $core diff --git a/hw/syn/xilinx/xrt/platforms.mk b/hw/syn/xilinx/xrt/platforms.mk index a3584942c7..5a9a88e4d9 100644 --- a/hw/syn/xilinx/xrt/platforms.mk +++ b/hw/syn/xilinx/xrt/platforms.mk @@ -1,9 +1,7 @@ # Platform specific configurations # Add your platform specific configurations here -M_AXI_NUM_BANKS := 1 -M_AXI_DATA_WIDTH := 512 -M_AXI_ADDRESS_WIDTH := 32 +CONFIGS += -DPLATFORM_MEMORY_DATA_WIDTH=512 ifeq ($(DEV_ARCH), zynquplus) # zynquplus @@ -17,35 +15,21 @@ endif else # alveo ifneq ($(findstring xilinx_u55c,$(XSA)),) - CONFIGS += -DPLATFORM_MEMORY_BANKS=32 -DPLATFORM_MEMORY_ADDR_WIDTH=28 - #VPP_FLAGS += --connectivity.sp vortex_afu_1.m_axi_mem_0:HBM[0:31] - #CONFIGS += -DPLATFORM_MERGED_MEMORY_INTERFACE - VPP_FLAGS += $(foreach i,$(shell seq 0 31), --connectivity.sp vortex_afu_1.m_axi_mem_$(i):HBM[$(i)]) - M_AXI_NUM_BANKS := 32 - M_AXI_ADDRESS_WIDTH := 28 + CONFIGS += -DPLATFORM_MEMORY_BANKS=32 -DPLATFORM_MEMORY_ADDR_WIDTH=29 + CONFIGS += -DPLATFORM_MERGED_MEMORY_INTERFACE + VPP_FLAGS += --connectivity.sp vortex_afu_1.m_axi_mem_0:HBM[0:31] + #VPP_FLAGS += $(foreach i,$(shell seq 0 31), --connectivity.sp vortex_afu_1.m_axi_mem_$(i):HBM[$(i)]) else ifneq ($(findstring xilinx_u50,$(XSA)),) - CONFIGS += -DPLATFORM_MEMORY_BANKS=16 -DPLATFORM_MEMORY_ADDR_WIDTH=28 - VPP_FLAGS += --connectivity.sp vortex_afu_1.m_axi_mem_0:HBM[0:15] - M_AXI_NUM_BANKS := 16 - M_AXI_ADDRESS_WIDTH := 28 + CONFIGS += -DPLATFORM_MEMORY_BANKS=32 -DPLATFORM_MEMORY_ADDR_WIDTH=28 + VPP_FLAGS += --connectivity.sp vortex_afu_1.m_axi_mem_0:HBM[0:31] else ifneq ($(findstring xilinx_u280,$(XSA)),) - CONFIGS += -DPLATFORM_MEMORY_BANKS=16 
-DPLATFORM_MEMORY_ADDR_WIDTH=28 - VPP_FLAGS += --connectivity.sp vortex_afu_1.m_axi_mem_0:HBM[0:15] - M_AXI_NUM_BANKS := 16 - M_AXI_ADDRESS_WIDTH := 28 + CONFIGS += -DPLATFORM_MEMORY_BANKS=32 -DPLATFORM_MEMORY_ADDR_WIDTH=28 + VPP_FLAGS += --connectivity.sp vortex_afu_1.m_axi_mem_0:HBM[0:31] else ifneq ($(findstring xilinx_u250,$(XSA)),) CONFIGS += -DPLATFORM_MEMORY_BANKS=4 -DPLATFORM_MEMORY_ADDR_WIDTH=34 - M_AXI_NUM_BANKS := 4 - M_AXI_ADDRESS_WIDTH := 34 else ifneq ($(findstring xilinx_u200,$(XSA)),) CONFIGS += -DPLATFORM_MEMORY_BANKS=4 -DPLATFORM_MEMORY_ADDR_WIDTH=34 - M_AXI_NUM_BANKS := 4 - M_AXI_ADDRESS_WIDTH := 34 else CONFIGS += -DPLATFORM_MEMORY_BANKS=1 -DPLATFORM_MEMORY_ADDR_WIDTH=32 - M_AXI_NUM_BANKS := 1 - M_AXI_ADDRESS_WIDTH := 32 endif endif - -CONFIGS += -DPLATFORM_MEMORY_DATA_WIDTH=$(M_AXI_DATA_WIDTH) \ No newline at end of file diff --git a/sim/xrtsim/xrt_sim.cpp b/sim/xrtsim/xrt_sim.cpp index 4ee15baa1b..96adf08583 100644 --- a/sim/xrtsim/xrt_sim.cpp +++ b/sim/xrtsim/xrt_sim.cpp @@ -476,7 +476,7 @@ class xrt_sim::Impl { if (*m_axi_mem_[i].arvalid && *m_axi_mem_[i].arready) { auto mem_req = new mem_req_t(); mem_req->tag = *m_axi_mem_[i].arid; - mem_req->addr = i * mem_bank_size_ + uint64_t(*m_axi_mem_[i].araddr); + mem_req->addr = uint64_t(*m_axi_mem_[i].araddr); ram_->read(mem_req->data.data(), mem_req->addr, PLATFORM_MEMORY_DATA_SIZE); mem_req->write = false; mem_req->ready = false; @@ -505,7 +505,7 @@ class xrt_sim::Impl { auto byteen = *m_axi_mem_[i].wstrb; auto data = (uint8_t*)m_axi_mem_[i].wdata->data(); - auto byte_addr = i * mem_bank_size_ + m_axi_states_[i].write_req_addr; + auto byte_addr = m_axi_states_[i].write_req_addr; for (int i = 0; i < PLATFORM_MEMORY_DATA_SIZE; i++) { if ((byteen >> i) & 0x1) { From a9a5ded030ebee8b76631a7cc4681adbf84605cf Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Mon, 23 Sep 2024 23:54:43 -0700 Subject: [PATCH 223/407] bitmanip logceil fix --- sim/common/bitmanip.h | 22 +++++++--------------- 
sim/simx/cache_sim.cpp | 26 +++++++++++++++++++++++--- sim/simx/mem_sim.cpp | 4 ++-- sim/simx/types.h | 9 +++++---- 4 files changed, 37 insertions(+), 24 deletions(-) diff --git a/sim/common/bitmanip.h b/sim/common/bitmanip.h index 89247b89ca..053f254c84 100644 --- a/sim/common/bitmanip.h +++ b/sim/common/bitmanip.h @@ -20,9 +20,9 @@ template constexpr uint32_t count_leading_zeros(T value) { static_assert(std::is_integral::value, "invalid data type"); if constexpr (sizeof(T) > 4) { - return value ? __builtin_clzll(value) : 64; + return value ? __builtin_clzll(value) : (sizeof(T) * 8); } else { - return value ? __builtin_clz(value) : 32; + return value ? __builtin_clz(value) : (sizeof(T) * 8); } } @@ -30,9 +30,9 @@ template constexpr uint32_t count_trailing_zeros(T value) { static_assert(std::is_integral::value, "invalid data type"); if constexpr (sizeof(T) > 4) { - return value ? __builtin_ctzll(value) : 64; + return value ? __builtin_ctzll(value) : (sizeof(T) * 8); } else { - return value ? __builtin_ctz(value) : 32; + return value ? 
__builtin_ctz(value) : (sizeof(T) * 8); } } @@ -45,7 +45,7 @@ constexpr bool ispow2(T value) { template constexpr uint32_t log2ceil(T value) { static_assert(std::is_integral::value, "invalid data type"); - return (sizeof(T) * 8) - count_leading_zeros(value - 1); + return (sizeof(T) * 8) - count_leading_zeros(value - 1); } template @@ -57,21 +57,13 @@ inline unsigned log2up(T value) { template constexpr unsigned log2floor(T value) { static_assert(std::is_integral::value, "invalid data type"); - if constexpr (sizeof(T) > 4) { - return 63 - count_leading_zeros(value); - } else { - return 31 - count_leading_zeros(value); - } + return (sizeof(T) * 8 - 1) - count_leading_zeros(value); } template constexpr unsigned ceil2(T value) { static_assert(std::is_integral::value, "invalid data type"); - if constexpr (sizeof(T) > 4) { - return 64 - count_leading_zeros(value); - } else { - return 32 - count_leading_zeros(value); - } + return (sizeof(T) * 8) - count_leading_zeros(value); } inline uint64_t bit_clr(uint64_t bits, uint32_t index) { diff --git a/sim/simx/cache_sim.cpp b/sim/simx/cache_sim.cpp index 71b2f46998..27a73ba726 100644 --- a/sim/simx/cache_sim.cpp +++ b/sim/simx/cache_sim.cpp @@ -170,6 +170,25 @@ struct bank_req_t { } }; +inline std::ostream &operator<<(std::ostream &os, const bank_req_t& req) { + os << "set=" << req.set_id << ", rw=" << req.write; + os << std::dec << ", type=" << req.type; + os << ", tag=0x" << std::hex << req.tag; + os << ", req_tags={"; + bool first_port = true; + for (auto& port : req.ports) { + if (port.valid) { + if (!first_port) os << ", "; + first_port = false; + os << "[" << std::dec << port.req_id << "]=0x" << std::hex << port.req_tag; + } + } + os << "}"; + os << std::dec << ", cid=" << req.cid; + os << " (#" << req.uuid << ")"; + return os; +} + struct mshr_entry_t { bank_req_t bank_req; uint32_t line_id; @@ -542,7 +561,7 @@ class CacheSim::Impl { uint64_t tag = mem_rsp.tag >> params_.log2_num_inputs; MemRsp core_rsp{tag, mem_rsp.cid, 
mem_rsp.uuid}; simobject_->CoreRspPorts.at(req_id).push(core_rsp, config_.latency); - DT(3, simobject_->name() << " core-rsp: " << core_rsp); + DT(3, simobject_->name() << " bypass-core-rsp: " << core_rsp); } void processBypassRequest(const MemReq& core_req, uint32_t req_id) { @@ -550,13 +569,13 @@ class CacheSim::Impl { MemReq mem_req(core_req); mem_req.tag = (core_req.tag << params_.log2_num_inputs) + req_id; bypass_switch_->ReqIn.at(1).push(mem_req, 1); - DT(3, simobject_->name() << " dram-req: " << mem_req); + DT(3, simobject_->name() << " bypass-dram-req: " << mem_req); } if (core_req.write && config_.write_reponse) { MemRsp core_rsp{core_req.tag, core_req.cid, core_req.uuid}; simobject_->CoreRspPorts.at(req_id).push(core_rsp, 1); - DT(3, simobject_->name() << " core-rsp: " << core_rsp); + DT(3, simobject_->name() << " bypass-core-rsp: " << core_rsp); } } @@ -694,6 +713,7 @@ class CacheSim::Impl { // allocate MSHR auto mshr_id = bank.mshr.allocate(pipeline_req, (free_line_id != -1) ? 
free_line_id : repl_line_id); + DT(3, simobject_->name() << "-bank" << bank_id << " mshr-enqueue: " << pipeline_req); // send fill request if (!mshr_pending) { diff --git a/sim/simx/mem_sim.cpp b/sim/simx/mem_sim.cpp index a38f4c01c8..37ea3bb88e 100644 --- a/sim/simx/mem_sim.cpp +++ b/sim/simx/mem_sim.cpp @@ -77,7 +77,7 @@ class MemSim::Impl { if (!rsp_args->request.write) { MemRsp mem_rsp{rsp_args->request.tag, rsp_args->request.cid, rsp_args->request.uuid}; rsp_args->simobject->MemRspPorts.at(rsp_args->i).push(mem_rsp, 1); - DT(3, rsp_args->simobject->name() << " mem-rsp: " << mem_rsp << " bank: " << rsp_args->i); + DT(3, rsp_args->simobject->name() << " mem-rsp: bank=" << rsp_args->i << ", " << mem_rsp); } delete rsp_args; }, @@ -90,7 +90,7 @@ class MemSim::Impl { continue; } - DT(3, simobject_->name() << " mem-req: " << mem_req << " bank: " << i); + DT(3, simobject_->name() << " mem-req: bank=" << i << ", " << mem_req); simobject_->MemReqPorts.at(i).pop(); counter++; diff --git a/sim/simx/types.h b/sim/simx/types.h index 17cf1685ff..2ca6dc8fb5 100644 --- a/sim/simx/types.h +++ b/sim/simx/types.h @@ -281,17 +281,18 @@ struct LsuReq { }; inline std::ostream &operator<<(std::ostream &os, const LsuReq& req) { - os << "rw=" << req.write << ", mask=" << req.mask << ", "; + os << "rw=" << req.write << ", mask=" << req.mask << ", addr={"; + bool first_addr = true; for (size_t i = 0; i < req.mask.size(); ++i) { - os << "addr" << i << "="; + if (!first_addr) os << ", "; + first_addr = false; if (req.mask.test(i)) { os << "0x" << std::hex << req.addrs.at(i) << std::dec; } else { os << "-"; } - os << ", "; } - os << "tag=0x" << std::hex << req.tag << std::dec << ", cid=" << req.cid; + os << "}, tag=0x" << std::hex << req.tag << std::dec << ", cid=" << req.cid; os << " (#" << req.uuid << ")"; return os; } From ce4f90e843bb0ac123fdc6060cf046de4ed58d7a Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Tue, 24 Sep 2024 01:20:26 -0700 Subject: [PATCH 224/407] scope analyzer 
updates --- hw/syn/xilinx/xrt/Makefile | 6 +++--- runtime/common/scope.cpp | 32 ++++++++++++++++++++++++++++++++ tests/opencl/common.mk | 6 ++++-- tests/regression/common.mk | 6 ++++-- 4 files changed, 43 insertions(+), 7 deletions(-) diff --git a/hw/syn/xilinx/xrt/Makefile b/hw/syn/xilinx/xrt/Makefile index 957940afac..ee2d642196 100644 --- a/hw/syn/xilinx/xrt/Makefile +++ b/hw/syn/xilinx/xrt/Makefile @@ -160,9 +160,9 @@ gen-ast: $(BUILD_DIR)/vortex.xml $(BUILD_DIR)/vortex.xml: mkdir -p $(BUILD_DIR); cd $(BUILD_DIR); verilator --xml-only -O0 $(XML_CFLAGS) vortex_afu.v --xml-output vortex.xml -scope-json: $(BUILD_DIR)/scope.json -$(BUILD_DIR)/scope.json: $(BUILD_DIR)/vortex.xml - mkdir -p $(BUILD_DIR); cd $(BUILD_DIR); $(SCRIPT_DIR)/scope.py vortex.xml -o scope.json +scope-json: $(BIN_DIR)/scope.json +$(BIN_DIR)/scope.json: $(BUILD_DIR)/vortex.xml + mkdir -p $(BUILD_DIR); cd $(BUILD_DIR); $(SCRIPT_DIR)/scope.py vortex.xml -o bin/scope.json gen-xo: $(XO_CONTAINER) $(XO_CONTAINER): $(BUILD_DIR)/sources.txt diff --git a/runtime/common/scope.cpp b/runtime/common/scope.cpp index 7edd67692a..def7be20b6 100644 --- a/runtime/common/scope.cpp +++ b/runtime/common/scope.cpp @@ -30,6 +30,8 @@ #define SAMPLE_FLUSH_SIZE 100 +#define TIMEOUT_TIME (60*60) + #define MMIO_SCOPE_READ (AFU_IMAGE_MMIO_SCOPE_READ * 4) #define MMIO_SCOPE_WRITE (AFU_IMAGE_MMIO_SCOPE_WRITE * 4) @@ -67,6 +69,10 @@ struct tap_t { static scope_callback_t g_callback; +static bool g_running = false; + +static std::mutex g_stop_mutex; + using json = nlohmann::json; static std::vector split(const std::string &s, char delimiter) { @@ -264,13 +270,39 @@ int vx_scope_start(scope_callback_t* callback, vx_device_h hdevice, uint64_t sta } } + g_running = true; + + // create auto-stop thread + uint32_t timeout_time = TIMEOUT_TIME; + const char* env_timeout = std::getenv("SCOPE_TIMEOUT"); + if (env_timeout != nullptr) { + std::stringstream ss(env_timeout); + uint32_t env_value; + if (ss >> env_value) { + timeout_time = 
env_value; + std::cout << "[SCOPE] timeout time=" << env_value << std::endl; + } + } + std::thread([hdevice, timeout_time]() { + std::this_thread::sleep_for(std::chrono::seconds(timeout_time)); + std::cout << "[SCOPE] auto-stop timeout!" << std::endl; + vx_scope_stop(hdevice); + }).detach(); + return 0; } int vx_scope_stop(vx_device_h hdevice) { + std::lock_guard lock(g_stop_mutex); + if (nullptr == hdevice) return -1; + if (!g_running) + return 0; + + g_running = false; + std::vector taps; { diff --git a/tests/opencl/common.mk b/tests/opencl/common.mk index 36d2956cbe..3a3de87ee8 100644 --- a/tests/opencl/common.mk +++ b/tests/opencl/common.mk @@ -102,9 +102,11 @@ run-opae: $(PROJECT) $(KERNEL_SRCS) run-xrt: $(PROJECT) $(KERNEL_SRCS) ifeq ($(TARGET), hw) - SCOPE_JSON_PATH=$(VORTEX_RT_PATH)/scope.json XRT_INI_PATH=$(VORTEX_RT_PATH)/xrt/xrt.ini EMCONFIG_PATH=$(FPGA_BIN_DIR) XRT_DEVICE_INDEX=$(XRT_DEVICE_INDEX) XRT_XCLBIN_PATH=$(FPGA_BIN_DIR)/vortex_afu.xclbin LD_LIBRARY_PATH=$(XILINX_XRT)/lib:$(POCL_PATH)/lib:$(VORTEX_RT_PATH):$(LLVM_VORTEX)/lib:$(LD_LIBRARY_PATH) $(POCL_CC_FLAGS) VORTEX_DRIVER=xrt ./$(PROJECT) $(OPTS) + SCOPE_JSON_PATH=$(FPGA_BIN_DIR)/scope.json XRT_INI_PATH=$(VORTEX_RT_PATH)/xrt/xrt.ini EMCONFIG_PATH=$(FPGA_BIN_DIR) XRT_DEVICE_INDEX=$(XRT_DEVICE_INDEX) XRT_XCLBIN_PATH=$(FPGA_BIN_DIR)/vortex_afu.xclbin LD_LIBRARY_PATH=$(XILINX_XRT)/lib:$(POCL_PATH)/lib:$(VORTEX_RT_PATH):$(LLVM_VORTEX)/lib:$(LD_LIBRARY_PATH) $(POCL_CC_FLAGS) VORTEX_DRIVER=xrt ./$(PROJECT) $(OPTS) +else ifeq ($(TARGET), hw_emu) + SCOPE_JSON_PATH=$(FPGA_BIN_DIR)/scope.json XCL_EMULATION_MODE=$(TARGET) XRT_INI_PATH=$(VORTEX_RT_PATH)/xrt/xrt.ini EMCONFIG_PATH=$(FPGA_BIN_DIR) XRT_DEVICE_INDEX=$(XRT_DEVICE_INDEX) XRT_XCLBIN_PATH=$(FPGA_BIN_DIR)/vortex_afu.xclbin LD_LIBRARY_PATH=$(XILINX_XRT)/lib:$(POCL_PATH)/lib:$(VORTEX_RT_PATH):$(LLVM_VORTEX)/lib:$(LD_LIBRARY_PATH) $(POCL_CC_FLAGS) VORTEX_DRIVER=xrt ./$(PROJECT) $(OPTS) else - SCOPE_JSON_PATH=$(VORTEX_RT_PATH)/scope.json 
XCL_EMULATION_MODE=$(TARGET) XRT_INI_PATH=$(VORTEX_RT_PATH)/xrt/xrt.ini EMCONFIG_PATH=$(FPGA_BIN_DIR) XRT_DEVICE_INDEX=$(XRT_DEVICE_INDEX) XRT_XCLBIN_PATH=$(FPGA_BIN_DIR)/vortex_afu.xclbin LD_LIBRARY_PATH=$(XILINX_XRT)/lib:$(POCL_PATH)/lib:$(VORTEX_RT_PATH):$(LLVM_VORTEX)/lib:$(LD_LIBRARY_PATH) $(POCL_CC_FLAGS) VORTEX_DRIVER=xrt ./$(PROJECT) $(OPTS) + SCOPE_JSON_PATH=$(VORTEX_RT_PATH)/scope.json LD_LIBRARY_PATH=$(XILINX_XRT)/lib:$(POCL_PATH)/lib:$(VORTEX_RT_PATH):$(LLVM_VORTEX)/lib:$(LD_LIBRARY_PATH) $(POCL_CC_FLAGS) VORTEX_DRIVER=xrt ./$(PROJECT) $(OPTS) endif .depend: $(SRCS) diff --git a/tests/regression/common.mk b/tests/regression/common.mk index 142d5cb2ee..94fe840df4 100644 --- a/tests/regression/common.mk +++ b/tests/regression/common.mk @@ -99,9 +99,11 @@ run-opae: $(PROJECT) kernel.vxbin run-xrt: $(PROJECT) kernel.vxbin ifeq ($(TARGET), hw) - SCOPE_JSON_PATH=$(VORTEX_RT_PATH)/scope.json XRT_INI_PATH=$(VORTEX_RT_PATH)/xrt/xrt.ini EMCONFIG_PATH=$(FPGA_BIN_DIR) XRT_DEVICE_INDEX=$(XRT_DEVICE_INDEX) XRT_XCLBIN_PATH=$(FPGA_BIN_DIR)/vortex_afu.xclbin LD_LIBRARY_PATH=$(XILINX_XRT)/lib:$(VORTEX_RT_PATH):$(LD_LIBRARY_PATH) VORTEX_DRIVER=xrt ./$(PROJECT) $(OPTS) + SCOPE_JSON_PATH=$(FPGA_BIN_DIR)/scope.json XRT_INI_PATH=$(VORTEX_RT_PATH)/xrt/xrt.ini EMCONFIG_PATH=$(FPGA_BIN_DIR) XRT_DEVICE_INDEX=$(XRT_DEVICE_INDEX) XRT_XCLBIN_PATH=$(FPGA_BIN_DIR)/vortex_afu.xclbin LD_LIBRARY_PATH=$(XILINX_XRT)/lib:$(VORTEX_RT_PATH):$(LD_LIBRARY_PATH) VORTEX_DRIVER=xrt ./$(PROJECT) $(OPTS) +else ifeq ($(TARGET), hw_emu) + SCOPE_JSON_PATH=$(FPGA_BIN_DIR)/scope.json XCL_EMULATION_MODE=$(TARGET) XRT_INI_PATH=$(VORTEX_RT_PATH)/xrt/xrt.ini EMCONFIG_PATH=$(FPGA_BIN_DIR) XRT_DEVICE_INDEX=$(XRT_DEVICE_INDEX) XRT_XCLBIN_PATH=$(FPGA_BIN_DIR)/vortex_afu.xclbin LD_LIBRARY_PATH=$(XILINX_XRT)/lib:$(VORTEX_RT_PATH):$(LD_LIBRARY_PATH) VORTEX_DRIVER=xrt ./$(PROJECT) $(OPTS) else - SCOPE_JSON_PATH=$(VORTEX_RT_PATH)/scope.json XCL_EMULATION_MODE=$(TARGET) XRT_INI_PATH=$(VORTEX_RT_PATH)/xrt/xrt.ini 
EMCONFIG_PATH=$(FPGA_BIN_DIR) XRT_DEVICE_INDEX=$(XRT_DEVICE_INDEX) XRT_XCLBIN_PATH=$(FPGA_BIN_DIR)/vortex_afu.xclbin LD_LIBRARY_PATH=$(XILINX_XRT)/lib:$(VORTEX_RT_PATH):$(LD_LIBRARY_PATH) VORTEX_DRIVER=xrt ./$(PROJECT) $(OPTS) + SCOPE_JSON_PATH=$(VORTEX_RT_PATH)/scope.json LD_LIBRARY_PATH=$(XILINX_XRT)/lib:$(VORTEX_RT_PATH):$(LD_LIBRARY_PATH) VORTEX_DRIVER=xrt ./$(PROJECT) $(OPTS) endif .depend: $(SRCS) From 0e3206747a6ece3b2fc6bd66f89bba7c5c37c6ec Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Tue, 24 Sep 2024 21:46:26 -0700 Subject: [PATCH 225/407] scope_tap bug fix --- ci/blackbox.sh | 4 ++ hw/rtl/libs/VX_scope_tap.sv | 134 +++++++++++++++++++++--------------- hw/syn/xilinx/xrt/Makefile | 2 +- runtime/common/scope.cpp | 28 ++++---- 4 files changed, 99 insertions(+), 69 deletions(-) diff --git a/ci/blackbox.sh b/ci/blackbox.sh index 51639b201f..27a43781b4 100755 --- a/ci/blackbox.sh +++ b/ci/blackbox.sh @@ -195,6 +195,10 @@ main() { mv -f $APP_PATH/trace.vcd . fi + if [ $SCOPE -eq 1 ] && [ -f "$APP_PATH/scope.vcd" ]; then + mv -f $APP_PATH/scope.vcd . 
+ fi + exit $status } diff --git a/hw/rtl/libs/VX_scope_tap.sv b/hw/rtl/libs/VX_scope_tap.sv index c3d111c059..b1977d388a 100644 --- a/hw/rtl/libs/VX_scope_tap.sv +++ b/hw/rtl/libs/VX_scope_tap.sv @@ -33,12 +33,12 @@ module VX_scope_tap #( output wire bus_out ); localparam CTR_WIDTH = 64; - localparam TX_DATA_BITS = `LOG2UP(TX_DATAW); + localparam DATA_IDX_WISTH = `LOG2UP(TX_DATAW); localparam DATAW = PROBEW + TRIGGERW; - localparam DATA_BITS = `LOG2UP(DATAW); localparam ADDRW = `CLOG2(DEPTH); localparam MAX_IDLE_CTR = (2 ** IDLE_CTRW) - 1; - localparam TX_DATA_BLOCKS = `CDIV(DATAW, TX_DATAW); + localparam DATA_BLOCKS = `CDIV(DATAW, TX_DATAW); + localparam BLOCK_IDX_WISTH = `LOG2UP(DATA_BLOCKS); localparam CTRL_STATE_IDLE = 2'd0; localparam CTRL_STATE_RECV = 2'd1; @@ -59,18 +59,18 @@ module VX_scope_tap #( localparam CMD_SET_STOP = 3'd5; localparam CMD_TYPE_BITS = 3; - localparam GET_TYPE_WIDTH = 2'd0; - localparam GET_TYPE_COUNT = 2'd1; - localparam GET_TYPE_START = 2'd2; - localparam GET_TYPE_DATA = 2'd3; - localparam GET_TYPE_BITS = 2; + localparam SEND_TYPE_WIDTH = 2'd0; + localparam SEND_TYPE_COUNT = 2'd1; + localparam SEND_TYPE_START = 2'd2; + localparam SEND_TYPE_DATA = 2'd3; + localparam SEND_TYPE_BITS = 2; `STATIC_ASSERT ((IDLE_CTRW <= TX_DATAW), ("invalid parameter")) `STATIC_ASSERT(`IS_POW2(DEPTH), ("depth must be a power of 2!")) reg [TAP_STATE_BITS-1:0] tap_state; reg [CTRL_STATE_BITS-1:0] ctrl_state; - reg [GET_TYPE_BITS-1:0] get_type; + reg [SEND_TYPE_BITS-1:0] send_type; reg [CTR_WIDTH-1:0] timestamp, start_time; reg [CTR_WIDTH-1:0] start_delay, delay_cntr; @@ -217,42 +217,73 @@ module VX_scope_tap #( wire [TX_DATAW-1:0] ser_buf_in_n = {ser_buf_in[TX_DATAW-2:0], bus_in}; `UNUSED_VAR (ser_buf_in) - reg [TX_DATA_BITS-1:0] ser_tx_ctr; - reg [DATA_BITS-1:0] read_offset; + wire [DATA_BLOCKS-1:0][TX_DATAW-1:0] data_blocks; + logic [BLOCK_IDX_WISTH-1:0] data_block_idx; + reg [DATA_IDX_WISTH-1:0] ser_tx_ctr; reg is_read_data; - reg [1:0] read_en; + reg 
is_get_data; wire [CMD_TYPE_BITS-1:0] cmd_type = ser_buf_in[CMD_TYPE_BITS-1:0]; wire [SCOPE_IDW-1:0] cmd_scope_id = ser_buf_in_n[CMD_TYPE_BITS +: SCOPE_IDW]; wire [TX_DATAW-CMD_TYPE_BITS-SCOPE_IDW-1:0] cmd_data = ser_buf_in[TX_DATAW-1:CMD_TYPE_BITS+SCOPE_IDW]; + for (genvar i = 0; i < DATA_BLOCKS; ++i) begin : g_data_blocks + for (genvar j = 0; j < TX_DATAW; ++j) begin : g_j + localparam k = i * TX_DATAW + j; + if (k < DATAW) begin : g_valid + assign data_blocks[i][j] = data_value[k]; + end else begin : g_padding + assign data_blocks[i][j] = '0; + end + end + end + + if (DATA_BLOCKS > 1) begin : g_data_block_idx + always @(posedge clk) begin + if (reset) begin + data_block_idx <= '0; + end else if ((ctrl_state == CTRL_STATE_SEND) + && (send_type == SEND_TYPE_DATA) + && (ser_tx_ctr == 0) + && is_read_data) begin + if (data_block_idx < BLOCK_IDX_WISTH'(DATA_BLOCKS-1)) begin + data_block_idx <= data_block_idx + BLOCK_IDX_WISTH'(1); + end else begin + data_block_idx <= '0; + end + end + end + end else begin : g_data_block_idx_0 + assign data_block_idx = 0; + end + wire [ADDRW-1:0] raddr_n = raddr + ADDRW'(1); always @(posedge clk) begin if (reset) begin ctrl_state <= CTRL_STATE_IDLE; + send_type <= SEND_TYPE_BITS'(SEND_TYPE_WIDTH); waddr_end <= ADDRW'(DEPTH-1); cmd_start <= 0; start_delay <= '0; bus_out_r <= 0; - read_offset <= '0; raddr <= '0; is_read_data<= 0; ser_tx_ctr <= '0; - read_en <= '0; + is_get_data <= 0; end else begin bus_out_r <= 0; cmd_start <= 0; - read_en <= '0; + is_get_data <= 0; case (ctrl_state) CTRL_STATE_IDLE: begin if (bus_in) begin - ser_tx_ctr <= TX_DATA_BITS'(TX_DATAW-1); + ser_tx_ctr <= DATA_IDX_WISTH'(TX_DATAW-1); ctrl_state <= CTRL_STATE_RECV; end end CTRL_STATE_RECV: begin - ser_tx_ctr <= ser_tx_ctr - TX_DATA_BITS'(1); + ser_tx_ctr <= ser_tx_ctr - DATA_IDX_WISTH'(1); ser_buf_in <= ser_buf_in_n; if (ser_tx_ctr == 0) begin // check if command is for this scope @@ -273,10 +304,10 @@ module VX_scope_tap #( CMD_GET_START, CMD_GET_COUNT, 
CMD_GET_DATA: begin - get_type <= GET_TYPE_BITS'(cmd_type); - ser_tx_ctr <= TX_DATA_BITS'(TX_DATAW-1); - bus_out_r <= 1; + send_type <= SEND_TYPE_BITS'(cmd_type); + ser_tx_ctr <= DATA_IDX_WISTH'(TX_DATAW-1); ctrl_state <= CTRL_STATE_SEND; + bus_out_r <= 1; end default:; endcase @@ -285,8 +316,8 @@ module VX_scope_tap #( `endif end CTRL_STATE_SEND: begin - case (get_type) - GET_TYPE_WIDTH: begin + case (send_type) + SEND_TYPE_WIDTH: begin bus_out_r <= 1'(DATAW >> ser_tx_ctr); `ifdef DBG_TRACE_SCOPE if (ser_tx_ctr == 0) begin @@ -294,7 +325,7 @@ module VX_scope_tap #( end `endif end - GET_TYPE_COUNT: begin + SEND_TYPE_COUNT: begin bus_out_r <= 1'(waddr >> ser_tx_ctr); `ifdef DBG_TRACE_SCOPE if (ser_tx_ctr == 0) begin @@ -302,7 +333,7 @@ module VX_scope_tap #( end `endif end - GET_TYPE_START: begin + SEND_TYPE_START: begin bus_out_r <= 1'(start_time >> ser_tx_ctr); `ifdef DBG_TRACE_SCOPE if (ser_tx_ctr == 0) begin @@ -310,24 +341,16 @@ module VX_scope_tap #( end `endif end - GET_TYPE_DATA: begin - read_en <= {is_read_data, 1'b1}; + SEND_TYPE_DATA: begin + is_get_data <= 1; if (ser_tx_ctr == 0) begin if (is_read_data) begin - if (DATAW > TX_DATAW) begin - if (read_offset < DATA_BITS'(DATAW-TX_DATAW)) begin - read_offset <= read_offset + DATA_BITS'(TX_DATAW); - end else begin - read_offset <= '0; - raddr <= raddr_n; - is_read_data <= 0; // swutch delta mode - end - end else begin + if (data_block_idx == BLOCK_IDX_WISTH'(DATA_BLOCKS-1)) begin raddr <= raddr_n; - is_read_data <= 0; // swutch delta mode - end - if (raddr_n == waddr) begin - raddr <= 0; // end-of-samples reset + is_read_data <= 0; // switch to delta mode + if (raddr_n == waddr) begin + raddr <= 0; // end-of-samples reset + end end end else begin is_read_data <= 1; // switch to data mode @@ -345,7 +368,7 @@ module VX_scope_tap #( end default:; endcase - ser_tx_ctr <= ser_tx_ctr - TX_DATA_BITS'(1); + ser_tx_ctr <= ser_tx_ctr - DATA_IDX_WISTH'(1); if (ser_tx_ctr == 0) begin ctrl_state <= CTRL_STATE_IDLE; end 
@@ -355,23 +378,26 @@ module VX_scope_tap #( end end - wire [TX_DATA_BLOCKS-1:0][TX_DATAW-1:0] data_blocks; - for (genvar i = 0; i < TX_DATA_BLOCKS; ++i) begin : g_data_blocks - for (genvar j = 0; j < TX_DATAW; ++j) begin : g_j - localparam k = i * TX_DATAW + j; - if (k < DATAW) begin : g_valid - assign data_blocks[i][j] = data_value[k]; - end else begin : g_padding - assign data_blocks[i][j] = '0; - end - end - end + wire [BLOCK_IDX_WISTH-1:0] data_block_idx_r; + wire [DATA_IDX_WISTH-1:0] ser_tx_ctr_r; + wire is_read_data_r; + + VX_pipe_register #( + .DATAW (1 + DATA_IDX_WISTH + BLOCK_IDX_WISTH) + ) data_sel_buf ( + .clk (clk), + .reset (reset), + .enable (1'b1), + .data_in ({is_read_data, ser_tx_ctr, data_block_idx}), + .data_out ({is_read_data_r, ser_tx_ctr_r, data_block_idx_r}) + ); - wire [TX_DATAW-1:0] get_data = read_en[1] ? data_blocks[read_offset] : TX_DATAW'(delta_value); - wire bus_out_w = read_en[0] ? get_data[ser_tx_ctr] : bus_out_r; + wire [TX_DATAW-1:0] get_data = is_read_data_r ? data_blocks[data_block_idx_r] : TX_DATAW'(delta_value); + wire bus_out_w = is_get_data ? 
get_data[ser_tx_ctr_r] : bus_out_r; VX_pipe_register #( - .DATAW (1) + .DATAW (1), + .DEPTH (1) ) buf_out ( .clk (clk), .reset (reset), diff --git a/hw/syn/xilinx/xrt/Makefile b/hw/syn/xilinx/xrt/Makefile index ee2d642196..67eccf8410 100644 --- a/hw/syn/xilinx/xrt/Makefile +++ b/hw/syn/xilinx/xrt/Makefile @@ -131,7 +131,7 @@ endif # Enable scope analyzer ifdef SCOPE CFLAGS += -DSCOPE $(DBG_SCOPE_FLAGS) - SCOPE_JSON += $(BUILD_DIR)/scope.json + SCOPE_JSON += $(BIN_DIR)/scope.json endif # compilation flags diff --git a/runtime/common/scope.cpp b/runtime/common/scope.cpp index def7be20b6..820fa54f5e 100644 --- a/runtime/common/scope.cpp +++ b/runtime/common/scope.cpp @@ -159,7 +159,7 @@ static tap_t* find_earliest_tap(std::vector& taps) { return earliest; } -static uint64_t advance_time(std::ofstream& ofs, uint64_t cur_time, uint64_t next_time) { +static uint64_t advance_clock(std::ofstream& ofs, uint64_t cur_time, uint64_t next_time) { while (cur_time < next_time) { ofs << '#' << (cur_time * 2 + 0) << std::endl; ofs << "b0 0" << std::endl; @@ -383,20 +383,20 @@ int vx_scope_stop(vx_device_h hdevice) { std::cout << "[SCOPE] dump taps..." << std::endl; uint64_t cur_time = 0; - - while (true) { - // find the nearest tap - auto tap = find_earliest_tap(taps); - if (tap == nullptr) - break; + auto tap = find_earliest_tap(taps); + if (tap != nullptr) { + cur_time = (tap->cycle_time > 0) ? (tap->cycle_time-1) : 0; + do { + // advance clock + cur_time = advance_clock(ofs, cur_time, tap->cycle_time); + // dump tap + CHECK_ERR(dump_tap(ofs, tap, hdevice)); + // find the nearest tap + tap = find_earliest_tap(taps); + } while (tap != nullptr); // advance clock - cur_time = advance_time(ofs, cur_time, tap->cycle_time); - // dump tap - CHECK_ERR(dump_tap(ofs, tap, hdevice)); - }; - - // advance clock - advance_time(ofs, cur_time, cur_time + 1); + advance_clock(ofs, cur_time, cur_time + 1); + } std::cout << "[SCOPE] trace dump done! 
- " << (cur_time/2) << " cycles" << std::endl; From 4f11278d2cce934e50b5c13d1a2bc839b5ff6429 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Wed, 25 Sep 2024 10:28:19 -0700 Subject: [PATCH 226/407] scope_tap bug fixes and improvements --- hw/rtl/VX_define.vh | 12 +++ hw/rtl/VX_scope.vh | 59 +++++++++------ hw/rtl/Vortex_axi.sv | 2 +- hw/rtl/afu/opae/vortex_afu.sv | 133 +++++++++++++++------------------ hw/rtl/afu/xrt/VX_afu_wrap.sv | 84 +++++++++------------ hw/rtl/core/VX_core.sv | 2 +- hw/rtl/core/VX_execute.sv | 2 +- hw/rtl/core/VX_fetch.sv | 30 +++----- hw/rtl/core/VX_issue.sv | 2 +- hw/rtl/core/VX_issue_slice.sv | 28 +++---- hw/rtl/core/VX_lsu_slice.sv | 33 ++++---- hw/rtl/libs/VX_edge_trigger.sv | 43 +++++++++++ hw/rtl/libs/VX_scope_switch.sv | 9 ++- hw/rtl/libs/VX_scope_tap.sv | 130 ++++++++++++++++---------------- runtime/common/scope.cpp | 17 ++++- runtime/opae/vortex.cpp | 2 +- runtime/xrt/vortex.cpp | 2 +- 17 files changed, 313 insertions(+), 277 deletions(-) create mode 100644 hw/rtl/libs/VX_edge_trigger.sv diff --git a/hw/rtl/VX_define.vh b/hw/rtl/VX_define.vh index 85fa40f0d2..7c1590dff2 100644 --- a/hw/rtl/VX_define.vh +++ b/hw/rtl/VX_define.vh @@ -317,6 +317,18 @@ /////////////////////////////////////////////////////////////////////////////// +`define NEG_EDGE(dst, src) \ + wire dst; \ + VX_edge_trigger #( \ + .POS (0), \ + .INIT (0) \ + ) __``dst``__ ( \ + .clk (clk), \ + .reset (1'b0), \ + .data_in (src), \ + .data_out (dst) \ + ) + `define BUFFER_EX(dst, src, ena, latency) \ VX_pipe_register #( \ .DATAW ($bits(dst)), \ diff --git a/hw/rtl/VX_scope.vh b/hw/rtl/VX_scope.vh index a677975ced..b88a2718b8 100644 --- a/hw/rtl/VX_scope.vh +++ b/hw/rtl/VX_scope.vh @@ -21,10 +21,20 @@ input wire scope_bus_in, \ output wire scope_bus_out, +`define SCOPE_IO_BIND(__i) \ + .scope_reset (scope_reset_w[__i]), \ + .scope_bus_in (scope_bus_in_w[__i]), \ + .scope_bus_out (scope_bus_out_w[__i]), + +`define SCOPE_IO_UNUSED(__i) \ + `UNUSED_VAR (scope_reset_w[__i]); 
\ + `UNUSED_VAR (scope_bus_in_w[__i]); \ + assign scope_bus_out_w[__i] = 0; + `define SCOPE_IO_SWITCH(__count) \ - wire scope_bus_in_w [__count]; \ - wire scope_bus_out_w [__count]; \ - `RESET_RELAY_EX(scope_reset_w, scope_reset, __count, `MAX_FANOUT); \ + wire [__count-1:0] scope_bus_in_w; \ + wire [__count-1:0] scope_bus_out_w; \ + wire [__count-1:0] scope_reset_w = {__count{scope_reset}}; \ VX_scope_switch #( \ .N (__count) \ ) scope_switch ( \ @@ -34,35 +44,42 @@ .rsp_out (scope_bus_out), \ .req_out (scope_bus_in_w), \ .rsp_in (scope_bus_out_w) \ - ); - -`define SCOPE_IO_BIND(__i) \ - .scope_reset (scope_reset_w[__i]), \ - .scope_bus_in (scope_bus_in_w[__i]), \ - .scope_bus_out (scope_bus_out_w[__i]), + ) -`define SCOPE_IO_UNUSED() \ - `UNUSED_VAR (scope_reset); \ - `UNUSED_VAR (scope_bus_in); \ - assign scope_bus_out = 0; +`define SCOPE_TAP_EX(__idx, __id, __triggers_w, __probes_w, __triggers, __probes, __start, __stop, __depth) \ + VX_scope_tap #( \ + .SCOPE_ID (__id), \ + .TRIGGERW (__triggers_w), \ + .PROBEW (__probes_w), \ + .DEPTH (__depth) \ + ) scope_tap_``idx ( \ + .clk (clk), \ + .reset (scope_reset_w[__idx]), \ + .start (__start), \ + .stop (__stop), \ + .triggers(__triggers), \ + .probes (__probes), \ + .bus_in (scope_bus_in_w[__idx]), \ + .bus_out(scope_bus_out_w[__idx]) \ + ) -`define SCOPE_IO_UNUSED_W(__i) \ - `UNUSED_VAR (scope_reset_w[__i]); \ - `UNUSED_VAR (scope_bus_in_w[__i]); \ - assign scope_bus_out_w[__i] = 0; +`define SCOPE_TAP(__idx, __id, __triggers, __probes, __start, __stop, __depth) \ + `SCOPE_TAP_EX(__idx, __id, $bits(__triggers), $bits(__probes), __triggers, __probes, __start, __stop, __depth) `else `define SCOPE_IO_DECL -`define SCOPE_IO_SWITCH(__count) - `define SCOPE_IO_BIND(__i) -`define SCOPE_IO_UNUSED_W(__i) - `define SCOPE_IO_UNUSED(__i) +`define SCOPE_IO_SWITCH(__count) + +`define SCOPE_TAP(__idx, __id, __triggers, __probes, __depth) + +`define SCOPE_TAP_EX(__idx, __id, __triggers_w, __probes_w, __triggers, __probes, 
__depth) + `endif `endif // VX_SCOPE_VH diff --git a/hw/rtl/Vortex_axi.sv b/hw/rtl/Vortex_axi.sv index 7582063969..7d238aacde 100644 --- a/hw/rtl/Vortex_axi.sv +++ b/hw/rtl/Vortex_axi.sv @@ -100,7 +100,7 @@ module Vortex_axi import VX_gpu_pkg::*; #( wire [`VX_MEM_TAG_WIDTH-1:0] mem_rsp_tag; wire mem_rsp_ready; - `SCOPE_IO_SWITCH (1) + `SCOPE_IO_SWITCH (1); Vortex vortex ( `SCOPE_IO_BIND (0) diff --git a/hw/rtl/afu/opae/vortex_afu.sv b/hw/rtl/afu/opae/vortex_afu.sv index 1440b28086..38994c1c5d 100644 --- a/hw/rtl/afu/opae/vortex_afu.sv +++ b/hw/rtl/afu/opae/vortex_afu.sv @@ -932,7 +932,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ wire [`VX_DCR_ADDR_WIDTH-1:0] vx_dcr_wr_addr = cmd_dcr_addr; wire [`VX_DCR_DATA_WIDTH-1:0] vx_dcr_wr_data = cmd_dcr_data; - `SCOPE_IO_SWITCH (2) + `SCOPE_IO_SWITCH (2); Vortex vortex ( `SCOPE_IO_BIND (1) @@ -1023,80 +1023,65 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ end wire state_changed = (state != state_prev); - `define AFU_TRIGGERS { \ - reset, \ - vx_reset, \ - vx_busy, \ - vx_mem_req_fire, \ - vx_mem_rsp_fire, \ - vx_dcr_wr_valid, \ - state_changed, \ - avs_write_fire, \ - avs_read_fire, \ - avs_waitrequest[0], \ - avs_readdatavalid[0], \ - cp2af_sRxPort.c0.mmioRdValid, \ - cp2af_sRxPort.c0.mmioWrValid, \ - cp2af_sRxPort.c0.rspValid, \ - cp2af_sRxPort.c1.rspValid, \ - af2cp_sTxPort.c0.valid, \ - af2cp_sTxPort.c1.valid, \ - cp2af_sRxPort.c0TxAlmFull, \ - cp2af_sRxPort.c1TxAlmFull, \ - af2cp_sTxPort.c2.mmioRdValid, \ - cci_wr_req_fire, \ - cci_wr_rsp_fire, \ - cci_rd_req_fire, \ - cci_rd_rsp_fire, \ - cci_pending_reads_full, \ - cci_pending_writes_empty, \ - cci_pending_writes_full \ - } - - `define AFU_PROBES { \ - cmd_type, \ - state, \ - vx_mem_req_rw, \ - vx_mem_req_byteen, \ - vx_mem_req_addr, \ - vx_mem_req_data, \ - vx_mem_req_tag, \ - vx_mem_rsp_data, \ - vx_mem_rsp_tag, \ - vx_dcr_wr_addr, \ - vx_dcr_wr_data, \ - mmio_req_hdr.address, \ - 
cp2af_sRxPort.c0.hdr.mdata, \ - af2cp_sTxPort.c0.hdr.address, \ - af2cp_sTxPort.c0.hdr.mdata, \ - af2cp_sTxPort.c1.hdr.address, \ - avs_address[0], \ - avs_byteenable[0], \ - avs_burstcount[0], \ - cci_mem_rd_req_ctr, \ - cci_mem_wr_req_ctr, \ - cci_rd_req_ctr, \ - cci_rd_rsp_ctr, \ - cci_wr_req_ctr \ - } - - VX_scope_tap #( - .SCOPE_ID (0), - .TRIGGERW ($bits(`AFU_TRIGGERS)), - .PROBEW ($bits(`AFU_PROBES)), - .DEPTH (4096) - ) scope_tap ( - .clk (clk), - .reset (scope_reset_w[0]), - .start (1'b0), - .stop (1'b0), - .triggers(`AFU_TRIGGERS), - .probes (`AFU_PROBES), - .bus_in (scope_bus_in_w[0]), - .bus_out(scope_bus_out_w[0]) - ); + `NEG_EDGE (reset_negedge, reset); + + `SCOPE_TAP (0, 0, { + vx_reset, + vx_busy, + vx_mem_req_fire, + vx_mem_rsp_fire, + vx_dcr_wr_valid, + state_changed, + avs_write_fire, + avs_read_fire, + avs_waitrequest[0], + avs_readdatavalid[0], + cp2af_sRxPort.c0.mmioRdValid, + cp2af_sRxPort.c0.mmioWrValid, + cp2af_sRxPort.c0.rspValid, + cp2af_sRxPort.c1.rspValid, + af2cp_sTxPort.c0.valid, + af2cp_sTxPort.c1.valid, + cp2af_sRxPort.c0TxAlmFull, + cp2af_sRxPort.c1TxAlmFull, + af2cp_sTxPort.c2.mmioRdValid, + cci_wr_req_fire, + cci_wr_rsp_fire, + cci_rd_req_fire, + cci_rd_rsp_fire, + cci_pending_reads_full, + cci_pending_writes_empty, + cci_pending_writes_full + },{ + cmd_type, + state, + vx_mem_req_rw, + vx_mem_req_byteen, + vx_mem_req_addr, + vx_mem_req_data, + vx_mem_req_tag, + vx_mem_rsp_data, + vx_mem_rsp_tag, + vx_dcr_wr_addr, + vx_dcr_wr_data, + mmio_req_hdr.address, + cp2af_sRxPort.c0.hdr.mdata, + af2cp_sTxPort.c0.hdr.address, + af2cp_sTxPort.c0.hdr.mdata, + af2cp_sTxPort.c1.hdr.address, + avs_address[0], + avs_byteenable[0], + avs_burstcount[0], + cci_mem_rd_req_ctr, + cci_mem_wr_req_ctr, + cci_rd_req_ctr, + cci_rd_rsp_ctr, + cci_wr_req_ctr + }, + reset_negedge, 1'b0, 4096 + ); `else - `SCOPE_IO_UNUSED_W(0) + `SCOPE_IO_UNUSED(0) `endif /////////////////////////////////////////////////////////////////////////////// diff --git 
a/hw/rtl/afu/xrt/VX_afu_wrap.sv b/hw/rtl/afu/xrt/VX_afu_wrap.sv index e515b080ba..73da63e585 100644 --- a/hw/rtl/afu/xrt/VX_afu_wrap.sv +++ b/hw/rtl/afu/xrt/VX_afu_wrap.sv @@ -241,7 +241,7 @@ module VX_afu_wrap #( assign m_axi_mem_araddr_a[i] = C_M_AXI_MEM_ADDR_WIDTH'(m_axi_mem_araddr_u[i]) + BANK_OFFSET; end - `SCOPE_IO_SWITCH (2) + `SCOPE_IO_SWITCH (2); Vortex_axi #( .AXI_DATA_WIDTH (C_M_AXI_MEM_DATA_WIDTH), @@ -309,55 +309,41 @@ module VX_afu_wrap #( `ifdef SCOPE `ifdef DBG_SCOPE_AFU - `define AFU_TRIGGERS { \ - reset, \ - ap_reset, \ - ap_start, \ - ap_done, \ - ap_idle, \ - interrupt, \ - vx_reset, \ - vx_busy, \ - dcr_wr_valid, \ - m_axi_mem_awvalid_a[0], \ - m_axi_mem_awready_a[0], \ - m_axi_mem_wvalid_a[0], \ - m_axi_mem_wready_a[0], \ - m_axi_mem_bvalid_a[0], \ - m_axi_mem_bready_a[0], \ - m_axi_mem_arvalid_a[0], \ - m_axi_mem_arready_a[0], \ - m_axi_mem_rvalid_a[0], \ - m_axi_mem_rready_a[0] \ - } - `define AFU_PROBES { \ - dcr_wr_addr, \ - dcr_wr_data, \ - vx_pending_writes, \ - m_axi_mem_awaddr_u[0], \ - m_axi_mem_awid_a[0], \ - m_axi_mem_bid_a[0], \ - m_axi_mem_araddr_u[0], \ - m_axi_mem_arid_a[0], \ - m_axi_mem_rid_a[0] \ - } - VX_scope_tap #( - .SCOPE_ID (0), - .TRIGGERW ($bits(`AFU_TRIGGERS)), - .PROBEW ($bits(`AFU_PROBES)), - .DEPTH (4096) - ) scope_tap ( - .clk (clk), - .reset (scope_reset_w[0]), - .start (1'b0), - .stop (1'b0), - .triggers(`AFU_TRIGGERS), - .probes (`AFU_PROBES), - .bus_in (scope_bus_in_w[0]), - .bus_out(scope_bus_out_w[0]) - ); + `NEG_EDGE (reset_negedge, reset); + `SCOPE_TAP (0, 0, { + ap_reset, + ap_start, + ap_done, + ap_idle, + interrupt, + vx_reset, + vx_busy, + dcr_wr_valid, + m_axi_mem_awvalid_a[0], + m_axi_mem_awready_a[0], + m_axi_mem_wvalid_a[0], + m_axi_mem_wready_a[0], + m_axi_mem_bvalid_a[0], + m_axi_mem_bready_a[0], + m_axi_mem_arvalid_a[0], + m_axi_mem_arready_a[0], + m_axi_mem_rvalid_a[0], + m_axi_mem_rready_a[0] + }, { + dcr_wr_addr, + dcr_wr_data, + vx_pending_writes, + m_axi_mem_awaddr_u[0], + 
m_axi_mem_awid_a[0], + m_axi_mem_bid_a[0], + m_axi_mem_araddr_u[0], + m_axi_mem_arid_a[0], + m_axi_mem_rid_a[0] + }, + reset_negedge, 1'b0, 4096 + ); `else - `SCOPE_IO_UNUSED_W(0) + `SCOPE_IO_UNUSED(0) `endif `endif `ifdef CHIPSCOPE diff --git a/hw/rtl/core/VX_core.sv b/hw/rtl/core/VX_core.sv index 1d3e126137..260cedca3e 100644 --- a/hw/rtl/core/VX_core.sv +++ b/hw/rtl/core/VX_core.sv @@ -84,7 +84,7 @@ module VX_core import VX_gpu_pkg::*; #( .base_dcrs (base_dcrs) ); - `SCOPE_IO_SWITCH (3) + `SCOPE_IO_SWITCH (3); VX_schedule #( .INSTANCE_ID ($sformatf("%s-schedule", INSTANCE_ID)), diff --git a/hw/rtl/core/VX_execute.sv b/hw/rtl/core/VX_execute.sv index 6c148649b0..4f66757f12 100644 --- a/hw/rtl/core/VX_execute.sv +++ b/hw/rtl/core/VX_execute.sv @@ -61,7 +61,7 @@ module VX_execute import VX_gpu_pkg::*; #( .branch_ctl_if (branch_ctl_if) ); - `SCOPE_IO_SWITCH (1) + `SCOPE_IO_SWITCH (1); VX_lsu_unit #( .INSTANCE_ID ($sformatf("%s-lsu", INSTANCE_ID)) diff --git a/hw/rtl/core/VX_fetch.sv b/hw/rtl/core/VX_fetch.sv index c1c0e6a57f..baeb152f21 100644 --- a/hw/rtl/core/VX_fetch.sv +++ b/hw/rtl/core/VX_fetch.sv @@ -137,34 +137,24 @@ module VX_fetch import VX_gpu_pkg::*; #( `ifdef SCOPE `ifdef DBG_SCOPE_FETCH - VX_scope_tap #( - .SCOPE_ID (1), - .TRIGGERW (4), - .PROBEW (`UUID_WIDTH + `NW_WIDTH + `NUM_THREADS + `PC_BITS + - ICACHE_TAG_WIDTH + ICACHE_WORD_SIZE + ICACHE_ADDR_WIDTH + - (ICACHE_WORD_SIZE * 8) + ICACHE_TAG_WIDTH), - .DEPTH (4096) - ) scope_tap ( - .clk (clk), - .reset (scope_reset), - .start (1'b0), - .stop (1'b0), - .triggers ({ - reset, + `SCOPE_IO_SWITCH (1); + `NEG_EDGE (reset_negedge, reset); + `SCOPE_TAP_EX (0, 1, 3, ( + `UUID_WIDTH + `NW_WIDTH + `NUM_THREADS + `PC_BITS + ICACHE_TAG_WIDTH + ICACHE_WORD_SIZE + + ICACHE_ADDR_WIDTH + (ICACHE_WORD_SIZE * 8) + ICACHE_TAG_WIDTH + ), { schedule_fire, icache_req_fire, icache_rsp_fire - }), - .probes ({ + }, { schedule_if.data.uuid, schedule_if.data.wid, schedule_if.data.tmask, schedule_if.data.PC, 
icache_bus_if.req_data.tag, icache_bus_if.req_data.byteen, icache_bus_if.req_data.addr, icache_bus_if.rsp_data.data, icache_bus_if.rsp_data.tag - }), - .bus_in (scope_bus_in), - .bus_out (scope_bus_out) + }, + reset_negedge, 1'b0, 4096 ); `else - `SCOPE_IO_UNUSED() + `SCOPE_IO_UNUSED(0) `endif `endif `ifdef CHIPSCOPE diff --git a/hw/rtl/core/VX_issue.sv b/hw/rtl/core/VX_issue.sv index a2e689b7c8..84bcc00722 100644 --- a/hw/rtl/core/VX_issue.sv +++ b/hw/rtl/core/VX_issue.sv @@ -50,7 +50,7 @@ module VX_issue import VX_gpu_pkg::*; #( wire [`ISSUE_WIDTH-1:0] decode_ready_in; assign decode_if.ready = decode_ready_in[decode_isw]; - `SCOPE_IO_SWITCH (`ISSUE_WIDTH) + `SCOPE_IO_SWITCH (`ISSUE_WIDTH); for (genvar issue_id = 0; issue_id < `ISSUE_WIDTH; ++issue_id) begin : g_issue_slices VX_decode_if #( diff --git a/hw/rtl/core/VX_issue_slice.sv b/hw/rtl/core/VX_issue_slice.sv index 38e54fcc01..a496af8e39 100644 --- a/hw/rtl/core/VX_issue_slice.sv +++ b/hw/rtl/core/VX_issue_slice.sv @@ -95,23 +95,16 @@ module VX_issue_slice import VX_gpu_pkg::*; #( `ifdef SCOPE `ifdef DBG_SCOPE_ISSUE - VX_scope_tap #( - .SCOPE_ID (2), - .TRIGGERW (2), - .PROBEW (`UUID_WIDTH + `NUM_THREADS + `EX_BITS + `INST_OP_BITS + + `SCOPE_IO_SWITCH (1); + `NEG_EDGE (reset_negedge, reset); + `SCOPE_TAP_EX (0, 2, 2, ( + `UUID_WIDTH + `NUM_THREADS + `EX_BITS + `INST_OP_BITS + 1 + `NR_BITS + (`NUM_THREADS * 3 * `XLEN) + - `UUID_WIDTH + `NUM_THREADS + `NR_BITS + (`NUM_THREADS*`XLEN) + 1), - .DEPTH (4096) - ) scope_tap ( - .clk (clk), - .reset (scope_reset), - .start (1'b0), - .stop (1'b0), - .triggers ({ + `UUID_WIDTH + `NUM_THREADS + `NR_BITS + (`NUM_THREADS*`XLEN) + 1 + ), { operands_if_fire, writeback_if_valid - }), - .probes ({ + }, { operands_if.data.uuid, operands_if.data.tmask, operands_if.data.ex_type, @@ -126,12 +119,11 @@ module VX_issue_slice import VX_gpu_pkg::*; #( writeback_if.data.rd, writeback_if.data.data, writeback_if.data.eop - }), - .bus_in (scope_bus_in), - .bus_out (scope_bus_out) + }, + 
reset_negedge, 1'b0, 4096 ); `else - `SCOPE_IO_UNUSED() + `SCOPE_IO_UNUSED(0) `endif `endif `ifdef CHIPSCOPE diff --git a/hw/rtl/core/VX_lsu_slice.sv b/hw/rtl/core/VX_lsu_slice.sv index d703291c42..962bcd70cd 100644 --- a/hw/rtl/core/VX_lsu_slice.sv +++ b/hw/rtl/core/VX_lsu_slice.sv @@ -536,23 +536,26 @@ module VX_lsu_slice import VX_gpu_pkg::*; #( `ifdef SCOPE `ifdef DBG_SCOPE_LSU - VX_scope_tap #( - .SCOPE_ID (3), - .TRIGGERW (2), - .PROBEW (1 + NUM_LANES * (`XLEN + LSU_WORD_SIZE + LSU_WORD_SIZE * 8) + `UUID_WIDTH + NUM_LANES * LSU_WORD_SIZE * 8 + `UUID_WIDTH), - .DEPTH (4096) - ) scope_tap ( - .clk (clk), - .reset (scope_reset), - .start (1'b0), - .stop (1'b0), - .triggers({mem_req_fire, mem_rsp_fire}), - .probes ({mem_req_rw, full_addr, mem_req_byteen, mem_req_data, execute_if.data.uuid, rsp_data, rsp_uuid}), - .bus_in (scope_bus_in), - .bus_out(scope_bus_out) + `SCOPE_IO_SWITCH (1); + `NEG_EDGE (reset_negedge, reset); + `SCOPE_TAP_EX (0, 3, 2, ( + 1 + NUM_LANES * (`XLEN + LSU_WORD_SIZE + LSU_WORD_SIZE * 8) + `UUID_WIDTH + NUM_LANES * LSU_WORD_SIZE * 8 + `UUID_WIDTH + ), { + mem_req_fire, + mem_rsp_fire + }, { + mem_req_rw, + full_addr, + mem_req_byteen, + mem_req_data, + execute_if.data.uuid, + rsp_data, + rsp_uuid + }, + reset_negedge, 1'b0, 4096 ); `else - `SCOPE_IO_UNUSED() + `SCOPE_IO_UNUSED(0) `endif `endif `ifdef CHIPSCOPE diff --git a/hw/rtl/libs/VX_edge_trigger.sv b/hw/rtl/libs/VX_edge_trigger.sv new file mode 100644 index 0000000000..9e876985c3 --- /dev/null +++ b/hw/rtl/libs/VX_edge_trigger.sv @@ -0,0 +1,43 @@ +// Copyright © 2019-2023 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +`include "VX_platform.vh" + +`TRACING_OFF +module VX_edge_trigger #( + parameter POS = 0, + parameter INIT = 0 +) ( + input wire clk, + input wire reset, + input wire data_in, + output wire data_out +); + reg prev; + + always @(posedge clk) begin + if (reset) begin + prev <= INIT; + end else begin + prev <= data_in; + end + end + + if (POS != 0) begin : g_pos + assign data_out = data_in & ~prev; + end else begin : g_neg + assign data_out = ~data_in & prev; + end + +endmodule +`TRACING_ON diff --git a/hw/rtl/libs/VX_scope_switch.sv b/hw/rtl/libs/VX_scope_switch.sv index da5f13bcb9..2e964f31ef 100644 --- a/hw/rtl/libs/VX_scope_switch.sv +++ b/hw/rtl/libs/VX_scope_switch.sv @@ -20,8 +20,8 @@ module VX_scope_switch #( input wire clk, input wire reset, input wire req_in, - output wire req_out [N], - input wire rsp_in [N], + output wire [N-1:0] req_out, + input wire [N-1:0] rsp_in, output wire rsp_out ); if (N > 1) begin : g_switch @@ -46,7 +46,10 @@ module VX_scope_switch #( end end - assign req_out = req_out_r; + for (genvar i = 0; i < N; ++i) begin : g_req_out + assign req_out[i] = req_out_r[i]; + end + assign rsp_out = rsp_out_r; end else begin : g_passthru diff --git a/hw/rtl/libs/VX_scope_tap.sv b/hw/rtl/libs/VX_scope_tap.sv index b1977d388a..8b6eee65e1 100644 --- a/hw/rtl/libs/VX_scope_tap.sv +++ b/hw/rtl/libs/VX_scope_tap.sv @@ -33,12 +33,13 @@ module VX_scope_tap #( output wire bus_out ); localparam CTR_WIDTH = 64; - localparam DATA_IDX_WISTH = `LOG2UP(TX_DATAW); + localparam SER_CTR_WIDTH = `LOG2UP(TX_DATAW); localparam DATAW = PROBEW + 
TRIGGERW; localparam ADDRW = `CLOG2(DEPTH); + localparam SIZEW = `CLOG2(DEPTH+1); localparam MAX_IDLE_CTR = (2 ** IDLE_CTRW) - 1; localparam DATA_BLOCKS = `CDIV(DATAW, TX_DATAW); - localparam BLOCK_IDX_WISTH = `LOG2UP(DATA_BLOCKS); + localparam BLOCK_IDX_WIDTH = `LOG2UP(DATA_BLOCKS); localparam CTRL_STATE_IDLE = 2'd0; localparam CTRL_STATE_RECV = 2'd1; @@ -47,8 +48,8 @@ module VX_scope_tap #( localparam CTRL_STATE_BITS = 2; localparam TAP_STATE_IDLE = 2'd0; - localparam TAP_STATE_WAIT = 2'd1; - localparam TAP_STATE_RUN = 2'd2; + localparam TAP_STATE_RUN = 2'd1; + localparam TAP_STATE_DONE = 2'd2; localparam TAP_STATE_BITS = 2; localparam CMD_GET_WIDTH = 3'd0; @@ -57,13 +58,14 @@ module VX_scope_tap #( localparam CMD_GET_DATA = 3'd3; localparam CMD_SET_START = 3'd4; localparam CMD_SET_STOP = 3'd5; + localparam CMD_SET_DEPTH = 3'd6; localparam CMD_TYPE_BITS = 3; - localparam SEND_TYPE_WIDTH = 2'd0; - localparam SEND_TYPE_COUNT = 2'd1; - localparam SEND_TYPE_START = 2'd2; - localparam SEND_TYPE_DATA = 2'd3; - localparam SEND_TYPE_BITS = 2; + localparam SEND_TYPE_WIDTH = 2'd0; + localparam SEND_TYPE_COUNT = 2'd1; + localparam SEND_TYPE_START = 2'd2; + localparam SEND_TYPE_DATA = 2'd3; + localparam SEND_TYPE_BITS = 2; `STATIC_ASSERT ((IDLE_CTRW <= TX_DATAW), ("invalid parameter")) `STATIC_ASSERT(`IS_POW2(DEPTH), ("depth must be a power of 2!")) @@ -73,12 +75,13 @@ module VX_scope_tap #( reg [SEND_TYPE_BITS-1:0] send_type; reg [CTR_WIDTH-1:0] timestamp, start_time; - reg [CTR_WIDTH-1:0] start_delay, delay_cntr; + reg [CTR_WIDTH-1:0] start_delay, stop_delay; reg [`UP(TRIGGERW)-1:0] prev_trig; reg [IDLE_CTRW-1:0] delta; - reg cmd_start, dflush; + reg cmd_start, cmd_stop; + reg dflush; - reg [ADDRW-1:0] waddr, waddr_end; + reg [SIZEW-1:0] waddr, waddr_end; wire [DATAW-1:0] data_in; wire write_en; @@ -105,7 +108,7 @@ module VX_scope_tap #( .read (1'b1), .wren (1'b1), .write (write_en), - .waddr (waddr), + .waddr (waddr[ADDRW-1:0]), .wdata (delta), .raddr (raddr), .rdata 
(delta_value) @@ -128,7 +131,7 @@ module VX_scope_tap #( .read (1'b1), .wren (1'b1), .write (write_en), - .waddr (waddr), + .waddr (waddr[ADDRW-1:0]), .wdata (data_in), .raddr (raddr), .rdata (data_value) @@ -144,35 +147,16 @@ module VX_scope_tap #( always @(posedge clk) begin if (reset) begin - tap_state <= TAP_STATE_IDLE; - delta <= '0; - dflush <= 0; - prev_trig <= '0; - waddr <= '0; + tap_state <= TAP_STATE_IDLE; + delta <= '0; + dflush <= 0; + prev_trig <= '0; + waddr <= '0; end else begin case (tap_state) TAP_STATE_IDLE: begin if (start || cmd_start) begin - delta <= '0; - dflush <= 1; - if (0 == start_delay) begin - tap_state <= TAP_STATE_RUN; - start_time <= timestamp; - `ifdef DBG_TRACE_SCOPE - `TRACE(2, ("%t: scope_tap%0d: recording start - time=%0d\n", $time, SCOPE_ID, timestamp)) - `endif - end else begin - tap_state <= TAP_STATE_WAIT; - delay_cntr <= start_delay; - `ifdef DBG_TRACE_SCOPE - `TRACE(2, ("%t: scope_tap%0d: delayed start - time=%0d\n", $time, SCOPE_ID, start_delay)) - `endif - end - end - end - TAP_STATE_WAIT: begin - delay_cntr <= delay_cntr - CTR_WIDTH'(1); - if (1 == delay_cntr) begin + dflush <= 1; tap_state <= TAP_STATE_RUN; start_time <= timestamp; `ifdef DBG_TRACE_SCOPE @@ -182,10 +166,10 @@ module VX_scope_tap #( end TAP_STATE_RUN: begin dflush <= 0; - if (!stop && (waddr < waddr_end)) begin + if (!(stop || cmd_stop) && (waddr < waddr_end)) begin if (TRIGGERW != 0) begin if (dflush || (triggers != prev_trig)) begin - waddr <= waddr + ADDRW'(1); + waddr <= waddr + SIZEW'(1); delta <= '0; end else begin delta <= delta + IDLE_CTRW'(1); @@ -193,10 +177,10 @@ module VX_scope_tap #( end prev_trig <= triggers; end else begin - waddr <= waddr + ADDRW'(1); + waddr <= waddr + SIZEW'(1); end end else begin - tap_state <= TAP_STATE_IDLE; + tap_state <= TAP_STATE_DONE; `ifdef DBG_TRACE_SCOPE `TRACE(2, ("%t: scope_tap%0d: recording stop - waddr=(%0d, %0d)\n", $time, SCOPE_ID, waddr, waddr_end)) `endif @@ -218,8 +202,8 @@ module VX_scope_tap #( 
`UNUSED_VAR (ser_buf_in) wire [DATA_BLOCKS-1:0][TX_DATAW-1:0] data_blocks; - logic [BLOCK_IDX_WISTH-1:0] data_block_idx; - reg [DATA_IDX_WISTH-1:0] ser_tx_ctr; + logic [BLOCK_IDX_WIDTH-1:0] data_block_idx; + reg [SER_CTR_WIDTH-1:0] ser_tx_ctr; reg is_read_data; reg is_get_data; @@ -246,8 +230,8 @@ module VX_scope_tap #( && (send_type == SEND_TYPE_DATA) && (ser_tx_ctr == 0) && is_read_data) begin - if (data_block_idx < BLOCK_IDX_WISTH'(DATA_BLOCKS-1)) begin - data_block_idx <= data_block_idx + BLOCK_IDX_WISTH'(1); + if (data_block_idx < BLOCK_IDX_WIDTH'(DATA_BLOCKS-1)) begin + data_block_idx <= data_block_idx + BLOCK_IDX_WIDTH'(1); end else begin data_block_idx <= '0; end @@ -257,15 +241,15 @@ module VX_scope_tap #( assign data_block_idx = 0; end - wire [ADDRW-1:0] raddr_n = raddr + ADDRW'(1); - always @(posedge clk) begin if (reset) begin ctrl_state <= CTRL_STATE_IDLE; send_type <= SEND_TYPE_BITS'(SEND_TYPE_WIDTH); - waddr_end <= ADDRW'(DEPTH-1); + waddr_end <= SIZEW'(DEPTH); cmd_start <= 0; + cmd_stop <= 0; start_delay <= '0; + stop_delay <= '0; bus_out_r <= 0; raddr <= '0; is_read_data<= 0; @@ -273,17 +257,28 @@ module VX_scope_tap #( is_get_data <= 0; end else begin bus_out_r <= 0; - cmd_start <= 0; is_get_data <= 0; + + if (start_delay != 0) begin + start_delay <= start_delay - CTR_WIDTH'(1); + end + + if (stop_delay != 0) begin + stop_delay <= stop_delay - CTR_WIDTH'(1); + end + + cmd_start <= (start_delay == CTR_WIDTH'(1)); + cmd_stop <= (stop_delay == CTR_WIDTH'(1)); + case (ctrl_state) CTRL_STATE_IDLE: begin if (bus_in) begin - ser_tx_ctr <= DATA_IDX_WISTH'(TX_DATAW-1); + ser_tx_ctr <= SER_CTR_WIDTH'(TX_DATAW-1); ctrl_state <= CTRL_STATE_RECV; end end CTRL_STATE_RECV: begin - ser_tx_ctr <= ser_tx_ctr - DATA_IDX_WISTH'(1); + ser_tx_ctr <= ser_tx_ctr - SER_CTR_WIDTH'(1); ser_buf_in <= ser_buf_in_n; if (ser_tx_ctr == 0) begin // check if command is for this scope @@ -294,18 +289,22 @@ module VX_scope_tap #( ctrl_state <= CTRL_STATE_IDLE; case (cmd_type) 
CMD_SET_START: begin - start_delay <= 64'(cmd_data); - cmd_start <= 1; + start_delay <= CTR_WIDTH'(cmd_data); + cmd_start <= (cmd_data == 0); end CMD_SET_STOP: begin - waddr_end <= ADDRW'(cmd_data); + stop_delay <= CTR_WIDTH'(cmd_data); + cmd_stop <= (cmd_data == 0); + end + CMD_SET_DEPTH: begin + waddr_end <= SIZEW'(cmd_data); end CMD_GET_WIDTH, CMD_GET_START, CMD_GET_COUNT, CMD_GET_DATA: begin - send_type <= SEND_TYPE_BITS'(cmd_type); - ser_tx_ctr <= DATA_IDX_WISTH'(TX_DATAW-1); + send_type <= SEND_TYPE_BITS'(cmd_type); + ser_tx_ctr <= SER_CTR_WIDTH'(TX_DATAW-1); ctrl_state <= CTRL_STATE_SEND; bus_out_r <= 1; end @@ -345,12 +344,9 @@ module VX_scope_tap #( is_get_data <= 1; if (ser_tx_ctr == 0) begin if (is_read_data) begin - if (data_block_idx == BLOCK_IDX_WISTH'(DATA_BLOCKS-1)) begin - raddr <= raddr_n; + if (data_block_idx == BLOCK_IDX_WIDTH'(DATA_BLOCKS-1)) begin + raddr <= raddr + ADDRW'(1); is_read_data <= 0; // switch to delta mode - if (raddr_n == waddr) begin - raddr <= 0; // end-of-samples reset - end end end else begin is_read_data <= 1; // switch to data mode @@ -368,7 +364,7 @@ module VX_scope_tap #( end default:; endcase - ser_tx_ctr <= ser_tx_ctr - DATA_IDX_WISTH'(1); + ser_tx_ctr <= ser_tx_ctr - SER_CTR_WIDTH'(1); if (ser_tx_ctr == 0) begin ctrl_state <= CTRL_STATE_IDLE; end @@ -378,12 +374,12 @@ module VX_scope_tap #( end end - wire [BLOCK_IDX_WISTH-1:0] data_block_idx_r; - wire [DATA_IDX_WISTH-1:0] ser_tx_ctr_r; + wire [BLOCK_IDX_WIDTH-1:0] data_block_idx_r; + wire [SER_CTR_WIDTH-1:0] ser_tx_ctr_r; wire is_read_data_r; VX_pipe_register #( - .DATAW (1 + DATA_IDX_WISTH + BLOCK_IDX_WISTH) + .DATAW (1 + SER_CTR_WIDTH + BLOCK_IDX_WIDTH) ) data_sel_buf ( .clk (clk), .reset (reset), diff --git a/runtime/common/scope.cpp b/runtime/common/scope.cpp index 820fa54f5e..361a327caf 100644 --- a/runtime/common/scope.cpp +++ b/runtime/common/scope.cpp @@ -32,6 +32,8 @@ #define TIMEOUT_TIME (60*60) +#define MAX_DELAY_CYCLES 10000 + #define MMIO_SCOPE_READ 
(AFU_IMAGE_MMIO_SCOPE_READ * 4) #define MMIO_SCOPE_WRITE (AFU_IMAGE_MMIO_SCOPE_WRITE * 4) @@ -41,6 +43,7 @@ #define CMD_GET_DATA 3 #define CMD_SET_START 4 #define CMD_SET_STOP 5 +#define CMD_SET_DEPTH 6 #define CHECK_ERR(_expr) \ do { \ @@ -96,7 +99,7 @@ static void dump_module(std::ofstream& ofs, auto itt = tails.find(name); if (itt != tails.end()) { for (auto& signal : itt->second->signals) { - ofs << indent << " $var reg " << signal.width << " " << signal.id << " " << signal.name << " $end" << std::endl; + ofs << indent << " $var wire " << signal.width << " " << signal.id << " " << signal.name << " $end" << std::endl; } } @@ -114,7 +117,7 @@ static void dump_header(std::ofstream& ofs, std::vector& taps) { ofs << "$version Generated by Vortex Scope Analyzer $end" << std::endl; ofs << "$timescale 1 ns $end" << std::endl; ofs << "$scope module TOP $end" << std::endl; - ofs << " $var reg 1 0 clk $end" << std::endl; + ofs << " $var wire 1 0 clk $end" << std::endl; std::unordered_map> hierarchy; std::unordered_set heads; @@ -160,6 +163,14 @@ static tap_t* find_earliest_tap(std::vector& taps) { } static uint64_t advance_clock(std::ofstream& ofs, uint64_t cur_time, uint64_t next_time) { + uint64_t delta = next_time - cur_time; + if (delta > MAX_DELAY_CYCLES) { + ofs << '#' << (cur_time * 2 + 0) << std::endl; + ofs << "bx 0" << std::endl; + ofs << '#' << (cur_time * 2 + 1) << std::endl; + ofs << "bx 0" << std::endl; + cur_time = next_time - MAX_DELAY_CYCLES; + } while (cur_time < next_time) { ofs << '#' << (cur_time * 2 + 0) << std::endl; ofs << "b0 0" << std::endl; @@ -350,7 +361,6 @@ int vx_scope_stop(vx_device_h hdevice) { uint64_t cmd_count = (tap.id << 3) | CMD_GET_COUNT; CHECK_ERR(g_callback.registerWrite(hdevice, cmd_count)); CHECK_ERR(g_callback.registerRead(hdevice, &count)); - if (count == 0) continue; @@ -385,7 +395,6 @@ int vx_scope_stop(vx_device_h hdevice) { uint64_t cur_time = 0; auto tap = find_earliest_tap(taps); if (tap != nullptr) { - cur_time = 
(tap->cycle_time > 0) ? (tap->cycle_time-1) : 0; do { // advance clock cur_time = advance_clock(ofs, cur_time, tap->cycle_time); diff --git a/runtime/opae/vortex.cpp b/runtime/opae/vortex.cpp index f06f34bea8..a7f77ee970 100755 --- a/runtime/opae/vortex.cpp +++ b/runtime/opae/vortex.cpp @@ -195,7 +195,7 @@ class vx_device { return device->api_.fpgaReadMMIO64(device->fpga_, 0, MMIO_SCOPE_READ, value); }; - CHECK_ERR(vx_scope_start(&callback, this, 0, -1), { + CHECK_ERR(vx_scope_start(&callback, this, -1, -1), { api_.fpgaClose(fpga_); return err; }); diff --git a/runtime/xrt/vortex.cpp b/runtime/xrt/vortex.cpp index d542e72fe2..ffc7870d48 100644 --- a/runtime/xrt/vortex.cpp +++ b/runtime/xrt/vortex.cpp @@ -265,7 +265,7 @@ class vx_device { *value = (((uint64_t)value_hi) << 32) | value_lo; return 0; }; - CHECK_ERR(vx_scope_start(&callback, this, 0, -1), { + CHECK_ERR(vx_scope_start(&callback, this, -1, -1), { return err; }); } From 27543e240edd5763df0c275db9f1bf19ef2380de Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Wed, 25 Sep 2024 19:11:40 -0700 Subject: [PATCH 227/407] minor update --- hw/rtl/afu/opae/vortex_afu.sv | 27 +++++++++------------------ hw/rtl/core/VX_fetch.sv | 15 +++++++-------- hw/rtl/core/VX_issue_slice.sv | 14 +++++--------- hw/rtl/core/VX_lsu_slice.sv | 10 +++++----- 4 files changed, 26 insertions(+), 40 deletions(-) diff --git a/hw/rtl/afu/opae/vortex_afu.sv b/hw/rtl/afu/opae/vortex_afu.sv index 38994c1c5d..ff5ce4179a 100644 --- a/hw/rtl/afu/opae/vortex_afu.sv +++ b/hw/rtl/afu/opae/vortex_afu.sv @@ -1012,11 +1012,6 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ // SCOPE ////////////////////////////////////////////////////////////////////// `ifdef DBG_SCOPE_AFU - wire avs_write_fire = avs_write[0] && ~avs_waitrequest[0]; - wire avs_read_fire = avs_read[0] && ~avs_waitrequest[0]; - wire vx_mem_req_fire = vx_mem_req_valid && vx_mem_req_ready; - wire vx_mem_rsp_fire = vx_mem_rsp_valid && vx_mem_rsp_ready; - reg 
[STATE_WIDTH-1:0] state_prev; always @(posedge clk) begin state_prev <= state; @@ -1028,12 +1023,15 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ `SCOPE_TAP (0, 0, { vx_reset, vx_busy, - vx_mem_req_fire, - vx_mem_rsp_fire, + vx_mem_req_valid, + vx_mem_req_ready, + vx_mem_rsp_valid, + vx_mem_rsp_ready, vx_dcr_wr_valid, state_changed, - avs_write_fire, - avs_read_fire, + avs_read[0], + avs_write[0], + avs_waitrequest[0], avs_waitrequest[0], avs_readdatavalid[0], cp2af_sRxPort.c0.mmioRdValid, @@ -1044,14 +1042,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ af2cp_sTxPort.c1.valid, cp2af_sRxPort.c0TxAlmFull, cp2af_sRxPort.c1TxAlmFull, - af2cp_sTxPort.c2.mmioRdValid, - cci_wr_req_fire, - cci_wr_rsp_fire, - cci_rd_req_fire, - cci_rd_rsp_fire, - cci_pending_reads_full, - cci_pending_writes_empty, - cci_pending_writes_full + af2cp_sTxPort.c2.mmioRdValid },{ cmd_type, state, @@ -1081,7 +1072,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ reset_negedge, 1'b0, 4096 ); `else - `SCOPE_IO_UNUSED(0) + `SCOPE_IO_UNUSED_W(0) `endif /////////////////////////////////////////////////////////////////////////////// diff --git a/hw/rtl/core/VX_fetch.sv b/hw/rtl/core/VX_fetch.sv index baeb152f21..d96ef7abdc 100644 --- a/hw/rtl/core/VX_fetch.sv +++ b/hw/rtl/core/VX_fetch.sv @@ -41,11 +41,7 @@ module VX_fetch import VX_gpu_pkg::*; #( wire [`UUID_WIDTH-1:0] rsp_uuid; wire [`NW_WIDTH-1:0] req_tag, rsp_tag; - wire schedule_fire = schedule_if.valid && schedule_if.ready; wire icache_req_fire = icache_req_valid && icache_req_ready; - wire icache_rsp_fire = icache_bus_if.rsp_valid && icache_bus_if.rsp_ready; - `UNUSED_VAR (schedule_fire) - `UNUSED_VAR (icache_rsp_fire) assign req_tag = schedule_if.data.wid; @@ -139,13 +135,16 @@ module VX_fetch import VX_gpu_pkg::*; #( `ifdef DBG_SCOPE_FETCH `SCOPE_IO_SWITCH (1); `NEG_EDGE (reset_negedge, reset); - `SCOPE_TAP_EX (0, 1, 3, ( + 
`SCOPE_TAP_EX (0, 1, 6, ( `UUID_WIDTH + `NW_WIDTH + `NUM_THREADS + `PC_BITS + ICACHE_TAG_WIDTH + ICACHE_WORD_SIZE + ICACHE_ADDR_WIDTH + (ICACHE_WORD_SIZE * 8) + ICACHE_TAG_WIDTH ), { - schedule_fire, - icache_req_fire, - icache_rsp_fire + schedule_if.valid, + schedule_if.ready, + icache_bus_if.req_valid, + icache_bus_if.req_ready, + icache_bus_if.rsp_valid, + icache_bus_if.rsp_ready }, { schedule_if.data.uuid, schedule_if.data.wid, schedule_if.data.tmask, schedule_if.data.PC, icache_bus_if.req_data.tag, icache_bus_if.req_data.byteen, icache_bus_if.req_data.addr, diff --git a/hw/rtl/core/VX_issue_slice.sv b/hw/rtl/core/VX_issue_slice.sv index a496af8e39..583967cc8e 100644 --- a/hw/rtl/core/VX_issue_slice.sv +++ b/hw/rtl/core/VX_issue_slice.sv @@ -36,11 +36,6 @@ module VX_issue_slice import VX_gpu_pkg::*; #( VX_scoreboard_if scoreboard_if(); VX_operands_if operands_if(); - wire operands_if_fire = operands_if.valid && operands_if.ready; - wire writeback_if_valid = writeback_if.valid; - `UNUSED_VAR (operands_if_fire) - `UNUSED_VAR (writeback_if_valid) - VX_ibuffer #( .INSTANCE_ID ($sformatf("%s-ibuffer", INSTANCE_ID)) ) ibuffer ( @@ -97,13 +92,14 @@ module VX_issue_slice import VX_gpu_pkg::*; #( `ifdef DBG_SCOPE_ISSUE `SCOPE_IO_SWITCH (1); `NEG_EDGE (reset_negedge, reset); - `SCOPE_TAP_EX (0, 2, 2, ( + `SCOPE_TAP_EX (0, 2, 3, ( `UUID_WIDTH + `NUM_THREADS + `EX_BITS + `INST_OP_BITS + 1 + `NR_BITS + (`NUM_THREADS * 3 * `XLEN) + `UUID_WIDTH + `NUM_THREADS + `NR_BITS + (`NUM_THREADS*`XLEN) + 1 ), { - operands_if_fire, - writeback_if_valid + operands_if.valid, + operands_if.ready, + writeback_if.valid }, { operands_if.data.uuid, operands_if.data.tmask, @@ -138,7 +134,7 @@ module VX_issue_slice import VX_gpu_pkg::*; #( `ifdef DBG_TRACE_PIPELINE always @(posedge clk) begin - if (operands_if_fire) begin + if (operands_if.valid && operands_if.ready) begin `TRACE(1, ("%t: %s: wid=%0d, PC=0x%0h, ex=", $time, INSTANCE_ID, wis_to_wid(operands_if.data.wis, ISSUE_ID), 
{operands_if.data.PC, 1'b0})) trace_ex_type(1, operands_if.data.ex_type); `TRACE(1, (", op=")) diff --git a/hw/rtl/core/VX_lsu_slice.sv b/hw/rtl/core/VX_lsu_slice.sv index 962bcd70cd..4ca88c7b31 100644 --- a/hw/rtl/core/VX_lsu_slice.sv +++ b/hw/rtl/core/VX_lsu_slice.sv @@ -102,8 +102,6 @@ module VX_lsu_slice import VX_gpu_pkg::*; #( wire mem_req_fire = mem_req_valid && mem_req_ready; wire mem_rsp_fire = mem_rsp_valid && mem_rsp_ready; - `UNUSED_VAR (mem_req_fire) - `UNUSED_VAR (mem_rsp_fire) wire mem_rsp_sop_pkt, mem_rsp_eop_pkt; wire no_rsp_buf_valid, no_rsp_buf_ready; @@ -538,11 +536,13 @@ module VX_lsu_slice import VX_gpu_pkg::*; #( `ifdef DBG_SCOPE_LSU `SCOPE_IO_SWITCH (1); `NEG_EDGE (reset_negedge, reset); - `SCOPE_TAP_EX (0, 3, 2, ( + `SCOPE_TAP_EX (0, 3, 4, ( 1 + NUM_LANES * (`XLEN + LSU_WORD_SIZE + LSU_WORD_SIZE * 8) + `UUID_WIDTH + NUM_LANES * LSU_WORD_SIZE * 8 + `UUID_WIDTH ), { - mem_req_fire, - mem_rsp_fire + mem_req_valid, + mem_req_ready, + mem_rsp_valid, + mem_rsp_ready }, { mem_req_rw, full_addr, From 9a3eb7405188ef6b391ec67aa0d5a563aee5d62f Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Thu, 26 Sep 2024 09:50:38 -0700 Subject: [PATCH 228/407] adding scope.py support for structs --- hw/rtl/VX_scope.vh | 8 +-- hw/rtl/afu/opae/vortex_afu.sv | 1 - hw/scripts/scope.py | 115 ++++++++++++++++++++++++---------- 3 files changed, 87 insertions(+), 37 deletions(-) diff --git a/hw/rtl/VX_scope.vh b/hw/rtl/VX_scope.vh index b88a2718b8..43ad91e859 100644 --- a/hw/rtl/VX_scope.vh +++ b/hw/rtl/VX_scope.vh @@ -38,12 +38,12 @@ VX_scope_switch #( \ .N (__count) \ ) scope_switch ( \ - .clk (clk), \ - .reset (scope_reset), \ - .req_in (scope_bus_in), \ + .clk (clk), \ + .reset (scope_reset), \ + .req_in (scope_bus_in), \ .rsp_out (scope_bus_out), \ .req_out (scope_bus_in_w), \ - .rsp_in (scope_bus_out_w) \ + .rsp_in (scope_bus_out_w) \ ) `define SCOPE_TAP_EX(__idx, __id, __triggers_w, __probes_w, __triggers, __probes, __start, __stop, __depth) \ diff --git 
a/hw/rtl/afu/opae/vortex_afu.sv b/hw/rtl/afu/opae/vortex_afu.sv index ff5ce4179a..37afa93262 100644 --- a/hw/rtl/afu/opae/vortex_afu.sv +++ b/hw/rtl/afu/opae/vortex_afu.sv @@ -1032,7 +1032,6 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ avs_read[0], avs_write[0], avs_waitrequest[0], - avs_waitrequest[0], avs_readdatavalid[0], cp2af_sRxPort.c0.mmioRdValid, cp2af_sRxPort.c0.mmioWrValid, diff --git a/hw/scripts/scope.py b/hw/scripts/scope.py index 5361e8afe4..931371643d 100755 --- a/hw/scripts/scope.py +++ b/hw/scripts/scope.py @@ -1,12 +1,12 @@ #!/usr/bin/env python3 # Copyright © 2019-2023 -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -19,9 +19,9 @@ import re import json -vl_int_re = re.compile(r"\d+'s*h([\da-fA-F]+)") +vl_int_re = re.compile(r"\d+'s*h([\da-fA-F]+)") -def parse_vl_int(text): +def parse_vl_int(text): str_hex = re.sub(vl_int_re, r'\1', text) return int(str_hex, 16) @@ -34,15 +34,17 @@ def source_loc(xml_doc, xml_loc): end_col = loc[4] file = xml_doc.find(".//file/[@id='" + file_id + "']").get("filename") return file + " (" + start_line + ":" + start_col + "-" + end_line + ":" + end_col + ")" - + def parse_dtype_width(xml_doc, dtype_id): xml_type = xml_doc.find(".//typetable/*[@id='" + dtype_id + "']") - if xml_type.tag == "packarraydtype" or xml_type.tag == "unpackarraydtype": + if xml_type.tag in ["packarraydtype", "unpackarraydtype"]: sub_dtype_id = xml_type.get("sub_dtype_id") base_width = parse_dtype_width(xml_doc, sub_dtype_id) - const = xml_type.iter("const") - left = parse_vl_int(next(const).get("name")) - right = parse_vl_int(next(const).get("name")) + const_iter = xml_type.iter("const") + first_const = next(const_iter) + second_const = next(const_iter) + left = parse_vl_int(first_const.get("name")) + right = parse_vl_int(second_const.get("name")) return base_width * (left - right + 1) elif xml_type.tag == "structdtype": width = 0 @@ -65,31 +67,74 @@ def parse_dtype_width(xml_doc, dtype_id): if left != None and right != None: return int(left) - int(right) + 1 return 1 - + def parse_var_name(xml_doc, xml_node): if xml_node.tag == "varref": return xml_node.get("name") elif xml_node.tag == "varxref": name = xml_node.get("name") dotted = xml_node.get("dotted") - return dotted + '.' 
+ name + return f"{dotted}.{name}" else: raise ET.ParseError("invalid probe entry" + source_loc(xml_doc, xml_node.get("loc"))) return name +def parse_sel_field(xml_doc, dtype_id, offset, width): + xml_type = xml_doc.find(".//typetable/*[@id='" + dtype_id + "']") + name = xml_type.get("name") + if xml_type.tag == "structdtype": + bit_offset = 0 + members = list(xml_type.findall("memberdtype")) + members.reverse() + for member in members: + sub_dtype_id = member.get("sub_dtype_id") + member_name = member.get("name") + member_width = parse_dtype_width(xml_doc, sub_dtype_id) + if bit_offset <= offset < bit_offset + member_width: + if sub_dtype_id: + sub_field = parse_sel_field(xml_doc, sub_dtype_id, offset - bit_offset, width) + return f".{member_name}{sub_field}" + else: + return f".{member_name}" + bit_offset += member_width + raise ET.ParseError("invalid probe entry: " + source_loc(xml_doc, xml_type.get("loc"))) + elif xml_type.tag in ["packarraydtype", "unpackarraydtype"]: + sub_dtype_id = xml_type.get("sub_dtype_id") + base_width = parse_dtype_width(xml_doc, sub_dtype_id) + if width > base_width: + return "" + array_index = offset // base_width + sub_offset = offset % base_width + array_sel_name = f"[{array_index}]" + sub_field = parse_sel_field(xml_doc, sub_dtype_id, sub_offset, width) + return f"{array_sel_name}{sub_field}" + elif xml_type.tag == "basicdtype": + if (offset == 0): + return "" + return f"_{offset}" + else: + raise ET.ParseError("invalid probe entry: " + source_loc(xml_doc, xml_type.get("loc"))) + return None + def parse_sel_name(xml_doc, xml_node): - name = parse_var_name(xml_doc, xml_node.find("*")) - const = xml_node.iter("const") - offset = parse_vl_int(next(const).get("name")) - #size = parse_vl_int(next(const).get("name")) - return name + '_' + str(offset) + first_child = xml_node.find("*") + name = parse_var_name(xml_doc, first_child) + dtype_id = first_child.get("dtype_id") + const_iter = xml_node.iter("const") + first_const = 
next(const_iter) + second_const = next(const_iter) + offset = parse_vl_int(first_const.get("name")) + width = parse_vl_int(second_const.get("name")) + return name + parse_sel_field(xml_doc, dtype_id, offset, width) -def parse_array_name(xml_doc, xml_node): +def parse_arraysel_name(xml_doc, xml_node): if xml_node.tag == "arraysel": - name = parse_array_name(xml_doc, xml_node.find("*")) - xml_size = xml_node.find("const").get("name") - array_size = parse_vl_int(xml_size) - name = name + '_' + str(array_size) + first_child = xml_node.find("*") + name = parse_arraysel_name(xml_doc, first_child) + const_iter = xml_node.iter("const") + first_const = next(const_iter) + offset = parse_vl_int(first_const.get("name")) + name = f"{name}[{offset}]" else: name = parse_var_name(xml_doc, xml_node) return name @@ -97,9 +142,10 @@ def parse_array_name(xml_doc, xml_node): def parse_vl_port(xml_doc, xml_node, signals): total_width = 0 if xml_node.tag == "concat": - for xml_child in xml_node.findall("*"): + child_nodes = xml_node.findall("*") + for xml_child in child_nodes: total_width = total_width + parse_vl_port(xml_doc, xml_child, signals) - elif xml_node.tag == "varref" or xml_node.tag == "varxref": + elif xml_node.tag in ["varref", "varxref"]: name = parse_var_name(xml_doc, xml_node) dtype_id = xml_node.get("dtype_id") signal_width = parse_dtype_width(xml_doc, dtype_id) @@ -112,20 +158,25 @@ def parse_vl_port(xml_doc, xml_node, signals): signals.append([name, signal_width]) total_width = total_width + signal_width elif xml_node.tag == "arraysel": - name = parse_array_name(xml_doc, xml_node) + name = parse_arraysel_name(xml_doc, xml_node) dtype_id = xml_node.get("dtype_id") signal_width = parse_dtype_width(xml_doc, dtype_id) signals.append([name, signal_width]) total_width = total_width + signal_width else: raise ET.ParseError("invalid probe entry: " + source_loc(xml_doc, xml_node.get("loc"))) + # Check for duplicate signal names + signal_names = [signal[0] for signal in signals] 
+ duplicates = set([name for name in signal_names if signal_names.count(name) > 1]) + if len(duplicates) > 0: + raise ET.ParseError("duplicate signal names: " + ", ".join(duplicates)) return total_width def parse_xml(filename, max_taps): xml_doc = ET.parse(filename) modules = {} xml_modules = xml_doc.findall(".//module/[@origName='VX_scope_tap']") - for xml_module in xml_modules: + for xml_module in xml_modules: scope_id = parse_vl_int(xml_module.find(".//var/[@name='SCOPE_ID']/const").get("name")) triggerw = parse_vl_int(xml_module.find(".//var/[@name='TRIGGERW']/const").get("name")) probew = parse_vl_int(xml_module.find(".//var/[@name='PROBEW']/const").get("name")) @@ -133,16 +184,16 @@ def parse_xml(filename, max_taps): modules[module_name] = [scope_id, triggerw, probew] taps = [] - xml_instances = xml_doc.iter("instance") - for xml_instance in xml_instances: + xml_instances = xml_doc.iter("instance") + for xml_instance in xml_instances: if (max_taps != -1 and len(taps) >= max_taps): - break + break defName = xml_instance.get("defName") module = modules.get(defName) if module is None: continue triggers = [] - probes = [] + probes = [] w = parse_vl_port(xml_doc, xml_instance.find(".//port/[@name='triggers']/*"), triggers) if w != module[1]: raise ET.ParseError("invalid triggers width: actual=" + str(w) + ", expected=" + str(module[1])) @@ -157,19 +208,19 @@ def parse_xml(filename, max_taps): path = hier.rsplit(".", 1)[0] taps.append({"id":module[0], "width":module[1] + module[2], - "signals":signals, + "signals":signals, "path":path}) return {"version":"0.1.0", "taps":taps} -def main(): +def main(): parser = argparse.ArgumentParser(description='Scope headers generator.') parser.add_argument('-o', nargs='?', default='scope.json', metavar='o', help='Output JSON manifest') parser.add_argument('-n', nargs='?', default=-1, metavar='n', type=int, help='Maximum number of taps to read') parser.add_argument('xml', help='Design XML descriptor file') args = 
parser.parse_args() #print("args=", args) - scope_taps = parse_xml(args.xml, args.n) + scope_taps = parse_xml(args.xml, args.n) with open(args.o, "w") as f: json.dump(scope_taps, f, ensure_ascii=False, indent=4) From 5db1937a5efa45fc71a6a413ed8bbd1b72914aa6 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Fri, 27 Sep 2024 07:52:38 -0700 Subject: [PATCH 229/407] fixed scope parser array indexing --- hw/scripts/scope.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/hw/scripts/scope.py b/hw/scripts/scope.py index 931371643d..db5fda1a90 100755 --- a/hw/scripts/scope.py +++ b/hw/scripts/scope.py @@ -33,7 +33,7 @@ def source_loc(xml_doc, xml_loc): end_line = loc[3] end_col = loc[4] file = xml_doc.find(".//file/[@id='" + file_id + "']").get("filename") - return file + " (" + start_line + ":" + start_col + "-" + end_line + ":" + end_col + ")" + return f"{file} ({start_line}:{start_col}-{end_line}:{end_col})" def parse_dtype_width(xml_doc, dtype_id): xml_type = xml_doc.find(".//typetable/*[@id='" + dtype_id + "']") @@ -75,6 +75,8 @@ def parse_var_name(xml_doc, xml_node): name = xml_node.get("name") dotted = xml_node.get("dotted") return f"{dotted}.{name}" + elif xml_node.tag == "arraysel": + return parse_arraysel_name(xml_doc, xml_node) else: raise ET.ParseError("invalid probe entry" + source_loc(xml_doc, xml_node.get("loc"))) return name @@ -91,7 +93,7 @@ def parse_sel_field(xml_doc, dtype_id, offset, width): member_name = member.get("name") member_width = parse_dtype_width(xml_doc, sub_dtype_id) if bit_offset <= offset < bit_offset + member_width: - if sub_dtype_id: + if width != member_width and sub_dtype_id: sub_field = parse_sel_field(xml_doc, sub_dtype_id, offset - bit_offset, width) return f".{member_name}{sub_field}" else: @@ -105,13 +107,14 @@ def parse_sel_field(xml_doc, dtype_id, offset, width): return "" array_index = offset // base_width sub_offset = offset % base_width - array_sel_name = f"[{array_index}]" + 
array_sel_name = f"_{array_index}" # array indexing is not supported in VCD sub_field = parse_sel_field(xml_doc, sub_dtype_id, sub_offset, width) return f"{array_sel_name}{sub_field}" elif xml_type.tag == "basicdtype": - if (offset == 0): - return "" - return f"_{offset}" + if width == 1: + return F"[{offset}]" + end = width - 1 + offset + return F"[{end}:{offset}]" else: raise ET.ParseError("invalid probe entry: " + source_loc(xml_doc, xml_type.get("loc"))) return None @@ -134,7 +137,7 @@ def parse_arraysel_name(xml_doc, xml_node): const_iter = xml_node.iter("const") first_const = next(const_iter) offset = parse_vl_int(first_const.get("name")) - name = f"{name}[{offset}]" + name = f"{name}_{offset}" # array indexing is not supported in VCD else: name = parse_var_name(xml_doc, xml_node) return name From e9f19a0bf9fbfdcf16d2f5a2f3235c858346b412 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Fri, 27 Sep 2024 09:13:24 -0700 Subject: [PATCH 230/407] fixed BRAM multi-dimensional array bug on Xilinx Vivado --- hw/rtl/libs/VX_mem_scheduler.sv | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/hw/rtl/libs/VX_mem_scheduler.sv b/hw/rtl/libs/VX_mem_scheduler.sv index 913656bf8f..ef41a89f03 100644 --- a/hw/rtl/libs/VX_mem_scheduler.sv +++ b/hw/rtl/libs/VX_mem_scheduler.sv @@ -460,14 +460,15 @@ module VX_mem_scheduler #( end else begin : g_rsp_full reg [CORE_BATCHES*CORE_CHANNELS*WORD_WIDTH-1:0] rsp_store [CORE_QUEUE_SIZE-1:0]; - reg [CORE_BATCHES-1:00][CORE_CHANNELS-1:0][WORD_WIDTH-1:0] rsp_store_n; + reg [CORE_BATCHES*CORE_CHANNELS*WORD_WIDTH-1:0] rsp_store_n; // use flattened array for BRAM synthesis compatibility reg [CORE_REQS-1:0] rsp_orig_mask [CORE_QUEUE_SIZE-1:0]; always @(*) begin rsp_store_n = rsp_store[ibuf_raddr]; for (integer i = 0; i < CORE_CHANNELS; ++i) begin if ((CORE_CHANNELS == 1) || mem_rsp_mask_s[i]) begin - rsp_store_n[rsp_batch_idx][i] = mem_rsp_data_s[i]; + integer k = (rsp_batch_idx * CORE_CHANNELS * WORD_WIDTH) + (i * 
WORD_WIDTH); + rsp_store_n[k +: WORD_WIDTH] = mem_rsp_data_s[i]; end end end @@ -488,7 +489,8 @@ module VX_mem_scheduler #( for (genvar r = 0; r < CORE_REQS; ++r) begin : g_crsp_data localparam i = r / CORE_CHANNELS; localparam j = r % CORE_CHANNELS; - assign crsp_data[r] = rsp_store_n[i][j]; + localparam k = (i * CORE_CHANNELS * WORD_WIDTH) + (j * WORD_WIDTH); + assign crsp_data[r] = rsp_store_n[k +: WORD_WIDTH]; end assign mem_rsp_ready_s = crsp_ready || ~rsp_complete; From 533ddffc476caef4673b99bd77b5d42d4f8d546f Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Fri, 27 Sep 2024 09:48:05 -0700 Subject: [PATCH 231/407] cleanup multi-dimensional array to improve synthesis compatibility --- hw/rtl/core/VX_operands.sv | 2 +- hw/rtl/fpu/VX_fpu_fpnew.sv | 2 +- hw/rtl/libs/VX_mem_coalescer.sv | 22 +++++++++++++--------- hw/rtl/libs/VX_mem_scheduler.sv | 4 ++-- hw/rtl/libs/VX_pe_serializer.sv | 2 +- 5 files changed, 18 insertions(+), 14 deletions(-) diff --git a/hw/rtl/core/VX_operands.sv b/hw/rtl/core/VX_operands.sv index f306812639..42a91e4c24 100644 --- a/hw/rtl/core/VX_operands.sv +++ b/hw/rtl/core/VX_operands.sv @@ -69,7 +69,7 @@ module VX_operands import VX_gpu_pkg::*; #( wire pipe_valid_st2, pipe_ready_st2; wire [META_DATAW-1:0] pipe_data, pipe_data_st1, pipe_data_st2; - reg [NUM_SRC_OPDS-1:0][`NUM_THREADS-1:0][`XLEN-1:0] src_data_st2, src_data_m_st2; + reg [NUM_SRC_OPDS-1:0][(`NUM_THREADS * `XLEN)-1:0] src_data_st2, src_data_m_st2; reg [NUM_SRC_OPDS-1:0] data_fetched_st1; diff --git a/hw/rtl/fpu/VX_fpu_fpnew.sv b/hw/rtl/fpu/VX_fpu_fpnew.sv index 15a6c8d52c..596a86513e 100644 --- a/hw/rtl/fpu/VX_fpu_fpnew.sv +++ b/hw/rtl/fpu/VX_fpu_fpnew.sv @@ -90,7 +90,7 @@ module VX_fpu_fpnew reg [TAG_WIDTH-1:0] fpu_tag_in, fpu_tag_out; - reg [2:0][NUM_LANES-1:0][`XLEN-1:0] fpu_operands; + logic [2:0][NUM_LANES-1:0][`XLEN-1:0] fpu_operands; wire [NUM_LANES-1:0][`XLEN-1:0] fpu_result; fpnew_pkg::status_t fpu_status; diff --git a/hw/rtl/libs/VX_mem_coalescer.sv 
b/hw/rtl/libs/VX_mem_coalescer.sv index 55cad2df7d..c27f04da4d 100644 --- a/hw/rtl/libs/VX_mem_coalescer.sv +++ b/hw/rtl/libs/VX_mem_coalescer.sv @@ -154,23 +154,27 @@ module VX_mem_coalescer #( wire [NUM_REQS-1:0] current_pmask = in_req_mask & addr_matches_r; - reg [OUT_REQS-1:0][DATA_RATIO-1:0][DATA_IN_SIZE-1:0] req_byteen_merged; - reg [OUT_REQS-1:0][DATA_RATIO-1:0][DATA_IN_WIDTH-1:0] req_data_merged; - - always @(*) begin - req_byteen_merged = '0; - req_data_merged = 'x; - for (integer i = 0; i < OUT_REQS; ++i) begin + wire [OUT_REQS-1:0][DATA_RATIO-1:0][DATA_IN_SIZE-1:0] req_byteen_merged; + wire [OUT_REQS-1:0][DATA_RATIO-1:0][DATA_IN_WIDTH-1:0] req_data_merged; + + for (genvar i = 0; i < OUT_REQS; ++i) begin : g_data_merged + reg [DATA_RATIO-1:0][DATA_IN_SIZE-1:0] byteen_merged; + reg [DATA_RATIO-1:0][DATA_IN_WIDTH-1:0] data_merged; + always @(*) begin + byteen_merged = '0; + data_merged = 'x; for (integer j = 0; j < DATA_RATIO; ++j) begin for (integer k = 0; k < DATA_IN_SIZE; ++k) begin // perform byte-level merge since each thread may have different bytes enabled if (current_pmask[i * DATA_RATIO + j] && in_req_byteen[DATA_RATIO * i + j][k]) begin - req_byteen_merged[i][in_addr_offset[DATA_RATIO * i + j]][k] = 1'b1; - req_data_merged[i][in_addr_offset[DATA_RATIO * i + j]][k * 8 +: 8] = in_req_data[DATA_RATIO * i + j][k * 8 +: 8]; + byteen_merged[in_addr_offset[DATA_RATIO * i + j]][k] = 1'b1; + data_merged[in_addr_offset[DATA_RATIO * i + j]][k * 8 +: 8] = in_req_data[DATA_RATIO * i + j][k * 8 +: 8]; end end end end + assign req_byteen_merged[i] = byteen_merged; + assign req_data_merged[i] = data_merged; end wire is_last_batch = ~(| (in_req_mask & ~addr_matches_r & req_rem_mask_r)); diff --git a/hw/rtl/libs/VX_mem_scheduler.sv b/hw/rtl/libs/VX_mem_scheduler.sv index ef41a89f03..3d6884f1a3 100644 --- a/hw/rtl/libs/VX_mem_scheduler.sv +++ b/hw/rtl/libs/VX_mem_scheduler.sv @@ -459,8 +459,8 @@ module VX_mem_scheduler #( end else begin : g_rsp_full - reg 
[CORE_BATCHES*CORE_CHANNELS*WORD_WIDTH-1:0] rsp_store [CORE_QUEUE_SIZE-1:0]; - reg [CORE_BATCHES*CORE_CHANNELS*WORD_WIDTH-1:0] rsp_store_n; // use flattened array for BRAM synthesis compatibility + reg [(CORE_BATCHES * CORE_CHANNELS * WORD_WIDTH)-1:0] rsp_store [CORE_QUEUE_SIZE-1:0]; + reg [(CORE_BATCHES * CORE_CHANNELS * WORD_WIDTH)-1:0] rsp_store_n; // use flattened array for BRAM synthesis compatibility reg [CORE_REQS-1:0] rsp_orig_mask [CORE_QUEUE_SIZE-1:0]; always @(*) begin diff --git a/hw/rtl/libs/VX_pe_serializer.sv b/hw/rtl/libs/VX_pe_serializer.sv index 58fced4103..4a66a63991 100644 --- a/hw/rtl/libs/VX_pe_serializer.sv +++ b/hw/rtl/libs/VX_pe_serializer.sv @@ -105,7 +105,7 @@ module VX_pe_serializer #( end end - reg [BATCH_SIZE-1:0][NUM_PES-1:0][DATA_OUT_WIDTH-1:0] data_out_r, data_out_n; + reg [BATCH_SIZE-1:0][(NUM_PES * DATA_OUT_WIDTH)-1:0] data_out_r, data_out_n; always @(*) begin data_out_n = data_out_r; From f2c970868e9788cddf789d92f389cfb4ae24955e Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Fri, 27 Sep 2024 10:02:59 -0700 Subject: [PATCH 232/407] minor update --- hw/rtl/libs/VX_mem_scheduler.sv | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/hw/rtl/libs/VX_mem_scheduler.sv b/hw/rtl/libs/VX_mem_scheduler.sv index 3d6884f1a3..73647911a9 100644 --- a/hw/rtl/libs/VX_mem_scheduler.sv +++ b/hw/rtl/libs/VX_mem_scheduler.sv @@ -459,16 +459,16 @@ module VX_mem_scheduler #( end else begin : g_rsp_full + // use flattened arrays for BRAM synthesis compatibility reg [(CORE_BATCHES * CORE_CHANNELS * WORD_WIDTH)-1:0] rsp_store [CORE_QUEUE_SIZE-1:0]; - reg [(CORE_BATCHES * CORE_CHANNELS * WORD_WIDTH)-1:0] rsp_store_n; // use flattened array for BRAM synthesis compatibility + reg [(CORE_BATCHES * CORE_CHANNELS)-1:0][WORD_WIDTH-1:0] rsp_store_n; reg [CORE_REQS-1:0] rsp_orig_mask [CORE_QUEUE_SIZE-1:0]; always @(*) begin rsp_store_n = rsp_store[ibuf_raddr]; for (integer i = 0; i < CORE_CHANNELS; ++i) begin if ((CORE_CHANNELS == 1) || 
mem_rsp_mask_s[i]) begin - integer k = (rsp_batch_idx * CORE_CHANNELS * WORD_WIDTH) + (i * WORD_WIDTH); - rsp_store_n[k +: WORD_WIDTH] = mem_rsp_data_s[i]; + rsp_store_n[rsp_batch_idx * CORE_CHANNELS + i] = mem_rsp_data_s[i]; end end end @@ -489,8 +489,7 @@ module VX_mem_scheduler #( for (genvar r = 0; r < CORE_REQS; ++r) begin : g_crsp_data localparam i = r / CORE_CHANNELS; localparam j = r % CORE_CHANNELS; - localparam k = (i * CORE_CHANNELS * WORD_WIDTH) + (j * WORD_WIDTH); - assign crsp_data[r] = rsp_store_n[k +: WORD_WIDTH]; + assign crsp_data[r] = rsp_store_n[i * CORE_CHANNELS + j]; end assign mem_rsp_ready_s = crsp_ready || ~rsp_complete; From 6e401620279ca89863199df85d777c3c5487932d Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Fri, 27 Sep 2024 11:36:31 -0700 Subject: [PATCH 233/407] extending scope triggering to capture continous firing events --- hw/rtl/VX_scope.vh | 16 ++++++++------- hw/rtl/afu/opae/vortex_afu.sv | 28 +++++++++++++++++--------- hw/rtl/afu/xrt/VX_afu_wrap.sv | 12 +++++++++++- hw/rtl/core/VX_fetch.sv | 9 ++++++++- hw/rtl/core/VX_issue_slice.sv | 9 ++++++--- hw/rtl/core/VX_lsu_slice.sv | 5 ++++- hw/rtl/libs/VX_scope_tap.sv | 35 +++++++++++++++++++++------------ hw/scripts/scope.py | 37 ++++++++++++++++++++++++----------- 8 files changed, 105 insertions(+), 46 deletions(-) diff --git a/hw/rtl/VX_scope.vh b/hw/rtl/VX_scope.vh index 43ad91e859..b3d427ede3 100644 --- a/hw/rtl/VX_scope.vh +++ b/hw/rtl/VX_scope.vh @@ -46,10 +46,11 @@ .rsp_in (scope_bus_out_w) \ ) -`define SCOPE_TAP_EX(__idx, __id, __triggers_w, __probes_w, __triggers, __probes, __start, __stop, __depth) \ +`define SCOPE_TAP_EX(__idx, __id, __xtriggers_w, __htriggers_w, __probes_w, __xtriggers, __htriggers, __probes, __start, __stop, __depth) \ VX_scope_tap #( \ .SCOPE_ID (__id), \ - .TRIGGERW (__triggers_w), \ + .XTRIGGERW(__xtriggers_w), \ + .HTRIGGERW(__htriggers_w), \ .PROBEW (__probes_w), \ .DEPTH (__depth) \ ) scope_tap_``idx ( \ @@ -57,14 +58,15 @@ .reset 
(scope_reset_w[__idx]), \ .start (__start), \ .stop (__stop), \ - .triggers(__triggers), \ + .xtriggers(__xtriggers), \ + .htriggers(__htriggers), \ .probes (__probes), \ .bus_in (scope_bus_in_w[__idx]), \ .bus_out(scope_bus_out_w[__idx]) \ ) -`define SCOPE_TAP(__idx, __id, __triggers, __probes, __start, __stop, __depth) \ - `SCOPE_TAP_EX(__idx, __id, $bits(__triggers), $bits(__probes), __triggers, __probes, __start, __stop, __depth) +`define SCOPE_TAP(__idx, __id, __xtriggers, __htriggers, __probes, __start, __stop, __depth) \ + `SCOPE_TAP_EX(__idx, __id, $bits(__xtriggers), $bits(__htriggers), $bits(__probes), __xtriggers, __htriggers, __probes, __start, __stop, __depth) `else @@ -76,9 +78,9 @@ `define SCOPE_IO_SWITCH(__count) -`define SCOPE_TAP(__idx, __id, __triggers, __probes, __depth) +`define SCOPE_TAP(__idx, __id, __xtriggers, __probes, __depth) -`define SCOPE_TAP_EX(__idx, __id, __triggers_w, __probes_w, __triggers, __probes, __depth) +`define SCOPE_TAP_EX(__idx, __id, __xtriggers_w, __probes_w, __xtriggers, __probes, __depth) `endif diff --git a/hw/rtl/afu/opae/vortex_afu.sv b/hw/rtl/afu/opae/vortex_afu.sv index 37afa93262..b872efa977 100644 --- a/hw/rtl/afu/opae/vortex_afu.sv +++ b/hw/rtl/afu/opae/vortex_afu.sv @@ -1016,10 +1016,12 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ always @(posedge clk) begin state_prev <= state; end - wire state_changed = (state != state_prev); + wire state_changed = (state != state_prev); + wire vx_mem_req_fire = vx_mem_req_valid && vx_mem_req_ready; + wire vx_mem_rsp_fire = vx_mem_rsp_valid && vx_mem_rsp_ready; + wire avs_req_fire = (avs_write[0] || avs_read[0]) && ~avs_waitrequest[0]; `NEG_EDGE (reset_negedge, reset); - `SCOPE_TAP (0, 0, { vx_reset, vx_busy, @@ -1027,21 +1029,29 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ vx_mem_req_ready, vx_mem_rsp_valid, vx_mem_rsp_ready, - vx_dcr_wr_valid, - state_changed, avs_read[0], avs_write[0], 
avs_waitrequest[0], - avs_readdatavalid[0], - cp2af_sRxPort.c0.mmioRdValid, - cp2af_sRxPort.c0.mmioWrValid, cp2af_sRxPort.c0.rspValid, cp2af_sRxPort.c1.rspValid, af2cp_sTxPort.c0.valid, af2cp_sTxPort.c1.valid, cp2af_sRxPort.c0TxAlmFull, - cp2af_sRxPort.c1TxAlmFull, - af2cp_sTxPort.c2.mmioRdValid + cp2af_sRxPort.c1TxAlmFull + },{ + state_changed, + vx_dcr_wr_valid, // ack-free + avs_readdatavalid[0], // ack-free + cp2af_sRxPort.c0.mmioRdValid, // ack-free + cp2af_sRxPort.c0.mmioWrValid, // ack-free + af2cp_sTxPort.c2.mmioRdValid, // ack-free + cp2af_sRxPort.c0.rspValid, // ack-free + cp2af_sRxPort.c1.rspValid, // ack-free + cci_rd_req_fire, + cci_wr_req_fire, + avs_req_fire, + vx_mem_req_fire, + vx_mem_rsp_fire },{ cmd_type, state, diff --git a/hw/rtl/afu/xrt/VX_afu_wrap.sv b/hw/rtl/afu/xrt/VX_afu_wrap.sv index 73da63e585..c4ff50d45a 100644 --- a/hw/rtl/afu/xrt/VX_afu_wrap.sv +++ b/hw/rtl/afu/xrt/VX_afu_wrap.sv @@ -309,6 +309,11 @@ module VX_afu_wrap #( `ifdef SCOPE `ifdef DBG_SCOPE_AFU + wire m_axi_mem_awfire_0 = m_axi_mem_awvalid_a[0] & m_axi_mem_awready_a[0]; + wire m_axi_mem_arfire_0 = m_axi_mem_arvalid_a[0] & m_axi_mem_arready_a[0]; + wire m_axi_mem_wfire_0 = m_axi_mem_wvalid_a[0] & m_axi_mem_wready_a[0]; + wire m_axi_mem_bfire_0 = m_axi_mem_bvalid_a[0] & m_axi_mem_bready_a[0]; + `NEG_EDGE (reset_negedge, reset); `SCOPE_TAP (0, 0, { ap_reset, @@ -318,7 +323,6 @@ module VX_afu_wrap #( interrupt, vx_reset, vx_busy, - dcr_wr_valid, m_axi_mem_awvalid_a[0], m_axi_mem_awready_a[0], m_axi_mem_wvalid_a[0], @@ -330,6 +334,12 @@ module VX_afu_wrap #( m_axi_mem_rvalid_a[0], m_axi_mem_rready_a[0] }, { + dcr_wr_valid, + m_axi_mem_awfire_0, + m_axi_mem_arfire_0, + m_axi_mem_wfire_0, + m_axi_mem_bfire_0 + },{ dcr_wr_addr, dcr_wr_data, vx_pending_writes, diff --git a/hw/rtl/core/VX_fetch.sv b/hw/rtl/core/VX_fetch.sv index d96ef7abdc..35e1060375 100644 --- a/hw/rtl/core/VX_fetch.sv +++ b/hw/rtl/core/VX_fetch.sv @@ -134,8 +134,11 @@ module VX_fetch import VX_gpu_pkg::*; #( 
`ifdef SCOPE `ifdef DBG_SCOPE_FETCH `SCOPE_IO_SWITCH (1); + wire schedule_fire = schedule_if.valid && schedule_if.ready; + wire icache_bus_req_fire = icache_bus_if.req_valid && icache_bus_if.req_ready; + wire icache_bus_rsp_fire = icache_bus_if.rsp_valid && icache_bus_if.rsp_ready; `NEG_EDGE (reset_negedge, reset); - `SCOPE_TAP_EX (0, 1, 6, ( + `SCOPE_TAP_EX (0, 1, 6, 3, ( `UUID_WIDTH + `NW_WIDTH + `NUM_THREADS + `PC_BITS + ICACHE_TAG_WIDTH + ICACHE_WORD_SIZE + ICACHE_ADDR_WIDTH + (ICACHE_WORD_SIZE * 8) + ICACHE_TAG_WIDTH ), { @@ -146,6 +149,10 @@ module VX_fetch import VX_gpu_pkg::*; #( icache_bus_if.rsp_valid, icache_bus_if.rsp_ready }, { + schedule_fire, + icache_bus_req_fire, + icache_bus_rsp_fire + },{ schedule_if.data.uuid, schedule_if.data.wid, schedule_if.data.tmask, schedule_if.data.PC, icache_bus_if.req_data.tag, icache_bus_if.req_data.byteen, icache_bus_if.req_data.addr, icache_bus_if.rsp_data.data, icache_bus_if.rsp_data.tag diff --git a/hw/rtl/core/VX_issue_slice.sv b/hw/rtl/core/VX_issue_slice.sv index 583967cc8e..5032065d3d 100644 --- a/hw/rtl/core/VX_issue_slice.sv +++ b/hw/rtl/core/VX_issue_slice.sv @@ -91,15 +91,18 @@ module VX_issue_slice import VX_gpu_pkg::*; #( `ifdef SCOPE `ifdef DBG_SCOPE_ISSUE `SCOPE_IO_SWITCH (1); + wire operands_fire = operands_if.valid && operands_if.ready; `NEG_EDGE (reset_negedge, reset); - `SCOPE_TAP_EX (0, 2, 3, ( + `SCOPE_TAP_EX (0, 2, 2, 2, ( `UUID_WIDTH + `NUM_THREADS + `EX_BITS + `INST_OP_BITS + 1 + `NR_BITS + (`NUM_THREADS * 3 * `XLEN) + `UUID_WIDTH + `NUM_THREADS + `NR_BITS + (`NUM_THREADS*`XLEN) + 1 ), { operands_if.valid, - operands_if.ready, - writeback_if.valid + operands_if.ready + }, { + operands_fire, + writeback_if.valid // ack-free }, { operands_if.data.uuid, operands_if.data.tmask, diff --git a/hw/rtl/core/VX_lsu_slice.sv b/hw/rtl/core/VX_lsu_slice.sv index 4ca88c7b31..0f947af78d 100644 --- a/hw/rtl/core/VX_lsu_slice.sv +++ b/hw/rtl/core/VX_lsu_slice.sv @@ -536,13 +536,16 @@ module VX_lsu_slice import 
VX_gpu_pkg::*; #( `ifdef DBG_SCOPE_LSU `SCOPE_IO_SWITCH (1); `NEG_EDGE (reset_negedge, reset); - `SCOPE_TAP_EX (0, 3, 4, ( + `SCOPE_TAP_EX (0, 3, 4, 2, ( 1 + NUM_LANES * (`XLEN + LSU_WORD_SIZE + LSU_WORD_SIZE * 8) + `UUID_WIDTH + NUM_LANES * LSU_WORD_SIZE * 8 + `UUID_WIDTH ), { mem_req_valid, mem_req_ready, mem_rsp_valid, mem_rsp_ready + }, { + mem_req_fire, + mem_rsp_fire }, { mem_req_rw, full_addr, diff --git a/hw/rtl/libs/VX_scope_tap.sv b/hw/rtl/libs/VX_scope_tap.sv index 8b6eee65e1..d3c42c5b5d 100644 --- a/hw/rtl/libs/VX_scope_tap.sv +++ b/hw/rtl/libs/VX_scope_tap.sv @@ -17,9 +17,10 @@ module VX_scope_tap #( parameter SCOPE_ID = 0, // scope identifier parameter SCOPE_IDW = 8, // scope identifier width - parameter TRIGGERW = 32, // trigger signals width - parameter PROBEW = 4999, // probe signal width - parameter DEPTH = 8192, // trace buffer depth + parameter XTRIGGERW = 0, // changed trigger signals width + parameter HTRIGGERW = 0, // high trigger signals width + parameter PROBEW = 1, // probe signal width + parameter DEPTH = 256, // trace buffer depth parameter IDLE_CTRW = 32, // idle time between triggers counter width parameter TX_DATAW = 64 // transfer data width ) ( @@ -27,14 +28,15 @@ module VX_scope_tap #( input wire reset, input wire start, input wire stop, - input wire [`UP(TRIGGERW)-1:0] triggers, + input wire [`UP(XTRIGGERW)-1:0] xtriggers, + input wire [`UP(HTRIGGERW)-1:0] htriggers, input wire [PROBEW-1:0] probes, input wire bus_in, output wire bus_out ); localparam CTR_WIDTH = 64; localparam SER_CTR_WIDTH = `LOG2UP(TX_DATAW); - localparam DATAW = PROBEW + TRIGGERW; + localparam DATAW = PROBEW + XTRIGGERW + HTRIGGERW; localparam ADDRW = `CLOG2(DEPTH); localparam SIZEW = `CLOG2(DEPTH+1); localparam MAX_IDLE_CTR = (2 ** IDLE_CTRW) - 1; @@ -76,7 +78,7 @@ module VX_scope_tap #( reg [CTR_WIDTH-1:0] timestamp, start_time; reg [CTR_WIDTH-1:0] start_delay, stop_delay; - reg [`UP(TRIGGERW)-1:0] prev_trig; + reg [`UP(XTRIGGERW)-1:0] prev_xtrig; reg 
[IDLE_CTRW-1:0] delta; reg cmd_start, cmd_stop; reg dflush; @@ -93,9 +95,16 @@ module VX_scope_tap #( // trace capture // - if (TRIGGERW != 0) begin : g_delta_store - assign data_in = {probes, triggers}; - assign write_en = (tap_state == TAP_STATE_RUN) && (dflush || (triggers != prev_trig)); + if (XTRIGGERW != 0 || HTRIGGERW != 0) begin : g_delta_store + if (XTRIGGERW != 0 && HTRIGGERW != 0) begin : g_data_in_pxh + assign data_in = {probes, xtriggers, htriggers}; + end else if (XTRIGGERW != 0) begin : g_data_in_px + assign data_in = {probes, xtriggers}; + end else begin : g_data_in_ph + assign data_in = {probes, htriggers}; + end + wire has_triggered = (xtriggers != prev_xtrig) || (htriggers != 0); + assign write_en = (tap_state == TAP_STATE_RUN) && (has_triggered || dflush); VX_dp_ram #( .DATAW (IDLE_CTRW), .SIZE (DEPTH), @@ -150,7 +159,7 @@ module VX_scope_tap #( tap_state <= TAP_STATE_IDLE; delta <= '0; dflush <= 0; - prev_trig <= '0; + prev_xtrig <= '0; waddr <= '0; end else begin case (tap_state) @@ -167,15 +176,15 @@ module VX_scope_tap #( TAP_STATE_RUN: begin dflush <= 0; if (!(stop || cmd_stop) && (waddr < waddr_end)) begin - if (TRIGGERW != 0) begin - if (dflush || (triggers != prev_trig)) begin + if (XTRIGGERW != 0) begin + if (dflush || (xtriggers != prev_xtrig)) begin waddr <= waddr + SIZEW'(1); delta <= '0; end else begin delta <= delta + IDLE_CTRW'(1); dflush <= (delta == IDLE_CTRW'(MAX_IDLE_CTR-1)); end - prev_trig <= triggers; + prev_xtrig <= xtriggers; end else begin waddr <= waddr + SIZEW'(1); end diff --git a/hw/scripts/scope.py b/hw/scripts/scope.py index db5fda1a90..9503fd757e 100755 --- a/hw/scripts/scope.py +++ b/hw/scripts/scope.py @@ -181,10 +181,11 @@ def parse_xml(filename, max_taps): xml_modules = xml_doc.findall(".//module/[@origName='VX_scope_tap']") for xml_module in xml_modules: scope_id = parse_vl_int(xml_module.find(".//var/[@name='SCOPE_ID']/const").get("name")) - triggerw = 
parse_vl_int(xml_module.find(".//var/[@name='TRIGGERW']/const").get("name")) + xtriggerw = parse_vl_int(xml_module.find(".//var/[@name='XTRIGGERW']/const").get("name")) + htriggerw = parse_vl_int(xml_module.find(".//var/[@name='HTRIGGERW']/const").get("name")) probew = parse_vl_int(xml_module.find(".//var/[@name='PROBEW']/const").get("name")) module_name = xml_module.get("name") - modules[module_name] = [scope_id, triggerw, probew] + modules[module_name] = [scope_id, xtriggerw, htriggerw, probew] taps = [] xml_instances = xml_doc.iter("instance") @@ -195,22 +196,36 @@ def parse_xml(filename, max_taps): module = modules.get(defName) if module is None: continue - triggers = [] + + xtriggers = [] + htriggers = [] probes = [] - w = parse_vl_port(xml_doc, xml_instance.find(".//port/[@name='triggers']/*"), triggers) - if w != module[1]: - raise ET.ParseError("invalid triggers width: actual=" + str(w) + ", expected=" + str(module[1])) + + if module[1] > 0: + w = parse_vl_port(xml_doc, xml_instance.find(".//port/[@name='xtriggers']/*"), xtriggers) + if w != module[1]: + raise ET.ParseError("invalid xtriggers width: actual=" + str(w) + ", expected=" + str(module[1])) + + if module[2] > 0: + w = parse_vl_port(xml_doc, xml_instance.find(".//port/[@name='htriggers']/*"), htriggers) + if w != module[2]: + raise ET.ParseError("invalid htriggers width: actual=" + str(w) + ", expected=" + str(module[2])) + w = parse_vl_port(xml_doc, xml_instance.find(".//port/[@name='probes']/*"), probes) - if w != module[2]: - raise ET.ParseError("invalid probes width: actual=" + str(w) + ", expected=" + str(module[2])) + if w != module[3]: + raise ET.ParseError("invalid probes width: actual=" + str(w) + ", expected=" + str(module[3])) + signals = probes - for trigger in triggers: - signals.append(trigger) + for xtrigger in xtriggers: + signals.append(xtrigger) + for htrigger in htriggers: + signals.append(htrigger) + loc = xml_instance.get("loc") hier = xml_doc.find(".//cell/[@loc='" + loc + 
"']").get("hier") path = hier.rsplit(".", 1)[0] taps.append({"id":module[0], - "width":module[1] + module[2], + "width":module[1] + module[2] + module[3], "signals":signals, "path":path}) From ec8cc4c84ddf12f96375c8f4d5299b82f6c24a9a Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Fri, 27 Sep 2024 14:21:09 -0700 Subject: [PATCH 234/407] minor update --- hw/rtl/libs/VX_scope_tap.sv | 61 ++++++++++++++++++++----------------- 1 file changed, 33 insertions(+), 28 deletions(-) diff --git a/hw/rtl/libs/VX_scope_tap.sv b/hw/rtl/libs/VX_scope_tap.sv index d3c42c5b5d..c4bf918ff0 100644 --- a/hw/rtl/libs/VX_scope_tap.sv +++ b/hw/rtl/libs/VX_scope_tap.sv @@ -34,6 +34,7 @@ module VX_scope_tap #( input wire bus_in, output wire bus_out ); + localparam HAS_TRIGGERS = XTRIGGERW != 0 || HTRIGGERW != 0; localparam CTR_WIDTH = 64; localparam SER_CTR_WIDTH = `LOG2UP(TX_DATAW); localparam DATAW = PROBEW + XTRIGGERW + HTRIGGERW; @@ -95,7 +96,9 @@ module VX_scope_tap #( // trace capture // - if (XTRIGGERW != 0 || HTRIGGERW != 0) begin : g_delta_store + wire do_capture; + + if (HAS_TRIGGERS) begin : g_delta_store if (XTRIGGERW != 0 && HTRIGGERW != 0) begin : g_data_in_pxh assign data_in = {probes, xtriggers, htriggers}; end else if (XTRIGGERW != 0) begin : g_data_in_px @@ -103,8 +106,9 @@ module VX_scope_tap #( end else begin : g_data_in_ph assign data_in = {probes, htriggers}; end - wire has_triggered = (xtriggers != prev_xtrig) || (htriggers != 0); - assign write_en = (tap_state == TAP_STATE_RUN) && (has_triggered || dflush); + wire has_triggered = (xtriggers != prev_xtrig) || (htriggers != '0); + assign do_capture = dflush || has_triggered; + assign write_en = (tap_state == TAP_STATE_RUN) && do_capture; VX_dp_ram #( .DATAW (IDLE_CTRW), .SIZE (DEPTH), @@ -112,20 +116,21 @@ module VX_scope_tap #( .READ_ENABLE (0), .NO_RWCHECK (1) ) delta_store ( - .clk (clk), - .reset (reset), - .read (1'b1), - .wren (1'b1), - .write (write_en), - .waddr (waddr[ADDRW-1:0]), - .wdata (delta), - .raddr 
(raddr), - .rdata (delta_value) + .clk (clk), + .reset (reset), + .read (1'b1), + .wren (1'b1), + .write (write_en), + .waddr (waddr[ADDRW-1:0]), + .wdata (delta), + .raddr (raddr), + .rdata (delta_value) ); end else begin : g_no_delta_store - assign data_in = probes; + assign data_in = probes; assign write_en = (tap_state == TAP_STATE_RUN); assign delta_value = '0; + assign do_capture = 1; end VX_dp_ram #( @@ -135,15 +140,15 @@ module VX_scope_tap #( .READ_ENABLE (0), .NO_RWCHECK (1) ) data_store ( - .clk (clk), - .reset (reset), - .read (1'b1), - .wren (1'b1), - .write (write_en), - .waddr (waddr[ADDRW-1:0]), - .wdata (data_in), - .raddr (raddr), - .rdata (data_value) + .clk (clk), + .reset (reset), + .read (1'b1), + .wren (1'b1), + .write (write_en), + .waddr (waddr[ADDRW-1:0]), + .wdata (data_in), + .raddr (raddr), + .rdata (data_value) ); always @(posedge clk) begin @@ -159,7 +164,7 @@ module VX_scope_tap #( tap_state <= TAP_STATE_IDLE; delta <= '0; dflush <= 0; - prev_xtrig <= '0; + prev_xtrig <= '0; waddr <= '0; end else begin case (tap_state) @@ -176,17 +181,17 @@ module VX_scope_tap #( TAP_STATE_RUN: begin dflush <= 0; if (!(stop || cmd_stop) && (waddr < waddr_end)) begin - if (XTRIGGERW != 0) begin - if (dflush || (xtriggers != prev_xtrig)) begin - waddr <= waddr + SIZEW'(1); + if (do_capture) begin + waddr <= waddr + SIZEW'(1); + end + if (HAS_TRIGGERS) begin + if (do_capture) begin delta <= '0; end else begin delta <= delta + IDLE_CTRW'(1); dflush <= (delta == IDLE_CTRW'(MAX_IDLE_CTR-1)); end prev_xtrig <= xtriggers; - end else begin - waddr <= waddr + SIZEW'(1); end end else begin tap_state <= TAP_STATE_DONE; From 989341a77dc3012b3c1113f9e3d2ffd7843665a0 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Fri, 27 Sep 2024 15:13:42 -0700 Subject: [PATCH 235/407] minor update --- hw/rtl/libs/VX_scope_tap.sv | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/hw/rtl/libs/VX_scope_tap.sv b/hw/rtl/libs/VX_scope_tap.sv index 
c4bf918ff0..6a9b70ff1f 100644 --- a/hw/rtl/libs/VX_scope_tap.sv +++ b/hw/rtl/libs/VX_scope_tap.sv @@ -80,13 +80,13 @@ module VX_scope_tap #( reg [CTR_WIDTH-1:0] timestamp, start_time; reg [CTR_WIDTH-1:0] start_delay, stop_delay; reg [`UP(XTRIGGERW)-1:0] prev_xtrig; + reg [`UP(HTRIGGERW)-1:0] prev_htrig; reg [IDLE_CTRW-1:0] delta; reg cmd_start, cmd_stop; reg dflush; reg [SIZEW-1:0] waddr, waddr_end; wire [DATAW-1:0] data_in; - wire write_en; wire [DATAW-1:0] data_value; wire [IDLE_CTRW-1:0] delta_value; @@ -98,6 +98,8 @@ module VX_scope_tap #( wire do_capture; + wire write_en = (tap_state == TAP_STATE_RUN) && do_capture; + if (HAS_TRIGGERS) begin : g_delta_store if (XTRIGGERW != 0 && HTRIGGERW != 0) begin : g_data_in_pxh assign data_in = {probes, xtriggers, htriggers}; @@ -106,9 +108,7 @@ module VX_scope_tap #( end else begin : g_data_in_ph assign data_in = {probes, htriggers}; end - wire has_triggered = (xtriggers != prev_xtrig) || (htriggers != '0); - assign do_capture = dflush || has_triggered; - assign write_en = (tap_state == TAP_STATE_RUN) && do_capture; + assign do_capture = dflush || (xtriggers != prev_xtrig) || (htriggers != prev_htrig) || (htriggers != '0); VX_dp_ram #( .DATAW (IDLE_CTRW), .SIZE (DEPTH), @@ -128,7 +128,6 @@ module VX_scope_tap #( ); end else begin : g_no_delta_store assign data_in = probes; - assign write_en = (tap_state == TAP_STATE_RUN); assign delta_value = '0; assign do_capture = 1; end @@ -165,6 +164,7 @@ module VX_scope_tap #( delta <= '0; dflush <= 0; prev_xtrig <= '0; + prev_htrig <= '0; waddr <= '0; end else begin case (tap_state) @@ -192,6 +192,7 @@ module VX_scope_tap #( dflush <= (delta == IDLE_CTRW'(MAX_IDLE_CTR-1)); end prev_xtrig <= xtriggers; + prev_htrig <= htriggers; end end else begin tap_state <= TAP_STATE_DONE; From 9027555e6a446295905c4a4af55c5d5916deb7cb Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Fri, 27 Sep 2024 20:30:57 -0700 Subject: [PATCH 236/407] minor update --- hw/rtl/afu/opae/vortex_afu.sv | 2 +- 1 
file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/rtl/afu/opae/vortex_afu.sv b/hw/rtl/afu/opae/vortex_afu.sv index b872efa977..4737eb43c8 100644 --- a/hw/rtl/afu/opae/vortex_afu.sv +++ b/hw/rtl/afu/opae/vortex_afu.sv @@ -1081,7 +1081,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ reset_negedge, 1'b0, 4096 ); `else - `SCOPE_IO_UNUSED_W(0) + `SCOPE_IO_UNUSED(0) `endif /////////////////////////////////////////////////////////////////////////////// From eee037ffcd65b3f9b535b0715fc56e7585acc762 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Fri, 27 Sep 2024 20:59:29 -0700 Subject: [PATCH 237/407] minor update --- hw/rtl/VX_config.vh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/hw/rtl/VX_config.vh b/hw/rtl/VX_config.vh index a93b73b305..1e10aca8ea 100644 --- a/hw/rtl/VX_config.vh +++ b/hw/rtl/VX_config.vh @@ -221,8 +221,10 @@ `endif `ifndef SV_DPI +`ifndef DPI_DISABLE `define DPI_DISABLE `endif +`endif `ifndef FPU_FPNEW `ifndef FPU_DSP From 87e613d29dea5a08db59509e38dcd26489745413 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sat, 28 Sep 2024 05:20:37 -0700 Subject: [PATCH 238/407] fixed XRT AFU deadlock on exit --- hw/rtl/afu/xrt/VX_afu_wrap.sv | 67 +++++++++++++++++++++------------ hw/rtl/libs/VX_axi_adapter.sv | 37 +++++++----------- hw/rtl/libs/VX_axi_write_ack.sv | 60 +++++++++++++++++++++++++++++ sim/opaesim/opae_sim.cpp | 7 ---- sim/rtlsim/processor.cpp | 28 ++------------ sim/xrtsim/xrt_sim.cpp | 56 ++++++++++++--------------- 6 files changed, 143 insertions(+), 112 deletions(-) create mode 100644 hw/rtl/libs/VX_axi_write_ack.sv diff --git a/hw/rtl/afu/xrt/VX_afu_wrap.sv b/hw/rtl/afu/xrt/VX_afu_wrap.sv index c4ff50d45a..2b1bfb7c25 100644 --- a/hw/rtl/afu/xrt/VX_afu_wrap.sv +++ b/hw/rtl/afu/xrt/VX_afu_wrap.sv @@ -61,6 +61,9 @@ module VX_afu_wrap #( localparam STATE_IDLE = 0; localparam STATE_RUN = 1; + localparam PENDING_SIZEW = 12; // max outstanding requests size + localparam C_M_AXI_MEM_NUM_BANKS_SW = 
`CLOG2(C_M_AXI_MEM_NUM_BANKS+1); + wire m_axi_mem_awvalid_a [C_M_AXI_MEM_NUM_BANKS]; wire m_axi_mem_awready_a [C_M_AXI_MEM_NUM_BANKS]; wire [C_M_AXI_MEM_ADDR_WIDTH-1:0] m_axi_mem_awaddr_a [C_M_AXI_MEM_NUM_BANKS]; @@ -95,7 +98,7 @@ module VX_afu_wrap #( `endif reg [`CLOG2(`RESET_DELAY+1)-1:0] vx_reset_ctr; - reg [15:0] vx_pending_writes; + reg [PENDING_SIZEW-1:0] vx_pending_writes; reg vx_busy_wait; reg vx_reset = 1; // asserted at initialization wire vx_busy; @@ -118,23 +121,10 @@ module VX_afu_wrap #( wire scope_reset = reset; `endif - reg m_axi_mem_wfire; - reg m_axi_mem_bfire; - - always @(*) begin - m_axi_mem_wfire = 0; - m_axi_mem_bfire = 0; - for (integer i = 0; i < C_M_AXI_MEM_NUM_BANKS; ++i) begin - m_axi_mem_wfire |= m_axi_mem_wvalid_a[i] && m_axi_mem_wready_a[i]; - m_axi_mem_bfire |= m_axi_mem_bvalid_a[i] && m_axi_mem_bready_a[i]; - end - end - always @(posedge clk) begin if (reset || ap_reset) begin - state <= STATE_IDLE; - vx_pending_writes <= '0; - vx_reset <= 1; + state <= STATE_IDLE; + vx_reset <= 1; end else begin case (state) STATE_IDLE: begin @@ -181,12 +171,39 @@ module VX_afu_wrap #( if (vx_reset_ctr != '0) begin vx_reset_ctr <= vx_reset_ctr - 1; end + end + end - // track pending writes - if (m_axi_mem_wfire && ~m_axi_mem_bfire) - vx_pending_writes <= vx_pending_writes + 1; - if (~m_axi_mem_wfire && m_axi_mem_bfire) - vx_pending_writes <= vx_pending_writes - 1; + wire [C_M_AXI_MEM_NUM_BANKS-1:0] m_axi_wr_req_fire, m_axi_wr_rsp_fire; + wire [C_M_AXI_MEM_NUM_BANKS_SW-1:0] cur_wr_reqs, cur_wr_rsps; + + for (genvar i = 0; i < C_M_AXI_MEM_NUM_BANKS; ++i) begin : g_awfire + VX_axi_write_ack axi_write_ack ( + .clk (clk), + .reset (reset), + .awvalid(m_axi_mem_awvalid_a[i]), + .awready(m_axi_mem_awready_a[i]), + .wvalid (m_axi_mem_wvalid_a[i]), + .wready (m_axi_mem_wready_a[i]), + .tx_ack (m_axi_wr_req_fire[i]), + `UNUSED_PIN (aw_ack), + `UNUSED_PIN (w_ack), + `UNUSED_PIN (tx_rdy) + ); + assign m_axi_wr_rsp_fire[i] = m_axi_mem_bvalid_a[i] & 
m_axi_mem_bready_a[i]; + end + + `POP_COUNT(cur_wr_reqs, m_axi_wr_req_fire); + `POP_COUNT(cur_wr_rsps, m_axi_wr_rsp_fire); + + wire signed [C_M_AXI_MEM_NUM_BANKS_SW:0] reqs_sub = (C_M_AXI_MEM_NUM_BANKS_SW+1)'(cur_wr_reqs) - + (C_M_AXI_MEM_NUM_BANKS_SW+1)'(cur_wr_rsps); + + always @(posedge clk) begin + if (reset) begin + vx_pending_writes <= '0; + end else begin + vx_pending_writes <= vx_pending_writes + PENDING_SIZEW'(reqs_sub); end end @@ -408,16 +425,16 @@ module VX_afu_wrap #( always @(posedge clk) begin for (integer i = 0; i < C_M_AXI_MEM_NUM_BANKS; ++i) begin if (m_axi_mem_awvalid_a[i] && m_axi_mem_awready_a[i]) begin - `TRACE(2, ("%t: AFU Wr Req [%0d]: addr=0x%0h, tag=0x%0h\n", $time, i, m_axi_mem_awaddr_a[i], m_axi_mem_awid_a[i])) + `TRACE(2, ("%t: AXI Wr Req [%0d]: addr=0x%0h, tag=0x%0h\n", $time, i, m_axi_mem_awaddr_a[i], m_axi_mem_awid_a[i])) end if (m_axi_mem_wvalid_a[i] && m_axi_mem_wready_a[i]) begin - `TRACE(2, ("%t: AFU Wr Req [%0d]: data=0x%h\n", $time, i, m_axi_mem_wdata_a[i])) + `TRACE(2, ("%t: AXI Wr Req [%0d]: data=0x%h\n", $time, i, m_axi_mem_wdata_a[i])) end if (m_axi_mem_arvalid_a[i] && m_axi_mem_arready_a[i]) begin - `TRACE(2, ("%t: AFU Rd Req [%0d]: addr=0x%0h, tag=0x%0h\n", $time, i, m_axi_mem_araddr_a[i], m_axi_mem_arid_a[i])) + `TRACE(2, ("%t: AXI Rd Req [%0d]: addr=0x%0h, tag=0x%0h\n", $time, i, m_axi_mem_araddr_a[i], m_axi_mem_arid_a[i])) end if (m_axi_mem_rvalid_a[i] && m_axi_mem_rready_a[i]) begin - `TRACE(2, ("%t: AVS Rd Rsp [%0d]: data=0x%h, tag=0x%0h\n", $time, i, m_axi_mem_rdata_a[i], m_axi_mem_rid_a[i])) + `TRACE(2, ("%t: AXI Rd Rsp [%0d]: data=0x%h, tag=0x%0h\n", $time, i, m_axi_mem_rdata_a[i], m_axi_mem_rid_a[i])) end end end diff --git a/hw/rtl/libs/VX_axi_adapter.sv b/hw/rtl/libs/VX_axi_adapter.sv index a21b8554fb..f0144ff91f 100644 --- a/hw/rtl/libs/VX_axi_adapter.sv +++ b/hw/rtl/libs/VX_axi_adapter.sv @@ -116,32 +116,21 @@ module VX_axi_adapter #( assign req_bank_off = mem_req_addr_out; end - wire mem_req_fire = 
mem_req_valid && mem_req_ready; - // AXi write request synchronization - reg [NUM_BANKS-1:0] m_axi_aw_ack, m_axi_w_ack; - for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_m_axi_w - wire m_axi_aw_fire = m_axi_awvalid[i] && m_axi_awready[i]; - wire m_axi_w_fire = m_axi_wvalid[i] && m_axi_wready[i]; - always @(posedge clk) begin - if (reset || (mem_req_fire && (req_bank_sel == i))) begin - m_axi_aw_ack[i] <= 0; - m_axi_w_ack[i] <= 0; - end else begin - if (m_axi_aw_fire) begin - m_axi_aw_ack[i] <= 1; - end - if (m_axi_w_fire) begin - m_axi_w_ack[i] <= 1; - end - end - end - end - - wire [NUM_BANKS-1:0] axi_write_ready; + reg [NUM_BANKS-1:0] m_axi_aw_ack, m_axi_w_ack, axi_write_ready; for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_axi_write_ready - assign axi_write_ready[i] = (m_axi_awready[i] || m_axi_aw_ack[i]) - && (m_axi_wready[i] || m_axi_w_ack[i]); + VX_axi_write_ack axi_write_ack ( + .clk (clk), + .reset (reset), + .awvalid(m_axi_awvalid[i]), + .awready(m_axi_awready[i]), + .wvalid (m_axi_wvalid[i]), + .wready (m_axi_wready[i]), + .aw_ack (m_axi_aw_ack[i]), + .w_ack (m_axi_w_ack[i]), + .tx_rdy (axi_write_ready[i]), + `UNUSED_PIN (tx_ack) + ); end // request ack diff --git a/hw/rtl/libs/VX_axi_write_ack.sv b/hw/rtl/libs/VX_axi_write_ack.sv new file mode 100644 index 0000000000..257ef18e5f --- /dev/null +++ b/hw/rtl/libs/VX_axi_write_ack.sv @@ -0,0 +1,60 @@ +// Copyright © 2019-2023 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +`include "VX_platform.vh" + +`TRACING_OFF +module VX_axi_write_ack ( + input wire clk, + input wire reset, + input wire awvalid, + input wire awready, + input wire wvalid, + input wire wready, + output wire aw_ack, + output wire w_ack, + output wire tx_ack, + output wire tx_rdy +); + reg awfired; + reg wfired; + + wire awfire = awvalid && awready; + wire wfire = wvalid && wready; + + always @(posedge clk) begin + if (reset) begin + awfired <= 0; + wfired <= 0; + end else begin + if (awfire) begin + awfired <= 1; + end + if (wfire) begin + wfired <= 1; + end + if (tx_ack) begin + awfired <= 0; + wfired <= 0; + end + end + end + + assign aw_ack = awfired; + assign w_ack = wfired; + + assign tx_ack = (awfire || awfired) && (wfire || wfired); + assign tx_rdy = (awready || awfired) && (wready || wfired); + +endmodule +`TRACING_ON diff --git a/sim/opaesim/opae_sim.cpp b/sim/opaesim/opae_sim.cpp index 0f0d67d9cc..5f619a743c 100644 --- a/sim/opaesim/opae_sim.cpp +++ b/sim/opaesim/opae_sim.cpp @@ -263,13 +263,6 @@ class opae_sim::Impl { } device_->reset = 0; - - for (int i = 0; i < RESET_DELAY; ++i) { - device_->clk = 0; - this->eval(); - device_->clk = 1; - this->eval(); - } } void tick() { diff --git a/sim/rtlsim/processor.cpp b/sim/rtlsim/processor.cpp index 1f6af60dd1..f651ad9d83 100644 --- a/sim/rtlsim/processor.cpp +++ b/sim/rtlsim/processor.cpp @@ -151,9 +151,6 @@ class Processor::Impl { // reset device this->reset(); - // start execution - running_ = true; - // wait on device to go busy while (!device_->busy) { this->tick(); @@ -181,8 +178,6 @@ class Processor::Impl { this->mem_bus_reset(); this->dcr_bus_reset(); - running_ = false; - print_bufs_.clear(); pending_mem_reqs_.clear(); @@ -192,8 +187,6 @@ class Processor::Impl { std::swap(dram_queue_, empty); } - mem_rd_rsp_active_ = false; - device_->reset = 1; for (int i = 0; i < RESET_DELAY; ++i) { @@ -204,13 +197,7 @@ class Processor::Impl { } device_->reset = 0; - - for (int i = 0; i < RESET_DELAY; ++i) { - 
device_->clk = 0; - this->eval(); - device_->clk = 1; - this->eval(); - } + device_->mem_req_ready = 1; } void tick() { @@ -261,11 +248,10 @@ class Processor::Impl { void mem_bus_eval() { // process memory read responses - if (mem_rd_rsp_active_ && device_->mem_rsp_ready) { + if (device_->mem_rsp_valid && device_->mem_rsp_ready) { device_->mem_rsp_valid = 0; - mem_rd_rsp_active_ = false; } - if (!mem_rd_rsp_active_) { + if (!device_->mem_rsp_valid) { if (!pending_mem_reqs_.empty() && (*pending_mem_reqs_.begin())->ready) { auto mem_rsp_it = pending_mem_reqs_.begin(); @@ -280,7 +266,6 @@ class Processor::Impl { memcpy(VDataCast::get(device_->mem_rsp_data), mem_rsp->data.data(), MEM_BLOCK_SIZE); device_->mem_rsp_tag = mem_rsp->tag; pending_mem_reqs_.erase(mem_rsp_it); - mem_rd_rsp_active_ = true; delete mem_rsp; } } @@ -291,7 +276,6 @@ class Processor::Impl { if (device_->mem_req_rw) { auto byteen = device_->mem_req_byteen; auto data = VDataCast::get(device_->mem_req_data); - if (byte_addr >= uint64_t(IO_COUT_ADDR) && byte_addr < (uint64_t(IO_COUT_ADDR) + IO_COUT_SIZE)) { // process console output @@ -350,8 +334,6 @@ class Processor::Impl { dram_queue_.push(mem_req); } } - - device_->mem_req_ready = running_; } void dcr_bus_reset() { @@ -390,10 +372,6 @@ class Processor::Impl { #endif RAM* ram_; - - bool mem_rd_rsp_active_; - - bool running_; }; /////////////////////////////////////////////////////////////////////////////// diff --git a/sim/xrtsim/xrt_sim.cpp b/sim/xrtsim/xrt_sim.cpp index 96adf08583..d572b9479f 100644 --- a/sim/xrtsim/xrt_sim.cpp +++ b/sim/xrtsim/xrt_sim.cpp @@ -333,12 +333,9 @@ class xrt_sim::Impl { } device_->ap_rst_n = 1; - - for (int i = 0; i < RESET_DELAY; ++i) { - device_->ap_clk = 0; - this->eval(); - device_->ap_clk = 1; - this->eval(); + for (int i = 0; i < PLATFORM_MEMORY_BANKS; ++i) { + *m_axi_mem_[i].arready = 1; + *m_axi_mem_[i].awready = 1; } } @@ -407,10 +404,10 @@ class xrt_sim::Impl { void axi_mem_bus_reset() { for (int i = 0; i < 
PLATFORM_MEMORY_BANKS; ++i) { // address read request - *m_axi_mem_[i].arready = 1; + *m_axi_mem_[i].arready = 0; // address write request - *m_axi_mem_[i].awready = 1; + *m_axi_mem_[i].awready = 0; // data write request *m_axi_mem_[i].wready = 0; @@ -423,19 +420,16 @@ class xrt_sim::Impl { // states m_axi_states_[i].write_req_pending = false; - m_axi_states_[i].write_rsp_pending = false; - m_axi_states_[i].read_rsp_pending = false; } } void axi_mem_bus_eval() { for (int i = 0; i < PLATFORM_MEMORY_BANKS; ++i) { // handle read responses - if (m_axi_states_[i].read_rsp_pending && (*m_axi_mem_[i].rready)) { - *m_axi_mem_[i].rvalid = 0; - m_axi_states_[i].read_rsp_pending = false; + if (*m_axi_mem_[i].rvalid && *m_axi_mem_[i].rready) { + *m_axi_mem_[i].rvalid = 0; } - if (!m_axi_states_[i].read_rsp_pending) { + if (!*m_axi_mem_[i].rvalid) { if (!pending_mem_reqs_[i].empty() && (*pending_mem_reqs_[i].begin())->ready && !(*pending_mem_reqs_[i].begin())->write) { @@ -447,17 +441,15 @@ class xrt_sim::Impl { *m_axi_mem_[i].rlast = 1; memcpy(m_axi_mem_[i].rdata->data(), mem_rsp->data.data(), PLATFORM_MEMORY_DATA_SIZE); pending_mem_reqs_[i].erase(mem_rsp_it); - m_axi_states_[i].read_rsp_pending = true; delete mem_rsp; } } // handle write responses - if (m_axi_states_[i].write_rsp_pending && *m_axi_mem_[i].bready) { + if (*m_axi_mem_[i].bvalid && *m_axi_mem_[i].bready) { *m_axi_mem_[i].bvalid = 0; - m_axi_states_[i].write_rsp_pending = false; } - if (!m_axi_states_[i].write_rsp_pending) { + if (!*m_axi_mem_[i].bvalid) { if (!pending_mem_reqs_[i].empty() && (*pending_mem_reqs_[i].begin())->ready && (*pending_mem_reqs_[i].begin())->write) { @@ -467,7 +459,6 @@ class xrt_sim::Impl { *m_axi_mem_[i].bid = mem_rsp->tag; *m_axi_mem_[i].bresp = 0; pending_mem_reqs_[i].erase(mem_rsp_it); - m_axi_states_[i].write_rsp_pending = true; delete mem_rsp; } } @@ -492,17 +483,21 @@ class xrt_sim::Impl { dram_queues_[i].push(mem_req); } - // handle address write requests - if 
(*m_axi_mem_[i].awvalid && *m_axi_mem_[i].awready && !m_axi_states_[i].write_req_pending) { + if (*m_axi_mem_[i].wready && !m_axi_states_[i].write_req_pending) { + *m_axi_mem_[i].wready = 0; + } + + // handle address write requestsls + if (*m_axi_mem_[i].awvalid && *m_axi_mem_[i].awready && !*m_axi_mem_[i].wready) { m_axi_states_[i].write_req_addr = *m_axi_mem_[i].awaddr; m_axi_states_[i].write_req_tag = *m_axi_mem_[i].awid; - m_axi_states_[i].write_req_pending = true; + // activate data channel + *m_axi_mem_[i].wready = 1; + m_axi_states_[i].write_req_pending = !*m_axi_mem_[i].wvalid; } // handle data write requests - *m_axi_mem_[i].wready = false; - if (*m_axi_mem_[i].wvalid && m_axi_states_[i].write_req_pending) { - + if (*m_axi_mem_[i].wvalid && *m_axi_mem_[i].wready) { auto byteen = *m_axi_mem_[i].wstrb; auto data = (uint8_t*)m_axi_mem_[i].wdata->data(); auto byte_addr = m_axi_states_[i].write_req_addr; @@ -529,10 +524,11 @@ class xrt_sim::Impl { // send dram request dram_queues_[i].push(mem_req); - m_axi_states_[i].write_req_pending = false; - - // acquire write data - *m_axi_mem_[i].wready = true; + // deactivate data channel + if (m_axi_states_[i].write_req_pending) { + *m_axi_mem_[i].wready = 0; + m_axi_states_[i].write_req_pending = false; + } } } } @@ -541,8 +537,6 @@ class xrt_sim::Impl { uint64_t write_req_addr; uint32_t write_req_tag; bool write_req_pending; - bool read_rsp_pending; - bool write_rsp_pending; } m_axi_state_t; typedef struct { From b634f9f47d88baff1ab96196e147abf7b5302770 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sat, 28 Sep 2024 20:15:03 -0700 Subject: [PATCH 239/407] count_leading_zeros fix --- sim/common/bitmanip.h | 4 ++-- sim/simx/constants.h | 16 ++++++++-------- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/sim/common/bitmanip.h b/sim/common/bitmanip.h index 053f254c84..4bfe569610 100644 --- a/sim/common/bitmanip.h +++ b/sim/common/bitmanip.h @@ -20,9 +20,9 @@ template constexpr uint32_t 
count_leading_zeros(T value) { static_assert(std::is_integral::value, "invalid data type"); if constexpr (sizeof(T) > 4) { - return value ? __builtin_clzll(value) : (sizeof(T) * 8); + return value ? __builtin_clzll(value) - (64 - sizeof(T) * 8) : sizeof(T) * 8; } else { - return value ? __builtin_clz(value) : (sizeof(T) * 8); + return value ? __builtin_clz(value) - (32 - sizeof(T) * 8) : sizeof(T) * 8; } } diff --git a/sim/simx/constants.h b/sim/simx/constants.h index 0c707b55ca..33fa9979c8 100644 --- a/sim/simx/constants.h +++ b/sim/simx/constants.h @@ -21,14 +21,14 @@ #define MEM_CLOCK_RATIO 1 #endif -#define LSU_WORD_SIZE (XLEN / 8) -#define LSU_CHANNELS NUM_LSU_LANES -#define LSU_NUM_REQS (NUM_LSU_BLOCKS * LSU_CHANNELS) +inline constexpr int LSU_WORD_SIZE = (XLEN / 8); +inline constexpr int LSU_CHANNELS = NUM_LSU_LANES; +inline constexpr int LSU_NUM_REQS = (NUM_LSU_BLOCKS * LSU_CHANNELS); -#define DCACHE_WORD_SIZE LSU_LINE_SIZE -#define DCACHE_CHANNELS UP((NUM_LSU_LANES * (XLEN / 8)) / DCACHE_WORD_SIZE) -#define DCACHE_NUM_REQS (NUM_LSU_BLOCKS * DCACHE_CHANNELS) +inline constexpr int DCACHE_WORD_SIZE = LSU_LINE_SIZE; +inline constexpr int DCACHE_CHANNELS = UP((NUM_LSU_LANES * (XLEN / 8)) / DCACHE_WORD_SIZE); +inline constexpr int DCACHE_NUM_REQS = (NUM_LSU_BLOCKS * DCACHE_CHANNELS); -#define NUM_SOCKETS UP(NUM_CORES / SOCKET_SIZE) +inline constexpr int NUM_SOCKETS = UP(NUM_CORES / SOCKET_SIZE); -#define PER_ISSUE_WARPS NUM_WARPS / ISSUE_WIDTH \ No newline at end of file +inline constexpr int PER_ISSUE_WARPS = NUM_WARPS / ISSUE_WIDTH; \ No newline at end of file From 4329e3f968c99f2cd5c1ba0c87cd12fb0a0903da Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sat, 28 Sep 2024 20:28:57 -0700 Subject: [PATCH 240/407] minor update --- sim/simx/constants.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sim/simx/constants.h b/sim/simx/constants.h index 33fa9979c8..c651bbfc44 100644 --- a/sim/simx/constants.h +++ b/sim/simx/constants.h @@ -13,6 +13,8 @@ #pragma once 
+#include + #ifndef RAM_PAGE_SIZE #define RAM_PAGE_SIZE 4096 #endif From b8475c65dc9864d87920bd71fa6b805b9a59426e Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sat, 28 Sep 2024 21:25:55 -0700 Subject: [PATCH 241/407] adjusting platform caps --- hw/rtl/afu/opae/vortex_afu.sv | 2 +- hw/rtl/afu/xrt/VX_afu_ctrl.sv | 2 +- runtime/opae/vortex.cpp | 2 +- runtime/xrt/vortex.cpp | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/hw/rtl/afu/opae/vortex_afu.sv b/hw/rtl/afu/opae/vortex_afu.sv index 4737eb43c8..7e0bcfaeda 100644 --- a/hw/rtl/afu/opae/vortex_afu.sv +++ b/hw/rtl/afu/opae/vortex_afu.sv @@ -103,7 +103,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ wire [127:0] afu_id = `AFU_ACCEL_UUID; wire [63:0] dev_caps = {8'b0, - 5'(`PLATFORM_MEMORY_ADDR_WIDTH-16), + 5'(`PLATFORM_MEMORY_ADDR_WIDTH-20), 3'(`CLOG2(`PLATFORM_MEMORY_BANKS)), 8'(`LMEM_ENABLED ? `LMEM_LOG_SIZE : 0), 16'(`NUM_CORES * `NUM_CLUSTERS), diff --git a/hw/rtl/afu/xrt/VX_afu_ctrl.sv b/hw/rtl/afu/xrt/VX_afu_ctrl.sv index 382b31f8aa..d14328c7d5 100644 --- a/hw/rtl/afu/xrt/VX_afu_ctrl.sv +++ b/hw/rtl/afu/xrt/VX_afu_ctrl.sv @@ -134,7 +134,7 @@ module VX_afu_ctrl #( // device caps wire [63:0] dev_caps = {8'b0, - 5'(`PLATFORM_MEMORY_ADDR_WIDTH-16), + 5'(`PLATFORM_MEMORY_ADDR_WIDTH-20), 3'(`CLOG2(`PLATFORM_MEMORY_BANKS)), 8'(`LMEM_ENABLED ? 
`LMEM_LOG_SIZE : 0), 16'(`NUM_CORES * `NUM_CLUSTERS), diff --git a/runtime/opae/vortex.cpp b/runtime/opae/vortex.cpp index a7f77ee970..38ee514abf 100755 --- a/runtime/opae/vortex.cpp +++ b/runtime/opae/vortex.cpp @@ -235,7 +235,7 @@ class vx_device { _value = 1 << ((dev_caps_ >> 48) & 0x7); break; case VX_CAPS_MEM_BANK_SIZE: - _value = 1ull << (16 + ((dev_caps_ >> 51) & 0x1f)); + _value = 1ull << (20 + ((dev_caps_ >> 51) & 0x1f)); break; default: fprintf(stderr, "[VXDRV] Error: invalid caps id: %d\n", caps_id); diff --git a/runtime/xrt/vortex.cpp b/runtime/xrt/vortex.cpp index ffc7870d48..d71f2e1429 100644 --- a/runtime/xrt/vortex.cpp +++ b/runtime/xrt/vortex.cpp @@ -311,7 +311,7 @@ class vx_device { _value = 1 << ((dev_caps_ >> 48) & 0x7); break; case VX_CAPS_MEM_BANK_SIZE: - _value = 1ull << (16 + ((dev_caps_ >> 51) & 0x1f)); + _value = 1ull << (20 + ((dev_caps_ >> 51) & 0x1f)); break; default: fprintf(stderr, "[VXDRV] Error: invalid caps id: %d\n", caps_id); From 30571d716cf6d44e2e80d30025b2a4b86bd47af8 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sat, 28 Sep 2024 21:37:48 -0700 Subject: [PATCH 242/407] updated scope CI test --- .github/workflows/ci.yml | 2 +- ci/regression.sh.in | 16 ++++++++++++++-- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 1676aea4cb..d2bbd9a9f5 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -117,7 +117,7 @@ jobs: strategy: fail-fast: false matrix: - name: [regression, opencl, cache, config1, config2, debug, stress, synthesis] + name: [regression, opencl, cache, config1, config2, debug, scope, stress, synthesis] xlen: [32, 64] steps: diff --git a/ci/regression.sh.in b/ci/regression.sh.in index ea9aa25609..71172599ea 100755 --- a/ci/regression.sh.in +++ b/ci/regression.sh.in @@ -318,10 +318,18 @@ debug() CONFIGS="-DSOCKET_SIZE=1" ./ci/blackbox.sh --driver=opae --cores=2 --clusters=2 --l2cache --debug=1 --perf=1 --app=demo --args="-n1" 
CONFIGS="-DSOCKET_SIZE=1" ./ci/blackbox.sh --driver=xrt --cores=2 --clusters=2 --l2cache --debug=1 --perf=1 --app=demo --args="-n1" CONFIGS="-DSOCKET_SIZE=1" ./ci/blackbox.sh --driver=simx --cores=2 --clusters=2 --l2cache --debug=1 --perf=1 --app=demo --args="-n1" + + echo "debugging tests done!" +} + +scope() +{ + echo "begin scope tests..." + ./ci/blackbox.sh --driver=opae --scope --app=demo --args="-n1" ./ci/blackbox.sh --driver=xrt --scope --app=demo --args="-n1" - echo "debugging tests done!" + echo "debugging scope done!" } stress() @@ -348,7 +356,7 @@ synthesis() show_usage() { echo "Vortex Regression Test" - echo "Usage: $0 [--clean] [--unittest] [--isa] [--kernel] [--regression] [--opencl] [--cache] [--config1] [--config2] [--debug] [--stress] [--synthesis] [--all] [--h|--help]" + echo "Usage: $0 [--clean] [--unittest] [--isa] [--kernel] [--regression] [--opencl] [--cache] [--config1] [--config2] [--debug] [--scope] [--stress] [--synthesis] [--all] [--h|--help]" } declare -a tests=() @@ -386,6 +394,9 @@ while [ "$1" != "" ]; do --debug ) tests+=("debug") ;; + --scope ) + tests+=("scope") + ;; --stress ) tests+=("stress") ;; @@ -403,6 +414,7 @@ while [ "$1" != "" ]; do tests+=("config1") tests+=("config2") tests+=("debug") + tests+=("scope") tests+=("stress") tests+=("synthesis") ;; From 5c694a997c0ca6b97d8f0d68993a2b4cd64978c5 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sun, 29 Sep 2024 00:09:25 -0700 Subject: [PATCH 243/407] update scope tap testing --- ci/regression.sh.in | 23 ++++++++++++++--------- runtime/common/scope.cpp | 14 ++++++++++++++ sim/opaesim/opae_sim.cpp | 29 +++++++++++++++++------------ 3 files changed, 45 insertions(+), 21 deletions(-) diff --git a/ci/regression.sh.in b/ci/regression.sh.in index 71172599ea..f2ce1b17d3 100755 --- a/ci/regression.sh.in +++ b/ci/regression.sh.in @@ -260,11 +260,11 @@ config2() # disabling ZICOND extension CONFIGS="-DEXT_ZICOND_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=demo - # test 128-bit MEM 
block + # test 128-bit memory block CONFIGS="-DMEM_BLOCK_SIZE=16" ./ci/blackbox.sh --driver=opae --app=mstress CONFIGS="-DMEM_BLOCK_SIZE=16" ./ci/blackbox.sh --driver=xrt --app=mstress - # test XLEN-bit MEM block + # test XLEN-bit memory block CONFIGS="-DMEM_BLOCK_SIZE=$XSIZE" ./ci/blackbox.sh --driver=opae --app=mstress CONFIGS="-DMEM_BLOCK_SIZE=$XSIZE" ./ci/blackbox.sh --driver=simx --app=mstress @@ -272,7 +272,7 @@ config2() CONFIGS="-DMEM_BLOCK_SIZE=16" ./ci/blackbox.sh --driver=rtlsim --app=mstress --threads=8 CONFIGS="-DMEM_BLOCK_SIZE=16" ./ci/blackbox.sh --driver=simx --app=mstress --threads=8 - # test single-bank DRAM + # test single-bank memory if [ "$XLEN" == "64" ]; then CONFIGS="-DPLATFORM_MEMORY_BANKS=1 -DPLATFORM_MEMORY_ADDR_WIDTH=48" ./ci/blackbox.sh --driver=opae --app=mstress CONFIGS="-DPLATFORM_MEMORY_BANKS=1 -DPLATFORM_MEMORY_ADDR_WIDTH=48" ./ci/blackbox.sh --driver=xrt --app=mstress @@ -281,11 +281,16 @@ config2() CONFIGS="-DPLATFORM_MEMORY_BANKS=1 -DPLATFORM_MEMORY_ADDR_WIDTH=32" ./ci/blackbox.sh --driver=xrt --app=mstress fi - # test 33-bit DRAM address - CONFIGS="-DPLATFORM_MEMORY_ADDR_WIDTH=33" ./ci/blackbox.sh --driver=opae --app=mstress - CONFIGS="-DPLATFORM_MEMORY_ADDR_WIDTH=33" ./ci/blackbox.sh --driver=xrt --app=mstress + # test larger memory address + if [ "$XLEN" == "64" ]; then + CONFIGS="-DPLATFORM_MEMORY_ADDR_WIDTH=49" ./ci/blackbox.sh --driver=opae --app=mstress + CONFIGS="-DPLATFORM_MEMORY_ADDR_WIDTH=49" ./ci/blackbox.sh --driver=xrt --app=mstress + else + CONFIGS="-DPLATFORM_MEMORY_ADDR_WIDTH=33" ./ci/blackbox.sh --driver=opae --app=mstress + CONFIGS="-DPLATFORM_MEMORY_ADDR_WIDTH=33" ./ci/blackbox.sh --driver=xrt --app=mstress + fi - # test DRAM banks interleaving + # test memory banks interleaving CONFIGS="-DPLATFORM_MEMORY_INTERLEAVE=1" ./ci/blackbox.sh --driver=opae --app=mstress CONFIGS="-DPLATFORM_MEMORY_INTERLEAVE=0" ./ci/blackbox.sh --driver=opae --app=mstress @@ -326,8 +331,8 @@ scope() { echo "begin scope tests..." 
- ./ci/blackbox.sh --driver=opae --scope --app=demo --args="-n1" - ./ci/blackbox.sh --driver=xrt --scope --app=demo --args="-n1" + SCOPE_DEPTH=1024 ./ci/blackbox.sh --driver=opae --app=demo --args="-n1" --scope + SCOPE_DEPTH=1024 ./ci/blackbox.sh --driver=xrt --app=demo --args="-n1" --scope echo "debugging scope done!" } diff --git a/runtime/common/scope.cpp b/runtime/common/scope.cpp index 361a327caf..8f86709442 100644 --- a/runtime/common/scope.cpp +++ b/runtime/common/scope.cpp @@ -261,6 +261,20 @@ int vx_scope_start(scope_callback_t* callback, vx_device_h hdevice, uint64_t sta } } + // setup capture size + const char* capture_size_env = std::getenv("SCOPE_DEPTH"); + if (capture_size_env != nullptr) { + std::stringstream ss(capture_size_env); + uint32_t capture_size; + if (ss >> capture_size) { + for (auto& tap : json_obj["taps"]) { + auto id = tap["id"].get(); + uint64_t cmd_depth = (capture_size << 11) | (id << 3) | CMD_SET_DEPTH; + CHECK_ERR(g_callback.registerWrite(hdevice, cmd_depth)); + } + } + } + // set stop time if (stop_time != uint64_t(-1)) { std::cout << "[SCOPE] stop time: " << std::dec << stop_time << "s" << std::endl; diff --git a/sim/opaesim/opae_sim.cpp b/sim/opaesim/opae_sim.cpp index 5f619a743c..fe1832c1b7 100644 --- a/sim/opaesim/opae_sim.cpp +++ b/sim/opaesim/opae_sim.cpp @@ -78,8 +78,9 @@ static uint64_t trace_stop_time = TRACE_STOP_TIME; bool sim_trace_enabled() { if (timestamp >= trace_start_time - && timestamp < trace_stop_time) + && timestamp < trace_stop_time) { return true; + } return trace_enabled; } @@ -156,10 +157,10 @@ class opae_sim::Impl { // launch execution thread future_ = std::async(std::launch::async, [&]{ - while (!stop_) { - std::lock_guard guard(mutex_); - this->tick(); - } + while (!stop_) { + std::lock_guard guard(mutex_); + this->tick(); + } }); return 0; @@ -178,7 +179,7 @@ class opae_sim::Impl { return -1; // set uninitialized data to "baadf00d" for (uint32_t i = 0; i < len; ++i) { - ((uint8_t*)alloc)[i] = 
(0xbaadf00d >> ((i & 0x3) * 8)) & 0xff; + ((uint8_t*)alloc)[i] = (0xbaadf00d >> ((i & 0x3) * 8)) & 0xff; } host_buffer_t buffer; buffer.data = (uint64_t*)alloc; @@ -207,8 +208,9 @@ class opae_sim::Impl { std::lock_guard guard(mutex_); // simulate CPU-GPU latency - for (uint32_t i = 0; i < CPU_GPU_LATENCY; ++i) + for (uint32_t i = 0; i < CPU_GPU_LATENCY; ++i) { this->tick(); + } // simulate mmio request device_->vcp2af_sRxPort_c0_mmioRdValid = 1; @@ -225,8 +227,9 @@ class opae_sim::Impl { std::lock_guard guard(mutex_); // simulate CPU-GPU latency - for (uint32_t i = 0; i < CPU_GPU_LATENCY; ++i) + for (uint32_t i = 0; i < CPU_GPU_LATENCY; ++i) { this->tick(); + } // simulate mmio request device_->vcp2af_sRxPort_c0_mmioWrValid = 1; @@ -324,13 +327,14 @@ class opae_sim::Impl { void sRxPort_bus_eval() { // check mmio request bool mmio_req_enabled = device_->vcp2af_sRxPort_c0_mmioRdValid - || device_->vcp2af_sRxPort_c0_mmioWrValid; + || device_->vcp2af_sRxPort_c0_mmioWrValid; // schedule CCI read responses std::list::iterator cci_rd_it(cci_reads_.end()); for (auto it = cci_reads_.begin(), ie = cci_reads_.end(); it != ie; ++it) { - if (it->cycles_left > 0) + if (it->cycles_left > 0) { it->cycles_left -= 1; + } if ((cci_rd_it == ie) && (it->cycles_left == 0)) { cci_rd_it = it; } @@ -339,8 +343,9 @@ class opae_sim::Impl { // schedule CCI write responses std::list::iterator cci_wr_it(cci_writes_.end()); for (auto it = cci_writes_.begin(), ie = cci_writes_.end(); it != ie; ++it) { - if (it->cycles_left > 0) + if (it->cycles_left > 0) { it->cycles_left -= 1; + } if ((cci_wr_it == ie) && (it->cycles_left == 0)) { cci_wr_it = it; } @@ -358,7 +363,7 @@ class opae_sim::Impl { // send CCI read response (ensure mmio disabled) device_->vcp2af_sRxPort_c0_rspValid = 0; if (!mmio_req_enabled - && (cci_rd_it != cci_reads_.end())) { + && (cci_rd_it != cci_reads_.end())) { device_->vcp2af_sRxPort_c0_rspValid = 1; device_->vcp2af_sRxPort_c0_hdr_resp_type = 0; 
memcpy(device_->vcp2af_sRxPort_c0_data, cci_rd_it->data.data(), CACHE_BLOCK_SIZE); From cf3909a9101026350d68f4acabd187b55f55ebf9 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sun, 29 Sep 2024 07:52:53 -0700 Subject: [PATCH 244/407] minor update --- hw/rtl/libs/VX_encoder.sv | 12 +++++------- hw/rtl/libs/VX_find_first.sv | 16 +++++++++------- hw/rtl/libs/VX_pipe_buffer.sv | 3 +-- 3 files changed, 15 insertions(+), 16 deletions(-) diff --git a/hw/rtl/libs/VX_encoder.sv b/hw/rtl/libs/VX_encoder.sv index ed65ed4f6e..86ccad7925 100644 --- a/hw/rtl/libs/VX_encoder.sv +++ b/hw/rtl/libs/VX_encoder.sv @@ -40,8 +40,8 @@ module VX_encoder #( end else if (MODEL == 1) begin : g_model1 localparam M = 1 << LN; `IGNORE_UNOPTFLAT_BEGIN - wire [LN-1:0][M-1:0] addr; - wire [LN:0][M-1:0] v; + wire [M-1:0] addr [LN]; + wire [M-1:0] v [LN+1]; `IGNORE_UNOPTFLAT_END // base case, also handle padding for non-power of two inputs @@ -50,19 +50,17 @@ module VX_encoder #( for (genvar lvl = 1; lvl < (LN+1); ++lvl) begin : g_scan_l localparam SN = 1 << (LN - lvl); localparam SI = M / SN; - localparam SW = lvl; - for (genvar s = 0; s < SN; ++s) begin : g_scan_s `IGNORE_UNOPTFLAT_BEGIN wire [1:0] vs = {v[lvl-1][s*SI+(SI>>1)], v[lvl-1][s*SI]}; `IGNORE_UNOPTFLAT_END assign v[lvl][s*SI] = (| vs); if (lvl == 1) begin : g_lvl_1 - assign addr[lvl-1][s*SI +: SW] = vs[!REVERSE]; + assign addr[lvl-1][s*SI +: lvl] = vs[!REVERSE]; end else begin : g_lvl_n - assign addr[lvl-1][s*SI +: SW] = { + assign addr[lvl-1][s*SI +: lvl] = { vs[!REVERSE], - addr[lvl-2][s*SI +: SW-1] | addr[lvl-2][s*SI+(SI>>1) +: SW-1] + addr[lvl-2][s*SI +: lvl-1] | addr[lvl-2][s*SI+(SI>>1) +: lvl-1] }; end end diff --git a/hw/rtl/libs/VX_find_first.sv b/hw/rtl/libs/VX_find_first.sv index 43666737ce..2a1714e18e 100644 --- a/hw/rtl/libs/VX_find_first.sv +++ b/hw/rtl/libs/VX_find_first.sv @@ -28,10 +28,10 @@ module VX_find_first #( localparam TL = (1 << LOGN) - 1; localparam TN = (1 << (LOGN+1)) - 1; -`IGNORE_WARNINGS_BEGIN - wire 
[TN-1:0] s_n; - wire [TN-1:0][DATAW-1:0] d_n; -`IGNORE_WARNINGS_END +`IGNORE_UNOPTFLAT_BEGIN + wire s_n [TN]; + wire [DATAW-1:0] d_n [TN]; +`IGNORE_UNOPTFLAT_END for (genvar i = 0; i < N; ++i) begin : g_reverse assign s_n[TL+i] = REVERSE ? valid_in[N-1-i] : valid_in[i]; @@ -46,9 +46,11 @@ module VX_find_first #( end for (genvar j = 0; j < LOGN; ++j) begin : g_scan - for (genvar i = 0; i < (2**j); ++i) begin : g_i - assign s_n[2**j-1+i] = s_n[2**(j+1)-1+i*2] | s_n[2**(j+1)-1+i*2+1]; - assign d_n[2**j-1+i] = s_n[2**(j+1)-1+i*2] ? d_n[2**(j+1)-1+i*2] : d_n[2**(j+1)-1+i*2+1]; + localparam I = 1 << j; + for (genvar i = 0; i < I; ++i) begin : g_i + localparam K = I+i-1; + assign s_n[K] = s_n[2*K+1] | s_n[2*K+2]; + assign d_n[K] = s_n[2*K+1] ? d_n[2*K+1] : d_n[2*K+2]; end end diff --git a/hw/rtl/libs/VX_pipe_buffer.sv b/hw/rtl/libs/VX_pipe_buffer.sv index d71a78dacb..5ba23bc08f 100644 --- a/hw/rtl/libs/VX_pipe_buffer.sv +++ b/hw/rtl/libs/VX_pipe_buffer.sv @@ -46,7 +46,7 @@ module VX_pipe_buffer #( end else begin : g_register wire [DEPTH:0] valid; `IGNORE_UNOPTFLAT_BEGIN - wire [DEPTH:0] ready; + wire ready [DEPTH+1]; `IGNORE_UNOPTFLAT_END wire [DEPTH:0][DATAW-1:0] data; @@ -71,7 +71,6 @@ module VX_pipe_buffer #( assign valid_out = valid[DEPTH]; assign data_out = data[DEPTH]; assign ready[DEPTH] = ready_out; - end endmodule From 60860ec684d20d7592dd2f978fb5da48dc799413 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sun, 29 Sep 2024 09:03:24 -0700 Subject: [PATCH 245/407] minor update --- sim/rtlsim/processor.cpp | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/sim/rtlsim/processor.cpp b/sim/rtlsim/processor.cpp index f651ad9d83..2e0189a713 100644 --- a/sim/rtlsim/processor.cpp +++ b/sim/rtlsim/processor.cpp @@ -143,7 +143,6 @@ class Processor::Impl { } void run() { - #ifndef NDEBUG std::cout << std::dec << timestamp << ": [sim] run()" << std::endl; #endif @@ -151,6 +150,9 @@ class Processor::Impl { // reset device this->reset(); + // start + 
device_->reset = 0; + // wait on device to go busy while (!device_->busy) { this->tick(); @@ -161,6 +163,9 @@ class Processor::Impl { this->tick(); } + // stop + device_->reset = 1; + this->cout_flush(); } @@ -196,7 +201,6 @@ class Processor::Impl { this->eval(); } - device_->reset = 0; device_->mem_req_ready = 1; } From a3031922ce88d704eb6dbcc898c566f22f3e8829 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sun, 29 Sep 2024 09:07:45 -0700 Subject: [PATCH 246/407] minor update --- ci/regression.sh.in | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ci/regression.sh.in b/ci/regression.sh.in index f2ce1b17d3..9827199bb0 100755 --- a/ci/regression.sh.in +++ b/ci/regression.sh.in @@ -331,8 +331,8 @@ scope() { echo "begin scope tests..." - SCOPE_DEPTH=1024 ./ci/blackbox.sh --driver=opae --app=demo --args="-n1" --scope - SCOPE_DEPTH=1024 ./ci/blackbox.sh --driver=xrt --app=demo --args="-n1" --scope + SCOPE_DEPTH=256 ./ci/blackbox.sh --driver=opae --app=demo --args="-n1" --scope + SCOPE_DEPTH=256 ./ci/blackbox.sh --driver=xrt --app=demo --args="-n1" --scope echo "debugging scope done!" 
} From 2d00cec9d3f31e5bf3487bae8a83bc8c9ca44438 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Mon, 30 Sep 2024 02:12:30 -0700 Subject: [PATCH 247/407] minor update --- hw/rtl/libs/VX_mem_scheduler.sv | 9 ++++----- hw/syn/xilinx/xrt/Makefile | 6 +++--- sim/rtlsim/processor.cpp | 36 +++++++++++++++++++++------------ 3 files changed, 30 insertions(+), 21 deletions(-) diff --git a/hw/rtl/libs/VX_mem_scheduler.sv b/hw/rtl/libs/VX_mem_scheduler.sv index 73647911a9..1a0b2c597e 100644 --- a/hw/rtl/libs/VX_mem_scheduler.sv +++ b/hw/rtl/libs/VX_mem_scheduler.sv @@ -433,7 +433,7 @@ module VX_mem_scheduler #( end end - if (RSP_PARTIAL != 0) begin : g_rsp_partial + if (RSP_PARTIAL != 0 || CORE_REQS == 1) begin : g_rsp_partial reg [CORE_QUEUE_SIZE-1:0] rsp_sop_r; @@ -459,16 +459,15 @@ module VX_mem_scheduler #( end else begin : g_rsp_full - // use flattened arrays for BRAM synthesis compatibility reg [(CORE_BATCHES * CORE_CHANNELS * WORD_WIDTH)-1:0] rsp_store [CORE_QUEUE_SIZE-1:0]; - reg [(CORE_BATCHES * CORE_CHANNELS)-1:0][WORD_WIDTH-1:0] rsp_store_n; + reg [CORE_BATCHES-1:0][CORE_CHANNELS-1:0][WORD_WIDTH-1:0] rsp_store_n; reg [CORE_REQS-1:0] rsp_orig_mask [CORE_QUEUE_SIZE-1:0]; always @(*) begin rsp_store_n = rsp_store[ibuf_raddr]; for (integer i = 0; i < CORE_CHANNELS; ++i) begin if ((CORE_CHANNELS == 1) || mem_rsp_mask_s[i]) begin - rsp_store_n[rsp_batch_idx * CORE_CHANNELS + i] = mem_rsp_data_s[i]; + rsp_store_n[rsp_batch_idx][i] = mem_rsp_data_s[i]; end end end @@ -489,7 +488,7 @@ module VX_mem_scheduler #( for (genvar r = 0; r < CORE_REQS; ++r) begin : g_crsp_data localparam i = r / CORE_CHANNELS; localparam j = r % CORE_CHANNELS; - assign crsp_data[r] = rsp_store_n[i * CORE_CHANNELS + j]; + assign crsp_data[r] = rsp_store_n[i][j]; end assign mem_rsp_ready_s = crsp_ready || ~rsp_complete; diff --git a/hw/syn/xilinx/xrt/Makefile b/hw/syn/xilinx/xrt/Makefile index 67eccf8410..f5997352c1 100644 --- a/hw/syn/xilinx/xrt/Makefile +++ b/hw/syn/xilinx/xrt/Makefile @@ -178,9 
+178,9 @@ $(BIN_DIR)/emconfig.json: report: $(XCLBIN_CONTAINER) ifeq ($(TARGET), hw) - cp $(BUILD_DIR)/_x/logs/link/vivado.log $(BUILD_DIR)/bin/vivado.log - cp $(BUILD_DIR)/_x/reports/link/imp/impl_1_full_util_routed.rpt $(BUILD_DIR)/bin/synthesis.log - cp $(BUILD_DIR)/_x/reports/link/imp/impl_1_hw_bb_locked_timing_summary_routed.rpt $(BUILD_DIR)/bin/timing.log + cp $(BUILD_DIR)/_x/logs/link/syn/ulp_vortex_afu_1_0_synth_1_runme.log $(BUILD_DIR)/bin + cp $(BUILD_DIR)/_x/reports/link/syn/ulp_vortex_afu_1_0_synth_1_ulp_vortex_afu_1_0_utilization_synth.rpt $(BUILD_DIR)/bin + cp $(BUILD_DIR)/_x/reports/link/imp/impl_1_hw_bb_locked_timing_summary_routed.rpt $(BUILD_DIR)/bin endif chipscope: diff --git a/sim/rtlsim/processor.cpp b/sim/rtlsim/processor.cpp index 2e0189a713..32f4b4e1ea 100644 --- a/sim/rtlsim/processor.cpp +++ b/sim/rtlsim/processor.cpp @@ -152,6 +152,7 @@ class Processor::Impl { // start device_->reset = 0; + device_->mem_req_ready = 1; // wait on device to go busy while (!device_->busy) { @@ -175,6 +176,7 @@ class Processor::Impl { device_->dcr_wr_data = value; this->tick(); device_->dcr_wr_valid = 0; + this->tick(); } private: @@ -184,7 +186,6 @@ class Processor::Impl { this->dcr_bus_reset(); print_bufs_.clear(); - pending_mem_reqs_.clear(); { @@ -200,12 +201,21 @@ class Processor::Impl { device_->clk = 1; this->eval(); } - - device_->mem_req_ready = 1; } void tick() { - this->mem_bus_eval(); + + device_->clk = 0; + this->eval(); + + this->mem_bus_eval(0); + + device_->clk = 1; + this->eval(); + + this->mem_bus_eval(1); + + dram_sim_.tick(); if (!dram_queue_.empty()) { auto mem_req = dram_queue_.front(); @@ -221,13 +231,6 @@ class Processor::Impl { } } - dram_sim_.tick(); - - device_->clk = 0; - this->eval(); - device_->clk = 1; - this->eval(); - #ifndef NDEBUG fflush(stdout); #endif @@ -250,9 +253,14 @@ class Processor::Impl { device_->mem_rsp_valid = 0; } - void mem_bus_eval() { + void mem_bus_eval(bool clk) { + if (!clk) { + mem_rd_rsp_ready_ = 
device_->mem_rsp_ready; + return; + } + // process memory read responses - if (device_->mem_rsp_valid && device_->mem_rsp_ready) { + if (device_->mem_rsp_valid && mem_rd_rsp_ready_) { device_->mem_rsp_valid = 0; } if (!device_->mem_rsp_valid) { @@ -375,6 +383,8 @@ class Processor::Impl { VerilatedVcdC *tfp_; #endif + bool mem_rd_rsp_ready_; + RAM* ram_; }; From 1deb13c469f87003d3d99498df3cc0ba84284f6f Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Mon, 30 Sep 2024 03:36:00 -0700 Subject: [PATCH 248/407] minor update --- hw/rtl/cache/VX_bank_flush.sv | 3 ++- hw/rtl/libs/VX_cyclic_arbiter.sv | 3 ++- hw/rtl/libs/VX_rr_arbiter.sv | 3 ++- hw/rtl/libs/VX_stream_xbar.sv | 6 ++++-- 4 files changed, 10 insertions(+), 5 deletions(-) diff --git a/hw/rtl/cache/VX_bank_flush.sv b/hw/rtl/cache/VX_bank_flush.sv index e90c93cf6a..a01ae0e0b1 100644 --- a/hw/rtl/cache/VX_bank_flush.sv +++ b/hw/rtl/cache/VX_bank_flush.sv @@ -115,7 +115,8 @@ module VX_bank_flush #( if (WRITEBACK && `CS_WAY_SEL_BITS > 0) begin : g_flush_way VX_decoder #( - .N (`CS_WAY_SEL_BITS) + .N (`CS_WAY_SEL_BITS), + .D (NUM_WAYS) ) ctr_decoder ( .data_in (counter_r[`CS_LINE_SEL_BITS +: `CS_WAY_SEL_BITS]), .valid_in (1'b1), diff --git a/hw/rtl/libs/VX_cyclic_arbiter.sv b/hw/rtl/libs/VX_cyclic_arbiter.sv index ff803b9108..a4dead008f 100644 --- a/hw/rtl/libs/VX_cyclic_arbiter.sv +++ b/hw/rtl/libs/VX_cyclic_arbiter.sv @@ -66,7 +66,8 @@ module VX_cyclic_arbiter #( ); VX_decoder #( - .N (LOG_NUM_REQS) + .N (LOG_NUM_REQS), + .D (NUM_REQS) ) grant_decoder ( .data_in (grant_index), .valid_in (1'b1), diff --git a/hw/rtl/libs/VX_rr_arbiter.sv b/hw/rtl/libs/VX_rr_arbiter.sv index 3831238dc7..efe9838d66 100644 --- a/hw/rtl/libs/VX_rr_arbiter.sv +++ b/hw/rtl/libs/VX_rr_arbiter.sv @@ -481,7 +481,8 @@ module VX_rr_arbiter #( end VX_decoder #( - .N (LOG_NUM_REQS) + .N (LOG_NUM_REQS), + .D (NUM_REQS) ) grant_decoder ( .data_in (grant_index), .valid_in (grant_valid), diff --git a/hw/rtl/libs/VX_stream_xbar.sv 
b/hw/rtl/libs/VX_stream_xbar.sv index db59f895eb..febfd0465b 100644 --- a/hw/rtl/libs/VX_stream_xbar.sv +++ b/hw/rtl/libs/VX_stream_xbar.sv @@ -65,7 +65,8 @@ module VX_stream_xbar #( for (genvar i = 0; i < NUM_INPUTS; ++i) begin : g_sel_in_decoders VX_decoder #( - .N (OUT_WIDTH) + .N (OUT_WIDTH), + .D (NUM_OUTPUTS) ) sel_in_decoder ( .data_in (sel_in[i]), .valid_in (valid_in[i]), @@ -137,7 +138,8 @@ module VX_stream_xbar #( wire [NUM_OUTPUTS-1:0][DATAW-1:0] data_out_w; VX_decoder #( - .N (OUT_WIDTH) + .N (OUT_WIDTH), + .D (NUM_OUTPUTS) ) sel_in_decoder ( .data_in (sel_in[0]), .valid_in (valid_in[0]), From 6f81df5edb1828f610327a94772fc51017550e02 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Mon, 30 Sep 2024 06:25:50 -0700 Subject: [PATCH 249/407] axi_adapter large tags support --- hw/rtl/VX_define.vh | 4 +++ hw/rtl/Vortex_axi.sv | 18 +++++----- hw/rtl/core/VX_fetch.sv | 11 +++--- hw/rtl/libs/VX_axi_adapter.sv | 67 ++++++++++++++++++++++++++--------- hw/rtl/libs/VX_mem_adapter.sv | 4 +++ hw/scripts/scope.py | 6 ++-- 6 files changed, 79 insertions(+), 31 deletions(-) diff --git a/hw/rtl/VX_define.vh b/hw/rtl/VX_define.vh index 7c1590dff2..8b59bc9107 100644 --- a/hw/rtl/VX_define.vh +++ b/hw/rtl/VX_define.vh @@ -52,8 +52,12 @@ `ifndef NDEBUG `define UUID_WIDTH 44 `else +`ifdef SCOPE +`define UUID_WIDTH 44 +`else `define UUID_WIDTH 1 `endif +`endif `define PC_BITS (`XLEN-1) `define OFFSET_BITS 12 diff --git a/hw/rtl/Vortex_axi.sv b/hw/rtl/Vortex_axi.sv index 7d238aacde..483773223a 100644 --- a/hw/rtl/Vortex_axi.sv +++ b/hw/rtl/Vortex_axi.sv @@ -82,10 +82,11 @@ module Vortex_axi import VX_gpu_pkg::*; #( // Status output wire busy ); - localparam MIN_TAG_WIDTH = `VX_MEM_TAG_WIDTH - `UUID_WIDTH; - localparam VX_MEM_ADDR_A_WIDTH = `VX_MEM_ADDR_WIDTH + `CLOG2(`VX_MEM_DATA_WIDTH) - `CLOG2(AXI_DATA_WIDTH); - - `STATIC_ASSERT((AXI_TID_WIDTH >= MIN_TAG_WIDTH), ("invalid memory tag width: current=%0d, expected=%0d", AXI_TID_WIDTH, MIN_TAG_WIDTH)) + localparam DST_LDATAW = 
`CLOG2(`VX_MEM_DATA_WIDTH); + localparam SRC_LDATAW = `CLOG2(AXI_DATA_WIDTH); + localparam SUB_LDATAW = DST_LDATAW - SRC_LDATAW; + localparam VX_MEM_TAG_A_WIDTH = `VX_MEM_TAG_WIDTH + `MAX(SUB_LDATAW, 0); + localparam VX_MEM_ADDR_A_WIDTH = `VX_MEM_ADDR_WIDTH + SUB_LDATAW; wire mem_req_valid; wire mem_req_rw; @@ -133,12 +134,12 @@ module Vortex_axi import VX_gpu_pkg::*; #( wire [(AXI_DATA_WIDTH/8)-1:0] mem_req_byteen_a; wire [VX_MEM_ADDR_A_WIDTH-1:0] mem_req_addr_a; wire [AXI_DATA_WIDTH-1:0] mem_req_data_a; - wire [AXI_TID_WIDTH-1:0] mem_req_tag_a; + wire [VX_MEM_TAG_A_WIDTH-1:0] mem_req_tag_a; wire mem_req_ready_a; wire mem_rsp_valid_a; wire [AXI_DATA_WIDTH-1:0] mem_rsp_data_a; - wire [AXI_TID_WIDTH-1:0] mem_rsp_tag_a; + wire [VX_MEM_TAG_A_WIDTH-1:0] mem_rsp_tag_a; wire mem_rsp_ready_a; VX_mem_adapter #( @@ -147,7 +148,7 @@ module Vortex_axi import VX_gpu_pkg::*; #( .SRC_ADDR_WIDTH (`VX_MEM_ADDR_WIDTH), .DST_ADDR_WIDTH (VX_MEM_ADDR_A_WIDTH), .SRC_TAG_WIDTH (`VX_MEM_TAG_WIDTH), - .DST_TAG_WIDTH (AXI_TID_WIDTH), + .DST_TAG_WIDTH (VX_MEM_TAG_A_WIDTH), .REQ_OUT_BUF (0), .RSP_OUT_BUF (0) ) mem_adapter ( @@ -185,7 +186,8 @@ module Vortex_axi import VX_gpu_pkg::*; #( .DATA_WIDTH (AXI_DATA_WIDTH), .ADDR_WIDTH_IN (VX_MEM_ADDR_A_WIDTH), .ADDR_WIDTH_OUT (AXI_ADDR_WIDTH), - .TAG_WIDTH (AXI_TID_WIDTH), + .TAG_WIDTH_IN (VX_MEM_TAG_A_WIDTH), + .TAG_WIDTH_OUT (AXI_TID_WIDTH), .NUM_BANKS (AXI_NUM_BANKS), .BANK_INTERLEAVE(0), .RSP_OUT_BUF ((AXI_NUM_BANKS > 1) ? 
2 : 0) diff --git a/hw/rtl/core/VX_fetch.sv b/hw/rtl/core/VX_fetch.sv index 35e1060375..cf862aa06d 100644 --- a/hw/rtl/core/VX_fetch.sv +++ b/hw/rtl/core/VX_fetch.sv @@ -137,10 +137,13 @@ module VX_fetch import VX_gpu_pkg::*; #( wire schedule_fire = schedule_if.valid && schedule_if.ready; wire icache_bus_req_fire = icache_bus_if.req_valid && icache_bus_if.req_ready; wire icache_bus_rsp_fire = icache_bus_if.rsp_valid && icache_bus_if.rsp_ready; + wire [`UUID_WIDTH-1:0] icache_bus_req_uuid = icache_bus_if.req_data.tag[ICACHE_TAG_WIDTH-1 -: `UUID_WIDTH]; + wire [`UUID_WIDTH-1:0] icache_bus_rsp_uuid = icache_bus_if.rsp_data.tag[ICACHE_TAG_WIDTH-1 -: `UUID_WIDTH]; `NEG_EDGE (reset_negedge, reset); `SCOPE_TAP_EX (0, 1, 6, 3, ( - `UUID_WIDTH + `NW_WIDTH + `NUM_THREADS + `PC_BITS + ICACHE_TAG_WIDTH + ICACHE_WORD_SIZE + - ICACHE_ADDR_WIDTH + (ICACHE_WORD_SIZE * 8) + ICACHE_TAG_WIDTH + `UUID_WIDTH + `NW_WIDTH + `NUM_THREADS + `PC_BITS + + `UUID_WIDTH + ICACHE_WORD_SIZE + ICACHE_ADDR_WIDTH + + `UUID_WIDTH + (ICACHE_WORD_SIZE * 8) ), { schedule_if.valid, schedule_if.ready, @@ -154,8 +157,8 @@ module VX_fetch import VX_gpu_pkg::*; #( icache_bus_rsp_fire },{ schedule_if.data.uuid, schedule_if.data.wid, schedule_if.data.tmask, schedule_if.data.PC, - icache_bus_if.req_data.tag, icache_bus_if.req_data.byteen, icache_bus_if.req_data.addr, - icache_bus_if.rsp_data.data, icache_bus_if.rsp_data.tag + icache_bus_req_uuid, icache_bus_if.req_data.byteen, icache_bus_if.req_data.addr, + icache_bus_rsp_uuid, icache_bus_if.rsp_data.data }, reset_negedge, 1'b0, 4096 ); diff --git a/hw/rtl/libs/VX_axi_adapter.sv b/hw/rtl/libs/VX_axi_adapter.sv index f0144ff91f..255789fd71 100644 --- a/hw/rtl/libs/VX_axi_adapter.sv +++ b/hw/rtl/libs/VX_axi_adapter.sv @@ -18,9 +18,11 @@ module VX_axi_adapter #( parameter DATA_WIDTH = 512, parameter ADDR_WIDTH_IN = 1, parameter ADDR_WIDTH_OUT = 32, - parameter TAG_WIDTH = 8, + parameter TAG_WIDTH_IN = 8, + parameter TAG_WIDTH_OUT = 8, parameter NUM_BANKS = 1, 
parameter BANK_INTERLEAVE= 0, + parameter TAG_BUFFER_SIZE= 32, parameter RSP_OUT_BUF = 0 ) ( input wire clk, @@ -32,20 +34,20 @@ module VX_axi_adapter #( input wire [DATA_WIDTH/8-1:0] mem_req_byteen, input wire [ADDR_WIDTH_IN-1:0] mem_req_addr, input wire [DATA_WIDTH-1:0] mem_req_data, - input wire [TAG_WIDTH-1:0] mem_req_tag, + input wire [TAG_WIDTH_IN-1:0] mem_req_tag, output wire mem_req_ready, // Vortex response output wire mem_rsp_valid, output wire [DATA_WIDTH-1:0] mem_rsp_data, - output wire [TAG_WIDTH-1:0] mem_rsp_tag, + output wire [TAG_WIDTH_IN-1:0] mem_rsp_tag, input wire mem_rsp_ready, // AXI write request address channel output wire m_axi_awvalid [NUM_BANKS], input wire m_axi_awready [NUM_BANKS], output wire [ADDR_WIDTH_OUT-1:0] m_axi_awaddr [NUM_BANKS], - output wire [TAG_WIDTH-1:0] m_axi_awid [NUM_BANKS], + output wire [TAG_WIDTH_OUT-1:0] m_axi_awid [NUM_BANKS], output wire [7:0] m_axi_awlen [NUM_BANKS], output wire [2:0] m_axi_awsize [NUM_BANKS], output wire [1:0] m_axi_awburst [NUM_BANKS], @@ -65,14 +67,14 @@ module VX_axi_adapter #( // AXI write response channel input wire m_axi_bvalid [NUM_BANKS], output wire m_axi_bready [NUM_BANKS], - input wire [TAG_WIDTH-1:0] m_axi_bid [NUM_BANKS], + input wire [TAG_WIDTH_OUT-1:0] m_axi_bid [NUM_BANKS], input wire [1:0] m_axi_bresp [NUM_BANKS], // AXI read address channel output wire m_axi_arvalid [NUM_BANKS], input wire m_axi_arready [NUM_BANKS], output wire [ADDR_WIDTH_OUT-1:0] m_axi_araddr [NUM_BANKS], - output wire [TAG_WIDTH-1:0] m_axi_arid [NUM_BANKS], + output wire [TAG_WIDTH_OUT-1:0] m_axi_arid [NUM_BANKS], output wire [7:0] m_axi_arlen [NUM_BANKS], output wire [2:0] m_axi_arsize [NUM_BANKS], output wire [1:0] m_axi_arburst [NUM_BANKS], @@ -87,7 +89,7 @@ module VX_axi_adapter #( output wire m_axi_rready [NUM_BANKS], input wire [DATA_WIDTH-1:0] m_axi_rdata [NUM_BANKS], input wire m_axi_rlast [NUM_BANKS], - input wire [TAG_WIDTH-1:0] m_axi_rid [NUM_BANKS], + input wire [TAG_WIDTH_OUT-1:0] m_axi_rid 
[NUM_BANKS], input wire [1:0] m_axi_rresp [NUM_BANKS] ); localparam DATA_SIZE = `CLOG2(DATA_WIDTH/8); @@ -133,14 +135,47 @@ module VX_axi_adapter #( ); end + wire tbuf_full; + wire [TAG_WIDTH_OUT-1:0] mem_req_tag_out; + wire [TAG_WIDTH_OUT-1:0] mem_rsp_tag_out; + + // handle tag width mismatch + if (TAG_WIDTH_IN > TAG_WIDTH_OUT) begin : g_tag_buf + localparam TBUF_ADDRW = `CLOG2(TAG_BUFFER_SIZE); + wire [TBUF_ADDRW-1:0] tbuf_waddr, tbuf_raddr; + VX_index_buffer #( + .DATAW (TAG_WIDTH_IN), + .SIZE (TAG_BUFFER_SIZE) + ) tag_buf ( + .clk (clk), + .reset (reset), + .acquire_en (mem_req_valid && !mem_req_rw && mem_req_ready), + .write_addr (tbuf_waddr), + .write_data (mem_req_tag), + .read_data (mem_rsp_tag), + .read_addr (tbuf_raddr), + .release_en (mem_rsp_valid && mem_rsp_ready), + .full (tbuf_full), + `UNUSED_PIN (empty) + ); + assign mem_req_tag_out = TAG_WIDTH_OUT'(tbuf_waddr); + assign tbuf_raddr = mem_rsp_tag_out[TBUF_ADDRW-1:0]; + `UNUSED_VAR (mem_rsp_tag_out) + end else begin : g_no_tag_buf + assign tbuf_full = 0; + assign mem_req_tag_out = TAG_WIDTH_OUT'(mem_req_tag); + assign mem_rsp_tag = mem_rsp_tag_out[TAG_WIDTH_IN-1:0]; + `UNUSED_VAR (mem_rsp_tag_out) + end + // request ack - assign mem_req_ready = mem_req_rw ? axi_write_ready[req_bank_sel] : m_axi_arready[req_bank_sel]; + assign mem_req_ready = (mem_req_rw ? 
axi_write_ready[req_bank_sel] : m_axi_arready[req_bank_sel]) && ~tbuf_full; // AXI write request address channel for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_axi_write_addr - assign m_axi_awvalid[i] = mem_req_valid && mem_req_rw && (req_bank_sel == i) && ~m_axi_aw_ack[i]; + assign m_axi_awvalid[i] = mem_req_valid && mem_req_rw && (req_bank_sel == i) && ~tbuf_full && ~m_axi_aw_ack[i]; assign m_axi_awaddr[i] = ADDR_WIDTH_OUT'(req_bank_off) << `CLOG2(DATA_WIDTH/8); - assign m_axi_awid[i] = mem_req_tag; + assign m_axi_awid[i] = mem_req_tag_out; assign m_axi_awlen[i] = 8'b00000000; assign m_axi_awsize[i] = 3'(DATA_SIZE); assign m_axi_awburst[i] = 2'b00; @@ -153,7 +188,7 @@ module VX_axi_adapter #( // AXI write request data channel for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_axi_write_data - assign m_axi_wvalid[i] = mem_req_valid && mem_req_rw && (req_bank_sel == i) && ~m_axi_w_ack[i]; + assign m_axi_wvalid[i] = mem_req_valid && mem_req_rw && (req_bank_sel == i) && ~tbuf_full && ~m_axi_w_ack[i]; assign m_axi_wdata[i] = mem_req_data; assign m_axi_wstrb[i] = mem_req_byteen; assign m_axi_wlast[i] = 1'b1; @@ -170,9 +205,9 @@ module VX_axi_adapter #( // AXI read request channel for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_axi_read_req - assign m_axi_arvalid[i] = mem_req_valid && ~mem_req_rw && (req_bank_sel == i); + assign m_axi_arvalid[i] = mem_req_valid && ~mem_req_rw && (req_bank_sel == i) && ~tbuf_full; assign m_axi_araddr[i] = ADDR_WIDTH_OUT'(req_bank_off) << `CLOG2(DATA_WIDTH/8); - assign m_axi_arid[i] = mem_req_tag; + assign m_axi_arid[i] = mem_req_tag_out; assign m_axi_arlen[i] = 8'b00000000; assign m_axi_arsize[i] = 3'(DATA_SIZE); assign m_axi_arburst[i] = 2'b00; @@ -186,7 +221,7 @@ module VX_axi_adapter #( // AXI read response channel wire [NUM_BANKS-1:0] rsp_arb_valid_in; - wire [NUM_BANKS-1:0][DATA_WIDTH+TAG_WIDTH-1:0] rsp_arb_data_in; + wire [NUM_BANKS-1:0][DATA_WIDTH+TAG_WIDTH_OUT-1:0] rsp_arb_data_in; wire [NUM_BANKS-1:0] rsp_arb_ready_in; for 
(genvar i = 0; i < NUM_BANKS; ++i) begin : g_axi_read_rsp @@ -200,7 +235,7 @@ module VX_axi_adapter #( VX_stream_arb #( .NUM_INPUTS (NUM_BANKS), - .DATAW (DATA_WIDTH + TAG_WIDTH), + .DATAW (DATA_WIDTH + TAG_WIDTH_OUT), .ARBITER ("R"), .OUT_BUF (RSP_OUT_BUF) ) rsp_arb ( @@ -209,7 +244,7 @@ module VX_axi_adapter #( .valid_in (rsp_arb_valid_in), .data_in (rsp_arb_data_in), .ready_in (rsp_arb_ready_in), - .data_out ({mem_rsp_data, mem_rsp_tag}), + .data_out ({mem_rsp_data, mem_rsp_tag_out}), .valid_out (mem_rsp_valid), .ready_out (mem_rsp_ready), `UNUSED_PIN (sel_out) diff --git a/hw/rtl/libs/VX_mem_adapter.sv b/hw/rtl/libs/VX_mem_adapter.sv index 066de829f7..4ece7cf699 100644 --- a/hw/rtl/libs/VX_mem_adapter.sv +++ b/hw/rtl/libs/VX_mem_adapter.sv @@ -59,6 +59,10 @@ module VX_mem_adapter #( localparam D = `ABS(DST_LDATAW - SRC_LDATAW); localparam P = 2**D; + localparam EXPECTED_TAG_WIDTH = SRC_TAG_WIDTH + ((DST_LDATAW > SRC_LDATAW) ? D : 0); + + `STATIC_ASSERT(DST_TAG_WIDTH >= EXPECTED_TAG_WIDTH, ("invalid DST_TAG_WIDTH parameter, current=%0d, expected=%0d", DST_TAG_WIDTH, EXPECTED_TAG_WIDTH)) + wire mem_req_valid_out_w; wire [DST_ADDR_WIDTH-1:0] mem_req_addr_out_w; wire mem_req_rw_out_w; diff --git a/hw/scripts/scope.py b/hw/scripts/scope.py index 9503fd757e..f6d93961bf 100755 --- a/hw/scripts/scope.py +++ b/hw/scripts/scope.py @@ -78,7 +78,7 @@ def parse_var_name(xml_doc, xml_node): elif xml_node.tag == "arraysel": return parse_arraysel_name(xml_doc, xml_node) else: - raise ET.ParseError("invalid probe entry" + source_loc(xml_doc, xml_node.get("loc"))) + raise ET.ParseError("invalid probe entry: tag=" + xml_node.tag + ", " + source_loc(xml_doc, xml_node.get("loc"))) return name def parse_sel_field(xml_doc, dtype_id, offset, width): @@ -116,7 +116,7 @@ def parse_sel_field(xml_doc, dtype_id, offset, width): end = width - 1 + offset return F"[{end}:{offset}]" else: - raise ET.ParseError("invalid probe entry: " + source_loc(xml_doc, xml_type.get("loc"))) + raise 
ET.ParseError("invalid probe entry: tag=" + xml_type.tag + ", " + source_loc(xml_doc, xml_type.get("loc"))) return None def parse_sel_name(xml_doc, xml_node): @@ -167,7 +167,7 @@ def parse_vl_port(xml_doc, xml_node, signals): signals.append([name, signal_width]) total_width = total_width + signal_width else: - raise ET.ParseError("invalid probe entry: " + source_loc(xml_doc, xml_node.get("loc"))) + raise ET.ParseError("invalid probe entry: tag=" + xml_node.tag + ", " + source_loc(xml_doc, xml_node.get("loc"))) # Check for duplicate signal names signal_names = [signal[0] for signal in signals] duplicates = set([name for name in signal_names if signal_names.count(name) > 1]) From ee690248414cff7650b259ce7e9cc0b2ab1a7c41 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Mon, 30 Sep 2024 09:17:42 -0700 Subject: [PATCH 250/407] minor update --- hw/rtl/Vortex_axi.sv | 6 +++--- hw/rtl/core/VX_schedule.sv | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/hw/rtl/Vortex_axi.sv b/hw/rtl/Vortex_axi.sv index 483773223a..418a2aa5cb 100644 --- a/hw/rtl/Vortex_axi.sv +++ b/hw/rtl/Vortex_axi.sv @@ -82,11 +82,11 @@ module Vortex_axi import VX_gpu_pkg::*; #( // Status output wire busy ); - localparam DST_LDATAW = `CLOG2(`VX_MEM_DATA_WIDTH); - localparam SRC_LDATAW = `CLOG2(AXI_DATA_WIDTH); + localparam DST_LDATAW = `CLOG2(AXI_DATA_WIDTH); + localparam SRC_LDATAW = `CLOG2(`VX_MEM_DATA_WIDTH); localparam SUB_LDATAW = DST_LDATAW - SRC_LDATAW; localparam VX_MEM_TAG_A_WIDTH = `VX_MEM_TAG_WIDTH + `MAX(SUB_LDATAW, 0); - localparam VX_MEM_ADDR_A_WIDTH = `VX_MEM_ADDR_WIDTH + SUB_LDATAW; + localparam VX_MEM_ADDR_A_WIDTH = `VX_MEM_ADDR_WIDTH - SUB_LDATAW; wire mem_req_valid; wire mem_req_rw; diff --git a/hw/rtl/core/VX_schedule.sv b/hw/rtl/core/VX_schedule.sv index e7937fe493..9b49ae2680 100644 --- a/hw/rtl/core/VX_schedule.sv +++ b/hw/rtl/core/VX_schedule.sv @@ -332,7 +332,7 @@ module VX_schedule import VX_gpu_pkg::*; #( }; wire [`UUID_WIDTH-1:0] instr_uuid; -`ifndef NDEBUG 
+`ifdef UUID_ENABLE VX_uuid_gen #( .CORE_ID (CORE_ID), .UUID_WIDTH (`UUID_WIDTH) From a3aca502b7db8d67448b36aa5841089f18290d29 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Mon, 30 Sep 2024 14:20:48 -0700 Subject: [PATCH 251/407] minor update --- hw/rtl/VX_define.vh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/hw/rtl/VX_define.vh b/hw/rtl/VX_define.vh index 8b59bc9107..4ccb008804 100644 --- a/hw/rtl/VX_define.vh +++ b/hw/rtl/VX_define.vh @@ -50,9 +50,11 @@ `define PERF_CTR_BITS 44 `ifndef NDEBUG +`define UUID_ENABLE `define UUID_WIDTH 44 `else `ifdef SCOPE +`define UUID_ENABLE `define UUID_WIDTH 44 `else `define UUID_WIDTH 1 From 44ebc12ed4011fd997083f340372f130d3d9eb33 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Tue, 1 Oct 2024 00:55:45 -0700 Subject: [PATCH 252/407] minor update --- hw/rtl/core/VX_issue_slice.sv | 32 +++++++++++++++++++++++++------- 1 file changed, 25 insertions(+), 7 deletions(-) diff --git a/hw/rtl/core/VX_issue_slice.sv b/hw/rtl/core/VX_issue_slice.sv index 5032065d3d..f287525c74 100644 --- a/hw/rtl/core/VX_issue_slice.sv +++ b/hw/rtl/core/VX_issue_slice.sv @@ -91,29 +91,47 @@ module VX_issue_slice import VX_gpu_pkg::*; #( `ifdef SCOPE `ifdef DBG_SCOPE_ISSUE `SCOPE_IO_SWITCH (1); + wire decode_fire = decode_if.valid && decode_if.ready; wire operands_fire = operands_if.valid && operands_if.ready; `NEG_EDGE (reset_negedge, reset); - `SCOPE_TAP_EX (0, 2, 2, 2, ( - `UUID_WIDTH + `NUM_THREADS + `EX_BITS + `INST_OP_BITS + - 1 + `NR_BITS + (`NUM_THREADS * 3 * `XLEN) + - `UUID_WIDTH + `NUM_THREADS + `NR_BITS + (`NUM_THREADS*`XLEN) + 1 + `SCOPE_TAP_EX (0, 2, 4, 3, ( + `UUID_WIDTH + `NW_WIDTH + `NUM_THREADS + `PC_BITS + `EX_BITS + `INST_OP_BITS + 1 + `NR_BITS * 4 + + `UUID_WIDTH + ISSUE_WIS_W + `NUM_THREADS + `PC_BITS + `EX_BITS + `INST_OP_BITS + 1 + `NR_BITS + (3 * `XLEN) + + `UUID_WIDTH + ISSUE_WIS_W + `NUM_THREADS + `NR_BITS + (`NUM_THREADS * `XLEN) + 1 ), { + decode_if.valid, + decode_if.ready, operands_if.valid, operands_if.ready }, 
{ + decode_fire, operands_fire, writeback_if.valid // ack-free }, { + decode_if.data.uuid, + decode_if.data.wid, + decode_if.data.tmask, + decode_if.data.PC, + decode_if.data.ex_type, + decode_if.data.op_type, + decode_if.data.wb, + decode_if.data.rd, + decode_if.data.rs1, + decode_if.data.rs2, + decode_if.data.rs3, operands_if.data.uuid, + operands_if.data.wis, operands_if.data.tmask, + operands_if.data.PC, operands_if.data.ex_type, operands_if.data.op_type, operands_if.data.wb, operands_if.data.rd, - operands_if.data.rs1_data, - operands_if.data.rs2_data, - operands_if.data.rs3_data, + operands_if.data.rs1_data[0], + operands_if.data.rs2_data[0], + operands_if.data.rs3_data[0], writeback_if.data.uuid, + writeback_if.data.wis, writeback_if.data.tmask, writeback_if.data.rd, writeback_if.data.data, From 5cb033ae13cd288622ac5e102db7b1c45b76fa58 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Wed, 2 Oct 2024 07:12:30 -0700 Subject: [PATCH 253/407] minor update --- hw/rtl/core/VX_lsu_slice.sv | 2 +- hw/rtl/libs/VX_mem_scheduler.sv | 14 +++++++------- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/hw/rtl/core/VX_lsu_slice.sv b/hw/rtl/core/VX_lsu_slice.sv index 0f947af78d..1f39ab5a75 100644 --- a/hw/rtl/core/VX_lsu_slice.sv +++ b/hw/rtl/core/VX_lsu_slice.sv @@ -310,7 +310,7 @@ module VX_lsu_slice import VX_gpu_pkg::*; #( wire lsu_mem_rsp_ready; VX_mem_scheduler #( - .INSTANCE_ID ($sformatf("%s-scheduler", INSTANCE_ID)), + .INSTANCE_ID ($sformatf("%s-memsched", INSTANCE_ID)), .CORE_REQS (NUM_LANES), .MEM_CHANNELS(NUM_LANES), .WORD_SIZE (LSU_WORD_SIZE), diff --git a/hw/rtl/libs/VX_mem_scheduler.sv b/hw/rtl/libs/VX_mem_scheduler.sv index 1a0b2c597e..1426d59c0e 100644 --- a/hw/rtl/libs/VX_mem_scheduler.sv +++ b/hw/rtl/libs/VX_mem_scheduler.sv @@ -459,15 +459,15 @@ module VX_mem_scheduler #( end else begin : g_rsp_full - reg [(CORE_BATCHES * CORE_CHANNELS * WORD_WIDTH)-1:0] rsp_store [CORE_QUEUE_SIZE-1:0]; - reg 
[CORE_BATCHES-1:0][CORE_CHANNELS-1:0][WORD_WIDTH-1:0] rsp_store_n; + reg [CORE_CHANNELS-1:0][CORE_BATCHES-1:0][WORD_WIDTH-1:0] rsp_store [CORE_QUEUE_SIZE-1:0]; + reg [CORE_CHANNELS-1:0][CORE_BATCHES-1:0][WORD_WIDTH-1:0] rsp_store_n; reg [CORE_REQS-1:0] rsp_orig_mask [CORE_QUEUE_SIZE-1:0]; - always @(*) begin - rsp_store_n = rsp_store[ibuf_raddr]; - for (integer i = 0; i < CORE_CHANNELS; ++i) begin + for (genvar i = 0; i < CORE_CHANNELS; ++i) begin : g_rsp_store_n + always @(*) begin + rsp_store_n[i] = rsp_store[ibuf_raddr][i]; if ((CORE_CHANNELS == 1) || mem_rsp_mask_s[i]) begin - rsp_store_n[rsp_batch_idx][i] = mem_rsp_data_s[i]; + rsp_store_n[i][rsp_batch_idx] = mem_rsp_data_s[i]; end end end @@ -488,7 +488,7 @@ module VX_mem_scheduler #( for (genvar r = 0; r < CORE_REQS; ++r) begin : g_crsp_data localparam i = r / CORE_CHANNELS; localparam j = r % CORE_CHANNELS; - assign crsp_data[r] = rsp_store_n[i][j]; + assign crsp_data[r] = rsp_store_n[j][i]; end assign mem_rsp_ready_s = crsp_ready || ~rsp_complete; From ad7377c8bab97d909f16b2d498e56815597c79eb Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Wed, 2 Oct 2024 07:41:29 -0700 Subject: [PATCH 254/407] minor udpate --- hw/rtl/libs/VX_mem_scheduler.sv | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/hw/rtl/libs/VX_mem_scheduler.sv b/hw/rtl/libs/VX_mem_scheduler.sv index 1426d59c0e..4ba8bf1479 100644 --- a/hw/rtl/libs/VX_mem_scheduler.sv +++ b/hw/rtl/libs/VX_mem_scheduler.sv @@ -459,16 +459,21 @@ module VX_mem_scheduler #( end else begin : g_rsp_full - reg [CORE_CHANNELS-1:0][CORE_BATCHES-1:0][WORD_WIDTH-1:0] rsp_store [CORE_QUEUE_SIZE-1:0]; - reg [CORE_CHANNELS-1:0][CORE_BATCHES-1:0][WORD_WIDTH-1:0] rsp_store_n; + wire [CORE_CHANNELS-1:0][CORE_BATCHES-1:0][WORD_WIDTH-1:0] rsp_store_n; reg [CORE_REQS-1:0] rsp_orig_mask [CORE_QUEUE_SIZE-1:0]; - for (genvar i = 0; i < CORE_CHANNELS; ++i) begin : g_rsp_store_n - always @(*) begin - rsp_store_n[i] = rsp_store[ibuf_raddr][i]; - 
if ((CORE_CHANNELS == 1) || mem_rsp_mask_s[i]) begin - rsp_store_n[i][rsp_batch_idx] = mem_rsp_data_s[i]; + for (genvar i = 0; i < CORE_CHANNELS; ++i) begin : g_rsp_store + for (genvar j = 0; j < CORE_BATCHES; ++j) begin : g_j + reg [WORD_WIDTH-1:0] rsp_store [CORE_QUEUE_SIZE-1:0]; + wire rsp_wren = mem_rsp_fire_s + && (BATCH_SEL_WIDTH'(j) == rsp_batch_idx) + && ((CORE_CHANNELS == 1) || mem_rsp_mask_s[i]); + always @(posedge clk) begin + if (rsp_wren) begin + rsp_store[ibuf_raddr] <= mem_rsp_data_s[i]; + end end + assign rsp_store_n[i][j] = rsp_wren ? mem_rsp_data_s[i] : rsp_store[ibuf_raddr]; end end @@ -476,9 +481,6 @@ module VX_mem_scheduler #( if (ibuf_push) begin rsp_orig_mask[ibuf_waddr] <= core_req_mask; end - if (mem_rsp_valid_s) begin - rsp_store[ibuf_raddr] <= rsp_store_n; - end end assign crsp_valid = mem_rsp_valid_s && rsp_complete; From 4b8ca42e85186ba73597b87fa378645a5dbe0e68 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Wed, 2 Oct 2024 09:27:26 -0700 Subject: [PATCH 255/407] minor update --- hw/rtl/libs/VX_stream_buffer.sv | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/hw/rtl/libs/VX_stream_buffer.sv b/hw/rtl/libs/VX_stream_buffer.sv index 51e33db60c..4b77df83de 100644 --- a/hw/rtl/libs/VX_stream_buffer.sv +++ b/hw/rtl/libs/VX_stream_buffer.sv @@ -45,7 +45,7 @@ module VX_stream_buffer #( assign valid_out = valid_in; assign data_out = data_in; - end else if (OUT_REG != 0) begin : g_with_reg + end else if (OUT_REG != 0) begin : g_out_reg reg [DATAW-1:0] data_out_r; reg [DATAW-1:0] buffer; @@ -84,23 +84,27 @@ module VX_stream_buffer #( assign valid_out = valid_out_r; assign data_out = data_out_r; - end else begin : g_no_reg + end else begin : g_no_out_reg reg [1:0][DATAW-1:0] shift_reg; - reg [1:0] fifo_state; + reg [1:0] fifo_state, fifo_state_n; - wire fire_in = valid_in && ready_in; + wire fire_in = valid_in && ready_in; wire fire_out = valid_out && ready_out; + always @(*) begin + case ({fire_in, 
fire_out}) + 2'b10: fifo_state_n = {fifo_state[0], 1'b1}; // 00 -> 01, 01 -> 10 + 2'b01: fifo_state_n = {1'b0, fifo_state[1]}; // 10 -> 01, 01 -> 00 + default: fifo_state_n = fifo_state; + endcase + end + always @(posedge clk) begin if (reset) begin fifo_state <= 2'b00; end else begin - case ({fire_in, fire_out}) - 2'b10: fifo_state <= {fifo_state[0], 1'b1}; // 00 -> 01, 01 -> 10 - 2'b01: fifo_state <= {1'b0, fifo_state[1]}; // 10 -> 01, 01 -> 00 - default: fifo_state <= fifo_state; - endcase + fifo_state <= fifo_state_n; end end From 83badaac86cd4578484ebfd3b1b11fe089f666b7 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Wed, 2 Oct 2024 11:10:33 -0700 Subject: [PATCH 256/407] minor update --- hw/rtl/libs/VX_elastic_buffer.sv | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/hw/rtl/libs/VX_elastic_buffer.sv b/hw/rtl/libs/VX_elastic_buffer.sv index 5067a4dd32..c90aa06162 100644 --- a/hw/rtl/libs/VX_elastic_buffer.sv +++ b/hw/rtl/libs/VX_elastic_buffer.sv @@ -97,8 +97,10 @@ module VX_elastic_buffer #( wire [DATAW-1:0] data_out_t; wire ready_out_t; + wire valid_out_t = ~empty; + wire push = valid_in && ready_in; - wire pop = ~empty && ready_out_t; + wire pop = valid_out_t && ready_out_t; VX_fifo_queue #( .DATAW (DATAW), @@ -127,7 +129,7 @@ module VX_elastic_buffer #( ) out_buf ( .clk (clk), .reset (reset), - .valid_in (~empty), + .valid_in (valid_out_t), .data_in (data_out_t), .ready_in (ready_out_t), .valid_out (valid_out), From d1175a03c9606dce16cdc8f16772fba701fbb0af Mon Sep 17 00:00:00 2001 From: jaewon-lee-github Date: Wed, 2 Oct 2024 14:16:57 -0400 Subject: [PATCH 257/407] update the code accessing registers in obsoleted way --- sim/simx/emulator.cpp | 2 +- sim/simx/execute.cpp | 2 +- sim/simx/func_unit.cpp | 6 +++++- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/sim/simx/emulator.cpp b/sim/simx/emulator.cpp index 8d57f69fa9..08c51845c4 100644 --- a/sim/simx/emulator.cpp +++ b/sim/simx/emulator.cpp @@ -74,11 +74,11 @@ 
Emulator::Emulator(const Arch &arch, const DCRS &dcrs, Core* core) , core_(core) , warps_(arch.num_warps(), arch) , barriers_(arch.num_barriers(), 0) + , ipdom_size_(arch.num_threads()-1) // [TBC] Currently, tradeoff between scratchpad size & performance has not been evaluated. Scratchpad is // considered to be big enough to hold input tiles for one output tile. // In future versions, scratchpad size should be fixed to an appropriate value. , scratchpad(std::vector(32 * 32 * 32768)) - , ipdom_size_(arch.num_threads()-1) { this->clear(); } diff --git a/sim/simx/execute.cpp b/sim/simx/execute.cpp index a7d8a937d2..e70d45cb24 100644 --- a/sim/simx/execute.cpp +++ b/sim/simx/execute.cpp @@ -1473,7 +1473,7 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) { trace->fu_type = FUType::LSU; trace->lsu_type = LsuType::TCU_LOAD; - trace->used_iregs.set(rsrc0); + trace->src_regs[0] = {RegType::Integer, rsrc0}; auto trace_data = std::make_shared(num_threads); trace->data = trace_data; diff --git a/sim/simx/func_unit.cpp b/sim/simx/func_unit.cpp index 2de58639b5..a182f6d8b8 100644 --- a/sim/simx/func_unit.cpp +++ b/sim/simx/func_unit.cpp @@ -222,7 +222,10 @@ void LsuUnit::tick() { input.pop(); } } -///////// TENSOR code TBC //////////////////////////////// +/* TO BE FIXED:Tensor_core code + send_request is not used anymore. 
Need to be modified number of load +*/ +/* int LsuUnit::send_requests(instr_trace_t* trace, int block_idx, int tag) { int count = 0; @@ -275,6 +278,7 @@ int LsuUnit::send_requests(instr_trace_t* trace, int block_idx, int tag) { } return count; } +*/ /////////////////////////////////////////////////////////////////////////////// From b7531c9de1d4acbc33e2040fb6f4f100eb96d015 Mon Sep 17 00:00:00 2001 From: jaewon-lee-github Date: Wed, 2 Oct 2024 17:46:01 -0400 Subject: [PATCH 258/407] support 64bit --- tests/regression/matmul/kernel.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/regression/matmul/kernel.cpp b/tests/regression/matmul/kernel.cpp index b0b4753c79..5fa976df48 100644 --- a/tests/regression/matmul/kernel.cpp +++ b/tests/regression/matmul/kernel.cpp @@ -8,9 +8,9 @@ void kernel_body(kernel_arg_t* __UNIFORM__ arg) { int32_t* src0_ptr = (int32_t*)arg->src0_addr; int32_t* src1_ptr = (int32_t*)arg->src1_addr; int32_t* dst_ptr = (int32_t*)arg->dst_addr; - unsigned a_addr = reinterpret_cast(src0_ptr); - unsigned b_addr = reinterpret_cast(src1_ptr); - unsigned c_addr = reinterpret_cast(dst_ptr); + uint64_t a_addr = reinterpret_cast(src0_ptr); + uint64_t b_addr = reinterpret_cast(src1_ptr); + uint64_t c_addr = reinterpret_cast(dst_ptr); uint32_t tc_size = arg->tc_size; uint32_t TC_per_warp = arg->TC_per_warp; @@ -100,9 +100,9 @@ void kernel_body(kernel_arg_t* __UNIFORM__ arg) { //TODO :: change this for new task->thread distribution if (((task_id%num_tasks_per_warp)/num_tasks_per_thread) < thread_limit) { - unsigned a_addr_base = a_addr + offset*arg->data_size; - unsigned b_addr_base = b_addr + offset*arg->data_size; - unsigned c_addr_base = c_addr + offset_c*arg->data_size; + uint64_t a_addr_base = a_addr + offset*arg->data_size; + uint64_t b_addr_base = b_addr + offset*arg->data_size; + uint64_t c_addr_base = c_addr + offset_c*arg->data_size; csr_write(VX_MAT_MUL_SIZE,n_tiles); csr_write(VX_TC_NUM,TC_per_warp); 
csr_write(VX_TC_SIZE,tc_size); From 5cf6797bd36b737ed1dda5482389a5ee4162c750 Mon Sep 17 00:00:00 2001 From: jaewon-lee-github Date: Thu, 3 Oct 2024 15:19:39 -0400 Subject: [PATCH 259/407] - Change STARTUP_ADDR to use the same 0x80000000 address - Fix environment variable for vortex kernel directories --- .github/workflows/ci.yml | 29 +++++++++++++++-------------- ci/regression.sh.in | 6 +----- config.mk.in | 3 --- hw/rtl/VX_config.vh | 4 ++-- tests/kernel/common.mk | 3 +-- tests/opencl/common.mk | 3 +-- tests/regression/common.mk | 3 +-- 7 files changed, 21 insertions(+), 30 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 404edc12ce..f4f5902a81 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -21,13 +21,13 @@ jobs: steps: - name: Checkout code - uses: actions/checkout@v4 + uses: actions/checkout@v2 with: submodules: recursive - name: Cache Toolchain Directory id: cache-toolchain - uses: actions/cache@v4 + uses: actions/cache@v2 with: path: tools key: ${{ runner.os }}-toolchain-v0.1 @@ -36,7 +36,7 @@ jobs: - name: Cache Third Party Directory id: cache-thirdparty - uses: actions/cache@v4 + uses: actions/cache@v2 with: path: third_party key: ${{ runner.os }}-thirdparty-v0.1 @@ -71,7 +71,7 @@ jobs: steps: - name: Checkout code - uses: actions/checkout@v4 + uses: actions/checkout@v2 - name: Install Dependencies run: | @@ -79,7 +79,7 @@ jobs: - name: Cache Toolchain Directory id: cache-toolchain - uses: actions/cache@v4 + uses: actions/cache@v2 with: path: tools key: ${{ runner.os }}-toolchain-v0.1 @@ -88,7 +88,7 @@ jobs: - name: Cache Third Party Directory id: cache-thirdparty - uses: actions/cache@v4 + uses: actions/cache@v2 with: path: third_party key: ${{ runner.os }}-thirdparty-v0.1 @@ -106,23 +106,23 @@ jobs: make tests -s > /dev/null - name: Upload Build Artifact - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@v3 with: name: build-${{ matrix.xlen }} path: build${{ matrix.xlen }} - test: 
+ tests: runs-on: ubuntu-20.04 needs: build strategy: fail-fast: false matrix: - name: [regression, opencl, cache, config1, config2, debug, scope, stress, synthesis, vm ] + name: [regression, opencl, cache, config1, config2, debug, scope, stress, synthesis, vm] xlen: [32, 64] steps: - name: Checkout code - uses: actions/checkout@v4 + uses: actions/checkout@v2 - name: Install Dependencies run: | @@ -130,7 +130,7 @@ jobs: - name: Cache Toolchain Directory id: cache-toolchain - uses: actions/cache@v4 + uses: actions/cache@v2 with: path: tools key: ${{ runner.os }}-toolchain-v0.1 @@ -139,7 +139,7 @@ jobs: - name: Cache Third Party Directory id: cache-thirdparty - uses: actions/cache@v4 + uses: actions/cache@v2 with: path: third_party key: ${{ runner.os }}-thirdparty-v0.1 @@ -147,10 +147,11 @@ jobs: ${{ runner.os }}-thirdparty- - name: Download Build Artifact - uses: actions/download-artifact@v4 + uses: actions/download-artifact@v3 with: name: build-${{ matrix.xlen }} path: build${{ matrix.xlen }} + - name: Run tests run: | cd build${{ matrix.xlen }} @@ -167,7 +168,7 @@ jobs: complete: runs-on: ubuntu-20.04 - needs: test + needs: tests steps: - name: Check Completion diff --git a/ci/regression.sh.in b/ci/regression.sh.in index 92a56d56fd..4297eee8db 100755 --- a/ci/regression.sh.in +++ b/ci/regression.sh.in @@ -269,11 +269,7 @@ config2() # custom program startup address make -C tests/regression/dogfood clean-kernel - if [ "$XLEN" == "64" ]; then - STARTUP_ADDR=0x180000000 make -C tests/regression/dogfood - else - STARTUP_ADDR=0x80000000 make -C tests/regression/dogfood - fi + STARTUP_ADDR=0x80000000 make -C tests/regression/dogfood ./ci/blackbox.sh --driver=simx --app=dogfood ./ci/blackbox.sh --driver=rtlsim --app=dogfood make -C tests/regression/dogfood clean-kernel diff --git a/config.mk.in b/config.mk.in index be369b56ee..57f77059e5 100644 --- a/config.mk.in +++ b/config.mk.in @@ -31,7 +31,4 @@ RISCV_TOOLCHAIN_PATH ?= $(TOOLDIR)/riscv$(XLEN)-gnu-toolchain 
RISCV_PREFIX ?= riscv$(XLEN)-unknown-elf RISCV_SYSROOT ?= $(RISCV_TOOLCHAIN_PATH)/$(RISCV_PREFIX) -VORTEX_RT_PATH ?= $(VORTEX_HOME)/runtime -VORTEX_KN_PATH ?= $(VORTEX_HOME)/kernel - THIRD_PARTY_DIR ?= $(VORTEX_HOME)/third_party diff --git a/hw/rtl/VX_config.vh b/hw/rtl/VX_config.vh index 69f72b7a17..c349f367a6 100644 --- a/hw/rtl/VX_config.vh +++ b/hw/rtl/VX_config.vh @@ -177,7 +177,7 @@ `endif `ifndef STARTUP_ADDR -`define STARTUP_ADDR 64'h180000000 +`define STARTUP_ADDR 64'h080000000 `endif `ifndef USER_BASE_ADDR @@ -190,7 +190,7 @@ `ifdef VM_ENABLE `ifndef PAGE_TABLE_BASE_ADDR -`define PAGE_TABLE_BASE_ADDR 64'h1F0000000 +`define PAGE_TABLE_BASE_ADDR 64'h0F0000000 `endif `endif diff --git a/tests/kernel/common.mk b/tests/kernel/common.mk index 8fd609d581..7829ffb149 100644 --- a/tests/kernel/common.mk +++ b/tests/kernel/common.mk @@ -2,11 +2,10 @@ ROOT_DIR := $(realpath ../../..) ifeq ($(XLEN),64) CFLAGS += -march=rv64imafd -mabi=lp64d -STARTUP_ADDR ?= 0x180000000 else CFLAGS += -march=rv32imaf -mabi=ilp32f -STARTUP_ADDR ?= 0x80000000 endif +STARTUP_ADDR ?= 0x80000000 VORTEX_KN_PATH ?= $(ROOT_DIR)/kernel diff --git a/tests/opencl/common.mk b/tests/opencl/common.mk index 3a3de87ee8..bb7b1e0d64 100644 --- a/tests/opencl/common.mk +++ b/tests/opencl/common.mk @@ -5,13 +5,12 @@ TARGET ?= opaesim XRT_SYN_DIR ?= $(VORTEX_HOME)/hw/syn/xilinx/xrt XRT_DEVICE_INDEX ?= 0 +STARTUP_ADDR ?= 0x80000000 ifeq ($(XLEN),64) VX_CFLAGS += -march=rv64imafd -mabi=lp64d -STARTUP_ADDR ?= 0x180000000 POCL_CC_FLAGS += POCL_VORTEX_XLEN=64 else VX_CFLAGS += -march=rv32imaf -mabi=ilp32f -STARTUP_ADDR ?= 0x80000000 POCL_CC_FLAGS += POCL_VORTEX_XLEN=32 endif diff --git a/tests/regression/common.mk b/tests/regression/common.mk index 94fe840df4..2cba5ef9a8 100644 --- a/tests/regression/common.mk +++ b/tests/regression/common.mk @@ -8,12 +8,11 @@ XRT_DEVICE_INDEX ?= 0 VORTEX_RT_PATH ?= $(ROOT_DIR)/runtime VORTEX_KN_PATH ?= $(ROOT_DIR)/kernel +STARTUP_ADDR ?= 0x80000000 ifeq ($(XLEN),64) VX_CFLAGS 
+= -march=rv64imafd -mabi=lp64d -STARTUP_ADDR ?= 0x180000000 else VX_CFLAGS += -march=rv32imaf -mabi=ilp32f -STARTUP_ADDR ?= 0x80000000 endif LLVM_CFLAGS += --sysroot=$(RISCV_SYSROOT) From dd16d70515e7f37e5efb15c3f7196c2cefaf82e3 Mon Sep 17 00:00:00 2001 From: Udit Subramanya Date: Thu, 3 Oct 2024 17:29:21 -0400 Subject: [PATCH 260/407] contributing and fpga docs --- docs/contributing.md | 2 +- docs/fpga_setup.md | 148 +++++++++++++++++++++++++++++++++++++++++++ docs/simulation.md | 2 +- 3 files changed, 150 insertions(+), 2 deletions(-) diff --git a/docs/contributing.md b/docs/contributing.md index 5264454d2b..e871582729 100644 --- a/docs/contributing.md +++ b/docs/contributing.md @@ -41,4 +41,4 @@ However, contributions are strongly encouraged and keep the project moving forwa ## Creating and Adding Tests -see `testing.md` \ No newline at end of file +The CI pipeline the vortex tests. If you are contributing code changes, then review `testing.md` to learn how to integrate your own tests \ No newline at end of file diff --git a/docs/fpga_setup.md b/docs/fpga_setup.md index 80d71e45fa..2a673f8fc0 100644 --- a/docs/fpga_setup.md +++ b/docs/fpga_setup.md @@ -1,5 +1,153 @@ # FPGA Startup and Configuration Guide +## Gaining Access to FPGA's with CRNCH +If you are associated with Georgia Tech and need remote access to the FPGA's, you can utilize CRNCH's server. + +## What is CRNCH? + +**C**enter for **R**esearch into **N**ovel **C**omputing **H**ierarchies + +## What does CRNCH Offer? + +**The Rogues Gallery (RG)**: new concept focused on developing our understanding of next-generation hardware with a focus on unorthodox and uncommon technologies. **RG** will acquire new and unique hardware (ie, the aforementioned “*rogues*”) from vendors, research labs, and startups and make this hardware available to students, faculty, and industry collaborators within a managed data center environment + +## Why are the Rouges Important? 
+ +By exposing students and researchers to this set of unique hardware, we hope to foster cross-cutting discussions about hardware designs that will drive future *performance improvements in computing long after the Moore’s Law era of “cheap transistors” ends*. + +## How is the Rogues Gallery Funded? + +Rogues Gallery testbed is primarily supported by the National Science Foundation (NSF) under NSF Award Number [#2016701](https://www.nsf.gov/awardsearch/showAward?AWD_ID=2016701&HistoricalAwards=false) + +## Rogues Gallery Documentation + +You can read about RG in more detail on its official documentation [page](https://gt-crnch-rg.readthedocs.io/en/main/index.html#). + +You can listen to a talk about RG [here](https://mediaspace.gatech.edu/media/Jeff%20Young%20-%20Rogues%20Gallery%20-%20CRNCH%20Summit%202021/1_lqlgr0jj) + +[CRNCH Summit 2023](https://github.com/gt-crnch/crnch-summit-2023/tree/main) + +## Request Access for Rogues Gallery + +You should use [this form](https://crnch-rg.cc.gatech.edu/request-rogues-gallery-access/) to request access to RG’s reconfigurable computing (vortex fpga) resources. You should receive an email with your ticket item being created. Once it gets processed, you should get an email confirming that your access has been granted. It might take some time to get processed. + +## How to Access Rogues Gallery? + +CRNCH resources do not require any VPN access for GT members so you can head to the web url for open on-demand: [rg-ood.crnch.gatech.edu](http://rg-ood.crnch.gatech.edu/) + +Alternatively, you can `ssh` into rg with: `ssh <username>@rg-login.crnch.gatech.edu` + +(`ssh usubramanya3@rg-login.crnch.gatech.edu`) + +Once you’ve logged in, you can use Slurm to request other nodes within the testbed. See more information on Slurm at [this page](https://gt-crnch-rg.readthedocs.io/en/main/general/using-slurm.html). + +Note that you can also use VSCode to log into the Rogues Gallery via its terminal functionality.
See [this page for more details](https://gt-crnch-rg.readthedocs.io/en/main/general/visual-studio-code.html). + +## **What Machines are Available in the Rogues Gallery?** + +The complete list of machines can be found [here](https://gt-crnch-rg.readthedocs.io/en/main/general/rg-hardware.html). + +## Which Machine do we Need from RG? + +There are three primary nodes you might use. The table below summarizes: + +| Name | Device | Description | +| --- | --- | --- | +| flubber1 | u50 | can synthesize vortex | +| flubber4 | u250 | missing HBM | +| flubber5 | u280 | can synthesize vortex | + + +*Note*: The `USERSCRATCH` folder is synchronized between all RG nodes. That means you can upload your files to `rg-login` and have them available on `flubber[1,4-5]`. Changes on one node will be reflected across all nodes. + +## How to Access flubber for Synthesis? + +Now that you have the files prepared and available on the FPGA node, you can start the synthesis. To run on hardware we need a rg-xilinx-fpga-hw cluster which includes **flubber[1,4-5]**. First `ssh` into the Rogues Gallery:
First `ssh` into the rouges gallery: + +```bash +ssh [@rg-login.crnch.gatech.edu](mailto:usubramanya3@rg-login.crnch.gatech.edu) +``` + +Then, to access the hardware node you need to `ssh` into flubber: + +```bash +ssh flubber1 +``` + +## Synthesis for Xillinx Boards + +XRT Environment Setup +---------------------- + + $ source /opt/xilinx/Vitis/2023.1/settings64.sh + $ source /opt/xilinx/xrt/setup.sh + + +Check Installed FPGA Platforms +------------------------------ + + $ platforminfo -l + + +Build FPGA image +---------------- + + $ cd hw/syn/xilinx/xrt + $ PREFIX=test1 PLATFORM=xilinx_u50_gen3x16_xdma_5_202210_1 TARGET=hw NUM_CORES=4 make + +Will run the synthesis under new build directory: BUILD_DIR := "\\_\\_\" + +The generated bitstream will be located under /bin/vortex_afu.xclbin + +Sample FPGA Run Test +-------------------- + +Ensure you have the correct opae runtime for the FPGA target + + $ make -C runtime/xrt clean + $ TARGET=hw make -C runtime/xrt + +Run the following from your Vortex build directory + + $ TARGET=hw FPGA_BIN_DIR=/bin ./ci/blackbox.sh --driver=xrt --app=sgemm --args="-n128" + +--- + +The directory `hw/syn/xilinx/xrt` contains the makefile used to synthesize Vortex. + +For long-running jobs, invocation of this makefile can be made of the following form: + +`[CONFIGS=] [PREFIX=] [NUM_CORES=<#>] TARGET=hw|hw_emu PLATFORM= nohup make > 2>&1 &` + +For example: + +```bash +CONFIGS="-DL2_ENABLE -DDCACHE_SIZE=8192" PREFIX=build_4c_u280 NUM_CORES=4 TARGET=hw PLATFORM=xilinx_u280_gen3x16_xdma_1_202211_1 nohup make > build_u280_hw_4c.log 2>&1 & +``` + +The build is complete when the bitstream file `vortex_afu.xclbin` exists in `hw|hw_emu/bin`. 
+ +## Running a Program on FPGA + +The blackbox.sh script in `ci` can be used to run a test with Vortex’s xrt driver using the following command: + +`FPGA_BIN_DIR= TARGET=hw|hw_emu PLATFORM= ./ci/blackbox.sh --driver=xrt --app=` + +For example: + +`FPGA_BIN_DIR=`realpath hw/syn/xilinx/xrt/build_4c_u280_xilinx_u280_gen3x16_xdma_1_202211_1_hw/bin` TARGET=hw PLATFORM=xilinx_u280_gen3x16_xdma_1_202211_1 ./ci/blackbox.sh --driver=xrt --app=demo` + +## Synthesis for Intel (Altera) Boards + +To set up the environment, source the XRT setup.sh and other Xilinx scripts. For example: + +``` +source /opt/xilinx/xrt/setup.sh +source /tools/reconfig/xilinx/Vivado/2022.1/settings64.sh +source /tools/reconfig/xilinx/Vitis/2022.1/settings64.sh + +``` + OPAE Environment Setup ---------------------- diff --git a/docs/simulation.md b/docs/simulation.md index 86ce1f1358..e1c5780344 100644 --- a/docs/simulation.md +++ b/docs/simulation.md @@ -10,7 +10,7 @@ SimX is a C++ cycle-level in-house simulator developed for Vortex. The relevant ### FGPA Simulation -The current target FPGA for simulation is the Arria10 Intel Accelerator Card v1.0. The guide to build the fpga with specific configurations is located [here.](fpga_setup.md) +The guide to build the fpga with specific configurations is located [here.](fpga_setup.md) You can find instructions for both Xilinx and Intel (Altera) based FPGAs. 
### How to Test From 6a447350b7000c959c325198bc573d89a963c2b8 Mon Sep 17 00:00:00 2001 From: Udit Subramanya Date: Thu, 3 Oct 2024 17:42:47 -0400 Subject: [PATCH 261/407] remove redundant docs after consolidating --- docs/altera_fpga_guide.md | 79 --------------------------------------- docs/xilinx_fpga_guide.md | 36 ------------------ 2 files changed, 115 deletions(-) delete mode 100644 docs/altera_fpga_guide.md delete mode 100644 docs/xilinx_fpga_guide.md diff --git a/docs/altera_fpga_guide.md b/docs/altera_fpga_guide.md deleted file mode 100644 index 61d1ae26e3..0000000000 --- a/docs/altera_fpga_guide.md +++ /dev/null @@ -1,79 +0,0 @@ -# FPGA Startup and Configuration Guide - -OPAE Environment Setup ----------------------- - - $ source /opt/inteldevstack/init_env_user.sh - $ export OPAE_HOME=/opt/opae/1.1.2 - $ export PATH=$OPAE_HOME/bin:$PATH - $ export C_INCLUDE_PATH=$OPAE_HOME/include:$C_INCLUDE_PATH - $ export LIBRARY_PATH=$OPAE_HOME/lib:$LIBRARY_PATH - $ export LD_LIBRARY_PATH=$OPAE_HOME/lib:$LD_LIBRARY_PATH - -OPAE Build ------------------- - -The FPGA has to following configuration options: -- DEVICE_FAMILY=arria10 | stratix10 -- NUM_CORES=#n - -Command line: - - $ cd hw/syn/altera/opae - $ PREFIX=test1 TARGET=fpga NUM_CORES=4 make - -A new folder (ex: `test1_xxx_4c`) will be created and the build will start and take ~30-480 min to complete. -Setting TARGET=ase will build the project for simulation using Intel ASE. 
- - -OPAE Build Configuration ------------------------- - -The hardware configuration file `/hw/rtl/VX_config.vh` defines all the hardware parameters that can be modified when build the processor.For example, have the following parameters that can be configured: -- `NUM_WARPS`: Number of warps per cores -- `NUM_THREADS`: Number of threads per warps -- `PERF_ENABLE`: enable the use of all profile counters - -You configure the syntesis build from the command line: - - $ CONFIGS="-DPERF_ENABLE -DNUM_THREADS=8" make - -OPAE Build Progress -------------------- - -You could check the last 10 lines in the build log for possible errors until build completion. - - $ tail -n 10 /build.log - -Check if the build is still running by looking for quartus_sh, quartus_syn, or quartus_fit programs. - - $ ps -u - -If the build fails and you need to restart it, clean up the build folder using the following command: - - $ make clean - -The bitstream file `vortex_afu.gbs` should exist when the build is done: - - $ ls -lsa /synth/vortex_afu.gbs - - -Signing the bitstream and Programming the FPGA ----------------------------------------------- - - $ cd - $ PACSign PR -t UPDATE -H openssl_manager -i vortex_afu.gbs -o vortex_afu_unsigned_ssl.gbs - $ fpgasupdate vortex_afu_unsigned_ssl.gbs - -Sample FPGA Run Test --------------------- - -Ensure you have the correct opae runtime for the FPGA target - - $ make -C runtime/opae clean - $ TARGET=FPGA make -C runtime/opae - -Run the following from your Vortex build directory - - $ TARGET=fpga ./ci/blackbox.sh --driver=opae --app=sgemm --args="-n128" - diff --git a/docs/xilinx_fpga_guide.md b/docs/xilinx_fpga_guide.md deleted file mode 100644 index f2960deb6c..0000000000 --- a/docs/xilinx_fpga_guide.md +++ /dev/null @@ -1,36 +0,0 @@ -# FPGA Startup and Configuration Guide - -XRT Environment Setup ----------------------- - - $ source /opt/xilinx/Vitis/2023.1/settings64.sh - $ source /opt/xilinx/xrt/setup.sh - - -Check Installed FPGA Platforms 
------------------------------- - - $ platforminfo -l - - -Build FPGA image ----------------- - - $ cd hw/syn/xilinx/xrt - $ PREFIX=test1 PLATFORM=xilinx_u50_gen3x16_xdma_5_202210_1 TARGET=hw NUM_CORES=4 make - -Will run the synthesis under new build directory: BUILD_DIR := "\\_\\_\" - -The generated bitstream will be located under /bin/vortex_afu.xclbin - -Sample FPGA Run Test --------------------- - -Ensure you have the correct opae runtime for the FPGA target - - $ make -C runtime/xrt clean - $ TARGET=hw make -C runtime/xrt - -Run the following from your Vortex build directory - - $ TARGET=hw FPGA_BIN_DIR=/bin ./ci/blackbox.sh --driver=xrt --app=sgemm --args="-n128" \ No newline at end of file From 32b0376b28a68ec5d8158229210999a39638ee9c Mon Sep 17 00:00:00 2001 From: Udit Subramanya Date: Thu, 3 Oct 2024 17:43:39 -0400 Subject: [PATCH 262/407] remove old artifacts --- sim/common/bfloat.cpp | 221 ------------------------------------------ sim/common/bfloat.hpp | 0 2 files changed, 221 deletions(-) delete mode 100644 sim/common/bfloat.cpp delete mode 100644 sim/common/bfloat.hpp diff --git a/sim/common/bfloat.cpp b/sim/common/bfloat.cpp deleted file mode 100644 index e44f81b8b9..0000000000 --- a/sim/common/bfloat.cpp +++ /dev/null @@ -1,221 +0,0 @@ -#include -#include - -#include -#include - -// get float "in-memory" to exploit iee754 binary representation of floating point values -// use a u to trick compiler into letting you access float's bits directly -// bitwise operations cannot be done directly on iee754 representations per compiler settings -// ordering of the fields is important here -class MyFloat -{ -private: - void printBinary(int n, int i) - { - // Prints the binary representation - // of a number n up to i-bits. 
- int k; - for (k = i - 1; k >= 0; k--) - { - - if ((n >> k) & 1) - std::cout << "1"; - else - std::cout << "0"; - } - } - -public: - union BFloat_t - { - float f; - int i; - struct - { - uint32_t dead : 16; // don't use these, just place-holders - uint32_t mantissa : 7; // Mantissa (fractional part) of the number - uint32_t exponent : 8; // Exponent (power of 2) of the number - uint32_t sign : 1; - } parts; - }; - - void printBFloat(BFloat_t b) - { - std::cout << b.parts.sign << " | "; - printBinary(b.parts.exponent, 8); - std::cout << " | "; - printBinary(b.parts.mantissa, 7); - std::cout << std::endl; - } - - BFloat_t in_mem; - - MyFloat(float x) - { - in_mem.f = x; - printBFloat(in_mem); - } - - MyFloat(uint8_t mantissa, uint8_t exponent, bool sign) - { - in_mem.parts.mantissa = mantissa & 0x7F; - in_mem.parts.exponent = exponent; - in_mem.parts.sign = (int)sign; - - std::cout << "inside constructor" << std::endl; - std::cout << "bfloat:" << in_mem.f << std::endl; - printBFloat(in_mem); - } - - friend MyFloat operator+(const MyFloat &a, const MyFloat &b) - { - // get fields - bool a_sign = (bool)a.in_mem.parts.sign; - uint8_t a_exp = a.in_mem.parts.exponent - 127; - uint8_t a_mantissa = a.in_mem.parts.mantissa | 0x80; // add in the implicit bit - - bool b_sign = (bool)b.in_mem.parts.sign; - uint8_t b_exp = b.in_mem.parts.exponent - 127; - uint8_t b_mantissa = b.in_mem.parts.mantissa | 0x80; // add in the implicit bit - - // align mantissas by shifting the smaller exponent to the larger exponent - if (a_exp < b_exp) - { - a_mantissa >>= (b_exp - a_exp); - a_exp = b_exp; - } - else - { - b_mantissa >>= (a_exp - b_exp); - b_exp = a_exp; - } - - // add mantissas and adjust exponent if necessary - int sum_mantissa = a_mantissa + b_mantissa; - if (sum_mantissa & 0x100) - { // this val check might be wrong - sum_mantissa >>= 1; - a_exp++; - } - - // build binary representation of result - return MyFloat(sum_mantissa, a_exp, a_sign); - } - - friend MyFloat 
operator*(const MyFloat &a, const MyFloat &b) - { - uint16_t a_exp = a.in_mem.parts.exponent; - uint16_t b_exp = b.in_mem.parts.exponent; - uint16_t a_mantissa = a.in_mem.parts.mantissa | 0x0080; // Add implicit bit - uint16_t b_mantissa = b.in_mem.parts.mantissa | 0x0080; // Add implicit bi - - std::bitset<8> bits(a_exp); - std::cout << "Binary a exp: " << bits << std::endl; - - bool product_sign = a.in_mem.parts.sign ^ b.in_mem.parts.sign; - - if (a_exp == 0xFF || b_exp == 0xff) - { - return MyFloat(0, 0xFF, product_sign); - } - // Multiply mantissas - uint32_t product_mantissa = static_cast(a_mantissa) * static_cast(b_mantissa); - - // Add exponents - int product_exp = a_exp + b_exp - 127; - - product_mantissa = (product_mantissa + 0x40) >> 7; - - // Round to nearest even (round half to even) - if ((product_mantissa & 0x7F) == 0x40 && (product_mantissa & 0x1) != 0) - { - product_mantissa++; - } - if (product_mantissa & 0x0100) - { // Check if the implicit bit shifted to the left - product_mantissa >>= 1; - product_exp++; - } - else - { - product_mantissa &= 0x7F; // Remove the implicit bit - } - return MyFloat(product_mantissa, product_exp, product_sign); - } - - friend MyFloat operator/(const MyFloat &a, const MyFloat &b) - { - uint16_t a_exp = a.in_mem.parts.exponent; - uint16_t b_exp = b.in_mem.parts.exponent; - std::bitset<8> bits(b_exp); - std::cout << "Binary b exp: " << bits << std::endl; - uint16_t a_mantissa = a.in_mem.parts.mantissa | 0x0080; // Add implicit bit - uint16_t b_mantissa = b.in_mem.parts.mantissa | 0x0080; // Add implicit bit - - bool quotient_sign = a.in_mem.parts.sign ^ b.in_mem.parts.sign; - - // Check if divisor is zero - if (b_exp == 0 && b_mantissa == 0) - { - std::cout << "HERE" << std::endl; - return MyFloat(0, 0xFF, quotient_sign); // Return infinity with the appropriate sign - } - - // Check for infinity or zero in dividend - if (a_exp == 0xFF || a_exp == 0) - { - return MyFloat(0, a_exp, quotient_sign); - } - - // Subtract 
exponents - int quotient_exp = a_exp - b_exp + 127; - - // Divide mantissas - uint32_t quotient_mantissa = (static_cast(a_mantissa) << 8) / static_cast(b_mantissa); - - quotient_mantissa = (quotient_mantissa + 0x40) >> 8; - - // Round to nearest even (round half to even) - if ((quotient_mantissa & 0x1) != 0 && (quotient_mantissa & 0x7F) == 0x40) - { - quotient_mantissa--; - } - else if ((quotient_mantissa & 0x7F) == 0x40) - { - quotient_mantissa++; - } - - if (quotient_mantissa & 0x0100) - { // Check if the implicit bit shifted to the left - quotient_mantissa >>= 1; - quotient_exp++; - } - else - { - quotient_mantissa &= 0x7F; // Remove the implicit bit - } - return MyFloat(quotient_mantissa, quotient_exp, quotient_sign); - } -}; - -int main() -{ - float a = 8; - float b = 0; - std::cout << a << std::endl; - - std::bitset bits(*reinterpret_cast(&a)); - std::cout << "Binary representation of " << a << " is \n" - << bits << std::endl; - std::cout << "Binary representation of " << b << " is \n" - << bits << std::endl; - - MyFloat bfloat_version_of_a(a); - MyFloat bfloat_version_of_b(b); - MyFloat c = bfloat_version_of_a / bfloat_version_of_b; - - // You can now print the result stored in c or perform other operations with it. - - return 0; -} diff --git a/sim/common/bfloat.hpp b/sim/common/bfloat.hpp deleted file mode 100644 index e69de29bb2..0000000000 From 208c5b3804636dd6cf0690e00d681ddebfb5bb92 Mon Sep 17 00:00:00 2001 From: Udit Subramanya Date: Fri, 4 Oct 2024 08:56:49 -0400 Subject: [PATCH 263/407] reorg docs --- README.md | 10 ++++--- docs/contributing.md | 17 ++++------- docs/fpga_setup.md | 71 +++++++++++++++++--------------------------- docs/index.md | 25 +--------------- docs/simulation.md | 24 +++++++++++++-- 5 files changed, 60 insertions(+), 87 deletions(-) diff --git a/README.md b/README.md index ec8d10bd56..553939b501 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # Vortex GPGPU -Vortex is a full-stack open-source RISC-V GPGPU. 
+Vortex is a full-stack open-source RISC-V GPGPU. Vortex supports multiple *backend drivers*, including our C++ simulator (simx), an RTL simulator, and physical Xilinx and Altera FPGAs-- all controlled by a single driver script. The chosen driver determines the corresponding code invoked to run Vortex. Generally, developers will prototype their intended design in simx, before completing going forward with an RTL implementation. Alternatively, you can get up and running by selecting a driver of your choice and running a demo program. ## Specifications @@ -29,12 +29,14 @@ Vortex is a full-stack open-source RISC-V GPGPU. - `ci`: Continuous integration scripts. - `miscs`: Miscellaneous resources. -## Build Instructions -More detailed build instructions can be found [here](docs/install_vortex.md). +## Quick Start +The following steps demonstrate how to run Vortex with the default driver: simx. If you are interested in a different backend, look [here](docs/simulation.md). + ### Supported OS Platforms - Ubuntu 18.04, 20.04 - Centos 7 ### Toolchain Dependencies +The following dependencies will be fetched prebuilt by `toolchain_install.sh`. - [POCL](http://portablecl.org/) - [LLVM](https://llvm.org/) - [RISCV-GNU-TOOLCHAIN](https://github.com/riscv-collab/riscv-gnu-toolchain) @@ -107,4 +109,4 @@ echo "source /ci/toolchain_env.sh" >> ~/.bashrc ```sh ./ci/blackbox.sh --app=demo --debug=3 ``` -- For additional information, check out the /docs. +- For additional information, check out the [documentation](docs/index.md) diff --git a/docs/contributing.md b/docs/contributing.md index e871582729..f10f4017bf 100644 --- a/docs/contributing.md +++ b/docs/contributing.md @@ -22,7 +22,7 @@ However, contributions are strongly encouraged and keep the project moving forwa 6. 
Otherwise, you can go to your fork on Github online and manually create a PR (todo) (todo): how to name and format your PR, what information you should add to the PR, does not need to be too strict if you are attending the weekly meetings* 7. Github uses the following semantics: `base repository` gets the changes from your `head repository` -8. Therefore, you should set the `base repository` to `vortexgpgpu/vortex` and the `base` branch to `develop` since active development should only be added to this branch +8. Therefore, you should set the `base repository` to `vortexgpgpu/vortex` and the `base` branch to `master` since the master branch is protected by reviewed PRs. 9. And you should assign the `head repository` to `/vortex` (which represents your fork of vortex) and the `base` branch to the one created in step 2 10. Now that your intended PR has been specified, you should review the status. Check for merge conflicts, if all your commits are present, and all the modified files make sense 11. You can still make a PR if there are issues in step 10, just make sure the structure is correct according to steps 7-9 @@ -31,14 +31,7 @@ However, contributions are strongly encouraged and keep the project moving forwa 14. As long as the `head repository`'s `base` branch is the one you edited, the PR will automatically get the most recent changes 15. 
When all merge conflicts are resolved, changes are made, and tests pass you can have an admin merge your PR - -- You should create a new branch from develop that is clearly named with the feature that you want to add -- Avoid pushing directly to the `master` branch instead you will need to make a Pull Request (PR) -- There should be protections in place that prevent pushing directly to the main branch, but don't rely on it -- When you make a PR it will be tested against the continuous integration (ci) pipeline (see `continuous_integration.md`) -- It is not sufficient to just write some tests, they need to be incorporated into the ci pipeline to make sure they are run -- During a PR, you might receive feedback regarding your changes and you might need to make further commits to your branch - - -## Creating and Adding Tests -The CI pipeline the vortex tests. If you are contributing code changes, then review `testing.md` to learn how to integrate your own tests \ No newline at end of file +## What Makes a Good Contribution? +- If you are contributing code changes, then review `testing.md` to ensure your tests are integrated into the CI pipeline +- During a PR, you should consider the advice you are provided by your reviewers. Remember you keep adding commits to an open PR! +- If your change aims to fix an issue opened on Github, please tag that issue in the PR itself \ No newline at end of file diff --git a/docs/fpga_setup.md b/docs/fpga_setup.md index 2a673f8fc0..78ed63e257 100644 --- a/docs/fpga_setup.md +++ b/docs/fpga_setup.md @@ -1,7 +1,7 @@ # FPGA Startup and Configuration Guide ## Gaining Access to FPGA's with CRNCH -If you are associated with Georgia Tech and need remote access to the FPGA's, you can utilize CRNCH's server. +If you are associated with Georgia Tech (or related workshops) you can use CRNCH's server to gain remote access to FPGA's. Otherwise, you can skip to the Xilinx or Intel (Altera) synthesis steps below. ## What is CRNCH? 
@@ -37,11 +37,10 @@ CRNCH resources do not require any VPN access for GT members so you can head to Alternatively, you can `ssh` into rg with: `ssh @rg-login.crnch.gatech.edu` -(`ssh usubramanya3@rg-login.crnch.gatech.edu`) +(`ssh gburdell3@rg-login.crnch.gatech.edu`) -Once you’ve logged in, you can use Slurm to request other nodes within the testbed. See more information on Slurm at [this page](https://gt-crnch-rg.readthedocs.io/en/main/general/using-slurm.html). - -Note that you can also use VSCode to log into the Rogues Gallery via its terminal functionality. See [this page for more details](https://gt-crnch-rg.readthedocs.io/en/main/general/visual-studio-code.html). +## Synthesis for Xilinx Boards +First, you need to get access to the server with the Xilinx FPGAs. ## **What Machines are Available in the Rogues Gallery?** @@ -49,7 +48,7 @@ Complete list of machines can be found [here](https://gt-crnch-rg.readthedocs.io ## Which Machine do we Need from RG? -There are three primary nodes you might use. The table below summarizes: +There are three primary nodes you might use for Xilinx FPGAs. The table below summarizes: | Name | Device | Description | | --- | --- | --- | @@ -62,59 +61,43 @@ There are three primary nodes you might use. The table below summarizes: ## How to Access flubber for Synthesis? -Now that you have the files prepared and available on the FPGA node, you can start the synthesis. To run on hardware we need a rg-xilinx-fpga-hw cluster which includes **flubber[1,4-5]**. First `ssh` into the rouges gallery: +Now that you have the files prepared and available on the FPGA node, you can start the synthesis. To run on hardware we need a rg-xilinx-fpga-hw cluster which includes **flubber[1,4-5]**. First `ssh` into the rouges gallery, if you have not already. 
```bash ssh [@rg-login.crnch.gatech.edu](mailto:usubramanya3@rg-login.crnch.gatech.edu) ``` -Then, to access the hardware node you need to `ssh` into flubber: +Once you’ve logged in, you can use Slurm to request an interactive job. First, view the available Slurm Partitions here [here](https://gt-crnch-rg.readthedocs.io/en/main/general/using-slurm.html). Then, the example requests can be found [here](https://gt-crnch-rg.readthedocs.io/en/main/general/using-slurm-examples.html). +In our case we might run: ```bash -ssh flubber1 +salloc -p rg-fpga --nodes=1 --ntasks-per-node=1 --nodelist flubber1 --time=01:00:00 ``` -## Synthesis for Xillinx Boards - -XRT Environment Setup ----------------------- - - $ source /opt/xilinx/Vitis/2023.1/settings64.sh - $ source /opt/xilinx/xrt/setup.sh - +## Environment Setup +Once you are logged in, you will need to complete some first time configurations. -Check Installed FPGA Platforms ------------------------------- +### Clone Repo - $ platforminfo -l +### Source Configuration Scripts +``` +$ source /opt/xilinx/xrt/setup.sh +$ source /opt/xilinx/Vitis/2023.1/settings64.sh +``` +### Check Installed FPGA Platforms +`platforminfo -l` -Build FPGA image ----------------- +### Build FPGA image +The directory `hw/syn/xilinx/xrt` contains the makefile used to synthesize Vortex. 
+``` $ cd hw/syn/xilinx/xrt - $ PREFIX=test1 PLATFORM=xilinx_u50_gen3x16_xdma_5_202210_1 TARGET=hw NUM_CORES=4 make - + $ PREFIX=test1 PLATFORM=xilinx_u50_gen3x16_xdma_5_202210_1 TARGET=hw NUM_CORES=4 make build_u50_hw_4c.log 2>&1 & +``` Will run the synthesis under new build directory: BUILD_DIR := "\\_\\_\" - The generated bitstream will be located under /bin/vortex_afu.xclbin -Sample FPGA Run Test --------------------- - -Ensure you have the correct opae runtime for the FPGA target - - $ make -C runtime/xrt clean - $ TARGET=hw make -C runtime/xrt - -Run the following from your Vortex build directory - - $ TARGET=hw FPGA_BIN_DIR=/bin ./ci/blackbox.sh --driver=xrt --app=sgemm --args="-n128" - ---- - -The directory `hw/syn/xilinx/xrt` contains the makefile used to synthesize Vortex. - For long-running jobs, invocation of this makefile can be made of the following form: `[CONFIGS=] [PREFIX=] [NUM_CORES=<#>] TARGET=hw|hw_emu PLATFORM= nohup make > 2>&1 &` @@ -127,7 +110,7 @@ CONFIGS="-DL2_ENABLE -DDCACHE_SIZE=8192" PREFIX=build_4c_u280 NUM_CORES=4 TARGET The build is complete when the bitstream file `vortex_afu.xclbin` exists in `hw|hw_emu/bin`. -## Running a Program on FPGA +### Running a Program on Xilinx FPGA The blackbox.sh script in `ci` can be used to run a test with Vortex’s xrt driver using the following command: @@ -135,9 +118,9 @@ The blackbox.sh script in `ci` can be used to run a test with Vortex’s xrt d For example: -`FPGA_BIN_DIR=`realpath hw/syn/xilinx/xrt/build_4c_u280_xilinx_u280_gen3x16_xdma_1_202211_1_hw/bin` TARGET=hw PLATFORM=xilinx_u280_gen3x16_xdma_1_202211_1 ./ci/blackbox.sh --driver=xrt --app=demo` +```FPGA_BIN_DIR= hw/syn/xilinx/xrt/build_4c_u280_xilinx_u280_gen3x16_xdma_1_202211_1_hw/bin TARGET=hw PLATFORM=xilinx_u280_gen3x16_xdma_1_202211_1 ./ci/blackbox.sh --driver=xrt --app=demo``` -## Synthesis for Intel (Altera) Boards +### Synthesis for Intel (Altera) Boards To set up the environment, source the XRT setup.sh and other Xilinx scripts. 
For example: diff --git a/docs/index.md b/docs/index.md index 14a45f3357..a53a2fd15f 100644 --- a/docs/index.md +++ b/docs/index.md @@ -5,29 +5,6 @@ - [Codebase Layout](codebase.md) - [Microarchitecture](microarchitecture.md) - [Cache Subsystem](cache_subsystem.md) -- [Software](software.md) - [Simulation](simulation.md) -- [Altera FPGA Setup Guide](altera_fpga_guide.md) -- [Xilinx FPGA Setup Guide](xilinx_fpga_guide.md) +- [Contributing](contributing.md) - [Debugging](debugging.md) -- [Useful Links](references.md) - -## Installation - -- For the different environments Vortex supports, [read this document](environment_setup.md). -- To install on your own system, [follow this document](install_vortex.md). - -## Quick Start Scenarios - -Running Vortex simulators with different configurations: -- Run basic driver test with rtlsim driver and Vortex config of 2 clusters, 2 cores, 2 warps, 4 threads - - $ ./ci/blackbox.sh --driver=rtlsim --clusters=2 --cores=2 --warps=2 --threads=4 --app=basic - -- Run demo driver test with opae driver and Vortex config of 1 clusters, 4 cores, 4 warps, 2 threads - - $ ./ci/blackbox.sh --driver=opae --clusters=1 --cores=4 --warps=4 --threads=2 --app=demo - -- Run dogfood driver test with simx driver and Vortex config of 4 cluster, 4 cores, 8 warps, 6 threads - - $ ./ci/blackbox.sh --driver=simx --clusters=4 --cores=4 --warps=8 --threads=6 --app=dogfood diff --git a/docs/simulation.md b/docs/simulation.md index e1c5780344..d55b3cd943 100644 --- a/docs/simulation.md +++ b/docs/simulation.md @@ -6,11 +6,14 @@ ### Cycle-Approximate Simulation -SimX is a C++ cycle-level in-house simulator developed for Vortex. The relevant files are located in the `simX` folder. +SimX is a C++ cycle-level in-house simulator developed for Vortex. The relevant files are located in the `simx` folder. The [readme](README.md) has the most detailed instructions for building and running simX. 
+ +- To install on your own system, [follow this document](install_vortex.md). +- For the different Georgia Tech environments Vortex supports, [read this document](environment_setup.md). ### FGPA Simulation -The guide to build the fpga with specific configurations is located [here.](fpga_setup.md) You can find instructions for both Xilinx and Intel (Altera) based FPGAs. +The guide to build the fpga with specific configurations is located [here.](fpga_setup.md) You can find instructions for both Xilinx and Altera based FPGAs. ### How to Test @@ -47,4 +50,19 @@ PERF: core1: instrs=90693, cycles=53108, IPC=1.707709 PERF: core2: instrs=90849, cycles=53107, IPC=1.710678 PERF: core3: instrs=90836, cycles=50347, IPC=1.804199 PERF: instrs=363180, cycles=53108, IPC=6.838518 -``` \ No newline at end of file +``` + +## Additional Quick Start Scenarios + +Running Vortex simulators with different configurations: +- Run basic driver test with rtlsim driver and Vortex config of 2 clusters, 2 cores, 2 warps, 4 threads + + $ ./ci/blackbox.sh --driver=rtlsim --clusters=2 --cores=2 --warps=2 --threads=4 --app=basic + +- Run demo driver test with opae driver and Vortex config of 1 clusters, 4 cores, 4 warps, 2 threads + + $ ./ci/blackbox.sh --driver=opae --clusters=1 --cores=4 --warps=4 --threads=2 --app=demo + +- Run dogfood driver test with simx driver and Vortex config of 4 cluster, 4 cores, 8 warps, 6 threads + + $ ./ci/blackbox.sh --driver=simx --clusters=4 --cores=4 --warps=8 --threads=6 --app=dogfood \ No newline at end of file From 0bf79a0f0526621932d9cf502be0ee50ae5d49af Mon Sep 17 00:00:00 2001 From: Jaewon Lee Date: Fri, 4 Oct 2024 10:13:31 -0400 Subject: [PATCH 264/407] Revert "Initial HBM changes for RTL" --- hw/rtl/Vortex_hbm.sv | 229 ---------- hw/rtl/cache/VX_cache_bypass.sv | 2 - hw/rtl/cache/VX_cache_bypass_l3.sv | 355 ---------------- hw/rtl/cache/VX_cache_l3.sv | 640 ---------------------------- hw/rtl/cache/VX_cache_wrap_l3.sv | 331 --------------- 
sim/rtlsim/Makefile | 4 +- sim/rtlsim/processor_hbm.cpp | 656 ----------------------------- third_party/softfloat | 2 +- 8 files changed, 3 insertions(+), 2216 deletions(-) delete mode 100644 hw/rtl/Vortex_hbm.sv delete mode 100644 hw/rtl/cache/VX_cache_bypass_l3.sv delete mode 100644 hw/rtl/cache/VX_cache_l3.sv delete mode 100644 hw/rtl/cache/VX_cache_wrap_l3.sv delete mode 100644 sim/rtlsim/processor_hbm.cpp diff --git a/hw/rtl/Vortex_hbm.sv b/hw/rtl/Vortex_hbm.sv deleted file mode 100644 index d2ffc344df..0000000000 --- a/hw/rtl/Vortex_hbm.sv +++ /dev/null @@ -1,229 +0,0 @@ -// Copyright © 2019-2023 -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -`include "VX_define.vh" - -module Vortex_hbm import VX_gpu_pkg::*; ( - `SCOPE_IO_DECL - - // Clock - input wire clk, - input wire reset, - - // Memory request - output wire mem_req_valid [`NUM_MEM_PORTS], - output wire mem_req_rw [`NUM_MEM_PORTS], - output wire [`VX_MEM_BYTEEN_WIDTH-1:0] mem_req_byteen [`NUM_MEM_PORTS], - output wire [`VX_MEM_ADDR_WIDTH-1:0] mem_req_addr [`NUM_MEM_PORTS], - output wire [`VX_MEM_DATA_WIDTH-1:0] mem_req_data [`NUM_MEM_PORTS], - output wire [`VX_MEM_TAG_WIDTH-1:0] mem_req_tag [`NUM_MEM_PORTS], - input wire mem_req_ready [`NUM_MEM_PORTS], - - // Memory response - input wire mem_rsp_valid [`NUM_MEM_PORTS], - input wire [`VX_MEM_DATA_WIDTH-1:0] mem_rsp_data [`NUM_MEM_PORTS], - input wire [`VX_MEM_TAG_WIDTH-1:0] mem_rsp_tag [`NUM_MEM_PORTS], - output wire mem_rsp_ready [`NUM_MEM_PORTS], - - // DCR write request - input wire dcr_wr_valid, - input wire [`VX_DCR_ADDR_WIDTH-1:0] dcr_wr_addr, - input wire [`VX_DCR_DATA_WIDTH-1:0] dcr_wr_data, - - // Status - output wire busy -); - -`ifdef SCOPE - localparam scope_cluster = 0; - `SCOPE_IO_SWITCH (`NUM_CLUSTERS); -`endif - -`ifdef PERF_ENABLE - VX_mem_perf_if mem_perf_if(); - assign mem_perf_if.icache = 'x; - assign mem_perf_if.dcache = 'x; - assign mem_perf_if.l2cache = 'x; - assign mem_perf_if.lmem = 'x; -`endif - - VX_mem_bus_if #( - .DATA_SIZE (`L2_LINE_SIZE), - .TAG_WIDTH (L2_MEM_TAG_WIDTH) - ) per_cluster_mem_bus_if[`NUM_CLUSTERS](); - - VX_mem_bus_if #( - .DATA_SIZE (`L3_LINE_SIZE), - .TAG_WIDTH (L3_MEM_TAG_WIDTH) - ) mem_bus_if[`NUM_MEM_PORTS](); - - `RESET_RELAY (l3_reset, reset); - - VX_cache_wrap_l3 #( - .INSTANCE_ID ("l3cache"), - .CACHE_SIZE (`L3_CACHE_SIZE), - .LINE_SIZE (`L3_LINE_SIZE), - .NUM_BANKS (`L3_NUM_BANKS), - .NUM_WAYS (`L3_NUM_WAYS), - .WORD_SIZE (L3_WORD_SIZE), - .NUM_MEM_PORTS (`NUM_MEM_PORTS), - .NUM_REQS (L3_NUM_REQS), - .CRSQ_SIZE (`L3_CRSQ_SIZE), - .MSHR_SIZE (`L3_MSHR_SIZE), - .MRSQ_SIZE (`L3_MRSQ_SIZE), - .MREQ_SIZE (`L3_WRITEBACK ? 
`L3_MSHR_SIZE : `L3_MREQ_SIZE), - .TAG_WIDTH (L2_MEM_TAG_WIDTH), - .WRITE_ENABLE (1), - .WRITEBACK (`L3_WRITEBACK), - .DIRTY_BYTES (`L3_WRITEBACK), - .UUID_WIDTH (`UUID_WIDTH), - .CORE_OUT_BUF (2), - .MEM_OUT_BUF (2), - .NC_ENABLE (1), - .PASSTHRU (!`L3_ENABLED) - ) l3cache ( - .clk (clk), - .reset (l3_reset), - - `ifdef PERF_ENABLE - .cache_perf (mem_perf_if.l3cache), - `endif - - .core_bus_if (per_cluster_mem_bus_if), - .mem_bus_if (mem_bus_if) - ); - - wire mem_req_fire[`NUM_MEM_PORTS-1:0]; - wire mem_rsp_fire[`NUM_MEM_PORTS-1:0]; - - for (genvar i = 0; i < `NUM_MEM_PORTS; ++i) begin - assign mem_req_valid[i] = mem_bus_if[i].req_valid; - assign mem_req_rw[i] = mem_bus_if[i].req_data.rw; - assign mem_req_byteen[i]= mem_bus_if[i].req_data.byteen; - assign mem_req_addr[i] = mem_bus_if[i].req_data.addr; - assign mem_req_data[i] = mem_bus_if[i].req_data.data; - assign mem_req_tag[i] = mem_bus_if[i].req_data.tag; - assign mem_bus_if[i].req_ready = mem_req_ready[i]; - `UNUSED_VAR (mem_bus_if[i].req_data.atype) - - assign mem_bus_if[i].rsp_valid = mem_rsp_valid[i]; - assign mem_bus_if[i].rsp_data.data = mem_rsp_data[i]; - assign mem_bus_if[i].rsp_data.tag = mem_rsp_tag[i]; - assign mem_rsp_ready[i] = mem_bus_if[i].rsp_ready; - - assign mem_req_fire[i] = mem_req_valid[i] && mem_req_ready[i]; - assign mem_rsp_fire[i] = mem_rsp_valid[i] && mem_rsp_ready[i]; - `UNUSED_VAR (mem_req_fire[i]) - `UNUSED_VAR (mem_rsp_fire[i]) - end - - VX_dcr_bus_if dcr_bus_if(); - assign dcr_bus_if.write_valid = dcr_wr_valid; - assign dcr_bus_if.write_addr = dcr_wr_addr; - assign dcr_bus_if.write_data = dcr_wr_data; - - wire [`NUM_CLUSTERS-1:0] per_cluster_busy; - - // Generate all clusters - for (genvar cluster_id = 0; cluster_id < `NUM_CLUSTERS; ++cluster_id) begin : clusters - - `RESET_RELAY (cluster_reset, reset); - - VX_dcr_bus_if cluster_dcr_bus_if(); - `BUFFER_DCR_BUS_IF (cluster_dcr_bus_if, dcr_bus_if, (`NUM_CLUSTERS > 1)); - - VX_cluster #( - .CLUSTER_ID (cluster_id), - .INSTANCE_ID 
($sformatf("cluster%0d", cluster_id)) - ) cluster ( - `SCOPE_IO_BIND (scope_cluster + cluster_id) - - .clk (clk), - .reset (cluster_reset), - - `ifdef PERF_ENABLE - .mem_perf_if (mem_perf_if), - `endif - - .dcr_bus_if (cluster_dcr_bus_if), - - .mem_bus_if (per_cluster_mem_bus_if[cluster_id]), - - .busy (per_cluster_busy[cluster_id]) - ); - end - - `BUFFER_EX(busy, (| per_cluster_busy), 1'b1, (`NUM_CLUSTERS > 1)); - -`ifdef PERF_ENABLE - - reg [`PERF_CTR_BITS-1:0] perf_mem_pending_reads; - mem_perf_t mem_perf; - - for (genvar i = 0; i < `NUM_MEM_PORTS; ++i) begin - always @(posedge clk) begin - if (reset) begin - perf_mem_pending_reads <= '0; - end else begin - perf_mem_pending_reads <= $signed(perf_mem_pending_reads) + - `PERF_CTR_BITS'($signed(2'(mem_req_fire[i] && ~mem_bus_if[i].req_data.rw) - 2'(mem_rsp_fire[i]))); - end - end - end - - wire mem_rd_req_fire[`NUM_MEM_PORTS-1:0]; - wire mem_wr_req_fire[`NUM_MEM_PORTS-1:0]; - - for (genvar i = 0; i < `NUM_MEM_PORTS; ++i) begin - assign mem_rd_req_fire[i] = mem_req_fire[i] && ~mem_bus_if[i].req_data.rw; - assign mem_wr_req_fire[i] = mem_req_fire[i] && mem_bus_if[i].req_data.rw; - end - - always @(posedge clk) begin - if (reset) begin - mem_perf <= '0; - end else begin - for (int i = 0; i < `NUM_MEM_PORTS; ++i) begin - mem_perf.reads <= mem_perf.reads + `PERF_CTR_BITS'(mem_rd_req_fire[i]); - mem_perf.writes <= mem_perf.writes + `PERF_CTR_BITS'(mem_wr_req_fire[i]); - end - mem_perf.latency <= mem_perf.latency + perf_mem_pending_reads; - end - end - assign mem_perf_if.mem = mem_perf; - -`endif - -`ifdef DBG_TRACE_MEM - always @(posedge clk) begin - for (int i = 0; i < `NUM_MEM_PORTS; ++i) begin - if (mem_req_fire[i]) begin - if (mem_req_rw[i]) - `TRACE(1, ("%d: MEM Wr Req: addr=0x%0h, tag=0x%0h, byteen=0x%0h data=0x%0h, bank=%d\n", $time, `TO_FULL_ADDR(mem_req_addr[i]), mem_req_tag[i], mem_req_byteen[i], mem_req_data[i], i)); - else - `TRACE(1, ("%d: MEM Rd Req: addr=0x%0h, tag=0x%0h, byteen=0x%0h, bank=%d\n", $time, 
`TO_FULL_ADDR(mem_req_addr[i]), mem_req_tag[i], mem_req_byteen[i], i)); - end - if (mem_rsp_fire[i]) begin - `TRACE(1, ("%d: MEM Rd Rsp: tag=0x%0h, data=0x%0h\n", $time, mem_rsp_tag[i], mem_rsp_data[i])); - end - end - end -`endif - -`ifdef SIMULATION - always @(posedge clk) begin - $fflush(); // flush stdout buffer - end -`endif - -endmodule diff --git a/hw/rtl/cache/VX_cache_bypass.sv b/hw/rtl/cache/VX_cache_bypass.sv index 18dfd50ad4..379d33e8a9 100644 --- a/hw/rtl/cache/VX_cache_bypass.sv +++ b/hw/rtl/cache/VX_cache_bypass.sv @@ -250,9 +250,7 @@ module VX_cache_bypass #( end end - `IGNORE_UNUSED_BEGIN wire [(MEM_TAG_OUT_WIDTH - NC_ENABLE)-1:0] mem_rsp_tag_id_nc; - `IGNORE_UNUSED_END VX_bits_remove #( .N (MEM_TAG_OUT_WIDTH), diff --git a/hw/rtl/cache/VX_cache_bypass_l3.sv b/hw/rtl/cache/VX_cache_bypass_l3.sv deleted file mode 100644 index 69393cfc67..0000000000 --- a/hw/rtl/cache/VX_cache_bypass_l3.sv +++ /dev/null @@ -1,355 +0,0 @@ -// Copyright © 2019-2023 -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -`include "VX_cache_define.vh" - -module VX_cache_bypass_l3 #( - parameter NUM_REQS = 1, - parameter NUM_OUTPUTS = 1, - parameter TAG_SEL_IDX = 0, - - parameter PASSTHRU = 0, - parameter NC_ENABLE = 0, - - parameter WORD_SIZE = 1, - parameter LINE_SIZE = 1, - - parameter CORE_ADDR_WIDTH = 1, - - parameter CORE_TAG_WIDTH = 1, - - parameter MEM_ADDR_WIDTH = 1, - parameter MEM_TAG_IN_WIDTH = 1, - parameter MEM_TAG_OUT_WIDTH = 1, - - parameter UUID_WIDTH = 0, - - parameter CORE_OUT_BUF = 0, - parameter MEM_OUT_BUF = 0, - - parameter CORE_DATA_WIDTH = WORD_SIZE * 8 - ) ( - input wire clk, - input wire reset, - - // Core request in - VX_mem_bus_if.slave core_bus_in_if [NUM_REQS], - - // Core request out - VX_mem_bus_if.master core_bus_out_if [NUM_REQS], - - // Memory request in - VX_mem_bus_if.slave mem_bus_in_if, - - // Memory request out - VX_mem_bus_if.master mem_bus_out_if -); - localparam DIRECT_PASSTHRU = PASSTHRU && (`CS_WORD_SEL_BITS == 0) && (NUM_REQS == 1); - - localparam REQ_SEL_BITS = `CLOG2(NUM_REQS); - localparam MUX_DATAW = 1 + WORD_SIZE + CORE_ADDR_WIDTH + `ADDR_TYPE_WIDTH + CORE_DATA_WIDTH + CORE_TAG_WIDTH; - - localparam WORDS_PER_LINE = LINE_SIZE / WORD_SIZE; - localparam WSEL_BITS = `CLOG2(WORDS_PER_LINE); - - localparam CORE_TAG_ID_BITS = CORE_TAG_WIDTH - UUID_WIDTH; - localparam MEM_TAG_ID_BITS = REQ_SEL_BITS + WSEL_BITS + CORE_TAG_ID_BITS; - localparam MEM_TAG_BYPASS_BITS = UUID_WIDTH + MEM_TAG_ID_BITS; - - `STATIC_ASSERT(0 == (`IO_BASE_ADDR % `MEM_BLOCK_SIZE), ("invalid parameter")) - - // handle core requests /////////////////////////////////////////////////// - - wire core_req_nc_valid; - wire [NUM_REQS-1:0] core_req_nc_valids; - wire [NUM_REQS-1:0] core_req_nc_idxs; - wire [`UP(REQ_SEL_BITS)-1:0] core_req_nc_idx; - wire [NUM_REQS-1:0] core_req_nc_sel; - wire [NUM_REQS-1:0] core_req_nc_ready; - - for (genvar i = 0; i < NUM_REQS; ++i) begin - if (PASSTHRU != 0) begin - assign core_req_nc_idxs[i] = 1'b1; - end else if (NC_ENABLE) begin - assign 
core_req_nc_idxs[i] = core_bus_in_if[i].req_data.atype[`ADDR_TYPE_IO]; - end else begin - assign core_req_nc_idxs[i] = 1'b0; - end - assign core_req_nc_valids[i] = core_bus_in_if[i].req_valid && core_req_nc_idxs[i]; - end - - /* - - VX_generic_arbiter #( - .NUM_REQS (NUM_REQS), - .TYPE (PASSTHRU ? "R" : "P") - ) core_req_nc_arb ( - .clk (clk), - .reset (reset), - .requests (core_req_nc_valids), - .grant_index (core_req_nc_idx), - .grant_onehot (core_req_nc_sel), - .grant_valid (core_req_nc_valid), - .grant_ready (core_req_nc_ready) - ); - */ - - for (genvar i = 0; i < NUM_REQS; ++i) begin - assign core_bus_out_if[i].req_valid = core_bus_in_if[i].req_valid && ~core_req_nc_idxs[i]; - assign core_bus_out_if[i].req_data = core_bus_in_if[i].req_data; - assign core_bus_in_if[i].req_ready = core_req_nc_valids[i] ? (core_req_nc_ready && core_req_nc_sel[i]) - : core_bus_out_if[i].req_ready; - end - - // handle memory requests ///////////////////////////////////////////////// - - wire [NUM_OUTPUTS-1:0] mem_req_out_valid; - wire [NUM_OUTPUTS-1:0] mem_req_out_rw; - wire [NUM_OUTPUTS-1:0][LINE_SIZE-1:0] mem_req_out_byteen; - wire [NUM_OUTPUTS-1:0][`CS_MEM_ADDR_WIDTH-1:0] mem_req_out_addr; - wire [NUM_OUTPUTS-1:0][`ADDR_TYPE_WIDTH-1:0] mem_req_out_atype; - wire [NUM_OUTPUTS-1:0][`CS_LINE_WIDTH-1:0] mem_req_out_data; - wire [NUM_OUTPUTS-1:0][MEM_TAG_OUT_WIDTH-1:0] mem_req_out_tag; - wire [NUM_OUTPUTS-1:0] mem_req_out_ready; - - wire [NUM_REQS-1:0] core_req_nc_sel_rw; - wire [NUM_REQS-1:0][WORD_SIZE-1:0] core_req_nc_sel_byteen; - wire [NUM_REQS-1:0][CORE_ADDR_WIDTH-1:0] core_req_nc_sel_addr; - wire [NUM_REQS-1:0][`ADDR_TYPE_WIDTH-1:0] core_req_nc_sel_atype; - wire [NUM_REQS-1:0][CORE_DATA_WIDTH-1:0] core_req_nc_sel_data; - wire [NUM_REQS-1:0][CORE_TAG_WIDTH-1:0] core_req_nc_sel_tag; - - wire [NUM_REQS-1:0][MUX_DATAW-1:0] core_req_nc_mux_in; - for (genvar i = 0; i < NUM_REQS; ++i) begin - assign core_req_nc_mux_in[i] = { - core_bus_in_if[i].req_data.rw, - 
core_bus_in_if[i].req_data.byteen, - core_bus_in_if[i].req_data.addr, - core_bus_in_if[i].req_data.atype, - core_bus_in_if[i].req_data.data, - core_bus_in_if[i].req_data.tag - }; - end - - assign { - core_req_nc_sel_rw, - core_req_nc_sel_byteen, - core_req_nc_sel_addr, - core_req_nc_sel_atype, - core_req_nc_sel_data, - core_req_nc_sel_tag - } = core_req_nc_mux_in; - - assign core_req_nc_ready = ~mem_bus_in_if.req_valid && mem_req_out_ready; - - assign mem_req_out_valid = mem_bus_in_if.req_valid || core_req_nc_valid; - assign mem_req_out_rw = mem_bus_in_if.req_valid ? mem_bus_in_if.req_data.rw : core_req_nc_sel_rw; - assign mem_req_out_addr = mem_bus_in_if.req_valid ? mem_bus_in_if.req_data.addr : core_req_nc_sel_addr[WSEL_BITS +: MEM_ADDR_WIDTH]; - assign mem_req_out_atype = mem_bus_in_if.req_valid ? mem_bus_in_if.req_data.atype : core_req_nc_sel_atype; - - wire [MEM_TAG_ID_BITS-1:0] mem_req_tag_id_bypass; - - wire [CORE_TAG_ID_BITS-1:0] core_req_in_id = core_req_nc_sel_tag[CORE_TAG_ID_BITS-1:0]; - - if (WORDS_PER_LINE > 1) begin - reg [WORDS_PER_LINE-1:0][WORD_SIZE-1:0] mem_req_byteen_in_r; - reg [WORDS_PER_LINE-1:0][CORE_DATA_WIDTH-1:0] mem_req_data_in_r; - - wire [WSEL_BITS-1:0] req_wsel = core_req_nc_sel_addr[WSEL_BITS-1:0]; - - always @(*) begin - mem_req_byteen_in_r = '0; - mem_req_byteen_in_r[req_wsel] = core_req_nc_sel_byteen; - - mem_req_data_in_r = 'x; - mem_req_data_in_r[req_wsel] = core_req_nc_sel_data; - end - - assign mem_req_out_byteen = mem_bus_in_if.req_valid ? mem_bus_in_if.req_data.byteen : mem_req_byteen_in_r; - assign mem_req_out_data = mem_bus_in_if.req_valid ? mem_bus_in_if.req_data.data : mem_req_data_in_r; - if (NUM_REQS > 1) begin - assign mem_req_tag_id_bypass = MEM_TAG_ID_BITS'({core_req_nc_idx, req_wsel, core_req_in_id}); - end else begin - assign mem_req_tag_id_bypass = MEM_TAG_ID_BITS'({req_wsel, core_req_in_id}); - end - end else begin - assign mem_req_out_byteen = mem_bus_in_if[0].req_valid ? 
mem_bus_in_if[0].req_data.byteen : core_req_nc_sel_byteen; - assign mem_req_out_data = mem_bus_in_if[0].req_valid ? mem_bus_in_if[0].req_data.data : core_req_nc_sel_data; - if (NUM_REQS > 1) begin - assign mem_req_tag_id_bypass = MEM_TAG_ID_BITS'({core_req_nc_idx, core_req_in_id}); - end else begin - assign mem_req_tag_id_bypass = MEM_TAG_ID_BITS'({core_req_in_id}); - end - end - - wire [MEM_TAG_BYPASS_BITS-1:0] mem_req_tag_bypass; - - if (UUID_WIDTH != 0) begin - assign mem_req_tag_bypass = {core_req_nc_sel_tag[CORE_TAG_ID_BITS +: UUID_WIDTH], mem_req_tag_id_bypass}; - end else begin - assign mem_req_tag_bypass = mem_req_tag_id_bypass; - end - - if (PASSTHRU != 0) begin - assign mem_req_out_tag = mem_req_tag_bypass; - `UNUSED_VAR (mem_bus_in_if[0].req_data.tag) - end else begin - if (NC_ENABLE) begin - VX_bits_insert #( - .N (MEM_TAG_OUT_WIDTH-1), - .S (1), - .POS (TAG_SEL_IDX) - ) mem_req_tag_in_nc_insert ( - .data_in (mem_bus_in_if[0].req_valid ? (MEM_TAG_OUT_WIDTH-1)'(mem_bus_in_if[0].req_data.tag) : (MEM_TAG_OUT_WIDTH-1)'(mem_req_tag_bypass)), - .ins_in (~mem_bus_in_if[0].req_valid), - .data_out (mem_req_out_tag) - ); - end else begin - assign mem_req_out_tag = mem_bus_in_if[0].req_data.tag; - end - end - - assign mem_bus_in_if[0].req_ready = mem_req_out_ready; - - VX_elastic_buffer #( - .DATAW (1 + LINE_SIZE + `CS_MEM_ADDR_WIDTH + `ADDR_TYPE_WIDTH + `CS_LINE_WIDTH + MEM_TAG_OUT_WIDTH), - .SIZE ((!DIRECT_PASSTHRU) ? 
`TO_OUT_BUF_SIZE(MEM_OUT_BUF) : 0), - .OUT_REG (`TO_OUT_BUF_REG(MEM_OUT_BUF)) - ) mem_req_buf ( - .clk (clk), - .reset (reset), - .valid_in (mem_req_out_valid), - .ready_in (mem_req_out_ready), - .data_in ({mem_req_out_rw, mem_req_out_byteen, mem_req_out_addr, mem_req_out_atype, mem_req_out_data, mem_req_out_tag}), - .data_out ({mem_bus_out_if[0].req_data.rw, mem_bus_out_if[0].req_data.byteen, mem_bus_out_if[0].req_data.addr, mem_bus_out_if[0].req_data.atype, mem_bus_out_if[0].req_data.data, mem_bus_out_if[0].req_data.tag}), - .valid_out (mem_bus_out_if[0].req_valid), - .ready_out (mem_bus_out_if[0].req_ready) - ); - - // handle core responses ////////////////////////////////////////////////// - - wire [NUM_REQS-1:0] core_rsp_in_valid; - wire [NUM_REQS-1:0][`CS_WORD_WIDTH-1:0] core_rsp_in_data; - wire [NUM_REQS-1:0][CORE_TAG_WIDTH-1:0] core_rsp_in_tag; - wire [NUM_REQS-1:0] core_rsp_in_ready; - - wire is_mem_rsp_nc; - if (PASSTHRU != 0) begin - assign is_mem_rsp_nc = mem_bus_out_if[0].rsp_valid; - end else begin - if (NC_ENABLE) begin - assign is_mem_rsp_nc = mem_bus_out_if[0].rsp_valid && mem_bus_out_if[0].rsp_data.tag[TAG_SEL_IDX]; - end else begin - assign is_mem_rsp_nc = 1'b0; - end - end - - wire [(MEM_TAG_OUT_WIDTH - NC_ENABLE)-1:0] mem_rsp_tag_id_nc; - - VX_bits_remove #( - .N (MEM_TAG_OUT_WIDTH), - .S (NC_ENABLE), - .POS (TAG_SEL_IDX) - ) mem_rsp_tag_in_nc_remove ( - .data_in (mem_bus_out_if[0].rsp_data.tag), - .data_out (mem_rsp_tag_id_nc) - ); - - wire [`UP(REQ_SEL_BITS)-1:0] rsp_idx; - if (NUM_REQS > 1) begin - assign rsp_idx = mem_rsp_tag_id_nc[(CORE_TAG_ID_BITS + WSEL_BITS) +: REQ_SEL_BITS]; - end else begin - assign rsp_idx = 1'b0; - end - - reg [NUM_REQS-1:0] rsp_nc_valid_r; - always @(*) begin - rsp_nc_valid_r = '0; - rsp_nc_valid_r[rsp_idx] = is_mem_rsp_nc; - end - - for (genvar i = 0; i < NUM_REQS; ++i) begin - assign core_rsp_in_valid[i] = core_bus_out_if[i].rsp_valid || rsp_nc_valid_r[i]; - assign core_bus_out_if[i].rsp_ready = 
core_rsp_in_ready[i]; - end - - if (WORDS_PER_LINE > 1) begin - wire [WSEL_BITS-1:0] rsp_wsel = mem_rsp_tag_id_nc[CORE_TAG_ID_BITS +: WSEL_BITS]; - for (genvar i = 0; i < NUM_REQS; ++i) begin - assign core_rsp_in_data[i] = core_bus_out_if[i].rsp_valid ? - core_bus_out_if[i].rsp_data.data : mem_bus_out_if[0].rsp_data.data[rsp_wsel * CORE_DATA_WIDTH +: CORE_DATA_WIDTH]; - end - end else begin - for (genvar i = 0; i < NUM_REQS; ++i) begin - assign core_rsp_in_data[i] = core_bus_out_if[i].rsp_valid ? core_bus_out_if[i].rsp_data.data : mem_bus_out_if[0].rsp_data.data; - end - end - - wire [(CORE_TAG_ID_BITS + UUID_WIDTH)-1:0] mem_rsp_tag_in_nc2; - if (UUID_WIDTH != 0) begin - assign mem_rsp_tag_in_nc2 = {mem_rsp_tag_id_nc[(MEM_TAG_OUT_WIDTH - NC_ENABLE)-1 -: UUID_WIDTH], mem_rsp_tag_id_nc[CORE_TAG_ID_BITS-1:0]}; - end else begin - assign mem_rsp_tag_in_nc2 = mem_rsp_tag_id_nc[CORE_TAG_ID_BITS-1:0]; - end - - for (genvar i = 0; i < NUM_REQS; ++i) begin - if (PASSTHRU) begin - assign core_rsp_in_tag[i] = mem_rsp_tag_in_nc2; - end else if (NC_ENABLE) begin - assign core_rsp_in_tag[i] = core_bus_out_if[i].rsp_valid ? core_bus_out_if[i].rsp_data.tag : mem_rsp_tag_in_nc2; - end else begin - assign core_rsp_in_tag[i] = core_bus_out_if[i].rsp_data.tag; - end - end - - for (genvar i = 0; i < NUM_REQS; ++i) begin - VX_elastic_buffer #( - .DATAW (`CS_WORD_WIDTH + CORE_TAG_WIDTH), - .SIZE ((!DIRECT_PASSTHRU) ? 
`TO_OUT_BUF_SIZE(CORE_OUT_BUF) : 0), - .OUT_REG (`TO_OUT_BUF_REG(CORE_OUT_BUF)) - ) core_rsp_buf ( - .clk (clk), - .reset (reset), - .valid_in (core_rsp_in_valid[i]), - .ready_in (core_rsp_in_ready[i]), - .data_in ({core_rsp_in_data[i], core_rsp_in_tag[i]}), - .data_out ({core_bus_in_if[i].rsp_data.data, core_bus_in_if[i].rsp_data.tag}), - .valid_out (core_bus_in_if[i].rsp_valid), - .ready_out (core_bus_in_if[i].rsp_ready) - ); - end - - // handle memory responses //////////////////////////////////////////////// - - if (PASSTHRU != 0) begin - assign mem_bus_in_if[0].rsp_valid = 1'b0; - assign mem_bus_in_if[0].rsp_data.data = '0; - assign mem_bus_in_if[0].rsp_data.tag = '0; - end else if (NC_ENABLE) begin - assign mem_bus_in_if[0].rsp_valid = mem_bus_out_if[0].rsp_valid && ~mem_bus_out_if[0].rsp_data.tag[TAG_SEL_IDX]; - assign mem_bus_in_if[0].rsp_data.data = mem_bus_out_if[0].rsp_data.data; - assign mem_bus_in_if[0].rsp_data.tag = mem_rsp_tag_id_nc[MEM_TAG_IN_WIDTH-1:0]; - end else begin - assign mem_bus_in_if[0].rsp_valid = mem_bus_out_if[0].rsp_valid; - assign mem_bus_in_if[0].rsp_data.data = mem_bus_out_if[0].rsp_data.data; - assign mem_bus_in_if[0].rsp_data.tag = mem_rsp_tag_id_nc; - end - - wire [NUM_REQS-1:0] core_rsp_out_valid; - for (genvar i = 0; i < NUM_REQS; ++i) begin - assign core_rsp_out_valid[i] = core_bus_out_if[i].rsp_valid; - end - - assign mem_bus_out_if[0].rsp_ready = is_mem_rsp_nc ? (~core_rsp_out_valid[rsp_idx] && core_rsp_in_ready[rsp_idx]) : mem_bus_in_if[0].rsp_ready; - -endmodule diff --git a/hw/rtl/cache/VX_cache_l3.sv b/hw/rtl/cache/VX_cache_l3.sv deleted file mode 100644 index 7eb7556de4..0000000000 --- a/hw/rtl/cache/VX_cache_l3.sv +++ /dev/null @@ -1,640 +0,0 @@ -// Copyright © 2019-2023 -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -`include "VX_cache_define.vh" - -module VX_cache_l3 import VX_gpu_pkg::*; #( - parameter `STRING INSTANCE_ID = "", - - // Number of Word requests per cycle - parameter NUM_REQS = 4, - - // Size of cache in bytes - parameter CACHE_SIZE = 4096, - // Size of line inside a bank in bytes - parameter LINE_SIZE = 64, - // Number of banks - parameter NUM_BANKS = 1, - // Number of memory ports - parameter NUM_MEM_PORTS = 1, - // Number of associative ways - parameter NUM_WAYS = 1, - // Size of a word in bytes - parameter WORD_SIZE = `XLEN/8, - - // Core Response Queue Size - parameter CRSQ_SIZE = 2, - // Miss Reserv Queue Knob - parameter MSHR_SIZE = 8, - // Memory Response Queue Size - parameter MRSQ_SIZE = 0, - // Memory Request Queue Size - parameter MREQ_SIZE = 4, - - // Enable cache writeable - parameter WRITE_ENABLE = 1, - - // Enable cache writeback - parameter WRITEBACK = 0, - - // Enable dirty bytes on writeback - parameter DIRTY_BYTES = 0, - - // Request debug identifier - parameter UUID_WIDTH = 0, - - // core request tag size - parameter TAG_WIDTH = UUID_WIDTH + 1, - - // Core response output register - parameter CORE_OUT_BUF = 0, - - // Memory request output register - parameter MEM_OUT_BUF = 0 - ) ( - // PERF -`ifdef PERF_ENABLE - output cache_perf_t cache_perf, -`endif - - input wire clk, - input wire reset, - - VX_mem_bus_if.slave core_bus_if [NUM_REQS], - VX_mem_bus_if.master mem_bus_if [NUM_MEM_PORTS] -); - - `STATIC_ASSERT(NUM_BANKS == (1 << `CLOG2(NUM_BANKS)), ("invalid parameter: number of banks must be power of 2")) - 
`STATIC_ASSERT(WRITE_ENABLE || !WRITEBACK, ("invalid parameter: writeback requires write enable")) - `STATIC_ASSERT(WRITEBACK || !DIRTY_BYTES, ("invalid parameter: dirty bytes require writeback")) - - // In writeback mode, memory fill response may issue a new memory request to handle evicted blocks. - // We need to ensure that the memory request queue never fills up to avoid deadlock. - `STATIC_ASSERT(!WRITEBACK || (MREQ_SIZE >= MSHR_SIZE), ("invalid parameter: writeback requires MREQ_SIZE >= MSHR_SIZE")) - - localparam REQ_SEL_WIDTH = `UP(`CS_REQ_SEL_BITS); - localparam WORD_SEL_WIDTH = `UP(`CS_WORD_SEL_BITS); - localparam MSHR_ADDR_WIDTH = `LOG2UP(MSHR_SIZE); - localparam MEM_TAG_WIDTH = MSHR_ADDR_WIDTH + `CS_BANK_SEL_BITS; - localparam WORDS_PER_LINE = LINE_SIZE / WORD_SIZE; - localparam WORD_WIDTH = WORD_SIZE * 8; - localparam WORD_SEL_BITS = `CLOG2(WORDS_PER_LINE); - localparam BANK_SEL_BITS = `CLOG2(NUM_BANKS); - localparam BANK_SEL_WIDTH = `UP(BANK_SEL_BITS); - localparam LINE_ADDR_WIDTH = (`CS_WORD_ADDR_WIDTH - BANK_SEL_BITS - WORD_SEL_BITS); - localparam CORE_REQ_DATAW = LINE_ADDR_WIDTH + 1 + WORD_SEL_WIDTH + WORD_SIZE + WORD_WIDTH + TAG_WIDTH + 1; - localparam CORE_RSP_DATAW = WORD_WIDTH + TAG_WIDTH; - - localparam CORE_REQ_BUF_ENABLE = (NUM_BANKS != 1) || (NUM_REQS != 1); - localparam MEM_REQ_BUF_ENABLE = (NUM_BANKS != 1); - - localparam REQ_XBAR_BUF = (NUM_REQS > 4) ? 
2 : 0; - -`ifdef PERF_ENABLE - wire [NUM_BANKS-1:0] perf_read_miss_per_bank; - wire [NUM_BANKS-1:0] perf_write_miss_per_bank; - wire [NUM_BANKS-1:0] perf_mshr_stall_per_bank; -`endif - - VX_mem_bus_if #( - .DATA_SIZE (WORD_SIZE), - .TAG_WIDTH (TAG_WIDTH) - ) core_bus2_if[NUM_REQS](); - - wire [NUM_BANKS-1:0] per_bank_flush_begin; - wire [NUM_BANKS-1:0] per_bank_flush_end; - - wire [NUM_BANKS-1:0] per_bank_core_req_fire; - - VX_cache_flush #( - .NUM_REQS (NUM_REQS), - .NUM_BANKS (NUM_BANKS), - .BANK_SEL_LATENCY (`TO_OUT_BUF_REG(REQ_XBAR_BUF)) // bank xbar latency - ) flush_unit ( - .clk (clk), - .reset (reset), - .core_bus_in_if (core_bus_if), - .core_bus_out_if (core_bus2_if), - .bank_req_fire (per_bank_core_req_fire), - .flush_begin (per_bank_flush_begin), - .flush_end (per_bank_flush_end) - ); - - /////////////////////////////////////////////////////////////////////////// - - // Core response buffering - wire [NUM_REQS-1:0] core_rsp_valid_s; - wire [NUM_REQS-1:0][`CS_WORD_WIDTH-1:0] core_rsp_data_s; - wire [NUM_REQS-1:0][TAG_WIDTH-1:0] core_rsp_tag_s; - wire [NUM_REQS-1:0] core_rsp_ready_s; - - `RESET_RELAY_EX (core_rsp_reset, reset, NUM_REQS, `MAX_FANOUT); - - for (genvar i = 0; i < NUM_REQS; ++i) begin - - VX_elastic_buffer #( - .DATAW (`CS_WORD_WIDTH + TAG_WIDTH), - .SIZE (CORE_REQ_BUF_ENABLE ? 
`TO_OUT_BUF_SIZE(CORE_OUT_BUF) : 0), - .OUT_REG (`TO_OUT_BUF_REG(CORE_OUT_BUF)) - ) core_rsp_buf ( - .clk (clk), - .reset (core_rsp_reset[i]), - .valid_in (core_rsp_valid_s[i]), - .ready_in (core_rsp_ready_s[i]), - .data_in ({core_rsp_data_s[i], core_rsp_tag_s[i]}), - .data_out ({core_bus2_if[i].rsp_data.data, core_bus2_if[i].rsp_data.tag}), - .valid_out (core_bus2_if[i].rsp_valid), - .ready_out (core_bus2_if[i].rsp_ready) - ); - end - - /////////////////////////////////////////////////////////////////////////// - - // Memory request buffering - wire [NUM_MEM_PORTS-1:0] mem_req_valid_s; - wire [NUM_MEM_PORTS-1:0][`CS_MEM_ADDR_WIDTH-1:0] mem_req_addr_s; - wire [NUM_MEM_PORTS-1:0] mem_req_rw_s; - wire [NUM_MEM_PORTS-1:0][LINE_SIZE-1:0] mem_req_byteen_s; - wire [NUM_MEM_PORTS-1:0][`CS_LINE_WIDTH-1:0] mem_req_data_s; - wire [NUM_MEM_PORTS-1:0][MEM_TAG_WIDTH-1:0] mem_req_tag_s; - wire [NUM_MEM_PORTS-1:0] mem_req_flush_s; - wire [NUM_MEM_PORTS-1:0] mem_req_ready_s; - - wire [NUM_MEM_PORTS-1:0] mem_bus_if_flush; - - for (genvar i = 0; i < NUM_MEM_PORTS; ++i) begin - VX_elastic_buffer #( - .DATAW (1 + LINE_SIZE + `CS_MEM_ADDR_WIDTH + `CS_LINE_WIDTH + MEM_TAG_WIDTH + 1), - .SIZE (MEM_REQ_BUF_ENABLE ? `TO_OUT_BUF_SIZE(MEM_OUT_BUF) : 0), - .OUT_REG (`TO_OUT_BUF_REG(MEM_OUT_BUF)) - ) mem_req_buf ( - .clk (clk), - .reset (reset), - .valid_in (mem_req_valid_s[i]), - .ready_in (mem_req_ready_s[i]), - .data_in ({mem_req_rw_s[i], mem_req_byteen_s[i], mem_req_addr_s[i], mem_req_data_s[i], mem_req_tag_s[i], mem_req_flush_s[i]}), - .data_out ({mem_bus_if[i].req_data.rw, mem_bus_if[i].req_data.byteen, mem_bus_if[i].req_data.addr, mem_bus_if[i].req_data.data, mem_bus_if[i].req_data.tag, mem_bus_if_flush[i]}), - .valid_out (mem_bus_if[i].req_valid), - .ready_out (mem_bus_if[i].req_ready) - ); - - assign mem_bus_if[i].req_data.atype = mem_bus_if_flush[i] ? 
`ADDR_TYPE_WIDTH'(1 << `ADDR_TYPE_FLUSH) : '0; - - end - - /////////////////////////////////////////////////////////////////////////// - - // Memory response buffering - wire [NUM_MEM_PORTS-1:0] mem_rsp_valid_s; - wire [NUM_MEM_PORTS-1:0][`CS_LINE_WIDTH-1:0] mem_rsp_data_s; - wire [NUM_MEM_PORTS-1:0][MEM_TAG_WIDTH-1:0] mem_rsp_tag_s; - wire [NUM_MEM_PORTS-1:0] mem_rsp_ready_s; - - for (genvar i = 0; i < NUM_MEM_PORTS; ++i) begin - VX_elastic_buffer #( - .DATAW (MEM_TAG_WIDTH + `CS_LINE_WIDTH), - .SIZE (MRSQ_SIZE), - .OUT_REG (MRSQ_SIZE > 2) - ) mem_rsp_queue ( - .clk (clk), - .reset (reset), - .valid_in (mem_bus_if[i].rsp_valid), - .ready_in (mem_bus_if[i].rsp_ready), - .data_in ({mem_bus_if[i].rsp_data.tag, mem_bus_if[i].rsp_data.data}), - .data_out ({mem_rsp_tag_s[i], mem_rsp_data_s[i]}), - .valid_out (mem_rsp_valid_s[i]), - .ready_out (mem_rsp_ready_s[i]) - ); - end - - /////////////////////////////////////////////////////////////////////////// - - wire [NUM_BANKS-1:0] per_bank_core_req_valid; - wire [NUM_BANKS-1:0][`CS_LINE_ADDR_WIDTH-1:0] per_bank_core_req_addr; - wire [NUM_BANKS-1:0] per_bank_core_req_rw; - wire [NUM_BANKS-1:0][WORD_SEL_WIDTH-1:0] per_bank_core_req_wsel; - wire [NUM_BANKS-1:0][WORD_SIZE-1:0] per_bank_core_req_byteen; - wire [NUM_BANKS-1:0][`CS_WORD_WIDTH-1:0] per_bank_core_req_data; - wire [NUM_BANKS-1:0][TAG_WIDTH-1:0] per_bank_core_req_tag; - wire [NUM_BANKS-1:0][REQ_SEL_WIDTH-1:0] per_bank_core_req_idx; - wire [NUM_BANKS-1:0] per_bank_core_req_flush; - wire [NUM_BANKS-1:0] per_bank_core_req_ready; - - wire [NUM_BANKS-1:0] per_bank_core_rsp_valid; - wire [NUM_BANKS-1:0][`CS_WORD_WIDTH-1:0] per_bank_core_rsp_data; - wire [NUM_BANKS-1:0][TAG_WIDTH-1:0] per_bank_core_rsp_tag; - wire [NUM_BANKS-1:0][REQ_SEL_WIDTH-1:0] per_bank_core_rsp_idx; - wire [NUM_BANKS-1:0] per_bank_core_rsp_ready; - - wire [NUM_BANKS-1:0] per_bank_mem_req_valid; - wire [NUM_BANKS-1:0][`CS_MEM_ADDR_WIDTH-1:0] per_bank_mem_req_addr; - wire [NUM_BANKS-1:0] 
per_bank_mem_req_rw; - wire [NUM_BANKS-1:0][LINE_SIZE-1:0] per_bank_mem_req_byteen; - wire [NUM_BANKS-1:0][`CS_LINE_WIDTH-1:0] per_bank_mem_req_data; - wire [NUM_BANKS-1:0][MSHR_ADDR_WIDTH-1:0] per_bank_mem_req_id; - wire [NUM_BANKS-1:0] per_bank_mem_req_flush; - wire [NUM_BANKS-1:0] per_bank_mem_req_ready; - - wire [NUM_BANKS-1:0] per_bank_mem_rsp_ready; - - assign per_bank_core_req_fire = per_bank_core_req_valid & per_bank_mem_req_ready; - - if (NUM_BANKS == 1) begin - assign mem_rsp_ready_s = per_bank_mem_rsp_ready; - end else begin - for (genvar i = 0; i < NUM_MEM_PORTS; ++i) begin - assign mem_rsp_ready_s[i] = per_bank_mem_rsp_ready[`CS_MEM_TAG_TO_BANK_ID(mem_rsp_tag_s[i])]; - end - end - - // Bank requests dispatch - - wire [NUM_REQS-1:0] core_req_valid; - wire [NUM_REQS-1:0][`CS_WORD_ADDR_WIDTH-1:0] core_req_addr; - wire [NUM_REQS-1:0] core_req_rw; - wire [NUM_REQS-1:0][WORD_SIZE-1:0] core_req_byteen; - wire [NUM_REQS-1:0][`CS_WORD_WIDTH-1:0] core_req_data; - wire [NUM_REQS-1:0][TAG_WIDTH-1:0] core_req_tag; - wire [NUM_REQS-1:0] core_req_flush; - wire [NUM_REQS-1:0] core_req_ready; - - wire [NUM_REQS-1:0][LINE_ADDR_WIDTH-1:0] core_req_line_addr; - wire [NUM_REQS-1:0][BANK_SEL_WIDTH-1:0] core_req_bid; - wire [NUM_REQS-1:0][WORD_SEL_WIDTH-1:0] core_req_wsel; - - wire [NUM_REQS-1:0][CORE_REQ_DATAW-1:0] core_req_data_in; - wire [NUM_BANKS-1:0][CORE_REQ_DATAW-1:0] core_req_data_out; - - for (genvar i = 0; i < NUM_REQS; ++i) begin - assign core_req_valid[i] = core_bus2_if[i].req_valid; - assign core_req_rw[i] = core_bus2_if[i].req_data.rw; - assign core_req_byteen[i] = core_bus2_if[i].req_data.byteen; - assign core_req_addr[i] = core_bus2_if[i].req_data.addr; - assign core_req_data[i] = core_bus2_if[i].req_data.data; - assign core_req_tag[i] = core_bus2_if[i].req_data.tag; - assign core_req_flush[i] = core_bus2_if[i].req_data.atype[`ADDR_TYPE_FLUSH]; - assign core_bus2_if[i].req_ready = core_req_ready[i]; - end - - for (genvar i = 0; i < NUM_REQS; ++i) begin - if 
(WORDS_PER_LINE > 1) begin - assign core_req_wsel[i] = core_req_addr[i][0 +: WORD_SEL_BITS]; - end else begin - assign core_req_wsel[i] = '0; - end - assign core_req_line_addr[i] = core_req_addr[i][(BANK_SEL_BITS + WORD_SEL_BITS) +: LINE_ADDR_WIDTH]; - end - - if (NUM_BANKS > 1) begin - for (genvar i = 0; i < NUM_REQS; ++i) begin - assign core_req_bid[i] = core_req_addr[i][WORD_SEL_BITS +: BANK_SEL_BITS]; - end - end else begin - assign core_req_bid = '0; - end - - for (genvar i = 0; i < NUM_REQS; ++i) begin - assign core_req_data_in[i] = { - core_req_line_addr[i], - core_req_rw[i], - core_req_wsel[i], - core_req_byteen[i], - core_req_data[i], - core_req_tag[i], - core_req_flush[i] - }; - end - -`ifdef PERF_ENABLE - wire [`PERF_CTR_BITS-1:0] perf_collisions; -`endif - - `RESET_RELAY (req_xbar_reset, reset); - - VX_stream_xbar #( - .NUM_INPUTS (NUM_REQS), - .NUM_OUTPUTS (NUM_BANKS), - .DATAW (CORE_REQ_DATAW), - .PERF_CTR_BITS (`PERF_CTR_BITS), - .ARBITER ("F"), - .OUT_BUF (REQ_XBAR_BUF) - ) req_xbar ( - .clk (clk), - .reset (req_xbar_reset), - `ifdef PERF_ENABLE - .collisions(perf_collisions), - `else - `UNUSED_PIN(collisions), - `endif - .valid_in (core_req_valid), - .data_in (core_req_data_in), - .sel_in (core_req_bid), - .ready_in (core_req_ready), - .valid_out (per_bank_core_req_valid), - .data_out (core_req_data_out), - .sel_out (per_bank_core_req_idx), - .ready_out (per_bank_core_req_ready) - ); - - for (genvar i = 0; i < NUM_BANKS; ++i) begin - assign { - per_bank_core_req_addr[i], - per_bank_core_req_rw[i], - per_bank_core_req_wsel[i], - per_bank_core_req_byteen[i], - per_bank_core_req_data[i], - per_bank_core_req_tag[i], - per_bank_core_req_flush[i] - } = core_req_data_out[i]; - end - - // Banks access - for (genvar bank_id = 0; bank_id < NUM_BANKS; ++bank_id) begin : banks - wire [`CS_LINE_ADDR_WIDTH-1:0] curr_bank_mem_req_addr; - wire curr_bank_mem_rsp_valid; - - if (NUM_BANKS == 1) begin - assign curr_bank_mem_rsp_valid = mem_rsp_valid_s; - end else 
begin - assign curr_bank_mem_rsp_valid = mem_rsp_valid_s[bank_id] && (`CS_MEM_TAG_TO_BANK_ID(mem_rsp_tag_s[bank_id]) == bank_id); - end - - `RESET_RELAY (bank_reset, reset); - - VX_cache_bank #( - .BANK_ID (bank_id), - .INSTANCE_ID ($sformatf("%s-bank%0d", INSTANCE_ID, bank_id)), - .CACHE_SIZE (CACHE_SIZE), - .LINE_SIZE (LINE_SIZE), - .NUM_BANKS (NUM_BANKS), - .NUM_WAYS (NUM_WAYS), - .WORD_SIZE (WORD_SIZE), - .NUM_REQS (NUM_REQS), - .CRSQ_SIZE (CRSQ_SIZE), - .MSHR_SIZE (MSHR_SIZE), - .MREQ_SIZE (MREQ_SIZE), - .WRITE_ENABLE (WRITE_ENABLE), - .DIRTY_BYTES (DIRTY_BYTES), - .WRITEBACK (WRITEBACK), - .UUID_WIDTH (UUID_WIDTH), - .TAG_WIDTH (TAG_WIDTH), - .CORE_OUT_BUF (CORE_REQ_BUF_ENABLE ? 0 : CORE_OUT_BUF), - .MEM_OUT_BUF (MEM_REQ_BUF_ENABLE ? 0 : MEM_OUT_BUF) - ) bank ( - .clk (clk), - .reset (bank_reset), - - `ifdef PERF_ENABLE - .perf_read_misses (perf_read_miss_per_bank[bank_id]), - .perf_write_misses (perf_write_miss_per_bank[bank_id]), - .perf_mshr_stalls (perf_mshr_stall_per_bank[bank_id]), - `endif - - // Core request - .core_req_valid (per_bank_core_req_valid[bank_id]), - .core_req_addr (per_bank_core_req_addr[bank_id]), - .core_req_rw (per_bank_core_req_rw[bank_id]), - .core_req_wsel (per_bank_core_req_wsel[bank_id]), - .core_req_byteen (per_bank_core_req_byteen[bank_id]), - .core_req_data (per_bank_core_req_data[bank_id]), - .core_req_tag (per_bank_core_req_tag[bank_id]), - .core_req_idx (per_bank_core_req_idx[bank_id]), - .core_req_flush (per_bank_core_req_flush[bank_id]), - .core_req_ready (per_bank_core_req_ready[bank_id]), - - // Core response - .core_rsp_valid (per_bank_core_rsp_valid[bank_id]), - .core_rsp_data (per_bank_core_rsp_data[bank_id]), - .core_rsp_tag (per_bank_core_rsp_tag[bank_id]), - .core_rsp_idx (per_bank_core_rsp_idx[bank_id]), - .core_rsp_ready (per_bank_core_rsp_ready[bank_id]), - - // Memory request - .mem_req_valid (per_bank_mem_req_valid[bank_id]), - .mem_req_addr (curr_bank_mem_req_addr), - .mem_req_rw 
(per_bank_mem_req_rw[bank_id]), - .mem_req_byteen (per_bank_mem_req_byteen[bank_id]), - .mem_req_data (per_bank_mem_req_data[bank_id]), - .mem_req_id (per_bank_mem_req_id[bank_id]), - .mem_req_flush (per_bank_mem_req_flush[bank_id]), - .mem_req_ready (per_bank_mem_req_ready[bank_id]), - - // Memory response - .mem_rsp_valid (curr_bank_mem_rsp_valid), - .mem_rsp_data (mem_rsp_data_s[bank_id]), - .mem_rsp_id (`CS_MEM_TAG_TO_REQ_ID(mem_rsp_tag_s[bank_id])), - .mem_rsp_ready (per_bank_mem_rsp_ready[bank_id]), - - .flush_begin (per_bank_flush_begin[bank_id]), - .flush_end (per_bank_flush_end[bank_id]) - ); - - if (NUM_BANKS == 1) begin - assign per_bank_mem_req_addr[bank_id] = curr_bank_mem_req_addr; - end else begin - assign per_bank_mem_req_addr[bank_id] = `CS_LINE_TO_MEM_ADDR(curr_bank_mem_req_addr, bank_id); - end - end - - // Bank responses gather - - wire [NUM_BANKS-1:0][CORE_RSP_DATAW-1:0] core_rsp_data_in; - wire [NUM_REQS-1:0][CORE_RSP_DATAW-1:0] core_rsp_data_out; - - for (genvar i = 0; i < NUM_BANKS; ++i) begin - assign core_rsp_data_in[i] = {per_bank_core_rsp_data[i], per_bank_core_rsp_tag[i]}; - end - - `RESET_RELAY (rsp_xbar_reset, reset); - - VX_stream_xbar #( - .NUM_INPUTS (NUM_BANKS), - .NUM_OUTPUTS (NUM_REQS), - .DATAW (CORE_RSP_DATAW), - .ARBITER ("F") - ) rsp_xbar ( - .clk (clk), - .reset (rsp_xbar_reset), - `UNUSED_PIN (collisions), - .valid_in (per_bank_core_rsp_valid), - .data_in (core_rsp_data_in), - .sel_in (per_bank_core_rsp_idx), - .ready_in (per_bank_core_rsp_ready), - .valid_out (core_rsp_valid_s), - .data_out (core_rsp_data_out), - .ready_out (core_rsp_ready_s), - `UNUSED_PIN (sel_out) - ); - - for (genvar i = 0; i < NUM_REQS; ++i) begin - assign {core_rsp_data_s[i], core_rsp_tag_s[i]} = core_rsp_data_out[i]; - end - - /////////////////////////////////////////////////////////////////////////// - - wire [NUM_MEM_PORTS-1:0] mem_req_valid_p; - wire [NUM_MEM_PORTS-1:0][`CS_MEM_ADDR_WIDTH-1:0] mem_req_addr_p; - wire [NUM_MEM_PORTS-1:0] 
mem_req_rw_p; - wire [NUM_MEM_PORTS-1:0][LINE_SIZE-1:0] mem_req_byteen_p; - wire [NUM_MEM_PORTS-1:0][`CS_LINE_WIDTH-1:0] mem_req_data_p; - wire [NUM_MEM_PORTS-1:0][MEM_TAG_WIDTH-1:0] mem_req_tag_p; - wire [NUM_MEM_PORTS-1:0][MSHR_ADDR_WIDTH-1:0] mem_req_id_p; - wire [NUM_MEM_PORTS-1:0] mem_req_flush_p; - wire [NUM_MEM_PORTS-1:0] mem_req_ready_p; - - // Memory request arbitration - - wire [NUM_BANKS-1:0][(`CS_MEM_ADDR_WIDTH + MSHR_ADDR_WIDTH + 1 + LINE_SIZE + `CS_LINE_WIDTH + 1)-1:0] data_in; - wire [NUM_MEM_PORTS-1:0][(`CS_MEM_ADDR_WIDTH + MSHR_ADDR_WIDTH + 1 + LINE_SIZE + `CS_LINE_WIDTH + 1)-1:0] data_out; - - for (genvar i = 0; i < NUM_BANKS; ++i) begin - assign data_in[i] = { - per_bank_mem_req_addr[i], - per_bank_mem_req_rw[i], - per_bank_mem_req_byteen[i], - per_bank_mem_req_data[i], - per_bank_mem_req_id[i], - per_bank_mem_req_flush[i] - }; - end - - VX_stream_arb #( - .NUM_INPUTS (NUM_BANKS), - .NUM_OUTPUTS (NUM_MEM_PORTS), - .DATAW (`CS_MEM_ADDR_WIDTH + 1 + LINE_SIZE + `CS_LINE_WIDTH + MSHR_ADDR_WIDTH + 1), - .ARBITER ("F") - ) mem_req_arb ( - .clk (clk), - .reset (reset), - .valid_in (per_bank_mem_req_valid), - .ready_in (per_bank_mem_req_ready), - .data_in (data_in), - .data_out (data_out), - .valid_out (mem_req_valid_p), - .ready_out (mem_req_ready_p), - `UNUSED_PIN (sel_out) - ); - - for (genvar i = 0; i < NUM_MEM_PORTS; ++i) begin - assign { - mem_req_addr_p[i], - mem_req_rw_p[i], - mem_req_byteen_p[i], - mem_req_data_p[i], - mem_req_id_p[i], - mem_req_flush_p[i] - } = data_out[i]; - end - - if (NUM_BANKS > 1) begin - for (genvar i = 0; i < NUM_MEM_PORTS; ++i) begin - wire [`CS_BANK_SEL_BITS-1:0] mem_req_bank_id = `CS_MEM_ADDR_TO_BANK_ID(mem_req_addr_p[i]); - assign mem_req_tag_p[i] = MEM_TAG_WIDTH'({mem_req_bank_id, mem_req_id_p[i]}); - end - end else begin - assign mem_req_tag_p = MEM_TAG_WIDTH'(mem_req_id_p); - end - - // Memory request multi-port handling - - assign mem_req_valid_s = mem_req_valid_p; - assign mem_req_addr_s = mem_req_addr_p; - 
assign mem_req_tag_s = mem_req_tag_p; - assign mem_req_flush_s = mem_req_flush_p; - assign mem_req_ready_p = mem_req_ready_s; - - if (WRITE_ENABLE != 0) begin - assign mem_req_rw_s = mem_req_rw_p; - assign mem_req_byteen_s = mem_req_byteen_p; - assign mem_req_data_s = mem_req_data_p; - end else begin - `UNUSED_VAR (mem_req_byteen_p) - `UNUSED_VAR (mem_req_data_p) - `UNUSED_VAR (mem_req_rw_p) - - assign mem_req_rw_s = 0; - assign mem_req_byteen_s = {LINE_SIZE{1'b1}}; - assign mem_req_data_s = '0; - end - -`ifdef PERF_ENABLE - // per cycle: core_reads, core_writes - wire [`CLOG2(NUM_REQS+1)-1:0] perf_core_reads_per_cycle; - wire [`CLOG2(NUM_REQS+1)-1:0] perf_core_writes_per_cycle; - - wire [NUM_REQS-1:0] perf_core_reads_per_req; - wire [NUM_REQS-1:0] perf_core_writes_per_req; - - // per cycle: read misses, write misses, msrq stalls, pipeline stalls - wire [`CLOG2(NUM_BANKS+1)-1:0] perf_read_miss_per_cycle; - wire [`CLOG2(NUM_BANKS+1)-1:0] perf_write_miss_per_cycle; - wire [`CLOG2(NUM_BANKS+1)-1:0] perf_mshr_stall_per_cycle; - wire [`CLOG2(NUM_REQS+1)-1:0] perf_crsp_stall_per_cycle; - - `BUFFER(perf_core_reads_per_req, core_req_valid & core_req_ready & ~core_req_rw); - `BUFFER(perf_core_writes_per_req, core_req_valid & core_req_ready & core_req_rw); - - `POP_COUNT(perf_core_reads_per_cycle, perf_core_reads_per_req); - `POP_COUNT(perf_core_writes_per_cycle, perf_core_writes_per_req); - `POP_COUNT(perf_read_miss_per_cycle, perf_read_miss_per_bank); - `POP_COUNT(perf_write_miss_per_cycle, perf_write_miss_per_bank); - `POP_COUNT(perf_mshr_stall_per_cycle, perf_mshr_stall_per_bank); - - wire [NUM_REQS-1:0] perf_crsp_stall_per_req; - for (genvar i = 0; i < NUM_REQS; ++i) begin - assign perf_crsp_stall_per_req[i] = core_bus2_if[i].rsp_valid && ~core_bus2_if[i].rsp_ready; - end - - `POP_COUNT(perf_crsp_stall_per_cycle, perf_crsp_stall_per_req); - - wire perf_mem_stall_per_cycle = mem_bus_if[0].req_valid && ~mem_bus_if[0].req_ready; - - reg [`PERF_CTR_BITS-1:0] 
perf_core_reads; - reg [`PERF_CTR_BITS-1:0] perf_core_writes; - reg [`PERF_CTR_BITS-1:0] perf_read_misses; - reg [`PERF_CTR_BITS-1:0] perf_write_misses; - reg [`PERF_CTR_BITS-1:0] perf_mshr_stalls; - reg [`PERF_CTR_BITS-1:0] perf_mem_stalls; - reg [`PERF_CTR_BITS-1:0] perf_crsp_stalls; - - always @(posedge clk) begin - if (reset) begin - perf_core_reads <= '0; - perf_core_writes <= '0; - perf_read_misses <= '0; - perf_write_misses <= '0; - perf_mshr_stalls <= '0; - perf_mem_stalls <= '0; - perf_crsp_stalls <= '0; - end else begin - perf_core_reads <= perf_core_reads + `PERF_CTR_BITS'(perf_core_reads_per_cycle); - perf_core_writes <= perf_core_writes + `PERF_CTR_BITS'(perf_core_writes_per_cycle); - perf_read_misses <= perf_read_misses + `PERF_CTR_BITS'(perf_read_miss_per_cycle); - perf_write_misses <= perf_write_misses + `PERF_CTR_BITS'(perf_write_miss_per_cycle); - perf_mshr_stalls <= perf_mshr_stalls + `PERF_CTR_BITS'(perf_mshr_stall_per_cycle); - perf_mem_stalls <= perf_mem_stalls + `PERF_CTR_BITS'(perf_mem_stall_per_cycle); - perf_crsp_stalls <= perf_crsp_stalls + `PERF_CTR_BITS'(perf_crsp_stall_per_cycle); - end - end - - assign cache_perf.reads = perf_core_reads; - assign cache_perf.writes = perf_core_writes; - assign cache_perf.read_misses = perf_read_misses; - assign cache_perf.write_misses = perf_write_misses; - assign cache_perf.bank_stalls = perf_collisions; - assign cache_perf.mshr_stalls = perf_mshr_stalls; - assign cache_perf.mem_stalls = perf_mem_stalls; - assign cache_perf.crsp_stalls = perf_crsp_stalls; -`endif - -endmodule diff --git a/hw/rtl/cache/VX_cache_wrap_l3.sv b/hw/rtl/cache/VX_cache_wrap_l3.sv deleted file mode 100644 index def7237b1f..0000000000 --- a/hw/rtl/cache/VX_cache_wrap_l3.sv +++ /dev/null @@ -1,331 +0,0 @@ -// Copyright © 2019-2023 -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -`include "VX_cache_define.vh" - -module VX_cache_wrap_l3 import VX_gpu_pkg::*; #( - parameter `STRING INSTANCE_ID = "", - - parameter TAG_SEL_IDX = 0, - - // Number of Word requests per cycle - parameter NUM_REQS = 4, - - - // Size of cache in bytes - parameter CACHE_SIZE = 4096, - // Size of line inside a bank in bytes - parameter LINE_SIZE = 64, - // Number of banks - parameter NUM_BANKS = 1, - // Number of associative ways - parameter NUM_WAYS = 1, - // Size of a word in bytes - parameter WORD_SIZE = 4, - // Number of memory ports - parameter NUM_MEM_PORTS = 4, - - // Core Response Queue Size - parameter CRSQ_SIZE = 2, - // Miss Reserv Queue Knob - parameter MSHR_SIZE = 8, - // Memory Response Queue Size - parameter MRSQ_SIZE = 0, - // Memory Request Queue Size - parameter MREQ_SIZE = 4, - - // Enable cache writeable - parameter WRITE_ENABLE = 1, - - // Enable cache writeback - parameter WRITEBACK = 0, - - // Enable dirty bytes on writeback - parameter DIRTY_BYTES = 0, - - // Request debug identifier - parameter UUID_WIDTH = 0, - - // core request tag size - parameter TAG_WIDTH = UUID_WIDTH + 1, - - // enable bypass for non-cacheable addresses - parameter NC_ENABLE = 0, - - // Force bypass for all requests - parameter PASSTHRU = 0, - - // Core response output buffer - parameter CORE_OUT_BUF = 0, - - // Memory request output buffer - parameter MEM_OUT_BUF = 0 - ) ( - - input wire clk, - input wire reset, - - // PERF -`ifdef PERF_ENABLE - output cache_perf_t cache_perf, -`endif - - VX_mem_bus_if.slave core_bus_if [NUM_REQS], - VX_mem_bus_if.master 
mem_bus_if [NUM_MEM_PORTS] -); - - `STATIC_ASSERT(NUM_BANKS == (1 << `CLOG2(NUM_BANKS)), ("invalid parameter")) - - localparam MSHR_ADDR_WIDTH = `LOG2UP(MSHR_SIZE); - localparam CACHE_MEM_TAG_WIDTH = MSHR_ADDR_WIDTH + `CS_BANK_SEL_BITS; - - localparam MEM_TAG_WIDTH = PASSTHRU ? `CACHE_BYPASS_TAG_WIDTH(NUM_REQS, LINE_SIZE, WORD_SIZE, TAG_WIDTH) : - (NC_ENABLE ? `CACHE_NC_MEM_TAG_WIDTH(MSHR_SIZE, NUM_BANKS, NUM_REQS, LINE_SIZE, WORD_SIZE, TAG_WIDTH) : - `CACHE_MEM_TAG_WIDTH(MSHR_SIZE, NUM_BANKS)); - - localparam NC_OR_BYPASS = (NC_ENABLE || PASSTHRU); - - localparam NUM_REQS_P = NUM_REQS / NUM_MEM_PORTS; - - VX_mem_bus_if #( - .DATA_SIZE (WORD_SIZE), - .TAG_WIDTH (TAG_WIDTH) - ) core_bus_cache_if[NUM_REQS](); - - VX_mem_bus_if #( - .DATA_SIZE (LINE_SIZE), - .TAG_WIDTH (CACHE_MEM_TAG_WIDTH) - ) mem_bus_cache_if[NUM_MEM_PORTS](); - - if (NC_OR_BYPASS) begin - `RESET_RELAY (nc_bypass_reset, reset); - - // Slicing version - for (genvar i = 0; i < NUM_MEM_PORTS; ++i) begin - - localparam SLICE_BEGIN = i * NUM_REQS_P; - localparam SLICE_END = SLICE_BEGIN + NUM_REQS_P; - - VX_cache_bypass #( - .NUM_REQS (NUM_REQS_P), - .TAG_SEL_IDX (TAG_SEL_IDX), - - .PASSTHRU (PASSTHRU), - .NC_ENABLE (PASSTHRU ? 
0 : NC_ENABLE), - - .WORD_SIZE (WORD_SIZE), - .LINE_SIZE (LINE_SIZE), - - .CORE_ADDR_WIDTH (`CS_WORD_ADDR_WIDTH), - .CORE_TAG_WIDTH (TAG_WIDTH), - - .MEM_ADDR_WIDTH (`CS_MEM_ADDR_WIDTH), - .MEM_TAG_IN_WIDTH (CACHE_MEM_TAG_WIDTH), - .MEM_TAG_OUT_WIDTH (MEM_TAG_WIDTH), - - .UUID_WIDTH (UUID_WIDTH), - - .CORE_OUT_BUF (CORE_OUT_BUF), - .MEM_OUT_BUF (MEM_OUT_BUF) - ) cache_bypass ( - .clk (clk), - .reset (nc_bypass_reset), - - .core_bus_in_if (core_bus_if[SLICE_END-1:SLICE_BEGIN]), - .core_bus_out_if(core_bus_cache_if[SLICE_END-1:SLICE_BEGIN]), - - .mem_bus_in_if (mem_bus_cache_if[i]), - .mem_bus_out_if (mem_bus_if[i]) - ); - end - - // Connect everything - /* - for (genvar i = 0; i < NUM_MEM_PORTS; ++i) begin - VX_cache_bypass #( - .NUM_REQS (NUM_REQS), - .TAG_SEL_IDX (TAG_SEL_IDX), - - .PASSTHRU (PASSTHRU), - .NC_ENABLE (PASSTHRU ? 0 : NC_ENABLE), - - .WORD_SIZE (WORD_SIZE), - .LINE_SIZE (LINE_SIZE), - - .CORE_ADDR_WIDTH (`CS_WORD_ADDR_WIDTH), - .CORE_TAG_WIDTH (TAG_WIDTH), - - .MEM_ADDR_WIDTH (`CS_MEM_ADDR_WIDTH), - .MEM_TAG_IN_WIDTH (CACHE_MEM_TAG_WIDTH), - .MEM_TAG_OUT_WIDTH (MEM_TAG_WIDTH), - - .UUID_WIDTH (UUID_WIDTH), - - .CORE_OUT_BUF (CORE_OUT_BUF), - .MEM_OUT_BUF (MEM_OUT_BUF) - ) cache_bypass ( - .clk (clk), - .reset (nc_bypass_reset), - - .core_bus_in_if (core_bus_if), - .core_bus_out_if(core_bus_cache_if), - - .mem_bus_in_if (mem_bus_cache_if[i]), - .mem_bus_out_if (mem_bus_if[i]) - ); - end - */ - - end else begin - - for (genvar i = 0; i < NUM_REQS; ++i) begin - `ASSIGN_VX_MEM_BUS_IF (core_bus_cache_if[i], core_bus_if[i]); - end - - for (genvar i = 0; i < NUM_MEM_PORTS; ++i) begin - `ASSIGN_VX_MEM_BUS_IF (mem_bus_if[i], mem_bus_cache_if[i]); - end - end - - if (PASSTHRU != 0) begin - - for (genvar i = 0; i < NUM_REQS; ++i) begin - `UNUSED_VAR (core_bus_cache_if[i].req_valid) - `UNUSED_VAR (core_bus_cache_if[i].req_data) - assign core_bus_cache_if[i].req_ready = 0; - - assign core_bus_cache_if[i].rsp_valid = 0; - assign core_bus_cache_if[i].rsp_data = '0; 
- `UNUSED_VAR (core_bus_cache_if[i].rsp_ready) - end - - for (genvar i = 0; i < NUM_MEM_PORTS; ++i) begin - assign mem_bus_cache_if[i].req_valid = 0; - assign mem_bus_cache_if[i].req_data = '0; - `UNUSED_VAR (mem_bus_cache_if[i].req_ready) - - `UNUSED_VAR (mem_bus_cache_if[i].rsp_valid) - `UNUSED_VAR (mem_bus_cache_if[i].rsp_data) - assign mem_bus_cache_if[i].rsp_ready = 0; - end - - `ifdef PERF_ENABLE - assign cache_perf = '0; - `endif - - end else begin - - `RESET_RELAY (cache_reset, reset); - - VX_cache_l3 #( - .INSTANCE_ID (INSTANCE_ID), - .CACHE_SIZE (CACHE_SIZE), - .LINE_SIZE (LINE_SIZE), - .NUM_BANKS (NUM_BANKS), - .NUM_MEM_PORTS (NUM_MEM_PORTS), - .NUM_WAYS (NUM_WAYS), - .WORD_SIZE (WORD_SIZE), - .NUM_REQS (NUM_REQS), - .CRSQ_SIZE (CRSQ_SIZE), - .MSHR_SIZE (MSHR_SIZE), - .MRSQ_SIZE (MRSQ_SIZE), - .MREQ_SIZE (MREQ_SIZE), - .WRITE_ENABLE (WRITE_ENABLE), - .WRITEBACK (WRITEBACK), - .DIRTY_BYTES (DIRTY_BYTES), - .UUID_WIDTH (UUID_WIDTH), - .TAG_WIDTH (TAG_WIDTH), - .CORE_OUT_BUF (NC_OR_BYPASS ? 1 : CORE_OUT_BUF), - .MEM_OUT_BUF (NC_OR_BYPASS ? 
1 : MEM_OUT_BUF) - ) cache ( - .clk (clk), - .reset (cache_reset), - `ifdef PERF_ENABLE - .cache_perf (cache_perf), - `endif - .core_bus_if (core_bus_cache_if), - .mem_bus_if (mem_bus_cache_if) - ); - - end - -`ifdef DBG_TRACE_CACHE - - for (genvar i = 0; i < NUM_REQS; ++i) begin - wire [`UP(UUID_WIDTH)-1:0] core_req_uuid; - wire [`UP(UUID_WIDTH)-1:0] core_rsp_uuid; - - if (UUID_WIDTH != 0) begin - assign core_req_uuid = core_bus_if[i].req_data.tag[TAG_WIDTH-1 -: UUID_WIDTH]; - assign core_rsp_uuid = core_bus_if[i].rsp_data.tag[TAG_WIDTH-1 -: UUID_WIDTH]; - end else begin - assign core_req_uuid = 0; - assign core_rsp_uuid = 0; - end - - wire core_req_fire = core_bus_if[i].req_valid && core_bus_if[i].req_ready; - wire core_rsp_fire = core_bus_if[i].rsp_valid && core_bus_if[i].rsp_ready; - - always @(posedge clk) begin - if (core_req_fire) begin - if (core_bus_if[i].req_data.rw) - `TRACE(1, ("%d: %s core-wr-req: addr=0x%0h, tag=0x%0h, req_idx=%0d, byteen=%b, data=0x%0h (#%0d)\n", $time, INSTANCE_ID, `TO_FULL_ADDR(core_bus_if[i].req_data.addr), core_bus_if[i].req_data.tag, i, core_bus_if[i].req_data.byteen, core_bus_if[i].req_data.data, core_req_uuid)); - else - `TRACE(1, ("%d: %s core-rd-req: addr=0x%0h, tag=0x%0h, req_idx=%0d (#%0d)\n", $time, INSTANCE_ID, `TO_FULL_ADDR(core_bus_if[i].req_data.addr), core_bus_if[i].req_data.tag, i, core_req_uuid)); - end - if (core_rsp_fire) begin - `TRACE(1, ("%d: %s core-rd-rsp: tag=0x%0h, req_idx=%0d, data=0x%0h (#%0d)\n", $time, INSTANCE_ID, core_bus_if[i].rsp_data.tag, i, core_bus_if[i].rsp_data.data, core_rsp_uuid)); - end - end - end - - wire [NUM_MEM_PORTS-1:0][`UP(UUID_WIDTH)-1:0] mem_req_uuid; - wire [NUM_MEM_PORTS-1:0][`UP(UUID_WIDTH)-1:0] mem_rsp_uuid; - - for (genvar i = 0; i < NUM_MEM_PORTS; ++i) begin - if ((UUID_WIDTH != 0) && (NC_OR_BYPASS != 0)) begin - assign mem_req_uuid[i] = mem_bus_if[i].req_data.tag[MEM_TAG_WIDTH-1 -: UUID_WIDTH]; - assign mem_rsp_uuid[i] = mem_bus_if[i].rsp_data.tag[MEM_TAG_WIDTH-1 -: 
UUID_WIDTH]; - end else begin - assign mem_req_uuid[i] = 0; - assign mem_rsp_uuid[i] = 0; - end - end - - wire mem_req_fire [NUM_MEM_PORTS-1:0]; - wire mem_rsp_fire [NUM_MEM_PORTS-1:0]; - - for (genvar i = 0; i < NUM_MEM_PORTS; ++i) begin - assign mem_req_fire[i] = mem_bus_if[i].req_valid && mem_bus_if[i].req_ready; - assign mem_rsp_fire[i] = mem_bus_if[i].rsp_valid && mem_bus_if[i].rsp_ready; - end - - for (genvar i = 0; i < NUM_MEM_PORTS; ++i) begin - always @(posedge clk) begin - if (mem_req_fire[i]) begin - if (mem_bus_if[i].req_data.rw) - `TRACE(1, ("%d: %s mem-wr-req: addr=0x%0h, tag=0x%0h, byteen=%b, data=0x%0h (#%0d) bank=%d\n", - $time, INSTANCE_ID, `TO_FULL_ADDR(mem_bus_if[i].req_data.addr), mem_bus_if[i].req_data.tag, mem_bus_if[i].req_data.byteen, mem_bus_if[i].req_data.data, mem_req_uuid[i], i)); - else - `TRACE(1, ("%d: %s mem-rd-req: addr=0x%0h, tag=0x%0h (#%0d) bank=%d\n", - $time, INSTANCE_ID, `TO_FULL_ADDR(mem_bus_if[i].req_data.addr), mem_bus_if[i].req_data.tag, mem_req_uuid[i], i)); - end - if (mem_rsp_fire[i]) begin - `TRACE(1, ("%d: %s mem-rd-rsp: tag=0x%0h, data=0x%0h (#%0d)\n", - $time, INSTANCE_ID, mem_bus_if[i].rsp_data.tag, mem_bus_if[i].rsp_data.data, mem_rsp_uuid[i])); - end - end - end -`endif - -endmodule diff --git a/sim/rtlsim/Makefile b/sim/rtlsim/Makefile index 9ddccc19d4..3deffc759d 100644 --- a/sim/rtlsim/Makefile +++ b/sim/rtlsim/Makefile @@ -37,13 +37,13 @@ RTL_INCLUDE = -I$(RTL_DIR) -I$(DPI_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interface SRCS = $(COMMON_DIR)/util.cpp $(COMMON_DIR)/mem.cpp $(COMMON_DIR)/rvfloats.cpp $(COMMON_DIR)/dram_sim.cpp SRCS += $(DPI_DIR)/util_dpi.cpp $(DPI_DIR)/float_dpi.cpp -SRCS += $(SRC_DIR)/processor_hbm.cpp +SRCS += $(SRC_DIR)/processor.cpp ifdef AXI_BUS TOP = Vortex_axi CXXFLAGS += -DAXI_BUS else - TOP = Vortex_hbm + TOP = Vortex endif VL_FLAGS = --exe diff --git a/sim/rtlsim/processor_hbm.cpp b/sim/rtlsim/processor_hbm.cpp deleted file mode 100644 index 5f7bee7eee..0000000000 --- 
a/sim/rtlsim/processor_hbm.cpp +++ /dev/null @@ -1,656 +0,0 @@ -// Copyright © 2019-2023 -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "processor.h" - -#ifdef AXI_BUS -#include "VVortex_axi.h" -typedef VVortex_axi Device; -#else -#include "VVortex_hbm.h" -typedef VVortex_hbm Device; -#endif - -#ifdef VCD_OUTPUT -#include -#endif - -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#ifndef MEMORY_BANKS - #ifdef PLATFORM_PARAM_LOCAL_MEMORY_BANKS - #define MEMORY_BANKS PLATFORM_PARAM_LOCAL_MEMORY_BANKS - #else - #define MEMORY_BANKS 2 - #endif -#endif - -#ifndef MEM_CLOCK_RATIO -#define MEM_CLOCK_RATIO 1 -#endif - -#ifndef TRACE_START_TIME -#define TRACE_START_TIME 0ull -#endif - -#ifndef TRACE_STOP_TIME -#define TRACE_STOP_TIME -1ull -#endif - -#ifndef VERILATOR_RESET_VALUE -#define VERILATOR_RESET_VALUE 2 -#endif - -#if (XLEN == 32) -typedef uint32_t Word; -#elif (XLEN == 64) -typedef uint64_t Word; -#else -#error unsupported XLEN -#endif - -#define VL_WDATA_GETW(lwp, i, n, w) \ - VL_SEL_IWII(0, n * w, 0, 0, lwp, i * w, w) - -using namespace vortex; - -static uint64_t timestamp = 0; - -double sc_time_stamp() { - return timestamp; -} - -/////////////////////////////////////////////////////////////////////////////// - -static bool trace_enabled = false; -static uint64_t trace_start_time = TRACE_START_TIME; -static uint64_t trace_stop_time = TRACE_STOP_TIME; - -bool 
sim_trace_enabled() { - if (timestamp >= trace_start_time - && timestamp < trace_stop_time) - return true; - return trace_enabled; -} - -void sim_trace_enable(bool enable) { - trace_enabled = enable; -} - -/////////////////////////////////////////////////////////////////////////////// - -class Processor::Impl { -public: - Impl() : dram_sim_(MEM_CLOCK_RATIO) { - // force random values for unitialized signals - Verilated::randReset(VERILATOR_RESET_VALUE); - Verilated::randSeed(50); - - // turn off assertion before reset - Verilated::assertOn(false); - - // create RTL module instance - device_ = new Device(); - - #ifdef VCD_OUTPUT - Verilated::traceEverOn(true); - tfp_ = new VerilatedVcdC(); - device_->trace(tfp_, 99); - tfp_->open("trace.vcd"); - #endif - - pending_mem_reqs_.resize(NUM_MEM_PORTS); - dram_queue_.resize(NUM_MEM_PORTS); - - mem_rd_rsp_active_.resize(NUM_MEM_PORTS); - mem_rd_rsp_ready_.resize(NUM_MEM_PORTS); - - mem_wr_rsp_active_.resize(NUM_MEM_PORTS); - mem_wr_rsp_ready_.resize(NUM_MEM_PORTS); - - ram_ = nullptr; - - #ifndef NDEBUG - // dump device configuration - std::cout << "CONFIGS:" - << " num_threads=" << NUM_THREADS - << ", num_warps=" << NUM_WARPS - << ", num_cores=" << NUM_CORES - << ", num_clusters=" << NUM_CLUSTERS - << ", socket_size=" << SOCKET_SIZE - << ", local_mem_base=0x" << std::hex << LMEM_BASE_ADDR << std::dec - << ", num_barriers=" << NUM_BARRIERS - << std::endl; - #endif - // reset the device - this->reset(); - - // Turn on assertion after reset - Verilated::assertOn(true); - } - - ~Impl() { - this->cout_flush(); - - #ifdef VCD_OUTPUT - tfp_->close(); - delete tfp_; - #endif - - delete device_; - } - - void cout_flush() { - for (auto& buf : print_bufs_) { - auto str = buf.second.str(); - if (!str.empty()) { - std::cout << "#" << buf.first << ": " << str << std::endl; - } - } - } - - void attach_ram(RAM* ram) { - ram_ = ram; - } - - void run() { - - #ifndef NDEBUG - std::cout << std::dec << timestamp << ": [sim] run()" << 
std::endl; - #endif - - // start execution - running_ = true; - device_->reset = 0; - - /* - device_->mem_req_valid[1] = 0; - device_->mem_req_ready[1] = 0; - device_->mem_rsp_valid[1] = 0; - device_->mem_rsp_ready[1] = 0; - */ - - // wait on device to go busy - while (!device_->busy) { - this->tick(); - } - - // wait on device to go idle - while (device_->busy) { - this->tick(); - } - - // reset device - this->reset(); - - this->cout_flush(); - } - - void dcr_write(uint32_t addr, uint32_t value) { - device_->dcr_wr_valid = 1; - device_->dcr_wr_addr = addr; - device_->dcr_wr_data = value; - while (device_->dcr_wr_valid) { - this->tick(); - } - } - -private: - - void reset() { - running_ = false; - - print_bufs_.clear(); - - for (int i = 0; i < NUM_MEM_PORTS; ++i) { - - pending_mem_reqs_.at(i).clear(); - - { - std::queue empty; - std::swap(dram_queue_.at(i), empty); - } - - mem_rd_rsp_active_.at(i) = false; - mem_wr_rsp_active_.at(i) = false; - } - - this->mem_bus_reset(); - - this->dcr_bus_reset(); - - device_->reset = 1; - - for (int i = 0; i < RESET_DELAY; ++i) { - device_->clk = 0; - this->eval(); - device_->clk = 1; - this->eval(); - } - } - - void tick() { - - device_->clk = 0; - this->eval(); - - for (int i = 0; i < NUM_MEM_PORTS; ++i) { - this->mem_bus_eval(0, i); - } - this->dcr_bus_eval(0); - - device_->clk = 1; - this->eval(); - - for (int i = 0; i < NUM_MEM_PORTS; ++i) { - this->mem_bus_eval(1, i); - } - this->dcr_bus_eval(1); - - dram_sim_.tick(); - - for (int i = 0; i < NUM_MEM_PORTS; ++i) { - if (!dram_queue_.at(i).empty()) { - auto mem_req = dram_queue_.at(i).front(); - if (dram_sim_.send_request(mem_req->write, mem_req->addr, 0, [](void* arg) { - auto orig_req = reinterpret_cast(arg); - if (orig_req->ready) { - delete orig_req; - } else { - orig_req->ready = true; - } - }, mem_req)) { - dram_queue_.at(i).pop(); - } - } - } - - #ifndef NDEBUG - fflush(stdout); - #endif - } - - void eval() { - device_->eval(); - #ifdef VCD_OUTPUT - if 
(sim_trace_enabled()) { - tfp_->dump(timestamp); - } else { - exit(-1); - } - #endif - ++timestamp; - } - -#ifdef AXI_BUS - - void mem_bus_reset() { - device_->m_axi_wready[0] = 0; - device_->m_axi_awready[0] = 0; - device_->m_axi_arready[0] = 0; - device_->m_axi_rvalid[0] = 0; - device_->m_axi_bvalid[0] = 0; - } - - void mem_bus_eval(bool clk) { - if (!clk) { - mem_rd_rsp_ready_ = device_->m_axi_rready[0]; - mem_wr_rsp_ready_ = device_->m_axi_bready[0]; - return; - } - - if (ram_ == nullptr) { - device_->m_axi_wready[0] = 0; - device_->m_axi_awready[0] = 0; - device_->m_axi_arready[0] = 0; - return; - } - - // process memory read responses - if (mem_rd_rsp_active_ - && device_->m_axi_rvalid[0] && mem_rd_rsp_ready_) { - mem_rd_rsp_active_ = false; - } - if (!mem_rd_rsp_active_) { - if (!pending_mem_reqs_.empty() - && (*pending_mem_reqs_.begin())->ready - && !(*pending_mem_reqs_.begin())->write) { - auto mem_rsp_it = pending_mem_reqs_.begin(); - auto mem_rsp = *mem_rsp_it; - /* - printf("%0ld: [sim] MEM Rd Rsp: addr=0x%0lx, data=0x", timestamp, mem_rsp->addr); - for (int i = MEM_BLOCK_SIZE-1; i >= 0; --i) { - printf("%02x", mem_rsp->block[i]); - } - printf("\n"); - */ - device_->m_axi_rvalid[0] = 1; - device_->m_axi_rid[0] = mem_rsp->tag; - device_->m_axi_rresp[0] = 0; - device_->m_axi_rlast[0] = 1; - memcpy(device_->m_axi_rdata[0].data(), mem_rsp->block.data(), MEM_BLOCK_SIZE); - pending_mem_reqs_.erase(mem_rsp_it); - mem_rd_rsp_active_ = true; - delete mem_rsp; - } else { - device_->m_axi_rvalid[0] = 0; - } - } - - // process memory write responses - if (mem_wr_rsp_active_ - && device_->m_axi_bvalid[0] && mem_wr_rsp_ready_) { - mem_wr_rsp_active_ = false; - } - if (!mem_wr_rsp_active_) { - if (!pending_mem_reqs_.empty() - && (*pending_mem_reqs_.begin())->ready - && (*pending_mem_reqs_.begin())->write) { - auto mem_rsp_it = pending_mem_reqs_.begin(); - auto mem_rsp = *mem_rsp_it; - /* - printf("%0ld: [sim] MEM Wr Rsp: addr=0x%0lx\n", timestamp, mem_rsp->addr); - */ 
- device_->m_axi_bvalid[0] = 1; - device_->m_axi_bid[0] = mem_rsp->tag; - device_->m_axi_bresp[0] = 0; - pending_mem_reqs_.erase(mem_rsp_it); - mem_wr_rsp_active_ = true; - delete mem_rsp; - } else { - device_->m_axi_bvalid[0] = 0; - } - } - - // select the memory bank - uint32_t req_addr = device_->m_axi_wvalid[0] ? device_->m_axi_awaddr[0] : device_->m_axi_araddr[0]; - - // process memory requests - if ((device_->m_axi_wvalid[0] || device_->m_axi_arvalid[0]) && running_) { - if (device_->m_axi_wvalid[0]) { - auto byteen = device_->m_axi_wstrb[0]; - auto base_addr = device_->m_axi_awaddr[0]; - auto data = (uint8_t*)device_->m_axi_wdata[0].data(); - - if (base_addr >= uint64_t(IO_COUT_ADDR) - && base_addr < (uint64_t(IO_COUT_ADDR) + IO_COUT_SIZE)) { - // process console output - for (int i = 0; i < MEM_BLOCK_SIZE; i++) { - if ((byteen >> i) & 0x1) { - auto& ss_buf = print_bufs_[i]; - char c = data[i]; - ss_buf << c; - if (c == '\n') { - std::cout << std::dec << "#" << i << ": " << ss_buf.str() << std::flush; - ss_buf.str(""); - } - } - } - } else { - // process writes - /* - printf("%0ld: [sim] MEM Wr: addr=0x%0lx, byteen=0x", timestamp, base_addr); - for (int i = (MEM_BLOCK_SIZE/4)-1; i >= 0; --i) { - printf("%x", (int)((byteen >> (4 * i)) & 0xf)); - } - printf(", data=0x"); - for (int i = MEM_BLOCK_SIZE-1; i >= 0; --i) { - printf("%02x", data[i]); - } - printf("\n"); - */ - for (int i = 0; i < MEM_BLOCK_SIZE; i++) { - if ((byteen >> i) & 0x1) { - (*ram_)[base_addr + i] = data[i]; - } - } - - auto mem_req = new mem_req_t(); - mem_req->tag = device_->m_axi_awid[0]; - mem_req->addr = device_->m_axi_awaddr[0]; - mem_req->write = true; - mem_req->ready = false; - pending_mem_reqs_.emplace_back(mem_req); - - // send dram request - dram_queue_.push(mem_req); - } - } else { - // process reads - auto mem_req = new mem_req_t(); - mem_req->tag = device_->m_axi_arid[0]; - mem_req->addr = device_->m_axi_araddr[0]; - ram_->read(mem_req->block.data(), device_->m_axi_araddr[0], 
MEM_BLOCK_SIZE); - mem_req->write = false; - mem_req->ready = false; - pending_mem_reqs_.emplace_back(mem_req); - - // send dram request - dram_queue_.push(mem_req); - } - } - - device_->m_axi_wready[0] = running_; - device_->m_axi_awready[0] = running_; - device_->m_axi_arready[0] = running_; - } - -#else - - void mem_bus_reset() { - for (int i = 0; i < NUM_MEM_PORTS; ++i) { - device_->mem_req_ready[i] = 0; - device_->mem_rsp_valid[i] = 0; - } - } - - void mem_bus_eval(bool clk, int n) { - if (!clk) { - mem_rd_rsp_ready_.at(n) = device_->mem_rsp_ready[n]; - return; - } - - if (ram_ == nullptr) { - device_->mem_req_ready[n] = 0; - return; - } - - // process memory read responses - if (mem_rd_rsp_active_.at(n) - && device_->mem_rsp_valid[n] && mem_rd_rsp_ready_.at(n)) { - mem_rd_rsp_active_.at(n) = false; - } - if (!mem_rd_rsp_active_.at(n)) { - if (!pending_mem_reqs_.at(n).empty() - && (*pending_mem_reqs_.at(n).begin())->ready) { - device_->mem_rsp_valid[n] = 1; - auto mem_rsp_it = pending_mem_reqs_.at(n).begin(); - auto mem_rsp = *mem_rsp_it; - /* - printf("%0ld: [sim] MEM Rd Rsp: tag=0x%0lx, addr=0x%0lx, data=0x", timestamp, mem_rsp->tag, mem_rsp->addr); - for (int i = MEM_BLOCK_SIZE-1; i >= 0; --i) { - printf("%02x", mem_rsp->block[i]); - } - printf("\n"); - */ - memcpy(VDataCast::get(device_->mem_rsp_data[n]), mem_rsp->block.data(), MEM_BLOCK_SIZE); - device_->mem_rsp_tag[n] = mem_rsp->tag; - pending_mem_reqs_.at(n).erase(mem_rsp_it); - mem_rd_rsp_active_.at(n) = true; - delete mem_rsp; - } else { - device_->mem_rsp_valid[n] = 0; - } - } - - // process memory requests - if (device_->mem_req_valid[n] && running_) { - uint64_t byte_addr = (device_->mem_req_addr[n] * MEM_BLOCK_SIZE); - if (device_->mem_req_rw[n]) { - auto byteen = device_->mem_req_byteen[n]; - auto data = VDataCast::get(device_->mem_req_data[n]); - - if (byte_addr >= uint64_t(IO_COUT_ADDR) - && byte_addr < (uint64_t(IO_COUT_ADDR) + IO_COUT_SIZE)) { - // process console output - for (int i = 0; i < 
IO_COUT_SIZE; i++) { - if ((byteen >> i) & 0x1) { - auto& ss_buf = print_bufs_[i]; - char c = data[i]; - ss_buf << c; - if (c == '\n') { - std::cout << std::dec << "#" << i << ": " << ss_buf.str() << std::flush; - ss_buf.str(""); - } - } - } - } else { - // process writes - /* - printf("%0ld: [sim] MEM Wr Req: tag=0x%0lx, addr=0x%0lx, byteen=0x", timestamp, device_->mem_req_tag, byte_addr); - for (int i = (MEM_BLOCK_SIZE/4)-1; i >= 0; --i) { - printf("%x", (int)((byteen >> (4 * i)) & 0xf)); - } - printf(", data=0x"); - for (int i = MEM_BLOCK_SIZE-1; i >= 0; --i) { - printf("%d=%02x,", i, data[i]); - } - printf("\n"); - */ - for (int i = 0; i < MEM_BLOCK_SIZE; i++) { - if ((byteen >> i) & 0x1) { - (*ram_)[byte_addr + i] = data[i]; - } - } - - auto mem_req = new mem_req_t(); - mem_req->tag = device_->mem_req_tag[n]; - mem_req->addr = byte_addr; - mem_req->write = true; - mem_req->ready = true; - - // send dram request - dram_queue_.at(n).push(mem_req); - } - } else { - // process reads - auto mem_req = new mem_req_t(); - mem_req->tag = device_->mem_req_tag[n]; - mem_req->addr = byte_addr; - mem_req->write = false; - mem_req->ready = false; - ram_->read(mem_req->block.data(), byte_addr, MEM_BLOCK_SIZE); - pending_mem_reqs_.at(n).emplace_back(mem_req); - - //printf("%0ld: [sim] MEM Rd Req: addr=0x%0lx, tag=0x%0lx\n", timestamp, byte_addr, device_->mem_req_tag); - - // send dram request - dram_queue_.at(n).push(mem_req); - } - } - - device_->mem_req_ready[n] = running_; - } - -#endif - - void dcr_bus_reset() { - device_->dcr_wr_valid = 0; - } - - void dcr_bus_eval(bool clk) { - if (!clk) { - return; - } - if (device_->dcr_wr_valid) { - device_->dcr_wr_valid = 0; - } - } - - void wait(uint32_t cycles) { - for (int i = 0; i < cycles; ++i) { - this->tick(); - } - } - -private: - - typedef struct { - Device* device; - std::array block; - uint64_t addr; - uint64_t tag; - bool write; - bool ready; - } mem_req_t; - - std::unordered_map print_bufs_; - - std::vector> 
pending_mem_reqs_; - - std::vector> dram_queue_; - - DramSim dram_sim_; - - Device* device_; - -#ifdef VCD_OUTPUT - VerilatedVcdC *tfp_; -#endif - - RAM* ram_; - - std::vector mem_rd_rsp_active_; - std::vector mem_rd_rsp_ready_; - - std::vector mem_wr_rsp_active_; - std::vector mem_wr_rsp_ready_; - - bool running_; -}; - -/////////////////////////////////////////////////////////////////////////////// - -Processor::Processor() - : impl_(new Impl()) -{} - -Processor::~Processor() { - delete impl_; -} - -void Processor::attach_ram(RAM* mem) { - impl_->attach_ram(mem); -} - -void Processor::run() { - impl_->run(); -} - -void Processor::dcr_write(uint32_t addr, uint32_t value) { - return impl_->dcr_write(addr, value); -} \ No newline at end of file diff --git a/third_party/softfloat b/third_party/softfloat index 3b70b5d814..b51ef8f320 160000 --- a/third_party/softfloat +++ b/third_party/softfloat @@ -1 +1 @@ -Subproject commit 3b70b5d8147675932c38b36cd09af6df4eedd919 +Subproject commit b51ef8f3201669b2288104c28546fc72532a1ea4 From 2eeb2ac532eadf158ac4b41d2cc98cda2f350cb9 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sat, 5 Oct 2024 13:46:10 -0700 Subject: [PATCH 265/407] fixed memory flags propagation through the cache hierarchy --- ci/regression.sh.in | 12 +- hw/rtl/VX_cluster.sv | 1 + hw/rtl/VX_config.vh | 8 -- hw/rtl/VX_socket.sv | 2 + hw/rtl/Vortex.sv | 1 + hw/rtl/cache/VX_bank_flush.sv | 4 +- hw/rtl/cache/VX_cache.sv | 45 +++++--- hw/rtl/cache/VX_cache_bank.sv | 114 ++++++++++--------- hw/rtl/cache/VX_cache_cluster.sv | 4 + hw/rtl/cache/VX_cache_data.sv | 182 ++++++++++++++++--------------- hw/rtl/cache/VX_cache_tags.sv | 30 ++--- hw/rtl/cache/VX_cache_wrap.sv | 4 + hw/rtl/libs/VX_cyclic_arbiter.sv | 4 +- hw/rtl/libs/VX_decoder.sv | 27 +++-- hw/rtl/libs/VX_mem_adapter.sv | 8 +- hw/rtl/libs/VX_mem_coalescer.sv | 21 ++-- hw/rtl/libs/VX_mem_scheduler.sv | 29 +++-- hw/rtl/libs/VX_rr_arbiter.sv | 4 +- hw/rtl/libs/VX_stream_xbar.sv | 8 +- 19 files changed, 279 
insertions(+), 229 deletions(-) diff --git a/ci/regression.sh.in b/ci/regression.sh.in index 9827199bb0..443b34f5ac 100755 --- a/ci/regression.sh.in +++ b/ci/regression.sh.in @@ -142,8 +142,8 @@ cache() CONFIGS="-DL1_LINE_SIZE=$XSIZE -DLMEM_DISABLE" ./ci/blackbox.sh --driver=simx --app=sgemmx # test cache ways - CONFIGS="-DICACHE_NUM_WAYS=8 -DDCACHE_NUM_WAYS=8" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx - CONFIGS="-DICACHE_NUM_WAYS=8 -DDCACHE_NUM_WAYS=8" ./ci/blackbox.sh --driver=simx --app=sgemmx + CONFIGS="-DICACHE_NUM_WAYS=4 -DDCACHE_NUM_WAYS=8" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx + CONFIGS="-DICACHE_NUM_WAYS=4 -DDCACHE_NUM_WAYS=8" ./ci/blackbox.sh --driver=simx --app=sgemmx # test cache banking CONFIGS="-DLMEM_NUM_BANKS=4 -DDCACHE_NUM_BANKS=1" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx @@ -154,10 +154,10 @@ cache() CONFIGS="-DDCACHE_NUM_BANKS=2" ./ci/blackbox.sh --driver=simx --app=sgemmx # test writeback - CONFIGS="-DDCACHE_WRITEBACK=1" ./ci/blackbox.sh --driver=rtlsim --app=mstress - CONFIGS="-DDCACHE_WRITEBACK=1" ./ci/blackbox.sh --driver=simx --app=mstress - CONFIGS="-DSOCKET_SIZE=1 -DDCACHE_WRITEBACK=1 -DL2_WRITEBACK=1 -DL3_WRITEBACK=1" ./ci/blackbox.sh --driver=rtlsim --cores=2 --clusters=2 --l2cache --l3cache --app=mstress - CONFIGS="-DSOCKET_SIZE=1 -DDCACHE_WRITEBACK=1 -DL2_WRITEBACK=1 -DL3_WRITEBACK=1" ./ci/blackbox.sh --driver=simx --cores=2 --clusters=2 --l2cache --l3cache --app=mstress + CONFIGS="-DDCACHE_WRITEBACK=1 -DDCACHE_NUM_WAYS=4" ./ci/blackbox.sh --driver=rtlsim --app=mstress + CONFIGS="-DDCACHE_WRITEBACK=1 -DDCACHE_NUM_WAYS=4" ./ci/blackbox.sh --driver=simx --app=mstress + CONFIGS="-DSOCKET_SIZE=1 -DL2_WRITEBACK=1 -DL3_WRITEBACK=1" ./ci/blackbox.sh --driver=rtlsim --cores=2 --clusters=2 --l2cache --l3cache --app=mstress + CONFIGS="-DSOCKET_SIZE=1 -DL2_WRITEBACK=1 -DL3_WRITEBACK=1" ./ci/blackbox.sh --driver=simx --cores=2 --clusters=2 --l2cache --l3cache --app=mstress # cache clustering CONFIGS="-DSOCKET_SIZE=4 
-DNUM_DCACHES=4 -DNUM_ICACHES=2" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx --cores=4 --warps=1 --threads=2 diff --git a/hw/rtl/VX_cluster.sv b/hw/rtl/VX_cluster.sv index 73d9b34abc..b5e9e0a5c4 100644 --- a/hw/rtl/VX_cluster.sv +++ b/hw/rtl/VX_cluster.sv @@ -100,6 +100,7 @@ module VX_cluster import VX_gpu_pkg::*; #( .WRITEBACK (`L2_WRITEBACK), .DIRTY_BYTES (`L2_WRITEBACK), .UUID_WIDTH (`UUID_WIDTH), + .FLAGS_WIDTH (`MEM_REQ_FLAGS_WIDTH), .CORE_OUT_BUF (3), .MEM_OUT_BUF (3), .NC_ENABLE (1), diff --git a/hw/rtl/VX_config.vh b/hw/rtl/VX_config.vh index 1e10aca8ea..fb47566336 100644 --- a/hw/rtl/VX_config.vh +++ b/hw/rtl/VX_config.vh @@ -564,12 +564,8 @@ // Cache Size `ifndef L2_CACHE_SIZE -`ifdef ALTERA_S10 -`define L2_CACHE_SIZE 2097152 -`else `define L2_CACHE_SIZE 1048576 `endif -`endif // Number of Banks `ifndef L2_NUM_BANKS @@ -610,11 +606,7 @@ // Cache Size `ifndef L3_CACHE_SIZE -`ifdef ALTERA_S10 `define L3_CACHE_SIZE 2097152 -`else -`define L3_CACHE_SIZE 1048576 -`endif `endif // Number of Banks diff --git a/hw/rtl/VX_socket.sv b/hw/rtl/VX_socket.sv index 69ff88a2ce..9c7fe12870 100644 --- a/hw/rtl/VX_socket.sv +++ b/hw/rtl/VX_socket.sv @@ -100,6 +100,7 @@ module VX_socket import VX_gpu_pkg::*; #( .MRSQ_SIZE (`ICACHE_MRSQ_SIZE), .MREQ_SIZE (`ICACHE_MREQ_SIZE), .TAG_WIDTH (ICACHE_TAG_WIDTH), + .FLAGS_WIDTH (0), .UUID_WIDTH (`UUID_WIDTH), .WRITE_ENABLE (0), .NC_ENABLE (0), @@ -146,6 +147,7 @@ module VX_socket import VX_gpu_pkg::*; #( .MREQ_SIZE (`DCACHE_WRITEBACK ? 
`DCACHE_MSHR_SIZE : `DCACHE_MREQ_SIZE), .TAG_WIDTH (DCACHE_TAG_WIDTH), .UUID_WIDTH (`UUID_WIDTH), + .FLAGS_WIDTH (`MEM_REQ_FLAGS_WIDTH), .WRITE_ENABLE (1), .WRITEBACK (`DCACHE_WRITEBACK), .DIRTY_BYTES (`DCACHE_WRITEBACK), diff --git a/hw/rtl/Vortex.sv b/hw/rtl/Vortex.sv index e07aaae4d1..4f9f495cef 100644 --- a/hw/rtl/Vortex.sv +++ b/hw/rtl/Vortex.sv @@ -86,6 +86,7 @@ module Vortex import VX_gpu_pkg::*; ( .WRITEBACK (`L3_WRITEBACK), .DIRTY_BYTES (`L3_WRITEBACK), .UUID_WIDTH (`UUID_WIDTH), + .FLAGS_WIDTH (`MEM_REQ_FLAGS_WIDTH), .CORE_OUT_BUF (3), .MEM_OUT_BUF (3), .NC_ENABLE (1), diff --git a/hw/rtl/cache/VX_bank_flush.sv b/hw/rtl/cache/VX_bank_flush.sv index a01ae0e0b1..3228bd3a5b 100644 --- a/hw/rtl/cache/VX_bank_flush.sv +++ b/hw/rtl/cache/VX_bank_flush.sv @@ -118,8 +118,8 @@ module VX_bank_flush #( .N (`CS_WAY_SEL_BITS), .D (NUM_WAYS) ) ctr_decoder ( - .data_in (counter_r[`CS_LINE_SEL_BITS +: `CS_WAY_SEL_BITS]), - .valid_in (1'b1), + .sel_in (counter_r[`CS_LINE_SEL_BITS +: `CS_WAY_SEL_BITS]), + .data_in (1'b1), .data_out (flush_way) ); end else begin : g_flush_way_all diff --git a/hw/rtl/cache/VX_cache.sv b/hw/rtl/cache/VX_cache.sv index 06887944ca..d749e6ee91 100644 --- a/hw/rtl/cache/VX_cache.sv +++ b/hw/rtl/cache/VX_cache.sv @@ -54,6 +54,9 @@ module VX_cache import VX_gpu_pkg::*; #( // core request tag size parameter TAG_WIDTH = UUID_WIDTH + 1, + // core request flags + parameter FLAGS_WIDTH = 0, + // Core response output register parameter CORE_OUT_BUF = 0, @@ -90,7 +93,7 @@ module VX_cache import VX_gpu_pkg::*; #( localparam BANK_SEL_BITS = `CLOG2(NUM_BANKS); localparam BANK_SEL_WIDTH = `UP(BANK_SEL_BITS); localparam LINE_ADDR_WIDTH = (`CS_WORD_ADDR_WIDTH - BANK_SEL_BITS - WORD_SEL_BITS); - localparam CORE_REQ_DATAW = LINE_ADDR_WIDTH + 1 + WORD_SEL_WIDTH + WORD_SIZE + WORD_WIDTH + TAG_WIDTH + 1; + localparam CORE_REQ_DATAW = LINE_ADDR_WIDTH + 1 + WORD_SEL_WIDTH + WORD_SIZE + WORD_WIDTH + TAG_WIDTH + `UP(FLAGS_WIDTH); localparam CORE_RSP_DATAW = WORD_WIDTH + 
TAG_WIDTH; localparam BANK_MEM_TAG_WIDTH = UUID_WIDTH + MSHR_ADDR_WIDTH; @@ -206,13 +209,13 @@ module VX_cache import VX_gpu_pkg::*; #( wire [LINE_SIZE-1:0] mem_req_byteen; wire [`CS_LINE_WIDTH-1:0] mem_req_data; wire [MEM_TAG_WIDTH-1:0] mem_req_tag; - wire mem_req_flush; + wire [`UP(FLAGS_WIDTH)-1:0] mem_req_flags; wire mem_req_ready; - wire mem_req_flush_b; + wire [`UP(FLAGS_WIDTH)-1:0] mem_req_flush_b; VX_elastic_buffer #( - .DATAW (1 + LINE_SIZE + `CS_MEM_ADDR_WIDTH + `CS_LINE_WIDTH + MEM_TAG_WIDTH + 1), + .DATAW (1 + LINE_SIZE + `CS_MEM_ADDR_WIDTH + `CS_LINE_WIDTH + MEM_TAG_WIDTH + `UP(FLAGS_WIDTH)), .SIZE (MEM_REQ_REG_DISABLE ? `TO_OUT_BUF_SIZE(MEM_OUT_BUF) : 0), .OUT_REG (`TO_OUT_BUF_REG(MEM_OUT_BUF)) ) mem_req_buf ( @@ -220,13 +223,18 @@ module VX_cache import VX_gpu_pkg::*; #( .reset (reset), .valid_in (mem_req_valid), .ready_in (mem_req_ready), - .data_in ({mem_req_rw, mem_req_byteen, mem_req_addr, mem_req_data, mem_req_tag, mem_req_flush}), + .data_in ({mem_req_rw, mem_req_byteen, mem_req_addr, mem_req_data, mem_req_tag, mem_req_flags}), .data_out ({mem_bus_tmp_if.req_data.rw, mem_bus_tmp_if.req_data.byteen, mem_bus_tmp_if.req_data.addr, mem_bus_tmp_if.req_data.data, mem_bus_tmp_if.req_data.tag, mem_req_flush_b}), .valid_out (mem_bus_tmp_if.req_valid), .ready_out (mem_bus_tmp_if.req_ready) ); - assign mem_bus_tmp_if.req_data.flags = mem_req_flush_b ? 
`MEM_REQ_FLAGS_WIDTH'(1 << `MEM_REQ_FLAG_FLUSH) : '0; + if (FLAGS_WIDTH != 0) begin : g_mem_req_flags + assign mem_bus_tmp_if.req_data.flags = mem_req_flush_b; + end else begin : g_no_mem_req_flags + assign mem_bus_tmp_if.req_data.flags = '0; + `UNUSED_VAR (mem_req_flush_b) + end if (WRITE_ENABLE) begin : g_mem_bus_if `ASSIGN_VX_MEM_BUS_IF (mem_bus_if, mem_bus_tmp_if); @@ -244,7 +252,7 @@ module VX_cache import VX_gpu_pkg::*; #( wire [NUM_BANKS-1:0][`CS_WORD_WIDTH-1:0] per_bank_core_req_data; wire [NUM_BANKS-1:0][TAG_WIDTH-1:0] per_bank_core_req_tag; wire [NUM_BANKS-1:0][REQ_SEL_WIDTH-1:0] per_bank_core_req_idx; - wire [NUM_BANKS-1:0] per_bank_core_req_flush; + wire [NUM_BANKS-1:0][`UP(FLAGS_WIDTH)-1:0] per_bank_core_req_flags; wire [NUM_BANKS-1:0] per_bank_core_req_ready; wire [NUM_BANKS-1:0] per_bank_core_rsp_valid; @@ -259,7 +267,7 @@ module VX_cache import VX_gpu_pkg::*; #( wire [NUM_BANKS-1:0][LINE_SIZE-1:0] per_bank_mem_req_byteen; wire [NUM_BANKS-1:0][`CS_LINE_WIDTH-1:0] per_bank_mem_req_data; wire [NUM_BANKS-1:0][BANK_MEM_TAG_WIDTH-1:0] per_bank_mem_req_tag; - wire [NUM_BANKS-1:0] per_bank_mem_req_flush; + wire [NUM_BANKS-1:0][`UP(FLAGS_WIDTH)-1:0] per_bank_mem_req_flags; wire [NUM_BANKS-1:0] per_bank_mem_req_ready; wire [NUM_BANKS-1:0] per_bank_mem_rsp_ready; @@ -276,7 +284,7 @@ module VX_cache import VX_gpu_pkg::*; #( wire [NUM_REQS-1:0][WORD_SIZE-1:0] core_req_byteen; wire [NUM_REQS-1:0][`CS_WORD_WIDTH-1:0] core_req_data; wire [NUM_REQS-1:0][TAG_WIDTH-1:0] core_req_tag; - wire [NUM_REQS-1:0] core_req_flush; + wire [NUM_REQS-1:0][`UP(FLAGS_WIDTH)-1:0] core_req_flags; wire [NUM_REQS-1:0] core_req_ready; wire [NUM_REQS-1:0][LINE_ADDR_WIDTH-1:0] core_req_line_addr; @@ -293,7 +301,7 @@ module VX_cache import VX_gpu_pkg::*; #( assign core_req_addr[i] = core_bus2_if[i].req_data.addr; assign core_req_data[i] = core_bus2_if[i].req_data.data; assign core_req_tag[i] = core_bus2_if[i].req_data.tag; - assign core_req_flush[i] = 
core_bus2_if[i].req_data.flags[`MEM_REQ_FLAG_FLUSH]; + assign core_req_flags[i] = `UP(FLAGS_WIDTH)'(core_bus2_if[i].req_data.flags); assign core_bus2_if[i].req_ready = core_req_ready[i]; end @@ -325,7 +333,7 @@ module VX_cache import VX_gpu_pkg::*; #( core_req_byteen[i], core_req_data[i], core_req_tag[i], - core_req_flush[i] + core_req_flags[i] }; end @@ -366,7 +374,7 @@ module VX_cache import VX_gpu_pkg::*; #( per_bank_core_req_byteen[i], per_bank_core_req_data[i], per_bank_core_req_tag[i], - per_bank_core_req_flush[i] + per_bank_core_req_flags[i] } = core_req_data_out[i]; end @@ -393,6 +401,7 @@ module VX_cache import VX_gpu_pkg::*; #( .WRITEBACK (WRITEBACK), .UUID_WIDTH (UUID_WIDTH), .TAG_WIDTH (TAG_WIDTH), + .FLAGS_WIDTH (FLAGS_WIDTH), .CORE_OUT_REG (CORE_RSP_REG_DISABLE ? 0 : `TO_OUT_BUF_REG(CORE_OUT_BUF)), .MEM_OUT_REG (MEM_REQ_REG_DISABLE ? 0 : `TO_OUT_BUF_REG(MEM_OUT_BUF)) ) bank ( @@ -414,7 +423,7 @@ module VX_cache import VX_gpu_pkg::*; #( .core_req_data (per_bank_core_req_data[bank_id]), .core_req_tag (per_bank_core_req_tag[bank_id]), .core_req_idx (per_bank_core_req_idx[bank_id]), - .core_req_flush (per_bank_core_req_flush[bank_id]), + .core_req_flags (per_bank_core_req_flags[bank_id]), .core_req_ready (per_bank_core_req_ready[bank_id]), // Core response @@ -431,7 +440,7 @@ module VX_cache import VX_gpu_pkg::*; #( .mem_req_byteen (per_bank_mem_req_byteen[bank_id]), .mem_req_data (per_bank_mem_req_data[bank_id]), .mem_req_tag (per_bank_mem_req_tag[bank_id]), - .mem_req_flush (per_bank_mem_req_flush[bank_id]), + .mem_req_flags (per_bank_mem_req_flags[bank_id]), .mem_req_ready (per_bank_mem_req_ready[bank_id]), // Memory response @@ -487,7 +496,7 @@ module VX_cache import VX_gpu_pkg::*; #( // Memory request arbitration - wire [NUM_BANKS-1:0][(`CS_MEM_ADDR_WIDTH + 1 + LINE_SIZE + `CS_LINE_WIDTH + BANK_MEM_TAG_WIDTH + 1)-1:0] data_in; + wire [NUM_BANKS-1:0][(`CS_MEM_ADDR_WIDTH + 1 + LINE_SIZE + `CS_LINE_WIDTH + BANK_MEM_TAG_WIDTH + `UP(FLAGS_WIDTH))-1:0] 
data_in; for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_data_in assign data_in[i] = { @@ -496,7 +505,7 @@ module VX_cache import VX_gpu_pkg::*; #( per_bank_mem_req_byteen[i], per_bank_mem_req_data[i], per_bank_mem_req_tag[i], - per_bank_mem_req_flush[i] + per_bank_mem_req_flags[i] }; end @@ -504,7 +513,7 @@ module VX_cache import VX_gpu_pkg::*; #( VX_stream_arb #( .NUM_INPUTS (NUM_BANKS), - .DATAW (`CS_MEM_ADDR_WIDTH + 1 + LINE_SIZE + `CS_LINE_WIDTH + BANK_MEM_TAG_WIDTH + 1), + .DATAW (`CS_MEM_ADDR_WIDTH + 1 + LINE_SIZE + `CS_LINE_WIDTH + BANK_MEM_TAG_WIDTH + `UP(FLAGS_WIDTH)), .ARBITER ("R") ) mem_req_arb ( .clk (clk), @@ -512,7 +521,7 @@ module VX_cache import VX_gpu_pkg::*; #( .valid_in (per_bank_mem_req_valid), .ready_in (per_bank_mem_req_ready), .data_in (data_in), - .data_out ({mem_req_addr, mem_req_rw, mem_req_byteen, mem_req_data, bank_mem_req_tag, mem_req_flush}), + .data_out ({mem_req_addr, mem_req_rw, mem_req_byteen, mem_req_data, bank_mem_req_tag, mem_req_flags}), .valid_out (mem_req_valid), .ready_out (mem_req_ready), `UNUSED_PIN (sel_out) diff --git a/hw/rtl/cache/VX_cache_bank.sv b/hw/rtl/cache/VX_cache_bank.sv index 054b7c5896..c1fea14b29 100644 --- a/hw/rtl/cache/VX_cache_bank.sv +++ b/hw/rtl/cache/VX_cache_bank.sv @@ -53,6 +53,9 @@ module VX_cache_bank #( // core request tag size parameter TAG_WIDTH = UUID_WIDTH + 1, + // core request flags + parameter FLAGS_WIDTH = 0, + // Core response output register parameter CORE_OUT_REG = 0, @@ -82,7 +85,7 @@ module VX_cache_bank #( input wire [`CS_WORD_WIDTH-1:0] core_req_data, // data to be written input wire [TAG_WIDTH-1:0] core_req_tag, // identifier of the request (request id) input wire [REQ_SEL_WIDTH-1:0] core_req_idx, // index of the request in the core request array - input wire core_req_flush, // flush enable + input wire [`UP(FLAGS_WIDTH)-1:0] core_req_flags, output wire core_req_ready, // Core Response @@ -99,7 +102,7 @@ module VX_cache_bank #( output wire [LINE_SIZE-1:0] mem_req_byteen, output 
wire [`CS_LINE_WIDTH-1:0] mem_req_data, output wire [MEM_TAG_WIDTH-1:0] mem_req_tag, - output wire mem_req_flush, + output wire [`UP(FLAGS_WIDTH)-1:0] mem_req_flags, input wire mem_req_ready, // Memory response @@ -143,22 +146,25 @@ module VX_cache_bank #( wire [NUM_WAYS-1:0] flush_way_st0; wire [`CS_LINE_ADDR_WIDTH-1:0] addr_sel, addr_st0, addr_st1; - wire [`CS_LINE_SEL_BITS-1:0] line_sel_st0, line_sel_st1; + wire [`CS_LINE_SEL_BITS-1:0] line_idx_st0, line_idx_st1; + wire [`CS_TAG_SEL_BITS-1:0] line_tag_st0, line_tag_st1; wire rw_sel, rw_st0, rw_st1; - wire [WORD_SEL_WIDTH-1:0] wsel_sel, wsel_st0, wsel_st1; + wire [WORD_SEL_WIDTH-1:0] word_idx_sel, word_idx_st0, word_idx_st1; wire [WORD_SIZE-1:0] byteen_sel, byteen_st0, byteen_st1; wire [REQ_SEL_WIDTH-1:0] req_idx_sel, req_idx_st0, req_idx_st1; wire [TAG_WIDTH-1:0] tag_sel, tag_st0, tag_st1; + wire [`CS_WORD_WIDTH-1:0] write_data_st0, write_data_st1; wire [`CS_WORD_WIDTH-1:0] read_data_st1; wire [`CS_LINE_WIDTH-1:0] data_sel, data_st0, data_st1; - wire [MSHR_ADDR_WIDTH-1:0] replay_id_st0, mshr_id_st0, mshr_id_st1; + wire [MSHR_ADDR_WIDTH-1:0] mshr_id_st0, mshr_id_st1; + wire [MSHR_ADDR_WIDTH-1:0] replay_id_st0; wire valid_sel, valid_st0, valid_st1; wire is_creq_st0, is_creq_st1; wire is_fill_st0, is_fill_st1; wire is_replay_st0, is_replay_st1; - wire creq_flush_sel, creq_flush_st0, creq_flush_st1; + wire [`UP(FLAGS_WIDTH)-1:0] flags_sel, flags_st0, flags_st1; wire evict_dirty_st0, evict_dirty_st1; - wire [NUM_WAYS-1:0] way_sel_st0, way_sel_st1; + wire [NUM_WAYS-1:0] way_idx_st0, way_idx_st1; wire [NUM_WAYS-1:0] tag_matches_st0; wire [MSHR_ADDR_WIDTH-1:0] mshr_alloc_id_st0; wire [MSHR_ADDR_WIDTH-1:0] mshr_prev_st0, mshr_prev_st1; @@ -264,11 +270,11 @@ module VX_cache_bank #( assign valid_sel = init_fire || replay_fire || mem_rsp_fire || flush_fire || core_req_fire; assign rw_sel = replay_valid ? replay_rw : core_req_rw; assign byteen_sel = replay_valid ? 
replay_byteen : core_req_byteen; - assign wsel_sel = replay_valid ? replay_wsel : core_req_wsel; + assign word_idx_sel= replay_valid ? replay_wsel : core_req_wsel; assign req_idx_sel = replay_valid ? replay_idx : core_req_idx; assign tag_sel = (init_valid | flush_valid) ? (flush_valid ? flush_tag : '0) : (replay_valid ? replay_tag : (mem_rsp_valid ? mem_rsp_tag_s : core_req_tag)); - assign creq_flush_sel = core_req_valid && core_req_flush; + assign flags_sel = core_req_valid ? core_req_flags : '0; assign addr_sel = (init_valid | flush_valid) ? `CS_LINE_ADDR_WIDTH'(flush_sel) : (replay_valid ? replay_addr : (mem_rsp_valid ? mem_rsp_addr : core_req_addr)); @@ -294,14 +300,14 @@ module VX_cache_bank #( end VX_pipe_register #( - .DATAW (1 + 1 + 1 + 1 + 1 + 1 + 1 + NUM_WAYS + `CS_LINE_ADDR_WIDTH + `CS_LINE_WIDTH + 1 + WORD_SIZE + WORD_SEL_WIDTH + REQ_SEL_WIDTH + TAG_WIDTH + MSHR_ADDR_WIDTH), + .DATAW (1 + 1 + 1 + 1 + 1 + 1 + `UP(FLAGS_WIDTH) + NUM_WAYS + `CS_LINE_ADDR_WIDTH + `CS_LINE_WIDTH + 1 + WORD_SIZE + WORD_SEL_WIDTH + REQ_SEL_WIDTH + TAG_WIDTH + MSHR_ADDR_WIDTH), .RESETW (1) ) pipe_reg0 ( .clk (clk), .reset (reset), .enable (~pipe_stall), - .data_in ({valid_sel, init_valid, replay_enable, fill_enable, flush_enable, creq_enable, creq_flush_sel, flush_way, addr_sel, data_sel, rw_sel, byteen_sel, wsel_sel, req_idx_sel, tag_sel, replay_id}), - .data_out ({valid_st0, is_init_st0, is_replay_st0, is_fill_st0, is_flush_st0, is_creq_st0, creq_flush_st0, flush_way_st0, addr_st0, data_st0, rw_st0, byteen_st0, wsel_st0, req_idx_st0, tag_st0, replay_id_st0}) + .data_in ({valid_sel, init_valid, replay_enable, fill_enable, flush_enable, creq_enable, flags_sel, flush_way, addr_sel, data_sel, rw_sel, byteen_sel, word_idx_sel, req_idx_sel, tag_sel, replay_id}), + .data_out ({valid_st0, is_init_st0, is_replay_st0, is_fill_st0, is_flush_st0, is_creq_st0, flags_st0, flush_way_st0, addr_st0, data_st0, rw_st0, byteen_st0, word_idx_st0, req_idx_st0, tag_st0, replay_id_st0}) ); if 
(UUID_WIDTH != 0) begin : g_req_uuid_st0 @@ -321,9 +327,10 @@ module VX_cache_bank #( wire do_cache_wr_st0 = do_creq_wr_st0 || do_replay_wr_st0; wire do_lookup_st0 = do_cache_rd_st0 || do_cache_wr_st0; - wire [`CS_WORD_WIDTH-1:0] write_data_st0 = data_st0[`CS_WORD_WIDTH-1:0]; + assign write_data_st0 = data_st0[`CS_WORD_WIDTH-1:0]; - assign line_sel_st0 = addr_st0[`CS_LINE_SEL_BITS-1:0]; + assign line_idx_st0 = addr_st0[`CS_LINE_SEL_BITS-1:0]; + assign line_tag_st0 = `CS_LINE_ADDR_TAG(addr_st0); wire [NUM_WAYS-1:0] evict_way_st0; wire [`CS_TAG_SEL_BITS-1:0] evict_tag_st0; @@ -353,7 +360,9 @@ module VX_cache_bank #( .write (do_cache_wr_st0), .lookup (do_lookup_st0), .line_addr (addr_st0), - .way_sel (flush_way_st0), + .way_idx (flush_way_st0), + + // tag matches .tag_matches(tag_matches_st0), // replacement @@ -362,29 +371,29 @@ module VX_cache_bank #( .evict_tag (evict_tag_st0) ); - wire [`CS_LINE_ADDR_WIDTH-1:0] addr2_st0; + wire [`CS_TAG_SEL_BITS-1:0] line_tag2_st0; wire is_flush2_st0 = WRITEBACK && is_flush_st0; assign mshr_id_st0 = is_creq_st0 ? mshr_alloc_id_st0 : replay_id_st0; - assign way_sel_st0 = (is_fill_st0 || is_flush2_st0) ? evict_way_st0 : tag_matches_st0; + assign way_idx_st0 = (is_fill_st0 || is_flush2_st0) ? evict_way_st0 : tag_matches_st0; - assign addr2_st0 = (is_fill_st0 || is_flush2_st0) ? {evict_tag_st0, line_sel_st0} : addr_st0; + assign line_tag2_st0 = (is_fill_st0 || is_flush2_st0) ? 
evict_tag_st0 : line_tag_st0; VX_pipe_register #( - .DATAW (1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + `CS_LINE_ADDR_WIDTH + `CS_LINE_WIDTH + WORD_SIZE + WORD_SEL_WIDTH + REQ_SEL_WIDTH + TAG_WIDTH + MSHR_ADDR_WIDTH + MSHR_ADDR_WIDTH + NUM_WAYS + 1 + 1), + .DATAW (1 + 1 + 1 + 1 + 1 + 1 + 1 + `UP(FLAGS_WIDTH) + `CS_TAG_SEL_BITS + `CS_LINE_SEL_BITS + `CS_LINE_WIDTH + WORD_SIZE + WORD_SEL_WIDTH + REQ_SEL_WIDTH + TAG_WIDTH + MSHR_ADDR_WIDTH + MSHR_ADDR_WIDTH + NUM_WAYS + 1 + 1), .RESETW (1) ) pipe_reg1 ( .clk (clk), .reset (reset), .enable (~pipe_stall), - .data_in ({valid_st0, is_init_st0, is_replay_st0, is_fill_st0, is_flush2_st0, is_creq_st0, creq_flush_st0, rw_st0, addr2_st0, data_st0, byteen_st0, wsel_st0, req_idx_st0, tag_st0, mshr_id_st0, mshr_prev_st0, way_sel_st0, evict_dirty_st0, mshr_pending_st0}), - .data_out ({valid_st1, is_init_st1, is_replay_st1, is_fill_st1, is_flush_st1, is_creq_st1, creq_flush_st1, rw_st1, addr_st1, data_st1, byteen_st1, wsel_st1, req_idx_st1, tag_st1, mshr_id_st1, mshr_prev_st1, way_sel_st1, evict_dirty_st1, mshr_pending_st1}) + .data_in ({valid_st0, is_init_st0, is_replay_st0, is_fill_st0, is_flush2_st0, is_creq_st0, rw_st0, flags_st0, line_tag2_st0, line_idx_st0, data_st0, byteen_st0, word_idx_st0, req_idx_st0, tag_st0, mshr_id_st0, mshr_prev_st0, way_idx_st0, evict_dirty_st0, mshr_pending_st0}), + .data_out ({valid_st1, is_init_st1, is_replay_st1, is_fill_st1, is_flush_st1, is_creq_st1, rw_st1, flags_st1, line_tag_st1, line_idx_st1, data_st1, byteen_st1, word_idx_st1, req_idx_st1, tag_st1, mshr_id_st1, mshr_prev_st1, way_idx_st1, evict_dirty_st1, mshr_pending_st1}) ); // we have a tag hit - wire is_hit_st1 = (| way_sel_st1); + wire is_hit_st1 = (| way_idx_st1); if (UUID_WIDTH != 0) begin : g_req_uuid_st1 assign req_uuid_st1 = tag_st1[TAG_WIDTH-1 -: UUID_WIDTH]; @@ -413,9 +422,7 @@ module VX_cache_bank #( wire do_cache_rd_st1 = do_read_hit_st1 || do_replay_rd_st1; wire do_cache_wr_st1 = do_write_hit_st1 || do_replay_wr_st1; - assign 
line_sel_st1 = addr_st1[`CS_LINE_SEL_BITS-1:0]; - - `UNUSED_VAR (do_write_miss_st1) + assign addr_st1 = {line_tag_st1, line_idx_st1}; // ensure mshr replay always get a hit `RUNTIME_ASSERT (~(valid_st1 && is_replay_st1) || is_hit_st1, ("%t: missed mshr replay", $time)) @@ -426,28 +433,16 @@ module VX_cache_bank #( assign rdw_hazard2_sel = WRITEBACK && do_cache_wr_st0; // a writeback can evict any preceeding write always @(posedge clk) begin // stall reads following writes to same line address - rdw_hazard3_st1 <= do_cache_rd_st0 && do_cache_wr_st1 && (line_sel_st0 == line_sel_st1) + rdw_hazard3_st1 <= do_cache_rd_st0 && do_cache_wr_st1 && (line_idx_st0 == line_idx_st1) && ~rdw_hazard3_st1; // release pipeline stall end - wire [`CS_LINE_WIDTH-1:0] write_data_st1 = {`CS_WORDS_PER_LINE{data_st1[`CS_WORD_WIDTH-1:0]}}; + assign write_data_st1 = data_st1[`CS_WORD_WIDTH-1:0]; wire [`CS_LINE_WIDTH-1:0] fill_data_st1 = data_st1; - wire [LINE_SIZE-1:0] write_byteen_st1; wire [`CS_LINE_WIDTH-1:0] dirty_data_st1; wire [LINE_SIZE-1:0] dirty_byteen_st1; - if (`CS_WORDS_PER_LINE > 1) begin : g_write_byteen_st1_wsel - reg [`CS_WORDS_PER_LINE-1:0][WORD_SIZE-1:0] write_byteen_w; - always @(*) begin - write_byteen_w = '0; - write_byteen_w[wsel_st1] = byteen_st1; - end - assign write_byteen_st1 = write_byteen_w; - end else begin : g_write_byteen_st1 - assign write_byteen_st1 = byteen_st1; - end - VX_cache_data #( .INSTANCE_ID ($sformatf("%s-data", INSTANCE_ID)), .BANK_ID (BANK_ID), @@ -473,12 +468,12 @@ module VX_cache_bank #( .fill (do_fill_st1), .flush (do_flush_st1), .write (do_cache_wr_st1), - .way_sel (way_sel_st1), + .way_idx (way_idx_st1), .line_addr (addr_st1), - .wsel (wsel_st1), + .word_idx (word_idx_st1), .fill_data (fill_data_st1), .write_data (write_data_st1), - .write_byteen(write_byteen_st1), + .write_byteen(byteen_st1), .read_data (read_data_st1), .dirty_data (dirty_data_st1), .dirty_byteen(dirty_byteen_st1) @@ -488,13 +483,14 @@ module VX_cache_bank #( wire 
[MSHR_SIZE-1:0] mshr_lookup_rw_st0; wire mshr_allocate_st0 = valid_st0 && is_creq_st0 && ~pipe_stall; wire mshr_lookup_st0 = mshr_allocate_st0; + wire mshr_finalize_st1 = valid_st1 && is_creq_st1 && ~pipe_stall; // release allocated mshr entry if we had a hit wire mshr_release_st1; - if (WRITEBACK) begin : g_mshr_release_st1 + if (WRITEBACK) begin : g_mshr_release assign mshr_release_st1 = is_hit_st1; - end else begin : g_mshr_release_st1_ro + end else begin : g_mshr_release_ro // we need to keep missed write requests in MSHR if there is already a pending entry to the same address // this ensures that missed write requests are replayed locally in case a pending fill arrives without the write content // this can happen when writes are sent late, when the fill was already in flight. @@ -548,7 +544,7 @@ module VX_cache_bank #( .allocate_valid (mshr_allocate_st0), .allocate_addr (addr_st0), .allocate_rw (rw_st0), - .allocate_data ({wsel_st0, byteen_st0, write_data_st0, tag_st0, req_idx_st0}), + .allocate_data ({word_idx_st0, byteen_st0, write_data_st0, tag_st0, req_idx_st0}), .allocate_id (mshr_alloc_id_st0), .allocate_prev (mshr_prev_st0), `UNUSED_PIN (allocate_ready), @@ -571,7 +567,7 @@ module VX_cache_bank #( wire [MSHR_SIZE-1:0] lookup_matches; for (genvar i = 0; i < MSHR_SIZE; ++i) begin : g_lookup_matches assign lookup_matches[i] = mshr_lookup_pending_st0[i] - && (i != mshr_alloc_id_st0) // exclude current mshr id + && (i != mshr_id_st0) // exclude current mshr id && (WRITEBACK || ~mshr_lookup_rw_st0[i]); // exclude write requests if writethrough end assign mshr_pending_st0 = (| lookup_matches); @@ -613,7 +609,7 @@ module VX_cache_bank #( wire [`CS_LINE_ADDR_WIDTH-1:0] mreq_queue_addr; wire [MEM_TAG_WIDTH-1:0] mreq_queue_tag; wire mreq_queue_rw; - wire mreq_queue_flush; + wire [`UP(FLAGS_WIDTH)-1:0] mreq_queue_flags; wire is_fill_or_flush_st1 = is_fill_st1 || is_flush_st1; wire do_fill_or_flush_st1 = valid_st1 && is_fill_or_flush_st1; @@ -629,6 +625,7 @@ module 
VX_cache_bank #( || do_writeback_st1) && ~rdw_hazard3_st1; end else begin : g_mreq_queue_push_ro + `UNUSED_VAR (do_write_miss_st1) `UNUSED_VAR (do_writeback_st1) assign mreq_queue_push = ((do_read_miss_st1 && ~mshr_pending_st1) || do_creq_wr_st1) @@ -637,7 +634,7 @@ module VX_cache_bank #( assign mreq_queue_pop = mem_req_valid && mem_req_ready; assign mreq_queue_addr = addr_st1; - assign mreq_queue_flush = creq_flush_st1; + assign mreq_queue_flags = flags_st1; if (WRITE_ENABLE) begin : g_mreq_queue if (WRITEBACK) begin : g_writeback @@ -645,9 +642,18 @@ module VX_cache_bank #( assign mreq_queue_data = dirty_data_st1; assign mreq_queue_byteen = is_fill_or_flush_st1 ? dirty_byteen_st1 : '1; end else begin : g_writethrough + wire [LINE_SIZE-1:0] line_byteen; + VX_decoder #( + .N (`CS_WORD_SEL_BITS), + .M (WORD_SIZE) + ) byteen_dec ( + .sel_in (word_idx_st1), + .data_in (byteen_st1), + .data_out (line_byteen) + ); assign mreq_queue_rw = rw_st1; - assign mreq_queue_data = write_data_st1; - assign mreq_queue_byteen = rw_st1 ? write_byteen_st1 : '1; + assign mreq_queue_data = {`CS_WORDS_PER_LINE{write_data_st1}}; + assign mreq_queue_byteen = rw_st1 ? 
line_byteen : '1; `UNUSED_VAR (is_fill_or_flush_st1) `UNUSED_VAR (dirty_data_st1) `UNUSED_VAR (dirty_byteen_st1) @@ -667,17 +673,17 @@ module VX_cache_bank #( end VX_fifo_queue #( - .DATAW (1 + `CS_LINE_ADDR_WIDTH + LINE_SIZE + `CS_LINE_WIDTH + MEM_TAG_WIDTH + 1), + .DATAW (1 + `CS_LINE_ADDR_WIDTH + LINE_SIZE + `CS_LINE_WIDTH + MEM_TAG_WIDTH + `UP(FLAGS_WIDTH)), .DEPTH (MREQ_SIZE), - .ALM_FULL (MREQ_SIZE-PIPELINE_STAGES), + .ALM_FULL (MREQ_SIZE - PIPELINE_STAGES), .OUT_REG (MEM_OUT_REG) ) mem_req_queue ( .clk (clk), .reset (reset), .push (mreq_queue_push), .pop (mreq_queue_pop), - .data_in ({mreq_queue_rw, mreq_queue_addr, mreq_queue_byteen, mreq_queue_data, mreq_queue_tag, mreq_queue_flush}), - .data_out ({mem_req_rw, mem_req_addr, mem_req_byteen, mem_req_data, mem_req_tag, mem_req_flush}), + .data_in ({mreq_queue_rw, mreq_queue_addr, mreq_queue_byteen, mreq_queue_data, mreq_queue_tag, mreq_queue_flags}), + .data_out ({mem_req_rw, mem_req_addr, mem_req_byteen, mem_req_data, mem_req_tag, mem_req_flags}), .empty (mreq_queue_empty), .alm_full (mreq_queue_alm_full), `UNUSED_PIN (full), diff --git a/hw/rtl/cache/VX_cache_cluster.sv b/hw/rtl/cache/VX_cache_cluster.sv index 5a8bb98659..71a2ad00b2 100644 --- a/hw/rtl/cache/VX_cache_cluster.sv +++ b/hw/rtl/cache/VX_cache_cluster.sv @@ -58,6 +58,9 @@ module VX_cache_cluster import VX_gpu_pkg::*; #( // core request tag size parameter TAG_WIDTH = UUID_WIDTH + 1, + // core request flags + parameter FLAGS_WIDTH = 0, + // enable bypass for non-cacheable addresses parameter NC_ENABLE = 0, @@ -156,6 +159,7 @@ module VX_cache_cluster import VX_gpu_pkg::*; #( .DIRTY_BYTES (DIRTY_BYTES), .UUID_WIDTH (UUID_WIDTH), .TAG_WIDTH (ARB_TAG_WIDTH), + .FLAGS_WIDTH (FLAGS_WIDTH), .TAG_SEL_IDX (TAG_SEL_IDX), .CORE_OUT_BUF ((NUM_INPUTS != NUM_CACHES) ? 2 : CORE_OUT_BUF), .MEM_OUT_BUF ((NUM_CACHES > 1) ? 
2 : MEM_OUT_BUF), diff --git a/hw/rtl/cache/VX_cache_data.sv b/hw/rtl/cache/VX_cache_data.sv index 04b0ff746c..aa2a1d0efe 100644 --- a/hw/rtl/cache/VX_cache_data.sv +++ b/hw/rtl/cache/VX_cache_data.sv @@ -50,11 +50,11 @@ module VX_cache_data #( input wire flush, input wire write, input wire [`CS_LINE_ADDR_WIDTH-1:0] line_addr, - input wire [`UP(`CS_WORD_SEL_BITS)-1:0] wsel, + input wire [`UP(`CS_WORD_SEL_BITS)-1:0] word_idx, input wire [`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] fill_data, - input wire [`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] write_data, - input wire [`CS_WORDS_PER_LINE-1:0][WORD_SIZE-1:0] write_byteen, - input wire [NUM_WAYS-1:0] way_sel, + input wire [`CS_WORD_WIDTH-1:0] write_data, + input wire [WORD_SIZE-1:0] write_byteen, + input wire [NUM_WAYS-1:0] way_idx, output wire [`CS_WORD_WIDTH-1:0] read_data, output wire [`CS_LINE_WIDTH-1:0] dirty_data, output wire [LINE_SIZE-1:0] dirty_byteen @@ -68,132 +68,144 @@ module VX_cache_data #( `UNUSED_VAR (read) `UNUSED_VAR (flush) - localparam BYTEENW = (WRITE_ENABLE != 0 || (NUM_WAYS > 1)) ? (LINE_SIZE * NUM_WAYS) : 1; + localparam BYTEENW = (WRITE_ENABLE != 0) ? 
LINE_SIZE : 1; - wire [`CS_LINE_SEL_BITS-1:0] line_sel = line_addr[`CS_LINE_SEL_BITS-1:0]; + wire [NUM_WAYS-1:0][`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] line_rdata; + wire [`LOG2UP(NUM_WAYS)-1:0] way_idx_bin; + wire [`CS_LINE_SEL_BITS-1:0] line_idx; - wire [`CS_WORDS_PER_LINE-1:0][NUM_WAYS-1:0][`CS_WORD_WIDTH-1:0] line_rdata; - wire [`LOG2UP(NUM_WAYS)-1:0] way_idx; + assign line_idx = line_addr[`CS_LINE_SEL_BITS-1:0]; + + VX_encoder #( + .N (NUM_WAYS) + ) way_idx_enc ( + .data_in (way_idx), + .data_out (way_idx_bin), + `UNUSED_PIN (valid_out) + ); if (WRITEBACK) begin : g_dirty_data - wire [NUM_WAYS-1:0][`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] transposed_rdata; - VX_transpose #( - .DATAW (`CS_WORD_WIDTH), - .N (`CS_WORDS_PER_LINE), - .M (NUM_WAYS) - ) transpose ( - .data_in (line_rdata), - .data_out (transposed_rdata) - ); - assign dirty_data = transposed_rdata[way_idx]; + assign dirty_data = line_rdata[way_idx_bin]; end else begin : g_dirty_data_0 assign dirty_data = '0; end if (DIRTY_BYTES) begin : g_dirty_byteen - wire [NUM_WAYS-1:0][LINE_SIZE-1:0] bs_rdata; - wire [NUM_WAYS-1:0][LINE_SIZE-1:0] bs_wdata; + wire [NUM_WAYS-1:0][`CS_WORDS_PER_LINE-1:0][WORD_SIZE-1:0] bs_rdata; + wire [NUM_WAYS-1:0][`CS_WORDS_PER_LINE-1:0][WORD_SIZE-1:0] bs_wdata; for (genvar i = 0; i < NUM_WAYS; ++i) begin : g_bs_wdata - wire [LINE_SIZE-1:0] wdata = write ? (bs_rdata[i] | write_byteen) : ((fill || flush) ? '0 : bs_rdata[i]); - assign bs_wdata[i] = init ? '0 : (way_sel[i] ? wdata : bs_rdata[i]); + for (genvar j = 0; j < `CS_WORDS_PER_LINE; ++j) begin : g_j + wire [WORD_SIZE-1:0] word_mask = {WORD_SIZE{(WORD_SIZE == 1) || (word_idx == j)}}; + wire [WORD_SIZE-1:0] wdata = write ? (bs_rdata[i][j] | (write_byteen & word_mask)) : ((fill || flush) ? '0 : bs_rdata[i][j]); + assign bs_wdata[i][j] = init ? '0 : (way_idx[i] ? 
wdata : bs_rdata[i][j]); + end end + wire bs_read = write || fill || flush; + wire bs_write = init || write || fill || flush; + VX_sp_ram #( .DATAW (LINE_SIZE * NUM_WAYS), .SIZE (`CS_LINES_PER_BANK) ) byteen_store ( .clk (clk), .reset (reset), - .read (write || fill || flush), - .write (init || write || fill || flush), + .read (bs_read && ~stall), + .write (bs_write && ~stall), .wren (1'b1), - .addr (line_sel), + .addr (line_idx), .wdata (bs_wdata), .rdata (bs_rdata) ); - assign dirty_byteen = bs_rdata[way_idx]; + assign dirty_byteen = bs_rdata[way_idx_bin]; end else begin : g_dirty_byteen_0 assign dirty_byteen = '1; end - // order the data layout to perform ways multiplexing last. - // this allows converting way index to binary in parallel with BRAM readaccess and way selection. + for (genvar i = 0; i < NUM_WAYS; ++i) begin : g_data_store - wire [`CS_WORDS_PER_LINE-1:0][NUM_WAYS-1:0][`CS_WORD_WIDTH-1:0] line_wdata; - wire [BYTEENW-1:0] line_wren; + wire [`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] line_wdata; + wire [BYTEENW-1:0] line_wren; + wire line_write; + wire line_read; - if (WRITE_ENABLE != 0 || (NUM_WAYS > 1)) begin : g_line_wdata - wire [`CS_WORDS_PER_LINE-1:0][NUM_WAYS-1:0][WORD_SIZE-1:0] wren_w; - for (genvar i = 0; i < `CS_WORDS_PER_LINE; ++i) begin : g_i - for (genvar j = 0; j < NUM_WAYS; ++j) begin : g_j - assign line_wdata[i][j] = (fill || !WRITE_ENABLE) ? fill_data[i] : write_data[i]; - assign wren_w[i][j] = ((fill || !WRITE_ENABLE) ? {WORD_SIZE{1'b1}} : write_byteen[i]) - & {WORD_SIZE{(way_sel[j] || (NUM_WAYS == 1))}}; + wire way_en = (NUM_WAYS == 1) || way_idx[i]; + + if (WRITE_ENABLE != 0) begin : g_line_data + wire [`CS_WORDS_PER_LINE-1:0][WORD_SIZE-1:0] wren_w; + for (genvar j = 0; j < `CS_WORDS_PER_LINE; ++j) begin : g_j + wire word_en = (WORD_SIZE == 1) || (word_idx == j); + assign line_wdata[j] = fill ? 
fill_data[j] : write_data; + assign wren_w[j] = {WORD_SIZE{fill}} | (write_byteen & {WORD_SIZE{word_en}}); + end + assign line_wren = wren_w; + assign line_write = (fill || write) && way_en; + if (WRITEBACK) begin : g_line_read_wb + assign line_read = (read || fill || flush); + end else begin : g_line_read_wt + assign line_read = read; end + end else begin : g_line_data_ro + `UNUSED_VAR (write) + `UNUSED_VAR (write_byteen) + `UNUSED_VAR (write_data) + assign line_wdata = fill_data; + assign line_wren = 1'b1; + assign line_write = fill && way_en; + assign line_read = read; end - assign line_wren = wren_w; - end else begin : g_line_wdata_ro - `UNUSED_VAR (write) - `UNUSED_VAR (write_byteen) - `UNUSED_VAR (write_data) - assign line_wdata = fill_data; - assign line_wren = fill; - end - VX_encoder #( - .N (NUM_WAYS) - ) way_enc ( - .data_in (way_sel), - .data_out (way_idx), - `UNUSED_PIN (valid_out) - ); - - wire line_read = (read && ~stall) - || (WRITEBACK && (fill || flush)); - - wire line_write = write || fill; - - VX_sp_ram #( - .DATAW (`CS_LINE_WIDTH * NUM_WAYS), - .SIZE (`CS_LINES_PER_BANK), - .WRENW (BYTEENW), - .NO_RWCHECK (1), - .RW_ASSERT (1) - ) data_store ( - .clk (clk), - .reset (reset), - .read (line_read), - .write (line_write), - .wren (line_wren), - .addr (line_sel), - .wdata (line_wdata), - .rdata (line_rdata) - ); + VX_sp_ram #( + .DATAW (`CS_LINE_WIDTH), + .SIZE (`CS_LINES_PER_BANK), + .WRENW (BYTEENW), + .NO_RWCHECK (1), + .RW_ASSERT (1) + ) data_store ( + .clk (clk), + .reset (reset), + .read (line_read && ~stall), + .write (line_write && ~stall), + .wren (line_wren), + .addr (line_idx), + .wdata (line_wdata), + .rdata (line_rdata[i]) + ); + end - wire [NUM_WAYS-1:0][`CS_WORD_WIDTH-1:0] per_way_rdata; - if (`CS_WORDS_PER_LINE > 1) begin : g_per_way_rdata_wsel - assign per_way_rdata = line_rdata[wsel]; - end else begin : g_per_way_rdata - `UNUSED_VAR (wsel) - assign per_way_rdata = line_rdata; + if (`CS_WORDS_PER_LINE > 1) begin : g_read_data + // 
order the data layout to perform ways multiplexing last. + // this allows converting way index to binary in parallel with BRAM readaccess and way selection. + wire [`CS_WORDS_PER_LINE-1:0][NUM_WAYS-1:0][`CS_WORD_WIDTH-1:0] transposed_rdata; + VX_transpose #( + .DATAW (`CS_WORD_WIDTH), + .N (NUM_WAYS), + .M (`CS_WORDS_PER_LINE) + ) transpose ( + .data_in (line_rdata), + .data_out (transposed_rdata) + ); + assign read_data = transposed_rdata[word_idx][way_idx_bin]; + end else begin : g_read_data_1w + `UNUSED_VAR (word_idx) + assign read_data = line_rdata[way_idx_bin]; end - assign read_data = per_way_rdata[way_idx]; `ifdef DBG_TRACE_CACHE always @(posedge clk) begin if (fill && ~stall) begin - `TRACE(3, ("%t: %s fill: addr=0x%0h, way=%b, blk_addr=%0d, data=0x%h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, fill_data)) + `TRACE(3, ("%t: %s fill: addr=0x%0h, way=%b, blk_addr=%0d, data=0x%h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_idx, line_idx, fill_data)) end if (flush && ~stall) begin - `TRACE(3, ("%t: %s flush: addr=0x%0h, way=%b, blk_addr=%0d, byteen=0x%h, data=0x%h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, dirty_byteen, dirty_data)) + `TRACE(3, ("%t: %s flush: addr=0x%0h, way=%b, blk_addr=%0d, byteen=0x%h, data=0x%h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_idx, line_idx, dirty_byteen, dirty_data)) end if (read && ~stall) begin - `TRACE(3, ("%t: %s read: addr=0x%0h, way=%b, blk_addr=%0d, wsel=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, wsel, read_data, req_uuid)) + `TRACE(3, ("%t: %s read: addr=0x%0h, way=%b, blk_addr=%0d, wsel=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_idx, line_idx, word_idx, read_data, req_uuid)) end if (write && ~stall) begin - `TRACE(3, ("%t: %s write: addr=0x%0h, way=%b, blk_addr=%0d, 
wsel=%0d, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, wsel, write_byteen[wsel], write_data[wsel], req_uuid)) + `TRACE(3, ("%t: %s write: addr=0x%0h, way=%b, blk_addr=%0d, wsel=%0d, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_idx, line_idx, word_idx, write_byteen, write_data, req_uuid)) end end `endif diff --git a/hw/rtl/cache/VX_cache_tags.sv b/hw/rtl/cache/VX_cache_tags.sv index 92497b80bb..cc8fa8571c 100644 --- a/hw/rtl/cache/VX_cache_tags.sv +++ b/hw/rtl/cache/VX_cache_tags.sv @@ -47,7 +47,7 @@ module VX_cache_tags #( input wire write, input wire lookup, input wire [`CS_LINE_ADDR_WIDTH-1:0] line_addr, - input wire [NUM_WAYS-1:0] way_sel, + input wire [NUM_WAYS-1:0] way_idx, output wire [NUM_WAYS-1:0] tag_matches, // eviction @@ -62,7 +62,7 @@ module VX_cache_tags #( // valid, dirty, tag localparam TAG_WIDTH = 1 + WRITEBACK + `CS_TAG_SEL_BITS; - wire [`CS_LINE_SEL_BITS-1:0] line_sel = line_addr[`CS_LINE_SEL_BITS-1:0]; + wire [`CS_LINE_SEL_BITS-1:0] line_idx = line_addr[`CS_LINE_SEL_BITS-1:0]; wire [`CS_TAG_SEL_BITS-1:0] line_tag = `CS_LINE_ADDR_TAG(line_addr); wire [NUM_WAYS-1:0][`CS_TAG_SEL_BITS-1:0] read_tag; @@ -80,7 +80,7 @@ module VX_cache_tags #( end end - assign evict_way = fill ? evict_way_r : way_sel; + assign evict_way = fill ? 
evict_way_r : way_idx; VX_onehot_mux #( .DATAW (`CS_TAG_SEL_BITS), @@ -103,7 +103,7 @@ module VX_cache_tags #( for (genvar i = 0; i < NUM_WAYS; ++i) begin : g_tag_store wire do_fill = fill_s && evict_way[i]; - wire do_flush = flush_s && (!WRITEBACK || way_sel[i]); // flush the whole line in writethrough mode + wire do_flush = flush_s && (!WRITEBACK || way_idx[i]); // flush the whole line in writethrough mode wire do_write = WRITEBACK && write && tag_matches[i]; wire line_read = (WRITEBACK && (fill_s || flush_s)); @@ -130,10 +130,10 @@ module VX_cache_tags #( ) tag_store ( .clk (clk), .reset (reset), - .read (line_read), - .write (line_write), + .read (line_read && ~stall), + .write (line_write && ~stall), .wren (1'b1), - .addr (line_sel), + .addr (line_idx), .wdata (line_wdata), .rdata (line_rdata) ); @@ -146,29 +146,29 @@ module VX_cache_tags #( assign evict_dirty = | (read_dirty & evict_way); `ifdef DBG_TRACE_CACHE - wire [`CS_LINE_ADDR_WIDTH-1:0] evict_line_addr = {evict_tag, line_sel}; + wire [`CS_LINE_ADDR_WIDTH-1:0] evict_line_addr = {evict_tag, line_idx}; always @(posedge clk) begin if (fill && ~stall) begin - `TRACE(3, ("%t: %s fill: addr=0x%0h, way=%b, blk_addr=%0d, tag_id=0x%0h, dirty=%b, evict_addr=0x%0h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), evict_way, line_sel, line_tag, evict_dirty, `CS_LINE_TO_FULL_ADDR(evict_line_addr, BANK_ID))) + `TRACE(3, ("%t: %s fill: addr=0x%0h, way=%b, blk_addr=%0d, tag_id=0x%0h, dirty=%b, evict_addr=0x%0h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), evict_way, line_idx, line_tag, evict_dirty, `CS_LINE_TO_FULL_ADDR(evict_line_addr, BANK_ID))) end if (init) begin - `TRACE(3, ("%t: %s init: addr=0x%0h, blk_addr=%0d\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), line_sel)) + `TRACE(3, ("%t: %s init: addr=0x%0h, blk_addr=%0d\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), line_idx)) end if (flush && ~stall) begin - `TRACE(3, ("%t: %s flush: 
addr=0x%0h, way=%b, blk_addr=%0d, dirty=%b\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(evict_line_addr, BANK_ID), way_sel, line_sel, evict_dirty)) + `TRACE(3, ("%t: %s flush: addr=0x%0h, way=%b, blk_addr=%0d, dirty=%b\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(evict_line_addr, BANK_ID), way_idx, line_idx, evict_dirty)) end if (lookup && ~stall) begin if (tag_matches != 0) begin if (write) begin - `TRACE(3, ("%t: %s write-hit: addr=0x%0h, way=%b, blk_addr=%0d, tag_id=0x%0h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), tag_matches, line_sel, line_tag, req_uuid)) + `TRACE(3, ("%t: %s write-hit: addr=0x%0h, way=%b, blk_addr=%0d, tag_id=0x%0h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), tag_matches, line_idx, line_tag, req_uuid)) end else begin - `TRACE(3, ("%t: %s read-hit: addr=0x%0h, way=%b, blk_addr=%0d, tag_id=0x%0h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), tag_matches, line_sel, line_tag, req_uuid)) + `TRACE(3, ("%t: %s read-hit: addr=0x%0h, way=%b, blk_addr=%0d, tag_id=0x%0h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), tag_matches, line_idx, line_tag, req_uuid)) end end else begin if (write) begin - `TRACE(3, ("%t: %s write-miss: addr=0x%0h, blk_addr=%0d, tag_id=0x%0h, (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), line_sel, line_tag, req_uuid)) + `TRACE(3, ("%t: %s write-miss: addr=0x%0h, blk_addr=%0d, tag_id=0x%0h, (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), line_idx, line_tag, req_uuid)) end else begin - `TRACE(3, ("%t: %s read-miss: addr=0x%0h, blk_addr=%0d, tag_id=0x%0h, (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), line_sel, line_tag, req_uuid)) + `TRACE(3, ("%t: %s read-miss: addr=0x%0h, blk_addr=%0d, tag_id=0x%0h, (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), line_idx, line_tag, req_uuid)) end end end diff --git 
a/hw/rtl/cache/VX_cache_wrap.sv b/hw/rtl/cache/VX_cache_wrap.sv index 0b8a1f3c46..d958736c44 100644 --- a/hw/rtl/cache/VX_cache_wrap.sv +++ b/hw/rtl/cache/VX_cache_wrap.sv @@ -57,6 +57,9 @@ module VX_cache_wrap import VX_gpu_pkg::*; #( // core request tag size parameter TAG_WIDTH = UUID_WIDTH + 1, + // core request flags + parameter FLAGS_WIDTH = 0, + // enable bypass for non-cacheable addresses parameter NC_ENABLE = 0, @@ -175,6 +178,7 @@ module VX_cache_wrap import VX_gpu_pkg::*; #( .DIRTY_BYTES (DIRTY_BYTES), .UUID_WIDTH (UUID_WIDTH), .TAG_WIDTH (TAG_WIDTH), + .FLAGS_WIDTH (FLAGS_WIDTH), .CORE_OUT_BUF (NC_OR_BYPASS ? 1 : CORE_OUT_BUF), .MEM_OUT_BUF (NC_OR_BYPASS ? 1 : MEM_OUT_BUF) ) cache ( diff --git a/hw/rtl/libs/VX_cyclic_arbiter.sv b/hw/rtl/libs/VX_cyclic_arbiter.sv index a4dead008f..2899b55fd4 100644 --- a/hw/rtl/libs/VX_cyclic_arbiter.sv +++ b/hw/rtl/libs/VX_cyclic_arbiter.sv @@ -69,8 +69,8 @@ module VX_cyclic_arbiter #( .N (LOG_NUM_REQS), .D (NUM_REQS) ) grant_decoder ( - .data_in (grant_index), - .valid_in (1'b1), + .sel_in (grant_index), + .data_in (1'b1), .data_out (grant_onehot_w) ); diff --git a/hw/rtl/libs/VX_decoder.sv b/hw/rtl/libs/VX_decoder.sv index 7c0c760e56..ce2c509e66 100644 --- a/hw/rtl/libs/VX_decoder.sv +++ b/hw/rtl/libs/VX_decoder.sv @@ -18,25 +18,30 @@ `TRACING_OFF module VX_decoder #( - parameter N = 1, + parameter N = 0, parameter M = 1, parameter MODEL = 0, parameter D = 1 << N ) ( - input wire [N-1:0] data_in, - input wire [M-1:0] valid_in, + input wire [`UP(N)-1:0] sel_in, + input wire [M-1:0] data_in, output wire [D-1:0][M-1:0] data_out ); - logic [D-1:0][M-1:0] shift; - if (MODEL == 1) begin : g_model1 - always @(*) begin - shift = '0; - shift[data_in] = {M{1'b1}}; + if (N != 0) begin : g_decoder + logic [D-1:0][M-1:0] shift; + if (MODEL == 1) begin : g_model1 + always @(*) begin + shift = '0; + shift[sel_in] = {M{1'b1}}; + end + end else begin : g_model0 + assign shift = ((D*M)'({M{1'b1}})) << (sel_in * M); end - end else begin 
: g_model0 - assign shift = ((D*M)'({M{1'b1}})) << (data_in * M); + assign data_out = {D{data_in}} & shift; + end else begin : g_passthru + `UNUSED_VAR (sel_in) + assign data_out = data_in; end - assign data_out = {D{valid_in}} & shift; endmodule `TRACING_ON diff --git a/hw/rtl/libs/VX_mem_adapter.sv b/hw/rtl/libs/VX_mem_adapter.sv index 4ece7cf699..2cae6fead6 100644 --- a/hw/rtl/libs/VX_mem_adapter.sv +++ b/hw/rtl/libs/VX_mem_adapter.sv @@ -104,8 +104,8 @@ module VX_mem_adapter #( .N (D), .M (SRC_DATA_WIDTH/8) ) req_be_dec ( - .data_in (req_idx), - .valid_in (mem_req_byteen_in), + .sel_in (req_idx), + .data_in (mem_req_byteen_in), .data_out (mem_req_byteen_out_w) ); @@ -113,8 +113,8 @@ module VX_mem_adapter #( .N (D), .M (SRC_DATA_WIDTH) ) req_data_dec ( - .data_in (req_idx), - .valid_in (mem_req_data_in), + .sel_in (req_idx), + .data_in (mem_req_data_in), .data_out (mem_req_data_out_w) ); diff --git a/hw/rtl/libs/VX_mem_coalescer.sv b/hw/rtl/libs/VX_mem_coalescer.sv index c27f04da4d..760290a1c6 100644 --- a/hw/rtl/libs/VX_mem_coalescer.sv +++ b/hw/rtl/libs/VX_mem_coalescer.sv @@ -18,7 +18,7 @@ module VX_mem_coalescer #( parameter `STRING INSTANCE_ID = "", parameter NUM_REQS = 1, parameter ADDR_WIDTH = 32, - parameter FLAGS_WIDTH = 1, + parameter FLAGS_WIDTH = 0, parameter DATA_IN_SIZE = 4, parameter DATA_OUT_SIZE = 64, parameter TAG_WIDTH = 8, @@ -43,7 +43,7 @@ module VX_mem_coalescer #( input wire [NUM_REQS-1:0] in_req_mask, input wire [NUM_REQS-1:0][DATA_IN_SIZE-1:0] in_req_byteen, input wire [NUM_REQS-1:0][ADDR_WIDTH-1:0] in_req_addr, - input wire [NUM_REQS-1:0][FLAGS_WIDTH-1:0] in_req_flags, + input wire [NUM_REQS-1:0][`UP(FLAGS_WIDTH)-1:0] in_req_flags, input wire [NUM_REQS-1:0][DATA_IN_WIDTH-1:0] in_req_data, input wire [TAG_WIDTH-1:0] in_req_tag, output wire in_req_ready, @@ -61,7 +61,7 @@ module VX_mem_coalescer #( output wire [OUT_REQS-1:0] out_req_mask, output wire [OUT_REQS-1:0][DATA_OUT_SIZE-1:0] out_req_byteen, output wire 
[OUT_REQS-1:0][OUT_ADDR_WIDTH-1:0] out_req_addr, - output wire [OUT_REQS-1:0][FLAGS_WIDTH-1:0] out_req_flags, + output wire [OUT_REQS-1:0][`UP(FLAGS_WIDTH)-1:0] out_req_flags, output wire [OUT_REQS-1:0][DATA_OUT_WIDTH-1:0] out_req_data, output wire [OUT_TAG_WIDTH-1:0] out_req_tag, input wire out_req_ready, @@ -92,7 +92,7 @@ module VX_mem_coalescer #( logic out_req_rw_r, out_req_rw_n; logic [OUT_REQS-1:0] out_req_mask_r, out_req_mask_n; logic [OUT_REQS-1:0][OUT_ADDR_WIDTH-1:0] out_req_addr_r, out_req_addr_n; - logic [OUT_REQS-1:0][FLAGS_WIDTH-1:0] out_req_flags_r, out_req_flags_n; + logic [OUT_REQS-1:0][`UP(FLAGS_WIDTH)-1:0] out_req_flags_r, out_req_flags_n; logic [OUT_REQS-1:0][DATA_RATIO-1:0][DATA_IN_SIZE-1:0] out_req_byteen_r, out_req_byteen_n; logic [OUT_REQS-1:0][DATA_RATIO-1:0][DATA_IN_WIDTH-1:0] out_req_data_r, out_req_data_n; logic [OUT_TAG_WIDTH-1:0] out_req_tag_r, out_req_tag_n; @@ -110,7 +110,7 @@ module VX_mem_coalescer #( logic [OUT_REQS-1:0] batch_valid_r, batch_valid_n; logic [OUT_REQS-1:0][OUT_ADDR_WIDTH-1:0] seed_addr_r, seed_addr_n; - logic [OUT_REQS-1:0][FLAGS_WIDTH-1:0] seed_flags_r, seed_flags_n; + logic [OUT_REQS-1:0][`UP(FLAGS_WIDTH)-1:0] seed_flags_r, seed_flags_n; logic [NUM_REQS-1:0] addr_matches_r, addr_matches_n; logic [NUM_REQS-1:0] req_rem_mask_r, req_rem_mask_n; @@ -139,7 +139,7 @@ module VX_mem_coalescer #( assign addr_base[j] = in_req_addr[DATA_RATIO * i + j][ADDR_WIDTH-1:DATA_RATIO_W]; end - wire [DATA_RATIO-1:0][FLAGS_WIDTH-1:0] req_flags; + wire [DATA_RATIO-1:0][`UP(FLAGS_WIDTH)-1:0] req_flags; for (genvar j = 0; j < DATA_RATIO; ++j) begin : g_req_flags assign req_flags[j] = in_req_flags[DATA_RATIO * i + j]; end @@ -221,7 +221,7 @@ module VX_mem_coalescer #( end VX_pipe_register #( - .DATAW (1 + NUM_REQS + 1 + 1 + NUM_REQS + OUT_REQS * (1 + 1 + OUT_ADDR_WIDTH + FLAGS_WIDTH + OUT_ADDR_WIDTH + FLAGS_WIDTH + DATA_OUT_SIZE + DATA_OUT_WIDTH) + OUT_TAG_WIDTH), + .DATAW (1 + NUM_REQS + 1 + 1 + NUM_REQS + OUT_REQS * (1 + 1 + 
OUT_ADDR_WIDTH + `UP(FLAGS_WIDTH) + OUT_ADDR_WIDTH + `UP(FLAGS_WIDTH) + DATA_OUT_SIZE + DATA_OUT_WIDTH) + OUT_TAG_WIDTH), .RESETW (1 + NUM_REQS + 1), .INIT_VALUE ({1'b0, {NUM_REQS{1'b1}}, 1'b0}) ) pipe_reg ( @@ -270,7 +270,12 @@ module VX_mem_coalescer #( assign out_req_mask = out_req_mask_r; assign out_req_byteen = out_req_byteen_r; assign out_req_addr = out_req_addr_r; - assign out_req_flags = out_req_flags_r; + if (FLAGS_WIDTH != 0) begin : g_out_req_flags + assign out_req_flags = out_req_flags_r; + end else begin : g_out_req_flags_0 + `UNUSED_VAR (out_req_flags_r) + assign out_req_flags = '0; + end assign out_req_data = out_req_data_r; assign out_req_tag = out_req_tag_r; diff --git a/hw/rtl/libs/VX_mem_scheduler.sv b/hw/rtl/libs/VX_mem_scheduler.sv index 4ba8bf1479..abd68da241 100644 --- a/hw/rtl/libs/VX_mem_scheduler.sv +++ b/hw/rtl/libs/VX_mem_scheduler.sv @@ -21,7 +21,7 @@ module VX_mem_scheduler #( parameter WORD_SIZE = 4, parameter LINE_SIZE = WORD_SIZE, parameter ADDR_WIDTH = 32 - `CLOG2(WORD_SIZE), - parameter FLAGS_WIDTH = 1, + parameter FLAGS_WIDTH = 0, parameter TAG_WIDTH = 8, parameter UUID_WIDTH = 0, // upper section of the request tag contains the UUID parameter CORE_QUEUE_SIZE= 8, @@ -50,7 +50,7 @@ module VX_mem_scheduler #( input wire [CORE_REQS-1:0] core_req_mask, input wire [CORE_REQS-1:0][WORD_SIZE-1:0] core_req_byteen, input wire [CORE_REQS-1:0][ADDR_WIDTH-1:0] core_req_addr, - input wire [CORE_REQS-1:0][FLAGS_WIDTH-1:0] core_req_flags, + input wire [CORE_REQS-1:0][`UP(FLAGS_WIDTH)-1:0] core_req_flags, input wire [CORE_REQS-1:0][WORD_WIDTH-1:0] core_req_data, input wire [TAG_WIDTH-1:0] core_req_tag, output wire core_req_ready, @@ -72,7 +72,7 @@ module VX_mem_scheduler #( output wire [MEM_CHANNELS-1:0] mem_req_mask, output wire [MEM_CHANNELS-1:0][LINE_SIZE-1:0] mem_req_byteen, output wire [MEM_CHANNELS-1:0][MEM_ADDR_WIDTH-1:0] mem_req_addr, - output wire [MEM_CHANNELS-1:0][FLAGS_WIDTH-1:0] mem_req_flags, + output wire 
[MEM_CHANNELS-1:0][`UP(FLAGS_WIDTH)-1:0] mem_req_flags, output wire [MEM_CHANNELS-1:0][LINE_WIDTH-1:0] mem_req_data, output wire [MEM_TAG_WIDTH-1:0] mem_req_tag, input wire mem_req_ready, @@ -112,7 +112,7 @@ module VX_mem_scheduler #( wire reqq_rw; wire [CORE_REQS-1:0][WORD_SIZE-1:0] reqq_byteen; wire [CORE_REQS-1:0][ADDR_WIDTH-1:0] reqq_addr; - wire [CORE_REQS-1:0][FLAGS_WIDTH-1:0] reqq_flags; + wire [CORE_REQS-1:0][`UP(FLAGS_WIDTH)-1:0] reqq_flags; wire [CORE_REQS-1:0][WORD_WIDTH-1:0] reqq_data; wire [REQQ_TAG_WIDTH-1:0] reqq_tag; wire reqq_ready; @@ -122,7 +122,7 @@ module VX_mem_scheduler #( wire reqq_rw_s; wire [MERGED_REQS-1:0][LINE_SIZE-1:0] reqq_byteen_s; wire [MERGED_REQS-1:0][MEM_ADDR_WIDTH-1:0] reqq_addr_s; - wire [MERGED_REQS-1:0][FLAGS_WIDTH-1:0] reqq_flags_s; + wire [MERGED_REQS-1:0][`UP(FLAGS_WIDTH)-1:0] reqq_flags_s; wire [MERGED_REQS-1:0][LINE_WIDTH-1:0] reqq_data_s; wire [MERGED_TAG_WIDTH-1:0] reqq_tag_s; wire reqq_ready_s; @@ -132,7 +132,7 @@ module VX_mem_scheduler #( wire mem_req_rw_s; wire [MEM_CHANNELS-1:0][LINE_SIZE-1:0] mem_req_byteen_s; wire [MEM_CHANNELS-1:0][MEM_ADDR_WIDTH-1:0] mem_req_addr_s; - wire [MEM_CHANNELS-1:0][FLAGS_WIDTH-1:0] mem_req_flags_s; + wire [MEM_CHANNELS-1:0][`UP(FLAGS_WIDTH)-1:0] mem_req_flags_s; wire [MEM_CHANNELS-1:0][LINE_WIDTH-1:0] mem_req_data_s; wire [MEM_TAG_WIDTH-1:0] mem_req_tag_s; wire mem_req_ready_s; @@ -167,7 +167,7 @@ module VX_mem_scheduler #( end VX_elastic_buffer #( - .DATAW (1 + CORE_REQS * (1 + WORD_SIZE + ADDR_WIDTH + FLAGS_WIDTH + WORD_WIDTH) + REQQ_TAG_WIDTH), + .DATAW (1 + CORE_REQS * (1 + WORD_SIZE + ADDR_WIDTH + `UP(FLAGS_WIDTH) + WORD_WIDTH) + REQQ_TAG_WIDTH), .SIZE (CORE_QUEUE_SIZE), .OUT_REG (1) ) req_queue ( @@ -297,7 +297,7 @@ module VX_mem_scheduler #( wire [MEM_BATCHES-1:0][MEM_CHANNELS-1:0] mem_req_mask_b; wire [MEM_BATCHES-1:0][MEM_CHANNELS-1:0][LINE_SIZE-1:0] mem_req_byteen_b; wire [MEM_BATCHES-1:0][MEM_CHANNELS-1:0][MEM_ADDR_WIDTH-1:0] mem_req_addr_b; - wire 
[MEM_BATCHES-1:0][MEM_CHANNELS-1:0][FLAGS_WIDTH-1:0] mem_req_flags_b; + wire [MEM_BATCHES-1:0][MEM_CHANNELS-1:0][`UP(FLAGS_WIDTH)-1:0] mem_req_flags_b; wire [MEM_BATCHES-1:0][MEM_CHANNELS-1:0][LINE_WIDTH-1:0] mem_req_data_b; wire [BATCH_SEL_WIDTH-1:0] req_batch_idx; @@ -385,8 +385,10 @@ module VX_mem_scheduler #( assign reqq_ready_s = req_sent_all; + wire [MEM_CHANNELS-1:0][`UP(FLAGS_WIDTH)-1:0] mem_req_flags_u; + VX_elastic_buffer #( - .DATAW (MEM_CHANNELS + 1 + MEM_CHANNELS * (LINE_SIZE + MEM_ADDR_WIDTH + FLAGS_WIDTH + LINE_WIDTH) + MEM_TAG_WIDTH), + .DATAW (MEM_CHANNELS + 1 + MEM_CHANNELS * (LINE_SIZE + MEM_ADDR_WIDTH + `UP(FLAGS_WIDTH) + LINE_WIDTH) + MEM_TAG_WIDTH), .SIZE (`TO_OUT_BUF_SIZE(MEM_OUT_BUF)), .OUT_REG (`TO_OUT_BUF_REG(MEM_OUT_BUF)) ) mem_req_buf ( @@ -395,11 +397,18 @@ module VX_mem_scheduler #( .valid_in (mem_req_valid_s), .ready_in (mem_req_ready_s), .data_in ({mem_req_mask_s, mem_req_rw_s, mem_req_byteen_s, mem_req_addr_s, mem_req_flags_s, mem_req_data_s, mem_req_tag_s}), - .data_out ({mem_req_mask, mem_req_rw, mem_req_byteen, mem_req_addr, mem_req_flags, mem_req_data, mem_req_tag}), + .data_out ({mem_req_mask, mem_req_rw, mem_req_byteen, mem_req_addr, mem_req_flags_u, mem_req_data, mem_req_tag}), .valid_out (mem_req_valid), .ready_out (mem_req_ready) ); + if (FLAGS_WIDTH != 0) begin : g_mem_req_flags + assign mem_req_flags = mem_req_flags_u; + end else begin : g_mem_req_flags_0 + `UNUSED_VAR (mem_req_flags_u) + assign mem_req_flags = '0; + end + // Handle memory responses //////////////////////////////////////////////// reg [CORE_QUEUE_SIZE-1:0][CORE_REQS-1:0] rsp_rem_mask; diff --git a/hw/rtl/libs/VX_rr_arbiter.sv b/hw/rtl/libs/VX_rr_arbiter.sv index efe9838d66..3ca1f57e56 100644 --- a/hw/rtl/libs/VX_rr_arbiter.sv +++ b/hw/rtl/libs/VX_rr_arbiter.sv @@ -484,8 +484,8 @@ module VX_rr_arbiter #( .N (LOG_NUM_REQS), .D (NUM_REQS) ) grant_decoder ( - .data_in (grant_index), - .valid_in (grant_valid), + .sel_in (grant_index), + .data_in (grant_valid), 
.data_out (grant_onehot) ); diff --git a/hw/rtl/libs/VX_stream_xbar.sv b/hw/rtl/libs/VX_stream_xbar.sv index febfd0465b..0c4eff2f16 100644 --- a/hw/rtl/libs/VX_stream_xbar.sv +++ b/hw/rtl/libs/VX_stream_xbar.sv @@ -68,8 +68,8 @@ module VX_stream_xbar #( .N (OUT_WIDTH), .D (NUM_OUTPUTS) ) sel_in_decoder ( - .data_in (sel_in[i]), - .valid_in (valid_in[i]), + .sel_in (sel_in[i]), + .data_in (valid_in[i]), .data_out (per_output_valid_in[i]) ); assign ready_in[i] = | per_output_ready_in_w[i]; @@ -141,8 +141,8 @@ module VX_stream_xbar #( .N (OUT_WIDTH), .D (NUM_OUTPUTS) ) sel_in_decoder ( - .data_in (sel_in[0]), - .valid_in (valid_in[0]), + .sel_in (sel_in[0]), + .data_in (valid_in[0]), .data_out (valid_out_w) ); From 07ce16e75cc808458cdd7c9ca3037cc161859278 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sat, 5 Oct 2024 17:42:26 -0700 Subject: [PATCH 266/407] minor update --- hw/rtl/cache/VX_cache_bank.sv | 32 +++++++++++++------------------- hw/rtl/cache/VX_cache_data.sv | 21 +++++++++------------ hw/rtl/cache/VX_cache_tags.sv | 25 +++++++++---------------- 3 files changed, 31 insertions(+), 47 deletions(-) diff --git a/hw/rtl/cache/VX_cache_bank.sv b/hw/rtl/cache/VX_cache_bank.sv index c1fea14b29..bbf8965846 100644 --- a/hw/rtl/cache/VX_cache_bank.sv +++ b/hw/rtl/cache/VX_cache_bank.sv @@ -351,14 +351,12 @@ module VX_cache_bank #( .req_uuid (req_uuid_st0), - .stall (pipe_stall), - // init/flush/fill/write/lookup .init (do_init_st0), - .flush (do_flush_st0), - .fill (do_fill_st0), - .write (do_cache_wr_st0), - .lookup (do_lookup_st0), + .flush (do_flush_st0 && ~pipe_stall), + .fill (do_fill_st0 && ~pipe_stall), + .write (do_cache_wr_st0 && ~pipe_stall), + .lookup (do_lookup_st0 && ~pipe_stall), .line_addr (addr_st0), .way_idx (flush_way_st0), @@ -458,16 +456,12 @@ module VX_cache_bank #( ) cache_data ( .clk (clk), .reset (reset), - .req_uuid (req_uuid_st1), - - .stall (pipe_stall), - .init (do_init_st1), - .read (do_cache_rd_st1), - .fill (do_fill_st1), - .flush 
(do_flush_st1), - .write (do_cache_wr_st1), + .fill (do_fill_st1 && ~pipe_stall), + .flush (do_flush_st1 && ~pipe_stall), + .write (do_cache_wr_st1 && ~pipe_stall), + .read (do_cache_rd_st1 && ~pipe_stall), .way_idx (way_idx_st1), .line_addr (addr_st1), .word_idx (word_idx_st1), @@ -481,10 +475,10 @@ module VX_cache_bank #( wire [MSHR_SIZE-1:0] mshr_lookup_pending_st0; wire [MSHR_SIZE-1:0] mshr_lookup_rw_st0; - wire mshr_allocate_st0 = valid_st0 && is_creq_st0 && ~pipe_stall; + wire mshr_allocate_st0 = valid_st0 && is_creq_st0; wire mshr_lookup_st0 = mshr_allocate_st0; - wire mshr_finalize_st1 = valid_st1 && is_creq_st1 && ~pipe_stall; + wire mshr_finalize_st1 = valid_st1 && is_creq_st1; // release allocated mshr entry if we had a hit wire mshr_release_st1; @@ -541,7 +535,7 @@ module VX_cache_bank #( .dequeue_ready (replay_ready), // allocate - .allocate_valid (mshr_allocate_st0), + .allocate_valid (mshr_allocate_st0 && ~pipe_stall), .allocate_addr (addr_st0), .allocate_rw (rw_st0), .allocate_data ({word_idx_st0, byteen_st0, write_data_st0, tag_st0, req_idx_st0}), @@ -550,13 +544,13 @@ module VX_cache_bank #( `UNUSED_PIN (allocate_ready), // lookup - .lookup_valid (mshr_lookup_st0), + .lookup_valid (mshr_lookup_st0 && ~pipe_stall), .lookup_addr (addr_st0), .lookup_pending (mshr_lookup_pending_st0), .lookup_rw (mshr_lookup_rw_st0), // finalize - .finalize_valid (mshr_finalize_st1), + .finalize_valid (mshr_finalize_st1 && ~pipe_stall), .finalize_release(mshr_release_st1), .finalize_pending(mshr_pending_st1), .finalize_id (mshr_id_st1), diff --git a/hw/rtl/cache/VX_cache_data.sv b/hw/rtl/cache/VX_cache_data.sv index aa2a1d0efe..6419343035 100644 --- a/hw/rtl/cache/VX_cache_data.sv +++ b/hw/rtl/cache/VX_cache_data.sv @@ -42,13 +42,11 @@ module VX_cache_data #( input wire[`UP(UUID_WIDTH)-1:0] req_uuid, `IGNORE_UNUSED_END - input wire stall, - input wire init, - input wire read, input wire fill, input wire flush, input wire write, + input wire read, input wire 
[`CS_LINE_ADDR_WIDTH-1:0] line_addr, input wire [`UP(`CS_WORD_SEL_BITS)-1:0] word_idx, input wire [`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] fill_data, @@ -62,7 +60,6 @@ module VX_cache_data #( `UNUSED_SPARAM (INSTANCE_ID) `UNUSED_PARAM (BANK_ID) `UNUSED_PARAM (WORD_SIZE) - `UNUSED_VAR (stall) `UNUSED_VAR (line_addr) `UNUSED_VAR (init) `UNUSED_VAR (read) @@ -111,8 +108,8 @@ module VX_cache_data #( ) byteen_store ( .clk (clk), .reset (reset), - .read (bs_read && ~stall), - .write (bs_write && ~stall), + .read (bs_read), + .write (bs_write), .wren (1'b1), .addr (line_idx), .wdata (bs_wdata), @@ -166,8 +163,8 @@ module VX_cache_data #( ) data_store ( .clk (clk), .reset (reset), - .read (line_read && ~stall), - .write (line_write && ~stall), + .read (line_read), + .write (line_write), .wren (line_wren), .addr (line_idx), .wdata (line_wdata), @@ -195,16 +192,16 @@ module VX_cache_data #( `ifdef DBG_TRACE_CACHE always @(posedge clk) begin - if (fill && ~stall) begin + if (fill) begin `TRACE(3, ("%t: %s fill: addr=0x%0h, way=%b, blk_addr=%0d, data=0x%h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_idx, line_idx, fill_data)) end - if (flush && ~stall) begin + if (flush) begin `TRACE(3, ("%t: %s flush: addr=0x%0h, way=%b, blk_addr=%0d, byteen=0x%h, data=0x%h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_idx, line_idx, dirty_byteen, dirty_data)) end - if (read && ~stall) begin + if (read) begin `TRACE(3, ("%t: %s read: addr=0x%0h, way=%b, blk_addr=%0d, wsel=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_idx, line_idx, word_idx, read_data, req_uuid)) end - if (write && ~stall) begin + if (write) begin `TRACE(3, ("%t: %s write: addr=0x%0h, way=%b, blk_addr=%0d, wsel=%0d, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_idx, line_idx, word_idx, write_byteen, write_data, req_uuid)) end end diff --git a/hw/rtl/cache/VX_cache_tags.sv 
b/hw/rtl/cache/VX_cache_tags.sv index cc8fa8571c..354a57b0b3 100644 --- a/hw/rtl/cache/VX_cache_tags.sv +++ b/hw/rtl/cache/VX_cache_tags.sv @@ -38,8 +38,6 @@ module VX_cache_tags #( input wire [`UP(UUID_WIDTH)-1:0] req_uuid, `IGNORE_UNUSED_END - input wire stall, - // init/fill/lookup input wire init, input wire flush, @@ -75,7 +73,7 @@ module VX_cache_tags #( always @(posedge clk) begin if (reset) begin evict_way_r <= 1; - end else if (~stall) begin // holding the value on stalls prevents filling different slots twice + end else if (lookup) begin evict_way_r <= {evict_way_r[NUM_WAYS-2:0], evict_way_r[NUM_WAYS-1]}; end end @@ -91,22 +89,17 @@ module VX_cache_tags #( .data_out (evict_tag) ); end else begin : g_evict_way_0 - `UNUSED_VAR (stall) assign evict_way = 1'b1; assign evict_tag = read_tag; end - // fill and flush need to also read in writeback mode - wire fill_s = fill && (!WRITEBACK || ~stall); - wire flush_s = flush && (!WRITEBACK || ~stall); - for (genvar i = 0; i < NUM_WAYS; ++i) begin : g_tag_store - wire do_fill = fill_s && evict_way[i]; - wire do_flush = flush_s && (!WRITEBACK || way_idx[i]); // flush the whole line in writethrough mode + wire do_fill = fill && evict_way[i]; + wire do_flush = flush && (!WRITEBACK || way_idx[i]); // flush the whole line in writethrough mode wire do_write = WRITEBACK && write && tag_matches[i]; - wire line_read = (WRITEBACK && (fill_s || flush_s)); + wire line_read = (WRITEBACK && (fill || flush)); wire line_write = init || do_fill || do_flush || do_write; wire line_valid = ~(init || flush); @@ -130,8 +123,8 @@ module VX_cache_tags #( ) tag_store ( .clk (clk), .reset (reset), - .read (line_read && ~stall), - .write (line_write && ~stall), + .read (line_read), + .write (line_write), .wren (1'b1), .addr (line_idx), .wdata (line_wdata), @@ -148,16 +141,16 @@ module VX_cache_tags #( `ifdef DBG_TRACE_CACHE wire [`CS_LINE_ADDR_WIDTH-1:0] evict_line_addr = {evict_tag, line_idx}; always @(posedge clk) begin - if (fill && ~stall) 
begin + if (fill) begin `TRACE(3, ("%t: %s fill: addr=0x%0h, way=%b, blk_addr=%0d, tag_id=0x%0h, dirty=%b, evict_addr=0x%0h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), evict_way, line_idx, line_tag, evict_dirty, `CS_LINE_TO_FULL_ADDR(evict_line_addr, BANK_ID))) end if (init) begin `TRACE(3, ("%t: %s init: addr=0x%0h, blk_addr=%0d\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), line_idx)) end - if (flush && ~stall) begin + if (flush) begin `TRACE(3, ("%t: %s flush: addr=0x%0h, way=%b, blk_addr=%0d, dirty=%b\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(evict_line_addr, BANK_ID), way_idx, line_idx, evict_dirty)) end - if (lookup && ~stall) begin + if (lookup) begin if (tag_matches != 0) begin if (write) begin `TRACE(3, ("%t: %s write-hit: addr=0x%0h, way=%b, blk_addr=%0d, tag_id=0x%0h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), tag_matches, line_idx, line_tag, req_uuid)) From c91f9684fcd5e8e876143b7da9028456fa7692a8 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sat, 5 Oct 2024 18:35:26 -0700 Subject: [PATCH 267/407] minor update --- hw/rtl/cache/VX_cache_bank.sv | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/hw/rtl/cache/VX_cache_bank.sv b/hw/rtl/cache/VX_cache_bank.sv index bbf8965846..cbc8d30b4c 100644 --- a/hw/rtl/cache/VX_cache_bank.sv +++ b/hw/rtl/cache/VX_cache_bank.sv @@ -491,13 +491,15 @@ module VX_cache_bank #( assign mshr_release_st1 = is_hit_st1 || (rw_st1 && ~mshr_pending_st1); end + wire mshr_dequeue = mshr_finalize_st1 && mshr_release_st1 && ~pipe_stall; + VX_pending_size #( .SIZE (MSHR_SIZE) ) mshr_pending_size ( .clk (clk), .reset (reset), .incr (core_req_fire), - .decr (replay_fire || (mshr_finalize_st1 && mshr_release_st1)), + .decr (replay_fire || mshr_dequeue), .empty (mshr_empty), `UNUSED_PIN (alm_empty), .full (mshr_alm_full), From ee96d4334b19397836c6ba1fa88ac1a540db5aad Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Tue, 8 Oct 2024 23:01:01 -0700 
Subject: [PATCH 268/407] VX_onehot_encoder update --- hw/rtl/VX_platform.vh | 11 ++++++++--- hw/rtl/afu/opae/vortex_afu.sv | 2 +- hw/rtl/cache/VX_cache_data.sv | 2 +- hw/rtl/cache/VX_cache_mshr.sv | 5 +++-- hw/rtl/libs/VX_matrix_arbiter.sv | 2 +- .../libs/{VX_encoder.sv => VX_onehot_encoder.sv} | 4 ++-- hw/rtl/libs/VX_pending_size.sv | 14 ++++++++------ hw/rtl/libs/VX_rr_arbiter.sv | 2 +- 8 files changed, 25 insertions(+), 17 deletions(-) rename hw/rtl/libs/{VX_encoder.sv => VX_onehot_encoder.sv} (97%) diff --git a/hw/rtl/VX_platform.vh b/hw/rtl/VX_platform.vh index 3e9042737d..4f78fee242 100644 --- a/hw/rtl/VX_platform.vh +++ b/hw/rtl/VX_platform.vh @@ -37,9 +37,11 @@ endgenerate `define ASSERT(cond, msg) \ assert(cond) else $error msg -`define RUNTIME_ASSERT(cond, msg) \ - always @(posedge clk) begin \ - assert(cond) else $error msg; \ +`define RUNTIME_ASSERT(cond, msg) \ + always @(posedge clk) begin \ + if (!reset) begin \ + `ASSERT(cond, msg); \ + end \ end `define __SCOPE @@ -172,6 +174,7 @@ endgenerate `ifdef QUARTUS `define MAX_FANOUT 8 `define IF_DATA_SIZE(x) $bits(x.data) +`define USE_BLOCK_BRAM (* ramstyle = "block" *) `define USE_FAST_BRAM (* ramstyle = "MLAB, no_rw_check" *) `define NO_RW_RAM_CHECK (* altera_attribute = "-name add_pass_through_logic_to_inferred_rams off" *) `define DISABLE_BRAM (* ramstyle = "logic" *) @@ -180,6 +183,7 @@ endgenerate `elsif VIVADO `define MAX_FANOUT 8 `define IF_DATA_SIZE(x) $bits(x.data) +`define USE_BLOCK_BRAM (* ram_style = "block" *) `define USE_FAST_BRAM (* ram_style = "distributed" *) `define NO_RW_RAM_CHECK (* rw_addr_collision = "no" *) `define DISABLE_BRAM (* ram_style = "registers" *) @@ -188,6 +192,7 @@ endgenerate `else `define MAX_FANOUT 8 `define IF_DATA_SIZE(x) x.DATA_WIDTH +`define USE_BLOCK_BRAM `define USE_FAST_BRAM `define NO_RW_RAM_CHECK `define DISABLE_BRAM diff --git a/hw/rtl/afu/opae/vortex_afu.sv b/hw/rtl/afu/opae/vortex_afu.sv index 7e0bcfaeda..f21f851c02 100644 --- 
a/hw/rtl/afu/opae/vortex_afu.sv +++ b/hw/rtl/afu/opae/vortex_afu.sv @@ -968,7 +968,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ wire [COUT_TID_WIDTH-1:0] cout_tid; - VX_encoder #( + VX_onehot_encoder #( .N (`VX_MEM_BYTEEN_WIDTH) ) cout_tid_enc ( .data_in (vx_mem_req_byteen), diff --git a/hw/rtl/cache/VX_cache_data.sv b/hw/rtl/cache/VX_cache_data.sv index 6419343035..d749cdcd4b 100644 --- a/hw/rtl/cache/VX_cache_data.sv +++ b/hw/rtl/cache/VX_cache_data.sv @@ -73,7 +73,7 @@ module VX_cache_data #( assign line_idx = line_addr[`CS_LINE_SEL_BITS-1:0]; - VX_encoder #( + VX_onehot_encoder #( .N (NUM_WAYS) ) way_idx_enc ( .data_in (way_idx), diff --git a/hw/rtl/cache/VX_cache_mshr.sv b/hw/rtl/cache/VX_cache_mshr.sv index 482c110dcf..4e86f25c77 100644 --- a/hw/rtl/cache/VX_cache_mshr.sv +++ b/hw/rtl/cache/VX_cache_mshr.sv @@ -148,11 +148,12 @@ module VX_cache_mshr #( .valid_out (allocate_rdy_n) ); - VX_encoder #( + VX_priority_encoder #( .N (MSHR_SIZE) ) prev_sel ( .data_in (addr_matches & ~next_table_x), - .data_out (prev_idx), + .index_out (prev_idx), + `UNUSED_PIN (onehot_out), `UNUSED_PIN (valid_out) ); diff --git a/hw/rtl/libs/VX_matrix_arbiter.sv b/hw/rtl/libs/VX_matrix_arbiter.sv index 2840ef43ec..b6b88e47ac 100644 --- a/hw/rtl/libs/VX_matrix_arbiter.sv +++ b/hw/rtl/libs/VX_matrix_arbiter.sv @@ -72,7 +72,7 @@ module VX_matrix_arbiter #( assign grant_onehot = grant; - VX_encoder #( + VX_onehot_encoder #( .N (NUM_REQS) ) encoder ( .data_in (grant_onehot), diff --git a/hw/rtl/libs/VX_encoder.sv b/hw/rtl/libs/VX_onehot_encoder.sv similarity index 97% rename from hw/rtl/libs/VX_encoder.sv rename to hw/rtl/libs/VX_onehot_encoder.sv index 86ccad7925..08198e4300 100644 --- a/hw/rtl/libs/VX_encoder.sv +++ b/hw/rtl/libs/VX_onehot_encoder.sv @@ -13,11 +13,11 @@ `include "VX_platform.vh" -// Fast encoder using parallel prefix computation +// Fast one-hot encoder using parallel prefix computation // Adapted from BaseJump STL: 
http://bjump.org/data_out.html `TRACING_OFF -module VX_encoder #( +module VX_onehot_encoder #( parameter N = 1, parameter REVERSE = 0, parameter MODEL = 1, diff --git a/hw/rtl/libs/VX_pending_size.sv b/hw/rtl/libs/VX_pending_size.sv index 1e72cef192..b94889e6e1 100644 --- a/hw/rtl/libs/VX_pending_size.sv +++ b/hw/rtl/libs/VX_pending_size.sv @@ -66,11 +66,13 @@ module VX_pending_size #( if (INCRW != 1 || DECRW != 1) begin : g_wide_step - localparam SUBW = `MIN(SIZEW, `MAX(INCRW, DECRW)+1); + localparam DELTAW = `MIN(SIZEW, `MAX(INCRW, DECRW)+1); logic [SIZEW-1:0] size_n, size_r; - assign size_n = $signed(size_r) + SIZEW'($signed(SUBW'(incr) - SUBW'(decr))); + wire [DELTAW-1:0] delta = DELTAW'(incr) - DELTAW'(decr); + + assign size_n = $signed(size_r) + SIZEW'($signed(delta)); always @(posedge clk) begin if (reset) begin @@ -80,8 +82,8 @@ module VX_pending_size #( alm_full_r <= 0; size_r <= '0; end else begin - `ASSERT((SIZEW'(incr) >= SIZEW'(decr)) || (size_n >= size_r), ("runtime error: counter overflow")); - `ASSERT((SIZEW'(incr) <= SIZEW'(decr)) || (size_n <= size_r), ("runtime error: counter underflow")); + `ASSERT((DELTAW'(incr) <= DELTAW'(decr)) || (size_n >= size_r), ("runtime error: counter overflow")); + `ASSERT((DELTAW'(incr) >= DELTAW'(decr)) || (size_n <= size_r), ("runtime error: counter underflow")); empty_r <= (size_n == SIZEW'(0)); full_r <= (size_n == SIZEW'(SIZE)); alm_empty_r <= (size_n <= SIZEW'(ALM_EMPTY)); @@ -129,7 +131,7 @@ module VX_pending_size #( wire is_empty_n = (used_r == ADDRW'(1)); wire is_full_n = (used_r == ADDRW'(SIZE-1)); - wire [1:0] push_minus_pop = {~incr & decr, incr ^ decr}; + wire [1:0] delta = {~incr & decr, incr ^ decr}; always @(posedge clk) begin if (reset) begin @@ -148,7 +150,7 @@ module VX_pending_size #( if (is_empty_n) empty_r <= 1; end - used_r <= $signed(used_r) + ADDRW'($signed(push_minus_pop)); + used_r <= $signed(used_r) + ADDRW'($signed(delta)); end end diff --git a/hw/rtl/libs/VX_rr_arbiter.sv 
b/hw/rtl/libs/VX_rr_arbiter.sv index 3ca1f57e56..f5304b0234 100644 --- a/hw/rtl/libs/VX_rr_arbiter.sv +++ b/hw/rtl/libs/VX_rr_arbiter.sv @@ -448,7 +448,7 @@ module VX_rr_arbiter #( end end - VX_encoder #( + VX_onehot_encoder #( .N (NUM_REQS) ) onehot_encoder ( .data_in (grant_onehot), From f49084b2987b011ea8f4527546c86e8ffc7630fd Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Tue, 8 Oct 2024 23:44:36 -0700 Subject: [PATCH 269/407] improving block rams inference with registered read address. --- hw/rtl/libs/VX_dp_ram.sv | 159 +++++++++++++++++++++++++++++---------- hw/rtl/libs/VX_sp_ram.sv | 2 + 2 files changed, 121 insertions(+), 40 deletions(-) diff --git a/hw/rtl/libs/VX_dp_ram.sv b/hw/rtl/libs/VX_dp_ram.sv index 21ab03ad5e..c278275528 100644 --- a/hw/rtl/libs/VX_dp_ram.sv +++ b/hw/rtl/libs/VX_dp_ram.sv @@ -19,6 +19,7 @@ module VX_dp_ram #( parameter SIZE = 1, parameter WRENW = 1, parameter OUT_REG = 0, + parameter RADDR_REG = 0, parameter LUTRAM = 0, parameter NO_RWCHECK = 0, parameter RW_ASSERT = 0, @@ -57,8 +58,7 @@ module VX_dp_ram #( `UNUSED_PARAM (RW_ASSERT) `UNUSED_VAR (read) - - `RUNTIME_ASSERT((((WRENW == 1) ) || ~write) || (| wren), ("%t: invalid write enable mask", $time)) + `UNUSED_VAR (wren) if (OUT_REG && !READ_ENABLE) begin : g_out_reg `UNUSED_PARAM (NO_RWCHECK) @@ -78,7 +78,7 @@ module VX_dp_ram #( end end if (RESET_OUT && reset) begin - rdata_r <= '0; + rdata_r <= INIT_VALUE; end else begin rdata_r <= ram[raddr]; end @@ -96,7 +96,7 @@ module VX_dp_ram #( end end if (RESET_OUT && reset) begin - rdata_r <= '0; + rdata_r <= INIT_VALUE; end else begin rdata_r <= ram[raddr]; end @@ -104,7 +104,7 @@ module VX_dp_ram #( end end `else - // default synthesis + // Not Quartus if (LUTRAM != 0) begin : g_lutram `USE_FAST_BRAM reg [DATAW-1:0] ram [0:SIZE-1]; `RAM_INITIALIZATION @@ -117,7 +117,7 @@ module VX_dp_ram #( end end if (RESET_OUT && reset) begin - rdata_r <= '0; + rdata_r <= INIT_VALUE; end else begin rdata_r <= ram[raddr]; end @@ -135,7 +135,7 @@ 
module VX_dp_ram #( end end if (RESET_OUT && reset) begin - rdata_r <= '0; + rdata_r <= INIT_VALUE; end else begin rdata_r <= ram[raddr]; end @@ -152,7 +152,7 @@ module VX_dp_ram #( if (write) ram[waddr] <= wdata; if (RESET_OUT && reset) begin - rdata_r <= '0; + rdata_r <= INIT_VALUE; end else begin rdata_r <= ram[raddr]; end @@ -167,7 +167,7 @@ module VX_dp_ram #( if (write) ram[waddr] <= wdata; if (RESET_OUT && reset) begin - rdata_r <= '0; + rdata_r <= INIT_VALUE; end else begin rdata_r <= ram[raddr]; end @@ -179,6 +179,7 @@ module VX_dp_ram #( end else begin : g_no_out_reg // OUT_REG==0 || READ_ENABLE=1 wire [DATAW-1:0] rdata_w; + reg [ADDRW-1:0] raddr_reg; `ifdef SYNTHESIS if (WRENW > 1) begin : g_writeen `ifdef QUARTUS @@ -192,8 +193,16 @@ module VX_dp_ram #( ram[waddr][i] <= wdata[i * WSELW +: WSELW]; end end + if (read) begin + raddr_reg <= raddr; + end + end + if (RADDR_REG != 0) begin : g_rdata_async + assign rdata_w = ram[raddr_reg]; + end else begin : g_rdata_sync + assign rdata_w = ram[raddr]; + `UNUSED_VAR (raddr_reg) end - assign rdata_w = ram[raddr]; end else begin : g_no_lutram if (NO_RWCHECK != 0) begin : g_no_rwcheck `NO_RW_RAM_CHECK reg [WRENW-1:0][WSELW-1:0] ram [0:SIZE-1]; @@ -205,8 +214,16 @@ module VX_dp_ram #( ram[waddr][i] <= wdata[i * WSELW +: WSELW]; end end + if (read) begin + raddr_reg <= raddr; + end + end + if (RADDR_REG != 0) begin : g_rdata_async + assign rdata_w = ram[raddr_reg]; + end else begin : g_rdata_sync + assign rdata_w = ram[raddr]; + `UNUSED_VAR (raddr_reg) end - assign rdata_w = ram[raddr]; end else begin : g_rwcheck reg [WRENW-1:0][WSELW-1:0] ram [0:SIZE-1]; `RAM_INITIALIZATION @@ -217,8 +234,16 @@ module VX_dp_ram #( ram[waddr][i] <= wdata[i * WSELW +: WSELW]; end end + if (read) begin + raddr_reg <= raddr; + end + end + if (RADDR_REG != 0) begin : g_rdata_async + assign rdata_w = ram[raddr_reg]; + end else begin : g_rdata_sync + assign rdata_w = ram[raddr]; + `UNUSED_VAR (raddr_reg) end - assign rdata_w = ram[raddr]; 
end end `else @@ -233,8 +258,16 @@ module VX_dp_ram #( ram[waddr][i * WSELW +: WSELW] <= wdata[i * WSELW +: WSELW]; end end + if (read) begin + raddr_reg <= raddr; + end + end + if (RADDR_REG != 0) begin : g_rdata_async + assign rdata_w = ram[raddr_reg]; + end else begin : g_rdata_sync + assign rdata_w = ram[raddr]; + `UNUSED_VAR (raddr_reg) end - assign rdata_w = ram[raddr]; end else begin : g_no_lutram if (NO_RWCHECK != 0) begin : g_no_rwcheck `NO_RW_RAM_CHECK reg [DATAW-1:0] ram [0:SIZE-1]; @@ -246,8 +279,16 @@ module VX_dp_ram #( ram[waddr][i * WSELW +: WSELW] <= wdata[i * WSELW +: WSELW]; end end + if (read) begin + raddr_reg <= raddr; + end + end + if (RADDR_REG != 0) begin : g_rdata_async + assign rdata_w = ram[raddr_reg]; + end else begin : g_rdata_sync + assign rdata_w = ram[raddr]; + `UNUSED_VAR (raddr_reg) end - assign rdata_w = ram[raddr]; end else begin : g_rwcheck reg [DATAW-1:0] ram [0:SIZE-1]; `RAM_INITIALIZATION @@ -258,8 +299,16 @@ module VX_dp_ram #( ram[waddr][i * WSELW +: WSELW] <= wdata[i * WSELW +: WSELW]; end end + if (read) begin + raddr_reg <= raddr; + end + end + if (RADDR_REG != 0) begin : g_rdata_async + assign rdata_w = ram[raddr_reg]; + end else begin : g_rdata_sync + assign rdata_w = ram[raddr]; + `UNUSED_VAR (raddr_reg) end - assign rdata_w = ram[raddr]; end end `endif @@ -272,8 +321,16 @@ module VX_dp_ram #( if (write) begin ram[waddr] <= wdata; end + if (read) begin + raddr_reg <= raddr; + end + end + if (RADDR_REG != 0) begin : g_rdata_async + assign rdata_w = ram[raddr_reg]; + end else begin : g_rdata_sync + assign rdata_w = ram[raddr]; + `UNUSED_VAR (raddr_reg) end - assign rdata_w = ram[raddr]; end else begin : g_no_lutram if (NO_RWCHECK != 0) begin : g_no_rwcheck `NO_RW_RAM_CHECK reg [DATAW-1:0] ram [0:SIZE-1]; @@ -282,8 +339,16 @@ module VX_dp_ram #( if (write) begin ram[waddr] <= wdata; end + if (read) begin + raddr_reg <= raddr; + end + end + if (RADDR_REG != 0) begin : g_rdata_async + assign rdata_w = ram[raddr_reg]; + 
end else begin : g_rdata_sync + assign rdata_w = ram[raddr]; + `UNUSED_VAR (raddr_reg) end - assign rdata_w = ram[raddr]; end else begin : g_rwcheck reg [DATAW-1:0] ram [0:SIZE-1]; `RAM_INITIALIZATION @@ -291,8 +356,16 @@ module VX_dp_ram #( if (write) begin ram[waddr] <= wdata; end + if (read) begin + raddr_reg <= raddr; + end + end + if (RADDR_REG != 0) begin : g_rdata_async + assign rdata_w = ram[raddr_reg]; + end else begin : g_rdata_sync + assign rdata_w = ram[raddr]; + `UNUSED_VAR (raddr_reg) end - assign rdata_w = ram[raddr]; end end end @@ -316,39 +389,46 @@ module VX_dp_ram #( ram[waddr] <= ram_n; end end + if (read) begin + raddr_reg <= raddr; + end end - if (!LUTRAM && NO_RWCHECK) begin : g_rdata_no_bypass - reg [DATAW-1:0] prev_data; - reg [ADDRW-1:0] prev_waddr; - reg prev_write; + if (RADDR_REG != 0) begin : g_rdata_async + assign rdata_w = ram[raddr_reg]; + end else begin : g_rdata_sync + `UNUSED_VAR (raddr_reg) + if (!LUTRAM && NO_RWCHECK) begin : g_rdata_no_bypass + reg [DATAW-1:0] prev_data; + reg [ADDRW-1:0] prev_waddr; + reg prev_write; - always @(posedge clk) begin - if (reset) begin - prev_write <= 0; - prev_data <= '0; - prev_waddr <= '0; - end else begin - prev_write <= write; - prev_data <= ram[waddr]; - prev_waddr <= waddr; + always @(posedge clk) begin + if (reset) begin + prev_write <= 0; + prev_data <= '0; + prev_waddr <= '0; + end else begin + prev_write <= write; + prev_data <= ram[waddr]; + prev_waddr <= waddr; + end end - end - assign rdata_w = (prev_write && (prev_waddr == raddr)) ? prev_data : ram[raddr]; - if (RW_ASSERT) begin : g_rw_assert - `RUNTIME_ASSERT(~read || (rdata_w == ram[raddr]), ("%t: read after write hazard", $time)) + assign rdata_w = (prev_write && (prev_waddr == raddr)) ? 
prev_data : ram[raddr]; + if (RW_ASSERT) begin : g_rw_assert + `RUNTIME_ASSERT(~read || (rdata_w == ram[raddr]), ("%t: read after write hazard", $time)) + end + end else begin : g_rdata_with_bypass + assign rdata_w = ram[raddr]; end - end else begin : g_rdata_with_bypass - assign rdata_w = ram[raddr]; end `endif - if (OUT_REG != 0) begin : g_rdata_req reg [DATAW-1:0] rdata_r; always @(posedge clk) begin if (READ_ENABLE && reset) begin - rdata_r <= '0; + rdata_r <= INIT_VALUE; end else if (!READ_ENABLE || read) begin rdata_r <= rdata_w; end @@ -357,7 +437,6 @@ module VX_dp_ram #( end else begin : g_rdata_comb assign rdata = rdata_w; end - end endmodule diff --git a/hw/rtl/libs/VX_sp_ram.sv b/hw/rtl/libs/VX_sp_ram.sv index efce4b5f2f..7974cb6795 100644 --- a/hw/rtl/libs/VX_sp_ram.sv +++ b/hw/rtl/libs/VX_sp_ram.sv @@ -26,6 +26,7 @@ module VX_sp_ram #( parameter RESET_OUT = 0, parameter READ_ENABLE = 0, parameter INIT_ENABLE = 0, + parameter RADDR_REG = 0, parameter INIT_FILE = "", parameter [DATAW-1:0] INIT_VALUE = 0, parameter ADDRW = `LOG2UP(SIZE) @@ -44,6 +45,7 @@ module VX_sp_ram #( .SIZE (SIZE), .WRENW (WRENW), .OUT_REG (OUT_REG), + .RADDR_REG (RADDR_REG), .LUTRAM (LUTRAM), .NO_RWCHECK (NO_RWCHECK), .RW_ASSERT (RW_ASSERT), From a5381fd78867525209fab3de5952f326ace6def5 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Wed, 9 Oct 2024 04:14:15 -0700 Subject: [PATCH 270/407] async bram optimization --- hw/rtl/cache/VX_cache_bank.sv | 50 ++++-------- hw/rtl/cache/VX_cache_data.sv | 8 +- hw/rtl/cache/VX_cache_mshr.sv | 134 +++++++++++++++------------------ hw/rtl/cache/VX_cache_tags.sv | 14 ++-- hw/rtl/core/VX_fetch.sv | 5 +- hw/rtl/core/VX_ipdom_stack.sv | 28 ++++--- hw/rtl/core/VX_split_join.sv | 3 +- hw/rtl/libs/VX_fifo_queue.sv | 47 +++++++----- hw/rtl/libs/VX_index_buffer.sv | 2 +- 9 files changed, 135 insertions(+), 156 deletions(-) diff --git a/hw/rtl/cache/VX_cache_bank.sv b/hw/rtl/cache/VX_cache_bank.sv index cbc8d30b4c..6a7fcaf52e 100644 --- 
a/hw/rtl/cache/VX_cache_bank.sv +++ b/hw/rtl/cache/VX_cache_bank.sv @@ -167,7 +167,6 @@ module VX_cache_bank #( wire [NUM_WAYS-1:0] way_idx_st0, way_idx_st1; wire [NUM_WAYS-1:0] tag_matches_st0; wire [MSHR_ADDR_WIDTH-1:0] mshr_alloc_id_st0; - wire [MSHR_ADDR_WIDTH-1:0] mshr_prev_st0, mshr_prev_st1; wire mshr_pending_st0, mshr_pending_st1; wire mshr_empty; @@ -380,14 +379,14 @@ module VX_cache_bank #( assign line_tag2_st0 = (is_fill_st0 || is_flush2_st0) ? evict_tag_st0 : line_tag_st0; VX_pipe_register #( - .DATAW (1 + 1 + 1 + 1 + 1 + 1 + 1 + `UP(FLAGS_WIDTH) + `CS_TAG_SEL_BITS + `CS_LINE_SEL_BITS + `CS_LINE_WIDTH + WORD_SIZE + WORD_SEL_WIDTH + REQ_SEL_WIDTH + TAG_WIDTH + MSHR_ADDR_WIDTH + MSHR_ADDR_WIDTH + NUM_WAYS + 1 + 1), + .DATAW (1 + 1 + 1 + 1 + 1 + 1 + 1 + `UP(FLAGS_WIDTH) + `CS_TAG_SEL_BITS + `CS_LINE_SEL_BITS + `CS_LINE_WIDTH + WORD_SIZE + WORD_SEL_WIDTH + REQ_SEL_WIDTH + TAG_WIDTH + MSHR_ADDR_WIDTH + NUM_WAYS + 1 + 1), .RESETW (1) ) pipe_reg1 ( .clk (clk), .reset (reset), .enable (~pipe_stall), - .data_in ({valid_st0, is_init_st0, is_replay_st0, is_fill_st0, is_flush2_st0, is_creq_st0, rw_st0, flags_st0, line_tag2_st0, line_idx_st0, data_st0, byteen_st0, word_idx_st0, req_idx_st0, tag_st0, mshr_id_st0, mshr_prev_st0, way_idx_st0, evict_dirty_st0, mshr_pending_st0}), - .data_out ({valid_st1, is_init_st1, is_replay_st1, is_fill_st1, is_flush_st1, is_creq_st1, rw_st1, flags_st1, line_tag_st1, line_idx_st1, data_st1, byteen_st1, word_idx_st1, req_idx_st1, tag_st1, mshr_id_st1, mshr_prev_st1, way_idx_st1, evict_dirty_st1, mshr_pending_st1}) + .data_in ({valid_st0, is_init_st0, is_replay_st0, is_fill_st0, is_flush2_st0, is_creq_st0, rw_st0, flags_st0, line_tag2_st0, line_idx_st0, data_st0, byteen_st0, word_idx_st0, req_idx_st0, tag_st0, mshr_id_st0, way_idx_st0, evict_dirty_st0, mshr_pending_st0}), + .data_out ({valid_st1, is_init_st1, is_replay_st1, is_fill_st1, is_flush_st1, is_creq_st1, rw_st1, flags_st1, line_tag_st1, line_idx_st1, data_st1, byteen_st1, 
word_idx_st1, req_idx_st1, tag_st1, mshr_id_st1, way_idx_st1, evict_dirty_st1, mshr_pending_st1}) ); // we have a tag hit @@ -473,25 +472,20 @@ module VX_cache_bank #( .dirty_byteen(dirty_byteen_st1) ); - wire [MSHR_SIZE-1:0] mshr_lookup_pending_st0; - wire [MSHR_SIZE-1:0] mshr_lookup_rw_st0; wire mshr_allocate_st0 = valid_st0 && is_creq_st0; - wire mshr_lookup_st0 = mshr_allocate_st0; - - wire mshr_finalize_st1 = valid_st1 && is_creq_st1; // release allocated mshr entry if we had a hit wire mshr_release_st1; if (WRITEBACK) begin : g_mshr_release - assign mshr_release_st1 = is_hit_st1; + assign mshr_release_st1 = valid_st1 && is_creq_st1 && is_hit_st1; end else begin : g_mshr_release_ro // we need to keep missed write requests in MSHR if there is already a pending entry to the same address // this ensures that missed write requests are replayed locally in case a pending fill arrives without the write content // this can happen when writes are sent late, when the fill was already in flight. 
- assign mshr_release_st1 = is_hit_st1 || (rw_st1 && ~mshr_pending_st1); + assign mshr_release_st1 = valid_st1 && is_creq_st1 && (is_hit_st1 || (rw_st1 && ~mshr_pending_st1)); end - wire mshr_dequeue = mshr_finalize_st1 && mshr_release_st1 && ~pipe_stall; + wire mshr_dequeue = mshr_release_st1 && ~pipe_stall; VX_pending_size #( .SIZE (MSHR_SIZE) @@ -513,6 +507,8 @@ module VX_cache_bank #( .LINE_SIZE (LINE_SIZE), .NUM_BANKS (NUM_BANKS), .MSHR_SIZE (MSHR_SIZE), + .WRITEBACK (WRITEBACK), + .RDW_STALL (1), .UUID_WIDTH (UUID_WIDTH), .DATA_WIDTH (WORD_SEL_WIDTH + WORD_SIZE + `CS_WORD_WIDTH + TAG_WIDTH + REQ_SEL_WIDTH) ) cache_mshr ( @@ -520,8 +516,8 @@ module VX_cache_bank #( .reset (reset), .deq_req_uuid (req_uuid_sel), - .lkp_req_uuid (req_uuid_st0), - .fin_req_uuid (req_uuid_st1), + .alc_req_uuid (req_uuid_st0), + .rel_req_uuid (req_uuid_st1), // memory fill .fill_valid (mem_rsp_fire), @@ -542,32 +538,14 @@ module VX_cache_bank #( .allocate_rw (rw_st0), .allocate_data ({word_idx_st0, byteen_st0, write_data_st0, tag_st0, req_idx_st0}), .allocate_id (mshr_alloc_id_st0), - .allocate_prev (mshr_prev_st0), + .allocate_pending(mshr_pending_st0), `UNUSED_PIN (allocate_ready), - // lookup - .lookup_valid (mshr_lookup_st0 && ~pipe_stall), - .lookup_addr (addr_st0), - .lookup_pending (mshr_lookup_pending_st0), - .lookup_rw (mshr_lookup_rw_st0), - - // finalize - .finalize_valid (mshr_finalize_st1 && ~pipe_stall), - .finalize_release(mshr_release_st1), - .finalize_pending(mshr_pending_st1), - .finalize_id (mshr_id_st1), - .finalize_prev (mshr_prev_st1) + // release + .release_valid (mshr_release_st1 && ~pipe_stall), + .release_id (mshr_id_st1) ); - // check if there are pending requests to same line in the MSHR - wire [MSHR_SIZE-1:0] lookup_matches; - for (genvar i = 0; i < MSHR_SIZE; ++i) begin : g_lookup_matches - assign lookup_matches[i] = mshr_lookup_pending_st0[i] - && (i != mshr_id_st0) // exclude current mshr id - && (WRITEBACK || ~mshr_lookup_rw_st0[i]); // exclude write 
requests if writethrough - end - assign mshr_pending_st0 = (| lookup_matches); - // schedule core response wire crsp_queue_valid, crsp_queue_ready; diff --git a/hw/rtl/cache/VX_cache_data.sv b/hw/rtl/cache/VX_cache_data.sv index d749cdcd4b..54a78e3570 100644 --- a/hw/rtl/cache/VX_cache_data.sv +++ b/hw/rtl/cache/VX_cache_data.sv @@ -193,16 +193,16 @@ module VX_cache_data #( `ifdef DBG_TRACE_CACHE always @(posedge clk) begin if (fill) begin - `TRACE(3, ("%t: %s fill: addr=0x%0h, way=%b, blk_addr=%0d, data=0x%h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_idx, line_idx, fill_data)) + `TRACE(3, ("%t: %s fill: addr=0x%0h, way=%b, line=%0d, data=0x%h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_idx, line_idx, fill_data)) end if (flush) begin - `TRACE(3, ("%t: %s flush: addr=0x%0h, way=%b, blk_addr=%0d, byteen=0x%h, data=0x%h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_idx, line_idx, dirty_byteen, dirty_data)) + `TRACE(3, ("%t: %s flush: addr=0x%0h, way=%b, line=%0d, byteen=0x%h, data=0x%h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_idx, line_idx, dirty_byteen, dirty_data)) end if (read) begin - `TRACE(3, ("%t: %s read: addr=0x%0h, way=%b, blk_addr=%0d, wsel=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_idx, line_idx, word_idx, read_data, req_uuid)) + `TRACE(3, ("%t: %s read: addr=0x%0h, way=%b, line=%0d, wsel=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_idx, line_idx, word_idx, read_data, req_uuid)) end if (write) begin - `TRACE(3, ("%t: %s write: addr=0x%0h, way=%b, blk_addr=%0d, wsel=%0d, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_idx, line_idx, word_idx, write_byteen, write_data, req_uuid)) + `TRACE(3, ("%t: %s write: addr=0x%0h, way=%b, line=%0d, wsel=%0d, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, 
`CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_idx, line_idx, word_idx, write_byteen, write_data, req_uuid)) end end `endif diff --git a/hw/rtl/cache/VX_cache_mshr.sv b/hw/rtl/cache/VX_cache_mshr.sv index 4e86f25c77..ff3ead64f9 100644 --- a/hw/rtl/cache/VX_cache_mshr.sv +++ b/hw/rtl/cache/VX_cache_mshr.sv @@ -24,36 +24,22 @@ // arrival and are dequeued in the same order. // Each entry has a next pointer to the next entry pending for the same cache line. // -// During the fill operation, the MSHR will release the MSHR entry at fill_id +// During the fill request, the MSHR will release the MSHR entry at fill_id // which represents the first request in the pending list that initiated the memory fill. // -// The dequeue operation directly follows the fill operation and will release +// The dequeue response directly follows the fill request and will release // all the subsequent entries linked to fill_id (pending the same cache line). // -// During the allocation operation, the MSHR will allocate the next free slot +// During the allocation request, the MSHR will allocate the next free slot // for the incoming core request. We return the allocated slot id as well as // the slot id of the previous entry for the same cache line. This is used to -// link the new entry to the pending list during finalization. +// link the new entry to the pending list. // -// The lookup operation is used to find all pending entries for a given cache line. -// This is used to by the cache bank to determine if a cache miss is already pending -// and therefore avoid issuing a memory fill request. -// -// The finalize operation is used to release the allocated MSHR entry if we had a hit. -// If we had a miss and finalize_pending is true, we link the allocated entry to -// its corresponding pending list (via finalize_prev). +// The release request is used to invalidate the allocated MSHR entry if we had a cache hit. 
// // Warning: This MSHR implementation is strongly coupled with the bank pipeline // and as such changes to either module requires careful evaluation. // -// This architecture implements three pipeline stages: -// - Arbitration: cache bank arbitration before entering pipeline. -// fill and dequeue operations are executed at this stage. -// - stage 0: cache bank tag access stage. -// allocate and lookup operations are executed at this stage. -// - stage 1: cache bank tdatag access stage. -// finalize operation is executed at this stage. -// module VX_cache_mshr #( parameter `STRING INSTANCE_ID= "", @@ -68,6 +54,11 @@ module VX_cache_mshr #( parameter UUID_WIDTH = 0, // MSHR parameters parameter DATA_WIDTH = 1, + // Enable cache writeback + parameter WRITEBACK = 0, + // Cache stall on read during write + RDW_STALL = 0, + parameter MSHR_ADDR_WIDTH = `LOG2UP(MSHR_SIZE) ) ( input wire clk, @@ -75,8 +66,8 @@ module VX_cache_mshr #( `IGNORE_UNUSED_BEGIN input wire[`UP(UUID_WIDTH)-1:0] deq_req_uuid, - input wire[`UP(UUID_WIDTH)-1:0] lkp_req_uuid, - input wire[`UP(UUID_WIDTH)-1:0] fin_req_uuid, + input wire[`UP(UUID_WIDTH)-1:0] alc_req_uuid, + input wire[`UP(UUID_WIDTH)-1:0] rel_req_uuid, `IGNORE_UNUSED_END // memory fill @@ -98,21 +89,12 @@ module VX_cache_mshr #( input wire allocate_rw, input wire [DATA_WIDTH-1:0] allocate_data, output wire [MSHR_ADDR_WIDTH-1:0] allocate_id, - output wire [MSHR_ADDR_WIDTH-1:0] allocate_prev, + output wire allocate_pending, output wire allocate_ready, - // lookup - input wire lookup_valid, - input wire [`CS_LINE_ADDR_WIDTH-1:0] lookup_addr, - output wire [MSHR_SIZE-1:0] lookup_pending, - output wire [MSHR_SIZE-1:0] lookup_rw, - - // finalize - input wire finalize_valid, - input wire finalize_release, - input wire finalize_pending, - input wire [MSHR_ADDR_WIDTH-1:0] finalize_id, - input wire [MSHR_ADDR_WIDTH-1:0] finalize_prev + // release + input wire release_valid, + input wire [MSHR_ADDR_WIDTH-1:0] release_id ); `UNUSED_PARAM (BANK_ID) 
@@ -130,13 +112,15 @@ module VX_cache_mshr #( reg [MSHR_ADDR_WIDTH-1:0] dequeue_id_r, dequeue_id_n; wire [MSHR_ADDR_WIDTH-1:0] prev_idx; + reg [MSHR_ADDR_WIDTH-1:0] post_alloc_id, post_alloc_previd; + reg post_alloc_val; wire allocate_fire = allocate_valid && allocate_ready; wire dequeue_fire = dequeue_valid && dequeue_ready; wire [MSHR_SIZE-1:0] addr_matches; for (genvar i = 0; i < MSHR_SIZE; ++i) begin : g_addr_matches - assign addr_matches[i] = valid_table[i] && (addr_table[i] == lookup_addr); + assign addr_matches[i] = valid_table[i] && (addr_table[i] == allocate_addr); end VX_lzc #( @@ -148,6 +132,7 @@ module VX_cache_mshr #( .valid_out (allocate_rdy_n) ); + // find matching tail-entry VX_priority_encoder #( .N (MSHR_SIZE) ) prev_sel ( @@ -172,18 +157,19 @@ module VX_cache_mshr #( valid_table_n[dequeue_id] = 0; if (next_table[dequeue_id]) begin dequeue_id_n = next_index[dequeue_id]; + end else if (!RDW_STALL && post_alloc_val && (post_alloc_previd == dequeue_id)) begin + dequeue_id_n = post_alloc_id; end else begin dequeue_val_n = 0; end end - if (finalize_valid) begin - if (finalize_release) begin - valid_table_n[finalize_id] = 0; - end - if (finalize_pending) begin - next_table_x[finalize_prev] = 1; - end + if (release_valid) begin + valid_table_n[release_id] = 0; + end + + if (post_alloc_val) begin + next_table_x[post_alloc_previd] = 1; end next_table_n = next_table_x; @@ -198,39 +184,43 @@ module VX_cache_mshr #( valid_table <= '0; allocate_rdy <= 0; dequeue_val <= 0; + post_alloc_val <= 0; end else begin valid_table <= valid_table_n; allocate_rdy <= allocate_rdy_n; dequeue_val <= dequeue_val_n; + post_alloc_val <= allocate_fire && allocate_pending; end if (allocate_fire) begin - addr_table[allocate_id] <= allocate_addr; + addr_table[allocate_id] <= allocate_addr; write_table[allocate_id] <= allocate_rw; end - if (finalize_valid && finalize_pending) begin - next_index[finalize_prev] <= finalize_id; + if (post_alloc_val) begin + 
next_index[post_alloc_previd] <= post_alloc_id; end dequeue_id_r <= dequeue_id_n; allocate_id_r <= allocate_id_n; next_table <= next_table_n; + post_alloc_id <= allocate_id; + post_alloc_previd <= prev_idx; end `RUNTIME_ASSERT((~allocate_fire || ~valid_table[allocate_id_r]), ("%t: *** %s inuse allocation: addr=0x%0h, id=%0d (#%0d)", $time, INSTANCE_ID, - `CS_LINE_TO_FULL_ADDR(allocate_addr, BANK_ID), allocate_id_r, lkp_req_uuid)) + `CS_LINE_TO_FULL_ADDR(allocate_addr, BANK_ID), allocate_id_r, alc_req_uuid)) - `RUNTIME_ASSERT((~finalize_valid || valid_table[finalize_id]), ("%t: *** %s invalid release: addr=0x%0h, id=%0d (#%0d)", $time, INSTANCE_ID, - `CS_LINE_TO_FULL_ADDR(addr_table[finalize_id], BANK_ID), finalize_id, fin_req_uuid)) + `RUNTIME_ASSERT((~release_valid || valid_table[release_id]), ("%t: *** %s invalid release: addr=0x%0h, id=%0d (#%0d)", $time, INSTANCE_ID, + `CS_LINE_TO_FULL_ADDR(addr_table[release_id], BANK_ID), release_id, rel_req_uuid)) `RUNTIME_ASSERT((~fill_valid || valid_table[fill_id]), ("%t: *** %s invalid fill: addr=0x%0h, id=%0d", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(addr_table[fill_id], BANK_ID), fill_id)) VX_dp_ram #( - .DATAW (DATA_WIDTH), - .SIZE (MSHR_SIZE), - .LUTRAM (1) + .DATAW (DATA_WIDTH), + .SIZE (MSHR_SIZE), + .RADDR_REG (1) ) entries ( .clk (clk), .reset (reset), @@ -239,7 +229,7 @@ module VX_cache_mshr #( .wren (1'b1), .waddr (allocate_id_r), .wdata (allocate_data), - .raddr (dequeue_id_r), + .raddr (dequeue_id_n), .rdata (dequeue_data) ); @@ -247,18 +237,17 @@ module VX_cache_mshr #( assign allocate_ready = allocate_rdy; assign allocate_id = allocate_id_r; - assign allocate_prev = prev_idx; - - assign dequeue_valid = dequeue_val; - assign dequeue_addr = addr_table[dequeue_id_r]; - assign dequeue_rw = write_table[dequeue_id_r]; - assign dequeue_id = dequeue_id_r; - - // return pending entries for the given cache line - assign lookup_pending = addr_matches; - assign lookup_rw = write_table; + if (WRITEBACK) begin : 
g_pending_wb + assign allocate_pending = |addr_matches; + end else begin : g_pending_wt + // exclude write requests if writethrough + assign allocate_pending = |(addr_matches & ~write_table); + end - `UNUSED_VAR (lookup_valid) + assign dequeue_valid = dequeue_val; + assign dequeue_addr = addr_table[dequeue_id_r]; + assign dequeue_rw = write_table[dequeue_id_r]; + assign dequeue_id = dequeue_id_r; `ifdef DBG_TRACE_CACHE reg show_table; @@ -266,23 +255,18 @@ module VX_cache_mshr #( if (reset) begin show_table <= 0; end else begin - show_table <= allocate_fire || lookup_valid || finalize_valid || fill_valid || dequeue_fire; + show_table <= allocate_fire || post_alloc_val || release_valid || fill_valid || dequeue_fire; end if (allocate_fire) begin - `TRACE(3, ("%t: %s allocate: addr=0x%0h, prev=%0d, id=%0d (#%0d)\n", $time, INSTANCE_ID, - `CS_LINE_TO_FULL_ADDR(allocate_addr, BANK_ID), allocate_prev, allocate_id, lkp_req_uuid)) - end - if (lookup_valid) begin - `TRACE(3, ("%t: %s lookup: addr=0x%0h, matches=%b (#%0d)\n", $time, INSTANCE_ID, - `CS_LINE_TO_FULL_ADDR(lookup_addr, BANK_ID), lookup_pending, lkp_req_uuid)) + `TRACE(3, ("%t: %s allocate: addr=0x%0h, id=%0d, pending=%b, prev=%0d (#%0d)\n", $time, INSTANCE_ID, + `CS_LINE_TO_FULL_ADDR(allocate_addr, BANK_ID), allocate_id, allocate_pending, prev_idx, alc_req_uuid)) end - if (finalize_valid) begin - `TRACE(3, ("%t: %s finalize release=%b, pending=%b, prev=%0d, id=%0d (#%0d)\n", $time, INSTANCE_ID, - finalize_release, finalize_pending, finalize_prev, finalize_id, fin_req_uuid)) + if (release_valid) begin + `TRACE(3, ("%t: %s release: id=%0d (#%0d)\n", $time, INSTANCE_ID, release_id, rel_req_uuid)) end if (fill_valid) begin - `TRACE(3, ("%t: %s fill: addr=0x%0h, addr=0x%0h, id=%0d\n", $time, INSTANCE_ID, - `CS_LINE_TO_FULL_ADDR(addr_table[fill_id], BANK_ID), `CS_LINE_TO_FULL_ADDR(fill_addr, BANK_ID), fill_id)) + `TRACE(3, ("%t: %s fill: addr=0x%0h, id=%0d\n", $time, INSTANCE_ID, + `CS_LINE_TO_FULL_ADDR(fill_addr, 
BANK_ID), fill_id)) end if (dequeue_fire) begin `TRACE(3, ("%t: %s dequeue: addr=0x%0h, id=%0d (#%0d)\n", $time, INSTANCE_ID, diff --git a/hw/rtl/cache/VX_cache_tags.sv b/hw/rtl/cache/VX_cache_tags.sv index 354a57b0b3..678f7af76c 100644 --- a/hw/rtl/cache/VX_cache_tags.sv +++ b/hw/rtl/cache/VX_cache_tags.sv @@ -142,26 +142,26 @@ module VX_cache_tags #( wire [`CS_LINE_ADDR_WIDTH-1:0] evict_line_addr = {evict_tag, line_idx}; always @(posedge clk) begin if (fill) begin - `TRACE(3, ("%t: %s fill: addr=0x%0h, way=%b, blk_addr=%0d, tag_id=0x%0h, dirty=%b, evict_addr=0x%0h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), evict_way, line_idx, line_tag, evict_dirty, `CS_LINE_TO_FULL_ADDR(evict_line_addr, BANK_ID))) + `TRACE(3, ("%t: %s fill: addr=0x%0h, way=%b, line=%0d, tag_id=0x%0h, dirty=%b, evict_addr=0x%0h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), evict_way, line_idx, line_tag, evict_dirty, `CS_LINE_TO_FULL_ADDR(evict_line_addr, BANK_ID))) end if (init) begin - `TRACE(3, ("%t: %s init: addr=0x%0h, blk_addr=%0d\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), line_idx)) + `TRACE(3, ("%t: %s init: addr=0x%0h, line=%0d\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), line_idx)) end if (flush) begin - `TRACE(3, ("%t: %s flush: addr=0x%0h, way=%b, blk_addr=%0d, dirty=%b\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(evict_line_addr, BANK_ID), way_idx, line_idx, evict_dirty)) + `TRACE(3, ("%t: %s flush: addr=0x%0h, way=%b, line=%0d, dirty=%b\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(evict_line_addr, BANK_ID), way_idx, line_idx, evict_dirty)) end if (lookup) begin if (tag_matches != 0) begin if (write) begin - `TRACE(3, ("%t: %s write-hit: addr=0x%0h, way=%b, blk_addr=%0d, tag_id=0x%0h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), tag_matches, line_idx, line_tag, req_uuid)) + `TRACE(3, ("%t: %s write-hit: addr=0x%0h, way=%b, line=%0d, tag_id=0x%0h (#%0d)\n", $time, 
INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), tag_matches, line_idx, line_tag, req_uuid)) end else begin - `TRACE(3, ("%t: %s read-hit: addr=0x%0h, way=%b, blk_addr=%0d, tag_id=0x%0h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), tag_matches, line_idx, line_tag, req_uuid)) + `TRACE(3, ("%t: %s read-hit: addr=0x%0h, way=%b, line=%0d, tag_id=0x%0h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), tag_matches, line_idx, line_tag, req_uuid)) end end else begin if (write) begin - `TRACE(3, ("%t: %s write-miss: addr=0x%0h, blk_addr=%0d, tag_id=0x%0h, (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), line_idx, line_tag, req_uuid)) + `TRACE(3, ("%t: %s write-miss: addr=0x%0h, line=%0d, tag_id=0x%0h, (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), line_idx, line_tag, req_uuid)) end else begin - `TRACE(3, ("%t: %s read-miss: addr=0x%0h, blk_addr=%0d, tag_id=0x%0h, (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), line_idx, line_tag, req_uuid)) + `TRACE(3, ("%t: %s read-miss: addr=0x%0h, line=%0d, tag_id=0x%0h, (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), line_idx, line_tag, req_uuid)) end end end diff --git a/hw/rtl/core/VX_fetch.sv b/hw/rtl/core/VX_fetch.sv index cf862aa06d..eb1f3d761a 100644 --- a/hw/rtl/core/VX_fetch.sv +++ b/hw/rtl/core/VX_fetch.sv @@ -51,9 +51,8 @@ module VX_fetch import VX_gpu_pkg::*; #( wire [`NUM_THREADS-1:0] rsp_tmask; VX_dp_ram #( - .DATAW (`PC_BITS + `NUM_THREADS), - .SIZE (`NUM_WARPS), - .LUTRAM (1) + .DATAW (`PC_BITS + `NUM_THREADS), + .SIZE (`NUM_WARPS) ) tag_store ( .clk (clk), .reset (reset), diff --git a/hw/rtl/core/VX_ipdom_stack.sv b/hw/rtl/core/VX_ipdom_stack.sv index ded232f300..04efd91d31 100644 --- a/hw/rtl/core/VX_ipdom_stack.sv +++ b/hw/rtl/core/VX_ipdom_stack.sv @@ -16,7 +16,6 @@ module VX_ipdom_stack #( parameter WIDTH = 1, parameter DEPTH = 1, - parameter OUT_REG = 0, 
parameter ADDRW = `LOG2UP(DEPTH) ) ( input wire clk, @@ -33,7 +32,7 @@ module VX_ipdom_stack #( ); reg slot_set [DEPTH-1:0]; - reg [ADDRW-1:0] rd_ptr, wr_ptr; + reg [ADDRW-1:0] rd_ptr, rd_ptr_n, wr_ptr; reg empty_r, full_r; @@ -41,35 +40,42 @@ module VX_ipdom_stack #( wire d_set_n = slot_set[rd_ptr]; + always @(*) begin + rd_ptr_n = rd_ptr; + if (push) begin + rd_ptr_n = wr_ptr; + end else if (pop) begin + rd_ptr_n = rd_ptr - ADDRW'(d_set_n); + end + end + always @(posedge clk) begin if (reset) begin - rd_ptr <= '0; wr_ptr <= '0; empty_r <= 1; full_r <= 0; + rd_ptr <= '0; end else begin `ASSERT(~push || ~full, ("%t: runtime error: writing to a full stack!", $time)); `ASSERT(~pop || ~empty, ("%t: runtime error: reading an empty stack!", $time)); `ASSERT(~push || ~pop, ("%t: runtime error: push and pop in same cycle not supported!", $time)); if (push) begin - rd_ptr <= wr_ptr; wr_ptr <= wr_ptr + ADDRW'(1); empty_r <= 0; full_r <= (ADDRW'(DEPTH-1) == wr_ptr); end else if (pop) begin wr_ptr <= wr_ptr - ADDRW'(d_set_n); - rd_ptr <= rd_ptr - ADDRW'(d_set_n); empty_r <= (rd_ptr == 0) && (d_set_n == 1); full_r <= 0; end + rd_ptr <= rd_ptr_n; end end VX_dp_ram #( - .DATAW (WIDTH * 2), - .SIZE (DEPTH), - .OUT_REG (OUT_REG ? 1 : 0), - .LUTRAM (OUT_REG ? 
0 : 1) + .DATAW (WIDTH * 2), + .SIZE (DEPTH), + .RADDR_REG (1) ) store ( .clk (clk), .reset (reset), @@ -78,7 +84,7 @@ module VX_ipdom_stack #( .wren (1'b1), .waddr (wr_ptr), .wdata ({q1, q0}), - .raddr (rd_ptr), + .raddr (rd_ptr_n), .rdata ({d1, d0}) ); @@ -94,7 +100,7 @@ module VX_ipdom_stack #( VX_pipe_register #( .DATAW (1), - .DEPTH (OUT_REG) + .DEPTH (0) ) pipe_reg ( .clk (clk), .reset (reset), diff --git a/hw/rtl/core/VX_split_join.sv b/hw/rtl/core/VX_split_join.sv index 7955437a68..c3f1f73f37 100644 --- a/hw/rtl/core/VX_split_join.sv +++ b/hw/rtl/core/VX_split_join.sv @@ -48,8 +48,7 @@ module VX_split_join import VX_gpu_pkg::*; #( for (genvar i = 0; i < `NUM_WARPS; ++i) begin : g_ipdom_stacks VX_ipdom_stack #( .WIDTH (`NUM_THREADS+`PC_BITS), - .DEPTH (`DV_STACK_SIZE), - .OUT_REG (0) + .DEPTH (`DV_STACK_SIZE) ) ipdom_stack ( .clk (clk), .reset (reset), diff --git a/hw/rtl/libs/VX_fifo_queue.sv b/hw/rtl/libs/VX_fifo_queue.sv index c5a4bf32e3..03521ce1a9 100644 --- a/hw/rtl/libs/VX_fifo_queue.sv +++ b/hw/rtl/libs/VX_fifo_queue.sv @@ -20,7 +20,7 @@ module VX_fifo_queue #( parameter ALM_FULL = (DEPTH - 1), parameter ALM_EMPTY = 1, parameter OUT_REG = 0, - parameter LUTRAM = 1, + parameter LUTRAM = 0, parameter SIZEW = `CLOG2(DEPTH+1) ) ( input wire clk, @@ -80,30 +80,38 @@ module VX_fifo_queue #( reg [DATAW-1:0] dout_r; reg [ADDRW-1:0] wr_ptr_r; reg [ADDRW-1:0] rd_ptr_r; - reg [ADDRW-1:0] rd_ptr_n_r; + reg [ADDRW-1:0] rd_ptr_n_r, rd_ptr_n_n; + + always @(*) begin + rd_ptr_n_n = rd_ptr_r; + if (pop) begin + if (DEPTH > 2) begin + rd_ptr_n_n = rd_ptr_r + ADDRW'(2); + end else begin // (DEPTH == 2); + rd_ptr_n_n = ~rd_ptr_n_r; + end + end + end always @(posedge clk) begin if (reset) begin - wr_ptr_r <= '0; - rd_ptr_r <= '0; - rd_ptr_n_r <= 1; + wr_ptr_r <= '0; + rd_ptr_r <= '0; + rd_ptr_n_r <= '0; end else begin wr_ptr_r <= wr_ptr_r + ADDRW'(push); if (pop) begin rd_ptr_r <= rd_ptr_n_r; - if (DEPTH > 2) begin - rd_ptr_n_r <= rd_ptr_r + ADDRW'(2); - end else begin 
// (DEPTH == 2); - rd_ptr_n_r <= ~rd_ptr_n_r; - end end + rd_ptr_n_r <= rd_ptr_n_n; end end VX_dp_ram #( .DATAW (DATAW), .SIZE (DEPTH), - .LUTRAM (LUTRAM) + .LUTRAM (LUTRAM), + .RADDR_REG (1) ) dp_ram ( .clk (clk), .reset (reset), @@ -112,7 +120,7 @@ module VX_fifo_queue #( .wren (1'b1), .waddr (wr_ptr_r), .wdata (data_in), - .raddr (rd_ptr_n_r), + .raddr (rd_ptr_n_n), .rdata (dout) ); @@ -130,23 +138,28 @@ module VX_fifo_queue #( end else begin : g_no_out_reg - reg [ADDRW-1:0] rd_ptr_r; + reg [ADDRW-1:0] rd_ptr_r, rd_ptr_n; reg [ADDRW-1:0] wr_ptr_r; + always @(*) begin + rd_ptr_n = rd_ptr_r + ADDRW'(pop); + end + always @(posedge clk) begin if (reset) begin - rd_ptr_r <= '0; wr_ptr_r <= '0; + rd_ptr_r <= '0; end else begin wr_ptr_r <= wr_ptr_r + ADDRW'(push); - rd_ptr_r <= rd_ptr_r + ADDRW'(pop); + rd_ptr_r <= rd_ptr_n; end end VX_dp_ram #( .DATAW (DATAW), .SIZE (DEPTH), - .LUTRAM (LUTRAM) + .LUTRAM (LUTRAM), + .RADDR_REG (1) ) dp_ram ( .clk (clk), .reset (reset), @@ -155,7 +168,7 @@ module VX_fifo_queue #( .wren (1'b1), .waddr (wr_ptr_r), .wdata (data_in), - .raddr (rd_ptr_r), + .raddr (rd_ptr_n), .rdata (data_out) ); diff --git a/hw/rtl/libs/VX_index_buffer.sv b/hw/rtl/libs/VX_index_buffer.sv index 4e84398188..61875b7fb6 100644 --- a/hw/rtl/libs/VX_index_buffer.sv +++ b/hw/rtl/libs/VX_index_buffer.sv @@ -17,7 +17,7 @@ module VX_index_buffer #( parameter DATAW = 1, parameter SIZE = 1, - parameter LUTRAM = 1, + parameter LUTRAM = 0, parameter ADDRW = `LOG2UP(SIZE) ) ( input wire clk, From d3df61abb06eef0f89bf4113ff9e9909dae3ffc7 Mon Sep 17 00:00:00 2001 From: Udit Subramanya Date: Wed, 9 Oct 2024 12:32:49 -0400 Subject: [PATCH 271/407] add initial development and production dockerfiles --- Dockerfile.dev | 20 +++++++++++ .../{Dockerfile.ubuntu => Dockerfile.prod} | 33 +++++++++++++------ 2 files changed, 43 insertions(+), 10 deletions(-) create mode 100644 Dockerfile.dev rename miscs/docker/{Dockerfile.ubuntu => Dockerfile.prod} (61%) diff --git a/Dockerfile.dev 
b/Dockerfile.dev new file mode 100644 index 0000000000..22cd741556 --- /dev/null +++ b/Dockerfile.dev @@ -0,0 +1,20 @@ +FROM ubuntu:20.04 + +LABEL "Udit Subramanya"="usubramanya3@gatech.edu" + +ENV DEBIAN_FRONTEND=noninteractive + +RUN apt-get update && \ + apt-get install -y build-essential valgrind git wget libpng-dev libboost-all-dev uuid-dev ccache cmake + +# Third-Party Repository to Install g++11 on Ubuntu 18.04 +RUN apt-get install -y manpages-dev software-properties-common +RUN add-apt-repository -y ppa:ubuntu-toolchain-r/test + +RUN apt-get install -y gcc-11 g++-11 + +RUN update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-11 11 +RUN update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-11 11 + +# create a directory for mounting the volume +WORKDIR /root/vortex \ No newline at end of file diff --git a/miscs/docker/Dockerfile.ubuntu b/miscs/docker/Dockerfile.prod similarity index 61% rename from miscs/docker/Dockerfile.ubuntu rename to miscs/docker/Dockerfile.prod index 64bb5813de..e1a8d94b57 100644 --- a/miscs/docker/Dockerfile.ubuntu +++ b/miscs/docker/Dockerfile.prod @@ -17,29 +17,42 @@ FROM ubuntu:20.04 # Set non-interactive installation to avoid user input during build ARG DEBIAN_FRONTEND=noninteractive -# Update and install necessary dependencies -RUN apt-get update && apt-get install -y \ +# Install necessary dependencies and upgrade installed components +RUN apt-get update -y && \ + apt-get install -y \ software-properties-common \ build-essential \ python3 \ git \ wget \ curl \ - ca-certificates && \ + ca-certificates \ + valgrind \ + libstdc++6 \ + binutils \ + uuid-dev \ + ccache \ + cmake && \ + apt-get upgrade -y && \ + gcc_version=$(gcc -dumpversion) && \ + if dpkg --compare-versions "$gcc_version" lt 11; then \ + echo "GCC version is less than 11. Installing GCC 11..." 
&& \ + add-apt-repository -y ppa:ubuntu-toolchain-r/test && \ + apt-get update -y && \ + apt-get install -y g++-11 gcc-11 && \ + update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-11 100 && \ + update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-11 100; \ + else \ + echo "GCC version is 11 or greater. No need to install GCC 11."; \ + fi && \ rm -rf /var/lib/apt/lists/* -# upgrade installed components -RUN apt-get upgrade && apt-get update - # Clone the Vortex repository RUN git clone --depth=1 --recursive https://github.com/vortexgpgpu/vortex.git /vortex # Set the initial working directory WORKDIR /vortex -# install system dependencies -RUN ./ci/install_dependencies.sh - # Configure the build folder RUN mkdir build && cd build && ../configure @@ -50,4 +63,4 @@ RUN cd build && ./ci/toolchain_install.sh --all RUN echo "source /vortex/build/ci/toolchain_env.sh" >> ~/.bashrc # Set the working directory to /vortex/build -WORKDIR /vortex/build \ No newline at end of file +WORKDIR /vortex/build From 8155173aab19b723e7fd8a612881ff5ec871e5fe Mon Sep 17 00:00:00 2001 From: Udit Subramanya Date: Fri, 11 Oct 2024 07:40:21 -0700 Subject: [PATCH 272/407] add documentation based on intial feedback --- README.md | 5 ++++- docs/contributing.md | 6 +++--- docs/testing.md | 2 +- 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 553939b501..83a81a4218 100644 --- a/README.md +++ b/README.md @@ -2,6 +2,9 @@ Vortex is a full-stack open-source RISC-V GPGPU. Vortex supports multiple *backend drivers*, including our C++ simulator (simx), an RTL simulator, and physical Xilinx and Altera FPGAs-- all controlled by a single driver script. The chosen driver determines the corresponding code invoked to run Vortex. Generally, developers will prototype their intended design in simx, before completing going forward with an RTL implementation. 
Alternatively, you can get up and running by selecting a driver of your choice and running a demo program. +## Website +Vortex news can be found on its [website](https://vortex.cc.gatech.edu/) + ## Specifications - Support RISC-V RV32IMAF and RV64IMAFD @@ -30,7 +33,7 @@ Vortex is a full-stack open-source RISC-V GPGPU. Vortex supports multiple *backe - `miscs`: Miscellaneous resources. ## Quick Start -The following steps demonstrate how to run Vortex with the default driver: simx. If you are interested in a different backend, look [here](docs/simulation.md). +If you are interested in a stable release of Vortex, you can download the latest release [here](https://github.com/vortexgpgpu/vortex/releases/latest). Otherwise, you can pull the most recent, but (potentially) unstable version as shown below. The following steps demonstrate how to build and run Vortex with the default driver: SimX. If you are interested in a different backend, look [here](docs/simulation.md). ### Supported OS Platforms - Ubuntu 18.04, 20.04 diff --git a/docs/contributing.md b/docs/contributing.md index f10f4017bf..0250e9f9f1 100644 --- a/docs/contributing.md +++ b/docs/contributing.md @@ -14,8 +14,8 @@ In an effort to keep `vortex` organized, permissions to directly create branches However, contributions are strongly encouraged and keep the project moving forward! Here is the procedure for contributing: 1. Create a fork of `vortex` -2. In your fork, create a branch that briefly explains the work you are adding (ie: `develop-documentation`) branches from `develop` and adds some documentation -3. Make your changes on your new branch in your fork. You may create as many commits as you need, which might be common if you are making multiple iterations +2. In your fork, create a branch from `master` that briefly explains the work you are adding (ie: `develop-documentation`) +3. Make your changes on the new branch in your fork. 
You may create as many commits as you need, which might be common if you are making multiple iterations 4. Since you are the owner of your fork, you have full permissions to push commits to your fork 4. When you are satisfied with the changes on your fork, you can open a PR from your fork using the online interface 5. If you recently made a push, you will get automatically get a prompt on Github online to create a PR, which you can press @@ -32,6 +32,6 @@ However, contributions are strongly encouraged and keep the project moving forwa 15. When all merge conflicts are resolved, changes are made, and tests pass you can have an admin merge your PR ## What Makes a Good Contribution? -- If you are contributing code changes, then review `testing.md` to ensure your tests are integrated into the CI pipeline +- If you are contributing code changes, then review [testing.md](./testing.md) to ensure your tests are integrated into the [CI pipeline](continuous_integration.md) - During a PR, you should consider the advice you are provided by your reviewers. Remember you keep adding commits to an open PR! - If your change aims to fix an issue opened on Github, please tag that issue in the PR itself \ No newline at end of file diff --git a/docs/testing.md b/docs/testing.md index b2ae8fb2c1..0ec46bda93 100644 --- a/docs/testing.md +++ b/docs/testing.md @@ -49,4 +49,4 @@ Compile your test: `$ make -C tests/regression/` Run your test: `$ ./ci/blackbox.sh --driver=simx --app= --debug` ## Adding Your Tests to the CI Pipeline -See `continuous_integration.md` \ No newline at end of file +If you are a contributor, then you will need to add tests that integrate into the continuous integration pipeline. Remember, Pull Requests cannot be merged unless new code has tests and existing tests do not regress. See more at [contributing.md](contributing.md) and [continuous_integration.md](continuous_integration.md). 
\ No newline at end of file From 28bf27e951b7343087368b295f9e8b9b429217ae Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sun, 13 Oct 2024 03:40:45 -0700 Subject: [PATCH 273/407] rtl cache redesign to support xilinx bram types --- ci/regression.sh.in | 1 + hw/rtl/VX_cluster.sv | 2 +- hw/rtl/VX_config.vh | 15 ++ hw/rtl/VX_socket.sv | 2 +- hw/rtl/Vortex.sv | 2 +- hw/rtl/cache/VX_bank_flush.sv | 38 ++-- hw/rtl/cache/VX_cache_bank.sv | 378 ++++++++++++++++++++-------------- hw/rtl/cache/VX_cache_data.sv | 181 ++++++++-------- hw/rtl/cache/VX_cache_mshr.sv | 67 +++--- hw/rtl/cache/VX_cache_tags.sv | 123 ++++------- 10 files changed, 422 insertions(+), 387 deletions(-) diff --git a/ci/regression.sh.in b/ci/regression.sh.in index 443b34f5ac..ddd4f12bd6 100755 --- a/ci/regression.sh.in +++ b/ci/regression.sh.in @@ -155,6 +155,7 @@ cache() # test writeback CONFIGS="-DDCACHE_WRITEBACK=1 -DDCACHE_NUM_WAYS=4" ./ci/blackbox.sh --driver=rtlsim --app=mstress + CONFIGS="-DDCACHE_WRITEBACK=1 -DDCACHE_DIRTYBYTES=1 -DDCACHE_NUM_WAYS=4" ./ci/blackbox.sh --driver=rtlsim --app=mstress CONFIGS="-DDCACHE_WRITEBACK=1 -DDCACHE_NUM_WAYS=4" ./ci/blackbox.sh --driver=simx --app=mstress CONFIGS="-DSOCKET_SIZE=1 -DL2_WRITEBACK=1 -DL3_WRITEBACK=1" ./ci/blackbox.sh --driver=rtlsim --cores=2 --clusters=2 --l2cache --l3cache --app=mstress CONFIGS="-DSOCKET_SIZE=1 -DL2_WRITEBACK=1 -DL3_WRITEBACK=1" ./ci/blackbox.sh --driver=simx --cores=2 --clusters=2 --l2cache --l3cache --app=mstress diff --git a/hw/rtl/VX_cluster.sv b/hw/rtl/VX_cluster.sv index b5e9e0a5c4..366d1bbac4 100644 --- a/hw/rtl/VX_cluster.sv +++ b/hw/rtl/VX_cluster.sv @@ -98,7 +98,7 @@ module VX_cluster import VX_gpu_pkg::*; #( .TAG_WIDTH (L2_TAG_WIDTH), .WRITE_ENABLE (1), .WRITEBACK (`L2_WRITEBACK), - .DIRTY_BYTES (`L2_WRITEBACK), + .DIRTY_BYTES (`L2_DIRTYBYTES), .UUID_WIDTH (`UUID_WIDTH), .FLAGS_WIDTH (`MEM_REQ_FLAGS_WIDTH), .CORE_OUT_BUF (3), diff --git a/hw/rtl/VX_config.vh b/hw/rtl/VX_config.vh index fb47566336..4f666ce203 100644 --- 
a/hw/rtl/VX_config.vh +++ b/hw/rtl/VX_config.vh @@ -542,6 +542,11 @@ `define DCACHE_WRITEBACK 0 `endif +// Enable Cache Dirty bytes +`ifndef DCACHE_DIRTYBYTES +`define DCACHE_DIRTYBYTES 0 +`endif + // LMEM Configurable Knobs //////////////////////////////////////////////////// `ifndef LMEM_DISABLE @@ -602,6 +607,11 @@ `define L2_WRITEBACK 0 `endif +// Enable Cache Dirty bytes +`ifndef L2_DIRTYBYTES +`define L2_DIRTYBYTES 0 +`endif + // L3cache Configurable Knobs ///////////////////////////////////////////////// // Cache Size @@ -644,6 +654,11 @@ `define L3_WRITEBACK 0 `endif +// Enable Cache Dirty bytes +`ifndef L3_DIRTYBYTES +`define L3_DIRTYBYTES 0 +`endif + `ifndef MEMORY_BANKS `define MEMORY_BANKS 2 `endif diff --git a/hw/rtl/VX_socket.sv b/hw/rtl/VX_socket.sv index 9c7fe12870..4ce547c7e4 100644 --- a/hw/rtl/VX_socket.sv +++ b/hw/rtl/VX_socket.sv @@ -150,7 +150,7 @@ module VX_socket import VX_gpu_pkg::*; #( .FLAGS_WIDTH (`MEM_REQ_FLAGS_WIDTH), .WRITE_ENABLE (1), .WRITEBACK (`DCACHE_WRITEBACK), - .DIRTY_BYTES (`DCACHE_WRITEBACK), + .DIRTY_BYTES (`DCACHE_DIRTYBYTES), .NC_ENABLE (1), .CORE_OUT_BUF (3), .MEM_OUT_BUF (2) diff --git a/hw/rtl/Vortex.sv b/hw/rtl/Vortex.sv index 4f9f495cef..40f95a81aa 100644 --- a/hw/rtl/Vortex.sv +++ b/hw/rtl/Vortex.sv @@ -84,7 +84,7 @@ module Vortex import VX_gpu_pkg::*; ( .TAG_WIDTH (L2_MEM_TAG_WIDTH), .WRITE_ENABLE (1), .WRITEBACK (`L3_WRITEBACK), - .DIRTY_BYTES (`L3_WRITEBACK), + .DIRTY_BYTES (`L3_DIRTYBYTES), .UUID_WIDTH (`UUID_WIDTH), .FLAGS_WIDTH (`MEM_REQ_FLAGS_WIDTH), .CORE_OUT_BUF (3), diff --git a/hw/rtl/cache/VX_bank_flush.sv b/hw/rtl/cache/VX_bank_flush.sv index 3228bd3a5b..ca28d749bf 100644 --- a/hw/rtl/cache/VX_bank_flush.sv +++ b/hw/rtl/cache/VX_bank_flush.sv @@ -48,20 +48,20 @@ module VX_bank_flush #( localparam STATE_WAIT2 = 4; localparam STATE_DONE = 5; - reg [2:0] state_r, state_n; + reg [2:0] state, state_n; - reg [CTR_WIDTH-1:0] counter_r; + reg [CTR_WIDTH-1:0] counter; always @(*) begin - state_n = state_r; - 
case (state_r) + state_n = state; + case (state) STATE_IDLE: begin if (flush_begin) begin state_n = STATE_WAIT1; end end STATE_INIT: begin - if (counter_r == ((2 ** `CS_LINE_SEL_BITS)-1)) begin + if (counter == ((2 ** `CS_LINE_SEL_BITS)-1)) begin state_n = STATE_IDLE; end end @@ -72,7 +72,7 @@ module VX_bank_flush #( end end STATE_FLUSH: begin - if (counter_r == ((2 ** CTR_WIDTH)-1) && flush_ready) begin + if (counter == ((2 ** CTR_WIDTH)-1) && flush_ready) begin state_n = (BANK_ID == 0) ? STATE_DONE : STATE_WAIT2; end end @@ -93,32 +93,32 @@ module VX_bank_flush #( always @(posedge clk) begin if (reset) begin - state_r <= STATE_INIT; - counter_r <= '0; + state <= STATE_INIT; + counter <= '0; end else begin - state_r <= state_n; - if (state_r != STATE_IDLE) begin - if ((state_r == STATE_INIT) - || ((state_r == STATE_FLUSH) && flush_ready)) begin - counter_r <= counter_r + CTR_WIDTH'(1); + state <= state_n; + if (state != STATE_IDLE) begin + if ((state == STATE_INIT) + || ((state == STATE_FLUSH) && flush_ready)) begin + counter <= counter + CTR_WIDTH'(1); end end else begin - counter_r <= '0; + counter <= '0; end end end - assign flush_end = (state_r == STATE_DONE); - assign flush_init = (state_r == STATE_INIT); - assign flush_valid = (state_r == STATE_FLUSH); - assign flush_line = counter_r[`CS_LINE_SEL_BITS-1:0]; + assign flush_end = (state == STATE_DONE); + assign flush_init = (state == STATE_INIT); + assign flush_valid = (state == STATE_FLUSH); + assign flush_line = counter[`CS_LINE_SEL_BITS-1:0]; if (WRITEBACK && `CS_WAY_SEL_BITS > 0) begin : g_flush_way VX_decoder #( .N (`CS_WAY_SEL_BITS), .D (NUM_WAYS) ) ctr_decoder ( - .sel_in (counter_r[`CS_LINE_SEL_BITS +: `CS_WAY_SEL_BITS]), + .sel_in (counter[`CS_LINE_SEL_BITS +: `CS_WAY_SEL_BITS]), .data_in (1'b1), .data_out (flush_way) ); diff --git a/hw/rtl/cache/VX_cache_bank.sv b/hw/rtl/cache/VX_cache_bank.sv index 6a7fcaf52e..d32e9423f8 100644 --- a/hw/rtl/cache/VX_cache_bank.sv +++ b/hw/rtl/cache/VX_cache_bank.sv 
@@ -141,13 +141,18 @@ module VX_cache_bank #( wire [MSHR_ADDR_WIDTH-1:0] replay_id; wire replay_ready; - wire is_init_st0, is_init_st1; + + wire valid_sel, valid_st0, valid_st1; + wire is_init_st0; + wire is_creq_st0, is_creq_st1; + wire is_fill_st0, is_fill_st1; wire is_flush_st0, is_flush_st1; wire [NUM_WAYS-1:0] flush_way_st0; + wire [NUM_WAYS-1:0] evict_way_st0, evict_way_st1; wire [`CS_LINE_ADDR_WIDTH-1:0] addr_sel, addr_st0, addr_st1; wire [`CS_LINE_SEL_BITS-1:0] line_idx_st0, line_idx_st1; - wire [`CS_TAG_SEL_BITS-1:0] line_tag_st0, line_tag_st1; + wire [`CS_TAG_SEL_BITS-1:0] line_tag_st1; wire rw_sel, rw_st0, rw_st1; wire [WORD_SEL_WIDTH-1:0] word_idx_sel, word_idx_st0, word_idx_st1; wire [WORD_SIZE-1:0] byteen_sel, byteen_st0, byteen_st1; @@ -158,16 +163,10 @@ module VX_cache_bank #( wire [`CS_LINE_WIDTH-1:0] data_sel, data_st0, data_st1; wire [MSHR_ADDR_WIDTH-1:0] mshr_id_st0, mshr_id_st1; wire [MSHR_ADDR_WIDTH-1:0] replay_id_st0; - wire valid_sel, valid_st0, valid_st1; - wire is_creq_st0, is_creq_st1; - wire is_fill_st0, is_fill_st1; wire is_replay_st0, is_replay_st1; wire [`UP(FLAGS_WIDTH)-1:0] flags_sel, flags_st0, flags_st1; - wire evict_dirty_st0, evict_dirty_st1; - wire [NUM_WAYS-1:0] way_idx_st0, way_idx_st1; - wire [NUM_WAYS-1:0] tag_matches_st0; - wire [MSHR_ADDR_WIDTH-1:0] mshr_alloc_id_st0; wire mshr_pending_st0, mshr_pending_st1; + wire [MSHR_ADDR_WIDTH-1:0] mshr_prev_id_st0, mshr_prev_id_st1; wire mshr_empty; wire flush_valid; @@ -201,11 +200,9 @@ module VX_cache_bank #( .bank_empty (no_pending_req) ); - wire rdw_hazard1_sel; - wire rdw_hazard2_sel; - reg rdw_hazard3_st1; + logic rdw_hazard, post_hazard; - wire pipe_stall = crsp_queue_stall || rdw_hazard3_st1; + wire pipe_stall = crsp_queue_stall || rdw_hazard; // inputs arbitration: // mshr replay has highest priority to maximize utilization since there is no miss. 
@@ -224,17 +221,14 @@ module VX_cache_bank #( wire creq_enable = creq_grant && core_req_valid; assign replay_ready = replay_grant - && ~rdw_hazard1_sel && ~pipe_stall; assign mem_rsp_ready = fill_grant && (!WRITEBACK || ~mreq_queue_alm_full) // needed for evictions - && ~rdw_hazard2_sel && ~pipe_stall; assign flush_ready = flush_grant && (!WRITEBACK || ~mreq_queue_alm_full) // needed for evictions - && ~rdw_hazard2_sel && ~pipe_stall; assign core_req_ready = creq_grant @@ -298,6 +292,12 @@ module VX_cache_bank #( assign req_uuid_sel = '0; end + wire is_init_sel = init_valid; + wire is_creq_sel = creq_enable || replay_enable; + wire is_fill_sel = fill_enable; + wire is_flush_sel = flush_enable; + wire is_replay_sel = replay_enable; + VX_pipe_register #( .DATAW (1 + 1 + 1 + 1 + 1 + 1 + `UP(FLAGS_WIDTH) + NUM_WAYS + `CS_LINE_ADDR_WIDTH + `CS_LINE_WIDTH + 1 + WORD_SIZE + WORD_SEL_WIDTH + REQ_SEL_WIDTH + TAG_WIDTH + MSHR_ADDR_WIDTH), .RESETW (1) @@ -305,8 +305,8 @@ module VX_cache_bank #( .clk (clk), .reset (reset), .enable (~pipe_stall), - .data_in ({valid_sel, init_valid, replay_enable, fill_enable, flush_enable, creq_enable, flags_sel, flush_way, addr_sel, data_sel, rw_sel, byteen_sel, word_idx_sel, req_idx_sel, tag_sel, replay_id}), - .data_out ({valid_st0, is_init_st0, is_replay_st0, is_fill_st0, is_flush_st0, is_creq_st0, flags_st0, flush_way_st0, addr_st0, data_st0, rw_st0, byteen_st0, word_idx_st0, req_idx_st0, tag_st0, replay_id_st0}) + .data_in ({valid_sel, is_init_sel, is_fill_sel, is_flush_sel, is_creq_sel, is_replay_sel, flags_sel, flush_way, addr_sel, data_sel, rw_sel, byteen_sel, word_idx_sel, req_idx_sel, tag_sel, replay_id}), + .data_out ({valid_st0, is_init_st0, is_fill_st0, is_flush_st0, is_creq_st0, is_replay_st0, flags_st0, flush_way_st0, addr_st0, data_st0, rw_st0, byteen_st0, word_idx_st0, req_idx_st0, tag_st0, replay_id_st0}) ); if (UUID_WIDTH != 0) begin : g_req_uuid_st0 @@ -315,82 +315,67 @@ module VX_cache_bank #( assign req_uuid_st0 = '0; end 
- wire do_init_st0 = valid_st0 && is_init_st0; - wire do_flush_st0 = valid_st0 && is_flush_st0; - wire do_creq_rd_st0 = valid_st0 && is_creq_st0 && ~rw_st0; - wire do_creq_wr_st0 = valid_st0 && is_creq_st0 && rw_st0; - wire do_replay_rd_st0 = valid_st0 && is_replay_st0 && ~rw_st0; - wire do_replay_wr_st0 = valid_st0 && is_replay_st0 && rw_st0; - wire do_fill_st0 = valid_st0 && is_fill_st0; - wire do_cache_rd_st0 = do_creq_rd_st0 || do_replay_rd_st0; - wire do_cache_wr_st0 = do_creq_wr_st0 || do_replay_wr_st0; - wire do_lookup_st0 = do_cache_rd_st0 || do_cache_wr_st0; + wire is_read_st0 = is_creq_st0 && ~rw_st0; + wire is_write_st0 = is_creq_st0 && rw_st0; + + wire do_init_st0 = valid_st0 && is_init_st0; + wire do_flush_st0 = valid_st0 && is_flush_st0; + wire do_read_st0 = valid_st0 && is_read_st0; + wire do_write_st0 = valid_st0 && is_write_st0; + wire do_fill_st0 = valid_st0 && is_fill_st0; assign write_data_st0 = data_st0[`CS_WORD_WIDTH-1:0]; assign line_idx_st0 = addr_st0[`CS_LINE_SEL_BITS-1:0]; - assign line_tag_st0 = `CS_LINE_ADDR_TAG(addr_st0); - wire [NUM_WAYS-1:0] evict_way_st0; - wire [`CS_TAG_SEL_BITS-1:0] evict_tag_st0; + wire [`CS_TAG_SEL_BITS-1:0] evict_tag_st1; + wire [NUM_WAYS-1:0] tag_matches_st1; + + wire do_lookup_st0 = do_read_st0 || do_write_st0; VX_cache_tags #( - .INSTANCE_ID($sformatf("%s-tags", INSTANCE_ID)), - .BANK_ID (BANK_ID), .CACHE_SIZE (CACHE_SIZE), .LINE_SIZE (LINE_SIZE), .NUM_BANKS (NUM_BANKS), .NUM_WAYS (NUM_WAYS), .WORD_SIZE (WORD_SIZE), - .WRITEBACK (WRITEBACK), - .UUID_WIDTH (UUID_WIDTH) + .WRITEBACK (WRITEBACK) ) cache_tags ( .clk (clk), .reset (reset), - - .req_uuid (req_uuid_st0), - - // init/flush/fill/write/lookup + .stall (pipe_stall), + // inputs .init (do_init_st0), .flush (do_flush_st0 && ~pipe_stall), .fill (do_fill_st0 && ~pipe_stall), - .write (do_cache_wr_st0 && ~pipe_stall), .lookup (do_lookup_st0 && ~pipe_stall), .line_addr (addr_st0), - .way_idx (flush_way_st0), - - // tag matches - .tag_matches(tag_matches_st0), 
- - // replacement - .evict_dirty(evict_dirty_st0), + .flush_way (flush_way_st0), + // outputs + .tag_matches_r(tag_matches_st1), + .line_tag_r (line_tag_st1), + .evict_tag_r(evict_tag_st1), .evict_way (evict_way_st0), - .evict_tag (evict_tag_st0) + .evict_way_r(evict_way_st1) ); - wire [`CS_TAG_SEL_BITS-1:0] line_tag2_st0; - wire is_flush2_st0 = WRITEBACK && is_flush_st0; - - assign mshr_id_st0 = is_creq_st0 ? mshr_alloc_id_st0 : replay_id_st0; - - assign way_idx_st0 = (is_fill_st0 || is_flush2_st0) ? evict_way_st0 : tag_matches_st0; - - assign line_tag2_st0 = (is_fill_st0 || is_flush2_st0) ? evict_tag_st0 : line_tag_st0; + wire [MSHR_ADDR_WIDTH-1:0] mshr_alloc_id_st0; + assign mshr_id_st0 = is_replay_st0 ? replay_id_st0 : mshr_alloc_id_st0; VX_pipe_register #( - .DATAW (1 + 1 + 1 + 1 + 1 + 1 + 1 + `UP(FLAGS_WIDTH) + `CS_TAG_SEL_BITS + `CS_LINE_SEL_BITS + `CS_LINE_WIDTH + WORD_SIZE + WORD_SEL_WIDTH + REQ_SEL_WIDTH + TAG_WIDTH + MSHR_ADDR_WIDTH + NUM_WAYS + 1 + 1), + .DATAW (1 + 1 + 1 + 1 + 1 + 1 + `UP(FLAGS_WIDTH) + `CS_LINE_SEL_BITS + `CS_LINE_WIDTH + WORD_SIZE + WORD_SEL_WIDTH + REQ_SEL_WIDTH + TAG_WIDTH + MSHR_ADDR_WIDTH + MSHR_ADDR_WIDTH + 1), .RESETW (1) ) pipe_reg1 ( .clk (clk), .reset (reset), .enable (~pipe_stall), - .data_in ({valid_st0, is_init_st0, is_replay_st0, is_fill_st0, is_flush2_st0, is_creq_st0, rw_st0, flags_st0, line_tag2_st0, line_idx_st0, data_st0, byteen_st0, word_idx_st0, req_idx_st0, tag_st0, mshr_id_st0, way_idx_st0, evict_dirty_st0, mshr_pending_st0}), - .data_out ({valid_st1, is_init_st1, is_replay_st1, is_fill_st1, is_flush_st1, is_creq_st1, rw_st1, flags_st1, line_tag_st1, line_idx_st1, data_st1, byteen_st1, word_idx_st1, req_idx_st1, tag_st1, mshr_id_st1, way_idx_st1, evict_dirty_st1, mshr_pending_st1}) + .data_in ({valid_st0, is_fill_st0, is_flush_st0, is_creq_st0, is_replay_st0, rw_st0, flags_st0, line_idx_st0, data_st0, byteen_st0, word_idx_st0, req_idx_st0, tag_st0, mshr_id_st0, mshr_prev_id_st0, mshr_pending_st0}), + .data_out 
({valid_st1, is_fill_st1, is_flush_st1, is_creq_st1, is_replay_st1, rw_st1, flags_st1, line_idx_st1, data_st1, byteen_st1, word_idx_st1, req_idx_st1, tag_st1, mshr_id_st1, mshr_prev_id_st1, mshr_pending_st1}) ); // we have a tag hit - wire is_hit_st1 = (| way_idx_st1); + wire is_hit_st1 = (| tag_matches_st1); if (UUID_WIDTH != 0) begin : g_req_uuid_st1 assign req_uuid_st1 = tag_st1[TAG_WIDTH-1 -: UUID_WIDTH]; @@ -398,51 +383,71 @@ module VX_cache_bank #( assign req_uuid_st1 = '0; end - wire is_read_st1 = is_creq_st1 && ~rw_st1; - wire is_write_st1 = is_creq_st1 && rw_st1; - - wire do_init_st1 = valid_st1 && is_init_st1; - wire do_fill_st1 = valid_st1 && is_fill_st1; - wire do_flush_st1 = valid_st1 && is_flush_st1; + wire is_read_st1 = is_creq_st1 && ~rw_st1; + wire is_write_st1 = is_creq_st1 && rw_st1; - wire do_creq_rd_st1 = valid_st1 && is_read_st1; - wire do_creq_wr_st1 = valid_st1 && is_write_st1; - wire do_replay_rd_st1 = valid_st1 && is_replay_st1 && ~rw_st1; - wire do_replay_wr_st1 = valid_st1 && is_replay_st1 && rw_st1; - - wire do_read_hit_st1 = do_creq_rd_st1 && is_hit_st1; - wire do_read_miss_st1 = do_creq_rd_st1 && ~is_hit_st1; - - wire do_write_hit_st1 = do_creq_wr_st1 && is_hit_st1; - wire do_write_miss_st1= do_creq_wr_st1 && ~is_hit_st1; - - wire do_cache_rd_st1 = do_read_hit_st1 || do_replay_rd_st1; - wire do_cache_wr_st1 = do_write_hit_st1 || do_replay_wr_st1; + wire do_read_st1 = valid_st1 && is_read_st1; + wire do_write_st1 = valid_st1 && is_write_st1; + wire do_fill_st1 = valid_st1 && is_fill_st1; + wire do_flush_st1 = valid_st1 && is_flush_st1 && WRITEBACK; assign addr_st1 = {line_tag_st1, line_idx_st1}; // ensure mshr replay always get a hit `RUNTIME_ASSERT (~(valid_st1 && is_replay_st1) || is_hit_st1, ("%t: missed mshr replay", $time)) - // both tag and data stores use BRAM with no read-during-write protection. - // we ned to stall the pipeline to prevent read-after-write hazards. 
- assign rdw_hazard1_sel = do_fill_st0; // stall first replay following a fill - assign rdw_hazard2_sel = WRITEBACK && do_cache_wr_st0; // a writeback can evict any preceeding write - always @(posedge clk) begin - // stall reads following writes to same line address - rdw_hazard3_st1 <= do_cache_rd_st0 && do_cache_wr_st1 && (line_idx_st0 == line_idx_st1) - && ~rdw_hazard3_st1; // release pipeline stall + if (WRITE_ENABLE) begin : g_rdw_hazard + // This implementation uses single-port BRAMs for the tags and data stores. + // Using different stages for read and write operations requires a pipeline stall in between due to address port sharing. + // Tags fill/flush can perform read and write in the same stage, since no dependency between. + // Data fill/flush can perform read and write in the same stage, since way_idx is available in st0. + // A data read should happen in st0 for its result to be available in st1. + // A data write should happen in st1 when the tag hit status is available. + wire [`CS_LINE_SEL_BITS-1:0] line_idx_sel = addr_sel[`CS_LINE_SEL_BITS-1:0]; + wire is_read_sel = is_creq_sel && !rw_sel; + wire is_write_sel = is_creq_sel && rw_sel; + wire is_same_read_sel = is_read_sel && (line_idx_sel == line_idx_st0); + always @(posedge clk) begin + if (reset) begin + post_hazard <= 0; + rdw_hazard <= 0; + end else begin + if (!crsp_queue_stall) begin + post_hazard <= rdw_hazard; + rdw_hazard <= do_write_st0 && valid_sel && !(is_write_sel || is_same_read_sel || (is_flush_sel && !WRITEBACK)); + end + end + end + end else begin : g_rdw_hazard_ro + assign rdw_hazard = 0; + assign post_hazard = 0; end assign write_data_st1 = data_st1[`CS_WORD_WIDTH-1:0]; - wire [`CS_LINE_WIDTH-1:0] fill_data_st1 = data_st1; - - wire [`CS_LINE_WIDTH-1:0] dirty_data_st1; - wire [LINE_SIZE-1:0] dirty_byteen_st1; + `UNUSED_VAR (data_st1) + + wire [`CS_LINE_WIDTH-1:0] evict_data_st1; + wire [LINE_SIZE-1:0] evict_byteen_st1; + wire line_dirty_st1; + + wire data_write; + wire 
[`CS_LINE_SEL_BITS-1:0] data_line_idx; + + if (WRITE_ENABLE) begin : g_data_ctrl + // by default all data accesses happen in sto and use line_idx_st0. + // data writes should happen in st1 when the tag hit is available, + // and use line_idx_st1 to ensure the correct line is updated. + // if a rdw hazard is active due to conflict, ensure we don't write twice. + assign data_write = do_write_st1 && !post_hazard && ~crsp_queue_stall; + assign data_line_idx = data_write ? line_idx_st1 : line_idx_st0; + end else begin : g_data_ctrl_ro + `UNUSED_VAR (post_hazard) + `UNUSED_VAR (do_write_st1) + assign data_write = 0; + assign data_line_idx = line_idx_st0; + end VX_cache_data #( - .INSTANCE_ID ($sformatf("%s-data", INSTANCE_ID)), - .BANK_ID (BANK_ID), .CACHE_SIZE (CACHE_SIZE), .LINE_SIZE (LINE_SIZE), .NUM_BANKS (NUM_BANKS), @@ -450,50 +455,58 @@ module VX_cache_bank #( .WORD_SIZE (WORD_SIZE), .WRITE_ENABLE (WRITE_ENABLE), .WRITEBACK (WRITEBACK), - .DIRTY_BYTES (DIRTY_BYTES), - .UUID_WIDTH (UUID_WIDTH) + .DIRTY_BYTES (DIRTY_BYTES) ) cache_data ( .clk (clk), .reset (reset), - .req_uuid (req_uuid_st1), - .init (do_init_st1), - .fill (do_fill_st1 && ~pipe_stall), - .flush (do_flush_st1 && ~pipe_stall), - .write (do_cache_wr_st1 && ~pipe_stall), - .read (do_cache_rd_st1 && ~pipe_stall), - .way_idx (way_idx_st1), - .line_addr (addr_st1), - .word_idx (word_idx_st1), - .fill_data (fill_data_st1), + .stall (pipe_stall), + // inputs + .init (do_init_st0), + .fill (do_fill_st0 && ~pipe_stall), + .flush (do_flush_st0 && ~pipe_stall), + .read (do_read_st0 && ~pipe_stall), + .write (data_write), + .evict_way (evict_way_st0), + .tag_matches(tag_matches_st1), + .line_idx (data_line_idx), + .fill_data (data_st0), .write_data (write_data_st1), + .word_idx (word_idx_st1), .write_byteen(byteen_st1), + // outputs .read_data (read_data_st1), - .dirty_data (dirty_data_st1), - .dirty_byteen(dirty_byteen_st1) + .line_dirty (line_dirty_st1), + .evict_data (evict_data_st1), + 
.evict_byteen(evict_byteen_st1) ); - wire mshr_allocate_st0 = valid_st0 && is_creq_st0; + wire mshr_allocate_st0 = valid_st0 && is_creq_st0 && ~is_replay_st0; + wire mshr_finalize_st1 = valid_st1 && is_creq_st1 && ~is_replay_st1; // release allocated mshr entry if we had a hit wire mshr_release_st1; if (WRITEBACK) begin : g_mshr_release - assign mshr_release_st1 = valid_st1 && is_creq_st1 && is_hit_st1; + assign mshr_release_st1 = is_hit_st1; end else begin : g_mshr_release_ro - // we need to keep missed write requests in MSHR if there is already a pending entry to the same address - // this ensures that missed write requests are replayed locally in case a pending fill arrives without the write content - // this can happen when writes are sent late, when the fill was already in flight. - assign mshr_release_st1 = valid_st1 && is_creq_st1 && (is_hit_st1 || (rw_st1 && ~mshr_pending_st1)); + // we need to keep missed write requests in MSHR if there is already a pending entry to the same address. + // this ensures that missed write requests are replayed locally in case a pending fill arrives without the write content. + // this can happen when writes are sent to memory late, when a related fill was already in flight. 
+ assign mshr_release_st1 = is_hit_st1 || (rw_st1 && ~mshr_pending_st1); end - wire mshr_dequeue = mshr_release_st1 && ~pipe_stall; + wire mshr_release_fire = mshr_finalize_st1 && mshr_release_st1 && ~pipe_stall; + + wire [1:0] mshr_dequeue; + `POP_COUNT(mshr_dequeue, {replay_fire, mshr_release_fire}); VX_pending_size #( - .SIZE (MSHR_SIZE) + .SIZE (MSHR_SIZE), + .DECRW (2) ) mshr_pending_size ( .clk (clk), .reset (reset), .incr (core_req_fire), - .decr (replay_fire || mshr_dequeue), + .decr (mshr_dequeue), .empty (mshr_empty), `UNUSED_PIN (alm_empty), .full (mshr_alm_full), @@ -508,7 +521,6 @@ module VX_cache_bank #( .NUM_BANKS (NUM_BANKS), .MSHR_SIZE (MSHR_SIZE), .WRITEBACK (WRITEBACK), - .RDW_STALL (1), .UUID_WIDTH (UUID_WIDTH), .DATA_WIDTH (WORD_SEL_WIDTH + WORD_SIZE + `CS_WORD_WIDTH + TAG_WIDTH + REQ_SEL_WIDTH) ) cache_mshr ( @@ -517,7 +529,7 @@ module VX_cache_bank #( .deq_req_uuid (req_uuid_sel), .alc_req_uuid (req_uuid_st0), - .rel_req_uuid (req_uuid_st1), + .fin_req_uuid (req_uuid_st1), // memory fill .fill_valid (mem_rsp_fire), @@ -539,11 +551,15 @@ module VX_cache_bank #( .allocate_data ({word_idx_st0, byteen_st0, write_data_st0, tag_st0, req_idx_st0}), .allocate_id (mshr_alloc_id_st0), .allocate_pending(mshr_pending_st0), + .allocate_previd(mshr_prev_id_st0), `UNUSED_PIN (allocate_ready), - // release - .release_valid (mshr_release_st1 && ~pipe_stall), - .release_id (mshr_id_st1) + // finalize + .finalize_valid (mshr_finalize_st1 && ~pipe_stall), + .finalize_is_release(mshr_release_st1), + .finalize_is_pending(mshr_pending_st1), + .finalize_id (mshr_id_st1), + .finalize_previd(mshr_prev_id_st1) ); // schedule core response @@ -553,7 +569,7 @@ module VX_cache_bank #( wire [REQ_SEL_WIDTH-1:0] crsp_queue_idx; wire [TAG_WIDTH-1:0] crsp_queue_tag; - assign crsp_queue_valid = do_cache_rd_st1; + assign crsp_queue_valid = do_read_st1 && is_hit_st1; assign crsp_queue_idx = req_idx_st1; assign crsp_queue_data = read_data_st1; assign crsp_queue_tag = tag_st1; @@ 
-565,7 +581,7 @@ module VX_cache_bank #( ) core_rsp_queue ( .clk (clk), .reset (reset), - .valid_in (crsp_queue_valid && ~rdw_hazard3_st1), + .valid_in (crsp_queue_valid && ~rdw_hazard), .ready_in (crsp_queue_ready), .data_in ({crsp_queue_tag, crsp_queue_data, crsp_queue_idx}), .data_out ({core_rsp_tag, core_rsp_data, core_rsp_idx}), @@ -585,37 +601,26 @@ module VX_cache_bank #( wire mreq_queue_rw; wire [`UP(FLAGS_WIDTH)-1:0] mreq_queue_flags; - wire is_fill_or_flush_st1 = is_fill_st1 || is_flush_st1; + wire is_fill_or_flush_st1 = is_fill_st1 || (is_flush_st1 && WRITEBACK); wire do_fill_or_flush_st1 = valid_st1 && is_fill_or_flush_st1; - wire do_writeback_st1 = do_fill_or_flush_st1 && evict_dirty_st1; - - if (WRITEBACK) begin : g_mreq_queue_push - if (DIRTY_BYTES) begin : g_dirty_bytes - // ensure dirty bytes match the tag info - wire has_dirty_bytes = (| dirty_byteen_st1); - `RUNTIME_ASSERT (~do_fill_or_flush_st1 || (evict_dirty_st1 == has_dirty_bytes), ("%t: missmatch dirty bytes: dirty_line=%b, dirty_bytes=%b, addr=0x%0h", $time, evict_dirty_st1, has_dirty_bytes, `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID))) - end - assign mreq_queue_push = (((do_read_miss_st1 || do_write_miss_st1) && ~mshr_pending_st1) - || do_writeback_st1) - && ~rdw_hazard3_st1; - end else begin : g_mreq_queue_push_ro - `UNUSED_VAR (do_write_miss_st1) - `UNUSED_VAR (do_writeback_st1) - assign mreq_queue_push = ((do_read_miss_st1 && ~mshr_pending_st1) - || do_creq_wr_st1) - && ~rdw_hazard3_st1; - end - - assign mreq_queue_pop = mem_req_valid && mem_req_ready; - assign mreq_queue_addr = addr_st1; - assign mreq_queue_flags = flags_st1; + wire do_writeback_st1 = do_fill_or_flush_st1 && line_dirty_st1; + wire [`CS_LINE_ADDR_WIDTH-1:0] evict_addr_st1 = {evict_tag_st1, line_idx_st1}; if (WRITE_ENABLE) begin : g_mreq_queue - if (WRITEBACK) begin : g_writeback + if (WRITEBACK) begin : g_wb + if (DIRTY_BYTES) begin : g_dirty_bytes + // ensure dirty bytes match the tag info + wire has_dirty_bytes = (| 
evict_byteen_st1); + `RUNTIME_ASSERT (~do_fill_or_flush_st1 || (line_dirty_st1 == has_dirty_bytes), ("%t: missmatch dirty bytes: dirty_line=%b, dirty_bytes=%b, addr=0x%0h", $time, line_dirty_st1, has_dirty_bytes, `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID))) + end + assign mreq_queue_push = (((do_read_st1 || do_write_st1) && ~is_hit_st1 && ~mshr_pending_st1) + || do_writeback_st1) + && ~pipe_stall; + assign mreq_queue_addr = is_fill_or_flush_st1 ? evict_addr_st1 : addr_st1; assign mreq_queue_rw = is_fill_or_flush_st1; - assign mreq_queue_data = dirty_data_st1; - assign mreq_queue_byteen = is_fill_or_flush_st1 ? dirty_byteen_st1 : '1; - end else begin : g_writethrough + assign mreq_queue_data = evict_data_st1; + assign mreq_queue_byteen = is_fill_or_flush_st1 ? evict_byteen_st1 : '1; + end else begin : g_wt wire [LINE_SIZE-1:0] line_byteen; VX_decoder #( .N (`CS_WORD_SEL_BITS), @@ -625,19 +630,30 @@ module VX_cache_bank #( .data_in (byteen_st1), .data_out (line_byteen) ); + assign mreq_queue_push = ((do_read_st1 && ~is_hit_st1 && ~mshr_pending_st1) + || do_write_st1) + && ~pipe_stall; + assign mreq_queue_addr = addr_st1; assign mreq_queue_rw = rw_st1; assign mreq_queue_data = {`CS_WORDS_PER_LINE{write_data_st1}}; assign mreq_queue_byteen = rw_st1 ? 
line_byteen : '1; `UNUSED_VAR (is_fill_or_flush_st1) - `UNUSED_VAR (dirty_data_st1) - `UNUSED_VAR (dirty_byteen_st1) + `UNUSED_VAR (do_writeback_st1) + `UNUSED_VAR (evict_addr_st1) + `UNUSED_VAR (evict_data_st1) + `UNUSED_VAR (evict_byteen_st1) end end else begin : g_mreq_queue_ro + assign mreq_queue_push = (do_read_st1 && ~is_hit_st1 && ~mshr_pending_st1) + && ~pipe_stall; + assign mreq_queue_addr = addr_st1; assign mreq_queue_rw = 0; assign mreq_queue_data = '0; assign mreq_queue_byteen = '1; - `UNUSED_VAR (dirty_data_st1) - `UNUSED_VAR (dirty_byteen_st1) + `UNUSED_VAR (do_writeback_st1) + `UNUSED_VAR (evict_addr_st1) + `UNUSED_VAR (evict_data_st1) + `UNUSED_VAR (evict_byteen_st1) end if (UUID_WIDTH != 0) begin : g_mreq_queue_tag_uuid @@ -646,6 +662,9 @@ module VX_cache_bank #( assign mreq_queue_tag = mshr_id_st1; end + assign mreq_queue_pop = mem_req_valid && mem_req_ready; + assign mreq_queue_flags = flags_st1; + VX_fifo_queue #( .DATAW (1 + `CS_LINE_ADDR_WIDTH + LINE_SIZE + `CS_LINE_WIDTH + MEM_TAG_WIDTH + `UP(FLAGS_WIDTH)), .DEPTH (MREQ_SIZE), @@ -667,6 +686,10 @@ module VX_cache_bank #( assign mem_req_valid = ~mreq_queue_empty; + `UNUSED_VAR (do_fill_st1) + `UNUSED_VAR (do_flush_st1) + `UNUSED_VAR (evict_way_st1) + /////////////////////////////////////////////////////////////////////////////// `ifdef PERF_ENABLE @@ -681,7 +704,7 @@ module VX_cache_bank #( && ~(replay_fire || mem_rsp_fire || core_req_fire || flush_fire); always @(posedge clk) begin if (input_stall || pipe_stall) begin - `TRACE(3, ("%t: *** %s stall: crsq=%b, mreq=%b, mshr=%b, rdw1=%b, rdw2=%b, rdw3=%b\n", $time, INSTANCE_ID, crsp_queue_stall, mreq_queue_alm_full, mshr_alm_full, rdw_hazard1_sel, rdw_hazard2_sel, rdw_hazard3_st1)) + `TRACE(3, ("%t: *** %s stall: crsq=%b, mreq=%b, mshr=%b, rdw=%b\n", $time, INSTANCE_ID, crsp_queue_stall, mreq_queue_alm_full, mshr_alm_full, rdw_hazard)) end if (mem_rsp_fire) begin `TRACE(2, ("%t: %s fill-rsp: addr=0x%0h, mshr_id=%0d, data=0x%h (#%0d)\n", $time, 
INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mem_rsp_addr, BANK_ID), mem_rsp_id, mem_rsp_data, req_uuid_sel)) @@ -696,13 +719,54 @@ module VX_cache_bank #( `TRACE(2, ("%t: %s core-rd-req: addr=0x%0h, tag=0x%0h, req_idx=%0d (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(core_req_addr, BANK_ID), core_req_tag, core_req_idx, req_uuid_sel)) end end + if (do_init_st0) begin + `TRACE(3, ("%t: %s tags-init: addr=0x%0h, line=%0d\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(addr_st0, BANK_ID), line_idx_st0)) + end + if (do_fill_st0 && ~pipe_stall) begin + `TRACE(3, ("%t: %s tags-fill: addr=0x%0h, way=%b, line=%0d (#%0d)\n", $time, INSTANCE_ID, + `CS_LINE_TO_FULL_ADDR(addr_st0, BANK_ID), evict_way_st0, line_idx_st0, req_uuid_st0)) + end + if (do_flush_st0 && ~pipe_stall) begin + `TRACE(3, ("%t: %s tags-flush: addr=0x%0h, way=%b, line=%0d (#%0d)\n", $time, INSTANCE_ID, + `CS_LINE_TO_FULL_ADDR(addr_st0, BANK_ID), evict_way_st0, line_idx_st0, req_uuid_st0)) + end + if (do_read_st1 && ~pipe_stall) begin + if (is_hit_st1) begin + `TRACE(3, ("%t: %s tags-rd-hit: addr=0x%0h, way=%b, line=%0d, tag=0x%0h (#%0d)\n", $time, INSTANCE_ID, + `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID), tag_matches_st1, line_idx_st1, line_tag_st1, req_uuid_st1)) + end else begin + `TRACE(3, ("%t: %s tags-rd-miss: addr=0x%0h, line=%0d, tag=0x%0h (#%0d)\n", $time, INSTANCE_ID, + `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID), line_idx_st1, line_tag_st1, req_uuid_st1)) + end + end + if (do_write_st1 && ~pipe_stall) begin + if (is_hit_st1) begin + `TRACE(3, ("%t: %s tags-wr-hit: addr=0x%0h, way=%b, line=%0d, tag=0x%0h (#%0d)\n", $time, INSTANCE_ID, + `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID), tag_matches_st1, line_idx_st1, line_tag_st1, req_uuid_st1)) + end else begin + `TRACE(3, ("%t: %s tags-wr-miss: addr=0x%0h, line=%0d, tag=0x%0h (#%0d)\n", $time, INSTANCE_ID, + `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID), line_idx_st1, line_tag_st1, req_uuid_st1)) + end + end + if (do_fill_st0 && ~pipe_stall) begin + `TRACE(3, ("%t: %s 
data-fill: addr=0x%0h, way=%b, line=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(addr_st0, BANK_ID), evict_way_st0, line_idx_st0, data_st0, req_uuid_st0)) + end + if (do_flush_st0 && ~pipe_stall) begin + `TRACE(3, ("%t: %s data-flush: addr=0x%0h, way=%b, line=%0d (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(addr_st0, BANK_ID), evict_way_st0, line_idx_st0, req_uuid_st0)) + end + if (do_read_st1 && is_hit_st1 && ~pipe_stall) begin + `TRACE(3, ("%t: %s data-read: addr=0x%0h, way=%b, line=%0d, wsel=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID), tag_matches_st1, line_idx_st1, word_idx_st1, read_data_st1, req_uuid_st1)) + end + if (do_write_st1 && is_hit_st1 && ~pipe_stall) begin + `TRACE(3, ("%t: %s data-write: addr=0x%0h, way=%b, line=%0d, wsel=%0d, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID), tag_matches_st1, line_idx_st1, word_idx_st1, byteen_st1, write_data_st1, req_uuid_st1)) + end if (crsp_queue_fire) begin `TRACE(2, ("%t: %s core-rd-rsp: addr=0x%0h, tag=0x%0h, req_idx=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID), crsp_queue_tag, crsp_queue_idx, crsp_queue_data, req_uuid_st1)) end if (mreq_queue_push) begin - if (do_creq_wr_st1 && !WRITEBACK) begin + if (!WRITEBACK && do_write_st1) begin `TRACE(2, ("%t: %s writethrough: addr=0x%0h, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mreq_queue_addr, BANK_ID), mreq_queue_byteen, mreq_queue_data, req_uuid_st1)) - end else if (do_writeback_st1) begin + end else if (WRITEBACK && do_writeback_st1) begin `TRACE(2, ("%t: %s writeback: addr=0x%0h, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mreq_queue_addr, BANK_ID), mreq_queue_byteen, mreq_queue_data, req_uuid_st1)) end else begin `TRACE(2, ("%t: %s fill-req: addr=0x%0h, mshr_id=%0d (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mreq_queue_addr, 
BANK_ID), mshr_id_st1, req_uuid_st1)) diff --git a/hw/rtl/cache/VX_cache_data.sv b/hw/rtl/cache/VX_cache_data.sv index 54a78e3570..278caccd5f 100644 --- a/hw/rtl/cache/VX_cache_data.sv +++ b/hw/rtl/cache/VX_cache_data.sv @@ -14,8 +14,6 @@ `include "VX_cache_define.vh" module VX_cache_data #( - parameter `STRING INSTANCE_ID= "", - parameter BANK_ID = 0, // Size of cache in bytes parameter CACHE_SIZE = 1024, // Size of line inside a bank in bytes @@ -31,94 +29,105 @@ module VX_cache_data #( // Enable cache writeback parameter WRITEBACK = 0, // Enable dirty bytes on writeback - parameter DIRTY_BYTES = 0, - // Request debug identifier - parameter UUID_WIDTH = 0 + parameter DIRTY_BYTES = 0 ) ( input wire clk, input wire reset, - -`IGNORE_UNUSED_BEGIN - input wire[`UP(UUID_WIDTH)-1:0] req_uuid, -`IGNORE_UNUSED_END - + input wire stall, + // inputs input wire init, input wire fill, input wire flush, - input wire write, input wire read, - input wire [`CS_LINE_ADDR_WIDTH-1:0] line_addr, - input wire [`UP(`CS_WORD_SEL_BITS)-1:0] word_idx, + input wire write, + input wire [`CS_LINE_SEL_BITS-1:0] line_idx, + input wire [NUM_WAYS-1:0] evict_way, + input wire [NUM_WAYS-1:0] tag_matches, input wire [`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] fill_data, input wire [`CS_WORD_WIDTH-1:0] write_data, input wire [WORD_SIZE-1:0] write_byteen, - input wire [NUM_WAYS-1:0] way_idx, + input wire [`UP(`CS_WORD_SEL_BITS)-1:0] word_idx, + // outputs output wire [`CS_WORD_WIDTH-1:0] read_data, - output wire [`CS_LINE_WIDTH-1:0] dirty_data, - output wire [LINE_SIZE-1:0] dirty_byteen + output wire line_dirty, + output wire [`CS_LINE_WIDTH-1:0] evict_data, + output wire [LINE_SIZE-1:0] evict_byteen ); - `UNUSED_SPARAM (INSTANCE_ID) - `UNUSED_PARAM (BANK_ID) `UNUSED_PARAM (WORD_SIZE) - `UNUSED_VAR (line_addr) - `UNUSED_VAR (init) - `UNUSED_VAR (read) - `UNUSED_VAR (flush) + `UNUSED_VAR (stall) localparam BYTEENW = (WRITE_ENABLE != 0) ? 
LINE_SIZE : 1; wire [NUM_WAYS-1:0][`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] line_rdata; - wire [`LOG2UP(NUM_WAYS)-1:0] way_idx_bin; - wire [`CS_LINE_SEL_BITS-1:0] line_idx; - assign line_idx = line_addr[`CS_LINE_SEL_BITS-1:0]; + if (WRITEBACK != 0) begin : g_writeback + localparam BYTEEN_DATAW = 1 + ((DIRTY_BYTES != 0) ? LINE_SIZE : 0); + wire [`LOG2UP(NUM_WAYS)-1:0] evict_way_idx, evict_way_idx_r; - VX_onehot_encoder #( - .N (NUM_WAYS) - ) way_idx_enc ( - .data_in (way_idx), - .data_out (way_idx_bin), - `UNUSED_PIN (valid_out) - ); - - if (WRITEBACK) begin : g_dirty_data - assign dirty_data = line_rdata[way_idx_bin]; - end else begin : g_dirty_data_0 - assign dirty_data = '0; - end - - if (DIRTY_BYTES) begin : g_dirty_byteen - wire [NUM_WAYS-1:0][`CS_WORDS_PER_LINE-1:0][WORD_SIZE-1:0] bs_rdata; - wire [NUM_WAYS-1:0][`CS_WORDS_PER_LINE-1:0][WORD_SIZE-1:0] bs_wdata; + VX_onehot_encoder #( + .N (NUM_WAYS) + ) fill_way_enc ( + .data_in (evict_way), + .data_out (evict_way_idx), + `UNUSED_PIN (valid_out) + ); - for (genvar i = 0; i < NUM_WAYS; ++i) begin : g_bs_wdata - for (genvar j = 0; j < `CS_WORDS_PER_LINE; ++j) begin : g_j - wire [WORD_SIZE-1:0] word_mask = {WORD_SIZE{(WORD_SIZE == 1) || (word_idx == j)}}; - wire [WORD_SIZE-1:0] wdata = write ? (bs_rdata[i][j] | (write_byteen & word_mask)) : ((fill || flush) ? '0 : bs_rdata[i][j]); - assign bs_wdata[i][j] = init ? '0 : (way_idx[i] ? wdata : bs_rdata[i][j]); + `BUFFER_EX(evict_way_idx_r, evict_way_idx, ~stall, 1); + + wire [NUM_WAYS-1:0][BYTEEN_DATAW-1:0] byteen_rdata; + wire [NUM_WAYS-1:0][BYTEEN_DATAW-1:0] byteen_wdata; + wire [NUM_WAYS-1:0][BYTEEN_DATAW-1:0] byteen_wren; + + for (genvar i = 0; i < NUM_WAYS; ++i) begin : g_byteen_wdata + wire dirty_data = write; // only asserted on writes + wire dirty_wren = init || (write ? 
tag_matches[i] : evict_way[i]); + + if (DIRTY_BYTES != 0) begin : g_dirty_bytes + wire [`CS_WORDS_PER_LINE-1:0][WORD_SIZE-1:0] bytes_data; + wire [`CS_WORDS_PER_LINE-1:0][WORD_SIZE-1:0] bytes_wren; + for (genvar j = 0; j < `CS_WORDS_PER_LINE; ++j) begin : g_j + wire word_sel = tag_matches[i] && ((WORD_SIZE == 1) || (word_idx == j)); + wire [WORD_SIZE-1:0] word_en = write_byteen & {WORD_SIZE{word_sel}}; + assign bytes_data[j] = {WORD_SIZE{write}}; // only asserted on writes + assign bytes_wren[j] = {WORD_SIZE{init}} | (write ? word_en : {WORD_SIZE{evict_way[i]}}); + end + assign byteen_wdata[i] = {dirty_data, bytes_data}; + assign byteen_wren[i] = {dirty_wren, bytes_wren}; + assign {line_dirty, evict_byteen} = byteen_rdata[evict_way_idx_r]; + end else begin : g_no_dirty_bytes + assign byteen_wdata[i] = dirty_data; + assign byteen_wren[i] = dirty_wren; + assign line_dirty = byteen_rdata[evict_way_idx_r]; + assign evict_byteen = '1; end end - wire bs_read = write || fill || flush; - wire bs_write = init || write || fill || flush; + wire byteen_read = fill || flush; + wire byteen_write = init || write || fill || flush; VX_sp_ram #( - .DATAW (LINE_SIZE * NUM_WAYS), - .SIZE (`CS_LINES_PER_BANK) + .DATAW (BYTEEN_DATAW * NUM_WAYS), + .WRENW (BYTEEN_DATAW * NUM_WAYS), + .SIZE (`CS_LINES_PER_BANK), + .OUT_REG (1) ) byteen_store ( .clk (clk), .reset (reset), - .read (bs_read), - .write (bs_write), - .wren (1'b1), + .read (byteen_read), + .write (byteen_write), + .wren (byteen_wren), .addr (line_idx), - .wdata (bs_wdata), - .rdata (bs_rdata) + .wdata (byteen_wdata), + .rdata (byteen_rdata) ); - assign dirty_byteen = bs_rdata[way_idx_bin]; - end else begin : g_dirty_byteen_0 - assign dirty_byteen = '1; + assign evict_data = line_rdata[evict_way_idx_r]; + + end else begin : g_no_writeback + `UNUSED_VAR (init) + assign line_dirty = 0; + assign evict_data = '0; + assign evict_byteen = '0; end for (genvar i = 0; i < NUM_WAYS; ++i) begin : g_data_store @@ -128,29 +137,26 @@ module 
VX_cache_data #( wire line_write; wire line_read; - wire way_en = (NUM_WAYS == 1) || way_idx[i]; - if (WRITE_ENABLE != 0) begin : g_line_data wire [`CS_WORDS_PER_LINE-1:0][WORD_SIZE-1:0] wren_w; for (genvar j = 0; j < `CS_WORDS_PER_LINE; ++j) begin : g_j wire word_en = (WORD_SIZE == 1) || (word_idx == j); - assign line_wdata[j] = fill ? fill_data[j] : write_data; - assign wren_w[j] = {WORD_SIZE{fill}} | (write_byteen & {WORD_SIZE{word_en}}); + assign line_wdata[j] = write ? write_data : fill_data[j]; + assign wren_w[j] = write ? (write_byteen & {WORD_SIZE{word_en}}) : {WORD_SIZE{1'b1}}; end assign line_wren = wren_w; - assign line_write = (fill || write) && way_en; - if (WRITEBACK) begin : g_line_read_wb - assign line_read = (read || fill || flush); - end else begin : g_line_read_wt - assign line_read = read; - end + assign line_write = (fill && ((NUM_WAYS == 1) || evict_way[i])) + || (write && tag_matches[i]); + assign line_read = read || ((fill || flush) && WRITEBACK); end else begin : g_line_data_ro `UNUSED_VAR (write) + `UNUSED_VAR (flush) `UNUSED_VAR (write_byteen) `UNUSED_VAR (write_data) + `UNUSED_VAR (word_idx) assign line_wdata = fill_data; assign line_wren = 1'b1; - assign line_write = fill && way_en; + assign line_write = fill && ((NUM_WAYS == 1) || evict_way[i]); assign line_read = read; end @@ -158,8 +164,7 @@ module VX_cache_data #( .DATAW (`CS_LINE_WIDTH), .SIZE (`CS_LINES_PER_BANK), .WRENW (BYTEENW), - .NO_RWCHECK (1), - .RW_ASSERT (1) + .OUT_REG (1) ) data_store ( .clk (clk), .reset (reset), @@ -172,9 +177,18 @@ module VX_cache_data #( ); end + wire [`LOG2UP(NUM_WAYS)-1:0] hit_way_idx; + VX_onehot_encoder #( + .N (NUM_WAYS) + ) hit_idx_enc ( + .data_in (tag_matches), + .data_out (hit_way_idx), + `UNUSED_PIN (valid_out) + ); + if (`CS_WORDS_PER_LINE > 1) begin : g_read_data // order the data layout to perform ways multiplexing last. - // this allows converting way index to binary in parallel with BRAM readaccess and way selection. 
+ // this allows converting way index to binary in parallel with BRAM read and word indexing. wire [`CS_WORDS_PER_LINE-1:0][NUM_WAYS-1:0][`CS_WORD_WIDTH-1:0] transposed_rdata; VX_transpose #( .DATAW (`CS_WORD_WIDTH), @@ -184,27 +198,10 @@ module VX_cache_data #( .data_in (line_rdata), .data_out (transposed_rdata) ); - assign read_data = transposed_rdata[word_idx][way_idx_bin]; + assign read_data = transposed_rdata[word_idx][hit_way_idx]; end else begin : g_read_data_1w `UNUSED_VAR (word_idx) - assign read_data = line_rdata[way_idx_bin]; - end - -`ifdef DBG_TRACE_CACHE - always @(posedge clk) begin - if (fill) begin - `TRACE(3, ("%t: %s fill: addr=0x%0h, way=%b, line=%0d, data=0x%h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_idx, line_idx, fill_data)) - end - if (flush) begin - `TRACE(3, ("%t: %s flush: addr=0x%0h, way=%b, line=%0d, byteen=0x%h, data=0x%h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_idx, line_idx, dirty_byteen, dirty_data)) - end - if (read) begin - `TRACE(3, ("%t: %s read: addr=0x%0h, way=%b, line=%0d, wsel=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_idx, line_idx, word_idx, read_data, req_uuid)) - end - if (write) begin - `TRACE(3, ("%t: %s write: addr=0x%0h, way=%b, line=%0d, wsel=%0d, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_idx, line_idx, word_idx, write_byteen, write_data, req_uuid)) - end + assign read_data = line_rdata[hit_way_idx]; end -`endif endmodule diff --git a/hw/rtl/cache/VX_cache_mshr.sv b/hw/rtl/cache/VX_cache_mshr.sv index ff3ead64f9..c8f89376a7 100644 --- a/hw/rtl/cache/VX_cache_mshr.sv +++ b/hw/rtl/cache/VX_cache_mshr.sv @@ -24,7 +24,7 @@ // arrival and are dequeued in the same order. // Each entry has a next pointer to the next entry pending for the same cache line. 
// -// During the fill request, the MSHR will release the MSHR entry at fill_id +// During the fill request, the MSHR will dequue the MSHR entry at the fill_id location // which represents the first request in the pending list that initiated the memory fill. // // The dequeue response directly follows the fill request and will release @@ -35,7 +35,8 @@ // the slot id of the previous entry for the same cache line. This is used to // link the new entry to the pending list. // -// The release request is used to invalidate the allocated MSHR entry if we had a cache hit. +// The finalize request is used to persit or release the currently allocated MSHR entry +// if we had a cache miss or a hit, respectively. // // Warning: This MSHR implementation is strongly coupled with the bank pipeline // and as such changes to either module requires careful evaluation. @@ -56,8 +57,6 @@ module VX_cache_mshr #( parameter DATA_WIDTH = 1, // Enable cache writeback parameter WRITEBACK = 0, - // Cache stall on read during write - RDW_STALL = 0, parameter MSHR_ADDR_WIDTH = `LOG2UP(MSHR_SIZE) ) ( @@ -67,7 +66,7 @@ module VX_cache_mshr #( `IGNORE_UNUSED_BEGIN input wire[`UP(UUID_WIDTH)-1:0] deq_req_uuid, input wire[`UP(UUID_WIDTH)-1:0] alc_req_uuid, - input wire[`UP(UUID_WIDTH)-1:0] rel_req_uuid, + input wire[`UP(UUID_WIDTH)-1:0] fin_req_uuid, `IGNORE_UNUSED_END // memory fill @@ -90,11 +89,15 @@ module VX_cache_mshr #( input wire [DATA_WIDTH-1:0] allocate_data, output wire [MSHR_ADDR_WIDTH-1:0] allocate_id, output wire allocate_pending, + output wire [MSHR_ADDR_WIDTH-1:0] allocate_previd, output wire allocate_ready, - // release - input wire release_valid, - input wire [MSHR_ADDR_WIDTH-1:0] release_id + // finalize + input wire finalize_valid, + input wire finalize_is_release, + input wire finalize_is_pending, + input wire [MSHR_ADDR_WIDTH-1:0] finalize_previd, + input wire [MSHR_ADDR_WIDTH-1:0] finalize_id ); `UNUSED_PARAM (BANK_ID) @@ -112,8 +115,6 @@ module VX_cache_mshr #( reg 
[MSHR_ADDR_WIDTH-1:0] dequeue_id_r, dequeue_id_n; wire [MSHR_ADDR_WIDTH-1:0] prev_idx; - reg [MSHR_ADDR_WIDTH-1:0] post_alloc_id, post_alloc_previd; - reg post_alloc_val; wire allocate_fire = allocate_valid && allocate_ready; wire dequeue_fire = dequeue_valid && dequeue_ready; @@ -157,19 +158,20 @@ module VX_cache_mshr #( valid_table_n[dequeue_id] = 0; if (next_table[dequeue_id]) begin dequeue_id_n = next_index[dequeue_id]; - end else if (!RDW_STALL && post_alloc_val && (post_alloc_previd == dequeue_id)) begin - dequeue_id_n = post_alloc_id; + end else if (finalize_valid && finalize_is_pending && (finalize_previd == dequeue_id)) begin + dequeue_id_n = finalize_id; end else begin dequeue_val_n = 0; end end - if (release_valid) begin - valid_table_n[release_id] = 0; - end - - if (post_alloc_val) begin - next_table_x[post_alloc_previd] = 1; + if (finalize_valid) begin + if (finalize_is_release) begin + valid_table_n[finalize_id] = 0; + end + if (finalize_is_pending) begin + next_table_x[finalize_previd] = 1; + end end next_table_n = next_table_x; @@ -184,12 +186,10 @@ module VX_cache_mshr #( valid_table <= '0; allocate_rdy <= 0; dequeue_val <= 0; - post_alloc_val <= 0; end else begin valid_table <= valid_table_n; allocate_rdy <= allocate_rdy_n; dequeue_val <= dequeue_val_n; - post_alloc_val <= allocate_fire && allocate_pending; end if (allocate_fire) begin @@ -197,22 +197,20 @@ module VX_cache_mshr #( write_table[allocate_id] <= allocate_rw; end - if (post_alloc_val) begin - next_index[post_alloc_previd] <= post_alloc_id; + if (finalize_valid && finalize_is_pending) begin + next_index[finalize_previd] <= finalize_id; end dequeue_id_r <= dequeue_id_n; allocate_id_r <= allocate_id_n; next_table <= next_table_n; - post_alloc_id <= allocate_id; - post_alloc_previd <= prev_idx; end - `RUNTIME_ASSERT((~allocate_fire || ~valid_table[allocate_id_r]), ("%t: *** %s inuse allocation: addr=0x%0h, id=%0d (#%0d)", $time, INSTANCE_ID, + `RUNTIME_ASSERT(~(allocate_fire && 
valid_table[allocate_id_r]), ("%t: *** %s inuse allocation: addr=0x%0h, id=%0d (#%0d)", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(allocate_addr, BANK_ID), allocate_id_r, alc_req_uuid)) - `RUNTIME_ASSERT((~release_valid || valid_table[release_id]), ("%t: *** %s invalid release: addr=0x%0h, id=%0d (#%0d)", $time, INSTANCE_ID, - `CS_LINE_TO_FULL_ADDR(addr_table[release_id], BANK_ID), release_id, rel_req_uuid)) + `RUNTIME_ASSERT(~(finalize_valid && ~valid_table[finalize_id]), ("%t: *** %s invalid release: addr=0x%0h, id=%0d (#%0d)", $time, INSTANCE_ID, + `CS_LINE_TO_FULL_ADDR(addr_table[finalize_id], BANK_ID), finalize_id, fin_req_uuid)) `RUNTIME_ASSERT((~fill_valid || valid_table[fill_id]), ("%t: *** %s invalid fill: addr=0x%0h, id=%0d", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(addr_table[fill_id], BANK_ID), fill_id)) @@ -220,7 +218,7 @@ module VX_cache_mshr #( VX_dp_ram #( .DATAW (DATA_WIDTH), .SIZE (MSHR_SIZE), - .RADDR_REG (1) + .OUT_REG (1) ) entries ( .clk (clk), .reset (reset), @@ -236,7 +234,9 @@ module VX_cache_mshr #( assign fill_addr = addr_table[fill_id]; assign allocate_ready = allocate_rdy; - assign allocate_id = allocate_id_r; + assign allocate_id = allocate_id_r; + assign allocate_previd = prev_idx; + if (WRITEBACK) begin : g_pending_wb assign allocate_pending = |addr_matches; end else begin : g_pending_wt @@ -255,14 +255,17 @@ module VX_cache_mshr #( if (reset) begin show_table <= 0; end else begin - show_table <= allocate_fire || post_alloc_val || release_valid || fill_valid || dequeue_fire; + show_table <= allocate_fire || finalize_valid || fill_valid || dequeue_fire; end if (allocate_fire) begin `TRACE(3, ("%t: %s allocate: addr=0x%0h, id=%0d, pending=%b, prev=%0d (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(allocate_addr, BANK_ID), allocate_id, allocate_pending, prev_idx, alc_req_uuid)) end - if (release_valid) begin - `TRACE(3, ("%t: %s release: id=%0d (#%0d)\n", $time, INSTANCE_ID, release_id, rel_req_uuid)) + if (finalize_valid && 
finalize_is_release) begin + `TRACE(3, ("%t: %s release: id=%0d (#%0d)\n", $time, INSTANCE_ID, finalize_id, fin_req_uuid)) + end + if (finalize_valid && finalize_is_pending) begin + `TRACE(3, ("%t: %s finalize: id=%0d (#%0d)\n", $time, INSTANCE_ID, finalize_id, fin_req_uuid)) end if (fill_valid) begin `TRACE(3, ("%t: %s fill: addr=0x%0h, id=%0d\n", $time, INSTANCE_ID, diff --git a/hw/rtl/cache/VX_cache_tags.sv b/hw/rtl/cache/VX_cache_tags.sv index 678f7af76c..b7a1957efe 100644 --- a/hw/rtl/cache/VX_cache_tags.sv +++ b/hw/rtl/cache/VX_cache_tags.sv @@ -14,8 +14,6 @@ `include "VX_cache_define.vh" module VX_cache_tags #( - parameter `STRING INSTANCE_ID = "", - parameter BANK_ID = 0, // Size of cache in bytes parameter CACHE_SIZE = 1024, // Size of line inside a bank in bytes @@ -27,99 +25,86 @@ module VX_cache_tags #( // Size of a word in bytes parameter WORD_SIZE = 1, // Enable cache writeback - parameter WRITEBACK = 0, - // Request debug identifier - parameter UUID_WIDTH = 0 + parameter WRITEBACK = 0 ) ( input wire clk, input wire reset, + input wire stall, -`IGNORE_UNUSED_BEGIN - input wire [`UP(UUID_WIDTH)-1:0] req_uuid, -`IGNORE_UNUSED_END - - // init/fill/lookup + // inputs input wire init, input wire flush, input wire fill, - input wire write, input wire lookup, input wire [`CS_LINE_ADDR_WIDTH-1:0] line_addr, - input wire [NUM_WAYS-1:0] way_idx, - output wire [NUM_WAYS-1:0] tag_matches, + input wire [NUM_WAYS-1:0] flush_way, - // eviction - output wire evict_dirty, + // outputs + output wire [NUM_WAYS-1:0] tag_matches_r, + output wire [`CS_TAG_SEL_BITS-1:0] line_tag_r, output wire [NUM_WAYS-1:0] evict_way, - output wire [`CS_TAG_SEL_BITS-1:0] evict_tag + output wire [NUM_WAYS-1:0] evict_way_r, + output wire [`CS_TAG_SEL_BITS-1:0] evict_tag_r ); - `UNUSED_SPARAM (INSTANCE_ID) - `UNUSED_PARAM (BANK_ID) - `UNUSED_VAR (lookup) - - // valid, dirty, tag - localparam TAG_WIDTH = 1 + WRITEBACK + `CS_TAG_SEL_BITS; + // valid, tag + localparam TAG_WIDTH = 1 + 
`CS_TAG_SEL_BITS; wire [`CS_LINE_SEL_BITS-1:0] line_idx = line_addr[`CS_LINE_SEL_BITS-1:0]; wire [`CS_TAG_SEL_BITS-1:0] line_tag = `CS_LINE_ADDR_TAG(line_addr); wire [NUM_WAYS-1:0][`CS_TAG_SEL_BITS-1:0] read_tag; wire [NUM_WAYS-1:0] read_valid; - wire [NUM_WAYS-1:0] read_dirty; - if (NUM_WAYS > 1) begin : g_evict_way - reg [NUM_WAYS-1:0] evict_way_r; + if (NUM_WAYS > 1) begin : g_evict_way + reg [NUM_WAYS-1:0] victim_way; // cyclic assignment of replacement way always @(posedge clk) begin if (reset) begin - evict_way_r <= 1; - end else if (lookup) begin - evict_way_r <= {evict_way_r[NUM_WAYS-2:0], evict_way_r[NUM_WAYS-1]}; + victim_way <= 1; + end else if (~stall) begin + victim_way <= {victim_way[NUM_WAYS-2:0], victim_way[NUM_WAYS-1]}; end end + assign evict_way = fill ? victim_way : flush_way; + `BUFFER_EX(evict_way_r, evict_way, ~stall, 1); + end else begin : g_evict_way_0 + `UNUSED_VAR (flush_way) + assign evict_way = 1'b1; + assign evict_way_r = 1'b1; + end - assign evict_way = fill ? 
evict_way_r : way_idx; - + if (WRITEBACK) begin : g_evict_tag_wb VX_onehot_mux #( .DATAW (`CS_TAG_SEL_BITS), .N (NUM_WAYS) ) evict_tag_sel ( .data_in (read_tag), - .sel_in (evict_way), - .data_out (evict_tag) + .sel_in (evict_way_r), + .data_out (evict_tag_r) ); - end else begin : g_evict_way_0 - assign evict_way = 1'b1; - assign evict_tag = read_tag; + end else begin : g_evict_tag_wt + assign evict_tag_r = '0; end for (genvar i = 0; i < NUM_WAYS; ++i) begin : g_tag_store - wire do_fill = fill && evict_way[i]; - wire do_flush = flush && (!WRITEBACK || way_idx[i]); // flush the whole line in writethrough mode - wire do_write = WRITEBACK && write && tag_matches[i]; + wire do_fill = fill && evict_way[i]; + wire do_flush = flush && (!WRITEBACK || evict_way[i]); // flush the whole line in writethrough mode - wire line_read = (WRITEBACK && (fill || flush)); - wire line_write = init || do_fill || do_flush || do_write; - wire line_valid = ~(init || flush); + wire line_read = lookup || (WRITEBACK && (fill || flush)); + wire line_write = init || do_fill || do_flush; + wire line_valid = fill; wire [TAG_WIDTH-1:0] line_wdata; wire [TAG_WIDTH-1:0] line_rdata; - if (WRITEBACK) begin : g_writeback - assign line_wdata = {line_valid, write, line_tag}; - assign {read_valid[i], read_dirty[i], read_tag[i]} = line_rdata; - end else begin : g_writethrough - assign line_wdata = {line_valid, line_tag}; - assign {read_valid[i], read_tag[i]} = line_rdata; - assign read_dirty[i] = 1'b0; - end + assign line_wdata = {line_valid, line_tag}; + assign {read_valid[i], read_tag[i]} = line_rdata; VX_sp_ram #( .DATAW (TAG_WIDTH), .SIZE (`CS_LINES_PER_BANK), - .NO_RWCHECK (1), - .RW_ASSERT (1) + .OUT_REG (1) ) tag_store ( .clk (clk), .reset (reset), @@ -132,40 +117,10 @@ module VX_cache_tags #( ); end - for (genvar i = 0; i < NUM_WAYS; ++i) begin : g_tag_matches - assign tag_matches[i] = read_valid[i] && (line_tag == read_tag[i]); - end - - assign evict_dirty = | (read_dirty & evict_way); + 
`BUFFER_EX(line_tag_r, line_tag, ~stall, 1); -`ifdef DBG_TRACE_CACHE - wire [`CS_LINE_ADDR_WIDTH-1:0] evict_line_addr = {evict_tag, line_idx}; - always @(posedge clk) begin - if (fill) begin - `TRACE(3, ("%t: %s fill: addr=0x%0h, way=%b, line=%0d, tag_id=0x%0h, dirty=%b, evict_addr=0x%0h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), evict_way, line_idx, line_tag, evict_dirty, `CS_LINE_TO_FULL_ADDR(evict_line_addr, BANK_ID))) - end - if (init) begin - `TRACE(3, ("%t: %s init: addr=0x%0h, line=%0d\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), line_idx)) - end - if (flush) begin - `TRACE(3, ("%t: %s flush: addr=0x%0h, way=%b, line=%0d, dirty=%b\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(evict_line_addr, BANK_ID), way_idx, line_idx, evict_dirty)) - end - if (lookup) begin - if (tag_matches != 0) begin - if (write) begin - `TRACE(3, ("%t: %s write-hit: addr=0x%0h, way=%b, line=%0d, tag_id=0x%0h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), tag_matches, line_idx, line_tag, req_uuid)) - end else begin - `TRACE(3, ("%t: %s read-hit: addr=0x%0h, way=%b, line=%0d, tag_id=0x%0h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), tag_matches, line_idx, line_tag, req_uuid)) - end - end else begin - if (write) begin - `TRACE(3, ("%t: %s write-miss: addr=0x%0h, line=%0d, tag_id=0x%0h, (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), line_idx, line_tag, req_uuid)) - end else begin - `TRACE(3, ("%t: %s read-miss: addr=0x%0h, line=%0d, tag_id=0x%0h, (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), line_idx, line_tag, req_uuid)) - end - end - end + for (genvar i = 0; i < NUM_WAYS; ++i) begin : g_tag_matches + assign tag_matches_r[i] = read_valid[i] && (line_tag_r == read_tag[i]); end -`endif endmodule From 684f2e2d3d118efcc7e1b650c905f110381f3f5b Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sun, 13 Oct 2024 03:42:51 -0700 Subject: [PATCH 
274/407] minor update --- hw/rtl/core/VX_ipdom_stack.sv | 45 ++++++++++------------------------- hw/rtl/libs/VX_fifo_queue.sv | 10 ++++---- 2 files changed, 16 insertions(+), 39 deletions(-) diff --git a/hw/rtl/core/VX_ipdom_stack.sv b/hw/rtl/core/VX_ipdom_stack.sv index 04efd91d31..9bc39b864a 100644 --- a/hw/rtl/core/VX_ipdom_stack.sv +++ b/hw/rtl/core/VX_ipdom_stack.sv @@ -30,22 +30,20 @@ module VX_ipdom_stack #( output wire empty, output wire full ); - reg slot_set [DEPTH-1:0]; - reg [ADDRW-1:0] rd_ptr, rd_ptr_n, wr_ptr; reg empty_r, full_r; wire [WIDTH-1:0] d0, d1; - wire d_set_n = slot_set[rd_ptr]; + wire d_set_r; always @(*) begin rd_ptr_n = rd_ptr; if (push) begin rd_ptr_n = wr_ptr; end else if (pop) begin - rd_ptr_n = rd_ptr - ADDRW'(d_set_n); + rd_ptr_n = rd_ptr - ADDRW'(d_set_r); end end @@ -64,49 +62,30 @@ module VX_ipdom_stack #( empty_r <= 0; full_r <= (ADDRW'(DEPTH-1) == wr_ptr); end else if (pop) begin - wr_ptr <= wr_ptr - ADDRW'(d_set_n); - empty_r <= (rd_ptr == 0) && (d_set_n == 1); + wr_ptr <= wr_ptr - ADDRW'(d_set_r); + empty_r <= (rd_ptr == 0) && d_set_r; full_r <= 0; end rd_ptr <= rd_ptr_n; end end + wire [WIDTH * 2:0] qout = push ? {1'b0, q1, q0} : {1'b1, d1, d0}; + VX_dp_ram #( - .DATAW (WIDTH * 2), + .DATAW (1 + WIDTH * 2), .SIZE (DEPTH), - .RADDR_REG (1) + .OUT_REG (1) ) store ( .clk (clk), .reset (reset), .read (1'b1), - .write (push), + .write (push || pop), .wren (1'b1), - .waddr (wr_ptr), - .wdata ({q1, q0}), + .waddr (push ? wr_ptr : rd_ptr), + .wdata (qout), .raddr (rd_ptr_n), - .rdata ({d1, d0}) - ); - - always @(posedge clk) begin - if (push) begin - slot_set[wr_ptr] <= 0; - end else if (pop) begin - slot_set[rd_ptr] <= 1; - end - end - - wire d_set_r; - - VX_pipe_register #( - .DATAW (1), - .DEPTH (0) - ) pipe_reg ( - .clk (clk), - .reset (reset), - .enable (1'b1), - .data_in (d_set_n), - .data_out (d_set_r) + .rdata ({d_set_r, d1, d0}) ); assign d = d_set_r ? 
d0 : d1; diff --git a/hw/rtl/libs/VX_fifo_queue.sv b/hw/rtl/libs/VX_fifo_queue.sv index 03521ce1a9..8af35bc7ba 100644 --- a/hw/rtl/libs/VX_fifo_queue.sv +++ b/hw/rtl/libs/VX_fifo_queue.sv @@ -110,8 +110,7 @@ module VX_fifo_queue #( VX_dp_ram #( .DATAW (DATAW), .SIZE (DEPTH), - .LUTRAM (LUTRAM), - .RADDR_REG (1) + .LUTRAM (LUTRAM) ) dp_ram ( .clk (clk), .reset (reset), @@ -120,7 +119,7 @@ module VX_fifo_queue #( .wren (1'b1), .waddr (wr_ptr_r), .wdata (data_in), - .raddr (rd_ptr_n_n), + .raddr (rd_ptr_n_r), .rdata (dout) ); @@ -158,8 +157,7 @@ module VX_fifo_queue #( VX_dp_ram #( .DATAW (DATAW), .SIZE (DEPTH), - .LUTRAM (LUTRAM), - .RADDR_REG (1) + .LUTRAM (LUTRAM) ) dp_ram ( .clk (clk), .reset (reset), @@ -168,7 +166,7 @@ module VX_fifo_queue #( .wren (1'b1), .waddr (wr_ptr_r), .wdata (data_in), - .raddr (rd_ptr_n), + .raddr (rd_ptr_r), .rdata (data_out) ); From 9f32e5693c012019984d7f04c1ae8f504cf1ad79 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sun, 13 Oct 2024 10:41:32 -0700 Subject: [PATCH 275/407] minor update --- hw/syn/xilinx/sandbox/Makefile | 5 +---- hw/syn/xilinx/xrt/Makefile | 6 +----- 2 files changed, 2 insertions(+), 9 deletions(-) diff --git a/hw/syn/xilinx/sandbox/Makefile b/hw/syn/xilinx/sandbox/Makefile index e4def9c4e8..074fcb87ca 100644 --- a/hw/syn/xilinx/sandbox/Makefile +++ b/hw/syn/xilinx/sandbox/Makefile @@ -24,11 +24,8 @@ FPU_INCLUDE = -I$(RTL_DIR)/fpu ifneq (,$(findstring FPU_FPNEW,$(CONFIGS))) FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/include -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/src -J$(THIRD_PARTY_DIR)/cvfpu/src/fpu_div_sqrt_mvp/hdl -J$(THIRD_PARTY_DIR)/cvfpu/src endif -TEX_INCLUDE = -I$(RTL_DIR)/tex -RASTER_INCLUDE = -I$(RTL_DIR)/raster -OM_INCLUDE = -I$(RTL_DIR)/om RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache -RTL_INCLUDE += $(FPU_INCLUDE) $(TEX_INCLUDE) $(RASTER_INCLUDE) $(OM_INCLUDE) +RTL_INCLUDE += $(FPU_INCLUDE) RTL_INCLUDE += 
-I$(SRC_DIR) # compilation flags diff --git a/hw/syn/xilinx/xrt/Makefile b/hw/syn/xilinx/xrt/Makefile index f5997352c1..2517f27770 100644 --- a/hw/syn/xilinx/xrt/Makefile +++ b/hw/syn/xilinx/xrt/Makefile @@ -76,17 +76,13 @@ CONFIGS += $(CONFIGS_$(NUM_CORES)c) # include sources RTL_PKGS = $(RTL_DIR)/VX_gpu_pkg.sv $(RTL_DIR)/fpu/VX_fpu_pkg.sv -RTL_PKGS += $(RTL_DIR)/tex/VX_tex_pkg.sv $(RTL_DIR)/raster/VX_raster_pkg.sv $(RTL_DIR)/om/VX_om_pkg.sv FPU_INCLUDE = -I$(RTL_DIR)/fpu ifneq (,$(findstring FPU_FPNEW,$(CONFIGS))) RTL_PKGS += $(THIRD_PARTY_DIR)/cvfpu/src/fpnew_pkg.sv $(THIRD_PARTY_DIR)/cvfpu/src/common_cells/src/cf_math_pkg $(THIRD_PARTY_DIR)/cvfpu/src/fpu_div_sqrt_mvp/hdl/defs_div_sqrt_mvp.sv FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/include -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/src -J$(THIRD_PARTY_DIR)/cvfpu/src/fpu_div_sqrt_mvp/hdl -J$(THIRD_PARTY_DIR)/cvfpu/src endif -TEX_INCLUDE = -I$(RTL_DIR)/tex -RASTER_INCLUDE = -I$(RTL_DIR)/raster -OM_INCLUDE = -I$(RTL_DIR)/om RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache -I$(AFU_DIR) -RTL_INCLUDE += $(FPU_INCLUDE) $(TEX_INCLUDE) $(RASTER_INCLUDE) $(OM_INCLUDE) +RTL_INCLUDE += $(FPU_INCLUDE) # Kernel compiler global settings VPP_FLAGS += --link --target $(TARGET) --platform $(PLATFORM) --save-temps --no_ip_cache From 37f4d053937534b9a6275a584c5d9081e6b7e496 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sun, 13 Oct 2024 10:44:04 -0700 Subject: [PATCH 276/407] minor update --- hw/rtl/cache/VX_cache_bank.sv | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hw/rtl/cache/VX_cache_bank.sv b/hw/rtl/cache/VX_cache_bank.sv index d32e9423f8..7f1153edee 100644 --- a/hw/rtl/cache/VX_cache_bank.sv +++ b/hw/rtl/cache/VX_cache_bank.sv @@ -693,8 +693,8 @@ module VX_cache_bank #( /////////////////////////////////////////////////////////////////////////////// `ifdef PERF_ENABLE - assign perf_read_misses = 
do_read_miss_st1; - assign perf_write_misses = do_write_miss_st1; + assign perf_read_misses = do_read_st1 && ~is_hit_st1; + assign perf_write_misses = do_write_st1 && ~is_hit_st1; assign perf_mshr_stalls = mshr_alm_full; `endif From 1d626588ef79a6862f8148cf48198b1873dde435 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sun, 13 Oct 2024 11:49:12 -0700 Subject: [PATCH 277/407] minor update --- hw/rtl/cache/VX_cache_mshr.sv | 3 +++ 1 file changed, 3 insertions(+) diff --git a/hw/rtl/cache/VX_cache_mshr.sv b/hw/rtl/cache/VX_cache_mshr.sv index c8f89376a7..10c2c948b6 100644 --- a/hw/rtl/cache/VX_cache_mshr.sv +++ b/hw/rtl/cache/VX_cache_mshr.sv @@ -169,6 +169,9 @@ module VX_cache_mshr #( if (finalize_is_release) begin valid_table_n[finalize_id] = 0; end + // warning: This code allows 'finalize_is_pending' to be asserted regardless of hit/miss + // to reduce the its propagation delay into the MSHR. this is safe because wrong updates + // to 'next_table_n' will be cleared during 'allocate_fire' below. 
if (finalize_is_pending) begin next_table_x[finalize_previd] = 1; end From 9e5638c9b082ca567ea7796d2e735fbcc69c4126 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sun, 13 Oct 2024 12:06:55 -0700 Subject: [PATCH 278/407] minor update --- hw/rtl/libs/VX_fifo_queue.sv | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hw/rtl/libs/VX_fifo_queue.sv b/hw/rtl/libs/VX_fifo_queue.sv index 8af35bc7ba..99efd3d38b 100644 --- a/hw/rtl/libs/VX_fifo_queue.sv +++ b/hw/rtl/libs/VX_fifo_queue.sv @@ -83,7 +83,7 @@ module VX_fifo_queue #( reg [ADDRW-1:0] rd_ptr_n_r, rd_ptr_n_n; always @(*) begin - rd_ptr_n_n = rd_ptr_r; + rd_ptr_n_n = rd_ptr_n_r; if (pop) begin if (DEPTH > 2) begin rd_ptr_n_n = rd_ptr_r + ADDRW'(2); @@ -97,7 +97,7 @@ module VX_fifo_queue #( if (reset) begin wr_ptr_r <= '0; rd_ptr_r <= '0; - rd_ptr_n_r <= '0; + rd_ptr_n_r <= 1; end else begin wr_ptr_r <= wr_ptr_r + ADDRW'(push); if (pop) begin From f63233334e3545893fe5053da9abede3def8eb09 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sun, 13 Oct 2024 16:22:59 -0700 Subject: [PATCH 279/407] minor update --- hw/rtl/cache/VX_cache_mshr.sv | 2 +- hw/rtl/libs/VX_axi_adapter.sv | 22 +++++++++++--------- sim/xrtsim/xrt_sim.cpp | 38 ++++++++++++++++++++++------------- 3 files changed, 37 insertions(+), 25 deletions(-) diff --git a/hw/rtl/cache/VX_cache_mshr.sv b/hw/rtl/cache/VX_cache_mshr.sv index 10c2c948b6..c94cf8e656 100644 --- a/hw/rtl/cache/VX_cache_mshr.sv +++ b/hw/rtl/cache/VX_cache_mshr.sv @@ -215,7 +215,7 @@ module VX_cache_mshr #( `RUNTIME_ASSERT(~(finalize_valid && ~valid_table[finalize_id]), ("%t: *** %s invalid release: addr=0x%0h, id=%0d (#%0d)", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(addr_table[finalize_id], BANK_ID), finalize_id, fin_req_uuid)) - `RUNTIME_ASSERT((~fill_valid || valid_table[fill_id]), ("%t: *** %s invalid fill: addr=0x%0h, id=%0d", $time, INSTANCE_ID, + `RUNTIME_ASSERT(~(fill_valid && ~valid_table[fill_id]), ("%t: *** %s invalid fill: addr=0x%0h, id=%0d", $time, 
INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(addr_table[fill_id], BANK_ID), fill_id)) VX_dp_ram #( diff --git a/hw/rtl/libs/VX_axi_adapter.sv b/hw/rtl/libs/VX_axi_adapter.sv index 255789fd71..162b0581ac 100644 --- a/hw/rtl/libs/VX_axi_adapter.sv +++ b/hw/rtl/libs/VX_axi_adapter.sv @@ -135,7 +135,7 @@ module VX_axi_adapter #( ); end - wire tbuf_full; + wire mem_req_tag_ready; wire [TAG_WIDTH_OUT-1:0] mem_req_tag_out; wire [TAG_WIDTH_OUT-1:0] mem_rsp_tag_out; @@ -143,13 +143,14 @@ module VX_axi_adapter #( if (TAG_WIDTH_IN > TAG_WIDTH_OUT) begin : g_tag_buf localparam TBUF_ADDRW = `CLOG2(TAG_BUFFER_SIZE); wire [TBUF_ADDRW-1:0] tbuf_waddr, tbuf_raddr; + wire tbuf_full; VX_index_buffer #( .DATAW (TAG_WIDTH_IN), .SIZE (TAG_BUFFER_SIZE) ) tag_buf ( .clk (clk), .reset (reset), - .acquire_en (mem_req_valid && !mem_req_rw && mem_req_ready), + .acquire_en (mem_req_valid && ~mem_req_rw && mem_req_ready), .write_addr (tbuf_waddr), .write_data (mem_req_tag), .read_data (mem_rsp_tag), @@ -158,22 +159,24 @@ module VX_axi_adapter #( .full (tbuf_full), `UNUSED_PIN (empty) ); + assign mem_req_tag_ready = mem_req_rw || ~tbuf_full; assign mem_req_tag_out = TAG_WIDTH_OUT'(tbuf_waddr); assign tbuf_raddr = mem_rsp_tag_out[TBUF_ADDRW-1:0]; `UNUSED_VAR (mem_rsp_tag_out) end else begin : g_no_tag_buf - assign tbuf_full = 0; + assign mem_req_tag_ready = 1; assign mem_req_tag_out = TAG_WIDTH_OUT'(mem_req_tag); assign mem_rsp_tag = mem_rsp_tag_out[TAG_WIDTH_IN-1:0]; `UNUSED_VAR (mem_rsp_tag_out) end // request ack - assign mem_req_ready = (mem_req_rw ? axi_write_ready[req_bank_sel] : m_axi_arready[req_bank_sel]) && ~tbuf_full; + assign mem_req_ready = mem_req_rw ? 
axi_write_ready[req_bank_sel] : + (m_axi_arready[req_bank_sel] && mem_req_tag_ready); // AXI write request address channel for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_axi_write_addr - assign m_axi_awvalid[i] = mem_req_valid && mem_req_rw && (req_bank_sel == i) && ~tbuf_full && ~m_axi_aw_ack[i]; + assign m_axi_awvalid[i] = mem_req_valid && mem_req_rw && (req_bank_sel == i) && ~m_axi_aw_ack[i]; assign m_axi_awaddr[i] = ADDR_WIDTH_OUT'(req_bank_off) << `CLOG2(DATA_WIDTH/8); assign m_axi_awid[i] = mem_req_tag_out; assign m_axi_awlen[i] = 8'b00000000; @@ -188,7 +191,7 @@ module VX_axi_adapter #( // AXI write request data channel for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_axi_write_data - assign m_axi_wvalid[i] = mem_req_valid && mem_req_rw && (req_bank_sel == i) && ~tbuf_full && ~m_axi_w_ack[i]; + assign m_axi_wvalid[i] = mem_req_valid && mem_req_rw && (req_bank_sel == i) && ~m_axi_w_ack[i]; assign m_axi_wdata[i] = mem_req_data; assign m_axi_wstrb[i] = mem_req_byteen; assign m_axi_wlast[i] = 1'b1; @@ -205,7 +208,7 @@ module VX_axi_adapter #( // AXI read request channel for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_axi_read_req - assign m_axi_arvalid[i] = mem_req_valid && ~mem_req_rw && (req_bank_sel == i) && ~tbuf_full; + assign m_axi_arvalid[i] = mem_req_valid && ~mem_req_rw && (req_bank_sel == i) && mem_req_tag_ready; assign m_axi_araddr[i] = ADDR_WIDTH_OUT'(req_bank_off) << `CLOG2(DATA_WIDTH/8); assign m_axi_arid[i] = mem_req_tag_out; assign m_axi_arlen[i] = 8'b00000000; @@ -228,9 +231,8 @@ module VX_axi_adapter #( assign rsp_arb_valid_in[i] = m_axi_rvalid[i]; assign rsp_arb_data_in[i] = {m_axi_rdata[i], m_axi_rid[i]}; assign m_axi_rready[i] = rsp_arb_ready_in[i]; - `RUNTIME_ASSERT(~m_axi_rvalid[i] || m_axi_rlast[i] == 1, ("%t: *** AXI response error", $time)) - `RUNTIME_ASSERT(~m_axi_rvalid[i] || m_axi_rresp[i] == 0, ("%t: *** AXI response error", $time)) - `UNUSED_VAR (m_axi_rlast[i]) + `RUNTIME_ASSERT(~(m_axi_rvalid[i] && m_axi_rlast[i] == 0), ("%t: 
*** AXI response error", $time)) + `RUNTIME_ASSERT(~(m_axi_rvalid[i] && m_axi_rresp[i] != 0), ("%t: *** AXI response error", $time)) end VX_stream_arb #( diff --git a/sim/xrtsim/xrt_sim.cpp b/sim/xrtsim/xrt_sim.cpp index d572b9479f..cd2e1b90cb 100644 --- a/sim/xrtsim/xrt_sim.cpp +++ b/sim/xrtsim/xrt_sim.cpp @@ -333,6 +333,8 @@ class xrt_sim::Impl { } device_->ap_rst_n = 1; + + // this AXI device is always ready to accept new requests for (int i = 0; i < PLATFORM_MEMORY_BANKS; ++i) { *m_axi_mem_[i].arready = 1; *m_axi_mem_[i].awready = 1; @@ -381,53 +383,56 @@ class xrt_sim::Impl { } void axi_ctrl_bus_reset() { - // address read request + // read request address device_->s_axi_ctrl_arvalid = 0; device_->s_axi_ctrl_araddr = 0; - // data read response + // read response device_->s_axi_ctrl_rready = 0; - // address write request + // write request address device_->s_axi_ctrl_awvalid = 0; device_->s_axi_ctrl_awaddr = 0; - // data write request + // write request data device_->s_axi_ctrl_wvalid = 0; device_->s_axi_ctrl_wdata = 0; device_->s_axi_ctrl_wstrb = 0; - // data write response + // write response device_->s_axi_ctrl_bready = 0; } void axi_mem_bus_reset() { for (int i = 0; i < PLATFORM_MEMORY_BANKS; ++i) { - // address read request + // read request address *m_axi_mem_[i].arready = 0; - // address write request + // write request address *m_axi_mem_[i].awready = 0; - // data write request + // write request data *m_axi_mem_[i].wready = 0; - // data read response + // read response *m_axi_mem_[i].rvalid = 0; - // data write response + // write response *m_axi_mem_[i].bvalid = 0; // states m_axi_states_[i].write_req_pending = false; + m_axi_states_[i].write_rsp_pending = false; + m_axi_states_[i].read_rsp_pending = false; } } void axi_mem_bus_eval() { for (int i = 0; i < PLATFORM_MEMORY_BANKS; ++i) { // handle read responses - if (*m_axi_mem_[i].rvalid && *m_axi_mem_[i].rready) { - *m_axi_mem_[i].rvalid = 0; + if (*m_axi_mem_[i].rvalid && (*m_axi_mem_[i].rready || 
~m_axi_states_[i].read_rsp_pending)) { + *m_axi_mem_[i].rvalid = 0; + m_axi_states_[i].read_rsp_pending = false; } if (!*m_axi_mem_[i].rvalid) { if (!pending_mem_reqs_[i].empty() @@ -441,13 +446,15 @@ class xrt_sim::Impl { *m_axi_mem_[i].rlast = 1; memcpy(m_axi_mem_[i].rdata->data(), mem_rsp->data.data(), PLATFORM_MEMORY_DATA_SIZE); pending_mem_reqs_[i].erase(mem_rsp_it); + m_axi_states_[i].read_rsp_pending = !*m_axi_mem_[i].rready; delete mem_rsp; } } // handle write responses - if (*m_axi_mem_[i].bvalid && *m_axi_mem_[i].bready) { + if (*m_axi_mem_[i].bvalid && (*m_axi_mem_[i].bready || ~m_axi_states_[i].write_rsp_pending)) { *m_axi_mem_[i].bvalid = 0; + m_axi_states_[i].write_rsp_pending = false; } if (!*m_axi_mem_[i].bvalid) { if (!pending_mem_reqs_[i].empty() @@ -459,6 +466,7 @@ class xrt_sim::Impl { *m_axi_mem_[i].bid = mem_rsp->tag; *m_axi_mem_[i].bresp = 0; pending_mem_reqs_[i].erase(mem_rsp_it); + m_axi_states_[i].write_rsp_pending = !*m_axi_mem_[i].bready; delete mem_rsp; } } @@ -487,7 +495,7 @@ class xrt_sim::Impl { *m_axi_mem_[i].wready = 0; } - // handle address write requestsls + // handle address write requestsls if (*m_axi_mem_[i].awvalid && *m_axi_mem_[i].awready && !*m_axi_mem_[i].wready) { m_axi_states_[i].write_req_addr = *m_axi_mem_[i].awaddr; m_axi_states_[i].write_req_tag = *m_axi_mem_[i].awid; @@ -537,6 +545,8 @@ class xrt_sim::Impl { uint64_t write_req_addr; uint32_t write_req_tag; bool write_req_pending; + bool write_rsp_pending; + bool read_rsp_pending; } m_axi_state_t; typedef struct { From 26df675e24e1bc05deb3610b1425f413f41364e6 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sun, 13 Oct 2024 20:08:38 -0700 Subject: [PATCH 280/407] minor update --- sim/xrtsim/xrt_sim.cpp | 179 ++++++++++++++++++++++------------------- 1 file changed, 94 insertions(+), 85 deletions(-) diff --git a/sim/xrtsim/xrt_sim.cpp b/sim/xrtsim/xrt_sim.cpp index cd2e1b90cb..8dd8009318 100644 --- a/sim/xrtsim/xrt_sim.cpp +++ b/sim/xrtsim/xrt_sim.cpp @@ -338,11 
+338,22 @@ class xrt_sim::Impl { for (int i = 0; i < PLATFORM_MEMORY_BANKS; ++i) { *m_axi_mem_[i].arready = 1; *m_axi_mem_[i].awready = 1; + *m_axi_mem_[i].wready = 1; } } void tick() { - this->axi_mem_bus_eval(); + device_->ap_clk = 0; + this->eval(); + + this->axi_mem_bus_eval(0); + + device_->ap_clk = 1; + this->eval(); + + this->axi_mem_bus_eval(1); + + dram_sim_.tick(); for (int i = 0; i < PLATFORM_MEMORY_BANKS; ++i) { if (!dram_queues_[i].empty()) { @@ -360,13 +371,6 @@ class xrt_sim::Impl { } } - dram_sim_.tick(); - - device_->ap_clk = 0; - this->eval(); - device_->ap_clk = 1; - this->eval(); - #ifndef NDEBUG fflush(stdout); #endif @@ -404,149 +408,154 @@ class xrt_sim::Impl { } void axi_mem_bus_reset() { - for (int i = 0; i < PLATFORM_MEMORY_BANKS; ++i) { + for (int b = 0; b < PLATFORM_MEMORY_BANKS; ++b) { // read request address - *m_axi_mem_[i].arready = 0; + *m_axi_mem_[b].arready = 0; // write request address - *m_axi_mem_[i].awready = 0; + *m_axi_mem_[b].awready = 0; // write request data - *m_axi_mem_[i].wready = 0; + *m_axi_mem_[b].wready = 0; // read response - *m_axi_mem_[i].rvalid = 0; + *m_axi_mem_[b].rvalid = 0; // write response - *m_axi_mem_[i].bvalid = 0; + *m_axi_mem_[b].bvalid = 0; // states - m_axi_states_[i].write_req_pending = false; - m_axi_states_[i].write_rsp_pending = false; - m_axi_states_[i].read_rsp_pending = false; + m_axi_states_[b].write_req_addr_ack = false; + m_axi_states_[b].write_req_data_ack = false; } } - void axi_mem_bus_eval() { - for (int i = 0; i < PLATFORM_MEMORY_BANKS; ++i) { + void axi_mem_bus_eval(bool clk) { + if (!clk) { + for (int b = 0; b < PLATFORM_MEMORY_BANKS; ++b) { + m_axi_states_[b].read_rsp_ready = *m_axi_mem_[b].rready; + m_axi_states_[b].write_rsp_ready = *m_axi_mem_[b].bready; + } + return; + } + + for (int b = 0; b < PLATFORM_MEMORY_BANKS; ++b) { // handle read responses - if (*m_axi_mem_[i].rvalid && (*m_axi_mem_[i].rready || ~m_axi_states_[i].read_rsp_pending)) { - *m_axi_mem_[i].rvalid = 0; - 
m_axi_states_[i].read_rsp_pending = false; + if (*m_axi_mem_[b].rvalid && m_axi_states_[b].read_rsp_ready) { + *m_axi_mem_[b].rvalid = 0; } - if (!*m_axi_mem_[i].rvalid) { - if (!pending_mem_reqs_[i].empty() - && (*pending_mem_reqs_[i].begin())->ready - && !(*pending_mem_reqs_[i].begin())->write) { - auto mem_rsp_it = pending_mem_reqs_[i].begin(); + if (!*m_axi_mem_[b].rvalid) { + if (!pending_mem_reqs_[b].empty() + && (*pending_mem_reqs_[b].begin())->ready + && !(*pending_mem_reqs_[b].begin())->write) { + auto mem_rsp_it = pending_mem_reqs_[b].begin(); auto mem_rsp = *mem_rsp_it; - *m_axi_mem_[i].rvalid = 1; - *m_axi_mem_[i].rid = mem_rsp->tag; - *m_axi_mem_[i].rresp = 0; - *m_axi_mem_[i].rlast = 1; - memcpy(m_axi_mem_[i].rdata->data(), mem_rsp->data.data(), PLATFORM_MEMORY_DATA_SIZE); - pending_mem_reqs_[i].erase(mem_rsp_it); - m_axi_states_[i].read_rsp_pending = !*m_axi_mem_[i].rready; + *m_axi_mem_[b].rvalid = 1; + *m_axi_mem_[b].rid = mem_rsp->tag; + *m_axi_mem_[b].rresp = 0; + *m_axi_mem_[b].rlast = 1; + memcpy(m_axi_mem_[b].rdata->data(), mem_rsp->data.data(), PLATFORM_MEMORY_DATA_SIZE); + pending_mem_reqs_[b].erase(mem_rsp_it); delete mem_rsp; } } // handle write responses - if (*m_axi_mem_[i].bvalid && (*m_axi_mem_[i].bready || ~m_axi_states_[i].write_rsp_pending)) { - *m_axi_mem_[i].bvalid = 0; - m_axi_states_[i].write_rsp_pending = false; + if (*m_axi_mem_[b].bvalid && m_axi_states_[b].write_rsp_ready) { + *m_axi_mem_[b].bvalid = 0; } - if (!*m_axi_mem_[i].bvalid) { - if (!pending_mem_reqs_[i].empty() - && (*pending_mem_reqs_[i].begin())->ready - && (*pending_mem_reqs_[i].begin())->write) { - auto mem_rsp_it = pending_mem_reqs_[i].begin(); + if (!*m_axi_mem_[b].bvalid) { + if (!pending_mem_reqs_[b].empty() + && (*pending_mem_reqs_[b].begin())->ready + && (*pending_mem_reqs_[b].begin())->write) { + auto mem_rsp_it = pending_mem_reqs_[b].begin(); auto mem_rsp = *mem_rsp_it; - *m_axi_mem_[i].bvalid = 1; - *m_axi_mem_[i].bid = mem_rsp->tag; - 
*m_axi_mem_[i].bresp = 0; - pending_mem_reqs_[i].erase(mem_rsp_it); - m_axi_states_[i].write_rsp_pending = !*m_axi_mem_[i].bready; + *m_axi_mem_[b].bvalid = 1; + *m_axi_mem_[b].bid = mem_rsp->tag; + *m_axi_mem_[b].bresp = 0; + pending_mem_reqs_[b].erase(mem_rsp_it); delete mem_rsp; } } // handle read requests - if (*m_axi_mem_[i].arvalid && *m_axi_mem_[i].arready) { + if (*m_axi_mem_[b].arvalid && *m_axi_mem_[b].arready) { auto mem_req = new mem_req_t(); - mem_req->tag = *m_axi_mem_[i].arid; - mem_req->addr = uint64_t(*m_axi_mem_[i].araddr); + mem_req->tag = *m_axi_mem_[b].arid; + mem_req->addr = uint64_t(*m_axi_mem_[b].araddr); ram_->read(mem_req->data.data(), mem_req->addr, PLATFORM_MEMORY_DATA_SIZE); mem_req->write = false; mem_req->ready = false; - pending_mem_reqs_[i].emplace_back(mem_req); + pending_mem_reqs_[b].emplace_back(mem_req); - /*printf("%0ld: [sim] axi-mem-read: bank=%d, addr=0x%lx, tag=0x%x, data=0x", timestamp, i, mem_req->addr, mem_req->tag); + /*printf("%0ld: [sim] axi-mem-read: bank=%d, addr=0x%lx, tag=0x%x, data=0x", timestamp, b, mem_req->addr, mem_req->tag); for (int i = PLATFORM_MEMORY_DATA_SIZE-1; i >= 0; --i) { - printf("%02x", mem_req->data[i]); + printf("%02x", mem_req->data[b]); } printf("\n");*/ // send dram request - dram_queues_[i].push(mem_req); + dram_queues_[b].push(mem_req); } - if (*m_axi_mem_[i].wready && !m_axi_states_[i].write_req_pending) { - *m_axi_mem_[i].wready = 0; + // handle write address requests + if (*m_axi_mem_[b].awvalid && *m_axi_mem_[b].awready && !m_axi_states_[b].write_req_addr_ack) { + m_axi_states_[b].write_req_addr = *m_axi_mem_[b].awaddr; + m_axi_states_[b].write_req_tag = *m_axi_mem_[b].awid; + m_axi_states_[b].write_req_addr_ack = true; } - // handle address write requestsls - if (*m_axi_mem_[i].awvalid && *m_axi_mem_[i].awready && !*m_axi_mem_[i].wready) { - m_axi_states_[i].write_req_addr = *m_axi_mem_[i].awaddr; - m_axi_states_[i].write_req_tag = *m_axi_mem_[i].awid; - // activate data channel - 
*m_axi_mem_[i].wready = 1; - m_axi_states_[i].write_req_pending = !*m_axi_mem_[i].wvalid; + // handle write data requests + if (*m_axi_mem_[b].wvalid && *m_axi_mem_[b].wready && !m_axi_states_[b].write_req_data_ack) { + m_axi_states_[b].write_req_byteen = *m_axi_mem_[b].wstrb; + auto data = (const uint8_t*)m_axi_mem_[b].wdata->data(); + for (int i = 0; i < PLATFORM_MEMORY_DATA_SIZE; ++i) { + m_axi_states_[b].write_req_data[i] = data[i]; + } + m_axi_states_[b].write_req_data_ack = true; } - // handle data write requests - if (*m_axi_mem_[i].wvalid && *m_axi_mem_[i].wready) { - auto byteen = *m_axi_mem_[i].wstrb; - auto data = (uint8_t*)m_axi_mem_[i].wdata->data(); - auto byte_addr = m_axi_states_[i].write_req_addr; - - for (int i = 0; i < PLATFORM_MEMORY_DATA_SIZE; i++) { + // handle write requests + if (m_axi_states_[b].write_req_addr_ack && m_axi_states_[b].write_req_data_ack) { + auto byteen = m_axi_states_[b].write_req_byteen; + auto byte_addr = m_axi_states_[b].write_req_addr; + for (int i = 0; i < PLATFORM_MEMORY_DATA_SIZE; ++i) { if ((byteen >> i) & 0x1) { - (*ram_)[byte_addr + i] = data[i]; + (*ram_)[byte_addr + i] = m_axi_states_[b].write_req_data[i]; } } - auto mem_req = new mem_req_t(); - mem_req->tag = m_axi_states_[i].write_req_tag; + mem_req->tag = m_axi_states_[b].write_req_tag; mem_req->addr = byte_addr; mem_req->write = true; mem_req->ready = false; - pending_mem_reqs_[i].emplace_back(mem_req); + pending_mem_reqs_[b].emplace_back(mem_req); - /*printf("%0ld: [sim] axi-mem-write: bank=%d, addr=0x%lx, byteen=0x%lx, tag=0x%x, data=0x", timestamp, i, mem_req->addr, byteen, mem_req->tag); + /*printf("%0ld: [sim] axi-mem-write: bank=%d, addr=0x%lx, byteen=0x%lx, tag=0x%x, data=0x", timestamp, b, mem_req->addr, byteen, mem_req->tag); for (int i = PLATFORM_MEMORY_DATA_SIZE-1; i >= 0; --i) { - printf("%02x", data[i]); + printf("%02x", m_axi_states_[b].write_req_data[i]]); } printf("\n");*/ // send dram request - dram_queues_[i].push(mem_req); + 
dram_queues_[b].push(mem_req); - // deactivate data channel - if (m_axi_states_[i].write_req_pending) { - *m_axi_mem_[i].wready = 0; - m_axi_states_[i].write_req_pending = false; - } + // clear acks + m_axi_states_[b].write_req_addr_ack = false; + m_axi_states_[b].write_req_data_ack = false; } } } typedef struct { + std::array write_req_data; + uint64_t write_req_byteen; uint64_t write_req_addr; uint32_t write_req_tag; - bool write_req_pending; - bool write_rsp_pending; - bool read_rsp_pending; + bool read_rsp_ready; + bool write_rsp_ready; + bool write_req_addr_ack; + bool write_req_data_ack; } m_axi_state_t; typedef struct { From 2a2fc2ae3934e912313a8d1b567d9457a405bde8 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sun, 13 Oct 2024 23:25:41 -0700 Subject: [PATCH 281/407] minor update --- ci/regression.sh.in | 2 +- hw/rtl/VX_config.vh | 6 +++--- hw/rtl/cache/VX_cache_mshr.sv | 2 +- hw/rtl/core/VX_ipdom_stack.sv | 2 +- hw/rtl/mem/VX_local_mem.sv | 2 +- 5 files changed, 7 insertions(+), 7 deletions(-) diff --git a/ci/regression.sh.in b/ci/regression.sh.in index ddd4f12bd6..662b40717a 100755 --- a/ci/regression.sh.in +++ b/ci/regression.sh.in @@ -154,7 +154,7 @@ cache() CONFIGS="-DDCACHE_NUM_BANKS=2" ./ci/blackbox.sh --driver=simx --app=sgemmx # test writeback - CONFIGS="-DDCACHE_WRITEBACK=1 -DDCACHE_NUM_WAYS=4" ./ci/blackbox.sh --driver=rtlsim --app=mstress + CONFIGS="-DDCACHE_WRITEBACK=1 -DDCACHE_DIRTYBYTES=0 -DDCACHE_NUM_WAYS=4" ./ci/blackbox.sh --driver=rtlsim --app=mstress CONFIGS="-DDCACHE_WRITEBACK=1 -DDCACHE_DIRTYBYTES=1 -DDCACHE_NUM_WAYS=4" ./ci/blackbox.sh --driver=rtlsim --app=mstress CONFIGS="-DDCACHE_WRITEBACK=1 -DDCACHE_NUM_WAYS=4" ./ci/blackbox.sh --driver=simx --app=mstress CONFIGS="-DSOCKET_SIZE=1 -DL2_WRITEBACK=1 -DL3_WRITEBACK=1" ./ci/blackbox.sh --driver=rtlsim --cores=2 --clusters=2 --l2cache --l3cache --app=mstress diff --git a/hw/rtl/VX_config.vh b/hw/rtl/VX_config.vh index 4f666ce203..a4e48da5fd 100644 --- a/hw/rtl/VX_config.vh +++ 
b/hw/rtl/VX_config.vh @@ -544,7 +544,7 @@ // Enable Cache Dirty bytes `ifndef DCACHE_DIRTYBYTES -`define DCACHE_DIRTYBYTES 0 +`define DCACHE_DIRTYBYTES 1 `endif // LMEM Configurable Knobs //////////////////////////////////////////////////// @@ -609,7 +609,7 @@ // Enable Cache Dirty bytes `ifndef L2_DIRTYBYTES -`define L2_DIRTYBYTES 0 +`define L2_DIRTYBYTES 1 `endif // L3cache Configurable Knobs ///////////////////////////////////////////////// @@ -656,7 +656,7 @@ // Enable Cache Dirty bytes `ifndef L3_DIRTYBYTES -`define L3_DIRTYBYTES 0 +`define L3_DIRTYBYTES 1 `endif `ifndef MEMORY_BANKS diff --git a/hw/rtl/cache/VX_cache_mshr.sv b/hw/rtl/cache/VX_cache_mshr.sv index c94cf8e656..ae6ebb7feb 100644 --- a/hw/rtl/cache/VX_cache_mshr.sv +++ b/hw/rtl/cache/VX_cache_mshr.sv @@ -222,7 +222,7 @@ module VX_cache_mshr #( .DATAW (DATA_WIDTH), .SIZE (MSHR_SIZE), .OUT_REG (1) - ) entries ( + ) mshr_store ( .clk (clk), .reset (reset), .read (1'b1), diff --git a/hw/rtl/core/VX_ipdom_stack.sv b/hw/rtl/core/VX_ipdom_stack.sv index 9bc39b864a..d5d0001323 100644 --- a/hw/rtl/core/VX_ipdom_stack.sv +++ b/hw/rtl/core/VX_ipdom_stack.sv @@ -76,7 +76,7 @@ module VX_ipdom_stack #( .DATAW (1 + WIDTH * 2), .SIZE (DEPTH), .OUT_REG (1) - ) store ( + ) ipdom_store ( .clk (clk), .reset (reset), .read (1'b1), diff --git a/hw/rtl/mem/VX_local_mem.sv b/hw/rtl/mem/VX_local_mem.sv index 7131c3f21e..2ba66347e7 100644 --- a/hw/rtl/mem/VX_local_mem.sv +++ b/hw/rtl/mem/VX_local_mem.sv @@ -169,7 +169,7 @@ module VX_local_mem import VX_gpu_pkg::*; #( .OUT_REG (1), .READ_ENABLE (0), .NO_RWCHECK (1) - ) data_store ( + ) lmem_store ( .clk (clk), .reset (reset), .read (per_bank_req_valid[i] && per_bank_req_ready[i] && ~per_bank_req_rw[i]), From fe5442dbb3594e74136e12b7645dad87d8e905eb Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sun, 13 Oct 2024 23:34:57 -0700 Subject: [PATCH 282/407] minor update --- hw/rtl/VX_config.vh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git 
a/hw/rtl/VX_config.vh b/hw/rtl/VX_config.vh index a4e48da5fd..0cff1810ec 100644 --- a/hw/rtl/VX_config.vh +++ b/hw/rtl/VX_config.vh @@ -544,7 +544,7 @@ // Enable Cache Dirty bytes `ifndef DCACHE_DIRTYBYTES -`define DCACHE_DIRTYBYTES 1 +`define DCACHE_DIRTYBYTES `DCACHE_WRITEBACK `endif // LMEM Configurable Knobs //////////////////////////////////////////////////// @@ -609,7 +609,7 @@ // Enable Cache Dirty bytes `ifndef L2_DIRTYBYTES -`define L2_DIRTYBYTES 1 +`define L2_DIRTYBYTES `L2_WRITEBACK `endif // L3cache Configurable Knobs ///////////////////////////////////////////////// @@ -656,7 +656,7 @@ // Enable Cache Dirty bytes `ifndef L3_DIRTYBYTES -`define L3_DIRTYBYTES 1 +`define L3_DIRTYBYTES `L3_WRITEBACK `endif `ifndef MEMORY_BANKS From 0d044230742312de09c3ddf0dc3b9836a6cd2d7b Mon Sep 17 00:00:00 2001 From: MichaelJSr Date: Mon, 14 Oct 2024 10:12:33 -0700 Subject: [PATCH 283/407] Readded the ecall and ebreak instruction traps so that the riscv-vector tests run properly --- sim/simx/emulator.cpp | 12 ++++++++++++ sim/simx/emulator.h | 4 ++++ sim/simx/execute.cpp | 4 ++++ 3 files changed, 20 insertions(+) diff --git a/sim/simx/emulator.cpp b/sim/simx/emulator.cpp index 4fc066d66d..05b3497c45 100644 --- a/sim/simx/emulator.cpp +++ b/sim/simx/emulator.cpp @@ -625,3 +625,15 @@ void Emulator::update_fcrs(uint32_t fflags, uint32_t tid, uint32_t wid) { this->set_csr(VX_CSR_FFLAGS, this->get_csr(VX_CSR_FFLAGS, tid, wid) | fflags, tid, wid); } } + +// For riscv-vector test functionality, ecall and ebreak must trap +// These instructions are used in the vector tests to stop execution of the test +// Therefore, without these instructions, undefined and incorrect behavior happens +// +// For now, we need these instructions to trap for testing the riscv-vector isa +void Emulator::trigger_ecall() { + active_warps_.reset(); +} +void Emulator::trigger_ebreak() { + active_warps_.reset(); +} \ No newline at end of file diff --git a/sim/simx/emulator.h b/sim/simx/emulator.h index 
d1b14dacad..5f1b91d5d4 100644 --- a/sim/simx/emulator.h +++ b/sim/simx/emulator.h @@ -122,6 +122,10 @@ class Emulator { void update_fcrs(uint32_t fflags, uint32_t tid, uint32_t wid); + void trigger_ecall(); // Re-added for riscv-vector test functionality + + void trigger_ebreak(); // Re-added for riscv-vector test functionality + const Arch& arch_; const DCRS& dcrs_; Core* core_; diff --git a/sim/simx/execute.cpp b/sim/simx/execute.cpp index e70d45cb24..dd82535715 100644 --- a/sim/simx/execute.cpp +++ b/sim/simx/execute.cpp @@ -830,7 +830,11 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) { trace->fetch_stall = true; switch (csr_addr) { case 0x000: // RV32I: ECALL + this->trigger_ecall(); // Re-added for riscv-vector test functionality + break; case 0x001: // RV32I: EBREAK + this->trigger_ebreak(); // Re-added for riscv-vector test functionality + break; case 0x002: // RV32I: URET case 0x102: // RV32I: SRET case 0x302: // RV32I: MRET From 37757fab8ffac71df2a8cc8a6d52de547184bdb7 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Mon, 14 Oct 2024 15:48:49 -0700 Subject: [PATCH 284/407] fixed fifo_queue support for BRAM --- hw/rtl/VX_config.vh | 12 +-- hw/rtl/cache/VX_cache.sv | 4 +- hw/rtl/core/VX_ibuffer.sv | 2 +- hw/rtl/libs/VX_fifo_queue.sv | 150 ++++++++++++++++------------------- 4 files changed, 79 insertions(+), 89 deletions(-) diff --git a/hw/rtl/VX_config.vh b/hw/rtl/VX_config.vh index 0cff1810ec..da05fc9e9e 100644 --- a/hw/rtl/VX_config.vh +++ b/hw/rtl/VX_config.vh @@ -480,7 +480,7 @@ // Number of Associative Ways `ifndef ICACHE_NUM_WAYS -`define ICACHE_NUM_WAYS 1 +`define ICACHE_NUM_WAYS 4 `endif // Dcache Configurable Knobs ////////////////////////////////////////////////// @@ -529,12 +529,12 @@ // Memory Response Queue Size `ifndef DCACHE_MRSQ_SIZE -`define DCACHE_MRSQ_SIZE 0 +`define DCACHE_MRSQ_SIZE 4 `endif // Number of Associative Ways `ifndef DCACHE_NUM_WAYS -`define DCACHE_NUM_WAYS 1 +`define DCACHE_NUM_WAYS 4 
`endif // Enable Cache Writeback @@ -594,12 +594,12 @@ // Memory Response Queue Size `ifndef L2_MRSQ_SIZE -`define L2_MRSQ_SIZE 0 +`define L2_MRSQ_SIZE 4 `endif // Number of Associative Ways `ifndef L2_NUM_WAYS -`define L2_NUM_WAYS 2 +`define L2_NUM_WAYS 4 `endif // Enable Cache Writeback @@ -641,7 +641,7 @@ // Memory Response Queue Size `ifndef L3_MRSQ_SIZE -`define L3_MRSQ_SIZE 0 +`define L3_MRSQ_SIZE 4 `endif // Number of Associative Ways diff --git a/hw/rtl/cache/VX_cache.sv b/hw/rtl/cache/VX_cache.sv index d749e6ee91..c31699c1ef 100644 --- a/hw/rtl/cache/VX_cache.sv +++ b/hw/rtl/cache/VX_cache.sv @@ -402,8 +402,8 @@ module VX_cache import VX_gpu_pkg::*; #( .UUID_WIDTH (UUID_WIDTH), .TAG_WIDTH (TAG_WIDTH), .FLAGS_WIDTH (FLAGS_WIDTH), - .CORE_OUT_REG (CORE_RSP_REG_DISABLE ? 0 : `TO_OUT_BUF_REG(CORE_OUT_BUF)), - .MEM_OUT_REG (MEM_REQ_REG_DISABLE ? 0 : `TO_OUT_BUF_REG(MEM_OUT_BUF)) + .CORE_OUT_REG (CORE_RSP_REG_DISABLE ? 0 : 1), + .MEM_OUT_REG (MEM_REQ_REG_DISABLE ? 0 : 1) ) bank ( .clk (clk), .reset (reset), diff --git a/hw/rtl/core/VX_ibuffer.sv b/hw/rtl/core/VX_ibuffer.sv index e1a9457ded..abb261b7e5 100644 --- a/hw/rtl/core/VX_ibuffer.sv +++ b/hw/rtl/core/VX_ibuffer.sv @@ -39,7 +39,7 @@ module VX_ibuffer import VX_gpu_pkg::*; #( VX_elastic_buffer #( .DATAW (DATAW), .SIZE (`IBUF_SIZE), - .OUT_REG (2) // 2-cycle EB for area reduction + .OUT_REG (1) ) instr_buf ( .clk (clk), .reset (reset), diff --git a/hw/rtl/libs/VX_fifo_queue.sv b/hw/rtl/libs/VX_fifo_queue.sv index 99efd3d38b..ca11857800 100644 --- a/hw/rtl/libs/VX_fifo_queue.sv +++ b/hw/rtl/libs/VX_fifo_queue.sv @@ -42,6 +42,9 @@ module VX_fifo_queue #( `STATIC_ASSERT(ALM_EMPTY < DEPTH, ("alm_empty must be smaller than size!")) `STATIC_ASSERT(`IS_POW2(DEPTH), ("depth must be a power of 2!")) + `UNUSED_PARAM (OUT_REG) + `UNUSED_PARAM (LUTRAM) + VX_pending_size #( .SIZE (DEPTH), .ALM_EMPTY (ALM_EMPTY), @@ -74,102 +77,89 @@ module VX_fifo_queue #( localparam ADDRW = `CLOG2(DEPTH); - if (OUT_REG != 0) begin : 
g_out_reg + wire [DATAW-1:0] data_out_w; + reg [ADDRW-1:0] rd_ptr_r, rd_ptr_n; + reg [ADDRW-1:0] wr_ptr_r; - wire [DATAW-1:0] dout; - reg [DATAW-1:0] dout_r; - reg [ADDRW-1:0] wr_ptr_r; - reg [ADDRW-1:0] rd_ptr_r; - reg [ADDRW-1:0] rd_ptr_n_r, rd_ptr_n_n; - - always @(*) begin - rd_ptr_n_n = rd_ptr_n_r; - if (pop) begin - if (DEPTH > 2) begin - rd_ptr_n_n = rd_ptr_r + ADDRW'(2); - end else begin // (DEPTH == 2); - rd_ptr_n_n = ~rd_ptr_n_r; - end - end + always @(*) begin + rd_ptr_n = rd_ptr_r + ADDRW'(pop); + end + + always @(posedge clk) begin + if (reset) begin + wr_ptr_r <= '0; + rd_ptr_r <= (OUT_REG != 0) ? 1 : 0; + end else begin + wr_ptr_r <= wr_ptr_r + ADDRW'(push); + rd_ptr_r <= rd_ptr_n; end + end - always @(posedge clk) begin - if (reset) begin - wr_ptr_r <= '0; - rd_ptr_r <= '0; - rd_ptr_n_r <= 1; - end else begin - wr_ptr_r <= wr_ptr_r + ADDRW'(push); - if (pop) begin - rd_ptr_r <= rd_ptr_n_r; + wire [ADDRW-1:0] rd_ptr_w = LUTRAM ? rd_ptr_r : rd_ptr_n; + + wire going_empty = (ALM_EMPTY == 1) ? alm_empty : (size[ADDRW-1:0] == ADDRW'(1)); + wire bypass = push && (empty || (going_empty && pop)); + wire read = ((OUT_REG != 0) || !LUTRAM) ? ~bypass : pop; + + VX_dp_ram #( + .DATAW (DATAW), + .SIZE (DEPTH), + .LUTRAM (LUTRAM), + .OUT_REG(!LUTRAM) + ) dp_ram ( + .clk (clk), + .reset (reset), + .read (read), + .write (push), + .wren (1'b1), + .waddr (wr_ptr_r), + .wdata (data_in), + .raddr (rd_ptr_w), + .rdata (data_out_w) + ); + + if (OUT_REG != 0) begin : g_out_reg + reg [DATAW-1:0] data_out_r, data_out_n; + + if (LUTRAM) begin : g_lutram + assign data_out_n = data_out_w; + end else begin : g_no_lutram + reg [DATAW-1:0] data_out_p; + reg rdw_hazard_r; + wire rdw_hazard = push && (wr_ptr_r == rd_ptr_w); + always @(posedge clk) begin + if (rdw_hazard) begin + data_out_p <= data_in; end - rd_ptr_n_r <= rd_ptr_n_n; + rdw_hazard_r <= rdw_hazard; end + assign data_out_n = rdw_hazard_r ? 
data_out_p : data_out_w; end - VX_dp_ram #( - .DATAW (DATAW), - .SIZE (DEPTH), - .LUTRAM (LUTRAM) - ) dp_ram ( - .clk (clk), - .reset (reset), - .read (1'b1), - .write (push), - .wren (1'b1), - .waddr (wr_ptr_r), - .wdata (data_in), - .raddr (rd_ptr_n_r), - .rdata (dout) - ); - - wire going_empty = (ALM_EMPTY == 1) ? alm_empty : (size[ADDRW-1:0] == ADDRW'(1)); - always @(posedge clk) begin - if (push && (empty || (going_empty && pop))) begin - dout_r <= data_in; + if (bypass) begin + data_out_r <= data_in; end else if (pop) begin - dout_r <= dout; + data_out_r <= data_out_n; end end - assign data_out = dout_r; + assign data_out = data_out_r; end else begin : g_no_out_reg - - reg [ADDRW-1:0] rd_ptr_r, rd_ptr_n; - reg [ADDRW-1:0] wr_ptr_r; - - always @(*) begin - rd_ptr_n = rd_ptr_r + ADDRW'(pop); - end - - always @(posedge clk) begin - if (reset) begin - wr_ptr_r <= '0; - rd_ptr_r <= '0; - end else begin - wr_ptr_r <= wr_ptr_r + ADDRW'(push); - rd_ptr_r <= rd_ptr_n; + if (LUTRAM) begin : g_lutram + assign data_out = data_out_w; + end else begin : g_no_lutram + reg [DATAW-1:0] data_in_r; + reg bypass_r; + always @(posedge clk) begin + if (bypass) begin + data_in_r <= data_in; + end + bypass_r <= bypass; end + assign data_out = bypass_r ? 
data_in_r : data_out_w; end - - VX_dp_ram #( - .DATAW (DATAW), - .SIZE (DEPTH), - .LUTRAM (LUTRAM) - ) dp_ram ( - .clk (clk), - .reset (reset), - .read (1'b1), - .write (push), - .wren (1'b1), - .waddr (wr_ptr_r), - .wdata (data_in), - .raddr (rd_ptr_r), - .rdata (data_out) - ); - end end From 03a1e2582894ef0a291b7f80deec33a4ee48027e Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Tue, 15 Oct 2024 00:28:09 -0700 Subject: [PATCH 285/407] adding cache replacement policy --- hw/rtl/VX_cluster.sv | 1 + hw/rtl/VX_config.vh | 24 +++- hw/rtl/VX_platform.vh | 2 +- hw/rtl/VX_socket.sv | 2 + hw/rtl/Vortex.sv | 1 + hw/rtl/cache/VX_cache.sv | 10 +- hw/rtl/cache/VX_cache_bank.sv | 49 ++++++-- hw/rtl/cache/VX_cache_cluster.sv | 10 +- hw/rtl/cache/VX_cache_define.vh | 6 + hw/rtl/cache/VX_cache_repl.sv | 200 +++++++++++++++++++++++++++++++ hw/rtl/cache/VX_cache_tags.sv | 16 +-- hw/rtl/cache/VX_cache_wrap.sv | 10 +- 12 files changed, 292 insertions(+), 39 deletions(-) create mode 100644 hw/rtl/cache/VX_cache_repl.sv diff --git a/hw/rtl/VX_cluster.sv b/hw/rtl/VX_cluster.sv index 366d1bbac4..9aa5fe706b 100644 --- a/hw/rtl/VX_cluster.sv +++ b/hw/rtl/VX_cluster.sv @@ -99,6 +99,7 @@ module VX_cluster import VX_gpu_pkg::*; #( .WRITE_ENABLE (1), .WRITEBACK (`L2_WRITEBACK), .DIRTY_BYTES (`L2_DIRTYBYTES), + .REPL_POLICY (`L2_REPL_POLICY), .UUID_WIDTH (`UUID_WIDTH), .FLAGS_WIDTH (`MEM_REQ_FLAGS_WIDTH), .CORE_OUT_BUF (3), diff --git a/hw/rtl/VX_config.vh b/hw/rtl/VX_config.vh index da05fc9e9e..48f8ca3dc4 100644 --- a/hw/rtl/VX_config.vh +++ b/hw/rtl/VX_config.vh @@ -151,6 +151,10 @@ `define L3_LINE_SIZE `MEM_BLOCK_SIZE `endif +`ifndef MEMORY_BANKS +`define MEMORY_BANKS 2 +`endif + `ifdef XLEN_64 `ifndef STACK_BASE_ADDR @@ -483,6 +487,11 @@ `define ICACHE_NUM_WAYS 4 `endif +// Replacement Policy +`ifndef ICACHE_REPL_POLICY +`define ICACHE_REPL_POLICY 1 +`endif + // Dcache Configurable Knobs ////////////////////////////////////////////////// // Cache Enable @@ -547,6 +556,11 @@ `define 
DCACHE_DIRTYBYTES `DCACHE_WRITEBACK `endif +// Replacement Policy +`ifndef DCACHE_REPL_POLICY +`define DCACHE_REPL_POLICY 1 +`endif + // LMEM Configurable Knobs //////////////////////////////////////////////////// `ifndef LMEM_DISABLE @@ -612,6 +626,11 @@ `define L2_DIRTYBYTES `L2_WRITEBACK `endif +// Replacement Policy +`ifndef L2_REPL_POLICY +`define L2_REPL_POLICY 1 +`endif + // L3cache Configurable Knobs ///////////////////////////////////////////////// // Cache Size @@ -659,8 +678,9 @@ `define L3_DIRTYBYTES `L3_WRITEBACK `endif -`ifndef MEMORY_BANKS -`define MEMORY_BANKS 2 +// Replacement Policy +`ifndef L3_REPL_POLICY +`define L3_REPL_POLICY 1 `endif // Number of Memory Ports from LLC diff --git a/hw/rtl/VX_platform.vh b/hw/rtl/VX_platform.vh index 4f78fee242..8ea849ed3b 100644 --- a/hw/rtl/VX_platform.vh +++ b/hw/rtl/VX_platform.vh @@ -222,7 +222,7 @@ endgenerate `define CLAMP(x, lo, hi) (((x) > (hi)) ? (hi) : (((x) < (lo)) ? (lo) : (x))) -`define UP(x) (((x) != 0) ? (x) : 1) +`define UP(x) (((x) > 0) ? 
(x) : 1) `define CDIV(n,d) ((n + d - 1) / (d)) diff --git a/hw/rtl/VX_socket.sv b/hw/rtl/VX_socket.sv index 4ce547c7e4..d9a8f5bf8b 100644 --- a/hw/rtl/VX_socket.sv +++ b/hw/rtl/VX_socket.sv @@ -103,6 +103,7 @@ module VX_socket import VX_gpu_pkg::*; #( .FLAGS_WIDTH (0), .UUID_WIDTH (`UUID_WIDTH), .WRITE_ENABLE (0), + .REPL_POLICY (`ICACHE_REPL_POLICY), .NC_ENABLE (0), .CORE_OUT_BUF (3), .MEM_OUT_BUF (2) @@ -151,6 +152,7 @@ module VX_socket import VX_gpu_pkg::*; #( .WRITE_ENABLE (1), .WRITEBACK (`DCACHE_WRITEBACK), .DIRTY_BYTES (`DCACHE_DIRTYBYTES), + .REPL_POLICY (`DCACHE_REPL_POLICY), .NC_ENABLE (1), .CORE_OUT_BUF (3), .MEM_OUT_BUF (2) diff --git a/hw/rtl/Vortex.sv b/hw/rtl/Vortex.sv index 40f95a81aa..0fa3ce31fd 100644 --- a/hw/rtl/Vortex.sv +++ b/hw/rtl/Vortex.sv @@ -85,6 +85,7 @@ module Vortex import VX_gpu_pkg::*; ( .WRITE_ENABLE (1), .WRITEBACK (`L3_WRITEBACK), .DIRTY_BYTES (`L3_DIRTYBYTES), + .REPL_POLICY (`L3_REPL_POLICY), .UUID_WIDTH (`UUID_WIDTH), .FLAGS_WIDTH (`MEM_REQ_FLAGS_WIDTH), .CORE_OUT_BUF (3), diff --git a/hw/rtl/cache/VX_cache.sv b/hw/rtl/cache/VX_cache.sv index c31699c1ef..b27b2df312 100644 --- a/hw/rtl/cache/VX_cache.sv +++ b/hw/rtl/cache/VX_cache.sv @@ -48,6 +48,9 @@ module VX_cache import VX_gpu_pkg::*; #( // Enable dirty bytes on writeback parameter DIRTY_BYTES = 0, + // Replacement policy + parameter REPL_POLICY = `CS_REPL_CYCLIC, + // Request debug identifier parameter UUID_WIDTH = 0, @@ -393,12 +396,13 @@ module VX_cache import VX_gpu_pkg::*; #( .NUM_WAYS (NUM_WAYS), .WORD_SIZE (WORD_SIZE), .NUM_REQS (NUM_REQS), + .WRITE_ENABLE (WRITE_ENABLE), + .WRITEBACK (WRITEBACK), + .DIRTY_BYTES (DIRTY_BYTES), + .REPL_POLICY (REPL_POLICY), .CRSQ_SIZE (CRSQ_SIZE), .MSHR_SIZE (MSHR_SIZE), .MREQ_SIZE (MREQ_SIZE), - .WRITE_ENABLE (WRITE_ENABLE), - .DIRTY_BYTES (DIRTY_BYTES), - .WRITEBACK (WRITEBACK), .UUID_WIDTH (UUID_WIDTH), .TAG_WIDTH (TAG_WIDTH), .FLAGS_WIDTH (FLAGS_WIDTH), diff --git a/hw/rtl/cache/VX_cache_bank.sv b/hw/rtl/cache/VX_cache_bank.sv 
index 7f1153edee..7c5ca1e409 100644 --- a/hw/rtl/cache/VX_cache_bank.sv +++ b/hw/rtl/cache/VX_cache_bank.sv @@ -47,6 +47,9 @@ module VX_cache_bank #( // Enable dirty bytes on writeback parameter DIRTY_BYTES = 0, + // Replacement policy + parameter REPL_POLICY = `CS_REPL_CYCLIC, + // Request debug identifier parameter UUID_WIDTH = 0, @@ -324,6 +327,14 @@ module VX_cache_bank #( wire do_write_st0 = valid_st0 && is_write_st0; wire do_fill_st0 = valid_st0 && is_fill_st0; + wire is_read_st1 = is_creq_st1 && ~rw_st1; + wire is_write_st1 = is_creq_st1 && rw_st1; + + wire do_read_st1 = valid_st1 && is_read_st1; + wire do_write_st1 = valid_st1 && is_write_st1; + wire do_fill_st1 = valid_st1 && is_fill_st1; + wire do_flush_st1 = valid_st1 && is_flush_st1 && WRITEBACK; + assign write_data_st0 = data_st0[`CS_WORD_WIDTH-1:0]; assign line_idx_st0 = addr_st0[`CS_LINE_SEL_BITS-1:0]; @@ -331,8 +342,32 @@ module VX_cache_bank #( wire [`CS_TAG_SEL_BITS-1:0] evict_tag_st1; wire [NUM_WAYS-1:0] tag_matches_st1; + wire is_hit_st1 = (| tag_matches_st1); + wire do_lookup_st0 = do_read_st0 || do_write_st0; + reg [NUM_WAYS-1:0] victim_way_st0; + + VX_cache_repl #( + .CACHE_SIZE (CACHE_SIZE), + .LINE_SIZE (LINE_SIZE), + .NUM_BANKS (NUM_BANKS), + .NUM_WAYS (NUM_WAYS), + .REPL_POLICY (REPL_POLICY) + ) cache_repl ( + .clk (clk), + .reset (reset), + .stall (pipe_stall), + .hit_valid ((do_read_st1 || do_write_st1) && is_hit_st1), + .hit_line (line_idx_st1), + .hit_way (tag_matches_st1), + .repl_valid (do_fill_st0), + .repl_line (line_idx_st0), + .repl_way (victim_way_st0) + ); + + assign evict_way_st0 = is_fill_st0 ? 
victim_way_st0 : flush_way_st0; + VX_cache_tags #( .CACHE_SIZE (CACHE_SIZE), .LINE_SIZE (LINE_SIZE), @@ -350,12 +385,11 @@ module VX_cache_bank #( .fill (do_fill_st0 && ~pipe_stall), .lookup (do_lookup_st0 && ~pipe_stall), .line_addr (addr_st0), - .flush_way (flush_way_st0), + .evict_way (evict_way_st0), // outputs .tag_matches_r(tag_matches_st1), .line_tag_r (line_tag_st1), .evict_tag_r(evict_tag_st1), - .evict_way (evict_way_st0), .evict_way_r(evict_way_st1) ); @@ -374,23 +408,12 @@ module VX_cache_bank #( .data_out ({valid_st1, is_fill_st1, is_flush_st1, is_creq_st1, is_replay_st1, rw_st1, flags_st1, line_idx_st1, data_st1, byteen_st1, word_idx_st1, req_idx_st1, tag_st1, mshr_id_st1, mshr_prev_id_st1, mshr_pending_st1}) ); - // we have a tag hit - wire is_hit_st1 = (| tag_matches_st1); - if (UUID_WIDTH != 0) begin : g_req_uuid_st1 assign req_uuid_st1 = tag_st1[TAG_WIDTH-1 -: UUID_WIDTH]; end else begin : g_req_uuid_st1_0 assign req_uuid_st1 = '0; end - wire is_read_st1 = is_creq_st1 && ~rw_st1; - wire is_write_st1 = is_creq_st1 && rw_st1; - - wire do_read_st1 = valid_st1 && is_read_st1; - wire do_write_st1 = valid_st1 && is_write_st1; - wire do_fill_st1 = valid_st1 && is_fill_st1; - wire do_flush_st1 = valid_st1 && is_flush_st1 && WRITEBACK; - assign addr_st1 = {line_tag_st1, line_idx_st1}; // ensure mshr replay always get a hit diff --git a/hw/rtl/cache/VX_cache_cluster.sv b/hw/rtl/cache/VX_cache_cluster.sv index 71a2ad00b2..b4c2db979c 100644 --- a/hw/rtl/cache/VX_cache_cluster.sv +++ b/hw/rtl/cache/VX_cache_cluster.sv @@ -52,6 +52,9 @@ module VX_cache_cluster import VX_gpu_pkg::*; #( // Enable dirty bytes on writeback parameter DIRTY_BYTES = 0, + // Replacement policy + parameter REPL_POLICY = `CS_REPL_CYCLIC, + // Request debug identifier parameter UUID_WIDTH = 0, @@ -150,13 +153,14 @@ module VX_cache_cluster import VX_gpu_pkg::*; #( .NUM_WAYS (NUM_WAYS), .WORD_SIZE (WORD_SIZE), .NUM_REQS (NUM_REQS), + .WRITE_ENABLE (WRITE_ENABLE), + .WRITEBACK (WRITEBACK), + 
.DIRTY_BYTES (DIRTY_BYTES), + .REPL_POLICY (REPL_POLICY), .CRSQ_SIZE (CRSQ_SIZE), .MSHR_SIZE (MSHR_SIZE), .MRSQ_SIZE (MRSQ_SIZE), .MREQ_SIZE (MREQ_SIZE), - .WRITE_ENABLE (WRITE_ENABLE), - .WRITEBACK (WRITEBACK), - .DIRTY_BYTES (DIRTY_BYTES), .UUID_WIDTH (UUID_WIDTH), .TAG_WIDTH (ARB_TAG_WIDTH), .FLAGS_WIDTH (FLAGS_WIDTH), diff --git a/hw/rtl/cache/VX_cache_define.vh b/hw/rtl/cache/VX_cache_define.vh index 342a40a1bd..b75845ecab 100644 --- a/hw/rtl/cache/VX_cache_define.vh +++ b/hw/rtl/cache/VX_cache_define.vh @@ -73,4 +73,10 @@ `PERF_COUNTER_ADD (dst, src, mem_stalls, `PERF_CTR_BITS, count, (count > 1)) \ `PERF_COUNTER_ADD (dst, src, crsp_stalls, `PERF_CTR_BITS, count, (count > 1)) +/////////////////////////////////////////////////////////////////////////////// + +`define CS_REPL_RANDOM 0 +`define CS_REPL_CYCLIC 1 +`define CS_REPL_PLRU 2 + `endif // VX_CACHE_DEFINE_VH diff --git a/hw/rtl/cache/VX_cache_repl.sv b/hw/rtl/cache/VX_cache_repl.sv new file mode 100644 index 0000000000..59c5deddb7 --- /dev/null +++ b/hw/rtl/cache/VX_cache_repl.sv @@ -0,0 +1,200 @@ +// Copyright © 2019-2023 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +`include "VX_cache_define.vh" + +// Fast PLRU encoder and decoder utility +// Adapted from BaseJump STL: http://bjump.org/data_out.html + +module plru_decoder #( + parameter NUM_WAYS = 1, + parameter WAY_IDX_BITS = $clog2(NUM_WAYS), + parameter WAY_IDX_WIDTH = `UP(WAY_IDX_BITS) +) ( + input wire [WAY_IDX_WIDTH-1:0] way_idx, + input wire [`UP(NUM_WAYS-1)-1:0] lru_in, + output wire [`UP(NUM_WAYS-1)-1:0] lru_out +); + if (NUM_WAYS != 1) begin : g_plru_decoder + wire [`UP(NUM_WAYS-1)-1:0] data; + `IGNORE_UNOPTFLAT_BEGIN + wire [`UP(NUM_WAYS-1)-1:0] mask; + `IGNORE_UNOPTFLAT_END + for (genvar i = 0; i < NUM_WAYS-1; ++i) begin : g_i + if (i == 0) begin : g_i_0 + assign mask[i] = 1'b1; + end else if (i % 2 == 1) begin : g_i_odd + assign mask[i] = mask[(i-1)/2] & ~way_idx[WAY_IDX_BITS-$clog2(i+2)+1]; + end else begin : g_i_even + assign mask[i] = mask[(i-2)/2] & way_idx[WAY_IDX_BITS-$clog2(i+2)+1]; + end + assign data[i] = ~way_idx[WAY_IDX_BITS-$clog2(i+2)]; + end + assign lru_out = (data & mask) | (lru_in & ~mask); + end else begin : g_plru_decoder_1 + `UNUSED_VAR (way_idx) + `UNUSED_VAR (lru_in) + assign lru_out = '0; + end + +endmodule + +module plru_encoder #( + parameter NUM_WAYS = 1, + parameter WAY_IDX_BITS = $clog2(NUM_WAYS), + parameter WAY_IDX_WIDTH = `UP(WAY_IDX_BITS) +) ( + input wire [`UP(NUM_WAYS-1)-1:0] lru_in, + output wire [WAY_IDX_WIDTH-1:0] way_idx +); + if (NUM_WAYS != 1) begin : g_plru_encoder + wire [WAY_IDX_WIDTH-1:0] tmp; + for (genvar i = 0; i < WAY_IDX_WIDTH; ++i) begin : g_i + if (i == 0) begin : g_i_0 + assign tmp[WAY_IDX_WIDTH-1] = lru_in[0]; + end else begin : g_i_n + assign tmp[WAY_IDX_WIDTH-1-i] = lru_in[((2**i)-1)+:(1 << i)][tmp[WAY_IDX_WIDTH-1-:i]]; + end + end + assign way_idx = tmp; + end else begin : g_plru_encoder_1 + `UNUSED_VAR (lru_in) + assign way_idx = '0; + end + +endmodule + +module VX_cache_repl #( + parameter CACHE_SIZE = 1024, + // Size of line inside a bank in bytes + parameter LINE_SIZE = 64, + // Number of banks + 
parameter NUM_BANKS = 1, + // Number of associative ways + parameter NUM_WAYS = 1, + // replacement policy + parameter REPL_POLICY = `CS_REPL_CYCLIC +) ( + input wire clk, + input wire reset, + input wire stall, + input wire hit_valid, + input wire [`CS_LINE_SEL_BITS-1:0] hit_line, + input wire [NUM_WAYS-1:0] hit_way, + input wire repl_valid, + input wire [`CS_LINE_SEL_BITS-1:0] repl_line, + output wire [NUM_WAYS-1:0] repl_way +); + `UNUSED_VAR (stall) + + localparam WAY_IDX_BITS = $clog2(NUM_WAYS); + localparam WAY_IDX_WIDTH = `UP(WAY_IDX_BITS); + + if (REPL_POLICY == `CS_REPL_PLRU) begin : g_plru + // Pseudo Least Recently Used replacement policy + localparam LRU_WIDTH = NUM_WAYS-1; + `UNUSED_VAR (repl_valid) + + reg [`CS_LINES_PER_BANK-1:0][`UP(LRU_WIDTH)-1:0] plru_tree; + + wire [WAY_IDX_WIDTH-1:0] repl_way_idx; + wire [WAY_IDX_WIDTH-1:0] hit_way_idx; + wire [`UP(LRU_WIDTH)-1:0] plru_update; + + always @(posedge clk) begin + if (reset) begin + plru_tree <= '0; + end else begin + if (hit_valid) begin + plru_tree[hit_line] <= plru_update; + end + end + end + + VX_onehot_encoder #( + .N (NUM_WAYS) + ) hit_way_enc ( + .data_in (hit_way), + .data_out (hit_way_idx), + `UNUSED_PIN (valid_out) + ); + + plru_decoder #( + .NUM_WAYS (NUM_WAYS) + ) plru_dec ( + .way_idx (hit_way_idx), + .lru_in (plru_tree[hit_line]), + .lru_out (plru_update) + ); + + plru_encoder #( + .NUM_WAYS (NUM_WAYS) + ) plru_enc ( + .lru_in (plru_tree[repl_line]), + .way_idx (repl_way_idx) + ); + + VX_decoder #( + .N (WAY_IDX_BITS) + ) repl_way_dec ( + .sel_in (repl_way_idx), + .data_in (1'b1), + .data_out (repl_way) + ); + + end else if (REPL_POLICY == `CS_REPL_CYCLIC) begin : g_cyclic + // Cyclic replacement policy + localparam CTR_WIDTH = $clog2(NUM_WAYS); + `UNUSED_VAR (hit_valid) + `UNUSED_VAR (hit_line) + `UNUSED_VAR (hit_way) + reg [`CS_LINES_PER_BANK-1:0][`UP(CTR_WIDTH)-1:0] counters; + always @(posedge clk) begin + if (reset) begin + counters <= '0; + end else if (repl_valid) begin + 
counters[repl_line] <= counters[repl_line] + 1; + end + end + VX_decoder #( + .N (WAY_IDX_BITS) + ) ctr_decoder ( + .sel_in (counters[repl_line]), + .data_in (1'b1), + .data_out (repl_way) + ); + end else begin : g_random + // Random replacement policy + `UNUSED_VAR (hit_valid) + `UNUSED_VAR (hit_line) + `UNUSED_VAR (hit_way) + `UNUSED_VAR (repl_valid) + `UNUSED_VAR (repl_line) + if (NUM_WAYS != 1) begin : g_repl_way + reg [NUM_WAYS-1:0] victim_way; + always @(posedge clk) begin + if (reset) begin + victim_way <= 1; + end else if (~stall) begin + victim_way <= {victim_way[NUM_WAYS-2:0], victim_way[NUM_WAYS-1]}; + end + end + assign repl_way = victim_way; + end else begin : g_repl_way_1 + `UNUSED_VAR (clk) + `UNUSED_VAR (reset) + assign repl_way = 1'b1; + end + end + +endmodule diff --git a/hw/rtl/cache/VX_cache_tags.sv b/hw/rtl/cache/VX_cache_tags.sv index b7a1957efe..8793420e10 100644 --- a/hw/rtl/cache/VX_cache_tags.sv +++ b/hw/rtl/cache/VX_cache_tags.sv @@ -37,12 +37,11 @@ module VX_cache_tags #( input wire fill, input wire lookup, input wire [`CS_LINE_ADDR_WIDTH-1:0] line_addr, - input wire [NUM_WAYS-1:0] flush_way, + input wire [NUM_WAYS-1:0] evict_way, // outputs output wire [NUM_WAYS-1:0] tag_matches_r, output wire [`CS_TAG_SEL_BITS-1:0] line_tag_r, - output wire [NUM_WAYS-1:0] evict_way, output wire [NUM_WAYS-1:0] evict_way_r, output wire [`CS_TAG_SEL_BITS-1:0] evict_tag_r ); @@ -56,20 +55,9 @@ module VX_cache_tags #( wire [NUM_WAYS-1:0] read_valid; if (NUM_WAYS > 1) begin : g_evict_way - reg [NUM_WAYS-1:0] victim_way; - // cyclic assignment of replacement way - always @(posedge clk) begin - if (reset) begin - victim_way <= 1; - end else if (~stall) begin - victim_way <= {victim_way[NUM_WAYS-2:0], victim_way[NUM_WAYS-1]}; - end - end - assign evict_way = fill ? 
victim_way : flush_way; `BUFFER_EX(evict_way_r, evict_way, ~stall, 1); end else begin : g_evict_way_0 - `UNUSED_VAR (flush_way) - assign evict_way = 1'b1; + `UNUSED_VAR (evict_way) assign evict_way_r = 1'b1; end diff --git a/hw/rtl/cache/VX_cache_wrap.sv b/hw/rtl/cache/VX_cache_wrap.sv index d958736c44..ca8c53edab 100644 --- a/hw/rtl/cache/VX_cache_wrap.sv +++ b/hw/rtl/cache/VX_cache_wrap.sv @@ -51,6 +51,9 @@ module VX_cache_wrap import VX_gpu_pkg::*; #( // Enable dirty bytes on writeback parameter DIRTY_BYTES = 0, + // Replacement policy + parameter REPL_POLICY = `CS_REPL_CYCLIC, + // Request debug identifier parameter UUID_WIDTH = 0, @@ -169,13 +172,14 @@ module VX_cache_wrap import VX_gpu_pkg::*; #( .NUM_WAYS (NUM_WAYS), .WORD_SIZE (WORD_SIZE), .NUM_REQS (NUM_REQS), + .WRITE_ENABLE (WRITE_ENABLE), + .WRITEBACK (WRITEBACK), + .DIRTY_BYTES (DIRTY_BYTES), + .REPL_POLICY (REPL_POLICY), .CRSQ_SIZE (CRSQ_SIZE), .MSHR_SIZE (MSHR_SIZE), .MRSQ_SIZE (MRSQ_SIZE), .MREQ_SIZE (MREQ_SIZE), - .WRITE_ENABLE (WRITE_ENABLE), - .WRITEBACK (WRITEBACK), - .DIRTY_BYTES (DIRTY_BYTES), .UUID_WIDTH (UUID_WIDTH), .TAG_WIDTH (TAG_WIDTH), .FLAGS_WIDTH (FLAGS_WIDTH), From db98965f567e4a9ca4254d9f30598906f754ceb5 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Tue, 15 Oct 2024 02:27:07 -0700 Subject: [PATCH 286/407] minor update --- hw/rtl/libs/VX_dp_ram.sv | 468 ++++++++++++--------------------------- hw/rtl/libs/VX_sp_ram.sv | 2 - 2 files changed, 146 insertions(+), 324 deletions(-) diff --git a/hw/rtl/libs/VX_dp_ram.sv b/hw/rtl/libs/VX_dp_ram.sv index c278275528..595b3a42ad 100644 --- a/hw/rtl/libs/VX_dp_ram.sv +++ b/hw/rtl/libs/VX_dp_ram.sv @@ -19,7 +19,6 @@ module VX_dp_ram #( parameter SIZE = 1, parameter WRENW = 1, parameter OUT_REG = 0, - parameter RADDR_REG = 0, parameter LUTRAM = 0, parameter NO_RWCHECK = 0, parameter RW_ASSERT = 0, @@ -44,328 +43,174 @@ module VX_dp_ram #( localparam WSELW = DATAW / WRENW; `STATIC_ASSERT((WRENW * WSELW == DATAW), ("invalid parameter")) -`define 
RAM_INITIALIZATION \ - if (INIT_ENABLE != 0) begin : g_init \ - if (INIT_FILE != "") begin : g_file \ - initial $readmemh(INIT_FILE, ram); \ - end else begin : g_value \ - initial begin \ - for (integer i = 0; i < SIZE; ++i) \ - ram[i] = INIT_VALUE; \ - end \ - end \ +`define RAM_INITIALIZATION \ + if (INIT_ENABLE != 0) begin : g_init \ + if (INIT_FILE != "") begin : g_file \ + initial $readmemh(INIT_FILE, ram); \ + end else begin : g_value \ + initial begin \ + for (integer i = 0; i < SIZE; ++i) begin : g_i \ + ram[i] = INIT_VALUE; \ + end \ + end \ + end \ + end + +`define RAM_WREN_BLOCK_ALTERA(__we__) \ + reg [WRENW-1:0][WSELW-1:0] ram [0:SIZE-1]; \ + `RAM_INITIALIZATION \ + always @(posedge clk) begin \ + if (__we__) begin \ + for (integer i = 0; i < WRENW; ++i) begin \ + if (wren[i]) begin \ + ram[waddr][i] <= wdata[i * WSELW +: WSELW]; \ + end \ + end \ + end \ + end + +`define RAM_WREN_BLOCK_XILINX(__we__) \ + reg [DATAW-1:0] ram [0:SIZE-1]; \ + `RAM_INITIALIZATION \ + always @(posedge clk) begin \ + if (__we__) begin \ + for (integer i = 0; i < WRENW; ++i) begin \ + if (wren[i]) begin \ + ram[waddr][i * WSELW +: WSELW] <= wdata[i * WSELW +: WSELW]; \ + end \ + end \ + end \ + end + +`define RAM_WRITE_BLOCK(__we__) \ + reg [DATAW-1:0] ram [0:SIZE-1]; \ + `RAM_INITIALIZATION \ + always @(posedge clk) begin \ + if (__we__) begin \ + ram[waddr] <= wdata; \ + end \ + end + +`define RAM_READ_BLOCK_OUT_REG(__re__) \ + always @(posedge clk) begin \ + if (__re__) begin \ + if (RESET_OUT && reset) begin \ + rdata_r <= INIT_VALUE; \ + end else begin \ + rdata_r <= ram[raddr]; \ + end \ + end \ end `UNUSED_PARAM (RW_ASSERT) `UNUSED_VAR (read) `UNUSED_VAR (wren) - if (OUT_REG && !READ_ENABLE) begin : g_out_reg - `UNUSED_PARAM (NO_RWCHECK) + if (OUT_REG) begin : g_out_reg reg [DATAW-1:0] rdata_r; - wire cs = read || write; - if (WRENW != 1) begin : g_writeen - `ifdef QUARTUS - if (LUTRAM != 0) begin : g_lutram - `USE_FAST_BRAM reg [WRENW-1:0][WSELW-1:0] ram [0:SIZE-1]; - 
`RAM_INITIALIZATION - always @(posedge clk) begin - if (cs) begin - if (write) begin - for (integer i = 0; i < WRENW; ++i) begin - if (wren[i]) - ram[waddr][i] <= wdata[i * WSELW +: WSELW]; - end - end - if (RESET_OUT && reset) begin - rdata_r <= INIT_VALUE; - end else begin - rdata_r <= ram[raddr]; - end - end + if (READ_ENABLE) begin : g_readen + if (WRENW != 1) begin : g_writeen + `ifdef QUARTUS + if (LUTRAM != 0) begin : g_lutram + `USE_FAST_BRAM `RAM_WREN_BLOCK_ALTERA(write) + `RAM_READ_BLOCK_OUT_REG(read || write) + end else begin : g_no_lutram + `RAM_WREN_BLOCK_ALTERA(write) + `RAM_READ_BLOCK_OUT_REG(read || write) end - end else begin : g_no_lutram - reg [WRENW-1:0][WSELW-1:0] ram [0:SIZE-1]; - `RAM_INITIALIZATION - always @(posedge clk) begin - if (cs) begin - if (write) begin - for (integer i = 0; i < WRENW; ++i) begin - if (wren[i]) - ram[waddr][i] <= wdata[i * WSELW +: WSELW]; - end - end - if (RESET_OUT && reset) begin - rdata_r <= INIT_VALUE; - end else begin - rdata_r <= ram[raddr]; - end - end + `else + // Not Quartus + if (LUTRAM != 0) begin : g_lutram + `USE_FAST_BRAM `RAM_WREN_BLOCK_XILINX(write) + `RAM_READ_BLOCK_OUT_REG(read || write) + end else begin : g_no_lutram + `RAM_WREN_BLOCK_XILINX(write) + `RAM_READ_BLOCK_OUT_REG(read || write) end - end - `else - // Not Quartus - if (LUTRAM != 0) begin : g_lutram - `USE_FAST_BRAM reg [DATAW-1:0] ram [0:SIZE-1]; - `RAM_INITIALIZATION - always @(posedge clk) begin - if (cs) begin - if (write) begin - for (integer i = 0; i < WRENW; ++i) begin - if (wren[i]) - ram[waddr][i * WSELW +: WSELW] <= wdata[i * WSELW +: WSELW]; - end - end - if (RESET_OUT && reset) begin - rdata_r <= INIT_VALUE; - end else begin - rdata_r <= ram[raddr]; - end - end - end - end else begin : g_no_lutram - reg [DATAW-1:0] ram [0:SIZE-1]; - `RAM_INITIALIZATION - always @(posedge clk) begin - if (cs) begin - if (write) begin - for (integer i = 0; i < WRENW; ++i) begin - if (wren[i]) - ram[waddr][i * WSELW +: WSELW] <= wdata[i * WSELW 
+: WSELW]; - end - end - if (RESET_OUT && reset) begin - rdata_r <= INIT_VALUE; - end else begin - rdata_r <= ram[raddr]; - end - end + `endif + end else begin : g_no_writeen + if (LUTRAM != 0) begin : g_lutram + `USE_FAST_BRAM `RAM_WRITE_BLOCK(write) + `RAM_READ_BLOCK_OUT_REG(read || write) + end else begin : g_no_lutram + `RAM_WRITE_BLOCK(write) + `RAM_READ_BLOCK_OUT_REG(read || write) end end - `endif - end else begin : g_no_writeen - if (LUTRAM != 0) begin : g_lutram - `USE_FAST_BRAM reg [DATAW-1:0] ram [0:SIZE-1]; - `RAM_INITIALIZATION - always @(posedge clk) begin - if (cs) begin - if (write) - ram[waddr] <= wdata; - if (RESET_OUT && reset) begin - rdata_r <= INIT_VALUE; - end else begin - rdata_r <= ram[raddr]; - end - end + end else begin : g_no_readen + if (WRENW != 1) begin : g_writeen + `ifdef QUARTUS + if (LUTRAM != 0) begin : g_lutram + `USE_FAST_BRAM `RAM_WREN_BLOCK_ALTERA(write) + `RAM_READ_BLOCK_OUT_REG(read) + end else begin : g_no_lutram + `RAM_WREN_BLOCK_ALTERA(write) + `RAM_READ_BLOCK_OUT_REG(read) end - - end else begin : g_no_lutram - reg [DATAW-1:0] ram [0:SIZE-1]; - `RAM_INITIALIZATION - always @(posedge clk) begin - if (cs) begin - if (write) - ram[waddr] <= wdata; - if (RESET_OUT && reset) begin - rdata_r <= INIT_VALUE; - end else begin - rdata_r <= ram[raddr]; - end - end + `else + // Not Quartus + if (LUTRAM != 0) begin : g_lutram + `USE_FAST_BRAM `RAM_WREN_BLOCK_XILINX(write) + `RAM_READ_BLOCK_OUT_REG(read) + end else begin : g_no_lutram + `RAM_WREN_BLOCK_XILINX(write) + `RAM_READ_BLOCK_OUT_REG(read) + end + `endif + end else begin : g_no_writeen + if (LUTRAM != 0) begin : g_lutram + `USE_FAST_BRAM `RAM_WRITE_BLOCK(write) + `RAM_READ_BLOCK_OUT_REG(read) + end else begin : g_no_lutram + `RAM_WRITE_BLOCK(write) + `RAM_READ_BLOCK_OUT_REG(read) end end end assign rdata = rdata_r; end else begin : g_no_out_reg - // OUT_REG==0 || READ_ENABLE=1 - wire [DATAW-1:0] rdata_w; - reg [ADDRW-1:0] raddr_reg; `ifdef SYNTHESIS if (WRENW > 1) begin : 
g_writeen `ifdef QUARTUS if (LUTRAM != 0) begin : g_lutram - `USE_FAST_BRAM reg [WRENW-1:0][WSELW-1:0] ram [0:SIZE-1]; - `RAM_INITIALIZATION - always @(posedge clk) begin - if (write) begin - for (integer i = 0; i < WRENW; ++i) begin - if (wren[i]) - ram[waddr][i] <= wdata[i * WSELW +: WSELW]; - end - end - if (read) begin - raddr_reg <= raddr; - end - end - if (RADDR_REG != 0) begin : g_rdata_async - assign rdata_w = ram[raddr_reg]; - end else begin : g_rdata_sync - assign rdata_w = ram[raddr]; - `UNUSED_VAR (raddr_reg) - end + `USE_FAST_BRAM `RAM_WREN_BLOCK_ALTERA(write) + assign rdata = ram[raddr]; end else begin : g_no_lutram if (NO_RWCHECK != 0) begin : g_no_rwcheck - `NO_RW_RAM_CHECK reg [WRENW-1:0][WSELW-1:0] ram [0:SIZE-1]; - `RAM_INITIALIZATION - always @(posedge clk) begin - if (write) begin - for (integer i = 0; i < WRENW; ++i) begin - if (wren[i]) - ram[waddr][i] <= wdata[i * WSELW +: WSELW]; - end - end - if (read) begin - raddr_reg <= raddr; - end - end - if (RADDR_REG != 0) begin : g_rdata_async - assign rdata_w = ram[raddr_reg]; - end else begin : g_rdata_sync - assign rdata_w = ram[raddr]; - `UNUSED_VAR (raddr_reg) - end + `NO_RW_RAM_CHECK `RAM_WREN_BLOCK_ALTERA(write) + assign rdata = ram[raddr]; end else begin : g_rwcheck - reg [WRENW-1:0][WSELW-1:0] ram [0:SIZE-1]; - `RAM_INITIALIZATION - always @(posedge clk) begin - if (write) begin - for (integer i = 0; i < WRENW; ++i) begin - if (wren[i]) - ram[waddr][i] <= wdata[i * WSELW +: WSELW]; - end - end - if (read) begin - raddr_reg <= raddr; - end - end - if (RADDR_REG != 0) begin : g_rdata_async - assign rdata_w = ram[raddr_reg]; - end else begin : g_rdata_sync - assign rdata_w = ram[raddr]; - `UNUSED_VAR (raddr_reg) - end + `RAM_WREN_BLOCK_ALTERA(write) + assign rdata = ram[raddr]; end end `else // default synthesis if (LUTRAM != 0) begin : g_lutram - `USE_FAST_BRAM reg [DATAW-1:0] ram [0:SIZE-1]; - `RAM_INITIALIZATION - always @(posedge clk) begin - if (write) begin - for (integer i = 0; i < 
WRENW; ++i) begin - if (wren[i]) - ram[waddr][i * WSELW +: WSELW] <= wdata[i * WSELW +: WSELW]; - end - end - if (read) begin - raddr_reg <= raddr; - end - end - if (RADDR_REG != 0) begin : g_rdata_async - assign rdata_w = ram[raddr_reg]; - end else begin : g_rdata_sync - assign rdata_w = ram[raddr]; - `UNUSED_VAR (raddr_reg) - end + `USE_FAST_BRAM `RAM_WREN_BLOCK_XILINX(write) + assign rdata = ram[raddr]; end else begin : g_no_lutram if (NO_RWCHECK != 0) begin : g_no_rwcheck - `NO_RW_RAM_CHECK reg [DATAW-1:0] ram [0:SIZE-1]; - `RAM_INITIALIZATION - always @(posedge clk) begin - if (write) begin - for (integer i = 0; i < WRENW; ++i) begin - if (wren[i]) - ram[waddr][i * WSELW +: WSELW] <= wdata[i * WSELW +: WSELW]; - end - end - if (read) begin - raddr_reg <= raddr; - end - end - if (RADDR_REG != 0) begin : g_rdata_async - assign rdata_w = ram[raddr_reg]; - end else begin : g_rdata_sync - assign rdata_w = ram[raddr]; - `UNUSED_VAR (raddr_reg) - end + `NO_RW_RAM_CHECK `RAM_WREN_BLOCK_XILINX(write) + assign rdata = ram[raddr]; end else begin : g_rwcheck - reg [DATAW-1:0] ram [0:SIZE-1]; - `RAM_INITIALIZATION - always @(posedge clk) begin - if (write) begin - for (integer i = 0; i < WRENW; ++i) begin - if (wren[i]) - ram[waddr][i * WSELW +: WSELW] <= wdata[i * WSELW +: WSELW]; - end - end - if (read) begin - raddr_reg <= raddr; - end - end - if (RADDR_REG != 0) begin : g_rdata_async - assign rdata_w = ram[raddr_reg]; - end else begin : g_rdata_sync - assign rdata_w = ram[raddr]; - `UNUSED_VAR (raddr_reg) - end + `RAM_WREN_BLOCK_XILINX(write) + assign rdata = ram[raddr]; end end `endif end else begin : g_no_writeen // (WRENW == 1) if (LUTRAM != 0) begin : g_lutram - `USE_FAST_BRAM reg [DATAW-1:0] ram [0:SIZE-1]; - `RAM_INITIALIZATION - always @(posedge clk) begin - if (write) begin - ram[waddr] <= wdata; - end - if (read) begin - raddr_reg <= raddr; - end - end - if (RADDR_REG != 0) begin : g_rdata_async - assign rdata_w = ram[raddr_reg]; - end else begin : 
g_rdata_sync - assign rdata_w = ram[raddr]; - `UNUSED_VAR (raddr_reg) - end + `USE_FAST_BRAM `RAM_WRITE_BLOCK(write) + assign rdata = ram[raddr]; end else begin : g_no_lutram if (NO_RWCHECK != 0) begin : g_no_rwcheck - `NO_RW_RAM_CHECK reg [DATAW-1:0] ram [0:SIZE-1]; - `RAM_INITIALIZATION - always @(posedge clk) begin - if (write) begin - ram[waddr] <= wdata; - end - if (read) begin - raddr_reg <= raddr; - end - end - if (RADDR_REG != 0) begin : g_rdata_async - assign rdata_w = ram[raddr_reg]; - end else begin : g_rdata_sync - assign rdata_w = ram[raddr]; - `UNUSED_VAR (raddr_reg) - end + `NO_RW_RAM_CHECK `RAM_WRITE_BLOCK(write) + assign rdata = ram[raddr]; end else begin : g_rwcheck - reg [DATAW-1:0] ram [0:SIZE-1]; - `RAM_INITIALIZATION - always @(posedge clk) begin - if (write) begin - ram[waddr] <= wdata; - end - if (read) begin - raddr_reg <= raddr; - end - end - if (RADDR_REG != 0) begin : g_rdata_async - assign rdata_w = ram[raddr_reg]; - end else begin : g_rdata_sync - assign rdata_w = ram[raddr]; - `UNUSED_VAR (raddr_reg) - end + `RAM_WRITE_BLOCK(write) + assign rdata = ram[raddr]; end end end @@ -389,54 +234,33 @@ module VX_dp_ram #( ram[waddr] <= ram_n; end end - if (read) begin - raddr_reg <= raddr; - end end - if (RADDR_REG != 0) begin : g_rdata_async - assign rdata_w = ram[raddr_reg]; - end else begin : g_rdata_sync - `UNUSED_VAR (raddr_reg) - if (!LUTRAM && NO_RWCHECK) begin : g_rdata_no_bypass - reg [DATAW-1:0] prev_data; - reg [ADDRW-1:0] prev_waddr; - reg prev_write; + if (!LUTRAM && NO_RWCHECK) begin : g_rdata_no_bypass + reg [DATAW-1:0] prev_data; + reg [ADDRW-1:0] prev_waddr; + reg prev_write; - always @(posedge clk) begin - if (reset) begin - prev_write <= 0; - prev_data <= '0; - prev_waddr <= '0; - end else begin - prev_write <= write; - prev_data <= ram[waddr]; - prev_waddr <= waddr; - end + always @(posedge clk) begin + if (reset) begin + prev_write <= 0; + prev_data <= '0; + prev_waddr <= '0; + end else begin + prev_write <= write; + 
prev_data <= ram[waddr]; + prev_waddr <= waddr; end + end - assign rdata_w = (prev_write && (prev_waddr == raddr)) ? prev_data : ram[raddr]; - if (RW_ASSERT) begin : g_rw_assert - `RUNTIME_ASSERT(~read || (rdata_w == ram[raddr]), ("%t: read after write hazard", $time)) - end - end else begin : g_rdata_with_bypass - assign rdata_w = ram[raddr]; + assign rdata = (prev_write && (prev_waddr == raddr)) ? prev_data : ram[raddr]; + if (RW_ASSERT) begin : g_rw_assert + `RUNTIME_ASSERT(~read || (rdata == ram[raddr]), ("%t: read after write hazard", $time)) end + end else begin : g_rdata_with_bypass + assign rdata = ram[raddr]; end `endif - if (OUT_REG != 0) begin : g_rdata_req - reg [DATAW-1:0] rdata_r; - always @(posedge clk) begin - if (READ_ENABLE && reset) begin - rdata_r <= INIT_VALUE; - end else if (!READ_ENABLE || read) begin - rdata_r <= rdata_w; - end - end - assign rdata = rdata_r; - end else begin : g_rdata_comb - assign rdata = rdata_w; - end end endmodule diff --git a/hw/rtl/libs/VX_sp_ram.sv b/hw/rtl/libs/VX_sp_ram.sv index 7974cb6795..efce4b5f2f 100644 --- a/hw/rtl/libs/VX_sp_ram.sv +++ b/hw/rtl/libs/VX_sp_ram.sv @@ -26,7 +26,6 @@ module VX_sp_ram #( parameter RESET_OUT = 0, parameter READ_ENABLE = 0, parameter INIT_ENABLE = 0, - parameter RADDR_REG = 0, parameter INIT_FILE = "", parameter [DATAW-1:0] INIT_VALUE = 0, parameter ADDRW = `LOG2UP(SIZE) @@ -45,7 +44,6 @@ module VX_sp_ram #( .SIZE (SIZE), .WRENW (WRENW), .OUT_REG (OUT_REG), - .RADDR_REG (RADDR_REG), .LUTRAM (LUTRAM), .NO_RWCHECK (NO_RWCHECK), .RW_ASSERT (RW_ASSERT), From 68b78fc42fed53e99415945777a6c1e5f6968124 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Tue, 15 Oct 2024 02:32:17 -0700 Subject: [PATCH 287/407] minor update --- hw/rtl/libs/VX_dp_ram.sv | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/hw/rtl/libs/VX_dp_ram.sv b/hw/rtl/libs/VX_dp_ram.sv index 595b3a42ad..4220eca185 100644 --- a/hw/rtl/libs/VX_dp_ram.sv +++ b/hw/rtl/libs/VX_dp_ram.sv @@ 
-113,28 +113,28 @@ module VX_dp_ram #( `ifdef QUARTUS if (LUTRAM != 0) begin : g_lutram `USE_FAST_BRAM `RAM_WREN_BLOCK_ALTERA(write) - `RAM_READ_BLOCK_OUT_REG(read || write) + `RAM_READ_BLOCK_OUT_REG(read) end else begin : g_no_lutram `RAM_WREN_BLOCK_ALTERA(write) - `RAM_READ_BLOCK_OUT_REG(read || write) + `RAM_READ_BLOCK_OUT_REG(read) end `else // Not Quartus if (LUTRAM != 0) begin : g_lutram `USE_FAST_BRAM `RAM_WREN_BLOCK_XILINX(write) - `RAM_READ_BLOCK_OUT_REG(read || write) + `RAM_READ_BLOCK_OUT_REG(read) end else begin : g_no_lutram `RAM_WREN_BLOCK_XILINX(write) - `RAM_READ_BLOCK_OUT_REG(read || write) + `RAM_READ_BLOCK_OUT_REG(read) end `endif end else begin : g_no_writeen if (LUTRAM != 0) begin : g_lutram `USE_FAST_BRAM `RAM_WRITE_BLOCK(write) - `RAM_READ_BLOCK_OUT_REG(read || write) + `RAM_READ_BLOCK_OUT_REG(read) end else begin : g_no_lutram `RAM_WRITE_BLOCK(write) - `RAM_READ_BLOCK_OUT_REG(read || write) + `RAM_READ_BLOCK_OUT_REG(read) end end end else begin : g_no_readen @@ -142,28 +142,28 @@ module VX_dp_ram #( `ifdef QUARTUS if (LUTRAM != 0) begin : g_lutram `USE_FAST_BRAM `RAM_WREN_BLOCK_ALTERA(write) - `RAM_READ_BLOCK_OUT_REG(read) + `RAM_READ_BLOCK_OUT_REG(read || write) end else begin : g_no_lutram `RAM_WREN_BLOCK_ALTERA(write) - `RAM_READ_BLOCK_OUT_REG(read) + `RAM_READ_BLOCK_OUT_REG(read || write) end `else // Not Quartus if (LUTRAM != 0) begin : g_lutram `USE_FAST_BRAM `RAM_WREN_BLOCK_XILINX(write) - `RAM_READ_BLOCK_OUT_REG(read) + `RAM_READ_BLOCK_OUT_REG(read || write) end else begin : g_no_lutram `RAM_WREN_BLOCK_XILINX(write) - `RAM_READ_BLOCK_OUT_REG(read) + `RAM_READ_BLOCK_OUT_REG(read || write) end `endif end else begin : g_no_writeen if (LUTRAM != 0) begin : g_lutram `USE_FAST_BRAM `RAM_WRITE_BLOCK(write) - `RAM_READ_BLOCK_OUT_REG(read) + `RAM_READ_BLOCK_OUT_REG(read || write) end else begin : g_no_lutram `RAM_WRITE_BLOCK(write) - `RAM_READ_BLOCK_OUT_REG(read) + `RAM_READ_BLOCK_OUT_REG(read || write) end end end From 
1d5e4f63dd7e9fada25ecf1a9b6d7e7c86b364a8 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Tue, 15 Oct 2024 03:24:02 -0700 Subject: [PATCH 288/407] minor update --- hw/rtl/cache/VX_cache_bank.sv | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hw/rtl/cache/VX_cache_bank.sv b/hw/rtl/cache/VX_cache_bank.sv index 7c5ca1e409..9b55734e78 100644 --- a/hw/rtl/cache/VX_cache_bank.sv +++ b/hw/rtl/cache/VX_cache_bank.sv @@ -358,10 +358,10 @@ module VX_cache_bank #( .clk (clk), .reset (reset), .stall (pipe_stall), - .hit_valid ((do_read_st1 || do_write_st1) && is_hit_st1), + .hit_valid ((do_read_st1 || do_write_st1) && is_hit_st1 && ~pipe_stall), .hit_line (line_idx_st1), .hit_way (tag_matches_st1), - .repl_valid (do_fill_st0), + .repl_valid (do_fill_st0 && ~pipe_stall), .repl_line (line_idx_st0), .repl_way (victim_way_st0) ); From e62b638d886d5df38be7871a5af59e1f800362a1 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Tue, 15 Oct 2024 10:36:05 -0700 Subject: [PATCH 289/407] minor update --- hw/rtl/cache/VX_cache_repl.sv | 34 ++++++++++++++++----------------- hw/rtl/libs/VX_mem_scheduler.sv | 2 +- 2 files changed, 18 insertions(+), 18 deletions(-) diff --git a/hw/rtl/cache/VX_cache_repl.sv b/hw/rtl/cache/VX_cache_repl.sv index 59c5deddb7..68f2e89d35 100644 --- a/hw/rtl/cache/VX_cache_repl.sv +++ b/hw/rtl/cache/VX_cache_repl.sv @@ -25,7 +25,7 @@ module plru_decoder #( input wire [`UP(NUM_WAYS-1)-1:0] lru_in, output wire [`UP(NUM_WAYS-1)-1:0] lru_out ); - if (NUM_WAYS != 1) begin : g_plru_decoder + if (NUM_WAYS > 1) begin : g_dec wire [`UP(NUM_WAYS-1)-1:0] data; `IGNORE_UNOPTFLAT_BEGIN wire [`UP(NUM_WAYS-1)-1:0] mask; @@ -41,7 +41,7 @@ module plru_decoder #( assign data[i] = ~way_idx[WAY_IDX_BITS-$clog2(i+2)]; end assign lru_out = (data & mask) | (lru_in & ~mask); - end else begin : g_plru_decoder_1 + end else begin : g_no_dec `UNUSED_VAR (way_idx) `UNUSED_VAR (lru_in) assign lru_out = '0; @@ -57,17 +57,19 @@ module plru_encoder #( input wire 
[`UP(NUM_WAYS-1)-1:0] lru_in, output wire [WAY_IDX_WIDTH-1:0] way_idx ); - if (NUM_WAYS != 1) begin : g_plru_encoder - wire [WAY_IDX_WIDTH-1:0] tmp; - for (genvar i = 0; i < WAY_IDX_WIDTH; ++i) begin : g_i - if (i == 0) begin : g_i_0 - assign tmp[WAY_IDX_WIDTH-1] = lru_in[0]; - end else begin : g_i_n - assign tmp[WAY_IDX_WIDTH-1-i] = lru_in[((2**i)-1)+:(1 << i)][tmp[WAY_IDX_WIDTH-1-:i]]; - end + if (NUM_WAYS > 1) begin : g_enc + wire [WAY_IDX_BITS-1:0] tmp; + for (genvar i = 0; i < WAY_IDX_BITS; ++i) begin : g_i + VX_mux #( + .N (2**i) + ) mux ( + .data_in (lru_in[((2**i)-1)+:(2**i)]), + .sel_in (tmp[WAY_IDX_BITS-1-:i]), + .data_out (tmp[WAY_IDX_BITS-1-i]) + ); end assign way_idx = tmp; - end else begin : g_plru_encoder_1 + end else begin : g_no_enc `UNUSED_VAR (lru_in) assign way_idx = '0; end @@ -105,7 +107,7 @@ module VX_cache_repl #( localparam LRU_WIDTH = NUM_WAYS-1; `UNUSED_VAR (repl_valid) - reg [`CS_LINES_PER_BANK-1:0][`UP(LRU_WIDTH)-1:0] plru_tree; + reg [`UP(LRU_WIDTH)-1:0] plru_tree [0:`CS_LINES_PER_BANK-1]; wire [WAY_IDX_WIDTH-1:0] repl_way_idx; wire [WAY_IDX_WIDTH-1:0] hit_way_idx; @@ -158,11 +160,9 @@ module VX_cache_repl #( `UNUSED_VAR (hit_valid) `UNUSED_VAR (hit_line) `UNUSED_VAR (hit_way) - reg [`CS_LINES_PER_BANK-1:0][`UP(CTR_WIDTH)-1:0] counters; + reg [`UP(CTR_WIDTH)-1:0] counters [0:`CS_LINES_PER_BANK-1]; always @(posedge clk) begin - if (reset) begin - counters <= '0; - end else if (repl_valid) begin + if (repl_valid) begin counters[repl_line] <= counters[repl_line] + 1; end end @@ -180,7 +180,7 @@ module VX_cache_repl #( `UNUSED_VAR (hit_way) `UNUSED_VAR (repl_valid) `UNUSED_VAR (repl_line) - if (NUM_WAYS != 1) begin : g_repl_way + if (NUM_WAYS > 1) begin : g_repl_way reg [NUM_WAYS-1:0] victim_way; always @(posedge clk) begin if (reset) begin diff --git a/hw/rtl/libs/VX_mem_scheduler.sv b/hw/rtl/libs/VX_mem_scheduler.sv index abd68da241..2ff21655ab 100644 --- a/hw/rtl/libs/VX_mem_scheduler.sv +++ b/hw/rtl/libs/VX_mem_scheduler.sv @@ -473,7 
+473,7 @@ module VX_mem_scheduler #( for (genvar i = 0; i < CORE_CHANNELS; ++i) begin : g_rsp_store for (genvar j = 0; j < CORE_BATCHES; ++j) begin : g_j - reg [WORD_WIDTH-1:0] rsp_store [CORE_QUEUE_SIZE-1:0]; + reg [WORD_WIDTH-1:0] rsp_store [0:CORE_QUEUE_SIZE-1]; wire rsp_wren = mem_rsp_fire_s && (BATCH_SEL_WIDTH'(j) == rsp_batch_idx) && ((CORE_CHANNELS == 1) || mem_rsp_mask_s[i]); From 645befdce6ebe6b06438a8a0ea632da1f8860cdf Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Tue, 15 Oct 2024 11:23:29 -0700 Subject: [PATCH 290/407] minor update --- hw/rtl/cache/VX_cache_repl.sv | 100 +++++++++++++++++++++------------- 1 file changed, 61 insertions(+), 39 deletions(-) diff --git a/hw/rtl/cache/VX_cache_repl.sv b/hw/rtl/cache/VX_cache_repl.sv index 68f2e89d35..aac0483fd5 100644 --- a/hw/rtl/cache/VX_cache_repl.sv +++ b/hw/rtl/cache/VX_cache_repl.sv @@ -21,9 +21,9 @@ module plru_decoder #( parameter WAY_IDX_BITS = $clog2(NUM_WAYS), parameter WAY_IDX_WIDTH = `UP(WAY_IDX_BITS) ) ( - input wire [WAY_IDX_WIDTH-1:0] way_idx, - input wire [`UP(NUM_WAYS-1)-1:0] lru_in, - output wire [`UP(NUM_WAYS-1)-1:0] lru_out + input wire [WAY_IDX_WIDTH-1:0] way_idx, + output wire [`UP(NUM_WAYS-1)-1:0] lru_data, + output wire [`UP(NUM_WAYS-1)-1:0] lru_mask ); if (NUM_WAYS > 1) begin : g_dec wire [`UP(NUM_WAYS-1)-1:0] data; @@ -40,11 +40,12 @@ module plru_decoder #( end assign data[i] = ~way_idx[WAY_IDX_BITS-$clog2(i+2)]; end - assign lru_out = (data & mask) | (lru_in & ~mask); + assign lru_data = data; + assign lru_mask = mask; end else begin : g_no_dec `UNUSED_VAR (way_idx) - `UNUSED_VAR (lru_in) - assign lru_out = '0; + assign lru_data = '0; + assign lru_mask = '0; end endmodule @@ -60,13 +61,17 @@ module plru_encoder #( if (NUM_WAYS > 1) begin : g_enc wire [WAY_IDX_BITS-1:0] tmp; for (genvar i = 0; i < WAY_IDX_BITS; ++i) begin : g_i - VX_mux #( - .N (2**i) - ) mux ( - .data_in (lru_in[((2**i)-1)+:(2**i)]), - .sel_in (tmp[WAY_IDX_BITS-1-:i]), - .data_out (tmp[WAY_IDX_BITS-1-i]) - ); + 
if (i == 0) begin : g_i_0 + assign tmp[WAY_IDX_WIDTH-1] = lru_in[0]; + end else begin : g_i_n + VX_mux #( + .N (2**i) + ) mux ( + .data_in (lru_in[((2**i)-1)+:(2**i)]), + .sel_in (tmp[WAY_IDX_BITS-1-:i]), + .data_out (tmp[WAY_IDX_BITS-1-i]) + ); + end end assign way_idx = tmp; end else begin : g_no_enc @@ -104,24 +109,29 @@ module VX_cache_repl #( if (REPL_POLICY == `CS_REPL_PLRU) begin : g_plru // Pseudo Least Recently Used replacement policy - localparam LRU_WIDTH = NUM_WAYS-1; - `UNUSED_VAR (repl_valid) - - reg [`UP(LRU_WIDTH)-1:0] plru_tree [0:`CS_LINES_PER_BANK-1]; + localparam LRU_WIDTH = `UP(NUM_WAYS-1); wire [WAY_IDX_WIDTH-1:0] repl_way_idx; wire [WAY_IDX_WIDTH-1:0] hit_way_idx; - wire [`UP(LRU_WIDTH)-1:0] plru_update; - - always @(posedge clk) begin - if (reset) begin - plru_tree <= '0; - end else begin - if (hit_valid) begin - plru_tree[hit_line] <= plru_update; - end - end - end + wire [LRU_WIDTH-1:0] plru_rdata; + wire [LRU_WIDTH-1:0] plru_wdata; + wire [LRU_WIDTH-1:0] plru_wmask; + + VX_dp_ram #( + .DATAW (LRU_WIDTH), + .SIZE (`CS_LINES_PER_BANK), + .WRENW (LRU_WIDTH) + ) plru_store ( + .clk (clk), + .reset (reset), + .read (repl_valid), + .write (hit_valid), + .wren (plru_wmask), + .waddr (hit_line), + .raddr (repl_line), + .wdata (plru_wdata), + .rdata (plru_rdata) + ); VX_onehot_encoder #( .N (NUM_WAYS) @@ -134,15 +144,15 @@ module VX_cache_repl #( plru_decoder #( .NUM_WAYS (NUM_WAYS) ) plru_dec ( - .way_idx (hit_way_idx), - .lru_in (plru_tree[hit_line]), - .lru_out (plru_update) + .way_idx (hit_way_idx), + .lru_data (plru_wdata), + .lru_mask (plru_wmask) ); plru_encoder #( .NUM_WAYS (NUM_WAYS) ) plru_enc ( - .lru_in (plru_tree[repl_line]), + .lru_in (plru_rdata), .way_idx (repl_way_idx) ); @@ -160,16 +170,28 @@ module VX_cache_repl #( `UNUSED_VAR (hit_valid) `UNUSED_VAR (hit_line) `UNUSED_VAR (hit_way) - reg [`UP(CTR_WIDTH)-1:0] counters [0:`CS_LINES_PER_BANK-1]; - always @(posedge clk) begin - if (repl_valid) begin - counters[repl_line] <= 
counters[repl_line] + 1; - end - end + + wire [`UP(CTR_WIDTH)-1:0] ctr_rdata; + wire [`UP(CTR_WIDTH)-1:0] ctr_wdata = ctr_rdata + 1; + + VX_sp_ram #( + .DATAW (`UP(CTR_WIDTH)), + .SIZE (`CS_LINES_PER_BANK) + ) ctr_store ( + .clk (clk), + .reset (reset), + .read (repl_valid), + .write (repl_valid), + .wren (1'b1), + .addr (repl_line), + .wdata (ctr_wdata), + .rdata (ctr_rdata) + ); + VX_decoder #( .N (WAY_IDX_BITS) ) ctr_decoder ( - .sel_in (counters[repl_line]), + .sel_in (ctr_rdata), .data_in (1'b1), .data_out (repl_way) ); From e06333b3c0f7670cf502d094f7dd3bcf44535f45 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Tue, 15 Oct 2024 11:28:33 -0700 Subject: [PATCH 291/407] minor update --- ci/regression.sh.in | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/ci/regression.sh.in b/ci/regression.sh.in index 662b40717a..390fd1459a 100755 --- a/ci/regression.sh.in +++ b/ci/regression.sh.in @@ -153,6 +153,11 @@ cache() CONFIGS="-DDCACHE_NUM_BANKS=2" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx CONFIGS="-DDCACHE_NUM_BANKS=2" ./ci/blackbox.sh --driver=simx --app=sgemmx + # replacement policy + CONFIGS="-DDCACHE_REPL_POLICY=0" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx + CONFIGS="-DDCACHE_REPL_POLICY=1" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx + CONFIGS="-DDCACHE_REPL_POLICY=2" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx + # test writeback CONFIGS="-DDCACHE_WRITEBACK=1 -DDCACHE_DIRTYBYTES=0 -DDCACHE_NUM_WAYS=4" ./ci/blackbox.sh --driver=rtlsim --app=mstress CONFIGS="-DDCACHE_WRITEBACK=1 -DDCACHE_DIRTYBYTES=1 -DDCACHE_NUM_WAYS=4" ./ci/blackbox.sh --driver=rtlsim --app=mstress From f695e4d75447f9abd7820404bc3ac6a334ef0a11 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Tue, 15 Oct 2024 14:59:31 -0700 Subject: [PATCH 292/407] minor update --- hw/rtl/cache/VX_cache_bank.sv | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/hw/rtl/cache/VX_cache_bank.sv b/hw/rtl/cache/VX_cache_bank.sv index 9b55734e78..942f357408 100644 
--- a/hw/rtl/cache/VX_cache_bank.sv +++ b/hw/rtl/cache/VX_cache_bank.sv @@ -224,25 +224,26 @@ module VX_cache_bank #( wire creq_enable = creq_grant && core_req_valid; assign replay_ready = replay_grant + && ~(!WRITEBACK && replay_rw && mreq_queue_alm_full) // needed for writethrough && ~pipe_stall; assign mem_rsp_ready = fill_grant - && (!WRITEBACK || ~mreq_queue_alm_full) // needed for evictions + && ~(WRITEBACK && mreq_queue_alm_full) // needed for writeback && ~pipe_stall; assign flush_ready = flush_grant - && (!WRITEBACK || ~mreq_queue_alm_full) // needed for evictions + && ~(WRITEBACK && mreq_queue_alm_full) // needed for writeback && ~pipe_stall; assign core_req_ready = creq_grant - && ~mreq_queue_alm_full - && ~mshr_alm_full + && ~mreq_queue_alm_full // needed for fill requests + && ~mshr_alm_full // needed for mshr allocation && ~pipe_stall; wire init_fire = init_valid; wire replay_fire = replay_valid && replay_ready; wire mem_rsp_fire = mem_rsp_valid && mem_rsp_ready; - wire flush_fire = flush_valid && flush_ready; + wire flush_fire = flush_valid && flush_ready; wire core_req_fire = core_req_valid && core_req_ready; wire [MSHR_ADDR_WIDTH-1:0] mem_rsp_id = mem_rsp_tag[MSHR_ADDR_WIDTH-1:0]; @@ -266,15 +267,14 @@ module VX_cache_bank #( assign valid_sel = init_fire || replay_fire || mem_rsp_fire || flush_fire || core_req_fire; assign rw_sel = replay_valid ? replay_rw : core_req_rw; assign byteen_sel = replay_valid ? replay_byteen : core_req_byteen; + assign addr_sel = (init_valid | flush_valid) ? `CS_LINE_ADDR_WIDTH'(flush_sel) : + (replay_valid ? replay_addr : (mem_rsp_valid ? mem_rsp_addr : core_req_addr)); assign word_idx_sel= replay_valid ? replay_wsel : core_req_wsel; assign req_idx_sel = replay_valid ? replay_idx : core_req_idx; assign tag_sel = (init_valid | flush_valid) ? (flush_valid ? flush_tag : '0) : (replay_valid ? replay_tag : (mem_rsp_valid ? mem_rsp_tag_s : core_req_tag)); assign flags_sel = core_req_valid ? 
core_req_flags : '0; - assign addr_sel = (init_valid | flush_valid) ? `CS_LINE_ADDR_WIDTH'(flush_sel) : - (replay_valid ? replay_addr : (mem_rsp_valid ? mem_rsp_addr : core_req_addr)); - if (WRITE_ENABLE) begin : g_data_sel for (genvar i = 0; i < `CS_LINE_WIDTH; ++i) begin : g_i if (i < `CS_WORD_WIDTH) begin : g_lo @@ -417,7 +417,7 @@ module VX_cache_bank #( assign addr_st1 = {line_tag_st1, line_idx_st1}; // ensure mshr replay always get a hit - `RUNTIME_ASSERT (~(valid_st1 && is_replay_st1) || is_hit_st1, ("%t: missed mshr replay", $time)) + `RUNTIME_ASSERT (~(valid_st1 && is_replay_st1 && ~is_hit_st1), ("%t: missed mshr replay", $time)) if (WRITE_ENABLE) begin : g_rdw_hazard // This implementation uses single-port BRAMs for the tags and data stores. @@ -503,6 +503,7 @@ module VX_cache_bank #( .evict_byteen(evict_byteen_st1) ); + // only allocate MSHR entries for non-replay core requests wire mshr_allocate_st0 = valid_st0 && is_creq_st0 && ~is_replay_st0; wire mshr_finalize_st1 = valid_st1 && is_creq_st1 && ~is_replay_st1; @@ -636,6 +637,8 @@ module VX_cache_bank #( wire has_dirty_bytes = (| evict_byteen_st1); `RUNTIME_ASSERT (~do_fill_or_flush_st1 || (line_dirty_st1 == has_dirty_bytes), ("%t: missmatch dirty bytes: dirty_line=%b, dirty_bytes=%b, addr=0x%0h", $time, line_dirty_st1, has_dirty_bytes, `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID))) end + // issue a fill request on a read/write miss + // issue a writeback on a dirty line eviction assign mreq_queue_push = (((do_read_st1 || do_write_st1) && ~is_hit_st1 && ~mshr_pending_st1) || do_writeback_st1) && ~pipe_stall; @@ -653,6 +656,8 @@ module VX_cache_bank #( .data_in (byteen_st1), .data_out (line_byteen) ); + // issue a fill request on a read miss + // issue a memory write on a write request assign mreq_queue_push = ((do_read_st1 && ~is_hit_st1 && ~mshr_pending_st1) || do_write_st1) && ~pipe_stall; @@ -667,6 +672,7 @@ module VX_cache_bank #( `UNUSED_VAR (evict_byteen_st1) end end else begin : g_mreq_queue_ro + // 
issue a fill request on a read miss assign mreq_queue_push = (do_read_st1 && ~is_hit_st1 && ~mshr_pending_st1) && ~pipe_stall; assign mreq_queue_addr = addr_st1; From a7ba377581aaaa9fb08e228fd87db045ec084575 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Wed, 16 Oct 2024 18:04:11 -0700 Subject: [PATCH 293/407] minor update --- hw/rtl/cache/VX_cache_bank.sv | 20 ++++++++++++-------- hw/rtl/cache/VX_cache_data.sv | 5 +++-- hw/rtl/core/VX_issue.sv | 2 +- hw/rtl/core/VX_lsu_unit.sv | 2 +- sim/rtlsim/processor.cpp | 2 -- 5 files changed, 17 insertions(+), 14 deletions(-) diff --git a/hw/rtl/cache/VX_cache_bank.sv b/hw/rtl/cache/VX_cache_bank.sv index 942f357408..3b6b3d0766 100644 --- a/hw/rtl/cache/VX_cache_bank.sv +++ b/hw/rtl/cache/VX_cache_bank.sv @@ -169,7 +169,7 @@ module VX_cache_bank #( wire is_replay_st0, is_replay_st1; wire [`UP(FLAGS_WIDTH)-1:0] flags_sel, flags_st0, flags_st1; wire mshr_pending_st0, mshr_pending_st1; - wire [MSHR_ADDR_WIDTH-1:0] mshr_prev_id_st0, mshr_prev_id_st1; + wire [MSHR_ADDR_WIDTH-1:0] mshr_previd_st0, mshr_previd_st1; wire mshr_empty; wire flush_valid; @@ -404,8 +404,8 @@ module VX_cache_bank #( .clk (clk), .reset (reset), .enable (~pipe_stall), - .data_in ({valid_st0, is_fill_st0, is_flush_st0, is_creq_st0, is_replay_st0, rw_st0, flags_st0, line_idx_st0, data_st0, byteen_st0, word_idx_st0, req_idx_st0, tag_st0, mshr_id_st0, mshr_prev_id_st0, mshr_pending_st0}), - .data_out ({valid_st1, is_fill_st1, is_flush_st1, is_creq_st1, is_replay_st1, rw_st1, flags_st1, line_idx_st1, data_st1, byteen_st1, word_idx_st1, req_idx_st1, tag_st1, mshr_id_st1, mshr_prev_id_st1, mshr_pending_st1}) + .data_in ({valid_st0, is_fill_st0, is_flush_st0, is_creq_st0, is_replay_st0, rw_st0, flags_st0, line_idx_st0, data_st0, byteen_st0, word_idx_st0, req_idx_st0, tag_st0, mshr_id_st0, mshr_previd_st0, mshr_pending_st0}), + .data_out ({valid_st1, is_fill_st1, is_flush_st1, is_creq_st1, is_replay_st1, rw_st1, flags_st1, line_idx_st1, data_st1, byteen_st1, 
word_idx_st1, req_idx_st1, tag_st1, mshr_id_st1, mshr_previd_st1, mshr_pending_st1}) ); if (UUID_WIDTH != 0) begin : g_req_uuid_st1 @@ -426,10 +426,13 @@ module VX_cache_bank #( // Data fill/flush can perform read and write in the same stage, since way_idx is available in st0. // A data read should happen in st0 for its result to be available in st1. // A data write should happen in st1 when the tag hit status is available. + // The r/w hazard is needed for consecutive writes since they both wonly write in st1. + // The r/w hazard is also not needed for next writethrough fill/flush to the same line. + // For reads or writeback fill/flush to the same line, we sill need the hazard + // because the data writeen in st1 cannot be read at the same time in st0 without extra forwarding logic. wire [`CS_LINE_SEL_BITS-1:0] line_idx_sel = addr_sel[`CS_LINE_SEL_BITS-1:0]; - wire is_read_sel = is_creq_sel && !rw_sel; wire is_write_sel = is_creq_sel && rw_sel; - wire is_same_read_sel = is_read_sel && (line_idx_sel == line_idx_st0); + wire is_same_line = (line_idx_sel == line_idx_st0); always @(posedge clk) begin if (reset) begin post_hazard <= 0; @@ -437,7 +440,8 @@ module VX_cache_bank #( end else begin if (!crsp_queue_stall) begin post_hazard <= rdw_hazard; - rdw_hazard <= do_write_st0 && valid_sel && !(is_write_sel || is_same_read_sel || (is_flush_sel && !WRITEBACK)); + rdw_hazard <= do_write_st0 && valid_sel + && !(is_write_sel || (is_same_line && !WRITEBACK && (is_fill_sel || is_flush_sel))); end end end @@ -575,7 +579,7 @@ module VX_cache_bank #( .allocate_data ({word_idx_st0, byteen_st0, write_data_st0, tag_st0, req_idx_st0}), .allocate_id (mshr_alloc_id_st0), .allocate_pending(mshr_pending_st0), - .allocate_previd(mshr_prev_id_st0), + .allocate_previd(mshr_previd_st0), `UNUSED_PIN (allocate_ready), // finalize @@ -583,7 +587,7 @@ module VX_cache_bank #( .finalize_is_release(mshr_release_st1), .finalize_is_pending(mshr_pending_st1), .finalize_id (mshr_id_st1), - 
.finalize_previd(mshr_prev_id_st1) + .finalize_previd(mshr_previd_st1) ); // schedule core response diff --git a/hw/rtl/cache/VX_cache_data.sv b/hw/rtl/cache/VX_cache_data.sv index 278caccd5f..ebce2109d7 100644 --- a/hw/rtl/cache/VX_cache_data.sv +++ b/hw/rtl/cache/VX_cache_data.sv @@ -141,8 +141,9 @@ module VX_cache_data #( wire [`CS_WORDS_PER_LINE-1:0][WORD_SIZE-1:0] wren_w; for (genvar j = 0; j < `CS_WORDS_PER_LINE; ++j) begin : g_j wire word_en = (WORD_SIZE == 1) || (word_idx == j); - assign line_wdata[j] = write ? write_data : fill_data[j]; - assign wren_w[j] = write ? (write_byteen & {WORD_SIZE{word_en}}) : {WORD_SIZE{1'b1}}; + // warning: should prioritize the fill over write to handle the case where both are asserted + assign line_wdata[j] = fill ? fill_data[j] : write_data; + assign wren_w[j] = fill ? {WORD_SIZE{1'b1}} : (write_byteen & {WORD_SIZE{word_en}}); end assign line_wren = wren_w; assign line_write = (fill && ((NUM_WAYS == 1) || evict_way[i])) diff --git a/hw/rtl/core/VX_issue.sv b/hw/rtl/core/VX_issue.sv index 84bcc00722..5da33cbba9 100644 --- a/hw/rtl/core/VX_issue.sv +++ b/hw/rtl/core/VX_issue.sv @@ -52,7 +52,7 @@ module VX_issue import VX_gpu_pkg::*; #( `SCOPE_IO_SWITCH (`ISSUE_WIDTH); - for (genvar issue_id = 0; issue_id < `ISSUE_WIDTH; ++issue_id) begin : g_issue_slices + for (genvar issue_id = 0; issue_id < `ISSUE_WIDTH; ++issue_id) begin : g_slices VX_decode_if #( .NUM_WARPS (PER_ISSUE_WARPS) ) per_issue_decode_if(); diff --git a/hw/rtl/core/VX_lsu_unit.sv b/hw/rtl/core/VX_lsu_unit.sv index 6e9e2081c7..674ca2686e 100644 --- a/hw/rtl/core/VX_lsu_unit.sv +++ b/hw/rtl/core/VX_lsu_unit.sv @@ -52,7 +52,7 @@ module VX_lsu_unit import VX_gpu_pkg::*; #( .NUM_LANES (NUM_LANES) ) per_block_commit_if[BLOCK_SIZE](); - for (genvar block_idx = 0; block_idx < BLOCK_SIZE; ++block_idx) begin : g_lsus + for (genvar block_idx = 0; block_idx < BLOCK_SIZE; ++block_idx) begin : g_slices VX_lsu_slice #( .INSTANCE_ID ($sformatf("%s%0d", INSTANCE_ID, block_idx)) ) 
lsu_slice( diff --git a/sim/rtlsim/processor.cpp b/sim/rtlsim/processor.cpp index 32f4b4e1ea..1807e56307 100644 --- a/sim/rtlsim/processor.cpp +++ b/sim/rtlsim/processor.cpp @@ -241,8 +241,6 @@ class Processor::Impl { #ifdef VCD_OUTPUT if (sim_trace_enabled()) { tfp_->dump(timestamp); - } else { - exit(-1); } #endif ++timestamp; From 5971158f434872f59a0b28225d3255b0f4f1f528 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Wed, 16 Oct 2024 20:22:42 -0700 Subject: [PATCH 294/407] minor update --- ci/regression.sh.in | 1 + hw/rtl/cache/VX_cache_bank.sv | 61 ++++++++++---------- hw/rtl/cache/VX_cache_data.sv | 102 +++++++++++++++++++--------------- 3 files changed, 89 insertions(+), 75 deletions(-) diff --git a/ci/regression.sh.in b/ci/regression.sh.in index 390fd1459a..c3abb43df0 100755 --- a/ci/regression.sh.in +++ b/ci/regression.sh.in @@ -142,6 +142,7 @@ cache() CONFIGS="-DL1_LINE_SIZE=$XSIZE -DLMEM_DISABLE" ./ci/blackbox.sh --driver=simx --app=sgemmx # test cache ways + CONFIGS="-DICACHE_NUM_WAYS=1 -DDCACHE_NUM_WAYS=1" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx CONFIGS="-DICACHE_NUM_WAYS=4 -DDCACHE_NUM_WAYS=8" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx CONFIGS="-DICACHE_NUM_WAYS=4 -DDCACHE_NUM_WAYS=8" ./ci/blackbox.sh --driver=simx --app=sgemmx diff --git a/hw/rtl/cache/VX_cache_bank.sv b/hw/rtl/cache/VX_cache_bank.sv index 3b6b3d0766..ad9ad588a1 100644 --- a/hw/rtl/cache/VX_cache_bank.sv +++ b/hw/rtl/cache/VX_cache_bank.sv @@ -393,7 +393,6 @@ module VX_cache_bank #( .evict_way_r(evict_way_st1) ); - wire [MSHR_ADDR_WIDTH-1:0] mshr_alloc_id_st0; assign mshr_id_st0 = is_replay_st0 ? 
replay_id_st0 : mshr_alloc_id_st0; @@ -440,8 +439,7 @@ module VX_cache_bank #( end else begin if (!crsp_queue_stall) begin post_hazard <= rdw_hazard; - rdw_hazard <= do_write_st0 && valid_sel - && !(is_write_sel || (is_same_line && !WRITEBACK && (is_fill_sel || is_flush_sel))); + rdw_hazard <= do_write_st0 && valid_sel && !(is_write_sel || (is_same_line && !WRITEBACK && (is_fill_sel || is_flush_sel))); end end end @@ -737,19 +735,24 @@ module VX_cache_bank #( && ~(replay_fire || mem_rsp_fire || core_req_fire || flush_fire); always @(posedge clk) begin if (input_stall || pipe_stall) begin - `TRACE(3, ("%t: *** %s stall: crsq=%b, mreq=%b, mshr=%b, rdw=%b\n", $time, INSTANCE_ID, crsp_queue_stall, mreq_queue_alm_full, mshr_alm_full, rdw_hazard)) + `TRACE(3, ("%t: *** %s stall: crsq=%b, mreq=%b, mshr=%b, rdw=%b\n", $time, INSTANCE_ID, + rsp_queue_stall, mreq_queue_alm_full, mshr_alm_full, rdw_hazard)) end if (mem_rsp_fire) begin - `TRACE(2, ("%t: %s fill-rsp: addr=0x%0h, mshr_id=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mem_rsp_addr, BANK_ID), mem_rsp_id, mem_rsp_data, req_uuid_sel)) + `TRACE(2, ("%t: %s fill-rsp: addr=0x%0h, mshr_id=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID, + `CS_LINE_TO_FULL_ADDR(mem_rsp_addr, BANK_ID), mem_rsp_id, mem_rsp_data, req_uuid_sel)) end if (replay_fire) begin - `TRACE(2, ("%t: %s mshr-pop: addr=0x%0h, tag=0x%0h, req_idx=%0d (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(replay_addr, BANK_ID), replay_tag, replay_idx, req_uuid_sel)) + `TRACE(2, ("%t: %s mshr-pop: addr=0x%0h, tag=0x%0h, req_idx=%0d (#%0d)\n", $time, INSTANCE_ID, + `CS_LINE_TO_FULL_ADDR(replay_addr, BANK_ID), replay_tag, replay_idx, req_uuid_sel)) end if (core_req_fire) begin if (core_req_rw) begin - `TRACE(2, ("%t: %s core-wr-req: addr=0x%0h, tag=0x%0h, req_idx=%0d, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(core_req_addr, BANK_ID), core_req_tag, core_req_idx, core_req_byteen, core_req_data, req_uuid_sel)) + 
`TRACE(2, ("%t: %s core-wr-req: addr=0x%0h, tag=0x%0h, req_idx=%0d, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, + `CS_LINE_TO_FULL_ADDR(core_req_addr, BANK_ID), core_req_tag, core_req_idx, core_req_byteen, core_req_data, req_uuid_sel)) end else begin - `TRACE(2, ("%t: %s core-rd-req: addr=0x%0h, tag=0x%0h, req_idx=%0d (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(core_req_addr, BANK_ID), core_req_tag, core_req_idx, req_uuid_sel)) + `TRACE(2, ("%t: %s core-rd-req: addr=0x%0h, tag=0x%0h, req_idx=%0d (#%0d)\n", $time, INSTANCE_ID, + `CS_LINE_TO_FULL_ADDR(core_req_addr, BANK_ID), core_req_tag, core_req_idx, req_uuid_sel)) end end if (do_init_st0) begin @@ -764,45 +767,43 @@ module VX_cache_bank #( `CS_LINE_TO_FULL_ADDR(addr_st0, BANK_ID), evict_way_st0, line_idx_st0, req_uuid_st0)) end if (do_read_st1 && ~pipe_stall) begin - if (is_hit_st1) begin - `TRACE(3, ("%t: %s tags-rd-hit: addr=0x%0h, way=%b, line=%0d, tag=0x%0h (#%0d)\n", $time, INSTANCE_ID, - `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID), tag_matches_st1, line_idx_st1, line_tag_st1, req_uuid_st1)) - end else begin - `TRACE(3, ("%t: %s tags-rd-miss: addr=0x%0h, line=%0d, tag=0x%0h (#%0d)\n", $time, INSTANCE_ID, - `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID), line_idx_st1, line_tag_st1, req_uuid_st1)) - end + `TRACE(3, ("%t: %s tags-read: addr=0x%0h, way=%b, line=%0d, tag=0x%0h, hit=%b (#%0d)\n", $time, INSTANCE_ID, + `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID), tag_matches_st1, line_idx_st1, line_tag_st1, is_hit_st1, req_uuid_st1)) end if (do_write_st1 && ~pipe_stall) begin - if (is_hit_st1) begin - `TRACE(3, ("%t: %s tags-wr-hit: addr=0x%0h, way=%b, line=%0d, tag=0x%0h (#%0d)\n", $time, INSTANCE_ID, - `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID), tag_matches_st1, line_idx_st1, line_tag_st1, req_uuid_st1)) - end else begin - `TRACE(3, ("%t: %s tags-wr-miss: addr=0x%0h, line=%0d, tag=0x%0h (#%0d)\n", $time, INSTANCE_ID, - `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID), line_idx_st1, line_tag_st1, req_uuid_st1)) - end 
+ `TRACE(3, ("%t: %s tags-write: addr=0x%0h, way=%b, line=%0d, tag=0x%0h, hit=%b (#%0d)\n", $time, INSTANCE_ID, + `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID), tag_matches_st1, line_idx_st1, line_tag_st1, is_hit_st1, req_uuid_st1)) end if (do_fill_st0 && ~pipe_stall) begin - `TRACE(3, ("%t: %s data-fill: addr=0x%0h, way=%b, line=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(addr_st0, BANK_ID), evict_way_st0, line_idx_st0, data_st0, req_uuid_st0)) + `TRACE(3, ("%t: %s data-fill: addr=0x%0h, way=%b, line=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID, + `CS_LINE_TO_FULL_ADDR(addr_st0, BANK_ID), evict_way_st0, line_idx_st0, data_st0, req_uuid_st0)) end if (do_flush_st0 && ~pipe_stall) begin - `TRACE(3, ("%t: %s data-flush: addr=0x%0h, way=%b, line=%0d (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(addr_st0, BANK_ID), evict_way_st0, line_idx_st0, req_uuid_st0)) + `TRACE(3, ("%t: %s data-flush: addr=0x%0h, way=%b, line=%0d (#%0d)\n", $time, INSTANCE_ID, + `CS_LINE_TO_FULL_ADDR(addr_st0, BANK_ID), evict_way_st0, line_idx_st0, req_uuid_st0)) end if (do_read_st1 && is_hit_st1 && ~pipe_stall) begin - `TRACE(3, ("%t: %s data-read: addr=0x%0h, way=%b, line=%0d, wsel=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID), tag_matches_st1, line_idx_st1, word_idx_st1, read_data_st1, req_uuid_st1)) + `TRACE(3, ("%t: %s data-read: addr=0x%0h, way=%b, line=%0d, wsel=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID, + `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID), tag_matches_st1, line_idx_st1, word_idx_st1, read_data_st1, req_uuid_st1)) end if (do_write_st1 && is_hit_st1 && ~pipe_stall) begin - `TRACE(3, ("%t: %s data-write: addr=0x%0h, way=%b, line=%0d, wsel=%0d, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID), tag_matches_st1, line_idx_st1, word_idx_st1, byteen_st1, write_data_st1, req_uuid_st1)) + `TRACE(3, ("%t: %s data-write: addr=0x%0h, way=%b, line=%0d, wsel=%0d, byteen=0x%h, 
data=0x%h (#%0d)\n", $time, INSTANCE_ID, + `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID), tag_matches_st1, line_idx_st1, word_idx_st1, byteen_st1, write_data_st1, req_uuid_st1)) end if (crsp_queue_fire) begin - `TRACE(2, ("%t: %s core-rd-rsp: addr=0x%0h, tag=0x%0h, req_idx=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID), crsp_queue_tag, crsp_queue_idx, crsp_queue_data, req_uuid_st1)) + `TRACE(2, ("%t: %s core-rd-rsp: addr=0x%0h, tag=0x%0h, req_idx=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID, + `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID), crsp_queue_tag, crsp_queue_idx, crsp_queue_data, req_uuid_st1)) end if (mreq_queue_push) begin if (!WRITEBACK && do_write_st1) begin - `TRACE(2, ("%t: %s writethrough: addr=0x%0h, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mreq_queue_addr, BANK_ID), mreq_queue_byteen, mreq_queue_data, req_uuid_st1)) + `TRACE(2, ("%t: %s writethrough: addr=0x%0h, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, + `CS_LINE_TO_FULL_ADDR(mreq_queue_addr, BANK_ID), mreq_queue_byteen, mreq_queue_data, req_uuid_st1)) end else if (WRITEBACK && do_writeback_st1) begin - `TRACE(2, ("%t: %s writeback: addr=0x%0h, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mreq_queue_addr, BANK_ID), mreq_queue_byteen, mreq_queue_data, req_uuid_st1)) + `TRACE(2, ("%t: %s writeback: addr=0x%0h, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, + `CS_LINE_TO_FULL_ADDR(mreq_queue_addr, BANK_ID), mreq_queue_byteen, mreq_queue_data, req_uuid_st1)) end else begin - `TRACE(2, ("%t: %s fill-req: addr=0x%0h, mshr_id=%0d (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mreq_queue_addr, BANK_ID), mshr_id_st1, req_uuid_st1)) + `TRACE(2, ("%t: %s fill-req: addr=0x%0h, mshr_id=%0d (#%0d)\n", $time, INSTANCE_ID, + `CS_LINE_TO_FULL_ADDR(mreq_queue_addr, BANK_ID), mshr_id_st1, req_uuid_st1)) end end end diff --git a/hw/rtl/cache/VX_cache_data.sv b/hw/rtl/cache/VX_cache_data.sv index 
ebce2109d7..dc07af1edb 100644 --- a/hw/rtl/cache/VX_cache_data.sv +++ b/hw/rtl/cache/VX_cache_data.sv @@ -56,7 +56,7 @@ module VX_cache_data #( `UNUSED_PARAM (WORD_SIZE) `UNUSED_VAR (stall) - localparam BYTEENW = (WRITE_ENABLE != 0) ? LINE_SIZE : 1; + localparam BYTEENW = (WRITE_ENABLE != 0 || NUM_WAYS != 1) ? (LINE_SIZE * NUM_WAYS) : 1; wire [NUM_WAYS-1:0][`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] line_rdata; @@ -125,59 +125,71 @@ module VX_cache_data #( end else begin : g_no_writeback `UNUSED_VAR (init) + `UNUSED_VAR (flush) assign line_dirty = 0; assign evict_data = '0; assign evict_byteen = '0; end - for (genvar i = 0; i < NUM_WAYS; ++i) begin : g_data_store - - wire [`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] line_wdata; - wire [BYTEENW-1:0] line_wren; - wire line_write; - wire line_read; - - if (WRITE_ENABLE != 0) begin : g_line_data - wire [`CS_WORDS_PER_LINE-1:0][WORD_SIZE-1:0] wren_w; - for (genvar j = 0; j < `CS_WORDS_PER_LINE; ++j) begin : g_j - wire word_en = (WORD_SIZE == 1) || (word_idx == j); - // warning: should prioritize the fill over write to handle the case where both are asserted - assign line_wdata[j] = fill ? fill_data[j] : write_data; - assign wren_w[j] = fill ? 
{WORD_SIZE{1'b1}} : (write_byteen & {WORD_SIZE{word_en}}); + wire [NUM_WAYS-1:0][`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] line_wdata; + wire [BYTEENW-1:0] line_wren; + wire line_write; + wire line_read; + + if (BYTEENW != 1) begin : g_wdata + wire [NUM_WAYS-1:0][LINE_SIZE-1:0] line_wren_w; + for (genvar i = 0; i < NUM_WAYS; ++i) begin : g_ways + wire fill_way_en = (NUM_WAYS == 1) || evict_way[i]; + if (WRITE_ENABLE != 0) begin : g_we + wire [`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] word_wdata; + wire [`CS_WORDS_PER_LINE-1:0][WORD_SIZE-1:0] word_wren; + for (genvar j = 0; j < `CS_WORDS_PER_LINE; ++j) begin : g_words + wire word_en = (WORD_SIZE == 1) || (word_idx == j); + // warning: should prioritize the fill over write in case both are asserted + assign word_wdata[j] = fill ? fill_data[j] : write_data; + assign word_wren[j] = fill ? {WORD_SIZE{1'b1}} : (write_byteen & {WORD_SIZE{word_en}}); + end + wire way_en = fill ? fill_way_en : tag_matches[i]; + assign line_wdata[i] = word_wdata; + assign line_wren_w[i] = word_wren & {LINE_SIZE{way_en}}; + end else begin : g_ro + `UNUSED_VAR (write) + `UNUSED_VAR (write_byteen) + `UNUSED_VAR (write_data) + `UNUSED_VAR (word_idx) + assign line_wdata[i] = fill_data; + assign line_wren_w[i] = {LINE_SIZE{fill_way_en}}; end - assign line_wren = wren_w; - assign line_write = (fill && ((NUM_WAYS == 1) || evict_way[i])) - || (write && tag_matches[i]); - assign line_read = read || ((fill || flush) && WRITEBACK); - end else begin : g_line_data_ro - `UNUSED_VAR (write) - `UNUSED_VAR (flush) - `UNUSED_VAR (write_byteen) - `UNUSED_VAR (write_data) - `UNUSED_VAR (word_idx) - assign line_wdata = fill_data; - assign line_wren = 1'b1; - assign line_write = fill && ((NUM_WAYS == 1) || evict_way[i]); - assign line_read = read; end - - VX_sp_ram #( - .DATAW (`CS_LINE_WIDTH), - .SIZE (`CS_LINES_PER_BANK), - .WRENW (BYTEENW), - .OUT_REG (1) - ) data_store ( - .clk (clk), - .reset (reset), - .read (line_read), - .write (line_write), - .wren 
(line_wren), - .addr (line_idx), - .wdata (line_wdata), - .rdata (line_rdata[i]) - ); + assign line_wren = line_wren_w; + end else begin : g_ro_1w_wdata + `UNUSED_VAR (write) + `UNUSED_VAR (evict_way) + `UNUSED_VAR (write_byteen) + `UNUSED_VAR (write_data) + assign line_wdata = fill_data; + assign line_wren = 1'b1; end + assign line_write = fill || (write && WRITE_ENABLE); + assign line_read = read || ((fill || flush) && WRITEBACK); + + VX_sp_ram #( + .DATAW (NUM_WAYS * `CS_LINE_WIDTH), + .SIZE (`CS_LINES_PER_BANK), + .WRENW (BYTEENW), + .OUT_REG (1) + ) data_store ( + .clk (clk), + .reset (reset), + .read (line_read), + .write (line_write), + .wren (line_wren), + .addr (line_idx), + .wdata (line_wdata), + .rdata (line_rdata) + ); + wire [`LOG2UP(NUM_WAYS)-1:0] hit_way_idx; VX_onehot_encoder #( .N (NUM_WAYS) From 077b682d7d649dcd51a41a41da488c12d83d3842 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Thu, 17 Oct 2024 04:58:29 -0700 Subject: [PATCH 295/407] minor update --- hw/rtl/Vortex.sv | 6 ++-- hw/rtl/cache/VX_cache_bank.sv | 41 +++++++++++------------- hw/rtl/cache/VX_cache_data.sv | 44 ++++++++++++------------- hw/rtl/cache/VX_cache_wrap.sv | 12 +++---- hw/rtl/core/VX_alu_int.sv | 2 +- hw/rtl/core/VX_dcr_data.sv | 4 +-- hw/rtl/core/VX_lsu_slice.sv | 32 +++++++++---------- hw/rtl/core/VX_scoreboard.sv | 2 +- hw/rtl/libs/VX_mem_coalescer.sv | 40 +++++++++++------------ hw/rtl/libs/VX_mem_scheduler.sv | 48 ++++++++++++++-------------- hw/rtl/mem/VX_gbar_unit.sv | 4 +-- hw/rtl/mem/VX_local_mem.sv | 6 ++-- tests/regression/dogfood/testcases.h | 48 ++++++++++++++-------------- 13 files changed, 143 insertions(+), 146 deletions(-) diff --git a/hw/rtl/Vortex.sv b/hw/rtl/Vortex.sv index 0fa3ce31fd..bae697c65d 100644 --- a/hw/rtl/Vortex.sv +++ b/hw/rtl/Vortex.sv @@ -204,13 +204,13 @@ module Vortex import VX_gpu_pkg::*; ( always @(posedge clk) begin if (mem_req_fire) begin if (mem_req_rw) begin - `TRACE(1, ("%t: MEM Wr Req: addr=0x%0h, tag=0x%0h, byteen=0x%h data=0x%h 
(#%0d)\n", $time, `TO_FULL_ADDR(mem_req_addr), mem_req_tag, mem_req_byteen, mem_req_data, mem_req_uuid)) + `TRACE(2, ("%t: MEM Wr Req: addr=0x%0h, tag=0x%0h, byteen=0x%h data=0x%h (#%0d)\n", $time, `TO_FULL_ADDR(mem_req_addr), mem_req_tag, mem_req_byteen, mem_req_data, mem_req_uuid)) end else begin - `TRACE(1, ("%t: MEM Rd Req: addr=0x%0h, tag=0x%0h, byteen=0x%h (#%0d)\n", $time, `TO_FULL_ADDR(mem_req_addr), mem_req_tag, mem_req_byteen, mem_req_uuid)) + `TRACE(2, ("%t: MEM Rd Req: addr=0x%0h, tag=0x%0h, byteen=0x%h (#%0d)\n", $time, `TO_FULL_ADDR(mem_req_addr), mem_req_tag, mem_req_byteen, mem_req_uuid)) end end if (mem_rsp_fire) begin - `TRACE(1, ("%t: MEM Rd Rsp: tag=0x%0h, data=0x%h (#%0d)\n", $time, mem_rsp_tag, mem_rsp_data, mem_rsp_uuid)) + `TRACE(2, ("%t: MEM Rd Rsp: tag=0x%0h, data=0x%h (#%0d)\n", $time, mem_rsp_tag, mem_rsp_data, mem_rsp_uuid)) end end `endif diff --git a/hw/rtl/cache/VX_cache_bank.sv b/hw/rtl/cache/VX_cache_bank.sv index ad9ad588a1..0e16e6c658 100644 --- a/hw/rtl/cache/VX_cache_bank.sv +++ b/hw/rtl/cache/VX_cache_bank.sv @@ -161,7 +161,7 @@ module VX_cache_bank #( wire [WORD_SIZE-1:0] byteen_sel, byteen_st0, byteen_st1; wire [REQ_SEL_WIDTH-1:0] req_idx_sel, req_idx_st0, req_idx_st1; wire [TAG_WIDTH-1:0] tag_sel, tag_st0, tag_st1; - wire [`CS_WORD_WIDTH-1:0] write_data_st0, write_data_st1; + wire [`CS_WORD_WIDTH-1:0] write_word_st0, write_word_st1; wire [`CS_WORD_WIDTH-1:0] read_data_st1; wire [`CS_LINE_WIDTH-1:0] data_sel, data_st0, data_st1; wire [MSHR_ADDR_WIDTH-1:0] mshr_id_st0, mshr_id_st1; @@ -335,8 +335,7 @@ module VX_cache_bank #( wire do_fill_st1 = valid_st1 && is_fill_st1; wire do_flush_st1 = valid_st1 && is_flush_st1 && WRITEBACK; - assign write_data_st0 = data_st0[`CS_WORD_WIDTH-1:0]; - + assign write_word_st0 = data_st0[`CS_WORD_WIDTH-1:0]; assign line_idx_st0 = addr_st0[`CS_LINE_SEL_BITS-1:0]; wire [`CS_TAG_SEL_BITS-1:0] evict_tag_st1; @@ -346,6 +345,8 @@ module VX_cache_bank #( wire do_lookup_st0 = do_read_st0 || 
do_write_st0; + wire do_lookup_st1 = do_read_st1 || do_write_st1; + reg [NUM_WAYS-1:0] victim_way_st0; VX_cache_repl #( @@ -358,7 +359,7 @@ module VX_cache_bank #( .clk (clk), .reset (reset), .stall (pipe_stall), - .hit_valid ((do_read_st1 || do_write_st1) && is_hit_st1 && ~pipe_stall), + .hit_valid (do_lookup_st1 && is_hit_st1 && ~pipe_stall), .hit_line (line_idx_st1), .hit_way (tag_matches_st1), .repl_valid (do_fill_st0 && ~pipe_stall), @@ -437,9 +438,9 @@ module VX_cache_bank #( post_hazard <= 0; rdw_hazard <= 0; end else begin - if (!crsp_queue_stall) begin + if (~crsp_queue_stall) begin post_hazard <= rdw_hazard; - rdw_hazard <= do_write_st0 && valid_sel && !(is_write_sel || (is_same_line && !WRITEBACK && (is_fill_sel || is_flush_sel))); + rdw_hazard <= do_write_st0 && valid_sel && ~(is_write_sel || (is_same_line && !WRITEBACK && (/*is_fill_sel ||*/is_flush_sel))); end end end @@ -448,7 +449,7 @@ module VX_cache_bank #( assign post_hazard = 0; end - assign write_data_st1 = data_st1[`CS_WORD_WIDTH-1:0]; + assign write_word_st1 = data_st1[`CS_WORD_WIDTH-1:0]; `UNUSED_VAR (data_st1) wire [`CS_LINE_WIDTH-1:0] evict_data_st1; @@ -463,7 +464,7 @@ module VX_cache_bank #( // data writes should happen in st1 when the tag hit is available, // and use line_idx_st1 to ensure the correct line is updated. // if a rdw hazard is active due to conflict, ensure we don't write twice. - assign data_write = do_write_st1 && !post_hazard && ~crsp_queue_stall; + assign data_write = do_write_st1 && ~post_hazard && ~crsp_queue_stall; assign data_line_idx = data_write ? 
line_idx_st1 : line_idx_st0; end else begin : g_data_ctrl_ro `UNUSED_VAR (post_hazard) @@ -495,7 +496,7 @@ module VX_cache_bank #( .tag_matches(tag_matches_st1), .line_idx (data_line_idx), .fill_data (data_st0), - .write_data (write_data_st1), + .write_word (write_word_st1), .word_idx (word_idx_st1), .write_byteen(byteen_st1), // outputs @@ -574,7 +575,7 @@ module VX_cache_bank #( .allocate_valid (mshr_allocate_st0 && ~pipe_stall), .allocate_addr (addr_st0), .allocate_rw (rw_st0), - .allocate_data ({word_idx_st0, byteen_st0, write_data_st0, tag_st0, req_idx_st0}), + .allocate_data ({word_idx_st0, byteen_st0, write_word_st0, tag_st0, req_idx_st0}), .allocate_id (mshr_alloc_id_st0), .allocate_pending(mshr_pending_st0), .allocate_previd(mshr_previd_st0), @@ -641,7 +642,7 @@ module VX_cache_bank #( end // issue a fill request on a read/write miss // issue a writeback on a dirty line eviction - assign mreq_queue_push = (((do_read_st1 || do_write_st1) && ~is_hit_st1 && ~mshr_pending_st1) + assign mreq_queue_push = ((do_lookup_st1 && ~is_hit_st1 && ~mshr_pending_st1) || do_writeback_st1) && ~pipe_stall; assign mreq_queue_addr = is_fill_or_flush_st1 ? evict_addr_st1 : addr_st1; @@ -665,7 +666,7 @@ module VX_cache_bank #( && ~pipe_stall; assign mreq_queue_addr = addr_st1; assign mreq_queue_rw = rw_st1; - assign mreq_queue_data = {`CS_WORDS_PER_LINE{write_data_st1}}; + assign mreq_queue_data = {`CS_WORDS_PER_LINE{write_word_st1}}; assign mreq_queue_byteen = rw_st1 ? 
line_byteen : '1; `UNUSED_VAR (is_fill_or_flush_st1) `UNUSED_VAR (do_writeback_st1) @@ -735,8 +736,8 @@ module VX_cache_bank #( && ~(replay_fire || mem_rsp_fire || core_req_fire || flush_fire); always @(posedge clk) begin if (input_stall || pipe_stall) begin - `TRACE(3, ("%t: *** %s stall: crsq=%b, mreq=%b, mshr=%b, rdw=%b\n", $time, INSTANCE_ID, - rsp_queue_stall, mreq_queue_alm_full, mshr_alm_full, rdw_hazard)) + `TRACE(4, ("%t: *** %s stall: crsq=%b, mreq=%b, mshr=%b, rdw=%b\n", $time, INSTANCE_ID, + crsp_queue_stall, mreq_queue_alm_full, mshr_alm_full, rdw_hazard)) end if (mem_rsp_fire) begin `TRACE(2, ("%t: %s fill-rsp: addr=0x%0h, mshr_id=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID, @@ -766,13 +767,9 @@ module VX_cache_bank #( `TRACE(3, ("%t: %s tags-flush: addr=0x%0h, way=%b, line=%0d (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(addr_st0, BANK_ID), evict_way_st0, line_idx_st0, req_uuid_st0)) end - if (do_read_st1 && ~pipe_stall) begin - `TRACE(3, ("%t: %s tags-read: addr=0x%0h, way=%b, line=%0d, tag=0x%0h, hit=%b (#%0d)\n", $time, INSTANCE_ID, - `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID), tag_matches_st1, line_idx_st1, line_tag_st1, is_hit_st1, req_uuid_st1)) - end - if (do_write_st1 && ~pipe_stall) begin - `TRACE(3, ("%t: %s tags-write: addr=0x%0h, way=%b, line=%0d, tag=0x%0h, hit=%b (#%0d)\n", $time, INSTANCE_ID, - `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID), tag_matches_st1, line_idx_st1, line_tag_st1, is_hit_st1, req_uuid_st1)) + if (do_lookup_st1 && ~pipe_stall) begin + `TRACE(3, ("%t: %s tags-Lookup: addr=0x%0h, rw=%b, way=%b, line=%0d, tag=0x%0h, hit=%b (#%0d)\n", $time, INSTANCE_ID, + `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID), rw_st1, tag_matches_st1, line_idx_st1, line_tag_st1, is_hit_st1, req_uuid_st1)) end if (do_fill_st0 && ~pipe_stall) begin `TRACE(3, ("%t: %s data-fill: addr=0x%0h, way=%b, line=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID, @@ -788,7 +785,7 @@ module VX_cache_bank #( end if (do_write_st1 && is_hit_st1 && ~pipe_stall) begin 
`TRACE(3, ("%t: %s data-write: addr=0x%0h, way=%b, line=%0d, wsel=%0d, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, - `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID), tag_matches_st1, line_idx_st1, word_idx_st1, byteen_st1, write_data_st1, req_uuid_st1)) + `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID), tag_matches_st1, line_idx_st1, word_idx_st1, byteen_st1, write_word_st1, req_uuid_st1)) end if (crsp_queue_fire) begin `TRACE(2, ("%t: %s core-rd-rsp: addr=0x%0h, tag=0x%0h, req_idx=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID, diff --git a/hw/rtl/cache/VX_cache_data.sv b/hw/rtl/cache/VX_cache_data.sv index dc07af1edb..7b5f1c5525 100644 --- a/hw/rtl/cache/VX_cache_data.sv +++ b/hw/rtl/cache/VX_cache_data.sv @@ -44,7 +44,7 @@ module VX_cache_data #( input wire [NUM_WAYS-1:0] evict_way, input wire [NUM_WAYS-1:0] tag_matches, input wire [`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] fill_data, - input wire [`CS_WORD_WIDTH-1:0] write_data, + input wire [`CS_WORD_WIDTH-1:0] write_word, input wire [WORD_SIZE-1:0] write_byteen, input wire [`UP(`CS_WORD_SEL_BITS)-1:0] word_idx, // outputs @@ -79,26 +79,23 @@ module VX_cache_data #( wire [NUM_WAYS-1:0][BYTEEN_DATAW-1:0] byteen_wren; for (genvar i = 0; i < NUM_WAYS; ++i) begin : g_byteen_wdata + wire evict_way_en = (NUM_WAYS == 1) || evict_way[i]; wire dirty_data = write; // only asserted on writes - wire dirty_wren = init || (write ? tag_matches[i] : evict_way[i]); - + wire dirty_wren = init || (write ? 
tag_matches[i] : evict_way_en); if (DIRTY_BYTES != 0) begin : g_dirty_bytes wire [`CS_WORDS_PER_LINE-1:0][WORD_SIZE-1:0] bytes_data; wire [`CS_WORDS_PER_LINE-1:0][WORD_SIZE-1:0] bytes_wren; - for (genvar j = 0; j < `CS_WORDS_PER_LINE; ++j) begin : g_j - wire word_sel = tag_matches[i] && ((WORD_SIZE == 1) || (word_idx == j)); - wire [WORD_SIZE-1:0] word_en = write_byteen & {WORD_SIZE{word_sel}}; + for (genvar j = 0; j < `CS_WORDS_PER_LINE; ++j) begin : g_words + wire word_en = ((`CS_WORDS_PER_LINE == 1) || (word_idx == j)); + wire [WORD_SIZE-1:0] write_mask = write_byteen & {WORD_SIZE{word_en && tag_matches[i]}}; assign bytes_data[j] = {WORD_SIZE{write}}; // only asserted on writes - assign bytes_wren[j] = {WORD_SIZE{init}} | (write ? word_en : {WORD_SIZE{evict_way[i]}}); + assign bytes_wren[j] = {WORD_SIZE{init}} | (write ? write_mask : {WORD_SIZE{evict_way_en}}); end assign byteen_wdata[i] = {dirty_data, bytes_data}; assign byteen_wren[i] = {dirty_wren, bytes_wren}; - assign {line_dirty, evict_byteen} = byteen_rdata[evict_way_idx_r]; end else begin : g_no_dirty_bytes assign byteen_wdata[i] = dirty_data; assign byteen_wren[i] = dirty_wren; - assign line_dirty = byteen_rdata[evict_way_idx_r]; - assign evict_byteen = '1; end end @@ -121,6 +118,13 @@ module VX_cache_data #( .rdata (byteen_rdata) ); + if (DIRTY_BYTES != 0) begin : g_line_dirty_and_byteen + assign {line_dirty, evict_byteen} = byteen_rdata[evict_way_idx_r]; + end else begin : g_line_dirty + assign line_dirty = byteen_rdata[evict_way_idx_r]; + assign evict_byteen = '1; + end + assign evict_data = line_rdata[evict_way_idx_r]; end else begin : g_no_writeback @@ -141,21 +145,17 @@ module VX_cache_data #( for (genvar i = 0; i < NUM_WAYS; ++i) begin : g_ways wire fill_way_en = (NUM_WAYS == 1) || evict_way[i]; if (WRITE_ENABLE != 0) begin : g_we - wire [`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] word_wdata; - wire [`CS_WORDS_PER_LINE-1:0][WORD_SIZE-1:0] word_wren; - for (genvar j = 0; j < `CS_WORDS_PER_LINE; 
++j) begin : g_words - wire word_en = (WORD_SIZE == 1) || (word_idx == j); - // warning: should prioritize the fill over write in case both are asserted - assign word_wdata[j] = fill ? fill_data[j] : write_data; - assign word_wren[j] = fill ? {WORD_SIZE{1'b1}} : (write_byteen & {WORD_SIZE{word_en}}); + wire [`CS_WORDS_PER_LINE-1:0][WORD_SIZE-1:0] write_wren; + for (genvar j = 0; j < `CS_WORDS_PER_LINE; ++j) begin : g_write_wren + wire word_en = (`CS_WORDS_PER_LINE == 1) || (word_idx == j); + assign write_wren[j] = write_byteen & {WORD_SIZE{word_en}}; end - wire way_en = fill ? fill_way_en : tag_matches[i]; - assign line_wdata[i] = word_wdata; - assign line_wren_w[i] = word_wren & {LINE_SIZE{way_en}}; + assign line_wdata[i] = fill ? fill_data : {`CS_WORDS_PER_LINE{write_word}}; + assign line_wren_w[i] = fill ? {LINE_SIZE{fill_way_en}} : (write_wren & {LINE_SIZE{tag_matches[i]}}); end else begin : g_ro `UNUSED_VAR (write) `UNUSED_VAR (write_byteen) - `UNUSED_VAR (write_data) + `UNUSED_VAR (write_word) `UNUSED_VAR (word_idx) assign line_wdata[i] = fill_data; assign line_wren_w[i] = {LINE_SIZE{fill_way_en}}; @@ -166,7 +166,7 @@ module VX_cache_data #( `UNUSED_VAR (write) `UNUSED_VAR (evict_way) `UNUSED_VAR (write_byteen) - `UNUSED_VAR (write_data) + `UNUSED_VAR (write_word) assign line_wdata = fill_data; assign line_wren = 1'b1; end diff --git a/hw/rtl/cache/VX_cache_wrap.sv b/hw/rtl/cache/VX_cache_wrap.sv index ca8c53edab..a9b872dd05 100644 --- a/hw/rtl/cache/VX_cache_wrap.sv +++ b/hw/rtl/cache/VX_cache_wrap.sv @@ -240,13 +240,13 @@ module VX_cache_wrap import VX_gpu_pkg::*; #( always @(posedge clk) begin if (core_req_fire) begin if (core_bus_if[i].req_data.rw) begin - `TRACE(1, ("%t: %s core-wr-req: addr=0x%0h, tag=0x%0h, req_idx=%0d, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `TO_FULL_ADDR(core_bus_if[i].req_data.addr), core_bus_if[i].req_data.tag, i, core_bus_if[i].req_data.byteen, core_bus_if[i].req_data.data, core_req_uuid)) + `TRACE(2, ("%t: %s 
core-wr-req: addr=0x%0h, tag=0x%0h, req_idx=%0d, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `TO_FULL_ADDR(core_bus_if[i].req_data.addr), core_bus_if[i].req_data.tag, i, core_bus_if[i].req_data.byteen, core_bus_if[i].req_data.data, core_req_uuid)) end else begin - `TRACE(1, ("%t: %s core-rd-req: addr=0x%0h, tag=0x%0h, req_idx=%0d (#%0d)\n", $time, INSTANCE_ID, `TO_FULL_ADDR(core_bus_if[i].req_data.addr), core_bus_if[i].req_data.tag, i, core_req_uuid)) + `TRACE(2, ("%t: %s core-rd-req: addr=0x%0h, tag=0x%0h, req_idx=%0d (#%0d)\n", $time, INSTANCE_ID, `TO_FULL_ADDR(core_bus_if[i].req_data.addr), core_bus_if[i].req_data.tag, i, core_req_uuid)) end end if (core_rsp_fire) begin - `TRACE(1, ("%t: %s core-rd-rsp: tag=0x%0h, req_idx=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID, core_bus_if[i].rsp_data.tag, i, core_bus_if[i].rsp_data.data, core_rsp_uuid)) + `TRACE(2, ("%t: %s core-rd-rsp: tag=0x%0h, req_idx=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID, core_bus_if[i].rsp_data.tag, i, core_bus_if[i].rsp_data.data, core_rsp_uuid)) end end end @@ -268,15 +268,15 @@ module VX_cache_wrap import VX_gpu_pkg::*; #( always @(posedge clk) begin if (mem_req_fire) begin if (mem_bus_if.req_data.rw) begin - `TRACE(1, ("%t: %s mem-wr-req: addr=0x%0h, tag=0x%0h, byteen=0x%h, data=0x%h (#%0d)\n", + `TRACE(2, ("%t: %s mem-wr-req: addr=0x%0h, tag=0x%0h, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `TO_FULL_ADDR(mem_bus_if.req_data.addr), mem_bus_if.req_data.tag, mem_bus_if.req_data.byteen, mem_bus_if.req_data.data, mem_req_uuid)) end else begin - `TRACE(1, ("%t: %s mem-rd-req: addr=0x%0h, tag=0x%0h (#%0d)\n", + `TRACE(2, ("%t: %s mem-rd-req: addr=0x%0h, tag=0x%0h (#%0d)\n", $time, INSTANCE_ID, `TO_FULL_ADDR(mem_bus_if.req_data.addr), mem_bus_if.req_data.tag, mem_req_uuid)) end end if (mem_rsp_fire) begin - `TRACE(1, ("%t: %s mem-rd-rsp: tag=0x%0h, data=0x%h (#%0d)\n", + `TRACE(2, ("%t: %s mem-rd-rsp: tag=0x%0h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, 
mem_bus_if.rsp_data.tag, mem_bus_if.rsp_data.data, mem_rsp_uuid)) end end diff --git a/hw/rtl/core/VX_alu_int.sv b/hw/rtl/core/VX_alu_int.sv index 53c7ae57aa..8e43d8f3f4 100644 --- a/hw/rtl/core/VX_alu_int.sv +++ b/hw/rtl/core/VX_alu_int.sv @@ -194,7 +194,7 @@ module VX_alu_int #( `ifdef DBG_TRACE_PIPELINE always @(posedge clk) begin if (br_enable) begin - `TRACE(1, ("%t: %s branch: wid=%0d, PC=0x%0h, taken=%b, dest=0x%0h (#%0d)\n", + `TRACE(2, ("%t: %s branch: wid=%0d, PC=0x%0h, taken=%b, dest=0x%0h (#%0d)\n", $time, INSTANCE_ID, br_wid, {commit_if.data.PC, 1'b0}, br_taken, {br_dest, 1'b0}, commit_if.data.uuid)) end end diff --git a/hw/rtl/core/VX_dcr_data.sv b/hw/rtl/core/VX_dcr_data.sv index 042c87e552..6a13e034a6 100644 --- a/hw/rtl/core/VX_dcr_data.sv +++ b/hw/rtl/core/VX_dcr_data.sv @@ -50,9 +50,9 @@ module VX_dcr_data import VX_gpu_pkg::*; ( `ifdef DBG_TRACE_PIPELINE always @(posedge clk) begin if (dcr_bus_if.write_valid) begin - `TRACE(1, ("%t: base-dcr: state=", $time)) + `TRACE(2, ("%t: base-dcr: state=", $time)) trace_base_dcr(1, dcr_bus_if.write_addr); - `TRACE(1, (", data=0x%h\n", dcr_bus_if.write_data)) + `TRACE(2, (", data=0x%h\n", dcr_bus_if.write_data)) end end `endif diff --git a/hw/rtl/core/VX_lsu_slice.sv b/hw/rtl/core/VX_lsu_slice.sv index 1f39ab5a75..67fc3eaa89 100644 --- a/hw/rtl/core/VX_lsu_slice.sv +++ b/hw/rtl/core/VX_lsu_slice.sv @@ -504,30 +504,30 @@ module VX_lsu_slice import VX_gpu_pkg::*; #( `ifdef DBG_TRACE_MEM always @(posedge clk) begin if (execute_if.valid && fence_lock) begin - `TRACE(1, ("%t: *** %s fence wait\n", $time, INSTANCE_ID)) + `TRACE(2, ("%t: *** %s fence wait\n", $time, INSTANCE_ID)) end if (mem_req_fire) begin if (mem_req_rw) begin - `TRACE(1, ("%t: %s Wr Req: wid=%0d, PC=0x%0h, tmask=%b, addr=", $time, INSTANCE_ID, execute_if.data.wid, {execute_if.data.PC, 1'b0}, mem_req_mask)) - `TRACE_ARRAY1D(1, "0x%h", full_addr, NUM_LANES) - `TRACE(1, (", flags=")) - `TRACE_ARRAY1D(1, "%b", mem_req_flags, NUM_LANES) - `TRACE(1, 
(", byteen=0x%0h, data=", mem_req_byteen)) - `TRACE_ARRAY1D(1, "0x%0h", mem_req_data, NUM_LANES) - `TRACE(1, (", sop=%b, eop=%b, tag=0x%0h (#%0d)\n", execute_if.data.sop, execute_if.data.eop, mem_req_tag, execute_if.data.uuid)) + `TRACE(2, ("%t: %s Wr Req: wid=%0d, PC=0x%0h, tmask=%b, addr=", $time, INSTANCE_ID, execute_if.data.wid, {execute_if.data.PC, 1'b0}, mem_req_mask)) + `TRACE_ARRAY1D(2, "0x%h", full_addr, NUM_LANES) + `TRACE(2, (", flags=")) + `TRACE_ARRAY1D(2, "%b", mem_req_flags, NUM_LANES) + `TRACE(2, (", byteen=0x%0h, data=", mem_req_byteen)) + `TRACE_ARRAY1D(2, "0x%0h", mem_req_data, NUM_LANES) + `TRACE(2, (", sop=%b, eop=%b, tag=0x%0h (#%0d)\n", execute_if.data.sop, execute_if.data.eop, mem_req_tag, execute_if.data.uuid)) end else begin - `TRACE(1, ("%t: %s Rd Req: wid=%0d, PC=0x%0h, tmask=%b, addr=", $time, INSTANCE_ID, execute_if.data.wid, {execute_if.data.PC, 1'b0}, mem_req_mask)) - `TRACE_ARRAY1D(1, "0x%h", full_addr, NUM_LANES) - `TRACE(1, (", flags=")) - `TRACE_ARRAY1D(1, "%b", mem_req_flags, NUM_LANES) - `TRACE(1, (", byteen=0x%0h, rd=%0d, sop=%b, eop=%b, tag=0x%0h (#%0d)\n", mem_req_byteen, execute_if.data.rd, execute_if.data.sop, execute_if.data.eop, mem_req_tag, execute_if.data.uuid)) + `TRACE(2, ("%t: %s Rd Req: wid=%0d, PC=0x%0h, tmask=%b, addr=", $time, INSTANCE_ID, execute_if.data.wid, {execute_if.data.PC, 1'b0}, mem_req_mask)) + `TRACE_ARRAY1D(2, "0x%h", full_addr, NUM_LANES) + `TRACE(2, (", flags=")) + `TRACE_ARRAY1D(2, "%b", mem_req_flags, NUM_LANES) + `TRACE(2, (", byteen=0x%0h, rd=%0d, sop=%b, eop=%b, tag=0x%0h (#%0d)\n", mem_req_byteen, execute_if.data.rd, execute_if.data.sop, execute_if.data.eop, mem_req_tag, execute_if.data.uuid)) end end if (mem_rsp_fire) begin - `TRACE(1, ("%t: %s Rsp: wid=%0d, PC=0x%0h, tmask=%b, rd=%0d, sop=%b, eop=%b, data=", + `TRACE(2, ("%t: %s Rsp: wid=%0d, PC=0x%0h, tmask=%b, rd=%0d, sop=%b, eop=%b, data=", $time, INSTANCE_ID, rsp_wid, {rsp_pc, 1'b0}, mem_rsp_mask, rsp_rd, mem_rsp_sop, mem_rsp_eop)) - 
`TRACE_ARRAY1D(1, "0x%0h", mem_rsp_data, NUM_LANES) - `TRACE(1, (", tag=0x%0h (#%0d)\n", mem_rsp_tag, rsp_uuid)) + `TRACE_ARRAY1D(2, "0x%0h", mem_rsp_data, NUM_LANES) + `TRACE(2, (", tag=0x%0h (#%0d)\n", mem_rsp_tag, rsp_uuid)) end end `endif diff --git a/hw/rtl/core/VX_scoreboard.sv b/hw/rtl/core/VX_scoreboard.sv index 1fe9a7f44d..5b01cc5504 100644 --- a/hw/rtl/core/VX_scoreboard.sv +++ b/hw/rtl/core/VX_scoreboard.sv @@ -206,7 +206,7 @@ module VX_scoreboard import VX_gpu_pkg::*; #( end else begin if (staging_if[w].valid && ~staging_if[w].ready) begin `ifdef DBG_TRACE_PIPELINE - `TRACE(3, ("%t: *** %s-stall: wid=%0d, PC=0x%0h, tmask=%b, cycles=%0d, inuse=%b (#%0d)\n", + `TRACE(4, ("%t: *** %s-stall: wid=%0d, PC=0x%0h, tmask=%b, cycles=%0d, inuse=%b (#%0d)\n", $time, INSTANCE_ID, w, {staging_if[w].data.PC, 1'b0}, staging_if[w].data.tmask, timeout_ctr, operands_busy, staging_if[w].data.uuid)) `endif diff --git a/hw/rtl/libs/VX_mem_coalescer.sv b/hw/rtl/libs/VX_mem_coalescer.sv index 760290a1c6..19a7040951 100644 --- a/hw/rtl/libs/VX_mem_coalescer.sv +++ b/hw/rtl/libs/VX_mem_coalescer.sv @@ -351,30 +351,30 @@ module VX_mem_coalescer #( always @(posedge clk) begin if (out_req_fire) begin if (out_req_rw) begin - `TRACE(1, ("%t: %s out-req-wr: valid=%b, addr=", $time, INSTANCE_ID, out_req_mask)) - `TRACE_ARRAY1D(1, "0x%h", out_req_addr, OUT_REQS) - `TRACE(1, (", flags=")) - `TRACE_ARRAY1D(1, "%b", out_req_flags, OUT_REQS) - `TRACE(1, (", byteen=")) - `TRACE_ARRAY1D(1, "0x%h", out_req_byteen, OUT_REQS) - `TRACE(1, (", data=")) - `TRACE_ARRAY1D(1, "0x%0h", out_req_data, OUT_REQS) + `TRACE(2, ("%t: %s out-req-wr: valid=%b, addr=", $time, INSTANCE_ID, out_req_mask)) + `TRACE_ARRAY1D(2, "0x%h", out_req_addr, OUT_REQS) + `TRACE(2, (", flags=")) + `TRACE_ARRAY1D(2, "%b", out_req_flags, OUT_REQS) + `TRACE(2, (", byteen=")) + `TRACE_ARRAY1D(2, "0x%h", out_req_byteen, OUT_REQS) + `TRACE(2, (", data=")) + `TRACE_ARRAY1D(2, "0x%0h", out_req_data, OUT_REQS) end else begin - `TRACE(1, 
("%d: %s out-req-rd: valid=%b, addr=", $time, INSTANCE_ID, out_req_mask)) - `TRACE_ARRAY1D(1, "0x%h", out_req_addr, OUT_REQS) - `TRACE(1, (", flags=")) - `TRACE_ARRAY1D(1, "%b", out_req_flags, OUT_REQS) + `TRACE(2, ("%d: %s out-req-rd: valid=%b, addr=", $time, INSTANCE_ID, out_req_mask)) + `TRACE_ARRAY1D(2, "0x%h", out_req_addr, OUT_REQS) + `TRACE(2, (", flags=")) + `TRACE_ARRAY1D(2, "%b", out_req_flags, OUT_REQS) end - `TRACE(1, (", offset=")) - `TRACE_ARRAY1D(1, "%0d", out_req_offset, NUM_REQS) - `TRACE(1, (", pmask=%b, coalesced=%0d, tag=0x%0h (#%0d)\n", out_req_pmask, $countones(out_req_pmask), out_req_tag, out_req_uuid)) + `TRACE(2, (", offset=")) + `TRACE_ARRAY1D(2, "%0d", out_req_offset, NUM_REQS) + `TRACE(2, (", pmask=%b, coalesced=%0d, tag=0x%0h (#%0d)\n", out_req_pmask, $countones(out_req_pmask), out_req_tag, out_req_uuid)) end if (out_rsp_fire) begin - `TRACE(1, ("%t: %s out-rsp: valid=%b, data=", $time, INSTANCE_ID, out_rsp_mask)) - `TRACE_ARRAY1D(1, "0x%0h", out_rsp_data, OUT_REQS) - `TRACE(1, (", offset=")) - `TRACE_ARRAY1D(1, "%0d", ibuf_dout_offset, NUM_REQS) - `TRACE(1, (", eop=%b, pmask=%b, tag=0x%0h (#%0d)\n", out_rsp_eop, ibuf_dout_pmask, out_rsp_tag, out_rsp_uuid)) + `TRACE(2, ("%t: %s out-rsp: valid=%b, data=", $time, INSTANCE_ID, out_rsp_mask)) + `TRACE_ARRAY1D(2, "0x%0h", out_rsp_data, OUT_REQS) + `TRACE(2, (", offset=")) + `TRACE_ARRAY1D(2, "%0d", ibuf_dout_offset, NUM_REQS) + `TRACE(2, (", eop=%b, pmask=%b, tag=0x%0h (#%0d)\n", out_rsp_eop, ibuf_dout_pmask, out_rsp_tag, out_rsp_uuid)) end end `endif diff --git a/hw/rtl/libs/VX_mem_scheduler.sv b/hw/rtl/libs/VX_mem_scheduler.sv index 2ff21655ab..523257eb4c 100644 --- a/hw/rtl/libs/VX_mem_scheduler.sv +++ b/hw/rtl/libs/VX_mem_scheduler.sv @@ -593,41 +593,41 @@ module VX_mem_scheduler #( always @(posedge clk) begin if (core_req_fire) begin if (core_req_rw) begin - `TRACE(1, ("%t: %s core-req-wr: valid=%b, addr=", $time, INSTANCE_ID, core_req_mask)) - `TRACE_ARRAY1D(1, "0x%h", core_req_addr, 
CORE_REQS) - `TRACE(1, (", byteen=")) - `TRACE_ARRAY1D(1, "0x%h", core_req_byteen, CORE_REQS) - `TRACE(1, (", data=")) - `TRACE_ARRAY1D(1, "0x%0h", core_req_data, CORE_REQS) + `TRACE(2, ("%t: %s core-req-wr: valid=%b, addr=", $time, INSTANCE_ID, core_req_mask)) + `TRACE_ARRAY1D(2, "0x%h", core_req_addr, CORE_REQS) + `TRACE(2, (", byteen=")) + `TRACE_ARRAY1D(2, "0x%h", core_req_byteen, CORE_REQS) + `TRACE(2, (", data=")) + `TRACE_ARRAY1D(2, "0x%0h", core_req_data, CORE_REQS) end else begin - `TRACE(1, ("%t: %s core-req-rd: valid=%b, addr=", $time, INSTANCE_ID, core_req_mask)) - `TRACE_ARRAY1D(1, "0x%h", core_req_addr, CORE_REQS) + `TRACE(2, ("%t: %s core-req-rd: valid=%b, addr=", $time, INSTANCE_ID, core_req_mask)) + `TRACE_ARRAY1D(2, "0x%h", core_req_addr, CORE_REQS) end - `TRACE(1, (", tag=0x%0h (#%0d)\n", core_req_tag, req_dbg_uuid)) + `TRACE(2, (", tag=0x%0h (#%0d)\n", core_req_tag, req_dbg_uuid)) end if (core_rsp_valid && core_rsp_ready) begin - `TRACE(1, ("%t: %s core-rsp: valid=%b, sop=%b, eop=%b, data=", $time, INSTANCE_ID, core_rsp_mask, core_rsp_sop, core_rsp_eop)) - `TRACE_ARRAY1D(1, "0x%0h", core_rsp_data, CORE_REQS) - `TRACE(1, (", tag=0x%0h (#%0d)\n", core_rsp_tag, rsp_dbg_uuid)) + `TRACE(2, ("%t: %s core-rsp: valid=%b, sop=%b, eop=%b, data=", $time, INSTANCE_ID, core_rsp_mask, core_rsp_sop, core_rsp_eop)) + `TRACE_ARRAY1D(2, "0x%0h", core_rsp_data, CORE_REQS) + `TRACE(2, (", tag=0x%0h (#%0d)\n", core_rsp_tag, rsp_dbg_uuid)) end if (| mem_req_fire_s) begin if (| mem_req_rw_s) begin - `TRACE(1, ("%t: %s mem-req-wr: valid=%b, addr=", $time, INSTANCE_ID, mem_req_mask_s)) - `TRACE_ARRAY1D(1, "0x%h", mem_req_addr_s, CORE_CHANNELS) - `TRACE(1, (", byteen=")) - `TRACE_ARRAY1D(1, "0x%h", mem_req_byteen_s, CORE_CHANNELS) - `TRACE(1, (", data=")) - `TRACE_ARRAY1D(1, "0x%0h", mem_req_data_s, CORE_CHANNELS) + `TRACE(2, ("%t: %s mem-req-wr: valid=%b, addr=", $time, INSTANCE_ID, mem_req_mask_s)) + `TRACE_ARRAY1D(2, "0x%h", mem_req_addr_s, CORE_CHANNELS) + `TRACE(2, 
(", byteen=")) + `TRACE_ARRAY1D(2, "0x%h", mem_req_byteen_s, CORE_CHANNELS) + `TRACE(2, (", data=")) + `TRACE_ARRAY1D(2, "0x%0h", mem_req_data_s, CORE_CHANNELS) end else begin - `TRACE(1, ("%t: %s mem-req-rd: valid=%b, addr=", $time, INSTANCE_ID, mem_req_mask_s)) - `TRACE_ARRAY1D(1, "0x%h", mem_req_addr_s, CORE_CHANNELS) + `TRACE(2, ("%t: %s mem-req-rd: valid=%b, addr=", $time, INSTANCE_ID, mem_req_mask_s)) + `TRACE_ARRAY1D(2, "0x%h", mem_req_addr_s, CORE_CHANNELS) end - `TRACE(1, (", ibuf_idx=%0d, batch_idx=%0d (#%0d)\n", ibuf_waddr_s, req_batch_idx, mem_req_dbg_uuid)) + `TRACE(2, (", ibuf_idx=%0d, batch_idx=%0d (#%0d)\n", ibuf_waddr_s, req_batch_idx, mem_req_dbg_uuid)) end if (mem_rsp_fire_s) begin - `TRACE(1, ("%t: %s mem-rsp: valid=%b, data=", $time, INSTANCE_ID, mem_rsp_mask_s)) - `TRACE_ARRAY1D(1, "0x%0h", mem_rsp_data_s, CORE_CHANNELS) - `TRACE(1, (", ibuf_idx=%0d, batch_idx=%0d (#%0d)\n", ibuf_raddr, rsp_batch_idx, mem_rsp_dbg_uuid)) + `TRACE(2, ("%t: %s mem-rsp: valid=%b, data=", $time, INSTANCE_ID, mem_rsp_mask_s)) + `TRACE_ARRAY1D(2, "0x%0h", mem_rsp_data_s, CORE_CHANNELS) + `TRACE(2, (", ibuf_idx=%0d, batch_idx=%0d (#%0d)\n", ibuf_raddr, rsp_batch_idx, mem_rsp_dbg_uuid)) end end `endif diff --git a/hw/rtl/mem/VX_gbar_unit.sv b/hw/rtl/mem/VX_gbar_unit.sv index c9707748fe..ac4c09349b 100644 --- a/hw/rtl/mem/VX_gbar_unit.sv +++ b/hw/rtl/mem/VX_gbar_unit.sv @@ -60,11 +60,11 @@ module VX_gbar_unit #( `ifdef DBG_TRACE_GBAR always @(posedge clk) begin if (gbar_bus_if.req_valid && gbar_bus_if.req_ready) begin - `TRACE(1, ("%t: %s acquire: bar_id=%0d, size=%0d, core_id=%0d\n", + `TRACE(2, ("%t: %s acquire: bar_id=%0d, size=%0d, core_id=%0d\n", $time, INSTANCE_ID, gbar_bus_if.req_id, gbar_bus_if.req_size_m1, gbar_bus_if.req_core_id)) end if (gbar_bus_if.rsp_valid) begin - `TRACE(1, ("%t: %s release: bar_id=%0d\n", $time, INSTANCE_ID, gbar_bus_if.rsp_id)) + `TRACE(2, ("%t: %s release: bar_id=%0d\n", $time, INSTANCE_ID, gbar_bus_if.rsp_id)) end end `endif diff 
--git a/hw/rtl/mem/VX_local_mem.sv b/hw/rtl/mem/VX_local_mem.sv index 2ba66347e7..03c4acdd19 100644 --- a/hw/rtl/mem/VX_local_mem.sv +++ b/hw/rtl/mem/VX_local_mem.sv @@ -330,15 +330,15 @@ module VX_local_mem import VX_gpu_pkg::*; #( always @(posedge clk) begin if (mem_bus_if[i].req_valid && mem_bus_if[i].req_ready) begin if (mem_bus_if[i].req_data.rw) begin - `TRACE(1, ("%t: %s wr-req: req_idx=%0d, addr=0x%0h, tag=0x%0h, byteen=0x%h, data=0x%h (#%0d)\n", + `TRACE(2, ("%t: %s wr-req: req_idx=%0d, addr=0x%0h, tag=0x%0h, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, i, mem_bus_if[i].req_data.addr, mem_bus_if[i].req_data.tag, mem_bus_if[i].req_data.byteen, mem_bus_if[i].req_data.data, req_uuid[i])) end else begin - `TRACE(1, ("%t: %s rd-req: req_idx=%0d, addr=0x%0h, tag=0x%0h (#%0d)\n", + `TRACE(2, ("%t: %s rd-req: req_idx=%0d, addr=0x%0h, tag=0x%0h (#%0d)\n", $time, INSTANCE_ID, i, mem_bus_if[i].req_data.addr, mem_bus_if[i].req_data.tag, req_uuid[i])) end end if (mem_bus_if[i].rsp_valid && mem_bus_if[i].rsp_ready) begin - `TRACE(1, ("%t: %s rd-rsp: req_idx=%0d, tag=0x%0h, data=0x%h (#%0d)\n", + `TRACE(2, ("%t: %s rd-rsp: req_idx=%0d, tag=0x%0h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, i, mem_bus_if[i].rsp_data.tag, mem_bus_if[i].rsp_data.data[i], rsp_uuid[i])) end end diff --git a/tests/regression/dogfood/testcases.h b/tests/regression/dogfood/testcases.h index f5760ec06e..f3562bb179 100644 --- a/tests/regression/dogfood/testcases.h +++ b/tests/regression/dogfood/testcases.h @@ -141,7 +141,7 @@ class Test_IADD : public ITestCase { for (uint32_t i = 0; i < n; ++i) { auto ref = a[i] + b[i]; if (c[i] != ref) { - std::cout << "error at result #" << i << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl; + std::cout << "error at result #" << std::dec << i << std::hex << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl; ++errors; } } @@ -171,7 +171,7 @@ class Test_IMUL : public 
ITestCase { for (uint32_t i = 0; i < n; ++i) { auto ref = a[i] * b[i]; if (c[i] != ref) { - std::cout << "error at result #" << i << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl; + std::cout << "error at result #" << std::dec << i << std::hex << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl; ++errors; } } @@ -201,7 +201,7 @@ class Test_IDIV : public ITestCase { for (uint32_t i = 0; i < n; ++i) { auto ref = a[i] / b[i]; if (c[i] != ref) { - std::cout << "error at result #" << i << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl; + std::cout << "error at result #" << std::dec << i << std::hex << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl; ++errors; } } @@ -233,7 +233,7 @@ class Test_IDIV_MUL : public ITestCase { auto y = a[i] * b[i]; auto ref = x + y; if (c[i] != ref) { - std::cout << "error at result #" << i << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl; + std::cout << "error at result #" << std::dec << i << std::hex << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl; ++errors; } } @@ -263,7 +263,7 @@ class Test_FADD : public ITestCase { for (uint32_t i = 0; i < n; ++i) { auto ref = a[i] + b[i]; if (!almost_equal(c[i], ref)) { - std::cout << "error at result #" << i << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl; + std::cout << "error at result #" << std::dec << i << std::hex << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl; ++errors; } } @@ -293,7 +293,7 @@ class Test_FSUB : public ITestCase { for (uint32_t i = 0; i < n; ++i) { auto ref = a[i] - b[i]; if (!almost_equal(c[i], ref)) { - std::cout << "error at result #" << i << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", 
b=" << b[i] << std::endl; + std::cout << "error at result #" << std::dec << i << std::hex << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl; ++errors; } } @@ -323,7 +323,7 @@ class Test_FMUL : public ITestCase { for (uint32_t i = 0; i < n; ++i) { auto ref = a[i] * b[i]; if (!almost_equal(c[i], ref)) { - std::cout << "error at result #" << i << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl; + std::cout << "error at result #" << std::dec << i << std::hex << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl; ++errors; } } @@ -353,7 +353,7 @@ class Test_FMADD : public ITestCase { for (uint32_t i = 0; i < n; ++i) { auto ref = a[i] * b[i] + b[i]; if (!almost_equal(c[i], ref)) { - std::cout << "error at result #" << i << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl; + std::cout << "error at result #" << std::dec << i << std::hex << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl; ++errors; } } @@ -383,7 +383,7 @@ class Test_FMSUB : public ITestCase { for (uint32_t i = 0; i < n; ++i) { auto ref = a[i] * b[i] - b[i]; if (!almost_equal(c[i], ref)) { - std::cout << "error at result #" << i << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl; + std::cout << "error at result #" << std::dec << i << std::hex << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl; ++errors; } } @@ -413,7 +413,7 @@ class Test_FNMADD : public ITestCase { for (uint32_t i = 0; i < n; ++i) { auto ref = -a[i] * b[i] - b[i]; if (!almost_equal(c[i], ref)) { - std::cout << "error at result #" << i << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl; + std::cout << "error at result #" << std::dec << i << std::hex << ": expected=" << ref << ", actual=" << 
c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl; ++errors; } } @@ -443,7 +443,7 @@ class Test_FNMSUB : public ITestCase { for (uint32_t i = 0; i < n; ++i) { auto ref = -a[i] * b[i] + b[i]; if (!almost_equal(c[i], ref)) { - std::cout << "error at result #" << i << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl; + std::cout << "error at result #" << std::dec << i << std::hex << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl; ++errors; } } @@ -475,7 +475,7 @@ class Test_FNMADD_MADD : public ITestCase { auto y = a[i] * b[i] + b[i]; auto ref = x + y; if (!almost_equal(c[i], ref)) { - std::cout << "error at result #" << i << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl; + std::cout << "error at result #" << std::dec << i << std::hex << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl; ++errors; } } @@ -505,7 +505,7 @@ class Test_FDIV : public ITestCase { for (uint32_t i = 0; i < n; ++i) { auto ref = a[i] / b[i]; if (!almost_equal(c[i], ref)) { - std::cout << "error at result #" << i << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl; + std::cout << "error at result #" << std::dec << i << std::hex << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl; ++errors; } } @@ -537,7 +537,7 @@ class Test_FDIV2 : public ITestCase { auto y = b[i] / a[i]; auto ref = x + y; if (!almost_equal(c[i], ref)) { - std::cout << "error at result #" << i << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl; + std::cout << "error at result #" << std::dec << i << std::hex << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl; ++errors; } } @@ -568,7 +568,7 @@ class Test_FSQRT : public ITestCase { for (uint32_t i = 0; i < n; ++i) { auto ref = 
sqrt(a[i] * b[i]); if (!almost_equal(c[i], ref)) { - std::cout << "error at result #" << i << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl; + std::cout << "error at result #" << std::dec << i << std::hex << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl; ++errors; } } @@ -600,7 +600,7 @@ class Test_FTOI : public ITestCase { auto x = a[i] + b[i]; auto ref = (int32_t)x; if (c[i] != ref) { - std::cout << "error at result #" << i << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl; + std::cout << "error at result #" << std::dec << i << std::hex << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl; ++errors; } } @@ -632,7 +632,7 @@ class Test_FTOU : public ITestCase { auto x = a[i] + b[i]; auto ref = (uint32_t)x; if (c[i] != ref) { - std::cout << "error at result #" << i << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl; + std::cout << "error at result #" << std::dec << i << std::hex << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl; ++errors; } } @@ -663,7 +663,7 @@ class Test_ITOF : public ITestCase { auto x = a[i] + b[i]; auto ref = (float)x; if (!almost_equal(c[i], ref)) { - std::cout << "error at result #" << i << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl; + std::cout << "error at result #" << std::dec << i << std::hex << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl; ++errors; } } @@ -694,7 +694,7 @@ class Test_UTOF : public ITestCase { auto x = a[i] + b[i]; auto ref = (float)x; if (!almost_equal(c[i], ref)) { - std::cout << "error at result #" << i << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl; + std::cout << "error at result #" << std::dec << i << 
std::hex << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl; ++errors; } } @@ -724,7 +724,7 @@ class Test_FCLAMP : public ITestCase { for (uint32_t i = 0; i < n; ++i) { auto ref = fmin(fmax(1.0f, a[i]), b[i]); if (!almost_equal(c[i], ref)) { - std::cout << "error at result #" << i << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl; + std::cout << "error at result #" << std::dec << i << std::hex << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl; ++errors; } } @@ -754,7 +754,7 @@ class Test_ICLAMP : public ITestCase { for (uint32_t i = 0; i < n; ++i) { auto ref = std::min(std::max(1, a[i]), b[i]); if (c[i] != ref) { - std::cout << "error at result #" << i << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl; + std::cout << "error at result #" << std::dec << i << std::hex << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl; ++errors; } } @@ -787,7 +787,7 @@ class Test_TRIGO : public ITestCase { ref = sinf(ref); } if (!almost_equal(c[i], ref)) { - std::cout << "error at result #" << i << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl; + std::cout << "error at result #" << std::dec << i << std::hex << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl; ++errors; } } @@ -820,7 +820,7 @@ class Test_BAR : public ITestCase { for (uint32_t i = 0; i < n; ++i) { uint32_t ref = a[i] + 1; if (c[i] != ref) { - std::cout << "error at result #" << i << ": expected=" << std::hex << ref << ", actual=" << c[i] << std::endl; + std::cout << "error at result #" << std::dec << i << std::hex << ": expected=" << std::hex << ref << ", actual=" << c[i] << std::endl; ++errors; } } @@ -857,7 +857,7 @@ class Test_GBAR : public ITestCase { for (uint32_t i = 0; i < n; ++i) { uint32_t ref = 
a[i] + 1; if (c[i] != ref) { - std::cout << "error at result #" << i << ": expected=" << std::hex << ref << ", actual=" << c[i] << std::endl; + std::cout << "error at result #" << std::dec << i << std::hex << ": expected=" << std::hex << ref << ", actual=" << c[i] << std::endl; ++errors; } } From 91fee5da1154aa891362e864dd1c79ee9cd67a32 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Thu, 17 Oct 2024 11:25:17 -0700 Subject: [PATCH 296/407] minor update --- hw/rtl/cache/VX_cache_bank.sv | 2 +- hw/rtl/cache/VX_cache_data.sv | 29 ++++++++++++++++------------- 2 files changed, 17 insertions(+), 14 deletions(-) diff --git a/hw/rtl/cache/VX_cache_bank.sv b/hw/rtl/cache/VX_cache_bank.sv index 0e16e6c658..9f0575328a 100644 --- a/hw/rtl/cache/VX_cache_bank.sv +++ b/hw/rtl/cache/VX_cache_bank.sv @@ -440,7 +440,7 @@ module VX_cache_bank #( end else begin if (~crsp_queue_stall) begin post_hazard <= rdw_hazard; - rdw_hazard <= do_write_st0 && valid_sel && ~(is_write_sel || (is_same_line && !WRITEBACK && (/*is_fill_sel ||*/is_flush_sel))); + rdw_hazard <= do_write_st0 && valid_sel && ~(is_write_sel || (is_same_line && !WRITEBACK && (is_fill_sel || is_flush_sel))); end end end diff --git a/hw/rtl/cache/VX_cache_data.sv b/hw/rtl/cache/VX_cache_data.sv index 7b5f1c5525..aeb4a11b39 100644 --- a/hw/rtl/cache/VX_cache_data.sv +++ b/hw/rtl/cache/VX_cache_data.sv @@ -79,18 +79,20 @@ module VX_cache_data #( wire [NUM_WAYS-1:0][BYTEEN_DATAW-1:0] byteen_wren; for (genvar i = 0; i < NUM_WAYS; ++i) begin : g_byteen_wdata + wire evict = fill || flush; wire evict_way_en = (NUM_WAYS == 1) || evict_way[i]; wire dirty_data = write; // only asserted on writes - wire dirty_wren = init || (write ? 
tag_matches[i] : evict_way_en); + wire dirty_wren = init || (evict && evict_way_en) || (write && tag_matches[i]); if (DIRTY_BYTES != 0) begin : g_dirty_bytes - wire [`CS_WORDS_PER_LINE-1:0][WORD_SIZE-1:0] bytes_data; - wire [`CS_WORDS_PER_LINE-1:0][WORD_SIZE-1:0] bytes_wren; - for (genvar j = 0; j < `CS_WORDS_PER_LINE; ++j) begin : g_words - wire word_en = ((`CS_WORDS_PER_LINE == 1) || (word_idx == j)); - wire [WORD_SIZE-1:0] write_mask = write_byteen & {WORD_SIZE{word_en && tag_matches[i]}}; - assign bytes_data[j] = {WORD_SIZE{write}}; // only asserted on writes - assign bytes_wren[j] = {WORD_SIZE{init}} | (write ? write_mask : {WORD_SIZE{evict_way_en}}); + wire [`CS_WORDS_PER_LINE-1:0][WORD_SIZE-1:0] write_mask; + for (genvar j = 0; j < `CS_WORDS_PER_LINE; ++j) begin : g_write_mask + wire word_en = (`CS_WORDS_PER_LINE == 1) || (word_idx == j); + assign write_mask[j] = write_byteen & {WORD_SIZE{word_en}}; end + wire [LINE_SIZE-1:0] bytes_data = {LINE_SIZE{write}}; // only asserted on writes + wire [LINE_SIZE-1:0] bytes_wren = {LINE_SIZE{init}} + | {LINE_SIZE{evict && evict_way_en}} + | ({LINE_SIZE{write && tag_matches[i]}} & write_mask); assign byteen_wdata[i] = {dirty_data, bytes_data}; assign byteen_wren[i] = {dirty_wren, bytes_wren}; end else begin : g_no_dirty_bytes @@ -145,13 +147,14 @@ module VX_cache_data #( for (genvar i = 0; i < NUM_WAYS; ++i) begin : g_ways wire fill_way_en = (NUM_WAYS == 1) || evict_way[i]; if (WRITE_ENABLE != 0) begin : g_we - wire [`CS_WORDS_PER_LINE-1:0][WORD_SIZE-1:0] write_wren; - for (genvar j = 0; j < `CS_WORDS_PER_LINE; ++j) begin : g_write_wren + wire [`CS_WORDS_PER_LINE-1:0][WORD_SIZE-1:0] write_mask; + for (genvar j = 0; j < `CS_WORDS_PER_LINE; ++j) begin : g_write_mask wire word_en = (`CS_WORDS_PER_LINE == 1) || (word_idx == j); - assign write_wren[j] = write_byteen & {WORD_SIZE{word_en}}; + assign write_mask[j] = write_byteen & {WORD_SIZE{word_en}}; end - assign line_wdata[i] = fill ? 
fill_data : {`CS_WORDS_PER_LINE{write_word}}; - assign line_wren_w[i] = fill ? {LINE_SIZE{fill_way_en}} : (write_wren & {LINE_SIZE{tag_matches[i]}}); + assign line_wdata[i] = (fill && fill_way_en) ? fill_data : {`CS_WORDS_PER_LINE{write_word}}; + assign line_wren_w[i] = {LINE_SIZE{fill && fill_way_en}} + | ({LINE_SIZE{write && tag_matches[i]}} & write_mask); end else begin : g_ro `UNUSED_VAR (write) `UNUSED_VAR (write_byteen) From 6b1091e08f78d152e6fb560b350f82f60fedd002 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Thu, 17 Oct 2024 14:07:22 -0700 Subject: [PATCH 297/407] minor update --- hw/rtl/cache/VX_cache_bank.sv | 4 +++- hw/rtl/cache/VX_cache_mshr.sv | 4 ++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/hw/rtl/cache/VX_cache_bank.sv b/hw/rtl/cache/VX_cache_bank.sv index 9f0575328a..a24a07ee9d 100644 --- a/hw/rtl/cache/VX_cache_bank.sv +++ b/hw/rtl/cache/VX_cache_bank.sv @@ -616,7 +616,9 @@ module VX_cache_bank #( .ready_out (core_rsp_ready) ); - assign crsp_queue_stall = crsp_queue_valid && ~crsp_queue_ready; + // we use 'do_read_st1' instead 'crsp_queue_valid' + // to remove costly 'is_hit_st1' signal from critical paths. 
+ assign crsp_queue_stall = do_read_st1 && ~crsp_queue_ready; // schedule memory request diff --git a/hw/rtl/cache/VX_cache_mshr.sv b/hw/rtl/cache/VX_cache_mshr.sv index ae6ebb7feb..17546ba2ad 100644 --- a/hw/rtl/cache/VX_cache_mshr.sv +++ b/hw/rtl/cache/VX_cache_mshr.sv @@ -101,8 +101,8 @@ module VX_cache_mshr #( ); `UNUSED_PARAM (BANK_ID) - reg [`CS_LINE_ADDR_WIDTH-1:0] addr_table [MSHR_SIZE-1:0]; - reg [MSHR_ADDR_WIDTH-1:0] next_index [MSHR_SIZE-1:0]; + reg [`CS_LINE_ADDR_WIDTH-1:0] addr_table [0:MSHR_SIZE-1]; + reg [MSHR_ADDR_WIDTH-1:0] next_index [0:MSHR_SIZE-1]; reg [MSHR_SIZE-1:0] valid_table, valid_table_n; reg [MSHR_SIZE-1:0] next_table, next_table_x, next_table_n; From 8f29ad58aeaa2525b6f9ddbfbe404fb7e9ede7ab Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Fri, 18 Oct 2024 23:54:20 -0700 Subject: [PATCH 298/407] block ram redesign to support synthesizable write-first mode --- hw/rtl/VX_platform.vh | 3 + hw/rtl/cache/VX_cache.sv | 20 +- hw/rtl/cache/VX_cache_bank.sv | 4 +- hw/rtl/cache/VX_cache_bypass.sv | 2 +- hw/rtl/cache/VX_cache_cluster.sv | 18 +- hw/rtl/cache/VX_cache_data.sv | 93 ++++----- hw/rtl/cache/VX_cache_repl.sv | 29 ++- hw/rtl/cache/VX_cache_top.sv | 12 +- hw/rtl/cache/VX_cache_wrap.sv | 18 +- hw/rtl/core/VX_operands.sv | 4 +- hw/rtl/libs/VX_dp_ram.sv | 315 +++++++++++++++---------------- hw/rtl/libs/VX_fifo_queue.sv | 43 +---- hw/rtl/libs/VX_scope_tap.sv | 8 +- hw/rtl/libs/VX_sp_ram.sv | 4 +- hw/rtl/mem/VX_local_mem.sv | 4 +- 15 files changed, 265 insertions(+), 312 deletions(-) diff --git a/hw/rtl/VX_platform.vh b/hw/rtl/VX_platform.vh index 8ea849ed3b..eb58e17989 100644 --- a/hw/rtl/VX_platform.vh +++ b/hw/rtl/VX_platform.vh @@ -173,6 +173,7 @@ endgenerate `ifdef QUARTUS `define MAX_FANOUT 8 +`define MAX_LUTRAM 1024 `define IF_DATA_SIZE(x) $bits(x.data) `define USE_BLOCK_BRAM (* ramstyle = "block" *) `define USE_FAST_BRAM (* ramstyle = "MLAB, no_rw_check" *) @@ -182,6 +183,7 @@ endgenerate `define STRING string `elsif VIVADO `define 
MAX_FANOUT 8 +`define MAX_LUTRAM 1024 `define IF_DATA_SIZE(x) $bits(x.data) `define USE_BLOCK_BRAM (* ram_style = "block" *) `define USE_FAST_BRAM (* ram_style = "distributed" *) @@ -191,6 +193,7 @@ endgenerate `define STRING `else `define MAX_FANOUT 8 +`define MAX_LUTRAM 1024 `define IF_DATA_SIZE(x) x.DATA_WIDTH `define USE_BLOCK_BRAM `define USE_FAST_BRAM diff --git a/hw/rtl/cache/VX_cache.sv b/hw/rtl/cache/VX_cache.sv index b27b2df312..40f062eccb 100644 --- a/hw/rtl/cache/VX_cache.sv +++ b/hw/rtl/cache/VX_cache.sv @@ -20,22 +20,22 @@ module VX_cache import VX_gpu_pkg::*; #( parameter NUM_REQS = 4, // Size of cache in bytes - parameter CACHE_SIZE = 4096, + parameter CACHE_SIZE = 32768, // Size of line inside a bank in bytes parameter LINE_SIZE = 64, // Number of banks - parameter NUM_BANKS = 1, + parameter NUM_BANKS = 4, // Number of associative ways - parameter NUM_WAYS = 1, + parameter NUM_WAYS = 4, // Size of a word in bytes - parameter WORD_SIZE = `XLEN/8, + parameter WORD_SIZE = 16, // Core Response Queue Size - parameter CRSQ_SIZE = 2, + parameter CRSQ_SIZE = 4, // Miss Reserv Queue Knob - parameter MSHR_SIZE = 8, + parameter MSHR_SIZE = 16, // Memory Response Queue Size - parameter MRSQ_SIZE = 0, + parameter MRSQ_SIZE = 4, // Memory Request Queue Size parameter MREQ_SIZE = 4, @@ -49,7 +49,7 @@ module VX_cache import VX_gpu_pkg::*; #( parameter DIRTY_BYTES = 0, // Replacement policy - parameter REPL_POLICY = `CS_REPL_CYCLIC, + parameter REPL_POLICY = `CS_REPL_CYCLIC, // Request debug identifier parameter UUID_WIDTH = 0, @@ -61,10 +61,10 @@ module VX_cache import VX_gpu_pkg::*; #( parameter FLAGS_WIDTH = 0, // Core response output register - parameter CORE_OUT_BUF = 0, + parameter CORE_OUT_BUF = 3, // Memory request output register - parameter MEM_OUT_BUF = 0 + parameter MEM_OUT_BUF = 3 ) ( // PERF `ifdef PERF_ENABLE diff --git a/hw/rtl/cache/VX_cache_bank.sv b/hw/rtl/cache/VX_cache_bank.sv index a24a07ee9d..20c0c06122 100644 --- 
a/hw/rtl/cache/VX_cache_bank.sv +++ b/hw/rtl/cache/VX_cache_bank.sv @@ -295,6 +295,8 @@ module VX_cache_bank #( assign req_uuid_sel = '0; end + wire [`CS_LINE_SEL_BITS-1:0] line_idx_sel = addr_sel[`CS_LINE_SEL_BITS-1:0]; + wire is_init_sel = init_valid; wire is_creq_sel = creq_enable || replay_enable; wire is_fill_sel = fill_enable; @@ -364,6 +366,7 @@ module VX_cache_bank #( .hit_way (tag_matches_st1), .repl_valid (do_fill_st0 && ~pipe_stall), .repl_line (line_idx_st0), + .repl_line_n(line_idx_sel), .repl_way (victim_way_st0) ); @@ -430,7 +433,6 @@ module VX_cache_bank #( // The r/w hazard is also not needed for next writethrough fill/flush to the same line. // For reads or writeback fill/flush to the same line, we sill need the hazard // because the data writeen in st1 cannot be read at the same time in st0 without extra forwarding logic. - wire [`CS_LINE_SEL_BITS-1:0] line_idx_sel = addr_sel[`CS_LINE_SEL_BITS-1:0]; wire is_write_sel = is_creq_sel && rw_sel; wire is_same_line = (line_idx_sel == line_idx_st0); always @(posedge clk) begin diff --git a/hw/rtl/cache/VX_cache_bypass.sv b/hw/rtl/cache/VX_cache_bypass.sv index 4b3b3a59ab..8f62343640 100644 --- a/hw/rtl/cache/VX_cache_bypass.sv +++ b/hw/rtl/cache/VX_cache_bypass.sv @@ -268,7 +268,7 @@ module VX_cache_bypass #( for (genvar i = 0; i < NUM_REQS; ++i) begin : g_core_rsp_in_valid assign core_rsp_in_valid[i] = core_bus_out_if[i].rsp_valid || (is_mem_rsp_nc && rsp_idx == REQ_SEL_WIDTH'(i)); end - + for (genvar i = 0; i < NUM_REQS; ++i) begin : g_core_rsp_in_ready assign core_bus_out_if[i].rsp_ready = core_rsp_in_ready[i]; end diff --git a/hw/rtl/cache/VX_cache_cluster.sv b/hw/rtl/cache/VX_cache_cluster.sv index b4c2db979c..32662e848c 100644 --- a/hw/rtl/cache/VX_cache_cluster.sv +++ b/hw/rtl/cache/VX_cache_cluster.sv @@ -24,22 +24,22 @@ module VX_cache_cluster import VX_gpu_pkg::*; #( parameter NUM_REQS = 4, // Size of cache in bytes - parameter CACHE_SIZE = 16384, + parameter CACHE_SIZE = 32768, // Size of 
line inside a bank in bytes parameter LINE_SIZE = 64, // Number of banks - parameter NUM_BANKS = 1, + parameter NUM_BANKS = 4, // Number of associative ways parameter NUM_WAYS = 4, // Size of a word in bytes - parameter WORD_SIZE = 4, + parameter WORD_SIZE = 16, // Core Response Queue Size - parameter CRSQ_SIZE = 2, + parameter CRSQ_SIZE = 4, // Miss Reserv Queue Knob - parameter MSHR_SIZE = 8, + parameter MSHR_SIZE = 16, // Memory Response Queue Size - parameter MRSQ_SIZE = 0, + parameter MRSQ_SIZE = 4, // Memory Request Queue Size parameter MREQ_SIZE = 4, @@ -53,7 +53,7 @@ module VX_cache_cluster import VX_gpu_pkg::*; #( parameter DIRTY_BYTES = 0, // Replacement policy - parameter REPL_POLICY = `CS_REPL_CYCLIC, + parameter REPL_POLICY = `CS_REPL_CYCLIC, // Request debug identifier parameter UUID_WIDTH = 0, @@ -68,10 +68,10 @@ module VX_cache_cluster import VX_gpu_pkg::*; #( parameter NC_ENABLE = 0, // Core response output buffer - parameter CORE_OUT_BUF = 0, + parameter CORE_OUT_BUF = 3, // Memory request output buffer - parameter MEM_OUT_BUF = 0 + parameter MEM_OUT_BUF = 3 ) ( input wire clk, input wire reset, diff --git a/hw/rtl/cache/VX_cache_data.sv b/hw/rtl/cache/VX_cache_data.sv index aeb4a11b39..22326e63be 100644 --- a/hw/rtl/cache/VX_cache_data.sv +++ b/hw/rtl/cache/VX_cache_data.sv @@ -56,7 +56,7 @@ module VX_cache_data #( `UNUSED_PARAM (WORD_SIZE) `UNUSED_VAR (stall) - localparam BYTEENW = (WRITE_ENABLE != 0 || NUM_WAYS != 1) ? (LINE_SIZE * NUM_WAYS) : 1; + localparam BYTEENW = (WRITE_ENABLE != 0) ? 
LINE_SIZE : 1; wire [NUM_WAYS-1:0][`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] line_rdata; @@ -137,61 +137,50 @@ module VX_cache_data #( assign evict_byteen = '0; end - wire [NUM_WAYS-1:0][`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] line_wdata; - wire [BYTEENW-1:0] line_wren; - wire line_write; - wire line_read; + for (genvar i = 0; i < NUM_WAYS; ++i) begin : g_data_store + wire [`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] line_wdata; + wire [BYTEENW-1:0] line_wren; - if (BYTEENW != 1) begin : g_wdata - wire [NUM_WAYS-1:0][LINE_SIZE-1:0] line_wren_w; - for (genvar i = 0; i < NUM_WAYS; ++i) begin : g_ways - wire fill_way_en = (NUM_WAYS == 1) || evict_way[i]; - if (WRITE_ENABLE != 0) begin : g_we - wire [`CS_WORDS_PER_LINE-1:0][WORD_SIZE-1:0] write_mask; - for (genvar j = 0; j < `CS_WORDS_PER_LINE; ++j) begin : g_write_mask - wire word_en = (`CS_WORDS_PER_LINE == 1) || (word_idx == j); - assign write_mask[j] = write_byteen & {WORD_SIZE{word_en}}; - end - assign line_wdata[i] = (fill && fill_way_en) ? fill_data : {`CS_WORDS_PER_LINE{write_word}}; - assign line_wren_w[i] = {LINE_SIZE{fill && fill_way_en}} - | ({LINE_SIZE{write && tag_matches[i]}} & write_mask); - end else begin : g_ro - `UNUSED_VAR (write) - `UNUSED_VAR (write_byteen) - `UNUSED_VAR (write_word) - `UNUSED_VAR (word_idx) - assign line_wdata[i] = fill_data; - assign line_wren_w[i] = {LINE_SIZE{fill_way_en}}; + wire fill_way_en = (NUM_WAYS == 1) || evict_way[i]; + + if (WRITE_ENABLE != 0) begin : g_wdata + wire [`CS_WORDS_PER_LINE-1:0][WORD_SIZE-1:0] write_mask; + for (genvar j = 0; j < `CS_WORDS_PER_LINE; ++j) begin : g_write_mask + wire word_en = (`CS_WORDS_PER_LINE == 1) || (word_idx == j); + assign write_mask[j] = write_byteen & {WORD_SIZE{word_en}}; end + assign line_wdata = (fill && fill_way_en) ? 
fill_data : {`CS_WORDS_PER_LINE{write_word}}; + assign line_wren = {LINE_SIZE{fill && fill_way_en}} + | ({LINE_SIZE{write && tag_matches[i]}} & write_mask); + + end else begin : g_ro_wdata + `UNUSED_VAR (write) + `UNUSED_VAR (write_byteen) + `UNUSED_VAR (write_word) + `UNUSED_VAR (word_idx) + assign line_wdata = fill_data; + assign line_wren = fill_way_en; end - assign line_wren = line_wren_w; - end else begin : g_ro_1w_wdata - `UNUSED_VAR (write) - `UNUSED_VAR (evict_way) - `UNUSED_VAR (write_byteen) - `UNUSED_VAR (write_word) - assign line_wdata = fill_data; - assign line_wren = 1'b1; - end - assign line_write = fill || (write && WRITE_ENABLE); - assign line_read = read || ((fill || flush) && WRITEBACK); - - VX_sp_ram #( - .DATAW (NUM_WAYS * `CS_LINE_WIDTH), - .SIZE (`CS_LINES_PER_BANK), - .WRENW (BYTEENW), - .OUT_REG (1) - ) data_store ( - .clk (clk), - .reset (reset), - .read (line_read), - .write (line_write), - .wren (line_wren), - .addr (line_idx), - .wdata (line_wdata), - .rdata (line_rdata) - ); + wire line_write = fill || (write && WRITE_ENABLE); + wire line_read = read || ((fill || flush) && WRITEBACK); + + VX_sp_ram #( + .DATAW (`CS_LINE_WIDTH), + .SIZE (`CS_LINES_PER_BANK), + .WRENW (BYTEENW), + .OUT_REG (1) + ) data_store ( + .clk (clk), + .reset (reset), + .read (line_read), + .write (line_write), + .wren (line_wren), + .addr (line_idx), + .wdata (line_wdata), + .rdata (line_rdata[i]) + ); + end wire [`LOG2UP(NUM_WAYS)-1:0] hit_way_idx; VX_onehot_encoder #( diff --git a/hw/rtl/cache/VX_cache_repl.sv b/hw/rtl/cache/VX_cache_repl.sv index aac0483fd5..dbd51afddc 100644 --- a/hw/rtl/cache/VX_cache_repl.sv +++ b/hw/rtl/cache/VX_cache_repl.sv @@ -99,6 +99,7 @@ module VX_cache_repl #( input wire [`CS_LINE_SEL_BITS-1:0] hit_line, input wire [NUM_WAYS-1:0] hit_way, input wire repl_valid, + input wire [`CS_LINE_SEL_BITS-1:0] repl_line_n, input wire [`CS_LINE_SEL_BITS-1:0] repl_line, output wire [NUM_WAYS-1:0] repl_way ); @@ -110,6 +111,7 @@ module 
VX_cache_repl #( if (REPL_POLICY == `CS_REPL_PLRU) begin : g_plru // Pseudo Least Recently Used replacement policy localparam LRU_WIDTH = `UP(NUM_WAYS-1); + localparam FORCE_BRAM = (LRU_WIDTH * `CS_LINES_PER_BANK) >= 1024; wire [WAY_IDX_WIDTH-1:0] repl_way_idx; wire [WAY_IDX_WIDTH-1:0] hit_way_idx; @@ -118,17 +120,18 @@ module VX_cache_repl #( wire [LRU_WIDTH-1:0] plru_wmask; VX_dp_ram #( - .DATAW (LRU_WIDTH), - .SIZE (`CS_LINES_PER_BANK), - .WRENW (LRU_WIDTH) + .DATAW (LRU_WIDTH), + .SIZE (`CS_LINES_PER_BANK), + .WRENW (LRU_WIDTH), + .OUT_REG (FORCE_BRAM) ) plru_store ( .clk (clk), .reset (reset), - .read (repl_valid), + .read (FORCE_BRAM ? ~stall : repl_valid), .write (hit_valid), .wren (plru_wmask), .waddr (hit_line), - .raddr (repl_line), + .raddr (FORCE_BRAM ? repl_line_n : repl_line), .wdata (plru_wdata), .rdata (plru_rdata) ); @@ -167,23 +170,28 @@ module VX_cache_repl #( end else if (REPL_POLICY == `CS_REPL_CYCLIC) begin : g_cyclic // Cyclic replacement policy localparam CTR_WIDTH = $clog2(NUM_WAYS); + localparam FORCE_BRAM = (CTR_WIDTH * `CS_LINES_PER_BANK) >= 1024; + `UNUSED_VAR (hit_valid) `UNUSED_VAR (hit_line) `UNUSED_VAR (hit_way) + `UNUSED_VAR (repl_valid) wire [`UP(CTR_WIDTH)-1:0] ctr_rdata; wire [`UP(CTR_WIDTH)-1:0] ctr_wdata = ctr_rdata + 1; - VX_sp_ram #( - .DATAW (`UP(CTR_WIDTH)), - .SIZE (`CS_LINES_PER_BANK) + VX_dp_ram #( + .DATAW (`UP(CTR_WIDTH)), + .SIZE (`CS_LINES_PER_BANK), + .OUT_REG (FORCE_BRAM) ) ctr_store ( .clk (clk), .reset (reset), - .read (repl_valid), + .read (FORCE_BRAM ? ~stall : repl_valid), .write (repl_valid), .wren (1'b1), - .addr (repl_line), + .raddr (FORCE_BRAM ? 
repl_line_n : repl_line), + .waddr (repl_line), .wdata (ctr_wdata), .rdata (ctr_rdata) ); @@ -202,6 +210,7 @@ module VX_cache_repl #( `UNUSED_VAR (hit_way) `UNUSED_VAR (repl_valid) `UNUSED_VAR (repl_line) + `UNUSED_VAR (repl_line_n) if (NUM_WAYS > 1) begin : g_repl_way reg [NUM_WAYS-1:0] victim_way; always @(posedge clk) begin diff --git a/hw/rtl/cache/VX_cache_top.sv b/hw/rtl/cache/VX_cache_top.sv index 3fa0e5d65f..d6bd4aace5 100644 --- a/hw/rtl/cache/VX_cache_top.sv +++ b/hw/rtl/cache/VX_cache_top.sv @@ -20,7 +20,7 @@ module VX_cache_top import VX_gpu_pkg::*; #( parameter NUM_REQS = 4, // Size of cache in bytes - parameter CACHE_SIZE = 16384, + parameter CACHE_SIZE = 32768, // Size of line inside a bank in bytes parameter LINE_SIZE = 64, // Number of banks @@ -28,14 +28,14 @@ module VX_cache_top import VX_gpu_pkg::*; #( // Number of associative ways parameter NUM_WAYS = 4, // Size of a word in bytes - parameter WORD_SIZE = 4, + parameter WORD_SIZE = 16, // Core Response Queue Size - parameter CRSQ_SIZE = 2, + parameter CRSQ_SIZE = 4, // Miss Reserv Queue Knob parameter MSHR_SIZE = 16, // Memory Response Queue Size - parameter MRSQ_SIZE = 0, + parameter MRSQ_SIZE = 4, // Memory Request Queue Size parameter MREQ_SIZE = 4, @@ -55,10 +55,10 @@ module VX_cache_top import VX_gpu_pkg::*; #( parameter TAG_WIDTH = 16, // Core response output buffer - parameter CORE_OUT_BUF = 2, + parameter CORE_OUT_BUF = 3, // Memory request output buffer - parameter MEM_OUT_BUF = 2, + parameter MEM_OUT_BUF = 3, parameter MEM_TAG_WIDTH = `CLOG2(MSHR_SIZE) + `CLOG2(NUM_BANKS) ) ( diff --git a/hw/rtl/cache/VX_cache_wrap.sv b/hw/rtl/cache/VX_cache_wrap.sv index a9b872dd05..c181fb4660 100644 --- a/hw/rtl/cache/VX_cache_wrap.sv +++ b/hw/rtl/cache/VX_cache_wrap.sv @@ -27,18 +27,18 @@ module VX_cache_wrap import VX_gpu_pkg::*; #( // Size of line inside a bank in bytes parameter LINE_SIZE = 64, // Number of banks - parameter NUM_BANKS = 1, + parameter NUM_BANKS = 4, // Number of associative ways 
- parameter NUM_WAYS = 1, + parameter NUM_WAYS = 4, // Size of a word in bytes - parameter WORD_SIZE = 4, + parameter WORD_SIZE = 16, // Core Response Queue Size - parameter CRSQ_SIZE = 2, + parameter CRSQ_SIZE = 4, // Miss Reserv Queue Knob - parameter MSHR_SIZE = 8, + parameter MSHR_SIZE = 16, // Memory Response Queue Size - parameter MRSQ_SIZE = 0, + parameter MRSQ_SIZE = 4, // Memory Request Queue Size parameter MREQ_SIZE = 4, @@ -52,7 +52,7 @@ module VX_cache_wrap import VX_gpu_pkg::*; #( parameter DIRTY_BYTES = 0, // Replacement policy - parameter REPL_POLICY = `CS_REPL_CYCLIC, + parameter REPL_POLICY = `CS_REPL_CYCLIC, // Request debug identifier parameter UUID_WIDTH = 0, @@ -70,10 +70,10 @@ module VX_cache_wrap import VX_gpu_pkg::*; #( parameter PASSTHRU = 0, // Core response output buffer - parameter CORE_OUT_BUF = 0, + parameter CORE_OUT_BUF = 3, // Memory request output buffer - parameter MEM_OUT_BUF = 0 + parameter MEM_OUT_BUF = 3 ) ( input wire clk, diff --git a/hw/rtl/core/VX_operands.sv b/hw/rtl/core/VX_operands.sv index 42a91e4c24..b396d18306 100644 --- a/hw/rtl/core/VX_operands.sv +++ b/hw/rtl/core/VX_operands.sv @@ -266,9 +266,9 @@ module VX_operands import VX_gpu_pkg::*; #( VX_dp_ram #( .DATAW (REGS_DATAW), .SIZE (PER_BANK_REGS * PER_ISSUE_WARPS), - .OUT_REG (1), - .READ_ENABLE (1), .WRENW (BYTEENW), + .OUT_REG (1), + .WRITE_MODE ("U"), `ifdef GPR_RESET .RESET_RAM (1), `endif diff --git a/hw/rtl/libs/VX_dp_ram.sv b/hw/rtl/libs/VX_dp_ram.sv index 4220eca185..b770cfa68e 100644 --- a/hw/rtl/libs/VX_dp_ram.sv +++ b/hw/rtl/libs/VX_dp_ram.sv @@ -24,7 +24,7 @@ module VX_dp_ram #( parameter RW_ASSERT = 0, parameter RESET_RAM = 0, parameter RESET_OUT = 0, - parameter READ_ENABLE = 0, + parameter `STRING WRITE_MODE = "R", // R: read-first, W: write-first, N: no-change, U: undefined parameter INIT_ENABLE = 0, parameter INIT_FILE = "", parameter [DATAW-1:0] INIT_VALUE = 0, @@ -41,7 +41,10 @@ module VX_dp_ram #( output wire [DATAW-1:0] rdata ); localparam 
WSELW = DATAW / WRENW; + localparam USE_BRAM = !LUTRAM && ((DATAW * SIZE) >= `MAX_LUTRAM); + `STATIC_ASSERT((WRENW * WSELW == DATAW), ("invalid parameter")) + `UNUSED_PARAM (RW_ASSERT) `define RAM_INITIALIZATION \ if (INIT_ENABLE != 0) begin : g_init \ @@ -56,187 +59,155 @@ module VX_dp_ram #( end \ end -`define RAM_WREN_BLOCK_ALTERA(__we__) \ - reg [WRENW-1:0][WSELW-1:0] ram [0:SIZE-1]; \ - `RAM_INITIALIZATION \ - always @(posedge clk) begin \ - if (__we__) begin \ - for (integer i = 0; i < WRENW; ++i) begin \ - if (wren[i]) begin \ - ram[waddr][i] <= wdata[i * WSELW +: WSELW]; \ - end \ - end \ - end \ - end - -`define RAM_WREN_BLOCK_XILINX(__we__) \ - reg [DATAW-1:0] ram [0:SIZE-1]; \ - `RAM_INITIALIZATION \ - always @(posedge clk) begin \ - if (__we__) begin \ - for (integer i = 0; i < WRENW; ++i) begin \ - if (wren[i]) begin \ - ram[waddr][i * WSELW +: WSELW] <= wdata[i * WSELW +: WSELW]; \ - end \ - end \ - end \ - end - -`define RAM_WRITE_BLOCK(__we__) \ - reg [DATAW-1:0] ram [0:SIZE-1]; \ - `RAM_INITIALIZATION \ - always @(posedge clk) begin \ - if (__we__) begin \ - ram[waddr] <= wdata; \ - end \ - end - -`define RAM_READ_BLOCK_OUT_REG(__re__) \ - always @(posedge clk) begin \ - if (__re__) begin \ - if (RESET_OUT && reset) begin \ - rdata_r <= INIT_VALUE; \ - end else begin \ - rdata_r <= ram[raddr]; \ - end \ - end \ - end - - `UNUSED_PARAM (RW_ASSERT) - `UNUSED_VAR (read) - `UNUSED_VAR (wren) - +`ifdef SYNTHESIS +`ifdef QUARTUS + localparam `STRING RAM_STYLE_VALUE = USE_BRAM ? "block" : (LUTRAM ? "MLAB, no_rw_check" : ""); + localparam `STRING RAM_NO_RWCHECK_VALUE = NO_RWCHECK ? 
"-name add_pass_through_logic_to_inferred_rams off" : ""; + `define RAM_ARRAY (* ramstyle = RAM_STYLE_VALUE *) reg [WRENW-1:0][WSELW-1:0] ram [0:SIZE-1]; + `define RAM_WRITE for (integer i = 0; i < WRENW; ++i) begin \ + if (wren[i]) begin \ + ram[waddr][i] <= wdata[i * WSELW +: WSELW]; \ + end \ + end + `define RAM_NO_RWCHECK (* altera_attribute = RAM_NO_RWCHECK_VALUE *) +`else + localparam `STRING RAM_STYLE_VALUE = USE_BRAM ? "block" : (LUTRAM ? "distributed" : ""); + localparam `STRING RAM_NO_RWCHECK_VALUE = NO_RWCHECK ? "no" : ""; + `define RAM_ARRAY (* ram_style = RAM_STYLE_VALUE *) reg [DATAW-1:0] ram [0:SIZE-1]; + `define RAM_WRITE for (integer i = 0; i < WRENW; ++i) begin \ + if (wren[i]) begin \ + ram[waddr][i * WSELW +: WSELW] <= wdata[i * WSELW +: WSELW]; \ + end \ + end + `define RAM_NO_RWCHECK (* rw_addr_collision = RAM_NO_RWCHECK_VALUE *) +`endif if (OUT_REG) begin : g_out_reg reg [DATAW-1:0] rdata_r; - if (READ_ENABLE) begin : g_readen - if (WRENW != 1) begin : g_writeen - `ifdef QUARTUS - if (LUTRAM != 0) begin : g_lutram - `USE_FAST_BRAM `RAM_WREN_BLOCK_ALTERA(write) - `RAM_READ_BLOCK_OUT_REG(read) - end else begin : g_no_lutram - `RAM_WREN_BLOCK_ALTERA(write) - `RAM_READ_BLOCK_OUT_REG(read) - end - `else - // Not Quartus - if (LUTRAM != 0) begin : g_lutram - `USE_FAST_BRAM `RAM_WREN_BLOCK_XILINX(write) - `RAM_READ_BLOCK_OUT_REG(read) - end else begin : g_no_lutram - `RAM_WREN_BLOCK_XILINX(write) - `RAM_READ_BLOCK_OUT_REG(read) + if (WRITE_MODE == "R") begin : g_read_first + `RAM_ARRAY + `RAM_INITIALIZATION + always @(posedge clk) begin + if (write) begin + `RAM_WRITE end - `endif - end else begin : g_no_writeen - if (LUTRAM != 0) begin : g_lutram - `USE_FAST_BRAM `RAM_WRITE_BLOCK(write) - `RAM_READ_BLOCK_OUT_REG(read) - end else begin : g_no_lutram - `RAM_WRITE_BLOCK(write) - `RAM_READ_BLOCK_OUT_REG(read) + if (RESET_OUT && reset) begin + rdata_r <= INIT_VALUE; + end else if (read || write) begin + rdata_r <= ram[raddr]; end end - end else begin : 
g_no_readen - if (WRENW != 1) begin : g_writeen - `ifdef QUARTUS - if (LUTRAM != 0) begin : g_lutram - `USE_FAST_BRAM `RAM_WREN_BLOCK_ALTERA(write) - `RAM_READ_BLOCK_OUT_REG(read || write) - end else begin : g_no_lutram - `RAM_WREN_BLOCK_ALTERA(write) - `RAM_READ_BLOCK_OUT_REG(read || write) - end - `else - // Not Quartus - if (LUTRAM != 0) begin : g_lutram - `USE_FAST_BRAM `RAM_WREN_BLOCK_XILINX(write) - `RAM_READ_BLOCK_OUT_REG(read || write) - end else begin : g_no_lutram - `RAM_WREN_BLOCK_XILINX(write) - `RAM_READ_BLOCK_OUT_REG(read || write) + end else if (WRITE_MODE == "W") begin : g_write_first + `RAM_ARRAY + `RAM_INITIALIZATION + always @(posedge clk) begin + if (write) begin + `RAM_WRITE end - `endif - end else begin : g_no_writeen - if (LUTRAM != 0) begin : g_lutram - `USE_FAST_BRAM `RAM_WRITE_BLOCK(write) - `RAM_READ_BLOCK_OUT_REG(read || write) - end else begin : g_no_lutram - `RAM_WRITE_BLOCK(write) - `RAM_READ_BLOCK_OUT_REG(read || write) + if (RESET_OUT && reset) begin + rdata_r <= INIT_VALUE; + end else if (read || write) begin + rdata_r = ram[raddr]; end end - end - assign rdata = rdata_r; - end else begin : g_no_out_reg - `ifdef SYNTHESIS - if (WRENW > 1) begin : g_writeen - `ifdef QUARTUS - if (LUTRAM != 0) begin : g_lutram - `USE_FAST_BRAM `RAM_WREN_BLOCK_ALTERA(write) - assign rdata = ram[raddr]; - end else begin : g_no_lutram - if (NO_RWCHECK != 0) begin : g_no_rwcheck - `NO_RW_RAM_CHECK `RAM_WREN_BLOCK_ALTERA(write) - assign rdata = ram[raddr]; - end else begin : g_rwcheck - `RAM_WREN_BLOCK_ALTERA(write) - assign rdata = ram[raddr]; + end else if (WRITE_MODE == "N") begin : g_no_change + `RAM_ARRAY + `RAM_INITIALIZATION + always @(posedge clk) begin + if (write) begin + `RAM_WRITE end - end - `else - // default synthesis - if (LUTRAM != 0) begin : g_lutram - `USE_FAST_BRAM `RAM_WREN_BLOCK_XILINX(write) - assign rdata = ram[raddr]; - end else begin : g_no_lutram - if (NO_RWCHECK != 0) begin : g_no_rwcheck - `NO_RW_RAM_CHECK 
`RAM_WREN_BLOCK_XILINX(write) - assign rdata = ram[raddr]; - end else begin : g_rwcheck - `RAM_WREN_BLOCK_XILINX(write) - assign rdata = ram[raddr]; + if (RESET_OUT && reset) begin + rdata_r <= INIT_VALUE; + end else if (read && ~write) begin + rdata_r <= ram[raddr]; end end - `endif - end else begin : g_no_writeen - // (WRENW == 1) - if (LUTRAM != 0) begin : g_lutram - `USE_FAST_BRAM `RAM_WRITE_BLOCK(write) - assign rdata = ram[raddr]; - end else begin : g_no_lutram - if (NO_RWCHECK != 0) begin : g_no_rwcheck - `NO_RW_RAM_CHECK `RAM_WRITE_BLOCK(write) - assign rdata = ram[raddr]; - end else begin : g_rwcheck - `RAM_WRITE_BLOCK(write) - assign rdata = ram[raddr]; + end end else if (WRITE_MODE == "U") begin : g_undefined + `RAM_NO_RWCHECK `RAM_ARRAY + `RAM_INITIALIZATION + always @(posedge clk) begin + if (write) begin + `RAM_WRITE + end + if (RESET_OUT && reset) begin + rdata_r <= INIT_VALUE; + end else if (read) begin + rdata_r <= ram[raddr]; end end + end else begin + `STATIC_ASSERT(0, ("invalid write mode: %s", WRITE_MODE)) end - `else - // simulation - reg [DATAW-1:0] ram [0:SIZE-1]; + else begin : g_no_out_reg + `UNUSED_VAR (read) + `RAM_NO_RWCHECK `RAM_ARRAY `RAM_INITIALIZATION + always @(posedge clk) begin + if (write) begin + `RAM_WRITE + end + end + assign rdata = ram[raddr]; + end +`else + // simulation + reg [DATAW-1:0] ram [0:SIZE-1]; + `RAM_INITIALIZATION + + wire [DATAW-1:0] ram_n; + for (genvar i = 0; i < WRENW; ++i) begin : g_ram_n + assign ram_n[i * WSELW +: WSELW] = wren[i] ? wdata[i * WSELW +: WSELW] : ram[waddr][i * WSELW +: WSELW]; + end - wire [DATAW-1:0] ram_n; - for (genvar i = 0; i < WRENW; ++i) begin : g_ram_n - assign ram_n[i * WSELW +: WSELW] = ((WRENW == 1) | wren[i]) ? 
wdata[i * WSELW +: WSELW] : ram[waddr][i * WSELW +: WSELW]; + always @(posedge clk) begin + if (RESET_RAM && reset) begin + for (integer i = 0; i < SIZE; ++i) begin + ram[i] <= DATAW'(INIT_VALUE); + end + end else begin + if (write) begin + ram[waddr] <= ram_n; + end end + end + if (OUT_REG && WRITE_MODE == "R") begin : g_read_first + reg [DATAW-1:0] rdata_r; always @(posedge clk) begin - if (RESET_RAM && reset) begin - for (integer i = 0; i < SIZE; ++i) begin - ram[i] <= DATAW'(INIT_VALUE); - end - end else begin - if (write) begin - ram[waddr] <= ram_n; + if (RESET_OUT && reset) begin + rdata_r <= DATAW'(INIT_VALUE); + end else if (read || write) begin + rdata_r <= ram[raddr]; + end + end + assign rdata = rdata_r; + end else if (OUT_REG && WRITE_MODE == "W") begin : g_read_first + reg [DATAW-1:0] rdata_r; + always @(posedge clk) begin + if (RESET_OUT && reset) begin + rdata_r <= DATAW'(INIT_VALUE); + end else if (read || write) begin + if (write && (raddr == waddr)) begin + rdata_r <= ram_n; + end else begin + rdata_r <= ram[raddr]; end end end - - if (!LUTRAM && NO_RWCHECK) begin : g_rdata_no_bypass + assign rdata = rdata_r; + end else if (OUT_REG && WRITE_MODE == "N") begin : g_read_first + reg [DATAW-1:0] rdata_r; + always @(posedge clk) begin + if (RESET_OUT && reset) begin + rdata_r <= DATAW'(INIT_VALUE); + end else if (read && ~write) begin + rdata_r <= ram[raddr]; + end + end + assign rdata = rdata_r; + end else begin : g_async_or_undef + wire [DATAW-1:0] rdata_w; + if (USE_BRAM && NO_RWCHECK) begin : g_rdata_no_bypass reg [DATAW-1:0] prev_data; reg [ADDRW-1:0] prev_waddr; reg prev_write; @@ -253,15 +224,29 @@ module VX_dp_ram #( end end - assign rdata = (prev_write && (prev_waddr == raddr)) ? prev_data : ram[raddr]; - if (RW_ASSERT) begin : g_rw_assert - `RUNTIME_ASSERT(~read || (rdata == ram[raddr]), ("%t: read after write hazard", $time)) + assign rdata_w = (prev_write && (prev_waddr == raddr)) ? 
prev_data : ram[raddr]; + if (RW_ASSERT) begin : g_rw_asert + `RUNTIME_ASSERT(~read || (rdata_w == ram[raddr]), ("%t: read after write hazard", $time)) end end else begin : g_rdata_with_bypass - assign rdata = ram[raddr]; + assign rdata_w = ram[raddr]; + end + if (OUT_REG) begin : g_out_reg + reg [DATAW-1:0] rdata_r; + always @(posedge clk) begin + if (RESET_OUT && reset) begin + rdata_r <= DATAW'(INIT_VALUE); + end else if (read) begin + rdata_r <= rdata_w; + end + end + assign rdata = rdata_r; + end else begin : g_no_out_reg + `UNUSED_VAR (read) + assign rdata = rdata_w; end - `endif end +`endif endmodule `TRACING_ON diff --git a/hw/rtl/libs/VX_fifo_queue.sv b/hw/rtl/libs/VX_fifo_queue.sv index ca11857800..1410a0dd09 100644 --- a/hw/rtl/libs/VX_fifo_queue.sv +++ b/hw/rtl/libs/VX_fifo_queue.sv @@ -16,11 +16,11 @@ `TRACING_OFF module VX_fifo_queue #( parameter DATAW = 1, - parameter DEPTH = 2, + parameter DEPTH = 1, parameter ALM_FULL = (DEPTH - 1), parameter ALM_EMPTY = 1, parameter OUT_REG = 0, - parameter LUTRAM = 0, + parameter LUTRAM = ((DATAW * DEPTH) < `MAX_LUTRAM), parameter SIZEW = `CLOG2(DEPTH+1) ) ( input wire clk, @@ -105,7 +105,8 @@ module VX_fifo_queue #( .DATAW (DATAW), .SIZE (DEPTH), .LUTRAM (LUTRAM), - .OUT_REG(!LUTRAM) + .OUT_REG(!LUTRAM), + .WRITE_MODE("W") ) dp_ram ( .clk (clk), .reset (reset), @@ -119,47 +120,17 @@ module VX_fifo_queue #( ); if (OUT_REG != 0) begin : g_out_reg - reg [DATAW-1:0] data_out_r, data_out_n; - - if (LUTRAM) begin : g_lutram - assign data_out_n = data_out_w; - end else begin : g_no_lutram - reg [DATAW-1:0] data_out_p; - reg rdw_hazard_r; - wire rdw_hazard = push && (wr_ptr_r == rd_ptr_w); - always @(posedge clk) begin - if (rdw_hazard) begin - data_out_p <= data_in; - end - rdw_hazard_r <= rdw_hazard; - end - assign data_out_n = rdw_hazard_r ? 
data_out_p : data_out_w; - end - + reg [DATAW-1:0] data_out_r; always @(posedge clk) begin if (bypass) begin data_out_r <= data_in; end else if (pop) begin - data_out_r <= data_out_n; + data_out_r <= data_out_w; end end - assign data_out = data_out_r; - end else begin : g_no_out_reg - if (LUTRAM) begin : g_lutram - assign data_out = data_out_w; - end else begin : g_no_lutram - reg [DATAW-1:0] data_in_r; - reg bypass_r; - always @(posedge clk) begin - if (bypass) begin - data_in_r <= data_in; - end - bypass_r <= bypass; - end - assign data_out = bypass_r ? data_in_r : data_out_w; - end + assign data_out = data_out_w; end end diff --git a/hw/rtl/libs/VX_scope_tap.sv b/hw/rtl/libs/VX_scope_tap.sv index 6a9b70ff1f..78e85e16fd 100644 --- a/hw/rtl/libs/VX_scope_tap.sv +++ b/hw/rtl/libs/VX_scope_tap.sv @@ -112,9 +112,7 @@ module VX_scope_tap #( VX_dp_ram #( .DATAW (IDLE_CTRW), .SIZE (DEPTH), - .OUT_REG (1), - .READ_ENABLE (0), - .NO_RWCHECK (1) + .OUT_REG (1) ) delta_store ( .clk (clk), .reset (reset), @@ -135,9 +133,7 @@ module VX_scope_tap #( VX_dp_ram #( .DATAW (DATAW), .SIZE (DEPTH), - .OUT_REG (1), - .READ_ENABLE (0), - .NO_RWCHECK (1) + .OUT_REG (1) ) data_store ( .clk (clk), .reset (reset), diff --git a/hw/rtl/libs/VX_sp_ram.sv b/hw/rtl/libs/VX_sp_ram.sv index efce4b5f2f..faaf0dd2f4 100644 --- a/hw/rtl/libs/VX_sp_ram.sv +++ b/hw/rtl/libs/VX_sp_ram.sv @@ -24,7 +24,7 @@ module VX_sp_ram #( parameter RW_ASSERT = 0, parameter RESET_RAM = 0, parameter RESET_OUT = 0, - parameter READ_ENABLE = 0, + parameter `STRING WRITE_MODE = "R", // R: read-first, W: write-first, N: no-change, U: undefined parameter INIT_ENABLE = 0, parameter INIT_FILE = "", parameter [DATAW-1:0] INIT_VALUE = 0, @@ -49,7 +49,7 @@ module VX_sp_ram #( .RW_ASSERT (RW_ASSERT), .RESET_RAM (RESET_RAM), .RESET_OUT (RESET_OUT), - .READ_ENABLE(READ_ENABLE), + .WRITE_MODE (WRITE_MODE), .INIT_ENABLE(INIT_ENABLE), .INIT_FILE (INIT_FILE), .INIT_VALUE (INIT_VALUE), diff --git a/hw/rtl/mem/VX_local_mem.sv 
b/hw/rtl/mem/VX_local_mem.sv index 03c4acdd19..557f4a9f75 100644 --- a/hw/rtl/mem/VX_local_mem.sv +++ b/hw/rtl/mem/VX_local_mem.sv @@ -166,9 +166,7 @@ module VX_local_mem import VX_gpu_pkg::*; #( .DATAW (WORD_WIDTH), .SIZE (WORDS_PER_BANK), .WRENW (WORD_SIZE), - .OUT_REG (1), - .READ_ENABLE (0), - .NO_RWCHECK (1) + .OUT_REG (1) ) lmem_store ( .clk (clk), .reset (reset), From b6bd6467efe685c27b031e5478a21d5ec3050aed Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sat, 19 Oct 2024 20:04:51 -0700 Subject: [PATCH 299/407] cache hit timing optimization --- hw/rtl/cache/VX_bank_flush.sv | 15 +- hw/rtl/cache/VX_cache_bank.sv | 177 +++++++++--------------- hw/rtl/cache/VX_cache_data.sv | 123 ++++++++--------- hw/rtl/cache/VX_cache_define.vh | 1 + hw/rtl/cache/VX_cache_repl.sv | 213 +++++++++++++---------------- hw/rtl/cache/VX_cache_tags.sv | 55 +++----- hw/rtl/libs/VX_dp_ram.sv | 21 ++- hw/rtl/libs/VX_fifo_queue.sv | 16 +-- hw/unittest/generic_queue/Makefile | 2 + 9 files changed, 265 insertions(+), 358 deletions(-) diff --git a/hw/rtl/cache/VX_bank_flush.sv b/hw/rtl/cache/VX_bank_flush.sv index ca28d749bf..68eefd3631 100644 --- a/hw/rtl/cache/VX_bank_flush.sv +++ b/hw/rtl/cache/VX_bank_flush.sv @@ -33,7 +33,7 @@ module VX_bank_flush #( output wire flush_init, output wire flush_valid, output wire [`CS_LINE_SEL_BITS-1:0] flush_line, - output wire [NUM_WAYS-1:0] flush_way, + output wire [`CS_WAY_SEL_WIDTH-1:0] flush_way, input wire flush_ready, input wire mshr_empty, input wire bank_empty @@ -113,17 +113,10 @@ module VX_bank_flush #( assign flush_valid = (state == STATE_FLUSH); assign flush_line = counter[`CS_LINE_SEL_BITS-1:0]; - if (WRITEBACK && `CS_WAY_SEL_BITS > 0) begin : g_flush_way - VX_decoder #( - .N (`CS_WAY_SEL_BITS), - .D (NUM_WAYS) - ) ctr_decoder ( - .sel_in (counter[`CS_LINE_SEL_BITS +: `CS_WAY_SEL_BITS]), - .data_in (1'b1), - .data_out (flush_way) - ); + if (WRITEBACK && (NUM_WAYS > 1)) begin : g_flush_way + assign flush_way = counter[`CS_LINE_SEL_BITS +: 
`CS_WAY_SEL_BITS]; end else begin : g_flush_way_all - assign flush_way = {NUM_WAYS{1'b1}}; + assign flush_way = '0; end endmodule diff --git a/hw/rtl/cache/VX_cache_bank.sv b/hw/rtl/cache/VX_cache_bank.sv index 20c0c06122..574659d7e8 100644 --- a/hw/rtl/cache/VX_cache_bank.sv +++ b/hw/rtl/cache/VX_cache_bank.sv @@ -150,19 +150,19 @@ module VX_cache_bank #( wire is_creq_st0, is_creq_st1; wire is_fill_st0, is_fill_st1; wire is_flush_st0, is_flush_st1; - wire [NUM_WAYS-1:0] flush_way_st0; - wire [NUM_WAYS-1:0] evict_way_st0, evict_way_st1; + wire [`CS_WAY_SEL_WIDTH-1:0] flush_way_st0, evict_way_st0; + wire [`CS_WAY_SEL_WIDTH-1:0] way_idx_st1; wire [`CS_LINE_ADDR_WIDTH-1:0] addr_sel, addr_st0, addr_st1; - wire [`CS_LINE_SEL_BITS-1:0] line_idx_st0, line_idx_st1; - wire [`CS_TAG_SEL_BITS-1:0] line_tag_st1; + wire [`CS_LINE_SEL_BITS-1:0] line_idx_sel, line_idx_st0, line_idx_st1; + wire [`CS_TAG_SEL_BITS-1:0] line_tag_st0, line_tag_st1; + wire [`CS_TAG_SEL_BITS-1:0] evict_tag_st0, evict_tag_st1; wire rw_sel, rw_st0, rw_st1; wire [WORD_SEL_WIDTH-1:0] word_idx_sel, word_idx_st0, word_idx_st1; wire [WORD_SIZE-1:0] byteen_sel, byteen_st0, byteen_st1; wire [REQ_SEL_WIDTH-1:0] req_idx_sel, req_idx_st0, req_idx_st1; wire [TAG_WIDTH-1:0] tag_sel, tag_st0, tag_st1; wire [`CS_WORD_WIDTH-1:0] write_word_st0, write_word_st1; - wire [`CS_WORD_WIDTH-1:0] read_data_st1; wire [`CS_LINE_WIDTH-1:0] data_sel, data_st0, data_st1; wire [MSHR_ADDR_WIDTH-1:0] mshr_id_st0, mshr_id_st1; wire [MSHR_ADDR_WIDTH-1:0] replay_id_st0; @@ -170,18 +170,18 @@ module VX_cache_bank #( wire [`UP(FLAGS_WIDTH)-1:0] flags_sel, flags_st0, flags_st1; wire mshr_pending_st0, mshr_pending_st1; wire [MSHR_ADDR_WIDTH-1:0] mshr_previd_st0, mshr_previd_st1; + wire is_hit_st0, is_hit_st1; wire mshr_empty; wire flush_valid; wire init_valid; wire [`CS_LINE_SEL_BITS-1:0] flush_sel; - wire [NUM_WAYS-1:0] flush_way; + wire [`CS_WAY_SEL_WIDTH-1:0] flush_way; wire flush_ready; // ensure we have no pending memory request in the 
bank wire no_pending_req = ~valid_st0 && ~valid_st1 && mreq_queue_empty; - // flush unit VX_bank_flush #( .BANK_ID (BANK_ID), .CACHE_SIZE (CACHE_SIZE), @@ -203,9 +203,7 @@ module VX_cache_bank #( .bank_empty (no_pending_req) ); - logic rdw_hazard, post_hazard; - - wire pipe_stall = crsp_queue_stall || rdw_hazard; + wire pipe_stall = crsp_queue_stall; // inputs arbitration: // mshr replay has highest priority to maximize utilization since there is no miss. @@ -295,8 +293,6 @@ module VX_cache_bank #( assign req_uuid_sel = '0; end - wire [`CS_LINE_SEL_BITS-1:0] line_idx_sel = addr_sel[`CS_LINE_SEL_BITS-1:0]; - wire is_init_sel = init_valid; wire is_creq_sel = creq_enable || replay_enable; wire is_fill_sel = fill_enable; @@ -304,7 +300,7 @@ module VX_cache_bank #( wire is_replay_sel = replay_enable; VX_pipe_register #( - .DATAW (1 + 1 + 1 + 1 + 1 + 1 + `UP(FLAGS_WIDTH) + NUM_WAYS + `CS_LINE_ADDR_WIDTH + `CS_LINE_WIDTH + 1 + WORD_SIZE + WORD_SEL_WIDTH + REQ_SEL_WIDTH + TAG_WIDTH + MSHR_ADDR_WIDTH), + .DATAW (1 + 1 + 1 + 1 + 1 + 1 + `UP(FLAGS_WIDTH) + `CS_WAY_SEL_WIDTH + `CS_LINE_ADDR_WIDTH + `CS_LINE_WIDTH + 1 + WORD_SIZE + WORD_SEL_WIDTH + REQ_SEL_WIDTH + TAG_WIDTH + MSHR_ADDR_WIDTH), .RESETW (1) ) pipe_reg0 ( .clk (clk), @@ -334,22 +330,18 @@ module VX_cache_bank #( wire do_read_st1 = valid_st1 && is_read_st1; wire do_write_st1 = valid_st1 && is_write_st1; - wire do_fill_st1 = valid_st1 && is_fill_st1; - wire do_flush_st1 = valid_st1 && is_flush_st1 && WRITEBACK; - assign write_word_st0 = data_st0[`CS_WORD_WIDTH-1:0]; + assign line_idx_sel = addr_sel[`CS_LINE_SEL_BITS-1:0]; assign line_idx_st0 = addr_st0[`CS_LINE_SEL_BITS-1:0]; + assign line_tag_st0 = `CS_LINE_ADDR_TAG(addr_st0); - wire [`CS_TAG_SEL_BITS-1:0] evict_tag_st1; - wire [NUM_WAYS-1:0] tag_matches_st1; - - wire is_hit_st1 = (| tag_matches_st1); + assign write_word_st0 = data_st0[`CS_WORD_WIDTH-1:0]; wire do_lookup_st0 = do_read_st0 || do_write_st0; - wire do_lookup_st1 = do_read_st1 || do_write_st1; - reg 
[NUM_WAYS-1:0] victim_way_st0; + wire [`CS_WAY_SEL_WIDTH-1:0] victim_way_st0; + wire [NUM_WAYS-1:0] tag_matches_st0; VX_cache_repl #( .CACHE_SIZE (CACHE_SIZE), @@ -363,10 +355,10 @@ module VX_cache_bank #( .stall (pipe_stall), .hit_valid (do_lookup_st1 && is_hit_st1 && ~pipe_stall), .hit_line (line_idx_st1), - .hit_way (tag_matches_st1), + .hit_way (way_idx_st1), .repl_valid (do_fill_st0 && ~pipe_stall), - .repl_line (line_idx_st0), .repl_line_n(line_idx_sel), + .repl_line (line_idx_st0), .repl_way (victim_way_st0) ); @@ -388,27 +380,29 @@ module VX_cache_bank #( .flush (do_flush_st0 && ~pipe_stall), .fill (do_fill_st0 && ~pipe_stall), .lookup (do_lookup_st0 && ~pipe_stall), - .line_addr (addr_st0), + .line_idx_n (line_idx_sel), + .line_idx (line_idx_st0), + .line_tag (line_tag_st0), .evict_way (evict_way_st0), // outputs - .tag_matches_r(tag_matches_st1), - .line_tag_r (line_tag_st1), - .evict_tag_r(evict_tag_st1), - .evict_way_r(evict_way_st1) + .tag_matches(tag_matches_st0), + .evict_tag (evict_tag_st0) ); + assign is_hit_st0 = (| tag_matches_st0); + wire [MSHR_ADDR_WIDTH-1:0] mshr_alloc_id_st0; assign mshr_id_st0 = is_replay_st0 ? 
replay_id_st0 : mshr_alloc_id_st0; VX_pipe_register #( - .DATAW (1 + 1 + 1 + 1 + 1 + 1 + `UP(FLAGS_WIDTH) + `CS_LINE_SEL_BITS + `CS_LINE_WIDTH + WORD_SIZE + WORD_SEL_WIDTH + REQ_SEL_WIDTH + TAG_WIDTH + MSHR_ADDR_WIDTH + MSHR_ADDR_WIDTH + 1), + .DATAW (1 + 1 + 1 + 1 + 1 + 1 + 1 + `UP(FLAGS_WIDTH) + `CS_TAG_SEL_BITS + `CS_TAG_SEL_BITS + `CS_LINE_SEL_BITS + `CS_LINE_WIDTH + WORD_SIZE + WORD_SEL_WIDTH + REQ_SEL_WIDTH + TAG_WIDTH + MSHR_ADDR_WIDTH + MSHR_ADDR_WIDTH + 1), .RESETW (1) ) pipe_reg1 ( .clk (clk), .reset (reset), .enable (~pipe_stall), - .data_in ({valid_st0, is_fill_st0, is_flush_st0, is_creq_st0, is_replay_st0, rw_st0, flags_st0, line_idx_st0, data_st0, byteen_st0, word_idx_st0, req_idx_st0, tag_st0, mshr_id_st0, mshr_previd_st0, mshr_pending_st0}), - .data_out ({valid_st1, is_fill_st1, is_flush_st1, is_creq_st1, is_replay_st1, rw_st1, flags_st1, line_idx_st1, data_st1, byteen_st1, word_idx_st1, req_idx_st1, tag_st1, mshr_id_st1, mshr_previd_st1, mshr_pending_st1}) + .data_in ({valid_st0, is_fill_st0, is_flush_st0, is_creq_st0, is_replay_st0, is_hit_st0, rw_st0, flags_st0, evict_tag_st0, line_tag_st0, line_idx_st0, data_st0, byteen_st0, word_idx_st0, req_idx_st0, tag_st0, mshr_id_st0, mshr_previd_st0, mshr_pending_st0}), + .data_out ({valid_st1, is_fill_st1, is_flush_st1, is_creq_st1, is_replay_st1, is_hit_st1, rw_st1, flags_st1, evict_tag_st1, line_tag_st1, line_idx_st1, data_st1, byteen_st1, word_idx_st1, req_idx_st1, tag_st1, mshr_id_st1, mshr_previd_st1, mshr_pending_st1}) ); if (UUID_WIDTH != 0) begin : g_req_uuid_st1 @@ -422,58 +416,12 @@ module VX_cache_bank #( // ensure mshr replay always get a hit `RUNTIME_ASSERT (~(valid_st1 && is_replay_st1 && ~is_hit_st1), ("%t: missed mshr replay", $time)) - if (WRITE_ENABLE) begin : g_rdw_hazard - // This implementation uses single-port BRAMs for the tags and data stores. - // Using different stages for read and write operations requires a pipeline stall in between due to address port sharing. 
- // Tags fill/flush can perform read and write in the same stage, since no dependency between. - // Data fill/flush can perform read and write in the same stage, since way_idx is available in st0. - // A data read should happen in st0 for its result to be available in st1. - // A data write should happen in st1 when the tag hit status is available. - // The r/w hazard is needed for consecutive writes since they both wonly write in st1. - // The r/w hazard is also not needed for next writethrough fill/flush to the same line. - // For reads or writeback fill/flush to the same line, we sill need the hazard - // because the data writeen in st1 cannot be read at the same time in st0 without extra forwarding logic. - wire is_write_sel = is_creq_sel && rw_sel; - wire is_same_line = (line_idx_sel == line_idx_st0); - always @(posedge clk) begin - if (reset) begin - post_hazard <= 0; - rdw_hazard <= 0; - end else begin - if (~crsp_queue_stall) begin - post_hazard <= rdw_hazard; - rdw_hazard <= do_write_st0 && valid_sel && ~(is_write_sel || (is_same_line && !WRITEBACK && (is_fill_sel || is_flush_sel))); - end - end - end - end else begin : g_rdw_hazard_ro - assign rdw_hazard = 0; - assign post_hazard = 0; - end - assign write_word_st1 = data_st1[`CS_WORD_WIDTH-1:0]; `UNUSED_VAR (data_st1) - wire [`CS_LINE_WIDTH-1:0] evict_data_st1; + wire[`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] read_data_st1; wire [LINE_SIZE-1:0] evict_byteen_st1; - wire line_dirty_st1; - - wire data_write; - wire [`CS_LINE_SEL_BITS-1:0] data_line_idx; - - if (WRITE_ENABLE) begin : g_data_ctrl - // by default all data accesses happen in sto and use line_idx_st0. - // data writes should happen in st1 when the tag hit is available, - // and use line_idx_st1 to ensure the correct line is updated. - // if a rdw hazard is active due to conflict, ensure we don't write twice. - assign data_write = do_write_st1 && ~post_hazard && ~crsp_queue_stall; - assign data_line_idx = data_write ? 
line_idx_st1 : line_idx_st0; - end else begin : g_data_ctrl_ro - `UNUSED_VAR (post_hazard) - `UNUSED_VAR (do_write_st1) - assign data_write = 0; - assign data_line_idx = line_idx_st0; - end + wire evict_dirty_st1; VX_cache_data #( .CACHE_SIZE (CACHE_SIZE), @@ -493,18 +441,18 @@ module VX_cache_bank #( .fill (do_fill_st0 && ~pipe_stall), .flush (do_flush_st0 && ~pipe_stall), .read (do_read_st0 && ~pipe_stall), - .write (data_write), + .write (do_write_st0 && ~pipe_stall), .evict_way (evict_way_st0), - .tag_matches(tag_matches_st1), - .line_idx (data_line_idx), + .tag_matches(tag_matches_st0), + .line_idx (line_idx_st0), .fill_data (data_st0), - .write_word (write_word_st1), - .word_idx (word_idx_st1), - .write_byteen(byteen_st1), + .write_word (write_word_st0), + .word_idx (word_idx_st0), + .write_byteen(byteen_st0), // outputs + .way_idx (way_idx_st1), .read_data (read_data_st1), - .line_dirty (line_dirty_st1), - .evict_data (evict_data_st1), + .evict_dirty(evict_dirty_st1), .evict_byteen(evict_byteen_st1) ); @@ -600,7 +548,7 @@ module VX_cache_bank #( assign crsp_queue_valid = do_read_st1 && is_hit_st1; assign crsp_queue_idx = req_idx_st1; - assign crsp_queue_data = read_data_st1; + assign crsp_queue_data = read_data_st1[word_idx_st1]; assign crsp_queue_tag = tag_st1; VX_elastic_buffer #( @@ -610,7 +558,7 @@ module VX_cache_bank #( ) core_rsp_queue ( .clk (clk), .reset (reset), - .valid_in (crsp_queue_valid && ~rdw_hazard), + .valid_in (crsp_queue_valid), .ready_in (crsp_queue_ready), .data_in ({crsp_queue_tag, crsp_queue_data, crsp_queue_idx}), .data_out ({core_rsp_tag, core_rsp_data, core_rsp_idx}), @@ -618,9 +566,7 @@ module VX_cache_bank #( .ready_out (core_rsp_ready) ); - // we use 'do_read_st1' instead 'crsp_queue_valid' - // to remove costly 'is_hit_st1' signal from critical paths. 
- assign crsp_queue_stall = do_read_st1 && ~crsp_queue_ready; + assign crsp_queue_stall = crsp_queue_valid && ~crsp_queue_ready; // schedule memory request @@ -634,7 +580,7 @@ module VX_cache_bank #( wire is_fill_or_flush_st1 = is_fill_st1 || (is_flush_st1 && WRITEBACK); wire do_fill_or_flush_st1 = valid_st1 && is_fill_or_flush_st1; - wire do_writeback_st1 = do_fill_or_flush_st1 && line_dirty_st1; + wire do_writeback_st1 = do_fill_or_flush_st1 && evict_dirty_st1; wire [`CS_LINE_ADDR_WIDTH-1:0] evict_addr_st1 = {evict_tag_st1, line_idx_st1}; if (WRITE_ENABLE) begin : g_mreq_queue @@ -642,7 +588,7 @@ module VX_cache_bank #( if (DIRTY_BYTES) begin : g_dirty_bytes // ensure dirty bytes match the tag info wire has_dirty_bytes = (| evict_byteen_st1); - `RUNTIME_ASSERT (~do_fill_or_flush_st1 || (line_dirty_st1 == has_dirty_bytes), ("%t: missmatch dirty bytes: dirty_line=%b, dirty_bytes=%b, addr=0x%0h", $time, line_dirty_st1, has_dirty_bytes, `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID))) + `RUNTIME_ASSERT (~do_fill_or_flush_st1 || (evict_dirty_st1 == has_dirty_bytes), ("%t: missmatch dirty bytes: dirty_line=%b, dirty_bytes=%b, addr=0x%0h", $time, evict_dirty_st1, has_dirty_bytes, `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID))) end // issue a fill request on a read/write miss // issue a writeback on a dirty line eviction @@ -651,8 +597,10 @@ module VX_cache_bank #( && ~pipe_stall; assign mreq_queue_addr = is_fill_or_flush_st1 ? evict_addr_st1 : addr_st1; assign mreq_queue_rw = is_fill_or_flush_st1; - assign mreq_queue_data = evict_data_st1; + assign mreq_queue_data = read_data_st1; assign mreq_queue_byteen = is_fill_or_flush_st1 ? 
evict_byteen_st1 : '1; + `UNUSED_VAR (write_word_st1) + `UNUSED_VAR (byteen_st1) end else begin : g_wt wire [LINE_SIZE-1:0] line_byteen; VX_decoder #( @@ -675,7 +623,6 @@ module VX_cache_bank #( `UNUSED_VAR (is_fill_or_flush_st1) `UNUSED_VAR (do_writeback_st1) `UNUSED_VAR (evict_addr_st1) - `UNUSED_VAR (evict_data_st1) `UNUSED_VAR (evict_byteen_st1) end end else begin : g_mreq_queue_ro @@ -688,8 +635,9 @@ module VX_cache_bank #( assign mreq_queue_byteen = '1; `UNUSED_VAR (do_writeback_st1) `UNUSED_VAR (evict_addr_st1) - `UNUSED_VAR (evict_data_st1) `UNUSED_VAR (evict_byteen_st1) + `UNUSED_VAR (write_word_st1) + `UNUSED_VAR (byteen_st1) end if (UUID_WIDTH != 0) begin : g_mreq_queue_tag_uuid @@ -722,10 +670,6 @@ module VX_cache_bank #( assign mem_req_valid = ~mreq_queue_empty; - `UNUSED_VAR (do_fill_st1) - `UNUSED_VAR (do_flush_st1) - `UNUSED_VAR (evict_way_st1) - /////////////////////////////////////////////////////////////////////////////// `ifdef PERF_ENABLE @@ -740,8 +684,8 @@ module VX_cache_bank #( && ~(replay_fire || mem_rsp_fire || core_req_fire || flush_fire); always @(posedge clk) begin if (input_stall || pipe_stall) begin - `TRACE(4, ("%t: *** %s stall: crsq=%b, mreq=%b, mshr=%b, rdw=%b\n", $time, INSTANCE_ID, - crsp_queue_stall, mreq_queue_alm_full, mshr_alm_full, rdw_hazard)) + `TRACE(4, ("%t: *** %s stall: crsq=%b, mreq=%b, mshr=%b\n", $time, INSTANCE_ID, + crsp_queue_stall, mreq_queue_alm_full, mshr_alm_full)) end if (mem_rsp_fire) begin `TRACE(2, ("%t: %s fill-rsp: addr=0x%0h, mshr_id=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID, @@ -764,32 +708,37 @@ module VX_cache_bank #( `TRACE(3, ("%t: %s tags-init: addr=0x%0h, line=%0d\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(addr_st0, BANK_ID), line_idx_st0)) end if (do_fill_st0 && ~pipe_stall) begin - `TRACE(3, ("%t: %s tags-fill: addr=0x%0h, way=%b, line=%0d (#%0d)\n", $time, INSTANCE_ID, + `TRACE(3, ("%t: %s tags-fill: addr=0x%0h, way=%0d, line=%0d (#%0d)\n", $time, INSTANCE_ID, 
`CS_LINE_TO_FULL_ADDR(addr_st0, BANK_ID), evict_way_st0, line_idx_st0, req_uuid_st0)) end if (do_flush_st0 && ~pipe_stall) begin - `TRACE(3, ("%t: %s tags-flush: addr=0x%0h, way=%b, line=%0d (#%0d)\n", $time, INSTANCE_ID, + `TRACE(3, ("%t: %s tags-flush: addr=0x%0h, way=%0d, line=%0d (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(addr_st0, BANK_ID), evict_way_st0, line_idx_st0, req_uuid_st0)) end if (do_lookup_st1 && ~pipe_stall) begin - `TRACE(3, ("%t: %s tags-Lookup: addr=0x%0h, rw=%b, way=%b, line=%0d, tag=0x%0h, hit=%b (#%0d)\n", $time, INSTANCE_ID, - `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID), rw_st1, tag_matches_st1, line_idx_st1, line_tag_st1, is_hit_st1, req_uuid_st1)) + if (is_hit_st1) begin + `TRACE(3, ("%t: %s tags-hit: addr=0x%0h, rw=%b, way=%0d, line=%0d, tag=0x%0h (#%0d)\n", $time, INSTANCE_ID, + `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID), rw_st1, way_idx_st1, line_idx_st1, line_tag_st1, req_uuid_st1)) + end else begin + `TRACE(3, ("%t: %s tags-miss: addr=0x%0h, rw=%b, way=%0d, line=%0d, tag=0x%0h (#%0d)\n", $time, INSTANCE_ID, + `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID), rw_st1, way_idx_st1, line_idx_st1, line_tag_st1, req_uuid_st1)) + end end if (do_fill_st0 && ~pipe_stall) begin - `TRACE(3, ("%t: %s data-fill: addr=0x%0h, way=%b, line=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID, + `TRACE(3, ("%t: %s data-fill: addr=0x%0h, way=%0d, line=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(addr_st0, BANK_ID), evict_way_st0, line_idx_st0, data_st0, req_uuid_st0)) end if (do_flush_st0 && ~pipe_stall) begin - `TRACE(3, ("%t: %s data-flush: addr=0x%0h, way=%b, line=%0d (#%0d)\n", $time, INSTANCE_ID, + `TRACE(3, ("%t: %s data-flush: addr=0x%0h, way=%0d, line=%0d (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(addr_st0, BANK_ID), evict_way_st0, line_idx_st0, req_uuid_st0)) end if (do_read_st1 && is_hit_st1 && ~pipe_stall) begin - `TRACE(3, ("%t: %s data-read: addr=0x%0h, way=%b, line=%0d, wsel=%0d, data=0x%h (#%0d)\n", $time, 
INSTANCE_ID, - `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID), tag_matches_st1, line_idx_st1, word_idx_st1, read_data_st1, req_uuid_st1)) + `TRACE(3, ("%t: %s data-read: addr=0x%0h, way=%0d, line=%0d, wsel=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID, + `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID), way_idx_st1, line_idx_st1, word_idx_st1, crsp_queue_data, req_uuid_st1)) end if (do_write_st1 && is_hit_st1 && ~pipe_stall) begin - `TRACE(3, ("%t: %s data-write: addr=0x%0h, way=%b, line=%0d, wsel=%0d, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, - `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID), tag_matches_st1, line_idx_st1, word_idx_st1, byteen_st1, write_word_st1, req_uuid_st1)) + `TRACE(3, ("%t: %s data-write: addr=0x%0h, way=%0d, line=%0d, wsel=%0d, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, + `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID), way_idx_st1, line_idx_st1, word_idx_st1, byteen_st1, write_word_st1, req_uuid_st1)) end if (crsp_queue_fire) begin `TRACE(2, ("%t: %s core-rd-rsp: addr=0x%0h, tag=0x%0h, req_idx=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID, diff --git a/hw/rtl/cache/VX_cache_data.sv b/hw/rtl/cache/VX_cache_data.sv index 22326e63be..65cf9e0261 100644 --- a/hw/rtl/cache/VX_cache_data.sv +++ b/hw/rtl/cache/VX_cache_data.sv @@ -41,38 +41,23 @@ module VX_cache_data #( input wire read, input wire write, input wire [`CS_LINE_SEL_BITS-1:0] line_idx, - input wire [NUM_WAYS-1:0] evict_way, + input wire [`CS_WAY_SEL_WIDTH-1:0] evict_way, input wire [NUM_WAYS-1:0] tag_matches, input wire [`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] fill_data, input wire [`CS_WORD_WIDTH-1:0] write_word, input wire [WORD_SIZE-1:0] write_byteen, input wire [`UP(`CS_WORD_SEL_BITS)-1:0] word_idx, // outputs - output wire [`CS_WORD_WIDTH-1:0] read_data, - output wire line_dirty, - output wire [`CS_LINE_WIDTH-1:0] evict_data, + output wire [`CS_WAY_SEL_WIDTH-1:0] way_idx, + output wire [`CS_LINE_WIDTH-1:0] read_data, + output wire evict_dirty, output wire [LINE_SIZE-1:0] 
evict_byteen ); `UNUSED_PARAM (WORD_SIZE) `UNUSED_VAR (stall) - localparam BYTEENW = (WRITE_ENABLE != 0) ? LINE_SIZE : 1; - - wire [NUM_WAYS-1:0][`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] line_rdata; - if (WRITEBACK != 0) begin : g_writeback localparam BYTEEN_DATAW = 1 + ((DIRTY_BYTES != 0) ? LINE_SIZE : 0); - wire [`LOG2UP(NUM_WAYS)-1:0] evict_way_idx, evict_way_idx_r; - - VX_onehot_encoder #( - .N (NUM_WAYS) - ) fill_way_enc ( - .data_in (evict_way), - .data_out (evict_way_idx), - `UNUSED_PIN (valid_out) - ); - - `BUFFER_EX(evict_way_idx_r, evict_way_idx, ~stall, 1); wire [NUM_WAYS-1:0][BYTEEN_DATAW-1:0] byteen_rdata; wire [NUM_WAYS-1:0][BYTEEN_DATAW-1:0] byteen_wdata; @@ -80,7 +65,7 @@ module VX_cache_data #( for (genvar i = 0; i < NUM_WAYS; ++i) begin : g_byteen_wdata wire evict = fill || flush; - wire evict_way_en = (NUM_WAYS == 1) || evict_way[i]; + wire evict_way_en = (NUM_WAYS == 1) || (evict_way == i); wire dirty_data = write; // only asserted on writes wire dirty_wren = init || (evict && evict_way_en) || (write && tag_matches[i]); if (DIRTY_BYTES != 0) begin : g_dirty_bytes @@ -121,54 +106,47 @@ module VX_cache_data #( ); if (DIRTY_BYTES != 0) begin : g_line_dirty_and_byteen - assign {line_dirty, evict_byteen} = byteen_rdata[evict_way_idx_r]; + assign {evict_dirty, evict_byteen} = byteen_rdata[way_idx]; end else begin : g_line_dirty - assign line_dirty = byteen_rdata[evict_way_idx_r]; + assign evict_dirty = byteen_rdata[way_idx]; assign evict_byteen = '1; end - assign evict_data = line_rdata[evict_way_idx_r]; - end else begin : g_no_writeback `UNUSED_VAR (init) `UNUSED_VAR (flush) - assign line_dirty = 0; - assign evict_data = '0; + assign evict_dirty = 0; assign evict_byteen = '0; end - for (genvar i = 0; i < NUM_WAYS; ++i) begin : g_data_store - wire [`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] line_wdata; - wire [BYTEENW-1:0] line_wren; + wire [NUM_WAYS-1:0][`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] line_rdata; - wire fill_way_en = (NUM_WAYS == 1) 
|| evict_way[i]; + if (WRITE_ENABLE) begin : g_data_store + // create a single write-enable block ram to reduce area overhead + wire [NUM_WAYS-1:0][`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] line_wdata; + wire [NUM_WAYS-1:0][LINE_SIZE-1:0] line_wren; + wire line_write; + wire line_read; - if (WRITE_ENABLE != 0) begin : g_wdata + for (genvar i = 0; i < NUM_WAYS; ++i) begin : g_wdata + wire fill_way_en = (NUM_WAYS == 1) || (evict_way == i); wire [`CS_WORDS_PER_LINE-1:0][WORD_SIZE-1:0] write_mask; for (genvar j = 0; j < `CS_WORDS_PER_LINE; ++j) begin : g_write_mask wire word_en = (`CS_WORDS_PER_LINE == 1) || (word_idx == j); assign write_mask[j] = write_byteen & {WORD_SIZE{word_en}}; end - assign line_wdata = (fill && fill_way_en) ? fill_data : {`CS_WORDS_PER_LINE{write_word}}; - assign line_wren = {LINE_SIZE{fill && fill_way_en}} - | ({LINE_SIZE{write && tag_matches[i]}} & write_mask); - - end else begin : g_ro_wdata - `UNUSED_VAR (write) - `UNUSED_VAR (write_byteen) - `UNUSED_VAR (write_word) - `UNUSED_VAR (word_idx) - assign line_wdata = fill_data; - assign line_wren = fill_way_en; + assign line_wdata[i] = fill ? 
fill_data : {`CS_WORDS_PER_LINE{write_word}}; + assign line_wren[i] = {LINE_SIZE{fill && fill_way_en}} + | ({LINE_SIZE{write && tag_matches[i]}} & write_mask); end - wire line_write = fill || (write && WRITE_ENABLE); - wire line_read = read || ((fill || flush) && WRITEBACK); + assign line_write = fill || (write && WRITE_ENABLE); + assign line_read = read || ((fill || flush) && WRITEBACK); VX_sp_ram #( - .DATAW (`CS_LINE_WIDTH), + .DATAW (NUM_WAYS * `CS_LINE_WIDTH), .SIZE (`CS_LINES_PER_BANK), - .WRENW (BYTEENW), + .WRENW (NUM_WAYS * LINE_SIZE), .OUT_REG (1) ) data_store ( .clk (clk), @@ -178,35 +156,46 @@ module VX_cache_data #( .wren (line_wren), .addr (line_idx), .wdata (line_wdata), - .rdata (line_rdata[i]) + .rdata (line_rdata) ); + end else begin : g_data_store + `UNUSED_VAR (write) + `UNUSED_VAR (write_byteen) + `UNUSED_VAR (write_word) + `UNUSED_VAR (word_idx) + + // we don't merge the ways into a single block ram due to WREN overhead + for (genvar i = 0; i < NUM_WAYS; ++i) begin : g_ways + wire fill_way_en = (NUM_WAYS == 1) || (evict_way == i); + VX_sp_ram #( + .DATAW (`CS_LINE_WIDTH), + .SIZE (`CS_LINES_PER_BANK), + .OUT_REG (1) + ) data_store ( + .clk (clk), + .reset (reset), + .read (read), + .write (fill && fill_way_en), + .wren (1'b1), + .addr (line_idx), + .wdata (fill_data), + .rdata (line_rdata[i]) + ); + end end - wire [`LOG2UP(NUM_WAYS)-1:0] hit_way_idx; + wire [`CS_WAY_SEL_WIDTH-1:0] hit_idx; + VX_onehot_encoder #( .N (NUM_WAYS) - ) hit_idx_enc ( + ) way_idx_enc ( .data_in (tag_matches), - .data_out (hit_way_idx), + .data_out (hit_idx), `UNUSED_PIN (valid_out) ); - if (`CS_WORDS_PER_LINE > 1) begin : g_read_data - // order the data layout to perform ways multiplexing last. - // this allows converting way index to binary in parallel with BRAM read and word indexing. 
- wire [`CS_WORDS_PER_LINE-1:0][NUM_WAYS-1:0][`CS_WORD_WIDTH-1:0] transposed_rdata; - VX_transpose #( - .DATAW (`CS_WORD_WIDTH), - .N (NUM_WAYS), - .M (`CS_WORDS_PER_LINE) - ) transpose ( - .data_in (line_rdata), - .data_out (transposed_rdata) - ); - assign read_data = transposed_rdata[word_idx][hit_way_idx]; - end else begin : g_read_data_1w - `UNUSED_VAR (word_idx) - assign read_data = line_rdata[hit_way_idx]; - end + `BUFFER_EX(way_idx, (read ? hit_idx : evict_way), ~stall, 1); + + assign read_data = line_rdata[way_idx]; endmodule diff --git a/hw/rtl/cache/VX_cache_define.vh b/hw/rtl/cache/VX_cache_define.vh index b75845ecab..65b2399000 100644 --- a/hw/rtl/cache/VX_cache_define.vh +++ b/hw/rtl/cache/VX_cache_define.vh @@ -22,6 +22,7 @@ `define CS_LINE_WIDTH (8 * LINE_SIZE) `define CS_BANK_SIZE (CACHE_SIZE / NUM_BANKS) `define CS_WAY_SEL_BITS `CLOG2(NUM_WAYS) +`define CS_WAY_SEL_WIDTH `UP(`CS_WAY_SEL_BITS) `define CS_LINES_PER_BANK (`CS_BANK_SIZE / (LINE_SIZE * NUM_WAYS)) `define CS_WORDS_PER_LINE (LINE_SIZE / WORD_SIZE) diff --git a/hw/rtl/cache/VX_cache_repl.sv b/hw/rtl/cache/VX_cache_repl.sv index dbd51afddc..24425328d5 100644 --- a/hw/rtl/cache/VX_cache_repl.sv +++ b/hw/rtl/cache/VX_cache_repl.sv @@ -97,135 +97,114 @@ module VX_cache_repl #( input wire stall, input wire hit_valid, input wire [`CS_LINE_SEL_BITS-1:0] hit_line, - input wire [NUM_WAYS-1:0] hit_way, + input wire [`CS_WAY_SEL_WIDTH-1:0] hit_way, input wire repl_valid, input wire [`CS_LINE_SEL_BITS-1:0] repl_line_n, input wire [`CS_LINE_SEL_BITS-1:0] repl_line, - output wire [NUM_WAYS-1:0] repl_way + output wire [`CS_WAY_SEL_WIDTH-1:0] repl_way ); + localparam WAY_SEL_WIDTH = `CS_WAY_SEL_WIDTH; `UNUSED_VAR (stall) - localparam WAY_IDX_BITS = $clog2(NUM_WAYS); - localparam WAY_IDX_WIDTH = `UP(WAY_IDX_BITS); - - if (REPL_POLICY == `CS_REPL_PLRU) begin : g_plru - // Pseudo Least Recently Used replacement policy - localparam LRU_WIDTH = `UP(NUM_WAYS-1); - localparam FORCE_BRAM = (LRU_WIDTH * 
`CS_LINES_PER_BANK) >= 1024; - - wire [WAY_IDX_WIDTH-1:0] repl_way_idx; - wire [WAY_IDX_WIDTH-1:0] hit_way_idx; - wire [LRU_WIDTH-1:0] plru_rdata; - wire [LRU_WIDTH-1:0] plru_wdata; - wire [LRU_WIDTH-1:0] plru_wmask; - - VX_dp_ram #( - .DATAW (LRU_WIDTH), - .SIZE (`CS_LINES_PER_BANK), - .WRENW (LRU_WIDTH), - .OUT_REG (FORCE_BRAM) - ) plru_store ( - .clk (clk), - .reset (reset), - .read (FORCE_BRAM ? ~stall : repl_valid), - .write (hit_valid), - .wren (plru_wmask), - .waddr (hit_line), - .raddr (FORCE_BRAM ? repl_line_n : repl_line), - .wdata (plru_wdata), - .rdata (plru_rdata) - ); - - VX_onehot_encoder #( - .N (NUM_WAYS) - ) hit_way_enc ( - .data_in (hit_way), - .data_out (hit_way_idx), - `UNUSED_PIN (valid_out) - ); - - plru_decoder #( - .NUM_WAYS (NUM_WAYS) - ) plru_dec ( - .way_idx (hit_way_idx), - .lru_data (plru_wdata), - .lru_mask (plru_wmask) - ); - - plru_encoder #( - .NUM_WAYS (NUM_WAYS) - ) plru_enc ( - .lru_in (plru_rdata), - .way_idx (repl_way_idx) - ); - - VX_decoder #( - .N (WAY_IDX_BITS) - ) repl_way_dec ( - .sel_in (repl_way_idx), - .data_in (1'b1), - .data_out (repl_way) - ); - - end else if (REPL_POLICY == `CS_REPL_CYCLIC) begin : g_cyclic - // Cyclic replacement policy - localparam CTR_WIDTH = $clog2(NUM_WAYS); - localparam FORCE_BRAM = (CTR_WIDTH * `CS_LINES_PER_BANK) >= 1024; - - `UNUSED_VAR (hit_valid) - `UNUSED_VAR (hit_line) - `UNUSED_VAR (hit_way) - `UNUSED_VAR (repl_valid) - - wire [`UP(CTR_WIDTH)-1:0] ctr_rdata; - wire [`UP(CTR_WIDTH)-1:0] ctr_wdata = ctr_rdata + 1; - - VX_dp_ram #( - .DATAW (`UP(CTR_WIDTH)), - .SIZE (`CS_LINES_PER_BANK), - .OUT_REG (FORCE_BRAM) - ) ctr_store ( - .clk (clk), - .reset (reset), - .read (FORCE_BRAM ? ~stall : repl_valid), - .write (repl_valid), - .wren (1'b1), - .raddr (FORCE_BRAM ? 
repl_line_n : repl_line), - .waddr (repl_line), - .wdata (ctr_wdata), - .rdata (ctr_rdata) - ); - - VX_decoder #( - .N (WAY_IDX_BITS) - ) ctr_decoder ( - .sel_in (ctr_rdata), - .data_in (1'b1), - .data_out (repl_way) - ); - end else begin : g_random - // Random replacement policy - `UNUSED_VAR (hit_valid) - `UNUSED_VAR (hit_line) - `UNUSED_VAR (hit_way) - `UNUSED_VAR (repl_valid) - `UNUSED_VAR (repl_line) - `UNUSED_VAR (repl_line_n) - if (NUM_WAYS > 1) begin : g_repl_way - reg [NUM_WAYS-1:0] victim_way; + if (NUM_WAYS > 1) begin : g_enable + if (REPL_POLICY == `CS_REPL_PLRU) begin : g_plru + // Pseudo Least Recently Used replacement policy + localparam LRU_WIDTH = `UP(NUM_WAYS-1); + localparam USE_BRAM = (LRU_WIDTH * `CS_LINES_PER_BANK) >= `MAX_LUTRAM; + + wire [LRU_WIDTH-1:0] plru_rdata; + wire [LRU_WIDTH-1:0] plru_wdata; + wire [LRU_WIDTH-1:0] plru_wmask; + + VX_dp_ram #( + .DATAW (LRU_WIDTH), + .SIZE (`CS_LINES_PER_BANK), + .WRENW (LRU_WIDTH), + .OUT_REG (USE_BRAM) + ) plru_store ( + .clk (clk), + .reset (reset), + .read (USE_BRAM ? ~stall : repl_valid), + .write (hit_valid), + .wren (plru_wmask), + .waddr (hit_line), + .raddr (USE_BRAM ? 
repl_line_n : repl_line), + .wdata (plru_wdata), + .rdata (plru_rdata) + ); + + plru_decoder #( + .NUM_WAYS (NUM_WAYS) + ) plru_dec ( + .way_idx (hit_way), + .lru_data (plru_wdata), + .lru_mask (plru_wmask) + ); + + plru_encoder #( + .NUM_WAYS (NUM_WAYS) + ) plru_enc ( + .lru_in (plru_rdata), + .way_idx (repl_way) + ); + + end else if (REPL_POLICY == `CS_REPL_CYCLIC) begin : g_cyclic + // Cyclic replacement policy + localparam USE_BRAM = (WAY_SEL_WIDTH * `CS_LINES_PER_BANK) >= `MAX_LUTRAM; + + `UNUSED_VAR (hit_valid) + `UNUSED_VAR (hit_line) + `UNUSED_VAR (hit_way) + `UNUSED_VAR (repl_valid) + + wire [WAY_SEL_WIDTH-1:0] ctr_rdata; + wire [WAY_SEL_WIDTH-1:0] ctr_wdata = ctr_rdata + 1; + + VX_dp_ram #( + .DATAW (WAY_SEL_WIDTH), + .SIZE (`CS_LINES_PER_BANK), + .OUT_REG (USE_BRAM) + ) ctr_store ( + .clk (clk), + .reset (reset), + .read (USE_BRAM ? ~stall : repl_valid), + .write (repl_valid), + .wren (1'b1), + .raddr (USE_BRAM ? repl_line_n : repl_line), + .waddr (repl_line), + .wdata (ctr_wdata), + .rdata (ctr_rdata) + ); + + assign repl_way = ctr_rdata; + end else begin : g_random + // Random replacement policy + `UNUSED_VAR (hit_valid) + `UNUSED_VAR (hit_line) + `UNUSED_VAR (hit_way) + `UNUSED_VAR (repl_valid) + `UNUSED_VAR (repl_line) + `UNUSED_VAR (repl_line_n) + reg [WAY_SEL_WIDTH-1:0] victim_idx; always @(posedge clk) begin if (reset) begin - victim_way <= 1; + victim_idx <= 0; end else if (~stall) begin - victim_way <= {victim_way[NUM_WAYS-2:0], victim_way[NUM_WAYS-1]}; + victim_idx <= victim_idx + 1; end end - assign repl_way = victim_way; - end else begin : g_repl_way_1 - `UNUSED_VAR (clk) - `UNUSED_VAR (reset) - assign repl_way = 1'b1; + assign repl_way = victim_idx; end + end else begin : g_disable + `UNUSED_VAR (clk) + `UNUSED_VAR (reset) + `UNUSED_VAR (hit_valid) + `UNUSED_VAR (hit_line) + `UNUSED_VAR (hit_way) + `UNUSED_VAR (repl_valid) + `UNUSED_VAR (repl_line) + `UNUSED_VAR (repl_line_n) + assign repl_way = 1'b0; end endmodule diff --git 
a/hw/rtl/cache/VX_cache_tags.sv b/hw/rtl/cache/VX_cache_tags.sv index 8793420e10..71f7809dcf 100644 --- a/hw/rtl/cache/VX_cache_tags.sv +++ b/hw/rtl/cache/VX_cache_tags.sv @@ -36,50 +36,35 @@ module VX_cache_tags #( input wire flush, input wire fill, input wire lookup, - input wire [`CS_LINE_ADDR_WIDTH-1:0] line_addr, - input wire [NUM_WAYS-1:0] evict_way, + input wire [`CS_LINE_SEL_BITS-1:0] line_idx_n, + input wire [`CS_LINE_SEL_BITS-1:0] line_idx, + input wire [`CS_TAG_SEL_BITS-1:0] line_tag, + input wire [`CS_WAY_SEL_WIDTH-1:0] evict_way, // outputs - output wire [NUM_WAYS-1:0] tag_matches_r, - output wire [`CS_TAG_SEL_BITS-1:0] line_tag_r, - output wire [NUM_WAYS-1:0] evict_way_r, - output wire [`CS_TAG_SEL_BITS-1:0] evict_tag_r + output wire [NUM_WAYS-1:0] tag_matches, + output wire [`CS_TAG_SEL_BITS-1:0] evict_tag ); // valid, tag localparam TAG_WIDTH = 1 + `CS_TAG_SEL_BITS; - wire [`CS_LINE_SEL_BITS-1:0] line_idx = line_addr[`CS_LINE_SEL_BITS-1:0]; - wire [`CS_TAG_SEL_BITS-1:0] line_tag = `CS_LINE_ADDR_TAG(line_addr); - wire [NUM_WAYS-1:0][`CS_TAG_SEL_BITS-1:0] read_tag; wire [NUM_WAYS-1:0] read_valid; - - if (NUM_WAYS > 1) begin : g_evict_way - `BUFFER_EX(evict_way_r, evict_way, ~stall, 1); - end else begin : g_evict_way_0 - `UNUSED_VAR (evict_way) - assign evict_way_r = 1'b1; - end + `UNUSED_VAR (lookup) if (WRITEBACK) begin : g_evict_tag_wb - VX_onehot_mux #( - .DATAW (`CS_TAG_SEL_BITS), - .N (NUM_WAYS) - ) evict_tag_sel ( - .data_in (read_tag), - .sel_in (evict_way_r), - .data_out (evict_tag_r) - ); + assign evict_tag = read_tag[evict_way]; end else begin : g_evict_tag_wt - assign evict_tag_r = '0; + assign evict_tag = '0; end for (genvar i = 0; i < NUM_WAYS; ++i) begin : g_tag_store - wire do_fill = fill && evict_way[i]; - wire do_flush = flush && (!WRITEBACK || evict_way[i]); // flush the whole line in writethrough mode + wire way_en = (NUM_WAYS == 1) || (evict_way == i); + wire do_fill = fill && way_en; + wire do_flush = flush && (!WRITEBACK || 
way_en); // flush the whole line in writethrough mode - wire line_read = lookup || (WRITEBACK && (fill || flush)); + //wire line_read = lookup || (WRITEBACK && (fill || flush)); wire line_write = init || do_fill || do_flush; wire line_valid = fill; @@ -89,26 +74,26 @@ module VX_cache_tags #( assign line_wdata = {line_valid, line_tag}; assign {read_valid[i], read_tag[i]} = line_rdata; - VX_sp_ram #( + VX_dp_ram #( .DATAW (TAG_WIDTH), .SIZE (`CS_LINES_PER_BANK), - .OUT_REG (1) + .OUT_REG (1), + .WRITE_MODE ("W") ) tag_store ( .clk (clk), .reset (reset), - .read (line_read), + .read (~stall), .write (line_write), .wren (1'b1), - .addr (line_idx), + .waddr (line_idx), + .raddr (line_idx_n), .wdata (line_wdata), .rdata (line_rdata) ); end - `BUFFER_EX(line_tag_r, line_tag, ~stall, 1); - for (genvar i = 0; i < NUM_WAYS; ++i) begin : g_tag_matches - assign tag_matches_r[i] = read_valid[i] && (line_tag_r == read_tag[i]); + assign tag_matches[i] = read_valid[i] && (line_tag == read_tag[i]); end endmodule diff --git a/hw/rtl/libs/VX_dp_ram.sv b/hw/rtl/libs/VX_dp_ram.sv index b770cfa68e..7616aa5b95 100644 --- a/hw/rtl/libs/VX_dp_ram.sv +++ b/hw/rtl/libs/VX_dp_ram.sv @@ -61,7 +61,7 @@ module VX_dp_ram #( `ifdef SYNTHESIS `ifdef QUARTUS - localparam `STRING RAM_STYLE_VALUE = USE_BRAM ? "block" : (LUTRAM ? "MLAB, no_rw_check" : ""); + localparam `STRING RAM_STYLE_VALUE = USE_BRAM ? "block" : (LUTRAM ? "MLAB, no_rw_check" : "auto"); localparam `STRING RAM_NO_RWCHECK_VALUE = NO_RWCHECK ? "-name add_pass_through_logic_to_inferred_rams off" : ""; `define RAM_ARRAY (* ramstyle = RAM_STYLE_VALUE *) reg [WRENW-1:0][WSELW-1:0] ram [0:SIZE-1]; `define RAM_WRITE for (integer i = 0; i < WRENW; ++i) begin \ @@ -70,9 +70,9 @@ module VX_dp_ram #( end \ end `define RAM_NO_RWCHECK (* altera_attribute = RAM_NO_RWCHECK_VALUE *) -`else - localparam `STRING RAM_STYLE_VALUE = USE_BRAM ? "block" : (LUTRAM ? "distributed" : ""); - localparam `STRING RAM_NO_RWCHECK_VALUE = NO_RWCHECK ? 
"no" : ""; +`elif VIVADO + localparam `STRING RAM_STYLE_VALUE = USE_BRAM ? "block" : (LUTRAM ? "distributed" : "auto"); + localparam `STRING RAM_NO_RWCHECK_VALUE = NO_RWCHECK ? "no" : "auto"; `define RAM_ARRAY (* ram_style = RAM_STYLE_VALUE *) reg [DATAW-1:0] ram [0:SIZE-1]; `define RAM_WRITE for (integer i = 0; i < WRENW; ++i) begin \ if (wren[i]) begin \ @@ -80,6 +80,14 @@ module VX_dp_ram #( end \ end `define RAM_NO_RWCHECK (* rw_addr_collision = RAM_NO_RWCHECK_VALUE *) +`else + `define RAM_ARRAY reg [DATAW-1:0] ram [0:SIZE-1]; + `define RAM_WRITE for (integer i = 0; i < WRENW; ++i) begin \ + if (wren[i]) begin \ + ram[waddr][i * WSELW +: WSELW] <= wdata[i * WSELW +: WSELW]; \ + end \ + end + `define RAM_NO_RWCHECK `endif if (OUT_REG) begin : g_out_reg reg [DATAW-1:0] rdata_r; @@ -122,7 +130,7 @@ module VX_dp_ram #( rdata_r <= ram[raddr]; end end - end end else if (WRITE_MODE == "U") begin : g_undefined + end else if (WRITE_MODE == "U") begin : g_undefined `RAM_NO_RWCHECK `RAM_ARRAY `RAM_INITIALIZATION always @(posedge clk) begin @@ -138,7 +146,8 @@ module VX_dp_ram #( end else begin `STATIC_ASSERT(0, ("invalid write mode: %s", WRITE_MODE)) end - else begin : g_no_out_reg + assign rdata = rdata_r; + end else begin : g_no_out_reg `UNUSED_VAR (read) `RAM_NO_RWCHECK `RAM_ARRAY `RAM_INITIALIZATION diff --git a/hw/rtl/libs/VX_fifo_queue.sv b/hw/rtl/libs/VX_fifo_queue.sv index 1410a0dd09..9323c4dc06 100644 --- a/hw/rtl/libs/VX_fifo_queue.sv +++ b/hw/rtl/libs/VX_fifo_queue.sv @@ -20,7 +20,7 @@ module VX_fifo_queue #( parameter ALM_FULL = (DEPTH - 1), parameter ALM_EMPTY = 1, parameter OUT_REG = 0, - parameter LUTRAM = ((DATAW * DEPTH) < `MAX_LUTRAM), + parameter LUTRAM = 0, parameter SIZEW = `CLOG2(DEPTH+1) ) ( input wire clk, @@ -42,9 +42,6 @@ module VX_fifo_queue #( `STATIC_ASSERT(ALM_EMPTY < DEPTH, ("alm_empty must be smaller than size!")) `STATIC_ASSERT(`IS_POW2(DEPTH), ("depth must be a power of 2!")) - `UNUSED_PARAM (OUT_REG) - `UNUSED_PARAM (LUTRAM) - 
VX_pending_size #( .SIZE (DEPTH), .ALM_EMPTY (ALM_EMPTY), @@ -62,6 +59,8 @@ module VX_fifo_queue #( ); if (DEPTH == 1) begin : g_depth_1 + `UNUSED_PARAM (OUT_REG) + `UNUSED_PARAM (LUTRAM) reg [DATAW-1:0] head_r; @@ -75,6 +74,7 @@ module VX_fifo_queue #( end else begin : g_depth_n + localparam USE_BRAM = !LUTRAM && ((DATAW * DEPTH) >= `MAX_LUTRAM); localparam ADDRW = `CLOG2(DEPTH); wire [DATAW-1:0] data_out_w; @@ -95,17 +95,17 @@ module VX_fifo_queue #( end end - wire [ADDRW-1:0] rd_ptr_w = LUTRAM ? rd_ptr_r : rd_ptr_n; + wire [ADDRW-1:0] rd_ptr_w = USE_BRAM ? rd_ptr_n : rd_ptr_r; wire going_empty = (ALM_EMPTY == 1) ? alm_empty : (size[ADDRW-1:0] == ADDRW'(1)); wire bypass = push && (empty || (going_empty && pop)); - wire read = ((OUT_REG != 0) || !LUTRAM) ? ~bypass : pop; + wire read = ((OUT_REG != 0) || USE_BRAM) ? ~bypass : pop; VX_dp_ram #( .DATAW (DATAW), .SIZE (DEPTH), - .LUTRAM (LUTRAM), - .OUT_REG(!LUTRAM), + .LUTRAM (!USE_BRAM), + .OUT_REG(USE_BRAM), .WRITE_MODE("W") ) dp_ram ( .clk (clk), diff --git a/hw/unittest/generic_queue/Makefile b/hw/unittest/generic_queue/Makefile index 0adf78faea..ad79c6f944 100644 --- a/hw/unittest/generic_queue/Makefile +++ b/hw/unittest/generic_queue/Makefile @@ -21,4 +21,6 @@ RTL_INCLUDE := -I$(RTL_DIR) -I$(DPI_DIR) -I$(RTL_DIR)/libs TOP := VX_fifo_queue +PARAMS := -GDATAW=32 -GDEPTH=8 + include ../common.mk \ No newline at end of file From 4206ffdb80586cc433d3eb10681410565365a5d9 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sat, 19 Oct 2024 21:39:34 -0700 Subject: [PATCH 300/407] minor update --- hw/rtl/cache/VX_cache_bank.sv | 55 +++++++++++++++++----------- hw/rtl/cache/VX_cache_data.sv | 67 ++++++++++------------------------- hw/rtl/cache/VX_cache_tags.sv | 36 ++++++++++++------- 3 files changed, 77 insertions(+), 81 deletions(-) diff --git a/hw/rtl/cache/VX_cache_bank.sv b/hw/rtl/cache/VX_cache_bank.sv index 574659d7e8..7d10223783 100644 --- a/hw/rtl/cache/VX_cache_bank.sv +++ b/hw/rtl/cache/VX_cache_bank.sv @@ -151,7 
+151,7 @@ module VX_cache_bank #( wire is_fill_st0, is_fill_st1; wire is_flush_st0, is_flush_st1; wire [`CS_WAY_SEL_WIDTH-1:0] flush_way_st0, evict_way_st0; - wire [`CS_WAY_SEL_WIDTH-1:0] way_idx_st1; + wire [`CS_WAY_SEL_WIDTH-1:0] way_idx_st0, way_idx_st1; wire [`CS_LINE_ADDR_WIDTH-1:0] addr_sel, addr_st0, addr_st1; wire [`CS_LINE_SEL_BITS-1:0] line_idx_sel, line_idx_st0, line_idx_st1; @@ -166,11 +166,12 @@ module VX_cache_bank #( wire [`CS_LINE_WIDTH-1:0] data_sel, data_st0, data_st1; wire [MSHR_ADDR_WIDTH-1:0] mshr_id_st0, mshr_id_st1; wire [MSHR_ADDR_WIDTH-1:0] replay_id_st0; + wire is_dirty_st0, is_dirty_st1; wire is_replay_st0, is_replay_st1; + wire is_hit_st0, is_hit_st1; wire [`UP(FLAGS_WIDTH)-1:0] flags_sel, flags_st0, flags_st1; wire mshr_pending_st0, mshr_pending_st1; wire [MSHR_ADDR_WIDTH-1:0] mshr_previd_st0, mshr_previd_st1; - wire is_hit_st0, is_hit_st1; wire mshr_empty; wire flush_valid; @@ -379,30 +380,42 @@ module VX_cache_bank #( .init (do_init_st0), .flush (do_flush_st0 && ~pipe_stall), .fill (do_fill_st0 && ~pipe_stall), - .lookup (do_lookup_st0 && ~pipe_stall), + .read (do_read_st0 && ~pipe_stall), + .write (do_write_st0 && ~pipe_stall), .line_idx_n (line_idx_sel), .line_idx (line_idx_st0), .line_tag (line_tag_st0), .evict_way (evict_way_st0), // outputs .tag_matches(tag_matches_st0), + .evict_dirty(is_dirty_st0), .evict_tag (evict_tag_st0) ); + wire [`CS_WAY_SEL_WIDTH-1:0] hit_idx_st0; + VX_onehot_encoder #( + .N (NUM_WAYS) + ) way_idx_enc ( + .data_in (tag_matches_st0), + .data_out (hit_idx_st0), + `UNUSED_PIN (valid_out) + ); + + assign way_idx_st0 = is_creq_st0 ? hit_idx_st0 : evict_way_st0; assign is_hit_st0 = (| tag_matches_st0); wire [MSHR_ADDR_WIDTH-1:0] mshr_alloc_id_st0; assign mshr_id_st0 = is_replay_st0 ? 
replay_id_st0 : mshr_alloc_id_st0; VX_pipe_register #( - .DATAW (1 + 1 + 1 + 1 + 1 + 1 + 1 + `UP(FLAGS_WIDTH) + `CS_TAG_SEL_BITS + `CS_TAG_SEL_BITS + `CS_LINE_SEL_BITS + `CS_LINE_WIDTH + WORD_SIZE + WORD_SEL_WIDTH + REQ_SEL_WIDTH + TAG_WIDTH + MSHR_ADDR_WIDTH + MSHR_ADDR_WIDTH + 1), + .DATAW (1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + `UP(FLAGS_WIDTH) + `CS_WAY_SEL_WIDTH + `CS_TAG_SEL_BITS + `CS_TAG_SEL_BITS + `CS_LINE_SEL_BITS + `CS_LINE_WIDTH + WORD_SIZE + WORD_SEL_WIDTH + REQ_SEL_WIDTH + TAG_WIDTH + MSHR_ADDR_WIDTH + MSHR_ADDR_WIDTH + 1), .RESETW (1) ) pipe_reg1 ( .clk (clk), .reset (reset), .enable (~pipe_stall), - .data_in ({valid_st0, is_fill_st0, is_flush_st0, is_creq_st0, is_replay_st0, is_hit_st0, rw_st0, flags_st0, evict_tag_st0, line_tag_st0, line_idx_st0, data_st0, byteen_st0, word_idx_st0, req_idx_st0, tag_st0, mshr_id_st0, mshr_previd_st0, mshr_pending_st0}), - .data_out ({valid_st1, is_fill_st1, is_flush_st1, is_creq_st1, is_replay_st1, is_hit_st1, rw_st1, flags_st1, evict_tag_st1, line_tag_st1, line_idx_st1, data_st1, byteen_st1, word_idx_st1, req_idx_st1, tag_st1, mshr_id_st1, mshr_previd_st1, mshr_pending_st1}) + .data_in ({valid_st0, is_fill_st0, is_flush_st0, is_creq_st0, is_replay_st0, is_dirty_st0, is_hit_st0, rw_st0, flags_st0, way_idx_st0, evict_tag_st0, line_tag_st0, line_idx_st0, data_st0, byteen_st0, word_idx_st0, req_idx_st0, tag_st0, mshr_id_st0, mshr_previd_st0, mshr_pending_st0}), + .data_out ({valid_st1, is_fill_st1, is_flush_st1, is_creq_st1, is_replay_st1, is_dirty_st1, is_hit_st1, rw_st1, flags_st1, way_idx_st1, evict_tag_st1, line_tag_st1, line_idx_st1, data_st1, byteen_st1, word_idx_st1, req_idx_st1, tag_st1, mshr_id_st1, mshr_previd_st1, mshr_pending_st1}) ); if (UUID_WIDTH != 0) begin : g_req_uuid_st1 @@ -421,7 +434,6 @@ module VX_cache_bank #( wire[`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] read_data_st1; wire [LINE_SIZE-1:0] evict_byteen_st1; - wire evict_dirty_st1; VX_cache_data #( .CACHE_SIZE (CACHE_SIZE), @@ -449,10 +461,9 @@ 
module VX_cache_bank #( .write_word (write_word_st0), .word_idx (word_idx_st0), .write_byteen(byteen_st0), - // outputs .way_idx (way_idx_st1), + // outputs .read_data (read_data_st1), - .evict_dirty(evict_dirty_st1), .evict_byteen(evict_byteen_st1) ); @@ -580,7 +591,7 @@ module VX_cache_bank #( wire is_fill_or_flush_st1 = is_fill_st1 || (is_flush_st1 && WRITEBACK); wire do_fill_or_flush_st1 = valid_st1 && is_fill_or_flush_st1; - wire do_writeback_st1 = do_fill_or_flush_st1 && evict_dirty_st1; + wire do_writeback_st1 = do_fill_or_flush_st1 && is_dirty_st1; wire [`CS_LINE_ADDR_WIDTH-1:0] evict_addr_st1 = {evict_tag_st1, line_idx_st1}; if (WRITE_ENABLE) begin : g_mreq_queue @@ -588,7 +599,7 @@ module VX_cache_bank #( if (DIRTY_BYTES) begin : g_dirty_bytes // ensure dirty bytes match the tag info wire has_dirty_bytes = (| evict_byteen_st1); - `RUNTIME_ASSERT (~do_fill_or_flush_st1 || (evict_dirty_st1 == has_dirty_bytes), ("%t: missmatch dirty bytes: dirty_line=%b, dirty_bytes=%b, addr=0x%0h", $time, evict_dirty_st1, has_dirty_bytes, `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID))) + `RUNTIME_ASSERT (~do_fill_or_flush_st1 || (is_dirty_st1 == has_dirty_bytes), ("%t: missmatch dirty bytes: dirty_line=%b, dirty_bytes=%b, addr=0x%0h", $time, is_dirty_st1, has_dirty_bytes, `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID))) end // issue a fill request on a read/write miss // issue a writeback on a dirty line eviction @@ -670,6 +681,8 @@ module VX_cache_bank #( assign mem_req_valid = ~mreq_queue_empty; + `UNUSED_VAR (do_lookup_st0) + /////////////////////////////////////////////////////////////////////////////// `ifdef PERF_ENABLE @@ -708,29 +721,29 @@ module VX_cache_bank #( `TRACE(3, ("%t: %s tags-init: addr=0x%0h, line=%0d\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(addr_st0, BANK_ID), line_idx_st0)) end if (do_fill_st0 && ~pipe_stall) begin - `TRACE(3, ("%t: %s tags-fill: addr=0x%0h, way=%0d, line=%0d (#%0d)\n", $time, INSTANCE_ID, - `CS_LINE_TO_FULL_ADDR(addr_st0, BANK_ID), 
evict_way_st0, line_idx_st0, req_uuid_st0)) + `TRACE(3, ("%t: %s tags-fill: addr=0x%0h, way=%0d, line=%0d, dirty=%b (#%0d)\n", $time, INSTANCE_ID, + `CS_LINE_TO_FULL_ADDR(addr_st0, BANK_ID), evict_way_st0, line_idx_st0, is_dirty_st0, req_uuid_st0)) end if (do_flush_st0 && ~pipe_stall) begin - `TRACE(3, ("%t: %s tags-flush: addr=0x%0h, way=%0d, line=%0d (#%0d)\n", $time, INSTANCE_ID, - `CS_LINE_TO_FULL_ADDR(addr_st0, BANK_ID), evict_way_st0, line_idx_st0, req_uuid_st0)) + `TRACE(3, ("%t: %s tags-flush: addr=0x%0h, way=%0d, line=%0d, dirty=%b (#%0d)\n", $time, INSTANCE_ID, + `CS_LINE_TO_FULL_ADDR(addr_st0, BANK_ID), evict_way_st0, line_idx_st0, is_dirty_st0, req_uuid_st0)) end - if (do_lookup_st1 && ~pipe_stall) begin - if (is_hit_st1) begin + if (do_lookup_st0 && ~pipe_stall) begin + if (is_hit_st0) begin `TRACE(3, ("%t: %s tags-hit: addr=0x%0h, rw=%b, way=%0d, line=%0d, tag=0x%0h (#%0d)\n", $time, INSTANCE_ID, - `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID), rw_st1, way_idx_st1, line_idx_st1, line_tag_st1, req_uuid_st1)) + `CS_LINE_TO_FULL_ADDR(addr_st0, BANK_ID), rw_st0, way_idx_st0, line_idx_st0, line_tag_st0, req_uuid_st0)) end else begin `TRACE(3, ("%t: %s tags-miss: addr=0x%0h, rw=%b, way=%0d, line=%0d, tag=0x%0h (#%0d)\n", $time, INSTANCE_ID, - `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID), rw_st1, way_idx_st1, line_idx_st1, line_tag_st1, req_uuid_st1)) + `CS_LINE_TO_FULL_ADDR(addr_st0, BANK_ID), rw_st0, way_idx_st0, line_idx_st0, line_tag_st0, req_uuid_st0)) end end if (do_fill_st0 && ~pipe_stall) begin `TRACE(3, ("%t: %s data-fill: addr=0x%0h, way=%0d, line=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID, - `CS_LINE_TO_FULL_ADDR(addr_st0, BANK_ID), evict_way_st0, line_idx_st0, data_st0, req_uuid_st0)) + `CS_LINE_TO_FULL_ADDR(addr_st0, BANK_ID), way_idx_st0, line_idx_st0, data_st0, req_uuid_st0)) end if (do_flush_st0 && ~pipe_stall) begin `TRACE(3, ("%t: %s data-flush: addr=0x%0h, way=%0d, line=%0d (#%0d)\n", $time, INSTANCE_ID, - `CS_LINE_TO_FULL_ADDR(addr_st0, 
BANK_ID), evict_way_st0, line_idx_st0, req_uuid_st0)) + `CS_LINE_TO_FULL_ADDR(addr_st0, BANK_ID), way_idx_st0, line_idx_st0, req_uuid_st0)) end if (do_read_st1 && is_hit_st1 && ~pipe_stall) begin `TRACE(3, ("%t: %s data-read: addr=0x%0h, way=%0d, line=%0d, wsel=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID, diff --git a/hw/rtl/cache/VX_cache_data.sv b/hw/rtl/cache/VX_cache_data.sv index 65cf9e0261..75e2c79355 100644 --- a/hw/rtl/cache/VX_cache_data.sv +++ b/hw/rtl/cache/VX_cache_data.sv @@ -47,51 +47,40 @@ module VX_cache_data #( input wire [`CS_WORD_WIDTH-1:0] write_word, input wire [WORD_SIZE-1:0] write_byteen, input wire [`UP(`CS_WORD_SEL_BITS)-1:0] word_idx, + input wire [`CS_WAY_SEL_WIDTH-1:0] way_idx, // outputs - output wire [`CS_WAY_SEL_WIDTH-1:0] way_idx, output wire [`CS_LINE_WIDTH-1:0] read_data, - output wire evict_dirty, output wire [LINE_SIZE-1:0] evict_byteen ); `UNUSED_PARAM (WORD_SIZE) `UNUSED_VAR (stall) - if (WRITEBACK != 0) begin : g_writeback - localparam BYTEEN_DATAW = 1 + ((DIRTY_BYTES != 0) ? 
LINE_SIZE : 0); + if (DIRTY_BYTES != 0) begin : g_dirty_bytes - wire [NUM_WAYS-1:0][BYTEEN_DATAW-1:0] byteen_rdata; - wire [NUM_WAYS-1:0][BYTEEN_DATAW-1:0] byteen_wdata; - wire [NUM_WAYS-1:0][BYTEEN_DATAW-1:0] byteen_wren; + wire [NUM_WAYS-1:0][LINE_SIZE-1:0] byteen_rdata; + wire [NUM_WAYS-1:0][LINE_SIZE-1:0] byteen_wdata; + wire [NUM_WAYS-1:0][LINE_SIZE-1:0] byteen_wren; for (genvar i = 0; i < NUM_WAYS; ++i) begin : g_byteen_wdata wire evict = fill || flush; wire evict_way_en = (NUM_WAYS == 1) || (evict_way == i); - wire dirty_data = write; // only asserted on writes - wire dirty_wren = init || (evict && evict_way_en) || (write && tag_matches[i]); - if (DIRTY_BYTES != 0) begin : g_dirty_bytes - wire [`CS_WORDS_PER_LINE-1:0][WORD_SIZE-1:0] write_mask; - for (genvar j = 0; j < `CS_WORDS_PER_LINE; ++j) begin : g_write_mask - wire word_en = (`CS_WORDS_PER_LINE == 1) || (word_idx == j); - assign write_mask[j] = write_byteen & {WORD_SIZE{word_en}}; - end - wire [LINE_SIZE-1:0] bytes_data = {LINE_SIZE{write}}; // only asserted on writes - wire [LINE_SIZE-1:0] bytes_wren = {LINE_SIZE{init}} - | {LINE_SIZE{evict && evict_way_en}} - | ({LINE_SIZE{write && tag_matches[i]}} & write_mask); - assign byteen_wdata[i] = {dirty_data, bytes_data}; - assign byteen_wren[i] = {dirty_wren, bytes_wren}; - end else begin : g_no_dirty_bytes - assign byteen_wdata[i] = dirty_data; - assign byteen_wren[i] = dirty_wren; + wire [`CS_WORDS_PER_LINE-1:0][WORD_SIZE-1:0] write_mask; + for (genvar j = 0; j < `CS_WORDS_PER_LINE; ++j) begin : g_write_mask + wire word_en = (`CS_WORDS_PER_LINE == 1) || (word_idx == j); + assign write_mask[j] = write_byteen & {WORD_SIZE{word_en}}; end + assign byteen_wdata[i] = {LINE_SIZE{write}}; // only asserted on writes + assign byteen_wren[i] = {LINE_SIZE{init}} + | {LINE_SIZE{evict && evict_way_en}} + | ({LINE_SIZE{write && tag_matches[i]}} & write_mask); end wire byteen_read = fill || flush; wire byteen_write = init || write || fill || flush; VX_sp_ram #( - .DATAW 
(BYTEEN_DATAW * NUM_WAYS), - .WRENW (BYTEEN_DATAW * NUM_WAYS), + .DATAW (LINE_SIZE * NUM_WAYS), + .WRENW (LINE_SIZE * NUM_WAYS), .SIZE (`CS_LINES_PER_BANK), .OUT_REG (1) ) byteen_store ( @@ -105,17 +94,10 @@ module VX_cache_data #( .rdata (byteen_rdata) ); - if (DIRTY_BYTES != 0) begin : g_line_dirty_and_byteen - assign {evict_dirty, evict_byteen} = byteen_rdata[way_idx]; - end else begin : g_line_dirty - assign evict_dirty = byteen_rdata[way_idx]; - assign evict_byteen = '1; - end - - end else begin : g_no_writeback + assign evict_byteen = byteen_rdata[way_idx]; + end else begin : g_no_dirty_bytes `UNUSED_VAR (init) `UNUSED_VAR (flush) - assign evict_dirty = 0; assign evict_byteen = '0; end @@ -140,8 +122,8 @@ module VX_cache_data #( | ({LINE_SIZE{write && tag_matches[i]}} & write_mask); end - assign line_write = fill || (write && WRITE_ENABLE); assign line_read = read || ((fill || flush) && WRITEBACK); + assign line_write = fill || (write && WRITE_ENABLE); VX_sp_ram #( .DATAW (NUM_WAYS * `CS_LINE_WIDTH), @@ -163,6 +145,7 @@ module VX_cache_data #( `UNUSED_VAR (write_byteen) `UNUSED_VAR (write_word) `UNUSED_VAR (word_idx) + `UNUSED_VAR (tag_matches) // we don't merge the ways into a single block ram due to WREN overhead for (genvar i = 0; i < NUM_WAYS; ++i) begin : g_ways @@ -184,18 +167,6 @@ module VX_cache_data #( end end - wire [`CS_WAY_SEL_WIDTH-1:0] hit_idx; - - VX_onehot_encoder #( - .N (NUM_WAYS) - ) way_idx_enc ( - .data_in (tag_matches), - .data_out (hit_idx), - `UNUSED_PIN (valid_out) - ); - - `BUFFER_EX(way_idx, (read ? 
hit_idx : evict_way), ~stall, 1); - assign read_data = line_rdata[way_idx]; endmodule diff --git a/hw/rtl/cache/VX_cache_tags.sv b/hw/rtl/cache/VX_cache_tags.sv index 71f7809dcf..79afb29d4d 100644 --- a/hw/rtl/cache/VX_cache_tags.sv +++ b/hw/rtl/cache/VX_cache_tags.sv @@ -35,7 +35,8 @@ module VX_cache_tags #( input wire init, input wire flush, input wire fill, - input wire lookup, + input wire read, + input wire write, input wire [`CS_LINE_SEL_BITS-1:0] line_idx_n, input wire [`CS_LINE_SEL_BITS-1:0] line_idx, input wire [`CS_TAG_SEL_BITS-1:0] line_tag, @@ -43,36 +44,47 @@ module VX_cache_tags #( // outputs output wire [NUM_WAYS-1:0] tag_matches, + output wire evict_dirty, output wire [`CS_TAG_SEL_BITS-1:0] evict_tag ); - // valid, tag - localparam TAG_WIDTH = 1 + `CS_TAG_SEL_BITS; + // valid, dirty, tag + localparam TAG_WIDTH = 1 + WRITEBACK + `CS_TAG_SEL_BITS; wire [NUM_WAYS-1:0][`CS_TAG_SEL_BITS-1:0] read_tag; wire [NUM_WAYS-1:0] read_valid; - `UNUSED_VAR (lookup) + wire [NUM_WAYS-1:0] read_dirty; + `UNUSED_VAR (read) if (WRITEBACK) begin : g_evict_tag_wb + assign evict_dirty = read_dirty[evict_way]; assign evict_tag = read_tag[evict_way]; end else begin : g_evict_tag_wt + `UNUSED_VAR (read_dirty) + assign evict_dirty = 1'b0; assign evict_tag = '0; end for (genvar i = 0; i < NUM_WAYS; ++i) begin : g_tag_store - - wire way_en = (NUM_WAYS == 1) || (evict_way == i); - wire do_fill = fill && way_en; + wire way_en = (NUM_WAYS == 1) || (evict_way == i); + wire do_fill = fill && way_en; wire do_flush = flush && (!WRITEBACK || way_en); // flush the whole line in writethrough mode + wire do_write = WRITEBACK && write && tag_matches[i]; // only write on hit - //wire line_read = lookup || (WRITEBACK && (fill || flush)); - wire line_write = init || do_fill || do_flush; - wire line_valid = fill; + //wire line_read = read || write || (WRITEBACK && (fill || flush)); + wire line_write = init || do_fill || do_flush || do_write; + wire line_valid = fill || write; wire 
[TAG_WIDTH-1:0] line_wdata; wire [TAG_WIDTH-1:0] line_rdata; - assign line_wdata = {line_valid, line_tag}; - assign {read_valid[i], read_tag[i]} = line_rdata; + if (WRITEBACK) begin : g_wdata + assign line_wdata = {line_valid, write, line_tag}; + assign {read_valid[i], read_dirty[i], read_tag[i]} = line_rdata; + end else begin : g_wdata + assign line_wdata = {line_valid, line_tag}; + assign {read_valid[i], read_tag[i]} = line_rdata; + assign read_dirty[i] = 1'b0; + end VX_dp_ram #( .DATAW (TAG_WIDTH), From 2bd22253ebff32c928c28ccfd9abc99953eb65db Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sat, 19 Oct 2024 22:14:38 -0700 Subject: [PATCH 301/407] minor update --- hw/rtl/cache/VX_bank_flush.sv | 3 ++- hw/rtl/cache/VX_cache_flush.sv | 3 ++- hw/rtl/libs/VX_dp_ram.sv | 2 +- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/hw/rtl/cache/VX_bank_flush.sv b/hw/rtl/cache/VX_bank_flush.sv index 68eefd3631..e50f8ef441 100644 --- a/hw/rtl/cache/VX_bank_flush.sv +++ b/hw/rtl/cache/VX_bank_flush.sv @@ -55,7 +55,8 @@ module VX_bank_flush #( always @(*) begin state_n = state; case (state) - STATE_IDLE: begin + //STATE_IDLE: + default : begin if (flush_begin) begin state_n = STATE_WAIT1; end diff --git a/hw/rtl/cache/VX_cache_flush.sv b/hw/rtl/cache/VX_cache_flush.sv index b318dc5af6..d10cb52752 100644 --- a/hw/rtl/cache/VX_cache_flush.sv +++ b/hw/rtl/cache/VX_cache_flush.sv @@ -128,7 +128,8 @@ module VX_cache_flush #( lock_released_n = lock_released; flush_uuid_n = flush_uuid_r; case (state) - STATE_IDLE: begin + //STATE_IDLE: + default: begin if (flush_req_enable) begin state_n = (BANK_SEL_LATENCY != 0) ? 
STATE_WAIT1 : STATE_FLUSH; for (integer i = NUM_REQS-1; i >= 0; --i) begin diff --git a/hw/rtl/libs/VX_dp_ram.sv b/hw/rtl/libs/VX_dp_ram.sv index 7616aa5b95..9e863f7135 100644 --- a/hw/rtl/libs/VX_dp_ram.sv +++ b/hw/rtl/libs/VX_dp_ram.sv @@ -70,7 +70,7 @@ module VX_dp_ram #( end \ end `define RAM_NO_RWCHECK (* altera_attribute = RAM_NO_RWCHECK_VALUE *) -`elif VIVADO +`elsif VIVADO localparam `STRING RAM_STYLE_VALUE = USE_BRAM ? "block" : (LUTRAM ? "distributed" : "auto"); localparam `STRING RAM_NO_RWCHECK_VALUE = NO_RWCHECK ? "no" : "auto"; `define RAM_ARRAY (* ram_style = RAM_STYLE_VALUE *) reg [DATAW-1:0] ram [0:SIZE-1]; From 9373e2195004270342b409b28a3be1787e6f4242 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sun, 20 Oct 2024 07:32:32 -0700 Subject: [PATCH 302/407] minor update --- hw/rtl/cache/VX_cache_tags.sv | 2 +- hw/rtl/core/VX_operands.sv | 4 +- hw/rtl/libs/VX_dp_ram.sv | 165 ++++++++------------------- hw/rtl/libs/VX_fifo_queue.sv | 9 +- hw/rtl/libs/VX_index_buffer.sv | 12 +- hw/rtl/libs/VX_sp_ram.sv | 201 ++++++++++++++++++++++++++++----- 6 files changed, 231 insertions(+), 162 deletions(-) diff --git a/hw/rtl/cache/VX_cache_tags.sv b/hw/rtl/cache/VX_cache_tags.sv index 79afb29d4d..7afbbfff42 100644 --- a/hw/rtl/cache/VX_cache_tags.sv +++ b/hw/rtl/cache/VX_cache_tags.sv @@ -90,7 +90,7 @@ module VX_cache_tags #( .DATAW (TAG_WIDTH), .SIZE (`CS_LINES_PER_BANK), .OUT_REG (1), - .WRITE_MODE ("W") + .NEW_DATA (1) ) tag_store ( .clk (clk), .reset (reset), diff --git a/hw/rtl/core/VX_operands.sv b/hw/rtl/core/VX_operands.sv index b396d18306..5bfbe3aa67 100644 --- a/hw/rtl/core/VX_operands.sv +++ b/hw/rtl/core/VX_operands.sv @@ -267,12 +267,10 @@ module VX_operands import VX_gpu_pkg::*; #( .DATAW (REGS_DATAW), .SIZE (PER_BANK_REGS * PER_ISSUE_WARPS), .WRENW (BYTEENW), - .OUT_REG (1), - .WRITE_MODE ("U"), `ifdef GPR_RESET .RESET_RAM (1), `endif - .NO_RWCHECK (1) + .OUT_REG (1) ) gpr_ram ( .clk (clk), .reset (reset), diff --git a/hw/rtl/libs/VX_dp_ram.sv 
b/hw/rtl/libs/VX_dp_ram.sv index 9e863f7135..c964c101b5 100644 --- a/hw/rtl/libs/VX_dp_ram.sv +++ b/hw/rtl/libs/VX_dp_ram.sv @@ -19,12 +19,10 @@ module VX_dp_ram #( parameter SIZE = 1, parameter WRENW = 1, parameter OUT_REG = 0, - parameter LUTRAM = 0, parameter NO_RWCHECK = 0, parameter RW_ASSERT = 0, parameter RESET_RAM = 0, - parameter RESET_OUT = 0, - parameter `STRING WRITE_MODE = "R", // R: read-first, W: write-first, N: no-change, U: undefined + parameter NEW_DATA = 0, parameter INIT_ENABLE = 0, parameter INIT_FILE = "", parameter [DATAW-1:0] INIT_VALUE = 0, @@ -41,9 +39,8 @@ module VX_dp_ram #( output wire [DATAW-1:0] rdata ); localparam WSELW = DATAW / WRENW; - localparam USE_BRAM = !LUTRAM && ((DATAW * SIZE) >= `MAX_LUTRAM); - `STATIC_ASSERT((WRENW * WSELW == DATAW), ("invalid parameter")) + `STATIC_ASSERT(!(WRENW * WSELW != DATAW), ("invalid parameter")) `UNUSED_PARAM (RW_ASSERT) `define RAM_INITIALIZATION \ @@ -61,25 +58,12 @@ module VX_dp_ram #( `ifdef SYNTHESIS `ifdef QUARTUS - localparam `STRING RAM_STYLE_VALUE = USE_BRAM ? "block" : (LUTRAM ? "MLAB, no_rw_check" : "auto"); - localparam `STRING RAM_NO_RWCHECK_VALUE = NO_RWCHECK ? "-name add_pass_through_logic_to_inferred_rams off" : ""; - `define RAM_ARRAY (* ramstyle = RAM_STYLE_VALUE *) reg [WRENW-1:0][WSELW-1:0] ram [0:SIZE-1]; + `define RAM_ARRAY reg [WRENW-1:0][WSELW-1:0] ram [0:SIZE-1]; `define RAM_WRITE for (integer i = 0; i < WRENW; ++i) begin \ if (wren[i]) begin \ ram[waddr][i] <= wdata[i * WSELW +: WSELW]; \ end \ end - `define RAM_NO_RWCHECK (* altera_attribute = RAM_NO_RWCHECK_VALUE *) -`elsif VIVADO - localparam `STRING RAM_STYLE_VALUE = USE_BRAM ? "block" : (LUTRAM ? "distributed" : "auto"); - localparam `STRING RAM_NO_RWCHECK_VALUE = NO_RWCHECK ? 
"no" : "auto"; - `define RAM_ARRAY (* ram_style = RAM_STYLE_VALUE *) reg [DATAW-1:0] ram [0:SIZE-1]; - `define RAM_WRITE for (integer i = 0; i < WRENW; ++i) begin \ - if (wren[i]) begin \ - ram[waddr][i * WSELW +: WSELW] <= wdata[i * WSELW +: WSELW]; \ - end \ - end - `define RAM_NO_RWCHECK (* rw_addr_collision = RAM_NO_RWCHECK_VALUE *) `else `define RAM_ARRAY reg [DATAW-1:0] ram [0:SIZE-1]; `define RAM_WRITE for (integer i = 0; i < WRENW; ++i) begin \ @@ -87,136 +71,96 @@ module VX_dp_ram #( ram[waddr][i * WSELW +: WSELW] <= wdata[i * WSELW +: WSELW]; \ end \ end - `define RAM_NO_RWCHECK `endif - if (OUT_REG) begin : g_out_reg - reg [DATAW-1:0] rdata_r; - if (WRITE_MODE == "R") begin : g_read_first - `RAM_ARRAY + if (OUT_REG) begin : g_sync + if (NEW_DATA) begin : g_new_data + (* rw_addr_collision = "yes" *) `RAM_ARRAY + `UNUSED_VAR (wren) `RAM_INITIALIZATION + reg [ADDRW-1:0] addr_reg; always @(posedge clk) begin if (write) begin `RAM_WRITE end - if (RESET_OUT && reset) begin - rdata_r <= INIT_VALUE; - end else if (read || write) begin - rdata_r <= ram[raddr]; + if (read) begin + addr_reg <= raddr; end end - end else if (WRITE_MODE == "W") begin : g_write_first + assign rdata = ram[addr_reg]; + end else begin : g_old_data `RAM_ARRAY `RAM_INITIALIZATION + reg [DATAW-1:0] rdata_r; always @(posedge clk) begin if (write) begin `RAM_WRITE end - if (RESET_OUT && reset) begin - rdata_r <= INIT_VALUE; - end else if (read || write) begin - rdata_r = ram[raddr]; + if (read) begin + rdata_r <= ram[raddr]; end end - end else if (WRITE_MODE == "N") begin : g_no_change - `RAM_ARRAY + assign rdata = rdata_r; + end + end else begin : g_async + if (NO_RWCHECK) begin : g_no_rwcehck + `NO_RW_RAM_CHECK `RAM_ARRAY `RAM_INITIALIZATION always @(posedge clk) begin if (write) begin `RAM_WRITE end - if (RESET_OUT && reset) begin - rdata_r <= INIT_VALUE; - end else if (read && ~write) begin - rdata_r <= ram[raddr]; - end end - end else if (WRITE_MODE == "U") begin : g_undefined - 
`RAM_NO_RWCHECK `RAM_ARRAY + assign rdata = ram[raddr]; + end else begin : g_rwcheck + `RAM_ARRAY `RAM_INITIALIZATION always @(posedge clk) begin if (write) begin `RAM_WRITE end - if (RESET_OUT && reset) begin - rdata_r <= INIT_VALUE; - end else if (read) begin - rdata_r <= ram[raddr]; - end - end - end else begin - `STATIC_ASSERT(0, ("invalid write mode: %s", WRITE_MODE)) - end - assign rdata = rdata_r; - end else begin : g_no_out_reg - `UNUSED_VAR (read) - `RAM_NO_RWCHECK `RAM_ARRAY - `RAM_INITIALIZATION - always @(posedge clk) begin - if (write) begin - `RAM_WRITE end + assign rdata = ram[raddr]; end - assign rdata = ram[raddr]; end `else // simulation reg [DATAW-1:0] ram [0:SIZE-1]; `RAM_INITIALIZATION - wire [DATAW-1:0] ram_n; - for (genvar i = 0; i < WRENW; ++i) begin : g_ram_n - assign ram_n[i * WSELW +: WSELW] = wren[i] ? wdata[i * WSELW +: WSELW] : ram[waddr][i * WSELW +: WSELW]; - end - always @(posedge clk) begin if (RESET_RAM && reset) begin for (integer i = 0; i < SIZE; ++i) begin ram[i] <= DATAW'(INIT_VALUE); end - end else begin - if (write) begin - ram[waddr] <= ram_n; + end else if (write) begin + for (integer i = 0; i < WRENW; ++i) begin + if (wren[i]) begin + ram[waddr][i * WSELW +: WSELW] <= wdata[i * WSELW +: WSELW]; + end end end end - if (OUT_REG && WRITE_MODE == "R") begin : g_read_first - reg [DATAW-1:0] rdata_r; - always @(posedge clk) begin - if (RESET_OUT && reset) begin - rdata_r <= DATAW'(INIT_VALUE); - end else if (read || write) begin - rdata_r <= ram[raddr]; + if (OUT_REG) begin : g_sync + if (NEW_DATA) begin : g_new_data + reg [ADDRW-1:0] addr_reg; + always @(posedge clk) begin + if (read) begin + addr_reg <= raddr; + end end - end - assign rdata = rdata_r; - end else if (OUT_REG && WRITE_MODE == "W") begin : g_read_first - reg [DATAW-1:0] rdata_r; - always @(posedge clk) begin - if (RESET_OUT && reset) begin - rdata_r <= DATAW'(INIT_VALUE); - end else if (read || write) begin - if (write && (raddr == waddr)) begin - rdata_r <= 
ram_n; - end else begin + assign rdata = ram[addr_reg]; + end else begin : g_old_data + reg [DATAW-1:0] rdata_r; + always @(posedge clk) begin + if (read) begin rdata_r <= ram[raddr]; end end + assign rdata = rdata_r; end - assign rdata = rdata_r; - end else if (OUT_REG && WRITE_MODE == "N") begin : g_read_first - reg [DATAW-1:0] rdata_r; - always @(posedge clk) begin - if (RESET_OUT && reset) begin - rdata_r <= DATAW'(INIT_VALUE); - end else if (read && ~write) begin - rdata_r <= ram[raddr]; - end - end - assign rdata = rdata_r; - end else begin : g_async_or_undef - wire [DATAW-1:0] rdata_w; - if (USE_BRAM && NO_RWCHECK) begin : g_rdata_no_bypass + end else begin : g_async + if (NO_RWCHECK) begin : g_no_rwcheck reg [DATAW-1:0] prev_data; reg [ADDRW-1:0] prev_waddr; reg prev_write; @@ -233,26 +177,13 @@ module VX_dp_ram #( end end - assign rdata_w = (prev_write && (prev_waddr == raddr)) ? prev_data : ram[raddr]; + assign rdata = (prev_write && (prev_waddr == raddr)) ? prev_data : ram[raddr]; if (RW_ASSERT) begin : g_rw_asert - `RUNTIME_ASSERT(~read || (rdata_w == ram[raddr]), ("%t: read after write hazard", $time)) + `RUNTIME_ASSERT(~read || (rdata == ram[raddr]), ("%t: read after write hazard", $time)) end - end else begin : g_rdata_with_bypass - assign rdata_w = ram[raddr]; - end - if (OUT_REG) begin : g_out_reg - reg [DATAW-1:0] rdata_r; - always @(posedge clk) begin - if (RESET_OUT && reset) begin - rdata_r <= DATAW'(INIT_VALUE); - end else if (read) begin - rdata_r <= rdata_w; - end - end - assign rdata = rdata_r; - end else begin : g_no_out_reg + end else begin : g_rwcheck `UNUSED_VAR (read) - assign rdata = rdata_w; + assign rdata = ram[raddr]; end end `endif diff --git a/hw/rtl/libs/VX_fifo_queue.sv b/hw/rtl/libs/VX_fifo_queue.sv index 9323c4dc06..7d51e618a9 100644 --- a/hw/rtl/libs/VX_fifo_queue.sv +++ b/hw/rtl/libs/VX_fifo_queue.sv @@ -102,11 +102,10 @@ module VX_fifo_queue #( wire read = ((OUT_REG != 0) || USE_BRAM) ? 
~bypass : pop; VX_dp_ram #( - .DATAW (DATAW), - .SIZE (DEPTH), - .LUTRAM (!USE_BRAM), - .OUT_REG(USE_BRAM), - .WRITE_MODE("W") + .DATAW (DATAW), + .SIZE (DEPTH), + .OUT_REG (USE_BRAM), + .NEW_DATA (1) ) dp_ram ( .clk (clk), .reset (reset), diff --git a/hw/rtl/libs/VX_index_buffer.sv b/hw/rtl/libs/VX_index_buffer.sv index 61875b7fb6..8e2b7e8d82 100644 --- a/hw/rtl/libs/VX_index_buffer.sv +++ b/hw/rtl/libs/VX_index_buffer.sv @@ -15,10 +15,9 @@ `TRACING_OFF module VX_index_buffer #( - parameter DATAW = 1, - parameter SIZE = 1, - parameter LUTRAM = 0, - parameter ADDRW = `LOG2UP(SIZE) + parameter DATAW = 1, + parameter SIZE = 1, + parameter ADDRW = `LOG2UP(SIZE) ) ( input wire clk, input wire reset, @@ -49,9 +48,8 @@ module VX_index_buffer #( ); VX_dp_ram #( - .DATAW (DATAW), - .SIZE (SIZE), - .LUTRAM (LUTRAM) + .DATAW (DATAW), + .SIZE (SIZE) ) data_table ( .clk (clk), .reset (reset), diff --git a/hw/rtl/libs/VX_sp_ram.sv b/hw/rtl/libs/VX_sp_ram.sv index faaf0dd2f4..efdd836d84 100644 --- a/hw/rtl/libs/VX_sp_ram.sv +++ b/hw/rtl/libs/VX_sp_ram.sv @@ -19,12 +19,10 @@ module VX_sp_ram #( parameter SIZE = 1, parameter WRENW = 1, parameter OUT_REG = 0, - parameter LUTRAM = 0, parameter NO_RWCHECK = 0, parameter RW_ASSERT = 0, parameter RESET_RAM = 0, - parameter RESET_OUT = 0, - parameter `STRING WRITE_MODE = "R", // R: read-first, W: write-first, N: no-change, U: undefined + parameter `STRING WRITE_MODE = "R", // R: read-first, W: write-first, N: no-change parameter INIT_ENABLE = 0, parameter INIT_FILE = "", parameter [DATAW-1:0] INIT_VALUE = 0, @@ -39,32 +37,177 @@ module VX_sp_ram #( input wire [DATAW-1:0] wdata, output wire [DATAW-1:0] rdata ); - VX_dp_ram #( - .DATAW (DATAW), - .SIZE (SIZE), - .WRENW (WRENW), - .OUT_REG (OUT_REG), - .LUTRAM (LUTRAM), - .NO_RWCHECK (NO_RWCHECK), - .RW_ASSERT (RW_ASSERT), - .RESET_RAM (RESET_RAM), - .RESET_OUT (RESET_OUT), - .WRITE_MODE (WRITE_MODE), - .INIT_ENABLE(INIT_ENABLE), - .INIT_FILE (INIT_FILE), - .INIT_VALUE (INIT_VALUE), - 
.ADDRW (ADDRW) - ) dp_ram ( - .clk (clk), - .reset (reset), - .read (read), - .write (write), - .wren (wren), - .waddr (addr), - .wdata (wdata), - .raddr (addr), - .rdata (rdata) - ); + localparam WSELW = DATAW / WRENW; + + `STATIC_ASSERT(!(WRENW * WSELW != DATAW), ("invalid parameter")) + `UNUSED_PARAM (RW_ASSERT) + +`define RAM_INITIALIZATION \ + if (INIT_ENABLE != 0) begin : g_init \ + if (INIT_FILE != "") begin : g_file \ + initial $readmemh(INIT_FILE, ram); \ + end else begin : g_value \ + initial begin \ + for (integer i = 0; i < SIZE; ++i) begin : g_i \ + ram[i] = INIT_VALUE; \ + end \ + end \ + end \ + end + +`ifdef SYNTHESIS +`ifdef QUARTUS + `define RAM_ARRAY reg [WRENW-1:0][WSELW-1:0] ram [0:SIZE-1]; + `define RAM_WRITE for (integer i = 0; i < WRENW; ++i) begin \ + if (wren[i]) begin \ + ram[addr][i] <= wdata[i * WSELW +: WSELW]; \ + end \ + end +`else + `define RAM_ARRAY reg [DATAW-1:0] ram [0:SIZE-1]; + `define RAM_WRITE for (integer i = 0; i < WRENW; ++i) begin \ + if (wren[i]) begin \ + ram[addr][i * WSELW +: WSELW] <= wdata[i * WSELW +: WSELW]; \ + end \ + end +`endif + if (OUT_REG) begin : g_sync + wire cs = read || write; + if (WRITE_MODE == "R") begin : g_read_first + `RAM_ARRAY + `RAM_INITIALIZATION + reg [DATAW-1:0] rdata_r; + always @(posedge clk) begin + if (cs) begin + if (write) begin + `RAM_WRITE + end + rdata_r <= ram[addr]; + end + end + assign rdata = rdata_r; + end else if (WRITE_MODE == "W") begin : g_write_first + `UNUSED_VAR (wren) + `RAM_ARRAY + `RAM_INITIALIZATION + reg [ADDRW-1:0] addr_reg; + always @(posedge clk) begin + if (cs) begin + addr_reg <= addr; + if (write) begin + `RAM_WRITE + end + end + end + assign rdata = ram[addr_reg]; + end else if (WRITE_MODE == "N") begin : g_no_change + `RAM_ARRAY + `RAM_INITIALIZATION + reg [DATAW-1:0] rdata_r; + always @(posedge clk) begin + if (cs) begin + if (write) begin + `RAM_WRITE + end else begin + rdata_r <= ram[addr]; + end + end + end + assign rdata = rdata_r; + end + end else 
begin : g_async + if (NO_RWCHECK) begin : g_no_rwcehck + `NO_RW_RAM_CHECK `RAM_ARRAY + `RAM_INITIALIZATION + always @(posedge clk) begin + if (write) begin + `RAM_WRITE + end + end + assign rdata = ram[addr]; + end else begin : g_rwcheck + `RAM_ARRAY + `RAM_INITIALIZATION + always @(posedge clk) begin + if (write) begin + `RAM_WRITE + end + end + assign rdata = ram[addr]; + end + end +`else + // simulation + reg [DATAW-1:0] ram [0:SIZE-1]; + `RAM_INITIALIZATION + + wire [DATAW-1:0] ram_n; + for (genvar i = 0; i < WRENW; ++i) begin : g_ram_n + assign ram_n[i * WSELW +: WSELW] = wren[i] ? wdata[i * WSELW +: WSELW] : ram[addr][i * WSELW +: WSELW]; + end + + always @(posedge clk) begin + if (RESET_RAM && reset) begin + for (integer i = 0; i < SIZE; ++i) begin + ram[i] <= DATAW'(INIT_VALUE); + end + end else if (write) begin + ram[addr] <= ram_n; + end + end + + if (OUT_REG) begin : g_sync + if (WRITE_MODE == "R") begin : g_read_first + reg [DATAW-1:0] rdata_r; + always @(posedge clk) begin + if (read || write) begin + rdata_r <= ram[addr]; + end + end + assign rdata = rdata_r; + end else if (WRITE_MODE == "W") begin : g_write_first + reg [ADDRW-1:0] addr_reg; + always @(posedge clk) begin + if (read || write) begin + addr_reg <= addr; + end + end + assign rdata = ram[addr_reg]; + end else if (WRITE_MODE == "N") begin : g_no_change + reg [DATAW-1:0] rdata_r; + always @(posedge clk) begin + if (read && ~write) begin + rdata_r <= ram[addr]; + end + end + assign rdata = rdata_r; + end + end else begin : g_async + if (NO_RWCHECK) begin : g_no_rwcheck + reg [DATAW-1:0] prev_data; + reg [ADDRW-1:0] prev_waddr; + reg prev_write; + always @(posedge clk) begin + if (reset) begin + prev_write <= 0; + prev_data <= '0; + prev_waddr <= '0; + end else begin + prev_write <= write; + prev_data <= ram[addr]; + prev_waddr <= addr; + end + end + assign rdata = (prev_write && (prev_waddr == addr)) ? 
prev_data : ram[addr]; + if (RW_ASSERT) begin : g_rw_asert + `RUNTIME_ASSERT(~read || (rdata == ram[addr]), ("%t: read after write hazard", $time)) + end + end else begin : g_rwcheck + `UNUSED_VAR (read) + assign rdata = ram[addr]; + end + end +`endif endmodule `TRACING_ON From 0f380a3d78110129316984aa6c5c1673bc5680a9 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sun, 20 Oct 2024 07:49:27 -0700 Subject: [PATCH 303/407] minor update --- hw/rtl/cache/VX_cache_repl.sv | 15 ++++++--------- hw/rtl/libs/VX_sp_ram.sv | 11 +++++------ 2 files changed, 11 insertions(+), 15 deletions(-) diff --git a/hw/rtl/cache/VX_cache_repl.sv b/hw/rtl/cache/VX_cache_repl.sv index 24425328d5..f9c511e6d6 100644 --- a/hw/rtl/cache/VX_cache_repl.sv +++ b/hw/rtl/cache/VX_cache_repl.sv @@ -110,7 +110,6 @@ module VX_cache_repl #( if (REPL_POLICY == `CS_REPL_PLRU) begin : g_plru // Pseudo Least Recently Used replacement policy localparam LRU_WIDTH = `UP(NUM_WAYS-1); - localparam USE_BRAM = (LRU_WIDTH * `CS_LINES_PER_BANK) >= `MAX_LUTRAM; wire [LRU_WIDTH-1:0] plru_rdata; wire [LRU_WIDTH-1:0] plru_wdata; @@ -120,15 +119,15 @@ module VX_cache_repl #( .DATAW (LRU_WIDTH), .SIZE (`CS_LINES_PER_BANK), .WRENW (LRU_WIDTH), - .OUT_REG (USE_BRAM) + .OUT_REG (1) ) plru_store ( .clk (clk), .reset (reset), - .read (USE_BRAM ? ~stall : repl_valid), + .read (~stall), .write (hit_valid), .wren (plru_wmask), .waddr (hit_line), - .raddr (USE_BRAM ? 
repl_line_n : repl_line), + .raddr (repl_line_n), .wdata (plru_wdata), .rdata (plru_rdata) ); @@ -150,8 +149,6 @@ module VX_cache_repl #( end else if (REPL_POLICY == `CS_REPL_CYCLIC) begin : g_cyclic // Cyclic replacement policy - localparam USE_BRAM = (WAY_SEL_WIDTH * `CS_LINES_PER_BANK) >= `MAX_LUTRAM; - `UNUSED_VAR (hit_valid) `UNUSED_VAR (hit_line) `UNUSED_VAR (hit_way) @@ -163,14 +160,14 @@ module VX_cache_repl #( VX_dp_ram #( .DATAW (WAY_SEL_WIDTH), .SIZE (`CS_LINES_PER_BANK), - .OUT_REG (USE_BRAM) + .OUT_REG (1) ) ctr_store ( .clk (clk), .reset (reset), - .read (USE_BRAM ? ~stall : repl_valid), + .read (~stall), .write (repl_valid), .wren (1'b1), - .raddr (USE_BRAM ? repl_line_n : repl_line), + .raddr (repl_line_n), .waddr (repl_line), .wdata (ctr_wdata), .rdata (ctr_rdata) diff --git a/hw/rtl/libs/VX_sp_ram.sv b/hw/rtl/libs/VX_sp_ram.sv index efdd836d84..eb21144f4e 100644 --- a/hw/rtl/libs/VX_sp_ram.sv +++ b/hw/rtl/libs/VX_sp_ram.sv @@ -141,18 +141,17 @@ module VX_sp_ram #( reg [DATAW-1:0] ram [0:SIZE-1]; `RAM_INITIALIZATION - wire [DATAW-1:0] ram_n; - for (genvar i = 0; i < WRENW; ++i) begin : g_ram_n - assign ram_n[i * WSELW +: WSELW] = wren[i] ? 
wdata[i * WSELW +: WSELW] : ram[addr][i * WSELW +: WSELW]; - end - always @(posedge clk) begin if (RESET_RAM && reset) begin for (integer i = 0; i < SIZE; ++i) begin ram[i] <= DATAW'(INIT_VALUE); end end else if (write) begin - ram[addr] <= ram_n; + for (integer i = 0; i < WRENW; ++i) begin + if (wren[i]) begin + ram[addr][i * WSELW +: WSELW] <= wdata[i * WSELW +: WSELW]; + end + end end end From acc1e3dfd8557d9cb368a19455cee6b488566197 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sun, 20 Oct 2024 20:07:34 -0700 Subject: [PATCH 304/407] minor update --- hw/rtl/VX_cluster.sv | 2 +- hw/rtl/VX_define.vh | 6 ++-- hw/rtl/VX_socket.sv | 2 +- hw/rtl/Vortex.sv | 2 +- hw/rtl/cache/VX_cache_bank.sv | 2 +- hw/rtl/cache/VX_cache_data.sv | 8 ++--- hw/rtl/cache/VX_cache_tags.sv | 7 ++-- hw/rtl/core/VX_operands.sv | 12 +++---- hw/rtl/core/VX_schedule.sv | 2 +- hw/rtl/core/VX_scoreboard.sv | 4 +-- hw/rtl/libs/VX_dp_ram.sv | 53 ++++++++++++++++--------------- hw/rtl/libs/VX_fifo_queue.sv | 2 +- hw/rtl/libs/VX_generic_arbiter.sv | 8 ++--- hw/rtl/libs/VX_sp_ram.sv | 39 ++++++++++++----------- 14 files changed, 76 insertions(+), 73 deletions(-) diff --git a/hw/rtl/VX_cluster.sv b/hw/rtl/VX_cluster.sv index 9aa5fe706b..bec4e232f4 100644 --- a/hw/rtl/VX_cluster.sv +++ b/hw/rtl/VX_cluster.sv @@ -154,6 +154,6 @@ module VX_cluster import VX_gpu_pkg::*; #( ); end - `BUFFER_EX(busy, (| per_socket_busy), 1'b1, (`NUM_SOCKETS > 1)); + `BUFFER_EX(busy, (| per_socket_busy), 1'b1, 1, (`NUM_SOCKETS > 1)); endmodule diff --git a/hw/rtl/VX_define.vh b/hw/rtl/VX_define.vh index 4ccb008804..6519984ad0 100644 --- a/hw/rtl/VX_define.vh +++ b/hw/rtl/VX_define.vh @@ -335,10 +335,10 @@ .data_out (dst) \ ) -`define BUFFER_EX(dst, src, ena, latency) \ +`define BUFFER_EX(dst, src, ena, RSTW, latency) \ VX_pipe_register #( \ .DATAW ($bits(dst)), \ - .RESETW ($bits(dst)), \ + .RESETW (RSTW), \ .DEPTH (latency) \ ) __``dst``__ ( \ .clk (clk), \ @@ -348,7 +348,7 @@ .data_out (dst) \ ) -`define BUFFER(dst, src) 
`BUFFER_EX(dst, src, 1'b1, 1) +`define BUFFER(dst, src) `BUFFER_EX(dst, src, 1'b1, 0, 1) `define POP_COUNT_EX(out, in, model) \ VX_popcount #( \ diff --git a/hw/rtl/VX_socket.sv b/hw/rtl/VX_socket.sv index d9a8f5bf8b..299fb6791d 100644 --- a/hw/rtl/VX_socket.sv +++ b/hw/rtl/VX_socket.sv @@ -237,6 +237,6 @@ module VX_socket import VX_gpu_pkg::*; #( ); end - `BUFFER_EX(busy, (| per_core_busy), 1'b1, (`SOCKET_SIZE > 1)); + `BUFFER_EX(busy, (| per_core_busy), 1'b1, 1, (`SOCKET_SIZE > 1)); endmodule diff --git a/hw/rtl/Vortex.sv b/hw/rtl/Vortex.sv index bae697c65d..5df4038801 100644 --- a/hw/rtl/Vortex.sv +++ b/hw/rtl/Vortex.sv @@ -159,7 +159,7 @@ module Vortex import VX_gpu_pkg::*; ( ); end - `BUFFER_EX(busy, (| per_cluster_busy), 1'b1, (`NUM_CLUSTERS > 1)); + `BUFFER_EX(busy, (| per_cluster_busy), 1'b1, 1, (`NUM_CLUSTERS > 1)); `ifdef PERF_ENABLE diff --git a/hw/rtl/cache/VX_cache_bank.sv b/hw/rtl/cache/VX_cache_bank.sv index 7d10223783..7258e847eb 100644 --- a/hw/rtl/cache/VX_cache_bank.sv +++ b/hw/rtl/cache/VX_cache_bank.sv @@ -461,7 +461,7 @@ module VX_cache_bank #( .write_word (write_word_st0), .word_idx (word_idx_st0), .write_byteen(byteen_st0), - .way_idx (way_idx_st1), + .way_idx_r (way_idx_st1), // outputs .read_data (read_data_st1), .evict_byteen(evict_byteen_st1) diff --git a/hw/rtl/cache/VX_cache_data.sv b/hw/rtl/cache/VX_cache_data.sv index 75e2c79355..03e2629c6e 100644 --- a/hw/rtl/cache/VX_cache_data.sv +++ b/hw/rtl/cache/VX_cache_data.sv @@ -47,7 +47,7 @@ module VX_cache_data #( input wire [`CS_WORD_WIDTH-1:0] write_word, input wire [WORD_SIZE-1:0] write_byteen, input wire [`UP(`CS_WORD_SEL_BITS)-1:0] word_idx, - input wire [`CS_WAY_SEL_WIDTH-1:0] way_idx, + input wire [`CS_WAY_SEL_WIDTH-1:0] way_idx_r, // outputs output wire [`CS_LINE_WIDTH-1:0] read_data, output wire [LINE_SIZE-1:0] evict_byteen @@ -94,11 +94,11 @@ module VX_cache_data #( .rdata (byteen_rdata) ); - assign evict_byteen = byteen_rdata[way_idx]; + assign evict_byteen = 
byteen_rdata[way_idx_r]; end else begin : g_no_dirty_bytes `UNUSED_VAR (init) `UNUSED_VAR (flush) - assign evict_byteen = '0; + assign evict_byteen = '1; // update whole line end wire [NUM_WAYS-1:0][`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] line_rdata; @@ -167,6 +167,6 @@ module VX_cache_data #( end end - assign read_data = line_rdata[way_idx]; + assign read_data = line_rdata[way_idx_r]; endmodule diff --git a/hw/rtl/cache/VX_cache_tags.sv b/hw/rtl/cache/VX_cache_tags.sv index 7afbbfff42..970d54d913 100644 --- a/hw/rtl/cache/VX_cache_tags.sv +++ b/hw/rtl/cache/VX_cache_tags.sv @@ -66,12 +66,13 @@ module VX_cache_tags #( for (genvar i = 0; i < NUM_WAYS; ++i) begin : g_tag_store wire way_en = (NUM_WAYS == 1) || (evict_way == i); + wire do_init = init; // init all ways wire do_fill = fill && way_en; wire do_flush = flush && (!WRITEBACK || way_en); // flush the whole line in writethrough mode - wire do_write = WRITEBACK && write && tag_matches[i]; // only write on hit + wire do_write = WRITEBACK && write && tag_matches[i]; // only write on tag hit //wire line_read = read || write || (WRITEBACK && (fill || flush)); - wire line_write = init || do_fill || do_flush || do_write; + wire line_write = do_init || do_fill || do_flush || do_write; wire line_valid = fill || write; wire [TAG_WIDTH-1:0] line_wdata; @@ -90,7 +91,7 @@ module VX_cache_tags #( .DATAW (TAG_WIDTH), .SIZE (`CS_LINES_PER_BANK), .OUT_REG (1), - .NEW_DATA (1) + .RDW_MODE ("W") ) tag_store ( .clk (clk), .reset (reset), diff --git a/hw/rtl/core/VX_operands.sv b/hw/rtl/core/VX_operands.sv index 5bfbe3aa67..06d226161e 100644 --- a/hw/rtl/core/VX_operands.sv +++ b/hw/rtl/core/VX_operands.sv @@ -61,7 +61,7 @@ module VX_operands import VX_gpu_pkg::*; #( wire [NUM_BANKS-1:0] gpr_rd_valid, gpr_rd_ready; wire [NUM_BANKS-1:0] gpr_rd_valid_st1, gpr_rd_valid_st2; wire [NUM_BANKS-1:0][PER_BANK_ADDRW-1:0] gpr_rd_addr, gpr_rd_addr_st1; - wire [NUM_BANKS-1:0][`NUM_THREADS-1:0][`XLEN-1:0] gpr_rd_data_st2; + wire 
[NUM_BANKS-1:0][`NUM_THREADS-1:0][`XLEN-1:0] gpr_rd_data_st1, gpr_rd_data_st2; wire [NUM_BANKS-1:0][REQ_SEL_WIDTH-1:0] gpr_rd_req_idx, gpr_rd_req_idx_st1, gpr_rd_req_idx_st2; wire pipe_ready_in; @@ -178,14 +178,14 @@ module VX_operands import VX_gpu_pkg::*; #( wire pipe_valid2_st1 = pipe_valid_st1 && ~has_collision_st1; VX_pipe_buffer #( - .DATAW (NUM_BANKS + META_DATAW + NUM_BANKS * REQ_SEL_WIDTH) + .DATAW (NUM_BANKS * (1 + REQ_SEL_WIDTH + REGS_DATAW) + META_DATAW) ) pipe_reg2 ( .clk (clk), .reset (reset), .valid_in (pipe_valid2_st1), .ready_in (pipe_ready_st1), - .data_in ({gpr_rd_valid_st1, pipe_data_st1, gpr_rd_req_idx_st1}), - .data_out ({gpr_rd_valid_st2, pipe_data_st2, gpr_rd_req_idx_st2}), + .data_in ({gpr_rd_valid_st1, gpr_rd_req_idx_st1, gpr_rd_data_st1, pipe_data_st1}), + .data_out ({gpr_rd_valid_st2, gpr_rd_req_idx_st2, gpr_rd_data_st2, pipe_data_st2}), .valid_out(pipe_valid_st2), .ready_out(pipe_ready_st2) ); @@ -270,7 +270,7 @@ module VX_operands import VX_gpu_pkg::*; #( `ifdef GPR_RESET .RESET_RAM (1), `endif - .OUT_REG (1) + .OUT_REG (0) ) gpr_ram ( .clk (clk), .reset (reset), @@ -280,7 +280,7 @@ module VX_operands import VX_gpu_pkg::*; #( .waddr (gpr_wr_addr), .wdata (writeback_if.data.data), .raddr (gpr_rd_addr_st1[b]), - .rdata (gpr_rd_data_st2[b]) + .rdata (gpr_rd_data_st1[b]) ); end diff --git a/hw/rtl/core/VX_schedule.sv b/hw/rtl/core/VX_schedule.sv index 9b49ae2680..5011ccb2cc 100644 --- a/hw/rtl/core/VX_schedule.sv +++ b/hw/rtl/core/VX_schedule.sv @@ -388,7 +388,7 @@ module VX_schedule import VX_gpu_pkg::*; #( wire no_pending_instr = (& pending_warp_empty); - `BUFFER_EX(busy, (active_warps != 0 || ~no_pending_instr), 1'b1, 1); + `BUFFER_EX(busy, (active_warps != 0 || ~no_pending_instr), 1'b1, 1, 1); // export CSRs assign sched_csr_if.cycles = cycles; diff --git a/hw/rtl/core/VX_scoreboard.sv b/hw/rtl/core/VX_scoreboard.sv index 5b01cc5504..9ec9a62878 100644 --- a/hw/rtl/core/VX_scoreboard.sv +++ b/hw/rtl/core/VX_scoreboard.sv @@ -62,8 +62,8 
@@ module VX_scoreboard import VX_gpu_pkg::*; #( .data_out (perf_sfu_per_cycle) ); - `BUFFER_EX(perf_units_per_cycle_r, perf_units_per_cycle, 1'b1, `CDIV(PER_ISSUE_WARPS, `MAX_FANOUT)); - `BUFFER_EX(perf_sfu_per_cycle_r, perf_sfu_per_cycle, 1'b1, `CDIV(PER_ISSUE_WARPS, `MAX_FANOUT)); + `BUFFER_EX(perf_units_per_cycle_r, perf_units_per_cycle, 1'b1, 0, `CDIV(PER_ISSUE_WARPS, `MAX_FANOUT)); + `BUFFER_EX(perf_sfu_per_cycle_r, perf_sfu_per_cycle, 1'b1, 0, `CDIV(PER_ISSUE_WARPS, `MAX_FANOUT)); wire [PER_ISSUE_WARPS-1:0] stg_valid_in; for (genvar w = 0; w < PER_ISSUE_WARPS; ++w) begin : g_stg_valid_in diff --git a/hw/rtl/libs/VX_dp_ram.sv b/hw/rtl/libs/VX_dp_ram.sv index c964c101b5..2adb27e2dd 100644 --- a/hw/rtl/libs/VX_dp_ram.sv +++ b/hw/rtl/libs/VX_dp_ram.sv @@ -19,10 +19,9 @@ module VX_dp_ram #( parameter SIZE = 1, parameter WRENW = 1, parameter OUT_REG = 0, - parameter NO_RWCHECK = 0, - parameter RW_ASSERT = 0, + parameter `STRING RDW_MODE = "R", // R: read-first, W: write-first + parameter RDW_ASSERT = 0, parameter RESET_RAM = 0, - parameter NEW_DATA = 0, parameter INIT_ENABLE = 0, parameter INIT_FILE = "", parameter [DATAW-1:0] INIT_VALUE = 0, @@ -41,7 +40,8 @@ module VX_dp_ram #( localparam WSELW = DATAW / WRENW; `STATIC_ASSERT(!(WRENW * WSELW != DATAW), ("invalid parameter")) - `UNUSED_PARAM (RW_ASSERT) + `STATIC_ASSERT((RDW_MODE == "R" || RDW_MODE == "W"), ("invalid parameter")) + `UNUSED_PARAM (RDW_ASSERT) `define RAM_INITIALIZATION \ if (INIT_ENABLE != 0) begin : g_init \ @@ -73,16 +73,17 @@ module VX_dp_ram #( end `endif if (OUT_REG) begin : g_sync - if (NEW_DATA) begin : g_new_data + wire cs = read || write; + if (RDW_MODE == "W") begin : g_new_data (* rw_addr_collision = "yes" *) `RAM_ARRAY `UNUSED_VAR (wren) `RAM_INITIALIZATION reg [ADDRW-1:0] addr_reg; always @(posedge clk) begin - if (write) begin - `RAM_WRITE - end - if (read) begin + if (cs) begin + if (write) begin + `RAM_WRITE + end addr_reg <= raddr; end end @@ -92,18 +93,19 @@ module VX_dp_ram #( 
`RAM_INITIALIZATION reg [DATAW-1:0] rdata_r; always @(posedge clk) begin - if (write) begin - `RAM_WRITE - end - if (read) begin + if (cs) begin + if (write) begin + `RAM_WRITE + end rdata_r <= ram[raddr]; end end assign rdata = rdata_r; end end else begin : g_async - if (NO_RWCHECK) begin : g_no_rwcehck - `NO_RW_RAM_CHECK `RAM_ARRAY + `UNUSED_VAR (read) + if (RDW_MODE == "W") begin : g_new_data + `RAM_ARRAY `RAM_INITIALIZATION always @(posedge clk) begin if (write) begin @@ -111,8 +113,8 @@ module VX_dp_ram #( end end assign rdata = ram[raddr]; - end else begin : g_rwcheck - `RAM_ARRAY + end else begin : g_old_data + `NO_RW_RAM_CHECK `RAM_ARRAY `RAM_INITIALIZATION always @(posedge clk) begin if (write) begin @@ -142,10 +144,11 @@ module VX_dp_ram #( end if (OUT_REG) begin : g_sync - if (NEW_DATA) begin : g_new_data + wire cs = read || write; + if (RDW_MODE == "W") begin : g_new_data reg [ADDRW-1:0] addr_reg; always @(posedge clk) begin - if (read) begin + if (cs) begin addr_reg <= raddr; end end @@ -153,14 +156,17 @@ module VX_dp_ram #( end else begin : g_old_data reg [DATAW-1:0] rdata_r; always @(posedge clk) begin - if (read) begin + if (cs) begin rdata_r <= ram[raddr]; end end assign rdata = rdata_r; end end else begin : g_async - if (NO_RWCHECK) begin : g_no_rwcheck + `UNUSED_VAR (read) + if (RDW_MODE == "W") begin : g_new_data + assign rdata = ram[raddr]; + end else begin : g_old_data reg [DATAW-1:0] prev_data; reg [ADDRW-1:0] prev_waddr; reg prev_write; @@ -178,12 +184,9 @@ module VX_dp_ram #( end assign rdata = (prev_write && (prev_waddr == raddr)) ? 
prev_data : ram[raddr]; - if (RW_ASSERT) begin : g_rw_asert + if (RDW_ASSERT) begin : g_rw_asert `RUNTIME_ASSERT(~read || (rdata == ram[raddr]), ("%t: read after write hazard", $time)) end - end else begin : g_rwcheck - `UNUSED_VAR (read) - assign rdata = ram[raddr]; end end `endif diff --git a/hw/rtl/libs/VX_fifo_queue.sv b/hw/rtl/libs/VX_fifo_queue.sv index 7d51e618a9..7e2eba402e 100644 --- a/hw/rtl/libs/VX_fifo_queue.sv +++ b/hw/rtl/libs/VX_fifo_queue.sv @@ -105,7 +105,7 @@ module VX_fifo_queue #( .DATAW (DATAW), .SIZE (DEPTH), .OUT_REG (USE_BRAM), - .NEW_DATA (1) + .RDW_MODE ("W") ) dp_ram ( .clk (clk), .reset (reset), diff --git a/hw/rtl/libs/VX_generic_arbiter.sv b/hw/rtl/libs/VX_generic_arbiter.sv index 5e090ebdda..2b0d086db3 100644 --- a/hw/rtl/libs/VX_generic_arbiter.sv +++ b/hw/rtl/libs/VX_generic_arbiter.sv @@ -16,7 +16,7 @@ `TRACING_OFF module VX_generic_arbiter #( parameter NUM_REQS = 1, - parameter `STRING TYPE = "P", + parameter `STRING TYPE = "P", // P: priority, R: round-robin, M: matrix, C: cyclic parameter LOG_NUM_REQS = `LOG2UP(NUM_REQS) ) ( input wire clk, @@ -27,6 +27,8 @@ module VX_generic_arbiter #( output wire grant_valid, input wire grant_ready ); + `STATIC_ASSERT((TYPE == "P" || TYPE == "R" || TYPE == "M" || TYPE == "C"), ("invalid parameter")) + if (TYPE == "P") begin : g_priority `UNUSED_VAR (clk) @@ -84,10 +86,6 @@ module VX_generic_arbiter #( .grant_ready (grant_ready) ); - end else begin : g_invalid - - `ERROR(("invalid parameter")); - end `RUNTIME_ASSERT (((~(| requests) != 1) || (grant_valid && (requests[grant_index] != 0) && (grant_onehot == (NUM_REQS'(1) << grant_index)))), ("%t: invalid arbiter grant!", $time)) diff --git a/hw/rtl/libs/VX_sp_ram.sv b/hw/rtl/libs/VX_sp_ram.sv index eb21144f4e..cd43e40ffe 100644 --- a/hw/rtl/libs/VX_sp_ram.sv +++ b/hw/rtl/libs/VX_sp_ram.sv @@ -19,10 +19,9 @@ module VX_sp_ram #( parameter SIZE = 1, parameter WRENW = 1, parameter OUT_REG = 0, - parameter NO_RWCHECK = 0, - parameter RW_ASSERT = 0, + 
parameter `STRING RDW_MODE = "R", // R: read-first, W: write-first, N: no-change + parameter RDW_ASSERT = 0, parameter RESET_RAM = 0, - parameter `STRING WRITE_MODE = "R", // R: read-first, W: write-first, N: no-change parameter INIT_ENABLE = 0, parameter INIT_FILE = "", parameter [DATAW-1:0] INIT_VALUE = 0, @@ -40,7 +39,8 @@ module VX_sp_ram #( localparam WSELW = DATAW / WRENW; `STATIC_ASSERT(!(WRENW * WSELW != DATAW), ("invalid parameter")) - `UNUSED_PARAM (RW_ASSERT) + `STATIC_ASSERT((RDW_MODE == "R" || RDW_MODE == "W" || RDW_MODE == "N"), ("invalid parameter")) + `UNUSED_PARAM (RDW_ASSERT) `define RAM_INITIALIZATION \ if (INIT_ENABLE != 0) begin : g_init \ @@ -73,7 +73,7 @@ module VX_sp_ram #( `endif if (OUT_REG) begin : g_sync wire cs = read || write; - if (WRITE_MODE == "R") begin : g_read_first + if (RDW_MODE == "R") begin : g_read_first `RAM_ARRAY `RAM_INITIALIZATION reg [DATAW-1:0] rdata_r; @@ -86,7 +86,7 @@ module VX_sp_ram #( end end assign rdata = rdata_r; - end else if (WRITE_MODE == "W") begin : g_write_first + end else if (RDW_MODE == "W") begin : g_write_first `UNUSED_VAR (wren) `RAM_ARRAY `RAM_INITIALIZATION @@ -100,7 +100,7 @@ module VX_sp_ram #( end end assign rdata = ram[addr_reg]; - end else if (WRITE_MODE == "N") begin : g_no_change + end else if (RDW_MODE == "N") begin : g_no_change `RAM_ARRAY `RAM_INITIALIZATION reg [DATAW-1:0] rdata_r; @@ -116,8 +116,9 @@ module VX_sp_ram #( assign rdata = rdata_r; end end else begin : g_async - if (NO_RWCHECK) begin : g_no_rwcehck - `NO_RW_RAM_CHECK `RAM_ARRAY + `UNUSED_VAR (read) + if (RDW_MODE == "W") begin : g_rwcehck + `RAM_ARRAY `RAM_INITIALIZATION always @(posedge clk) begin if (write) begin @@ -125,8 +126,8 @@ module VX_sp_ram #( end end assign rdata = ram[addr]; - end else begin : g_rwcheck - `RAM_ARRAY + end else begin : g_no_rwcheck + `NO_RW_RAM_CHECK `RAM_ARRAY `RAM_INITIALIZATION always @(posedge clk) begin if (write) begin @@ -156,7 +157,7 @@ module VX_sp_ram #( end if (OUT_REG) begin : g_sync 
- if (WRITE_MODE == "R") begin : g_read_first + if (RDW_MODE == "R") begin : g_read_first reg [DATAW-1:0] rdata_r; always @(posedge clk) begin if (read || write) begin @@ -164,7 +165,7 @@ module VX_sp_ram #( end end assign rdata = rdata_r; - end else if (WRITE_MODE == "W") begin : g_write_first + end else if (RDW_MODE == "W") begin : g_write_first reg [ADDRW-1:0] addr_reg; always @(posedge clk) begin if (read || write) begin @@ -172,7 +173,7 @@ module VX_sp_ram #( end end assign rdata = ram[addr_reg]; - end else if (WRITE_MODE == "N") begin : g_no_change + end else if (RDW_MODE == "N") begin : g_no_change reg [DATAW-1:0] rdata_r; always @(posedge clk) begin if (read && ~write) begin @@ -182,7 +183,10 @@ module VX_sp_ram #( assign rdata = rdata_r; end end else begin : g_async - if (NO_RWCHECK) begin : g_no_rwcheck + `UNUSED_VAR (read) + if (RDW_MODE == "W") begin : g_rwcheck + assign rdata = ram[addr]; + end else begin : g_no_rwcheck reg [DATAW-1:0] prev_data; reg [ADDRW-1:0] prev_waddr; reg prev_write; @@ -198,12 +202,9 @@ module VX_sp_ram #( end end assign rdata = (prev_write && (prev_waddr == addr)) ? 
prev_data : ram[addr]; - if (RW_ASSERT) begin : g_rw_asert + if (RDW_ASSERT) begin : g_rw_asert `RUNTIME_ASSERT(~read || (rdata == ram[addr]), ("%t: read after write hazard", $time)) end - end else begin : g_rwcheck - `UNUSED_VAR (read) - assign rdata = ram[addr]; end end `endif From 22c3828bf5022905db600f5fce91512b6fa95265 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sun, 20 Oct 2024 21:12:49 -0700 Subject: [PATCH 305/407] minor update --- hw/rtl/VX_config.vh | 4 +- hw/rtl/cache/VX_cache_repl.sv | 2 +- hw/rtl/libs/VX_dp_ram.sv | 126 +++++++++++++++------- hw/rtl/libs/VX_sp_ram.sv | 192 ++++++++++++++++++++++++++-------- 4 files changed, 240 insertions(+), 84 deletions(-) diff --git a/hw/rtl/VX_config.vh b/hw/rtl/VX_config.vh index 48f8ca3dc4..c2d16ea3a0 100644 --- a/hw/rtl/VX_config.vh +++ b/hw/rtl/VX_config.vh @@ -613,7 +613,7 @@ // Number of Associative Ways `ifndef L2_NUM_WAYS -`define L2_NUM_WAYS 4 +`define L2_NUM_WAYS 8 `endif // Enable Cache Writeback @@ -665,7 +665,7 @@ // Number of Associative Ways `ifndef L3_NUM_WAYS -`define L3_NUM_WAYS 4 +`define L3_NUM_WAYS 8 `endif // Enable Cache Writeback diff --git a/hw/rtl/cache/VX_cache_repl.sv b/hw/rtl/cache/VX_cache_repl.sv index f9c511e6d6..85f2341121 100644 --- a/hw/rtl/cache/VX_cache_repl.sv +++ b/hw/rtl/cache/VX_cache_repl.sv @@ -110,6 +110,7 @@ module VX_cache_repl #( if (REPL_POLICY == `CS_REPL_PLRU) begin : g_plru // Pseudo Least Recently Used replacement policy localparam LRU_WIDTH = `UP(NUM_WAYS-1); + `UNUSED_VAR (repl_valid) wire [LRU_WIDTH-1:0] plru_rdata; wire [LRU_WIDTH-1:0] plru_wdata; @@ -152,7 +153,6 @@ module VX_cache_repl #( `UNUSED_VAR (hit_valid) `UNUSED_VAR (hit_line) `UNUSED_VAR (hit_way) - `UNUSED_VAR (repl_valid) wire [WAY_SEL_WIDTH-1:0] ctr_rdata; wire [WAY_SEL_WIDTH-1:0] ctr_wdata = ctr_rdata + 1; diff --git a/hw/rtl/libs/VX_dp_ram.sv b/hw/rtl/libs/VX_dp_ram.sv index 2adb27e2dd..fc94b99c36 100644 --- a/hw/rtl/libs/VX_dp_ram.sv +++ b/hw/rtl/libs/VX_dp_ram.sv @@ -19,6 +19,7 @@ module 
VX_dp_ram #( parameter SIZE = 1, parameter WRENW = 1, parameter OUT_REG = 0, + parameter LUTRAM = 0, parameter `STRING RDW_MODE = "R", // R: read-first, W: write-first parameter RDW_ASSERT = 0, parameter RESET_RAM = 0, @@ -38,6 +39,7 @@ module VX_dp_ram #( output wire [DATAW-1:0] rdata ); localparam WSELW = DATAW / WRENW; + `UNUSED_PARAM (LUTRAM) `STATIC_ASSERT(!(WRENW * WSELW != DATAW), ("invalid parameter")) `STATIC_ASSERT((RDW_MODE == "R" || RDW_MODE == "W"), ("invalid parameter")) @@ -57,6 +59,7 @@ module VX_dp_ram #( end `ifdef SYNTHESIS + localparam FORCE_BRAM = !LUTRAM && (SIZE * DATAW >= `MAX_LUTRAM); `ifdef QUARTUS `define RAM_ARRAY reg [WRENW-1:0][WSELW-1:0] ram [0:SIZE-1]; `define RAM_WRITE for (integer i = 0; i < WRENW; ++i) begin \ @@ -74,54 +77,107 @@ module VX_dp_ram #( `endif if (OUT_REG) begin : g_sync wire cs = read || write; - if (RDW_MODE == "W") begin : g_new_data - (* rw_addr_collision = "yes" *) `RAM_ARRAY - `UNUSED_VAR (wren) - `RAM_INITIALIZATION - reg [ADDRW-1:0] addr_reg; - always @(posedge clk) begin - if (cs) begin - if (write) begin - `RAM_WRITE + if (FORCE_BRAM) begin : g_bram + if (RDW_MODE == "W") begin : g_new_data + (* rw_addr_collision = "yes" *) `USE_BLOCK_BRAM `RAM_ARRAY + `UNUSED_VAR (wren) + `RAM_INITIALIZATION + reg [ADDRW-1:0] addr_reg; + always @(posedge clk) begin + if (cs) begin + if (write) begin + `RAM_WRITE + end + addr_reg <= raddr; + end + end + assign rdata = ram[addr_reg]; + end else begin : g_old_data + `USE_BLOCK_BRAM `RAM_ARRAY + `RAM_INITIALIZATION + reg [DATAW-1:0] rdata_r; + always @(posedge clk) begin + if (cs) begin + if (write) begin + `RAM_WRITE + end + rdata_r <= ram[raddr]; end - addr_reg <= raddr; end + assign rdata = rdata_r; end - assign rdata = ram[addr_reg]; - end else begin : g_old_data - `RAM_ARRAY - `RAM_INITIALIZATION - reg [DATAW-1:0] rdata_r; - always @(posedge clk) begin - if (cs) begin - if (write) begin - `RAM_WRITE + end else begin : g_auto + if (RDW_MODE == "W") begin : g_new_data + (* 
rw_addr_collision = "yes" *) `RAM_ARRAY + `UNUSED_VAR (wren) + `RAM_INITIALIZATION + reg [ADDRW-1:0] addr_reg; + always @(posedge clk) begin + if (cs) begin + if (write) begin + `RAM_WRITE + end + addr_reg <= raddr; + end + end + assign rdata = ram[addr_reg]; + end else begin : g_old_data + `RAM_ARRAY + `RAM_INITIALIZATION + reg [DATAW-1:0] rdata_r; + always @(posedge clk) begin + if (cs) begin + if (write) begin + `RAM_WRITE + end + rdata_r <= ram[raddr]; end - rdata_r <= ram[raddr]; end + assign rdata = rdata_r; end - assign rdata = rdata_r; end end else begin : g_async `UNUSED_VAR (read) - if (RDW_MODE == "W") begin : g_new_data - `RAM_ARRAY - `RAM_INITIALIZATION - always @(posedge clk) begin - if (write) begin - `RAM_WRITE + if (FORCE_BRAM) begin : g_bram + if (RDW_MODE == "W") begin : g_new_data + `USE_BLOCK_BRAM `RAM_ARRAY + `RAM_INITIALIZATION + always @(posedge clk) begin + if (write) begin + `RAM_WRITE + end + end + assign rdata = ram[raddr]; + end else begin : g_old_data + `NO_RW_RAM_CHECK `USE_BLOCK_BRAM `RAM_ARRAY + `RAM_INITIALIZATION + always @(posedge clk) begin + if (write) begin + `RAM_WRITE + end end + assign rdata = ram[raddr]; end - assign rdata = ram[raddr]; - end else begin : g_old_data - `NO_RW_RAM_CHECK `RAM_ARRAY - `RAM_INITIALIZATION - always @(posedge clk) begin - if (write) begin - `RAM_WRITE + end else begin : g_auto + if (RDW_MODE == "W") begin : g_new_data + `RAM_ARRAY + `RAM_INITIALIZATION + always @(posedge clk) begin + if (write) begin + `RAM_WRITE + end end + assign rdata = ram[raddr]; + end else begin : g_old_data + `NO_RW_RAM_CHECK `RAM_ARRAY + `RAM_INITIALIZATION + always @(posedge clk) begin + if (write) begin + `RAM_WRITE + end + end + assign rdata = ram[raddr]; end - assign rdata = ram[raddr]; end end `else diff --git a/hw/rtl/libs/VX_sp_ram.sv b/hw/rtl/libs/VX_sp_ram.sv index cd43e40ffe..1acbf733a6 100644 --- a/hw/rtl/libs/VX_sp_ram.sv +++ b/hw/rtl/libs/VX_sp_ram.sv @@ -19,6 +19,7 @@ module VX_sp_ram #( parameter SIZE = 1, 
parameter WRENW = 1, parameter OUT_REG = 0, + parameter LUTRAM = 0, parameter `STRING RDW_MODE = "R", // R: read-first, W: write-first, N: no-change parameter RDW_ASSERT = 0, parameter RESET_RAM = 0, @@ -37,6 +38,7 @@ module VX_sp_ram #( output wire [DATAW-1:0] rdata ); localparam WSELW = DATAW / WRENW; + `UNUSED_PARAM (LUTRAM) `STATIC_ASSERT(!(WRENW * WSELW != DATAW), ("invalid parameter")) `STATIC_ASSERT((RDW_MODE == "R" || RDW_MODE == "W" || RDW_MODE == "N"), ("invalid parameter")) @@ -56,6 +58,7 @@ module VX_sp_ram #( end `ifdef SYNTHESIS + localparam FORCE_BRAM = !LUTRAM && (SIZE * DATAW >= `MAX_LUTRAM); `ifdef QUARTUS `define RAM_ARRAY reg [WRENW-1:0][WSELW-1:0] ram [0:SIZE-1]; `define RAM_WRITE for (integer i = 0; i < WRENW; ++i) begin \ @@ -73,68 +76,165 @@ module VX_sp_ram #( `endif if (OUT_REG) begin : g_sync wire cs = read || write; - if (RDW_MODE == "R") begin : g_read_first - `RAM_ARRAY - `RAM_INITIALIZATION - reg [DATAW-1:0] rdata_r; - always @(posedge clk) begin - if (cs) begin - if (write) begin - `RAM_WRITE + if (FORCE_BRAM) begin : g_bram + if (RDW_MODE == "R") begin : g_read_first + `USE_BLOCK_BRAM `RAM_ARRAY + `RAM_INITIALIZATION + reg [DATAW-1:0] rdata_r; + always @(posedge clk) begin + if (cs) begin + if (write) begin + `RAM_WRITE + end + rdata_r <= ram[addr]; end - rdata_r <= ram[addr]; end - end - assign rdata = rdata_r; - end else if (RDW_MODE == "W") begin : g_write_first - `UNUSED_VAR (wren) - `RAM_ARRAY - `RAM_INITIALIZATION - reg [ADDRW-1:0] addr_reg; - always @(posedge clk) begin - if (cs) begin - addr_reg <= addr; - if (write) begin - `RAM_WRITE + assign rdata = rdata_r; + end else if (RDW_MODE == "W") begin : g_write_first + `USE_BLOCK_BRAM `RAM_ARRAY + `RAM_INITIALIZATION + if (WRENW > 1) begin : g_wren + reg [ADDRW-1:0] addr_reg; + always @(posedge clk) begin + if (cs) begin + if (write) begin + `RAM_WRITE + end + addr_reg <= addr; + end + end + assign rdata = ram[addr_reg]; + end else begin : g_no_wren + `UNUSED_VAR (wren) + reg 
[DATAW-1:0] rdata_r; + always @(posedge clk) begin + if (cs) begin + if (write) begin + ram[addr] <= wdata; + rdata_r <= wdata; + end else begin + rdata_r <= ram[addr]; + end + end + end + assign rdata = rdata_r; + end + end else if (RDW_MODE == "N") begin : g_no_change + `USE_BLOCK_BRAM `RAM_ARRAY + `RAM_INITIALIZATION + reg [DATAW-1:0] rdata_r; + always @(posedge clk) begin + if (cs) begin + if (write) begin + `RAM_WRITE + end else begin + rdata_r <= ram[addr]; + end end end + assign rdata = rdata_r; end - assign rdata = ram[addr_reg]; - end else if (RDW_MODE == "N") begin : g_no_change - `RAM_ARRAY - `RAM_INITIALIZATION - reg [DATAW-1:0] rdata_r; - always @(posedge clk) begin - if (cs) begin - if (write) begin - `RAM_WRITE - end else begin + end else begin : g_auto + if (RDW_MODE == "R") begin : g_read_first + `RAM_ARRAY + `RAM_INITIALIZATION + reg [DATAW-1:0] rdata_r; + always @(posedge clk) begin + if (cs) begin + if (write) begin + `RAM_WRITE + end rdata_r <= ram[addr]; end end + assign rdata = rdata_r; + end else if (RDW_MODE == "W") begin : g_write_first + `RAM_ARRAY + `RAM_INITIALIZATION + if (WRENW > 1) begin : g_wren + reg [ADDRW-1:0] addr_reg; + always @(posedge clk) begin + if (cs) begin + if (write) begin + `RAM_WRITE + end + addr_reg <= addr; + end + end + assign rdata = ram[addr_reg]; + end else begin : g_no_wren + `UNUSED_VAR (wren) + reg [DATAW-1:0] rdata_r; + always @(posedge clk) begin + if (cs) begin + if (write) begin + ram[addr] <= wdata; + rdata_r <= wdata; + end else begin + rdata_r <= ram[addr]; + end + end + end + assign rdata = rdata_r; + end + end else if (RDW_MODE == "N") begin : g_no_change + `RAM_ARRAY + `RAM_INITIALIZATION + reg [DATAW-1:0] rdata_r; + always @(posedge clk) begin + if (cs) begin + if (write) begin + `RAM_WRITE + end else begin + rdata_r <= ram[addr]; + end + end + end + assign rdata = rdata_r; end - assign rdata = rdata_r; end end else begin : g_async `UNUSED_VAR (read) - if (RDW_MODE == "W") begin : g_rwcehck - 
`RAM_ARRAY - `RAM_INITIALIZATION - always @(posedge clk) begin - if (write) begin - `RAM_WRITE + if (FORCE_BRAM) begin : g_bram + if (RDW_MODE == "W") begin : g_new_data + `USE_BLOCK_BRAM `RAM_ARRAY + `RAM_INITIALIZATION + always @(posedge clk) begin + if (write) begin + `RAM_WRITE + end + end + assign rdata = ram[addr]; + end else begin : g_old_data + `NO_RW_RAM_CHECK `USE_BLOCK_BRAM `RAM_ARRAY + `RAM_INITIALIZATION + always @(posedge clk) begin + if (write) begin + `RAM_WRITE + end end + assign rdata = ram[addr]; end - assign rdata = ram[addr]; - end else begin : g_no_rwcheck - `NO_RW_RAM_CHECK `RAM_ARRAY - `RAM_INITIALIZATION - always @(posedge clk) begin - if (write) begin - `RAM_WRITE + end else begin : g_auto + if (RDW_MODE == "W") begin : g_new_data + `RAM_ARRAY + `RAM_INITIALIZATION + always @(posedge clk) begin + if (write) begin + `RAM_WRITE + end end + assign rdata = ram[addr]; + end else begin : g_old_data + `NO_RW_RAM_CHECK `RAM_ARRAY + `RAM_INITIALIZATION + always @(posedge clk) begin + if (write) begin + `RAM_WRITE + end + end + assign rdata = ram[addr]; end - assign rdata = ram[addr]; end end `else From 1e4f0fa0bd465e7487a0b3ec3cb6374e788a777b Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sun, 20 Oct 2024 21:42:02 -0700 Subject: [PATCH 306/407] minor update --- hw/rtl/cache/VX_cache_repl.sv | 1 + 1 file changed, 1 insertion(+) diff --git a/hw/rtl/cache/VX_cache_repl.sv b/hw/rtl/cache/VX_cache_repl.sv index 85f2341121..9091230466 100644 --- a/hw/rtl/cache/VX_cache_repl.sv +++ b/hw/rtl/cache/VX_cache_repl.sv @@ -111,6 +111,7 @@ module VX_cache_repl #( // Pseudo Least Recently Used replacement policy localparam LRU_WIDTH = `UP(NUM_WAYS-1); `UNUSED_VAR (repl_valid) + `UNUSED_VAR (repl_line) wire [LRU_WIDTH-1:0] plru_rdata; wire [LRU_WIDTH-1:0] plru_wdata; From fccbadfe252c8a9c5abc343eed0300cef2d60d50 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sun, 20 Oct 2024 23:32:22 -0700 Subject: [PATCH 307/407] minor update --- hw/rtl/core/VX_fetch.sv | 3 
++- hw/rtl/libs/VX_fifo_queue.sv | 11 ++++------- hw/rtl/libs/VX_index_buffer.sv | 4 +++- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/hw/rtl/core/VX_fetch.sv b/hw/rtl/core/VX_fetch.sv index eb1f3d761a..6a35602e84 100644 --- a/hw/rtl/core/VX_fetch.sv +++ b/hw/rtl/core/VX_fetch.sv @@ -52,7 +52,8 @@ module VX_fetch import VX_gpu_pkg::*; #( VX_dp_ram #( .DATAW (`PC_BITS + `NUM_THREADS), - .SIZE (`NUM_WARPS) + .SIZE (`NUM_WARPS), + .OUT_REG (0) ) tag_store ( .clk (clk), .reset (reset), diff --git a/hw/rtl/libs/VX_fifo_queue.sv b/hw/rtl/libs/VX_fifo_queue.sv index 7e2eba402e..6de6ddc24f 100644 --- a/hw/rtl/libs/VX_fifo_queue.sv +++ b/hw/rtl/libs/VX_fifo_queue.sv @@ -74,7 +74,6 @@ module VX_fifo_queue #( end else begin : g_depth_n - localparam USE_BRAM = !LUTRAM && ((DATAW * DEPTH) >= `MAX_LUTRAM); localparam ADDRW = `CLOG2(DEPTH); wire [DATAW-1:0] data_out_w; @@ -95,26 +94,24 @@ module VX_fifo_queue #( end end - wire [ADDRW-1:0] rd_ptr_w = USE_BRAM ? rd_ptr_n : rd_ptr_r; - wire going_empty = (ALM_EMPTY == 1) ? alm_empty : (size[ADDRW-1:0] == ADDRW'(1)); wire bypass = push && (empty || (going_empty && pop)); - wire read = ((OUT_REG != 0) || USE_BRAM) ? 
~bypass : pop; VX_dp_ram #( .DATAW (DATAW), .SIZE (DEPTH), - .OUT_REG (USE_BRAM), + .OUT_REG (1), + .LUTRAM (LUTRAM), .RDW_MODE ("W") ) dp_ram ( .clk (clk), .reset (reset), - .read (read), + .read (~bypass), .write (push), .wren (1'b1), .waddr (wr_ptr_r), .wdata (data_in), - .raddr (rd_ptr_w), + .raddr (rd_ptr_n), .rdata (data_out_w) ); diff --git a/hw/rtl/libs/VX_index_buffer.sv b/hw/rtl/libs/VX_index_buffer.sv index 8e2b7e8d82..422c317e1c 100644 --- a/hw/rtl/libs/VX_index_buffer.sv +++ b/hw/rtl/libs/VX_index_buffer.sv @@ -49,7 +49,9 @@ module VX_index_buffer #( VX_dp_ram #( .DATAW (DATAW), - .SIZE (SIZE) + .SIZE (SIZE), + .OUT_REG (0), + .RDW_MODE("W") ) data_table ( .clk (clk), .reset (reset), From 2b3d1f08600ff80864b6292cb9ef0851b80e893b Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sun, 20 Oct 2024 23:54:42 -0700 Subject: [PATCH 308/407] minor update --- hw/rtl/core/VX_operands.sv | 13 ++++--- hw/rtl/libs/VX_dp_ram.sv | 76 +++++++++++++++++++++++++++----------- hw/rtl/libs/VX_sp_ram.sv | 12 +++--- 3 files changed, 67 insertions(+), 34 deletions(-) diff --git a/hw/rtl/core/VX_operands.sv b/hw/rtl/core/VX_operands.sv index 06d226161e..48b01b4c61 100644 --- a/hw/rtl/core/VX_operands.sv +++ b/hw/rtl/core/VX_operands.sv @@ -61,7 +61,7 @@ module VX_operands import VX_gpu_pkg::*; #( wire [NUM_BANKS-1:0] gpr_rd_valid, gpr_rd_ready; wire [NUM_BANKS-1:0] gpr_rd_valid_st1, gpr_rd_valid_st2; wire [NUM_BANKS-1:0][PER_BANK_ADDRW-1:0] gpr_rd_addr, gpr_rd_addr_st1; - wire [NUM_BANKS-1:0][`NUM_THREADS-1:0][`XLEN-1:0] gpr_rd_data_st1, gpr_rd_data_st2; + wire [NUM_BANKS-1:0][`NUM_THREADS-1:0][`XLEN-1:0] gpr_rd_data_st2; wire [NUM_BANKS-1:0][REQ_SEL_WIDTH-1:0] gpr_rd_req_idx, gpr_rd_req_idx_st1, gpr_rd_req_idx_st2; wire pipe_ready_in; @@ -178,14 +178,14 @@ module VX_operands import VX_gpu_pkg::*; #( wire pipe_valid2_st1 = pipe_valid_st1 && ~has_collision_st1; VX_pipe_buffer #( - .DATAW (NUM_BANKS * (1 + REQ_SEL_WIDTH + REGS_DATAW) + META_DATAW) + .DATAW (NUM_BANKS * (1 + 
REQ_SEL_WIDTH) + META_DATAW) ) pipe_reg2 ( .clk (clk), .reset (reset), .valid_in (pipe_valid2_st1), .ready_in (pipe_ready_st1), - .data_in ({gpr_rd_valid_st1, gpr_rd_req_idx_st1, gpr_rd_data_st1, pipe_data_st1}), - .data_out ({gpr_rd_valid_st2, gpr_rd_req_idx_st2, gpr_rd_data_st2, pipe_data_st2}), + .data_in ({gpr_rd_valid_st1, gpr_rd_req_idx_st1, pipe_data_st1}), + .data_out ({gpr_rd_valid_st2, gpr_rd_req_idx_st2, pipe_data_st2}), .valid_out(pipe_valid_st2), .ready_out(pipe_ready_st2) ); @@ -270,7 +270,8 @@ module VX_operands import VX_gpu_pkg::*; #( `ifdef GPR_RESET .RESET_RAM (1), `endif - .OUT_REG (0) + .OUT_REG (1), + .RDW_MODE ("U") ) gpr_ram ( .clk (clk), .reset (reset), @@ -280,7 +281,7 @@ module VX_operands import VX_gpu_pkg::*; #( .waddr (gpr_wr_addr), .wdata (writeback_if.data.data), .raddr (gpr_rd_addr_st1[b]), - .rdata (gpr_rd_data_st1[b]) + .rdata (gpr_rd_data_st2[b]) ); end diff --git a/hw/rtl/libs/VX_dp_ram.sv b/hw/rtl/libs/VX_dp_ram.sv index fc94b99c36..b778ce88ed 100644 --- a/hw/rtl/libs/VX_dp_ram.sv +++ b/hw/rtl/libs/VX_dp_ram.sv @@ -20,7 +20,7 @@ module VX_dp_ram #( parameter WRENW = 1, parameter OUT_REG = 0, parameter LUTRAM = 0, - parameter `STRING RDW_MODE = "R", // R: read-first, W: write-first + parameter `STRING RDW_MODE = "R", // R: read-first, W: write-first, U: undefined parameter RDW_ASSERT = 0, parameter RESET_RAM = 0, parameter INIT_ENABLE = 0, @@ -42,7 +42,7 @@ module VX_dp_ram #( `UNUSED_PARAM (LUTRAM) `STATIC_ASSERT(!(WRENW * WSELW != DATAW), ("invalid parameter")) - `STATIC_ASSERT((RDW_MODE == "R" || RDW_MODE == "W"), ("invalid parameter")) + `STATIC_ASSERT((RDW_MODE == "R" || RDW_MODE == "W" || RDW_MODE == "U"), ("invalid parameter")) `UNUSED_PARAM (RDW_ASSERT) `define RAM_INITIALIZATION \ @@ -76,15 +76,14 @@ module VX_dp_ram #( end `endif if (OUT_REG) begin : g_sync - wire cs = read || write; if (FORCE_BRAM) begin : g_bram - if (RDW_MODE == "W") begin : g_new_data + if (RDW_MODE == "W") begin : g_write_first (* 
rw_addr_collision = "yes" *) `USE_BLOCK_BRAM `RAM_ARRAY `UNUSED_VAR (wren) `RAM_INITIALIZATION reg [ADDRW-1:0] addr_reg; always @(posedge clk) begin - if (cs) begin + if (read || write) begin if (write) begin `RAM_WRITE end @@ -92,12 +91,12 @@ module VX_dp_ram #( end end assign rdata = ram[addr_reg]; - end else begin : g_old_data + end else if (RDW_MODE == "R") begin : g_read_first `USE_BLOCK_BRAM `RAM_ARRAY `RAM_INITIALIZATION reg [DATAW-1:0] rdata_r; always @(posedge clk) begin - if (cs) begin + if (read || write) begin if (write) begin `RAM_WRITE end @@ -105,15 +104,28 @@ module VX_dp_ram #( end end assign rdata = rdata_r; + end else begin : g_undefined + `USE_BLOCK_BRAM `RAM_ARRAY + `RAM_INITIALIZATION + reg [DATAW-1:0] rdata_r; + always @(posedge clk) begin + if (write) begin + `RAM_WRITE + end + if (read) begin + rdata_r <= ram[raddr]; + end + end + assign rdata = rdata_r; end end else begin : g_auto - if (RDW_MODE == "W") begin : g_new_data + if (RDW_MODE == "W") begin : g_write_first (* rw_addr_collision = "yes" *) `RAM_ARRAY `UNUSED_VAR (wren) `RAM_INITIALIZATION reg [ADDRW-1:0] addr_reg; always @(posedge clk) begin - if (cs) begin + if (read || write) begin if (write) begin `RAM_WRITE end @@ -121,12 +133,12 @@ module VX_dp_ram #( end end assign rdata = ram[addr_reg]; - end else begin : g_old_data + end else if (RDW_MODE == "R") begin : g_read_first `RAM_ARRAY `RAM_INITIALIZATION reg [DATAW-1:0] rdata_r; always @(posedge clk) begin - if (cs) begin + if (read || write) begin if (write) begin `RAM_WRITE end @@ -134,12 +146,25 @@ module VX_dp_ram #( end end assign rdata = rdata_r; + end else begin + `RAM_ARRAY + `RAM_INITIALIZATION + reg [DATAW-1:0] rdata_r; + always @(posedge clk) begin + if (write) begin + `RAM_WRITE + end + if (read) begin + rdata_r <= ram[raddr]; + end + end + assign rdata = rdata_r; end end end else begin : g_async `UNUSED_VAR (read) if (FORCE_BRAM) begin : g_bram - if (RDW_MODE == "W") begin : g_new_data + if (RDW_MODE == "W") begin : 
g_write_first `USE_BLOCK_BRAM `RAM_ARRAY `RAM_INITIALIZATION always @(posedge clk) begin @@ -148,7 +173,7 @@ module VX_dp_ram #( end end assign rdata = ram[raddr]; - end else begin : g_old_data + end else begin : g_read_first `NO_RW_RAM_CHECK `USE_BLOCK_BRAM `RAM_ARRAY `RAM_INITIALIZATION always @(posedge clk) begin @@ -159,7 +184,7 @@ module VX_dp_ram #( assign rdata = ram[raddr]; end end else begin : g_auto - if (RDW_MODE == "W") begin : g_new_data + if (RDW_MODE == "W") begin : g_write_first `RAM_ARRAY `RAM_INITIALIZATION always @(posedge clk) begin @@ -168,7 +193,7 @@ module VX_dp_ram #( end end assign rdata = ram[raddr]; - end else begin : g_old_data + end else begin : g_read_first `NO_RW_RAM_CHECK `RAM_ARRAY `RAM_INITIALIZATION always @(posedge clk) begin @@ -200,19 +225,26 @@ module VX_dp_ram #( end if (OUT_REG) begin : g_sync - wire cs = read || write; - if (RDW_MODE == "W") begin : g_new_data + if (RDW_MODE == "W") begin : g_write_first reg [ADDRW-1:0] addr_reg; always @(posedge clk) begin - if (cs) begin + if (read || write) begin addr_reg <= raddr; end end assign rdata = ram[addr_reg]; - end else begin : g_old_data + end else if (RDW_MODE == "R") begin : g_read_first + reg [DATAW-1:0] rdata_r; + always @(posedge clk) begin + if (read || write) begin + rdata_r <= ram[raddr]; + end + end + assign rdata = rdata_r; + end else begin : g_undefined reg [DATAW-1:0] rdata_r; always @(posedge clk) begin - if (cs) begin + if (read) begin rdata_r <= ram[raddr]; end end @@ -220,9 +252,9 @@ module VX_dp_ram #( end end else begin : g_async `UNUSED_VAR (read) - if (RDW_MODE == "W") begin : g_new_data + if (RDW_MODE == "W") begin : g_write_first assign rdata = ram[raddr]; - end else begin : g_old_data + end else begin : g_read_first reg [DATAW-1:0] prev_data; reg [ADDRW-1:0] prev_waddr; reg prev_write; diff --git a/hw/rtl/libs/VX_sp_ram.sv b/hw/rtl/libs/VX_sp_ram.sv index 1acbf733a6..ee13162719 100644 --- a/hw/rtl/libs/VX_sp_ram.sv +++ b/hw/rtl/libs/VX_sp_ram.sv @@ 
-196,7 +196,7 @@ module VX_sp_ram #( end else begin : g_async `UNUSED_VAR (read) if (FORCE_BRAM) begin : g_bram - if (RDW_MODE == "W") begin : g_new_data + if (RDW_MODE == "W") begin : g_write_first `USE_BLOCK_BRAM `RAM_ARRAY `RAM_INITIALIZATION always @(posedge clk) begin @@ -205,7 +205,7 @@ module VX_sp_ram #( end end assign rdata = ram[addr]; - end else begin : g_old_data + end else begin : g_read_first `NO_RW_RAM_CHECK `USE_BLOCK_BRAM `RAM_ARRAY `RAM_INITIALIZATION always @(posedge clk) begin @@ -216,7 +216,7 @@ module VX_sp_ram #( assign rdata = ram[addr]; end end else begin : g_auto - if (RDW_MODE == "W") begin : g_new_data + if (RDW_MODE == "W") begin : g_write_first `RAM_ARRAY `RAM_INITIALIZATION always @(posedge clk) begin @@ -225,7 +225,7 @@ module VX_sp_ram #( end end assign rdata = ram[addr]; - end else begin : g_old_data + end else begin : g_read_first `NO_RW_RAM_CHECK `RAM_ARRAY `RAM_INITIALIZATION always @(posedge clk) begin @@ -284,9 +284,9 @@ module VX_sp_ram #( end end else begin : g_async `UNUSED_VAR (read) - if (RDW_MODE == "W") begin : g_rwcheck + if (RDW_MODE == "W") begin : g_write_first assign rdata = ram[addr]; - end else begin : g_no_rwcheck + end else begin : g_read_first reg [DATAW-1:0] prev_data; reg [ADDRW-1:0] prev_waddr; reg prev_write; From d584e7bac182bd6be2b49bee5f0f2025fbf6d277 Mon Sep 17 00:00:00 2001 From: Udit Subramanya Date: Mon, 21 Oct 2024 13:28:57 -0400 Subject: [PATCH 309/407] intermediate docs update --- docs/fpga_setup.md | 56 ++++++++++++++++------------------------------ 1 file changed, 19 insertions(+), 37 deletions(-) diff --git a/docs/fpga_setup.md b/docs/fpga_setup.md index 78ed63e257..6fcf926d3b 100644 --- a/docs/fpga_setup.md +++ b/docs/fpga_setup.md @@ -13,7 +13,7 @@ If you are associated with Georgia Tech (or related workshops) you can use CRNCH ## Why are the Rouges Important? 
-By exposing students and researchers to this set of unique hardware, we hope to foster cross-cutting discussions about hardware designs that will drive future *performance improvements in computing long after the Moore’s Law era of “cheap transistors” ends*. +By exposing students and researchers to this set of unique hardware, we hope to foster cross-cutting discussions about hardware designs that will drive future *performance improvements in computing long after the Moore’s Law era of “cheap transistors” ends*. Specifically, the Rouges Gallery contains FPGA's which can be synthesized into Vortex hardware. ## How is the Rouges Gallery Funded? @@ -32,68 +32,50 @@ You can listen to a talk about RG [here](https://mediaspace.gatech.edu/media/Jef You should use [this form](https://crnch-rg.cc.gatech.edu/request-rogues-gallery-access/) to request access to RG’s reconfigurable computing (vortex fpga) resources. You should receive an email with your ticket item being created. Once it gets processed, you should get an email confirmed your access has been granted. It might take some time to get processed. ## How to Access Rouges Gallery? +There are two methods of accessing CRNCH's Rouges Gallery +1) Web-based GUI: [rg-ood.crnch.gatech.edu](http://rg-ood.crnch.gatech.edu/) +2) SSH: `ssh @rg-login.crnch.gatech.edu` -CRNCH resources do not require any VPN access for GT members so you can head to the web url for open on-demand: [rg-ood.crnch.gatech.edu](http://rg-ood.crnch.gatech.edu/) -Alternatively, you can `ssh` into rg with: `ssh @rg-login.crnch.gatech.edu` - -(`ssh gburdell3@rg-login.crnch.gatech.edu`) - -## Synthesis for Xilinx Boards -First, you need to get access to the server with the Xilinx FPGAs. +## Where should I keep my files? +The CRNCH servers have a folder called `USERSCRATCH` which can be found in your home directory: `echo $HOME`. You should keep all your files in this folder since it is available across all the Rouges Gallery Nodes. 
## **What Machines are Available in the Rogues Gallery?** -Complete list of machines can be found [here](https://gt-crnch-rg.readthedocs.io/en/main/general/rg-hardware.html). - -## Which Machine do we Need from RG? - -There are three primary nodes you might use for Xilinx FPGAs. The table below summarizes: - -| Name | Device | Description | -| --- | --- | --- | -| flubber1 | u50 | can synthesize vortex | -| flubber4 | u250 | missing HBM | -| flubber5 | u280 | can synthesize vortex | - - -*Note*: The `USERSCRATCH` folder is synchronized between all RG nodes. That means you can upload your files to `rg-login` and have them available on `flubber[1,4-5`. Changes on one node will be reflected across all nodes. - -## How to Access flubber for Synthesis? +Complete list of machines can be found [here](https://gt-crnch-rg.readthedocs.io/en/main/general/rg-hardware.html). Furthermore, you can find detailed information about the FPGA hardware [here](https://gt-crnch-rg.readthedocs.io/en/main/reconfig/xilinx/xilinx-getting-started.html). -Now that you have the files prepared and available on the FPGA node, you can start the synthesis. To run on hardware we need a rg-xilinx-fpga-hw cluster which includes **flubber[1,4-5]**. First `ssh` into the rouges gallery, if you have not already. +## Allocate an FPGA Node +Once you’ve connected to the CRNCH login node, you can use the Slurm scheduler to request an interactive job using `salloc`. This [page](https://gt-crnch-rg.readthedocs.io/en/main/general/using-slurm.html) explains why we use Slurm to request resources. Documentation for `salloc` can be found [here](https://gt-crnch-rg.readthedocs.io/en/main/general/using-slurm-examples.html). And here. -```bash -ssh [@rg-login.crnch.gatech.edu](mailto:usubramanya3@rg-login.crnch.gatech.edu) -``` -Once you’ve logged in, you can use Slurm to request an interactive job. 
First, view the available Slurm Partitions here [here](https://gt-crnch-rg.readthedocs.io/en/main/general/using-slurm.html). Then, the example requests can be found [here](https://gt-crnch-rg.readthedocs.io/en/main/general/using-slurm-examples.html). - -In our case we might run: +To request 16 cores and 64GB of RAM for 6 hours on flubber9, a fpga dev node: ```bash -salloc -p rg-fpga --nodes=1 --ntasks-per-node=1 --nodelist flubber1 --time=01:00:00 +salloc -p rg-fpga --nodes=1 --ntasks-per-node=16 --mem=64G --nodelist flubber9 --time=06:00:00 ``` ## Environment Setup Once you are logged in, you will need to complete some first time configurations. -### Clone Repo - ### Source Configuration Scripts ``` $ source /opt/xilinx/xrt/setup.sh $ source /opt/xilinx/Vitis/2023.1/settings64.sh ``` +``` +$ source /opt/xilinx/xrt/setup.sh +$ source /tools/reconfig/xilinx/Vitis/2023.1/settings64.sh +``` + ### Check Installed FPGA Platforms -`platforminfo -l` +`platforminfo -l` which tells us the correct name of the platform installed on the current fpga node. It should be used for the `PLATFORM` variable below. ### Build FPGA image The directory `hw/syn/xilinx/xrt` contains the makefile used to synthesize Vortex. 
``` $ cd hw/syn/xilinx/xrt - $ PREFIX=test1 PLATFORM=xilinx_u50_gen3x16_xdma_5_202210_1 TARGET=hw NUM_CORES=4 make build_u50_hw_4c.log 2>&1 & + $ PREFIX=test1 PLATFORM=xilinx_u250_gen3x16_xdma_4_1_202210_1 TARGET=hw NUM_CORES=1 make build_u250_hw_1c.log 2>&1 & ``` Will run the synthesis under new build directory: BUILD_DIR := "\\_\\_\" The generated bitstream will be located under /bin/vortex_afu.xclbin @@ -105,7 +87,7 @@ For long-running jobs, invocation of this makefile can be made of the following For example: ```bash -CONFIGS="-DL2_ENABLE -DDCACHE_SIZE=8192" PREFIX=build_4c_u280 NUM_CORES=4 TARGET=hw PLATFORM=xilinx_u280_gen3x16_xdma_1_202211_1 nohup make > build_u280_hw_4c.log 2>&1 & +CONFIGS="-DL2_ENABLE -DDCACHE_SIZE=8192" PREFIX=build_4c_u280 NUM_CORES=4 TARGET=hw PLATFORM=xilinx_u280_gen3x16_xdma_1_202310_1 nohup make > build_u250_hw_4c.log 2>&1 & ``` The build is complete when the bitstream file `vortex_afu.xclbin` exists in `hw|hw_emu/bin`. From 8fdca0e52afb71e2ba1d4f7e3cf3457bb12b2d4d Mon Sep 17 00:00:00 2001 From: Udit Subramanya Date: Mon, 21 Oct 2024 15:38:53 -0400 Subject: [PATCH 310/407] correct vitis env --- docs/fpga_setup.md | 5 ----- 1 file changed, 5 deletions(-) diff --git a/docs/fpga_setup.md b/docs/fpga_setup.md index 6fcf926d3b..5b90df0b6c 100644 --- a/docs/fpga_setup.md +++ b/docs/fpga_setup.md @@ -57,11 +57,6 @@ salloc -p rg-fpga --nodes=1 --ntasks-per-node=16 --mem=64G --nodelist flubber9 - Once you are logged in, you will need to complete some first time configurations. 
### Source Configuration Scripts -``` -$ source /opt/xilinx/xrt/setup.sh -$ source /opt/xilinx/Vitis/2023.1/settings64.sh -``` - ``` $ source /opt/xilinx/xrt/setup.sh $ source /tools/reconfig/xilinx/Vitis/2023.1/settings64.sh From 519023fb2b644e1455da3b0d9da29c222c3470c6 Mon Sep 17 00:00:00 2001 From: Udit Subramanya Date: Mon, 21 Oct 2024 15:39:10 -0400 Subject: [PATCH 311/407] add citation for MICRO 21 paper --- README.md | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/README.md b/README.md index ed4c89d882..97686c6415 100644 --- a/README.md +++ b/README.md @@ -5,6 +5,27 @@ Vortex is a full-stack open-source RISC-V GPGPU. Vortex supports multiple *backe ## Website Vortex news can be found on its [website](https://vortex.cc.gatech.edu/) +## Citation +``` +@inproceedings{10.1145/3466752.3480128, + author = {Tine, Blaise and Yalamarthy, Krishna Praveen and Elsabbagh, Fares and Hyesoon, Kim}, + title = {Vortex: Extending the RISC-V ISA for GPGPU and 3D-Graphics}, + year = {2021}, + isbn = {9781450385572}, + publisher = {Association for Computing Machinery}, + address = {New York, NY, USA}, + url = {https://doi.org/10.1145/3466752.3480128}, + doi = {10.1145/3466752.3480128}, + abstract = {The importance of open-source hardware and software has been increasing. However, despite GPUs being one of the more popular accelerators across various applications, there is very little open-source GPU infrastructure in the public domain. We argue that one of the reasons for the lack of open-source infrastructure for GPUs is rooted in the complexity of their ISA and software stacks. In this work, we first propose an ISA extension to RISC-V that supports GPGPUs and graphics. The main goal of the ISA extension proposal is to minimize the ISA changes so that the corresponding changes to the open-source ecosystem are also minimal, which makes for a sustainable development ecosystem. 
To demonstrate the feasibility of the minimally extended RISC-V ISA, we implemented the complete software and hardware stacks of Vortex on FPGA. Vortex is a PCIe-based soft GPU that supports OpenCL and OpenGL. Vortex can be used in a variety of applications, including machine learning, graph analytics, and graphics rendering. Vortex can scale up to 32 cores on an Altera Stratix 10 FPGA, delivering a peak performance of 25.6 GFlops at 200 Mhz.}, + booktitle = {MICRO-54: 54th Annual IEEE/ACM International Symposium on Microarchitecture}, + pages = {754–766}, + numpages = {13}, + keywords = {reconfigurable computing, memory systems., computer graphics}, + location = {Virtual Event, Greece}, + series = {MICRO '21} +} +``` + ## Specifications - Support RISC-V RV32IMAF and RV64IMAFD From ff50306833d8b287305ca5da98c8780a4362c526 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Mon, 21 Oct 2024 22:24:54 -0700 Subject: [PATCH 312/407] minor update --- hw/syn/xilinx/xrt/Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/hw/syn/xilinx/xrt/Makefile b/hw/syn/xilinx/xrt/Makefile index 2517f27770..5d536a0695 100644 --- a/hw/syn/xilinx/xrt/Makefile +++ b/hw/syn/xilinx/xrt/Makefile @@ -174,6 +174,7 @@ $(BIN_DIR)/emconfig.json: report: $(XCLBIN_CONTAINER) ifeq ($(TARGET), hw) + cp $(BUILD_DIR)/_x/logs/link/vivado.log $(BUILD_DIR)/bin cp $(BUILD_DIR)/_x/logs/link/syn/ulp_vortex_afu_1_0_synth_1_runme.log $(BUILD_DIR)/bin cp $(BUILD_DIR)/_x/reports/link/syn/ulp_vortex_afu_1_0_synth_1_ulp_vortex_afu_1_0_utilization_synth.rpt $(BUILD_DIR)/bin cp $(BUILD_DIR)/_x/reports/link/imp/impl_1_hw_bb_locked_timing_summary_routed.rpt $(BUILD_DIR)/bin From 3a3bb7b70a395a6f96a2bfe657bc724186565fe0 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Mon, 21 Oct 2024 22:46:04 -0700 Subject: [PATCH 313/407] cleanup deleted files --- hw/rtl/core/VX_gpr_slice.sv | 286 ----------------------- hw/rtl/core/VX_pending_instr.sv | 79 ------- hw/rtl/core/VX_trace.vh | 387 -------------------------------- 3 
files changed, 752 deletions(-) delete mode 100644 hw/rtl/core/VX_gpr_slice.sv delete mode 100644 hw/rtl/core/VX_pending_instr.sv delete mode 100644 hw/rtl/core/VX_trace.vh diff --git a/hw/rtl/core/VX_gpr_slice.sv b/hw/rtl/core/VX_gpr_slice.sv deleted file mode 100644 index b036fc5558..0000000000 --- a/hw/rtl/core/VX_gpr_slice.sv +++ /dev/null @@ -1,286 +0,0 @@ -// Copyright © 2019-2023 -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -`include "VX_define.vh" - -module VX_gpr_slice import VX_gpu_pkg::*; #( - parameter CORE_ID = 0, - parameter CACHE_ENABLE = 0 -) ( - input wire clk, - input wire reset, - - VX_writeback_if.slave writeback_if, - VX_scoreboard_if.slave scoreboard_if, - VX_operands_if.master operands_if -); - `UNUSED_PARAM (CORE_ID) - localparam DATAW = `UUID_WIDTH + ISSUE_WIS_W + `NUM_THREADS + `PC_BITS + 1 + `EX_BITS + `INST_OP_BITS + `INST_ARGS_BITS + `NR_BITS; - localparam RAM_ADDRW = `LOG2UP(`NUM_REGS * ISSUE_RATIO); - - localparam STATE_IDLE = 2'd0; - localparam STATE_FETCH1 = 2'd1; - localparam STATE_FETCH2 = 2'd2; - localparam STATE_FETCH3 = 2'd3; - localparam STATE_BITS = 2; - - wire [`NUM_THREADS-1:0][`XLEN-1:0] gpr_rd_data; - reg [`NR_BITS-1:0] gpr_rd_rid, gpr_rd_rid_n; - reg [ISSUE_WIS_W-1:0] gpr_rd_wis, gpr_rd_wis_n; - - reg [`NUM_THREADS-1:0][`XLEN-1:0] cache_data [ISSUE_RATIO-1:0]; - reg [`NUM_THREADS-1:0][`XLEN-1:0] cache_data_n [ISSUE_RATIO-1:0]; - reg [`NR_BITS-1:0] cache_reg [ISSUE_RATIO-1:0]; - reg [`NR_BITS-1:0] cache_reg_n 
[ISSUE_RATIO-1:0]; - reg [`NUM_THREADS-1:0] cache_tmask [ISSUE_RATIO-1:0]; - reg [`NUM_THREADS-1:0] cache_tmask_n [ISSUE_RATIO-1:0]; - reg [ISSUE_RATIO-1:0] cache_eop, cache_eop_n; - - reg [`NUM_THREADS-1:0][`XLEN-1:0] rs1_data, rs1_data_n; - reg [`NUM_THREADS-1:0][`XLEN-1:0] rs2_data, rs2_data_n; - reg [`NUM_THREADS-1:0][`XLEN-1:0] rs3_data, rs3_data_n; - - reg [STATE_BITS-1:0] state, state_n; - reg [`NR_BITS-1:0] rs2, rs2_n; - reg [`NR_BITS-1:0] rs3, rs3_n; - reg rs2_ready, rs2_ready_n; - reg rs3_ready, rs3_ready_n; - reg data_ready, data_ready_n; - - wire stg_valid_in, stg_ready_in; - - wire is_rs1_zero = (scoreboard_if.data.rs1 == 0); - wire is_rs2_zero = (scoreboard_if.data.rs2 == 0); - wire is_rs3_zero = (scoreboard_if.data.rs3 == 0); - - always @(*) begin - state_n = state; - rs2_n = rs2; - rs3_n = rs3; - rs2_ready_n = rs2_ready; - rs3_ready_n = rs3_ready; - rs1_data_n = rs1_data; - rs2_data_n = rs2_data; - rs3_data_n = rs3_data; - cache_data_n = cache_data; - cache_reg_n = cache_reg; - cache_tmask_n= cache_tmask; - cache_eop_n = cache_eop; - gpr_rd_rid_n = gpr_rd_rid; - gpr_rd_wis_n = gpr_rd_wis; - data_ready_n = data_ready; - - case (state) - STATE_IDLE: begin - if (operands_if.valid && operands_if.ready) begin - data_ready_n = 0; - end - if (scoreboard_if.valid && data_ready_n == 0) begin - data_ready_n = 1; - if (is_rs3_zero || (CACHE_ENABLE != 0 && - scoreboard_if.data.rs3 == cache_reg[scoreboard_if.data.wis] && - (scoreboard_if.data.tmask & cache_tmask[scoreboard_if.data.wis]) == scoreboard_if.data.tmask)) begin - rs3_data_n = (is_rs3_zero || CACHE_ENABLE == 0) ? 
'0 : cache_data[scoreboard_if.data.wis]; - rs3_ready_n = 1; - end else begin - rs3_ready_n = 0; - gpr_rd_rid_n = scoreboard_if.data.rs3; - data_ready_n = 0; - state_n = STATE_FETCH3; - end - if (is_rs2_zero || (CACHE_ENABLE != 0 && - scoreboard_if.data.rs2 == cache_reg[scoreboard_if.data.wis] && - (scoreboard_if.data.tmask & cache_tmask[scoreboard_if.data.wis]) == scoreboard_if.data.tmask)) begin - rs2_data_n = (is_rs2_zero || CACHE_ENABLE == 0) ? '0 : cache_data[scoreboard_if.data.wis]; - rs2_ready_n = 1; - end else begin - rs2_ready_n = 0; - gpr_rd_rid_n = scoreboard_if.data.rs2; - data_ready_n = 0; - state_n = STATE_FETCH2; - end - if (is_rs1_zero || (CACHE_ENABLE != 0 && - scoreboard_if.data.rs1 == cache_reg[scoreboard_if.data.wis] && - (scoreboard_if.data.tmask & cache_tmask[scoreboard_if.data.wis]) == scoreboard_if.data.tmask)) begin - rs1_data_n = (is_rs1_zero || CACHE_ENABLE == 0) ? '0 : cache_data[scoreboard_if.data.wis]; - end else begin - gpr_rd_rid_n = scoreboard_if.data.rs1; - data_ready_n = 0; - state_n = STATE_FETCH1; - end - end - gpr_rd_wis_n = scoreboard_if.data.wis; - rs2_n = scoreboard_if.data.rs2; - rs3_n = scoreboard_if.data.rs3; - end - STATE_FETCH1: begin - rs1_data_n = gpr_rd_data; - if (~rs2_ready) begin - gpr_rd_rid_n = rs2; - state_n = STATE_FETCH2; - end else if (~rs3_ready) begin - gpr_rd_rid_n = rs3; - state_n = STATE_FETCH3; - end else begin - data_ready_n = 1; - state_n = STATE_IDLE; - end - end - STATE_FETCH2: begin - rs2_data_n = gpr_rd_data; - if (~rs3_ready) begin - gpr_rd_rid_n = rs3; - state_n = STATE_FETCH3; - end else begin - data_ready_n = 1; - state_n = STATE_IDLE; - end - end - STATE_FETCH3: begin - rs3_data_n = gpr_rd_data; - data_ready_n = 1; - state_n = STATE_IDLE; - end - endcase - - if (CACHE_ENABLE != 0 && writeback_if.valid) begin - if ((cache_reg[writeback_if.data.wis] == writeback_if.data.rd) - || (cache_eop[writeback_if.data.wis] && writeback_if.data.sop)) begin - for (integer j = 0; j < `NUM_THREADS; ++j) begin 
- if (writeback_if.data.tmask[j]) begin - cache_data_n[writeback_if.data.wis][j] = writeback_if.data.data[j]; - end - end - cache_reg_n[writeback_if.data.wis] = writeback_if.data.rd; - cache_eop_n[writeback_if.data.wis] = writeback_if.data.eop; - cache_tmask_n[writeback_if.data.wis] = writeback_if.data.sop ? writeback_if.data.tmask : - (cache_tmask_n[writeback_if.data.wis] | writeback_if.data.tmask); - end - end - end - - always @(posedge clk) begin - if (reset) begin - state <= STATE_IDLE; - cache_eop <= {ISSUE_RATIO{1'b1}}; - data_ready <= 0; - end else begin - state <= state_n; - cache_eop <= cache_eop_n; - data_ready <= data_ready_n; - end - gpr_rd_rid <= gpr_rd_rid_n; - gpr_rd_wis <= gpr_rd_wis_n; - rs2_ready <= rs2_ready_n; - rs3_ready <= rs3_ready_n; - rs2 <= rs2_n; - rs3 <= rs3_n; - rs1_data <= rs1_data_n; - rs2_data <= rs2_data_n; - rs3_data <= rs3_data_n; - cache_data <= cache_data_n; - cache_reg <= cache_reg_n; - cache_tmask <= cache_tmask_n; - end - - assign stg_valid_in = scoreboard_if.valid && data_ready; - assign scoreboard_if.ready = stg_ready_in && data_ready; - - VX_toggle_buffer #( - .DATAW (DATAW) - ) toggle_buffer ( - .clk (clk), - .reset (reset), - .valid_in (stg_valid_in), - .data_in ({ - scoreboard_if.data.uuid, - scoreboard_if.data.wis, - scoreboard_if.data.tmask, - scoreboard_if.data.PC, - scoreboard_if.data.wb, - scoreboard_if.data.ex_type, - scoreboard_if.data.op_type, - scoreboard_if.data.op_args, - scoreboard_if.data.rd - }), - .ready_in (stg_ready_in), - .valid_out (operands_if.valid), - .data_out ({ - operands_if.data.uuid, - operands_if.data.wis, - operands_if.data.tmask, - operands_if.data.PC, - operands_if.data.wb, - operands_if.data.ex_type, - operands_if.data.op_type, - operands_if.data.op_args, - operands_if.data.rd - }), - .ready_out (operands_if.ready) - ); - - assign operands_if.data.rs1_data = rs1_data; - assign operands_if.data.rs2_data = rs2_data; - assign operands_if.data.rs3_data = rs3_data; - - // GPR banks - - reg 
[RAM_ADDRW-1:0] gpr_rd_addr; - wire [RAM_ADDRW-1:0] gpr_wr_addr; - if (ISSUE_WIS != 0) begin - assign gpr_wr_addr = {writeback_if.data.wis, writeback_if.data.rd}; - always @(posedge clk) begin - gpr_rd_addr <= {gpr_rd_wis_n, gpr_rd_rid_n}; - end - end else begin - assign gpr_wr_addr = writeback_if.data.rd; - always @(posedge clk) begin - gpr_rd_addr <= gpr_rd_rid_n; - end - end - -`ifdef GPR_RESET - reg wr_enabled = 0; - always @(posedge clk) begin - if (reset) begin - wr_enabled <= 1; - end - end -`endif - - for (genvar j = 0; j < `NUM_THREADS; ++j) begin - VX_dp_ram #( - .DATAW (`XLEN), - .SIZE (`NUM_REGS * ISSUE_RATIO), - `ifdef GPR_RESET - .INIT_ENABLE (1), - .INIT_VALUE (0), - `endif - .NO_RWCHECK (1) - ) gpr_ram ( - .clk (clk), - .read (1'b1), - `UNUSED_PIN (wren), - `ifdef GPR_RESET - .write (wr_enabled && writeback_if.valid && writeback_if.data.tmask[j]), - `else - .write (writeback_if.valid && writeback_if.data.tmask[j]), - `endif - .waddr (gpr_wr_addr), - .wdata (writeback_if.data.data[j]), - .raddr (gpr_rd_addr), - .rdata (gpr_rd_data[j]) - ); - end - -endmodule diff --git a/hw/rtl/core/VX_pending_instr.sv b/hw/rtl/core/VX_pending_instr.sv deleted file mode 100644 index af87b53e08..0000000000 --- a/hw/rtl/core/VX_pending_instr.sv +++ /dev/null @@ -1,79 +0,0 @@ -// Copyright © 2019-2023 -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -`include "VX_define.vh" - -module VX_pending_instr #( - parameter CTR_WIDTH = 12, - parameter ALM_EMPTY = 1, - parameter DECR_COUNT = 1 -) ( - input wire clk, - input wire reset, - input wire incr, - input wire [`NW_WIDTH-1:0] incr_wid, - input wire [DECR_COUNT-1:0] decr, - input wire [DECR_COUNT-1:0][`NW_WIDTH-1:0] decr_wid, - input wire [`NW_WIDTH-1:0] alm_empty_wid, - output wire empty, - output wire alm_empty -); - localparam COUNTW = `CLOG2(DECR_COUNT+1); - - reg [`NUM_WARPS-1:0][CTR_WIDTH-1:0] pending_instrs; - reg [`NUM_WARPS-1:0][COUNTW-1:0] decr_cnt; - reg [`NUM_WARPS-1:0][DECR_COUNT-1:0] decr_mask; - reg [`NUM_WARPS-1:0] incr_cnt, incr_cnt_n; - reg [`NUM_WARPS-1:0] alm_empty_r, empty_r; - - always @(*) begin - incr_cnt_n = 0; - decr_mask = 0; - if (incr) begin - incr_cnt_n[incr_wid] = 1; - end - for (integer i = 0; i < DECR_COUNT; ++i) begin - if (decr[i]) begin - decr_mask[decr_wid[i]][i] = 1; - end - end - end - - for (genvar i = 0; i < `NUM_WARPS; ++i) begin - - wire [COUNTW-1:0] decr_cnt_n; - `POP_COUNT(decr_cnt_n, decr_mask[i]); - - wire [CTR_WIDTH-1:0] pending_instrs_n = pending_instrs[i] + CTR_WIDTH'(incr_cnt[i]) - CTR_WIDTH'(decr_cnt[i]); - - always @(posedge clk) begin - if (reset) begin - incr_cnt[i] <= '0; - decr_cnt[i] <= '0; - pending_instrs[i] <= '0; - alm_empty_r[i] <= 0; - empty_r[i] <= 1; - end else begin - incr_cnt[i] <= incr_cnt_n[i]; - decr_cnt[i] <= decr_cnt_n; - pending_instrs[i] <= pending_instrs_n; - alm_empty_r[i] <= (pending_instrs_n == ALM_EMPTY); - empty_r[i] <= (pending_instrs_n == 0); - end - end - end - - assign alm_empty = alm_empty_r[alm_empty_wid]; - assign empty = (& empty_r); - -endmodule diff --git a/hw/rtl/core/VX_trace.vh b/hw/rtl/core/VX_trace.vh deleted file mode 100644 index 5dc4bc3044..0000000000 --- a/hw/rtl/core/VX_trace.vh +++ /dev/null @@ -1,387 +0,0 @@ -// Copyright © 2019-2023 -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with 
the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -`ifndef VX_TRACE_VH -`define VX_TRACE_VH - -`ifdef SIMULATION - - task trace_ex_type(input int level, input [`EX_BITS-1:0] ex_type); - case (ex_type) - `EX_ALU: `TRACE(level, ("ALU")); - `EX_LSU: `TRACE(level, ("LSU")); - `EX_FPU: `TRACE(level, ("FPU")); - `EX_SFU: `TRACE(level, ("SFU")); - default: `TRACE(level, ("?")); - endcase - endtask - - task trace_ex_op(input int level, - input [`EX_BITS-1:0] ex_type, - input [`INST_OP_BITS-1:0] op_type, - input VX_gpu_pkg::op_args_t op_args - ); - case (ex_type) - `EX_ALU: begin - case (op_args.alu.xtype) - `ALU_TYPE_ARITH: begin - if (op_args.alu.is_w) begin - if (op_args.alu.use_imm) begin - case (`INST_ALU_BITS'(op_type)) - `INST_ALU_ADD: `TRACE(level, ("ADDIW")); - `INST_ALU_SLL: `TRACE(level, ("SLLIW")); - `INST_ALU_SRL: `TRACE(level, ("SRLIW")); - `INST_ALU_SRA: `TRACE(level, ("SRAIW")); - default: `TRACE(level, ("?")); - endcase - end else begin - case (`INST_ALU_BITS'(op_type)) - `INST_ALU_ADD: `TRACE(level, ("ADDW")); - `INST_ALU_SUB: `TRACE(level, ("SUBW")); - `INST_ALU_SLL: `TRACE(level, ("SLLW")); - `INST_ALU_SRL: `TRACE(level, ("SRLW")); - `INST_ALU_SRA: `TRACE(level, ("SRAW")); - default: `TRACE(level, ("?")); - endcase - end - end else begin - if (op_args.alu.use_imm) begin - case (`INST_ALU_BITS'(op_type)) - `INST_ALU_ADD: `TRACE(level, ("ADDI")); - `INST_ALU_SLL: `TRACE(level, ("SLLI")); - `INST_ALU_SRL: `TRACE(level, ("SRLI")); - `INST_ALU_SRA: `TRACE(level, ("SRAI")); - `INST_ALU_SLT: `TRACE(level, ("SLTI")); - `INST_ALU_SLTU: `TRACE(level, ("SLTIU")); - `INST_ALU_XOR: 
`TRACE(level, ("XORI")); - `INST_ALU_OR: `TRACE(level, ("ORI")); - `INST_ALU_AND: `TRACE(level, ("ANDI")); - `INST_ALU_LUI: `TRACE(level, ("LUI")); - `INST_ALU_AUIPC: `TRACE(level, ("AUIPC")); - default: `TRACE(level, ("?")); - endcase - end else begin - case (`INST_ALU_BITS'(op_type)) - `INST_ALU_ADD: `TRACE(level, ("ADD")); - `INST_ALU_SUB: `TRACE(level, ("SUB")); - `INST_ALU_SLL: `TRACE(level, ("SLL")); - `INST_ALU_SRL: `TRACE(level, ("SRL")); - `INST_ALU_SRA: `TRACE(level, ("SRA")); - `INST_ALU_SLT: `TRACE(level, ("SLT")); - `INST_ALU_SLTU: `TRACE(level, ("SLTU")); - `INST_ALU_XOR: `TRACE(level, ("XOR")); - `INST_ALU_OR: `TRACE(level, ("OR")); - `INST_ALU_AND: `TRACE(level, ("AND")); - `INST_ALU_CZEQ: `TRACE(level, ("CZERO.EQZ")); - `INST_ALU_CZNE: `TRACE(level, ("CZERO.NEZ")); - default: `TRACE(level, ("?")); - endcase - end - end - end - `ALU_TYPE_BRANCH: begin - case (`INST_BR_BITS'(op_type)) - `INST_BR_EQ: `TRACE(level, ("BEQ")); - `INST_BR_NE: `TRACE(level, ("BNE")); - `INST_BR_LT: `TRACE(level, ("BLT")); - `INST_BR_GE: `TRACE(level, ("BGE")); - `INST_BR_LTU: `TRACE(level, ("BLTU")); - `INST_BR_GEU: `TRACE(level, ("BGEU")); - `INST_BR_JAL: `TRACE(level, ("JAL")); - `INST_BR_JALR: `TRACE(level, ("JALR")); - `INST_BR_ECALL: `TRACE(level, ("ECALL")); - `INST_BR_EBREAK:`TRACE(level, ("EBREAK")); - `INST_BR_URET: `TRACE(level, ("URET")); - `INST_BR_SRET: `TRACE(level, ("SRET")); - `INST_BR_MRET: `TRACE(level, ("MRET")); - default: `TRACE(level, ("?")); - endcase - end - `ALU_TYPE_MULDIV: begin - if (op_args.alu.is_w) begin - case (`INST_M_BITS'(op_type)) - `INST_M_MUL: `TRACE(level, ("MULW")); - `INST_M_DIV: `TRACE(level, ("DIVW")); - `INST_M_DIVU: `TRACE(level, ("DIVUW")); - `INST_M_REM: `TRACE(level, ("REMW")); - `INST_M_REMU: `TRACE(level, ("REMUW")); - default: `TRACE(level, ("?")); - endcase - end else begin - case (`INST_M_BITS'(op_type)) - `INST_M_MUL: `TRACE(level, ("MUL")); - `INST_M_MULH: `TRACE(level, ("MULH")); - `INST_M_MULHSU:`TRACE(level, 
("MULHSU")); - `INST_M_MULHU: `TRACE(level, ("MULHU")); - `INST_M_DIV: `TRACE(level, ("DIV")); - `INST_M_DIVU: `TRACE(level, ("DIVU")); - `INST_M_REM: `TRACE(level, ("REM")); - `INST_M_REMU: `TRACE(level, ("REMU")); - default: `TRACE(level, ("?")); - endcase - end - end - default: `TRACE(level, ("?")); - endcase - end - `EX_LSU: begin - if (op_args.lsu.is_float) begin - case (`INST_LSU_BITS'(op_type)) - `INST_LSU_LW: `TRACE(level, ("FLW")); - `INST_LSU_LD: `TRACE(level, ("FLD")); - `INST_LSU_SW: `TRACE(level, ("FSW")); - `INST_LSU_SD: `TRACE(level, ("FSD")); - default: `TRACE(level, ("?")); - endcase - end else begin - case (`INST_LSU_BITS'(op_type)) - `INST_LSU_LB: `TRACE(level, ("LB")); - `INST_LSU_LH: `TRACE(level, ("LH")); - `INST_LSU_LW: `TRACE(level, ("LW")); - `INST_LSU_LD: `TRACE(level, ("LD")); - `INST_LSU_LBU:`TRACE(level, ("LBU")); - `INST_LSU_LHU:`TRACE(level, ("LHU")); - `INST_LSU_LWU:`TRACE(level, ("LWU")); - `INST_LSU_SB: `TRACE(level, ("SB")); - `INST_LSU_SH: `TRACE(level, ("SH")); - `INST_LSU_SW: `TRACE(level, ("SW")); - `INST_LSU_SD: `TRACE(level, ("SD")); - `INST_LSU_FENCE:`TRACE(level,("FENCE")); - default: `TRACE(level, ("?")); - endcase - end - end - `EX_FPU: begin - case (`INST_FPU_BITS'(op_type)) - `INST_FPU_ADD: begin - if (op_args.fpu.fmt[0]) - `TRACE(level, ("FADD.D")); - else - `TRACE(level, ("FADD.S")); - end - `INST_FPU_SUB: begin - if (op_args.fpu.fmt[0]) - `TRACE(level, ("FSUB.D")); - else - `TRACE(level, ("FSUB.S")); - end - `INST_FPU_MUL: begin - if (op_args.fpu.fmt[0]) - `TRACE(level, ("FMUL.D")); - else - `TRACE(level, ("FMUL.S")); - end - `INST_FPU_DIV: begin - if (op_args.fpu.fmt[0]) - `TRACE(level, ("FDIV.D")); - else - `TRACE(level, ("FDIV.S")); - end - `INST_FPU_SQRT: begin - if (op_args.fpu.fmt[0]) - `TRACE(level, ("FSQRT.D")); - else - `TRACE(level, ("FSQRT.S")); - end - `INST_FPU_MADD: begin - if (op_args.fpu.fmt[0]) - `TRACE(level, ("FMADD.D")); - else - `TRACE(level, ("FMADD.S")); - end - `INST_FPU_MSUB: begin - if 
(op_args.fpu.fmt[0]) - `TRACE(level, ("FMSUB.D")); - else - `TRACE(level, ("FMSUB.S")); - end - `INST_FPU_NMADD: begin - if (op_args.fpu.fmt[0]) - `TRACE(level, ("FNMADD.D")); - else - `TRACE(level, ("FNMADD.S")); - end - `INST_FPU_NMSUB: begin - if (op_args.fpu.fmt[0]) - `TRACE(level, ("FNMSUB.D")); - else - `TRACE(level, ("FNMSUB.S")); - end - `INST_FPU_CMP: begin - if (op_args.fpu.fmt[0]) begin - case (op_args.fpu.frm[1:0]) - 0: `TRACE(level, ("FLE.D")); - 1: `TRACE(level, ("FLT.D")); - 2: `TRACE(level, ("FEQ.D")); - default: `TRACE(level, ("?")); - endcase - end else begin - case (op_args.fpu.frm[1:0]) - 0: `TRACE(level, ("FLE.S")); - 1: `TRACE(level, ("FLT.S")); - 2: `TRACE(level, ("FEQ.S")); - default: `TRACE(level, ("?")); - endcase - end - end - `INST_FPU_F2F: begin - if (op_args.fpu.fmt[0]) begin - `TRACE(level, ("FCVT.D.S")); - end else begin - `TRACE(level, ("FCVT.S.D")); - end - end - `INST_FPU_F2I: begin - if (op_args.fpu.fmt[0]) begin - if (op_args.fpu.fmt[1]) begin - `TRACE(level, ("FCVT.L.D")); - end else begin - `TRACE(level, ("FCVT.W.D")); - end - end else begin - if (op_args.fpu.fmt[1]) begin - `TRACE(level, ("FCVT.L.S")); - end else begin - `TRACE(level, ("FCVT.W.S")); - end - end - end - `INST_FPU_F2U: begin - if (op_args.fpu.fmt[0]) begin - if (op_args.fpu.fmt[1]) begin - `TRACE(level, ("FCVT.LU.D")); - end else begin - `TRACE(level, ("FCVT.WU.D")); - end - end else begin - if (op_args.fpu.fmt[1]) begin - `TRACE(level, ("FCVT.LU.S")); - end else begin - `TRACE(level, ("FCVT.WU.S")); - end - end - end - `INST_FPU_I2F: begin - if (op_args.fpu.fmt[0]) begin - if (op_args.fpu.fmt[1]) begin - `TRACE(level, ("FCVT.D.L")); - end else begin - `TRACE(level, ("FCVT.D.W")); - end - end else begin - if (op_args.fpu.fmt[1]) begin - `TRACE(level, ("FCVT.S.L")); - end else begin - `TRACE(level, ("FCVT.S.W")); - end - end - end - `INST_FPU_U2F: begin - if (op_args.fpu.fmt[0]) begin - if (op_args.fpu.fmt[1]) begin - `TRACE(level, ("FCVT.D.LU")); - end else 
begin - `TRACE(level, ("FCVT.D.WU")); - end - end else begin - if (op_args.fpu.fmt[1]) begin - `TRACE(level, ("FCVT.S.LU")); - end else begin - `TRACE(level, ("FCVT.S.WU")); - end - end - end - `INST_FPU_MISC: begin - if (op_args.fpu.fmt[0]) begin - case (op_args.fpu.frm) - 0: `TRACE(level, ("FSGNJ.D")); - 1: `TRACE(level, ("FSGNJN.D")); - 2: `TRACE(level, ("FSGNJX.D")); - 3: `TRACE(level, ("FCLASS.D")); - 4: `TRACE(level, ("FMV.X.D")); - 5: `TRACE(level, ("FMV.D.X")); - 6: `TRACE(level, ("FMIN.D")); - 7: `TRACE(level, ("FMAX.D")); - endcase - end else begin - case (op_args.fpu.frm) - 0: `TRACE(level, ("FSGNJ.S")); - 1: `TRACE(level, ("FSGNJN.S")); - 2: `TRACE(level, ("FSGNJX.S")); - 3: `TRACE(level, ("FCLASS.S")); - 4: `TRACE(level, ("FMV.X.S")); - 5: `TRACE(level, ("FMV.S.X")); - 6: `TRACE(level, ("FMIN.S")); - 7: `TRACE(level, ("FMAX.S")); - endcase - end - end - default: `TRACE(level, ("?")); - endcase - end - `EX_SFU: begin - case (`INST_SFU_BITS'(op_type)) - `INST_SFU_TMC: `TRACE(level, ("TMC")); - `INST_SFU_WSPAWN:`TRACE(level, ("WSPAWN")); - `INST_SFU_SPLIT: begin if (op_args.wctl.is_neg) `TRACE(level, ("SPLIT.N")); else `TRACE(level, ("SPLIT")); end - `INST_SFU_JOIN: `TRACE(level, ("JOIN")); - `INST_SFU_BAR: `TRACE(level, ("BAR")); - `INST_SFU_PRED: begin if (op_args.wctl.is_neg) `TRACE(level, ("PRED.N")); else `TRACE(level, ("PRED")); end - `INST_SFU_CSRRW: begin if (op_args.csr.use_imm) `TRACE(level, ("CSRRWI")); else `TRACE(level, ("CSRRW")); end - `INST_SFU_CSRRS: begin if (op_args.csr.use_imm) `TRACE(level, ("CSRRSI")); else `TRACE(level, ("CSRRS")); end - `INST_SFU_CSRRC: begin if (op_args.csr.use_imm) `TRACE(level, ("CSRRCI")); else `TRACE(level, ("CSRRC")); end - default: `TRACE(level, ("?")); - endcase - end - default: `TRACE(level, ("?")); - endcase - endtask - - task trace_op_args(input int level, - input [`EX_BITS-1:0] ex_type, - input [`INST_OP_BITS-1:0] op_type, - input VX_gpu_pkg::op_args_t op_args - ); - case (ex_type) - `EX_ALU: begin - 
`TRACE(level, (", use_PC=%b, use_imm=%b, imm=0x%0h", op_args.alu.use_PC, op_args.alu.use_imm, op_args.alu.imm)); - end - `EX_LSU: begin - `TRACE(level, (", offset=0x%0h", op_args.lsu.offset)); - end - `EX_FPU: begin - `TRACE(level, (", fmt=0x%0h, frm=0x%0h", op_args.fpu.fmt, op_args.fpu.frm)); - end - `EX_SFU: begin - if (`INST_SFU_IS_CSR(op_type)) begin - `TRACE(level, (", addr=0x%0h, use_imm=%b, imm=0x%0h", op_args.csr.addr, op_args.csr.use_imm, op_args.csr.imm)); - end - end - default:; - endcase - endtask - - task trace_base_dcr(input int level, input [`VX_DCR_ADDR_WIDTH-1:0] addr); - case (addr) - `VX_DCR_BASE_STARTUP_ADDR0: `TRACE(level, ("STARTUP_ADDR0")); - `VX_DCR_BASE_STARTUP_ADDR1: `TRACE(level, ("STARTUP_ADDR1")); - `VX_DCR_BASE_STARTUP_ARG0: `TRACE(level, ("STARTUP_ARG0")); - `VX_DCR_BASE_STARTUP_ARG1: `TRACE(level, ("STARTUP_ARG1")); - `VX_DCR_BASE_MPM_CLASS: `TRACE(level, ("MPM_CLASS")); - default: `TRACE(level, ("?")); - endcase - endtask - -`endif - -`endif // VX_TRACE_VH From 1fa4603fa2e0ef8a1da43e9df45abe323add1d5f Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Wed, 23 Oct 2024 01:14:19 -0700 Subject: [PATCH 314/407] disable sformatf during synthesis --- hw/rtl/VX_cluster.sv | 6 +++--- hw/rtl/VX_platform.vh | 3 +++ hw/rtl/VX_socket.sv | 6 +++--- hw/rtl/Vortex.sv | 2 +- hw/rtl/cache/VX_cache.sv | 2 +- hw/rtl/cache/VX_cache_bank.sv | 2 +- hw/rtl/cache/VX_cache_cluster.sv | 2 +- hw/rtl/cache/VX_cache_top.sv | 2 +- hw/rtl/core/VX_alu_unit.sv | 4 ++-- hw/rtl/core/VX_core.sv | 12 ++++++------ hw/rtl/core/VX_core_top.sv | 2 +- hw/rtl/core/VX_execute.sv | 8 ++++---- hw/rtl/core/VX_issue.sv | 2 +- hw/rtl/core/VX_issue_slice.sv | 8 ++++---- hw/rtl/core/VX_lsu_slice.sv | 2 +- hw/rtl/core/VX_lsu_unit.sv | 2 +- hw/rtl/core/VX_mem_unit.sv | 4 ++-- hw/rtl/core/VX_schedule.sv | 2 +- hw/rtl/core/VX_sfu_unit.sv | 4 ++-- hw/rtl/libs/VX_mem_scheduler.sv | 2 +- 20 files changed, 40 insertions(+), 37 deletions(-) diff --git a/hw/rtl/VX_cluster.sv 
b/hw/rtl/VX_cluster.sv index bec4e232f4..853881c086 100644 --- a/hw/rtl/VX_cluster.sv +++ b/hw/rtl/VX_cluster.sv @@ -67,7 +67,7 @@ module VX_cluster import VX_gpu_pkg::*; #( ); VX_gbar_unit #( - .INSTANCE_ID ($sformatf("gbar%0d", CLUSTER_ID)) + .INSTANCE_ID (`SFORMATF(("gbar%0d", CLUSTER_ID))) ) gbar_unit ( .clk (clk), .reset (reset), @@ -84,7 +84,7 @@ module VX_cluster import VX_gpu_pkg::*; #( `RESET_RELAY (l2_reset, reset); VX_cache_wrap #( - .INSTANCE_ID ($sformatf("%s-l2cache", INSTANCE_ID)), + .INSTANCE_ID (`SFORMATF(("%s-l2cache", INSTANCE_ID))), .CACHE_SIZE (`L2_CACHE_SIZE), .LINE_SIZE (`L2_LINE_SIZE), .NUM_BANKS (`L2_NUM_BANKS), @@ -131,7 +131,7 @@ module VX_cluster import VX_gpu_pkg::*; #( VX_socket #( .SOCKET_ID ((CLUSTER_ID * `NUM_SOCKETS) + socket_id), - .INSTANCE_ID ($sformatf("%s-socket%0d", INSTANCE_ID, socket_id)) + .INSTANCE_ID (`SFORMATF(("%s-socket%0d", INSTANCE_ID, socket_id))) ) socket ( `SCOPE_IO_BIND (scope_socket+socket_id) diff --git a/hw/rtl/VX_platform.vh b/hw/rtl/VX_platform.vh index eb58e17989..8c4effaf44 100644 --- a/hw/rtl/VX_platform.vh +++ b/hw/rtl/VX_platform.vh @@ -130,6 +130,8 @@ endgenerate end `endif +`define SFORMATF(x) $sformatf x + `else // SYNTHESIS `define STATIC_ASSERT(cond, msg) @@ -139,6 +141,7 @@ endgenerate `define DEBUG_BLOCK(x) `define TRACE(level, args) +`define SFORMATF(x) `define TRACING_ON `define TRACING_OFF diff --git a/hw/rtl/VX_socket.sv b/hw/rtl/VX_socket.sv index 299fb6791d..87dcbd02e7 100644 --- a/hw/rtl/VX_socket.sv +++ b/hw/rtl/VX_socket.sv @@ -85,7 +85,7 @@ module VX_socket import VX_gpu_pkg::*; #( `RESET_RELAY (icache_reset, reset); VX_cache_cluster #( - .INSTANCE_ID ($sformatf("%s-icache", INSTANCE_ID)), + .INSTANCE_ID (`SFORMATF(("%s-icache", INSTANCE_ID))), .NUM_UNITS (`NUM_ICACHES), .NUM_INPUTS (`SOCKET_SIZE), .TAG_SEL_IDX (0), @@ -132,7 +132,7 @@ module VX_socket import VX_gpu_pkg::*; #( `RESET_RELAY (dcache_reset, reset); VX_cache_cluster #( - .INSTANCE_ID ($sformatf("%s-dcache", INSTANCE_ID)), 
+ .INSTANCE_ID (`SFORMATF(("%s-dcache", INSTANCE_ID))), .NUM_UNITS (`NUM_DCACHES), .NUM_INPUTS (`SOCKET_SIZE), .TAG_SEL_IDX (0), @@ -212,7 +212,7 @@ module VX_socket import VX_gpu_pkg::*; #( VX_core #( .CORE_ID ((SOCKET_ID * `SOCKET_SIZE) + core_id), - .INSTANCE_ID ($sformatf("%s-core%0d", INSTANCE_ID, core_id)) + .INSTANCE_ID (`SFORMATF(("%s-core%0d", INSTANCE_ID, core_id))) ) core ( `SCOPE_IO_BIND (scope_core + core_id) diff --git a/hw/rtl/Vortex.sv b/hw/rtl/Vortex.sv index 5df4038801..bce771340b 100644 --- a/hw/rtl/Vortex.sv +++ b/hw/rtl/Vortex.sv @@ -140,7 +140,7 @@ module Vortex import VX_gpu_pkg::*; ( VX_cluster #( .CLUSTER_ID (cluster_id), - .INSTANCE_ID ($sformatf("cluster%0d", cluster_id)) + .INSTANCE_ID (`SFORMATF(("cluster%0d", cluster_id))) ) cluster ( `SCOPE_IO_BIND (scope_cluster + cluster_id) diff --git a/hw/rtl/cache/VX_cache.sv b/hw/rtl/cache/VX_cache.sv index 40f062eccb..8c3db21f4e 100644 --- a/hw/rtl/cache/VX_cache.sv +++ b/hw/rtl/cache/VX_cache.sv @@ -389,7 +389,7 @@ module VX_cache import VX_gpu_pkg::*; #( VX_cache_bank #( .BANK_ID (bank_id), - .INSTANCE_ID ($sformatf("%s-bank%0d", INSTANCE_ID, bank_id)), + .INSTANCE_ID (`SFORMATF(("%s-bank%0d", INSTANCE_ID, bank_id))), .CACHE_SIZE (CACHE_SIZE), .LINE_SIZE (LINE_SIZE), .NUM_BANKS (NUM_BANKS), diff --git a/hw/rtl/cache/VX_cache_bank.sv b/hw/rtl/cache/VX_cache_bank.sv index 7258e847eb..d3218c54c8 100644 --- a/hw/rtl/cache/VX_cache_bank.sv +++ b/hw/rtl/cache/VX_cache_bank.sv @@ -503,7 +503,7 @@ module VX_cache_bank #( ); VX_cache_mshr #( - .INSTANCE_ID ($sformatf("%s-mshr", INSTANCE_ID)), + .INSTANCE_ID (`SFORMATF(("%s-mshr", INSTANCE_ID))), .BANK_ID (BANK_ID), .LINE_SIZE (LINE_SIZE), .NUM_BANKS (NUM_BANKS), diff --git a/hw/rtl/cache/VX_cache_cluster.sv b/hw/rtl/cache/VX_cache_cluster.sv index 32662e848c..fc4afdb0a3 100644 --- a/hw/rtl/cache/VX_cache_cluster.sv +++ b/hw/rtl/cache/VX_cache_cluster.sv @@ -146,7 +146,7 @@ module VX_cache_cluster import VX_gpu_pkg::*; #( for (genvar i = 0; i < 
NUM_CACHES; ++i) begin : g_cache_wrap VX_cache_wrap #( - .INSTANCE_ID ($sformatf("%s%0d", INSTANCE_ID, i)), + .INSTANCE_ID (`SFORMATF(("%s%0d", INSTANCE_ID, i))), .CACHE_SIZE (CACHE_SIZE), .LINE_SIZE (LINE_SIZE), .NUM_BANKS (NUM_BANKS), diff --git a/hw/rtl/cache/VX_cache_top.sv b/hw/rtl/cache/VX_cache_top.sv index d6bd4aace5..6dad5b6a89 100644 --- a/hw/rtl/cache/VX_cache_top.sv +++ b/hw/rtl/cache/VX_cache_top.sv @@ -20,7 +20,7 @@ module VX_cache_top import VX_gpu_pkg::*; #( parameter NUM_REQS = 4, // Size of cache in bytes - parameter CACHE_SIZE = 32768, + parameter CACHE_SIZE = 65536, // Size of line inside a bank in bytes parameter LINE_SIZE = 64, // Number of banks diff --git a/hw/rtl/core/VX_alu_unit.sv b/hw/rtl/core/VX_alu_unit.sv index 951cd811bf..e872217090 100644 --- a/hw/rtl/core/VX_alu_unit.sv +++ b/hw/rtl/core/VX_alu_unit.sv @@ -89,7 +89,7 @@ module VX_alu_unit #( ); VX_alu_int #( - .INSTANCE_ID ($sformatf("%s-int%0d", INSTANCE_ID, block_idx)), + .INSTANCE_ID (`SFORMATF(("%s-int%0d", INSTANCE_ID, block_idx))), .BLOCK_IDX (block_idx), .NUM_LANES (NUM_LANES) ) alu_int ( @@ -102,7 +102,7 @@ module VX_alu_unit #( `ifdef EXT_M_ENABLE VX_alu_muldiv #( - .INSTANCE_ID ($sformatf("%s-muldiv%0d", INSTANCE_ID, block_idx)), + .INSTANCE_ID (`SFORMATF(("%s-muldiv%0d", INSTANCE_ID, block_idx))), .NUM_LANES (NUM_LANES) ) muldiv_unit ( .clk (clk), diff --git a/hw/rtl/core/VX_core.sv b/hw/rtl/core/VX_core.sv index 260cedca3e..62ed016af9 100644 --- a/hw/rtl/core/VX_core.sv +++ b/hw/rtl/core/VX_core.sv @@ -87,7 +87,7 @@ module VX_core import VX_gpu_pkg::*; #( `SCOPE_IO_SWITCH (3); VX_schedule #( - .INSTANCE_ID ($sformatf("%s-schedule", INSTANCE_ID)), + .INSTANCE_ID (`SFORMATF(("%s-schedule", INSTANCE_ID))), .CORE_ID (CORE_ID) ) schedule ( .clk (clk), @@ -115,7 +115,7 @@ module VX_core import VX_gpu_pkg::*; #( ); VX_fetch #( - .INSTANCE_ID ($sformatf("%s-fetch", INSTANCE_ID)) + .INSTANCE_ID (`SFORMATF(("%s-fetch", INSTANCE_ID))) ) fetch ( `SCOPE_IO_BIND (0) .clk (clk), @@ 
-126,7 +126,7 @@ module VX_core import VX_gpu_pkg::*; #( ); VX_decode #( - .INSTANCE_ID ($sformatf("%s-decode", INSTANCE_ID)) + .INSTANCE_ID (`SFORMATF(("%s-decode", INSTANCE_ID))) ) decode ( .clk (clk), .reset (reset), @@ -136,7 +136,7 @@ module VX_core import VX_gpu_pkg::*; #( ); VX_issue #( - .INSTANCE_ID ($sformatf("%s-issue", INSTANCE_ID)) + .INSTANCE_ID (`SFORMATF(("%s-issue", INSTANCE_ID))) ) issue ( `SCOPE_IO_BIND (1) @@ -153,7 +153,7 @@ module VX_core import VX_gpu_pkg::*; #( ); VX_execute #( - .INSTANCE_ID ($sformatf("%s-execute", INSTANCE_ID)), + .INSTANCE_ID (`SFORMATF(("%s-execute", INSTANCE_ID))), .CORE_ID (CORE_ID) ) execute ( `SCOPE_IO_BIND (2) @@ -181,7 +181,7 @@ module VX_core import VX_gpu_pkg::*; #( ); VX_commit #( - .INSTANCE_ID ($sformatf("%s-commit", INSTANCE_ID)) + .INSTANCE_ID (`SFORMATF(("%s-commit", INSTANCE_ID))) ) commit ( .clk (clk), .reset (reset), diff --git a/hw/rtl/core/VX_core_top.sv b/hw/rtl/core/VX_core_top.sv index 9ade1c28b0..e16a802593 100644 --- a/hw/rtl/core/VX_core_top.sv +++ b/hw/rtl/core/VX_core_top.sv @@ -144,7 +144,7 @@ module VX_core_top import VX_gpu_pkg::*; #( `endif VX_core #( - .INSTANCE_ID ($sformatf("core")), + .INSTANCE_ID (`SFORMATF(("core"))), .CORE_ID (CORE_ID) ) core ( `SCOPE_IO_BIND (0) diff --git a/hw/rtl/core/VX_execute.sv b/hw/rtl/core/VX_execute.sv index 4f66757f12..b737725ea6 100644 --- a/hw/rtl/core/VX_execute.sv +++ b/hw/rtl/core/VX_execute.sv @@ -52,7 +52,7 @@ module VX_execute import VX_gpu_pkg::*; #( `endif VX_alu_unit #( - .INSTANCE_ID ($sformatf("%s-alu", INSTANCE_ID)) + .INSTANCE_ID (`SFORMATF(("%s-alu", INSTANCE_ID))) ) alu_unit ( .clk (clk), .reset (reset), @@ -64,7 +64,7 @@ module VX_execute import VX_gpu_pkg::*; #( `SCOPE_IO_SWITCH (1); VX_lsu_unit #( - .INSTANCE_ID ($sformatf("%s-lsu", INSTANCE_ID)) + .INSTANCE_ID (`SFORMATF(("%s-lsu", INSTANCE_ID))) ) lsu_unit ( `SCOPE_IO_BIND (0) .clk (clk), @@ -76,7 +76,7 @@ module VX_execute import VX_gpu_pkg::*; #( `ifdef EXT_F_ENABLE VX_fpu_unit #( 
- .INSTANCE_ID ($sformatf("%s-fpu", INSTANCE_ID)) + .INSTANCE_ID (`SFORMATF(("%s-fpu", INSTANCE_ID))) ) fpu_unit ( .clk (clk), .reset (reset), @@ -87,7 +87,7 @@ module VX_execute import VX_gpu_pkg::*; #( `endif VX_sfu_unit #( - .INSTANCE_ID ($sformatf("%s-sfu", INSTANCE_ID)), + .INSTANCE_ID (`SFORMATF(("%s-sfu", INSTANCE_ID))), .CORE_ID (CORE_ID) ) sfu_unit ( .clk (clk), diff --git a/hw/rtl/core/VX_issue.sv b/hw/rtl/core/VX_issue.sv index 5da33cbba9..924d1a67d5 100644 --- a/hw/rtl/core/VX_issue.sv +++ b/hw/rtl/core/VX_issue.sv @@ -78,7 +78,7 @@ module VX_issue import VX_gpu_pkg::*; #( `endif VX_issue_slice #( - .INSTANCE_ID ($sformatf("%s%0d", INSTANCE_ID, issue_id)), + .INSTANCE_ID (`SFORMATF(("%s%0d", INSTANCE_ID, issue_id))), .ISSUE_ID (issue_id) ) issue_slice ( `SCOPE_IO_BIND(issue_id) diff --git a/hw/rtl/core/VX_issue_slice.sv b/hw/rtl/core/VX_issue_slice.sv index f287525c74..d72937251e 100644 --- a/hw/rtl/core/VX_issue_slice.sv +++ b/hw/rtl/core/VX_issue_slice.sv @@ -37,7 +37,7 @@ module VX_issue_slice import VX_gpu_pkg::*; #( VX_operands_if operands_if(); VX_ibuffer #( - .INSTANCE_ID ($sformatf("%s-ibuffer", INSTANCE_ID)) + .INSTANCE_ID (`SFORMATF(("%s-ibuffer", INSTANCE_ID))) ) ibuffer ( .clk (clk), .reset (reset), @@ -49,7 +49,7 @@ module VX_issue_slice import VX_gpu_pkg::*; #( ); VX_scoreboard #( - .INSTANCE_ID ($sformatf("%s-scoreboard", INSTANCE_ID)) + .INSTANCE_ID (`SFORMATF(("%s-scoreboard", INSTANCE_ID))) ) scoreboard ( .clk (clk), .reset (reset), @@ -64,7 +64,7 @@ module VX_issue_slice import VX_gpu_pkg::*; #( ); VX_operands #( - .INSTANCE_ID ($sformatf("%s-operands", INSTANCE_ID)) + .INSTANCE_ID (`SFORMATF(("%s-operands", INSTANCE_ID))) ) operands ( .clk (clk), .reset (reset), @@ -77,7 +77,7 @@ module VX_issue_slice import VX_gpu_pkg::*; #( ); VX_dispatch #( - .INSTANCE_ID ($sformatf("%s-dispatch", INSTANCE_ID)) + .INSTANCE_ID (`SFORMATF(("%s-dispatch", INSTANCE_ID))) ) dispatch ( .clk (clk), .reset (reset), diff --git a/hw/rtl/core/VX_lsu_slice.sv 
b/hw/rtl/core/VX_lsu_slice.sv index 67fc3eaa89..0018db08dd 100644 --- a/hw/rtl/core/VX_lsu_slice.sv +++ b/hw/rtl/core/VX_lsu_slice.sv @@ -310,7 +310,7 @@ module VX_lsu_slice import VX_gpu_pkg::*; #( wire lsu_mem_rsp_ready; VX_mem_scheduler #( - .INSTANCE_ID ($sformatf("%s-memsched", INSTANCE_ID)), + .INSTANCE_ID (`SFORMATF(("%s-memsched", INSTANCE_ID))), .CORE_REQS (NUM_LANES), .MEM_CHANNELS(NUM_LANES), .WORD_SIZE (LSU_WORD_SIZE), diff --git a/hw/rtl/core/VX_lsu_unit.sv b/hw/rtl/core/VX_lsu_unit.sv index 674ca2686e..7a64a849bf 100644 --- a/hw/rtl/core/VX_lsu_unit.sv +++ b/hw/rtl/core/VX_lsu_unit.sv @@ -54,7 +54,7 @@ module VX_lsu_unit import VX_gpu_pkg::*; #( for (genvar block_idx = 0; block_idx < BLOCK_SIZE; ++block_idx) begin : g_slices VX_lsu_slice #( - .INSTANCE_ID ($sformatf("%s%0d", INSTANCE_ID, block_idx)) + .INSTANCE_ID (`SFORMATF(("%s%0d", INSTANCE_ID, block_idx))) ) lsu_slice( `SCOPE_IO_BIND (block_idx) .clk (clk), diff --git a/hw/rtl/core/VX_mem_unit.sv b/hw/rtl/core/VX_mem_unit.sv index c02e99b29a..57961a24b0 100644 --- a/hw/rtl/core/VX_mem_unit.sv +++ b/hw/rtl/core/VX_mem_unit.sv @@ -92,7 +92,7 @@ module VX_mem_unit import VX_gpu_pkg::*; #( end VX_local_mem #( - .INSTANCE_ID($sformatf("%s-lmem", INSTANCE_ID)), + .INSTANCE_ID(`SFORMATF(("%s-lmem", INSTANCE_ID))), .SIZE (1 << `LMEM_LOG_SIZE), .NUM_REQS (LSU_NUM_REQS), .NUM_BANKS (`LMEM_NUM_BANKS), @@ -131,7 +131,7 @@ module VX_mem_unit import VX_gpu_pkg::*; #( for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin : g_coalescers VX_mem_coalescer #( - .INSTANCE_ID ($sformatf("%s-coalescer%0d", INSTANCE_ID, i)), + .INSTANCE_ID (`SFORMATF(("%s-coalescer%0d", INSTANCE_ID, i))), .NUM_REQS (`NUM_LSU_LANES), .DATA_IN_SIZE (LSU_WORD_SIZE), .DATA_OUT_SIZE (DCACHE_WORD_SIZE), diff --git a/hw/rtl/core/VX_schedule.sv b/hw/rtl/core/VX_schedule.sv index 5011ccb2cc..800b6b63f5 100644 --- a/hw/rtl/core/VX_schedule.sv +++ b/hw/rtl/core/VX_schedule.sv @@ -290,7 +290,7 @@ module VX_schedule import VX_gpu_pkg::*; #( // 
split/join handling VX_split_join #( - .INSTANCE_ID ($sformatf("%s-splitjoin", INSTANCE_ID)) + .INSTANCE_ID (`SFORMATF(("%s-splitjoin", INSTANCE_ID))) ) split_join ( .clk (clk), .reset (reset), diff --git a/hw/rtl/core/VX_sfu_unit.sv b/hw/rtl/core/VX_sfu_unit.sv index 5af6211f65..dccfcfe46d 100644 --- a/hw/rtl/core/VX_sfu_unit.sv +++ b/hw/rtl/core/VX_sfu_unit.sv @@ -99,7 +99,7 @@ module VX_sfu_unit import VX_gpu_pkg::*; #( ); VX_wctl_unit #( - .INSTANCE_ID ($sformatf("%s-wctl", INSTANCE_ID)), + .INSTANCE_ID (`SFORMATF(("%s-wctl", INSTANCE_ID))), .NUM_LANES (NUM_LANES) ) wctl_unit ( .clk (clk), @@ -110,7 +110,7 @@ module VX_sfu_unit import VX_gpu_pkg::*; #( ); VX_csr_unit #( - .INSTANCE_ID ($sformatf("%s-csr", INSTANCE_ID)), + .INSTANCE_ID (`SFORMATF(("%s-csr", INSTANCE_ID))), .CORE_ID (CORE_ID), .NUM_LANES (NUM_LANES) ) csr_unit ( diff --git a/hw/rtl/libs/VX_mem_scheduler.sv b/hw/rtl/libs/VX_mem_scheduler.sv index 523257eb4c..f89b663e93 100644 --- a/hw/rtl/libs/VX_mem_scheduler.sv +++ b/hw/rtl/libs/VX_mem_scheduler.sv @@ -223,7 +223,7 @@ module VX_mem_scheduler #( if (COALESCE_ENABLE) begin : g_coalescer VX_mem_coalescer #( - .INSTANCE_ID ($sformatf("%s-coalescer", INSTANCE_ID)), + .INSTANCE_ID (`SFORMATF(("%s-coalescer", INSTANCE_ID))), .NUM_REQS (CORE_REQS), .DATA_IN_SIZE (WORD_SIZE), .DATA_OUT_SIZE (LINE_SIZE), From 24d018b4c9a273f18d18e071ee0f2cad803e886b Mon Sep 17 00:00:00 2001 From: Udit Subramanya Date: Wed, 23 Oct 2024 05:18:53 -0400 Subject: [PATCH 315/407] documentation updates --- README.md | 2 +- docs/fpga_setup.md | 64 +++++++++++++++++++++++---------------- docs/index.md | 11 +++---- docs/microarchitecture.md | 5 ++- docs/simulation.md | 5 +-- docs/testing.md | 4 +-- 6 files changed, 53 insertions(+), 38 deletions(-) diff --git a/README.md b/README.md index 97686c6415..a7228e7721 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # Vortex GPGPU -Vortex is a full-stack open-source RISC-V GPGPU. 
Vortex supports multiple *backend drivers*, including our C++ simulator (simx), an RTL simulator, and physical Xilinx and Altera FPGAs-- all controlled by a single driver script. The chosen driver determines the corresponding code invoked to run Vortex. Generally, developers will prototype their intended design in simx, before completing going forward with an RTL implementation. Alternatively, you can get up and running by selecting a driver of your choice and running a demo program. +Vortex is a full-stack open-source RISC-V GPGPU. Vortex supports multiple **backend drivers**, including our C++ simulator (simx), an RTL simulator, and physical Xilinx and Altera FPGAs-- all controlled by a single driver script. The chosen driver determines the corresponding code invoked to run Vortex. Generally, developers will prototype their intended design in simx before going forward with an RTL implementation. Alternatively, you can get up and running by selecting a driver of your choice and running a demo program. ## Website Vortex news can be found on its [website](https://vortex.cc.gatech.edu/) diff --git a/docs/fpga_setup.md b/docs/fpga_setup.md index 5b90df0b6c..e7ab0ecbbd 100644 --- a/docs/fpga_setup.md +++ b/docs/fpga_setup.md @@ -52,9 +52,9 @@ To request 16 cores and 64GB of RAM for 6 hours on flubber9, a fpga dev node: ```bash salloc -p rg-fpga --nodes=1 --ntasks-per-node=16 --mem=64G --nodelist flubber9 --time=06:00:00 ``` - -## Environment Setup -Once you are logged in, you will need to complete some first time configurations. +Synthesis for Xilinx Boards +---------------------- +Once you are logged in, you will need to complete some first time configurations. If you are interested in the Intel (Altera) synthesis steps, scroll down below.
### Source Configuration Scripts ``` @@ -89,7 +89,7 @@ The build is complete when the bitstream file `vortex_afu.xclbin` exists in ` ### Running a Program on Xilinx FPGA -The blackbox.sh script in `ci` can be used to run a test with Vortex’s xrt driver using the following command: +The [blackbox.sh](./simulation.md) script within the build directory can be used to run a test with Vortex’s xrt driver using the following command: `FPGA_BIN_DIR= TARGET=hw|hw_emu PLATFORM= ./ci/blackbox.sh --driver=xrt --app=` @@ -97,19 +97,11 @@ For example: ```FPGA_BIN_DIR= hw/syn/xilinx/xrt/build_4c_u280_xilinx_u280_gen3x16_xdma_1_202211_1_hw/bin TARGET=hw PLATFORM=xilinx_u280_gen3x16_xdma_1_202211_1 ./ci/blackbox.sh --driver=xrt --app=demo``` -### Synthesis for Intel (Altera) Boards - -To set up the environment, source the XRT setup.sh and other Xilinx scripts. For example: +Synthesis for Intel (Altera) Boards +---------------------- -``` -source /opt/xilinx/xrt/setup.sh -source /tools/reconfig/xilinx/Vivado/2022.1/settings64.sh -source /tools/reconfig/xilinx/Vitis/2022.1/settings64.sh +### OPAE Environment Setup -``` - -OPAE Environment Setup ----------------------- $ source /opt/inteldevstack/init_env_user.sh $ export OPAE_HOME=/opt/opae/1.1.2 @@ -118,8 +110,7 @@ OPAE Environment Setup $ export LIBRARY_PATH=$OPAE_HOME/lib:$LIBRARY_PATH $ export LD_LIBRARY_PATH=$OPAE_HOME/lib:$LD_LIBRARY_PATH -OPAE Build ------------------- +### OPAE Build The FPGA has to following configuration options: - DEVICE_FAMILY=arria10 | stratix10 @@ -134,8 +125,7 @@ A new folder (ex: `test1_xxx_4c`) will be created and the build will start and t Setting TARGET=ase will build the project for simulation using Intel ASE. 
-OPAE Build Configuration ------------------------- +### OPAE Build Configuration The hardware configuration file `/hw/rtl/VX_config.vh` defines all the hardware parameters that can be modified when build the processor.For example, have the following parameters that can be configured: - `NUM_WARPS`: Number of warps per cores @@ -146,8 +136,7 @@ You configure the syntesis build from the command line: $ CONFIGS="-DPERF_ENABLE -DNUM_THREADS=8" make -OPAE Build Progress -------------------- +### OPAE Build Progress You could check the last 10 lines in the build log for possible errors until build completion. @@ -166,17 +155,40 @@ The file `vortex_afu.gbs` should exist when the build is done: $ ls -lsa /synth/vortex_afu.gbs -Signing the bitstream and Programming the FPGA ----------------------------------------------- +### Signing the bitstream and Programming the FPGA $ cd $ PACSign PR -t UPDATE -H openssl_manager -i vortex_afu.gbs -o vortex_afu_unsigned_ssl.gbs $ fpgasupdate vortex_afu_unsigned_ssl.gbs -FPGA sample test running OpenCL sgemm kernel --------------------------------------------- +### Sample FPGA Run Test +Ensure you have the correct opae runtime for the FPGA target + +``` +$ TARGET=FPGA make -C runtime/opae +``` -Run the following from the Vortex root directory +Run the [blackbox.sh](./simulation.md) from your Vortex build directory + +``` +$ TARGET=fpga ./ci/blackbox.sh --driver=opae --app=sgemm --args="-n128" +``` + +### FPGA sample test running OpenCL sgemm kernel + +You can use the `blackbox.sh` script to run the following from your Vortex build directory $ TARGET=fpga ./ci/blackbox.sh --driver=opae --app=sgemm --args="-n128" +### Testing Vortex using OPAE with Intel ASE Simulation +Building ASE synthesis + +```$ TARGET=asesim make -C runtime/opae``` + +Building ASE runtime + +```$ TARGET=asesim make -C runtime/opae``` + +Running ASE simulation + +```$ ASE_LOG=0 ASE_WORKDIR=/synth/work TARGET=asesim ./ci/blackbox.sh --driver=opae --app=sgemm 
--args="-n16"``` diff --git a/docs/index.md b/docs/index.md index a53a2fd15f..351e41fbbb 100644 --- a/docs/index.md +++ b/docs/index.md @@ -2,9 +2,8 @@ ## Table of Contents -- [Codebase Layout](codebase.md) -- [Microarchitecture](microarchitecture.md) -- [Cache Subsystem](cache_subsystem.md) -- [Simulation](simulation.md) -- [Contributing](contributing.md) -- [Debugging](debugging.md) +- [Codebase Layout](codebase.md): Summary of repo file tree +- [Microarchitecture](microarchitecture.md): Vortex Pipeline and cache microarchitectural details and reconfigurability +- [Simulation](simulation.md): Details for building and running each simulation driver +- [Contributing](contributing.md): Process for contributing your own features including repo semantics and testing +- [Debugging](debugging.md): Debugging configurations for each Vortex driver diff --git a/docs/microarchitecture.md b/docs/microarchitecture.md index 3459abcc42..85fa52fd5a 100644 --- a/docs/microarchitecture.md +++ b/docs/microarchitecture.md @@ -77,4 +77,7 @@ Vortex has a 6-stage pipeline: - Sockets - Grouping multiple cores sharing L1 cache - Clusters - - Grouping of sockets sharing L2 cache \ No newline at end of file + - Grouping of sockets sharing L2 cache + +### Vortex Cache Subsystem +More details about the cache subsystem are provided [here](./cache_subsystem.md). \ No newline at end of file diff --git a/docs/simulation.md b/docs/simulation.md index d55b3cd943..4201a64d4f 100644 --- a/docs/simulation.md +++ b/docs/simulation.md @@ -15,7 +15,7 @@ SimX is a C++ cycle-level in-house simulator developed for Vortex. The relevant The guide to build the fpga with specific configurations is located [here.](fpga_setup.md) You can find instructions for both Xilinx and Altera based FPGAs. -### How to Test +### How to Test (using `blackbox.sh`) Running tests under specific drivers (rtlsim,simx,fpga) is done using the script named `blackbox.sh` located in the `ci` folder. 
Running command `./ci/blackbox.sh --help` from the Vortex root directory will display the following command line arguments for `blackbox.sh`: @@ -54,7 +54,8 @@ PERF: instrs=363180, cycles=53108, IPC=6.838518 ## Additional Quick Start Scenarios -Running Vortex simulators with different configurations: +Running Vortex simulators with different configurations and drivers is supported. For example: + - Run basic driver test with rtlsim driver and Vortex config of 2 clusters, 2 cores, 2 warps, 4 threads $ ./ci/blackbox.sh --driver=rtlsim --clusters=2 --cores=2 --warps=2 --threads=4 --app=basic diff --git a/docs/testing.md b/docs/testing.md index 0ec46bda93..739193ce34 100644 --- a/docs/testing.md +++ b/docs/testing.md @@ -2,7 +2,7 @@ ## Running a Vortex application -The framework provides a utility script: blackbox.sh under the /ci/ folder for executing applications in the tests tree. +The framework provides a utility script: blackbox.sh under the /ci/ folder for executing applications in the tests tree. It gets copied into the `build` directory with all the environment variables resolved, so you should run it from the `build` directory as follows: You can query the commandline options of the tool using: $ ./ci/blackbox.sh --help @@ -49,4 +49,4 @@ Compile your test: `$ make -C tests/regression/` Run your test: `$ ./ci/blackbox.sh --driver=simx --app= --debug` ## Adding Your Tests to the CI Pipeline -If you are a contributor, then you will need to add tests that integrate into the continuous integration pipeline. Remember, Pull Requests cannot be merged unless new code has tests and existing tests do not regress. See more at [contributing.md](contributing.md) and [continuous_integration.md](continuous_integration.md). \ No newline at end of file +If you are a contributor, then you will need to add tests that integrate into the continuous integration pipeline. Remember, Pull Requests cannot be merged unless new code has tests and existing tests do not regress. 
Furthermore, if you are contributing a new feature, it is recommended that you add the ability to enable / disable the new feature that you are adding. See more at [contributing.md](contributing.md) and [continuous_integration.md](continuous_integration.md). \ No newline at end of file From 1c384c096d66a5003faef66c3d701ab1e865c69e Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Wed, 23 Oct 2024 12:27:44 -0700 Subject: [PATCH 316/407] minor update --- hw/rtl/core/VX_mem_unit.sv | 2 +- hw/rtl/libs/VX_mem_coalescer.sv | 1 + hw/rtl/libs/VX_mem_scheduler.sv | 145 +++++++++++++++++--------------- 3 files changed, 81 insertions(+), 67 deletions(-) diff --git a/hw/rtl/core/VX_mem_unit.sv b/hw/rtl/core/VX_mem_unit.sv index 57961a24b0..931ad65cd3 100644 --- a/hw/rtl/core/VX_mem_unit.sv +++ b/hw/rtl/core/VX_mem_unit.sv @@ -127,7 +127,7 @@ module VX_mem_unit import VX_gpu_pkg::*; #( .TAG_WIDTH (DCACHE_TAG_WIDTH) ) dcache_coalesced_if[`NUM_LSU_BLOCKS](); - if (LSU_WORD_SIZE != DCACHE_WORD_SIZE) begin : g_enabled + if ((`NUM_LSU_LANES > 1) && (LSU_WORD_SIZE != DCACHE_WORD_SIZE)) begin : g_enabled for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin : g_coalescers VX_mem_coalescer #( diff --git a/hw/rtl/libs/VX_mem_coalescer.sv b/hw/rtl/libs/VX_mem_coalescer.sv index 19a7040951..1a7030b864 100644 --- a/hw/rtl/libs/VX_mem_coalescer.sv +++ b/hw/rtl/libs/VX_mem_coalescer.sv @@ -74,6 +74,7 @@ module VX_mem_coalescer #( output wire out_rsp_ready ); `UNUSED_SPARAM (INSTANCE_ID) + `STATIC_ASSERT ((NUM_REQS > 1), ("invalid parameter")) `STATIC_ASSERT (`IS_DIVISBLE(NUM_REQS * DATA_IN_WIDTH, DATA_OUT_WIDTH), ("invalid parameter")) `STATIC_ASSERT ((NUM_REQS * DATA_IN_WIDTH >= DATA_OUT_WIDTH), ("invalid parameter")) `RUNTIME_ASSERT ((~in_req_valid || in_req_mask != 0), ("%t: invalid request mask", $time)) diff --git a/hw/rtl/libs/VX_mem_scheduler.sv b/hw/rtl/libs/VX_mem_scheduler.sv index f89b663e93..f77854ec1e 100644 --- a/hw/rtl/libs/VX_mem_scheduler.sv +++ b/hw/rtl/libs/VX_mem_scheduler.sv 
@@ -32,7 +32,7 @@ module VX_mem_scheduler #( parameter WORD_WIDTH = WORD_SIZE * 8, parameter LINE_WIDTH = LINE_SIZE * 8, - parameter COALESCE_ENABLE = (LINE_SIZE != WORD_SIZE), + parameter COALESCE_ENABLE = (CORE_REQS > 1) && (LINE_SIZE != WORD_SIZE), parameter PER_LINE_REQS = LINE_SIZE / WORD_SIZE, parameter MERGED_REQS = CORE_REQS / PER_LINE_REQS, parameter MEM_BATCHES = `CDIV(MERGED_REQS, MEM_CHANNELS), @@ -94,6 +94,7 @@ module VX_mem_scheduler #( localparam CORE_BATCHES = COALESCE_ENABLE ? 1 : MEM_BATCHES; localparam CORE_BATCH_BITS = `CLOG2(CORE_BATCHES); + `STATIC_ASSERT ((MEM_CHANNELS <= CORE_REQS), ("invalid parameter")) `STATIC_ASSERT (`IS_DIVISBLE(CORE_REQS * WORD_SIZE, LINE_SIZE), ("invalid parameter")) `STATIC_ASSERT ((TAG_WIDTH >= UUID_WIDTH), ("invalid parameter")) `RUNTIME_ASSERT((~core_req_valid || core_req_mask != 0), ("%t: invalid request mask", $time)) @@ -411,99 +412,113 @@ module VX_mem_scheduler #( // Handle memory responses //////////////////////////////////////////////// - reg [CORE_QUEUE_SIZE-1:0][CORE_REQS-1:0] rsp_rem_mask; - wire [CORE_REQS-1:0] rsp_rem_mask_n, curr_mask; - wire [BATCH_SEL_WIDTH-1:0] rsp_batch_idx; + if (CORE_REQS == 1) begin : g_rsp_1 - if (CORE_BATCHES > 1) begin : g_rsp_batch_idx - assign rsp_batch_idx = mem_rsp_tag_s[CORE_BATCH_BITS-1:0]; - end else begin : g_rsp_batch_idx_0 - assign rsp_batch_idx = '0; - end - - for (genvar r = 0; r < CORE_REQS; ++r) begin : g_curr_mask - localparam i = r / CORE_CHANNELS; - localparam j = r % CORE_CHANNELS; - assign curr_mask[r] = (BATCH_SEL_WIDTH'(i) == rsp_batch_idx) && mem_rsp_mask_s[j]; - end + assign crsp_valid = mem_rsp_valid_s; + assign crsp_mask = mem_rsp_mask_s; + assign crsp_sop = 1'b1; + assign crsp_eop = 1'b1; + assign crsp_data = mem_rsp_data_s; - assign rsp_rem_mask_n = rsp_rem_mask[ibuf_raddr] & ~curr_mask; + assign mem_rsp_ready_s = crsp_ready; - wire rsp_complete = ~(| rsp_rem_mask_n); + end else begin : g_rsp_N - wire mem_rsp_fire_s = mem_rsp_valid_s && 
mem_rsp_ready_s; + reg [CORE_QUEUE_SIZE-1:0][CORE_REQS-1:0] rsp_rem_mask; + wire [CORE_REQS-1:0] rsp_rem_mask_n, curr_mask; + wire [BATCH_SEL_WIDTH-1:0] rsp_batch_idx; - always @(posedge clk) begin - if (ibuf_push) begin - rsp_rem_mask[ibuf_waddr] <= core_req_mask; + if (CORE_BATCHES > 1) begin : g_rsp_batch_idx + assign rsp_batch_idx = mem_rsp_tag_s[CORE_BATCH_BITS-1:0]; + end else begin : g_rsp_batch_idx_0 + assign rsp_batch_idx = '0; end - if (mem_rsp_fire_s) begin - rsp_rem_mask[ibuf_raddr] <= rsp_rem_mask_n; + + for (genvar r = 0; r < CORE_REQS; ++r) begin : g_curr_mask + localparam i = r / CORE_CHANNELS; + localparam j = r % CORE_CHANNELS; + assign curr_mask[r] = (BATCH_SEL_WIDTH'(i) == rsp_batch_idx) && mem_rsp_mask_s[j]; end - end - if (RSP_PARTIAL != 0 || CORE_REQS == 1) begin : g_rsp_partial + assign rsp_rem_mask_n = rsp_rem_mask[ibuf_raddr] & ~curr_mask; - reg [CORE_QUEUE_SIZE-1:0] rsp_sop_r; + wire mem_rsp_fire_s = mem_rsp_valid_s && mem_rsp_ready_s; always @(posedge clk) begin if (ibuf_push) begin - rsp_sop_r[ibuf_waddr] <= 1; + rsp_rem_mask[ibuf_waddr] <= core_req_mask; end if (mem_rsp_fire_s) begin - rsp_sop_r[ibuf_raddr] <= 0; + rsp_rem_mask[ibuf_raddr] <= rsp_rem_mask_n; end end - assign crsp_valid = mem_rsp_valid_s; - assign crsp_mask = curr_mask; - assign crsp_sop = rsp_sop_r[ibuf_raddr]; + wire rsp_complete = ~(| rsp_rem_mask_n) || (CORE_REQS == 1); - for (genvar r = 0; r < CORE_REQS; ++r) begin : g_crsp_data - localparam j = r % CORE_CHANNELS; - assign crsp_data[r] = mem_rsp_data_s[j]; - end + if (RSP_PARTIAL != 0) begin : g_rsp_partial - assign mem_rsp_ready_s = crsp_ready; + reg [CORE_QUEUE_SIZE-1:0] rsp_sop_r; - end else begin : g_rsp_full + always @(posedge clk) begin + if (ibuf_push) begin + rsp_sop_r[ibuf_waddr] <= 1; + end + if (mem_rsp_fire_s) begin + rsp_sop_r[ibuf_raddr] <= 0; + end + end + + assign crsp_valid = mem_rsp_valid_s; + assign crsp_mask = curr_mask; + assign crsp_sop = rsp_sop_r[ibuf_raddr]; + + for (genvar r = 0; r < 
CORE_REQS; ++r) begin : g_crsp_data + localparam j = r % CORE_CHANNELS; + assign crsp_data[r] = mem_rsp_data_s[j]; + end + + assign mem_rsp_ready_s = crsp_ready; - wire [CORE_CHANNELS-1:0][CORE_BATCHES-1:0][WORD_WIDTH-1:0] rsp_store_n; - reg [CORE_REQS-1:0] rsp_orig_mask [CORE_QUEUE_SIZE-1:0]; + end else begin : g_rsp_full - for (genvar i = 0; i < CORE_CHANNELS; ++i) begin : g_rsp_store - for (genvar j = 0; j < CORE_BATCHES; ++j) begin : g_j - reg [WORD_WIDTH-1:0] rsp_store [0:CORE_QUEUE_SIZE-1]; - wire rsp_wren = mem_rsp_fire_s - && (BATCH_SEL_WIDTH'(j) == rsp_batch_idx) - && ((CORE_CHANNELS == 1) || mem_rsp_mask_s[i]); - always @(posedge clk) begin - if (rsp_wren) begin - rsp_store[ibuf_raddr] <= mem_rsp_data_s[i]; + wire [CORE_CHANNELS-1:0][CORE_BATCHES-1:0][WORD_WIDTH-1:0] rsp_store_n; + reg [CORE_REQS-1:0] rsp_orig_mask [CORE_QUEUE_SIZE-1:0]; + + for (genvar i = 0; i < CORE_CHANNELS; ++i) begin : g_rsp_store + for (genvar j = 0; j < CORE_BATCHES; ++j) begin : g_j + reg [WORD_WIDTH-1:0] rsp_store [0:CORE_QUEUE_SIZE-1]; + wire rsp_wren = mem_rsp_fire_s + && (BATCH_SEL_WIDTH'(j) == rsp_batch_idx) + && ((CORE_CHANNELS == 1) || mem_rsp_mask_s[i]); + always @(posedge clk) begin + if (rsp_wren) begin + rsp_store[ibuf_raddr] <= mem_rsp_data_s[i]; + end end + assign rsp_store_n[i][j] = rsp_wren ? mem_rsp_data_s[i] : rsp_store[ibuf_raddr]; end - assign rsp_store_n[i][j] = rsp_wren ? 
mem_rsp_data_s[i] : rsp_store[ibuf_raddr]; end - end - always @(posedge clk) begin - if (ibuf_push) begin - rsp_orig_mask[ibuf_waddr] <= core_req_mask; + always @(posedge clk) begin + if (ibuf_push) begin + rsp_orig_mask[ibuf_waddr] <= core_req_mask; + end end - end - assign crsp_valid = mem_rsp_valid_s && rsp_complete; - assign crsp_mask = rsp_orig_mask[ibuf_raddr]; - assign crsp_sop = 1'b1; + assign crsp_valid = mem_rsp_valid_s && rsp_complete; + assign crsp_mask = rsp_orig_mask[ibuf_raddr]; + assign crsp_sop = 1'b1; - for (genvar r = 0; r < CORE_REQS; ++r) begin : g_crsp_data - localparam i = r / CORE_CHANNELS; - localparam j = r % CORE_CHANNELS; - assign crsp_data[r] = rsp_store_n[j][i]; - end + for (genvar r = 0; r < CORE_REQS; ++r) begin : g_crsp_data + localparam i = r / CORE_CHANNELS; + localparam j = r % CORE_CHANNELS; + assign crsp_data[r] = rsp_store_n[j][i]; + end - assign mem_rsp_ready_s = crsp_ready || ~rsp_complete; + assign mem_rsp_ready_s = crsp_ready || ~rsp_complete; + end + assign crsp_eop = rsp_complete; end if (UUID_WIDTH != 0) begin : g_crsp_tag @@ -512,8 +527,6 @@ module VX_mem_scheduler #( assign crsp_tag = ibuf_dout; end - assign crsp_eop = rsp_complete; - // Send response to caller VX_elastic_buffer #( @@ -525,7 +538,7 @@ module VX_mem_scheduler #( .reset (reset), .valid_in (crsp_valid), .ready_in (crsp_ready), - .data_in ({crsp_mask, crsp_sop, crsp_eop, crsp_data, crsp_tag}), + .data_in ({crsp_mask, crsp_sop, crsp_eop, crsp_data, crsp_tag}), .data_out ({core_rsp_mask, core_rsp_sop, core_rsp_eop, core_rsp_data, core_rsp_tag}), .valid_out (core_rsp_valid), .ready_out (core_rsp_ready) From 7ab58111d8b4dc61e47379d53cee2b87519046c2 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Wed, 23 Oct 2024 12:30:39 -0700 Subject: [PATCH 317/407] minor update --- hw/rtl/VX_platform.vh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/rtl/VX_platform.vh b/hw/rtl/VX_platform.vh index 8c4effaf44..2e05ab44b2 100644 --- 
a/hw/rtl/VX_platform.vh +++ b/hw/rtl/VX_platform.vh @@ -141,7 +141,7 @@ endgenerate `define DEBUG_BLOCK(x) `define TRACE(level, args) -`define SFORMATF(x) +`define SFORMATF(x) "" `define TRACING_ON `define TRACING_OFF From e7d09feb4a851ff336f38cd801615e192031e00a Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Wed, 23 Oct 2024 13:06:45 -0700 Subject: [PATCH 318/407] decode => demux --- hw/rtl/cache/VX_cache_bank.sv | 4 ++-- hw/rtl/libs/VX_cyclic_arbiter.sv | 2 +- hw/rtl/libs/{VX_decoder.sv => VX_demux.sv} | 2 +- hw/rtl/libs/VX_mem_adapter.sv | 8 ++++---- hw/rtl/libs/VX_rr_arbiter.sv | 4 ++-- hw/rtl/libs/VX_stream_xbar.sv | 8 ++++---- 6 files changed, 14 insertions(+), 14 deletions(-) rename hw/rtl/libs/{VX_decoder.sv => VX_demux.sv} (98%) diff --git a/hw/rtl/cache/VX_cache_bank.sv b/hw/rtl/cache/VX_cache_bank.sv index d3218c54c8..2d6dd6a5b5 100644 --- a/hw/rtl/cache/VX_cache_bank.sv +++ b/hw/rtl/cache/VX_cache_bank.sv @@ -614,10 +614,10 @@ module VX_cache_bank #( `UNUSED_VAR (byteen_st1) end else begin : g_wt wire [LINE_SIZE-1:0] line_byteen; - VX_decoder #( + VX_demux #( .N (`CS_WORD_SEL_BITS), .M (WORD_SIZE) - ) byteen_dec ( + ) byteen_demux ( .sel_in (word_idx_st1), .data_in (byteen_st1), .data_out (line_byteen) diff --git a/hw/rtl/libs/VX_cyclic_arbiter.sv b/hw/rtl/libs/VX_cyclic_arbiter.sv index 2899b55fd4..9c28fcc4a6 100644 --- a/hw/rtl/libs/VX_cyclic_arbiter.sv +++ b/hw/rtl/libs/VX_cyclic_arbiter.sv @@ -65,7 +65,7 @@ module VX_cyclic_arbiter #( .valid_out (grant_valid) ); - VX_decoder #( + VX_demux #( .N (LOG_NUM_REQS), .D (NUM_REQS) ) grant_decoder ( diff --git a/hw/rtl/libs/VX_decoder.sv b/hw/rtl/libs/VX_demux.sv similarity index 98% rename from hw/rtl/libs/VX_decoder.sv rename to hw/rtl/libs/VX_demux.sv index ce2c509e66..b76ab42aa7 100644 --- a/hw/rtl/libs/VX_decoder.sv +++ b/hw/rtl/libs/VX_demux.sv @@ -17,7 +17,7 @@ // Adapted from BaseJump STL: http://bjump.org/data_out.html `TRACING_OFF -module VX_decoder #( +module VX_demux #( parameter N = 0, 
parameter M = 1, parameter MODEL = 0, diff --git a/hw/rtl/libs/VX_mem_adapter.sv b/hw/rtl/libs/VX_mem_adapter.sv index 2cae6fead6..d5efc7d6e6 100644 --- a/hw/rtl/libs/VX_mem_adapter.sv +++ b/hw/rtl/libs/VX_mem_adapter.sv @@ -100,19 +100,19 @@ module VX_mem_adapter #( assign mem_req_addr_out_w = mem_req_addr_in_qual; end - VX_decoder #( + VX_demux #( .N (D), .M (SRC_DATA_WIDTH/8) - ) req_be_dec ( + ) req_be_demux ( .sel_in (req_idx), .data_in (mem_req_byteen_in), .data_out (mem_req_byteen_out_w) ); - VX_decoder #( + VX_demux #( .N (D), .M (SRC_DATA_WIDTH) - ) req_data_dec ( + ) req_data_demux ( .sel_in (req_idx), .data_in (mem_req_data_in), .data_out (mem_req_data_out_w) diff --git a/hw/rtl/libs/VX_rr_arbiter.sv b/hw/rtl/libs/VX_rr_arbiter.sv index f5304b0234..1d3b479bf0 100644 --- a/hw/rtl/libs/VX_rr_arbiter.sv +++ b/hw/rtl/libs/VX_rr_arbiter.sv @@ -480,12 +480,12 @@ module VX_rr_arbiter #( end end - VX_decoder #( + VX_demux #( .N (LOG_NUM_REQS), .D (NUM_REQS) ) grant_decoder ( .sel_in (grant_index), - .data_in (grant_valid), + .data_in (1'b1), .data_out (grant_onehot) ); diff --git a/hw/rtl/libs/VX_stream_xbar.sv b/hw/rtl/libs/VX_stream_xbar.sv index 0c4eff2f16..68a31c4fc1 100644 --- a/hw/rtl/libs/VX_stream_xbar.sv +++ b/hw/rtl/libs/VX_stream_xbar.sv @@ -64,10 +64,10 @@ module VX_stream_xbar #( ); for (genvar i = 0; i < NUM_INPUTS; ++i) begin : g_sel_in_decoders - VX_decoder #( + VX_demux #( .N (OUT_WIDTH), .D (NUM_OUTPUTS) - ) sel_in_decoder ( + ) sel_in_demux ( .sel_in (sel_in[i]), .data_in (valid_in[i]), .data_out (per_output_valid_in[i]) @@ -137,10 +137,10 @@ module VX_stream_xbar #( wire [NUM_OUTPUTS-1:0] valid_out_w, ready_out_w; wire [NUM_OUTPUTS-1:0][DATAW-1:0] data_out_w; - VX_decoder #( + VX_demux #( .N (OUT_WIDTH), .D (NUM_OUTPUTS) - ) sel_in_decoder ( + ) sel_in_demux ( .sel_in (sel_in[0]), .data_in (valid_in[0]), .data_out (valid_out_w) From ec12b500074ae7d325064f12d7e6969dc981c496 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Wed, 23 Oct 2024 
13:09:34 -0700 Subject: [PATCH 319/407] minor udpate --- hw/rtl/libs/VX_mem_scheduler.sv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/rtl/libs/VX_mem_scheduler.sv b/hw/rtl/libs/VX_mem_scheduler.sv index f77854ec1e..f162a370ea 100644 --- a/hw/rtl/libs/VX_mem_scheduler.sv +++ b/hw/rtl/libs/VX_mem_scheduler.sv @@ -637,7 +637,7 @@ module VX_mem_scheduler #( end `TRACE(2, (", ibuf_idx=%0d, batch_idx=%0d (#%0d)\n", ibuf_waddr_s, req_batch_idx, mem_req_dbg_uuid)) end - if (mem_rsp_fire_s) begin + if (mem_rsp_valid_s && mem_rsp_ready_s) begin `TRACE(2, ("%t: %s mem-rsp: valid=%b, data=", $time, INSTANCE_ID, mem_rsp_mask_s)) `TRACE_ARRAY1D(2, "0x%0h", mem_rsp_data_s, CORE_CHANNELS) `TRACE(2, (", ibuf_idx=%0d, batch_idx=%0d (#%0d)\n", ibuf_raddr, rsp_batch_idx, mem_rsp_dbg_uuid)) From cc5ac8388b1de44f43c89a8e731795aac8458dbc Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Wed, 23 Oct 2024 14:03:19 -0700 Subject: [PATCH 320/407] minor update --- hw/rtl/libs/VX_mem_scheduler.sv | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/hw/rtl/libs/VX_mem_scheduler.sv b/hw/rtl/libs/VX_mem_scheduler.sv index f162a370ea..65a057b80c 100644 --- a/hw/rtl/libs/VX_mem_scheduler.sv +++ b/hw/rtl/libs/VX_mem_scheduler.sv @@ -412,7 +412,15 @@ module VX_mem_scheduler #( // Handle memory responses //////////////////////////////////////////////// + wire [BATCH_SEL_WIDTH-1:0] rsp_batch_idx; + if (CORE_BATCHES > 1) begin : g_rsp_batch_idx + assign rsp_batch_idx = mem_rsp_tag_s[CORE_BATCH_BITS-1:0]; + end else begin : g_rsp_batch_idx_0 + assign rsp_batch_idx = '0; + end + if (CORE_REQS == 1) begin : g_rsp_1 + `UNUSED_VAR (rsp_batch_idx) assign crsp_valid = mem_rsp_valid_s; assign crsp_mask = mem_rsp_mask_s; @@ -426,13 +434,6 @@ module VX_mem_scheduler #( reg [CORE_QUEUE_SIZE-1:0][CORE_REQS-1:0] rsp_rem_mask; wire [CORE_REQS-1:0] rsp_rem_mask_n, curr_mask; - wire [BATCH_SEL_WIDTH-1:0] rsp_batch_idx; - - if (CORE_BATCHES > 1) begin : g_rsp_batch_idx 
- assign rsp_batch_idx = mem_rsp_tag_s[CORE_BATCH_BITS-1:0]; - end else begin : g_rsp_batch_idx_0 - assign rsp_batch_idx = '0; - end for (genvar r = 0; r < CORE_REQS; ++r) begin : g_curr_mask localparam i = r / CORE_CHANNELS; From 22ade31fd53708e1a97dd3ce9054c581244b4075 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Wed, 23 Oct 2024 15:55:11 -0700 Subject: [PATCH 321/407] minor updates --- hw/rtl/VX_platform.vh | 17 ----------------- hw/rtl/afu/xrt/VX_afu_wrap.sv | 3 +++ hw/rtl/core/VX_fetch.sv | 3 +++ hw/rtl/core/VX_issue_slice.sv | 3 +++ hw/rtl/core/VX_lsu_slice.sv | 3 +++ 5 files changed, 12 insertions(+), 17 deletions(-) diff --git a/hw/rtl/VX_platform.vh b/hw/rtl/VX_platform.vh index 2e05ab44b2..6e0b755e27 100644 --- a/hw/rtl/VX_platform.vh +++ b/hw/rtl/VX_platform.vh @@ -44,11 +44,6 @@ endgenerate end \ end -`define __SCOPE -`define __SCOPE_X -`define __SCOPE_ON -`define __SCOPE_OFF - `ifndef TRACING_ALL `define TRACING_ON /* verilator tracing_on */ `define TRACING_OFF /* verilator tracing_off */ @@ -158,18 +153,6 @@ endgenerate `define UNUSED_PIN(x) . 
x () `define UNUSED_ARG(x) x -`define __SCOPE (* mark_debug="true" *) - -`define __SCOPE_X - -`define __SCOPE_ON \ - `undef __SCOPE_X \ - `define __SCOPE_X `__SCOPE - -`define __SCOPE_OFF \ - `undef __SCOPE_X \ - `define __SCOPE_X - `endif /////////////////////////////////////////////////////////////////////////////// diff --git a/hw/rtl/afu/xrt/VX_afu_wrap.sv b/hw/rtl/afu/xrt/VX_afu_wrap.sv index 2b1bfb7c25..7d13344a4b 100644 --- a/hw/rtl/afu/xrt/VX_afu_wrap.sv +++ b/hw/rtl/afu/xrt/VX_afu_wrap.sv @@ -373,7 +373,9 @@ module VX_afu_wrap #( `SCOPE_IO_UNUSED(0) `endif `endif + `ifdef CHIPSCOPE +`ifdef DBG_SCOPE_AFU ila_afu ila_afu_inst ( .clk (clk), .probe0 ({ @@ -394,6 +396,7 @@ module VX_afu_wrap #( }) ); `endif +`endif `ifdef SIMULATION `ifndef VERILATOR diff --git a/hw/rtl/core/VX_fetch.sv b/hw/rtl/core/VX_fetch.sv index 6a35602e84..807548614f 100644 --- a/hw/rtl/core/VX_fetch.sv +++ b/hw/rtl/core/VX_fetch.sv @@ -166,7 +166,9 @@ module VX_fetch import VX_gpu_pkg::*; #( `SCOPE_IO_UNUSED(0) `endif `endif + `ifdef CHIPSCOPE +`ifdef DBG_SCOPE_FETCH ila_fetch ila_fetch_inst ( .clk (clk), .probe0 ({schedule_if.valid, schedule_if.data, schedule_if.ready}), @@ -174,6 +176,7 @@ module VX_fetch import VX_gpu_pkg::*; #( .probe2 ({icache_bus_if.rsp_valid, icache_bus_if.rsp_data, icache_bus_if.rsp_ready}) ); `endif +`endif `ifdef DBG_TRACE_MEM always @(posedge clk) begin diff --git a/hw/rtl/core/VX_issue_slice.sv b/hw/rtl/core/VX_issue_slice.sv index d72937251e..5af5f0ef08 100644 --- a/hw/rtl/core/VX_issue_slice.sv +++ b/hw/rtl/core/VX_issue_slice.sv @@ -143,7 +143,9 @@ module VX_issue_slice import VX_gpu_pkg::*; #( `SCOPE_IO_UNUSED(0) `endif `endif + `ifdef CHIPSCOPE +`ifdef DBG_SCOPE_ISSUE ila_issue ila_issue_inst ( .clk (clk), .probe0 ({decode_if.valid, decode_if.data, decode_if.ready}), @@ -152,6 +154,7 @@ module VX_issue_slice import VX_gpu_pkg::*; #( .probe3 ({writeback_if.valid, writeback_if.data}) ); `endif +`endif `ifdef DBG_TRACE_PIPELINE always @(posedge clk) begin 
diff --git a/hw/rtl/core/VX_lsu_slice.sv b/hw/rtl/core/VX_lsu_slice.sv index 0018db08dd..333cbfa544 100644 --- a/hw/rtl/core/VX_lsu_slice.sv +++ b/hw/rtl/core/VX_lsu_slice.sv @@ -561,7 +561,9 @@ module VX_lsu_slice import VX_gpu_pkg::*; #( `SCOPE_IO_UNUSED(0) `endif `endif + `ifdef CHIPSCOPE +`ifdef DBG_SCOPE_LSU ila_lsu ila_lsu_inst ( .clk (clk), .probe0 ({execute_if.valid, execute_if.data, execute_if.ready}), @@ -569,5 +571,6 @@ module VX_lsu_slice import VX_gpu_pkg::*; #( .probe2 ({lsu_mem_if.rsp_valid, lsu_mem_if.rsp_data, lsu_mem_if.rsp_ready}) ); `endif +`endif endmodule From 8b172d07ec6d96232fff7820bd4db7366279ef66 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Thu, 24 Oct 2024 01:44:55 -0700 Subject: [PATCH 322/407] revert xilinx's asynchronous bram workaround --- hw/rtl/cache/VX_cache_bank.sv | 6 +--- hw/rtl/cache/VX_cache_data.sv | 9 ++++-- hw/rtl/cache/VX_cache_mshr.sv | 4 +-- hw/rtl/cache/VX_cache_repl.sv | 30 ++++++++----------- hw/rtl/cache/VX_cache_tags.sv | 12 +++----- hw/rtl/core/VX_fetch.sv | 2 +- hw/rtl/core/VX_ipdom_stack.sv | 3 +- hw/rtl/libs/VX_dp_ram.sv | 2 +- hw/rtl/libs/VX_fifo_queue.sv | 11 ++----- hw/rtl/libs/VX_index_buffer.sv | 3 +- hw/rtl/libs/VX_scope_tap.sv | 6 ++-- hw/rtl/libs/VX_sp_ram.sv | 53 +++++++++++++++++++++++++++------- hw/rtl/mem/VX_local_mem.sv | 3 +- 13 files changed, 82 insertions(+), 62 deletions(-) diff --git a/hw/rtl/cache/VX_cache_bank.sv b/hw/rtl/cache/VX_cache_bank.sv index 2d6dd6a5b5..fdee28bf17 100644 --- a/hw/rtl/cache/VX_cache_bank.sv +++ b/hw/rtl/cache/VX_cache_bank.sv @@ -154,7 +154,7 @@ module VX_cache_bank #( wire [`CS_WAY_SEL_WIDTH-1:0] way_idx_st0, way_idx_st1; wire [`CS_LINE_ADDR_WIDTH-1:0] addr_sel, addr_st0, addr_st1; - wire [`CS_LINE_SEL_BITS-1:0] line_idx_sel, line_idx_st0, line_idx_st1; + wire [`CS_LINE_SEL_BITS-1:0] line_idx_st0, line_idx_st1; wire [`CS_TAG_SEL_BITS-1:0] line_tag_st0, line_tag_st1; wire [`CS_TAG_SEL_BITS-1:0] evict_tag_st0, evict_tag_st1; wire rw_sel, rw_st0, rw_st1; @@ -332,7 
+332,6 @@ module VX_cache_bank #( wire do_read_st1 = valid_st1 && is_read_st1; wire do_write_st1 = valid_st1 && is_write_st1; - assign line_idx_sel = addr_sel[`CS_LINE_SEL_BITS-1:0]; assign line_idx_st0 = addr_st0[`CS_LINE_SEL_BITS-1:0]; assign line_tag_st0 = `CS_LINE_ADDR_TAG(addr_st0); @@ -358,7 +357,6 @@ module VX_cache_bank #( .hit_line (line_idx_st1), .hit_way (way_idx_st1), .repl_valid (do_fill_st0 && ~pipe_stall), - .repl_line_n(line_idx_sel), .repl_line (line_idx_st0), .repl_way (victim_way_st0) ); @@ -375,14 +373,12 @@ module VX_cache_bank #( ) cache_tags ( .clk (clk), .reset (reset), - .stall (pipe_stall), // inputs .init (do_init_st0), .flush (do_flush_st0 && ~pipe_stall), .fill (do_fill_st0 && ~pipe_stall), .read (do_read_st0 && ~pipe_stall), .write (do_write_st0 && ~pipe_stall), - .line_idx_n (line_idx_sel), .line_idx (line_idx_st0), .line_tag (line_tag_st0), .evict_way (evict_way_st0), diff --git a/hw/rtl/cache/VX_cache_data.sv b/hw/rtl/cache/VX_cache_data.sv index 03e2629c6e..ddc40b1bd4 100644 --- a/hw/rtl/cache/VX_cache_data.sv +++ b/hw/rtl/cache/VX_cache_data.sv @@ -82,7 +82,8 @@ module VX_cache_data #( .DATAW (LINE_SIZE * NUM_WAYS), .WRENW (LINE_SIZE * NUM_WAYS), .SIZE (`CS_LINES_PER_BANK), - .OUT_REG (1) + .OUT_REG (1), + .RDW_MODE ("R") ) byteen_store ( .clk (clk), .reset (reset), @@ -129,7 +130,8 @@ module VX_cache_data #( .DATAW (NUM_WAYS * `CS_LINE_WIDTH), .SIZE (`CS_LINES_PER_BANK), .WRENW (NUM_WAYS * LINE_SIZE), - .OUT_REG (1) + .OUT_REG (1), + .RDW_MODE ("R") ) data_store ( .clk (clk), .reset (reset), @@ -153,7 +155,8 @@ module VX_cache_data #( VX_sp_ram #( .DATAW (`CS_LINE_WIDTH), .SIZE (`CS_LINES_PER_BANK), - .OUT_REG (1) + .OUT_REG (1), + .RDW_MODE ("R") ) data_store ( .clk (clk), .reset (reset), diff --git a/hw/rtl/cache/VX_cache_mshr.sv b/hw/rtl/cache/VX_cache_mshr.sv index 17546ba2ad..78557e1ce7 100644 --- a/hw/rtl/cache/VX_cache_mshr.sv +++ b/hw/rtl/cache/VX_cache_mshr.sv @@ -221,7 +221,7 @@ module VX_cache_mshr #( VX_dp_ram #( 
.DATAW (DATA_WIDTH), .SIZE (MSHR_SIZE), - .OUT_REG (1) + .RDW_MODE ("R") ) mshr_store ( .clk (clk), .reset (reset), @@ -230,7 +230,7 @@ module VX_cache_mshr #( .wren (1'b1), .waddr (allocate_id_r), .wdata (allocate_data), - .raddr (dequeue_id_n), + .raddr (dequeue_id_r), .rdata (dequeue_data) ); diff --git a/hw/rtl/cache/VX_cache_repl.sv b/hw/rtl/cache/VX_cache_repl.sv index 9091230466..578c870023 100644 --- a/hw/rtl/cache/VX_cache_repl.sv +++ b/hw/rtl/cache/VX_cache_repl.sv @@ -99,7 +99,6 @@ module VX_cache_repl #( input wire [`CS_LINE_SEL_BITS-1:0] hit_line, input wire [`CS_WAY_SEL_WIDTH-1:0] hit_way, input wire repl_valid, - input wire [`CS_LINE_SEL_BITS-1:0] repl_line_n, input wire [`CS_LINE_SEL_BITS-1:0] repl_line, output wire [`CS_WAY_SEL_WIDTH-1:0] repl_way ); @@ -110,26 +109,24 @@ module VX_cache_repl #( if (REPL_POLICY == `CS_REPL_PLRU) begin : g_plru // Pseudo Least Recently Used replacement policy localparam LRU_WIDTH = `UP(NUM_WAYS-1); - `UNUSED_VAR (repl_valid) - `UNUSED_VAR (repl_line) wire [LRU_WIDTH-1:0] plru_rdata; wire [LRU_WIDTH-1:0] plru_wdata; wire [LRU_WIDTH-1:0] plru_wmask; VX_dp_ram #( - .DATAW (LRU_WIDTH), - .SIZE (`CS_LINES_PER_BANK), - .WRENW (LRU_WIDTH), - .OUT_REG (1) + .DATAW (LRU_WIDTH), + .SIZE (`CS_LINES_PER_BANK), + .WRENW (LRU_WIDTH), + .RDW_MODE ("R") ) plru_store ( .clk (clk), .reset (reset), - .read (~stall), + .read (repl_valid), .write (hit_valid), .wren (plru_wmask), .waddr (hit_line), - .raddr (repl_line_n), + .raddr (repl_line), .wdata (plru_wdata), .rdata (plru_rdata) ); @@ -158,18 +155,17 @@ module VX_cache_repl #( wire [WAY_SEL_WIDTH-1:0] ctr_rdata; wire [WAY_SEL_WIDTH-1:0] ctr_wdata = ctr_rdata + 1; - VX_dp_ram #( - .DATAW (WAY_SEL_WIDTH), - .SIZE (`CS_LINES_PER_BANK), - .OUT_REG (1) + VX_sp_ram #( + .DATAW (WAY_SEL_WIDTH), + .SIZE (`CS_LINES_PER_BANK), + .RDW_MODE ("R") ) ctr_store ( .clk (clk), .reset (reset), - .read (~stall), + .read (repl_valid), .write (repl_valid), .wren (1'b1), - .raddr (repl_line_n), - .waddr 
(repl_line), + .addr (repl_line), .wdata (ctr_wdata), .rdata (ctr_rdata) ); @@ -182,7 +178,6 @@ module VX_cache_repl #( `UNUSED_VAR (hit_way) `UNUSED_VAR (repl_valid) `UNUSED_VAR (repl_line) - `UNUSED_VAR (repl_line_n) reg [WAY_SEL_WIDTH-1:0] victim_idx; always @(posedge clk) begin if (reset) begin @@ -201,7 +196,6 @@ module VX_cache_repl #( `UNUSED_VAR (hit_way) `UNUSED_VAR (repl_valid) `UNUSED_VAR (repl_line) - `UNUSED_VAR (repl_line_n) assign repl_way = 1'b0; end diff --git a/hw/rtl/cache/VX_cache_tags.sv b/hw/rtl/cache/VX_cache_tags.sv index 970d54d913..e086ea94fa 100644 --- a/hw/rtl/cache/VX_cache_tags.sv +++ b/hw/rtl/cache/VX_cache_tags.sv @@ -29,7 +29,6 @@ module VX_cache_tags #( ) ( input wire clk, input wire reset, - input wire stall, // inputs input wire init, @@ -37,7 +36,6 @@ module VX_cache_tags #( input wire fill, input wire read, input wire write, - input wire [`CS_LINE_SEL_BITS-1:0] line_idx_n, input wire [`CS_LINE_SEL_BITS-1:0] line_idx, input wire [`CS_TAG_SEL_BITS-1:0] line_tag, input wire [`CS_WAY_SEL_WIDTH-1:0] evict_way, @@ -71,7 +69,7 @@ module VX_cache_tags #( wire do_flush = flush && (!WRITEBACK || way_en); // flush the whole line in writethrough mode wire do_write = WRITEBACK && write && tag_matches[i]; // only write on tag hit - //wire line_read = read || write || (WRITEBACK && (fill || flush)); + wire line_read = read || write || (WRITEBACK && (fill || flush)); wire line_write = do_init || do_fill || do_flush || do_write; wire line_valid = fill || write; @@ -87,19 +85,17 @@ module VX_cache_tags #( assign read_dirty[i] = 1'b0; end - VX_dp_ram #( + VX_sp_ram #( .DATAW (TAG_WIDTH), .SIZE (`CS_LINES_PER_BANK), - .OUT_REG (1), .RDW_MODE ("W") ) tag_store ( .clk (clk), .reset (reset), - .read (~stall), + .read (line_read), .write (line_write), .wren (1'b1), - .waddr (line_idx), - .raddr (line_idx_n), + .addr (line_idx), .wdata (line_wdata), .rdata (line_rdata) ); diff --git a/hw/rtl/core/VX_fetch.sv b/hw/rtl/core/VX_fetch.sv index 
807548614f..802effe076 100644 --- a/hw/rtl/core/VX_fetch.sv +++ b/hw/rtl/core/VX_fetch.sv @@ -53,7 +53,7 @@ module VX_fetch import VX_gpu_pkg::*; #( VX_dp_ram #( .DATAW (`PC_BITS + `NUM_THREADS), .SIZE (`NUM_WARPS), - .OUT_REG (0) + .RDW_MODE ("R") ) tag_store ( .clk (clk), .reset (reset), diff --git a/hw/rtl/core/VX_ipdom_stack.sv b/hw/rtl/core/VX_ipdom_stack.sv index d5d0001323..6bec145049 100644 --- a/hw/rtl/core/VX_ipdom_stack.sv +++ b/hw/rtl/core/VX_ipdom_stack.sv @@ -75,7 +75,8 @@ module VX_ipdom_stack #( VX_dp_ram #( .DATAW (1 + WIDTH * 2), .SIZE (DEPTH), - .OUT_REG (1) + .OUT_REG (1), + .RDW_MODE ("R") ) ipdom_store ( .clk (clk), .reset (reset), diff --git a/hw/rtl/libs/VX_dp_ram.sv b/hw/rtl/libs/VX_dp_ram.sv index b778ce88ed..0ce68ea1a0 100644 --- a/hw/rtl/libs/VX_dp_ram.sv +++ b/hw/rtl/libs/VX_dp_ram.sv @@ -20,7 +20,7 @@ module VX_dp_ram #( parameter WRENW = 1, parameter OUT_REG = 0, parameter LUTRAM = 0, - parameter `STRING RDW_MODE = "R", // R: read-first, W: write-first, U: undefined + parameter `STRING RDW_MODE = "W", // W: write-first, R: read-first, U: undefined parameter RDW_ASSERT = 0, parameter RESET_RAM = 0, parameter INIT_ENABLE = 0, diff --git a/hw/rtl/libs/VX_fifo_queue.sv b/hw/rtl/libs/VX_fifo_queue.sv index 6de6ddc24f..d53903bfd1 100644 --- a/hw/rtl/libs/VX_fifo_queue.sv +++ b/hw/rtl/libs/VX_fifo_queue.sv @@ -77,20 +77,16 @@ module VX_fifo_queue #( localparam ADDRW = `CLOG2(DEPTH); wire [DATAW-1:0] data_out_w; - reg [ADDRW-1:0] rd_ptr_r, rd_ptr_n; + reg [ADDRW-1:0] rd_ptr_r; reg [ADDRW-1:0] wr_ptr_r; - always @(*) begin - rd_ptr_n = rd_ptr_r + ADDRW'(pop); - end - always @(posedge clk) begin if (reset) begin wr_ptr_r <= '0; rd_ptr_r <= (OUT_REG != 0) ? 
1 : 0; end else begin wr_ptr_r <= wr_ptr_r + ADDRW'(push); - rd_ptr_r <= rd_ptr_n; + rd_ptr_r <= rd_ptr_r + ADDRW'(pop); end end @@ -100,7 +96,6 @@ module VX_fifo_queue #( VX_dp_ram #( .DATAW (DATAW), .SIZE (DEPTH), - .OUT_REG (1), .LUTRAM (LUTRAM), .RDW_MODE ("W") ) dp_ram ( @@ -109,9 +104,9 @@ module VX_fifo_queue #( .read (~bypass), .write (push), .wren (1'b1), + .raddr (rd_ptr_r), .waddr (wr_ptr_r), .wdata (data_in), - .raddr (rd_ptr_n), .rdata (data_out_w) ); diff --git a/hw/rtl/libs/VX_index_buffer.sv b/hw/rtl/libs/VX_index_buffer.sv index 422c317e1c..4a1e058450 100644 --- a/hw/rtl/libs/VX_index_buffer.sv +++ b/hw/rtl/libs/VX_index_buffer.sv @@ -50,8 +50,7 @@ module VX_index_buffer #( VX_dp_ram #( .DATAW (DATAW), .SIZE (SIZE), - .OUT_REG (0), - .RDW_MODE("W") + .RDW_MODE ("R") ) data_table ( .clk (clk), .reset (reset), diff --git a/hw/rtl/libs/VX_scope_tap.sv b/hw/rtl/libs/VX_scope_tap.sv index 78e85e16fd..6c0914b0cc 100644 --- a/hw/rtl/libs/VX_scope_tap.sv +++ b/hw/rtl/libs/VX_scope_tap.sv @@ -112,7 +112,8 @@ module VX_scope_tap #( VX_dp_ram #( .DATAW (IDLE_CTRW), .SIZE (DEPTH), - .OUT_REG (1) + .OUT_REG (1), + .RDW_MODE ("R") ) delta_store ( .clk (clk), .reset (reset), @@ -133,7 +134,8 @@ module VX_scope_tap #( VX_dp_ram #( .DATAW (DATAW), .SIZE (DEPTH), - .OUT_REG (1) + .OUT_REG (1), + .RDW_MODE ("R") ) data_store ( .clk (clk), .reset (reset), diff --git a/hw/rtl/libs/VX_sp_ram.sv b/hw/rtl/libs/VX_sp_ram.sv index ee13162719..bdf41eb50c 100644 --- a/hw/rtl/libs/VX_sp_ram.sv +++ b/hw/rtl/libs/VX_sp_ram.sv @@ -20,7 +20,7 @@ module VX_sp_ram #( parameter WRENW = 1, parameter OUT_REG = 0, parameter LUTRAM = 0, - parameter `STRING RDW_MODE = "R", // R: read-first, W: write-first, N: no-change + parameter `STRING RDW_MODE = "W", // W: write-first, R: read-first, N: no-change, U: undefined parameter RDW_ASSERT = 0, parameter RESET_RAM = 0, parameter INIT_ENABLE = 0, @@ -75,14 +75,13 @@ module VX_sp_ram #( end `endif if (OUT_REG) begin : g_sync - wire cs = read || 
write; if (FORCE_BRAM) begin : g_bram if (RDW_MODE == "R") begin : g_read_first `USE_BLOCK_BRAM `RAM_ARRAY `RAM_INITIALIZATION reg [DATAW-1:0] rdata_r; always @(posedge clk) begin - if (cs) begin + if (read || write) begin if (write) begin `RAM_WRITE end @@ -96,7 +95,7 @@ module VX_sp_ram #( if (WRENW > 1) begin : g_wren reg [ADDRW-1:0] addr_reg; always @(posedge clk) begin - if (cs) begin + if (read || write) begin if (write) begin `RAM_WRITE end @@ -108,7 +107,7 @@ module VX_sp_ram #( `UNUSED_VAR (wren) reg [DATAW-1:0] rdata_r; always @(posedge clk) begin - if (cs) begin + if (read || write) begin if (write) begin ram[addr] <= wdata; rdata_r <= wdata; @@ -124,7 +123,7 @@ module VX_sp_ram #( `RAM_INITIALIZATION reg [DATAW-1:0] rdata_r; always @(posedge clk) begin - if (cs) begin + if (read || write) begin if (write) begin `RAM_WRITE end else begin @@ -133,6 +132,19 @@ module VX_sp_ram #( end end assign rdata = rdata_r; + end else if (RDW_MODE == "U") begin : g_unknown + `USE_BLOCK_BRAM `RAM_ARRAY + `RAM_INITIALIZATION + reg [DATAW-1:0] rdata_r; + always @(posedge clk) begin + if (write) begin + `RAM_WRITE + end + if (read) begin + rdata_r <= ram[addr]; + end + end + assign rdata = rdata_r; end end else begin : g_auto if (RDW_MODE == "R") begin : g_read_first @@ -140,7 +152,7 @@ module VX_sp_ram #( `RAM_INITIALIZATION reg [DATAW-1:0] rdata_r; always @(posedge clk) begin - if (cs) begin + if (read || write) begin if (write) begin `RAM_WRITE end @@ -154,7 +166,7 @@ module VX_sp_ram #( if (WRENW > 1) begin : g_wren reg [ADDRW-1:0] addr_reg; always @(posedge clk) begin - if (cs) begin + if (read || write) begin if (write) begin `RAM_WRITE end @@ -166,7 +178,7 @@ module VX_sp_ram #( `UNUSED_VAR (wren) reg [DATAW-1:0] rdata_r; always @(posedge clk) begin - if (cs) begin + if (read || write) begin if (write) begin ram[addr] <= wdata; rdata_r <= wdata; @@ -182,7 +194,7 @@ module VX_sp_ram #( `RAM_INITIALIZATION reg [DATAW-1:0] rdata_r; always @(posedge clk) begin - if (cs) 
begin + if (read || write) begin if (write) begin `RAM_WRITE end else begin @@ -191,6 +203,19 @@ module VX_sp_ram #( end end assign rdata = rdata_r; + end else if (RDW_MODE == "U") begin : g_unknown + `RAM_ARRAY + `RAM_INITIALIZATION + reg [DATAW-1:0] rdata_r; + always @(posedge clk) begin + if (write) begin + `RAM_WRITE + end + if (read) begin + rdata_r <= ram[addr]; + end + end + assign rdata = rdata_r; end end end else begin : g_async @@ -281,6 +306,14 @@ module VX_sp_ram #( end end assign rdata = rdata_r; + end else if (RDW_MODE == "U") begin : g_unknown + reg [DATAW-1:0] rdata_r; + always @(posedge clk) begin + if (read) begin + rdata_r <= ram[addr]; + end + end + assign rdata = rdata_r; end end else begin : g_async `UNUSED_VAR (read) diff --git a/hw/rtl/mem/VX_local_mem.sv b/hw/rtl/mem/VX_local_mem.sv index 557f4a9f75..fd0694fe3d 100644 --- a/hw/rtl/mem/VX_local_mem.sv +++ b/hw/rtl/mem/VX_local_mem.sv @@ -166,7 +166,8 @@ module VX_local_mem import VX_gpu_pkg::*; #( .DATAW (WORD_WIDTH), .SIZE (WORDS_PER_BANK), .WRENW (WORD_SIZE), - .OUT_REG (1) + .OUT_REG (1), + .RDW_MODE ("R") ) lmem_store ( .clk (clk), .reset (reset), From 98b58606e5120299d06513eb0c0c8324be6b624a Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Thu, 24 Oct 2024 02:18:00 -0700 Subject: [PATCH 323/407] merge fixes --- docs/fpga_setup.md | 74 ----------------------------------- miscs/patches/ramulator.patch | 46 ---------------------- sim/rtlsim/Makefile | 7 ---- tests/regression/common.mk | 3 +- 4 files changed, 2 insertions(+), 128 deletions(-) delete mode 100644 docs/fpga_setup.md delete mode 100644 miscs/patches/ramulator.patch diff --git a/docs/fpga_setup.md b/docs/fpga_setup.md deleted file mode 100644 index 80d71e45fa..0000000000 --- a/docs/fpga_setup.md +++ /dev/null @@ -1,74 +0,0 @@ -# FPGA Startup and Configuration Guide - -OPAE Environment Setup ----------------------- - - $ source /opt/inteldevstack/init_env_user.sh - $ export OPAE_HOME=/opt/opae/1.1.2 - $ export 
PATH=$OPAE_HOME/bin:$PATH - $ export C_INCLUDE_PATH=$OPAE_HOME/include:$C_INCLUDE_PATH - $ export LIBRARY_PATH=$OPAE_HOME/lib:$LIBRARY_PATH - $ export LD_LIBRARY_PATH=$OPAE_HOME/lib:$LD_LIBRARY_PATH - -OPAE Build ------------------- - -The FPGA has to following configuration options: -- DEVICE_FAMILY=arria10 | stratix10 -- NUM_CORES=#n - -Command line: - - $ cd hw/syn/altera/opae - $ PREFIX=test1 TARGET=fpga NUM_CORES=4 make - -A new folder (ex: `test1_xxx_4c`) will be created and the build will start and take ~30-480 min to complete. -Setting TARGET=ase will build the project for simulation using Intel ASE. - - -OPAE Build Configuration ------------------------- - -The hardware configuration file `/hw/rtl/VX_config.vh` defines all the hardware parameters that can be modified when build the processor.For example, have the following parameters that can be configured: -- `NUM_WARPS`: Number of warps per cores -- `NUM_THREADS`: Number of threads per warps -- `PERF_ENABLE`: enable the use of all profile counters - -You configure the syntesis build from the command line: - - $ CONFIGS="-DPERF_ENABLE -DNUM_THREADS=8" make - -OPAE Build Progress -------------------- - -You could check the last 10 lines in the build log for possible errors until build completion. - - $ tail -n 10 /build.log - -Check if the build is still running by looking for quartus_sh, quartus_syn, or quartus_fit programs. 
- - $ ps -u - -If the build fails and you need to restart it, clean up the build folder using the following command: - - $ make clean - -The file `vortex_afu.gbs` should exist when the build is done: - - $ ls -lsa /synth/vortex_afu.gbs - - -Signing the bitstream and Programming the FPGA ----------------------------------------------- - - $ cd - $ PACSign PR -t UPDATE -H openssl_manager -i vortex_afu.gbs -o vortex_afu_unsigned_ssl.gbs - $ fpgasupdate vortex_afu_unsigned_ssl.gbs - -FPGA sample test running OpenCL sgemm kernel --------------------------------------------- - -Run the following from the Vortex root directory - - $ TARGET=fpga ./ci/blackbox.sh --driver=opae --app=sgemm --args="-n128" - diff --git a/miscs/patches/ramulator.patch b/miscs/patches/ramulator.patch deleted file mode 100644 index e24b5d230e..0000000000 --- a/miscs/patches/ramulator.patch +++ /dev/null @@ -1,46 +0,0 @@ -diff --git a/Makefile b/Makefile -index ea340c8..d2aac5b 100644 ---- a/Makefile -+++ b/Makefile -@@ -7,16 +7,16 @@ OBJS := $(patsubst $(SRCDIR)/%.cpp, $(OBJDIR)/%.o, $(SRCS)) - - # Ramulator currently supports g++ 5.1+ or clang++ 3.4+. It will NOT work with - # g++ 4.x due to an internal compiler error when processing lambda functions. 
--CXX := clang++ -+#CXX := clang++ - # CXX := g++-5 --CXXFLAGS := -O3 -std=c++11 -g -Wall -+CXXFLAGS := -std=c++11 -O3 -g -Wall -fPIC - - .PHONY: all clean depend - - all: depend ramulator - - clean: -- rm -f ramulator -+ rm -f ramulator libramulator.a - rm -rf $(OBJDIR) - - depend: $(OBJDIR)/.depend -@@ -36,7 +36,7 @@ ramulator: $(MAIN) $(OBJS) $(SRCDIR)/*.h | depend - $(CXX) $(CXXFLAGS) -DRAMULATOR -o $@ $(MAIN) $(OBJS) - - libramulator.a: $(OBJS) $(OBJDIR)/Gem5Wrapper.o -- libtool -static -o $@ $(OBJS) $(OBJDIR)/Gem5Wrapper.o -+ $(AR) rcs $@ $^ - - $(OBJS): | $(OBJDIR) - -diff --git a/src/Request.h b/src/Request.h -index 57abd0d..a5ce061 100644 ---- a/src/Request.h -+++ b/src/Request.h -@@ -36,7 +36,7 @@ public: - - Request(long addr, Type type, int coreid = 0) - : is_first_command(true), addr(addr), coreid(coreid), type(type), -- callback([](Request& req){}) {} -+ callback([](Request&){}) {} - - Request(long addr, Type type, function callback, int coreid = 0) - : is_first_command(true), addr(addr), coreid(coreid), type(type), callback(callback) {} diff --git a/sim/rtlsim/Makefile b/sim/rtlsim/Makefile index 50b9c5c1fb..ecaee717b4 100644 --- a/sim/rtlsim/Makefile +++ b/sim/rtlsim/Makefile @@ -39,13 +39,6 @@ SRCS = $(COMMON_DIR)/util.cpp $(COMMON_DIR)/mem.cpp $(COMMON_DIR)/rvfloats.cpp $ SRCS += $(DPI_DIR)/util_dpi.cpp $(DPI_DIR)/float_dpi.cpp SRCS += $(SRC_DIR)/processor.cpp -ifdef AXI_BUS - TOP = Vortex_axi - CXXFLAGS += -DAXI_BUS -else - TOP = Vortex -endif - VL_FLAGS = --exe VL_FLAGS += --language 1800-2009 --assert -Wall -Wpedantic VL_FLAGS += -Wno-DECLFILENAME -Wno-REDEFMACRO diff --git a/tests/regression/common.mk b/tests/regression/common.mk index 2cba5ef9a8..94fe840df4 100644 --- a/tests/regression/common.mk +++ b/tests/regression/common.mk @@ -8,11 +8,12 @@ XRT_DEVICE_INDEX ?= 0 VORTEX_RT_PATH ?= $(ROOT_DIR)/runtime VORTEX_KN_PATH ?= $(ROOT_DIR)/kernel -STARTUP_ADDR ?= 0x80000000 ifeq ($(XLEN),64) VX_CFLAGS += -march=rv64imafd -mabi=lp64d +STARTUP_ADDR 
?= 0x180000000 else VX_CFLAGS += -march=rv32imaf -mabi=ilp32f +STARTUP_ADDR ?= 0x80000000 endif LLVM_CFLAGS += --sysroot=$(RISCV_SYSROOT) From eecff10deac07ebb00c68805b6fe32ea3c5c4d12 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Thu, 24 Oct 2024 02:51:08 -0700 Subject: [PATCH 324/407] minor update --- hw/rtl/libs/VX_index_buffer.sv | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/hw/rtl/libs/VX_index_buffer.sv b/hw/rtl/libs/VX_index_buffer.sv index 4a1e058450..96caec50ed 100644 --- a/hw/rtl/libs/VX_index_buffer.sv +++ b/hw/rtl/libs/VX_index_buffer.sv @@ -17,6 +17,7 @@ module VX_index_buffer #( parameter DATAW = 1, parameter SIZE = 1, + parameter LUTRAM = 1, parameter ADDRW = `LOG2UP(SIZE) ) ( input wire clk, @@ -50,7 +51,8 @@ module VX_index_buffer #( VX_dp_ram #( .DATAW (DATAW), .SIZE (SIZE), - .RDW_MODE ("R") + .LUTRAM (LUTRAM), + .RDW_MODE ("W") ) data_table ( .clk (clk), .reset (reset), @@ -63,5 +65,7 @@ module VX_index_buffer #( .rdata (read_data) ); + `RUNTIME_ASSERT (~(acquire_en && write_addr == read_addr), ("%t: oops!", $time)) + endmodule `TRACING_ON From ce510d78c7991fd28f3d7be99a4a34d0d6a4ab7c Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Thu, 24 Oct 2024 05:02:46 -0700 Subject: [PATCH 325/407] minor update --- hw/rtl/libs/VX_index_buffer.sv | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/hw/rtl/libs/VX_index_buffer.sv b/hw/rtl/libs/VX_index_buffer.sv index 96caec50ed..8d0320c5d4 100644 --- a/hw/rtl/libs/VX_index_buffer.sv +++ b/hw/rtl/libs/VX_index_buffer.sv @@ -17,7 +17,7 @@ module VX_index_buffer #( parameter DATAW = 1, parameter SIZE = 1, - parameter LUTRAM = 1, + parameter LUTRAM = 0, parameter ADDRW = `LOG2UP(SIZE) ) ( input wire clk, @@ -65,7 +65,5 @@ module VX_index_buffer #( .rdata (read_data) ); - `RUNTIME_ASSERT (~(acquire_en && write_addr == read_addr), ("%t: oops!", $time)) - endmodule `TRACING_ON From d475e9d201d3cb15d6c752a03acd2d362ceaf678 Mon Sep 17 00:00:00 2001 From: Udit Subramanya 
Date: Fri, 25 Oct 2024 12:59:24 -0400 Subject: [PATCH 326/407] remove duplicate block --- docs/fpga_setup.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/fpga_setup.md b/docs/fpga_setup.md index e7ab0ecbbd..3fb02cfe5e 100644 --- a/docs/fpga_setup.md +++ b/docs/fpga_setup.md @@ -70,7 +70,7 @@ $ source /tools/reconfig/xilinx/Vitis/2023.1/settings64.sh The directory `hw/syn/xilinx/xrt` contains the makefile used to synthesize Vortex. ``` $ cd hw/syn/xilinx/xrt - $ PREFIX=test1 PLATFORM=xilinx_u250_gen3x16_xdma_4_1_202210_1 TARGET=hw NUM_CORES=1 make build_u250_hw_1c.log 2>&1 & + $ PREFIX=test1 PLATFORM=xilinx_u250_gen3x16_xdma_4_1_202210_1 TARGET=hw NUM_CORES=1 make > build_u250_hw_1c.log 2>&1 & ``` Will run the synthesis under new build directory: BUILD_DIR := "\\_\\_\" The generated bitstream will be located under /bin/vortex_afu.xclbin From e73e1c2bb3de9d2dd7e77f213cf0ca6c6108accc Mon Sep 17 00:00:00 2001 From: Udit Subramanya Date: Fri, 1 Nov 2024 13:56:01 -0400 Subject: [PATCH 327/407] update xilinx fpga steps with environment variable steps --- docs/fpga_setup.md | 33 ++++++++++++++++++++++++++++----- 1 file changed, 28 insertions(+), 5 deletions(-) diff --git a/docs/fpga_setup.md b/docs/fpga_setup.md index 3fb02cfe5e..d909d8687c 100644 --- a/docs/fpga_setup.md +++ b/docs/fpga_setup.md @@ -50,7 +50,7 @@ Once you’ve connected to the CRNCH login node, you can use the Slurm scheduler To request 16 cores and 64GB of RAM for 6 hours on flubber9, a fpga dev node: ```bash -salloc -p rg-fpga --nodes=1 --ntasks-per-node=16 --mem=64G --nodelist flubber9 --time=06:00:00 +salloc -p rg-fpga --nodes=1 --ntasks-per-node=16 --mem=64G --nodelist flubber1 --time=06:00:00 ``` Synthesis for Xilinx Boards ---------------------- @@ -58,19 +58,42 @@ Once you are logged in, you will need to complete some first time configurations ### Source Configuration Scripts ``` +# From any directory $ source /opt/xilinx/xrt/setup.sh $ source 
/tools/reconfig/xilinx/Vitis/2023.1/settings64.sh ``` ### Check Installed FPGA Platforms -`platforminfo -l` which tells us the correct name of the platform installed on the current fpga node. It should be used for the `PLATFORM` variable below. +`platforminfo -l` tells us the correct name of the platform installed on the current fpga node. It should be used for the `PLATFORM` variable below. Otherwise, if there is an error then there was an issue with the previous two commands. +### Install Vortex Toolchain +The Xilinx synthesis process requires verilator to generate the bitstream. Eventually, you will need the whole toolchain to run the bitstream on the FPGA. Therefore, the Vortex toolchain can be installed as follows. If you complete these steps properly, you should only need to complete them once and you can skip to `Activate Vortex Toolchain`. +``` +# Make a build directory from root and configure scripts for your environment +mkdir build && cd build && ../configure --tooldir=$HOME/tools + +# Install the whole prebuilt toolchain +./ci/toolchain_install.sh --all + +# Add environment variables to bashrc +echo "source /vortex/build/ci/toolchain_env.sh" >> ~/.bashrc +``` + +### Activate Vortex Toolchain +``` +# From any directory +source ~/.bashrc + +# Check environment setup +verilator --version +``` + +### Build the FPGA Bitstream +The root directory contains the path `hw/syn/xilinx/xrt` which has the makefile used to generate the Vortex bitstream. -### Build FPGA image -The directory `hw/syn/xilinx/xrt` contains the makefile used to synthesize Vortex.
``` $ cd hw/syn/xilinx/xrt - $ PREFIX=test1 PLATFORM=xilinx_u250_gen3x16_xdma_4_1_202210_1 TARGET=hw NUM_CORES=1 make > build_u250_hw_1c.log 2>&1 & + $ PREFIX=test1 PLATFORM=xilinx_u50_gen3x16_xdma_5_202210_1 TARGET=hw NUM_CORES=1 make > build_u250_hw_1c.log 2>&1 & ``` Will run the synthesis under new build directory: BUILD_DIR := "\\_\\_\" The generated bitstream will be located under /bin/vortex_afu.xclbin From 667fa1662d316bd3d6a1d618a2883ffe16d34028 Mon Sep 17 00:00:00 2001 From: Udit Subramanya Date: Fri, 1 Nov 2024 14:46:38 -0400 Subject: [PATCH 328/407] update docker for micro apptainer --- ci/install_dependencies.sh | 2 +- miscs/docker/Dockerfile.prod | 33 ++++++++++++--------------------- miscs/docker/README.md | 29 ++++++++++++++++++++++------- 3 files changed, 35 insertions(+), 29 deletions(-) diff --git a/ci/install_dependencies.sh b/ci/install_dependencies.sh index a62ed253be..4dab277868 100755 --- a/ci/install_dependencies.sh +++ b/ci/install_dependencies.sh @@ -31,7 +31,7 @@ check_gcc_version() { apt-get update -y # install system dependencies -apt-get install -y build-essential valgrind libstdc++6 binutils python3 uuid-dev ccache +apt-get install -y build-essential valgrind libstdc++6 binutils python3 uuid-dev ccache cmake # Check and install GCC 11 if necessary if check_gcc_version; then diff --git a/miscs/docker/Dockerfile.prod b/miscs/docker/Dockerfile.prod index e1a8d94b57..20c9c033b3 100644 --- a/miscs/docker/Dockerfile.prod +++ b/miscs/docker/Dockerfile.prod @@ -18,41 +18,32 @@ FROM ubuntu:20.04 ARG DEBIAN_FRONTEND=noninteractive # Install necessary dependencies and upgrade installed components -RUN apt-get update -y && \ - apt-get install -y \ +# Update and install necessary dependencies +RUN apt-get update && apt-get install -y \ software-properties-common \ build-essential \ python3 \ git \ wget \ curl \ - ca-certificates \ - valgrind \ - libstdc++6 \ - binutils \ - uuid-dev \ - ccache \ - cmake && \ - apt-get upgrade -y && \ - 
gcc_version=$(gcc -dumpversion) && \ - if dpkg --compare-versions "$gcc_version" lt 11; then \ - echo "GCC version is less than 11. Installing GCC 11..." && \ - add-apt-repository -y ppa:ubuntu-toolchain-r/test && \ - apt-get update -y && \ - apt-get install -y g++-11 gcc-11 && \ - update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-11 100 && \ - update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-11 100; \ - else \ - echo "GCC version is 11 or greater. No need to install GCC 11."; \ - fi && \ + ca-certificates && \ rm -rf /var/lib/apt/lists/* +# upgrade installed components +RUN apt-get upgrade && apt-get update + +# temporary until remote dependency script gets updated +RUN apt-get install -y cmake + # Clone the Vortex repository RUN git clone --depth=1 --recursive https://github.com/vortexgpgpu/vortex.git /vortex # Set the initial working directory WORKDIR /vortex +# install system dependencies +RUN ./ci/install_dependencies.sh + # Configure the build folder RUN mkdir build && cd build && ../configure diff --git a/miscs/docker/README.md b/miscs/docker/README.md index 897f8f9fba..c077102da1 100644 --- a/miscs/docker/README.md +++ b/miscs/docker/README.md @@ -4,17 +4,32 @@ You can install Docker desktop on MAC or PC or Ubuntu. - MAC: https://docs.docker.com/desktop/install/mac-install - Ubuntu: https://docs.docker.com/desktop/install/ubuntu -### 1- Create a Docker image from the Dockerfile - $ docker build -f Dockerfile.ubuntu -t vortex +### 1- Build a Docker Image from the Dockerfile + $ docker build --platform=linux/amd64 -t vortex-packaged -f Dockerfile.prod . -### 2- Build the Docker image - $ docker docker run -it vortex /bin/bash +### 2- Construct and run a Container from the Docker Image + $ docker run -it --name vortex --privileged=true --platform=linux/amd64 vortex-packaged -### 3- Build the project +### 3- Build the Project One you login the Docker terminal, you will be in the build directory. 
$ make -s -### 4- Run a simple test +### 4- Run a Simple Test +See `docs/` to learn more! - $ ./ci/blackbox.sh --cores=2 --app=vecadd \ No newline at end of file + $ ./ci/blackbox.sh --cores=2 --app=vecadd + +### 5- Exit the Container + + $ exit + $ docker stop vortex + +### 6- Restart and Re-Enter the Container +If you ran step `2` and then step `5` then, you have to start and re-enter the container + + $ docker start vortex + $ docker exec -it vortex + +--- +Note: Apple Silicon macs will run the container in emulation mode, so compiling and running will take a considerable amount of time -- but it still works! \ No newline at end of file From bffc6d9610fe2e4bf1445d89462bae94c41cbd5e Mon Sep 17 00:00:00 2001 From: tinebp Date: Wed, 13 Nov 2024 16:20:25 -0800 Subject: [PATCH 329/407] enabling Vivado's asynchronous bram suppot via direct netlist transformation --- configure | 8 +- hw/rtl/VX_platform.vh | 6 +- hw/rtl/cache/VX_cache_top.sv | 12 +- hw/rtl/libs/VX_async_ram_patch.sv | 158 ++++++++ hw/rtl/libs/VX_dp_ram.sv | 377 ++++++++++++------ hw/rtl/libs/VX_fifo_queue.sv | 4 +- hw/rtl/libs/VX_placeholder.sv | 27 ++ hw/rtl/libs/VX_sp_ram.sv | 415 +++++++++++++------ hw/scripts/xilinx_async_bram_patch.tcl | 525 +++++++++++++++++++++++++ hw/scripts/xilinx_export_netlist.tcl | 71 ++++ hw/syn/xilinx/dut/common.mk | 4 +- hw/syn/xilinx/dut/project.tcl | 46 ++- hw/syn/xilinx/dut/unittest/Makefile | 2 +- hw/syn/xilinx/sandbox/project.tcl.in | 6 +- hw/syn/xilinx/xrt/Makefile | 3 + 15 files changed, 1391 insertions(+), 273 deletions(-) create mode 100644 hw/rtl/libs/VX_async_ram_patch.sv create mode 100644 hw/rtl/libs/VX_placeholder.sv create mode 100644 hw/scripts/xilinx_async_bram_patch.tcl create mode 100644 hw/scripts/xilinx_export_netlist.tcl diff --git a/configure b/configure index d2483a7969..fbcd3f1303 100755 --- a/configure +++ b/configure @@ -65,7 +65,7 @@ copy_files() { filename_no_ext="${filename%.in}" dest_file="$dest_dir/$filename_no_ext" mkdir -p "$dest_dir" - 
sed "s|@VORTEX_HOME@|$SCRIPT_DIR|g; s|@XLEN@|$XLEN|g; s|@TOOLDIR@|$TOOLDIR|g; s|@OSVERSION@|$OSVERSION|g; s|@INSTALLDIR@|$PREFIX|g; s|@CURRENTDIR@|$CURRENT_DIR|g" "$file" > "$dest_file" + sed "s|@VORTEX_HOME@|$SOURCE_DIR|g; s|@XLEN@|$XLEN|g; s|@TOOLDIR@|$TOOLDIR|g; s|@OSVERSION@|$OSVERSION|g; s|@INSTALLDIR@|$PREFIX|g; s|@BUILDDIR@|$CURRENT_DIR|g" "$file" > "$dest_file" # apply permissions to bash scripts read -r firstline < "$dest_file" if [[ "$firstline" =~ ^#!.*bash ]]; then @@ -169,8 +169,8 @@ fi SUBDIRS=("." "!ci" "!perf" "hw*" "kernel*" "runtime*" "sim*" "tests*") # Get the directory of the script -SCRIPT_DIR="$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )" +SOURCE_DIR="$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )" -THIRD_PARTY_DIR=$SCRIPT_DIR/third_party +THIRD_PARTY_DIR=$SOURCE_DIR/third_party -copy_files "$SCRIPT_DIR" "$CURRENT_DIR" +copy_files "$SOURCE_DIR" "$CURRENT_DIR" diff --git a/hw/rtl/VX_platform.vh b/hw/rtl/VX_platform.vh index 6e0b755e27..d874b9b2b4 100644 --- a/hw/rtl/VX_platform.vh +++ b/hw/rtl/VX_platform.vh @@ -160,32 +160,32 @@ endgenerate `ifdef QUARTUS `define MAX_FANOUT 8 `define MAX_LUTRAM 1024 -`define IF_DATA_SIZE(x) $bits(x.data) `define USE_BLOCK_BRAM (* ramstyle = "block" *) `define USE_FAST_BRAM (* ramstyle = "MLAB, no_rw_check" *) `define NO_RW_RAM_CHECK (* altera_attribute = "-name add_pass_through_logic_to_inferred_rams off" *) `define DISABLE_BRAM (* ramstyle = "logic" *) `define PRESERVE_NET (* preserve *) +`define BLACKBOX_CELL (* black_box *) `define STRING string `elsif VIVADO `define MAX_FANOUT 8 `define MAX_LUTRAM 1024 -`define IF_DATA_SIZE(x) $bits(x.data) `define USE_BLOCK_BRAM (* ram_style = "block" *) `define USE_FAST_BRAM (* ram_style = "distributed" *) `define NO_RW_RAM_CHECK (* rw_addr_collision = "no" *) `define DISABLE_BRAM (* ram_style = "registers" *) `define PRESERVE_NET (* keep = "true" *) +`define BLACKBOX_CELL (* black_box *) `define STRING `else `define 
MAX_FANOUT 8 `define MAX_LUTRAM 1024 -`define IF_DATA_SIZE(x) x.DATA_WIDTH `define USE_BLOCK_BRAM `define USE_FAST_BRAM `define NO_RW_RAM_CHECK `define DISABLE_BRAM `define PRESERVE_NET +`define BLACKBOX_CELL `define STRING string `endif diff --git a/hw/rtl/cache/VX_cache_top.sv b/hw/rtl/cache/VX_cache_top.sv index 6dad5b6a89..45664af2b3 100644 --- a/hw/rtl/cache/VX_cache_top.sv +++ b/hw/rtl/cache/VX_cache_top.sv @@ -31,28 +31,28 @@ module VX_cache_top import VX_gpu_pkg::*; #( parameter WORD_SIZE = 16, // Core Response Queue Size - parameter CRSQ_SIZE = 4, + parameter CRSQ_SIZE = 8, // Miss Reserv Queue Knob parameter MSHR_SIZE = 16, // Memory Response Queue Size - parameter MRSQ_SIZE = 4, + parameter MRSQ_SIZE = 8, // Memory Request Queue Size - parameter MREQ_SIZE = 4, + parameter MREQ_SIZE = 8, // Enable cache writeable parameter WRITE_ENABLE = 1, // Enable cache writeback - parameter WRITEBACK = 0, + parameter WRITEBACK = 1, // Enable dirty bytes on writeback - parameter DIRTY_BYTES = 0, + parameter DIRTY_BYTES = 1, // Request debug identifier parameter UUID_WIDTH = 0, // core request tag size - parameter TAG_WIDTH = 16, + parameter TAG_WIDTH = 32, // Core response output buffer parameter CORE_OUT_BUF = 3, diff --git a/hw/rtl/libs/VX_async_ram_patch.sv b/hw/rtl/libs/VX_async_ram_patch.sv new file mode 100644 index 0000000000..fd29e881d9 --- /dev/null +++ b/hw/rtl/libs/VX_async_ram_patch.sv @@ -0,0 +1,158 @@ +// Copyright © 2019-2023 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +`include "VX_platform.vh" + +`define RAM_WRITE_WREN for (integer i = 0; i < WRENW; ++i) begin \ + if (wren[i]) begin \ + ram[waddr][i * WSELW +: WSELW] <= wdata[i * WSELW +: WSELW]; \ + end \ + end + +`define RAM_INITIALIZATION \ + if (INIT_ENABLE != 0) begin : g_init \ + if (INIT_FILE != "") begin : g_file \ + initial $readmemh(INIT_FILE, ram); \ + end else begin : g_value \ + initial begin \ + for (integer i = 0; i < SIZE; ++i) begin : g_i \ + ram[i] = INIT_VALUE; \ + end \ + end \ + end \ + end + +`define RAM_BYPASS(__d) \ + reg [DATAW-1:0] bypass_data_r; \ + reg bypass_valid_r; \ + always @(posedge clk) begin \ + bypass_valid_r <= read_s && write && (raddr_s == waddr); \ + bypass_data_r <= wdata; \ + end \ + assign __d = bypass_valid_r ? bypass_data_r : rdata_r + +`TRACING_OFF +module VX_async_ram_patch #( + parameter DATAW = 1, + parameter SIZE = 1, + parameter WRENW = 1, + parameter DUAL_PORT = 0, + parameter INIT_ENABLE = 0, + parameter INIT_FILE = "", + parameter [DATAW-1:0] INIT_VALUE = 0, + parameter ADDRW = `LOG2UP(SIZE) +) ( + input wire clk, + input wire reset, + input wire read, + input wire write, + input wire [WRENW-1:0] wren, + input wire [ADDRW-1:0] waddr, + input wire [DATAW-1:0] wdata, + input wire [ADDRW-1:0] raddr, + output wire [DATAW-1:0] rdata +); + localparam WSELW = DATAW / WRENW; + + `UNUSED_VAR (reset) + + (* keep = "true" *) wire [ADDRW-1:0] raddr_w, raddr_s; + (* keep = "true" *) wire read_s, is_raddr_reg; + + assign raddr_w = raddr; + + VX_placeholder #( + .I (ADDRW), + .O (ADDRW + 1 + 1) + ) placeholder ( + .in (raddr_w), + .out ({raddr_s, read_s, is_raddr_reg}) + ); + + // synchroneous ram + + wire [DATAW-1:0] rdata_s; + + if (WRENW != 1) begin : g_wren_sync_ram + `USE_BLOCK_BRAM reg [DATAW-1:0] ram [0:SIZE-1]; + reg [DATAW-1:0] rdata_r; + `RAM_INITIALIZATION + always @(posedge clk) begin + if (read_s || write) begin + if 
(write) begin + `RAM_WRITE_WREN + end + rdata_r <= ram[raddr_s]; + end + end + `RAM_BYPASS(rdata_s); + end else begin : g_no_wren_sync_ram + `USE_BLOCK_BRAM reg [DATAW-1:0] ram [0:SIZE-1]; + reg [DATAW-1:0] rdata_r; + `RAM_INITIALIZATION + `UNUSED_VAR (wren) + always @(posedge clk) begin + if (read_s || write) begin + if (write) begin + ram[waddr] <= wdata; + end + rdata_r <= ram[raddr_s]; + end + end + `RAM_BYPASS(rdata_s); + end + + // asynchronous ram (fallback) + + wire [DATAW-1:0] rdata_a; + + if (DUAL_PORT != 0) begin : g_dp_async_ram + reg [DATAW-1:0] ram [0:SIZE-1]; + `RAM_INITIALIZATION + if (WRENW != 1) begin : g_wren + always @(posedge clk) begin + if (write) begin + `RAM_WRITE_WREN + end + end + end else begin : g_no_wren + always @(posedge clk) begin + if (write) begin + ram[waddr] <= wdata; + end + end + end + assign rdata_a = ram[raddr]; + end else begin : g_sp_async_ram + reg [DATAW-1:0] ram [0:SIZE-1]; + `RAM_INITIALIZATION + if (WRENW != 1) begin : g_wren + always @(posedge clk) begin + if (write) begin + `RAM_WRITE_WREN + end + end + end else begin : g_no_wren + always @(posedge clk) begin + if (write) begin + ram[waddr] <= wdata; + end + end + end + assign rdata_a = ram[waddr]; + end + + assign rdata = is_raddr_reg ? 
rdata_s : rdata_a; + +endmodule +`TRACING_ON diff --git a/hw/rtl/libs/VX_dp_ram.sv b/hw/rtl/libs/VX_dp_ram.sv index 0ce68ea1a0..0cff67882f 100644 --- a/hw/rtl/libs/VX_dp_ram.sv +++ b/hw/rtl/libs/VX_dp_ram.sv @@ -13,6 +13,35 @@ `include "VX_platform.vh" +`define RAM_INITIALIZATION \ + if (INIT_ENABLE != 0) begin : g_init \ + if (INIT_FILE != "") begin : g_file \ + initial $readmemh(INIT_FILE, ram); \ + end else begin : g_value \ + initial begin \ + for (integer i = 0; i < SIZE; ++i) begin : g_i \ + ram[i] = INIT_VALUE; \ + end \ + end \ + end \ + end + +`ifdef QUARTUS + `define RAM_ARRAY_WREN reg [WRENW-1:0][WSELW-1:0] ram [0:SIZE-1]; + `define RAM_WRITE_WREN for (integer i = 0; i < WRENW; ++i) begin \ + if (wren[i]) begin \ + ram[waddr][i] <= wdata[i * WSELW +: WSELW]; \ + end \ + end +`else + `define RAM_ARRAY_WREN reg [DATAW-1:0] ram [0:SIZE-1]; + `define RAM_WRITE_WREN for (integer i = 0; i < WRENW; ++i) begin \ + if (wren[i]) begin \ + ram[waddr][i * WSELW +: WSELW] <= wdata[i * WSELW +: WSELW]; \ + end \ + end +`endif + `TRACING_OFF module VX_dp_ram #( parameter DATAW = 1, @@ -45,163 +74,289 @@ module VX_dp_ram #( `STATIC_ASSERT((RDW_MODE == "R" || RDW_MODE == "W" || RDW_MODE == "U"), ("invalid parameter")) `UNUSED_PARAM (RDW_ASSERT) -`define RAM_INITIALIZATION \ - if (INIT_ENABLE != 0) begin : g_init \ - if (INIT_FILE != "") begin : g_file \ - initial $readmemh(INIT_FILE, ram); \ - end else begin : g_value \ - initial begin \ - for (integer i = 0; i < SIZE; ++i) begin : g_i \ - ram[i] = INIT_VALUE; \ - end \ - end \ - end \ - end - `ifdef SYNTHESIS localparam FORCE_BRAM = !LUTRAM && (SIZE * DATAW >= `MAX_LUTRAM); -`ifdef QUARTUS - `define RAM_ARRAY reg [WRENW-1:0][WSELW-1:0] ram [0:SIZE-1]; - `define RAM_WRITE for (integer i = 0; i < WRENW; ++i) begin \ - if (wren[i]) begin \ - ram[waddr][i] <= wdata[i * WSELW +: WSELW]; \ - end \ - end -`else - `define RAM_ARRAY reg [DATAW-1:0] ram [0:SIZE-1]; - `define RAM_WRITE for (integer i = 0; i < WRENW; ++i) begin \ - 
if (wren[i]) begin \ - ram[waddr][i * WSELW +: WSELW] <= wdata[i * WSELW +: WSELW]; \ - end \ - end -`endif if (OUT_REG) begin : g_sync if (FORCE_BRAM) begin : g_bram if (RDW_MODE == "W") begin : g_write_first - (* rw_addr_collision = "yes" *) `USE_BLOCK_BRAM `RAM_ARRAY - `UNUSED_VAR (wren) - `RAM_INITIALIZATION - reg [ADDRW-1:0] addr_reg; - always @(posedge clk) begin - if (read || write) begin - if (write) begin - `RAM_WRITE + if (WRENW != 1) begin : g_wren + (* rw_addr_collision = "yes" *) `USE_BLOCK_BRAM `RAM_ARRAY_WREN + `RAM_INITIALIZATION + reg [ADDRW-1:0] raddr_r; + always @(posedge clk) begin + if (read || write) begin + if (write) begin + `RAM_WRITE_WREN + end + raddr_r <= raddr; + end + end + assign rdata = ram[raddr_r]; + end else begin : g_no_wren + (* rw_addr_collision = "yes" *) `USE_BLOCK_BRAM reg [DATAW-1:0] ram [0:SIZE-1]; + `RAM_INITIALIZATION + reg [ADDRW-1:0] raddr_r; + always @(posedge clk) begin + if (read || write) begin + if (write) begin + ram[waddr] <= wdata; + end + raddr_r <= raddr; end - addr_reg <= raddr; end + assign rdata = ram[raddr_r]; end - assign rdata = ram[addr_reg]; end else if (RDW_MODE == "R") begin : g_read_first - `USE_BLOCK_BRAM `RAM_ARRAY - `RAM_INITIALIZATION - reg [DATAW-1:0] rdata_r; - always @(posedge clk) begin - if (read || write) begin - if (write) begin - `RAM_WRITE + if (WRENW != 1) begin : g_wren + `USE_BLOCK_BRAM `RAM_ARRAY_WREN + `RAM_INITIALIZATION + reg [DATAW-1:0] rdata_r; + always @(posedge clk) begin + if (read || write) begin + if (write) begin + `RAM_WRITE_WREN + end + rdata_r <= ram[raddr]; end - rdata_r <= ram[raddr]; end + assign rdata = rdata_r; + end else begin : g_no_wren + `USE_BLOCK_BRAM reg [DATAW-1:0] ram [0:SIZE-1]; + `RAM_INITIALIZATION + reg [DATAW-1:0] rdata_r; + always @(posedge clk) begin + if (read || write) begin + if (write) begin + ram[waddr] <= wdata; + end + rdata_r <= ram[raddr]; + end + end + assign rdata = rdata_r; end - assign rdata = rdata_r; end else begin : g_undefined - 
`USE_BLOCK_BRAM `RAM_ARRAY - `RAM_INITIALIZATION - reg [DATAW-1:0] rdata_r; - always @(posedge clk) begin - if (write) begin - `RAM_WRITE + if (WRENW != 1) begin : g_wren + `USE_BLOCK_BRAM `RAM_ARRAY_WREN + `RAM_INITIALIZATION + reg [DATAW-1:0] rdata_r; + always @(posedge clk) begin + if (write) begin + `RAM_WRITE_WREN + end + if (read) begin + rdata_r <= ram[raddr]; + end end - if (read) begin - rdata_r <= ram[raddr]; + assign rdata = rdata_r; + end else begin : g_no_wren + `USE_BLOCK_BRAM reg [DATAW-1:0] ram [0:SIZE-1]; + `RAM_INITIALIZATION + reg [DATAW-1:0] rdata_r; + always @(posedge clk) begin + if (write) begin + ram[waddr] <= wdata; + end + if (read) begin + rdata_r <= ram[raddr]; + end end + assign rdata = rdata_r; end - assign rdata = rdata_r; end end else begin : g_auto if (RDW_MODE == "W") begin : g_write_first - (* rw_addr_collision = "yes" *) `RAM_ARRAY - `UNUSED_VAR (wren) - `RAM_INITIALIZATION - reg [ADDRW-1:0] addr_reg; - always @(posedge clk) begin - if (read || write) begin - if (write) begin - `RAM_WRITE + if (WRENW != 1) begin : g_wren + (* rw_addr_collision = "yes" *) `RAM_ARRAY_WREN + `RAM_INITIALIZATION + reg [ADDRW-1:0] raddr_r; + always @(posedge clk) begin + if (read || write) begin + if (write) begin + `RAM_WRITE_WREN + end + raddr_r <= raddr; + end + end + assign rdata = ram[raddr_r]; + end else begin : g_no_wren + (* rw_addr_collision = "yes" *) reg [DATAW-1:0] ram [0:SIZE-1]; + `RAM_INITIALIZATION + reg [ADDRW-1:0] raddr_r; + always @(posedge clk) begin + if (read || write) begin + if (write) begin + ram[waddr] <= wdata; + end + raddr_r <= raddr; end - addr_reg <= raddr; end + assign rdata = ram[raddr_r]; end - assign rdata = ram[addr_reg]; end else if (RDW_MODE == "R") begin : g_read_first - `RAM_ARRAY - `RAM_INITIALIZATION - reg [DATAW-1:0] rdata_r; - always @(posedge clk) begin - if (read || write) begin - if (write) begin - `RAM_WRITE + if (WRENW != 1) begin : g_wren + `RAM_ARRAY_WREN + `RAM_INITIALIZATION + reg [DATAW-1:0] 
rdata_r; + always @(posedge clk) begin + if (read || write) begin + if (write) begin + `RAM_WRITE_WREN + end + rdata_r <= ram[raddr]; + end + end + assign rdata = rdata_r; + end else begin : g_no_wren + reg [DATAW-1:0] ram [0:SIZE-1]; + `RAM_INITIALIZATION + reg [DATAW-1:0] rdata_r; + always @(posedge clk) begin + if (read || write) begin + if (write) begin + ram[waddr] <= wdata; + end + rdata_r <= ram[raddr]; end - rdata_r <= ram[raddr]; end + assign rdata = rdata_r; end - assign rdata = rdata_r; end else begin - `RAM_ARRAY - `RAM_INITIALIZATION - reg [DATAW-1:0] rdata_r; - always @(posedge clk) begin - if (write) begin - `RAM_WRITE + if (WRENW != 1) begin : g_wren + `RAM_ARRAY_WREN + `RAM_INITIALIZATION + reg [DATAW-1:0] rdata_r; + always @(posedge clk) begin + if (write) begin + `RAM_WRITE_WREN + end + if (read) begin + rdata_r <= ram[raddr]; + end end - if (read) begin - rdata_r <= ram[raddr]; + assign rdata = rdata_r; + end else begin : g_no_wren + reg [DATAW-1:0] ram [0:SIZE-1]; + `RAM_INITIALIZATION + reg [DATAW-1:0] rdata_r; + always @(posedge clk) begin + if (write) begin + ram[waddr] <= wdata; + end + if (read) begin + rdata_r <= ram[raddr]; + end end + assign rdata = rdata_r; end - assign rdata = rdata_r; end end end else begin : g_async `UNUSED_VAR (read) if (FORCE_BRAM) begin : g_bram if (RDW_MODE == "W") begin : g_write_first - `USE_BLOCK_BRAM `RAM_ARRAY - `RAM_INITIALIZATION - always @(posedge clk) begin - if (write) begin - `RAM_WRITE + `ifdef VIVADO + VX_async_ram_patch #( + .DATAW (DATAW), + .SIZE (SIZE), + .WRENW (WRENW), + .DUAL_PORT (1), + .INIT_ENABLE(INIT_ENABLE), + .INIT_FILE (INIT_FILE), + .INIT_VALUE (INIT_VALUE) + ) async_ram_patch ( + .clk (clk), + .reset (reset), + .read (read), + .write (write), + .wren (wren), + .waddr (waddr), + .wdata (wdata), + .raddr (raddr), + .rdata (rdata) + ); + `else + if (WRENW != 1) begin : g_wren + `USE_BLOCK_BRAM `RAM_ARRAY_WREN + `RAM_INITIALIZATION + always @(posedge clk) begin + if (write) begin + 
`RAM_WRITE_WREN + end end + assign rdata = ram[raddr]; + end else begin : g_no_wren + `USE_BLOCK_BRAM reg [DATAW-1:0] ram [0:SIZE-1]; + `RAM_INITIALIZATION + always @(posedge clk) begin + if (write) begin + ram[waddr] <= wdata; + end + end + assign rdata = ram[raddr]; end - assign rdata = ram[raddr]; + `endif end else begin : g_read_first - `NO_RW_RAM_CHECK `USE_BLOCK_BRAM `RAM_ARRAY - `RAM_INITIALIZATION - always @(posedge clk) begin - if (write) begin - `RAM_WRITE + if (WRENW != 1) begin : g_wren + `NO_RW_RAM_CHECK `USE_BLOCK_BRAM `RAM_ARRAY_WREN + `RAM_INITIALIZATION + always @(posedge clk) begin + if (write) begin + `RAM_WRITE_WREN + end end + assign rdata = ram[raddr]; + end else begin : g_no_wren + `NO_RW_RAM_CHECK `USE_BLOCK_BRAM reg [DATAW-1:0] ram [0:SIZE-1]; + `RAM_INITIALIZATION + always @(posedge clk) begin + if (write) begin + ram[waddr] <= wdata; + end + end + assign rdata = ram[raddr]; end - assign rdata = ram[raddr]; end end else begin : g_auto if (RDW_MODE == "W") begin : g_write_first - `RAM_ARRAY - `RAM_INITIALIZATION - always @(posedge clk) begin - if (write) begin - `RAM_WRITE + if (WRENW != 1) begin : g_wren + `RAM_ARRAY_WREN + `RAM_INITIALIZATION + always @(posedge clk) begin + if (write) begin + `RAM_WRITE_WREN + end end + assign rdata = ram[raddr]; + end else begin : g_no_wren + reg [DATAW-1:0] ram [0:SIZE-1]; + `RAM_INITIALIZATION + always @(posedge clk) begin + if (write) begin + ram[waddr] <= wdata; + end + end + assign rdata = ram[raddr]; end - assign rdata = ram[raddr]; end else begin : g_read_first - `NO_RW_RAM_CHECK `RAM_ARRAY - `RAM_INITIALIZATION - always @(posedge clk) begin - if (write) begin - `RAM_WRITE + if (WRENW != 1) begin : g_wren + `NO_RW_RAM_CHECK `RAM_ARRAY_WREN + `RAM_INITIALIZATION + always @(posedge clk) begin + if (write) begin + `RAM_WRITE_WREN + end + end + assign rdata = ram[raddr]; + end else begin : g_no_wren + `NO_RW_RAM_CHECK reg [DATAW-1:0] ram [0:SIZE-1]; + `RAM_INITIALIZATION + always @(posedge clk) begin 
+ if (write) begin + ram[waddr] <= wdata; + end end + assign rdata = ram[raddr]; end - assign rdata = ram[raddr]; end end end @@ -226,13 +381,13 @@ module VX_dp_ram #( if (OUT_REG) begin : g_sync if (RDW_MODE == "W") begin : g_write_first - reg [ADDRW-1:0] addr_reg; + reg [ADDRW-1:0] raddr_r; always @(posedge clk) begin if (read || write) begin - addr_reg <= raddr; + raddr_r <= raddr; end end - assign rdata = ram[addr_reg]; + assign rdata = ram[raddr_r]; end else if (RDW_MODE == "R") begin : g_read_first reg [DATAW-1:0] rdata_r; always @(posedge clk) begin diff --git a/hw/rtl/libs/VX_fifo_queue.sv b/hw/rtl/libs/VX_fifo_queue.sv index d53903bfd1..720a1a2c60 100644 --- a/hw/rtl/libs/VX_fifo_queue.sv +++ b/hw/rtl/libs/VX_fifo_queue.sv @@ -15,8 +15,8 @@ `TRACING_OFF module VX_fifo_queue #( - parameter DATAW = 1, - parameter DEPTH = 1, + parameter DATAW = 32, + parameter DEPTH = 32, parameter ALM_FULL = (DEPTH - 1), parameter ALM_EMPTY = 1, parameter OUT_REG = 0, diff --git a/hw/rtl/libs/VX_placeholder.sv b/hw/rtl/libs/VX_placeholder.sv new file mode 100644 index 0000000000..738da615b6 --- /dev/null +++ b/hw/rtl/libs/VX_placeholder.sv @@ -0,0 +1,27 @@ +// Copyright © 2019-2023 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +`include "VX_platform.vh" + +`TRACING_OFF +`BLACKBOX_CELL module VX_placeholder #( + parameter I = 0, + parameter O = 0 +) ( + input wire [`UP(I)-1:0] in, + output wire [`UP(O)-1:0] out +); + // empty module + +endmodule +`TRACING_ON diff --git a/hw/rtl/libs/VX_sp_ram.sv b/hw/rtl/libs/VX_sp_ram.sv index bdf41eb50c..88b922384c 100644 --- a/hw/rtl/libs/VX_sp_ram.sv +++ b/hw/rtl/libs/VX_sp_ram.sv @@ -13,6 +13,35 @@ `include "VX_platform.vh" +`define RAM_INITIALIZATION \ + if (INIT_ENABLE != 0) begin : g_init \ + if (INIT_FILE != "") begin : g_file \ + initial $readmemh(INIT_FILE, ram); \ + end else begin : g_value \ + initial begin \ + for (integer i = 0; i < SIZE; ++i) begin : g_i \ + ram[i] = INIT_VALUE; \ + end \ + end \ + end \ + end + +`ifdef QUARTUS + `define RAM_ARRAY_WREN reg [WRENW-1:0][WSELW-1:0] ram [0:SIZE-1]; + `define RAM_WRITE_WREN for (integer i = 0; i < WRENW; ++i) begin \ + if (wren[i]) begin \ + ram[addr][i] <= wdata[i * WSELW +: WSELW]; \ + end \ + end +`else + `define RAM_ARRAY_WREN reg [DATAW-1:0] ram [0:SIZE-1]; + `define RAM_WRITE_WREN for (integer i = 0; i < WRENW; ++i) begin \ + if (wren[i]) begin \ + ram[addr][i * WSELW +: WSELW] <= wdata[i * WSELW +: WSELW]; \ + end \ + end +`endif + `TRACING_OFF module VX_sp_ram #( parameter DATAW = 1, @@ -44,67 +73,55 @@ module VX_sp_ram #( `STATIC_ASSERT((RDW_MODE == "R" || RDW_MODE == "W" || RDW_MODE == "N"), ("invalid parameter")) `UNUSED_PARAM (RDW_ASSERT) -`define RAM_INITIALIZATION \ - if (INIT_ENABLE != 0) begin : g_init \ - if (INIT_FILE != "") begin : g_file \ - initial $readmemh(INIT_FILE, ram); \ - end else begin : g_value \ - initial begin \ - for (integer i = 0; i < SIZE; ++i) begin : g_i \ - ram[i] = INIT_VALUE; \ - end \ - end \ - end \ - end - `ifdef SYNTHESIS localparam FORCE_BRAM = !LUTRAM && (SIZE * DATAW >= `MAX_LUTRAM); -`ifdef QUARTUS - `define RAM_ARRAY reg [WRENW-1:0][WSELW-1:0] ram [0:SIZE-1]; - `define RAM_WRITE for (integer i = 0; i < WRENW; ++i) begin \ - if (wren[i]) begin 
\ - ram[addr][i] <= wdata[i * WSELW +: WSELW]; \ - end \ - end -`else - `define RAM_ARRAY reg [DATAW-1:0] ram [0:SIZE-1]; - `define RAM_WRITE for (integer i = 0; i < WRENW; ++i) begin \ - if (wren[i]) begin \ - ram[addr][i * WSELW +: WSELW] <= wdata[i * WSELW +: WSELW]; \ - end \ - end -`endif if (OUT_REG) begin : g_sync if (FORCE_BRAM) begin : g_bram if (RDW_MODE == "R") begin : g_read_first - `USE_BLOCK_BRAM `RAM_ARRAY - `RAM_INITIALIZATION - reg [DATAW-1:0] rdata_r; - always @(posedge clk) begin - if (read || write) begin - if (write) begin - `RAM_WRITE + if (WRENW != 1) begin : g_wren + `USE_BLOCK_BRAM `RAM_ARRAY_WREN + `RAM_INITIALIZATION + reg [DATAW-1:0] rdata_r; + always @(posedge clk) begin + if (read || write) begin + if (write) begin + `RAM_WRITE_WREN + end + rdata_r <= ram[addr]; end - rdata_r <= ram[addr]; end + assign rdata = rdata_r; + end else begin : g_no_wren + `USE_BLOCK_BRAM reg [DATAW-1:0] ram [0:SIZE-1]; + `RAM_INITIALIZATION + reg [DATAW-1:0] rdata_r; + always @(posedge clk) begin + if (read || write) begin + if (write) begin + ram[addr] <= wdata; + end + rdata_r <= ram[addr]; + end + end + assign rdata = rdata_r; end - assign rdata = rdata_r; end else if (RDW_MODE == "W") begin : g_write_first - `USE_BLOCK_BRAM `RAM_ARRAY - `RAM_INITIALIZATION - if (WRENW > 1) begin : g_wren - reg [ADDRW-1:0] addr_reg; + if (WRENW != 1) begin : g_wren + `USE_BLOCK_BRAM `RAM_ARRAY_WREN + `RAM_INITIALIZATION + reg [ADDRW-1:0] addr_r; always @(posedge clk) begin if (read || write) begin if (write) begin - `RAM_WRITE + `RAM_WRITE_WREN end - addr_reg <= addr; + addr_r <= addr; end end - assign rdata = ram[addr_reg]; + assign rdata = ram[addr_r]; end else begin : g_no_wren - `UNUSED_VAR (wren) + `USE_BLOCK_BRAM reg [DATAW-1:0] ram [0:SIZE-1]; + `RAM_INITIALIZATION reg [DATAW-1:0] rdata_r; always @(posedge clk) begin if (read || write) begin @@ -119,63 +136,110 @@ module VX_sp_ram #( assign rdata = rdata_r; end end else if (RDW_MODE == "N") begin : g_no_change - 
`USE_BLOCK_BRAM `RAM_ARRAY - `RAM_INITIALIZATION - reg [DATAW-1:0] rdata_r; - always @(posedge clk) begin - if (read || write) begin - if (write) begin - `RAM_WRITE - end else begin - rdata_r <= ram[addr]; + if (WRENW != 1) begin : g_wren + `USE_BLOCK_BRAM `RAM_ARRAY_WREN + `RAM_INITIALIZATION + reg [DATAW-1:0] rdata_r; + always @(posedge clk) begin + if (read || write) begin + if (write) begin + `RAM_WRITE_WREN + end else begin + rdata_r <= ram[addr]; + end end end + assign rdata = rdata_r; + end else begin : g_no_wren + `USE_BLOCK_BRAM reg [DATAW-1:0] ram [0:SIZE-1]; + `RAM_INITIALIZATION + reg [DATAW-1:0] rdata_r; + always @(posedge clk) begin + if (read || write) begin + if (write) begin + ram[addr] <= wdata; + end else begin + rdata_r <= ram[addr]; + end + end + end + assign rdata = rdata_r; end - assign rdata = rdata_r; end else if (RDW_MODE == "U") begin : g_unknown - `USE_BLOCK_BRAM `RAM_ARRAY - `RAM_INITIALIZATION - reg [DATAW-1:0] rdata_r; - always @(posedge clk) begin - if (write) begin - `RAM_WRITE + if (WRENW != 1) begin : g_wren + `USE_BLOCK_BRAM `RAM_ARRAY_WREN + `RAM_INITIALIZATION + reg [DATAW-1:0] rdata_r; + always @(posedge clk) begin + if (write) begin + `RAM_WRITE_WREN + end + if (read) begin + rdata_r <= ram[addr]; + end end - if (read) begin - rdata_r <= ram[addr]; + assign rdata = rdata_r; + end else begin : g_no_wren + `USE_BLOCK_BRAM reg [DATAW-1:0] ram [0:SIZE-1]; + `RAM_INITIALIZATION + reg [DATAW-1:0] rdata_r; + always @(posedge clk) begin + if (write) begin + ram[addr] <= wdata; + end + if (read) begin + rdata_r <= ram[addr]; + end end + assign rdata = rdata_r; end - assign rdata = rdata_r; end end else begin : g_auto if (RDW_MODE == "R") begin : g_read_first - `RAM_ARRAY - `RAM_INITIALIZATION - reg [DATAW-1:0] rdata_r; - always @(posedge clk) begin - if (read || write) begin - if (write) begin - `RAM_WRITE + if (WRENW != 1) begin : g_wren + `RAM_ARRAY_WREN + `RAM_INITIALIZATION + reg [DATAW-1:0] rdata_r; + always @(posedge clk) begin 
+ if (read || write) begin + if (write) begin + `RAM_WRITE_WREN + end + rdata_r <= ram[addr]; end - rdata_r <= ram[addr]; end + assign rdata = rdata_r; + end else begin : g_no_wren + reg [DATAW-1:0] ram [0:SIZE-1]; + `RAM_INITIALIZATION + reg [DATAW-1:0] rdata_r; + always @(posedge clk) begin + if (read || write) begin + if (write) begin + ram[addr] <= wdata; + end + rdata_r <= ram[addr]; + end + end + assign rdata = rdata_r; end - assign rdata = rdata_r; end else if (RDW_MODE == "W") begin : g_write_first - `RAM_ARRAY - `RAM_INITIALIZATION - if (WRENW > 1) begin : g_wren - reg [ADDRW-1:0] addr_reg; + if (WRENW != 1) begin : g_wren + `RAM_ARRAY_WREN + `RAM_INITIALIZATION + reg [ADDRW-1:0] addr_r; always @(posedge clk) begin if (read || write) begin if (write) begin - `RAM_WRITE + `RAM_WRITE_WREN end - addr_reg <= addr; + addr_r <= addr; end end - assign rdata = ram[addr_reg]; + assign rdata = ram[addr_r]; end else begin : g_no_wren - `UNUSED_VAR (wren) + reg [DATAW-1:0] ram [0:SIZE-1]; + `RAM_INITIALIZATION reg [DATAW-1:0] rdata_r; always @(posedge clk) begin if (read || write) begin @@ -190,75 +254,172 @@ module VX_sp_ram #( assign rdata = rdata_r; end end else if (RDW_MODE == "N") begin : g_no_change - `RAM_ARRAY - `RAM_INITIALIZATION - reg [DATAW-1:0] rdata_r; - always @(posedge clk) begin - if (read || write) begin - if (write) begin - `RAM_WRITE - end else begin - rdata_r <= ram[addr]; + if (WRENW != 1) begin : g_wren + `RAM_ARRAY_WREN + `RAM_INITIALIZATION + reg [DATAW-1:0] rdata_r; + always @(posedge clk) begin + if (read || write) begin + if (write) begin + `RAM_WRITE_WREN + end else begin + rdata_r <= ram[addr]; + end + end + end + assign rdata = rdata_r; + end else begin : g_no_wren + reg [DATAW-1:0] ram [0:SIZE-1]; + `RAM_INITIALIZATION + reg [DATAW-1:0] rdata_r; + always @(posedge clk) begin + if (read || write) begin + if (write) begin + ram[addr] <= wdata; + end else begin + rdata_r <= ram[addr]; + end end end + assign rdata = rdata_r; end - assign 
rdata = rdata_r; end else if (RDW_MODE == "U") begin : g_unknown - `RAM_ARRAY - `RAM_INITIALIZATION - reg [DATAW-1:0] rdata_r; - always @(posedge clk) begin - if (write) begin - `RAM_WRITE + if (WRENW != 1) begin : g_wren + `RAM_ARRAY_WREN + `RAM_INITIALIZATION + reg [DATAW-1:0] rdata_r; + always @(posedge clk) begin + if (write) begin + `RAM_WRITE_WREN + end + if (read) begin + rdata_r <= ram[addr]; + end end - if (read) begin - rdata_r <= ram[addr]; + assign rdata = rdata_r; + end else begin : g_no_wren + reg [DATAW-1:0] ram [0:SIZE-1]; + `RAM_INITIALIZATION + reg [DATAW-1:0] rdata_r; + always @(posedge clk) begin + if (write) begin + ram[addr] <= wdata; + end + if (read) begin + rdata_r <= ram[addr]; + end end + assign rdata = rdata_r; end - assign rdata = rdata_r; end end end else begin : g_async `UNUSED_VAR (read) if (FORCE_BRAM) begin : g_bram if (RDW_MODE == "W") begin : g_write_first - `USE_BLOCK_BRAM `RAM_ARRAY - `RAM_INITIALIZATION - always @(posedge clk) begin - if (write) begin - `RAM_WRITE + `ifdef VIVADO + VX_async_ram_patch #( + .DATAW (DATAW), + .SIZE (SIZE), + .WRENW (WRENW), + .DUAL_PORT (0), + .INIT_ENABLE(INIT_ENABLE), + .INIT_FILE (INIT_FILE), + .INIT_VALUE (INIT_VALUE) + ) async_ram_patch ( + .clk (clk), + .reset (reset), + .read (read), + .write (write), + .wren (wren), + .waddr (addr), + .wdata (wdata), + .raddr (addr), + .rdata (rdata) + ); + `else + if (WRENW != 1) begin : g_wren + `USE_BLOCK_BRAM `RAM_ARRAY_WREN + `RAM_INITIALIZATION + always @(posedge clk) begin + if (write) begin + `RAM_WRITE_WREN + end end + assign rdata = ram[addr]; + end else begin : g_no_wren + `USE_BLOCK_BRAM reg [DATAW-1:0] ram [0:SIZE-1]; + `RAM_INITIALIZATION + always @(posedge clk) begin + if (write) begin + ram[addr] <= wdata; + end + end + assign rdata = ram[addr]; end - assign rdata = ram[addr]; + `endif end else begin : g_read_first - `NO_RW_RAM_CHECK `USE_BLOCK_BRAM `RAM_ARRAY - `RAM_INITIALIZATION - always @(posedge clk) begin - if (write) begin - 
`RAM_WRITE + if (WRENW != 1) begin : g_wren + `NO_RW_RAM_CHECK `USE_BLOCK_BRAM `RAM_ARRAY_WREN + `RAM_INITIALIZATION + always @(posedge clk) begin + if (write) begin + `RAM_WRITE_WREN + end + end + assign rdata = ram[addr]; + end else begin : g_no_wren + `NO_RW_RAM_CHECK `USE_BLOCK_BRAM reg [DATAW-1:0] ram [0:SIZE-1]; + `RAM_INITIALIZATION + always @(posedge clk) begin + if (write) begin + ram[addr] <= wdata; + end end + assign rdata = ram[addr]; end - assign rdata = ram[addr]; end end else begin : g_auto if (RDW_MODE == "W") begin : g_write_first - `RAM_ARRAY - `RAM_INITIALIZATION - always @(posedge clk) begin - if (write) begin - `RAM_WRITE + if (WRENW != 1) begin : g_wren + `RAM_ARRAY_WREN + `RAM_INITIALIZATION + always @(posedge clk) begin + if (write) begin + `RAM_WRITE_WREN + end + end + assign rdata = ram[addr]; + end else begin : g_no_wren + reg [DATAW-1:0] ram [0:SIZE-1]; + `RAM_INITIALIZATION + always @(posedge clk) begin + if (write) begin + ram[addr] <= wdata; + end end + assign rdata = ram[addr]; end - assign rdata = ram[addr]; end else begin : g_read_first - `NO_RW_RAM_CHECK `RAM_ARRAY - `RAM_INITIALIZATION - always @(posedge clk) begin - if (write) begin - `RAM_WRITE + if (WRENW != 1) begin : g_wren + `NO_RW_RAM_CHECK `RAM_ARRAY_WREN + `RAM_INITIALIZATION + always @(posedge clk) begin + if (write) begin + `RAM_WRITE_WREN + end + end + assign rdata = ram[addr]; + end else begin : g_no_wren + `NO_RW_RAM_CHECK reg [DATAW-1:0] ram [0:SIZE-1]; + `RAM_INITIALIZATION + always @(posedge clk) begin + if (write) begin + ram[addr] <= wdata; + end end + assign rdata = ram[addr]; end - assign rdata = ram[addr]; end end end @@ -291,13 +452,13 @@ module VX_sp_ram #( end assign rdata = rdata_r; end else if (RDW_MODE == "W") begin : g_write_first - reg [ADDRW-1:0] addr_reg; + reg [ADDRW-1:0] addr_r; always @(posedge clk) begin if (read || write) begin - addr_reg <= addr; + addr_r <= addr; end end - assign rdata = ram[addr_reg]; + assign rdata = ram[addr_r]; end else 
if (RDW_MODE == "N") begin : g_no_change reg [DATAW-1:0] rdata_r; always @(posedge clk) begin diff --git a/hw/scripts/xilinx_async_bram_patch.tcl b/hw/scripts/xilinx_async_bram_patch.tcl new file mode 100644 index 0000000000..5af7ba9533 --- /dev/null +++ b/hw/scripts/xilinx_async_bram_patch.tcl @@ -0,0 +1,525 @@ +namespace eval vortex { + +variable debug 0 + +proc print_error {msg {do_exit 1}} { + if {$do_exit} { + puts "ERROR: $msg" + exit -1 + } else { + puts "WARNING: $msg" + } +} + +proc str_replace {str match repl} { + set result "" + regsub $match $str $repl result + return $result +} + +proc unique_cell_name {name} { + if {[get_cells -quiet $name] == {}} { return $name } + set index 0 + while {[get_cells -quiet ${name}_${index}] != {}} { incr index } + return ${name}_${index} +} + +proc unique_net_name {name} { + if {[get_nets -quiet $name] == {}} { return $name } + set index 0 + while {[get_nets -quiet ${name}_${index}] != {}} { incr index } + return ${name}_${index} +} + +proc find_nested_cells {parent name_match {should_exist 1}} { + set matching_cells {} + foreach cell [get_cells -hierarchical -include_replicated_objects -filter "PARENT == $parent"] { + set name [get_property NAME $cell] + if {[regexp $name_match $name]} { + lappend matching_cells $cell + } + } + if {[llength $matching_cells] == 0} { + print_error "No matching cell found for '$parent' matching '$name_match'." $should_exist + } + return $matching_cells +} + +proc find_nested_cell {parent name_match} { + foreach cell [get_cells -hierarchical -filter "PARENT == $parent"] { + set name [get_property NAME $cell] + if {$name == $name_match} { + return $cell + } + } + puts "ERROR: No matching cell found for '$parent' matching '$name_match'." 
+ exit -1 +} + +proc find_cell_nets {cell name_match {should_exist 1}} { + set matching_nets {} + foreach net [get_nets -hierarchical -filter "PARENT_CELL == $cell"] { + set name [get_property NAME $net] + if {[regexp $name_match $name]} { + lappend matching_nets $net + } + } + if {[llength $matching_nets] == 0} { + print_error "No matching net found for '$cell' matching '$name_match'." $should_exist + } + return $matching_nets +} + +proc get_cell_net {cell name_match} { + foreach net [get_nets -hierarchical -filter "PARENT_CELL == $cell"] { + set name [get_property NAME $net] + if {$name == $name_match} { + return $net + } + } + puts "ERROR: No matching net found for '$cell' matching '$name_match'." + exit -1 +} + +proc find_cell_pins {cell name_match {should_exist 1}} { + set matching_pins {} + foreach pin [get_pins -of_objects $cell] { + set name [get_property NAME $pin] + if {[regexp $name_match $name]} { + lappend matching_pins $pin + } + } + if {[llength $matching_pins] == 0} { + print_error "No matching pin found for '$cell' matching '$name_match'." $should_exist + } + return $matching_pins +} + +proc get_cell_pin {cell name_match} { + foreach pin [get_pins -of_objects $cell] { + set name [get_property NAME $pin] + if {$name == $name_match} { + return $pin + } + } + puts "ERROR: No matching pin found for '$cell' matching '$name_match'." + exit -1 +} + +proc replace_pin_source {pin source_pin} { + variable debug + + # Disconnect existing net from pin + set net [get_nets -of_objects $pin] + if {[llength $net] == 1} { + disconnect_net -net $net -objects $pin + if {$debug} {puts "DEBUG: Disconnected net '$net' from pin '$pin'."} + } elseif {[llength $net] > 1} { + puts "ERROR: Multiple nets connected to pin '$pin'." + exit -1 + } else { + puts "WARNING: No net connected to pin '$pin'." 
+ } + + set source_net [get_nets -quiet -of_objects $source_pin] + if {[llength $source_net] == 0} { + # Create a new net if none exists + set source_cell [get_cells -of_objects $source_pin] + set net_name [unique_net_name "${source_cell}_net"] + set source_net [create_net $net_name] + if {$debug} {puts "DEBUG: Created source_net: '$source_net'"} + # Connect the source pin to the new net + connect_net -net $source_net -objects $source_pin -hierarchical + if {$debug} {puts "DEBUG: Connected net '$source_net' to pin '$source_pin'."} + } elseif {[llength $source_net] > 1} { + puts "ERROR: Multiple nets connected to pin '$source_pin'." + exit -1 + } + + # Connect pin to the new source net + connect_net -net $source_net -objects $pin -hierarchical + if {$debug} {puts "DEBUG: Connected net '$source_net' to pin '$pin'."} +} + +proc create_register_next {reg_cell prefix_name} { + variable debug + + set reg_d_pin [get_pins -of_objects $reg_cell -filter {NAME =~ "*/D"}] + if {[llength $reg_d_pin] == 0} { + puts "ERROR: No D pin found on register cell '$reg_cell'." + exit -1 + } elseif {[llength $reg_d_pin] > 1} { + puts "ERROR: Multiple D pins found on register cell '$reg_cell'." + exit -1 + } + + if {$debug} {puts "DEBUG: reg_d_pin: '$reg_d_pin'"} + + set reg_d_src_pin [find_pin_driver $reg_d_pin] + if {$reg_d_src_pin == ""} { + puts "ERROR: No source pin found connected to '$reg_d_pin'." + exit -1 + } + + if {$debug} {puts "DEBUG: reg_d_src_pin: '$reg_d_src_pin'"} + + set reg_r_src_pin "" + + set register_type [get_property REF_NAME $reg_cell] + if {$register_type == "FDRE"} { + set reg_r_pin [get_pins -of_objects $reg_cell -filter {NAME =~ "*/R"}] + if {[llength $reg_r_pin] == 0} { + puts "ERROR: No R pin found on FDRE cell '$reg_cell'." + exit -1 + } elseif {[llength $reg_r_pin] > 1} { + puts "ERROR: Multiple R pins found on FDRE cell '$reg_cell'." 
+ exit -1 + } + + if {$debug} {puts "DEBUG: reg_r_pin: '$reg_r_pin'"} + + set reg_r_src_pin [find_pin_driver $reg_r_pin] + if {$reg_r_src_pin == ""} { + puts "ERROR: No source pin found connected to '$reg_r_pin'." + exit -1 + } + } elseif {$register_type == "FDSE"} { + set reg_s_pin [get_pins -of_objects $reg_cell -filter {NAME =~ "*/S"}] + if {[llength $reg_s_pin] == 0} { + puts "ERROR: No S pin found on FDSE cell '$reg_cell'." + exit -1 + } elseif {[llength $reg_s_pin] > 1} { + puts "ERROR: Multiple S pins found on FDSE cell '$reg_cell'." + exit -1 + } + + if {$debug} {puts "DEBUG: reg_s_pin: '$reg_s_pin'"} + + set reg_r_src_pin [find_pin_driver $reg_s_pin] + if {$reg_r_src_pin == ""} { + puts "ERROR: No source pin found connected to '$reg_s_pin'." + exit -1 + } + } else { + puts "ERROR: Unsupported register type: '$register_type'." + exit 1 + } + + if {$debug} {puts "DEBUG: reg_r_src_pin: '$reg_r_src_pin'"} + + set reg_d_src_net [get_nets -of_objects $reg_d_src_pin] + if {[llength $reg_d_src_net] == 0} { + puts "ERROR: Unable to get source nets for pins." + exit -1 + } elseif {[llength $reg_d_src_net] > 1} { + puts "ERROR: Multiple source nets found for pins." + exit -1 + } + + set reg_r_src_net [get_nets -of_objects $reg_r_src_pin] + if {[llength $reg_r_src_net] == 0} { + puts "ERROR: Unable to get source nets for pins." + exit -1 + } elseif {[llength $reg_r_src_net] > 1} { + puts "ERROR: Multiple source nets found for pins." + exit -1 + } + + # Create a MUX cell to implement register next value + # Use a 2x1 LUT to describe the logic: + # FDRE: O = I1 ? 0 : I0; where I0=D, I1=R + # FDSE: O = I1 ? 
1 : I0; where I0=D, I1=S + set lut_name [unique_cell_name $prefix_name] + set lut_cell [create_cell -reference LUT2 $lut_name] + puts "INFO: Created lut cell: '$lut_cell'" + + if {$register_type == "FDRE"} { + set_property INIT 4'b0010 $lut_cell + } elseif {$register_type == "FDSE"} { + set_property INIT 4'b1110 $lut_cell + } else { + puts "ERROR: Unsupported register type: '$register_type'." + exit 1 + } + + set lut_i0_pin [get_pins -of_objects $lut_cell -filter {NAME =~ "*/I0"}] + if {[llength $lut_i0_pin] == 0} { + puts "ERROR: No I0 pin found on FDSE cell '$lut_cell'." + exit -1 + } elseif {[llength $lut_i0_pin] > 1} { + puts "ERROR: Multiple I0 pins found on FDSE cell '$lut_cell'." + exit -1 + } + + set lut_i1_pin [get_pins -of_objects $lut_cell -filter {NAME =~ "*/I1"}] + if {[llength $lut_i1_pin] == 0} { + puts "ERROR: No I1 pin found on FDSE cell '$lut_cell'." + exit -1 + } elseif {[llength $lut_i1_pin] > 1} { + puts "ERROR: Multiple I1 pins found on FDSE cell '$lut_cell'." + exit -1 + } + + set lut_o_pin [get_pins -of_objects $lut_cell -filter {NAME =~ "*/O"}] + if {[llength $lut_o_pin] == 0} { + puts "ERROR: No O pin found on FDSE cell '$lut_cell'." + exit -1 + } elseif {[llength $lut_o_pin] > 1} { + puts "ERROR: Multiple O pins found on FDSE cell '$lut_cell'." 
+ exit -1 + } + + connect_net -net $reg_d_src_net -objects $lut_i0_pin -hierarchical + if {$debug} {puts "DEBUG: Connected net '$reg_d_src_net' to pin '$lut_i0_pin'."} + + connect_net -net $reg_r_src_net -objects $lut_i1_pin -hierarchical + if {$debug} {puts "DEBUG: Connected net '$reg_r_src_net' to pin '$lut_i1_pin'."} + + return $lut_o_pin +} + +proc getOrCreateVCCPin {prefix_name} { + variable debug + + set vcc_cell "" + set vcc_cells [get_cells -quiet -filter {REF_NAME == VCC}] + if {[llength $vcc_cells] == 0} { + set cell_name [unique_cell_name $prefix_name] + set vcc_cell [create_cell -reference VCC $cell_name] + puts "INFO: Created VCC cell: '$vcc_cell'" + } else { + set vcc_cell [lindex $vcc_cells 0] + } + set vcc_pin [get_pins -of_objects $vcc_cell -filter {NAME =~ "*/P"}] + if {[llength $vcc_pin] == 0} { + puts "ERROR: No VCC pin found on VCC cell '$vcc_cell'." + exit -1 + } elseif {[llength $vcc_pin] > 1} { + puts "ERROR: Multiple VCC pins found on VCC cell '$vcc_cell'." + exit -1 + } + return $vcc_pin +} + +proc getOrCreateGNDPin {prefix_name} { + variable debug + + set gnd_cell "" + set gnd_cells [get_cells -quiet -filter {REF_NAME == GND}] + if {[llength $gnd_cells] == 0} { + set cell_name [unique_cell_name $prefix_name] + set gnd_cell [create_cell -reference GND $cell_name] + puts "INFO: Created GND cell: '$gnd_cell'" + } else { + set gnd_cell [lindex $gnd_cells 0] + } + set gnd_pin [get_pins -of_objects $gnd_cell -filter {NAME =~ "*/G"}] + if {[llength $gnd_pin] == 0} { + puts "ERROR: No GND pin found on GND cell '$gnd_cell'." + exit -1 + } elseif {[llength $gnd_pin] > 1} { + puts "ERROR: Multiple GND pins found on GND cell '$gnd_cell'." 
+ exit -1 + } + return $gnd_pin +} + +proc find_net_sinks {input_net {should_exist 1}} { + set sink_pins {} + foreach pin [get_pins -quiet -leaf -of_objects $input_net -filter {DIRECTION == "IN"}] { + lappend sink_pins $pin + } + foreach port [get_ports -quiet -of_objects $input_net -filter {DIRECTION == "OUT"}] { + lappend sink_pins $port + } + if {[llength $sink_pins] == 0} { + print_error "No sink found for '$input_net'." $should_exist + } + return $sink_pins +} + +proc find_net_driver {input_net {should_exist 1}} { + set driverPins [get_pins -quiet -leaf -of_objects $input_net -filter {DIRECTION == "OUT"}] + if {[llength $driverPins] == 0} { + set driverPorts [get_ports -quiet -of_objects $input_net -filter {DIRECTION == "IN"}] + if {[llength $driverPorts] == 0} { + print_error "No driver found for '$input_net'." $should_exist + } elseif {[llength $driverPorts] > 1} { + puts "WARNING: Multiple driver ports found for '$input_net'." + return [lindex $driverPorts 0] + } + return $driverPorts + } elseif {[llength $driverPins] > 1} { + puts "WARNING: Multiple driver pins found for '$input_net'." + return [lindex $driverPins 0] + } + return $driverPins +} + +proc find_pin_driver {input_pin {should_exist 1}} { + set net [get_nets -quiet -of_objects $input_pin] + if {[llength $net] == 0} { + print_error "No net connected to pin '$input_pin'." $should_exist + } elseif {[llength $net] > 1} { + puts "ERROR: Multiple nets connected to pin '$input_pin'." + exit -1 + } + return [find_net_driver $net] +} + +proc find_matching_nets {cell nets match repl} { + set matching_nets {} + foreach net $nets { + set net_name [str_replace $net $match $repl] + set matching_net [get_cell_net $cell $net_name] + if {$matching_net != ""} { + lappend matching_nets $matching_net + } + } + if {[llength $matching_nets] == 0} { + puts "ERROR: No matching nets found for '$nets'." 
+ exit -1 + } elseif {[llength $matching_nets] != [llength $nets]} { + puts "ERROR: Mismatch in number of matching nets." + exit -1 + } + return $matching_nets +} + +proc replace_net_source {net source_pin} { + foreach pin [find_net_sinks $net 0] { + replace_pin_source $pin $source_pin + } +} + +proc resolve_async_bram {inst} { + variable debug + + puts "INFO: Resolving asynchronous BRAM patch: '$inst'." + + set raddr_w_nets [find_cell_nets $inst "raddr_w(\\\[\\d+\\\])?$"] + set read_s_net [find_cell_nets $inst "read_s$"] + set is_raddr_reg_net [find_cell_nets $inst "is_raddr_reg$"] + + set raddr_s_nets [find_matching_nets $inst $raddr_w_nets "raddr_w(\\\[\\d+\\\])?$" "raddr_s\\1"] + + set reg_next_pins {} + set reg_ce_src_pin "" + + foreach raddr_w_net $raddr_w_nets { + if {$debug} {puts "DEBUG: Processing raddr_w net: '$raddr_w_net'"} + + # Find raddr_w_net's driver pin + set raddr_src_pin [find_net_driver $raddr_w_net] + if {$debug} {puts "DEBUG: raddr_src_pin: '$raddr_src_pin'"} + + # Get the driver cell + set raddr_src_cell [get_cells -of_objects $raddr_src_pin] + if {[llength $raddr_src_cell] == 0} { + puts "ERROR: No source cell found connected to pin '$raddr_src_pin'." + exit -1 + } elseif {[llength $raddr_src_cell] > 1} { + puts "ERROR: Multiple source cells found connected to pin '$raddr_src_pin'." + exit -1 + } + + # Check driver type + set driver_type [get_property REF_NAME $raddr_src_cell] + if {$driver_type == "FDRE" || $driver_type == "FDSE"} { + if {$debug} {puts "DEBUG: Net '$raddr_w_net' is registered, driver_type='$driver_type'"} + } else { + puts "WARNING: Net '$raddr_w_net' is not be registered, driver_type='$driver_type'" + break + } + + # Create register next cell and return output pin + set reg_next_pin [create_register_next $raddr_src_cell "$inst/raddr_next"] + if {$reg_next_pin == ""} { + puts "ERROR: failed to create register next value for '$raddr_src_cell'." 
+ exit -1 + } + if {$debug} {puts "DEBUG: reg_next_pin: '$reg_next_pin'"} + + lappend reg_next_pins $reg_next_pin + + # Find the CE pin on raddr_src_cell + if {$reg_ce_src_pin == ""} { + set reg_ce_pin [get_pins -of_objects $raddr_src_cell -filter {NAME =~ "*/CE"}] + if {[llength $reg_ce_pin] == 0} { + puts "ERROR: No CE pin found on register cell '$raddr_src_cell'." + exit -1 + } elseif {[llength $reg_ce_pin] > 1} { + puts "ERROR: Multiple CE pins found on register cell '$raddr_src_cell'." + exit -1 + } + if {$debug} {puts "DEBUG: reg_ce_pin: '$reg_ce_pin'"} + + set reg_ce_src_pin [find_pin_driver $reg_ce_pin] + if {$reg_ce_src_pin == ""} { + puts "ERROR: No source pin found connected to '$reg_ce_pin'." + exit -1 + } + if {$debug} {puts "DEBUG: reg_ce_src_pin: '$reg_ce_src_pin'"} + } + } + + # do we have a fully registered read address? + if {[llength $reg_next_pins] == [llength $raddr_w_nets]} { + puts "INFO: Fully registered read address detected." + set addr_width [llength $raddr_w_nets] + for {set addr_idx 0} {$addr_idx < $addr_width} {incr addr_idx} { + set raddr_w_net [lindex $raddr_w_nets $addr_idx] + set raddr_s_net [lindex $raddr_s_nets $addr_idx] + set reg_next_pin [lindex $reg_next_pins $addr_idx] + puts "INFO: Connecting pin '$reg_next_pin' to '$raddr_s_net's pins." + # Connect reg_next_pin to all input pins attached to raddr_s_net + replace_net_source $raddr_s_net $reg_next_pin + } + + # Connect reg_ce_src_pin to all input pins attached to read_s_net + puts "INFO: Connecting pin '$reg_ce_src_pin' to '$read_s_net's pins." + replace_net_source $read_s_net $reg_ce_src_pin + + # Create Const<1>'s pin + set vcc_pin [getOrCreateVCCPin "$inst/VCC"] + + # Connect vcc_pin to all input pins attached to is_raddr_reg_net + puts "INFO: Connecting pin '$vcc_pin' to '$is_raddr_reg_net's pins." + replace_net_source $is_raddr_reg_net $vcc_pin + } else { + puts "WARNING: Not all read addresses are registered!" 
+ + # Create Const<0>'s pin + set gnd_pin [getOrCreateGNDPin "$inst/GND"] + + # Connect gnd_pin to all input pins attached to is_raddr_reg_net + puts "INFO: Connecting pin '$gnd_pin' to '$is_raddr_reg_net's pins." + replace_net_source $is_raddr_reg_net $gnd_pin + } + + # Remove all placeholder cells + foreach cell [find_nested_cells $inst "placeholder$"] { + remove_cell $cell + if {$debug} {puts "DEBUG: Cell '$cell' was removed successfully."} + } +} + +proc resolve_async_brams {} { + set bram_patch_cells {} + foreach cell [get_cells -hierarchical -filter {REF_NAME =~ "*VX_async_ram_patch*"}] { + puts "INFO: Found async BRAM patch cell: '$cell'." + lappend bram_patch_cells $cell + } + if {[llength $bram_patch_cells] != 0} { + foreach cell $bram_patch_cells { + resolve_async_bram $cell + } + } else { + puts "INFO: No async BRAM patch cells found in the design." + } +} + +} + +# Invoke the procedure to resolve async BRAM +vortex::resolve_async_brams diff --git a/hw/scripts/xilinx_export_netlist.tcl b/hw/scripts/xilinx_export_netlist.tcl new file mode 100644 index 0000000000..25a0d17e84 --- /dev/null +++ b/hw/scripts/xilinx_export_netlist.tcl @@ -0,0 +1,71 @@ +# Function to export netlist to a Graphviz DOT file +proc export_netlist {dot_file_name} { + # Open the DOT file for writing + set dot_file [open $dot_file_name "w"] + + # Start the DOT graph definition + puts $dot_file "digraph Netlist {" + puts $dot_file "rankdir=LR;" ;# Set the graph direction from left to right + + # Extract and add cells to the graph + foreach cell [get_cells -hierarchical] { + set cell_name [get_property NAME $cell] + set cell_type [get_property REF_NAME $cell] + puts $dot_file "\"$cell_name\" \[label=\"$cell_name\\n($cell_type)\", shape=box\];" + } + + # Extract and add ports to the graph + foreach port [get_ports] { + set port_name [get_property NAME $port] + set direction [get_property DIRECTION $port] + set shape "ellipse" + + # Color code input and output ports for easier 
identification + if {$direction == "IN"} { + set color "lightblue" + } else { + set color "lightgreen" + } + puts $dot_file "\"$port_name\" \[label=\"$port_name\", shape=$shape, style=filled, fillcolor=$color\];" + } + + # Traverse nets and create edges between ports and pins + foreach net [get_nets -hierarchical] { + set net_name [get_property NAME $net] + + # Find source and destination pins + set source_pin "" + set sink_pins {} + + foreach pin [get_pins -of_objects $net] { + set direction [get_property DIRECTION $pin] + set cell [get_cells -of_objects $pin] + set pin_name [get_property NAME $pin] + + if {$direction == "OUT"} { + # Set as source pin + set source_pin "$cell/$pin_name" + } else { + # Collect as sink pin + lappend sink_pins "$cell/$pin_name" + } + } + + # Output edges from source to all sinks + if {$source_pin != ""} { + foreach sink_pin $sink_pins { + puts $dot_file "\"$source_pin\" -> \"$sink_pin\" \[label=\"$net_name\"\];" + } + } + } + + # End the DOT graph definition + puts $dot_file "}" + + # Close the DOT file + close $dot_file + puts "Netlist exported to DOT file: $dot_file_name" +} + +# Run the export function +export_netlist "netlist.dot" \ No newline at end of file diff --git a/hw/syn/xilinx/dut/common.mk b/hw/syn/xilinx/dut/common.mk index 933621bef6..81946c88fd 100644 --- a/hw/syn/xilinx/dut/common.mk +++ b/hw/syn/xilinx/dut/common.mk @@ -31,9 +31,9 @@ project_1/sources.txt: build: $(PROJECT).xpr $(PROJECT).xpr: project_1/sources.txt ifdef FPU_IP - MAX_JOBS=$(JOBS) FPU_IP=project_1/ip $(VIVADO) -mode batch -source $(SRC_DIR)/project.tcl -tclargs $(TOP_LEVEL_ENTITY) $(DEVICE) project_1/sources.txt $(SRC_DIR)/project.xdc $(SCRIPT_DIR) + MAX_JOBS=$(JOBS) FPU_IP=project_1/ip SCRIPT_DIR=$(SCRIPT_DIR) $(VIVADO) -mode batch -source $(SRC_DIR)/project.tcl -tclargs $(TOP_LEVEL_ENTITY) $(DEVICE) project_1/sources.txt $(SRC_DIR)/project.xdc else - MAX_JOBS=$(JOBS) $(VIVADO) -mode batch -source $(SRC_DIR)/project.tcl -tclargs $(TOP_LEVEL_ENTITY) 
$(DEVICE) project_1/sources.txt $(SRC_DIR)/project.xdc $(SCRIPT_DIR) + MAX_JOBS=$(JOBS) SCRIPT_DIR=$(SCRIPT_DIR) $(VIVADO) -mode batch -source $(SRC_DIR)/project.tcl -tclargs $(TOP_LEVEL_ENTITY) $(DEVICE) project_1/sources.txt $(SRC_DIR)/project.xdc endif clean: diff --git a/hw/syn/xilinx/dut/project.tcl b/hw/syn/xilinx/dut/project.tcl index dcaf883fa3..9cb173c22d 100644 --- a/hw/syn/xilinx/dut/project.tcl +++ b/hw/syn/xilinx/dut/project.tcl @@ -14,9 +14,9 @@ # Start time set start_time [clock seconds] -if { $::argc != 5 } { - puts "ERROR: Program \"$::argv0\" requires 5 arguments!\n" - puts "Usage: $::argv0 \n" +if { $::argc != 4 } { + puts "ERROR: Program \"$::argv0\" requires 4 arguments!\n" + puts "Usage: $::argv0 \n" exit } @@ -27,13 +27,16 @@ set top_module [lindex $::argv 0] set device_part [lindex $::argv 1] set vcs_file [lindex $::argv 2] set xdc_file [lindex $::argv 3] -set tool_dir [lindex $::argv 4] + +set script_dir $::env(SCRIPT_DIR) +set source_dir [file dirname [info script]] puts "Using top_module=$top_module" puts "Using device_part=$device_part" puts "Using vcs_file=$vcs_file" puts "Using xdc_file=$xdc_file" -puts "Using tool_dir=$tool_dir" +puts "Using script_dir=$script_dir" +puts "Using source_dir=$source_dir" # Set the number of jobs based on MAX_JOBS environment variable if {[info exists ::env(MAX_JOBS)]} { @@ -48,10 +51,10 @@ if {[info exists ::env(FPU_IP)]} { set ip_dir $::env(FPU_IP) set argv [list $ip_dir $device_part] set argc 2 - source ${tool_dir}/xilinx_ip_gen.tcl + source ${script_dir}/xilinx_ip_gen.tcl } -source "${tool_dir}/parse_vcs_list.tcl" +source "${script_dir}/parse_vcs_list.tcl" set vlist [parse_vcs_list "${vcs_file}"] set vsources_list [lindex $vlist 0] @@ -84,37 +87,52 @@ if {[info exists ::env(FPU_IP)]} { update_compile_order -fileset sources_1 +# Synthesis set_property top $top_module [current_fileset] + set_property \ -name {STEPS.SYNTH_DESIGN.ARGS.MORE OPTIONS} \ -value {-mode out_of_context -flatten_hierarchy 
"rebuilt"} \ -objects [get_runs synth_1] -# Synthesis +# register compilation hooks +#set_property STEPS.SYNTH_DESIGN.TCL.PRE ${source_dir}/pre_synth_hook.tcl [get_runs synth_1] +#set_property STEPS.SYNTH_DESIGN.TCL.POST ${source_dir}/post_synth_hook.tcl [get_runs synth_1] +set_property STEPS.OPT_DESIGN.TCL.PRE ${script_dir}/xilinx_async_bram_patch.tcl [get_runs impl_1] +#set_property STEPS.OPT_DESIGN.TCL.POST ${source_dir}/post_opt_hook.tcl [get_runs impl_1] +#set_property STEPS.ROUTE_DESIGN.TCL.PRE ${source_dir}/pre_route_hook.tcl [get_runs impl_1] +#set_property STEPS.ROUTE_DESIGN.TCL.POST ${source_dir}/post_route_hook.tcl [get_runs impl_1] + if {$num_jobs != 0} { - launch_runs synth_1 -jobs $num_jobs + launch_runs synth_1 -verbose -jobs $num_jobs } else { - launch_runs synth_1 + launch_runs synth_1 -verbose } wait_on_run synth_1 open_run synth_1 write_checkpoint -force post_synth.dcp -report_utilization -file utilization.rpt -hierarchical -hierarchical_percentages +report_utilization -file post_synth_util.rpt -hierarchical -hierarchical_percentages # Implementation if {$num_jobs != 0} { - launch_runs impl_1 -jobs $num_jobs + launch_runs impl_1 -verbose -jobs $num_jobs } else { - launch_runs impl_1 + launch_runs impl_1 -verbose } wait_on_run impl_1 open_run impl_1 write_checkpoint -force post_impl.dcp +report_utilization -file post_impl_util.rpt -hierarchical -hierarchical_percentages # Generate the synthesis report report_place_status -file place.rpt report_route_status -file route.rpt report_timing_summary -file timing.rpt + +# Generate timing report +report_timing -nworst 10 -delay_type max -sort_by group -file timing.rpt + +# Generate power and drc reports report_power -file power.rpt report_drc -file drc.rpt @@ -125,4 +143,4 @@ set elapsed_time [expr {[clock seconds] - $start_time}] set hours [format "%02d" [expr {$elapsed_time / 3600}]] set minutes [format "%02d" [expr {($elapsed_time % 3600) / 60}]] set seconds [format "%02d" [expr {$elapsed_time % 60}]] 
-puts "Total elapsed time: ${hours}h ${minutes}m ${seconds}s" \ No newline at end of file +puts "Total elapsed time: ${hours}h ${minutes}m ${seconds}s" diff --git a/hw/syn/xilinx/dut/unittest/Makefile b/hw/syn/xilinx/dut/unittest/Makefile index 1bc66aa388..3d756562e4 100644 --- a/hw/syn/xilinx/dut/unittest/Makefile +++ b/hw/syn/xilinx/dut/unittest/Makefile @@ -1,4 +1,4 @@ -PROJECT = Unittest +PROJECT = VX_fifo_queue TOP_LEVEL_ENTITY = $(PROJECT) SRC_FILE = $(PROJECT).sv diff --git a/hw/syn/xilinx/sandbox/project.tcl.in b/hw/syn/xilinx/sandbox/project.tcl.in index 8926b43ad0..bb1bf86f2b 100644 --- a/hw/syn/xilinx/sandbox/project.tcl.in +++ b/hw/syn/xilinx/sandbox/project.tcl.in @@ -121,8 +121,8 @@ proc run_setup {} { # None # Set 'sim_1' fileset file properties for local files -set file "testbench.v" -set file_obj [get_files -of_objects [get_filesets sim_1] [list "*$file"]] + set file "testbench.v" + set file_obj [get_files -of_objects [get_filesets sim_1] [list "*$file"]] set_property -name "file_type" -value "Verilog" -objects $file_obj set_property -name "is_enabled" -value "1" -objects $file_obj set_property -name "is_global_include" -value "0" -objects $file_obj @@ -300,7 +300,7 @@ set file_obj [get_files -of_objects [get_filesets sim_1] [list "*$file"]] CONFIG.Assume_Synchronous_Clk {true} \ CONFIG.Byte_Size {8} \ CONFIG.Load_Init_File {true} \ - CONFIG.Coe_File {@CURRENTDIR@/hw/syn/xilinx/sandbox/kernel.bin.coe} \ + CONFIG.Coe_File {@BUILDDIR@/hw/syn/xilinx/sandbox/kernel.bin.coe} \ CONFIG.EN_SAFETY_CKT {true} \ CONFIG.Enable_32bit_Address {true} \ CONFIG.Fill_Remaining_Memory_Locations {false} \ diff --git a/hw/syn/xilinx/xrt/Makefile b/hw/syn/xilinx/xrt/Makefile index 5d536a0695..643724069d 100644 --- a/hw/syn/xilinx/xrt/Makefile +++ b/hw/syn/xilinx/xrt/Makefile @@ -88,6 +88,9 @@ RTL_INCLUDE += $(FPU_INCLUDE) VPP_FLAGS += --link --target $(TARGET) --platform $(PLATFORM) --save-temps --no_ip_cache VPP_FLAGS += --vivado.synth.jobs $(JOBS) --vivado.impl.jobs 
$(JOBS) +# register compilation hooks +VPP_FLAGS += --xp "vivado_prop:run.impl_1.STEPS.OPT_DESIGN.TCL.PRE={$(SCRIPT_DIR)/xilinx_async_bram_patch.tcl}" + # load platform settings include $(SRC_DIR)/platforms.mk From dfc7b6178c57b8badae492fa644766ec6d7ce95c Mon Sep 17 00:00:00 2001 From: tinebp Date: Wed, 13 Nov 2024 20:56:06 -0800 Subject: [PATCH 330/407] cleanup old cache test --- hw/rtl/cache/VX_cache.sv | 4 - hw/unittest/Makefile | 3 - hw/unittest/cache/Makefile | 26 --- hw/unittest/cache/cachesim.cpp | 354 -------------------------------- hw/unittest/cache/cachesim.h | 104 ---------- hw/unittest/cache/ram.h | 77 ------- hw/unittest/cache/testbench.cpp | 248 ---------------------- 7 files changed, 816 deletions(-) delete mode 100644 hw/unittest/cache/Makefile delete mode 100644 hw/unittest/cache/cachesim.cpp delete mode 100644 hw/unittest/cache/cachesim.h delete mode 100644 hw/unittest/cache/ram.h delete mode 100644 hw/unittest/cache/testbench.cpp diff --git a/hw/rtl/cache/VX_cache.sv b/hw/rtl/cache/VX_cache.sv index 8c3db21f4e..d8a5dbaa23 100644 --- a/hw/rtl/cache/VX_cache.sv +++ b/hw/rtl/cache/VX_cache.sv @@ -82,10 +82,6 @@ module VX_cache import VX_gpu_pkg::*; #( `STATIC_ASSERT(WRITE_ENABLE || !WRITEBACK, ("invalid parameter: writeback requires write enable")) `STATIC_ASSERT(WRITEBACK || !DIRTY_BYTES, ("invalid parameter: dirty bytes require writeback")) - // In writeback mode, memory fill response may issue a new memory request to handle evicted blocks. - // We need to ensure that the memory request queue never fills up to avoid deadlock. 
- `STATIC_ASSERT(!WRITEBACK || (MREQ_SIZE >= MSHR_SIZE), ("invalid parameter: writeback requires MREQ_SIZE >= MSHR_SIZE")) - localparam REQ_SEL_WIDTH = `UP(`CS_REQ_SEL_BITS); localparam WORD_SEL_WIDTH = `UP(`CS_WORD_SEL_BITS); localparam MSHR_ADDR_WIDTH = `LOG2UP(MSHR_SIZE); diff --git a/hw/unittest/Makefile b/hw/unittest/Makefile index f37d6ae1b3..d3a74d7940 100644 --- a/hw/unittest/Makefile +++ b/hw/unittest/Makefile @@ -1,5 +1,4 @@ all: - $(MAKE) -C cache $(MAKE) -C generic_queue $(MAKE) -C mem_streamer $(MAKE) -C cache_top @@ -9,7 +8,6 @@ all: $(MAKE) -C mem_unit_top run: - $(MAKE) -C cache run $(MAKE) -C generic_queue run $(MAKE) -C mem_streamer run $(MAKE) -C cache_top run @@ -19,7 +17,6 @@ run: $(MAKE) -C mem_unit_top run clean: - $(MAKE) -C cache clean $(MAKE) -C generic_queue clean $(MAKE) -C mem_streamer clean $(MAKE) -C cache_top clean diff --git a/hw/unittest/cache/Makefile b/hw/unittest/cache/Makefile deleted file mode 100644 index b734aaeddf..0000000000 --- a/hw/unittest/cache/Makefile +++ /dev/null @@ -1,26 +0,0 @@ -ROOT_DIR := $(realpath ../../..) 
-include $(ROOT_DIR)/config.mk - -PROJECT := cache - -RTL_DIR := $(VORTEX_HOME)/hw/rtl -DPI_DIR := $(VORTEX_HOME)/hw/dpi - -SRC_DIR := $(VORTEX_HOME)/hw/unittest/$(PROJECT) - -CXXFLAGS := -I$(SRC_DIR) -I$(VORTEX_HOME)/hw/unittest/common -I$(VORTEX_HOME)/sim/common -CXXFLAGS += -I$(ROOT_DIR)/hw - -SRCS := $(DPI_DIR)/util_dpi.cpp -SRCS += $(SRC_DIR)/cachesim.cpp $(SRC_DIR)/testbench.cpp - -DBG_TRACE_FLAGS := -DDBG_TRACE_CACHE - -RTL_PKGS := $(RTL_DIR)/VX_gpu_pkg.sv - -RTL_INCLUDE := -I$(RTL_DIR) -I$(DPI_DIR) -I$(RTL_DIR)/libs -RTL_INCLUDE += -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache - -TOP := VX_cache_top - -include ../common.mk \ No newline at end of file diff --git a/hw/unittest/cache/cachesim.cpp b/hw/unittest/cache/cachesim.cpp deleted file mode 100644 index acd68419bc..0000000000 --- a/hw/unittest/cache/cachesim.cpp +++ /dev/null @@ -1,354 +0,0 @@ -// Copyright © 2019-2023 -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#include "cachesim.h" -#include -#include -#include -#include -#include - -#ifndef TRACE_START_TIME -#define TRACE_START_TIME 0ull -#endif - -#ifndef TRACE_STOP_TIME -#define TRACE_STOP_TIME -1ull -#endif - -static uint64_t timestamp = 0; -static bool trace_enabled = false; -static uint64_t trace_start_time = TRACE_START_TIME; -static uint64_t trace_stop_time = TRACE_STOP_TIME; - -double sc_time_stamp() { - return timestamp; -} - -bool sim_trace_enabled() { - if (timestamp >= trace_start_time - && timestamp < trace_stop_time) - return true; - return trace_enabled; -} - -void sim_trace_enable(bool enable) { - trace_enabled = enable; -} - -CacheSim::CacheSim() { - // force random values for uninitialized signals - Verilated::randReset(2); - - // create RTL module instance - cache_ = new VVX_cache_top(); - -#ifdef VCD_OUTPUT - Verilated::traceEverOn(true); - tfp_ = new VerilatedVcdC; - cache_->trace(tfp_, 99); - tfp_->open("trace.vcd"); -#endif - - ram_ = nullptr; - mem_rsp_active_ = false; - snp_req_active_ = false; -} - -CacheSim::~CacheSim() { -#ifdef VCD_OUTPUT - tfp_->close(); -#endif - delete cache_; - //need to delete the req and rsp vectors -} - -void CacheSim::attach_ram(RAM* ram) { - ram_ = ram; - mem_rsp_vec_.clear(); -} - -void CacheSim::reset() { -#ifndef NDEBUG - std::cout << timestamp << ": [sim] reset()" << std::endl; -#endif - - cache_->reset = 1; - this->step(); - cache_->reset = 0; - this->step(); - - mem_rsp_vec_.clear(); - //clear req and rsp vecs - -} - -void CacheSim::step() { - //std::cout << timestamp << ": [sim] step()" << std::endl; - //toggle clock - cache_->clk = 0; - this->eval(); - - cache_->clk = 1; - this->eval(); - - //handle core and memory reqs and rsps - this->eval_reqs(); - this->eval_rsps(); - this->eval_mem_bus(); - timestamp++; -} - -void CacheSim::eval() { - cache_->eval(); -#ifdef VCD_OUTPUT - tfp_->dump(timestamp); -#endif - ++timestamp; -} - -void CacheSim::run(){ -//#ifndef NDEBUG - -//#endif - this->step(); - - int 
valid = 300; - int stalls = 20 + 10; - - while (valid > -1) { - - this->step(); - display_miss(); - if(cache_->core_rsp_valid){ - get_core_rsp(); - } - - if(!cache_->core_req_valid && !cache_->core_rsp_valid){ - valid--; - - } - stalls--; - if (stalls == 20){ - //stall_mem(); - //send_snoop_req(); - stalls--; - } - } -} - -void CacheSim::clear_req(){ - cache_->core_req_valid = 0; -} - -void CacheSim::send_req(core_req_t *req){ - core_req_vec_.push(req); - unsigned int *data = new unsigned int[4]; - core_rsp_vec_.insert(std::pair(req->tag, data)); -} - -bool CacheSim::get_core_req_ready(){ - return cache_->core_req_ready; -} - -bool CacheSim::get_core_rsp_ready(){ - return cache_->core_rsp_ready; -} - -void CacheSim::eval_reqs(){ - //check to see if cache is accepting reqs - if(!core_req_vec_.empty() && cache_->core_req_ready){ - core_req_t *req = core_req_vec_.front(); - - cache_->core_req_valid = req->valid; - cache_->core_req_rw = req->rw; - cache_->core_req_byteen = req->byteen; - - cache_->core_req_addr[0] = req->addr[0]; - cache_->core_req_addr[1] = req->addr[1]; - cache_->core_req_addr[2] = req->addr[2]; - cache_->core_req_addr[3] = req->addr[3]; - - cache_->core_req_data[0] = req->data[0]; - cache_->core_req_data[1] = req->data[1]; - cache_->core_req_data[2] = req->data[2]; - cache_->core_req_data[3] = req->data[3]; - - cache_->core_req_tag = req->tag; - - core_req_vec_.pop(); - - } else { - clear_req(); - } -} - -void CacheSim::eval_rsps(){ - //check to see if a request has been responded to - if (cache_->core_rsp_valid){ - core_rsp_vec_.at(cache_->core_rsp_tag)[0] = cache_->core_rsp_data[0]; - core_rsp_vec_.at(cache_->core_rsp_tag)[1] = cache_->core_rsp_data[1]; - core_rsp_vec_.at(cache_->core_rsp_tag)[2] = cache_->core_rsp_data[2]; - core_rsp_vec_.at(cache_->core_rsp_tag)[3] = cache_->core_rsp_data[3]; - } -} - -void CacheSim::stall_mem(){ - cache_->mem_req_ready = 0; -} - -void CacheSim::send_snoop_req(){ - /*cache_->snp_req_valid = 1; - 
cache_->snp_req_addr = 0x12222222; - cache_->snp_req_invalidate = 1; - cache_->snp_req_tag = 0xff; */ -} - -void CacheSim::eval_mem_bus() { - if (ram_ == nullptr) { - cache_->mem_req_ready = 0; - return; - } - - // schedule memory responses - int dequeue_index = -1; - for (int i = 0; i < mem_rsp_vec_.size(); i++) { - if (mem_rsp_vec_[i].cycles_left > 0) { - mem_rsp_vec_[i].cycles_left -= 1; - } - if ((dequeue_index == -1) - && (mem_rsp_vec_[i].cycles_left == 0)) { - dequeue_index = i; - } - } - - // send memory response - if (mem_rsp_active_ - && cache_->mem_rsp_valid - && cache_->mem_rsp_ready) { - mem_rsp_active_ = false; - } - if (!mem_rsp_active_) { - if (dequeue_index != -1) { //time to respond to the request - cache_->mem_rsp_valid = 1; - - //copy data from the rsp queue to the cache module - memcpy(cache_->mem_rsp_data.data(), mem_rsp_vec_[dequeue_index].data, MEM_BLOCK_SIZE); - - cache_->mem_rsp_tag = mem_rsp_vec_[dequeue_index].tag; - free(mem_rsp_vec_[dequeue_index].data); //take data out of the queue - mem_rsp_vec_.erase(mem_rsp_vec_.begin() + dequeue_index); - mem_rsp_active_ = true; - } else { - cache_->mem_rsp_valid = 0; - } - } - - // handle memory stalls - bool mem_stalled = false; -#ifdef ENABLE_MEM_STALLS - if (0 == ((timestamp/2) % MEM_STALLS_MODULO)) { - mem_stalled = true; - } else - if (mem_rsp_vec_.size() >= MEM_RQ_SIZE) { - mem_stalled = true; - } -#endif - - // process memory requests - if (!mem_stalled) { - if (cache_->mem_req_valid) { - if (cache_->mem_req_rw) { //write = 1 - uint64_t byteen = cache_->mem_req_byteen; - uint64_t base_addr = (cache_->mem_req_addr * MEM_BLOCK_SIZE); - uint8_t* data = reinterpret_cast(cache_->mem_req_data.data()); - for (int i = 0; i < MEM_BLOCK_SIZE; i++) { - if ((byteen >> i) & 0x1) { - (*ram_)[base_addr + i] = data[i]; - } - } - } else { - mem_req_t mem_req; - mem_req.cycles_left = MEM_LATENCY; - mem_req.data = (uint8_t*)malloc(MEM_BLOCK_SIZE); - mem_req.tag = cache_->mem_req_tag; - 
ram_->read(cache_->mem_req_addr * MEM_BLOCK_SIZE, MEM_BLOCK_SIZE, mem_req.data); - mem_rsp_vec_.push_back(mem_req); - } - } - } - - cache_->mem_req_ready = ~mem_stalled; -} - -bool CacheSim::assert_equal(unsigned int* data, unsigned int tag){ - int check = 0; - unsigned int *rsp = core_rsp_vec_.at(tag); - for (int i = 0; i < 4; ++i){ - for (int j = 0; j < 4; ++j){ - if (data[i] == rsp[j]){ - check++; - } - } - } - - return check; - -} - -//DEBUG - -void CacheSim::display_miss(){ - //int i = (unsigned int)cache_->miss_vec; - //std::bitset<8> x(i); - //if (i) std::cout << "Miss Vec " << x << std::endl; - //std::cout << "Miss Vec 0" << cache_->miss_vec[0] << std::endl; -} - -void CacheSim::get_core_req(unsigned int (&rsp)[4]){ - rsp[0] = cache_->core_rsp_data[0]; - rsp[1] = cache_->core_rsp_data[1]; - rsp[2] = cache_->core_rsp_data[2]; - rsp[3] = cache_->core_rsp_data[3]; - - //std::cout << std::hex << "core_rsp_valid: " << cache_->core_rsp_valid << std::endl; - //std::cout << std::hex << "core_rsp_data: " << cache_->core_rsp_data << std::endl; - //std::cout << std::hex << "core_rsp_tag: " << cache_->core_rsp_tag << std::endl; -} - -void CacheSim::get_core_rsp(){ - //std::cout << cache_->genblk5_BRA_0_KET_->bank->is_fill_in_pipe<< std::endl; - char check = cache_->core_rsp_valid; - std::cout << std::hex << "core_rsp_valid: " << (unsigned int) check << std::endl; - std::cout << std::hex << "core_rsp_data[0]: " << cache_->core_rsp_data[0] << std::endl; - std::cout << std::hex << "core_rsp_data[1]: " << cache_->core_rsp_data[1] << std::endl; - std::cout << std::hex << "core_rsp_data[2]: " << cache_->core_rsp_data[2] << std::endl; - std::cout << std::hex << "core_rsp_data[3]: " << cache_->core_rsp_data[3] << std::endl; - std::cout << std::hex << "core_rsp_tag: " << cache_->core_rsp_tag << std::endl; -} - -void CacheSim::get_mem_req(){ - std::cout << std::hex << "mem_req_valid: " << cache_->mem_req_valid << std::endl; - std::cout << std::hex << "mem_req_rw: " << 
cache_->mem_req_rw << std::endl; - std::cout << std::hex << "mem_req_byteen: " << cache_->mem_req_byteen << std::endl; - std::cout << std::hex << "mem_req_addr: " << cache_->mem_req_addr << std::endl; - std::cout << std::hex << "mem_req_data: " << cache_->mem_req_data << std::endl; - std::cout << std::hex << "mem_req_tag: " << cache_->mem_req_tag << std::endl; -} - -void CacheSim::get_mem_rsp(){ - std::cout << std::hex << "mem_rsp_valid: " << cache_->mem_rsp_valid << std::endl; - std::cout << std::hex << "mem_rsp_data: " << cache_->mem_rsp_data << std::endl; - std::cout << std::hex << "mem_rsp_tag: " << cache_->mem_rsp_tag << std::endl; - std::cout << std::hex << "mem_rsp_ready: " << cache_->mem_rsp_ready << std::endl; -} diff --git a/hw/unittest/cache/cachesim.h b/hw/unittest/cache/cachesim.h deleted file mode 100644 index 5235735d6c..0000000000 --- a/hw/unittest/cache/cachesim.h +++ /dev/null @@ -1,104 +0,0 @@ -// Copyright © 2019-2023 -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#pragma once - -#include "VVX_cache_top.h" -#include "VVX_cache_top__Syms.h" -#include "verilated.h" - -#ifdef VCD_OUTPUT -#include -#endif - -#include -#include "ram.h" -#include -#include -#include - -#define ENABLE_MEM_STALLS -#define MEM_LATENCY 100 -#define MEM_RQ_SIZE 16 -#define MEM_STALLS_MODULO 16 - -typedef struct { - int cycles_left; - uint8_t *data; - unsigned tag; -} mem_req_t; - -typedef struct { - char valid; - char rw; - unsigned byteen; - unsigned *addr; - unsigned *data; - unsigned int tag; -} core_req_t; - -class CacheSim { -public: - - CacheSim(); - virtual ~CacheSim(); - - bool busy(); - - void reset(); - void step(); - void wait(uint32_t cycles); - void attach_ram(RAM* ram); - void run(); //run until all reqs are empty - - //req/rsp - void send_req(core_req_t *req); - void clear_req(); - void stall_mem(); - void send_snoop_req(); - void send_snp_fwd_in(); - - //assert funcs - bool assert_equal(unsigned int* data, unsigned int tag); - - //debug funcs - void get_mem_req(); - void get_core_req(unsigned int (&rsp)[4]); - void get_core_rsp(); - bool get_core_req_ready(); - bool get_core_rsp_ready(); - void get_mem_rsp(); - void display_miss(); - -private: - - void eval(); - void eval_reqs(); - void eval_rsps(); - void eval_mem_bus(); - - std::queue core_req_vec_; - std::vector mem_rsp_vec_; - std::map core_rsp_vec_; - int mem_rsp_active_; - - uint32_t snp_req_active_; - uint32_t snp_req_size_; - uint32_t pending_snp_reqs_; - - VVX_cache_top* cache_; - RAM* ram_; -#ifdef VCD_OUTPUT - VerilatedVcdC* tfp_; -#endif -}; diff --git a/hw/unittest/cache/ram.h b/hw/unittest/cache/ram.h deleted file mode 100644 index d01934a520..0000000000 --- a/hw/unittest/cache/ram.h +++ /dev/null @@ -1,77 +0,0 @@ -// Copyright © 2019-2023 -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once - -#include -#include - -class RAM { -private: - - mutable uint8_t *mem_[(1 << 12)]; - - uint8_t *get(uint32_t address) const { - uint32_t block_addr = address >> 20; - uint32_t block_offset = address & 0x000FFFFF; - if (mem_[block_addr] == NULL) { - mem_[block_addr] = new uint8_t[(1 << 20)]; - } - return mem_[block_addr] + block_offset; - } - -public: - - RAM() { - for (uint32_t i = 0; i < (1 << 12); i++) { - mem_[i] = NULL; - } - } - - ~RAM() { - this->clear(); - } - - size_t size() const { - return (1ull << 32); - } - - void clear() { - for (uint32_t i = 0; i < (1 << 12); i++) { - if (mem_[i]) { - delete [] mem_[i]; - mem_[i] = NULL; - } - } - } - - void read(uint32_t address, uint32_t length, uint8_t *data) const { - for (unsigned i = 0; i < length; i++) { - data[i] = *this->get(address + i); - } - } - - void write(uint32_t address, uint32_t length, const uint8_t *data) { - for (unsigned i = 0; i < length; i++) { - *this->get(address + i) = data[i]; - } - } - - uint8_t& operator[](uint32_t address) { - return *get(address); - } - - const uint8_t& operator[](uint32_t address) const { - return *get(address); - } -}; \ No newline at end of file diff --git a/hw/unittest/cache/testbench.cpp b/hw/unittest/cache/testbench.cpp deleted file mode 100644 index bf9dfb3404..0000000000 --- a/hw/unittest/cache/testbench.cpp +++ /dev/null @@ -1,248 +0,0 @@ -// Copyright © 2019-2023 -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "cachesim.h" -#include -#include -#include - -#define VCD_OUTPUT 1 - - -int REQ_RSP(CacheSim *sim){ //verified - unsigned int addr[4] = {0x12222222, 0xabbbbbbb, 0xcddddddd, 0xe4444444}; - unsigned int data[4] = {0xffffffff, 0x11111111, 0x22222222, 0x33333333}; - unsigned int rsp[4] = {0,0,0,0}; - char responded = 0; - //write req - core_req_t* write = new core_req_t; - write->valid = 0xf; - write->rw = 0xf; - write->byteen = 0xffff; - write->addr = addr; - write->data = data; - write->tag = 0xff; - - //read req - core_req_t* read = new core_req_t; - read->valid = 0xf; - read->rw = 0; - read->byteen = 0xffff; - read->addr = addr; - read->data = addr; - read->tag = 0xff; - - // reset the device - sim->reset(); - - //queue reqs - sim->send_req(write); - sim->send_req(read); - - sim->run(); - - int check = sim->assert_equal(data, write->tag); - - if (check == 4) return 1; - - return 0; -} - -int HIT_1(CacheSim *sim){ - unsigned int addr[4] = {0x12222222, 0xabbbbbbb, 0xcddddddd, 0xe4444444}; - unsigned int data[4] = {0xffffffff, 0x11111111, 0x22222222, 0x33333333}; - unsigned int rsp[4] = {0,0,0,0}; - char responded = 0; - //write req - core_req_t* write = new core_req_t; - write->valid = 0xf; - write->rw = 0xf; - write->byteen = 0xffff; - write->addr = addr; - write->data = data; - write->tag = 0x11; - - //read req - core_req_t* read = new core_req_t; - read->valid = 0xf; - read->rw = 0; - read->byteen = 0xffff; - read->addr = addr; - read->data = addr; - read->tag = 0x22; - - // reset the device - sim->reset(); - - //queue reqs - 
sim->send_req(write); - sim->send_req(read); - - sim->run(); - - bool check = sim->assert_equal(data, write->tag); - - return check; -} - -int MISS_1(CacheSim *sim){ - unsigned int addr1[4] = {0x12222222, 0xabbbbbbb, 0xcddddddd, 0xe4444444}; - unsigned int addr2[4] = {0x12229222, 0xabbbb4bb, 0xcddd47dd, 0xe4423544}; - unsigned int addr3[4] = {0x12223332, 0xabb454bb, 0xcdddeefd, 0xe4447744}; - unsigned int data[4] = {0xffffffff, 0x11111111, 0x22222222, 0x33333333}; - unsigned int rsp[4] = {0,0,0,0}; - char responded = 0; - //write req - core_req_t* write = new core_req_t; - write->valid = 0xf; - write->rw = 0xf; - write->byteen = 0xffff; - write->addr = addr1; - write->data = data; - write->tag = 0xff; - - //read req - core_req_t* read1 = new core_req_t; - read1->valid = 0xf; - read1->rw = 0; - read1->byteen = 0xffff; - read1->addr = addr1; - read1->data = data; - read1->tag = 0xff; - - core_req_t* read2 = new core_req_t; - read2->valid = 0xf; - read2->rw = 0; - read2->byteen = 0xffff; - read2->addr = addr2; - read2->data = data; - read2->tag = 0xff; - - core_req_t* read3 = new core_req_t; - read3->valid = 0xf; - read3->rw = 0; - read3->byteen = 0xffff; - read3->addr = addr3; - read3->data = data; - read3->tag = 0xff; - - // reset the device - sim->reset(); - - //queue reqs - sim->send_req(write); - sim->send_req(read1); - sim->send_req(read2); - sim->send_req(read3); - - sim->run(); - - bool check = sim->assert_equal(data, write->tag); - - return check; -} -int FLUSH(CacheSim *sim){ - unsigned int addr[4] = {0x12222222, 0xabbbbbbb, 0xcddddddd, 0xe4444444}; - unsigned int data[4] = {0xffffffff, 0x11111111, 0x22222222, 0x33333333}; - unsigned int rsp[4] = {0,0,0,0}; - char responded = 0; - //write req - core_req_t* write = new core_req_t; - write->valid = 0xf; - write->rw = 0xf; - write->byteen = 0xffff; - write->addr = addr; - write->data = data; - write->tag = 0xff; - - //read req - core_req_t* read = new core_req_t; - read->valid = 0xf; - read->rw = 0; - 
read->byteen = 0xffff; - read->addr = addr; - read->data = addr; - read->tag = 0xff; - - // reset the device - sim->reset(); - - //queue reqs - sim->send_req(write); - sim->send_req(read); - - sim->run(); - - bool check = sim->assert_equal(data, write->tag); - - return check; -} - - -int BACK_PRESSURE(CacheSim *sim){ - //happens whenever the core is stalled or memory is stalled - unsigned int addr[4] = {0x12222222, 0xabbbbbbb, 0xcddddddd, 0xe4444444}; - unsigned int data[4] = {0xffffffff, 0x11111111, 0x22222222, 0x33333333}; - unsigned int rsp[4] = {0,0,0,0}; - char responded = 0; - - //write req - core_req_t* write = new core_req_t; - write->valid = 0xf; - write->rw = 0xf; - write->byteen = 0xffff; - write->addr = addr; - write->data = data; - write->tag = 0xff; - - //read req - core_req_t* read = new core_req_t; - read->valid = 0xf; - read->rw = 0; - read->byteen = 0xffff; - read->addr = addr; - read->data = addr; - read->tag = 0xff; - - // reset the device - sim->reset(); - - //queue reqs - for (int i = 0; i < 10; i++){ - sim->send_req(write); - } - sim->send_req(read); - - sim->run(); - - bool check = sim->assert_equal(data, write->tag); - - return check; -} - - -int main(int argc, char **argv) -{ - //init - RAM ram; - CacheSim cachesim; - cachesim.attach_ram(&ram); - int check = REQ_RSP(&cachesim); - if(check){ - std::cout << "PASSED" << std::endl; - } else { - std::cout << "FAILED" << std::endl; - } - - return 0; -} From 8230b37411dfe28fe1b59a25a5de4c7de276cf90 Mon Sep 17 00:00:00 2001 From: tinebp Date: Thu, 14 Nov 2024 11:42:21 -0800 Subject: [PATCH 331/407] fixed opae build bug --- hw/rtl/afu/opae/vortex_afu.sv | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/hw/rtl/afu/opae/vortex_afu.sv b/hw/rtl/afu/opae/vortex_afu.sv index f21f851c02..fc4301de73 100644 --- a/hw/rtl/afu/opae/vortex_afu.sv +++ b/hw/rtl/afu/opae/vortex_afu.sv @@ -46,7 +46,11 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; 
import VX_ ); localparam LMEM_DATA_WIDTH = $bits(t_local_mem_data); localparam LMEM_DATA_SIZE = LMEM_DATA_WIDTH / 8; - localparam LMEM_ADDR_WIDTH = `VX_MEM_ADDR_WIDTH + ($clog2(`VX_MEM_DATA_WIDTH) - $clog2(LMEM_DATA_WIDTH)); + localparam LMEM_ADDR_WIDTH = $bits(t_local_mem_addr); + + localparam LMEM_BYTE_ADDR_WIDTH = LMEM_ADDR_WIDTH + $clog2(LMEM_DATA_SIZE); + localparam CCI_VX_ADDR_WIDTH = `VX_MEM_ADDR_WIDTH + ($clog2(`VX_MEM_DATA_WIDTH) - $clog2(LMEM_DATA_WIDTH)); + localparam LMEM_BURST_CTRW = $bits(t_local_mem_burst_cnt); localparam CCI_DATA_WIDTH = $bits(t_ccip_clData); @@ -103,8 +107,8 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ wire [127:0] afu_id = `AFU_ACCEL_UUID; wire [63:0] dev_caps = {8'b0, - 5'(`PLATFORM_MEMORY_ADDR_WIDTH-20), - 3'(`CLOG2(`PLATFORM_MEMORY_BANKS)), + 5'(LMEM_BYTE_ADDR_WIDTH-20), + 3'(`CLOG2(NUM_LOCAL_MEM_BANKS)), 8'(`LMEM_ENABLED ? `LMEM_LOG_SIZE : 0), 16'(`NUM_CORES * `NUM_CLUSTERS), 8'(`NUM_WARPS), @@ -480,7 +484,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ VX_mem_bus_if #( .DATA_SIZE (LMEM_DATA_SIZE), - .ADDR_WIDTH (LMEM_ADDR_WIDTH), + .ADDR_WIDTH (CCI_VX_ADDR_WIDTH), .TAG_WIDTH (AVS_REQ_TAGW) ) cci_vx_mem_bus_if[2](); @@ -488,7 +492,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ .SRC_DATA_WIDTH (CCI_DATA_WIDTH), .DST_DATA_WIDTH (LMEM_DATA_WIDTH), .SRC_ADDR_WIDTH (CCI_ADDR_WIDTH), - .DST_ADDR_WIDTH (LMEM_ADDR_WIDTH), + .DST_ADDR_WIDTH (CCI_VX_ADDR_WIDTH), .SRC_TAG_WIDTH (CCI_ADDR_WIDTH), .DST_TAG_WIDTH (AVS_REQ_TAGW), .REQ_OUT_BUF (0), @@ -538,7 +542,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ .SRC_DATA_WIDTH (`VX_MEM_DATA_WIDTH), .DST_DATA_WIDTH (LMEM_DATA_WIDTH), .SRC_ADDR_WIDTH (`VX_MEM_ADDR_WIDTH), - .DST_ADDR_WIDTH (LMEM_ADDR_WIDTH), + .DST_ADDR_WIDTH (CCI_VX_ADDR_WIDTH), .SRC_TAG_WIDTH (`VX_MEM_TAG_WIDTH), .DST_TAG_WIDTH (AVS_REQ_TAGW), .REQ_OUT_BUF (0), @@ -579,14 
+583,14 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ //-- VX_mem_bus_if #( .DATA_SIZE (LMEM_DATA_SIZE), - .ADDR_WIDTH (LMEM_ADDR_WIDTH), + .ADDR_WIDTH (CCI_VX_ADDR_WIDTH), .TAG_WIDTH (AVS_REQ_TAGW+1) ) mem_bus_if[1](); VX_mem_arb #( .NUM_INPUTS (2), .DATA_SIZE (LMEM_DATA_SIZE), - .ADDR_WIDTH (LMEM_ADDR_WIDTH), + .ADDR_WIDTH (CCI_VX_ADDR_WIDTH), .TAG_WIDTH (AVS_REQ_TAGW), .ARBITER ("P"), // prioritize VX requests .REQ_OUT_BUF (0), @@ -602,8 +606,8 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ VX_avs_adapter #( .DATA_WIDTH (LMEM_DATA_WIDTH), - .ADDR_WIDTH_IN (LMEM_ADDR_WIDTH), - .ADDR_WIDTH_OUT($bits(t_local_mem_addr)), + .ADDR_WIDTH_IN (CCI_VX_ADDR_WIDTH), + .ADDR_WIDTH_OUT(LMEM_ADDR_WIDTH), .BURST_WIDTH (LMEM_BURST_CTRW), .NUM_BANKS (NUM_LOCAL_MEM_BANKS), .TAG_WIDTH (AVS_REQ_TAGW + 1), From b48b605b51eaddac879d4642021ccbe1de7656a5 Mon Sep 17 00:00:00 2001 From: tinebp Date: Fri, 15 Nov 2024 03:42:06 -0800 Subject: [PATCH 332/407] remove deprecared yosys link --- hw/syn/yosys/synth.sh | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/hw/syn/yosys/synth.sh b/hw/syn/yosys/synth.sh index b44f16e6b7..76559b8d3c 100755 --- a/hw/syn/yosys/synth.sh +++ b/hw/syn/yosys/synth.sh @@ -28,7 +28,7 @@ dir_list=() inc_args="" macro_args="" no_warnings=1 -process="elaborate,netlist,techmap,verilog,link" +process="elaborate,netlist,techmap,verilog" declare -a excluded_warnings=("Resizing cell port") @@ -135,11 +135,6 @@ done echo "synth -top $top_level" fi - # link design - if echo "$process" | grep -q "link"; then - echo "link_design -top $top_level" - fi - # convert to netlist if echo "$process" | grep -q "netlist"; then echo "proc; opt" From 320c090613ab4a17be410e3c1860cf689c0b3da5 Mon Sep 17 00:00:00 2001 From: tinebp Date: Tue, 19 Nov 2024 01:57:33 -0800 Subject: [PATCH 333/407] xilinx asynchronous bram patch fixes --- hw/rtl/VX_platform.vh | 3 + hw/rtl/libs/VX_async_ram_patch.sv | 
236 +++++++++++++------ hw/rtl/libs/VX_dp_ram.sv | 64 +++--- hw/rtl/libs/VX_rr_arbiter.sv | 2 +- hw/rtl/libs/VX_sp_ram.sv | 124 +++++----- hw/scripts/xilinx_async_bram_patch.tcl | 301 +++++++++++++++++-------- hw/scripts/xilinx_export_netlist.tcl | 13 ++ hw/syn/xilinx/README | 3 + hw/syn/xilinx/xrt/Makefile | 1 + 9 files changed, 490 insertions(+), 257 deletions(-) diff --git a/hw/rtl/VX_platform.vh b/hw/rtl/VX_platform.vh index d874b9b2b4..08a2f6ca5b 100644 --- a/hw/rtl/VX_platform.vh +++ b/hw/rtl/VX_platform.vh @@ -163,6 +163,7 @@ endgenerate `define USE_BLOCK_BRAM (* ramstyle = "block" *) `define USE_FAST_BRAM (* ramstyle = "MLAB, no_rw_check" *) `define NO_RW_RAM_CHECK (* altera_attribute = "-name add_pass_through_logic_to_inferred_rams off" *) +`define RW_RAM_CHECK (* altera_attribute = "-name add_pass_through_logic_to_inferred_rams on" *) `define DISABLE_BRAM (* ramstyle = "logic" *) `define PRESERVE_NET (* preserve *) `define BLACKBOX_CELL (* black_box *) @@ -173,6 +174,7 @@ endgenerate `define USE_BLOCK_BRAM (* ram_style = "block" *) `define USE_FAST_BRAM (* ram_style = "distributed" *) `define NO_RW_RAM_CHECK (* rw_addr_collision = "no" *) +`define RW_RAM_CHECK (* rw_addr_collision = "yes" *) `define DISABLE_BRAM (* ram_style = "registers" *) `define PRESERVE_NET (* keep = "true" *) `define BLACKBOX_CELL (* black_box *) @@ -183,6 +185,7 @@ endgenerate `define USE_BLOCK_BRAM `define USE_FAST_BRAM `define NO_RW_RAM_CHECK +`define RW_RAM_CHECK `define DISABLE_BRAM `define PRESERVE_NET `define BLACKBOX_CELL diff --git a/hw/rtl/libs/VX_async_ram_patch.sv b/hw/rtl/libs/VX_async_ram_patch.sv index fd29e881d9..43e8139e65 100644 --- a/hw/rtl/libs/VX_async_ram_patch.sv +++ b/hw/rtl/libs/VX_async_ram_patch.sv @@ -13,12 +13,6 @@ `include "VX_platform.vh" -`define RAM_WRITE_WREN for (integer i = 0; i < WRENW; ++i) begin \ - if (wren[i]) begin \ - ram[waddr][i * WSELW +: WSELW] <= wdata[i * WSELW +: WSELW]; \ - end \ - end - `define RAM_INITIALIZATION \ if (INIT_ENABLE 
!= 0) begin : g_init \ if (INIT_FILE != "") begin : g_file \ @@ -32,14 +26,93 @@ end \ end -`define RAM_BYPASS(__d) \ - reg [DATAW-1:0] bypass_data_r; \ - reg bypass_valid_r; \ +`define SYNC_RAM_WF_BLOCK(__d, __re, __we, __ra, __wa) \ + `RAM_ATTRIBUTES `RW_RAM_CHECK reg [DATAW-1:0] ram [0:SIZE-1]; \ + `RAM_INITIALIZATION \ + reg [ADDRW-1:0] raddr_r; \ + always @(posedge clk) begin \ + if (__re || __we) begin \ + if (__we) begin \ + ram[__wa] <= wdata; \ + end \ + raddr_r <= __ra; \ + end \ + end \ + assign __d = ram[raddr_r] + +`define SYNC_RAM_WF_WREN_BLOCK(__d, __re, __we, __ra, __wa) \ + `RAM_ATTRIBUTES `RW_RAM_CHECK reg [DATAW-1:0] ram [0:SIZE-1]; \ + `RAM_INITIALIZATION \ + reg [ADDRW-1:0] raddr_r; \ always @(posedge clk) begin \ - bypass_valid_r <= read_s && write && (raddr_s == waddr); \ - bypass_data_r <= wdata; \ + if (__re || __we) begin \ + if (__we) begin \ + for (integer i = 0; i < WRENW; ++i) begin \ + if (wren[i]) begin \ + ram[__wa][i * WSELW +: WSELW] <= wdata[i * WSELW +: WSELW]; \ + end \ + end \ + end \ + raddr_r <= __ra; \ + end \ end \ - assign __d = bypass_valid_r ? 
bypass_data_r : rdata_r + assign __d = ram[raddr_r] + +`define SYNC_RAM_RF_BLOCK(__d, __re, __we, __ra, __wa) \ + `RAM_ATTRIBUTES reg [DATAW-1:0] ram [0:SIZE-1]; \ + `RAM_INITIALIZATION \ + reg [DATAW-1:0] rdata_r; \ + always @(posedge clk) begin \ + if (__re || __we) begin \ + if (__we) begin \ + ram[__wa] <= wdata; \ + end \ + rdata_r <= ram[__ra]; \ + end \ + end \ + assign __d = rdata_r + +`define SYNC_RAM_RF_WREN_BLOCK(__d, __re, __we, __ra, __wa) \ + `RAM_ATTRIBUTES reg [DATAW-1:0] ram [0:SIZE-1]; \ + `RAM_INITIALIZATION \ + reg [DATAW-1:0] rdata_r; \ + always @(posedge clk) begin \ + if (__re || __we) begin \ + if (__we) begin \ + for (integer i = 0; i < WRENW; ++i) begin \ + if (wren[i]) begin \ + ram[__wa][i * WSELW +: WSELW] <= wdata[i * WSELW +: WSELW]; \ + end \ + end \ + end \ + rdata_r <= ram[__ra]; \ + end \ + end \ + assign __d = rdata_r + +`define ASYNC_RAM_BLOCK(__d, __we, __ra, __wa) \ + `RAM_ATTRIBUTES reg [DATAW-1:0] ram [0:SIZE-1]; \ + `RAM_INITIALIZATION \ + always @(posedge clk) begin \ + if (__we) begin \ + ram[__wa] <= wdata; \ + end \ + end \ + assign __d = ram[__ra] + +`define ASYNC_RAM_BLOCK_WREN(__d, __we, __ra, __wa) \ + `RAM_ATTRIBUTES reg [DATAW-1:0] ram [0:SIZE-1]; \ + `RAM_INITIALIZATION \ + always @(posedge clk) begin \ + if (__we) begin \ + for (integer i = 0; i < WRENW; ++i) begin \ + if (wren[i]) begin \ + ram[__wa][i * WSELW +: WSELW] <= wdata[i * WSELW +: WSELW]; \ + end \ + end \ + end \ + end \ + assign __d = ram[__ra] `TRACING_OFF module VX_async_ram_patch #( @@ -47,6 +120,8 @@ module VX_async_ram_patch #( parameter SIZE = 1, parameter WRENW = 1, parameter DUAL_PORT = 0, + parameter FORCE_BRAM = 0, + parameter WRITE_FIRST = 0, parameter INIT_ENABLE = 0, parameter INIT_FILE = "", parameter [DATAW-1:0] INIT_VALUE = 0, @@ -79,77 +154,102 @@ module VX_async_ram_patch #( .out ({raddr_s, read_s, is_raddr_reg}) ); - // synchroneous ram - - wire [DATAW-1:0] rdata_s; + wire [DATAW-1:0] rdata_s, rdata_a; - if (WRENW != 1) begin : 
g_wren_sync_ram - `USE_BLOCK_BRAM reg [DATAW-1:0] ram [0:SIZE-1]; - reg [DATAW-1:0] rdata_r; - `RAM_INITIALIZATION - always @(posedge clk) begin - if (read_s || write) begin - if (write) begin - `RAM_WRITE_WREN + if (1) begin : g_sync_ram + if (WRENW != 1) begin : g_wren + if (FORCE_BRAM) begin : g_bram + if (WRITE_FIRST) begin : g_write_first + `define RAM_ATTRIBUTES `USE_BLOCK_BRAM + `SYNC_RAM_WF_WREN_BLOCK(rdata_s, read_s, write, raddr_s, waddr); + `undef RAM_ATTRIBUTES + end else begin : g_read_first + `define RAM_ATTRIBUTES `USE_BLOCK_BRAM + `SYNC_RAM_RF_WREN_BLOCK(rdata_s, read_s, write, raddr_s, waddr); + `undef RAM_ATTRIBUTES + end + end else begin : g_lutram + if (WRITE_FIRST) begin : g_write_first + `define RAM_ATTRIBUTES + `SYNC_RAM_WF_WREN_BLOCK(rdata_s, read_s, write, raddr_s, waddr); + `undef RAM_ATTRIBUTES + end else begin : g_read_first + `define RAM_ATTRIBUTES + `SYNC_RAM_RF_WREN_BLOCK(rdata_s, read_s, write, raddr_s, waddr); + `undef RAM_ATTRIBUTES end - rdata_r <= ram[raddr_s]; end - end - `RAM_BYPASS(rdata_s); - end else begin : g_no_wren_sync_ram - `USE_BLOCK_BRAM reg [DATAW-1:0] ram [0:SIZE-1]; - reg [DATAW-1:0] rdata_r; - `RAM_INITIALIZATION - `UNUSED_VAR (wren) - always @(posedge clk) begin - if (read_s || write) begin - if (write) begin - ram[waddr] <= wdata; + end else begin : g_no_wren + if (FORCE_BRAM) begin : g_bram + if (WRITE_FIRST) begin : g_write_first + `define RAM_ATTRIBUTES `USE_BLOCK_BRAM + `SYNC_RAM_WF_BLOCK(rdata_s, read_s, write, raddr_s, waddr); + `undef RAM_ATTRIBUTES + end else begin : g_read_first + `define RAM_ATTRIBUTES `USE_BLOCK_BRAM + `SYNC_RAM_RF_BLOCK(rdata_s, read_s, write, raddr_s, waddr); + `undef RAM_ATTRIBUTES + end + end else begin : g_lutram + if (WRITE_FIRST) begin : g_write_first + `define RAM_ATTRIBUTES + `SYNC_RAM_WF_BLOCK(rdata_s, read_s, write, raddr_s, waddr); + `undef RAM_ATTRIBUTES + end else begin : g_read_first + `define RAM_ATTRIBUTES + `SYNC_RAM_RF_BLOCK(rdata_s, read_s, write, raddr_s, waddr); 
+ `undef RAM_ATTRIBUTES end - rdata_r <= ram[raddr_s]; end end - `RAM_BYPASS(rdata_s); end - // asynchronous ram (fallback) - - wire [DATAW-1:0] rdata_a; - - if (DUAL_PORT != 0) begin : g_dp_async_ram - reg [DATAW-1:0] ram [0:SIZE-1]; - `RAM_INITIALIZATION - if (WRENW != 1) begin : g_wren - always @(posedge clk) begin - if (write) begin - `RAM_WRITE_WREN + if (1) begin : g_async_ram + if (DUAL_PORT != 0) begin : g_dp + if (WRENW != 1) begin : g_wren + if (WRITE_FIRST) begin : g_write_first + `define RAM_ATTRIBUTES `RW_RAM_CHECK + `ASYNC_RAM_BLOCK_WREN(rdata_a, write, raddr, waddr); + `undef RAM_ATTRIBUTES + end else begin : g_read_first + `define RAM_ATTRIBUTES `NO_RW_RAM_CHECK + `ASYNC_RAM_BLOCK_WREN(rdata_a, write, raddr, waddr); + `undef RAM_ATTRIBUTES end - end - end else begin : g_no_wren - always @(posedge clk) begin - if (write) begin - ram[waddr] <= wdata; + end else begin : g_no_wren + if (WRITE_FIRST) begin : g_write_first + `define RAM_ATTRIBUTES `RW_RAM_CHECK + `ASYNC_RAM_BLOCK(rdata_a, write, raddr, waddr); + `undef RAM_ATTRIBUTES + end else begin : g_read_first + `define RAM_ATTRIBUTES `NO_RW_RAM_CHECK + `ASYNC_RAM_BLOCK(rdata_a, write, raddr, waddr); + `undef RAM_ATTRIBUTES end end - end - assign rdata_a = ram[raddr]; - end else begin : g_sp_async_ram - reg [DATAW-1:0] ram [0:SIZE-1]; - `RAM_INITIALIZATION - if (WRENW != 1) begin : g_wren - always @(posedge clk) begin - if (write) begin - `RAM_WRITE_WREN + end else begin : g_sp + if (WRENW != 1) begin : g_wren + if (WRITE_FIRST) begin : g_write_first + `define RAM_ATTRIBUTES `RW_RAM_CHECK + `ASYNC_RAM_BLOCK_WREN(rdata_a, write, waddr, waddr); + `undef RAM_ATTRIBUTES + end else begin : g_read_first + `define RAM_ATTRIBUTES `NO_RW_RAM_CHECK + `ASYNC_RAM_BLOCK_WREN(rdata_a, write, waddr, waddr); + `undef RAM_ATTRIBUTES end - end - end else begin : g_no_wren - always @(posedge clk) begin - if (write) begin - ram[waddr] <= wdata; + end else begin : g_no_wren + if (WRITE_FIRST) begin : g_write_first + 
`define RAM_ATTRIBUTES `RW_RAM_CHECK + `ASYNC_RAM_BLOCK(rdata_a, write, waddr, waddr); + `undef RAM_ATTRIBUTES + end else begin : g_read_first + `define RAM_ATTRIBUTES `NO_RW_RAM_CHECK + `ASYNC_RAM_BLOCK(rdata_a, write, waddr, waddr); + `undef RAM_ATTRIBUTES end end end - assign rdata_a = ram[waddr]; end assign rdata = is_raddr_reg ? rdata_s : rdata_a; diff --git a/hw/rtl/libs/VX_dp_ram.sv b/hw/rtl/libs/VX_dp_ram.sv index 0cff67882f..2cb88efe57 100644 --- a/hw/rtl/libs/VX_dp_ram.sv +++ b/hw/rtl/libs/VX_dp_ram.sv @@ -80,7 +80,7 @@ module VX_dp_ram #( if (FORCE_BRAM) begin : g_bram if (RDW_MODE == "W") begin : g_write_first if (WRENW != 1) begin : g_wren - (* rw_addr_collision = "yes" *) `USE_BLOCK_BRAM `RAM_ARRAY_WREN + `RW_RAM_CHECK `USE_BLOCK_BRAM `RAM_ARRAY_WREN `RAM_INITIALIZATION reg [ADDRW-1:0] raddr_r; always @(posedge clk) begin @@ -93,7 +93,7 @@ module VX_dp_ram #( end assign rdata = ram[raddr_r]; end else begin : g_no_wren - (* rw_addr_collision = "yes" *) `USE_BLOCK_BRAM reg [DATAW-1:0] ram [0:SIZE-1]; + `RW_RAM_CHECK `USE_BLOCK_BRAM reg [DATAW-1:0] ram [0:SIZE-1]; `RAM_INITIALIZATION reg [ADDRW-1:0] raddr_r; always @(posedge clk) begin @@ -166,7 +166,7 @@ module VX_dp_ram #( end else begin : g_auto if (RDW_MODE == "W") begin : g_write_first if (WRENW != 1) begin : g_wren - (* rw_addr_collision = "yes" *) `RAM_ARRAY_WREN + `RW_RAM_CHECK `RAM_ARRAY_WREN `RAM_INITIALIZATION reg [ADDRW-1:0] raddr_r; always @(posedge clk) begin @@ -179,7 +179,7 @@ module VX_dp_ram #( end assign rdata = ram[raddr_r]; end else begin : g_no_wren - (* rw_addr_collision = "yes" *) reg [DATAW-1:0] ram [0:SIZE-1]; + `RW_RAM_CHECK reg [DATAW-1:0] ram [0:SIZE-1]; `RAM_INITIALIZATION reg [ADDRW-1:0] raddr_r; always @(posedge clk) begin @@ -220,7 +220,7 @@ module VX_dp_ram #( end assign rdata = rdata_r; end - end else begin + end else begin : g_undefined if (WRENW != 1) begin : g_wren `RAM_ARRAY_WREN `RAM_INITIALIZATION @@ -253,30 +253,32 @@ module VX_dp_ram #( end else begin : g_async 
`UNUSED_VAR (read) if (FORCE_BRAM) begin : g_bram + `ifdef VIVADO + VX_async_ram_patch #( + .DATAW (DATAW), + .SIZE (SIZE), + .WRENW (WRENW), + .DUAL_PORT (1), + .FORCE_BRAM (FORCE_BRAM), + .WRITE_FIRST(RDW_MODE == "W"), + .INIT_ENABLE(INIT_ENABLE), + .INIT_FILE (INIT_FILE), + .INIT_VALUE (INIT_VALUE) + ) async_ram_patch ( + .clk (clk), + .reset (reset), + .read (read), + .write (write), + .wren (wren), + .waddr (waddr), + .wdata (wdata), + .raddr (raddr), + .rdata (rdata) + ); + `else if (RDW_MODE == "W") begin : g_write_first - `ifdef VIVADO - VX_async_ram_patch #( - .DATAW (DATAW), - .SIZE (SIZE), - .WRENW (WRENW), - .DUAL_PORT (1), - .INIT_ENABLE(INIT_ENABLE), - .INIT_FILE (INIT_FILE), - .INIT_VALUE (INIT_VALUE) - ) async_ram_patch ( - .clk (clk), - .reset (reset), - .read (read), - .write (write), - .wren (wren), - .waddr (waddr), - .wdata (wdata), - .raddr (raddr), - .rdata (rdata) - ); - `else if (WRENW != 1) begin : g_wren - `USE_BLOCK_BRAM `RAM_ARRAY_WREN + `RW_RAM_CHECK `USE_BLOCK_BRAM `RAM_ARRAY_WREN `RAM_INITIALIZATION always @(posedge clk) begin if (write) begin @@ -285,7 +287,7 @@ module VX_dp_ram #( end assign rdata = ram[raddr]; end else begin : g_no_wren - `USE_BLOCK_BRAM reg [DATAW-1:0] ram [0:SIZE-1]; + `RW_RAM_CHECK `USE_BLOCK_BRAM reg [DATAW-1:0] ram [0:SIZE-1]; `RAM_INITIALIZATION always @(posedge clk) begin if (write) begin @@ -294,7 +296,6 @@ module VX_dp_ram #( end assign rdata = ram[raddr]; end - `endif end else begin : g_read_first if (WRENW != 1) begin : g_wren `NO_RW_RAM_CHECK `USE_BLOCK_BRAM `RAM_ARRAY_WREN @@ -316,10 +317,11 @@ module VX_dp_ram #( assign rdata = ram[raddr]; end end + `endif end else begin : g_auto if (RDW_MODE == "W") begin : g_write_first if (WRENW != 1) begin : g_wren - `RAM_ARRAY_WREN + `RW_RAM_CHECK `RAM_ARRAY_WREN `RAM_INITIALIZATION always @(posedge clk) begin if (write) begin @@ -328,7 +330,7 @@ module VX_dp_ram #( end assign rdata = ram[raddr]; end else begin : g_no_wren - reg [DATAW-1:0] ram [0:SIZE-1]; + 
`RW_RAM_CHECK reg [DATAW-1:0] ram [0:SIZE-1]; `RAM_INITIALIZATION always @(posedge clk) begin if (write) begin diff --git a/hw/rtl/libs/VX_rr_arbiter.sv b/hw/rtl/libs/VX_rr_arbiter.sv index 1d3b479bf0..c86da584ae 100644 --- a/hw/rtl/libs/VX_rr_arbiter.sv +++ b/hw/rtl/libs/VX_rr_arbiter.sv @@ -485,7 +485,7 @@ module VX_rr_arbiter #( .D (NUM_REQS) ) grant_decoder ( .sel_in (grant_index), - .data_in (1'b1), + .data_in (grant_valid), .data_out (grant_onehot) ); diff --git a/hw/rtl/libs/VX_sp_ram.sv b/hw/rtl/libs/VX_sp_ram.sv index 88b922384c..3c673e462c 100644 --- a/hw/rtl/libs/VX_sp_ram.sv +++ b/hw/rtl/libs/VX_sp_ram.sv @@ -77,20 +77,20 @@ module VX_sp_ram #( localparam FORCE_BRAM = !LUTRAM && (SIZE * DATAW >= `MAX_LUTRAM); if (OUT_REG) begin : g_sync if (FORCE_BRAM) begin : g_bram - if (RDW_MODE == "R") begin : g_read_first + if (RDW_MODE == "W") begin : g_write_first if (WRENW != 1) begin : g_wren - `USE_BLOCK_BRAM `RAM_ARRAY_WREN + `RW_RAM_CHECK `USE_BLOCK_BRAM `RAM_ARRAY_WREN `RAM_INITIALIZATION - reg [DATAW-1:0] rdata_r; + reg [ADDRW-1:0] addr_r; always @(posedge clk) begin if (read || write) begin if (write) begin `RAM_WRITE_WREN end - rdata_r <= ram[addr]; + addr_r <= addr; end end - assign rdata = rdata_r; + assign rdata = ram[addr_r]; end else begin : g_no_wren `USE_BLOCK_BRAM reg [DATAW-1:0] ram [0:SIZE-1]; `RAM_INITIALIZATION @@ -99,26 +99,28 @@ module VX_sp_ram #( if (read || write) begin if (write) begin ram[addr] <= wdata; + rdata_r <= wdata; + end else begin + rdata_r <= ram[addr]; end - rdata_r <= ram[addr]; end end assign rdata = rdata_r; end - end else if (RDW_MODE == "W") begin : g_write_first + end else if (RDW_MODE == "R") begin : g_read_first if (WRENW != 1) begin : g_wren `USE_BLOCK_BRAM `RAM_ARRAY_WREN `RAM_INITIALIZATION - reg [ADDRW-1:0] addr_r; + reg [DATAW-1:0] rdata_r; always @(posedge clk) begin if (read || write) begin if (write) begin `RAM_WRITE_WREN end - addr_r <= addr; + rdata_r <= ram[addr]; end end - assign rdata = ram[addr_r]; + 
assign rdata = rdata_r; end else begin : g_no_wren `USE_BLOCK_BRAM reg [DATAW-1:0] ram [0:SIZE-1]; `RAM_INITIALIZATION @@ -127,10 +129,8 @@ module VX_sp_ram #( if (read || write) begin if (write) begin ram[addr] <= wdata; - rdata_r <= wdata; - end else begin - rdata_r <= ram[addr]; end + rdata_r <= ram[addr]; end end assign rdata = rdata_r; @@ -165,7 +165,7 @@ module VX_sp_ram #( end assign rdata = rdata_r; end - end else if (RDW_MODE == "U") begin : g_unknown + end else if (RDW_MODE == "U") begin : g_undefined if (WRENW != 1) begin : g_wren `USE_BLOCK_BRAM `RAM_ARRAY_WREN `RAM_INITIALIZATION @@ -195,20 +195,20 @@ module VX_sp_ram #( end end end else begin : g_auto - if (RDW_MODE == "R") begin : g_read_first + if (RDW_MODE == "W") begin : g_write_first if (WRENW != 1) begin : g_wren `RAM_ARRAY_WREN `RAM_INITIALIZATION - reg [DATAW-1:0] rdata_r; + reg [ADDRW-1:0] addr_r; always @(posedge clk) begin if (read || write) begin if (write) begin `RAM_WRITE_WREN end - rdata_r <= ram[addr]; + addr_r <= addr; end end - assign rdata = rdata_r; + assign rdata = ram[addr_r]; end else begin : g_no_wren reg [DATAW-1:0] ram [0:SIZE-1]; `RAM_INITIALIZATION @@ -217,26 +217,28 @@ module VX_sp_ram #( if (read || write) begin if (write) begin ram[addr] <= wdata; + rdata_r <= wdata; + end else begin + rdata_r <= ram[addr]; end - rdata_r <= ram[addr]; end end assign rdata = rdata_r; end - end else if (RDW_MODE == "W") begin : g_write_first + end else if (RDW_MODE == "R") begin : g_read_first if (WRENW != 1) begin : g_wren `RAM_ARRAY_WREN `RAM_INITIALIZATION - reg [ADDRW-1:0] addr_r; + reg [DATAW-1:0] rdata_r; always @(posedge clk) begin if (read || write) begin if (write) begin `RAM_WRITE_WREN end - addr_r <= addr; + rdata_r <= ram[addr]; end end - assign rdata = ram[addr_r]; + assign rdata = rdata_r; end else begin : g_no_wren reg [DATAW-1:0] ram [0:SIZE-1]; `RAM_INITIALIZATION @@ -245,10 +247,8 @@ module VX_sp_ram #( if (read || write) begin if (write) begin ram[addr] <= wdata; - 
rdata_r <= wdata; - end else begin - rdata_r <= ram[addr]; end + rdata_r <= ram[addr]; end end assign rdata = rdata_r; @@ -283,7 +283,7 @@ module VX_sp_ram #( end assign rdata = rdata_r; end - end else if (RDW_MODE == "U") begin : g_unknown + end else if (RDW_MODE == "U") begin : g_undefined if (WRENW != 1) begin : g_wren `RAM_ARRAY_WREN `RAM_INITIALIZATION @@ -316,30 +316,32 @@ module VX_sp_ram #( end else begin : g_async `UNUSED_VAR (read) if (FORCE_BRAM) begin : g_bram + `ifdef VIVADO + VX_async_ram_patch #( + .DATAW (DATAW), + .SIZE (SIZE), + .WRENW (WRENW), + .DUAL_PORT (0), + .FORCE_BRAM (FORCE_BRAM), + .WRITE_FIRST(RDW_MODE == "W"), + .INIT_ENABLE(INIT_ENABLE), + .INIT_FILE (INIT_FILE), + .INIT_VALUE (INIT_VALUE) + ) async_ram_patch ( + .clk (clk), + .reset (reset), + .read (read), + .write (write), + .wren (wren), + .waddr (addr), + .wdata (wdata), + .raddr (addr), + .rdata (rdata) + ); + `else if (RDW_MODE == "W") begin : g_write_first - `ifdef VIVADO - VX_async_ram_patch #( - .DATAW (DATAW), - .SIZE (SIZE), - .WRENW (WRENW), - .DUAL_PORT (0), - .INIT_ENABLE(INIT_ENABLE), - .INIT_FILE (INIT_FILE), - .INIT_VALUE (INIT_VALUE) - ) async_ram_patch ( - .clk (clk), - .reset (reset), - .read (read), - .write (write), - .wren (wren), - .waddr (addr), - .wdata (wdata), - .raddr (addr), - .rdata (rdata) - ); - `else if (WRENW != 1) begin : g_wren - `USE_BLOCK_BRAM `RAM_ARRAY_WREN + `RW_RAM_CHECK `USE_BLOCK_BRAM `RAM_ARRAY_WREN `RAM_INITIALIZATION always @(posedge clk) begin if (write) begin @@ -348,7 +350,7 @@ module VX_sp_ram #( end assign rdata = ram[addr]; end else begin : g_no_wren - `USE_BLOCK_BRAM reg [DATAW-1:0] ram [0:SIZE-1]; + `RW_RAM_CHECK `USE_BLOCK_BRAM reg [DATAW-1:0] ram [0:SIZE-1]; `RAM_INITIALIZATION always @(posedge clk) begin if (write) begin @@ -357,7 +359,6 @@ module VX_sp_ram #( end assign rdata = ram[addr]; end - `endif end else begin : g_read_first if (WRENW != 1) begin : g_wren `NO_RW_RAM_CHECK `USE_BLOCK_BRAM `RAM_ARRAY_WREN @@ -379,10 
+380,11 @@ module VX_sp_ram #( assign rdata = ram[addr]; end end + `endif end else begin : g_auto if (RDW_MODE == "W") begin : g_write_first if (WRENW != 1) begin : g_wren - `RAM_ARRAY_WREN + `RW_RAM_CHECK `RAM_ARRAY_WREN `RAM_INITIALIZATION always @(posedge clk) begin if (write) begin @@ -391,7 +393,7 @@ module VX_sp_ram #( end assign rdata = ram[addr]; end else begin : g_no_wren - reg [DATAW-1:0] ram [0:SIZE-1]; + `RW_RAM_CHECK reg [DATAW-1:0] ram [0:SIZE-1]; `RAM_INITIALIZATION always @(posedge clk) begin if (write) begin @@ -443,22 +445,22 @@ module VX_sp_ram #( end if (OUT_REG) begin : g_sync - if (RDW_MODE == "R") begin : g_read_first - reg [DATAW-1:0] rdata_r; + if (RDW_MODE == "W") begin : g_write_first + reg [ADDRW-1:0] addr_r; always @(posedge clk) begin if (read || write) begin - rdata_r <= ram[addr]; + addr_r <= addr; end end - assign rdata = rdata_r; - end else if (RDW_MODE == "W") begin : g_write_first - reg [ADDRW-1:0] addr_r; + assign rdata = ram[addr_r]; + end else if (RDW_MODE == "R") begin : g_read_first + reg [DATAW-1:0] rdata_r; always @(posedge clk) begin if (read || write) begin - addr_r <= addr; + rdata_r <= ram[addr]; end end - assign rdata = ram[addr_r]; + assign rdata = rdata_r; end else if (RDW_MODE == "N") begin : g_no_change reg [DATAW-1:0] rdata_r; always @(posedge clk) begin diff --git a/hw/scripts/xilinx_async_bram_patch.tcl b/hw/scripts/xilinx_async_bram_patch.tcl index 5af7ba9533..f0a49ecd6e 100644 --- a/hw/scripts/xilinx_async_bram_patch.tcl +++ b/hw/scripts/xilinx_async_bram_patch.tcl @@ -1,3 +1,16 @@ +# Copyright © 2019-2023 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + namespace eval vortex { variable debug 0 @@ -17,6 +30,25 @@ proc str_replace {str match repl} { return $result } +proc regex_escape {str} { + return [string map { + \\ \\\\ + ^ \\^ + . \\. + \[ \\\[ + \] \\\] + \$ \\\$ + \( \\\( + \) \\\) + | \\| + * \\* + + \\+ + ? \\? + \{ \\\{ + \} \\\} + } $str] +} + proc unique_cell_name {name} { if {[get_cells -quiet $name] == {}} { return $name } set index 0 @@ -31,29 +63,58 @@ proc unique_net_name {name} { return ${name}_${index} } -proc find_nested_cells {parent name_match {should_exist 1}} { - set matching_cells {} - foreach cell [get_cells -hierarchical -include_replicated_objects -filter "PARENT == $parent"] { - set name [get_property NAME $cell] - if {[regexp $name_match $name]} { - lappend matching_cells $cell +proc build_parent_child_map {all_cells} { + set parent_child_map {} + foreach cell $all_cells { + set parent [get_property PARENT $cell] + if {$parent ne ""} { + if {[dict exists $parent_child_map $parent]} { + dict lappend parent_child_map $parent $cell + } else { + dict set parent_child_map $parent [list $cell] + } } } - if {[llength $matching_cells] == 0} { - print_error "No matching cell found for '$parent' matching '$name_match'." 
$should_exist + return $parent_child_map +} + +proc find_cell_descendants_recursive {parent_cell parent_child_map} { + set descendants {} + if {[dict exists $parent_child_map $parent_cell]} { + set children [dict get $parent_child_map $parent_cell] + foreach child $children { + # Add the child to the list + lappend descendants $child + # Recursively add its descendants + set sub_descendants [find_cell_descendants_recursive $child $parent_child_map] + lappend descendants {*}$sub_descendants + } } - return $matching_cells + return $descendants } -proc find_nested_cell {parent name_match} { - foreach cell [get_cells -hierarchical -filter "PARENT == $parent"] { - set name [get_property NAME $cell] - if {$name == $name_match} { - return $cell +proc find_cell_descendants {parent_cell} { + set all_cells [get_cells -hierarchical] + set parent_child_map [build_parent_child_map $all_cells] + return [find_cell_descendants_recursive $parent_cell $parent_child_map] +} + +proc find_nested_cells {parent_cell name_match {should_exist 1}} { + set hier_sep [get_hierarchy_separator] + set matching_cells {} + foreach cell [find_cell_descendants $parent_cell] { + set parent_name [get_property PARENT $cell] + set cell_name [get_property NAME $cell] + set name_prefix [regex_escape "${parent_name}${hier_sep}"] + set pattern "${name_prefix}${name_match}" + if {[regexp $pattern $cell_name]} { + lappend matching_cells $cell } } - puts "ERROR: No matching cell found for '$parent' matching '$name_match'." - exit -1 + if {[llength $matching_cells] == 0} { + print_error "No matching cell found for '$parent_cell' matching '$name_match'." 
$should_exist + } + return $matching_cells } proc find_cell_nets {cell name_match {should_exist 1}} { @@ -70,22 +131,23 @@ proc find_cell_nets {cell name_match {should_exist 1}} { return $matching_nets } -proc get_cell_net {cell name_match} { - foreach net [get_nets -hierarchical -filter "PARENT_CELL == $cell"] { - set name [get_property NAME $net] - if {$name == $name_match} { - return $net - } +proc get_cell_net {cell name} { + set net [get_nets -hierarchical -filter "PARENT_CELL == $cell && NAME == $name"] + if {[llength $net] == 0} { + puts "ERROR: No matching net found for '$cell' matching '$name'." + exit -1 } - puts "ERROR: No matching net found for '$cell' matching '$name_match'." - exit -1 + return $net; } proc find_cell_pins {cell name_match {should_exist 1}} { + set hier_sep [get_hierarchy_separator] set matching_pins {} foreach pin [get_pins -of_objects $cell] { set name [get_property NAME $pin] - if {[regexp $name_match $name]} { + set name_prefix [regex_escape "${cell}${hier_sep}"] + set pattern "${name_prefix}${name_match}" + if {[regexp $pattern $name]} { lappend matching_pins $pin } } @@ -95,15 +157,31 @@ proc find_cell_pins {cell name_match {should_exist 1}} { return $matching_pins } -proc get_cell_pin {cell name_match} { - foreach pin [get_pins -of_objects $cell] { - set name [get_property NAME $pin] - if {$name == $name_match} { - return $pin - } +proc get_cell_pin {cell name} { + set pin [get_pins -of_objects $cell -filter "NAME == $name"] + if {[llength $pin] == 0} { + puts "ERROR: No matching pin found for '$cell' matching '$name'." + exit -1 } - puts "ERROR: No matching pin found for '$cell' matching '$name_match'." - exit -1 + return $pin +} + +proc remove_cell_from_netlist {cell} { + variable debug + + puts "INFO: Removing cell '$cell' from the netlist." 
+ + # Disconnect all pins of the cell + #foreach pin [get_pins -quiet -of_objects $cell] { + # foreach net [get_nets -quiet -of_objects $pin] { + # disconnect_net -net $net -objects $pin + # if {$debug} {puts "DEBUG: Disconnected net '$net' from pin '$pin'."} + # } + #} + + # Remove the cell + remove_cell $cell + if {$debug} {puts "DEBUG: Cell '$cell' was removed successfully."} } proc replace_pin_source {pin source_pin} { @@ -141,10 +219,42 @@ proc replace_pin_source {pin source_pin} { if {$debug} {puts "DEBUG: Connected net '$source_net' to pin '$pin'."} } -proc create_register_next {reg_cell prefix_name} { +proc find_net_driver {input_net {should_exist 1}} { + set driverPins [get_pins -quiet -leaf -of_objects $input_net -filter {DIRECTION == "OUT"}] + if {[llength $driverPins] == 0} { + set driverPorts [get_ports -quiet -of_objects $input_net -filter {DIRECTION == "IN"}] + if {[llength $driverPorts] == 0} { + print_error "No driver found for '$input_net'." $should_exist + } elseif {[llength $driverPorts] > 1} { + puts "WARNING: Multiple driver ports found for '$input_net'." + return [lindex $driverPorts 0] + } + return $driverPorts + } elseif {[llength $driverPins] > 1} { + puts "WARNING: Multiple driver pins found for '$input_net'." + return [lindex $driverPins 0] + } + return $driverPins +} + +proc find_pin_driver {input_pin {should_exist 1}} { + set net [get_nets -quiet -of_objects $input_pin] + if {[llength $net] == 0} { + print_error "No net connected to pin '$input_pin'." $should_exist + return "" + } elseif {[llength $net] > 1} { + puts "ERROR: Multiple nets connected to pin '$input_pin'." 
+ exit -1 + } + return [find_net_driver $net] +} + +proc create_register_next {parent reg_cell} { variable debug - set reg_d_pin [get_pins -of_objects $reg_cell -filter {NAME =~ "*/D"}] + set hier_sep [get_hierarchy_separator] + + set reg_d_pin [get_pins "${reg_cell}${hier_sep}D"] if {[llength $reg_d_pin] == 0} { puts "ERROR: No D pin found on register cell '$reg_cell'." exit -1 @@ -167,7 +277,7 @@ proc create_register_next {reg_cell prefix_name} { set register_type [get_property REF_NAME $reg_cell] if {$register_type == "FDRE"} { - set reg_r_pin [get_pins -of_objects $reg_cell -filter {NAME =~ "*/R"}] + set reg_r_pin [get_pins "${reg_cell}${hier_sep}R"] if {[llength $reg_r_pin] == 0} { puts "ERROR: No R pin found on FDRE cell '$reg_cell'." exit -1 @@ -184,7 +294,7 @@ proc create_register_next {reg_cell prefix_name} { exit -1 } } elseif {$register_type == "FDSE"} { - set reg_s_pin [get_pins -of_objects $reg_cell -filter {NAME =~ "*/S"}] + set reg_s_pin [get_pins "${reg_cell}${hier_sep}S"] if {[llength $reg_s_pin] == 0} { puts "ERROR: No S pin found on FDSE cell '$reg_cell'." exit -1 @@ -229,7 +339,7 @@ proc create_register_next {reg_cell prefix_name} { # Use a 2x1 LUT to describe the logic: # FDRE: O = I1 ? 0 : I0; where I0=D, I1=R # FDSE: O = I1 ? 1 : I0; where I0=D, I1=S - set lut_name [unique_cell_name $prefix_name] + set lut_name [unique_cell_name "${parent}${hier_sep}raddr_next"] set lut_cell [create_cell -reference LUT2 $lut_name] puts "INFO: Created lut cell: '$lut_cell'" @@ -242,7 +352,7 @@ proc create_register_next {reg_cell prefix_name} { exit 1 } - set lut_i0_pin [get_pins -of_objects $lut_cell -filter {NAME =~ "*/I0"}] + set lut_i0_pin [get_pins "${lut_cell}${hier_sep}I0"] if {[llength $lut_i0_pin] == 0} { puts "ERROR: No I0 pin found on FDSE cell '$lut_cell'." 
exit -1 @@ -251,7 +361,7 @@ proc create_register_next {reg_cell prefix_name} { exit -1 } - set lut_i1_pin [get_pins -of_objects $lut_cell -filter {NAME =~ "*/I1"}] + set lut_i1_pin [get_pins "${lut_cell}${hier_sep}I1"] if {[llength $lut_i1_pin] == 0} { puts "ERROR: No I1 pin found on FDSE cell '$lut_cell'." exit -1 @@ -260,7 +370,7 @@ proc create_register_next {reg_cell prefix_name} { exit -1 } - set lut_o_pin [get_pins -of_objects $lut_cell -filter {NAME =~ "*/O"}] + set lut_o_pin [get_pins "${lut_cell}${hier_sep}O"] if {[llength $lut_o_pin] == 0} { puts "ERROR: No O pin found on FDSE cell '$lut_cell'." exit -1 @@ -278,19 +388,22 @@ proc create_register_next {reg_cell prefix_name} { return $lut_o_pin } -proc getOrCreateVCCPin {prefix_name} { +proc getOrCreateVCCPin {parent} { variable debug - set vcc_cell "" - set vcc_cells [get_cells -quiet -filter {REF_NAME == VCC}] - if {[llength $vcc_cells] == 0} { - set cell_name [unique_cell_name $prefix_name] + set hier_sep [get_hierarchy_separator] + set cell_name "${parent}${hier_sep}VCC" + + set vcc_cell [get_cells -quiet $cell_name] + if {[llength $vcc_cell] == 0} { set vcc_cell [create_cell -reference VCC $cell_name] puts "INFO: Created VCC cell: '$vcc_cell'" - } else { - set vcc_cell [lindex $vcc_cells 0] + } elseif {[llength $vcc_cell] > 1} { + puts "ERROR: Multiple VCC cells found with name '$cell_name'." + exit -1 } - set vcc_pin [get_pins -of_objects $vcc_cell -filter {NAME =~ "*/P"}] + + set vcc_pin [get_pins "${vcc_cell}${hier_sep}P"] if {[llength $vcc_pin] == 0} { puts "ERROR: No VCC pin found on VCC cell '$vcc_cell'." exit -1 @@ -298,22 +411,26 @@ proc getOrCreateVCCPin {prefix_name} { puts "ERROR: Multiple VCC pins found on VCC cell '$vcc_cell'." 
exit -1 } + return $vcc_pin } -proc getOrCreateGNDPin {prefix_name} { +proc getOrCreateGNDPin {parent} { variable debug - set gnd_cell "" - set gnd_cells [get_cells -quiet -filter {REF_NAME == GND}] - if {[llength $gnd_cells] == 0} { - set cell_name [unique_cell_name $prefix_name] + set hier_sep [get_hierarchy_separator] + set cell_name "${parent}${hier_sep}GND" + + set gnd_cell [get_cells -quiet $cell_name] + if {[llength $gnd_cell] == 0} { set gnd_cell [create_cell -reference GND $cell_name] puts "INFO: Created GND cell: '$gnd_cell'" - } else { - set gnd_cell [lindex $gnd_cells 0] + } elseif {[llength $gnd_cell] > 1} { + puts "ERROR: Multiple GND cells found with name '$cell_name'." + exit -1 } - set gnd_pin [get_pins -of_objects $gnd_cell -filter {NAME =~ "*/G"}] + + set gnd_pin [get_pins "${gnd_cell}${hier_sep}G"] if {[llength $gnd_pin] == 0} { puts "ERROR: No GND pin found on GND cell '$gnd_cell'." exit -1 @@ -321,6 +438,7 @@ proc getOrCreateGNDPin {prefix_name} { puts "ERROR: Multiple GND pins found on GND cell '$gnd_cell'." exit -1 } + return $gnd_pin } @@ -338,35 +456,6 @@ proc find_net_sinks {input_net {should_exist 1}} { return $sink_pins } -proc find_net_driver {input_net {should_exist 1}} { - set driverPins [get_pins -quiet -leaf -of_objects $input_net -filter {DIRECTION == "OUT"}] - if {[llength $driverPins] == 0} { - set driverPorts [get_ports -quiet -of_objects $input_net -filter {DIRECTION == "IN"}] - if {[llength $driverPorts] == 0} { - print_error "No driver found for '$input_net'." $should_exist - } elseif {[llength $driverPorts] > 1} { - puts "WARNING: Multiple driver ports found for '$input_net'." - return [lindex $driverPorts 0] - } - return $driverPorts - } elseif {[llength $driverPins] > 1} { - puts "WARNING: Multiple driver pins found for '$input_net'." 
- return [lindex $driverPins 0] - } - return $driverPins -} - -proc find_pin_driver {input_pin {should_exist 1}} { - set net [get_nets -quiet -of_objects $input_pin] - if {[llength $net] == 0} { - print_error "No net connected to pin '$input_pin'." $should_exist - } elseif {[llength $net] > 1} { - puts "ERROR: Multiple nets connected to pin '$input_pin'." - exit -1 - } - return [find_net_driver $net] -} - proc find_matching_nets {cell nets match repl} { set matching_nets {} foreach net $nets { @@ -386,6 +475,25 @@ proc find_matching_nets {cell nets match repl} { return $matching_nets } +proc find_matching_pins {cell pins match repl} { + set matching_pins {} + foreach pin $pins { + set pin_name [str_replace $pin $match $repl] + set matching_pin [get_cell_pin $cell $pin_name] + if {$matching_pin != ""} { + lappend matching_pins $matching_pin + } + } + if {[llength $matching_pins] == 0} { + puts "ERROR: No matching pins found for '$pins'." + exit -1 + } elseif {[llength $matching_pins] != [llength $pins]} { + puts "ERROR: Mismatch in number of matching pins." + exit -1 + } + return $matching_pins +} + proc replace_net_source {net source_pin} { foreach pin [find_net_sinks $net 0] { replace_pin_source $pin $source_pin @@ -397,6 +505,8 @@ proc resolve_async_bram {inst} { puts "INFO: Resolving asynchronous BRAM patch: '$inst'." + set hier_sep [get_hierarchy_separator] + set raddr_w_nets [find_cell_nets $inst "raddr_w(\\\[\\d+\\\])?$"] set read_s_net [find_cell_nets $inst "read_s$"] set is_raddr_reg_net [find_cell_nets $inst "is_raddr_reg$"] @@ -433,7 +543,7 @@ proc resolve_async_bram {inst} { } # Create register next cell and return output pin - set reg_next_pin [create_register_next $raddr_src_cell "$inst/raddr_next"] + set reg_next_pin [create_register_next $inst $raddr_src_cell] if {$reg_next_pin == ""} { puts "ERROR: failed to create register next value for '$raddr_src_cell'." 
exit -1 @@ -444,7 +554,7 @@ proc resolve_async_bram {inst} { # Find the CE pin on raddr_src_cell if {$reg_ce_src_pin == ""} { - set reg_ce_pin [get_pins -of_objects $raddr_src_cell -filter {NAME =~ "*/CE"}] + set reg_ce_pin [get_pins "${raddr_src_cell}${hier_sep}CE"] if {[llength $reg_ce_pin] == 0} { puts "ERROR: No CE pin found on register cell '$raddr_src_cell'." exit -1 @@ -466,9 +576,10 @@ proc resolve_async_bram {inst} { # do we have a fully registered read address? if {[llength $reg_next_pins] == [llength $raddr_w_nets]} { puts "INFO: Fully registered read address detected." + + # Connect all reg_next_pins to all input pins attached to raddr_s_nets set addr_width [llength $raddr_w_nets] for {set addr_idx 0} {$addr_idx < $addr_width} {incr addr_idx} { - set raddr_w_net [lindex $raddr_w_nets $addr_idx] set raddr_s_net [lindex $raddr_s_nets $addr_idx] set reg_next_pin [lindex $reg_next_pins $addr_idx] puts "INFO: Connecting pin '$reg_next_pin' to '$raddr_s_net's pins." @@ -481,7 +592,7 @@ proc resolve_async_bram {inst} { replace_net_source $read_s_net $reg_ce_src_pin # Create Const<1>'s pin - set vcc_pin [getOrCreateVCCPin "$inst/VCC"] + set vcc_pin [getOrCreateVCCPin $inst] # Connect vcc_pin to all input pins attached to is_raddr_reg_net puts "INFO: Connecting pin '$vcc_pin' to '$is_raddr_reg_net's pins." @@ -490,18 +601,16 @@ proc resolve_async_bram {inst} { puts "WARNING: Not all read addresses are registered!" # Create Const<0>'s pin - set gnd_pin [getOrCreateGNDPin "$inst/GND"] + set gnd_pin [getOrCreateGNDPin $inst] # Connect gnd_pin to all input pins attached to is_raddr_reg_net puts "INFO: Connecting pin '$gnd_pin' to '$is_raddr_reg_net's pins." 
replace_net_source $is_raddr_reg_net $gnd_pin } - # Remove all placeholder cells - foreach cell [find_nested_cells $inst "placeholder$"] { - remove_cell $cell - if {$debug} {puts "DEBUG: Cell '$cell' was removed successfully."} - } + # Remove placeholder cell + set placeholder [get_cells "${inst}${hier_sep}placeholder"] + remove_cell_from_netlist $placeholder } proc resolve_async_brams {} { diff --git a/hw/scripts/xilinx_export_netlist.tcl b/hw/scripts/xilinx_export_netlist.tcl index 25a0d17e84..a6ff22ff5d 100644 --- a/hw/scripts/xilinx_export_netlist.tcl +++ b/hw/scripts/xilinx_export_netlist.tcl @@ -1,3 +1,16 @@ +# Copyright © 2019-2023 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ # Function to export netlist to a Graphviz DOT file proc export_netlist {dot_file_name} { # Open the DOT file for writing diff --git a/hw/syn/xilinx/README b/hw/syn/xilinx/README index 0fb83e71b8..a1ca231fea 100644 --- a/hw/syn/xilinx/README +++ b/hw/syn/xilinx/README @@ -47,6 +47,9 @@ TARGET=hw PLATFORM=xilinx_u50_gen3x16_xdma_5_202210_1 make chipscope # analyze build report vitis_analyzer build_xilinx_u50_gen3x16_xdma_5_202210_1_hw_4c/bin/vortex_afu.xclbin.link_summary +# resuming build for routing +TARGET=hw PLATFORM=xilinx_u55c_gen3x16_xdma_3_202210_1 VPP_FLAGS="--from_step vpl.impl.route_design" make > build.log 2>&1 & + # running test FPGA_BIN_DIR= TARGET=hw_emu ./ci/blackbox.sh --driver=xrt --app=demo FPGA_BIN_DIR= TARGET=hw ./ci/blackbox.sh --driver=xrt --app=demo diff --git a/hw/syn/xilinx/xrt/Makefile b/hw/syn/xilinx/xrt/Makefile index 643724069d..288031e2ec 100644 --- a/hw/syn/xilinx/xrt/Makefile +++ b/hw/syn/xilinx/xrt/Makefile @@ -180,6 +180,7 @@ ifeq ($(TARGET), hw) cp $(BUILD_DIR)/_x/logs/link/vivado.log $(BUILD_DIR)/bin cp $(BUILD_DIR)/_x/logs/link/syn/ulp_vortex_afu_1_0_synth_1_runme.log $(BUILD_DIR)/bin cp $(BUILD_DIR)/_x/reports/link/syn/ulp_vortex_afu_1_0_synth_1_ulp_vortex_afu_1_0_utilization_synth.rpt $(BUILD_DIR)/bin + cp $(BUILD_DIR)/_x/reports/link/imp/impl_1_hw_bb_locked_utilization_placed.rpt $(BUILD_DIR)/bin cp $(BUILD_DIR)/_x/reports/link/imp/impl_1_hw_bb_locked_timing_summary_routed.rpt $(BUILD_DIR)/bin endif From b0c48e7a46dbd5169c500c4e51f6949587184c67 Mon Sep 17 00:00:00 2001 From: tinebp Date: Wed, 20 Nov 2024 18:27:52 -0800 Subject: [PATCH 334/407] stream buffer area optimization --- hw/rtl/libs/VX_stream_buffer.sv | 39 ++++++++++++++++++++------------- 1 file changed, 24 insertions(+), 15 deletions(-) diff --git a/hw/rtl/libs/VX_stream_buffer.sv b/hw/rtl/libs/VX_stream_buffer.sv index 4b77df83de..2cf08c0f47 100644 --- a/hw/rtl/libs/VX_stream_buffer.sv +++ b/hw/rtl/libs/VX_stream_buffer.sv @@ -86,38 +86,47 @@ module 
VX_stream_buffer #( end else begin : g_no_out_reg - reg [1:0][DATAW-1:0] shift_reg; - reg [1:0] fifo_state, fifo_state_n; + reg [DATAW-1:0] data_out_r, buffer; + reg valid_in_r, valid_out_r; wire fire_in = valid_in && ready_in; wire fire_out = valid_out && ready_out; - always @(*) begin - case ({fire_in, fire_out}) - 2'b10: fifo_state_n = {fifo_state[0], 1'b1}; // 00 -> 01, 01 -> 10 - 2'b01: fifo_state_n = {1'b0, fifo_state[1]}; // 10 -> 01, 01 -> 00 - default: fifo_state_n = fifo_state; - endcase + always @(posedge clk) begin + if (reset) begin + valid_in_r <= 1'b1; + end else begin + if (fire_in ^ fire_out) begin + valid_in_r <= valid_out_r ^ fire_in; + end + end end always @(posedge clk) begin if (reset) begin - fifo_state <= 2'b00; + valid_out_r <= 1'b0; end else begin - fifo_state <= fifo_state_n; + if (fire_in ^ fire_out) begin + valid_out_r <= valid_in_r ^ fire_out; + end end end always @(posedge clk) begin if (fire_in) begin - shift_reg[1] <= shift_reg[0]; - shift_reg[0] <= data_in; + data_out_r <= data_in; end end - assign ready_in = ~fifo_state[1]; - assign valid_out = fifo_state[0]; - assign data_out = shift_reg[fifo_state[1]]; + always @(posedge clk) begin + if (fire_in) begin + buffer <= data_out_r; + end + end + + assign ready_in = valid_in_r; + assign valid_out = valid_out_r; + assign data_out = valid_in_r ? data_out_r : buffer; end From 8d8769c7100b9abcad3d1c1ff0eb011d2cfbb5dc Mon Sep 17 00:00:00 2001 From: tinebp Date: Wed, 20 Nov 2024 19:15:51 -0800 Subject: [PATCH 335/407] stream_buffer area optimization --- hw/rtl/libs/VX_stream_buffer.sv | 88 +++++++++++++-------------------- 1 file changed, 33 insertions(+), 55 deletions(-) diff --git a/hw/rtl/libs/VX_stream_buffer.sv b/hw/rtl/libs/VX_stream_buffer.sv index 2cf08c0f47..ea4467cb3a 100644 --- a/hw/rtl/libs/VX_stream_buffer.sv +++ b/hw/rtl/libs/VX_stream_buffer.sv @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-// A stream elastic buffer operates at full-bandwidth where fire_in and fire_out can happen simultaneously +// A stream elastic buffer_r operates at full-bandwidth where fire_in and fire_out can happen simultaneously // It has the following benefits: // + full-bandwidth throughput // + ready_in and ready_out are decoupled @@ -45,88 +45,66 @@ module VX_stream_buffer #( assign valid_out = valid_in; assign data_out = data_in; - end else if (OUT_REG != 0) begin : g_out_reg + end else begin : g_buffer - reg [DATAW-1:0] data_out_r; - reg [DATAW-1:0] buffer; - reg valid_out_r; - reg no_buffer; + reg [DATAW-1:0] data_out_r, buffer_r; + reg valid_out_r, valid_in_r; wire fire_in = valid_in && ready_in; wire flow_out = ready_out || ~valid_out; always @(posedge clk) begin if (reset) begin - valid_out_r <= 0; - no_buffer <= 1; - end else begin - if (flow_out) begin - no_buffer <= 1; - end else if (valid_in) begin - no_buffer <= 0; - end - if (flow_out) begin - valid_out_r <= valid_in || ~no_buffer; - end + valid_in_r <= 1'b1; + end else if (valid_in || flow_out) begin + valid_in_r <= flow_out; end end always @(posedge clk) begin - if (fire_in) begin - buffer <= data_in; - end - if (flow_out) begin - data_out_r <= no_buffer ? data_in : buffer; + if (reset) begin + valid_out_r <= 1'b0; + end else if (flow_out) begin + valid_out_r <= valid_in || ~valid_in_r; end end - assign ready_in = no_buffer; - assign valid_out = valid_out_r; - assign data_out = data_out_r; + if (OUT_REG != 0) begin : g_out_reg - end else begin : g_no_out_reg + always @(posedge clk) begin + if (fire_in) begin + buffer_r <= data_in; + end + end - reg [DATAW-1:0] data_out_r, buffer; - reg valid_in_r, valid_out_r; + always @(posedge clk) begin + if (flow_out) begin + data_out_r <= valid_in_r ? 
data_in : buffer_r; + end + end - wire fire_in = valid_in && ready_in; - wire fire_out = valid_out && ready_out; + assign data_out = data_out_r; - always @(posedge clk) begin - if (reset) begin - valid_in_r <= 1'b1; - end else begin - if (fire_in ^ fire_out) begin - valid_in_r <= valid_out_r ^ fire_in; + end else begin : g_no_out_reg + + always @(posedge clk) begin + if (fire_in) begin + data_out_r <= data_in; end end - end - always @(posedge clk) begin - if (reset) begin - valid_out_r <= 1'b0; - end else begin - if (fire_in ^ fire_out) begin - valid_out_r <= valid_in_r ^ fire_out; + always @(posedge clk) begin + if (fire_in) begin + buffer_r <= data_out_r; end end - end - always @(posedge clk) begin - if (fire_in) begin - data_out_r <= data_in; - end - end + assign data_out = valid_in_r ? data_out_r : buffer_r; - always @(posedge clk) begin - if (fire_in) begin - buffer <= data_out_r; - end end - assign ready_in = valid_in_r; assign valid_out = valid_out_r; - assign data_out = valid_in_r ? data_out_r : buffer; + assign ready_in = valid_in_r; end From 180735c531df8f4dafcc484814ea2600ce9cb711 Mon Sep 17 00:00:00 2001 From: tinebp Date: Thu, 21 Nov 2024 16:47:00 -0800 Subject: [PATCH 336/407] fifoqueue area optimization --- hw/rtl/libs/VX_fifo_queue.sv | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/hw/rtl/libs/VX_fifo_queue.sv b/hw/rtl/libs/VX_fifo_queue.sv index 720a1a2c60..f3cc65b7ba 100644 --- a/hw/rtl/libs/VX_fifo_queue.sv +++ b/hw/rtl/libs/VX_fifo_queue.sv @@ -90,9 +90,6 @@ module VX_fifo_queue #( end end - wire going_empty = (ALM_EMPTY == 1) ? 
alm_empty : (size[ADDRW-1:0] == ADDRW'(1)); - wire bypass = push && (empty || (going_empty && pop)); - VX_dp_ram #( .DATAW (DATAW), .SIZE (DEPTH), @@ -101,7 +98,7 @@ module VX_fifo_queue #( ) dp_ram ( .clk (clk), .reset (reset), - .read (~bypass), + .read (1'b1), .write (push), .wren (1'b1), .raddr (rd_ptr_r), @@ -112,11 +109,10 @@ module VX_fifo_queue #( if (OUT_REG != 0) begin : g_out_reg reg [DATAW-1:0] data_out_r; + wire going_empty = (ALM_EMPTY == 1) ? alm_empty : (size[ADDRW-1:0] == ADDRW'(1)); always @(posedge clk) begin - if (bypass) begin - data_out_r <= data_in; - end else if (pop) begin - data_out_r <= data_out_w; + if (pop || (push && empty)) begin + data_out_r <= (empty || going_empty) ? data_in : data_out_w; end end assign data_out = data_out_r; From 18bf49d1e0254e4236a51355edc5c11e1116d624 Mon Sep 17 00:00:00 2001 From: tinebp Date: Thu, 21 Nov 2024 16:48:18 -0800 Subject: [PATCH 337/407] minor update --- hw/scripts/xilinx_async_bram_patch.tcl | 34 ++++++++++++++++++++++++-- 1 file changed, 32 insertions(+), 2 deletions(-) diff --git a/hw/scripts/xilinx_async_bram_patch.tcl b/hw/scripts/xilinx_async_bram_patch.tcl index f0a49ecd6e..e4a684e3b9 100644 --- a/hw/scripts/xilinx_async_bram_patch.tcl +++ b/hw/scripts/xilinx_async_bram_patch.tcl @@ -597,6 +597,11 @@ proc resolve_async_bram {inst} { # Connect vcc_pin to all input pins attached to is_raddr_reg_net puts "INFO: Connecting pin '$vcc_pin' to '$is_raddr_reg_net's pins." replace_net_source $is_raddr_reg_net $vcc_pin + + # Remove all async_ram cells + foreach cell [find_nested_cells $inst "g_async_ram.*" 0] { + remove_cell_from_netlist $cell + } } else { puts "WARNING: Not all read addresses are registered!" @@ -606,11 +611,17 @@ proc resolve_async_bram {inst} { # Connect gnd_pin to all input pins attached to is_raddr_reg_net puts "INFO: Connecting pin '$gnd_pin' to '$is_raddr_reg_net's pins." 
replace_net_source $is_raddr_reg_net $gnd_pin + + # Remove all sync_ram cells + foreach cell [find_nested_cells $inst "g_sync_ram.*" 0] { + remove_cell_from_netlist $cell + } } # Remove placeholder cell - set placeholder [get_cells "${inst}${hier_sep}placeholder"] - remove_cell_from_netlist $placeholder + foreach cell [find_nested_cells $inst "placeholder$"] { + remove_cell_from_netlist $cell + } } proc resolve_async_brams {} { @@ -628,7 +639,26 @@ proc resolve_async_brams {} { } } +proc dump_async_bram_cells {} { + set bram_patch_cells [get_cells -hierarchical -filter {REF_NAME =~ "*VX_async_ram_patch*"}] + if {[llength $bram_patch_cells] != 0} { + foreach cell $bram_patch_cells { + puts "INFO: Found async BRAM patch cell: '$cell'." + set child_cells [find_cell_descendants $cell] + foreach child $child_cells { + set type [get_property REF_NAME $child] + puts "INFO: child cell: '$child', type: '$type'" + } + } + } else { + puts "INFO: No async BRAM patch cells found in the design." + } +} + } # Invoke the procedure to resolve async BRAM vortex::resolve_async_brams + +# dump async bram cells +#vortex::dump_async_bram_cells From 7c4ce748011e33f8f9e1ce0e2c65744d3f5dd187 Mon Sep 17 00:00:00 2001 From: tinebp Date: Thu, 21 Nov 2024 16:48:41 -0800 Subject: [PATCH 338/407] memory unit timing optimization --- hw/rtl/core/VX_mem_unit.sv | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hw/rtl/core/VX_mem_unit.sv b/hw/rtl/core/VX_mem_unit.sv index 931ad65cd3..98491e73df 100644 --- a/hw/rtl/core/VX_mem_unit.sv +++ b/hw/rtl/core/VX_mem_unit.sv @@ -47,7 +47,7 @@ module VX_mem_unit import VX_gpu_pkg::*; #( for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin : g_lmem_switches VX_lmem_switch #( - .REQ0_OUT_BUF (3), + .REQ0_OUT_BUF (1), .REQ1_OUT_BUF (0), .RSP_OUT_BUF (1), .ARBITER ("P") @@ -78,7 +78,7 @@ module VX_mem_unit import VX_gpu_pkg::*; #( .TAG_SEL_BITS (LSU_TAG_WIDTH - `UUID_WIDTH), .ARBITER ("P"), .REQ_OUT_BUF (3), - .RSP_OUT_BUF (0) + .RSP_OUT_BUF (2) 
) lmem_adapter ( .clk (clk), .reset (reset), From 3e4bbfc9f04d29e67bb23b4d25497744ebf85aaa Mon Sep 17 00:00:00 2001 From: tinebp Date: Fri, 22 Nov 2024 11:12:17 -0800 Subject: [PATCH 339/407] minor update --- hw/rtl/libs/VX_fifo_queue.sv | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/hw/rtl/libs/VX_fifo_queue.sv b/hw/rtl/libs/VX_fifo_queue.sv index f3cc65b7ba..c7a4aab6df 100644 --- a/hw/rtl/libs/VX_fifo_queue.sv +++ b/hw/rtl/libs/VX_fifo_queue.sv @@ -110,9 +110,12 @@ module VX_fifo_queue #( if (OUT_REG != 0) begin : g_out_reg reg [DATAW-1:0] data_out_r; wire going_empty = (ALM_EMPTY == 1) ? alm_empty : (size[ADDRW-1:0] == ADDRW'(1)); + wire bypass = push && (empty || (going_empty && pop)); always @(posedge clk) begin - if (pop || (push && empty)) begin - data_out_r <= (empty || going_empty) ? data_in : data_out_w; + if (bypass) begin + data_out_r <= data_in; + end else if (pop) begin + data_out_r <= data_out_w; end end assign data_out = data_out_r; From 1e4583ac17cb600b74a6d104395759eed1dbb601 Mon Sep 17 00:00:00 2001 From: MichaelJSr Date: Tue, 26 Nov 2024 18:41:01 -0800 Subject: [PATCH 340/407] Adds the riscv vector extension into simx --- ci/regression.sh.in | 16 +- hw/rtl/VX_config.vh | 4 + hw/rtl/VX_types.vh | 13 + perf/cache/cache_perf.log | 2 +- sim/common/rvfloats.cpp | 34 + sim/common/rvfloats.h | 5 + sim/common/softfloat_ext.cpp | 486 ++ sim/common/softfloat_ext.h | 14 + sim/opaesim/Makefile | 2 +- sim/rtlsim/Makefile | 2 +- sim/simx/Makefile | 4 +- sim/simx/arch.h | 6 + sim/simx/decode.cpp | 184 +- sim/simx/emulator.cpp | 75 + sim/simx/emulator.h | 88 +- sim/simx/execute.cpp | 141 +- sim/simx/execute_vector.cpp | 4493 +++++++++++++++++ sim/simx/instr.h | 89 +- sim/simx/types.h | 4 +- sim/xrtsim/Makefile | 2 +- tests/riscv/riscv-vector-tests/README | 39 + tests/riscv/riscv-vector-tests/run-test.sh.in | 117 + 22 files changed, 5716 insertions(+), 104 deletions(-) create mode 100644 sim/common/softfloat_ext.cpp create mode 100644 
sim/common/softfloat_ext.h create mode 100644 sim/simx/execute_vector.cpp create mode 100644 tests/riscv/riscv-vector-tests/README create mode 100755 tests/riscv/riscv-vector-tests/run-test.sh.in diff --git a/ci/regression.sh.in b/ci/regression.sh.in index 849a8769f4..53819490f1 100755 --- a/ci/regression.sh.in +++ b/ci/regression.sh.in @@ -386,10 +386,20 @@ synthesis() echo "synthesis tests done!" } +vector() +{ + echo "begin vector tests..." + + make -C sim/simx + TOOLDIR=@TOOLDIR@ XLEN=@XLEN@ VLEN=256 REG_TESTS=1 ./tests/riscv/riscv-vector-tests/run-test.sh + + echo "vector tests done!" +} + show_usage() { echo "Vortex Regression Test" - echo "Usage: $0 [--clean] [--unittest] [--isa] [--kernel] [--regression] [--opencl] [--cache] [--config1] [--config2] [--debug] [--scope] [--stress] [--synthesis] [--all] [--h|--help]" + echo "Usage: $0 [--clean] [--unittest] [--isa] [--kernel] [--regression] [--opencl] [--cache] [--config1] [--config2] [--debug] [--scope] [--stress] [--synthesis] [--vector] [--all] [--h|--help]" } declare -a tests=() @@ -439,6 +449,9 @@ while [ "$1" != "" ]; do --synthesis ) tests+=("synthesis") ;; + --vector ) + tests+=("vector") + ;; --all ) tests=() tests+=("unittest") @@ -454,6 +467,7 @@ while [ "$1" != "" ]; do tests+=("scope") tests+=("stress") tests+=("synthesis") + tests+=("vector") ;; -h | --help ) show_usage diff --git a/hw/rtl/VX_config.vh b/hw/rtl/VX_config.vh index 29eb5c9d8c..3badaa3d3e 100644 --- a/hw/rtl/VX_config.vh +++ b/hw/rtl/VX_config.vh @@ -87,6 +87,10 @@ `endif `endif +`ifndef VLEN +`define VLEN 256 +`endif + `ifndef NUM_CLUSTERS `define NUM_CLUSTERS 1 `endif diff --git a/hw/rtl/VX_types.vh b/hw/rtl/VX_types.vh index 048ba0a5cd..4c8505e5e9 100644 --- a/hw/rtl/VX_types.vh +++ b/hw/rtl/VX_types.vh @@ -188,6 +188,19 @@ `define VX_CSR_MIMPID 12'hF13 `define VX_CSR_MHARTID 12'hF14 +// Vector CSRs + +`define VX_CSR_VSTART 12'h008 +`define VX_CSR_VXSAT 12'h009 +`define VX_CSR_VXRM 12'h00A +`define VX_CSR_VCSR 12'h00F +`define 
VX_CSR_VL 12'hC20 +`define VX_CSR_VTYPE 12'hC21 +`define VX_CSR_VLENB 12'hC22 +`define VX_CSR_VCYCLE 12'hC00 +`define VX_CSR_VTIME 12'hC01 +`define VX_CSR_VINSTRET 12'hC02 + // GPGU CSRs `define VX_CSR_THREAD_ID 12'hCC0 diff --git a/perf/cache/cache_perf.log b/perf/cache/cache_perf.log index 21a446d25b..0a4a55cc88 100644 --- a/perf/cache/cache_perf.log +++ b/perf/cache/cache_perf.log @@ -1,3 +1,3 @@ CONFIGS=-DNUM_CLUSTERS=1 -DNUM_CORES=1 -DNUM_WARPS=2 -DNUM_THREADS=2 -DPERF_ENABLE -DICACHE_NUM_WAYS=1 running: CONFIGS=-DNUM_CLUSTERS=1 -DNUM_CORES=1 -DNUM_WARPS=2 -DNUM_THREADS=2 -DPERF_ENABLE -DICACHE_NUM_WAYS=1 make -C ./ci/../driver/rtlsim -verilator --build --exe --cc Vortex --top-module Vortex --language 1800-2009 --assert -Wall -Wpedantic -Wno-DECLFILENAME -Wno-REDEFMACRO --x-initial unique --x-assign unique verilator.vlt -I../../hw/rtl -I../../hw/dpi -I../../hw/rtl/libs -I../../hw/rtl/interfaces -I../../hw/rtl/cache -I../../hw/rtl/simulate -I../../hw/rtl/fp_cores -I../../third_party/fpnew/src/common_cells/include -I../../third_party/fpnew/src/common_cells/src -I../../third_party/fpnew/src/fpu_div_sqrt_mvp/hdl -I../../third_party/fpnew/src -I../../hw/rtl/tex_unit -I../../hw/rtl/raster_unit -I../../hw/rtl/rop_unit -DNUM_CLUSTERS=1 -DNUM_CORES=1 -DNUM_WARPS=2 -DNUM_THREADS=2 -DPERF_ENABLE -DICACHE_NUM_WAYS=1 -j 64 -DNDEBUG -DIMUL_DPI -DIDIV_DPI -DFPU_DPI ../common/util.cpp ../common/mem.cpp ../common/rvfloats.cpp ../../hw/dpi/util_dpi.cpp ../../hw/dpi/float_dpi.cpp processor.cpp -CFLAGS '-std=c++11 -Wall -Wextra -Wfatal-errors -Wno-array-bounds -fPIC -Wno-maybe-uninitialized -I../../../hw -I../../common -I../../../third_party/softfloat/source/include -I../../../third_party -DNUM_CLUSTERS=1 -DNUM_CORES=1 -DNUM_WARPS=2 -DNUM_THREADS=2 -DPERF_ENABLE -DICACHE_NUM_WAYS=1 -O2 -DNDEBUG' -LDFLAGS '-shared ../../../third_party/softfloat/build/Linux-x86_64-GCC/softfloat.a -L../../../third_party/ramulator -lramulator' -o ../../../driver/rtlsim/librtlsim.so +verilator --build 
--exe --cc Vortex --top-module Vortex --language 1800-2009 --assert -Wall -Wpedantic -Wno-DECLFILENAME -Wno-REDEFMACRO --x-initial unique --x-assign unique verilator.vlt -I../../hw/rtl -I../../hw/dpi -I../../hw/rtl/libs -I../../hw/rtl/interfaces -I../../hw/rtl/cache -I../../hw/rtl/simulate -I../../hw/rtl/fp_cores -I../../third_party/fpnew/src/common_cells/include -I../../third_party/fpnew/src/common_cells/src -I../../third_party/fpnew/src/fpu_div_sqrt_mvp/hdl -I../../third_party/fpnew/src -I../../hw/rtl/tex_unit -I../../hw/rtl/raster_unit -I../../hw/rtl/rop_unit -DNUM_CLUSTERS=1 -DNUM_CORES=1 -DNUM_WARPS=2 -DNUM_THREADS=2 -DPERF_ENABLE -DICACHE_NUM_WAYS=1 -j 64 -DNDEBUG -DIMUL_DPI -DIDIV_DPI -DFPU_DPI ../common/util.cpp ../common/mem.cpp ../common/softfloat_ext.cpp ../common/rvfloats.cpp ../../hw/dpi/util_dpi.cpp ../../hw/dpi/float_dpi.cpp processor.cpp -CFLAGS '-std=c++11 -Wall -Wextra -Wfatal-errors -Wno-array-bounds -fPIC -Wno-maybe-uninitialized -I../../../hw -I../../common -I../../../third_party/softfloat/source/include -I../../../third_party -DNUM_CLUSTERS=1 -DNUM_CORES=1 -DNUM_WARPS=2 -DNUM_THREADS=2 -DPERF_ENABLE -DICACHE_NUM_WAYS=1 -O2 -DNDEBUG' -LDFLAGS '-shared ../../../third_party/softfloat/build/Linux-x86_64-GCC/softfloat.a -L../../../third_party/ramulator -lramulator' -o ../../../driver/rtlsim/librtlsim.so diff --git a/sim/common/rvfloats.cpp b/sim/common/rvfloats.cpp index 3e577f7f9d..2b252010ca 100644 --- a/sim/common/rvfloats.cpp +++ b/sim/common/rvfloats.cpp @@ -12,6 +12,7 @@ // limitations under the License. 
#include "rvfloats.h" +#include "softfloat_ext.h" #include extern "C" { @@ -158,6 +159,34 @@ uint64_t rv_fdiv_d(uint64_t a, uint64_t b, uint32_t frm, uint32_t* fflags) { return from_float64_t(r); } +uint32_t rv_frecip7_s(uint32_t a, uint32_t frm, uint32_t* fflags) { + softfloat_roundingMode = frm; + auto r = f32_recip7(to_float32_t(a)); + if (fflags) { *fflags = softfloat_exceptionFlags; } + return from_float32_t(r); +} + +uint64_t rv_frecip7_d(uint64_t a, uint32_t frm, uint32_t* fflags) { + softfloat_roundingMode = frm; + auto r = f64_recip7(to_float64_t(a)); + if (fflags) { *fflags = softfloat_exceptionFlags; } + return from_float64_t(r); +} + +uint32_t rv_frsqrt7_s(uint32_t a, uint32_t frm, uint32_t* fflags) { + softfloat_roundingMode = frm; + auto r = f32_rsqrte7(to_float32_t(a)); + if (fflags) { *fflags =softfloat_exceptionFlags; } + return from_float32_t(r); +} + +uint64_t rv_frsqrt7_d(uint64_t a, uint32_t frm, uint32_t* fflags) { + softfloat_roundingMode = frm; + auto r = f64_rsqrte7(to_float64_t(a)); + if (fflags) { *fflags = softfloat_exceptionFlags; } + return from_float64_t(r); +} + uint32_t rv_fsqrt_s(uint32_t a, uint32_t frm, uint32_t* fflags) { rv_init(frm); auto r = f32_sqrt(to_float32_t(a)); @@ -486,6 +515,11 @@ uint64_t rv_fsgnjx_d(uint64_t a, uint64_t b) { return r; } +uint32_t rv_dtof_r(uint64_t a, uint32_t frm) { + rv_init(frm); + return rv_dtof(a); +} + uint32_t rv_dtof(uint64_t a) { auto r = f64_to_f32(to_float64_t(a)); return from_float32_t(r); diff --git a/sim/common/rvfloats.h b/sim/common/rvfloats.h index d921846dd4..86b60e8eea 100644 --- a/sim/common/rvfloats.h +++ b/sim/common/rvfloats.h @@ -28,6 +28,8 @@ uint32_t rv_fnmadd_s(uint32_t a, uint32_t b, uint32_t c, uint32_t frm, uint32_t* uint32_t rv_fnmsub_s(uint32_t a, uint32_t b, uint32_t c, uint32_t frm, uint32_t* fflags); uint32_t rv_fdiv_s(uint32_t a, uint32_t b, uint32_t frm, uint32_t* fflags); uint32_t rv_fsqrt_s(uint32_t a, uint32_t frm, uint32_t* fflags); +uint32_t 
rv_frecip7_s(uint32_t a, uint32_t frm, uint32_t* fflags); +uint32_t rv_frsqrt7_s(uint32_t a, uint32_t frm, uint32_t* fflags); uint32_t rv_ftoi_s(uint32_t a, uint32_t frm, uint32_t* fflags); uint32_t rv_ftou_s(uint32_t a, uint32_t frm, uint32_t* fflags); @@ -58,6 +60,8 @@ uint64_t rv_fsub_d(uint64_t a, uint64_t b, uint32_t frm, uint32_t* fflags); uint64_t rv_fmul_d(uint64_t a, uint64_t b, uint32_t frm, uint32_t* fflags); uint64_t rv_fdiv_d(uint64_t a, uint64_t b, uint32_t frm, uint32_t* fflags); uint64_t rv_fsqrt_d(uint64_t a, uint32_t frm, uint32_t* fflags); +uint64_t rv_frecip7_d(uint64_t a, uint32_t frm, uint32_t* fflags); +uint64_t rv_frsqrt7_d(uint64_t a, uint32_t frm, uint32_t* fflags); uint64_t rv_fmadd_d(uint64_t a, uint64_t b, uint64_t c, uint32_t frm, uint32_t* fflags); uint64_t rv_fmsub_d(uint64_t a, uint64_t b, uint64_t c, uint32_t frm, uint32_t* fflags); @@ -85,6 +89,7 @@ uint64_t rv_fmin_d(uint64_t a, uint64_t b, uint32_t* fflags); uint64_t rv_fmax_d(uint64_t a, uint64_t b, uint32_t* fflags); uint32_t rv_dtof(uint64_t a); +uint32_t rv_dtof_r(uint64_t a, uint32_t frm); uint64_t rv_ftod(uint32_t a); #ifdef __cplusplus diff --git a/sim/common/softfloat_ext.cpp b/sim/common/softfloat_ext.cpp new file mode 100644 index 0000000000..877bdc8ac8 --- /dev/null +++ b/sim/common/softfloat_ext.cpp @@ -0,0 +1,486 @@ +/*============================================================================ + +This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3e, by John R. Hauser. + +Copyright 2011, 2012, 2013, 2014, 2015, 2016 The Regents of the University of +California. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. 
Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +=============================================================================*/ + +#include +#include +#include +#include <../RISCV/specialize.h> +#include +#include "softfloat_ext.h" + +uint_fast16_t f16_classify( float16_t a ) +{ + union ui16_f16 uA; + uint_fast16_t uiA; + + uA.f = a; + uiA = uA.ui; + + uint_fast16_t infOrNaN = expF16UI( uiA ) == 0x1F; + uint_fast16_t subnormalOrZero = expF16UI( uiA ) == 0; + bool sign = signF16UI( uiA ); + bool fracZero = fracF16UI( uiA ) == 0; + bool isNaN = isNaNF16UI( uiA ); + bool isSNaN = softfloat_isSigNaNF16UI( uiA ); + + return + ( sign && infOrNaN && fracZero ) << 0 | + ( sign && !infOrNaN && !subnormalOrZero ) << 1 | + ( sign && subnormalOrZero && !fracZero ) << 2 | + ( sign && subnormalOrZero && fracZero ) << 3 | + ( !sign && infOrNaN && fracZero ) << 7 | + ( !sign && !infOrNaN && !subnormalOrZero ) << 6 | + ( !sign && subnormalOrZero && !fracZero ) << 5 | + ( !sign && subnormalOrZero && fracZero ) << 4 | + ( isNaN && isSNaN ) << 8 | + ( isNaN && !isSNaN ) << 9; +} + +uint_fast16_t f32_classify( float32_t a ) +{ + union ui32_f32 uA; + uint_fast32_t uiA; + + uA.f = a; + uiA = uA.ui; + + uint_fast16_t infOrNaN = expF32UI( uiA ) == 0xFF; + uint_fast16_t subnormalOrZero = expF32UI( uiA ) == 0; + bool sign = signF32UI( uiA ); + bool fracZero = fracF32UI( uiA ) == 0; + bool isNaN = isNaNF32UI( uiA ); + bool isSNaN = softfloat_isSigNaNF32UI( uiA ); + + return + ( sign && infOrNaN && fracZero ) << 0 | + ( sign && !infOrNaN && !subnormalOrZero ) << 1 | + ( sign && subnormalOrZero && !fracZero ) << 2 | + ( sign && subnormalOrZero && fracZero ) << 3 | + ( !sign && infOrNaN && fracZero ) << 7 | + ( !sign && !infOrNaN && !subnormalOrZero ) << 6 | + ( !sign && subnormalOrZero && !fracZero ) << 5 | + ( !sign && subnormalOrZero && fracZero ) << 4 | + ( isNaN && isSNaN ) << 8 | + ( isNaN && !isSNaN ) << 9; +} + +uint_fast16_t f64_classify( float64_t a ) +{ + union ui64_f64 uA; + uint_fast64_t uiA; + + uA.f = a; + uiA = uA.ui; + + 
uint_fast16_t infOrNaN = expF64UI( uiA ) == 0x7FF; + uint_fast16_t subnormalOrZero = expF64UI( uiA ) == 0; + bool sign = signF64UI( uiA ); + bool fracZero = fracF64UI( uiA ) == 0; + bool isNaN = isNaNF64UI( uiA ); + bool isSNaN = softfloat_isSigNaNF64UI( uiA ); + + return + ( sign && infOrNaN && fracZero ) << 0 | + ( sign && !infOrNaN && !subnormalOrZero ) << 1 | + ( sign && subnormalOrZero && !fracZero ) << 2 | + ( sign && subnormalOrZero && fracZero ) << 3 | + ( !sign && infOrNaN && fracZero ) << 7 | + ( !sign && !infOrNaN && !subnormalOrZero ) << 6 | + ( !sign && subnormalOrZero && !fracZero ) << 5 | + ( !sign && subnormalOrZero && fracZero ) << 4 | + ( isNaN && isSNaN ) << 8 | + ( isNaN && !isSNaN ) << 9; +} + +static inline uint64_t extract64(uint64_t val, int pos, int len) +{ + assert(pos >= 0 && len > 0 && len <= 64 - pos); + return (val >> pos) & (~UINT64_C(0) >> (64 - len)); +} + +static inline uint64_t make_mask64(int pos, int len) +{ + assert(pos >= 0 && len > 0 && pos < 64 && len <= 64); + return (UINT64_MAX >> (64 - len)) << pos; +} + +//user needs to truncate output to required length +static inline uint64_t rsqrte7(uint64_t val, int e, int s, bool sub) { + uint64_t exp = extract64(val, s, e); + uint64_t sig = extract64(val, 0, s); + uint64_t sign = extract64(val, s + e, 1); + const int p = 7; + + static const uint8_t table[] = { + 52, 51, 50, 48, 47, 46, 44, 43, + 42, 41, 40, 39, 38, 36, 35, 34, + 33, 32, 31, 30, 30, 29, 28, 27, + 26, 25, 24, 23, 23, 22, 21, 20, + 19, 19, 18, 17, 16, 16, 15, 14, + 14, 13, 12, 12, 11, 10, 10, 9, + 9, 8, 7, 7, 6, 6, 5, 4, + 4, 3, 3, 2, 2, 1, 1, 0, + 127, 125, 123, 121, 119, 118, 116, 114, + 113, 111, 109, 108, 106, 105, 103, 102, + 100, 99, 97, 96, 95, 93, 92, 91, + 90, 88, 87, 86, 85, 84, 83, 82, + 80, 79, 78, 77, 76, 75, 74, 73, + 72, 71, 70, 70, 69, 68, 67, 66, + 65, 64, 63, 63, 62, 61, 60, 59, + 59, 58, 57, 56, 56, 55, 54, 53}; + + if (sub) { + while (extract64(sig, s - 1, 1) == 0) + exp--, sig <<= 1; + + sig = 
(sig << 1) & make_mask64(0 ,s); + } + + int idx = ((exp & 1) << (p-1)) | (sig >> (s-p+1)); + uint64_t out_sig = (uint64_t)(table[idx]) << (s-p); + uint64_t out_exp = (3 * make_mask64(0, e - 1) + ~exp) / 2; + + return (sign << (s+e)) | (out_exp << s) | out_sig; +} + +float16_t f16_rsqrte7(float16_t in) +{ + union ui16_f16 uA; + + uA.f = in; + unsigned int ret = f16_classify(in); + bool sub = false; + switch(ret) { + case 0x001: // -inf + case 0x002: // -normal + case 0x004: // -subnormal + case 0x100: // sNaN + softfloat_exceptionFlags |= softfloat_flag_invalid; + [[fallthrough]]; + case 0x200: //qNaN + uA.ui = defaultNaNF16UI; + break; + case 0x008: // -0 + uA.ui = 0xfc00; + softfloat_exceptionFlags |= softfloat_flag_infinite; + break; + case 0x010: // +0 + uA.ui = 0x7c00; + softfloat_exceptionFlags |= softfloat_flag_infinite; + break; + case 0x080: //+inf + uA.ui = 0x0; + break; + case 0x020: //+ sub + sub = true; + [[fallthrough]]; + default: // +num + uA.ui = rsqrte7(uA.ui, 5, 10, sub); + break; + } + + return uA.f; +} + +float32_t f32_rsqrte7(float32_t in) +{ + union ui32_f32 uA; + + uA.f = in; + unsigned int ret = f32_classify(in); + bool sub = false; + switch(ret) { + case 0x001: // -inf + case 0x002: // -normal + case 0x004: // -subnormal + case 0x100: // sNaN + softfloat_exceptionFlags |= softfloat_flag_invalid; + [[fallthrough]]; + case 0x200: //qNaN + uA.ui = defaultNaNF32UI; + break; + case 0x008: // -0 + uA.ui = 0xff800000; + softfloat_exceptionFlags |= softfloat_flag_infinite; + break; + case 0x010: // +0 + uA.ui = 0x7f800000; + softfloat_exceptionFlags |= softfloat_flag_infinite; + break; + case 0x080: //+inf + uA.ui = 0x0; + break; + case 0x020: //+ sub + sub = true; + [[fallthrough]]; + default: // +num + uA.ui = rsqrte7(uA.ui, 8, 23, sub); + break; + } + + return uA.f; +} + +float64_t f64_rsqrte7(float64_t in) +{ + union ui64_f64 uA; + + uA.f = in; + unsigned int ret = f64_classify(in); + bool sub = false; + switch(ret) { + case 0x001: // -inf + 
case 0x002: // -normal + case 0x004: // -subnormal + case 0x100: // sNaN + softfloat_exceptionFlags |= softfloat_flag_invalid; + [[fallthrough]]; + case 0x200: //qNaN + uA.ui = defaultNaNF64UI; + break; + case 0x008: // -0 + uA.ui = 0xfff0000000000000ul; + softfloat_exceptionFlags |= softfloat_flag_infinite; + break; + case 0x010: // +0 + uA.ui = 0x7ff0000000000000ul; + softfloat_exceptionFlags |= softfloat_flag_infinite; + break; + case 0x080: //+inf + uA.ui = 0x0; + break; + case 0x020: //+ sub + sub = true; + [[fallthrough]]; + default: // +num + uA.ui = rsqrte7(uA.ui, 11, 52, sub); + break; + } + + return uA.f; +} + +//user needs to truncate output to required length +static inline uint64_t recip7(uint64_t val, int e, int s, int rm, bool sub, + bool *round_abnormal) +{ + uint64_t exp = extract64(val, s, e); + uint64_t sig = extract64(val, 0, s); + uint64_t sign = extract64(val, s + e, 1); + const int p = 7; + + static const uint8_t table[] = { + 127, 125, 123, 121, 119, 117, 116, 114, + 112, 110, 109, 107, 105, 104, 102, 100, + 99, 97, 96, 94, 93, 91, 90, 88, + 87, 85, 84, 83, 81, 80, 79, 77, + 76, 75, 74, 72, 71, 70, 69, 68, + 66, 65, 64, 63, 62, 61, 60, 59, + 58, 57, 56, 55, 54, 53, 52, 51, + 50, 49, 48, 47, 46, 45, 44, 43, + 42, 41, 40, 40, 39, 38, 37, 36, + 35, 35, 34, 33, 32, 31, 31, 30, + 29, 28, 28, 27, 26, 25, 25, 24, + 23, 23, 22, 21, 21, 20, 19, 19, + 18, 17, 17, 16, 15, 15, 14, 14, + 13, 12, 12, 11, 11, 10, 9, 9, + 8, 8, 7, 7, 6, 5, 5, 4, + 4, 3, 3, 2, 2, 1, 1, 0}; + + if (sub) { + while (extract64(sig, s - 1, 1) == 0) + exp--, sig <<= 1; + + sig = (sig << 1) & make_mask64(0 ,s); + + if (exp != 0 && exp != UINT64_MAX) { + *round_abnormal = true; + if (rm == 1 || + (rm == 2 && !sign) || + (rm == 3 && sign)) + return ((sign << (s+e)) | make_mask64(s, e)) - 1; + else + return (sign << (s+e)) | make_mask64(s, e); + } + } + + int idx = sig >> (s-p); + uint64_t out_sig = (uint64_t)(table[idx]) << (s-p); + uint64_t out_exp = 2 * make_mask64(0, e - 1) + 
~exp; + if (out_exp == 0 || out_exp == UINT64_MAX) { + out_sig = (out_sig >> 1) | make_mask64(s - 1, 1); + if (out_exp == UINT64_MAX) { + out_sig >>= 1; + out_exp = 0; + } + } + + return (sign << (s+e)) | (out_exp << s) | out_sig; +} + +float16_t f16_recip7(float16_t in) +{ + union ui16_f16 uA; + + uA.f = in; + unsigned int ret = f16_classify(in); + bool sub = false; + bool round_abnormal = false; + switch(ret) { + case 0x001: // -inf + uA.ui = 0x8000; + break; + case 0x080: //+inf + uA.ui = 0x0; + break; + case 0x008: // -0 + uA.ui = 0xfc00; + softfloat_exceptionFlags |= softfloat_flag_infinite; + break; + case 0x010: // +0 + uA.ui = 0x7c00; + softfloat_exceptionFlags |= softfloat_flag_infinite; + break; + case 0x100: // sNaN + softfloat_exceptionFlags |= softfloat_flag_invalid; + [[fallthrough]]; + case 0x200: //qNaN + uA.ui = defaultNaNF16UI; + break; + case 0x004: // -subnormal + case 0x020: //+ sub + sub = true; + [[fallthrough]]; + default: // +- normal + uA.ui = recip7(uA.ui, 5, 10, + softfloat_roundingMode, sub, &round_abnormal); + if (round_abnormal) + softfloat_exceptionFlags |= softfloat_flag_inexact | + softfloat_flag_overflow; + break; + } + + return uA.f; +} + +float32_t f32_recip7(float32_t in) +{ + union ui32_f32 uA; + + uA.f = in; + unsigned int ret = f32_classify(in); + bool sub = false; + bool round_abnormal = false; + switch(ret) { + case 0x001: // -inf + uA.ui = 0x80000000; + break; + case 0x080: //+inf + uA.ui = 0x0; + break; + case 0x008: // -0 + uA.ui = 0xff800000; + softfloat_exceptionFlags |= softfloat_flag_infinite; + break; + case 0x010: // +0 + uA.ui = 0x7f800000; + softfloat_exceptionFlags |= softfloat_flag_infinite; + break; + case 0x100: // sNaN + softfloat_exceptionFlags |= softfloat_flag_invalid; + [[fallthrough]]; + case 0x200: //qNaN + uA.ui = defaultNaNF32UI; + break; + case 0x004: // -subnormal + case 0x020: //+ sub + sub = true; + [[fallthrough]]; + default: // +- normal + uA.ui = recip7(uA.ui, 8, 23, + softfloat_roundingMode, 
sub, &round_abnormal); + if (round_abnormal) + softfloat_exceptionFlags |= softfloat_flag_inexact | + softfloat_flag_overflow; + break; + } + + return uA.f; +} + +float64_t f64_recip7(float64_t in) +{ + union ui64_f64 uA; + + uA.f = in; + unsigned int ret = f64_classify(in); + bool sub = false; + bool round_abnormal = false; + switch(ret) { + case 0x001: // -inf + uA.ui = 0x8000000000000000; + break; + case 0x080: //+inf + uA.ui = 0x0; + break; + case 0x008: // -0 + uA.ui = 0xfff0000000000000; + softfloat_exceptionFlags |= softfloat_flag_infinite; + break; + case 0x010: // +0 + uA.ui = 0x7ff0000000000000; + softfloat_exceptionFlags |= softfloat_flag_infinite; + break; + case 0x100: // sNaN + softfloat_exceptionFlags |= softfloat_flag_invalid; + [[fallthrough]]; + case 0x200: //qNaN + uA.ui = defaultNaNF64UI; + break; + case 0x004: // -subnormal + case 0x020: //+ sub + sub = true; + [[fallthrough]]; + default: // +- normal + uA.ui = recip7(uA.ui, 11, 52, + softfloat_roundingMode, sub, &round_abnormal); + if (round_abnormal) + softfloat_exceptionFlags |= softfloat_flag_inexact | + softfloat_flag_overflow; + break; + } + + return uA.f; +} \ No newline at end of file diff --git a/sim/common/softfloat_ext.h b/sim/common/softfloat_ext.h new file mode 100644 index 0000000000..7a18af9f7e --- /dev/null +++ b/sim/common/softfloat_ext.h @@ -0,0 +1,14 @@ +#include +#include + +uint_fast16_t f16_classify( float16_t ); +float16_t f16_rsqrte7( float16_t ); +float16_t f16_recip7( float16_t ); + +uint_fast16_t f32_classify( float32_t ); +float32_t f32_rsqrte7( float32_t ); +float32_t f32_recip7( float32_t ); + +uint_fast16_t f64_classify( float64_t ); +float64_t f64_rsqrte7( float64_t ); +float64_t f64_recip7( float64_t ); \ No newline at end of file diff --git a/sim/opaesim/Makefile b/sim/opaesim/Makefile index b04f8ddb47..49b0f4ab86 100644 --- a/sim/opaesim/Makefile +++ b/sim/opaesim/Makefile @@ -51,7 +51,7 @@ endif DBG_FLAGS += -DDEBUG_LEVEL=$(DEBUG) -DVCD_OUTPUT 
$(DBG_TRACE_FLAGS) -SRCS = $(COMMON_DIR)/util.cpp $(COMMON_DIR)/mem.cpp $(COMMON_DIR)/rvfloats.cpp $(COMMON_DIR)/dram_sim.cpp +SRCS = $(COMMON_DIR)/util.cpp $(COMMON_DIR)/mem.cpp $(COMMON_DIR)/softfloat_ext.cpp $(COMMON_DIR)/rvfloats.cpp $(COMMON_DIR)/dram_sim.cpp SRCS += $(DPI_DIR)/util_dpi.cpp $(DPI_DIR)/float_dpi.cpp SRCS += $(SRC_DIR)/fpga.cpp $(SRC_DIR)/opae_sim.cpp diff --git a/sim/rtlsim/Makefile b/sim/rtlsim/Makefile index ecaee717b4..3903bbd85f 100644 --- a/sim/rtlsim/Makefile +++ b/sim/rtlsim/Makefile @@ -35,7 +35,7 @@ ifneq (,$(findstring FPU_FPNEW,$(CONFIGS))) endif RTL_INCLUDE = -I$(RTL_DIR) -I$(DPI_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache $(FPU_INCLUDE) -SRCS = $(COMMON_DIR)/util.cpp $(COMMON_DIR)/mem.cpp $(COMMON_DIR)/rvfloats.cpp $(COMMON_DIR)/dram_sim.cpp +SRCS = $(COMMON_DIR)/util.cpp $(COMMON_DIR)/mem.cpp $(COMMON_DIR)/softfloat_ext.cpp $(COMMON_DIR)/rvfloats.cpp $(COMMON_DIR)/dram_sim.cpp SRCS += $(DPI_DIR)/util_dpi.cpp $(DPI_DIR)/float_dpi.cpp SRCS += $(SRC_DIR)/processor.cpp diff --git a/sim/simx/Makefile b/sim/simx/Makefile index 31fde7023c..b97e9c00fc 100644 --- a/sim/simx/Makefile +++ b/sim/simx/Makefile @@ -17,8 +17,8 @@ CXXFLAGS += $(CONFIGS) LDFLAGS += $(THIRD_PARTY_DIR)/softfloat/build/Linux-x86_64-GCC/softfloat.a LDFLAGS += -Wl,-rpath,$(THIRD_PARTY_DIR)/ramulator -L$(THIRD_PARTY_DIR)/ramulator -lramulator -SRCS = $(COMMON_DIR)/util.cpp $(COMMON_DIR)/mem.cpp $(COMMON_DIR)/rvfloats.cpp $(COMMON_DIR)/dram_sim.cpp -SRCS += $(SRC_DIR)/processor.cpp $(SRC_DIR)/cluster.cpp $(SRC_DIR)/socket.cpp $(SRC_DIR)/core.cpp $(SRC_DIR)/emulator.cpp $(SRC_DIR)/decode.cpp $(SRC_DIR)/execute.cpp $(SRC_DIR)/func_unit.cpp $(SRC_DIR)/cache_sim.cpp $(SRC_DIR)/mem_sim.cpp $(SRC_DIR)/local_mem.cpp $(SRC_DIR)/mem_coalescer.cpp $(SRC_DIR)/dcrs.cpp $(SRC_DIR)/types.cpp +SRCS = $(COMMON_DIR)/util.cpp $(COMMON_DIR)/mem.cpp $(COMMON_DIR)/softfloat_ext.cpp $(COMMON_DIR)/softfloat_ext.cpp 
$(COMMON_DIR)/rvfloats.cpp $(COMMON_DIR)/dram_sim.cpp +SRCS += $(SRC_DIR)/processor.cpp $(SRC_DIR)/cluster.cpp $(SRC_DIR)/socket.cpp $(SRC_DIR)/core.cpp $(SRC_DIR)/emulator.cpp $(SRC_DIR)/decode.cpp $(SRC_DIR)/execute.cpp $(SRC_DIR)/execute_vector.cpp $(SRC_DIR)/func_unit.cpp $(SRC_DIR)/cache_sim.cpp $(SRC_DIR)/mem_sim.cpp $(SRC_DIR)/local_mem.cpp $(SRC_DIR)/mem_coalescer.cpp $(SRC_DIR)/dcrs.cpp $(SRC_DIR)/types.cpp # Debugging ifdef DEBUG diff --git a/sim/simx/arch.h b/sim/simx/arch.h index 6becf5c912..d68345db6c 100644 --- a/sim/simx/arch.h +++ b/sim/simx/arch.h @@ -29,6 +29,7 @@ class Arch { uint16_t num_cores_; uint16_t num_clusters_; uint16_t socket_size_; + uint16_t vsize_; uint16_t num_barriers_; uint64_t local_mem_base_; @@ -39,6 +40,7 @@ class Arch { , num_cores_(num_cores) , num_clusters_(NUM_CLUSTERS) , socket_size_(SOCKET_SIZE) + , vsize_(VLEN / 8) , num_barriers_(NUM_BARRIERS) , local_mem_base_(LMEM_BASE_ADDR) {} @@ -71,6 +73,10 @@ class Arch { return socket_size_; } + uint16_t vsize() const { + return vsize_; + } + }; } \ No newline at end of file diff --git a/sim/simx/decode.cpp b/sim/simx/decode.cpp index 7a37e79e25..3c184879d6 100644 --- a/sim/simx/decode.cpp +++ b/sim/simx/decode.cpp @@ -47,6 +47,7 @@ static const std::unordered_map sc_instTable = { {Opcode::FMSUB, InstType::R4}, {Opcode::FMNMADD, InstType::R4}, {Opcode::FMNMSUB, InstType::R4}, + {Opcode::VSET, InstType::V}, {Opcode::EXT1, InstType::R}, {Opcode::EXT2, InstType::R4}, {Opcode::R_W, InstType::R}, @@ -54,33 +55,6 @@ static const std::unordered_map sc_instTable = { {Opcode::TCU, InstType::I}, }; -enum Constants { - width_opcode= 7, - width_reg = 5, - width_func2 = 2, - width_func3 = 3, - width_func7 = 7, - width_i_imm = 12, - width_j_imm = 20, - - shift_opcode= 0, - shift_rd = width_opcode, - shift_func3 = shift_rd + width_reg, - shift_rs1 = shift_func3 + width_func3, - shift_rs2 = shift_rs1 + width_reg, - shift_func2 = shift_rs2 + width_reg, - shift_func7 = shift_rs2 + width_reg, - 
shift_rs3 = shift_func7 + width_func2, - - mask_opcode = (1 << width_opcode) - 1, - mask_reg = (1 << width_reg) - 1, - mask_func2 = (1 << width_func2) - 1, - mask_func3 = (1 << width_func3) - 1, - mask_func7 = (1 << width_func7) - 1, - mask_i_imm = (1 << width_i_imm) - 1, - mask_j_imm = (1 << width_j_imm) - 1, -}; - static const char* op_string(const Instr &instr) { auto opcode = instr.getOpcode(); auto func2 = instr.getFunc2(); @@ -230,10 +204,14 @@ static const char* op_string(const Instr &instr) { case Opcode::FENCE: return "FENCE"; case Opcode::FL: switch (func3) { - case 0x1: return "VL"; case 0x2: return "FLW"; case 0x3: return "FLD"; + case 0x0: return "VL8"; + case 0x5: return "VL16"; + case 0x6: return "VL32"; + case 0x7: return "VL64"; default: + std::cout << "Could not decode float/vector load with func3: " << func3 << std::endl; std::abort(); } case Opcode::FS: @@ -241,7 +219,12 @@ static const char* op_string(const Instr &instr) { case 0x1: return "VS"; case 0x2: return "FSW"; case 0x3: return "FSD"; + case 0x0: return "VS8"; + case 0x5: return "VS16"; + case 0x6: return "VS32"; + case 0x7: return "VS64"; default: + std::cout << "Could not decode float/vector store with func3: " << func3 << std::endl; std::abort(); } case Opcode::AMO: { @@ -390,6 +373,7 @@ static const char* op_string(const Instr &instr) { case Opcode::FMSUB: return func2 ? "FMSUB.D" : "FMSUB.S"; case Opcode::FMNMADD: return func2 ? "FNMADD.D" : "FNMADD.S"; case Opcode::FMNMSUB: return func2 ? 
"FNMSUB.D" : "FNMSUB.S"; + case Opcode::VSET: return "VSET"; case Opcode::EXT1: switch (func7) { case 0: @@ -421,6 +405,39 @@ static const char* op_string(const Instr &instr) { } } +inline void vec_log(std::ostream &os, const Instr &instr) { + if (instr.getVUseMask() & set_func3) + os << ", func3:" << instr.getFunc3(); + if (instr.getVUseMask() & set_func6) + os << ", func6:" << instr.getFunc6(); + if (instr.getVUseMask() & set_imm) + os << ", imm:" << instr.getImm(); + if (instr.getVUseMask() & set_vlswidth) + os << ", width:" << instr.getVlsWidth(); + if (instr.getVUseMask() & set_vmop) + os << ", mop:" << instr.getVmop(); + if (instr.getVUseMask() & set_vumop) + os << ", umop:" << instr.getVumop(); + if (instr.getVUseMask() & set_vnf) + os << ", nf:" << instr.getVnf(); + if (instr.getVUseMask() & set_vmask) + os << ", vmask:" << instr.getVmask(); + if (instr.getVUseMask() & set_vs3) + os << ", vs3:" << instr.getVs3(); + if (instr.getVUseMask() & set_zimm) + os << ", zimm:" << ((instr.hasZimm()) ? 
"true" : "false"); + if (instr.getVUseMask() & set_vlmul) + os << ", lmul:" << instr.getVlmul(); + if (instr.getVUseMask() & set_vsew) + os << ", sew:" << instr.getVsew(); + if (instr.getVUseMask() & set_vta) + os << ", ta:" << instr.getVta(); + if (instr.getVUseMask() & set_vma) + os << ", ma:" << instr.getVma(); + if (instr.getVUseMask() & set_vediv) + os << ", ediv:" << instr.getVediv(); +} + namespace vortex { std::ostream &operator<<(std::ostream &os, const Instr &instr) { os << op_string(instr); @@ -441,6 +458,13 @@ std::ostream &operator<<(std::ostream &os, const Instr &instr) { if (sep++ != 0) { os << ", "; } else { os << " "; } os << "0x" << std::hex << instr.getImm() << std::dec; } + if (instr.getOpcode() == Opcode::SYS && instr.getFunc3() >= 5) { + // CSRs with immediate values + if (sep++ != 0) { os << ", "; } else { os << " "; } + os << "0x" << std::hex << instr.getRSrc(0); + } + // Log vector-specific vtype and vreg info + if (instr.isVec()) vec_log(os, instr); return os; } } @@ -452,6 +476,7 @@ std::shared_ptr Emulator::decode(uint32_t code) const { auto func2 = (code >> shift_func2) & mask_func2; auto func3 = (code >> shift_func3) & mask_func3; + auto func6 = (code >> shift_func6) & mask_func6; auto func7 = (code >> shift_func7) & mask_func7; auto rd = (code >> shift_rd) & mask_reg; @@ -466,6 +491,12 @@ std::shared_ptr Emulator::decode(uint32_t code) const { } auto iType = op_it->second; + if (op == Opcode::FL || op == Opcode::FS) { + if (func3 != 0x2 && func3 != 0x3) { + iType = InstType::V; + } + } + switch (iType) { case InstType::R: switch (op) { @@ -659,7 +690,104 @@ std::shared_ptr Emulator::decode(uint32_t code) const { auto imm = (bits_10_1 << 1) | (bit_11 << 11) | (bits_19_12 << 12) | (bit_20 << 20); instr->setImm(sext(imm, width_j_imm+1)); } break; + + case InstType::V: + instr->setVec(true); + switch (op) { + case Opcode::VSET: { + instr->setDestReg(rd, RegType::Integer); + instr->setFunc3(func3); + switch (func3) { + case 7: { + if (code 
>> (shift_vset - 1) == 0b10) { // vsetvl + instr->addSrcReg(rs1, RegType::Integer); + instr->addSrcReg(rs2, RegType::Integer); + } else { + auto zimm = (code >> shift_rs2) & mask_v_zimm; + instr->setZimm(true); + instr->setVlmul(zimm & mask_v_lmul); + instr->setVsew((zimm >> shift_v_sew) & mask_v_sew); + instr->setVta((zimm >> shift_v_ta) & mask_v_ta); + instr->setVma((zimm >> shift_v_ma) & mask_v_ma); + if ((code >> shift_vset)) { // vsetivli + instr->setImm(rs1); + } else { // vsetvli + instr->addSrcReg(rs1, RegType::Integer); + } + } + } break; + case 3: { // Vector - immediate arithmetic instructions + instr->setDestReg(rd, RegType::Vector); + instr->addSrcReg(rs2, RegType::Vector); + instr->setImm(rs1); + instr->setVmask((code >> shift_func7) & 0x1); + instr->setFunc6(func6); + } break; + default: { // Vector - vector/scalar arithmetic instructions + if (func3 == 1 && func6 == 16) { + instr->setDestReg(rd, RegType::Float); + } else if (func3 == 2 && func6 == 16) { + instr->setDestReg(rd, RegType::Integer); + } else { + instr->setDestReg(rd, RegType::Vector); + } + instr->addSrcReg(rs1, RegType::Vector); + instr->addSrcReg(rs2, RegType::Vector); + instr->setVmask((code >> shift_func7) & 0x1); + instr->setFunc6(func6); + } + } + } break; + + case Opcode::FL: + instr->addSrcReg(rs1, RegType::Integer); + instr->setVmop((code >> shift_vmop) & 0b11); + switch (instr->getVmop()) { + case 0b00: + instr->setVumop(rs2); + break; + case 0b10: + instr->addSrcReg(rs2, RegType::Integer); + break; + case 0b01: + case 0b11: + instr->addSrcReg(rs2, RegType::Vector); + break; + } + instr->setVsew(func3 & 0x3); + instr->setDestReg(rd, RegType::Vector); + instr->setVlsWidth(func3); + instr->setVmask((code >> shift_func7) & 0x1); + instr->setVnf((code >> shift_vnf) & mask_func3); + break; + case Opcode::FS: + instr->addSrcReg(rs1, RegType::Integer); + instr->setVmop((code >> shift_vmop) & 0b11); + switch (instr->getVmop()) { + case 0b00: + instr->setVumop(rs2); + break; + case 
0b10: + instr->addSrcReg(rs2, RegType::Integer); + break; + case 0b01: + case 0b11: + instr->addSrcReg(rs2, RegType::Vector); + break; + } + instr->setVsew(func3 & 0x3); + instr->addSrcReg(rd, RegType::Vector); + instr->setVlsWidth(func3); + instr->setVmask((code >> shift_func7) & 0x1); + instr->setVmop((code >> shift_vmop) & 0b11); + instr->setVnf((code >> shift_vnf) & mask_func3); + break; + + default: + std::abort(); + } + break; case InstType::R4: instr->setDestReg(rd, RegType::Float); instr->addSrcReg(rs1, RegType::Float); diff --git a/sim/simx/emulator.cpp b/sim/simx/emulator.cpp index 05b3497c45..14cb979d40 100644 --- a/sim/simx/emulator.cpp +++ b/sim/simx/emulator.cpp @@ -33,6 +33,7 @@ using namespace vortex; Emulator::warp_t::warp_t(const Arch& arch) : ireg_file(arch.num_threads(), std::vector(MAX_NUM_REGS)) , freg_file(arch.num_threads(), std::vector(MAX_NUM_REGS)) + , vreg_file(MAX_NUM_REGS, std::vector(arch.vsize())) , uuid(0) {} @@ -64,6 +65,26 @@ void Emulator::warp_t::clear(uint64_t startup_addr) { #endif } } + + for (auto& reg_file : this->vreg_file) { + for (auto& reg : reg_file) { + #ifndef NDEBUG + reg = 0; + #else + reg = std::rand(); + #endif + } + } + + for (auto& reg_file : this->vreg_file) { + for (auto& reg : reg_file) { + #ifndef NDEBUG + reg = 0; + #else + reg = std::rand(); + #endif + } + } } /////////////////////////////////////////////////////////////////////////////// @@ -79,7 +100,12 @@ Emulator::Emulator(const Arch &arch, const DCRS &dcrs, Core* core) // considered to be big enough to hold input tiles for one output tile. // In future versions, scratchpad size should be fixed to an appropriate value. 
, scratchpad(std::vector(32 * 32 * 32768)) + , csrs_(arch.num_warps()) { + for (uint32_t i = 0; i < arch_.num_warps(); ++i) { + csrs_.at(i).resize(arch.num_threads()); + } + this->clear(); } @@ -463,6 +489,32 @@ Word Emulator::get_csr(uint32_t addr, uint32_t tid, uint32_t wid) { case VX_CSR_FFLAGS: return warps_.at(wid).fcsr & 0x1F; case VX_CSR_FRM: return (warps_.at(wid).fcsr >> 5); case VX_CSR_FCSR: return warps_.at(wid).fcsr; + + // Vector CRSs + case VX_CSR_VSTART: + return csrs_.at(wid).at(tid)[VX_CSR_VSTART]; + case VX_CSR_VXSAT: + return csrs_.at(wid).at(tid)[VX_CSR_VXSAT]; + case VX_CSR_VXRM: + return csrs_.at(wid).at(tid)[VX_CSR_VXRM]; + case VX_CSR_VCSR: { + Word vxsat = csrs_.at(wid).at(tid)[VX_CSR_VXSAT]; + Word vxrm = csrs_.at(wid).at(tid)[VX_CSR_VXRM]; + return (vxrm << 1) | vxsat; + } + case VX_CSR_VL: + return csrs_.at(wid).at(tid)[VX_CSR_VL]; + case VX_CSR_VTYPE: + return csrs_.at(wid).at(tid)[VX_CSR_VTYPE]; + case VX_CSR_VLENB: + return VLEN / 8; + case VX_CSR_VCYCLE: + return csrs_.at(wid).at(tid)[VX_CSR_VCYCLE]; + case VX_CSR_VTIME: + return csrs_.at(wid).at(tid)[VX_CSR_VTIME]; + case VX_CSR_VINSTRET: + return csrs_.at(wid).at(tid)[VX_CSR_VINSTRET]; + case VX_CSR_MHARTID: return (core_->id() * arch_.num_warps() + wid) * arch_.num_threads() + tid; case VX_CSR_THREAD_ID: return tid; case VX_CSR_WARP_ID: return wid; @@ -578,6 +630,29 @@ void Emulator::set_csr(uint32_t addr, Word value, uint32_t tid, uint32_t wid) { case VX_CSR_MSCRATCH: csr_mscratch_ = value; break; + + // Vector CRSs + case VX_CSR_VSTART: + csrs_.at(wid).at(tid)[VX_CSR_VSTART] = value; + break; + case VX_CSR_VXSAT: + csrs_.at(wid).at(tid)[VX_CSR_VXSAT] = value & 0b1; + break; + case VX_CSR_VXRM: + csrs_.at(wid).at(tid)[VX_CSR_VXRM] = value & 0b11; + break; + case VX_CSR_VCSR: + csrs_.at(wid).at(tid)[VX_CSR_VXSAT] = value & 0b1; + csrs_.at(wid).at(tid)[VX_CSR_VXRM] = (value >> 1) & 0b11; + break; + case VX_CSR_VL: // read only, written by vset(i)vl(i) + 
csrs_.at(wid).at(tid)[VX_CSR_VL] = value; + break; + case VX_CSR_VTYPE: // read only, written by vset(i)vl(i) + csrs_.at(wid).at(tid)[VX_CSR_VTYPE] = value; + break; + case VX_CSR_VLENB: // read only, set to VLEN / 8 + case VX_CSR_SATP: #ifdef VM_ENABLE // warps_.at(wid).fcsr = (warps_.at(wid).fcsr & ~0x1F) | (value & 0x1F); diff --git a/sim/simx/emulator.h b/sim/simx/emulator.h index 5f1b91d5d4..ffe630c3d1 100644 --- a/sim/simx/emulator.h +++ b/sim/simx/emulator.h @@ -28,6 +28,76 @@ class Core; class Instr; class instr_trace_t; +enum Constants { + width_opcode= 7, + width_reg = 5, + width_func2 = 2, + width_func3 = 3, + width_func6 = 6, + width_func7 = 7, + width_mop = 3, + width_vmask = 1, + width_i_imm = 12, + width_j_imm = 20, + width_v_zimm = 11, + width_v_ma = 1, + width_v_ta = 1, + width_v_sew = 3, + width_v_lmul = 3, + width_aq = 1, + width_rl = 1, + + shift_opcode= 0, + shift_rd = width_opcode, + shift_func3 = shift_rd + width_reg, + shift_rs1 = shift_func3 + width_func3, + shift_rs2 = shift_rs1 + width_reg, + shift_func2 = shift_rs2 + width_reg, + shift_func7 = shift_rs2 + width_reg, + shift_rs3 = shift_func7 + width_func2, + shift_vmop = shift_func7 + width_vmask, + shift_vnf = shift_vmop + width_mop, + shift_func6 = shift_func7 + width_vmask, + shift_vset = shift_func7 + width_func6, + shift_v_sew = width_v_lmul, + shift_v_ta = shift_v_sew + width_v_sew, + shift_v_ma = shift_v_ta + width_v_ta, + + mask_opcode = (1 << width_opcode) - 1, + mask_reg = (1 << width_reg) - 1, + mask_func2 = (1 << width_func2) - 1, + mask_func3 = (1 << width_func3) - 1, + mask_func6 = (1 << width_func6) - 1, + mask_func7 = (1 << width_func7) - 1, + mask_i_imm = (1 << width_i_imm) - 1, + mask_j_imm = (1 << width_j_imm) - 1, + mask_v_zimm = (1 << width_v_zimm) - 1, + mask_v_ma = (1 << width_v_ma) - 1, + mask_v_ta = (1 << width_v_ta) - 1, + mask_v_sew = (1 << width_v_sew) - 1, + mask_v_lmul = (1 << width_v_lmul) - 1, +}; + +struct vtype { + uint32_t vill; + uint32_t vma; + 
uint32_t vta; + uint32_t vsew; + uint32_t vlmul; +}; + +union reg_data_t { + Word u; + WordI i; + WordF f; + float f32; + double f64; + uint32_t u32; + uint64_t u64; + int32_t i32; + int64_t i64; +}; + class Emulator { public: Emulator(const Arch &arch, @@ -61,6 +131,10 @@ class Emulator { Word get_tc_size(); Word get_tc_num(); + void dcache_read(void* data, uint64_t addr, uint32_t size); + + void dcache_write(const void* data, uint64_t addr, uint32_t size); + private: struct ipdom_entry_t { @@ -85,9 +159,14 @@ class Emulator { ThreadMask tmask; std::vector> ireg_file; std::vector>freg_file; + std::vector> vreg_file; std::stack ipdom_stack; Byte fcsr; uint32_t uuid; + + struct vtype vtype; + uint32_t vl; + Word VLMAX; }; struct wspawn_t { @@ -100,11 +179,13 @@ class Emulator { void execute(const Instr &instr, uint32_t wid, instr_trace_t *trace); - void icache_read(void* data, uint64_t addr, uint32_t size); + void executeVector(const Instr &instr, uint32_t wid, std::vector &rsdata, std::vector &rddata); - void dcache_read(void* data, uint64_t addr, uint32_t size); + void loadVector(const Instr &instr, uint32_t wid, std::vector &rsdata); - void dcache_write(const void* data, uint64_t addr, uint32_t size); + void storeVector(const Instr &instr, uint32_t wid, std::vector &rsdata); + + void icache_read(void* data, uint64_t addr, uint32_t size); void dcache_amo_reserve(uint64_t addr); @@ -142,6 +223,7 @@ class Emulator { uint32_t mat_size; uint32_t tc_size; uint32_t tc_num; + std::vector>> csrs_; }; } diff --git a/sim/simx/execute.cpp b/sim/simx/execute.cpp index dd82535715..d477a1d45b 100644 --- a/sim/simx/execute.cpp +++ b/sim/simx/execute.cpp @@ -25,22 +25,11 @@ #include "emulator.h" #include "instr.h" #include "core.h" +#include "processor_impl.h" #include "VX_types.h" using namespace vortex; -union reg_data_t { - Word u; - WordI i; - WordF f; - float f32; - double f64; - uint32_t u32; - uint64_t u64; - int32_t i32; - int64_t i64; -}; - inline uint64_t 
nan_box(uint32_t value) { return value | 0xffffffff00000000; } @@ -128,6 +117,8 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) { } DPN(2, "}" << std::endl); break; + case RegType::Vector: + break; default: break; } @@ -678,41 +669,47 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) { trace->src_regs[0] = {RegType::Integer, rsrc0}; auto trace_data = std::make_shared(num_threads); trace->data = trace_data; - uint32_t data_bytes = 1 << (func3 & 0x3); - uint32_t data_width = 8 * data_bytes; - for (uint32_t t = thread_start; t < num_threads; ++t) { - if (!warp.tmask.test(t)) - continue; - uint64_t mem_addr = rsdata[t][0].i + immsrc; - uint64_t read_data = 0; - this->dcache_read(&read_data, mem_addr, data_bytes); - trace_data->mem_addrs.at(t) = {mem_addr, data_bytes}; - switch (func3) { - case 0: // RV32I: LB - case 1: // RV32I: LH - rddata[t].i = sext((Word)read_data, data_width); - break; - case 2: - if (opcode == Opcode::L) { - // RV32I: LW + if ((opcode == Opcode::L ) + || (opcode == Opcode::FL && func3 == 2) + || (opcode == Opcode::FL && func3 == 3)) { + uint32_t data_bytes = 1 << (func3 & 0x3); + uint32_t data_width = 8 * data_bytes; + for (uint32_t t = thread_start; t < num_threads; ++t) { + if (!warp.tmask.test(t)) + continue; + uint64_t mem_addr = rsdata[t][0].i + immsrc; + uint64_t read_data = 0; + this->dcache_read(&read_data, mem_addr, data_bytes); + trace_data->mem_addrs.at(t) = {mem_addr, data_bytes}; + switch (func3) { + case 0: // RV32I: LB + case 1: // RV32I: LH rddata[t].i = sext((Word)read_data, data_width); - } else { - // RV32F: FLW - rddata[t].u64 = nan_box((uint32_t)read_data); + break; + case 2: + if (opcode == Opcode::L) { + // RV32I: LW + rddata[t].i = sext((Word)read_data, data_width); + } else { + // RV32F: FLW + rddata[t].u64 = nan_box((uint32_t)read_data); + } + break; + case 3: // RV64I: LD + // RV32D: FLD + case 4: // RV32I: LBU + case 5: // RV32I: LHU + case 6: // RV64I: 
LWU + rddata[t].u64 = read_data; + break; + default: + std::abort(); } - break; - case 3: // RV64I: LD - // RV32D: FLD - case 4: // RV32I: LBU - case 5: // RV32I: LHU - case 6: // RV64I: LWU - rddata[t].u64 = read_data; - break; - default: - std::abort(); } + rd_write = true; + } else { + loadVector(instr, wid, rsdata); } - rd_write = true; break; } case Opcode::S: @@ -724,23 +721,29 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) { trace->src_regs[1] = {data_type, rsrc1}; auto trace_data = std::make_shared(num_threads); trace->data = trace_data; - uint32_t data_bytes = 1 << (func3 & 0x3); - for (uint32_t t = thread_start; t < num_threads; ++t) { - if (!warp.tmask.test(t)) - continue; - uint64_t mem_addr = rsdata[t][0].i + immsrc; - uint64_t write_data = rsdata[t][1].u64; - trace_data->mem_addrs.at(t) = {mem_addr, data_bytes}; - switch (func3) { - case 0: - case 1: - case 2: - case 3: - this->dcache_write(&write_data, mem_addr, data_bytes); - break; - default: - std::abort(); + if ((opcode == Opcode::S) + || (opcode == Opcode::FS && func3 == 2) + || (opcode == Opcode::FS && func3 == 3)) { + uint32_t data_bytes = 1 << (func3 & 0x3); + for (uint32_t t = thread_start; t < num_threads; ++t) { + if (!warp.tmask.test(t)) + continue; + uint64_t mem_addr = rsdata[t][0].i + immsrc; + uint64_t write_data = rsdata[t][1].u64; + trace_data->mem_addrs.at(t) = {mem_addr, data_bytes}; + switch (func3) { + case 0: + case 1: + case 2: + case 3: + this->dcache_write(&write_data, mem_addr, data_bytes); + break; + default: + std::abort(); + } } + } else { + storeVector(instr, wid, rsdata); } break; } @@ -925,7 +928,7 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) { for (uint32_t t = thread_start; t < num_threads; ++t) { if (!warp.tmask.test(t)) continue; - uint32_t frm = this->get_fpu_rm(func3, t, wid); + uint32_t frm = (func3 == 0x7) ? 
this->get_csr(VX_CSR_FRM, t, wid) : func3; uint32_t fflags = 0; switch (func7) { case 0x00: { // RV32F: FADD.S @@ -1240,7 +1243,10 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) { break; } } - this->update_fcrs(fflags, t, wid); + if (fflags) { + this->set_csr(VX_CSR_FCSR, this->get_csr(VX_CSR_FCSR, t, wid) | fflags, t, wid); + this->set_csr(VX_CSR_FFLAGS, this->get_csr(VX_CSR_FFLAGS, t, wid) | fflags, t, wid); + } } rd_write = true; break; @@ -1294,7 +1300,10 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) { default: break; } - this->update_fcrs(fflags, t, wid); + if (fflags) { + this->set_csr(VX_CSR_FCSR, this->get_csr(VX_CSR_FCSR, t, wid) | fflags, t, wid); + this->set_csr(VX_CSR_FFLAGS, this->get_csr(VX_CSR_FFLAGS, t, wid) | fflags, t, wid); + } } rd_write = true; break; @@ -1586,6 +1595,13 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) { std::abort(); } } break; + case Opcode::VSET: { + auto func6 = instr.getFunc6(); + if ((func3 == 0x7) || (func3 == 0x2 && func6 == 16) || (func3 == 0x1 && func6 == 16)) { + rd_write = true; + } + executeVector(instr, wid, rsdata, rddata); + } break; default: std::abort(); } @@ -1629,6 +1645,7 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) { trace->dst_reg = {type, rdest}; break; default: + std::cout << "Unrecognized register write back type: " << type << std::endl; std::abort(); break; } diff --git a/sim/simx/execute_vector.cpp b/sim/simx/execute_vector.cpp new file mode 100644 index 0000000000..3b2d585db9 --- /dev/null +++ b/sim/simx/execute_vector.cpp @@ -0,0 +1,4493 @@ +// This is a fork of https://github.com/troibe/vortex/tree/simx-v2-vector +// The purpose of this fork is to make the simx-v2-vector up to date with master +// Thanks to Troibe for his amazing work + +#include +#include +#include +#include +#include +#include "emulator.h" +#include "instr.h" +#include "processor_impl.h" + 
+using namespace vortex; + +template +class Add { + public: + static R apply(T first, T second, R) { + return (R)first + (R)second; + } + static std::string name() {return "Add";} +}; + +template +class Sub { + public: + static R apply(T first, T second, R) { + return (R)second - (R)first; + } + static std::string name() {return "Sub";} +}; + +template +class Adc { + public: + static R apply(T first, T second, R third) { + return (R)first + (R)second + third; + } + static std::string name() {return "Adc";} +}; + +template +class Madc { + public: + static R apply(T first, T second, R third) { + return (R)first + (R)second + third > (R)std::numeric_limits::max(); + } + static std::string name() {return "Madc";} +}; + +template +class Sbc { + public: + static R apply(T first, T second, R third) { + return (R)second - (R)first - third; + } + static std::string name() {return "Sbc";} +}; + +template +class Msbc { + public: + static R apply(T first, T second, R third) { + return (R)second < (R)first + third; + } + static std::string name() {return "Msbc";} +}; + +template +class Ssub { + public: + static R apply(T first, T second, uint32_t, uint32_t &vxsat_) { + // rounding mode is not relevant for this operation + T unclippedResult = second - first; + R clippedResult = std::clamp(unclippedResult, (T)std::numeric_limits::min(), (T)std::numeric_limits::max()); + vxsat_ |= clippedResult != unclippedResult; + return clippedResult; + } + static std::string name() {return "Ssub";} +}; + +template +class Ssubu { + public: + static R apply(T first, T second, uint32_t, uint32_t &vxsat_) { + // rounding mode is not relevant for this operation + if (first > second) { + vxsat_ = true; + return 0; + } else { + vxsat_ = false; + return second - first; + } + } + static std::string name() {return "Ssubu";} +}; + +template +class Sadd { + public: + static R apply(T first, T second, uint32_t, uint32_t &vxsat_) { + // rounding mode is not relevant for this operation + T unclippedResult = 
second + first; + R clippedResult = std::clamp(unclippedResult, (T)std::numeric_limits::min(), (T)std::numeric_limits::max()); + vxsat_ |= clippedResult != unclippedResult; + return clippedResult; + } + static std::string name() {return "Sadd";} +}; + +template +class Rsub { + public: + static R apply(T first, T second, R) { + return first - second; + } + static std::string name() {return "Rsub";} +}; + +template +class Div { + public: + static R apply(T first, T second, R) { + // logic taken from scalar div + if (first == 0) { + return -1; + } else if (second == std::numeric_limits::min() && first == T(-1)) { + return second; + } else { + return (R)second / (R)first; + } + } + static std::string name() {return "Div";} +}; + +template +class Rem { + public: + static R apply(T first, T second, R) { + // logic taken from scalar rem + if (first == 0) { + return second; + } else if (second == std::numeric_limits::min() && first == T(-1)) { + return 0; + } else { + return (R)second % (R)first; + } + } + static std::string name() {return "Rem";} +}; + +template +class Mul { + public: + static R apply(T first, T second, R) { + return (R)first * (R)second; + } + static std::string name() {return "Mul";} +}; + +template +class Mulsu { + public: + static R apply(T first, T second, R) { + R first_ext = zext((R)first, (sizeof(T) * 8)); + return first_ext * (R)second; + } + static std::string name() {return "Mulsu";} +}; + +template +class Mulh { + public: + static R apply(T first, T second, R) { + __int128_t first_ext = sext((__int128_t)first, (sizeof(T) * 8)); + __int128_t second_ext = sext((__int128_t)second, (sizeof(T) * 8)); + return (first_ext * second_ext) >> (sizeof(T) * 8); + } + static std::string name() {return "Mulh";} +}; + +template +class Mulhsu { + public: + static R apply(T first, T second, R) { + __int128_t first_ext = zext((__int128_t)first, (sizeof(T) * 8)); + __int128_t second_ext = sext((__int128_t)second, (sizeof(T) * 8)); + return (first_ext * 
second_ext) >> (sizeof(T) * 8); + } + static std::string name() {return "Mulhsu";} +}; + +template +class Mulhu { + public: + static R apply(T first, T second, R) { + return ((__uint128_t)first * (__uint128_t)second) >> (sizeof(T) * 8); + } + static std::string name() {return "Mulhu";} +}; + +template +class Madd { + public: + static R apply(T first, T second, R third) { + return ((R)first * third) + (R)second; + } + static std::string name() {return "Madd";} +}; + +template +class Nmsac { + public: + static R apply(T first, T second, R third) { + return -((R)first * (R)second) + third; + } + static std::string name() {return "Nmsac";} +}; + +template +class Macc { + public: + static R apply(T first, T second, R third) { + return ((R)first * (R)second) + third; + } + static std::string name() {return "Macc";} +}; + +template +class Maccsu { + public: + static R apply(T first, T second, R third) { + R first_ext = sext((R)first, (sizeof(T) * 8)); + R second_ext = zext((R)second, (sizeof(T) * 8)); + return (first_ext * second_ext) + third; + } + static std::string name() {return "Maccsu";} +}; + +template +class Maccus { + public: + static R apply(T first, T second, R third) { + R first_ext = zext((R)first, (sizeof(T) * 8)); + R second_ext = sext((R)second, (sizeof(T) * 8)); + return (first_ext * second_ext) + third; + } + static std::string name() {return "Maccus";} +}; + +template +class Nmsub { + public: + static R apply(T first, T second, R third) { + return -((R)first * third) + (R)second; + } + static std::string name() {return "Nmsub";} +}; + +template +class Min { + public: + static R apply(T first, T second, R) { + return std::min(first, second); + } + static std::string name() {return "Min";} +}; + +template +class Max { + public: + static R apply(T first, T second, R) { + return std::max(first, second); + } + static std::string name() {return "Max";} +}; + +template +class And { + public: + static R apply(T first, T second, R) { + return first & second; + } 
+ static std::string name() {return "And";} +}; + +template +class Or { + public: + static R apply(T first, T second, R) { + return first | second; + } + static std::string name() {return "Or";} +}; + +template +class Xor { + public: + static R apply(T first, T second, R) { + return first ^ second; + } + static std::string name() {return "Xor";} +}; + +template +class Sll { + public: + static R apply(T first, T second, R) { + // Only the low lg2(SEW) bits of the shift-amount value are used to control the shift amount. + return second << (first & (sizeof(T) * 8 - 1)); + } + static std::string name() {return "Sll";} +}; + +template +bool bitAt(T value, R pos, R negOffset) { + R offsetPos = pos - negOffset; + return pos >= negOffset && ((value >> offsetPos) & 0x1); +} + +template +bool anyBitUpTo(T value, R to, R negOffset) { + R offsetTo = to - negOffset; + return to >= negOffset && (value & (((R)1 << (offsetTo + 1)) - 1)); +} + +template +bool roundBit(T value, R shiftDown, uint32_t vxrm) { + switch (vxrm){ + case 0: // round-to-nearest-up + return bitAt(value, shiftDown, (R)1); + case 1: // round-to-nearest-even + return bitAt(value, shiftDown, (R)1) && (anyBitUpTo(value, shiftDown, (R)2) || bitAt(value, shiftDown, (R)0)); + case 2: // round-down (truncate) + return 0; + case 3: // round-to-odd + return !bitAt(value, shiftDown, (R)0) && anyBitUpTo(value, shiftDown, (R)1); + default: + std::cout << "Roundoff - invalid value for vxrm: " << vxrm << std::endl; + std::abort(); + } +} + +template +class SrlSra { + public: + static R apply(T first, T second, R) { + // Only the low lg2(SEW) bits of the shift-amount value are used to control the shift amount. + return second >> (first & (sizeof(T) * 8 - 1)); + } + static R apply(T first, T second, uint32_t vxrm, uint32_t) { + // Saturation is not relevant for this operation + // Only the low lg2(SEW) bits of the shift-amount value are used to control the shift amount. 
+ T firstValid = first & (sizeof(T) * 8 - 1); + return apply(firstValid, second, 0) + roundBit(second, firstValid, vxrm); + } + static std::string name() {return "SrlSra";} +}; + +template +class Aadd { + public: + static R apply(T first, T second, uint32_t vxrm, uint32_t) { + // Saturation is not relevant for this operation + T sum = second + first; + return (sum >> 1) + roundBit(sum, 1, vxrm); + } + static std::string name() {return "Aadd";} +}; + +template +class Asub { + public: + static R apply(T first, T second, uint32_t vxrm, uint32_t) { + // Saturation is not relevant for this operation + T difference = second - first; + return (difference >> 1) + roundBit(difference, 1, vxrm); + } + static std::string name() {return "Asub";} +}; + +template +class Eq { + public: + static R apply(T first, T second, R) { + return first == second; + } + static std::string name() {return "Eq";} +}; + +template +class Ne { + public: + static R apply(T first, T second, R) { + return first != second; + } + static std::string name() {return "Ne";} +}; + +template +class Lt { + public: + static R apply(T first, T second, R) { + return first > second; + } + static std::string name() {return "Lt";} +}; + +template +class Le { + public: + static R apply(T first, T second, R) { + return first >= second; + } + static std::string name() {return "Le";} +}; + +template +class Gt { + public: + static R apply(T first, T second, R) { + return first < second; + } + static std::string name() {return "Gt";} +}; + +template +class AndNot { + public: + static R apply(T first, T second, R) { + return second & ~first; + } + static std::string name() {return "AndNot";} +}; + +template +class OrNot { + public: + static R apply(T first, T second, R) { + return second | ~first; + } + static std::string name() {return "OrNot";} +}; + +template +class Nand { + public: + static R apply(T first, T second, R) { + return ~(second & first); + } + static std::string name() {return "Nand";} +}; + +template 
+class Mv { + public: + static R apply(T first, T, R) { + return first; + } + static std::string name() {return "Mv";} +}; + +template +class Nor { + public: + static R apply(T first, T second, R) { + return ~(second | first); + } + static std::string name() {return "Nor";} +}; + +template +class Xnor { + public: + static R apply(T first, T second, R) { + return ~(second ^ first); + } + static std::string name() {return "Xnor";} +}; + +template +class Fadd { + public: + static R apply(T first, T second, R) { + // ignoring flags for now + uint32_t fflags = 0; + // ignoring rounding mode for now + uint32_t frm = 0; + if (sizeof(R) == 4) { + return rv_fadd_s(first, second, frm, &fflags); + } else if (sizeof(R) == 8) { + uint64_t first_d = sizeof(T) == 8 ? first : rv_ftod(first); + uint64_t second_d = sizeof(T) == 8 ? second : rv_ftod(second); + return rv_fadd_d(first_d, second_d, frm, &fflags); + } else { + std::cout << "Fadd only supports f32 and f64" << std::endl; + std::abort(); + } + } + static std::string name() {return "Fadd";} +}; + +template +class Fsub { + public: + static R apply(T first, T second, R) { + // ignoring flags for now + uint32_t fflags = 0; + // ignoring rounding mode for now + uint32_t frm = 0; + if (sizeof(R) == 4) { + return rv_fsub_s(second, first, frm, &fflags); + } else if (sizeof(R) == 8) { + uint64_t first_d = sizeof(T) == 8 ? first : rv_ftod(first); + uint64_t second_d = sizeof(T) == 8 ? 
second : rv_ftod(second); + return rv_fsub_d(second_d, first_d, frm, &fflags); + } else { + std::cout << "Fsub only supports f32 and f64" << std::endl; + std::abort(); + } + } + static std::string name() {return "Fsub";} +}; + +template +class Fmacc { + public: + static R apply(T first, T second, R third) { + // ignoring flags for now + uint32_t fflags = 0; + // ignoring rounding mode for now + uint32_t frm = 0; + if (sizeof(R) == 4) { + return rv_fmadd_s(first, second, third, frm, &fflags); + } else if (sizeof(R) == 8) { + uint64_t first_d = sizeof(T) == 8 ? first : rv_ftod(first); + uint64_t second_d = sizeof(T) == 8 ? second : rv_ftod(second); + return rv_fmadd_d(first_d, second_d, third, frm, &fflags); + } else { + std::cout << "Fmacc only supports f32 and f64" << std::endl; + std::abort(); + } + } + static std::string name() {return "Fmacc";} +}; + +template +class Fnmacc { + public: + static R apply(T first, T second, R third) { + // ignoring flags for now + uint32_t fflags = 0; + // ignoring rounding mode for now + uint32_t frm = 0; + if (sizeof(R) == 4) { + return rv_fnmadd_s(first, second, third, frm, &fflags); + } else if (sizeof(R) == 8) { + uint64_t first_d = sizeof(T) == 8 ? first : rv_ftod(first); + uint64_t second_d = sizeof(T) == 8 ? second : rv_ftod(second); + return rv_fnmadd_d(first_d, second_d, third, frm, &fflags); + } else { + std::cout << "Fnmacc only supports f32 and f64" << std::endl; + std::abort(); + } + } + static std::string name() {return "Fnmacc";} +}; + +template +class Fmsac { + public: + static R apply(T first, T second, R third) { + // ignoring flags for now + uint32_t fflags = 0; + // ignoring rounding mode for now + uint32_t frm = 0; + if (sizeof(R) == 4) { + return rv_fmadd_s(first, second, rv_fsgnjn_s(third, third), frm, &fflags); + } else if (sizeof(R) == 8) { + uint64_t first_d = sizeof(T) == 8 ? first : rv_ftod(first); + uint64_t second_d = sizeof(T) == 8 ? 
second : rv_ftod(second); + return rv_fmadd_d(first_d, second_d, rv_fsgnjn_d(third, third), frm, &fflags); + } else { + std::cout << "Fmsac only supports f32 and f64" << std::endl; + std::abort(); + } + } + static std::string name() {return "Fmsac";} +}; + +template +class Fnmsac { + public: + static R apply(T first, T second, R third) { + // ignoring flags for now + uint32_t fflags = 0; + // ignoring rounding mode for now + uint32_t frm = 0; + if (sizeof(R) == 4) { + return rv_fnmadd_s(first, second, rv_fsgnjn_s(third, third), frm, &fflags); + } else if (sizeof(R) == 8) { + uint64_t first_d = sizeof(T) == 8 ? first : rv_ftod(first); + uint64_t second_d = sizeof(T) == 8 ? second : rv_ftod(second); + return rv_fnmadd_d(first_d, second_d, rv_fsgnjn_d(third, third), frm, &fflags); + } else { + std::cout << "Fnmsac only supports f32 and f64" << std::endl; + std::abort(); + } + } + static std::string name() {return "Fnmsac";} +}; + +template +class Fmadd { + public: + static R apply(T first, T second, R third) { + if (sizeof(T) == 4 || sizeof(T) == 8) { + return Fmacc::apply(first, third, second); + } else { + std::cout << "Fmadd only supports f32 and f64" << std::endl; + std::abort(); + } + } + static std::string name() {return "Fmadd";} +}; + +template +class Fnmadd { + public: + static R apply(T first, T second, R third) { + if (sizeof(T) == 4 || sizeof(T) == 8) { + return Fnmacc::apply(first, third, second); + } else { + std::cout << "Fnmadd only supports f32 and f64" << std::endl; + std::abort(); + } + } + static std::string name() {return "Fnmadd";} +}; + +template +class Fmsub { + public: + static R apply(T first, T second, R third) { + if (sizeof(T) == 4 || sizeof(T) == 8) { + return Fmsac::apply(first, third, second); + } else { + std::cout << "Fmsub only supports f32 and f64" << std::endl; + std::abort(); + } + } + static std::string name() {return "Fmsub";} +}; + +template +class Fnmsub { + public: + static R apply(T first, T second, R third) { + if 
(sizeof(T) == 4 || sizeof(T) == 8) { + return Fnmsac::apply(first, third, second); + } else { + std::cout << "Fnmsub only supports f32 and f64" << std::endl; + std::abort(); + } + } + static std::string name() {return "Fnmsub";} +}; + +template +class Fmin { + public: + static R apply(T first, T second, R) { + // ignoring flags for now (fmin takes no rounding mode) + uint32_t fflags = 0; + if (sizeof(T) == 4) { + return rv_fmin_s(first, second, &fflags); + } else if (sizeof(T) == 8) { + return rv_fmin_d(first, second, &fflags); + } else { + std::cout << "Fmin only supports f32 and f64" << std::endl; + std::abort(); + } + } + static std::string name() {return "Fmin";} +}; + +template +class Fmax { + public: + static R apply(T first, T second, R) { + // ignoring flags for now (fmax takes no rounding mode) + uint32_t fflags = 0; + if (sizeof(T) == 4) { + return rv_fmax_s(first, second, &fflags); + } else if (sizeof(T) == 8) { + return rv_fmax_d(first, second, &fflags); + } else { + std::cout << "Fmax only supports f32 and f64" << std::endl; + std::abort(); + } + } + static std::string name() {return "Fmax";} +}; + +template +class Fsgnj { + public: + static R apply(T first, T second, R) { + if (sizeof(T) == 4) { + return rv_fsgnj_s(second, first); + } else if (sizeof(T) == 8) { + return rv_fsgnj_d(second, first); + } else { + std::cout << "Fsgnj only supports f32 and f64" << std::endl; + std::abort(); + } + } + static std::string name() {return "Fsgnj";} +}; + +template +class Fsgnjn { + public: + static R apply(T first, T second, R) { + if (sizeof(T) == 4) { + return rv_fsgnjn_s(second, first); + } else if (sizeof(T) == 8) { + return rv_fsgnjn_d(second, first); + } else { + std::cout << "Fsgnjn only supports f32 and f64" << std::endl; + std::abort(); + } + } + static std::string name() {return "Fsgnjn";} +}; + +template +class Fsgnjx { + public: + static R apply(T first, T second, R) { + if (sizeof(T) == 4) { + return rv_fsgnjx_s(second, first); + } else if (sizeof(T) == 8) { + return rv_fsgnjx_d(second, 
first); + } else { + std::cout << "Fsgnjx only supports f32 and f64" << std::endl; + std::abort(); + } + } + static std::string name() {return "Fsgnjx";} +}; + +template +class Fcvt { + public: + static R apply(T first, T second, R) { + // ignoring flags for now + uint32_t fflags = 0; + // ignoring rounding mode for now + uint32_t frm = 0; + if (sizeof(T) == 4) { + switch (first) { + case 0b00000: // vfcvt.xu.f.v + return rv_ftou_s(second, frm, &fflags); + case 0b00001: // vfcvt.x.f.v + return rv_ftoi_s(second, frm, &fflags); + case 0b00010: // vfcvt.f.xu.v + return rv_utof_s(second, frm, &fflags); + case 0b00011: // vfcvt.f.x.v + return rv_itof_s(second, frm, &fflags); + case 0b00110: // vfcvt.rtz.xu.f.v + return rv_ftou_s(second, 1, &fflags); + case 0b00111: // vfcvt.rtz.x.f.v + return rv_ftoi_s(second, 1, &fflags); + case 0b01000: // vfwcvt.xu.f.v + return rv_ftolu_s(second, frm, &fflags); + case 0b01001: // vfwcvt.x.f.v + return rv_ftol_s(second, frm, &fflags); + case 0b01010: // vfwcvt.f.xu.v + return rv_utof_d(second, frm, &fflags); + case 0b01011: // vfwcvt.f.x.v + return rv_itof_d(second, frm, &fflags); + case 0b01100: // vfwcvt.f.f.v + return rv_ftod(second); + case 0b01110: // vfwcvt.rtz.xu.f.v + return rv_ftolu_s(second, 1, &fflags); + case 0b01111: // vfwcvt.rtz.x.f.v + return rv_ftol_s(second, 1, &fflags); + default: + std::cout << "Fcvt has unsupported value for first: " << first << std::endl; + std::abort(); + } + } else if (sizeof(T) == 8) { + switch (first) { + case 0b00000: // vfcvt.xu.f.v + return rv_ftolu_d(second, frm, &fflags); + case 0b00001: // vfcvt.x.f.v + return rv_ftol_d(second, frm, &fflags); + case 0b00010: // vfcvt.f.xu.v + return rv_lutof_d(second, frm, &fflags); + case 0b00011: // vfcvt.f.x.v + return rv_ltof_d(second, frm, &fflags); + case 0b00110: // vfcvt.rtz.xu.f.v + return rv_ftolu_d(second, 1, &fflags); + case 0b00111: // vfcvt.rtz.x.f.v + return rv_ftol_d(second, 1, &fflags); + case 0b01000: // vfwcvt.xu.f.v + case 0b01001: 
// vfwcvt.x.f.v + case 0b01010: // vfwcvt.f.xu.v + case 0b01011: // vfwcvt.f.x.v + case 0b01100: // vfwcvt.f.f.v + case 0b01110: // vfwcvt.rtz.xu.f.v + case 0b01111: // vfwcvt.rtz.x.f.v + std::cout << "Fwcvt only supports f32" << std::endl; + std::abort(); + default: + std::cout << "Fcvt has unsupported value for first: " << first << std::endl; + std::abort(); + } + } else { + std::cout << "Fcvt only supports f32 and f64" << std::endl; + std::abort(); + } + } + static R apply(T first, T second, uint32_t vxrm, uint32_t &) { // saturation argument is unused + // ignoring flags for now + uint32_t fflags = 0; + if (sizeof(T) == 8) { + switch (first) { + case 0b10000: // vfncvt.xu.f.w + return rv_ftou_d(second, vxrm, &fflags); + case 0b10001: // vfncvt.x.f.w + return rv_ftoi_d(second, vxrm, &fflags); + case 0b10010: // vfncvt.f.xu.w + return rv_lutof_s(second, vxrm, &fflags); + case 0b10011: // vfncvt.f.x.w + return rv_ltof_s(second, vxrm, &fflags); + case 0b10100: // vfncvt.f.f.w + return rv_dtof_r(second, vxrm); + case 0b10101: // vfncvt.rod.f.f.w + return rv_dtof_r(second, 6); + case 0b10110: // vfncvt.rtz.xu.f.w + return rv_ftou_d(second, 1, &fflags); + case 0b10111: // vfncvt.rtz.x.f.w + return rv_ftoi_d(second, 1, &fflags); + default: + std::cout << "Fncvt has unsupported value for first: " << first << std::endl; + std::abort(); + } + } else { + std::cout << "Fncvt only supports f64" << std::endl; + std::abort(); + } + } + static std::string name() {return "Fcvt";} +}; + +template +class Funary1 { + public: + static R apply(T first, T second, R) { + // ignoring flags for now + uint32_t fflags = 0; + // ignoring rounding mode for now + uint32_t frm = 0; + if (sizeof(T) == 4) { + switch (first) { + case 0b00000: // vfsqrt.v + return rv_fsqrt_s(second, frm, &fflags); + case 0b00100: // vfrsqrt7.v + return rv_frsqrt7_s(second, frm, &fflags); + case 0b00101: // vfrec7.v + return rv_frecip7_s(second, frm, &fflags); + case 0b10000: // vfclass.v + return 
rv_fclss_s(second); + default: + std::cout << "Funary1 has unsupported value for first: " << first << std::endl; + std::abort(); + } + } else if (sizeof(T) == 8) { + switch (first) { + case 0b00000: // vfsqrt.v + return rv_fsqrt_d(second, frm, &fflags); + case 0b00100: // vfrsqrt7.v + return rv_frsqrt7_d(second, frm, &fflags); + case 0b00101: // vfrec7.v + return rv_frecip7_d(second, frm, &fflags); + case 0b10000: // vfclass.v + return rv_fclss_d(second); + default: + std::cout << "Funary1 has unsupported value for first: " << first << std::endl; + std::abort(); + } + } else { + std::cout << "Funary1 only supports f32 and f64" << std::endl; + std::abort(); + } + } + static std::string name() {return "Funary1";} +}; + +template +class Xunary0 { + public: + static R apply(T, T second, T) { + return second; + } + static std::string name() {return "Xunary0";} +}; + +template +class Feq { + public: + static R apply(T first, T second, R) { + // ignoring flags for now + uint32_t fflags = 0; + if (sizeof(T) == 4) { + return rv_feq_s(second, first, &fflags); + } else if (sizeof(T) == 8) { + return rv_feq_d(second, first, &fflags); + } else { + std::cout << "Feq only supports f32 and f64" << std::endl; + std::abort(); + } + } + static std::string name() {return "Feq";} +}; + +template +class Fle { + public: + static R apply(T first, T second, R) { + // ignoring flags for now + uint32_t fflags = 0; + if (sizeof(T) == 4) { + return rv_fle_s(second, first, &fflags); + } else if (sizeof(T) == 8) { + return rv_fle_d(second, first, &fflags); + } else { + std::cout << "Fle only supports f32 and f64" << std::endl; + std::abort(); + } + } + static std::string name() {return "Fle";} +}; + +template +class Flt { + public: + static R apply(T first, T second, R) { + // ignoring flags for now + uint32_t fflags = 0; + if (sizeof(T) == 4) { + return rv_flt_s(second, first, &fflags); + } else if (sizeof(T) == 8) { + return rv_flt_d(second, first, &fflags); + } else { + std::cout << "Flt only 
supports f32 and f64" << std::endl; + std::abort(); + } + } + static std::string name() {return "Flt";} +}; + +template +class Fne { + public: + static R apply(T first, T second, R) { + // ignoring flags for now + uint32_t fflags = 0; + if (sizeof(T) == 4) { + return !rv_feq_s(second, first, &fflags); + } else if (sizeof(T) == 8) { + return !rv_feq_d(second, first, &fflags); + } else { + std::cout << "Fne only supports f32 and f64" << std::endl; + std::abort(); + } + } + static std::string name() {return "Fne";} +}; + +template +class Fgt { + public: + static R apply(T first, T second, R) { + // ignoring flags for now + uint32_t fflags = 0; + if (sizeof(T) == 4) { + return rv_flt_s(first, second, &fflags); + } else if (sizeof(T) == 8) { + return rv_flt_d(first, second, &fflags); + } else { + std::cout << "Fgt only supports f32 and f64" << std::endl; + std::abort(); + } + } + static std::string name() {return "Fgt";} +}; + +template +class Fge { + public: + static R apply(T first, T second, R) { + // ignoring flags for now + uint32_t fflags = 0; + if (sizeof(T) == 4) { + return rv_fle_s(first, second, &fflags); + } else if (sizeof(T) == 8) { + return rv_fle_d(first, second, &fflags); + } else { + std::cout << "Fge only supports f32 and f64" << std::endl; + std::abort(); + } + } + static std::string name() {return "Fge";} +}; + +template +class Fdiv { + public: + static R apply(T first, T second, R) { + // ignoring flags for now + uint32_t fflags = 0; + // ignoring rounding mode for now + uint32_t frm = 0; + if (sizeof(T) == 4) { + return rv_fdiv_s(second, first, frm, &fflags); + } else if (sizeof(T) == 8) { + return rv_fdiv_d(second, first, frm, &fflags); + } else { + std::cout << "Fdiv only supports f32 and f64" << std::endl; + std::abort(); + } + } + static std::string name() {return "Fdiv";} +}; + +template +class Frdiv { + public: + static R apply(T first, T second, R) { + // ignoring flags for now + uint32_t fflags = 0; + // ignoring rounding mode for now + 
uint32_t frm = 0; + if (sizeof(T) == 4) { + return rv_fdiv_s(first, second, frm, &fflags); + } else if (sizeof(T) == 8) { + return rv_fdiv_d(first, second, frm, &fflags); + } else { + std::cout << "Frdiv only supports f32 and f64" << std::endl; + std::abort(); + } + } + static std::string name() {return "Frdiv";} +}; + +template +class Fmul { + public: + static R apply(T first, T second, R) { + // ignoring flags for now + uint32_t fflags = 0; + // ignoring rounding mode for now + uint32_t frm = 0; + if (sizeof(R) == 4) { + return rv_fmul_s(first, second, frm, &fflags); + } else if (sizeof(R) == 8) { + uint64_t first_d = sizeof(T) == 8 ? first : rv_ftod(first); + uint64_t second_d = sizeof(T) == 8 ? second : rv_ftod(second); + return rv_fmul_d(first_d, second_d, frm, &fflags); + } else { + std::cout << "Fmul only supports f32 and f64" << std::endl; + std::abort(); + } + } + static std::string name() {return "Fmul";} +}; + +template +class Frsub { + public: + static R apply(T first, T second, R) { + // ignoring flags for now + uint32_t fflags = 0; + // ignoring rounding mode for now + uint32_t frm = 0; + if (sizeof(T) == 4) { + return rv_fsub_s(first, second, frm, &fflags); + } else if (sizeof(T) == 8) { + return rv_fsub_d(first, second, frm, &fflags); + } else { + std::cout << "Frsub only supports f32 and f64" << std::endl; + std::abort(); + } + } + static std::string name() {return "Frsub";} +}; + +template +class Clip { + public: + static R apply(T first, T second, uint32_t vxrm, uint32_t &vxsat_) { + // The low lg2(2*SEW) bits of the vector or scalar shift-amount value (e.g., the low 6 bits for a SEW=64-bit to + // SEW=32-bit narrowing operation) are used to control the right shift amount, which provides the scaling. 
+ R firstValid = first & (sizeof(T) * 8 - 1); + T unclippedResult = (second >> firstValid) + roundBit(second, firstValid, vxrm); + R clippedResult = std::clamp(unclippedResult, (T)std::numeric_limits::min(), (T)std::numeric_limits::max()); + vxsat_ |= clippedResult != unclippedResult; + return clippedResult; + } + static std::string name() {return "Clip";} +}; + +template +class Smul { + public: + static R apply(T first, T second, uint32_t vxrm, uint32_t &vxsat_) { + R shift = sizeof(R) * 8 - 1; + T unshiftedResult = first * second; + T unclippedResult = (unshiftedResult >> shift) + roundBit(unshiftedResult, shift, vxrm); + R clippedResult = std::clamp(unclippedResult, (T)std::numeric_limits::min(), (T)std::numeric_limits::max()); + vxsat_ |= clippedResult != unclippedResult; + return clippedResult; + } + static std::string name() {return "Smul";} +}; + +bool isMasked(std::vector> &vreg_file, uint32_t maskVreg, uint32_t byteI, bool vmask) { + auto& mask = vreg_file.at(maskVreg); + uint8_t emask = *(uint8_t *)(mask.data() + byteI / 8); + uint8_t value = (emask >> (byteI % 8)) & 0x1; + DP(1, "Masking enabled: " << +!vmask << " mask element: " << +value); + return !vmask && value == 0; +} + +template +uint32_t getVreg(uint32_t baseVreg, uint32_t byteI) { + uint32_t vsew = sizeof(DT) * 8; + return (baseVreg + (byteI / (VLEN / vsew))) % 32; +} + +template +DT &getVregData(std::vector &baseVregVec, uint32_t byteI) { + uint32_t vsew = sizeof(DT) * 8; + return *(DT *)(baseVregVec.data() + (byteI % (VLEN / vsew)) * vsew / 8); +} + +template +DT &getVregData(std::vector> &vreg_file, uint32_t baseVreg, uint32_t byteI) { + auto& vr1 = vreg_file.at(getVreg
(baseVreg, byteI)); + return getVregData
(vr1, byteI); +} + +template +void vector_op_vix_load(std::vector> &vreg_file, vortex::Emulator *emul_, std::vector &rsdata, uint32_t rdest, uint32_t vl, bool strided, WordI stride, uint32_t nfields, uint32_t lmul, uint32_t vmask) { + uint32_t vsew = sizeof(DT) * 8; + uint32_t emul = lmul >> 2 ? 1 : 1 << (lmul & 0b11); // encodings with bit 2 set count as 1, otherwise 2^(low two bits) + if (nfields * emul > 8) { // report the same product the guard tests, not the raw lmul encoding + std::cout << "NFIELDS * EMUL = " << nfields * emul << " but it should be <= 8" << std::endl; + std::abort(); + } + for (uint32_t i = 0; i < vl * nfields; i++) { + if (isMasked(vreg_file, 0, i / nfields, vmask)) continue; + + uint32_t nfields_strided = strided ? nfields : 1; + Word mem_addr = ((rsdata[0][0].i) & 0xFFFFFFFC) + (i / nfields_strided) * stride + (i % nfields_strided) * sizeof(DT); + Word mem_data = 0; + emul_->dcache_read(&mem_data, mem_addr, vsew / 8); + DP(1, "Loading data " << mem_data << " from: " << mem_addr << " to vec reg: " << getVreg
(rdest + (i % nfields) * emul, i / nfields) << " i: " << i / nfields); + DT &result = getVregData
(vreg_file, rdest + (i % nfields) * emul, i / nfields); + DP(1, "Previous data: " << +result); + result = (DT) mem_data; + } +} + +void vector_op_vix_load(std::vector> &vreg_file, vortex::Emulator *emul_, std::vector &rsdata, uint32_t rdest, uint32_t vsew, uint32_t vl, bool strided, WordI stride, uint32_t nfields, uint32_t lmul, uint32_t vmask) { + switch (vsew) { + case 8: + vector_op_vix_load(vreg_file, emul_, rsdata, rdest, vl, strided, stride, nfields, lmul, vmask); + break; + case 16: + vector_op_vix_load(vreg_file, emul_, rsdata, rdest, vl, strided, stride, nfields, lmul, vmask); + break; + case 32: + vector_op_vix_load(vreg_file, emul_, rsdata, rdest, vl, strided, stride, nfields, lmul, vmask); + break; + case 64: + vector_op_vix_load(vreg_file, emul_, rsdata, rdest, vl, strided, stride, nfields, lmul, vmask); + break; + default: + std::cout << "Failed to execute VLE for vsew: " << vsew << std::endl; + std::abort(); + } +} + +template +void vector_op_vv_load(std::vector> &vreg_file, vortex::Emulator *emul_, std::vector &rsdata, uint32_t rsrc1, uint32_t rdest, uint32_t iSew, uint32_t vl, uint32_t nfields, uint32_t lmul, uint32_t vmask) { + uint32_t vsew = sizeof(DT) * 8; + uint32_t emul = lmul >> 2 ? 
1 : 1 << (lmul & 0b11); + if (nfields * emul > 8) { // report the same product the guard tests, not the raw lmul encoding + std::cout << "NFIELDS * EMUL = " << nfields * emul << " but it should be <= 8" << std::endl; + std::abort(); + } + for (uint32_t i = 0; i < vl * nfields; i++) { + if (isMasked(vreg_file, 0, i / nfields, vmask)) continue; + + Word offset = 0; + switch (iSew) { + case 8: + offset = getVregData(vreg_file, rsrc1, i / nfields); + break; + case 16: + offset = getVregData(vreg_file, rsrc1, i / nfields); + break; + case 32: + offset = getVregData(vreg_file, rsrc1, i / nfields); + break; + case 64: + offset = getVregData(vreg_file, rsrc1, i / nfields); + break; + default: + std::cout << "Unsupported iSew: " << iSew << std::endl; + std::abort(); + } + + Word mem_addr = ((rsdata[0][0].i) & 0xFFFFFFFC) + offset + (i % nfields) * sizeof(DT); + Word mem_data = 0; + emul_->dcache_read(&mem_data, mem_addr, vsew / 8); + DP(1, "VLUX/VLOX - Loading data " << mem_data << " from: " << mem_addr << " with offset: " << std::dec << offset << " to vec reg: " << getVreg
(rdest + (i % nfields) * emul, i / nfields) << " i: " << i / nfields); + DT &result = getVregData
(vreg_file, rdest + (i % nfields) * emul, i / nfields); + DP(1, "Previous data: " << +result); + result = (DT) mem_data; + } +} + +void vector_op_vv_load(std::vector> &vreg_file, vortex::Emulator *emul_, std::vector &rsdata, uint32_t rsrc1, uint32_t rdest, uint32_t vsew, uint32_t iSew, uint32_t vl, uint32_t nfields, uint32_t lmul, uint32_t vmask) { + switch (vsew) { + case 8: + vector_op_vv_load(vreg_file, emul_, rsdata, rsrc1, rdest, iSew, vl, nfields, lmul, vmask); + break; + case 16: + vector_op_vv_load(vreg_file, emul_, rsdata, rsrc1, rdest, iSew, vl, nfields, lmul, vmask); + break; + case 32: + vector_op_vv_load(vreg_file, emul_, rsdata, rsrc1, rdest, iSew, vl, nfields, lmul, vmask); + break; + case 64: + vector_op_vv_load(vreg_file, emul_, rsdata, rsrc1, rdest, iSew, vl, nfields, lmul, vmask); + break; + default: + std::cout << "Failed to execute VLUX/VLOX for vsew: " << vsew << std::endl; + std::abort(); + } +} + +void Emulator::loadVector(const Instr &instr, uint32_t wid, std::vector &rsdata) { + auto &warp = warps_.at(wid); + auto vmask = instr.getVmask(); + auto rdest = instr.getRDest(); + auto mop = instr.getVmop(); + switch (mop) { + case 0b00: { // unit-stride + auto lumop = instr.getVumop(); + switch (lumop) { + case 0b10000: // vle8ff.v, vle16ff.v, vle32ff.v, vle64ff.v - we do not support exceptions -> treat like regular unit stride + // vlseg2e8ff.v, vlseg2e16ff.v, vlseg2e32ff.v, vlseg2e64ff.v + // vlseg3e8ff.v, vlseg3e16ff.v, vlseg3e32ff.v, vlseg3e64ff.v + // vlseg4e8ff.v, vlseg4e16ff.v, vlseg4e32ff.v, vlseg4e64ff.v + // vlseg5e8ff.v, vlseg5e16ff.v, vlseg5e32ff.v, vlseg5e64ff.v + // vlseg6e8ff.v, vlseg6e16ff.v, vlseg6e32ff.v, vlseg6e64ff.v + // vlseg7e8ff.v, vlseg7e16ff.v, vlseg7e32ff.v, vlseg7e64ff.v + // vlseg8e8ff.v, vlseg8e16ff.v, vlseg8e32ff.v, vlseg8e64ff.v + case 0b0000: { // vle8.v, vle16.v, vle32.v, vle64.v + // vlseg2e8.v, vlseg2e16.v, vlseg2e32.v, vlseg2e64.v + // vlseg3e8.v, vlseg3e16.v, vlseg3e32.v, vlseg3e64.v + // vlseg4e8.v, 
vlseg4e16.v, vlseg4e32.v, vlseg4e64.v + // vlseg5e8.v, vlseg5e16.v, vlseg5e32.v, vlseg5e64.v + // vlseg6e8.v, vlseg6e16.v, vlseg6e32.v, vlseg6e64.v + // vlseg7e8.v, vlseg7e16.v, vlseg7e32.v, vlseg7e64.v + // vlseg8e8.v, vlseg8e16.v, vlseg8e32.v, vlseg8e64.v + WordI stride = warp.vtype.vsew / 8; + uint32_t nfields = instr.getVnf() + 1; + vector_op_vix_load(warp.vreg_file, this, rsdata, rdest, warp.vtype.vsew, warp.vl, false, stride, nfields, warp.vtype.vlmul, vmask); + break; + } + case 0b1000: { // vl1r.v, vl2r.v, vl4r.v, vl8r.v + uint32_t nreg = instr.getVnf() + 1; + if (nreg != 1 && nreg != 2 && nreg != 4 && nreg != 8) { + std::cout << "Whole vector register load - reserved value for nreg: " << nreg << std::endl; + std::abort(); + } + DP(1, "Whole vector register load with nreg: " << nreg); + uint32_t vl = nreg * VLEN / instr.getVsew(); + WordI stride = instr.getVsew() / 8; + vector_op_vix_load(warp.vreg_file, this, rsdata, rdest, instr.getVsew(), vl, false, stride, 1, 0, vmask); + break; + } + case 0b1011: { // vlm.v + if (warp.vtype.vsew != 8) { + std::cout << "vlm.v only supports EEW=8, but EEW was: " << warp.vtype.vsew << std::endl; + std::abort(); + } + WordI stride = warp.vtype.vsew / 8; + vector_op_vix_load(warp.vreg_file, this, rsdata, rdest, warp.vtype.vsew, (warp.vl + 7) / 8, false, stride, 1, 0, true); + break; + } + default: + std::cout << "Load vector - unsupported lumop: " << lumop << std::endl; + std::abort(); + } + break; + } + case 0b10: { // strided: vlse8.v, vlse16.v, vlse32.v, vlse64.v + // vlsseg2e8.v, vlsseg2e16.v, vlsseg2e32.v, vlsseg2e64.v + // vlsseg3e8.v, vlsseg3e16.v, vlsseg3e32.v, vlsseg3e64.v + // vlsseg4e8.v, vlsseg4e16.v, vlsseg4e32.v, vlsseg4e64.v + // vlsseg5e8.v, vlsseg5e16.v, vlsseg5e32.v, vlsseg5e64.v + // vlsseg6e8.v, vlsseg6e16.v, vlsseg6e32.v, vlsseg6e64.v + // vlsseg7e8.v, vlsseg7e16.v, vlsseg7e32.v, vlsseg7e64.v + // vlsseg8e8.v, vlsseg8e16.v, vlsseg8e32.v, vlsseg8e64.v + auto rsrc1 = instr.getRSrc(1); + auto rdest = 
instr.getRDest(); + WordI stride = warp.ireg_file.at(0).at(rsrc1); + uint32_t nfields = instr.getVnf() + 1; + vector_op_vix_load(warp.vreg_file, this, rsdata, rdest, warp.vtype.vsew, warp.vl, true, stride, nfields, warp.vtype.vlmul, vmask); + break; + } + case 0b01: // indexed - unordered, vluxei8.v, vluxei16.v, vluxei32.v, vluxei64.v + // vluxseg2e8.v, vluxseg2e16.v, vluxseg2e32.v, vluxseg2e64.v + // vluxseg3e8.v, vluxseg3e16.v, vluxseg3e32.v, vluxseg3e64.v + // vluxseg4e8.v, vluxseg4e16.v, vluxseg4e32.v, vluxseg4e64.v + // vluxseg5e8.v, vluxseg5e16.v, vluxseg5e32.v, vluxseg5e64.v + // vluxseg6e8.v, vluxseg6e16.v, vluxseg6e32.v, vluxseg6e64.v + // vluxseg7e8.v, vluxseg7e16.v, vluxseg7e32.v, vluxseg7e64.v + // vluxseg8e8.v, vluxseg8e16.v, vluxseg8e32.v, vluxseg8e64.v + case 0b11: { // indexed - ordered, vloxei8.v, vloxei16.v, vloxei32.v, vloxei64.v + // vloxseg2e8.v, vloxseg2e16.v, vloxseg2e32.v, vloxseg2e64.v + // vloxseg3e8.v, vloxseg3e16.v, vloxseg3e32.v, vloxseg3e64.v + // vloxseg4e8.v, vloxseg4e16.v, vloxseg4e32.v, vloxseg4e64.v + // vloxseg5e8.v, vloxseg5e16.v, vloxseg5e32.v, vloxseg5e64.v + // vloxseg6e8.v, vloxseg6e16.v, vloxseg6e32.v, vloxseg6e64.v + // vloxseg7e8.v, vloxseg7e16.v, vloxseg7e32.v, vloxseg7e64.v + // vloxseg8e8.v, vloxseg8e16.v, vloxseg8e32.v, vloxseg8e64.v + uint32_t nfields = instr.getVnf() + 1; + vector_op_vv_load(warp.vreg_file, this, rsdata, instr.getRSrc(1), rdest, warp.vtype.vsew, instr.getVsew(), warp.vl, nfields, warp.vtype.vlmul, vmask); + break; + } + default: + std::cout << "Load vector - unsupported mop: " << mop << std::endl; + std::abort(); + } +} + +template +void vector_op_vix_store(std::vector> &vreg_file, vortex::Emulator *emul_, std::vector &rsdata, uint32_t rsrc3, uint32_t vl, bool strided, WordI stride, uint32_t nfields, uint32_t lmul, uint32_t vmask) { + uint32_t vsew = sizeof(DT) * 8; + uint32_t emul = lmul >> 2 ? 
1 : 1 << (lmul & 0b11); + for (uint32_t i = 0; i < vl * nfields; i++) { + if (isMasked(vreg_file, 0, i / nfields, vmask)) continue; + + uint32_t nfields_strided = strided ? nfields : 1; + Word mem_addr = rsdata[0][0].i + (i / nfields_strided) * stride + (i % nfields_strided) * sizeof(DT); + Word mem_data = getVregData
(vreg_file, rsrc3 + (i % nfields) * emul, i / nfields); + DP(1, "Storing: " << std::hex << mem_data << " at: " << mem_addr << " from vec reg: " << getVreg
(rsrc3 + (i % nfields) * emul, i / nfields) << " i: " << i / nfields); + emul_->dcache_write(&mem_data, mem_addr, vsew / 8); + } +} + +void vector_op_vix_store(std::vector> &vreg_file, vortex::Emulator *emul_, std::vector &rsdata, uint32_t rsrc3, uint32_t vsew, uint32_t vl, bool strided, WordI stride, uint32_t nfields, uint32_t lmul, uint32_t vmask) { + switch (vsew) { + case 8: + vector_op_vix_store(vreg_file, emul_, rsdata, rsrc3, vl, strided, stride, nfields, lmul, vmask); + break; + case 16: + vector_op_vix_store(vreg_file, emul_, rsdata, rsrc3, vl, strided, stride, nfields, lmul, vmask); + break; + case 32: + vector_op_vix_store(vreg_file, emul_, rsdata, rsrc3, vl, strided, stride, nfields, lmul, vmask); + break; + case 64: + vector_op_vix_store(vreg_file, emul_, rsdata, rsrc3, vl, strided, stride, nfields, lmul, vmask); + break; + default: + std::cout << "Failed to execute VSE for vsew: " << vsew << std::endl; + std::abort(); + } +} + +template +void vector_op_vv_store(std::vector> &vreg_file, vortex::Emulator *emul_, std::vector &rsdata, uint32_t rsrc1, uint32_t rsrc3, uint32_t iSew, uint32_t vl, uint32_t nfields, uint32_t lmul, uint32_t vmask) { + uint32_t vsew = sizeof(DT) * 8; + uint32_t emul = lmul >> 2 ? 1 : 1 << (lmul & 0b11); + for (uint32_t i = 0; i < vl * nfields; i++) { + if (isMasked(vreg_file, 0, i / nfields, vmask)) continue; + + Word offset = 0; + switch (iSew) { + case 8: + offset = getVregData(vreg_file, rsrc1, i / nfields); + break; + case 16: + offset = getVregData(vreg_file, rsrc1, i / nfields); + break; + case 32: + offset = getVregData(vreg_file, rsrc1, i / nfields); + break; + case 64: + offset = getVregData(vreg_file, rsrc1, i / nfields); + break; + default: + std::cout << "Unsupported iSew: " << iSew << std::endl; + std::abort(); + } + + Word mem_addr = rsdata[0][0].i + offset + (i % nfields) * sizeof(DT); + Word mem_data = getVregData
(vreg_file, rsrc3 + (i % nfields) * emul, i / nfields); + DP(1, "VSUX/VSOX - Storing: " << std::hex << mem_data << " at: " << mem_addr << " with offset: " << std::dec << offset << " from vec reg: " << getVreg
(rsrc3 + (i % nfields) * emul, i / nfields) << " i: " << i / nfields); + emul_->dcache_write(&mem_data, mem_addr, vsew / 8); + } +} + +void vector_op_vv_store(std::vector> &vreg_file, vortex::Emulator *emul_, std::vector &rsdata, uint32_t rsrc1, uint32_t rsrc3, uint32_t vsew, uint32_t iSew, uint32_t vl, uint32_t nfields, uint32_t lmul, uint32_t vmask) { + switch (vsew) { + case 8: + vector_op_vv_store(vreg_file, emul_, rsdata, rsrc1, rsrc3, iSew, vl, nfields, lmul, vmask); + break; + case 16: + vector_op_vv_store(vreg_file, emul_, rsdata, rsrc1, rsrc3, iSew, vl, nfields, lmul, vmask); + break; + case 32: + vector_op_vv_store(vreg_file, emul_, rsdata, rsrc1, rsrc3, iSew, vl, nfields, lmul, vmask); + break; + case 64: + vector_op_vv_store(vreg_file, emul_, rsdata, rsrc1, rsrc3, iSew, vl, nfields, lmul, vmask); + break; + default: + std::cout << "Failed to execute VSUX/VSOX for vsew: " << vsew << std::endl; + std::abort(); + } +} + +void Emulator::storeVector(const Instr &instr, uint32_t wid, std::vector &rsdata) { + auto &warp = warps_.at(wid); + auto vmask = instr.getVmask(); + auto mop = instr.getVmop(); + switch (mop) { + case 0b00: { // unit-stride + auto vs3 = instr.getRSrc(1); + auto sumop = instr.getVumop(); + WordI stride = warp.vtype.vsew / 8; + switch (sumop) { + case 0b0000: { // vse8.v, vse16.v, vse32.v, vse64.v + uint32_t nfields = instr.getVnf() + 1; + vector_op_vix_store(warp.vreg_file, this, rsdata, vs3, warp.vtype.vsew, warp.vl, false, stride, nfields, warp.vtype.vlmul, vmask); + break; + } + case 0b1000: { // vs1r.v, vs2r.v, vs4r.v, vs8r.v + uint32_t nreg = instr.getVnf() + 1; + if (nreg != 1 && nreg != 2 && nreg != 4 && nreg != 8) { + std::cout << "Whole vector register store - reserved value for nreg: " << nreg << std::endl; + std::abort(); + } + DP(1, "Whole vector register store with nreg: " << nreg); + uint32_t vl = nreg * VLEN / 8; // element count is in bytes + vector_op_vix_store(warp.vreg_file, this, rsdata, vs3, vl, false, 1, 1, 0, vmask); // byte elements are stored contiguously: stride is 1, not vtype.vsew / 8 + break; + } + case 
0b1011: { // vsm.v + if (warp.vtype.vsew != 8) { + std::cout << "vsm.v only supports EEW=8, but EEW was: " << warp.vtype.vsew << std::endl; + std::abort(); + } + vector_op_vix_store(warp.vreg_file, this, rsdata, vs3, warp.vtype.vsew, (warp.vl + 7) / 8, false, stride, 1, 0, true); + break; + } + default: + std::cout << "Store vector - unsupported sumop: " << sumop << std::endl; + std::abort(); + } + break; + } + case 0b10: { // strided: vsse8.v, vsse16.v, vsse32.v, vsse64.v + // vssseg2e8.v, vssseg2e16.v, vssseg2e32.v, vssseg2e64.v + // vssseg3e8.v, vssseg3e16.v, vssseg3e32.v, vssseg3e64.v + // vssseg4e8.v, vssseg4e16.v, vssseg4e32.v, vssseg4e64.v + // vssseg5e8.v, vssseg5e16.v, vssseg5e32.v, vssseg5e64.v + // vssseg6e8.v, vssseg6e16.v, vssseg6e32.v, vssseg6e64.v + // vssseg7e8.v, vssseg7e16.v, vssseg7e32.v, vssseg7e64.v + // vssseg8e8.v, vssseg8e16.v, vssseg8e32.v, vssseg8e64.v + auto rsrc1 = instr.getRSrc(1); + auto vs3 = instr.getRSrc(2); + WordI stride = warp.ireg_file.at(0).at(rsrc1); + uint32_t nfields = instr.getVnf() + 1; + vector_op_vix_store(warp.vreg_file, this, rsdata, vs3, warp.vtype.vsew, warp.vl, true, stride, nfields, warp.vtype.vlmul, vmask); + break; + } + case 0b01: // indexed - unordered, vsuxei8.v, vsuxei16.v, vsuxei32.v, vsuxei64.v + // vsuxseg2ei8.v, vsuxseg2ei16.v, vsuxseg2ei32.v, vsuxseg2ei64.v + // vsuxseg3ei8.v, vsuxseg3ei16.v, vsuxseg3ei32.v, vsuxseg3ei64.v + // vsuxseg4ei8.v, vsuxseg4ei16.v, vsuxseg4ei32.v, vsuxseg4ei64.v + // vsuxseg5ei8.v, vsuxseg5ei16.v, vsuxseg5ei32.v, vsuxseg5ei64.v + // vsuxseg6ei8.v, vsuxseg6ei16.v, vsuxseg6ei32.v, vsuxseg6ei64.v + // vsuxseg7ei8.v, vsuxseg7ei16.v, vsuxseg7ei32.v, vsuxseg7ei64.v + // vsuxseg8ei8.v, vsuxseg8ei16.v, vsuxseg8ei32.v, vsuxseg8ei64.v + case 0b11: { // indexed - ordered, vsoxei8.v, vsoxei16.v, vsoxei32.v, vsoxei64.v + // vsoxseg2ei8.v, vsoxseg2ei16.v, vsoxseg2ei32.v, vsoxseg2ei64.v + // vsoxseg3ei8.v, vsoxseg3ei16.v, vsoxseg3ei32.v, vsoxseg3ei64.v + // vsoxseg4ei8.v, vsoxseg4ei16.v, 
vsoxseg4ei32.v, vsoxseg4ei64.v + // vsoxseg5ei8.v, vsoxseg5ei16.v, vsoxseg5ei32.v, vsoxseg5ei64.v + // vsoxseg6ei8.v, vsoxseg6ei16.v, vsoxseg6ei32.v, vsoxseg6ei64.v + // vsoxseg7ei8.v, vsoxseg7ei16.v, vsoxseg7ei32.v, vsoxseg7ei64.v + // vsoxseg8ei8.v, vsoxseg8ei16.v, vsoxseg8ei32.v, vsoxseg8ei64.v + uint32_t nfields = instr.getVnf() + 1; + vector_op_vv_store(warp.vreg_file, this, rsdata, instr.getRSrc(1), instr.getRSrc(2), warp.vtype.vsew, instr.getVsew(), warp.vl, nfields, warp.vtype.vlmul, vmask); + break; + } + default: + std::cout << "Store vector - unsupported mop: " << mop << std::endl; + std::abort(); + } +} + +template