Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
6ef34fc
move calculate_channel_width() to rr_graph.cpp
soheilshahrouz Oct 9, 2025
416ce96
use chan widths extracted from rr graph in drawing
soheilshahrouz Oct 9, 2025
e297202
add rr_chanx/y_list to device context
soheilshahrouz Oct 9, 2025
441400a
use rr extracted chan widths in NetCostHandler::alloc_and_load_chan_w…
soheilshahrouz Oct 9, 2025
ada220f
extend occupancy reports to 3d and use rr graph extracted width
soheilshahrouz Oct 9, 2025
fe0db61
make format
soheilshahrouz Oct 9, 2025
ac62078
add comment explaining dimension sizes of chanx_occ and chany_occ
soheilshahrouz Oct 10, 2025
ef48ddd
remove unused argument from get_channel_occupancy_stats()
soheilshahrouz Oct 10, 2025
4d4fd91
replace i/j with x/y
soheilshahrouz Oct 10, 2025
801a8d8
add comments
soheilshahrouz Oct 10, 2025
b25c626
rename rr_chan?_width --> rr_chan?_segment_width
soheilshahrouz Oct 10, 2025
af94c44
rename rr_chany_list --> rr_chany_width
soheilshahrouz Oct 10, 2025
0a1ff77
Merge remote-tracking branch 'origin/master' into temp_chan_width_rr
soheilshahrouz Oct 10, 2025
9f25742
change seed and update golden results for figure8
soheilshahrouz Oct 14, 2025
61cda0e
Merge branch 'master' into temp_chan_width_rr
soheilshahrouz Oct 14, 2025
d849ff6
Update golden results for power_extended_arch_list
soheilshahrouz Oct 14, 2025
4818739
Update golden results for power_extended_circuit_list
soheilshahrouz Oct 15, 2025
d044159
Update golden results
soheilshahrouz Oct 15, 2025
c00c71e
Update golden results for strong_clock_aliases_set_delay
soheilshahrouz Oct 15, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
235 changes: 119 additions & 116 deletions vpr/src/base/stats.cpp

Large diffs are not rendered by default.

12 changes: 0 additions & 12 deletions vpr/src/base/stats.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,18 +24,6 @@ void routing_stats(const Netlist<>& net_list,
RRSwitchId wire_to_ipin_switch,
bool is_flat);

/**
* @brief Calculates the routing channel width at each grid location.
*
* Iterates through all RR nodes and counts how many wires pass through each (x, y) location
* for both horizontal (CHANX) and vertical (CHANY) channels.
*
* @return A pair of 3D matrices:
* - First: CHANX width per [layer][x][y]
* - Second: CHANY width per [layer][x][y]
*/
std::pair<vtr::NdMatrix<int, 3>, vtr::NdMatrix<int, 3>> calculate_channel_width();

void print_wirelen_prob_dist(bool is_flat);

void print_lambda();
Expand Down
10 changes: 10 additions & 0 deletions vpr/src/base/vpr_context.h
Original file line number Diff line number Diff line change
Expand Up @@ -263,6 +263,16 @@ struct DeviceContext : public Context {

int delayless_switch_idx = UNDEFINED;

/// Stores the number of CHANX wire segments in each routing channel segment at [layer][x][y]
vtr::NdMatrix<int, 3> rr_chanx_segment_width;
/// Stores the number of CHANY wire segments in each routing channel segment at [layer][x][y]
vtr::NdMatrix<int, 3> rr_chany_segment_width;

/// Stores the maximum channel segment width in each horizontal channel
std::vector<int> rr_chanx_width;
/// Stores the maximum channel segment width in each vertical channel
std::vector<int> rr_chany_width;

bool rr_graph_is_flat = false;

/*
Expand Down
32 changes: 15 additions & 17 deletions vpr/src/draw/draw.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -435,16 +435,18 @@ void init_draw_coords(float clb_width, const BlkLocRegistry& blk_loc_registry) {
t_draw_state* draw_state = get_draw_state_vars();
t_draw_coords* draw_coords = get_draw_coords_vars();
const DeviceContext& device_ctx = g_vpr_ctx.device();
const DeviceGrid& grid = device_ctx.grid;
const RRGraphView& rr_graph = device_ctx.rr_graph;

/* Store a reference to block location variables so that other drawing
* functions can access block location information without accessing
* the global placement state, which is inaccessible during placement.*/
draw_state->set_graphics_blk_loc_registry_ref(blk_loc_registry);

if (!draw_state->show_graphics && !draw_state->save_graphics
&& draw_state->graphics_commands.empty())
return; //do not initialize only if --disp off and --save_graphics off
// do not initialize only if --disp off and --save_graphics off
if (!draw_state->show_graphics && !draw_state->save_graphics && draw_state->graphics_commands.empty()) {
return;
}

/* Each time routing is on screen, need to reallocate the color of each *
* rr_node, as the number of rr_nodes may change. */
Expand All @@ -466,32 +468,28 @@ void init_draw_coords(float clb_width, const BlkLocRegistry& blk_loc_registry) {
}

size_t j = 0;
for (size_t i = 0; i < (device_ctx.grid.width() - 1); i++) {
for (size_t i = 0; i < grid.width() - 1; i++) {
draw_coords->tile_x[i] = (i * draw_coords->get_tile_width()) + j;
j += device_ctx.chan_width.y_list[i] + 1; /* N wires need N+1 units of space */
j += device_ctx.rr_chany_width[i] + 1; // N wires need N+1 units of space
}
draw_coords->tile_x[device_ctx.grid.width() - 1] = ((device_ctx.grid.width()
- 1)
* draw_coords->get_tile_width())
+ j;
draw_coords->tile_x[grid.width() - 1] = (grid.width() - 1) * draw_coords->get_tile_width() + j;

j = 0;
for (size_t i = 0; i < (device_ctx.grid.height() - 1); ++i) {
for (size_t i = 0; i < device_ctx.grid.height() - 1; ++i) {
draw_coords->tile_y[i] = (i * draw_coords->get_tile_width()) + j;
j += device_ctx.chan_width.x_list[i] + 1;
j += device_ctx.rr_chanx_width[i] + 1;
}
draw_coords->tile_y[device_ctx.grid.height() - 1] = ((device_ctx.grid.height() - 1) * draw_coords->get_tile_width())
+ j;
draw_coords->tile_y[grid.height() - 1] = (grid.height() - 1) * draw_coords->get_tile_width() + j;

/* Load coordinates of sub-blocks inside the clbs */
draw_internal_init_blk();
//Margin beyond edge of the drawn device to extend the visible world
//Setting this to > 0.0 means 'Zoom Fit' leave some fraction of white
//space around the device edges
constexpr float VISIBLE_MARGIN = 0.01;

float draw_width = draw_coords->tile_x[device_ctx.grid.width() - 1]
+ draw_coords->get_tile_width();
float draw_height = draw_coords->tile_y[device_ctx.grid.height() - 1]
+ draw_coords->get_tile_width();
float draw_width = draw_coords->tile_x[grid.width() - 1] + draw_coords->get_tile_width();
float draw_height = draw_coords->tile_y[grid.height() - 1] + draw_coords->get_tile_width();

initial_world = ezgl::rectangle(
{-VISIBLE_MARGIN * draw_width, -VISIBLE_MARGIN * draw_height},
Expand Down
38 changes: 20 additions & 18 deletions vpr/src/place/net_cost_handler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -143,33 +143,34 @@ void NetCostHandler::alloc_and_load_chan_w_factors_for_place_cost_() {
const size_t grid_height = device_ctx.grid.height();
const size_t grid_width = device_ctx.grid.width();

/* These arrays contain accumulative channel width between channel zero and
* the channel specified by the given index. The accumulated channel width
* is inclusive, meaning that it includes both channel zero and channel `idx`.
* To compute the total channel width between channels 'low' and 'high', use the
* following formula:
* acc_chan?_width_[high] - acc_chan?_width_[low - 1]
* This returns the total number of tracks between channels 'low' and 'high',
* including tracks in these channels.
*/
// These arrays contain accumulative channel width between channel zero and
// the channel specified by the given index. The accumulated channel width
// is inclusive, meaning that it includes both channel zero and channel `idx`.
// To compute the total channel width between channels 'low' and 'high', use the
// following formula:
// acc_chan?_width_[high] - acc_chan?_width_[low - 1]
// This returns the total number of tracks between channels 'low' and 'high',
// including tracks in these channels.
acc_chanx_width_ = vtr::PrefixSum1D<int>(grid_height, [&](size_t y) noexcept {
int chan_x_width = device_ctx.chan_width.x_list[y];
int chan_x_width = device_ctx.rr_chanx_width[y];

/* If the number of tracks in a channel is zero, two consecutive elements take the same
* value. This can lead to a division by zero in get_chanxy_cost_fac_(). To avoid this
* potential issue, we assume that the channel width is at least 1.
*/
if (chan_x_width == 0)
// If the number of tracks in a channel is zero, two consecutive elements take the same
// value. This can lead to a division by zero in get_chanxy_cost_fac_(). To avoid this
// potential issue, we assume that the channel width is at least 1.
if (chan_x_width == 0) {
return 1;
}

return chan_x_width;
});

acc_chany_width_ = vtr::PrefixSum1D<int>(grid_width, [&](size_t x) noexcept {
int chan_y_width = device_ctx.chan_width.y_list[x];
int chan_y_width = device_ctx.rr_chany_width[x];

// to avoid a division by zero
if (chan_y_width == 0)
if (chan_y_width == 0) {
return 1;
}

return chan_y_width;
});
Expand Down Expand Up @@ -1819,7 +1820,8 @@ std::pair<vtr::NdMatrix<double, 3>, vtr::NdMatrix<double, 3>> NetCostHandler::es
}
}

const auto [chanx_width, chany_width] = calculate_channel_width();
const vtr::NdMatrix<int, 3>& chanx_width = device_ctx.rr_chanx_segment_width;
const vtr::NdMatrix<int, 3>& chany_width = device_ctx.rr_chany_segment_width;

VTR_ASSERT(chanx_util.size() == chany_util.size());
VTR_ASSERT(chanx_util.ndims() == chany_util.ndims());
Expand Down
57 changes: 57 additions & 0 deletions vpr/src/route/rr_graph_generation/rr_graph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -376,6 +376,14 @@ static void build_rr_graph(e_graph_type graph_type,
static int get_delayless_switch_id(const t_det_routing_arch& det_routing_arch,
bool load_rr_graph);

/**
* @brief Calculates the routing channel width at each grid location.
*
* Iterates through all RR nodes and counts how many wires pass through each (layer, x, y) location
* for both horizontal (CHANX) and vertical (CHANY) channels.
*/
static void alloc_and_init_channel_width();

/******************* Subroutine definitions *******************************/

void create_rr_graph(e_graph_type graph_type,
Expand Down Expand Up @@ -533,6 +541,8 @@ void create_rr_graph(e_graph_type graph_type,
device_ctx.rr_graph.rr_nodes(),
is_flat);

alloc_and_init_channel_width();

print_rr_graph_stats();

// Write out rr graph file if needed - Currently, writing the flat rr-graph is not supported since loading from a flat rr-graph is not supported.
Expand Down Expand Up @@ -1116,6 +1126,53 @@ static int get_delayless_switch_id(const t_det_routing_arch& det_routing_arch,
return delayless_switch;
}

static void alloc_and_init_channel_width() {
DeviceContext& mutable_device_ctx = g_vpr_ctx.mutable_device();
const DeviceGrid& grid = mutable_device_ctx.grid;
const auto& rr_graph = mutable_device_ctx.rr_graph;

vtr::NdMatrix<int, 3>& chanx_width = mutable_device_ctx.rr_chanx_segment_width;
vtr::NdMatrix<int, 3>& chany_width = mutable_device_ctx.rr_chany_segment_width;

chanx_width.resize({grid.get_num_layers(), grid.width(), grid.height()});
chany_width.resize({grid.get_num_layers(), grid.width(), grid.height()});

chanx_width.fill(0);
chany_width.fill(0);

for (RRNodeId node_id : rr_graph.nodes()) {
e_rr_type rr_type = rr_graph.node_type(node_id);

if (rr_type == e_rr_type::CHANX) {
int y = rr_graph.node_ylow(node_id);
int layer = rr_graph.node_layer_low(node_id);
for (int x = rr_graph.node_xlow(node_id); x <= rr_graph.node_xhigh(node_id); x++) {
chanx_width[layer][x][y] += rr_graph.node_capacity(node_id);
}
} else if (rr_type == e_rr_type::CHANY) {
int x = rr_graph.node_xlow(node_id);
int layer = rr_graph.node_layer_low(node_id);
for (int y = rr_graph.node_ylow(node_id); y <= rr_graph.node_yhigh(node_id); y++) {
chany_width[layer][x][y] += rr_graph.node_capacity(node_id);
}
}
}

std::vector<int>& chanx_width_list = mutable_device_ctx.rr_chanx_width;
std::vector<int>& chany_width_list = mutable_device_ctx.rr_chany_width;

chanx_width_list.resize(grid.height());
chany_width_list.resize(grid.width());

std::ranges::fill(chanx_width_list, 0);
std::ranges::fill(chany_width_list, 0);

for (t_physical_tile_loc loc : grid.all_locations()) {
chanx_width_list[loc.y] = std::max(chanx_width[loc.layer_num][loc.x][loc.y], chanx_width_list[loc.y]);
chany_width_list[loc.x] = std::max(chany_width[loc.layer_num][loc.x][loc.y], chany_width_list[loc.x]);
}
}

void build_tile_rr_graph(RRGraphBuilder& rr_graph_builder,
const t_det_routing_arch& det_routing_arch,
t_physical_tile_type_ptr physical_tile,
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
arch circuit script_params vtr_flow_elapsed_time vtr_max_mem_stage vtr_max_mem error odin_synth_time max_odin_mem parmys_synth_time max_parmys_mem abc_depth abc_synth_time abc_cec_time abc_sec_time max_abc_mem ace_time max_ace_mem num_clb num_io num_memories num_mult vpr_status vpr_revision vpr_build_info vpr_compiler vpr_compiled hostname rundir max_vpr_mem num_primary_inputs num_primary_outputs num_pre_packed_nets num_pre_packed_blocks num_netlist_clocks num_post_packed_nets num_post_packed_blocks device_width device_height device_grid_tiles device_limiting_resources device_name pack_mem pack_time initial_placed_wirelength_est placed_wirelength_est total_swap accepted_swap rejected_swap aborted_swap place_mem place_time place_quench_time min_chan_width routed_wirelength min_chan_width_route_success_iteration logic_block_area_total logic_block_area_used min_chan_width_routing_area_total min_chan_width_routing_area_per_tile min_chan_width_route_time
k4_N10_memSize16384_memData64.xml ch_intrinsics.v common 1.07 vpr 64.88 MiB -1 -1 0.15 28236 3 0.06 -1 -1 36544 -1 -1 72 99 1 0 success v8.0.0-12799-g50a644d78 release IPO VTR_ASSERT_LEVEL=2 GNU 11.4.0 on Linux-6.8.0-60-generic x86_64 2025-06-10T17:21:16 llavign1-OptiPlex-7070 /home/llavign1/Gits/vtr-clone/vtr_flow/tasks 66436 99 130 353 483 1 222 302 13 13 169 clb auto 25.3 MiB 0.03 1748.73 707 29650 4654 11713 13283 64.9 MiB 0.02 0.00 26 1506 9 3.33e+06 2.28e+06 360896. 2135.48 0.38
k4_N10_memSize16384_memData64.xml diffeq1.v common 2.66 vpr 67.86 MiB -1 -1 0.19 32844 23 0.23 -1 -1 37316 -1 -1 74 162 0 5 success v8.0.0-12799-g50a644d78 release IPO VTR_ASSERT_LEVEL=2 GNU 11.4.0 on Linux-6.8.0-60-generic x86_64 2025-06-10T17:21:16 llavign1-OptiPlex-7070 /home/llavign1/Gits/vtr-clone/vtr_flow/tasks 69484 162 96 1186 1127 1 667 337 13 13 169 clb auto 28.2 MiB 0.10 7906.16 4859 81205 21212 54650 5343 67.9 MiB 0.08 0.00 50 9091 14 3.33e+06 2.67e+06 641417. 3795.37 1.37
k4_N10_memSize16384_memData64.xml single_wire.v common 0.31 vpr 62.98 MiB -1 -1 0.05 25804 1 0.01 -1 -1 33044 -1 -1 0 1 0 0 success v8.0.0-12799-g50a644d78 release IPO VTR_ASSERT_LEVEL=2 GNU 11.4.0 on Linux-6.8.0-60-generic x86_64 2025-06-10T17:21:16 llavign1-OptiPlex-7070 /home/llavign1/Gits/vtr-clone/vtr_flow/tasks 64496 1 1 1 2 0 1 2 3 3 9 -1 auto 24.5 MiB 0.00 2 2 3 0 3 0 63.0 MiB 0.00 0.00 2 1 1 30000 0 1489.46 165.495 0.00
k4_N10_memSize16384_memData64.xml single_ff.v common 0.41 vpr 63.02 MiB -1 -1 0.08 26064 1 0.01 -1 -1 33064 -1 -1 1 2 0 0 success v8.0.0-12799-g50a644d78 release IPO VTR_ASSERT_LEVEL=2 GNU 11.4.0 on Linux-6.8.0-60-generic x86_64 2025-06-10T17:21:16 llavign1-OptiPlex-7070 /home/llavign1/Gits/vtr-clone/vtr_flow/tasks 64532 2 1 3 4 1 3 4 3 3 9 -1 auto 24.7 MiB 0.00 6 6 9 6 0 3 63.0 MiB 0.00 0.00 16 5 1 30000 30000 2550.78 283.420 0.00
k4_N10_memSize16384_memData64.xml ch_intrinsics.v common 1.64 vpr 64.26 MiB -1 -1 0.18 21064 3 0.07 -1 -1 32716 -1 -1 72 99 1 0 success v8.0.0-14178-g4818739e3-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 13.3.0 on Linux-6.8.0-71-generic x86_64 2025-10-15T12:13:40 betzgrp-wintermute /home/gholam39/vpr/vtr-verilog-to-routing/vtr_flow 65800 99 130 353 483 1 222 302 13 13 169 clb auto 24.4 MiB 0.03 1748.73 1183 124778 46879 23081 54818 64.3 MiB 0.15 0.00 22 2035 37 3.33e+06 2.28e+06 311708. 1844.43 0.53
k4_N10_memSize16384_memData64.xml diffeq1.v common 2.81 vpr 67.24 MiB -1 -1 0.22 25288 23 0.24 -1 -1 33564 -1 -1 74 162 0 5 success v8.0.0-14178-g4818739e3-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 13.3.0 on Linux-6.8.0-71-generic x86_64 2025-10-15T12:13:40 betzgrp-wintermute /home/gholam39/vpr/vtr-verilog-to-routing/vtr_flow 68852 162 96 1186 1127 1 667 337 13 13 169 clb auto 27.5 MiB 0.12 7906.16 4910 96441 37729 58151 561 67.2 MiB 0.12 0.00 50 9566 16 3.33e+06 2.67e+06 641417. 3795.37 1.17
k4_N10_memSize16384_memData64.xml single_wire.v common 0.52 vpr 61.97 MiB -1 -1 0.06 19028 1 0.02 -1 -1 29568 -1 -1 0 1 0 0 success v8.0.0-14178-g4818739e3-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 13.3.0 on Linux-6.8.0-71-generic x86_64 2025-10-15T12:13:40 betzgrp-wintermute /home/gholam39/vpr/vtr-verilog-to-routing/vtr_flow 63456 1 1 1 2 0 1 2 3 3 9 -1 auto 23.7 MiB 0.00 2 2 3 0 3 0 62.0 MiB 0.00 0.00 2 1 1 30000 0 1489.46 165.495 0.00
k4_N10_memSize16384_memData64.xml single_ff.v common 0.49 vpr 62.35 MiB -1 -1 0.06 19528 1 0.02 -1 -1 29612 -1 -1 1 2 0 0 success v8.0.0-14178-g4818739e3-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 13.3.0 on Linux-6.8.0-71-generic x86_64 2025-10-15T12:13:40 betzgrp-wintermute /home/gholam39/vpr/vtr-verilog-to-routing/vtr_flow 63844 2 1 3 4 1 3 4 3 3 9 -1 auto 23.7 MiB 0.00 6 6 9 3 3 3 62.3 MiB 0.00 0.00 26 15 1 30000 30000 4706.78 522.975 0.01
Loading