Skip to content

Commit 56a8b86

Browse files
[AP][InitialPlacement] Improved Initial Placement
Found that the Initial Placer stage of the AP flow (after APPack, but before Detailed Placement) was not working as expected. The intention was that clusters would be placed at their centroid location according to the flat placement, and if that site was illegal or taken it would take a nearby point instead (falling back on the original initial placer if nothing could be found). To achieve this, I was using a method called find_centroid_neighbor, which I thought would return the nearest legal location to the given location. This was not correct. This method just creates a bounding box and tries to find a random point in that box around the given point. This was causing our AP flow to move clusters WAY farther than they wanted, which moved them into places other clusters wanted to go. This was also not exhaustive, so it was often falling back on the original approach, which was putting clusters in practically random locations. All of this was causing the post-FL placement from the AP flow to actually have worse quality than the default AP flow! To resolve this, I wrote the actual method I was intending. It performs a BFS-style search from the src location to all legal locations and returns the closest one. By doing this BFS on the compressed grid, I found that this is actually quite efficient. With these changes, I found that the quality of the post-FL placement more than doubled and the average atom displacement from the GP solution decreased dramatically.
1 parent bae777e commit 56a8b86

File tree

3 files changed

+198
-71
lines changed

3 files changed

+198
-71
lines changed

vpr/src/base/flat_placement_types.h

+10
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,16 @@ struct t_flat_pl_loc {
3535
return *this;
3636
}
3737

38+
/**
39+
* @brief Subtracts the coordinates of another t_flat_pl_loc from this one.
40+
*/
41+
t_flat_pl_loc& operator-=(const t_flat_pl_loc& other) {
42+
x -= other.x;
43+
y -= other.y;
44+
layer -= other.layer;
45+
return *this;
46+
}
47+
3848
/**
3949
* @brief Divides the coordinates of this t_flat_pl_loc by a divisor.
4050
*

vpr/src/place/initial_placement.cpp

+183-66
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
#include "atom_netlist_fwd.h"
33
#include "physical_types_util.h"
44
#include "place_macro.h"
5+
#include "vtr_ndmatrix.h"
56
#include "vtr_random.h"
67
#include "vtr_time.h"
78
#include "vpr_types.h"
@@ -19,7 +20,9 @@
1920

2021
#include <cmath>
2122
#include <iterator>
23+
#include <limits>
2224
#include <optional>
25+
#include <queue>
2326

2427
#ifdef VERBOSE
2528
void print_clb_placement(const char* fname);
@@ -38,11 +41,6 @@ static constexpr int SORT_WEIGHT_PER_TILES_OUTSIDE_OF_PR = 100;
3841
// The neighbor location should be within the defined range to the calculated centroid location.
3942
static constexpr int CENTROID_NEIGHBOR_SEARCH_RLIM = 15;
4043

41-
// The range limit to be used when searcing for a neighbor in the centroid placement when AP is used.
42-
// Since AP is assumed to have a better idea of where clusters should be placed, we want to search more
43-
// places to place a cluster near its solved position before giving up.
44-
static constexpr int CENTROID_NEIGHBOR_SEARCH_RLIM_AP = 60;
45-
4644
/**
4745
* @brief Control routine for placing a macro.
4846
* First iteration of place_macro performs the following steps to place a macro:
@@ -549,47 +547,188 @@ static std::vector<ClusterBlockId> find_centroid_loc(const t_pl_macro& pl_macro,
549547
}
550548

551549
// TODO: Should this return the unplaced_blocks_to_update_their_score?
552-
static void find_centroid_loc_from_flat_placement(const t_pl_macro& pl_macro,
553-
t_pl_loc& centroid,
554-
const FlatPlacementInfo& flat_placement_info) {
550+
static t_flat_pl_loc find_centroid_loc_from_flat_placement(const t_pl_macro& pl_macro,
551+
const FlatPlacementInfo& flat_placement_info) {
555552
// Use the flat placement to compute the centroid of the given macro.
556553
// TODO: Instead of averaging, maybe use MODE (most frequently placed location).
557554
float acc_weight = 0.f;
558-
float acc_x = 0.f;
559-
float acc_y = 0.f;
560-
float acc_layer = 0.f;
561-
float acc_sub_tile = 0.f;
555+
t_flat_pl_loc centroid({0.0f, 0.0f, 0.0f});
562556
for (const t_pl_macro_member& member : pl_macro.members) {
563557
const auto& cluster_atoms = g_vpr_ctx.clustering().atoms_lookup[member.blk_index];
564558
for (AtomBlockId atom_blk_id : cluster_atoms) {
565559
// TODO: We can get away with using less information.
566560
VTR_ASSERT(flat_placement_info.blk_x_pos[atom_blk_id] != FlatPlacementInfo::UNDEFINED_POS && flat_placement_info.blk_y_pos[atom_blk_id] != FlatPlacementInfo::UNDEFINED_POS && flat_placement_info.blk_layer[atom_blk_id] != FlatPlacementInfo::UNDEFINED_POS && flat_placement_info.blk_sub_tile[atom_blk_id] != FlatPlacementInfo::UNDEFINED_SUB_TILE);
567-
// TODO: Make this a debug print.
568-
// VTR_LOG("%s ", g_vpr_ctx.atom().netlist().block_name(atom_blk_id).c_str());
569561

570562
// Accumulate the x, y, layer, and sub_tile for each atom in each
571563
// member of the macro. Remove the offset so the centroid would be
572564
// where the head macro should be placed to put the members in the
573565
// correct place.
574-
acc_x += flat_placement_info.blk_x_pos[atom_blk_id] - member.offset.x;
575-
acc_y += flat_placement_info.blk_y_pos[atom_blk_id] - member.offset.y;
576-
acc_layer += flat_placement_info.blk_layer[atom_blk_id] - member.offset.layer;
577-
acc_sub_tile += flat_placement_info.blk_sub_tile[atom_blk_id] - member.offset.sub_tile;
566+
t_flat_pl_loc cluster_offset({(float)member.offset.x,
567+
(float)member.offset.y,
568+
(float)member.offset.layer});
569+
centroid += flat_placement_info.get_pos(atom_blk_id);
570+
centroid -= cluster_offset;
578571
acc_weight++;
579572
}
580573
}
581574
if (acc_weight > 0.f) {
582-
// NOTE: We add an offset of 0.5 to prevent us from moving to the tile
583-
// below / to the left due to tiny numerical changes (this
584-
// pretends that each atom is in the center of the tile).
585-
centroid.x = std::floor(acc_x / acc_weight);
586-
centroid.y = std::floor(acc_y / acc_weight);
587-
centroid.layer = std::floor(acc_layer / acc_weight);
588-
centroid.sub_tile = std::floor(acc_sub_tile / acc_weight);
589-
590-
// TODO: Make this a debug print.
591-
// VTR_LOG("\n\t(%d, %d, %d, %d)\n", centroid.x, centroid.y, centroid.layer, centroid.sub_tile);
575+
centroid /= acc_weight;
576+
}
577+
return centroid;
578+
}
579+
580+
/**
581+
* @brief Find the nearest compatible location for the given macro as close to
582+
* the src_flat_loc as possible.
583+
*
584+
* This method uses a BFS to find the closest legal location for the macro.
585+
*
586+
* @param src_flat_loc
587+
* The start location of the BFS. This is given as a flat placement to
588+
* allow the search to trade-off different location options. For example,
589+
* if src_flat_loc was (1.6, 1.5), this tells the search that the cluster
590+
* would prefer to be at tile (1, 1), but if it cannot go there and
591+
* it had to go to one of the neighbors, it would prefer to be on the
592+
* right.
593+
* @param block_type
594+
* The logical block type of the macro.
595+
* @param pl_macro
596+
* The macro to place in the location.
597+
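* @param blk_loc_registry The block location registry; used to check which sub-tiles are empty and whether the macro can legally be placed at a candidate location.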
598+
*
599+
* @return Returns the closest legal location found. All of the dimensions will
600+
* be OPEN if a location could not be found.
601+
*/
602+
static inline t_pl_loc find_nearest_compatible_loc(t_flat_pl_loc& src_flat_loc,
603+
t_logical_block_type_ptr block_type,
604+
const t_pl_macro& pl_macro,
605+
const BlkLocRegistry& blk_loc_registry) {
606+
// This method performs a BFS over the compressed grid. This avoids searching
607+
// locations which obviously cannot implement this macro.
608+
const auto& compressed_block_grid = g_vpr_ctx.placement().compressed_block_grids[block_type->index];
609+
const int num_layers = g_vpr_ctx.device().grid.get_num_layers();
610+
// This method does not support 3D FPGAs yet. The search performed will only
611+
// traverse the same layer as the src_loc.
612+
VTR_ASSERT(num_layers == 1);
613+
614+
// Get the closest (approximately) compressed location to the src location.
615+
// This does not need to be perfect (in fact I do not think it is), but the
616+
// closer it is, the faster the BFS will find the best solution.
617+
t_physical_tile_loc src_grid_loc(src_flat_loc.x, src_flat_loc.y, src_flat_loc.layer);
618+
const t_physical_tile_loc compressed_src_loc = compressed_block_grid.grid_loc_to_compressed_loc_approx(src_grid_loc);
619+
620+
// Weighted-BFS search the compressed grid for an empty compatible subtile.
621+
size_t num_rows = compressed_block_grid.get_num_rows(0);
622+
size_t num_cols = compressed_block_grid.get_num_columns(0);
623+
vtr::NdMatrix<bool, 2> visited({num_cols, num_rows}, false);
624+
float best_dist = std::numeric_limits<float>::max();
625+
t_pl_loc best_loc(OPEN, OPEN, OPEN, OPEN);
626+
627+
std::queue<t_physical_tile_loc> loc_queue;
628+
loc_queue.push(compressed_src_loc);
629+
while (!loc_queue.empty()) {
630+
// Pop the top element off the queue.
631+
t_physical_tile_loc loc = loc_queue.front();
632+
loc_queue.pop();
633+
634+
// If this location has already been visited, skip it.
635+
if (visited[loc.x][loc.y])
636+
continue;
637+
visited[loc.x][loc.y] = true;
638+
639+
// Get the distance from this loc to the src_loc in grid space
640+
// Note: In compressed space, distances are not what they appear. We are
641+
// using the true grid positions to get the truly closest loc.
642+
// Here, we use the flat centroid of the macro and measure its
643+
// distance from the center of this grid loc (hence the 0.5 offset).
644+
auto grid_loc = compressed_block_grid.compressed_loc_to_grid_loc(loc);
645+
float grid_dx = std::abs((float)grid_loc.x + 0.5f - src_flat_loc.x);
646+
float grid_dy = std::abs((float)grid_loc.y + 0.5f - src_flat_loc.y);
647+
float grid_dist = grid_dx + grid_dy;
648+
// If this distance is worse than the best we have seen or is outside
649+
// of the search distance, do not explore it or its neighbors.
650+
// NOTE: This prune is always safe (i.e. it will never remove a better
651+
// solution) since this is a spatial graph and our objective is
652+
// positional distance. The unvisited neighbors of a node should
653+
// have a higher distance than the current node.
654+
if (grid_dist >= best_dist)
655+
continue;
656+
657+
// In order to ensure our BFS finds the closest compatible location, we
658+
// traverse compressed grid locations which may not actually be valid
659+
// (i.e. no tile exists there). This is fine, we just need to check for
660+
// them to ensure we never try to put a cluster there.
661+
bool is_valid_compressed_loc = false;
662+
const auto& compressed_col_blk_map = compressed_block_grid.get_column_block_map(loc.x, 0);
663+
if (compressed_col_blk_map.count(loc.y) != 0)
664+
is_valid_compressed_loc = true;
665+
666+
// If this distance is better than the best we have seen so far, try
667+
// to see if this is a better solution.
668+
if (is_valid_compressed_loc) {
669+
// Check if a sub-tile is available at this location.
670+
const t_physical_tile_type_ptr phy_type = g_vpr_ctx.device().grid.get_physical_type(grid_loc);
671+
const auto& compatible_sub_tiles = compressed_block_grid.compatible_sub_tile_num(phy_type->index);
672+
int new_sub_tile = -1;
673+
for (int sub_tile : compatible_sub_tiles) {
674+
if (blk_loc_registry.grid_blocks().is_sub_tile_empty(grid_loc, sub_tile)) {
675+
new_sub_tile = sub_tile;
676+
break;
677+
}
678+
}
679+
if (new_sub_tile != -1) {
680+
// If a sub-tile is available, set this to be the first sub-tile
681+
// available and check if this site is legal for this macro.
682+
// TODO: Should we pick a random sub-tile instead?
683+
// Note: We are using the full legality check here to check for
684+
// floorplanning constraints and compatibility for all
685+
// members of the macro. This prevents some macros being
686+
// placed where they obviously cannot be implemented.
687+
t_pl_loc new_loc = t_pl_loc(grid_loc.x, grid_loc.y, new_sub_tile, grid_loc.layer_num);
688+
bool site_legal_for_macro = macro_can_be_placed(pl_macro,
689+
new_loc,
690+
true /*check_all_legality*/,
691+
blk_loc_registry);
692+
if (site_legal_for_macro) {
693+
// Update the best solution.
694+
// Note: We need to keep searching since the compressed grid
695+
// may present a location which is closer in compressed
696+
// space earlier than a location which is closer in
697+
// grid space.
698+
best_dist = grid_dist;
699+
best_loc = new_loc;
700+
}
701+
}
702+
}
703+
704+
// Push the neighbors (in the compressed grid) onto the queue.
705+
if (loc.x > 0) {
706+
t_physical_tile_loc new_loc = t_physical_tile_loc(loc.x - 1,
707+
loc.y,
708+
loc.layer_num);
709+
loc_queue.push(new_loc);
710+
}
711+
if (loc.x < (int)num_cols - 1) {
712+
t_physical_tile_loc new_loc = t_physical_tile_loc(loc.x + 1,
713+
loc.y,
714+
loc.layer_num);
715+
loc_queue.push(new_loc);
716+
}
717+
if (loc.y > 0) {
718+
t_physical_tile_loc new_loc = t_physical_tile_loc(loc.x,
719+
loc.y - 1,
720+
loc.layer_num);
721+
loc_queue.push(new_loc);
722+
}
723+
if (loc.y < (int)num_rows - 1) {
724+
t_physical_tile_loc new_loc = t_physical_tile_loc(loc.x,
725+
loc.y + 1,
726+
loc.layer_num);
727+
loc_queue.push(new_loc);
728+
}
592729
}
730+
731+
return best_loc;
593732
}
594733

595734
static bool try_centroid_placement(const t_pl_macro& pl_macro,
@@ -614,46 +753,24 @@ static bool try_centroid_placement(const t_pl_macro& pl_macro,
614753
unplaced_blocks_to_update_their_score = find_centroid_loc(pl_macro, centroid_loc, blk_loc_registry);
615754
found_legal_subtile = find_subtile_in_location(centroid_loc, block_type, blk_loc_registry, pr, rng);
616755
} else {
617-
// Note: AP uses a different rlim than non-AP
618-
rlim = CENTROID_NEIGHBOR_SEARCH_RLIM_AP;
619756
// If a flat placement is provided, use the flat placement to get the
620-
// centroid.
621-
find_centroid_loc_from_flat_placement(pl_macro, centroid_loc, flat_placement_info);
622-
if (!is_loc_on_chip({centroid_loc.x, centroid_loc.y, centroid_loc.layer}) || !is_loc_legal(centroid_loc, pr, block_type)) {
623-
// If the centroid is not legal, check for a neighboring block we
624-
// can use instead.
625-
bool neighbor_legal_loc = find_centroid_neighbor(centroid_loc,
626-
block_type,
627-
false,
628-
rlim,
629-
blk_loc_registry,
630-
rng);
631-
if (!neighbor_legal_loc) {
632-
// If we cannot find a neighboring block, fall back on the
633-
// original find_centroid_loc function.
634-
// FIXME: We should really just skip this block and come back
635-
// to it later. We do not want it taking space from
636-
// someone else!
637-
unplaced_blocks_to_update_their_score = find_centroid_loc(pl_macro, centroid_loc, blk_loc_registry);
638-
found_legal_subtile = find_subtile_in_location(centroid_loc, block_type, blk_loc_registry, pr, rng);
639-
} else {
640-
found_legal_subtile = true;
641-
}
757+
// centroid location of the macro.
758+
t_flat_pl_loc centroid_flat_loc = find_centroid_loc_from_flat_placement(pl_macro, flat_placement_info);
759+
// Then find the nearest legal location to this centroid for this macro.
760+
centroid_loc = find_nearest_compatible_loc(centroid_flat_loc,
761+
block_type,
762+
pl_macro,
763+
blk_loc_registry);
764+
if (centroid_loc.x == OPEN) {
765+
// If we cannot find a nearest block, fall back on the original
766+
// find_centroid_loc function.
767+
// FIXME: We should really just skip this block and come back
768+
// to it later. We do not want it taking space from
769+
// someone else!
770+
unplaced_blocks_to_update_their_score = find_centroid_loc(pl_macro, centroid_loc, blk_loc_registry);
771+
found_legal_subtile = find_subtile_in_location(centroid_loc, block_type, blk_loc_registry, pr, rng);
642772
} else {
643-
// If this is a legal location for this block, check if any other
644-
// blocks are at this subtile location.
645-
const GridBlock& grid_blocks = blk_loc_registry.grid_blocks();
646-
if (grid_blocks.block_at_location(centroid_loc)) {
647-
// If there is a block at this subtile, try to find another
648-
// subtile at this location to be placed in.
649-
found_legal_subtile = find_subtile_in_location(centroid_loc,
650-
block_type,
651-
blk_loc_registry,
652-
pr,
653-
rng);
654-
} else {
655-
found_legal_subtile = true;
656-
}
773+
found_legal_subtile = true;
657774
}
658775
}
659776

0 commit comments

Comments
 (0)