2
2
#include " atom_netlist_fwd.h"
3
3
#include " physical_types_util.h"
4
4
#include " place_macro.h"
5
+ #include " vtr_ndmatrix.h"
5
6
#include " vtr_random.h"
6
7
#include " vtr_time.h"
7
8
#include " vpr_types.h"
19
20
20
21
#include < cmath>
21
22
#include < iterator>
23
+ #include < limits>
22
24
#include < optional>
25
+ #include < queue>
23
26
24
27
#ifdef VERBOSE
25
28
void print_clb_placement (const char * fname);
@@ -38,11 +41,6 @@ static constexpr int SORT_WEIGHT_PER_TILES_OUTSIDE_OF_PR = 100;
38
41
// The neighbor location should be within the defined range to the calculated centroid location.
39
42
static constexpr int CENTROID_NEIGHBOR_SEARCH_RLIM = 15 ;
40
43
41
- // The range limit to be used when searcing for a neighbor in the centroid placement when AP is used.
42
- // Since AP is assumed to have a better idea of where clusters should be placed, we want to search more
43
- // places to place a cluster near its solved position before giving up.
44
- static constexpr int CENTROID_NEIGHBOR_SEARCH_RLIM_AP = 60 ;
45
-
46
44
/* *
47
45
* @brief Control routine for placing a macro.
48
46
* First iteration of place_macro performs the following steps to place a macro:
@@ -549,47 +547,188 @@ static std::vector<ClusterBlockId> find_centroid_loc(const t_pl_macro& pl_macro,
549
547
}
550
548
551
549
// TODO: Should this return the unplaced_blocks_to_update_their_score?
552
- static void find_centroid_loc_from_flat_placement (const t_pl_macro& pl_macro,
553
- t_pl_loc& centroid,
554
- const FlatPlacementInfo& flat_placement_info) {
550
+ static t_flat_pl_loc find_centroid_loc_from_flat_placement (const t_pl_macro& pl_macro,
551
+ const FlatPlacementInfo& flat_placement_info) {
555
552
// Use the flat placement to compute the centroid of the given macro.
556
553
// TODO: Instead of averaging, maybe use MODE (most frequently placed location).
557
554
float acc_weight = 0 .f ;
558
- float acc_x = 0 .f ;
559
- float acc_y = 0 .f ;
560
- float acc_layer = 0 .f ;
561
- float acc_sub_tile = 0 .f ;
555
+ t_flat_pl_loc centroid ({0 .0f , 0 .0f , 0 .0f });
562
556
for (const t_pl_macro_member& member : pl_macro.members ) {
563
557
const auto & cluster_atoms = g_vpr_ctx.clustering ().atoms_lookup [member.blk_index ];
564
558
for (AtomBlockId atom_blk_id : cluster_atoms) {
565
559
// TODO: We can get away with using less information.
566
560
VTR_ASSERT (flat_placement_info.blk_x_pos [atom_blk_id] != FlatPlacementInfo::UNDEFINED_POS && flat_placement_info.blk_y_pos [atom_blk_id] != FlatPlacementInfo::UNDEFINED_POS && flat_placement_info.blk_layer [atom_blk_id] != FlatPlacementInfo::UNDEFINED_POS && flat_placement_info.blk_sub_tile [atom_blk_id] != FlatPlacementInfo::UNDEFINED_SUB_TILE);
567
- // TODO: Make this a debug print.
568
- // VTR_LOG("%s ", g_vpr_ctx.atom().netlist().block_name(atom_blk_id).c_str());
569
561
570
562
// Accumulate the x, y, layer, and sub_tile for each atom in each
571
563
// member of the macro. Remove the offset so the centroid would be
572
564
// where the head macro should be placed to put the members in the
573
565
// correct place.
574
- acc_x += flat_placement_info.blk_x_pos [atom_blk_id] - member.offset .x ;
575
- acc_y += flat_placement_info.blk_y_pos [atom_blk_id] - member.offset .y ;
576
- acc_layer += flat_placement_info.blk_layer [atom_blk_id] - member.offset .layer ;
577
- acc_sub_tile += flat_placement_info.blk_sub_tile [atom_blk_id] - member.offset .sub_tile ;
566
+ t_flat_pl_loc cluster_offset ({(float )member.offset .x ,
567
+ (float )member.offset .y ,
568
+ (float )member.offset .layer });
569
+ centroid += flat_placement_info.get_pos (atom_blk_id);
570
+ centroid -= cluster_offset;
578
571
acc_weight++;
579
572
}
580
573
}
581
574
if (acc_weight > 0 .f ) {
582
- // NOTE: We add an offset of 0.5 to prevent us from moving to the tile
583
- // below / to the left due to tiny numerical changes (this
584
- // pretends that each atom is in the center of the tile).
585
- centroid.x = std::floor (acc_x / acc_weight);
586
- centroid.y = std::floor (acc_y / acc_weight);
587
- centroid.layer = std::floor (acc_layer / acc_weight);
588
- centroid.sub_tile = std::floor (acc_sub_tile / acc_weight);
589
-
590
- // TODO: Make this a debug print.
591
- // VTR_LOG("\n\t(%d, %d, %d, %d)\n", centroid.x, centroid.y, centroid.layer, centroid.sub_tile);
575
+ centroid /= acc_weight;
576
+ }
577
+ return centroid;
578
+ }
579
+
580
+ /* *
581
+ * @brief Find the nearest compatible location for the given macro as close to
582
+ * the src_flat_loc as possible.
583
+ *
584
+ * This method uses a BFS to find the closest legal location for the macro.
585
+ *
586
+ * @param src_flat_loc
587
+ * The start location of the BFS. This is given as a flat placement to
588
+ * allow the search to trade-off different location options. For example,
589
+ * if src_flat_loc was (1.6, 1.5), this tells the search that the cluster
590
+ * would prefer to be at tile (1, 1), but if it cannot go there and
591
+ * it had to go to one of the neighbors, it would prefer to be on the
592
+ * right.
593
+ * @param block_type
594
+ * The logical block type of the macro.
595
+ * @param pl_macro
596
+ * The macro to place in the location.
597
+ * @param blk_loc_registry
598
+ *
599
+ * @return Returns the closest legal location found. All of the dimensions will
600
+ * be OPEN if a location could not be found.
601
+ */
602
+ static inline t_pl_loc find_nearest_compatible_loc (t_flat_pl_loc& src_flat_loc,
603
+ t_logical_block_type_ptr block_type,
604
+ const t_pl_macro& pl_macro,
605
+ const BlkLocRegistry& blk_loc_registry) {
606
+ // This method performs a BFS over the compressed grid. This avoids searching
607
+ // locations which obviously cannot implement this macro.
608
+ const auto & compressed_block_grid = g_vpr_ctx.placement ().compressed_block_grids [block_type->index ];
609
+ const int num_layers = g_vpr_ctx.device ().grid .get_num_layers ();
610
+ // This method does not support 3D FPGAs yet. The search performed will only
611
+ // traverse the same layer as the src_flat_loc.
612
+ VTR_ASSERT (num_layers == 1 );
613
+
614
+ // Get the closest (approximately) compressed location to the src location.
615
+ // This does not need to be perfect (in fact I do not think it is), but the
616
+ // closer it is, the faster the BFS will find the best solution.
617
+ t_physical_tile_loc src_grid_loc (src_flat_loc.x , src_flat_loc.y , src_flat_loc.layer );
618
+ const t_physical_tile_loc compressed_src_loc = compressed_block_grid.grid_loc_to_compressed_loc_approx (src_grid_loc);
619
+
620
+ // Weighted-BFS search the compressed grid for an empty compatible subtile.
621
+ size_t num_rows = compressed_block_grid.get_num_rows (0 );
622
+ size_t num_cols = compressed_block_grid.get_num_columns (0 );
623
+ vtr::NdMatrix<bool , 2 > visited ({num_cols, num_rows}, false );
624
+ float best_dist = std::numeric_limits<float >::max ();
625
+ t_pl_loc best_loc (OPEN, OPEN, OPEN, OPEN);
626
+
627
+ std::queue<t_physical_tile_loc> loc_queue;
628
+ loc_queue.push (compressed_src_loc);
629
+ while (!loc_queue.empty ()) {
630
+ // Pop the top element off the queue.
631
+ t_physical_tile_loc loc = loc_queue.front ();
632
+ loc_queue.pop ();
633
+
634
+ // If this location has already been visited, skip it.
635
+ if (visited[loc.x ][loc.y ])
636
+ continue ;
637
+ visited[loc.x ][loc.y ] = true ;
638
+
639
+ // Get the distance from this loc to the src_loc in grid space
640
+ // Note: In compressed space, distances are not what they appear. We are
641
+ // using the true grid positions to get the truly closest loc.
642
+ // Here, we use the flat centroid of the macro and measure its
643
+ // distance from the center of this grid loc (hence the 0.5 offset).
644
+ auto grid_loc = compressed_block_grid.compressed_loc_to_grid_loc (loc);
645
+ float grid_dx = std::abs ((float )grid_loc.x + 0 .5f - src_flat_loc.x );
646
+ float grid_dy = std::abs ((float )grid_loc.y + 0 .5f - src_flat_loc.y );
647
+ float grid_dist = grid_dx + grid_dy;
648
+ // If this distance is worse than the best we have seen or is outside
649
+ // of the search distance, do not explore it or its neighbors.
650
+ // NOTE: This prune is always safe (i.e. it will never remove a better
651
+ // solution) since this is a spatial graph and our objective is
652
+ // positional distance. The unvisited neighbors of a node should
653
+ // have a higher distance than the current node.
654
+ if (grid_dist >= best_dist)
655
+ continue ;
656
+
657
+ // In order to ensure our BFS finds the closest compatible location, we
658
+ // traverse compressed grid locations which may not actually be valid
659
+ // (i.e. no tile exists there). This is fine, we just need to check for
660
+ // them to ensure we never try to put a cluster there.
661
+ bool is_valid_compressed_loc = false ;
662
+ const auto & compressed_col_blk_map = compressed_block_grid.get_column_block_map (loc.x , 0 );
663
+ if (compressed_col_blk_map.count (loc.y ) != 0 )
664
+ is_valid_compressed_loc = true ;
665
+
666
+ // If this distance is better than the best we have seen so far, try
667
+ // to see if this is a better solution.
668
+ if (is_valid_compressed_loc) {
669
+ // Check if a sub-tile is available at this location.
670
+ const t_physical_tile_type_ptr phy_type = g_vpr_ctx.device ().grid .get_physical_type (grid_loc);
671
+ const auto & compatible_sub_tiles = compressed_block_grid.compatible_sub_tile_num (phy_type->index );
672
+ int new_sub_tile = -1 ;
673
+ for (int sub_tile : compatible_sub_tiles) {
674
+ if (blk_loc_registry.grid_blocks ().is_sub_tile_empty (grid_loc, sub_tile)) {
675
+ new_sub_tile = sub_tile;
676
+ break ;
677
+ }
678
+ }
679
+ if (new_sub_tile != -1 ) {
680
+ // If a sub-tile is available, set this to be the first sub-tile
681
+ // available and check if this site is legal for this macro.
682
+ // TODO: Should we pick a random sub-tile instead?
683
+ // Note: We are using the full legality check here to check for
684
+ // floorplanning constraints and compatibility for all
685
+ // members of the macro. This prevents some macros being
686
+ // placed where they obviously cannot be implemented.
687
+ t_pl_loc new_loc = t_pl_loc (grid_loc.x , grid_loc.y , new_sub_tile, grid_loc.layer_num );
688
+ bool site_legal_for_macro = macro_can_be_placed (pl_macro,
689
+ new_loc,
690
+ true /* check_all_legality*/ ,
691
+ blk_loc_registry);
692
+ if (site_legal_for_macro) {
693
+ // Update the best solution.
694
+ // Note: We need to keep searching since the compressed grid
695
+ // may present a location which is closer in compressed
696
+ // space earlier than a location which is closer in
697
+ // grid space.
698
+ best_dist = grid_dist;
699
+ best_loc = new_loc;
700
+ }
701
+ }
702
+ }
703
+
704
+ // Push the neighbors (in the compressed grid) onto the queue.
705
+ if (loc.x > 0 ) {
706
+ t_physical_tile_loc new_loc = t_physical_tile_loc (loc.x - 1 ,
707
+ loc.y ,
708
+ loc.layer_num );
709
+ loc_queue.push (new_loc);
710
+ }
711
+ if (loc.x < (int )num_cols - 1 ) {
712
+ t_physical_tile_loc new_loc = t_physical_tile_loc (loc.x + 1 ,
713
+ loc.y ,
714
+ loc.layer_num );
715
+ loc_queue.push (new_loc);
716
+ }
717
+ if (loc.y > 0 ) {
718
+ t_physical_tile_loc new_loc = t_physical_tile_loc (loc.x ,
719
+ loc.y - 1 ,
720
+ loc.layer_num );
721
+ loc_queue.push (new_loc);
722
+ }
723
+ if (loc.y < (int )num_rows - 1 ) {
724
+ t_physical_tile_loc new_loc = t_physical_tile_loc (loc.x ,
725
+ loc.y + 1 ,
726
+ loc.layer_num );
727
+ loc_queue.push (new_loc);
728
+ }
592
729
}
730
+
731
+ return best_loc;
593
732
}
594
733
595
734
static bool try_centroid_placement (const t_pl_macro& pl_macro,
@@ -614,46 +753,24 @@ static bool try_centroid_placement(const t_pl_macro& pl_macro,
614
753
unplaced_blocks_to_update_their_score = find_centroid_loc (pl_macro, centroid_loc, blk_loc_registry);
615
754
found_legal_subtile = find_subtile_in_location (centroid_loc, block_type, blk_loc_registry, pr, rng);
616
755
} else {
617
- // Note: AP uses a different rlim than non-AP
618
- rlim = CENTROID_NEIGHBOR_SEARCH_RLIM_AP;
619
756
// If a flat placement is provided, use the flat placement to get the
620
- // centroid.
621
- find_centroid_loc_from_flat_placement (pl_macro, centroid_loc, flat_placement_info);
622
- if (!is_loc_on_chip ({centroid_loc.x , centroid_loc.y , centroid_loc.layer }) || !is_loc_legal (centroid_loc, pr, block_type)) {
623
- // If the centroid is not legal, check for a neighboring block we
624
- // can use instead.
625
- bool neighbor_legal_loc = find_centroid_neighbor (centroid_loc,
626
- block_type,
627
- false ,
628
- rlim,
629
- blk_loc_registry,
630
- rng);
631
- if (!neighbor_legal_loc) {
632
- // If we cannot find a neighboring block, fall back on the
633
- // original find_centroid_loc function.
634
- // FIXME: We should really just skip this block and come back
635
- // to it later. We do not want it taking space from
636
- // someone else!
637
- unplaced_blocks_to_update_their_score = find_centroid_loc (pl_macro, centroid_loc, blk_loc_registry);
638
- found_legal_subtile = find_subtile_in_location (centroid_loc, block_type, blk_loc_registry, pr, rng);
639
- } else {
640
- found_legal_subtile = true ;
641
- }
757
+ // centroid location of the macro.
758
+ t_flat_pl_loc centroid_flat_loc = find_centroid_loc_from_flat_placement (pl_macro, flat_placement_info);
759
+ // Then find the nearest legal location to this centroid for this macro.
760
+ centroid_loc = find_nearest_compatible_loc (centroid_flat_loc,
761
+ block_type,
762
+ pl_macro,
763
+ blk_loc_registry);
764
+ if (centroid_loc.x == OPEN) {
765
+ // If we cannot find a nearest block, fall back on the original
766
+ // find_centroid_loc function.
767
+ // FIXME: We should really just skip this block and come back
768
+ // to it later. We do not want it taking space from
769
+ // someone else!
770
+ unplaced_blocks_to_update_their_score = find_centroid_loc (pl_macro, centroid_loc, blk_loc_registry);
771
+ found_legal_subtile = find_subtile_in_location (centroid_loc, block_type, blk_loc_registry, pr, rng);
642
772
} else {
643
- // If this is a legal location for this block, check if any other
644
- // blocks are at this subtile location.
645
- const GridBlock& grid_blocks = blk_loc_registry.grid_blocks ();
646
- if (grid_blocks.block_at_location (centroid_loc)) {
647
- // If there is a block at this subtile, try to find another
648
- // subtile at this location to be placed in.
649
- found_legal_subtile = find_subtile_in_location (centroid_loc,
650
- block_type,
651
- blk_loc_registry,
652
- pr,
653
- rng);
654
- } else {
655
- found_legal_subtile = true ;
656
- }
773
+ found_legal_subtile = true ;
657
774
}
658
775
}
659
776
0 commit comments