@@ -64,7 +64,6 @@ getWarpsPerTile(tt::DotOp dotOp,
64
64
ceil <uint32_t >(dpasCap.repeatCount , dpasCap.executionSize );
65
65
uint32_t colRowRatio =
66
66
ceil <uint32_t >(dpasCap.executionSize , dpasCap.repeatCount );
67
- llvm::errs () << " rowColRation: " << rowColRatio << " , colRowRatio: " << colRowRatio << " , ret: " << ret[0 ] << " , " << ret[1 ] << " \n " ;
68
67
69
68
int rowDim = order[rank - 2 ], colDim = order[rank - 1 ];
70
69
do {
@@ -119,20 +118,19 @@ class BlockedToDPAS : public OpRewritePattern<tt::DotOp> {
119
118
unsigned opsPerChan =
120
119
ttg::intel::DpasEncodingAttr::getOpsPerChannel (elemType);
121
120
122
- SmallVector<unsigned > order = {0 , 1 };
121
+ SmallVector<unsigned > order = {0 , 1 };
123
122
Operation *aOp = a.getDefiningOp ();
124
123
if (isa<ttg::ConvertLayoutOp>(aOp)) {
125
- auto valueToConvert = aOp->getOperand (0 );
126
- aOp = valueToConvert.getDefiningOp ();
124
+ auto valueToConvert = aOp->getOperand (0 );
125
+ aOp = valueToConvert.getDefiningOp ();
127
126
}
128
127
if (aOp && isa<tt::LoadOp>(aOp)) {
129
128
Attribute layout;
130
- assert (aOp->getNumResults () == 1 );
131
- layout =
132
- cast<RankedTensorType>(aOp->getResult (0 ).getType ()).getEncoding ();
129
+ assert (aOp->getNumResults () == 1 );
130
+ layout =
131
+ cast<RankedTensorType>(aOp->getResult (0 ).getType ()).getEncoding ();
133
132
order = triton::gpu::getOrder (layout);
134
133
}
135
- llvm::errs () << " order: " << order[0 ] << " , " << order[1 ] << " \n " ;
136
134
137
135
SmallVector<unsigned > warpsPerTile =
138
136
getWarpsPerTile (dotOp, dpasCap, retShape, numWarps, order);
0 commit comments