Skip to content

Commit 2f7f66b

Browse files
bingzheliu, meta-codesync[bot]
authored and committed
Add log for A2AvDynamic
Summary: Add the necessary logs to A2AvDynamic. Logs are added only on the CPU side to avoid performance degradation on the GPU. Reviewed By: cenzhaometa. Differential Revision: D87947269. fbshipit-source-id: 61df70ce017a96fe140ba633c7c13457c2ad3a57
1 parent 683a441 commit 2f7f66b

File tree

2 files changed

+139
-0
lines changed

2 files changed

+139
-0
lines changed

comms/ctran/algos/AllToAll/AllToAllvDynamicCommon.cc

Lines changed: 123 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -208,6 +208,98 @@ static inline commResult_t peerPutContig(
208208
return commSuccess;
209209
}
210210

211+
static inline void printDynamicLog(
212+
const void* const* sendbuffs,
213+
void* const* recvbuffs,
214+
void* recvbuff,
215+
size_t sendcountsLength,
216+
bool combine,
217+
OpElem::opType algoType,
218+
CtranComm* comm,
219+
uint64_t opCount,
220+
int myRank,
221+
int nRanks) {
222+
std::string sendbuffsStr = "", recvbuffsStr = "";
223+
for (int i = 0; i < nRanks; i++) {
224+
sendbuffsStr += fmt::format("sendbuffs[{}] = {}", i, sendbuffs[i]);
225+
if (i < nRanks - 1) {
226+
sendbuffsStr += ", ";
227+
}
228+
if (recvbuffs != nullptr) {
229+
recvbuffsStr += fmt::format("recvbuffs[{}] = {}", i, recvbuffs[i]);
230+
if (i < nRanks - 1) {
231+
recvbuffsStr += ", ";
232+
}
233+
}
234+
}
235+
if (recvbuffs == nullptr) {
236+
recvbuffsStr = "recvbuffs = nullptr";
237+
}
238+
239+
size_t* sendCountsTmpbufCPU =
240+
reinterpret_cast<size_t*>(comm->ctran_->algo->getTmpBuf(
241+
CtranAlgo::TmpbufType::SENDCOUNTS_TMPBUF_CPU));
242+
std::string sendCountsTmpbufCPUStr = "";
243+
for (int i = 0; i < sendcountsLength; i++) {
244+
sendCountsTmpbufCPUStr += std::to_string(sendCountsTmpbufCPU[i]);
245+
if (i < sendcountsLength - 1) {
246+
sendCountsTmpbufCPUStr += ", ";
247+
}
248+
}
249+
250+
CLOGF_TRACE(
251+
COLL,
252+
"ctranAllToAllvDynamicIbImpl {}: opCount {} myRank {} - \n"
253+
"{}\n"
254+
"recvbuff = {}, {}\n"
255+
"sendcounts = [{}]",
256+
combine ? "[combine]" : "[dispatch]",
257+
opCount,
258+
myRank,
259+
sendbuffsStr,
260+
recvbuff,
261+
recvbuffsStr,
262+
sendCountsTmpbufCPUStr);
263+
264+
if (algoType == OpElem::opType::ALLTOALLV_DYNAMIC_SPLIT_NON_CONTIG) {
265+
size_t* sendIndices =
266+
reinterpret_cast<size_t*>(comm->ctran_->algo->getTmpBuf(
267+
CtranAlgo::TmpbufType::SENDINDICES_TMPBUF_CPU));
268+
269+
size_t* sendIndicesBlockLengthsTmpbufCPU =
270+
reinterpret_cast<size_t*>(comm->ctran_->algo->getTmpBuf(
271+
CtranAlgo::TmpbufType::SENDINDICES_BLOCKLEN_TMPBUF_CPU));
272+
273+
std::string sendIndicesStr = "", sendIndicesBlockLengthsStr = "";
274+
int j = 0;
275+
for (int i = 0; i < nRanks; i++) {
276+
sendIndicesBlockLengthsStr +=
277+
std::to_string(sendIndicesBlockLengthsTmpbufCPU[i]);
278+
if (i < nRanks - 1) {
279+
sendIndicesBlockLengthsStr += ", ";
280+
}
281+
for (int k = 0; k < sendIndicesBlockLengthsTmpbufCPU[i]; k++) {
282+
sendIndicesStr += std::to_string(sendIndices[j]);
283+
if ((k < sendIndicesBlockLengthsTmpbufCPU[i] - 1) || (i < nRanks - 1)) {
284+
sendIndicesStr += ", ";
285+
}
286+
j++;
287+
}
288+
}
289+
290+
CLOGF_TRACE(
291+
COLL,
292+
"ctranAllToAllvDynamicIbImpl {}: opCount {} myRank {} - "
293+
"sendIndices = [{}], "
294+
"sendIndicesBlockLengths = [{}]",
295+
combine ? "[combine]" : "[dispatch]",
296+
opCount,
297+
myRank,
298+
sendIndicesStr,
299+
sendIndicesBlockLengthsStr);
300+
}
301+
}
302+
211303
commResult_t ctranAllToAllvDynamicIbImpl(
212304
const void* const* sendbuffs,
213305
void* const* recvbuffs,
@@ -225,6 +317,29 @@ commResult_t ctranAllToAllvDynamicIbImpl(
225317
const int myRank = statex->rank();
226318
const int nRanks = statex->nRanks();
227319

320+
CLOGF_SUBSYS(
321+
INFO,
322+
COLL,
323+
"Entered ctranAllToAllvDynamicIbImpl {}: myRank {} nRanks {}",
324+
combine ? "[combine]" : "[dispatch]",
325+
myRank,
326+
nRanks);
327+
328+
// Debug logging: print parameter values when debug mode is enabled
329+
if (NCCL_CTRAN_ENABLE_TRACE_LOG) {
330+
printDynamicLog(
331+
sendbuffs,
332+
recvbuffs,
333+
recvbuff,
334+
sendcountsLength,
335+
combine,
336+
algoType,
337+
comm,
338+
comm->ctran_->getOpCount(),
339+
myRank,
340+
nRanks);
341+
}
342+
228343
std::vector<void*> remoteRecvBuffs(nRanks);
229344
std::vector<struct CtranMapperRemoteAccessKey> remoteAccessKeys(nRanks);
230345

@@ -302,6 +417,14 @@ commResult_t ctranAllToAllvDynamicIbImpl(
302417
FB_COMMCHECK(comm->ctran_->mapper->deregDynamic(hdl));
303418
}
304419

420+
CLOGF_SUBSYS(
421+
INFO,
422+
COLL,
423+
"Finished ctranAllToAllvDynamicIbImpl {}: myRank {} nRanks {}",
424+
combine ? "[combine]" : "[dispatch]",
425+
myRank,
426+
nRanks);
427+
305428
return commSuccess;
306429
}
307430

comms/ctran/algos/AllToAll/AllToallvDynamicSplitNonContig.cc

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,14 @@ commResult_t ctranAlltoallvDynamicSplitNonContig(
3030
cudaStream_t stream,
3131
bool combine,
3232
size_t* outputChunkSizesPerRank) {
33+
CLOGF_SUBSYS(
34+
INFO,
35+
COLL,
36+
"Entered ctranAlltoallvDynamicSplitNonContig {}: myRank {} nRanks {}",
37+
combine ? "[combine]" : "[dispatch]",
38+
comm->statex_->rank(),
39+
comm->statex_->nRanks());
40+
3341
auto opCount = comm->ctran_->getOpCount();
3442
FB_COMMCHECK(comm->ctran_->algo->initTmpBufs());
3543

@@ -127,5 +135,13 @@ commResult_t ctranAlltoallvDynamicSplitNonContig(
127135
std::nullopt, /* timeout */
128136
graphPrepareFn));
129137

138+
CLOGF_SUBSYS(
139+
INFO,
140+
COLL,
141+
"Enqueued AlltoallvDynamicSplitNonContig {}: myRank {} nRanks {}",
142+
combine ? "[combine]" : "[dispatch]",
143+
comm->statex_->rank(),
144+
comm->statex_->nRanks());
145+
130146
return commSuccess;
131147
}

0 commit comments

Comments
 (0)