Skip to content

Commit

Permalink
Refactor dispatch latency test and fix several bugs.
Browse files Browse the repository at this point in the history
  • Loading branch information
bensander committed Aug 17, 2017
1 parent f34b89b commit e87f7a4
Show file tree
Hide file tree
Showing 2 changed files with 140 additions and 116 deletions.
29 changes: 22 additions & 7 deletions samples/1_Utils/hipDispatchLatency/ResultDatabase.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,16 +7,23 @@

using namespace std;

#define SORT_BY_NAME 0
#define SORT_RETAIN_ATTS_ORDER 1


bool ResultDatabase::Result::operator<(const Result &rhs) const
{
if (test < rhs.test)
return true;
if (test > rhs.test)
return false;
#if (SORT_RETAIN_ATTS_ORDER == 0)
// For ties, sort by the value of the attribute:
if (atts < rhs.atts)
return true;
if (atts > rhs.atts)
return false;
#endif
return false; // less-operator returns false on equal
}

Expand Down Expand Up @@ -189,7 +196,10 @@ void ResultDatabase::AddResult(const string &test_orig,
void ResultDatabase::DumpDetailed(ostream &out)
{
vector<Result> sorted(results);
sort(sorted.begin(), sorted.end());

#if SORT_BY_NAME
stable_sort(sorted.begin(), sorted.end());
#endif

const int testNameW = 24 ;
const int attW = 12;
Expand Down Expand Up @@ -283,12 +293,15 @@ void ResultDatabase::DumpDetailed(ostream &out)
void ResultDatabase::DumpSummary(ostream &out)
{
vector<Result> sorted(results);
sort(sorted.begin(), sorted.end());

const int testNameW = 24 ;
#if SORT_BY_NAME
stable_sort(sorted.begin(), sorted.end());
#endif

const int testNameW = 32 ;
const int attW = 12;
const int fieldW = 9;
out << std::fixed << right << std::setprecision(4);
out << std::fixed << right << std::setprecision(2);

// TODO: in big parallel runs, the "trials" are the procs
// and we really don't want to print them all out....
Expand Down Expand Up @@ -334,8 +347,8 @@ void ResultDatabase::DumpSummary(ostream &out)
}
if (0) {
out << endl
<< "Note: results marked with (*) had missing values such as" << endl
<< "might occur with a mixture of architectural capabilities." << endl;
<< "Note: results marked with (*) had missing values such as" << endl
<< "might occur with a mixture of architectural capabilities." << endl;
}
}

Expand Down Expand Up @@ -381,7 +394,9 @@ void ResultDatabase::DumpCsv(string fileName)
bool emptyFile;
vector<Result> sorted(results);

sort(sorted.begin(), sorted.end());
#if SORT_BY_NAME
stable_sort(sorted.begin(), sorted.end());
#endif

//Check to see if the file is empty - if so, add the headers
emptyFile = this->IsFileEmpty(fileName);
Expand Down
227 changes: 118 additions & 109 deletions samples/1_Utils/hipDispatchLatency/hipDispatchLatency.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,15 +25,27 @@ THE SOFTWARE.
#include<time.h>
#include"ResultDatabase.h"

#define check(msg, status) \
if(status != hipSuccess){ \
printf("%s failed.\n",#msg); \
exit(1); \
#define PRINT_PROGRESS 0

#define check(cmd) \
{\
hipError_t status = cmd;\
if(status != hipSuccess){ \
printf("error: '%s'(%d) from %s at %s:%d\n", \
hipGetErrorString(status), status, #cmd,\
__FILE__, __LINE__); \
abort(); \
}\
}

#define LEN 1024*1024
#define SIZE LEN * sizeof(float)
#define ITER 10120

#define NUM_GROUPS 1
#define GROUP_SIZE 64
#define TEST_ITERS 20
#define DISPATCHES_PER_TEST 100

const unsigned p_tests = 0xfffffff;


// HCC optimizes away fully NULL kernel calls, so run one that is nearly null:
Expand All @@ -44,115 +56,112 @@ __global__ void NearlyNull(hipLaunchParm lp, float* Ad){
}


ResultDatabase resultDB;


void stopTest(hipEvent_t start, hipEvent_t stop, const char *msg, int iters)
{
float mS = 0;
check(hipEventRecord(stop));
check(hipDeviceSynchronize());
check(hipEventElapsedTime(&mS, start, stop));
resultDB.AddResult(std::string(msg), "", "uS", mS*1000/iters);
if (PRINT_PROGRESS & 0x1 ) {
std::cout<< msg <<"\t\t"<<mS*1000/iters<<" uS"<<std::endl;
}
if (PRINT_PROGRESS & 0x2 ) {
resultDB.DumpSummary(std::cout);
}
}


int main(){

hipError_t err;
float *A;
float *Ad = NULL;

A = new float[LEN];
float *Ad;
check(hipMalloc(&Ad, 4));

for(int i=0;i<LEN;i++){
A[i] = 1.0f;
}

hipStream_t stream;
err = hipStreamCreate(&stream);
check("Creating stream",err);
check(hipStreamCreate(&stream));

//err = hipMalloc(&Ad, SIZE);
//check("Allocating Ad memory on device", err);
//err = hipMemcpy(Ad, A, SIZE, hipMemcpyHostToDevice);
//check("Doing memory copy from A to Ad", err);

float mS = 0;
hipEvent_t start, stop;
hipEventCreate(&start);
hipEventCreate(&stop);

ResultDatabase resultDB[8];


hipEventRecord(start);
hipLaunchKernel(NearlyNull, dim3(LEN/512), dim3(512), 0, 0, Ad);
hipEventRecord(stop);
hipEventElapsedTime(&mS, start, stop);
resultDB[0].AddResult(std::string("First Kernel Launch"), "", "uS", mS*1000);
// std::cout<<"First Kernel Launch: \t\t"<<mS*1000<<" uS"<<std::endl;
resultDB[0].DumpSummary(std::cout);
hipEventRecord(start);
hipLaunchKernel(NearlyNull, dim3(LEN/512), dim3(512), 0, 0, Ad);
hipEventRecord(stop);
hipEventElapsedTime(&mS, start, stop);
resultDB[1].AddResult(std::string("Second Kernel Launch"), "", "uS", mS*1000);
// std::cout<<"Second Kernel Launch: \t\t"<<mS*1000<<" uS"<<std::endl;
resultDB[1].DumpSummary(std::cout);
hipEventRecord(start);
for(int i=0;i<ITER;i++){
hipLaunchKernel(NearlyNull, dim3(LEN/512), dim3(512), 0, 0, Ad);
}
hipDeviceSynchronize();
hipEventRecord(stop);
hipEventElapsedTime(&mS, start, stop);
resultDB[2].AddResult(std::string("NULL Stream Sync dispatch wait"), "", "uS", mS*1000/ITER);
resultDB[2].DumpSummary(std::cout);
// std::cout<<"NULL Stream Sync dispatch wait: \t"<<mS*1000/ITER<<" uS"<<std::endl;
hipDeviceSynchronize();

hipEventRecord(start);
for(int i=0;i<ITER;i++){
hipLaunchKernel(NearlyNull, dim3(LEN/512), dim3(512), 0, 0, Ad);
}
hipEventRecord(stop);
hipDeviceSynchronize();
hipEventElapsedTime(&mS, start, stop);
resultDB[3].AddResult(std::string("NULL Stream Async dispatch wait"), "", "uS", mS*1000/ITER);
resultDB[3].DumpSummary(std::cout);
// std::cout<<"NULL Stream Async dispatch wait: \t"<<mS*1000/ITER<<" uS"<<std::endl;
hipDeviceSynchronize();

hipEventRecord(start);
for(int i=0;i<ITER;i++){
hipLaunchKernel(NearlyNull, dim3(LEN/512), dim3(512), 0, stream, Ad);
hipDeviceSynchronize();
}
hipEventRecord(stop);
hipEventElapsedTime(&mS, start, stop);
resultDB[4].AddResult(std::string("Stream Sync dispatch wait"), "", "uS", mS*1000/ITER);
resultDB[4].DumpSummary(std::cout);
// std::cout<<"Stream Sync dispatch wait: \t\t"<<mS*1000/ITER<<" uS"<<std::endl;
hipDeviceSynchronize();
hipEventRecord(start);
for(int i=0;i<ITER;i++){
hipLaunchKernel(NearlyNull, dim3(LEN/512), dim3(512), 0, stream, Ad);
}
hipDeviceSynchronize();
hipEventRecord(stop);
hipEventElapsedTime(&mS, start, stop);
resultDB[5].AddResult(std::string("Stream Async dispatch wait"), "", "uS", mS*1000/ITER);
// std::cout<<"Stream Async dispatch wait: \t\t"<<mS*1000/ITER<<" uS"<<std::endl;
resultDB[5].DumpSummary(std::cout);
hipDeviceSynchronize();

hipEventRecord(start);
for(int i=0;i<ITER;i++){
hipLaunchKernel(NearlyNull, dim3(LEN/512), dim3(512), 0, 0, Ad);
}
hipEventRecord(stop);
hipEventElapsedTime(&mS, start, stop);
resultDB[6].AddResult(std::string("NULL Stream No Wait"), "", "uS", mS*1000/ITER);
resultDB[6].DumpSummary(std::cout);
// std::cout<<"NULL Stream Dispatch No Wait: \t\t"<<mS*1000/ITER<<" uS"<<std::endl;
hipDeviceSynchronize();

hipEventRecord(start);
for(int i=0;i<ITER;i++){
hipLaunchKernel(NearlyNull, dim3(LEN/512), dim3(512), 0, stream, Ad);
}
hipEventRecord(stop);
hipEventElapsedTime(&mS, start, stop);
resultDB[7].AddResult(std::string("Stream Dispatch No Wait"), "", "uS", mS*1000/ITER);
resultDB[7].DumpSummary(std::cout);
// std::cout<<"Stream Dispatch No Wait: \t\t"<<mS*1000/ITER<<" uS"<<std::endl;
hipDeviceSynchronize();
hipEvent_t start, sync, stop;
check(hipEventCreate(&start));
check(hipEventCreateWithFlags(&sync, hipEventBlockingSync));
check(hipEventCreate(&stop));



hipStream_t stream0 = 0;


if (p_tests & 0x1) {
hipEventRecord(start);
hipLaunchKernel(NearlyNull, dim3(NUM_GROUPS), dim3(GROUP_SIZE), 0, stream0, Ad);
stopTest(start, stop, "FirstKernelLaunch", 1);
}



if (p_tests & 0x2) {
hipEventRecord(start);
hipLaunchKernel(NearlyNull, dim3(NUM_GROUPS), dim3(GROUP_SIZE), 0, stream0, Ad);
stopTest(start, stop, "SecondKernelLaunch", 1);
}


if (p_tests & 0x4) {
for (int t=0; t<TEST_ITERS; t++) {
hipEventRecord(start);
for(int i=0;i<DISPATCHES_PER_TEST;i++){
hipLaunchKernel(NearlyNull, dim3(NUM_GROUPS), dim3(GROUP_SIZE), 0, stream0, Ad);
hipEventRecord(sync);
hipEventSynchronize(sync);
}
stopTest(start, stop, "NullStreamASyncDispatchWait", DISPATCHES_PER_TEST);
}
}


if (p_tests & 0x10) {
for (int t=0; t<TEST_ITERS; t++) {
hipEventRecord(start);
for(int i=0;i<DISPATCHES_PER_TEST;i++){
hipLaunchKernel(NearlyNull, dim3(NUM_GROUPS), dim3(GROUP_SIZE), 0, stream, Ad);
hipEventRecord(sync);
hipEventSynchronize(sync);
}
stopTest(start, stop, "StreamASyncDispatchWait", DISPATCHES_PER_TEST);
}
}

#if 1

if (p_tests & 0x40) {
for (int t=0; t<TEST_ITERS; t++) {
hipEventRecord(start);
for(int i=0;i<DISPATCHES_PER_TEST;i++){
hipLaunchKernel(NearlyNull, dim3(NUM_GROUPS), dim3(GROUP_SIZE), 0, stream0, Ad);
}
stopTest(start, stop, "NullStreamASyncDispatchNoWait", DISPATCHES_PER_TEST);
}
}

if (p_tests & 0x80) {
for (int t=0; t<TEST_ITERS; t++) {
hipEventRecord(start);
for(int i=0;i<DISPATCHES_PER_TEST;i++){
hipLaunchKernel(NearlyNull, dim3(NUM_GROUPS), dim3(GROUP_SIZE), 0, stream, Ad);
}
stopTest(start, stop, "StreamASyncDispatchNoWait", DISPATCHES_PER_TEST);
}
}
#endif
resultDB.DumpSummary(std::cout);


check(hipEventDestroy(start));
check(hipEventDestroy(sync));
check(hipEventDestroy(stop));
}

0 comments on commit e87f7a4

Please sign in to comment.