Skip to content

Commit

Permalink
Add hipHostMallocCoherent, hipHostMallocNonCoherent
Browse files Browse the repository at this point in the history
Provide per-allocation control over coherent/non-coherent mem.
These overrid the default HIP_COHERENT_HOST_ALLOC setting.
  • Loading branch information
bensander committed May 24, 2017
1 parent d0ef9d8 commit 75f691e
Show file tree
Hide file tree
Showing 6 changed files with 104 additions and 26 deletions.
10 changes: 7 additions & 3 deletions include/hip/hcc_detail/hip_runtime_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -111,17 +111,21 @@ enum hipLimit_t

//! Flags that can be used with hipHostMalloc
#define hipHostMallocDefault 0x0
#define hipHostMallocPortable 0x1
#define hipHostMallocMapped 0x2
#define hipHostMallocPortable 0x1 ///< Memory is considered allocated by all contexts.
#define hipHostMallocMapped 0x2 ///< Map the allocation into the address space for the current device. The device pointer can be obtained with #hipHostGetDevicePointer.
#define hipHostMallocWriteCombined 0x4
#define hipHostMallocCoherent 0x40000000 ///< Allocate coherent memory. Overrides HIP_COHERENT_HOST_ALLOC for specific allocation.
#define hipHostMallocNonCoherent 0x80000000 ///< Allocate non-coherent memory. Overrides HIP_COHERENT_HOST_ALLOC for specific allocation.


//! Flags that can be used with hipHostRegister
#define hipHostRegisterDefault 0x0 ///< Memory is Mapped and Portable
#define hipHostRegisterPortable 0x1 ///< Memory is considered registered by all contexts. HIP only supports one context so this is always assumed true.
#define hipHostRegisterPortable 0x1 ///< Memory is considered registered by all contexts.
#define hipHostRegisterMapped 0x2 ///< Map the allocation into the address space for the current device. The device pointer can be obtained with #hipHostGetDevicePointer.
#define hipHostRegisterIoMemory 0x4 ///< Not supported.



#define hipDeviceScheduleAuto 0x0 ///< Automatically select between Spin and Yield
#define hipDeviceScheduleSpin 0x1 ///< Dedicate a CPU core to spin-wait. Provides lowest latency, but burns a CPU core and may consume more power.
#define hipDeviceScheduleYield 0x2 ///< Yield the CPU to the operating system when waiting. May increase latency, but lowers power and is friendlier to other threads in the system.
Expand Down
2 changes: 2 additions & 0 deletions include/hip/nvcc_detail/hip_runtime_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,8 @@ hipMemcpyHostToHost
#define hipHostMallocPortable cudaHostAllocPortable
#define hipHostMallocMapped cudaHostAllocMapped
#define hipHostMallocWriteCombined cudaHostAllocWriteCombined
#define hipHostMallocCoherent 0x0
#define hipHostMallocNonCoherent 0x0

#define hipHostRegisterPortable cudaHostRegisterPortable
#define hipHostRegisterMapped cudaHostRegisterMapped
Expand Down
2 changes: 1 addition & 1 deletion src/hip_hcc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ int HIP_PROFILE_API= 0;
std::string HIP_DB_START_API;
std::string HIP_DB_STOP_API;
int HIP_DB= 0;
int HIP_VISIBLE_DEVICES = 0; /* Contains a comma-separated sequence of GPU identifiers */
int HIP_VISIBLE_DEVICES = 0;
int HIP_NUM_KERNELS_INFLIGHT = 128;
int HIP_WAIT_MODE = 0;

Expand Down
31 changes: 25 additions & 6 deletions src/hip_memory.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -267,17 +267,36 @@ hipError_t hipHostMalloc(void** ptr, size_t sizeBytes, unsigned int flags)
trueFlags = hipHostMallocMapped | hipHostMallocPortable;
}

const unsigned supportedFlags = hipHostMallocPortable | hipHostMallocMapped | hipHostMallocWriteCombined;

if (flags & ~supportedFlags) {
const unsigned supportedFlags = hipHostMallocPortable
| hipHostMallocMapped
| hipHostMallocWriteCombined
| hipHostMallocCoherent
| hipHostMallocNonCoherent;


const unsigned coherencyFlags = hipHostMallocCoherent | hipHostMallocNonCoherent;

if ((flags & ~supportedFlags) ||
((flags & coherencyFlags) == coherencyFlags)) {
*ptr = nullptr;
// can't specify unsupported flags, can't specify both Coherent + NonCoherent
hip_status = hipErrorInvalidValue;
}
else {
} else {
auto device = ctx->getWriteableDevice();
unsigned amFlags = HIP_COHERENT_HOST_ALLOC ? amHostCoherent : amHostPinned;

unsigned amFlags = 0;
if (flags & hipHostMallocCoherent) {
amFlags = amHostCoherent;
} else if (flags & hipHostMallocNonCoherent) {
amFlags = amHostPinned;
} else {
// depends on env variables:
amFlags = HIP_COHERENT_HOST_ALLOC ? amHostCoherent : amHostPinned;
}


*ptr = hip_internal::allocAndSharePtr(HIP_COHERENT_HOST_ALLOC ? "finegrained_host":"pinned_host",
*ptr = hip_internal::allocAndSharePtr((amFlags & amHostCoherent) ? "finegrained_host":"pinned_host",
sizeBytes, ctx, (trueFlags & hipHostMallocPortable) /*shareWithAll*/, amFlags, flags);

if(sizeBytes && (*ptr == NULL)){
Expand Down
83 changes: 67 additions & 16 deletions tests/src/runtimeApi/memory/hipHostMalloc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,14 +31,19 @@
#define LEN 1024*1024
#define SIZE LEN*sizeof(float)

__global__ void Add(hipLaunchParm lp, float *Ad, float *Bd, float *Cd){
__global__ void Add(float *Ad, float *Bd, float *Cd){
int tx = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x;
Cd[tx] = Ad[tx] + Bd[tx];
}


__global__ void Set(int *Ad, int val){
int tx = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x;
Ad[tx] = val;
}

int main(){
float *A, *B, *C;
float *Ad, *Bd, *Cd;


hipDeviceProp_t prop;
int device;
Expand All @@ -49,26 +54,72 @@ int main(){
failed("Does support HostPinned Memory");
}

HIPCHECK(hipHostMalloc((void**)&A, SIZE, hipHostMallocWriteCombined | hipHostMallocMapped));
HIPCHECK(hipHostMalloc((void**)&B, SIZE, hipHostMallocDefault));
HIPCHECK(hipHostMalloc((void**)&C, SIZE, hipHostMallocMapped));

HIPCHECK(hipHostGetDevicePointer((void**)&Ad, A, 0));
HIPCHECK(hipHostGetDevicePointer((void**)&Cd, C, 0));
{
float *A, *B, *C;
float *Ad, *Bd, *Cd;
HIPCHECK(hipHostMalloc((void**)&A, SIZE, hipHostMallocWriteCombined | hipHostMallocMapped));
HIPCHECK(hipHostMalloc((void**)&B, SIZE, hipHostMallocDefault));
HIPCHECK(hipHostMalloc((void**)&C, SIZE, hipHostMallocMapped));

HIPCHECK(hipHostGetDevicePointer((void**)&Ad, A, 0));
HIPCHECK(hipHostGetDevicePointer((void**)&Cd, C, 0));

for(int i=0;i<LEN;i++){
A[i] = 1.0f;
B[i] = 2.0f;
}

HIPCHECK(hipMalloc((void**)&Bd, SIZE));
HIPCHECK(hipMemcpy(Bd, B, SIZE, hipMemcpyHostToDevice));

dim3 dimGrid(LEN/512,1,1);
dim3 dimBlock(512,1,1);

for(int i=0;i<LEN;i++){
A[i] = 1.0f;
B[i] = 2.0f;
hipLaunchKernelGGL(Add, dimGrid, dimBlock, 0, 0, Ad, Bd, Cd);

HIPCHECK(hipDeviceSynchronize());

HIPCHECK(hipHostFree(A));
HIPCHECK(hipHostFree(B));
HIPCHECK(hipHostFree(C));
}

HIPCHECK(hipMalloc((void**)&Bd, SIZE));
HIPCHECK(hipMemcpy(Bd, B, SIZE, hipMemcpyHostToDevice));
{
int *A, *B;
int numElements = 1024*16;
size_t sizeBytes = numElements * sizeof (int);
#ifdef __HIP_PLATFORM_HCC__
HIPCHECK_API(hipHostMalloc((void**)&A, sizeBytes, hipHostMallocCoherent|hipHostMallocNonCoherent), hipErrorInvalidValue);

assert (A == 0);
#endif

HIPCHECK(hipHostMalloc((void**)&A, sizeBytes, hipHostMallocCoherent));
hipStream_t s;
hipEvent_t e;

dim3 dimGrid(LEN/512,1,1);
dim3 dimBlock(512,1,1);
// Init:
HIPCHECK(hipStreamCreate(&s));
HIPCHECK(hipEventCreateWithFlags(&e, 0));
dim3 dimBlock(64,1,1);
dim3 dimGrid(numElements/dimBlock.x,1,1);

hipLaunchKernel(HIP_KERNEL_NAME(Add), dimGrid, dimBlock, 0, 0, Ad, Bd, Cd);
// Init array to know state:
hipLaunchKernelGGL(Set, dimGrid, dimBlock, 0, 0x0, A, -42);
HIPCHECK(hipDeviceSynchronize());

hipLaunchKernelGGL(Set, dimGrid, dimBlock, 0, s, A, 13);
HIPCHECK(hipEventRecord(e, s));

// Host waits for event :
HIPCHECK(hipEventSynchronize(e));

// check result?

HIPCHECK(hipHostMalloc((void**)&B, sizeBytes, hipHostMallocNonCoherent));
}

passed();

}
2 changes: 2 additions & 0 deletions util/vim/hip.vim
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,8 @@ syn keyword hipFlags hipHostMallocDefault
syn keyword hipFlags hipHostMallocPortable
syn keyword hipFlags hipHostMallocMapped
syn keyword hipFlags hipHostMallocWriteCombined
syn keyword hipFlags hipHostMallocCoherent
syn keyword hipFlags hipHostMallocNonCoherent

syn keyword hipFlags hipHostRegisterDefault
syn keyword hipFlags hipHostRegisterPortable
Expand Down

0 comments on commit 75f691e

Please sign in to comment.