diff --git a/include/hip/hcc_detail/hip_runtime_api.h b/include/hip/hcc_detail/hip_runtime_api.h index 34ed2ed5ce..6fb7c0256e 100644 --- a/include/hip/hcc_detail/hip_runtime_api.h +++ b/include/hip/hcc_detail/hip_runtime_api.h @@ -111,17 +111,21 @@ enum hipLimit_t //! Flags that can be used with hipHostMalloc #define hipHostMallocDefault 0x0 -#define hipHostMallocPortable 0x1 -#define hipHostMallocMapped 0x2 +#define hipHostMallocPortable 0x1 ///< Memory is considered allocated by all contexts. +#define hipHostMallocMapped 0x2 ///< Map the allocation into the address space for the current device. The device pointer can be obtained with #hipHostGetDevicePointer. #define hipHostMallocWriteCombined 0x4 +#define hipHostMallocCoherent 0x40000000 ///< Allocate coherent memory. Overrides HIP_COHERENT_HOST_ALLOC for specific allocation. +#define hipHostMallocNonCoherent 0x80000000 ///< Allocate non-coherent memory. Overrides HIP_COHERENT_HOST_ALLOC for specific allocation. + //! Flags that can be used with hipHostRegister #define hipHostRegisterDefault 0x0 ///< Memory is Mapped and Portable -#define hipHostRegisterPortable 0x1 ///< Memory is considered registered by all contexts. HIP only supports one context so this is always assumed true. +#define hipHostRegisterPortable 0x1 ///< Memory is considered registered by all contexts. #define hipHostRegisterMapped 0x2 ///< Map the allocation into the address space for the current device. The device pointer can be obtained with #hipHostGetDevicePointer. #define hipHostRegisterIoMemory 0x4 ///< Not supported. + #define hipDeviceScheduleAuto 0x0 ///< Automatically select between Spin and Yield #define hipDeviceScheduleSpin 0x1 ///< Dedicate a CPU core to spin-wait. Provides lowest latency, but burns a CPU core and may consume more power. #define hipDeviceScheduleYield 0x2 ///< Yield the CPU to the operating system when waiting. May increase latency, but lowers power and is friendlier to other threads in the system. diff --git a/include/hip/nvcc_detail/hip_runtime_api.h b/include/hip/nvcc_detail/hip_runtime_api.h index 01a93f7ba4..cbc7ed9f9c 100644 --- a/include/hip/nvcc_detail/hip_runtime_api.h +++ b/include/hip/nvcc_detail/hip_runtime_api.h @@ -65,6 +65,8 @@ hipMemcpyHostToHost #define hipHostMallocPortable cudaHostAllocPortable #define hipHostMallocMapped cudaHostAllocMapped #define hipHostMallocWriteCombined cudaHostAllocWriteCombined +#define hipHostMallocCoherent 0x0 +#define hipHostMallocNonCoherent 0x0 #define hipHostRegisterPortable cudaHostRegisterPortable #define hipHostRegisterMapped cudaHostRegisterMapped diff --git a/src/hip_hcc.cpp b/src/hip_hcc.cpp index e77c4186e8..4588f67c2d 100644 --- a/src/hip_hcc.cpp +++ b/src/hip_hcc.cpp @@ -74,7 +74,7 @@ int HIP_PROFILE_API= 0; std::string HIP_DB_START_API; std::string HIP_DB_STOP_API; int HIP_DB= 0; -int HIP_VISIBLE_DEVICES = 0; /* Contains a comma-separated sequence of GPU identifiers */ +int HIP_VISIBLE_DEVICES = 0; int HIP_NUM_KERNELS_INFLIGHT = 128; int HIP_WAIT_MODE = 0; diff --git a/src/hip_memory.cpp b/src/hip_memory.cpp index a3d761752e..3ab7713afa 100644 --- a/src/hip_memory.cpp +++ b/src/hip_memory.cpp @@ -267,17 +267,36 @@ hipError_t hipHostMalloc(void** ptr, size_t sizeBytes, unsigned int flags) trueFlags = hipHostMallocMapped | hipHostMallocPortable; } - const unsigned supportedFlags = hipHostMallocPortable | hipHostMallocMapped | hipHostMallocWriteCombined; - if (flags & ~supportedFlags) { + const unsigned supportedFlags = hipHostMallocPortable + | hipHostMallocMapped + | hipHostMallocWriteCombined + | hipHostMallocCoherent + | hipHostMallocNonCoherent; + + + const unsigned coherencyFlags = hipHostMallocCoherent | hipHostMallocNonCoherent; + + if ((flags & ~supportedFlags) || + ((flags & coherencyFlags) == coherencyFlags)) { + *ptr = nullptr; + // can't specify unsupported flags, can't specify both Coherent + NonCoherent hip_status = hipErrorInvalidValue; - } - else { + } else { auto device = ctx->getWriteableDevice(); - unsigned amFlags = HIP_COHERENT_HOST_ALLOC ? amHostCoherent : amHostPinned; + + unsigned amFlags = 0; + if (flags & hipHostMallocCoherent) { + amFlags = amHostCoherent; + } else if (flags & hipHostMallocNonCoherent) { + amFlags = amHostPinned; + } else { + // depends on env variables: + amFlags = HIP_COHERENT_HOST_ALLOC ? amHostCoherent : amHostPinned; + } - *ptr = hip_internal::allocAndSharePtr(HIP_COHERENT_HOST_ALLOC ? "finegrained_host":"pinned_host", + *ptr = hip_internal::allocAndSharePtr((amFlags & amHostCoherent) ? "finegrained_host":"pinned_host", sizeBytes, ctx, (trueFlags & hipHostMallocPortable) /*shareWithAll*/, amFlags, flags); if(sizeBytes && (*ptr == NULL)){ diff --git a/tests/src/runtimeApi/memory/hipHostMalloc.cpp b/tests/src/runtimeApi/memory/hipHostMalloc.cpp index d6b3b05a1d..31596b5ea5 100644 --- a/tests/src/runtimeApi/memory/hipHostMalloc.cpp +++ b/tests/src/runtimeApi/memory/hipHostMalloc.cpp @@ -31,14 +31,19 @@ #define LEN 1024*1024 #define SIZE LEN*sizeof(float) -__global__ void Add(hipLaunchParm lp, float *Ad, float *Bd, float *Cd){ +__global__ void Add(float *Ad, float *Bd, float *Cd){ int tx = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x; Cd[tx] = Ad[tx] + Bd[tx]; } + +__global__ void Set(int *Ad, int val){ + int tx = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x; + Ad[tx] = val; +} + int main(){ - float *A, *B, *C; - float *Ad, *Bd, *Cd; + hipDeviceProp_t prop; int device; @@ -49,26 +54,72 @@ int main(){ failed("Does support HostPinned Memory"); } - HIPCHECK(hipHostMalloc((void**)&A, SIZE, hipHostMallocWriteCombined | hipHostMallocMapped)); - HIPCHECK(hipHostMalloc((void**)&B, SIZE, hipHostMallocDefault)); - HIPCHECK(hipHostMalloc((void**)&C, SIZE, hipHostMallocMapped)); - HIPCHECK(hipHostGetDevicePointer((void**)&Ad, A, 0)); - HIPCHECK(hipHostGetDevicePointer((void**)&Cd, C, 0)); + { + float *A, *B, *C; + float *Ad, *Bd, *Cd; + HIPCHECK(hipHostMalloc((void**)&A, SIZE, hipHostMallocWriteCombined | hipHostMallocMapped)); + HIPCHECK(hipHostMalloc((void**)&B, SIZE, hipHostMallocDefault)); + HIPCHECK(hipHostMalloc((void**)&C, SIZE, hipHostMallocMapped)); + + HIPCHECK(hipHostGetDevicePointer((void**)&Ad, A, 0)); + HIPCHECK(hipHostGetDevicePointer((void**)&Cd, C, 0)); + + for(int i=0;i