Skip to content

Commit ac58a79

Browse files
committed
cluda_opencl: Use round-to-nearest-even to match the CPU version

Fixes test_elemwise_f16 add/iadd on amdgcn and possibly other OpenCL implementations.

Fixes Theano#462
Signed-off-by: Jan Vesely <[email protected]>
1 parent 04c2892 commit ac58a79

File tree

2 files changed

+2
-2
lines changed

2 files changed

+2
-2
lines changed

src/cluda_opencl.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -50,7 +50,7 @@ typedef struct _ga_half {
50 50   #define ga_half2float(p) vload_half(0, &((p).data))
51 51   static inline ga_half ga_float2half(ga_float f) {
52 52     ga_half r;
53    -   vstore_half_rtn(f, 0, &r.data);
   53 +   vstore_half_rte(f, 0, &r.data);
54 54     return r;
55 55   }
56 56

src/cluda_opencl.h.c

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -122,7 +122,7 @@ static const char cluda_opencl_h[] = {
122 122   0x6f, 0x61, 0x74, 0x20, 0x66, 0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20,
123 123   0x67, 0x61, 0x5f, 0x68, 0x61, 0x6c, 0x66, 0x20, 0x72, 0x3b, 0x0a,
124 124   0x20, 0x20, 0x76, 0x73, 0x74, 0x6f, 0x72, 0x65, 0x5f, 0x68, 0x61,
125     - 0x6c, 0x66, 0x5f, 0x72, 0x74, 0x6e, 0x28, 0x66, 0x2c, 0x20, 0x30,
    125 + 0x6c, 0x66, 0x5f, 0x72, 0x74, 0x65, 0x28, 0x66, 0x2c, 0x20, 0x30,
126 126   0x2c, 0x20, 0x26, 0x72, 0x2e, 0x64, 0x61, 0x74, 0x61, 0x29, 0x3b,
127 127   0x0a, 0x20, 0x20, 0x72, 0x65, 0x74, 0x75, 0x72, 0x6e, 0x20, 0x72,
128 128   0x3b, 0x0a, 0x7d, 0x0a, 0x0a, 0x23, 0x70, 0x72, 0x61, 0x67, 0x6d,

0 commit comments

Comments
 (0)