@@ -110,9 +110,9 @@ VSX_FINLINE(rt) fnm(const rg& a, const rg& b) { return fn2(a, b); }
110110#if defined(__GNUG__) && !defined(__clang__)
111111
112112// inline asm helper
113- #define VSX_IMPL_1RG (rt, rto, rg, rgo , opc, fnm ) \
114- VSX_FINLINE (rt) fnm(const rg& a) \
115- { rt rs; __asm__ __volatile__ (#opc" %x0,%x1" : " =" # rto (rs) : # rgo (a)); return rs; }
113+ #define VSX_IMPL_1RG (rt, rg , opc, fnm ) \
114+ VSX_FINLINE (rt) fnm(const rg& a) \
115+ { rt rs; __asm__ __volatile__ (#opc" %x0,%x1" : " =wa " (rs) : " wa " (a)); return rs; }
116116
117117#define VSX_IMPL_1VRG (rt, rg, opc, fnm ) \
118118VSX_FINLINE (rt) fnm(const rg& a) \
@@ -233,6 +233,10 @@ VSX_FINLINE(rt) fnm(const rg& a, const rg& b) \
233233#if __GNUG__ < 5
234234// vec_xxpermdi in gcc4 lacks little-endian support, just like clang
235235# define vec_permi (a, b, c ) vec_xxpermdi(b, a, (3 ^ (((c) & 1 ) << 1 | (c) >> 1 )))
236+ // same as vec_xxpermdi
237+ # undef vec_vbpermq
238+ VSX_IMPL_2VRG (vec_udword2, vec_uchar16, vbpermq, vec_vbpermq)
239+ VSX_IMPL_2VRG(vec_dword2, vec_char16, vbpermq, vec_vbpermq)
236240#else
237241# define vec_permi vec_xxpermdi
238242#endif // __GNUG__ < 5
@@ -257,44 +261,38 @@ VSX_REDIRECT_1RG(vec_float4, vec_double2, vec_cvfo, __builtin_vsx_xvcvdpsp)
257261VSX_REDIRECT_1RG(vec_double2, vec_float4, vec_cvfo, __builtin_vsx_xvcvspdp)
258262
259263// converts word and doubleword to double-precision
260- #ifdef vec_ctd
261- # undef vec_ctd
262- #endif
263- VSX_IMPL_1RG (vec_double2, wd, vec_int4, wa, xvcvsxwdp, vec_ctdo)
264- VSX_IMPL_1RG(vec_double2, wd, vec_uint4, wa, xvcvuxwdp, vec_ctdo)
265- VSX_IMPL_1RG(vec_double2, wd, vec_dword2, wi, xvcvsxddp, vec_ctd)
266- VSX_IMPL_1RG(vec_double2, wd, vec_udword2, wi, xvcvuxddp, vec_ctd)
264+ #undef vec_ctd
265+ VSX_IMPL_1RG (vec_double2, vec_int4, xvcvsxwdp, vec_ctdo)
266+ VSX_IMPL_1RG(vec_double2, vec_uint4, xvcvuxwdp, vec_ctdo)
267+ VSX_IMPL_1RG(vec_double2, vec_dword2, xvcvsxddp, vec_ctd)
268+ VSX_IMPL_1RG(vec_double2, vec_udword2, xvcvuxddp, vec_ctd)
267269
268270// converts word and doubleword to single-precision
269271#undef vec_ctf
270- VSX_IMPL_1RG (vec_float4, wf, vec_int4, wa, xvcvsxwsp, vec_ctf)
271- VSX_IMPL_1RG(vec_float4, wf, vec_uint4, wa, xvcvuxwsp, vec_ctf)
272- VSX_IMPL_1RG(vec_float4, wf, vec_dword2, wi, xvcvsxdsp, vec_ctfo)
273- VSX_IMPL_1RG(vec_float4, wf, vec_udword2, wi , xvcvuxdsp, vec_ctfo)
272+ VSX_IMPL_1RG (vec_float4, vec_int4, xvcvsxwsp, vec_ctf)
273+ VSX_IMPL_1RG(vec_float4, vec_uint4, xvcvuxwsp, vec_ctf)
274+ VSX_IMPL_1RG(vec_float4, vec_dword2, xvcvsxdsp, vec_ctfo)
275+ VSX_IMPL_1RG(vec_float4, vec_udword2, xvcvuxdsp, vec_ctfo)
274276
275277// converts single and double precision to signed word
276278#undef vec_cts
277- VSX_IMPL_1RG (vec_int4, wa, vec_double2, wd , xvcvdpsxws, vec_ctso)
278- VSX_IMPL_1RG(vec_int4, wa, vec_float4, wf, xvcvspsxws, vec_cts)
279+ VSX_IMPL_1RG (vec_int4, vec_double2, xvcvdpsxws, vec_ctso)
280+ VSX_IMPL_1RG(vec_int4, vec_float4, xvcvspsxws, vec_cts)
279281
280282// converts single and double precision to unsigned word
281283#undef vec_ctu
282- VSX_IMPL_1RG (vec_uint4, wa, vec_double2, wd , xvcvdpuxws, vec_ctuo)
283- VSX_IMPL_1RG(vec_uint4, wa, vec_float4, wf, xvcvspuxws, vec_ctu)
284+ VSX_IMPL_1RG (vec_uint4, vec_double2, xvcvdpuxws, vec_ctuo)
285+ VSX_IMPL_1RG(vec_uint4, vec_float4, xvcvspuxws, vec_ctu)
284286
285287// converts single and double precision to signed doubleword
286- #ifdef vec_ctsl
287- # undef vec_ctsl
288- #endif
289- VSX_IMPL_1RG (vec_dword2, wi, vec_double2, wd, xvcvdpsxds, vec_ctsl)
290- VSX_IMPL_1RG(vec_dword2, wi, vec_float4, wf, xvcvspsxds, vec_ctslo)
288+ #undef vec_ctsl
289+ VSX_IMPL_1RG (vec_dword2, vec_double2, xvcvdpsxds, vec_ctsl)
290+ VSX_IMPL_1RG(vec_dword2, vec_float4, xvcvspsxds, vec_ctslo)
291291
292292// converts single and double precision to unsigned doubleword
293- #ifdef vec_ctul
294- # undef vec_ctul
295- #endif
296- VSX_IMPL_1RG (vec_udword2, wi, vec_double2, wd, xvcvdpuxds, vec_ctul)
297- VSX_IMPL_1RG(vec_udword2, wi, vec_float4, wf, xvcvspuxds, vec_ctulo)
293+ #undef vec_ctul
294+ VSX_IMPL_1RG (vec_udword2, vec_double2, xvcvdpuxds, vec_ctul)
295+ VSX_IMPL_1RG(vec_udword2, vec_float4, xvcvspuxds, vec_ctulo)
298296
299297// just in case GCC doesn't define it
300298#ifndef vec_xl
0 commit comments