diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index e929dab429de5..93f2f503a85d2 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -35424,10 +35424,11 @@ bool X86TargetLowering::isBinOp(unsigned Opcode) const { switch (Opcode) { // These are non-commutative binops. // TODO: Add more X86ISD opcodes once we have test coverage. - case X86ISD::ANDNP: - case X86ISD::PCMPGT: case X86ISD::FMAX: case X86ISD::FMIN: + return Subtarget.hasVLX(); + case X86ISD::ANDNP: + case X86ISD::PCMPGT: case X86ISD::FANDN: case X86ISD::VPSHA: case X86ISD::VPSHL: @@ -44211,6 +44212,12 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode( SDValue Insert = insertSubVector(UndefVec, ExtOp, 0, TLO.DAG, DL, ExtSizeInBits); return TLO.CombineTo(Op, Insert); + } + case X86ISD::FMAX: + case X86ISD::FMIN: { + if (VT.getVectorElementType() == MVT::f16 && !Subtarget.hasVLX()) + break; + [[fallthrough]]; } // Zero upper elements. case X86ISD::VZEXT_MOVL: @@ -44241,8 +44248,6 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode( case X86ISD::VSRLV: case X86ISD::VSRAV: // Float ops. - case X86ISD::FMAX: - case X86ISD::FMIN: case X86ISD::FMAXC: case X86ISD::FMINC: case X86ISD::FRSQRT: @@ -55368,25 +55373,46 @@ static SDValue combineFMinNumFMaxNum(SDNode *N, SelectionDAG &DAG, SDLoc DL(N); auto MinMaxOp = N->getOpcode() == ISD::FMAXNUM ? 
X86ISD::FMAX : X86ISD::FMIN; + auto GetNodeOrWiden = [&](SDValue Op0, SDValue Op1) { + if ((VT != MVT::v8f16 && VT != MVT::v16f16) || Subtarget.hasVLX()) + return DAG.getNode(MinMaxOp, DL, VT, Op0, Op1, N->getFlags()); + Op0 = widenSubVector(MVT::v32f16, Op0, /*ZeroNewElements=*/false, Subtarget, + DAG, DL); + Op1 = widenSubVector(MVT::v32f16, Op1, /*ZeroNewElements=*/false, Subtarget, + DAG, DL); + SDValue Res = + DAG.getNode(MinMaxOp, DL, MVT::v32f16, Op0, Op1, N->getFlags()); + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Res, + DAG.getVectorIdxConstant(0, DL)); + }; + // If we don't have to respect NaN inputs, this is a direct translation to x86 // min/max instructions. if (DAG.getTarget().Options.NoNaNsFPMath || N->getFlags().hasNoNaNs()) - return DAG.getNode(MinMaxOp, DL, VT, Op0, Op1, N->getFlags()); + return GetNodeOrWiden(Op0, Op1); // If one of the operands is known non-NaN use the native min/max instructions // with the non-NaN input as second operand. if (DAG.isKnownNeverNaN(Op1)) - return DAG.getNode(MinMaxOp, DL, VT, Op0, Op1, N->getFlags()); + return GetNodeOrWiden(Op0, Op1); if (DAG.isKnownNeverNaN(Op0)) - return DAG.getNode(MinMaxOp, DL, VT, Op1, Op0, N->getFlags()); + return GetNodeOrWiden(Op1, Op0); // If we have to respect NaN inputs, this takes at least 3 instructions. // Favor a library call when operating on a scalar and minimizing code size. 
if (!VT.isVector() && DAG.getMachineFunction().getFunction().hasMinSize()) return SDValue(); + EVT WidenVT = VT; + if ((VT == MVT::v8f16 || VT == MVT::v16f16) && !Subtarget.hasVLX()) { + WidenVT = MVT::v32f16; + Op0 = widenSubVector(MVT::v32f16, Op0, /*ZeroNewElements=*/false, Subtarget, + DAG, DL); + Op1 = widenSubVector(MVT::v32f16, Op1, /*ZeroNewElements=*/false, Subtarget, + DAG, DL); + } EVT SetCCType = TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), - VT); + WidenVT); // There are 4 possibilities involving NaN inputs, and these are the required // outputs: @@ -55407,12 +55433,16 @@ static SDValue combineFMinNumFMaxNum(SDNode *N, SelectionDAG &DAG, // use those instructions for fmaxnum by selecting away a NaN input. // If either operand is NaN, the 2nd source operand (Op0) is passed through. - SDValue MinOrMax = DAG.getNode(MinMaxOp, DL, VT, Op1, Op0); + SDValue MinOrMax = DAG.getNode(MinMaxOp, DL, WidenVT, Op1, Op0); SDValue IsOp0Nan = DAG.getSetCC(DL, SetCCType, Op0, Op0, ISD::SETUO); // If Op0 is a NaN, select Op1. Otherwise, select the max. If both operands // are NaN, the NaN value of Op1 is the result.
- return DAG.getSelect(DL, VT, IsOp0Nan, Op1, MinOrMax); + SDValue Res = DAG.getSelect(DL, WidenVT, IsOp0Nan, Op1, MinOrMax); + if (VT != WidenVT) + Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Res, + DAG.getVectorIdxConstant(0, DL)); + return Res; } static SDValue combineX86INT_TO_FP(SDNode *N, SelectionDAG &DAG, diff --git a/llvm/test/CodeGen/X86/avx512fp16-fmaxnum.ll b/llvm/test/CodeGen/X86/avx512fp16-fmaxnum.ll index 1d535f93bc867..9a709ff985f94 100644 --- a/llvm/test/CodeGen/X86/avx512fp16-fmaxnum.ll +++ b/llvm/test/CodeGen/X86/avx512fp16-fmaxnum.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -verify-machineinstrs --show-mc-encoding -mtriple=x86_64-unknown-unknown -mattr=+avx512fp16,avx512vl | FileCheck %s --check-prefixes=CHECK +; RUN: llc < %s -verify-machineinstrs --show-mc-encoding -mtriple=x86_64-unknown-unknown -mattr=+avx512fp16,avx512vl | FileCheck %s --check-prefixes=CHECK,HasVL +; RUN: llc < %s -verify-machineinstrs --show-mc-encoding -mtriple=x86_64-unknown-unknown -mattr=+avx512fp16 | FileCheck %s --check-prefixes=CHECK,NOVL declare half @llvm.maxnum.f16(half, half) declare <2 x half> @llvm.maxnum.v2f16(<2 x half>, <2 x half>) @@ -9,61 +10,112 @@ declare <16 x half> @llvm.maxnum.v16f16(<16 x half>, <16 x half>) declare <32 x half> @llvm.maxnum.v32f16(<32 x half>, <32 x half>) define half @test_intrinsic_fmaxh(half %x, half %y) { -; CHECK-LABEL: test_intrinsic_fmaxh: -; CHECK: # %bb.0: -; CHECK-NEXT: vmaxsh %xmm0, %xmm1, %xmm2 # encoding: [0x62,0xf5,0x76,0x08,0x5f,0xd0] -; CHECK-NEXT: vcmpunordsh %xmm0, %xmm0, %k1 # encoding: [0x62,0xf3,0x7e,0x08,0xc2,0xc8,0x03] -; CHECK-NEXT: vmovsh %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf5,0x7e,0x09,0x10,0xd1] -; CHECK-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2] -; CHECK-NEXT: retq # encoding: [0xc3] +; HasVL-LABEL: test_intrinsic_fmaxh: +; HasVL: # %bb.0: +; HasVL-NEXT: vmaxsh %xmm0, %xmm1, 
%xmm2 # encoding: [0x62,0xf5,0x76,0x08,0x5f,0xd0] +; HasVL-NEXT: vcmpunordsh %xmm0, %xmm0, %k1 # encoding: [0x62,0xf3,0x7e,0x08,0xc2,0xc8,0x03] +; HasVL-NEXT: vmovsh %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf5,0x7e,0x09,0x10,0xd1] +; HasVL-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2] +; HasVL-NEXT: retq # encoding: [0xc3] +; +; NOVL-LABEL: test_intrinsic_fmaxh: +; NOVL: # %bb.0: +; NOVL-NEXT: vmaxsh %xmm0, %xmm1, %xmm2 # encoding: [0x62,0xf5,0x76,0x08,0x5f,0xd0] +; NOVL-NEXT: vcmpunordsh %xmm0, %xmm0, %k1 # encoding: [0x62,0xf3,0x7e,0x08,0xc2,0xc8,0x03] +; NOVL-NEXT: vmovsh %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf5,0x7e,0x09,0x10,0xd1] +; NOVL-NEXT: vmovaps %xmm2, %xmm0 # encoding: [0xc5,0xf8,0x28,0xc2] +; NOVL-NEXT: retq # encoding: [0xc3] %z = call half @llvm.maxnum.f16(half %x, half %y) readnone ret half %z } define <2 x half> @test_intrinsic_fmax_v2f16(<2 x half> %x, <2 x half> %y) { -; CHECK-LABEL: test_intrinsic_fmax_v2f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vmaxph %xmm0, %xmm1, %xmm2 # encoding: [0x62,0xf5,0x74,0x08,0x5f,0xd0] -; CHECK-NEXT: vcmpunordph %xmm0, %xmm0, %k1 # encoding: [0x62,0xf3,0x7c,0x08,0xc2,0xc8,0x03] -; CHECK-NEXT: vmovdqu16 %xmm1, %xmm2 {%k1} # encoding: [0x62,0xf1,0xff,0x09,0x6f,0xd1] -; CHECK-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] -; CHECK-NEXT: retq # encoding: [0xc3] +; HasVL-LABEL: test_intrinsic_fmax_v2f16: +; HasVL: # %bb.0: +; HasVL-NEXT: vmaxph %xmm0, %xmm1, %xmm2 # encoding: [0x62,0xf5,0x74,0x08,0x5f,0xd0] +; HasVL-NEXT: vcmpunordph %xmm0, %xmm0, %k1 # encoding: [0x62,0xf3,0x7c,0x08,0xc2,0xc8,0x03] +; HasVL-NEXT: vmovdqu16 %xmm1, %xmm2 {%k1} # encoding: [0x62,0xf1,0xff,0x09,0x6f,0xd1] +; HasVL-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] +; HasVL-NEXT: retq # encoding: [0xc3] +; +; NOVL-LABEL: test_intrinsic_fmax_v2f16: +; NOVL: # %bb.0: +; NOVL-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 +; 
NOVL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 +; NOVL-NEXT: vmaxph %zmm0, %zmm1, %zmm2 # encoding: [0x62,0xf5,0x74,0x48,0x5f,0xd0] +; NOVL-NEXT: vcmpunordph %zmm0, %zmm0, %k1 # encoding: [0x62,0xf3,0x7c,0x48,0xc2,0xc8,0x03] +; NOVL-NEXT: vmovdqu16 %zmm1, %zmm2 {%k1} # encoding: [0x62,0xf1,0xff,0x49,0x6f,0xd1] +; NOVL-NEXT: vmovdqa %xmm2, %xmm0 # encoding: [0xc5,0xf9,0x6f,0xc2] +; NOVL-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] +; NOVL-NEXT: retq # encoding: [0xc3] %z = call <2 x half> @llvm.maxnum.v2f16(<2 x half> %x, <2 x half> %y) readnone ret <2 x half> %z } define <4 x half> @test_intrinsic_fmax_v4f16(<4 x half> %x, <4 x half> %y) { -; CHECK-LABEL: test_intrinsic_fmax_v4f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vmaxph %xmm0, %xmm1, %xmm2 # encoding: [0x62,0xf5,0x74,0x08,0x5f,0xd0] -; CHECK-NEXT: vcmpunordph %xmm0, %xmm0, %k1 # encoding: [0x62,0xf3,0x7c,0x08,0xc2,0xc8,0x03] -; CHECK-NEXT: vmovdqu16 %xmm1, %xmm2 {%k1} # encoding: [0x62,0xf1,0xff,0x09,0x6f,0xd1] -; CHECK-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] -; CHECK-NEXT: retq # encoding: [0xc3] +; HasVL-LABEL: test_intrinsic_fmax_v4f16: +; HasVL: # %bb.0: +; HasVL-NEXT: vmaxph %xmm0, %xmm1, %xmm2 # encoding: [0x62,0xf5,0x74,0x08,0x5f,0xd0] +; HasVL-NEXT: vcmpunordph %xmm0, %xmm0, %k1 # encoding: [0x62,0xf3,0x7c,0x08,0xc2,0xc8,0x03] +; HasVL-NEXT: vmovdqu16 %xmm1, %xmm2 {%k1} # encoding: [0x62,0xf1,0xff,0x09,0x6f,0xd1] +; HasVL-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] +; HasVL-NEXT: retq # encoding: [0xc3] +; +; NOVL-LABEL: test_intrinsic_fmax_v4f16: +; NOVL: # %bb.0: +; NOVL-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 +; NOVL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 +; NOVL-NEXT: vmaxph %zmm0, %zmm1, %zmm2 # encoding: [0x62,0xf5,0x74,0x48,0x5f,0xd0] +; NOVL-NEXT: vcmpunordph %zmm0, %zmm0, %k1 # encoding: [0x62,0xf3,0x7c,0x48,0xc2,0xc8,0x03] +; NOVL-NEXT: vmovdqu16 %zmm1, %zmm2 {%k1} # encoding: 
[0x62,0xf1,0xff,0x49,0x6f,0xd1] +; NOVL-NEXT: vmovdqa %xmm2, %xmm0 # encoding: [0xc5,0xf9,0x6f,0xc2] +; NOVL-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] +; NOVL-NEXT: retq # encoding: [0xc3] %z = call <4 x half> @llvm.maxnum.v4f16(<4 x half> %x, <4 x half> %y) readnone ret <4 x half> %z } define <8 x half> @test_intrinsic_fmax_v8f16(<8 x half> %x, <8 x half> %y) { -; CHECK-LABEL: test_intrinsic_fmax_v8f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vmaxph %xmm0, %xmm1, %xmm2 # encoding: [0x62,0xf5,0x74,0x08,0x5f,0xd0] -; CHECK-NEXT: vcmpunordph %xmm0, %xmm0, %k1 # encoding: [0x62,0xf3,0x7c,0x08,0xc2,0xc8,0x03] -; CHECK-NEXT: vmovdqu16 %xmm1, %xmm2 {%k1} # encoding: [0x62,0xf1,0xff,0x09,0x6f,0xd1] -; CHECK-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] -; CHECK-NEXT: retq # encoding: [0xc3] +; HasVL-LABEL: test_intrinsic_fmax_v8f16: +; HasVL: # %bb.0: +; HasVL-NEXT: vmaxph %xmm0, %xmm1, %xmm2 # encoding: [0x62,0xf5,0x74,0x08,0x5f,0xd0] +; HasVL-NEXT: vcmpunordph %xmm0, %xmm0, %k1 # encoding: [0x62,0xf3,0x7c,0x08,0xc2,0xc8,0x03] +; HasVL-NEXT: vmovdqu16 %xmm1, %xmm2 {%k1} # encoding: [0x62,0xf1,0xff,0x09,0x6f,0xd1] +; HasVL-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] +; HasVL-NEXT: retq # encoding: [0xc3] +; +; NOVL-LABEL: test_intrinsic_fmax_v8f16: +; NOVL: # %bb.0: +; NOVL-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 +; NOVL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 +; NOVL-NEXT: vmaxph %zmm0, %zmm1, %zmm2 # encoding: [0x62,0xf5,0x74,0x48,0x5f,0xd0] +; NOVL-NEXT: vcmpunordph %zmm0, %zmm0, %k1 # encoding: [0x62,0xf3,0x7c,0x48,0xc2,0xc8,0x03] +; NOVL-NEXT: vmovdqu16 %zmm1, %zmm2 {%k1} # encoding: [0x62,0xf1,0xff,0x49,0x6f,0xd1] +; NOVL-NEXT: vmovdqa %xmm2, %xmm0 # encoding: [0xc5,0xf9,0x6f,0xc2] +; NOVL-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] +; NOVL-NEXT: retq # encoding: [0xc3] %z = call <8 x half> @llvm.maxnum.v8f16(<8 x half> %x, <8 x half> %y) readnone ret <8 x half> %z } define 
<16 x half> @test_intrinsic_fmax_v16f16(<16 x half> %x, <16 x half> %y) { -; CHECK-LABEL: test_intrinsic_fmax_v16f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vmaxph %ymm0, %ymm1, %ymm2 # encoding: [0x62,0xf5,0x74,0x28,0x5f,0xd0] -; CHECK-NEXT: vcmpunordph %ymm0, %ymm0, %k1 # encoding: [0x62,0xf3,0x7c,0x28,0xc2,0xc8,0x03] -; CHECK-NEXT: vmovdqu16 %ymm1, %ymm2 {%k1} # encoding: [0x62,0xf1,0xff,0x29,0x6f,0xd1] -; CHECK-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] -; CHECK-NEXT: retq # encoding: [0xc3] +; HasVL-LABEL: test_intrinsic_fmax_v16f16: +; HasVL: # %bb.0: +; HasVL-NEXT: vmaxph %ymm0, %ymm1, %ymm2 # encoding: [0x62,0xf5,0x74,0x28,0x5f,0xd0] +; HasVL-NEXT: vcmpunordph %ymm0, %ymm0, %k1 # encoding: [0x62,0xf3,0x7c,0x28,0xc2,0xc8,0x03] +; HasVL-NEXT: vmovdqu16 %ymm1, %ymm2 {%k1} # encoding: [0x62,0xf1,0xff,0x29,0x6f,0xd1] +; HasVL-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] +; HasVL-NEXT: retq # encoding: [0xc3] +; +; NOVL-LABEL: test_intrinsic_fmax_v16f16: +; NOVL: # %bb.0: +; NOVL-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 +; NOVL-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; NOVL-NEXT: vmaxph %zmm0, %zmm1, %zmm2 # encoding: [0x62,0xf5,0x74,0x48,0x5f,0xd0] +; NOVL-NEXT: vcmpunordph %zmm0, %zmm0, %k1 # encoding: [0x62,0xf3,0x7c,0x48,0xc2,0xc8,0x03] +; NOVL-NEXT: vmovdqu16 %zmm1, %zmm2 {%k1} # encoding: [0x62,0xf1,0xff,0x49,0x6f,0xd1] +; NOVL-NEXT: vmovdqa %ymm2, %ymm0 # encoding: [0xc5,0xfd,0x6f,0xc2] +; NOVL-NEXT: retq # encoding: [0xc3] %z = call <16 x half> @llvm.maxnum.v16f16(<16 x half> %x, <16 x half> %y) readnone ret <16 x half> %z } @@ -81,10 +133,19 @@ define <32 x half> @test_intrinsic_fmax_v32f16(<32 x half> %x, <32 x half> %y) { } define <4 x half> @maxnum_intrinsic_nnan_fmf_f432(<4 x half> %a, <4 x half> %b) { -; CHECK-LABEL: maxnum_intrinsic_nnan_fmf_f432: -; CHECK: # %bb.0: -; CHECK-NEXT: vmaxph %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf5,0x7c,0x08,0x5f,0xc1] -; 
CHECK-NEXT: retq # encoding: [0xc3] +; HasVL-LABEL: maxnum_intrinsic_nnan_fmf_f432: +; HasVL: # %bb.0: +; HasVL-NEXT: vmaxph %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf5,0x7c,0x08,0x5f,0xc1] +; HasVL-NEXT: retq # encoding: [0xc3] +; +; NOVL-LABEL: maxnum_intrinsic_nnan_fmf_f432: +; NOVL: # %bb.0: +; NOVL-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 +; NOVL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 +; NOVL-NEXT: vmaxph %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf5,0x7c,0x48,0x5f,0xc1] +; NOVL-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 +; NOVL-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] +; NOVL-NEXT: retq # encoding: [0xc3] %r = tail call nnan <4 x half> @llvm.maxnum.v4f16(<4 x half> %a, <4 x half> %b) ret <4 x half> %r } diff --git a/llvm/test/CodeGen/X86/avx512fp16-fmaxnum.s b/llvm/test/CodeGen/X86/avx512fp16-fmaxnum.s new file mode 100644 index 0000000000000..503a60b1d867d --- /dev/null +++ b/llvm/test/CodeGen/X86/avx512fp16-fmaxnum.s @@ -0,0 +1,179 @@ + .file "avx512fp16-fmaxnum.ll" + .text + .globl test_intrinsic_fmaxh # -- Begin function test_intrinsic_fmaxh + .p2align 4 + .type test_intrinsic_fmaxh,@function +test_intrinsic_fmaxh: # @test_intrinsic_fmaxh + .cfi_startproc +# %bb.0: + vmaxsh %xmm0, %xmm1, %xmm2 # encoding: [0x62,0xf5,0x76,0x08,0x5f,0xd0] + vcmpunordsh %xmm0, %xmm0, %k1 # encoding: [0x62,0xf3,0x7e,0x08,0xc2,0xc8,0x03] + vmovsh %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf5,0x7e,0x09,0x10,0xd1] + vmovaps %xmm2, %xmm0 # encoding: [0xc5,0xf8,0x28,0xc2] + retq # encoding: [0xc3] +.Lfunc_end0: + .size test_intrinsic_fmaxh, .Lfunc_end0-test_intrinsic_fmaxh + .cfi_endproc + # -- End function + .globl test_intrinsic_fmax_v2f16 # -- Begin function test_intrinsic_fmax_v2f16 + .p2align 4 + .type test_intrinsic_fmax_v2f16,@function +test_intrinsic_fmax_v2f16: # @test_intrinsic_fmax_v2f16 + .cfi_startproc +# %bb.0: + # kill: def $xmm1 killed $xmm1 def $zmm1 + # kill: def $xmm0 killed $xmm0 def $zmm0 + vmaxph %zmm0, %zmm1, %zmm2 # encoding: 
[0x62,0xf5,0x74,0x48,0x5f,0xd0] + vcmpunordph %zmm0, %zmm0, %k1 # encoding: [0x62,0xf3,0x7c,0x48,0xc2,0xc8,0x03] + vmovdqu16 %zmm1, %zmm2 {%k1} # encoding: [0x62,0xf1,0xff,0x49,0x6f,0xd1] + vmovdqa %xmm2, %xmm0 # encoding: [0xc5,0xf9,0x6f,0xc2] + vzeroupper # encoding: [0xc5,0xf8,0x77] + retq # encoding: [0xc3] +.Lfunc_end1: + .size test_intrinsic_fmax_v2f16, .Lfunc_end1-test_intrinsic_fmax_v2f16 + .cfi_endproc + # -- End function + .globl test_intrinsic_fmax_v4f16 # -- Begin function test_intrinsic_fmax_v4f16 + .p2align 4 + .type test_intrinsic_fmax_v4f16,@function +test_intrinsic_fmax_v4f16: # @test_intrinsic_fmax_v4f16 + .cfi_startproc +# %bb.0: + # kill: def $xmm1 killed $xmm1 def $zmm1 + # kill: def $xmm0 killed $xmm0 def $zmm0 + vmaxph %zmm0, %zmm1, %zmm2 # encoding: [0x62,0xf5,0x74,0x48,0x5f,0xd0] + vcmpunordph %zmm0, %zmm0, %k1 # encoding: [0x62,0xf3,0x7c,0x48,0xc2,0xc8,0x03] + vmovdqu16 %zmm1, %zmm2 {%k1} # encoding: [0x62,0xf1,0xff,0x49,0x6f,0xd1] + vmovdqa %xmm2, %xmm0 # encoding: [0xc5,0xf9,0x6f,0xc2] + vzeroupper # encoding: [0xc5,0xf8,0x77] + retq # encoding: [0xc3] +.Lfunc_end2: + .size test_intrinsic_fmax_v4f16, .Lfunc_end2-test_intrinsic_fmax_v4f16 + .cfi_endproc + # -- End function + .globl test_intrinsic_fmax_v8f16 # -- Begin function test_intrinsic_fmax_v8f16 + .p2align 4 + .type test_intrinsic_fmax_v8f16,@function +test_intrinsic_fmax_v8f16: # @test_intrinsic_fmax_v8f16 + .cfi_startproc +# %bb.0: + # kill: def $xmm1 killed $xmm1 def $zmm1 + # kill: def $xmm0 killed $xmm0 def $zmm0 + vmaxph %zmm0, %zmm1, %zmm2 # encoding: [0x62,0xf5,0x74,0x48,0x5f,0xd0] + vcmpunordph %zmm0, %zmm0, %k1 # encoding: [0x62,0xf3,0x7c,0x48,0xc2,0xc8,0x03] + vmovdqu16 %zmm1, %zmm2 {%k1} # encoding: [0x62,0xf1,0xff,0x49,0x6f,0xd1] + vmovdqa %xmm2, %xmm0 # encoding: [0xc5,0xf9,0x6f,0xc2] + vzeroupper # encoding: [0xc5,0xf8,0x77] + retq # encoding: [0xc3] +.Lfunc_end3: + .size test_intrinsic_fmax_v8f16, .Lfunc_end3-test_intrinsic_fmax_v8f16 + .cfi_endproc + # -- End 
function + .globl test_intrinsic_fmax_v16f16 # -- Begin function test_intrinsic_fmax_v16f16 + .p2align 4 + .type test_intrinsic_fmax_v16f16,@function +test_intrinsic_fmax_v16f16: # @test_intrinsic_fmax_v16f16 + .cfi_startproc +# %bb.0: + # kill: def $ymm1 killed $ymm1 def $zmm1 + # kill: def $ymm0 killed $ymm0 def $zmm0 + vmaxph %zmm0, %zmm1, %zmm2 # encoding: [0x62,0xf5,0x74,0x48,0x5f,0xd0] + vcmpunordph %zmm0, %zmm0, %k1 # encoding: [0x62,0xf3,0x7c,0x48,0xc2,0xc8,0x03] + vmovdqu16 %zmm1, %zmm2 {%k1} # encoding: [0x62,0xf1,0xff,0x49,0x6f,0xd1] + vmovdqa %ymm2, %ymm0 # encoding: [0xc5,0xfd,0x6f,0xc2] + retq # encoding: [0xc3] +.Lfunc_end4: + .size test_intrinsic_fmax_v16f16, .Lfunc_end4-test_intrinsic_fmax_v16f16 + .cfi_endproc + # -- End function + .globl test_intrinsic_fmax_v32f16 # -- Begin function test_intrinsic_fmax_v32f16 + .p2align 4 + .type test_intrinsic_fmax_v32f16,@function +test_intrinsic_fmax_v32f16: # @test_intrinsic_fmax_v32f16 + .cfi_startproc +# %bb.0: + vmaxph %zmm0, %zmm1, %zmm2 # encoding: [0x62,0xf5,0x74,0x48,0x5f,0xd0] + vcmpunordph %zmm0, %zmm0, %k1 # encoding: [0x62,0xf3,0x7c,0x48,0xc2,0xc8,0x03] + vmovdqu16 %zmm1, %zmm2 {%k1} # encoding: [0x62,0xf1,0xff,0x49,0x6f,0xd1] + vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2] + retq # encoding: [0xc3] +.Lfunc_end5: + .size test_intrinsic_fmax_v32f16, .Lfunc_end5-test_intrinsic_fmax_v32f16 + .cfi_endproc + # -- End function + .globl maxnum_intrinsic_nnan_fmf_f432 # -- Begin function maxnum_intrinsic_nnan_fmf_f432 + .p2align 4 + .type maxnum_intrinsic_nnan_fmf_f432,@function +maxnum_intrinsic_nnan_fmf_f432: # @maxnum_intrinsic_nnan_fmf_f432 + .cfi_startproc +# %bb.0: + # kill: def $xmm1 killed $xmm1 def $zmm1 + # kill: def $xmm0 killed $xmm0 def $zmm0 + vmaxph %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf5,0x7c,0x48,0x5f,0xc1] + # kill: def $xmm0 killed $xmm0 killed $zmm0 + vzeroupper # encoding: [0xc5,0xf8,0x77] + retq # encoding: [0xc3] +.Lfunc_end6: + .size 
maxnum_intrinsic_nnan_fmf_f432, .Lfunc_end6-maxnum_intrinsic_nnan_fmf_f432 + .cfi_endproc + # -- End function + .globl maxnum_intrinsic_nnan_attr_f16 # -- Begin function maxnum_intrinsic_nnan_attr_f16 + .p2align 4 + .type maxnum_intrinsic_nnan_attr_f16,@function +maxnum_intrinsic_nnan_attr_f16: # @maxnum_intrinsic_nnan_attr_f16 + .cfi_startproc +# %bb.0: + vmaxsh %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf5,0x7e,0x08,0x5f,0xc1] + retq # encoding: [0xc3] +.Lfunc_end7: + .size maxnum_intrinsic_nnan_attr_f16, .Lfunc_end7-maxnum_intrinsic_nnan_attr_f16 + .cfi_endproc + # -- End function + .section .rodata,"a",@progbits + .p2align 1, 0x0 # -- Begin function test_maxnum_const_op1 +.LCPI8_0: + .short 0x3c00 # half 1 + .text + .globl test_maxnum_const_op1 + .p2align 4 + .type test_maxnum_const_op1,@function +test_maxnum_const_op1: # @test_maxnum_const_op1 + .cfi_startproc +# %bb.0: + vmaxsh .LCPI8_0(%rip), %xmm0, %xmm0 # encoding: [0x62,0xf5,0x7e,0x08,0x5f,0x05,A,A,A,A] + # fixup A - offset: 6, value: .LCPI8_0-4, kind: reloc_riprel_4byte + retq # encoding: [0xc3] +.Lfunc_end8: + .size test_maxnum_const_op1, .Lfunc_end8-test_maxnum_const_op1 + .cfi_endproc + # -- End function + .section .rodata,"a",@progbits + .p2align 1, 0x0 # -- Begin function test_maxnum_const_op2 +.LCPI9_0: + .short 0x3c00 # half 1 + .text + .globl test_maxnum_const_op2 + .p2align 4 + .type test_maxnum_const_op2,@function +test_maxnum_const_op2: # @test_maxnum_const_op2 + .cfi_startproc +# %bb.0: + vmaxsh .LCPI9_0(%rip), %xmm0, %xmm0 # encoding: [0x62,0xf5,0x7e,0x08,0x5f,0x05,A,A,A,A] + # fixup A - offset: 6, value: .LCPI9_0-4, kind: reloc_riprel_4byte + retq # encoding: [0xc3] +.Lfunc_end9: + .size test_maxnum_const_op2, .Lfunc_end9-test_maxnum_const_op2 + .cfi_endproc + # -- End function + .globl test_maxnum_const_nan # -- Begin function test_maxnum_const_nan + .p2align 4 + .type test_maxnum_const_nan,@function +test_maxnum_const_nan: # @test_maxnum_const_nan + .cfi_startproc +# %bb.0: + retq # 
encoding: [0xc3] +.Lfunc_end10: + .size test_maxnum_const_nan, .Lfunc_end10-test_maxnum_const_nan + .cfi_endproc + # -- End function + .section ".note.GNU-stack","",@progbits