@@ -6,14 +6,16 @@ target triple = "aarch64-unknown-linux-gnu"
6
6
define <4 x i32 > @no_sink_simple (<4 x i32 > %a , <4 x i32 > %b , i1 %c , ptr %p ) {
7
7
; CHECK-LABEL: no_sink_simple:
8
8
; CHECK: // %bb.0:
9
+ ; CHECK-NEXT: cmgt v2.4s, v1.4s, v0.4s
10
+ ; CHECK-NEXT: xtn v2.4h, v2.4s
9
11
; CHECK-NEXT: tbz w0, #0, .LBB0_2
10
12
; CHECK-NEXT: // %bb.1: // %s
11
- ; CHECK-NEXT: cmgt v1.4s, v1.4s, v0.4s
13
+ ; CHECK-NEXT: sshll v1.4s, v2.4h, #0
12
14
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
13
15
; CHECK-NEXT: str q0, [x1]
14
16
; CHECK-NEXT: ret
15
17
; CHECK-NEXT: .LBB0_2: // %t
16
- ; CHECK-NEXT: cmgt v0.4s, v1.4s, v0.4s
18
+ ; CHECK-NEXT: sshll v0.4s, v2.4h, #0
17
19
; CHECK-NEXT: and v0.16b, v1.16b, v0.16b
18
20
; CHECK-NEXT: ret
19
21
%d = icmp slt <4 x i32 > %a , %b
32
34
define void @vector_loop_with_icmp (ptr nocapture noundef writeonly %dest ) {
33
35
; CHECK-LABEL: vector_loop_with_icmp:
34
36
; CHECK: // %bb.0: // %entry
35
- ; CHECK-NEXT: mov w8 , #15 // =0xf
37
+ ; CHECK-NEXT: mov w9 , #15 // =0xf
36
38
; CHECK-NEXT: mov w10, #4 // =0x4
37
- ; CHECK-NEXT: adrp x9 , .LCPI1_0
39
+ ; CHECK-NEXT: adrp x8 , .LCPI1_0
38
40
; CHECK-NEXT: adrp x11, .LCPI1_1
39
- ; CHECK-NEXT: dup v0.2d, x8
41
+ ; CHECK-NEXT: dup v0.2d, x9
40
42
; CHECK-NEXT: dup v1.2d, x10
41
- ; CHECK-NEXT: ldr q2, [x9 , :lo12:.LCPI1_0]
43
+ ; CHECK-NEXT: ldr q2, [x8 , :lo12:.LCPI1_0]
42
44
; CHECK-NEXT: ldr q3, [x11, :lo12:.LCPI1_1]
43
- ; CHECK-NEXT: add x9 , x0, #8
44
- ; CHECK-NEXT: mov w10 , #16 // =0x10
45
- ; CHECK-NEXT: mov w11 , #1 // =0x1
45
+ ; CHECK-NEXT: add x8 , x0, #8
46
+ ; CHECK-NEXT: mov w9 , #16 // =0x10
47
+ ; CHECK-NEXT: mov w10 , #1 // =0x1
46
48
; CHECK-NEXT: b .LBB1_2
47
49
; CHECK-NEXT: .LBB1_1: // %pred.store.continue18
48
50
; CHECK-NEXT: // in Loop: Header=BB1_2 Depth=1
49
51
; CHECK-NEXT: add v2.2d, v2.2d, v1.2d
50
52
; CHECK-NEXT: add v3.2d, v3.2d, v1.2d
51
- ; CHECK-NEXT: subs x10, x10 , #4
52
- ; CHECK-NEXT: add x9, x9 , #16
53
+ ; CHECK-NEXT: subs x9, x9 , #4
54
+ ; CHECK-NEXT: add x8, x8 , #16
53
55
; CHECK-NEXT: b.eq .LBB1_10
54
56
; CHECK-NEXT: .LBB1_2: // %vector.body
55
57
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
56
- ; CHECK-NEXT: cmhi v4.2d, v0.2d, v3.2d
57
- ; CHECK-NEXT: xtn v4.2s, v4.2d
58
- ; CHECK-NEXT: uzp1 v4.4h, v4.4h, v0.4h
59
- ; CHECK-NEXT: umov w12, v4.h[0]
60
- ; CHECK-NEXT: tbz w12, #0, .LBB1_4
61
- ; CHECK-NEXT: // %bb.3: // %pred.store.if
58
+ ; CHECK-NEXT: cmhi v4.2d, v0.2d, v2.2d
59
+ ; CHECK-NEXT: cmhi v5.2d, v0.2d, v3.2d
60
+ ; CHECK-NEXT: uzp1 v4.4s, v5.4s, v4.4s
61
+ ; CHECK-NEXT: xtn v4.4h, v4.4s
62
+ ; CHECK-NEXT: umov w11, v4.h[0]
63
+ ; CHECK-NEXT: tbnz w11, #0, .LBB1_6
64
+ ; CHECK-NEXT: // %bb.3: // %pred.store.continue
62
65
; CHECK-NEXT: // in Loop: Header=BB1_2 Depth=1
63
- ; CHECK-NEXT: stur w11, [x9, #-8]
64
- ; CHECK-NEXT: .LBB1_4: // %pred.store.continue
66
+ ; CHECK-NEXT: umov w11, v4.h[1]
67
+ ; CHECK-NEXT: tbnz w11, #0, .LBB1_7
68
+ ; CHECK-NEXT: .LBB1_4: // %pred.store.continue6
65
69
; CHECK-NEXT: // in Loop: Header=BB1_2 Depth=1
66
- ; CHECK-NEXT: dup v4.2d, x8
67
- ; CHECK-NEXT: cmhi v4.2d, v4.2d, v3.2d
68
- ; CHECK-NEXT: xtn v4.2s, v4.2d
69
- ; CHECK-NEXT: uzp1 v4.4h, v4.4h, v0.4h
70
- ; CHECK-NEXT: umov w12, v4.h[1]
71
- ; CHECK-NEXT: tbz w12, #0, .LBB1_6
72
- ; CHECK-NEXT: // %bb.5: // %pred.store.if5
70
+ ; CHECK-NEXT: umov w11, v4.h[2]
71
+ ; CHECK-NEXT: tbnz w11, #0, .LBB1_8
72
+ ; CHECK-NEXT: .LBB1_5: // %pred.store.continue8
73
73
; CHECK-NEXT: // in Loop: Header=BB1_2 Depth=1
74
- ; CHECK-NEXT: stur w11, [x9, #-4]
75
- ; CHECK-NEXT: .LBB1_6: // %pred.store.continue6
74
+ ; CHECK-NEXT: umov w11, v4.h[3]
75
+ ; CHECK-NEXT: tbz w11, #0, .LBB1_1
76
+ ; CHECK-NEXT: b .LBB1_9
77
+ ; CHECK-NEXT: .LBB1_6: // %pred.store.if
76
78
; CHECK-NEXT: // in Loop: Header=BB1_2 Depth=1
77
- ; CHECK-NEXT: dup v4.2d, x8
78
- ; CHECK-NEXT: cmhi v4.2d, v4.2d, v2.2d
79
- ; CHECK-NEXT: xtn v4.2s, v4.2d
80
- ; CHECK-NEXT: uzp1 v4.4h, v0.4h, v4.4h
81
- ; CHECK-NEXT: umov w12, v4.h[2]
82
- ; CHECK-NEXT: tbz w12, #0, .LBB1_8
83
- ; CHECK-NEXT: // %bb.7: // %pred.store.if7
79
+ ; CHECK-NEXT: stur w10, [x8, #-8]
80
+ ; CHECK-NEXT: umov w11, v4.h[1]
81
+ ; CHECK-NEXT: tbz w11, #0, .LBB1_4
82
+ ; CHECK-NEXT: .LBB1_7: // %pred.store.if5
84
83
; CHECK-NEXT: // in Loop: Header=BB1_2 Depth=1
85
- ; CHECK-NEXT: str w11, [x9]
86
- ; CHECK-NEXT: .LBB1_8: // %pred.store.continue8
84
+ ; CHECK-NEXT: stur w10, [x8, #-4]
85
+ ; CHECK-NEXT: umov w11, v4.h[2]
86
+ ; CHECK-NEXT: tbz w11, #0, .LBB1_5
87
+ ; CHECK-NEXT: .LBB1_8: // %pred.store.if7
87
88
; CHECK-NEXT: // in Loop: Header=BB1_2 Depth=1
88
- ; CHECK-NEXT: dup v4.2d, x8
89
- ; CHECK-NEXT: cmhi v4.2d, v4.2d, v2.2d
90
- ; CHECK-NEXT: xtn v4.2s, v4.2d
91
- ; CHECK-NEXT: uzp1 v4.4h, v0.4h, v4.4h
92
- ; CHECK-NEXT: umov w12, v4.h[3]
93
- ; CHECK-NEXT: tbz w12, #0, .LBB1_1
94
- ; CHECK-NEXT: // %bb.9: // %pred.store.if9
89
+ ; CHECK-NEXT: str w10, [x8]
90
+ ; CHECK-NEXT: umov w11, v4.h[3]
91
+ ; CHECK-NEXT: tbz w11, #0, .LBB1_1
92
+ ; CHECK-NEXT: .LBB1_9: // %pred.store.if9
95
93
; CHECK-NEXT: // in Loop: Header=BB1_2 Depth=1
96
- ; CHECK-NEXT: str w11 , [x9 , #4]
94
+ ; CHECK-NEXT: str w10 , [x8 , #4]
97
95
; CHECK-NEXT: b .LBB1_1
98
96
; CHECK-NEXT: .LBB1_10: // %for.cond.cleanup
99
97
; CHECK-NEXT: ret
0 commit comments