Skip to content

Commit 5a2aa14

Browse files
author
Sebastian Schulze
committed
[GR-35136] Basic Block Local Scheduling.
PullRequest: graal/21334
2 parents eb2742d + 3530987 commit 5a2aa14

File tree

5 files changed

+99
-10
lines changed

5 files changed

+99
-10
lines changed

compiler/src/jdk.graal.compiler/src/jdk/graal/compiler/nodes/calc/FloatDivNode.java

Lines changed: 5 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -24,7 +24,7 @@
2424
*/
2525
package jdk.graal.compiler.nodes.calc;
2626

27-
import static jdk.graal.compiler.nodeinfo.NodeCycles.CYCLES_32;
27+
import static jdk.graal.compiler.nodeinfo.NodeCycles.CYCLES_16;
2828

2929
import jdk.graal.compiler.core.common.type.ArithmeticOpTable;
3030
import jdk.graal.compiler.core.common.type.ArithmeticOpTable.BinaryOp;
@@ -41,7 +41,10 @@
4141

4242
import jdk.vm.ci.meta.Constant;
4343

44-
@NodeInfo(shortName = "/", cycles = CYCLES_32)
44+
/**
45+
* Floating-point division node.
46+
*/
47+
@NodeInfo(shortName = "/", cycles = CYCLES_16, cyclesRationale = "The node cycle estimate is taken from Agner Fog's instruction tables (https://www.agner.org/optimize/instruction_tables.pdf).")
4548
public class FloatDivNode extends BinaryArithmeticNode<Div> {
4649

4750
public static final NodeClass<FloatDivNode> TYPE = NodeClass.create(FloatDivNode.class);

compiler/src/jdk.graal.compiler/src/jdk/graal/compiler/nodes/calc/IntegerDivRemNode.java

Lines changed: 5 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -25,7 +25,7 @@
2525
package jdk.graal.compiler.nodes.calc;
2626

2727
import static jdk.graal.compiler.nodeinfo.InputType.Guard;
28-
import static jdk.graal.compiler.nodeinfo.NodeCycles.CYCLES_32;
28+
import static jdk.graal.compiler.nodeinfo.NodeCycles.CYCLES_16;
2929
import static jdk.graal.compiler.nodeinfo.NodeSize.SIZE_1;
3030

3131
import jdk.graal.compiler.core.common.type.IntegerStamp;
@@ -42,7 +42,10 @@
4242

4343
import jdk.vm.ci.meta.JavaConstant;
4444

45-
@NodeInfo(allowedUsageTypes = Guard, cycles = CYCLES_32, size = SIZE_1)
45+
/**
46+
* Base class for integer division and remainder nodes.
47+
*/
48+
@NodeInfo(allowedUsageTypes = Guard, cycles = CYCLES_16, size = SIZE_1, cyclesRationale = "The node cycle estimate is taken from Agner Fog's instruction tables (https://www.agner.org/optimize/instruction_tables.pdf).")
4649
public abstract class IntegerDivRemNode extends FixedBinaryNode implements Lowerable, IterableNodeType, GuardingNode {
4750

4851
public static final NodeClass<IntegerDivRemNode> TYPE = NodeClass.create(IntegerDivRemNode.class);

compiler/src/jdk.graal.compiler/src/jdk/graal/compiler/nodes/calc/MulNode.java

Lines changed: 5 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -24,7 +24,7 @@
2424
*/
2525
package jdk.graal.compiler.nodes.calc;
2626

27-
import static jdk.graal.compiler.nodeinfo.NodeCycles.CYCLES_2;
27+
import static jdk.graal.compiler.nodeinfo.NodeCycles.CYCLES_4;
2828

2929
import jdk.graal.compiler.core.common.type.ArithmeticOpTable;
3030
import jdk.graal.compiler.core.common.type.ArithmeticOpTable.BinaryOp;
@@ -46,7 +46,10 @@
4646
import jdk.vm.ci.meta.PrimitiveConstant;
4747
import jdk.vm.ci.meta.Value;
4848

49-
@NodeInfo(shortName = "*", cycles = CYCLES_2)
49+
/**
50+
* Multiplication node.
51+
*/
52+
@NodeInfo(shortName = "*", cycles = CYCLES_4, cyclesRationale = "The node cycle estimate is taken from Agner Fog's instruction tables (https://www.agner.org/optimize/instruction_tables.pdf).")
5053
public class MulNode extends BinaryArithmeticNode<Mul> implements NarrowableArithmeticNode, Canonicalizable.BinaryCommutative<ValueNode> {
5154

5255
public static final NodeClass<MulNode> TYPE = NodeClass.create(MulNode.class);

compiler/src/jdk.graal.compiler/src/jdk/graal/compiler/nodes/calc/SqrtNode.java

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -42,9 +42,9 @@
4242
import jdk.graal.compiler.nodes.spi.NodeLIRBuilderTool;
4343

4444
/**
45-
* Square root.
45+
* Square root node.
4646
*/
47-
@NodeInfo(cycles = CYCLES_16, size = SIZE_1)
47+
@NodeInfo(cycles = CYCLES_16, size = SIZE_1, cyclesRationale = "The node cycle estimate is taken from Agner Fog's instruction tables (https://www.agner.org/optimize/instruction_tables.pdf).")
4848
public final class SqrtNode extends UnaryArithmeticNode<Sqrt> implements ArithmeticLIRLowerable {
4949

5050
public static final NodeClass<SqrtNode> TYPE = NodeClass.create(SqrtNode.class);

compiler/src/jdk.graal.compiler/src/jdk/graal/compiler/phases/schedule/SchedulePhase.java

Lines changed: 82 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -98,12 +98,88 @@
9898

9999
public final class SchedulePhase extends BasePhase<CoreProviders> {
100100

101+
/**
102+
* Defines the strategies for scheduling nodes in the compiler's intermediate representation.
103+
* The chosen strategy affects the order in which nodes are executed, impacting performance and
104+
* potentially the effectiveness of other optimizations.
105+
*/
101106
public enum SchedulingStrategy {
102-
EARLIEST_WITH_GUARD_ORDER,
107+
108+
/**
109+
* Schedules nodes in the earliest possible block. This minimizes the distance between a
110+
* node's definition and its usage, reducing register pressure. It can also move nodes out
111+
* of loops, decreasing the number of times they are executed. However, if a node is moved
112+
* out of a conditional execution (e.g. an if) into the dominating block, it will always be
113+
* executed, even if the condition is not satisfied, thereby increasing the number of times
114+
* it is executed. In general, this effect seems to outweigh the efficiency gained by
115+
* scheduling nodes out of loops.
116+
*/
103117
EARLIEST,
118+
119+
/**
120+
* Similar to {@link #EARLIEST}, but preserves the original order of guards. This ensures
121+
* that guard-related nodes are not reordered, maintaining the original guarding behavior.
122+
*/
123+
EARLIEST_WITH_GUARD_ORDER,
124+
125+
/**
126+
* Schedules nodes in the latest possible block to minimize unnecessary executions, thereby
127+
* reducing register pressure and the number of node executions. However, when sinking a
128+
* usage into a loop, this may lead to increased executions and register pressure.
129+
*
130+
* <p>
131+
* Example:
132+
* </p>
133+
*
134+
* <pre>
135+
* b = some calculation
136+
* if (a) {
137+
* some calculation using b
138+
* }
139+
* // no further usages of b
140+
* </pre>
141+
*
142+
* <p>
143+
* In this example, deferring the calculation of 'b' until it's actually needed reduces its
144+
* execution frequency, resulting in improved performance.
145+
* </p>
146+
*/
104147
LATEST,
148+
149+
/**
150+
* Similar to {@link #LATEST}, but ensures that nodes are not scheduled into loops. This
151+
* balances the benefits of early and late scheduling.
152+
*/
105153
LATEST_OUT_OF_LOOPS,
106-
LATEST_OUT_OF_LOOPS_IMPLICIT_NULL_CHECKS;
154+
155+
/**
156+
* Extends {@link #LATEST_OUT_OF_LOOPS} by actively preserving implicit null checks, to
157+
* reduce memory access.
158+
*
159+
* <p>
160+
* An implicit null check occurs when a null check is folded into a memory access operation.
161+
* For example, accessing a field on a potentially null object reference implicitly checks
162+
* for null and throws a NullPointerException if the object is null.
163+
*
164+
* <pre>
165+
* Consider the following example:
166+
* (1) read(a.length) // implicit null check of 'a'
167+
* (2) read(a.sth) // requires 'a' to be non-null
168+
*
169+
* Preserving implicit null checks ensures that (1) remains before (2) in the execution order.
170+
* If (2) is reordered before (1), an additional explicit null check is required before (2).
171+
* </pre>
172+
*
173+
* <p>
174+
* This optimization helps reduce the number of null checks required.
175+
*/
176+
LATEST_OUT_OF_LOOPS_IMPLICIT_NULL_CHECKS,
177+
178+
/**
179+
* This scheduling is run after {@link FinalSchedulePhase} to reduce register pressure or
180+
* latency by reordering the nodes within a {@link HIRBlock}.
181+
*/
182+
BASIC_BLOCK_LOCAL_SCHEDULING;
107183

108184
public boolean isEarliest() {
109185
return this == EARLIEST || this == EARLIEST_WITH_GUARD_ORDER;
@@ -113,6 +189,10 @@ public boolean isLatest() {
113189
return !isEarliest();
114190
}
115191

192+
public boolean isBasicBlockLocalScheduling() {
193+
return this == BASIC_BLOCK_LOCAL_SCHEDULING;
194+
}
195+
116196
public boolean scheduleOutOfLoops() {
117197
return this == LATEST_OUT_OF_LOOPS || this == LATEST_OUT_OF_LOOPS_IMPLICIT_NULL_CHECKS;
118198
}

0 commit comments

Comments
 (0)