[GR-35136] Basic Block Local Scheduling.

Sebastian Schulze · Sebastian Schulze · commit 5a2aa14e5d04 · 2025-12-11T09:13:26.000Z
PullRequest: graal/21334
diff --git a/compiler/src/jdk.graal.compiler/src/jdk/graal/compiler/nodes/calc/FloatDivNode.java b/compiler/src/jdk.graal.compiler/src/jdk/graal/compiler/nodes/calc/FloatDivNode.java
@@ -24,7 +24,7 @@
  */
 package jdk.graal.compiler.nodes.calc;
 
-import static jdk.graal.compiler.nodeinfo.NodeCycles.CYCLES_32;
+import static jdk.graal.compiler.nodeinfo.NodeCycles.CYCLES_16;
 
 import jdk.graal.compiler.core.common.type.ArithmeticOpTable;
 import jdk.graal.compiler.core.common.type.ArithmeticOpTable.BinaryOp;
@@ -41,7 +41,10 @@
 
 import jdk.vm.ci.meta.Constant;
 
-@NodeInfo(shortName = "/", cycles = CYCLES_32)
+/**
+ * Floating-point division node.
+ */
+@NodeInfo(shortName = "/", cycles = CYCLES_16, cyclesRationale = "The node cycle estimate is taken from Agner Fog's instruction tables (https://www.agner.org/optimize/instruction_tables.pdf).")
 public class FloatDivNode extends BinaryArithmeticNode<Div> {
 
     public static final NodeClass<FloatDivNode> TYPE = NodeClass.create(FloatDivNode.class);
diff --git a/compiler/src/jdk.graal.compiler/src/jdk/graal/compiler/nodes/calc/IntegerDivRemNode.java b/compiler/src/jdk.graal.compiler/src/jdk/graal/compiler/nodes/calc/IntegerDivRemNode.java
@@ -25,7 +25,7 @@
 package jdk.graal.compiler.nodes.calc;
 
 import static jdk.graal.compiler.nodeinfo.InputType.Guard;
-import static jdk.graal.compiler.nodeinfo.NodeCycles.CYCLES_32;
+import static jdk.graal.compiler.nodeinfo.NodeCycles.CYCLES_16;
 import static jdk.graal.compiler.nodeinfo.NodeSize.SIZE_1;
 
 import jdk.graal.compiler.core.common.type.IntegerStamp;
@@ -42,7 +42,10 @@
 
 import jdk.vm.ci.meta.JavaConstant;
 
-@NodeInfo(allowedUsageTypes = Guard, cycles = CYCLES_32, size = SIZE_1)
+/**
+ * Base class for integer division and remainder nodes.
+ */
+@NodeInfo(allowedUsageTypes = Guard, cycles = CYCLES_16, size = SIZE_1, cyclesRationale = "The node cycle estimate is taken from Agner Fog's instruction tables (https://www.agner.org/optimize/instruction_tables.pdf).")
 public abstract class IntegerDivRemNode extends FixedBinaryNode implements Lowerable, IterableNodeType, GuardingNode {
 
     public static final NodeClass<IntegerDivRemNode> TYPE = NodeClass.create(IntegerDivRemNode.class);
diff --git a/compiler/src/jdk.graal.compiler/src/jdk/graal/compiler/nodes/calc/MulNode.java b/compiler/src/jdk.graal.compiler/src/jdk/graal/compiler/nodes/calc/MulNode.java
@@ -24,7 +24,7 @@
  */
 package jdk.graal.compiler.nodes.calc;
 
-import static jdk.graal.compiler.nodeinfo.NodeCycles.CYCLES_2;
+import static jdk.graal.compiler.nodeinfo.NodeCycles.CYCLES_4;
 
 import jdk.graal.compiler.core.common.type.ArithmeticOpTable;
 import jdk.graal.compiler.core.common.type.ArithmeticOpTable.BinaryOp;
@@ -46,7 +46,10 @@
 import jdk.vm.ci.meta.PrimitiveConstant;
 import jdk.vm.ci.meta.Value;
 
-@NodeInfo(shortName = "*", cycles = CYCLES_2)
+/**
+ * Multiplication node.
+ */
+@NodeInfo(shortName = "*", cycles = CYCLES_4, cyclesRationale = "The node cycle estimate is taken from Agner Fog's instruction tables (https://www.agner.org/optimize/instruction_tables.pdf).")
 public class MulNode extends BinaryArithmeticNode<Mul> implements NarrowableArithmeticNode, Canonicalizable.BinaryCommutative<ValueNode> {
 
     public static final NodeClass<MulNode> TYPE = NodeClass.create(MulNode.class);
diff --git a/compiler/src/jdk.graal.compiler/src/jdk/graal/compiler/nodes/calc/SqrtNode.java b/compiler/src/jdk.graal.compiler/src/jdk/graal/compiler/nodes/calc/SqrtNode.java
@@ -42,9 +42,9 @@
 import jdk.graal.compiler.nodes.spi.NodeLIRBuilderTool;
 
 /**
- * Square root.
+ * Square root node.
  */
-@NodeInfo(cycles = CYCLES_16, size = SIZE_1)
+@NodeInfo(cycles = CYCLES_16, size = SIZE_1, cyclesRationale = "The node cycle estimate is taken from Agner Fog's instruction tables (https://www.agner.org/optimize/instruction_tables.pdf).")
 public final class SqrtNode extends UnaryArithmeticNode<Sqrt> implements ArithmeticLIRLowerable {
 
     public static final NodeClass<SqrtNode> TYPE = NodeClass.create(SqrtNode.class);
diff --git a/compiler/src/jdk.graal.compiler/src/jdk/graal/compiler/phases/schedule/SchedulePhase.java b/compiler/src/jdk.graal.compiler/src/jdk/graal/compiler/phases/schedule/SchedulePhase.java
@@ -98,12 +98,88 @@
 
 public final class SchedulePhase extends BasePhase<CoreProviders> {
 
+    /**
+     * Defines the strategies for scheduling nodes in the compiler's intermediate representation.
+     * The chosen strategy affects the order in which nodes are executed, impacting performance and
+     * potentially the effectiveness of other optimizations.
+     */
     public enum SchedulingStrategy {
-        EARLIEST_WITH_GUARD_ORDER,
+
+        /**
+         * Schedules nodes in the earliest possible block. This minimizes the distance between a
+         * node's definition and its usage, reducing register pressure. It can also move nodes out
+         * of loops, decreasing the number of times they are executed. However, if a node is moved
+         * out of a conditional execution (e.g. an if) into the dominating block, it will always be
+         * executed, even if the condition is not satisfied, thereby increasing the number of times
+         * it is executed. In general, this effect seems to outweigh the efficiency gained by
+         * scheduling nodes out of loops.
+         */
         EARLIEST,
+
+        /**
+         * Similar to {@link #EARLIEST}, but preserves the original order of guards. This ensures
+         * that guard-related nodes are not reordered, maintaining the original guarding behavior.
+         */
+        EARLIEST_WITH_GUARD_ORDER,
+
+        /**
+         * Schedules nodes in the latest possible block, avoiding unnecessary executions and
+         * thereby reducing register pressure. However, when a node is sunk into a loop, this
+         * may lead to increased executions and register pressure.
+         *
+         * <p>
+         * Example:
+         * </p>
+         *
+         * <pre>
+         *     b = some calculation
+         *     if (a) {
+         *         some calculation using b
+         *     }
+         *     // no further usages of b
+         * </pre>
+         *
+         * <p>
+         * In this example, deferring the calculation of 'b' until it's actually needed reduces its
+         * execution frequency, resulting in improved performance.
+         * </p>
+         */
         LATEST,
+
+        /**
+         * Similar to {@link #LATEST}, but ensures that nodes are not scheduled into loops. This
+         * balances the benefits of early and late scheduling.
+         */
         LATEST_OUT_OF_LOOPS,
-        LATEST_OUT_OF_LOOPS_IMPLICIT_NULL_CHECKS;
+
+        /**
+         * Extends {@link #LATEST_OUT_OF_LOOPS} by actively preserving implicit null checks, to
+         * reduce memory access.
+         *
+         * <p>
+         * An implicit null check occurs when a null check is folded into a memory access operation.
+         * For example, accessing a field on a potentially null object reference implicitly checks
+         * for null and throws a NullPointerException if the object is null.
+         *
+         * <pre>
+         *     Consider the following example:
+         *     (1) read(a.length) // implicit null check of 'a'
+         *     (2) read(a.sth) // requires 'a' to be non-null
+         *
+         *     Preserving implicit null checks ensures that (1) remains before (2) in the execution order.
+         *     If (2) is reordered before (1), an additional explicit null check is required before (2).
+         * </pre>
+         *
+         * <p>
+         * This optimization helps reduce the number of null checks required.
+         */
+        LATEST_OUT_OF_LOOPS_IMPLICIT_NULL_CHECKS,
+
+        /**
+         * This scheduling is run after {@link FinalSchedulePhase} to reduce register pressure or
+         * latency by reordering the nodes within a {@link HIRBlock}.
+         */
+        BASIC_BLOCK_LOCAL_SCHEDULING;
 
         public boolean isEarliest() {
             return this == EARLIEST || this == EARLIEST_WITH_GUARD_ORDER;
@@ -113,6 +189,10 @@ public boolean isLatest() {
             return !isEarliest();
         }
 
+        public boolean isBasicBlockLocalScheduling() {
+            return this == BASIC_BLOCK_LOCAL_SCHEDULING;
+        }
+
         public boolean scheduleOutOfLoops() {
             return this == LATEST_OUT_OF_LOOPS || this == LATEST_OUT_OF_LOOPS_IMPLICIT_NULL_CHECKS;
         }