diff --git a/compiler/src/jdk.graal.compiler/src/jdk/graal/compiler/nodes/calc/FloatDivNode.java b/compiler/src/jdk.graal.compiler/src/jdk/graal/compiler/nodes/calc/FloatDivNode.java
index 5143d98730f8..d59ae612adfe 100644
--- a/compiler/src/jdk.graal.compiler/src/jdk/graal/compiler/nodes/calc/FloatDivNode.java
+++ b/compiler/src/jdk.graal.compiler/src/jdk/graal/compiler/nodes/calc/FloatDivNode.java
@@ -24,7 +24,7 @@
  */
 package jdk.graal.compiler.nodes.calc;

-import static jdk.graal.compiler.nodeinfo.NodeCycles.CYCLES_32;
+import static jdk.graal.compiler.nodeinfo.NodeCycles.CYCLES_16;

 import jdk.graal.compiler.core.common.type.ArithmeticOpTable;
 import jdk.graal.compiler.core.common.type.ArithmeticOpTable.BinaryOp;
@@ -41,7 +41,10 @@
 import jdk.vm.ci.meta.Constant;

-@NodeInfo(shortName = "/", cycles = CYCLES_32)
+/**
+ * Floating point division node.
+ */
+@NodeInfo(shortName = "/", cycles = CYCLES_16, cyclesRationale = "The node cycle estimate is taken from Agner Fog's instruction tables (https://www.agner.org/optimize/instruction_tables.pdf).")
 public class FloatDivNode extends BinaryArithmeticNode<Div> {

     public static final NodeClass<FloatDivNode> TYPE = NodeClass.create(FloatDivNode.class);
diff --git a/compiler/src/jdk.graal.compiler/src/jdk/graal/compiler/nodes/calc/IntegerDivRemNode.java b/compiler/src/jdk.graal.compiler/src/jdk/graal/compiler/nodes/calc/IntegerDivRemNode.java
index 2cc5936ca675..dc06f3cc8f37 100644
--- a/compiler/src/jdk.graal.compiler/src/jdk/graal/compiler/nodes/calc/IntegerDivRemNode.java
+++ b/compiler/src/jdk.graal.compiler/src/jdk/graal/compiler/nodes/calc/IntegerDivRemNode.java
@@ -25,7 +25,7 @@
 package jdk.graal.compiler.nodes.calc;

 import static jdk.graal.compiler.nodeinfo.InputType.Guard;
-import static jdk.graal.compiler.nodeinfo.NodeCycles.CYCLES_32;
+import static jdk.graal.compiler.nodeinfo.NodeCycles.CYCLES_16;
 import static jdk.graal.compiler.nodeinfo.NodeSize.SIZE_1;

 import jdk.graal.compiler.core.common.type.IntegerStamp;
@@ -42,7 +42,10 @@
 import jdk.vm.ci.meta.JavaConstant;

-@NodeInfo(allowedUsageTypes = Guard, cycles = CYCLES_32, size = SIZE_1)
+/**
+ * Integer division remainder node.
+ */
+@NodeInfo(allowedUsageTypes = Guard, cycles = CYCLES_16, size = SIZE_1, cyclesRationale = "The node cycle estimate is taken from Agner Fog's instruction tables (https://www.agner.org/optimize/instruction_tables.pdf).")
 public abstract class IntegerDivRemNode extends FixedBinaryNode implements Lowerable, IterableNodeType, GuardingNode {

     public static final NodeClass<IntegerDivRemNode> TYPE = NodeClass.create(IntegerDivRemNode.class);
diff --git a/compiler/src/jdk.graal.compiler/src/jdk/graal/compiler/nodes/calc/MulNode.java b/compiler/src/jdk.graal.compiler/src/jdk/graal/compiler/nodes/calc/MulNode.java
index a1fa3a62113f..e022157a628b 100644
--- a/compiler/src/jdk.graal.compiler/src/jdk/graal/compiler/nodes/calc/MulNode.java
+++ b/compiler/src/jdk.graal.compiler/src/jdk/graal/compiler/nodes/calc/MulNode.java
@@ -24,7 +24,7 @@
  */
 package jdk.graal.compiler.nodes.calc;

-import static jdk.graal.compiler.nodeinfo.NodeCycles.CYCLES_2;
+import static jdk.graal.compiler.nodeinfo.NodeCycles.CYCLES_4;

 import jdk.graal.compiler.core.common.type.ArithmeticOpTable;
 import jdk.graal.compiler.core.common.type.ArithmeticOpTable.BinaryOp;
@@ -46,7 +46,10 @@
 import jdk.vm.ci.meta.PrimitiveConstant;
 import jdk.vm.ci.meta.Value;

-@NodeInfo(shortName = "*", cycles = CYCLES_2)
+/**
+ * Multiplication node.
+ */
+@NodeInfo(shortName = "*", cycles = CYCLES_4, cyclesRationale = "The node cycle estimate is taken from Agner Fog's instruction tables (https://www.agner.org/optimize/instruction_tables.pdf).")
 public class MulNode extends BinaryArithmeticNode<Mul> implements NarrowableArithmeticNode, Canonicalizable.BinaryCommutative<ValueNode> {

     public static final NodeClass<MulNode> TYPE = NodeClass.create(MulNode.class);
diff --git a/compiler/src/jdk.graal.compiler/src/jdk/graal/compiler/nodes/calc/SqrtNode.java b/compiler/src/jdk.graal.compiler/src/jdk/graal/compiler/nodes/calc/SqrtNode.java
index 44f91efc12ca..4b32594debb0 100644
--- a/compiler/src/jdk.graal.compiler/src/jdk/graal/compiler/nodes/calc/SqrtNode.java
+++ b/compiler/src/jdk.graal.compiler/src/jdk/graal/compiler/nodes/calc/SqrtNode.java
@@ -42,9 +42,9 @@
 import jdk.graal.compiler.nodes.spi.NodeLIRBuilderTool;

 /**
- * Square root.
+ * Square root node.
  */
-@NodeInfo(cycles = CYCLES_16, size = SIZE_1)
+@NodeInfo(cycles = CYCLES_16, size = SIZE_1, cyclesRationale = "The node cycle estimate is taken from Agner Fog's instruction tables (https://www.agner.org/optimize/instruction_tables.pdf).")
 public final class SqrtNode extends UnaryArithmeticNode<Sqrt> implements ArithmeticLIRLowerable {

     public static final NodeClass<SqrtNode> TYPE = NodeClass.create(SqrtNode.class);
diff --git a/compiler/src/jdk.graal.compiler/src/jdk/graal/compiler/phases/schedule/SchedulePhase.java b/compiler/src/jdk.graal.compiler/src/jdk/graal/compiler/phases/schedule/SchedulePhase.java
index 3f3564da9c2b..53b6080f3b42 100644
--- a/compiler/src/jdk.graal.compiler/src/jdk/graal/compiler/phases/schedule/SchedulePhase.java
+++ b/compiler/src/jdk.graal.compiler/src/jdk/graal/compiler/phases/schedule/SchedulePhase.java
@@ -98,12 +98,88 @@ public final class SchedulePhase extends BasePhase {
+    /**
+     * Defines the strategies for scheduling nodes in the compiler's intermediate representation.
+     * The chosen strategy affects the order in which nodes are executed, impacting performance and
+     * potentially the effectiveness of other optimizations.
+     */
     public enum SchedulingStrategy {
-        EARLIEST_WITH_GUARD_ORDER,
+
+        /**
+         * Schedules nodes in the earliest possible block. This minimizes the distance between a
+         * node's definition and its usage, reducing register pressure. It can also move nodes out
+         * of loops, decreasing the number of times they are executed. However, if a node is moved
+         * out of a conditional execution (e.g. an if) into the dominating block, it will always be
+         * executed, even if the condition is not satisfied, thereby increasing the number of times
+         * it is executed. In general, this effect seems to be greater than the efficiency gains
+         * from scheduling nodes out of loops.
+         */
         EARLIEST,
+
+        /**
+         * Similar to {@link #EARLIEST}, but preserves the original order of guards. This ensures
+         * that guard-related nodes are not reordered, maintaining the original guarding behavior.
+         */
+        EARLIEST_WITH_GUARD_ORDER,
+
+        /**
+         * Schedules nodes in the latest possible block to minimize unnecessary executions, thereby
+         * reducing register pressure and the number of node executions. However, when sinking a
+         * node into a loop towards its usage, this may lead to increased executions and register
+         * pressure.
+         *
+         * <p>
+         * Example:
+         *
+         * <pre>
+         *     b = some calculation
+         *     if (a) {
+         *         some calculation using b
+         *     }
+         *     // no further usages of b
+         * </pre>
+         *
+         * <p>
+         * In this example, deferring the calculation of 'b' until it's actually needed reduces its
+         * execution frequency, resulting in improved performance.
+         */
         LATEST,
+
+        /**
+         * Similar to {@link #LATEST}, but ensures that nodes are not scheduled into loops. This
+         * balances the benefits of early and late scheduling.
+         */
         LATEST_OUT_OF_LOOPS,
-        LATEST_OUT_OF_LOOPS_IMPLICIT_NULL_CHECKS;
+
+        /**
+         * Extends {@link #LATEST_OUT_OF_LOOPS} by actively preserving implicit null checks to
+         * reduce memory accesses.
+         *
+         * <p>
+         * An implicit null check occurs when a null check is folded into a memory access operation.
+         * For example, accessing a field on a potentially null object reference implicitly checks
+         * for null and throws a NullPointerException if the object is null.
+         *
+         * <pre>
+         *     Consider the following example:
+         *     (1) read(a.length) // implicit null check of 'a'
+         *     (2) read(a.sth) // requires 'a' to be non-null
+         *
+         *     Preserving implicit null checks ensures that (1) remains before (2) in the execution order.
+         *     If (2) is reordered before (1), an additional explicit null check is required before (2).
+         * </pre>
+         *
+         * <p>
+         * This optimization helps reduce the number of null checks required.
+         */
+        LATEST_OUT_OF_LOOPS_IMPLICIT_NULL_CHECKS,
+
+        /**
+         * This scheduling is run after {@link FinalSchedulePhase} to reduce register pressure or
+         * latency by reordering the nodes within a {@link HIRBlock}.
+         */
+        BASIC_BLOCK_LOCAL_SCHEDULING;

         public boolean isEarliest() {
             return this == EARLIEST || this == EARLIEST_WITH_GUARD_ORDER;
@@ -113,6 +189,10 @@ public boolean isLatest() {
             return !isEarliest();
         }

+        public boolean isBasicBlockLocalScheduling() {
+            return this == BASIC_BLOCK_LOCAL_SCHEDULING;
+        }
+
         public boolean scheduleOutOfLoops() {
             return this == LATEST_OUT_OF_LOOPS || this == LATEST_OUT_OF_LOOPS_IMPLICIT_NULL_CHECKS;
         }
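A side note on the EARLIEST/LATEST Javadoc added above: the following plain-Java sketch is not part of the patch (method and variable names are made up) and only spells out the shape of code the 'b = some calculation' example refers to, and how placement changes how often the value is computed.

/** Illustrative only: how node placement affects how often a value is computed. */
class SchedulingStrategyIllustration {

    // Shape the LATEST example aims for: the computation of 'b' is sunk towards its
    // only usage inside the branch, so it is not evaluated when 'a' is false.
    static int latestPlacement(boolean a, int x) {
        if (a) {
            int b = x * 42; // "some calculation", computed only when needed
            return b + 1;   // "some calculation using b"
        }
        return 0;           // no further usages of 'b'
    }

    // Shape of the EARLIEST caveat: a computation hoisted into the dominating block is
    // executed unconditionally, even when the condition is not satisfied.
    static int earliestPlacement(boolean a, int x) {
        int b = x * 42;     // always computed
        if (a) {
            return b + 1;
        }
        return 0;
    }
}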
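Similarly, the read(a.length) / read(a.sth) example in the LATEST_OUT_OF_LOOPS_IMPLICIT_NULL_CHECKS Javadoc corresponds roughly to the following Java-level pattern (a sketch for illustration only, not part of the patch):

/** Illustrative only: the first access can serve as an implicit null check for the second. */
class ImplicitNullCheckIllustration {

    static int sum(int[] a) {
        int n = a.length; // (1) the length read implicitly null checks 'a'
        int x = a[0];     // (2) relies on 'a' already being known non-null
        return n + x;     // if (2) were scheduled before (1), an explicit null check would be needed
    }
}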