8347555: [REDO] C2: implement optimization for series of Add of unique value

tabjy · rwestrel · commit f6d77cb33299 · 2025-10-10T14:04:51.000Z
Reviewed-by: roland, epeter
diff --git a/src/hotspot/share/opto/addnode.cpp b/src/hotspot/share/opto/addnode.cpp
@@ -396,9 +396,182 @@ Node* AddNode::IdealIL(PhaseGVN* phase, bool can_reshape, BasicType bt) {
     }
   }
 
+  // Collapse addition of the same terms into multiplications.
+  Node* collapsed = Ideal_collapse_variable_times_con(phase, bt);
+  if (collapsed != nullptr) {
+    return collapsed; // Skip AddNode::Ideal() since it may now be a multiplication node.
+  }
+
   return AddNode::Ideal(phase, can_reshape);
 }
 
+// Try to collapse addition of the same terms into a single multiplication. Returns a new MulNode, or nullptr otherwise.
+// Examples of this conversion include:
+//   - a + a + ... + a => CON*a
+//   - (a * CON) + a   => (CON + 1) * a
+//   - a + (a * CON)   => (CON + 1) * a
+//
+// We perform such conversions incrementally during IGVN by transforming the leftmost nodes first and working up to the
+// root of the expression. In other words, we convert, at each iteration:
+//        a + a + a + ... + a
+//     => 2*a + a + ... + a
+//     => 3*a + ... + a
+//     => n*a
+//
+// Due to the iterative nature of IGVN, a MulNode created from the first few AddNode terms may be further transformed
+// into a power-of-2 pattern (e.g., 2 * a => a << 1, 3 * a => (a << 1) + a). We can't guarantee we'll always pick up
+// transformed power-of-2 patterns when term `a` is complex.
+//
+// Note this also converts, for example, original expression `(a*3) + a` into `4*a` and `(a<<2) + a` into `5*a`. A more
+// generalized pattern `(a*b) + (a*c)` into `a*(b + c)` is handled by AddNode::IdealIL().
+Node* AddNode::Ideal_collapse_variable_times_con(PhaseGVN* phase, BasicType bt) {
+  // We need to make sure that the current AddNode is not part of a MulNode that has already been optimized to a
+  // power-of-2 addition (e.g., 3 * a => (a << 1) + a). Without this check, GVN would keep trying to optimize the same
+  // node and can't progress. For example, 3 * a => (a << 1) + a => 3 * a => (a << 1) + a => ...
+  if (Multiplication::find_power_of_two_addition_pattern(this, bt).is_valid()) {
+    return nullptr;
+  }
+
+  Node* lhs = in(1);
+  Node* rhs = in(2);
+  // First treat lhs as the `a * CON` side and rhs as the bare term `a`.
+  Multiplication mul = Multiplication::find_collapsible_addition_patterns(lhs, rhs, bt);
+  if (!mul.is_valid_with(rhs)) {
+    // Swap lhs and rhs then try again
+    mul = Multiplication::find_collapsible_addition_patterns(rhs, lhs, bt);
+    if (!mul.is_valid_with(lhs)) {
+      return nullptr;
+    }
+  }
+  // The matched side contributes `multiplier` copies of the variable and the other operand one more.
+  Node* con;
+  if (bt == T_INT) {
+    con = phase->intcon(java_add(static_cast<jint>(mul.multiplier()), 1));
+  } else {
+    con = phase->longcon(java_add(mul.multiplier(), CONST64(1)));
+  }
+
+  return MulNode::make(con, mul.variable(), bt);
+}
+
+// Find a pattern of collapsible additions that can be converted to a multiplication.
+// Node `a` is the candidate `x * CON` side and `pattern` the expected `x`; we match with best efforts by looking for:
+//     - (1) Simple addition:       a = x + x
+//     - (2) Simple lshift:         a = x << CON
+//     - (3) Simple multiplication: a = x * CON
+//     - (4) Power-of-two addition: a = (x << CON1) + (x << CON2)
+AddNode::Multiplication AddNode::Multiplication::find_collapsible_addition_patterns(const Node* a, const Node* pattern, BasicType bt) {
+  // (1) Simple addition pattern (e.g., lhs = a + a)
+  Multiplication mul = find_simple_addition_pattern(a, bt);
+  if (mul.is_valid_with(pattern)) {
+    return mul;
+  }
+
+  // (2) Simple lshift pattern (e.g., lhs = a << CON)
+  mul = find_simple_lshift_pattern(a, bt);
+  if (mul.is_valid_with(pattern)) {
+    return mul;
+  }
+
+  // (3) Simple multiplication pattern (e.g., lhs = a * CON)
+  mul = find_simple_multiplication_pattern(a, bt);
+  if (mul.is_valid_with(pattern)) {
+    return mul;
+  }
+
+  // (4) Power-of-two addition pattern (e.g., lhs = (a << CON1) + (a << CON2))
+  // While multiplications can be potentially optimized to power-of-2 subtractions (e.g., a * 7 => (a << 3) - a),
+  // (x - y) + y => x is already handled by the Identity() methods. So, we don't need to check for that pattern here.
+  mul = find_power_of_two_addition_pattern(a, bt);
+  if (mul.is_valid_with(pattern)) {
+    return mul;
+  }
+
+  // No pattern matched with `pattern` as the variable.
+  return make_invalid();
+}
+
+// Try to match `n = a + a`. On success, return a valid Multiplication with `variable = a` and `multiplier = 2`.
+// Otherwise, return an invalid Multiplication.
+AddNode::Multiplication AddNode::Multiplication::find_simple_addition_pattern(const Node* n, BasicType bt) {
+  if (n->Opcode() == Op_Add(bt) && n->in(1) == n->in(2)) {
+    return Multiplication(n->in(1), 2);
+  }
+
+  return make_invalid();
+}
+
+// Try to match `n = a << CON`. On success, return a valid Multiplication with `variable = a` and
+// `multiplier = 1 << CON`, where the shift is evaluated by java_shift_left() for the given BasicType.
+// Otherwise, return an invalid Multiplication.
+// Note that the power-of-2 multiplication optimization could potentially convert a MulNode to this pattern.
+AddNode::Multiplication AddNode::Multiplication::find_simple_lshift_pattern(const Node* n, BasicType bt) {
+  // Only a constant, non-top shift amount can be turned into a multiplier.
+  if (n->Opcode() == Op_LShift(bt) && n->in(2)->is_Con()) {
+    Node* con = n->in(2);
+    if (!con->is_top()) {
+      return Multiplication(n->in(1), java_shift_left(1, con->get_int(), bt));
+    }
+  }
+
+  return make_invalid();
+}
+
+// Try to match `n = a * CON`. On success, return a valid Multiplication with `variable = a` and `multiplier = CON`.
+// Otherwise, return an invalid Multiplication.
+// Note that `CON` will always be the second input node of a Mul node canonicalized by Ideal(). If this is not the case,
+// `n` has not been processed by IGVN. So we skip the optimization for the current add node and wait for it to be added
+// to the queue again.
+AddNode::Multiplication AddNode::Multiplication::find_simple_multiplication_pattern(const Node* n, BasicType bt) {
+  if (n->Opcode() == Op_Mul(bt) && n->in(2)->is_Con()) {
+    Node* con = n->in(2);
+    Node* base = n->in(1);
+
+    if (!con->is_top()) {
+      return Multiplication(base, con->get_integer_as_long(bt));
+    }
+  }
+
+  return make_invalid();
+}
+
+// Try to match `n = (a << CON1) + (a << CON2)`. On success, return a valid Multiplication with `variable = a` and
+// `multiplier = (1 << CON1) + (1 << CON2)`.
+// Match `n` for patterns:
+//     - (1) (a << CON) + (a << CON)
+//     - (2) (a << CON) + a
+//     - (3) a + (a << CON)
+//     - (4) a + a
+// Note that one or both of the terms of the addition could simply be `a` (i.e., a << 0) as in pattern (4).
+AddNode::Multiplication AddNode::Multiplication::find_power_of_two_addition_pattern(const Node* n, BasicType bt) {
+  if (n->Opcode() == Op_Add(bt) && n->in(1) != n->in(2)) {
+    const Multiplication lhs = find_simple_lshift_pattern(n->in(1), bt);
+    const Multiplication rhs = find_simple_lshift_pattern(n->in(2), bt);
+
+    // Pattern (1): both operands are shifts of the same variable, so their multipliers are summed
+    {
+      const Multiplication res = lhs.add(rhs);
+      if (res.is_valid()) {
+        return res;
+      }
+    }
+
+    // Pattern (2): the right operand is the bare variable, contributing a factor of 1
+    if (lhs.is_valid_with(n->in(2))) {
+      return Multiplication(lhs.variable(), java_add(lhs.multiplier(), CONST64(1)));
+    }
+
+    // Pattern (3): the left operand is the bare variable, contributing a factor of 1
+    if (rhs.is_valid_with(n->in(1))) {
+      return Multiplication(rhs.variable(), java_add(rhs.multiplier(), CONST64(1)));
+    }
+
+    // Pattern (4), a simple addition. NOTE(review): the in(1) != in(2) guard above seems to make this unmatchable.
+    return find_simple_addition_pattern(n, bt);
+  }
+
+  return make_invalid();
+}
 
 Node* AddINode::Ideal(PhaseGVN* phase, bool can_reshape) {
   Node* in1 = in(1);
diff --git a/src/hotspot/share/opto/addnode.hpp b/src/hotspot/share/opto/addnode.hpp
@@ -42,7 +42,51 @@ typedef const Pair<Node*, jint> ConstAddOperands;
 // by virtual functions.
 class AddNode : public Node {
   virtual uint hash() const;
-public:
+
+  class Multiplication { // Describes a node matched as `variable * multiplier`, or "no match" when invalid.
+    bool _is_valid = false; // Stays false only for the value built by the private default constructor.
+
+    Node* _variable = nullptr; // The repeated term being multiplied.
+    jlong _multiplier = 0;     // The constant factor applied to _variable.
+
+  private:
+    Multiplication() {} // Builds the invalid value; used by make_invalid().
+
+  public:
+    Multiplication(Node* variable, jlong multiplier) : // Builds a valid match.
+          _is_valid(true),
+          _variable(variable),
+          _multiplier(multiplier) {}
+
+    static Multiplication make_invalid() { // Returns a copy of a function-local static invalid value.
+      static Multiplication invalid = Multiplication();
+      return invalid;
+    }
+    // Pattern matchers, defined out of line; each returns a Multiplication by value.
+    static Multiplication find_collapsible_addition_patterns(const Node* a, const Node* pattern, BasicType bt);
+    static Multiplication find_simple_addition_pattern(const Node* n, BasicType bt);
+    static Multiplication find_simple_lshift_pattern(const Node* n, BasicType bt);
+    static Multiplication find_simple_multiplication_pattern(const Node* n, BasicType bt);
+    static Multiplication find_power_of_two_addition_pattern(const Node* n, BasicType bt);
+    // Sums the multipliers when both operands are valid and share the same variable; otherwise returns invalid.
+    Multiplication add(const Multiplication rhs) const {
+      if (is_valid_with(rhs.variable()) && rhs.is_valid_with(variable())) {
+        return Multiplication(variable(), java_add(multiplier(), rhs.multiplier()));
+      }
+
+      return make_invalid();
+    }
+    // is_valid_with() additionally requires the matched variable to be the given node (pointer comparison).
+    bool is_valid() const { return _is_valid; }
+    bool is_valid_with(const Node* variable) const {
+      return _is_valid && this->_variable == variable;
+    }
+
+    Node* variable() const { return _variable; }
+    jlong multiplier() const { return _multiplier; }
+  };
+
+ public:
   AddNode( Node *in1, Node *in2 ) : Node(nullptr,in1,in2) {
     init_class_id(Class_Add);
   }
@@ -55,6 +99,7 @@ class AddNode : public Node {
   // and flatten expressions (so that 1+x+2 becomes x+3).
   virtual Node* Ideal(PhaseGVN* phase, bool can_reshape);
   Node* IdealIL(PhaseGVN* phase, bool can_reshape, BasicType bt);
+  Node* Ideal_collapse_variable_times_con(PhaseGVN* phase, BasicType bt);
 
   // Compute a new Type for this node.  Basically we just do the pre-check,
   // then call the virtual add() to set the type.
diff --git a/src/hotspot/share/utilities/globalDefinitions.hpp b/src/hotspot/share/utilities/globalDefinitions.hpp
@@ -1253,6 +1253,24 @@ JAVA_INTEGER_SHIFT_OP(>>, java_shift_right_unsigned, jlong, julong)
 
 #undef JAVA_INTEGER_SHIFT_OP
 
+// Bit shift operations that accept a BasicType as the last argument: for T_INT,
+// lhs is narrowed to jint and the jint overload is used; otherwise bt must be
+// T_LONG. This avoids mistakes with overloads distinguished only by lhs type.
+#define JAVA_INTEGER_SHIFT_BASIC_TYPE(FUNC)            \
+inline jlong FUNC(jlong lhs, jint rhs, BasicType bt) { \
+  if (bt == T_INT) {                                   \
+    return FUNC((jint) lhs, rhs);                      \
+  }                                                    \
+  assert(bt == T_LONG, "unsupported basic type");      \
+  return FUNC(lhs, rhs);                               \
+}
+
+JAVA_INTEGER_SHIFT_BASIC_TYPE(java_shift_left)
+JAVA_INTEGER_SHIFT_BASIC_TYPE(java_shift_right)
+JAVA_INTEGER_SHIFT_BASIC_TYPE(java_shift_right_unsigned)
+
+#undef JAVA_INTEGER_SHIFT_BASIC_TYPE
+
 //----------------------------------------------------------------------------------------------------
 // The goal of this code is to provide saturating operations for int/uint.
 // Checks overflow conditions and saturates the result to min_jint/max_jint.
diff --git a/test/hotspot/gtest/utilities/test_java_arithmetic.cpp b/test/hotspot/gtest/utilities/test_java_arithmetic.cpp
@@ -192,13 +192,15 @@ TEST(TestJavaArithmetic, shift_left_jint) {
   const volatile ShiftOpJintData* data = asl_jint_data;
   for (size_t i = 0; i < ARRAY_SIZE(asl_jint_data); ++i) {
     ASSERT_EQ(data[i].r, java_shift_left(data[i].x, data[i].shift));
+    ASSERT_EQ(data[i].r, java_shift_left(data[i].x, data[i].shift, T_INT));
   }
 }
 
 TEST(TestJavaArithmetic, shift_left_jlong) {
   const volatile ShiftOpJlongData* data = asl_jlong_data;
   for (size_t i = 0; i < ARRAY_SIZE(asl_jlong_data); ++i) {
     ASSERT_EQ(data[i].r, java_shift_left(data[i].x, data[i].shift));
+    ASSERT_EQ(data[i].r, java_shift_left(data[i].x, data[i].shift, T_LONG)); // BasicType form must give the same result
   }
 }
 
@@ -262,13 +264,15 @@ TEST(TestJavaArithmetic, shift_right_jint) {
   const volatile ShiftOpJintData* data = asr_jint_data;
   for (size_t i = 0; i < ARRAY_SIZE(asr_jint_data); ++i) {
     ASSERT_EQ(data[i].r, java_shift_right(data[i].x, data[i].shift));
+    ASSERT_EQ(data[i].r, java_shift_right(data[i].x, data[i].shift, T_INT));
   }
 }
 
 TEST(TestJavaArithmetic, shift_right_jlong) {
   const volatile ShiftOpJlongData* data = asr_jlong_data;
   for (size_t i = 0; i < ARRAY_SIZE(asr_jlong_data); ++i) {
     ASSERT_EQ(data[i].r, java_shift_right(data[i].x, data[i].shift));
+    ASSERT_EQ(data[i].r, java_shift_right(data[i].x, data[i].shift, T_LONG)); // BasicType form must give the same result
   }
 }
 
@@ -334,12 +338,14 @@ TEST(TestJavaArithmetic, shift_right_unsigned_jint) {
   const volatile ShiftOpJintData* data = lsr_jint_data;
   for (size_t i = 0; i < ARRAY_SIZE(lsr_jint_data); ++i) {
     ASSERT_EQ(data[i].r, java_shift_right_unsigned(data[i].x, data[i].shift));
+    ASSERT_EQ(data[i].r, java_shift_right_unsigned(data[i].x, data[i].shift, T_INT));
   }
 }
 
 TEST(TestJavaArithmetic, shift_right_unsigned_jlong) {
   const volatile ShiftOpJlongData* data = lsr_jlong_data;
   for (size_t i = 0; i < ARRAY_SIZE(lsr_jlong_data); ++i) {
     ASSERT_EQ(data[i].r, java_shift_right_unsigned(data[i].x, data[i].shift));
+    ASSERT_EQ(data[i].r, java_shift_right_unsigned(data[i].x, data[i].shift, T_LONG)); // BasicType form must give the same result
   }
 }
diff --git a/test/hotspot/jtreg/compiler/c2/gvn/TestCollapsingSameTermAdditions.java b/test/hotspot/jtreg/compiler/c2/gvn/TestCollapsingSameTermAdditions.java
diff --git a/test/micro/org/openjdk/bench/vm/compiler/SerialAdditions.java b/test/micro/org/openjdk/bench/vm/compiler/SerialAdditions.java

Original file line number	Diff line number	Diff line change
`@@ -192,13 +192,15 @@ TEST(TestJavaArithmetic, shift_left_jint) {`
`192`	`192`	`const volatile ShiftOpJintData* data = asl_jint_data;`
`193`	`193`	`for (size_t i = 0; i < ARRAY_SIZE(asl_jint_data); ++i) {`
`194`	`194`	`ASSERT_EQ(data[i].r, java_shift_left(data[i].x, data[i].shift));`
	`195`	`+ ASSERT_EQ(data[i].r, java_shift_left(data[i].x, data[i].shift, T_INT));`
`195`	`196`	`}`
`196`	`197`	`}`
`197`	`198`
`198`	`199`	`TEST(TestJavaArithmetic, shift_left_jlong) {`
`199`	`200`	`const volatile ShiftOpJlongData* data = asl_jlong_data;`
`200`	`201`	`for (size_t i = 0; i < ARRAY_SIZE(asl_jlong_data); ++i) {`
`201`	`202`	`ASSERT_EQ(data[i].r, java_shift_left(data[i].x, data[i].shift));`
	`203`	`+ ASSERT_EQ(data[i].r, java_shift_left(data[i].x, data[i].shift, T_LONG));`
`202`	`204`	`}`
`203`	`205`	`}`
`204`	`206`
`@@ -262,13 +264,15 @@ TEST(TestJavaArithmetic, shift_right_jint) {`
`262`	`264`	`const volatile ShiftOpJintData* data = asr_jint_data;`
`263`	`265`	`for (size_t i = 0; i < ARRAY_SIZE(asr_jint_data); ++i) {`
`264`	`266`	`ASSERT_EQ(data[i].r, java_shift_right(data[i].x, data[i].shift));`
	`267`	`+ ASSERT_EQ(data[i].r, java_shift_right(data[i].x, data[i].shift, T_INT));`
`265`	`268`	`}`
`266`	`269`	`}`
`267`	`270`
`268`	`271`	`TEST(TestJavaArithmetic, shift_right_jlong) {`
`269`	`272`	`const volatile ShiftOpJlongData* data = asr_jlong_data;`
`270`	`273`	`for (size_t i = 0; i < ARRAY_SIZE(asr_jlong_data); ++i) {`
`271`	`274`	`ASSERT_EQ(data[i].r, java_shift_right(data[i].x, data[i].shift));`
	`275`	`+ ASSERT_EQ(data[i].r, java_shift_right(data[i].x, data[i].shift, T_LONG));`
`272`	`276`	`}`
`273`	`277`	`}`
`274`	`278`
`@@ -334,12 +338,14 @@ TEST(TestJavaArithmetic, shift_right_unsigned_jint) {`
`334`	`338`	`const volatile ShiftOpJintData* data = lsr_jint_data;`
`335`	`339`	`for (size_t i = 0; i < ARRAY_SIZE(lsr_jint_data); ++i) {`
`336`	`340`	`ASSERT_EQ(data[i].r, java_shift_right_unsigned(data[i].x, data[i].shift));`
	`341`	`+ ASSERT_EQ(data[i].r, java_shift_right_unsigned(data[i].x, data[i].shift, T_INT));`
`337`	`342`	`}`
`338`	`343`	`}`
`339`	`344`
`340`	`345`	`TEST(TestJavaArithmetic, shift_right_unsigned_jlong) {`
`341`	`346`	`const volatile ShiftOpJlongData* data = lsr_jlong_data;`
`342`	`347`	`for (size_t i = 0; i < ARRAY_SIZE(lsr_jlong_data); ++i) {`
`343`	`348`	`ASSERT_EQ(data[i].r, java_shift_right_unsigned(data[i].x, data[i].shift));`
	`349`	`+ ASSERT_EQ(data[i].r, java_shift_right_unsigned(data[i].x, data[i].shift, T_LONG));`
`344`	`350`	`}`
`345`	`351`	`}`