Skip to content

Commit f6d77cb

Browse files
tabjyrwestrel
authored andcommitted
8347555: [REDO] C2: implement optimization for series of Add of unique value
Reviewed-by: roland, epeter
1 parent 5594d6b commit f6d77cb

File tree

6 files changed

+875
-1
lines changed

6 files changed

+875
-1
lines changed

src/hotspot/share/opto/addnode.cpp

Lines changed: 173 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -396,9 +396,182 @@ Node* AddNode::IdealIL(PhaseGVN* phase, bool can_reshape, BasicType bt) {
396396
}
397397
}
398398

399+
// Collapse addition of the same terms into multiplications.
400+
Node* collapsed = Ideal_collapse_variable_times_con(phase, bt);
401+
if (collapsed != nullptr) {
402+
return collapsed; // Skip AddNode::Ideal() since it may now be a multiplication node.
403+
}
404+
399405
return AddNode::Ideal(phase, can_reshape);
400406
}
401407

408+
// Try to collapse addition of the same terms into a single multiplication. On success, a new MulNode is returned.
409+
// Examples of this conversion include:
410+
// - a + a + ... + a => CON*a
411+
// - (a * CON) + a => (CON + 1) * a
412+
// - a + (a * CON) => (CON + 1) * a
413+
//
414+
// We perform such conversions incrementally during IGVN by transforming the leftmost nodes first and working up to the root
415+
// of the expression. In other words, we convert, at each iteration:
416+
// a + a + a + ... + a
417+
// => 2*a + a + ... + a
418+
// => 3*a + ... + a
419+
// => n*a
420+
//
421+
// Due to the iterative nature of IGVN, MulNode transformed from first few AddNode terms may be further transformed into
422+
// power-of-2 pattern. (e.g., 2 * a => a << 1, 3 * a => (a << 1) + a). We can't guarantee we'll always pick up
423+
// transformed power-of-2 patterns when term `a` is complex.
424+
//
425+
// Note this also converts, for example, original expression `(a*3) + a` into `4*a` and `(a<<2) + a` into `5*a`. A more
426+
// generalized pattern `(a*b) + (a*c)` into `a*(b + c)` is handled by AddNode::IdealIL().
427+
Node* AddNode::Ideal_collapse_variable_times_con(PhaseGVN* phase, BasicType bt) {
  // Bail out when this node itself has the shape of a multiplication that was already expanded into a power-of-2
  // addition (e.g., 3 * a => (a << 1) + a). Collapsing it again would make GVN bounce between the two forms forever:
  // 3 * a => (a << 1) + a => 3 * a => (a << 1) + a => ...
  if (Multiplication::find_power_of_two_addition_pattern(this, bt).is_valid()) {
    return nullptr;
  }

  Node* const first = in(1);
  Node* const second = in(2);

  // First try to read `first` as `second * CON`; if that fails, try to read `second` as `first * CON`.
  Multiplication product = Multiplication::find_collapsible_addition_patterns(first, second, bt);
  if (!product.is_valid_with(second)) {
    product = Multiplication::find_collapsible_addition_patterns(second, first, bt);
    if (!product.is_valid_with(first)) {
      return nullptr;
    }
  }

  // The other operand of this AddNode contributes one more occurrence of the variable, hence `multiplier + 1`.
  // The constant is built with the width that matches `bt`.
  Node* const factor = (bt == T_INT)
      ? static_cast<Node*>(phase->intcon(java_add(static_cast<jint>(product.multiplier()), 1)))
      : static_cast<Node*>(phase->longcon(java_add(product.multiplier(), CONST64(1))));

  return MulNode::make(factor, product.variable(), bt);
}
456+
457+
// Find a pattern of collapsible additions that can be converted to a multiplication.
458+
// When matching the LHS `a * CON`, we match with best efforts by looking for the following patterns:
459+
// - (1) Simple addition: LHS = a + a
460+
// - (2) Simple lshift: LHS = a << CON
461+
// - (3) Simple multiplication: LHS = CON * a
462+
// - (4) Power-of-two addition: LHS = (a << CON1) + (a << CON2)
463+
AddNode::Multiplication AddNode::Multiplication::find_collapsible_addition_patterns(const Node* a, const Node* pattern, BasicType bt) {
  // Matchers are tried in this order; the first one that yields a valid result whose variable is `pattern` wins:
  //   (1) Simple addition pattern        (e.g., lhs = a + a)
  //   (2) Simple lshift pattern          (e.g., lhs = a << CON)
  //   (3) Simple multiplication pattern  (e.g., lhs = CON * a)
  //   (4) Power-of-two addition pattern  (e.g., lhs = (a << CON1) + (a << CON2))
  // While multiplications can be potentially optimized to power-of-2 subtractions (e.g., a * 7 => (a << 3) - a),
  // (x - y) + y => x is already handled by the Identity() methods, so that pattern is not checked here.
  using Matcher = Multiplication (*)(const Node*, BasicType);
  static constexpr Matcher matchers[] = {
    &Multiplication::find_simple_addition_pattern,
    &Multiplication::find_simple_lshift_pattern,
    &Multiplication::find_simple_multiplication_pattern,
    &Multiplication::find_power_of_two_addition_pattern
  };

  for (const Matcher match : matchers) {
    const Multiplication candidate = match(a, bt);
    if (candidate.is_valid_with(pattern)) {
      return candidate;
    }
  }

  // None of the matchers produced a multiplication of `pattern`.
  return make_invalid();
}
493+
494+
// Try to match `n = a + a`. On success, return a struct with `.valid = true`, `variable = a`, and `multiplier = 2`.
495+
// The method matches `n` for pattern: a + a.
496+
AddNode::Multiplication AddNode::Multiplication::find_simple_addition_pattern(const Node* n, BasicType bt) {
  // Reject anything that is not an addition (of type `bt`) of a node with itself.
  const bool is_add = n->Opcode() == Op_Add(bt);
  if (!is_add || n->in(1) != n->in(2)) {
    return make_invalid();
  }

  // a + a == 2 * a
  return Multiplication(n->in(1), 2);
}
503+
504+
// Try to match `n = a << CON`. On success, return a struct with `.valid = true`, `variable = a`, and
505+
// `multiplier = 1 << CON`.
506+
// Match `n` for pattern: a << CON.
507+
// Note that the power-of-2 multiplication optimization could potentially convert a MulNode to this pattern.
508+
AddNode::Multiplication AddNode::Multiplication::find_simple_lshift_pattern(const Node* n, BasicType bt) {
  // Only a left shift of the requested basic type qualifies.
  if (n->Opcode() != Op_LShift(bt)) {
    return make_invalid();
  }

  // The shift amount must be a constant that is not top.
  Node* const shift_amount = n->in(2);
  if (!shift_amount->is_Con() || shift_amount->is_top()) {
    return make_invalid();
  }

  // a << CON == a * (1 << CON); the shift is evaluated with the width selected by `bt`.
  return Multiplication(n->in(1), java_shift_left(1, shift_amount->get_int(), bt));
}
519+
520+
// Try to match `n = CON * a`. On success, return a struct with `.valid = true`, `variable = a`, and `multiplier = CON`.
521+
// Match `n` for patterns: CON * a
522+
// Note that `CON` will always be the second input node of a Mul node canonicalized by Ideal(). If this is not the case,
523+
// `n` has not been processed by iGVN. So we skip the optimization for the current add node and wait for it to be added to
524+
// the queue again.
525+
AddNode::Multiplication AddNode::Multiplication::find_simple_multiplication_pattern(const Node* n, BasicType bt) {
  // Only a multiplication of the requested basic type qualifies.
  if (n->Opcode() != Op_Mul(bt)) {
    return make_invalid();
  }

  // Only the second input is inspected for the constant factor; it must be a constant that is not top.
  Node* const factor = n->in(2);
  if (!factor->is_Con() || factor->is_top()) {
    return make_invalid();
  }

  // The constant is widened to jlong regardless of `bt`.
  return Multiplication(n->in(1), factor->get_integer_as_long(bt));
}
537+
538+
// Try to match `n = (a << CON1) + (a << CON2)`. On success, return a struct with `.valid = true`, `variable = a`, and
539+
// `multiplier = (1 << CON1) + (1 << CON2)`.
540+
// Match `n` for patterns:
541+
// - (1) (a << CON) + (a << CON)
542+
// - (2) (a << CON) + a
543+
// - (3) a + (a << CON)
544+
// - (4) a + a
545+
// Note that one or both of the terms of the addition could simply be `a` (i.e., a << 0) as in pattern (4).
546+
AddNode::Multiplication AddNode::Multiplication::find_power_of_two_addition_pattern(const Node* n, BasicType bt) {
  // Only an addition (of type `bt`) with two distinct inputs is considered.
  if (n->Opcode() != Op_Add(bt) || n->in(1) == n->in(2)) {
    return make_invalid();
  }

  Node* const left = n->in(1);
  Node* const right = n->in(2);
  const Multiplication left_shift = find_simple_lshift_pattern(left, bt);
  const Multiplication right_shift = find_simple_lshift_pattern(right, bt);

  // Pattern (1): (a << CON1) + (a << CON2) -- both sides are shifts of the same variable.
  const Multiplication sum = left_shift.add(right_shift);
  if (sum.is_valid()) {
    return sum;
  }

  // Pattern (2): (a << CON) + a -- the right input is the variable shifted on the left.
  if (left_shift.is_valid_with(right)) {
    return Multiplication(left_shift.variable(), java_add(left_shift.multiplier(), CONST64(1)));
  }

  // Pattern (3): a + (a << CON) -- the left input is the variable shifted on the right.
  if (right_shift.is_valid_with(left)) {
    return Multiplication(right_shift.variable(), java_add(right_shift.multiplier(), CONST64(1)));
  }

  // Pattern (4), which is equivalent to a simple addition pattern.
  // NOTE(review): this point is only reached when in(1) != in(2), while find_simple_addition_pattern() requires
  // in(1) == in(2), so this call appears to always yield an invalid result -- confirm whether that is intended.
  return find_simple_addition_pattern(n, bt);
}
402575

403576
Node* AddINode::Ideal(PhaseGVN* phase, bool can_reshape) {
404577
Node* in1 = in(1);

src/hotspot/share/opto/addnode.hpp

Lines changed: 46 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,51 @@ typedef const Pair<Node*, jint> ConstAddOperands;
4242
// by virtual functions.
4343
class AddNode : public Node {
4444
virtual uint hash() const;
45-
public:
45+
46+
class Multiplication {
47+
bool _is_valid = false;
48+
49+
Node* _variable = nullptr;
50+
jlong _multiplier = 0;
51+
52+
private:
53+
Multiplication() {}
54+
55+
public:
56+
Multiplication(Node* variable, jlong multiplier) :
57+
_is_valid(true),
58+
_variable(variable),
59+
_multiplier(multiplier) {}
60+
61+
static Multiplication make_invalid() {
62+
static Multiplication invalid = Multiplication();
63+
return invalid;
64+
}
65+
66+
static Multiplication find_collapsible_addition_patterns(const Node* a, const Node* pattern, BasicType bt);
67+
static Multiplication find_simple_addition_pattern(const Node* n, BasicType bt);
68+
static Multiplication find_simple_lshift_pattern(const Node* n, BasicType bt);
69+
static Multiplication find_simple_multiplication_pattern(const Node* n, BasicType bt);
70+
static Multiplication find_power_of_two_addition_pattern(const Node* n, BasicType bt);
71+
72+
Multiplication add(const Multiplication rhs) const {
73+
if (is_valid_with(rhs.variable()) && rhs.is_valid_with(variable())) {
74+
return Multiplication(variable(), java_add(multiplier(), rhs.multiplier()));
75+
}
76+
77+
return make_invalid();
78+
}
79+
80+
bool is_valid() const { return _is_valid; }
81+
bool is_valid_with(const Node* variable) const {
82+
return _is_valid && this->_variable == variable;
83+
}
84+
85+
Node* variable() const { return _variable; }
86+
jlong multiplier() const { return _multiplier; }
87+
};
88+
89+
public:
4690
AddNode( Node *in1, Node *in2 ) : Node(nullptr,in1,in2) {
4791
init_class_id(Class_Add);
4892
}
@@ -55,6 +99,7 @@ class AddNode : public Node {
5599
// and flatten expressions (so that 1+x+2 becomes x+3).
56100
virtual Node* Ideal(PhaseGVN* phase, bool can_reshape);
57101
Node* IdealIL(PhaseGVN* phase, bool can_reshape, BasicType bt);
102+
Node* Ideal_collapse_variable_times_con(PhaseGVN* phase, BasicType bt);
58103

59104
// Compute a new Type for this node. Basically we just do the pre-check,
60105
// then call the virtual add() to set the type.

src/hotspot/share/utilities/globalDefinitions.hpp

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1253,6 +1253,24 @@ JAVA_INTEGER_SHIFT_OP(>>, java_shift_right_unsigned, jlong, julong)
12531253

12541254
#undef JAVA_INTEGER_SHIFT_OP
12551255

1256+
// Some convenient bit shift operations that accept a BasicType as the last
1257+
// argument. These avoid potential mistakes with overloaded functions only
1258+
// distinguished by lhs argument type.
1259+
#define JAVA_INTEGER_SHIFT_BASIC_TYPE(FUNC)            \
inline jlong FUNC(jlong lhs, jint rhs, BasicType bt) { \
  if (bt == T_INT) {                                   \
    return FUNC(static_cast<jint>(lhs), rhs);          \
  }                                                    \
  assert(bt == T_LONG, "unsupported basic type");      \
  return FUNC(lhs, rhs);                               \
}

// For T_INT, `lhs` is narrowed to jint, shifted with the jint overload, and the jint result is converted back to
// jlong by the return. For T_LONG the jlong overload is used directly. Any other basic type trips the assert.
JAVA_INTEGER_SHIFT_BASIC_TYPE(java_shift_left)
JAVA_INTEGER_SHIFT_BASIC_TYPE(java_shift_right)
JAVA_INTEGER_SHIFT_BASIC_TYPE(java_shift_right_unsigned)

// Keep the helper macro local to this section. The name must match the #define above exactly, otherwise the macro
// stays defined for every file that includes this header.
#undef JAVA_INTEGER_SHIFT_BASIC_TYPE
1273+
12561274
//----------------------------------------------------------------------------------------------------
12571275
// The goal of this code is to provide saturating operations for int/uint.
12581276
// Checks overflow conditions and saturates the result to min_jint/max_jint.

test/hotspot/gtest/utilities/test_java_arithmetic.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -192,13 +192,15 @@ TEST(TestJavaArithmetic, shift_left_jint) {
192192
const volatile ShiftOpJintData* data = asl_jint_data;
193193
for (size_t i = 0; i < ARRAY_SIZE(asl_jint_data); ++i) {
194194
ASSERT_EQ(data[i].r, java_shift_left(data[i].x, data[i].shift));
195+
ASSERT_EQ(data[i].r, java_shift_left(data[i].x, data[i].shift, T_INT));
195196
}
196197
}
197198

198199
TEST(TestJavaArithmetic, shift_left_jlong) {
  // Every table entry is checked against both the two-argument overload and the BasicType-taking overload.
  const volatile ShiftOpJlongData* const cases = asl_jlong_data;
  const size_t case_count = ARRAY_SIZE(asl_jlong_data);
  for (size_t idx = 0; idx < case_count; ++idx) {
    const volatile ShiftOpJlongData& entry = cases[idx];
    ASSERT_EQ(entry.r, java_shift_left(entry.x, entry.shift));
    ASSERT_EQ(entry.r, java_shift_left(entry.x, entry.shift, T_LONG));
  }
}
204206

@@ -262,13 +264,15 @@ TEST(TestJavaArithmetic, shift_right_jint) {
262264
const volatile ShiftOpJintData* data = asr_jint_data;
263265
for (size_t i = 0; i < ARRAY_SIZE(asr_jint_data); ++i) {
264266
ASSERT_EQ(data[i].r, java_shift_right(data[i].x, data[i].shift));
267+
ASSERT_EQ(data[i].r, java_shift_right(data[i].x, data[i].shift, T_INT));
265268
}
266269
}
267270

268271
TEST(TestJavaArithmetic, shift_right_jlong) {
  // Every table entry is checked against both the two-argument overload and the BasicType-taking overload.
  const volatile ShiftOpJlongData* const cases = asr_jlong_data;
  const size_t case_count = ARRAY_SIZE(asr_jlong_data);
  for (size_t idx = 0; idx < case_count; ++idx) {
    const volatile ShiftOpJlongData& entry = cases[idx];
    ASSERT_EQ(entry.r, java_shift_right(entry.x, entry.shift));
    ASSERT_EQ(entry.r, java_shift_right(entry.x, entry.shift, T_LONG));
  }
}
274278

@@ -334,12 +338,14 @@ TEST(TestJavaArithmetic, shift_right_unsigned_jint) {
334338
const volatile ShiftOpJintData* data = lsr_jint_data;
335339
for (size_t i = 0; i < ARRAY_SIZE(lsr_jint_data); ++i) {
336340
ASSERT_EQ(data[i].r, java_shift_right_unsigned(data[i].x, data[i].shift));
341+
ASSERT_EQ(data[i].r, java_shift_right_unsigned(data[i].x, data[i].shift, T_INT));
337342
}
338343
}
339344

340345
TEST(TestJavaArithmetic, shift_right_unsigned_jlong) {
  // Every table entry is checked against both the two-argument overload and the BasicType-taking overload.
  const volatile ShiftOpJlongData* const cases = lsr_jlong_data;
  const size_t case_count = ARRAY_SIZE(lsr_jlong_data);
  for (size_t idx = 0; idx < case_count; ++idx) {
    const volatile ShiftOpJlongData& entry = cases[idx];
    ASSERT_EQ(entry.r, java_shift_right_unsigned(entry.x, entry.shift));
    ASSERT_EQ(entry.r, java_shift_right_unsigned(entry.x, entry.shift, T_LONG));
  }
}

0 commit comments

Comments
 (0)