diff --git a/src/passes/CMakeLists.txt b/src/passes/CMakeLists.txt
index 16891caade3..fab2a6548d4 100644
--- a/src/passes/CMakeLists.txt
+++ b/src/passes/CMakeLists.txt
@@ -64,6 +64,7 @@ set(passes_SOURCES
   LLVMMemoryCopyFillLowering.cpp
   LocalCSE.cpp
   LocalSubtyping.cpp
+  LocalizeChildren.cpp
   LogExecution.cpp
   LoopInvariantCodeMotion.cpp
   Memory64Lowering.cpp
diff --git a/src/passes/LocalizeChildren.cpp b/src/passes/LocalizeChildren.cpp
new file mode 100644
index 00000000000..c431e5f91df
--- /dev/null
+++ b/src/passes/LocalizeChildren.cpp
@@ -0,0 +1,65 @@
+/*
+ * Copyright 2025 WebAssembly Community Group participants
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/localize.h"
+#include "ir/properties.h"
+#include "pass.h"
+#include "wasm.h"
+
+namespace wasm {
+
+// Localizes the children of a binary when one of them is a falling-through
+// constant (for example, a block that ends in a constant). With the children
+// in locals, later passes such as precompute-propagate and
+// optimize-instructions can optimize using the constant.
+struct LocalizeChildren : public WalkerPass<PostWalker<LocalizeChildren>> {
+  bool isFunctionParallel() override { return true; }
+
+  std::unique_ptr<Pass> create() override {
+    return std::make_unique<LocalizeChildren>();
+  }
+
+  // TODO add other things that might benefit
+  void visitBinary(Binary* curr) {
+    // Consider localizing when we see something potentially optimizable, like
+    // a falling-through constant.
+    if (isFallingThroughConstant(curr->left) ||
+        isFallingThroughConstant(curr->right)) {
+      ChildLocalizer localizer(
+        curr, getFunction(), *getModule(), getPassOptions());
+      auto* rep = localizer.getReplacement();
+      // Only replace curr when the localizer actually produced something new.
+      if (rep != curr) {
+        replaceCurrent(rep);
+      }
+    }
+  }
+
+private:
+  // Check if something is a falling-through constant (but not a constant
+  // already).
+  bool isFallingThroughConstant(Expression* curr) {
+    if (Properties::isSingleConstantExpression(curr)) {
+      return false;
+    }
+    curr = Properties::getFallthrough(curr, getPassOptions(), *getModule());
+    return Properties::isSingleConstantExpression(curr);
+  }
+};
+
+Pass* createLocalizeChildrenPass() { return new LocalizeChildren(); }
+
+} // namespace wasm
diff --git a/src/passes/pass.cpp b/src/passes/pass.cpp
index 5e98ef5d086..112cb6aec36 100644
--- a/src/passes/pass.cpp
+++ b/src/passes/pass.cpp
@@ -249,6 +249,9 @@ void PassRegistry::registerPasses() {
   registerPass("local-subtyping",
                "apply more specific subtypes to locals where possible",
                createLocalSubtypingPass);
+  registerPass("localize-children",
+               "move children with effects to locals",
+               createLocalizeChildrenPass);
   registerPass("log-execution",
                "instrument the build with logging of where execution goes",
                createLogExecutionPass);
@@ -643,6 +646,7 @@ void PassRunner::addDefaultFunctionOptimizationPasses() {
   addIfNoDWARFIssues("remove-unused-names");
   addIfNoDWARFIssues("remove-unused-brs");
   addIfNoDWARFIssues("remove-unused-names");
+  addIfNoDWARFIssues("localize-children");
   addIfNoDWARFIssues("optimize-instructions");
   if (wasm->features.hasGC()) {
     addIfNoDWARFIssues("heap-store-optimization");
diff --git a/src/passes/passes.h b/src/passes/passes.h
index e0c03bad8d7..54d8b0ea867 100644
--- a/src/passes/passes.h
+++ b/src/passes/passes.h
@@ -76,6 +76,7 @@ Pass* createLegalizeJSInterfacePass();
 Pass* createLimitSegmentsPass();
 Pass* createLocalCSEPass();
 Pass* createLocalSubtypingPass();
+Pass* createLocalizeChildrenPass();
 Pass*
createLogExecutionPass(); Pass* createIntrinsicLoweringPass(); Pass* createTraceCallsPass(); diff --git a/test/lit/help/wasm-metadce.test b/test/lit/help/wasm-metadce.test index 4c727bcbd1f..6620ceba383 100644 --- a/test/lit/help/wasm-metadce.test +++ b/test/lit/help/wasm-metadce.test @@ -250,6 +250,9 @@ ;; CHECK-NEXT: --local-subtyping apply more specific subtypes to ;; CHECK-NEXT: locals where possible ;; CHECK-NEXT: +;; CHECK-NEXT: --localize-children move children with effects to +;; CHECK-NEXT: locals +;; CHECK-NEXT: ;; CHECK-NEXT: --log-execution instrument the build with ;; CHECK-NEXT: logging of where execution goes ;; CHECK-NEXT: diff --git a/test/lit/help/wasm-opt.test b/test/lit/help/wasm-opt.test index a0d8d199f94..764d1dc17af 100644 --- a/test/lit/help/wasm-opt.test +++ b/test/lit/help/wasm-opt.test @@ -274,6 +274,9 @@ ;; CHECK-NEXT: --local-subtyping apply more specific subtypes to ;; CHECK-NEXT: locals where possible ;; CHECK-NEXT: +;; CHECK-NEXT: --localize-children move children with effects to +;; CHECK-NEXT: locals +;; CHECK-NEXT: ;; CHECK-NEXT: --log-execution instrument the build with ;; CHECK-NEXT: logging of where execution goes ;; CHECK-NEXT: diff --git a/test/lit/help/wasm2js.test b/test/lit/help/wasm2js.test index 881e18950e7..9ab3caa9118 100644 --- a/test/lit/help/wasm2js.test +++ b/test/lit/help/wasm2js.test @@ -214,6 +214,9 @@ ;; CHECK-NEXT: --local-subtyping apply more specific subtypes to ;; CHECK-NEXT: locals where possible ;; CHECK-NEXT: +;; CHECK-NEXT: --localize-children move children with effects to +;; CHECK-NEXT: locals +;; CHECK-NEXT: ;; CHECK-NEXT: --log-execution instrument the build with ;; CHECK-NEXT: logging of where execution goes ;; CHECK-NEXT: diff --git a/test/lit/passes/localize-children.wast b/test/lit/passes/localize-children.wast new file mode 100644 index 00000000000..25788c0891d --- /dev/null +++ b/test/lit/passes/localize-children.wast @@ -0,0 +1,179 @@ +;; NOTE: Assertions have been generated by 
update_lit_checks.py --all-items and should not be edited. +;; NOTE: This test was ported using port_passes_tests_to_lit.py and could be cleaned up. + +;; RUN: foreach %s %t wasm-opt --localize-children -S -o - | filecheck %s + +;; Also show a real-world testcase with interactions, see below. +;; RUN: foreach %s %t wasm-opt --localize-children --precompute-propagate --optimize-instructions -S -o - | filecheck %s --check-prefix=OPTS + +(module + ;; CHECK: (type $0 (func (result i32))) + + ;; CHECK: (type $1 (func (param i32) (result i32))) + + ;; CHECK: (memory $memory 10 20) + ;; OPTS: (type $0 (func (result i32))) + + ;; OPTS: (type $1 (func (param i32) (result i32))) + + ;; OPTS: (memory $memory 10 20) + (memory $memory 10 20) + + + ;; CHECK: (export "interactions" (func $interactions)) + + ;; CHECK: (func $fallthrough-const (result i32) + ;; CHECK-NEXT: (local $0 i32) + ;; CHECK-NEXT: (local $1 i32) + ;; CHECK-NEXT: (local.set $0 + ;; CHECK-NEXT: (call $fallthrough-const) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $1 + ;; CHECK-NEXT: (block (result i32) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (call $fallthrough-const) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (i32.const 42) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (i32.add + ;; CHECK-NEXT: (local.get $0) + ;; CHECK-NEXT: (local.get $1) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; OPTS: (export "interactions" (func $interactions)) + + ;; OPTS: (func $fallthrough-const (result i32) + ;; OPTS-NEXT: (local $0 i32) + ;; OPTS-NEXT: (local $1 i32) + ;; OPTS-NEXT: (local.set $0 + ;; OPTS-NEXT: (call $fallthrough-const) + ;; OPTS-NEXT: ) + ;; OPTS-NEXT: (local.set $1 + ;; OPTS-NEXT: (block (result i32) + ;; OPTS-NEXT: (drop + ;; OPTS-NEXT: (call $fallthrough-const) + ;; OPTS-NEXT: ) + ;; OPTS-NEXT: (i32.const 42) + ;; OPTS-NEXT: ) + ;; OPTS-NEXT: ) + ;; OPTS-NEXT: (i32.add + ;; OPTS-NEXT: (local.get $0) + ;; OPTS-NEXT: (i32.const 42) + ;; OPTS-NEXT: ) + ;; OPTS-NEXT: ) + (func $fallthrough-const (result i32) + 
;; We move the children out to locals, as there is a constant that looks + ;; promising to optimize. + (i32.add + (call $fallthrough-const) + (block (result i32) + (drop + (call $fallthrough-const) + ) + (i32.const 42) + ) + ) + ) + + ;; CHECK: (func $call-ignore (result i32) + ;; CHECK-NEXT: (i32.add + ;; CHECK-NEXT: (call $call-ignore) + ;; CHECK-NEXT: (i32.const 42) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; OPTS: (func $call-ignore (result i32) + ;; OPTS-NEXT: (i32.add + ;; OPTS-NEXT: (call $call-ignore) + ;; OPTS-NEXT: (i32.const 42) + ;; OPTS-NEXT: ) + ;; OPTS-NEXT: ) + (func $call-ignore (result i32) + ;; The call has effects, but we ignore it: we look for falling-through + ;; constants. + (i32.add + (call $call-ignore) + (i32.const 42) + ) + ) + + ;; CHECK: (func $nothing (result i32) + ;; CHECK-NEXT: (i32.add + ;; CHECK-NEXT: (i32.const 10) + ;; CHECK-NEXT: (i32.const 20) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; OPTS: (func $nothing (result i32) + ;; OPTS-NEXT: (i32.const 30) + ;; OPTS-NEXT: ) + (func $nothing (result i32) + ;; There is nothing for us to do here. 
+ (i32.add + (i32.const 10) + (i32.const 20) + ) + ) + + ;; CHECK: (func $interactions (param $x i32) (result i32) + ;; CHECK-NEXT: (local $1 i32) + ;; CHECK-NEXT: (local $2 i32) + ;; CHECK-NEXT: (local.set $1 + ;; CHECK-NEXT: (i32.load + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $2 + ;; CHECK-NEXT: (block (result i32) + ;; CHECK-NEXT: (i32.store + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (i32.ge_u + ;; CHECK-NEXT: (local.get $1) + ;; CHECK-NEXT: (local.get $2) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; OPTS: (func $interactions (param $x i32) (result i32) + ;; OPTS-NEXT: (local $1 i32) + ;; OPTS-NEXT: (local $2 i32) + ;; OPTS-NEXT: (local.set $1 + ;; OPTS-NEXT: (i32.load + ;; OPTS-NEXT: (local.get $x) + ;; OPTS-NEXT: ) + ;; OPTS-NEXT: ) + ;; OPTS-NEXT: (local.set $2 + ;; OPTS-NEXT: (block (result i32) + ;; OPTS-NEXT: (i32.store + ;; OPTS-NEXT: (local.get $x) + ;; OPTS-NEXT: (i32.const 0) + ;; OPTS-NEXT: ) + ;; OPTS-NEXT: (i32.const 0) + ;; OPTS-NEXT: ) + ;; OPTS-NEXT: ) + ;; OPTS-NEXT: (i32.const 1) + ;; OPTS-NEXT: ) + (func $interactions (export "interactions") (param $x i32) (result i32) + ;; The load and store interact here, which prevents merge-blocks from + ;; simply moving the store outside. However, this is optimizable as shown + ;; in OPTS: + ;; * --localize-children moves effects out of the >=. + ;; * --precompute-propagate propagates the 0 to one >= arm. + ;; * --optimize-instructions then sees (unsigned) >= 0 and applies the + ;; result of 1. + (i32.ge_u + (i32.load + (local.get $x) + ) + (block (result i32) + (i32.store + (local.get $x) + (i32.const 0) + ) + (i32.const 0) + ) + ) + ) +)