From dd2eec0292f05e882da5f3be7ac97bc0fe0a4159 Mon Sep 17 00:00:00 2001 From: Akihiro Hayashi Date: Fri, 29 Jan 2016 10:39:10 -0600 Subject: [PATCH 01/24] CMakeList.txt updated to build the LLVM-based locality optimization pass. --- CMakeLists.txt | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index 6fe4a29..b4b7f38 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -55,7 +55,10 @@ set(LLVM_LIBRARY_OUTPUT_INTDIR "${CMAKE_BINARY_DIR}/lib/${CMAKE_CFG_INT_DIR}") set(SOURCES llvmAggregateGlobalOps.cpp llvmGlobalToWide.cpp + llvmLocalityOptimization.cpp llvmUtil.cpp + ValueTable.cpp + IGraph.cpp ) add_llvm_loadable_module( llvm-pgas ${SOURCES} ) From dccc2b8aea94482469f4e213cb54ec1458acab04 Mon Sep 17 00:00:00 2001 From: Akihiro Hayashi Date: Fri, 29 Jan 2016 10:42:01 -0600 Subject: [PATCH 02/24] The LLVM locality optimization pass added. --- IGraph.cpp | 77 ++ IGraph.h | 276 +++++++ ValueTable.cpp | 190 +++++ ValueTable.h | 171 ++++ llvmLocalityOptimization.cpp | 1423 ++++++++++++++++++++++++++++++++++ llvmLocalityOptimization.h | 36 + 6 files changed, 2173 insertions(+) create mode 100644 IGraph.cpp create mode 100644 IGraph.h create mode 100644 ValueTable.cpp create mode 100644 ValueTable.h create mode 100644 llvmLocalityOptimization.cpp create mode 100644 llvmLocalityOptimization.h diff --git a/IGraph.cpp b/IGraph.cpp new file mode 100644 index 0000000..8c038d2 --- /dev/null +++ b/IGraph.cpp @@ -0,0 +1,77 @@ +/* + * Copyright 2004-2015 Cray Inc. + * Other additional copyright holders may be indicated within. + * + * The entirety of this work is licensed under the Apache License, + * Version 2.0 (the "License"); you may not use this file except + * in compliance with the License. 
+ * + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +//===----------------------------------------------------------------------===// +// Chapel LLVM Locality Optimization by Akihiro Hayashi (ahayashi@rice.edu) +//===----------------------------------------------------------------------===// +// SSA Value Graph for Locality Inference +//===----------------------------------------------------------------------===// + +#include "IGraph.h" + +#include +#include +#include + +using namespace std; +using namespace llvm; + +unsigned int +Node::getAddressSpace(Value* v) +{ + PointerType* pt = dyn_cast(v->getType()); + if(pt) { + return pt->getAddressSpace(); + } else { + return 0; + } +} + +void +Node::initLLMap() +{ + LLMap[value] = getAddressSpace(value); + // Instruction + Instruction *insn = dyn_cast(value); + if (!insn) return; + if (insn->getOpcode() != Instruction::Call) { + for(unsigned int i=0; i < insn->getNumOperands(); i++) { + Value *op = insn->getOperand(i); + LLMap[op] = getAddressSpace(op); + } + } + switch(insn->getOpcode()) { + case Instruction::Call: { + CallInst *call = cast(insn); + Function* f = call->getCalledFunction(); + if (f != NULL) { + /* * + * We are assuming that gf.addr function calls correspond to Chapel's local statements, but this is not always true because gf.addr is also used to extract a local pointer from a wide pointer. We work on this later pass (see exemptionTest in llvmLocalityOptimization.cpp). 
+ */ + if (f->getName().startswith(".gf.addr")) { + // Argument of ".gf.addr" is definitely local + for (unsigned int i = 0; i < call->getNumArgOperands(); i++) { + Value* v = call->getArgOperand(i); + LLMap[v] = 0; + } + } + } + break; + } + } +} diff --git a/IGraph.h b/IGraph.h new file mode 100644 index 0000000..8417253 --- /dev/null +++ b/IGraph.h @@ -0,0 +1,276 @@ +/* + * Copyright 2004-2015 Cray Inc. + * Other additional copyright holders may be indicated within. + * + * The entirety of this work is licensed under the Apache License, + * Version 2.0 (the "License"); you may not use this file except + * in compliance with the License. + * + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +//===----------------------------------------------------------------------===// +// Chapel LLVM Locality Optimization by Akihiro Hayashi (ahayashi@rice.edu) +//===----------------------------------------------------------------------===// +// SSA Value Graph for Locality Inference +//===----------------------------------------------------------------------===// + +#ifndef _IGRAPH_H_ +#define _IGRAPH_H_ + +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/GraphTraits.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/IR/Value.h" +#include "llvm/Support/DOTGraphTraits.h" +#include "llvm/Support/raw_ostream.h" +#include "llvmUtil.h" + +#if HAVE_LLVM_VER >= 35 +#else +#include "llvm/Assembly/Writer.h" +#endif + +#include +#include +using namespace std; +using namespace llvm; + +class Node { +private: + StringRef name; + Value* value; + vector children; + vector parents; + DenseMap LLMap; + unsigned int getAddressSpace(Value *v); + void initLLMap(); + +public: + Node(Value* _value) { value = _value; initLLMap(); }; + + // parents + vector::iterator parents_begin() { return parents.begin(); } + vector::iterator parents_end() { return parents.end(); } + // children + vector::iterator begin() { return children.begin(); } + vector::iterator end() { return children.end(); } + vector::const_iterator begin() const { return children.begin(); } + vector::const_iterator end() const { return children.end(); } + + int getLL(Value* v) const { + int ll; + if (LLMap.find(v) != LLMap.end()) { + ll = LLMap.find(v)->second; + } else { + ll = -1; + } + return ll; + } + + void addParents(Node* parent) { + parents.push_back(parent); + } + + void addChild(Node *child) { + vector::iterator I = find(children.begin(), children.end(), child); + if( I == children.end() ){ + children.push_back(child); + child->addParents(this); + } + } + + StringRef getName() const { return name; } + Value* getValue() const { return value; } + +}; + +class IGraph { +private: + StringRef name; + Node* 
entry; + vector nodes; + vector getRootNodes() { return nodes; } +public: + IGraph (StringRef _name) { name = _name; } + Node* getEntry() const { return entry; } + StringRef getName() const { return name; } + + vector::iterator begin() { return nodes.begin(); } + vector::iterator end() { return nodes.end(); } + vector::const_iterator begin() const { return nodes.begin(); } + vector::const_iterator end() const { return nodes.end(); } + + Node* getNodeByValue(const Value* v) { + for (vector::iterator I = nodes.begin(), E = nodes.end(); I != E; I++) { + Node* tmp = *I; + if (v == tmp->getValue()) { + return tmp; + } + } + return NULL; + } + void addNode(Node* n) { nodes.push_back(n); } + + unsigned size() const { return nodes.size(); } + void createGraphVizFile(const char* fileName); + + // for GDB + void dump(); +}; + +namespace llvm { + template<> struct GraphTraits { + typedef Node NodeType; + typedef std::vector::iterator ChildIteratorType; + + static NodeType *getEntryNode(Node *node) { return node; } + static inline ChildIteratorType child_begin(NodeType *N) { return N->begin(); } + static inline ChildIteratorType child_end(NodeType *N) { return N->end(); } + + }; + template<> struct GraphTraits { + typedef const Node NodeType; + typedef vector::const_iterator ChildIteratorType; + + static NodeType *getEntryNode(const Node *node) { return node; } + static inline ChildIteratorType child_begin(const NodeType *N) { return N->begin(); } + static inline ChildIteratorType child_end(const NodeType *N) { return N->end(); } + + }; + + template<> struct GraphTraits : public GraphTraits { + static NodeType *getEntryNode(IGraph *G) { return G->getEntry(); } + typedef std::vector::iterator nodes_iterator; + + static nodes_iterator nodes_begin(IGraph *G) { return G->begin(); } + static nodes_iterator nodes_end(IGraph *G) { return G->end(); } + static unsigned nodes_size(IGraph *G) { return G->size(); } + }; + + template<> struct GraphTraits : public GraphTraits { + static 
NodeType *getEntryNode(const IGraph *G) { return G->getEntry(); } + typedef vector::const_iterator nodes_iterator; + + static nodes_iterator nodes_begin(const IGraph *G) { return G->begin(); } + static nodes_iterator nodes_end(const IGraph *G) { return G->end(); } + static unsigned nodes_size(const IGraph *G) { return G->size(); } + }; + + template<> struct DOTGraphTraits : public DefaultDOTGraphTraits { + DOTGraphTraits (bool isSimple=false) : DefaultDOTGraphTraits(isSimple) {} + + static std::string getGraphName(const IGraph* G) { + return "Inequality Graph for '" + G->getName().str(); + } + + static std::string getSimpleNodeLabel(const Node* node, + const IGraph *) { + if (!node->getName().empty()) + return node->getName().str(); + + std::string Str; + raw_string_ostream OS(Str); + const Value *value = node->getValue(); +#if HAVE_LLVM_VER >= 35 + OS << value->getName(); +#else + WriteAsOperand(OS, value, false); +#endif + OS << "FIXME"; + return OS.str(); + } + + static std::string getCompleteNodeLabel(const Node *node, + const IGraph *) { + std::string Str; + raw_string_ostream OS(Str); + if (node->getName().empty()) { +#if HAVE_LLVM_VER >= 35 + OS << node->getValue()->getName(); +#else + WriteAsOperand(OS, node->getValue(), false); +#endif + OS << ", LL("; +#if HAVE_LLVM_VER >= 35 + OS << node->getValue()->getName(); +#else + WriteAsOperand(OS, node->getValue(), false); +#endif + OS << ") = " << node->getLL(node->getValue()) << " : "; + } + OS << *node->getValue(); + OS << " "; + Instruction *insn = dyn_cast(node->getValue()); + if (insn) { + for(unsigned int i=0; i < insn->getNumOperands(); i++) { + Value *op = insn->getOperand(i); + OS << ", LL("; +#if HAVE_LLVM_VER >= 35 + OS << op->getName(); +#else + WriteAsOperand(OS, op, false); +#endif + OS << ") = " << node->getLL(op); + } + } + std::string OutStr = OS.str(); + if (OutStr[0] == '\n') OutStr.erase(OutStr.begin()); + + // Process string output to make it nicer... 
+ for (unsigned i = 0; i != OutStr.length(); ++i) { + if (OutStr[i] == '\n') { // Left justify + OutStr[i] = '\\'; + OutStr.insert(OutStr.begin()+i+1, 'l'); + } else if (OutStr[i] == ';') { // Delete comments! + unsigned Idx = OutStr.find('\n', i+1); // Find end of line + OutStr.erase(OutStr.begin()+i, OutStr.begin()+Idx); + --i; + } + } + return OutStr; + } + + std::string getNodeLabel(const Node *node, + const IGraph *graph) { + if (isSimple()) + return getSimpleNodeLabel(node, graph); + else + return getCompleteNodeLabel(node, graph); + } + + static std::string getEdgeSourceLabel(const Node *node, + vector::const_iterator I) { +#if 0 + // Label source of conditional branches with "T" or "F" + if (const BranchInst *BI = dyn_cast(Node->getTerminator())) + if (BI->isConditional()) + return (I == succ_begin(Node)) ? "T" : "F"; + + // Label source of switch edges with the associated value. + if (const SwitchInst *SI = dyn_cast(Node->getTerminator())) { + unsigned SuccNo = I.getSuccessorIndex(); + + if (SuccNo == 0) return "def"; + + std::string Str; + raw_string_ostream OS(Str); + SwitchInst::ConstCaseIt Case = + SwitchInst::ConstCaseIt::fromSuccessorIndex(SI, SuccNo); + OS << Case.getCaseValue()->getValue(); + return OS.str(); + } +#endif + return ""; + } + }; +} + +#endif // _IGRAPH_H_ diff --git a/ValueTable.cpp b/ValueTable.cpp new file mode 100644 index 0000000..2e99d1d --- /dev/null +++ b/ValueTable.cpp @@ -0,0 +1,190 @@ +/* + * Copyright 2004-2015 Cray Inc. + * Other additional copyright holders may be indicated within. + * + * The entirety of this work is licensed under the Apache License, + * Version 2.0 (the "License"); you may not use this file except + * in compliance with the License. 
+ * + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +//===----------------------------------------------------------------------===// +// Chapel LLVM Locality Optimization by Akihiro Hayashi (ahayashi@rice.edu) +//===----------------------------------------------------------------------===// +// This is a reduced version of the original LLVM global value numbering pass. (GVN.cpp) +// This pass is only used for assigning a value number to variables and expressions and +// does not perform any CSE (common subexpression elimination) +//===----------------------------------------------------------------------===// + +#include "ValueTable.h" +#include "llvmGlobalToWide.h" + +Expression ValueTable::create_expression(Instruction *I) { + Expression e; + e.type = I->getType(); + e.opcode = I->getOpcode(); + for (Instruction::op_iterator OI = I->op_begin(), OE = I->op_end(); + OI != OE; ++OI) + e.varargs.push_back(lookup_or_add(*OI)); + if (I->isCommutative()) { + // Ensure that commutative instructions that only differ by a permutation + // of their operands get the same value number by sorting the operand value + // numbers. Since all commutative instructions have two operands it is more + // efficient to sort by hand rather than using, say, std::sort. + assert(I->getNumOperands() == 2 && "Unsupported commutative instruction!"); + if (e.varargs[0] > e.varargs[1]) + std::swap(e.varargs[0], e.varargs[1]); + } + + if (CmpInst *C = dyn_cast(I)) { + // Sort the operand value numbers so xx get the same value number. 
+ CmpInst::Predicate Predicate = C->getPredicate(); + if (e.varargs[0] > e.varargs[1]) { + std::swap(e.varargs[0], e.varargs[1]); + Predicate = CmpInst::getSwappedPredicate(Predicate); + } + e.opcode = (C->getOpcode() << 8) | Predicate; + } else if (InsertValueInst *E = dyn_cast(I)) { + for (InsertValueInst::idx_iterator II = E->idx_begin(), IE = E->idx_end(); + II != IE; ++II) + e.varargs.push_back(*II); + } + + return e; +} + +//===----------------------------------------------------------------------===// +// ValueTable External Functions +//===----------------------------------------------------------------------===// + +/// add - Insert a value into the table with a specified value number. +void ValueTable::add(Value *V, uint32_t num) { + valueNumbering.insert(std::make_pair(V, num)); +} + +uint32_t ValueTable::lookup_or_add_call(CallInst *C) { + Function *F = C->getCalledFunction(); + if (F != NULL && F->hasName() && F->getName().startswith(GLOBAL_FN_GLOBAL_ADDR)) { + Value *op = C->getOperand(0); + DenseMap::const_iterator VI = valueNumbering.find(op); + if (VI != valueNumbering.end()) { + return VI->second; + } else { + // Not numbered yet + return lookup_or_add(op); + } + } else { + valueNumbering[C] = nextValueNumber; + return nextValueNumber++; + } +} + +/// lookup_or_add - Returns the value number for the specified value, assigning +/// it a new number if it did not have one before. 
+uint32_t ValueTable::lookup_or_add(Value *V) { + DenseMap::iterator VI = valueNumbering.find(V); + if (VI != valueNumbering.end()) + return VI->second; + + if (!isa(V)) { +// errs () << *V << " => " << nextValueNumber << "\n"; + valueNumbering[V] = nextValueNumber; + return nextValueNumber++; + } + + Instruction* I = cast(V); + Expression exp; + switch (I->getOpcode()) { + case Instruction::Call: + return lookup_or_add_call(cast(I)); + case Instruction::Add: + case Instruction::FAdd: + case Instruction::Sub: + case Instruction::FSub: + case Instruction::Mul: + case Instruction::FMul: + case Instruction::UDiv: + case Instruction::SDiv: + case Instruction::FDiv: + case Instruction::URem: + case Instruction::SRem: + case Instruction::FRem: + case Instruction::Shl: + case Instruction::LShr: + case Instruction::AShr: + case Instruction::And: + case Instruction::Or: + case Instruction::Xor: + case Instruction::ICmp: + case Instruction::FCmp: + case Instruction::Trunc: + case Instruction::ZExt: + case Instruction::SExt: + case Instruction::FPToUI: + case Instruction::FPToSI: + case Instruction::UIToFP: + case Instruction::SIToFP: + case Instruction::FPTrunc: + case Instruction::FPExt: + case Instruction::PtrToInt: + case Instruction::IntToPtr: + case Instruction::BitCast: + case Instruction::Select: + case Instruction::ExtractElement: + case Instruction::InsertElement: + case Instruction::ShuffleVector: + case Instruction::InsertValue: + case Instruction::GetElementPtr: + exp = create_expression(I); + break; + case Instruction::ExtractValue: +// exp = create_extractvalue_expression(cast(I)); +// break; + default: + valueNumbering[V] = nextValueNumber; + return nextValueNumber++; + } + + uint32_t& e = expressionNumbering[exp]; + if (!e) e = nextValueNumber++; + valueNumbering[V] = e; + return e; +} + +/// lookup - Returns the value number of the specified value. Fails if +/// the value has not yet been numbered. 
+uint32_t ValueTable::lookup(Value *V) const { + DenseMap::const_iterator VI = valueNumbering.find(V); + assert(VI != valueNumbering.end() && "Value not numbered?"); + return VI->second; +} + +/// clear - Remove all entries from the ValueTable. +void ValueTable::clear() { + valueNumbering.clear(); + expressionNumbering.clear(); + nextValueNumber = 1; +} + +/// erase - Remove a value from the value numbering. +void ValueTable::erase(Value *V) { + valueNumbering.erase(V); +} + +/// verifyRemoved - Verify that the value is removed from all internal data +/// structures. +void ValueTable::verifyRemoved(const Value *V) const { + for (DenseMap::const_iterator + I = valueNumbering.begin(), E = valueNumbering.end(); I != E; ++I) { + assert(I->first != V && "Inst still occurs in value numbering map!"); + } +} diff --git a/ValueTable.h b/ValueTable.h new file mode 100644 index 0000000..2774329 --- /dev/null +++ b/ValueTable.h @@ -0,0 +1,171 @@ +/* + * Copyright 2004-2015 Cray Inc. + * Other additional copyright holders may be indicated within. + * + * The entirety of this work is licensed under the Apache License, + * Version 2.0 (the "License"); you may not use this file except + * in compliance with the License. + * + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +//===----------------------------------------------------------------------===// +// Chapel LLVM Locality Optimization by Akihiro Hayashi (ahayashi@rice.edu) +//===----------------------------------------------------------------------===// +// This is a reduced version of the original LLVM global value numbering pass. 
(GVN.cpp) +// This pass is only used for assigning a value number to variables and expressions and +// does not perform any CSE (common subexpression elimination) +//===----------------------------------------------------------------------===// + +#ifndef _VALUE_TABLE_H_ +#define _VALUE_TABLE_H_ + +#include "llvm/IR/Value.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/Hashing.h" + +#include "llvmUtil.h" + +using namespace llvm; + +struct Expression { + uint32_t opcode; + Type *type; + SmallVector varargs; + + Expression(uint32_t o = ~2U) : opcode(o) { } + + bool operator==(const Expression &other) const { + if (opcode != other.opcode) + return false; + if (opcode == ~0U || opcode == ~1U) + return true; + if (type != other.type) + return false; + if (varargs != other.varargs) + return false; + return true; + } + + // ignore type equality + bool equals(const Expression &other) const { + if (opcode != other.opcode) + return false; + if (opcode == ~0U || opcode == ~1U) + return true; + if (varargs != other.varargs) + return false; + return true; + } + + friend hash_code hash_value(const Expression &Value) { + return hash_combine(Value.opcode, Value.type, + hash_combine_range(Value.varargs.begin(), + Value.varargs.end())); + } + + void dump() { + errs () << "Ope" << opcode << " "; + for (SmallVector::iterator I = varargs.begin(), E = varargs.end(); I != E; I++) { + uint32_t val = *I; + errs () << val << ", "; + } + errs () << "\n"; + } +}; + +namespace llvm { + template <> struct DenseMapInfo { + static inline Expression getEmptyKey() { + return ~0U; + } + + static inline Expression getTombstoneKey() { + return ~1U; + } + + static unsigned getHashValue(const Expression e) { + using llvm::hash_value; + return static_cast(hash_value(e)); + } + static bool isEqual(const Expression &LHS, const Expression &RHS) { + return LHS == RHS; + } + }; + +} + +class ValueTable { + DenseMap valueNumbering; + DenseMap expressionNumbering; + 
uint32_t nextValueNumber; + + Expression create_expression(Instruction* I); + uint32_t lookup_or_add_call(CallInst* C); + +public: + ValueTable() : nextValueNumber(1) { } + + uint32_t lookup_or_add(Value *V); + uint32_t lookup(Value *V) const; + + Value* lookup_value(uint32_t id) { + for (DenseMap::const_iterator + I = valueNumbering.begin(), E = valueNumbering.end(); I != E; ++I) { + Value* val = I->first; + uint32_t num = I->second; + if (num == id) { + return val; + } + } + return NULL; + } + + bool sameExpressions(Instruction *I1, Instruction *I2) { + Expression e1 = create_expression(I1); + Expression e2 = create_expression(I2); + if (e1.equals(e2)) { + return true; + } else { + return false; + } + } + + void add(Value *V, uint32_t num); + void clear(); + void erase(Value *v); + + uint32_t getNextUnusedValueNumber() { return nextValueNumber; } + void verifyRemoved(const Value *) const; + + void dump() { + errs () << "[Value Table Dump Starts]\n"; + for (DenseMap::const_iterator + I = valueNumbering.begin(), E = valueNumbering.end(); I != E; ++I) { + Value* val = I->first; + uint32_t num = I->second; + errs () << num << " : " << *val << "\n"; + } + errs () << "[Value Table Dump Ends]\n"; + errs () << "[Expresion Table Dump Starts]\n"; + for (DenseMap::const_iterator + I = expressionNumbering.begin(), E = expressionNumbering.end(); I != E; ++I) { + Expression val = I->first; + uint32_t num = I->second; + //errs () << num << " : Ope" << val.opcode << "\n"; + errs () << num << " : "; + val.dump(); + } + errs () << "[Expresion Table Dump Ends]\n"; + } +}; +#endif diff --git a/llvmLocalityOptimization.cpp b/llvmLocalityOptimization.cpp new file mode 100644 index 0000000..2d867f4 --- /dev/null +++ b/llvmLocalityOptimization.cpp @@ -0,0 +1,1423 @@ +/* + * Copyright 2004-2015 Cray Inc. + * Other additional copyright holders may be indicated within. 
+ * + * The entirety of this work is licensed under the Apache License, + * Version 2.0 (the "License"); you may not use this file except + * in compliance with the License. + * + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +//===----------------------------------------------------------------------===// +// Chapel LLVM Locality Optimization by Akihiro Hayashi (ahayashi@rice.edu) +//===----------------------------------------------------------------------===// +// LLVM-based Locality Inference Pass (Locality Optimization Pass) +// This pass tries to convert possibly-remote access (addrspace(100)* access) +// to definitely-local to avoid runtime affinity checking overheads. +// +// To infer the locality, the locality optimization pass tries to utilize +// the following information: +// - Case 1. Scalar access enclosed by Chapel's LOCAL statement. +// proc localizeByLocalStmt(ref x) : int { +// var p: int = 1; +// local { p = x; } +// return p + x; // x is definitely local +// } +// The locality level of x is inferred by searching the SSA value graph, +// which is implemented in IGraph.[h|cpp]. +// When you specify debugThisFn, the pass generates a .dot file +// that can be visualized by the graphviz tool. (http://www.graphviz.org/) +// +// - Case 2. Array access enclosed by Chapel's LOCAL statement. +// proc habanero(A) : int { +// A(1) = 1; // A(1) is definitely local +// local { A(1) = 2; } +// A(2) = 3; // A(2) is possibly remote +// } +// This pass is element-sensitive. 
For example, +// the locality of A(1) is "definitely-local", +// but the pass leaves A(2) "possibly-remote" since there is not enough +// information about the locality of A(2). +// This is done by using a reduced version of LLVM's global value numbering +// pass (in ValueTable.[h|cpp]) and an array offset analysis. +// +// - Case 3. Locale-local array declaration +// proc localizeByArrayDecl () { +// var A: [1..10] int; +// return A(5); +// } +// The locality of A(5) is "definitely-local" since an array A is declared in this scope. +// Note that this pass is not element-sensitive so far. +// +// Limitation, TODOs and future work: +// (Limitation) Locality Inference using SSA Value Graph with if statements: +// The current implementation does not propagate a condition even if a local statement is enclosed by an if statement. +// Hence, we may fail to infer the locality in some cases. +// (e.g. if (condition) { local{ p = x } }) +// +// (Limitation) Chapel's local statement detection: +// Currently, we are assuming that gf.addr function calls correspond to Chapel's local statements, +// but this is not always true because gf.addr is also used to extract a local pointer from a wide pointer. +// To avoid this problem, we have an std::vector named "NonLocals" to record a return value of gf.addr +// which is also an argument of gf.make and the NonLocals are referred to when doing "exemptionTest". +// This may not always be true. Ideally, a PGAS-LLVM frontend should tell the locality optimization pass +// which gf.addr call is a local statement. +// +// Example : +// 1. call i64* @.gf.addr.1(i64 addrspace(100)* %x) // %x is definitely local +// 2. %y = call i64* @.gf.addr.1(i64 addrspace(100)* %x) // might not be definitely local +// call i64 addrspace(100)* @.gf.make.1(..., %y) +// +// (Limitation) Chapel's Array Declaration detection: +// We basically look for chpl__convertRuntimeTypeToValue to detect Chapel's array declaration. 
+// This pattern matching completely depends on how PGAS-LLVM frontend emits LLVM IR. +// Please see analyzeCallInsn for more details. +// +// (Limitation) Intra-procedural pass: +// Unfortunately, the current implementation is not inter-procedural. +// +// (Future Work) The utilization of high-level information: +// The locality optimization pass has to recover high-level information such as +// array accesses and local statements from low-level LLVM IR, but ideally, +// PGAS-LLVM frontend are supposed to add annotations to keep these information +// so the locality optimization can perform language-agnostic PGAS optimization. +// +//===----------------------------------------------------------------------===// + +#include "llvmLocalityOptimization.h" + +#ifdef HAVE_LLVM + +#define DEBUG_TYPE "locality-opt" + +#include "llvmUtil.h" +#include +#include + +#if HAVE_LLVM_VER >= 35 +#include "llvm/IR/InstIterator.h" +#include "llvm/IR/CallSite.h" +#include "llvm/IR/Verifier.h" +#else +#include "llvm/Support/InstIterator.h" +#include "llvm/Support/CallSite.h" +#include "llvm/Analysis/Verifier.h" +#endif + +// For Debugging +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/GraphWriter.h" + +#include "llvm/ADT/GraphTraits.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/ValueMapper.h" +#include "llvm/LinkAllPasses.h" + +#include "llvm/Transforms/Utils/Cloning.h" + +#include "llvmGlobalToWide.h" +#include "IGraph.h" +#include "ValueTable.h" + +using namespace llvm; + +namespace { + + // For Debug + static bool debugPassInsn = true; + static const bool extraChecks = false; + static const char* debugThisFn = ""; + + // For Chapel Compiler & Breakdown + static const bool fLLVMDisableIG = false; + static const bool fLLVMDisableDecl = false; + static const bool fLLVMDisableGVN = false; + static const bool fLLVMLocalityOpt = true; + + // Statistics + STATISTIC(NumLocalizedByIG, "Number of 
localized operations by IG"); + STATISTIC(NumLocalizedByGVN, "Number of localized operations by GVN"); + STATISTIC(NumLocalizedByArrayDecl, "Number of localized operations by Locale-local Array"); + + struct LocalityOpt : public ModulePass { + + static char ID; + + GlobalToWideInfo *info; + std::string layoutAfterwards; + + LocalityOpt(GlobalToWideInfo* _info, std::string layout) + : ModulePass(ID), info(_info), layoutAfterwards(layout) { + } + + // Constructor for running within opt, for testing and + // bugpoint. + LocalityOpt() + : ModulePass(ID), info(NULL), layoutAfterwards("") { + } + + class LocalArrayEntry { + private: + Value *op; + bool whole; + vector localOffsets; + public: + LocalArrayEntry(Value* _op, bool _whole) : op(_op), whole(_whole) {} + void addLocalOffset(unsigned int offs) { + for (vector::iterator I = localOffsets.begin(), E = localOffsets.end(); I != E; I++) { + if (offs == *I) { + return; + } + } + localOffsets.push_back(offs); + } + Value* getOp() { return op; } + void dumpLocalOffsets() { + for (vector::iterator I = localOffsets.begin(), E = localOffsets.end(); I != E; I++) { + errs () << *I << ", "; + } + errs () << "\n"; + } + bool isLocalOffset(int offset) { + if (std::find(localOffsets.begin(), localOffsets.end(), offset) != localOffsets.end()) { + return true; + } + return false; + } + bool isWholeLocal() { return whole; } + }; + + class LocalArrayInfo { + private: + vector list; + public: + LocalArrayInfo() {} + void add(LocalArrayEntry *li) { list.push_back(li); } + LocalArrayEntry* getEntryByValue(const Value *op) { + for (vector::iterator I = list.begin(), E = list.end(); I != E; I++) { + LocalArrayEntry *li = *I; + if (li->getOp() == op) { + return li; + } + } + return NULL; + } + void dump() { + errs () << "[Local Array Info Start]\n"; + for (vector::iterator I = list.begin(), E = list.end(); I != E; I++) { + LocalArrayEntry *li = *I; + errs () << *(li->getOp()) << "\n"; + errs () << "Definitely Local Offset : "; + if 
(li->isWholeLocal()) { + errs () << "WHOLE\n"; + } else { + li->dumpLocalOffsets(); + } + } + errs () << "[Local Array Info End]\n"; + } + }; + +#if HAVE_LLVM_VER >= 35 + void dumpFunction(Function *F, std::string mid) { + std::string Filename = F->getName().str() + "." + mid + ".ll"; + std::error_code EC; + raw_fd_ostream File(Filename.c_str(), EC, sys::fs::F_Text); + + if (EC) { + errs() << "Dump Function : error: "<< EC.message() << "\n"; + } else { + File << *F; + } + } + + void dumpDOT(IGraph* G) { + std::string Filename = "ig." + G->getName().str() + ".dot"; + std::error_code EC; + raw_fd_ostream File(Filename.c_str(), EC, sys::fs::F_Text); + + if (EC) { + errs() << "Dump IGraph : error: "<< EC.message() << "\n"; + } else { + WriteGraph(File, (const IGraph*)G, false, "Habanero"); + } + } +#else + void dumpFunction(Function *F, std::string mid) { + std::string Filename = F->getName().str() + "." + mid + ".ll"; + std::string ErrorInfo; + raw_fd_ostream File(Filename.c_str(), ErrorInfo); + + if (ErrorInfo.empty()) + File << *F; + else + errs() << " error opening file for writing!"; + errs() << "\n"; + } + + void dumpDOT(IGraph* G) { + std::string Filename = "ig." 
+ G->getName().str() + ".dot"; + errs() << "Writing '" << Filename << "'..."; + + std::string ErrorInfo; + raw_fd_ostream File(Filename.c_str(), ErrorInfo); + + if (ErrorInfo.empty()) + WriteGraph(File, (const IGraph*)G, false, "Habanero"); + else + errs() << " error opening file for writing!"; + errs() << "\n"; + + } +#endif + // For Debugging purpose + void insertPrintf(Module &M, Instruction *insertBefore, StringRef Str) { + // Global Value + Constant *StrConstant = ConstantDataArray::getString(M.getContext(), Str); + GlobalVariable *GV = new GlobalVariable(M, StrConstant->getType(), true, GlobalValue::PrivateLinkage, StrConstant); + GV->setUnnamedAddr(true); + // GEP + Value *zero = ConstantInt::get(Type::getInt32Ty(M.getContext()), 0); + Value *gepArgs[] = { zero, zero }; + Instruction *gepInst = GetElementPtrInst::CreateInBounds(GV, gepArgs, "", insertBefore); + // Printf + Constant *putsFunc = M.getOrInsertFunction("puts", Type::getInt32Ty(M.getContext()), Type::getInt8PtrTy(M.getContext()), NULL); + Value* printfArgs[1]; + printfArgs[0] = gepInst; + CallInst::Create(putsFunc, printfArgs, "", insertBefore); + } + + bool isaGlobalPointer(GlobalToWideInfo* info, Type* type) { + PointerType* pt = dyn_cast(type); + if( pt && pt->getAddressSpace() == info->globalSpace ) return true; + return false; + } + + IGraph* createIGraph(Module &M, Function *F) { + IGraph *G = new IGraph(F->getName()); + for (Function::arg_iterator I = F->arg_begin(), + E = F->arg_end(); I!=E; ++I) { + Value *srcVal = I; + Node *srcNode = G->getNodeByValue(srcVal); + if (srcNode == NULL) { + srcNode = new Node(srcVal); + G->addNode(srcNode); + } + for (User *U : I->users()) { + Value* dstVal = U; + Node *dstNode = G->getNodeByValue(dstVal); + if (dstNode == NULL) { + dstNode = new Node(dstVal); + G->addNode(dstNode); + } + srcNode->addChild(dstNode); + } + } + for (Function::iterator BI = F->begin(), BE = F->end(); BI != BE; BI++) { + BasicBlock* BB = BI; + for (BasicBlock::iterator I = 
BB->begin(), E = BB->end(); I != E; I++) { + Instruction *insn = &*I; + Value *srcVal = insn; + Node *srcNode = G->getNodeByValue(srcVal); + if (srcNode == NULL) { + srcNode = new Node(srcVal); + G->addNode(srcNode); + } + for (User *U : I->users()) { + Value *dstVal = U; + Node *dstNode = G->getNodeByValue(dstVal); + if (dstNode == NULL) { + dstNode = new Node(dstVal); + G->addNode(dstNode); + } + srcNode->addChild(dstNode); + } + } + } + return G; + } + + void createValueTableInsn(ValueTable *vn, Instruction *insn) { + if (insn->getType()->isVoidTy()) return; + vn->lookup_or_add(insn); + } + + ValueTable* createValueTable(Function *F) { + ValueTable *vn = new ValueTable(); + for (inst_iterator II = inst_begin(F), IE = inst_end(F); II != IE; ++II) { + Instruction *insn = &*II; + createValueTableInsn(vn, insn); + } + return vn; + } + + bool isDefinitelyLocalAccordingToIG(Value* op, IGraph *G, std::vector &NonLocals) { + if (fLLVMDisableIG) { + return false; + } + bool definitelyLocal = false; + bool exempt = false; + int ll = info->globalSpace; + // find smallest possible locality level + for (vector::iterator I = G->begin(), E = G->end(); I != E; I++) { + Node* n = *I; + int lltmp = n->getLL(op); + if (lltmp != -1 && lltmp < ll) { + ll = lltmp; + } + } + exempt = exemptionTest(op, NonLocals); + if (ll == 0 && !exempt) { + definitelyLocal = true; + NumLocalizedByIG++; + if (debugPassInsn) { + errs () << *op << " is definitely local\n"; + } + } else if (ll == 0 && exempt) { + if (debugPassInsn) { + errs () << *op << " is exempted\n"; + } + } + return definitelyLocal; + } + + // Assuming op is operand of GEP inst (e.g. 
getelementptr inbounds i64, i64 addrspace(100)* op) + // find array access and localize it if array descriptor is definitely local according to GVN info + bool isDefinitelyLocalAccordingToList(GetElementPtrInst* oldGEP, ValueToValueMapTy &VM, IGraph *G, LocalArrayInfo *LocalArrays, bool isGVN) { + + if (isGVN && fLLVMDisableGVN) { + return false; + } + if (!isGVN && fLLVMDisableDecl) { + return false; + } + + Value *op = oldGEP->getPointerOperand(); + if (!op) { + return false; + } + LocalArrayEntry *local = LocalArrays->getEntryByValue(op); + + // First Step : See if this GEP access is array access. If so, see if a pointer to array is in LocalArrayInfo + bool possiblyLocal = false; + bool definitelyLocal = false; + int offset = -1; + + // Case 1 : this is GEP LocalArray, 0, 8 + if (local) { + possiblyLocal = true; + } + + // Case 2 : this GEP is obaining a pointer to array element. (GEP %op, %offset) + // Searching (GEP array, 0, 8) and see if array is in local LocalArray + if (isa(op)) { + LoadInst *loadInst = cast(op); + GetElementPtrInst *gepInst = dyn_cast(loadInst->getPointerOperand()); + if (gepInst && gepInst->getNumIndices() == 2) { + Constant *op1 = dyn_cast(gepInst->getOperand(1)); + Constant *op2 = dyn_cast(gepInst->getOperand(2)); + if (op1 != NULL && op2 != NULL + && op1->getUniqueInteger() == 0 && op2->getUniqueInteger() == 8) { + // $shifteddata = GEP %arraydesciptor, 0 , 8 + // original GEP is supposed to be array access (GEP %shifteddata, %offset) + + // search array descriptor + LocalArrayEntry *li1 = LocalArrays->getEntryByValue(gepInst); + if (li1) { + possiblyLocal = true; + local = li1; + } + + // search key assuming array descriptor has already been renamed. 
+ const GetElementPtrInst *keyGep = NULL; + for (ValueToValueMapTy::iterator I = VM.begin(), E = VM.end(); I != E; I++) { + if (I->second == gepInst) { + keyGep = cast(I->first); + break; + } + } + if (keyGep != NULL) { + // this GEP is definitely array offset calculation + const Value* v = keyGep->getPointerOperand(); + LocalArrayEntry *li2 = LocalArrays->getEntryByValue(v); + if (li2) { + possiblyLocal = true; + local = li2; + } + } + } + } + } + + // putting it together + if (possiblyLocal) { + if (!local || local->isWholeLocal()) { + definitelyLocal = true; + } else { + // Check if this offset is local + offset = analyzeArrayAccessOffsets(oldGEP, G); + if (!local || local->isLocalOffset(offset)) { + definitelyLocal = true; + } else { + definitelyLocal = false; + } + } + } else { + definitelyLocal = false; + } + + if (definitelyLocal) { + if (isGVN) { + NumLocalizedByGVN++; + } else { + NumLocalizedByArrayDecl++; + } + } + return definitelyLocal; + } + + Value* findNewOpOrInsertGF(Value *oldOp, ValueToValueMapTy &VM, Module &M, Instruction *insertBefore) { + Value *tmpOp, *newOp; + // check mapping + ValueToValueMapTy::iterator I = VM.find(oldOp); + if (I != VM.end() && I->second) { + tmpOp = I->second; + } else { + tmpOp = oldOp; + } + Type* t = tmpOp->getType(); + if (t->isPointerTy() && t->getPointerAddressSpace() == info->globalSpace) { + // create gf.addr. 
+ PointerType *addrType = cast(oldOp->getType()); + assert(addrType != NULL); + if (addrType->getPointerElementType()->isPointerTy()) { + newOp = tmpOp; + if (debugPassInsn) { + errs() << "GF is not inserted\n"; + } + } else { + Function* fn = getAddrFn(&M, info, addrType); + Value* gf_addr_args[1]; + gf_addr_args[0] = tmpOp; + newOp = CallInst::Create(fn, gf_addr_args, "", insertBefore); + if (debugPassInsn) { + errs() << "GF Inserted : " << *newOp << "\n"; + } + } + } else { + newOp = tmpOp; + } + return newOp; + } + + Instruction* duplicateCallInst(CallInst *oldCall, Function* newF) { + Instruction *newCall; + CallSite CS(oldCall); + const AttributeSet &CallPAL = CS.getAttributes(); + SmallVector args; + + for (unsigned int i = 0; i < oldCall->getNumArgOperands(); i++) { + Value *op = oldCall->getArgOperand(i); + args.push_back(op); + } + + if (InvokeInst *II = dyn_cast(oldCall)) { + newCall = InvokeInst::Create(newF, II->getNormalDest(), II->getUnwindDest(), + args, "", oldCall); + cast(newCall)->setCallingConv(CS.getCallingConv()); + cast(newCall)->setAttributes(CallPAL); + } else { + newCall = CallInst::Create(newF, args, "", oldCall); + cast(newCall)->setCallingConv(CS.getCallingConv()); + cast(newCall)->setAttributes(CallPAL); + if (cast(oldCall)->isTailCall()) + cast(newCall)->setTailCall(); + } + if (MDNode *tbaa = oldCall->getMetadata(LLVMContext::MD_tbaa)) { + newCall->setMetadata(LLVMContext::MD_tbaa, tbaa); + } + if (MDNode *tbaaStruct = oldCall->getMetadata(LLVMContext::MD_tbaa_struct)) { + newCall->setMetadata(LLVMContext::MD_tbaa_struct, tbaaStruct); + } + return newCall; + } + + bool checkNeedToWork(Instruction *insn, ValueToValueMapTy &VM) { + bool needsWork = false; + for(unsigned int i=0; i < insn->getNumOperands(); i++) { + Value *old = insn->getOperand(i); + ValueToValueMapTy::iterator I = VM.find(old); + if( I != VM.end() && I->second ) needsWork = true; + } + + // check global + if (insn->getOpcode() != Instruction::Call) { + for(unsigned 
int i=0; i < insn->getNumOperands(); i++) { + Value *old = insn->getOperand(i); + if( isaGlobalPointer(info, old->getType()) ) needsWork = true; + } + if( isaGlobalPointer(info, insn->getType()) ) needsWork = true; + } else { + CallInst *call = cast(insn); + Function *F = call->getCalledFunction(); + if (!F) return false; + if (isa(call) && isa(call)) { + Value* gDst = call->getArgOperand(0); + Value* gSrc = call->getArgOperand(1); + if (gDst->getType()->getPointerAddressSpace() == info->globalSpace + || gSrc->getType()->getPointerAddressSpace() == info->globalSpace) { + needsWork = true; + } + } else if (F->getName().startswith(".gf.addr")) { + needsWork = true; + } + } + return needsWork; + } + + void processInstruction(Instruction* targetInsn, SmallVector &deletedInsn, ValueToValueMapTy &VM, ValueTable *VN, Module &M, IGraph *G, LocalArrayInfo *LocalArraysGVN, LocalArrayInfo *LocalArraysDecl, std::vector &NonLocals) { + if(debugPassInsn) { + errs() << "@" << *targetInsn << "\n"; + } + + bool needsWork = checkNeedToWork(targetInsn, VM); + if (!needsWork) { + if (debugPassInsn) { + errs() << "need not to work!\n"; + } + return; + } + + switch(targetInsn->getOpcode()) { + case Instruction::PHI: { /* TODO : Consider PHI Node */ break; } + case Instruction::BitCast: { + CastInst *oldCast = cast(targetInsn); + Value* op = oldCast->getOperand(0); + ValueToValueMapTy::iterator I = VM.find(op); + if (I != VM.end() && I->second) { + Value* newOp = I->second; + if (debugPassInsn) { + errs () << "Take a look :" << *newOp->getType() << "\n"; + } + Type* oldSrcTy = oldCast->getSrcTy(); + Type* newSrcTy = newOp->getType(); + Type* oldDstTy = oldCast->getDestTy(); + assert(oldSrcTy->isPointerTy() && newSrcTy->isPointerTy() && oldDstTy->isPointerTy()); + bool srcIsWide = newSrcTy->getPointerAddressSpace() == 0; + bool dstIsGlobal = oldDstTy->getPointerAddressSpace() == info->globalSpace; + if (srcIsWide && dstIsGlobal) { + Type* newDstTy = convertTypeGlobalToWide(&M, info, 
oldDstTy); + Instruction* newInst = CastInst::Create(oldCast->getOpcode(), newOp, newDstTy, "", oldCast); + if (debugPassInsn) { + errs() << "Old Instruction : " << *oldCast << "\n"; + errs() << "New Instruction : " << *newInst << "\n"; + } + VM[oldCast] = newInst; + deletedInsn.push_back(oldCast); + } else { + RemapInstruction(targetInsn, VM, RF_IgnoreMissingEntries); + if (debugPassInsn) { + errs() << "New Instruction : " << *targetInsn << "\n"; + } + } + } else { + if (debugPassInsn) { + errs () << "No transformation\n"; + } + } + break; + } + case Instruction::GetElementPtr: { + GetElementPtrInst *oldGEP = cast(targetInsn); + if (oldGEP->getAddressSpace() == info->globalSpace) { + Value *oldOp, *newOp; + Instruction* newInst = NULL; + // Old Operand addrspace(100)* + oldOp = oldGEP->getPointerOperand(); + bool needToTransform = false; + // For array access + // Check if the pointer is definitely local (according to inequality graph) + needToTransform |= isDefinitelyLocalAccordingToIG(oldOp, G, NonLocals); + // Check if the pointer derives from locale-local array pointer (according to GVN) + needToTransform |= isDefinitelyLocalAccordingToList(oldGEP, VM, G, LocalArraysGVN, true); + // Check if the pointer derives from locale-local array pointer (according to locale-local array) + needToTransform |= isDefinitelyLocalAccordingToList(oldGEP, VM, G, LocalArraysDecl, false); + // + ValueToValueMapTy::iterator I = VM.find(oldOp); + needToTransform |= (I != VM.end() && I->second); + if (needToTransform) { + newOp = findNewOpOrInsertGF(oldOp, VM, M, oldGEP); + // creating new GEP + std::vector args; + for (User::op_iterator OI = oldGEP->idx_begin(), OE = oldGEP->idx_end(); OI != OE; OI++) { + args.push_back(*OI); + } + ArrayRef argsRef(args); + // Create new GEP + bool inBounds = oldGEP->isInBounds(); + if (inBounds) { + newInst = GetElementPtrInst::CreateInBounds(newOp, argsRef, oldGEP->getName(), oldGEP); + } else { +#if HAVE_LLVM_VER >= 35 + newInst = 
GetElementPtrInst::Create(newOp->getType(), newOp, argsRef, oldGEP->getName(), oldGEP); +#else + newInst = GetElementPtrInst::Create(newOp, argsRef, oldGEP->getName(), oldGEP); +#endif + } + if (debugPassInsn) { + errs() << "Old Instruction : " << *oldGEP << "\n"; + errs() << "New Instruction : " << *newInst << "\n"; + } + // TODO: reconsider return type of GEP + VM[oldGEP] = newInst; + deletedInsn.push_back(oldGEP); + } + } else { + RemapInstruction(oldGEP, VM, RF_IgnoreMissingEntries); + } + break; + } + case Instruction::Load: { + LoadInst *oldLoad = cast(targetInsn); + RemapInstruction(oldLoad, VM, RF_IgnoreMissingEntries); + if(oldLoad->getPointerAddressSpace() == info->globalSpace) { + Value *oldOp, *newOp; + Instruction* newInst = NULL; + // Old Operand addrspace(100)* + oldOp = oldLoad->getPointerOperand(); + if (isDefinitelyLocalAccordingToIG(oldOp, G, NonLocals)) { + newOp = findNewOpOrInsertGF(oldOp, VM, M, oldLoad); + newInst = new LoadInst(newOp, + "", + oldLoad->isVolatile(), + oldLoad->getAlignment(), + oldLoad->getOrdering(), + oldLoad->getSynchScope(), + oldLoad); + if (MDNode *tbaa = oldLoad->getMetadata(LLVMContext::MD_tbaa)) { + newInst->setMetadata(LLVMContext::MD_tbaa, tbaa); + } + if (debugPassInsn) { + errs() << "Old Instruction : " << *oldLoad << "\n"; + errs() << "New Instruction : " << *newInst << "\n"; + } + if (!newInst->getType()->isPointerTy()) { + oldLoad->replaceAllUsesWith(newInst); + } + VM[oldLoad] = newInst; + deletedInsn.push_back(oldLoad); + } + } + break; + } + case Instruction::Store: { + StoreInst *oldStore = cast(targetInsn); + RemapInstruction(oldStore, VM, RF_IgnoreMissingEntries); + if (oldStore->getPointerAddressSpace() == info->globalSpace) { + Value *oldOp, *newOp; + Instruction* newInst = NULL; + // Old Operand addrspace(100)* + oldOp = oldStore->getPointerOperand(); + if (isDefinitelyLocalAccordingToIG(oldOp, G, NonLocals)) { + newOp = findNewOpOrInsertGF(oldOp, VM, M, oldStore); + newInst = new 
StoreInst(oldStore->getValueOperand(), + newOp, + oldStore->isVolatile(), + oldStore->getAlignment(), + oldStore->getOrdering(), + oldStore->getSynchScope(), + oldStore); + if (MDNode *tbaa = oldStore->getMetadata(LLVMContext::MD_tbaa)) { + newInst->setMetadata(LLVMContext::MD_tbaa, tbaa); + } + if (debugPassInsn) { + errs() << "Old Instruction : " << *oldStore << "\n"; + errs() << "New Instruction : " << *newInst << "\n"; + } + VM[oldStore] = newInst; + deletedInsn.push_back(oldStore); + } + } + break; + } + case Instruction::Call: { + CallInst *oldCall = cast(targetInsn); + Function* oldF = oldCall->getCalledFunction(); // null if indirect + assert(oldF != NULL); + if (oldF->getName().startswith(".gf.addr")) { + Value *op = oldCall->getArgOperand(0); + ValueToValueMapTy::iterator I = VM.find(op); + if (I != VM.end() && I->second) { + Value *addrOp = I->second; + // TODO check + errs () << "Removing gf.addr : "; + op->getType()->dump(); + errs () << " => "; + addrOp->getType()->dump(); + errs () << "\n"; + VM[oldCall] = addrOp; + deletedInsn.push_back(oldCall); + errs () << "gf.addr removed\n"; + } + break; + } else if (isa(oldCall)) { + if (isa(oldCall)) { + Value *oldDst = oldCall->getArgOperand(0); + ValueToValueMapTy::iterator I = VM.find(oldDst); + if (I != VM.end() && I->second) { + Value* newDst = I->second; + CallSite CS(oldCall); + const AttributeSet &CallPAL = CS.getAttributes(); + Type *types[2]; + Value *args[5]; + + types[0] = newDst->getType(); + types[1] = oldCall->getArgOperand(2)->getType(); + + args[0] = newDst; + args[1] = oldCall->getArgOperand(1); + args[2] = oldCall->getArgOperand(2); + args[3] = oldCall->getArgOperand(3); + args[4] = oldCall->getArgOperand(4); + + Function* memSetF = Intrinsic::getDeclaration(&M, Intrinsic::memset, types); + Instruction* newCall = CallInst::Create(memSetF, args, "", oldCall); + cast(newCall)->setCallingConv(CS.getCallingConv()); + cast(newCall)->setAttributes(CallPAL); + if (cast(oldCall)->isTailCall()) { + 
cast(newCall)->setTailCall(); + } + if (MDNode *tbaa = oldCall->getMetadata(LLVMContext::MD_tbaa)) { + newCall->setMetadata(LLVMContext::MD_tbaa, tbaa); + } + if (MDNode *tbaaStruct = oldCall->getMetadata(LLVMContext::MD_tbaa_struct)) { + newCall->setMetadata(LLVMContext::MD_tbaa_struct, tbaaStruct); + } + if (debugPassInsn) { + errs () << "MemSet Old Instruction : " << *oldCall << "\n"; + errs () << "New Instruction : " << *newCall << "\n"; + } + VM[oldCall] = newCall; + deletedInsn.push_back(oldCall); + } else { + /* do nothing */ + } + break; + } + assert(isa(oldCall) || isa (oldCall)); + Value *newDst, *newSrc; + Value* oldDst = oldCall->getArgOperand(0); + Value* oldSrc = oldCall->getArgOperand(1); + bool needToTransform = false; + + unsigned dstSpace = oldDst->getType()->getPointerAddressSpace(); + unsigned srcSpace = oldSrc->getType()->getPointerAddressSpace(); + + CallSite CS(oldCall); + const AttributeSet &CallPAL = CS.getAttributes(); + Type *types[3]; + Value *args[5]; + + if (srcSpace == info->globalSpace) { + ValueToValueMapTy::iterator I = VM.find(oldSrc); + bool renamed = I != VM.end() && I->second; + if (isDefinitelyLocalAccordingToIG(oldSrc, G, NonLocals) || renamed) { + newSrc = findNewOpOrInsertGF(oldSrc, VM, M, oldCall); + needToTransform = true; + } + } else { + newSrc = oldSrc; + } + if (dstSpace == info->globalSpace) { + ValueToValueMapTy::iterator I = VM.find(oldDst); + bool renamed = I != VM.end() && I->second; + if (isDefinitelyLocalAccordingToIG(oldDst, G, NonLocals) || renamed) { + newDst = findNewOpOrInsertGF(oldDst, VM, M, oldCall); + needToTransform = true; + } + } else { + newDst = oldDst; + } + + if (!needToTransform) { + break; + } + + types[0] = newDst->getType(); + types[1] = newSrc->getType(); + types[2] = oldCall->getArgOperand(2)->getType(); + + args[0] = newDst; + args[1] = newSrc; + args[2] = oldCall->getArgOperand(2); + args[3] = oldCall->getArgOperand(3); + args[4] = oldCall->getArgOperand(4); + + Function* memF = NULL; + 
if (isa(oldCall)) { + memF = Intrinsic::getDeclaration(&M, Intrinsic::memcpy, types); + } else if (isa (oldCall)) { + memF = Intrinsic::getDeclaration(&M, Intrinsic::memmove, types); + } + Instruction* newCall = CallInst::Create(memF, args, "", oldCall); + cast(newCall)->setCallingConv(CS.getCallingConv()); + cast(newCall)->setAttributes(CallPAL); + if (cast(oldCall)->isTailCall()) { + cast(newCall)->setTailCall(); + } + if (MDNode *tbaa = oldCall->getMetadata(LLVMContext::MD_tbaa)) { + newCall->setMetadata(LLVMContext::MD_tbaa, tbaa); + } + if (MDNode *tbaaStruct = oldCall->getMetadata(LLVMContext::MD_tbaa_struct)) { + newCall->setMetadata(LLVMContext::MD_tbaa_struct, tbaaStruct); + } + if (debugPassInsn) { + errs () << "Old Instruction : " << *oldCall << "\n"; + errs () << "New Instruction : " << *newCall << "\n"; + } + VM[oldCall] = newCall; + deletedInsn.push_back(oldCall); + + } else { + RemapInstruction(targetInsn, VM, RF_IgnoreMissingEntries); + } + break; + } + default: + RemapInstruction(targetInsn, VM, RF_IgnoreMissingEntries); + if (debugPassInsn) { + errs () << "New Instruction: " << *targetInsn << "\n"; + } + break; + } + } + + void localityOptimization(Module &M, Function* F) { + // Don't do anything if there is no body. + // Does nothing for special functions since they have no body. + if( F->begin() == F->end() ) return; + + // TODO : invoke mem2reg pass to introduce SSA phi node + + // For Debug + if (debugThisFn[0] && F->getName() == debugThisFn) { + // generate F->getName().before.ll + dumpFunction(F, "before"); + debugPassInsn = true; + } + + // Allocate + LocalArrayInfo *LocalArraysGVN = new LocalArrayInfo(); + LocalArrayInfo *LocalArraysDecl = new LocalArrayInfo(); + static std::vector NonLocals; + + // Create IGraph + // Inspect all instructions and construt IGraph. Each node of IGraph contains a densemap that map that is one-to-one mapping of each operand into a specific address space (either 100 or 0). 
+ // If an instruction is enclosed by a local statement, set the locality level of each operand to 0. + // Currently, we are assuming that gf.addr function calls correspond to Chapel's local statements, but this is not always true because gf.addr is also used to extract a local pointer from a wide pointer. We work on this later pass using NonLocals) + IGraph *G = createIGraph(M, F); + + // Perform a reduced version of GVN + ValueTable *VN = createValueTable(F); + + // Input : VN, G + // Output : LocalArraysGVN, LocalArrayDecl, NonLocals + salvageChapelArrayAccess(F, VN, G, LocalArraysGVN, LocalArraysDecl, NonLocals); + + // Dump analysis results + if (debugThisFn[0] && F->getName() == debugThisFn) { + VN->dump(); + // For Graphviz + dumpDOT(G); + errs () << "\n[Local Array GVN]\n"; + LocalArraysGVN->dump(); + errs () << "[Local Array Decl]\n"; + LocalArraysDecl->dump(); + + // dump nonlocals + errs () << "[Non Locals]\n"; + for (vector::iterator I = NonLocals.begin(), E = NonLocals.end(); I != E; I++) { + Value *tmp = *I; + tmp->dump(); + } + } + + // Process each instruction + // try to convert load/store/getelementptr with addrspace(100) to addrspace(0) with using IGraph + SmallVector deletedInsn; + ValueToValueMapTy ValueMap; + for (inst_iterator II = inst_begin(F), IE = inst_end(F); II != IE; ++II) { + Instruction *insn = &*II; + processInstruction(insn, deletedInsn, ValueMap, VN, M, G, LocalArraysGVN, LocalArraysDecl, NonLocals); + } + for (unsigned int i = 0; i < deletedInsn.size(); i++) { + Instruction *insn = deletedInsn[i]; + insn->removeFromParent(); + insn->setName(""); + insn->dropAllReferences(); + } + + // Cleanup + // TODO delete + ValueMap.clear(); + deletedInsn.clear(); + NonLocals.clear(); + + // For Debug + if (debugThisFn[0] && F->getName() == debugThisFn) { + // generate F->getName().before.ll + dumpFunction(F, "after"); + } + + // TODO : invoke reg2mem pass + + + } + + /* + Locality Optimization Pass: + + This pass tries to replace address 
space 100 pointer with address space 0 pointer. + 1. Local Statement (by users) + 2. Locale local array declaration (by users but not explicitly expressed) + */ + + virtual bool runOnModule(Module &M) { + bool madeInfo = false; + + // Normally we expect a user of this optimization to have + // already produced an info object with the important + // information, but if not we set some defaults here so + // that tests can be created and bugpoint can be run. + if( !info ) { + errs() << "Warning: GlobalToWide using default configuration\n"; + info = new GlobalToWideInfo(); + madeInfo = true; + info->globalSpace = 100; + info->wideSpace = 101; + info->localeIdType = M.getTypeByName("struct.c_localeid_t"); + if( ! info->localeIdType ) { + StructType* t = StructType::create(M.getContext(), "struct.c_localeid_t"); + t->setBody(Type::getInt32Ty(M.getContext()), Type::getInt32Ty(M.getContext()), NULL); + info->localeIdType = t; + } + info->nodeIdType = Type::getInt32Ty(M.getContext()); + + // Now go identify special functions in the module by name. + for (Module::iterator next_func = M.begin(); next_func!= M.end(); ) + { + Function *F = &*next_func; + ++next_func; + + FunctionType* FT = F->getFunctionType(); + + // This may look like a crazy amount of checking, but we + // need to do it in order to have bugpoint work with this + // optimization, since it will basically try different ways + // of corrupting the input. 
+ if( F->getName().startswith(GLOBAL_FN_GLOBAL_ADDR) && + FT->getNumParams() == 1 && + FT->getReturnType()->isPointerTy() && + FT->getReturnType()->getPointerAddressSpace() == 0 && + containsGlobalPointers(info, FT->getParamType(0)) ) { + Type* gType = FT->getParamType(0); + GlobalPointerInfo & r = info->gTypes[gType]; + r.addrFn = F; + info->specialFunctions.insert(F); + } else if( F->getName().startswith(GLOBAL_FN_GLOBAL_LOCID) && + FT->getNumParams() == 1 && + FT->getReturnType() == info->localeIdType && + containsGlobalPointers(info, FT->getParamType(0)) ) { + Type* gType = FT->getParamType(0); + GlobalPointerInfo & r = info->gTypes[gType]; + r.locFn = F; + info->specialFunctions.insert(F); + } else if( F->getName().startswith(GLOBAL_FN_GLOBAL_NODEID) && + FT->getNumParams() == 1 && + FT->getReturnType() == info->nodeIdType && + containsGlobalPointers(info, FT->getParamType(0)) ) { + Type* gType = FT->getParamType(0); + GlobalPointerInfo & r = info->gTypes[gType]; + r.nodeFn = F; + info->specialFunctions.insert(F); + } else if( F->getName().startswith(GLOBAL_FN_GLOBAL_MAKE) && + FT->getNumParams() == 2 && + FT->getParamType(0) == info->localeIdType && + FT->getParamType(1)->isPointerTy() && + FT->getParamType(1)->getPointerAddressSpace() == 0 && + containsGlobalPointers(info, FT->getReturnType()) ) { + Type* gType = FT->getReturnType(); + GlobalPointerInfo & r = info->gTypes[gType]; + r.makeFn = F; + info->specialFunctions.insert(F); + } else if( F->getName().startswith(GLOBAL_FN_GLOBAL_TO_WIDE) && + FT->getNumParams() == 1 && + containsGlobalPointers(info, FT->getParamType(0)) ) { + Type* gType = FT->getParamType(0); + GlobalPointerInfo & r = info->gTypes[gType]; + r.globalToWideFn = F; + info->specialFunctions.insert(F); + } else if( F->getName().startswith(GLOBAL_FN_WIDE_TO_GLOBAL) && + FT->getNumParams() == 1 && + containsGlobalPointers(info, FT->getReturnType()) ) { + Type* gType = FT->getReturnType(); + GlobalPointerInfo & r = info->gTypes[gType]; + 
r.wideToGlobalFn = F; + info->specialFunctions.insert(F); + } + } + } + + assert(info->globalSpace > 0); + assert(info->localeIdType); + assert(info->nodeIdType); + + // Wide pointer address space must differ from the local one... + assert(info->globalSpace != 0); + assert(info->wideSpace != 0); + assert(info->localeIdType != 0); + assert(info->nodeIdType != 0); + + // Note : current implementation is not inter-procedural + for(Module::iterator func = M.begin(); func!= M.end(); func++) { + Function *F = &*func; + if (F->getName().startswith(".")) { + continue; // skip special functions + } + localityOptimization(M, F); + } + + // After it all, put the target info back. + if( !madeInfo ) M.setDataLayout(layoutAfterwards); + if( madeInfo ) delete info; + + return true; + } + + + void salvageChapelArrayAccess(Function *F, ValueTable *VN, IGraph *G, LocalArrayInfo *LocalArraysGVN, LocalArrayInfo *LocalArraysDecl, std::vector &NonLocals) { + for (inst_iterator IS = inst_begin(F), IE = inst_end(F); IS != IE; IS++) { + Instruction *targetInsn = &*IS; + switch (targetInsn->getOpcode()) { + case Instruction::Load: + case Instruction::Store: + { + // search array access enclosed by local statement + analyzeLoadStoreInsn(targetInsn, F, VN, G, LocalArraysGVN); + break; + } + case Instruction::Call: + { + // search array construction + analyzeCallInsn(targetInsn, VN, G, LocalArraysDecl, NonLocals); + break; + } + default: + ; /* do nothing */ + } + } + } + + int analyzeArrayAccessOffsets(Instruction *getOffsetGEP, IGraph *G) { + int ret = -1; + if (getOffsetGEP) { + errs () << "Offset : \n"; + errs () << *getOffsetGEP << "\n"; + Instruction *offsetInsn = dyn_cast(getOffsetGEP->getOperand(1)); + if (offsetInsn) { + switch(offsetInsn->getOpcode()) { + case Instruction::Load: { + ret = 0; + break; + } + case Instruction::Shl: { + Constant *op1 = dyn_cast(offsetInsn->getOperand(1)); + if (op1) { + ret = 1 << (int)(op1->getUniqueInteger().roundToDouble()); + } else { + ret = -1; + } 
+ break; + } + case Instruction::Mul: { + Constant *op1 = dyn_cast(offsetInsn->getOperand(1)); + if (op1) { + ret = (int)(op1->getUniqueInteger().roundToDouble()); + } else { + ret = -1; + } + } + } + } + + } + return ret; + } + + void analyzeLoadStoreInsn(Instruction *I, Function *F, ValueTable *VN, IGraph *G, LocalArrayInfo *LocalArrays) { + if (isArrayAccessMemOp(I, G, info->globalSpace)) { + // for each store/load instruction that involves addrspace 100 and is supposed to be array access. + if (debugPassInsn) { + errs () << *I << " is supposed to be array access\n"; + } + GetElementPtrInst *gep1 = findGEP08FromMemOp(I, G); + if (gep1 == NULL) return; + for (inst_iterator IS2 = inst_begin(F), IE2 = inst_end(F); IS2 != IE2; IS2++) { + Instruction *I2 = &*IS2; + // search load/store instruction that is supposed to be local array access. + if (I != I2 && isArrayAccessMemOp(I2, G, 0)) { + GetElementPtrInst *gep2 = findGEP08FromMemOp(I2, G); + if (gep2 == NULL) continue; + if (VN->sameExpressions(gep1, gep2)) { + errs () << "[GVN worked!]\n"; + errs () << "\t Array Pointer :\n"; + errs () << "\t addrspace(100) : " << *gep1 << "\n"; + errs () << "\t addrspace(0) : " << *gep2 << "\n"; + // mark + Value *localArray = gep1->getPointerOperand(); + LocalArrayEntry *li = LocalArrays->getEntryByValue(localArray); + // Analyze Offset + int offset = analyzeArrayAccessOffsets(dyn_cast(I2->getOperand(1)), G); + if (offset != -1) { + // + if (!li) { + li = new LocalArrayEntry(localArray, false); + li->addLocalOffset(offset); + LocalArrays->add(li); + } else { + li->addLocalOffset(offset); + } + } + } + } + } + } + } + + void markNonLocalsRecursively(Value *v, std::vector &visited, std::vector &NonLocals) { + bool notVisited = find(visited.begin(), visited.end(), v) == visited.end(); + if (isa(v) && notVisited) { + visited.push_back(v); + Instruction *insn = cast(v); + for (unsigned int i = 0; i < insn->getNumOperands(); i++) { + Value *op = insn->getOperand(i); + if (isa(op)) { + 
CallInst *callInsn2 = cast(op); + Function *calledFunc2 = callInsn2->getCalledFunction(); + if (calledFunc2 && calledFunc2->getName().startswith(".gf.addr")) { + Value *tmp = callInsn2->getArgOperand(0); + NonLocals.push_back(tmp); + } + } + markNonLocalsRecursively(op, visited, NonLocals); + } + } + } + + // check if construct_DefaultRectangularArr is in this function + void analyzeCallInsn(Instruction *I, ValueTable *VN, IGraph *G, LocalArrayInfo *LocalArrays, std::vector &NonLocals) { + if (isa(I)) { + CallInst *callInsn1 = cast(I); + Function *calledFunc1 = callInsn1->getCalledFunction(); + // gf.make. + if (calledFunc1 == NULL) { + return; + } + StringRef funcName = calledFunc1->getName(); + if (funcName.startswith(".gf.make")) { + Value* v = callInsn1->getArgOperand(1); + if (isa(v)) { + CallInst *callInsn2 = cast(v); + Function *calledFunc2 = callInsn2->getCalledFunction(); + if (calledFunc2 && calledFunc2->getName().startswith("_construct_DefaultRectangularArr")) { + LocalArrayEntry *li = new LocalArrayEntry(I, true); + LocalArrays->add(li); + } else if (calledFunc2 && calledFunc2->getName().startswith(".gf.addr")) { + NonLocals.push_back(v); + } + } + std::vector visited; + markNonLocalsRecursively(v, visited, NonLocals); + } else if (funcName.startswith("chpl__convertRuntimeTypeToValue")) { // + Value* v = callInsn1->getArgOperand(1); + for (User *U : v->users()) { + Value *UI = U; + if (isa(*UI)) { + LoadInst *l = cast(UI); + if (l->getPointerOperand() == v) { + LocalArrayEntry *li = new LocalArrayEntry(UI, true); + LocalArrays->add(li); + // support chpl___ASSIGN + for (User *LU: l->users()) { + Value *LUI = LU; + if (isa(LUI)) { + CallInst *callInsn2 = cast(LUI); + Function *calledFunc2 = callInsn2->getCalledFunction(); + if (calledFunc2 && calledFunc2->getName().startswith("chpl___ASSIGN_") + && UI == callInsn2->getArgOperand(0)) { + LocalArrayEntry *li2 = new LocalArrayEntry(LUI, true); + LocalArrays->add(li2); + } + } + } + } + } + } + } else if 
(funcName.startswith("chpl__buildDomainExpr")) { + Value* v = callInsn1->getOperand(1); + for (User *U : v->users()) { + Value *UI = U; + if (isa(UI)) { + LoadInst *l = cast(UI); + if (l->getPointerOperand() == v) { + LocalArrayEntry *li = new LocalArrayEntry(UI, true); + LocalArrays->add(li); + } + } + } + } + } + } + + void searchGEP08Inst(vector &list, vector &visited, Node *node) { + vector::iterator I = find(visited.begin(), visited.end(), node); + if (I == visited.end()) { + visited.push_back(node); + if (debugPassInsn) { + errs () << "Parent Insn : " << *node->getValue() <<"\n"; + } + for (vector::iterator I = node->parents_begin(), E = node->parents_end(); I != E; I++) { + Node *tmp = *I; + Value *v = tmp->getValue(); + if (debugPassInsn) { + errs () << "Parent Insn : " << *v <<"\n"; + } + GetElementPtrInst *gepInst = dyn_cast(v); + if (gepInst && gepInst->getNumIndices() == 2) { + if (debugPassInsn) { + errs () << "Candidate GEP : " << *gepInst << "\n"; + } + Constant *op1 = dyn_cast(gepInst->getOperand(1)); + Constant *op2 = dyn_cast(gepInst->getOperand(2)); + if (op1 != NULL && op2 != NULL + && op1->getUniqueInteger() == 0 && op2->getUniqueInteger() == 8) { + vector::iterator I2 = find(list.begin(), list.end(), gepInst); + if (I2 == list.end()) { + list.push_back(gepInst); + } + } else { + searchGEP08Inst(list, visited, tmp); + } + } else { + searchGEP08Inst(list, visited, tmp); + } + } + } + } + + GetElementPtrInst* findGEP08FromMemOp(Instruction *I, IGraph *G) { + Value *op = NULL; + if (isa(I)) { + StoreInst* s = cast(I); + op = s->getPointerOperand(); + } else if (isa(I)) { + LoadInst* l = cast(I); + op = l->getPointerOperand(); + } else { + return NULL; + } + if (!isa(op)) { + return NULL; + } + vector list; + vector visited; + searchGEP08Inst(list, visited, G->getNodeByValue(op)); + if (list.size() == 0) { + return NULL; + } else { + if (list.size() != 1) { + errs () << "Warning : indirect access detected\n"; + } + return list[0]; + } + } + + bool 
isArrayAccessGEP(GetElementPtrInst* gep) { + if (gep != NULL) { + Type* t = gep->getOperand(0)->getType(); + if (t->isPointerTy()) { + Type* t2 = t->getPointerElementType(); + if (isa(t2)) { + if (t2->getStructName().startswith("chpl__class")) { + return false; + } + } else { + errs () << "not struct\n"; + } + } + } else { + return false; + } + return true; + } + + bool isArrayAccessMemOp(Instruction *I, IGraph *G, unsigned addrSpace) { + if (isa(I)) { + StoreInst* s = cast(I); + if (s->getPointerAddressSpace() != addrSpace) { + return false; + } + GetElementPtrInst* gep = findGEP08FromMemOp(s, G); + return isArrayAccessGEP(gep); + } else if (isa(I)) { + LoadInst* l = cast(I); + if (l->getPointerAddressSpace() != addrSpace) { + return false; + } + errs () << "Load + 100 : " << *I << "\n"; + GetElementPtrInst* gep = findGEP08FromMemOp(l, G); + return isArrayAccessGEP(gep); + } else { + return false; + } + } + + bool exemptionTest(Value *op, std::vector &NonLocals) { + bool ret = false; + // Case 1 : op = call @gf.make(%x, %y) + // = @gf.addr(op) <= assuming this stmt just unpacks local addr + if (isa(op)) { + CallInst *call = cast(op); + Function *F = call->getCalledFunction(); + if (F && F->getName().startswith(".gf.make")) { + ret = true; + } + } + // Case 2 : = @gf.make(%x, %y) + vector::iterator I = find(NonLocals.begin(), NonLocals.end(), op); + if (I != NonLocals.end()) { + ret = true; + } + return ret; + } + + }; +} + +char LocalityOpt::ID = 0; +static RegisterPass X("locality-opt", "Locality Optimization Pass"); + +ModulePass *createLocalityOpt(GlobalToWideInfo* info, std::string setlayout) { + return new LocalityOpt(info, setlayout); +} +#endif diff --git a/llvmLocalityOptimization.h b/llvmLocalityOptimization.h new file mode 100644 index 0000000..e5b1ee4 --- /dev/null +++ b/llvmLocalityOptimization.h @@ -0,0 +1,36 @@ +/* + * Copyright 2004-2015 Cray Inc. + * Other additional copyright holders may be indicated within. 
+ * + * The entirety of this work is licensed under the Apache License, + * Version 2.0 (the "License"); you may not use this file except + * in compliance with the License. + * + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +//===----------------------------------------------------------------------===// +// Chapel LLVM Locality Optimization by Akihiro Hayashi (ahayashi@rice.edu) +//===----------------------------------------------------------------------===// + +#ifndef _LLVMLOCALITYOPT_H_ +#define _LLVMLOCALITYOPT_H_ + +#ifdef HAVE_LLVM + +#include "llvmUtil.h" + +class GlobalToWideInfo; +llvm::ModulePass *createLocalityOpt(GlobalToWideInfo* info, std::string layout); + +#endif + +#endif From c4bde3aacb9710b59ab3d2a9fed6b12ad2d0d217 Mon Sep 17 00:00:00 2001 From: Akihiro Hayashi Date: Fri, 29 Jan 2016 10:42:26 -0600 Subject: [PATCH 03/24] A test file for the LLVM locality optimization pass added. 
--- test/local.ll | 133 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 133 insertions(+) create mode 100644 test/local.ll diff --git a/test/local.ll b/test/local.ll new file mode 100644 index 0000000..7022093 --- /dev/null +++ b/test/local.ll @@ -0,0 +1,133 @@ +; RUN: opt --load %bindir/lib/llvm-pgas${MOD_EXT} -locality-opt -S < %s | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128-p100:64:64:64" + +; for Chapel Array +%atomicflag = type { i8 } +%atomic_int64 = type { i64 } +%chpl_object_object = type { i32, i32 } +%range_int64_t_bounded_F = type { %rangeBase_int64_t_bounded_F, i8 } +%rangeBase_int64_t_bounded_F = type { i64, i64, i64, i64 } +%chpl_DefaultDist_object = type { %chpl_BaseDist_object } +%chpl_BaseDist_object = type { %chpl_object_object, %atomic_int64, %list_BaseDom, %atomicflag } +%list_BaseDom = type { %chpl_listNode_BaseDom_object addrspace(100)*, %chpl_listNode_BaseDom_object addrspace(100)*, i64 } +%chpl_listNode_BaseDom_object = type { %chpl_object_object, %chpl_BaseDom_object addrspace(100)*, %chpl_listNode_BaseDom_object addrspace(100)* } +%chpl_BaseDom_object = type { %chpl_object_object, %atomic_int64, %list_BaseArr, %atomicflag } +%chpl_DefaultRectangularDom_1_int64_t_F_object = type { %chpl_BaseRectangularDom_object, %chpl_DefaultDist_object addrspace(100)*, [1 x %range_int64_t_bounded_F] } +%chpl_BaseRectangularDom_object = type { %chpl_BaseDom_object } +%chpl_BaseArr_object = type { %chpl_object_object, %atomic_int64, %chpl_BaseArr_object addrspace(100)* } +%chpl_DefaultRectangularArr_int64_t_1_int64_t_F_object = type { %chpl_BaseArr_object, %chpl_DefaultRectangularDom_1_int64_t_F_object addrspace(100)*, [1 x i64], [1 x i64], [1 x i64], i64, i64, i64 addrspace(100)*, i64 addrspace(100)*, i8 } +%chpl___RuntimeTypeInfo8 = type { %chpl_DefaultRectangularDom_1_int64_t_F_object 
addrspace(100)* } +%list_BaseArr = type { %chpl_listNode_BaseArr_object addrspace(100)*, %chpl_listNode_BaseArr_object addrspace(100)*, i64 } +%chpl_listNode_BaseArr_object = type { %chpl_object_object, %chpl_BaseArr_object addrspace(100)*, %chpl_listNode_BaseArr_object addrspace(100)* } + +declare void @chpl__buildDomainExpr(%range_int64_t_bounded_F %_e0_ranges.val, %chpl_DefaultRectangularDom_1_int64_t_F_object addrspace(100)** nocapture %_retArg, i64 %_ln, i8* %_fn) +declare void @chpl__ensureDomainExpr3(%range_int64_t_bounded_F* %_e0_x, %chpl_DefaultRectangularDom_1_int64_t_F_object addrspace(100)** %_retArg, i64 %_ln, i8* %_fn) +declare %chpl___RuntimeTypeInfo8 @chpl__buildArrayRuntimeType6(%chpl_DefaultRectangularDom_1_int64_t_F_object addrspace(100)** %dom, i64 %_ln, i8* %_fn) +declare void @chpl__convertRuntimeTypeToValue8(%chpl_DefaultRectangularDom_1_int64_t_F_object addrspace(100)** %dom, %chpl_DefaultRectangularArr_int64_t_1_int64_t_F_object addrspace(100)** %_retArg, i64 %_ln, i8* %_fn) +declare void @_build_range(i64 %low, i64 %high2, %range_int64_t_bounded_F* %_retArg, i64 %_ln, i8* %_fn) +declare i64* @.gf.addr.1(i64 addrspace(100)*) readnone +declare %chpl_DefaultRectangularArr_int64_t_1_int64_t_F_object* @.gf.addr.2(%chpl_DefaultRectangularArr_int64_t_1_int64_t_F_object addrspace(100)*) readnone + +; proc localizeByLocalStmt(ref x) : int { +; var p: int = 1; +; local { p = x; } +; return p + x; +; } + +define i64 @localizeByLocalStmt(i64 addrspace(100)* %x) { +; CHECK: @localizeByLocalStmt( +; ) +entry: + %0 = call i64* @.gf.addr.1(i64 addrspace(100)* %x) + %1 = load i64, i64* %0 +; CHECK: call i64* @.gf.addr. 
+; CHECK: load i64, i64* +; CHECK: add i64 +; CHECK: ret i64 + %2 = load i64, i64 addrspace(100)* %x + %3 = add i64 %2, %1 + ret i64 %3 +} + +; proc localizeByArrayDecl () { +; var A: [1..10] int; +; return A(5); +; } + +define internal i64 @localizeByArrayDecl() { +; CHECK: @localizeByArrayDecl( +; ) +entry: + %type_tmp = alloca %chpl_DefaultRectangularArr_int64_t_1_int64_t_F_object addrspace(100)* + store %chpl_DefaultRectangularArr_int64_t_1_int64_t_F_object addrspace(100)* null, %chpl_DefaultRectangularArr_int64_t_1_int64_t_F_object addrspace(100)** %type_tmp + %call_tmp = alloca %range_int64_t_bounded_F + %call_tmp2 = alloca %chpl_DefaultRectangularDom_1_int64_t_F_object addrspace(100)* + store %chpl_DefaultRectangularDom_1_int64_t_F_object addrspace(100)* null, %chpl_DefaultRectangularDom_1_int64_t_F_object addrspace(100)** %call_tmp2 + %_runtime_type_tmp_ = alloca %chpl_DefaultRectangularDom_1_int64_t_F_object addrspace(100)* + %_fn = alloca i8 + store %chpl_DefaultRectangularDom_1_int64_t_F_object addrspace(100)* null, %chpl_DefaultRectangularDom_1_int64_t_F_object addrspace(100)** %_runtime_type_tmp_ + call void @_build_range(i64 1, i64 10, %range_int64_t_bounded_F* %call_tmp, i64 9, i8* %_fn) + call void @chpl__ensureDomainExpr3(%range_int64_t_bounded_F* %call_tmp, %chpl_DefaultRectangularDom_1_int64_t_F_object addrspace(100)** %call_tmp2, i64 9, i8* %_fn) + %0 = call %chpl___RuntimeTypeInfo8 @chpl__buildArrayRuntimeType6(%chpl_DefaultRectangularDom_1_int64_t_F_object addrspace(100)** %call_tmp2, i64 9, i8* %_fn) + %.fca.0.extract = extractvalue %chpl___RuntimeTypeInfo8 %0, 0 + store %chpl_DefaultRectangularDom_1_int64_t_F_object addrspace(100)* %.fca.0.extract, %chpl_DefaultRectangularDom_1_int64_t_F_object addrspace(100)** %_runtime_type_tmp_ + call void @chpl__convertRuntimeTypeToValue8(%chpl_DefaultRectangularDom_1_int64_t_F_object addrspace(100)** %_runtime_type_tmp_, %chpl_DefaultRectangularArr_int64_t_1_int64_t_F_object addrspace(100)** %type_tmp, 
i64 9, i8* %_fn) + %1 = load %chpl_DefaultRectangularArr_int64_t_1_int64_t_F_object addrspace(100)*, %chpl_DefaultRectangularArr_int64_t_1_int64_t_F_object addrspace(100)** %type_tmp +; CHECK: call %chpl_DefaultRectangularArr_int64_t_1_int64_t_F_object* @.gf.addr +; CHECK: getelementptr inbounds %chpl_DefaultRectangularArr_int64_t_1_int64_t_F_object, %chpl_DefaultRectangularArr_int64_t_1_int64_t_F_object* + %2 = getelementptr inbounds %chpl_DefaultRectangularArr_int64_t_1_int64_t_F_object, %chpl_DefaultRectangularArr_int64_t_1_int64_t_F_object addrspace(100)* %1, i32 0, i32 3 +; CHECK: getelementptr inbounds [1 x i64], [1 x i64]* + %3 = getelementptr inbounds [1 x i64], [1 x i64] addrspace(100)* %2, i64 0, i64 0 +; CHECK: load i64, i64* + %4 = load i64, i64 addrspace(100)* %3 + %5 = mul i64 5, %4 +; CHECK: getelementptr inbounds %chpl_DefaultRectangularArr_int64_t_1_int64_t_F_object, %chpl_DefaultRectangularArr_int64_t_1_int64_t_F_object* + %6 = getelementptr inbounds %chpl_DefaultRectangularArr_int64_t_1_int64_t_F_object, %chpl_DefaultRectangularArr_int64_t_1_int64_t_F_object addrspace(100)* %1, i32 0, i32 8 +; CHECK: load i64 addrspace(100)*, i64 addrspace(100)** + %7 = load i64 addrspace(100)*, i64 addrspace(100)* addrspace(100)* %6 +; CHECK: call i64* @.gf.addr. +; CHECK: getelementptr inbounds i64, i64* + %8 = getelementptr inbounds i64, i64 addrspace(100)* %7, i64 %5 +; CHECK: load i64, i64* + %9 = load i64, i64 addrspace(100)* %8 +; CHECK: ret i64 + ret i64 %9 +} + +; proc localizeByGVN(A) : int { +; A(1) = 1; // local +; local { A(1) = 2; } +; A(2) = 3; // non-local +; } + +define internal fastcc void @localizeByGVN(%chpl_DefaultRectangularArr_int64_t_1_int64_t_F_object addrspace(100)* %A.val) #1 { +; CHECK: @localizeByGVN( +; ) +entry: +; CHECK: call %chpl_DefaultRectangularArr_int64_t_1_int64_t_F_object* @.gf.addr. 
+; CHECK: getelementptr inbounds %chpl_DefaultRectangularArr_int64_t_1_int64_t_F_object, %chpl_DefaultRectangularArr_int64_t_1_int64_t_F_object* + %0 = getelementptr inbounds %chpl_DefaultRectangularArr_int64_t_1_int64_t_F_object, %chpl_DefaultRectangularArr_int64_t_1_int64_t_F_object addrspace(100)* %A.val, i64 0, i32 3, i64 0 +; CHECK: load i64, i64* + %1 = load i64, i64 addrspace(100)* %0 +; CHECK: call %chpl_DefaultRectangularArr_int64_t_1_int64_t_F_object* @.gf.addr. + %2 = getelementptr inbounds %chpl_DefaultRectangularArr_int64_t_1_int64_t_F_object, %chpl_DefaultRectangularArr_int64_t_1_int64_t_F_object addrspace(100)* %A.val, i64 0, i32 8 +; CHECK: load i64 addrspace(100)*, i64 addrspace(100)** + %3 = load i64 addrspace(100)*, i64 addrspace(100)* addrspace(100)* %2 + %4 = getelementptr inbounds i64, i64 addrspace(100)* %3, i64 %1 +; CHECK: store i64 1, i64* + store i64 1, i64 addrspace(100)* %4 + %5 = tail call %chpl_DefaultRectangularArr_int64_t_1_int64_t_F_object* @.gf.addr.2(%chpl_DefaultRectangularArr_int64_t_1_int64_t_F_object addrspace(100)* %A.val) + %6 = getelementptr inbounds %chpl_DefaultRectangularArr_int64_t_1_int64_t_F_object, %chpl_DefaultRectangularArr_int64_t_1_int64_t_F_object* %5, i64 0, i32 3, i64 0 + %7 = load i64, i64* %6 + %8 = getelementptr inbounds %chpl_DefaultRectangularArr_int64_t_1_int64_t_F_object, %chpl_DefaultRectangularArr_int64_t_1_int64_t_F_object* %5, i64 0, i32 8 + %9 = load i64 addrspace(100)*, i64 addrspace(100)** %8 + %10 = tail call i64* @.gf.addr.1(i64 addrspace(100)* %9) + %11 = getelementptr inbounds i64, i64* %10, i64 %7 + store i64 2, i64* %11 + %12 = load i64, i64 addrspace(100)* %0 + %13 = shl i64 %12, 1 + %14 = getelementptr inbounds i64, i64 addrspace(100)* %3, i64 %13 + store i64 3, i64 addrspace(100)* %14 + ret void +} From 414444a97884c42f1af44f93af053a89fbd9b19b Mon Sep 17 00:00:00 2001 From: Akihiro Hayashi Date: Tue, 16 Feb 2016 16:44:37 -0600 Subject: [PATCH 04/24] the way of showing IGraph's label 
changed. --- IGraph.h | 60 ++++++++++++++++++++++++++------------------------------ 1 file changed, 28 insertions(+), 32 deletions(-) diff --git a/IGraph.h b/IGraph.h index 8417253..f391799 100644 --- a/IGraph.h +++ b/IGraph.h @@ -45,7 +45,6 @@ using namespace llvm; class Node { private: - StringRef name; Value* value; vector children; vector parents; @@ -86,8 +85,7 @@ class Node { child->addParents(this); } } - - StringRef getName() const { return name; } + Value* getValue() const { return value; } }; @@ -173,18 +171,18 @@ namespace llvm { static std::string getSimpleNodeLabel(const Node* node, const IGraph *) { - if (!node->getName().empty()) - return node->getName().str(); + if (!node->getValue()->getName().empty()) + return node->getValue()->getName().str(); std::string Str; raw_string_ostream OS(Str); const Value *value = node->getValue(); #if HAVE_LLVM_VER >= 35 - OS << value->getName(); + value->printAsOperand(OS, false); #else WriteAsOperand(OS, value, false); #endif - OS << "FIXME"; + return OS.str(); } @@ -192,45 +190,43 @@ namespace llvm { const IGraph *) { std::string Str; raw_string_ostream OS(Str); - if (node->getName().empty()) { -#if HAVE_LLVM_VER >= 35 - OS << node->getValue()->getName(); -#else - WriteAsOperand(OS, node->getValue(), false); -#endif - OS << ", LL("; -#if HAVE_LLVM_VER >= 35 - OS << node->getValue()->getName(); -#else - WriteAsOperand(OS, node->getValue(), false); -#endif - OS << ") = " << node->getLL(node->getValue()) << " : "; - } - OS << *node->getValue(); - OS << " "; - Instruction *insn = dyn_cast(node->getValue()); + Value* value = node->getValue(); + + Instruction *insn = dyn_cast(value); if (insn) { + OS << *insn << "\n"; for(unsigned int i=0; i < insn->getNumOperands(); i++) { Value *op = insn->getOperand(i); - OS << ", LL("; + if (i != 0 ) OS << ", "; + OS << "LL("; #if HAVE_LLVM_VER >= 35 - OS << op->getName(); + op->printAsOperand(OS, false); #else WriteAsOperand(OS, op, false); -#endif - OS << ") = " << node->getLL(op); 
+#endif + OS << ") = " << node->getLL(op) << " "; } + } else { +#if HAVE_LLVM_VER >= 35 + value->printAsOperand(OS, false); +#else + WriteAsOperand(OS, value, false); +#endif } + std::string OutStr = OS.str(); + // if (OutStr[0] == '\n') OutStr.erase(OutStr.begin()); - // Process string output to make it nicer... + // Process OutStr for DOT format for (unsigned i = 0; i != OutStr.length(); ++i) { - if (OutStr[i] == '\n') { // Left justify + if (OutStr[i] == '\n') { OutStr[i] = '\\'; OutStr.insert(OutStr.begin()+i+1, 'l'); - } else if (OutStr[i] == ';') { // Delete comments! - unsigned Idx = OutStr.find('\n', i+1); // Find end of line + } else if (OutStr[i] == ';') { + // Delete comments! + unsigned Idx = OutStr.find('\n', i+1); + // Find end of line OutStr.erase(OutStr.begin()+i, OutStr.begin()+Idx); --i; } From d019fae9c3289145b80a3e30b34a690e4f1b248a Mon Sep 17 00:00:00 2001 From: Akihiro Hayashi Date: Wed, 24 Feb 2016 17:59:51 -0600 Subject: [PATCH 05/24] Modify Inequality Graph Construction (Step 1) To infer the locality of each possibly-remote data access considering the CFG, the inequality graph should be constructed accordingly. Since the original LLVM IR does not have any information on Def/Use of locality (e.g. local statement in Chapel), we first need to construct our own CFG having such information. Next, locality-SSA can be built in the same way that classic compilers do, and finally the inequality graph can easily be constructed. Here are the steps: (Step 1: This commit) Construct our own CFG by recognizing def/use of locality (e.g. 
calling @.gf.addr and then doing load and store => local statement) (Step 2: Locality-SSA Construction 1 TODO) Compute Dominator Tree and Dominance Frontier of the CFG (Step 3: Locality-SSA Construction 2 TODO) Insert Phi-function --- IGraph.cpp | 211 +++++++++++++++++++++++++++++------ IGraph.h | 176 ++++++++++++++++++----------- llvmLocalityOptimization.cpp | 53 +-------- 3 files changed, 293 insertions(+), 147 deletions(-) diff --git a/IGraph.cpp b/IGraph.cpp index 8c038d2..eb56729 100644 --- a/IGraph.cpp +++ b/IGraph.cpp @@ -28,50 +28,189 @@ #include #include + +#if HAVE_LLVM_VER >= 35 +#include "llvm/IR/InstIterator.h" +#else +#include "llvm/Support/InstIterator.h" +#endif + using namespace std; using namespace llvm; -unsigned int -Node::getAddressSpace(Value* v) -{ - PointerType* pt = dyn_cast(v->getType()); - if(pt) { - return pt->getAddressSpace(); - } else { - return 0; +Value* IGraph::getOperandIfLocalStmt(Instruction *insn) { + CallInst *call = dyn_cast(insn); + if (call) { + Function* f = call->getCalledFunction(); + if (f != NULL) { + // calling @.gf.addr and then doing load and store => local statement + if (f->getName().startswith(".gf.addr")) { + for (User *U : call->getArgOperand(0)->users()) { + Value *UI = U; + if (isa(*UI) || isa(*UI)) { + return call->getArgOperand(0); + } + } + } + } } + return NULL; } -void -Node::initLLMap() -{ - LLMap[value] = getAddressSpace(value); - // Instruction - Instruction *insn = dyn_cast(value); - if (!insn) return; - if (insn->getOpcode() != Instruction::Call) { - for(unsigned int i=0; i < insn->getNumOperands(); i++) { - Value *op = insn->getOperand(i); - LLMap[op] = getAddressSpace(op); +void IGraph::construct(Function *F, GlobalToWideInfo *info) { + + if (debug) { + errs () << "[Inequality Graph Construction for " << F->getName() << "]\n"; + } + + /* 1. collect addrspace 100 pointers that is used in the next step. */ + /* 1. construct a set of addrspace 100 pointers. */ + /* 2. 
construct a list of blocks that def/use the pointer. */ + SmallVector possiblyRemotePtrs; + SmallVector possiblyRemoteArgs; + // analyze arguments + for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I!=E; ++I) { + Value *arg = I; + if (arg->getType()->isPointerTy() && arg->getType()->getPointerAddressSpace() == info->globalSpace) { + if (find(possiblyRemotePtrs.begin(), + possiblyRemotePtrs.end(), + arg) == possiblyRemotePtrs.end()) { + possiblyRemotePtrs.push_back(arg); + } + if (find(possiblyRemoteArgs.begin(), + possiblyRemoteArgs.end(), + arg) == possiblyRemoteArgs.end()) { + possiblyRemoteArgs.push_back(arg); + } } } - switch(insn->getOpcode()) { - case Instruction::Call: { - CallInst *call = cast(insn); - Function* f = call->getCalledFunction(); - if (f != NULL) { - /* * - * We are assuming that gf.addr function calls correspond to Chapel's local statements, but this is not always true because gf.addr is also used to extract a local pointer from a wide pointer. We work on this later pass (see exemptionTest in llvmLocalityOptimization.cpp). 
- */ - if (f->getName().startswith(".gf.addr")) { - // Argument of ".gf.addr" is definitely local - for (unsigned int i = 0; i < call->getNumArgOperands(); i++) { - Value* v = call->getArgOperand(i); - LLMap[v] = 0; - } - } + + // analyze instructions + DenseMap> NodeCandidates; + for (inst_iterator II = inst_begin(F), IE = inst_end(F); II != IE; ++II) { + Instruction *insn = &*II; + bool needToWork = false; + // + NodeKind kind = NODE_NONE; + Value *ptrOp = NULL; + int addrspace = 100; + // + switch (insn->getOpcode()) { + case Instruction::Load: { + LoadInst *load = cast(insn); + if(load->getPointerAddressSpace() == info->globalSpace) { + needToWork = true; + kind = NODE_USE; + ptrOp = load->getPointerOperand(); + addrspace = 100; + } + break; + } + case Instruction::Call: { + ptrOp = getOperandIfLocalStmt(insn); + if (ptrOp) { + needToWork = true; + kind = NODE_DEF; + addrspace = 0; + } + } + // TODO: store/getelementptr insn + } + if (needToWork) { + // collect possibly remote pointers + if (find(possiblyRemotePtrs.begin(), + possiblyRemotePtrs.end(), + ptrOp) == possiblyRemotePtrs.end()) { + possiblyRemotePtrs.push_back(ptrOp); + } + // store detailed information used in node construction. + NodeCandidates[insn] = std::make_tuple(kind, ptrOp, insn, addrspace); } - break; } - } + + /* 2. for each pointer do the following. 
*/ + /* */ + for (SmallVector::iterator I = possiblyRemotePtrs.begin(), + E = possiblyRemotePtrs.end(); I != E; I++) { + Value* val = *I; + if (debug) { + errs () << "Working on :" << *val << "\n"; + } + + DenseMap> BBInfo; + // Intra-block edge + Node *entry = NULL; + for (Function::iterator BI = F->begin(), BE = F->end(); BI != BE; BI++) { + BasicBlock* BB = BI; + Node *firstNode = NULL; + Node *lastNode = NULL; + + // entry node + if (BI == F->begin()) { + if (find(possiblyRemoteArgs.begin(), + possiblyRemoteArgs.end(), + val) != possiblyRemoteArgs.end()) { + Node *n = new Node(NODE_DEF, val, NULL, 0, 100); + this->addNode(n); + entry = n; + firstNode = n; + lastNode = n; + } + } + + for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; I++) { + // add edge if needed + Instruction *insn = &*I; + if (NodeCandidates.find(insn) != NodeCandidates.end()) { + std::tuple &info = NodeCandidates[insn]; + Node *n = new Node(std::get<0>(info), // Kind + std::get<1>(info), // Value + std::get<2>(info), // Insn + 0, // Version (0 for now) + std::get<3>(info)); // Locality (either 0 or 100) + this->addNode(n); + if (!firstNode) { + firstNode = n; + } + if (lastNode) { + lastNode->addChild(n); + n->addParents(lastNode); + } + lastNode = n; + } + } + // + BBInfo[BB] = std::make_pair(firstNode, lastNode); + } + + // Inter-block edge + for (Function::iterator BI = F->begin(), BE = F->end(); BI != BE; BI++) { + BasicBlock* BB = BI; + std::pair &SrcBBinfo = BBInfo[BB]; + const TerminatorInst *TInst = BB->getTerminator(); + // get the last node of the current BB + Node* srcNode = std::get<1>(SrcBBinfo); + if (!srcNode) { + continue; + } + // Succ + for (unsigned I = 0, NSucc = TInst->getNumSuccessors(); I < NSucc; I++) { + BasicBlock *Succ = TInst->getSuccessor(I); + std::pair &DstBBinfo = BBInfo[Succ]; + Node* dstNode = std::get<0>(DstBBinfo); + if (dstNode) { + srcNode->addChild(dstNode); + dstNode->addParents(srcNode); + } + } + } + // Dominator Tree Computation + + 
// Dominator Frontier Computation + + // phi-insertion + + // renaming + + } } diff --git a/IGraph.h b/IGraph.h index f391799..5ace33d 100644 --- a/IGraph.h +++ b/IGraph.h @@ -32,6 +32,9 @@ #include "llvm/Support/DOTGraphTraits.h" #include "llvm/Support/raw_ostream.h" #include "llvmUtil.h" +#include "llvmGlobalToWide.h" +#include "llvm/Support/GenericDomTree.h" +#include "llvm/Support/GenericDomTreeConstruction.h" #if HAVE_LLVM_VER >= 35 #else @@ -43,39 +46,53 @@ using namespace std; using namespace llvm; + +enum NodeKind { + NODE_DEF, + NODE_USE, + NODE_PHI, + NODE_NONE +}; + class Node { private: + // + NodeKind kind; Value* value; + Instruction* insn; + unsigned int version; + int locality; + // vector children; vector parents; - DenseMap LLMap; - unsigned int getAddressSpace(Value *v); - void initLLMap(); public: - Node(Value* _value) { value = _value; initLLMap(); }; + Node(NodeKind _kind, Value* _value, Instruction* _insn, unsigned int _version, int _locality) { + kind = _kind; + value = _value; + insn = _insn; + version = _version; + locality = _locality; + + }; // parents vector::iterator parents_begin() { return parents.begin(); } vector::iterator parents_end() { return parents.end(); } + vector::const_iterator parents_begin() const { return parents.begin(); } + vector::const_iterator parents_end() const { return parents.end(); } // children vector::iterator begin() { return children.begin(); } vector::iterator end() { return children.end(); } vector::const_iterator begin() const { return children.begin(); } vector::const_iterator end() const { return children.end(); } - - int getLL(Value* v) const { - int ll; - if (LLMap.find(v) != LLMap.end()) { - ll = LLMap.find(v)->second; - } else { - ll = -1; - } - return ll; - } void addParents(Node* parent) { - parents.push_back(parent); + vector::iterator I = find(parents.begin(), parents.end(), parent); + if( I == parents.end() ){ + parents.push_back(parent); + parent->addChild(this); + } } void addChild(Node 
*child) { @@ -87,17 +104,38 @@ class Node { } Value* getValue() const { return value; } + NodeKind getKind() const { return kind; } + unsigned int getVersion() const { return version; } + int getLocality() const { return locality; } + + void dump() { + errs () << this->getLocality() << " : "; + this->getValue()->dump(); + } + void printAsOperand(raw_ostream &o, bool) { + + } + }; class IGraph { private: StringRef name; - Node* entry; + // required? + Node* entry; vector nodes; + // required? vector getRootNodes() { return nodes; } + Value* getOperandIfLocalStmt(Instruction *insn); + + // + bool debug = true; + public: IGraph (StringRef _name) { name = _name; } + void construct(Function *F, GlobalToWideInfo *info); + Node* getEntry() const { return entry; } StringRef getName() const { return name; } @@ -125,15 +163,38 @@ class IGraph { }; namespace llvm { + // for Graph Traits + // Graph Traits requires you to provide the following + // (for more details see "llvm/ADT/GraphTraits.h") : + + // typedef NodeType - Type of Node in the graph + // typedef ChildIteratorType - Type used to iterate over children in graph + // static NodeType *getEntryNode(const GraphType &) + // Return the entry node of the graph + // static ChildIteratorType child_begin(NodeType *) + // static ChildIteratorType child_end (NodeType *) + // Return iterators that point to the beginning and ending of the child + // node list for the specified node. 
+ // + // typedef ...iterator nodes_iterator; + // static nodes_iterator nodes_begin(GraphType *G) + // static nodes_iterator nodes_end (GraphType *G) + // nodes_iterator/begin/end - Allow iteration over all nodes in the graph + // static unsigned size (GraphType *G) + // Return total number of nodes in the graph + + + // template specialization for template<> struct GraphTraits { typedef Node NodeType; typedef std::vector::iterator ChildIteratorType; - + static NodeType *getEntryNode(Node *node) { return node; } static inline ChildIteratorType child_begin(NodeType *N) { return N->begin(); } static inline ChildIteratorType child_end(NodeType *N) { return N->end(); } - }; + + // template specialization for template<> struct GraphTraits { typedef const Node NodeType; typedef vector::const_iterator ChildIteratorType; @@ -141,27 +202,30 @@ namespace llvm { static NodeType *getEntryNode(const Node *node) { return node; } static inline ChildIteratorType child_begin(const NodeType *N) { return N->begin(); } static inline ChildIteratorType child_end(const NodeType *N) { return N->end(); } - }; + // template specialization for template<> struct GraphTraits : public GraphTraits { static NodeType *getEntryNode(IGraph *G) { return G->getEntry(); } typedef std::vector::iterator nodes_iterator; static nodes_iterator nodes_begin(IGraph *G) { return G->begin(); } static nodes_iterator nodes_end(IGraph *G) { return G->end(); } - static unsigned nodes_size(IGraph *G) { return G->size(); } + static unsigned size(IGraph *G) { return G->size(); }; }; + // template specialization for template<> struct GraphTraits : public GraphTraits { static NodeType *getEntryNode(const IGraph *G) { return G->getEntry(); } typedef vector::const_iterator nodes_iterator; static nodes_iterator nodes_begin(const IGraph *G) { return G->begin(); } static nodes_iterator nodes_end(const IGraph *G) { return G->end(); } - static unsigned nodes_size(const IGraph *G) { return G->size(); } + static unsigned 
size(const IGraph *G) { return G->size(); } }; + + // template specialization for for Write Graph template<> struct DOTGraphTraits : public DefaultDOTGraphTraits { DOTGraphTraits (bool isSimple=false) : DefaultDOTGraphTraits(isSimple) {} @@ -191,29 +255,32 @@ namespace llvm { std::string Str; raw_string_ostream OS(Str); Value* value = node->getValue(); - - Instruction *insn = dyn_cast(value); - if (insn) { - OS << *insn << "\n"; - for(unsigned int i=0; i < insn->getNumOperands(); i++) { - Value *op = insn->getOperand(i); - if (i != 0 ) OS << ", "; - OS << "LL("; -#if HAVE_LLVM_VER >= 35 - op->printAsOperand(OS, false); -#else - WriteAsOperand(OS, op, false); + + // print Node Information + switch (node->getKind()) { + case NODE_DEF: +#if HAVE_LLVM_VER >= 35 + value->printAsOperand(OS, false); +#else + WriteAsOperand(OS, value, false); #endif - OS << ") = " << node->getLL(op) << " "; - } - } else { -#if HAVE_LLVM_VER >= 35 - value->printAsOperand(OS, false); -#else - WriteAsOperand(OS, value, false); -#endif + OS << "_" << node->getVersion() << " = " << node->getLocality(); + break; + case NODE_USE: + OS << "... = "; +#if HAVE_LLVM_VER >= 35 + value->printAsOperand(OS, false); +#else + WriteAsOperand(OS, value, false); +#endif + OS << "_" << node->getVersion(); + break; + case NODE_PHI: + break; + default: + assert(0 && "Inequality Graph Node Type should not be NODE_NONE"); } - + std::string OutStr = OS.str(); // if (OutStr[0] == '\n') OutStr.erase(OutStr.begin()); @@ -243,30 +310,11 @@ namespace llvm { } static std::string getEdgeSourceLabel(const Node *node, - vector::const_iterator I) { -#if 0 - // Label source of conditional branches with "T" or "F" - if (const BranchInst *BI = dyn_cast(Node->getTerminator())) - if (BI->isConditional()) - return (I == succ_begin(Node)) ? "T" : "F"; - - // Label source of switch edges with the associated value. 
- if (const SwitchInst *SI = dyn_cast(Node->getTerminator())) { - unsigned SuccNo = I.getSuccessorIndex(); - - if (SuccNo == 0) return "def"; - - std::string Str; - raw_string_ostream OS(Str); - SwitchInst::ConstCaseIt Case = - SwitchInst::ConstCaseIt::fromSuccessorIndex(SI, SuccNo); - OS << Case.getCaseValue()->getValue(); - return OS.str(); - } -#endif + vector::const_iterator I) { return ""; } - }; + }; } + #endif // _IGRAPH_H_ diff --git a/llvmLocalityOptimization.cpp b/llvmLocalityOptimization.cpp index 2d867f4..8a771ca 100644 --- a/llvmLocalityOptimization.cpp +++ b/llvmLocalityOptimization.cpp @@ -136,7 +136,7 @@ namespace { // For Debug static bool debugPassInsn = true; static const bool extraChecks = false; - static const char* debugThisFn = ""; + static const char* debugThisFn = "habanero"; // For Chapel Compiler & Breakdown static const bool fLLVMDisableIG = false; @@ -303,50 +303,6 @@ namespace { return false; } - IGraph* createIGraph(Module &M, Function *F) { - IGraph *G = new IGraph(F->getName()); - for (Function::arg_iterator I = F->arg_begin(), - E = F->arg_end(); I!=E; ++I) { - Value *srcVal = I; - Node *srcNode = G->getNodeByValue(srcVal); - if (srcNode == NULL) { - srcNode = new Node(srcVal); - G->addNode(srcNode); - } - for (User *U : I->users()) { - Value* dstVal = U; - Node *dstNode = G->getNodeByValue(dstVal); - if (dstNode == NULL) { - dstNode = new Node(dstVal); - G->addNode(dstNode); - } - srcNode->addChild(dstNode); - } - } - for (Function::iterator BI = F->begin(), BE = F->end(); BI != BE; BI++) { - BasicBlock* BB = BI; - for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; I++) { - Instruction *insn = &*I; - Value *srcVal = insn; - Node *srcNode = G->getNodeByValue(srcVal); - if (srcNode == NULL) { - srcNode = new Node(srcVal); - G->addNode(srcNode); - } - for (User *U : I->users()) { - Value *dstVal = U; - Node *dstNode = G->getNodeByValue(dstVal); - if (dstNode == NULL) { - dstNode = new Node(dstVal); - 
G->addNode(dstNode); - } - srcNode->addChild(dstNode); - } - } - } - return G; - } - void createValueTableInsn(ValueTable *vn, Instruction *insn) { if (insn->getType()->isVoidTy()) return; vn->lookup_or_add(insn); @@ -366,6 +322,7 @@ namespace { return false; } bool definitelyLocal = false; +#if 0 bool exempt = false; int ll = info->globalSpace; // find smallest possible locality level @@ -388,6 +345,7 @@ namespace { errs () << *op << " is exempted\n"; } } +#endif return definitelyLocal; } @@ -925,8 +883,9 @@ namespace { // Inspect all instructions and construt IGraph. Each node of IGraph contains a densemap that map that is one-to-one mapping of each operand into a specific address space (either 100 or 0). // If an instruction is enclosed by a local statement, set the locality level of each operand to 0. // Currently, we are assuming that gf.addr function calls correspond to Chapel's local statements, but this is not always true because gf.addr is also used to extract a local pointer from a wide pointer. We work on this later pass using NonLocals) - IGraph *G = createIGraph(M, F); - + IGraph *G = new IGraph(F->getName()); + G->construct(F, info); + // Perform a reduced version of GVN ValueTable *VN = createValueTable(F); From cd1ff955e25835bce3081edecf7f698c2458e82d Mon Sep 17 00:00:00 2001 From: Akihiro Hayashi Date: Wed, 24 Feb 2016 20:55:41 -0600 Subject: [PATCH 06/24] A routine that dumps IGraph moved to IGraph.h and IGraph.cpp --- IGraph.cpp | 24 ++++++++++++++++++++++++ IGraph.h | 6 ++++-- llvmLocalityOptimization.cpp | 30 +----------------------------- 3 files changed, 29 insertions(+), 31 deletions(-) diff --git a/IGraph.cpp b/IGraph.cpp index eb56729..b877a02 100644 --- a/IGraph.cpp +++ b/IGraph.cpp @@ -214,3 +214,27 @@ void IGraph::construct(Function *F, GlobalToWideInfo *info) { } } + +void IGraph::dumpDOT() { + std::string Filename = "ig." 
+ this->getName().str() + ".dot"; +#if HAVE_LLVM_VER >= 35 + std::error_code EC; + raw_fd_ostream File(Filename.c_str(), EC, sys::fs::F_Text); + + if (!EC) { + WriteGraph(File, (const IGraph*)this, false, "Habanero"); + } else { + errs() << "Dump IGraph : error: "<< EC.message() << "\n"; + + } +#else + std::string ErrorInfo; + raw_fd_ostream File(Filename.c_str(), ErrorInfo); + + if (ErrorInfo.empty()) { + WriteGraph(File, (const IGraph*)this, false, "Habanero"); + } else { + errs() << " error opening file for writing!"; + } +#endif +} diff --git a/IGraph.h b/IGraph.h index 5ace33d..1b50919 100644 --- a/IGraph.h +++ b/IGraph.h @@ -33,8 +33,10 @@ #include "llvm/Support/raw_ostream.h" #include "llvmUtil.h" #include "llvmGlobalToWide.h" +#include "llvm/Support/FileSystem.h" #include "llvm/Support/GenericDomTree.h" #include "llvm/Support/GenericDomTreeConstruction.h" +#include "llvm/Support/GraphWriter.h" #if HAVE_LLVM_VER >= 35 #else @@ -158,8 +160,8 @@ class IGraph { unsigned size() const { return nodes.size(); } void createGraphVizFile(const char* fileName); - // for GDB - void dump(); + // for Debug + void dumpDOT(); }; namespace llvm { diff --git a/llvmLocalityOptimization.cpp b/llvmLocalityOptimization.cpp index 8a771ca..1ad5cad 100644 --- a/llvmLocalityOptimization.cpp +++ b/llvmLocalityOptimization.cpp @@ -116,7 +116,6 @@ // For Debugging #include "llvm/Support/FileSystem.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Support/GraphWriter.h" #include "llvm/ADT/GraphTraits.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" @@ -240,18 +239,6 @@ namespace { File << *F; } } - - void dumpDOT(IGraph* G) { - std::string Filename = "ig." 
+ G->getName().str() + ".dot"; - std::error_code EC; - raw_fd_ostream File(Filename.c_str(), EC, sys::fs::F_Text); - - if (EC) { - errs() << "Dump IGraph : error: "<< EC.message() << "\n"; - } else { - WriteGraph(File, (const IGraph*)G, false, "Habanero"); - } - } #else void dumpFunction(Function *F, std::string mid) { std::string Filename = F->getName().str() + "." + mid + ".ll"; @@ -264,21 +251,6 @@ namespace { errs() << " error opening file for writing!"; errs() << "\n"; } - - void dumpDOT(IGraph* G) { - std::string Filename = "ig." + G->getName().str() + ".dot"; - errs() << "Writing '" << Filename << "'..."; - - std::string ErrorInfo; - raw_fd_ostream File(Filename.c_str(), ErrorInfo); - - if (ErrorInfo.empty()) - WriteGraph(File, (const IGraph*)G, false, "Habanero"); - else - errs() << " error opening file for writing!"; - errs() << "\n"; - - } #endif // For Debugging purpose void insertPrintf(Module &M, Instruction *insertBefore, StringRef Str) { @@ -897,7 +869,7 @@ namespace { if (debugThisFn[0] && F->getName() == debugThisFn) { VN->dump(); // For Graphviz - dumpDOT(G); + G->dumpDOT(); errs () << "\n[Local Array GVN]\n"; LocalArraysGVN->dump(); errs () << "[Local Array Decl]\n"; From f152fb222f6339b9a10291bc564c177f731e8606 Mon Sep 17 00:00:00 2001 From: Akihiro Hayashi Date: Wed, 24 Feb 2016 21:47:53 -0600 Subject: [PATCH 07/24] Add comments to IGraph construction part. Add entry node to IGraph. --- IGraph.cpp | 47 +++++++++++++++++++++++++++++++---------------- IGraph.h | 15 ++++++++------- 2 files changed, 39 insertions(+), 23 deletions(-) diff --git a/IGraph.cpp b/IGraph.cpp index b877a02..1b18e08 100644 --- a/IGraph.cpp +++ b/IGraph.cpp @@ -62,6 +62,11 @@ void IGraph::construct(Function *F, GlobalToWideInfo *info) { if (debug) { errs () << "[Inequality Graph Construction for " << F->getName() << "]\n"; } + + /* First create an entry node */ + Node *entry = new Node(NODE_ENTRY, NULL, NULL, 0, 0); + this->entry = entry; + this->addNode(entry); /* 1. 
collect addrspace 100 pointers that is used in the next step. */ /* 1. construct a set of addrspace 100 pointers. */ @@ -138,49 +143,59 @@ void IGraph::construct(Function *F, GlobalToWideInfo *info) { } DenseMap> BBInfo; - // Intra-block edge - Node *entry = NULL; + bool firstOccurrence = true; + // Create Intra-block edge for (Function::iterator BI = F->begin(), BE = F->end(); BI != BE; BI++) { BasicBlock* BB = BI; - Node *firstNode = NULL; - Node *lastNode = NULL; + // remember first and last node in BB so we can create edges between blocks. + Node *firstNodeInBB = NULL; + Node *lastNodeInBB = NULL; - // entry node + // create node for arguments if (BI == F->begin()) { if (find(possiblyRemoteArgs.begin(), possiblyRemoteArgs.end(), val) != possiblyRemoteArgs.end()) { Node *n = new Node(NODE_DEF, val, NULL, 0, 100); this->addNode(n); - entry = n; - firstNode = n; - lastNode = n; + firstNodeInBB = n; + lastNodeInBB = n; + if (firstOccurrence) { + entry->addChild(n); + } } } - + + // For each instruction + // Create a node if an instruction contains possibly-remote access for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; I++) { // add edge if needed Instruction *insn = &*I; if (NodeCandidates.find(insn) != NodeCandidates.end()) { std::tuple &info = NodeCandidates[insn]; + // create a new node. Node *n = new Node(std::get<0>(info), // Kind std::get<1>(info), // Value std::get<2>(info), // Insn 0, // Version (0 for now) std::get<3>(info)); // Locality (either 0 or 100) + // register the created node to the Graph. this->addNode(n); - if (!firstNode) { - firstNode = n; + if (!firstNodeInBB) { + // First node in the current BB. + firstNodeInBB = n; } - if (lastNode) { - lastNode->addChild(n); - n->addParents(lastNode); + if (lastNodeInBB) { + // There exists a predecessor node here. + // append the created node to a predecessor node. 
+ lastNodeInBB->addChild(n); + n->addParents(lastNodeInBB); } - lastNode = n; + lastNodeInBB = n; } } // - BBInfo[BB] = std::make_pair(firstNode, lastNode); + BBInfo[BB] = std::make_pair(firstNodeInBB, lastNodeInBB); } // Inter-block edge diff --git a/IGraph.h b/IGraph.h index 1b50919..c82653b 100644 --- a/IGraph.h +++ b/IGraph.h @@ -50,6 +50,7 @@ using namespace llvm; enum NodeKind { + NODE_ENTRY, NODE_DEF, NODE_USE, NODE_PHI, @@ -75,7 +76,6 @@ class Node { insn = _insn; version = _version; locality = _locality; - }; // parents @@ -124,10 +124,8 @@ class Node { class IGraph { private: StringRef name; - // required? Node* entry; vector nodes; - // required? vector getRootNodes() { return nodes; } Value* getOperandIfLocalStmt(Instruction *insn); @@ -140,7 +138,8 @@ class IGraph { Node* getEntry() const { return entry; } StringRef getName() const { return name; } - + + // Iterator for enumerating nodes of IGraph. vector::iterator begin() { return nodes.begin(); } vector::iterator end() { return nodes.end(); } vector::const_iterator begin() const { return nodes.begin(); } @@ -155,10 +154,10 @@ class IGraph { } return NULL; } + void addNode(Node* n) { nodes.push_back(n); } unsigned size() const { return nodes.size(); } - void createGraphVizFile(const char* fileName); // for Debug void dumpDOT(); @@ -185,7 +184,6 @@ namespace llvm { // static unsigned size (GraphType *G) // Return total number of nodes in the graph - // template specialization for template<> struct GraphTraits { typedef Node NodeType; @@ -227,7 +225,7 @@ namespace llvm { }; - // template specialization for for Write Graph + // template specialization for for Writing DOTGraph template<> struct DOTGraphTraits : public DefaultDOTGraphTraits { DOTGraphTraits (bool isSimple=false) : DefaultDOTGraphTraits(isSimple) {} @@ -260,6 +258,9 @@ namespace llvm { // print Node Information switch (node->getKind()) { + case NODE_ENTRY: + OS << "entry"; + break; case NODE_DEF: #if HAVE_LLVM_VER >= 35 
value->printAsOperand(OS, false); From 30b5dcc6495e275326b10f41590f9c5974b6b261 Mon Sep 17 00:00:00 2001 From: Akihiro Hayashi Date: Thu, 25 Feb 2016 14:21:37 -0600 Subject: [PATCH 08/24] Refactor IGraph.cpp IGraph.h --- IGraph.cpp | 1 - IGraph.h | 210 +++++++++++++++-------------------- llvmLocalityOptimization.cpp | 2 +- 3 files changed, 93 insertions(+), 120 deletions(-) diff --git a/IGraph.cpp b/IGraph.cpp index 1b18e08..9a71f45 100644 --- a/IGraph.cpp +++ b/IGraph.cpp @@ -220,7 +220,6 @@ void IGraph::construct(Function *F, GlobalToWideInfo *info) { } } // Dominator Tree Computation - // Dominator Frontier Computation // phi-insertion diff --git a/IGraph.h b/IGraph.h index c82653b..e0eabc6 100644 --- a/IGraph.h +++ b/IGraph.h @@ -35,7 +35,6 @@ #include "llvmGlobalToWide.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/GenericDomTree.h" -#include "llvm/Support/GenericDomTreeConstruction.h" #include "llvm/Support/GraphWriter.h" #if HAVE_LLVM_VER >= 35 @@ -48,12 +47,12 @@ using namespace std; using namespace llvm; - enum NodeKind { NODE_ENTRY, NODE_DEF, NODE_USE, NODE_PHI, + NODE_PI, NODE_NONE }; @@ -66,11 +65,18 @@ class Node { unsigned int version; int locality; // - vector children; - vector parents; + typedef std::vector NodeElementType; + // + NodeElementType children; + NodeElementType parents; public: - Node(NodeKind _kind, Value* _value, Instruction* _insn, unsigned int _version, int _locality) { + + Node(NodeKind _kind, + Value* _value, + Instruction* _insn, + unsigned int _version, + int _locality) { kind = _kind; value = _value; insn = _insn; @@ -78,55 +84,92 @@ class Node { locality = _locality; }; + typedef NodeElementType::iterator iterator; + typedef NodeElementType::const_iterator const_iterator; + // parents - vector::iterator parents_begin() { return parents.begin(); } - vector::iterator parents_end() { return parents.end(); } - vector::const_iterator parents_begin() const { return parents.begin(); } - vector::const_iterator 
parents_end() const { return parents.end(); } + iterator parents_begin() { return parents.begin(); } + iterator parents_end() { return parents.end(); } + const_iterator parents_begin() const { return parents.begin(); } + const_iterator parents_end() const { return parents.end(); } // children - vector::iterator begin() { return children.begin(); } - vector::iterator end() { return children.end(); } - vector::const_iterator begin() const { return children.begin(); } - vector::const_iterator end() const { return children.end(); } + iterator children_begin() { return children.begin(); } + iterator children_end() { return children.end(); } + const_iterator children_begin() const { return children.begin(); } + const_iterator children_end() const { return children.end(); } void addParents(Node* parent) { - vector::iterator I = find(parents.begin(), parents.end(), parent); - if( I == parents.end() ){ + iterator I = find(parents_begin(), parents_end(), parent); + if( I == parents_end() ){ parents.push_back(parent); parent->addChild(this); } } void addChild(Node *child) { - vector::iterator I = find(children.begin(), children.end(), child); + iterator I = find(children_begin(), children_end(), child); if( I == children.end() ){ - children.push_back(child); + children.push_back(child); child->addParents(this); } } Value* getValue() const { return value; } NodeKind getKind() const { return kind; } - unsigned int getVersion() const { return version; } + unsigned int getVersion() const { return version; } int getLocality() const { return locality; } - void dump() { - errs () << this->getLocality() << " : "; - this->getValue()->dump(); + // for debug + void dump() const { + printAsOperand(errs(), true); + errs () << "\n"; } - void printAsOperand(raw_ostream &o, bool) { - + void printAsOperand(raw_ostream &o, bool) const { + // print Node Information + switch (this->getKind()) { + case NODE_ENTRY: + o << "entry"; + break; + case NODE_DEF: +#if HAVE_LLVM_VER >= 35 + 
value->printAsOperand(o, false); +#else + WriteAsOperand(o, value, false); +#endif + o << "_" << this->getVersion() << " = " << this->getLocality(); + break; + case NODE_USE: + o << "... = "; +#if HAVE_LLVM_VER >= 35 + value->printAsOperand(o, false); +#else + WriteAsOperand(o, value, false); +#endif + o << "_" << this->getVersion(); + break; + case NODE_PHI: + case NODE_PI: + break; + default: + assert(0 && "Inequality Graph Node Type should not be NODE_NONE"); + } + } }; class IGraph { private: + // the name of IGraph, which is used for DOT graph generation StringRef name; - Node* entry; - vector nodes; - vector getRootNodes() { return nodes; } + // entry node + Node* entry; + // List of nodes in IGraph + typedef std::vector NodeListType; + NodeListType nodes; + + NodeListType getRootNodes() { return nodes; } Value* getOperandIfLocalStmt(Instruction *insn); // @@ -139,14 +182,17 @@ class IGraph { Node* getEntry() const { return entry; } StringRef getName() const { return name; } + typedef NodeListType::iterator iterator; + typedef NodeListType::const_iterator const_iterator; + // Iterator for enumerating nodes of IGraph. 
- vector::iterator begin() { return nodes.begin(); } - vector::iterator end() { return nodes.end(); } - vector::const_iterator begin() const { return nodes.begin(); } - vector::const_iterator end() const { return nodes.end(); } + iterator begin() { return nodes.begin(); } + const_iterator begin() const { return nodes.begin(); } + iterator end() { return nodes.end(); } + const_iterator end() const { return nodes.end(); } Node* getNodeByValue(const Value* v) { - for (vector::iterator I = nodes.begin(), E = nodes.end(); I != E; I++) { + for (NodeListType::iterator I = nodes.begin(), E = nodes.end(); I != E; I++) { Node* tmp = *I; if (v == tmp->getValue()) { return tmp; @@ -155,8 +201,7 @@ class IGraph { return NULL; } - void addNode(Node* n) { nodes.push_back(n); } - + void addNode(Node* n) { nodes.push_back(n); } unsigned size() const { return nodes.size(); } // for Debug @@ -183,48 +228,28 @@ namespace llvm { // nodes_iterator/begin/end - Allow iteration over all nodes in the graph // static unsigned size (GraphType *G) // Return total number of nodes in the graph - - // template specialization for - template<> struct GraphTraits { - typedef Node NodeType; - typedef std::vector::iterator ChildIteratorType; - - static NodeType *getEntryNode(Node *node) { return node; } - static inline ChildIteratorType child_begin(NodeType *N) { return N->begin(); } - static inline ChildIteratorType child_end(NodeType *N) { return N->end(); } - }; + /* The followings are used for DOT Graph generation by DOTGraphTraits */ // template specialization for template<> struct GraphTraits { typedef const Node NodeType; - typedef vector::const_iterator ChildIteratorType; + typedef NodeType::const_iterator ChildIteratorType; static NodeType *getEntryNode(const Node *node) { return node; } - static inline ChildIteratorType child_begin(const NodeType *N) { return N->begin(); } - static inline ChildIteratorType child_end(const NodeType *N) { return N->end(); } + static inline ChildIteratorType 
child_begin(const NodeType *N) { return N->children_begin(); } + static inline ChildIteratorType child_end(const NodeType *N) { return N->children_end(); } }; - - // template specialization for - template<> struct GraphTraits : public GraphTraits { - static NodeType *getEntryNode(IGraph *G) { return G->getEntry(); } - typedef std::vector::iterator nodes_iterator; - - static nodes_iterator nodes_begin(IGraph *G) { return G->begin(); } - static nodes_iterator nodes_end(IGraph *G) { return G->end(); } - static unsigned size(IGraph *G) { return G->size(); }; - }; - + // template specialization for template<> struct GraphTraits : public GraphTraits { static NodeType *getEntryNode(const IGraph *G) { return G->getEntry(); } - typedef vector::const_iterator nodes_iterator; + typedef IGraph::const_iterator nodes_iterator; static nodes_iterator nodes_begin(const IGraph *G) { return G->begin(); } static nodes_iterator nodes_end(const IGraph *G) { return G->end(); } static unsigned size(const IGraph *G) { return G->size(); } }; - // template specialization for for Writing DOTGraph template<> struct DOTGraphTraits : public DefaultDOTGraphTraits { DOTGraphTraits (bool isSimple=false) : DefaultDOTGraphTraits(isSimple) {} @@ -233,61 +258,18 @@ namespace llvm { return "Inequality Graph for '" + G->getName().str(); } - static std::string getSimpleNodeLabel(const Node* node, - const IGraph *) { - if (!node->getValue()->getName().empty()) - return node->getValue()->getName().str(); - - std::string Str; - raw_string_ostream OS(Str); - const Value *value = node->getValue(); -#if HAVE_LLVM_VER >= 35 - value->printAsOperand(OS, false); -#else - WriteAsOperand(OS, value, false); -#endif - - return OS.str(); - } - - static std::string getCompleteNodeLabel(const Node *node, - const IGraph *) { + std::string getNodeLabel(const Node *node, + const IGraph *graph) { std::string Str; raw_string_ostream OS(Str); Value* value = node->getValue(); - - // print Node Information - switch 
(node->getKind()) { - case NODE_ENTRY: - OS << "entry"; - break; - case NODE_DEF: -#if HAVE_LLVM_VER >= 35 - value->printAsOperand(OS, false); -#else - WriteAsOperand(OS, value, false); -#endif - OS << "_" << node->getVersion() << " = " << node->getLocality(); - break; - case NODE_USE: - OS << "... = "; -#if HAVE_LLVM_VER >= 35 - value->printAsOperand(OS, false); -#else - WriteAsOperand(OS, value, false); -#endif - OS << "_" << node->getVersion(); - break; - case NODE_PHI: - break; - default: - assert(0 && "Inequality Graph Node Type should not be NODE_NONE"); - } - + + node->printAsOperand(OS, true); std::string OutStr = OS.str(); - // + + // Erase if (OutStr[0] == '\n') OutStr.erase(OutStr.begin()); - + // Process OutStr for DOT format for (unsigned i = 0; i != OutStr.length(); ++i) { if (OutStr[i] == '\n') { @@ -304,16 +286,8 @@ namespace llvm { return OutStr; } - std::string getNodeLabel(const Node *node, - const IGraph *graph) { - if (isSimple()) - return getSimpleNodeLabel(node, graph); - else - return getCompleteNodeLabel(node, graph); - } - static std::string getEdgeSourceLabel(const Node *node, - vector::const_iterator I) { + Node::const_iterator I) { return ""; } }; diff --git a/llvmLocalityOptimization.cpp b/llvmLocalityOptimization.cpp index 1ad5cad..fd2cdaf 100644 --- a/llvmLocalityOptimization.cpp +++ b/llvmLocalityOptimization.cpp @@ -1227,7 +1227,7 @@ namespace { if (debugPassInsn) { errs () << "Parent Insn : " << *node->getValue() <<"\n"; } - for (vector::iterator I = node->parents_begin(), E = node->parents_end(); I != E; I++) { + for (Node::iterator I = node->parents_begin(), E = node->parents_end(); I != E; I++) { Node *tmp = *I; Value *v = tmp->getValue(); if (debugPassInsn) { From 11a6983eac9c9680e33613f65fa1217363399453 Mon Sep 17 00:00:00 2001 From: Akihiro Hayashi Date: Thu, 25 Feb 2016 22:07:41 -0600 Subject: [PATCH 09/24] Add Domnance Tree and Domnator Fontier Computation. 
--- IGraph.cpp | 186 ++++++++++++++++++++++++++++++++++++++++++++++++++--- IGraph.h | 58 ++++++++++++++++- 2 files changed, 235 insertions(+), 9 deletions(-) diff --git a/IGraph.cpp b/IGraph.cpp index 9a71f45..817a08c 100644 --- a/IGraph.cpp +++ b/IGraph.cpp @@ -57,6 +57,27 @@ Value* IGraph::getOperandIfLocalStmt(Instruction *insn) { return NULL; } +void IGraph::setPostOrderNumberWithDFSImpl(Node *node, int &number) { + for (Node::iterator I = node->children_begin(), + E = node->children_end(); + I != E; I++) { + Node *child = *I; + setPostOrderNumberWithDFSImpl(child, number); + if (child->getPostOrderNumber() == -1) { + child->setPostOrderNumber(number++); + } + } + +} + +void IGraph::setPostOrderNumberWithDFS() { + Node *entry = this->getEntry(); + entry->setPostOrderNumber(this->size() - 1); + int number = 0; + setPostOrderNumberWithDFSImpl(entry, number); + assert(this->size() - 1 == number); +} + void IGraph::construct(Function *F, GlobalToWideInfo *info) { if (debug) { @@ -219,14 +240,163 @@ void IGraph::construct(Function *F, GlobalToWideInfo *info) { } } } - // Dominator Tree Computation - // Dominator Frontier Computation - - // phi-insertion - - // renaming - - } + } + // Dominator Tree Computation + this->setPostOrderNumberWithDFS(); + this->computeDominatorTree(); + // Dominator Frontier Computation + this->computeDominanceFrontier(); + // phi-insertion + + // renaming + +} + +Node* IGraph::computeIntersect(Node* b1, Node* b2) { + Node *finger1 = b1; + Node *finger2 = b2; + while (finger1->getPostOrderNumber() != finger2->getPostOrderNumber()) { + while (finger1->getPostOrderNumber() < finger2->getPostOrderNumber()) { + assert(finger1->getDom().count() == 1); + finger1 = this->getNodeByPostOrderNumber(finger1->getDom().find_first()); + } + while (finger2->getPostOrderNumber() < finger1->getPostOrderNumber()) { + assert(finger2->getDom().count() == 1); + finger2 = this->getNodeByPostOrderNumber(finger2->getDom().find_first()); + } + } + return finger1; 
+} + +void IGraph::computeDominatorTree() { + /* for all nodes, initialize the dominators array */ +#if 0 + for (IGraph::iterator I = this->begin(), E = this->end(); I != E; I++) { + Node *n = *I; + DominatorTreeType t(this->size(), true); + n->setDom(t); + } + DominatorTreeType dom = entry->getDom(); + dom.reset(); + dom[entry->getPostOrderNumber()] = true; + entry->setDom(dom); + bool Changed = true; + while (Changed) { + Changed = false; + // reverse post order + for (int i = this->size() - 1; i >= 0; i--) { + Node * p = this->getNodeByPostOrderNumber(i); + if (p == this->getEntry()) continue; + Node::DominatorTreeType newSet(this->size(), true); + for (Node::iterator IPRED = p->parents_begin(), EPRED = p->parents_end(); IPRED != EPRED; IPRED++) { + Node *nPred = *IPRED; + newSet &= nPred->getDom(); + } + newSet[p->getPostOrderNumber()] = true; + if (p->getDom() != newSet) { + p->setDom(newSet); + Changed = true; + } + } + } +#else + /* initialize the domiantor array */ + for (IGraph::iterator I = this->begin(), + E = this->end(); I != E; I++) { + Node *node = *I; + if (node == this->getEntry()) { + Node::IDominatorTreeType init(this->size(), false); + init[node->getPostOrderNumber()] = true; + node->setDom(init); + node->setUndefined(false); + } else { + node->setUndefined(true); + } + } + + bool Changed = true; + while (Changed) { + Changed = false; + /* in reverse postorder except entry node */ + for (int i = this->size() - 1; i >= 0; i--) { + Node * b = this->getNodeByPostOrderNumber(i); + if (b == this->getEntry()) continue; + /* pick one first processed predecessor */ + Node::IDominatorTreeType new_idom(this->size(), false); + Node *first_pred = NULL; + for (IGraph::iterator IPRED = b->parents_begin(), + EPRED = b->parents_end(); IPRED != EPRED; IPRED++) { + Node *node = *IPRED; + if (!node->getUndefined()) { + first_pred = node; + break; + } + + } + assert(first_pred != NULL); + new_idom[first_pred->getPostOrderNumber()] = true; + for (Node::iterator 
IPRED = b->parents_begin(), + EPRED = b->parents_end(); IPRED != EPRED; IPRED++) { + Node *p = *IPRED; + if (p == first_pred) continue; + if (!p->getUndefined()) { + new_idom.reset(); + int idx = computeIntersect(p, first_pred)->getPostOrderNumber(); + new_idom[idx] = true; + } + } + if (b->getDom() != new_idom) { + b->setDom(new_idom); + b->setUndefined(false); + Changed = true; + } + } + } +#endif + + for (IGraph::iterator I = this->begin(), E = this->end(); I != E; I++) { + Node *n = *I; + errs () << "IDOM(" << n->getPostOrderNumber() << ") : "; + Node::IDominatorTreeType b = n->getDom(); + for (int i = 0; i < b.size(); i++) { + if (b[i]) { + errs () << i << ", "; + } + } + errs () << "\n"; + } +} + +void IGraph::computeDominanceFrontier() { + /* Reset Dominance Frontier */ + for (IGraph::iterator I = this->begin(), E = this->end(); I != E; I++) { + Node *b = *I; + b->resetDominanceFrontier(); + } + for (IGraph::iterator I = this->begin(), E = this->end(); I != E; I++) { + Node *b = *I; + if (b->getNumPreds() >= 2) { + for (Node::iterator BI = b->parents_begin(), + BE = b->parents_end(); BI != BE; BI++) { + Node *runner = *BI; + while (runner->getPostOrderNumber() != b->getDom().find_first()) { + runner->addToDominanceFrontier(b); + runner = this->getNodeByPostOrderNumber(runner->getDom().find_first()); + } + } + } + } + for (IGraph::iterator I = this->begin(), E = this->end(); I != E; I++) { + Node *b = *I; + errs () << "DF(" << b->getPostOrderNumber() << ") : "; + for (Node::df_iterator DI = b->df_begin(), + DE = b->df_end(); DI != DE; DI++) { + Node* df = *DI; + errs () << df->getPostOrderNumber() << ", "; + } + errs () << "\n"; + } + } void IGraph::dumpDOT() { diff --git a/IGraph.h b/IGraph.h index e0eabc6..3d9dd26 100644 --- a/IGraph.h +++ b/IGraph.h @@ -28,6 +28,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/ADT/GraphTraits.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallBitVector.h" #include "llvm/IR/Value.h" #include 
"llvm/Support/DOTGraphTraits.h" #include "llvm/Support/raw_ostream.h" @@ -56,7 +57,11 @@ enum NodeKind { NODE_NONE }; + class Node { +public: + typedef BitVector IDominatorTreeType; + typedef SmallVector DominanceFrontierType; private: // NodeKind kind; @@ -64,12 +69,21 @@ class Node { Instruction* insn; unsigned int version; int locality; + // for Dominant Tree + int postOrderNumber; + // typedef std::vector NodeElementType; + // NodeElementType children; NodeElementType parents; + // For Dominator Tree & Dominance Frontier + IDominatorTreeType idom; + bool domIsUndefined; + DominanceFrontierType dominanceFrontier; + public: Node(NodeKind _kind, @@ -82,6 +96,8 @@ class Node { insn = _insn; version = _version; locality = _locality; + postOrderNumber = -1; + domIsUndefined = true; }; typedef NodeElementType::iterator iterator; @@ -92,6 +108,7 @@ class Node { iterator parents_end() { return parents.end(); } const_iterator parents_begin() const { return parents.begin(); } const_iterator parents_end() const { return parents.end(); } + // children iterator children_begin() { return children.begin(); } iterator children_end() { return children.end(); } @@ -119,6 +136,23 @@ class Node { unsigned int getVersion() const { return version; } int getLocality() const { return locality; } + // For Dominator Tree & Dominance Frontier + void setPostOrderNumber(int _postOrderNumber) { postOrderNumber = _postOrderNumber; } + int getPostOrderNumber() const { return postOrderNumber; } + bool getUndefined() { return domIsUndefined; } + void setUndefined(bool flag) { domIsUndefined = flag; } + IDominatorTreeType getDom() { return idom; } + void setDom(IDominatorTreeType _idom) { idom = _idom; } + void resetDominanceFrontier() { dominanceFrontier.clear(); } + void addToDominanceFrontier(Node *b) { dominanceFrontier.push_back(b); } + + int getNumPreds() { return parents.size(); } + + typedef DominanceFrontierType::iterator df_iterator; + + df_iterator df_begin() { return 
dominanceFrontier.begin(); } + df_iterator df_end() { return dominanceFrontier.end(); } + // for debug void dump() const { printAsOperand(errs(), true); @@ -154,7 +188,8 @@ class Node { default: assert(0 && "Inequality Graph Node Type should not be NODE_NONE"); } - + + o << "\n" << this->getPostOrderNumber(); } }; @@ -172,6 +207,17 @@ class IGraph { NodeListType getRootNodes() { return nodes; } Value* getOperandIfLocalStmt(Instruction *insn); + void setPostOrderNumberWithDFSImpl(Node*, int&); + void setPostOrderNumberWithDFS(); + + /* For Dominator Tree Construction*/ + void computeDominatorTree(); + Node* computeIntersect(Node*, Node*); + + /* For Dominance Frontier Construction */ + void computeDominanceFrontier(); + + // bool debug = true; @@ -201,6 +247,16 @@ class IGraph { return NULL; } + Node* getNodeByPostOrderNumber(const int number) { + for (NodeListType::iterator I = nodes.begin(), E = nodes.end(); I != E; I++) { + Node* tmp = *I; + if (number == tmp->getPostOrderNumber()) { + return tmp; + } + } + return NULL; + } + void addNode(Node* n) { nodes.push_back(n); } unsigned size() const { return nodes.size(); } From fed2a62e06645b090056fa9babff2704129079ef Mon Sep 17 00:00:00 2001 From: Akihiro Hayashi Date: Thu, 25 Feb 2016 23:16:55 -0600 Subject: [PATCH 10/24] Add Phi-node insersion --- IGraph.cpp | 110 ++++++++++++++++++++++++++++------------------------- IGraph.h | 19 +++++++++ 2 files changed, 77 insertions(+), 52 deletions(-) diff --git a/IGraph.cpp b/IGraph.cpp index 817a08c..e1921e4 100644 --- a/IGraph.cpp +++ b/IGraph.cpp @@ -247,7 +247,43 @@ void IGraph::construct(Function *F, GlobalToWideInfo *info) { // Dominator Frontier Computation this->computeDominanceFrontier(); // phi-insertion - + SmallVector phiAddedNodes; /* set of nodes where phi is added */ + for (SmallVector::iterator I = possiblyRemotePtrs.begin(), + E = possiblyRemotePtrs.end(); I != E; I++) { + Value* val = *I; + SmallVector DEFNodes; + for (IGraph::iterator NI = this->begin(), + 
NE = this->end(); NI != NE; NI++) { + Node *node = *NI; + if (node->getKind() == NODE_DEF && node->getValue() == val) { + DEFNodes.push_back(node); + } + } + while (!DEFNodes.empty()) { + Node* DEFNode = DEFNodes[0]; + DEFNodes.erase(DEFNodes.begin()); + for (Node::df_iterator DI = DEFNode->df_begin(), + DE = DEFNode->df_end(); DI != DE; DI++) { + Node *DFofDEF = *DI; + if (find(phiAddedNodes.begin(), phiAddedNodes.end(), DFofDEF) == phiAddedNodes.end()) { + Node *phiNode = new Node(NODE_PHI, NULL, NULL, 0, 0); + this->addNode(phiNode); + for (Node::iterator NI = DFofDEF->parents_begin(), + NE = DFofDEF->parents_end(); + NI != NE; NI++) { + Node *parents = *NI; + parents->eraseFromChild(DFofDEF); + parents->addChild(phiNode); + } + phiNode->addChild(DFofDEF); + phiAddedNodes.push_back(DFofDEF); + if (find(DEFNodes.begin(), DEFNodes.end(), DFofDEF) == DEFNodes.end()) { + DEFNodes.push_back(DFofDEF); + } + } + } + } + } // renaming } @@ -269,37 +305,6 @@ Node* IGraph::computeIntersect(Node* b1, Node* b2) { } void IGraph::computeDominatorTree() { - /* for all nodes, initialize the dominators array */ -#if 0 - for (IGraph::iterator I = this->begin(), E = this->end(); I != E; I++) { - Node *n = *I; - DominatorTreeType t(this->size(), true); - n->setDom(t); - } - DominatorTreeType dom = entry->getDom(); - dom.reset(); - dom[entry->getPostOrderNumber()] = true; - entry->setDom(dom); - bool Changed = true; - while (Changed) { - Changed = false; - // reverse post order - for (int i = this->size() - 1; i >= 0; i--) { - Node * p = this->getNodeByPostOrderNumber(i); - if (p == this->getEntry()) continue; - Node::DominatorTreeType newSet(this->size(), true); - for (Node::iterator IPRED = p->parents_begin(), EPRED = p->parents_end(); IPRED != EPRED; IPRED++) { - Node *nPred = *IPRED; - newSet &= nPred->getDom(); - } - newSet[p->getPostOrderNumber()] = true; - if (p->getDom() != newSet) { - p->setDom(newSet); - Changed = true; - } - } - } -#else /* initialize the domiantor array 
*/ for (IGraph::iterator I = this->begin(), E = this->end(); I != E; I++) { @@ -331,7 +336,6 @@ void IGraph::computeDominatorTree() { first_pred = node; break; } - } assert(first_pred != NULL); new_idom[first_pred->getPostOrderNumber()] = true; @@ -352,18 +356,18 @@ void IGraph::computeDominatorTree() { } } } -#endif - - for (IGraph::iterator I = this->begin(), E = this->end(); I != E; I++) { - Node *n = *I; - errs () << "IDOM(" << n->getPostOrderNumber() << ") : "; - Node::IDominatorTreeType b = n->getDom(); - for (int i = 0; i < b.size(); i++) { - if (b[i]) { - errs () << i << ", "; + if (debug) { + for (IGraph::iterator I = this->begin(), E = this->end(); I != E; I++) { + Node *n = *I; + errs () << "IDOM(" << n->getPostOrderNumber() << ") : "; + Node::IDominatorTreeType b = n->getDom(); + for (int i = 0; i < b.size(); i++) { + if (b[i]) { + errs () << i << ", "; + } } + errs () << "\n"; } - errs () << "\n"; } } @@ -385,16 +389,18 @@ void IGraph::computeDominanceFrontier() { } } } - } - for (IGraph::iterator I = this->begin(), E = this->end(); I != E; I++) { - Node *b = *I; - errs () << "DF(" << b->getPostOrderNumber() << ") : "; - for (Node::df_iterator DI = b->df_begin(), - DE = b->df_end(); DI != DE; DI++) { - Node* df = *DI; - errs () << df->getPostOrderNumber() << ", "; + } + if (debug) { + for (IGraph::iterator I = this->begin(), E = this->end(); I != E; I++) { + Node *b = *I; + errs () << "DF(" << b->getPostOrderNumber() << ") : "; + for (Node::df_iterator DI = b->df_begin(), + DE = b->df_end(); DI != DE; DI++) { + Node* df = *DI; + errs () << df->getPostOrderNumber() << ", "; + } + errs () << "\n"; } - errs () << "\n"; } } diff --git a/IGraph.h b/IGraph.h index 3d9dd26..e07267e 100644 --- a/IGraph.h +++ b/IGraph.h @@ -123,6 +123,14 @@ class Node { } } + void eraseFromParent(Node* parent) { + iterator I = find(parents_begin(), parents_end(), parent); + if ( I != parents_end() ) { + parents.erase(I); + eraseFromChild(this); + } + } + void addChild(Node 
*child) { iterator I = find(children_begin(), children_end(), child); if( I == children.end() ){ @@ -131,6 +139,15 @@ class Node { } } + void eraseFromChild(Node* child) { + iterator I = find(children_begin(), children_end(), child); + if ( I != children_end() ) { + children.erase(I); + eraseFromParent(this); + } + } + + Value* getValue() const { return value; } NodeKind getKind() const { return kind; } unsigned int getVersion() const { return version; } @@ -183,6 +200,8 @@ class Node { o << "_" << this->getVersion(); break; case NODE_PHI: + o << "phi()"; + break; case NODE_PI: break; default: From 2f910b8e296ce6ddf5fcd070964caf81c273c6fd Mon Sep 17 00:00:00 2001 From: Akihiro Hayashi Date: Wed, 2 Mar 2016 00:32:44 -0600 Subject: [PATCH 11/24] Add Renaming --- IGraph.cpp | 138 ++++++++++++++++++++++++++++++++++++++++++++++------- IGraph.h | 128 +++++++++++++++++++++++++++++++++++++------------ 2 files changed, 217 insertions(+), 49 deletions(-) diff --git a/IGraph.cpp b/IGraph.cpp index e1921e4..ec5b359 100644 --- a/IGraph.cpp +++ b/IGraph.cpp @@ -27,7 +27,7 @@ #include #include #include - +#include #if HAVE_LLVM_VER >= 35 #include "llvm/IR/InstIterator.h" @@ -71,6 +71,14 @@ void IGraph::setPostOrderNumberWithDFSImpl(Node *node, int &number) { } void IGraph::setPostOrderNumberWithDFS() { + /* 1. Reset Post order number */ + for (IGraph::iterator I = this->begin(), + E = this->end(); I != E; I++) { + Node *n = *I; + n->resetPostOrderNumber(); + } + + /* 2. 
set post order number recursively */ Node *entry = this->getEntry(); entry->setPostOrderNumber(this->size() - 1); int number = 0; @@ -248,44 +256,81 @@ void IGraph::construct(Function *F, GlobalToWideInfo *info) { this->computeDominanceFrontier(); // phi-insertion SmallVector phiAddedNodes; /* set of nodes where phi is added */ + // For each addrspace(100) pointer for (SmallVector::iterator I = possiblyRemotePtrs.begin(), E = possiblyRemotePtrs.end(); I != E; I++) { Value* val = *I; - SmallVector DEFNodes; + SmallVector DEFSites; + // Build a set of nodes that define the current addrspace(100) pointer for (IGraph::iterator NI = this->begin(), NE = this->end(); NI != NE; NI++) { Node *node = *NI; if (node->getKind() == NODE_DEF && node->getValue() == val) { - DEFNodes.push_back(node); + DEFSites.push_back(node); } } - while (!DEFNodes.empty()) { - Node* DEFNode = DEFNodes[0]; - DEFNodes.erase(DEFNodes.begin()); + // For each node that defines the current addrspace(100) pointer + while (!DEFSites.empty()) { + Node* DEFNode = DEFSites[0]; + DEFSites.erase(DEFSites.begin()); + // For each dominance frontier of the current node for (Node::df_iterator DI = DEFNode->df_begin(), DE = DEFNode->df_end(); DI != DE; DI++) { - Node *DFofDEF = *DI; - if (find(phiAddedNodes.begin(), phiAddedNodes.end(), DFofDEF) == phiAddedNodes.end()) { - Node *phiNode = new Node(NODE_PHI, NULL, NULL, 0, 0); + Node *DFofDEF = *DI; + // skip if a phi-node is already inserted + if (find(phiAddedNodes.begin(), + phiAddedNodes.end(), + DFofDEF) == phiAddedNodes.end()) { + Node *phiNode = new Node(NODE_PHI, val, NULL, 0, 0); this->addNode(phiNode); + // inserting a new phi-node + // preserve parents and children of DFofDEF first (TODO functionalize) + Node::NodeElementType DFofDEFParents; for (Node::iterator NI = DFofDEF->parents_begin(), NE = DFofDEF->parents_end(); NI != NE; NI++) { Node *parents = *NI; + DFofDEFParents.push_back(parents); + } + for (Node::iterator NI = DFofDEFParents.begin(), + NE 
= DFofDEFParents.end(); NI != NE; NI++) { + // + Node *parents = *NI; + DFofDEF->eraseFromParent(parents); parents->eraseFromChild(DFofDEF); parents->addChild(phiNode); - } - phiNode->addChild(DFofDEF); + phiNode->addParents(parents); + } + DFofDEF->addParents(phiNode); + phiNode->addChild(DFofDEF); phiAddedNodes.push_back(DFofDEF); - if (find(DEFNodes.begin(), DEFNodes.end(), DFofDEF) == DEFNodes.end()) { - DEFNodes.push_back(DFofDEF); + // phi-node is also DEF node + if (find(DEFSites.begin(), + DEFSites.end(), DFofDEF) == DEFSites.end()) { + DEFSites.push_back(DFofDEF); } } } } } - // renaming - + + // DT computation again + // Dominator Tree Computation + if (phiAddedNodes.size() > 0) { + this->setPostOrderNumberWithDFS(); + this->computeDominatorTree(); + } + + // renaming + // initilize counters and stacks + for (SmallVector::iterator I = possiblyRemotePtrs.begin(), + E = possiblyRemotePtrs.end(); I != E; I++) { + Value* v = *I; + renamingCounters[v] = 0; + StackType st; + renamingStacks[v] = st; + } + this->performRenaming(); } Node* IGraph::computeIntersect(Node* b1, Node* b2) { @@ -325,6 +370,7 @@ void IGraph::computeDominatorTree() { /* in reverse postorder except entry node */ for (int i = this->size() - 1; i >= 0; i--) { Node * b = this->getNodeByPostOrderNumber(i); + errs () << "PostOrder(" << i << ")\n"; if (b == this->getEntry()) continue; /* pick one first processed predecessor */ Node::IDominatorTreeType new_idom(this->size(), false); @@ -372,11 +418,13 @@ void IGraph::computeDominatorTree() { } void IGraph::computeDominanceFrontier() { - /* Reset Dominance Frontier */ + /* 1. Reset Dominance Frontier */ for (IGraph::iterator I = this->begin(), E = this->end(); I != E; I++) { Node *b = *I; b->resetDominanceFrontier(); } + + /* 2. 
Compute Dominacne Frontier */ for (IGraph::iterator I = this->begin(), E = this->end(); I != E; I++) { Node *b = *I; if (b->getNumPreds() >= 2) { @@ -389,7 +437,9 @@ void IGraph::computeDominanceFrontier() { } } } - } + } + + /* 3. Dump Dominance Frontier if needed */ if (debug) { for (IGraph::iterator I = this->begin(), E = this->end(); I != E; I++) { Node *b = *I; @@ -405,8 +455,60 @@ void IGraph::computeDominanceFrontier() { } +void IGraph::performRenamingImpl(Node *n, Node::NodeElementType &visited) { + // (TODO) see if previously visited + if (find(visited.begin(), visited.end(), n) != visited.end()) { + return; + } else { + visited.push_back(n); + } + + switch (n->getKind()) { + case NODE_PHI: + genName(n->getValue()); + n->setVersion(renamingStacks[n->getValue()].top()); + break; + case NODE_USE: + n->setVersion(renamingStacks[n->getValue()].top()); + break; + case NODE_DEF: + genName(n->getValue()); + n->setVersion(renamingStacks[n->getValue()].top()); + break; + default: + ; // do nothing + } + // + for (IGraph::iterator I = this->begin(), E = this->end(); I != E; I++) { + Node *node = *I; + // see if the node is a children of n in DT + if (n != node + && n->getPostOrderNumber() == node->getDom().find_first()) { + performRenamingImpl(node, visited); + } + } + if (n->getKind() == NODE_DEF) { + renamingStacks[n->getValue()].pop(); + } +} + +void IGraph::performRenaming() { + Node::NodeElementType visited; + Node *entry = this->getEntry(); + performRenamingImpl(entry, visited); +} + +void IGraph::genName(Value *v) { + int i = renamingCounters[v]; + renamingStacks[v].push(i); + renamingCounters[v] = i + 1; +} + void IGraph::dumpDOT() { - std::string Filename = "ig." + this->getName().str() + ".dot"; + static int version = 0; + stringstream ss; + ss << version++; + std::string Filename = "ig." 
+ this->getName().str() +ss.str() + ".dot"; #if HAVE_LLVM_VER >= 35 std::error_code EC; raw_fd_ostream File(Filename.c_str(), EC, sys::fs::F_Text); diff --git a/IGraph.h b/IGraph.h index e07267e..072530a 100644 --- a/IGraph.h +++ b/IGraph.h @@ -61,7 +61,9 @@ enum NodeKind { class Node { public: typedef BitVector IDominatorTreeType; - typedef SmallVector DominanceFrontierType; + typedef SmallVector DominanceFrontierType; + typedef std::vector NodeElementType; + private: // NodeKind kind; @@ -73,17 +75,18 @@ class Node { int postOrderNumber; // - typedef std::vector NodeElementType; + // NodeElementType children; NodeElementType parents; - + // For Dominator Tree & Dominance Frontier IDominatorTreeType idom; bool domIsUndefined; DominanceFrontierType dominanceFrontier; - + NodeElementType phiNodeArgs; + public: Node(NodeKind _kind, @@ -127,7 +130,6 @@ class Node { iterator I = find(parents_begin(), parents_end(), parent); if ( I != parents_end() ) { parents.erase(I); - eraseFromChild(this); } } @@ -143,17 +145,18 @@ class Node { iterator I = find(children_begin(), children_end(), child); if ( I != children_end() ) { children.erase(I); - eraseFromParent(this); } } - Value* getValue() const { return value; } NodeKind getKind() const { return kind; } - unsigned int getVersion() const { return version; } + unsigned int getVersion() const { return version; } int getLocality() const { return locality; } + void setVersion(unsigned int _version) { version = _version; } + // For Dominator Tree & Dominance Frontier + void resetPostOrderNumber() { postOrderNumber = -1; }; void setPostOrderNumber(int _postOrderNumber) { postOrderNumber = _postOrderNumber; } int getPostOrderNumber() const { return postOrderNumber; } bool getUndefined() { return domIsUndefined; } @@ -176,39 +179,87 @@ class Node { errs () << "\n"; } - void printAsOperand(raw_ostream &o, bool) const { + void printAsOperand(raw_ostream &o, bool PrettyPrint) const { // print Node Information - switch 
(this->getKind()) { - case NODE_ENTRY: - o << "entry"; - break; - case NODE_DEF: + if (PrettyPrint) { + switch (this->getKind()) { + case NODE_ENTRY: + o << "entry"; + break; + case NODE_DEF: #if HAVE_LLVM_VER >= 35 - value->printAsOperand(o, false); + value->printAsOperand(o, false); #else - WriteAsOperand(o, value, false); + WriteAsOperand(o, value, false); #endif - o << "_" << this->getVersion() << " = " << this->getLocality(); - break; - case NODE_USE: - o << "... = "; + o << "_" << this->getVersion() << " = " << this->getLocality(); + break; + case NODE_USE: + o << "... = "; #if HAVE_LLVM_VER >= 35 - value->printAsOperand(o, false); + value->printAsOperand(o, false); #else - WriteAsOperand(o, value, false); + WriteAsOperand(o, value, false); #endif - o << "_" << this->getVersion(); - break; - case NODE_PHI: - o << "phi()"; - break; - case NODE_PI: - break; - default: + o << "_" << this->getVersion(); + break; + case NODE_PHI: +#if HAVE_LLVM_VER >= 35 + value->printAsOperand(o, false); +#else + WriteAsOperand(o, value, false); +#endif + o << "_" << this->getVersion(); + + o << " = phi("; + for (const_iterator I = this->parents_begin(), + E = this->parents_end(); I != E; I++) { + Node *n = *I; + n->printAsOperand(o, false); + if (I+1 != E) { + o << ", "; + } + } + o << ")"; + break; + case NODE_PI: + break; + default: assert(0 && "Inequality Graph Node Type should not be NODE_NONE"); + } + o << "\n" << this->getPostOrderNumber(); +#ifdef DEBUG + o << "\n" << "Parents ("; + for (const_iterator I = parents_begin(), E = parents_end(); I != E; I++) { + Node *n = *I; + o << n->getPostOrderNumber(); + if (I+1 != E) { + o << ", "; + } + } + o << ")"; + o << "\n" << "Children ("; + for (const_iterator I = children_begin(), E = children_end(); I != E; I++) { + Node *n = *I; + o << n->getPostOrderNumber(); + if (I+1 != E) { + o << ", "; + } + } + o << ")"; +#endif + } else { + NodeKind kind = this->getKind(); + if (kind == NODE_DEF || kind == NODE_USE) { +#if 
HAVE_LLVM_VER >= 35 + value->printAsOperand(o, false); +#else + WriteAsOperand(o, value, false); +#endif + o << "_" << this->getVersion(); + } } - o << "\n" << this->getPostOrderNumber(); } }; @@ -217,11 +268,20 @@ class IGraph { private: // the name of IGraph, which is used for DOT graph generation StringRef name; + // entry node Node* entry; + // List of nodes in IGraph typedef std::vector NodeListType; NodeListType nodes; + + // For Renaming + typedef std::stack StackType; + typedef DenseMap RenamingStacksType; + typedef DenseMap RenamingCounterType; + RenamingStacksType renamingStacks; + RenamingCounterType renamingCounters; NodeListType getRootNodes() { return nodes; } Value* getOperandIfLocalStmt(Instruction *insn); @@ -235,7 +295,13 @@ class IGraph { /* For Dominance Frontier Construction */ void computeDominanceFrontier(); + + /* For phi node insertion & renaming */ + void insertPhiNodes(); + void performRenaming(); + void performRenamingImpl(Node *, Node::NodeElementType&); + void genName(Value *v); // bool debug = true; From 2c34ba173abbe74d24cf98819fe5351a28c0333f Mon Sep 17 00:00:00 2001 From: Akihiro Hayashi Date: Wed, 2 Mar 2016 12:52:48 -0600 Subject: [PATCH 12/24] Refactor IGraph construction --- IGraph.cpp | 426 ++++++++++++++++++++++++++++++++--------------------- IGraph.h | 292 +++++++++++++++--------------------- 2 files changed, 377 insertions(+), 341 deletions(-) diff --git a/IGraph.cpp b/IGraph.cpp index ec5b359..c5ef2b4 100644 --- a/IGraph.cpp +++ b/IGraph.cpp @@ -38,6 +38,81 @@ using namespace std; using namespace llvm; +void Node::printAsOperandInternal(raw_ostream &o, Value* value) const { +#if HAVE_LLVM_VER >= 35 + value->printAsOperand(o, false); +#else + WriteAsOperand(o, value, false); +#endif +} + +void Node::printAsOperand(raw_ostream &o, bool PrettyPrint) const { + // print Node Information + if (PrettyPrint) { + switch (this->getKind()) { + case NODE_ENTRY: + o << "entry"; + break; + case NODE_DEF: + printAsOperandInternal(o, 
value); + o << "_" << this->getVersion() << " = " << this->getLocality(); + break; + case NODE_USE: + o << "... = "; + printAsOperandInternal(o, value); + o << "_" << this->getVersion(); + break; + case NODE_PHI: + printAsOperandInternal(o, value); + o << "_" << this->getVersion(); + + o << " = phi("; + for (const_iterator I = this->parents_begin(), + E = this->parents_end(); I != E; I++) { + Node *n = *I; + n->printAsOperand(o, false); + if (I+1 != E) { + o << ", "; + } + } + o << ")"; + break; + case NODE_PI: + break; + default: + assert(0 && "Inequality Graph Node Type should not be NODE_NONE"); + } + o << "\n" << this->getPostOrderNumber(); +#ifdef DEBUG + o << "\n" << "Parents ("; + for (const_iterator I = parents_begin(), E = parents_end(); I != E; I++) { + Node *n = *I; + o << n->getPostOrderNumber(); + if (I+1 != E) { + o << ", "; + } + } + o << ")"; + o << "\n" << "Children ("; + for (const_iterator I = children_begin(), E = children_end(); I != E; I++) { + Node *n = *I; + o << n->getPostOrderNumber(); + if (I+1 != E) { + o << ", "; + } + } + o << ")"; +#endif + } else { + NodeKind kind = this->getKind(); + if (kind == NODE_DEF || kind == NODE_USE) { + printAsOperandInternal(o, value); + o << "_" << this->getVersion(); + } + } + +} + Value* IGraph::getOperandIfLocalStmt(Instruction *insn) { CallInst *call = dyn_cast(insn); if (call) { @@ -57,55 +132,15 @@ Value* IGraph::getOperandIfLocalStmt(Instruction *insn) { return NULL; } -void IGraph::setPostOrderNumberWithDFSImpl(Node *node, int &number) { - for (Node::iterator I = node->children_begin(), - E = node->children_end(); - I != E; I++) { - Node *child = *I; - setPostOrderNumberWithDFSImpl(child, number); - if (child->getPostOrderNumber() == -1) { - child->setPostOrderNumber(number++); - } - } - -} - -void IGraph::setPostOrderNumberWithDFS() { - /* 1. 
Reset Post order number */ - for (IGraph::iterator I = this->begin(), - E = this->end(); I != E; I++) { - Node *n = *I; - n->resetPostOrderNumber(); - } - - /* 2. set post order number recursively */ - Node *entry = this->getEntry(); - entry->setPostOrderNumber(this->size() - 1); - int number = 0; - setPostOrderNumberWithDFSImpl(entry, number); - assert(this->size() - 1 == number); -} - -void IGraph::construct(Function *F, GlobalToWideInfo *info) { - - if (debug) { - errs () << "[Inequality Graph Construction for " << F->getName() << "]\n"; - } - - /* First create an entry node */ - Node *entry = new Node(NODE_ENTRY, NULL, NULL, 0, 0); - this->entry = entry; - this->addNode(entry); - +IGraph::InsnToNodeMapType IGraph::analyzeDefUseOfLocality(Function *F, GlobalToWideInfo *info) { /* 1. collect addrspace 100 pointers that is used in the next step. */ /* 1. construct a set of addrspace 100 pointers. */ /* 2. construct a list of blocks that def/use the pointer. */ - SmallVector possiblyRemotePtrs; - SmallVector possiblyRemoteArgs; // analyze arguments for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I!=E; ++I) { Value *arg = I; - if (arg->getType()->isPointerTy() && arg->getType()->getPointerAddressSpace() == info->globalSpace) { + if (arg->getType()->isPointerTy() + && arg->getType()->getPointerAddressSpace() == info->globalSpace) { if (find(possiblyRemotePtrs.begin(), possiblyRemotePtrs.end(), arg) == possiblyRemotePtrs.end()) { @@ -120,12 +155,12 @@ void IGraph::construct(Function *F, GlobalToWideInfo *info) { } // analyze instructions - DenseMap> NodeCandidates; + InsnToNodeMapType NodeCandidates; for (inst_iterator II = inst_begin(F), IE = inst_end(F); II != IE; ++II) { Instruction *insn = &*II; bool needToWork = false; // - NodeKind kind = NODE_NONE; + Node::NodeKind kind = Node::NODE_NONE; Value *ptrOp = NULL; int addrspace = 100; // @@ -134,7 +169,7 @@ void IGraph::construct(Function *F, GlobalToWideInfo *info) { LoadInst *load = cast(insn); 
if(load->getPointerAddressSpace() == info->globalSpace) { needToWork = true; - kind = NODE_USE; + kind = Node::NODE_USE; ptrOp = load->getPointerOperand(); addrspace = 100; } @@ -144,7 +179,7 @@ void IGraph::construct(Function *F, GlobalToWideInfo *info) { ptrOp = getOperandIfLocalStmt(insn); if (ptrOp) { needToWork = true; - kind = NODE_DEF; + kind = Node::NODE_DEF; addrspace = 0; } } @@ -161,10 +196,11 @@ void IGraph::construct(Function *F, GlobalToWideInfo *info) { NodeCandidates[insn] = std::make_tuple(kind, ptrOp, insn, addrspace); } } - - /* 2. for each pointer do the following. */ - /* */ - for (SmallVector::iterator I = possiblyRemotePtrs.begin(), + return NodeCandidates; +} + +void IGraph::buildGraph(Function *F, InsnToNodeMapType &NodeCandidates) { + for (PossiblyRemoteArrayType::iterator I = possiblyRemotePtrs.begin(), E = possiblyRemotePtrs.end(); I != E; I++) { Value* val = *I; if (debug) { @@ -185,12 +221,13 @@ void IGraph::construct(Function *F, GlobalToWideInfo *info) { if (find(possiblyRemoteArgs.begin(), possiblyRemoteArgs.end(), val) != possiblyRemoteArgs.end()) { - Node *n = new Node(NODE_DEF, val, NULL, 0, 100); + Node *n = new Node(Node::NODE_DEF, val, NULL, 0, 100); this->addNode(n); firstNodeInBB = n; lastNodeInBB = n; if (firstOccurrence) { entry->addChild(n); + n->addParent(entry); } } } @@ -201,7 +238,7 @@ void IGraph::construct(Function *F, GlobalToWideInfo *info) { // add edge if needed Instruction *insn = &*I; if (NodeCandidates.find(insn) != NodeCandidates.end()) { - std::tuple &info = NodeCandidates[insn]; + std::tuple &info = NodeCandidates[insn]; // create a new node. Node *n = new Node(std::get<0>(info), // Kind std::get<1>(info), // Value @@ -218,7 +255,7 @@ void IGraph::construct(Function *F, GlobalToWideInfo *info) { // There exists a predecessor node here. // append the created node to a predecessor node. 
lastNodeInBB->addChild(n); - n->addParents(lastNodeInBB); + n->addParent(lastNodeInBB); } lastNodeInBB = n; } @@ -244,109 +281,50 @@ void IGraph::construct(Function *F, GlobalToWideInfo *info) { Node* dstNode = std::get<0>(DstBBinfo); if (dstNode) { srcNode->addChild(dstNode); - dstNode->addParents(srcNode); + dstNode->addParent(srcNode); } } } } + + +} + +void IGraph::calculateDTandDF() { // Dominator Tree Computation - this->setPostOrderNumberWithDFS(); - this->computeDominatorTree(); + setPostOrderNumberWithDFS(); + computeDominatorTree(); // Dominator Frontier Computation - this->computeDominanceFrontier(); - // phi-insertion - SmallVector phiAddedNodes; /* set of nodes where phi is added */ - // For each addrspace(100) pointer - for (SmallVector::iterator I = possiblyRemotePtrs.begin(), - E = possiblyRemotePtrs.end(); I != E; I++) { - Value* val = *I; - SmallVector DEFSites; - // Build a set of nodes that define the current addrspace(100) pointer - for (IGraph::iterator NI = this->begin(), - NE = this->end(); NI != NE; NI++) { - Node *node = *NI; - if (node->getKind() == NODE_DEF && node->getValue() == val) { - DEFSites.push_back(node); - } - } - // For each node that defines the current addrspace(100) pointer - while (!DEFSites.empty()) { - Node* DEFNode = DEFSites[0]; - DEFSites.erase(DEFSites.begin()); - // For each dominance frontier of the current node - for (Node::df_iterator DI = DEFNode->df_begin(), - DE = DEFNode->df_end(); DI != DE; DI++) { - Node *DFofDEF = *DI; - // skip if a phi-node is already inserted - if (find(phiAddedNodes.begin(), - phiAddedNodes.end(), - DFofDEF) == phiAddedNodes.end()) { - Node *phiNode = new Node(NODE_PHI, val, NULL, 0, 0); - this->addNode(phiNode); - // inserting a new phi-node - // preserve parents and children of DFofDEF first (TODO functionalize) - Node::NodeElementType DFofDEFParents; - for (Node::iterator NI = DFofDEF->parents_begin(), - NE = DFofDEF->parents_end(); - NI != NE; NI++) { - Node *parents = *NI; - 
DFofDEFParents.push_back(parents); - } - for (Node::iterator NI = DFofDEFParents.begin(), - NE = DFofDEFParents.end(); NI != NE; NI++) { - // - Node *parents = *NI; - DFofDEF->eraseFromParent(parents); - parents->eraseFromChild(DFofDEF); - parents->addChild(phiNode); - phiNode->addParents(parents); - } - DFofDEF->addParents(phiNode); - phiNode->addChild(DFofDEF); - phiAddedNodes.push_back(DFofDEF); - // phi-node is also DEF node - if (find(DEFSites.begin(), - DEFSites.end(), DFofDEF) == DEFSites.end()) { - DEFSites.push_back(DFofDEF); - } - } - } + computeDominanceFrontier(); +} + +void IGraph::setPostOrderNumberWithDFSImpl(Node *node, int &number) { + for (Node::iterator I = node->children_begin(), + E = node->children_end(); + I != E; I++) { + Node *child = *I; + setPostOrderNumberWithDFSImpl(child, number); + if (child->getPostOrderNumber() == -1) { + child->setPostOrderNumber(number++); } } - - // DT computation again - // Dominator Tree Computation - if (phiAddedNodes.size() > 0) { - this->setPostOrderNumberWithDFS(); - this->computeDominatorTree(); - } - - // renaming - // initilize counters and stacks - for (SmallVector::iterator I = possiblyRemotePtrs.begin(), - E = possiblyRemotePtrs.end(); I != E; I++) { - Value* v = *I; - renamingCounters[v] = 0; - StackType st; - renamingStacks[v] = st; - } - this->performRenaming(); + } -Node* IGraph::computeIntersect(Node* b1, Node* b2) { - Node *finger1 = b1; - Node *finger2 = b2; - while (finger1->getPostOrderNumber() != finger2->getPostOrderNumber()) { - while (finger1->getPostOrderNumber() < finger2->getPostOrderNumber()) { - assert(finger1->getDom().count() == 1); - finger1 = this->getNodeByPostOrderNumber(finger1->getDom().find_first()); - } - while (finger2->getPostOrderNumber() < finger1->getPostOrderNumber()) { - assert(finger2->getDom().count() == 1); - finger2 = this->getNodeByPostOrderNumber(finger2->getDom().find_first()); - } +void IGraph::setPostOrderNumberWithDFS() { + /* 1. 
Reset Post order number */ + for (IGraph::iterator I = this->begin(), + E = this->end(); I != E; I++) { + Node *n = *I; + n->resetPostOrderNumber(); } - return finger1; + + /* 2. set post order number recursively */ + Node *entry = this->getEntry(); + entry->setPostOrderNumber(this->size() - 1); + int number = 0; + setPostOrderNumberWithDFSImpl(entry, number); + assert(this->size() - 1 == number); } void IGraph::computeDominatorTree() { @@ -357,7 +335,7 @@ void IGraph::computeDominatorTree() { if (node == this->getEntry()) { Node::IDominatorTreeType init(this->size(), false); init[node->getPostOrderNumber()] = true; - node->setDom(init); + node->setIDom(init); node->setUndefined(false); } else { node->setUndefined(true); @@ -395,8 +373,8 @@ void IGraph::computeDominatorTree() { new_idom[idx] = true; } } - if (b->getDom() != new_idom) { - b->setDom(new_idom); + if (b->getIDom() != new_idom) { + b->setIDom(new_idom); b->setUndefined(false); Changed = true; } @@ -406,7 +384,7 @@ void IGraph::computeDominatorTree() { for (IGraph::iterator I = this->begin(), E = this->end(); I != E; I++) { Node *n = *I; errs () << "IDOM(" << n->getPostOrderNumber() << ") : "; - Node::IDominatorTreeType b = n->getDom(); + Node::IDominatorTreeType b = n->getIDom(); for (int i = 0; i < b.size(); i++) { if (b[i]) { errs () << i << ", "; @@ -417,6 +395,22 @@ void IGraph::computeDominatorTree() { } } +Node* IGraph::computeIntersect(Node* b1, Node* b2) { + Node *finger1 = b1; + Node *finger2 = b2; + while (finger1->getPostOrderNumber() != finger2->getPostOrderNumber()) { + while (finger1->getPostOrderNumber() < finger2->getPostOrderNumber()) { + assert(finger1->getIDom().count() == 1); + finger1 = this->getNodeByPostOrderNumber(finger1->getIDom().find_first()); + } + while (finger2->getPostOrderNumber() < finger1->getPostOrderNumber()) { + assert(finger2->getIDom().count() == 1); + finger2 = this->getNodeByPostOrderNumber(finger2->getIDom().find_first()); + } + } + return finger1; +} + void 
IGraph::computeDominanceFrontier() { /* 1. Reset Dominance Frontier */ for (IGraph::iterator I = this->begin(), E = this->end(); I != E; I++) { @@ -431,9 +425,9 @@ void IGraph::computeDominanceFrontier() { for (Node::iterator BI = b->parents_begin(), BE = b->parents_end(); BI != BE; BI++) { Node *runner = *BI; - while (runner->getPostOrderNumber() != b->getDom().find_first()) { + while (runner->getPostOrderNumber() != b->getIDom().find_first()) { runner->addToDominanceFrontier(b); - runner = this->getNodeByPostOrderNumber(runner->getDom().find_first()); + runner = this->getNodeByPostOrderNumber(runner->getIDom().find_first()); } } } @@ -455,8 +449,75 @@ void IGraph::computeDominanceFrontier() { } +void IGraph::performPhiNodeInsertion(bool &Changed) { + SmallVector phiAddedNodes; /* set of nodes where phi is added */ + // For each addrspace(100) pointer + for (PossiblyRemoteArrayType::iterator I = possiblyRemotePtrs.begin(), + E = possiblyRemotePtrs.end(); I != E; I++) { + Value* val = *I; + SmallVector DEFSites; + // Build a set of nodes that define the current addrspace(100) pointer + for (IGraph::iterator NI = this->begin(), + NE = this->end(); NI != NE; NI++) { + Node *node = *NI; + if (node->getKind() == Node::NODE_DEF && node->getValue() == val) { + DEFSites.push_back(node); + } + } + // For each node that defines the current addrspace(100) pointer + while (!DEFSites.empty()) { + Node* DEFNode = DEFSites[0]; + DEFSites.erase(DEFSites.begin()); + // For each dominance frontier of the current node + for (Node::df_iterator DI = DEFNode->df_begin(), + DE = DEFNode->df_end(); DI != DE; DI++) { + Node *DFofDEF = *DI; + // skip if a phi-node is already inserted + if (find(phiAddedNodes.begin(), + phiAddedNodes.end(), + DFofDEF) == phiAddedNodes.end()) { + Node *phiNode = new Node(Node::NODE_PHI, val, NULL, 0, 0); + this->addNode(phiNode); + // inserting a new phi-node + // preserve parents and children of DFofDEF first (TODO functionalize) + Node::NodeElementType 
DFofDEFParents; + for (Node::iterator NI = DFofDEF->parents_begin(), + NE = DFofDEF->parents_end(); + NI != NE; NI++) { + Node *parents = *NI; + DFofDEFParents.push_back(parents); + } + for (Node::iterator NI = DFofDEFParents.begin(), + NE = DFofDEFParents.end(); NI != NE; NI++) { + // + Node *parents = *NI; + DFofDEF->eraseFromParent(parents); + parents->eraseFromChild(DFofDEF); + parents->addChild(phiNode); + phiNode->addParent(parents); + } + DFofDEF->addParent(phiNode); + phiNode->addChild(DFofDEF); + phiAddedNodes.push_back(DFofDEF); + // phi-node is also DEF node + if (find(DEFSites.begin(), + DEFSites.end(), DFofDEF) == DEFSites.end()) { + DEFSites.push_back(DFofDEF); + } + } + } + } + } + Changed = phiAddedNodes.size() > 0; +} + +void IGraph::generateName(Value *v) { + int i = renamingCounters[v]; + renamingStacks[v].push(i); + renamingCounters[v] = i + 1; +} + void IGraph::performRenamingImpl(Node *n, Node::NodeElementType &visited) { - // (TODO) see if previously visited if (find(visited.begin(), visited.end(), n) != visited.end()) { return; } else { @@ -464,15 +525,15 @@ void IGraph::performRenamingImpl(Node *n, Node::NodeElementType &visited) { } switch (n->getKind()) { - case NODE_PHI: - genName(n->getValue()); + case Node::NODE_PHI: + generateName(n->getValue()); n->setVersion(renamingStacks[n->getValue()].top()); break; - case NODE_USE: + case Node::NODE_USE: n->setVersion(renamingStacks[n->getValue()].top()); break; - case NODE_DEF: - genName(n->getValue()); + case Node::NODE_DEF: + generateName(n->getValue()); n->setVersion(renamingStacks[n->getValue()].top()); break; default: @@ -483,25 +544,62 @@ void IGraph::performRenamingImpl(Node *n, Node::NodeElementType &visited) { Node *node = *I; // see if the node is a children of n in DT if (n != node - && n->getPostOrderNumber() == node->getDom().find_first()) { + && n->getPostOrderNumber() == node->getIDom().find_first()) { performRenamingImpl(node, visited); } } - if (n->getKind() == NODE_DEF) { + if 
(n->getKind() == Node::NODE_DEF) { renamingStacks[n->getValue()].pop(); } } void IGraph::performRenaming() { + // 1. initilize counters and stacks + for (PossiblyRemoteArrayType::iterator I = possiblyRemotePtrs.begin(), + E = possiblyRemotePtrs.end(); I != E; I++) { + Value* v = *I; + renamingCounters[v] = 0; + StackType st; + renamingStacks[v] = st; + } + // 2. perform renaming Node::NodeElementType visited; Node *entry = this->getEntry(); performRenamingImpl(entry, visited); } -void IGraph::genName(Value *v) { - int i = renamingCounters[v]; - renamingStacks[v].push(i); - renamingCounters[v] = i + 1; +void IGraph::construct(Function *F, GlobalToWideInfo *info) { + + if (debug) { + errs () << "[Inequality Graph Construction for " << F->getName() << "]\n"; + } + + /* First create an entry node */ + Node *entry = new Node(Node::NODE_ENTRY, NULL, NULL, 0, 0); + this->entry = entry; + this->addNode(entry); + + /* 1. Analyze def/use of locality in the function */ + // 1-1 : collect addrspace 100 pointers + // 1-2 : create an instruction to def/use mapping + InsnToNodeMapType NodeCandidates = this->analyzeDefUseOfLocality(F, info); + + /* 2. Build an initial IGraph */ + this->buildGraph(F, NodeCandidates); + + /* 3. Build Locality SSA over IGraph for live-range splitting */ + + // 3-1. Calculate Dominator Tree and Dominance Frontier + this->calculateDTandDF(); + /* 3-2. Insert Phi-nodes using Dominance Frontier */ + bool Changed = false; + this->performPhiNodeInsertion(Changed); + /* 3-3. Compute DT again if the shape of the graph is changed */ + if (Changed) { + this->calculateDTandDF(); + } + /* 3-4. 
Renaming */ + this->performRenaming(); } void IGraph::dumpDOT() { diff --git a/IGraph.h b/IGraph.h index 072530a..d665244 100644 --- a/IGraph.h +++ b/IGraph.h @@ -48,47 +48,50 @@ using namespace std; using namespace llvm; -enum NodeKind { - NODE_ENTRY, - NODE_DEF, - NODE_USE, - NODE_PHI, - NODE_PI, - NODE_NONE -}; - - class Node { public: typedef BitVector IDominatorTreeType; typedef SmallVector DominanceFrontierType; typedef std::vector NodeElementType; - + + enum NodeKind { + NODE_ENTRY, + NODE_DEF, + NODE_USE, + NODE_PHI, + NODE_PI, + NODE_NONE + }; + private: - // + // Kind of this node (e.g. DEF/USE/PHI) NodeKind kind; + // Correspoing possibly-remote pointer Value* value; + // Corresponding instruction Instruction* insn; + // used in Locality-SSA (live-range splitting) unsigned int version; + // Current convention (0: definitely-local, 100: possibly-remote) int locality; - // for Dominant Tree + // For visiting nodes in post order int postOrderNumber; - - // - - - // - NodeElementType children; - NodeElementType parents; - - // For Dominator Tree & Dominance Frontier + // Immediate dominator of this node IDominatorTreeType idom; + // Used for dominator tree calculation bool domIsUndefined; + // Dominance frontiers of this node DominanceFrontierType dominanceFrontier; - NodeElementType phiNodeArgs; + + // Children and Parents of this node + NodeElementType children; + NodeElementType parents; + + // Used for showing informtion on this node + void printAsOperandInternal(raw_ostream &o, Value* value) const; public: - + // Constructor Node(NodeKind _kind, Value* _value, Instruction* _insn, @@ -103,26 +106,36 @@ class Node { domIsUndefined = true; }; + // For enumerating parents/children of this node typedef NodeElementType::iterator iterator; typedef NodeElementType::const_iterator const_iterator; - // parents + // Interface for enumerating parents iterator parents_begin() { return parents.begin(); } iterator parents_end() { return parents.end(); } const_iterator 
parents_begin() const { return parents.begin(); } const_iterator parents_end() const { return parents.end(); } - // children + // Interface for enumerating children iterator children_begin() { return children.begin(); } iterator children_end() { return children.end(); } const_iterator children_begin() const { return children.begin(); } const_iterator children_end() const { return children.end(); } - - void addParents(Node* parent) { + + // Getter for general node information + Value* getValue() const { return value; } + NodeKind getKind() const { return kind; } + unsigned int getVersion() const { return version; } + int getLocality() const { return locality; } + int getNumPreds() { return parents.size(); } + + // Setter for node information + void setVersion(unsigned int _version) { version = _version; } + + void addParent(Node* parent) { iterator I = find(parents_begin(), parents_end(), parent); if( I == parents_end() ){ parents.push_back(parent); - parent->addChild(this); } } @@ -137,7 +150,6 @@ class Node { iterator I = find(children_begin(), children_end(), child); if( I == children.end() ){ children.push_back(child); - child->addParents(this); } } @@ -148,133 +160,62 @@ class Node { } } - Value* getValue() const { return value; } - NodeKind getKind() const { return kind; } - unsigned int getVersion() const { return version; } - int getLocality() const { return locality; } + /* === Utility functions for Inequality Graph Construction Starts === */ - void setVersion(unsigned int _version) { version = _version; } + // For enumerating Dominance frontiers of this node + typedef DominanceFrontierType::iterator df_iterator; - // For Dominator Tree & Dominance Frontier + // Interface for enumerating Dominance frontiers of this node + df_iterator df_begin() { return dominanceFrontier.begin(); } + df_iterator df_end() { return dominanceFrontier.end(); } + + // Used for visiting nodes in post order void resetPostOrderNumber() { postOrderNumber = -1; }; void 
setPostOrderNumber(int _postOrderNumber) { postOrderNumber = _postOrderNumber; } int getPostOrderNumber() const { return postOrderNumber; } + // Used for calculating dominator tree bool getUndefined() { return domIsUndefined; } void setUndefined(bool flag) { domIsUndefined = flag; } - IDominatorTreeType getDom() { return idom; } - void setDom(IDominatorTreeType _idom) { idom = _idom; } + // Setter/Getter for immediate Dominator + IDominatorTreeType getIDom() { return idom; } + void setIDom(IDominatorTreeType _idom) { idom = _idom; } + // Dominance Frontier void resetDominanceFrontier() { dominanceFrontier.clear(); } void addToDominanceFrontier(Node *b) { dominanceFrontier.push_back(b); } - int getNumPreds() { return parents.size(); } + /* === Utility functions for Inequality Graph Construction Ends === */ - typedef DominanceFrontierType::iterator df_iterator; - - df_iterator df_begin() { return dominanceFrontier.begin(); } - df_iterator df_end() { return dominanceFrontier.end(); } - - // for debug + // Used for showing information on this node (e.g. when dumping in DOT format) + void printAsOperand(raw_ostream&, bool) const; + + // Used for debug void dump() const { printAsOperand(errs(), true); errs () << "\n"; - } - - void printAsOperand(raw_ostream &o, bool PrettyPrint) const { - // print Node Information - if (PrettyPrint) { - switch (this->getKind()) { - case NODE_ENTRY: - o << "entry"; - break; - case NODE_DEF: -#if HAVE_LLVM_VER >= 35 - value->printAsOperand(o, false); -#else - WriteAsOperand(o, value, false); -#endif - o << "_" << this->getVersion() << " = " << this->getLocality(); - break; - case NODE_USE: - o << "... 
= "; -#if HAVE_LLVM_VER >= 35 - value->printAsOperand(o, false); -#else - WriteAsOperand(o, value, false); -#endif - o << "_" << this->getVersion(); - break; - case NODE_PHI: -#if HAVE_LLVM_VER >= 35 - value->printAsOperand(o, false); -#else - WriteAsOperand(o, value, false); -#endif - o << "_" << this->getVersion(); - - o << " = phi("; - for (const_iterator I = this->parents_begin(), - E = this->parents_end(); I != E; I++) { - Node *n = *I; - n->printAsOperand(o, false); - if (I+1 != E) { - o << ", "; - } - } - o << ")"; - break; - case NODE_PI: - break; - default: - assert(0 && "Inequality Graph Node Type should not be NODE_NONE"); - } - o << "\n" << this->getPostOrderNumber(); -#ifdef DEBUG - o << "\n" << "Parents ("; - for (const_iterator I = parents_begin(), E = parents_end(); I != E; I++) { - Node *n = *I; - o << n->getPostOrderNumber(); - if (I+1 != E) { - o << ", "; - } - } - o << ")"; - o << "\n" << "Children ("; - for (const_iterator I = children_begin(), E = children_end(); I != E; I++) { - Node *n = *I; - o << n->getPostOrderNumber(); - if (I+1 != E) { - o << ", "; - } - } - o << ")"; -#endif - } else { - NodeKind kind = this->getKind(); - if (kind == NODE_DEF || kind == NODE_USE) { -#if HAVE_LLVM_VER >= 35 - value->printAsOperand(o, false); -#else - WriteAsOperand(o, value, false); -#endif - o << "_" << this->getVersion(); - } - } - - } - + } }; class IGraph { private: - // the name of IGraph, which is used for DOT graph generation + // The name of IGraph, which is used for DOT graph generation StringRef name; - // entry node + // Entry node Node* entry; - // List of nodes in IGraph + // Nodes of IGraph typedef std::vector NodeListType; NodeListType nodes; + + /* === Data Structures for Inequality Graph Construction Starts === */ + + // An array of possibly remote pointers + typedef SmallVector PossiblyRemoteArrayType; + PossiblyRemoteArrayType possiblyRemotePtrs; + PossiblyRemoteArrayType possiblyRemoteArgs; + + // Used for analyzing def/use of 
locality + typedef DenseMap> InsnToNodeMapType; // For Renaming typedef std::stack StackType; @@ -282,48 +223,62 @@ class IGraph { typedef DenseMap RenamingCounterType; RenamingStacksType renamingStacks; RenamingCounterType renamingCounters; - - NodeListType getRootNodes() { return nodes; } + + /* === Data Structures for Inequality Graph Construction Ends === */ + + /* === Utility functions for Inequality graph construction Starts === */ + + // Add a node to the graph + void addNode(Node* n) { nodes.push_back(n); } + + // Language specific Value* getOperandIfLocalStmt(Instruction *insn); + // For Initial IGraph construction from LLVM Function + InsnToNodeMapType analyzeDefUseOfLocality(Function *, GlobalToWideInfo *); + void buildGraph(Function*, InsnToNodeMapType&); + + // For constructing Locality-SSA in IGraph + void calculateDTandDF(); void setPostOrderNumberWithDFSImpl(Node*, int&); void setPostOrderNumberWithDFS(); - - /* For Dominator Tree Construction*/ void computeDominatorTree(); Node* computeIntersect(Node*, Node*); - - /* For Dominance Frontier Construction */ - void computeDominanceFrontier(); - - - /* For phi node insertion & renaming */ - void insertPhiNodes(); + void computeDominanceFrontier(); + void performPhiNodeInsertion(bool&); void performRenaming(); void performRenamingImpl(Node *, Node::NodeElementType&); - void genName(Value *v); + void generateName(Value *v); + + /* === Utility functions for Inequality graph construction Ends === */ - // + // verbose bool debug = true; public: + // Constructor + IGraph () { name = "IGraph"; } + // Constructor with name IGraph (StringRef _name) { name = _name; } - void construct(Function *F, GlobalToWideInfo *info); - - Node* getEntry() const { return entry; } - StringRef getName() const { return name; } + // For enumerating nodes in the graph typedef NodeListType::iterator iterator; typedef NodeListType::const_iterator const_iterator; - // Iterator for enumerating nodes of IGraph. 
+ // Interface for enumerating nodes of IGraph. iterator begin() { return nodes.begin(); } const_iterator begin() const { return nodes.begin(); } iterator end() { return nodes.end(); } const_iterator end() const { return nodes.end(); } + // Getter + Node* getEntry() const { return entry; } + StringRef getName() const { return name; } + unsigned size() const { return nodes.size(); } + Node* getNodeByValue(const Value* v) { - for (NodeListType::iterator I = nodes.begin(), E = nodes.end(); I != E; I++) { + for (NodeListType::iterator I = nodes.begin(), + E = nodes.end(); I != E; I++) { Node* tmp = *I; if (v == tmp->getValue()) { return tmp; @@ -333,7 +288,8 @@ class IGraph { } Node* getNodeByPostOrderNumber(const int number) { - for (NodeListType::iterator I = nodes.begin(), E = nodes.end(); I != E; I++) { + for (NodeListType::iterator I = nodes.begin(), + E = nodes.end(); I != E; I++) { Node* tmp = *I; if (number == tmp->getPostOrderNumber()) { return tmp; @@ -341,35 +297,18 @@ class IGraph { } return NULL; } - - void addNode(Node* n) { nodes.push_back(n); } - unsigned size() const { return nodes.size(); } - // for Debug + // Construct a inequality graph from an LLVM function + void construct(Function *F, GlobalToWideInfo *info); + + // Used for dumping IGraph in DOT format void dumpDOT(); }; +/* The followings are used for DOT Graph generation by llvm::DOTGraphTraits */ namespace llvm { // for Graph Traits - // Graph Traits requires you to provide the following - // (for more details see "llvm/ADT/GraphTraits.h") : - - // typedef NodeType - Type of Node in the graph - // typedef ChildIteratorType - Type used to iterate over children in graph - // static NodeType *getEntryNode(const GraphType &) - // Return the entry node of the graph - // static ChildIteratorType child_begin(NodeType *) - // static ChildIteratorType child_end (NodeType *) - // Return iterators that point to the beginning and ending of the child - // node list for the specified node. 
- // - // typedef ...iterator nodes_iterator; - // static nodes_iterator nodes_begin(GraphType *G) - // static nodes_iterator nodes_end (GraphType *G) - // nodes_iterator/begin/end - Allow iteration over all nodes in the graph - // static unsigned size (GraphType *G) - // Return total number of nodes in the graph - /* The followings are used for DOT Graph generation by DOTGraphTraits */ + // (for more details see "llvm/ADT/GraphTraits.h") // template specialization for template<> struct GraphTraits { @@ -434,5 +373,4 @@ namespace llvm { }; } - #endif // _IGRAPH_H_ From aa841dee4879ad2822266361dbfb6be22b149862 Mon Sep 17 00:00:00 2001 From: Akihiro Hayashi Date: Wed, 2 Mar 2016 18:03:24 -0600 Subject: [PATCH 13/24] Refactor IGraph construction (the way to calculate post order number was wrong) --- IGraph.cpp | 109 ++++++++++++++++++++++++++++++++++++++--------------- IGraph.h | 4 +- 2 files changed, 80 insertions(+), 33 deletions(-) diff --git a/IGraph.cpp b/IGraph.cpp index c5ef2b4..7b8965a 100644 --- a/IGraph.cpp +++ b/IGraph.cpp @@ -137,6 +137,9 @@ IGraph::InsnToNodeMapType IGraph::analyzeDefUseOfLocality(Function *F, GlobalToW /* 1. construct a set of addrspace 100 pointers. */ /* 2. construct a list of blocks that def/use the pointer. 
*/ // analyze arguments + if (debug) { + errs () << "\t analyzing Def/Use of Locality\n"; + } for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I!=E; ++I) { Value *arg = I; if (arg->getType()->isPointerTy() @@ -200,14 +203,19 @@ IGraph::InsnToNodeMapType IGraph::analyzeDefUseOfLocality(Function *F, GlobalToW } void IGraph::buildGraph(Function *F, InsnToNodeMapType &NodeCandidates) { + if (debug) { + errs () << "\t buidling an initial graph\n"; + } + // Build a graph based on NodeCandidates construted in the previous phase (namely analyzeDefUseOfLocality) for (PossiblyRemoteArrayType::iterator I = possiblyRemotePtrs.begin(), E = possiblyRemotePtrs.end(); I != E; I++) { Value* val = *I; if (debug) { - errs () << "Working on :" << *val << "\n"; + errs () << "\t\tWorking on :" << *val << "\n"; } - + // to record the first and last node in BB DenseMap> BBInfo; + // bool firstOccurrence = true; // Create Intra-block edge for (Function::iterator BI = F->begin(), BE = F->end(); BI != BE; BI++) { @@ -215,25 +223,30 @@ void IGraph::buildGraph(Function *F, InsnToNodeMapType &NodeCandidates) { // remember first and last node in BB so we can create edges between blocks. 
Node *firstNodeInBB = NULL; Node *lastNodeInBB = NULL; + bool nodeAdded = false; - // create node for arguments + // For the first block, create node for arguments if (BI == F->begin()) { if (find(possiblyRemoteArgs.begin(), possiblyRemoteArgs.end(), - val) != possiblyRemoteArgs.end()) { + val) != possiblyRemoteArgs.end()) { + // an argument that involoves address space 100 is DEF node Node *n = new Node(Node::NODE_DEF, val, NULL, 0, 100); this->addNode(n); + nodeAdded = true; firstNodeInBB = n; lastNodeInBB = n; if (firstOccurrence) { entry->addChild(n); n->addParent(entry); + firstOccurrence = false; } } } // For each instruction // Create a node if an instruction contains possibly-remote access + for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; I++) { // add edge if needed Instruction *insn = &*I; @@ -247,6 +260,12 @@ void IGraph::buildGraph(Function *F, InsnToNodeMapType &NodeCandidates) { std::get<3>(info)); // Locality (either 0 or 100) // register the created node to the Graph. this->addNode(n); + nodeAdded = true; + if (firstOccurrence) { + entry->addChild(n); + n->addParent(entry); + firstOccurrence = false; + } if (!firstNodeInBB) { // First node in the current BB. 
firstNodeInBB = n; @@ -259,22 +278,29 @@ void IGraph::buildGraph(Function *F, InsnToNodeMapType &NodeCandidates) { } lastNodeInBB = n; } + } // for each instruction + if (nodeAdded) { + BBInfo[BB] = std::make_pair(firstNodeInBB, lastNodeInBB); + } else { + Node *dummyUSENode = new Node(Node::NODE_USE, val, NULL, 0, 100); + this->addNode(dummyUSENode); + BBInfo[BB] = std::make_pair(dummyUSENode, dummyUSENode); } - // - BBInfo[BB] = std::make_pair(firstNodeInBB, lastNodeInBB); - } - - // Inter-block edge - for (Function::iterator BI = F->begin(), BE = F->end(); BI != BE; BI++) { + } // for each block + + // Add inter-block edges + // For each block in Function + for (Function::iterator BI = F->begin(), + BE = F->end(); BI != BE; BI++) { + // The current BB BasicBlock* BB = BI; + // get the first and last node in this BB std::pair &SrcBBinfo = BBInfo[BB]; - const TerminatorInst *TInst = BB->getTerminator(); - // get the last node of the current BB + // get<1> : the last node in this BB Node* srcNode = std::get<1>(SrcBBinfo); - if (!srcNode) { - continue; - } - // Succ + const TerminatorInst *TInst = BB->getTerminator(); + // add edges : + // the last node in the current BB -> the first node in succesor BBs for (unsigned I = 0, NSucc = TInst->getNumSuccessors(); I < NSucc; I++) { BasicBlock *Succ = TInst->getSuccessor(I); std::pair &DstBBinfo = BBInfo[Succ]; @@ -286,48 +312,57 @@ void IGraph::buildGraph(Function *F, InsnToNodeMapType &NodeCandidates) { } } } - - } void IGraph::calculateDTandDF() { // Dominator Tree Computation setPostOrderNumberWithDFS(); - computeDominatorTree(); + computeDominatorTree(); // Dominator Frontier Computation computeDominanceFrontier(); } -void IGraph::setPostOrderNumberWithDFSImpl(Node *node, int &number) { +void IGraph::setPostOrderNumberWithDFSInternal(Node *node, int &number, Node::NodeElementType &visited) { + visited.push_back(node); + for (Node::iterator I = node->children_begin(), E = node->children_end(); I != E; I++) { Node 
*child = *I; - setPostOrderNumberWithDFSImpl(child, number); - if (child->getPostOrderNumber() == -1) { - child->setPostOrderNumber(number++); + if (find(visited.begin(), visited.end(), child) == visited.end()) { + setPostOrderNumberWithDFSInternal(child, number, visited); } } - + if (node->getPostOrderNumber() == -1) { + node->setPostOrderNumber(number++); + } } void IGraph::setPostOrderNumberWithDFS() { + if (debug) { + errs () << "\t setting post order number\n"; + } + /* 1. Reset Post order number */ for (IGraph::iterator I = this->begin(), E = this->end(); I != E; I++) { Node *n = *I; n->resetPostOrderNumber(); } - /* 2. set post order number recursively */ Node *entry = this->getEntry(); entry->setPostOrderNumber(this->size() - 1); int number = 0; - setPostOrderNumberWithDFSImpl(entry, number); + Node::NodeElementType visited; + setPostOrderNumberWithDFSInternal(entry, number, visited); assert(this->size() - 1 == number); } void IGraph::computeDominatorTree() { + if (debug) { + errs () << "\t computing dominator tree\n"; + } + /* initialize the domiantor array */ for (IGraph::iterator I = this->begin(), E = this->end(); I != E; I++) { @@ -348,10 +383,10 @@ void IGraph::computeDominatorTree() { /* in reverse postorder except entry node */ for (int i = this->size() - 1; i >= 0; i--) { Node * b = this->getNodeByPostOrderNumber(i); - errs () << "PostOrder(" << i << ")\n"; if (b == this->getEntry()) continue; /* pick one first processed predecessor */ Node::IDominatorTreeType new_idom(this->size(), false); + errs () << "PostOrder(" << i << ")\n"; Node *first_pred = NULL; for (IGraph::iterator IPRED = b->parents_begin(), EPRED = b->parents_end(); IPRED != EPRED; IPRED++) { @@ -412,6 +447,10 @@ Node* IGraph::computeIntersect(Node* b1, Node* b2) { } void IGraph::computeDominanceFrontier() { + if (debug) { + errs () << "\t computing dominance frontier\n"; + } + /* 1. 
Reset Dominance Frontier */ for (IGraph::iterator I = this->begin(), E = this->end(); I != E; I++) { Node *b = *I; @@ -450,6 +489,10 @@ void IGraph::computeDominanceFrontier() { } void IGraph::performPhiNodeInsertion(bool &Changed) { + if (debug) { + errs () << "\t performing phi-node insertion\n"; + } + SmallVector phiAddedNodes; /* set of nodes where phi is added */ // For each addrspace(100) pointer for (PossiblyRemoteArrayType::iterator I = possiblyRemotePtrs.begin(), @@ -484,7 +527,7 @@ void IGraph::performPhiNodeInsertion(bool &Changed) { for (Node::iterator NI = DFofDEF->parents_begin(), NE = DFofDEF->parents_end(); NI != NE; NI++) { - Node *parents = *NI; + Node *parents = *NI; DFofDEFParents.push_back(parents); } for (Node::iterator NI = DFofDEFParents.begin(), @@ -517,7 +560,7 @@ void IGraph::generateName(Value *v) { renamingCounters[v] = i + 1; } -void IGraph::performRenamingImpl(Node *n, Node::NodeElementType &visited) { +void IGraph::performRenamingInternal(Node *n, Node::NodeElementType &visited) { if (find(visited.begin(), visited.end(), n) != visited.end()) { return; } else { @@ -545,7 +588,7 @@ void IGraph::performRenamingImpl(Node *n, Node::NodeElementType &visited) { // see if the node is a children of n in DT if (n != node && n->getPostOrderNumber() == node->getIDom().find_first()) { - performRenamingImpl(node, visited); + performRenamingInternal(node, visited); } } if (n->getKind() == Node::NODE_DEF) { @@ -554,6 +597,10 @@ void IGraph::performRenamingImpl(Node *n, Node::NodeElementType &visited) { } void IGraph::performRenaming() { + if (debug) { + errs () << "\t performing renaming\n"; + } + // 1. initilize counters and stacks for (PossiblyRemoteArrayType::iterator I = possiblyRemotePtrs.begin(), E = possiblyRemotePtrs.end(); I != E; I++) { @@ -565,7 +612,7 @@ void IGraph::performRenaming() { // 2. 
perform renaming Node::NodeElementType visited; Node *entry = this->getEntry(); - performRenamingImpl(entry, visited); + performRenamingInternal(entry, visited); } void IGraph::construct(Function *F, GlobalToWideInfo *info) { diff --git a/IGraph.h b/IGraph.h index d665244..c9206cc 100644 --- a/IGraph.h +++ b/IGraph.h @@ -240,14 +240,14 @@ class IGraph { // For constructing Locality-SSA in IGraph void calculateDTandDF(); - void setPostOrderNumberWithDFSImpl(Node*, int&); + void setPostOrderNumberWithDFSInternal(Node*, int&, Node::NodeElementType&); void setPostOrderNumberWithDFS(); void computeDominatorTree(); Node* computeIntersect(Node*, Node*); void computeDominanceFrontier(); void performPhiNodeInsertion(bool&); void performRenaming(); - void performRenamingImpl(Node *, Node::NodeElementType&); + void performRenamingInternal(Node *, Node::NodeElementType&); void generateName(Value *v); /* === Utility functions for Inequality graph construction Ends === */ From c823e7e5bf9b22040cec1d570593d22d360f38ae Mon Sep 17 00:00:00 2001 From: Akihiro Hayashi Date: Wed, 2 Mar 2016 21:02:29 -0600 Subject: [PATCH 14/24] Refactor IGraph construction (support store/getelementptr instruction) --- IGraph.cpp | 42 ++++++++++++++++++++++++++++++++++++------ IGraph.h | 2 +- 2 files changed, 37 insertions(+), 7 deletions(-) diff --git a/IGraph.cpp b/IGraph.cpp index 7b8965a..4c5fdc2 100644 --- a/IGraph.cpp +++ b/IGraph.cpp @@ -113,7 +113,7 @@ void Node::printAsOperand(raw_ostream &o, bool PrettyPrint) const { } -Value* IGraph::getOperandIfLocalStmt(Instruction *insn) { +std::pair IGraph::isChapelLocalStmt(Instruction *insn) { CallInst *call = dyn_cast(insn); if (call) { Function* f = call->getCalledFunction(); @@ -123,19 +123,21 @@ Value* IGraph::getOperandIfLocalStmt(Instruction *insn) { for (User *U : call->getArgOperand(0)->users()) { Value *UI = U; if (isa(*UI) || isa(*UI)) { - return call->getArgOperand(0); + return std::make_pair(true, call->getArgOperand(0)); } } } } } - return 
NULL; + return std::make_pair(false, (Value*)NULL); } IGraph::InsnToNodeMapType IGraph::analyzeDefUseOfLocality(Function *F, GlobalToWideInfo *info) { /* 1. collect addrspace 100 pointers that is used in the next step. */ /* 1. construct a set of addrspace 100 pointers. */ /* 2. construct a list of blocks that def/use the pointer. */ + /* This part is language-specific */ + // analyze arguments if (debug) { errs () << "\t analyzing Def/Use of Locality\n"; @@ -178,15 +180,43 @@ IGraph::InsnToNodeMapType IGraph::analyzeDefUseOfLocality(Function *F, GlobalToW } break; } + case Instruction::Store: { + StoreInst *store = cast(insn); + if(store->getPointerAddressSpace() == info->globalSpace) { + needToWork = true; + kind = Node::NODE_USE; + ptrOp = store->getPointerOperand(); + addrspace = 100; + } + break; + } case Instruction::Call: { - ptrOp = getOperandIfLocalStmt(insn); - if (ptrOp) { + auto local = isChapelLocalStmt(insn); + bool isLocal = local.first; + if (isLocal) { needToWork = true; kind = Node::NODE_DEF; + ptrOp = local.second; addrspace = 0; + } else { + // this function may have side effect + needToWork = true; + kind = Node::NODE_DEF; + ptrOp = NULL; + addrspace = 100; + } + break; + } + case Instruction::GetElementPtr: { + GetElementPtrInst *gep = cast(insn); + if (gep->getAddressSpace() == info->globalSpace) { + needToWork = true; + kind = Node::NODE_USE; + ptrOp = gep->getPointerOperand(); + addrspace = 100; } + break; } - // TODO: store/getelementptr insn } if (needToWork) { // collect possibly remote pointers diff --git a/IGraph.h b/IGraph.h index c9206cc..adef9f4 100644 --- a/IGraph.h +++ b/IGraph.h @@ -232,7 +232,7 @@ class IGraph { void addNode(Node* n) { nodes.push_back(n); } // Language specific - Value* getOperandIfLocalStmt(Instruction *insn); + std::pair isChapelLocalStmt(Instruction *insn); // For Initial IGraph construction from LLVM Function InsnToNodeMapType analyzeDefUseOfLocality(Function *, GlobalToWideInfo *); From 
a8a8ebdb6c0ee79166b231b2cccf75aa4e863e05 Mon Sep 17 00:00:00 2001 From: Akihiro Hayashi Date: Thu, 3 Mar 2016 11:02:30 -0600 Subject: [PATCH 15/24] Add mem2reg and reg2mem pass before/after locality optimization pass --- llvmLocalityOptimization.cpp | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/llvmLocalityOptimization.cpp b/llvmLocalityOptimization.cpp index fd2cdaf..7b24a22 100644 --- a/llvmLocalityOptimization.cpp +++ b/llvmLocalityOptimization.cpp @@ -837,7 +837,9 @@ namespace { // Does nothing for special functions since they have no body. if( F->begin() == F->end() ) return; - // TODO : invoke mem2reg pass to introduce SSA phi node + legacy::FunctionPassManager* FPM_pre = new legacy::FunctionPassManager(&M); + FPM_pre->add(llvm::createPromoteMemoryToRegisterPass()); + FPM_pre->run(*F); // For Debug if (debugThisFn[0] && F->getName() == debugThisFn) { @@ -910,9 +912,9 @@ namespace { dumpFunction(F, "after"); } - // TODO : invoke reg2mem pass - - + legacy::FunctionPassManager *FPM_post = new legacy::FunctionPassManager(&M); + FPM_post->add(llvm::createDemoteRegisterToMemoryPass()); + FPM_post->run(*F); } /* From b57e62153502b85f2f4b27686702957d2b8b379d Mon Sep 17 00:00:00 2001 From: Akihiro Hayashi Date: Thu, 3 Mar 2016 12:02:06 -0600 Subject: [PATCH 16/24] Remove NonLocals --- llvmLocalityOptimization.cpp | 81 +++++++----------------------------- 1 file changed, 14 insertions(+), 67 deletions(-) diff --git a/llvmLocalityOptimization.cpp b/llvmLocalityOptimization.cpp index 7b24a22..7fb1b9a 100644 --- a/llvmLocalityOptimization.cpp +++ b/llvmLocalityOptimization.cpp @@ -289,7 +289,7 @@ namespace { return vn; } - bool isDefinitelyLocalAccordingToIG(Value* op, IGraph *G, std::vector &NonLocals) { + bool isDefinitelyLocalAccordingToIG(Value* op, IGraph *G) { if (fLLVMDisableIG) { return false; } @@ -515,7 +515,7 @@ namespace { return needsWork; } - void processInstruction(Instruction* targetInsn, SmallVector &deletedInsn, 
ValueToValueMapTy &VM, ValueTable *VN, Module &M, IGraph *G, LocalArrayInfo *LocalArraysGVN, LocalArrayInfo *LocalArraysDecl, std::vector &NonLocals) { + void processInstruction(Instruction* targetInsn, SmallVector &deletedInsn, ValueToValueMapTy &VM, ValueTable *VN, Module &M, IGraph *G, LocalArrayInfo *LocalArraysGVN, LocalArrayInfo *LocalArraysDecl) { if(debugPassInsn) { errs() << "@" << *targetInsn << "\n"; } @@ -577,7 +577,7 @@ namespace { bool needToTransform = false; // For array access // Check if the pointer is definitely local (according to inequality graph) - needToTransform |= isDefinitelyLocalAccordingToIG(oldOp, G, NonLocals); + needToTransform |= isDefinitelyLocalAccordingToIG(oldOp, G); // Check if the pointer derives from locale-local array pointer (according to GVN) needToTransform |= isDefinitelyLocalAccordingToList(oldGEP, VM, G, LocalArraysGVN, true); // Check if the pointer derives from locale-local array pointer (according to locale-local array) @@ -625,7 +625,7 @@ namespace { Instruction* newInst = NULL; // Old Operand addrspace(100)* oldOp = oldLoad->getPointerOperand(); - if (isDefinitelyLocalAccordingToIG(oldOp, G, NonLocals)) { + if (isDefinitelyLocalAccordingToIG(oldOp, G)) { newOp = findNewOpOrInsertGF(oldOp, VM, M, oldLoad); newInst = new LoadInst(newOp, "", @@ -658,7 +658,7 @@ namespace { Instruction* newInst = NULL; // Old Operand addrspace(100)* oldOp = oldStore->getPointerOperand(); - if (isDefinitelyLocalAccordingToIG(oldOp, G, NonLocals)) { + if (isDefinitelyLocalAccordingToIG(oldOp, G)) { newOp = findNewOpOrInsertGF(oldOp, VM, M, oldStore); newInst = new StoreInst(oldStore->getValueOperand(), newOp, @@ -761,7 +761,7 @@ namespace { if (srcSpace == info->globalSpace) { ValueToValueMapTy::iterator I = VM.find(oldSrc); bool renamed = I != VM.end() && I->second; - if (isDefinitelyLocalAccordingToIG(oldSrc, G, NonLocals) || renamed) { + if (isDefinitelyLocalAccordingToIG(oldSrc, G) || renamed) { newSrc = findNewOpOrInsertGF(oldSrc, 
VM, M, oldCall); needToTransform = true; } @@ -771,7 +771,7 @@ namespace { if (dstSpace == info->globalSpace) { ValueToValueMapTy::iterator I = VM.find(oldDst); bool renamed = I != VM.end() && I->second; - if (isDefinitelyLocalAccordingToIG(oldDst, G, NonLocals) || renamed) { + if (isDefinitelyLocalAccordingToIG(oldDst, G) || renamed) { newDst = findNewOpOrInsertGF(oldDst, VM, M, oldCall); needToTransform = true; } @@ -851,7 +851,6 @@ namespace { // Allocate LocalArrayInfo *LocalArraysGVN = new LocalArrayInfo(); LocalArrayInfo *LocalArraysDecl = new LocalArrayInfo(); - static std::vector NonLocals; // Create IGraph // Inspect all instructions and construt IGraph. Each node of IGraph contains a densemap that map that is one-to-one mapping of each operand into a specific address space (either 100 or 0). @@ -864,8 +863,8 @@ namespace { ValueTable *VN = createValueTable(F); // Input : VN, G - // Output : LocalArraysGVN, LocalArrayDecl, NonLocals - salvageChapelArrayAccess(F, VN, G, LocalArraysGVN, LocalArraysDecl, NonLocals); + // Output : LocalArraysGVN, LocalArrayDecl + salvageChapelArrayAccess(F, VN, G, LocalArraysGVN, LocalArraysDecl); // Dump analysis results if (debugThisFn[0] && F->getName() == debugThisFn) { @@ -876,14 +875,7 @@ namespace { LocalArraysGVN->dump(); errs () << "[Local Array Decl]\n"; LocalArraysDecl->dump(); - - // dump nonlocals - errs () << "[Non Locals]\n"; - for (vector::iterator I = NonLocals.begin(), E = NonLocals.end(); I != E; I++) { - Value *tmp = *I; - tmp->dump(); - } - } + } // Process each instruction // try to convert load/store/getelementptr with addrspace(100) to addrspace(0) with using IGraph @@ -891,7 +883,7 @@ namespace { ValueToValueMapTy ValueMap; for (inst_iterator II = inst_begin(F), IE = inst_end(F); II != IE; ++II) { Instruction *insn = &*II; - processInstruction(insn, deletedInsn, ValueMap, VN, M, G, LocalArraysGVN, LocalArraysDecl, NonLocals); + processInstruction(insn, deletedInsn, ValueMap, VN, M, G, LocalArraysGVN, 
LocalArraysDecl); } for (unsigned int i = 0; i < deletedInsn.size(); i++) { Instruction *insn = deletedInsn[i]; @@ -904,7 +896,6 @@ namespace { // TODO delete ValueMap.clear(); deletedInsn.clear(); - NonLocals.clear(); // For Debug if (debugThisFn[0] && F->getName() == debugThisFn) { @@ -1038,7 +1029,7 @@ namespace { } - void salvageChapelArrayAccess(Function *F, ValueTable *VN, IGraph *G, LocalArrayInfo *LocalArraysGVN, LocalArrayInfo *LocalArraysDecl, std::vector &NonLocals) { + void salvageChapelArrayAccess(Function *F, ValueTable *VN, IGraph *G, LocalArrayInfo *LocalArraysGVN, LocalArrayInfo *LocalArraysDecl) { for (inst_iterator IS = inst_begin(F), IE = inst_end(F); IS != IE; IS++) { Instruction *targetInsn = &*IS; switch (targetInsn->getOpcode()) { @@ -1052,7 +1043,7 @@ namespace { case Instruction::Call: { // search array construction - analyzeCallInsn(targetInsn, VN, G, LocalArraysDecl, NonLocals); + analyzeCallInsn(targetInsn, VN, LocalArraysDecl); break; } default: @@ -1137,28 +1128,8 @@ namespace { } } - void markNonLocalsRecursively(Value *v, std::vector &visited, std::vector &NonLocals) { - bool notVisited = find(visited.begin(), visited.end(), v) == visited.end(); - if (isa(v) && notVisited) { - visited.push_back(v); - Instruction *insn = cast(v); - for (unsigned int i = 0; i < insn->getNumOperands(); i++) { - Value *op = insn->getOperand(i); - if (isa(op)) { - CallInst *callInsn2 = cast(op); - Function *calledFunc2 = callInsn2->getCalledFunction(); - if (calledFunc2 && calledFunc2->getName().startswith(".gf.addr")) { - Value *tmp = callInsn2->getArgOperand(0); - NonLocals.push_back(tmp); - } - } - markNonLocalsRecursively(op, visited, NonLocals); - } - } - } - // check if construct_DefaultRectangularArr is in this function - void analyzeCallInsn(Instruction *I, ValueTable *VN, IGraph *G, LocalArrayInfo *LocalArrays, std::vector &NonLocals) { + void analyzeCallInsn(Instruction *I, ValueTable *VN, LocalArrayInfo *LocalArrays) { if (isa(I)) { CallInst 
*callInsn1 = cast(I); Function *calledFunc1 = callInsn1->getCalledFunction(); @@ -1175,12 +1146,8 @@ namespace { if (calledFunc2 && calledFunc2->getName().startswith("_construct_DefaultRectangularArr")) { LocalArrayEntry *li = new LocalArrayEntry(I, true); LocalArrays->add(li); - } else if (calledFunc2 && calledFunc2->getName().startswith(".gf.addr")) { - NonLocals.push_back(v); } } - std::vector visited; - markNonLocalsRecursively(v, visited, NonLocals); } else if (funcName.startswith("chpl__convertRuntimeTypeToValue")) { // Value* v = callInsn1->getArgOperand(1); for (User *U : v->users()) { @@ -1324,26 +1291,6 @@ namespace { return false; } } - - bool exemptionTest(Value *op, std::vector &NonLocals) { - bool ret = false; - // Case 1 : op = call @gf.make(%x, %y) - // = @gf.addr(op) <= assuming this stmt just unpacks local addr - if (isa(op)) { - CallInst *call = cast(op); - Function *F = call->getCalledFunction(); - if (F && F->getName().startswith(".gf.make")) { - ret = true; - } - } - // Case 2 : = @gf.make(%x, %y) - vector::iterator I = find(NonLocals.begin(), NonLocals.end(), op); - if (I != NonLocals.end()) { - ret = true; - } - return ret; - } - }; } From aecf88f081dbffff47702124dbabc94a9d600cd1 Mon Sep 17 00:00:00 2001 From: Akihiro Hayashi Date: Thu, 3 Mar 2016 13:37:37 -0600 Subject: [PATCH 17/24] Remove IGraph traversal from ArrayAccess+LocalStmt+GVN --- llvmLocalityOptimization.cpp | 125 ++++++++++++++++++++--------------- 1 file changed, 71 insertions(+), 54 deletions(-) diff --git a/llvmLocalityOptimization.cpp b/llvmLocalityOptimization.cpp index 7fb1b9a..791d240 100644 --- a/llvmLocalityOptimization.cpp +++ b/llvmLocalityOptimization.cpp @@ -362,12 +362,13 @@ namespace { // original GEP is supposed to be array access (GEP %shifteddata, %offset) // search array descriptor - LocalArrayEntry *li1 = LocalArrays->getEntryByValue(gepInst); + LocalArrayEntry *li1 = LocalArrays->getEntryByValue(gepInst->getPointerOperand()); if (li1) { possiblyLocal = 
true; local = li1; + errs () << "HOGE2\n"; } - + errs () << "HOGE\n"; // search key assuming array descriptor has already been renamed. const GetElementPtrInst *keyGep = NULL; for (ValueToValueMapTy::iterator I = VM.begin(), E = VM.end(); I != E; I++) { @@ -395,7 +396,8 @@ namespace { definitelyLocal = true; } else { // Check if this offset is local - offset = analyzeArrayAccessOffsets(oldGEP, G); + offset = analyzeArrayAccessOffsets(oldGEP); + errs () << "Offset Fire: " << offset << "\n"; if (!local || local->isLocalOffset(offset)) { definitelyLocal = true; } else { @@ -857,17 +859,17 @@ namespace { // If an instruction is enclosed by a local statement, set the locality level of each operand to 0. // Currently, we are assuming that gf.addr function calls correspond to Chapel's local statements, but this is not always true because gf.addr is also used to extract a local pointer from a wide pointer. We work on this later pass using NonLocals) IGraph *G = new IGraph(F->getName()); - G->construct(F, info); +// G->construct(F, info); // Perform a reduced version of GVN ValueTable *VN = createValueTable(F); // Input : VN, G // Output : LocalArraysGVN, LocalArrayDecl - salvageChapelArrayAccess(F, VN, G, LocalArraysGVN, LocalArraysDecl); + salvageChapelArrayAccess(F, VN, LocalArraysGVN, LocalArraysDecl); // Dump analysis results - if (debugThisFn[0] && F->getName() == debugThisFn) { +// if (debugThisFn[0] && F->getName() == debugThisFn) { VN->dump(); // For Graphviz G->dumpDOT(); @@ -875,7 +877,7 @@ namespace { LocalArraysGVN->dump(); errs () << "[Local Array Decl]\n"; LocalArraysDecl->dump(); - } +// } // Process each instruction // try to convert load/store/getelementptr with addrspace(100) to addrspace(0) with using IGraph @@ -1029,7 +1031,7 @@ namespace { } - void salvageChapelArrayAccess(Function *F, ValueTable *VN, IGraph *G, LocalArrayInfo *LocalArraysGVN, LocalArrayInfo *LocalArraysDecl) { + void salvageChapelArrayAccess(Function *F, ValueTable *VN, LocalArrayInfo 
*LocalArraysGVN, LocalArrayInfo *LocalArraysDecl) { for (inst_iterator IS = inst_begin(F), IE = inst_end(F); IS != IE; IS++) { Instruction *targetInsn = &*IS; switch (targetInsn->getOpcode()) { @@ -1037,7 +1039,7 @@ namespace { case Instruction::Store: { // search array access enclosed by local statement - analyzeLoadStoreInsn(targetInsn, F, VN, G, LocalArraysGVN); + analyzeLoadStoreInsn(targetInsn, F, VN, LocalArraysGVN); break; } case Instruction::Call: @@ -1052,7 +1054,7 @@ namespace { } } - int analyzeArrayAccessOffsets(Instruction *getOffsetGEP, IGraph *G) { + int analyzeArrayAccessOffsets(Instruction *getOffsetGEP) { int ret = -1; if (getOffsetGEP) { errs () << "Offset : \n"; @@ -1088,30 +1090,35 @@ namespace { return ret; } - void analyzeLoadStoreInsn(Instruction *I, Function *F, ValueTable *VN, IGraph *G, LocalArrayInfo *LocalArrays) { - if (isArrayAccessMemOp(I, G, info->globalSpace)) { + void analyzeLoadStoreInsn(Instruction *I, Function *F, ValueTable *VN, LocalArrayInfo *LocalArrays) { + if (isArrayAccessLoadOrStore(I, info->globalSpace)) { // for each store/load instruction that involves addrspace 100 and is supposed to be array access. if (debugPassInsn) { errs () << *I << " is supposed to be array access\n"; } - GetElementPtrInst *gep1 = findGEP08FromMemOp(I, G); + GetElementPtrInst *gep1 = findGEP08FromMemOp(I); if (gep1 == NULL) return; for (inst_iterator IS2 = inst_begin(F), IE2 = inst_end(F); IS2 != IE2; IS2++) { Instruction *I2 = &*IS2; // search load/store instruction that is supposed to be local array access. 
- if (I != I2 && isArrayAccessMemOp(I2, G, 0)) { - GetElementPtrInst *gep2 = findGEP08FromMemOp(I2, G); + if (I != I2 && isArrayAccessLoadOrStore(I2, 0)) { + // I is load/store addrspace(100) ptr and supposed to be array access at this point + // I2 is load/store addrspace(0) ptr and supposed to be array access at this point + GetElementPtrInst *gep2 = findGEP08FromMemOp(I2); if (gep2 == NULL) continue; if (VN->sameExpressions(gep1, gep2)) { errs () << "[GVN worked!]\n"; - errs () << "\t Array Pointer :\n"; - errs () << "\t addrspace(100) : " << *gep1 << "\n"; - errs () << "\t addrspace(0) : " << *gep2 << "\n"; + errs () << "\t Local Access :\n"; + errs () << "\t\t Load/Store w/ addrspace(0) : " << *I2 << "\n"; + errs () << "\t\t Array Ptr w/ addrspace(0) : " << *gep2 << "\n"; + errs () << "\t Possibly Remote Access :\n"; + errs () << "\t\t Load/Store w/ addrspace(100) : " << *I << "\n"; + errs () << "\t\t Array Ptr w/ addrspace(100) : " << *gep1 << "\n"; // mark Value *localArray = gep1->getPointerOperand(); LocalArrayEntry *li = LocalArrays->getEntryByValue(localArray); - // Analyze Offset - int offset = analyzeArrayAccessOffsets(dyn_cast(I2->getOperand(1)), G); + // Analyze Offset + int offset = analyzeArrayAccessOffsets(dyn_cast(I2->getOperand(1))); if (offset != -1) { // if (!li) { @@ -1189,43 +1196,44 @@ namespace { } } - void searchGEP08Inst(vector &list, vector &visited, Node *node) { - vector::iterator I = find(visited.begin(), visited.end(), node); - if (I == visited.end()) { - visited.push_back(node); + void searchGEP08Inst(vector &list, vector &visited, Instruction* I) { + // see if this Instruction is already visited + if (find(visited.begin(), visited.end(), I) != visited.end()) { + return; + } + visited.push_back(I); + if (debugPassInsn) { + errs () << "Parent Insn : " << *I <<"\n"; + } + // Check if this intruction is GEP + GetElementPtrInst *gepInst = dyn_cast(I); + if (gepInst && gepInst->getNumIndices() == 2) { if (debugPassInsn) { - errs () << 
"Parent Insn : " << *node->getValue() <<"\n"; + errs () << "Candidate GEP : " << *gepInst << "\n"; } - for (Node::iterator I = node->parents_begin(), E = node->parents_end(); I != E; I++) { - Node *tmp = *I; - Value *v = tmp->getValue(); - if (debugPassInsn) { - errs () << "Parent Insn : " << *v <<"\n"; + Constant *op1 = dyn_cast(gepInst->getOperand(1)); + Constant *op2 = dyn_cast(gepInst->getOperand(2)); + if (op1 != NULL && op2 != NULL + && op1->getUniqueInteger() == 0 && op2->getUniqueInteger() == 8) { + // add a candidate GEP to list + if (find(list.begin(), + list.end(), + gepInst) == list.end()) { + list.push_back(gepInst); } - GetElementPtrInst *gepInst = dyn_cast(v); - if (gepInst && gepInst->getNumIndices() == 2) { - if (debugPassInsn) { - errs () << "Candidate GEP : " << *gepInst << "\n"; - } - Constant *op1 = dyn_cast(gepInst->getOperand(1)); - Constant *op2 = dyn_cast(gepInst->getOperand(2)); - if (op1 != NULL && op2 != NULL - && op1->getUniqueInteger() == 0 && op2->getUniqueInteger() == 8) { - vector::iterator I2 = find(list.begin(), list.end(), gepInst); - if (I2 == list.end()) { - list.push_back(gepInst); - } - } else { - searchGEP08Inst(list, visited, tmp); - } - } else { - searchGEP08Inst(list, visited, tmp); + } + } else { + for (unsigned int i=0; i < I->getNumOperands(); i++) { + Value *op = I->getOperand(i); + Instruction *insn = dyn_cast(op); + if (insn) { + searchGEP08Inst(list, visited, insn); } } } } - GetElementPtrInst* findGEP08FromMemOp(Instruction *I, IGraph *G) { + GetElementPtrInst* findGEP08FromMemOp(Instruction *I) { Value *op = NULL; if (isa(I)) { StoreInst* s = cast(I); @@ -1236,12 +1244,21 @@ namespace { } else { return NULL; } + Instruction *insn; if (!isa(op)) { return NULL; + } else { + insn = cast(op); } vector list; - vector visited; - searchGEP08Inst(list, visited, G->getNodeByValue(op)); + vector visited; + for (unsigned int i = 0; i < insn->getNumOperands(); i++) { + Value *op2 = insn->getOperand(i); + Instruction *insn2 = 
dyn_cast(op2); + if (insn2) { + searchGEP08Inst(list, visited, insn2); + } + } if (list.size() == 0) { return NULL; } else { @@ -1271,13 +1288,13 @@ namespace { return true; } - bool isArrayAccessMemOp(Instruction *I, IGraph *G, unsigned addrSpace) { + bool isArrayAccessLoadOrStore(Instruction *I, unsigned addrSpace) { if (isa(I)) { StoreInst* s = cast(I); if (s->getPointerAddressSpace() != addrSpace) { return false; } - GetElementPtrInst* gep = findGEP08FromMemOp(s, G); + GetElementPtrInst* gep = findGEP08FromMemOp(s); return isArrayAccessGEP(gep); } else if (isa(I)) { LoadInst* l = cast(I); @@ -1285,7 +1302,7 @@ namespace { return false; } errs () << "Load + 100 : " << *I << "\n"; - GetElementPtrInst* gep = findGEP08FromMemOp(l, G); + GetElementPtrInst* gep = findGEP08FromMemOp(l); return isArrayAccessGEP(gep); } else { return false; From da53f0b45d28558ac5b1fe17c224e8e9dc86cb87 Mon Sep 17 00:00:00 2001 From: Akihiro Hayashi Date: Fri, 4 Mar 2016 00:26:55 -0600 Subject: [PATCH 18/24] Add upward locality proof using IGraph --- IGraph.cpp | 85 ++++++++++++++++++++++++++++++++++++ IGraph.h | 27 +++++++----- llvmLocalityOptimization.cpp | 45 ++++++------------- 3 files changed, 113 insertions(+), 44 deletions(-) diff --git a/IGraph.cpp b/IGraph.cpp index 4c5fdc2..78ddab7 100644 --- a/IGraph.cpp +++ b/IGraph.cpp @@ -679,6 +679,91 @@ void IGraph::construct(Function *F, GlobalToWideInfo *info) { this->performRenaming(); } +Node* IGraph::getDefNode(const Node* node) const { + for (IGraph::const_iterator I = this->begin(), + E = this->end(); I != E; I++) { + Node* n = *I; + if (n->getValue() == node->getValue() + && n->getVersion() == node->getVersion() + && (n->getKind() == Node::NODE_DEF + || n->getKind() == Node::NODE_PHI)) { + return n; + } + } + return NULL; +} + +IGraph::Answer IGraph::proveUpward(const Node* node, + const int qLocality) const { + Answer answer; + + switch (node->getKind()) { + case Node::NODE_DEF: { + answer = (node->getLocality() == qLocality)? 
TRUE : FALSE; + break; + } + case Node::NODE_USE: { + // search + Node *def = this->getDefNode(node); + answer = (def == NULL)? UNKNOWN : proveUpward(def, qLocality); + break; + } + case Node::NODE_PHI: { + for (Node::const_iterator I = node->parents_begin(), + E = node->parents_end(); I != E; I++) { + const Node *n = *I; + if (proveUpward(n, qLocality) == TRUE) { + answer = TRUE; + } else { + answer = UNKNOWN; + } + } + break; + } + default: { + answer = UNKNOWN; + } + } + return answer; +} + + +IGraph::Answer IGraph::prove(const Value* value, + const Instruction *insn, + const int qLocality) const { + if (debug) { + errs () << "Proving (" << *value << " == " << qLocality << ")? @" << *insn << "\n"; + } + // locate corresponding node + const Node* target = NULL; + for (IGraph::const_iterator I = this->begin(), + E = this->end(); I != E; I++) { + const Node* node = *I; + if (node->getValue() == value + && node->getInstruction() == insn) { + target = node; + } + } + + Answer answer; + if (!target) { + answer = UNKNOWN; + } else { + if (target->getLocality() == qLocality) { + answer = TRUE; + } else { + // upward search + answer = proveUpward(target, qLocality); + } + } + if (debug) { + if (answer == TRUE) { errs () << "\tAnswer:TRUE\n"; } + if (answer == FALSE) { errs () << "\tAnswer:FALSE\n"; } + if (answer == UNKNOWN) { errs () << "\tAnswer:UNKNOWN\n"; } + } + return answer; +} + void IGraph::dumpDOT() { static int version = 0; stringstream ss; diff --git a/IGraph.h b/IGraph.h index adef9f4..9185a6b 100644 --- a/IGraph.h +++ b/IGraph.h @@ -124,6 +124,7 @@ class Node { // Getter for general node information Value* getValue() const { return value; } + Instruction* getInstruction() const { return insn; } NodeKind getKind() const { return kind; } unsigned int getVersion() const { return version; } int getLocality() const { return locality; } @@ -276,17 +277,6 @@ class IGraph { StringRef getName() const { return name; } unsigned size() const { return nodes.size(); } - 
Node* getNodeByValue(const Value* v) { - for (NodeListType::iterator I = nodes.begin(), - E = nodes.end(); I != E; I++) { - Node* tmp = *I; - if (v == tmp->getValue()) { - return tmp; - } - } - return NULL; - } - Node* getNodeByPostOrderNumber(const int number) { for (NodeListType::iterator I = nodes.begin(), E = nodes.end(); I != E; I++) { @@ -299,7 +289,20 @@ class IGraph { } // Construct a inequality graph from an LLVM function - void construct(Function *F, GlobalToWideInfo *info); + void construct(Function *F, GlobalToWideInfo *info); + + // + Node* getDefNode(const Node*) const; + + enum Answer { + TRUE, + FALSE, + UNKNOWN + }; + + // + Answer prove(const Value*, const Instruction *, const int) const; + Answer proveUpward(const Node*, const int) const; // Used for dumping IGraph in DOT format void dumpDOT(); diff --git a/llvmLocalityOptimization.cpp b/llvmLocalityOptimization.cpp index 791d240..d732c89 100644 --- a/llvmLocalityOptimization.cpp +++ b/llvmLocalityOptimization.cpp @@ -289,36 +289,16 @@ namespace { return vn; } - bool isDefinitelyLocalAccordingToIG(Value* op, IGraph *G) { + bool isDefinitelyLocalAccordingToIG(Value* op, Instruction *insn, IGraph *G) { if (fLLVMDisableIG) { return false; } - bool definitelyLocal = false; -#if 0 - bool exempt = false; - int ll = info->globalSpace; - // find smallest possible locality level - for (vector::iterator I = G->begin(), E = G->end(); I != E; I++) { - Node* n = *I; - int lltmp = n->getLL(op); - if (lltmp != -1 && lltmp < ll) { - ll = lltmp; - } - } - exempt = exemptionTest(op, NonLocals); - if (ll == 0 && !exempt) { - definitelyLocal = true; - NumLocalizedByIG++; - if (debugPassInsn) { - errs () << *op << " is definitely local\n"; - } - } else if (ll == 0 && exempt) { - if (debugPassInsn) { - errs () << *op << " is exempted\n"; - } + IGraph::Answer answer = G->prove(op, insn, 0); + if (answer == IGraph::TRUE) { + return true; + } else { + return false; } -#endif - return definitelyLocal; } // Assuming op is 
operand of GEP inst (e.g. getelementptr inbounds i64, i64 addrspace(100)* op) @@ -579,7 +559,7 @@ namespace { bool needToTransform = false; // For array access // Check if the pointer is definitely local (according to inequality graph) - needToTransform |= isDefinitelyLocalAccordingToIG(oldOp, G); + needToTransform |= isDefinitelyLocalAccordingToIG(oldOp, targetInsn, G); // Check if the pointer derives from locale-local array pointer (according to GVN) needToTransform |= isDefinitelyLocalAccordingToList(oldGEP, VM, G, LocalArraysGVN, true); // Check if the pointer derives from locale-local array pointer (according to locale-local array) @@ -627,7 +607,7 @@ namespace { Instruction* newInst = NULL; // Old Operand addrspace(100)* oldOp = oldLoad->getPointerOperand(); - if (isDefinitelyLocalAccordingToIG(oldOp, G)) { + if (isDefinitelyLocalAccordingToIG(oldOp, targetInsn, G)) { newOp = findNewOpOrInsertGF(oldOp, VM, M, oldLoad); newInst = new LoadInst(newOp, "", @@ -660,7 +640,7 @@ namespace { Instruction* newInst = NULL; // Old Operand addrspace(100)* oldOp = oldStore->getPointerOperand(); - if (isDefinitelyLocalAccordingToIG(oldOp, G)) { + if (isDefinitelyLocalAccordingToIG(oldOp, targetInsn, G)) { newOp = findNewOpOrInsertGF(oldOp, VM, M, oldStore); newInst = new StoreInst(oldStore->getValueOperand(), newOp, @@ -763,7 +743,7 @@ namespace { if (srcSpace == info->globalSpace) { ValueToValueMapTy::iterator I = VM.find(oldSrc); bool renamed = I != VM.end() && I->second; - if (isDefinitelyLocalAccordingToIG(oldSrc, G) || renamed) { + if (isDefinitelyLocalAccordingToIG(oldSrc, targetInsn, G) || renamed) { newSrc = findNewOpOrInsertGF(oldSrc, VM, M, oldCall); needToTransform = true; } @@ -773,7 +753,7 @@ namespace { if (dstSpace == info->globalSpace) { ValueToValueMapTy::iterator I = VM.find(oldDst); bool renamed = I != VM.end() && I->second; - if (isDefinitelyLocalAccordingToIG(oldDst, G) || renamed) { + if (isDefinitelyLocalAccordingToIG(oldDst, targetInsn, G) || 
renamed) { newDst = findNewOpOrInsertGF(oldDst, VM, M, oldCall); needToTransform = true; } @@ -859,7 +839,7 @@ namespace { // If an instruction is enclosed by a local statement, set the locality level of each operand to 0. // Currently, we are assuming that gf.addr function calls correspond to Chapel's local statements, but this is not always true because gf.addr is also used to extract a local pointer from a wide pointer. We work on this later pass using NonLocals) IGraph *G = new IGraph(F->getName()); -// G->construct(F, info); + G->construct(F, info); // Perform a reduced version of GVN ValueTable *VN = createValueTable(F); @@ -1118,6 +1098,7 @@ namespace { Value *localArray = gep1->getPointerOperand(); LocalArrayEntry *li = LocalArrays->getEntryByValue(localArray); // Analyze Offset + // FIXME: this is only for store instruction! (I2->getOperand(0) if it's a load instruction) int offset = analyzeArrayAccessOffsets(dyn_cast(I2->getOperand(1))); if (offset != -1) { // From a4d353260fc2cf1f6958d8f13974c624a0439c56 Mon Sep 17 00:00:00 2001 From: Akihiro Hayashi Date: Fri, 4 Mar 2016 13:32:57 -0600 Subject: [PATCH 19/24] Refactor IGraph Code --- IGraph.cpp | 39 +++++++++++++++++++++++------------- llvmLocalityOptimization.cpp | 13 ++---------- 2 files changed, 27 insertions(+), 25 deletions(-) diff --git a/IGraph.cpp b/IGraph.cpp index 78ddab7..68e7d5a 100644 --- a/IGraph.cpp +++ b/IGraph.cpp @@ -200,10 +200,17 @@ IGraph::InsnToNodeMapType IGraph::analyzeDefUseOfLocality(Function *F, GlobalToW addrspace = 0; } else { // this function may have side effect - needToWork = true; - kind = Node::NODE_DEF; - ptrOp = NULL; - addrspace = 100; + CallInst *call = cast(insn); + for (unsigned int i = 0; i < call->getNumArgOperands(); i++) { + Value *op = call->getArgOperand(i); + if (op->getType()->isPointerTy() + && op->getType()->getPointerAddressSpace() == info->globalSpace) { + needToWork = true; + kind = Node::NODE_DEF; + ptrOp = op; + addrspace = 100; + } + } } break; } @@ 
-280,7 +287,8 @@ void IGraph::buildGraph(Function *F, InsnToNodeMapType &NodeCandidates) { for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; I++) { // add edge if needed Instruction *insn = &*I; - if (NodeCandidates.find(insn) != NodeCandidates.end()) { + if ((NodeCandidates.find(insn) != NodeCandidates.end()) + && std::get<1>(NodeCandidates[insn]) == val) { std::tuple &info = NodeCandidates[insn]; // create a new node. Node *n = new Node(std::get<0>(info), // Kind @@ -416,7 +424,6 @@ void IGraph::computeDominatorTree() { if (b == this->getEntry()) continue; /* pick one first processed predecessor */ Node::IDominatorTreeType new_idom(this->size(), false); - errs () << "PostOrder(" << i << ")\n"; Node *first_pred = NULL; for (IGraph::iterator IPRED = b->parents_begin(), EPRED = b->parents_end(); IPRED != EPRED; IPRED++) { @@ -603,7 +610,11 @@ void IGraph::performRenamingInternal(Node *n, Node::NodeElementType &visited) { n->setVersion(renamingStacks[n->getValue()].top()); break; case Node::NODE_USE: - n->setVersion(renamingStacks[n->getValue()].top()); + if (!renamingStacks[n->getValue()].empty()) { + n->setVersion(renamingStacks[n->getValue()].top()); + } else { + n->setVersion(0); + } break; case Node::NODE_DEF: generateName(n->getValue()); @@ -618,6 +629,7 @@ void IGraph::performRenamingInternal(Node *n, Node::NodeElementType &visited) { // see if the node is a children of n in DT if (n != node && n->getPostOrderNumber() == node->getIDom().find_first()) { + errs () << node->getPostOrderNumber() << " is dominated by " << n->getPostOrderNumber() << "\n"; performRenamingInternal(node, visited); } } @@ -670,11 +682,11 @@ void IGraph::construct(Function *F, GlobalToWideInfo *info) { this->calculateDTandDF(); /* 3-2. Insert Phi-nodes using Dominance Frontier */ bool Changed = false; - this->performPhiNodeInsertion(Changed); + this->performPhiNodeInsertion(Changed); /* 3-3. 
Compute DT again if the shape of the graph is changed */ if (Changed) { this->calculateDTandDF(); - } + } /* 3-4. Renaming */ this->performRenaming(); } @@ -711,11 +723,10 @@ IGraph::Answer IGraph::proveUpward(const Node* node, case Node::NODE_PHI: { for (Node::const_iterator I = node->parents_begin(), E = node->parents_end(); I != E; I++) { - const Node *n = *I; - if (proveUpward(n, qLocality) == TRUE) { - answer = TRUE; - } else { - answer = UNKNOWN; + const Node *n = *I; + answer = proveUpward(n, qLocality); + if (answer != TRUE) { + break; } } break; diff --git a/llvmLocalityOptimization.cpp b/llvmLocalityOptimization.cpp index d732c89..0df4b1f 100644 --- a/llvmLocalityOptimization.cpp +++ b/llvmLocalityOptimization.cpp @@ -295,6 +295,7 @@ namespace { } IGraph::Answer answer = G->prove(op, insn, 0); if (answer == IGraph::TRUE) { + NumLocalizedByIG++; return true; } else { return false; @@ -346,9 +347,7 @@ namespace { if (li1) { possiblyLocal = true; local = li1; - errs () << "HOGE2\n"; } - errs () << "HOGE\n"; // search key assuming array descriptor has already been renamed. const GetElementPtrInst *keyGep = NULL; for (ValueToValueMapTy::iterator I = VM.begin(), E = VM.end(); I != E; I++) { @@ -818,10 +817,6 @@ namespace { // Don't do anything if there is no body. // Does nothing for special functions since they have no body. 
if( F->begin() == F->end() ) return; - - legacy::FunctionPassManager* FPM_pre = new legacy::FunctionPassManager(&M); - FPM_pre->add(llvm::createPromoteMemoryToRegisterPass()); - FPM_pre->run(*F); // For Debug if (debugThisFn[0] && F->getName() == debugThisFn) { @@ -884,11 +879,7 @@ namespace { // generate F->getName().before.ll dumpFunction(F, "after"); } - - legacy::FunctionPassManager *FPM_post = new legacy::FunctionPassManager(&M); - FPM_post->add(llvm::createDemoteRegisterToMemoryPass()); - FPM_post->run(*F); - } + } /* Locality Optimization Pass: From 7fab37a42b81f2891a2bd00966c54035a5040e1b Mon Sep 17 00:00:00 2001 From: Akihiro Hayashi Date: Fri, 4 Mar 2016 16:15:03 -0600 Subject: [PATCH 20/24] Skip local array detection if there is any branch instructions --- llvmLocalityOptimization.cpp | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/llvmLocalityOptimization.cpp b/llvmLocalityOptimization.cpp index 0df4b1f..3280659 100644 --- a/llvmLocalityOptimization.cpp +++ b/llvmLocalityOptimization.cpp @@ -844,7 +844,7 @@ namespace { salvageChapelArrayAccess(F, VN, LocalArraysGVN, LocalArraysDecl); // Dump analysis results -// if (debugThisFn[0] && F->getName() == debugThisFn) { + if (debugThisFn[0] && F->getName() == debugThisFn) { VN->dump(); // For Graphviz G->dumpDOT(); @@ -852,7 +852,7 @@ namespace { LocalArraysGVN->dump(); errs () << "[Local Array Decl]\n"; LocalArraysDecl->dump(); -// } + } // Process each instruction // try to convert load/store/getelementptr with addrspace(100) to addrspace(0) with using IGraph @@ -1003,6 +1003,20 @@ namespace { void salvageChapelArrayAccess(Function *F, ValueTable *VN, LocalArrayInfo *LocalArraysGVN, LocalArrayInfo *LocalArraysDecl) { + /* Skip this if there is a branch instruction. 
*/ + /* (TODO) Integrate salvageChapelArrayAccess into IGraph later*/ + for (inst_iterator IS = inst_begin(F), IE = inst_end(F); IS != IE; IS++) { + Instruction *targetInsn = &*IS; + TerminatorInst *branch = dyn_cast(targetInsn); + if (branch) { + ReturnInst *RI = dyn_cast(targetInsn); + if (!RI) { + branch->dump(); + return; + } + } + } + for (inst_iterator IS = inst_begin(F), IE = inst_end(F); IS != IE; IS++) { Instruction *targetInsn = &*IS; switch (targetInsn->getOpcode()) { From 59f7e5aad1ef39f938da75a61633c2a6a6ad4a6c Mon Sep 17 00:00:00 2001 From: Akihiro Hayashi Date: Fri, 4 Mar 2016 16:16:03 -0600 Subject: [PATCH 21/24] add test cases where there is a conditional branch --- test/local.ll | 126 +++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 119 insertions(+), 7 deletions(-) diff --git a/test/local.ll b/test/local.ll index 7022093..86b6acd 100644 --- a/test/local.ll +++ b/test/local.ll @@ -42,7 +42,7 @@ entry: %0 = call i64* @.gf.addr.1(i64 addrspace(100)* %x) %1 = load i64, i64* %0 ; CHECK: call i64* @.gf.addr. -; CHECK; load i64, i64* +; CHECK: load i64, i64* ; CHECK: add i64 ; CHECK: ret i64 %2 = load i64, i64 addrspace(100)* %x @@ -105,15 +105,12 @@ define internal fastcc void @localizeByGVN(%chpl_DefaultRectangularArr_int64_t_1 ; CHECK: @localizeByGVN( ; ) entry: -; CHECK: call %chpl_DefaultRectangularArr_int64_t_1_int64_t_F_object* @.gf.addr. -; CHECK: getelementptr inbounds %chpl_DefaultRectangularArr_int64_t_1_int64_t_F_object, %chpl_DefaultRectangularArr_int64_t_1_int64_t_F_object* %0 = getelementptr inbounds %chpl_DefaultRectangularArr_int64_t_1_int64_t_F_object, %chpl_DefaultRectangularArr_int64_t_1_int64_t_F_object addrspace(100)* %A.val, i64 0, i32 3, i64 0 -; CHECK: load i64, i64* %1 = load i64, i64 addrspace(100)* %0 -; CHECK: call %chpl_DefaultRectangularArr_int64_t_1_int64_t_F_object* @.gf.addr. 
%2 = getelementptr inbounds %chpl_DefaultRectangularArr_int64_t_1_int64_t_F_object, %chpl_DefaultRectangularArr_int64_t_1_int64_t_F_object addrspace(100)* %A.val, i64 0, i32 8 -; CHECK: load i64 addrspace(100)*, i64 addrspace(100)** - %3 = load i64 addrspace(100)*, i64 addrspace(100)* addrspace(100)* %2 + %3 = load i64 addrspace(100)*, i64 addrspace(100)* addrspace(100)* %2 +; CHECK: call i64* @.gf.addr. +; CHECK: getelementptr inbounds i64, i64* %4 = getelementptr inbounds i64, i64 addrspace(100)* %3, i64 %1 ; CHECK: store i64 1, i64* store i64 1, i64 addrspace(100)* %4 @@ -131,3 +128,118 @@ entry: store i64 3, i64 addrspace(100)* %14 ret void } + + +@Q = internal global i64 0 +; proc habanero(ref x) : int { +; var p: int = 1; +; if (Q == 1) { +; local { p = x; } +; } +; return p + x; // might be non-local +; } + +define internal fastcc i64 @localizeUnderCondition1(i64 addrspace(100)* %x.val) { +; CHECK: @localizeUnderCondition1( +; ) +entry: + %0 = load i64, i64* @Q + %1 = icmp eq i64 %0, 1 + br i1 %1, label %habanero_5blk_body_, label %habanero_3cond_end_ + +habanero_5blk_body_: ; preds = %entry + %2 = tail call i64* @.gf.addr.1(i64 addrspace(100)* %x.val) + %3 = load i64, i64* %2 + br label %habanero_3cond_end_ + +habanero_3cond_end_: ; preds = %habanero_5blk_body_, %entry + %p.0 = phi i64 [ %3, %habanero_5blk_body_ ], [ 1, %entry ] +; CHECK: load i64, i64 addrspace(100)* %x.val + %4 = load i64, i64 addrspace(100)* %x.val + %5 = add i64 %4, %p.0 + ret i64 %5 +} + +; var p: int = 1; +; if (Q == 1) { +; local { p = x; } +; p = x; // definitely-local +; } else { +; p = x; // possibly-remote +; } +; return p + x; // possibly remote +; } + +define internal fastcc i64 @localizeUnderCondition2(i64 addrspace(100)* %x.val) { +; CHECK: @localizeUnderCondition2( +; ) +entry: + %0 = load i64, i64* @Q + %1 = icmp eq i64 %0, 1 + br i1 %1, label %habanero_5blk_body_, label %habanero_6blk_body_ + +habanero_5blk_body_: ; preds = %entry + %2 = tail call i64* @.gf.addr.1(i64 
addrspace(100)* %x.val) + %3 = load i64, i64* %2 +; CHECK: call i64* @.gf.addr. +; CHECK: load i64, i64* + %.pre = load i64, i64 addrspace(100)* %x.val + br label %habanero_3cond_end_ + +habanero_6blk_body_: ; preds = %entry +; CHECK: load i64, i64 addrspace(100)* %x.val + %4 = load i64, i64 addrspace(100)* %x.val + br label %habanero_3cond_end_ + +habanero_3cond_end_: ; preds = %habanero_6blk_body_, %habanero_5blk_body_ + %5 = phi i64 [ %.pre, %habanero_5blk_body_ ], [ %4, %habanero_6blk_body_ ] + %p.0 = phi i64 [ %3, %habanero_5blk_body_ ], [ %4, %habanero_6blk_body_ ] + %6 = add i64 %5, %p.0 + ret i64 %6 +} + +; proc habanero(ref x) : int { +; var p: int = 1; +; if (Q == 1) { +; local { p = x; } +; } else if (Q == 2) { +; local { p = x; } +; } else { +; local { p = x; } +; } +; return p + x; // definitely-local +; } + +define internal fastcc i64 @localizeUnderCondition3(i64 addrspace(100)* %x.val) { +; CHECK: @localizeUnderCondition3( +; ) +entry: + %0 = load i64, i64* @Q + switch i64 %0, label %habanero_10blk_body_ [ + i64 1, label %habanero_5blk_body_ + i64 2, label %habanero_8blk_body_ + ] + +habanero_5blk_body_: ; preds = %entry + %1 = tail call i64* @.gf.addr.1(i64 addrspace(100)* %x.val) + %2 = load i64, i64* %1 + br label %habanero_3cond_end_ + +habanero_8blk_body_: ; preds = %entry + %q = tail call i64* @.gf.addr.1(i64 addrspace(100)* %x.val) + %3 = load i64, i64* %q + br label %habanero_3cond_end_ + +habanero_10blk_body_: ; preds = %entry + %4 = tail call i64* @.gf.addr.1(i64 addrspace(100)* %x.val) + %5 = load i64, i64* %4 + br label %habanero_3cond_end_ + +habanero_3cond_end_: ; preds = %habanero_8blk_body_, %habanero_10blk_body_, %habanero_5blk_body_ + %p.0 = phi i64 [ %2, %habanero_5blk_body_ ], [ %3, %habanero_8blk_body_ ], [ %5, %habanero_10blk_body_ ] +; CHECK: call i64* @.gf.addr. 
+; CHECK: load i64, i64* + %6 = load i64, i64 addrspace(100)* %x.val + %7 = add i64 %6, %p.0 + ret i64 %7 +} From 82cb889fade11f818376727be276c83cd344257a Mon Sep 17 00:00:00 2001 From: Akihiro Hayashi Date: Fri, 4 Mar 2016 16:27:10 -0600 Subject: [PATCH 22/24] add comments --- llvmLocalityOptimization.cpp | 74 +++++++++++++++++++++--------------- 1 file changed, 43 insertions(+), 31 deletions(-) diff --git a/llvmLocalityOptimization.cpp b/llvmLocalityOptimization.cpp index 3280659..a69375a 100644 --- a/llvmLocalityOptimization.cpp +++ b/llvmLocalityOptimization.cpp @@ -138,14 +138,15 @@ namespace { static const char* debugThisFn = "habanero"; // For Chapel Compiler & Breakdown + // These flags will be used from Chapel compiler to turn on/off each optimizer static const bool fLLVMDisableIG = false; static const bool fLLVMDisableDecl = false; static const bool fLLVMDisableGVN = false; static const bool fLLVMLocalityOpt = true; // Statistics - STATISTIC(NumLocalizedByIG, "Number of localized operations by IG"); - STATISTIC(NumLocalizedByGVN, "Number of localized operations by GVN"); + STATISTIC(NumLocalizedByIG, "Number of localized operations by Inequality Graph "); + STATISTIC(NumLocalizedByGVN, "Number of localized array operations by Global value numbering"); STATISTIC(NumLocalizedByArrayDecl, "Number of localized operations by Locale-local Array"); struct LocalityOpt : public ModulePass { @@ -164,46 +165,70 @@ namespace { LocalityOpt() : ModulePass(ID), info(NULL), layoutAfterwards("") { } - + + // This class is used to remember locale-local array class LocalArrayEntry { private: + // A pointer to Chapel Array Value *op; + // whether the whole array is local or not bool whole; + // To remember ChapelArray(offset) is definitely-local vector localOffsets; + public: + // Constructor LocalArrayEntry(Value* _op, bool _whole) : op(_op), whole(_whole) {} + + // mark the speficied offset is definitely-local void addLocalOffset(unsigned int offs) { - for 
(vector::iterator I = localOffsets.begin(), E = localOffsets.end(); I != E; I++) { + for (vector::iterator I = localOffsets.begin(), + E = localOffsets.end(); I != E; I++) { if (offs == *I) { return; } } localOffsets.push_back(offs); } - Value* getOp() { return op; } - void dumpLocalOffsets() { - for (vector::iterator I = localOffsets.begin(), E = localOffsets.end(); I != E; I++) { - errs () << *I << ", "; - } - errs () << "\n"; - } + + // Getter + Value* getOp() const { return op; } + bool isWholeLocal() const { return whole; } + + // return true if the specified Array(offset) is definitely-local bool isLocalOffset(int offset) { if (std::find(localOffsets.begin(), localOffsets.end(), offset) != localOffsets.end()) { return true; } return false; - } - bool isWholeLocal() { return whole; } + } + + // For Debug + void dumpLocalOffsets() { + for (vector::iterator I = localOffsets.begin(), + E = localOffsets.end(); I != E; I++) { + errs () << *I << ", "; + } + errs () << "\n"; + } }; + // Collection of LocalArrayEntry defined above class LocalArrayInfo { private: + // a list of local array entry vector list; public: + // Constructor LocalArrayInfo() {} + + // Add local entry to current collection void add(LocalArrayEntry *li) { list.push_back(li); } + + // Take a pointer to ChapelArray and return the corresponding entry if exists LocalArrayEntry* getEntryByValue(const Value *op) { - for (vector::iterator I = list.begin(), E = list.end(); I != E; I++) { + for (vector::iterator I = list.begin(), + E = list.end(); I != E; I++) { LocalArrayEntry *li = *I; if (li->getOp() == op) { return li; @@ -211,9 +236,12 @@ namespace { } return NULL; } + + // For Debugging void dump() { errs () << "[Local Array Info Start]\n"; - for (vector::iterator I = list.begin(), E = list.end(); I != E; I++) { + for (vector::iterator I = list.begin(), + E = list.end(); I != E; I++) { LocalArrayEntry *li = *I; errs () << *(li->getOp()) << "\n"; errs () << "Definitely Local Offset : "; @@ -252,22 
+280,6 @@ namespace { errs() << "\n"; } #endif - // For Debugging purpose - void insertPrintf(Module &M, Instruction *insertBefore, StringRef Str) { - // Global Value - Constant *StrConstant = ConstantDataArray::getString(M.getContext(), Str); - GlobalVariable *GV = new GlobalVariable(M, StrConstant->getType(), true, GlobalValue::PrivateLinkage, StrConstant); - GV->setUnnamedAddr(true); - // GEP - Value *zero = ConstantInt::get(Type::getInt32Ty(M.getContext()), 0); - Value *gepArgs[] = { zero, zero }; - Instruction *gepInst = GetElementPtrInst::CreateInBounds(GV, gepArgs, "", insertBefore); - // Printf - Constant *putsFunc = M.getOrInsertFunction("puts", Type::getInt32Ty(M.getContext()), Type::getInt8PtrTy(M.getContext()), NULL); - Value* printfArgs[1]; - printfArgs[0] = gepInst; - CallInst::Create(putsFunc, printfArgs, "", insertBefore); - } bool isaGlobalPointer(GlobalToWideInfo* info, Type* type) { PointerType* pt = dyn_cast(type); From 2c510f1dc0f30798e0ed9ccfedede6e8ddec468e Mon Sep 17 00:00:00 2001 From: Akihiro Hayashi Date: Fri, 4 Mar 2016 17:04:04 -0600 Subject: [PATCH 23/24] Wrap lines to 80 characters --- IGraph.cpp | 80 +++-- IGraph.h | 38 ++- llvmLocalityOptimization.cpp | 546 +++++++++++++++++++++++++---------- 3 files changed, 468 insertions(+), 196 deletions(-) diff --git a/IGraph.cpp b/IGraph.cpp index 68e7d5a..5de9976 100644 --- a/IGraph.cpp +++ b/IGraph.cpp @@ -85,7 +85,8 @@ void Node::printAsOperand(raw_ostream &o, bool PrettyPrint) const { o << "\n" << this->getPostOrderNumber(); #ifdef DEBUG o << "\n" << "Parents ("; - for (const_iterator I = parents_begin(), E = parents_end(); I != E; I++) { + for (const_iterator I = parents_begin(), E = parents_end(); I != E; +I++) { Node *n = *I; o << n->getPostOrderNumber(); if (I+1 != E) { @@ -94,7 +95,8 @@ void Node::printAsOperand(raw_ostream &o, bool PrettyPrint) const { } o << ")"; o << "\n" << "Children ("; - for (const_iterator I = children_begin(), E = children_end(); I != E; I++) { + for 
(const_iterator I = children_begin(), E = children_end(); I != E; +I++) { Node *n = *I; o << n->getPostOrderNumber(); if (I+1 != E) { @@ -118,7 +120,8 @@ std::pair IGraph::isChapelLocalStmt(Instruction *insn) { if (call) { Function* f = call->getCalledFunction(); if (f != NULL) { - // calling @.gf.addr and then doing load and store => local statement + // calling @.gf.addr and then doing load + // and store => local statement if (f->getName().startswith(".gf.addr")) { for (User *U : call->getArgOperand(0)->users()) { Value *UI = U; @@ -132,7 +135,8 @@ std::pair IGraph::isChapelLocalStmt(Instruction *insn) { return std::make_pair(false, (Value*)NULL); } -IGraph::InsnToNodeMapType IGraph::analyzeDefUseOfLocality(Function *F, GlobalToWideInfo *info) { +IGraph::InsnToNodeMapType IGraph::analyzeDefUseOfLocality(Function *F, +GlobalToWideInfo *info) { /* 1. collect addrspace 100 pointers that is used in the next step. */ /* 1. construct a set of addrspace 100 pointers. */ /* 2. construct a list of blocks that def/use the pointer. 
*/ @@ -142,7 +146,8 @@ IGraph::InsnToNodeMapType IGraph::analyzeDefUseOfLocality(Function *F, GlobalToW if (debug) { errs () << "\t analyzing Def/Use of Locality\n"; } - for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I!=E; ++I) { + for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I!=E; +++I) { Value *arg = I; if (arg->getType()->isPointerTy() && arg->getType()->getPointerAddressSpace() == info->globalSpace) { @@ -204,7 +209,8 @@ IGraph::InsnToNodeMapType IGraph::analyzeDefUseOfLocality(Function *F, GlobalToW for (unsigned int i = 0; i < call->getNumArgOperands(); i++) { Value *op = call->getArgOperand(i); if (op->getType()->isPointerTy() - && op->getType()->getPointerAddressSpace() == info->globalSpace) { + && op->getType()->getPointerAddressSpace() == +info->globalSpace) { needToWork = true; kind = Node::NODE_DEF; ptrOp = op; @@ -233,7 +239,8 @@ IGraph::InsnToNodeMapType IGraph::analyzeDefUseOfLocality(Function *F, GlobalToW possiblyRemotePtrs.push_back(ptrOp); } // store detailed information used in node construction. 
- NodeCandidates[insn] = std::make_tuple(kind, ptrOp, insn, addrspace); + NodeCandidates[insn] = std::make_tuple(kind, ptrOp, insn, +addrspace); } } return NodeCandidates; @@ -243,7 +250,8 @@ void IGraph::buildGraph(Function *F, InsnToNodeMapType &NodeCandidates) { if (debug) { errs () << "\t buidling an initial graph\n"; } - // Build a graph based on NodeCandidates construted in the previous phase (namely analyzeDefUseOfLocality) + // Build a graph based on NodeCandidates construted in the previous phase + // (namely analyzeDefUseOfLocality) for (PossiblyRemoteArrayType::iterator I = possiblyRemotePtrs.begin(), E = possiblyRemotePtrs.end(); I != E; I++) { Value* val = *I; @@ -255,9 +263,11 @@ void IGraph::buildGraph(Function *F, InsnToNodeMapType &NodeCandidates) { // bool firstOccurrence = true; // Create Intra-block edge - for (Function::iterator BI = F->begin(), BE = F->end(); BI != BE; BI++) { + for (Function::iterator BI = F->begin(), + BE = F->end(); BI != BE; BI++) { BasicBlock* BB = BI; - // remember first and last node in BB so we can create edges between blocks. + // remember first and last node in BB so we can create edges + // between blocks. Node *firstNodeInBB = NULL; Node *lastNodeInBB = NULL; bool nodeAdded = false; @@ -284,18 +294,24 @@ void IGraph::buildGraph(Function *F, InsnToNodeMapType &NodeCandidates) { // For each instruction // Create a node if an instruction contains possibly-remote access - for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; I++) { + for (BasicBlock::iterator I = BB->begin(), + E = BB->end(); I != E; I++) { // add edge if needed Instruction *insn = &*I; if ((NodeCandidates.find(insn) != NodeCandidates.end()) && std::get<1>(NodeCandidates[insn]) == val) { - std::tuple &info = NodeCandidates[insn]; + std::tuple &info = NodeCandidates[insn]; // create a new node. 
Node *n = new Node(std::get<0>(info), // Kind std::get<1>(info), // Value std::get<2>(info), // Insn - 0, // Version (0 for now) - std::get<3>(info)); // Locality (either 0 or 100) + 0, + // Version (0 for now, set later) + std::get<3>(info)); + // Locality (either 0 or 100) // register the created node to the Graph. this->addNode(n); nodeAdded = true; @@ -320,7 +336,8 @@ void IGraph::buildGraph(Function *F, InsnToNodeMapType &NodeCandidates) { if (nodeAdded) { BBInfo[BB] = std::make_pair(firstNodeInBB, lastNodeInBB); } else { - Node *dummyUSENode = new Node(Node::NODE_USE, val, NULL, 0, 100); + Node *dummyUSENode = new Node(Node::NODE_USE, val, NULL, 0, +100); this->addNode(dummyUSENode); BBInfo[BB] = std::make_pair(dummyUSENode, dummyUSENode); } @@ -339,7 +356,8 @@ void IGraph::buildGraph(Function *F, InsnToNodeMapType &NodeCandidates) { const TerminatorInst *TInst = BB->getTerminator(); // add edges : // the last node in the current BB -> the first node in succesor BBs - for (unsigned I = 0, NSucc = TInst->getNumSuccessors(); I < NSucc; I++) { + for (unsigned I = 0, NSucc = TInst->getNumSuccessors(); I < NSucc; +I++) { BasicBlock *Succ = TInst->getSuccessor(I); std::pair &DstBBinfo = BBInfo[Succ]; Node* dstNode = std::get<0>(DstBBinfo); @@ -360,7 +378,8 @@ void IGraph::calculateDTandDF() { computeDominanceFrontier(); } -void IGraph::setPostOrderNumberWithDFSInternal(Node *node, int &number, Node::NodeElementType &visited) { +void IGraph::setPostOrderNumberWithDFSInternal(Node *node, int &number, +Node::NodeElementType &visited) { visited.push_back(node); for (Node::iterator I = node->children_begin(), @@ -441,7 +460,8 @@ void IGraph::computeDominatorTree() { if (p == first_pred) continue; if (!p->getUndefined()) { new_idom.reset(); - int idx = computeIntersect(p, first_pred)->getPostOrderNumber(); + int idx = computeIntersect(p, +first_pred)->getPostOrderNumber(); new_idom[idx] = true; } } @@ -473,11 +493,13 @@ Node* IGraph::computeIntersect(Node* b1, Node* b2) 
{ while (finger1->getPostOrderNumber() != finger2->getPostOrderNumber()) { while (finger1->getPostOrderNumber() < finger2->getPostOrderNumber()) { assert(finger1->getIDom().count() == 1); - finger1 = this->getNodeByPostOrderNumber(finger1->getIDom().find_first()); + finger1 = +this->getNodeByPostOrderNumber(finger1->getIDom().find_first()); } while (finger2->getPostOrderNumber() < finger1->getPostOrderNumber()) { assert(finger2->getIDom().count() == 1); - finger2 = this->getNodeByPostOrderNumber(finger2->getIDom().find_first()); + finger2 = +this->getNodeByPostOrderNumber(finger2->getIDom().find_first()); } } return finger1; @@ -501,9 +523,11 @@ void IGraph::computeDominanceFrontier() { for (Node::iterator BI = b->parents_begin(), BE = b->parents_end(); BI != BE; BI++) { Node *runner = *BI; - while (runner->getPostOrderNumber() != b->getIDom().find_first()) { + while (runner->getPostOrderNumber() != +b->getIDom().find_first()) { runner->addToDominanceFrontier(b); - runner = this->getNodeByPostOrderNumber(runner->getIDom().find_first()); + runner = +this->getNodeByPostOrderNumber(runner->getIDom().find_first()); } } } @@ -559,7 +583,7 @@ void IGraph::performPhiNodeInsertion(bool &Changed) { Node *phiNode = new Node(Node::NODE_PHI, val, NULL, 0, 0); this->addNode(phiNode); // inserting a new phi-node - // preserve parents and children of DFofDEF first (TODO functionalize) + // preserve parents and children of DFofDEF first Node::NodeElementType DFofDEFParents; for (Node::iterator NI = DFofDEF->parents_begin(), NE = DFofDEF->parents_end(); @@ -629,7 +653,9 @@ void IGraph::performRenamingInternal(Node *n, Node::NodeElementType &visited) { // see if the node is a children of n in DT if (n != node && n->getPostOrderNumber() == node->getIDom().find_first()) { - errs () << node->getPostOrderNumber() << " is dominated by " << n->getPostOrderNumber() << "\n"; + errs () << node->getPostOrderNumber() + << " is dominated by " + << n->getPostOrderNumber() << "\n"; 
performRenamingInternal(node, visited); } } @@ -660,7 +686,8 @@ void IGraph::performRenaming() { void IGraph::construct(Function *F, GlobalToWideInfo *info) { if (debug) { - errs () << "[Inequality Graph Construction for " << F->getName() << "]\n"; + errs () << "[Inequality Graph Construction for " + << F->getName() << "]\n"; } /* First create an entry node */ @@ -743,7 +770,8 @@ IGraph::Answer IGraph::prove(const Value* value, const Instruction *insn, const int qLocality) const { if (debug) { - errs () << "Proving (" << *value << " == " << qLocality << ")? @" << *insn << "\n"; + errs () << "Proving (" << *value << " == " << qLocality << ")? @" + << *insn << "\n"; } // locate corresponding node const Node* target = NULL; diff --git a/IGraph.h b/IGraph.h index 9185a6b..518c8e0 100644 --- a/IGraph.h +++ b/IGraph.h @@ -172,7 +172,8 @@ class Node { // Used for visiting nodes in post order void resetPostOrderNumber() { postOrderNumber = -1; }; - void setPostOrderNumber(int _postOrderNumber) { postOrderNumber = _postOrderNumber; } + void setPostOrderNumber(int _postOrderNumber) { + postOrderNumber = _postOrderNumber; } int getPostOrderNumber() const { return postOrderNumber; } // Used for calculating dominator tree bool getUndefined() { return domIsUndefined; } @@ -186,7 +187,8 @@ class Node { /* === Utility functions for Inequality Graph Construction Ends === */ - // Used for showing information on this node (e.g. when dumping in DOT format) + // Used for showing information on this node + // (e.g. 
when dumping in DOT format) void printAsOperand(raw_ostream&, bool) const; // Used for debug @@ -216,7 +218,8 @@ class IGraph { PossiblyRemoteArrayType possiblyRemoteArgs; // Used for analyzing def/use of locality - typedef DenseMap> InsnToNodeMapType; + typedef DenseMap> InsnToNodeMapType; // For Renaming typedef std::stack StackType; @@ -319,23 +322,36 @@ namespace llvm { typedef NodeType::const_iterator ChildIteratorType; static NodeType *getEntryNode(const Node *node) { return node; } - static inline ChildIteratorType child_begin(const NodeType *N) { return N->children_begin(); } - static inline ChildIteratorType child_end(const NodeType *N) { return N->children_end(); } + static inline ChildIteratorType child_begin(const NodeType *N) { + return N->children_begin(); + } + static inline ChildIteratorType child_end(const NodeType *N) { + return N->children_end(); + } }; // template specialization for - template<> struct GraphTraits : public GraphTraits { - static NodeType *getEntryNode(const IGraph *G) { return G->getEntry(); } + template<> struct GraphTraits : + public GraphTraits { + static NodeType *getEntryNode(const IGraph *G) { + return G->getEntry(); + } typedef IGraph::const_iterator nodes_iterator; - static nodes_iterator nodes_begin(const IGraph *G) { return G->begin(); } - static nodes_iterator nodes_end(const IGraph *G) { return G->end(); } + static nodes_iterator nodes_begin(const IGraph *G) { + return G->begin(); + } + static nodes_iterator nodes_end(const IGraph *G) { + return G->end(); + } static unsigned size(const IGraph *G) { return G->size(); } }; // template specialization for for Writing DOTGraph - template<> struct DOTGraphTraits : public DefaultDOTGraphTraits { - DOTGraphTraits (bool isSimple=false) : DefaultDOTGraphTraits(isSimple) {} + template<> struct DOTGraphTraits : public + DefaultDOTGraphTraits { + DOTGraphTraits (bool isSimple=false) : DefaultDOTGraphTraits(isSimple) + {} static std::string getGraphName(const IGraph* G) { return 
"Inequality Graph for '" + G->getName().str(); diff --git a/llvmLocalityOptimization.cpp b/llvmLocalityOptimization.cpp index a69375a..c48a71d 100644 --- a/llvmLocalityOptimization.cpp +++ b/llvmLocalityOptimization.cpp @@ -47,7 +47,8 @@ // the locality of A(1) is "definitely-local", // but the pass leave A(2) "possibly-remote" since there is no enough // information about the locality of A(2). -// This is done by using a reduced version of the LLVM's global value numbering +// This is done by using a reduced version of the LLVM's global value +// numbering // pass (in ValueTable.[h|cpp]) and a array offset analysis. // // - Case 3. locale locale array declaration @@ -55,41 +56,55 @@ // var A: [1..10] int; // return A(5); // } -// The locality of A(5) is "definitely-local" since an array A is declared in this scope. +// The locality of A(5) is "definitely-local" since an array A is declared in +// this scope. // Note that this pass is not element-sensitve so far. // // Limitation, TODOs and future work: // (Limitation) Locality Inference using SSA Value Graph with if statements: -// The current implementation does not propagate a condition even if a local statement is enclosed by if statement. +// The current implementation does not propagate a condition even if a +// local statement is enclosed by if statement. // Hence, we may fail to infer the locality in some cases. // (e.g. if (condition) { local{ p = x } }) // // (Limitation) Chapel's local statement detection: -// Currently, we are assuming that gf.addr function calls correspond to Chapel's local statements, -// but this is not always true because gf.addr is also used to extract a local pointer from a wide pointer. -// To avoid this problem, we have an std::vector named "NonLocals" to record a retun value of gf.addr -// which is also an argument of gf.make and the NonLocals are referred when doing "exemptionTest". -// This may not be always true. 
Ideally, a PGAS-LLVM frontend should tell the locality optimization pass +// Currently, we are assuming that gf.addr function calls correspond to +// Chapel's local statements, +// but this is not always true because gf.addr is also used to extract a +// local pointer from a wide pointer. +// To avoid this problem, we have an std::vector named "NonLocals" to +// record a retun value of gf.addr +// which is also an argument of gf.make and the NonLocals are referred when +// doing "exemptionTest". +// This may not be always true. Ideally, a PGAS-LLVM frontend should tell +// the locality optimization pass // which gf.addr call is a local statement. // // Example : -// 1. call i64* @.gf.addr.1(i64 addrspace(100)* %x) // %x is definitely local -// 2. %y = call i64* @.gf.addr.1(i64 addrspace(100)* %x) // might not be definitely local +// 1. call i64* @.gf.addr.1(i64 addrspace(100)* %x) +// %x is definitely local +// 2. %y = call i64* @.gf.addr.1(i64 addrspace(100)* %x) +// might not be definitely local // call i64 addrspace(100)* @.gf.make.1(..., %y) // // (Limitation) Chapel's Array Declaration detection: -// We basically look for chpl__convertRuntimeTypeToValue to detect Chapel's array declaration. -// This pattern matching completely depends on how PGAS-LLVM frontend emits LLVM IR. +// We basically look for chpl__convertRuntimeTypeToValue to detect Chapel's +// array declaration. +// This pattern matching completely depends on how PGAS-LLVM frontend emits +// LLVM IR. // Please see analyzeCallInsn for more details. // // (Limitation) Intra-procedural pass: // Unfortunately, the current implementation is not inter-procedural. 
// // (Future Work) The utilization of high-level information: -// The locality optimization pass has to recover high-level information such as +// The locality optimization pass has to recover high-level information +// such as // array accesses and local statements from low-level LLVM IR, but ideally, -// PGAS-LLVM frontend are supposed to add annotations to keep these information -// so the locality optimization can perform language-agnostic PGAS optimization. +// PGAS-LLVM frontend are supposed to add annotations to keep these +// information +// so the locality optimization can perform language-agnostic PGAS +// optimization. // //===----------------------------------------------------------------------===// @@ -138,16 +153,20 @@ namespace { static const char* debugThisFn = "habanero"; // For Chapel Compiler & Breakdown - // These flags will be used from Chapel compiler to turn on/off each optimizer + // These flags will be used from Chapel compiler to turn on/off each + // optimizer static const bool fLLVMDisableIG = false; static const bool fLLVMDisableDecl = false; static const bool fLLVMDisableGVN = false; static const bool fLLVMLocalityOpt = true; // Statistics - STATISTIC(NumLocalizedByIG, "Number of localized operations by Inequality Graph "); - STATISTIC(NumLocalizedByGVN, "Number of localized array operations by Global value numbering"); - STATISTIC(NumLocalizedByArrayDecl, "Number of localized operations by Locale-local Array"); + STATISTIC(NumLocalizedByIG, + "Number of localized operations by Inequality Graph "); + STATISTIC(NumLocalizedByGVN, + "Number of localized array operations by Global value numbering"); + STATISTIC(NumLocalizedByArrayDecl, + "Number of localized operations by Locale-local Array"); struct LocalityOpt : public ModulePass { @@ -197,7 +216,9 @@ namespace { // return true if the specified Array(offset) is definitely-local bool isLocalOffset(int offset) { - if (std::find(localOffsets.begin(), localOffsets.end(), offset) != 
localOffsets.end()) { + if (std::find(localOffsets.begin(), + localOffsets.end(), offset) + != localOffsets.end()) { return true; } return false; @@ -225,7 +246,8 @@ namespace { // Add local entry to current collection void add(LocalArrayEntry *li) { list.push_back(li); } - // Take a pointer to ChapelArray and return the corresponding entry if exists + // Take a pointer to ChapelArray and + // return the corresponding entry if exists LocalArrayEntry* getEntryByValue(const Value *op) { for (vector::iterator I = list.begin(), E = list.end(); I != E; I++) { @@ -262,7 +284,8 @@ namespace { raw_fd_ostream File(Filename.c_str(), EC, sys::fs::F_Text); if (EC) { - errs() << "Dump Function : error: "<< EC.message() << "\n"; + errs() << "Dump Function : error: "<< EC.message() << "\n"; + } else { File << *F; } @@ -294,14 +317,17 @@ namespace { ValueTable* createValueTable(Function *F) { ValueTable *vn = new ValueTable(); - for (inst_iterator II = inst_begin(F), IE = inst_end(F); II != IE; ++II) { + for (inst_iterator II = inst_begin(F), IE = inst_end(F); II != IE; +++II) { Instruction *insn = &*II; createValueTableInsn(vn, insn); } return vn; } - bool isDefinitelyLocalAccordingToIG(Value* op, Instruction *insn, IGraph *G) { + bool isDefinitelyLocalAccordingToIG(Value* op, + Instruction *insn, + IGraph *G) { if (fLLVMDisableIG) { return false; } @@ -314,9 +340,15 @@ namespace { } } - // Assuming op is operand of GEP inst (e.g. getelementptr inbounds i64, i64 addrspace(100)* op) - // find array access and localize it if array descriptor is definitely local according to GVN info - bool isDefinitelyLocalAccordingToList(GetElementPtrInst* oldGEP, ValueToValueMapTy &VM, IGraph *G, LocalArrayInfo *LocalArrays, bool isGVN) { + // Assuming op is operand of GEP inst + // (e.g. 
getelementptr inbounds i64, i64 addrspace(100)* op) + // find array access and localize it if array descriptor is definitely + // local according to GVN info + bool isDefinitelyLocalAccordingToList(GetElementPtrInst* oldGEP, + ValueToValueMapTy &VM, + IGraph *G, + LocalArrayInfo *LocalArrays, + bool isGVN) { if (isGVN && fLLVMDisableGVN) { return false; @@ -331,7 +363,8 @@ namespace { } LocalArrayEntry *local = LocalArrays->getEntryByValue(op); - // First Step : See if this GEP access is array access. If so, see if a pointer to array is in LocalArrayInfo + // First Step : See if this GEP access is array access. + // If so, see if a pointer to array is in LocalArrayInfo bool possiblyLocal = false; bool definitelyLocal = false; int offset = -1; @@ -341,28 +374,36 @@ namespace { possiblyLocal = true; } - // Case 2 : this GEP is obaining a pointer to array element. (GEP %op, %offset) - // Searching (GEP array, 0, 8) and see if array is in local LocalArray + // Case 2 : this GEP is obaining a pointer to array element. 
+ // (GEP %op, %offset) + // Searching (GEP array, 0, 8) and + // see if array is in local LocalArray if (isa(op)) { LoadInst *loadInst = cast(op); - GetElementPtrInst *gepInst = dyn_cast(loadInst->getPointerOperand()); + GetElementPtrInst *gepInst = + dyn_cast(loadInst->getPointerOperand()); if (gepInst && gepInst->getNumIndices() == 2) { Constant *op1 = dyn_cast(gepInst->getOperand(1)); Constant *op2 = dyn_cast(gepInst->getOperand(2)); if (op1 != NULL && op2 != NULL - && op1->getUniqueInteger() == 0 && op2->getUniqueInteger() == 8) { + && op1->getUniqueInteger() == 0 + && op2->getUniqueInteger() == 8) { // $shifteddata = GEP %arraydesciptor, 0 , 8 - // original GEP is supposed to be array access (GEP %shifteddata, %offset) - + // original GEP is supposed to be array access + // (GEP %shifteddata, %offset) // search array descriptor - LocalArrayEntry *li1 = LocalArrays->getEntryByValue(gepInst->getPointerOperand()); + LocalArrayEntry *li1 = + LocalArrays->getEntryByValue( + gepInst->getPointerOperand()); if (li1) { possiblyLocal = true; local = li1; } - // search key assuming array descriptor has already been renamed. + // search key array descriptor has + // already been renamed. 
const GetElementPtrInst *keyGep = NULL; - for (ValueToValueMapTy::iterator I = VM.begin(), E = VM.end(); I != E; I++) { + for (ValueToValueMapTy::iterator I = VM.begin(), + E = VM.end(); I != E; I++) { if (I->second == gepInst) { keyGep = cast(I->first); break; @@ -371,7 +412,8 @@ namespace { if (keyGep != NULL) { // this GEP is definitely array offset calculation const Value* v = keyGep->getPointerOperand(); - LocalArrayEntry *li2 = LocalArrays->getEntryByValue(v); + LocalArrayEntry *li2 = + LocalArrays->getEntryByValue(v); if (li2) { possiblyLocal = true; local = li2; @@ -409,7 +451,9 @@ namespace { return definitelyLocal; } - Value* findNewOpOrInsertGF(Value *oldOp, ValueToValueMapTy &VM, Module &M, Instruction *insertBefore) { + Value* findNewOpOrInsertGF(Value *oldOp, + ValueToValueMapTy &VM, + Module &M, Instruction *insertBefore) { Value *tmpOp, *newOp; // check mapping ValueToValueMapTy::iterator I = VM.find(oldOp); @@ -419,7 +463,8 @@ namespace { tmpOp = oldOp; } Type* t = tmpOp->getType(); - if (t->isPointerTy() && t->getPointerAddressSpace() == info->globalSpace) { + if (t->isPointerTy() + && t->getPointerAddressSpace() == info->globalSpace) { // create gf.addr. 
PointerType *addrType = cast(oldOp->getType()); assert(addrType != NULL); @@ -432,7 +477,8 @@ namespace { Function* fn = getAddrFn(&M, info, addrType); Value* gf_addr_args[1]; gf_addr_args[0] = tmpOp; - newOp = CallInst::Create(fn, gf_addr_args, "", insertBefore); + newOp = CallInst::Create(fn, gf_addr_args, "", + insertBefore); if (debugPassInsn) { errs() << "GF Inserted : " << *newOp << "\n"; } @@ -455,7 +501,8 @@ namespace { } if (InvokeInst *II = dyn_cast(oldCall)) { - newCall = InvokeInst::Create(newF, II->getNormalDest(), II->getUnwindDest(), + newCall = InvokeInst::Create(newF, II->getNormalDest(), + II->getUnwindDest(), args, "", oldCall); cast(newCall)->setCallingConv(CS.getCallingConv()); cast(newCall)->setAttributes(CallPAL); @@ -469,7 +516,8 @@ namespace { if (MDNode *tbaa = oldCall->getMetadata(LLVMContext::MD_tbaa)) { newCall->setMetadata(LLVMContext::MD_tbaa, tbaa); } - if (MDNode *tbaaStruct = oldCall->getMetadata(LLVMContext::MD_tbaa_struct)) { + if (MDNode *tbaaStruct = + oldCall->getMetadata(LLVMContext::MD_tbaa_struct)) { newCall->setMetadata(LLVMContext::MD_tbaa_struct, tbaaStruct); } return newCall; @@ -487,7 +535,8 @@ namespace { if (insn->getOpcode() != Instruction::Call) { for(unsigned int i=0; i < insn->getNumOperands(); i++) { Value *old = insn->getOperand(i); - if( isaGlobalPointer(info, old->getType()) ) needsWork = true; + if( isaGlobalPointer(info, old->getType()) ) + needsWork = true; } if( isaGlobalPointer(info, insn->getType()) ) needsWork = true; } else { @@ -497,8 +546,10 @@ namespace { if (isa(call) && isa(call)) { Value* gDst = call->getArgOperand(0); Value* gSrc = call->getArgOperand(1); - if (gDst->getType()->getPointerAddressSpace() == info->globalSpace - || gSrc->getType()->getPointerAddressSpace() == info->globalSpace) { + if (gDst->getType()->getPointerAddressSpace() + == info->globalSpace + || gSrc->getType()->getPointerAddressSpace() + == info->globalSpace) { needsWork = true; } } else if 
(F->getName().startswith(".gf.addr")) { @@ -508,7 +559,14 @@ namespace { return needsWork; } - void processInstruction(Instruction* targetInsn, SmallVector &deletedInsn, ValueToValueMapTy &VM, ValueTable *VN, Module &M, IGraph *G, LocalArrayInfo *LocalArraysGVN, LocalArrayInfo *LocalArraysDecl) { + void processInstruction(Instruction* targetInsn, + SmallVector &deletedInsn, + ValueToValueMapTy &VM, + ValueTable *VN, + Module &M, + IGraph *G, + LocalArrayInfo *LocalArraysGVN, + LocalArrayInfo *LocalArraysDecl) { if(debugPassInsn) { errs() << "@" << *targetInsn << "\n"; } @@ -522,7 +580,10 @@ namespace { } switch(targetInsn->getOpcode()) { - case Instruction::PHI: { /* TODO : Consider PHI Node */ break; } + case Instruction::PHI: { + /* TODO : Consider PHI Node if needed */ + break; + } case Instruction::BitCast: { CastInst *oldCast = cast(targetInsn); Value* op = oldCast->getOperand(0); @@ -535,12 +596,22 @@ namespace { Type* oldSrcTy = oldCast->getSrcTy(); Type* newSrcTy = newOp->getType(); Type* oldDstTy = oldCast->getDestTy(); - assert(oldSrcTy->isPointerTy() && newSrcTy->isPointerTy() && oldDstTy->isPointerTy()); + assert(oldSrcTy->isPointerTy() + && newSrcTy->isPointerTy() + && oldDstTy->isPointerTy()); bool srcIsWide = newSrcTy->getPointerAddressSpace() == 0; - bool dstIsGlobal = oldDstTy->getPointerAddressSpace() == info->globalSpace; + bool dstIsGlobal = oldDstTy->getPointerAddressSpace() + == info->globalSpace; if (srcIsWide && dstIsGlobal) { - Type* newDstTy = convertTypeGlobalToWide(&M, info, oldDstTy); - Instruction* newInst = CastInst::Create(oldCast->getOpcode(), newOp, newDstTy, "", oldCast); + Type* newDstTy = convertTypeGlobalToWide(&M, + info, + oldDstTy); + Instruction* newInst = CastInst::Create( + oldCast->getOpcode(), + newOp, + newDstTy, + "", + oldCast); if (debugPassInsn) { errs() << "Old Instruction : " << *oldCast << "\n"; errs() << "New Instruction : " << *newInst << "\n"; @@ -548,9 +619,12 @@ namespace { VM[oldCast] = newInst; 
deletedInsn.push_back(oldCast); } else { - RemapInstruction(targetInsn, VM, RF_IgnoreMissingEntries); + RemapInstruction(targetInsn, + VM, + RF_IgnoreMissingEntries); if (debugPassInsn) { - errs() << "New Instruction : " << *targetInsn << "\n"; + errs() << "New Instruction : " + << *targetInsn << "\n"; } } } else { @@ -561,7 +635,8 @@ namespace { break; } case Instruction::GetElementPtr: { - GetElementPtrInst *oldGEP = cast(targetInsn); + GetElementPtrInst *oldGEP + = cast(targetInsn); if (oldGEP->getAddressSpace() == info->globalSpace) { Value *oldOp, *newOp; Instruction* newInst = NULL; @@ -569,32 +644,63 @@ namespace { oldOp = oldGEP->getPointerOperand(); bool needToTransform = false; // For array access - // Check if the pointer is definitely local (according to inequality graph) - needToTransform |= isDefinitelyLocalAccordingToIG(oldOp, targetInsn, G); - // Check if the pointer derives from locale-local array pointer (according to GVN) - needToTransform |= isDefinitelyLocalAccordingToList(oldGEP, VM, G, LocalArraysGVN, true); - // Check if the pointer derives from locale-local array pointer (according to locale-local array) - needToTransform |= isDefinitelyLocalAccordingToList(oldGEP, VM, G, LocalArraysDecl, false); - // + // Check if the pointer is definitely local + // (according to inequality graph) + needToTransform |= + isDefinitelyLocalAccordingToIG(oldOp, targetInsn, G); + // Check if the pointer derives from locale-local array + // pointer (according to GVN) + needToTransform |= + isDefinitelyLocalAccordingToList(oldGEP, + VM, + G, + LocalArraysGVN, + true); + // Check if the pointer derives from locale-local array + // pointer (according to locale-local array) + needToTransform |= + isDefinitelyLocalAccordingToList(oldGEP, + VM, + G, + LocalArraysDecl, + false); + ValueToValueMapTy::iterator I = VM.find(oldOp); needToTransform |= (I != VM.end() && I->second); if (needToTransform) { + // localize newOp = findNewOpOrInsertGF(oldOp, VM, M, oldGEP); // 
creating new GEP std::vector args; - for (User::op_iterator OI = oldGEP->idx_begin(), OE = oldGEP->idx_end(); OI != OE; OI++) { + for (User::op_iterator OI = oldGEP->idx_begin(), + OE = oldGEP->idx_end(); + OI != OE; OI++) { args.push_back(*OI); } ArrayRef argsRef(args); // Create new GEP bool inBounds = oldGEP->isInBounds(); if (inBounds) { - newInst = GetElementPtrInst::CreateInBounds(newOp, argsRef, oldGEP->getName(), oldGEP); + newInst = + GetElementPtrInst::CreateInBounds(newOp, + argsRef, + oldGEP-> + getName(), + oldGEP); } else { #if HAVE_LLVM_VER >= 35 - newInst = GetElementPtrInst::Create(newOp->getType(), newOp, argsRef, oldGEP->getName(), oldGEP); + newInst = + GetElementPtrInst::Create(newOp->getType(), + newOp, + argsRef, + oldGEP->getName(), +oldGEP); #else - newInst = GetElementPtrInst::Create(newOp, argsRef, oldGEP->getName(), oldGEP); + newInst = GetElementPtrInst::Create(newOp, + argsRef, + oldGEP + ->getName(), + oldGEP); #endif } if (debugPassInsn) { @@ -627,7 +733,8 @@ namespace { oldLoad->getOrdering(), oldLoad->getSynchScope(), oldLoad); - if (MDNode *tbaa = oldLoad->getMetadata(LLVMContext::MD_tbaa)) { + if (MDNode *tbaa = + oldLoad->getMetadata(LLVMContext::MD_tbaa)) { newInst->setMetadata(LLVMContext::MD_tbaa, tbaa); } if (debugPassInsn) { @@ -660,7 +767,8 @@ namespace { oldStore->getOrdering(), oldStore->getSynchScope(), oldStore); - if (MDNode *tbaa = oldStore->getMetadata(LLVMContext::MD_tbaa)) { + if (MDNode *tbaa = + oldStore->getMetadata(LLVMContext::MD_tbaa)) { newInst->setMetadata(LLVMContext::MD_tbaa, tbaa); } if (debugPassInsn) { @@ -675,7 +783,8 @@ namespace { } case Instruction::Call: { CallInst *oldCall = cast(targetInsn); - Function* oldF = oldCall->getCalledFunction(); // null if indirect + Function* oldF = oldCall->getCalledFunction(); + // null if indirect assert(oldF != NULL); if (oldF->getName().startswith(".gf.addr")) { Value *op = oldCall->getArgOperand(0); @@ -695,7 +804,8 @@ namespace { break; } else if 
(isa(oldCall)) { if (isa(oldCall)) { - Value *oldDst = oldCall->getArgOperand(0); + Value *oldDst = oldCall->getArgOperand(0); + ValueToValueMapTy::iterator I = VM.find(oldDst); if (I != VM.end() && I->second) { Value* newDst = I->second; @@ -713,22 +823,40 @@ namespace { args[3] = oldCall->getArgOperand(3); args[4] = oldCall->getArgOperand(4); - Function* memSetF = Intrinsic::getDeclaration(&M, Intrinsic::memset, types); - Instruction* newCall = CallInst::Create(memSetF, args, "", oldCall); - cast(newCall)->setCallingConv(CS.getCallingConv()); + Function* memSetF = + Intrinsic::getDeclaration(&M, + Intrinsic::memset, + types); + Instruction* newCall = + CallInst::Create(memSetF, + args, + "", + oldCall); + + cast(newCall) + ->setCallingConv(CS.getCallingConv()); cast(newCall)->setAttributes(CallPAL); if (cast(oldCall)->isTailCall()) { cast(newCall)->setTailCall(); } - if (MDNode *tbaa = oldCall->getMetadata(LLVMContext::MD_tbaa)) { - newCall->setMetadata(LLVMContext::MD_tbaa, tbaa); + if (MDNode *tbaa = + oldCall->getMetadata(LLVMContext::MD_tbaa)) { + newCall->setMetadata(LLVMContext::MD_tbaa, + tbaa); } - if (MDNode *tbaaStruct = oldCall->getMetadata(LLVMContext::MD_tbaa_struct)) { - newCall->setMetadata(LLVMContext::MD_tbaa_struct, tbaaStruct); + if (MDNode *tbaaStruct = + oldCall + ->getMetadata(LLVMContext::MD_tbaa_struct)) { + + newCall + ->setMetadata(LLVMContext::MD_tbaa_struct, + tbaaStruct); } if (debugPassInsn) { - errs () << "MemSet Old Instruction : " << *oldCall << "\n"; - errs () << "New Instruction : " << *newCall << "\n"; + errs () << "MemSet Old Instruction : " + << *oldCall << "\n"; + errs () << "New Instruction : " << *newCall + << "\n"; } VM[oldCall] = newCall; deletedInsn.push_back(oldCall); @@ -737,15 +865,18 @@ namespace { } break; } - assert(isa(oldCall) || isa (oldCall)); + assert(isa(oldCall) + || isa (oldCall)); Value *newDst, *newSrc; Value* oldDst = oldCall->getArgOperand(0); Value* oldSrc = oldCall->getArgOperand(1); bool 
needToTransform = false; - unsigned dstSpace = oldDst->getType()->getPointerAddressSpace(); - unsigned srcSpace = oldSrc->getType()->getPointerAddressSpace(); - + unsigned dstSpace = + oldDst->getType()->getPointerAddressSpace(); + unsigned srcSpace = + oldSrc->getType()->getPointerAddressSpace(); + CallSite CS(oldCall); const AttributeSet &CallPAL = CS.getAttributes(); Type *types[3]; @@ -754,8 +885,14 @@ namespace { if (srcSpace == info->globalSpace) { ValueToValueMapTy::iterator I = VM.find(oldSrc); bool renamed = I != VM.end() && I->second; - if (isDefinitelyLocalAccordingToIG(oldSrc, targetInsn, G) || renamed) { - newSrc = findNewOpOrInsertGF(oldSrc, VM, M, oldCall); + if (isDefinitelyLocalAccordingToIG(oldSrc, + targetInsn, + G) + || renamed) { + newSrc = findNewOpOrInsertGF(oldSrc, + VM, + M, + oldCall); needToTransform = true; } } else { @@ -764,8 +901,13 @@ namespace { if (dstSpace == info->globalSpace) { ValueToValueMapTy::iterator I = VM.find(oldDst); bool renamed = I != VM.end() && I->second; - if (isDefinitelyLocalAccordingToIG(oldDst, targetInsn, G) || renamed) { - newDst = findNewOpOrInsertGF(oldDst, VM, M, oldCall); + if (isDefinitelyLocalAccordingToIG(oldDst, + targetInsn, + G) || renamed) { + newDst = findNewOpOrInsertGF(oldDst, + VM, + M, + oldCall); needToTransform = true; } } else { @@ -788,21 +930,33 @@ namespace { Function* memF = NULL; if (isa(oldCall)) { - memF = Intrinsic::getDeclaration(&M, Intrinsic::memcpy, types); + memF = Intrinsic::getDeclaration(&M, + Intrinsic::memcpy, + types); } else if (isa (oldCall)) { - memF = Intrinsic::getDeclaration(&M, Intrinsic::memmove, types); + memF = Intrinsic::getDeclaration(&M, + Intrinsic::memmove, + types); } - Instruction* newCall = CallInst::Create(memF, args, "", oldCall); - cast(newCall)->setCallingConv(CS.getCallingConv()); + Instruction* newCall = CallInst::Create(memF, + args, + "", + oldCall); + + cast(newCall) + ->setCallingConv(CS.getCallingConv()); cast(newCall)->setAttributes(CallPAL); 
if (cast(oldCall)->isTailCall()) { cast(newCall)->setTailCall(); } - if (MDNode *tbaa = oldCall->getMetadata(LLVMContext::MD_tbaa)) { + if (MDNode *tbaa = + oldCall->getMetadata(LLVMContext::MD_tbaa)) { newCall->setMetadata(LLVMContext::MD_tbaa, tbaa); } - if (MDNode *tbaaStruct = oldCall->getMetadata(LLVMContext::MD_tbaa_struct)) { - newCall->setMetadata(LLVMContext::MD_tbaa_struct, tbaaStruct); + if (MDNode *tbaaStruct = + oldCall->getMetadata(LLVMContext::MD_tbaa_struct)) { + newCall->setMetadata(LLVMContext::MD_tbaa_struct, + tbaaStruct); } if (debugPassInsn) { errs () << "Old Instruction : " << *oldCall << "\n"; @@ -810,7 +964,7 @@ namespace { } VM[oldCall] = newCall; deletedInsn.push_back(oldCall); - + } else { RemapInstruction(targetInsn, VM, RF_IgnoreMissingEntries); } @@ -842,9 +996,17 @@ namespace { LocalArrayInfo *LocalArraysDecl = new LocalArrayInfo(); // Create IGraph - // Inspect all instructions and construt IGraph. Each node of IGraph contains a densemap that map that is one-to-one mapping of each operand into a specific address space (either 100 or 0). - // If an instruction is enclosed by a local statement, set the locality level of each operand to 0. - // Currently, we are assuming that gf.addr function calls correspond to Chapel's local statements, but this is not always true because gf.addr is also used to extract a local pointer from a wide pointer. We work on this later pass using NonLocals) + // Inspect all instructions and construt IGraph. Each node of + // IGraph contains a densemap that map that is + // one-to-one mapping of each operand + // into a specific address space (either 100 or 0). + // If an instruction is enclosed by a local statement, set the + // locality level of each operand to 0. + // Currently, we are assuming that gf.addr function calls + // correspond to Chapel's local statements, + // but this is not always true because gf.addr is also used + // to extract a local pointer from a wide pointer. 
We work on + // this later pass using NonLocals) IGraph *G = new IGraph(F->getName()); G->construct(F, info); @@ -867,12 +1029,17 @@ namespace { } // Process each instruction - // try to convert load/store/getelementptr with addrspace(100) to addrspace(0) with using IGraph + // try to convert load/store/getelementptr with addrspace(100) + // to addrspace(0) with using IGraph SmallVector deletedInsn; ValueToValueMapTy ValueMap; - for (inst_iterator II = inst_begin(F), IE = inst_end(F); II != IE; ++II) { + for (inst_iterator II = inst_begin(F), + IE = inst_end(F); II != IE; ++II) { Instruction *insn = &*II; - processInstruction(insn, deletedInsn, ValueMap, VN, M, G, LocalArraysGVN, LocalArraysDecl); + processInstruction(insn, + deletedInsn, + ValueMap, VN, M, G, + LocalArraysGVN, LocalArraysDecl); } for (unsigned int i = 0; i < deletedInsn.size(); i++) { Instruction *insn = deletedInsn[i]; @@ -896,9 +1063,11 @@ namespace { /* Locality Optimization Pass: - This pass tries to replace address space 100 pointer with address space 0 pointer. + This pass tries to replace address space 100 pointer with address + space 0 pointer. 1. Local Statement (by users) - 2. Locale local array declaration (by users but not explicitly expressed) + 2. Locale local array declaration (by users but not explicitly + expressed) */ virtual bool runOnModule(Module &M) { @@ -916,14 +1085,17 @@ namespace { info->wideSpace = 101; info->localeIdType = M.getTypeByName("struct.c_localeid_t"); if( ! info->localeIdType ) { - StructType* t = StructType::create(M.getContext(), "struct.c_localeid_t"); - t->setBody(Type::getInt32Ty(M.getContext()), Type::getInt32Ty(M.getContext()), NULL); + StructType* t = StructType::create(M.getContext(), + "struct.c_localeid_t"); + t->setBody(Type::getInt32Ty(M.getContext()), + Type::getInt32Ty(M.getContext()), NULL); info->localeIdType = t; } info->nodeIdType = Type::getInt32Ty(M.getContext()); - + // Now go identify special functions in the module by name. 
- for (Module::iterator next_func = M.begin(); next_func!= M.end(); ) + for (Module::iterator next_func = M.begin(); next_func!= + M.end(); ) { Function *F = &*next_func; ++next_func; @@ -943,18 +1115,22 @@ namespace { GlobalPointerInfo & r = info->gTypes[gType]; r.addrFn = F; info->specialFunctions.insert(F); - } else if( F->getName().startswith(GLOBAL_FN_GLOBAL_LOCID) && + } else if( F->getName().startswith(GLOBAL_FN_GLOBAL_LOCID) +&& FT->getNumParams() == 1 && FT->getReturnType() == info->localeIdType && - containsGlobalPointers(info, FT->getParamType(0)) ) { + containsGlobalPointers(info, +FT->getParamType(0)) ) { Type* gType = FT->getParamType(0); GlobalPointerInfo & r = info->gTypes[gType]; r.locFn = F; info->specialFunctions.insert(F); - } else if( F->getName().startswith(GLOBAL_FN_GLOBAL_NODEID) && + } else if( F->getName().startswith(GLOBAL_FN_GLOBAL_NODEID) +&& FT->getNumParams() == 1 && FT->getReturnType() == info->nodeIdType && - containsGlobalPointers(info, FT->getParamType(0)) ) { + containsGlobalPointers(info, +FT->getParamType(0)) ) { Type* gType = FT->getParamType(0); GlobalPointerInfo & r = info->gTypes[gType]; r.nodeFn = F; @@ -963,22 +1139,28 @@ namespace { FT->getNumParams() == 2 && FT->getParamType(0) == info->localeIdType && FT->getParamType(1)->isPointerTy() && - FT->getParamType(1)->getPointerAddressSpace() == 0 && - containsGlobalPointers(info, FT->getReturnType()) ) { + FT->getParamType(1)->getPointerAddressSpace() == +0 && + containsGlobalPointers(info, +FT->getReturnType()) ) { Type* gType = FT->getReturnType(); GlobalPointerInfo & r = info->gTypes[gType]; r.makeFn = F; info->specialFunctions.insert(F); - } else if( F->getName().startswith(GLOBAL_FN_GLOBAL_TO_WIDE) && + } else if( +F->getName().startswith(GLOBAL_FN_GLOBAL_TO_WIDE) && FT->getNumParams() == 1 && - containsGlobalPointers(info, FT->getParamType(0)) ) { + containsGlobalPointers(info, +FT->getParamType(0)) ) { Type* gType = FT->getParamType(0); GlobalPointerInfo & r = 
info->gTypes[gType]; r.globalToWideFn = F; info->specialFunctions.insert(F); - } else if( F->getName().startswith(GLOBAL_FN_WIDE_TO_GLOBAL) && - FT->getNumParams() == 1 && - containsGlobalPointers(info, FT->getReturnType()) ) { + } else if( + F->getName().startswith(GLOBAL_FN_WIDE_TO_GLOBAL) && + FT->getNumParams() == 1 && + containsGlobalPointers(info, + FT->getReturnType()) ) { Type* gType = FT->getReturnType(); GlobalPointerInfo & r = info->gTypes[gType]; r.wideToGlobalFn = F; @@ -986,11 +1168,11 @@ namespace { } } } - + assert(info->globalSpace > 0); assert(info->localeIdType); assert(info->nodeIdType); - + // Wide pointer address space must differ from the local one... assert(info->globalSpace != 0); assert(info->wideSpace != 0); @@ -1005,21 +1187,25 @@ namespace { } localityOptimization(M, F); } - + // After it all, put the target info back. if( !madeInfo ) M.setDataLayout(layoutAfterwards); if( madeInfo ) delete info; return true; } - - void salvageChapelArrayAccess(Function *F, ValueTable *VN, LocalArrayInfo *LocalArraysGVN, LocalArrayInfo *LocalArraysDecl) { + + void salvageChapelArrayAccess(Function *F, ValueTable *VN, + LocalArrayInfo *LocalArraysGVN, + LocalArrayInfo *LocalArraysDecl) { /* Skip this if there is a branch instruction. 
*/ /* (TODO) Integrate salvageChapelArrayAccess into IGraph later*/ - for (inst_iterator IS = inst_begin(F), IE = inst_end(F); IS != IE; IS++) { + for (inst_iterator IS = inst_begin(F), + IE = inst_end(F); IS != IE; IS++) { Instruction *targetInsn = &*IS; - TerminatorInst *branch = dyn_cast(targetInsn); + TerminatorInst *branch = + dyn_cast(targetInsn); if (branch) { ReturnInst *RI = dyn_cast(targetInsn); if (!RI) { @@ -1029,7 +1215,8 @@ namespace { } } - for (inst_iterator IS = inst_begin(F), IE = inst_end(F); IS != IE; IS++) { + for (inst_iterator IS = inst_begin(F), + IE = inst_end(F); IS != IE; IS++) { Instruction *targetInsn = &*IS; switch (targetInsn->getOpcode()) { case Instruction::Load: @@ -1056,7 +1243,8 @@ namespace { if (getOffsetGEP) { errs () << "Offset : \n"; errs () << *getOffsetGEP << "\n"; - Instruction *offsetInsn = dyn_cast(getOffsetGEP->getOperand(1)); + Instruction *offsetInsn = + dyn_cast(getOffsetGEP->getOperand(1)); if (offsetInsn) { switch(offsetInsn->getOpcode()) { case Instruction::Load: { @@ -1064,18 +1252,22 @@ namespace { break; } case Instruction::Shl: { - Constant *op1 = dyn_cast(offsetInsn->getOperand(1)); + Constant *op1 = + dyn_cast(offsetInsn->getOperand(1)); if (op1) { - ret = 1 << (int)(op1->getUniqueInteger().roundToDouble()); + ret = 1 << + (int)(op1->getUniqueInteger().roundToDouble()); } else { ret = -1; } break; } case Instruction::Mul: { - Constant *op1 = dyn_cast(offsetInsn->getOperand(1)); + Constant *op1 = + dyn_cast(offsetInsn->getOperand(1)); if (op1) { - ret = (int)(op1->getUniqueInteger().roundToDouble()); + ret = + (int)(op1->getUniqueInteger().roundToDouble()); } else { ret = -1; } @@ -1087,40 +1279,56 @@ namespace { return ret; } - void analyzeLoadStoreInsn(Instruction *I, Function *F, ValueTable *VN, LocalArrayInfo *LocalArrays) { + void analyzeLoadStoreInsn(Instruction *I, Function *F, ValueTable *VN, + LocalArrayInfo *LocalArrays) { if (isArrayAccessLoadOrStore(I, info->globalSpace)) { - // for each 
store/load instruction that involves addrspace 100 and is supposed to be array access. + // for each store/load instruction that involves addrspace 100 + // and is supposed to be array access. if (debugPassInsn) { errs () << *I << " is supposed to be array access\n"; } GetElementPtrInst *gep1 = findGEP08FromMemOp(I); if (gep1 == NULL) return; - for (inst_iterator IS2 = inst_begin(F), IE2 = inst_end(F); IS2 != IE2; IS2++) { + for (inst_iterator IS2 = inst_begin(F), + IE2 = inst_end(F); IS2 != IE2; IS2++) { Instruction *I2 = &*IS2; - // search load/store instruction that is supposed to be local array access. + // search load/store instruction that is supposed + // to be local array access. if (I != I2 && isArrayAccessLoadOrStore(I2, 0)) { - // I is load/store addrspace(100) ptr and supposed to be array access at this point - // I2 is load/store addrspace(0) ptr and supposed to be array access at this point + // I is load/store addrspace(100) ptr + // and supposed to be array access at this point + // I2 is load/store addrspace(0) ptr + // and supposed to be array access at this point GetElementPtrInst *gep2 = findGEP08FromMemOp(I2); if (gep2 == NULL) continue; if (VN->sameExpressions(gep1, gep2)) { errs () << "[GVN worked!]\n"; errs () << "\t Local Access :\n"; - errs () << "\t\t Load/Store w/ addrspace(0) : " << *I2 << "\n"; - errs () << "\t\t Array Ptr w/ addrspace(0) : " << *gep2 << "\n"; + errs () << "\t\t Load/Store w/ addrspace(0) : " + << *I2 << "\n"; + errs () << "\t\t Array Ptr w/ addrspace(0) : " + << *gep2 << "\n"; errs () << "\t Possibly Remote Access :\n"; - errs () << "\t\t Load/Store w/ addrspace(100) : " << *I << "\n"; - errs () << "\t\t Array Ptr w/ addrspace(100) : " << *gep1 << "\n"; + errs () << "\t\t Load/Store w/ addrspace(100) : " + << *I << "\n"; + errs () << "\t\t Array Ptr w/ addrspace(100) : " + << *gep1 << "\n"; // mark Value *localArray = gep1->getPointerOperand(); - LocalArrayEntry *li = LocalArrays->getEntryByValue(localArray); + 
LocalArrayEntry *li = + LocalArrays->getEntryByValue(localArray); // Analyze Offset - // FIXME: this is only for store instruction! (I2->getOperand(0) if it's a load instruction) - int offset = analyzeArrayAccessOffsets(dyn_cast(I2->getOperand(1))); + // FIXME: this is only for store instruction! + // (I2->getOperand(0) if it's a load instruction) + int offset = + analyzeArrayAccessOffsets( + dyn_cast(I2->getOperand(1)) + ); if (offset != -1) { // if (!li) { - li = new LocalArrayEntry(localArray, false); + li = new LocalArrayEntry(localArray, + false); li->addLocalOffset(offset); LocalArrays->add(li); } else { @@ -1134,7 +1342,10 @@ namespace { } // check if construct_DefaultRectangularArr is in this function - void analyzeCallInsn(Instruction *I, ValueTable *VN, LocalArrayInfo *LocalArrays) { + void analyzeCallInsn(Instruction *I, + ValueTable *VN, + LocalArrayInfo + *LocalArrays) { if (isa(I)) { CallInst *callInsn1 = cast(I); Function *calledFunc1 = callInsn1->getCalledFunction(); @@ -1148,29 +1359,40 @@ namespace { if (isa(v)) { CallInst *callInsn2 = cast(v); Function *calledFunc2 = callInsn2->getCalledFunction(); - if (calledFunc2 && calledFunc2->getName().startswith("_construct_DefaultRectangularArr")) { + if (calledFunc2 && + calledFunc2->getName() + .startswith("_construct_DefaultRectangularArr")) { LocalArrayEntry *li = new LocalArrayEntry(I, true); LocalArrays->add(li); } } - } else if (funcName.startswith("chpl__convertRuntimeTypeToValue")) { // + } else if (funcName + .startswith("chpl__convertRuntimeTypeToValue")) { // Value* v = callInsn1->getArgOperand(1); for (User *U : v->users()) { Value *UI = U; if (isa(*UI)) { LoadInst *l = cast(UI); if (l->getPointerOperand() == v) { - LocalArrayEntry *li = new LocalArrayEntry(UI, true); + LocalArrayEntry *li = + new LocalArrayEntry(UI, true); LocalArrays->add(li); // support chpl___ASSIGN for (User *LU: l->users()) { Value *LUI = LU; if (isa(LUI)) { - CallInst *callInsn2 = cast(LUI); - Function *calledFunc2 = 
callInsn2->getCalledFunction(); - if (calledFunc2 && calledFunc2->getName().startswith("chpl___ASSIGN_") - && UI == callInsn2->getArgOperand(0)) { - LocalArrayEntry *li2 = new LocalArrayEntry(LUI, true); + CallInst *callInsn2 + = cast(LUI); + Function *calledFunc2 + = callInsn2->getCalledFunction(); + if (calledFunc2 && + calledFunc2 + ->getName() + .startswith("chpl___ASSIGN_") + && UI == + callInsn2->getArgOperand(0)) { + LocalArrayEntry *li2 = new + LocalArrayEntry(LUI, true); LocalArrays->add(li2); } } @@ -1185,7 +1407,9 @@ namespace { if (isa(UI)) { LoadInst *l = cast(UI); if (l->getPointerOperand() == v) { - LocalArrayEntry *li = new LocalArrayEntry(UI, true); + LocalArrayEntry *li + = new LocalArrayEntry(UI, + true); LocalArrays->add(li); } } @@ -1194,7 +1418,8 @@ namespace { } } - void searchGEP08Inst(vector &list, vector &visited, Instruction* I) { + void searchGEP08Inst(vector &list, + vector &visited, Instruction* I) { // see if this Instruction is already visited if (find(visited.begin(), visited.end(), I) != visited.end()) { return; @@ -1204,7 +1429,8 @@ namespace { errs () << "Parent Insn : " << *I <<"\n"; } // Check if this intruction is GEP - GetElementPtrInst *gepInst = dyn_cast(I); + GetElementPtrInst *gepInst = dyn_cast(I); + if (gepInst && gepInst->getNumIndices() == 2) { if (debugPassInsn) { errs () << "Candidate GEP : " << *gepInst << "\n"; @@ -1212,7 +1438,8 @@ namespace { Constant *op1 = dyn_cast(gepInst->getOperand(1)); Constant *op2 = dyn_cast(gepInst->getOperand(2)); if (op1 != NULL && op2 != NULL - && op1->getUniqueInteger() == 0 && op2->getUniqueInteger() == 8) { + && op1->getUniqueInteger() == 0 + && op2->getUniqueInteger() == 8) { // add a candidate GEP to list if (find(list.begin(), list.end(), @@ -1310,7 +1537,8 @@ namespace { } char LocalityOpt::ID = 0; -static RegisterPass X("locality-opt", "Locality Optimization Pass"); +static RegisterPass X("locality-opt", + "Locality Optimization Pass"); ModulePass 
*createLocalityOpt(GlobalToWideInfo* info, std::string setlayout) { return new LocalityOpt(info, setlayout); From 4affd0ea9dbf9634e93a294216550036c1f2d988 Mon Sep 17 00:00:00 2001 From: Akihiro Hayashi Date: Fri, 4 Mar 2016 17:30:23 -0600 Subject: [PATCH 24/24] Modify the desciption of the llvmLocalityOptimization pass. --- llvmLocalityOptimization.cpp | 151 ++++++++++++++++++++--------------- 1 file changed, 86 insertions(+), 65 deletions(-) diff --git a/llvmLocalityOptimization.cpp b/llvmLocalityOptimization.cpp index c48a71d..ee3634c 100644 --- a/llvmLocalityOptimization.cpp +++ b/llvmLocalityOptimization.cpp @@ -24,88 +24,109 @@ // This pass tries to convert possibly-remote access (addrspace(100)* access) // to definitely-local to avoid runtime affinity checking overheads. // -// To infer the locality, the locality optimization pass tries to utilize -// following information : -// - Case 1. Scalar access enclosed by Chapel's LOCAL statement. +// # How it works +// To infer the locality, +// the locality optimization pass tries to utilize following information : +// (Please also see test/local.ll) +// +// ### Case 1. Scalar access enclosed by Chapel's LOCAL statement // proc localizeByLocalStmt(ref x) : int { // var p: int = 1; // local { p = x; } // return p + x; // x is definitely local // } -// The locality level of x is inferred by searching SSA value graph, -// which is implemented in IGraph.[h|cpp]. -// When you specify debugThisFn, the pass generates .dot file -// that can be visualized by the graphviz tool. (http://www.graphviz.org/) // -// - Case 2. Array access enclosed by Chapel's LOCAL statement. 
-// proc habanero(A) : int { -// A(1) = 1; // A(1) is definitely local -// local { A(1) = 2; } -// A(2) = 3; // A(2) is possibly remote +// This pass considers control-flow: +// proc localizeUnderCondition1(ref x) : int { +// var p: int = 1; +// if (Q == 1) { +// local { p = x; } +// } +// return p + x; // does not localize since it might be non-local +// } +// +// Here is another interesting example: +// proc localizeUnderCondition1(ref x) : int { +// var p: int = 1; +// if (Q == 1) { +// local { p = x; } +// } else if (Q==2) { +// local { p = x; } +// } else { +// local { p = x; } +// } +// return p + x; // x is definitely-local // } -// This pass is element-sensitive. For example, -// the locality of A(1) is "definitely-local", -// but the pass leave A(2) "possibly-remote" since there is no enough -// information about the locality of A(2). -// This is done by using a reduced version of the LLVM's global value -// numbering -// pass (in ValueTable.[h|cpp]) and a array offset analysis. // -// - Case 3. locale locale array declaration +// The locality level of x is inferred by searching +// a Locality-SSA inequality graph, +// which is implemented in IGraph.[h|cpp]. +// +// (NOTE) When you specify debugThisFn, +// the pass generates a .dot file that can be visualized by the graphviz tool. +// (http://www.graphviz.org/) +// +// ### Case 2. Array access enclosed by Chapel's LOCAL statement +// +// proc habanero(A) : int { +// A(1) = 1; // A(1) is definitely local +// local { A(1) = 2; } +// A(2) = 3; // A(2) is possibly remote +// } +// +// The locality optimization pass is element-sensitive. +// For example, the locality of A(1) is _definitely-local_, +// but the pass leaves A(2) _possibly-remote_ +// since there is not enough information about the locality of A(2). 
+// +// This is done by using a reduced version of +// LLVM's global value numbering pass +// for assigning a value number to variables and expressions +// (in ValueTable.[h|cpp]) and an array offset analysis. +// +// ### Case 3. Locale-local array declaration // proc localizeByArrayDecl () { // var A: [1..10] int; // return A(5); -// } -// The locality of A(5) is "definitely-local" since an array A is declared in -// this scope. +// } +// +// The locality of A(5) is _definitely-local_ +// since an array A is declared in this scope. // Note that this pass is not element-sensitve so far. // -// Limitation, TODOs and future work: -// (Limitation) Locality Inference using SSA Value Graph with if statements: -// The current implementation does not propagate a condition even if a -// local statement is enclosed by if statement. -// Hence, we may fail to infer the locality in some cases. -// (e.g. if (condition) { local{ p = x } }) -// -// (Limitation) Chapel's local statement detection: -// Currently, we are assuming that gf.addr function calls correspond to -// Chapel's local statements, -// but this is not always true because gf.addr is also used to extract a -// local pointer from a wide pointer. -// To avoid this problem, we have an std::vector named "NonLocals" to -// record a retun value of gf.addr -// which is also an argument of gf.make and the NonLocals are referred when -// doing "exemptionTest". -// This may not be always true. Ideally, a PGAS-LLVM frontend should tell -// the locality optimization pass -// which gf.addr call is a local statement. +// # Limitations +// ### Chapel's Array Declaration Detection +// We basically look for chpl__convertRuntimeTypeToValue +// to detect Chapel's array declaration. +// Please see analyzeCallInsn for more details. 
+// +// # TODOs and future work +// +// ### The utilization of high-level information +// The locality optimization pass has to recover high-level information +// such as array accesses and local statements from low-level LLVM IR, +// but ideally, PGAS-LLVM frontends are supposed to add annotations +// to keep this information so the locality optimization can easily +// recognize high-level information and perform language-agnostic +// PGAS optimization. // -// Example : -// 1. call i64* @.gf.addr.1(i64 addrspace(100)* %x) -// %x is definitely local -// 2. %y = call i64* @.gf.addr.1(i64 addrspace(100)* %x) -// might not be definitely local -// call i64 addrspace(100)* @.gf.make.1(..., %y) -// -// (Limitation) Chapel's Array Declaration detection: -// We basically look for chpl__convertRuntimeTypeToValue to detect Chapel's -// array declaration. -// This pattern matching completely depends on how PGAS-LLVM frontend emits -// LLVM IR. -// Please see analyzeCallInsn for more details. +// ### Inferring the locality of Chapel array access considering if statements +// While we try to localize a possibly-remote scalar access +// considering control-flow as much as possible, +// localizing array accesses is still conservative. +// Hence, we do not localize an array access enclosed +// by an if statement like this: // -// (Limitation) Intra-procedural pass: -// Unfortunately, the current implementation is not inter-procedural. +// if (cond) { local { A(1) = 2; } } +// A(1) = 1; // A(1) is possibly remote // -// (Future Work) The utilization of high-level information: -// The locality optimization pass has to recover high-level information -// such as -// array accesses and local statements from low-level LLVM IR, but ideally, -// PGAS-LLVM frontend are supposed to add annotations to keep these -// information -// so the locality optimization can perform language-agnostic PGAS -// optimization.
+// ### Make it an inter-procedural pass +// This can turn more _possibly-remote accesses_ +// into _definitely-local accesses_. // +// ### More experiments with the latest version of the Chapel compiler +// I have been mainly working with the Chapel compiler 1.9.0. I need +// to check further whether the locality optimization pass works with newer versions. //===----------------------------------------------------------------------===// #include "llvmLocalityOptimization.h"