Skip to content

[NFC][SYCL][Graph] Avoid unnecessary shared_ptr in duplicateNodes #19372

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Jul 14, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
101 changes: 50 additions & 51 deletions sycl/source/detail/graph/graph_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1225,40 +1225,38 @@ exec_graph_impl::enqueue(sycl::detail::queue_impl &Queue,

void exec_graph_impl::duplicateNodes() {
// Map of original modifiable nodes (keys) to new duplicated nodes (values)
std::map<std::shared_ptr<node_impl>, std::shared_ptr<node_impl>> NodesMap;
std::map<node_impl *, node_impl *> NodesMap;

const std::vector<std::shared_ptr<node_impl>> &ModifiableNodes =
MGraphImpl->MNodeStorage;
nodes_range ModifiableNodes{MGraphImpl->MNodeStorage};
std::deque<std::shared_ptr<node_impl>> NewNodes;

for (size_t i = 0; i < ModifiableNodes.size(); i++) {
auto OriginalNode = ModifiableNodes[i];
std::shared_ptr<node_impl> NodeCopy =
std::make_shared<node_impl>(*OriginalNode);
for (node_impl &OriginalNode : ModifiableNodes) {
NewNodes.push_back(std::make_shared<node_impl>(OriginalNode));
node_impl &NodeCopy = *NewNodes.back();

// Associate the ID of the original node with the node copy for later quick
// access
MIDCache.insert(std::make_pair(OriginalNode->MID, NodeCopy));
MIDCache.insert(std::make_pair(OriginalNode.MID, &NodeCopy));

// Clear edges between nodes so that we can replace with new ones
NodeCopy->MSuccessors.clear();
NodeCopy->MPredecessors.clear();
NodeCopy.MSuccessors.clear();
NodeCopy.MPredecessors.clear();
// Push the new node to the front of the stack
NewNodes.push_back(NodeCopy);
// Associate the new node with the old one for updating edges
NodesMap.insert({OriginalNode, NodeCopy});
NodesMap.insert({&OriginalNode, &NodeCopy});
}

// Now that all nodes have been copied rebuild edges on new nodes. This must
// be done as a separate step since successors may be out of order.
for (size_t i = 0; i < ModifiableNodes.size(); i++) {
auto OriginalNode = ModifiableNodes[i];
auto NodeCopy = NewNodes[i];
auto OrigIt = ModifiableNodes.begin(), OrigEnd = ModifiableNodes.end();
for (auto NewIt = NewNodes.begin(); OrigIt != OrigEnd; ++OrigIt, ++NewIt) {
node_impl &OriginalNode = *OrigIt;
node_impl &NodeCopy = **NewIt;
// Look through all the original node successors, find their copies and
// register those as successors with the current copied node
for (node_impl &NextNode : OriginalNode->successors()) {
node_impl &Successor = *NodesMap.at(NextNode.shared_from_this());
NodeCopy->registerSuccessor(Successor);
for (node_impl &NextNode : OriginalNode.successors()) {
node_impl &Successor = *NodesMap.at(&NextNode);
NodeCopy.registerSuccessor(Successor);
}
}

Expand All @@ -1271,49 +1269,47 @@ void exec_graph_impl::duplicateNodes() {
if (NewNode->MNodeType != node_type::subgraph) {
continue;
}
const std::vector<std::shared_ptr<node_impl>> &SubgraphNodes =
NewNode->MSubGraphImpl->MNodeStorage;
nodes_range SubgraphNodes{NewNode->MSubGraphImpl->MNodeStorage};
std::deque<std::shared_ptr<node_impl>> NewSubgraphNodes{};

// Map of original subgraph nodes (keys) to new duplicated nodes (values)
std::map<std::shared_ptr<node_impl>, std::shared_ptr<node_impl>>
SubgraphNodesMap;
std::map<node_impl *, node_impl *> SubgraphNodesMap;

// Copy subgraph nodes
for (size_t i = 0; i < SubgraphNodes.size(); i++) {
auto SubgraphNode = SubgraphNodes[i];
auto NodeCopy = std::make_shared<node_impl>(*SubgraphNode);
for (node_impl &SubgraphNode : SubgraphNodes) {
NewSubgraphNodes.push_back(std::make_shared<node_impl>(SubgraphNode));
node_impl &NodeCopy = *NewSubgraphNodes.back();
// Associate the ID of the original subgraph node with all extracted node
// copies for future quick access.
MIDCache.insert(std::make_pair(SubgraphNode->MID, NodeCopy));
MIDCache.insert(std::make_pair(SubgraphNode.MID, &NodeCopy));

NewSubgraphNodes.push_back(NodeCopy);
SubgraphNodesMap.insert({SubgraphNode, NodeCopy});
NodeCopy->MSuccessors.clear();
NodeCopy->MPredecessors.clear();
SubgraphNodesMap.insert({&SubgraphNode, &NodeCopy});
NodeCopy.MSuccessors.clear();
NodeCopy.MPredecessors.clear();
}

// Rebuild edges for new subgraph nodes
for (size_t i = 0; i < SubgraphNodes.size(); i++) {
auto SubgraphNode = SubgraphNodes[i];
auto NodeCopy = NewSubgraphNodes[i];

for (node_impl &NextNode : SubgraphNode->successors()) {
node_impl &Successor =
*SubgraphNodesMap.at(NextNode.shared_from_this());
NodeCopy->registerSuccessor(Successor);
auto OrigIt = SubgraphNodes.begin(), OrigEnd = SubgraphNodes.end();
for (auto NewIt = NewSubgraphNodes.begin(); OrigIt != OrigEnd;
++OrigIt, ++NewIt) {
node_impl &SubgraphNode = *OrigIt;
node_impl &NodeCopy = **NewIt;

for (node_impl &NextNode : SubgraphNode.successors()) {
node_impl &Successor = *SubgraphNodesMap.at(&NextNode);
NodeCopy.registerSuccessor(Successor);
}
}

// Collect input and output nodes for the subgraph
std::vector<std::shared_ptr<node_impl>> Inputs;
std::vector<std::shared_ptr<node_impl>> Outputs;
for (auto &NodeImpl : NewSubgraphNodes) {
std::vector<node_impl *> Inputs;
std::vector<node_impl *> Outputs;
for (std::shared_ptr<node_impl> &NodeImpl : NewSubgraphNodes) {
if (NodeImpl->MPredecessors.size() == 0) {
Inputs.push_back(NodeImpl);
Inputs.push_back(&*NodeImpl);
}
if (NodeImpl->MSuccessors.size() == 0) {
Outputs.push_back(NodeImpl);
Outputs.push_back(&*NodeImpl);
}
}

Expand All @@ -1333,7 +1329,7 @@ void exec_graph_impl::duplicateNodes() {

// Add all input nodes from the subgraph as successors for this node
// instead
for (auto &Input : Inputs) {
for (node_impl *Input : Inputs) {
PredNode.registerSuccessor(*Input);
}
}
Expand All @@ -1352,7 +1348,7 @@ void exec_graph_impl::duplicateNodes() {

// Add all Output nodes from the subgraph as predecessors for this node
// instead
for (auto &Output : Outputs) {
for (node_impl *Output : Outputs) {
Output->registerSuccessor(SuccNode);
}
}
Expand All @@ -1363,15 +1359,18 @@ void exec_graph_impl::duplicateNodes() {
NewNodes.erase(std::find(NewNodes.begin(), NewNodes.end(), NewNode));
// Also set the iterator to the newly added nodes so we can continue
// iterating over all remaining nodes
auto InsertIt = NewNodes.insert(OldPositionIt, NewSubgraphNodes.begin(),
NewSubgraphNodes.end());
auto InsertIt = NewNodes.insert(
OldPositionIt, std::make_move_iterator(NewSubgraphNodes.begin()),
std::make_move_iterator(NewSubgraphNodes.end()));
// Since the new reverse_iterator will be at i - 1 we need to advance it
// when constructing
NewNodeIt = std::make_reverse_iterator(std::next(InsertIt));
}

// Store all the new nodes locally
MNodeStorage.insert(MNodeStorage.begin(), NewNodes.begin(), NewNodes.end());
MNodeStorage.insert(MNodeStorage.begin(),
std::make_move_iterator(NewNodes.begin()),
std::make_move_iterator(NewNodes.end()));
}

void exec_graph_impl::update(std::shared_ptr<graph_impl> GraphImpl) {
Expand Down Expand Up @@ -1436,7 +1435,7 @@ void exec_graph_impl::update(std::shared_ptr<graph_impl> GraphImpl) {

for (uint32_t i = 0; i < MNodeStorage.size(); ++i) {
MIDCache.insert(
std::make_pair(GraphImpl->MNodeStorage[i]->MID, MNodeStorage[i]));
std::make_pair(GraphImpl->MNodeStorage[i]->MID, MNodeStorage[i].get()));
}

update(GraphImpl->MNodeStorage);
Expand Down Expand Up @@ -1709,7 +1708,7 @@ void exec_graph_impl::populateURKernelUpdateStructs(
auto ExecNode = MIDCache.find(Node.MID);
assert(ExecNode != MIDCache.end() && "Node ID was not found in ID cache");

auto Command = MCommandMap.find(ExecNode->second.get());
auto Command = MCommandMap.find(ExecNode->second);
assert(Command != MCommandMap.end());
UpdateDesc.hCommand = Command->second;

Expand Down Expand Up @@ -1738,7 +1737,7 @@ exec_graph_impl::getURUpdatableNodes(nodes_range Nodes) const {

auto ExecNode = MIDCache.find(Node.MID);
assert(ExecNode != MIDCache.end() && "Node ID was not found in ID cache");
auto PartitionIndex = MPartitionNodes.find(ExecNode->second.get());
auto PartitionIndex = MPartitionNodes.find(ExecNode->second);
assert(PartitionIndex != MPartitionNodes.end());
PartitionedNodes[PartitionIndex->second].push_back(&Node);
}
Expand Down
2 changes: 1 addition & 1 deletion sycl/source/detail/graph/graph_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -917,7 +917,7 @@ class exec_graph_impl {

// Stores a cache of node ids from modifiable graph nodes to the companion
// node(s) in this graph. Used for quick access when updating this graph.
std::multimap<node_impl::id_type, std::shared_ptr<node_impl>> MIDCache;
std::multimap<node_impl::id_type, node_impl *> MIDCache;

unsigned long long MID;
// Used for std::hash in order to create a unique hash for the instance.
Expand Down