Skip to content

Commit

Permalink
Make data contiguous before the inplace reshape-copy_ function (#2489)
Browse files Browse the repository at this point in the history
Co-authored-by: Michael Wyatt <[email protected]>
  • Loading branch information
lokoppakmsft and mrwyattii authored Nov 11, 2022
1 parent be5ec50 commit f2710bb
Show file tree
Hide file tree
Showing 2 changed files with 3 additions and 1 deletion.
1 change: 1 addition & 0 deletions deepspeed/module_inject/load_checkpoint.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ def load_model_with_checkpoint(r_module,
error_msgs = []

def transpose(data):
+ data = data.contiguous()
data1 = data.transpose(-1, -2).reshape(-1)
data.reshape(-1).copy_(data1)
data1 = None
Expand Down
3 changes: 2 additions & 1 deletion deepspeed/module_inject/replace_module.py
Original file line number Diff line number Diff line change
Expand Up @@ -215,6 +215,7 @@ def replace_attn(child, policy):
attn_module = transformer_inference.DeepSpeedDiffusersAttention(config)

def transpose(data):
+ data = data.contiguous()
data.reshape(-1).copy_(data.transpose(-1, -2).contiguous().reshape(-1))
data = data.reshape(data.shape[-1], data.shape[-2])
data.to(torch.cuda.current_device())
Expand Down Expand Up @@ -531,7 +532,7 @@ def replace_with_policy(child,
# transpose it here to reduce inference cost!
def transpose(data):
# temp move to cpu to avoid requiring extra GPU memory during the reshape
- data = data.to('cpu')
+ data = data.to('cpu').contiguous()
data.reshape(-1).copy_(data.transpose(-1, -2).contiguous().reshape(-1))
data = data.reshape(data.shape[-1], data.shape[-2])
data.to(torch.cuda.current_device())
Expand Down

0 comments on commit f2710bb

Please sign in to comment.