Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Paddle
Submodule Paddle updated 655 files
14 changes: 7 additions & 7 deletions backends/gcu/kernels/interpolate_kernels.cc
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ void InterpolateKernel(
int out_d,
int out_h,
int out_w,
const std::vector<float>& scale,
const std::vector<double>& scale,
const std::string& interp_method,
bool align_corners,
int align_mode,
Expand All @@ -47,7 +47,7 @@ void InterpolateKernel(

float scale_h = -1;
float scale_w = -1;
std::vector<float> new_scale(scale);
std::vector<double> new_scale(scale);
// Priority: size_tensor > out_size > scale_tensor > scale > out_h & out_w
if (size_tensor && size_tensor->size() > 0) {
auto tensors = size_tensor.get();
Expand Down Expand Up @@ -253,7 +253,7 @@ void InterpolateGradKernel(
int out_d,
int out_h,
int out_w,
const std::vector<float>& scale,
const std::vector<double>& scale,
const std::string& interp_method,
bool align_corners,
int align_mode,
Expand Down Expand Up @@ -326,7 +326,7 @@ void BilinearInterpKernel(
int out_d,
int out_h,
int out_w,
const std::vector<float>& scale,
const std::vector<double>& scale,
const std::string& interp_method,
bool align_corners,
int align_mode,
Expand Down Expand Up @@ -361,7 +361,7 @@ void BilinearInterpGradKernel(
int out_d,
int out_h,
int out_w,
const std::vector<float>& scale,
const std::vector<double>& scale,
const std::string& interp_method,
bool align_corners,
int align_mode,
Expand Down Expand Up @@ -400,7 +400,7 @@ void NearestInterpKernel(
int out_d,
int out_h,
int out_w,
const std::vector<float>& scale,
const std::vector<double>& scale,
const std::string& interp_method,
bool align_corners,
int align_mode,
Expand Down Expand Up @@ -435,7 +435,7 @@ void NearestInterpGradKernel(
int out_d,
int out_h,
int out_w,
const std::vector<float>& scale,
const std::vector<double>& scale,
const std::string& interp_method,
bool align_corners,
int align_mode,
Expand Down
2 changes: 1 addition & 1 deletion backends/iluvatar_gpu/build_inc.sh
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,7 @@ fi

# Compile
echo "Starting compilation..."
ninja -k 0 -j$(nproc) 2>&1 | tee -a compile.log
ninja -j$(nproc) 2>&1
FAILED_LOG="failed_files.log"
grep -E "FAILED: " compile.log | tee ${FAILED_LOG}
echo "Failed files are listed in ${FAILED_LOG}"
Expand Down
2 changes: 1 addition & 1 deletion backends/iluvatar_gpu/build_paddle.sh
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ cmake -G Ninja -DPY_VERSION=${PYTHON_VERSION} -DWITH_COREX=ON -DPADDLE_SOURCE_DI
-DCMAKE_CUDA_FLAGS='-Xclang -fcuda-allow-variadic-functions -mllvm --skip-double' \
-DCMAKE_C_FLAGS="-pthread" \
-DWITH_ARM=OFF -DWITH_DGC=OFF .. || { echo "Error: CMake configuration failed!"; exit 1; }
ninja -k 0 -j$(nproc) || { echo "Error: Paddle-iluvatar-gpu build failed!"; exit 1; }
ninja -j$(nproc) || { echo "Error: Paddle-iluvatar-gpu build failed!"; exit 1; }
popd

if [[ ! -d "build_pip" ]]; then
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -751,7 +751,7 @@ static void SoftmaxWithCrossEntropySoftLabel(const GPUContext& dev_ctx,
} else {
ScopedTensorDescriptor desc;
std::vector<int> tensor_dims = {N, dim, D, 1};
GPUDNNDataLayout layout = GPUDNNDataLayout::kNCHW;
DataLayout layout = DataLayout::kNCHW;
cudnnTensorDescriptor_t descp = desc.descriptor<T>(layout, tensor_dims);

auto handle = GetDnnHandle(dev_ctx.stream(), dev_ctx.GetPlace());
Expand Down Expand Up @@ -1163,7 +1163,7 @@ static void SoftmaxWithCrossEntropyHardLabel(const GPUContext& dev_ctx,
} else {
ScopedTensorDescriptor desc;
std::vector<int> tensor_dims = {N, dim, D, 1};
GPUDNNDataLayout layout = GPUDNNDataLayout::kNCHW;
DataLayout layout = DataLayout::kNCHW;
cudnnTensorDescriptor_t descp = desc.descriptor<T>(layout, tensor_dims);
auto handle = GetDnnHandle(dev_ctx.stream(), dev_ctx.GetPlace());

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1054,7 +1054,7 @@ void TopPSamplingKernel(const Context& dev_ctx,
const DenseTensor& ps,
const paddle::optional<DenseTensor>& threshold,
const paddle::optional<DenseTensor>& topp_seed,
int seed,
int64_t seed,
int k,
const std::string& mode,
DenseTensor* out,
Expand Down
69 changes: 31 additions & 38 deletions backends/iluvatar_gpu/kernels/gpudnn/conv_grad_kernel.cu
Original file line number Diff line number Diff line change
Expand Up @@ -53,8 +53,8 @@ void ConvCudnnGradKernelImplV7(
const std::vector<int>& strides,
const std::vector<int>& padding_common,
const std::vector<int>& dilations,
phi::backends::gpu::DataLayout compute_format,
phi::backends::gpu::DataLayout layout,
DataLayout compute_format,
DataLayout layout,
bool use_addto,
bool exhaustive_search,
bool deterministic,
Expand Down Expand Up @@ -98,31 +98,31 @@ void ConvCudnnGradKernelImplV7(

int i_n, i_c, i_d, i_h, i_w;
int o_n, o_c, o_d, o_h, o_w;
if (compute_format == phi::backends::gpu::DataLayout::kNHWC) {
if (compute_format == DataLayout::NHWC) {
GetNCDHW(transformed_input->dims(),
phi::backends::gpu::DataLayout::kNHWC,
DataLayout::NHWC,
&i_n,
&i_c,
&i_d,
&i_h,
&i_w);
GetNCDHW(transformed_output_grad_channel->dims(),
phi::backends::gpu::DataLayout::kNHWC,
DataLayout::NHWC,
&o_n,
&o_c,
&o_d,
&o_h,
&o_w);
} else {
GetNCDHW(transformed_input->dims(),
phi::backends::gpu::DataLayout::kNCHW,
DataLayout::NCHW,
&i_n,
&i_c,
&i_d,
&i_h,
&i_w);
GetNCDHW(transformed_output_grad_channel->dims(),
phi::backends::gpu::DataLayout::kNCHW,
DataLayout::NCHW,
&o_n,
&o_c,
&o_d,
Expand Down Expand Up @@ -349,7 +349,7 @@ void ConvCudnnGradKernelImplV8(
const std::vector<int>& strides,
const std::vector<int>& padding_common,
const std::vector<int>& dilations,
phi::backends::gpu::DataLayout layout,
DataLayout layout,
bool use_addto,
bool exhaustive_search,
bool deterministic,
Expand Down Expand Up @@ -469,7 +469,7 @@ void ConvCudnnGradKernel(const Context& dev_ctx,

#ifdef PADDLE_WITH_HIP
// HIP MIOPEN ONLY SUPPORT NCHW format
auto compute_format = phi::backends::gpu::DataLayout::kNCHW;
auto compute_format = DataLayout::NCHW;
#else
#if CUDNN_VERSION_MIN(8, 1, 0)
const bool compute_in_nhwc =
Expand All @@ -479,14 +479,12 @@ void ConvCudnnGradKernel(const Context& dev_ctx,
const bool compute_in_nhwc =
dtype == CUDNN_DATA_HALF && IsVoltaOrLater(dev_ctx);
#endif
auto compute_format = compute_in_nhwc && channel_last
? phi::backends::gpu::DataLayout::kNHWC
: phi::backends::gpu::DataLayout::kNCHW;
auto compute_format =
compute_in_nhwc && channel_last ? DataLayout::NHWC : DataLayout::NCHW;
#endif
VLOG(3) << "Compute ConvGradOp with cuDNN:"
<< " data_format=" << data_format << " compute_format="
<< (compute_format == phi::backends::gpu::DataLayout::kNHWC ? "NHWC"
: "NCHW");
<< (compute_format == DataLayout::NHWC ? "NHWC" : "NCHW");

// transform Tensor
DenseTensor transformed_input_channel(input.type());
Expand All @@ -495,7 +493,7 @@ void ConvCudnnGradKernel(const Context& dev_ctx,
DenseTensor transformed_filter_channel(filter.type());
DenseTensor transformed_filter_grad_channel(filter.type());

if (channel_last && compute_format == phi::backends::gpu::DataLayout::kNCHW) {
if (channel_last && compute_format == DataLayout::NCHW) {
VLOG(3) << "Transform input, output_grad, input_grad and tensor from "
"NHWC to NCHW.";
ResizeToChannelFirst<Context, T>(
Expand Down Expand Up @@ -526,7 +524,7 @@ void ConvCudnnGradKernel(const Context& dev_ctx,
}
}

if (compute_format == phi::backends::gpu::DataLayout::kNHWC) {
if (compute_format == DataLayout::NHWC) {
VLOG(3) << "Transform filter and filter_grad tensor from NCHW to NHWC.";
ResizeToChannelLast<Context, T>(
dev_ctx, &filter, &transformed_filter_channel);
Expand All @@ -549,7 +547,7 @@ void ConvCudnnGradKernel(const Context& dev_ctx,
auto filter_dims = transformed_filter_channel.dims();
DDim in_data_dims;
DDim filter_data_dims;
if (compute_format == phi::backends::gpu::DataLayout::kNCHW) {
if (compute_format == DataLayout::NCHW) {
in_data_dims = slice_ddim(in_dims, 2, in_dims.size());
filter_data_dims = slice_ddim(filter_dims, 2, filter_dims.size());
} else {
Expand All @@ -574,7 +572,7 @@ void ConvCudnnGradKernel(const Context& dev_ctx,
std::vector<int> padding_diff(data_dim);
std::vector<int> new_input_shape_vec(data_dim + 2);
new_input_shape_vec[0] = transformed_input_channel.dims()[0];
if (compute_format == phi::backends::gpu::DataLayout::kNCHW) {
if (compute_format == DataLayout::NCHW) {
new_input_shape_vec[1] = transformed_input_channel.dims()[1];
} else {
new_input_shape_vec[data_dim + 1] =
Expand All @@ -584,14 +582,14 @@ void ConvCudnnGradKernel(const Context& dev_ctx,
for (size_t i = 0; i < data_dim; ++i) {
padding_diff[i] = std::abs(paddings[2 * i] - paddings[2 * i + 1]);
padding_common[i] = std::min(paddings[2 * i], paddings[2 * i + 1]);
if (compute_format == phi::backends::gpu::DataLayout::kNCHW) {
if (compute_format == DataLayout::NCHW) {
new_input_shape_vec[i + 2] =
transformed_input_channel.dims()[i + 2] + padding_diff[i];
} else {
new_input_shape_vec[i + 1] =
transformed_input_channel.dims()[i + 1] + padding_diff[i];
}
if (compute_format == phi::backends::gpu::DataLayout::kNCHW) {
if (compute_format == DataLayout::NCHW) {
input_pad[2 * i + 4] = paddings[2 * i] - padding_common[i];
input_pad[2 * i + 4 + 1] = paddings[2 * i + 1] - padding_common[i];
} else {
Expand Down Expand Up @@ -645,14 +643,11 @@ void ConvCudnnGradKernel(const Context& dev_ctx,
}
}
}
phi::backends::gpu::DataLayout layout =
compute_format == phi::backends::gpu::DataLayout::kNHWC
? phi::backends::gpu::DataLayout::kNHWC
: phi::backends::gpu::DataLayout::kNCHW;
DataLayout layout =
compute_format == DataLayout::NHWC ? DataLayout::NHWC : DataLayout::NCHW;
if (transformed_input.dims().size() == 5) {
layout = compute_format == phi::backends::gpu::DataLayout::kNHWC
? phi::backends::gpu::DataLayout::kNDHWC
: phi::backends::gpu::DataLayout::kNCDHW;
layout = compute_format == DataLayout::NHWC ? DataLayout::NDHWC
: DataLayout::NCDHW;
}
CUDNN_ENFORCE_TENSOR_SIZE_SUPPORTED(transformed_input);
CUDNN_ENFORCE_TENSOR_SIZE_SUPPORTED(transformed_filter_channel);
Expand Down Expand Up @@ -740,15 +735,14 @@ void ConvCudnnGradKernel(const Context& dev_ctx,
}
}

if (channel_last &&
compute_format == phi::backends::gpu::DataLayout::kNCHW) {
if (channel_last && compute_format == DataLayout::NCHW) {
TransToChannelLast<Context, T>(
dev_ctx, &transformed_input_grad_channel, input_grad);
}
}

if (filter_grad) {
if (compute_format == phi::backends::gpu::DataLayout::kNHWC) {
if (compute_format == DataLayout::NHWC) {
TransToChannelFirst<Context, T>(
dev_ctx, &transformed_filter_grad_channel, filter_grad);
}
Expand Down Expand Up @@ -1011,8 +1005,7 @@ void ConvCudnnGradGradKernel(
auto dtype = phi::backends::gpu::CudnnDataType<T>::type;

auto handle = GetDnnHandle(dev_ctx.stream(), dev_ctx.GetPlace());
auto layout = phi::backends::gpu::GetCudnnTensorFormat(
phi::backends::gpu::DataLayout::kNCHW);
auto layout = phi::backends::gpu::GetCudnnTensorFormat(DataLayout::NCHW);

ConvArgs args1{handle,
&transformed_ddX,
Expand All @@ -1023,7 +1016,7 @@ void ConvCudnnGradGradKernel(
dilations,
dtype,
groups,
phi::backends::gpu::DataLayout::kNCHW};
DataLayout::NCHW};
ConvArgs args2{handle,
&transformed_X,
ddW,
Expand All @@ -1033,7 +1026,7 @@ void ConvCudnnGradGradKernel(
dilations,
dtype,
groups,
phi::backends::gpu::DataLayout::kNCHW};
DataLayout::NCHW};
ConvArgs args3{handle,
&transformed_ddX,
dW,
Expand All @@ -1043,7 +1036,7 @@ void ConvCudnnGradGradKernel(
dilations,
dtype,
groups,
phi::backends::gpu::DataLayout::kNCHW};
DataLayout::NCHW};
ConvArgs args4{handle,
&transformed_dX,
ddW,
Expand All @@ -1053,7 +1046,7 @@ void ConvCudnnGradGradKernel(
dilations,
dtype,
groups,
phi::backends::gpu::DataLayout::kNCHW};
DataLayout::NCHW};

#ifdef PADDLE_WITH_HIP
SearchResult<miopenConvFwdAlgorithm_t> fwd_result1;
Expand Down Expand Up @@ -1179,11 +1172,11 @@ void ConvCudnnGradGradKernel(

int i_n, i_c, i_d, i_h, i_w;
GetNCDHW(
transformed_X.dims(), DataLayout::kNCHW, &i_n, &i_c, &i_d, &i_h, &i_w);
transformed_X.dims(), DataLayout::NCHW, &i_n, &i_c, &i_d, &i_h, &i_w);

int o_n, o_c, o_d, o_h, o_w;
GetNCDHW(transformed_dO_channel.dims(),
DataLayout::kNCHW,
DataLayout::NCHW,
&o_n,
&o_c,
&o_d,
Expand Down
Loading