Skip to content

Commit 30df77f

Browse files
committed
Merge pull request opencv#21190 from rogday:acts
2 parents defd8a5 + 71a22e4 commit 30df77f

File tree

10 files changed

+526
-89
lines changed

10 files changed

+526
-89
lines changed

modules/dnn/include/opencv2/dnn/all_layers.hpp

+34
Original file line numberDiff line numberDiff line change
@@ -738,6 +738,40 @@ CV__DNN_INLINE_NS_BEGIN
738738
static Ptr<TanLayer> create(const LayerParams &params);
739739
};
740740

741+
/** @brief Continuously differentiable Exponential Linear Unit (CELU) activation.
 *
 * Element-wise: max(0, x) + min(0, alpha * (exp(x / alpha) - 1))
 * (see the matching CUDA CeluFunctor). The CUDA functor defaults alpha to 1.
 */
class CV_EXPORTS CeluLayer : public ActivationLayer
{
public:
    float alpha;  //!< scale of the negative saturation; must be non-zero (x is divided by it)

    static Ptr<CeluLayer> create(const LayerParams &params);
};

/** @brief Hard sigmoid activation: clamp(alpha * x + beta, 0, 1).
 *
 * The CUDA functor defaults are alpha = 0.2, beta = 0.5.
 */
class CV_EXPORTS HardSigmoidLayer : public ActivationLayer
{
public:
    float alpha;  //!< linear slope
    float beta;   //!< linear offset

    static Ptr<HardSigmoidLayer> create(const LayerParams &params);
};

/** @brief Scaled Exponential Linear Unit (SELU) activation.
 *
 * Element-wise: gamma * (x > 0 ? x : alpha * (exp(x) - 1))
 * (see the matching CUDA SeluFunctor). Functor defaults:
 * alpha ~= 1.67326324, gamma ~= 1.05070099.
 */
class CV_EXPORTS SeluLayer : public ActivationLayer
{
public:
    float alpha;  //!< ELU scale applied on the negative branch
    float gamma;  //!< overall output scale

    static Ptr<SeluLayer> create(const LayerParams &params);
};

/** @brief Thresholded ReLU activation: x if x > alpha, else 0.
 *
 * The CUDA functor defaults alpha to 1.
 */
class CV_EXPORTS ThresholdedReluLayer : public ActivationLayer
{
public:
    float alpha;  //!< threshold below which the output is zeroed

    static Ptr<ThresholdedReluLayer> create(const LayerParams &params);
};
774+
741775
class CV_EXPORTS ActivationLayerInt8 : public ActivationLayer
742776
{
743777
public:

modules/dnn/src/cuda/activations.cu

+28
Original file line numberDiff line numberDiff line change
@@ -233,6 +233,26 @@ void tan(const Stream& stream, Span<T> output, View<T> input) {
233233
generic_op<T, TanFunctor<T>>(stream, output, input);
234234
}
235235

236+
template <class T>
237+
void celu(const Stream& stream, Span<T> output, View<T> input, T alpha) {
238+
generic_op<T, CeluFunctor<T>>(stream, output, input, {alpha});
239+
}
240+
241+
template <class T>
242+
void hardsigmoid(const Stream& stream, Span<T> output, View<T> input, T alpha, T beta) {
243+
generic_op<T, HardSigmoidFunctor<T>>(stream, output, input, {alpha, beta});
244+
}
245+
246+
template <class T>
247+
void selu(const Stream& stream, Span<T> output, View<T> input, T alpha, T gamma) {
248+
generic_op<T, SeluFunctor<T>>(stream, output, input, {alpha, gamma});
249+
}
250+
251+
template <class T>
252+
void thresholdedrelu(const Stream& stream, Span<T> output, View<T> input, T alpha) {
253+
generic_op<T, ThresholdedReluFunctor<T>>(stream, output, input, {alpha});
254+
}
255+
236256
template <class T>
237257
void abs(const Stream& stream, Span<T> output, View<T> input) {
238258
generic_op<T, AbsFunctor<T>>(stream, output, input);
@@ -286,6 +306,10 @@ template void sinh<__half>(const Stream&, Span<__half>, View<__half>);
286306
template void softplus<__half>(const Stream&, Span<__half>, View<__half>);
287307
template void softsign<__half>(const Stream&, Span<__half>, View<__half>);
288308
template void tan<__half>(const Stream&, Span<__half>, View<__half>);
309+
template void celu<__half>(const Stream&, Span<__half>, View<__half>, __half);
310+
template void hardsigmoid<__half>(const Stream&, Span<__half>, View<__half>, __half, __half);
311+
template void selu<__half>(const Stream&, Span<__half>, View<__half>, __half, __half);
312+
template void thresholdedrelu<__half>(const Stream&, Span<__half>, View<__half>, __half);
289313
template void power<__half>(const Stream&, Span<__half>, View<__half>, __half, __half, __half);
290314
template void exp<__half>(const Stream&, Span<__half>, View<__half>, __half, __half);
291315
#endif
@@ -321,6 +345,10 @@ template void sinh<float>(const Stream&, Span<float>, View<float>);
321345
template void softplus<float>(const Stream&, Span<float>, View<float>);
322346
template void softsign<float>(const Stream&, Span<float>, View<float>);
323347
template void tan<float>(const Stream&, Span<float>, View<float>);
348+
template void celu<float>(const Stream&, Span<float>, View<float>, float);
349+
template void hardsigmoid<float>(const Stream&, Span<float>, View<float>, float, float);
350+
template void selu<float>(const Stream&, Span<float>, View<float>, float, float);
351+
template void thresholdedrelu<float>(const Stream&, Span<float>, View<float>, float);
324352
template void power<float>(const Stream&, Span<float>, View<float>, float, float, float);
325353
template void exp<float>(const Stream&, Span<float>, View<float>, float, float);
326354

modules/dnn/src/cuda/functors.hpp

+78
Original file line numberDiff line numberDiff line change
@@ -528,6 +528,84 @@ struct TanFunctor {
528528
}
529529
};
530530

531+
template <class T>
532+
struct CeluFunctor {
533+
struct Params {
534+
CUDA4DNN_HOST_DEVICE Params() : alpha(1) { }
535+
CUDA4DNN_HOST_DEVICE Params(T alpha_) : alpha(alpha_) { }
536+
T alpha;
537+
};
538+
539+
CUDA4DNN_DEVICE CeluFunctor() : CeluFunctor(Params{}) { }
540+
CUDA4DNN_DEVICE CeluFunctor(const Params& params) : alpha{params.alpha} { }
541+
542+
CUDA4DNN_DEVICE T operator()(T value) {
543+
using csl::device::min;
544+
using csl::device::max;
545+
using csl::device::expm1;
546+
return max(T(0), value) + min(T(0), alpha * expm1(value / alpha));
547+
}
548+
549+
T alpha;
550+
};
551+
552+
template <class T>
553+
struct HardSigmoidFunctor {
554+
struct Params {
555+
CUDA4DNN_HOST_DEVICE Params() : alpha(0.2), beta(0.5) { }
556+
CUDA4DNN_HOST_DEVICE Params(T alpha_, T beta_) : alpha(alpha_), beta(beta_) { }
557+
T alpha, beta;
558+
};
559+
560+
CUDA4DNN_DEVICE HardSigmoidFunctor() : HardSigmoidFunctor(Params{}) { }
561+
CUDA4DNN_DEVICE HardSigmoidFunctor(const Params& params): alpha{params.alpha}, beta{params.beta} { }
562+
563+
CUDA4DNN_DEVICE T operator()(T value) {
564+
using csl::device::clamp;
565+
return clamp(alpha * value + beta, T(0), T(1));
566+
}
567+
568+
T alpha, beta;
569+
};
570+
571+
template <class T>
572+
struct SeluFunctor {
573+
struct Params {
574+
CUDA4DNN_HOST_DEVICE Params() : alpha(1.6732632423543772848170429916717),
575+
gamma(1.0507009873554804934193349852946) { }
576+
CUDA4DNN_HOST_DEVICE Params(T alpha_, T gamma_) : alpha(alpha_), gamma(gamma_) { }
577+
T alpha, gamma;
578+
};
579+
580+
CUDA4DNN_DEVICE SeluFunctor() : SeluFunctor(Params{}) { }
581+
CUDA4DNN_DEVICE SeluFunctor(const Params& params): alpha{params.alpha}, gamma{params.gamma} { }
582+
583+
CUDA4DNN_DEVICE T operator()(T value) {
584+
using csl::device::expm1;
585+
return gamma * (value > T(0) ? value : alpha * expm1(value));
586+
}
587+
588+
T alpha, gamma;
589+
};
590+
591+
template <class T>
592+
struct ThresholdedReluFunctor {
593+
struct Params {
594+
CUDA4DNN_HOST_DEVICE Params() : alpha(1) { }
595+
CUDA4DNN_HOST_DEVICE Params(T alpha_) : alpha(alpha_) { }
596+
T alpha;
597+
};
598+
599+
CUDA4DNN_DEVICE ThresholdedReluFunctor() : ThresholdedReluFunctor(Params{}) { }
600+
CUDA4DNN_DEVICE ThresholdedReluFunctor(const Params& params) : alpha{params.alpha} { }
601+
602+
CUDA4DNN_DEVICE T operator()(T value) {
603+
return (value > alpha) ? value : T(0);
604+
}
605+
606+
T alpha;
607+
};
608+
531609
template <class T>
532610
struct PowerFunctor {
533611
struct Params {

modules/dnn/src/cuda4dnn/kernels/activations.hpp

+12
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,18 @@ namespace cv { namespace dnn { namespace cuda4dnn { namespace kernels {
105105
template <class T>
106106
void tan(const csl::Stream& stream, csl::Span<T> output, csl::View<T> input);
107107

108+
template <class T>
109+
void celu(const csl::Stream& stream, csl::Span<T> output, csl::View<T> input, T alpha);
110+
111+
template <class T>
112+
void hardsigmoid(const csl::Stream& stream, csl::Span<T> output, csl::View<T> input, T alpha, T beta);
113+
114+
template <class T>
115+
void selu(const csl::Stream& stream, csl::Span<T> output, csl::View<T> input, T alpha, T gamma);
116+
117+
template <class T>
118+
void thresholdedrelu(const csl::Stream& stream, csl::Span<T> output, csl::View<T> input, T alpha);
119+
108120
template <class T>
109121
void power(const csl::Stream& stream, csl::Span<T> output, csl::View<T> input, T exp, T scale, T shift);
110122

modules/dnn/src/cuda4dnn/primitives/activation.hpp

+62
Original file line numberDiff line numberDiff line change
@@ -490,6 +490,68 @@ namespace cv { namespace dnn { namespace cuda4dnn {
490490
csl::Stream stream;
491491
};
492492

493+
template <class T>
494+
class CeluOp final : public BaseOp<CeluOp, T> {
495+
public:
496+
CeluOp(csl::Stream stream_, T alpha_) : stream(std::move(stream_)), alpha{ alpha_ } { }
497+
498+
void calculate(csl::TensorSpan<T> output, csl::TensorView<T> input) const
499+
{
500+
kernels::celu<T>(stream, output, input, alpha);
501+
}
502+
503+
private:
504+
csl::Stream stream;
505+
const T alpha;
506+
};
507+
508+
template <class T>
509+
class HardSigmoidOp final : public BaseOp<HardSigmoidOp, T> {
510+
public:
511+
HardSigmoidOp(csl::Stream stream_, T alpha_, T beta_)
512+
: stream(std::move(stream_)), alpha{ alpha_ }, beta{ beta_ } { }
513+
514+
void calculate(csl::TensorSpan<T> output, csl::TensorView<T> input) const
515+
{
516+
kernels::hardsigmoid<T>(stream, output, input, alpha, beta);
517+
}
518+
519+
private:
520+
csl::Stream stream;
521+
const T alpha, beta;
522+
};
523+
524+
template <class T>
525+
class SeluOp final : public BaseOp<SeluOp, T> {
526+
public:
527+
SeluOp(csl::Stream stream_, T alpha_, T gamma_)
528+
: stream(std::move(stream_)), alpha{ alpha_ }, gamma{ gamma_ } { }
529+
530+
void calculate(csl::TensorSpan<T> output, csl::TensorView<T> input) const
531+
{
532+
kernels::selu<T>(stream, output, input, alpha, gamma);
533+
}
534+
535+
private:
536+
csl::Stream stream;
537+
const T alpha, gamma;
538+
};
539+
540+
template <class T>
541+
class ThresholdedReluOp final : public BaseOp<ThresholdedReluOp, T> {
542+
public:
543+
ThresholdedReluOp(csl::Stream stream_, T alpha_) : stream(std::move(stream_)), alpha{ alpha_ } { }
544+
545+
void calculate(csl::TensorSpan<T> output, csl::TensorView<T> input) const
546+
{
547+
kernels::thresholdedrelu<T>(stream, output, input, alpha);
548+
}
549+
550+
private:
551+
csl::Stream stream;
552+
const T alpha;
553+
};
554+
493555
template <class T>
494556
class PowerOp final : public BaseOp<PowerOp, T> {
495557
public:

modules/dnn/src/init.cpp

+4
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,10 @@ void initializeLayerFactory()
132132
CV_DNN_REGISTER_LAYER_CLASS(Softplus, SoftplusLayer);
133133
CV_DNN_REGISTER_LAYER_CLASS(Softsign, SoftsignLayer);
134134
CV_DNN_REGISTER_LAYER_CLASS(Tan, TanLayer);
135+
CV_DNN_REGISTER_LAYER_CLASS(Celu, CeluLayer);
136+
CV_DNN_REGISTER_LAYER_CLASS(HardSigmoid, HardSigmoidLayer);
137+
CV_DNN_REGISTER_LAYER_CLASS(Selu, SeluLayer);
138+
CV_DNN_REGISTER_LAYER_CLASS(ThresholdedRelu,ThresholdedReluLayer);
135139
CV_DNN_REGISTER_LAYER_CLASS(BatchNorm, BatchNormLayer);
136140
CV_DNN_REGISTER_LAYER_CLASS(MaxUnpool, MaxUnpoolLayer);
137141
CV_DNN_REGISTER_LAYER_CLASS(Dropout, BlankLayer);

0 commit comments

Comments
 (0)