From a2edf4d929ae22079dd1a301b88593e46e316256 Mon Sep 17 00:00:00 2001 From: Yuantao Feng Date: Wed, 6 Dec 2023 04:46:24 -0600 Subject: [PATCH] Merge pull request #24647 from fengyuentau:cuda_sub dnn cuda: support Sub #24647 Related https://github.com/opencv/opencv/issues/24606#issuecomment-1837390257 ### Pull Request Readiness Checklist See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request - [x] I agree to contribute to the project under Apache 2 License. - [x] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV - [x] The PR is proposed to the proper branch - [x] There is a reference to the original bug report and related work - [x] There is accuracy test, performance test and test data in opencv_extra repository, if applicable Patch to opencv_extra has the same branch name. - [x] The feature is well documented and sample code can be built with the project CMake --- modules/dnn/src/cuda/eltwise_ops.cu | 7 +++++++ modules/dnn/src/cuda/functors.hpp | 12 ++++++++++++ modules/dnn/src/cuda4dnn/kernels/eltwise_ops.hpp | 3 +++ modules/dnn/src/cuda4dnn/primitives/eltwise.hpp | 3 +++ modules/dnn/src/layers/nary_eltwise_layers.cpp | 11 ++++++++--- 5 files changed, 33 insertions(+), 3 deletions(-) diff --git a/modules/dnn/src/cuda/eltwise_ops.cu b/modules/dnn/src/cuda/eltwise_ops.cu index f94bdb811346..16f6cccf6b16 100644 --- a/modules/dnn/src/cuda/eltwise_ops.cu +++ b/modules/dnn/src/cuda/eltwise_ops.cu @@ -319,7 +319,13 @@ void eltwise_div_2(const Stream& stream, TensorSpan output, TensorView x, eltwise_op>(stream, output, x, y); } +template +void eltwise_sub_2(const Stream& stream, TensorSpan output, TensorView x, TensorView y) { + eltwise_op>(stream, output, x, y); +} + #if !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ >= 530) + template void eltwise_sub_2(const Stream& stream, TensorSpan<__half> output, TensorView<__half> x, TensorView<__half> y); template void eltwise_div_2(const Stream& stream, TensorSpan<__half> output, TensorView<__half> x, TensorView<__half> y); template void eltwise_prod_2(const Stream& stream, TensorSpan<__half> output, TensorView<__half> x, TensorView<__half> y); template void eltwise_sum_coeff_2(const Stream&, TensorSpan<__half>, __half, TensorView<__half>, __half, TensorView<__half>); @@ -327,6 +333,7 @@ void eltwise_div_2(const Stream& stream, TensorSpan output, TensorView x, template void eltwise_max_2(const Stream& stream, TensorSpan<__half> output, TensorView<__half> x, TensorView<__half> y); template void eltwise_min_2(const Stream& stream, TensorSpan<__half> output, TensorView<__half> x, TensorView<__half> y); #endif + template void eltwise_sub_2(const Stream& stream, TensorSpan output, TensorView x, TensorView y); template void eltwise_div_2(const Stream& stream, TensorSpan output, TensorView x, TensorView y); template void eltwise_prod_2(const Stream& stream, TensorSpan output, TensorView x, TensorView y); template void eltwise_sum_coeff_2(const Stream&, TensorSpan, float, TensorView, float, TensorView); diff --git a/modules/dnn/src/cuda/functors.hpp b/modules/dnn/src/cuda/functors.hpp index 3e487cd98a50..2df32030f0e4 100644 --- a/modules/dnn/src/cuda/functors.hpp +++ b/modules/dnn/src/cuda/functors.hpp @@ -741,6 +741,18 @@ struct DivFunctor { CUDA4DNN_DEVICE T operator()(T x, T y) { return x / y; } }; +template +struct SubFunctor { + struct Params { + CUDA4DNN_HOST_DEVICE Params() { } + }; + + CUDA4DNN_DEVICE SubFunctor() { } + CUDA4DNN_DEVICE SubFunctor(const Params& params) { } + + CUDA4DNN_DEVICE T operator()(T x, T y) { return x - y; } +}; + template struct SignFunctor { struct Params { diff --git a/modules/dnn/src/cuda4dnn/kernels/eltwise_ops.hpp b/modules/dnn/src/cuda4dnn/kernels/eltwise_ops.hpp index 0e44372fee3d..3dc3355b3b8b 100644 --- a/modules/dnn/src/cuda4dnn/kernels/eltwise_ops.hpp +++ b/modules/dnn/src/cuda4dnn/kernels/eltwise_ops.hpp @@ -30,6 +30,9 @@ namespace cv { namespace dnn { namespace cuda4dnn { namespace kernels { template void eltwise_div_2(const csl::Stream& stream, csl::TensorSpan output, csl::TensorView x, csl::TensorView y); + template + void eltwise_sub_2(const csl::Stream& stream, csl::TensorSpan output, csl::TensorView x, csl::TensorView y); + }}}} /* namespace cv::dnn::cuda4dnn::kernels */ #endif /* OPENCV_DNN_SRC_CUDA4DNN_KERNELS_ELTWISE_OPS_HPP */ diff --git a/modules/dnn/src/cuda4dnn/primitives/eltwise.hpp b/modules/dnn/src/cuda4dnn/primitives/eltwise.hpp index b46f0d870f4d..05bca83820d1 100644 --- a/modules/dnn/src/cuda4dnn/primitives/eltwise.hpp +++ b/modules/dnn/src/cuda4dnn/primitives/eltwise.hpp @@ -27,6 +27,7 @@ namespace cv { namespace dnn { namespace cuda4dnn { PRODUCT, DIV, MIN, + SUB, }; class EltwiseOpBase : public CUDABackendNode { @@ -88,6 +89,7 @@ namespace cv { namespace dnn { namespace cuda4dnn { else kernels::eltwise_sum_coeff_2(stream, output, coeffs[0], input_x, coeffs[1], input_y); break; + case EltwiseOpType::SUB: kernels::eltwise_sub_2(stream, output, input_x, input_y); break; } } else @@ -119,6 +121,7 @@ namespace cv { namespace dnn { namespace cuda4dnn { kernels::eltwise_sum_coeff_2(stream, output, coeff_x, output, coeffs[i], input); } break; + case EltwiseOpType::SUB: kernels::eltwise_sub_2(stream, output, output, input); break; } } } diff --git a/modules/dnn/src/layers/nary_eltwise_layers.cpp b/modules/dnn/src/layers/nary_eltwise_layers.cpp index 8572eee995a7..c988ec69f299 100644 --- a/modules/dnn/src/layers/nary_eltwise_layers.cpp +++ b/modules/dnn/src/layers/nary_eltwise_layers.cpp @@ -114,9 +114,11 @@ class NaryEltwiseLayerImpl CV_FINAL : public NaryEltwiseLayer op == OPERATION::GREATER_EQUAL || op == OPERATION::LESS_EQUAL ); - if (op == OPERATION::MAX || op == OPERATION::MIN || op == OPERATION::SUM || - op == OPERATION::PROD || op == OPERATION::DIV || op == OPERATION::ADD) - return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_CUDA; + if (backendId == DNN_BACKEND_CUDA) { + return op == OPERATION::MAX || op == OPERATION::MIN || op == OPERATION::SUM || + op == OPERATION::PROD || op == OPERATION::DIV || op == OPERATION::ADD || + op == OPERATION::SUB; + } return backendId == DNN_BACKEND_OPENCV; } @@ -828,6 +830,9 @@ class NaryEltwiseLayerImpl CV_FINAL : public NaryEltwiseLayer case OPERATION::ADD: op_ = cuda4dnn::EltwiseOpType::SUM; break; + case OPERATION::SUB: + op_ = cuda4dnn::EltwiseOpType::SUB; + break; default: return Ptr(); // return empty cuda_node if the EltwiseOpType is unsupported type. };