Skip to content

Commit

Permalink
Merge pull request opencv#24647 from fengyuentau:cuda_sub
Browse files Browse the repository at this point in the history
dnn cuda: support Sub opencv#24647

Related opencv#24606 (comment)

### Pull Request Readiness Checklist

See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request

- [x] I agree to contribute to the project under Apache 2 License.
- [x] To the best of my knowledge, the proposed patch is not based on code under the GPL or another license that is incompatible with OpenCV.
- [x] The PR is proposed to the proper branch
- [x] There is a reference to the original bug report and related work
- [x] There are accuracy tests, performance tests, and test data in the opencv_extra repository, if applicable.
      The patch to opencv_extra has the same branch name.
- [x] The feature is well documented and sample code can be built with the project CMake
  • Loading branch information
fengyuentau authored Dec 6, 2023
1 parent f5ec92e commit a2edf4d
Show file tree
Hide file tree
Showing 5 changed files with 33 additions and 3 deletions.
7 changes: 7 additions & 0 deletions modules/dnn/src/cuda/eltwise_ops.cu
Original file line number Diff line number Diff line change
Expand Up @@ -319,14 +319,21 @@ void eltwise_div_2(const Stream& stream, TensorSpan<T> output, TensorView<T> x,
eltwise_op<T, DivFunctor<T>>(stream, output, x, y);
}

template <class T>
void eltwise_sub_2(const Stream& stream, TensorSpan<T> output, TensorView<T> x, TensorView<T> y) {
eltwise_op<T, SubFunctor<T>>(stream, output, x, y);
}

/* Explicit instantiations of the binary element-wise entry points for `__half`.
 * They are emitted only when `__CUDA_ARCH__` is undefined or is at least 530.
 * NOTE(review): presumably because native half-precision arithmetic needs
 * compute capability 5.3 or newer -- confirm against the CUDA documentation.
 */
#if !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ >= 530)
template void eltwise_sub_2(const Stream& stream, TensorSpan<__half> output, TensorView<__half> x, TensorView<__half> y);
template void eltwise_div_2(const Stream& stream, TensorSpan<__half> output, TensorView<__half> x, TensorView<__half> y);
template void eltwise_prod_2(const Stream& stream, TensorSpan<__half> output, TensorView<__half> x, TensorView<__half> y);
template void eltwise_sum_coeff_2(const Stream&, TensorSpan<__half>, __half, TensorView<__half>, __half, TensorView<__half>);
template void eltwise_sum_2(const Stream& stream, TensorSpan<__half> output, TensorView<__half> x, TensorView<__half> y);
template void eltwise_max_2(const Stream& stream, TensorSpan<__half> output, TensorView<__half> x, TensorView<__half> y);
template void eltwise_min_2(const Stream& stream, TensorSpan<__half> output, TensorView<__half> x, TensorView<__half> y);
#endif
/* Explicit instantiations for `float`; unlike the `__half` ones these are not
 * placed under any architecture guard. */
template void eltwise_sub_2(const Stream& stream, TensorSpan<float> output, TensorView<float> x, TensorView<float> y);
template void eltwise_div_2(const Stream& stream, TensorSpan<float> output, TensorView<float> x, TensorView<float> y);
template void eltwise_prod_2(const Stream& stream, TensorSpan<float> output, TensorView<float> x, TensorView<float> y);
template void eltwise_sum_coeff_2(const Stream&, TensorSpan<float>, float, TensorView<float>, float, TensorView<float>);
Expand Down
12 changes: 12 additions & 0 deletions modules/dnn/src/cuda/functors.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -741,6 +741,18 @@ struct DivFunctor {
CUDA4DNN_DEVICE T operator()(T x, T y) { return x / y; }
};

/* Binary functor whose call operator yields the difference of its two arguments.
 *
 * `Params` carries no data; the constructor that takes it ignores the argument.
 */
template <class T>
struct SubFunctor {
    /* empty parameter pack: subtraction needs no configuration */
    struct Params {
        CUDA4DNN_HOST_DEVICE Params() { }
    };

    CUDA4DNN_DEVICE SubFunctor() { }

    /* accepts a Params object for interface uniformity; nothing is read from it */
    CUDA4DNN_DEVICE SubFunctor(const Params&) { }

    /* returns `minuend - subtrahend` using T's own operator- */
    CUDA4DNN_DEVICE T operator()(T minuend, T subtrahend) {
        return minuend - subtrahend;
    }
};

template <class T>
struct SignFunctor {
struct Params {
Expand Down
3 changes: 3 additions & 0 deletions modules/dnn/src/cuda4dnn/kernels/eltwise_ops.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,9 @@ namespace cv { namespace dnn { namespace cuda4dnn { namespace kernels {
/* Binary division: the definition in eltwise_ops.cu forwards to `eltwise_op`
 * with `DivFunctor<T>`, whose call operator returns `x / y`.
 *
 * stream  stream handle passed through to the implementation
 * output  destination tensor span
 * x       left-hand operand (dividend)
 * y       right-hand operand (divisor)
 *
 * Only the types explicitly instantiated in eltwise_ops.cu are available at link time.
 */
template <class T>
void eltwise_div_2(const csl::Stream& stream, csl::TensorSpan<T> output, csl::TensorView<T> x, csl::TensorView<T> y);

/* Binary subtraction: the definition in eltwise_ops.cu forwards to `eltwise_op`
 * with `SubFunctor<T>`, whose call operator returns `x - y`.
 *
 * stream  stream handle passed through to the implementation
 * output  destination tensor span
 * x       left-hand operand (minuend)
 * y       right-hand operand (subtrahend)
 *
 * Only the types explicitly instantiated in eltwise_ops.cu are available at link time.
 */
template <class T>
void eltwise_sub_2(const csl::Stream& stream, csl::TensorSpan<T> output, csl::TensorView<T> x, csl::TensorView<T> y);

}}}} /* namespace cv::dnn::cuda4dnn::kernels */

#endif /* OPENCV_DNN_SRC_CUDA4DNN_KERNELS_ELTWISE_OPS_HPP */
3 changes: 3 additions & 0 deletions modules/dnn/src/cuda4dnn/primitives/eltwise.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ namespace cv { namespace dnn { namespace cuda4dnn {
PRODUCT,
DIV,
MIN,
SUB,
};

class EltwiseOpBase : public CUDABackendNode {
Expand Down Expand Up @@ -88,6 +89,7 @@ namespace cv { namespace dnn { namespace cuda4dnn {
else
kernels::eltwise_sum_coeff_2<T>(stream, output, coeffs[0], input_x, coeffs[1], input_y);
break;
case EltwiseOpType::SUB: kernels::eltwise_sub_2<T>(stream, output, input_x, input_y); break;
}
}
else
Expand Down Expand Up @@ -119,6 +121,7 @@ namespace cv { namespace dnn { namespace cuda4dnn {
kernels::eltwise_sum_coeff_2<T>(stream, output, coeff_x, output, coeffs[i], input);
}
break;
case EltwiseOpType::SUB: kernels::eltwise_sub_2<T>(stream, output, output, input); break;
}
}
}
Expand Down
11 changes: 8 additions & 3 deletions modules/dnn/src/layers/nary_eltwise_layers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -114,9 +114,11 @@ class NaryEltwiseLayerImpl CV_FINAL : public NaryEltwiseLayer
op == OPERATION::GREATER_EQUAL ||
op == OPERATION::LESS_EQUAL
);
if (op == OPERATION::MAX || op == OPERATION::MIN || op == OPERATION::SUM ||
op == OPERATION::PROD || op == OPERATION::DIV || op == OPERATION::ADD)
return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_CUDA;
if (backendId == DNN_BACKEND_CUDA) {
return op == OPERATION::MAX || op == OPERATION::MIN || op == OPERATION::SUM ||
op == OPERATION::PROD || op == OPERATION::DIV || op == OPERATION::ADD ||
op == OPERATION::SUB;
}
return backendId == DNN_BACKEND_OPENCV;
}

Expand Down Expand Up @@ -828,6 +830,9 @@ class NaryEltwiseLayerImpl CV_FINAL : public NaryEltwiseLayer
case OPERATION::ADD:
op_ = cuda4dnn::EltwiseOpType::SUM;
break;
case OPERATION::SUB:
op_ = cuda4dnn::EltwiseOpType::SUB;
break;
default: return Ptr<BackendNode>(); // return empty cuda_node if the EltwiseOpType is unsupported type.
};

Expand Down

0 comments on commit a2edf4d

Please sign in to comment.