diff --git a/modules/calib3d/src/undistort.simd.hpp b/modules/calib3d/src/undistort.simd.hpp
index 7998a3b086ea..70bac4470245 100644
--- a/modules/calib3d/src/undistort.simd.hpp
+++ b/modules/calib3d/src/undistort.simd.hpp
@@ -89,8 +89,8 @@ class initUndistortRectifyMapComputer : public ParallelLoopBody
         s2(_s2), s3(_s3), s4(_s4)
     {
-#if CV_SIMD_64F
-        for (int i = 0; i < 2 * v_float64::nlanes; ++i)
+#if (CV_SIMD_64F || CV_SIMD_SCALABLE_64F)
+        for (int i = 0; i < 2 * VTraits<v_float64>::vlanes(); ++i)
         {
             s_x[i] = ir[0] * i;
             s_y[i] = ir[3] * i;
@@ -123,26 +123,26 @@ class initUndistortRectifyMapComputer : public ParallelLoopBody
         else
             CV_Assert(m1 != NULL);
-#if CV_SIMD_64F
+#if (CV_SIMD_64F || CV_SIMD_SCALABLE_64F)
         const v_float64 v_one = vx_setall_f64(1.0);
-        for (; j <= size.width - 2*v_float64::nlanes; j += 2*v_float64::nlanes, _x += 2*v_float64::nlanes * ir[0], _y += 2*v_float64::nlanes * ir[3], _w += 2*v_float64::nlanes * ir[6])
+        for (; j <= size.width - 2*VTraits<v_float64>::vlanes(); j += 2*VTraits<v_float64>::vlanes(), _x += 2*VTraits<v_float64>::vlanes() * ir[0], _y += 2*VTraits<v_float64>::vlanes() * ir[3], _w += 2*VTraits<v_float64>::vlanes() * ir[6])
         {
             v_float64 m_0, m_1, m_2, m_3;
-            m_2 = v_one / (vx_setall_f64(_w) + vx_load(s_w));
-            m_3 = v_one / (vx_setall_f64(_w) + vx_load(s_w + v_float64::nlanes));
+            m_2 = v_div(v_one, v_add(vx_setall_f64(_w), vx_load(this->s_w)));
+            m_3 = v_div(v_one, v_add(vx_setall_f64(_w), vx_load(this->s_w + VTraits<v_float64>::vlanes())));
             m_0 = vx_setall_f64(_x);
             m_1 = vx_setall_f64(_y);
-            v_float64 x_0 = (m_0 + vx_load(s_x)) * m_2;
-            v_float64 x_1 = (m_0 + vx_load(s_x + v_float64::nlanes)) * m_3;
-            v_float64 y_0 = (m_1 + vx_load(s_y)) * m_2;
-            v_float64 y_1 = (m_1 + vx_load(s_y + v_float64::nlanes)) * m_3;
+            v_float64 x_0 = v_mul(v_add(m_0, vx_load(this->s_x)), m_2);
+            v_float64 x_1 = v_mul(v_add(m_0, vx_load(this->s_x + VTraits<v_float64>::vlanes())), m_3);
+            v_float64 y_0 = v_mul(v_add(m_1, vx_load(this->s_y)), m_2);
+            v_float64 y_1 = v_mul(v_add(m_1, vx_load(this->s_y + VTraits<v_float64>::vlanes())), m_3);

-            v_float64 xd_0 = x_0 * x_0;
-            v_float64 yd_0 = y_0 * y_0;
-            v_float64 xd_1 = x_1 * x_1;
-            v_float64 yd_1 = y_1 * y_1;
+            v_float64 xd_0 = v_mul(x_0, x_0);
+            v_float64 yd_0 = v_mul(y_0, y_0);
+            v_float64 xd_1 = v_mul(x_1, x_1);
+            v_float64 yd_1 = v_mul(y_1, y_1);

-            v_float64 r2_0 = xd_0 + yd_0;
-            v_float64 r2_1 = xd_1 + yd_1;
+            v_float64 r2_0 = v_add(xd_0, yd_0);
+            v_float64 r2_1 = v_add(xd_1, yd_1);

             m_1 = vx_setall_f64(k3);
             m_2 = vx_setall_f64(k2);
@@ -151,18 +151,18 @@ class initUndistortRectifyMapComputer : public ParallelLoopBody
             m_1 = v_muladd(v_muladd(v_muladd(m_1, r2_1, m_2), r2_1, m_3), r2_1, v_one);
             m_3 = vx_setall_f64(k6);
             m_2 = vx_setall_f64(k5);
-            m_0 /= v_muladd(v_muladd(v_muladd(m_3, r2_0, m_2), r2_0, vx_setall_f64(k4)), r2_0, v_one);
-            m_1 /= v_muladd(v_muladd(v_muladd(m_3, r2_1, m_2), r2_1, vx_setall_f64(k4)), r2_1, v_one);
+            m_0 = v_div(m_0, v_muladd(v_muladd(v_muladd(m_3, r2_0, m_2), r2_0, vx_setall_f64(this->k4)), r2_0, v_one));
+            m_1 = v_div(m_1, v_muladd(v_muladd(v_muladd(m_3, r2_1, m_2), r2_1, vx_setall_f64(this->k4)), r2_1, v_one));
             m_3 = vx_setall_f64(2.0);
             xd_0 = v_muladd(m_3, xd_0, r2_0);
             yd_0 = v_muladd(m_3, yd_0, r2_0);
             xd_1 = v_muladd(m_3, xd_1, r2_1);
             yd_1 = v_muladd(m_3, yd_1, r2_1);
-            m_2 = x_0 * y_0 * m_3;
-            m_3 = x_1 * y_1 * m_3;
+            m_2 = v_mul(v_mul(x_0, y_0), m_3);
+            m_3 = v_mul(v_mul(x_1, y_1), m_3);

-            x_0 *= m_0; y_0 *= m_0; x_1 *= m_1; y_1 *= m_1;
+            x_0 = v_mul(x_0, m_0); y_0 = v_mul(y_0, m_0); x_1 = v_mul(x_1, m_1); y_1 = v_mul(y_1, m_1);

             m_0 = vx_setall_f64(p1);
             m_1 = vx_setall_f64(p2);
@@ -176,8 +176,8 @@ class initUndistortRectifyMapComputer : public ParallelLoopBody
             xd_1 = v_muladd(m_0, m_3, xd_1);
             yd_1 = v_muladd(m_1, m_3, yd_1);

-            m_0 = r2_0 * r2_0;
-            m_1 = r2_1 * r2_1;
+            m_0 = v_mul(r2_0, r2_0);
+            m_1 = v_mul(r2_1, r2_1);
             m_2 = vx_setall_f64(s2);
             m_3 = vx_setall_f64(s1);
             xd_0 = v_muladd(m_3, r2_0, v_muladd(m_2, m_0, xd_0));
@@ -203,17 +203,17 @@ class initUndistortRectifyMapComputer : public ParallelLoopBody
             r2_0 = v_muladd(m_0, xd_0, v_muladd(m_1, yd_0, m_2));
             r2_1 = v_muladd(m_0, xd_1, v_muladd(m_1, yd_1, m_2));
             m_0 = vx_setzero_f64();
-            r2_0 = v_select(r2_0 == m_0, v_one, v_one / r2_0);
-            r2_1 = v_select(r2_1 == m_0, v_one, v_one / r2_1);
+            r2_0 = v_select(v_eq(r2_0, m_0), v_one, v_div(v_one, r2_0));
+            r2_1 = v_select(v_eq(r2_1, m_0), v_one, v_div(v_one, r2_1));

             m_0 = vx_setall_f64(fx);
             m_1 = vx_setall_f64(u0);
             m_2 = vx_setall_f64(fy);
             m_3 = vx_setall_f64(v0);
-            x_0 = v_muladd(m_0 * r2_0, x_0, m_1);
-            y_0 = v_muladd(m_2 * r2_0, y_0, m_3);
-            x_1 = v_muladd(m_0 * r2_1, x_1, m_1);
-            y_1 = v_muladd(m_2 * r2_1, y_1, m_3);
+            x_0 = v_muladd(v_mul(m_0, r2_0), x_0, m_1);
+            y_0 = v_muladd(v_mul(m_2, r2_0), y_0, m_3);
+            x_1 = v_muladd(v_mul(m_0, r2_1), x_1, m_1);
+            y_1 = v_muladd(v_mul(m_2, r2_1), y_1, m_3);

             if (m1type == CV_32FC1)
             {
@@ -225,20 +225,20 @@ class initUndistortRectifyMapComputer : public ParallelLoopBody
                 v_float32 mf0, mf1;
                 v_zip(v_cvt_f32(x_0, x_1), v_cvt_f32(y_0, y_1), mf0, mf1);
                 v_store(&m1f[j * 2], mf0);
-                v_store(&m1f[j * 2 + v_float32::nlanes], mf1);
+                v_store(&m1f[j * 2 + VTraits<v_float32>::vlanes()], mf1);
             }
             else // m1type == CV_16SC2
             {
                 m_0 = vx_setall_f64(INTER_TAB_SIZE);
-                x_0 *= m_0; x_1 *= m_0; y_0 *= m_0; y_1 *= m_0;
+                x_0 = v_mul(x_0, m_0); x_1 = v_mul(x_1, m_0); y_0 = v_mul(y_0, m_0); y_1 = v_mul(y_1, m_0);
                 v_int32 mask = vx_setall_s32(INTER_TAB_SIZE - 1);
                 v_int32 iu = v_round(x_0, x_1);
                 v_int32 iv = v_round(y_0, y_1);
-                v_pack_u_store(&m2[j], (iu & mask) + (iv & mask) * vx_setall_s32(INTER_TAB_SIZE));
+                v_pack_u_store(&m2[j], v_add(v_and(iu, mask), v_mul(v_and(iv, mask), vx_setall_s32(INTER_TAB_SIZE))));
                 v_int32 out0, out1;
-                v_zip(iu >> INTER_BITS, iv >> INTER_BITS, out0, out1);
+                v_zip(v_shr<INTER_BITS>(iu), v_shr<INTER_BITS>(iv), out0, out1);
                 v_store(&m1[j * 2], v_pack(out0, out1));
             }
         }
@@ -302,10 +302,10 @@ class initUndistortRectifyMapComputer : public ParallelLoopBody
     double s2;
     double s3;
     double s4;
-#if CV_SIMD_64F
-    double s_x[2*v_float64::nlanes];
-    double s_y[2*v_float64::nlanes];
-    double s_w[2*v_float64::nlanes];
+#if (CV_SIMD_64F || CV_SIMD_SCALABLE_64F)
+    double s_x[2*VTraits<v_float64>::max_nlanes];
+    double s_y[2*VTraits<v_float64>::max_nlanes];
+    double s_w[2*VTraits<v_float64>::max_nlanes];
 #endif
 };
 }
diff --git a/modules/core/include/opencv2/core/hal/intrin.hpp b/modules/core/include/opencv2/core/hal/intrin.hpp
index 88a002145ae0..bf9a247054c1 100644
--- a/modules/core/include/opencv2/core/hal/intrin.hpp
+++ b/modules/core/include/opencv2/core/hal/intrin.hpp
@@ -972,6 +972,15 @@ namespace CV__SIMD_NAMESPACE {
     { \
         return a op b; \
     }
+    #define OPENCV_HAL_WRAP_EQ_OP(_Tpvec) \
+    inline _Tpvec v_eq(const _Tpvec& a, const _Tpvec& b) \
+    { \
+        return a == b; \
+    } \
+    inline _Tpvec v_ne(const _Tpvec& a, const _Tpvec& b) \
+    { \
+        return a != b; \
+    }

     #define OPENCV_HAL_WRAP_CMP(_Tpvec) \
     OPENCV_HAL_WRAP_CMP_OP(_Tpvec, eq, ==) \
@@ -984,11 +993,11 @@ namespace CV__SIMD_NAMESPACE {
     OPENCV_HAL_WRAP_CMP(v_uint8)
     OPENCV_HAL_WRAP_CMP(v_uint16)
     OPENCV_HAL_WRAP_CMP(v_uint32)
-    // OPENCV_HAL_WRAP_CMP(v_uint64)
+    OPENCV_HAL_WRAP_EQ_OP(v_uint64)
    OPENCV_HAL_WRAP_CMP(v_int8)
    OPENCV_HAL_WRAP_CMP(v_int16)
OPENCV_HAL_WRAP_CMP(v_int32) - // OPENCV_HAL_WRAP_CMP(v_int64) + OPENCV_HAL_WRAP_EQ_OP(v_int64) OPENCV_HAL_WRAP_CMP(v_float32) #if CV_SIMD_64F OPENCV_HAL_WRAP_CMP(v_float64) @@ -997,9 +1006,11 @@ namespace CV__SIMD_NAMESPACE { OPENCV_HAL_WRAP_CMP(v_uint8x16) OPENCV_HAL_WRAP_CMP(v_uint16x8) OPENCV_HAL_WRAP_CMP(v_uint32x4) + OPENCV_HAL_WRAP_EQ_OP(v_uint64x2) OPENCV_HAL_WRAP_CMP(v_int8x16) OPENCV_HAL_WRAP_CMP(v_int16x8) OPENCV_HAL_WRAP_CMP(v_int32x4) + OPENCV_HAL_WRAP_EQ_OP(v_int64x2) OPENCV_HAL_WRAP_CMP(v_float32x4) #if CV_SIMD_64F OPENCV_HAL_WRAP_CMP(v_float64x2) @@ -1009,9 +1020,11 @@ namespace CV__SIMD_NAMESPACE { OPENCV_HAL_WRAP_CMP(v_uint8x32) OPENCV_HAL_WRAP_CMP(v_uint16x16) OPENCV_HAL_WRAP_CMP(v_uint32x8) + OPENCV_HAL_WRAP_EQ_OP(v_uint64x4) OPENCV_HAL_WRAP_CMP(v_int8x32) OPENCV_HAL_WRAP_CMP(v_int16x16) OPENCV_HAL_WRAP_CMP(v_int32x8) + OPENCV_HAL_WRAP_EQ_OP(v_int64x4) OPENCV_HAL_WRAP_CMP(v_float32x8) #if CV_SIMD_64F OPENCV_HAL_WRAP_CMP(v_float64x4) diff --git a/modules/core/src/arithm.simd.hpp b/modules/core/src/arithm.simd.hpp index 1c97e91fbe2e..2c26680b51cf 100644 --- a/modules/core/src/arithm.simd.hpp +++ b/modules/core/src/arithm.simd.hpp @@ -69,7 +69,7 @@ #define DEFINE_SIMD_F32(fun, ...) \ DEFINE_SIMD(__CV_CAT(fun, 32f), float, v_float32, __VA_ARGS__) -#if CV_SIMD_64F +#if (CV_SIMD_64F || CV_SIMD_SCALABLE_64F) #define DEFINE_SIMD_F64(fun, ...) \ DEFINE_SIMD(__CV_CAT(fun, 64f), double, v_float64, __VA_ARGS__) #else @@ -104,7 +104,7 @@ namespace cv { namespace hal { #ifdef ARITHM_DEFINITIONS_ONLY -#if !CV_SIMD_64F +#if !(CV_SIMD_64F || CV_SIMD_SCALABLE_64F) typedef int v_float64; // dummy #endif @@ -266,7 +266,7 @@ struct op_absdiff template<> struct op_absdiff { -#if CV_SIMD || CV_SIMD_SCALABLE +#if (CV_SIMD || CV_SIMD_SCALABLE) static inline v_int8 r(const v_int8& a, const v_int8& b) { return v_absdiffs(a, b); } #endif @@ -276,7 +276,7 @@ struct op_absdiff template<> struct op_absdiff { -#if CV_SIMD || CV_SIMD_SCALABLE +#if (CV_SIMD || CV_SIMD_SCALABLE) static inline v_int16 r(const v_int16& a, const v_int16& b) { return v_absdiffs(a, b); } #endif @@ -286,7 +286,7 @@ struct op_absdiff template<> struct op_absdiff { -#if CV_SIMD || CV_SIMD_SCALABLE +#if (CV_SIMD || CV_SIMD_SCALABLE) static inline v_int32 r(const v_int32& a, const v_int32& b) { return v_reinterpret_as_s32(v_absdiff(a, b)); } #endif @@ -331,7 +331,7 @@ struct op_not //////////////////////////// Loaders ///////////////////////////////// -#if CV_SIMD || CV_SIMD_SCALABLE +#if (CV_SIMD || CV_SIMD_SCALABLE) template< template class OP, typename T1, typename Tvec> struct bin_loader @@ -396,7 +396,7 @@ template class OP, typename T1, typename Tv static void bin_loop(const T1* src1, size_t step1, const T1* src2, size_t step2, T1* dst, size_t step, int width, int height) { typedef OP op; -#if CV_SIMD || CV_SIMD_SCALABLE +#if (CV_SIMD || CV_SIMD_SCALABLE) typedef bin_loader ldr; const int wide_step = VTraits::vlanes(); #if !CV_NEON && CV_SIMD_WIDTH == 16 @@ -414,7 +414,7 @@ static void bin_loop(const T1* src1, size_t step1, const T1* src2, size_t step2, { int x = 0; - #if CV_SIMD || CV_SIMD_SCALABLE + #if (CV_SIMD || CV_SIMD_SCALABLE) #if !CV_NEON && !CV_MSA if (is_aligned(src1, src2, dst)) { @@ -464,7 +464,7 @@ static void bin_loop(const T1* src1, size_t step1, const T1* src2, size_t step2, vx_cleanup(); } -#if !CV_SIMD_64F +#if !(CV_SIMD_64F || CV_SIMD_SCALABLE_64F) template class OP, typename T1, typename Tvec> static void bin_loop_nosimd(const T1* src1, size_t step1, const T1* src2, size_t step2, T1* dst, size_t step, 
int width, int height) { @@ -496,7 +496,7 @@ static void bin_loop_nosimd(const T1* src1, size_t step1, const T1* src2, size_t #define BIN_LOOP64F bin_loop_nosimd #else #define BIN_LOOP64F bin_loop -#endif //!CV_SIMD_64F +#endif //!(CV_SIMD_64F || CV_SIMD_SCALABLE_64F) #endif // ARITHM_DEFINITIONS_ONLY @@ -621,7 +621,7 @@ struct op_cmpne //////////////////////////// Loaders ///////////////////////////////// -#if CV_SIMD || CV_SIMD_SCALABLE +#if (CV_SIMD || CV_SIMD_SCALABLE) // todo: add support for RW alignment & stream template class OP, typename T1, typename Tvec> struct cmp_loader_n @@ -701,7 +701,7 @@ template class OP, typename T1, typename Tv static void cmp_loop(const T1* src1, size_t step1, const T1* src2, size_t step2, uchar* dst, size_t step, int width, int height) { typedef OP op; -#if CV_SIMD || CV_SIMD_SCALABLE +#if (CV_SIMD || CV_SIMD_SCALABLE) typedef cmp_loader_n ldr; const int wide_step = VTraits::vlanes() * sizeof(T1); #endif // CV_SIMD @@ -713,7 +713,7 @@ static void cmp_loop(const T1* src1, size_t step1, const T1* src2, size_t step2, { int x = 0; - #if CV_SIMD || CV_SIMD_SCALABLE + #if (CV_SIMD || CV_SIMD_SCALABLE) for (; x <= width - wide_step; x += wide_step) { ldr::l(src1 + x, src2 + x, dst + x); @@ -768,7 +768,7 @@ static void cmp_loop(const T1* src1, size_t step1, const T1* src2, size_t step2, } } -#if !CV_SIMD_64F +#if !(CV_SIMD_64F || CV_SIMD_SCALABLE_64F) template< template class OP, typename T1> static void cmp_loop_nosimd(const T1* src1, size_t step1, const T1* src2, size_t step2, uchar* dst, size_t step, int width, int height) { @@ -822,7 +822,7 @@ static void cmp_loop_nosimd(const double* src1, size_t step1, const double* src2 break; } } -#endif // !CV_SIMD_64F +#endif // !(CV_SIMD_64F || CV_SIMD_SCALABLE_64F) #endif // ARITHM_DEFINITIONS_ONLY @@ -880,7 +880,7 @@ DEFINE_SIMD_ALL(cmp) //////////////////////////// Loaders /////////////////////////////// -#if CV_SIMD || CV_SIMD_SCALABLE +#if (CV_SIMD || CV_SIMD_SCALABLE) // todo: add support for RW alignment & stream template class OP, typename T1, typename T2, typename Tvec> struct scalar_loader_n @@ -1099,16 +1099,16 @@ struct scalar_loader_n }; #endif // CV_SIMD -#if CV_SIMD_64F +#if (CV_SIMD_64F || CV_SIMD_SCALABLE_64F) template class OP> struct scalar_loader_n { typedef OP op; typedef OP op64; - enum {step = v_int32::nlanes}; static inline void l(const int* src1, const int* src2, const double* scalar, int* dst) { + const int step = VTraits::vlanes(); v_int32 v_src1 = vx_load(src1); v_int32 v_src2 = vx_load(src2); v_int32 v_src1s = vx_load(src1 + step); @@ -1125,6 +1125,7 @@ struct scalar_loader_n } static inline void l(const int* src1, const double* scalar, int* dst) { + const int step = VTraits::vlanes(); v_int32 v_src1 = vx_load(src1); v_int32 v_src1s = vx_load(src1 + step); @@ -1169,10 +1170,10 @@ struct scalar_loader_n { typedef OP op; typedef OP op64; - enum {step = v_float32::nlanes}; static inline void l(const float* src1, const float* src2, const double* scalar, float* dst) { + const int step = VTraits::vlanes(); v_float32 v_src1 = vx_load(src1); v_float32 v_src2 = vx_load(src2); v_float32 v_src1s = vx_load(src1 + step); @@ -1186,6 +1187,7 @@ struct scalar_loader_n } static inline void l(const float* src1, const double* scalar, float* dst) { + const int step = VTraits::vlanes(); v_float32 v_src1 = vx_load(src1); v_float32 v_src1s = vx_load(src1 + step); @@ -1226,10 +1228,10 @@ template class OP> struct scalar_loader_n { typedef OP op; - enum {step = v_float64::nlanes}; static inline void l(const 
double* src1, const double* src2, const double* scalar, double* dst) { + const int step = VTraits::vlanes(); v_float64 v_src1 = vx_load(src1); v_float64 v_src2 = vx_load(src2); v_float64 v_src1s = vx_load(src1 + step); @@ -1243,6 +1245,7 @@ struct scalar_loader_n } static inline void l(const double* src1, const double* scalar, double* dst) { + const int step = VTraits::vlanes(); v_float64 v_src1 = vx_load(src1); v_float64 v_src1s = vx_load(src1 + step); @@ -1253,7 +1256,7 @@ struct scalar_loader_n v_store(dst + step, r1); } }; -#endif // CV_SIMD_64F +#endif // (CV_SIMD_64F || CV_SIMD_SCALABLE_64F) //////////////////////////// Loops ///////////////////////////////// @@ -1263,7 +1266,7 @@ static void scalar_loop(const T1* src1, size_t step1, const T1* src2, size_t ste T1* dst, size_t step, int width, int height, const T2* scalar) { typedef OP op; -#if CV_SIMD || CV_SIMD_SCALABLE +#if (CV_SIMD || CV_SIMD_SCALABLE) typedef scalar_loader_n ldr; const int wide_step = sizeof(T1) > sizeof(ushort) ? VTraits::vlanes() * 2 : sizeof(T1) == sizeof(uchar) ? VTraits::vlanes() / 2 : VTraits::vlanes(); @@ -1277,7 +1280,7 @@ static void scalar_loop(const T1* src1, size_t step1, const T1* src2, size_t ste { int x = 0; - #if CV_SIMD || CV_SIMD_SCALABLE + #if (CV_SIMD || CV_SIMD_SCALABLE) for (; x <= width - wide_step; x += wide_step) { ldr::l(src1 + x, src2 + x, scalar, dst + x); @@ -1309,7 +1312,7 @@ template class OP, typename T1 static void scalar_loop(const T1* src1, size_t step1, T1* dst, size_t step, int width, int height, const T2* scalar) { typedef OP op; -#if CV_SIMD || CV_SIMD_SCALABLE +#if (CV_SIMD || CV_SIMD_SCALABLE) typedef scalar_loader_n ldr; const int wide_step = sizeof(T1) > sizeof(ushort) ? VTraits::vlanes() * 2 : sizeof(T1) == sizeof(uchar) ? VTraits::vlanes() / 2 : VTraits::vlanes(); @@ -1322,7 +1325,7 @@ static void scalar_loop(const T1* src1, size_t step1, T1* dst, size_t step, int { int x = 0; - #if CV_SIMD || CV_SIMD_SCALABLE + #if (CV_SIMD || CV_SIMD_SCALABLE) for (; x <= width - wide_step; x += wide_step) { ldr::l(src1 + x, scalar, dst + x); @@ -1349,7 +1352,7 @@ static void scalar_loop(const T1* src1, size_t step1, T1* dst, size_t step, int vx_cleanup(); } -#if !CV_SIMD_64F +#if !(CV_SIMD_64F || CV_SIMD_SCALABLE_64F) // dual source template class OP, typename T1, typename T2, typename Tvec> static void scalar_loop_nosimd(const T1* src1, size_t step1, const T1* src2, size_t step2, @@ -1413,7 +1416,7 @@ static void scalar_loop_nosimd(const T1* src1, size_t step1, T1* dst, size_t ste #define SCALAR_LOOP64F scalar_loop_nosimd #else #define SCALAR_LOOP64F scalar_loop -#endif // !CV_SIMD_64F +#endif // !(CV_SIMD_64F || CV_SIMD_SCALABLE_64F) #endif // ARITHM_DEFINITIONS_ONLY @@ -1437,7 +1440,7 @@ struct op_mul template struct op_mul_scale { -#if CV_SIMD || CV_SIMD_SCALABLE +#if (CV_SIMD || CV_SIMD_SCALABLE) static inline v_float32 r(const v_float32& a, const v_float32& b, const T2* scalar) { const v_float32 v_scalar = vx_setall_f32(*scalar); @@ -1453,7 +1456,7 @@ struct op_mul_scale template<> struct op_mul_scale { -#if CV_SIMD_64F +#if (CV_SIMD_64F || CV_SIMD_SCALABLE_64F) static inline v_float64 r(const v_float64& a, const v_float64& b, const double* scalar) { const v_float64 v_scalar = vx_setall_f64(*scalar); @@ -1578,7 +1581,7 @@ struct op_div_f template struct op_div_scale { -#if CV_SIMD || CV_SIMD_SCALABLE +#if (CV_SIMD || CV_SIMD_SCALABLE) static inline v_float32 r(const v_float32& a, const v_float32& b, const T2* scalar) { const v_float32 v_scalar = vx_setall_f32(*scalar); @@ 
-1600,7 +1603,7 @@ struct op_div_scale template<> struct op_div_scale { -#if CV_SIMD || CV_SIMD_SCALABLE +#if (CV_SIMD || CV_SIMD_SCALABLE) static inline v_float32 r(const v_float32& a, const v_float32& b, const float* scalar) { const v_float32 v_scalar = vx_setall_f32(*scalar); @@ -1614,7 +1617,7 @@ struct op_div_scale template<> struct op_div_scale { -#if CV_SIMD_64F +#if (CV_SIMD_64F || CV_SIMD_SCALABLE_64F) static inline v_float64 r(const v_float64& a, const v_float64& b, const double* scalar) { const v_float64 v_scalar = vx_setall_f64(*scalar); @@ -1686,7 +1689,7 @@ DEFINE_SIMD_ALL(div, div_loop) template struct op_add_scale { -#if CV_SIMD || CV_SIMD_SCALABLE +#if (CV_SIMD || CV_SIMD_SCALABLE) static inline v_float32 r(const v_float32& a, const v_float32& b, const T2* scalar) { const v_float32 v_alpha = vx_setall_f32(*scalar); @@ -1702,7 +1705,7 @@ struct op_add_scale template<> struct op_add_scale { -#if CV_SIMD_64F +#if (CV_SIMD_64F || CV_SIMD_SCALABLE_64F) static inline v_float64 r(const v_float64& a, const v_float64& b, const double* scalar) { const v_float64 v_alpha = vx_setall_f64(*scalar); @@ -1719,7 +1722,7 @@ struct op_add_scale template struct op_add_weighted { -#if CV_SIMD || CV_SIMD_SCALABLE +#if (CV_SIMD || CV_SIMD_SCALABLE) static inline v_float32 r(const v_float32& a, const v_float32& b, const T2* scalars) { const v_float32 v_alpha = vx_setall_f32(scalars[0]); @@ -1737,7 +1740,7 @@ struct op_add_weighted template<> struct op_add_weighted { -#if CV_SIMD_64F +#if (CV_SIMD_64F || CV_SIMD_SCALABLE_64F) static inline v_float64 r(const v_float64& a, const v_float64& b, const double* scalars) { const v_float64 v_alpha = vx_setall_f64(scalars[0]); @@ -1836,7 +1839,7 @@ DEFINE_SIMD_F64(addWeighted, add_weighted_loop_d) template struct op_recip { -#if CV_SIMD || CV_SIMD_SCALABLE +#if (CV_SIMD || CV_SIMD_SCALABLE) static inline v_float32 r(const v_float32& a, const T2* scalar) { const v_float32 v_scalar = vx_setall_f32(*scalar); @@ -1858,7 +1861,7 @@ struct op_recip template<> struct op_recip { -#if CV_SIMD || CV_SIMD_SCALABLE +#if (CV_SIMD || CV_SIMD_SCALABLE) static inline v_float32 r(const v_float32& a, const float* scalar) { const v_float32 v_scalar = vx_setall_f32(*scalar); @@ -1872,7 +1875,7 @@ struct op_recip template<> struct op_recip { -#if CV_SIMD_64F +#if (CV_SIMD_64F || CV_SIMD_SCALABLE_64F) static inline v_float64 r(const v_float64& a, const double* scalar) { const v_float64 v_scalar = vx_setall_f64(*scalar); diff --git a/modules/core/src/has_non_zero.simd.hpp b/modules/core/src/has_non_zero.simd.hpp index 6ea8bcd7d2d1..e9f9b683d695 100644 --- a/modules/core/src/has_non_zero.simd.hpp +++ b/modules/core/src/has_non_zero.simd.hpp @@ -87,11 +87,11 @@ static bool hasNonZero8u( const uchar* src, size_t len ) { bool res = false; const uchar* srcEnd = src+len; -#if CV_SIMD +#if (CV_SIMD || CV_SIMD_SCALABLE) typedef v_uint8 v_type; const v_type v_zero = vx_setzero_u8(); constexpr const int unrollCount = 2; - int step = v_type::nlanes * unrollCount; + int step = VTraits::vlanes() * unrollCount; int len0 = len & -step; const uchar* srcSimdEnd = src+len0; @@ -99,10 +99,10 @@ static bool hasNonZero8u( const uchar* src, size_t len ) while(!res && countSIMD--) { v_type v0 = vx_load(src); - src += v_type::nlanes; + src += VTraits::vlanes(); v_type v1 = vx_load(src); - src += v_type::nlanes; - res = v_check_any(((v0 | v1) != v_zero)); + src += VTraits::vlanes(); + res = v_check_any((v_ne(v_or(v0, v1), v_zero))); } v_cleanup(); @@ -114,11 +114,11 @@ static bool hasNonZero16u( const 
ushort* src, size_t len ) { bool res = false; const ushort* srcEnd = src+len; -#if CV_SIMD +#if (CV_SIMD || CV_SIMD_SCALABLE) typedef v_uint16 v_type; const v_type v_zero = vx_setzero_u16(); constexpr const int unrollCount = 4; - int step = v_type::nlanes * unrollCount; + int step = VTraits::vlanes() * unrollCount; int len0 = len & -step; const ushort* srcSimdEnd = src+len0; @@ -126,16 +126,16 @@ static bool hasNonZero16u( const ushort* src, size_t len ) while(!res && countSIMD--) { v_type v0 = vx_load(src); - src += v_type::nlanes; + src += VTraits::vlanes(); v_type v1 = vx_load(src); - src += v_type::nlanes; + src += VTraits::vlanes(); v_type v2 = vx_load(src); - src += v_type::nlanes; + src += VTraits::vlanes(); v_type v3 = vx_load(src); - src += v_type::nlanes; - v0 |= v1; - v2 |= v3; - res = v_check_any(((v0 | v2) != v_zero)); + src += VTraits::vlanes(); + v0 = v_or(v0, v1); + v2 = v_or(v2, v3); + res = v_check_any((v_ne(v_or(v0, v2), v_zero))); } v_cleanup(); @@ -147,11 +147,11 @@ static bool hasNonZero32s( const int* src, size_t len ) { bool res = false; const int* srcEnd = src+len; -#if CV_SIMD +#if (CV_SIMD || CV_SIMD_SCALABLE) typedef v_int32 v_type; const v_type v_zero = vx_setzero_s32(); constexpr const int unrollCount = 8; - int step = v_type::nlanes * unrollCount; + int step = VTraits::vlanes() * unrollCount; int len0 = len & -step; const int* srcSimdEnd = src+len0; @@ -159,29 +159,29 @@ static bool hasNonZero32s( const int* src, size_t len ) while(!res && countSIMD--) { v_type v0 = vx_load(src); - src += v_type::nlanes; + src += VTraits::vlanes(); v_type v1 = vx_load(src); - src += v_type::nlanes; + src += VTraits::vlanes(); v_type v2 = vx_load(src); - src += v_type::nlanes; + src += VTraits::vlanes(); v_type v3 = vx_load(src); - src += v_type::nlanes; + src += VTraits::vlanes(); v_type v4 = vx_load(src); - src += v_type::nlanes; + src += VTraits::vlanes(); v_type v5 = vx_load(src); - src += v_type::nlanes; + src += VTraits::vlanes(); v_type v6 = vx_load(src); - src += v_type::nlanes; + src += VTraits::vlanes(); v_type v7 = vx_load(src); - src += v_type::nlanes; - v0 |= v1; - v2 |= v3; - v4 |= v5; - v6 |= v7; - - v0 |= v2; - v4 |= v6; - res = v_check_any(((v0 | v4) != v_zero)); + src += VTraits::vlanes(); + v0 = v_or(v0, v1); + v2 = v_or(v2, v3); + v4 = v_or(v4, v5); + v6 = v_or(v6, v7); + + v0 = v_or(v0, v2); + v4 = v_or(v4, v6); + res = v_check_any((v_ne(v_or(v0, v4), v_zero))); } v_cleanup(); @@ -193,11 +193,11 @@ static bool hasNonZero32f( const float* src, size_t len ) { bool res = false; const float* srcEnd = src+len; -#if CV_SIMD +#if (CV_SIMD || CV_SIMD_SCALABLE) typedef v_float32 v_type; const v_type v_zero = vx_setzero_f32(); constexpr const int unrollCount = 8; - int step = v_type::nlanes * unrollCount; + int step = VTraits::vlanes() * unrollCount; int len0 = len & -step; const float* srcSimdEnd = src+len0; @@ -205,30 +205,30 @@ static bool hasNonZero32f( const float* src, size_t len ) while(!res && countSIMD--) { v_type v0 = vx_load(src); - src += v_type::nlanes; + src += VTraits::vlanes(); v_type v1 = vx_load(src); - src += v_type::nlanes; + src += VTraits::vlanes(); v_type v2 = vx_load(src); - src += v_type::nlanes; + src += VTraits::vlanes(); v_type v3 = vx_load(src); - src += v_type::nlanes; + src += VTraits::vlanes(); v_type v4 = vx_load(src); - src += v_type::nlanes; + src += VTraits::vlanes(); v_type v5 = vx_load(src); - src += v_type::nlanes; + src += VTraits::vlanes(); v_type v6 = vx_load(src); - src += v_type::nlanes; + src += VTraits::vlanes(); v_type 
v7 = vx_load(src); - src += v_type::nlanes; - v0 |= v1; - v2 |= v3; - v4 |= v5; - v6 |= v7; - - v0 |= v2; - v4 |= v6; + src += VTraits::vlanes(); + v0 = v_or(v0, v1); + v2 = v_or(v2, v3); + v4 = v_or(v4, v5); + v6 = v_or(v6, v7); + + v0 = v_or(v0, v2); + v4 = v_or(v4, v6); //res = v_check_any(((v0 | v4) != v_zero));//beware : (NaN != 0) returns "false" since != is mapped to _CMP_NEQ_OQ and not _CMP_NEQ_UQ - res = !v_check_all(((v0 | v4) == v_zero)); + res = !v_check_all((v_eq(v_or(v0, v4), v_zero))); } v_cleanup(); @@ -240,11 +240,11 @@ static bool hasNonZero64f( const double* src, size_t len ) { bool res = false; const double* srcEnd = src+len; -#if CV_SIMD_64F +#if (CV_SIMD_64F || CV_SIMD_SCALABLE_64F) typedef v_float64 v_type; const v_type v_zero = vx_setzero_f64(); constexpr const int unrollCount = 16; - int step = v_type::nlanes * unrollCount; + int step = VTraits::vlanes() * unrollCount; int len0 = len & -step; const double* srcSimdEnd = src+len0; @@ -252,55 +252,55 @@ static bool hasNonZero64f( const double* src, size_t len ) while(!res && countSIMD--) { v_type v0 = vx_load(src); - src += v_type::nlanes; + src += VTraits::vlanes(); v_type v1 = vx_load(src); - src += v_type::nlanes; + src += VTraits::vlanes(); v_type v2 = vx_load(src); - src += v_type::nlanes; + src += VTraits::vlanes(); v_type v3 = vx_load(src); - src += v_type::nlanes; + src += VTraits::vlanes(); v_type v4 = vx_load(src); - src += v_type::nlanes; + src += VTraits::vlanes(); v_type v5 = vx_load(src); - src += v_type::nlanes; + src += VTraits::vlanes(); v_type v6 = vx_load(src); - src += v_type::nlanes; + src += VTraits::vlanes(); v_type v7 = vx_load(src); - src += v_type::nlanes; + src += VTraits::vlanes(); v_type v8 = vx_load(src); - src += v_type::nlanes; + src += VTraits::vlanes(); v_type v9 = vx_load(src); - src += v_type::nlanes; + src += VTraits::vlanes(); v_type v10 = vx_load(src); - src += v_type::nlanes; + src += VTraits::vlanes(); v_type v11 = vx_load(src); - src += v_type::nlanes; + src += VTraits::vlanes(); v_type v12 = vx_load(src); - src += v_type::nlanes; + src += VTraits::vlanes(); v_type v13 = vx_load(src); - src += v_type::nlanes; + src += VTraits::vlanes(); v_type v14 = vx_load(src); - src += v_type::nlanes; + src += VTraits::vlanes(); v_type v15 = vx_load(src); - src += v_type::nlanes; - v0 |= v1; - v2 |= v3; - v4 |= v5; - v6 |= v7; - v8 |= v9; - v10 |= v11; - v12 |= v13; - v14 |= v15; - - v0 |= v2; - v4 |= v6; - v8 |= v10; - v12 |= v14; - - v0 |= v4; - v8 |= v12; + src += VTraits::vlanes(); + v0 = v_or(v0, v1); + v2 = v_or(v2, v3); + v4 = v_or(v4, v5); + v6 = v_or(v6, v7); + v8 = v_or(v8, v9); + v10 = v_or(v10, v11); + v12 = v_or(v12, v13); + v14 = v_or(v14, v15); + + v0 = v_or(v0, v2); + v4 = v_or(v4, v6); + v8 = v_or(v8, v10); + v12 = v_or(v12, v14); + + v0 = v_or(v0, v4); + v8 = v_or(v8, v12); //res = v_check_any(((v0 | v8) != v_zero));//beware : (NaN != 0) returns "false" since != is mapped to _CMP_NEQ_OQ and not _CMP_NEQ_UQ - res = !v_check_all(((v0 | v8) == v_zero)); + res = !v_check_all((v_eq(v_or(v0, v8), v_zero))); } v_cleanup(); diff --git a/modules/core/src/lapack.cpp b/modules/core/src/lapack.cpp index 43c6d07d5897..12376c250841 100644 --- a/modules/core/src/lapack.cpp +++ b/modules/core/src/lapack.cpp @@ -276,7 +276,7 @@ template struct VBLAS int givens(T*, T*, int, T, T) const { return 0; } }; -#if CV_SIMD // TODO: enable for CV_SIMD_SCALABLE_64F +#if CV_SIMD // TODO: enable for CV_SIMD_SCALABLE, GCC 13 related template<> inline int VBLAS::dot(const float* a, const float* b, int n, 
float* result) const { if( n < 2*VTraits::vlanes() ) diff --git a/modules/core/src/matmul.simd.hpp b/modules/core/src/matmul.simd.hpp index 62aacc0d63f5..3a9dbd9be800 100644 --- a/modules/core/src/matmul.simd.hpp +++ b/modules/core/src/matmul.simd.hpp @@ -2549,6 +2549,7 @@ double dotProd_16s(const short* src1, const short* src2, int len) double dotProd_32s(const int* src1, const int* src2, int len) { #if CV_SIMD_64F // TODO: enable for CV_SIMD_SCALABLE_64F +// Test failed on RVV(QEMU): Too big difference (=1.20209e-08 > 1.11022e-12) double r = .0; int i = 0; const int step = VTraits::vlanes(); diff --git a/modules/core/src/mean.simd.hpp b/modules/core/src/mean.simd.hpp index bb815adc1ccd..e34293ee385d 100644 --- a/modules/core/src/mean.simd.hpp +++ b/modules/core/src/mean.simd.hpp @@ -24,7 +24,7 @@ struct SumSqr_SIMD } }; -#if CV_SIMD || CV_SIMD_SCALABLE +#if (CV_SIMD || CV_SIMD_SCALABLE) template <> struct SumSqr_SIMD diff --git a/modules/dnn/src/int8layers/convolution_layer.cpp b/modules/dnn/src/int8layers/convolution_layer.cpp index 0b3c5480a071..60301a406c57 100644 --- a/modules/dnn/src/int8layers/convolution_layer.cpp +++ b/modules/dnn/src/int8layers/convolution_layer.cpp @@ -19,7 +19,7 @@ namespace cv namespace dnn { -#if CV_SIMD +#if CV_SIMD128 static inline void v_expand_mul_add(const v_int8x16& a, const v_int8x16& b, v_int32x4& out0, v_int32x4& out1, v_int32x4& out2, v_int32x4& out3) { @@ -1015,7 +1015,7 @@ class ConvolutionLayerInt8Impl CV_FINAL : public BaseConvolutionLayerInt8Impl outptr[0] = std::min(std::max(out1, -128), 127); out_j = 1; } - #if CV_SIMD + #if CV_SIMD128 if( stride_w == 1 ) { const int out_delta = 16; diff --git a/modules/dnn/src/int8layers/fully_connected_layer.cpp b/modules/dnn/src/int8layers/fully_connected_layer.cpp index 826ed12ad955..b8e3bd6ee542 100644 --- a/modules/dnn/src/int8layers/fully_connected_layer.cpp +++ b/modules/dnn/src/int8layers/fully_connected_layer.cpp @@ -305,7 +305,7 @@ class FullyConnectedLayerInt8Impl CV_FINAL : public InnerProductLayerInt8 #endif { int i = 0; - #if CV_SIMD + #if CV_SIMD128 for( ; i <= nw - 4; i += 4, wptr += 4*wstep ) { v_int32x4 vs0 = v_setzero_s32(), vs1 = v_setzero_s32(), diff --git a/modules/imgproc/src/accum.simd.hpp b/modules/imgproc/src/accum.simd.hpp index 7fe7aabeaf82..13363026135d 100644 --- a/modules/imgproc/src/accum.simd.hpp +++ b/modules/imgproc/src/accum.simd.hpp @@ -475,9 +475,9 @@ void acc_simd_(const ushort* src, float* dst, const uchar* mask, int len, int cn void acc_simd_(const float* src, float* dst, const uchar* mask, int len, int cn) { int x = 0; -#if CV_SIMD - const int cVectorWidth = v_uint16::nlanes; - const int step = v_float32::nlanes; +#if (CV_SIMD || CV_SIMD_SCALABLE) + const int cVectorWidth = VTraits::vlanes(); + const int step = VTraits::vlanes(); if (!mask) { @@ -493,8 +493,8 @@ void acc_simd_(const float* src, float* dst, const uchar* mask, int len, int cn) #else for (; x <= size - cVectorWidth; x += cVectorWidth) { - v_store(dst + x, vx_load(dst + x) + vx_load(src + x)); - v_store(dst + x + step, vx_load(dst + x + step) + vx_load(src + x + step)); + v_store(dst + x, v_add(vx_load(dst + x), vx_load(src + x))); + v_store(dst + x + step, v_add(vx_load(dst + x + step), vx_load(src + x + step))); } #endif // CV_AVX && !CV_AVX2 } @@ -508,11 +508,11 @@ void acc_simd_(const float* src, float* dst, const uchar* mask, int len, int cn) v_uint16 v_masku16 = vx_load_expand(mask + x); v_uint32 v_masku320, v_masku321; v_expand(v_masku16, v_masku320, v_masku321); - v_float32 v_mask0 = 
v_reinterpret_as_f32(~(v_masku320 == v_reinterpret_as_u32(v_0))); - v_float32 v_mask1 = v_reinterpret_as_f32(~(v_masku321 == v_reinterpret_as_u32(v_0))); + v_float32 v_mask0 = v_reinterpret_as_f32(v_not(v_eq(v_masku320, v_reinterpret_as_u32(v_0)))); + v_float32 v_mask1 = v_reinterpret_as_f32(v_not(v_eq(v_masku321, v_reinterpret_as_u32(v_0)))); - v_store(dst + x, vx_load(dst + x) + (vx_load(src + x) & v_mask0)); - v_store(dst + x + step, vx_load(dst + x + step) + (vx_load(src + x + step) & v_mask1)); + v_store(dst + x, v_add(vx_load(dst + x), v_and(vx_load(src + x), v_mask0))); + v_store(dst + x + step, v_add(vx_load(dst + x + step), v_and(vx_load(src + x + step), v_mask1))); } } else if (cn == 3) @@ -522,25 +522,25 @@ void acc_simd_(const float* src, float* dst, const uchar* mask, int len, int cn) v_uint16 v_masku16 = vx_load_expand(mask + x); v_uint32 v_masku320, v_masku321; v_expand(v_masku16, v_masku320, v_masku321); - v_float32 v_mask0 = v_reinterpret_as_f32(~(v_masku320 == v_reinterpret_as_u32(v_0))); - v_float32 v_mask1 = v_reinterpret_as_f32(~(v_masku321 == v_reinterpret_as_u32(v_0))); + v_float32 v_mask0 = v_reinterpret_as_f32(v_not(v_eq(v_masku320, v_reinterpret_as_u32(v_0)))); + v_float32 v_mask1 = v_reinterpret_as_f32(v_not(v_eq(v_masku321, v_reinterpret_as_u32(v_0)))); v_float32 v_src00, v_src01, v_src10, v_src11, v_src20, v_src21; v_load_deinterleave(src + x * cn, v_src00, v_src10, v_src20); v_load_deinterleave(src + (x + step) * cn, v_src01, v_src11, v_src21); - v_src00 = v_src00 & v_mask0; - v_src01 = v_src01 & v_mask1; - v_src10 = v_src10 & v_mask0; - v_src11 = v_src11 & v_mask1; - v_src20 = v_src20 & v_mask0; - v_src21 = v_src21 & v_mask1; + v_src00 = v_and(v_src00, v_mask0); + v_src01 = v_and(v_src01, v_mask1); + v_src10 = v_and(v_src10, v_mask0); + v_src11 = v_and(v_src11, v_mask1); + v_src20 = v_and(v_src20, v_mask0); + v_src21 = v_and(v_src21, v_mask1); v_float32 v_dst00, v_dst01, v_dst10, v_dst11, v_dst20, v_dst21; v_load_deinterleave(dst + x * cn, v_dst00, v_dst10, v_dst20); v_load_deinterleave(dst + (x + step) * cn, v_dst01, v_dst11, v_dst21); - v_store_interleave(dst + x * cn, v_dst00 + v_src00, v_dst10 + v_src10, v_dst20 + v_src20); - v_store_interleave(dst + (x + step) * cn, v_dst01 + v_src01, v_dst11 + v_src11, v_dst21 + v_src21); + v_store_interleave(dst + x * cn, v_add(v_dst00, v_src00), v_add(v_dst10, v_src10), v_add(v_dst20, v_src20)); + v_store_interleave(dst + (x + step) * cn, v_add(v_dst01, v_src01), v_add(v_dst11, v_src11), v_add(v_dst21, v_src21)); } } } @@ -862,9 +862,9 @@ void acc_simd_(const ushort* src, double* dst, const uchar* mask, int len, int c void acc_simd_(const float* src, double* dst, const uchar* mask, int len, int cn) { int x = 0; -#if CV_SIMD_64F - const int cVectorWidth = v_float32::nlanes; - const int step = v_float64::nlanes; +#if (CV_SIMD_64F || CV_SIMD_SCALABLE_64F) + const int cVectorWidth = VTraits::vlanes(); + const int step = VTraits::vlanes(); if (!mask) { @@ -889,8 +889,8 @@ void acc_simd_(const float* src, double* dst, const uchar* mask, int len, int cn v_float64 v_src0 = v_cvt_f64(v_src); v_float64 v_src1 = v_cvt_f64_high(v_src); - v_store(dst + x, vx_load(dst + x) + v_src0); - v_store(dst + x + step, vx_load(dst + x + step) + v_src1); + v_store(dst + x, v_add(vx_load(dst + x), v_src0)); + v_store(dst + x + step, v_add(vx_load(dst + x + step), v_src1)); } #endif // CV_AVX && !CV_AVX2 } @@ -904,15 +904,15 @@ void acc_simd_(const float* src, double* dst, const uchar* mask, int len, int cn v_uint32 v_masku32 = 
vx_load_expand_q(mask + x); v_uint64 v_masku640, v_masku641; v_expand(v_masku32, v_masku640, v_masku641); - v_float64 v_mask0 = v_reinterpret_as_f64(~(v_masku640 == v_0)); - v_float64 v_mask1 = v_reinterpret_as_f64(~(v_masku641 == v_0)); + v_float64 v_mask0 = v_reinterpret_as_f64(v_not(v_eq(v_masku640, v_0))); + v_float64 v_mask1 = v_reinterpret_as_f64(v_not(v_eq(v_masku641, v_0))); v_float32 v_src = vx_load(src + x); - v_float64 v_src0 = v_cvt_f64(v_src) & v_mask0; - v_float64 v_src1 = v_cvt_f64_high(v_src) & v_mask1; + v_float64 v_src0 = v_and(v_cvt_f64(v_src), v_mask0); + v_float64 v_src1 = v_and(v_cvt_f64_high(v_src), v_mask1); - v_store(dst + x, vx_load(dst + x) + v_src0); - v_store(dst + x + step, vx_load(dst + x + step) + v_src1); + v_store(dst + x, v_add(vx_load(dst + x), v_src0)); + v_store(dst + x + step, v_add(vx_load(dst + x + step), v_src1)); } } else if (cn == 3) @@ -922,24 +922,24 @@ void acc_simd_(const float* src, double* dst, const uchar* mask, int len, int cn v_uint32 v_masku32 = vx_load_expand_q(mask + x); v_uint64 v_masku640, v_masku641; v_expand(v_masku32, v_masku640, v_masku641); - v_float64 v_mask0 = v_reinterpret_as_f64(~(v_masku640 == v_0)); - v_float64 v_mask1 = v_reinterpret_as_f64(~(v_masku641 == v_0)); + v_float64 v_mask0 = v_reinterpret_as_f64(v_not(v_eq(v_masku640, v_0))); + v_float64 v_mask1 = v_reinterpret_as_f64(v_not(v_eq(v_masku641, v_0))); v_float32 v_src0, v_src1, v_src2; v_load_deinterleave(src + x * cn, v_src0, v_src1, v_src2); - v_float64 v_src00 = v_cvt_f64(v_src0) & v_mask0; - v_float64 v_src01 = v_cvt_f64_high(v_src0) & v_mask1; - v_float64 v_src10 = v_cvt_f64(v_src1) & v_mask0; - v_float64 v_src11 = v_cvt_f64_high(v_src1) & v_mask1; - v_float64 v_src20 = v_cvt_f64(v_src2) & v_mask0; - v_float64 v_src21 = v_cvt_f64_high(v_src2) & v_mask1; + v_float64 v_src00 = v_and(v_cvt_f64(v_src0), v_mask0); + v_float64 v_src01 = v_and(v_cvt_f64_high(v_src0), v_mask1); + v_float64 v_src10 = v_and(v_cvt_f64(v_src1), v_mask0); + v_float64 v_src11 = v_and(v_cvt_f64_high(v_src1), v_mask1); + v_float64 v_src20 = v_and(v_cvt_f64(v_src2), v_mask0); + v_float64 v_src21 = v_and(v_cvt_f64_high(v_src2), v_mask1); v_float64 v_dst00, v_dst01, v_dst10, v_dst11, v_dst20, v_dst21; v_load_deinterleave(dst + x * cn, v_dst00, v_dst10, v_dst20); v_load_deinterleave(dst + (x + step) * cn, v_dst01, v_dst11, v_dst21); - v_store_interleave(dst + x * cn, v_dst00 + v_src00, v_dst10 + v_src10, v_dst20 + v_src20); - v_store_interleave(dst + (x + step) * cn, v_dst01 + v_src01, v_dst11 + v_src11, v_dst21 + v_src21); + v_store_interleave(dst + x * cn, v_add(v_dst00, v_src00), v_add(v_dst10, v_src10), v_add(v_dst20, v_src20)); + v_store_interleave(dst + (x + step) * cn, v_add(v_dst01, v_src01), v_add(v_dst11, v_src11), v_add(v_dst21, v_src21)); } } } @@ -950,9 +950,9 @@ void acc_simd_(const float* src, double* dst, const uchar* mask, int len, int cn void acc_simd_(const double* src, double* dst, const uchar* mask, int len, int cn) { int x = 0; -#if CV_SIMD_64F - const int cVectorWidth = v_float64::nlanes * 2; - const int step = v_float64::nlanes; +#if (CV_SIMD_64F || CV_SIMD_SCALABLE_64F) + const int cVectorWidth = VTraits::vlanes() * 2; + const int step = VTraits::vlanes(); if (!mask) { @@ -971,8 +971,8 @@ void acc_simd_(const double* src, double* dst, const uchar* mask, int len, int c v_float64 v_src0 = vx_load(src + x); v_float64 v_src1 = vx_load(src + x + step); - v_store(dst + x, vx_load(dst + x) + v_src0); - v_store(dst + x + step, vx_load(dst + x + step) + v_src1); + v_store(dst + x, 
v_add(vx_load(dst + x), v_src0)); + v_store(dst + x + step, v_add(vx_load(dst + x + step), v_src1)); } #endif // CV_AVX && !CV_AVX2 } @@ -986,14 +986,14 @@ void acc_simd_(const double* src, double* dst, const uchar* mask, int len, int c v_uint32 v_masku32 = vx_load_expand_q(mask + x); v_uint64 v_masku640, v_masku641; v_expand(v_masku32, v_masku640, v_masku641); - v_float64 v_mask0 = v_reinterpret_as_f64(~(v_masku640 == v_0)); - v_float64 v_mask1 = v_reinterpret_as_f64(~(v_masku641 == v_0)); + v_float64 v_mask0 = v_reinterpret_as_f64(v_not(v_eq(v_masku640, v_0))); + v_float64 v_mask1 = v_reinterpret_as_f64(v_not(v_eq(v_masku641, v_0))); v_float64 v_src0 = vx_load(src + x); v_float64 v_src1 = vx_load(src + x + step); - v_store(dst + x, vx_load(dst + x) + (v_src0 & v_mask0)); - v_store(dst + x + step, vx_load(dst + x + step) + (v_src1 & v_mask1)); + v_store(dst + x, v_add(vx_load(dst + x), v_and(v_src0, v_mask0))); + v_store(dst + x + step, v_add(vx_load(dst + x + step), v_and(v_src1, v_mask1))); } } else if (cn == 3) @@ -1003,25 +1003,25 @@ void acc_simd_(const double* src, double* dst, const uchar* mask, int len, int c v_uint32 v_masku32 = vx_load_expand_q(mask + x); v_uint64 v_masku640, v_masku641; v_expand(v_masku32, v_masku640, v_masku641); - v_float64 v_mask0 = v_reinterpret_as_f64(~(v_masku640 == v_0)); - v_float64 v_mask1 = v_reinterpret_as_f64(~(v_masku641 == v_0)); + v_float64 v_mask0 = v_reinterpret_as_f64(v_not(v_eq(v_masku640, v_0))); + v_float64 v_mask1 = v_reinterpret_as_f64(v_not(v_eq(v_masku641, v_0))); v_float64 v_src00, v_src10, v_src20, v_src01, v_src11, v_src21; v_load_deinterleave(src + x * cn, v_src00, v_src10, v_src20); v_load_deinterleave(src + (x + step) * cn, v_src01, v_src11, v_src21); - v_src00 = v_src00 & v_mask0; - v_src01 = v_src01 & v_mask1; - v_src10 = v_src10 & v_mask0; - v_src11 = v_src11 & v_mask1; - v_src20 = v_src20 & v_mask0; - v_src21 = v_src21 & v_mask1; + v_src00 = v_and(v_src00, v_mask0); + v_src01 = v_and(v_src01, v_mask1); + v_src10 = v_and(v_src10, v_mask0); + v_src11 = v_and(v_src11, v_mask1); + v_src20 = v_and(v_src20, v_mask0); + v_src21 = v_and(v_src21, v_mask1); v_float64 v_dst00, v_dst10, v_dst20, v_dst01, v_dst11, v_dst21; v_load_deinterleave(dst + x * cn, v_dst00, v_dst10, v_dst20); v_load_deinterleave(dst + (x + step) * cn, v_dst01, v_dst11, v_dst21); - v_store_interleave(dst + x * cn, v_dst00 + v_src00, v_dst10 + v_src10, v_dst20 + v_src20); - v_store_interleave(dst + (x + step) * cn, v_dst01 + v_src01, v_dst11 + v_src11, v_dst21 + v_src21); + v_store_interleave(dst + x * cn, v_add(v_dst00, v_src00), v_add(v_dst10, v_src10), v_add(v_dst20, v_src20)); + v_store_interleave(dst + (x + step) * cn, v_add(v_dst01, v_src01), v_add(v_dst11, v_src11), v_add(v_dst21, v_src21)); } } } @@ -1256,9 +1256,9 @@ void accSqr_simd_(const ushort* src, float* dst, const uchar* mask, int len, int void accSqr_simd_(const float* src, float* dst, const uchar* mask, int len, int cn) { int x = 0; -#if CV_SIMD - const int cVectorWidth = v_uint16::nlanes; - const int step = v_float32::nlanes; +#if (CV_SIMD || CV_SIMD_SCALABLE) + const int cVectorWidth = VTraits::vlanes(); + const int step = VTraits::vlanes(); if (!mask) { @@ -1293,12 +1293,12 @@ void accSqr_simd_(const float* src, float* dst, const uchar* mask, int len, int v_uint16 v_mask16 = vx_load_expand(mask + x); v_uint32 v_mask_0, v_mask_1; v_expand(v_mask16, v_mask_0, v_mask_1); - v_float32 v_mask0 = v_reinterpret_as_f32(~(v_mask_0 == v_0)); - v_float32 v_mask1 = v_reinterpret_as_f32(~(v_mask_1 == v_0)); + 
v_float32 v_mask0 = v_reinterpret_as_f32(v_not(v_eq(v_mask_0, v_0))); + v_float32 v_mask1 = v_reinterpret_as_f32(v_not(v_eq(v_mask_1, v_0))); v_float32 v_src0 = vx_load(src + x); v_float32 v_src1 = vx_load(src + x + step); - v_src0 = v_src0 & v_mask0; - v_src1 = v_src1 & v_mask1; + v_src0 = v_and(v_src0, v_mask0); + v_src1 = v_and(v_src1, v_mask1); v_store(dst + x, v_fma(v_src0, v_src0, vx_load(dst + x))); v_store(dst + x + step, v_fma(v_src1, v_src1, vx_load(dst + x + step))); @@ -1311,18 +1311,18 @@ void accSqr_simd_(const float* src, float* dst, const uchar* mask, int len, int v_uint16 v_mask16 = vx_load_expand(mask + x); v_uint32 v_mask_0, v_mask_1; v_expand(v_mask16, v_mask_0, v_mask_1); - v_float32 v_mask0 = v_reinterpret_as_f32(~(v_mask_0 == v_0)); - v_float32 v_mask1 = v_reinterpret_as_f32(~(v_mask_1 == v_0)); + v_float32 v_mask0 = v_reinterpret_as_f32(v_not(v_eq(v_mask_0, v_0))); + v_float32 v_mask1 = v_reinterpret_as_f32(v_not(v_eq(v_mask_1, v_0))); v_float32 v_src00, v_src10, v_src20, v_src01, v_src11, v_src21; v_load_deinterleave(src + x * cn, v_src00, v_src10, v_src20); v_load_deinterleave(src + (x + step) * cn, v_src01, v_src11, v_src21); - v_src00 = v_src00 & v_mask0; - v_src01 = v_src01 & v_mask1; - v_src10 = v_src10 & v_mask0; - v_src11 = v_src11 & v_mask1; - v_src20 = v_src20 & v_mask0; - v_src21 = v_src21 & v_mask1; + v_src00 = v_and(v_src00, v_mask0); + v_src01 = v_and(v_src01, v_mask1); + v_src10 = v_and(v_src10, v_mask0); + v_src11 = v_and(v_src11, v_mask1); + v_src20 = v_and(v_src20, v_mask0); + v_src21 = v_and(v_src21, v_mask1); v_float32 v_dst00, v_dst10, v_dst20, v_dst01, v_dst11, v_dst21; v_load_deinterleave(dst + x * cn, v_dst00, v_dst10, v_dst20); @@ -1625,9 +1625,9 @@ void accSqr_simd_(const ushort* src, double* dst, const uchar* mask, int len, in void accSqr_simd_(const float* src, double* dst, const uchar* mask, int len, int cn) { int x = 0; -#if CV_SIMD_64F - const int cVectorWidth = v_float32::nlanes; - const int step = v_float64::nlanes; +#if (CV_SIMD_64F || CV_SIMD_SCALABLE_64F) + const int cVectorWidth = VTraits::vlanes(); + const int step = VTraits::vlanes(); if (!mask) { @@ -1667,9 +1667,9 @@ void accSqr_simd_(const float* src, double* dst, const uchar* mask, int len, int for (; x <= len - cVectorWidth; x += cVectorWidth) { v_uint32 v_mask = vx_load_expand_q(mask + x);; - v_mask = ~(v_mask == v_0); + v_mask = v_not(v_eq(v_mask, v_0)); v_float32 v_src = vx_load(src + x); - v_src = v_src & v_reinterpret_as_f32(v_mask); + v_src = v_and(v_src, v_reinterpret_as_f32(v_mask)); v_float64 v_src0 = v_cvt_f64(v_src); v_float64 v_src1 = v_cvt_f64_high(v_src); @@ -1682,13 +1682,13 @@ void accSqr_simd_(const float* src, double* dst, const uchar* mask, int len, int for (; x <= len - cVectorWidth; x += cVectorWidth) { v_uint32 v_mask = vx_load_expand_q(mask + x); - v_mask = ~(v_mask == v_0); + v_mask = v_not(v_eq(v_mask, v_0)); v_float32 v_src0, v_src1, v_src2; v_load_deinterleave(src + x * cn, v_src0, v_src1, v_src2); - v_src0 = v_src0 & v_reinterpret_as_f32(v_mask); - v_src1 = v_src1 & v_reinterpret_as_f32(v_mask); - v_src2 = v_src2 & v_reinterpret_as_f32(v_mask); + v_src0 = v_and(v_src0, v_reinterpret_as_f32(v_mask)); + v_src1 = v_and(v_src1, v_reinterpret_as_f32(v_mask)); + v_src2 = v_and(v_src2, v_reinterpret_as_f32(v_mask)); v_float64 v_src00 = v_cvt_f64(v_src0); v_float64 v_src01 = v_cvt_f64_high(v_src0); @@ -1720,9 +1720,9 @@ void accSqr_simd_(const float* src, double* dst, const uchar* mask, int len, int void accSqr_simd_(const double* src, double* dst, 
const uchar* mask, int len, int cn) { int x = 0; -#if CV_SIMD_64F - const int cVectorWidth = v_float64::nlanes * 2; - const int step = v_float64::nlanes; +#if (CV_SIMD_64F || CV_SIMD_SCALABLE_64F) + const int cVectorWidth = VTraits::vlanes() * 2; + const int step = VTraits::vlanes(); if (!mask) { @@ -1756,12 +1756,12 @@ void accSqr_simd_(const double* src, double* dst, const uchar* mask, int len, in v_uint32 v_mask32 = vx_load_expand_q(mask + x); v_uint64 v_masku640, v_masku641; v_expand(v_mask32, v_masku640, v_masku641); - v_float64 v_mask0 = v_reinterpret_as_f64(~(v_masku640 == v_0)); - v_float64 v_mask1 = v_reinterpret_as_f64(~(v_masku641 == v_0)); + v_float64 v_mask0 = v_reinterpret_as_f64(v_not(v_eq(v_masku640, v_0))); + v_float64 v_mask1 = v_reinterpret_as_f64(v_not(v_eq(v_masku641, v_0))); v_float64 v_src0 = vx_load(src + x); v_float64 v_src1 = vx_load(src + x + step); - v_src0 = v_src0 & v_mask0; - v_src1 = v_src1 & v_mask1; + v_src0 = v_and(v_src0, v_mask0); + v_src1 = v_and(v_src1, v_mask1); v_store(dst + x, v_fma(v_src0, v_src0, vx_load(dst + x))); v_store(dst + x + step, v_fma(v_src1, v_src1, vx_load(dst + x + step))); } @@ -1773,18 +1773,18 @@ void accSqr_simd_(const double* src, double* dst, const uchar* mask, int len, in v_uint32 v_mask32 = vx_load_expand_q(mask + x); v_uint64 v_masku640, v_masku641; v_expand(v_mask32, v_masku640, v_masku641); - v_float64 v_mask0 = v_reinterpret_as_f64(~(v_masku640 == v_0)); - v_float64 v_mask1 = v_reinterpret_as_f64(~(v_masku641 == v_0)); + v_float64 v_mask0 = v_reinterpret_as_f64(v_not(v_eq(v_masku640, v_0))); + v_float64 v_mask1 = v_reinterpret_as_f64(v_not(v_eq(v_masku641, v_0))); v_float64 v_src00, v_src01, v_src10, v_src11, v_src20, v_src21; v_load_deinterleave(src + x * cn, v_src00, v_src10, v_src20); v_load_deinterleave(src + (x + step) * cn, v_src01, v_src11, v_src21); - v_src00 = v_src00 & v_mask0; - v_src01 = v_src01 & v_mask1; - v_src10 = v_src10 & v_mask0; - v_src11 = v_src11 & v_mask1; - v_src20 = v_src20 & v_mask0; - v_src21 = v_src21 & v_mask1; + v_src00 = v_and(v_src00, v_mask0); + v_src01 = v_and(v_src01, v_mask1); + v_src10 = v_and(v_src10, v_mask0); + v_src11 = v_and(v_src11, v_mask1); + v_src20 = v_and(v_src20, v_mask0); + v_src21 = v_and(v_src21, v_mask1); v_float64 v_dst00, v_dst01, v_dst10, v_dst11, v_dst20, v_dst21; v_load_deinterleave(dst + x * cn, v_dst00, v_dst10, v_dst20); @@ -2035,9 +2035,9 @@ void accProd_simd_(const ushort* src1, const ushort* src2, float* dst, const uch void accProd_simd_(const float* src1, const float* src2, float* dst, const uchar* mask, int len, int cn) { int x = 0; -#if CV_SIMD - const int cVectorWidth = v_uint16::nlanes; - const int step = v_float32::nlanes; +#if (CV_SIMD || CV_SIMD_SCALABLE) + const int cVectorWidth = VTraits::vlanes(); + const int step = VTraits::vlanes(); if (!mask) { @@ -2069,11 +2069,11 @@ void accProd_simd_(const float* src1, const float* src2, float* dst, const uchar { v_uint32 v_mask32_0 = vx_load_expand_q(mask + x); v_uint32 v_mask32_1 = vx_load_expand_q(mask + x + step); - v_float32 v_mask0 = v_reinterpret_as_f32(~(v_mask32_0 == v_0)); - v_float32 v_mask1 = v_reinterpret_as_f32(~(v_mask32_1 == v_0)); + v_float32 v_mask0 = v_reinterpret_as_f32(v_not(v_eq(v_mask32_0, v_0))); + v_float32 v_mask1 = v_reinterpret_as_f32(v_not(v_eq(v_mask32_1, v_0))); - v_store(dst + x, vx_load(dst + x) + ((vx_load(src1 + x) * vx_load(src2 + x)) & v_mask0)); - v_store(dst + x + step, vx_load(dst + x + step) + ((vx_load(src1 + x + step) * vx_load(src2 + x + step)) & v_mask1)); + 
v_store(dst + x, v_add(vx_load(dst + x), v_and(v_mul(vx_load(src1 + x), vx_load(src2 + x)), v_mask0))); + v_store(dst + x + step, v_add(vx_load(dst + x + step), v_and(v_mul(vx_load(src1 + x + step), vx_load(src2 + x + step)), v_mask1))); } } else if (cn == 3) @@ -2082,8 +2082,8 @@ void accProd_simd_(const float* src1, const float* src2, float* dst, const uchar { v_uint32 v_mask32_0 = vx_load_expand_q(mask + x); v_uint32 v_mask32_1 = vx_load_expand_q(mask + x + step); - v_float32 v_mask0 = v_reinterpret_as_f32(~(v_mask32_0 == v_0)); - v_float32 v_mask1 = v_reinterpret_as_f32(~(v_mask32_1 == v_0)); + v_float32 v_mask0 = v_reinterpret_as_f32(v_not(v_eq(v_mask32_0, v_0))); + v_float32 v_mask1 = v_reinterpret_as_f32(v_not(v_eq(v_mask32_1, v_0))); v_float32 v_1src00, v_1src01, v_1src10, v_1src11, v_1src20, v_1src21; v_float32 v_2src00, v_2src01, v_2src10, v_2src11, v_2src20, v_2src21; @@ -2096,8 +2096,8 @@ void accProd_simd_(const float* src1, const float* src2, float* dst, const uchar v_load_deinterleave(dst + x * cn, v_dst00, v_dst10, v_dst20); v_load_deinterleave(dst + (x + step) * cn, v_dst01, v_dst11, v_dst21); - v_store_interleave(dst + x * cn, v_dst00 + ((v_1src00 * v_2src00) & v_mask0), v_dst10 + ((v_1src10 * v_2src10) & v_mask0), v_dst20 + ((v_1src20 * v_2src20) & v_mask0)); - v_store_interleave(dst + (x + step) * cn, v_dst01 + ((v_1src01 * v_2src01) & v_mask1), v_dst11 + ((v_1src11 * v_2src11) & v_mask1), v_dst21 + ((v_1src21 * v_2src21) & v_mask1)); + v_store_interleave(dst + x * cn, v_add(v_dst00, v_and(v_mul(v_1src00, v_2src00), v_mask0)), v_add(v_dst10, v_and(v_mul(v_1src10, v_2src10), v_mask0)), v_add(v_dst20, v_and(v_mul(v_1src20, v_2src20), v_mask0))); + v_store_interleave(dst + (x + step) * cn, v_add(v_dst01, v_and(v_mul(v_1src01, v_2src01), v_mask1)), v_add(v_dst11, v_and(v_mul(v_1src11, v_2src11), v_mask1)), v_add(v_dst21, v_and(v_mul(v_1src21, v_2src21), v_mask1))); } } } @@ -2398,9 +2398,9 @@ void accProd_simd_(const ushort* src1, const ushort* src2, double* dst, const uc void accProd_simd_(const float* src1, const float* src2, double* dst, const uchar* mask, int len, int cn) { int x = 0; -#if CV_SIMD_64F - const int cVectorWidth = v_float32::nlanes; - const int step = v_float64::nlanes; +#if (CV_SIMD_64F || CV_SIMD_SCALABLE_64F) + const int cVectorWidth = VTraits::vlanes(); + const int step = VTraits::vlanes(); if (!mask) { @@ -2447,11 +2447,11 @@ void accProd_simd_(const float* src1, const float* src2, double* dst, const ucha for (; x <= len - cVectorWidth; x += cVectorWidth) { v_uint32 v_mask = vx_load_expand_q(mask + x); - v_mask = ~(v_mask == v_0); + v_mask = v_not(v_eq(v_mask, v_0)); v_float32 v_1src = vx_load(src1 + x); v_float32 v_2src = vx_load(src2 + x); - v_1src = v_1src & v_reinterpret_as_f32(v_mask); - v_2src = v_2src & v_reinterpret_as_f32(v_mask); + v_1src = v_and(v_1src, v_reinterpret_as_f32(v_mask)); + v_2src = v_and(v_2src, v_reinterpret_as_f32(v_mask)); v_float64 v_1src0 = v_cvt_f64(v_1src); v_float64 v_1src1 = v_cvt_f64_high(v_1src); @@ -2467,16 +2467,16 @@ void accProd_simd_(const float* src1, const float* src2, double* dst, const ucha for (; x <= len - cVectorWidth; x += cVectorWidth) { v_uint32 v_mask = vx_load_expand_q(mask + x); - v_mask = ~(v_mask == v_0); + v_mask = v_not(v_eq(v_mask, v_0)); v_float32 v_1src0, v_1src1, v_1src2, v_2src0, v_2src1, v_2src2; v_load_deinterleave(src1 + x * cn, v_1src0, v_1src1, v_1src2); v_load_deinterleave(src2 + x * cn, v_2src0, v_2src1, v_2src2); - v_1src0 = v_1src0 & v_reinterpret_as_f32(v_mask); - v_1src1 = 
v_1src1 & v_reinterpret_as_f32(v_mask); - v_1src2 = v_1src2 & v_reinterpret_as_f32(v_mask); - v_2src0 = v_2src0 & v_reinterpret_as_f32(v_mask); - v_2src1 = v_2src1 & v_reinterpret_as_f32(v_mask); - v_2src2 = v_2src2 & v_reinterpret_as_f32(v_mask); + v_1src0 = v_and(v_1src0, v_reinterpret_as_f32(v_mask)); + v_1src1 = v_and(v_1src1, v_reinterpret_as_f32(v_mask)); + v_1src2 = v_and(v_1src2, v_reinterpret_as_f32(v_mask)); + v_2src0 = v_and(v_2src0, v_reinterpret_as_f32(v_mask)); + v_2src1 = v_and(v_2src1, v_reinterpret_as_f32(v_mask)); + v_2src2 = v_and(v_2src2, v_reinterpret_as_f32(v_mask)); v_float64 v_dst00, v_dst01, v_dst10, v_dst11, v_dst20, v_dst21; v_load_deinterleave(dst + x * cn, v_dst00, v_dst10, v_dst20); @@ -2501,9 +2501,9 @@ void accProd_simd_(const float* src1, const float* src2, double* dst, const ucha void accProd_simd_(const double* src1, const double* src2, double* dst, const uchar* mask, int len, int cn) { int x = 0; -#if CV_SIMD_64F - const int cVectorWidth = v_float64::nlanes * 2; - const int step = v_float64::nlanes; +#if (CV_SIMD_64F || CV_SIMD_SCALABLE_64F) + const int cVectorWidth = VTraits::vlanes() * 2; + const int step = VTraits::vlanes(); if (!mask) { @@ -2542,16 +2542,16 @@ void accProd_simd_(const double* src1, const double* src2, double* dst, const uc v_uint32 v_mask32 = vx_load_expand_q(mask + x); v_uint64 v_masku640, v_masku641; v_expand(v_mask32, v_masku640, v_masku641); - v_float64 v_mask0 = v_reinterpret_as_f64(~(v_masku640 == v_0)); - v_float64 v_mask1 = v_reinterpret_as_f64(~(v_masku641 == v_0)); + v_float64 v_mask0 = v_reinterpret_as_f64(v_not(v_eq(v_masku640, v_0))); + v_float64 v_mask1 = v_reinterpret_as_f64(v_not(v_eq(v_masku641, v_0))); v_float64 v_src00 = vx_load(src1 + x); v_float64 v_src01 = vx_load(src1 + x + step); v_float64 v_src10 = vx_load(src2 + x); v_float64 v_src11 = vx_load(src2 + x + step); - v_store(dst + x, vx_load(dst + x) + ((v_src00 * v_src10) & v_mask0)); - v_store(dst + x + step, vx_load(dst + x + step) + ((v_src01 * v_src11) & v_mask1)); + v_store(dst + x, v_add(vx_load(dst + x), v_and(v_mul(v_src00, v_src10), v_mask0))); + v_store(dst + x + step, v_add(vx_load(dst + x + step), v_and(v_mul(v_src01, v_src11), v_mask1))); } } else if (cn == 3) @@ -2561,8 +2561,8 @@ void accProd_simd_(const double* src1, const double* src2, double* dst, const uc v_uint32 v_mask32 = vx_load_expand_q(mask + x); v_uint64 v_masku640, v_masku641; v_expand(v_mask32, v_masku640, v_masku641); - v_float64 v_mask0 = v_reinterpret_as_f64(~(v_masku640 == v_0)); - v_float64 v_mask1 = v_reinterpret_as_f64(~(v_masku641 == v_0)); + v_float64 v_mask0 = v_reinterpret_as_f64(v_not(v_eq(v_masku640, v_0))); + v_float64 v_mask1 = v_reinterpret_as_f64(v_not(v_eq(v_masku641, v_0))); v_float64 v_1src00, v_1src01, v_1src10, v_1src11, v_1src20, v_1src21; v_float64 v_2src00, v_2src01, v_2src10, v_2src11, v_2src20, v_2src21; @@ -2570,19 +2570,19 @@ void accProd_simd_(const double* src1, const double* src2, double* dst, const uc v_load_deinterleave(src1 + (x + step) * cn, v_1src01, v_1src11, v_1src21); v_load_deinterleave(src2 + x * cn, v_2src00, v_2src10, v_2src20); v_load_deinterleave(src2 + (x + step) * cn, v_2src01, v_2src11, v_2src21); - v_float64 v_src00 = (v_1src00 & v_mask0) * v_2src00; - v_float64 v_src01 = (v_1src01 & v_mask1) * v_2src01; - v_float64 v_src10 = (v_1src10 & v_mask0) * v_2src10; - v_float64 v_src11 = (v_1src11 & v_mask1) * v_2src11; - v_float64 v_src20 = (v_1src20 & v_mask0) * v_2src20; - v_float64 v_src21 = (v_1src21 & v_mask1) * v_2src21; + v_float64 
v_src00 = v_mul(v_and(v_1src00, v_mask0), v_2src00); + v_float64 v_src01 = v_mul(v_and(v_1src01, v_mask1), v_2src01); + v_float64 v_src10 = v_mul(v_and(v_1src10, v_mask0), v_2src10); + v_float64 v_src11 = v_mul(v_and(v_1src11, v_mask1), v_2src11); + v_float64 v_src20 = v_mul(v_and(v_1src20, v_mask0), v_2src20); + v_float64 v_src21 = v_mul(v_and(v_1src21, v_mask1), v_2src21); v_float64 v_dst00, v_dst01, v_dst10, v_dst11, v_dst20, v_dst21; v_load_deinterleave(dst + x * cn, v_dst00, v_dst10, v_dst20); v_load_deinterleave(dst + (x + step) * cn, v_dst01, v_dst11, v_dst21); - v_store_interleave(dst + x * cn, v_dst00 + v_src00, v_dst10 + v_src10, v_dst20 + v_src20); - v_store_interleave(dst + (x + step) * cn, v_dst01 + v_src01, v_dst11 + v_src11, v_dst21 + v_src21); + v_store_interleave(dst + x * cn, v_add(v_dst00, v_src00), v_add(v_dst10, v_src10), v_add(v_dst20, v_src20)); + v_store_interleave(dst + (x + step) * cn, v_add(v_dst01, v_src01), v_add(v_dst11, v_src11), v_add(v_dst21, v_src21)); } } } diff --git a/modules/imgproc/src/color_hsv.simd.hpp b/modules/imgproc/src/color_hsv.simd.hpp index bea1decc3ae6..bef9497760db 100644 --- a/modules/imgproc/src/color_hsv.simd.hpp +++ b/modules/imgproc/src/color_hsv.simd.hpp @@ -98,7 +98,7 @@ struct RGB2HSV_b int i = 0; -#if CV_SIMD || CV_SIMD_SCALABLE +#if (CV_SIMD || CV_SIMD_SCALABLE) const int vsize = VTraits::vlanes(); for ( ; i <= n - vsize; i += vsize, src += scn*vsize, dst += 3*vsize) @@ -274,7 +274,7 @@ struct RGB2HSV_f : srccn(_srccn), blueIdx(_blueIdx), hrange(_hrange) { } - #if CV_SIMD || CV_SIMD_SCALABLE + #if (CV_SIMD || CV_SIMD_SCALABLE) inline void process(const v_float32& v_r, const v_float32& v_g, const v_float32& v_b, v_float32& v_h, v_float32& v_s, v_float32& v_v, float hscale) const @@ -308,7 +308,7 @@ struct RGB2HSV_f float hscale = hrange*(1.f/360.f); n *= 3; -#if CV_SIMD || CV_SIMD_SCALABLE +#if (CV_SIMD || CV_SIMD_SCALABLE) const int vsize = VTraits::vlanes(); for ( ; i <= n - 3*vsize; i += 3*vsize, src += scn * vsize) { @@ -368,7 +368,7 @@ struct RGB2HSV_f }; -#if CV_SIMD || CV_SIMD_SCALABLE +#if (CV_SIMD || CV_SIMD_SCALABLE) inline void HSV2RGB_simd(const v_float32& h, const v_float32& s, const v_float32& v, v_float32& b, v_float32& g, v_float32& r, float hscale) { @@ -473,7 +473,7 @@ struct HSV2RGB_f float hs = hscale; n *= 3; -#if CV_SIMD || CV_SIMD_SCALABLE +#if (CV_SIMD || CV_SIMD_SCALABLE) const int vsize = VTraits::vlanes(); v_float32 valpha = vx_setall_f32(alpha); for (; i <= n - vsize*3; i += vsize*3, dst += dcn * vsize) @@ -530,7 +530,7 @@ struct HSV2RGB_b int j = 0, dcn = dstcn; uchar alpha = ColorChannel::max(); -#if CV_SIMD || CV_SIMD_SCALABLE +#if (CV_SIMD || CV_SIMD_SCALABLE) const int vsize = VTraits::vlanes(); for (j = 0; j <= (n - vsize*4) * 3; j += 3 * 4 * vsize, dst += dcn * 4 * vsize) @@ -679,7 +679,7 @@ struct RGB2HLS_f { } -#if CV_SIMD || CV_SIMD_SCALABLE +#if (CV_SIMD || CV_SIMD_SCALABLE) inline void process(const v_float32& r, const v_float32& g, const v_float32& b, const v_float32& vhscale, v_float32& h, v_float32& l, v_float32& s) const @@ -718,7 +718,7 @@ struct RGB2HLS_f int i = 0, bidx = blueIdx, scn = srccn; -#if CV_SIMD || CV_SIMD_SCALABLE +#if (CV_SIMD || CV_SIMD_SCALABLE) const int vsize = VTraits::vlanes(); v_float32 vhscale = vx_setall_f32(hscale); @@ -802,13 +802,13 @@ struct RGB2HLS_b int scn = srccn; -#if CV_SIMD || CV_SIMD_SCALABLE +#if (CV_SIMD || CV_SIMD_SCALABLE) float CV_DECL_ALIGNED(CV_SIMD_WIDTH) buf[bufChannels*BLOCK_SIZE]; #else float CV_DECL_ALIGNED(16) buf[bufChannels*BLOCK_SIZE]; 
#endif -#if CV_SIMD || CV_SIMD_SCALABLE +#if (CV_SIMD || CV_SIMD_SCALABLE) static const int fsize = VTraits::vlanes(); //TODO: fix that when v_interleave is available float CV_DECL_ALIGNED(CV_SIMD_WIDTH) interTmpM[VTraits::max_nlanes*3]; @@ -823,7 +823,7 @@ struct RGB2HLS_b { int dn = std::min(n - i, (int)BLOCK_SIZE); -#if CV_SIMD || CV_SIMD_SCALABLE +#if (CV_SIMD || CV_SIMD_SCALABLE) v_float32 v255inv = vx_setall_f32(1.f/255.f); if (scn == 3) { @@ -902,7 +902,7 @@ struct RGB2HLS_b cvt(buf, buf, dn); int j = 0; -#if CV_SIMD || CV_SIMD_SCALABLE +#if (CV_SIMD || CV_SIMD_SCALABLE) for( ; j <= dn*3 - fsize*3*4; j += fsize*3*4) { v_float32 f0,f1,f2,f3,f4,f5,f6,f7,f8,f9,f10,f11; @@ -973,7 +973,7 @@ struct HLS2RGB_f : dstcn(_dstcn), blueIdx(_blueIdx), hscale(6.f/_hrange) { } -#if CV_SIMD || CV_SIMD_SCALABLE +#if (CV_SIMD || CV_SIMD_SCALABLE) inline void process(const v_float32& h, const v_float32& l, const v_float32& s, v_float32& b, v_float32& g, v_float32& r) const { @@ -1016,7 +1016,7 @@ struct HLS2RGB_f int i = 0, bidx = blueIdx, dcn = dstcn; float alpha = ColorChannel::max(); -#if CV_SIMD || CV_SIMD_SCALABLE +#if (CV_SIMD || CV_SIMD_SCALABLE) static const int vsize = VTraits::vlanes(); for (; i <= n - vsize; i += vsize, src += 3*vsize, dst += dcn*vsize) { @@ -1099,13 +1099,13 @@ struct HLS2RGB_b int i, j, dcn = dstcn; uchar alpha = ColorChannel::max(); -#if CV_SIMD || CV_SIMD_SCALABLE +#if (CV_SIMD || CV_SIMD_SCALABLE) float CV_DECL_ALIGNED(CV_SIMD_WIDTH) buf[bufChannels*BLOCK_SIZE]; #else float CV_DECL_ALIGNED(16) buf[bufChannels*BLOCK_SIZE]; #endif -#if CV_SIMD || CV_SIMD_SCALABLE +#if (CV_SIMD || CV_SIMD_SCALABLE) static const int fsize = VTraits::vlanes(); //TODO: fix that when v_interleave is available float CV_DECL_ALIGNED(CV_SIMD_WIDTH) interTmpM[VTraits::max_nlanes*3]; @@ -1122,7 +1122,7 @@ struct HLS2RGB_b int dn = std::min(n - i, (int)BLOCK_SIZE); j = 0; -#if CV_SIMD || CV_SIMD_SCALABLE +#if (CV_SIMD || CV_SIMD_SCALABLE) for( ; j <= dn*3 - 3*4*fsize; j += 3*4*fsize) { // 3x uchar -> 3*4 float @@ -1179,7 +1179,7 @@ struct HLS2RGB_b } cvt(buf, buf, dn); -#if CV_SIMD || CV_SIMD_SCALABLE +#if (CV_SIMD || CV_SIMD_SCALABLE) v_float32 v255 = vx_setall_f32(255.f); if(dcn == 3) { diff --git a/modules/imgproc/src/median_blur.simd.hpp b/modules/imgproc/src/median_blur.simd.hpp index 90f0b2033021..7d8423d3224c 100644 --- a/modules/imgproc/src/median_blur.simd.hpp +++ b/modules/imgproc/src/median_blur.simd.hpp @@ -548,7 +548,7 @@ struct MinMax32f } }; -#if CV_SIMD || CV_SIMD_SCALABLE +#if (CV_SIMD || CV_SIMD_SCALABLE) struct MinMaxVec8u { @@ -688,7 +688,7 @@ medianBlur_SortNet( const Mat& _src, Mat& _dst, int m ) if( limit == size.width ) break; -#if CV_SIMD || CV_SIMD_SCALABLE +#if (CV_SIMD || CV_SIMD_SCALABLE) int nlanes = VTraits::vlanes(); #else int nlanes = 1; @@ -793,7 +793,7 @@ medianBlur_SortNet( const Mat& _src, Mat& _dst, int m ) if( limit == size.width ) break; -#if CV_SIMD || CV_SIMD_SCALABLE +#if (CV_SIMD || CV_SIMD_SCALABLE) int nlanes = VTraits::vlanes(); #else int nlanes = 1;
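
The whole patch applies one mechanical substitution: compile-time lane counts (`v_float32::nlanes`) become runtime `VTraits<v_float32>::vlanes()` calls, and overloaded operators (`+`, `*`, `/`, `==`, `&`, `~`, `>>`) become the named wrappers (`v_add`, `v_mul`, `v_div`, `v_eq`, `v_and`, `v_not`, `v_shr`), so the same code also builds against scalable backends (`CV_SIMD_SCALABLE` / `CV_SIMD_SCALABLE_64F`, e.g. RVV) whose vector types have no fixed size and no operator overloads. For reference, below is a minimal sketch, not part of the patch, of the same pattern applied to a masked-accumulation loop; the helper name `accumulate_masked` is hypothetical, but it uses only universal intrinsics that already appear in the diff above.

```cpp
// Hypothetical helper, not part of the patch: accumulate src into dst where mask != 0,
// written in the scalable-friendly style used throughout this PR.
#include <opencv2/core/hal/intrin.hpp>

using namespace cv;

static void accumulate_masked(const float* src, const uchar* mask, float* dst, int len)
{
    int x = 0;
#if (CV_SIMD || CV_SIMD_SCALABLE)
    // Runtime lane count (was the compile-time constant v_float32::nlanes).
    const int step = VTraits<v_float32>::vlanes();
    const v_uint32 v_zero = vx_setzero_u32();
    for (; x <= len - step; x += step)
    {
        // Expand 8-bit mask values to 32 bits and turn non-zero entries into all-ones lanes.
        v_uint32 m32 = vx_load_expand_q(mask + x);
        v_float32 vmask = v_reinterpret_as_f32(v_not(v_eq(m32, v_zero))); // was ~(m32 == v_zero)
        // Masked load/add/store in function-call style (was vx_load(dst+x) + (vx_load(src+x) & vmask)).
        v_float32 vsrc = v_and(vx_load(src + x), vmask);
        v_store(dst + x, v_add(vx_load(dst + x), vsrc));
    }
    vx_cleanup();
#endif
    for (; x < len; x++)   // scalar tail
        if (mask[x])
            dst[x] += src[x];
}
```

The same reasoning explains the two traits used in `initUndistortRectifyMapComputer`: `VTraits<T>::vlanes()` is a runtime lane count (on scalable backends the register width is only known at run time), while `VTraits<T>::max_nlanes` is a compile-time upper bound, which is what the fixed-size `s_x`/`s_y`/`s_w` member arrays need. Note also that the 64-bit integer vectors only gain `v_eq`/`v_ne` through the new `OPENCV_HAL_WRAP_EQ_OP` macro, not the full comparison set that `OPENCV_HAL_WRAP_CMP` provides for the narrower types.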