From 1d8f9285674f2b1f9f2d7758d3f945ebb74fcf82 Mon Sep 17 00:00:00 2001
From: Wang Yixuan <88923622+hust17yixuan@users.noreply.github.com>
Date: Mon, 10 Feb 2025 12:21:01 +0800
Subject: [PATCH] 1.x branch fix hccl compile (#3237)

* 1.x branch fix hccl compile fix bug

* fix bug
---
 mmcv/ops/csrc/pytorch/npu/bbox_overlaps_npu.cpp        | 2 +-
 mmcv/ops/csrc/pytorch/npu/fused_bias_leakyrelu_npu.cpp | 2 +-
 mmcv/ops/csrc/pytorch/npu/nms_npu.cpp                  | 2 +-
 mmcv/ops/csrc/pytorch/npu/nms_rotated_npu.cpp          | 2 +-
 mmcv/ops/csrc/pytorch/npu/roi_align_npu.cpp            | 2 +-
 mmcv/ops/csrc/pytorch/npu/roipoint_pool3d_forward.cpp  | 4 ++--
 setup.py                                               | 5 +++++
 7 files changed, 12 insertions(+), 7 deletions(-)

diff --git a/mmcv/ops/csrc/pytorch/npu/bbox_overlaps_npu.cpp b/mmcv/ops/csrc/pytorch/npu/bbox_overlaps_npu.cpp
index ed04622af6..41732250b4 100644
--- a/mmcv/ops/csrc/pytorch/npu/bbox_overlaps_npu.cpp
+++ b/mmcv/ops/csrc/pytorch/npu/bbox_overlaps_npu.cpp
@@ -24,7 +24,7 @@ void bbox_overlaps_npu(const Tensor bboxes1, const Tensor bboxes2, Tensor ious,
     bboxesFP32 = bboxesFP32.to(at::kFloat);
     gtboxesFP32 = gtboxesFP32.to(at::kFloat);
   }
-  c10::SmallVector iousSize = {gtboxesFP32.size(0),
+  c10::SmallVector iousSize = {gtboxesFP32.size(0),
                                bboxesFP32.size(0)};
   if (aligned) {
     iousSize = {gtboxesFP32.size(0), 1};
diff --git a/mmcv/ops/csrc/pytorch/npu/fused_bias_leakyrelu_npu.cpp b/mmcv/ops/csrc/pytorch/npu/fused_bias_leakyrelu_npu.cpp
index da278ca3c5..068649e535 100644
--- a/mmcv/ops/csrc/pytorch/npu/fused_bias_leakyrelu_npu.cpp
+++ b/mmcv/ops/csrc/pytorch/npu/fused_bias_leakyrelu_npu.cpp
@@ -15,7 +15,7 @@ Tensor fused_bias_leakyrelu_npu(const Tensor &input, const Tensor &bias,
   if (grad == 0) {
     auto input_size = input.sizes();
     int input_length = input_size.size();
-    c10::SmallVector input_size_tmp;
+    c10::SmallVector input_size_tmp;
     for (uint64_t i = 0; i < input_size.size(); i++) {
       input_size_tmp.emplace_back(input_size[i]);
     }
diff --git a/mmcv/ops/csrc/pytorch/npu/nms_npu.cpp b/mmcv/ops/csrc/pytorch/npu/nms_npu.cpp
index 2d9ee8632e..7ed48b9d29 100644
--- a/mmcv/ops/csrc/pytorch/npu/nms_npu.cpp
+++ b/mmcv/ops/csrc/pytorch/npu/nms_npu.cpp
@@ -13,7 +13,7 @@ Tensor nms_npu(Tensor boxes, Tensor scores, float iou_threshold, int offset) {
       at::empty({}, boxes.options().dtype(at::kFloat)).fill_(0);
   at::Tensor max_outputsize_y =
       at::empty({}, boxes.options().dtype(at::kInt)).fill_(boxes.size(0));
-  c10::SmallVector outputsize = {boxes.size(0)};
+  c10::SmallVector outputsize = {boxes.size(0)};
   at::Tensor output =
       at::empty(outputsize, boxes.options().dtype(at::kInt)).fill_(-1);
   OpCommand cmd;
diff --git a/mmcv/ops/csrc/pytorch/npu/nms_rotated_npu.cpp b/mmcv/ops/csrc/pytorch/npu/nms_rotated_npu.cpp
index 0f2318cfbe..1856ae1deb 100644
--- a/mmcv/ops/csrc/pytorch/npu/nms_rotated_npu.cpp
+++ b/mmcv/ops/csrc/pytorch/npu/nms_rotated_npu.cpp
@@ -11,7 +11,7 @@ Tensor nms_rotated_npu(const Tensor dets, const Tensor scores,
     detsCast = detsCast.to(at::kFloat);
     scoresCast = scoresCast.to(at::kFloat);
   }
-  c10::SmallVector selectedIndexSize = {dets.size(0)};
+  c10::SmallVector selectedIndexSize = {dets.size(0)};
   at::Tensor selectedBox = at::empty_like(dets);
   at::Tensor selectedIndex =
       at::empty(selectedIndexSize, dets.options().dtype(at::kInt));
diff --git a/mmcv/ops/csrc/pytorch/npu/roi_align_npu.cpp b/mmcv/ops/csrc/pytorch/npu/roi_align_npu.cpp
index 0cddac8ed0..ddce0e8a81 100644
--- a/mmcv/ops/csrc/pytorch/npu/roi_align_npu.cpp
+++ b/mmcv/ops/csrc/pytorch/npu/roi_align_npu.cpp
@@ -41,7 +41,7 @@ void roi_align_backward_npu(Tensor grad_output, Tensor rois, Tensor argmax_y,
     roi_end_mode = 0;
   }
   auto shape = grad_input.sizes();
-  c10::SmallVector xdiff_shape;
+  c10::SmallVector xdiff_shape;
   for (uint64_t i = 0; i < shape.size(); i++) {
     xdiff_shape.emplace_back(shape[i]);
   }
diff --git a/mmcv/ops/csrc/pytorch/npu/roipoint_pool3d_forward.cpp b/mmcv/ops/csrc/pytorch/npu/roipoint_pool3d_forward.cpp
index 48d5b4789e..8d972aff19 100644
--- a/mmcv/ops/csrc/pytorch/npu/roipoint_pool3d_forward.cpp
+++ b/mmcv/ops/csrc/pytorch/npu/roipoint_pool3d_forward.cpp
@@ -12,12 +12,12 @@ void roipoint_pool3d_forward_impl_npu(int batch_size, int pts_num,
                                       Tensor pooled_empty_flag) {
   auto points_trans = xyz.transpose(1, 2).contiguous();
   auto point_features_trans = pts_feature.transpose(1, 2).contiguous();
-  c10::SmallVector features_trans_size = {
+  c10::SmallVector features_trans_size = {
       xyz.size(0), boxes3d.size(1), xyz.size(2) + pts_feature.size(2),
       sampled_pts_num};
   at::Tensor pooled_features_trans =
       at::empty(features_trans_size, xyz.options());
-  c10::SmallVector empty_flag_size = {boxes3d.size(0),
+  c10::SmallVector empty_flag_size = {boxes3d.size(0),
                                       boxes3d.size(1)};
   EXEC_NPU_CMD(aclnnRoipointPool3dForward, points_trans, point_features_trans,
                boxes3d, sampled_pts_num, pooled_features_trans,
diff --git a/setup.py b/setup.py
index 46328c3f20..8caa993d7b 100644
--- a/setup.py
+++ b/setup.py
@@ -446,6 +446,11 @@ def get_mluops_version(file_path):
                     'torch_npu').submodule_search_locations[0] +
                 '/include/third_party/acl/inc'
             ]
+            extra_compile_args['cxx'] += [
+                '-I' + importlib.util.find_spec(
+                    'torch_npu').submodule_search_locations[0] +
+                '/include/third_party/hccl/inc'
+            ]
             define_macros += [('MMCV_WITH_NPU', None)]
             extension = NpuExtension
             if parse_version(torch.__version__) < parse_version('2.1.0'):