Skip to content

Commit

Permalink
fix ops npu (#3231)
Browse files Browse the repository at this point in the history
* fix roi_pool_backward npu

* fix lint in pixel_group
  • Loading branch information
hust17yixuan authored Jan 21, 2025
1 parent 761d780 commit a5cfaef
Show file tree
Hide file tree
Showing 2 changed files with 67 additions and 50 deletions.
103 changes: 57 additions & 46 deletions mmcv/ops/csrc/pytorch/npu/pixel_group_npu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,53 +3,64 @@
using namespace NPU_NAME_SPACE;
using namespace std;

// NPU implementation of the pixel_group post-processing op (PAN text
// detection). Runs the aclnnPixelGroup kernel on device, then assembles the
// per-region result on the host.
//
// NOTE(review): this span of the scraped diff contained both the pre- and
// post-commit versions of this function interleaved (including a stray
// duplicated `return` inside the loop); reconstructed here as the single
// post-commit definition.
//
// Returns one vector per kernel region:
//   [0]: region confidence — accumulated score divided by pixel count
//        (presumably; the kernel writes {sum, count} into point_vector —
//        TODO confirm against the CPU reference implementation)
//   [1]: pixel count of the region
//   [2..]: flattened (y, x) coordinates of pixels assigned to the region
//          (region 0 is background, so coordinates are only collected for
//          l > 0)
vector<vector<float>> pixel_group_npu(Tensor score, Tensor mask,
                                      Tensor embedding, Tensor kernel_label,
                                      Tensor kernel_contour,
                                      int kernel_region_num,
                                      float distance_threshold) {
  // Validate ranks up front so the device kernel never sees bad shapes.
  TORCH_CHECK(score.dim() == 2,
              "score.dim() must be 2, but got: ", score.dim());
  TORCH_CHECK(mask.dim() == 2, "mask.dim() must be 2, but got: ", mask.dim());
  TORCH_CHECK(embedding.dim() == 3,
              "embedding.dim() must be 3, but got: ", embedding.dim());
  TORCH_CHECK(kernel_label.dim() == 2,
              "kernel_label.dim() must be 2, but got: ", kernel_label.dim());
  TORCH_CHECK(
      kernel_contour.dim() == 2,
      "kernel_contour.dim() must be 2, but got: ", kernel_contour.dim());

  auto label_size = kernel_label.sizes();
  auto height = label_size[0];
  auto width = label_size[1];

  // Device-side outputs: per-region {score sum, pixel count} pairs, and the
  // full-resolution updated label map.
  c10::SmallVector<int64_t, 8> point_vector_size = {kernel_region_num, 2};
  c10::SmallVector<int64_t, 8> label_updated_size = {height, width};
  // point_vector must be zero-initialized (it is accumulated into);
  // label_updated is fully overwritten by the kernel, so empty() suffices.
  at::Tensor point_vector = at::zeros(point_vector_size, score.options());
  at::Tensor label_updated =
      at::empty(label_updated_size, kernel_label.options());

  EXEC_NPU_CMD(aclnnPixelGroup, score, mask, embedding, kernel_label,
               kernel_contour, kernel_region_num, distance_threshold,
               point_vector, label_updated);

  std::vector<std::vector<float>> pixel_assignment(kernel_region_num);
  // Copy device outputs to host once; all per-element .item() reads below
  // would otherwise each trigger a device sync.
  at::Tensor point_vector_cpu = point_vector.to(at::kCPU);
  at::Tensor label_updated_cpu = label_updated.to(at::kCPU);

  for (int32_t l = 0; l < kernel_region_num; l++) {
    pixel_assignment[l].push_back(point_vector_cpu[l][0].item<float>());
    pixel_assignment[l].push_back(point_vector_cpu[l][1].item<float>());
    // Normalize the accumulated score by the pixel count when the region is
    // non-empty, yielding the mean confidence.
    if (pixel_assignment[l][1] > 0) {
      pixel_assignment[l][0] /= pixel_assignment[l][1];
    }
    // Region 0 is background: coordinates are only gathered for foreground
    // regions.
    if (l > 0) {
      at::Tensor valid_mask = (label_updated_cpu == l);
      at::Tensor indices = at::nonzero(valid_mask);
      for (int32_t i = 0; i < indices.size(0); i++) {
        // nonzero() yields (row, col) = (x, y) here; output order is (y, x).
        auto x = indices[i][0].item<int32_t>();
        auto y = indices[i][1].item<int32_t>();
        pixel_assignment[l].push_back(y);
        pixel_assignment[l].push_back(x);
      }
    }
  }
  return pixel_assignment;
}

// Forward declaration of the dispatch symbol defined by mmcv's device
// registry; the NPU kernel is registered against it below.
//
// NOTE(review): the scraped diff showed this declaration and the
// REGISTER_NPU_IMPL line twice (deleted old + added new renderings);
// deduplicated here — a repeated registration macro would otherwise risk a
// redefinition error.
vector<vector<float>> pixel_group_impl(Tensor score, Tensor mask,
                                       Tensor embedding, Tensor kernel_label,
                                       Tensor kernel_contour,
                                       int kernel_region_num,
                                       float distance_threshold);

REGISTER_NPU_IMPL(pixel_group_impl, pixel_group_npu);
14 changes: 10 additions & 4 deletions mmcv/ops/csrc/pytorch/npu/roi_pool_npu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -50,23 +50,29 @@ void roi_pool_backward_npu(Tensor grad_output, Tensor rois, Tensor argmax,
int64_t pooled_height_64 = pooled_height;
int64_t pooled_width_64 = pooled_width;
int64_t pooled_channel = 1;
at::Tensor argmax_trans = argmax.transpose(1, 2).transpose(2, 3);
at::Tensor grad_output_trans = grad_output.transpose(1, 2).transpose(2, 3);
at::Tensor roi_actual_num =
at::empty_like(rois, rois.options().dtype(at::kInt));
at::Tensor x = at::ones_like(grad_input);
at::Tensor x = at::ones_like(grad_input).transpose(1, 2).transpose(2, 3);
at::Tensor y = at::zeros_like(x);
OpCommand cmd;
cmd.Name("RoiPoolingGradWithArgMax")
.Input(grad_output)
.Input(grad_output_trans)
.Input(x)
.Input(rois)
.Input(roi_actual_num)
.Input(argmax)
.Output(grad_input)
.Input(argmax_trans)
.Output(y)
.Attr("pooled_h", pooled_height_64)
.Attr("pooled_w", pooled_width_64)
.Attr("spatial_scale_h", spatial_scale)
.Attr("spatial_scale_w", spatial_scale)
.Attr("pool_channel", pooled_channel)
.Run();
at::Tensor result = y.transpose(2, 3).transpose(1, 2);
at::Tensor res = result.contiguous();
grad_input.copy_(res);
}

void roi_pool_forward_impl(Tensor input, Tensor rois, Tensor output,
Expand Down

0 comments on commit a5cfaef

Please sign in to comment.