[CINN] Add fold_full_op pass #71443

Merged: 2 commits, Mar 6, 2025
2 changes: 2 additions & 0 deletions paddle/cinn/hlir/dialect/operator/transforms/add_cinn_pass.cc
@@ -38,6 +38,7 @@
#include "paddle/cinn/hlir/dialect/operator/transforms/convert_fa_to_qkvmha_pass.h"
#include "paddle/cinn/hlir/dialect/operator/transforms/convert_memory_effec_attn_to_flash_attn_pass.h"
#include "paddle/cinn/hlir/dialect/operator/transforms/dynamic_reshape_pass.h"
#include "paddle/cinn/hlir/dialect/operator/transforms/fold_full_pass.h"
#include "paddle/cinn/hlir/dialect/operator/transforms/fold_manipulation_ops_pass.h"
#include "paddle/cinn/hlir/dialect/operator/transforms/fuse_parallel_matmul_pass.h"
#include "paddle/cinn/hlir/dialect/operator/transforms/fuse_shape_ops_into_generate_shape_op_pass.h"
@@ -134,6 +135,7 @@ void ApplyPdToCinnPass(
pass_manager->AddPass(cinn::dialect::ir::CreateFuseParallelMatmulPass());
}
pass_manager->AddPass(cinn::dialect::ir::CreateRemoveAssignOutPass());
pass_manager->AddPass(cinn::dialect::ir::CreateFoldFullOpPass());
pass_manager->AddPass(cinn::dialect::ir::CreateConv2dTransposeFilterPass());
pass_manager->AddPass(cinn::dialect::ir::CreateConvertMEA2FAPass());
pass_manager->AddPass(cinn::dialect::ir::CreateConvertFA2QKVMHAPass());
87 changes: 87 additions & 0 deletions paddle/cinn/hlir/dialect/operator/transforms/fold_full_pass.cc
@@ -0,0 +1,87 @@
// Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/cinn/hlir/dialect/operator/transforms/fold_full_pass.h"

#include "paddle/fluid/pir/dialect/operator/ir/op_attribute.h"
#include "paddle/fluid/pir/dialect/operator/ir/pd_op.h"
#include "paddle/fluid/pir/utils/general_functions.h"
#include "paddle/pir/include/pattern_rewrite/pattern_rewrite_driver.h"

namespace cinn {
namespace dialect {
namespace ir {

template <class OPTYPE>
class FoldFullWithReshapeOpPattern : public pir::OpRewritePattern<OPTYPE> {
public:
using pir::OpRewritePattern<OPTYPE>::OpRewritePattern;

bool MatchAndRewrite(OPTYPE op,
pir::PatternRewriter& rewriter) const override {
auto* pre_op = op->operand_source(0).defining_op();
if (!pre_op || !pre_op->template isa<paddle::dialect::FullOp>()) {
return false;
}
const auto& out_shape =
op->result(0)
.type()
.template dyn_cast<paddle::dialect::DenseTensorType>()
.dims();
if (common::contain_unknown_dim(out_shape)) {
return false;
}

pir::AttributeMap attrs = pre_op->attributes();
attrs["shape"] = paddle::dialect::IntArrayAttribute::get(
pir::IrContext::Instance(),
phi::IntArray(out_shape.Get(), out_shape.size()));

auto new_full_op = rewriter.Build<paddle::dialect::FullOp>(attrs);

rewriter.ReplaceAllUsesWith(op->result(0), new_full_op->result(0));
rewriter.EraseOp(op);
if (pre_op->use_empty()) {
rewriter.EraseOp(pre_op);
}

return true;
}
};

class FoldFullOpPass : public pir::PatternRewritePass {
public:
FoldFullOpPass() : pir::PatternRewritePass("fold_full_ops_pass", 1) {}

pir::RewritePatternSet InitializePatterns(pir::IrContext* context) override {
pir::RewritePatternSet ps(context);

ps.Add<FoldFullWithReshapeOpPattern<paddle::dialect::ReshapeOp>>(context);
ps.Add<FoldFullWithReshapeOpPattern<paddle::dialect::TransposeOp>>(context);

return ps;
}

bool CanApplyOn(pir::Operation* op) const override {
return op->num_regions() > 0;
}
};

std::unique_ptr<pir::Pass> CreateFoldFullOpPass() {
return std::make_unique<FoldFullOpPass>();
}

} // namespace ir
} // namespace dialect
} // namespace cinn
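
For intuition, here is a minimal Python sketch of the rewrite this pattern performs, using the same shapes as the new unit test below. A full op whose result only feeds a reshape or transpose is replaced by a single full op that directly produces the final static output shape; the intermediate shape-manipulation op and the original full op (once unused) are erased. This illustrates the value-level equivalence only, not the pass implementation itself.

import numpy as np

import paddle

# Before the pass: a constant produced by full, then reshaped and transposed.
before = paddle.transpose(
    paddle.full([1, 32, 2, 10], 1.0, dtype="float32").reshape([4, 8, 2, 10]),
    perm=[0, 2, 3, 1],
)

# After the pass: a single full op emits the final shape [4, 2, 10, 8] directly.
after = paddle.full([4, 2, 10, 8], 1.0, dtype="float32")

# full fills every element with the same value, so any reshape/transpose of it
# is again a uniform tensor; the rewrite is therefore value-preserving.
np.testing.assert_allclose(before.numpy(), after.numpy())
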
27 changes: 27 additions & 0 deletions paddle/cinn/hlir/dialect/operator/transforms/fold_full_pass.h
@@ -0,0 +1,27 @@
// Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include "paddle/pir/include/pass/pass.h"

namespace cinn {
namespace dialect {
namespace ir {

std::unique_ptr<pir::Pass> CreateFoldFullOpPass();

} // namespace ir
} // namespace dialect
} // namespace cinn
50 changes: 25 additions & 25 deletions test/ir/pir/cinn/test_cinn_sub_graph.py
@@ -263,34 +263,34 @@ def test_forward(self):
np.testing.assert_allclose(cinn_out.numpy(), dy_out.numpy(), atol=1e-8)


# class TestCinnLayerNorm(TestCinnSubGraphBase):
# def train(self, use_cinn):
# paddle.seed(2022)
# self.prepare_data()
# net = CINNLayerNormSubGraphNet(self.shape[-1])
# net = utils.apply_to_static(net, use_cinn)
# # net.eval()
# weight = paddle.ones(shape=[self.shape[-1]], dtype="float64")
# weight.stop_gradient = False
# bias = paddle.ones(shape=[self.shape[-1]], dtype="float64")
# bias.stop_gradient = False
# self.x.stop_gradient = False
# out = net(self.x, weight, bias)
# loss = out.sum()
# loss.backward()
class TestCinnLayerNorm(TestCinnSubGraphBase):
def train(self, use_cinn):
paddle.seed(2022)
self.prepare_data()
net = CINNLayerNormSubGraphNet(self.shape[-1])
net = utils.apply_to_static(net, use_cinn)
# net.eval()
weight = paddle.ones(shape=[self.shape[-1]], dtype="float64")
weight.stop_gradient = False
bias = paddle.ones(shape=[self.shape[-1]], dtype="float64")
bias.stop_gradient = False
self.x.stop_gradient = False
out = net(self.x, weight, bias)
loss = out.sum()
loss.backward()

# return out, self.x.gradient(), weight.gradient(), bias.gradient()
return out, self.x.gradient(), weight.gradient(), bias.gradient()

# def test_train(self):
# cinn_out, cinn_x_grad, cinn_w_grad, cinn_b_grad = self.train(
# use_cinn=True
# )
def test_train(self):
cinn_out, cinn_x_grad, cinn_w_grad, cinn_b_grad = self.train(
use_cinn=True
)

# dy_out, dy_x_grad, dy_w_grad, dy_b_grad = self.train(use_cinn=False)
# np.testing.assert_allclose(cinn_out.numpy(), dy_out.numpy(), atol=1e-8)
# np.testing.assert_allclose(cinn_x_grad, dy_x_grad, atol=1e-8)
# np.testing.assert_allclose(cinn_w_grad, dy_w_grad, atol=1e-8)
# np.testing.assert_allclose(cinn_b_grad, dy_b_grad, atol=1e-8)
dy_out, dy_x_grad, dy_w_grad, dy_b_grad = self.train(use_cinn=False)
np.testing.assert_allclose(cinn_out.numpy(), dy_out.numpy(), atol=1e-8)
np.testing.assert_allclose(cinn_x_grad, dy_x_grad, atol=1e-8)
np.testing.assert_allclose(cinn_w_grad, dy_w_grad, atol=1e-8)
np.testing.assert_allclose(cinn_b_grad, dy_b_grad, atol=1e-8)


# class TestAddDropoutLayerNorm(TestCinnSubGraphBase):
67 changes: 67 additions & 0 deletions test/ir/pir/cinn/test_fold_full_ops.py
@@ -0,0 +1,67 @@
# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest

import numpy as np
import utils

import paddle
from paddle import nn


class SubGraph(nn.Layer):
def __init__(self):
super().__init__()

def forward(self, x):
y = paddle.full([1, 32, 2, 10], 1.0, dtype="float32")
z = paddle.transpose(y.reshape([4, 8, 2, 10]), perm=[0, 2, 3, 1])
return x + z


class TestFoldFullOps(unittest.TestCase):
def setUp(self):
paddle.seed(2022)
self.prepare_data()

def prepare_data(self):
self.x = paddle.randn([4, 2, 1, 8], dtype="float32")

def check_jit_kernel_info(self, static_fn):
utils.check_jit_kernel_number(static_fn, 1)
utils.check_jit_kernel_structure(static_fn, {utils.JIT_KERNEL_NAME: 1})

def eval(self, use_cinn):
paddle.seed(2022)
net = SubGraph()
net = utils.apply_to_static(net, use_cinn)
net.eval()
out = net(self.x)

if use_cinn:
self.check_jit_kernel_info(net.forward)
return out

def test_eval(self):
cinn_outs = self.eval(use_cinn=True)
dy_outs = self.eval(use_cinn=False)

for cinn_out, dy_out in zip(cinn_outs, dy_outs):
np.testing.assert_allclose(
cinn_out.numpy(), dy_out.numpy(), atol=1e-6, rtol=1e-6
)


if __name__ == '__main__':
unittest.main()
61 changes: 30 additions & 31 deletions test/ir/pir/cinn/test_llama_sub_graph.py
@@ -93,44 +93,43 @@ def rotate_half(self, x):
return paddle.concat([-x2, x1], axis=-1) # shape is the same as x


# class TestRotaryPosEmb(TestCinnSubGraphBase):
# def prepare_data(self):
# self.q = paddle.randn([1, 2048, 8, 96], dtype="float32")
# self.q.stop_gradient = False
class TestRotaryPosEmb(TestCinnSubGraphBase):
def prepare_data(self):
self.q = paddle.randn([1, 2048, 8, 96], dtype="float32")
self.q.stop_gradient = False

# self.k = paddle.randn([1, 2048, 8, 96], dtype="float32")
# self.k.stop_gradient = False
self.k = paddle.randn([1, 2048, 8, 96], dtype="float32")
self.k.stop_gradient = False

# self.cos = paddle.randn([1, 2048, 1, 96], dtype="float32")
# self.cos.stop_gradient = False
self.cos = paddle.randn([1, 2048, 1, 96], dtype="float32")
self.cos.stop_gradient = False

# self.sin = paddle.randn([1, 2048, 1, 96], dtype="float32")
# self.sin.stop_gradient = False
self.sin = paddle.randn([1, 2048, 1, 96], dtype="float32")
self.sin.stop_gradient = False

# self.position_ids = paddle.arange(end=2048, dtype="int64").unsqueeze(0)
# self.position_ids.stop_gradient = False
self.position_ids = paddle.arange(end=2048, dtype="int64").unsqueeze(0)
self.position_ids.stop_gradient = False

# def eval(self, use_cinn):
# paddle.seed(2022)
# self.prepare_data()
# net = RotaryPosEmb()
def eval(self, use_cinn):
paddle.seed(2022)
self.prepare_data()
net = RotaryPosEmb()

# net = utils.apply_to_static(net, use_cinn)
# # net.eval()
# out = net(self.q, self.k, self.cos, self.sin, self.position_ids)
# loss = (out[0] + out[1]).sum()
# loss.backward()
# return out
net = utils.apply_to_static(net, use_cinn)
# net.eval()
out = net(self.q, self.k, self.cos, self.sin, self.position_ids)
loss = (out[0] + out[1]).sum()
loss.backward()
return out

# def test_eval(self):
# cinn_outs = self.eval(use_cinn=True)
# dy_outs = self.eval(use_cinn=False)

# # TODO(phlrain): Need to check result
# for cinn_out, dy_out in zip(cinn_outs, dy_outs):
# np.testing.assert_allclose(
# cinn_out.numpy(), dy_out.numpy(), atol=1e-8
# )
def test_eval(self):
cinn_outs = self.eval(use_cinn=True)
dy_outs = self.eval(use_cinn=False)

for cinn_out, dy_out in zip(cinn_outs, dy_outs):
np.testing.assert_allclose(
cinn_out.numpy(), dy_out.numpy(), atol=1e-6
)


class RepeatKV(nn.Layer):
5 changes: 2 additions & 3 deletions test/ir/pir/cinn/test_rope.py
@@ -73,9 +73,8 @@ def eval(self, use_cinn):
net.eval()
out = net(self.q, self.k, self.cos, self.sin, self.position_ids)

# TODO(phlrain): Need to Fuse to one Kernel
# if use_cinn:
# self.check_jit_kernel_info(net.forward)
if use_cinn:
self.check_jit_kernel_info(net.forward)
return out

def test_eval(self):