[CINN] Add fold_full_op pass #71443

Merged: 2 commits, Mar 6, 2025
2 changes: 2 additions & 0 deletions paddle/cinn/hlir/dialect/operator/transforms/add_cinn_pass.cc
@@ -38,6 +38,7 @@
#include "paddle/cinn/hlir/dialect/operator/transforms/convert_fa_to_qkvmha_pass.h"
#include "paddle/cinn/hlir/dialect/operator/transforms/convert_memory_effec_attn_to_flash_attn_pass.h"
#include "paddle/cinn/hlir/dialect/operator/transforms/dynamic_reshape_pass.h"
#include "paddle/cinn/hlir/dialect/operator/transforms/fold_full_pass.h"
#include "paddle/cinn/hlir/dialect/operator/transforms/fold_manipulation_ops_pass.h"
#include "paddle/cinn/hlir/dialect/operator/transforms/fuse_parallel_matmul_pass.h"
#include "paddle/cinn/hlir/dialect/operator/transforms/fuse_shape_ops_into_generate_shape_op_pass.h"
@@ -134,6 +135,7 @@ void ApplyPdToCinnPass(
pass_manager->AddPass(cinn::dialect::ir::CreateFuseParallelMatmulPass());
}
pass_manager->AddPass(cinn::dialect::ir::CreateRemoveAssignOutPass());
pass_manager->AddPass(cinn::dialect::ir::CreateFoldFullOpPass());
pass_manager->AddPass(cinn::dialect::ir::CreateConv2dTransposeFilterPass());
pass_manager->AddPass(cinn::dialect::ir::CreateConvertMEA2FAPass());
pass_manager->AddPass(cinn::dialect::ir::CreateConvertFA2QKVMHAPass());
87 changes: 87 additions & 0 deletions paddle/cinn/hlir/dialect/operator/transforms/fold_full_pass.cc
@@ -0,0 +1,87 @@
// Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/cinn/hlir/dialect/operator/transforms/fold_full_pass.h"

#include "paddle/fluid/pir/dialect/operator/ir/op_attribute.h"
#include "paddle/fluid/pir/dialect/operator/ir/pd_op.h"
#include "paddle/fluid/pir/utils/general_functions.h"
#include "paddle/pir/include/pattern_rewrite/pattern_rewrite_driver.h"

namespace cinn {
namespace dialect {
namespace ir {

template <class OPTYPE>
class FoldFullWithReshapeOpPattern : public pir::OpRewritePattern<OPTYPE> {
public:
using pir::OpRewritePattern<OPTYPE>::OpRewritePattern;

bool MatchAndRewrite(OPTYPE op,
pir::PatternRewriter& rewriter) const override {
auto* pre_op = op->operand_source(0).defining_op();
if (!pre_op || !pre_op->template isa<paddle::dialect::FullOp>()) {
return false;
}
const auto& out_shape =
op->result(0)
.type()
.template dyn_cast<paddle::dialect::DenseTensorType>()
.dims();
if (common::contain_unknown_dim(out_shape)) {
return false;
}

pir::AttributeMap attrs = pre_op->attributes();
attrs["shape"] = paddle::dialect::IntArrayAttribute::get(
pir::IrContext::Instance(),
phi::IntArray(out_shape.Get(), out_shape.size()));

auto new_full_op = rewriter.Build<paddle::dialect::FullOp>(attrs);

rewriter.ReplaceAllUsesWith(op->result(0), new_full_op->result(0));
rewriter.EraseOp(op);
if (pre_op->use_empty()) {
rewriter.EraseOp(pre_op);
}

return true;
}
};

class FoldFullOpPass : public pir::PatternRewritePass {
public:
FoldFullOpPass() : pir::PatternRewritePass("fold_full_ops_pass", 1) {}

pir::RewritePatternSet InitializePatterns(pir::IrContext* context) override {
pir::RewritePatternSet ps(context);

ps.Add<FoldFullWithReshapeOpPattern<paddle::dialect::ReshapeOp>>(context);
ps.Add<FoldFullWithReshapeOpPattern<paddle::dialect::TransposeOp>>(context);

return ps;
}

bool CanApplyOn(pir::Operation* op) const override {
return op->num_regions() > 0;
}
};

std::unique_ptr<pir::Pass> CreateFoldFullOpPass() {
return std::make_unique<FoldFullOpPass>();
}

} // namespace ir
} // namespace dialect
} // namespace cinn
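
For intuition, here is a minimal Python sketch of the rewrite this pattern performs, using the same shapes as the new unit test below. A full op whose result only feeds a reshape or transpose is replaced by a single full op that directly produces the final static output shape; the intermediate shape-manipulation op and the original full op (once unused) are erased. This illustrates the value-level equivalence only, not the pass implementation itself.

import numpy as np

import paddle

# Before the pass: a constant produced by full, then reshaped and transposed.
before = paddle.transpose(
    paddle.full([1, 32, 2, 10], 1.0, dtype="float32").reshape([4, 8, 2, 10]),
    perm=[0, 2, 3, 1],
)

# After the pass: a single full op emits the final shape [4, 2, 10, 8] directly.
after = paddle.full([4, 2, 10, 8], 1.0, dtype="float32")

# full fills every element with the same value, so any reshape/transpose of it
# is again a uniform tensor; the rewrite is therefore value-preserving.
np.testing.assert_allclose(before.numpy(), after.numpy())
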
27 changes: 27 additions & 0 deletions paddle/cinn/hlir/dialect/operator/transforms/fold_full_pass.h
@@ -0,0 +1,27 @@
// Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include "paddle/pir/include/pass/pass.h"

namespace cinn {
namespace dialect {
namespace ir {

std::unique_ptr<pir::Pass> CreateFoldFullOpPass();

} // namespace ir
} // namespace dialect
} // namespace cinn
50 changes: 25 additions & 25 deletions test/ir/pir/cinn/test_cinn_sub_graph.py
@@ -263,34 +263,34 @@ def test_forward(self):
np.testing.assert_allclose(cinn_out.numpy(), dy_out.numpy(), atol=1e-8)


# class TestCinnLayerNorm(TestCinnSubGraphBase):
# def train(self, use_cinn):
# paddle.seed(2022)
# self.prepare_data()
# net = CINNLayerNormSubGraphNet(self.shape[-1])
# net = utils.apply_to_static(net, use_cinn)
# # net.eval()
# weight = paddle.ones(shape=[self.shape[-1]], dtype="float64")
# weight.stop_gradient = False
# bias = paddle.ones(shape=[self.shape[-1]], dtype="float64")
# bias.stop_gradient = False
# self.x.stop_gradient = False
# out = net(self.x, weight, bias)
# loss = out.sum()
# loss.backward()
class TestCinnLayerNorm(TestCinnSubGraphBase):
def train(self, use_cinn):
paddle.seed(2022)
self.prepare_data()
net = CINNLayerNormSubGraphNet(self.shape[-1])
net = utils.apply_to_static(net, use_cinn)
# net.eval()
weight = paddle.ones(shape=[self.shape[-1]], dtype="float64")
weight.stop_gradient = False
bias = paddle.ones(shape=[self.shape[-1]], dtype="float64")
bias.stop_gradient = False
self.x.stop_gradient = False
out = net(self.x, weight, bias)
loss = out.sum()
loss.backward()

# return out, self.x.gradient(), weight.gradient(), bias.gradient()
return out, self.x.gradient(), weight.gradient(), bias.gradient()

# def test_train(self):
# cinn_out, cinn_x_grad, cinn_w_grad, cinn_b_grad = self.train(
# use_cinn=True
# )
def test_train(self):
cinn_out, cinn_x_grad, cinn_w_grad, cinn_b_grad = self.train(
use_cinn=True
)

# dy_out, dy_x_grad, dy_w_grad, dy_b_grad = self.train(use_cinn=False)
# np.testing.assert_allclose(cinn_out.numpy(), dy_out.numpy(), atol=1e-8)
# np.testing.assert_allclose(cinn_x_grad, dy_x_grad, atol=1e-8)
# np.testing.assert_allclose(cinn_w_grad, dy_w_grad, atol=1e-8)
# np.testing.assert_allclose(cinn_b_grad, dy_b_grad, atol=1e-8)
dy_out, dy_x_grad, dy_w_grad, dy_b_grad = self.train(use_cinn=False)
np.testing.assert_allclose(cinn_out.numpy(), dy_out.numpy(), atol=1e-8)
np.testing.assert_allclose(cinn_x_grad, dy_x_grad, atol=1e-8)
np.testing.assert_allclose(cinn_w_grad, dy_w_grad, atol=1e-8)
np.testing.assert_allclose(cinn_b_grad, dy_b_grad, atol=1e-8)


# class TestAddDropoutLayerNorm(TestCinnSubGraphBase):
67 changes: 67 additions & 0 deletions test/ir/pir/cinn/test_fold_full_ops.py
@@ -0,0 +1,67 @@
# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest

import numpy as np
import utils

import paddle
from paddle import nn


class SubGraph(nn.Layer):
def __init__(self):
super().__init__()

def forward(self, x):
y = paddle.full([1, 32, 2, 10], 1.0, dtype="float32")
z = paddle.transpose(y.reshape([4, 8, 2, 10]), perm=[0, 2, 3, 1])
return x + z


class TestFoldFullOps(unittest.TestCase):
def setUp(self):
paddle.seed(2022)
self.prepare_data()

def prepare_data(self):
self.x = paddle.randn([4, 2, 1, 8], dtype="float32")

def check_jit_kernel_info(self, static_fn):
utils.check_jit_kernel_number(static_fn, 1)
utils.check_jit_kernel_structure(static_fn, {utils.JIT_KERNEL_NAME: 1})

def eval(self, use_cinn):
paddle.seed(2022)
net = SubGraph()
net = utils.apply_to_static(net, use_cinn)
net.eval()
out = net(self.x)

if use_cinn:
self.check_jit_kernel_info(net.forward)
return out

def test_eval(self):
cinn_outs = self.eval(use_cinn=True)
dy_outs = self.eval(use_cinn=False)

for cinn_out, dy_out in zip(cinn_outs, dy_outs):
np.testing.assert_allclose(
cinn_out.numpy(), dy_out.numpy(), atol=1e-6, rtol=1e-6
)


if __name__ == '__main__':
unittest.main()
61 changes: 30 additions & 31 deletions test/ir/pir/cinn/test_llama_sub_graph.py
@@ -93,44 +93,43 @@ def rotate_half(self, x):
return paddle.concat([-x2, x1], axis=-1) # shape is the same as x


# class TestRotaryPosEmb(TestCinnSubGraphBase):
# def prepare_data(self):
# self.q = paddle.randn([1, 2048, 8, 96], dtype="float32")
# self.q.stop_gradient = False
class TestRotaryPosEmb(TestCinnSubGraphBase):
def prepare_data(self):
self.q = paddle.randn([1, 2048, 8, 96], dtype="float32")
self.q.stop_gradient = False

# self.k = paddle.randn([1, 2048, 8, 96], dtype="float32")
# self.k.stop_gradient = False
self.k = paddle.randn([1, 2048, 8, 96], dtype="float32")
self.k.stop_gradient = False

# self.cos = paddle.randn([1, 2048, 1, 96], dtype="float32")
# self.cos.stop_gradient = False
self.cos = paddle.randn([1, 2048, 1, 96], dtype="float32")
self.cos.stop_gradient = False

# self.sin = paddle.randn([1, 2048, 1, 96], dtype="float32")
# self.sin.stop_gradient = False
self.sin = paddle.randn([1, 2048, 1, 96], dtype="float32")
self.sin.stop_gradient = False

# self.position_ids = paddle.arange(end=2048, dtype="int64").unsqueeze(0)
# self.position_ids.stop_gradient = False
self.position_ids = paddle.arange(end=2048, dtype="int64").unsqueeze(0)
self.position_ids.stop_gradient = False

# def eval(self, use_cinn):
# paddle.seed(2022)
# self.prepare_data()
# net = RotaryPosEmb()
def eval(self, use_cinn):
paddle.seed(2022)
self.prepare_data()
net = RotaryPosEmb()

# net = utils.apply_to_static(net, use_cinn)
# # net.eval()
# out = net(self.q, self.k, self.cos, self.sin, self.position_ids)
# loss = (out[0] + out[1]).sum()
# loss.backward()
# return out
net = utils.apply_to_static(net, use_cinn)
# net.eval()
out = net(self.q, self.k, self.cos, self.sin, self.position_ids)
loss = (out[0] + out[1]).sum()
loss.backward()
return out

# def test_eval(self):
# cinn_outs = self.eval(use_cinn=True)
# dy_outs = self.eval(use_cinn=False)

# # TODO(phlrain): Need to check result
# for cinn_out, dy_out in zip(cinn_outs, dy_outs):
# np.testing.assert_allclose(
# cinn_out.numpy(), dy_out.numpy(), atol=1e-8
# )
def test_eval(self):
cinn_outs = self.eval(use_cinn=True)
dy_outs = self.eval(use_cinn=False)

for cinn_out, dy_out in zip(cinn_outs, dy_outs):
np.testing.assert_allclose(
cinn_out.numpy(), dy_out.numpy(), atol=1e-6
)


class RepeatKV(nn.Layer):
5 changes: 2 additions & 3 deletions test/ir/pir/cinn/test_rope.py
@@ -73,9 +73,8 @@ def eval(self, use_cinn):
net.eval()
out = net(self.q, self.k, self.cos, self.sin, self.position_ids)

# TODO(phlrain): Need to Fuse to one Kernel
# if use_cinn:
# self.check_jit_kernel_info(net.forward)
if use_cinn:
self.check_jit_kernel_info(net.forward)
return out

def test_eval(self):