From 6252f7244acd48a9a2928010b06a24e3dd77436b Mon Sep 17 00:00:00 2001
From: yinfan98 <1106310035@qq.com>
Date: Mon, 6 Jan 2025 14:49:49 +0000
Subject: [PATCH 1/3] fix typo f12-17,f19-24,f26-28

---
 CONTRIBUTING.md                               |   4 +-
 _typos.toml                                   |  21 ----
 paddle/cinn/common/ir_util.h                  |   2 +-
 paddle/cinn/runtime/cuda/cuda_util.cc         |   2 +-
 paddle/fluid/framework/data_transform.cc      |   2 +-
 .../framework/new_executor/pir_interpreter.cc |   2 +-
 paddle/fluid/inference/tensorrt/op_teller.cc  |   4 +-
 .../tensorrt/plugin/custom_generic_plugin.cu  |   6 +-
 .../tensorrt/plugin/gelu_op_plugin.cu         |   2 +-
 .../multiary_infer_sym.cc                     |   2 +-
 paddle/phi/kernels/cpu/unique_kernel.cc       |  18 +--
 paddle/phi/kernels/funcs/math_cuda_utils.h    |   2 +-
 paddle/phi/kernels/funcs/unique_functor.h     |  56 +++++-----
 paddle/phi/kernels/gpu/rms_norm_funcs.h       |   2 +-
 .../phi/kernels/gpu/rms_norm_grad_kernel.cu   |   2 +-
 .../kernels/gpu/unique_consecutive_functor.h  |   4 +-
 paddle/phi/kernels/gpu/unique_kernel.cu       | 104 +++++++++---------
 paddle/phi/kernels/gpudnn/conv_gpudnn_base.h  |   2 +-
 python/paddle/amp/grad_scaler.py              |  16 +--
 .../hybrid_parallel_gradscaler.py             |   6 +-
 .../passes/auto_parallel_sharding.py          |   2 +-
 .../pipeline_zero_bubble.py                   |   6 +-
 python/paddle/incubate/asp/utils.py           |  14 +--
 .../paddle/io/dataloader/dataloader_iter.py   |   2 +-
 python/paddle/text/datasets/imikolov.py       |   2 +-
 test/legacy_test/test_gather_op.py            |   2 +-
 test/legacy_test/test_lstm_op.py              |   4 +-
 tools/gen_ut_cmakelists.py                    |   4 +-
 28 files changed, 137 insertions(+), 158 deletions(-)

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index d0c06e6ccf443f..4d9a94b022a748 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -39,7 +39,7 @@ PaddlePaddle uses this [Git branching model](http://nvie.com/posts/a-successful-
    pre-commit install
    ```
 
-   Our pre-commit configuration requires clang-format 3.8 for auto-formating C/C++ code and yapf for Python.
+   Our pre-commit configuration requires clang-format 3.8 for auto-formatting C/C++ code and yapf for Python.
 
    Once installed, `pre-commit` checks the style of code and documentation in every commit.  We will see something like the following when you run `git commit`:
 
@@ -52,7 +52,7 @@ PaddlePaddle uses this [Git branching model](http://nvie.com/posts/a-successful-
    Check for broken symlinks................................................Passed
    Detect Private Key...................................(no files to check)Skipped
    Fix End of Files.....................................(no files to check)Skipped
-   clang-formater.......................................(no files to check)Skipped
+   clang-formatter......................................(no files to check)Skipped
    [my-cool-stuff c703c041] add test file
     1 file changed, 0 insertions(+), 0 deletions(-)
     create mode 100644 233
diff --git a/_typos.toml b/_typos.toml
index 30dc504ebeabcc..ef47c804bb5179 100644
--- a/_typos.toml
+++ b/_typos.toml
@@ -53,27 +53,6 @@ dobule = 'dobule'
 Dowloading = 'Dowloading'
 downsteram = 'downsteram'
 fetchs = 'fetchs'
-Flattend = 'Flattend'
-flattend = 'flattend'
-flattern = 'flattern'
-Flattern = 'Flattern'
-filpped = 'filpped'
-flaot = 'flaot'
-follwed = 'follwed'
-folowing = 'folowing'
-formater = 'formater'
-formating = 'formating'
-foramt = 'foramt'
-formate = 'formate'
-formt = 'formt'
-forwrad = 'forwrad'
-forwad = 'forwad'
-forword = 'forword'
-founf = 'founf'
-framwork = 'framwork'
-frequence = 'frequence'
-fron = 'fron'
-fullfill = 'fullfill'
 Indexs = 'Indexs'
 indexs = 'indexs'
 indiates = 'indiates'
diff --git a/paddle/cinn/common/ir_util.h b/paddle/cinn/common/ir_util.h
index 724be629e6e93e..cbfe072d307016 100644
--- a/paddle/cinn/common/ir_util.h
+++ b/paddle/cinn/common/ir_util.h
@@ -191,7 +191,7 @@ inline void UnpackReduction(const ir::IndexExpr &expr, FLeaf fleaf) {
 }
 
 /*!
- * \brief Flattern the expression into a vector of expressions splited by `Add`
+ * \brief Flatten the expression into a vector of expressions split by `Add`
  * or `Mul`.
  *
  * For example (Add):
diff --git a/paddle/cinn/runtime/cuda/cuda_util.cc b/paddle/cinn/runtime/cuda/cuda_util.cc
index a0c12732a4ad5d..af0017222231bc 100644
--- a/paddle/cinn/runtime/cuda/cuda_util.cc
+++ b/paddle/cinn/runtime/cuda/cuda_util.cc
@@ -1742,7 +1742,7 @@ void cinn_call_cholesky_nvgpu(void *v_args,
   cinn_buffer_t *x = args[0].operator cinn_buffer_t *();
   cinn_buffer_t *out = args[1].operator cinn_buffer_t *();
   // In cuSOLVER, dense matrix stores in COL_MAJOR, thus FILL_MODE needs to be
-  // filpped. See also:
+  // flipped. See also:
   // https://docs.nvidia.com/cuda/cusolver/index.html#matrix-dense-format
   cublasFillMode_t uplo =
       upper ? CUBLAS_FILL_MODE_LOWER : CUBLAS_FILL_MODE_UPPER;
diff --git a/paddle/fluid/framework/data_transform.cc b/paddle/fluid/framework/data_transform.cc
index c8cf06fe27aec8..71d1ae8047105b 100644
--- a/paddle/fluid/framework/data_transform.cc
+++ b/paddle/fluid/framework/data_transform.cc
@@ -173,7 +173,7 @@ phi::GetKernelTypeForVarContext BuildGetKernelTypeForVarContext(
   if (has_infer_varkernel_fn) {
     for (auto &attr : fluid_attrs) {
       switch (attr.second.index()) {
-        case 3:  // string type in framwork::Attribute
+        case 3:  // string type in framework::Attribute
           (*phi_attrs)[attr.first] = PADDLE_GET_CONST(std::string, attr.second);
           break;
         default:
diff --git a/paddle/fluid/framework/new_executor/pir_interpreter.cc b/paddle/fluid/framework/new_executor/pir_interpreter.cc
index 75def437deafda..287ca3fb178ea5 100644
--- a/paddle/fluid/framework/new_executor/pir_interpreter.cc
+++ b/paddle/fluid/framework/new_executor/pir_interpreter.cc
@@ -979,7 +979,7 @@ void PirInterpreter::BuildInstruction() {
 }
 
 std::string PirInterpreter::DebugInstructions() {
-  // log formate: var[101] = pd_op.relu(var[100]) or for inplace op var[100] =
+  // log format: var[101] = pd_op.relu(var[100]) or for inplace op var[100] =
   // pd_op.relu_(var[100])
   std::stringstream ss;
   ss << "{outputs}"
diff --git a/paddle/fluid/inference/tensorrt/op_teller.cc b/paddle/fluid/inference/tensorrt/op_teller.cc
index d0becae8c45ed6..15f2fba66b1932 100644
--- a/paddle/fluid/inference/tensorrt/op_teller.cc
+++ b/paddle/fluid/inference/tensorrt/op_teller.cc
@@ -3467,9 +3467,9 @@ struct CustomGenericPluginTeller : public Teller {
                    "SetTrtInferShapeFn.";
         return false;
       }
-      auto& trt_supports_formate_config =
+      auto& trt_supports_format_config =
           OpMetaInfoHelper::GetTrtSupportsFormatConfig(op_info);
-      if (trt_supports_formate_config.empty()) {
+      if (trt_supports_format_config.empty()) {
         VLOG(3)
             << op_type
             << " has no trt supportsFormatCombination config. Please set by "
diff --git a/paddle/fluid/inference/tensorrt/plugin/custom_generic_plugin.cu b/paddle/fluid/inference/tensorrt/plugin/custom_generic_plugin.cu
index af5db479f10592..d6d76c6b9618ea 100644
--- a/paddle/fluid/inference/tensorrt/plugin/custom_generic_plugin.cu
+++ b/paddle/fluid/inference/tensorrt/plugin/custom_generic_plugin.cu
@@ -311,9 +311,9 @@ bool CustomGenericPlugin::supportsFormatCombination(
   auto& op_meta_info_map = OpMetaInfoMap::Instance();
   const auto& meta_info_map = op_meta_info_map.GetMap();
   auto& op_info = meta_info_map.at(op_desc_.Type()).front();
-  auto& supports_formate_config =
+  auto& supports_format_config =
       OpMetaInfoHelper::GetTrtSupportsFormatConfig(op_info);
-  PADDLE_ENFORCE_NE(supports_formate_config.empty(),
+  PADDLE_ENFORCE_NE(supports_format_config.empty(),
                     true,
                     common::errors::InvalidArgument(
                         "The %s op has no tensorrt plugin "
@@ -325,7 +325,7 @@ bool CustomGenericPlugin::supportsFormatCombination(
   size_t output_num = OpMetaInfoHelper::GetOutputs(op_info).size();
   std::vector<std::vector<std::pair<std::string, std::string>>>
       format_combinations;
-  for (auto& config : supports_formate_config) {
+  for (auto& config : supports_format_config) {
     auto format_combination = parseConfig(op_desc_.Type(), config);
     PADDLE_ENFORCE_EQ(input_num + output_num,
                       format_combination.size(),
diff --git a/paddle/fluid/inference/tensorrt/plugin/gelu_op_plugin.cu b/paddle/fluid/inference/tensorrt/plugin/gelu_op_plugin.cu
index f5369eb691c69e..c1b4aad6d73c06 100644
--- a/paddle/fluid/inference/tensorrt/plugin/gelu_op_plugin.cu
+++ b/paddle/fluid/inference/tensorrt/plugin/gelu_op_plugin.cu
@@ -77,7 +77,7 @@ __device__ half do_tanh<half>(half a) {
   return __float2half(tmp);
 }
 
-// the kernel below is not aligned with fluid fp32 forwrad ones, use it for
+// the kernel below is not aligned with fluid fp32 forward ones, use it for
 // fp16.
 template <typename T, unsigned TPB>
 __global__ void no_exact_gelu_kernel(
diff --git a/paddle/fluid/pir/dialect/operator/interface/infer_symbolic_shape/multiary_infer_sym.cc b/paddle/fluid/pir/dialect/operator/interface/infer_symbolic_shape/multiary_infer_sym.cc
index 3f2c8397a61415..9809acfb576b71 100644
--- a/paddle/fluid/pir/dialect/operator/interface/infer_symbolic_shape/multiary_infer_sym.cc
+++ b/paddle/fluid/pir/dialect/operator/interface/infer_symbolic_shape/multiary_infer_sym.cc
@@ -2631,7 +2631,7 @@ bool GroupNormOpInferSymbolicShape(
     channel_idx = 1;
   } else {
     PADDLE_THROW(common::errors::Unimplemented(
-        "GroupNorm only suport NHWC and NCHW data formt"));
+        "GroupNorm only suport NHWC and NCHW data format"));
   }
 
   symbol::DimExpr channel_dim = x_shape.shape()[channel_idx];
diff --git a/paddle/phi/kernels/cpu/unique_kernel.cc b/paddle/phi/kernels/cpu/unique_kernel.cc
index e3be49af16ed3c..8a0b9046a15b84 100644
--- a/paddle/phi/kernels/cpu/unique_kernel.cc
+++ b/paddle/phi/kernels/cpu/unique_kernel.cc
@@ -83,15 +83,15 @@ void UniqueRawKernel(const Context& context,
   if (axis.empty()) {
     phi::VisitDataTypeTiny(
         dtype,
-        phi::funcs::UniqueFlattendTensorFunctor<Context, T>(context,
-                                                            x,
-                                                            out,
-                                                            indices,
-                                                            index,
-                                                            counts,
-                                                            return_index,
-                                                            return_inverse,
-                                                            return_counts));
+        phi::funcs::UniqueFlattenedTensorFunctor<Context, T>(context,
+                                                             x,
+                                                             out,
+                                                             indices,
+                                                             index,
+                                                             counts,
+                                                             return_index,
+                                                             return_inverse,
+                                                             return_counts));
   } else {
     int axis_value = axis[0];
     axis_value = (axis_value == -1) ? (x.dims().size() - 1) : axis_value;
diff --git a/paddle/phi/kernels/funcs/math_cuda_utils.h b/paddle/phi/kernels/funcs/math_cuda_utils.h
index a5aaa1310b16db..f14b2af8c72609 100644
--- a/paddle/phi/kernels/funcs/math_cuda_utils.h
+++ b/paddle/phi/kernels/funcs/math_cuda_utils.h
@@ -298,7 +298,7 @@ __inline__ __device__ T PartialWarpReduceMin(T val, warp_mask_t lane_mask) {
   T warp_val = __shfl_sync(lane_mask, val, 0, warpSize);
 #else
   T warp_val = __shfl(
-      val, 0, warpSize);  // To fullfill the data in each thread of this warp.
+      val, 0, warpSize);  // To fulfill the data in each thread of this warp.
 #endif
   warp_val = val;
 
diff --git a/paddle/phi/kernels/funcs/unique_functor.h b/paddle/phi/kernels/funcs/unique_functor.h
index 8d62a0c5255e46..4365f1a5f4cfe6 100644
--- a/paddle/phi/kernels/funcs/unique_functor.h
+++ b/paddle/phi/kernels/funcs/unique_functor.h
@@ -130,15 +130,15 @@ static bool Equal(const DenseTensor& a, const DenseTensor& b) {
 }
 
 template <typename Context, typename InT, typename IndexT>
-static void UniqueFlattendTensor(const Context& context,
-                                 const DenseTensor& in,
-                                 DenseTensor* out,
-                                 DenseTensor* indices,
-                                 DenseTensor* index,
-                                 DenseTensor* count,
-                                 bool return_index,
-                                 bool return_inverse,
-                                 bool return_counts) {
+static void UniqueFlattenedTensor(const Context& context,
+                                  const DenseTensor& in,
+                                  DenseTensor* out,
+                                  DenseTensor* indices,
+                                  DenseTensor* index,
+                                  DenseTensor* count,
+                                  bool return_index,
+                                  bool return_inverse,
+                                  bool return_counts) {
   const InT* in_data = in.data<InT>();
   std::set<InT> unique(in_data, in_data + in.numel());
   out->Resize(common::make_ddim({static_cast<int64_t>(unique.size())}));
@@ -327,7 +327,7 @@ static void UniqueDim(const Context& context,
 }
 
 template <typename Context, typename InT>
-struct UniqueFlattendTensorFunctor {
+struct UniqueFlattenedTensorFunctor {
   const Context& ctx_; /*  */
   const DenseTensor& in_;
   DenseTensor* out_;
@@ -338,15 +338,15 @@ struct UniqueFlattendTensorFunctor {
   const bool return_inverse_;
   const bool return_counts_;
 
-  UniqueFlattendTensorFunctor(const Context& context,
-                              const DenseTensor& in,
-                              DenseTensor* out,
-                              DenseTensor* indices,
-                              DenseTensor* index,
-                              DenseTensor* count,
-                              bool return_index,
-                              bool return_inverse,
-                              bool return_counts)
+  UniqueFlattenedTensorFunctor(const Context& context,
+                               const DenseTensor& in,
+                               DenseTensor* out,
+                               DenseTensor* indices,
+                               DenseTensor* index,
+                               DenseTensor* count,
+                               bool return_index,
+                               bool return_inverse,
+                               bool return_counts)
       : ctx_(context),
         in_(in),
         out_(out),
@@ -359,15 +359,15 @@ struct UniqueFlattendTensorFunctor {
 
   template <typename IndexT>
   void apply() const {
-    UniqueFlattendTensor<Context, InT, IndexT>(ctx_,
-                                               in_,
-                                               out_,
-                                               indices_,
-                                               index_,
-                                               count_,
-                                               return_index_,
-                                               return_inverse_,
-                                               return_counts_);
+    UniqueFlattenedTensor<Context, InT, IndexT>(ctx_,
+                                                in_,
+                                                out_,
+                                                indices_,
+                                                index_,
+                                                count_,
+                                                return_index_,
+                                                return_inverse_,
+                                                return_counts_);
   }
 };
 
diff --git a/paddle/phi/kernels/gpu/rms_norm_funcs.h b/paddle/phi/kernels/gpu/rms_norm_funcs.h
index 2954d593014a6c..db6a137a02d386 100644
--- a/paddle/phi/kernels/gpu/rms_norm_funcs.h
+++ b/paddle/phi/kernels/gpu/rms_norm_funcs.h
@@ -14,7 +14,7 @@ limitations under the License. */
 
 /* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. */
 
-/*This code is copied fron NVIDIA apex:
+/*This code is copied from NVIDIA apex:
  *     https://github.com/NVIDIA/apex
  *     with minor changes. */
 
diff --git a/paddle/phi/kernels/gpu/rms_norm_grad_kernel.cu b/paddle/phi/kernels/gpu/rms_norm_grad_kernel.cu
index 5be55226813646..342737e9b20bd5 100644
--- a/paddle/phi/kernels/gpu/rms_norm_grad_kernel.cu
+++ b/paddle/phi/kernels/gpu/rms_norm_grad_kernel.cu
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 
 /* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. */
-/*This code is copied fron NVIDIA apex:
+/*This code is copied from NVIDIA apex:
  *     https://github.com/NVIDIA/apex
  *     with minor changes. */
 
diff --git a/paddle/phi/kernels/gpu/unique_consecutive_functor.h b/paddle/phi/kernels/gpu/unique_consecutive_functor.h
index dae83a45a8e917..f094da335f396d 100644
--- a/paddle/phi/kernels/gpu/unique_consecutive_functor.h
+++ b/paddle/phi/kernels/gpu/unique_consecutive_functor.h
@@ -32,7 +32,7 @@
 
 namespace phi {
 
-// The core logic of computing Unique Consecutive for a flattend Tensor
+// The core logic of computing Unique Consecutive for a flattened Tensor
 template <typename Context,
           typename InT,
           typename IndexT,
@@ -113,7 +113,7 @@ static void UniqueConsecutiveFlattenedCUDATensor(const Context& context,
   }
 }
 
-// functor for processing a flattend Tensor
+// functor for processing a flattened Tensor
 template <typename Context, typename InT>
 struct UniqueConsecutiveFlattenedCUDAFunctor {
   const Context& ctx_;
diff --git a/paddle/phi/kernels/gpu/unique_kernel.cu b/paddle/phi/kernels/gpu/unique_kernel.cu
index 341483e57d56b4..e08aa5bece3bc4 100644
--- a/paddle/phi/kernels/gpu/unique_kernel.cu
+++ b/paddle/phi/kernels/gpu/unique_kernel.cu
@@ -106,21 +106,21 @@ struct BinaryNotEqual {
   }
 };
 
-// The core logic of computing Unique for a flattend DenseTensor
+// The core logic of computing Unique for a flattened DenseTensor
 template <typename Context, typename InT, typename IndexT>
 static typename std::enable_if<
     !std::is_same<InT, phi::dtype::float16>::value &&
     !std::is_same<InT, phi::dtype::bfloat16>::value>::type
-UniqueFlattendCUDATensor(const Context& context,
-                         const DenseTensor& in,
-                         DenseTensor* out,
-                         DenseTensor* indices,
-                         DenseTensor* index,
-                         DenseTensor* counts,
-                         bool return_index,
-                         bool return_inverse,
-                         bool return_counts,
-                         int64_t num_input) {
+UniqueFlattenedCUDATensor(const Context& context,
+                          const DenseTensor& in,
+                          DenseTensor* out,
+                          DenseTensor* indices,
+                          DenseTensor* index,
+                          DenseTensor* counts,
+                          bool return_index,
+                          bool return_inverse,
+                          bool return_counts,
+                          int64_t num_input) {
   // 0. Prepration
   auto equal = thrust::equal_to<InT>();
   auto not_equal = thrust::not_equal_to<InT>();
@@ -242,21 +242,21 @@ UniqueFlattendCUDATensor(const Context& context,
   }
 }
 
-// The core logic of computing Unique for a flattend DenseTensor
+// The core logic of computing Unique for a flattened DenseTensor
 template <typename Context, typename InT, typename IndexT>
 static typename std::enable_if<
     std::is_same<InT, phi::dtype::float16>::value ||
     std::is_same<InT, phi::dtype::bfloat16>::value>::type
-UniqueFlattendCUDATensor(const Context& context,
-                         const DenseTensor& in,
-                         DenseTensor* out,
-                         DenseTensor* indices,
-                         DenseTensor* index,
-                         DenseTensor* counts,
-                         bool return_index,
-                         bool return_inverse,
-                         bool return_counts,
-                         int64_t num_input) {
+UniqueFlattenedCUDATensor(const Context& context,
+                          const DenseTensor& in,
+                          DenseTensor* out,
+                          DenseTensor* indices,
+                          DenseTensor* index,
+                          DenseTensor* counts,
+                          bool return_index,
+                          bool return_inverse,
+                          bool return_counts,
+                          int64_t num_input) {
   // 1. Sort indices
   DenseTensor in_resize;
   in_resize.ShareDataWith(in);
@@ -526,9 +526,9 @@ static void UniqueDimsCUDATensor(const Context& context,
   }
 }
 
-// functor for processing a flattend DenseTensor
+// functor for processing a flattened DenseTensor
 template <typename Context, typename InT>
-struct UniqueFlattendCUDAFunctor {
+struct UniqueFlattenedCUDAFunctor {
   const Context& ctx_;
   const DenseTensor& in_;
   DenseTensor* out_;
@@ -539,15 +539,15 @@ struct UniqueFlattendCUDAFunctor {
   const bool return_inverse_;
   const bool return_counts_;
 
-  UniqueFlattendCUDAFunctor(const Context& context,
-                            const DenseTensor& in,
-                            DenseTensor* out,
-                            DenseTensor* indices,
-                            DenseTensor* index,
-                            DenseTensor* counts,
-                            bool return_index,
-                            bool return_inverse,
-                            bool return_counts)
+  UniqueFlattenedCUDAFunctor(const Context& context,
+                             const DenseTensor& in,
+                             DenseTensor* out,
+                             DenseTensor* indices,
+                             DenseTensor* index,
+                             DenseTensor* counts,
+                             bool return_index,
+                             bool return_inverse,
+                             bool return_counts)
       : ctx_(context),
         in_(in),
         out_(out),
@@ -560,16 +560,16 @@ struct UniqueFlattendCUDAFunctor {
 
   template <typename IndexT>
   void apply() const {
-    UniqueFlattendCUDATensor<Context, InT, IndexT>(ctx_,
-                                                   in_,
-                                                   out_,
-                                                   indices_,
-                                                   index_,
-                                                   counts_,
-                                                   return_index_,
-                                                   return_inverse_,
-                                                   return_counts_,
-                                                   in_.numel());
+    UniqueFlattenedCUDATensor<Context, InT, IndexT>(ctx_,
+                                                    in_,
+                                                    out_,
+                                                    indices_,
+                                                    index_,
+                                                    counts_,
+                                                    return_index_,
+                                                    return_inverse_,
+                                                    return_counts_,
+                                                    in_.numel());
   }
 };
 
@@ -650,15 +650,15 @@ void UniqueRawKernel(const Context& context,
   if (axis.empty()) {
     phi::VisitDataTypeTiny(
         dtype,
-        UniqueFlattendCUDAFunctor<Context, T>(context,
-                                              x,
-                                              out,
-                                              indices,
-                                              index,
-                                              counts,
-                                              return_index,
-                                              return_inverse,
-                                              return_counts));
+        UniqueFlattenedCUDAFunctor<Context, T>(context,
+                                               x,
+                                               out,
+                                               indices,
+                                               index,
+                                               counts,
+                                               return_index,
+                                               return_inverse,
+                                               return_counts));
   } else {
     // 'axis' is required.
     int axis_value = axis[0];
diff --git a/paddle/phi/kernels/gpudnn/conv_gpudnn_base.h b/paddle/phi/kernels/gpudnn/conv_gpudnn_base.h
index 5b55aa8f70394a..a21ed28d839a4a 100644
--- a/paddle/phi/kernels/gpudnn/conv_gpudnn_base.h
+++ b/paddle/phi/kernels/gpudnn/conv_gpudnn_base.h
@@ -120,7 +120,7 @@ struct ConvArgsBase {
   // groups
   int group;
 
-  // data foramt
+  // data format
   GPUDNNDataLayout data_layout;
 
   ConvArgsBase(const HandleT& h,
diff --git a/python/paddle/amp/grad_scaler.py b/python/paddle/amp/grad_scaler.py
index 4ba1524a307d9d..c371918e3f0e4f 100644
--- a/python/paddle/amp/grad_scaler.py
+++ b/python/paddle/amp/grad_scaler.py
@@ -181,7 +181,7 @@ def __init__(
                 self._scale = paddle.to_tensor(
                     np.array([self._init_loss_scaling]).astype(np.float32)
                 )
-                self._cache_founf_inf = None
+                self._cache_found_inf = None
                 self._optimizer_states = defaultdict(_refresh_optimizer_state)
 
     def scale(self, var: Tensor) -> Tensor:
@@ -335,13 +335,13 @@ def minimize(
             optimizer._set_auxiliary_var('found_inf', self._found_inf)
             optimize_ops, params_grads = optimizer.minimize(*args, **kwargs)
             # TODO: Fix to _cache_found_inf after PaddleNLP update
-            self._cache_founf_inf = optimizer._get_auxiliary_var('found_inf')
+            self._cache_found_inf = optimizer._get_auxiliary_var('found_inf')
         else:
             if self._found_inf:
-                self._cache_founf_inf = True
+                self._cache_found_inf = True
             else:
                 optimize_ops, params_grads = optimizer.minimize(*args, **kwargs)
-                self._cache_founf_inf = False
+                self._cache_found_inf = False
 
         if self._use_dynamic_loss_scaling:
             # update the scale
@@ -462,7 +462,7 @@ def _update(self):
         if not self._enable:
             return
 
-        if self._cache_founf_inf:
+        if self._cache_found_inf:
             self._incr_count = 0
             self._decr_count = self._decr_count + 1
             if self._decr_count == self._decr_every_n_nan_or_inf:
@@ -846,13 +846,13 @@ def step(self, optimizer: Optimizer) -> None:
         if hasattr(optimizer, "_set_auxiliary_var"):
             optimizer._set_auxiliary_var('found_inf', self._found_inf)
             optimizer.step()
-            self._cache_founf_inf = optimizer._get_auxiliary_var('found_inf')
+            self._cache_found_inf = optimizer._get_auxiliary_var('found_inf')
         else:
             if self._found_inf:
-                self._cache_founf_inf = True
+                self._cache_found_inf = True
             else:
                 optimizer.step()
-                self._cache_founf_inf = False
+                self._cache_found_inf = False
 
         optimizer_state["state"] = OptimizerState.STEPPED
 
diff --git a/python/paddle/distributed/fleet/meta_optimizers/dygraph_optimizer/hybrid_parallel_gradscaler.py b/python/paddle/distributed/fleet/meta_optimizers/dygraph_optimizer/hybrid_parallel_gradscaler.py
index 358c6023e6c6f7..c9a684ae807be4 100644
--- a/python/paddle/distributed/fleet/meta_optimizers/dygraph_optimizer/hybrid_parallel_gradscaler.py
+++ b/python/paddle/distributed/fleet/meta_optimizers/dygraph_optimizer/hybrid_parallel_gradscaler.py
@@ -45,13 +45,13 @@ def minimize(self, optimizer, *args, **kwargs):
             optimizer._set_auxiliary_var('found_inf', self._found_inf)
             optimize_ops, params_grads = optimizer.minimize(*args, **kwargs)
             # TODO: Fix to _cache_found_inf after PaddleNLP update
-            self._cache_founf_inf = optimizer._get_auxiliary_var('found_inf')
+            self._cache_found_inf = optimizer._get_auxiliary_var('found_inf')
         else:
             if self._found_inf:
-                self._cache_founf_inf = True
+                self._cache_found_inf = True
             else:
                 optimize_ops, params_grads = optimizer.minimize(*args, **kwargs)
-                self._cache_founf_inf = False
+                self._cache_found_inf = False
 
         if self._use_dynamic_loss_scaling:
             self._update()
diff --git a/python/paddle/distributed/passes/auto_parallel_sharding.py b/python/paddle/distributed/passes/auto_parallel_sharding.py
index 4b7814af7f53ea..c6315c78ad4617 100644
--- a/python/paddle/distributed/passes/auto_parallel_sharding.py
+++ b/python/paddle/distributed/passes/auto_parallel_sharding.py
@@ -196,7 +196,7 @@ def _apply_single_impl(self, main_program, startup_program, context):
         # NOTE Multi / Sub-Block Support
         # we assume that only parameter are present and partitioned in main_block,
         # there is NO new param in sub_block, and all params in sub_block follows the same
-        # partition as main_block. the above constraint fullfill the 3 most common use-cases in Paddle sub_block:
+        # partition as main_block. the above constraint fulfills the 3 most common use-cases in Paddle sub_block:
         # 1. subblock for lr scheduler
         # 2. sub-block uses the same or partial network of main-block, e.g. GPT3 generation model
         # 3. sub-block used for double backward
diff --git a/python/paddle/distributed/passes/pipeline_scheduler_pass/pipeline_zero_bubble.py b/python/paddle/distributed/passes/pipeline_scheduler_pass/pipeline_zero_bubble.py
index d6025d80e5e7c8..112373cebcd404 100644
--- a/python/paddle/distributed/passes/pipeline_scheduler_pass/pipeline_zero_bubble.py
+++ b/python/paddle/distributed/passes/pipeline_scheduler_pass/pipeline_zero_bubble.py
@@ -604,11 +604,11 @@ def _insert_jobs_after_backward_start(
                         break
 
             # Step3: Insert forward jobs after backward_b
-            forword_insert_order = range(self.num_stage)
+            forward_insert_order = range(self.num_stage)
             if self.num_model_chunks % 2:
-                forword_insert_order = range(self.num_stage - 1, -1, -1)
+                forward_insert_order = range(self.num_stage - 1, -1, -1)
 
-            for stage_id in forword_insert_order:
+            for stage_id in forward_insert_order:
                 for chunk_id in range(self.num_model_chunks - 1, -1, -1):
                     if self._can_schedule_f_task(stage_id, chunk_id):
                         while (
diff --git a/python/paddle/incubate/asp/utils.py b/python/paddle/incubate/asp/utils.py
index 408c3d3a6b0866..1fef294dc41826 100644
--- a/python/paddle/incubate/asp/utils.py
+++ b/python/paddle/incubate/asp/utils.py
@@ -220,14 +220,14 @@ def get_mask_1d(mat: npt.NDArray[Any], n: int, m: int) -> npt.NDArray[Any]:
     """
     mat_flatten, shape = _reshape_1d(mat, m)
 
-    mask_flattern = np.ones_like(mat_flatten)
+    mask_flatten = np.ones_like(mat_flatten)
     mask = np.ones_like(mat)
     for i in range(mat_flatten.shape[0]):
         sub_mat = mat_flatten[i]
         min_order_indices = np.argsort(np.absolute(sub_mat))
-        mask_flattern[i, min_order_indices[:n].tolist()] = 0
-    mask_flattern = mask_flattern.reshape(shape)
-    mask[:, :] = mask_flattern[:, : mat.shape[1]]
+        mask_flatten[i, min_order_indices[:n].tolist()] = 0
+    mask_flatten = mask_flatten.reshape(shape)
+    mask[:, :] = mask_flatten[:, : mat.shape[1]]
     return mask
 
 
@@ -486,13 +486,13 @@ def get_mask_2d_best(mat: npt.NDArray[Any], n: int, m: int) -> npt.NDArray[Any]:
     patterns = _compute_valid_2d_patterns(n, m)
 
     mat_flatten, shape = _reshape_2d(mat, m)
-    mask_flattern = np.ones_like(mat_flatten).reshape(-1, m, m)
+    mask_flatten = np.ones_like(mat_flatten).reshape(-1, m, m)
     pmax = np.argmax(
         np.matmul(mat_flatten, patterns.reshape(patterns.shape[0], m * m).T),
         axis=1,
     )
 
-    mask_flattern[:] = patterns[pmax[:]]
+    mask_flatten[:] = patterns[pmax[:]]
     mask = np.empty(shape)
 
     curr_idx = 0
@@ -500,7 +500,7 @@ def get_mask_2d_best(mat: npt.NDArray[Any], n: int, m: int) -> npt.NDArray[Any]:
         row_end = row_start + m
         for col_start in range(0, shape[1], m):
             col_end = col_start + m
-            mask[row_start:row_end, col_start:col_end] = mask_flattern[curr_idx]
+            mask[row_start:row_end, col_start:col_end] = mask_flatten[curr_idx]
             curr_idx += 1
     return mask[: mat.shape[0], : mat.shape[1]]
 
diff --git a/python/paddle/io/dataloader/dataloader_iter.py b/python/paddle/io/dataloader/dataloader_iter.py
index 836c0b40224c6f..8b3ba314388eab 100644
--- a/python/paddle/io/dataloader/dataloader_iter.py
+++ b/python/paddle/io/dataloader/dataloader_iter.py
@@ -52,7 +52,7 @@
 # layers processing) after iterate **the first few data** in
 # distributed launch mode, distributed launch will call
 # terminate() to kill main process on each devices, but thread
-# is still iterating to fullfill blocking queue caches, which
+# is still iterating to fulfill blocking queue caches, which
 # may cause thread error `terminate called without an active
 # exception` for terminate is a strong signal and `__del__`
 # of DataLoader may not be called, so we add a global link to
diff --git a/python/paddle/text/datasets/imikolov.py b/python/paddle/text/datasets/imikolov.py
index df7b4383f6318f..05c4194bbf08da 100644
--- a/python/paddle/text/datasets/imikolov.py
+++ b/python/paddle/text/datasets/imikolov.py
@@ -43,7 +43,7 @@ class Imikolov(Dataset):
         data_type(str): 'NGRAM' or 'SEQ'. Default 'NGRAM'.
         window_size(int): sliding window size for 'NGRAM' data. Default -1.
         mode(str): 'train' 'test' mode. Default 'train'.
-        min_word_freq(int): minimal word frequence for building word dictionary. Default 50.
+        min_word_freq(int): minimal word frequency for building word dictionary. Default 50.
         download(bool): whether to download dataset automatically if
             :attr:`data_file` is not set. Default True
 
diff --git a/test/legacy_test/test_gather_op.py b/test/legacy_test/test_gather_op.py
index c4ebe86af2ad97..d8227134d6b5d2 100644
--- a/test/legacy_test/test_gather_op.py
+++ b/test/legacy_test/test_gather_op.py
@@ -471,7 +471,7 @@ def config(self):
 
 
 class TestOutOfRangeError(unittest.TestCase):
-    def test_dygraph_forwad_and_backward(self):
+    def test_dygraph_forward_and_backward(self):
         with dygraph_guard():
             x = paddle.randn([100, 3]).cpu()
             x.stop_gradient = False
diff --git a/test/legacy_test/test_lstm_op.py b/test/legacy_test/test_lstm_op.py
index 2f3f3fe4ed683e..fca6d226e90705 100644
--- a/test/legacy_test/test_lstm_op.py
+++ b/test/legacy_test/test_lstm_op.py
@@ -207,7 +207,7 @@ def test_check_output(self):
         self.check_output(atol=1e-8, check_dygraph=False)
 
     def test_check_grad(self):
-        # TODO(qingqing) remove folowing lines after the check_grad is refined.
+        # TODO(qingqing) remove following lines after the check_grad is refined.
         N = len(self.lod[0])
         self.outputs['BatchGate'] = np.zeros((N, 4 * self.D)).astype('float64')
         self.outputs['BatchCellPreAct'] = np.zeros((N, self.D)).astype(
@@ -259,7 +259,7 @@ def test_check_grad(self):
 #         self.use_peepholes = True
 
 #     def test_check_grad(self):
-#         # TODO(qingqing) remove folowing lines after the check_grad is refined.
+#         # TODO(qingqing) remove following lines after the check_grad is refined.
 #         N = len(self.lod[0])
 #         self.outputs['BatchGate'] = np.zeros((N, 4 * self.D)).astype('float64')
 #         self.outputs['BatchCellPreAct'] = np.zeros(
diff --git a/tools/gen_ut_cmakelists.py b/tools/gen_ut_cmakelists.py
index f64f065c19da65..50819aa9a0de58 100644
--- a/tools/gen_ut_cmakelists.py
+++ b/tools/gen_ut_cmakelists.py
@@ -238,7 +238,7 @@ def process_dist_port_num(self, port_num):
             re.compile("^[0-9]+$").search(port_num)
             and int(port_num) > 0
             or port_num.strip() == ""
-        ), f"""port_num must be foramt as a positive integer or empty, but this port_num is '{port_num}'"""
+        ), f"""port_num must be format as a positive integer or empty, but this port_num is '{port_num}'"""
         port_num = port_num.strip()
         if len(port_num) == 0:
             return 0
@@ -270,7 +270,7 @@ def _init_dist_ut_ports_from_cmakefile(self, cmake_file_name):
                         break
                 name = lines[k - 1].strip()
 
-                # matcg right tets name format, the name must start with 'test_' follwed bu at least one char of
+                # match the right test name format, the name must start with 'test_' followed by at least one char of
                 # '0-9'. 'a-z'. 'A-Z' or '_'
                 assert re.compile("^test_[0-9a-zA-Z_]+").search(
                     name

From 755259539c94c28bfc74587e64180e59bc798179 Mon Sep 17 00:00:00 2001
From: Nyakku Shigure <sigure.qaq@gmail.com>
Date: Mon, 6 Jan 2025 23:11:56 +0800
Subject: [PATCH 2/3] Update CONTRIBUTING.md

---
 CONTRIBUTING.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 4d9a94b022a748..8f03b35783a5ff 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -52,7 +52,7 @@ PaddlePaddle uses this [Git branching model](http://nvie.com/posts/a-successful-
    Check for broken symlinks................................................Passed
    Detect Private Key...................................(no files to check)Skipped
    Fix End of Files.....................................(no files to check)Skipped
-   clang-formatter.......................................(no files to check)Skipped
+   clang-format.........................................(no files to check)Skipped
    [my-cool-stuff c703c041] add test file
     1 file changed, 0 insertions(+), 0 deletions(-)
     create mode 100644 233

From a824aeacd783e14fc00e17cbca17b165014b5100 Mon Sep 17 00:00:00 2001
From: yinfan98 <1106310035@qq.com>
Date: Tue, 7 Jan 2025 12:09:19 +0800
Subject: [PATCH 3/3] Update imikolov.py

---
 python/paddle/text/datasets/imikolov.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/python/paddle/text/datasets/imikolov.py b/python/paddle/text/datasets/imikolov.py
index 05c4194bbf08da..6691b1fd6ef5c1 100644
--- a/python/paddle/text/datasets/imikolov.py
+++ b/python/paddle/text/datasets/imikolov.py
@@ -54,6 +54,7 @@ class Imikolov(Dataset):
 
         .. code-block:: python
 
+            >>> # doctest: +TIMEOUT(60)
             >>> import paddle
             >>> from paddle.text.datasets import Imikolov