Commit

lintrunner
Priya2698 committed Feb 12, 2025
1 parent 21cce37 commit d3c602d
Showing 3 changed files with 40 additions and 28 deletions.
2 changes: 0 additions & 2 deletions .gitignore
@@ -51,5 +51,3 @@ foo.bin

# Mac OS internal file
.DS_Store

.cache/
40 changes: 25 additions & 15 deletions csrc/scheduler/utils.cpp
@@ -2234,36 +2234,45 @@ void propagateReshapeTransforms(Fusion* fusion, const ComputeAtMap& ca_map) {
for (auto logical_id : tv->getLogicalDomain()) {
  if (terminating_reshape_dims.find(logical_id) !=
      terminating_reshape_dims.end()) {
    // Check if logical ID is directly in the loop domain
    auto find_it = std::find(
        tv->getLoopDomain().begin(), tv->getLoopDomain().end(), logical_id);

    // If not found directly and there is a sharded loop ID,
    // check if the logical ID is the same as the producer of the DID split.
    if (find_it == tv->getLoopDomain().end()) {
      int64_t sharded_axis = getShardedLoopAxis(tv, ParallelType::DIDx);
      if (sharded_axis != -1) {
        // Get the split operation that created the DIDx dimension
        auto split = dynamic_cast<Split*>(
            tv->getLoopDomain().at(sharded_axis)->definition());
        if (split && split->in() == logical_id) {
          // The logical ID is the input of the DID split, so its inner
          // output stands in for it in the loop domain.
          find_it = std::find(
              tv->getLoopDomain().begin(),
              tv->getLoopDomain().end(),
              split->inner());
        }
      }
    }

    NVF_ERROR(
        find_it != tv->getLoopDomain().end(),
        "Require ",
        logical_id,
        " is in the active domain of ",
        tv->toString(),
        " for view propagation.");

    // Reorder the reshape dimensions to the front of the domain
    int64_t old_pos = std::distance(tv->getLoopDomain().begin(), find_it);
    old2new[old_pos] = (int64_t)old2new.size();
  }
}

@@ -2917,3 +2926,4 @@ bool hasExpensiveMUFUops(Fusion* fusion) {
}
} // namespace scheduler_utils
} // namespace nvfuser
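
The new fallback above only triggers when a reshape's logical ID has been outer-split for DIDx parallelization, so the loop domain holds the split's outputs rather than the ID itself. Below is a minimal standalone sketch of that lookup using toy integer IDs and a hypothetical ToySplit struct in place of nvfuser's IterDomain and Split classes; it illustrates the control flow only and is not the library's API.

// Toy model of the lookup: search for the logical ID directly, and if it is
// missing but it is the input of the DID split, search for the split's inner
// output instead.
#include <algorithm>
#include <optional>
#include <vector>

struct ToySplit {
  int in;    // ID consumed by the split (e.g. the d*h reshape dimension)
  int inner; // inner output that remains in the loop domain
};

std::optional<size_t> findLoopPos(
    const std::vector<int>& loop_domain,
    int logical_id,
    const std::optional<ToySplit>& did_split) {
  auto it = std::find(loop_domain.begin(), loop_domain.end(), logical_id);
  if (it == loop_domain.end() && did_split && did_split->in == logical_id) {
    // Fall back to the inner output of the DID split.
    it = std::find(loop_domain.begin(), loop_domain.end(), did_split->inner);
  }
  if (it == loop_domain.end()) {
    return std::nullopt;
  }
  return static_cast<size_t>(std::distance(loop_domain.begin(), it));
}

int main() {
  // Loop domain {b, s, DIDx{d}, h, e} encoded as arbitrary integer IDs.
  std::vector<int> loop_domain = {0, 1, 10, 11, 3};
  // Reshape logical ID 2 (the d*h dimension) was outer-split into {10, 11}.
  ToySplit did_split{/*in=*/2, /*inner=*/11};
  auto pos = findLoopPos(loop_domain, /*logical_id=*/2, did_split);
  // pos == 3: the split's inner output (h) stands in for the d*h dimension.
  return pos.value_or(0) == 3 ? 0 : 1;
}
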
26 changes: 15 additions & 11 deletions tests/cpp/test_multidevice_sharding.cpp
@@ -705,16 +705,20 @@ TEST_F(MultiDeviceTest, ViewWithMerge) {
UnorderedElementsAre(HeuristicIs(SchedulerType::PointWise)));
}

TEST_F(MultiDeviceTest, TransformPropagatorWithReshape) {
  auto fusion = std::make_unique<Fusion>();
  FusionGuard fg(fusion.get());

  const int d = communicator_->size();
  const int64_t b = 2, s = 2, h = 4, e = 3;

  TensorView* in = makeContigConcreteTensor(
      {b, s, d * h * e}); // in: loop domain: {b, s, d*h*e}
  TensorView* out = reshape(
      in,
      {b, s, d * h * e},
      {b, s, d * h, e}); // out: loop domain: {b, s, d*h, e}

  fusion->addInput(in);
  fusion->addOutput(out);

@@ -724,7 +728,7 @@ TEST_F(MultiDeviceTest, TransformPropagatorWithReshape){
  TransformPropagator propagator_c2p(out);
  MaxLogicalDomainInfoSpanningTree(out).traverse(&propagator_c2p);
  // in: loop domain: {b, s, d*h, e} after transform propagation

  // Loop split and parallelize input
  in->setDeviceMesh(mesh);
  in->split(-2, d, /*inner_split=*/false);
@@ -734,19 +738,19 @@ TEST_F(MultiDeviceTest, TransformPropagatorWithReshape){
  TransformPropagator propagator_p2c(in);
  MaxLogicalDomainInfoSpanningTree(in).traverse(&propagator_p2c);
  // out: loop domain: {b, s, d, h, e} after transform propagation

  // Parallelize out
  scheduler_utils::parallelizeAllLike(
      in,
      /*pos=*/-1,
      /*selected_tv=*/{out});
  // out: loop domain: {b, s, DIDx{d}, h, e} after transform propagation

  in->setAllocationDomain(in->getLoopDomain(), true);
  out->setAllocationDomain(out->getLoopDomain(), true);

  FusionExecutorCache executor_cache(std::move(fusion));
  at::Tensor in_tensor = at::randn({b, s, h * e}, tensor_options);
  at::Tensor out_tensor = executor_cache.runFusionWithInputs({in_tensor})[0];
  testValidate(
      executor_cache.fusion(),
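
As a worked example of the shape bookkeeping in this test, the following self-contained sketch traces the extents with plain arithmetic, assuming d = 2 devices (the test obtains d from communicator_->size(), so that value is illustrative only):

// Minimal sketch (plain C++, no nvfuser dependency) of the domain extents
// used in TransformPropagatorWithReshape.
#include <cstdint>
#include <iostream>

int main() {
  const int64_t d = 2; // assumed device count; the test uses communicator_->size()
  const int64_t b = 2, s = 2, h = 4, e = 3;

  // Logical domain of `in`: {b, s, d*h*e}
  std::cout << "in logical: {" << b << ", " << s << ", " << d * h * e << "}\n";
  // After reshape / c2p propagation, the last dim becomes {d*h, e}.
  std::cout << "in loop after c2p: {" << b << ", " << s << ", " << d * h
            << ", " << e << "}\n";
  // Outer split of d*h by d, parallelized on DIDx: {b, s, DIDx{d}, h, e}.
  std::cout << "in loop after DID split: {" << b << ", " << s << ", DIDx{"
            << d << "}, " << h << ", " << e << "}\n";
  // Each device therefore holds a {b, s, h*e} slice, which appears to be why
  // the test constructs its input with at::randn({b, s, h * e}, ...).
  std::cout << "per-device input: {" << b << ", " << s << ", " << h * e
            << "}\n";
  return 0;
}
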
