Commit

lintrunner
Priya2698 committed Feb 12, 2025
1 parent 21cce37 commit d3c602d
Showing 3 changed files with 40 additions and 28 deletions.
2 changes: 0 additions & 2 deletions .gitignore
@@ -51,5 +51,3 @@ foo.bin

# Mac OS internal file
.DS_Store

.cache/
40 changes: 25 additions & 15 deletions csrc/scheduler/utils.cpp
@@ -2234,36 +2234,45 @@ void propagateReshapeTransforms(Fusion* fusion, const ComputeAtMap& ca_map) {
for (auto logical_id : tv->getLogicalDomain()) {
  if (terminating_reshape_dims.find(logical_id) !=
      terminating_reshape_dims.end()) {
    // Check if logical ID is directly in the loop domain
    auto find_it = std::find(
        tv->getLoopDomain().begin(), tv->getLoopDomain().end(), logical_id);

    // If not found directly and there is a sharded loop ID,
    // check if the logical ID is the same as the producer of the DID split.
    if (find_it == tv->getLoopDomain().end()) {
      int64_t sharded_axis = getShardedLoopAxis(tv, ParallelType::DIDx);
      if (sharded_axis != -1) {
        // Get the split operation that created the DIDx dimension
        auto split = dynamic_cast<Split*>(
            tv->getLoopDomain().at(sharded_axis)->definition());
        if (split && split->in() == logical_id) {
          // The logical ID is the input of the DID split, so its inner
          // output stands in for it in the loop domain.
          find_it = std::find(
              tv->getLoopDomain().begin(),
              tv->getLoopDomain().end(),
              split->inner());
        }
      }
    }

    NVF_ERROR(
        find_it != tv->getLoopDomain().end(),
        "Require ",
        logical_id,
        " is in the active domain of ",
        tv->toString(),
        " for view propagation.");

    // Reorder the reshape dimensions to the front of the domain
    int64_t old_pos = std::distance(tv->getLoopDomain().begin(), find_it);
    old2new[old_pos] = (int64_t)old2new.size();
  }
}

@@ -2917,3 +2926,4 @@ bool hasExpensiveMUFUops(Fusion* fusion) {
}
} // namespace scheduler_utils
} // namespace nvfuser
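
The new fallback above only triggers when a reshape's logical ID has been outer-split for DIDx parallelization, so the loop domain holds the split's outputs rather than the ID itself. Below is a minimal standalone sketch of that lookup using toy integer IDs and a hypothetical ToySplit struct in place of nvfuser's IterDomain and Split classes; it illustrates the control flow only and is not the library's API.

// Toy model of the lookup: search for the logical ID directly, and if it is
// missing but it is the input of the DID split, search for the split's inner
// output instead.
#include <algorithm>
#include <optional>
#include <vector>

struct ToySplit {
  int in;    // ID consumed by the split (e.g. the d*h reshape dimension)
  int inner; // inner output that remains in the loop domain
};

std::optional<size_t> findLoopPos(
    const std::vector<int>& loop_domain,
    int logical_id,
    const std::optional<ToySplit>& did_split) {
  auto it = std::find(loop_domain.begin(), loop_domain.end(), logical_id);
  if (it == loop_domain.end() && did_split && did_split->in == logical_id) {
    // Fall back to the inner output of the DID split.
    it = std::find(loop_domain.begin(), loop_domain.end(), did_split->inner);
  }
  if (it == loop_domain.end()) {
    return std::nullopt;
  }
  return static_cast<size_t>(std::distance(loop_domain.begin(), it));
}

int main() {
  // Loop domain {b, s, DIDx{d}, h, e} encoded as arbitrary integer IDs.
  std::vector<int> loop_domain = {0, 1, 10, 11, 3};
  // Reshape logical ID 2 (the d*h dimension) was outer-split into {10, 11}.
  ToySplit did_split{/*in=*/2, /*inner=*/11};
  auto pos = findLoopPos(loop_domain, /*logical_id=*/2, did_split);
  // pos == 3: the split's inner output (h) stands in for the d*h dimension.
  return pos.value_or(0) == 3 ? 0 : 1;
}
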
26 changes: 15 additions & 11 deletions tests/cpp/test_multidevice_sharding.cpp
@@ -705,16 +705,20 @@ TEST_F(MultiDeviceTest, ViewWithMerge) {
UnorderedElementsAre(HeuristicIs(SchedulerType::PointWise)));
}

TEST_F(MultiDeviceTest, TransformPropagatorWithReshape) {
  auto fusion = std::make_unique<Fusion>();
  FusionGuard fg(fusion.get());

  const int d = communicator_->size();
  const int64_t b = 2, s = 2, h = 4, e = 3;

  TensorView* in = makeContigConcreteTensor(
      {b, s, d * h * e}); // in: loop domain: {b, s, d*h*e}
  TensorView* out = reshape(
      in,
      {b, s, d * h * e},
      {b, s, d * h, e}); // out: loop domain: {b, s, d*h, e}

  fusion->addInput(in);
  fusion->addOutput(out);

@@ -724,7 +728,7 @@ TEST_F(MultiDeviceTest, TransformPropagatorWithReshape){
  TransformPropagator propagator_c2p(out);
  MaxLogicalDomainInfoSpanningTree(out).traverse(&propagator_c2p);
  // in: loop domain: {b, s, d*h, e} after transform propagation

  // Loop split and parallelize input
  in->setDeviceMesh(mesh);
  in->split(-2, d, /*inner_split=*/false);
@@ -734,19 +738,19 @@ TEST_F(MultiDeviceTest, TransformPropagatorWithReshape){
  TransformPropagator propagator_p2c(in);
  MaxLogicalDomainInfoSpanningTree(in).traverse(&propagator_p2c);
  // out: loop domain: {b, s, d, h, e} after transform propagation

  // Parallelize out
  scheduler_utils::parallelizeAllLike(
      in,
      /*pos=*/-1,
      /*selected_tv=*/{out});
  // out: loop domain: {b, s, DIDx{d}, h, e} after transform propagation

  in->setAllocationDomain(in->getLoopDomain(), true);
  out->setAllocationDomain(out->getLoopDomain(), true);

  FusionExecutorCache executor_cache(std::move(fusion));
  at::Tensor in_tensor = at::randn({b, s, h * e}, tensor_options);
  at::Tensor out_tensor = executor_cache.runFusionWithInputs({in_tensor})[0];
  testValidate(
      executor_cache.fusion(),
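
As a worked example of the shape bookkeeping in this test, the following self-contained sketch traces the extents with plain arithmetic, assuming d = 2 devices (the test obtains d from communicator_->size(), so that value is illustrative only):

// Minimal sketch (plain C++, no nvfuser dependency) of the domain extents
// used in TransformPropagatorWithReshape.
#include <cstdint>
#include <iostream>

int main() {
  const int64_t d = 2; // assumed device count; the test uses communicator_->size()
  const int64_t b = 2, s = 2, h = 4, e = 3;

  // Logical domain of `in`: {b, s, d*h*e}
  std::cout << "in logical: {" << b << ", " << s << ", " << d * h * e << "}\n";
  // After reshape / c2p propagation, the last dim becomes {d*h, e}.
  std::cout << "in loop after c2p: {" << b << ", " << s << ", " << d * h
            << ", " << e << "}\n";
  // Outer split of d*h by d, parallelized on DIDx: {b, s, DIDx{d}, h, e}.
  std::cout << "in loop after DID split: {" << b << ", " << s << ", DIDx{"
            << d << "}, " << h << ", " << e << "}\n";
  // Each device therefore holds a {b, s, h*e} slice, which appears to be why
  // the test constructs its input with at::randn({b, s, h * e}, ...).
  std::cout << "per-device input: {" << b << ", " << s << ", " << h * e
            << "}\n";
  return 0;
}
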
