-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path: Makefile
161 lines (117 loc) · 6.13 KB
/
Makefile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
# Every command target must be declared phony, otherwise a stray file with the
# same name (e.g. `test` or `latency`) silently disables the target.
.PHONY: clean clean-build clean-pyc clean-test coverage dist docs help install \
	lint lint/flake8 lint/black test test-all servedocs release \
	install-cuda install-npu-x86 install-npu-arm \
	hello latency multi-latency bandwidth bidirectional-bw \
	allreduce allgather alltoall barrier broadcast gather reduce scatter \
	reducescatter p2p collectives benchmarks
.DEFAULT_GOAL := help
# Small Python helper (exported to the environment and run via `python -c`):
# opens the file path given as argv[1] in the default web browser.
define BROWSER_PYSCRIPT
import os, webbrowser, sys
from urllib.request import pathname2url
webbrowser.open("file://" + pathname2url(os.path.abspath(sys.argv[1])))
endef
export BROWSER_PYSCRIPT
# Python helper that reads this Makefile on stdin and prints each
# `target: ## description` pair as an aligned help listing (used by `help`).
define PRINT_HELP_PYSCRIPT
import re, sys
for line in sys.stdin:
	match = re.match(r'^([a-zA-Z_-]+):.*?## (.*)$$', line)
	if match:
		target, help = match.groups()
		print("%-20s %s" % (target, help))
endef
export PRINT_HELP_PYSCRIPT

# Command used to open generated HTML (coverage report, Sphinx docs) in a browser.
BROWSER := python -c "$$BROWSER_PYSCRIPT"
# Self-documenting help: scans $(MAKEFILE_LIST) for `target: ## description`
# lines. Added a `##` description so `help` lists itself too.
help: ## show this help listing
	@python -c "$$PRINT_HELP_PYSCRIPT" < $(MAKEFILE_LIST)
clean: clean-build clean-pyc clean-test ## remove all build, test, coverage and Python artifacts

clean-build: ## remove build artifacts
	rm -fr build/
	rm -fr dist/
	rm -fr .eggs/
	find . -name '*.egg-info' -exec rm -fr {} +
	find . -name '*.egg' -exec rm -f {} +

clean-pyc: ## remove Python file artifacts
	find . -name '*.pyc' -exec rm -f {} +
	find . -name '*.pyo' -exec rm -f {} +
	find . -name '*~' -exec rm -f {} +
	find . -name '__pycache__' -exec rm -fr {} +

clean-test: ## remove test and coverage artifacts
	rm -fr .tox/
	rm -f .coverage
	rm -fr htmlcov/
	rm -fr .pytest_cache
lint/flake8: ## check style with flake8
	flake8 pytorch_hccl_tests tests

lint/black: ## check style with black
	black --check pytorch_hccl_tests tests

# Aggregate target: run both linters.
lint: lint/flake8 lint/black ## check style
test: ## run tests quickly with the default Python
	pytest

test-all: ## run tests on every Python version with tox
	tox
coverage: ## check code coverage quickly with the default Python
	coverage run --source pytorch_hccl_tests -m pytest
	coverage report -m
	coverage html
	$(BROWSER) htmlcov/index.html
# Regenerate the sphinx-apidoc .rst stubs from scratch, rebuild the HTML docs
# via the docs/ sub-Makefile, then open the result in a browser.
docs: ## generate Sphinx HTML documentation, including API docs
	rm -f docs/pytorch_hccl_tests.rst
	rm -f docs/modules.rst
	sphinx-apidoc -o docs/ pytorch_hccl_tests
	$(MAKE) -C docs clean
	$(MAKE) -C docs html
	$(BROWSER) docs/_build/html/index.html
# Watch *.rst files and rebuild the HTML docs on every change (requires watchdog).
servedocs: docs ## compile the docs watching for changes
	watchmedo shell-command -p '*.rst' -c '$(MAKE) -C docs html' -R -D .
release: dist ## package and upload a release
	twine upload dist/*

# NOTE(review): direct `setup.py sdist`/`bdist_wheel` invocation is deprecated
# by setuptools; consider `python -m build` once the `build` package is a dep.
dist: clean ## builds source and wheel package
	python setup.py sdist
	python setup.py bdist_wheel
	ls -l dist
# NOTE(review): the NPU wheels below are pinned to CPython 3.7 (cp37) and
# PyTorch 1.11.0 — confirm the active interpreter matches before installing.
install-cuda: ## Install package for CUDA backend testing. Use PyTorch 1.11.0 for fair comparison
	pip install torch==1.11.0+cu113 torchvision==0.12.0+cu113 torchaudio==0.11.0 --extra-index-url https://download.pytorch.org/whl/cu113
	pip install .

install-npu-x86: clean ## Install package for Ascend backend testing (x86)
	wget https://gitee.com/ascend/pytorch/releases/download/v5.0.rc2-pytorch1.11.0/torch_npu-1.11.0.post1-cp37-cp37m-linux_x86_64.whl
	pip install torch_npu-1.11.0.post1-cp37-cp37m-linux_x86_64.whl
	pip install .

install-npu-arm: clean ## Install packages for Ascend/NPU backend testing (aarch64)
	wget https://repo.huaweicloud.com/kunpeng/archive/Ascend/PyTorch/torch-1.11.0-cp37-cp37m-linux_aarch64.whl
	wget https://gitee.com/ascend/pytorch/releases/download/v5.0.rc2-pytorch1.11.0/torch_npu-1.11.0.post1-cp37-cp37m-linux_aarch64.whl
	pip install --force-reinstall torch-1.11.0-cp37-cp37m-linux_aarch64.whl torch_npu-1.11.0.post1-cp37-cp37m-linux_aarch64.whl
	pip install .

install: clean ## install the package to the active Python's site-packages
	pip install torch==1.11.0+cpu torchvision==0.12.0+cpu -f https://download.pytorch.org/whl/torch_stable.html
	pip install .
# Benchmark defaults, exported to the torchrun child processes.
# You can override the env variables. Example: `make latency -e DEVICE=npu`
# Simple (:=) assignment: the values are constants, so avoid re-expansion.
export WORLD_SIZE := 4
export DEVICE := cpu
# To suppress a torchrun warning
export OMP_NUM_THREADS := 1
# Point-to-point benchmarks below use exactly 2 ranks (sender/receiver).
hello: ## OSU MPI/HCCL hello init benchmark
	torchrun --nnodes 1 --nproc_per_node 2 pytorch_hccl_tests/cli.py --benchmark hello

latency: ## OSU MPI/HCCL latency benchmark
	torchrun --nnodes 1 --nproc_per_node 2 pytorch_hccl_tests/cli.py --benchmark latency --device ${DEVICE}

multi-latency: ## OSU MPI/HCCL multi-latency benchmark
	torchrun --nnodes 1 --nproc_per_node ${WORLD_SIZE} pytorch_hccl_tests/cli.py --benchmark multi-latency --device ${DEVICE}

bandwidth: ## OSU MPI/HCCL bandwidth benchmark
	torchrun --nnodes 1 --nproc_per_node 2 pytorch_hccl_tests/cli.py --benchmark bandwidth --device ${DEVICE}

bidirectional-bw: ## OSU MPI/HCCL bidirectional bandwidth benchmark
	torchrun --nnodes 1 --nproc_per_node 2 pytorch_hccl_tests/cli.py --benchmark bibw --device ${DEVICE}
# Collective benchmarks run with ${WORLD_SIZE} ranks (barrier uses 2).
allreduce: ## OSU MPI/HCCL allreduce benchmark
	torchrun --nnodes 1 --nproc_per_node ${WORLD_SIZE} pytorch_hccl_tests/cli.py --benchmark allreduce --device ${DEVICE}

allgather: ## OSU MPI/HCCL allgather benchmark
	torchrun --nnodes 1 --nproc_per_node ${WORLD_SIZE} pytorch_hccl_tests/cli.py --benchmark allgather --device ${DEVICE}

alltoall: ## OSU MPI/HCCL alltoall benchmark
	torchrun --nnodes 1 --nproc_per_node ${WORLD_SIZE} pytorch_hccl_tests/cli.py --benchmark alltoall --device ${DEVICE}

barrier: ## OSU MPI/HCCL barrier benchmark
	torchrun --nnodes 1 --nproc_per_node 2 pytorch_hccl_tests/cli.py --benchmark barrier --device ${DEVICE}

broadcast: ## OSU MPI/HCCL broadcast benchmark
	torchrun --nnodes 1 --nproc_per_node ${WORLD_SIZE} pytorch_hccl_tests/cli.py --benchmark broadcast --device ${DEVICE}
# These three invoke the OSU scripts directly rather than going through cli.py.
# Fixed copy-pasted "Bandwidth benchmark" help texts, and scatter now uses
# ${WORLD_SIZE} ranks for consistency with gather/reduce.
gather: ## OSU MPI/HCCL gather benchmark
	torchrun --nnodes 1 --nproc_per_node ${WORLD_SIZE} pytorch_hccl_tests/osu/collectives/osu_gather.py --device ${DEVICE}

reduce: ## OSU MPI/HCCL reduce benchmark
	torchrun --nnodes 1 --nproc_per_node ${WORLD_SIZE} pytorch_hccl_tests/osu/collectives/osu_reduce.py --device ${DEVICE}

scatter: ## OSU MPI/HCCL scatter benchmark
	torchrun --nnodes 1 --nproc_per_node ${WORLD_SIZE} pytorch_hccl_tests/osu/collectives/osu_scatter.py --device ${DEVICE}
reducescatter: ## OSU MPI/HCCL reduce_scatter benchmark
	torchrun --nnodes 1 --nproc_per_node ${WORLD_SIZE} pytorch_hccl_tests/cli.py --benchmark reducescatter --device ${DEVICE}
# Suite aggregates. `broadcast` was defined above but missing from the
# collectives suite — added it.
p2p: latency bandwidth bidirectional-bw multi-latency ## OSU MPI/HCCL point-to-point benchmark suite

collectives: allreduce allgather alltoall barrier broadcast gather reduce scatter reducescatter ## OSU MPI/HCCL collective communications benchmark suite

benchmarks: p2p collectives ## OSU MPI/HCCL complete benchmark suite