Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: remove deprecated distutils #4837

Merged
merged 8 commits into from
Sep 30, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .pylintrc
Original file line number Diff line number Diff line change
Expand Up @@ -310,7 +310,7 @@ ignore-mixin-members=yes
# (useful for modules/projects where namespaces are manipulated during runtime
# and thus existing member attributes cannot be deduced by static analysis. It
# supports qualified module names, as well as Unix pattern matching.
ignored-modules=distutils
ignored-modules=

# List of class names for which member attributes should not be checked (useful
# for classes with dynamically set attributes). This supports the use of
Expand Down
3 changes: 1 addition & 2 deletions src/sagemaker/local/image.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@
import tarfile
import tempfile

from distutils.spawn import find_executable
from threading import Thread
from typing import Dict, List
from six.moves.urllib.parse import urlparse
Expand Down Expand Up @@ -170,7 +169,7 @@ def _get_compose_cmd_prefix():
compose_cmd_prefix.extend(["docker", "compose"])
return compose_cmd_prefix

if find_executable("docker-compose") is not None:
if shutil.which("docker-compose") is not None:
logger.info("'Docker Compose' found using Docker Compose CLI.")
compose_cmd_prefix.extend(["docker-compose"])
return compose_cmd_prefix
Expand Down
5 changes: 2 additions & 3 deletions src/sagemaker/local/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@
import re
import errno

from distutils.dir_util import copy_tree
from six.moves.urllib.parse import urlparse

from sagemaker import s3
Expand Down Expand Up @@ -102,7 +101,7 @@ def move_to_destination(source, destination, job_name, sagemaker_session, prefix


def recursive_copy(source, destination):
"""A wrapper around distutils.dir_util.copy_tree.
"""A wrapper around shutil.copy_tree.

This won't throw any exception when the source directory does not exist.

Expand All @@ -111,7 +110,7 @@ def recursive_copy(source, destination):
destination (str): destination path
"""
if os.path.isdir(source):
copy_tree(source, destination)
shutil.copytree(source, destination, dirs_exist_ok=True)


def kill_child_processes(pid):
Expand Down
9 changes: 1 addition & 8 deletions src/sagemaker/workflow/_repack_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,13 +27,6 @@
# is unpacked for inference, the custom entry point will be used.
# Reference: https://docs.aws.amazon.com/sagemaker/latest/dg/amazon-sagemaker-toolkits.html

# distutils.dir_util.copy_tree works way better than the half-baked
# shutil.copytree which bombs on previously existing target dirs...
# alas ... https://bugs.python.org/issue10948
# we'll go ahead and use the copy_tree function anyways because this
# repacking is some short-lived hackery, right??
from distutils.dir_util import copy_tree

from os.path import abspath, realpath, dirname, normpath, join as joinpath

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -188,7 +181,7 @@ def repack(inference_script, model_archive, dependencies=None, source_dir=None):

# copy the "src" dir, which includes the previous training job's model and the
# custom inference script, to the output of this training job
copy_tree(src_dir, "/opt/ml/model")
shutil.copytree(src_dir, "/opt/ml/model", dirs_exist_ok=True)


if __name__ == "__main__": # pragma: no cover
Expand Down
2 changes: 1 addition & 1 deletion src/sagemaker/workflow/_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@

logger = logging.getLogger(__name__)

FRAMEWORK_VERSION = "0.23-1"
FRAMEWORK_VERSION = "1.2-1"
INSTANCE_TYPE = "ml.m5.large"
REPACK_SCRIPT = "_repack_model.py"
REPACK_SCRIPT_LAUNCHER = "_repack_script_launcher.sh"
Expand Down
9 changes: 1 addition & 8 deletions tests/data/_repack_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,13 +26,6 @@
# is unpacked for inference, the custom entry point will be used.
# Reference: https://docs.aws.amazon.com/sagemaker/latest/dg/amazon-sagemaker-toolkits.html

# distutils.dir_util.copy_tree works way better than the half-baked
# shutil.copytree which bombs on previously existing target dirs...
# alas ... https://bugs.python.org/issue10948
# we'll go ahead and use the copy_tree function anyways because this
# repacking is some short-lived hackery, right??
from distutils.dir_util import copy_tree


def repack(inference_script, model_archive, dependencies=None, source_dir=None): # pragma: no cover
"""Repack custom dependencies and code into an existing model TAR archive
Expand Down Expand Up @@ -92,7 +85,7 @@ def repack(inference_script, model_archive, dependencies=None, source_dir=None):

# copy the "src" dir, which includes the previous training job's model and the
# custom inference script, to the output of this training job
copy_tree(src_dir, "/opt/ml/model")
shutil.copytree(src_dir, "/opt/ml/model", dirs_exist_ok=True)


if __name__ == "__main__": # pragma: no cover
Expand Down
6 changes: 4 additions & 2 deletions tests/integ/sagemaker/workflow/test_retry.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,8 @@ def test_model_registration_with_model_repack(
role,
pipeline_name,
region_name,
pytorch_training_latest_version,
pytorch_training_latest_py_version,
):
base_dir = os.path.join(DATA_DIR, "pytorch_mnist")
entry_point = os.path.join(base_dir, "mnist.py")
Expand All @@ -166,8 +168,8 @@ def test_model_registration_with_model_repack(
pytorch_estimator = PyTorch(
entry_point=entry_point,
role=role,
framework_version="1.5.0",
py_version="py3",
framework_version=pytorch_training_latest_version,
py_version=pytorch_training_latest_py_version,
instance_count=instance_count,
instance_type=instance_type,
sagemaker_session=pipeline_session,
Expand Down
4 changes: 2 additions & 2 deletions tests/unit/sagemaker/local/test_local_image.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,7 @@ def test_get_compose_cmd_prefix_with_docker_cli():
"subprocess.check_output",
side_effect=subprocess.CalledProcessError(returncode=1, cmd="docker compose version"),
)
@patch("sagemaker.local.image.find_executable", Mock(return_value="/usr/bin/docker-compose"))
@patch("sagemaker.local.image.shutil.which", Mock(return_value="/usr/bin/docker-compose"))
def test_get_compose_cmd_prefix_with_docker_compose_cli(check_output):
compose_cmd_prefix = _SageMakerContainer._get_compose_cmd_prefix()
assert compose_cmd_prefix == ["docker-compose"]
Expand All @@ -170,7 +170,7 @@ def test_get_compose_cmd_prefix_with_docker_compose_cli(check_output):
"subprocess.check_output",
side_effect=subprocess.CalledProcessError(returncode=1, cmd="docker compose version"),
)
@patch("sagemaker.local.image.find_executable", Mock(return_value=None))
@patch("sagemaker.local.image.shutil.which", Mock(return_value=None))
def test_get_compose_cmd_prefix_raises_import_error(check_output):
with pytest.raises(ImportError) as e:
_SageMakerContainer._get_compose_cmd_prefix()
Expand Down
4 changes: 2 additions & 2 deletions tests/unit/sagemaker/local/test_local_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,11 +85,11 @@ def test_move_to_destination_illegal_destination():


@patch("sagemaker.local.utils.os.path")
@patch("sagemaker.local.utils.copy_tree")
@patch("sagemaker.local.utils.shutil.copytree")
def test_recursive_copy(copy_tree, m_os_path):
m_os_path.isdir.return_value = True
sagemaker.local.utils.recursive_copy("source", "destination")
copy_tree.assert_called_with("source", "destination")
copy_tree.assert_called_with("source", "destination", dirs_exist_ok=True)


@patch("sagemaker.local.utils.os")
Expand Down
5 changes: 2 additions & 3 deletions tests/unit/sagemaker/workflow/test_step_collections.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@

MODEL_NAME = "gisele"
MODEL_REPACKING_IMAGE_URI = (
"246618743249.dkr.ecr.us-west-2.amazonaws.com/sagemaker-scikit-learn:0.23-1-cpu-py3"
"246618743249.dkr.ecr.us-west-2.amazonaws.com/sagemaker-scikit-learn:1.2-1-cpu-py3"
)


Expand Down Expand Up @@ -1219,8 +1219,7 @@ def test_estimator_transformer_with_model_repack_with_estimator(estimator, sourc
assert arguments == {
"AlgorithmSpecification": {
"TrainingInputMode": "File",
"TrainingImage": "246618743249.dkr.ecr.us-west-2.amazonaws.com/"
+ "sagemaker-scikit-learn:0.23-1-cpu-py3",
"TrainingImage": MODEL_REPACKING_IMAGE_URI,
},
"ProfilerConfig": {"DisableProfiler": True},
"OutputDataConfig": {"S3OutputPath": "s3://my-bucket/"},
Expand Down
11 changes: 9 additions & 2 deletions tests/unit/test_chainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
import logging
import json
import os
from distutils.util import strtobool

import pytest
from mock import MagicMock, Mock, ANY
Expand Down Expand Up @@ -174,7 +173,15 @@ def test_additional_hyperparameters(sagemaker_session, chainer_version, chainer_
framework_version=chainer_version,
py_version=chainer_py_version,
)
assert bool(strtobool(chainer.hyperparameters()["sagemaker_use_mpi"]))

assert chainer.hyperparameters()["sagemaker_use_mpi"].lower() in (
"y",
"yes",
"t",
"true",
"on",
"1",
)
assert int(chainer.hyperparameters()["sagemaker_num_processes"]) == 4
assert int(chainer.hyperparameters()["sagemaker_process_slots_per_host"]) == 10
assert (
Expand Down