From 67f60bd5eb618f0eaa96a87d83cf59ec4c111472 Mon Sep 17 00:00:00 2001 From: Justin Mahlik <38999128+jmahlik@users.noreply.github.com> Date: Mon, 25 Mar 2024 15:18:31 -0500 Subject: [PATCH 1/2] fix: remove deprecated distutils imports by local mode --- src/sagemaker/local/image.py | 3 +-- src/sagemaker/local/utils.py | 5 ++--- tests/unit/sagemaker/local/test_local_image.py | 4 ++-- tests/unit/sagemaker/local/test_local_utils.py | 4 ++-- tests/unit/test_chainer.py | 10 +++++++++- 5 files changed, 16 insertions(+), 10 deletions(-) diff --git a/src/sagemaker/local/image.py b/src/sagemaker/local/image.py index 32437a59c3..ef24bb0d99 100644 --- a/src/sagemaker/local/image.py +++ b/src/sagemaker/local/image.py @@ -30,7 +30,6 @@ import tarfile import tempfile -from distutils.spawn import find_executable from threading import Thread from typing import Dict, List from six.moves.urllib.parse import urlparse @@ -170,7 +169,7 @@ def _get_compose_cmd_prefix(): compose_cmd_prefix.extend(["docker", "compose"]) return compose_cmd_prefix - if find_executable("docker-compose") is not None: + if shutil.which("docker-compose") is not None: logger.info("'Docker Compose' found using Docker Compose CLI.") compose_cmd_prefix.extend(["docker-compose"]) return compose_cmd_prefix diff --git a/src/sagemaker/local/utils.py b/src/sagemaker/local/utils.py index 950d0974db..2c2a5a1c90 100644 --- a/src/sagemaker/local/utils.py +++ b/src/sagemaker/local/utils.py @@ -21,7 +21,6 @@ import re import errno -from distutils.dir_util import copy_tree from six.moves.urllib.parse import urlparse from sagemaker import s3 @@ -102,7 +101,7 @@ def move_to_destination(source, destination, job_name, sagemaker_session, prefix def recursive_copy(source, destination): - """A wrapper around distutils.dir_util.copy_tree. + """A wrapper around shutil.copy_tree. This won't throw any exception when the source directory does not exist. @@ -111,7 +110,7 @@ def recursive_copy(source, destination): destination (str): destination path """ if os.path.isdir(source): - copy_tree(source, destination) + shutil.copytree(source, destination, dirs_exist_ok=True) def kill_child_processes(pid): diff --git a/tests/unit/sagemaker/local/test_local_image.py b/tests/unit/sagemaker/local/test_local_image.py index 3142fa6dfa..3bb15dc43d 100644 --- a/tests/unit/sagemaker/local/test_local_image.py +++ b/tests/unit/sagemaker/local/test_local_image.py @@ -160,7 +160,7 @@ def test_get_compose_cmd_prefix_with_docker_cli(): "subprocess.check_output", side_effect=subprocess.CalledProcessError(returncode=1, cmd="docker compose version"), ) -@patch("sagemaker.local.image.find_executable", Mock(return_value="/usr/bin/docker-compose")) +@patch("sagemaker.local.image.shutil.which", Mock(return_value="/usr/bin/docker-compose")) def test_get_compose_cmd_prefix_with_docker_compose_cli(check_output): compose_cmd_prefix = _SageMakerContainer._get_compose_cmd_prefix() assert compose_cmd_prefix == ["docker-compose"] @@ -170,7 +170,7 @@ def test_get_compose_cmd_prefix_with_docker_compose_cli(check_output): "subprocess.check_output", side_effect=subprocess.CalledProcessError(returncode=1, cmd="docker compose version"), ) -@patch("sagemaker.local.image.find_executable", Mock(return_value=None)) +@patch("sagemaker.local.image.shutil.which", Mock(return_value=None)) def test_get_compose_cmd_prefix_raises_import_error(check_output): with pytest.raises(ImportError) as e: _SageMakerContainer._get_compose_cmd_prefix() diff --git a/tests/unit/sagemaker/local/test_local_utils.py b/tests/unit/sagemaker/local/test_local_utils.py index 42710b2495..a9aae53fb2 100644 --- a/tests/unit/sagemaker/local/test_local_utils.py +++ b/tests/unit/sagemaker/local/test_local_utils.py @@ -85,11 +85,11 @@ def test_move_to_destination_illegal_destination(): @patch("sagemaker.local.utils.os.path") -@patch("sagemaker.local.utils.copy_tree") +@patch("sagemaker.local.utils.shutil.copytree") def test_recursive_copy(copy_tree, m_os_path): m_os_path.isdir.return_value = True sagemaker.local.utils.recursive_copy("source", "destination") - copy_tree.assert_called_with("source", "destination") + copy_tree.assert_called_with("source", "destination", dirs_exist_ok=True) @patch("sagemaker.local.utils.os") diff --git a/tests/unit/test_chainer.py b/tests/unit/test_chainer.py index 8ad2ae0bab..e51b6ea998 100644 --- a/tests/unit/test_chainer.py +++ b/tests/unit/test_chainer.py @@ -174,7 +174,15 @@ def test_additional_hyperparameters(sagemaker_session, chainer_version, chainer_ framework_version=chainer_version, py_version=chainer_py_version, ) - assert bool(strtobool(chainer.hyperparameters()["sagemaker_use_mpi"])) + + assert chainer.hyperparameters()["sagemaker_use_mpi"].lower() in ( + "y", + "yes", + "t", + "true", + "on", + "1", + ) assert int(chainer.hyperparameters()["sagemaker_num_processes"]) == 4 assert int(chainer.hyperparameters()["sagemaker_process_slots_per_host"]) == 10 assert ( From bfb49a26dec132841d6ec2b7364f21d7ce540565 Mon Sep 17 00:00:00 2001 From: Justin Mahlik <38999128+jmahlik@users.noreply.github.com> Date: Mon, 25 Mar 2024 15:39:25 -0500 Subject: [PATCH 2/2] fix: remove deprecated distutils api calls in workflows Also remove from tests. Closes #4534 --- .pylintrc | 2 +- src/sagemaker/workflow/_repack_model.py | 9 +-------- tests/data/_repack_model.py | 9 +-------- tests/unit/test_chainer.py | 1 - 4 files changed, 3 insertions(+), 18 deletions(-) diff --git a/.pylintrc b/.pylintrc index 11e2ababa9..9ad209dd39 100644 --- a/.pylintrc +++ b/.pylintrc @@ -310,7 +310,7 @@ ignore-mixin-members=yes # (useful for modules/projects where namespaces are manipulated during runtime # and thus existing member attributes cannot be deduced by static analysis. It # supports qualified module names, as well as Unix pattern matching. -ignored-modules=distutils +ignored-modules= # List of class names for which member attributes should not be checked (useful # for classes with dynamically set attributes). This supports the use of diff --git a/src/sagemaker/workflow/_repack_model.py b/src/sagemaker/workflow/_repack_model.py index 84b3a426f6..2a9129da2f 100644 --- a/src/sagemaker/workflow/_repack_model.py +++ b/src/sagemaker/workflow/_repack_model.py @@ -27,13 +27,6 @@ # is unpacked for inference, the custom entry point will be used. # Reference: https://docs.aws.amazon.com/sagemaker/latest/dg/amazon-sagemaker-toolkits.html -# distutils.dir_util.copy_tree works way better than the half-baked -# shutil.copytree which bombs on previously existing target dirs... -# alas ... https://bugs.python.org/issue10948 -# we'll go ahead and use the copy_tree function anyways because this -# repacking is some short-lived hackery, right?? -from distutils.dir_util import copy_tree - from os.path import abspath, realpath, dirname, normpath, join as joinpath logger = logging.getLogger(__name__) @@ -188,7 +181,7 @@ def repack(inference_script, model_archive, dependencies=None, source_dir=None): # copy the "src" dir, which includes the previous training job's model and the # custom inference script, to the output of this training job - copy_tree(src_dir, "/opt/ml/model") + shutil.copytree(src_dir, "/opt/ml/model", dirs_exist_ok=True) if __name__ == "__main__": # pragma: no cover diff --git a/tests/data/_repack_model.py b/tests/data/_repack_model.py index 3cfa6760b3..b370db5dbf 100644 --- a/tests/data/_repack_model.py +++ b/tests/data/_repack_model.py @@ -26,13 +26,6 @@ # is unpacked for inference, the custom entry point will be used. # Reference: https://docs.aws.amazon.com/sagemaker/latest/dg/amazon-sagemaker-toolkits.html -# distutils.dir_util.copy_tree works way better than the half-baked -# shutil.copytree which bombs on previously existing target dirs... -# alas ... https://bugs.python.org/issue10948 -# we'll go ahead and use the copy_tree function anyways because this -# repacking is some short-lived hackery, right?? -from distutils.dir_util import copy_tree - def repack(inference_script, model_archive, dependencies=None, source_dir=None): # pragma: no cover """Repack custom dependencies and code into an existing model TAR archive @@ -92,7 +85,7 @@ def repack(inference_script, model_archive, dependencies=None, source_dir=None): # copy the "src" dir, which includes the previous training job's model and the # custom inference script, to the output of this training job - copy_tree(src_dir, "/opt/ml/model") + shutil.copytree(src_dir, "/opt/ml/model", dirs_exist_ok=True) if __name__ == "__main__": # pragma: no cover diff --git a/tests/unit/test_chainer.py b/tests/unit/test_chainer.py index e51b6ea998..5c273460ee 100644 --- a/tests/unit/test_chainer.py +++ b/tests/unit/test_chainer.py @@ -15,7 +15,6 @@ import logging import json import os -from distutils.util import strtobool import pytest from mock import MagicMock, Mock, ANY