diff --git a/docs/conf.py b/docs/conf.py
index 1194bfd58..f98981a7a 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -8,14 +8,15 @@
 # If extensions (or modules to document with autodoc) are in another directory,
 # add these directories to sys.path here. If the directory is relative to the
-# documentation root, use os.path.abspath to make it absolute, like shown here.
+# documentation root, use pathlib.Path.resolve to make it absolute, as shown here.
 #
 import os
 import re
 import sys
+from pathlib import Path
 
-sys.path.insert(0, os.path.abspath("../"))
-sys.path.append(os.path.abspath("./_ext"))
+sys.path.insert(0, str(Path("../").resolve(strict=True)))
+sys.path.append(str(Path("./_ext").resolve(strict=True)))
 
 # -- Project information -----------------------------------------------------
diff --git a/examples/data_types_and_io/data_types_and_io/file.py b/examples/data_types_and_io/data_types_and_io/file.py
index c9beeebbe..ede0fd7ae 100644
--- a/examples/data_types_and_io/data_types_and_io/file.py
+++ b/examples/data_types_and_io/data_types_and_io/file.py
@@ -1,6 +1,6 @@
 import csv
-import os
 from collections import defaultdict
+from pathlib import Path
 from typing import List
 
 import flytekit
@@ -37,11 +37,8 @@ def normalize_columns(
         normalized_data[colname] = [(x - mean) / std for x in values]
 
     # write to local path
-    out_path = os.path.join(
-        flytekit.current_context().working_directory,
-        f"normalized-{os.path.basename(csv_url.path).rsplit('.')[0]}.csv",
-    )
-    with open(out_path, mode="w") as output_file:
+    out_path = Path(flytekit.current_context().working_directory) / f"normalized-{Path(csv_url.path).stem}.csv"
+    with out_path.open(mode="w") as output_file:
         writer = csv.DictWriter(output_file, fieldnames=columns_to_normalize)
         writer.writeheader()
         for row in zip(*normalized_data.values()):
diff --git a/examples/data_types_and_io/data_types_and_io/folder.py b/examples/data_types_and_io/data_types_and_io/folder.py
index b953e9434..46b0e5e5a 100644
--- a/examples/data_types_and_io/data_types_and_io/folder.py
+++ b/examples/data_types_and_io/data_types_and_io/folder.py
@@ -1,5 +1,4 @@
 import csv
-import os
 import urllib.request
 from collections import defaultdict
 from pathlib import Path
@@ -15,17 +14,14 @@
 @task
 def download_files(csv_urls: List[str]) -> FlyteDirectory:
     working_dir = flytekit.current_context().working_directory
-    local_dir = Path(os.path.join(working_dir, "csv_files"))
+    local_dir = Path(working_dir) / "csv_files"
     local_dir.mkdir(exist_ok=True)
 
     # get the number of digits needed to preserve the order of files in the local directory
     zfill_len = len(str(len(csv_urls)))
     for idx, remote_location in enumerate(csv_urls):
-        local_image = os.path.join(
-            # prefix the file name with the index location of the file in the original csv_urls list
-            local_dir,
-            f"{str(idx).zfill(zfill_len)}_{os.path.basename(remote_location)}",
-        )
+        # prefix the file name with the index location of the file in the original csv_urls list
+        local_image = local_dir / f"{str(idx).zfill(zfill_len)}_{Path(remote_location).name}"
         urllib.request.urlretrieve(remote_location, local_image)
     return FlyteDirectory(path=str(local_dir))
 
@@ -69,7 +65,7 @@ def normalize_all_files(
 ) -> FlyteDirectory:
     for local_csv_file, column_names, columns_to_normalize in zip(
         # make sure we sort the files in the directory to preserve the original order of the csv urls
-        [os.path.join(csv_files_dir, x) for x in sorted(os.listdir(csv_files_dir))],
+        sorted(Path(csv_files_dir).iterdir()),
         columns_metadata,
         columns_to_normalize_metadata,
     ):
diff --git a/examples/data_types_and_io/data_types_and_io/structured_dataset.py b/examples/data_types_and_io/data_types_and_io/structured_dataset.py
index 12b6ba659..93b8adb7a 100644
--- a/examples/data_types_and_io/data_types_and_io/structured_dataset.py
+++ b/examples/data_types_and_io/data_types_and_io/structured_dataset.py
@@ -1,6 +1,6 @@
-import os
 import typing
 from dataclasses import dataclass
+from pathlib import Path
 
 import numpy as np
 import pandas as pd
@@ -87,8 +87,8 @@ def encode(
         table = pa.Table.from_arrays(df, name)
         path = ctx.file_access.get_random_remote_directory()
         local_dir = ctx.file_access.get_random_local_directory()
-        local_path = os.path.join(local_dir, f"{0:05}")
-        pq.write_table(table, local_path)
+        local_path = Path(local_dir) / f"{0:05}"
+        pq.write_table(table, str(local_path))
         ctx.file_access.upload_directory(local_dir, path)
         return literals.StructuredDataset(
             uri=path,
diff --git a/examples/dolt_plugin/dolt_plugin/dolt_branch_example.py b/examples/dolt_plugin/dolt_plugin/dolt_branch_example.py
index dda307adb..fe2702ef7 100644
--- a/examples/dolt_plugin/dolt_plugin/dolt_branch_example.py
+++ b/examples/dolt_plugin/dolt_plugin/dolt_branch_example.py
@@ -3,9 +3,9 @@
 #
 # In this example, we'll show how to use DoltTable along with Dolt's `Branch` feature.
 # %%
-import os
 import sys
 import typing
+from pathlib import Path
 
 import pandas as pd
 from dolt_integrations.core import NewBranch
@@ -30,7 +30,7 @@
 # statement to fetch data.
 
 # %%
-doltdb_path = os.path.join(os.path.dirname(__file__), "foo")
+doltdb_path = str(Path(__file__).parent / "foo")
 
 
 def generate_confs(a: int) -> typing.Tuple[DoltConfig, DoltConfig, DoltConfig]:
diff --git a/examples/dolt_plugin/dolt_plugin/dolt_quickstart_example.py b/examples/dolt_plugin/dolt_plugin/dolt_quickstart_example.py
index ce36cfe95..2ba3971d6 100644
--- a/examples/dolt_plugin/dolt_plugin/dolt_quickstart_example.py
+++ b/examples/dolt_plugin/dolt_plugin/dolt_quickstart_example.py
@@ -10,8 +10,8 @@
 # %% [markdown]
 # First, let's import the libraries.
 # %%
-import os
 import sys
+from pathlib import Path
 
 import pandas as pd
 from flytekit import task, workflow
@@ -20,7 +20,7 @@
 # %% [markdown]
 # Next, we initialize Dolt's config.
 # %%
-doltdb_path = os.path.join(os.path.dirname(__file__), "foo")
+doltdb_path = str(Path(__file__).parent / "foo")
 
 rabbits_conf = DoltConfig(
     db_path=doltdb_path,
diff --git a/examples/exploratory_data_analysis/exploratory_data_analysis/notebook.py b/examples/exploratory_data_analysis/exploratory_data_analysis/notebook.py
index 766e8ae55..82df5551c 100644
--- a/examples/exploratory_data_analysis/exploratory_data_analysis/notebook.py
+++ b/examples/exploratory_data_analysis/exploratory_data_analysis/notebook.py
@@ -7,7 +7,6 @@
 # %% [markdown]
 # First, let's import the libraries we will use in this example.
 # %%
-import os
 import pathlib
 
 from flytekit import Resources, kwtypes, workflow
@@ -34,7 +33,7 @@
 # %%
 nb = NotebookTask(
     name="pipeline-nb",
-    notebook_path=os.path.join(pathlib.Path(__file__).parent.absolute(), "supermarket_regression.ipynb"),
+    notebook_path=str(pathlib.Path(__file__).parent.absolute() / "supermarket_regression.ipynb"),
     inputs=kwtypes(
         n_estimators=int,
         max_depth=int,
diff --git a/examples/exploratory_data_analysis/exploratory_data_analysis/notebook_and_task.py b/examples/exploratory_data_analysis/exploratory_data_analysis/notebook_and_task.py
index 442d18000..cfd810d1f 100644
--- a/examples/exploratory_data_analysis/exploratory_data_analysis/notebook_and_task.py
+++ b/examples/exploratory_data_analysis/exploratory_data_analysis/notebook_and_task.py
@@ -8,7 +8,6 @@
 # %% [markdown]
 # First, let's import the libraries we will use in this example.
 # %%
-import os
 import pathlib
 from dataclasses import dataclass
 
@@ -46,7 +45,7 @@ class Hyperparameters(object):
 # %%
 nb = NotebookTask(
     name="eda-feature-eng-nb",
-    notebook_path=os.path.join(pathlib.Path(__file__).parent.absolute(), "supermarket_regression_1.ipynb"),
+    notebook_path=str(pathlib.Path(__file__).parent.absolute() / "supermarket_regression_1.ipynb"),
     outputs=kwtypes(dummified_data=pd.DataFrame, dataset=str),
     requests=Resources(mem="500Mi"),
 )
diff --git a/examples/exploratory_data_analysis/exploratory_data_analysis/notebooks_as_tasks.py b/examples/exploratory_data_analysis/exploratory_data_analysis/notebooks_as_tasks.py
index 43d299dac..6cabceb55 100644
--- a/examples/exploratory_data_analysis/exploratory_data_analysis/notebooks_as_tasks.py
+++ b/examples/exploratory_data_analysis/exploratory_data_analysis/notebooks_as_tasks.py
@@ -8,7 +8,6 @@
 # %% [markdown]
 # First, let's import the libraries we will use in this example.
 # %%
-import os
 import pathlib
 
 import pandas as pd
@@ -27,7 +26,7 @@
 # %%
 nb_1 = NotebookTask(
     name="eda-featureeng-nb",
-    notebook_path=os.path.join(pathlib.Path(__file__).parent.absolute(), "supermarket_regression_1.ipynb"),
+    notebook_path=str(pathlib.Path(__file__).parent.absolute() / "supermarket_regression_1.ipynb"),
     outputs=kwtypes(dummified_data=pd.DataFrame, dataset=str),
     requests=Resources(mem="500Mi"),
 )
@@ -40,10 +39,7 @@
 # %%
 nb_2 = NotebookTask(
     name="regression-nb",
-    notebook_path=os.path.join(
-        pathlib.Path(__file__).parent.absolute(),
-        "supermarket_regression_2.ipynb",
-    ),
+    notebook_path=str(pathlib.Path(__file__).parent.absolute() / "supermarket_regression_2.ipynb"),
     inputs=kwtypes(
         dataset=str,
         n_estimators=int,
diff --git a/examples/extending/extending/custom_types.py b/examples/extending/extending/custom_types.py
index 50742d68e..af4504c2b 100644
--- a/examples/extending/extending/custom_types.py
+++ b/examples/extending/extending/custom_types.py
@@ -1,6 +1,6 @@
 import os
 import tempfile
-import typing
+from pathlib import Path
 from typing import Type
 
 from flytekit import Blob, BlobMetadata, BlobType, FlyteContext, Literal, LiteralType, Scalar, task, workflow
@@ -13,26 +13,25 @@ class MyDataset(object):
     ``MyDataset`` is a collection of files. In Flyte, this maps to a multi-part blob or directory.
""" - def __init__(self, base_dir: str = None): + def __init__(self, base_dir: str | None = None): if base_dir is None: self._tmp_dir = tempfile.TemporaryDirectory() self._base_dir = self._tmp_dir.name self._files = [] else: self._base_dir = base_dir - files = os.listdir(base_dir) - self._files = [os.path.join(base_dir, f) for f in files] + self._files = list(Path(base_dir).iterdir()) @property def base_dir(self) -> str: return self._base_dir @property - def files(self) -> typing.List[str]: + def files(self) -> list[os.PathLike]: return self._files - def new_file(self, name: str) -> str: - new_file = os.path.join(self._base_dir, name) + def new_file(self, name: str) -> os.PathLike: + new_file = Path(self._base_dir) / name self._files.append(new_file) return new_file diff --git a/examples/feast_integration/feast_integration/feast_workflow.py b/examples/feast_integration/feast_integration/feast_workflow.py index 72801a82d..16638806d 100644 --- a/examples/feast_integration/feast_integration/feast_workflow.py +++ b/examples/feast_integration/feast_integration/feast_workflow.py @@ -34,6 +34,7 @@ import logging import os from datetime import datetime, timedelta +from pathlib import Path import boto3 import flytekit @@ -173,8 +174,8 @@ def store_offline(repo_config: RepoConfig, dataframe: StructuredDataset) -> Flyt horse_colic_entity = Entity(name="Hospital Number") ctx = flytekit.current_context() - data_dir = os.path.join(ctx.working_directory, "parquet-data") - os.makedirs(data_dir, exist_ok=True) + data_dir = Path(ctx.working_directory) / "parquet-data" + data_dir.mkdir(parents=True, exist_ok=True) FlyteContext.current_context().file_access.get_data( dataframe._literal_sd.uri + "/00000", diff --git a/examples/forecasting_sales/forecasting_sales/keras_spark_rossmann_estimator.py b/examples/forecasting_sales/forecasting_sales/keras_spark_rossmann_estimator.py index 29d0c11fe..2d6a435c6 100644 --- a/examples/forecasting_sales/forecasting_sales/keras_spark_rossmann_estimator.py +++ b/examples/forecasting_sales/forecasting_sales/keras_spark_rossmann_estimator.py @@ -123,7 +123,7 @@ def download_data(dataset: str) -> FlyteDirectory: print("==============") working_dir = flytekit.current_context().working_directory - data_dir = pathlib.Path(os.path.join(working_dir, "data")) + data_dir = pathlib.Path(working_dir) / "data" data_dir.mkdir(exist_ok=True) # download the dataset @@ -584,8 +584,8 @@ def act_sigmoid_scaled(x): print("Best RMSPE: %f" % best_val_rmspe) # save the trained model - keras_model.save(os.path.join(working_dir, hp.local_checkpoint_file)) - print("Written checkpoint to %s" % os.path.join(working_dir, hp.local_checkpoint_file)) + keras_model.save(pathlib.Path(working_dir) / hp.local_checkpoint_file) + print("Written checkpoint to %s" % (pathlib.Path(working_dir) / hp.local_checkpoint_file)) # the Estimator returns a Transformer representation of the trained model once training is complete return keras_model @@ -612,7 +612,7 @@ def test( pred_df = pred_df.withColumn("Sales_pred", F.exp(pred_df.Sales_output)) submission_df = pred_df.select(pred_df.Id.cast(T.IntegerType()), pred_df.Sales_pred).toPandas() - submission_df.sort_values(by=["Id"]).to_csv(os.path.join(working_dir, hp.local_submission_csv), index=False) + submission_df.sort_values(by=["Id"]).to_csv(pathlib.Path(working_dir) / hp.local_submission_csv, index=False) # predictions are saved to a CSV file. 
print("Saved predictions to %s" % hp.local_submission_csv) diff --git a/examples/greatexpectations_plugin/greatexpectations_plugin/task_example.py b/examples/greatexpectations_plugin/greatexpectations_plugin/task_example.py index 755ecf3f4..c2c3eb891 100644 --- a/examples/greatexpectations_plugin/greatexpectations_plugin/task_example.py +++ b/examples/greatexpectations_plugin/greatexpectations_plugin/task_example.py @@ -15,8 +15,8 @@ # %% [markdown] # First, let's import the required libraries. # %% -import os import typing +from pathlib import Path import pandas as pd from flytekit import Resources, kwtypes, task, workflow @@ -65,7 +65,7 @@ def simple_task(csv_file: str) -> int: # If the data validation fails, this will return a ValidationError. result = simple_task_object(dataset=csv_file) print(result) - df = pd.read_csv(os.path.join("greatexpectations", "data", csv_file)) + df = pd.read_csv(Path("greatexpectations") / "data" / csv_file) return df.shape[0] @@ -205,7 +205,7 @@ def schema_wf() -> typing.List[str]: # %% @task def runtime_to_df_task(csv_file: str) -> pd.DataFrame: - df = pd.read_csv(os.path.join("greatexpectations", "data", csv_file)) + df = pd.read_csv(Path("greatexpectations") / "data" / csv_file) return df diff --git a/examples/greatexpectations_plugin/greatexpectations_plugin/type_example.py b/examples/greatexpectations_plugin/greatexpectations_plugin/type_example.py index 31993d011..e67081116 100644 --- a/examples/greatexpectations_plugin/greatexpectations_plugin/type_example.py +++ b/examples/greatexpectations_plugin/greatexpectations_plugin/type_example.py @@ -14,7 +14,7 @@ # %% [markdown] # First, let's import the required libraries. # %% -import os +from pathlib import Path import pandas as pd from flytekit import Resources, task, workflow @@ -191,7 +191,7 @@ def schema_wf() -> int: # %% @task def runtime_to_df_task(csv_file: str) -> pd.DataFrame: - df = pd.read_csv(os.path.join("greatexpectations", "data", csv_file)) + df = pd.read_csv(Path("greatexpectations") / "data" / csv_file) return df diff --git a/examples/house_price_prediction/house_price_prediction/house_price_predictor.py b/examples/house_price_prediction/house_price_prediction/house_price_predictor.py index cd3375afd..a0ce97437 100644 --- a/examples/house_price_prediction/house_price_prediction/house_price_predictor.py +++ b/examples/house_price_prediction/house_price_prediction/house_price_predictor.py @@ -27,13 +27,13 @@ # pip install xgboost # ``` -# %% -import os - # %% [markdown] # First, let's import the required packages into the environment. # %% import typing + +# %% +from pathlib import Path from typing import Tuple import flytekit @@ -210,7 +210,7 @@ def fit(loc: str, train: pd.DataFrame, val: pd.DataFrame) -> JoblibSerializedFil m.fit(x, y, eval_set=[(eval_x, eval_y)]) working_dir = flytekit.current_context().working_directory - fname = os.path.join(working_dir, f"model-{loc}.joblib.dat") + fname = str(Path(working_dir) / f"model-{loc}.joblib.dat") joblib.dump(m, fname) # return the serialized model diff --git a/examples/k8s_pod_plugin/k8s_pod_plugin/pod.py b/examples/k8s_pod_plugin/k8s_pod_plugin/pod.py index 0a6d8994c..46eb2ed51 100644 --- a/examples/k8s_pod_plugin/k8s_pod_plugin/pod.py +++ b/examples/k8s_pod_plugin/k8s_pod_plugin/pod.py @@ -15,8 +15,8 @@ # %% [markdown] # First, we import the necessary libraries for use in the following examples. 
 # %%
-import os
 import time
+from pathlib import Path
 from typing import List
 
 from flytekit import Resources, TaskMetadata, dynamic, map_task, task, workflow
@@ -131,7 +131,7 @@ def pod_workflow() -> str:
 )
 def multiple_containers_pod_task() -> str:
     # The code defined in this task will get injected into the primary container.
-    while not os.path.isfile(_SHARED_DATA_PATH):
+    while not Path(_SHARED_DATA_PATH).is_file():
         time.sleep(5)
 
     with open(_SHARED_DATA_PATH, "r") as shared_message_file:
diff --git a/examples/kfmpi_plugin/kfmpi_plugin/mpi_mnist.py b/examples/kfmpi_plugin/kfmpi_plugin/mpi_mnist.py
index 2fbd454f8..7161e1856 100644
--- a/examples/kfmpi_plugin/kfmpi_plugin/mpi_mnist.py
+++ b/examples/kfmpi_plugin/kfmpi_plugin/mpi_mnist.py
@@ -6,7 +6,6 @@
 # %% [markdown]
 # To begin, import the necessary dependencies.
 # %%
-import os
 import pathlib
 
 import flytekit
@@ -131,7 +130,7 @@ def horovod_train_task(batch_size: int, buffer_size: int, dataset_size: int) ->
         raise IgnoreOutputs("I am not rank 0")
 
     working_dir = flytekit.current_context().working_directory
-    checkpoint_prefix = pathlib.Path(os.path.join(working_dir, "checkpoint"))
+    checkpoint_prefix = pathlib.Path(working_dir) / "checkpoint"
     checkpoint.save(checkpoint_prefix)
 
     tf.keras.models.save_model(
diff --git a/examples/kfpytorch_plugin/kfpytorch_plugin/pytorch_mnist.py b/examples/kfpytorch_plugin/kfpytorch_plugin/pytorch_mnist.py
index 95efb0eb7..ec3842ced 100644
--- a/examples/kfpytorch_plugin/kfpytorch_plugin/pytorch_mnist.py
+++ b/examples/kfpytorch_plugin/kfpytorch_plugin/pytorch_mnist.py
@@ -9,6 +9,7 @@
 import os
 import typing
 from dataclasses import dataclass
+from pathlib import Path
 from typing import Tuple
 
 import flytekit
@@ -209,7 +210,7 @@ class Hyperparameters(object):
     container_image=custom_image,
 )
 def mnist_pytorch_job(hp: Hyperparameters) -> TrainingOutputs:
-    log_dir = os.path.join(flytekit.current_context().working_directory, "logs")
+    log_dir = str(Path(flytekit.current_context().working_directory) / "logs")
     writer = SummaryWriter(log_dir)
 
     torch.manual_seed(hp.seed)
@@ -228,7 +229,7 @@ def mnist_pytorch_job(hp: Hyperparameters) -> TrainingOutputs:
     kwargs = {"num_workers": 1, "pin_memory": True} if use_cuda else {}
     train_loader = torch.utils.data.DataLoader(
         datasets.MNIST(
-            os.path.join(flytekit.current_context().working_directory, "data"),
+            str(Path(flytekit.current_context().working_directory) / "data"),
             train=True,
             download=True,
             transform=transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))]),
@@ -239,7 +240,7 @@ def mnist_pytorch_job(hp: Hyperparameters) -> TrainingOutputs:
     )
     test_loader = torch.utils.data.DataLoader(
         datasets.MNIST(
-            os.path.join(flytekit.current_context().working_directory, "data"),
+            str(Path(flytekit.current_context().working_directory) / "data"),
             train=False,
             transform=transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))]),
         ),
@@ -272,7 +273,7 @@ def mnist_pytorch_job(hp: Hyperparameters) -> TrainingOutputs:
     ]
 
     # Save the model
-    model_file = os.path.join(flytekit.current_context().working_directory, "mnist_cnn.pt")
+    model_file = str(Path(flytekit.current_context().working_directory) / "mnist_cnn.pt")
     torch.save(model.state_dict(), model_file)
 
     return TrainingOutputs(
@@ -302,7 +303,7 @@ def plot_accuracy(epoch_accuracies: typing.List[float]) -> PNGImageFile:
     plt.title("Accuracy")
     plt.ylabel("accuracy")
     plt.xlabel("epoch")
-    accuracy_plot = os.path.join(flytekit.current_context().working_directory, "accuracy.png")
+    accuracy_plot = str(Path(flytekit.current_context().working_directory) / "accuracy.png")
     plt.savefig(accuracy_plot)
 
     return PNGImageFile(accuracy_plot)
diff --git a/examples/kftensorflow_plugin/kftensorflow_plugin/tf_mnist.py b/examples/kftensorflow_plugin/kftensorflow_plugin/tf_mnist.py
index 4ddb4d992..9770b1891 100644
--- a/examples/kftensorflow_plugin/kftensorflow_plugin/tf_mnist.py
+++ b/examples/kftensorflow_plugin/kftensorflow_plugin/tf_mnist.py
@@ -14,6 +14,7 @@
 # %%
 import os
 from dataclasses import dataclass
+from pathlib import Path
 from typing import NamedTuple, Tuple
 
 from dataclasses_json import dataclass_json
@@ -143,7 +144,7 @@ def train_model(
     checkpoint_dir = "./training_checkpoints"
 
     # Define the name of the checkpoint files
-    checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt_{epoch}")
+    checkpoint_prefix = str(Path(checkpoint_dir) / "ckpt_{epoch}")
 
     # Define a callback for printing the learning rate at the end of each epoch
     class PrintLR(tf.keras.callbacks.Callback):
diff --git a/examples/nlp_processing/nlp_processing/word2vec_and_lda.py b/examples/nlp_processing/nlp_processing/word2vec_and_lda.py
index 53dee379c..a95846178 100644
--- a/examples/nlp_processing/nlp_processing/word2vec_and_lda.py
+++ b/examples/nlp_processing/nlp_processing/word2vec_and_lda.py
@@ -16,10 +16,10 @@
 # First, we import the necessary libraries.
 # %%
 import logging
-import os
 import random
 import typing
 from dataclasses import dataclass
+from pathlib import Path
 from typing import Dict, List
 
 import flytekit
@@ -52,8 +52,8 @@
 # %% [markdown]
 # Next, we define the path to the lee corpus dataset (installed with gensim).
 # %%
-data_dir = os.path.join(gensim.__path__[0], "test", "test_data")
-lee_train_file = os.path.join(data_dir, "lee_background.cor")
+data_dir = Path(gensim.__path__[0]) / "test" / "test_data"
+lee_train_file = str(data_dir / "lee_background.cor")
 
 
 # %% [markdown]
@@ -195,7 +195,7 @@ def train_word2vec_model(training_data: List[List[str]], hyperparams: Word2VecMo
     )
     training_loss = model.get_latest_training_loss()
     logger.info(f"training loss: {training_loss}")
-    out_path = os.path.join(flytekit.current_context().working_directory, "word2vec.model")
+    out_path = str(Path(flytekit.current_context().working_directory) / "word2vec.model")
     model.save(out_path)
     return (out_path,)
 
diff --git a/examples/papermill_plugin/papermill_plugin/simple.py b/examples/papermill_plugin/papermill_plugin/simple.py
index e21f71082..9d4536561 100644
--- a/examples/papermill_plugin/papermill_plugin/simple.py
+++ b/examples/papermill_plugin/papermill_plugin/simple.py
@@ -5,7 +5,6 @@
 # one output. This can be generalized to multiple inputs and outputs.
 # %%
 import math
-import os
 import pathlib
 
 from flytekit import kwtypes, task, workflow
@@ -31,7 +30,7 @@
 # %%
 nb = NotebookTask(
     name="simple-nb",
-    notebook_path=os.path.join(pathlib.Path(__file__).parent.absolute(), "nb_simple.ipynb"),
+    notebook_path=str(pathlib.Path(__file__).parent.absolute() / "nb_simple.ipynb"),
     render_deck=True,
     inputs=kwtypes(v=float),
     outputs=kwtypes(square=float),
diff --git a/examples/sagemaker_inference_agent/sagemaker_inference_agent/sagemaker_inference_agent_example_usage.py b/examples/sagemaker_inference_agent/sagemaker_inference_agent/sagemaker_inference_agent_example_usage.py
index c982908c9..278d81fca 100644
--- a/examples/sagemaker_inference_agent/sagemaker_inference_agent/sagemaker_inference_agent_example_usage.py
+++ b/examples/sagemaker_inference_agent/sagemaker_inference_agent/sagemaker_inference_agent_example_usage.py
@@ -10,6 +10,7 @@
 # %%
 import os
 import tarfile
+from pathlib import Path
 
 import flytekit
 from flytekit import ImageSpec, task, workflow
@@ -37,7 +38,7 @@ def train_model(dataset: FlyteFile) -> FlyteFile:
     model = XGBClassifier()
     model.fit(X_train, y_train)
 
-    serialized_model = os.path.join(flytekit.current_context().working_directory, "xgboost_model.json")
+    serialized_model = str(Path(flytekit.current_context().working_directory) / "xgboost_model.json")
     booster = model.get_booster()
     booster.save_model(serialized_model)
 
@@ -152,7 +153,7 @@ def sagemaker_xgboost_wf(
 class Predictor:
     def __init__(self, path: str, name: str):
         self._model = Booster()
-        self._model.load_model(os.path.join(path, name))
+        self._model.load_model(str(Path(path) / name))
 
     def predict(self, inputs: DMatrix) -> np.ndarray:
         return self._model.predict(inputs)
diff --git a/examples/whylogs_plugin/whylogs_plugin/whylogs_example.py b/examples/whylogs_plugin/whylogs_plugin/whylogs_example.py
index e9ceefc09..ca112a011 100644
--- a/examples/whylogs_plugin/whylogs_plugin/whylogs_example.py
+++ b/examples/whylogs_plugin/whylogs_plugin/whylogs_example.py
@@ -8,7 +8,7 @@
 # %% [markdown]
 # First, let's make all the necessary imports for our example to run properly
 # %%
-import os
+from pathlib import Path
 
 import flytekit
 import numpy as np
@@ -99,9 +99,10 @@ def constraints_report(profile_view: DatasetProfileView) -> bool:
 @task(container_image=image_spec)
 def make_predictions(input_data: pd.DataFrame, output_path: str) -> str:
     input_data["predictions"] = np.random.random(size=len(input_data))
-    if not os.path.exists(output_path):
-        os.makedirs(output_path)
-    input_data.to_csv(os.path.join(output_path, "predictions.csv"))
+    output_path = Path(output_path)
+    if not output_path.exists():
+        output_path.mkdir(parents=True)
+    input_data.to_csv(str(output_path / "predictions.csv"))
     return f"wrote predictions successfully to {output_path}"
 
diff --git a/flyte_tests_validate.py b/flyte_tests_validate.py
index 12db7b33c..de9d429d9 100644
--- a/flyte_tests_validate.py
+++ b/flyte_tests_validate.py
@@ -1,7 +1,7 @@
 import json
-import os
 import re
 import subprocess
+from pathlib import Path
 
 if __name__ == "__main__":
     file_list = "flyte_tests.txt"
@@ -18,11 +18,13 @@
         print(f"Processing file: {file_name}")
 
         # Retrieve the file path, including the name of the file and its immediate parent directory
-        directory_path = os.path.dirname(file_name).split(os.path.sep)[-1:]
-        file_path = ".".join(directory_path + [os.path.splitext(os.path.basename(file_name))[0]])
+        file_path = Path(file_name)
+        directory_path = file_path.parent.name
+        file_name_without_extension = file_path.stem
+        import_path = f"{directory_path}.{file_name_without_extension}"
 
         # Retrieve the workflow(s)
-        workflows = list(filter(lambda tup: file_path in tup[0], examples))
+        workflows = list(filter(lambda tup: import_path in tup[0], examples))
 
         # Verify if there are any workflows present in the provided file path
         if not workflows: