diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 5b656b847..cd80721d5 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,16 +1,13 @@ repos: -- repo: https://github.com/PyCQA/flake8 - rev: 3.9.2 +- repo: https://github.com/astral-sh/ruff-pre-commit + # Ruff version. + rev: v0.1.6 hooks: - - id: flake8 -- repo: https://github.com/psf/black - rev: 22.3.0 - hooks: - - id: black -- repo: https://github.com/PyCQA/isort - rev: 5.12.0 - hooks: - - id: isort + # Run the linter. + - id: ruff + args: [--fix] + # Run the formatter. + - id: ruff-format - repo: https://github.com/pre-commit/pre-commit-hooks rev: v4.0.1 hooks: diff --git a/Makefile b/Makefile index bfdbc410f..4a0d59f6d 100644 --- a/Makefile +++ b/Makefile @@ -24,10 +24,9 @@ docs-requirements.txt: docs-requirements.in install-piptools docs-requirements: docs-requirements.txt .PHONY: fmt -fmt: ## Format code with black and isort - autoflake --remove-all-unused-imports --ignore-init-module-imports --ignore-pass-after-docstring --in-place -r examples - pre-commit run black --all-files || true - pre-commit run isort --all-files || true +fmt: ## Format code with ruff + pre-commit run ruff --all-files || true + pre-commit run ruff-format --all-files || true .PHONY: update_boilerplate update_boilerplate: diff --git a/dev-requirements.in b/dev-requirements.in index d7f500fff..23bdb828c 100644 --- a/dev-requirements.in +++ b/dev-requirements.in @@ -1,13 +1,8 @@ autoflake flytekit -black coverage -flake8 pre-commit -flake8-black -flake8-isort codespell -isort mock pytest mypy diff --git a/dev-requirements.txt b/dev-requirements.txt index 75cb34f5e..39a1cdb2d 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -39,10 +39,6 @@ azure-storage-blob==12.19.0 # via adlfs binaryornot==0.4.4 # via cookiecutter -black==24.3.0 - # via - # -r dev-requirements.in - # flake8-black botocore==1.31.17 # via aiobotocore cachetools==5.3.2 @@ -63,7 +59,6 @@ charset-normalizer==3.3.2 # via requests click==8.1.7 # via - # black # cookiecutter # flytekit # rich-click @@ -100,15 +95,6 @@ exceptiongroup==1.2.0 # via pytest filelock==3.13.1 # via virtualenv -flake8==6.1.0 - # via - # -r dev-requirements.in - # flake8-black - # flake8-isort -flake8-black==0.3.6 - # via -r dev-requirements.in -flake8-isort==6.1.1 - # via -r dev-requirements.in flyteidl==1.10.6 # via flytekit flytekit==1.10.2 @@ -118,7 +104,6 @@ frozenlist==1.4.0 # aiohttp # aiosignal fsspec==2023.9.2 - # via # adlfs # flytekit # gcsfs @@ -175,10 +160,6 @@ iniconfig==2.0.0 # via pytest isodate==0.6.1 # via azure-storage-blob -isort==5.12.0 - # via - # -r dev-requirements.in - # flake8-isort jaraco-classes==3.3.0 # via keyring jeepney==0.8.0 @@ -216,8 +197,6 @@ mashumaro==3.11 # via # -r dev-requirements.in # flytekit -mccabe==0.7.0 - # via flake8 mdurl==0.1.2 # via markdown-it-py mock==5.1.0 @@ -239,7 +218,6 @@ mypy==1.7.1 # via -r dev-requirements.in mypy-extensions==1.0.0 # via - # black # mypy # typing-inspect nodeenv==1.8.0 @@ -255,17 +233,13 @@ oauthlib==3.2.2 # requests-oauthlib packaging==23.2 # via - # black # docker # marshmallow # pytest pandas==1.5.3 # via flytekit -pathspec==0.11.2 - # via black platformdirs==4.1.0 # via - # black # virtualenv pluggy==1.3.0 # via pytest @@ -291,14 +265,8 @@ pyasn1==0.5.1 # rsa pyasn1-modules==0.3.0 # via google-auth -pycodestyle==2.11.1 - # via flake8 pycparser==2.21 # via cffi -pyflakes==3.1.0 - # via - # autoflake - # flake8 pygments==2.17.2 # via rich pyjwt[crypto]==2.8.0 diff --git a/docs/contribute.md 
b/docs/contribute.md index babff86c5..2bd45bff3 100644 --- a/docs/contribute.md +++ b/docs/contribute.md @@ -284,9 +284,7 @@ if the code in itself is updated and requirements.txt is the same. ## Pre-commit hooks We use [pre-commit](https://pre-commit.com/) to automate linting and code formatting on every commit. -Configured hooks include [black](https://github.com/psf/black), [isort](https://github.com/PyCQA/isort), -[flake8](https://github.com/PyCQA/flake8) and linters to ensure newlines are added to the end of files, and there is -proper spacing in files. +Configured hooks include [ruff](https://github.com/astral-sh/ruff) for linting and formatting, along with hooks that ensure newlines are added to the end of files and that files use proper spacing. We run all those hooks in CI, but if you want to run them locally on every commit, run `pre-commit install` after installing the dev environment requirements. In case you want to disable `pre-commit` hooks locally, run @@ -294,7 +292,7 @@ installing the dev environment requirements. In case you want to disable `pre-co ### Formatting -We use [black](https://github.com/psf/black) and [isort](https://github.com/PyCQA/isort) to autoformat code. They +We use [ruff](https://github.com/astral-sh/ruff) to autoformat code. The `ruff` and `ruff-format` hooks are configured as git hooks in `pre-commit`. Run `make fmt` to format your code. ### Spell-checking diff --git a/examples/dbt_plugin/dbt_plugin/dbt_example.py b/examples/dbt_plugin/dbt_plugin/dbt_example.py index 2e4f0c9c7..059d8cfb3 100644 --- a/examples/dbt_plugin/dbt_plugin/dbt_example.py +++ b/examples/dbt_plugin/dbt_plugin/dbt_example.py @@ -28,6 +28,7 @@ DBT_PROFILES_DIR = "dbt-profiles" DBT_PROFILE = "jaffle_shop" + # %% [markdown] # This task ensures that the jaffle_shop database is created and it also contains # some data before scheduling an execution of this workflow. diff --git a/examples/development_lifecycle/development_lifecycle/task_cache.py b/examples/development_lifecycle/development_lifecycle/task_cache.py index 209ce1939..3278375d2 100644 --- a/examples/development_lifecycle/development_lifecycle/task_cache.py +++ b/examples/development_lifecycle/development_lifecycle/task_cache.py @@ -67,7 +67,8 @@ def hash_pandas_dataframe(df: pandas.DataFrame) -> str: @task def foo_1( # noqa: F811 - a: int, b: str # noqa: F821 + a: int, + b: str, # noqa: F821 ) -> Annotated[pandas.DataFrame, HashMethod(hash_pandas_dataframe)]: # noqa: F821 # noqa: F821 df = pandas.DataFrame(...) # noqa: F821 ... diff --git a/examples/dolt_plugin/dolt_plugin/dolt_branch_example.py b/examples/dolt_plugin/dolt_plugin/dolt_branch_example.py index fc8d1e53e..dda307adb 100644 --- a/examples/dolt_plugin/dolt_plugin/dolt_branch_example.py +++ b/examples/dolt_plugin/dolt_plugin/dolt_branch_example.py @@ -71,6 +71,7 @@ def generate_confs(a: int) -> typing.Tuple[DoltConfig, DoltConfig, DoltConfig]: # Return types of `DoltTable` save the `data` to the # Dolt database given a connection configuration. + # %% @task def get_confs(a: int) -> typing.Tuple[DoltConfig, DoltTable, DoltConfig]: diff --git a/examples/dolt_plugin/dolt_plugin/dolt_quickstart_example.py b/examples/dolt_plugin/dolt_plugin/dolt_quickstart_example.py index 55fd31347..ce36cfe95 100644 --- a/examples/dolt_plugin/dolt_plugin/dolt_quickstart_example.py +++ b/examples/dolt_plugin/dolt_plugin/dolt_quickstart_example.py @@ -31,6 +31,7 @@ # %% [markdown] # We define a task to create a DataFrame and store the table in Dolt.
+ # %% @task def populate_rabbits(a: int) -> DoltTable: diff --git a/examples/duckdb_plugin/duckdb_plugin/duckdb_example.py b/examples/duckdb_plugin/duckdb_plugin/duckdb_example.py index 74627cc2e..80f6b3993 100644 --- a/examples/duckdb_plugin/duckdb_plugin/duckdb_example.py +++ b/examples/duckdb_plugin/duckdb_plugin/duckdb_example.py @@ -153,7 +153,7 @@ def params_wf( [["chainsaw", 500, 10], ["iphone", 300, 2]], ["duck", "goose"], ] - ) + ), ) -> pd.DataFrame: return read_df(df=duckdb_params_query(params=params)) diff --git a/examples/exploratory_data_analysis/exploratory_data_analysis/notebook.py b/examples/exploratory_data_analysis/exploratory_data_analysis/notebook.py index dee7318c5..766e8ae55 100644 --- a/examples/exploratory_data_analysis/exploratory_data_analysis/notebook.py +++ b/examples/exploratory_data_analysis/exploratory_data_analysis/notebook.py @@ -50,6 +50,7 @@ # %% [markdown] # Since a task need not be defined, we create a `workflow` and return the MAE score. + # %% @workflow def notebook_wf( diff --git a/examples/exploratory_data_analysis/exploratory_data_analysis/notebook_and_task.py b/examples/exploratory_data_analysis/exploratory_data_analysis/notebook_and_task.py index d94decb1b..442d18000 100644 --- a/examples/exploratory_data_analysis/exploratory_data_analysis/notebook_and_task.py +++ b/examples/exploratory_data_analysis/exploratory_data_analysis/notebook_and_task.py @@ -56,6 +56,7 @@ class Hyperparameters(object): # Next, we define a `cross_validate` function and a `modeling` task to compute the MAE score of the data against # the Gradient Boosting Regressor. + # %% def cross_validate(model, nfolds, feats, targets): score = -1 * (cross_val_score(model, feats, targets, cv=nfolds, scoring="neg_mean_absolute_error")) diff --git a/examples/exploratory_data_analysis/exploratory_data_analysis/notebooks_as_tasks.py b/examples/exploratory_data_analysis/exploratory_data_analysis/notebooks_as_tasks.py index ccc0eb091..43d299dac 100644 --- a/examples/exploratory_data_analysis/exploratory_data_analysis/notebooks_as_tasks.py +++ b/examples/exploratory_data_analysis/exploratory_data_analysis/notebooks_as_tasks.py @@ -60,6 +60,7 @@ # %% [markdown] # We define a `Workflow` to run the notebook tasks. + # %% @workflow def notebook_wf( diff --git a/examples/feast_integration/feast_integration/feature_eng_tasks.py b/examples/feast_integration/feast_integration/feature_eng_tasks.py index 411f0f576..0adf2ab96 100644 --- a/examples/feast_integration/feast_integration/feature_eng_tasks.py +++ b/examples/feast_integration/feast_integration/feature_eng_tasks.py @@ -25,6 +25,7 @@ "timestamp", ] + # %% [markdown] # Use the [SimpleImputer](https://scikit-learn.org/stable/modules/generated/sklearn.impute.SimpleImputer.html) class from the `scikit-learn` library # to fill in the missing values of the dataset. 
diff --git a/examples/flyteinteractive_plugin/flyteinteractive_plugin/vscode.py b/examples/flyteinteractive_plugin/flyteinteractive_plugin/vscode.py index fb243923c..682b2c268 100644 --- a/examples/flyteinteractive_plugin/flyteinteractive_plugin/vscode.py +++ b/examples/flyteinteractive_plugin/flyteinteractive_plugin/vscode.py @@ -12,6 +12,7 @@ # ## Usage # ### Add `@vscode` decorator to task function definition + # %% @task @vscode diff --git a/examples/forecasting_sales/forecasting_sales/keras_spark_rossmann_estimator.py b/examples/forecasting_sales/forecasting_sales/keras_spark_rossmann_estimator.py index 294be3167..29d0c11fe 100644 --- a/examples/forecasting_sales/forecasting_sales/keras_spark_rossmann_estimator.py +++ b/examples/forecasting_sales/forecasting_sales/keras_spark_rossmann_estimator.py @@ -93,6 +93,7 @@ # %% [markdown] # Next, let's initialize a data class to store the hyperparameters that will be used with the model (`epochs`, `learning_rate`, `batch_size`, etc.). + # %% @dataclass_json @dataclass @@ -600,7 +601,6 @@ def test( test_df: pyspark.sql.DataFrame, hp: Hyperparameters, ) -> FlyteDirectory: - print("================") print("Final prediction") print("================") @@ -649,7 +649,6 @@ def test( limits=Resources(mem="1Gi"), ) def horovod_spark_task(data_dir: FlyteDirectory, hp: Hyperparameters, work_dir: FlyteDirectory) -> FlyteDirectory: - max_sales, vocab, train_df, test_df = data_preparation(data_dir, hp) # working directory will have the model and predictions as separate files diff --git a/examples/greatexpectations_plugin/greatexpectations_plugin/task_example.py b/examples/greatexpectations_plugin/greatexpectations_plugin/task_example.py index 8d0dc5a16..755ecf3f4 100644 --- a/examples/greatexpectations_plugin/greatexpectations_plugin/task_example.py +++ b/examples/greatexpectations_plugin/greatexpectations_plugin/task_example.py @@ -54,6 +54,7 @@ context_root_dir=CONTEXT_ROOT_DIR, ) + # %% [markdown] # Next, we define a task that validates the data before returning the shape of the DataFrame. # %% @@ -97,6 +98,7 @@ def simple_wf(dataset: str = DATASET_LOCAL) -> int: context_root_dir=CONTEXT_ROOT_DIR, ) + # %% [markdown] # Next, we define a task that calls the validation logic. # %% @@ -144,6 +146,7 @@ def file_wf( task_config=SQLite3Config(uri=SQLITE_DATASET), ) + # %% [markdown] # Next, we define a task that validates the data and returns the columns in it. # %% diff --git a/examples/greatexpectations_plugin/greatexpectations_plugin/type_example.py b/examples/greatexpectations_plugin/greatexpectations_plugin/type_example.py index 774d3c34b..31993d011 100644 --- a/examples/greatexpectations_plugin/greatexpectations_plugin/type_example.py +++ b/examples/greatexpectations_plugin/greatexpectations_plugin/type_example.py @@ -47,6 +47,7 @@ # # The parameters within the `data_connector_query` convey that we're fetching all those files that have "2019" and "01" in the file names. + # %% @task(limits=Resources(mem="500Mi")) def simple_task( @@ -67,7 +68,7 @@ def simple_task( ), context_root_dir=CONTEXT_ROOT_DIR, ), - ] + ], ) -> str: return f"Validation works for {directory}!" @@ -102,6 +103,7 @@ def simple_wf(directory: str = "my_assets") -> str: # The first value that's being sent within `GreatExpectationsType` is `CSVFile` (this is a pre-formatted FlyteFile type). # This means that we want to validate the `FlyteFile` data. 
+ # %% @task(limits=Resources(mem="500Mi")) def file_task(dataset: GreatExpectationsType[CSVFile, great_expectations_config]) -> pd.DataFrame: @@ -137,7 +139,7 @@ def schema_task( local_file_path="/tmp/test.parquet", # noqa: F722 context_root_dir=CONTEXT_ROOT_DIR, ), - ] + ], ) -> int: return dataframe.shape[0] diff --git a/examples/hive_plugin/hive_plugin/hive.py b/examples/hive_plugin/hive_plugin/hive.py index 2a3ebb21f..76568cc1e 100644 --- a/examples/hive_plugin/hive_plugin/hive.py +++ b/examples/hive_plugin/hive_plugin/hive.py @@ -67,6 +67,7 @@ def no_io_wf(): # There is a helper task that will automatically do the wrapping above. Please be patient as we fill out these docs. # ::: + # %% @workflow def with_output_wf() -> FlyteSchema: diff --git a/examples/house_price_prediction/house_price_prediction/house_price_predictor.py b/examples/house_price_prediction/house_price_prediction/house_price_predictor.py index bc23f1461..cd3375afd 100644 --- a/examples/house_price_prediction/house_price_prediction/house_price_predictor.py +++ b/examples/house_price_prediction/house_price_prediction/house_price_predictor.py @@ -68,6 +68,7 @@ # # We define a function to compute the price of a house based on multiple factors (`number of bedrooms`, `number of bathrooms`, `area`, `garage space`, and `year built`). + # %% def gen_price(house) -> int: _base_price = int(house["SQUARE_FEET"] * 150) @@ -125,7 +126,6 @@ def gen_houses(num_houses) -> pd.DataFrame: def split_data( df: pd.DataFrame, seed: int, split: typing.List[float] ) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]: - seed = seed val_size = split[1] # 0.3 test_size = split[2] # 0.1 @@ -182,6 +182,7 @@ def split_data( # %% [markdown] # We define a task to call the aforementioned functions. + # %% @task(cache=True, cache_version="0.1", limits=Resources(mem="600Mi")) def generate_and_split_data(number_of_houses: int, seed: int) -> dataset: @@ -196,7 +197,6 @@ def generate_and_split_data(number_of_houses: int, seed: int) -> dataset: # %% @task(cache_version="1.0", cache=True, limits=Resources(mem="600Mi")) def fit(loc: str, train: pd.DataFrame, val: pd.DataFrame) -> JoblibSerializedFile: - # fetch the features and target columns from the train dataset x = train[train.columns[1:]] y = train[train.columns[0]] @@ -227,7 +227,6 @@ def predict( test: pd.DataFrame, model_ser: JoblibSerializedFile, ) -> typing.List[float]: - # load the model model = joblib.load(model_ser) @@ -246,7 +245,6 @@ def predict( # %% @workflow def house_price_predictor_trainer(seed: int = 7, number_of_houses: int = NUM_HOUSES_PER_LOCATION) -> typing.List[float]: - # generate the data and split it into train test, and validation data split_data_vals = generate_and_split_data(number_of_houses=number_of_houses, seed=seed) diff --git a/examples/mlflow_plugin/mlflow_plugin/mlflow_example.py b/examples/mlflow_plugin/mlflow_plugin/mlflow_example.py index f25a9cdfe..e6ba6777a 100644 --- a/examples/mlflow_plugin/mlflow_plugin/mlflow_example.py +++ b/examples/mlflow_plugin/mlflow_plugin/mlflow_example.py @@ -59,6 +59,7 @@ def train_model(epochs: int): # :class: with-shadow # ::: + # %% [markdown] # Finally, we put everything together into a workflow: # diff --git a/examples/mmcloud_agent/mmcloud_agent/mmcloud_agent_example_usage.py b/examples/mmcloud_agent/mmcloud_agent/mmcloud_agent_example_usage.py index dbb377513..d045bbb80 100644 --- a/examples/mmcloud_agent/mmcloud_agent/mmcloud_agent_example_usage.py +++ b/examples/mmcloud_agent/mmcloud_agent/mmcloud_agent_example_usage.py @@ 
-11,6 +11,7 @@ # %% [markdown] # `MMCloudConfig` configures `MMCloudTask`. Tasks specified with `MMCloudConfig` will be executed using MMCloud. Tasks will be executed with requests `cpu="1"` and `mem="1Gi"` by default. + # %% @task(task_config=MMCloudConfig()) def to_str(i: int) -> str: @@ -25,6 +26,7 @@ def to_int(s: str) -> int: # %% [markdown] # [Resource](https://docs.flyte.org/projects/cookbook/en/latest/auto_examples/productionizing/customizing_resources.html) (cpu and mem) requests and limits, [container](https://docs.flyte.org/projects/cookbook/en/latest/auto_examples/customizing_dependencies/multi_images.html) images, and [environment](https://docs.flyte.org/projects/flytekit/en/latest/generated/flytekit.task.html) variable specifications are supported. + # %% @task( task_config=MMCloudConfig(submit_extra="--migratePolicy [enable=true]"), diff --git a/examples/mnist_classifier/mnist_classifier/pytorch_single_node_and_gpu.py b/examples/mnist_classifier/mnist_classifier/pytorch_single_node_and_gpu.py index 997e5122b..3675fa8de 100644 --- a/examples/mnist_classifier/mnist_classifier/pytorch_single_node_and_gpu.py +++ b/examples/mnist_classifier/mnist_classifier/pytorch_single_node_and_gpu.py @@ -89,6 +89,7 @@ def forward(self, x): # %% [markdown] # ## The Data Loader + # %% def mnist_dataloader(batch_size, train=True, **kwargs): return torch.utils.data.DataLoader( @@ -187,7 +188,6 @@ def test(model, device, test_loader): # disable gradient with torch.no_grad(): - # loop through the test data loader for images, targets in test_loader: images, targets = images.to(device), targets.to(device) # device conversion diff --git a/examples/mnist_classifier/mnist_classifier/pytorch_single_node_multi_gpu.py b/examples/mnist_classifier/mnist_classifier/pytorch_single_node_multi_gpu.py index 63110fb18..0a659755a 100644 --- a/examples/mnist_classifier/mnist_classifier/pytorch_single_node_multi_gpu.py +++ b/examples/mnist_classifier/mnist_classifier/pytorch_single_node_multi_gpu.py @@ -40,12 +40,13 @@ # {ref}`single node and gpu tutorial ` # such as the `Net` model architecture, `Hyperparameters`, and `log_test_predictions`. # %% -from mnist_classifier.pytorch_single_node_and_gpu import Hyperparameters, Net, log_test_predictions from torch import distributed as dist from torch import multiprocessing as mp from torch import nn, optim from torchvision import datasets, transforms +from mnist_classifier.pytorch_single_node_and_gpu import Hyperparameters, Net, log_test_predictions + # %% [markdown] # Let's define some variables to be used later. # @@ -86,6 +87,7 @@ def wandb_setup(): # We'll use the same neural network architecture as the one we define in the # {ref}`single node and gpu tutorial `. + # %% [markdown] # ## Data Downloader # @@ -195,7 +197,6 @@ def test(model, rank, test_loader): # disable gradient with torch.no_grad(): - # loop through the test data loader total = 0.0 for images, targets in test_loader: @@ -247,6 +248,7 @@ def test(model, rank, test_loader): # `dist_setup` is a helper function that instantiates a distributed environment. We're pointing all of the # processes across all available GPUs to the address of the main process. 
+ # %% def dist_setup(rank, world_size, backend): os.environ["MASTER_ADDR"] = "localhost" @@ -270,6 +272,7 @@ def dist_setup(rank, world_size, backend): # - save the trained model to disk # - keep track of validation metrics + # %% def train_mnist(rank: int, world_size: int, hp: Hyperparameters): # store the hyperparameters' config in ``wandb`` diff --git a/examples/nlp_processing/nlp_processing/word2vec_and_lda.py b/examples/nlp_processing/nlp_processing/word2vec_and_lda.py index 22b1bc4ec..53dee379c 100644 --- a/examples/nlp_processing/nlp_processing/word2vec_and_lda.py +++ b/examples/nlp_processing/nlp_processing/word2vec_and_lda.py @@ -186,7 +186,6 @@ class LDAModelHyperparams(object): # %% @task def train_word2vec_model(training_data: List[List[str]], hyperparams: Word2VecModelHyperparams) -> model_file: - model = Word2Vec( training_data, min_count=hyperparams.min_count, diff --git a/examples/onnx_plugin/onnx_plugin/pytorch_onnx.py b/examples/onnx_plugin/onnx_plugin/pytorch_onnx.py index aecbbc856..1816a8f70 100644 --- a/examples/onnx_plugin/onnx_plugin/pytorch_onnx.py +++ b/examples/onnx_plugin/onnx_plugin/pytorch_onnx.py @@ -59,21 +59,23 @@ def _initialize_weights(self): # This is a special annotation that tells Flytekit that this parameter is to be converted to an ONNX model with the given config. # %% @task -def train() -> Annotated[ - PyTorch2ONNX, - PyTorch2ONNXConfig( - args=torch.randn(1, 1, 224, 224, requires_grad=True), - export_params=True, # store the trained parameter weights inside - opset_version=10, # the ONNX version to export the model to - do_constant_folding=True, # whether to execute constant folding for optimization - input_names=["input"], # the model's input names - output_names=["output"], # the model's output names - dynamic_axes={ - "input": {0: "batch_size"}, - "output": {0: "batch_size"}, - }, # variable length axes - ), -]: +def train() -> ( + Annotated[ + PyTorch2ONNX, + PyTorch2ONNXConfig( + args=torch.randn(1, 1, 224, 224, requires_grad=True), + export_params=True, # store the trained parameter weights inside + opset_version=10, # the ONNX version to export the model to + do_constant_folding=True, # whether to execute constant folding for optimization + input_names=["input"], # the model's input names + output_names=["output"], # the model's output names + dynamic_axes={ + "input": {0: "batch_size"}, + "output": {0: "batch_size"}, + }, # variable length axes + ), + ] +): # Create the super-resolution model by using the above model definition. torch_model = SuperResolutionNet(upscale_factor=3) diff --git a/examples/onnx_plugin/onnx_plugin/tensorflow_onnx.py b/examples/onnx_plugin/onnx_plugin/tensorflow_onnx.py index c8f9d866b..b317b39f0 100644 --- a/examples/onnx_plugin/onnx_plugin/tensorflow_onnx.py +++ b/examples/onnx_plugin/onnx_plugin/tensorflow_onnx.py @@ -29,6 +29,7 @@ ], ) + # %% [markdown] # Define a `load_data` task to load CIFAR10 data. # %% diff --git a/examples/pandera_plugin/pandera_plugin/basic_schema_example.py b/examples/pandera_plugin/pandera_plugin/basic_schema_example.py index 0416ad56f..80bee74d8 100644 --- a/examples/pandera_plugin/pandera_plugin/basic_schema_example.py +++ b/examples/pandera_plugin/pandera_plugin/basic_schema_example.py @@ -22,6 +22,7 @@ # # Let's first define a simple data processing pipeline in pure python. 
+ # %% def total_pay(df): return df.assign(total_pay=df.hourly_pay * df.hours_worked) @@ -46,6 +47,7 @@ def process_data(df, worker_id): # Next we define the schemas that provide type and statistical annotations # for the raw, intermediate, and final outputs of our pipeline. + # %% class InSchema(pa.DataFrameModel): hourly_pay: Series[float] = pa.Field(ge=7) @@ -95,6 +97,7 @@ class OutSchema(IntermediateSchema): # by decorating our functions with the {py:func}`~flytekit.task` and {py:func}`~flytekit.workflow` decorators and # annotating the inputs and outputs of those functions with the pandera schemas: + # %% @task(container_image=custom_image) def dict_to_dataframe(data: dict) -> DataFrame[InSchema]: diff --git a/examples/pandera_plugin/pandera_plugin/validating_and_testing_ml_pipelines.py b/examples/pandera_plugin/pandera_plugin/validating_and_testing_ml_pipelines.py index b5f266580..c998fb291 100644 --- a/examples/pandera_plugin/pandera_plugin/validating_and_testing_ml_pipelines.py +++ b/examples/pandera_plugin/pandera_plugin/validating_and_testing_ml_pipelines.py @@ -116,6 +116,7 @@ # Once we've gotten a rough sense of the statistical properties of the data, we can encode that domain knowledge into # a pandera schema: + # %% class RawData(pa.SchemaModel): age: Series[int] = pa.Field(in_range={"min_value": 0, "max_value": 200}) @@ -170,6 +171,7 @@ class Config: # # Now we're ready to write our first Flyte task: + # %% @task(container_image=custom_image) def fetch_raw_data() -> DataFrame[RawData]: @@ -201,6 +203,7 @@ def fetch_raw_data() -> DataFrame[RawData]: # # Here we can use inheritance to define a `ParsedData` schema by overriding just the `target` attribute: + # %% class ParsedData(RawData): target: Series[int] = pa.Field(isin=[0, 1]) @@ -243,6 +246,7 @@ def split_data(parsed_data: DataFrame[ParsedData], test_size: float, random_stat # # Next we'll train a `RandomForestClassifier` to predict the absence/presence of heart disease: + # %% def get_features_and_target(dataset): """Helper function for separating feature and target data.""" @@ -270,6 +274,7 @@ def train_model(training_set: DataFrame[ParsedData], random_state: int) -> Jobli # # Next we assess the accuracy score of the model on the test set: + # %% @task(container_image=custom_image) def evaluate_model(model: JoblibSerializedFile, test_set: DataFrame[ParsedData]) -> float: @@ -283,6 +288,7 @@ def evaluate_model(model: JoblibSerializedFile, test_set: DataFrame[ParsedData]) # %% [markdown] # Finally, we put all of the pieces together in a Flyte workflow: + # %% @workflow def pipeline(data_random_state: int, model_random_state: int) -> float: diff --git a/examples/papermill_plugin/papermill_plugin/simple.py b/examples/papermill_plugin/papermill_plugin/simple.py index 2efb043d8..e21f71082 100644 --- a/examples/papermill_plugin/papermill_plugin/simple.py +++ b/examples/papermill_plugin/papermill_plugin/simple.py @@ -43,6 +43,7 @@ # - You can see the notebook on Flyte deck if `render_deck` is set to true. # ::: + # %% [markdown] # :::{figure} https://i.imgur.com/ogfVpr2.png # :alt: Notebook diff --git a/examples/productionizing/productionizing/use_secrets.py b/examples/productionizing/productionizing/use_secrets.py index 156467ce9..1091968c1 100644 --- a/examples/productionizing/productionizing/use_secrets.py +++ b/examples/productionizing/productionizing/use_secrets.py @@ -135,6 +135,7 @@ def user_info_task() -> Tuple[str, str]: # dependent library requires the secret to be available as a file. 
# In these scenarios you can specify the `mount_requirement=Secret.MountType.FILE`. + # In the following example we force the mounting to be an environment variable: @task( secret_requests=[ diff --git a/examples/whylogs_plugin/whylogs_plugin/whylogs_example.py b/examples/whylogs_plugin/whylogs_plugin/whylogs_example.py index 368550d97..e9ceefc09 100644 --- a/examples/whylogs_plugin/whylogs_plugin/whylogs_example.py +++ b/examples/whylogs_plugin/whylogs_plugin/whylogs_example.py @@ -31,6 +31,7 @@ packages=["flytekitplugins-whylogs", "whylogs[whylabs]", "scikit-learn", "mlflow"], registry="ghcr.io/flyteorg" ) + # %% [markdown] # Next thing is defining a task to read our reference dataset. # For this, we will take scikit-learn's entire example Diabetes dataset diff --git a/pyproject.toml b/pyproject.toml index 96fa357b5..0f0400244 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,25 @@ -[tool.black] +[tool.ruff] line-length = 120 +select = ["E", "W", "F", "I"] +ignore = [ + # Whitespace before '{symbol}' + "E203", + # Too many leading # before block comment + "E266", + # Line too long ({width} > {limit}) + "E501", + # Ambiguous variable name: {name} + "E741", + # Undefined name {name} + "F821", +] -[tool.isort] -profile = "black" -line_length = 120 +[tool.ruff.extend-per-file-ignores] +"*/__init__.py" = [ + # unused-import + "F401", +] +"examples/**/*.py" = [ + # Module level import not at top of cell + "E402", +] diff --git a/setup.cfg b/setup.cfg deleted file mode 100644 index 4118bcbdd..000000000 --- a/setup.cfg +++ /dev/null @@ -1,9 +0,0 @@ -[flake8] -max-line-length = 400 -extend-ignore = E203, E266, E501, W503, E741 -exclude = .svn,CVS,.bzr,.hg,.git,__pycache__,venv/*,src/*,tests/unit/common/protos/*,build,_build -max-complexity=32 -per-file-ignores = - *:F821 - */__init__.py: F401 - examples/**/*.py: E402, E302
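
A minimal sketch of the equivalent local workflow under the new setup (assuming the dev requirements are installed and `pre-commit install` has been run) — with this change, `make fmt` simply runs the two ruff hooks:

    pre-commit run ruff --all-files          # lint and apply autofixes (ruff with --fix)
    pre-commit run ruff-format --all-files   # format code, replacing black and isort

The same checks can be run without pre-commit by invoking ruff directly against the configuration now in pyproject.toml:

    ruff check --fix .
    ruff format .

The per-file ignore of E402 (module level import not at top of file) for examples/**/*.py accommodates the `# %%` cell-style examples, which intentionally interleave imports with markdown cells.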