Skip to content

Commit

Permalink
Webhook tasks using FlyteAgents (#3058)
Browse files Browse the repository at this point in the history
Signed-off-by: Ketan Umare <[email protected]>
Signed-off-by: Kevin Su <[email protected]>
Co-authored-by: Ketan Umare <[email protected]>
Co-authored-by: Kevin Su <[email protected]>
  • Loading branch information
3 people authored Feb 20, 2025
1 parent 9acab29 commit 6d7c738
Show file tree
Hide file tree
Showing 15 changed files with 614 additions and 86 deletions.
1 change: 1 addition & 0 deletions dev-requirements.in
Original file line number Diff line number Diff line change
Expand Up @@ -60,3 +60,4 @@ ipykernel

orjson
kubernetes>=12.0.1
httpx
2 changes: 2 additions & 0 deletions flytekit/clis/sdk_in_container/serve.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,8 @@ def agent(_: click.Context, port, prometheus_port, worker, timeout, modules):
"""
import asyncio

from flytekit.extras.webhook import WebhookTask # noqa: F401

working_dir = os.getcwd()
if all(os.path.realpath(path) != working_dir for path in sys.path):
sys.path.append(working_dir)
Expand Down
9 changes: 6 additions & 3 deletions flytekit/extend/backend/base_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,9 @@
from flytekit.models.literals import LiteralMap
from flytekit.models.task import TaskExecutionMetadata, TaskTemplate

# Shared event loop used to force agents to run in the same event loop during local execution.
local_agent_loop = asyncio.new_event_loop()


class TaskCategory:
def __init__(self, name: str, version: int = 0):
Expand Down Expand Up @@ -285,7 +288,7 @@ def execute(self: PythonTask, **kwargs) -> LiteralMap:
output_prefix = ctx.file_access.get_random_remote_directory()

agent = AgentRegistry.get_agent(task_template.type, task_template.task_type_version)
resource = asyncio.run(
resource = local_agent_loop.run_until_complete(
self._do(agent=agent, template=task_template, output_prefix=output_prefix, inputs=kwargs)
)
if resource.phase != TaskExecution.SUCCEEDED:
Expand Down Expand Up @@ -335,10 +338,10 @@ def execute(self: PythonTask, **kwargs) -> LiteralMap:
task_template = get_serializable(OrderedDict(), ss, self).template
self._agent = AgentRegistry.get_agent(task_template.type, task_template.task_type_version)

resource_meta = asyncio.run(
resource_meta = local_agent_loop.run_until_complete(
self._create(task_template=task_template, output_prefix=output_prefix, inputs=kwargs)
)
resource = asyncio.run(self._get(resource_meta=resource_meta))
resource = local_agent_loop.run_until_complete(self._get(resource_meta=resource_meta))

if resource.phase != TaskExecution.SUCCEEDED:
raise FlyteUserException(f"Failed to run the task {self.name} with error: {resource.message}")
Expand Down
4 changes: 4 additions & 0 deletions flytekit/extras/webhook/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
from .agent import WebhookAgent
from .task import WebhookTask

__all__ = ["WebhookTask", "WebhookAgent"]
101 changes: 101 additions & 0 deletions flytekit/extras/webhook/agent.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
from typing import Optional

import httpx
from flyteidl.core.execution_pb2 import TaskExecution

from flytekit.extend.backend.base_agent import AgentRegistry, Resource, SyncAgentBase
from flytekit.interaction.string_literals import literal_map_string_repr
from flytekit.models.literals import LiteralMap
from flytekit.models.task import TaskTemplate
from flytekit.utils.dict_formatter import format_dict

from .constants import DATA_KEY, HEADERS_KEY, METHOD_KEY, SHOW_DATA_KEY, SHOW_URL_KEY, TASK_TYPE, TIMEOUT_SEC, URL_KEY


class WebhookAgent(SyncAgentBase):
    """
    WebhookAgent is responsible for handling webhook tasks.

    This agent sends HTTP requests based on the task template and inputs provided,
    and processes the responses to determine the success or failure of the task.

    :param client: An optional HTTP client to use for sending requests. When omitted, a new
        ``httpx.AsyncClient`` is created.
    """

    name: str = "Webhook Agent"

    def __init__(self, client: Optional[httpx.AsyncClient] = None):
        super().__init__(task_type_name=TASK_TYPE)
        self._client = client or httpx.AsyncClient()

    async def do(
        self, task_template: TaskTemplate, output_prefix: str, inputs: Optional[LiteralMap] = None, **kwargs
    ) -> Resource:
        """
        Process the webhook task and send the HTTP request.

        The task's custom config is formatted with the given inputs and the request is sent
        asynchronously with the httpx client.

        :param task_template: The task template whose ``custom`` dictionary holds the webhook config.
        :param output_prefix: Unused by this agent.
        :param inputs: The task inputs, made available to format strings under the ``inputs.`` prefix.
        :return: A SUCCEEDED resource carrying the response info, or a FAILED resource with the error message.
        """
        try:
            final_dict = self._get_final_dict(task_template, inputs)
            return await self._process_webhook(final_dict)
        except Exception as e:
            # Any failure (formatting or transport) is reported as a failed task rather than raised.
            return Resource(phase=TaskExecution.FAILED, message=str(e))

    def _get_final_dict(self, task_template: TaskTemplate, inputs: LiteralMap) -> dict:
        """
        Return the task's custom config with every ``{inputs.<name>}`` placeholder filled in.

        :param task_template: The task template whose ``custom`` dictionary is formatted.
        :param inputs: The task inputs used to fill the placeholders.
        :return: A formatted copy of the custom config.
        """
        import copy

        # format_dict rewrites dictionaries in place. Work on a deep copy so the template (and
        # any headers/data dictionaries it shares with the task object) keeps its placeholders
        # and can be formatted again with different inputs.
        custom_dict = copy.deepcopy(task_template.custom)
        input_dict = {
            "inputs": literal_map_string_repr(inputs),
        }
        return format_dict(TASK_TYPE, custom_dict, input_dict)

    async def _make_http_request(self, method: str, url: str, headers: dict, data: dict, timeout: int) -> tuple:
        """
        Send the HTTP request and return ``(status_code, response_text)``.

        ``GET`` sends ``data`` as query parameters; every other method is sent as a ``POST``
        with ``data`` as the JSON body.
        """
        if method == "GET":
            response = await self._client.get(url, headers=headers, params=data, timeout=timeout)
        else:
            response = await self._client.post(url, json=data, headers=headers, timeout=timeout)
        return response.status_code, response.text

    @staticmethod
    def _build_response(
        status: int,
        text: str,
        data: Optional[dict] = None,
        url: Optional[str] = None,
        show_data: bool = False,
        show_url: bool = False,
    ) -> dict:
        """
        Build the dictionary returned as the task output.

        :param status: The HTTP status code of the response.
        :param text: The response body as text.
        :param data: The request data; only included when ``show_data`` is True.
        :param url: The request URL; only included when ``show_url`` is True.
        :param show_data: Whether to echo the request data under ``input_data``.
        :param show_url: Whether to echo the request URL under ``url``.
        :return: A dictionary with ``status_code`` and ``response_data``, plus the optional echoes.
        """
        final_response = {
            "status_code": status,
            "response_data": text,
        }
        if show_data:
            final_response["input_data"] = data
        if show_url:
            final_response["url"] = url
        return final_response

    async def _process_webhook(self, final_dict: dict) -> Resource:
        """
        Invoke the webhook described by ``final_dict`` and translate the response into a Resource.

        :param final_dict: The formatted custom config of the task.
        :return: SUCCEEDED with the response info for a 2xx status, FAILED otherwise.
        """
        url = final_dict.get(URL_KEY)
        body = final_dict.get(DATA_KEY)
        headers = final_dict.get(HEADERS_KEY)
        method = str(final_dict.get(METHOD_KEY)).upper()
        show_data = final_dict.get(SHOW_DATA_KEY, False)
        show_url = final_dict.get(SHOW_URL_KEY, False)
        timeout_sec = final_dict.get(TIMEOUT_SEC, 10)

        status, text = await self._make_http_request(method, url, headers, body, timeout_sec)
        # Every 2xx status is a success; webhooks commonly answer 201, 202 or 204 rather than 200.
        if not 200 <= status < 300:
            return Resource(
                phase=TaskExecution.FAILED,
                message=f"Webhook failed with status code {status}, response: {text}",
            )
        final_response = self._build_response(status, text, body, url, show_data, show_url)
        return Resource(
            phase=TaskExecution.SUCCEEDED,
            outputs={"info": final_response},
            message="Webhook was successfully invoked!",
        )


# Create a WebhookAgent with its default HTTP client and register it with the AgentRegistry at import time.
AgentRegistry.register(WebhookAgent())
9 changes: 9 additions & 0 deletions flytekit/extras/webhook/constants.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Task type name shared by the webhook task and the webhook agent.
TASK_TYPE: str = "webhook"

# Keys of the task's custom config dictionary: written by WebhookTask.get_custom and
# read back by WebhookAgent when it builds and sends the request.
URL_KEY: str = "url"
METHOD_KEY: str = "method"
HEADERS_KEY: str = "headers"
DATA_KEY: str = "data"
SHOW_DATA_KEY: str = "show_data"
SHOW_URL_KEY: str = "show_url"
TIMEOUT_SEC: str = "timeout_sec"
141 changes: 141 additions & 0 deletions flytekit/extras/webhook/task.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
from datetime import timedelta
from typing import Any, Dict, Optional, Type, Union

from flytekit import Documentation
from flytekit.configuration import SerializationSettings
from flytekit.core.base_task import PythonTask
from flytekit.extend.backend.base_agent import SyncAgentExecutorMixin

from ...core.interface import Interface
from .constants import DATA_KEY, HEADERS_KEY, METHOD_KEY, SHOW_DATA_KEY, SHOW_URL_KEY, TASK_TYPE, TIMEOUT_SEC, URL_KEY


class WebhookTask(SyncAgentExecutorMixin, PythonTask):
    """
    The WebhookTask is used to invoke a webhook. The webhook can be invoked with a POST or GET method.

    All the parameters can be formatted using python format strings.

    Example:
    ```python
    simple_get = WebhookTask(
        name="simple-get",
        url="http://localhost:8000/",
        method=http.HTTPMethod.GET,
        headers={"Content-Type": "application/json"},
    )

    get_with_params = WebhookTask(
        name="get-with-params",
        url="http://localhost:8000/items/{inputs.item_id}",
        method=http.HTTPMethod.GET,
        headers={"Content-Type": "application/json"},
        dynamic_inputs={"s": str, "item_id": int},
        show_data=True,
        show_url=True,
        description="Test Webhook Task",
        data={"q": "{inputs.s}"},
    )

    @fk.workflow
    def wf(s: str) -> (dict, dict, dict):
        v = hello(s=s)
        w = WebhookTask(
            name="invoke-slack",
            url="https://hooks.slack.com/services/xyz/zaa/aaa",
            headers={"Content-Type": "application/json"},
            data={"text": "{inputs.s}"},
            show_data=True,
            show_url=True,
            description="Test Webhook Task",
            dynamic_inputs={"s": str},
        )
        return simple_get(), get_with_params(s=v, item_id=10), w(s=v)
    ```

    All the parameters can be formatted using python format strings. The following parameters are available for
    formatting:

    - dynamic_inputs: These are the dynamic inputs to the task. The keys are the names of the inputs and the values
      are the values of the inputs. All inputs are available under the prefix `inputs.`.
      For example, if the inputs are {"input1": 10, "input2": "hello"}, then you can
      use {inputs.input1} and {inputs.input2} in the URL and the body. Define the dynamic_inputs argument in the
      constructor to use these inputs. The dynamic inputs should not be actual values, but the types of the inputs.

    TODO Coming soon secrets support
    - secrets: These are the secrets that are requested by the task. The keys are the names of the secrets and the
      values are the values of the secrets. All secrets are available under the prefix `secrets.`.
      For example, if the secrets requested are Secret(name="secret1") and Secret(name="secret2"), then you can use
      {secrets.secret1} and {secrets.secret2} in the URL and the body. Define the secret_requests argument in the
      constructor to use these secrets. The secrets should not be actual values, but the types of the secrets.

    :param name: Name of this task, should be unique in the project
    :param url: The endpoint or URL to invoke for this webhook. This can be a static string or a python format string,
        where the format arguments are the dynamic_inputs to the task, secrets etc. Refer to the description for more
        details of available formatting parameters.
    :param method: The HTTP method to use for the request, GET or POST (case-insensitive). Default is POST.
    :param headers: The headers to send with the request. This can be a static dictionary or a python format string,
        where the format arguments are the dynamic_inputs to the task, secrets etc. Refer to the description for more
        details of available formatting parameters.
    :param data: The body to send with the request. This can be a static dictionary or a python format string,
        where the format arguments are the dynamic_inputs to the task, secrets etc. Refer to the description for more
        details of available formatting parameters. The data should be a json serializable dictionary and will be
        sent as the json body of the POST request and as the query parameters of the GET request.
    :param dynamic_inputs: The dynamic inputs to the task. The keys are the names of the inputs and the values
        are the types of the inputs. These inputs are available under the prefix `inputs.` to be used in the URL,
        headers and body and other formatted fields.
    :param secret_requests: The secrets that are requested by the task. (TODO not yet supported)
    :param show_data: If True, the body of the request will be logged in the UI as the output of the task.
    :param show_url: If True, the URL of the request will be logged in the UI as the output of the task.
    :param description: Description of the task
    :param timeout: The timeout for the request (connection and read). Default is 10 seconds. If int value is provided,
        it is considered as seconds.
    :raises ValueError: If ``method`` is neither GET nor POST.
    """

    def __init__(
        self,
        name: str,
        url: str,
        method: str = "POST",
        headers: Optional[Dict[str, str]] = None,
        data: Optional[Dict[str, Any]] = None,
        dynamic_inputs: Optional[Dict[str, Type]] = None,
        show_data: bool = False,
        show_url: bool = False,
        description: Optional[str] = None,
        timeout: Union[int, timedelta] = timedelta(seconds=10),
        # secret_requests: Optional[List[Secret]] = None,  TODO Secret support is coming soon
    ):
        # Accept "get"/"post" as well: the agent upper-cases the method before dispatching, so the
        # validation here should not be stricter than the execution path. str.upper() also turns
        # string-based enum members into a plain str.
        normalized_method = method.upper() if isinstance(method, str) else method
        if normalized_method not in {"GET", "POST"}:
            raise ValueError(f"Method should be either GET or POST. Got {method}")

        interface = Interface(
            inputs=dynamic_inputs or {},
            outputs={"info": dict},
        )
        super().__init__(
            name=name,
            interface=interface,
            task_type=TASK_TYPE,
            # secret_requests=secret_requests,
            docs=Documentation(short_description=description) if description else None,
        )
        self._url = url
        self._method = normalized_method
        self._headers = headers
        self._data = data
        self._show_data = show_data
        self._show_url = show_url
        self._timeout_sec = timeout if isinstance(timeout, int) else timeout.total_seconds()

    def get_custom(self, settings: SerializationSettings) -> Dict[str, Any]:
        """
        Return the webhook configuration that is stored as the task's custom dictionary.

        :param settings: The serialization settings (not used by this task).
        :return: A new dictionary holding the url, method, headers, data, display flags and timeout.
        """
        import copy

        # Hand out copies of the mutable headers/data: the consumer formats the config in place,
        # which would otherwise overwrite the placeholders stored on this task and make later
        # executions reuse the values of the first one.
        config = {
            URL_KEY: self._url,
            METHOD_KEY: self._method,
            HEADERS_KEY: copy.deepcopy(self._headers or {}),
            DATA_KEY: copy.deepcopy(self._data or {}),
            SHOW_DATA_KEY: self._show_data,
            SHOW_URL_KEY: self._show_url,
            TIMEOUT_SEC: self._timeout_sec,
        }
        return config
86 changes: 86 additions & 0 deletions flytekit/utils/dict_formatter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
import re
from typing import Any, Dict, Optional


def get_nested_value(d: Dict[str, Any], keys: list[str]) -> Any:
    """
    Retrieve the nested value from a dictionary based on a list of keys.

    :param d: The (possibly nested) dictionary to look into.
    :param keys: The keys to follow, outermost first. An empty list returns ``d`` itself.
    :return: The value found after following every key.
    :raises ValueError: If a key is missing, or if the path continues through a value that is not a mapping.
    """
    from collections.abc import Mapping

    for key in keys:
        # Only descend into mappings. `key in "text"` is a substring test and `key in [...]` an
        # element test, so without this guard a path running through a non-mapping value ends
        # in a confusing TypeError instead of the ValueError callers expect.
        if not isinstance(d, Mapping) or key not in d:
            raise ValueError(f"Could not find the key {key} in {d}.")
        d = d[key]
    return d


def replace_placeholder(
    service: str,
    original_dict: str,
    placeholder: str,
    replacement: str,
) -> str:
    """
    Replace a placeholder in the original string and handle the specific logic for the sagemaker service and
    idempotence token.

    For the sagemaker service, when substituting an idempotence token would make the string longer than
    63 characters, the token is truncated so that the result fits whenever the surrounding text allows it.

    :param service: The service the string is formatted for; only "sagemaker" triggers the truncation.
    :param original_dict: The string containing ``{placeholder}``.
    :param placeholder: The placeholder name, without the surrounding braces.
    :param replacement: The text to substitute for the placeholder.
    :return: The string with the placeholder replaced.
    """
    marker = f"{{{placeholder}}}"
    replaced = original_dict.replace(marker, replacement)

    is_sagemaker_token = service == "sagemaker" and placeholder in (
        "inputs.idempotence_token",
        "idempotence_token",
    )
    if not is_sagemaker_token or len(replaced) <= 63:
        return replaced

    # Characters left for the token once the surrounding text is accounted for. Clamp at zero:
    # a negative bound would slice from the end of the token instead of limiting its length.
    budget = max(0, 63 - len(original_dict.replace(marker, "")))
    return original_dict.replace(marker, replacement[:budget])


def format_dict(
    service: str,
    original_dict: Any,
    update_dict: Dict[str, Any],
    idempotence_token: Optional[str] = None,
) -> Any:
    """
    Recursively fill the format-string placeholders found in ``original_dict`` with values from ``update_dict``.

    Placeholders use ``.`` to denote nested keys. For example, if original_dict is
    {"EndpointConfigName": "{inputs.endpoint_config_name}"} and update_dict is
    {"inputs": {"endpoint_config_name": "my-endpoint-config"}},
    then the result will be {"EndpointConfigName": "my-endpoint-config"}.

    Only two kinds of placeholder are substituted: dotted ones, which are looked up in ``update_dict``,
    and ``{idempotence_token}``, which is replaced when a token is supplied. Any other placeholder is
    left as written. A string that consists of exactly one dotted placeholder yields the looked-up
    value itself rather than its string form.

    :param service: The service name, forwarded to ``replace_placeholder``
    :param original_dict: The value to format; dictionaries are updated in place, lists are rebuilt
    :param update_dict: The dictionary to use for updating
    :param idempotence_token: Hash of config -- this is to ensure the execution ID is deterministic
    :return: The formatted value
    """
    if original_dict is None:
        return None

    if isinstance(original_dict, dict):
        # Assigning to existing keys keeps the size stable, so updating while iterating is safe.
        for name, item in original_dict.items():
            original_dict[name] = format_dict(service, item, update_dict, idempotence_token)
        return original_dict

    if isinstance(original_dict, list):
        return [format_dict(service, element, update_dict, idempotence_token) for element in original_dict]

    if not isinstance(original_dict, str) or "{" not in original_dict or "}" not in original_dict:
        return original_dict

    formatted = original_dict
    # The placeholders are collected once, from the string as it was passed in.
    for placeholder in re.findall(r"\{([^}]+)\}", original_dict):
        if "." in placeholder:
            resolved = get_nested_value(update_dict, placeholder.split("."))
            if formatted == "{" + placeholder + "}":
                # The whole string is this placeholder: hand back the value with its own type.
                return resolved
            formatted = replace_placeholder(service, formatted, placeholder, str(resolved))
        elif placeholder == "idempotence_token" and idempotence_token:
            formatted = replace_placeholder(service, formatted, placeholder, idempotence_token)
    return formatted
Loading

0 comments on commit 6d7c738

Please sign in to comment.