diff --git a/.circleci/config.yml b/.circleci/config.yml index 1964d79609..90298dbca9 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -70,7 +70,7 @@ executors: parameters: toolchain_version: type: string - default: '{{ checksum ".circleci/config.yml" }}-{{ checksum "~/.arch" }}-{{ checksum "rust-toolchain.toml" }}-{{ checksum "~/.daily_version" }}' + default: '{{ checksum ".circleci/config.yml" }}-v2-{{ checksum "~/.arch" }}-{{ checksum "rust-toolchain.toml" }}-{{ checksum "~/.daily_version" }}' xtask_version: type: string default: '{{ checksum ".circleci/config.yml" }}-{{ checksum "~/.arch" }}-{{ checksum "rust-toolchain.toml" }}-{{ checksum "~/.xtask_version" }}' @@ -91,7 +91,7 @@ parameters: # forks of the project to run their own tests on their own CircleCI deployments with no # additional configuration. common_job_environment: &common_job_environment - CARGO_NET_GIT_FETCH_WITH_CLI: true + CARGO_NET_GIT_FETCH_WITH_CLI: "true" RUST_BACKTRACE: full CARGO_INCREMENTAL: 0 commands: @@ -107,7 +107,7 @@ commands: - restore_cache: keys: - "<< pipeline.parameters.toolchain_version >>" - - install_debian_packages: + - install_packages: platform: << parameters.platform >> - install_protoc: platform: << parameters.platform >> @@ -205,7 +205,7 @@ commands: echo "${CIRCLE_PROJECT_REPONAME}-${COMMON_ANCESTOR_REF}" > ~/.merge_version # Linux specific step to install packages that are needed - install_debian_packages: + install_packages: parameters: platform: type: executor @@ -222,11 +222,10 @@ commands: name: Update and install dependencies command: | if [[ ! -d "$HOME/.deb" ]]; then - mkdir ~/.deb + mkdir $HOME/.deb sudo apt-get --download-only -o Dir::Cache="$HOME/.deb" -o Dir::Cache::archives="$HOME/.deb" install libssl-dev libdw-dev cmake fi - sudo dpkg -i ~/.deb/*.deb - + sudo dpkg -i $HOME/.deb/*.deb install_protoc: parameters: platform: @@ -762,4 +761,4 @@ workflows: branches: ignore: /.*/ tags: - only: /v.*/ + only: /v.*/ \ No newline at end of file diff --git a/CHANGELOG.md b/CHANGELOG.md index 23ac088772..fd79245f9b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,165 @@ All notable changes to Router will be documented in this file. This project adheres to [Semantic Versioning v2.0.0](https://semver.org/spec/v2.0.0.html). +# [1.22.0] - 2023-06-21 + +## 🚀 Features + +### Federated Subscriptions ([PR #3285](https://github.com/apollographql/router/pull/3285)) + +> ⚠️ **This is an [Enterprise feature](https://www.apollographql.com/blog/platform/evaluating-apollo-router-understanding-free-and-open-vs-commercial-features/) of the Apollo Router.** It requires an organization with a [GraphOS Enterprise plan](https://www.apollographql.com/pricing/). +> +> If your organization _doesn't_ currently have an Enterprise plan, you can test out this functionality by signing up for a free [Enterprise trial](https://www.apollographql.com/docs/graphos/org/plans/#enterprise-trials). + + +#### High-Level Overview + +##### What are Federated Subscriptions? + +This PR adds GraphQL subscription support to the Router for use with Federation. Clients can now use GraphQL subscriptions with the Router to receive realtime updates from a supergraph. With these changes, `subscription` operations are now a first-class supported feature of the Router and Federation, alongside queries and mutations. + +```mermaid +flowchart LR; + client(Client); + subgraph "Your infrastructure"; + router(["Apollo Router"]); + subgraphA[Products
subgraph];
+  subgraphB[Reviews<br/>subgraph];
+  router---|Subscribes<br/>over WebSocket|subgraphA;
+  router-.-|Can query for<br/>entity fields|subgraphB;
+  end;
+  client---|Subscribes
over HTTP|router; + class client secondary; +``` + +##### Client to Router Communication + +- Apollo has designed and implemented a new open protocol for handling subscriptions called [multipart subscriptions](https://github.com/apollographql/router/blob/dev/dev-docs/multipart-subscriptions-protocol.md) +- With this new protocol clients can manage subscriptions with the Router over tried and true HTTP; WebSockets, SSE (server-sent events), etc. are not needed +- All Apollo clients ([Apollo Client web](https://www.apollographql.com/docs/react/data/subscriptions), [Apollo Kotlin](https://www.apollographql.com/docs/kotlin/essentials/subscriptions), [Apollo iOS](https://www.apollographql.com/docs/ios/fetching/subscriptions)) have been updated to support multipart subscriptions, and can be used out of the box with little to no extra configuration +- Subscription communication between clients and the Router must use the multipart subscription protocol, meaning only subscriptions over HTTP are supported at this time + +##### Router to Subgraph Communication + +- The Router communicates with subscription enabled subgraphs using WebSockets +- By default, the router sends subscription requests to subgraphs using the [graphql-transport-ws protocol](https://github.com/enisdenjo/graphql-ws/blob/master/PROTOCOL.md) which is implemented in the [graphql-ws](https://github.com/enisdenjo/graphql-ws) library. You can also configure it to use the [graphql-ws protocol](https://github.com/apollographql/subscriptions-transport-ws/blob/master/PROTOCOL.md) which is implemented in the [subscriptions-transport-ws library](https://github.com/apollographql/subscriptions-transport-ws). +- Subscription ready subgraphs can be introduced to Federation and the Router as is - no additional configuration is needed on the subgraph side + +##### Subscription Execution + +When the Router receives a GraphQL subscription request, the generated query plan will contain an initial subscription request to the subgraph that contributed the requested subscription root field. + +For example, as a result of a client sending this subscription request to the Router: + +```graphql +subscription { + reviewAdded { + id + body + product { + id + name + createdBy { + name + } + } + } +} +``` + +The router will send this request to the `reviews` subgraph: + +```graphql +subscription { + reviewAdded { + id + body + product { + id + } + } +} +``` + +When the `reviews` subgraph receives new data from its underlying source event stream, that data is sent back to the Router. Once received, the Router continues following the determined query plan to fetch any additional required data from other subgraphs: + +Example query sent to the `products` subgraph: + +```graphql +query ($representations: [_Any!]!) { + _entities(representations: $representations) { + ... on Product { + name + createdBy { + __typename + email + } + } + } +} +``` + +Example query sent to the `users` subgraph: + +```graphql +query ($representations: [_Any!]!) { + _entities(representations: $representations) { + ... on User { + name + } + } +} +``` + +When the Router finishes running the entire query plan, the data is merged back together and returned to the requesting client over HTTP (using the multipart subscriptions protocol). 
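+
+As a rough illustration of the client-to-Router flow described above (this sketch is not part of the PR): a client opts into subscriptions over plain HTTP simply by advertising multipart support in its `Accept` header. The snippet below assumes the router is listening on its default address (`127.0.0.1:4000`) and uses the `reqwest` (with the `json` feature), `tokio`, and `serde_json` crates; treat it as a sketch rather than a reference client:
+
+```rust
+use serde_json::json;
+
+#[tokio::main]
+async fn main() -> Result<(), Box<dyn std::error::Error>> {
+    let client = reqwest::Client::new();
+    let mut response = client
+        .post("http://127.0.0.1:4000/")
+        // Advertising multipart support is what opts the client into
+        // subscriptions over HTTP (no WebSocket needed on this side).
+        .header(
+            "accept",
+            r#"multipart/mixed;boundary="graphql";subscriptionSpec=1.0"#,
+        )
+        .json(&json!({ "query": "subscription { reviewAdded { id body } }" }))
+        .send()
+        .await?;
+
+    // The connection stays open; each subscription event (and periodic
+    // heartbeat) arrives as another multipart chunk until the subscription ends.
+    while let Some(chunk) = response.chunk().await? {
+        println!("{}", String::from_utf8_lossy(&chunk));
+    }
+    Ok(())
+}
+```
+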
+ +#### Configuration + +Here is a configuration example: + +```yaml title="router.yaml" +subscription: + mode: + passthrough: + all: # The router uses these subscription settings UNLESS overridden per-subgraph + path: /subscriptions # The path to use for subgraph subscription endpoints (Default: /ws) + subgraphs: # Overrides subscription settings for individual subgraphs + reviews: # Overrides settings for the 'reviews' subgraph + path: /ws # Overrides '/subscriptions' defined above + protocol: graphql_transport_ws # The WebSocket-based protocol to use for subscription communication (Default: graphql_ws) +``` + +#### Usage Reporting + +Subscription use is tracked in the Router as follows: + +- **Subscription registration:** The initial subscription operation sent by a client to the Router that's responsible for starting a new subscription +- **Subscription notification:** The resolution of the client subscription’s selection set in response to a subscription enabled subgraph source event + +Subscription registration and notification (with operation traces and statistics) are sent to Apollo Studio for observability. + +#### Advanced Features + +This PR includes the following configurable performance optimizations. + +#### Deduplication + +- If the Router detects that a client is using the same subscription as another client (ie. a subscription with the same HTTP headers and selection set), it will avoid starting a new subscription with the requested subgraph. The Router will reuse the same open subscription instead, and will send the same source events to the new client. +- This helps reduce the number of WebSockets that need to be opened between the Router and subscription enabled subgraphs, thereby drastically reducing Router to subgraph network traffic and overall latency +- For example, if 100 clients are subscribed to the same subscription there will be 100 open HTTP connections from the clients to the Router, but only 1 open WebSocket connection from the Router to the subgraph +- Subscription deduplication between the Router and subgraphs is enabled by default (but can be disabled via the Router config file) + +#### Callback Mode + +- Instead of sending subscription data between a Router and subgraph over an open WebSocket, the Router can be configured to send the subgraph a callback URL that will then be used to receive all source stream events +- Subscription enabled subgraphs send source stream events (subscription updates) back to the callback URL by making HTTP POST requests +- Refer to the [callback mode documentation](https://github.com/apollographql/router/blob/dev/dev-docs/callback_protocol.md) for more details, including an explanation of the callback URL request/response payload format +- This feature is still experimental and needs to be enabled explicitly in the Router config file + +By [@bnjjj](https://github.com/bnjjj) and [@o0Ignition0o](https://github.com/o0ignition0o) in https://github.com/apollographql/router/pull/3285 + + + # [1.21.0] - 2023-06-20 ## 🚀 Features diff --git a/Cargo.lock b/Cargo.lock index 478f2c9d5f..6cece4c6ea 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -281,7 +281,7 @@ dependencies = [ [[package]] name = "apollo-router" -version = "1.21.0" +version = "1.22.0" dependencies = [ "access-json", "anyhow", @@ -317,6 +317,7 @@ dependencies = [ "graphql_client 0.11.0", "heck 0.4.1", "hex", + "hmac", "http", "http-body", "http-serde", @@ -393,6 +394,7 @@ dependencies = [ "tokio", "tokio-rustls 0.23.4", "tokio-stream", + "tokio-tungstenite", "tokio-util", 
"tonic 0.8.3", "tonic-build", @@ -420,7 +422,7 @@ dependencies = [ [[package]] name = "apollo-router-benchmarks" -version = "1.21.0" +version = "1.22.0" dependencies = [ "apollo-parser 0.4.1", "apollo-router", @@ -436,7 +438,7 @@ dependencies = [ [[package]] name = "apollo-router-scaffold" -version = "1.21.0" +version = "1.22.0" dependencies = [ "anyhow", "cargo-scaffold", @@ -645,6 +647,7 @@ checksum = "f8175979259124331c1d7bf6586ee7e0da434155e4b2d48ec2c8386281d8df39" dependencies = [ "async-trait", "axum-core", + "base64 0.21.2", "bitflags", "bytes", "futures-util", @@ -663,8 +666,10 @@ dependencies = [ "serde_json", "serde_path_to_error", "serde_urlencoded", + "sha1 0.10.5", "sync_wrapper", "tokio", + "tokio-tungstenite", "tower", "tower-layer", "tower-service", @@ -6171,6 +6176,22 @@ dependencies = [ "tokio-stream", ] +[[package]] +name = "tokio-tungstenite" +version = "0.18.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "54319c93411147bced34cb5609a80e0a8e44c5999c93903a81cd866630ec0bfd" +dependencies = [ + "futures-util", + "log", + "rustls 0.20.8", + "rustls-native-certs", + "tokio", + "tokio-rustls 0.23.4", + "tungstenite", + "webpki", +] + [[package]] name = "tokio-util" version = "0.7.8" @@ -6510,6 +6531,27 @@ dependencies = [ "syn 1.0.109", ] +[[package]] +name = "tungstenite" +version = "0.18.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "30ee6ab729cd4cf0fd55218530c4522ed30b7b6081752839b68fcec8d0960788" +dependencies = [ + "base64 0.13.1", + "byteorder", + "bytes", + "http", + "httparse", + "log", + "rand 0.8.5", + "rustls 0.20.8", + "sha1 0.10.5", + "thiserror", + "url", + "utf-8", + "webpki", +] + [[package]] name = "typed-builder" version = "0.9.1" @@ -6721,6 +6763,12 @@ dependencies = [ "url", ] +[[package]] +name = "utf-8" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9" + [[package]] name = "utf8parse" version = "0.2.1" diff --git a/apollo-router-benchmarks/Cargo.toml b/apollo-router-benchmarks/Cargo.toml index 8226283491..2ad0ad2885 100644 --- a/apollo-router-benchmarks/Cargo.toml +++ b/apollo-router-benchmarks/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "apollo-router-benchmarks" -version = "1.21.0" +version = "1.22.0" authors = ["Apollo Graph, Inc. "] edition = "2021" license = "Elastic-2.0" diff --git a/apollo-router-scaffold/Cargo.toml b/apollo-router-scaffold/Cargo.toml index 8a4f8a3d6d..bc8572a6ee 100644 --- a/apollo-router-scaffold/Cargo.toml +++ b/apollo-router-scaffold/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "apollo-router-scaffold" -version = "1.21.0" +version = "1.22.0" authors = ["Apollo Graph, Inc. 
"] edition = "2021" license = "Elastic-2.0" diff --git a/apollo-router-scaffold/templates/base/Cargo.toml b/apollo-router-scaffold/templates/base/Cargo.toml index 9bdd9df4d7..83cdc5bafb 100644 --- a/apollo-router-scaffold/templates/base/Cargo.toml +++ b/apollo-router-scaffold/templates/base/Cargo.toml @@ -22,7 +22,7 @@ apollo-router = { path ="{{integration_test}}apollo-router" } apollo-router = { git="https://github.com/apollographql/router.git", branch="{{branch}}" } {{else}} # Note if you update these dependencies then also update xtask/Cargo.toml -apollo-router = "1.21.0" +apollo-router = "1.22.0" {{/if}} {{/if}} async-trait = "0.1.52" diff --git a/apollo-router-scaffold/templates/base/xtask/Cargo.toml b/apollo-router-scaffold/templates/base/xtask/Cargo.toml index 0690738259..29204ce3f8 100644 --- a/apollo-router-scaffold/templates/base/xtask/Cargo.toml +++ b/apollo-router-scaffold/templates/base/xtask/Cargo.toml @@ -13,7 +13,7 @@ apollo-router-scaffold = { path ="{{integration_test}}apollo-router-scaffold" } {{#if branch}} apollo-router-scaffold = { git="https://github.com/apollographql/router.git", branch="{{branch}}" } {{else}} -apollo-router-scaffold = { git = "https://github.com/apollographql/router.git", tag = "v1.21.0" } +apollo-router-scaffold = { git = "https://github.com/apollographql/router.git", tag = "v1.22.0" } {{/if}} {{/if}} anyhow = "1.0.58" diff --git a/apollo-router/Cargo.toml b/apollo-router/Cargo.toml index 5c2a00efd9..ff69bb4cb1 100644 --- a/apollo-router/Cargo.toml +++ b/apollo-router/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "apollo-router" -version = "1.21.0" +version = "1.22.0" authors = ["Apollo Graph, Inc. "] repository = "https://github.com/apollographql/router/" documentation = "https://docs.rs/apollo-router" @@ -215,8 +215,10 @@ urlencoding = "2.1.2" uuid = { version = "1.3.3", features = ["serde", "v4"] } yaml-rust = "0.4.5" wsl = "0.1.0" +tokio-tungstenite = { version = "0.18.0", features = ["rustls-tls-native-roots"] } tokio-rustls = "0.23.4" http-serde = "1.1.2" +hmac = "0.12.1" parking_lot = "0.12.1" memchr = "2.5.0" brotli = "3.3.4" @@ -233,6 +235,7 @@ uname = "0.1.1" tikv-jemallocator = "0.5" [dev-dependencies] +axum = { version = "0.6.6", features = ["headers", "json", "original-uri", "ws"] } ecdsa = { version = "0.15.1", features = ["signing", "pem", "pkcs8"] } fred = { version = "6.3.0", features = ["enable-rustls", "no-client-setname"] } futures-test = "0.3.28" diff --git a/apollo-router/feature_discussions.json b/apollo-router/feature_discussions.json index c8e2fb9c43..5fc7b56899 100644 --- a/apollo-router/feature_discussions.json +++ b/apollo-router/feature_discussions.json @@ -8,4 +8,4 @@ "preview": { "preview_operation_limits": "https://github.com/apollographql/router/discussions/3040" } -} +} \ No newline at end of file diff --git a/apollo-router/src/axum_factory/tests.rs b/apollo-router/src/axum_factory/tests.rs index 2d01d2779a..017dd70eb1 100644 --- a/apollo-router/src/axum_factory/tests.rs +++ b/apollo-router/src/axum_factory/tests.rs @@ -1162,7 +1162,7 @@ async fn it_errors_on_bad_accept_header() -> Result<(), ApolloRouterError> { ); assert_eq!( response.text().await.unwrap(), - r#"{"errors":[{"message":"'accept' header must be one of: \\\"*/*\\\", \"application/json\", \"application/graphql-response+json\" or \"multipart/mixed;boundary=\\\"graphql\\\";deferSpec=20220824\"","extensions":{"code":"INVALID_ACCEPT_HEADER"}}]}"# + r#"{"errors":[{"message":"'accept' header must be one of: \\\"*/*\\\", \"application/json\", 
\"application/graphql-response+json\", \"multipart/mixed;boundary=\\\"graphql\\\";subscriptionSpec=1.0\" or \"multipart/mixed;boundary=\\\"graphql\\\";deferSpec=20220824\"","extensions":{"code":"INVALID_ACCEPT_HEADER"}}]}"# ); server.shutdown().await diff --git a/apollo-router/src/configuration/experimental.rs b/apollo-router/src/configuration/experimental.rs index 06784b4483..fc8011ba83 100644 --- a/apollo-router/src/configuration/experimental.rs +++ b/apollo-router/src/configuration/experimental.rs @@ -123,6 +123,9 @@ mod tests { "sub": { "experimental_trace_id": "ok" } + }, + "preview_subscription": { + } }); @@ -133,5 +136,9 @@ mod tests { "experimental_trace_id".to_string() ] ); + assert_eq!( + get_configurations(&val, "preview"), + vec!["preview_subscription".to_string(),] + ); } } diff --git a/apollo-router/src/configuration/mod.rs b/apollo-router/src/configuration/mod.rs index 0d66cd7dca..2fba20f48e 100644 --- a/apollo-router/src/configuration/mod.rs +++ b/apollo-router/src/configuration/mod.rs @@ -18,6 +18,8 @@ use std::net::SocketAddr; use std::num::NonZeroUsize; use std::str::FromStr; use std::sync::Arc; +#[cfg(not(test))] +use std::time::Duration; use derivative::Derivative; use displaydoc::Display; @@ -50,9 +52,21 @@ pub(crate) use self::schema::generate_upgrade; use self::subgraph::SubgraphConfiguration; use crate::cache::DEFAULT_CACHE_CAPACITY; use crate::configuration::schema::Mode; +use crate::graphql; +use crate::notification::Notify; use crate::plugin::plugins; +#[cfg(not(test))] +use crate::plugins::subscription::SubscriptionConfig; +#[cfg(not(test))] +use crate::plugins::subscription::APOLLO_SUBSCRIPTION_PLUGIN; +#[cfg(not(test))] +use crate::plugins::subscription::APOLLO_SUBSCRIPTION_PLUGIN_NAME; use crate::ApolloRouterError; +// TODO: Talk it through with the teams +#[cfg(not(test))] +static HEARTBEAT_TIMEOUT_DURATION_SECONDS: u64 = 15; + static SUPERGRAPH_ENDPOINT_REGEX: Lazy = Lazy::new(|| { Regex::new(r"(?P.*/)(?P.+)\*$") .expect("this regex to check the path is valid") @@ -149,6 +163,9 @@ pub struct Configuration { #[serde(default)] #[serde(flatten)] pub(crate) apollo_plugins: ApolloPlugins, + + #[serde(default, skip_serializing, skip_deserializing)] + pub(crate) notify: Notify, } impl<'de> serde::Deserialize<'de> for Configuration { @@ -217,10 +234,24 @@ impl Configuration { plugins: Map, apollo_plugins: Map, tls: Option, + notify: Option>, apq: Option, operation_limits: Option, chaos: Option, ) -> Result { + #[cfg(not(test))] + let notify_queue_cap = match apollo_plugins.get(APOLLO_SUBSCRIPTION_PLUGIN_NAME) { + Some(plugin_conf) => { + let conf = serde_json::from_value::(plugin_conf.clone()) + .map_err(|err| ConfigurationError::PluginConfiguration { + plugin: APOLLO_SUBSCRIPTION_PLUGIN.to_string(), + error: format!("{err:?}"), + })?; + conf.queue_capacity + } + None => None, + }; + let conf = Self { validated_yaml: Default::default(), supergraph: supergraph.unwrap_or_default(), @@ -238,6 +269,11 @@ impl Configuration { plugins: apollo_plugins, }, tls: tls.unwrap_or_default(), + #[cfg(test)] + notify: notify.unwrap_or_default(), + #[cfg(not(test))] + notify: notify.map(|n| n.set_queue_size(notify_queue_cap)) + .unwrap_or_else(|| Notify::builder().and_queue_size(notify_queue_cap).ttl(Duration::from_secs(HEARTBEAT_TIMEOUT_DURATION_SECONDS)).heartbeat_error_message(graphql::Response::builder().errors(vec![graphql::Error::builder().message("the connection has been closed because it hasn't heartbeat for a 
while").extension_code("SUBSCRIPTION_HEARTBEAT_ERROR").build()]).build()).build()), }; conf.validate() @@ -287,6 +323,7 @@ impl Configuration { plugins: Map, apollo_plugins: Map, tls: Option, + notify: Option>, apq: Option, operation_limits: Option, chaos: Option, @@ -307,6 +344,7 @@ impl Configuration { plugins: apollo_plugins, }, tls: tls.unwrap_or_default(), + notify: notify.unwrap_or_default(), apq: apq.unwrap_or_default(), }; diff --git a/apollo-router/src/configuration/snapshots/apollo_router__configuration__tests__schema_generation.snap b/apollo-router/src/configuration/snapshots/apollo_router__configuration__tests__schema_generation.snap index f02fda7160..e919b8cedf 100644 --- a/apollo-router/src/configuration/snapshots/apollo_router__configuration__tests__schema_generation.snap +++ b/apollo-router/src/configuration/snapshots/apollo_router__configuration__tests__schema_generation.snap @@ -1227,6 +1227,149 @@ expression: "&schema" }, "additionalProperties": false }, + "subscription": { + "description": "Subscriptions configuration", + "type": "object", + "properties": { + "enable_deduplication": { + "description": "Enable the deduplication of subscription (for example if we detect the exact same request to subgraph we won't open a new websocket to the subgraph in passthrough mode) (default: true)", + "default": true, + "type": "boolean" + }, + "max_opened_subscriptions": { + "description": "This is a limit to only have maximum X opened subscriptions at the same time. By default if it's not set there is no limit.", + "default": null, + "type": "integer", + "format": "uint", + "minimum": 0.0, + "nullable": true + }, + "mode": { + "description": "Select a subscription mode (callback or passthrough)", + "default": { + "preview_callback": null, + "passthrough": null + }, + "type": "object", + "properties": { + "passthrough": { + "description": "Enable passthrough mode for subgraph(s)", + "type": "object", + "properties": { + "all": { + "description": "Configuration for all subgraphs", + "default": null, + "type": "object", + "properties": { + "path": { + "description": "Path on which WebSockets are listening", + "default": null, + "type": "string", + "nullable": true + }, + "protocol": { + "description": "Which WebSocket GraphQL protocol to use for this subgraph possible values are: 'graphql_ws' | 'graphql_transport_ws' (default: graphql_ws)", + "default": "graphql_ws", + "type": "string", + "enum": [ + "graphql_ws", + "graphql_transport_ws" + ] + } + }, + "additionalProperties": false, + "nullable": true + }, + "subgraphs": { + "description": "Configuration for specific subgraphs", + "default": {}, + "type": "object", + "additionalProperties": { + "description": "WebSocket configuration for a specific subgraph", + "type": "object", + "properties": { + "path": { + "description": "Path on which WebSockets are listening", + "default": null, + "type": "string", + "nullable": true + }, + "protocol": { + "description": "Which WebSocket GraphQL protocol to use for this subgraph possible values are: 'graphql_ws' | 'graphql_transport_ws' (default: graphql_ws)", + "default": "graphql_ws", + "type": "string", + "enum": [ + "graphql_ws", + "graphql_transport_ws" + ] + } + }, + "additionalProperties": false + } + } + }, + "additionalProperties": false, + "nullable": true + }, + "preview_callback": { + "description": "Enable callback mode for subgraph(s)", + "type": "object", + "required": [ + "public_url" + ], + "properties": { + "listen": { + "description": "Listen address on which the callback 
must listen (default: 127.0.0.1:4000)", + "writeOnly": true, + "anyOf": [ + { + "description": "Socket address.", + "type": "string" + }, + { + "description": "Unix socket.", + "type": "string" + } + ], + "nullable": true + }, + "path": { + "description": "Specify on which path you want to listen for callbacks (default: /callback)", + "writeOnly": true, + "type": "string", + "nullable": true + }, + "public_url": { + "description": "URL used to access this router instance", + "type": "string" + }, + "subgraphs": { + "description": "Specify on which subgraph we enable the callback mode for subscription If empty it applies to all subgraphs (passthrough mode takes precedence)", + "default": [], + "type": "array", + "items": { + "type": "string" + }, + "uniqueItems": true + } + }, + "additionalProperties": false, + "nullable": true + } + }, + "additionalProperties": false + }, + "queue_capacity": { + "description": "It represent the capacity of the in memory queue to know how many events we can keep in a buffer", + "default": null, + "type": "integer", + "format": "uint", + "minimum": 0.0, + "nullable": true + } + }, + "additionalProperties": false + }, "supergraph": { "description": "Configuration for the supergraph", "default": { diff --git a/apollo-router/src/error.rs b/apollo-router/src/error.rs index 9d3e1fe1d1..bfad581e22 100644 --- a/apollo-router/src/error.rs +++ b/apollo-router/src/error.rs @@ -87,6 +87,16 @@ pub(crate) enum FetchError { /// The reason the fetch failed. reason: String, }, + /// Websocket fetch failed from '{service}': {reason} + /// + /// note that this relates to a transport error and not a GraphQL error + SubrequestWsError { + /// The service failed. + service: String, + + /// The reason the fetch failed. + reason: String, + }, /// subquery requires field '{field}' but it was not found in the current response ExecutionFieldNotFound { @@ -135,6 +145,7 @@ impl FetchError { } FetchError::SubrequestMalformedResponse { service, .. } | FetchError::SubrequestUnexpectedPatchResponse { service } + | FetchError::SubrequestWsError { service, .. } | FetchError::CompressionError { service, .. } => { extensions .entry("service") @@ -181,6 +192,7 @@ impl ErrorExtension for FetchError { "SUBREQUEST_UNEXPECTED_PATCH_RESPONSE" } FetchError::SubrequestHttpError { .. } => "SUBREQUEST_HTTP_ERROR", + FetchError::SubrequestWsError { .. } => "SUBREQUEST_WEBSOCKET_ERROR", FetchError::ExecutionFieldNotFound { .. } => "EXECUTION_FIELD_NOT_FOUND", FetchError::ExecutionPathNotFound { .. } => "EXECUTION_PATH_NOT_FOUND", FetchError::CompressionError { .. } => "COMPRESSION_ERROR", diff --git a/apollo-router/src/lib.rs b/apollo-router/src/lib.rs index 437e7135db..296dcc8a4d 100644 --- a/apollo-router/src/lib.rs +++ b/apollo-router/src/lib.rs @@ -59,8 +59,10 @@ mod http_ext; mod http_server_factory; mod introspection; pub mod layers; +pub(crate) mod notification; mod orbiter; mod plugins; +pub(crate) mod protocols; mod query_planner; mod request; mod response; @@ -78,6 +80,7 @@ pub use crate::configuration::ListenAddr; pub use crate::context::Context; pub use crate::executable::main; pub use crate::executable::Executable; +pub use crate::notification::Notify; pub use crate::router::ApolloRouterError; pub use crate::router::ConfigurationSource; pub use crate::router::LicenseSource; diff --git a/apollo-router/src/notification.rs b/apollo-router/src/notification.rs new file mode 100644 index 0000000000..17240c359b --- /dev/null +++ b/apollo-router/src/notification.rs @@ -0,0 +1,979 @@ +//! 
Internal pub/sub facility for subscription + +use std::collections::HashMap; +use std::fmt::Debug; +use std::hash::Hash; +use std::pin::Pin; +use std::task::Context; +use std::task::Poll; +use std::time::Duration; +use std::time::Instant; + +use futures::channel::mpsc; +use futures::channel::mpsc::SendError; +use futures::channel::oneshot; +use futures::channel::oneshot::Canceled; +use futures::Sink; +use futures::SinkExt; +use futures::Stream; +use futures::StreamExt; +use pin_project_lite::pin_project; +use thiserror::Error; +use tokio::sync::broadcast; +use tokio_stream::wrappers::errors::BroadcastStreamRecvError; +use tokio_stream::wrappers::BroadcastStream; +use tokio_stream::wrappers::IntervalStream; + +use crate::graphql; + +static NOTIFY_CHANNEL_SIZE: usize = 1024; +static DEFAULT_MSG_CHANNEL_SIZE: usize = 128; + +#[derive(Error, Debug)] +pub(crate) enum NotifyError { + #[error("cannot send data to pubsub")] + SendError(#[from] SendError), + #[error("cannot send data to response stream")] + BroadcastSendError(#[from] broadcast::error::SendError), + #[error("cannot send data to pubsub because channel has been closed")] + Canceled(#[from] Canceled), + #[error("this topic doesn't exist")] + UnknownTopic, +} + +type ResponseSender = + oneshot::Sender>, broadcast::Receiver>)>>; + +type ResponseSenderWithCreated = oneshot::Sender<( + broadcast::Sender>, + broadcast::Receiver>, + bool, +)>; + +enum Notification { + CreateOrSubscribe { + topic: K, + // Sender connected to the original source stream + msg_sender: broadcast::Sender>, + // To know if it has been created or re-used + response_sender: ResponseSenderWithCreated, + heartbeat_enabled: bool, + }, + Subscribe { + topic: K, + // Oneshot channel to fetch the receiver + response_sender: ResponseSender, + }, + SubscribeIfExist { + topic: K, + // Oneshot channel to fetch the receiver + response_sender: ResponseSender, + }, + Unsubscribe { + topic: K, + }, + ForceDelete { + topic: K, + }, + Exist { + topic: K, + response_sender: oneshot::Sender, + }, + InvalidIds { + topics: Vec, + response_sender: oneshot::Sender<(Vec, Vec)>, + }, + #[cfg(test)] + TryDelete { + topic: K, + }, + #[cfg(test)] + Broadcast { + data: V, + }, + #[cfg(test)] + Debug { + // Returns the number of subscriptions and subscribers + response_sender: oneshot::Sender, + }, +} + +/// In memory pub/sub implementation +#[derive(Clone)] +pub struct Notify { + sender: mpsc::Sender>, + /// Size (number of events) of the channel to receive message + pub(crate) queue_size: Option, +} + +#[buildstructor::buildstructor] +impl Notify +where + K: Send + Hash + Eq + Clone + 'static, + V: Send + Sync + Clone + 'static, +{ + #[builder] + pub(crate) fn new( + ttl: Option, + heartbeat_error_message: Option, + queue_size: Option, + ) -> Notify { + let (sender, receiver) = mpsc::channel(NOTIFY_CHANNEL_SIZE); + tokio::task::spawn(task(receiver, ttl, heartbeat_error_message)); + Notify { sender, queue_size } + } + + #[doc(hidden)] + /// NOOP notifier for tests + pub fn for_tests() -> Self { + let (sender, _receiver) = mpsc::channel(NOTIFY_CHANNEL_SIZE); + Notify { + sender, + queue_size: None, + } + } +} +impl Notify +where + K: Send + Hash + Eq + Clone + 'static, + V: Send + Clone + 'static, +{ + #[cfg(not(test))] + pub(crate) fn set_queue_size(mut self, queue_size: Option) -> Self { + self.queue_size = queue_size; + self + } + + // boolean in the tuple means `created` + pub(crate) async fn create_or_subscribe( + &mut self, + topic: K, + heartbeat_enabled: bool, + ) -> Result<(Handle, 
bool), NotifyError> { + let (sender, _receiver) = + broadcast::channel(self.queue_size.unwrap_or(DEFAULT_MSG_CHANNEL_SIZE)); + + let (tx, rx) = oneshot::channel(); + self.sender + .send(Notification::CreateOrSubscribe { + topic: topic.clone(), + msg_sender: sender, + response_sender: tx, + heartbeat_enabled, + }) + .await?; + + let (msg_sender, msg_receiver, created) = rx.await?; + let handle = Handle::new( + topic, + self.sender.clone(), + msg_sender, + BroadcastStream::from(msg_receiver), + ); + + Ok((handle, created)) + } + + pub(crate) async fn subscribe(&mut self, topic: K) -> Result, NotifyError> { + let (sender, receiver) = oneshot::channel(); + + self.sender + .send(Notification::Subscribe { + topic: topic.clone(), + response_sender: sender, + }) + .await?; + + let Some((msg_sender, msg_receiver)) = receiver.await? else { + return Err(NotifyError::UnknownTopic); + }; + let handle = Handle::new( + topic, + self.sender.clone(), + msg_sender, + BroadcastStream::from(msg_receiver), + ); + + Ok(handle) + } + + pub(crate) async fn subscribe_if_exist( + &mut self, + topic: K, + ) -> Result>, NotifyError> { + let (sender, receiver) = oneshot::channel(); + + self.sender + .send(Notification::SubscribeIfExist { + topic: topic.clone(), + response_sender: sender, + }) + .await?; + + let Some((msg_sender, msg_receiver)) = receiver.await? else { + return Ok(None); + }; + let handle = Handle::new( + topic, + self.sender.clone(), + msg_sender, + BroadcastStream::from(msg_receiver), + ); + + Ok(handle.into()) + } + + pub(crate) async fn exist(&mut self, topic: K) -> Result> { + // Channel to check if the topic still exists or not + let (response_tx, response_rx) = oneshot::channel(); + + self.sender + .send(Notification::Exist { + topic, + response_sender: response_tx, + }) + .await?; + + let resp = response_rx.await?; + + Ok(resp) + } + + pub(crate) async fn invalid_ids( + &mut self, + topics: Vec, + ) -> Result<(Vec, Vec), NotifyError> { + // Channel to check if the topic still exists or not + let (response_tx, response_rx) = oneshot::channel(); + + self.sender + .send(Notification::InvalidIds { + topics, + response_sender: response_tx, + }) + .await?; + + let resp = response_rx.await?; + + Ok(resp) + } + + /// Delete the topic even if several subscribers are still listening + pub(crate) async fn force_delete(&mut self, topic: K) -> Result<(), NotifyError> { + // if disconnected, we don't care (the task was stopped) + self.sender + .send(Notification::ForceDelete { topic }) + .await + .map_err(std::convert::Into::into) + } + + /// Delete the topic if and only if one or zero subscriber is still listening + /// This function is not async to allow it to be used in a Drop impl + #[cfg(test)] + pub(crate) fn try_delete(&mut self, topic: K) -> Result<(), NotifyError> { + // if disconnected, we don't care (the task was stopped) + self.sender + .try_send(Notification::TryDelete { topic }) + .map_err(|try_send_error| try_send_error.into_send_error().into()) + } + + #[cfg(test)] + pub(crate) async fn broadcast(&mut self, data: V) -> Result<(), NotifyError> { + self.sender + .send(Notification::Broadcast { data }) + .await + .map_err(std::convert::Into::into) + } + + #[cfg(test)] + pub(crate) async fn debug(&mut self) -> Result> { + let (response_tx, response_rx) = oneshot::channel(); + self.sender + .send(Notification::Debug { + response_sender: response_tx, + }) + .await?; + + Ok(response_rx.await.unwrap()) + } +} + +#[cfg(test)] +impl Default for Notify +where + K: Send + Hash + Eq + Clone + 'static, + 
V: Send + Sync + Clone + 'static, +{ + /// Useless notify mainly for test + fn default() -> Self { + Self::for_tests() + } +} + +impl Debug for Notify { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("Notify").finish() + } +} + +struct HandleGuard +where + K: Clone, +{ + topic: K, + pubsub_sender: mpsc::Sender>, +} + +impl Clone for HandleGuard +where + K: Clone, +{ + fn clone(&self) -> Self { + Self { + topic: self.topic.clone(), + pubsub_sender: self.pubsub_sender.clone(), + } + } +} + +impl Drop for HandleGuard +where + K: Clone, +{ + fn drop(&mut self) { + let err = self.pubsub_sender.try_send(Notification::Unsubscribe { + topic: self.topic.clone(), + }); + if let Err(err) = err { + tracing::trace!("cannot unsubscribe {err:?}"); + } + } +} + +pin_project! { +pub struct Handle +where + K: Clone, +{ + handle_guard: HandleGuard, + #[pin] + msg_sender: broadcast::Sender>, + #[pin] + msg_receiver: BroadcastStream>, +} +} + +impl Clone for Handle +where + K: Clone, + V: Clone + Send + 'static, +{ + fn clone(&self) -> Self { + Self { + handle_guard: self.handle_guard.clone(), + msg_receiver: BroadcastStream::new(self.msg_sender.subscribe()), + msg_sender: self.msg_sender.clone(), + } + } +} + +impl Handle +where + K: Clone, +{ + fn new( + topic: K, + pubsub_sender: mpsc::Sender>, + msg_sender: broadcast::Sender>, + msg_receiver: BroadcastStream>, + ) -> Self { + Self { + handle_guard: HandleGuard { + topic, + pubsub_sender, + }, + msg_sender, + msg_receiver, + } + } + + pub(crate) fn into_stream(self) -> HandleStream { + HandleStream { + handle_guard: self.handle_guard, + msg_receiver: self.msg_receiver, + } + } + + pub(crate) fn into_sink(self) -> HandleSink { + HandleSink { + handle_guard: self.handle_guard, + msg_sender: self.msg_sender, + } + } + + /// Return a sink and a stream + pub fn split(self) -> (HandleSink, HandleStream) { + ( + HandleSink { + handle_guard: self.handle_guard.clone(), + msg_sender: self.msg_sender, + }, + HandleStream { + handle_guard: self.handle_guard, + msg_receiver: self.msg_receiver, + }, + ) + } +} + +pin_project! { +pub struct HandleStream +where + K: Clone, +{ + handle_guard: HandleGuard, + #[pin] + msg_receiver: BroadcastStream>, +} +} + +impl Stream for HandleStream +where + K: Clone, + V: Clone + 'static + Send, +{ + type Item = V; + + fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { + let mut this = self.as_mut().project(); + + match Pin::new(&mut this.msg_receiver).poll_next(cx) { + Poll::Ready(Some(Err(BroadcastStreamRecvError::Lagged(_)))) => { + tracing::info!(monotonic_counter.apollo_router_skipped_event_count = 1u64,); + self.poll_next(cx) + } + Poll::Ready(None) => Poll::Ready(None), + Poll::Ready(Some(Ok(Some(val)))) => Poll::Ready(Some(val)), + Poll::Ready(Some(Ok(None))) => Poll::Ready(None), + Poll::Pending => Poll::Pending, + } + } +} + +pin_project! 
{ +pub struct HandleSink +where + K: Clone, +{ + handle_guard: HandleGuard, + #[pin] + msg_sender: broadcast::Sender>, +} +} + +impl HandleSink +where + K: Clone, + V: Clone + 'static + Send, +{ + /// Send data to the subscribed topic + pub(crate) fn send_sync(&mut self, data: V) -> Result<(), NotifyError> { + self.msg_sender.send(data.into()).map_err(|err| { + NotifyError::BroadcastSendError(broadcast::error::SendError(err.0.unwrap())) + })?; + + Ok(()) + } +} + +impl Sink for HandleSink +where + K: Clone, + V: Clone + 'static + Send, +{ + type Error = graphql::Error; + + fn poll_ready(self: Pin<&mut Self>, _cx: &mut Context<'_>) -> Poll> { + Poll::Ready(Ok(())) + } + + fn start_send(self: Pin<&mut Self>, item: V) -> Result<(), Self::Error> { + self.msg_sender.send(Some(item)).map_err(|_err| { + graphql::Error::builder() + .message("cannot send payload through pubsub") + .extension_code("NOTIFICATION_HANDLE_SEND_ERROR") + .build() + })?; + Ok(()) + } + + fn poll_flush(self: Pin<&mut Self>, _cx: &mut Context<'_>) -> Poll> { + Poll::Ready(Ok(())) + } + + fn poll_close( + mut self: Pin<&mut Self>, + _cx: &mut Context<'_>, + ) -> Poll> { + let topic = self.handle_guard.topic.clone(); + let _ = self + .handle_guard + .pubsub_sender + .try_send(Notification::ForceDelete { topic }); + Poll::Ready(Ok(())) + } +} + +impl Handle where K: Clone {} + +async fn task( + mut receiver: mpsc::Receiver>, + ttl: Option, + heartbeat_error_message: Option, +) where + K: Send + Hash + Eq + Clone + 'static, + V: Send + Clone + 'static, +{ + let mut pubsub: PubSub = PubSub::new(ttl); + + let mut ttl_fut: Box + Send + Unpin> = match ttl { + Some(ttl) => Box::new(IntervalStream::new(tokio::time::interval(ttl))), + None => Box::new(tokio_stream::pending()), + }; + + loop { + tokio::select! 
{ + _ = ttl_fut.next() => { + let heartbeat_error_message = heartbeat_error_message.clone(); + pubsub.kill_dead_topics(heartbeat_error_message).await; + tracing::info!( + value.apollo_router_opened_subscriptions = pubsub.subscriptions.len() as u64, + ); + } + message = receiver.next() => { + match message { + Some(message) => { + match message { + Notification::Unsubscribe { topic } => pubsub.unsubscribe(topic), + Notification::ForceDelete { topic } => pubsub.force_delete(topic), + Notification::CreateOrSubscribe { topic, msg_sender, response_sender, heartbeat_enabled } => { + pubsub.subscribe_or_create(topic, msg_sender, response_sender, heartbeat_enabled); + } + Notification::Subscribe { + topic, + response_sender, + } => { + pubsub.subscribe(topic, response_sender); + } + Notification::SubscribeIfExist { + topic, + response_sender, + } => { + if pubsub.is_used(&topic) { + pubsub.subscribe(topic, response_sender); + } else { + pubsub.force_delete(topic); + let _ = response_sender.send(None); + } + } + Notification::InvalidIds { + topics, + response_sender, + } => { + let invalid_topics = pubsub.invalid_topics(topics); + let _ = response_sender.send(invalid_topics); + } + Notification::Exist { + topic, + response_sender, + } => { + let exist = pubsub.exist(&topic); + let _ = response_sender.send(exist); + if exist { + pubsub.touch(&topic); + } + } + #[cfg(test)] + Notification::TryDelete { topic } => pubsub.try_delete(topic), + #[cfg(test)] + Notification::Broadcast { data } => { + pubsub.broadcast(data).await; + } + #[cfg(test)] + Notification::Debug { response_sender } => { + let _ = response_sender.send(pubsub.subscriptions.len()); + } + } + }, + None => break, + } + } + } + } +} + +#[derive(Debug)] +struct Subscription { + msg_sender: broadcast::Sender>, + heartbeat_enabled: bool, + updated_at: Instant, +} + +impl Subscription { + fn new(msg_sender: broadcast::Sender>, heartbeat_enabled: bool) -> Self { + Self { + msg_sender, + heartbeat_enabled, + updated_at: Instant::now(), + } + } + // Update the updated_at value + fn touch(&mut self) { + self.updated_at = Instant::now(); + } +} + +struct PubSub +where + K: Hash + Eq, +{ + subscriptions: HashMap>, + ttl: Option, +} + +impl Default for PubSub +where + K: Hash + Eq, +{ + fn default() -> Self { + Self { + // subscribers: HashMap::new(), + subscriptions: HashMap::new(), + ttl: None, + } + } +} + +impl PubSub +where + K: Hash + Eq + Clone, + V: Clone + 'static, +{ + fn new(ttl: Option) -> Self { + Self { + subscriptions: HashMap::new(), + ttl, + } + } + + fn create_topic( + &mut self, + topic: K, + sender: broadcast::Sender>, + heartbeat_enabled: bool, + ) { + self.subscriptions + .insert(topic, Subscription::new(sender, heartbeat_enabled)); + } + + fn subscribe(&mut self, topic: K, sender: ResponseSender) { + match self.subscriptions.get_mut(&topic) { + Some(subscription) => { + let _ = sender.send(Some(( + subscription.msg_sender.clone(), + subscription.msg_sender.subscribe(), + ))); + } + None => { + let _ = sender.send(None); + } + } + } + + fn subscribe_or_create( + &mut self, + topic: K, + msg_sender: broadcast::Sender>, + sender: ResponseSenderWithCreated, + heartbeat_enabled: bool, + ) { + match self.subscriptions.get(&topic) { + Some(subscription) => { + let _ = sender.send(( + subscription.msg_sender.clone(), + subscription.msg_sender.subscribe(), + false, + )); + } + None => { + self.create_topic(topic, msg_sender.clone(), heartbeat_enabled); + + let _ = sender.send((msg_sender.clone(), msg_sender.subscribe(), true)); + } + } 
+ } + + fn unsubscribe(&mut self, topic: K) { + let mut topic_to_delete = false; + match self.subscriptions.get(&topic) { + Some(subscription) => { + topic_to_delete = subscription.msg_sender.receiver_count() == 0; + } + None => tracing::trace!("Cannot find the subscription to unsubscribe"), + } + if topic_to_delete { + self.subscriptions.remove(&topic); + }; + } + + /// Check if the topic is used by anyone else than the current handle + fn is_used(&self, topic: &K) -> bool { + self.subscriptions + .get(topic) + .map(|s| s.msg_sender.receiver_count() > 0) + .unwrap_or_default() + } + + /// Update the heartbeat + fn touch(&mut self, topic: &K) { + if let Some(sub) = self.subscriptions.get_mut(topic) { + sub.touch(); + } + } + + /// Check if the topic exists + fn exist(&self, topic: &K) -> bool { + self.subscriptions.contains_key(topic) + } + + /// Given a list of topics, returns the list of valid and invalid topics + /// Heartbeat the given valid topics + fn invalid_topics(&mut self, topics: Vec) -> (Vec, Vec) { + topics.into_iter().fold( + (Vec::new(), Vec::new()), + |(mut valid_ids, mut invalid_ids), e| { + match self.subscriptions.get_mut(&e) { + Some(sub) => { + sub.touch(); + valid_ids.push(e); + } + None => { + invalid_ids.push(e); + } + } + + (valid_ids, invalid_ids) + }, + ) + } + + /// clean all topics which didn't heartbeat + async fn kill_dead_topics(&mut self, heartbeat_error_message: Option) { + if let Some(ttl) = self.ttl { + let drained = self.subscriptions.drain(); + let (remaining_subs, closed_subs) = drained.into_iter().fold( + (HashMap::new(), HashMap::new()), + |(mut acc, mut acc_error), (topic, sub)| { + if (!sub.heartbeat_enabled || sub.updated_at.elapsed() <= ttl) + && sub.msg_sender.receiver_count() > 0 + { + acc.insert(topic, sub); + } else { + acc_error.insert(topic, sub); + } + + (acc, acc_error) + }, + ); + self.subscriptions = remaining_subs; + + // Send error message to all killed connections + for (_subscriber_id, subscription) in closed_subs { + if let Some(heartbeat_error_message) = &heartbeat_error_message { + let _ = subscription + .msg_sender + .send(heartbeat_error_message.clone().into()); + let _ = subscription.msg_sender.send(None); + } + } + } + } + + #[cfg(test)] + fn try_delete(&mut self, topic: K) { + if let Some(sub) = self.subscriptions.get(&topic) { + if sub.msg_sender.receiver_count() > 1 { + return; + } + } + + self.force_delete(topic); + } + + fn force_delete(&mut self, topic: K) { + tracing::trace!("deleting subscription"); + let sub = self.subscriptions.remove(&topic); + if let Some(sub) = sub { + let _ = sub.msg_sender.send(None); + } + } + + #[cfg(test)] + async fn broadcast(&mut self, value: V) -> Option<()> + where + V: Clone, + { + let mut fut = vec![]; + for (sub_id, sub) in &self.subscriptions { + let cloned_value = value.clone(); + let sub_id = sub_id.clone(); + fut.push( + sub.msg_sender + .send(cloned_value.into()) + .is_err() + .then_some(sub_id), + ); + } + // clean closed sender + let sub_to_clean: Vec = fut.into_iter().flatten().collect(); + self.subscriptions + .retain(|k, s| s.msg_sender.receiver_count() > 0 && !sub_to_clean.contains(k)); + + Some(()) + } +} + +#[cfg(test)] +mod tests { + + use uuid::Uuid; + + use super::*; + + #[tokio::test] + async fn subscribe() { + let mut notify = Notify::builder().build(); + let topic_1 = Uuid::new_v4(); + let topic_2 = Uuid::new_v4(); + + let (handle1, created) = notify.create_or_subscribe(topic_1, false).await.unwrap(); + assert!(created); + let (_handle2, created) = 
notify.create_or_subscribe(topic_2, false).await.unwrap(); + assert!(created); + + let handle_1_bis = notify.subscribe(topic_1).await.unwrap(); + let handle_1_other = notify.subscribe(topic_1).await.unwrap(); + let mut cloned_notify = notify.clone(); + + let mut handle = cloned_notify.subscribe(topic_1).await.unwrap().into_sink(); + handle + .send_sync(serde_json_bytes::json!({"test": "ok"})) + .unwrap(); + drop(handle); + drop(handle1); + let mut handle_1_bis = handle_1_bis.into_stream(); + let new_msg = handle_1_bis.next().await.unwrap(); + assert_eq!(new_msg, serde_json_bytes::json!({"test": "ok"})); + let mut handle_1_other = handle_1_other.into_stream(); + let new_msg = handle_1_other.next().await.unwrap(); + assert_eq!(new_msg, serde_json_bytes::json!({"test": "ok"})); + + assert!(notify.exist(topic_1).await.unwrap()); + assert!(notify.exist(topic_2).await.unwrap()); + + drop(_handle2); + drop(handle_1_bis); + drop(handle_1_other); + + let subscriptions_nb = notify.debug().await.unwrap(); + assert_eq!(subscriptions_nb, 0); + } + + #[tokio::test] + async fn it_subscribe_and_delete() { + let mut notify = Notify::builder().build(); + let topic_1 = Uuid::new_v4(); + let topic_2 = Uuid::new_v4(); + + let (handle1, created) = notify.create_or_subscribe(topic_1, true).await.unwrap(); + assert!(created); + let (_handle2, created) = notify.create_or_subscribe(topic_2, true).await.unwrap(); + assert!(created); + + let mut _handle_1_bis = notify.subscribe(topic_1).await.unwrap(); + let mut _handle_1_other = notify.subscribe(topic_1).await.unwrap(); + let mut cloned_notify = notify.clone(); + let mut handle = cloned_notify.subscribe(topic_1).await.unwrap().into_sink(); + handle + .send_sync(serde_json_bytes::json!({"test": "ok"})) + .unwrap(); + drop(handle); + assert!(notify.exist(topic_1).await.unwrap()); + drop(_handle_1_bis); + drop(_handle_1_other); + + notify.try_delete(topic_1).unwrap(); + + let subscriptions_nb = notify.debug().await.unwrap(); + assert_eq!(subscriptions_nb, 1); + + assert!(!notify.exist(topic_1).await.unwrap()); + + notify.force_delete(topic_1).await.unwrap(); + + let mut handle1 = handle1.into_stream(); + let new_msg = handle1.next().await.unwrap(); + assert_eq!(new_msg, serde_json_bytes::json!({"test": "ok"})); + assert!(handle1.next().await.is_none()); + assert!(notify.exist(topic_2).await.unwrap()); + notify.try_delete(topic_2).unwrap(); + + let subscriptions_nb = notify.debug().await.unwrap(); + assert_eq!(subscriptions_nb, 0); + } + + #[tokio::test] + async fn it_test_ttl() { + let mut notify = Notify::builder() + .ttl(Duration::from_millis(100)) + .heartbeat_error_message(serde_json_bytes::json!({"error": "connection_closed"})) + .build(); + let topic_1 = Uuid::new_v4(); + let topic_2 = Uuid::new_v4(); + + let (handle1, created) = notify.create_or_subscribe(topic_1, true).await.unwrap(); + assert!(created); + let (_handle2, created) = notify.create_or_subscribe(topic_2, true).await.unwrap(); + assert!(created); + + let handle_1_bis = notify.subscribe(topic_1).await.unwrap(); + let handle_1_other = notify.subscribe(topic_1).await.unwrap(); + let mut cloned_notify = notify.clone(); + tokio::spawn(async move { + let mut handle = cloned_notify.subscribe(topic_1).await.unwrap().into_sink(); + handle + .send_sync(serde_json_bytes::json!({"test": "ok"})) + .unwrap(); + }); + drop(handle1); + + let mut handle_1_bis = handle_1_bis.into_stream(); + let new_msg = handle_1_bis.next().await.unwrap(); + assert_eq!(new_msg, serde_json_bytes::json!({"test": "ok"})); + let mut 
handle_1_other = handle_1_other.into_stream(); + let new_msg = handle_1_other.next().await.unwrap(); + assert_eq!(new_msg, serde_json_bytes::json!({"test": "ok"})); + + tokio::time::sleep(Duration::from_millis(200)).await; + let res = handle_1_bis.next().await.unwrap(); + assert_eq!(res, serde_json_bytes::json!({"error": "connection_closed"})); + + assert!(handle_1_bis.next().await.is_none()); + + assert!(!notify.exist(topic_1).await.unwrap()); + assert!(!notify.exist(topic_2).await.unwrap()); + + let subscriptions_nb = notify.debug().await.unwrap(); + assert_eq!(subscriptions_nb, 0); + } +} diff --git a/apollo-router/src/plugin/mod.rs b/apollo-router/src/plugin/mod.rs index ad74aa4970..fcbd0d8c05 100644 --- a/apollo-router/src/plugin/mod.rs +++ b/apollo-router/src/plugin/mod.rs @@ -37,7 +37,9 @@ use tower::BoxError; use tower::Service; use tower::ServiceBuilder; +use crate::graphql; use crate::layers::ServiceBuilderExt; +use crate::notification::Notify; use crate::router_factory::Endpoint; use crate::services::execution; use crate::services::router; @@ -45,8 +47,11 @@ use crate::services::subgraph; use crate::services::supergraph; use crate::ListenAddr; -type InstanceFactory = - fn(&serde_json::Value, Arc) -> BoxFuture, BoxError>>; +type InstanceFactory = fn( + &serde_json::Value, + Arc, + Notify, +) -> BoxFuture, BoxError>>; type SchemaFactory = fn(&mut SchemaGenerator) -> schemars::schema::Schema; @@ -61,34 +66,103 @@ pub struct PluginInit { pub config: T, /// Router Supergraph Schema (schema definition language) pub supergraph_sdl: Arc, + + pub(crate) notify: Notify, } impl PluginInit where T: for<'de> Deserialize<'de>, { + #[deprecated = "use PluginInit::builder() instead"] /// Create a new PluginInit for the supplied config and SDL. pub fn new(config: T, supergraph_sdl: Arc) -> Self { - PluginInit { - config, - supergraph_sdl, - } + Self::builder() + .config(config) + .supergraph_sdl(supergraph_sdl) + .notify(Notify::builder().build()) + .build() } /// Try to create a new PluginInit for the supplied JSON and SDL. /// /// This will fail if the supplied JSON cannot be deserialized into the configuration /// struct. + #[deprecated = "use PluginInit::try_builder() instead"] pub fn try_new( config: serde_json::Value, supergraph_sdl: Arc, + ) -> Result { + Self::try_builder() + .config(config) + .supergraph_sdl(supergraph_sdl) + .notify(Notify::builder().build()) + .build() + } + + #[cfg(test)] + pub(crate) fn fake_new(config: T, supergraph_sdl: Arc) -> Self { + PluginInit { + config, + supergraph_sdl, + notify: Notify::for_tests(), + } + } +} + +#[buildstructor::buildstructor] +impl PluginInit +where + T: for<'de> Deserialize<'de>, +{ + /// Create a new PluginInit builder + #[builder(entry = "builder", exit = "build", visibility = "pub")] + /// Build a new PluginInit for the supplied configuration and SDL. + /// + /// You can reuse a notify instance, or Build your own. + pub(crate) fn new_builder( + config: T, + supergraph_sdl: Arc, + notify: Notify, + ) -> Self { + PluginInit { + config, + supergraph_sdl, + notify, + } + } + + #[builder(entry = "try_builder", exit = "build", visibility = "pub")] + /// Try to build a new PluginInit for the supplied json configuration and SDL. + /// + /// You can reuse a notify instance, or Build your own. + /// invoking build() will fail if the JSON doesn't comply with the configuration format. 
+ pub(crate) fn try_new_builder( + config: serde_json::Value, + supergraph_sdl: Arc, + notify: Notify, ) -> Result { let config: T = serde_json::from_value(config)?; Ok(PluginInit { config, supergraph_sdl, + notify, }) } + + /// Create a new PluginInit builder + #[builder(entry = "fake_builder", exit = "build", visibility = "pub")] + fn fake_new_builder( + config: T, + supergraph_sdl: Option>, + notify: Option>, + ) -> Self { + PluginInit { + config, + supergraph_sdl: supergraph_sdl.unwrap_or_default(), + notify: notify.unwrap_or_else(Notify::for_tests), + } + } } /// Factories for plugin schema and configuration. @@ -120,9 +194,13 @@ impl PluginFactory { tracing::debug!(%plugin_factory_name, "creating plugin factory"); PluginFactory { name: plugin_factory_name, - instance_factory: |configuration, schema| { + instance_factory: |configuration, schema, notify| { Box::pin(async move { - let init = PluginInit::try_new(configuration.clone(), schema)?; + let init = PluginInit::try_builder() + .config(configuration.clone()) + .supergraph_sdl(schema) + .notify(notify) + .build()?; let plugin = P::new(init).await?; Ok(Box::new(plugin) as Box) }) @@ -136,8 +214,9 @@ impl PluginFactory { &self, configuration: &serde_json::Value, supergraph_sdl: Arc, + notify: Notify, ) -> Result, BoxError> { - (self.instance_factory)(configuration, supergraph_sdl).await + (self.instance_factory)(configuration, supergraph_sdl, notify).await } #[cfg(test)] @@ -145,7 +224,7 @@ impl PluginFactory { &self, configuration: &serde_json::Value, ) -> Result, BoxError> { - (self.instance_factory)(configuration, Default::default()).await + (self.instance_factory)(configuration, Default::default(), Default::default()).await } pub(crate) fn create_schema(&self, gen: &mut SchemaGenerator) -> schemars::schema::Schema { diff --git a/apollo-router/src/plugin/test/mock/canned.rs b/apollo-router/src/plugin/test/mock/canned.rs index 5230ed3cab..227a0ba806 100644 --- a/apollo-router/src/plugin/test/mock/canned.rs +++ b/apollo-router/src/plugin/test/mock/canned.rs @@ -36,6 +36,12 @@ pub(crate) fn accounts_subgraph() -> MockSubgraph { ] } }} + ), + ( + json! {{ + "query": "subscription{userWasCreated{name}}", + }}, + json! 
{{}} ) ].into_iter().map(|(query, response)| (serde_json::from_value(query).unwrap(), serde_json::from_value(response).unwrap())).collect(); MockSubgraph::new(account_mocks) diff --git a/apollo-router/src/plugin/test/mock/subgraph.rs b/apollo-router/src/plugin/test/mock/subgraph.rs index 20362dbe0a..5d1a6da714 100644 --- a/apollo-router/src/plugin/test/mock/subgraph.rs +++ b/apollo-router/src/plugin/test/mock/subgraph.rs @@ -11,9 +11,11 @@ use http::StatusCode; use tower::BoxError; use tower::Service; +use crate::graphql; use crate::graphql::Request; use crate::graphql::Response; use crate::json_ext::Object; +use crate::notification::Handle; use crate::services::SubgraphRequest; use crate::services::SubgraphResponse; @@ -24,6 +26,7 @@ pub struct MockSubgraph { // using an arc to improve efficiency when service is cloned mocks: Arc, extensions: Option, + subscription_stream: Option>, } impl MockSubgraph { @@ -31,6 +34,7 @@ impl MockSubgraph { Self { mocks: Arc::new(mocks), extensions: None, + subscription_stream: None, } } @@ -42,6 +46,14 @@ impl MockSubgraph { self.extensions = Some(extensions); self } + + pub fn with_subscription_stream( + mut self, + subscription_stream: Handle, + ) -> Self { + self.subscription_stream = Some(subscription_stream); + self + } } /// Builder for `MockSubgraph` @@ -49,6 +61,7 @@ impl MockSubgraph { pub struct MockSubgraphBuilder { mocks: MockResponses, extensions: Option, + subscription_stream: Option>, } impl MockSubgraphBuilder { pub fn with_extensions(mut self, extensions: Object) -> Self { @@ -68,10 +81,19 @@ impl MockSubgraphBuilder { self } + pub fn with_subscription_stream( + mut self, + subscription_stream: Handle, + ) -> Self { + self.subscription_stream = Some(subscription_stream); + self + } + pub fn build(self) -> MockSubgraph { MockSubgraph { mocks: Arc::new(self.mocks), extensions: self.extensions, + subscription_stream: self.subscription_stream, } } } @@ -87,8 +109,43 @@ impl Service for MockSubgraph { Poll::Ready(Ok(())) } - fn call(&mut self, req: SubgraphRequest) -> Self::Future { - let response = if let Some(response) = self.mocks.get(req.subgraph_request.body()) { + fn call(&mut self, mut req: SubgraphRequest) -> Self::Future { + let body = req.subgraph_request.body_mut(); + + if let Some(sub_stream) = &mut req.subscription_stream { + sub_stream + .try_send( + self.subscription_stream + .take() + .expect("must have a subscription stream set") + .into_stream(), + ) + .unwrap(); + } + + // Redact the callback url and subscription_id because it generates a subscription uuid + if let Some(serde_json_bytes::Value::Object(subscription_ext)) = + body.extensions.get_mut("subscription") + { + if let Some(callback_url) = subscription_ext.get_mut("callback_url") { + let mut cb_url = url::Url::parse( + callback_url + .as_str() + .expect("callback_url extension must be a string"), + ) + .expect("callback_url must be a valid URL"); + cb_url.path_segments_mut().unwrap().pop(); + cb_url.path_segments_mut().unwrap().push("subscription_id"); + + *callback_url = serde_json_bytes::Value::String(cb_url.to_string().into()); + } + if let Some(subscription_id) = subscription_ext.get_mut("subscription_id") { + *subscription_id = + serde_json_bytes::Value::String("subscription_id".to_string().into()); + } + } + + let response = if let Some(response) = self.mocks.get(body) { // Build an http Response let http_response = http::Response::builder() .status(StatusCode::OK) @@ -99,7 +156,7 @@ impl Service for MockSubgraph { let error = crate::error::Error::builder() 
.message(format!( "couldn't find mock for query {}", - serde_json::to_string(&req.subgraph_request.body()).unwrap() + serde_json::to_string(body).unwrap() )) .extension_code("FETCH_ERROR".to_string()) .extensions(self.extensions.clone().unwrap_or_default()) diff --git a/apollo-router/src/plugins/csrf.rs b/apollo-router/src/plugins/csrf.rs index 807900ffa1..98a2fe1181 100644 --- a/apollo-router/src/plugins/csrf.rs +++ b/apollo-router/src/plugins/csrf.rs @@ -299,7 +299,7 @@ mod csrf_tests { .unwrap()) }); - let service_stack = Csrf::new(PluginInit::new(config, Default::default())) + let service_stack = Csrf::new(PluginInit::fake_new(config, Default::default())) .await .unwrap() .supergraph_service(mock_service.boxed()); @@ -316,7 +316,7 @@ mod csrf_tests { } async fn assert_rejected(config: CSRFConfig, request: supergraph::Request) { - let service_stack = Csrf::new(PluginInit::new(config, Default::default())) + let service_stack = Csrf::new(PluginInit::fake_new(config, Default::default())) .await .unwrap() .supergraph_service(MockSupergraphService::new().boxed()); diff --git a/apollo-router/src/plugins/forbid_mutations.rs b/apollo-router/src/plugins/forbid_mutations.rs index 5298d7b90a..136b8de3ba 100644 --- a/apollo-router/src/plugins/forbid_mutations.rs +++ b/apollo-router/src/plugins/forbid_mutations.rs @@ -93,7 +93,7 @@ mod forbid_http_get_mutations_tests { .times(1) .returning(move |_| Ok(ExecutionResponse::fake_builder().build().unwrap())); - let service_stack = ForbidMutations::new(PluginInit::new( + let service_stack = ForbidMutations::new(PluginInit::fake_new( ForbidMutationsConfig(true), Default::default(), )) @@ -120,7 +120,7 @@ mod forbid_http_get_mutations_tests { .build(); let expected_status = StatusCode::BAD_REQUEST; - let service_stack = ForbidMutations::new(PluginInit::new( + let service_stack = ForbidMutations::new(PluginInit::fake_new( ForbidMutationsConfig(true), Default::default(), )) @@ -144,7 +144,7 @@ mod forbid_http_get_mutations_tests { .times(1) .returning(move |_| Ok(ExecutionResponse::fake_builder().build().unwrap())); - let service_stack = ForbidMutations::new(PluginInit::new( + let service_stack = ForbidMutations::new(PluginInit::fake_new( ForbidMutationsConfig(false), Default::default(), )) diff --git a/apollo-router/src/plugins/headers.rs b/apollo-router/src/plugins/headers.rs index d5b3fbb78b..7a6e56d4ac 100644 --- a/apollo-router/src/plugins/headers.rs +++ b/apollo-router/src/plugins/headers.rs @@ -762,6 +762,8 @@ mod test { .expect("expecting valid request"), operation_kind: OperationKind::Query, context: ctx, + subscription_stream: None, + connection_closed_signal: None, } } diff --git a/apollo-router/src/plugins/mod.rs b/apollo-router/src/plugins/mod.rs index 83ee925887..2bd2270df1 100644 --- a/apollo-router/src/plugins/mod.rs +++ b/apollo-router/src/plugins/mod.rs @@ -20,7 +20,7 @@ macro_rules! 
schemar_fn { }; } -mod authentication; +pub(crate) mod authentication; mod authorization; mod coprocessor; #[cfg(test)] @@ -32,5 +32,6 @@ mod headers; mod include_subgraph_errors; pub(crate) mod override_url; pub(crate) mod rhai; +pub(crate) mod subscription; pub(crate) mod telemetry; pub(crate) mod traffic_shaping; diff --git a/apollo-router/src/plugins/override_url.rs b/apollo-router/src/plugins/override_url.rs index 6e2894ced1..0c3b9a35b3 100644 --- a/apollo-router/src/plugins/override_url.rs +++ b/apollo-router/src/plugins/override_url.rs @@ -107,6 +107,7 @@ mod tests { ) .unwrap(), Default::default(), + Default::default(), ) .await .unwrap(); diff --git a/apollo-router/src/plugins/rhai/engine.rs b/apollo-router/src/plugins/rhai/engine.rs index 000398efdc..737be696b8 100644 --- a/apollo-router/src/plugins/rhai/engine.rs +++ b/apollo-router/src/plugins/rhai/engine.rs @@ -35,6 +35,7 @@ use crate::graphql::Request; use crate::graphql::Response; use crate::http_ext; use crate::plugins::authentication::APOLLO_AUTHENTICATION_JWT_CLAIMS; +use crate::plugins::subscription::SUBSCRIPTION_WS_CUSTOM_CONNECTION_PARAMS; use crate::Context; const CANNOT_ACCESS_HEADERS_ON_A_DEFERRED_RESPONSE: &str = @@ -1145,6 +1146,10 @@ impl Rhai { "APOLLO_AUTHENTICATION_JWT_CLAIMS".into(), APOLLO_AUTHENTICATION_JWT_CLAIMS.to_string().into(), ); + global_variables.insert( + "APOLLO_SUBSCRIPTION_WS_CUSTOM_CONNECTION_PARAMS".into(), + SUBSCRIPTION_WS_CUSTOM_CONNECTION_PARAMS.to_string().into(), + ); let shared_globals = Arc::new(global_variables); diff --git a/apollo-router/src/plugins/subscription.rs b/apollo-router/src/plugins/subscription.rs new file mode 100644 index 0000000000..ad44089614 --- /dev/null +++ b/apollo-router/src/plugins/subscription.rs @@ -0,0 +1,1240 @@ +use std::collections::HashMap; +use std::collections::HashSet; +use std::ops::ControlFlow; +use std::task::Poll; + +use bytes::Buf; +use futures::future::BoxFuture; +use hmac::Hmac; +use hmac::Mac; +use http::Method; +use http::StatusCode; +use multimap::MultiMap; +use once_cell::sync::OnceCell; +use schemars::JsonSchema; +use serde::Deserialize; +use serde::Serialize; +use sha2::Digest; +use sha2::Sha256; +use tower::BoxError; +use tower::Service; +use tower::ServiceBuilder; +use tower::ServiceExt; +use tracing_futures::Instrument; +use uuid::Uuid; + +use crate::context::Context; +use crate::graphql; +use crate::graphql::Response; +use crate::json_ext::Object; +use crate::layers::ServiceBuilderExt; +use crate::notification::Notify; +use crate::plugin::Plugin; +use crate::plugin::PluginInit; +use crate::protocols::websocket::WebSocketProtocol; +use crate::query_planner::OperationKind; +use crate::register_plugin; +use crate::services::router; +use crate::services::subgraph; +use crate::Endpoint; +use crate::ListenAddr; + +type HmacSha256 = Hmac; +pub(crate) const APOLLO_SUBSCRIPTION_PLUGIN: &str = "apollo.subscription"; +#[cfg(not(test))] +pub(crate) const APOLLO_SUBSCRIPTION_PLUGIN_NAME: &str = "subscription"; +pub(crate) static SUBSCRIPTION_CALLBACK_HMAC_KEY: OnceCell = OnceCell::new(); +pub(crate) const SUBSCRIPTION_WS_CUSTOM_CONNECTION_PARAMS: &str = + "apollo.subscription.custom_connection_params"; + +#[derive(Debug, Clone)] +pub(crate) struct Subscription { + notify: Notify, + callback_hmac_key: Option, + pub(crate) config: SubscriptionConfig, +} + +/// Subscriptions configuration +#[derive(Debug, Clone, Deserialize, Serialize, JsonSchema)] +#[serde(deny_unknown_fields, default)] +pub(crate) struct SubscriptionConfig { + /// Select a 
subscription mode (callback or passthrough)
+    pub(crate) mode: SubscriptionModeConfig,
+    /// Enable deduplication of subscriptions: if the router detects the exact same subscription request to a subgraph, it won't open a new WebSocket to that subgraph in passthrough mode
+    /// (default: true)
+    pub(crate) enable_deduplication: bool,
+    /// Maximum number of subscriptions that can be open at the same time. By default (when unset) there is no limit.
+    pub(crate) max_opened_subscriptions: Option<usize>,
+    /// Capacity of the in-memory queue, i.e. how many events can be kept in the buffer
+    pub(crate) queue_capacity: Option<usize>,
+}
+
+impl Default for SubscriptionConfig {
+    fn default() -> Self {
+        Self {
+            mode: Default::default(),
+            enable_deduplication: true,
+            max_opened_subscriptions: None,
+            queue_capacity: None,
+        }
+    }
+}
+
+#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize, Default, JsonSchema)]
+#[serde(deny_unknown_fields)]
+pub(crate) struct SubscriptionModeConfig {
+    #[serde(rename = "preview_callback")]
+    /// Enable callback mode for subgraph(s)
+    pub(crate) callback: Option<CallbackMode>,
+    /// Enable passthrough mode for subgraph(s)
+    pub(crate) passthrough: Option<PassthroughMode>,
+}
+
+impl SubscriptionModeConfig {
+    pub(crate) fn get_subgraph_config(&self, service_name: &str) -> Option<SubscriptionMode> {
+        if let Some(passthrough_cfg) = &self.passthrough {
+            if let Some(subgraph_cfg) = passthrough_cfg.subgraphs.get(service_name) {
+                return SubscriptionMode::Passthrough(subgraph_cfg.clone()).into();
+            }
+            if let Some(all_cfg) = &passthrough_cfg.all {
+                return SubscriptionMode::Passthrough(all_cfg.clone()).into();
+            }
+        }
+
+        if let Some(callback_cfg) = &self.callback {
+            if callback_cfg.subgraphs.contains(service_name) || callback_cfg.subgraphs.is_empty() {
+                let callback_cfg = CallbackMode {
+                    public_url: callback_cfg.public_url.clone(),
+                    listen: callback_cfg.listen.clone(),
+                    path: callback_cfg.path.clone(),
+                    subgraphs: HashSet::new(), // We don't need it
+                };
+                return SubscriptionMode::Callback(callback_cfg).into();
+            }
+        }
+
+        None
+    }
+}
+
+#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize, Default, JsonSchema)]
+#[serde(deny_unknown_fields, default)]
+pub(crate) struct SubgraphPassthroughMode {
+    /// Configuration for all subgraphs
+    pub(crate) all: Option<WebSocketConfiguration>,
+    /// Configuration for specific subgraphs
+    pub(crate) subgraphs: HashMap<String, WebSocketConfiguration>,
+}
+
+#[derive(Debug, Clone, PartialEq)]
+pub(crate) enum SubscriptionMode {
+    /// Using a callback URL
+    Callback(CallbackMode),
+    /// Using a WebSocket to connect directly to the subgraph
+    Passthrough(WebSocketConfiguration),
+}
+
+/// Using a callback URL
+#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize, JsonSchema)]
+#[serde(deny_unknown_fields)]
+pub(crate) struct CallbackMode {
+    #[schemars(with = "String")]
+    /// URL used to access this router instance
+    pub(crate) public_url: url::Url,
+    // `skip_serializing` We don't need it in the context
+    /// Address on which the callback endpoint must listen (default: 127.0.0.1:4000)
+    #[serde(skip_serializing)]
+    listen: Option<ListenAddr>,
+    // `skip_serializing` We don't need it in the context
+    /// Specify on which path you want to listen for callbacks (default: /callback)
+    #[serde(skip_serializing)]
+    path: Option<String>,
+
+    /// Specify for which subgraphs callback mode is enabled for subscriptions.
+    /// If empty it applies to all subgraphs (passthrough mode takes precedence)
+    #[serde(default)]
+    subgraphs: HashSet<String>,
+}
+
+/// Using a WebSocket to connect directly to the subgraph
+#[derive(Debug, Clone, PartialEq, Eq,
Default, Deserialize, Serialize, JsonSchema)] +#[serde(deny_unknown_fields, default)] +pub(crate) struct PassthroughMode { + /// WebSocket configuration for specific subgraphs + subgraph: SubgraphPassthroughMode, +} + +#[derive(Debug, Clone, PartialEq, Eq, Default, Deserialize, Serialize, JsonSchema)] +#[serde(deny_unknown_fields, default)] +/// WebSocket configuration for a specific subgraph +pub(crate) struct WebSocketConfiguration { + /// Path on which WebSockets are listening + pub(crate) path: Option, + /// Which WebSocket GraphQL protocol to use for this subgraph possible values are: 'graphql_ws' | 'graphql_transport_ws' (default: graphql_ws) + pub(crate) protocol: WebSocketProtocol, +} + +fn default_path() -> String { + String::from("/callback") +} + +fn default_listen_addr() -> ListenAddr { + ListenAddr::SocketAddr("127.0.0.1:4000".parse().expect("valid ListenAddr")) +} + +#[async_trait::async_trait] +impl Plugin for Subscription { + type Config = SubscriptionConfig; + + async fn new(init: PluginInit) -> Result { + let mut callback_hmac_key = None; + if init.config.mode.callback.is_some() { + callback_hmac_key = Some( + SUBSCRIPTION_CALLBACK_HMAC_KEY + .get_or_init(|| Uuid::new_v4().to_string()) + .clone(), + ); + } + + Ok(Subscription { + notify: init.notify, + callback_hmac_key, + config: init.config, + }) + } + + fn subgraph_service( + &self, + _subgraph_name: &str, + service: subgraph::BoxService, + ) -> subgraph::BoxService { + let enabled = self.config.mode.callback.is_some() || self.config.mode.passthrough.is_some(); + ServiceBuilder::new() + .checkpoint(move |req: subgraph::Request| { + if req.operation_kind == OperationKind::Subscription && !enabled { + Ok(ControlFlow::Break(subgraph::Response::builder().context(req.context).error(graphql::Error::builder().message("cannot execute a subscription if it's not enabled in the configuration").extension_code("SUBSCRIPTION_DISABLED").build()).extensions(Object::default()).build())) + } else { + Ok(ControlFlow::Continue(req)) + } + }).service(service) + .boxed() + } + + fn web_endpoints(&self) -> MultiMap { + let mut map = MultiMap::new(); + + if let Some(CallbackMode { listen, path, .. 
}) = &self.config.mode.callback { + let path = path.clone().unwrap_or_else(default_path); + let path = path.trim_end_matches('/'); + let callback_hmac_key = self + .callback_hmac_key + .clone() + .expect("cannot run subscription in callback mode without a hmac key"); + let endpoint = Endpoint::from_router_service( + format!("{path}/:callback"), + CallbackService::new(self.notify.clone(), path.to_string(), callback_hmac_key) + .boxed(), + ); + map.insert(listen.clone().unwrap_or_else(default_listen_addr), endpoint); + } + + map + } +} + +#[derive(Serialize, Deserialize, Clone, Debug)] +#[serde(tag = "kind", rename = "lowercase")] +pub(crate) enum CallbackPayload { + #[serde(rename = "subscription")] + Subscription(SubscriptionPayload), +} + +impl CallbackPayload { + fn id(&self) -> &String { + match self { + CallbackPayload::Subscription(subscription_payload) => subscription_payload.id(), + } + } + + fn verifier(&self) -> &String { + match self { + CallbackPayload::Subscription(subscription_payload) => subscription_payload.verifier(), + } + } +} + +#[derive(Debug, Clone, PartialEq, Eq, Default, Deserialize, Serialize, JsonSchema)] +#[serde(deny_unknown_fields, default)] +/// Callback payload when a subscription id is incorrect +pub(crate) struct InvalidIdsPayload { + /// List of invalid ids + pub(crate) invalid_ids: Vec, + pub(crate) id: String, + pub(crate) verifier: String, +} + +#[derive(Serialize, Deserialize, Clone, Debug)] +#[serde(tag = "action", rename = "lowercase")] +pub(crate) enum SubscriptionPayload { + #[serde(rename = "check")] + Check { id: String, verifier: String }, + #[serde(rename = "heartbeat")] + Heartbeat { + /// Id sent with the corresponding verifier + id: String, + /// List of ids to heartbeat + ids: Vec, + /// Verifier received with the corresponding id + verifier: String, + }, + #[serde(rename = "next")] + Next { + id: String, + payload: Response, + verifier: String, + }, + #[serde(rename = "complete")] + Complete { + id: String, + verifier: String, + errors: Option>, + }, +} + +impl SubscriptionPayload { + fn id(&self) -> &String { + match self { + SubscriptionPayload::Check { id, .. } + | SubscriptionPayload::Heartbeat { id, .. } + | SubscriptionPayload::Next { id, .. } + | SubscriptionPayload::Complete { id, .. } => id, + } + } + + fn verifier(&self) -> &String { + match self { + SubscriptionPayload::Check { verifier, .. } + | SubscriptionPayload::Heartbeat { verifier, .. } + | SubscriptionPayload::Next { verifier, .. } + | SubscriptionPayload::Complete { verifier, .. 
} => verifier, + } + } +} + +#[derive(Clone)] +pub(crate) struct CallbackService { + notify: Notify, + path: String, + callback_hmac_key: String, +} + +impl CallbackService { + pub(crate) fn new( + notify: Notify, + path: String, + callback_hmac_key: String, + ) -> Self { + Self { + notify, + path, + callback_hmac_key, + } + } +} + +impl Service for CallbackService { + type Response = router::Response; + type Error = BoxError; + type Future = BoxFuture<'static, Result>; + + fn poll_ready(&mut self, _: &mut std::task::Context<'_>) -> Poll> { + Ok(()).into() + } + + fn call(&mut self, req: router::Request) -> Self::Future { + let mut notify = self.notify.clone(); + let path = self.path.clone(); + let callback_hmac_key = self.callback_hmac_key.clone(); + Box::pin( + async move { + let (parts, body) = req.router_request.into_parts(); + let sub_id = parts + .uri + .path() + .trim_start_matches(&format!("{path}/")) + .to_string(); + + match parts.method { + Method::POST => { + let cb_body = hyper::body::to_bytes(body) + .await + .map_err(|e| format!("failed to get the request body: {e}")) + .and_then(|bytes| { + serde_json::from_reader::<_, CallbackPayload>(bytes.reader()) + .map_err(|err| { + format!( + "failed to deserialize the request body into JSON: {err}" + ) + }) + }); + let cb_body = match cb_body { + Ok(cb_body) => cb_body, + Err(err) => { + return Ok(router::Response { + response: http::Response::builder() + .status(StatusCode::BAD_REQUEST) + .body(err.into()) + .map_err(BoxError::from)?, + context: req.context, + }); + } + }; + let id = cb_body.id().clone(); + + // Hash verifier to sha256 to mitigate timing attack + // Check verifier + let verifier = cb_body.verifier(); + let mut verifier_hasher = Sha256::new(); + verifier_hasher.update(verifier.as_bytes()); + let hashed_verifier = verifier_hasher.finalize(); + + let mut mac = HmacSha256::new_from_slice(callback_hmac_key.as_bytes())?; + mac.update(id.as_bytes()); + let result = mac.finalize(); + let expected_verifier = hex::encode(result.into_bytes()); + let mut verifier_hasher = Sha256::new(); + verifier_hasher.update(expected_verifier.as_bytes()); + let expected_hashed_verifier = verifier_hasher.finalize(); + + if hashed_verifier != expected_hashed_verifier { + return Ok(router::Response { + response: http::Response::builder() + .status(StatusCode::UNAUTHORIZED) + .body("verifier doesn't match".into()) + .map_err(BoxError::from)?, + context: req.context, + }); + } + + if let Err(res) = ensure_id_consistency(&req.context, &sub_id, &id) { + return Ok(res); + } + + match cb_body { + CallbackPayload::Subscription(SubscriptionPayload::Next { + mut payload, + .. + }) => { + let mut handle = match notify.subscribe_if_exist(id).await? { + Some(handle) => handle.into_sink(), + None => { + return Ok(router::Response { + response: http::Response::builder() + .status(StatusCode::NOT_FOUND) + .body("suscription doesn't exist".into()) + .map_err(BoxError::from)?, + context: req.context, + }); + } + }; + // Keep the subscription to the client opened + payload.subscribed = Some(true); + handle.send_sync(payload)?; + + Ok(router::Response { + response: http::Response::builder() + .status(StatusCode::OK) + .body::("".into()) + .map_err(BoxError::from)?, + context: req.context, + }) + } + CallbackPayload::Subscription(SubscriptionPayload::Check { + .. + }) => { + if notify.exist(id).await? 
{ + Ok(router::Response { + response: http::Response::builder() + .status(StatusCode::NO_CONTENT) + .body::("".into()) + .map_err(BoxError::from)?, + context: req.context, + }) + } else { + Ok(router::Response { + response: http::Response::builder() + .status(StatusCode::NOT_FOUND) + .body("suscription doesn't exist".into()) + .map_err(BoxError::from)?, + context: req.context, + }) + } + } + CallbackPayload::Subscription(SubscriptionPayload::Heartbeat { + ids, + id, + verifier, + }) => { + if !ids.contains(&id) { + return Ok(router::Response { + response: http::Response::builder() + .status(StatusCode::UNAUTHORIZED) + .body("id used for the verifier is not part of ids array".into()) + .map_err(BoxError::from)?, + context: req.context, + }); + } + + let (mut valid_ids, invalid_ids) = notify.invalid_ids(ids).await?; + if invalid_ids.is_empty() { + Ok(router::Response { + response: http::Response::builder() + .status(StatusCode::NO_CONTENT) + .body::("".into()) + .map_err(BoxError::from)?, + context: req.context, + }) + } else if valid_ids.is_empty() { + Ok(router::Response { + response: http::Response::builder() + .status(StatusCode::NOT_FOUND) + .body("suscriptions don't exist".into()) + .map_err(BoxError::from)?, + context: req.context, + }) + } else { + let (id, verifier) = if invalid_ids.contains(&id) { + (id, verifier) + } else { + let new_id = valid_ids.pop().expect("valid_ids is not empty, checked in the previous if block"); + // Generate new verifier + let mut mac = HmacSha256::new_from_slice( + callback_hmac_key.as_bytes(), + )?; + mac.update(new_id.as_bytes()); + let result = mac.finalize(); + let verifier = hex::encode(result.into_bytes()); + + (new_id, verifier) + }; + Ok(router::Response { + response: http::Response::builder() + .status(StatusCode::NOT_FOUND) + .body(serde_json::to_string_pretty(&InvalidIdsPayload{ + invalid_ids, + id, + verifier, + })?.into()) + .map_err(BoxError::from)?, + context: req.context, + }) + } + } + CallbackPayload::Subscription(SubscriptionPayload::Complete { + errors, + .. 
+ }) => { + if let Some(errors) = errors { + let mut handle = + notify.subscribe(id.clone()).await?.into_sink(); + handle.send_sync( + graphql::Response::builder().errors(errors).build(), + )?; + } + notify.force_delete(id).await?; + Ok(router::Response { + response: http::Response::builder() + .status(StatusCode::ACCEPTED) + .body::("".into()) + .map_err(BoxError::from)?, + context: req.context, + }) + } + } + } + _ => Ok(router::Response { + response: http::Response::builder() + .status(StatusCode::METHOD_NOT_ALLOWED) + .body::("".into()) + .map_err(BoxError::from)?, + context: req.context, + }), + } + } + .instrument(tracing::info_span!("subscription_callback")), + ) + } +} + +pub(crate) fn create_verifier(sub_id: &str) -> Result { + let callback_hmac_key = SUBSCRIPTION_CALLBACK_HMAC_KEY + .get() + .ok_or("subscription callback hmac key is not available")?; + let mut mac = HmacSha256::new_from_slice(callback_hmac_key.as_bytes())?; + mac.update(sub_id.as_bytes()); + let result = mac.finalize(); + let verifier = hex::encode(result.into_bytes()); + + Ok(verifier) +} + +fn ensure_id_consistency( + context: &Context, + id_from_path: &str, + id_from_body: &str, +) -> Result<(), router::Response> { + (id_from_path != id_from_body) + .then(|| { + Err(router::Response { + response: http::Response::builder() + .status(StatusCode::BAD_REQUEST) + .body::("id from url path and id from body are different".into()) + .expect("this body is valid"), + context: context.clone(), + }) + }) + .unwrap_or_else(|| Ok(())) +} + +#[cfg(test)] +mod tests { + use std::str::FromStr; + + use futures::StreamExt; + use serde_json::Value; + use tower::util::BoxService; + use tower::Service; + use tower::ServiceExt; + + use super::*; + use crate::graphql::Request; + use crate::http_ext; + use crate::plugin::test::MockSubgraphService; + use crate::plugin::DynPlugin; + use crate::services::SubgraphRequest; + use crate::services::SubgraphResponse; + use crate::Notify; + + #[tokio::test(flavor = "multi_thread")] + async fn it_test_callback_endpoint() { + let mut notify = Notify::builder().build(); + let dyn_plugin: Box = crate::plugin::plugins() + .find(|factory| factory.name == APOLLO_SUBSCRIPTION_PLUGIN) + .expect("Plugin not found") + .create_instance( + &Value::from_str( + r#"{ + "mode": { + "preview_callback": { + "public_url": "http://localhost:4000", + "path": "/subscription/callback", + "subgraphs": ["test"] + } + } + }"#, + ) + .unwrap(), + Default::default(), + notify.clone(), + ) + .await + .unwrap(); + + let http_req_prom = http::Request::get("http://localhost:4000/subscription/callback") + .body(Default::default()) + .unwrap(); + let mut web_endpoint = dyn_plugin + .web_endpoints() + .into_iter() + .next() + .unwrap() + .1 + .into_iter() + .next() + .unwrap() + .into_router(); + let resp = web_endpoint + .ready() + .await + .unwrap() + .call(http_req_prom) + .await + .unwrap(); + assert_eq!(resp.status(), http::StatusCode::NOT_FOUND); + let new_sub_id = uuid::Uuid::new_v4().to_string(); + let (handler, _created) = notify + .create_or_subscribe(new_sub_id.clone(), true) + .await + .unwrap(); + let verifier = create_verifier(&new_sub_id).unwrap(); + let http_req = http::Request::post(format!( + "http://localhost:4000/subscription/callback/{new_sub_id}" + )) + .body(hyper::Body::from( + serde_json::to_vec(&CallbackPayload::Subscription(SubscriptionPayload::Check { + id: new_sub_id.clone(), + verifier: verifier.clone(), + })) + .unwrap(), + )) + .unwrap(); + let resp = 
web_endpoint.clone().oneshot(http_req).await.unwrap(); + assert_eq!(resp.status(), http::StatusCode::NO_CONTENT); + + let http_req = http::Request::post(format!( + "http://localhost:4000/subscription/callback/{new_sub_id}" + )) + .body(hyper::Body::from( + serde_json::to_vec(&CallbackPayload::Subscription(SubscriptionPayload::Next { + id: new_sub_id.clone(), + payload: graphql::Response::builder() + .data(serde_json_bytes::json!({"userWasCreated": {"username": "ada_lovelace"}})) + .build(), + verifier: verifier.clone(), + })) + .unwrap(), + )) + .unwrap(); + let resp = web_endpoint.clone().oneshot(http_req).await.unwrap(); + assert_eq!(resp.status(), http::StatusCode::OK); + let mut handler = handler.into_stream(); + let msg = handler.next().await.unwrap(); + + assert_eq!( + msg, + graphql::Response::builder() + .subscribed(true) + .data(serde_json_bytes::json!({"userWasCreated": {"username": "ada_lovelace"}})) + .build() + ); + drop(handler); + + // Should answer NOT FOUND because I dropped the only existing handler and so no one is still listening to the sub + let http_req = http::Request::post(format!( + "http://localhost:4000/subscription/callback/{new_sub_id}" + )) + .body(hyper::Body::from( + serde_json::to_vec(&CallbackPayload::Subscription(SubscriptionPayload::Next { + id: new_sub_id.clone(), + payload: graphql::Response::builder() + .data(serde_json_bytes::json!({"userWasCreated": {"username": "ada_lovelace"}})) + .build(), + verifier: verifier.clone(), + })) + .unwrap(), + )) + .unwrap(); + let resp = web_endpoint.clone().oneshot(http_req).await.unwrap(); + assert_eq!(resp.status(), http::StatusCode::NOT_FOUND); + + // Should answer NOT FOUND because I dropped the only existing handler and so no one is still listening to the sub + let http_req = http::Request::post(format!( + "http://localhost:4000/subscription/callback/{new_sub_id}" + )) + .body(hyper::Body::from( + serde_json::to_vec(&CallbackPayload::Subscription( + SubscriptionPayload::Heartbeat { + id: new_sub_id.clone(), + ids: vec![new_sub_id, "FAKE_SUB_ID".to_string()], + verifier: verifier.clone(), + }, + )) + .unwrap(), + )) + .unwrap(); + let resp = web_endpoint.oneshot(http_req).await.unwrap(); + assert_eq!(resp.status(), http::StatusCode::NOT_FOUND); + } + + #[tokio::test(flavor = "multi_thread")] + async fn it_test_callback_endpoint_with_bad_verifier() { + let mut notify = Notify::builder().build(); + let dyn_plugin: Box = crate::plugin::plugins() + .find(|factory| factory.name == APOLLO_SUBSCRIPTION_PLUGIN) + .expect("Plugin not found") + .create_instance( + &Value::from_str( + r#"{ + "mode": { + "preview_callback": { + "public_url": "http://localhost:4000", + "path": "/subscription/callback", + "subgraphs": ["test"] + } + } + }"#, + ) + .unwrap(), + Default::default(), + notify.clone(), + ) + .await + .unwrap(); + + let http_req_prom = http::Request::get("http://localhost:4000/subscription/callback") + .body(Default::default()) + .unwrap(); + let mut web_endpoint = dyn_plugin + .web_endpoints() + .into_iter() + .next() + .unwrap() + .1 + .into_iter() + .next() + .unwrap() + .into_router(); + let resp = web_endpoint + .ready() + .await + .unwrap() + .call(http_req_prom) + .await + .unwrap(); + assert_eq!(resp.status(), http::StatusCode::NOT_FOUND); + let new_sub_id = uuid::Uuid::new_v4().to_string(); + let (_handler, _created) = notify + .create_or_subscribe(new_sub_id.clone(), true) + .await + .unwrap(); + let verifier = String::from("XXX"); + let http_req = http::Request::post(format!( + 
"http://localhost:4000/subscription/callback/{new_sub_id}" + )) + .body(hyper::Body::from( + serde_json::to_vec(&CallbackPayload::Subscription(SubscriptionPayload::Check { + id: new_sub_id.clone(), + verifier: verifier.clone(), + })) + .unwrap(), + )) + .unwrap(); + let resp = web_endpoint.clone().oneshot(http_req).await.unwrap(); + assert_eq!(resp.status(), http::StatusCode::UNAUTHORIZED); + + let http_req = http::Request::post(format!( + "http://localhost:4000/subscription/callback/{new_sub_id}" + )) + .body(hyper::Body::from( + serde_json::to_vec(&CallbackPayload::Subscription(SubscriptionPayload::Next { + id: new_sub_id.clone(), + payload: graphql::Response::builder() + .data(serde_json_bytes::json!({"userWasCreated": {"username": "ada_lovelace"}})) + .build(), + verifier: verifier.clone(), + })) + .unwrap(), + )) + .unwrap(); + let resp = web_endpoint.clone().oneshot(http_req).await.unwrap(); + assert_eq!(resp.status(), http::StatusCode::UNAUTHORIZED); + } + + #[tokio::test(flavor = "multi_thread")] + async fn it_test_callback_endpoint_with_complete_subscription() { + let mut notify = Notify::builder().build(); + let dyn_plugin: Box = crate::plugin::plugins() + .find(|factory| factory.name == APOLLO_SUBSCRIPTION_PLUGIN) + .expect("Plugin not found") + .create_instance( + &Value::from_str( + r#"{ + "mode": { + "preview_callback": { + "public_url": "http://localhost:4000", + "path": "/subscription/callback", + "subgraphs": ["test"] + } + } + }"#, + ) + .unwrap(), + Default::default(), + notify.clone(), + ) + .await + .unwrap(); + + let http_req_prom = http::Request::get("http://localhost:4000/subscription/callback") + .body(Default::default()) + .unwrap(); + let mut web_endpoint = dyn_plugin + .web_endpoints() + .into_iter() + .next() + .unwrap() + .1 + .into_iter() + .next() + .unwrap() + .into_router(); + let resp = web_endpoint + .ready() + .await + .unwrap() + .call(http_req_prom) + .await + .unwrap(); + assert_eq!(resp.status(), http::StatusCode::NOT_FOUND); + let new_sub_id = uuid::Uuid::new_v4().to_string(); + let (handler, _created) = notify + .create_or_subscribe(new_sub_id.clone(), true) + .await + .unwrap(); + let verifier = create_verifier(&new_sub_id).unwrap(); + + let http_req = http::Request::post(format!( + "http://localhost:4000/subscription/callback/{new_sub_id}" + )) + .body(hyper::Body::from( + serde_json::to_vec(&CallbackPayload::Subscription(SubscriptionPayload::Check { + id: new_sub_id.clone(), + verifier: verifier.clone(), + })) + .unwrap(), + )) + .unwrap(); + let resp = web_endpoint.clone().oneshot(http_req).await.unwrap(); + assert_eq!(resp.status(), http::StatusCode::NO_CONTENT); + + let http_req = http::Request::post(format!( + "http://localhost:4000/subscription/callback/{new_sub_id}" + )) + .body(hyper::Body::from( + serde_json::to_vec(&CallbackPayload::Subscription(SubscriptionPayload::Next { + id: new_sub_id.clone(), + payload: graphql::Response::builder() + .data(serde_json_bytes::json!({"userWasCreated": {"username": "ada_lovelace"}})) + .build(), + verifier: verifier.clone(), + })) + .unwrap(), + )) + .unwrap(); + let resp = web_endpoint.clone().oneshot(http_req).await.unwrap(); + assert_eq!(resp.status(), http::StatusCode::OK); + let mut handler = handler.into_stream(); + let msg = handler.next().await.unwrap(); + + assert_eq!( + msg, + graphql::Response::builder() + .subscribed(true) + .data(serde_json_bytes::json!({"userWasCreated": {"username": "ada_lovelace"}})) + .build() + ); + + let http_req = http::Request::post(format!( + 
"http://localhost:4000/subscription/callback/{new_sub_id}" + )) + .body(hyper::Body::from( + serde_json::to_vec(&CallbackPayload::Subscription( + SubscriptionPayload::Complete { + id: new_sub_id.clone(), + errors: Some(vec![graphql::Error::builder() + .message("cannot complete the subscription") + .extension_code("SUBSCRIPTION_ERROR") + .build()]), + verifier: verifier.clone(), + }, + )) + .unwrap(), + )) + .unwrap(); + let resp = web_endpoint.clone().oneshot(http_req).await.unwrap(); + assert_eq!(resp.status(), http::StatusCode::ACCEPTED); + let msg = handler.next().await.unwrap(); + + assert_eq!( + msg, + graphql::Response::builder() + .errors(vec![graphql::Error::builder() + .message("cannot complete the subscription") + .extension_code("SUBSCRIPTION_ERROR") + .build()]) + .build() + ); + + // Should answer NOT FOUND because we completed the sub + let http_req = http::Request::post(format!( + "http://localhost:4000/subscription/callback/{new_sub_id}" + )) + .body(hyper::Body::from( + serde_json::to_vec(&CallbackPayload::Subscription(SubscriptionPayload::Next { + id: new_sub_id.clone(), + payload: graphql::Response::builder() + .data(serde_json_bytes::json!({"userWasCreated": {"username": "ada_lovelace"}})) + .build(), + verifier, + })) + .unwrap(), + )) + .unwrap(); + let resp = web_endpoint.oneshot(http_req).await.unwrap(); + assert_eq!(resp.status(), http::StatusCode::NOT_FOUND); + } + + #[tokio::test(flavor = "multi_thread")] + async fn it_test_subgraph_service_with_subscription_disabled() { + let dyn_plugin: Box = crate::plugin::plugins() + .find(|factory| factory.name == APOLLO_SUBSCRIPTION_PLUGIN) + .expect("Plugin not found") + .create_instance( + &Value::from_str(r#"{}"#).unwrap(), + Default::default(), + Default::default(), + ) + .await + .unwrap(); + + let mut mock_subgraph_service = MockSubgraphService::new(); + mock_subgraph_service + .expect_call() + .times(0) + .returning(move |req: SubgraphRequest| { + Ok(SubgraphResponse::fake_builder() + .context(req.context) + .build()) + }); + + let mut subgraph_service = + dyn_plugin.subgraph_service("my_subgraph_name", BoxService::new(mock_subgraph_service)); + let subgraph_req = SubgraphRequest::fake_builder() + .subgraph_request( + http_ext::Request::fake_builder() + .body( + Request::fake_builder() + .query(String::from( + "subscription {\n userWasCreated {\n username\n }\n}", + )) + .build(), + ) + .build() + .unwrap(), + ) + .operation_kind(OperationKind::Subscription) + .build(); + let subgraph_response = subgraph_service + .ready() + .await + .unwrap() + .call(subgraph_req) + .await + .unwrap(); + + assert_eq!(subgraph_response.response.body(), &graphql::Response::builder().data(serde_json_bytes::Value::Null).error(graphql::Error::builder().message("cannot execute a subscription if it's not enabled in the configuration").extension_code("SUBSCRIPTION_DISABLED").build()).extensions(Object::default()).build()); + } + + #[test] + fn it_test_subscription_config() { + let config_with_callback: SubscriptionConfig = serde_json::from_value(serde_json::json!({ + "mode": { + "preview_callback": { + "public_url": "http://localhost:4000", + "path": "/subscription/callback", + "subgraphs": ["test"] + } + } + })) + .unwrap(); + + let subgraph_cfg = config_with_callback.mode.get_subgraph_config("test"); + assert_eq!( + subgraph_cfg, + Some(SubscriptionMode::Callback( + serde_json::from_value::(serde_json::json!({ + "public_url": "http://localhost:4000", + "path": "/subscription/callback", + "subgraphs": [] + })) + .unwrap() + )) + ); + + let 
config_with_callback_default: SubscriptionConfig = + serde_json::from_value(serde_json::json!({ + "mode": { + "preview_callback": { + "public_url": "http://localhost:4000", + "path": "/subscription/callback", + } + } + })) + .unwrap(); + + let subgraph_cfg = config_with_callback_default + .mode + .get_subgraph_config("test"); + assert_eq!( + subgraph_cfg, + Some(SubscriptionMode::Callback( + serde_json::from_value::(serde_json::json!({ + "public_url": "http://localhost:4000", + "path": "/subscription/callback", + "subgraphs": [] + })) + .unwrap() + )) + ); + + let config_with_passthrough: SubscriptionConfig = + serde_json::from_value(serde_json::json!({ + "mode": { + "passthrough": { + "subgraphs": { + "test": { + "path": "/ws", + } + } + } + } + })) + .unwrap(); + + let subgraph_cfg = config_with_passthrough.mode.get_subgraph_config("test"); + assert_eq!( + subgraph_cfg, + Some(SubscriptionMode::Passthrough( + serde_json::from_value::(serde_json::json!({ + "path": "/ws", + })) + .unwrap() + )) + ); + + let config_with_passthrough_override: SubscriptionConfig = + serde_json::from_value(serde_json::json!({ + "mode": { + "passthrough": { + "all": { + "path": "/wss", + "protocol": "graphql_transport_ws" + }, + "subgraphs": { + "test": { + "path": "/ws", + } + } + } + } + })) + .unwrap(); + + let subgraph_cfg = config_with_passthrough_override + .mode + .get_subgraph_config("test"); + assert_eq!( + subgraph_cfg, + Some(SubscriptionMode::Passthrough( + serde_json::from_value::(serde_json::json!({ + "path": "/ws", + "protocol": "graphql_ws" + })) + .unwrap() + )) + ); + + let config_with_passthrough_all: SubscriptionConfig = + serde_json::from_value(serde_json::json!({ + "mode": { + "passthrough": { + "all": { + "path": "/wss", + "protocol": "graphql_transport_ws" + }, + "subgraphs": { + "foo": { + "path": "/ws", + } + } + } + } + })) + .unwrap(); + + let subgraph_cfg = config_with_passthrough_all.mode.get_subgraph_config("test"); + assert_eq!( + subgraph_cfg, + Some(SubscriptionMode::Passthrough( + serde_json::from_value::(serde_json::json!({ + "path": "/wss", + "protocol": "graphql_transport_ws" + })) + .unwrap() + )) + ); + + let config_with_both_mode: SubscriptionConfig = serde_json::from_value(serde_json::json!({ + "mode": { + "preview_callback": { + "public_url": "http://localhost:4000", + "path": "/subscription/callback", + }, + "passthrough": { + "subgraphs": { + "foo": { + "path": "/ws", + } + } + } + } + })) + .unwrap(); + + let subgraph_cfg = config_with_both_mode.mode.get_subgraph_config("test"); + assert_eq!( + subgraph_cfg, + Some(SubscriptionMode::Callback( + serde_json::from_value::(serde_json::json!({ + "public_url": "http://localhost:4000", + "path": "/subscription/callback", + })) + .unwrap() + )) + ); + + let config_with_passthrough_precedence: SubscriptionConfig = + serde_json::from_value(serde_json::json!({ + "mode": { + "preview_callback": { + "public_url": "http://localhost:4000", + "path": "/subscription/callback", + }, + "passthrough": { + "all": { + "path": "/wss", + "protocol": "graphql_transport_ws" + }, + "subgraphs": { + "foo": { + "path": "/ws", + } + } + } + } + })) + .unwrap(); + + let subgraph_cfg = config_with_passthrough_precedence + .mode + .get_subgraph_config("test"); + assert_eq!( + subgraph_cfg, + Some(SubscriptionMode::Passthrough( + serde_json::from_value::(serde_json::json!({ + "path": "/wss", + "protocol": "graphql_transport_ws" + })) + .unwrap() + )) + ); + + let config_without_mode: SubscriptionConfig = + 
serde_json::from_value(serde_json::json!({})).unwrap(); + + let subgraph_cfg = config_without_mode.mode.get_subgraph_config("test"); + assert_eq!(subgraph_cfg, None); + } +} + +register_plugin!("apollo", "subscription", Subscription); diff --git a/apollo-router/src/plugins/telemetry/apollo.rs b/apollo-router/src/plugins/telemetry/apollo.rs index 329518d348..5ecef6cfdd 100644 --- a/apollo-router/src/plugins/telemetry/apollo.rs +++ b/apollo-router/src/plugins/telemetry/apollo.rs @@ -280,18 +280,25 @@ pub(crate) struct LicensedOperationCountByType { #[derive(Debug, Serialize, PartialEq, Eq, Hash, Clone, Copy)] #[serde(rename_all = "kebab-case")] pub(crate) enum OperationSubType { - // TODO + SubscriptionEvent, + SubscriptionRequest, } impl OperationSubType { pub(crate) const fn as_str(&self) -> &'static str { - "" + match self { + OperationSubType::SubscriptionEvent => "subscription-event", + OperationSubType::SubscriptionRequest => "subscription-request", + } } } impl Display for OperationSubType { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "") + match self { + OperationSubType::SubscriptionEvent => write!(f, "subscription-event"), + OperationSubType::SubscriptionRequest => write!(f, "subscription-request"), + } } } diff --git a/apollo-router/src/plugins/telemetry/metrics/apollo.rs b/apollo-router/src/plugins/telemetry/metrics/apollo.rs index 0cb9baa92d..f03e526922 100644 --- a/apollo-router/src/plugins/telemetry/metrics/apollo.rs +++ b/apollo-router/src/plugins/telemetry/metrics/apollo.rs @@ -73,7 +73,9 @@ mod test { use crate::plugins::telemetry::apollo::ENDPOINT_DEFAULT; use crate::plugins::telemetry::apollo_exporter::Sender; use crate::plugins::telemetry::Telemetry; + use crate::plugins::telemetry::OPERATION_KIND; use crate::plugins::telemetry::STUDIO_EXCLUDE; + use crate::query_planner::OperationKind; use crate::services::SupergraphRequest; use crate::Context; use crate::TestHarness; @@ -115,6 +117,23 @@ mod test { Ok(()) } + #[tokio::test(flavor = "multi_thread")] + async fn apollo_metrics_for_subscription() -> Result<(), BoxError> { + let query = "subscription {userWasCreated{name}}"; + let context = Context::new(); + let _ = context + .insert(OPERATION_KIND, OperationKind::Subscription) + .unwrap(); + let results = get_metrics_for_request(query, None, Some(context)).await?; + let mut settings = insta::Settings::clone_current(); + settings.set_sort_maps(true); + settings.add_redaction("[].request_id", "[REDACTED]"); + settings.bind(|| { + insta::assert_json_snapshot!(results); + }); + Ok(()) + } + #[tokio::test(flavor = "multi_thread")] async fn apollo_metrics_multiple_operations() -> Result<(), BoxError> { let query = "query {topProducts{name}} query {topProducts{name}}"; @@ -248,7 +267,7 @@ mod test { async fn create_plugin_with_apollo_config( apollo_config: apollo::Config, ) -> Result { - Telemetry::new(PluginInit::new( + Telemetry::new(PluginInit::fake_new( config::Conf { logging: Default::default(), metrics: None, diff --git a/apollo-router/src/plugins/telemetry/metrics/apollo/studio.rs b/apollo-router/src/plugins/telemetry/metrics/apollo/studio.rs index e5de294ef0..d7a85a5cd8 100644 --- a/apollo-router/src/plugins/telemetry/metrics/apollo/studio.rs +++ b/apollo-router/src/plugins/telemetry/metrics/apollo/studio.rs @@ -277,7 +277,6 @@ mod test { let metric_1 = create_test_metric("client_1", "version_1", "report_key_1"); let metric_2 = create_test_metric("client_1", "version_1", "report_key_1"); let aggregated_metrics = 
Report::new(vec![metric_1, metric_2]); - insta::with_settings!({sort_maps => true}, { insta::assert_json_snapshot!(aggregated_metrics); }); diff --git a/apollo-router/src/plugins/telemetry/metrics/snapshots/apollo_router__plugins__telemetry__metrics__apollo__test__apollo_metrics_for_subscription.snap b/apollo-router/src/plugins/telemetry/metrics/snapshots/apollo_router__plugins__telemetry__metrics__apollo__test__apollo_metrics_for_subscription.snap new file mode 100644 index 0000000000..c50e070b89 --- /dev/null +++ b/apollo-router/src/plugins/telemetry/metrics/snapshots/apollo_router__plugins__telemetry__metrics__apollo__test__apollo_metrics_for_subscription.snap @@ -0,0 +1,61 @@ +--- +source: apollo-router/src/plugins/telemetry/metrics/apollo.rs +expression: results +--- +[ + { + "request_id": "[REDACTED]", + "stats": { + "# -\nsubscription{userWasCreated{name}}": { + "stats_with_context": { + "context": { + "client_name": "test_client", + "client_version": "1.0-test", + "operation_type": "subscription", + "operation_subtype": "subscription-request" + }, + "query_latency_stats": { + "latency": { + "secs": 0, + "nanos": 100000000 + }, + "cache_hit": false, + "persisted_query_hit": null, + "cache_latency": null, + "root_error_stats": { + "children": {}, + "errors_count": 0, + "requests_with_errors_count": 0 + }, + "has_errors": true, + "public_cache_ttl_latency": null, + "private_cache_ttl_latency": null, + "registered_operation": false, + "forbidden_operation": false, + "without_field_instrumentation": false + }, + "per_type_stat": {} + }, + "referenced_fields_by_type": { + "Subscription": { + "field_names": [ + "userWasCreated" + ], + "is_interface": false + }, + "User": { + "field_names": [ + "name" + ], + "is_interface": false + } + } + } + }, + "licensed_operation_count_by_type": { + "type": "subscription", + "subtype": "subscription-request", + "licensed_operation_count": 1 + } + } +] diff --git a/apollo-router/src/plugins/telemetry/mod.rs b/apollo-router/src/plugins/telemetry/mod.rs index 02204506df..daa4faf2c7 100644 --- a/apollo-router/src/plugins/telemetry/mod.rs +++ b/apollo-router/src/plugins/telemetry/mod.rs @@ -66,6 +66,7 @@ use self::reload::reload_fmt; use self::reload::reload_metrics; use self::reload::NullFieldFormatter; use self::reload::OPENTELEMETRY_TRACER_HANDLE; +use self::tracing::apollo_telemetry::APOLLO_PRIVATE_DURATION_NS; use self::tracing::reload::ReloadTracer; use crate::layers::ServiceBuilderExt; use crate::plugin::Plugin; @@ -126,7 +127,9 @@ pub(crate) const EXECUTION_SPAN_NAME: &str = "execution"; const CLIENT_NAME: &str = "apollo_telemetry::client_name"; const CLIENT_VERSION: &str = "apollo_telemetry::client_version"; const SUBGRAPH_FTV1: &str = "apollo_telemetry::subgraph_ftv1"; -const OPERATION_KIND: &str = "apollo_telemetry::operation_kind"; +pub(crate) const OPERATION_KIND: &str = "apollo_telemetry::operation_kind"; +pub(crate) const GRAPHQL_OPERATION_NAME_CONTEXT_KEY: &str = + "apollo_telemetry::graphql_operation_name"; pub(crate) const STUDIO_EXCLUDE: &str = "apollo_telemetry::studio::exclude"; pub(crate) const LOGGING_DISPLAY_HEADERS: &str = "apollo_telemetry::logging::display_headers"; pub(crate) const LOGGING_DISPLAY_BODY: &str = "apollo_telemetry::logging::display_body"; @@ -270,7 +273,7 @@ impl Plugin for Telemetry { let response: Result = fut.await; span.record( - "apollo_private.duration_ns", + APOLLO_PRIVATE_DURATION_NS, start.elapsed().as_nanos() as i64, ); @@ -672,6 +675,11 @@ impl Telemetry { .operation_name .as_deref() 
.unwrap_or_default(); + if let Some(operation_name) = &http_request.body().operation_name { + let _ = request + .context + .insert(GRAPHQL_OPERATION_NAME_CONTEXT_KEY, operation_name.clone()); + } let span = info_span!( SUPERGRAPH_SPAN_NAME, @@ -1079,6 +1087,8 @@ impl Telemetry { match result { Err(e) => { if !matches!(sender, Sender::Noop) { + let operation_subtype = (operation_kind == OperationKind::Subscription) + .then_some(OperationSubType::SubscriptionRequest); Self::update_apollo_metrics( ctx, field_level_instrumentation_ratio, @@ -1086,7 +1096,7 @@ impl Telemetry { true, start.elapsed(), operation_kind, - None, + operation_subtype, ); } let mut metric_attrs = Vec::new(); @@ -1116,29 +1126,60 @@ impl Telemetry { } Ok(router_response) => { let mut has_errors = !router_response.response.status().is_success(); - + if operation_kind == OperationKind::Subscription { + Self::update_apollo_metrics( + ctx, + field_level_instrumentation_ratio, + sender.clone(), + has_errors, + start.elapsed(), + operation_kind, + Some(OperationSubType::SubscriptionRequest), + ); + } Ok(router_response.map(move |response_stream| { let sender = sender.clone(); let ctx = ctx.clone(); response_stream - .map(move |response| { + .enumerate() + .map(move |(idx, response)| { if !response.errors.is_empty() { has_errors = true; } - if !response.has_next.unwrap_or(false) - && !matches!(sender, Sender::Noop) - { - Self::update_apollo_metrics( - &ctx, - field_level_instrumentation_ratio, - sender.clone(), - has_errors, - start.elapsed(), - operation_kind, - None, - ); + if !matches!(sender, Sender::Noop) { + if operation_kind == OperationKind::Subscription { + // Don't send for the first empty response because it's a heartbeat + if idx != 0 { + // Only for subscription events + Self::update_apollo_metrics( + &ctx, + field_level_instrumentation_ratio, + sender.clone(), + has_errors, + response + .created_at + .map(|c| c.elapsed()) + .unwrap_or_else(|| start.elapsed()), + operation_kind, + Some(OperationSubType::SubscriptionEvent), + ); + } + } else { + // If it's the last response + if !response.has_next.unwrap_or(false) { + Self::update_apollo_metrics( + &ctx, + field_level_instrumentation_ratio, + sender.clone(), + has_errors, + start.elapsed(), + operation_kind, + None, + ); + } + } } response @@ -1665,6 +1706,7 @@ mod tests { .create_instance( &serde_json::json!({"apollo": {"schema_id":"abc"}, "tracing": {}}), Default::default(), + Default::default(), ) .await .unwrap(); @@ -1813,6 +1855,7 @@ mod tests { } }), Default::default(), + Default::default(), ) .await .unwrap(); @@ -1981,6 +2024,7 @@ mod tests { ) .unwrap(), Default::default(), + Default::default(), ) .await .unwrap(); @@ -2262,6 +2306,7 @@ mod tests { ) .unwrap(), Default::default(), + Default::default(), ) .await .unwrap(); diff --git a/apollo-router/src/plugins/telemetry/tracing/apollo_telemetry.rs b/apollo-router/src/plugins/telemetry/tracing/apollo_telemetry.rs index c0e913001c..5a6ac3db25 100644 --- a/apollo-router/src/plugins/telemetry/tracing/apollo_telemetry.rs +++ b/apollo-router/src/plugins/telemetry/tracing/apollo_telemetry.rs @@ -31,8 +31,10 @@ use crate::axum_factory::utils::REQUEST_SPAN_NAME; use crate::plugins::telemetry; use crate::plugins::telemetry::apollo::ErrorConfiguration; use crate::plugins::telemetry::apollo::ErrorsConfiguration; +use crate::plugins::telemetry::apollo::OperationSubType; use crate::plugins::telemetry::apollo::SingleReport; use crate::plugins::telemetry::apollo_exporter::proto; +use 
crate::plugins::telemetry::apollo_exporter::proto::reports::trace::http::Method; use crate::plugins::telemetry::apollo_exporter::proto::reports::trace::http::Values; use crate::plugins::telemetry::apollo_exporter::proto::reports::trace::query_plan_node::ConditionNode; use crate::plugins::telemetry::apollo_exporter::proto::reports::trace::query_plan_node::DeferNode; @@ -58,6 +60,8 @@ use crate::plugins::telemetry::EXECUTION_SPAN_NAME; use crate::plugins::telemetry::ROUTER_SPAN_NAME; use crate::plugins::telemetry::SUBGRAPH_SPAN_NAME; use crate::plugins::telemetry::SUPERGRAPH_SPAN_NAME; +use crate::query_planner::subscription::SUBSCRIPTION_EVENT_SPAN_NAME; +use crate::query_planner::OperationKind; use crate::query_planner::CONDITION_ELSE_SPAN_NAME; use crate::query_planner::CONDITION_IF_SPAN_NAME; use crate::query_planner::CONDITION_SPAN_NAME; @@ -69,7 +73,8 @@ use crate::query_planner::FLATTEN_SPAN_NAME; use crate::query_planner::PARALLEL_SPAN_NAME; use crate::query_planner::SEQUENCE_SPAN_NAME; -const APOLLO_PRIVATE_DURATION_NS: Key = Key::from_static_str("apollo_private.duration_ns"); +pub(crate) const APOLLO_PRIVATE_DURATION_NS: &str = "apollo_private.duration_ns"; +const APOLLO_PRIVATE_DURATION_NS_KEY: Key = Key::from_static_str(APOLLO_PRIVATE_DURATION_NS); const APOLLO_PRIVATE_SENT_TIME_OFFSET: Key = Key::from_static_str("apollo_private.sent_time_offset"); const APOLLO_PRIVATE_GRAPHQL_VARIABLES: Key = @@ -144,8 +149,10 @@ pub(crate) struct Exporter { errors_configuration: ErrorsConfiguration, } +#[derive(Debug)] enum TreeData { Request(Result, Error>), + SubscriptionEvent(Result, Error>), Router { http: Box, client_name: Option, @@ -211,7 +218,7 @@ impl Exporter { duration_ns: 0, root: None, details: None, - http: Some(http), + http: (http.method != Method::Unknown as i32).then_some(http), ..Default::default() }; @@ -226,12 +233,14 @@ impl Exporter { client_version, duration_ns, } => { - let root_http = root_trace - .http - .as_mut() - .expect("http was extracted earlier, qed"); - root_http.request_headers = http.request_headers; - root_http.response_headers = http.response_headers; + if http.method != Method::Unknown as i32 { + let root_http = root_trace + .http + .as_mut() + .expect("http was extracted earlier, qed"); + root_http.request_headers = http.request_headers; + root_http.response_headers = http.response_headers; + } root_trace.client_name = client_name.unwrap_or_default(); root_trace.client_version = client_version.unwrap_or_default(); root_trace.duration_ns = duration_ns; @@ -249,9 +258,24 @@ impl Exporter { }); } TreeData::Execution(operation_type) => { + if operation_type == OperationKind::Subscription.as_apollo_operation_type() { + root_trace.operation_subtype = if root_trace.http.is_some() { + OperationSubType::SubscriptionRequest.to_string() + } else { + OperationSubType::SubscriptionEvent.to_string() + }; + } root_trace.operation_type = operation_type; } - _ => panic!("should never have had other node types"), + TreeData::Trace(_) => { + continue; + } + other => { + tracing::error!( + "should never have had other node types, current type is: {other:?}" + ); + return Err(Error::TraceParsingFailed); + } } } @@ -262,13 +286,14 @@ impl Exporter { self.extract_data_from_spans(&span)? 
.pop() .and_then(|node| { - if let TreeData::Request(trace) = node { - Some(trace) - } else { - None + match node { + TreeData::Request(trace) | TreeData::SubscriptionEvent(trace) => { + Some(trace) + } + _ => None } }) - .expect("root trace must exist because it is constructed on the request span, qed") + .expect("root trace must exist because it is constructed on the request or subscription_event span, qed") } fn extract_data_from_spans(&mut self, span: &LightSpanData) -> Result, Error> { @@ -401,7 +426,7 @@ impl Exporter { .and_then(extract_string), duration_ns: span .attributes - .get(&APOLLO_PRIVATE_DURATION_NS) + .get(&APOLLO_PRIVATE_DURATION_NS_KEY) .and_then(extract_i64) .map(|e| e as u64) .unwrap_or_default(), @@ -481,6 +506,48 @@ impl Exporter { )); child_nodes } + SUBSCRIPTION_EVENT_SPAN_NAME => { + // To put the duration + child_nodes.push(TreeData::Router { + http: Box::new(extract_http_data(span)), + client_name: span.attributes.get(&CLIENT_NAME).and_then(extract_string), + client_version: span + .attributes + .get(&CLIENT_VERSION) + .and_then(extract_string), + duration_ns: span + .attributes + .get(&APOLLO_PRIVATE_DURATION_NS_KEY) + .and_then(extract_i64) + .map(|e| e as u64) + .unwrap_or_default(), + }); + + // To put the signature and operation name + child_nodes.push(TreeData::Supergraph { + operation_signature: span + .attributes + .get(&APOLLO_PRIVATE_OPERATION_SIGNATURE) + .and_then(extract_string) + .unwrap_or_default(), + operation_name: span + .attributes + .get(&OPERATION_NAME) + .and_then(extract_string) + .unwrap_or_default(), + variables_json: HashMap::new(), + }); + + child_nodes.push(TreeData::Execution( + OperationKind::Subscription + .as_apollo_operation_type() + .to_string(), + )); + + vec![TreeData::SubscriptionEvent( + self.extract_root_trace(span, child_nodes), + )] + } _ => child_nodes, }) } @@ -629,7 +696,7 @@ impl SpanExporter for Exporter { // We may get spans that simply don't complete. These need to be cleaned up after a period. It's the price of using ftv1. let mut traces: Vec<(String, proto::reports::Trace)> = Vec::new(); for span in batch { - if span.name == REQUEST_SPAN_NAME { + if span.name == REQUEST_SPAN_NAME || span.name == SUBSCRIPTION_EVENT_SPAN_NAME { match self.extract_trace(span.into()) { Ok(mut trace) => { let mut operation_signature = Default::default(); @@ -665,6 +732,7 @@ impl SpanExporter for Exporter { .push(len, span.into()); } } + tracing::info!(value.apollo_router_span_lru_size = self.spans_by_parent_id.len() as u64,); let mut report = telemetry::apollo::Report::default(); report += SingleReport::Traces(TracesReport { traces }); let exporter = self.report_exporter.clone(); @@ -782,6 +850,7 @@ mod test { for t in tree_data { match t { TreeData::Request(_) => elements.push("request"), + TreeData::SubscriptionEvent(_) => elements.push("subscription_event"), TreeData::Supergraph { .. 
} => elements.push("supergraph"), TreeData::QueryPlanNode(_) => elements.push("query_plan_node"), TreeData::DeferPrimary(_) => elements.push("defer_primary"), diff --git a/apollo-router/src/plugins/traffic_shaping/mod.rs b/apollo-router/src/plugins/traffic_shaping/mod.rs index 09c954ec02..f183edf0f4 100644 --- a/apollo-router/src/plugins/traffic_shaping/mod.rs +++ b/apollo-router/src/plugins/traffic_shaping/mod.rs @@ -754,7 +754,7 @@ mod test { ) .unwrap(); - let shaping_config = TrafficShaping::new(PluginInit::new(config, Default::default())) + let shaping_config = TrafficShaping::new(PluginInit::fake_builder().config(config).build()) .await .unwrap(); diff --git a/apollo-router/src/protocols/mod.rs b/apollo-router/src/protocols/mod.rs new file mode 100644 index 0000000000..c93950fdf2 --- /dev/null +++ b/apollo-router/src/protocols/mod.rs @@ -0,0 +1,2 @@ +pub(crate) mod multipart; +pub(crate) mod websocket; diff --git a/apollo-router/src/protocols/multipart.rs b/apollo-router/src/protocols/multipart.rs new file mode 100644 index 0000000000..b617c5ff50 --- /dev/null +++ b/apollo-router/src/protocols/multipart.rs @@ -0,0 +1,208 @@ +use std::pin::Pin; +use std::task::Poll; +use std::time::Duration; + +use bytes::Bytes; +use futures::stream::select; +use futures::stream::StreamExt; +use futures::Stream; +use serde::Serialize; +use serde_json_bytes::Value; +use tokio_stream::wrappers::IntervalStream; + +use crate::graphql; + +#[cfg(test)] +const HEARTBEAT_INTERVAL: Duration = Duration::from_millis(10); +#[cfg(not(test))] +const HEARTBEAT_INTERVAL: Duration = Duration::from_secs(5); + +#[derive(thiserror::Error, Debug)] +pub(crate) enum Error { + #[error("serialization error")] + SerdeError(#[from] serde_json::Error), +} + +#[derive(Debug, Clone, Copy, PartialEq)] +pub(crate) enum ProtocolMode { + Subscription, + Defer, +} + +#[derive(Clone, Debug, Serialize)] +struct SubscriptionPayload { + payload: Option, + #[serde(skip_serializing_if = "Vec::is_empty")] + errors: Vec, +} + +pub(crate) struct Multipart { + stream: Pin> + Send>>, + is_first_chunk: bool, + is_terminated: bool, + mode: ProtocolMode, +} + +impl Multipart { + pub(crate) fn new(stream: S, mode: ProtocolMode) -> Self + where + S: Stream + Send + 'static, + { + let stream = match mode { + ProtocolMode::Subscription => select( + stream.map(Some), + IntervalStream::new(tokio::time::interval(HEARTBEAT_INTERVAL)).map(|_| None), + ) + .boxed(), + ProtocolMode::Defer => stream.map(Some).boxed(), + }; + + Self { + stream, + is_first_chunk: true, + is_terminated: false, + mode, + } + } +} + +impl Stream for Multipart { + type Item = Result; + + fn poll_next( + mut self: Pin<&mut Self>, + cx: &mut std::task::Context<'_>, + ) -> Poll> { + if self.is_terminated { + return Poll::Ready(None); + } + match self.stream.as_mut().poll_next(cx) { + Poll::Ready(message) => match message { + Some(None) => { + // It's the ticker for heartbeat for subscription + let buf = if self.is_first_chunk { + self.is_first_chunk = false; + Bytes::from_static( + &b"\r\n--graphql\r\ncontent-type: application/json\r\n\r\n{}\r\n--graphql\r\n"[..] 
+ ) + } else { + Bytes::from_static( + &b"content-type: application/json\r\n\r\n{}\r\n--graphql\r\n"[..], + ) + }; + + Poll::Ready(Some(Ok(buf))) + } + Some(Some(mut response)) => { + let mut buf = if self.is_first_chunk { + self.is_first_chunk = false; + Vec::from(&b"\r\n--graphql\r\ncontent-type: application/json\r\n\r\n"[..]) + } else { + Vec::from(&b"content-type: application/json\r\n\r\n"[..]) + }; + let is_still_open = + response.has_next.unwrap_or(false) || response.subscribed.unwrap_or(false); + match self.mode { + ProtocolMode::Subscription => { + let resp = SubscriptionPayload { + errors: if is_still_open { + Vec::new() + } else { + response.errors.drain(..).collect() + }, + payload: match response.data { + None | Some(Value::Null) => None, + _ => response.into(), + }, + }; + + serde_json::to_writer(&mut buf, &resp)?; + } + ProtocolMode::Defer => { + serde_json::to_writer(&mut buf, &response)?; + } + } + + if is_still_open { + buf.extend_from_slice(b"\r\n--graphql\r\n"); + } else { + self.is_terminated = true; + buf.extend_from_slice(b"\r\n--graphql--\r\n"); + } + + Poll::Ready(Some(Ok(buf.into()))) + } + None => Poll::Ready(None), + }, + Poll::Pending => Poll::Pending, + } + } +} + +#[cfg(test)] +mod tests { + use futures::stream; + use serde_json_bytes::ByteString; + + use super::*; + + // TODO add test with empty stream + + #[tokio::test] + async fn test_heartbeat_and_boundaries() { + let responses = vec![ + graphql::Response::builder() + .data(serde_json_bytes::Value::String(ByteString::from( + String::from("foo"), + ))) + .subscribed(true) + .build(), + graphql::Response::builder() + .data(serde_json_bytes::Value::String(ByteString::from( + String::from("bar"), + ))) + .subscribed(true) + .build(), + graphql::Response::builder() + .data(serde_json_bytes::Value::String(ByteString::from( + String::from("foobar"), + ))) + .build(), + ]; + let gql_responses = stream::iter(responses); + + let mut protocol = Multipart::new(gql_responses, ProtocolMode::Subscription); + let heartbeat = String::from( + "\r\n--graphql\r\ncontent-type: application/json\r\n\r\n{}\r\n--graphql\r\n", + ); + let mut curr_index = 0; + while let Some(resp) = protocol.next().await { + let res = String::from_utf8(resp.unwrap().to_vec()).unwrap(); + if res == heartbeat { + continue; + } else { + match curr_index { + 0 => { + assert_eq!(res, "\r\n--graphql\r\ncontent-type: application/json\r\n\r\n{\"payload\":{\"data\":\"foo\"}}\r\n--graphql\r\n"); + } + 1 => { + assert_eq!( + res, + "content-type: application/json\r\n\r\n{\"payload\":{\"data\":\"bar\"}}\r\n--graphql\r\n" + ); + } + 2 => { + assert_eq!( + res, + "content-type: application/json\r\n\r\n{\"payload\":{\"data\":\"foobar\"}}\r\n--graphql--\r\n" + ); + } + _ => { + panic!("should not happened, test failed"); + } + } + curr_index += 1; + } + } + } +} diff --git a/apollo-router/src/protocols/websocket.rs b/apollo-router/src/protocols/websocket.rs new file mode 100644 index 0000000000..d8fe3778db --- /dev/null +++ b/apollo-router/src/protocols/websocket.rs @@ -0,0 +1,815 @@ +use std::pin::Pin; +use std::task::Poll; +use std::time::Duration; + +use futures::future; +use futures::Future; +use futures::Sink; +use futures::SinkExt; +use futures::Stream; +use futures::StreamExt; +use http::HeaderValue; +use pin_project_lite::pin_project; +use schemars::JsonSchema; +use serde::Deserialize; +use serde::Serialize; +use serde_json_bytes::Value; +use tokio::io::AsyncRead; +use tokio::io::AsyncWrite; +use 
tokio_tungstenite::tungstenite::protocol::frame::coding::CloseCode;
+use tokio_tungstenite::tungstenite::protocol::CloseFrame;
+use tokio_tungstenite::tungstenite::Message;
+use tokio_tungstenite::WebSocketStream;
+
+use crate::graphql;
+
+const CONNECTION_ACK_TIMEOUT: Duration = Duration::from_secs(5);
+
+#[derive(Debug, Clone, PartialEq, Eq, Hash, Deserialize, Serialize, JsonSchema, Copy)]
+#[serde(rename_all = "snake_case")]
+pub(crate) enum WebSocketProtocol {
+    // New one
+    GraphqlWs,
+    #[serde(rename = "graphql_transport_ws")]
+    // Old one
+    SubscriptionsTransportWs,
+}
+
+impl Default for WebSocketProtocol {
+    fn default() -> Self {
+        Self::GraphqlWs
+    }
+}
+
+impl From<WebSocketProtocol> for HeaderValue {
+    fn from(value: WebSocketProtocol) -> Self {
+        match value {
+            WebSocketProtocol::GraphqlWs => HeaderValue::from_static("graphql-transport-ws"),
+            WebSocketProtocol::SubscriptionsTransportWs => HeaderValue::from_static("graphql-ws"),
+        }
+    }
+}
+
+impl WebSocketProtocol {
+    fn subscribe(&self, id: String, payload: graphql::Request) -> ClientMessage {
+        match self {
+            // old
+            WebSocketProtocol::SubscriptionsTransportWs => ClientMessage::OldStart { id, payload },
+            // new
+            WebSocketProtocol::GraphqlWs => ClientMessage::Subscribe { id, payload },
+        }
+    }
+
+    fn complete(&self, id: String) -> ClientMessage {
+        match self {
+            // old
+            WebSocketProtocol::SubscriptionsTransportWs => ClientMessage::OldStop { id },
+            // new
+            WebSocketProtocol::GraphqlWs => ClientMessage::Complete { id },
+        }
+    }
+}
+
+/// A websocket message received from the client
+#[derive(Deserialize, Serialize, Debug)]
+#[serde(tag = "type", rename_all = "snake_case")]
+#[allow(clippy::large_enum_variant)] // Request is at fault
+pub(crate) enum ClientMessage {
+    /// A new connection
+    ConnectionInit {
+        /// Optional init payload from the client
+        payload: Option<Value>,
+    },
+    /// The start of a Websocket subscription
+    Subscribe {
+        /// Message ID
+        id: String,
+        /// The GraphQL Request - this can be modified by protocol implementors
+        /// to add file uploads.
+        payload: graphql::Request,
+    },
+    #[serde(rename = "start")]
+    /// For old protocol
+    OldStart {
+        /// Message ID
+        id: String,
+        /// The GraphQL Request - this can be modified by protocol implementors
+        /// to add file uploads.
+        payload: graphql::Request,
+    },
+    /// The end of a Websocket subscription
+    Complete {
+        /// Message ID
+        id: String,
+    },
+    /// For old protocol
+    #[serde(rename = "stop")]
+    OldStop {
+        /// Message ID
+        id: String,
+    },
+    /// Connection terminated by the client
+    ConnectionTerminate,
+    /// Useful for detecting failed connections, displaying latency metrics or
+    /// other types of network probing.
+    ///
+    /// Reference: <https://github.com/enisdenjo/graphql-ws/blob/master/PROTOCOL.md#ping>
+    Ping {
+        /// Additional details about the ping.
+        #[serde(skip_serializing_if = "Option::is_none")]
+        payload: Option<Value>,
+    },
+    /// The response to the Ping message.
+    ///
+    /// Reference: <https://github.com/enisdenjo/graphql-ws/blob/master/PROTOCOL.md#pong>
+    Pong {
+        /// Additional details about the pong.
+        #[serde(skip_serializing_if = "Option::is_none")]
+        payload: Option<Value>,
+    },
+}
+
+#[derive(Deserialize, Serialize, Debug)]
+#[serde(tag = "type", rename_all = "snake_case")]
+pub(crate) enum ServerMessage {
+    ConnectionAck,
+    /// subscriptions-transport-ws protocol alias for next payload
+    #[serde(alias = "data")]
+    /// graphql-ws protocol next payload
+    Next {
+        id: String,
+        payload: graphql::Response,
+    },
+    #[serde(alias = "connection_error")]
+    Error {
+        id: String,
+        payload: ServerError,
+    },
+    Complete {
+        id: String,
+    },
+    #[serde(alias = "ka")]
+    KeepAlive,
+    /// The response to the Ping message.
+    ///
+    /// https://github.com/enisdenjo/graphql-ws/blob/master/PROTOCOL.md#pong
+    Pong {
+        payload: Option<Value>,
+    },
+    Ping {
+        payload: Option<Value>,
+    },
+}
+
+#[derive(Deserialize, Serialize, Debug, Clone)]
+#[serde(untagged)]
+pub(crate) enum ServerError {
+    Error(graphql::Error),
+    Errors(Vec<graphql::Error>),
+}
+
+impl From<ServerError> for Vec<graphql::Error> {
+    fn from(value: ServerError) -> Self {
+        match value {
+            ServerError::Error(e) => vec![e],
+            ServerError::Errors(e) => e,
+        }
+    }
+}
+
+impl ServerMessage {
+    fn into_graphql_response(self) -> (Option<graphql::Response>, bool) {
+        match self {
+            ServerMessage::Next { id: _, mut payload } => {
+                payload.subscribed = Some(true);
+                (Some(payload), false)
+            }
+            ServerMessage::Error { id: _, payload } => (
+                Some(
+                    graphql::Response::builder()
+                        .errors(payload.into())
+                        .subscribed(false)
+                        .build(),
+                ),
+                true,
+            ),
+            ServerMessage::Complete { .. } => (None, true),
+            ServerMessage::ConnectionAck | ServerMessage::Pong { .. } => (None, false),
+            ServerMessage::Ping { .. } => (None, false),
+            ServerMessage::KeepAlive => (None, false),
+        }
+    }
+
+    fn id(&self) -> Option<String> {
+        match self {
+            ServerMessage::ConnectionAck
+            | ServerMessage::KeepAlive
+            | ServerMessage::Ping { .. }
+            | ServerMessage::Pong { .. } => None,
+            ServerMessage::Next { id, .. }
+            | ServerMessage::Error { id, .. }
+            | ServerMessage::Complete { id } => Some(id.to_string()),
+        }
+    }
+}
+
+pin_project!
{ +pub(crate) struct GraphqlWebSocket { + #[pin] + stream: S, + id: String, + protocol: WebSocketProtocol, + // Booleans for state machine when closing the stream + completed: bool, + terminated: bool, +} +} + +impl GraphqlWebSocket +where + S: Stream> + Sink + std::marker::Unpin, +{ + pub(crate) async fn new( + mut stream: S, + id: String, + protocol: WebSocketProtocol, + connection_params: Option, + ) -> Result { + let connection_init_msg = match connection_params { + Some(connection_params) => ClientMessage::ConnectionInit { + payload: Some(serde_json_bytes::json!({ + "connectionParams": connection_params + })), + }, + None => ClientMessage::ConnectionInit { payload: None }, + }; + stream.send(connection_init_msg).await.map_err(|_err| { + graphql::Error::builder() + .message("cannot send connection init through websocket connection") + .extension_code("WEBSOCKET_INIT_ERROR") + .build() + })?; + + let resp = tokio::time::timeout(CONNECTION_ACK_TIMEOUT, stream.next()) + .await + .map_err(|_| { + graphql::Error::builder() + .message("cannot receive connection ack from websocket connection") + .extension_code("WEBSOCKET_ACK_ERROR_TIMEOUT") + .build() + })?; + if !matches!(resp, Some(Ok(ServerMessage::ConnectionAck))) { + return Err(graphql::Error::builder() + .message("didn't receive the connection ack from websocket connection") + .extension_code("WEBSOCKET_ACK_ERROR") + .build()); + } + + Ok(Self { + stream, + id, + protocol, + completed: false, + terminated: false, + }) + } +} + +#[derive(thiserror::Error, Debug)] +pub(crate) enum Error { + #[error("websocket error")] + WebSocketError(#[from] tokio_tungstenite::tungstenite::Error), + #[error("deserialization/serialization error")] + SerdeError(#[from] serde_json::Error), +} + +pub(crate) fn convert_websocket_stream( + stream: WebSocketStream, + id: String, +) -> impl Stream> + Sink +where + T: AsyncRead + AsyncWrite + Unpin, +{ + stream + .with(|client_message: ClientMessage| { + // It applies to the Sink + future::ready(match serde_json::to_string(&client_message) { + Ok(client_message_str) => Ok(Message::Text(client_message_str)), + Err(err) => Err(Error::SerdeError(err)), + }) + }) + .map(move |msg| match msg { + // It applies to the Stream + Ok(Message::Text(text)) => serde_json::from_str(&text), + Ok(Message::Binary(bin)) => serde_json::from_slice(&bin), + Ok(Message::Ping(payload)) => Ok(ServerMessage::Ping { + payload: serde_json::from_slice(&payload).ok(), + }), + Ok(Message::Pong(payload)) => Ok(ServerMessage::Pong { + payload: serde_json::from_slice(&payload).ok(), + }), + Ok(Message::Close(None)) => Ok(ServerMessage::Complete { id: id.to_string() }), + Ok(Message::Close(Some(CloseFrame{ code, reason }))) => { + if code == CloseCode::Normal { + Ok(ServerMessage::Complete { id: id.to_string() }) + } else { + Ok(ServerMessage::Error { + id: id.to_string(), + payload: ServerError::Error( + graphql::Error::builder() + .message(format!("websocket connection has been closed with error code '{code}' and reason '{reason}'")) + .extension_code("WEBSOCKET_CLOSE_ERROR") + .build(), + ), + }) + } + } + Ok(Message::Frame(frame)) => serde_json::from_slice(frame.payload()), + Err(err) => { + tracing::error!("cannot consume more message on websocket stream: {err:?}"); + + Ok(ServerMessage::Error { + id: id.to_string(), + payload: ServerError::Error( + graphql::Error::builder() + .message("cannot read message from websocket") + .extension_code("WEBSOCKET_MESSAGE_ERROR") + .build(), + ), + }) + } + }) +} + +impl Stream for GraphqlWebSocket 
+where + S: Stream> + Sink, +{ + type Item = graphql::Response; + + fn poll_next( + mut self: Pin<&mut Self>, + cx: &mut std::task::Context<'_>, + ) -> Poll> { + let mut this = self.as_mut().project(); + + match Pin::new(&mut this.stream).poll_next(cx) { + Poll::Ready(message) => match message { + Some(server_message) => match server_message { + Ok(server_message) => { + if let Some(id) = &server_message.id() { + if this.id != id { + tracing::error!("we should not receive data from other subscriptions, closing the stream"); + return Poll::Ready(None); + } + } + if let ServerMessage::Ping { .. } = server_message { + // Send pong asynchronously + let _ = Pin::new( + &mut Pin::new(&mut this.stream) + .send(ClientMessage::Pong { payload: None }), + ) + .poll(cx); + } + match server_message.into_graphql_response() { + (None, true) => Poll::Ready(None), + // For ignored message like ACK, Ping, Pong, etc... + (None, false) => self.poll_next(cx), + (Some(resp), _) => Poll::Ready(Some(resp)), + } + } + Err(err) => Poll::Ready( + graphql::Response::builder() + .error( + graphql::Error::builder() + .message(format!( + "cannot deserialize websocket server message: {err:?}" + )) + .extension_code("INVALID_WEBSOCKET_SERVER_MESSAGE_FORMAT") + .build(), + ) + .build() + .into(), + ), + }, + None => Poll::Ready(None), + }, + Poll::Pending => Poll::Pending, + } + } +} + +impl Sink for GraphqlWebSocket +where + S: Stream> + Sink, +{ + type Error = graphql::Error; + + fn poll_ready( + self: Pin<&mut Self>, + cx: &mut std::task::Context<'_>, + ) -> Poll> { + let mut this = self.project(); + + match Pin::new(&mut this.stream).poll_ready(cx) { + Poll::Ready(Ok(_)) => Poll::Ready(Ok(())), + Poll::Ready(Err(_err)) => Poll::Ready(Err("websocket connection error")), + Poll::Pending => Poll::Pending, + } + .map_err(|err| { + graphql::Error::builder() + .message(format!("cannot establish websocket connection: {err}")) + .extension_code("WEBSOCKET_CONNECTION_ERROR") + .build() + }) + } + + fn start_send(self: Pin<&mut Self>, item: graphql::Request) -> Result<(), Self::Error> { + let mut this = self.project(); + + Pin::new(&mut this.stream) + .start_send(this.protocol.subscribe(this.id.to_string(), item)) + .map_err(|_err| { + graphql::Error::builder() + .message("cannot send to websocket connection") + .extension_code("WEBSOCKET_CONNECTION_ERROR") + .build() + }) + } + + fn poll_flush( + self: Pin<&mut Self>, + cx: &mut std::task::Context<'_>, + ) -> Poll> { + let mut this = self.project(); + Pin::new(&mut this.stream).poll_flush(cx).map_err(|_err| { + graphql::Error::builder() + .message("cannot flush to websocket connection") + .extension_code("WEBSOCKET_CONNECTION_ERROR") + .build() + }) + } + + fn poll_close( + self: Pin<&mut Self>, + cx: &mut std::task::Context<'_>, + ) -> Poll> { + let mut this = self.project(); + if !*this.completed { + match Pin::new( + &mut Pin::new(&mut this.stream).send(this.protocol.complete(this.id.to_string())), + ) + .poll(cx) + { + Poll::Ready(_) => { + *this.completed = true; + } + Poll::Pending => { + return Poll::Pending; + } + } + } + if let WebSocketProtocol::SubscriptionsTransportWs = this.protocol { + if !*this.terminated { + match Pin::new( + &mut Pin::new(&mut this.stream).send(ClientMessage::ConnectionTerminate), + ) + .poll(cx) + { + Poll::Ready(_) => { + *this.terminated = true; + } + Poll::Pending => { + return Poll::Pending; + } + } + } + } + Pin::new(&mut this.stream).poll_close(cx).map_err(|_err| { + graphql::Error::builder() + .message("cannot close websocket 
connection") + .extension_code("WEBSOCKET_CONNECTION_ERROR") + .build() + }) + } +} + +#[derive(Deserialize, Serialize)] +struct WithId { + id: String, +} + +#[cfg(test)] +mod tests { + use std::convert::Infallible; + use std::net::SocketAddr; + use std::str::FromStr; + + use axum::extract::ws::Message as AxumWsMessage; + use axum::extract::WebSocketUpgrade; + use axum::response::IntoResponse; + use axum::routing::get; + use axum::Router; + use axum::Server; + use futures::StreamExt; + use http::HeaderValue; + use tokio_tungstenite::connect_async; + use tokio_tungstenite::tungstenite::client::IntoClientRequest; + use uuid::Uuid; + + use super::*; + use crate::graphql::Request; + + async fn emulate_correct_websocket_server_new_protocol(socket_addr: SocketAddr) { + async fn ws_handler(ws: WebSocketUpgrade) -> Result { + let res = ws.on_upgrade(move |mut socket| async move { + let connection_ack = socket.recv().await.unwrap().unwrap().into_text().unwrap(); + let ack_msg: ClientMessage = serde_json::from_str(&connection_ack).unwrap(); + if let ClientMessage::ConnectionInit { payload } = ack_msg { + assert_eq!(payload, Some(serde_json_bytes::json!({"connectionParams": { + "token": "XXX" + }}))); + } else { + panic!("it should be a connection init message"); + } + + socket + .send(AxumWsMessage::Text( + serde_json::to_string(&ServerMessage::ConnectionAck).unwrap(), + )) + .await + .unwrap(); + let new_message = socket.recv().await.unwrap().unwrap().into_text().unwrap(); + let subscribe_msg: ClientMessage = serde_json::from_str(&new_message).unwrap(); + assert!(matches!(subscribe_msg, ClientMessage::Subscribe { .. })); + #[allow(unused_assignments)] + let mut client_id = None; + if let ClientMessage::Subscribe { payload, id } = subscribe_msg { + client_id = Some(id); + assert_eq!( + payload, + Request::builder() + .query("subscription {\n userWasCreated {\n username\n }\n}") + .build() + ); + } else { + panic!("we should receive a subscribe message"); + } + + socket + .send(AxumWsMessage::Text( + "coucou".to_string(), + )) + .await + .unwrap(); + + socket + .send(AxumWsMessage::Text( + serde_json::to_string(&ServerMessage::Next { id: client_id.clone().unwrap(), payload: graphql::Response::builder().data(serde_json_bytes::json!({"userWasCreated": {"username": "ada_lovelace"}})).build() }).unwrap(), + )) + .await + .unwrap(); + + socket + .send(AxumWsMessage::Text( + serde_json::to_string(&ServerMessage::Ping { payload: None }).unwrap(), + )) + .await + .unwrap(); + + let pong_message = socket.next().await.unwrap().unwrap(); + assert_eq!(pong_message, AxumWsMessage::Text( + serde_json::to_string(&ClientMessage::Pong { payload: None }).unwrap(), + )); + + socket + .send(AxumWsMessage::Text( + serde_json::to_string(&ServerMessage::Ping { payload: None }).unwrap(), + )) + .await + .unwrap(); + + let pong_message = socket.next().await.unwrap().unwrap(); + assert_eq!(pong_message, AxumWsMessage::Text( + serde_json::to_string(&ClientMessage::Pong { payload: None }).unwrap(), + )); + + socket + .send(AxumWsMessage::Text( + serde_json::to_string(&ServerMessage::Complete { id: client_id.unwrap() }).unwrap(), + )) + .await + .unwrap(); + + let terminate_sub = socket.recv().await.unwrap().unwrap().into_text().unwrap(); + let terminate_msg: ClientMessage = serde_json::from_str(&terminate_sub).unwrap(); + assert!(matches!(terminate_msg, ClientMessage::ConnectionTerminate)); + socket.close().await.unwrap(); + }); + + Ok(res) + } + + let app = Router::new().route("/ws", get(ws_handler)); + let server = 
Server::bind(&socket_addr).serve(app.into_make_service()); + server.await.unwrap(); + } + + async fn emulate_correct_websocket_server_old_protocol(socket_addr: SocketAddr) { + async fn ws_handler(ws: WebSocketUpgrade) -> Result { + let res = ws.on_upgrade(move |mut socket| async move { + let init_connection = socket.recv().await.unwrap().unwrap().into_text().unwrap(); + let init_msg: ClientMessage = serde_json::from_str(&init_connection).unwrap(); + assert!(matches!(init_msg, ClientMessage::ConnectionInit { .. })); + + socket + .send(AxumWsMessage::Text( + serde_json::to_string(&ServerMessage::ConnectionAck).unwrap(), + )) + .await + .unwrap(); + socket + .send(AxumWsMessage::Text( + serde_json::to_string(&ServerMessage::KeepAlive).unwrap(), + )) + .await + .unwrap(); + let new_message = socket.recv().await.unwrap().unwrap().into_text().unwrap(); + let subscribe_msg: ClientMessage = serde_json::from_str(&new_message).unwrap(); + assert!(matches!(subscribe_msg, ClientMessage::OldStart { .. })); + #[allow(unused_assignments)] + let mut client_id = None; + if let ClientMessage::OldStart { payload, id } = subscribe_msg { + client_id = Some(id); + assert_eq!( + payload, + Request::builder() + .query("subscription {\n userWasCreated {\n username\n }\n}") + .build() + ); + } else { + panic!("we should receive a subscribe message"); + } + + socket + .send(AxumWsMessage::Text( + "coucou".to_string(), + )) + .await + .unwrap(); + + socket + .send(AxumWsMessage::Text( + serde_json::to_string(&ServerMessage::Next { id: client_id.clone().unwrap(), payload: graphql::Response::builder().data(serde_json_bytes::json!({"userWasCreated": {"username": "ada_lovelace"}})).build() }).unwrap(), + )) + .await + .unwrap(); + socket + .send(AxumWsMessage::Text( + serde_json::to_string(&ServerMessage::KeepAlive).unwrap(), + )) + .await + .unwrap(); + + let stop_sub = socket.recv().await.unwrap().unwrap().into_text().unwrap(); + let stop_msg: ClientMessage = serde_json::from_str(&stop_sub).unwrap(); + assert!(matches!(stop_msg, ClientMessage::OldStop { .. 
})); + + let terminate_sub = socket.recv().await.unwrap().unwrap().into_text().unwrap(); + let terminate_msg: ClientMessage = serde_json::from_str(&terminate_sub).unwrap(); + assert!(matches!(terminate_msg, ClientMessage::ConnectionTerminate)); + + socket.close().await.unwrap(); + }); + + Ok(res) + } + + let app = Router::new().route("/ws", get(ws_handler)); + let server = Server::bind(&socket_addr).serve(app.into_make_service()); + server.await.unwrap(); + } + + #[tokio::test] + async fn test_ws_connection_new_proto() { + let socket_addr = SocketAddr::from_str("127.0.0.1:3900").unwrap(); + let join_task = + tokio::task::spawn(emulate_correct_websocket_server_new_protocol(socket_addr)); + let url = url::Url::parse("ws://localhost:3900/ws").unwrap(); + let mut request = url.into_client_request().unwrap(); + request.headers_mut().insert( + http::header::SEC_WEBSOCKET_PROTOCOL, + HeaderValue::from_static("graphql-transport-ws"), + ); + let (ws_stream, _resp) = connect_async(request).await.unwrap(); + + let sub_uuid = Uuid::new_v4(); + let gql_stream = GraphqlWebSocket::new( + convert_websocket_stream(ws_stream, sub_uuid.to_string()), + sub_uuid.to_string(), + WebSocketProtocol::GraphqlWs, + Some(serde_json_bytes::json!({ + "token": "XXX" + })), + ) + .await + .unwrap(); + + let sub = "subscription {\n userWasCreated {\n username\n }\n}"; + let (mut gql_sink, mut gql_read_stream) = gql_stream.split(); + let _handle = tokio::task::spawn(async move { + gql_sink + .send(graphql::Request::builder().query(sub).build()) + .await + .unwrap(); + }); + + let next_payload = gql_read_stream.next().await.unwrap(); + assert_eq!(next_payload, graphql::Response::builder() + .error( + graphql::Error::builder() + .message( + "cannot deserialize websocket server message: Error(\"expected value\", line: 1, column: 1)".to_string()) + .extension_code("INVALID_WEBSOCKET_SERVER_MESSAGE_FORMAT") + .build(), + ) + .build() + ); + + let next_payload = gql_read_stream.next().await.unwrap(); + assert_eq!( + next_payload, + graphql::Response::builder() + .subscribed(true) + .data(serde_json_bytes::json!({"userWasCreated": {"username": "ada_lovelace"}})) + .build() + ); + assert!( + gql_read_stream.next().await.is_none(), + "It should be completed" + ); + + join_task.abort(); + } + + #[tokio::test] + async fn test_ws_connection_old_proto() { + let socket_addr = SocketAddr::from_str("127.0.0.1:3901").unwrap(); + let join_task = + tokio::task::spawn(emulate_correct_websocket_server_old_protocol(socket_addr)); + let url = url::Url::parse("ws://localhost:3901/ws").unwrap(); + let mut request = url.into_client_request().unwrap(); + request.headers_mut().insert( + http::header::SEC_WEBSOCKET_PROTOCOL, + HeaderValue::from_static("graphql-ws"), + ); + let (ws_stream, _resp) = connect_async(request).await.unwrap(); + + let sub_uuid = Uuid::new_v4(); + let gql_stream = GraphqlWebSocket::new( + convert_websocket_stream(ws_stream, sub_uuid.to_string()), + sub_uuid.to_string(), + WebSocketProtocol::SubscriptionsTransportWs, + None, + ) + .await + .unwrap(); + + let sub = "subscription {\n userWasCreated {\n username\n }\n}"; + let (mut gql_sink, mut gql_read_stream) = gql_stream.split(); + let _handle = tokio::task::spawn(async move { + gql_sink + .send(graphql::Request::builder().query(sub).build()) + .await + .unwrap(); + gql_sink.close().await.unwrap(); + }); + + let next_payload = gql_read_stream.next().await.unwrap(); + assert_eq!(next_payload, graphql::Response::builder() + .error( + graphql::Error::builder() + .message( + 
"cannot deserialize websocket server message: Error(\"expected value\", line: 1, column: 1)".to_string()) + .extension_code("INVALID_WEBSOCKET_SERVER_MESSAGE_FORMAT") + .build(), + ) + .build() + ); + + let next_payload = gql_read_stream.next().await.unwrap(); + assert_eq!( + next_payload, + graphql::Response::builder() + .subscribed(true) + .data(serde_json_bytes::json!({"userWasCreated": {"username": "ada_lovelace"}})) + .build() + ); + assert!( + gql_read_stream.next().await.is_none(), + "It should be completed" + ); + + join_task.abort(); + } +} diff --git a/apollo-router/src/query_planner/execution.rs b/apollo-router/src/query_planner/execution.rs index 85fba06537..a08fcd46d0 100644 --- a/apollo-router/src/query_planner/execution.rs +++ b/apollo-router/src/query_planner/execution.rs @@ -8,6 +8,7 @@ use tokio_stream::wrappers::BroadcastStream; use tracing::Instrument; use super::log; +use super::subscription::SubscriptionHandle; use super::DeferredNode; use super::PlanNode; use super::QueryPlan; @@ -17,6 +18,7 @@ use crate::graphql::Response; use crate::json_ext::Path; use crate::json_ext::Value; use crate::json_ext::ValueExt; +use crate::plugins::subscription::SubscriptionConfig; use crate::query_planner::FlattenNode; use crate::query_planner::Primary; use crate::query_planner::CONDITION_ELSE_SPAN_NAME; @@ -35,6 +37,7 @@ use crate::spec::Schema; use crate::Context; impl QueryPlan { + #[allow(clippy::too_many_arguments)] /// Execute the plan and return a [`Response`]. pub(crate) async fn execute<'a>( &self, @@ -43,11 +46,14 @@ impl QueryPlan { supergraph_request: &'a Arc>, schema: &'a Arc, sender: futures::channel::mpsc::Sender, + subscription_handle: Option, + subscription_config: &'a Option, ) -> Response { let root = Path::empty(); log::trace_query_plan(&self.root); let deferred_fetches = HashMap::new(); + let (value, subselection, errors) = self .root .execute_recursively( @@ -58,6 +64,9 @@ impl QueryPlan { supergraph_request, deferred_fetches: &deferred_fetches, query: &self.query, + root_node: &self.root, + subscription_handle: &subscription_handle, + subscription_config, }, &root, &Value::default(), @@ -85,10 +94,13 @@ pub(crate) struct ExecutionParameters<'a> { pub(crate) supergraph_request: &'a Arc>, pub(crate) deferred_fetches: &'a HashMap)>>, pub(crate) query: &'a Arc, + pub(crate) root_node: &'a PlanNode, + pub(crate) subscription_handle: &'a Option, + pub(crate) subscription_config: &'a Option, } impl PlanNode { - fn execute_recursively<'a>( + pub(super) fn execute_recursively<'a>( &'a self, parameters: &'a ExecutionParameters<'a>, current_dir: &'a Path, @@ -178,6 +190,27 @@ impl PlanNode { errors = err; subselection = subselect; } + PlanNode::Subscription { primary, rest } => { + if parameters.subscription_handle.is_some() { + errors = primary + .execute_recursively( + parameters, + current_dir, + parent_value, + sender, + rest, + ) + .await; + } else { + tracing::error!("No subscription handle provided for a subscription"); + errors = vec![Error::builder() + .message("no subscription handle provided for a subscription") + .extension_code("NO_SUBSCRIPTION_HANDLE") + .build()]; + }; + + value = Value::default(); + } PlanNode::Fetch(fetch_node) => { let fetch_time_offset = parameters.context.created_at.elapsed().as_nanos() as i64; @@ -249,6 +282,9 @@ impl PlanNode { supergraph_request: parameters.supergraph_request, deferred_fetches: &deferred_fetches, query: parameters.query, + root_node: parameters.root_node, + subscription_handle: parameters.subscription_handle, + 
subscription_config: parameters.subscription_config, }, current_dir, &value, @@ -390,8 +426,11 @@ impl DeferredNode { let sc = parameters.schema.clone(); let orig = parameters.supergraph_request.clone(); let sf = parameters.service_factory.clone(); + let root_node = parameters.root_node.clone(); let ctx = parameters.context.clone(); let query = parameters.query.clone(); + let subscription_handle = parameters.subscription_handle.clone(); + let subscription_config = parameters.subscription_config.clone(); let mut primary_receiver = primary_sender.subscribe(); let mut value = parent_value.clone(); let depends_json = serde_json::to_string(&self.depends).unwrap_or_default(); @@ -428,6 +467,9 @@ impl DeferredNode { supergraph_request: &orig, deferred_fetches: &deferred_fetches, query: &query, + root_node: &root_node, + subscription_handle: &subscription_handle, + subscription_config: &subscription_config, }, &Path::default(), &value, diff --git a/apollo-router/src/query_planner/fetch.rs b/apollo-router/src/query_planner/fetch.rs index 11d494bac8..1eb70661d7 100644 --- a/apollo-router/src/query_planner/fetch.rs +++ b/apollo-router/src/query_planner/fetch.rs @@ -96,15 +96,15 @@ pub(crate) struct FetchNode { pub(crate) output_rewrites: Option>, } -struct Variables { - variables: Object, - paths: HashMap, +pub(crate) struct Variables { + pub(crate) variables: Object, + pub(crate) paths: HashMap, } impl Variables { #[instrument(skip_all, level = "debug", name = "make_variables")] #[allow(clippy::too_many_arguments)] - async fn new( + pub(super) async fn new( requires: &[Selection], variable_usages: &[String], data: &Value, @@ -224,8 +224,7 @@ impl FetchNode { .uri( parameters .schema - .subgraphs() - .find_map(|(name, url)| (name == service_name).then_some(url)) + .subgraph_url(service_name) .unwrap_or_else(|| { panic!( "schema uri for subgraph '{service_name}' should already have been checked" diff --git a/apollo-router/src/query_planner/mod.rs b/apollo-router/src/query_planner/mod.rs index 8043a1181d..9a2e0dd583 100644 --- a/apollo-router/src/query_planner/mod.rs +++ b/apollo-router/src/query_planner/mod.rs @@ -14,9 +14,11 @@ pub(crate) mod fetch; mod plan; pub(crate) mod rewrites; mod selection; +pub(crate) mod subscription; pub use plan::*; pub(crate) const FETCH_SPAN_NAME: &str = "fetch"; +pub(crate) const SUBSCRIBE_SPAN_NAME: &str = "subscribe"; pub(crate) const FLATTEN_SPAN_NAME: &str = "flatten"; pub(crate) const SEQUENCE_SPAN_NAME: &str = "sequence"; pub(crate) const PARALLEL_SPAN_NAME: &str = "parallel"; diff --git a/apollo-router/src/query_planner/plan.rs b/apollo-router/src/query_planner/plan.rs index 5d4e408b6c..122caa82a4 100644 --- a/apollo-router/src/query_planner/plan.rs +++ b/apollo-router/src/query_planner/plan.rs @@ -8,6 +8,7 @@ use serde::Serialize; pub(crate) use self::fetch::OperationKind; use super::fetch; +use super::subscription::SubscriptionNode; use crate::error::QueryPlannerError; use crate::json_ext; use crate::json_ext::Object; @@ -57,6 +58,13 @@ impl QueryPlan { pub(crate) fn is_deferred(&self, operation: Option<&str>, variables: &Object) -> bool { self.root.is_deferred(operation, variables, &self.query) } + + pub(crate) fn is_subscription(&self, operation: Option<&str>) -> bool { + match self.query.operation(operation) { + Some(op) => matches!(op.kind(), OperationKind::Subscription), + None => false, + } + } } /// Query plans are composed of a set of nodes. 
@@ -86,6 +94,11 @@ pub(crate) enum PlanNode { deferred: Vec, }, + Subscription { + primary: SubscriptionNode, + rest: Option>, + }, + #[serde(rename_all = "camelCase")] Condition { condition: String, @@ -105,6 +118,7 @@ impl PlanNode { .as_ref() .map(|n| n.contains_mutations()) .unwrap_or(false), + Self::Subscription { .. } => false, Self::Flatten(_) => false, Self::Condition { if_clause, @@ -142,6 +156,7 @@ impl PlanNode { Self::Flatten(node) => node.node.is_deferred(operation, variables, query), Self::Fetch(..) => false, Self::Defer { .. } => true, + Self::Subscription { .. } => false, Self::Condition { if_clause, else_clause, @@ -264,6 +279,12 @@ impl PlanNode { Ok(()) } Self::Fetch(..) => Ok(()), + Self::Subscription { rest, .. } => { + if let Some(node) = rest { + node.collect_subselections(schema, initial_path, kind, subselections)?; + } + Ok(()) + } Self::Condition { if_clause, else_clause, @@ -290,6 +311,13 @@ impl PlanNode { Box::new(nodes.iter().flat_map(|x| x.service_usage())) } Self::Fetch(fetch) => Box::new(Some(fetch.service_name()).into_iter()), + Self::Subscription { primary, rest } => match rest { + Some(rest) => Box::new( + rest.service_usage() + .chain(Some(primary.service_name.as_str()).into_iter()), + ) as Box + 'a>, + None => Box::new(Some(primary.service_name.as_str()).into_iter()), + }, Self::Flatten(flatten) => flatten.node.service_usage(), Self::Defer { primary, deferred } => primary .node diff --git a/apollo-router/src/query_planner/subscription.rs b/apollo-router/src/query_planner/subscription.rs new file mode 100644 index 0000000000..f4a1c6eec0 --- /dev/null +++ b/apollo-router/src/query_planner/subscription.rs @@ -0,0 +1,451 @@ +use std::sync::atomic::AtomicUsize; +use std::sync::atomic::Ordering; +use std::time::Instant; + +use futures::channel::mpsc; +use futures::channel::mpsc::SendError; +use futures::future; +use futures::SinkExt; +use futures::Stream; +use futures::StreamExt; +use router_bridge::planner::UsageReporting; +use serde::Deserialize; +use serde::Serialize; +use serde_json_bytes::Value; +use tokio::sync::broadcast; +use tower::ServiceExt; +use tracing::field; +use tracing::Span; +use tracing_futures::Instrument; + +use super::execution::ExecutionParameters; +use super::fetch::Variables; +use super::rewrites; +use super::rewrites::DataRewrite; +use super::OperationKind; +use super::PlanNode; +use crate::error::FetchError; +use crate::graphql; +use crate::graphql::Error; +use crate::graphql::Request; +use crate::graphql::Response; +use crate::http_ext; +use crate::json_ext::Path; +use crate::notification::HandleStream; +use crate::plugins::telemetry::tracing::apollo_telemetry::APOLLO_PRIVATE_DURATION_NS; +use crate::plugins::telemetry::GRAPHQL_OPERATION_NAME_CONTEXT_KEY; +use crate::plugins::telemetry::LOGGING_DISPLAY_BODY; +use crate::query_planner::SUBSCRIBE_SPAN_NAME; +use crate::services::SubgraphRequest; + +pub(crate) const SUBSCRIPTION_EVENT_SPAN_NAME: &str = "subscription_event"; +pub(crate) static OPENED_SUBSCRIPTIONS: AtomicUsize = AtomicUsize::new(0); +pub(crate) struct SubscriptionHandle { + pub(crate) closed_signal: broadcast::Receiver<()>, +} + +impl Clone for SubscriptionHandle { + fn clone(&self) -> Self { + Self { + closed_signal: self.closed_signal.resubscribe(), + } + } +} + +impl SubscriptionHandle { + pub(crate) fn new(closed_signal: broadcast::Receiver<()>) -> Self { + Self { closed_signal } + } +} + +#[derive(Debug, Clone, PartialEq, Deserialize, Serialize)] +#[serde(rename_all = "camelCase")] +pub(crate) struct 
SubscriptionNode { + /// The name of the service or subgraph that the subscription is querying. + pub(crate) service_name: String, + + /// The variables that are used for the subgraph subscription. + pub(crate) variable_usages: Vec, + + /// The GraphQL subquery that is used for the subscription. + pub(crate) operation: String, + + /// The GraphQL subquery operation name. + pub(crate) operation_name: Option, + + /// The GraphQL operation kind that is used for the fetch. + pub(crate) operation_kind: OperationKind, + + // Optionally describes a number of "rewrites" that query plan executors should apply to the data that is sent as input of this subscription. + pub(crate) input_rewrites: Option>, + + // Optionally describes a number of "rewrites" to apply to the data that received from a subscription (and before it is applied to the current in-memory results). + pub(crate) output_rewrites: Option>, +} + +impl SubscriptionNode { + pub(crate) fn execute_recursively<'a>( + &'a self, + parameters: &'a ExecutionParameters<'a>, + current_dir: &'a Path, + parent_value: &'a Value, + mut sender: futures::channel::mpsc::Sender, + rest: &'a Option>, + ) -> future::BoxFuture> { + if parameters.subscription_handle.is_none() { + tracing::error!("No subscription handle provided for a subscription"); + return Box::pin(async { + vec![Error::builder() + .message("no subscription handle provided for a subscription") + .extension_code("NO_SUBSCRIPTION_HANDLE") + .build()] + }); + }; + if let Some(max_opened_subscriptions) = parameters + .subscription_config + .as_ref() + .and_then(|s| s.max_opened_subscriptions) + { + if OPENED_SUBSCRIPTIONS.load(Ordering::Relaxed) >= max_opened_subscriptions { + return Box::pin(async { + vec![Error::builder() + .message("can't open new subscription, limit reached") + .extension_code("SUBSCRIPTION_MAX_LIMIT") + .build()] + }); + } + } + let subscription_handle = parameters + .subscription_handle + .as_ref() + .expect("checked above; qed"); + let mode = match parameters.subscription_config.as_ref().map(|c| &c.mode) { + Some(mode) => mode.get_subgraph_config(&self.service_name), + None => { + return Box::pin(async { + vec![Error::builder() + .message("subscription support is not enabled") + .extension_code("SUBSCRIPTION_DISABLED") + .build()] + }); + } + }; + let output_rewrites = self.output_rewrites.clone(); + let service_name = self.service_name.clone(); + + Box::pin(async move { + let cloned_qp = parameters.root_node.clone(); + let current_dir_cloned = current_dir.clone(); + let context = parameters.context.clone(); + let service_factory = parameters.service_factory.clone(); + let schema = parameters.schema.clone(); + let supergraph_request = parameters.supergraph_request.clone(); + let deferred_fetches = parameters.deferred_fetches.clone(); + let query = parameters.query.clone(); + let subscription_handle = subscription_handle.clone(); + let subscription_config = parameters.subscription_config.clone(); + let rest = rest.clone(); + + match mode { + Some(_) => { + let (tx_handle, mut rx_handle) = + mpsc::channel::>(1); + + let _subscription_task = tokio::task::spawn(async move { + let sub_handle = match rx_handle.next().await { + Some(ws) => ws, + None => { + tracing::debug!("cannot get the graphql subscription stream"); + let _ = sender.send(graphql::Response::builder().error(graphql::Error::builder().message("cannot get the subscription stream from subgraph").extension_code("SUBSCRIPTION_STREAM_GET").build()).build()).await; + return; + } + }; + + let parameters = 
ExecutionParameters { + context: &context, + service_factory: &service_factory, + schema: &schema, + supergraph_request: &supergraph_request, + deferred_fetches: &deferred_fetches, + query: &query, + root_node: &cloned_qp, + subscription_handle: &Some(subscription_handle), + subscription_config: &subscription_config, + }; + + Self::task( + sub_handle, + ¶meters, + rest, + output_rewrites, + ¤t_dir_cloned, + sender.clone(), + service_name, + ) + .await; + }); + + let fetch_time_offset = + parameters.context.created_at.elapsed().as_nanos() as i64; + match self + .subgraph_call(parameters, current_dir, parent_value, tx_handle) + .instrument(tracing::info_span!( + SUBSCRIBE_SPAN_NAME, + "otel.kind" = "INTERNAL", + "apollo.subgraph.name" = self.service_name.as_str(), + "apollo_private.sent_time_offset" = fetch_time_offset + )) + .await + { + Ok(e) => e, + Err(err) => { + failfast_error!("subgraph call fetch error: {}", err); + vec![err.to_graphql_error(Some(current_dir.to_owned()))] + } + } + } + None => { + vec![Error::builder() + .message(format!( + "subscription mode is not configured for subgraph {:?}", + self.service_name + )) + .extension_code("INVALID_SUBSCRIPTION_MODE") + .build()] + } + } + }) + } + + #[allow(clippy::too_many_arguments)] + pub(crate) async fn task<'a>( + mut receiver: impl Stream + Unpin, + parameters: &'a ExecutionParameters<'a>, + rest: Option>, + output_rewrites: Option>, + current_dir: &'a Path, + mut sender: futures::channel::mpsc::Sender, + service_name: String, + ) { + let limit_is_set = parameters + .subscription_config + .as_ref() + .and_then(|s| s.max_opened_subscriptions) + .is_some(); + + if limit_is_set { + OPENED_SUBSCRIPTIONS.fetch_add(1, Ordering::Relaxed); + } + let mut subscription_handle = parameters + .subscription_handle + .clone() + .expect("it has already been checked before; qed"); + + let operation_signature = if let Some(usage_reporting) = parameters + .context + .private_entries + .lock() + .get::() + { + usage_reporting.stats_report_key.clone() + } else { + String::new() + }; + + let operation_name = parameters + .context + .get::<_, String>(GRAPHQL_OPERATION_NAME_CONTEXT_KEY) + .ok() + .flatten() + .unwrap_or_default(); + let display_body = parameters.context.contains_key(LOGGING_DISPLAY_BODY); + + loop { + tokio::select! 
{ + _ = subscription_handle.closed_signal.recv() => { + break; + } + message = receiver.next() => { + match message { + Some(mut val) => { + if display_body { + tracing::info!(http.request.body = ?val, apollo.subgraph.name = %service_name, "Subscription event body from subgraph {service_name:?}"); + } + val.created_at = Some(Instant::now()); + if let Some(data) = &mut val.data { + rewrites::apply_rewrites(parameters.schema, data, &output_rewrites); + } + + + if let Err(err) = + Self::dispatch_value(val, parameters, &rest, current_dir, sender.clone()) + .instrument(tracing::info_span!(SUBSCRIPTION_EVENT_SPAN_NAME, + graphql.document = parameters.query.string, + graphql.operation.name = %operation_name, + otel.kind = "INTERNAL", + apollo_private.operation_signature = %operation_signature, + apollo_private.duration_ns = field::Empty,) + ) + .await + { + if !err.is_disconnected() { + tracing::error!("cannot send the subscription to the client: {err:?}"); + } + break; + } + } + None => break, + } + } + } + } + if let Err(err) = sender.close().await { + tracing::trace!("cannot close the sender {err:?}"); + } + + tracing::trace!("Leaving the task for subscription"); + if limit_is_set { + OPENED_SUBSCRIPTIONS.fetch_sub(1, Ordering::Relaxed); + } + } + + async fn dispatch_value<'a>( + mut val: graphql::Response, + parameters: &'a ExecutionParameters<'a>, + rest: &Option>, + current_dir: &'a Path, + mut sender: futures::channel::mpsc::Sender, + ) -> Result<(), SendError> { + let start = Instant::now(); + let span = Span::current(); + + match rest { + Some(rest) => { + let created_at = val.created_at.take(); + let (value, subselection, mut errors) = rest + .execute_recursively( + parameters, + current_dir, + &val.data.unwrap_or_default(), + sender.clone(), + ) + .in_current_span() + .await; + + errors.append(&mut val.errors); + + sender + .send( + Response::builder() + .data(value) + .and_subscribed(val.subscribed) + .and_subselection(subselection) + .errors(errors) + .extensions(val.extensions) + .and_path(val.path) + .and_created_at(created_at) + .build(), + ) + .await?; + } + None => { + sender.send(val).await?; + } + } + span.record( + APOLLO_PRIVATE_DURATION_NS, + start.elapsed().as_nanos() as i64, + ); + + Ok(()) + } + + #[allow(clippy::too_many_arguments)] + pub(crate) async fn subgraph_call<'a>( + &'a self, + parameters: &'a ExecutionParameters<'a>, + current_dir: &'a Path, + data: &Value, + tx_gql: mpsc::Sender>, + ) -> Result, FetchError> { + let SubscriptionNode { + operation, + operation_name, + service_name, + .. + } = self; + + let Variables { variables, .. 
} = match Variables::new( + &[], + self.variable_usages.as_ref(), + data, + current_dir, + // Needs the original request here + parameters.supergraph_request, + parameters.schema, + &self.input_rewrites, + ) + .await + { + Some(variables) => variables, + None => { + return Ok(Vec::new()); + } + }; + + let subgraph_request = SubgraphRequest::builder() + .supergraph_request(parameters.supergraph_request.clone()) + .subgraph_request( + http_ext::Request::builder() + .method(http::Method::POST) + .uri( + parameters + .schema + .subgraph_url(service_name) + .unwrap_or_else(|| { + panic!( + "schema uri for subgraph '{service_name}' should already have been checked" + ) + }) + .clone(), + ) + .body( + Request::builder() + .query(operation) + .and_operation_name(operation_name.clone()) + .variables(variables.clone()) + .build(), + ) + .build() + .expect("it won't fail because the url is correct and already checked; qed"), + ) + .operation_kind(OperationKind::Subscription) + .context(parameters.context.clone()) + .subscription_stream(tx_gql) + .and_connection_closed_signal(parameters.subscription_handle.as_ref().map(|s| s.closed_signal.resubscribe())) + .build(); + + let service = parameters + .service_factory + .create(service_name) + .expect("we already checked that the service exists during planning; qed"); + + let (_parts, response) = service + .oneshot(subgraph_request) + .instrument(tracing::trace_span!("subscription_call")) + .await + // TODO this is a problem since it restores details about failed service + // when errors have been redacted in the include_subgraph_errors module. + // Unfortunately, not easy to fix here, because at this point we don't + // know if we should be redacting errors for this subgraph... + .map_err(|e| FetchError::SubrequestHttpError { + service: service_name.to_string(), + reason: e.to_string(), + status_code: None, + })? 
+ .response + .into_parts(); + + Ok(response.errors) + } +} diff --git a/apollo-router/src/query_planner/tests.rs b/apollo-router/src/query_planner/tests.rs index 9ac82bd93c..c218db8e1b 100644 --- a/apollo-router/src/query_planner/tests.rs +++ b/apollo-router/src/query_planner/tests.rs @@ -108,6 +108,8 @@ async fn mock_subgraph_service_withf_panics_should_be_reported_as_service_closed &Default::default(), &Arc::new(Schema::parse_test(test_schema!(), &Default::default()).unwrap()), sender, + None, + &None, ) .await; assert_eq!(result.errors.len(), 1); @@ -165,6 +167,8 @@ async fn fetch_includes_operation_name() { &Default::default(), &Arc::new(Schema::parse_test(test_schema!(), &Default::default()).unwrap()), sender, + None, + &None, ) .await; @@ -219,6 +223,8 @@ async fn fetch_makes_post_requests() { &Default::default(), &Arc::new(Schema::parse_test(test_schema!(), &Default::default()).unwrap()), sender, + None, + &None, ) .await; @@ -356,7 +362,15 @@ async fn defer() { }); let response = query_plan - .execute(&Context::new(), &sf, &Default::default(), &schema, sender) + .execute( + &Context::new(), + &sf, + &Default::default(), + &schema, + sender, + None, + &None, + ) .await; // primary response @@ -437,7 +451,6 @@ async fn defer_if_condition() { )])), plugins: Default::default(), }); - let defer_primary_response = query_plan .execute( &Context::new(), @@ -453,6 +466,8 @@ async fn defer_if_condition() { ), &schema, sender, + None, + &None, ) .await; @@ -471,6 +486,8 @@ async fn defer_if_condition() { &Default::default(), &schema, default_sender, + None, + &None, ) .await; @@ -495,6 +512,8 @@ async fn defer_if_condition() { ), &schema, sender, + None, + &None, ) .await; insta::assert_json_snapshot!(defer_disabled); @@ -612,6 +631,8 @@ async fn dependent_mutations() { &Default::default(), &Arc::new(Schema::parse_test(schema, &Default::default()).unwrap()), sender, + None, + &None, ) .await; } diff --git a/apollo-router/src/response.rs b/apollo-router/src/response.rs index 3cb0f48ed0..71e8610e2c 100644 --- a/apollo-router/src/response.rs +++ b/apollo-router/src/response.rs @@ -1,4 +1,5 @@ #![allow(missing_docs)] // FIXME +use std::time::Instant; use bytes::Bytes; use serde::Deserialize; @@ -41,6 +42,13 @@ pub struct Response { #[serde(skip_serializing_if = "Option::is_none", default)] pub has_next: Option, + #[serde(skip, default)] + pub subscribed: Option, + + /// Used for subscription event to compute the duration of a subscription event + #[serde(skip, default)] + pub created_at: Option, + #[serde(skip_serializing)] pub subselection: Option, @@ -61,7 +69,9 @@ impl Response { extensions: Map, subselection: Option, has_next: Option, + subscribed: Option, incremental: Vec, + created_at: Option, ) -> Self { Self { label, @@ -71,7 +81,9 @@ impl Response { extensions, subselection, has_next, + subscribed, incremental, + created_at, } } @@ -162,7 +174,9 @@ impl Response { extensions, subselection: None, has_next, + subscribed: None, incremental, + created_at: None, }) } } diff --git a/apollo-router/src/router_factory.rs b/apollo-router/src/router_factory.rs index a63d1859d2..1166139f96 100644 --- a/apollo-router/src/router_factory.rs +++ b/apollo-router/src/router_factory.rs @@ -21,6 +21,8 @@ use crate::configuration::TlsSubgraph; use crate::plugin::DynPlugin; use crate::plugin::Handler; use crate::plugin::PluginFactory; +use crate::plugins::subscription::Subscription; +use crate::plugins::subscription::APOLLO_SUBSCRIPTION_PLUGIN; use crate::plugins::traffic_shaping::TrafficShaping; use 
crate::plugins::traffic_shaping::APOLLO_TRAFFIC_SHAPING; use crate::query_planner::BridgeQueryPlanner; @@ -165,6 +167,12 @@ impl RouterSuperServiceFactory for YamlRouterFactory { let mut builder = PluggableSupergraphServiceBuilder::new(bridge_query_planner); builder = builder.with_configuration(configuration.clone()); + let subscription_plugin_conf = plugins + .iter() + .find(|i| i.0.as_str() == APOLLO_SUBSCRIPTION_PLUGIN) + .and_then(|plugin| (*plugin.1).as_any().downcast_ref::()) + .map(|p| p.config.clone()); + for (name, _) in schema.subgraphs() { let subgraph_root_store = configuration .tls @@ -195,10 +203,19 @@ impl RouterSuperServiceFactory for YamlRouterFactory { .unwrap_or(configuration.apq.subgraph.all.enabled), subgraph_root_store, shaping.enable_subgraph_http2(name), + subscription_plugin_conf.clone(), + configuration.notify.clone(), ), ), ), - None => Either::B(SubgraphService::new(name, false, subgraph_root_store, true)), + None => Either::B(SubgraphService::new( + name, + false, + subgraph_root_store, + true, + subscription_plugin_conf.clone(), + configuration.notify.clone(), + )), }; builder = builder.with_subgraph_service(name, subgraph_service); } @@ -274,6 +291,11 @@ impl YamlRouterFactory { let mut builder = PluggableSupergraphServiceBuilder::new(bridge_query_planner); builder = builder.with_configuration(configuration.clone()); + let subscription_plugin_conf = plugins + .iter() + .find(|i| i.0.as_str() == APOLLO_SUBSCRIPTION_PLUGIN) + .and_then(|plugin| (*plugin.1).as_any().downcast_ref::()) + .map(|p| p.config.clone()); for (name, _) in schema.subgraphs() { let subgraph_root_store = configuration .tls @@ -304,10 +326,19 @@ impl YamlRouterFactory { .unwrap_or(configuration.apq.subgraph.all.enabled), subgraph_root_store, shaping.enable_subgraph_http2(name), + subscription_plugin_conf.clone(), + configuration.notify.clone(), ), ), ), - None => Either::B(SubgraphService::new(name, false, subgraph_root_store, true)), + None => Either::B(SubgraphService::new( + name, + false, + subgraph_root_store, + true, + subscription_plugin_conf.clone(), + configuration.notify.clone(), + )), }; builder = builder.with_subgraph_service(name, subgraph_service); } @@ -404,6 +435,7 @@ pub(crate) async fn create_plugins( let plugin_registry: Vec<&'static Lazy> = crate::plugin::plugins().collect(); let mut plugin_instances = Vec::new(); let extra = extra_plugins.unwrap_or_default(); + let notify = configuration.notify.clone(); for (name, mut configuration) in configuration.plugins().into_iter() { if extra.iter().any(|(n, _)| *n == name) { @@ -422,7 +454,7 @@ pub(crate) async fn create_plugins( inject_schema_id(schema, &mut configuration); } match factory - .create_instance(&configuration, schema.as_string().clone()) + .create_instance(&configuration, schema.as_string().clone(), notify.clone()) .await { Ok(plugin) => { @@ -473,7 +505,7 @@ pub(crate) async fn create_plugins( inject_schema_id(schema, &mut config); } match factory - .create_instance(&config, schema.as_string().clone()) + .create_instance(&config, schema.as_string().clone(), notify.clone()) .await { Ok(plugin) => { diff --git a/apollo-router/src/services/execution_service.rs b/apollo-router/src/services/execution_service.rs index 2c5d28392c..a377d50ca6 100644 --- a/apollo-router/src/services/execution_service.rs +++ b/apollo-router/src/services/execution_service.rs @@ -1,17 +1,22 @@ //! Implements the Execution phase of the request lifecycle. 
use std::future::ready; +use std::pin::Pin; use std::sync::Arc; +use std::task::Context; use std::task::Poll; +use futures::channel::mpsc; use futures::channel::mpsc::Receiver; use futures::channel::mpsc::SendError; use futures::channel::mpsc::Sender; use futures::future::BoxFuture; use futures::stream::once; use futures::SinkExt; +use futures::Stream; use futures::StreamExt; use serde_json_bytes::Value; +use tokio::sync::broadcast; use tower::BoxError; use tower::ServiceBuilder; use tower::ServiceExt; @@ -28,6 +33,10 @@ use crate::json_ext::Object; use crate::json_ext::Path; use crate::json_ext::PathElement; use crate::json_ext::ValueExt; +use crate::plugins::subscription::Subscription; +use crate::plugins::subscription::SubscriptionConfig; +use crate::plugins::subscription::APOLLO_SUBSCRIPTION_PLUGIN; +use crate::query_planner::subscription::SubscriptionHandle; use crate::services::execution; use crate::services::ExecutionRequest; use crate::services::ExecutionResponse; @@ -38,6 +47,32 @@ use crate::spec::Schema; pub(crate) struct ExecutionService { pub(crate) schema: Arc, pub(crate) subgraph_service_factory: Arc, + /// Subscription config if enabled + subscription_config: Option, +} + +type CloseSignal = broadcast::Sender<()>; +// Used to detect when the stream is dropped and then when the client closed the connection +pub(crate) struct StreamWrapper(pub(crate) Receiver, Option); + +impl Stream for StreamWrapper { + type Item = Response; + + fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { + Pin::new(&mut self.0).poll_next(cx) + } +} + +impl Drop for StreamWrapper { + fn drop(&mut self) { + if let Some(closed_signal) = self.1.take() { + if let Err(err) = closed_signal.send(()) { + tracing::trace!("cannot close the subscription: {err:?}"); + } + } + + self.0.close(); + } } impl Service for ExecutionService { @@ -64,15 +99,24 @@ impl Service for ExecutionService { let fut = async move { let context = req.context; let ctx = context.clone(); - let (sender, receiver) = futures::channel::mpsc::channel(10); + let (sender, receiver) = mpsc::channel(10); let variables = req.supergraph_request.body().variables.clone(); let operation_name = req.supergraph_request.body().operation_name.clone(); let is_deferred = req .query_plan .is_deferred(operation_name.as_deref(), &variables); + let is_subscription = req + .query_plan + .is_subscription(operation_name.as_deref()); + let (tx_close_signal, subscription_handle) = if is_subscription { + let (tx_close_signal, rx_close_signal) = broadcast::channel(1); + (Some(tx_close_signal), Some(SubscriptionHandle::new(rx_close_signal))) + } else { + (None, None) + }; - let first = req + let mut first = req .query_plan .execute( &context, @@ -80,12 +124,21 @@ impl Service for ExecutionService { &Arc::new(req.supergraph_request), &this.schema, sender, + subscription_handle.clone(), + &this.subscription_config ) .await; - let query = req.query_plan.query.clone(); - let stream = if is_deferred { - filter_stream(first, receiver).boxed() + let stream = if is_deferred || is_subscription { + let stream_mode = if is_deferred { + StreamMode::Defer + } else { + // Keep the connection opened only if there is no error when init the subscription + first.subscribed = Some(first.errors.is_empty()); + StreamMode::Subscription + }; + let stream = filter_stream(first, receiver, stream_mode); + StreamWrapper(stream, tx_close_signal).boxed() } else { once(ready(first)).chain(receiver).boxed() }; @@ -107,6 +160,11 @@ impl Service for ExecutionService { } } + // 
Empty response (could happen when a subscription stream is closed from the subgraph) + if response.subscribed == Some(false) && response.data.is_none() && response.errors.is_empty() { + return ready(response.into()); + } + let has_next = response.has_next.unwrap_or(true); tracing::debug_span!("format_response").in_scope(|| { let mut paths = Vec::new(); @@ -194,7 +252,7 @@ impl Service for ExecutionService { .iter() .filter(|error| match &error.path { None => false, - Some(error_path) =>query.contains_error_path(operation_name.as_deref(), response.subselection.as_deref(), response.path.as_ref(), error_path) && error_path.starts_with(&path), + Some(error_path) => query.contains_error_path(operation_name.as_deref(), response.subselection.as_deref(), response.path.as_ref(), error_path) && error_path.starts_with(&path), }) .cloned() @@ -285,25 +343,39 @@ impl Service for ExecutionService { } } -// modifies the response stream to set `has_next` to `false` on the last response -fn filter_stream(first: Response, mut stream: Receiver) -> Receiver { - let (mut sender, receiver) = futures::channel::mpsc::channel(10); +#[derive(Clone, Copy)] +enum StreamMode { + Defer, + Subscription, +} + +// modifies the response stream to set `has_next` to `false` and `subscribed` to `false` on the last response +fn filter_stream( + first: Response, + mut stream: Receiver, + stream_mode: StreamMode, +) -> Receiver { + let (mut sender, receiver) = mpsc::channel(10); tokio::task::spawn(async move { - let mut seen_last_message = consume_responses(first, &mut stream, &mut sender).await?; + let mut seen_last_message = + consume_responses(first, &mut stream, &mut sender, stream_mode).await?; while let Some(current_response) = stream.next().await { seen_last_message = - consume_responses(current_response, &mut stream, &mut sender).await?; + consume_responses(current_response, &mut stream, &mut sender, stream_mode).await?; } // the response stream disconnected early so we could not add `has_next = false` to the // last message, so we add an empty one if !seen_last_message { - sender - .send(Response::builder().has_next(false).build()) - .await?; + let res = match stream_mode { + StreamMode::Defer => Response::builder().has_next(false).build(), + StreamMode::Subscription => Response::builder().subscribed(false).build(), + }; + sender.send(res).await?; } + Ok::<_, SendError>(()) }); @@ -315,6 +387,7 @@ async fn consume_responses( mut current_response: Response, stream: &mut Receiver, sender: &mut Sender, + stream_mode: StreamMode, ) -> Result { loop { match stream.try_next() { @@ -322,7 +395,6 @@ async fn consume_responses( // this means more deferred responses can come Err(_) => { sender.send(current_response).await?; - return Ok(false); } @@ -336,7 +408,10 @@ async fn consume_responses( // there will be no other deferred responses after that, // so we set `has_next` to `false` Ok(None) => { - current_response.has_next = Some(false); + match stream_mode { + StreamMode::Defer => current_response.has_next = Some(false), + StreamMode::Subscription => current_response.subscribed = Some(false), + } sender.send(current_response).await?; return Ok(true); @@ -356,6 +431,13 @@ impl ServiceFactory for ExecutionServiceFactory { type Service = execution::BoxService; fn create(&self) -> Self::Service { + let subscription_plugin_conf = self + .plugins + .iter() + .find(|i| i.0.as_str() == APOLLO_SUBSCRIPTION_PLUGIN) + .and_then(|plugin| (*plugin.1).as_any().downcast_ref::()) + .map(|p| p.config.clone()); + ServiceBuilder::new() 
.layer(AllowOnlyHttpPostMutationsLayer::default()) .service( @@ -363,6 +445,7 @@ impl ServiceFactory for ExecutionServiceFactory { crate::services::execution_service::ExecutionService { schema: self.schema.clone(), subgraph_service_factory: self.subgraph_service_factory.clone(), + subscription_config: subscription_plugin_conf, } .boxed(), |acc, (_, e)| e.execution_service(acc), diff --git a/apollo-router/src/services/layers/content_negociation.rs b/apollo-router/src/services/layers/content_negociation.rs index bd3c418d8e..ef49a93416 100644 --- a/apollo-router/src/services/layers/content_negociation.rs +++ b/apollo-router/src/services/layers/content_negociation.rs @@ -28,9 +28,11 @@ use crate::services::supergraph; use crate::services::MULTIPART_DEFER_CONTENT_TYPE; use crate::services::MULTIPART_DEFER_SPEC_PARAMETER; use crate::services::MULTIPART_DEFER_SPEC_VALUE; +use crate::services::MULTIPART_SUBSCRIPTION_CONTENT_TYPE; +use crate::services::MULTIPART_SUBSCRIPTION_SPEC_PARAMETER; +use crate::services::MULTIPART_SUBSCRIPTION_SPEC_VALUE; pub(crate) const GRAPHQL_JSON_RESPONSE_HEADER_VALUE: &str = "application/graphql-response+json"; - /// [`Layer`] for Content-Type checks implementation. #[derive(Clone, Default)] pub(crate) struct RouterLayer {} @@ -70,28 +72,34 @@ where return Ok(ControlFlow::Break(response.into())); } + let accepts = parse_accept(req.router_request.headers()); - if accepts.wildcard || accepts.multipart || accepts.json { + if accepts.wildcard + || accepts.multipart_defer + || accepts.multipart_subscription + || accepts.json + { req.context.private_entries.lock().insert(accepts); Ok(ControlFlow::Continue(req)) } else { let response: http::Response = http::Response::builder().status(StatusCode::NOT_ACCEPTABLE).header(CONTENT_TYPE, APPLICATION_JSON.essence_str()).body( - hyper::Body::from( - serde_json::json!({ - "errors": [ - graphql::Error::builder() - .message(format!( - r#"'accept' header must be one of: \"*/*\", {:?}, {:?} or {:?}"#, - APPLICATION_JSON.essence_str(), - GRAPHQL_JSON_RESPONSE_HEADER_VALUE, - MULTIPART_DEFER_CONTENT_TYPE - )) - .extension_code("INVALID_ACCEPT_HEADER") - .build() - ] - }).to_string())).expect("cannot fail"); + hyper::Body::from( + serde_json::json!({ + "errors": [ + graphql::Error::builder() + .message(format!( + r#"'accept' header must be one of: \"*/*\", {:?}, {:?}, {:?} or {:?}"#, + APPLICATION_JSON.essence_str(), + GRAPHQL_JSON_RESPONSE_HEADER_VALUE, + MULTIPART_SUBSCRIPTION_CONTENT_TYPE, + MULTIPART_DEFER_CONTENT_TYPE + )) + .extension_code("INVALID_ACCEPT_HEADER") + .build() + ] + }).to_string())).expect("cannot fail"); Ok(ControlFlow::Break(response.into())) } @@ -120,7 +128,8 @@ where let ClientRequestAccepts { wildcard: accepts_wildcard, json: accepts_json, - multipart: accepts_multipart, + multipart_defer: accepts_multipart_defer, + multipart_subscription: accepts_multipart_subscription, } = context .private_entries .lock() @@ -133,11 +142,16 @@ where CONTENT_TYPE, HeaderValue::from_static(APPLICATION_JSON.essence_str()), ); - } else if accepts_multipart { + } else if accepts_multipart_defer { parts.headers.insert( CONTENT_TYPE, HeaderValue::from_static(MULTIPART_DEFER_CONTENT_TYPE), ); + } else if accepts_multipart_subscription { + parts.headers.insert( + CONTENT_TYPE, + HeaderValue::from_static(MULTIPART_SUBSCRIPTION_CONTENT_TYPE), + ); } (parts, res) }) @@ -167,7 +181,6 @@ fn content_type_is_json(headers: &HeaderMap) -> bool { .unwrap_or(false) }) } - // Clippy suggests `for mime in MediaTypeList::new(str).flatten()` but less 
indentation // does not seem worth making it invisible that Result is involved. #[allow(clippy::manual_flatten)] @@ -191,13 +204,24 @@ fn parse_accept(headers: &HeaderMap) -> ClientRequestAccepts { if !accepts.wildcard && (mime.ty == _STAR && mime.subty == _STAR) { accepts.wildcard = true } - if !accepts.multipart && (mime.ty == MULTIPART && mime.subty == MIXED) { + if !accepts.multipart_defer && (mime.ty == MULTIPART && mime.subty == MIXED) { let parameter = mediatype::Name::new(MULTIPART_DEFER_SPEC_PARAMETER) .expect("valid name"); let value = mediatype::Value::new(MULTIPART_DEFER_SPEC_VALUE).expect("valid value"); if mime.get_param(parameter) == Some(value) { - accepts.multipart = true + accepts.multipart_defer = true + } + } + if !accepts.multipart_subscription + && (mime.ty == MULTIPART && mime.subty == MIXED) + { + let parameter = mediatype::Name::new(MULTIPART_SUBSCRIPTION_SPEC_PARAMETER) + .expect("valid name"); + let value = mediatype::Value::new(MULTIPART_SUBSCRIPTION_SPEC_VALUE) + .expect("valid value"); + if mime.get_param(parameter) == Some(value) { + accepts.multipart_subscription = true } } } @@ -256,6 +280,6 @@ mod tests { HeaderValue::from_static(MULTIPART_DEFER_CONTENT_TYPE), ); let accepts = parse_accept(&default_headers); - assert!(accepts.multipart); + assert!(accepts.multipart_defer); } } diff --git a/apollo-router/src/services/mod.rs b/apollo-router/src/services/mod.rs index c99b245c60..399cabc9ca 100644 --- a/apollo-router/src/services/mod.rs +++ b/apollo-router/src/services/mod.rs @@ -75,3 +75,8 @@ pub(crate) const MULTIPART_DEFER_SPEC_PARAMETER: &str = "deferSpec"; pub(crate) const MULTIPART_DEFER_SPEC_VALUE: &str = "20220824"; pub(crate) const MULTIPART_DEFER_CONTENT_TYPE: &str = "multipart/mixed;boundary=\"graphql\";deferSpec=20220824"; + +pub(crate) const MULTIPART_SUBSCRIPTION_CONTENT_TYPE: &str = + "multipart/mixed;boundary=\"graphql\";subscriptionSpec=1.0"; +pub(crate) const MULTIPART_SUBSCRIPTION_SPEC_PARAMETER: &str = "subscriptionSpec"; +pub(crate) const MULTIPART_SUBSCRIPTION_SPEC_VALUE: &str = "1.0"; diff --git a/apollo-router/src/services/router.rs b/apollo-router/src/services/router.rs index a61e9cc552..5e5de357c7 100644 --- a/apollo-router/src/services/router.rs +++ b/apollo-router/src/services/router.rs @@ -19,6 +19,7 @@ use tower::BoxError; use super::supergraph; use super::MULTIPART_DEFER_CONTENT_TYPE; +use super::MULTIPART_SUBSCRIPTION_CONTENT_TYPE; use crate::graphql; use crate::json_ext::Path; use crate::services::TryIntoHeaderName; @@ -218,7 +219,10 @@ impl Response { .headers() .get(CONTENT_TYPE) .iter() - .any(|value| *value == HeaderValue::from_static(MULTIPART_DEFER_CONTENT_TYPE)) + .any(|value| { + *value == HeaderValue::from_static(MULTIPART_DEFER_CONTENT_TYPE) + || *value == HeaderValue::from_static(MULTIPART_SUBSCRIPTION_CONTENT_TYPE) + }) { let multipart = Multipart::new(self.response.into_body(), "graphql"); @@ -248,7 +252,8 @@ impl Response { #[derive(Clone, Default)] pub(crate) struct ClientRequestAccepts { - pub(crate) multipart: bool, + pub(crate) multipart_defer: bool, + pub(crate) multipart_subscription: bool, pub(crate) json: bool, pub(crate) wildcard: bool, } diff --git a/apollo-router/src/services/router_service.rs b/apollo-router/src/services/router_service.rs index e85819a2af..0fa61d3826 100644 --- a/apollo-router/src/services/router_service.rs +++ b/apollo-router/src/services/router_service.rs @@ -5,7 +5,6 @@ use std::task::Poll; use axum::body::StreamBody; use axum::response::*; -use bytes::Bytes; use 
futures::future::ready; use futures::future::BoxFuture; use futures::stream; @@ -43,10 +42,13 @@ use super::HasPlugins; use super::HasSchema; use super::SupergraphCreator; use super::MULTIPART_DEFER_CONTENT_TYPE; +use super::MULTIPART_SUBSCRIPTION_CONTENT_TYPE; use crate::cache::DeduplicatingCache; use crate::graphql; #[cfg(test)] use crate::plugin::test::MockSupergraphService; +use crate::protocols::multipart::Multipart; +use crate::protocols::multipart::ProtocolMode; use crate::query_planner::QueryPlanResult; use crate::router_factory::RouterFactory; use crate::services::layers::content_negociation::GRAPHQL_JSON_RESPONSE_HEADER_VALUE; @@ -287,7 +289,8 @@ impl Service for RouterService { let ClientRequestAccepts { wildcard: accepts_wildcard, json: accepts_json, - multipart: accepts_multipart, + multipart_defer: accepts_multipart_defer, + multipart_subscription: accepts_multipart_subscription, } = context .private_entries .lock() @@ -313,6 +316,7 @@ impl Service for RouterService { } Some(response) => { if !response.has_next.unwrap_or(false) + && !response.subscribed.unwrap_or(false) && (accepts_json || accepts_wildcard) { parts.headers.insert( @@ -329,44 +333,33 @@ impl Service for RouterService { context, }) }) - } else if accepts_multipart { - parts.headers.insert( - CONTENT_TYPE, - HeaderValue::from_static(MULTIPART_DEFER_CONTENT_TYPE), - ); - - // each chunk contains a response and the next delimiter, to let client parsers - // know that they can process the response right away - let mut first_buf = Vec::from( - &b"\r\n--graphql\r\ncontent-type: application/json\r\n\r\n"[..], - ); - serde_json::to_writer(&mut first_buf, &response)?; - if response.has_next.unwrap_or(false) { - first_buf.extend_from_slice(b"\r\n--graphql\r\n"); - } else { - first_buf.extend_from_slice(b"\r\n--graphql--\r\n"); + } else if accepts_multipart_defer || accepts_multipart_subscription { + if accepts_multipart_defer { + parts.headers.insert( + CONTENT_TYPE, + HeaderValue::from_static(MULTIPART_DEFER_CONTENT_TYPE), + ); + } else if accepts_multipart_subscription { + parts.headers.insert( + CONTENT_TYPE, + HeaderValue::from_static( + MULTIPART_SUBSCRIPTION_CONTENT_TYPE, + ), + ); } - - let body = once(ready(Ok(Bytes::from(first_buf)))).chain(body.map( - |res| { - let mut buf = Vec::from( - &b"content-type: application/json\r\n\r\n"[..], - ); - serde_json::to_writer(&mut buf, &res)?; - - // the last chunk has a different end delimiter - if res.has_next.unwrap_or(false) { - buf.extend_from_slice(b"\r\n--graphql\r\n"); - } else { - buf.extend_from_slice(b"\r\n--graphql--\r\n"); - } - - Ok::<_, BoxError>(buf.into()) - }, - )); + let multipart_stream = match response.subscribed { + Some(true) => StreamBody::new(Multipart::new( + body, + ProtocolMode::Subscription, + )), + _ => StreamBody::new(Multipart::new( + once(ready(response)).chain(body), + ProtocolMode::Defer, + )), + }; let response = - (parts, StreamBody::new(body)).into_response().map(|body| { + (parts, multipart_stream).into_response().map(|body| { // Axum makes this `body` have type: // https://docs.rs/http-body/0.4.5/http_body/combinators/struct.UnsyncBoxBody.html let mut body = Box::pin(body); diff --git a/apollo-router/src/services/snapshots/apollo_router__services__supergraph_service__tests__subscription_with_callback-2.snap b/apollo-router/src/services/snapshots/apollo_router__services__supergraph_service__tests__subscription_with_callback-2.snap new file mode 100644 index 0000000000..58085506fc --- /dev/null +++ 
b/apollo-router/src/services/snapshots/apollo_router__services__supergraph_service__tests__subscription_with_callback-2.snap @@ -0,0 +1,28 @@ +--- +source: apollo-router/src/services/supergraph_service.rs +expression: stream.next_response().await.unwrap() +--- +{ + "data": { + "userWasCreated": { + "name": "test", + "activeOrganization": { + "id": "0", + "suborga": [ + { + "id": "1", + "name": "A" + }, + { + "id": "2", + "name": "B" + }, + { + "id": "3", + "name": "C" + } + ] + } + } + } +} diff --git a/apollo-router/src/services/snapshots/apollo_router__services__supergraph_service__tests__subscription_with_callback-3.snap b/apollo-router/src/services/snapshots/apollo_router__services__supergraph_service__tests__subscription_with_callback-3.snap new file mode 100644 index 0000000000..dc0cc0735e --- /dev/null +++ b/apollo-router/src/services/snapshots/apollo_router__services__supergraph_service__tests__subscription_with_callback-3.snap @@ -0,0 +1,17 @@ +--- +source: apollo-router/src/services/supergraph_service.rs +expression: stream.next_response().await.unwrap() +--- +{ + "data": { + "userWasCreated": null + }, + "errors": [ + { + "message": "cannot fetch the name", + "extensions": { + "code": "INVALID" + } + } + ] +} diff --git a/apollo-router/src/services/snapshots/apollo_router__services__supergraph_service__tests__subscription_with_callback.snap b/apollo-router/src/services/snapshots/apollo_router__services__supergraph_service__tests__subscription_with_callback.snap new file mode 100644 index 0000000000..96ce82a848 --- /dev/null +++ b/apollo-router/src/services/snapshots/apollo_router__services__supergraph_service__tests__subscription_with_callback.snap @@ -0,0 +1,7 @@ +--- +source: apollo-router/src/services/supergraph_service.rs +expression: res +--- +{ + "data": null +} diff --git a/apollo-router/src/services/snapshots/apollo_router__services__supergraph_service__tests__subscription_with_callback_with_limit-2.snap b/apollo-router/src/services/snapshots/apollo_router__services__supergraph_service__tests__subscription_with_callback_with_limit-2.snap new file mode 100644 index 0000000000..58085506fc --- /dev/null +++ b/apollo-router/src/services/snapshots/apollo_router__services__supergraph_service__tests__subscription_with_callback_with_limit-2.snap @@ -0,0 +1,28 @@ +--- +source: apollo-router/src/services/supergraph_service.rs +expression: stream.next_response().await.unwrap() +--- +{ + "data": { + "userWasCreated": { + "name": "test", + "activeOrganization": { + "id": "0", + "suborga": [ + { + "id": "1", + "name": "A" + }, + { + "id": "2", + "name": "B" + }, + { + "id": "3", + "name": "C" + } + ] + } + } + } +} diff --git a/apollo-router/src/services/snapshots/apollo_router__services__supergraph_service__tests__subscription_with_callback_with_limit-3.snap b/apollo-router/src/services/snapshots/apollo_router__services__supergraph_service__tests__subscription_with_callback_with_limit-3.snap new file mode 100644 index 0000000000..dc0cc0735e --- /dev/null +++ b/apollo-router/src/services/snapshots/apollo_router__services__supergraph_service__tests__subscription_with_callback_with_limit-3.snap @@ -0,0 +1,17 @@ +--- +source: apollo-router/src/services/supergraph_service.rs +expression: stream.next_response().await.unwrap() +--- +{ + "data": { + "userWasCreated": null + }, + "errors": [ + { + "message": "cannot fetch the name", + "extensions": { + "code": "INVALID" + } + } + ] +} diff --git 
a/apollo-router/src/services/snapshots/apollo_router__services__supergraph_service__tests__subscription_with_callback_with_limit-4.snap b/apollo-router/src/services/snapshots/apollo_router__services__supergraph_service__tests__subscription_with_callback_with_limit-4.snap new file mode 100644 index 0000000000..56196cf126 --- /dev/null +++ b/apollo-router/src/services/snapshots/apollo_router__services__supergraph_service__tests__subscription_with_callback_with_limit-4.snap @@ -0,0 +1,15 @@ +--- +source: apollo-router/src/services/supergraph_service.rs +expression: res +--- +{ + "data": null, + "errors": [ + { + "message": "can't open new subscription, limit reached", + "extensions": { + "code": "SUBSCRIPTION_MAX_LIMIT" + } + } + ] +} diff --git a/apollo-router/src/services/snapshots/apollo_router__services__supergraph_service__tests__subscription_with_callback_with_limit.snap b/apollo-router/src/services/snapshots/apollo_router__services__supergraph_service__tests__subscription_with_callback_with_limit.snap new file mode 100644 index 0000000000..96ce82a848 --- /dev/null +++ b/apollo-router/src/services/snapshots/apollo_router__services__supergraph_service__tests__subscription_with_callback_with_limit.snap @@ -0,0 +1,7 @@ +--- +source: apollo-router/src/services/supergraph_service.rs +expression: res +--- +{ + "data": null +} diff --git a/apollo-router/src/services/snapshots/apollo_router__services__supergraph_service__tests__subscription_without_header.snap b/apollo-router/src/services/snapshots/apollo_router__services__supergraph_service__tests__subscription_without_header.snap new file mode 100644 index 0000000000..966aa577aa --- /dev/null +++ b/apollo-router/src/services/snapshots/apollo_router__services__supergraph_service__tests__subscription_without_header.snap @@ -0,0 +1,14 @@ +--- +source: apollo-router/src/services/supergraph_service.rs +expression: res +--- +{ + "errors": [ + { + "message": "the router received a query with a subscription but the client does not accept multipart/mixed HTTP responses. 
To enable subscription support, add the HTTP header 'Accept: multipart/mixed; boundary=graphql; subscriptionSpec=1.0'", + "extensions": { + "code": "SUBSCRIPTION_BAD_HEADER" + } + } + ] +} diff --git a/apollo-router/src/services/subgraph.rs b/apollo-router/src/services/subgraph.rs index f9ad9e7a4b..e926bda188 100644 --- a/apollo-router/src/services/subgraph.rs +++ b/apollo-router/src/services/subgraph.rs @@ -2,17 +2,24 @@ use std::sync::Arc; +use futures::channel::mpsc; use http::StatusCode; +use http::Version; use serde_json_bytes::ByteString; use serde_json_bytes::Map as JsonMap; use serde_json_bytes::Value; +use sha2::Digest; +use sha2::Sha256; use static_assertions::assert_impl_all; +use tokio::sync::broadcast; use tower::BoxError; use crate::error::Error; use crate::graphql; use crate::json_ext::Object; use crate::json_ext::Path; +use crate::notification::HandleStream; +use crate::plugins::authentication::APOLLO_AUTHENTICATION_JWT_CLAIMS; use crate::query_planner::fetch::OperationKind; use crate::Context; @@ -31,6 +38,11 @@ pub struct Request { pub operation_kind: OperationKind, pub context: Context, + + /// Channel to send the subscription stream to listen on events coming from subgraph in a task + pub(crate) subscription_stream: Option>>, + /// Channel triggered when the client connection has been dropped + pub(crate) connection_closed_signal: Option>, } #[buildstructor::buildstructor] @@ -44,12 +56,16 @@ impl Request { subgraph_request: http::Request, operation_kind: OperationKind, context: Context, + subscription_stream: Option>>, + connection_closed_signal: Option>, ) -> Request { Self { supergraph_request, subgraph_request, operation_kind, context, + subscription_stream, + connection_closed_signal, } } @@ -64,12 +80,16 @@ impl Request { subgraph_request: Option>, operation_kind: Option, context: Option, + subscription_stream: Option>>, + connection_closed_signal: Option>, ) -> Request { Request::new( supergraph_request.unwrap_or_default(), subgraph_request.unwrap_or_default(), operation_kind.unwrap_or(OperationKind::Query), context.unwrap_or_default(), + subscription_stream, + connection_closed_signal, ) } } @@ -99,6 +119,11 @@ impl Clone for Request { subgraph_request, operation_kind: self.operation_kind, context: self.context.clone(), + subscription_stream: self.subscription_stream.clone(), + connection_closed_signal: self + .connection_closed_signal + .as_ref() + .map(|s| s.resubscribe()), } } } @@ -210,3 +235,64 @@ impl Response { )) } } + +impl Request { + #[allow(dead_code)] + pub(crate) fn to_sha256(&self) -> String { + let mut hasher = Sha256::new(); + let http_req = &self.subgraph_request; + hasher.update(http_req.method().as_str().as_bytes()); + + // To not allocate + let version = match http_req.version() { + Version::HTTP_09 => "HTTP/0.9", + Version::HTTP_10 => "HTTP/1.0", + Version::HTTP_11 => "HTTP/1.1", + Version::HTTP_2 => "HTTP/2.0", + Version::HTTP_3 => "HTTP/3.0", + _ => "unknown", + }; + hasher.update(version.as_bytes()); + let uri = http_req.uri(); + if let Some(scheme) = uri.scheme() { + hasher.update(scheme.as_str().as_bytes()); + } + if let Some(authority) = uri.authority() { + hasher.update(authority.as_str().as_bytes()); + } + if let Some(query) = uri.query() { + hasher.update(query.as_bytes()); + } + + // this assumes headers are in the same order + for (name, value) in http_req.headers() { + hasher.update(name.as_str().as_bytes()); + hasher.update(value.to_str().unwrap_or("ERROR").as_bytes()); + } + if let Some(claim) = self + .context + 
.get_json_value(APOLLO_AUTHENTICATION_JWT_CLAIMS) + { + hasher.update(format!("{claim:?}").as_bytes()); + } + let body = http_req.body(); + if let Some(operation_name) = &body.operation_name { + hasher.update(operation_name.as_bytes()); + } + if let Some(query) = &body.query { + hasher.update(query.as_bytes()); + } + for (var_name, var_value) in &body.variables { + hasher.update(var_name.inner()); + // TODO implement to_bytes() for value in serde_json_bytes + hasher.update(var_value.to_string().as_bytes()); + } + for (name, val) in &body.extensions { + hasher.update(name.inner()); + // TODO implement to_bytes() for value in serde_json_bytes + hasher.update(val.to_string().as_bytes()); + } + + hex::encode(hasher.finalize()) + } +} diff --git a/apollo-router/src/services/subgraph_service.rs b/apollo-router/src/services/subgraph_service.rs index 88f6cc5245..c9f65c61df 100644 --- a/apollo-router/src/services/subgraph_service.rs +++ b/apollo-router/src/services/subgraph_service.rs @@ -12,6 +12,8 @@ use async_compression::tokio::write::BrotliEncoder; use async_compression::tokio::write::GzipEncoder; use async_compression::tokio::write::ZlibEncoder; use futures::future::BoxFuture; +use futures::SinkExt; +use futures::StreamExt; use global::get_text_map_propagator; use http::header::ACCEPT; use http::header::ACCEPT_ENCODING; @@ -28,7 +30,11 @@ use mime::APPLICATION_JSON; use opentelemetry::global; use rustls::RootCertStore; use schemars::JsonSchema; +use serde::Serialize; use tokio::io::AsyncWriteExt; +use tokio_tungstenite::connect_async; +use tokio_tungstenite::connect_async_tls_with_config; +use tokio_tungstenite::tungstenite::client::IntoClientRequest; use tower::util::BoxService; use tower::BoxError; use tower::Service; @@ -38,17 +44,29 @@ use tower_http::decompression::Decompression; use tower_http::decompression::DecompressionLayer; use tracing::Instrument; use tracing_opentelemetry::OpenTelemetrySpanExt; +use uuid::Uuid; use super::layers::content_negociation::GRAPHQL_JSON_RESPONSE_HEADER_VALUE; use super::Plugins; use crate::error::FetchError; use crate::graphql; +use crate::json_ext::Object; +use crate::plugins::subscription::create_verifier; +use crate::plugins::subscription::CallbackMode; +use crate::plugins::subscription::SubscriptionConfig; +use crate::plugins::subscription::SubscriptionMode; +use crate::plugins::subscription::WebSocketConfiguration; +use crate::plugins::subscription::SUBSCRIPTION_WS_CUSTOM_CONNECTION_PARAMS; use crate::plugins::telemetry::LOGGING_DISPLAY_BODY; use crate::plugins::telemetry::LOGGING_DISPLAY_HEADERS; +use crate::protocols::websocket::convert_websocket_stream; +use crate::protocols::websocket::GraphqlWebSocket; +use crate::query_planner::OperationKind; use crate::services::layers::apq; use crate::services::SubgraphRequest; use crate::services::SubgraphResponse; use crate::Context; +use crate::Notify; const PERSISTED_QUERY_NOT_FOUND_EXTENSION_CODE: &str = "PERSISTED_QUERY_NOT_FOUND"; const PERSISTED_QUERY_NOT_SUPPORTED_EXTENSION_CODE: &str = "PERSISTED_QUERY_NOT_SUPPORTED"; @@ -90,6 +108,13 @@ impl Display for Compression { } } +#[derive(Serialize, Clone, Debug)] +struct SubscriptionExtension { + subscription_id: String, + callback_url: url::Url, + verifier: String, +} + /// Client for interacting with subgraphs. 
#[derive(Clone)] pub(crate) struct SubgraphService { @@ -106,6 +131,9 @@ pub(crate) struct SubgraphService { /// If a subgraph sends the error message PERSISTED_QUERY_NOT_SUPPORTED, /// apq is set to false apq: Arc, + /// Subscription config if enabled + subscription_config: Option, + notify: Notify, } impl SubgraphService { @@ -114,6 +142,8 @@ impl SubgraphService { enable_apq: bool, tls_cert_store: Option, enable_http2: bool, + subscription_config: Option, + notify: Notify, ) -> Self { let mut http_connector = HttpConnector::new(); http_connector.set_nodelay(true); @@ -146,6 +176,8 @@ impl SubgraphService { .service(hyper::Client::builder().build(connector)), service: Arc::new(service.into()), apq: Arc::new(::new(enable_apq)), + subscription_config, + notify, } } } @@ -162,21 +194,144 @@ impl tower::Service for SubgraphService { } fn call(&mut self, request: SubgraphRequest) -> Self::Future { + let subscription_config = (request.operation_kind == OperationKind::Subscription) + .then(|| self.subscription_config.clone()) + .flatten(); + let service_name = (*self.service).to_owned(); + + // Do it only for subscription to dedup them + let hashed_request = if request.operation_kind == OperationKind::Subscription { + let subscription_config = match &subscription_config { + Some(sub_cfg) => sub_cfg, + None => { + return Box::pin(async move { + Err(BoxError::from(FetchError::SubrequestWsError { + service: service_name, + reason: "subscription is not enabled".to_string(), + })) + }); + } + }; + if subscription_config.enable_deduplication { + request.to_sha256() + } else { + Uuid::new_v4().to_string() + } + } else { + String::new() + }; + let SubgraphRequest { subgraph_request, context, .. } = request.clone(); - let (_, body) = subgraph_request.into_parts(); + let (_, mut body) = subgraph_request.into_parts(); let clone = self.client.clone(); let client = std::mem::replace(&mut self.client, clone); - let service_name = (*self.service).to_owned(); let arc_apq_enabled = self.apq.clone(); + let mut notify = self.notify.clone(); let make_calls = async move { + // Subscription handling + if request.operation_kind == OperationKind::Subscription + && request.subscription_stream.is_some() + { + let subscription_config = + subscription_config.ok_or_else(|| FetchError::SubrequestHttpError { + service: service_name.clone(), + reason: "subscription is not enabled".to_string(), + status_code: None, + })?; + let mode = subscription_config.mode.get_subgraph_config(&service_name); + + match &mode { + Some(SubscriptionMode::Passthrough(ws_conf)) => { + // call_websocket for passthrough mode + return call_websocket( + notify, + request, + context, + service_name, + ws_conf, + hashed_request, + ) + .await; + } + Some(SubscriptionMode::Callback(CallbackMode { public_url, .. 
})) => { + // Hash the subgraph_request + let subscription_id = hashed_request; + + // Call create_or_subscribe on notify + let (handle, created) = notify + .create_or_subscribe(subscription_id.clone(), true) + .await?; + + // If it existed before just send the right stream (handle) and early return + let mut stream_tx = + request.subscription_stream.clone().ok_or_else(|| { + FetchError::SubrequestWsError { + service: service_name.clone(), + reason: "cannot get the websocket stream".to_string(), + } + })?; + stream_tx.send(handle.into_stream()).await?; + + if !created { + tracing::info!( + monotonic_counter.apollo_router_deduplicated_subscriptions_total = 1u64, + mode = %"callback", + ); + // Dedup happens here + return Ok(SubgraphResponse::builder() + .context(context) + .extensions(Object::default()) + .build()); + } + + // If not then put the subscription_id in the extensions for callback mode and continue + // Do this if the topic doesn't already exist + let callback_url = + public_url.join(&format!("/callback/{subscription_id}"))?; + // Generate verifier + let verifier = create_verifier(&subscription_id).map_err(|err| { + FetchError::SubrequestHttpError { + service: service_name.clone(), + reason: format!("{err:?}"), + status_code: None, + } + })?; + + let subscription_extension = SubscriptionExtension { + subscription_id, + callback_url, + verifier, + }; + body.extensions.insert( + "subscription", + serde_json_bytes::to_value(subscription_extension).map_err(|_err| { + FetchError::SubrequestHttpError { + service: service_name.clone(), + reason: String::from( + "cannot serialize the subscription extension", + ), + status_code: None, + } + })?, + ); + } + _ => { + return Err(Box::new(FetchError::SubrequestWsError { + service: service_name.clone(), + reason: "subscription mode is not enabled".to_string(), + })); + } + } + } + // If APQ is not enabled, simply make the graphql call // with the same request body. let apq_enabled = arc_apq_enabled.as_ref(); @@ -242,6 +397,124 @@ impl tower::Service for SubgraphService { } } +/// call websocket makes websocket calls with modified graphql::Request (body) +async fn call_websocket( + mut notify: Notify, + request: SubgraphRequest, + context: Context, + service_name: String, + subgraph_cfg: &WebSocketConfiguration, + subscription_hash: String, +) -> Result { + let SubgraphRequest { + subgraph_request, + subscription_stream, + .. 
+ } = request; + let mut subscription_stream_tx = + subscription_stream.ok_or_else(|| FetchError::SubrequestWsError { + service: service_name.clone(), + reason: "cannot get the websocket stream".to_string(), + })?; + + let (handle, created) = notify + .create_or_subscribe(subscription_hash.clone(), false) + .await?; + if !created { + subscription_stream_tx.send(handle.into_stream()).await?; + tracing::info!( + monotonic_counter.apollo_router_deduplicated_subscriptions_total = 1u64, + mode = %"passthrough", + ); + + // Dedup happens here + return Ok(SubgraphResponse::builder() + .context(context) + .extensions(Object::default()) + .build()); + } + + let (parts, body) = subgraph_request.into_parts(); + + // Check context key and Authorization header (context key takes precedence) to set connection params if needed + let connection_params = match ( + context.get_json_value(SUBSCRIPTION_WS_CUSTOM_CONNECTION_PARAMS), + parts + .headers + .get(http::header::AUTHORIZATION) + .and_then(|auth| auth.to_str().ok()), + ) { + (Some(connection_params), _) => Some(connection_params), + (None, Some(authorization)) => Some(serde_json_bytes::json!({ "token": authorization })), + _ => None, + }; + + let request = get_websocket_request(service_name.clone(), parts, subgraph_cfg)?; + let display_headers = context.contains_key(LOGGING_DISPLAY_HEADERS); + let display_body = context.contains_key(LOGGING_DISPLAY_BODY); + if display_headers { + tracing::info!(http.request.headers = ?request.headers(), apollo.subgraph.name = %service_name, "Websocket request headers to subgraph {service_name:?}"); + } + if display_body { + tracing::info!(http.request.body = ?request.body(), apollo.subgraph.name = %service_name, "Websocket request body to subgraph {service_name:?}"); + } + + let (ws_stream, mut resp) = match request.uri().scheme_str() { + Some("wss") => connect_async_tls_with_config(request, None, None).await, + _ => connect_async(request).await, + } + .map_err(|err| FetchError::SubrequestWsError { + service: service_name.clone(), + reason: format!("cannot connect websocket to subgraph: {err}"), + })?; + + if display_body { + tracing::info!( + response.body = %String::from_utf8_lossy(&resp.body_mut().take().unwrap_or_default()), apollo.subgraph.name = %service_name, "Raw response body from subgraph {service_name:?} received" + ); + } + + let mut gql_stream = GraphqlWebSocket::new( + convert_websocket_stream(ws_stream, subscription_hash.clone()), + subscription_hash, + subgraph_cfg.protocol, + connection_params, + ) + .await + .map_err(|_| FetchError::SubrequestWsError { + service: service_name.clone(), + reason: "cannot get the GraphQL websocket stream".to_string(), + })?; + + gql_stream + .send(body) + .await + .map_err(|err| FetchError::SubrequestWsError { + service: service_name, + reason: format!("cannot send the subgraph request to websocket stream: {err:?}"), + })?; + let (mut gql_sink, gql_stream) = gql_stream.split(); + let (handle_sink, handle_stream) = handle.split(); + + tokio::task::spawn(async move { + let _ = gql_stream + .map(Ok::<_, graphql::Error>) + .forward(handle_sink) + .await; + + if let Err(err) = gql_sink.close().await { + tracing::trace!("cannot close the websocket stream: {err:?}"); + } + }); + + subscription_stream_tx.send(handle_stream).await?; + + Ok(SubgraphResponse::new_from_response( + resp.map(|_| graphql::Response::default()), + context, + )) +} + /// call_http makes http calls with modified graphql::Request (body) async fn call_http( request: SubgraphRequest, @@ -424,6 +697,59 @@ 
async fn call_http( Ok(SubgraphResponse::new_from_response(resp, context)) } +fn get_websocket_request( + service_name: String, + mut parts: http::request::Parts, + subgraph_ws_cfg: &WebSocketConfiguration, +) -> Result, FetchError> { + let mut subgraph_url = url::Url::parse(&parts.uri.to_string()).map_err(|err| { + tracing::error!("cannot parse subgraph url {}: {err:?}", parts.uri); + FetchError::SubrequestWsError { + service: service_name.clone(), + reason: "cannot parse subgraph url".to_string(), + } + })?; + let new_scheme = match subgraph_url.scheme() { + "http" => "ws", + "https" => "wss", + _ => "ws", + }; + subgraph_url.set_scheme(new_scheme).map_err(|err| { + tracing::error!("cannot set a scheme '{new_scheme}' on subgraph url: {err:?}"); + + FetchError::SubrequestWsError { + service: service_name.clone(), + reason: "cannot set a scheme on websocket url".to_string(), + } + })?; + + let subgraph_url = match &subgraph_ws_cfg.path { + Some(path) => subgraph_url + .join(path) + .map_err(|_| FetchError::SubrequestWsError { + service: service_name.clone(), + reason: "cannot parse subgraph url with the specific websocket path".to_string(), + })?, + None => subgraph_url, + }; + let mut request = subgraph_url.into_client_request().map_err(|err| { + tracing::error!("cannot create websocket client request: {err:?}"); + + FetchError::SubrequestWsError { + service: service_name.clone(), + reason: "cannot create websocket client request".to_string(), + } + })?; + request.headers_mut().insert( + http::header::SEC_WEBSOCKET_PROTOCOL, + subgraph_ws_cfg.protocol.into(), + ); + parts.headers.extend(request.headers_mut().drain()); + *request.headers_mut() = parts.headers; + + Ok(request) +} + fn get_apq_error(gql_response: &graphql::Response) -> APQError { for error in &gql_response.errors { // Check if error message is an APQ error @@ -543,8 +869,16 @@ mod tests { use std::net::SocketAddr; use std::str::FromStr; + use axum::extract::ws::Message; + use axum::extract::ConnectInfo; + use axum::extract::WebSocketUpgrade; + use axum::response::IntoResponse; + use axum::routing::get; + use axum::Router; use axum::Server; use bytes::Buf; + use futures::channel::mpsc; + use futures::StreamExt; use http::header::HOST; use http::StatusCode; use http::Uri; @@ -560,6 +894,11 @@ mod tests { use crate::graphql::Error; use crate::graphql::Request; use crate::graphql::Response; + use crate::plugins::subscription::SubgraphPassthroughMode; + use crate::plugins::subscription::SubscriptionModeConfig; + use crate::protocols::websocket::ClientMessage; + use crate::protocols::websocket::ServerMessage; + use crate::protocols::websocket::WebSocketProtocol; use crate::query_planner::fetch::OperationKind; use crate::Context; @@ -981,11 +1320,209 @@ mod tests { server.await.unwrap(); } + async fn emulate_correct_websocket_server(socket_addr: SocketAddr) { + async fn ws_handler( + ws: WebSocketUpgrade, + ConnectInfo(_addr): ConnectInfo, + ) -> Result { + // finalize the upgrade process by returning upgrade callback. + // we can customize the callback by sending additional info such as address. + let res = ws.on_upgrade(move |mut socket| async move { + let connection_ack = socket.recv().await.unwrap().unwrap().into_text().unwrap(); + let ack_msg: ClientMessage = serde_json::from_str(&connection_ack).unwrap(); + assert!(matches!(ack_msg, ClientMessage::ConnectionInit { .. 
})); + + socket + .send(Message::Text( + serde_json::to_string(&ServerMessage::ConnectionAck).unwrap(), + )) + .await + .unwrap(); + let new_message = socket.recv().await.unwrap().unwrap().into_text().unwrap(); + let subscribe_msg: ClientMessage = serde_json::from_str(&new_message).unwrap(); + assert!(matches!(subscribe_msg, ClientMessage::Subscribe { .. })); + let client_id = if let ClientMessage::Subscribe { payload, id } = subscribe_msg { + assert_eq!( + payload, + Request::builder() + .query("subscription {\n userWasCreated {\n username\n }\n}") + .build() + ); + + id + } else { + panic!("subscribe message should be sent"); + }; + + socket + .send(Message::Text( + serde_json::to_string(&ServerMessage::Next { id: client_id, payload: graphql::Response::builder().data(serde_json_bytes::json!({"userWasCreated": {"username": "ada_lovelace"}})).build() }).unwrap(), + )) + .await + .unwrap(); + }); + + Ok(res) + } + + let app = Router::new().route("/ws", get(ws_handler)); + let server = Server::bind(&socket_addr) + .serve(app.into_make_service_with_connect_info::()); + server.await.unwrap(); + } + + async fn emulate_incorrect_websocket_server(socket_addr: SocketAddr) { + async fn ws_handler( + _ws: WebSocketUpgrade, + ConnectInfo(_addr): ConnectInfo, + ) -> Result { + Ok((http::StatusCode::BAD_REQUEST, "bad request")) + } + + let app = Router::new().route("/ws", get(ws_handler)); + let server = Server::bind(&socket_addr) + .serve(app.into_make_service_with_connect_info::()); + server.await.unwrap(); + } + + fn subscription_config() -> SubscriptionConfig { + SubscriptionConfig { + mode: SubscriptionModeConfig { + callback: None, + passthrough: Some(SubgraphPassthroughMode { + all: None, + subgraphs: [( + "test".to_string(), + WebSocketConfiguration { + path: Some(String::from("/ws")), + protocol: WebSocketProtocol::default(), + }, + )] + .into(), + }), + }, + enable_deduplication: true, + max_opened_subscriptions: None, + queue_capacity: None, + } + } + + #[tokio::test(flavor = "multi_thread")] + async fn test_subgraph_service_websocket() { + let socket_addr = SocketAddr::from_str("127.0.0.1:2222").unwrap(); + let spawned_task = tokio::task::spawn(emulate_correct_websocket_server(socket_addr)); + let subgraph_service = SubgraphService::new( + "test", + true, + None, + false, + subscription_config().into(), + Notify::builder().build(), + ); + let (tx, mut rx) = mpsc::channel(2); + + let url = Uri::from_str(&format!("ws://{socket_addr}")).unwrap(); + let response = subgraph_service + .oneshot(SubgraphRequest { + supergraph_request: Arc::new( + http::Request::builder() + .header(HOST, "host") + .header(CONTENT_TYPE, APPLICATION_JSON.essence_str()) + .body( + Request::builder() + .query("subscription {\n userWasCreated {\n username\n }\n}") + .build(), + ) + .expect("expecting valid request"), + ), + subgraph_request: http::Request::builder() + .header(HOST, "rhost") + .header(CONTENT_TYPE, APPLICATION_JSON.essence_str()) + .uri(url) + .body( + Request::builder() + .query("subscription {\n userWasCreated {\n username\n }\n}") + .build(), + ) + .expect("expecting valid request"), + operation_kind: OperationKind::Subscription, + context: Context::new(), + subscription_stream: Some(tx), + connection_closed_signal: None, + }) + .await + .unwrap(); + assert!(response.response.body().errors.is_empty()); + + let mut gql_stream = rx.next().await.unwrap(); + let message = gql_stream.next().await.unwrap(); + assert_eq!( + message, + graphql::Response::builder() + .subscribed(true) + 
.data(serde_json_bytes::json!({"userWasCreated": {"username": "ada_lovelace"}})) + .build() + ); + spawned_task.abort(); + } + + #[tokio::test(flavor = "multi_thread")] + async fn test_subgraph_service_websocket_with_error() { + let socket_addr = SocketAddr::from_str("127.0.0.1:2323").unwrap(); + tokio::task::spawn(emulate_incorrect_websocket_server(socket_addr)); + let subgraph_service = SubgraphService::new( + "test", + true, + None, + false, + subscription_config().into(), + Notify::builder().build(), + ); + let (tx, _rx) = mpsc::channel(2); + + let url = Uri::from_str(&format!("ws://{socket_addr}")).unwrap(); + let err = subgraph_service + .oneshot(SubgraphRequest { + supergraph_request: Arc::new( + http::Request::builder() + .header(HOST, "host") + .header(CONTENT_TYPE, APPLICATION_JSON.essence_str()) + .body( + Request::builder() + .query("subscription {\n userWasCreated {\n username\n }\n}") + .build(), + ) + .expect("expecting valid request"), + ), + subgraph_request: http::Request::builder() + .header(HOST, "rhost") + .header(CONTENT_TYPE, APPLICATION_JSON.essence_str()) + .uri(url) + .body( + Request::builder() + .query("subscription {\n userWasCreated {\n username\n }\n}") + .build(), + ) + .expect("expecting valid request"), + operation_kind: OperationKind::Subscription, + context: Context::new(), + subscription_stream: Some(tx), + connection_closed_signal: None, + }) + .await + .unwrap_err(); + assert_eq!( + err.to_string(), + "Websocket fetch failed from 'test': cannot connect websocket to subgraph: HTTP error: 400 Bad Request".to_string() + ); + } + #[tokio::test(flavor = "multi_thread")] async fn test_bad_status_code_should_not_fail() { let socket_addr = SocketAddr::from_str("127.0.0.1:2626").unwrap(); tokio::task::spawn(emulate_subgraph_bad_request(socket_addr)); - let subgraph_service = SubgraphService::new("test", true, None, true); + let subgraph_service = + SubgraphService::new("test", true, None, true, None, Notify::default()); let url = Uri::from_str(&format!("http://{socket_addr}")).unwrap(); let response = subgraph_service @@ -1005,6 +1542,8 @@ mod tests { .expect("expecting valid request"), operation_kind: OperationKind::Query, context: Context::new(), + subscription_stream: None, + connection_closed_signal: None, }) .await .unwrap(); @@ -1019,7 +1558,8 @@ mod tests { let socket_addr = SocketAddr::from_str("127.0.0.1:2525").unwrap(); tokio::task::spawn(emulate_subgraph_bad_response_format(socket_addr)); - let subgraph_service = SubgraphService::new("test", true, None, true); + let subgraph_service = + SubgraphService::new("test", true, None, true, None, Notify::default()); let url = Uri::from_str(&format!("http://{socket_addr}")).unwrap(); let err = subgraph_service @@ -1039,6 +1579,8 @@ mod tests { .expect("expecting valid request"), operation_kind: OperationKind::Query, context: Context::new(), + subscription_stream: None, + connection_closed_signal: None, }) .await .unwrap_err(); @@ -1052,7 +1594,8 @@ mod tests { async fn test_compressed_request_response_body() { let socket_addr = SocketAddr::from_str("127.0.0.1:2727").unwrap(); tokio::task::spawn(emulate_subgraph_compressed_response(socket_addr)); - let subgraph_service = SubgraphService::new("test", false, None, true); + let subgraph_service = + SubgraphService::new("test", false, None, true, None, Notify::default()); let url = Uri::from_str(&format!("http://{socket_addr}")).unwrap(); let resp = subgraph_service @@ -1073,6 +1616,8 @@ mod tests { .expect("expecting valid request"), operation_kind: 
OperationKind::Query, context: Context::new(), + subscription_stream: None, + connection_closed_signal: None, }) .await .unwrap(); @@ -1089,7 +1634,8 @@ mod tests { async fn test_unauthorized() { let socket_addr = SocketAddr::from_str("127.0.0.1:2828").unwrap(); tokio::task::spawn(emulate_subgraph_unauthorized(socket_addr)); - let subgraph_service = SubgraphService::new("test", true, None, true); + let subgraph_service = + SubgraphService::new("test", true, None, true, None, Notify::default()); let url = Uri::from_str(&format!("http://{socket_addr}")).unwrap(); let err = subgraph_service @@ -1109,6 +1655,8 @@ mod tests { .expect("expecting valid request"), operation_kind: OperationKind::Query, context: Context::new(), + subscription_stream: None, + connection_closed_signal: None, }) .await .unwrap_err(); @@ -1122,7 +1670,8 @@ mod tests { async fn test_persisted_query_not_supported_message() { let socket_addr = SocketAddr::from_str("127.0.0.1:2929").unwrap(); tokio::task::spawn(emulate_persisted_query_not_supported_message(socket_addr)); - let subgraph_service = SubgraphService::new("test", true, None, true); + let subgraph_service = + SubgraphService::new("test", true, None, true, None, Notify::default()); assert!(subgraph_service.clone().apq.as_ref().load(Relaxed)); @@ -1145,6 +1694,8 @@ mod tests { .expect("expecting valid request"), operation_kind: OperationKind::Query, context: Context::new(), + subscription_stream: None, + connection_closed_signal: None, }) .await .unwrap(); @@ -1164,7 +1715,8 @@ mod tests { tokio::task::spawn(emulate_persisted_query_not_supported_extension_code( socket_addr, )); - let subgraph_service = SubgraphService::new("test", true, None, true); + let subgraph_service = + SubgraphService::new("test", true, None, true, None, Notify::default()); assert!(subgraph_service.clone().apq.as_ref().load(Relaxed)); @@ -1187,6 +1739,8 @@ mod tests { .expect("expecting valid request"), operation_kind: OperationKind::Query, context: Context::new(), + subscription_stream: None, + connection_closed_signal: None, }) .await .unwrap(); @@ -1204,7 +1758,8 @@ mod tests { async fn test_persisted_query_not_found_message() { let socket_addr = SocketAddr::from_str("127.0.0.1:3131").unwrap(); tokio::task::spawn(emulate_persisted_query_not_found_message(socket_addr)); - let subgraph_service = SubgraphService::new("test", true, None, true); + let subgraph_service = + SubgraphService::new("test", true, None, true, None, Notify::default()); let url = Uri::from_str(&format!("http://{socket_addr}")).unwrap(); let resp = subgraph_service @@ -1225,6 +1780,8 @@ mod tests { .expect("expecting valid request"), operation_kind: OperationKind::Query, context: Context::new(), + subscription_stream: None, + connection_closed_signal: None, }) .await .unwrap(); @@ -1243,7 +1800,8 @@ mod tests { tokio::task::spawn(emulate_persisted_query_not_found_extension_code( socket_addr, )); - let subgraph_service = SubgraphService::new("test", true, None, true); + let subgraph_service = + SubgraphService::new("test", true, None, true, None, Notify::default()); let url = Uri::from_str(&format!("http://{socket_addr}")).unwrap(); let resp = subgraph_service @@ -1264,6 +1822,8 @@ mod tests { .expect("expecting valid request"), operation_kind: OperationKind::Query, context: Context::new(), + subscription_stream: None, + connection_closed_signal: None, }) .await .unwrap(); @@ -1280,7 +1840,8 @@ mod tests { async fn test_apq_enabled_subgraph_configuration() { let socket_addr = 
SocketAddr::from_str("127.0.0.1:3333").unwrap(); tokio::task::spawn(emulate_expected_apq_enabled_configuration(socket_addr)); - let subgraph_service = SubgraphService::new("test", true, None, true); + let subgraph_service = + SubgraphService::new("test", true, None, true, None, Notify::default()); let url = Uri::from_str(&format!("http://{socket_addr}")).unwrap(); let resp = subgraph_service @@ -1301,6 +1862,8 @@ mod tests { .expect("expecting valid request"), operation_kind: OperationKind::Query, context: Context::new(), + subscription_stream: None, + connection_closed_signal: None, }) .await .unwrap(); @@ -1317,7 +1880,8 @@ mod tests { async fn test_apq_disabled_subgraph_configuration() { let socket_addr = SocketAddr::from_str("127.0.0.1:3434").unwrap(); tokio::task::spawn(emulate_expected_apq_disabled_configuration(socket_addr)); - let subgraph_service = SubgraphService::new("test", false, None, true); + let subgraph_service = + SubgraphService::new("test", false, None, true, None, Notify::default()); let url = Uri::from_str(&format!("http://{socket_addr}")).unwrap(); let resp = subgraph_service @@ -1338,6 +1902,8 @@ mod tests { .expect("expecting valid request"), operation_kind: OperationKind::Query, context: Context::new(), + subscription_stream: None, + connection_closed_signal: None, }) .await .unwrap(); diff --git a/apollo-router/src/services/supergraph_service.rs b/apollo-router/src/services/supergraph_service.rs index cfde2e27a1..c3da8c7185 100644 --- a/apollo-router/src/services/supergraph_service.rs +++ b/apollo-router/src/services/supergraph_service.rs @@ -194,9 +194,11 @@ async fn service_call( Some(QueryPlannerContent::Plan { plan }) => { let operation_name = body.operation_name.clone(); let is_deferred = plan.is_deferred(operation_name.as_deref(), &variables); + let is_subscription = plan.is_subscription(operation_name.as_deref()); let ClientRequestAccepts { - multipart: accepts_multipart, + multipart_defer: accepts_multipart_defer, + multipart_subscription: accepts_multipart_subscription, .. } = context .private_entries @@ -205,13 +207,24 @@ async fn service_call( .cloned() .unwrap_or_default(); - if is_deferred && !accepts_multipart { - let mut response = SupergraphResponse::new_from_graphql_response(graphql::Response::builder() - .errors(vec![crate::error::Error::builder() - .message(String::from("the router received a query with the @defer directive but the client does not accept multipart/mixed HTTP responses. To enable @defer support, add the HTTP header 'Accept: multipart/mixed; deferSpec=20220824'")) - .extension_code("DEFER_BAD_HEADER") - .build()]) - .build(), context); + if (is_deferred || is_subscription) + && !accepts_multipart_defer + && !accepts_multipart_subscription + { + let (error_message, error_code) = if is_deferred { + (String::from("the router received a query with the @defer directive but the client does not accept multipart/mixed HTTP responses. To enable @defer support, add the HTTP header 'Accept: multipart/mixed; deferSpec=20220824'"), "DEFER_BAD_HEADER") + } else { + (String::from("the router received a query with a subscription but the client does not accept multipart/mixed HTTP responses. 
To enable subscription support, add the HTTP header 'Accept: multipart/mixed; boundary=graphql; subscriptionSpec=1.0'"), "SUBSCRIPTION_BAD_HEADER") + }; + let mut response = SupergraphResponse::new_from_graphql_response( + graphql::Response::builder() + .errors(vec![crate::error::Error::builder() + .message(error_message) + .extension_code(error_code) + .build()]) + .build(), + context, + ); *response.response.status_mut() = StatusCode::NOT_ACCEPTABLE; Ok(response) } else if let Some(err) = plan.query.validate_variables(body, &schema).err() { @@ -470,11 +483,14 @@ impl SupergraphCreator { #[cfg(test)] mod tests { + use std::collections::HashMap; + use std::time::Duration; use super::*; use crate::plugin::test::MockSubgraph; use crate::services::supergraph; use crate::test_harness::MockedSubgraphs; + use crate::Notify; use crate::TestHarness; const SCHEMA: &str = r#"schema @@ -498,6 +514,11 @@ mod tests { type Query { currentUser: User @join__field(graph: USER) } + + type Subscription @join__type(graph: USER) { + userWasCreated: User + } + type User @join__owner(graph: USER) @join__type(graph: ORGA, key: "id") @@ -989,6 +1010,198 @@ mod tests { insta::assert_json_snapshot!(stream.next_response().await.unwrap()); } + #[tokio::test] + async fn subscription_with_callback() { + let mut notify = Notify::builder().build(); + let (handle, _) = notify + .create_or_subscribe("TEST_TOPIC".to_string(), false) + .await + .unwrap(); + let subgraphs = MockedSubgraphs([ + ("user", MockSubgraph::builder().with_json( + serde_json::json!{{"query":"subscription{userWasCreated{name activeOrganization{__typename id}}}"}}, + serde_json::json!{{"data": {"userWasCreated": { "__typename": "User", "id": "1", "activeOrganization": { "__typename": "Organization", "id": "0" } }}}} + ).with_subscription_stream(handle.clone()).build()), + ("orga", MockSubgraph::builder().with_json( + serde_json::json!{{ + "query":"query($representations:[_Any!]!){_entities(representations:$representations){...on Organization{suborga{id name}}}}", + "variables": { + "representations":[{"__typename": "Organization", "id":"0"}] + } + }}, + serde_json::json!{{ + "data": { + "_entities": [{ "suborga": [ + { "__typename": "Organization", "id": "1", "name": "A"}, + { "__typename": "Organization", "id": "2", "name": "B"}, + { "__typename": "Organization", "id": "3", "name": "C"}, + ] }] + }, + }} + ).build()) + ].into_iter().collect()); + + let mut configuration: Configuration = serde_json::from_value(serde_json::json!({"include_subgraph_errors": { "all": true }, "subscription": { "mode": {"preview_callback": {"public_url": "http://localhost:4545"}}}})).unwrap(); + configuration.notify = notify.clone(); + let service = TestHarness::builder() + .configuration(Arc::new(configuration)) + .schema(SCHEMA) + .extra_plugin(subgraphs) + .build_supergraph() + .await + .unwrap(); + + let request = supergraph::Request::fake_builder() + .query( + "subscription { userWasCreated { name activeOrganization { id suborga { id name } } } }", + ) + .context(subscription_context()) + .build() + .unwrap(); + let mut stream = service.oneshot(request).await.unwrap(); + let res = stream.next_response().await.unwrap(); + assert_eq!(&res.data, &Some(serde_json_bytes::Value::Null)); + insta::assert_json_snapshot!(res); + notify.broadcast(graphql::Response::builder().data(serde_json_bytes::json!({"userWasCreated": { "name": "test", "activeOrganization": { "__typename": "Organization", "id": "0" }}})).build()).await.unwrap(); + 
insta::assert_json_snapshot!(stream.next_response().await.unwrap()); + // error happened + notify + .broadcast( + graphql::Response::builder() + .error( + graphql::Error::builder() + .message("cannot fetch the name") + .extension_code("INVALID") + .build(), + ) + .build(), + ) + .await + .unwrap(); + insta::assert_json_snapshot!(stream.next_response().await.unwrap()); + } + + #[tokio::test] + async fn subscription_with_callback_with_limit() { + let mut notify = Notify::builder().build(); + let (handle, _) = notify + .create_or_subscribe("TEST_TOPIC".to_string(), false) + .await + .unwrap(); + let subgraphs = MockedSubgraphs([ + ("user", MockSubgraph::builder().with_json( + serde_json::json!{{"query":"subscription{userWasCreated{name activeOrganization{__typename id}}}"}}, + serde_json::json!{{"data": {"userWasCreated": { "__typename": "User", "id": "1", "activeOrganization": { "__typename": "Organization", "id": "0" } }}}} + ).with_subscription_stream(handle.clone()).build()), + ("orga", MockSubgraph::builder().with_json( + serde_json::json!{{ + "query":"query($representations:[_Any!]!){_entities(representations:$representations){...on Organization{suborga{id name}}}}", + "variables": { + "representations":[{"__typename": "Organization", "id":"0"}] + } + }}, + serde_json::json!{{ + "data": { + "_entities": [{ "suborga": [ + { "__typename": "Organization", "id": "1", "name": "A"}, + { "__typename": "Organization", "id": "2", "name": "B"}, + { "__typename": "Organization", "id": "3", "name": "C"}, + ] }] + }, + }} + ).build()) + ].into_iter().collect()); + + let mut configuration: Configuration = serde_json::from_value(serde_json::json!({"include_subgraph_errors": { "all": true }, "subscription": { "max_opened_subscriptions": 1, "mode": {"preview_callback": {"public_url": "http://localhost:4545"}}}})).unwrap(); + configuration.notify = notify.clone(); + let mut service = TestHarness::builder() + .configuration(Arc::new(configuration)) + .schema(SCHEMA) + .extra_plugin(subgraphs) + .build_supergraph() + .await + .unwrap(); + + let request = supergraph::Request::fake_builder() + .query( + "subscription { userWasCreated { name activeOrganization { id suborga { id name } } } }", + ) + .context(subscription_context()) + .build() + .unwrap(); + let mut stream = service.ready().await.unwrap().call(request).await.unwrap(); + let res = stream.next_response().await.unwrap(); + assert_eq!(&res.data, &Some(serde_json_bytes::Value::Null)); + assert!(res.errors.is_empty()); + insta::assert_json_snapshot!(res); + notify.broadcast(graphql::Response::builder().data(serde_json_bytes::json!({"userWasCreated": { "name": "test", "activeOrganization": { "__typename": "Organization", "id": "0" }}})).build()).await.unwrap(); + insta::assert_json_snapshot!(stream.next_response().await.unwrap()); + // error happened + notify + .broadcast( + graphql::Response::builder() + .error( + graphql::Error::builder() + .message("cannot fetch the name") + .extension_code("INVALID") + .build(), + ) + .build(), + ) + .await + .unwrap(); + insta::assert_json_snapshot!(stream.next_response().await.unwrap()); + let request = supergraph::Request::fake_builder() + .query( + "subscription { userWasCreated { name activeOrganization { id suborga { id name } } } }", + ) + .context(subscription_context()) + .build() + .unwrap(); + let mut stream_2 = service.ready().await.unwrap().call(request).await.unwrap(); + let res = stream_2.next_response().await.unwrap(); + assert!(!res.errors.is_empty()); + insta::assert_json_snapshot!(res); + 
drop(stream); + drop(stream_2); + let request = supergraph::Request::fake_builder() + .query( + "subscription { userWasCreated { name activeOrganization { id suborga { id name } } } }", + ) + .context(subscription_context()) + .build() + .unwrap(); + // Wait a bit to ensure all the closed signals has been triggered + tokio::time::sleep(Duration::from_millis(100)).await; + let mut stream_2 = service.ready().await.unwrap().call(request).await.unwrap(); + let res = stream_2.next_response().await.unwrap(); + assert!(res.errors.is_empty()); + } + + #[tokio::test] + async fn subscription_without_header() { + let subgraphs = MockedSubgraphs(HashMap::new()); + let configuration: Configuration = serde_json::from_value(serde_json::json!({"include_subgraph_errors": { "all": true }, "subscription": { "mode": {"preview_callback": {"public_url": "http://localhost:4545"}}}})).unwrap(); + let service = TestHarness::builder() + .configuration(Arc::new(configuration)) + .schema(SCHEMA) + .extra_plugin(subgraphs) + .build_supergraph() + .await + .unwrap(); + + let request = supergraph::Request::fake_builder() + .query( + "subscription { userWasCreated { name activeOrganization { id suborga { id name } } } }", + ) + .build() + .unwrap(); + + let mut stream = service.oneshot(request).await.unwrap(); + let res = stream.next_response().await.unwrap(); + insta::assert_json_snapshot!(res); + } + #[tokio::test] async fn root_typename_with_defer_and_empty_first_response() { let subgraphs = MockedSubgraphs([ @@ -1494,10 +1707,20 @@ mod tests { insta::assert_json_snapshot!(stream.next_response().await.unwrap()); } + fn subscription_context() -> Context { + let context = Context::new(); + context.private_entries.lock().insert(ClientRequestAccepts { + multipart_subscription: true, + ..Default::default() + }); + + context + } + fn defer_context() -> Context { let context = Context::new(); context.private_entries.lock().insert(ClientRequestAccepts { - multipart: true, + multipart_defer: true, ..Default::default() }); @@ -1765,7 +1988,6 @@ mod tests { let mut stream = service.oneshot(request).await.unwrap(); let response = stream.next_response().await.unwrap(); - println!("{response:?}"); assert_eq!( serde_json::to_value(&response.data).unwrap(), diff --git a/apollo-router/src/spec/query.rs b/apollo-router/src/spec/query.rs index 3d1d31dea3..86c1940a2a 100644 --- a/apollo-router/src/spec/query.rs +++ b/apollo-router/src/spec/query.rs @@ -1089,9 +1089,6 @@ impl Operation { ) -> Result { let name = operation.name().map(|s| s.to_owned()); let kind = operation.operation_ty().into(); - if kind == OperationKind::Subscription { - return Err(SpecError::SubscriptionNotSupported); - } let current_field_type = FieldType::new_named(schema.root_operation_name(kind)); let selection_set = operation .selection_set() diff --git a/apollo-router/src/spec/schema.rs b/apollo-router/src/spec/schema.rs index ec42d1a512..8111b3e215 100644 --- a/apollo-router/src/spec/schema.rs +++ b/apollo-router/src/spec/schema.rs @@ -145,6 +145,11 @@ impl Schema { self.subgraphs.iter() } + /// Return the subgraph URI given the service name + pub(crate) fn subgraph_url(&self, service_name: &str) -> Option<&Uri> { + self.subgraphs.get(service_name) + } + pub(crate) fn api_schema(&self) -> &Schema { match &self.api_schema { Some(schema) => schema, diff --git a/apollo-router/src/uplink/license_enforcement.rs b/apollo-router/src/uplink/license_enforcement.rs index e33e7e358d..a4d6421b48 100644 --- a/apollo-router/src/uplink/license_enforcement.rs +++ 
b/apollo-router/src/uplink/license_enforcement.rs @@ -154,6 +154,10 @@ impl LicenseEnforcementReport { .path("$.traffic_shaping..experimental_entity_caching") .name("Subgraph entity caching") .build(), + ConfigurationRestriction::builder() + .path("$.subscription") + .name("Federated subscriptions") + .build(), // Per-operation limits are restricted but parser limits like `parser_max_recursion` // where the Router only configures apollo-rs are not. ConfigurationRestriction::builder() diff --git a/apollo-router/src/uplink/snapshots/apollo_router__uplink__license_enforcement__test__restricted_features_via_config.snap b/apollo-router/src/uplink/snapshots/apollo_router__uplink__license_enforcement__test__restricted_features_via_config.snap index 8b0e81a535..fd01c309ab 100644 --- a/apollo-router/src/uplink/snapshots/apollo_router__uplink__license_enforcement__test__restricted_features_via_config.snap +++ b/apollo-router/src/uplink/snapshots/apollo_router__uplink__license_enforcement__test__restricted_features_via_config.snap @@ -24,6 +24,9 @@ Configuration yaml: * Subgraph entity caching .traffic_shaping..experimental_entity_caching +* Federated subscriptions + .subscription + * Operation depth limiting .preview_operation_limits.max_depth diff --git a/apollo-router/src/uplink/testdata/restricted.router.yaml b/apollo-router/src/uplink/testdata/restricted.router.yaml index 93280ed7d2..177b357121 100644 --- a/apollo-router/src/uplink/testdata/restricted.router.yaml +++ b/apollo-router/src/uplink/testdata/restricted.router.yaml @@ -44,6 +44,17 @@ traffic_shaping: urls: - https://example.com +subscription: + mode: + passthrough: + subgraphs: + reviews: + path: /graphql + preview_callback: + public_url: "http://localhost:4040/" + listen: 0.0.0.0:4040 + subgraphs: + - atestsubgraph plugins: experimental.restricted: enabled: true diff --git a/apollo-router/templates/sandbox_index.html b/apollo-router/templates/sandbox_index.html index d6fa57f4c0..2e97cc0dba 100644 --- a/apollo-router/templates/sandbox_index.html +++ b/apollo-router/templates/sandbox_index.html @@ -57,6 +57,7 @@

Welcome to the Apollo Router

new window.EmbeddedSandbox({ target: '#embeddableSandbox', initialEndpoint, + initialSubscriptionEndpoint: initialEndpoint, initialRequestQueryPlan: true, initialState: { includeCookies: true, diff --git a/apollo-router/testing_schema.graphql b/apollo-router/testing_schema.graphql index 04295346bd..b26ac30848 100644 --- a/apollo-router/testing_schema.graphql +++ b/apollo-router/testing_schema.graphql @@ -30,6 +30,10 @@ type Mutation { createReview(body: String, id: ID!, upc: ID!): Review @join__field(graph: REVIEWS) } +type Subscription { + userWasCreated: User @join__field(graph: ACCOUNTS) +} + type Product @join__owner(graph: PRODUCTS) @join__type(graph: PRODUCTS, key: "upc") diff --git a/apollo-router/tests/common.rs b/apollo-router/tests/common.rs index 930b3f25eb..57cc0e259b 100644 --- a/apollo-router/tests/common.rs +++ b/apollo-router/tests/common.rs @@ -349,6 +349,45 @@ impl IntegrationTest { } } + #[allow(dead_code)] + pub async fn run_subscription(&self, subscription: &str) -> (String, reqwest::Response) { + assert!( + self.router.is_some(), + "router was not started, call `router.start().await; router.assert_started().await`" + ); + let client = reqwest::Client::new(); + let id = Uuid::new_v4().to_string(); + let span = info_span!("client_request", unit_test = id.as_str()); + let _span_guard = span.enter(); + + let mut request = client + .post("http://localhost:4000") + .header(CONTENT_TYPE, APPLICATION_JSON.essence_str()) + .header( + ACCEPT, + "multipart/mixed;boundary=\"graphql\";subscriptionSpec=1.0", + ) + .header("apollographql-client-name", "custom_name") + .header("apollographql-client-version", "1.0") + .json(&json!({"query":subscription,"variables":{}})) + .build() + .unwrap(); + + global::get_text_map_propagator(|propagator| { + propagator.inject_context( + &span.context(), + &mut opentelemetry_http::HeaderInjector(request.headers_mut()), + ); + }); + + match client.execute(request).await { + Ok(response) => (id, response), + Err(err) => { + panic!("unable to send successful request to router, {err}") + } + } + } + #[allow(dead_code)] pub async fn get_metrics_response(&self) -> reqwest::Result { let client = reqwest::Client::new(); diff --git a/apollo-router/tests/fixtures/subscription.router.yaml b/apollo-router/tests/fixtures/subscription.router.yaml new file mode 100644 index 0000000000..74738bde4f --- /dev/null +++ b/apollo-router/tests/fixtures/subscription.router.yaml @@ -0,0 +1,26 @@ +supergraph: + listen: 127.0.0.1:4000 + path: / + introspection: true +homepage: + enabled: false +sandbox: + enabled: true +override_subgraph_url: + accounts: "http://localhost:4041" +include_subgraph_errors: + all: true +subscription: + mode: + passthrough: + all: + path: /ws + subgraphs: + rng: + path: /ws + protocol: graphql_transport_ws +headers: + all: # Header rules for all subgraphs + request: + - propagate: + named: custom_id \ No newline at end of file diff --git a/apollo-router/tests/subscription_load_test.rs b/apollo-router/tests/subscription_load_test.rs new file mode 100644 index 0000000000..1e7b023ce1 --- /dev/null +++ b/apollo-router/tests/subscription_load_test.rs @@ -0,0 +1,179 @@ +//! 
This file is to load test subscriptions and should be launched manually, not in our CI +use futures::StreamExt; +use serde_json::json; +use tower::BoxError; + +use crate::common::IntegrationTest; +use crate::common::Telemetry; + +mod common; + +const SUBSCRIPTION_CONFIG: &str = include_str!("fixtures/subscription.router.yaml"); +const SUB_QUERY: &str = r#"subscription { userWasCreated { name reviews { body } }}"#; +const UNFEDERATED_SUB_QUERY: &str = r#"subscription { userWasCreated { name username }}"#; + +#[ignore] +#[tokio::test(flavor = "multi_thread")] +async fn test_subscription_load() -> Result<(), BoxError> { + let mut router = create_router(SUBSCRIPTION_CONFIG).await?; + router.start().await; + router.assert_started().await; + + for i in 0..1000000i64 { + let (_, response) = router.run_subscription(UNFEDERATED_SUB_QUERY).await; + assert!(response.status().is_success()); + + tokio::spawn(async move { + let mut stream = response.bytes_stream(); + while let Some(_chunk) = stream.next().await {} + }); + if i % 100 == 0 { + println!("iii - {i}"); + } + } + + for _ in 0..100 { + let (_id, resp) = router.run_query().await; + assert!(resp.status().is_success()); + } + + Ok(()) +} + +#[ignore] +#[tokio::test(flavor = "multi_thread")] +async fn test_subscription_load_federated() -> Result<(), BoxError> { + let mut router = create_router(SUBSCRIPTION_CONFIG).await?; + router.start().await; + router.assert_started().await; + + for i in 0..1000000i64 { + let (_, response) = router.run_subscription(SUB_QUERY).await; + assert!(response.status().is_success()); + + tokio::spawn(async move { + let mut stream = response.bytes_stream(); + while let Some(_chunk) = stream.next().await {} + }); + if i % 100 == 0 { + println!("iii - {i}"); + } + } + + for _ in 0..100 { + let (_id, resp) = router.run_query().await; + assert!(resp.status().is_success()); + } + + Ok(()) +} + +#[ignore] +#[tokio::test(flavor = "multi_thread")] +async fn test_subscription_with_dedup_load_standalone() -> Result<(), BoxError> { + for i in 0..1000000i64 { + let response = run_subscription(UNFEDERATED_SUB_QUERY, None).await; + assert!( + response.status().is_success(), + "error status {:?}", + response.status() + ); + + tokio::spawn(async move { + let mut stream = response.bytes_stream(); + while let Some(_chunk) = stream.next().await {} + }); + if i % 100 == 0 { + println!("iii - {i}"); + } + } + + Ok(()) +} + +#[ignore] +#[tokio::test(flavor = "multi_thread")] +async fn test_subscription_memory_usage() -> Result<(), BoxError> { + for i in 0..300i64 { + let response = run_subscription(SUB_QUERY, None).await; + assert!( + response.status().is_success(), + "error status {:?}", + response.status() + ); + + if i == 299 { + let mut stream = response.bytes_stream(); + while let Some(_chunk) = stream.next().await {} + } else { + tokio::spawn(async move { + let mut stream = response.bytes_stream(); + while let Some(_chunk) = stream.next().await {} + }); + } + if i % 100 == 0 { + println!("iii - {i}"); + } + } + + Ok(()) +} + +#[ignore] +#[tokio::test(flavor = "multi_thread")] +async fn test_subscription_without_dedup_load_standalone() -> Result<(), BoxError> { + for i in 0..1000000i64 { + let response = run_subscription(UNFEDERATED_SUB_QUERY, Some(i)).await; + assert!( + response.status().is_success(), + "error status {:?}", + response.status() + ); + + tokio::spawn(async move { + let mut stream = response.bytes_stream(); + while let Some(_chunk) = stream.next().await {} + }); + if i % 100 == 0 { + println!("iii - {i}"); + } + } + + 
Ok(()) +} + +async fn create_router(config: &'static str) -> Result { + Ok(IntegrationTest::builder() + .telemetry(Telemetry::Jaeger) + .config(config) + .build() + .await) +} + +async fn run_subscription(sub_query: &str, id: Option) -> reqwest::Response { + let client = reqwest::Client::new(); + + let mut request = client + .post("http://localhost:4000") + .header( + "accept", + "multipart/mixed;boundary=\"graphql\";subscriptionSpec=1.0", + ) + .header("apollographql-client-name", "custom_name") + .header("apollographql-client-version", "1.0") + .json(&json!({"query":sub_query,"variables":{}})); + + // Introduce a header to generate a different sub and then disable dedup + if let Some(id) = id { + request = request.header("custom_id", format!("{id}")); + } + + let request = request.build().unwrap(); + + match client.execute(request).await { + Ok(response) => response, + Err(err) => { + panic!("unable to send successful request to router, {err}") + } + } +} diff --git a/dev-docs/callback_protocol.md b/dev-docs/callback_protocol.md new file mode 100644 index 0000000000..72c81698f8 --- /dev/null +++ b/dev-docs/callback_protocol.md @@ -0,0 +1,173 @@ +# GraphQL subscription over callback protocol + +## Communication + +The callback protocol for GraphQL subscription aims to be an alternative to existing websocket protocols when using Apollo Federation and communicate between the Apollo Router and an event source (a subgraph for example). + +Main goal is to not keep an opened connection between a subgraph and the Apollo Router in order to be more efficient. + +**All** payloads contain the `kind` field outlining the kind of payload it is, in our case it will always be `"subscription"`. The payload also always contains the `action` field describing what kind of action we want to process, the `verifier` (to check that we're authorized to make that callback) the Apollo Router sent via `extensions` in the request and finally the `id` field which is the identifier (an uuid v4) for a specific opened subscription. + +Depending on the `action`, the payload can contain two more _optional_ fields: + +- `payload` holding the GraphQL Response when sending the subscription event from the source event to the Apollo Router. +- `errors` used to complete a connection and add errors if critical errors happened. `errors` is an array of GraphQL error. + +When opening a GraphQL subscription on the Apollo Router it will directly send a request to the subgraph containing the original subscription and more data related to callback mode in GraphQL `extensions`. For example: + +```json +{ + "query": "subscription { userWasCreated { name reviews { body } } }", + "extensions": { + "subscription": { + "callback_url": "http://localhost:4000/callback/c4a9d1b8-dc57-44ab-9e5a-6e6189b2b945", + "subscription_id": "c4a9d1b8-dc57-44ab-9e5a-6e6189b2b945", + "verifier": "XXX" + } + } +} +``` + +When a subgraph receives a subscription request. It must first make a `check` request to the callback endpoint (see below) with the given data (`callback_url`, `subscription_id` and a `verifier`). This will ensure the subgraph is able to send source stream events, and that the `subscription_id` and the `verifier` are correct. A successful call to the callback URL with a `check` yields an empty body and a header `subscription-protocol: callback`. Only then can the subgraph can answer the initial subscription request, and start notifying the callback URL with subscription events. 
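To make this handshake concrete, here is a minimal, illustrative sketch (not part of this change) of how an event source might perform the `check` call with `reqwest` before answering the subscription request. The field values come from the `extensions` payload shown above; the helper name and error handling are hypothetical and deliberately simplified.

```rust
use reqwest::{Client, StatusCode};
use serde_json::json;

/// Hypothetical helper: verify the callback endpoint before accepting a subscription.
/// `callback_url`, `subscription_id`, and `verifier` come from the request `extensions`.
async fn check_callback(
    client: &Client,
    callback_url: &str,
    subscription_id: &str,
    verifier: &str,
) -> Result<(), Box<dyn std::error::Error>> {
    let response = client
        .post(callback_url)
        .json(&json!({
            "kind": "subscription",
            "action": "check",
            "id": subscription_id,
            "verifier": verifier,
        }))
        .send()
        .await?;

    // A successful `check` is an empty 204 response carrying the
    // `subscription-protocol: callback` header.
    if response.status() != StatusCode::NO_CONTENT {
        return Err(format!("callback check failed with status {}", response.status()).into());
    }
    Ok(())
}
```

Only after this call succeeds would the subgraph respond to the original subscription request and begin posting events to the callback URL.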
+ + +The event source can terminate a subscription at any time by sending the `complete` action message (see the message types below) and can include `errors` if needed. + + +If the subscription is closed or no longer exists on the Apollo Router, the callback endpoint will return a 404 HTTP status code when the event source sends an event message to it. +So, on the event source side, if you receive a 404 HTTP status code from the callback endpoint you must terminate the subscription. + +## Message types + +> Note that all messages are sent from the event source to the Apollo Router + +### `check` + +Indicates that the event source wants to check that the callback url and subscription id it received are correct. If the subscription id is correct the callback endpoint must respond with a 204 HTTP status code without payload. + +> When opening a `subscription` this is the first message to be sent to the callback endpoint and it MUST be synchronous. It is called directly when the event source receives a request for a subscription, before executing it. The event source MUST call the callback endpoint and send this message in order to check that it's able to communicate with the Apollo Router. If the call fails it should directly return an error; if it succeeds it returns an empty body with a 204 HTTP status code. Once the subscription has been correctly created, this message can also be used to heartbeat a single subscription; if you want to heartbeat several subscriptions at once, use the `heartbeat` message. + +```json +{ + "kind": "subscription", + "action": "check", + "id": "c4a9d1b8-dc57-44ab-9e5a-6e6189b2b945", + "verifier": "XXX" +} +``` + +### `heartbeat` + +This message is used to heartbeat the subscription and to check that the event source can access the callback endpoint. If one of the subscription ids is incorrect the callback endpoint must respond with a 400 HTTP status code and a payload containing the `invalid_ids` field (an array of incorrect ids), the `verifier` to use next time and the `id` linked to that `verifier`. If all ids are correct then the callback endpoint must respond with a 204 HTTP status code without payload. The `id` field corresponds to the `id` you received from the router along with the provided `verifier` you're sending. + +> If none of the IDs is still valid, a 404 error status code is returned without any payload + +```json +{ + "kind": "subscription", + "action": "heartbeat", + "id": "c4a9d1b8-dc57-44ab-9e5a-6e6189b2b945", + "ids": ["c4a9d1b8-dc57-44ab-9e5a-6e6189b2b945", "c4a9d1b8-dc57-44ab-9e5a-6e6189b2b254"], + "verifier": "XXX" +} +``` + +Example of the payload sent with an HTTP 400 status code if the heartbeat contains incorrect ids: + +```json +{ + "id": "c4a9d1b8-dc57-44ab-9e5a-6e6189b2b945", + "invalid_ids": ["c4a9d1b8-dc57-44ab-9e5a-6e6189b2b254"], + "verifier": "XXX" +} +``` + +### `next` + +Operation execution result(s) from the event source, created by the subscription. The `payload` field must be a compliant GraphQL execution result. After all results have been emitted, the `complete` message will follow, indicating stream completion. + +```json +{ + "kind": "subscription", + "action": "next", + "payload": { + "data": { + "foo": "bar" + } + }, + "id": "c4a9d1b8-dc57-44ab-9e5a-6e6189b2b945", + "verifier": "XXX" +} +``` + +### `complete` + +Indicates that the requested GraphQL subscription execution has completed. If the message contains the `errors` field it means the operation failed; if `errors` is empty it means the operation was executed successfully. 
In both cases (whether `errors` is empty or not), when it receives a `complete` event the Apollo Router closes the subscription to the client. The `errors` field is optional and is an array of GraphQL errors. + +```typescript +{ + "kind": "subscription", + "action": "complete", + "errors": [{ // Optional if successful + "message": "something is wrong" + }], + "id": "c4a9d1b8-dc57-44ab-9e5a-6e6189b2b945", + "verifier": "XXX" +} +``` + +### Error cases + ++ The event source can't complete the `check` call to the callback endpoint, either because the subscription id is incorrect or because the callback endpoint is unavailable. ++ A message sent to the callback endpoint by the event source fails: a 404 HTTP status code means the subscription no longer exists and should be closed on the event source side; all other errors are unexpected and should result in the event source terminating the subscription. ++ The event source didn't send the `check` message every 5 seconds, so the subscription is automatically closed at the Apollo Router level and the connection with the client is cut. + +## Examples + +For the sake of clarity, the following examples demonstrate the callback protocol. + +### Streaming operation + +#### `subscription` operation + +1. _The Apollo Router_ receives a `subscription` operation +2. _The Apollo Router_ generates a unique ID (uuid v4) for this subscription +3. _The Apollo Router_ sends a query containing a [GraphQL request payload](https://github.com/graphql/graphql-over-http/blob/main/spec/GraphQLOverHTTP.md#request-parameters) with all callback data in `extensions` + +Example: + +```json +{ + "query": "subscription { userWasCreated { name reviews { body } } }", + "extensions": { + "subscription": { + "callback_url": "http://localhost:4000/callback/c4a9d1b8-dc57-44ab-9e5a-6e6189b2b945", + "subscription_id": "c4a9d1b8-dc57-44ab-9e5a-6e6189b2b945", + "verifier": "XXX" + } + } +} +``` + +4. _Event source_ receives the GraphQL `subscription` with all callback data directly in `extensions`. +5. _Event source_ calls the callback endpoint given in `extensions` with a [`check`](#check) message to initialize the subscription with _the Apollo Router_. + +Payload example for `POST` on `http://localhost:4000/callback/c4a9d1b8-dc57-44ab-9e5a-6e6189b2b945`: + +```json +{ + "kind": "subscription", + "action": "check", + "id": "c4a9d1b8-dc57-44ab-9e5a-6e6189b2b945", + "verifier": "XXX" +} +``` + +6. _Event source_ receives a 204 HTTP status code from the call to the callback endpoint + - If an error happens or you don't receive a 204 HTTP status code, directly return a >= 400 HTTP status code to the _Apollo Router_ +7. _Event source_ spawns a background task to listen for `subscription` events + - Every 5 seconds, the _Event source_ must call the callback endpoint with a [`heartbeat` payload](#heartbeat) to confirm it's still listening to the subscription + - For every received event, the _Event source_ calls the callback endpoint with a [`next` payload](#next) + - If an error occurs, the _Event source_ calls the callback endpoint with a [`complete` payload with the errors field](#complete) + - If the stream of events is done, send a [`complete` payload WITHOUT the errors field](#complete) +8. 
_Event source_ returns empty body containing a new header `subscription-protocol: callback` in answer to the initial call from _Apollo Router_ \ No newline at end of file diff --git a/dev-docs/multipart-subscriptions-protocol.md b/dev-docs/multipart-subscriptions-protocol.md new file mode 100644 index 0000000000..7e65ffa3c9 --- /dev/null +++ b/dev-docs/multipart-subscriptions-protocol.md @@ -0,0 +1,72 @@ +# Multipart subscriptions protocol + +Instead of relying on WebSockets, the subscriptions protocol supported by the router uses streaming multipart HTTP responses, following the lead of the [Incremental Delivery over HTTP](https://github.com/graphql/graphql-over-http/blob/main/rfcs/IncrementalDelivery.md) spec that is already in use to support `@defer` today. + +## Communication + +When sending a request containing a subscription to the router, clients should include the following `Accept` header to indicate their support for the multipart subscriptions protocol: +``` +Accept: multipart/mixed; boundary="graphql"; subscriptionSpec="1.0", application/json +``` + +> Note that `boundary` should always be `graphql` for now, and `subscriptionSpec` is `1.0` for the current version of the protocol. + +The router will then respond with a stream of body parts, following the [definition of multipart content specified in RFC1341](https://www.w3.org/Protocols/rfc1341/7_2_Multipart.html). + +An example response might look as follows: + +``` +--graphql +Content-Type: application/json + +{} +--graphql +Content-Type: application/json + +{"payload": {"data": { "newPost": { "id": 123, "title": "Hello!"}}}} +--graphql-- +``` + +When HTTP/1 is used, the response will use `Transfer-Encoding: chunked`, but this is not needed for HTTP/2 (which has built-in support for data streaming) and actually [disallowed](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Transfer-Encoding). + +## Heartbeats + +The router will send periodic heartbeats to avoid any intermediaries from closing the connection. Heartbeats are sent as an empty JSON object without a payload, and they should be silently ignored by clients: + +``` +--graphql +Content-Type: application/json + +{} +--graphql-- +``` + +## Messages + +The protocol differentiates between transport-level concerns and the GraphQL response payloads themselves. One reason for this is that [the response format is part of the GraphQL spec](https://spec.graphql.org/draft/#sec-Response-Format), and additional fields might be confusing or could even break client typing. + +Except for [heartbeats](#heartbeats), every message will therefore include a `payload`, but the payload may be `null`. + +Some errors are part of an execution result. Results may include partial data, and these errors are not fatal (meaning the subscription stream should be kept open). They are therefore delivered within the payload: + +```json +{ + "payload": { + "errors": [...], + "data": {...}, + "extensions": {...} + } +} +``` + +When the router encounters an error that is fatal and should lead to termination of the subscription, it will instead send a message with a top-level `errors` field, and then close the connection: + +```json +{ + "payload": null, + "errors": [...] +} +``` + +Both types of `errors` will follow the [GraphQL error format](http://spec.graphql.org/draft/#sec-Errors.Error-Result-Format) (but top-level `errors` will never have `locations` or `path`). 
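As a purely illustrative sketch (not part of this change), a Rust client could consume such a multipart stream roughly as follows, assuming `reqwest` with its `stream` feature, `futures`, and `tokio`. A real client should buffer across chunks and use a proper multipart parser instead of the naive boundary split shown here, and the `userWasCreated` field simply mirrors the testing schema used elsewhere in this changeset.

```rust
use futures::StreamExt;
use serde_json::Value;

#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    let response = reqwest::Client::new()
        .post("http://localhost:4000/")
        .header(
            "accept",
            "multipart/mixed;boundary=\"graphql\";subscriptionSpec=1.0",
        )
        .header("content-type", "application/json")
        .json(&serde_json::json!({
            "query": "subscription { userWasCreated { name } }"
        }))
        .send()
        .await?;

    let mut body = response.bytes_stream();
    while let Some(chunk) = body.next().await {
        // Naive parsing: assume each chunk holds whole multipart parts.
        let text = String::from_utf8_lossy(&chunk?).to_string();
        for part in text.split("--graphql") {
            // Skip the part headers and the closing "--" marker; keep only the JSON body.
            let (Some(start), Some(end)) = (part.find('{'), part.rfind('}')) else {
                continue;
            };
            if let Ok(message) = serde_json::from_str::<Value>(&part[start..=end]) {
                match message.get("payload") {
                    // An empty object `{}` (no payload) is a heartbeat: ignore it.
                    None => continue,
                    Some(payload) => println!("subscription event: {payload}"),
                }
            }
        }
    }
    Ok(())
}
```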
+ diff --git a/docker-compose.yml b/docker-compose.yml index 9ee2c52719..a795b3393d 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -10,4 +10,4 @@ services: - 6831:6831/udp - 6832:6832/udp - 16686:16686 - - 14268:14268 + - 14268:14268 \ No newline at end of file diff --git a/dockerfiles/tracing/docker-compose.datadog.yml b/dockerfiles/tracing/docker-compose.datadog.yml index 2c9f75938d..d12d1b5a24 100644 --- a/dockerfiles/tracing/docker-compose.datadog.yml +++ b/dockerfiles/tracing/docker-compose.datadog.yml @@ -3,7 +3,7 @@ services: apollo-router: container_name: apollo-router - image: ghcr.io/apollographql/router:v1.21.0 + image: ghcr.io/apollographql/router:v1.22.0 volumes: - ./supergraph.graphql:/etc/config/supergraph.graphql - ./router/datadog.router.yaml:/etc/config/configuration.yaml diff --git a/dockerfiles/tracing/docker-compose.jaeger.yml b/dockerfiles/tracing/docker-compose.jaeger.yml index fa2fb3ba6f..ec74433d34 100644 --- a/dockerfiles/tracing/docker-compose.jaeger.yml +++ b/dockerfiles/tracing/docker-compose.jaeger.yml @@ -4,7 +4,7 @@ services: apollo-router: container_name: apollo-router #build: ./router - image: ghcr.io/apollographql/router:v1.21.0 + image: ghcr.io/apollographql/router:v1.22.0 volumes: - ./supergraph.graphql:/etc/config/supergraph.graphql - ./router/jaeger.router.yaml:/etc/config/configuration.yaml diff --git a/dockerfiles/tracing/docker-compose.zipkin.yml b/dockerfiles/tracing/docker-compose.zipkin.yml index fad26b51e2..3976a2c631 100644 --- a/dockerfiles/tracing/docker-compose.zipkin.yml +++ b/dockerfiles/tracing/docker-compose.zipkin.yml @@ -4,7 +4,7 @@ services: apollo-router: container_name: apollo-router build: ./router - image: ghcr.io/apollographql/router:v1.21.0 + image: ghcr.io/apollographql/router:v1.22.0 volumes: - ./supergraph.graphql:/etc/config/supergraph.graphql - ./router/zipkin.router.yaml:/etc/config/configuration.yaml diff --git a/docs/source/config.json b/docs/source/config.json index 65f52b3b06..0991dfa71f 100644 --- a/docs/source/config.json +++ b/docs/source/config.json @@ -52,6 +52,31 @@ "Privacy and data collection": "/privacy" } }, + "Executing Operations": { + "Build and run queries": "/executing-operations/build-run-queries", + "@defer support": "/executing-operations/defer-support", + "Request format": "/executing-operations/requests", + "GraphQL Subscriptions": { + "Subscriptions setup": [ + "/executing-operations/subscription-support", + [ + "enterprise" + ] + ], + "Subgraph protocol: HTTP callback": [ + "/executing-operations/subscription-callback-protocol", + [ + "enterprise" + ] + ], + "Client protocol: HTTP multipart": [ + "/executing-operations/subscription-multipart-protocol", + [ + "enterprise" + ] + ] + } + }, "Metrics & Monitoring": { "GraphOS reporting": "/configuration/apollo-telemetry", "OpenTelemetry tracing": "/configuration/tracing", @@ -64,11 +89,6 @@ "Docker": "/containerization/docker", "Kubernetes": "/containerization/kubernetes" }, - "Executing Operations": { - "Build and run queries": "/executing-operations/build-run-queries", - "@defer support": "/executing-operations/defer-support", - "Request format": "/executing-operations/requests" - }, "Managed Federation": { "Overview": "https://www.apollographql.com/docs/federation/managed-federation/overview", "Setup": "https://www.apollographql.com/docs/federation/managed-federation/setup", diff --git a/docs/source/configuration/metrics.mdx b/docs/source/configuration/metrics.mdx index 9acb8df120..6b0b637ac2 100644 --- 
a/docs/source/configuration/metrics.mdx +++ b/docs/source/configuration/metrics.mdx @@ -110,6 +110,11 @@ All cache metrics listed above have the following attributes: Note that the initial call to uplink during router startup will not be reflected in metrics. +#### Subscription +- `apollo_router_opened_subscriptions` - Number of different opened subscriptions (not the number of clients with an opened subscriptions in case it's deduplicated) +- `apollo_router_deduplicated_subscriptions_total` - Number of subscriptions that has been deduplicated +- `apollo_router_skipped_event_count` - Number of subscription events that has been skipped because too many events have been received from the subgraph but not yet sent to the client. + ## Using OpenTelemetry Collector You can send metrics to [OpenTelemetry Collector](https://opentelemetry.io/docs/collector/) for processing and reporting metrics. diff --git a/docs/source/configuration/overview.mdx b/docs/source/configuration/overview.mdx index e61e305155..bf468b3010 100644 --- a/docs/source/configuration/overview.mdx +++ b/docs/source/configuration/overview.mdx @@ -483,6 +483,10 @@ See [Configuring CORS in the Apollo Router](./cors). See [Apollo Router support for `@defer`](../executing-operations/defer-support/#disabling-defer). +### Subscription support + +See [GraphQL subscriptions in the Apollo Router](../executing-operations/subscription-support/#router-setup). + ### External coprocessing See [External coprocessing in the Apollo Router](../customizations/coprocessor/). diff --git a/docs/source/containerization/docker.mdx b/docs/source/containerization/docker.mdx index 21b2881f84..6f862613f7 100644 --- a/docs/source/containerization/docker.mdx +++ b/docs/source/containerization/docker.mdx @@ -11,7 +11,7 @@ The default behaviour of the router images is suitable for a quickstart or devel Note: The [docker documentation](https://docs.docker.com/engine/reference/run/) for the run command may be helpful when reading through the examples. -Note: The exact image version to use is your choice depending on which release you wish to use. In the following examples, replace `` with your chosen version. e.g.: `v1.21.0` +Note: The exact image version to use is your choice depending on which release you wish to use. In the following examples, replace `` with your chosen version. e.g.: `v1.22.0` ## Override the configuration diff --git a/docs/source/containerization/kubernetes.mdx b/docs/source/containerization/kubernetes.mdx index 6b19e67bf5..3eaa878712 100644 --- a/docs/source/containerization/kubernetes.mdx +++ b/docs/source/containerization/kubernetes.mdx @@ -13,7 +13,7 @@ import { Link } from 'gatsby'; [Helm](https://helm.sh) is the package manager for kubernetes. -There is a complete [helm chart definition](https://github.com/apollographql/router/tree/v1.21.0/helm/chart/router) in the repo which illustrates how to use helm to deploy the router in kubernetes. +There is a complete [helm chart definition](https://github.com/apollographql/router/tree/v1.22.0/helm/chart/router) in the repo which illustrates how to use helm to deploy the router in kubernetes. 
In both the following examples, we are using helm to install the router: - into namespace "router-deploy" (create namespace if it doesn't exist) @@ -64,10 +64,10 @@ kind: ServiceAccount metadata: name: release-name-router labels: - helm.sh/chart: router-1.21.0 + helm.sh/chart: router-1.22.0 app.kubernetes.io/name: router app.kubernetes.io/instance: release-name - app.kubernetes.io/version: "v1.21.0" + app.kubernetes.io/version: "v1.22.0" app.kubernetes.io/managed-by: Helm --- # Source: router/templates/secret.yaml @@ -76,10 +76,10 @@ kind: Secret metadata: name: "release-name-router" labels: - helm.sh/chart: router-1.21.0 + helm.sh/chart: router-1.22.0 app.kubernetes.io/name: router app.kubernetes.io/instance: release-name - app.kubernetes.io/version: "v1.21.0" + app.kubernetes.io/version: "v1.22.0" app.kubernetes.io/managed-by: Helm data: managedFederationApiKey: "UkVEQUNURUQ=" @@ -90,10 +90,10 @@ kind: ConfigMap metadata: name: release-name-router labels: - helm.sh/chart: router-1.21.0 + helm.sh/chart: router-1.22.0 app.kubernetes.io/name: router app.kubernetes.io/instance: release-name - app.kubernetes.io/version: "v1.21.0" + app.kubernetes.io/version: "v1.22.0" app.kubernetes.io/managed-by: Helm data: configuration.yaml: | @@ -117,10 +117,10 @@ kind: Service metadata: name: release-name-router labels: - helm.sh/chart: router-1.21.0 + helm.sh/chart: router-1.22.0 app.kubernetes.io/name: router app.kubernetes.io/instance: release-name - app.kubernetes.io/version: "v1.21.0" + app.kubernetes.io/version: "v1.22.0" app.kubernetes.io/managed-by: Helm spec: type: ClusterIP @@ -143,10 +143,10 @@ kind: Deployment metadata: name: release-name-router labels: - helm.sh/chart: router-1.21.0 + helm.sh/chart: router-1.22.0 app.kubernetes.io/name: router app.kubernetes.io/instance: release-name - app.kubernetes.io/version: "v1.21.0" + app.kubernetes.io/version: "v1.22.0" app.kubernetes.io/managed-by: Helm annotations: @@ -172,7 +172,7 @@ spec: - name: router securityContext: {} - image: "ghcr.io/apollographql/router:v1.21.0" + image: "ghcr.io/apollographql/router:v1.22.0" imagePullPolicy: IfNotPresent args: - --hot-reload @@ -223,10 +223,10 @@ kind: Pod metadata: name: "release-name-router-test-connection" labels: - helm.sh/chart: router-1.21.0 + helm.sh/chart: router-1.22.0 app.kubernetes.io/name: router app.kubernetes.io/instance: release-name - app.kubernetes.io/version: "v1.21.0" + app.kubernetes.io/version: "v1.22.0" app.kubernetes.io/managed-by: Helm annotations: "helm.sh/hook": test diff --git a/docs/source/enterprise-features.mdx b/docs/source/enterprise-features.mdx index cd6f7fd5f7..aa2b2664a8 100644 --- a/docs/source/enterprise-features.mdx +++ b/docs/source/enterprise-features.mdx @@ -3,16 +3,17 @@ title: Enterprise features for the Apollo Router description: Available with GraphOS Enterprise --- -The Apollo Router provides expanded security, performance, and customization features for organizations with a [GraphOS Enterprise plan](https://www.apollographql.com/pricing/). +The Apollo Router provides expanded performance, security, and customization features for organizations with a [GraphOS Enterprise plan](https://www.apollographql.com/pricing/). > You can test out these features for free with an [Enterprise trial](/graphos/org/plans/#enterprise-trials). 
## List of features -- Authentication of inbound requests via [JSON Web Token (JWT)](./configuration/authn-jwt/) -- Redis-backed [distributed caching of query plans and persisted queries](./configuration/distributed-caching/) -- Custom request handling in any language via [external coprocessing](./customizations/coprocessor/) -- Mitigation of potentially malicious requests via [operation limits](./configuration/operation-limits) +- **Real-time updates** via [GraphQL subscriptions](./executing-operations/subscription-support/) +- **Authentication of inbound requests** via [JSON Web Token (JWT)](./configuration/authn-jwt/) +- Redis-backed [**distributed caching** of query plans and persisted queries](./configuration/distributed-caching/) +- **Custom request handling** in any language via [external coprocessing](./customizations/coprocessor/) +- **Mitigation of potentially malicious requests** via [operation limits](./configuration/operation-limits) Articles specifically about Enterprise features are marked with a **❖** icon in the left navigation. @@ -20,10 +21,11 @@ Articles specifically about Enterprise features are marked with a **❖** icon i ## Enabling Enterprise features -To enable support for the Apollo Router's Enterprise features: +**To enable support for Apollo Router Enterprise features:** - Your organization must have a [GraphOS Enterprise plan](https://www.apollographql.com/pricing/). - You must run v1.12 or later of the Apollo Router. [Download the latest version.](./quickstart#download-options) + - Certain Enterprise features might require a later router version. See a particular feature's documentation for details. - Your Apollo Router instances must connect to GraphOS with a **graph API key** and **graph ref** associated with your organization. - You connect your router to GraphOS by setting [these environment variables](./configuration/overview/#environment-variables) when starting the router. - If your router _already_ connects to your GraphOS Enterprise organization, no further action is required. diff --git a/docs/source/executing-operations/subscription-callback-protocol.mdx b/docs/source/executing-operations/subscription-callback-protocol.mdx new file mode 100644 index 0000000000..e0718992d7 --- /dev/null +++ b/docs/source/executing-operations/subscription-callback-protocol.mdx @@ -0,0 +1,176 @@ +--- +title: HTTP callback protocol for GraphQL subscriptions +description: For federated subgraphs communicating with the Apollo Router +--- + +## Communication + +The callback protocol for GraphQL subscription aims to be an alternative to existing websocket protocols when using Apollo Federation and communicate between the Apollo Router and an event source (a subgraph for example). + +Main goal is to not keep an opened connection between a subgraph and the Apollo Router in order to be more efficient. + +**All** payloads contain the `kind` field outlining the kind of payload it is, in our case it will always be `"subscription"`. The payload also always contains the `action` field describing what kind of action we want to process, the `verifier` (to check that we're authorized to make that callback) the Apollo Router sent via `extensions` in the request and finally the `id` field which is the identifier (an uuid v4) for a specific opened subscription. + +Depending on the `action`, the payload can contain two more _optional_ fields: + +- `payload` holding the GraphQL Response when sending the subscription event from the source event to the Apollo Router. 
+- `errors` used to complete a connection and add errors if critical errors happened. `errors` is an array of GraphQL error. + +When opening a GraphQL subscription on the Apollo Router it will directly send a request to the subgraph containing the original subscription and more data related to callback mode in GraphQL `extensions`. For example: + +```json +{ + "query": "subscription { userWasCreated { name reviews { body } } }", + "extensions": { + "subscription": { + "callback_url": "http://localhost:4000/callback/c4a9d1b8-dc57-44ab-9e5a-6e6189b2b945", + "subscription_id": "c4a9d1b8-dc57-44ab-9e5a-6e6189b2b945", + "verifier": "XXX" + } + } +} +``` + +When a subgraph receives a subscription request. It must first make a `check` request to the callback endpoint (see below) with the given data (`callback_url`, `subscription_id` and a `verifier`). This will ensure the subgraph is able to send source stream events, and that the `subscription_id` and the `verifier` are correct. A successful call to the callback URL with a `check` yields an empty body and a header `subscription-protocol: callback`. Only then can the subgraph can answer the initial subscription request, and start notifying the callback URL with subscription events. + + +The event source can terminate a subscription any time by sending the `complete` action message (cf message types just below) and include `errors` if needed. + + +If the subscription is closed or doesn't exist anymore on the Apollo Router then when the source event will send an event message to the callback endpoint it will returns a 404 HTTP status code. +So at the event source side if you're receiving a 404 HTTP status code from the callback endpoint you must terminate the subscription. + +## Message types + +> Note that all messages are sent for the source event to the Apollo Router + +### `check` + +Indicates that the event source wants to check that the callback url and subscription id it received is correct. If the subscription id is correct the callback endpoint must respond with a 204 HTTP status code without payload. + +> When opening a `subcription` this is the first message to be sent to the callback endpoint and it MUST be synchronous. It means it's called directly when the event source is receiving a request for a subscription before executing it. The event source MUST call the callback endpoint and send this message in order to check if it's able to communicate with the Apollo Router. If it fails it should directly return an error, if it works it returns an empty body with 204 HTTP status code. Once the subscription has been correctly created this message can also be used to heartbeat a single subscription, if you want to heartbeat several subscriptions at once, use the `heartbeat` message. + +```json +{ + "kind": "subscription", + "action": "check", + "id": "c4a9d1b8-dc57-44ab-9e5a-6e6189b2b945", + "verifier": "XXX" +} +``` + +### `heartbeat` + +This message is used to heartbeat the subscription and to check the event source can access the callback endpoint. If one of the subscription ids is incorrect the callback endpoint must respond with a 400 HTTP status code and a payload containing the `invalid_ids` field (it's an array of incorrect ids), the `verifier` to use next time and the `id` linked to that `verifier`. If all ids are correct then the callback endpoint must respond a 204 HTTP status code without payload. The `id` field correspond to the `id` you received from the router with the provided `verifier` you're sending. 
+ +> If no IDs are still valid then we will return a 404 error status code without any payload + +```json +{ + "kind": "subscription", + "action": "heartbeat", + "id": "c4a9d1b8-dc57-44ab-9e5a-6e6189b2b945", + "ids": ["c4a9d1b8-dc57-44ab-9e5a-6e6189b2b945", "c4a9d1b8-dc57-44ab-9e5a-6e6189b2b254"], + "verifier": "XXX" +} +``` + +Example of payload sent with HTTP status code 400 if it contains incorrect ids: + +```json +{ + "id": "c4a9d1b8-dc57-44ab-9e5a-6e6189b2b945", + "invalid_ids": ["c4a9d1b8-dc57-44ab-9e5a-6e6189b2b254"], + "verifier": "XXX" +} +``` + +### `next` + +Operation execution result(s) from the event source created by subscription. The `payload` field must be a compliant GraphQL execution result. After all results have been emitted, the `complete` message will follow indicating stream completion. + +```json +{ + "kind": "subscription", + "action": "next", + "payload": { + "data": { + "foo": "bar" + } + }, + "id": "c4a9d1b8-dc57-44ab-9e5a-6e6189b2b945", + "verifier": "XXX" +} +``` + +### `complete` + +It indicates that the requested GraphQL subscription execution has completed. If the message contains the `errors` field it means the operation failed, if `errors` is empty it means the operation has been executed successfully. In each cases (`errors` empty or not), when receiving `complete` eventt then the Apollo Router will close the subscription to the client. The `errors` field is optional and is an array of GraphQL errors. + +```typescript +{ + "kind": "subscription", + "action": "complete", + "errors": [{ // Optional if successful + "message": "something is wrong" + }], + "id": "c4a9d1b8-dc57-44ab-9e5a-6e6189b2b945", + "verifier": "XXX" +} +``` + +### Error cases + ++ The event source can't call the callback endpoint with the `check` message either because the subscription id is incorrect nor the callback endpoint is available. ++ Any messages sent to the callback endpoint by the source event is falling in error, if it's a 404 HTTP status code it means the subscription doesn't exist anymore and should be closed on the source event side. All other errors are unexpected and should result in a termination of the subscription at the source event level. ++ The source event didn't send the `check` message every 5 secs and so the subscription is automatically closed at the Apollo Router level and will cut the connection with the client. + +## Examples + +For the sake of clarity, the following examples demonstrate the callback protocol. + +### Streaming operation + +#### `subscription` operation + +1. _The Apollo Router_ receives a `subscription` operation +2. _The Apollo Router_ generates an unique ID (uuid v4) for the following subscription +3. _The Apollo Router_ sends a query containing a [GraphQL request payload](https://github.com/graphql/graphql-over-http/blob/main/spec/GraphQLOverHTTP.md#request-parameters) with all callback data in `extensions` + +Example: + +```json +{ + "query": "subscription { userWasCreated { name reviews { body } } }", + "extensions": { + "subscription": { + "callback_url": "http://localhost:4000/callback/c4a9d1b8-dc57-44ab-9e5a-6e6189b2b945", + "subscription_id": "c4a9d1b8-dc57-44ab-9e5a-6e6189b2b945", + "verifier": "XXX" + } + } +} +``` + +4. _Event source_ receives the GraphQL `subscription` with all callback data directly in `extensions`. +5. _Event source_ calls the callback endpoint given in `extensions` with a [`check`](#check) to init the subscription to _the Apollo Router_. 
+ +Payload example for `POST` on `http://localhost:4000/callback/c4a9d1b8-dc57-44ab-9e5a-6e6189b2b945`: + +```json +{ + "kind": "subscription", + "action": "check", + "id": "c4a9d1b8-dc57-44ab-9e5a-6e6189b2b945", + "verifier": "XXX" +} +``` + +6. _Event source_ receives 204 HTTP status code from the call to the callback endpoint + - If an error happens or you don't receive 204 HTTP status code then directly return a >= 400 HTTP status code to the _Apollo Router_ +7. _Event source_ spawns a background task to listen on `subscription` events + - Every 5 seconds the _Event source_ must call the callback endpoint with a [`heartbeat` payload](#heartbeat) to heartbeat and confirm it's still listening to the subscription + - Every received events the _Event source_ calls the callback endpoint with a [`next` payload](#next) + - If an error appears the _Event source_ calls the callback endpoint with a [`complete` payload with errors field](#complete) + - If the stream of events is done then send a [`complete` payload WITHOUT errors field](#complete) +8. _Event source_ returns empty body containing a new header `subscription-protocol: callback` in answer to the initial call from _Apollo Router_ diff --git a/docs/source/executing-operations/subscription-multipart-protocol.mdx b/docs/source/executing-operations/subscription-multipart-protocol.mdx new file mode 100644 index 0000000000..464dc7ff13 --- /dev/null +++ b/docs/source/executing-operations/subscription-multipart-protocol.mdx @@ -0,0 +1,80 @@ +--- +title: Multipart HTTP protocol for GraphQL subscriptions +description: For GraphQL clients communicating with the Apollo Router +--- + +To execute GraphQL subscription operations on the Apollo Router, client apps do _not_ communicate over WebSocket. Instead, they use **HTTP with multipart responses**. This multipart protocol is built on the same [Incremental Delivery over HTTP](https://github.com/graphql/graphql-over-http/blob/main/rfcs/IncrementalDelivery.md) spec that the Apollo Router uses to support [the `@defer` directive](./defer-support/). + +**You only need to read this reference if you're adding protocol support to a new GraphQL client library!** Apollo Client for [Web](/react/data/subscriptions#http), [Kotlin](/kotlin/essentials/subscriptions#configuring-http-subscriptions), and [iOS](/ios/fetching/subscriptions#http) all support this protocol. + +## Executing a subscription + +To execute a subscription on the Apollo Router, a GraphQL client sends an HTTP request with _almost_ the exact same format that it uses for query and mutation requests. + +The only difference is that the request should include the following `Accept` header: + +```text title="Example header" +Accept: multipart/mixed; boundary="graphql"; subscriptionSpec="1.0", application/json +``` + +> At this time, the value for `boundary` should _always_ be `graphql`, and the value for `subscriptionSpec` should _always_ be `1.0`. + +As subscription events occur, the router sends back HTTP response "parts" that conform to the definition of multipart content specified in [RFC1341](https://www.w3.org/Protocols/rfc1341/7_2_Multipart.html). + +An example response might look like this: + +``` +--graphql +Content-Type: application/json + +{} +--graphql +Content-Type: application/json + +{"payload": {"data": { "newPost": { "id": 123, "title": "Hello!"}}}} +--graphql-- +``` + +- **If the request uses HTTP/1**, the response includes the `Transfer-Encoding: chunked` header. 
+- **If the request uses HTTP/2** (which provides built-in support for data streaming), chunked encoding is _not_ used (and is in fact [disallowed](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Transfer-Encoding)). + +## Heartbeats + +While a client subscription remains active, the Apollo Router sends periodic "heartbeat" response parts to prevent any intermediaries from closing the connection. The body of a heartbeat is an empty JSON object, which clients should ignore silently: + +```text title="Heartbeat response part" +--graphql +Content-Type: application/json + +{} +--graphql-- +``` + +## Message and error format + +This protocol differentiates between transport-level errors and GraphQL errors in response payloads themselves. This is because the GraphQL response format is [defined in the GraphQL spec](https://spec.graphql.org/draft/#sec-Response-Format), and unexpected fields might be confusing or could even break client typing. + +With the exception of [heartbeats](#heartbeats), every response part body includes a `payload` property, which contains standard GraphQL response properties. **The `payload` property can be null if a transport-level error occurs.** + +**If a GraphQL-level error occurs,** the Apollo Router can sometimes still return partial data, and the subscription connection should remain open. These errors are provided _within_ the `payload` property: + +```json +{ + "payload": { + "errors": [...], //highlight-line + "data": {...}, + "extensions": {...} + } +} +``` + +**If a fatal transport-level error occurs,** the router sends a message with a top-level `errors` field and null `payload` field, then closes the connection: + +```json +{ + "payload": null, + "errors": [...] //highlight-line +} +``` + +Both types of `errors` follow the [GraphQL error format](http://spec.graphql.org/draft/#sec-Errors.Error-Result-Format), but top-level `errors` never include `locations` or `path`. diff --git a/docs/source/executing-operations/subscription-support.mdx b/docs/source/executing-operations/subscription-support.mdx new file mode 100644 index 0000000000..fa764c2aff --- /dev/null +++ b/docs/source/executing-operations/subscription-support.mdx @@ -0,0 +1,425 @@ +--- +title: GraphQL subscriptions in the Apollo Router +description: Real-time data with GraphOS Enterprise +--- + +> ⚠️ **This is an [Enterprise feature](../enterprise-features/) of the Apollo Router.** It requires an organization with a [GraphOS Enterprise plan](https://www.apollographql.com/pricing/). + +With [GraphOS Enterprise](/graphos/enterprise/), the Apollo Router provides support for GraphQL subscription operations: + +```graphql +subscription OnStockPricesChanged { + stockPricesChanged { + symbol + price + } +} +``` + +With subscription support enabled, you can add `Subscription` fields to the schema of any subgraph that supports common WebSocket protocols for subscription communication: + +```graphql title="stocks.graphql" +type Subscription { + stockPricesChanged: [Stock!]! +} +``` + +> ⚠️ **Important:** To use subscriptions with the Apollo Router, you must first complete certain [prerequisites](#prerequisites). + +## What are subscriptions for? + +GraphQL subscriptions enable clients to receive continual, real-time updates whenever new data becomes available. Unlike queries and mutations, subscriptions are _long-lasting_. 
This means a client can receive multiple updates from a single subscription: + +```mermaid +sequenceDiagram + participant Client as GraphQL Client + participant Router as Apollo Router + Client->>Router: Initiates subscription + Note over Router: New data available + Router->>Client: Sends new data + Note over Router: New data available + Router->>Client: Sends new data +``` + +Subscriptions are best suited to apps that rely on frequently-changing, time-sensitive data (such as stock prices, IoT sensor readings, live chat, or sports scores). + +## How it works + +```mermaid +flowchart LR; + client(Client); + subgraph "Your infrastructure"; + router(["Apollo
Router"]); + subgraphA[Stocks
subgraph]; + subgraphB[Portfolios
subgraph]; + router-->|"Subscribes
over WebSocket
(or via callback)"|subgraphA; + router-.->|Can query for
entity fields
as needed|subgraphB; + end; + client-->|Subscribes
over HTTP|router; + class client secondary; +``` + +1. A client executes a GraphQL subscription operation against your router _over HTTP:_ + + ```graphql title="Example subscription" + subscription OnStockPricesChanged { + stockPricesChanged { + symbol + price + } + } + ``` + + - **The client does _not_ use a WebSocket protocol!** Instead, it receives updates via [multipart HTTP responses](./subscription-multipart-protocol/). + - By using HTTP for subscriptions, clients can execute _all_ GraphQL operation types over HTTP instead of using two different protocols. + - Apollo Client for [Web](/react/data/subscriptions#http), [Kotlin](/kotlin/essentials/subscriptions#configuring-http-subscriptions), and [iOS](/ios/fetching/subscriptions#http) all support GraphQL subscriptions over HTTP with minimal configuration. See each library's documentation for details. + +2. When your router receives a subscription, it executes that _same_ subscription against whichever subgraph defines the requested field (`stockPricesChanged` in the example above). + + - This communication usually _does_ use [a WebSocket subprotocol](#subscription-modes), for compatibility with most subgraph libraries. + - An HTTP-callback-based protocol is also available in preview. See [HTTP callback setup](#http-callback-setup-preview). + +3. The subgraph periodically sends new data to your router. Whenever it does, the router returns that data to the client in an additional HTTP response "chunk". + - A subscription can include federated entity fields that are defined in _other_ subgraphs. If it does, the router _first_ fetches those fields by querying the corresponding subgraphs (such as **Portfolios** in the diagram above). These queries use HTTP as usual. + +> [Walk through an example.](#example-execution) + +## Prerequisites + +⚠️ **Before you add `Subscription` fields to your subgraphs,** do _all_ of the following _in the order shown_ to prevent schema composition errors: + +1. Update your Apollo Router instances to version `1.22.0` or later. [Download the latest version.](../quickstart/) + - Previous versions of the Apollo Router don't support subscription operations. +2. Make sure your router is [connected to a GraphOS Enterprise organization](../enterprise-features/#enabling-enterprise-features). + - Subscription support is an Enterprise feature of self-hosted routers. +3. **If you compose your router's supergraph schema with GraphOS** (instead of with the Rover CLI), [update your build pipeline](/graphos/graphs/updating#2-update-your-build-pipeline) to use Apollo Federation 2.4 or later. + - Previous versions of Apollo Federation don't support subscription operations. +4. Modify your subgraph schemas to use Apollo Federation 2.4 or later: + ```graphql title="stocks.graphql" + extend schema + @link(url: "https://specs.apollo.dev/federation/v2.4", #highlight-line + import: ["@key", "@shareable"]) + + type Subscription { + stockPricesChanged: [Stock!]! + } + ``` + + - You _only_ need to modify subgraph schemas that define `Subscription` fields. + +After you complete these prerequisites, you can safely [configure your router](#router-setup) for subscriptions. + +## Router setup + +> ⚠️ **Make sure you've completed all [prerequisites](#prerequisites)!** + +In your router's [YAML config file](../configuration/overview/#yaml-config-file), you configure how the router communicates with each of your subgraphs when executing GraphQL subscriptions. 
+ +The Apollo Router supports two popular [WebSocket protocols](#websocket-setup) for subscriptions, and it also provides preview support for an [HTTP-callback-based protocol](#http-callback-setup-preview). **Your router must use whichever protocol is expected by each subgraph!** + +### WebSocket setup + +Here's an example router configuration snippet that sets up subgraph subscriptions over WebSocket: + +```yaml title="router.yaml" +subscription: + mode: + passthrough: + all: # The router uses these subscription settings UNLESS overridden per-subgraph + path: /subscriptions # The URL path to use for subgraph subscription endpoints (Default: /ws) + subgraphs: # Overrides subscription settings for individual subgraphs + reviews: # Overrides settings for the 'reviews' subgraph + path: /ws # Overrides '/subscriptions' defined above + protocol: graphql_transport_ws # The WebSocket-based protocol to use for subscription communication (Default: graphql_ws) +``` + +This example enables subscriptions in **passthrough mode**, which uses long-lived WebSocket connections. The following WebSocket subprotocols are supported: + +- [`graphql_ws`](https://github.com/enisdenjo/graphql-ws) +- [`subscriptions-transport-ws`](https://github.com/apollographql/subscriptions-transport-ws) (⚠️ unmaintained) + +> ⚠️ **Your router must use whichever subprotocol is expected by each of your subgraphs!** + +By default, the router uses `graphql-ws` for all subgraphs. You can change this global default and/or override it for individual subgraphs by setting the `protocol` key as shown above. + +Your router creates a separate WebSocket connection for each client subscription, _unless_ it can perform [subscription deduplication](#subscription-deduplication). + +### HTTP callback setup (preview) + +The Apollo Router provides preview support for receiving subgraph subscription events via HTTP callbacks, _instead of_ over a persistent WebSocket connection. This **callback mode** provides the following advantages over WebSocket-based subscriptions: + +- The router doesn't need to maintain a persistent connection for each distinct subscription. +- You can publish events directly to the router from a pubsub system, instead of routing those events through the subgraph. + +Callback mode requires your subgraph library to support the router's [HTTP callback protocol](./subscription-callback-protocol/). + +
+ +**This [HTTP callback protocol](./subscription-callback-protocol/) is in [preview](/resources/product-launch-stages#preview)!** Breaking changes to the protocol might occur during the preview period. + +Currently, no subgraph libraries support this protocol (Apollo Server support is forthcoming). If you're implementing support in a subgraph library, please [create a GitHub discussion](https://github.com/apollographql/router/discussions/). + +
+ +Here's an example configuration that sets up subgraph subscriptions in callback mode: + +```yaml title="router.yaml" +subscription: + mode: + preview_callback: + public_url: https://example.com:4000 # The router's public URL + listen: 0.0.0.0:4000 # The IP address and port the router will listen on for subscription callbacks + path: /callback # The path of the router's callback endpoint + subgraphs: # The list of subgraphs that use the HTTP callback protocol + - accounts +``` + +### Using a combination of modes + +If some of your subgraphs require [passthrough mode](#websocket-setup) and others require [callback mode](#http-callback-setup-preview) for subscriptions, you can apply different modes to different subgraphs in your configuration: + +```yaml title="router.yaml" +subscription: + mode: + passthrough: + subgraphs: + reviews: #highlight-line + path: /ws + protocol: graphql_transport_ws + preview_callback: + public_url: http://public_url_of_my_router_instance:4000 + listen: 0.0.0.0:4000 + path: /callback + subgraphs: + - accounts #highlight-line +``` + +In this example, the `reviews` subgraph uses WebSocket and the `accounts` subgraph uses HTTP-based callbacks. + +> **Important:** If you configure both passthrough mode _and_ callback mode for a particular subgraph, the router uses the passthrough mode configuration. +> +> If any subgraphs require callback mode, **do not set the `passthrough.all` key**. If you do, the router uses the passthrough mode configuration for _all_ subgraphs. + +## Example execution + +Let's say our supergraph includes the following subgraphs and partial schemas: + + + +```graphql title="Products subgraph" +type Product @key(fields: "id") { + id: ID! + name: String! + price: Int! +} + +# highlight-start +type Subscription { + productPriceChanged: Product! +} +#highlight-end +``` + +```graphql title="Reviews subgraph" +type Product @key(fields: "id") { + id: ID! + reviews: [Review!]! +} + +type Review { + score: Int! +} +``` + + + +Now, let's say a client executes the following subscription against our router ([over HTTP!](#how-it-works)): + +```graphql +subscription OnProductPriceChanged { + productPriceChanged { + # Defined in Products subgraph + name + price + reviews { + # Defined in Reviews subgraph! + score + } + } +} +``` + +When our router receives this operation, it executes a corresponding subscription operation against the Products subgraph (over a new WebSocket connection): + +```graphql +subscription { + productPriceChanged { + id # Added for entity fetching + name + price + # Reviews fields removed! + } +} +``` + +
+ +**Note the following:** + +- This operation _adds_ the `Product.id` field. The router needs `@key` fields of the `Product` entity to merge entity fields from across subgraphs. +- This operation _removes_ all fields defined in the Reviews subgraph, because the Products subgraph can't resolve them. + +
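+
+For this walkthrough, the router would only need subscription settings for the subgraph that defines the `Subscription` field. A configuration sketch (assuming the Products subgraph is registered under the hypothetical name `products` and serves subscriptions at `/ws`) might look like this:
+
+```yaml title="router.yaml"
+subscription:
+  mode:
+    passthrough:
+      subgraphs:
+        products: # Hypothetical name of the Products subgraph
+          path: /ws # Its WebSocket subscription endpoint
+```
+
+The Reviews subgraph needs no entry here, because in this example the router only ever _queries_ it over HTTP.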
+ +At any point after the subscription is initiated, the Products subgraph might send updated data to our router. Whenever this happens, the router _does not_ immediately return this data to the client, because it's missing requested fields from the Reviews subgraph! + +Instead, our router executes a standard GraphQL _query_ against the Reviews subgraph to fetch the missing entity fields: + +```graphql +query { + _entities(representations: [...]) { + ... on Product { + reviews { + score + } + } + } +} +``` + +After receiving this query result from the Reviews subgraph, our router combines it with the data from Products and returns the combination to the subscribing client. + +## Trying subscriptions with `curl` + +To quickly try out the Apollo Router's HTTP-based subscriptions _without_ setting up an Apollo Client library, you can execute a `curl` command against your router with the following format: + +```bash + curl 'http://localhost:4000/' -v \ + -H 'accept: multipart/mixed; boundary="graphql"; subscriptionSpec=1.0, application/json' \ + -H 'content-type: application/json' \ + --data-raw '{"query":"subscription OnProductPriceChanged { productPriceChanged { name price reviews { score } } }","operationName":"OnProductPriceChanged"}' +``` + +This command creates an HTTP multipart request and keeps an open connection that receives new subscription data in response "chunks": + +``` +--graphql +content-type: application/json + +{} +--graphql +content-type: application/json + +{"payload":{"data":{"productPriceChanged":{"name":"Croissant","price":400,"reviews":[{"score":5}]}}}} +--graphql +content-type: application/json + +{"payload":{"data":{"productPriceChanged":{"name":"Croissant","price":375,"reviews":[{"score":5}]}}}} +--graphql +content-type: application/json + +{"payload":{"data":{"productPriceChanged":{"name":"Croissant","price":425,"reviews":[{"score":5}]}}}} +--graphql-- +``` + +> This example subscription only emits three events and then directly closes the connection. +> +> For more information on this multipart HTTP subscription protocol, see [this article](./subscription-multipart-protocol/). + +## Subscription deduplication + +**By default, the Apollo Router deduplicates identical subscriptions.** This can dramatically reduce load on both your router _and_ your subgraphs, because the router doesn't need to open a new connection if an _existing_ connection is already handling the exact same subscription. + +> For example, if thousands of clients all subscribe to real-time score updates for the same sports game, your router only needs to maintain _one_ connection to your `sportsgames` subgraph to receive events for _all_ of those subscriptions. + +The router considers subscription operations **identical** if _all_ of the following are true: + +- The operations sent to the subgraph have identical GraphQL selection sets (i.e., requested fields). +- The operations provide identical values for all headers that the router sends to the subgraph. + +### Disabling deduplication + +You can disable subscription deduplication by adding the following to your router's YAML config file under the `subscription` key: + +```yaml title="router.yaml" +subscription: +# highlight-start + enable_deduplication: false # default: true +# highlight-end +``` + +Note that this is a _global_ setting (not per-subgraph or per-operation). + +#### Why disable deduplication? + +Disabling deduplication is useful if you _need_ to create a separate connection to your subgraph for each client-initiated subscription. 
+For example:
+
+- Your subgraph needs to trigger an important event every time a new client subscribes to its data.
+  - This event _doesn't_ trigger whenever the router reuses an existing connection.
+- Your subscription needs to start by receiving the _first_ value in a particular sequence, instead of the _most recent_ value.
+  - If a subscription reuses an existing connection, it starts by receiving the next value _for that connection._
+  - As a basic example, let's say a subscription should always fire events returning the integers `0` through `1000`, in order. If a new subscription reuses an existing subgraph connection, it starts by receiving whichever value is next for the original connection, which is almost definitely _not_ `0`.
+
+## Advanced configuration
+
+### WebSocket auth support
+
+By default, if you've configured your router to [propagate](../configuration/header-propagation/) HTTP `Authorization` headers to your subgraph, then the router automatically sets corresponding `connectionParams` when initiating a WebSocket connection to that subgraph.
+
+For example, when your router sends the [`connection_init` message](https://github.com/enisdenjo/graphql-ws/blob/master/PROTOCOL.md#connectioninit) to a subgraph, it includes the value of the `Authorization` header via the following payload:
+
+```json
+{
+  "connectionParams": {
+    "token": "CONTENTS_OF_AUTHORIZATION_HEADER"
+  }
+}
+```
+
+To specify a _custom_ payload for the `connection_init` message, you can write a [Rhai script](../customizations/rhai/) and use the `context` directly:
+
+```rhai
+fn subgraph_service(service, subgraph) {
+  let f = |request| {
+    request.context[Router.APOLLO_SUBSCRIPTION_WS_CUSTOM_CONNECTION_PARAMS] = #{
+      my_token: "here is my token"
+    };
+  };
+
+  service.map_request(f);
+}
+```
+
+> **Note:** If you specify both a `context` entry _and_ an `Authorization` header, the `context` entry takes precedence.
+
+### Expanding event queue capacity
+
+If your router receives a high volume of events for a particular subscription, it might accumulate a backlog of those events to send to clients. To handle this backlog, the router maintains an in-memory queue of unsent events.
+
+> The router maintains a _separate_ event queue for _each_ of its active subscription connections to subgraphs.
+
+You can configure the size of each event queue in your router's YAML config file, like so:
+
+```yaml title="router.yaml"
+subscription:
+  queue_capacity: 100000 # Default: 128
+```
+
+> The value of `queue_capacity` corresponds to the _maximum number of subscription events for each queue_, not the _total size_ of those events.
+
+Whenever your router receives a subscription event while its queue is full, it _discards_ the _oldest_ unsent event in the queue and enqueues the newly received event. The discarded event is _not_ sent to subscribing clients.
+
+If it's absolutely necessary for clients to receive _every_ subscription event, increase the size of your event queue as needed.
+
+### Limiting the number of client connections
+
+Client subscriptions are [long-lived HTTP connections](#how-it-works), which means they might remain open indefinitely.
+You can limit the number of simultaneous client subscription connections in your router's YAML config file, like so:
+
+```yaml title="router.yaml"
+subscription:
+  #highlight-start
+  max_opened_subscriptions: 150 # Only 150 simultaneous connections allowed
+  #highlight-end
+```
+
+If a client attempts to execute a subscription on your router when it's already at `max_opened_subscriptions`, the router rejects the client's request with an error.
diff --git a/docs/source/index.mdx b/docs/source/index.mdx
index fb1b91104d..4cbd3ba100 100644
--- a/docs/source/index.mdx
+++ b/docs/source/index.mdx
@@ -5,22 +5,24 @@ description: High-performance routing for self-hosted supergraphs
 
 import { Link } from 'gatsby';
 
-**The Apollo Router** is a configurable, high-performance router for a [self-hosted supergraph](/graphos/graphs/overview/#self-hosted-supergraphs):
+**The Apollo Router** is a configurable, high-performance router for your federated GraphQL API (also known as a [supergraph](/graphos/graphs/#self-hosted-supergraphs)):
 
 ```mermaid
 flowchart LR;
   clients(Clients);
   subgraph "Your infrastructure";
-  gateway(["Apollo Router"]);
-  serviceB[Products<br/>subgraph];
-  serviceC[Reviews<br/>subgraph];
-  gateway --- serviceB & serviceC;
+  router(["Apollo Router"]);
+  serviceB[Products<br/>API];
+  serviceC[Reviews<br/>API];
+  router -->|Sub-query| serviceB & serviceC;
   end;
-  clients -.- gateway;
+  clients -.->|Query| router;
   class clients secondary;
 ```
-
-If you have an existing self-hosted supergraph that currently uses `@apollo/gateway`, you can move to the Apollo Router without changing any other part of your supergraph.
+
+The Apollo Router intelligently distributes inbound queries across your GraphQL-powered microservices, enabling clients to fetch data from multiple sources with a single request.
+
+If you have an existing federated graph that currently uses `@apollo/gateway`, you can move to the Apollo Router without changing any other part of your graph.