diff --git a/cmd/skipper/main.go b/cmd/skipper/main.go index de945cc955..a545080dcf 100644 --- a/cmd/skipper/main.go +++ b/cmd/skipper/main.go @@ -96,6 +96,7 @@ const ( loadBalancerHealthCheckIntervalUsage = "use to set the health checker interval to check healthiness of former dead or unhealthy routes" reverseSourcePredicateUsage = "reverse the order of finding the client IP from X-Forwarded-For header" enableHopHeadersRemovalUsage = "enables removal of Hop-Headers according to RFC-2616" + rfcPatchPathUsage = "patches the incoming request path to preserve uncoded reserved characters according to RFC 2616 and RFC 3986" maxAuditBodyUsage = "sets the max body to read to log in the audit log body" // logging, metrics, tracing: @@ -245,6 +246,7 @@ var ( loadBalancerHealthCheckInterval time.Duration reverseSourcePredicate bool removeHopHeaders bool + rfcPatchPath bool maxAuditBody int enableBreakers bool breakers breakerFlags @@ -406,6 +408,7 @@ func init() { flag.DurationVar(&loadBalancerHealthCheckInterval, "lb-healthcheck-interval", defaultLoadBalancerHealthCheckInterval, loadBalancerHealthCheckIntervalUsage) flag.BoolVar(&reverseSourcePredicate, "reverse-source-predicate", false, reverseSourcePredicateUsage) flag.BoolVar(&removeHopHeaders, "remove-hop-headers", false, enableHopHeadersRemovalUsage) + flag.BoolVar(&rfcPatchPath, "rfc-patch-path", false, rfcPatchPathUsage) flag.IntVar(&maxAuditBody, "max-audit-body", defaultMaxAuditBody, maxAuditBodyUsage) flag.BoolVar(&enableBreakers, "enable-breakers", false, enableBreakersUsage) flag.Var(&breakers, "breaker", breakerUsage) @@ -785,6 +788,10 @@ func main() { options.ProxyFlags |= proxy.HopHeadersRemoval } + if rfcPatchPath { + options.ProxyFlags |= proxy.PatchPath + } + if clientKeyFile != "" && clientCertFile != "" { certsFiles := strings.Split(clientCertFile, ",") keyFiles := strings.Split(clientKeyFile, ",") diff --git a/docs/operation/operation.md b/docs/operation/operation.md index 192ff2d538..5eee3a9943 100644 --- a/docs/operation/operation.md +++ b/docs/operation/operation.md @@ -583,3 +583,21 @@ scheduler group. [`LifoGroup`](../../reference/filters/#lifogroup) has a user chosen scheduler group and [`lifo()`](../../reference/filters/#lifo) will get a per route unique scheduler group. + +# URI standards interpretation + +Considering the following request path: /foo%2Fbar, Skipper can handle +it in two different ways. The current default way is that when the +request is parsed purely relying on the Go stdlib url package, this +path becomes /foo/bar. According to RFC 2616 and RFC 3986, this may +be considered wrong, and this path should be parsed as /foo%2Fbar. +This is possible to achieve centrally, when Skipper is started with +the -rfc-patch-path flag. It is also possible to allow the default +behavior and only force the alternative interpretation on a per-route +basis with the rfcPath() filter. See +[`rfcPath()`](../../reference/filters/#rfcPath). + +If the second interpretation gets considered the right way, and the +other one a bug, then the default value for this flag may become to +be on. + diff --git a/docs/reference/filters.md b/docs/reference/filters.md index 30f79cf497..ca168aac10 100644 --- a/docs/reference/filters.md +++ b/docs/reference/filters.md @@ -1450,3 +1450,35 @@ lifoGroup("mygroup", 100, 150, "10s") The above configuration will set MaxConcurrency to 100, MaxStackSize to 150 and Timeout to 10 seconds for the lifoGroup "mygroup", that can be shared between more than routes. + +## rfcPath + +This filter forces an alternative interpretation of the RFC 2616 and RFC 3986 standards, +where paths containing reserved characters will have these characters unescaped when the +incoming request also has them unescaped. + +Example: + +``` +Path("/api/*id) -> rfcPath() -> "http://api-backend" +``` + +In the above case, if the incoming request has something like foo%2Fbar in the id +position, the api-backend service will also receive it in the format foo%2Fbar, while +without the rfcPath() filter the outgoing request path will become /api/foo/bar. + +In case we want to use the id while routing the request, we can use the +backend. Example: + +``` +api: Path("/api/:id") -> setPath("/api/${id}/summary") -> "http://api-backend"; +patch: Path("/api/*id") -> rfcPath() -> ; +``` + +In the above case, if the incoming request path is /api/foo%2Fbar, it will match +the 'patch' route, and then the patched request will match the api route, and +the api-backend service will receive a request with the path /api/foo%2Fbar/summary. + +It is also possible to enable this behavior centrally for a Skipper instance with +the -rfc-patch-path flag. See +[URI standards interpretation](../../operation/operation/#uri-standards-interpretation). diff --git a/filters/builtin/builtin.go b/filters/builtin/builtin.go index cb155e61ef..17102ba789 100644 --- a/filters/builtin/builtin.go +++ b/filters/builtin/builtin.go @@ -14,6 +14,7 @@ import ( "github.com/zalando/skipper/filters/flowid" logfilter "github.com/zalando/skipper/filters/log" "github.com/zalando/skipper/filters/ratelimit" + "github.com/zalando/skipper/filters/rfc" "github.com/zalando/skipper/filters/scheduler" "github.com/zalando/skipper/filters/tee" "github.com/zalando/skipper/filters/tracing" @@ -133,6 +134,7 @@ func MakeRegistry() filters.Registry { auth.NewForwardToken(), scheduler.NewLIFO(), scheduler.NewLIFOGroup(), + rfc.NewPath(), } { r.Register(s) } diff --git a/filters/rfc/rfc.go b/filters/rfc/rfc.go new file mode 100644 index 0000000000..457df0929c --- /dev/null +++ b/filters/rfc/rfc.go @@ -0,0 +1,27 @@ +package rfc + +import ( + "github.com/zalando/skipper/filters" + "github.com/zalando/skipper/rfc" +) + +const Name = "rfcPath" + +type path struct{} + +// NewPath creates a filter specification for the rfcPath() filter, that +// reencodes the reserved characters in the request path, if it detects +// that they are encoded in the raw path. +// +// See also the PatchPath documentation in the rfc package. +// +func NewPath() filters.Spec { return path{} } + +func (p path) Name() string { return Name } +func (p path) CreateFilter([]interface{}) (filters.Filter, error) { return path{}, nil } +func (p path) Response(filters.FilterContext) {} + +func (p path) Request(ctx filters.FilterContext) { + req := ctx.Request() + req.URL.Path = rfc.PatchPath(req.URL.Path, req.URL.RawPath) +} diff --git a/filters/rfc/rfc_test.go b/filters/rfc/rfc_test.go new file mode 100644 index 0000000000..6be01364e5 --- /dev/null +++ b/filters/rfc/rfc_test.go @@ -0,0 +1,26 @@ +package rfc + +import ( + "net/http" + "net/url" + "testing" + + "github.com/zalando/skipper/filters/filtertest" +) + +func TestPatch(t *testing.T) { + req := &http.Request{URL: &url.URL{RawPath: "/foo%2Fbar", Path: "/foo/bar"}} + ctx := &filtertest.Context{ + FRequest: req, + } + + f, err := NewPath().CreateFilter(nil) + if err != nil { + t.Fatal(err) + } + + f.Request(ctx) + if req.URL.Path != "/foo%2Fbar" { + t.Error("failed to patch the path", req.URL.Path) + } +} diff --git a/proxy/pathpatch_test.go b/proxy/pathpatch_test.go new file mode 100644 index 0000000000..830683a082 --- /dev/null +++ b/proxy/pathpatch_test.go @@ -0,0 +1,51 @@ +package proxy + +import ( + "net/http" + "net/http/httptest" + "testing" + + "github.com/zalando/skipper/dataclients/routestring" + "github.com/zalando/skipper/filters/builtin" + "github.com/zalando/skipper/routing" +) + +func testPatch(t *testing.T, title string, f Flags, expectedStatus int) { + t.Run(title, func(t *testing.T) { + dc, err := routestring.New(`Path("/foo%2Fbar") -> status(200) -> `) + if err != nil { + t.Fatal(err) + } + + rt := routing.New(routing.Options{ + SignalFirstLoad: true, + FilterRegistry: builtin.MakeRegistry(), + DataClients: []routing.DataClient{dc}, + }) + defer rt.Close() + + p := WithParams(Params{Routing: rt, Flags: f}) + defer p.Close() + + s := httptest.NewServer(p) + defer s.Close() + + <-rt.FirstLoad() + + rsp, err := http.Get(s.URL + "/foo%2Fbar") + if err != nil { + t.Fatal(err) + } + + defer rsp.Body.Close() + + if rsp.StatusCode != expectedStatus { + t.Error() + } + }) +} + +func TestPathPatch(t *testing.T) { + testPatch(t, "not patched", FlagsNone, http.StatusNotFound) + testPatch(t, "patched", PatchPath, http.StatusOK) +} diff --git a/proxy/proxy.go b/proxy/proxy.go index 84a6c094b6..3d3453da2f 100644 --- a/proxy/proxy.go +++ b/proxy/proxy.go @@ -29,6 +29,7 @@ import ( "github.com/zalando/skipper/logging" "github.com/zalando/skipper/metrics" "github.com/zalando/skipper/ratelimit" + "github.com/zalando/skipper/rfc" "github.com/zalando/skipper/routing" "github.com/zalando/skipper/scheduler" "github.com/zalando/skipper/tracing" @@ -88,6 +89,11 @@ const ( // HopHeadersRemoval indicates whether the Hop Headers should be removed // in compliance with RFC 2616 HopHeadersRemoval + + // PatchPath instructs the proxy to patch the parsed request path + // if the reserved characters according to RFC 2616 and RFC 3986 + // were unescaped by the parser. + PatchPath ) // Options are deprecated alias for Flags. @@ -259,6 +265,8 @@ func (f Flags) Debug() bool { return f&Debug != 0 } // When set, the proxy will remove the Hop Headers func (f Flags) HopHeadersRemoval() bool { return f&HopHeadersRemoval != 0 } +func (f Flags) patchPath() bool { return f&PatchPath != 0 } + // Priority routes are custom route implementations that are matched against // each request before the routes in the general lookup tree. type PriorityRoute interface { @@ -1227,6 +1235,10 @@ func (p *Proxy) ServeHTTP(w http.ResponseWriter, r *http.Request) { } }() + if p.flags.patchPath() { + r.URL.Path = rfc.PatchPath(r.URL.Path, r.URL.RawPath) + } + p.tracing.setTag(span, SpanKindTag, SpanKindServer) p.setCommonSpanInfo(r.URL, r, span) r = r.WithContext(ot.ContextWithSpan(r.Context(), span)) diff --git a/rfc/doc.go b/rfc/doc.go new file mode 100644 index 0000000000..ede659fbaf --- /dev/null +++ b/rfc/doc.go @@ -0,0 +1,4 @@ +/* +Package rfc provides standards related functions. +*/ +package rfc diff --git a/rfc/patchpath.go b/rfc/patchpath.go new file mode 100644 index 0000000000..581238aadf --- /dev/null +++ b/rfc/patchpath.go @@ -0,0 +1,130 @@ +package rfc + +const escapeLength = 3 // always, because we only handle the below cases + +const ( + semicolon = ';' + slash = '/' + questionMark = '?' + colon = ':' + at = '@' + and = '&' + eq = '=' + plus = '+' + dollar = '$' + comma = ',' +) + +// https://tools.ietf.org/html/rfc3986#section-2.2 +func unescape(seq []byte) (byte, bool) { + switch string(seq) { + case "%3B", "%3b": + return semicolon, true + case "%2F", "%2f": + return slash, true + case "%3F", "%3f": + return questionMark, true + case "%3A", "%3a": + return colon, true + case "%40": + return at, true + case "%26": + return and, true + case "%3D", "%3d": + return eq, true + case "%2B", "%2b": + return plus, true + case "%24": + return dollar, true + case "%2C", "%2c": + return comma, true + default: + return 0, false + } +} + +// PatchPath attempts to patch a request path based on an interpretation of the standards +// RFC 2616 and RFC 3986 where the reserved characters should not be unescaped. Currently +// the Go stdlib does unescape these characters (v1.12.5). +// +// It expects the parsed path as found in http.Request.URL.Path and the raw path as found +// in http.Request.URL.RawPath. It returns a path where characters e.g. like '/' have the +// escaped form of %2F, if it was detected that they are unescaped in the raw path. +// +// It only returns the patched variant, if the only difference between the parsed and raw +// paths are the encoding of the chars, according to RFC 3986. If it detects any other +// difference between the two, it returns the original parsed path as provided. It +// tolerates an empty argument for the raw path, which can happen when the URL parsed via +// the stdlib url package, and there is no difference between the parsed and the raw path. +// This basically means that the following code is correct: +// +// req.URL.Path = rfc.PatchPath(req.URL.Path, req.URL.RawPath) +// +// Links: +// - https://tools.ietf.org/html/rfc2616#section-3.2.3 and +// - https://tools.ietf.org/html/rfc3986#section-2.2 +// +func PatchPath(parsed, raw string) string { + p, r := []byte(parsed), []byte(raw) + patched := make([]byte, 0, len(r)) + var ( + escape bool + seq []byte + unescaped byte + handled bool + doPatch bool + modified bool + pi int + ) + + for i := 0; i < len(r); i++ { + c := r[i] + escape = c == '%' + modified = pi >= len(p) || !escape && p[pi] != c + if modified { + doPatch = false + break + } + + if !escape { + patched = append(patched, p[pi]) + pi++ + continue + } + + if len(r) < i+escapeLength { + doPatch = false + break + } + + seq = r[i : i+escapeLength] + i += escapeLength - 1 + unescaped, handled = unescape(seq) + if !handled { + patched = append(patched, p[pi]) + pi++ + continue + } + + modified = p[pi] != unescaped + if modified { + doPatch = false + break + } + + patched = append(patched, seq...) + doPatch = true + pi++ + } + + if !doPatch { + return parsed + } + + modified = pi < len(p) + if modified { + return parsed + } + + return string(patched) +} diff --git a/rfc/patchpath_test.go b/rfc/patchpath_test.go new file mode 100644 index 0000000000..b560474b94 --- /dev/null +++ b/rfc/patchpath_test.go @@ -0,0 +1,91 @@ +package rfc + +import "testing" + +func TestPatch(t *testing.T) { + type test struct{ title, parsed, raw, expected string } + for _, test := range []test{{ + title: "empty", + }, { + title: "not escaped, empty raw", + parsed: "/foo/bar", + expected: "/foo/bar", + }, { + title: "already escaped, empty raw (invalid case)", + parsed: "/foo%2Fbar", + expected: "/foo%2Fbar", + }, { + title: "only raw (invalid case)", + raw: "/foo/bar", + }, { + title: "not reserved", + raw: "/foo%2Abar", + parsed: "/foo*bar", + expected: "/foo*bar", + }, { + title: "reserved", + raw: "/foo%2Fbar", + parsed: "/foo/bar", + expected: "/foo%2Fbar", + }, { + title: "reserved, lower case", + raw: "/foo%2fbar", + parsed: "/foo/bar", + expected: "/foo%2fbar", + }, { + title: "modified, too short", + raw: "/foo%2Fbar", + parsed: "/foo/", + expected: "/foo/", + }, { + title: "modified, too short, before escape", + raw: "/foo%2Fbar", + parsed: "/foo", + expected: "/foo", + }, { + title: "modified, too long", + raw: "/foo%2Fbar", + parsed: "/foo/bar/baz", + expected: "/foo/bar/baz", + }, { + title: "modified, different", + raw: "/foo%2Fbar", + parsed: "/foo/baz", + expected: "/foo/baz", + }, { + title: "modified, different escaped", + raw: "/foo%2Fbar", + parsed: "/foo*bar", + expected: "/foo*bar", + }, { + title: "damaged raw (invalid case)", + raw: "/foo%2", + parsed: "/foo/", + expected: "/foo/", + }, { + title: "reserved and unreserved", + raw: "/foo%2Fbar/%2A", + parsed: "/foo/bar/*", + expected: "/foo%2Fbar/*", + }, { + title: "unreserved and reserved", + raw: "/foo%2Abar%2F", + parsed: "/foo*bar/", + expected: "/foo*bar%2F", + }, { + title: "non-ascii range", + raw: "/世%2F界", + parsed: "/世/界", + expected: "/世%2F界", + }} { + t.Run(test.title, func(t *testing.T) { + patched := PatchPath(test.parsed, test.raw) + if patched != test.expected { + t.Errorf( + "patched: %s, %s; got: %s, expected: %s", + test.parsed, test.raw, patched, test.expected, + ) + } + }) + } +}