From e1d2f4cfcc5a95bebc6540136bd287829c834338 Mon Sep 17 00:00:00 2001 From: Alexander Yastrebov Date: Wed, 11 Jan 2023 12:44:46 +0100 Subject: [PATCH] proxy: refactor filter panic handling (#2194) In case of filter panic proxy abrupts filter chain and responds with 500 InternalServerError. Signed-off-by: Alexander Yastrebov Signed-off-by: Alexander Yastrebov --- go.mod | 1 + go.sum | 3 +- proxy/context.go | 1 - proxy/debug.go | 17 ++----- proxy/debug_test.go | 10 ++-- proxy/proxy.go | 105 ++++++++++++++++++------------------------ proxy/proxy_test.go | 109 +++++++++++++++++--------------------------- 7 files changed, 96 insertions(+), 150 deletions(-) diff --git a/go.mod b/go.mod index 9d2579d9b3..c564dc3b5c 100644 --- a/go.mod +++ b/go.mod @@ -47,6 +47,7 @@ require ( golang.org/x/oauth2 v0.0.0-20211104180415-d3ed0bb246c8 golang.org/x/sync v0.0.0-20210220032951-036812b2e83c golang.org/x/term v0.0.0-20210927222741-03fcf44c2211 + golang.org/x/time v0.3.0 gopkg.in/alecthomas/kingpin.v2 v2.2.6 gopkg.in/square/go-jose.v2 v2.6.0 gopkg.in/yaml.v2 v2.4.0 diff --git a/go.sum b/go.sum index 4168b82a71..0d14ea44bf 100644 --- a/go.sum +++ b/go.sum @@ -1128,7 +1128,8 @@ golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxb golang.org/x/time v0.0.0-20191024005414-555d28b269f0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20200416051211-89c76fbcd5d1/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20200630173020-3af7569d3a1e/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= -golang.org/x/time v0.0.0-20210723032227-1f47c861a9ac h1:7zkz7BUtwNFFqcowJ+RIgu2MaV/MapERkDIy+mwPyjs= +golang.org/x/time v0.3.0 h1:rg5rLMjNzMS1RkNLzCG38eapWhnYLFYXDXj2gOlr8j4= +golang.org/x/time v0.3.0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/tools v0.0.0-20180221164845-07fd8470d635/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20180525024113-a5b4c53f6e8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= diff --git a/proxy/context.go b/proxy/context.go index 600a481d1d..f23979337f 100644 --- a/proxy/context.go +++ b/proxy/context.go @@ -35,7 +35,6 @@ type context struct { originalRequest *http.Request originalResponse *http.Response outgoingHost string - debugFilterPanics []interface{} outgoingDebugRequest *http.Request executionCounter int startServe time.Time diff --git a/proxy/debug.go b/proxy/debug.go index 4deb2c9e8e..e95e0e5aff 100644 --- a/proxy/debug.go +++ b/proxy/debug.go @@ -2,7 +2,6 @@ package proxy import ( "encoding/json" - "fmt" "io" "net/http" @@ -36,19 +35,17 @@ type ( ResponseModBody string `json:"response_mod_body,omitempty"` ResponseModErr string `json:"response_mod_error,omitempty"` ProxyError string `json:"proxy_error,omitempty"` - FilterPanics []string `json:"filter_panics,omitempty"` Filters []*eskip.Filter `json:"filters,omitempty"` Predicates []*eskip.Predicate `json:"predicates,omitempty"` } ) type debugInfo struct { - route *eskip.Route - incoming *http.Request - outgoing *http.Request - response *http.Response - err error - filterPanics []interface{} + route *eskip.Route + incoming *http.Request + outgoing *http.Request + response *http.Response + err error } func convertRequest(r *http.Request) *debugRequest { @@ -121,10 +118,6 @@ func convertDebugInfo(d *debugInfo) debugDocument { doc.ProxyError = d.err.Error() } - for _, fp := range d.filterPanics { - doc.FilterPanics = append(doc.FilterPanics, fmt.Sprint(fp)) - } - return doc } diff --git a/proxy/debug_test.go b/proxy/debug_test.go index 98612d2c85..74c0b6b123 100644 --- a/proxy/debug_test.go +++ b/proxy/debug_test.go @@ -173,12 +173,10 @@ func TestDebug(t *testing.T) { "error", debugInfo{ err: errors.New("test error"), - filterPanics: []interface{}{ - errors.New("panic one"), - errors.New("panic two")}}, + }, debugDocument{ - ProxyError: "test error", - FilterPanics: []string{"panic one", "panic two"}}, + ProxyError: "test error", + }, }} { compareStrings := func(smsg string, got, expect []string) { if len(got) != len(expect) { @@ -277,7 +275,5 @@ func TestDebug(t *testing.T) { if got.ProxyError != ti.expect.ProxyError { t.Error(ti.msg, "failed to convert proxy error") } - - compareStrings("filter panics", got.FilterPanics, ti.expect.FilterPanics) } } diff --git a/proxy/proxy.go b/proxy/proxy.go index 65923f39e6..926f8e0214 100644 --- a/proxy/proxy.go +++ b/proxy/proxy.go @@ -20,6 +20,8 @@ import ( "time" "unicode/utf8" + "golang.org/x/time/rate" + ot "github.com/opentracing/opentracing-go" "github.com/opentracing/opentracing-go/ext" "github.com/zalando/skipper/circuit" @@ -337,6 +339,7 @@ type Proxy struct { auditLogHook chan struct{} clientTLS *tls.Config hostname string + onPanicSometimes rate.Sometimes } // proxyError is used to wrap errors during proxying and to indicate @@ -717,31 +720,10 @@ func WithParams(p Params) *Proxy { upgradeAuditLogErr: os.Stderr, clientTLS: tr.TLSClientConfig, hostname: hostname, + onPanicSometimes: rate.Sometimes{First: 3, Interval: 1 * time.Minute}, } } -var caughtPanic = false - -// tryCatch executes function `p` and `onErr` if `p` panics -// onErr will receive a stack trace string of the first panic -// further panics are ignored for efficiency reasons -func tryCatch(p func(), onErr func(err interface{}, stack string)) { - defer func() { - if err := recover(); err != nil { - s := "" - if !caughtPanic { - buf := make([]byte, 1024) - l := runtime.Stack(buf, false) - s = string(buf[:l]) - caughtPanic = true - } - onErr(err, s) - } - }() - - p() -} - // applies filters to a request func (p *Proxy) applyFiltersToRequest(f []*routing.RouteFilter, ctx *context) []*routing.RouteFilter { if len(f) == 0 { @@ -756,20 +738,11 @@ func (p *Proxy) applyFiltersToRequest(f []*routing.RouteFilter, ctx *context) [] for _, fi := range f { start := time.Now() filterTracing.logStart(fi.Name) - tryCatch(func() { - ctx.setMetricsPrefix(fi.Name) - fi.Request(ctx) - p.metrics.MeasureFilterRequest(fi.Name, start) - }, func(err interface{}, stack string) { - if p.flags.Debug() { - // these errors are collected for the debug mode to be able - // to report in the response which filters failed. - ctx.debugFilterPanics = append(ctx.debugFilterPanics, err) - return - } + ctx.setMetricsPrefix(fi.Name) - p.log.Errorf("error while processing filter during request: %s: %v (%s)", fi.Name, err, stack) - }) + fi.Request(ctx) + + p.metrics.MeasureFilterRequest(fi.Name, start) filterTracing.logEnd(fi.Name) filters = append(filters, fi) @@ -793,20 +766,11 @@ func (p *Proxy) applyFiltersToResponse(filters []*routing.RouteFilter, ctx *cont fi := filters[last-i] start := time.Now() filterTracing.logStart(fi.Name) - tryCatch(func() { - ctx.setMetricsPrefix(fi.Name) - fi.Response(ctx) - p.metrics.MeasureFilterResponse(fi.Name, start) - }, func(err interface{}, stack string) { - if p.flags.Debug() { - // these errors are collected for the debug mode to be able - // to report in the response which filters failed. - ctx.debugFilterPanics = append(ctx.debugFilterPanics, err) - return - } + ctx.setMetricsPrefix(fi.Name) - p.log.Errorf("error while processing filters during response: %s: %v (%s)", fi.Name, err, stack) - }) + fi.Response(ctx) + + p.metrics.MeasureFilterResponse(fi.Name, start) filterTracing.logEnd(fi.Name) } @@ -1050,7 +1014,29 @@ func newRatelimitError(settings ratelimit.Settings, retryAfter int) error { } } -func (p *Proxy) do(ctx *context) error { +// copied from debug.PrintStack +func stack() []byte { + buf := make([]byte, 1024) + for { + n := runtime.Stack(buf, false) + if n < len(buf) { + return buf[:n] + } + buf = make([]byte, 2*len(buf)) + } +} + +func (p *Proxy) do(ctx *context) (err error) { + defer func() { + if r := recover(); r != nil { + p.onPanicSometimes.Do(func() { + p.log.Errorf("stacktrace of panic caused by: %v:\n%s", r, stack()) + }) + + err = fmt.Errorf("panic caused by: %v", r) + } + }() + if ctx.executionCounter > p.maxLoops { return errMaxLoopbacksReached } @@ -1081,8 +1067,7 @@ func (p *Proxy) do(ctx *context) error { // proxy global setting if !ctx.wasExecuted() { if settings, retryAfter := p.limiters.Check(ctx.request); retryAfter > 0 { - rerr := newRatelimitError(settings, retryAfter) - return rerr + return newRatelimitError(settings, retryAfter) } } // every time the context is used for a request the context executionCounter is incremented @@ -1204,11 +1189,10 @@ func retryable(ctx *context, perr *proxyError) bool { func (p *Proxy) serveResponse(ctx *context) { if p.flags.Debug() { dbgResponse(ctx.responseWriter, &debugInfo{ - route: &ctx.route.Route, - incoming: ctx.originalRequest, - outgoing: ctx.outgoingDebugRequest, - response: ctx.response, - filterPanics: ctx.debugFilterPanics, + route: &ctx.route.Route, + incoming: ctx.originalRequest, + outgoing: ctx.outgoingDebugRequest, + response: ctx.response, }) return @@ -1283,11 +1267,10 @@ func (p *Proxy) errorResponse(ctx *context, err error) { if p.flags.Debug() { di := &debugInfo{ - incoming: ctx.originalRequest, - outgoing: ctx.outgoingDebugRequest, - response: ctx.response, - err: err, - filterPanics: ctx.debugFilterPanics, + incoming: ctx.originalRequest, + outgoing: ctx.outgoingDebugRequest, + response: ctx.response, + err: err, } if ctx.route != nil { diff --git a/proxy/proxy_test.go b/proxy/proxy_test.go index a386d5a6d7..4937dd8341 100644 --- a/proxy/proxy_test.go +++ b/proxy/proxy_test.go @@ -16,6 +16,7 @@ import ( "strconv" "strings" "sync" + "sync/atomic" "testing" "time" @@ -939,8 +940,11 @@ type nilFilterSpec struct{} func (*nilFilterSpec) Name() string { return "nilFilter" } func (*nilFilterSpec) CreateFilter(config []interface{}) (filters.Filter, error) { return nil, nil } -func TestNilFilterIsNotCalledAndDoesNotBreakFilterChain(t *testing.T) { - s := startTestServer([]byte("Hello World!"), 0, func(r *http.Request) {}) +func TestFilterPanic(t *testing.T) { + var backendRequests int32 + s := startTestServer([]byte("Hello World!"), 0, func(r *http.Request) { + atomic.AddInt32(&backendRequests, 1) + }) defer s.Close() fr := make(filters.Registry) @@ -956,41 +960,52 @@ func TestNilFilterIsNotCalledAndDoesNotBreakFilterChain(t *testing.T) { appendRequestHeader("X-Expected", "after") -> appendResponseHeader("X-Expected", "after") -> "%s"`, s.URL) - tp, err := newTestProxyWithFilters(fr, doc, FlagsNone) - if err != nil { - t.Error(err) - return - } + tp, err := newTestProxyWithFilters(fr, doc, FlagsNone) + require.NoError(t, err) defer tp.close() - r, _ := http.NewRequest("GET", "https://www.example.org/foo", nil) + r := httptest.NewRequest("GET", "/foo", nil) w := httptest.NewRecorder() tp.proxy.ServeHTTP(w, r) - if requestExpectedHeader, has := r.Header["X-Expected"]; has { - assert.Contains(t, requestExpectedHeader, "before", - "request header was not added before nil filter") - assert.Contains(t, requestExpectedHeader, "after", - "request header was not added after nil filter (nil filter broke the filter chain)") - } else { - t.Error("Request is missing the expected header (added during filter chain winding)") - return + assert.Equal(t, http.StatusInternalServerError, w.Code) + assert.Equal(t, int32(0), backendRequests, "expected no backend request") + assert.Equal(t, []string{"before"}, r.Header["X-Expected"], "panic expected to skip the rest of the request filters") + assert.NotContains(t, w.Header(), "X-Expected", "panic expected to skip all of the response filters") + + const msg = "panic caused by: runtime error: invalid memory address or nil pointer dereference" + if err = tp.log.WaitFor(msg, 100*time.Millisecond); err != nil { + t.Errorf("expected '%s' in logs", msg) } +} - if responseExpectedHeader, has := w.Header()["X-Expected"]; has { - assert.Contains(t, responseExpectedHeader, "before", - "response header was not added before nil filter") - assert.Contains(t, responseExpectedHeader, "after", - "response header was not added after nil filter (nil filter broke the filter chain)") - } else { - t.Error("Response is missing the expected header (added during filter chain unwinding)") - return +func TestFilterPanicPrintStackRate(t *testing.T) { + fr := make(filters.Registry) + fr.Register(new(nilFilterSpec)) + + tp, err := newTestProxyWithFilters(fr, `* -> nilFilter() -> `, FlagsNone) + require.NoError(t, err) + defer tp.close() + + const ( + panicsCaused = 10 + stacksPrinted = 3 // see Proxy.onPanicSometimes + ) + + for i := 0; i < panicsCaused; i++ { + r := httptest.NewRequest("GET", "/", nil) + w := httptest.NewRecorder() + + tp.proxy.ServeHTTP(w, r) } - if w.Code != http.StatusOK { - t.Errorf("Wrong status code. Expected 200 but got %d", w.Code) + const msg = "error while proxying" + if err = tp.log.WaitForN(msg, panicsCaused, 100*time.Millisecond); err != nil { + t.Errorf("expected '%s' in logs", msg) } + + assert.Equal(t, stacksPrinted, tp.log.Count("github.com/zalando/skipper/proxy.TestFilterPanicPrintStackRate")) } func TestProcessesRequestWithShuntBackend(t *testing.T) { @@ -2100,48 +2115,6 @@ func TestUserAgent(t *testing.T) { } } -func thisOneWillPanic() { - panic("oops") -} - -func TestTryCatch(t *testing.T) { - caughtPanic = false - tryCatch(thisOneWillPanic, func(err interface{}, stack string) { - if err == nil { - t.Error("should provide error") - return - } - - if stack == "" { - t.Error("should provide stack trace") - return - } - - if !strings.Contains(stack, "TestTryCatch") || - !strings.Contains(stack, "thisOneWillPanic") || - !strings.Contains(stack, "proxy_test.go") { - t.Error("should provide function names and file names in the stack trace") - return - } - }) -} - -func TestTryCatchProvidesStackTraceOnlyOnce(t *testing.T) { - caughtPanic = false - tryCatch(thisOneWillPanic, func(err interface{}, stack string) { - if stack == "" { - t.Error("should provide stack trace the first time") - return - } - }) - tryCatch(thisOneWillPanic, func(err interface{}, stack string) { - if stack != "" { - t.Error("should not provide stack trace the second time") - return - } - }) -} - func benchmarkAccessLog(b *testing.B, filter string, responseCode int) { response := "some bytes"