diff --git a/.buildkite/pipeline_lib.sh b/.buildkite/pipeline_lib.sh index ced606e47a..bf61b3cdea 100644 --- a/.buildkite/pipeline_lib.sh +++ b/.buildkite/pipeline_lib.sh @@ -53,8 +53,12 @@ gen_bazel_test_steps() { echo " - \"bazel-testlogs.tar.gz\"" echo " timeout_in_minutes: 20" echo " retry:" + echo " manual:" + echo " permit_on_passed: true" echo " automatic:" echo " - exit_status: -1 # Agent was lost" echo " - exit_status: 255 # Forced agent shutdown" + echo " - exit_status: 3 # Test may be flaky or it just didn't pass" + echo " limit: 2" done } diff --git a/acceptance/router_benchmark/test.py b/acceptance/router_benchmark/test.py index f72d66e4ee..df2f753c22 100644 --- a/acceptance/router_benchmark/test.py +++ b/acceptance/router_benchmark/test.py @@ -20,7 +20,7 @@ from collections import namedtuple from plumbum import cli -from plumbum.cmd import docker, whoami +from plumbum.cmd import docker, whoami, cat from plumbum import cmd from acceptance.common import base @@ -34,11 +34,11 @@ # Those values are valid expectations only when running in the CI environment. TEST_CASES = { - 'in': 270000, - 'out': 240000, - 'in_transit': 270000, - 'out_transit': 240000, - 'br_transit': 260000, + 'in': 290000, + 'out': 290000, + 'in_transit': 250000, + 'out_transit': 250000, + 'br_transit': 290000, } @@ -60,6 +60,12 @@ def mac_for_ip(ip: str) -> str: return 'f0:0d:ca:fe:{:02x}:{:02x}'.format(int(ipBytes[2]), int(ipBytes[3])) +# Dump the cpu info into the log just to inform our thoughts on performance variability. +def log_cpu_info(): + cpu_info = cat('/proc/cpuinfo') + logger.info(f"CPU INFO BEGINS\n{cpu_info}\nCPU_INFO ENDS") + + class RouterBMTest(base.TestBase): """ Tests that the implementation of a router has sufficient performance in terms of packets @@ -148,6 +154,9 @@ def create_interface(self, req: IntfReq, ns: str): def setup_prepare(self): super().setup_prepare() + # As the name implies. + log_cpu_info() + # get the config where the router can find it. 
shutil.copytree("acceptance/router_benchmark/conf/", self.artifacts / "conf") @@ -221,13 +230,16 @@ def teardown(self): def execBrLoad(self, case: str, mapArgs: list[str], count: int) -> str: brload = self.get_executable("brload") + # For num-streams, attempt to distribute uniformly on many possible number of cores. + # 840 is a multiple of 1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 14, 15, 20, 21, 24, 28, ... + # We can't go too far down that road; the test has to build one packet for each stream. return sudo(brload.executable, "run", "--artifacts", self.artifacts, *mapArgs, "--case", case, "--num-packets", str(count), - "--num-streams", "2") + "--num-streams", "840") def runTestCase(self, case: str, mapArgs: list[str]): logger.info(f"==> Starting load {case}") @@ -242,9 +254,8 @@ def runTestCase(self, case: str, mapArgs: list[str]): # The raw metrics are expressed in terms of core*seconds. We convert to machine*seconds # which allows us to provide a projected packet/s; ...more intuitive than packets/core*s. - # We're interested only in br_transit traffic. We measure the rate over 10s. For best - # results we sample the end of the middle 10s of the run. "beg" is the start time of the - # real action and "end" is the end time. + # We measure the rate over 10s. For best results we sample the end of the middle 10s of the + # run. "beg" is the start time of the real action and "end" is the end time. sampleTime = (int(beg) + int(end) + 10) / 2 promQuery = urlencode({ 'time': f'{sampleTime}', @@ -300,9 +311,9 @@ def _run(self): for label, intf in self.intfMap.items(): mapArgs.extend(["--interface", f"{label}={intf.name},{intf.mac},{intf.peerMac}"]) - # Run one (10% size) test as warm-up to trigger the frequency scaling, else the first test + # Run one (30% size) test as warm-up to trigger the frequency scaling, else the first test # gets much lower performance. 
- self.execBrLoad(list(TEST_CASES)[0], mapArgs, 1000000) + self.execBrLoad(list(TEST_CASES)[0], mapArgs, 3000000) # At long last, run the tests rateMap = {} diff --git a/router/config/config.go b/router/config/config.go index 711bd73b61..c4d78bc684 100644 --- a/router/config/config.go +++ b/router/config/config.go @@ -74,11 +74,24 @@ func (cfg *RouterConfig) Validate() error { func (cfg *RouterConfig) InitDefaults() { // NumProcessors is the number of goroutines used to handle the processing queue. - // By default, there are as many as cores allowed by Go and other goroutines displace - // the packet processors sporadically. It may be either good or bad to create more - // processors (plus the other goroutines) than there are cores... experience will tell. + // It has been observed that allowing the packet processors to starve the other tasks was + // counterproductive. We get much better results by setting two cores aside for other go + // routines (such as for input and output). It remains to be seen if even more cores need to be + // set aside on large core-count systems. + if cfg.NumProcessors == 0 { - cfg.NumProcessors = runtime.GOMAXPROCS(0) + // Do what we think is best, so in most cases there's no need for an explicit config. + maxProcs := runtime.GOMAXPROCS(0) + if maxProcs > 3 { + // Two for I/Os, two or more for processing. + cfg.NumProcessors = maxProcs - 2 + } else if maxProcs > 1 { + // I/Os <= processing. + cfg.NumProcessors = maxProcs - 1 + } else { + // No choice. + cfg.NumProcessors = maxProcs + } } if cfg.NumSlowPathProcessors == 0 {