Update error log parsing

aorwall · Sep 19, 2024 · da3d60b · da3d60b
1 parent fbf7464
commit da3d60b
Show file tree

Hide file tree

Showing 6 changed files with 129 additions and 60 deletions.
diff --git a/testbed/client/client.py b/testbed/client/client.py
@@ -21,7 +21,8 @@
     SWEbenchInstance,
     TestResult, TestbedDetailed, TestbedStatusDetailed, ContainerStatus, TestRunResponse,
 )
-from testbed.swebench.constants import ResolvedStatus, APPLY_PATCH_FAIL
+from testbed.swebench.constants import ResolvedStatus, APPLY_PATCH_FAIL, RUN_TESTS
+from testbed.swebench.log_parsers import parse_log
 
 from testbed.swebench.test_spec import TestSpec
 
@@ -392,7 +393,9 @@ def run_tests(
         commands = []
         commands.extend(self.test_spec.test_script(test_files))
         response = self.execute(commands)
-        test_result = self.test_spec.parse_logs(response.output)
+
+        log = response.output.split(f"{RUN_TESTS}\n")[-1]
+        test_result = parse_log(log, self.test_spec.repo)
 
         filtered_test_result = []
 

diff --git a/testbed/swebench/log_parsers.py b/testbed/swebench/log_parsers.py
@@ -7,6 +7,39 @@
 
 logger = logging.getLogger(__name__)
 
+
+def parse_log(log: str, repo: str) -> list[TestResult]:  # parse `log` with the parser registered for `repo`, with error fallbacks
+    log_parser = MAP_REPO_TO_PARSER[repo]  # NOTE(review): presumably a dict; an unregistered repo would raise here - confirm
+    test_results = log_parser(log)
+    # The fallbacks below run only when the repo-specific parser produced no results.
+    if not test_results:
+        logger.info(f"No test results found in log, will check for unhandled errors.")
+        # First fallback: an unhandled pytest error, recorded as a single result.
+        if detect_unhandled_pytest_error(log):
+            logger.info("Found unhandled pytest error in log")
+            unhandled_test_result = parse_unhandled_pytest_error(log, "unhandled_test_error")
+            test_results.append(unhandled_test_result)
+        else:  # second fallback: look for a raw Python traceback anywhere in the log
+            lines = log.splitlines()  # scanned for the first line containing the traceback header
+            traceback_start = next((i for i, line in enumerate(lines) if "Traceback (most recent call last):" in line), None)
+            if traceback_start is not None:  # a traceback header was found
+                traceback_end = next((i for i, line in enumerate(lines[traceback_start:], start=traceback_start) if "During handling of the above exception" in line), len(lines))
+                traceback = "\n".join(lines[traceback_start:traceback_end])  # ends before the first chained-exception marker, else at end of log
+                traceback_result = parse_traceback(traceback)
+                if traceback_result:  # only a truthy parse result is kept
+                    test_results.append(traceback_result)
+
+    # Normalize paths: drop a leading "/testbed/" from file_path and every "/testbed/" occurrence from failure_output.
+    for result in test_results:
+        if result.file_path and result.file_path.startswith("/testbed/"):
+            result.file_path = result.file_path[len("/testbed/"):]
+
+        if result.failure_output:
+            result.failure_output = result.failure_output.replace("/testbed/", "")  # replaces all occurrences, not just a prefix
+
+    return test_results
+
+
 def parse_log_pytest(log: str) -> list[TestResult]:
     test_results = []
     test_errors = []
@@ -171,9 +204,6 @@ def parse_log_pytest(log: str) -> list[TestResult]:
         if test.name in failure_outputs:
             test.failure_output = "\n".join(failure_outputs[test.name])
 
-    if not test_results and detect_unhandled_pytest_error(log):
-        unhandled_test_result = parse_unhandled_pytest_error(log, "unhandled_test_error")
-        test_results.append(unhandled_test_result)
 
     return test_results
 

diff --git a/testbed/swebench/test_spec.py b/testbed/swebench/test_spec.py
@@ -24,7 +24,7 @@
     RUN_TESTS,
 )
 from testbed.swebench.grading import get_eval_tests_report, get_resolution_status
-from testbed.swebench.log_parsers import MAP_REPO_TO_PARSER, parse_traceback
+from testbed.swebench.log_parsers import MAP_REPO_TO_PARSER, parse_traceback, parse_log
 from testbed.swebench.utils import get_test_directives
 
 DIFF_MODIFIED_FILE_REGEX = r"--- a/(.*)"
@@ -256,7 +256,7 @@ def get_pred_report(self, content: str) -> TestsStatus:
             report (EvaluationResult): report of metrics
         """
 
-        test_result = self.parse_logs(content)
+        test_result = parse_log(content, self.repo)
         eval_ref = {
             KEY_INSTANCE_ID: self.instance_id,
             FAIL_TO_PASS: self.fail_to_pass,
@@ -271,30 +271,3 @@ def get_pred_report(self, content: str) -> TestsStatus:
             fail_to_pass=EvalTestResult(**report[FAIL_TO_PASS]),
             pass_to_pass=EvalTestResult(**report[PASS_TO_PASS]),
         )
-
-    def parse_logs(self, content: str | None = None) -> list[TestResult]:
-        """
-        Retrieve evaluation results for a task instance from its corresponding log file
-        """
-
-        content = content.split(f"{RUN_TESTS}\n")[-1]
-
-        if content.strip().startswith("Traceback (most recent call last):") or content.strip().startswith("ImportError"):
-            result = parse_traceback(content)
-            if result:
-                return [result]
-            else:
-                logger.warning(f"Failed to parse traceback for output:\n{content}")
-
-        log_parser = MAP_REPO_TO_PARSER[self.repo]
-
-        results = log_parser(content)
-
-        for result in results:
-            if result.file_path and result.file_path.startswith("/testbed/"):
-                result.file_path = result.file_path[len("/testbed/"):]
-
-            if result.failure_output:
-                result.failure_output = result.failure_output.replace("/testbed/", "")
-
-        return results
diff --git a/tests/data/sympy_output_4.txt b/tests/data/sympy_output_4.txt
@@ -0,0 +1,31 @@
+sympy/combinatorics/tests/test_permutations.py[0] Traceback (most recent call last):
+  File "/testbed/sympy/utilities/runtests.py", line 1079, in test_file
+    exec_(code, gl)
+  File "/testbed/sympy/combinatorics/tests/test_permutations.py", line 5, in <module>
+    from sympy.combinatorics.permutations import (Permutation, _af_parity,
+  File "/testbed/sympy/combinatorics/__init__.py", line 7, in <module>
+    from sympy.combinatorics.polyhedron import (Polyhedron, tetrahedron, cube,
+  File "/testbed/sympy/combinatorics/polyhedron.py", line 824, in <module>
+    dodecahedron_faces, icosahedron_faces) = _pgroup_calcs()
+  File "/testbed/sympy/combinatorics/polyhedron.py", line 724, in _pgroup_calcs
+    _c_pgroup = [Perm(p) for p in
+  File "/testbed/sympy/combinatorics/polyhedron.py", line 724, in <listcomp>
+    _c_pgroup = [Perm(p) for p in
+  File "/testbed/sympy/combinatorics/permutations.py", line 900, in __new__
+    for i in range(len(ci)):
+TypeError: object of type 'int' has no len()
+
+During handling of the above exception, another exception occurred:
+
+Traceback (most recent call last):
+  File "<string>", line 1, in <module>
+  File "/testbed/sympy/utilities/runtests.py", line 536, in _test
+    return int(not t.test(sort=sort, timeout=timeout,
+  File "/testbed/sympy/utilities/runtests.py", line 1013, in test
+    self.test_file(f, sort, timeout, slow, enhance_asserts)
+  File "/testbed/sympy/utilities/runtests.py", line 1086, in test_file
+    reporter.test_exception(sys.exc_info())
+  File "/testbed/sympy/utilities/runtests.py", line 2217, in test_exception
+    self._exceptions.append((self._active_file, self._active_f, exc_info))
+AttributeError: 'PyTestReporter' object has no attribute '_active_file'
+
diff --git a/tests/data/syntax_error.txt b/tests/data/syntax_error.txt
@@ -1,3 +1,5 @@
+/testbed/sympy/core/basic.py:3: DeprecationWarning: Using or importing the ABCs from 'collections' instead of from 'collections.abc' is deprecated since Python 3.3, and in 3.10 it will stop working
+  from collections import Mapping, defaultdict
 Traceback (most recent call last):
   File "/testbed/./tests/runtests.py", line 25, in <module>
     from django.test import TestCase, TransactionTestCase