Skip to content

Commit

Permalink
Update error log parsing
Browse files Browse the repository at this point in the history
  • Loading branch information
aorwall committed Sep 19, 2024
1 parent fbf7464 commit da3d60b
Show file tree
Hide file tree
Showing 6 changed files with 129 additions and 60 deletions.
7 changes: 5 additions & 2 deletions testbed/client/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,8 @@
SWEbenchInstance,
TestResult, TestbedDetailed, TestbedStatusDetailed, ContainerStatus, TestRunResponse,
)
from testbed.swebench.constants import ResolvedStatus, APPLY_PATCH_FAIL
from testbed.swebench.constants import ResolvedStatus, APPLY_PATCH_FAIL, RUN_TESTS
from testbed.swebench.log_parsers import parse_log

from testbed.swebench.test_spec import TestSpec

Expand Down Expand Up @@ -392,7 +393,9 @@ def run_tests(
commands = []
commands.extend(self.test_spec.test_script(test_files))
response = self.execute(commands)
test_result = self.test_spec.parse_logs(response.output)

log = response.output.split(f"{RUN_TESTS}\n")[-1]
test_result = parse_log(log, self.test_spec.repo)

filtered_test_result = []

Expand Down
36 changes: 33 additions & 3 deletions testbed/swebench/log_parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,39 @@

logger = logging.getLogger(__name__)


# Separator lines CPython prints between the tracebacks of chained exceptions:
# the first for implicit chaining (exception raised inside an ``except``
# block), the second for explicit chaining (``raise ... from ...``).
_CHAINED_EXCEPTION_MARKERS = (
    "During handling of the above exception",
    "The above exception was the direct cause of the following exception",
)


def parse_log(log: str, repo: str) -> list[TestResult]:
    """Parse a test-run log into test results with the parser registered for ``repo``.

    When the repo-specific parser yields nothing, two fallbacks are tried in
    order:

    1. If ``detect_unhandled_pytest_error`` matches, the result of
       ``parse_unhandled_pytest_error`` is used.
    2. Otherwise the first ``Traceback (most recent call last):`` section in
       the log is extracted (stopping before any chained-exception separator)
       and handed to ``parse_traceback``.

    Finally the ``/testbed/`` prefix is removed from every result's
    ``file_path`` and ``failure_output``.

    Args:
        log: Raw output of the test run.
        repo: Key into ``MAP_REPO_TO_PARSER`` selecting the log parser.

    Returns:
        The list of parsed test results (possibly empty).

    Raises:
        KeyError: If ``repo`` has no entry in ``MAP_REPO_TO_PARSER``.
    """
    log_parser = MAP_REPO_TO_PARSER[repo]
    # Work on our own list so the fallbacks below can always append, even if
    # a parser returns None or a sequence it still holds a reference to.
    test_results = list(log_parser(log) or [])

    if not test_results:
        logger.info("No test results found in log, will check for unhandled errors.")
        # Check for unhandled pytest error
        if detect_unhandled_pytest_error(log):
            logger.info("Found unhandled pytest error in log")
            # NOTE(review): the second argument is presumably the name given
            # to the synthetic result - confirm against the helper.
            test_results.append(
                parse_unhandled_pytest_error(log, "unhandled_test_error")
            )
        else:
            lines = log.splitlines()
            traceback_start = next(
                (
                    index
                    for index, line in enumerate(lines)
                    if "Traceback (most recent call last):" in line
                ),
                None,
            )
            if traceback_start is not None:
                # Keep only the first traceback of a chain: stop at the first
                # chaining separator, or at the end of the log if none exists.
                traceback_end = next(
                    (
                        index
                        for index in range(traceback_start, len(lines))
                        if any(
                            marker in lines[index]
                            for marker in _CHAINED_EXCEPTION_MARKERS
                        )
                    ),
                    len(lines),
                )
                # Named ``traceback_text`` to avoid shadowing the stdlib
                # ``traceback`` module name.
                traceback_text = "\n".join(lines[traceback_start:traceback_end])
                traceback_result = parse_traceback(traceback_text)
                if traceback_result:
                    test_results.append(traceback_result)

    # Skip testbed prefix in file paths
    for result in test_results:
        if result.file_path and result.file_path.startswith("/testbed/"):
            result.file_path = result.file_path[len("/testbed/"):]

        if result.failure_output:
            result.failure_output = result.failure_output.replace("/testbed/", "")

    return test_results


def parse_log_pytest(log: str) -> list[TestResult]:
test_results = []
test_errors = []
Expand Down Expand Up @@ -171,9 +204,6 @@ def parse_log_pytest(log: str) -> list[TestResult]:
if test.name in failure_outputs:
test.failure_output = "\n".join(failure_outputs[test.name])

if not test_results and detect_unhandled_pytest_error(log):
unhandled_test_result = parse_unhandled_pytest_error(log, "unhandled_test_error")
test_results.append(unhandled_test_result)

return test_results

Expand Down
31 changes: 2 additions & 29 deletions testbed/swebench/test_spec.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
RUN_TESTS,
)
from testbed.swebench.grading import get_eval_tests_report, get_resolution_status
from testbed.swebench.log_parsers import MAP_REPO_TO_PARSER, parse_traceback
from testbed.swebench.log_parsers import MAP_REPO_TO_PARSER, parse_traceback, parse_log
from testbed.swebench.utils import get_test_directives

DIFF_MODIFIED_FILE_REGEX = r"--- a/(.*)"
Expand Down Expand Up @@ -256,7 +256,7 @@ def get_pred_report(self, content: str) -> TestsStatus:
report (EvaluationResult): report of metrics
"""

test_result = self.parse_logs(content)
test_result = parse_log(content, self.repo)
eval_ref = {
KEY_INSTANCE_ID: self.instance_id,
FAIL_TO_PASS: self.fail_to_pass,
Expand All @@ -271,30 +271,3 @@ def get_pred_report(self, content: str) -> TestsStatus:
fail_to_pass=EvalTestResult(**report[FAIL_TO_PASS]),
pass_to_pass=EvalTestResult(**report[PASS_TO_PASS]),
)

def parse_logs(self, content: str | None = None) -> list[TestResult]:
    """
    Parse the captured test output of a task instance into test results.

    Only the text after the last ``RUN_TESTS`` marker line is considered.
    If that text begins with a Python traceback or an ``ImportError`` line,
    ``parse_traceback`` is tried first; when it yields nothing, a warning is
    logged and the repo-specific parser from ``MAP_REPO_TO_PARSER`` is used
    instead. The ``/testbed/`` prefix is removed from each result's
    ``file_path`` and ``failure_output``.

    Args:
        content: Raw test-run output. ``None`` (the default) is treated as
            "no output available".

    Returns:
        The parsed test results; an empty list when ``content`` is ``None``.

    Raises:
        KeyError: If ``self.repo`` has no entry in ``MAP_REPO_TO_PARSER``.
    """
    if content is None:
        # The signature advertises None as the default, so it must not crash
        # on ``None.split``; without output there is nothing to report.
        return []

    content = content.split(f"{RUN_TESTS}\n")[-1]

    # Strip once and test both prefixes in a single startswith call.
    stripped = content.strip()
    if stripped.startswith(("Traceback (most recent call last):", "ImportError")):
        result = parse_traceback(content)
        if result:
            return [result]
        # Fall through to the regular parser when the traceback is unparsable.
        logger.warning(f"Failed to parse traceback for output:\n{content}")

    log_parser = MAP_REPO_TO_PARSER[self.repo]

    results = log_parser(content)

    for result in results:
        if result.file_path and result.file_path.startswith("/testbed/"):
            result.file_path = result.file_path[len("/testbed/"):]

        if result.failure_output:
            result.failure_output = result.failure_output.replace("/testbed/", "")

    return results
31 changes: 31 additions & 0 deletions tests/data/sympy_output_4.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
sympy/combinatorics/tests/test_permutations.py[0] Traceback (most recent call last):
File "/testbed/sympy/utilities/runtests.py", line 1079, in test_file
exec_(code, gl)
File "/testbed/sympy/combinatorics/tests/test_permutations.py", line 5, in <module>
from sympy.combinatorics.permutations import (Permutation, _af_parity,
File "/testbed/sympy/combinatorics/__init__.py", line 7, in <module>
from sympy.combinatorics.polyhedron import (Polyhedron, tetrahedron, cube,
File "/testbed/sympy/combinatorics/polyhedron.py", line 824, in <module>
dodecahedron_faces, icosahedron_faces) = _pgroup_calcs()
File "/testbed/sympy/combinatorics/polyhedron.py", line 724, in _pgroup_calcs
_c_pgroup = [Perm(p) for p in
File "/testbed/sympy/combinatorics/polyhedron.py", line 724, in <listcomp>
_c_pgroup = [Perm(p) for p in
File "/testbed/sympy/combinatorics/permutations.py", line 900, in __new__
for i in range(len(ci)):
TypeError: object of type 'int' has no len()

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
File "<string>", line 1, in <module>
File "/testbed/sympy/utilities/runtests.py", line 536, in _test
return int(not t.test(sort=sort, timeout=timeout,
File "/testbed/sympy/utilities/runtests.py", line 1013, in test
self.test_file(f, sort, timeout, slow, enhance_asserts)
File "/testbed/sympy/utilities/runtests.py", line 1086, in test_file
reporter.test_exception(sys.exc_info())
File "/testbed/sympy/utilities/runtests.py", line 2217, in test_exception
self._exceptions.append((self._active_file, self._active_f, exc_info))
AttributeError: 'PyTestReporter' object has no attribute '_active_file'

2 changes: 2 additions & 0 deletions tests/data/syntax_error.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
/testbed/sympy/core/basic.py:3: DeprecationWarning: Using or importing the ABCs from 'collections' instead of from 'collections.abc' is deprecated since Python 3.3, and in 3.10 it will stop working
from collections import Mapping, defaultdict
Traceback (most recent call last):
File "/testbed/./tests/runtests.py", line 25, in <module>
from django.test import TestCase, TransactionTestCase
Expand Down
Loading

0 comments on commit da3d60b

Please sign in to comment.