This repository has been archived by the owner on Nov 19, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathbisector.py
executable file
·582 lines (513 loc) · 19.9 KB
/
bisector.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
#!/usr/bin/env python3
import copy
import functools
import logging
import math
import os
import subprocess
import tarfile
from pathlib import Path
from typing import Optional
import ccbuilder
from ccbuilder import (
Builder,
BuildException,
CompilerProject,
PatchDB,
Repo,
get_compiler_info,
)
from ccbuilder.utils.utils import select_repo
import checker
import generator
import parsers
import reducer
import utils
class BisectionException(Exception):
pass
def find_cached_revisions(
compiler_name: str, config: utils.NestedNamespace
) -> list[str]:
if compiler_name == "llvm":
compiler_name = "clang"
compilers = []
for entry in Path(config.cachedir).iterdir():
if entry.is_symlink() or not entry.stem.startswith(compiler_name):
continue
if not (entry / "bin" / compiler_name).exists():
continue
rev = str(entry).split("-")[-1]
compilers.append(rev)
return compilers
class Bisector:
"""Class to bisect a given case."""
def __init__(
self,
config: utils.NestedNamespace,
bldr: Builder,
chkr: checker.Checker,
) -> None:
self.config = config
self.bldr = bldr
self.chkr = chkr
self.steps = 0
def _is_interesting(self, case: utils.Case, rev: str) -> bool:
"""_is_interesting.
Args:
case (utils.Case): Case to check
rev (str): What revision to check the case against.
Returns:
bool: True if the case is interesting wrt `rev`.
Raises:
builder.CompileError:
"""
case_cpy = copy.deepcopy(case)
case_cpy.bad_setting.rev = rev
try:
if case_cpy.reduced_code:
case_cpy.code = case_cpy.reduced_code
return self.chkr.is_interesting(case_cpy, preprocess=False)
else:
return self.chkr.is_interesting(case_cpy, preprocess=True)
except subprocess.CalledProcessError as e:
raise utils.CompileError(e)
def bisect_file(self, file: Path, force: bool = False) -> bool:
"""Bisect case found in `file`.
Args:
file (Path): Path to case file to bisect.
force (bool): Whether or not to force a bisection
if there's already one.
Returns:
bool: True if the bisection of the case in `file` succeeded.
"""
case = utils.Case.from_file(self.config, file)
if self.bisect_case(case, force):
case.to_file(file)
return True
return False
def bisect_case(self, case: utils.Case, force: bool = False) -> bool:
"""Bisect a given case.
Args:
case (utils.Case): Case to bisect.
force (bool): Whether or not to force a bisection
if there's already one.
Returns:
bool: True if the bisection succeeded.
"""
if not force and case.bisection:
logging.info(f"Ignoring case: Already bisected")
return True
try:
if res := self.bisect_code(
case.code, case.marker, case.bad_setting, case.good_settings
):
case.bisection = res
return True
except BisectionException:
return False
return False
def bisect_code(
self,
code: str,
marker: str,
bad_setting: utils.CompilerSetting,
good_settings: list[utils.CompilerSetting],
) -> Optional[str]:
"""Bisect a given code wrt. marker, the bad setting and the good settings.
Args:
self:
code (str): code
marker (str): marker
bad_setting (utils.CompilerSetting): bad_setting
good_settings (list[utils.CompilerSetting]): good_settings
Returns:
Optional[str]: Revision the code bisects to, if it is successful.
None otherwise.
Raises:
BisectionException: Raised if the bisection failed somehow.
"""
case = utils.Case(
code,
marker,
bad_setting,
good_settings,
utils.Scenario([bad_setting], good_settings),
None,
None,
None,
)
bad_compiler_config = case.bad_setting.compiler_project
repo = select_repo(
bad_setting.compiler_project,
gcc_repo=self.bldr.gcc_repo,
llvm_repo=self.bldr.llvm_repo,
)
# ===== Get good and bad commits
bad_commit = case.bad_setting.rev
# Only the ones which are on the same opt_level and have the same compiler can be bisected
possible_good_commits = [
gs.rev
for gs in case.good_settings
if gs.opt_level == case.bad_setting.opt_level
and gs.compiler_project.to_string() == bad_compiler_config.to_string()
]
if len(possible_good_commits) == 0:
logging.info(f"No matching optimization level found. Aborting...")
return None
# Sort commits based on branch point wrt to the bad commit
# Why? Look at the following commit graph
# Bad
# | Good_1
# | /
# A Good_2
# | /
# | /
# B
# |
# We want to bisect between Bad and Good_1 because it's less bisection work.
possible_good_commits_t = [
(rev, repo.get_best_common_ancestor(bad_commit, rev))
for rev in possible_good_commits
]
good_commit: str
common_ancestor: str
def cmp_func(x: tuple[str, str], y: tuple[str, str]) -> bool:
return repo.is_ancestor(x[1], y[1])
good_commit, common_ancestor = min(
possible_good_commits_t,
key=functools.cmp_to_key(cmp_func),
)
# ====== Figure out in which part the introducer or fixer lies
#
# Bad Bad
# | |
# | | Good
# | or |b1 /
# |b0 | / b2
# | | /
# Good CA
#
# if good is_ancestor of bad:
# case b0
# searching regression
# else:
# if CA is not interesting:
# case b1
# searching regression
# else:
# case b2
# searching fixer
try:
if repo.is_ancestor(good_commit, bad_commit):
res = self._bisection(good_commit, bad_commit, case, repo)
print(f"{res}")
else:
if not self._is_interesting(case, common_ancestor):
# b1 case
logging.info("B1 Case")
res = self._bisection(
common_ancestor, bad_commit, case, repo, interesting_is_bad=True
)
print(f"{res}")
self._check(case, res, repo)
else:
# b2 case
logging.info("B2 Case")
# TODO: Figure out how to save and handle b2
logging.critical(f"Currently ignoring b2, sorry")
raise BisectionException("Currently ignoring Case type B2, sorry")
# res = self._bisection(
# common_ancestor, good_commit, case, repo, interesting_is_bad=False
# )
# self._check(case, res, repo, interesting_is_bad=False)
# print(f"First good commit {res}")
except utils.CompileError:
return None
return res
def _check(
self,
case: utils.Case,
rev: str,
repo: Repo,
interesting_is_bad: bool = True,
) -> None:
"""Sanity check, that the bisected commit is actually
correct.
Args:
case (utils.Case): Case to check.
rev (str): Revision believed to the bisection commit.
repo (repository.Repo): Repository to get the previous commit from.
interesting_is_bad (bool): Whether or not to switch the expected result
of the interestingness-test.
Raises:
AssertionError: Raised when the check fails.
"""
# TODO(Yann): Don't use assertion errors.
prev_commit = repo.rev_to_commit(f"{rev}~")
if interesting_is_bad:
assert self._is_interesting(case, rev) and not self._is_interesting(
case, prev_commit
)
else:
assert not self._is_interesting(case, rev) and self._is_interesting(
case, prev_commit
)
def _bisection(
self,
good_rev: str,
bad_rev: str,
case: utils.Case,
repo: Repo,
interesting_is_bad: bool = True,
max_build_fail: int = 2,
) -> str:
"""Actual bisection part.
First bisects within the cache, then continues with a normal bisection.
Args:
good_rev (str): Revision that is ancestor of bad_rev.
bad_rev (str): Rev that comes later in the tree.
case (utils.Case): Case to bisect.
repo (repository.Repo): Repo to get the revisions from.
interesting_is_bad (bool): Whether or not to switch how to interpret
the outcome of the interestingness-test.
max_build_fail (int): How many times the builder can fail to build w/o
aborting the bisection.
"""
self.steps = 0
# check cache
possible_revs = repo.direct_first_parent_path(good_rev, bad_rev)
cached_revs = find_cached_revisions(
case.bad_setting.compiler_project.to_string(), self.config
)
cached_revs = [r for r in cached_revs if r in possible_revs]
# Create enumeration dict to sort cached_revs with
sort_dict = dict((r, v) for v, r in enumerate(possible_revs))
cached_revs = sorted(cached_revs, key=lambda x: sort_dict[x])
# bisect in cache
len_region = len(repo.direct_first_parent_path(good_rev, bad_rev))
logging.info(f"Bisecting in cache...")
midpoint = ""
old_midpoint = ""
failed_to_compile = False
while True:
if failed_to_compile:
failed_to_compile = False
cached_revs.remove(midpoint)
logging.info(f"{len(cached_revs): 4}, bad: {bad_rev}, good: {good_rev}")
if len(cached_revs) == 0:
break
midpoint_idx = len(cached_revs) // 2
old_midpoint = midpoint
midpoint = cached_revs[midpoint_idx]
if old_midpoint == midpoint:
break
# There should be no build failure here, as we are working on cached builds
# But there could be a CompileError
self.steps += 1
try:
test: bool = self._is_interesting(case, midpoint)
except utils.CompileError:
logging.warning(
f"Failed to compile code with {case.bad_setting.compiler_project.to_string()}-{midpoint}"
)
failed_to_compile = True
continue
if test:
# bad is always "on top" in the history tree
# git rev-list returns commits in order of the parent relation
# cached_revs is also sorted in that order
# Thus when finding something bad i.e interesting, we have to cut the head
# and when finding something good, we have to cut the tail
if interesting_is_bad:
bad_rev = midpoint
cached_revs = cached_revs[midpoint_idx + 1 :]
else:
good_rev = midpoint
cached_revs = cached_revs[:midpoint_idx]
else:
if interesting_is_bad:
good_rev = midpoint
cached_revs = cached_revs[:midpoint_idx]
else:
bad_rev = midpoint
cached_revs = cached_revs[midpoint_idx + 1 :]
len_region2 = len(repo.direct_first_parent_path(good_rev, bad_rev))
logging.info(f"Cache bisection: range size {len_region} -> {len_region2}")
# bisect
len_region = len(repo.direct_first_parent_path(good_rev, bad_rev))
logging.info(f"Bisecting for approx. {math.ceil(math.log2(len_region))} steps")
midpoint = ""
old_midpoint = ""
failed_to_build_or_compile = False
failed_to_build_counter = 0
guaranteed_termination_counter = 0
while True:
if not failed_to_build_or_compile:
old_midpoint = midpoint
midpoint = repo.next_bisection_commit(good_rev, bad_rev)
failed_to_build_counter = 0
if midpoint == "" or midpoint == old_midpoint:
break
else:
if failed_to_build_counter >= max_build_fail:
raise BisectionException(
"Failed too many times in a row while bisecting. Aborting bisection..."
)
if failed_to_build_counter % 2 == 0:
# Get size of range
range_size = len(repo.direct_first_parent_path(midpoint, bad_rev))
# Move 10% towards the last bad
step = max(int(0.9 * range_size), 1)
midpoint = repo.rev_to_commit(f"{bad_rev}~{step}")
else:
# Symmetric to case above but jumping 10% into the other directory i.e 20% from our position.
range_size = len(repo.direct_first_parent_path(good_rev, midpoint))
step = max(int(0.2 * range_size), 1)
midpoint = repo.rev_to_commit(f"{midpoint}~{step}")
failed_to_build_counter += 1
failed_to_build_or_compile = False
if guaranteed_termination_counter >= 20:
raise BisectionException(
"Failed too many times in a row while bisecting. Aborting bisection..."
)
guaranteed_termination_counter += 1
logging.info(f"Midpoint: {midpoint}")
try:
test = self._is_interesting(case, midpoint)
except BuildException:
logging.warning(
f"Could not build {case.bad_setting.compiler_project.to_string()} {midpoint}!"
)
failed_to_build_or_compile = True
continue
except utils.CompileError:
logging.warning(
f"Failed to compile code with {case.bad_setting.compiler_project.to_string()}-{midpoint}"
)
failed_to_build_or_compile = True
continue
if test:
if interesting_is_bad:
# "As if not_interesting_is_good does not exist"-case
bad_rev = midpoint
else:
good_rev = midpoint
else:
if interesting_is_bad:
# "As if not_interesting_is_good does not exist"-case
good_rev = midpoint
else:
bad_rev = midpoint
return bad_rev
if __name__ == "__main__":
config, args = utils.get_config_and_parser(parsers.bisector_parser())
patchdb = PatchDB(Path(config.patchdb))
_, llvm_repo = get_compiler_info("llvm", Path(config.repodir))
_, gcc_repo = get_compiler_info("gcc", Path(config.repodir))
bldr = Builder(
Path(config.cachedir),
gcc_repo,
llvm_repo,
patchdb,
args.cores,
logdir=Path(config.logdir),
)
chkr = checker.Checker(config, bldr)
gnrtr = generator.CSmithCaseGenerator(config, patchdb, args.cores)
rdcr = reducer.Reducer(config, bldr)
bsctr = Bisector(config, bldr, chkr)
# TODO: This is duplicate code
if args.work_through:
if args.output_directory is None:
print("Missing output/work-through directory!")
exit(1)
else:
output_dir = Path(os.path.abspath(args.output_directory))
os.makedirs(output_dir, exist_ok=True)
tars = [
output_dir / d
for d in os.listdir(output_dir)
if tarfile.is_tarfile(output_dir / d)
]
print(f"Processing {len(tars)} tars")
for tf in tars:
print(f"Processing {tf}")
try:
bsctr.bisect_file(tf, force=args.force)
except BisectionException as e:
print(f"BisectionException in {tf}: '{e}'")
continue
except AssertionError as e:
print(f"AssertionError in {tf}: '{e}'")
continue
except BuildException as e:
print(f"BuildException in {tf}: '{e}'")
continue
if args.generate:
if args.output_directory is None:
print("Missing output directory!")
exit(1)
else:
output_dir = os.path.abspath(args.output_directory)
os.makedirs(output_dir, exist_ok=True)
scenario = utils.Scenario([], [])
# When file is specified, use scenario of file as base
if args.file:
file = Path(args.file).absolute()
scenario = utils.Case.from_file(config, file).scenario
tmp = utils.get_scenario(config, args)
if len(tmp.target_settings) > 0:
scenario.target_settings = tmp.target_settings
if len(tmp.attacker_settings) > 0:
scenario.attacker_settings = tmp.attacker_settings
gen = gnrtr.parallel_interesting_case_file(
config, scenario, bldr.jobs, output_dir, start_stop=True
)
if args.amount == 0:
while True:
path = next(gen)
worked = False
if args.reducer:
try:
worked = rdcr.reduce_file(path)
except BuildException as e:
print(f"BuildException in {path}: {e}")
continue
if not args.reducer or worked:
try:
bsctr.bisect_file(path, force=args.force)
except BisectionException as e:
print(f"BisectionException in {path}: '{e}'")
continue
except AssertionError as e:
print(f"AssertionError in {path}: '{e}'")
continue
except BuildException as e:
print(f"BuildException in {path}: '{e}'")
continue
else:
for i in range(args.amount):
path = next(gen)
worked = False
if args.reducer:
try:
worked = rdcr.reduce_file(path)
except BuildException as e:
print(f"BuildException in {path}: {e}")
continue
if not args.reducer or worked:
try:
bsctr.bisect_file(path, force=args.force)
except BisectionException as e:
print(f"BisectionException in {path}: '{e}'")
continue
except AssertionError as e:
print(f"AssertionError in {path}: '{e}'")
continue
except BuildException as e:
print(f"BuildException in {path}: '{e}'")
continue
elif args.file:
file = Path(args.file)
bsctr.bisect_file(file, force=args.force)
gnrtr.terminate_processes()