Skip to content

Commit

Permalink
feat: changes made to evaluation runner of width-generic tactics (#1001)
Browse files Browse the repository at this point in the history
We change our message printing to be more legible, and we also start
adding scripting to analyze the DB that we generate from the evaluation.
  • Loading branch information
bollu authored Feb 7, 2025
1 parent 4e76df5 commit d5d4f38
Show file tree
Hide file tree
Showing 5 changed files with 93 additions and 5 deletions.
4 changes: 2 additions & 2 deletions SSA/Experimental/Bits/Fast/Reflect.lean
Original file line number Diff line number Diff line change
Expand Up @@ -1098,13 +1098,13 @@ structure Config where
This is useful to prevent the tactic from taking oodles of time cruncing on goals that
build large state spaces, which can happen in the presence of tactics.
-/
circuitSizeThreshold : Nat := 200
circuitSizeThreshold : Nat := 0

/--
The upper bound on the state space of the FSM, beyond which the tactic will bail out on an error.
See also `Config.circuitSizeThreshold`.
-/
stateSpaceSizeThreshold : Nat := 20
stateSpaceSizeThreshold : Nat := 0
/--
Whether the tactic should used a specialized solver for fixed-width constraints.
-/
Expand Down
3 changes: 2 additions & 1 deletion SSA/Projects/InstCombine/TacticAuto.lean
Original file line number Diff line number Diff line change
Expand Up @@ -308,6 +308,7 @@ macro "bv_bench": tactic =>
"bv_ring" : (bv_ring; done),
"bv_of_bool" : (bv_of_bool; done),
"bv_omega" : (bv_omega; done),
-- Automata, Classic.
"bv_automata_classic_prop" : (bool_to_prop; bv_automata_classic; done),
"bv_automata_classic" : (bv_automata_classic; done),
"bv_normalize_automata_classic" : ((try (solve | bv_normalize)); (try bv_automata_classic); done),
Expand Down Expand Up @@ -346,7 +347,7 @@ macro "bv_bench_automata": tactic =>
all_goals (
tac_bench (config := { outputType := .csv }) [
"presburger" : (bv_automata_classic; done),
"circuit" : (bv_automata_circuit (config := { backend := .cadical /- maxIter -/ 4 }); done),
"circuit" : (bv_automata_circuit (config := { backend := .cadical 0 }); done),
"no_uninterpreted" : (bv_automata_fragment_no_uninterpreted),
"width_ok" : (bv_automata_fragment_width_legal),
"reflect_ok" : (bv_automata_fragment_reflect),
Expand Down
1 change: 1 addition & 0 deletions bv-evaluation/.gitignore
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
*.sqlite3
.venv/
*.sqlite3.log
results/

80 changes: 78 additions & 2 deletions bv-evaluation/compare-automata-automata-circuit.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
import re
import sqlite3
import logging
import random
from tabulate import tabulate

ROOT_DIR = subprocess.check_output(['git', 'rev-parse', '--show-toplevel']).decode('utf-8').strip()
BENCHMARK_DIR = ROOT_DIR + '/SSA/Projects/InstCombine/tests/proofs/'
Expand Down Expand Up @@ -138,7 +140,15 @@ def process(db : str, jobs: int, prod_run : bool):
con.close()

logging.info(f"Clearing git state of '{BENCHMARK_DIR}' with 'git checkout --'")
subprocess.Popen(f'git checkout -- {BENCHMARK_DIR}', cwd=ROOT_DIR, shell=True).wait()
gco = subprocess.Popen(f'git checkout -- {BENCHMARK_DIR}', cwd=ROOT_DIR, shell=True)
gco.wait()
assert gco.returncode == 0, f"git checkout -- {BENCHMARK_DIR} should succeed."


logging.info(f"Running a 'lake exe cache get && lake build'.")
lake = subprocess.Popen(f'lake exe cache get && lake build', cwd=ROOT_DIR, shell=True)
lake.wait()
assert lake.returncode == 0, f"lake build should succeed before running evaluation."

with concurrent.futures.ThreadPoolExecutor(max_workers=jobs) as executor:
futures = {}
Expand Down Expand Up @@ -167,21 +177,87 @@ def setup_logging(db_name : str):
format='%(asctime)s - %(levelname)s - %(message)s',
handlers=[logging.FileHandler(f'{db_name}.log', mode='a'), logging.StreamHandler()])

def analyze_uninterpreted_functions(cur : sqlite3.Cursor):
"""Tabulate all uninterpreted functions"""
logging.info("Analyzing uninterpreted functions")
rows = cur.execute("""
SELECT fileTitle, thmName, goalStr, errMsg FROM tests WHERE tactic = "circuit" AND status = "err"
""")

KEY_UNTRUE = "failed to prove goal since decideIfZerosM established that theorem is not true"
KEY_MULTIPLE_WIDTHS = "found multiple bitvector widths in the target"
KEY_UNKNOWN_GOAL_STATE_FORMAT = "expected predicate over bitvectors (no quantification) found"
KEY_SYMBOLIC_SHIFT_LEFT = "expected shiftLeft by natural number found symbolic shift amount"
KEY_UNKNOWN_BOOLEAN_EQUALITY = "only boolean conditionals allowed are 'bv.slt bv = true' 'bv.sle bv = true'. Found"
KEY_UNKNOWN_BOOLEAN_DISEQUALITY = "Expected typeclass to be 'BitVec w' / 'Nat' found ' Bool' in"

str2explanation = {
KEY_UNTRUE : "theorems that are established as untrue",
KEY_MULTIPLE_WIDTHS : "theorems that have multiple widths",
KEY_UNKNOWN_GOAL_STATE_FORMAT : "theorems that have unknown goal state format",
KEY_SYMBOLIC_SHIFT_LEFT : "theorems that have symbolic left shift amount",
KEY_UNKNOWN_BOOLEAN_EQUALITY : "unknown boolean equality",
KEY_UNKNOWN_BOOLEAN_DISEQUALITY : "unknown boolean disequality",
}
str2matches = {
KEY_UNTRUE : [],
KEY_MULTIPLE_WIDTHS : [],
KEY_UNKNOWN_GOAL_STATE_FORMAT : [],
KEY_SYMBOLIC_SHIFT_LEFT : [],
KEY_UNKNOWN_BOOLEAN_EQUALITY : [],
KEY_UNKNOWN_BOOLEAN_DISEQUALITY : [],
}


HEADERS = ["errMsg", "goalStr", "thmName", "fileTitle"]
HEADER_COL_WIDTHS = [40, 40, 50, 50]
for row in rows:
(fileTitle, thmName, goalStr, errMsg) = row
matched = False
for s in str2matches:
if s in errMsg:
str2matches[s].append((errMsg, goalStr, thmName, fileTitle))
matched = True
break
if not matched:
print(errMsg)

for s in str2matches:
NEXAMPLES = 5
print(f"{str2explanation[s]} (#{len(str2matches[KEY_UNTRUE])}):")
print(tabulate(str2matches[s][:NEXAMPLES], headers=HEADERS, tablefmt="grid", maxcolwidths=HEADER_COL_WIDTHS))


# analyze the sqlite db.
def analyze(db : str):
con = sqlite3.connect(db)
cur = con.cursor()
analyze_uninterpreted_functions(cur)
pass


if __name__ == "__main__":
current_time = datetime.datetime.now().strftime('%Y-%m-%d-%H:%M:%S')
nproc = os.cpu_count()
parser = argparse.ArgumentParser(prog='compare-automata-automata-circuit')
parser.add_argument('--db', default=f'automata-circuit-{current_time}.sqlite3', help='path to sqlite3 database')
default_db = f'automata-circuit-{current_time}.sqlite3'
parser.add_argument('--db', default=default_db, help='path to sqlite3 database')
parser.add_argument('-j', '--jobs', type=int, default=nproc // 3)
parser.add_argument('--run', action='store_true', help="run evaluation")
parser.add_argument('--prodrun', action='store_true', help="run production run of evaluation")
parser.add_argument('--analyze', action='store_true', help="analyze the data of the db")
args = parser.parse_args()
setup_logging(args.db)
logging.info(args)
if args.run:
process(args.db, args.jobs, prod_run=False)
elif args.prodrun:
process(args.db, args.jobs, prod_run=True)
elif args.analyze:
if args.db == default_db:
logging.error("expected additional argument '--db <path/to/db/to/analyze'")
exit(1)
analyze(args.db)
else:
logging.error("expected --run or --prodrun.")

10 changes: 10 additions & 0 deletions bv-evaluation/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
importlib_metadata==8.6.1
numpy==2.2.2
pandas==2.2.3
python-dateutil==2.9.0.post0
pytz==2025.1
six==1.17.0
tabulate==0.9.0
tzdata==2025.1
visidata==3.1.1
zipp==3.21.0

0 comments on commit d5d4f38

Please sign in to comment.