diff --git a/check_test_plan_gen.py b/check_test_plan_gen.py index bd5eabea7a7..10522b0ae9d 100644 --- a/check_test_plan_gen.py +++ b/check_test_plan_gen.py @@ -1,9 +1,26 @@ -from evaluation_sanity_check import check, generate -import sys +import glob +import os -# current_dir = "log" -# previous_dir = sys.argv[1] -# generate.generate_test_plan_stat() -# check.check_massive_testing_results(current_dir, previous_dir) -check.check_bug_reproduction_test_plans() +def check_bug_reproduction_test_plans(): + gen_configs = glob.glob( + os.path.join( + "sieve_learn_results/*/*/learn/*/*.yaml", + ) + ) + + reprod_configs = glob.glob("bug_reproduction_test_plans/*.yaml") + + for reprod_config in reprod_configs: + if "indirect" in reprod_config: + continue + found = False + for gen_config in gen_configs: + if open(reprod_config).read() == open(gen_config).read(): + print(reprod_config + " <= " + gen_config) + found = True + if not found: + print("\033[91m" + reprod_config + " not found\033[0m") + + +check_bug_reproduction_test_plans() diff --git a/evaluation_sanity_check/__init__.py b/evaluation_sanity_check/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/evaluation_sanity_check/check.py b/evaluation_sanity_check/check.py deleted file mode 100644 index ffae7f4e23e..00000000000 --- a/evaluation_sanity_check/check.py +++ /dev/null @@ -1,70 +0,0 @@ -import glob -import os -from evaluation_sanity_check import common - - -def specs_to_map(specs): - m = {} - for spec in specs: - spec_content = open(spec).read() - if spec_content in m: - m[spec_content].append(spec) - # print(m[spec_content]) - else: - m[spec_content] = [] - m[spec_content].append(spec) - return m - - -def check_massive_testing_results(current_dir, previous_dir): - for controller in common.controllers_to_check: - for test in common.controllers_to_check[controller]: - for mode in ["intermediate-state", "unobserved-state", "stale-state"]: - cur_specs = glob.glob( - os.path.join( - 
current_dir, - controller, - test, - "generate-oracle/learn.yaml/" + mode + "/*.yaml", - ) - ) - pre_specs = glob.glob( - os.path.join( - previous_dir, - controller, - test, - "generate-oracle/learn.yaml/" + mode + "/*.yaml", - ) - ) - prev_map = specs_to_map(pre_specs) - cur_map = specs_to_map(cur_specs) - for spec in set(cur_map.keys()).union(prev_map.keys()): - if spec in cur_map and spec not in prev_map: - print("missing: ", cur_map[spec]) - elif spec in prev_map and spec not in cur_map: - print("redundant: ", prev_map[spec]) - elif len(cur_map[spec]) > len(prev_map[spec]): - print("missing diff:", cur_map[spec], prev_map[spec]) - elif len(cur_map[spec]) < len(prev_map[spec]): - print("redundant diff:", cur_map[spec], prev_map[spec]) - - -def check_bug_reproduction_test_plans(): - gen_configs = glob.glob( - os.path.join( - "sieve_learn_results/*/*/learn/learn.yaml/*/*.yaml", - ) - ) - - reprod_configs = glob.glob("bug_reproduction_test_plans/*.yaml") - - for reprod_config in reprod_configs: - if "indirect" in reprod_config: - continue - found = False - for gen_config in gen_configs: - if open(reprod_config).read() == open(gen_config).read(): - print(reprod_config + " <= " + gen_config) - found = True - if not found: - print("\033[91m" + reprod_config + " not found\033[0m") diff --git a/evaluation_sanity_check/common.py b/evaluation_sanity_check/common.py deleted file mode 100644 index ba35802cfaf..00000000000 --- a/evaluation_sanity_check/common.py +++ /dev/null @@ -1,29 +0,0 @@ -controllers_to_check = { - "cass-operator": ["recreate", "scaledown-scaleup"], - "cassandra-operator": ["recreate", "scaledown-scaleup"], - "casskop-operator": ["recreate", "scaledown-to-zero", "reducepdb"], - "elastic-operator": ["recreate", "scaledown-scaleup"], - "mongodb-operator": [ - "recreate", - "scaleup-scaledown", - "disable-enable-shard", - "disable-enable-arbiter", - "run-cert-manager", - ], - "nifikop-operator": ["recreate", "scaledown-scaleup", "change-config"], - 
"rabbitmq-operator": ["recreate", "scaleup-scaledown", "resize-pvc"], - "xtradb-operator": [ - "recreate", - "disable-enable-haproxy", - "disable-enable-proxysql", - "run-cert-manager", - "scaleup-scaledown", - ], - "yugabyte-operator": [ - "recreate", - "scaleup-scaledown-tserver", - "disable-enable-tls", - "disable-enable-tuiport", - ], - "zookeeper-operator": ["recreate", "scaledown-scaleup"], -} diff --git a/evaluation_sanity_check/generate.py b/evaluation_sanity_check/generate.py deleted file mode 100644 index f8a8620eea6..00000000000 --- a/evaluation_sanity_check/generate.py +++ /dev/null @@ -1,131 +0,0 @@ -import os -import json -import shutil -from sieve_common.config import get_common_config -from evaluation_sanity_check import common - -total_result_map = {} - - -def collect_spec(): - sub_result_map = {} - for controller in common.controllers_to_check: - sub_result_map[controller] = {} - ds_base_cnt = 0 - ms_base_cnt = 0 - ss_base_cnt = 0 - ds_after_p1_cnt = 0 - ms_after_p1_cnt = 0 - ss_after_p1_cnt = 0 - ds_after_p2_cnt = 0 - ms_after_p2_cnt = 0 - ss_after_p2_cnt = 0 - ds_cnt = 0 - ms_cnt = 0 - ss_cnt = 0 - for test in common.controllers_to_check[controller]: - result_filename = "sieve_learn_results/{}-{}.json".format(controller, test) - result_map = json.load(open(result_filename)) - ds_base_cnt += result_map["intermediate-state"]["baseline"] - ds_after_p1_cnt += result_map["intermediate-state"]["after_p1"] - ds_after_p2_cnt += result_map["intermediate-state"]["after_p2"] - ds_cnt += result_map["intermediate-state"]["final"] - ms_base_cnt += result_map["unobserved-state"]["baseline"] - ms_after_p1_cnt += result_map["unobserved-state"]["after_p1"] - ms_after_p2_cnt += result_map["unobserved-state"]["after_p2"] - ms_cnt += result_map["unobserved-state"]["final"] - ss_base_cnt += result_map["stale-state"]["baseline"] - ss_after_p1_cnt += result_map["stale-state"]["after_p1"] - ss_after_p2_cnt += result_map["stale-state"]["after_p2"] - ss_cnt += 
result_map["stale-state"]["final"] - - sub_result_map[controller]["baseline-ds"] = ds_base_cnt - sub_result_map[controller]["after-p1-ds"] = ds_after_p1_cnt - sub_result_map[controller]["after-p2-ds"] = ds_after_p2_cnt - sub_result_map[controller]["ds"] = ds_cnt - - sub_result_map[controller]["baseline-ss"] = ss_base_cnt - sub_result_map[controller]["after-p1-ss"] = ss_after_p1_cnt - sub_result_map[controller]["after-p2-ss"] = ss_after_p2_cnt - sub_result_map[controller]["ss"] = ss_cnt - - sub_result_map[controller]["baseline-ms"] = ms_base_cnt - sub_result_map[controller]["after-p1-ms"] = ms_after_p1_cnt - sub_result_map[controller]["after-p2-ms"] = ms_after_p2_cnt - sub_result_map[controller]["ms"] = ms_cnt - return sub_result_map - - -def overwrite_config_json(new_config): - shutil.copy("sieve_config.json", "sieve_config.json.bkp") - my_config = json.load(open("sieve_config.json")) - for key in new_config: - my_config[key] = new_config[key] - json.dump(my_config, open("sieve_config.json", "w")) - - -def recover_config_json(): - shutil.copy("sieve_config.json.bkp", "sieve_config.json") - - -def learn_all(): - for controller in common.controllers_to_check: - for test_suite in common.controllers_to_check[controller]: - docker_repo_name = get_common_config().container_registry - cmd = "python3 sieve.py -p %s -t %s -d %s -s learn --phase=check" % ( - controller, - test_suite, - docker_repo_name, - ) - os.system(cmd) - - -def generate_test_plan_stat(): - table = "controller\tbaseline-ds\tafter-p1-ds\tafter-p2-ds\tds\tbaseline-ss\tafter-p1-ss\tafter-p2-ss\tss\tbaseline-ms\tafter-p1-ms\tafter-p2-ms\tms\tbaseline-total\tafter-p1-total\tafter-p2-total\ttotal\n" - short_table = ( - "controller\tintermediate-state\tstale-state\tunobserved-state\ttotal\n" - ) - learn_all() - sub_map = collect_spec() - for controller in common.controllers_to_check: - baseline_ds = sub_map[controller]["baseline-ds"] - baseline_ss = sub_map[controller]["baseline-ss"] - baseline_ms = 
sub_map[controller]["baseline-ms"] - after_p1_ds = sub_map[controller]["after-p1-ds"] - after_p1_ss = sub_map[controller]["after-p1-ss"] - after_p1_ms = sub_map[controller]["after-p1-ms"] - after_p2_ds = sub_map[controller]["after-p2-ds"] - after_p2_ss = sub_map[controller]["after-p2-ss"] - after_p2_ms = sub_map[controller]["after-p2-ms"] - ds = sub_map[controller]["ds"] - ss = sub_map[controller]["ss"] - ms = sub_map[controller]["ms"] - - table += "{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\n".format( - controller, - baseline_ds, - after_p1_ds, - after_p2_ds, - ds, - baseline_ss, - after_p1_ss, - after_p2_ss, - ss, - baseline_ms, - after_p1_ms, - after_p2_ms, - ms, - baseline_ds + baseline_ss + baseline_ms, - after_p1_ds + after_p1_ss + after_p1_ms, - after_p2_ds + after_p2_ss + after_p2_ms, - ds + ss + ms, - ) - short_table += "{}\t{}\t{}\t{}\t{}\n".format( - controller, - ds, - ss, - ms, - ds + ss + ms, - ) - print(short_table) - open("test_plan_stats.tsv", "w").write(table) diff --git a/parallel_testing/combine_json.py b/parallel_testing/combine_json.py index 641bc992839..9fffa70c22f 100644 --- a/parallel_testing/combine_json.py +++ b/parallel_testing/combine_json.py @@ -30,10 +30,9 @@ def merge(result, patch): generated_test_plans = glob.glob( os.path.join( result_folder, - "log", controller, "*", - "generate-oracle/learn.yaml", + "learn", "*", "*-test-plan-*.yaml", ) diff --git a/parallel_testing/gen_commands.py b/parallel_testing/gen_commands.py index 7fa1d449d7a..aa9baf17048 100644 --- a/parallel_testing/gen_commands.py +++ b/parallel_testing/gen_commands.py @@ -2,32 +2,32 @@ import os import argparse -modes = ["intermediate-state", "unobserved-state", "stale-state"] +patterns = ["intermediate-state", "unobserved-state", "stale-state"] if __name__ == "__main__": parser = argparse.ArgumentParser( description="Generate testcase commands into a file." 
) parser.add_argument( - "-d", - dest="docker", - help="Docker account", + "-r", + dest="registry", + help="Container registry", default="ghcr.io/sieve-project/action", ) parser.add_argument( "-o", dest="output", help="Output file name", default="commands.txt" ) parser.add_argument("-c", dest="controllers", help="Controllers to test", nargs="+") - parser.add_argument("-m", dest="modes", help="Modes to test", nargs="+") + parser.add_argument( + "--pattern", dest="patterns", help="Patterns to test", nargs="+" + ) args = parser.parse_args() if args.controllers is None: - controllers = os.listdir("../log") - else: - controllers = args.controllers + args.controllers = os.listdir("../log") - if args.modes is not None: - modes = args.modes + if args.patterns is None: + args.patterns = patterns with open(args.output, "w") as command_file, open( "pull-commands.txt", "w" @@ -36,41 +36,30 @@ pull_command_file.write( "docker pull {}/node:v1.18.9-test\n".format(args.docker) ) - pull_command_file.write( - "docker pull {}/node:v1.18.9-vanilla\n".format(args.docker) - ) - for controller in controllers: + for controller in args.controllers: pull_command_file.write( "docker pull {}/{}:test\n".format(args.docker, controller) ) - pull_command_file.write( - "docker pull {}/{}:vanilla\n".format(args.docker, controller) - ) - for mode in modes: - for testcase in os.listdir(os.path.join("../log", controller)): - if mode == "vanilla": - command_file.write( - "python3 sieve.py -m vanilla -c {} -w {} -r {}\n".format( - controller, testcase, args.docker - ) + for pattern in args.patterns: + for test_workload in os.listdir( + os.path.join("../sieve_learn_results", controller) + ): + test_plans = glob.glob( + os.path.join( + os.path.abspath("../sieve_learn_results"), + controller, + test_workload, + "learn", + pattern, + "*.yaml", ) - else: - configs = glob.glob( - os.path.join( - os.path.abspath("../log"), + ) + for test_plan in test_plans: + command_file.write( + "python3 sieve.py -m test -c 
{} -w {} -p {} -r {}\n".format( controller, - testcase, - "generate-oracle/learn.yaml", - mode, - "*.yaml", + test_workload, + test_plan, + args.docker, ) ) - for config in configs: - command_file.write( - "python3 sieve.py -m test -c {} -w {} -p {} -r {}\n".format( - controller, - testcase, - config, - args.docker, - ) - ) diff --git a/parallel_testing/runlearn.py b/parallel_testing/runlearn.py index 27f9e6c78a2..b5e26a427b4 100644 --- a/parallel_testing/runlearn.py +++ b/parallel_testing/runlearn.py @@ -38,9 +38,9 @@ if __name__ == "__main__": parser = argparse.ArgumentParser(description="Automate learning run.") parser.add_argument( - "-d", - dest="docker", - help="Docker account", + "-r", + dest="registry", + help="Container registry", default="ghcr.io/sieve-project/action", ) parser.add_argument("-c", dest="controllers", help="Controllers to test", nargs="+") @@ -53,14 +53,12 @@ else: controllers = args.controllers - os.system("docker pull %s/node:learn" % "ghcr.io/sieve-project/action") + os.system("docker pull %s/node:learn" % args.registry) for controller in controllers: - os.system( - "docker pull %s/%s:learn" % ("ghcr.io/sieve-project/action", controller) - ) + os.system("docker pull %s/%s:learn" % (args.registry, controller)) for testcase in controllers_to_run[controller]: os.system( "python3 sieve.py -m learn -c {} -w {} -r {}".format( - controller, testcase, "ghcr.io/sieve-project/action" + controller, testcase, args.registry ) ) diff --git a/reproduce_bugs.py b/reproduce_bugs.py index 3947c324821..3bc16dde0fa 100644 --- a/reproduce_bugs.py +++ b/reproduce_bugs.py @@ -210,7 +210,7 @@ def after_reproducing_cassandra_operator_indirect_1(): } -def reproduce_single_bug(controller, bug, phase, registry, skip): +def reproduce_single_bug(controller, bug, registry, skip): before_reproduce = None after_reproduce = None if ( @@ -229,10 +229,9 @@ def reproduce_single_bug(controller, bug, phase, registry, skip): test_plan = os.path.join( 
"bug_reproduction_test_plans", reprod_map[controller][bug][1] ) - sieve_cmd = "python3 sieve.py -c {} -m test -p {} --phase={} -r {}".format( + sieve_cmd = "python3 sieve.py -c {} -m test -p {} -r {}".format( manifest_map[controller], test_plan, - phase, registry, ) cprint(sieve_cmd, bcolors.OKGREEN) @@ -259,13 +258,13 @@ def reproduce_single_bug(controller, bug, phase, registry, skip): return {"reproduced": False, "test-result-file": test_result_file} -def reproduce_bug(controller, bug, phase, registry, skip): +def reproduce_bug(controller, bug, registry, skip): stats_map = {} if bug == "all": for b in reprod_map[controller]: if "indirect" in b: continue - stats_map[b] = reproduce_single_bug(controller, b, phase, registry, skip) + stats_map[b] = reproduce_single_bug(controller, b, registry, skip) elif ( bug == "intermediate-state" or bug == "unobserved-state" @@ -274,11 +273,9 @@ def reproduce_bug(controller, bug, phase, registry, skip): ): for b in reprod_map[controller]: if b.startswith(bug): - stats_map[b] = reproduce_single_bug( - controller, b, phase, registry, skip - ) + stats_map[b] = reproduce_single_bug(controller, b, registry, skip) else: - stats_map[bug] = reproduce_single_bug(controller, bug, phase, registry, skip) + stats_map[bug] = reproduce_single_bug(controller, bug, registry, skip) return stats_map @@ -345,13 +342,6 @@ def generate_table3(): metavar="BUG", default="all", ) - parser.add_option( - "--phase", - dest="phase", - help="run the PHASE: setup, workload, check or all", - metavar="PHASE", - default="all", - ) parser.add_option( "-r", "--registry", @@ -384,13 +374,12 @@ def generate_table3(): if options.controller == "all": for controller in reprod_map: stats_map[controller] = reproduce_bug( - controller, options.bug, options.phase, options.registry, options.skip + controller, options.bug, options.registry, options.skip ) else: stats_map[options.controller] = reproduce_bug( options.controller, options.bug, - options.phase, options.registry, 
options.skip, ) diff --git a/sieve.py b/sieve.py index 4c8434294e5..05b3cfc7060 100644 --- a/sieve.py +++ b/sieve.py @@ -13,10 +13,10 @@ import glob from sieve_analyzer import analyze from sieve_oracle.oracle import ( - persist_state, - persist_history, - generate_controller_family, - canonicalize_history_and_state, + save_state, + save_history, + save_controller_related_object_list, + create_differential_oracles, check, ) from sieve_oracle.checker_common import ( @@ -39,6 +39,7 @@ os_system, deploy_directory, rmtree_if_exists, + first_pass_learn_result_dir, ) @@ -47,11 +48,15 @@ def save_run_result( test_result: TestResult, start_time, ): + """ + Save the testing result into a json file for later debugging. + The test result json contains the test plan, the errors detected by the oracles and so on. + """ if test_context.mode != sieve_modes.TEST: return if test_result is None: - return + assert False, "test result should not be None" result_map = { test_context.controller: { @@ -81,7 +86,7 @@ def save_run_result( } } - # Testing mode, write test result under sieve_test_result directory + # Write test result under sieve_test_result directory result_filename = "{}/{}-{}-{}.json".format( test_context.result_root_dir, test_context.controller, @@ -103,13 +108,7 @@ def save_run_result( ) -def watch_crd(crds, addrs): - for addr in addrs: - for crd in crds: - os_system("kubectl get {} -s {} --ignore-not-found=true".format(crd, addr)) - - -def generate_configmap(test_plan): +def create_configmap(test_plan): test_plan_content = open(test_plan).read() configmap = {} configmap["apiVersion"] = "v1" @@ -121,7 +120,7 @@ def generate_configmap(test_plan): return configmap_path -def generate_kind_config(num_apiservers, num_workers): +def create_kind_config(num_apiservers, num_workers): kind_config_dir = "kind_configs" os.makedirs(kind_config_dir, exist_ok=True) kind_config_filename = os.path.join( @@ -217,7 +216,7 @@ def prepare_sieve_server(test_context: TestContext): org_dir = 
os.getcwd() os.chdir("sieve_server") os_system("go mod tidy") - # TODO: we should build a container image for sieve server + # TODO: we should build a container image for sieve server. os_system("env GOOS=linux GOARCH=amd64 go build") os.chdir(org_dir) os_system("docker cp sieve_server kind-control-plane:/sieve_server") @@ -241,7 +240,7 @@ def stop_sieve_server(): def setup_kind_cluster(test_context: TestContext): - kind_config = generate_kind_config( + kind_config = create_kind_config( test_context.num_apiservers, test_context.num_workers ) k8s_container_registry = test_context.container_registry @@ -249,10 +248,11 @@ def setup_kind_cluster(test_context: TestContext): test_context.controller_config.kubernetes_version + "-" + test_context.image_tag ) retry_cnt = 0 + # Retry cluster creation for 5 times. while retry_cnt < 5: try: os_system("kind delete cluster") - # sleep here in case if the machine is slow and kind cluster deletion is not done before creating a new cluster + # Sleep here in case if the machine is slow and kind cluster deletion is not done before creating a new cluster. time.sleep(10 * retry_cnt) if retry_cnt == 0: print("Trying to create kind cluster") @@ -275,41 +275,32 @@ def setup_kind_cluster(test_context: TestContext): def setup_cluster(test_context: TestContext): + """ + Set up the kind cluster for testing and wait until the control plane is ready. 
+ """ setup_kind_cluster(test_context) print("\n\n") - rmtree_if_exists(test_context.result_dir) - os.makedirs(test_context.result_dir) - if ( - test_context.mode == sieve_modes.LEARN - or test_context.mode == sieve_modes.GEN_ORACLE - ): - print("Test plan: {}".format(test_context.test_plan)) - generate_plan_for_learn_mode(test_context) - elif test_context.mode == sieve_modes.VANILLA: - print("Test plan: {}".format(test_context.test_plan)) - generate_plan_for_vanilla_mode(test_context) - else: - assert test_context.mode == sieve_modes.TEST - print("Test plan: {}".format(test_context.test_plan)) - generate_plan_for_test_mode(test_context) - - # when testing stale-state, we need to pause the apiserver - # if workers talks to the paused apiserver, the whole cluster will be slowed down - # so we need to redirect the workers to other apiservers + # When testing stale-state, we need to pause the apiserver + # if workers talks to the paused apiserver, the whole cluster will be slowed down. + # In a multi-apiserver set up (HA mode), each worker (kubelet) talks to a load balancer + # which might forward the request to any backend apiserver. + # We want to focus on testing how the controller handles staleness + # so here we redirect the workers to an apiserver (configurable in config.json) + # which Sieve will NOT slow down later. if "reconnectController" in test_context.action_types: cprint( "Redirecting workers and kubectl to the leading API server...", bcolors.OKGREEN, ) - redirect_workers(test_context) - redirect_kubectl() + redirect_workers(test_context) # Redirect the kubelet on each worker node. + redirect_kubectl() # Redirect the local kubectl. ok("Redirection done") kubernetes.config.load_kube_config() core_v1 = kubernetes.client.CoreV1Api() - # Then we wait apiservers to be ready + # Then we wait apiservers to be ready. 
print("Waiting for apiservers to be ready...") apiserver_list = [] for i in range(test_context.num_apiservers): @@ -317,7 +308,7 @@ def setup_cluster(test_context: TestContext): "" if i == 0 else str(i + 1) ) apiserver_list.append(apiserver_name) - + # TODO: this can be better replaced by a watch. for tick in range(600): created = core_v1.list_namespaced_pod( "kube-system", watch=False, label_selector="component=kube-apiserver" @@ -329,16 +320,26 @@ def setup_cluster(test_context: TestContext): time.sleep(1) if test_context.mode != sieve_modes.VANILLA: + # Start the Sieve server. + # In learn mode, it will record the controller events used for generating test plans. + # In test mode, it reads the test plan and injects fault accordingly. prepare_sieve_server(test_context) cprint("Setting up Sieve server...", bcolors.OKGREEN) start_sieve_server(test_context) ok("Sieve server set up") - time.sleep(3) # ensure that every apiserver will see the configmap is created - configmap = generate_configmap(test_context.test_plan) + time.sleep(3) # Ensure that every apiserver will see the configmap is created. + + # We store the test plan into a configmap and create this configmap + # so that the instrumentation at the apiserver side can also read the test plan + # (by examining each incoming configmap creation event). + # This is a bit hacky. + # TODO: find a more elegant way to communicate with the instrumented apiserver. + configmap = create_configmap(test_context.test_plan) os_system("kubectl apply -f {}".format(configmap)) - # Preload controller image to kind nodes + # Preload controller image to kind nodes. + # This makes it faster to start the controller. image = "{}/{}:{}".format( test_context.container_registry, test_context.controller, @@ -353,6 +354,7 @@ def setup_cluster(test_context: TestContext): def deploy_controller(test_context: TestContext): + # Install csi driver if some controller needs it. 
if test_context.use_csi_driver: print("Installing csi provisioner...") org_dir = os.getcwd() @@ -361,11 +363,13 @@ def deploy_controller(test_context: TestContext): os.chdir(org_dir) deployment_file = test_context.controller_config.controller_deployment_file_path - # backup deployment file + + # Backup the provided deployment file. backup_deployment_file = deployment_file + ".bkp" shutil.copyfile(deployment_file, backup_deployment_file) - # modify container_registry and image_tag + # Modify the marked container_registry and image_tag in the deployment yaml. + # TODO: there should be a better way to parameterize the deployment yaml file. fin = open(deployment_file) data = fin.read() data = data.replace("${SIEVE-DR}", test_context.container_registry) @@ -375,18 +379,21 @@ def deploy_controller(test_context: TestContext): fin.write(data) fin.close() - # run the deploy script + # Run the provided deploy script. org_dir = os.getcwd() os.chdir(deploy_directory(test_context)) os_system("./deploy.sh") os.chdir(org_dir) - # restore deployment file + # Restore deployment file for later use. shutil.copyfile(backup_deployment_file, deployment_file) os.remove(backup_deployment_file) def start_controller(test_context: TestContext): + """ + Deploy the controller and wait until the controller becomes ready + """ controller = test_context.controller num_apiservers = test_context.num_apiservers deploy_controller(test_context) @@ -394,7 +401,8 @@ def start_controller(test_context: TestContext): kubernetes.config.load_kube_config() core_v1 = kubernetes.client.CoreV1Api() - # Wait for controller pod ready + # Wait for controller pod to be ready + # TODO: this can be better replaced by a watch. 
print("Wait for the controller pod to be ready...") pod_ready = False for tick in range(600): @@ -412,24 +420,33 @@ def start_controller(test_context: TestContext): fail("waiting for the controller pod to be ready") raise Exception("Wait timeout after 600 seconds") + # Issue a get crd operation to each apiserver (i.e., addr) + # to make sure the instrumentation at the apiserver side + # can observe the crd change. + # This is necessary because sometimes the instrumentation at the apiserver + # side needs to talk to the Sieve server when certain changes happen to + # the custom resource objects (depending on the test plan). apiserver_addr_list = [] apiserver_ports = get_apiserver_ports(num_apiservers) - # print("apiserver ports", apiserver_ports) for port in apiserver_ports: apiserver_addr_list.append("https://127.0.0.1:" + port) - watch_crd( - test_context.controller_config.custom_resource_definitions, apiserver_addr_list - ) + for addr in apiserver_addr_list: + for crd in test_context.controller_config.custom_resource_definitions: + os_system("kubectl get {} -s {} --ignore-not-found=true".format(crd, addr)) def run_workload( test_context: TestContext, -) -> Tuple[int, str]: +): + """ + Deploy the controller and run the provided testing workload. + """ cprint("Deploying controller...", bcolors.OKGREEN) start_controller(test_context) ok("Controller deployed") + # If there are multiple containers in the controller pod then we need to find the correct one. select_container_from_pod = ( " -c {} ".format(test_context.controller_config.container_name) if test_context.controller_config.container_name is not None @@ -447,6 +464,9 @@ def run_workload( .items[0] .metadata.name ) + # Stream the controller log to streamed-controller.log for debugging purpose. + # If the controller crashes due to some panic, Sieve will report that by checking + # the streamed log. 
streamed_log_file = open( os.path.join(test_context.result_dir, "streamed-controller.log"), "w+" ) @@ -458,6 +478,15 @@ def run_workload( preexec_fn=os.setsid, ) + # Stream the apiserver log. + # The apiserver log should contain the state changes (i.e., creation, deletion and update events) + # during the test workload and the log content will be later used for + # (1) generating differential oracles for learn mode and + # (2) detecting errors (e.g., inconsistency in end states) for test mode. + # TODO: Storing everything in a log is a bit hacky, + # and it is better to save all the recorded state changes in a database during runtime, + # if that does not hurt the performance too much. + # Another alternative is to just watch all the objects using the local kubectl. streamed_api_server_log_file = open( os.path.join(test_context.result_dir, "apiserver1.log"), "w+" ) @@ -489,6 +518,7 @@ def run_workload( .metadata.name ) + # Also save the log of other apiservers (for multi-apiserver set up) for debugging purpose. for i in range(1, test_context.num_apiservers): apiserver_name = "kube-apiserver-kind-control-plane" + ( "" if i == 0 else str(i + 1) @@ -500,6 +530,9 @@ def run_workload( ) ) + # Save the Sieve server log. + # In learn mode, the Sieve server log contains the collected controller events + # which will be used to generate test plans. if test_context.mode != sieve_modes.VANILLA: os_system( "docker cp kind-control-plane:/sieve_server/sieve-server.log {}/sieve-server.log".format( @@ -507,13 +540,16 @@ def run_workload( ) ) + # Save the controller log as well mainly for debugging purpose. os_system( "kubectl logs {} {} > {}/controller.log".format( pod_name, select_container_from_pod, test_context.result_dir ) ) + # Stop streaming controller log. os.killpg(streaming.pid, signal.SIGTERM) streamed_log_file.close() + # Stop streaming apiserver log. 
os.killpg(streaming_api_server.pid, signal.SIGTERM) streamed_api_server_log_file.close() @@ -521,19 +557,32 @@ def run_workload( stop_sieve_server() -def check_result( +def save_history_and_end_state(test_context: TestContext): + """ + Generate three files: + (1) a list of controller related objects (e.g., the controller pod) which will not be considered when applying differential oracles + because the injected fault (e.g., controller crash) directly affects their states. + (2) the entire history, including all the creation, deletion and update events, during the test workload, + which is used for generating and applying differential oracles. + (3) the end state after the test workload, which is used for generating and applying differential oracles. + """ + save_controller_related_object_list(test_context) + save_history(test_context) + save_state(test_context) + + +def post_process( test_context: TestContext, ) -> TestResult: - generate_controller_family(test_context) - persist_history(test_context) - persist_state(test_context) - if ( - test_context.mode == sieve_modes.LEARN - or test_context.mode == sieve_modes.GEN_ORACLE - ): - if test_context.mode == sieve_modes.GEN_ORACLE: - canonicalize_history_and_state(test_context) - analyze.analyze_trace(test_context) + """ + Postprocess the collected logs and results. + For learn mode, it will generate the test plan and differential oracles. + For test mode, it will apply the differential oracles and report any detected errors. + """ + if test_context.mode == sieve_modes.LEARN: + if test_context.build_oracle: + create_differential_oracles(test_context) + analyze.generate_test_plans_from_learn_run(test_context) return None elif test_context.mode == sieve_modes.VANILLA: return None @@ -544,29 +593,84 @@ def check_result( return test_result +def teardown_cluster(): + os_system("kind delete cluster") + + +def save_previous_learn_results(test_context: TestContext): + """ + Move the learn result to a different folder. 
+ This should be only called in learn mode with build_oracle enabled. + To build the differential oracles, we need to run the same workload twice + to eliminate the non-determinism from the end state and state updates. + """ + assert test_context.mode == sieve_modes.LEARN and test_context.build_oracle + learn_res_dir = test_context.result_dir + learn_prev_res_dir = first_pass_learn_result_dir(test_context.result_dir) + assert os.path.isdir( + learn_res_dir + ), "{} should exist after first pass of learn run".format(learn_res_dir) + print( + "Moving the first pass learn result from {} to {}...".format( + learn_res_dir, learn_prev_res_dir + ) + ) + rmtree_if_exists(learn_prev_res_dir) + shutil.move(learn_res_dir, learn_prev_res_dir) + + +def prepare_test_plan(test_context: TestContext): + """ + Prepare the test plan. + The test plan for test mode details what fault to inject and when to inject. + The plans for learn and vanilla mode are rather simple: + for learn mode, it only needs to contain the list of CRD to help filter out the irrelevant events; + for vanilla mode it is empty. 
+ """ + # Clean this result dir if it exists + rmtree_if_exists(test_context.result_dir) + os.makedirs(test_context.result_dir) + print("Sieve result dir: {}".format(test_context.result_dir)) + # Prepare the test plan for different modes + if test_context.mode == sieve_modes.LEARN: + # The plan for learn mode just contains the CRD list to filter out events irrelevant to the controller + print("Config for learn mode: {}".format(test_context.test_plan)) + create_plan_for_learn_mode(test_context) + elif test_context.mode == sieve_modes.VANILLA: + # The plan for vanilla mode is basically empty + print("Config for vanilla mode: {}".format(test_context.test_plan)) + create_plan_for_vanilla_mode(test_context) + else: + # The test plan details what fault to inject and where to inject + # and is generated by learn mode + assert test_context.mode == sieve_modes.TEST + print("Test plan: {}".format(test_context.test_plan)) + create_plan_for_test_mode(test_context) + + def run_test(test_context: TestContext) -> TestResult: try: - if ( - test_context.phase == "all" - or test_context.phase == "setup" - or test_context.phase == "setup_workload" - ): + if test_context.postprocess: + return post_process(test_context) + prepare_test_plan(test_context) + setup_cluster(test_context) + run_workload(test_context) + teardown_cluster() + save_history_and_end_state(test_context) + # if the build_oracle is enabled, then we need to run the learn run again + # to eliminate nondeterminism in the end-state and state-update collected by Sieve + if test_context.mode == sieve_modes.LEARN and test_context.build_oracle: + print( + "\nTo build the differential oracle, we need to run the learn run twice" + ) + print("Starting the second learn run...") + save_previous_learn_results(test_context) + prepare_test_plan(test_context) setup_cluster(test_context) - if ( - test_context.phase == "all" - or test_context.phase == "setup_workload" - or test_context.phase == "workload" - or test_context.phase == 
"workload_check" - ): run_workload(test_context) - if ( - test_context.phase == "all" - or test_context.phase == "check" - or test_context.phase == "workload_check" - ): - test_result = check_result(test_context) - return test_result - return None + teardown_cluster() + save_history_and_end_state(test_context) + return post_process(test_context) except Exception: print(traceback.format_exc()) return TestResult( @@ -580,19 +684,23 @@ def run_test(test_context: TestContext) -> TestResult: ) -def generate_plan_for_test_mode(test_context: TestContext): +def create_plan_for_test_mode(test_context: TestContext): test_plan_content = yaml.load(open(test_context.original_test_plan)) + # TODO: we should probably just add the annotatedReconcileStackFrame when generating the test plan. test_plan_content["annotatedReconcileStackFrame"] = [ i for i in test_context.controller_config.annotated_reconcile_functions.values() ] yaml.dump(test_plan_content, open(test_context.test_plan, "w"), sort_keys=False) -def generate_plan_for_learn_mode(test_context: TestContext): +def create_plan_for_learn_mode(test_context: TestContext): crd_list = test_context.controller_config.custom_resource_definitions learn_plan_content = {} + # NOTE: we use the CRD list to focus on recording the events relevant to the controller during learn run + # Here we assume all the relevant events are related to the CR objects or their owned objects + # TODO: support customized definition of "relevant events" learn_plan_content["crdList"] = crd_list - # hardcode rate limiter to disabled for now + # NOTE: rateLimiterEnabled is deprecated, will remove later learn_plan_content["rateLimiterEnabled"] = False learn_plan_content["rateLimiterInterval"] = 3 learn_plan_content["annotatedReconcileStackFrame"] = [ @@ -601,7 +709,7 @@ def generate_plan_for_learn_mode(test_context: TestContext): yaml.dump(learn_plan_content, open(test_context.test_plan, "w"), sort_keys=False) -def generate_plan_for_vanilla_mode(test_context: 
TestContext): +def create_plan_for_vanilla_mode(test_context: TestContext): vanilla_plan_content = {} yaml.dump(vanilla_plan_content, open(test_context.test_plan, "w"), sort_keys=False) @@ -611,23 +719,11 @@ def get_test_workload_from_test_plan(test_plan_file): return test_plan["workload"] -def run( - controller_config_dir, - test_workload, - result_root_dir, - mode, - test_plan, - container_registry, - phase="all", -): - controller_config = load_controller_config(controller_config_dir) +def generate_testing_cluster_config(mode, controller_config, test_plan, test_workload): num_apiservers = 1 num_workers = 2 use_csi_driver = False - if test_workload is None: - assert mode == sieve_modes.TEST - test_workload = get_test_workload_from_test_plan(test_plan) - print("Get test workload {} from test plan".format(test_workload)) + if test_workload in controller_config.test_setting: if "num_apiservers" in controller_config.test_setting[test_workload]: num_apiservers = controller_config.test_setting[test_workload][ @@ -639,24 +735,70 @@ def run( use_csi_driver = controller_config.test_setting[test_workload][ "use_csi_driver" ] + return ( + num_apiservers, + num_workers, + use_csi_driver, + ) + + +def run( + controller_config_dir, + test_workload, + result_root_dir, + mode, + test_plan, + container_registry, + postprocess, + build_oracle, +): + """ + Prepare the test context based on the input options and the configurations + and start to run the test. 
+ """ + controller_config = load_controller_config(controller_config_dir) + if test_workload is None: + assert mode == sieve_modes.TEST + test_workload = get_test_workload_from_test_plan(test_plan) + print("Get test workload {} from test plan".format(test_workload)) + num_apiservers, num_workers, use_csi_driver = generate_testing_cluster_config( + mode, controller_config, test_plan, test_workload + ) oracle_dir = os.path.join(controller_config_dir, "oracle", test_workload) - os.makedirs(oracle_dir, exist_ok=True) - result_dir = os.path.join( - result_root_dir, - controller_config.controller_name, - test_workload, - mode, - os.path.basename(test_plan), + assert ( + os.path.isdir(oracle_dir) + or (mode == sieve_modes.LEARN and build_oracle) + or mode == sieve_modes.VANILLA + ), "The oracle dir: {} must exist unless (1) you are running vanilla mode or (2) build_oracle is enabled".format( + oracle_dir ) - print("Log dir: {}/".format(result_dir)) - image_tag = sieve_modes.LEARN if mode == sieve_modes.GEN_ORACLE else mode + os.makedirs(oracle_dir, exist_ok=True) + if mode == sieve_modes.TEST: + result_dir = os.path.join( + result_root_dir, + controller_config.controller_name, + test_workload, + mode, + os.path.splitext(os.path.basename(test_plan))[0], + ) + else: + result_dir = os.path.join( + result_root_dir, + controller_config.controller_name, + test_workload, + mode, + ) + + image_tag = mode test_plan_to_run = os.path.join(result_dir, os.path.basename(test_plan)) + # Prepare the context for testing the controller test_context = TestContext( controller=controller_config.controller_name, controller_config_dir=controller_config_dir, test_workload=test_workload, mode=mode, - phase=phase, + postprocess=postprocess, + build_oracle=build_oracle, original_test_plan=test_plan, test_plan=test_plan_to_run, result_root_dir=result_root_dir, @@ -674,8 +816,20 @@ def run( return test_result, test_context -def run_batch(controller, test_workload, dir, mode, test_plan_folder, docker, 
phase): - assert mode == sieve_modes.TEST, "batch mode only allowed in test mode" +def run_batch( + controller, + test_workload, + dir, + mode, + test_plan_folder, + docker, + postprocess, + build_oracle, +): + """ + Run multiple test plans in the test_plan_folder in a batch. + """ + assert mode == sieve_modes.TEST, "batch mode only allowed in test mode for now" assert os.path.isdir(test_plan_folder), "{} should be a folder".format( test_plan_folder ) @@ -692,7 +846,8 @@ def run_batch(controller, test_workload, dir, mode, test_plan_folder, docker, ph mode, test_plan, docker, - phase, + postprocess, + build_oracle, ) save_run_result( test_context, @@ -731,7 +886,7 @@ def run_batch(controller, test_workload, dir, mode, test_plan_folder, docker, ph "-m", "--mode", dest="mode", - help="MODE: vanilla, test, learn, generate-oracle", + help="MODE: vanilla, test, learn", metavar="MODE", ) parser.add_option( @@ -758,11 +913,18 @@ def run_batch(controller, test_workload, dir, mode, test_plan_folder, docker, ph default=False, ) parser.add_option( - "--phase", - dest="phase", - help="run the PHASE: setup, workload, check or all", - metavar="PHASE", - default="all", + "--postprocess", + dest="postprocess", + action="store_true", + help="run postprocess only: report bugs for test mode, generate test plans for learn mode", + default=False, + ) + parser.add_option( + "--build-oracle", + dest="build_oracle", + action="store_true", + help="build the oracle by running learn twice", + default=False, ) (options, args) = parser.parse_args() @@ -770,7 +932,7 @@ def run_batch(controller, test_workload, dir, mode, test_plan_folder, docker, ph if options.controller_config_dir is None: parser.error("parameter controller required") - if options.mode == sieve_modes.LEARN or options.mode == sieve_modes.GEN_ORACLE: + if options.mode == sieve_modes.LEARN: options.test_plan = "learn.yaml" if options.dir is None: options.dir = "sieve_learn_results" @@ -788,15 +950,13 @@ def run_batch(controller, 
test_workload, dir, mode, test_plan_folder, docker, ph if options.mode != sieve_modes.TEST: parser.error("parameter test required in learn and vanilla mode") - if options.phase not in [ - "all", - "setup", - "workload", - "check", - "setup_workload", - "workload_check", - ]: - parser.error("invalid phase option: {}".format(options.phase)) + if options.mode != sieve_modes.LEARN and options.build_oracle: + parser.error("parameter build_oracle cannot be enabled when mode is not learn") + + if options.postprocess and options.build_oracle: + parser.error( + "parameter postprocess cannot be enabled when build_oracle is enabled" + ) print("Running Sieve with mode: {}...".format(options.mode)) @@ -808,21 +968,10 @@ def run_batch(controller, test_workload, dir, mode, test_plan_folder, docker, ph options.mode, options.test_plan, options.registry, - options.phase, + options.postprocess, + options.build_oracle, ) else: - if options.mode == sieve_modes.GEN_ORACLE: - # Run learn mode first - run( - options.controller_config_dir, - options.test_workload, - options.dir, - sieve_modes.LEARN, - options.test_plan, - options.registry, - options.phase, - ) - test_result, test_context = run( options.controller_config_dir, options.test_workload, @@ -830,7 +979,8 @@ def run_batch(controller, test_workload, dir, mode, test_plan_folder, docker, ph options.mode, options.test_plan, options.registry, - options.phase, + options.postprocess, + options.build_oracle, ) save_run_result( @@ -838,5 +988,4 @@ def run_batch(controller, test_workload, dir, mode, test_plan_folder, docker, ph test_result, start_time, ) - os_system("kind delete cluster") print("Total time: {} seconds".format(time.time() - start_time)) diff --git a/sieve_analyzer/analyze.py b/sieve_analyzer/analyze.py index df78a0d9333..804f894dfcc 100644 --- a/sieve_analyzer/analyze.py +++ b/sieve_analyzer/analyze.py @@ -17,7 +17,7 @@ ) -def sanity_check_sieve_log(path): +def sanity_check_sieve_server_log(path): lines = 
open(path).readlines() reconcile_status = {} controller_write_status = {} @@ -334,7 +334,7 @@ def generate_write_hear_pairs(event_graph: EventGraph): return vertex_pairs -def build_event_graph(test_context: TestContext, log_path, oracle_dir): +def build_controller_event_graph(test_context: TestContext, log_path, oracle_dir): learned_masked_paths = json.load(open(os.path.join(oracle_dir, "mask.json"))) controller_hear_list = parse_receiver_events(log_path) @@ -363,7 +363,7 @@ def build_event_graph(test_context: TestContext, log_path, oracle_dir): return event_graph -def generate_test_config( +def generate_test_plans( test_context: TestContext, analysis_mode, event_graph: EventGraph ): log_dir = test_context.result_dir @@ -382,7 +382,7 @@ def generate_test_config( ) -def analyze_trace( +def generate_test_plans_from_learn_run( test_context: TestContext, ): log_dir = test_context.result_dir @@ -390,12 +390,13 @@ def analyze_trace( log_path = os.path.join(log_dir, "sieve-server.log") print("Sanity checking the sieve log {}...".format(log_path)) - sanity_check_sieve_log(log_path) + sanity_check_sieve_server_log(log_path) - if not os.path.exists(os.path.join(oracle_dir, "mask.json")): - fail("cannot find mask.json") - return - event_graph = build_event_graph(test_context, log_path, oracle_dir) + assert os.path.exists( + os.path.join(oracle_dir, "mask.json") + ), "cannot find mask.json" + + event_graph = build_controller_event_graph(test_context, log_path, oracle_dir) sieve_learn_result = { "controller": test_context.controller, "test": test_context.test_workload, @@ -410,7 +411,7 @@ def analyze_trace( after_p1_spec_number, after_p2_spec_number, final_spec_number, - ) = generate_test_config(test_context, analysis_mode, event_graph) + ) = generate_test_plans(test_context, analysis_mode, event_graph) sieve_learn_result[analysis_mode] = { "baseline": baseline_spec_number, "after_p1": after_p1_spec_number, diff --git a/sieve_client/common.go b/sieve_client/common.go index 
f5b5a7801af..15095a6bd07 100644 --- a/sieve_client/common.go +++ b/sieve_client/common.go @@ -506,7 +506,11 @@ func generateResourceKey(resourceType, namespace, name string) string { // TODO: handle more complex plural cases func pluralToSingular(plural string) string { - return plural[:len(plural)-1] + if len(plural) == 0 { + return plural + } else { + return plural[:len(plural)-1] + } } func generateResourceKeyFromRestCall(verb, resourceType, namespace, name string, object interface{}) string { diff --git a/sieve_client/learn_client.go b/sieve_client/learn_client.go index ae099f5b50a..d52ebf3d7fb 100644 --- a/sieve_client/learn_client.go +++ b/sieve_client/learn_client.go @@ -229,6 +229,10 @@ func NotifyLearnBeforeRestCall(verb string, pathPrefix string, subpath string, n if err := initRPCClient(); err != nil { return -1 } + // NOTE: sometimes the resourceType is empty string, and we skip these cases + if len(resourceType) == 0 { + return -1 + } reconcileFun := getMatchedReconcileStackFrame() if reconcileFun == UNKNOWN_RECONCILE_FUN { return -1 @@ -274,6 +278,10 @@ func NotifyLearnAfterRestCall(controllerOperationID int, verb string, pathPrefix if serializationErr != nil { return } + // NOTE: sometimes the resourceType is empty string, and we skip these cases + if len(resourceType) == 0 { + return + } if err := initRPCClient(); err != nil { return } @@ -294,7 +302,7 @@ func NotifyLearnAfterRestCall(controllerOperationID int, verb string, pathPrefix if controllerOperationType == UNKNOWN { log.Println("Unknown operation") } else if controllerOperationType == GET || controllerOperationType == LIST { - log.Println("READ operation") + log.Println("Read operation") request := &NotifyLearnAfterRestReadRequest{ ControllerOperationID: controllerOperationID, ControllerOperationType: controllerOperationType, diff --git a/sieve_common/common.py b/sieve_common/common.py index 3aa7cc660e1..97d12e9da8e 100644 --- a/sieve_common/common.py +++ b/sieve_common/common.py @@ -57,7 
+57,6 @@ class sieve_modes: TEST = "test" VANILLA = "vanilla" LEARN = "learn" - GEN_ORACLE = "generate-oracle" ALL = "all" NONE = "none" @@ -75,7 +74,8 @@ def __init__( controller_config_dir, test_workload, mode, - phase, + postprocess, + build_oracle, original_test_plan, test_plan, result_root_dir, @@ -93,7 +93,8 @@ def __init__( self.controller_config_dir = controller_config_dir self.test_workload = test_workload self.mode = mode - self.phase = phase + self.postprocess = postprocess + self.build_oracle = build_oracle self.original_test_plan = original_test_plan self.test_plan = test_plan self.result_root_dir = result_root_dir @@ -183,6 +184,14 @@ def os_system(cmd, early_exit=True): return return_code +def first_pass_learn_result_dir(learn_result_dir): + learn_prev_res_dir = os.path.join( + os.path.dirname(learn_result_dir), + sieve_modes.LEARN + "_prev", + ) + return learn_prev_res_dir + + def dump_json_file(dir, data, json_file_name): json.dump( data, open(os.path.join(dir, json_file_name), "w"), indent=4, sort_keys=True diff --git a/sieve_common/k8s_event.py b/sieve_common/k8s_event.py index 4c3a3e83846..c71c99a9212 100644 --- a/sieve_common/k8s_event.py +++ b/sieve_common/k8s_event.py @@ -813,6 +813,7 @@ def parse_controller_write(line: str) -> ControllerWrite: def parse_controller_read(line: str) -> ControllerRead: assert SIEVE_AFTER_REST_READ_MARK in line + print(line) tokens = line[line.find(SIEVE_AFTER_REST_READ_MARK) :].strip("\n").split("\t") tokens = tokens[1:] if tokens[1] == "Get": @@ -837,7 +838,7 @@ def parse_controller_read(line: str) -> ControllerRead: "", tokens[2], tokens[3], - tokens[5], + tokens[7], ) else: assert False, "read type should be: Get, List" diff --git a/sieve_oracle/checker_common.py b/sieve_oracle/checker_common.py index edf40136236..45b810942e4 100644 --- a/sieve_oracle/checker_common.py +++ b/sieve_oracle/checker_common.py @@ -83,7 +83,7 @@ def generate_controller_related_list(test_context: TestContext): return 
controller_related_list -def learn_twice_trim(base_resources, twice_resources): +def second_pass_learn_trim(base_resources, twice_resources): def nested_set(dic, keys, value): for key in keys[:-1]: dic = dic[key] diff --git a/sieve_oracle/liveness_checker.py b/sieve_oracle/liveness_checker.py index 7a3a55247d8..dcf4713a652 100644 --- a/sieve_oracle/liveness_checker.py +++ b/sieve_oracle/liveness_checker.py @@ -55,16 +55,16 @@ def generate_state(test_context: TestContext): def canonicalize_state(test_context: TestContext): - assert test_context.mode == sieve_modes.GEN_ORACLE - learn_twice_dir = test_context.result_dir - cur_state = json.loads(open(os.path.join(learn_twice_dir, "state.json")).read()) - learn_once_dir = os.path.join( - os.path.dirname(os.path.dirname(test_context.result_dir)), - sieve_modes.LEARN, - "learn.yaml", + assert test_context.mode == sieve_modes.LEARN and test_context.build_oracle + second_pass_learn_dir = test_context.result_dir + cur_state = json.loads( + open(os.path.join(second_pass_learn_dir, "state.json")).read() ) - prev_state = json.loads(open(os.path.join(learn_once_dir, "state.json")).read()) - canonicalized_state = learn_twice_trim(prev_state, cur_state) + first_pass_learn_dir = first_pass_learn_result_dir(test_context.result_dir) + prev_state = json.loads( + open(os.path.join(first_pass_learn_dir, "state.json")).read() + ) + canonicalized_state = second_pass_learn_trim(prev_state, cur_state) return canonicalized_state @@ -140,22 +140,18 @@ def get_canonicalized_state(test_context: TestContext): def get_learning_once_state(test_context: TestContext): - learn_once_dir = os.path.join( - os.path.dirname(os.path.dirname(test_context.result_dir)), - sieve_modes.LEARN, - "learn.yaml", + first_pass_learn_dir = first_pass_learn_result_dir(test_context.result_dir) + learning_once_state = json.load( + open(os.path.join(first_pass_learn_dir, "state.json")) ) - learning_once_state = json.load(open(os.path.join(learn_once_dir, "state.json"))) 
return learning_once_state def get_learning_twice_state(test_context: TestContext): - learn_twice_dir = os.path.join( - os.path.dirname(os.path.dirname(test_context.result_dir)), - sieve_modes.GEN_ORACLE, - "learn.yaml", + second_pass_learn_dir = test_context.result_dir + learning_twice_state = json.load( + open(os.path.join(second_pass_learn_dir, "state.json")) ) - learning_twice_state = json.load(open(os.path.join(learn_twice_dir, "state.json"))) return learning_twice_state diff --git a/sieve_oracle/oracle.py b/sieve_oracle/oracle.py index 100126f9f18..a34c79f12b1 100644 --- a/sieve_oracle/oracle.py +++ b/sieve_oracle/oracle.py @@ -7,7 +7,7 @@ from sieve_oracle.customized_safety_checker import * -def persist_history(test_context: TestContext): +def save_history(test_context: TestContext): cprint("Generating state update summary...", bcolors.OKGREEN) history = generate_history(test_context) history_digest = generate_history_digest(test_context) @@ -15,13 +15,13 @@ def persist_history(test_context: TestContext): dump_json_file(test_context.result_dir, history_digest, "event.json") -def persist_state(test_context: TestContext): +def save_state(test_context: TestContext): cprint("Generating end state...", bcolors.OKGREEN) state = generate_state(test_context) dump_json_file(test_context.result_dir, state, "state.json") -def generate_controller_family(test_context: TestContext): +def save_controller_related_object_list(test_context: TestContext): cprint("Generating controller family list...", bcolors.OKGREEN) controller_related_list = generate_controller_related_list(test_context) dump_json_file( @@ -29,10 +29,10 @@ def generate_controller_family(test_context: TestContext): ) -def canonicalize_history_and_state(test_context: TestContext): +def create_differential_oracles(test_context: TestContext): if not test_context.common_config.update_oracle_file_enabled: return - assert test_context.mode == sieve_modes.GEN_ORACLE + assert test_context.mode == sieve_modes.LEARN and 
test_context.build_oracle cprint("Generating canonicalized state update summary...", bcolors.OKGREEN) can_history_digest = canonicalize_history_digest(test_context) dump_json_file(test_context.oracle_dir, can_history_digest, "event.json") diff --git a/sieve_oracle/safety_checker.py b/sieve_oracle/safety_checker.py index feba12ba01e..67a8299230b 100644 --- a/sieve_oracle/safety_checker.py +++ b/sieve_oracle/safety_checker.py @@ -85,20 +85,16 @@ def generate_history_digest(test_context: TestContext): def canonicalize_history_digest(test_context: TestContext): - assert test_context.mode == sieve_modes.GEN_ORACLE - learn_twice_dir = test_context.result_dir + assert test_context.mode == sieve_modes.LEARN and test_context.build_oracle + second_pass_learn_dir = test_context.result_dir cur_history_digest = json.loads( - open(os.path.join(learn_twice_dir, "event.json")).read() - ) - learn_once_dir = os.path.join( - os.path.dirname(os.path.dirname(test_context.result_dir)), - sieve_modes.LEARN, - "learn.yaml", + open(os.path.join(second_pass_learn_dir, "event.json")).read() ) + first_pass_learn_dir = first_pass_learn_result_dir(test_context.result_dir) prev_history_digest = json.loads( - open(os.path.join(learn_once_dir, "event.json")).read() + open(os.path.join(first_pass_learn_dir, "event.json")).read() ) - can_history_digest = learn_twice_trim(prev_history_digest, cur_history_digest) + can_history_digest = second_pass_learn_trim(prev_history_digest, cur_history_digest) def remove_ignored_value(event_map): ignored = set() @@ -125,25 +121,17 @@ def get_canonicalized_history_digest(test_context: TestContext): def get_learning_once_history_digest(test_context: TestContext): - learn_once_dir = os.path.join( - os.path.dirname(os.path.dirname(test_context.result_dir)), - sieve_modes.LEARN, - "learn.yaml", - ) + first_pass_learn_dir = first_pass_learn_result_dir(test_context.result_dir) learning_once_history_digest = json.load( - open(os.path.join(learn_once_dir, "event.json")) + 
open(os.path.join(first_pass_learn_dir, "event.json")) ) return learning_once_history_digest def get_learning_twice_history_digest(test_context: TestContext): - learn_twice_dir = os.path.join( - os.path.dirname(os.path.dirname(test_context.result_dir)), - sieve_modes.GEN_ORACLE, - "learn.yaml", - ) + second_pass_learn_dir = test_context.result_dir learning_twice_history_digest = json.load( - open(os.path.join(learn_twice_dir, "event.json")) + open(os.path.join(second_pass_learn_dir, "event.json")) ) return learning_twice_history_digest @@ -163,25 +151,17 @@ def get_testing_history_digest(test_context: TestContext): def get_learning_once_history(test_context: TestContext): - learn_once_dir = os.path.join( - os.path.dirname(os.path.dirname(test_context.result_dir)), - sieve_modes.LEARN, - "learn.yaml", - ) + first_pass_learn_dir = first_pass_learn_result_dir(test_context.result_dir) learning_once_history = json.load( - open(os.path.join(learn_once_dir, "history.json")) + open(os.path.join(first_pass_learn_dir, "history.json")) ) return learning_once_history def get_learning_twice_history(test_context: TestContext): - learn_twice_dir = os.path.join( - os.path.dirname(os.path.dirname(test_context.result_dir)), - sieve_modes.GEN_ORACLE, - "learn.yaml", - ) + second_pass_learn_dir = test_context.result_dir learning_twice_history = json.load( - open(os.path.join(learn_twice_dir, "history.json")) + open(os.path.join(second_pass_learn_dir, "history.json")) ) return learning_twice_history diff --git a/sieve_perturbation_policies/stale_state.py b/sieve_perturbation_policies/stale_state.py index 93395d31dc7..b2553aeaafb 100644 --- a/sieve_perturbation_policies/stale_state.py +++ b/sieve_perturbation_policies/stale_state.py @@ -37,7 +37,7 @@ def stale_state_detectable_pass( controller_hear.signature_counter, ): candidate_pairs.append(pair) - print(".formatd -> .formatd edges".format(len(event_pairs), len(candidate_pairs))) + print("{} -> {} edges".format(len(event_pairs), 
len(candidate_pairs))) return candidate_pairs diff --git a/sieve_server/config.go b/sieve_server/config.go index 2153739b6b8..33a5db92566 100644 --- a/sieve_server/config.go +++ b/sieve_server/config.go @@ -19,7 +19,7 @@ func getConfig() map[interface{}]interface{} { err = yaml.Unmarshal([]byte(data), &m) checkError(err) - log.Printf("config:\n%v\n", m) + log.Printf("config: %v\n", m) return m } @@ -31,7 +31,7 @@ func getMask() (map[string][][]string, map[string][][]string, map[string][][]str err = json.Unmarshal([]byte(data), &learnedFieldPathMask) checkError(err) - log.Printf("learned mask:\n%v\n", learnedFieldPathMask) + log.Printf("learned mask: %v\n", learnedFieldPathMask) data, err = ioutil.ReadFile("configured_field_path_mask.json") checkError(err) @@ -39,7 +39,7 @@ func getMask() (map[string][][]string, map[string][][]string, map[string][][]str err = json.Unmarshal([]byte(data), &configuredFieldPathMask) checkError(err) - log.Printf("configured mask:\n%v\n", configuredFieldPathMask) + log.Printf("configured mask: %v\n", configuredFieldPathMask) data, err = ioutil.ReadFile("configured_field_key_mask.json") checkError(err) @@ -47,7 +47,7 @@ func getMask() (map[string][][]string, map[string][][]string, map[string][][]str err = json.Unmarshal([]byte(data), &configuredFieldKeyMask) checkError(err) - log.Printf("configured mask:\n%v\n", configuredFieldKeyMask) + log.Printf("configured mask: %v\n", configuredFieldKeyMask) return learnedFieldPathMask, configuredFieldPathMask, configuredFieldKeyMask }