From 9744ac7996e0b112b919d844d7b879be42ea2534 Mon Sep 17 00:00:00 2001 From: Marcin Copik Date: Fri, 27 Nov 2020 00:38:24 +0100 Subject: [PATCH 1/6] Extend JSON Converter --- tools/JSONConverter.cpp | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/tools/JSONConverter.cpp b/tools/JSONConverter.cpp index 8bcd3b1..84bdd95 100644 --- a/tools/JSONConverter.cpp +++ b/tools/JSONConverter.cpp @@ -450,14 +450,18 @@ json_t convert(json_t & input, bool generate_full_data) } } + std::cerr << "Important: " << important_indices.size() << std::endl; // before replacing and after finding out what's important std::map loops_params; std::map functions_params; + std::map> functions_names; int dynamic_loops = 0; + int count = 0, count_empty_params = 0; for(int func_idx : important_indices) { std::string name = input["functions_mangled_names"][func_idx].get(); if(name.find("MPI_") == 0) { + ++count; } else { json_t & entry = functions[name]; //std::cerr << name << " " << entry << std::endl; @@ -484,16 +488,23 @@ json_t convert(json_t & input, bool generate_full_data) } } } + std::cerr << "Function: " << name << " params: " << params.size() << std::endl; + if(params.size() == 0) + count_empty_params++; for(const std::string & param : params) { functions_params[param]++; + functions_names[param].push_back(name); for(const std::string & param2 : params) { if(param2 != param) { if(param < param2) { for(const std::string & param3 : params) if(param2 != param3) - if(param2 < param3) + if(param2 < param3) { functions_params[param + "_" + param2 + "_" + param3]++; + functions_names[param + "_" + param2 + "_" + param3].push_back(name); + } functions_params[param + "_" + param2]++; + functions_names[param + "_" + param2].push_back(name); //params.insert(param); } } @@ -506,8 +517,10 @@ json_t convert(json_t & input, bool generate_full_data) //std::cerr << std::endl; } } + std::cerr << "MPI functions: " << count << " empty params: " << count_empty_params << std::endl; json_t loops_out; loops_out["functions"] = functions_params; + loops_out["functions_names"] = functions_names; loops_out["loops"] = dynamic_loops; for(auto it = loops_params.begin(); it != loops_params.end(); ++it) loops_out["param"][it->first] = it->second; From 38a8a261e4cdaa97c9d7915ef218113f94e9e52b Mon Sep 17 00:00:00 2001 From: Marcin Copik Date: Sat, 28 Nov 2020 04:43:43 +0100 Subject: [PATCH 2/6] Implement runtime side of branch tainting --- include/perf-taint/runtime/runtime.h | 5 + lib/runtime/json_export.cpp | 136 ++++++++++++++++++--------- lib/runtime/runtime.c | 19 +++- 3 files changed, 110 insertions(+), 50 deletions(-) diff --git a/include/perf-taint/runtime/runtime.h b/include/perf-taint/runtime/runtime.h index 83ca385..57a576f 100644 --- a/include/perf-taint/runtime/runtime.h +++ b/include/perf-taint/runtime/runtime.h @@ -163,5 +163,10 @@ EXTERN dependencies * __dfsw_EXTRAP_DEPS_FUNC(int func_idx); EXTERN dependencies * __dfsw_EXTRAP_GET_DEPS(int32_t loop_idx, int32_t depth, int32_t function_idx); +extern int32_t __perf_taint_loop_branches_offsets[]; +extern int32_t __perf_taint_loop_branches_counts[]; +extern int16_t __perf_taint_loop_branches_data[]; + + #endif diff --git a/lib/runtime/json_export.cpp b/lib/runtime/json_export.cpp index 579331b..0a5f496 100644 --- a/lib/runtime/json_export.cpp +++ b/lib/runtime/json_export.cpp @@ -139,6 +139,42 @@ json_t __dfsw_json_write_single_loop(dependencies * deps, bool clean) return params; } +json_t __dfsw_json_write_single_loop_branch(int function_idx, int nested_loop_idx) +{ + json_t branches; + // control-flow branches + int32_t offset = __perf_taint_loop_branches_offsets[function_idx]; + int32_t begin = __perf_taint_loop_branches_counts[offset + nested_loop_idx]; + int32_t end = __perf_taint_loop_branches_counts[offset + nested_loop_idx + 1]; + int count = 0; + while(begin != end) { + uint16_t val = __perf_taint_loop_branches_data[begin]; + json_t dependency; + int vars_count = __EXTRAP_INSTRUMENTATION_EXPLICIT_PARAMS_COUNT + __EXTRAP_INSTRUMENTATION_IMPLICIT_PARAMS_COUNT; + for(int kk = __EXTRAP_INSTRUMENTATION_IMPLICIT_PARAMS_COUNT; + kk < vars_count; ++kk) { + if(val & (1 << kk)) { + __EXTRAP_INSTRUMENTATION_PARAMS_USED[kk] = true; + dependency.push_back(__EXTRAP_INSTRUMENTATION_PARAMS_NAMES[kk]); + } + } + for(int kk = 0; kk < __EXTRAP_INSTRUMENTATION_IMPLICIT_PARAMS_COUNT; ++kk) + if(val & (1 << kk)) { + __EXTRAP_INSTRUMENTATION_PARAMS_USED[kk] = true; + dependency.push_back(__EXTRAP_INSTRUMENTATION_PARAMS_NAMES[kk]); + } + + if(!dependency.empty() && !dependency.is_null()) + branches[std::to_string(count)] = dependency; + + __perf_taint_loop_branches_data[begin] = 0; + count++; + begin++; + + } + return branches; +} + json_t __dfsw_json_write_loop(int function_idx, int32_t * loop_data, int32_t * loop_structure, dependencies * deps, int & nested_loop_idx, nested_call *& begin, nested_call * end, bool clean = true) @@ -148,10 +184,15 @@ json_t __dfsw_json_write_loop(int function_idx, int32_t * loop_data, bool non_empty = false; json_t params = __dfsw_json_write_single_loop(deps++, clean); + json_t branches = __dfsw_json_write_single_loop_branch(function_idx, nested_loop_idx); if(!params.empty()) { loop["params"] = params; non_empty = true; } + if(!branches.empty() && !branches.is_null()) { + loop["branches"] = branches; + non_empty = true; + } loop["level"] = 0; int level_size = *loop_structure, prev_level_size = 0, next_level_size = 0; @@ -189,10 +230,15 @@ json_t __dfsw_json_write_loop(int function_idx, int32_t * loop_data, json_t loop_level; loop_level["level"] = level; json_t params = __dfsw_json_write_single_loop(deps++, clean); + json_t branches = __dfsw_json_write_single_loop_branch(function_idx, nested_loop_idx); if(!params.empty()) { non_empty = true; loop_level["params"] = params; } + if(!branches.empty() && !branches.is_null()) { + loop_level["branches"] = branches; + non_empty = true; + } while(begin != end && begin->nested_loop_idx == nested_loop_idx) { if(begin->len > 0) { json_t entry; @@ -420,51 +466,51 @@ bool __dfsw_json_write_loop(int function_idx, int calls_count) if(non_empty) { if(output.empty()) exit(4); - json_t * func = &__dfsw_json_get(function_idx); - json_t & prev_loops = (*func)["loops"]; - bool found = false; - size_t cur_idx = 0; - for(json_t & prev : prev_loops) { - if(prev["instance"] == output) { - found = true; - bool callstack_found = false; - json_t callstack; - //fprintf(stderr, "Write callstack of length %lu %d \n", __EXTRAP_CALLSTACK.len - 1, static_cast(__EXTRAP_CALLSTACK.len) - 1); - for(int i = 0; i < static_cast(__EXTRAP_CALLSTACK.len) - 1; ++i) - callstack.push_back( __EXTRAP_CALLSTACK.stack[i] ); - for(const json_t & stack : prev["callstacks"]) { - if(stack == callstack) { - callstack_found = true; - break; - } - } - __dfsw_json_loop_committed(function_idx, cur_idx); //&prev); - //fprintf(stderr, "Commit JSON data already known %p callstack_len %d\n", &prev, - //callstack.size()); - if(!callstack_found) { - prev["callstacks"].push_back( std::move(callstack) ); - } - break; - } - cur_idx++; - } - if(!found) { - // don't write current function - json_t callstack; - for(size_t i = 0; i < __EXTRAP_CALLSTACK.len - 1; ++i) - callstack.push_back( __EXTRAP_CALLSTACK.stack[i] ); - json_t instance; - instance["callstacks"].push_back(callstack); - instance["instance"] = output; - //// - //std::cout << instance << std::endl; - //std::cout << *func << std::endl; - //std::cout << function_idx << std::endl; - //std::cout << prev_loops << std::endl; - prev_loops.push_back( std::move(instance) ); - //fprintf(stderr, "Commit new JSON data %p callstack_len %d \n", &prev_loops.back(), callstack.size()); - __dfsw_json_loop_committed(function_idx, prev_loops.size() - 1); //&prev_loops.back()); - } + json_t * func = &__dfsw_json_get(function_idx); + json_t & prev_loops = (*func)["loops"]; + bool found = false; + size_t cur_idx = 0; + for(json_t & prev : prev_loops) { + if(prev["instance"] == output) { + found = true; + bool callstack_found = false; + json_t callstack; + //fprintf(stderr, "Write callstack of length %lu %d \n", __EXTRAP_CALLSTACK.len - 1, static_cast(__EXTRAP_CALLSTACK.len) - 1); + for(int i = 0; i < static_cast(__EXTRAP_CALLSTACK.len) - 1; ++i) + callstack.push_back( __EXTRAP_CALLSTACK.stack[i] ); + for(const json_t & stack : prev["callstacks"]) { + if(stack == callstack) { + callstack_found = true; + break; + } + } + __dfsw_json_loop_committed(function_idx, cur_idx); //&prev); + //fprintf(stderr, "Commit JSON data already known %p callstack_len %d\n", &prev, + //callstack.size()); + if(!callstack_found) { + prev["callstacks"].push_back( std::move(callstack) ); + } + break; + } + cur_idx++; + } + if(!found) { + // don't write current function + json_t callstack; + for(size_t i = 0; i < __EXTRAP_CALLSTACK.len - 1; ++i) + callstack.push_back( __EXTRAP_CALLSTACK.stack[i] ); + json_t instance; + instance["callstacks"].push_back(callstack); + instance["instance"] = output; + //// + //std::cout << instance << std::endl; + //std::cout << *func << std::endl; + //std::cout << function_idx << std::endl; + //std::cout << prev_loops << std::endl; + prev_loops.push_back( std::move(instance) ); + //fprintf(stderr, "Commit new JSON data %p callstack_len %d \n", &prev_loops.back(), callstack.size()); + __dfsw_json_loop_committed(function_idx, prev_loops.size() - 1); //&prev_loops.back()); + } } diff --git a/lib/runtime/runtime.c b/lib/runtime/runtime.c index 45ef448..5effb86 100644 --- a/lib/runtime/runtime.c +++ b/lib/runtime/runtime.c @@ -15,11 +15,6 @@ #define DEBUG false -//extern int32_t __EXTRAP_INSTRUMENTATION_RESULTS[]; -//extern int8_t * __EXTRAP_INSTRUMENTATION_FUNCS_NAMES[]; -//extern int32_t __EXTRAP_INSTRUMENTATION_FUNCS_COUNT; -//extern int32_t __EXTRAP_INSTRUMENTATION_PARAMS_COUNT; - extern dfsan_label __EXTRAP_INSTRUMENTATION_LABELS[]; callstack __EXTRAP_CALLSTACK = {0, 0, NULL}; @@ -375,3 +370,17 @@ void __dfsw_EXTRAP_MARK_IMPLICIT_LABEL(uint16_t function_idx, __dfsw_add_dep(found_params, &__EXTRAP_LOOP_DEPENDENCIES[offset]); } +void __dfsw_perf_taint_branch(uint16_t label, int32_t function_idx, int32_t nested_loop_idx, int32_t branch_idx) +{ + size_t param_count = __EXTRAP_INSTRUMENTATION_EXPLICIT_PARAMS_COUNT + + __EXTRAP_INSTRUMENTATION_IMPLICIT_PARAMS_COUNT; + // We iterate only to # of currently known parameters + uint16_t found_params = 0; + for(size_t i = 0; i < param_count; ++i) + if(__EXTRAP_INSTRUMENTATION_LABELS[i]) { + bool has_label = dfsan_has_label(label, __EXTRAP_INSTRUMENTATION_LABELS[i]); + found_params |= (has_label << i); + } + __perf_taint_loop_branches_data[branch_idx] |= found_params; +} + From 62a3a0e39b25931f093d2fb3f48f9e487da094f4 Mon Sep 17 00:00:00 2001 From: Marcin Copik Date: Sat, 28 Nov 2020 05:27:19 +0100 Subject: [PATCH 3/6] Implementation of tainting of loop branches --- include/perf-taint/llvm-pass/Function.hpp | 8 + .../perf-taint/llvm-pass/PerfTaintPass.hpp | 21 ++ include/perf-taint/runtime/runtime.h | 2 + lib/llvm-pass/PerfTaintPass.cpp | 246 +++++++++++++++++- lib/runtime/json_export.cpp | 20 +- lib/runtime/runtime.c | 22 +- 6 files changed, 298 insertions(+), 21 deletions(-) diff --git a/include/perf-taint/llvm-pass/Function.hpp b/include/perf-taint/llvm-pass/Function.hpp index 9a49c70..08fa0d3 100644 --- a/include/perf-taint/llvm-pass/Function.hpp +++ b/include/perf-taint/llvm-pass/Function.hpp @@ -29,6 +29,12 @@ namespace perf_taint { {} }; + struct LoopBranch + { + uint32_t nested_loop_idx; + llvm::Instruction * branch; + }; + struct Function { int idx; @@ -42,6 +48,8 @@ namespace perf_taint { llvm::SmallVector implicit_loops; typedef std::vector< std::vector > vec_t; + llvm::SmallVector loop_cf_branches; + Function(int _idx, llvm::StringRef _name, bool _overriden = false): idx(_idx), name(_name), diff --git a/include/perf-taint/llvm-pass/PerfTaintPass.hpp b/include/perf-taint/llvm-pass/PerfTaintPass.hpp index 8972925..ad7842a 100644 --- a/include/perf-taint/llvm-pass/PerfTaintPass.hpp +++ b/include/perf-taint/llvm-pass/PerfTaintPass.hpp @@ -113,6 +113,19 @@ namespace perf_taint { llvm::GlobalVariable * glob_loops_sizes_offsets; llvm::GlobalVariable * glob_loops_number; + // branches + llvm::GlobalVariable * glob_branches_offsets; + llvm::GlobalVariable * glob_branches_counts; + llvm::GlobalVariable * glob_branches_data; + static constexpr const char * glob_branches_offsets_name + = "__perf_taint_loop_branches_offsets"; + static constexpr const char * glob_branches_counts_name + = "__perf_taint_loop_branches_counts"; + static constexpr const char * glob_branches_data_name + = "__perf_taint_loop_branches_data"; + static constexpr const char * glob_branches_enabled_name + = "__perf_taint_loop_branches_enabled"; + static constexpr const char * glob_retval_tls_name = "__dfsan_retval_tls"; static constexpr const char * glob_labels_name @@ -217,6 +230,9 @@ namespace perf_taint { // void __dfsw_EXTRAP_WRITE_PARAMETER(int8_t *, size_t, int32_t) llvm::Function * write_parameter_function; + // void __dfsw_perf_taint_branch(int16, int32, int32, int32) + llvm::Function * taint_branch_function; + Instrumenter(llvm::Module & _m): m(_m), builder(m.getContext()), @@ -289,6 +305,9 @@ namespace perf_taint { void findTerminator(llvm::Function & f, llvm::SmallVector & returns); llvm::Function * getAtExit(); uint64_t size_of(llvm::Value * val); + + void instrumentLoopBranch(llvm::Instruction * branch, int32_t function_idx, + int32_t nested_loop_idx, int32_t branch_idx); }; struct LabelAnnotator : public llvm::InstVisitor @@ -374,6 +393,8 @@ namespace perf_taint { std::set recursive_functions; + void analyzeLoopBranches(Function & f, llvm::Loop & l, int & nested_loop_idx); + DfsanInstr(): ModulePass(ID), m(nullptr), diff --git a/include/perf-taint/runtime/runtime.h b/include/perf-taint/runtime/runtime.h index 57a576f..b002c2a 100644 --- a/include/perf-taint/runtime/runtime.h +++ b/include/perf-taint/runtime/runtime.h @@ -50,6 +50,8 @@ typedef struct _nested_call_vec { nested_call * data; } nested_call_vec; +extern bool __perf_taint_loop_branches_enabled; + extern int32_t __EXTRAP_INSTRUMENTATION_RESULTS[]; extern const char * __EXTRAP_INSTRUMENTATION_FUNCS_NAMES[]; extern const char * __EXTRAP_INSTRUMENTATION_FUNCS_MANGLED_NAMES[]; diff --git a/lib/llvm-pass/PerfTaintPass.cpp b/lib/llvm-pass/PerfTaintPass.cpp index e7fd60c..dbbaff8 100644 --- a/lib/llvm-pass/PerfTaintPass.cpp +++ b/lib/llvm-pass/PerfTaintPass.cpp @@ -62,6 +62,11 @@ static llvm::cl::opt EnableSCEV("perf-taint-scev", llvm::cl::init(false), llvm::cl::value_desc("boolean flag")); +static llvm::cl::opt EnableBranches("perf-taint-branches-enable", + llvm::cl::desc("Enable detection of branches inside loops."), + llvm::cl::init(false), + llvm::cl::value_desc("boolean flag")); + static llvm::cl::opt GenerateStats("perf-taint-export-stats", llvm::cl::desc("Specify directory for output logs"), llvm::cl::init(false), @@ -182,6 +187,14 @@ namespace perf_taint { } } } + // sorted instrumented functions + std::vector functions(parent_functions.size()); + for(auto & f : instrumented_functions) { + if( !f.second.hasValue() || f.second->is_overriden()) + continue; + int f_idx = f.second->function_idx(); + functions[f_idx] = &f.second.getValue(); + } instr.createGlobalStorage(parent_functions, database, instrumented_functions.begin(), instrumented_functions.end(), @@ -189,12 +202,28 @@ namespace perf_taint { notinstrumented_functions.begin(), notinstrumented_functions.end()); //instr.annotateParams(found_params); size_t params_count = Parameters::globals_names.size() + Parameters::local_names.size(); + size_t branch_idx = 0; for(auto & f : instrumented_functions) { - if(f.second) { - modifyFunction(*f.first, f.second.getValue(), instr); + if(f.second) { + modifyFunction(*f.first, f.second.getValue(), instr); + } + + + } + + if(EnableBranches) { + for(Function * f : functions) { + for(auto & branch : f->loop_cf_branches) { + llvm::errs() << "BranchInstrument " << f->name << '\n'; + instr.instrumentLoopBranch(branch.branch, f->idx, branch.nested_loop_idx, branch_idx); + ++branch_idx; } + } } + + + // TODO: also change that we dependent on this vector. // we should have all functions - not important, instrumented - in same data structure // TODO: why the hell is it parent_functions? @@ -289,6 +318,65 @@ namespace perf_taint { (*it).second->add_callsite(val); } + void DfsanInstr::analyzeLoopBranches(Function & f, llvm::Loop & l, int & nested_loop_idx) + { + typedef std::vector loops_t; + loops_t buf_first{1, &l}, buf_second; + + loops_t * cur_loops = &buf_first, * next_loops = &buf_second; + while(!cur_loops->empty()) { + + for(llvm::Loop * l : *cur_loops) { + llvm::SmallVector exit_blocks; + l->getExitingBlocks(exit_blocks); + // FIXME: Merge with the same code in instrumentLoop + llvm::SmallSet subloops_bb; + auto & subloops = l->getSubLoops(); + for(llvm::Loop * subloop : subloops) { + for(llvm::BasicBlock * bb : subloop->blocks()) + subloops_bb.insert(bb); + } + + llvm::SmallSet cf_branches; + for(llvm::BasicBlock * bb : l->blocks()) { + // loop basic block that is not a part of subloop + if(!subloops_bb.count(bb)) { + for(llvm::Instruction & inst : *bb) { + //detect all branches + llvm::BranchInst * br = llvm::dyn_cast(&inst); + if(br && br->isConditional()) { + cf_branches.insert(&inst); + } else if(llvm::SwitchInst * _switch = llvm::dyn_cast(&inst)) { + cf_branches.erase(br); + } + } + } + } + + // now remove branches that are parts of exit + for(llvm::BasicBlock * bb : exit_blocks) { + llvm::Instruction * inst = bb->getTerminator(); + llvm::BranchInst * br = llvm::dyn_cast(inst); + if(br && br->isConditional()) { + cf_branches.erase(br); + } else if(llvm::SwitchInst * _switch = llvm::dyn_cast(inst)) { + cf_branches.erase(_switch); + } + } + llvm::errs() << "Done " << cf_branches.size() << '\n'; + // insert branches for further processing + for(llvm::Instruction * br : cf_branches) { + f.loop_cf_branches.push_back({static_cast(nested_loop_idx), br}); + } + + nested_loop_idx++; + std::copy(subloops.begin(), subloops.end(), std::back_inserter(*next_loops)); + } + std::swap(cur_loops, next_loops); + next_loops->clear(); + } + } + int DfsanInstr::analyzeLoop(Function & f, llvm::Loop & l, std::vector> & data, int depth) { @@ -302,6 +390,8 @@ namespace perf_taint { for(llvm::Loop * l : subloops) { loop_count += analyzeLoop(f, *l, data, depth + 1); } + + return loop_count; } @@ -411,6 +501,7 @@ namespace perf_taint { foundFunction(f, true, override_counter); Function & func = instrumented_functions[&f].getValue(); + int nested_loop_idx = 0; // TODO: refactor for general loop interface for(llvm::Loop * l : *linfo) { std::vector< std::vector > data; @@ -425,6 +516,10 @@ namespace perf_taint { func.loops_sizes.push_back(depth); func.loops_sizes.push_back(structure_size); func.loops_sizes.push_back(loop_count); + + // cf-branches + if(EnableBranches) + analyzeLoopBranches(func, *l, nested_loop_idx); } int implicit_loops = 0; @@ -586,6 +681,7 @@ namespace perf_taint { // loop basic block that is not a part of subloop if(!subloops_bb.count(bb)) { for(llvm::Instruction & inst : *bb) { + // call! if(llvm::CallBase * call = llvm::dyn_cast(&inst)) { @@ -623,6 +719,7 @@ namespace perf_taint { llvm::errs() << "Unknown branch: " << *inst << '\n'; } } + nested_loop_idx++; internal_nested_index++; std::copy(subloops.begin(), subloops.end(), std::back_inserter(*next_loops)); @@ -738,7 +835,6 @@ namespace perf_taint { // } // } //} - int idx = 0; for(llvm::Value * callsite : func.callsites) { llvm::Instruction * s = llvm::dyn_cast(callsite); @@ -1280,6 +1376,115 @@ namespace perf_taint { builder.getInt32Ty(), false, llvm::GlobalValue::WeakAnyLinkage, builder.getInt32(number_of_loops), glob_loops_number_name); + + // control-flow branches + if(EnableBranches) { + std::vector branches_offsets; + std::vector branches_counts; + branches_counts.push_back(0); + int inserted_branches = 0; + + size_t offset = 0; + auto b = begin; + for(Function * f : functions) { + + + branches_offsets.push_back(offset); + int number_of_loops = 0; + int loops = f->loops_sizes.size() / 3; + for(int i = 0; i < loops; ++i) + number_of_loops += f->loops_sizes[3*i + 2]; + auto branches_begin = f->loop_cf_branches.begin(); + auto branches_end = f->loop_cf_branches.end(); + for(int i = 0; i < number_of_loops; ++i) { + + // Certain number of branches for this particular loop + int count = 0; + while(branches_begin != branches_end && (*branches_begin).nested_loop_idx == i) { + ++count; ++branches_begin; + } + if(count) { + branches_counts.push_back(branches_counts.back() + count); + inserted_branches += count; + } + // No branches - same position as before + else { + branches_counts.push_back(branches_counts.back()); + } + + } + offset += number_of_loops; + } + + array_type = llvm::ArrayType::get(builder.getInt32Ty(), functions_count); + std::vector offsets(branches_offsets.size()); + std::transform(branches_offsets.begin(), branches_offsets.end(), + offsets.begin(), + [this](int offset) { + return builder.getInt32(offset); + } + ); + glob_branches_offsets = new llvm::GlobalVariable(m, + array_type, false, llvm::GlobalValue::WeakAnyLinkage, + llvm::ConstantArray::get(array_type, offsets), + glob_branches_offsets_name + ); + + array_type = llvm::ArrayType::get(builder.getInt32Ty(), branches_counts.size()); + std::vector counts(branches_counts.size()); + std::transform(branches_counts.begin(), branches_counts.end(), + counts.begin(), + [this](int offset) { + return builder.getInt32(offset); + } + ); + glob_branches_counts = new llvm::GlobalVariable(m, + array_type, false, llvm::GlobalValue::WeakAnyLinkage, + llvm::ConstantArray::get(array_type, counts), + glob_branches_counts_name + ); + + array_type = llvm::ArrayType::get(builder.getInt16Ty(), inserted_branches); + glob_branches_data = new llvm::GlobalVariable(m, + array_type, + false, + llvm::GlobalValue::WeakAnyLinkage, + llvm::ConstantAggregateZero::get(array_type), + glob_branches_data_name); + + new llvm::GlobalVariable(m, builder.getInt1Ty(), false, + llvm::GlobalValue::WeakAnyLinkage, + builder.getTrue(), + glob_branches_enabled_name + ); + + } else { + array_type = llvm::ArrayType::get(builder.getInt32Ty(), 0); + glob_branches_offsets = new llvm::GlobalVariable(m, + array_type, + false, + llvm::GlobalValue::WeakAnyLinkage, + llvm::ConstantAggregateZero::get(array_type), + glob_branches_offsets_name); + glob_branches_counts = new llvm::GlobalVariable(m, + array_type, + false, + llvm::GlobalValue::WeakAnyLinkage, + llvm::ConstantAggregateZero::get(array_type), + glob_branches_counts_name); + array_type = llvm::ArrayType::get(builder.getInt16Ty(), 0); + glob_branches_data = new llvm::GlobalVariable(m, + array_type, + false, + llvm::GlobalValue::WeakAnyLinkage, + llvm::ConstantAggregateZero::get(array_type), + glob_branches_data_name); + new llvm::GlobalVariable(m, builder.getInt1Ty(), false, + llvm::GlobalValue::WeakAnyLinkage, + builder.getFalse(), + glob_branches_enabled_name + ); + } } void Instrumenter::checkLoop(int nested_loop_idx, int function_idx, @@ -1638,6 +1843,12 @@ namespace perf_taint { m.getOrInsertFunction("__dfsw_EXTRAP_WRITE_PARAMETER", func_t); write_parameter_function = m.getFunction("__dfsw_EXTRAP_WRITE_PARAMETER"); assert(write_parameter_function); + + // void __dfsw_EXTRAP_WRITE_PARAMETER(int8_t *, size_t, int32_t) + func_t = llvm::FunctionType::get(void_t, {i16_t, idx_t, idx_t, idx_t}, false); + m.getOrInsertFunction("__dfsw_perf_taint_branch", func_t); + taint_branch_function = m.getFunction("__dfsw_perf_taint_branch"); + assert(taint_branch_function); } // polly lib/CodeGen/PerfMonitor.cpp @@ -1759,6 +1970,35 @@ namespace perf_taint { return layout->getTypeStoreSize(ptr->getPointerElementType()); } + void Instrumenter::instrumentLoopBranch(llvm::Instruction * branch, int32_t function_idx, + int32_t nested_loop_idx, int32_t branch_idx) + { + builder.SetInsertPoint(branch); + llvm::BranchInst * br = llvm::dyn_cast(branch); + llvm::Value * label = nullptr; + if(br) { + llvm::Instruction * inst = + llvm::dyn_cast(br->getCondition()); + assert(inst); + label = getLabel(inst); + } else if(const llvm::SwitchInst * _switch = llvm::dyn_cast(branch)) { + llvm::Instruction * inst = + llvm::dyn_cast(_switch->getCondition()); + assert(inst); + label = getLabel(inst); + } + + builder.CreateCall( + taint_branch_function, + { + label, + builder.getInt32(function_idx), + builder.getInt32(nested_loop_idx), + builder.getInt32(branch_idx) + } + ); + } + void InstrumenterVisiter::visitLoadInst(llvm::LoadInst & load) { if(avoid_duplicates) { diff --git a/lib/runtime/json_export.cpp b/lib/runtime/json_export.cpp index 0a5f496..581d474 100644 --- a/lib/runtime/json_export.cpp +++ b/lib/runtime/json_export.cpp @@ -184,14 +184,16 @@ json_t __dfsw_json_write_loop(int function_idx, int32_t * loop_data, bool non_empty = false; json_t params = __dfsw_json_write_single_loop(deps++, clean); - json_t branches = __dfsw_json_write_single_loop_branch(function_idx, nested_loop_idx); if(!params.empty()) { loop["params"] = params; non_empty = true; } - if(!branches.empty() && !branches.is_null()) { - loop["branches"] = branches; - non_empty = true; + if(__perf_taint_loop_branches_enabled) { + json_t branches = __dfsw_json_write_single_loop_branch(function_idx, nested_loop_idx); + if(!branches.empty() && !branches.is_null()) { + loop["branches"] = branches; + non_empty = true; + } } loop["level"] = 0; int level_size = *loop_structure, prev_level_size = 0, next_level_size = 0; @@ -230,14 +232,16 @@ json_t __dfsw_json_write_loop(int function_idx, int32_t * loop_data, json_t loop_level; loop_level["level"] = level; json_t params = __dfsw_json_write_single_loop(deps++, clean); - json_t branches = __dfsw_json_write_single_loop_branch(function_idx, nested_loop_idx); if(!params.empty()) { non_empty = true; loop_level["params"] = params; } - if(!branches.empty() && !branches.is_null()) { - loop_level["branches"] = branches; - non_empty = true; + if(__perf_taint_loop_branches_enabled) { + json_t branches = __dfsw_json_write_single_loop_branch(function_idx, nested_loop_idx); + if(!branches.empty() && !branches.is_null()) { + loop_level["branches"] = branches; + non_empty = true; + } } while(begin != end && begin->nested_loop_idx == nested_loop_idx) { if(begin->len > 0) { diff --git a/lib/runtime/runtime.c b/lib/runtime/runtime.c index 5effb86..30f57b5 100644 --- a/lib/runtime/runtime.c +++ b/lib/runtime/runtime.c @@ -372,15 +372,17 @@ void __dfsw_EXTRAP_MARK_IMPLICIT_LABEL(uint16_t function_idx, void __dfsw_perf_taint_branch(uint16_t label, int32_t function_idx, int32_t nested_loop_idx, int32_t branch_idx) { - size_t param_count = __EXTRAP_INSTRUMENTATION_EXPLICIT_PARAMS_COUNT - + __EXTRAP_INSTRUMENTATION_IMPLICIT_PARAMS_COUNT; - // We iterate only to # of currently known parameters - uint16_t found_params = 0; - for(size_t i = 0; i < param_count; ++i) - if(__EXTRAP_INSTRUMENTATION_LABELS[i]) { - bool has_label = dfsan_has_label(label, __EXTRAP_INSTRUMENTATION_LABELS[i]); - found_params |= (has_label << i); - } - __perf_taint_loop_branches_data[branch_idx] |= found_params; + if(__perf_taint_loop_branches_enabled) { + size_t param_count = __EXTRAP_INSTRUMENTATION_EXPLICIT_PARAMS_COUNT + + __EXTRAP_INSTRUMENTATION_IMPLICIT_PARAMS_COUNT; + // We iterate only to # of currently known parameters + uint16_t found_params = 0; + for(size_t i = 0; i < param_count; ++i) + if(__EXTRAP_INSTRUMENTATION_LABELS[i]) { + bool has_label = dfsan_has_label(label, __EXTRAP_INSTRUMENTATION_LABELS[i]); + found_params |= (has_label << i); + } + __perf_taint_loop_branches_data[branch_idx] |= found_params; + } } From 75e34cb68e20bde96efbb5e75026fd8bab020962 Mon Sep 17 00:00:00 2001 From: Marcin Copik Date: Sat, 28 Nov 2020 05:38:26 +0100 Subject: [PATCH 4/6] Add tests --- tests/unit/dataflow/cf_branches.cpp | 104 ++++++++ tests/unit/dataflow/cf_branches.cpp.json | 293 +++++++++++++++++++++++ 2 files changed, 397 insertions(+) create mode 100644 tests/unit/dataflow/cf_branches.cpp create mode 100644 tests/unit/dataflow/cf_branches.cpp.json diff --git a/tests/unit/dataflow/cf_branches.cpp b/tests/unit/dataflow/cf_branches.cpp new file mode 100644 index 0000000..fd01a17 --- /dev/null +++ b/tests/unit/dataflow/cf_branches.cpp @@ -0,0 +1,104 @@ +// RUN: %clangxx %cxx_flags %s -emit-llvm -o %t1.bc +// RUN: %opt %opt_flags -perf-taint-branches-enable < %t1.bc 2> /dev/null > %t1.tainted.bc +// RUN: %llc %llc_flags < %t1.tainted.bc > %t1.tainted.o +// RUN: %clangxx %link_flags %t1.tainted.o -o %t1.exe +// RUN: %execparams %t1.exe 10 10 10 > %t1.json +// RUN: diff -w %s.json %t1.json + +// RUN: %opt %opt_flags %opt_cfsan -perf-taint-branches-enable < %t1.bc 2> /dev/null > %t2.tainted.bc +// RUN: %llc %llc_flags < %t2.tainted.bc > %t2.tainted.o +// RUN: %clangxx %link_flags %t2.tainted.o -o %t2.exe +// RUN: %execparams %t2.exe 10 10 10 > %t2.json +// RUN: diff -w %s.json %t2.json + +#include +#include + +#include "perf-taint/PerfTaint.hpp" + +int f(int x1, int x2, int x3) +{ + int tmp = 1; + for(int i = 0; i < x2; ++i) { + tmp += 2*i; + } + for(int i = 0; i < x1 + x2; ++i) { + if(x3 > 5) + tmp += i; + else + tmp += 2*i; + } + for(int i = 0; i < x1; ++i) { + tmp += 2*i; + } + return tmp; +} + +int g(int x1, int x2, int x3) +{ + int tmp = 1; + for(int j = x1; j < x2; ++j) { + for(int i = 0; i < x2; ++i) { + if(x2 > 5) + tmp += i; + else + tmp += 2*i; + } + for(int i = 0; i < x1 + x2; ++i) { + tmp += 2*i; + } + if(x1 == 0) + tmp += j; + else + tmp += 2*j; + } + return tmp; +} + +int h(int x1, int x2, int x3) +{ + int tmp = 1; + for(int j = 0; j < x1 + x2; ++j) { + for(int i = 0; i < x2; ++i) { + if(x2 > 5) + tmp += i; + else + tmp += 2*i; + for(int k = 0; k < x1 + x2; ++k) { + if(x3 > 5) + tmp += i; + else + tmp += 2*i; + + if(x1 < 11) + tmp += i; + else + tmp += 2*i; + } + for(int k = 0; k < x1 + x2; ++k) { + tmp += 2; + } + } + } + return tmp; +} + +int main(int argc, char ** argv) +{ + int x1 EXTRAP = atoi(argv[1]); + int x2 EXTRAP = atoi(argv[2]); + register_variable(&x1, VARIABLE_NAME(x1)); + register_variable(&x2, VARIABLE_NAME(x2)); + int y = 2*x1 + 1; + + f(x2, y, 10); + f(x2, y, x1); + + g(x2, y, 10); + g(x2, y, x1); + + h(x1, x2, 10); + h(x1, x2, x1 + x2); + + return 0; +} diff --git a/tests/unit/dataflow/cf_branches.cpp.json b/tests/unit/dataflow/cf_branches.cpp.json new file mode 100644 index 0000000..1c89798 --- /dev/null +++ b/tests/unit/dataflow/cf_branches.cpp.json @@ -0,0 +1,293 @@ +{ + "functions": { + "_Z1fiii": { + "file": "tests/unit/dataflow/cf_branches.cpp", + "func_idx": 0, + "line": 19, + "loops": [ + { + "callstacks": [ + [ + 3 + ] + ], + "instance": { + "0": { + "level": 0, + "params": [ + [ + "x2" + ] + ] + }, + "1": { + "level": 0, + "params": [ + [ + "x1", + "x2" + ] + ] + }, + "2": { + "level": 0, + "params": [ + [ + "x1" + ] + ] + } + } + }, + { + "callstacks": [ + [ + 3 + ] + ], + "instance": { + "0": { + "level": 0, + "params": [ + [ + "x2" + ] + ] + }, + "1": { + "branches": { + "0": [ + "x1" + ] + }, + "level": 0, + "params": [ + [ + "x1", + "x2" + ] + ] + }, + "2": { + "level": 0, + "params": [ + [ + "x1" + ] + ] + } + } + } + ] + }, + "_Z1giii": { + "file": "tests/unit/dataflow/cf_branches.cpp", + "func_idx": 1, + "line": 37, + "loops": [ + { + "callstacks": [ + [ + 3 + ] + ], + "instance": { + "0": { + "branches": { + "0": [ + "x2" + ] + }, + "level": 0, + "loops": { + "0": { + "branches": { + "0": [ + "x1" + ] + }, + "level": 1, + "params": [ + [ + "x1" + ] + ] + }, + "1": { + "level": 1, + "params": [ + [ + "x1", + "x2" + ] + ] + } + }, + "params": [ + [ + "x1", + "x2" + ] + ] + } + } + } + ] + }, + "_Z1hiii": { + "file": "tests/unit/dataflow/cf_branches.cpp", + "func_idx": 2, + "line": 58, + "loops": [ + { + "callstacks": [ + [ + 3 + ] + ], + "instance": { + "0": { + "level": 0, + "loops": { + "0": { + "branches": { + "0": [ + "x2" + ] + }, + "level": 1, + "loops": { + "0": { + "branches": { + "1": [ + "x1" + ] + }, + "level": 2, + "params": [ + [ + "x1", + "x2" + ] + ] + }, + "1": { + "level": 2, + "params": [ + [ + "x1", + "x2" + ] + ] + } + }, + "params": [ + [ + "x2" + ] + ] + } + }, + "params": [ + [ + "x1", + "x2" + ] + ] + } + } + }, + { + "callstacks": [ + [ + 3 + ] + ], + "instance": { + "0": { + "level": 0, + "loops": { + "0": { + "branches": { + "0": [ + "x2" + ] + }, + "level": 1, + "loops": { + "0": { + "branches": { + "0": [ + "x1", + "x2" + ], + "1": [ + "x1" + ] + }, + "level": 2, + "params": [ + [ + "x1", + "x2" + ] + ] + }, + "1": { + "level": 2, + "params": [ + [ + "x1", + "x2" + ] + ] + } + }, + "params": [ + [ + "x2" + ] + ] + } + }, + "params": [ + [ + "x1", + "x2" + ] + ] + } + } + } + ] + } + }, + "functions_demangled_names": [ + "f(int, int, int)", + "g(int, int, int)", + "h(int, int, int)", + "main", + "atoi", + "void register_variable(int*, char const*)" + ], + "functions_mangled_names": [ + "_Z1fiii", + "_Z1giii", + "_Z1hiii", + "main", + "atoi", + "_Z17register_variableIiEvPT_PKc" + ], + "functions_names": [ + "f", + "g", + "h", + "main", + "atoi", + "register_variable" + ], + "parameters": [ + "x1", + "x2" + ] +} From 10a28bb56decc336628eaa7ac9d0680f5698c354 Mon Sep 17 00:00:00 2001 From: Marcin Copik Date: Sat, 28 Nov 2020 21:36:09 +0100 Subject: [PATCH 5/6] update dockerfile --- docker/Dockerfile.perf-taint | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/docker/Dockerfile.perf-taint b/docker/Dockerfile.perf-taint index 4710431..f4dbe21 100644 --- a/docker/Dockerfile.perf-taint +++ b/docker/Dockerfile.perf-taint @@ -1,11 +1,15 @@ -FROM mcopik/clang-dfsan:dfsan-9.0 +FROM mcopik/clang-dfsan:cfsan-9.0 +ENV PATH="/opt/llvm/bin:${PATH}" USER docker +ADD . ${HOME}/perf-taint +RUN mkdir ${HOME}/build + RUN sudo apt-get update\ # Basic developer tools && sudo apt-get install -y --no-install-recommends\ - ninja-build\ + make\ cmake\ git\ # OpenMPI @@ -19,4 +23,16 @@ RUN sudo apt-get update\ python3-pip\ && pip3 install wheel\ && pip3 install lit + +RUN cd ${HOME}/build && cmake\ + -DCMAKE_C_COMPILER=clang\ + -DCMAKE_CXX_COMPILER=clang++\ + -DLLVM_DIR=/opt/llvm/\ + -DWITH_MPI=ON\ + -DLIBCXX_PATH=/opt/llvm/\ + -DWITH_REGRESSION_TESTS=ON\ + -DWITH_UNIT_TESTS=On\ + -DLLVM_WITH_CFSAN=On\ + -DCMAKE_INSTALL_PREFIX=${HOME}/install\ + ../perf-taint && make From 09256400ee52044fb159651d2dcd80af76435744 Mon Sep 17 00:00:00 2001 From: Marcin Copik Date: Sun, 29 Nov 2020 05:33:45 +0100 Subject: [PATCH 6/6] Dead code removal --- tools/JSONConverter.cpp | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/tools/JSONConverter.cpp b/tools/JSONConverter.cpp index 84bdd95..33f1711 100644 --- a/tools/JSONConverter.cpp +++ b/tools/JSONConverter.cpp @@ -564,7 +564,6 @@ json_t convert(json_t & input, bool generate_full_data) json_t new_callstack; //new_callstack.push_back(json_t::array()); for(auto v : value) { - //size_t size = new_callstack.size(); //if(!important_indices.count(v.get())) { // new_callstack.resize(size*2); @@ -575,19 +574,8 @@ json_t convert(json_t & input, bool generate_full_data) // push update_u -> update_h :( old hack around ScoreP filtering if(important_indices.count(v.get()) - #if ENABLE_FIX_ICS_2019_RESULTS - //update_h - || v.get() == 418 - || v.get() == 420 - //setup_output_gauge_file - || v.get() == 352 - || v.get() == 354 - //cleanup_gathers - || v.get() == 345 - || v.get() == 347 - #endif || input["functions_names"][v.get()] == "main") - new_callstack.push_back(v); + new_callstack.push_back(v); } bool found = false; for(json_t & prev_callstack : converted_callstacks)