Skip to content
This repository has been archived by the owner on Jan 24, 2024. It is now read-only.

Dev shadow #39

Open
wants to merge 6 commits into
base: execution
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,4 @@ user_tag

# Editor config
.vscode
core.*
5 changes: 2 additions & 3 deletions symbolic_trace/opcode_translator/executor/function_graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,12 +43,11 @@ class FunctionGraph:
This Graph can be compiled as an f_locals-dependent function which produces the same outputs.
"""

def __init__(self, frame):
def __init__(self, f_globals, f_code):
self.sir_ctx = SymbolicTraceContext()
self.inner_out = set()
self.input_trackers = []
self.pycode_gen = PyCodeGen(frame)
self.py_frame = frame
self.pycode_gen = PyCodeGen(f_globals, f_code)

def collect_input_trackers(self, inputs):
outputs = []
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ def __init__(self, frame: types.FrameType):
self._locals = {}
self._globals = {}
self._lasti = 0 # idx of instruction list
self.graph = FunctionGraph(self._frame)
self.graph = FunctionGraph(frame.f_globals, frame.f_code)
self.new_code = None

self._instructions = get_instructions(self._code)
Expand Down
105 changes: 95 additions & 10 deletions symbolic_trace/opcode_translator/executor/pycode_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@

from ..instruction_utils import gen_instr, modify_instrs


'''
code options for PyCodeObject
'''
Expand Down Expand Up @@ -141,13 +142,17 @@ def stacksize(instructions):


class PyCodeGen:
def __init__(self, frame):
self._frame = frame
self._origin_code = frame.f_code
def __init__(self, f_globals, f_code):
self._origin_code = f_code
self._code_options = gen_code_options(self._origin_code)
self._f_globals = frame.f_globals
self._f_globals = f_globals
self._instructions = []
self.objname_map = {} # map from name to LOAD_GLOBAL index
# map from name to LOAD_GLOBAL/LOAD_ATTR/STORE_GLOBAL/STORE_ATTR index
self.co_names_argval2arg : Dict[str, int] = {}
# map from varname to LOAD_FAST/STORE_FAST index
self.co_varnames_argval2arg : Dict[str, int] = {}
# map from const to LOAD_CONST index
self.co_consts_argval2arg : Dict[str, int] = {}

def gen_pycode(self):
"""
Expand All @@ -160,23 +165,63 @@ def gen_pycode(self):
return new_code

def gen_load_object(self, obj, obj_name):
if obj_name not in self.objname_map:
return self.load_global(obj, obj_name)

def load_global(self, obj, obj_name):
idx, inserted = self._get_name_arg_and_inserted(argval=obj_name)
if inserted:
self._f_globals[obj_name] = obj
self._code_options["co_names"].append(obj_name)
idx = len(self._code_options["co_names"]) - 1
self.objname_map[obj_name] = idx
idx = self.objname_map[obj_name]
self._add_instr("LOAD_GLOBAL", arg=idx, argval=obj_name)

def store_global(self, name):
    """Append a STORE_GLOBAL instruction for `name`.

    The operand is the index that `_get_name_arg` returns for `name`.
    """
    operand = self._get_name_arg(name)
    self._add_instr(
        "STORE_GLOBAL",
        arg=operand,
        argval=name,
    )

def load_attr(self, attr_name):
    """Append a LOAD_ATTR instruction for `attr_name`.

    The operand is the index that `_get_name_arg` returns for `attr_name`.
    """
    operand = self._get_name_arg(attr_name)
    self._add_instr(
        "LOAD_ATTR",
        arg=operand,
        argval=attr_name,
    )

def import_name(self, name):
    """Append an IMPORT_NAME instruction for `name`.

    The operand is the index that `_get_name_arg` returns for `name`.
    """
    operand = self._get_name_arg(name)
    self._add_instr(
        "IMPORT_NAME",
        arg=operand,
        argval=name,
    )

def load_method(self, method_name):
    """Append a LOAD_METHOD instruction for `method_name`.

    The operand is the index that `_get_name_arg` returns for `method_name`.
    """
    operand = self._get_name_arg(method_name)
    self._add_instr(
        "LOAD_METHOD",
        arg=operand,
        argval=method_name,
    )

def load_const(self, obj):
    """Append a LOAD_CONST instruction that loads `obj`.

    The operand is the index that `_get_const_arg` returns for `obj`.
    """
    const_slot = self._get_const_arg(obj)
    self._add_instr(
        "LOAD_CONST",
        arg=const_slot,
        argval=obj,
    )

def load_fast(self, varname):
    """Append a LOAD_FAST instruction for the local `varname`.

    The operand is the index that `_get_varname_arg` returns for `varname`.
    """
    local_slot = self._get_varname_arg(varname)
    self._add_instr(
        "LOAD_FAST",
        arg=local_slot,
        argval=varname,
    )

def store_fast(self, varname):
    """Append a STORE_FAST instruction for the local `varname`.

    The operand is the index that `_get_varname_arg` returns for `varname`.
    """
    local_slot = self._get_varname_arg(varname)
    self._add_instr(
        "STORE_FAST",
        arg=local_slot,
        argval=varname,
    )

def gen_build_tuple(self, count):
    """Append a BUILD_TUPLE instruction whose `arg` and `argval` are both `count`."""
    self._add_instr(
        "BUILD_TUPLE",
        arg=count,
        argval=count,
    )

def gen_call_function(self, argc=0):
    """Forward `argc` to `call_function`; this method adds nothing of its own."""
    forwarded = {"argc": argc}
    self.call_function(**forwarded)

def call_function(self, argc=0):
    """Append a CALL_FUNCTION instruction whose `arg` and `argval` are both `argc`."""
    self._add_instr(
        "CALL_FUNCTION",
        arg=argc,
        argval=argc,
    )

def call_method(self, argc=0):
    """Append a CALL_METHOD instruction whose `arg` and `argval` are both `argc`."""
    self._add_instr(
        "CALL_METHOD",
        arg=argc,
        argval=argc,
    )

def pop_top(self):
    """Append a POP_TOP instruction; it is emitted with `arg` and `argval` set to None."""
    self._add_instr(
        "POP_TOP",
        arg=None,
        argval=None,
    )

def gen_return(self):
    """Append a RETURN_VALUE instruction (no operand is passed)."""
    opname = "RETURN_VALUE"
    self._add_instr(opname)

def return_value(self):
    """Append a RETURN_VALUE instruction (no operand is passed)."""
    opname = "RETURN_VALUE"
    self._add_instr(opname)

def add_pure_instructions(self, instructions):
"""
add instructions and do nothing.
Expand All @@ -186,7 +231,47 @@ def add_pure_instructions(self, instructions):
def _add_instr(self, *args, **kwargs):
    """Create an instruction with `gen_instr` and append it to the stream.

    All positional and keyword arguments are passed to `gen_instr` unchanged.

    Returns:
        The newly created instruction object.
    """
    new_instr = gen_instr(*args, **kwargs)
    self._instructions.append(new_instr)
    return new_instr

def pprint(self):
    """Print one line per generated instruction: its opname, then its argval."""
    column_gap = "\t\t"
    for entry in self._instructions:
        print(entry.opname, column_gap, entry.argval)

def _get_name_arg(self, argval):
    """Return only the index part of `_get_name_arg_and_inserted(argval)`."""
    lookup = self._get_name_arg_and_inserted(argval)
    return lookup[0]

def _get_name_arg_and_inserted(self, argval):
    """Resolve `argval` against the `co_names` table.

    Delegates to `_get_arg_and_inserted` with the `co_names` option list and
    the `co_names_argval2arg` map, and returns its `(index, inserted)` result.
    """
    known_names = self.co_names_argval2arg
    return self._get_arg_and_inserted(
        arg_map_name="co_names", argval2arg=known_names, argval=argval
    )

def _get_varname_arg(self, argval):
    """Return only the index part of `_get_varname_arg_and_inserted(argval)`."""
    lookup = self._get_varname_arg_and_inserted(argval)
    return lookup[0]

def _get_varname_arg_and_inserted(self, argval):
    """Resolve `argval` against the `co_varnames` table.

    Delegates to `_get_arg_and_inserted` with the `co_varnames` option list
    and the `co_varnames_argval2arg` map, and returns its
    `(index, inserted)` result.
    """
    known_varnames = self.co_varnames_argval2arg
    return self._get_arg_and_inserted(
        arg_map_name="co_varnames", argval2arg=known_varnames, argval=argval
    )

def _get_const_arg(self, argval):
    """Return only the index part of `_get_const_arg_and_inserted(argval)`."""
    lookup = self._get_const_arg_and_inserted(argval)
    return lookup[0]

def _get_const_arg_and_inserted(self, argval):
    """Resolve `argval` against the `co_consts` table.

    Delegates to `_get_arg_and_inserted` with the `co_consts` option list and
    the `co_consts_argval2arg` map, and returns its `(index, inserted)` result.
    """
    known_consts = self.co_consts_argval2arg
    return self._get_arg_and_inserted(
        arg_map_name="co_consts", argval2arg=known_consts, argval=argval
    )

def _get_arg_and_inserted(self, arg_map_name, argval2arg, argval):
    """Return the operand index of `argval` in a code-option table.

    Args:
        arg_map_name: Key into `self._code_options` selecting the table
            (the callers in this class pass "co_names", "co_varnames" or
            "co_consts").
        argval2arg: Dict caching `argval -> index` for that table.
        argval: The value whose index is wanted; it must be hashable because
            it is used as a dict key.

    Returns:
        `(index, inserted)`, where `inserted` is True only when `argval` was
        appended to the table by this call.
    """
    table = self._code_options[arg_map_name]
    cached = argval2arg.get(argval)

    # Dict lookup compares with ==, so 1, 1.0 and True all land on the same
    # cache entry. Reusing that entry blindly would make e.g. LOAD_CONST True
    # load the int 1, so a cached slot is only trusted when the stored object
    # has exactly the same type.
    if cached is not None and type(table[cached]) is type(argval):
        return cached, False

    if cached is not None:
        # An equal value of another type owns the cache entry; look for an
        # existing slot of the right type before growing the table.
        for pos, existing in enumerate(table):
            if type(existing) is type(argval) and existing == argval:
                argval2arg[argval] = pos
                return pos, False

    table.append(argval)
    new_index = len(table) - 1
    argval2arg[argval] = new_index
    return new_index, True
1 change: 0 additions & 1 deletion symbolic_trace/opcode_translator/executor/variables.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,6 @@ def from_value(value, graph):
return ListVariable(value)
elif isinstance(value, tuple):
return TupleVariable(value)
return
raise RuntimeError(
f"Don't Implement a value binding method for type: `{type(value)}`"
)
Expand Down
3 changes: 2 additions & 1 deletion symbolic_trace/opcode_translator/transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

from ..utils import log, log_do
from .executor.opcode_executor import InstructionTranslatorCache
from ..shadow.symbolic_translator_cache import SymbolicTranslatorCache
from .skip_files import need_skip_path

CustomCode = collections.namedtuple("CustomCode", ["code"])
Expand All @@ -20,7 +21,7 @@ def eval_frame_callback(frame):
log(8, "[transform_opcode] old_opcode: " + frame.f_code.co_name + "\n")
log_do(8, lambda: dis.dis(frame.f_code))

new_code = InstructionTranslatorCache()(frame)
new_code = SymbolicTranslatorCache()(frame)

log(
7,
Expand Down
17 changes: 17 additions & 0 deletions symbolic_trace/shadow/initial_symbolic_executor.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
from .symbolic_executor import SymbolicExecutor
from .symbolic_frame_mgr import SymbolicFrameMgr
from ..utils import no_eval_frame
import types

class InitialSymbolicExecutor(SymbolicExecutor):
    """Symbolic executor running on the frame from `SymbolicFrameMgr.current_frame`."""

    @no_eval_frame
    def __init__(self, code_obj: types.CodeType):
        """Bind the executor to the frame `SymbolicFrameMgr` reports for `code_obj`."""
        super().__init__(SymbolicFrameMgr.current_frame(code_obj))

    def pre_RETURN_VALUE(self, instruction):
        """Compile the traced graph for the return value and record the new code.

        The single remaining stack entry is popped and handed to
        `function_graph.start_compile`; the resulting code object is then
        registered with `SymbolicTranslatorCache` for this frame's `f_code`.
        """
        stack_depth = len(self.frame.stack)
        assert stack_depth == 1, "Stack must have one element."
        returned = self.pop()
        compiled = self.frame.function_graph.start_compile(returned)
        # start_compile yields (new_code, guard_fn); the guard is not used here.
        new_code, _guard_fn = compiled
        # Imported at call time rather than module level
        # (presumably to avoid a circular import — TODO confirm).
        from .symbolic_translator_cache import SymbolicTranslatorCache

        cache = SymbolicTranslatorCache()
        cache.update_executed_code_obj(self.frame.f_code, new_code)
14 changes: 14 additions & 0 deletions symbolic_trace/shadow/normal_symbolic_executor.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
from .symbolic_executor import SymbolicExecutor
from .symbolic_frame_mgr import SymbolicFrameMgr
from ..utils import no_eval_frame
import types

class NormalSymbolicExecutor(SymbolicExecutor):
    """Symbolic executor running on a frame from `SymbolicFrameMgr.create_frame`."""

    @no_eval_frame
    def __init__(self, code_obj: types.CodeType):
        """Create a frame for `code_obj` and bind the executor to it."""
        super().__init__(SymbolicFrameMgr.create_frame(code_obj))

    def pre_RETURN_VALUE(self, instruction):
        """Intentionally a no-op: nothing is done before RETURN_VALUE here."""
        return None
4 changes: 4 additions & 0 deletions symbolic_trace/shadow/symbolic_dict.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@

class SymbolicDict:
    """Placeholder class; it defines no attributes or behavior of its own yet."""

90 changes: 90 additions & 0 deletions symbolic_trace/shadow/symbolic_executor.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
from .symbolic_frame import SymbolicFrame
from ..opcode_translator.executor.source import LocalSource
from ..opcode_translator.executor.variables import (
ConstantVariable,
)
from ..utils import no_eval_frame
import types
import dis
import sys

class SymbolicExecutor:
    """Symbolically executes the instructions of one `SymbolicFrame`.

    Every supported opcode has a method named after it (`LOAD_FAST`,
    `BINARY_ADD`, ...) that performs the instruction's effect on the frame's
    symbolic stack and locals. Opcodes that need work *before* the real
    instruction runs additionally have a `pre_<OPNAME>` hook.
    """

    frame: SymbolicFrame
    # next instruction to be executed.
    next_instruction_index: int

    def __init__(self, frame: SymbolicFrame):
        """Bind the executor to `frame` and start at instruction index 0."""
        self.frame = frame
        self.next_instruction_index = 0

    @no_eval_frame
    def __call__(self, instruction_index):
        """Run the post action for the instruction at `instruction_index`.

        If `instruction_index` is not the index this executor expected next,
        the mismatch is treated as the result of a jump and handed to
        `_run_post_jump_instruction` first.
        """
        # Indexing up front rejects an out-of-range index before any handler runs.
        _ = self.frame.instructions[instruction_index]
        if self.next_instruction_index != instruction_index:
            self._run_post_jump_instruction(
                self.next_instruction_index, instruction_index
            )
        self._run_post_instruction(instruction_index)
        self.next_instruction_index = instruction_index + 1

    @no_eval_frame
    def pre_action(self, instruction_index):
        """Dispatch to the `pre_<OPNAME>` hook of the given instruction.

        Review note from the pull request: every control-flow opcode (jump
        instructions, RETURN_VALUE, YIELD_VALUE, ...) must provide a
        `pre_XXX` hook; all other opcodes only need their post action.
        """
        instruction = self.frame.instructions[instruction_index]
        method_name = f"pre_{instruction.opname}"
        assert hasattr(self, method_name), f"{method_name} not implemented"
        getattr(self, method_name)(instruction)

    def pre_RETURN_VALUE(self, instruction):
        """Hook run before RETURN_VALUE; subclasses must override it."""
        raise NotImplementedError(
            "Derived class should override pre_RETURN_VALUE() method"
        )

    def _run_post_jump_instruction(
        self, jump_instruction_index, target_instruction_index
    ):
        """Handle the post action of a jump instruction (not implemented yet).

        TODO: the intended flow is to fetch the instruction at
        `jump_instruction_index`, check that it is a jump, and decide whether
        the jump to `target_instruction_index` was taken. The helpers that
        flow needs (`get_instruction`, `_is_jump_instruction`, `_is_jump`) do
        not exist in this class yet, so fail with an explicit error instead
        of an AttributeError/NameError.
        """
        raise NotImplementedError(
            "post action for jump instructions is not implemented: "
            f"jump at index {jump_instruction_index}, "
            f"target index {target_instruction_index}"
        )

    def _run_post_instruction(self, instruction_index):
        """Invoke the opcode method matching the instruction at `instruction_index`."""
        assert instruction_index >= 0
        instruction = self.frame.instructions[instruction_index]
        opname = instruction.opname
        assert hasattr(self, opname), f"{opname} not supported"
        method = getattr(self, opname)
        method(instruction)

    def push(self, value):
        """Push `value` onto the frame's symbolic stack."""
        self.frame.stack.append(value)

    def pop(self):
        """Pop and return the top of the frame's symbolic stack."""
        return self.frame.stack.pop()

    def LOAD_FAST(self, instr):
        """Push the local named by `instr.argval`, tagging it with a `LocalSource`."""
        varname = instr.argval
        var = self.frame.f_locals[varname]
        var.try_set_source(LocalSource(instr.arg, varname))
        self.push(var)

    def STORE_FAST(self, instr):
        """
        Pop the stack top and store it as the local named by `instr.argval`.

        TODO: side effect may happen
        """
        var = self.pop()
        self.frame.f_locals[instr.argval] = var

    def LOAD_CONST(self, instr):
        """Push `instr.argval` wrapped in a `ConstantVariable`."""
        var = ConstantVariable(instr.argval)
        self.push(var)

    def BINARY_ADD(self, instr):
        """Pop the right then the left operand and push `left + right`."""
        b = self.pop()
        a = self.pop()
        self.push(a + b)

    def BINARY_MULTIPLY(self, instr):
        """Pop the right then the left operand and push `left * right`."""
        b = self.pop()
        a = self.pop()
        self.push(a * b)

    def RETURN_VALUE(self, instr):
        """Always raises; the post action of RETURN_VALUE is not meant to run."""
        raise NotImplementedError("dead code never to be executed.")

    def __del__(self):
        # Do nothing.
        pass
24 changes: 24 additions & 0 deletions symbolic_trace/shadow/symbolic_frame.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
from typing import List,Dict, Optional
from . import symbolic_frame_stack as symbolic_frame_stack
import types
import dis

class SymbolicFrame:
    """Symbolic counterpart of a frame, linked into `symbolic_frame_stack`.

    Creating a frame records the current stack top as `f_back` and then
    pushes the new frame onto `symbolic_frame_stack`.
    """

    f_locals: Dict[str, "VariableTracker"]
    function_graph: "FunctionGraph"
    f_code: types.CodeType
    stack: List["VariableTracker"]
    instructions: List[dis.Instruction]
    f_back: "SymbolicFrame"

    def __init__(self, f_locals, function_graph, code_obj, instructions):
        """Store the frame state and register the frame on the symbolic stack.

        Args:
            f_locals: Mapping of local names to their tracked variables.
            function_graph: Graph object associated with this frame.
            code_obj: Code object this frame executes; stored as `f_code`.
            instructions: Instruction list for `code_obj`.
        """
        # Frame state handed in by the caller.
        self.f_locals, self.function_graph = f_locals, function_graph
        self.f_code, self.instructions = code_obj, instructions
        # Every frame starts with an empty symbolic value stack.
        self.stack = []
        # Capture the previous top *before* pushing, so f_back is the caller.
        self.f_back = symbolic_frame_stack.top()
        symbolic_frame_stack.push(self)

    def __del__(self):
        # Hands f_back to symbolic_frame_stack.pop when the frame is collected
        # (presumably restoring the previous top — TODO confirm the pop() contract).
        symbolic_frame_stack.pop(self.f_back)
Loading