Skip to content

Commit

Permalink
Detect undefined variables and return values with UVGs (#720)
Browse files Browse the repository at this point in the history
  • Loading branch information
Akuli authored Feb 5, 2025
1 parent 1ce2400 commit 839707c
Show file tree
Hide file tree
Showing 13 changed files with 367 additions and 103 deletions.
15 changes: 0 additions & 15 deletions broken_tests/should_succeed/return_none.jou

This file was deleted.

6 changes: 1 addition & 5 deletions compiler/builders/ast_to_builder.jou
Original file line number Diff line number Diff line change
Expand Up @@ -72,10 +72,6 @@ class AstToBuilder:

self->set_location(old)

def end_function(self) -> None:
self->builder->ret(NULL) # implicit "return" when falling off end of function
self->builder->end_function()

def local_var_exists(self, name: byte*) -> bool:
for i = 0; i < self->nlocals; i++:
if strcmp(self->locals[i].name, name) == 0:
Expand Down Expand Up @@ -632,6 +628,6 @@ def feed_ast_to_builder(func_ast: AstFunctionOrMethod*, func_location: Location,
ast2ir = AstToBuilder{builder = builder}
ast2ir.begin_function(&func_ast->types.signature, func_location, func_ast->types.locals, func_ast->types.nlocals, public)
ast2ir.build_body(&func_ast->body)
ast2ir.end_function()
builder->end_function()
free(ast2ir.locals)
free(ast2ir.loops)
12 changes: 7 additions & 5 deletions compiler/builders/llvm_builder.jou
Original file line number Diff line number Diff line change
Expand Up @@ -248,6 +248,12 @@ class LBuilder:
LLVMSetLinkage(self->llvm_func, LLVMLinkage.Private)

def end_function(self) -> None:
if self->returns_a_value:
# Implicit "return" at the end of a function that should return a value
LLVMBuildUnreachable(self->llvm_builder)
else:
LLVMBuildRetVoid(self->llvm_builder)

LLVMPositionBuilderAtEnd(self->llvm_builder, self->alloca_block)
LLVMBuildBr(self->llvm_builder, self->code_start_block)

Expand Down Expand Up @@ -549,11 +555,7 @@ class LBuilder:

def ret(self, value: LBuilderValue*) -> None:
if value == NULL:
if self->returns_a_value:
# Implicit "return" at the end of a function that should return a value
LLVMBuildUnreachable(self->llvm_builder)
else:
LLVMBuildRetVoid(self->llvm_builder)
LLVMBuildRetVoid(self->llvm_builder)
else:
LLVMBuildRet(self->llvm_builder, value->llvm_value)

Expand Down
122 changes: 72 additions & 50 deletions compiler/builders/uvg_builder.jou
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import "stdlib/str.jou"
import "stdlib/mem.jou"

import "../uvg_analyze.jou"
Expand All @@ -10,29 +9,33 @@ import "./ast_to_builder.jou"
import "./either_builder.jou"


# Within this class, UVG variable ID -1 denotes a value that is not a pointer,
# or is some pointer that we don't keep track of. Any other ID represents a
# pointer to the corresponding UVG variable.
# Within this class, we use the following special UVG variable IDs.
# TODO: ideally these would be constants, not functions.

# A value that is not a pointer, or is some pointer that we don't keep track of.
# Many value-producing UBuilder methods (e.g. integer(), add(), call()) return
# this ID. Like every negative ID, it is ignored by use(), set() and
# dont_analyze(), which only emit UVG instructions for IDs >= 0.
def ANONYMOUS_VALUE_ID() -> int:
return -1

# Boolean value known to be true at compile time.
# Returned by UBuilder.boolean(True) and by not_() applied to FALSE_ID().
# When branch() receives this ID as its condition, it emits an unconditional
# jump to the "then" block instead of a two-way branch.
def TRUE_ID() -> int:
return -2

# Boolean value known to be false at compile time.
# Returned by UBuilder.boolean(False) and by not_() applied to TRUE_ID().
# When branch() receives this ID as its condition, it emits an unconditional
# jump to the "otherwise" block instead of a two-way branch.
def FALSE_ID() -> int:
return -3


class UBuilder:
uvg: Uvg
current_block: UvgBlock*
returns_a_value: bool
location: Location

def begin_function(self, sig: Signature*, public: bool) -> None:
self->returns_a_value = sig->returntype != NULL
self_class = sig->get_self_class()
if self_class == NULL:
# function
assert sizeof(self->uvg.name) >= sizeof(sig->name)
strcpy(self->uvg.name, sig->name)
else:
# method
snprintf(self->uvg.name, sizeof self->uvg.name, "%s.%s", self_class->name, sig->name)

self->uvg.signature = sig
self->current_block = self->uvg.add_block()

def end_function(self) -> None:
self->ret(NULL)
self->current_block = NULL
self->location = Location{}

Expand All @@ -54,15 +57,15 @@ class UBuilder:
b->instructions[b->ninstructions++] = ins

def use(self, var: int) -> None:
if var != -1:
if var >= 0:
self->add_instruction(UvgInstruction{kind = UvgInstructionKind.Use, var = var})

def set(self, var: int) -> None:
if var != -1:
if var >= 0:
self->add_instruction(UvgInstruction{kind = UvgInstructionKind.Set, var = var})

def dont_analyze(self, var: int) -> None:
if var != -1:
if var >= 0:
self->add_instruction(UvgInstruction{kind = UvgInstructionKind.DontAnalyze, var = var})

def stack_alloc(self, t: Type*, varname: byte*) -> int:
Expand All @@ -77,83 +80,92 @@ class UBuilder:

def dereference(self, ptr: int) -> int:
self->use(ptr)
return -1
return ANONYMOUS_VALUE_ID()

def indexed_pointer(self, ptr: int, index: int) -> int:
self->dont_analyze(ptr)
return -1
return ANONYMOUS_VALUE_ID()

def class_field_pointer(self, ptr: int, field_name: byte*) -> int:
self->dont_analyze(ptr)
return -1
return ANONYMOUS_VALUE_ID()

def global_var_ptr(self, name: byte*, var_type: Type*) -> int:
return -1
return ANONYMOUS_VALUE_ID()

def get_argument(self, i: int, argtype: Type*) -> int:
return -1
return ANONYMOUS_VALUE_ID()

def call(self, sig: Signature*, args: int*, nargs: int) -> int:
for i = 0; i < nargs; i++:
self->dont_analyze(args[i])
return -1
return ANONYMOUS_VALUE_ID()

def string_array(self, s: byte*, array_size: int) -> int:
return -1
return ANONYMOUS_VALUE_ID()

def string(self, s: byte*) -> int:
return -1
return ANONYMOUS_VALUE_ID()

def boolean(self, b: bool) -> int:
return -1
if b:
return TRUE_ID()
else:
return FALSE_ID()

def integer(self, t: Type*, value: long) -> int:
return -1
return ANONYMOUS_VALUE_ID()

def float_or_double(self, t: Type*, string: byte*) -> int:
return -1
return ANONYMOUS_VALUE_ID()

def zero_of_type(self, t: Type*) -> int:
return -1
return ANONYMOUS_VALUE_ID()

def enum_member(self, t: Type*, name: byte*) -> int:
return -1
return ANONYMOUS_VALUE_ID()

# a + b
def add(self, a: int, b: int) -> int:
return -1
return ANONYMOUS_VALUE_ID()

# a - b
def sub(self, a: int, b: int) -> int:
return -1
return ANONYMOUS_VALUE_ID()

# a * b
def mul(self, a: int, b: int) -> int:
return -1
return ANONYMOUS_VALUE_ID()

# a / b
def div(self, a: int, b: int) -> int:
return -1
return ANONYMOUS_VALUE_ID()

# a % b
def mod(self, a: int, b: int) -> int:
return -1
return ANONYMOUS_VALUE_ID()

# a == b
def eq(self, a: int, b: int) -> int:
return -1
return ANONYMOUS_VALUE_ID()

# a < b
def lt(self, a: int, b: int) -> int:
return -1
return ANONYMOUS_VALUE_ID()

# not value
def not_(self, value: int) -> int:
return -1
match value:
case TRUE_ID():
return FALSE_ID()
case FALSE_ID():
return TRUE_ID()
case _:
return ANONYMOUS_VALUE_ID()

# sizeof(any value of given type)
def size_of(self, t: Type*) -> int:
return -1
return ANONYMOUS_VALUE_ID()

# memset(ptr, 0, sizeof(*ptr))
def memset_to_zero(self, ptr: int) -> None:
Expand All @@ -162,11 +174,16 @@ class UBuilder:
# value as to
def cast(self, value: int, to: Type*) -> int:
if to->is_pointer_type():
# Needed to keep track of pointers to local variables, implicit casts are basically everywhere
return value
elif to == boolType and (value == TRUE_ID() or value == FALSE_ID()):
# This helps with e.g. if statements and loops where the condition is a literal "True".
# There is an implicit cast from bool to bool.
return value
else:
# e.g. cast pointer to long
self->dont_analyze(value)
return -1
return ANONYMOUS_VALUE_ID()

# Blocks are used to implement e.g. if statements and loops.
def add_block(self) -> UvgBlock*:
Expand All @@ -177,14 +194,19 @@ class UBuilder:
self->current_block = block

def branch(self, cond: int, then: UvgBlock*, otherwise: UvgBlock*) -> None:
# TODO: do something with cond?
assert self->current_block != NULL
assert self->current_block->terminator.kind == UvgTerminatorKind.NotSet
self->current_block->terminator = UvgTerminator{
kind = UvgTerminatorKind.Branch,
branch = UvgBranch{then = then, otherwise = otherwise},
}
self->current_block = NULL
match cond:
case TRUE_ID():
self->jump(then)
case FALSE_ID():
self->jump(otherwise)
case _:
assert self->current_block != NULL
assert self->current_block->terminator.kind == UvgTerminatorKind.NotSet
self->current_block->terminator = UvgTerminator{
kind = UvgTerminatorKind.Branch,
branch = UvgBranch{then = then, otherwise = otherwise},
}
self->current_block = NULL

def jump(self, next_block: UvgBlock*) -> None:
assert self->current_block != NULL
Expand All @@ -205,7 +227,7 @@ class UBuilder:
if value != NULL:
self->set(self->uvg.get_local_var_ptr("return"))

if self->returns_a_value:
if self->uvg.signature->returntype != NULL:
self->use(self->uvg.get_local_var_ptr("return"))

assert self->current_block != NULL
Expand Down
4 changes: 3 additions & 1 deletion compiler/main.jou
Original file line number Diff line number Diff line change
Expand Up @@ -310,7 +310,7 @@ class CompileState:

free(pending_exports)

def analyze_all_uvgs(self) -> None:
def build_and_analyze_all_uvgs(self) -> None:
for i = 0; i < self->nfiles; i++:
if command_line_args.verbosity >= 1:
printf("Building and analyzing UVGs for %s\n", self->files[i].path)
Expand Down Expand Up @@ -410,6 +410,8 @@ def main(argc: int, argv: byte**) -> int:
build_and_process_uvgs(&mainfile->ast, UvgProcessing.Print)
return 0

compst.build_and_analyze_all_uvgs()

objpaths: byte** = calloc(sizeof objpaths[0], compst.nfiles + 1)
for i = 0; i < compst.nfiles; i++:
llvm_ir = compst.files[i].build_llvm_ir()
Expand Down
16 changes: 14 additions & 2 deletions compiler/uvg.jou
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ import "stdlib/str.jou"
import "stdlib/mem.jou"

import "./errors_and_warnings.jou"
import "./types.jou"


enum UvgInstructionKind:
Expand Down Expand Up @@ -43,10 +44,19 @@ class UvgBlock:
def free(self) -> None:
free(self->instructions)

# Returns True if this block's terminator names `other` as a target:
# either the target of a Jump, or the "then" or "otherwise" side of a Branch.
# Any other terminator kind results in False.
def jumps_to(self, other: UvgBlock*) -> bool:
match self->terminator.kind:
case UvgTerminatorKind.Jump:
return other == self->terminator.jump_block
case UvgTerminatorKind.Branch:
return other == self->terminator.branch.then or other == self->terminator.branch.otherwise
case _:
return False


# We build one UVG for each function.
class Uvg:
name: byte[200]
signature: Signature*

# Each block is allocated separately so that we can pass them around as
# pointers, and they don't become invalid when adding more blocks.
Expand All @@ -70,7 +80,9 @@ class Uvg:
assert False

def print(self) -> None:
printf("===== UVG for %s =====\n", self->name)
sigstr = self->signature->to_string(True, True)
printf("===== UVG for %s =====\n", sigstr)
free(sigstr)

assert self->nblocks > 0

Expand Down
Loading

0 comments on commit 839707c

Please sign in to comment.