-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathSmaliClassDef.py
380 lines (280 loc) · 13.3 KB
/
SmaliClassDef.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
import os.path
import SmaliTypes
import StigmaStringParsingLib
import Instrumenter
import re
from SmaliMethodDef import SmaliMethodDef
class SmaliClassDef:
    """In-memory representation of one smali class file.

    Attributes set by ``__init__``:
        header: list of strings, the lines from the beginning of the file
            up to the first section marker.
        static_fields: list of strings, starting at the ``# static fields``
            marker line.
        instance_fields: list of strings, starting at the
            ``# instance fields`` marker line.
        methods: list of SmaliMethodDef objects, one per ``.method`` block.
        file_name: the path this object was loaded from.
        class_name: last token of the first line of the smali file,
            example: Lcom/google/android/material/animation/AnimationUtils;
        internal_class_names: list of class-name strings regarded as
            internal; starts out holding only ``class_name``.

    NOTE(review): older comments also mention ``self.other_scds`` (a
    dictionary of the other SmaliClassDef objects of the app); nothing in
    this class assigns it.

    Because ``__eq__`` is defined without ``__hash__``, instances are not
    hashable (Python sets ``__hash__`` to ``None`` in that case).
    """

    def __init__(self, file_name):
        """Read ``file_name`` and split it into header, fields and methods.

        Raises whatever ``open`` raises for an unreadable path, and
        IndexError when the file is empty or a ``.method`` block has no
        terminating ``.end method`` line.
        """
        # These are just lists of strings
        # Should be filled in before instrument
        self.header = []
        self.static_fields = []
        self.instance_fields = []

        # list of the class names (strings) of all internal classes
        # should probably be a set
        self.internal_class_names = []

        # list of SmaliMethodDef, used by the instrumentation later
        self.methods = []

        self.file_name = file_name
        self.class_name = SmaliClassDef.extract_class_name(file_name)
        self.internal_class_names.append(self.class_name)

        # 'with' guarantees the handle is released even if reading fails
        with open(file_name, "r") as fh:
            lines = fh.readlines()

        cur_dest = self.header
        # NOTE(review): only the "# direct methods" marker clears this flag;
        # in a file without that marker, lines found between methods keep
        # being appended to cur_dest -- confirm this is intended.
        pre_methods = True
        idx = 0
        while idx < len(lines):
            starts_method = re.match(
                StigmaStringParsingLib.BEGINS_WITH_DOT_METHOD, lines[idx])
            if starts_method is not None:
                # Collect every line of the method, including the closing
                # ".end method" line; idx ends up one past that line.
                method_code = []
                ends_method = re.match(
                    StigmaStringParsingLib.BEGINS_WITH_DOT_END_METHOD,
                    lines[idx])
                while ends_method is None:
                    method_code.append(lines[idx])
                    ends_method = re.match(
                        StigmaStringParsingLib.BEGINS_WITH_DOT_END_METHOD,
                        lines[idx])
                    idx += 1
                self.methods.append(SmaliMethodDef(method_code, self))
                # Re-run the loop test on the line after ".end method":
                # this stops cleanly when the last method ends the file and
                # no longer skips a ".method" line that follows immediately.
                continue

            current = lines[idx]
            if current == "# static fields\n":
                cur_dest = self.static_fields
            if current == "# instance fields\n":
                cur_dest = self.instance_fields
            if current == "# direct methods\n":
                pre_methods = False

            if pre_methods:
                cur_dest.append(current)

            idx += 1

    @staticmethod
    def extract_class_name(filename):
        """Return the last whitespace-separated token of the file's first line.

        Raises IndexError when the first line is empty or blank.
        """
        with open(filename, "r") as fh:
            first_line = fh.readline()
        # split() already discards the trailing newline
        return first_line.split()[-1]

    @staticmethod
    def is_function(line):
        """Return True when ``line`` matches REGEX_BEGINS_WITH_INVOKE."""
        # check this line is a method call (begins with "invoke-*")
        return re.match(
            StigmaStringParsingLib.REGEX_BEGINS_WITH_INVOKE, line) is not None

    @staticmethod
    def _get_taint_storage_name_pair(identifier, reg_name):
        """Compute the name and declaration line of a taint-storage field.

        ``identifier`` is a method name or an instance field name and
        ``reg_name`` a register name (may be ""), e.g. ``<init>`` and ``v2``
        give ``init_v2_TAINT:F``.

        Returns a tuple ``(field_name, declaration_line)`` where the
        declaration is ``".field public static <field_name>\\n"``.
        """
        # edge case: constructors show up as <init> and <clinit>; the angle
        # brackets are dropped from the generated field name
        identifier = identifier.replace("<", "").replace(">", "")

        if reg_name != "":
            static_f_name = identifier + "_" + str(reg_name) + "_TAINT:F"
        else:
            static_f_name = identifier + "_TAINT:F"

        full_name = ".field public static " + static_f_name + "\n"
        return (static_f_name, full_name)

    def get_super_class(self):
        """Return ``SmaliTypes.from_string`` of the 2nd token of header line 2."""
        super_token = self.header[1].split(" ")[1].strip()
        return SmaliTypes.from_string(super_token)

    def create_taint_field(self, identifier, reg_name=""):
        """Declare a static taint-storage field (once) and return its name."""
        static_f_name, full_name = self._get_taint_storage_name_pair(
            identifier, reg_name)

        # could be more efficient as a hash map instead of a list but that
        # might change the order AND the number of items is small
        # (probably < 50) so it doesn't really matter
        if full_name not in self.static_fields:
            self.static_fields.append(full_name)
            # blank separator: the entire list is written to the output file
            self.static_fields.append("\n")

        return static_f_name

    def create_taint_field_smart(self, calling_method, reg_name=""):
        """Declare a taint field that is static or instance like its method.

        Currently not called anywhere in this class. It might be useful in
        the future if the taint tags need to be instance fields and not
        always static; that decision would require substantial changes to
        the instrumenters.

        Returns the field name, ``<method>_<reg_name>_TAINT:T``.
        """
        identifier = calling_method.get_name()
        identifier = identifier.replace("<", "").replace(">", "")

        # might have two consecutive under-scores but who cares
        name = "_".join([identifier, reg_name, "TAINT:T"])

        if calling_method.is_static():
            full_name = ".field public static " + name + "\n"
            fields_list = self.static_fields
        else:
            full_name = ".field public " + name + "\n"
            fields_list = self.instance_fields

        if full_name not in fields_list:
            fields_list.append(full_name)
            # blank separator: the entire list is written to the output file
            fields_list.append("\n")

        return name

    def is_internal_function(self, line):
        """Return True when ``line`` is an invoke whose last token is in ``methods``.

        NOTE(review): ``methods`` holds SmaliMethodDef objects, so the
        membership test relies on how SmaliMethodDef compares to a string.
        """
        if not self.is_function(line):
            return False
        func_name = line.split(" ")[-1]
        return func_name in self.methods

    def is_external_function(self, line):
        """Return True when ``line`` is an invoke whose last token is not in ``methods``."""
        if not self.is_function(line):
            return False
        func_name = line.split(" ")[-1]
        return func_name not in self.methods

    def instrument(self):
        """Grow the locals of, and instrument, every eligible method."""
        # this will write code into methods
        for m in self.methods:
            if self._should_instrument_method(m):
                m.grow_locals(Instrumenter.MAX_DESIRED_NUM_REGISTERS)
                m.instrument()

    def _should_instrument_method(self, m):
        """Return a truthy value when method ``m`` should be instrumented."""
        # maybe this method should be in the Instrumenter class
        if m.signature.is_abstract or m.signature.is_native:
            # We shouldn't instrument methods that don't have code / locals
            return False

        launcher_oncreate = (
            Instrumenter.start_of_launcher_oncreate_method_handler is not None
            and self.is_launcher_activity()
            and m.signature.name == "onCreate")
        method_start = Instrumenter.start_of_method_handler is not None
        opcodes = Instrumenter.instrumentation_map != {}
        # opcodes = (none of Instrumenter.instrumentation_map.keys() are in m)

        return launcher_oncreate or method_start or opcodes

    def is_launcher_activity(self):
        """Return True when an entry of LAUNCHER_ACTIVITIES equals this class."""
        # Remember, Instrumenter.LAUNCHER_ACTIVITIES is a list of
        # SmaliTypes.ObjectReference
        for item in Instrumenter.LAUNCHER_ACTIVITIES:
            # item is on the left, so its own __eq__ is tried first
            if item == self:
                return True
        return False

    def write_to_file(self, class_smali_file):
        """Write header, fields and all methods to ``class_smali_file``."""
        with open(class_smali_file, "w") as fh:
            for line in self.header + self.static_fields + self.instance_fields:
                fh.write(line)

            fh.write("# methods\n")
            for m in self.methods:
                for line in m.raw_text:
                    # raw_text entries may be instruction objects rather
                    # than strings, hence the str() conversion
                    fh.write(str(line))
                fh.write("\n")

    def overwrite_to_file(self):
        """Write this class back to the file it was loaded from."""
        self.write_to_file(self.file_name)

    def get_num_lines(self):
        """Return the number of stored header, field and method-text entries."""
        total_lines = (len(self.header) + len(self.static_fields)
                       + len(self.instance_fields))
        for m in self.methods:
            total_lines += len(m.raw_text)
        return total_lines

    def verbose(self):
        """Print the class in the same layout ``write_to_file`` uses."""
        for line in self.header + self.static_fields + self.instance_fields:
            print(line)

        print("# methods\n")
        for m in self.methods:
            for line in m.raw_text:
                print(str(line))
            print("\n")

    def get_num_comparison_instructions(self):
        """Return the sum of ``get_num_comparison_instructions()`` over methods."""
        count = 0
        for m in self.methods:
            count = count + m.get_num_comparison_instructions()
        return count

    def get_num_field_declarations(self):
        """Return the number of static plus instance ``.field`` lines."""
        return self.get_num_static_fields() + self.get_num_instance_fields()

    def get_num_method_declarations(self):
        """Return how many methods were parsed from the file."""
        return len(self.methods)

    def _count_references(self, filter_function):
        """Count distinct last tokens of method lines accepted by ``filter_function``."""
        # this should be computed here and not computed else-where
        # and cached. The reasoning is because the number of
        # references changes drastically after the instrumentation
        ref_set = set()
        for m in self.methods:
            for line in m.raw_text:
                if filter_function(line):
                    name = StigmaStringParsingLib.break_into_tokens(line)[-1]
                    ref_set.add(name)
        return len(ref_set)

    def get_num_field_references(self):
        """Return the number of distinct field references in all methods."""
        return self._count_references(
            StigmaStringParsingLib.is_field_instruction)

    def get_num_method_references(self):
        """Return the number of distinct method-call references in all methods."""
        return self._count_references(
            StigmaStringParsingLib.is_method_call_instruction)

    @staticmethod
    def _count_fields(fieldsList):
        """Count the lines of ``fieldsList`` that start with ``.field``.

        Leading whitespace is allowed. The dot is escaped so that only a
        literal ``.field`` directive is counted.
        """
        begins_with_field = r"^\s*\.field"
        count = 0
        for line in fieldsList:
            if re.search(begins_with_field, line) is not None:
                count += 1
        return count

    def get_num_static_fields(self):
        """Return the number of ``.field`` lines in ``static_fields``."""
        return self._count_fields(self.static_fields)

    def get_num_instance_fields(self):
        """Return the number of ``.field`` lines in ``instance_fields``."""
        return self._count_fields(self.instance_fields)

    def is_internal_class(self, other_class_name):
        """Return True when ``other_class_name`` is in ``internal_class_names``."""
        return other_class_name in self.internal_class_names

    def __str__(self):
        return str(self.file_name)

    def __eq__(self, other):
        """Compare by class name, or by file name when ``other`` is a str.

        * SmaliClassDef: equal when the ``class_name`` values match.
        * SmaliTypes.ObjectReference: equal when ``class_name`` matches
          ``other.raw_type_string``.
        * str: equal when ``str(self)`` (the file name) matches.
        * anything else: False.
        """
        if isinstance(other, SmaliClassDef):
            return self.class_name == other.class_name

        if isinstance(other, SmaliTypes.ObjectReference):
            return self.class_name == other.raw_type_string

        if isinstance(other, str):
            return str(self) == other

        return False
class MockSmaliClassDef(SmaliClassDef):
    """Stand-in SmaliClassDef that is built without reading any file.

    All containers start empty, the class is named ``LMockClass;`` and the
    file name is the empty string.
    """

    def __init__(self):
        # The parent constructor is intentionally not invoked here; every
        # attribute is assigned directly instead.
        self.header = []
        self.static_fields = []
        self.instance_fields = []
        self.methods = []

        self.file_name = ""
        self.class_name = "LMockClass;"
        self.internal_class_names = [self.class_name]

    def is_internal_class(self, other):
        """Always answer False, whatever class name is asked about."""
        return False
def tests():
    """Load test/Main.smali and check the super class that is parsed from it."""
    smali_path = os.path.join("test", "Main.smali")
    class_def = SmaliClassDef(smali_path)

    # show which type get_super_class() hands back
    print(type(class_def.get_super_class()))

    expected_super = "Landroid/support/v7/app/AppCompatActivity;"
    assert (class_def.get_super_class() == expected_super)

    print("ALL SmaliClassDef TESTS PASSED")


# Run the self-test only when this file is executed as a script.
if __name__ == "__main__":
    tests()