-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathteam1.py
383 lines (340 loc) · 13.4 KB
/
team1.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
import sys
import random
import signal
import copy
import time
class Player1:
def __init__(self):
self.maxdepth = 5 ##default search depth of mini-max
self.came = 0 ## Store time when our bot is asked to make move
self.timed_out = False;
self.inf = 1000000000000
self.threshold = [2, 5.9, 3.9, 0]
self.win_pos = [(0, 1, 2),(3, 4, 5),(6, 7, 8),(0, 3, 6),(1, 4, 7),(2, 5, 8),(0, 4, 8),(2, 4, 6)]
self.twos = [] ## Store all two combination of "x" and "o" in each row, diagonal, column
for each in self.win_pos:
self.twos.append((each[0],each[1],each[2]))
self.twos.append((each[1],each[2],each[0]))
self.twos.append((each[0],each[2],each[1]))
self.corners = [0, 2, 6, 8]
self.rest = [1, 3, 5, 7]
self.flag = " "
self.opp_flag = " "
self.local_score ={ ##for each 3 by 3 board (cell heuristic scores)
"winpos" : 1, ## when u got any of the winning position, increment by this
"two" : 0.025, ## when in a line (diag, hor, vert) u have two "x" and one "-", increment by this
"center" : 0.020, ## when u have access to the center, increment by this
"corner" : 0.007, ## when u have access to nay of the corner, increment by this
"rest" : 0.002, ## when u have access to nay of the remaining cells, increment by this.
"blockedtwo":0.010, ## when in a line (diag, hor, vert) you have two "x" and one "o"
"blockpos":0.040, ## if in a line (diag, hor, vert) you have two "x" and one "o", then decrease utility by this amount.
}
self.global_score ={ ## for each 3 by 3 block (block heuristic scores)
"winpos" : 100,
"two" : 95,
"center" : 90,
"corner" : 30,
"rest" : 5,
"blockedtwo":25,
"blockpos":60,
}
self.llookup = {'x' : {},'o' : {}}
self.glookup = {'x' : {},'o' : {}}
self.memoization() ##Pre-compute the heuristic values for each 3x3 possible sub grid
def hsh(self, temp_node):
return tuple(temp_node)
def print_lists(self,bs):
print("=========== PRINT NODES =========")
for i in range(0, 9, 3):
print(bs[i] + " " + bs[i+1] + " " + bs[i+2])
print("==================================")
print()
def memoization(self):
##expensive step. Need to work on alternative.
## for 3*3 ultimate tic-tac-toe the obj initialisation takes place within 4 millisec
## for 4*4 ultimate tic-tac-toe the obj initialisation takes place within 9 seconds
symbol = ['x', 'o', '-']
for enum in range(0, 3**9): ##iterating over all combinations in 3*3 tic tac toe grid (block)
## NOTE: many combinations are not valid and are handled afterwards.
temp = enum
node = []## store every combination
for i in range(0, 9):
node.append(symbol[temp % 3])
temp //= 3
#self.print_lists(node)
hshed = self.hsh(node)
self.llookup['x'][hshed] = self.heuristic_local(node, 'x') ##compute sub-grid heuristic
self.llookup['o'][hshed] = -self.llookup['x'][hshed]
self.glookup['x'][hshed] = self.heuristic_global(node, 'x') ##compute block heuristic
self.glookup['o'][hshed] = -self.glookup['x'][hshed]
def blocks_allowed(self, old_move, block_stat):
##compute valid available blocks (which are marked with '-') as per preset rules.
blocks = []
if old_move[0]%3 == 0:
if old_move[1]%3 == 0:
blocks = [1,3]
if old_move[1]%3 == 1:
blocks = [0,2]
if old_move[1]%3 == 2:
blocks = [1,5]
if old_move[0]%3 == 1:
if old_move[1]%3 == 0:
blocks = [0,6]
if old_move[1]%3 == 1:
blocks = [4]
if old_move[1]%3 == 2:
blocks = [2,8]
if old_move[0]%3 == 2:
if old_move[1]%3 == 0:
blocks = [3,7]
if old_move[1]%3 == 1:
blocks = [6,8]
if old_move[1]%3 == 2:
blocks = [5,7]
final_blocks_allowed = []
for block in blocks:
if block_stat[block] == '-':
final_blocks_allowed.append(block)
if old_move == (-1, -1):
final_blocks_allowed = [4]
return final_blocks_allowed
def cells_allowed(self, temp_board, blocks_allowed, block_stat):
##Compute valid available cells (available moves):
cells = []
for block in blocks_allowed:
start_row = (block // 3) * 3
start_col = ((block) % 3) * 3
for i in range(start_row, start_row + 3):
for j in range(start_col, start_col + 3):
if temp_board[i][j] == '-':
cells.append((i,j)) ##valid cells in available blocks
if not cells:
## if all the available blocks are filled then find the cells in left over blocks.
for i in range(9):
if block_stat[i] != '-':
continue
start_row = (i // 3) * 3
start_col = ((i) % 3) * 3
for j in range(start_row, start_row + 3):
for k in range(start_col, start_col + 3):
if temp_board[j][k] == '-':
cells.append((j,k))
return cells
def heuristic(self, node, temp_block):
## simple evaluator function to find eval_val of each node (board configuration) in mini-max search tree.
## Eval_Score(node) = sum(local_heuristic of each of its sub-grid) + sum(global_heuristic of each of its block)
## TODO: Improve the scoring method.
utility = 0
for i in range(9):
start_row = (i // 3) * 3
start_col = ((i) % 3) * 3
i_stat = []
for j in range(start_row, start_row + 3):
for k in range(start_col, start_col + 3):
i_stat.append(node[j][k])
utility += self.llookup[self.flag][self.hsh(i_stat)]
bl_stat = copy.deepcopy(temp_block)
for i in range(9):
if bl_stat[i] == 'D':
bl_stat[i] = '-'
utility += self.glookup[self.flag][self.hsh(bl_stat)]
return utility
def heuristic_local(self, node, curr_flag):
curr_opp_flag = " "
if curr_flag == 'x':
curr_opp_flag = 'o'
else:
curr_opp_flag = 'x'
utility = 0
i_stat = node
#Local win
for each in self.win_pos:
if i_stat[each[0]] == curr_flag and i_stat[each[1]] == curr_flag and i_stat[each[2]] == curr_flag:
utility += self.local_score["winpos"]
break
if i_stat[each[0]] == curr_opp_flag and i_stat[each[1]] == curr_opp_flag and i_stat[each[2]] == curr_opp_flag:
utility -= self.local_score["winpos"]
break
#Local twos
for each in self.twos:
if i_stat[each[0]] == curr_flag and i_stat[each[1]] == curr_flag and i_stat[each[2]]=='-':
utility += self.local_score["two"]
if i_stat[each[0]] == curr_flag and i_stat[each[1]] == curr_flag and i_stat[each[2]]==curr_opp_flag:
utility += self.local_score["blockedtwo"]
utility -= self.local_score["blockpos"]#opponent in blocking position,dec utility
if i_stat[each[0]] == curr_opp_flag and i_stat[each[1]] == curr_opp_flag and i_stat[each[2]]=='-':
utility -= self.local_score["two"]
if i_stat[each[0]] == curr_opp_flag and i_stat[each[1]] == curr_opp_flag and i_stat[each[2]]==curr_flag:
utility -= self.local_score["blockedtwo"]
utility += self.local_score["blockpos"]#self in blocking position,inc utility
#Local corner
for each in self.corners:
if i_stat[each] == curr_flag:
utility += self.local_score["corner"]
if i_stat[each] == curr_opp_flag:
utility -= self.local_score["corner"]
#Local rest
for each in self.rest:
if i_stat[each] == curr_flag:
utility += self.local_score["rest"]
if i_stat[each] == curr_opp_flag:
utility -= self.local_score["rest"]
#Local center
if i_stat[4] == curr_flag:
utility += self.local_score["center"]
if i_stat[4] == curr_opp_flag:
utility -= self.local_score["center"]
return utility
def heuristic_global(self, node, curr_flag):
curr_opp_flag = " "
if curr_flag == 'x':
curr_opp_flag = 'o'
else:
curr_opp_flag = 'x'
utility = 0
i_stat = node
#Global win
for each in self.win_pos:
if i_stat[each[0]] == curr_flag and i_stat[each[1]] == curr_flag and i_stat[each[2]] == curr_flag:
utility += self.global_score["winpos"]
break
if i_stat[each[0]] == curr_opp_flag and i_stat[each[1]] == curr_opp_flag and i_stat[each[2]] == curr_opp_flag:
utility -= self.global_score["winpos"]
break
#Global twos
for each in self.twos:
if i_stat[each[0]] == curr_flag and i_stat[each[1]] == curr_flag and i_stat[each[2]]=='-':
utility += self.global_score["two"]
if i_stat[each[0]] == curr_flag and i_stat[each[1]] == curr_flag and i_stat[each[2]]==curr_opp_flag:
utility += self.global_score["blockedtwo"]
utility -= self.global_score["blockpos"]#opponent in blocking position,dec utility
if i_stat[each[0]] == curr_opp_flag and i_stat[each[1]] == curr_opp_flag and i_stat[each[2]]=='-':
utility -= self.global_score["two"]
if i_stat[each[0]] == curr_opp_flag and i_stat[each[1]] == curr_opp_flag and i_stat[each[2]]==curr_flag:
utility -= self.global_score["blockedtwo"]
utility += self.global_score["blockpos"]#self in blocking position,inc utility
#Global corner
for each in self.corners:
if i_stat[each] == curr_flag:
utility += self.global_score["corner"]
if i_stat[each] == curr_opp_flag:
utility -= self.global_score["corner"]
#Global rest
for each in self.rest:
if i_stat[each] == curr_flag:
utility += self.global_score["rest"]
if i_stat[each] == curr_opp_flag:
utility -= self.global_score["rest"]
#Global center
if i_stat[4] == curr_flag:
utility += self.global_score["center"]
if i_stat[4] == curr_opp_flag:
utility -= self.global_score["center"]
return utility
def genChild(self, node, temp_block, mov, current_flag):
## generate the successor of node and mark the block_state accordingly.
temp_node = copy.deepcopy(node)
temp_node[mov[0]][mov[1]] = current_flag
current_temp_block = copy.deepcopy(temp_block)
block_num = (mov[0] // 3) * 3 + (mov[1] // 3)
temp_stat = []
start_row = (block_num // 3) * 3
start_col = ((block_num) % 3) * 3
for j in range(start_row, start_row + 3):
for k in range(start_col, start_col + 3):
temp_stat.append(temp_node[j][k])
for each in self.win_pos:
if temp_stat[each[0]] == self.flag and temp_stat[each[1]] == self.flag and temp_stat[each[2]] == self.flag:
current_temp_block[block_num] = self.flag
break
if temp_stat[each[0]] == self.opp_flag and temp_stat[each[1]] == self.opp_flag and temp_stat[each[2]] == self.opp_flag:
current_temp_block[block_num] = self.opp_flag
break
return (temp_node, current_temp_block)
def alphabeta(self, node, depth, alpha, beta, maximizingPlayer, old_move, temp_block):
if (depth == 0 and depth != self.maxdepth) or self.timed_out == True: ##When we reached the leaf nodes or in case of TLE while exploring some node at certain depth, then return the evaluated score of node.
#print("ok1: ",depth,old_move,self.timed_out)
return self.heuristic(copy.deepcopy(node), copy.deepcopy(temp_block))
if time.time() - self.came >= self.threshold[self.maxdepth-4]: ##check for TLE
#print("ok2: ",depth,old_move,self.came,time.time(),time.time() - self.came)
self.timed_out = True
return self.heuristic(copy.deepcopy(node), copy.deepcopy(temp_block))
blocks = self.blocks_allowed(old_move, temp_block)
cells_allowed = self.cells_allowed(node, blocks, temp_block)
if not cells_allowed:
return self.heuristic(copy.deepcopy(node), copy.deepcopy(temp_block))
ret_mov = " "
if maximizingPlayer:
v = -self.inf
for mov in cells_allowed:
tmp = self.genChild(node, temp_block, mov, self.flag)
child = tmp[0] ##child_node of node
current_temp_block = tmp[1] ##child_node's block
temp = self.alphabeta(copy.deepcopy(child), depth - 1, copy.deepcopy(alpha), copy.deepcopy(beta), False, copy.deepcopy(mov), copy.deepcopy(current_temp_block))
if v < temp:
v = temp
ret_mov = mov ##best_move found so far
alpha = max(alpha, v)
if beta <= alpha:
break
if depth == self.maxdepth: ##return best_move when we compelete the search at given MaxDepth
return ret_mov
else:
return v
else:
v = self.inf
for mov in cells_allowed:
tmp = self.genChild(node, temp_block, mov, self.opp_flag)
child = tmp[0]
current_temp_block = tmp[1]
temp = self.alphabeta(copy.deepcopy(child), depth - 1, copy.deepcopy(alpha), copy.deepcopy(beta), True, copy.deepcopy(mov), copy.deepcopy(current_temp_block))
if v > temp:
v = temp
ret_mov = mov
beta = min(beta, v)
if beta <= alpha:
break
if depth == self.maxdepth:
return ret_mov
else:
return v
def move(self, temp_board, temp_block, old_move, flag):
if old_move == (-1, -1):
return (3, 3)
ret2 = " "
ret3 = " "
ret4 = " "
self.timed_out = False
self.flag = flag
if self.opp_flag == " ":
if self.flag == 'x':
self.opp_flag = 'o'
else:
self.opp_flag = 'x'
self.maxdepth = 7 ## Start with initial Maxdepth of mini-max. if TLE then reduced depth by 1 (Iterative deepning)
self.came = time.time()
ret = self.alphabeta(copy.deepcopy(temp_board), self.maxdepth, -self.inf, self.inf, True, copy.deepcopy(old_move), copy.deepcopy(temp_block))
if self.timed_out == True:
self.timed_out = False
self.maxdepth = 6
self.came = time.time()
ret2 = self.alphabeta(copy.deepcopy(temp_board), self.maxdepth, -self.inf, self.inf, True, copy.deepcopy(old_move), copy.deepcopy(temp_block))
if self.timed_out == True:
self.timed_out = False
self.maxdepth = 5
self.came = time.time()
ret3 = self.alphabeta(copy.deepcopy(temp_board), self.maxdepth, -self.inf, self.inf, True, copy.deepcopy(old_move), copy.deepcopy(temp_block))
if self.timed_out == True:
self.timed_out = False
self.maxdepth = 4
self.came = time.time()
ret4 = self.alphabeta(copy.deepcopy(temp_board), self.maxdepth, -self.inf, self.inf, True, copy.deepcopy(old_move), copy.deepcopy(temp_block))
return ret4
else:
return ret3
else:
return ret2
else:
return ret
#if __name__=="__main__":
#ply = Player1()