-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathAI_Game2048.py
180 lines (139 loc) · 7.16 KB
/
AI_Game2048.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
from concurrent.futures import ProcessPoolExecutor
from concurrent.futures import wait
import numpy as np
class SimGame2048ForOneDirection:
    """
    Monte Carlo rollouts for one candidate first move.

    The constructor stores the position to start from and the move under
    evaluation; ``run`` then plays ``simulation_count`` random games that all
    begin with that move and collects one score per game.

    Important settings just below.
    """

    # Number of rollouts performed by one ``run`` call, i.e. by one worker
    # task. If the caller submits k tasks per direction, each direction gets
    # k * simulation_count rollouts in total.
    simulation_count = 200

    def __init__(self, first_step, current_board, initial_score, max_depth):
        """
        Store the starting position and the move to evaluate.

        Args:
            first_step: The move every rollout starts with (e.g. 'left').
            current_board: Board to start from; handed to Game2048 unchanged.
            initial_score: Score that belongs to ``current_board``.
            max_depth: Maximum number of random moves played per rollout
                after ``first_step``.
        """
        self.first_step = first_step
        self.initial_board = current_board
        self.initial_score = initial_score
        self.max_depth = max_depth
        # Per-instance list; a class-level list would be shared (and mutated)
        # across every instance of the class.
        self.scores = []

    def run(self):
        """
        Run ``simulation_count`` random rollouts starting with ``first_step``.

        Returns:
            list: One entry per rollout: 0 if the rollout ended in game over,
            otherwise the score reported by the last step that was played.
            ``[-1]`` (a single sentinel entry) if ``first_step`` is not a
            legal move in the initial position.
        """
        # The module only imports Game2048 under its ``__main__`` guard, so
        # bind it locally to have it available wherever run() executes.
        from Game2048 import Game2048

        actions = ['left', 'up', 'down', 'right']
        start_state = (self.initial_board, self.initial_score)
        sim = Game2048(start_state)
        self.scores = []

        if not sim.move_is_legal(self.first_step):
            return [-1]

        # loop for running n simulations
        for _ in range(self.simulation_count):
            sim.set_state(start_state)
            # Keep the game-over flag of the first move: if that move already
            # ends the game there is nothing left to simulate.
            (_, score), _, done = sim.step(self.first_step)
            current_depth = 0

            # loop for each step of a simulation
            while not done and current_depth < self.max_depth:
                action = actions[np.random.randint(4)]
                (_, score), _, done = sim.step(action)
                current_depth += 1
                # TODO: maybe implement scoring system that gives points
                # based on the current state for each step

            # A rollout that ran into game over is worth nothing. Test the
            # flag itself rather than the depth counter, so a game that ends
            # exactly on the last allowed step is also scored as lost.
            self.scores.append(0 if done else score)

        return self.scores
def sim_factory(direction, board, score, max_depth):
    """
    Worker entry point: evaluate one direction from the given position.

    Meant to be submitted to a process pool, for example::

        process_pool.submit(sim_factory, direction=direction, board=env.board,
                            score=env.score, max_depth=max_depth)

    It builds the simulation for ``direction``, runs it, and returns the
    result to the caller.

    Args:
        direction: The first move every simulation starts with.
        board(board): The initial board-state for all the simulations
        score(int): The initial score of the board for all the simulations
        max_depth(int): Maximum number of random moves per simulation after
            the first move.

    Returns:
        dict: ``{'direction': direction, 'score': scores}`` where ``scores``
        is whatever ``SimGame2048ForOneDirection.run`` returned.
    """
    # The class is defined in this same module, so use it directly instead of
    # re-importing the file under a hard-coded module name (which breaks when
    # the file is renamed and loads the module a second time in the worker).
    sim = SimGame2048ForOneDirection(direction, board, score, max_depth)
    return {'direction': direction, 'score': sim.run()}
def _best_direction(results, actions):
    """Return the action whose pooled simulation scores have the highest mean.

    ``results`` is a list of ``{'direction': ..., 'score': [...]}`` dicts.
    When several actions share the highest mean, the one listed last in
    ``actions`` wins.
    """
    best_action = None
    best_mean = None
    for candidate in actions:
        pooled = []
        for result in results:
            if result['direction'] == candidate:
                pooled.extend(result['score'])
        candidate_mean = sum(pooled) / len(pooled)
        # '>=' (not '>') so that a later action replaces an equal earlier one.
        if best_mean is None or candidate_mean >= best_mean:
            best_action, best_mean = candidate, candidate_mean
    return best_action


def _confidence_half_width(scores):
    """Return the half-width of the 95% confidence interval of the mean.

    Uses 1.96 standard errors with the sample standard deviation (n - 1 in
    the denominator). With fewer than two scores the deviation is undefined,
    so infinity is returned instead of dividing by zero.
    """
    count = len(scores)
    if count < 2:
        return float('inf')
    mean = sum(scores) / count
    sd = np.sqrt(sum((s - mean) ** 2 for s in scores) / (count - 1))
    return 1.96 * sd / np.sqrt(count)


def main():
    """
    The main function. Only to be run on the main thread.

    For every maximum simulation depth (MSD) in ``range(4, 11)`` it plays
    complete games, choosing each move from parallel simulations, until at
    least 30 games were played and the 95% confidence half-width of the mean
    score is below 5% of that mean. The summary for the depth is then printed
    and appended to the results file. Closing the window stops the run early.
    The function ends by exiting the interpreter.
    """
    import sys

    actions = ['left', 'up', 'down', 'right']
    # How many worker tasks are submitted per direction for every move; each
    # task runs SimGame2048ForOneDirection.simulation_count simulations.
    tasks_per_direction = 2
    exit_program = False
    env = None
    process_pool = ProcessPoolExecutor(16)

    try:
        # here you can change MSD. Each depth of the range is repeated until
        # the stopping rule described in the docstring is met.
        for max_simulation_depth in range(4, 11):
            if exit_program:
                break

            # initialization of the statistical feedback system
            scores = []
            confidence_interval = 0
            mean = 0

            # loop the games for a specific max_simulation_depth
            while (confidence_interval >= 0.05 * mean or len(scores) < 30) and not exit_program:
                env = Game2048()
                env.reset()
                done = False

                # loop the steps of a game
                while not done and not exit_program:
                    # comment out this line, if you want to stop the rendering
                    env.render()

                    # A future represents one pending sim_factory call in a
                    # worker process; submit tasks_per_direction of them for
                    # each direction.
                    futures = []
                    for _ in range(tasks_per_direction):
                        futures.extend(
                            process_pool.submit(
                                sim_factory,
                                direction=direction,
                                board=env.board,
                                score=env.score,
                                max_depth=max_simulation_depth,
                            )
                            for direction in actions
                        )

                    # wait for all the process-calls to be done
                    wait(futures)
                    results = [future.result() for future in futures]

                    action = _best_direction(results, actions)
                    (_, score), _, done = env.step(action)

                    # process game events
                    for event in pygame.event.get():
                        if event.type == pygame.QUIT:
                            exit_program = True
                            break
                        if event.type == pygame.KEYDOWN and event.key == pygame.K_r:
                            env.reset()

                # after each game use the statistical feedback system to
                # calculate the confidence_interval
                # NOTE(review): a game aborted by closing the window is still
                # counted here with its partial score, as before.
                scores.append(score)
                mean = sum(scores) / len(scores)
                confidence_interval = _confidence_half_width(scores)
                print(score, " ; ", confidence_interval, "/", 0.05*mean)

            print(f'Max Depth: {max_simulation_depth}; Mean: {mean}, Confidence Interval: {mean - confidence_interval} - {confidence_interval + mean}')
            with open(r"./results simcount=400", "a") as f:
                f.write((f'Max Depth: {max_simulation_depth}; Mean: {mean}; Confidence Interval: {mean - confidence_interval} - {confidence_interval + mean}; Raw: {scores}\n'))
    finally:
        # Release the worker processes and the game window on every exit
        # path, including errors.
        process_pool.shutdown()
        if env is not None:
            env.close()

    sys.exit()
if __name__ == '__main__':
    # Script entry point. These imports run only when the file is executed
    # directly; main() looks up Game2048 and pygame as module globals, so
    # they have to be bound here before main() is called.
    from Game2048 import Game2048
    import numpy as np
    import pygame
    main()