-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdebug.py
33 lines (24 loc) · 825 Bytes
/
debug.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
import numpy as np
from games import TicTacToe
from players import RandomPlayer, MCTSPlayer
from montecarlo import MCTS
# Debug driver: repeatedly plays TicTacToe games by sampling moves from an
# MCTS policy, printing the search statistics and the board after every move.

# NOTE(review): 3 is presumably the board size -- confirm against games.TicTacToe.
game = TicTacToe(3)
print(game.board())

player_random = RandomPlayer(game)

# NOTE(review): mcts_play and player are constructed but never referenced again
# in the visible script; they are kept in case construction has side effects.
mcts_play = MCTS(game=game, player=player_random, episodes=800)
player = MCTSPlayer(game=game, mcts=mcts_play)

# The search object that actually drives the loop below.
mcts = MCTS(game=game, player=player_random, episodes=800)

for game_index in range(1000):
    # Keep moving while winner() reports 0 (presumably "no result yet" --
    # TODO confirm how a draw is reported, otherwise a full board could spin here).
    while game.winner() == 0:
        mcts.train()

        # Draw one action at random, weighted by the policy the search returns.
        policy = mcts.get_policy(prop=1)
        action = np.random.choice(mcts.action_space, p=policy)

        # Diagnostics for the position *before* the move is applied.
        q_values = mcts.get_Qsa(game.state(), mcts.action_space)
        visit_total = sum(mcts.get_Nsa(game.state(), mcts.action_space))
        print(policy, action, q_values, visit_total)

        game.move(action)
        print(game.board())
        # presumably syncs the search with the position just reached -- verify in montecarlo.MCTS
        mcts.update()

    # Game finished: put the board back to its starting state for the next game.
    initial_state = game.start()
    game.set_state(initial_state)

    print(game_index)
    print(game.board())
    print()
    # Called again after the reset, presumably so the search follows the fresh state.
    mcts.update()