-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathplay.py
72 lines (58 loc) · 1.59 KB
/
play.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
import numpy as np
from montecarlo import MCTS, MCTSNet
from games import Chess, TicTacToe, Othello
from players import NetPlayer
from nnet.keras import AlphaZeroNet
#game = Chess()
#game = TicTacToe(3)
# Play on a 6x6 Othello board (the alternatives above are kept for quick swapping).
game = Othello(6)
print(game.board())
print()
# Keras network wrapper for the game; blocks=10 presumably sets the number of
# residual blocks -- TODO confirm against AlphaZeroNet's definition.
net = AlphaZeroNet(game, blocks=10, epochs=10, save_path='./models/keras/othello.model')
# Load previously saved weights from save_path.
net.load_model()
# Player that uses the network during tree search.
player = NetPlayer(game, net)
# Global ply counter, incremented after every applied move (see play()).
move = 0
def play(t):
    """Play one round: the net-guided player moves, then a plain-MCTS opponent.

    Each searcher is trained for ``t`` seconds, a move is sampled from its
    policy and applied to the shared ``game``; Q/N statistics and the board
    are printed after every move. Returns early as soon as the game has a
    winner.

    Mutates module-level state: ``game`` and the ply counter ``move``.
    """
    global move
    # Net-guided searcher moves first; stop if its move ends the game.
    if _take_turn(MCTSNet(game, player, train_time=t, exploration=0.5)):
        return
    # Plain-rollout MCTS opponent replies.
    _take_turn(MCTS(game, train_time=t))


def _take_turn(searcher):
    """Train *searcher*, sample and apply one move; return True if game over."""
    global move
    searcher.train()
    # NOTE(review): ``move`` starts at 0 and only increments, so ``move < 0``
    # is never true and prop is always 0 (greedy policy). A threshold such as
    # ``move < N`` was probably intended for early-game exploration -- kept
    # as-is to preserve behavior.
    prop = int(move < 0)
    policy = searcher.get_policy(prop=prop)
    # Sample an action index from the policy distribution.
    act = np.random.choice(range(game.action_size()), p=policy)
    s = game.state()
    game.move(act)
    print(searcher.get_Qsa(s, act), searcher.get_Nsa(s, act))
    move += 1
    print(game.board())
    print('')
    return game.winner() != 0
# Alternate rounds (one second of search per move) until a decisive result,
# then report the final outcome.
while True:
    if game.winner() != 0:
        break
    play(1)
print(game.result())