This repository has been archived by the owner on Sep 19, 2022. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathalpha_beta_agent.py
74 lines (60 loc) · 2.68 KB
/
alpha_beta_agent.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
import random
from typing import Optional
import othello
from log_referee import LogReferee
import evaluation
class AlphaBetaAgent(othello.Agent):
    """Othello agent that picks moves by depth-limited minimax with alpha-beta pruning.

    Positions are scored with ``eval_func(state, play_as)``. The search
    maximises that score on this agent's turns and minimises it on the
    adversary's turns.
    """

    def __init__(self, play_as: othello.Player, search_depth: int = 4,
                 eval_func=evaluation.heuristic_eval_comprehensive) -> None:
        """Create an agent.

        Args:
            play_as: The side this agent moves for and evaluates positions for.
            search_depth: Look-ahead setting. ``play`` applies each root move
                and then searches ``2 * search_depth - 1`` further plies.
                Values ``<= 0`` make ``play`` score each root move directly
                with the evaluation function.
            eval_func: Callable invoked as ``eval_func(state, play_as)`` to
                score positions at the search horizon or at terminal states.
        """
        super().__init__()
        self.play_as = play_as
        self.depth = search_depth
        self.evaluation_function = lambda state: eval_func(state, self.play_as)

    @staticmethod
    def _legal_moves(state: othello.State, player: othello.Player) -> list:
        """Return ``player``'s legal actions in ``state`` as a (possibly empty) list."""
        actions = state.get_legal_actions(player)
        # Accept both "no moves" conventions (None or an empty iterable) so the
        # root and the recursive search agree on what a pass looks like.
        return [] if actions is None else list(actions)

    def _search(self, game_state: othello.State, agent: othello.Player,
                depth: int, alpha: float, beta: float) -> float:
        """Return the alpha-beta value of ``game_state`` with ``agent`` to move.

        Args:
            game_state: Position to evaluate.
            agent: Player whose turn it is at this node.
            depth: Remaining plies to search; ``<= 0`` evaluates immediately.
            alpha: Best score the maximising side is already guaranteed.
            beta: Best score the minimising side is already guaranteed.

        Returns:
            The exact minimax value when it lies strictly inside
            ``(alpha, beta)``; otherwise a bound that is sufficient for the
            caller to discard this line.
        """
        # `<= 0` (not `== 0`) so a non-positive search_depth cannot recurse
        # past the horizon all the way to terminal states.
        if depth <= 0 or game_state.is_terminal():
            return self.evaluation_function(game_state)

        moves = self._legal_moves(game_state, agent)
        if not moves:
            # No legal action: the only child is the same position with the
            # adversary to move. Without this branch the loop below would run
            # zero times and return +/-inf for the node.
            return self._search(game_state, agent.adversary, depth - 1, alpha, beta)

        maximizing = agent == self.play_as
        best = float('-inf') if maximizing else float('inf')
        for move in moves:
            score = self._search(game_state.perform_action(agent, move),
                                 agent.adversary, depth - 1, alpha, beta)
            if maximizing:
                best = max(best, score)
                alpha = max(alpha, best)
                # Cutting off on equality is safe: the minimising ancestor
                # already has a line worth `beta`, so a tie cannot change its
                # choice.
                if best >= beta:
                    break
            else:
                best = min(best, score)
                beta = min(beta, best)
                if best <= alpha:
                    break
        return best

    def play(self, state: othello.State) -> Optional[othello.Action]:
        """Choose this agent's action in ``state``.

        Returns:
            The first legal action with the highest search score, or ``None``
            when this agent has no legal action.
        """
        moves = self._legal_moves(state, self.play_as)
        if not moves:
            return None

        max_score = float('-inf')
        best_move = moves[0]
        for move in moves:
            # The best score found so far is passed down as alpha so later
            # root moves can be cut off as soon as they cannot beat it.
            score = self._search(state.perform_action(self.play_as, move),
                                 self.play_as.adversary,
                                 2 * self.depth - 1,
                                 max_score, float('inf'))
            if score > max_score:
                best_move = move
                max_score = score
        return best_move
def run_alpha_beta_agents() -> None:
    """Run a ``LogReferee`` over two default-configured alpha-beta agents.

    The DARK agent is handed to the referee first and the LIGHT agent second.
    """
    dark_agent = AlphaBetaAgent(othello.Player.DARK)
    light_agent = AlphaBetaAgent(othello.Player.LIGHT)
    match = LogReferee(dark_agent, light_agent)
    match.run()
# Script entry point: start the agent-vs-agent run only when this file is
# executed directly, not when it is imported as a module.
if __name__ == '__main__':
    run_alpha_beta_agents()