evaluation.py
#!/usr/bin/env python3
# encoding: utf-8
"""
Load an agent and evaluate its performance on the KAZ game.

Usage:
    python3 evaluation.py -h
    python3 evaluation.py --screen
    python3 evaluation.py --load <submission_file.py>
"""
import sys
import argparse
import logging
import importlib.util

import pygame

from utils import create_environment  # Local helper that constructs the KAZ environment.

logger = logging.getLogger(__name__)
def evaluate(env, predict_function, seed_games):
    """Play one game per seed and return the average total reward."""
    rewards = {agent: 0 for agent in env.possible_agents}
    do_terminate = False
    for i in seed_games:
        env.reset(seed=i)
        env.action_space(env.possible_agents[0]).seed(i)
        for agent in env.agent_iter():
            obs, reward, termination, truncation, info = env.last()
            # Accumulate the reward each live agent received on this step.
            for a in env.agents:
                rewards[a] += env.rewards[a]
            if termination or truncation:
                break
            action = predict_function(obs, agent)
            if env.render_mode == "human":
                # Pumping the event queue prevents the window from freezing;
                # pressing 'q' ends the evaluation early.
                for event in pygame.event.get():
                    if event.type == pygame.KEYDOWN and event.key == pygame.K_q:
                        pygame.quit()
                        do_terminate = True
                if do_terminate:
                    break
            env.step(action)
        if do_terminate:
            break
    env.close()
    avg_reward = sum(rewards.values()) / len(seed_games)
    avg_reward_per_agent = {
        agent: rewards[agent] / len(seed_games) for agent in env.possible_agents
    }
    print(f"Avg reward: {avg_reward}")
    print("Avg reward per agent, per game:", avg_reward_per_agent)
    return avg_reward
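

# A minimal sketch of the callable interface `evaluate` expects for
# `predict_function` (hypothetical and unused by this script; a real
# submission's CustomPredictFunction would query a trained policy
# rather than sample randomly):
class RandomPredictFunction:
    def __init__(self, env):
        self.env = env

    def __call__(self, obs, agent):
        # Return a random valid action for the acting agent.
        return self.env.action_space(agent).sample()
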
def main(argv=None):
    parser = argparse.ArgumentParser(description='Load an agent and play the KAZ game.')
    parser.add_argument('--verbose', '-v', action='count', default=0, help='Verbose output')
    parser.add_argument('--quiet', '-q', action='count', default=0, help='Quiet output')
    parser.add_argument('--load', '-l',
                        help=('Load the agent from the given file, otherwise import it '
                              'from submission_single_example_rllib.'))
    parser.add_argument('--screen', '-s', action='store_true',
                        help='Set render mode to human (show the game)')
    args = parser.parse_args(argv)

    # Each -v lowers the log threshold by one level, each -q raises it.
    logger.setLevel(max(logging.INFO - 10 * (args.verbose - args.quiet), logging.DEBUG))
    logger.addHandler(logging.StreamHandler(sys.stdout))

    num_agents = 1
    visual_observation = False
    render_mode = "human" if args.screen else None
    logger.info(f'Show game: {render_mode}')
    if render_mode == "human":
        logger.info('Press q to end the game')
    logger.info(f'Use pixels: {visual_observation}')

    # Load the student-submitted code, either from the file given with
    # --load or from the default example submission.
    if args.load is not None:
        spec = importlib.util.spec_from_file_location("KAZ_agent", args.load)
        Agent = importlib.util.module_from_spec(spec)
        spec.loader.exec_module(Agent)
        print(Agent)
        CustomWrapper = Agent.CustomWrapper
        CustomPredictFunction = Agent.CustomPredictFunction
    else:
        from submission_single_example_rllib import CustomWrapper, CustomPredictFunction

    # Create the PettingZoo environment for evaluation (with rendering if requested).
    env = create_environment(num_agents=num_agents, render_mode=render_mode,
                             visual_observation=visual_observation)
    env = CustomWrapper(env)

    # Evaluate the loaded policy, using a different seed for each game.
    random_seeds = list(range(100))
    evaluate(env, CustomPredictFunction(env), seed_games=random_seeds)

if __name__ == "__main__":
sys.exit(main())
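
# For reference, a minimal module accepted by --load defines the two names
# imported above. A hedged sketch (assuming PettingZoo's BaseWrapper import
# path; a real submission would load a trained checkpoint instead of
# sampling random actions):
#
#     from pettingzoo.utils import BaseWrapper
#
#     class CustomWrapper(BaseWrapper):
#         """Pass-through wrapper; preprocess observations here if needed."""
#
#     class CustomPredictFunction:
#         def __init__(self, env):
#             self.env = env
#
#         def __call__(self, obs, agent):
#             return self.env.action_space(agent).sample()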