diff --git a/.gitignore b/.gitignore index 9aae4247b9e..a4ebf2a5277 100644 --- a/.gitignore +++ b/.gitignore @@ -3,6 +3,7 @@ *.py~ .DS_Store .cache +.pytest_cache/ # Setuptools distribution and build folders. /dist/ diff --git a/gym/envs/box2d/bipedal_walker.py b/gym/envs/box2d/bipedal_walker.py index d4c3e00c17f..32970f33c07 100644 --- a/gym/envs/box2d/bipedal_walker.py +++ b/gym/envs/box2d/bipedal_walker.py @@ -1,6 +1,7 @@ -import sys, math -import numpy as np +import sys +import math +import numpy as np import Box2D from Box2D.b2 import (edgeShape, circleShape, fixtureDef, polygonShape, revoluteJointDef, contactListener) @@ -137,9 +138,9 @@ def __init__(self): self.reset() - high = np.array([np.inf]*24) - self.action_space = spaces.Box(np.array([-1,-1,-1,-1]), np.array([+1,+1,+1,+1])) - self.observation_space = spaces.Box(-high, high) + high = np.array([np.inf] * 24) + self.action_space = spaces.Box(np.array([-1, -1, -1, -1]), np.array([1, 1, 1, 1]), dtype=np.float32) + self.observation_space = spaces.Box(-high, high, dtype=np.float32) def seed(self, seed=None): self.np_random, seed = seeding.np_random(seed) diff --git a/gym/envs/classic_control/acrobot.py b/gym/envs/classic_control/acrobot.py index 9a3ef4d8981..55446a525bb 100644 --- a/gym/envs/classic_control/acrobot.py +++ b/gym/envs/classic_control/acrobot.py @@ -1,9 +1,10 @@ """classic Acrobot task""" -from gym import core, spaces -from gym.utils import seeding import numpy as np from numpy import sin, cos, pi +from gym import core, spaces +from gym.utils import seeding + __copyright__ = "Copyright 2013, RLPy http://acl.mit.edu/RLPy" __credits__ = ["Alborz Geramifard", "Robert H. Klein", "Christoph Dann", "William Dabney", "Jonathan P. How"] @@ -86,7 +87,7 @@ def __init__(self): self.viewer = None high = np.array([1.0, 1.0, 1.0, 1.0, self.MAX_VEL_1, self.MAX_VEL_2]) low = -high - self.observation_space = spaces.Box(low=low, high=high) + self.observation_space = spaces.Box(low=low, high=high, dtype=np.float32) self.action_space = spaces.Discrete(3) self.state = None self.seed() diff --git a/gym/envs/classic_control/continuous_mountain_car.py b/gym/envs/classic_control/continuous_mountain_car.py index 65bec623769..a65f11bdd0e 100644 --- a/gym/envs/classic_control/continuous_mountain_car.py +++ b/gym/envs/classic_control/continuous_mountain_car.py @@ -9,16 +9,18 @@ and then modified by Arnaud de Broissia * the OpenAI/gym MountainCar environment -itself from +itself from http://incompleteideas.net/sutton/MountainCar/MountainCar1.cp permalink: https://perma.cc/6Z2N-PFWC """ import math + +import numpy as np + import gym from gym import spaces from gym.utils import seeding -import numpy as np class Continuous_MountainCarEnv(gym.Env): metadata = { @@ -40,8 +42,10 @@ def __init__(self): self.viewer = None - self.action_space = spaces.Box(low=self.min_action, high=self.max_action, shape=(1,)) - self.observation_space = spaces.Box(low=self.low_state, high=self.high_state) + self.action_space = spaces.Box(low=self.min_action, high=self.max_action, + shape=(1,), dtype=np.float32) + self.observation_space = spaces.Box(low=self.low_state, high=self.high_state, + dtype=np.float32) self.seed() self.reset() diff --git a/gym/envs/classic_control/mountain_car.py b/gym/envs/classic_control/mountain_car.py index 40b5a63cda7..8f99bd39dfd 100644 --- a/gym/envs/classic_control/mountain_car.py +++ b/gym/envs/classic_control/mountain_car.py @@ -4,10 +4,12 @@ """ import math + +import numpy as np + import gym from gym import spaces from gym.utils import seeding -import numpy as np class MountainCarEnv(gym.Env): metadata = { @@ -27,7 +29,7 @@ def __init__(self): self.viewer = None self.action_space = spaces.Discrete(3) - self.observation_space = spaces.Box(self.low, self.high) + self.observation_space = spaces.Box(self.low, self.high, dtype=np.float32) self.seed() self.reset() diff --git a/gym/envs/tests/test_envs.py b/gym/envs/tests/test_envs.py index 1f19fefe02c..4d7dd9bff8d 100644 --- a/gym/envs/tests/test_envs.py +++ b/gym/envs/tests/test_envs.py @@ -1,5 +1,6 @@ -import numpy as np import pytest +import numpy as np + from gym import envs from gym.envs.tests.spec_list import spec_list @@ -8,7 +9,14 @@ # envs. @pytest.mark.parametrize("spec", spec_list) def test_env(spec): - env = spec.make() + # Capture warnings + with pytest.warns(None) as warnings: + env = spec.make() + + # Check that dtype is explicitly declared for gym.Box spaces + for warning_msg in warnings: + assert not 'autodetected dtype' in str(warning_msg.message) + ob_space = env.observation_space act_space = env.action_space ob = env.reset() @@ -40,4 +48,3 @@ def test_random_rollout(): (ob, _reward, done, _info) = env.step(a) if done: break env.close() - diff --git a/gym/envs/toy_text/guessing_game.py b/gym/envs/toy_text/guessing_game.py index 9906ded95b2..6eb69c389df 100644 --- a/gym/envs/toy_text/guessing_game.py +++ b/gym/envs/toy_text/guessing_game.py @@ -1,7 +1,8 @@ +import numpy as np + import gym from gym import spaces from gym.utils import seeding -import numpy as np class GuessingGame(gym.Env): @@ -40,7 +41,8 @@ def __init__(self): self.range = 1000 # Randomly selected number is within +/- this value self.bounds = 10000 - self.action_space = spaces.Box(low=np.array([-self.bounds]), high=np.array([self.bounds])) + self.action_space = spaces.Box(low=np.array([-self.bounds]), high=np.array([self.bounds]), + dtype=np.float32) self.observation_space = spaces.Discrete(4) self.number = 0 diff --git a/gym/envs/toy_text/hotter_colder.py b/gym/envs/toy_text/hotter_colder.py index 6f8e8261a29..11208dee736 100644 --- a/gym/envs/toy_text/hotter_colder.py +++ b/gym/envs/toy_text/hotter_colder.py @@ -1,7 +1,8 @@ +import numpy as np + import gym from gym import spaces from gym.utils import seeding -import numpy as np class HotterColder(gym.Env): @@ -25,7 +26,8 @@ def __init__(self): self.range = 1000 # +/- value the randomly select number can be between self.bounds = 2000 # Action space bounds - self.action_space = spaces.Box(low=np.array([-self.bounds]), high=np.array([self.bounds])) + self.action_space = spaces.Box(low=np.array([-self.bounds]), high=np.array([self.bounds]), + dtype=np.float32) self.observation_space = spaces.Discrete(4) self.number = 0 diff --git a/gym/envs/toy_text/kellycoinflip.py b/gym/envs/toy_text/kellycoinflip.py index c2a91fa92bd..2bc78965b1f 100644 --- a/gym/envs/toy_text/kellycoinflip.py +++ b/gym/envs/toy_text/kellycoinflip.py @@ -1,15 +1,18 @@ -import gym -from gym import spaces -from gym.utils import seeding -from gym.spaces import prng # for Generalized Kelly coinflip game distributions: from scipy.stats import genpareto import numpy as np import numpy.random +import gym +from gym import spaces +from gym.utils import seeding +from gym.spaces import prng + + def flip(edge, np_random): return np_random.uniform() < edge + class KellyCoinflipEnv(gym.Env): """The Kelly coinflip game is a simple gambling introduced by Haghani & Dewey 2016's 'Rational Decision-Making Under Uncertainty: Observed Betting Patterns on a Biased Coin' (https://papers.ssrn.com/sol3/papers.cfm?abstract_id=2856963), to test human decision-making in a setting like that of the stock market: positive expected value but highly stochastic; they found many subjects performed badly, often going broke, even though optimal play would reach the maximum with ~95% probability. In the coinflip game, the player starts with $25.00 to gamble over 300 rounds; each round, they can bet anywhere up to their net worth (in penny increments), and then a coin is flipped; with P=0.6, the player wins twice what they bet, otherwise, they lose it. $250 is the maximum players are allowed to have. At the end of the 300 rounds, they keep whatever they have. The human subjects earned an average of $91; a simple use of the Kelly criterion (https://en.wikipedia.org/wiki/Kelly_criterion), giving a strategy of betting 20% until the cap is hit, would earn $240; a decision tree analysis shows that optimal play earns $246 (https://www.gwern.net/Coin-flip). The game short-circuits when either wealth = $0 (since one can never recover) or wealth = cap (trivial optimal play: one simply bets nothing thereafter). In this implementation, we default to the paper settings of $25, 60% odds, wealth cap of $250, and 300 rounds. To specify the action space in advance, we multiply the wealth cap (in dollars) by 100 (to allow for all penny bets); should one attempt to bet more money than one has, it is rounded down to one's net worth. (Alternately, a mistaken bet could end the episode immediately; it's not clear to me which version would be better.) For a harder version which randomizes the 3 key parameters, see the Generalized Kelly coinflip game.""" metadata = {'render.modes': ['human']} @@ -17,7 +20,7 @@ def __init__(self, initialWealth=25.0, edge=0.6, maxWealth=250.0, maxRounds=300) self.action_space = spaces.Discrete(int(maxWealth*100)) # betting in penny increments self.observation_space = spaces.Tuple(( - spaces.Box(0, maxWealth, [1]), # (w,b) + spaces.Box(0, maxWealth, [1], dtype=np.float32), # (w,b) spaces.Discrete(maxRounds+1))) self.reward_range = (0, maxWealth) self.edge = edge @@ -95,11 +98,11 @@ def __init__(self, initialWealth=25.0, edgePriorAlpha=7, edgePriorBeta=3, maxWea # the rest proceeds as before: self.action_space = spaces.Discrete(int(maxWealth*100)) self.observation_space = spaces.Tuple(( - spaces.Box(0, maxWealth, shape=[1]), # current wealth + spaces.Box(0, maxWealth, shape=[1], dtype=np.float32), # current wealth spaces.Discrete(maxRounds+1), # rounds elapsed spaces.Discrete(maxRounds+1), # wins spaces.Discrete(maxRounds+1), # losses - spaces.Box(0, maxWealth, [1]))) # maximum observed wealth + spaces.Box(0, maxWealth, [1], dtype=np.float32))) # maximum observed wealth self.reward_range = (0, maxWealth) self.edge = edge self.wealth = self.initialWealth diff --git a/gym/logger.py b/gym/logger.py index 24c365ca856..80f9901dbf4 100644 --- a/gym/logger.py +++ b/gym/logger.py @@ -1,3 +1,5 @@ +import warnings + from gym.utils import colorize DEBUG = 10 @@ -25,7 +27,7 @@ def info(msg, *args): def warn(msg, *args): if MIN_LEVEL <= WARN: - print(colorize('%s: %s'%('WARN', msg % args), 'yellow')) + warnings.warn(colorize('%s: %s'%('WARN', msg % args), 'yellow')) def error(msg, *args): if MIN_LEVEL <= ERROR: diff --git a/gym/spaces/box.py b/gym/spaces/box.py index d0d41f27aeb..6a0708c2c8e 100644 --- a/gym/spaces/box.py +++ b/gym/spaces/box.py @@ -1,4 +1,5 @@ import numpy as np + import gym from gym import logger @@ -28,7 +29,7 @@ def __init__(self, low=None, high=None, shape=None, dtype=None): dtype = np.uint8 else: dtype = np.float32 - logger.warn("gym.spaces.Box autodetected dtype as %s. Please provide explicit dtype." % dtype) + logger.warn("gym.spaces.Box autodetected dtype as {}. Please provide explicit dtype.".format(dtype)) self.low = low.astype(dtype) self.high = high.astype(dtype) gym.Space.__init__(self, shape, dtype) @@ -47,6 +48,6 @@ def from_jsonable(self, sample_n): def __repr__(self): return "Box" + str(self.shape) - + def __eq__(self, other): return np.allclose(self.low, other.low) and np.allclose(self.high, other.high) diff --git a/gym/spaces/tests/test_spaces.py b/gym/spaces/tests/test_spaces.py index cadc86ff06c..cbe230ca27e 100644 --- a/gym/spaces/tests/test_spaces.py +++ b/gym/spaces/tests/test_spaces.py @@ -10,10 +10,11 @@ @pytest.mark.parametrize("space", [ Discrete(3), Tuple([Discrete(5), Discrete(10)]), - Tuple([Discrete(5), Box(low=np.array([0, 0]),high=np.array([1, 5]))]), + Tuple([Discrete(5), Box(low=np.array([0, 0]), high=np.array([1, 5]), dtype=np.float32)]), Tuple((Discrete(5), Discrete(2), Discrete(2))), MultiDiscrete([2, 2, 100]), - Dict({"position": Discrete(5), "velocity": Box(low=np.array([0, 0]), high=np.array([1, 5]))}), + Dict({"position": Discrete(5), + "velocity": Box(low=np.array([0, 0]), high=np.array([1, 5]), dtype=np.float32)}), ]) def test_roundtripping(space): sample_1 = space.sample() @@ -39,11 +40,12 @@ def test_roundtripping(space): Discrete(3), Box(low=np.array([-10, 0]),high=np.array([10, 10])), Tuple([Discrete(5), Discrete(10)]), - Tuple([Discrete(5), Box(low=np.array([0, 0]),high=np.array([1, 5]))]), + Tuple([Discrete(5), Box(low=np.array([0, 0]), high=np.array([1, 5]), dtype=np.float32)]), Tuple((Discrete(5), Discrete(2), Discrete(2))), MultiDiscrete([2, 2, 100]), MultiBinary(6), - Dict({"position": Discrete(5), "velocity": Box(low=np.array([0, 0]), high=np.array([1, 5]))}), + Dict({"position": Discrete(5), + "velocity": Box(low=np.array([0, 0]), high=np.array([1, 5]), dtype=np.float32)}), ]) def test_equality(space): space1 = space @@ -55,8 +57,8 @@ def test_equality(space): (Discrete(3), Discrete(4)), (MultiDiscrete([2, 2, 100]), MultiDiscrete([2, 2, 8])), (MultiBinary(8), MultiBinary(7)), - (Box(low=np.array([-10, 0]),high=np.array([10, 10])), - Box(low=np.array([-10, 0]),high=np.array([10, 9]))), + (Box(low=np.array([-10, 0]), high=np.array([10, 10]), dtype=np.float32), + Box(low=np.array([-10, 0]), high=np.array([10, 9]), dtype=np.float32)), (Tuple([Discrete(5), Discrete(10)]), Tuple([Discrete(1), Discrete(10)])), (Dict({"position": Discrete(5)}), Dict({"position": Discrete(4)})), (Dict({"position": Discrete(5)}), Dict({"speed": Discrete(5)})),