diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 53cf3f5d9b..9372673ac2 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -1,7 +1,7 @@ # Contribution Guidelines -Thank you for your interest in contributing to ML-Agents! We are incredibly -excited to see how members of our community will use and extend ML-Agents. +Thank you for your interest in contributing to the ML-Agents toolkit! We are incredibly +excited to see how members of our community will use and extend the ML-Agents toolkit. To facilitate your contributions, we've outlined a brief set of guidelines to ensure that your extensions can be easily integrated. @@ -11,7 +11,7 @@ First, please read through our [code of conduct](CODE_OF_CONDUCT.md), as we expect all our contributors to follow it. Second, before starting on a project that you intend to contribute -to ML-Agents (whether environments or modifications to the codebase), +to the ML-Agents toolkit (whether environments or modifications to the codebase), we **strongly** recommend posting on our [Issues page](https://github.com/Unity-Technologies/ml-agents/issues) and briefly outlining the changes you plan to make. This will enable us to provide diff --git a/README.md b/README.md index 6da73394ab..179cf4f8a7 100755 --- a/README.md +++ b/README.md @@ -1,8 +1,8 @@ -# Unity ML-Agents (Beta) +# Unity ML-Agents Toolkit (Beta) -**Unity Machine Learning Agents** (ML-Agents) is an open-source Unity plugin +**The Unity Machine Learning Agents Toolkit** (ML-Agents) is an open-source Unity plugin that enables games and simulations to serve as environments for training intelligent agents. Agents can be trained using reinforcement learning, imitation learning, neuroevolution, or other machine learning methods through @@ -12,7 +12,7 @@ and hobbyists to easily train intelligent agents for 2D, 3D and VR/AR games. These trained agents can be used for multiple purposes, including controlling NPC behavior (in a variety of settings such as multi-agent and adversarial), automated testing of game builds and evaluating different game -design decisions pre-release. ML-Agents is mutually beneficial for both game +design decisions pre-release. The ML-Agents toolkit is mutually beneficial for both game developers and AI researchers as it provides a central platform where advances in AI can be evaluated on Unity’s rich environments and then made accessible to the wider research and game developer communities. @@ -34,7 +34,7 @@ to the wider research and game developer communities. * For more information, in addition to installation and usage instructions, see our [documentation home](docs/Readme.md). * If you have -used a version of ML-Agents prior to v0.4, we strongly recommend +used a version of the ML-Agents toolkit prior to v0.4, we strongly recommend our [guide on migrating from earlier versions](docs/Migrating.md). ## References @@ -56,7 +56,7 @@ In addition to our own documentation, here are some additional, relevant article ## Community and Feedback -ML-Agents is an open-source project and we encourage and welcome contributions. +The ML-Agents toolkit is an open-source project and we encourage and welcome contributions. If you wish to contribute, be sure to review our [contribution guidelines](CONTRIBUTING.md) and [code of conduct](CODE_OF_CONDUCT.md). 
@@ -65,10 +65,10 @@ You can connect with us and the broader community through Unity Connect and GitHub: * Join our [Unity Machine Learning Channel](https://connect.unity.com/messages/c/035fba4f88400000) -to connect with others using ML-Agents and Unity developers enthusiastic +to connect with others using the ML-Agents toolkit and Unity developers enthusiastic about machine learning. We use that channel to surface updates -regarding ML-Agents (and, more broadly, machine learning in games). -* If you run into any problems using ML-Agents, +regarding the ML-Agents toolkit (and, more broadly, machine learning in games). +* If you run into any problems using the ML-Agents toolkit, [submit an issue](https://github.com/Unity-Technologies/ml-agents/issues) and make sure to include as much detail as possible. @@ -77,7 +77,7 @@ team at ml-agents@unity3d.com. ## Translations -To make Unity ML-Agents accessible to the global research and +To make the Unity ML-Agents toolkit accessible to the global research and Unity developer communities, we're attempting to create and maintain translations of our documentation. We've started with translating a subset of the documentation to one language (Chinese), but we hope to continue diff --git a/docs/API-Reference.md b/docs/API-Reference.md index 8562bc6525..52f46900f7 100644 --- a/docs/API-Reference.md +++ b/docs/API-Reference.md @@ -11,7 +11,7 @@ the following command within the `docs/` directory: doxygen dox-ml-agents.conf -`dox-ml-agents.conf` is a Doxygen configuration file for ML-Agents +`dox-ml-agents.conf` is a Doxygen configuration file for the ML-Agents toolkit that includes the classes that have been properly formatted. The generated HTML files will be placed in the `html/` subdirectory. Open `index.html` within that subdirectory to diff --git a/docs/Background-Machine-Learning.md b/docs/Background-Machine-Learning.md index 427391f3f5..672691b2ec 100644 --- a/docs/Background-Machine-Learning.md +++ b/docs/Background-Machine-Learning.md @@ -1,8 +1,8 @@ # Background: Machine Learning -Given that a number of users of ML-Agents might not have a formal machine +Given that a number of users of the ML-Agents toolkit might not have a formal machine learning background, this page provides an overview to facilitate the -understanding of ML-Agents. However, We will not attempt to provide a thorough +understanding of the ML-Agents toolkit. However, we will not attempt to provide a thorough treatment of machine learning as there are fantastic resources online. Machine learning, a branch of artificial intelligence, focuses on learning @@ -77,7 +77,7 @@ tasks are active areas of machine learning research and, in practice, require several iterations to achieve good performance. We now switch to reinforcement learning, the third class of -machine learning algorithms, and arguably the one most relevant for ML-Agents. +machine learning algorithms, and arguably the one most relevant for the ML-Agents toolkit. ## Reinforcement Learning @@ -132,8 +132,8 @@ in many ways, one can view a non-playable character (NPC) as a virtual robot, with its own observations about the environment, its own set of actions and a specific objective. Thus it is natural to explore how we can train behaviors within Unity using reinforcement learning. This is precisely -what ML-Agents offers. The video linked below includes a reinforcement -learning demo showcasing training character behaviors using ML-Agents. +what the ML-Agents toolkit offers. 
The video linked below includes a reinforcement +learning demo showcasing training character behaviors using the ML-Agents toolkit.

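The reinforcement learning setup described above — an agent that observes its environment, takes an action, and receives a reward while learning a policy — reduces to a short loop in code. A minimal sketch of that loop, where `env` and `policy` are hypothetical placeholders rather than ML-Agents classes:

```python
# Minimal sketch of the observe-act-reward loop described above.
# `env` and `policy` are hypothetical placeholders, not ML-Agents classes;
# any objects with the same reset/step and call signatures would do.

def run_episode(env, policy):
    observation = env.reset()          # initial state of the environment
    total_reward, done = 0.0, False
    while not done:
        action = policy(observation)   # the agent's current behavior
        observation, reward, done = env.step(action)  # environment responds
        total_reward += reward         # the reward signal drives learning
    return total_reward
```

During the training phase, an algorithm such as PPO adjusts `policy` between episodes to increase the returned reward; during the inference phase, the learned policy simply runs inside this same loop.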
diff --git a/docs/Background-TensorFlow.md b/docs/Background-TensorFlow.md index 615259883e..0e7526c3be 100644 --- a/docs/Background-TensorFlow.md +++ b/docs/Background-TensorFlow.md @@ -2,19 +2,19 @@ As discussed in our [machine learning background page](Background-Machine-Learning.md), many of the -algorithms we provide in ML-Agents leverage some form of deep learning. +algorithms we provide in the ML-Agents toolkit leverage some form of deep learning. More specifically, our implementations are built on top of the open-source library [TensorFlow](https://www.tensorflow.org/). This means that the models -produced by ML-Agents are (currently) in a format only understood by +produced by the ML-Agents toolkit are (currently) in a format only understood by TensorFlow. In this page we provide a brief overview of TensorFlow, in addition -to TensorFlow-related tools that we leverage within ML-Agents. +to TensorFlow-related tools that we leverage within the ML-Agents toolkit. ## TensorFlow [TensorFlow](https://www.tensorflow.org/) is an open source library for performing computations using data flow graphs, the underlying representation of deep learning models. It facilitates training and inference on CPUs and -GPUs in a desktop, server, or mobile device. Within ML-Agents, when you +GPUs in a desktop, server, or mobile device. Within the ML-Agents toolkit, when you train the behavior of an Agent, the output is a TensorFlow model (.bytes) file that you can then embed within an Internal Brain. Unless you implement a new algorithm, the use of TensorFlow is mostly abstracted away and behind @@ -47,5 +47,5 @@ that contains an Internal Brain is built, inference is performed via TensorFlowSharp. We provide an additional in-depth overview of how to leverage [TensorFlowSharp within Unity](Using-TensorFlow-Sharp-in-Unity.md) which will become more relevant once you install and start training -behaviors within ML-Agents. Given the reliance on TensorFlowSharp, the +behaviors within the ML-Agents toolkit. Given the reliance on TensorFlowSharp, the Internal Brain is currently marked as experimental. diff --git a/docs/Background-Unity.md b/docs/Background-Unity.md index d6e43f6111..d9a2b37f5b 100644 --- a/docs/Background-Unity.md +++ b/docs/Background-Unity.md @@ -6,7 +6,7 @@ we highly recommend the [Tutorials page](https://unity3d.com/learn/tutorials). The [Roll-a-ball tutorial](https://unity3d.com/learn/tutorials/s/roll-ball-tutorial) is a fantastic resource to learn all the basic concepts of Unity to get started -with ML-Agents: +with the ML-Agents toolkit: * [Editor](https://docs.unity3d.com/Manual/UsingTheEditor.html) * [Interface](https://docs.unity3d.com/Manual/LearningtheInterface.html) * [Scene](https://docs.unity3d.com/Manual/CreatingScenes.html) diff --git a/docs/Basic-Guide.md b/docs/Basic-Guide.md index 229dad27be..c50bd4e8da 100644 --- a/docs/Basic-Guide.md +++ b/docs/Basic-Guide.md @@ -5,19 +5,19 @@ This guide will show you how to use a pretrained model in an example Unity envir If you are not familiar with the [Unity Engine](https://unity3d.com/unity), we highly recommend the [Roll-a-ball tutorial](https://unity3d.com/learn/tutorials/s/roll-ball-tutorial) to learn all the basic concepts of Unity. -## Setting up ML-Agents within Unity +## Setting up the ML-Agents Toolkit within Unity -In order to use ML-Agents within Unity, you need to change some Unity settings first. 
Also [TensorFlowSharp plugin](https://s3.amazonaws.com/unity-ml-agents/0.4/TFSharpPlugin.unitypackage) is needed for you to use pretrained model within Unity, which is based on the [TensorFlowSharp repo](https://github.com/migueldeicaza/TensorFlowSharp). +In order to use the ML-Agents toolkit within Unity, you need to change some Unity settings first. You also need the [TensorFlowSharp plugin](https://s3.amazonaws.com/unity-ml-agents/0.4/TFSharpPlugin.unitypackage), which is based on the [TensorFlowSharp repo](https://github.com/migueldeicaza/TensorFlowSharp), to use a pretrained model within Unity. 1. Launch Unity 2. On the Projects dialog, choose the **Open** option at the top of the window. -3. Using the file dialog that opens, locate the `unity-environment` folder within the ML-Agents project and click **Open**. +3. Using the file dialog that opens, locate the `unity-environment` folder within the ML-Agents toolkit project and click **Open**. 4. Go to **Edit** > **Project Settings** > **Player** 5. For **each** of the platforms you target (**PC, Mac and Linux Standalone**, **iOS** or **Android**): 1. Option the **Other Settings** section. 2. Select **Scripting Runtime Version** to - **Experimental (.NET 4.6 Equivalent)** + **Experimental (.NET 4.6 Equivalent or .NET 4.x Equivalent)** 3. In **Scripting Defined Symbols**, add the flag `ENABLE_TENSORFLOW`. After typing in the flag name, press Enter. 6. Go to **File** > **Save Project** @@ -67,7 +67,7 @@ object. ### Training the environment 1. Open a command or terminal window. -2. Nagivate to the folder where you installed ML-Agents. +2. Navigate to the folder where you installed the ML-Agents toolkit. 3. Change to the `python` directory. 4. Run `python3 learn.py --run-id= --train` Where: @@ -99,7 +99,7 @@ to the **Graph Model** placeholder in the **Ball3DBrain** inspector window. ## Next Steps -* For more information on ML-Agents, in addition to helpful background, check out the [ML-Agents Overview](ML-Agents-Overview.md) page. +* For more information on the ML-Agents toolkit, in addition to helpful background, check out the [ML-Agents Toolkit Overview](ML-Agents-Overview.md) page. * For a more detailed walk-through of our 3D Balance Ball environment, check out the [Getting Started](Getting-Started-with-Balance-Ball.md) page. * For a "Hello World" introduction to creating your own learning environment, check out the [Making a New Learning Environment](Learning-Environment-Create-New.md) page. * For a series of Youtube video tutorials, checkout the [Machine Learning Agents PlayList](https://www.youtube.com/playlist?list=PLX2vGYjWbI0R08eWQkO7nQkGiicHAX7IX) page. diff --git a/docs/FAQ.md b/docs/FAQ.md index f6fcdf7294..4e674bcae9 100644 --- a/docs/FAQ.md +++ b/docs/FAQ.md @@ -3,13 +3,13 @@ ### Scripting Runtime Environment not setup correctly -If you haven't switched your scripting runtime version from .NET 3.5 to .NET 4.6, you will see such error message: +If you haven't switched your scripting runtime version from .NET 3.5 to .NET 4.6 or .NET 4.x, you will see an error message such as: ``` error CS1061: Type `System.Text.StringBuilder' does not contain a definition for `Clear' and no extension method `Clear' of type `System.Text.StringBuilder' could be found. Are you missing an assembly reference? ``` -This is because .NET 3.5 doesn't support method Clear() for StringBuilder, refer to [Setting Up ML-Agents Within Unity](Installation.md#setting-up-ml-agent-within-unity) for solution. 
+This is because .NET 3.5 doesn't support the StringBuilder Clear() method; refer to [Setting Up The ML-Agents Toolkit Within Unity](Installation.md#setting-up-ml-agent-within-unity) for the solution. ### TensorFlowSharp flag not turned on. If you have already imported the TensorFlowSharp plugin, but havn't set ENABLE_T ``` You need to install and enable the TensorFlowSharp plugin in order to use the internal brain. ``` -This error message occurs because the TensorFlowSharp plugin won't be usage without the ENABLE_TENSORFLOW flag, refer to [Setting Up ML-Agents Within Unity](Installation.md#setting-up-ml-agent-within-unity) for solution. +This error message occurs because the TensorFlowSharp plugin won't be usable without the ENABLE_TENSORFLOW flag; refer to [Setting Up The ML-Agents Toolkit Within Unity](Installation.md#setting-up-ml-agent-within-unity) for the solution. ### Tensorflow epsilon placeholder error diff --git a/docs/Getting-Started-with-Balance-Ball.md b/docs/Getting-Started-with-Balance-Ball.md index b12669a42e..81ddd27a1c 100644 --- a/docs/Getting-Started-with-Balance-Ball.md +++ b/docs/Getting-Started-with-Balance-Ball.md @@ -1,11 +1,11 @@ # Getting Started with the 3D Balance Ball Environment -This tutorial walks through the end-to-end process of opening an ML-Agents +This tutorial walks through the end-to-end process of opening an ML-Agents toolkit example environment in Unity, building the Unity executable, training an agent in it, and finally embedding the trained model into the Unity environment. -ML-Agents includes a number of [example environments](Learning-Environment-Examples.md) -which you can examine to help understand the different ways in which ML-Agents +The ML-Agents toolkit includes a number of [example environments](Learning-Environment-Examples.md) +which you can examine to help understand the different ways in which the ML-Agents toolkit can be used. These environments can also serve as templates for new environments or as ways to test new ML algorithms. After reading this tutorial, you should be able to explore and build the example environments. @@ -24,7 +24,7 @@ Let's get started! ## Installation -In order to install and set up ML-Agents, the Python dependencies and Unity, +In order to install and set up the ML-Agents toolkit, the Python dependencies and Unity, see the [installation instructions](Installation.md). ## Understanding a Unity Environment (3D Balance Ball) @@ -108,7 +108,7 @@ when you embed the trained model in the Unity application, you will change the **Vector Observation Space** Before making a decision, an agent collects its observation about its state -in the world. ML-Agents classifies vector observations into two types: +in the world. The ML-Agents toolkit classifies vector observations into two types: **Continuous** and **Discrete**. The **Continuous** vector observation space collects observations in a vector of floating point numbers. The **Discrete** vector observation space is an index into a table of states. Most of the example @@ -124,7 +124,7 @@ values are defined in the agent's `CollectObservations()` function.) **Vector Action Space** An agent is given instructions from the brain in the form of *actions*. Like -states, ML-Agents classifies actions into two types: the **Continuous** +states, the ML-Agents toolkit classifies actions into two types: the **Continuous** vector action space is a vector of numbers that can vary continuously. 
What each element of the vector means is defined by the agent logic (the PPO training process just learns what values are better given particular state @@ -193,7 +193,7 @@ In order to train an agent to correctly balance the ball, we will use a Reinforcement Learning algorithm called Proximal Policy Optimization (PPO). This is a method that has been shown to be safe, efficient, and more general purpose than many other RL algorithms, as such we have chosen it as the -example algorithm for use with ML-Agents. For more information on PPO, +example algorithm for use with the ML-Agents toolkit. For more information on PPO, OpenAI has a recent [blog post](https://blog.openai.com/openai-baselines-ppo/) explaining it. @@ -217,7 +217,7 @@ When the message _"Start training by pressing the Play button in the Unity Edito **Note**: If you're using Anaconda, don't forget to activate the ml-agents environment first. -The `--train` flag tells ML-Agents to run in training mode. +The `--train` flag tells the ML-Agents toolkit to run in training mode. **Note**: You can train using an executable rather than the Editor. To do so, follow the intructions in [Using an Execuatble](Learning-Environment-Executable.md). @@ -271,7 +271,7 @@ Because TensorFlowSharp support is still experimental, it is disabled by default. In order to enable it, you must follow these steps. Please note that the `Internal` Brain mode will only be available once completing these steps. -To set up the TensorFlowSharp Support, follow [Setting up ML-Agents within Unity](Basic-Guide.md#setting-up-ml-agents-within-unity) section. +To set up TensorFlowSharp support, follow the [Setting up the ML-Agents Toolkit within Unity](Basic-Guide.md#setting-up-the-ml-agents-toolkit-within-unity) section of the Basic Guide page. ### Embedding the trained model into Unity diff --git a/docs/Glossary.md b/docs/Glossary.md index 7057dceb37..45b6bd56a3 100644 --- a/docs/Glossary.md +++ b/docs/Glossary.md @@ -1,4 +1,4 @@ -# ML-Agents Glossary +# ML-Agents Toolkit Glossary * **Academy** - Unity Component which controls timing, reset, and training/inference settings of the environment. diff --git a/docs/Installation-Windows.md b/docs/Installation-Windows.md index 9dd056bb7b..f33e7d465d 100644 --- a/docs/Installation-Windows.md +++ b/docs/Installation-Windows.md @@ -1,12 +1,12 @@ -# Installing ML-Agents for Windows +# Installing ML-Agents Toolkit for Windows -ML-Agents supports Windows 10. While it might be possible to run ML-Agents using other versions of Windows, it has not been tested on other versions. Furthermore, ML-Agents has not been tested on a Windows VM such as Bootcamp or Parallels. +The ML-Agents toolkit supports Windows 10. While it might be possible to run the ML-Agents toolkit using other versions of Windows, it has not been tested on them. Furthermore, the ML-Agents toolkit has not been tested on a Windows VM such as Bootcamp or Parallels. -To use ML-Agents, you install Python and the required Python packages as outlined below. This guide also covers how set up GPU-based training (for advanced users). GPU-based training is not required for the v0.4 release of ML-Agents. However, training on a GPU might be required by future versions and features. +To use the ML-Agents toolkit, you install Python and the required Python packages as outlined below. This guide also covers how to set up GPU-based training (for advanced users). GPU-based training is not required for the v0.4 release of the ML-Agents toolkit. 
However, training on a GPU might be required by future versions and features. ## Step 1: Install Python via Anaconda -[Download](https://www.anaconda.com/download/#windows) and install Anaconda for Windows. By using Anaconda, you can manage separate environments for different distributions of Python. Python 3 is required as we no longer support Python 2. In this guide, we are using Python version 3.6 and Anaconda version 5.1 ([64-bit](https://repo.continuum.io/archive/Anaconda3-5.1.0-Windows-x86_64.exe) or [32-bit](https://repo.continuum.io/archive/Anaconda3-5.1.0-Windows-x86.exe) direct links). +[Download](https://www.anaconda.com/download/#windows) and install Anaconda for Windows. By using Anaconda, you can manage separate environments for different distributions of Python. Python 3.5 or 3.6 is required as we no longer support Python 2. In this guide, we are using Python version 3.6 and Anaconda version 5.1 ([64-bit](https://repo.continuum.io/archive/Anaconda3-5.1.0-Windows-x86_64.exe) or [32-bit](https://repo.continuum.io/archive/Anaconda3-5.1.0-Windows-x86.exe) direct links).

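Since the paragraph above pins the supported interpreters to Python 3.5 or 3.6, it can help to verify the interpreter before going further. A small sketch of such a check, modeled on the guard in `python/Basics.ipynb` later in this diff (the explicit 3.5/3.6 whitelist is an assumption drawn from the text above, not an official install step):

```python
import sys

# Sketch of a quick interpreter check, mirroring the guard in
# python/Basics.ipynb; this release is documented as supporting
# Python 3.5 or 3.6 only.
if sys.version_info[:2] not in [(3, 5), (3, 6)]:
    raise Exception("ERROR: ML-Agents Toolkit (v0.3 onwards) requires Python 3.5 or 3.6")
print("Using Python {}.{}.{}".format(*sys.version_info[:3]))
```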
-## Clone the ml-agents Repository +## Clone the ML-Agents Repository -Once installed, you will want to clone the ML-Agents GitHub repository. +Once installed, you will want to clone the ML-Agents Toolkit GitHub repository. git clone https://github.com/Unity-Technologies/ml-agents.git @@ -28,7 +28,7 @@ Both directories are located at the root of the repository. ## Install Python (with Dependencies) -In order to use ML-Agents, you need Python 3 along with +In order to use the ML-Agents toolkit, you need Python 3.5 or 3.6 along with the dependencies listed in the [requirements file](../python/requirements.txt). Some of the primary dependencies include: - [TensorFlow](Background-TensorFlow.md) @@ -59,7 +59,7 @@ If you'd like to use Docker for ML-Agents, please follow ## Next Steps The [Basic Guide](Basic-Guide.md) page contains several short -tutorials on setting up ML-Agents within Unity, running a pre-trained model, in +tutorials on setting up the ML-Agents toolkit within Unity, running a pre-trained model, in addition to building and training environments. ## Help diff --git a/docs/Learning-Environment-Create-New.md b/docs/Learning-Environment-Create-New.md index ec63c03c19..3ed4aa7089 100644 --- a/docs/Learning-Environment-Create-New.md +++ b/docs/Learning-Environment-Create-New.md @@ -8,7 +8,7 @@ In this example, we will train a ball to roll to a randomly placed cube. The bal ## Overview -Using ML-Agents in a Unity project involves the following basic steps: +Using the ML-Agents toolkit in a Unity project involves the following basic steps: 1. Create an environment for your agents to live in. An environment can range from a simple physical simulation containing a few objects to an entire game or ecosystem. 2. Implement an Academy subclass and add it to a GameObject in the Unity scene containing the environment. This GameObject will serve as the parent for any Brain objects in the scene. Your Academy class can implement a few optional methods to update the scene independently of any agents. For example, you can add, move, or delete agents and other entities in the environment. diff --git a/docs/Learning-Environment-Design-Agents.md b/docs/Learning-Environment-Design-Agents.md index 26787bacdf..369f1278ea 100644 --- a/docs/Learning-Environment-Design-Agents.md +++ b/docs/Learning-Environment-Design-Agents.md @@ -133,7 +133,7 @@ In addition, make sure that the Agent's Brain expects a visual observation. In t ### Discrete Vector Observation Space: Table Lookup -You can use the discrete vector observation space when an agent only has a limited number of possible states and those states can be enumerated by a single number. For instance, the [Basic example environment](Learning-Environment-Examples.md#basic) in ML-Agents defines an agent with a discrete vector observation space. The states of this agent are the integer steps between two linear goals. In the Basic example, the agent learns to move to the goal that provides the greatest reward. +You can use the discrete vector observation space when an agent only has a limited number of possible states and those states can be enumerated by a single number. For instance, the [Basic example environment](Learning-Environment-Examples.md#basic) in the ML-Agents toolkit defines an agent with a discrete vector observation space. The states of this agent are the integer steps between two linear goals. In the Basic example, the agent learns to move to the goal that provides the greatest reward. 
More generally, the discrete vector observation identifier could be an index into a table of the possible states. However, tables quickly become unwieldy as the environment becomes more complex. For example, even a simple game like [tic-tac-toe has 765 possible states](https://en.wikipedia.org/wiki/Game_complexity) (far more if you don't reduce the number of observations by combining those that are rotations or reflections of each other). diff --git a/docs/Learning-Environment-Design-Brains.md b/docs/Learning-Environment-Design-Brains.md index 9d3c1f7521..9f0abf9255 100644 --- a/docs/Learning-Environment-Design-Brains.md +++ b/docs/Learning-Environment-Design-Brains.md @@ -2,7 +2,7 @@ The Brain encapsulates the decision making process. Brain objects must be children of the Academy in the Unity scene hierarchy. Every Agent must be assigned a Brain, but you can use the same Brain with more than one Agent. You can also create several Brains, attach each of the Brain to one or more than one Agent. -Use the Brain class directly, rather than a subclass. Brain behavior is determined by the **Brain Type**. ML-Agents defines four Brain Types: +Use the Brain class directly, rather than a subclass. Brain behavior is determined by the **Brain Type**. The ML-Agents toolkit defines four Brain Types: * [External](Learning-Environment-Design-External-Internal-Brains.md) — The **External** and **Internal** types typically work together; set **External** when training your agents. You can also use the **External** brain to communicate with a Python script via the Python `UnityEnvironment` class included in the Python portion of the ML-Agents SDK. * [Internal](Learning-Environment-Design-External-Internal-Brains.md) – Set **Internal** to make use of a trained model. diff --git a/docs/Learning-Environment-Design.md b/docs/Learning-Environment-Design.md index 7a449ae4f8..d45afd0cb4 100644 --- a/docs/Learning-Environment-Design.md +++ b/docs/Learning-Environment-Design.md @@ -2,7 +2,7 @@ Reinforcement learning is an artificial intelligence technique that trains _agents_ to perform tasks by rewarding desirable behavior. During reinforcement learning, an agent explores its environment, observes the state of things, and, based on those observations, takes an action. If the action leads to a better state, the agent receives a positive reward. If it leads to a less desirable state, then the agent receives no reward or a negative reward (punishment). As the agent learns during training, it optimizes its decision making so that it receives the maximum reward over time. -ML-Agents uses a reinforcement learning technique called [Proximal Policy Optimization (PPO)](https://blog.openai.com/openai-baselines-ppo/). PPO uses a neural network to approximate the ideal function that maps an agent's observations to the best action an agent can take in a given state. The ML-Agents PPO algorithm is implemented in TensorFlow and runs in a separate Python process (communicating with the running Unity application over a socket). +The ML-Agents toolkit uses a reinforcement learning technique called [Proximal Policy Optimization (PPO)](https://blog.openai.com/openai-baselines-ppo/). PPO uses a neural network to approximate the ideal function that maps an agent's observations to the best action an agent can take in a given state. The ML-Agents PPO algorithm is implemented in TensorFlow and runs in a separate Python process (communicating with the running Unity application over a socket). 
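The phrase "a mathematical function mapping observations to actions" in the paragraph above is the heart of what PPO optimizes. A toy illustration of a policy as such a mapping — a stand-in linear map rather than the actual PPO network, with all names hypothetical:

```python
import numpy as np

# Toy stand-in for a policy: a parameterized mapping from an observation
# vector to an action vector. The real ML-Agents policy is a neural network
# trained with PPO in TensorFlow; this linear map only illustrates the
# observations-in, actions-out shape of the problem.
class LinearPolicy:
    def __init__(self, obs_size, act_size, seed=0):
        rng = np.random.RandomState(seed)
        self.weights = rng.normal(scale=0.1, size=(act_size, obs_size))

    def act(self, observation):
        return self.weights.dot(observation)  # observation vector -> action vector

policy = LinearPolicy(obs_size=8, act_size=2)
print(policy.act(np.ones(8)))  # two continuous action values
```

Training amounts to adjusting `weights` (or, in the real case, the network parameters) so that the actions produced earn more reward over time.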
**Note:** if you aren't studying machine and reinforcement learning as a subject and just want to train agents to accomplish tasks, you can treat PPO training as a _black box_. There are a few training-related parameters to adjust inside Unity as well as on the Python training side, but you do not need in-depth knowledge of the algorithm itself to successfully create and train agents. Step-by-step procedures for running the training process are provided in the [Training section](Training-ML-Agents.md). @@ -29,7 +29,7 @@ To create a training environment, extend the Academy and Agent classes to implem ## Organizing the Unity Scene -To train and use ML-Agents in a Unity scene, the scene must contain a single Academy subclass along with as many Brain objects and Agent subclasses as you need. Any Brain instances in the scene must be attached to GameObjects that are children of the Academy in the Unity Scene Hierarchy. Agent instances should be attached to the GameObject representing that agent. +To train and use the ML-Agents toolkit in a Unity scene, the scene must contain a single Academy subclass along with as many Brain objects and Agent subclasses as you need. Any Brain instances in the scene must be attached to GameObjects that are children of the Academy in the Unity Scene Hierarchy. Agent instances should be attached to the GameObject representing that agent. ![Scene Hierarchy](images/scene-hierarchy.png) @@ -76,7 +76,7 @@ See [Agents](Learning-Environment-Design-Agents.md) for detailed information abo ## Environments -An _environment_ in ML-Agents can be any scene built in Unity. The Unity scene provides the environment in which agents observe, act, and learn. How you set up the Unity scene to serve as a learning environment really depends on your goal. You may be trying to solve a specific reinforcement learning problem of limited scope, in which case you can use the same scene for both training and for testing trained agents. Or, you may be training agents to operate in a complex game or simulation. In this case, it might be more efficient and practical to create a purpose-built training scene. +An _environment_ in the ML-Agents toolkit can be any scene built in Unity. The Unity scene provides the environment in which agents observe, act, and learn. How you set up the Unity scene to serve as a learning environment really depends on your goal. You may be trying to solve a specific reinforcement learning problem of limited scope, in which case you can use the same scene for both training and for testing trained agents. Or, you may be training agents to operate in a complex game or simulation. In this case, it might be more efficient and practical to create a purpose-built training scene. Both training and testing (or normal game) scenes must contain an Academy object to control the agent decision making process. The Academy defines several properties that can be set differently for a training scene versus a regular scene. The Academy's **Configuration** properties control rendering and time scale. You can set the **Training Configuration** to minimize the time Unity spends rendering graphics in order to speed up training. You may need to adjust the other functional, Academy settings as well. For example, `Max Steps` should be as short as possible for training — just long enough for the agent to accomplish its task, with some extra time for "wandering" while it learns. 
In regular scenes, you often do not want the Academy to reset the scene at all; if so, `Max Steps` should be set to zero. diff --git a/docs/Learning-Environment-Examples.md b/docs/Learning-Environment-Examples.md index 8769160f2d..dbaedab1c8 100644 --- a/docs/Learning-Environment-Examples.md +++ b/docs/Learning-Environment-Examples.md @@ -1,6 +1,6 @@ # Example Learning Environments -Unity ML-Agents contains an expanding set of example environments which +The Unity ML-Agents toolkit contains an expanding set of example environments which demonstrate various features of the platform. Environments are located in `unity-environment/Assets/ML-Agents/Examples` and summarized below. Additionally, our diff --git a/docs/Limitations.md b/docs/Limitations.md index a2917df598..cc5bb2e113 100644 --- a/docs/Limitations.md +++ b/docs/Limitations.md @@ -16,4 +16,4 @@ Update(), so game behavior implemented in Update() may be out of sync with the A As of version 0.3, we no longer support Python 2. ### Tensorflow support -Currently Ml-Agents uses TensorFlow 1.4 due to the version of the TensorFlowSharp plugin we are using. \ No newline at end of file +Currently the ML-Agents toolkit uses TensorFlow 1.7.1 due to the version of the TensorFlowSharp plugin we are using. \ No newline at end of file diff --git a/docs/ML-Agents-Overview.md b/docs/ML-Agents-Overview.md index 2f3279451c..1be054a645 100644 --- a/docs/ML-Agents-Overview.md +++ b/docs/ML-Agents-Overview.md @@ -1,6 +1,6 @@ -# ML-Agents Overview +# ML-Agents Toolkit Overview -**Unity Machine Learning Agents** (ML-Agents) is an open-source Unity plugin +**The Unity Machine Learning Agents Toolkit** (ML-Agents Toolkit) is an open-source Unity plugin that enables games and simulations to serve as environments for training intelligent agents. Agents can be trained using reinforcement learning, imitation learning, neuroevolution, or other machine learning methods through @@ -10,14 +10,14 @@ and hobbyists to easily train intelligent agents for 2D, 3D and VR/AR games. These trained agents can be used for multiple purposes, including controlling NPC behavior (in a variety of settings such as multi-agent and adversarial), automated testing of game builds and evaluating different game -design decisions pre-release. ML-Agents is mutually beneficial for both game +design decisions pre-release. The ML-Agents toolkit is mutually beneficial for both game developers and AI researchers as it provides a central platform where advances in AI can be evaluated on Unity’s rich environments and then made accessible to the wider research and game developer communities. Depending on your background (i.e. researcher, game developer, hobbyist), you may have very different questions on your mind at the moment. -To make your transition to ML-Agents easier, we provide several background +To make your transition to the ML-Agents toolkit easier, we provide several background pages that include overviews and helpful resources on the [Unity Engine](Background-Unity.md), [machine learning](Background-Machine-Learning.md) and @@ -27,7 +27,7 @@ basic machine learning concepts or have not previously heard of TensorFlow. The remainder of this page contains a deep dive into ML-Agents, its key components, different training modes and scenarios. By the end of it, you -should have a good sense of _what_ ML-Agents allows you to do. The subsequent +should have a good sense of _what_ the ML-Agents toolkit allows you to do. The subsequent documentation pages provide examples of _how_ to use ML-Agents. 
## Running Example: Training NPC Behaviors @@ -109,14 +109,14 @@ the process of learning a policy through running simulations is called the **training phase**, while playing the game with an NPC that is using its learned policy is called the **inference phase**. -ML-Agents provides all the necessary tools for using Unity as the simulation +The ML-Agents toolkit provides all the necessary tools for using Unity as the simulation engine for learning the policies of different objects in a Unity environment. -In the next few sections, we discuss how ML-Agents achieves this and what +In the next few sections, we discuss how the ML-Agents toolkit achieves this and what features it provides. ## Key Components -ML-Agents is a Unity plugin that contains three high-level components: +The ML-Agents toolkit is a Unity plugin that contains three high-level components: * **Learning Environment** - which contains the Unity scene and all the game characters. * **Python API** - which contains all the machine learning algorithms that are @@ -172,9 +172,9 @@ have different actions). border="10" />

-_Example block diagram of ML-Agents for our sample game._ +_Example block diagram of the ML-Agents toolkit for our sample game._ -We have yet to discuss how ML-Agents trains behaviors, and what role the +We have yet to discuss how the ML-Agents toolkit trains behaviors, and what role the Python API and External Communicator play. Before we dive into those details, let's summarize the earlier components. Each character is attached to an Agent, and each Agent is linked to a Brain. The Brain receives observations and @@ -226,7 +226,7 @@ inference can proceed. ### Built-in Training and Inference -As mentioned previously, ML-Agents ships with several implementations of +As mentioned previously, the ML-Agents toolkit ships with several implementations of state-of-the-art algorithms for training intelligent agents. In this mode, the Brain type is set to External during training and Internal during inference. More specifically, during training, all the medics in the scene send their @@ -260,7 +260,7 @@ tutorial covers this training mode with the **3D Balance Ball** sample environme In the previous mode, the External Brain type was used for training to generate a TensorFlow model that the Internal Brain type can understand -and use. However, any user of ML-Agents can leverage their own algorithms +and use. However, any user of the ML-Agents toolkit can leverage their own algorithms for both training and inference. In this case, the Brain type would be set to External for both training and inferences phases and the behaviors of all the Agents in the scene will be controlled within Python. @@ -303,7 +303,7 @@ we allow the agent to quickly update the random policy to a more meaningful one that is successively improved as the environment gradually increases in complexity. In our example, we can imagine first training the medic when each team only contains one player, and then iteratively increasing the number of -players (i.e. the environment complexity). ML-Agents supports setting +players (i.e. the environment complexity). The ML-Agents toolkit supports setting custom environment parameters within the Academy. This allows elements of the environment related to difficulty or complexity to be dynamically adjusted based on training progress. @@ -369,11 +369,11 @@ giraffes, or an autonomous driving simulation within an urban environment. ## Additional Features -Beyond the flexible training scenarios available, ML-Agents includes +Beyond the flexible training scenarios available, the ML-Agents toolkit includes additional features which improve the flexibility and interpretability of the training process. -* **On Demand Decision Making** - With ML-Agents it is possible to have agents +* **On Demand Decision Making** - With the ML-Agents toolkit it is possible to have agents request decisions only when needed as opposed to requesting decisions at every step of the environment. This enables training of turn based games, games where agents @@ -403,7 +403,7 @@ agent’s behavior. You can learn more about using the Monitor class [here](Feature-Monitor.md). * **Complex Visual Observations** - Unlike other platforms, where the agent’s -observation might be limited to a single vector or image, ML-Agents allows +observation might be limited to a single vector or image, the ML-Agents toolkit allows multiple cameras to be used for observations per agent. This enables agents to learn to integrate information from multiple visual streams. 
This can be helpful in several scenarios such as training a self-driving car which requires @@ -430,20 +430,20 @@ without installing Python or TensorFlow directly, we provide a [guide](Using-Docker.md) on how to create and run a Docker container. -* **Cloud Training on AWS** - To facilitate using ML-Agents on +* **Cloud Training on AWS** - To facilitate using the ML-Agents toolkit on Amazon Web Services (AWS) machines, we provide a [guide](Training-on-Amazon-Web-Service.md) on how to set-up EC2 instances in addition to a public pre-configured Amazon Machine Image (AMI). -* **Cloud Training on Microsoft Azure** - To facilitate using ML-Agents on +* **Cloud Training on Microsoft Azure** - To facilitate using the ML-Agents toolkit on Azure machines, we provide a [guide](Training-on-Microsoft-Azure.md) on how to set-up virtual machine instances in addition to a pre-configured data science image. ## Summary and Next Steps -To briefly summarize: ML-Agents enables games and simulations built in Unity +To briefly summarize: The ML-Agents toolkit enables games and simulations built in Unity to serve as the platform for training intelligent agents. It is designed to enable a large variety of training modes and scenarios and comes packed with several features to enable researchers and developers to leverage diff --git a/docs/Migrating.md b/docs/Migrating.md index 2129a78af4..cb0dbfe088 100644 --- a/docs/Migrating.md +++ b/docs/Migrating.md @@ -1,14 +1,17 @@ -# Migrating from ML-Agents v0.3 to ML-Agents v0.4 +# Migrating from ML-Agents toolkit v0.3 to v0.4 ## Unity API * `using MLAgents;` needs to be added in all of the C# scripts that use ML-Agents. -# Migrating from ML-Agents v0.2 to ML-Agents v0.3 +## Python API + * We've changed some of the Python package dependencies in the `requirements.txt` file. Make sure to run `pip install .` within your `ml-agents/python` folder to update your Python packages. There are a large number of new features and improvements in ML-Agents v0.3 which change both the training process and Unity API in ways which will cause incompatibilities with environments made using older versions. This page is designed to highlight those changes for users familiar with v0.1 or v0.2 in order to ensure a smooth transition. +# Migrating from ML-Agents toolkit v0.2 to v0.3 + +There are a large number of new features and improvements in the ML-Agents toolkit v0.3 which change both the training process and Unity API in ways which will cause incompatibilities with environments made using older versions. This page is designed to highlight those changes for users familiar with v0.1 or v0.2 in order to ensure a smooth transition. ## Important - * ML-Agents is no longer compatible with Python 2. + * The ML-Agents toolkit is no longer compatible with Python 2. ## Python Training * The training script `ppo.py` and `PPO.ipynb` Python notebook have been replaced with a single `learn.py` script as the launching point for training with ML-Agents. For more information on using `learn.py`, see [here](). diff --git a/docs/Python-API.md b/docs/Python-API.md index 84f7a769bd..d3e747997c 100644 --- a/docs/Python-API.md +++ b/docs/Python-API.md @@ -1,6 +1,6 @@ # Python API -ML-Agents provides a Python API for controlling the agent simulation loop of a environment or game built with Unity. This API is used by the ML-Agent training algorithms (run with `learn.py`), but you can also write your Python programs using this API. 
+The ML-Agents toolkit provides a Python API for controlling the agent simulation loop of an environment or game built with Unity. This API is used by the ML-Agents training algorithms (run with `learn.py`), but you can also write your own Python programs using this API. The key objects in the Python API include: diff --git a/docs/Readme.md b/docs/Readme.md index e28360297c..29c26cb456 100644 --- a/docs/Readme.md +++ b/docs/Readme.md @@ -1,4 +1,4 @@ -# Unity ML-Agents Documentation +# Unity ML-Agents Toolkit Documentation ## Installation & Set-up * [Installation](Installation.md) @@ -7,7 +7,7 @@ * [Basic Guide](Basic-Guide.md) ## Getting Started - * [ML-Agents Overview](ML-Agents-Overview.md) + * [ML-Agents Toolkit Overview](ML-Agents-Overview.md) * [Background: Unity](Background-Unity.md) * [Background: Machine Learning](Background-Machine-Learning.md) * [Background: TensorFlow](Background-TensorFlow.md) diff --git a/docs/Training-Curriculum-Learning.md b/docs/Training-Curriculum-Learning.md index e51162fe06..2673a56561 100644 --- a/docs/Training-Curriculum-Learning.md +++ b/docs/Training-Curriculum-Learning.md @@ -10,7 +10,7 @@ task, such as moving toward an unobstructed goal, then the agent can easily lear accomplish the task. From there, we can slowly add to the difficulty of the task by increasing the size of the wall, until the agent can complete the initially near-impossible task of scaling the wall. We are including just such an environment with -ML-Agents 0.2, called Wall Jump. +the ML-Agents toolkit 0.2, called Wall Jump. ![Wall](images/curriculum.png) @@ -19,8 +19,8 @@ obstructs the path to the goal._ To see this in action, observe the two learning curves below. Each displays the reward over time for an agent trained using PPO with the same set of training hyperparameters. -The difference is that the agent on the left was trained using the full-height wall -version of the task, and the right agent was trained using the curriculum version of +The difference is that one agent was trained using the full-height wall +version of the task, and the other agent was trained using the curriculum version of the task. As you can see, without using curriculum learning the agent has a lot of difficulty. We think that by using well-crafted curricula, agents trained using reinforcement learning will be able to accomplish tasks otherwise much more difficult. diff --git a/docs/Training-ML-Agents.md b/docs/Training-ML-Agents.md index 2d81ce3b41..7e1e4be075 100644 --- a/docs/Training-ML-Agents.md +++ b/docs/Training-ML-Agents.md @@ -1,12 +1,12 @@ # Training ML-Agents -ML-Agents conducts training using an external Python training process. During training, this external process communicates with the Academy object in the Unity scene to generate a block of agent experiences. These experiences become the training set for a neural network used to optimize the agent's policy (which is essentially a mathematical function mapping observations to actions). In reinforcement learning, the neural network optimizes the policy by maximizing the expected rewards. In imitation learning, the neural network optimizes the policy to achieve the smallest difference between the actions chosen by the agent trainee and the actions chosen by the expert in the same situation. +The ML-Agents toolkit conducts training using an external Python training process. During training, this external process communicates with the Academy object in the Unity scene to generate a block of agent experiences. 
These experiences become the training set for a neural network used to optimize the agent's policy (which is essentially a mathematical function mapping observations to actions). In reinforcement learning, the neural network optimizes the policy by maximizing the expected rewards. In imitation learning, the neural network optimizes the policy to achieve the smallest difference between the actions chosen by the agent trainee and the actions chosen by the expert in the same situation. The output of the training process is a model file containing the optimized policy. This model file is a TensorFlow data graph containing the mathematical operations and the optimized weights selected during the training process. You can use the generated model file with the Internal Brain type in your Unity project to decide the best course of action for an agent. Use the Python program, `learn.py` to train your agents. This program can be found in the `python` directory of the ML-Agents SDK. The [configuration file](#training-config-file), `trainer_config.yaml` specifies the hyperparameters used during training. You can edit this file with a text editor to add a specific configuration for each brain. -For a broader overview of reinforcement learning, imitation learning and the ML-Agents training process, see [ML-Agents Overview](ML-Agents-Overview.md). +For a broader overview of reinforcement learning, imitation learning and the ML-Agents training process, see [ML-Agents Toolkit Overview](ML-Agents-Overview.md). ## Training with learn.py @@ -68,7 +68,7 @@ The training config file, `trainer_config.yaml` specifies the training method, t | batches_per_epoch | In imitation learning, the number of batches of training examples to collect before training the model.| BC | | beta | The strength of entropy regularization.| PPO, BC | | brain\_to\_imitate | For imitation learning, the name of the GameObject containing the Brain component to imitate. | BC | -| buffer_size | The number of experiences to collect before updating the policy model. | PPO, BC | +| buffer_size | The number of experiences to collect before updating the policy model. | PPO | | curiosity\_enc\_size | The size of the encoding to use in the forward and inverse models in the Curioity module. | PPO | | curiosity_strength | Magnitude of intrinsic reward generated by Intrinsic Curiosity Module. | PPO | | epsilon | Influences how rapidly the policy can evolve during training.| PPO, BC | diff --git a/docs/Training-on-Amazon-Web-Service.md b/docs/Training-on-Amazon-Web-Service.md index eeac39c5ce..b61036098a 100644 --- a/docs/Training-on-Amazon-Web-Service.md +++ b/docs/Training-on-Amazon-Web-Service.md @@ -4,7 +4,7 @@ This page contains instructions for setting up an EC2 instance on Amazon Web Ser ## Preconfigured AMI -We've prepared an preconfigured AMI for you with the ID: `ami-6880c317` in the `us-east-1` region. It was created as a modification of [Deep Learning AMI (Ubuntu)](https://aws.amazon.com/marketplace/pp/B077GCH38C). If you want to do training without the headless mode, you need to enable X Server on it. +We've prepared a preconfigured AMI for you with the ID: `ami-18642967` in the `us-east-1` region. It was created as a modification of [Deep Learning AMI (Ubuntu)](https://aws.amazon.com/marketplace/pp/B077GCH38C). If you want to do training without headless mode, you need to enable X Server on it. 
After launching your EC2 instance using the AMI and connecting to it via SSH, run the following commands to enable it: ``` //Start the X Server, press Enter to come to the command line @@ -37,11 +37,11 @@ nvidia-smi export DISPLAY=:0 ``` -## Configuring your own Instance +## Configuring your own instance You could also choose to configure your own instance. To begin with, you will need an EC2 instance which contains the latest Nvidia drivers, CUDA9, and cuDNN. In this tutorial we used the [Deep Learning AMI (Ubuntu)](https://aws.amazon.com/marketplace/pp/B077GCH38C) listed under AWS Marketplace with a p2.xlarge instance. -### Installing ML-Agents on the instance +### Installing the ML-Agents toolkit on the instance After launching your EC2 instance using the ami and ssh into it: diff --git a/docs/Training-on-Microsoft-Azure-Custom-Instance.md b/docs/Training-on-Microsoft-Azure-Custom-Instance.md index fd2423b018..7e0764c6a7 100644 --- a/docs/Training-on-Microsoft-Azure-Custom-Instance.md +++ b/docs/Training-on-Microsoft-Azure-Custom-Instance.md @@ -1,4 +1,4 @@ -# Setting up a Custom Instance on Microsoft Azure for Training +# Setting up a Custom Instance on Microsoft Azure for Training (works with the ML-Agents toolkit v0.3) This page contains instructions for setting up a custom Virtual Machine on Microsoft Azure so you can running ML-Agents training in the cloud. diff --git a/docs/Training-on-Microsoft-Azure.md b/docs/Training-on-Microsoft-Azure.md index 3a7f6886a9..fdd93ac23a 100644 --- a/docs/Training-on-Microsoft-Azure.md +++ b/docs/Training-on-Microsoft-Azure.md @@ -1,4 +1,4 @@ -# Training on Microsoft Azure +# Training on Microsoft Azure (works with the ML-Agents toolkit v0.3) This page contains instructions for setting up training on Microsoft Azure through either [Azure Container Instances](https://azure.microsoft.com/services/container-instances/) or Virtual Machines. Non "headless" training has not yet been tested to verify support. @@ -78,4 +78,4 @@ Once you have started training, you can [use Tensorboard to observe the training ## Running on Azure Container Instances -[Azure Container Instances](https://azure.microsoft.com/services/container-instances/) allow you to spin up a container, on demand, that will run your training and then be shut down. This ensures you aren't leaving a billable VM running when it isn't needed. You can read more about [ML-Agents support for Docker containers here](Using-Docker.md). Using ACI enables you to offload training of your models without needing to install Python and Tensorflow on your own computer. You can find [instructions, including a pre-deployed image in DockerHub for you to use, available here](https://github.com/druttka/unity-ml-on-azure). \ No newline at end of file +[Azure Container Instances](https://azure.microsoft.com/services/container-instances/) allow you to spin up a container, on demand, that will run your training and then be shut down. This ensures you aren't leaving a billable VM running when it isn't needed. You can read more about [the ML-Agents toolkit's support for Docker containers here](Using-Docker.md). Using ACI enables you to offload training of your models without needing to install Python and Tensorflow on your own computer. You can find [instructions, including a pre-deployed image in DockerHub for you to use, available here](https://github.com/druttka/unity-ml-on-azure). 
\ No newline at end of file diff --git a/docs/Using-TensorFlow-Sharp-in-Unity.md b/docs/Using-TensorFlow-Sharp-in-Unity.md index 6bccf3ec0d..6652c2c979 100644 --- a/docs/Using-TensorFlow-Sharp-in-Unity.md +++ b/docs/Using-TensorFlow-Sharp-in-Unity.md @@ -1,6 +1,6 @@ # Using TensorFlowSharp in Unity (Experimental) -ML-Agents allows you to use pre-trained [TensorFlow graphs](https://www.tensorflow.org/programmers_guide/graphs) inside your Unity games. This support is possible thanks to [the TensorFlowSharp project](https://github.com/migueldeicaza/TensorFlowSharp). The primary purpose for this support is to use the TensorFlow models produced by the ML-Agents own training programs, but a side benefit is that you can use any TensorFlow model. +The ML-Agents toolkit allows you to use pre-trained [TensorFlow graphs](https://www.tensorflow.org/programmers_guide/graphs) inside your Unity games. This support is possible thanks to [the TensorFlowSharp project](https://github.com/migueldeicaza/TensorFlowSharp). The primary purpose for this support is to use the TensorFlow models produced by the ML-Agents toolkit's own training programs, but a side benefit is that you can use any TensorFlow model. _Notice: This feature is still experimental. While it is possible to embed trained models into Unity games, Unity Technologies does not officially support this use-case for production games at this time. As such, no guarantees are provided regarding the quality of experience. If you encounter issues regarding battery life, or general performance (especially on mobile), please let us know._ diff --git a/docs/Using-Tensorboard.md b/docs/Using-Tensorboard.md index 28d0b4e9b2..013efc97ee 100644 --- a/docs/Using-Tensorboard.md +++ b/docs/Using-Tensorboard.md @@ -1,6 +1,6 @@ # Using TensorBoard to Observe Training -ML-Agents saves statistics during learning session that you can view with a TensorFlow utility named, [TensorBoard](https://www.tensorflow.org/programmers_guide/summaries_and_tensorboard). +The ML-Agents toolkit saves statistics during learning sessions that you can view with a TensorFlow utility named [TensorBoard](https://www.tensorflow.org/programmers_guide/summaries_and_tensorboard). The `learn.py` program saves training statistics to a folder named `summaries`, organized by the `run-id` value you assign to a training session. @@ -21,7 +21,7 @@ On the left side of the TensorBoard window, you can select which of the training When you run the training program, `learn.py`, you can use the `--save-freq` option to specify how frequently to save the statistics. -## ML-Agents training statistics +## The ML-Agents toolkit training statistics The ML-agents training program saves the following statistics: diff --git a/docs/dox-ml-agents.conf b/docs/dox-ml-agents.conf index e10bceed8e..9b8d8be418 100644 --- a/docs/dox-ml-agents.conf +++ b/docs/dox-ml-agents.conf @@ -25,7 +25,7 @@ DOXYFILE_ENCODING = UTF-8 # title of most generated pages and in a few other places. # The default value is: My Project. -PROJECT_NAME = "ML-Agents" +PROJECT_NAME = "ML-Agents Toolkit" # The PROJECT_NUMBER tag can be used to enter a project or revision number. 
diff --git a/docs/dox-ml-agents.conf b/docs/dox-ml-agents.conf
index e10bceed8e..9b8d8be418 100644
--- a/docs/dox-ml-agents.conf
+++ b/docs/dox-ml-agents.conf
@@ -25,7 +25,7 @@ DOXYFILE_ENCODING = UTF-8
 # title of most generated pages and in a few other places.
 # The default value is: My Project.

-PROJECT_NAME = "ML-Agents"
+PROJECT_NAME = "ML-Agents Toolkit"

 # The PROJECT_NUMBER tag can be used to enter a project or revision number. This
 # could be handy for archiving the generated documentation or if some version
diff --git a/docs/images/crawler.png b/docs/images/crawler.png
index 7b5a5e9f99..3b5c46050a 100644
Binary files a/docs/images/crawler.png and b/docs/images/crawler.png differ
diff --git a/docs/images/pyramids.png b/docs/images/pyramids.png
index 6378d92eff..9d26a7d8cc 100644
Binary files a/docs/images/pyramids.png and b/docs/images/pyramids.png differ
diff --git a/docs/images/walker.png b/docs/images/walker.png
index 4ba36878f5..af901fa943 100644
Binary files a/docs/images/walker.png and b/docs/images/walker.png differ
diff --git a/docs/localized/zh-CN/README.md b/docs/localized/zh-CN/README.md
index 20304d1003..fc57ff2fd5 100755
--- a/docs/localized/zh-CN/README.md
+++ b/docs/localized/zh-CN/README.md
@@ -1,6 +1,6 @@
-# Unity ML-Agents (Beta)
+# Unity ML-Agents 工具包(Beta)

 **注意:** 本文档为v0.3版本文档的部分翻译版,目前并不会随着英文版文档更新而更新。若要查看更新更全的英文版文档,请查看[这里](https://github.com/Unity-Technologies/ml-agents)。
diff --git a/docs/localized/zh-CN/docs/Learning-Environment-Create-New.md b/docs/localized/zh-CN/docs/Learning-Environment-Create-New.md
index 892d5f32ea..541bc5485d 100755
--- a/docs/localized/zh-CN/docs/Learning-Environment-Create-New.md
+++ b/docs/localized/zh-CN/docs/Learning-Environment-Create-New.md
@@ -420,7 +420,7 @@ public override void AgentAction(float[] vectorAction, string textAction)
 本节简要回顾了在 Unity 环境中使用 Agent 时
 如何组织场景。

-您需要在场景中包含三种游戏对象才能使用 Unity ML-Agents:
+您需要在场景中包含三种游戏对象才能使用 Unity ML-Agents 工具包:
 * Academy
 * Brain
 * Agent
diff --git a/docs/localized/zh-CN/docs/Learning-Environment-Examples.md b/docs/localized/zh-CN/docs/Learning-Environment-Examples.md
index ef43ca4860..718f491780 100644
--- a/docs/localized/zh-CN/docs/Learning-Environment-Examples.md
+++ b/docs/localized/zh-CN/docs/Learning-Environment-Examples.md
@@ -1,6 +1,6 @@
 # 学习环境示例

-Unity ML-Agents 中内置了一些搭建好的学习环境的示例,并且我们还在不断增加新的示例,这些示例演示了该平台的各种功能。示例环境位于
+Unity ML-Agents 工具包中内置了一些搭建好的学习环境的示例,并且我们还在不断增加新的示例,这些示例演示了该平台的各种功能。示例环境位于
 `unity-environment/Assets/ML-Agents/Examples` 中,并且我们在下文中进行了简单的介绍。
 此外,我们的 [首届 ML-Agents 挑战赛](https://connect.unity.com/challenges/ml-agents-1)
diff --git a/docs/localized/zh-CN/docs/Readme.md b/docs/localized/zh-CN/docs/Readme.md
index 5770047ef4..81f9b1679b 100644
--- a/docs/localized/zh-CN/docs/Readme.md
+++ b/docs/localized/zh-CN/docs/Readme.md
@@ -1,4 +1,4 @@
-# Unity ML-Agents 文档
+# Unity ML-Agents 工具包文档

 ## 入门
 * [ML-Agents 概述*](ML-Agents-Overview.md)
diff --git a/python/Basics.ipynb b/python/Basics.ipynb
index f0f9cb0cf6..dbae6a3a8d 100755
--- a/python/Basics.ipynb
+++ b/python/Basics.ipynb
@@ -4,9 +4,9 @@
   "cell_type": "markdown",
   "metadata": {},
   "source": [
-    "# Unity ML-Agents\n",
+    "# Unity ML-Agents Toolkit\n",
     "## Environment Basics\n",
-    "This notebook contains a walkthrough of the basic functions of the Python API for Unity ML-Agents. For instructions on building a Unity environment, see [here](https://github.com/Unity-Technologies/ml-agents/blob/master/docs/Getting-Started-with-Balance-Ball.md)."
+    "This notebook contains a walkthrough of the basic functions of the Python API for the Unity ML-Agents toolkit. For instructions on building a Unity environment, see [here](https://github.com/Unity-Technologies/ml-agents/blob/master/docs/Getting-Started-with-Balance-Ball.md)."
   ]
  },
  {
@@ -36,7 +36,7 @@
   "source": [
    "### 2. Load dependencies\n",
    "\n",
-   "The following loads the necessary dependencies and checks the Python version (at runtime). ML-Agents (v0.3 onwards) requires Python 3."
+   "The following loads the necessary dependencies and checks the Python version (at runtime). ML-Agents Toolkit (v0.3 onwards) requires Python 3."
   ]
  },
  {
@@ -60,7 +60,7 @@
    "\n",
    "# check Python version\n",
    "if (sys.version_info[0] < 3):\n",
-   "    raise Exception(\"ERROR: ML-Agents (v0.3 onwards) requires Python 3\")"
+   "    raise Exception(\"ERROR: ML-Agents Toolkit (v0.3 onwards) requires Python 3\")"
   ]
  },
 {
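As a condensed counterpart to the notebook walkthrough above, here is a sketch of the basic Python API loop, assuming a single external brain with a continuous vector action space; the build name `3DBall` is a placeholder for your own compiled environment:

```python
import numpy as np
from unityagents import UnityEnvironment

env = UnityEnvironment(file_name="3DBall")  # placeholder build name
brain_name = env.external_brain_names[0]
brain = env.brains[brain_name]

# Reset in training mode, then step with random continuous actions,
# one action row per agent connected to the brain.
info = env.reset(train_mode=True)[brain_name]
for _ in range(100):
    action = np.random.randn(len(info.agents), brain.vector_action_space_size)
    info = env.step(action)[brain_name]
env.close()
```

When run against a compatible build, `info` exposes the same fields the trainers rely on (`vector_observations`, `rewards`, `local_done`, and so on).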
+ "The following loads the necessary dependencies and checks the Python version (at runtime). ML-Agents Toolkit (v0.3 onwards) requires Python 3." ] }, { @@ -60,7 +60,7 @@ "\n", "# check Python version\n", "if (sys.version_info[0] < 3):\n", - " raise Exception(\"ERROR: ML-Agents (v0.3 onwards) requires Python 3\")" + " raise Exception(\"ERROR: ML-Agents Toolkit (v0.3 onwards) requires Python 3\")" ] }, { diff --git a/python/learn.py b/python/learn.py index bea0e0bf29..36036fcfd3 100755 --- a/python/learn.py +++ b/python/learn.py @@ -1,4 +1,4 @@ -# # Unity ML Agents +# # Unity ML-Agents Toolkit # ## ML-Agent Learning import logging diff --git a/python/setup.py b/python/setup.py index bb4e33d848..c69444fbd4 100755 --- a/python/setup.py +++ b/python/setup.py @@ -7,7 +7,7 @@ required = f.read().splitlines() setup(name='unityagents', - version='0.3.0', + version='0.4.0', description='Unity Machine Learning Agents', license='Apache License 2.0', author='Unity Technologies', diff --git a/python/trainer_config.yaml b/python/trainer_config.yaml index dd76dbf9a4..9f750e755f 100644 --- a/python/trainer_config.yaml +++ b/python/trainer_config.yaml @@ -114,10 +114,11 @@ VisualPyramidBrain: Ball3DBrain: normalize: true - batch_size: 1200 + batch_size: 64 buffer_size: 12000 summary_freq: 1000 time_horizon: 1000 + lambd: 0.99 gamma: 0.995 beta: 0.001 diff --git a/python/unityagents/brain.py b/python/unityagents/brain.py index 2188291018..d2b16d0fcb 100755 --- a/python/unityagents/brain.py +++ b/python/unityagents/brain.py @@ -3,8 +3,8 @@ class BrainInfo: def __init__(self, visual_observation, vector_observation, text_observations, memory=None, - reward=None, agents=None, local_done=None, - vector_action=None, text_action=None, max_reached=None): + reward=None, agents=None, local_done=None, + vector_action=None, text_action=None, max_reached=None): """ Describes experience at current step of all agents linked to a brain. 
""" @@ -49,10 +49,10 @@ def __str__(self): Vector Action space type: {5} Vector Action space size (per agent): {6} Vector Action descriptions: {7}'''.format(self.brain_name, - str(self.number_visual_observations), - self.vector_observation_space_type, - str(self.vector_observation_space_size), - str(self.num_stacked_vector_observations), - self.vector_action_space_type, - str(self.vector_action_space_size), - ', '.join(self.vector_action_descriptions)) + str(self.number_visual_observations), + self.vector_observation_space_type, + str(self.vector_observation_space_size), + str(self.num_stacked_vector_observations), + self.vector_action_space_type, + str(self.vector_action_space_size), + ', '.join(self.vector_action_descriptions)) diff --git a/python/unitytrainers/bc/trainer.py b/python/unitytrainers/bc/trainer.py index 72e2a04616..63b7fcafbc 100755 --- a/python/unitytrainers/bc/trainer.py +++ b/python/unitytrainers/bc/trainer.py @@ -1,4 +1,4 @@ -# # Unity ML Agents +# # Unity ML-Agents Toolkit # ## ML-Agent Learning (Imitation) # Contains an implementation of Behavioral Cloning Algorithm @@ -55,10 +55,10 @@ def __init__(self, sess, env, brain_name, trainer_parameters, training, seed): self.training_buffer = Buffer() self.is_continuous_action = (env.brains[brain_name].vector_action_space_type == "continuous") self.is_continuous_observation = (env.brains[brain_name].vector_observation_space_type == "continuous") - self.use_observations = (env.brains[brain_name].number_visual_observations > 0) - if self.use_observations: + self.use_visual_observations = (env.brains[brain_name].number_visual_observations > 0) + if self.use_visual_observations: logger.info('Cannot use observations with imitation learning') - self.use_states = (env.brains[brain_name].vector_observation_space_size > 0) + self.use_vector_observations = (env.brains[brain_name].vector_observation_space_size > 0) self.summary_path = trainer_parameters['summary_path'] if not os.path.exists(self.summary_path): os.makedirs(self.summary_path) @@ -144,16 +144,15 @@ def take_action(self, all_brain_info: AllBrainInfo): agent_brain = all_brain_info[self.brain_name] feed_dict = {self.model.dropout_rate: 1.0, self.model.sequence_length: 1} - if self.use_observations: + if self.use_visual_observations: for i, _ in enumerate(agent_brain.visual_observations): feed_dict[self.model.visual_in[i]] = agent_brain.visual_observations[i] - if self.use_states: + if self.use_vector_observations: feed_dict[self.model.vector_in] = agent_brain.vector_observations if self.use_recurrent: if agent_brain.memories.shape[1] == 0: agent_brain.memories = np.zeros((len(agent_brain.agents), self.m_size)) feed_dict[self.model.memory_in] = agent_brain.memories - if self.use_recurrent: agent_action, memories = self.sess.run(self.inference_run_list, feed_dict) return agent_action, memories, None, None else: @@ -192,11 +191,11 @@ def add_experiences(self, curr_info: AllBrainInfo, next_info: AllBrainInfo, take info_teacher_record, next_info_teacher_record = "true", "true" if info_teacher_record == "true" and next_info_teacher_record == "true": if not stored_info_teacher.local_done[idx]: - if self.use_observations: + if self.use_visual_observations: for i, _ in enumerate(stored_info_teacher.visual_observations): self.training_buffer[agent_id]['visual_observations%d' % i]\ .append(stored_info_teacher.visual_observations[i][idx]) - if self.use_states: + if self.use_vector_observations: self.training_buffer[agent_id]['vector_observations']\ 
.append(stored_info_teacher.vector_observations[idx]) if self.use_recurrent: @@ -276,7 +275,6 @@ def update_model(self): """ Uses training_buffer to update model. """ - self.training_buffer.update_buffer.shuffle() batch_losses = [] for j in range( @@ -284,33 +282,31 @@ def update_model(self): _buffer = self.training_buffer.update_buffer start = j * self.n_sequences end = (j + 1) * self.n_sequences - batch_states = np.array(_buffer['vector_observations'][start:end]) - batch_actions = np.array(_buffer['actions'][start:end]) feed_dict = {self.model.dropout_rate: 0.5, self.model.batch_size: self.n_sequences, self.model.sequence_length: self.sequence_length} if self.is_continuous_action: - feed_dict[self.model.true_action] = batch_actions.reshape([-1, self.brain.vector_action_space_size]) - else: - feed_dict[self.model.true_action] = batch_actions.reshape([-1]) - if not self.is_continuous_observation: - feed_dict[self.model.vector_in] = batch_states.reshape([-1, self.brain.num_stacked_vector_observations]) + feed_dict[self.model.true_action] = np.array(_buffer['actions'][start:end]).\ + reshape([-1, self.brain.vector_action_space_size]) else: - feed_dict[self.model.vector_in] = batch_states.reshape([-1, self.brain.vector_observation_space_size * - self.brain.num_stacked_vector_observations]) - if self.use_observations: + feed_dict[self.model.true_action] = np.array(_buffer['actions'][start:end]).reshape([-1]) + if self.use_vector_observations: + if not self.is_continuous_observation: + feed_dict[self.model.vector_in] = np.array(_buffer['vector_observations'][start:end])\ + .reshape([-1, self.brain.num_stacked_vector_observations]) + else: + feed_dict[self.model.vector_in] = np.array(_buffer['vector_observations'][start:end])\ + .reshape([-1, self.brain.vector_observation_space_size * self.brain.num_stacked_vector_observations]) + if self.use_visual_observations: for i, _ in enumerate(self.model.visual_in): _obs = np.array(_buffer['visual_observations%d' % i][start:end]) - (_batch, _seq, _w, _h, _c) = _obs.shape - feed_dict[self.model.visual_in[i]] = _obs.reshape([-1, _w, _h, _c]) + feed_dict[self.model.visual_in[i]] = _obs if self.use_recurrent: feed_dict[self.model.memory_in] = np.zeros([self.n_sequences, self.m_size]) - loss, _ = self.sess.run([self.model.loss, self.model.update], feed_dict=feed_dict) batch_losses.append(loss) if len(batch_losses) > 0: self.stats['losses'].append(np.mean(batch_losses)) else: self.stats['losses'].append(0) - diff --git a/python/unitytrainers/models.py b/python/unitytrainers/models.py index 83574a93c2..1654dfbba3 100755 --- a/python/unitytrainers/models.py +++ b/python/unitytrainers/models.py @@ -168,10 +168,7 @@ def create_observation_streams(self, num_streams, h_size, num_layers): :return: List of encoded streams. """ brain = self.brain - if brain.vector_action_space_type == "continuous": - activation_fn = tf.nn.tanh - else: - activation_fn = self.swish + activation_fn = self.swish self.visual_in = [] for i in range(brain.number_visual_observations): diff --git a/python/unitytrainers/ppo/trainer.py b/python/unitytrainers/ppo/trainer.py index 482d40180a..92c36d5e29 100755 --- a/python/unitytrainers/ppo/trainer.py +++ b/python/unitytrainers/ppo/trainer.py @@ -1,4 +1,4 @@ -# # Unity ML Agents +# # Unity ML-Agents Toolkit # ## ML-Agent Learning (PPO) # Contains an implementation of PPO as described (https://arxiv.org/abs/1707.06347). 
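For reference, the clipped surrogate objective from the PPO paper cited in the header above is

$$
L^{CLIP}(\theta) = \hat{\mathbb{E}}_t\Big[\min\big(r_t(\theta)\,\hat{A}_t,\ \operatorname{clip}(r_t(\theta),\,1-\epsilon,\,1+\epsilon)\,\hat{A}_t\big)\Big],
\qquad
r_t(\theta) = \frac{\pi_\theta(a_t \mid s_t)}{\pi_{\theta_{\text{old}}}(a_t \mid s_t)},
$$

where $\hat{A}_t$ is the advantage estimate at timestep $t$ and $\epsilon$ is the clipping parameter.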
@@ -8,7 +8,7 @@
 import numpy as np
 import tensorflow as tf

-from unityagents import AllBrainInfo
+from unityagents import AllBrainInfo, BrainInfo
 from unitytrainers.buffer import Buffer
 from unitytrainers.ppo.models import PPOModel
 from unitytrainers.trainer import UnityTrainerException, Trainer
@@ -196,22 +196,61 @@ def take_action(self, all_brain_info: AllBrainInfo):
         else:
             return run_out[self.model.output], None, None, run_out

+    def construct_curr_info(self, next_info: BrainInfo) -> BrainInfo:
+        """
+        Constructs a BrainInfo which contains the most recent previous experiences for all agents
+        which correspond to the agents in a provided next_info.
+        :BrainInfo next_info: A t+1 BrainInfo.
+        :return: curr_info: Reconstructed BrainInfo to match agents of next_info.
+        """
+        # One list per visual observation stream, so brains with multiple
+        # cameras are handled correctly.
+        visual_observations = [[] for _ in next_info.visual_observations]
+        vector_observations = []
+        text_observations = []
+        memories = []
+        rewards = []
+        local_dones = []
+        max_reacheds = []
+        agents = []
+        prev_vector_actions = []
+        prev_text_actions = []
+        for agent_id in next_info.agents:
+            agent_brain_info = self.training_buffer[agent_id].last_brain_info
+            # Fall back to next_info before indexing, so a missing previous
+            # BrainInfo does not raise an AttributeError.
+            if agent_brain_info is None:
+                agent_brain_info = next_info
+            agent_index = agent_brain_info.agents.index(agent_id)
+            for i in range(len(next_info.visual_observations)):
+                visual_observations[i].append(agent_brain_info.visual_observations[i][agent_index])
+            vector_observations.append(agent_brain_info.vector_observations[agent_index])
+            text_observations.append(agent_brain_info.text_observations[agent_index])
+            if self.use_recurrent:
+                memories.append(agent_brain_info.memories[agent_index])
+            rewards.append(agent_brain_info.rewards[agent_index])
+            local_dones.append(agent_brain_info.local_done[agent_index])
+            max_reacheds.append(agent_brain_info.max_reached[agent_index])
+            agents.append(agent_brain_info.agents[agent_index])
+            prev_vector_actions.append(agent_brain_info.previous_vector_actions[agent_index])
+            prev_text_actions.append(agent_brain_info.previous_text_actions[agent_index])
+        curr_info = BrainInfo(visual_observations, vector_observations, text_observations, memories, rewards,
+                              agents, local_dones, prev_vector_actions, prev_text_actions, max_reacheds)
+        return curr_info
+
     def generate_intrinsic_rewards(self, curr_info, next_info):
         """
         Generates intrinsic reward used for Curiosity-based training.
-        :param curr_info: Current BrainInfo.
-        :param next_info: Next BrainInfo.
+        :BrainInfo curr_info: Current BrainInfo.
+        :BrainInfo next_info: Next BrainInfo.
         :return: Intrinsic rewards for all agents.
""" if self.use_curiosity: - if curr_info.agents != next_info.agents: - raise UnityTrainerException("Training with Curiosity-driven exploration" - " and On-Demand Decision making is currently not supported.") - feed_dict = {self.model.batch_size: len(curr_info.vector_observations), self.model.sequence_length: 1} + feed_dict = {self.model.batch_size: len(next_info.vector_observations), self.model.sequence_length: 1} if self.is_continuous_action: feed_dict[self.model.output] = next_info.previous_vector_actions else: feed_dict[self.model.action_holder] = next_info.previous_vector_actions.flatten() + + if curr_info.agents != next_info.agents: + curr_info = self.construct_curr_info(next_info) + if self.use_visual_obs: for i in range(len(curr_info.visual_observations)): feed_dict[self.model.visual_in[i]] = curr_info.visual_observations[i] @@ -262,12 +301,12 @@ def add_experiences(self, curr_all_info: AllBrainInfo, next_all_info: AllBrainIn curr_info = curr_all_info[self.brain_name] next_info = next_all_info[self.brain_name] - intrinsic_rewards = self.generate_intrinsic_rewards(curr_info, next_info) - for agent_id in curr_info.agents: self.training_buffer[agent_id].last_brain_info = curr_info self.training_buffer[agent_id].last_take_action_outputs = take_action_outputs + intrinsic_rewards = self.generate_intrinsic_rewards(curr_info, next_info) + for agent_id in next_info.agents: stored_info = self.training_buffer[agent_id].last_brain_info stored_take_action_outputs = self.training_buffer[agent_id].last_take_action_outputs diff --git a/python/unitytrainers/trainer.py b/python/unitytrainers/trainer.py index 917241e9a2..aa5b3033f5 100755 --- a/python/unitytrainers/trainer.py +++ b/python/unitytrainers/trainer.py @@ -1,4 +1,4 @@ -# # Unity ML Agents +# # Unity ML-Agents Toolkit import logging import tensorflow as tf diff --git a/python/unitytrainers/trainer_controller.py b/python/unitytrainers/trainer_controller.py index 27905cd91b..ff067eb19a 100644 --- a/python/unitytrainers/trainer_controller.py +++ b/python/unitytrainers/trainer_controller.py @@ -1,4 +1,4 @@ -# # Unity ML Agents +# # Unity ML-Agents Toolkit # ## ML-Agent Learning # Launches unitytrainers for each External Brains in a Unity Environment diff --git a/unity-environment/Assets/ML-Agents/Examples/Crawler/Scenes/CrawlerDynamicTarget.unity b/unity-environment/Assets/ML-Agents/Examples/Crawler/Scenes/CrawlerDynamicTarget.unity index 2c5b0807d6..0862855ad9 100644 --- a/unity-environment/Assets/ML-Agents/Examples/Crawler/Scenes/CrawlerDynamicTarget.unity +++ b/unity-environment/Assets/ML-Agents/Examples/Crawler/Scenes/CrawlerDynamicTarget.unity @@ -818,7 +818,7 @@ Camera: m_Enabled: 1 serializedVersion: 2 m_ClearFlags: 2 - m_BackGroundColor: {r: 0.5043253, g: 0.5998091, b: 0.64705884, a: 0} + m_BackGroundColor: {r: 0.39609292, g: 0.49962592, b: 0.6509434, a: 0} m_NormalizedViewPortRect: serializedVersion: 2 x: 0 @@ -882,7 +882,8 @@ MonoBehaviour: m_Name: (Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone) m_EditorClassIdentifier: broadcast: 1 - continuousPlayerActions: [] + keyContinuousPlayerActions: [] + axisContinuousPlayerActions: [] discretePlayerActions: [] defaultAction: 0 brain: {fileID: 393360180} diff --git a/unity-environment/Assets/ML-Agents/Examples/Crawler/Scenes/CrawlerStaticTarget.unity b/unity-environment/Assets/ML-Agents/Examples/Crawler/Scenes/CrawlerStaticTarget.unity index 
7da5344ddc..029786b0ed 100644 --- a/unity-environment/Assets/ML-Agents/Examples/Crawler/Scenes/CrawlerStaticTarget.unity +++ b/unity-environment/Assets/ML-Agents/Examples/Crawler/Scenes/CrawlerStaticTarget.unity @@ -239,6 +239,23 @@ Prefab: m_RemovedComponents: [] m_ParentPrefab: {fileID: 100100000, guid: 1b9d167a4e71146a883212e4f08bda88, type: 2} m_IsPrefabParent: 0 +--- !u!114 &355383963 +MonoBehaviour: + m_ObjectHideFlags: 0 + m_PrefabParentObject: {fileID: 0} + m_PrefabInternal: {fileID: 0} + m_GameObject: {fileID: 0} + m_Enabled: 1 + m_EditorHideFlags: 0 + m_Script: {fileID: 11500000, guid: 41e9bda8f3cf1492fa74926a530f6f70, type: 3} + m_Name: (Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone) + m_EditorClassIdentifier: + broadcast: 1 + keyContinuousPlayerActions: [] + axisContinuousPlayerActions: [] + discretePlayerActions: [] + defaultAction: 0 + brain: {fileID: 393360180} --- !u!1001 &390093069 Prefab: m_ObjectHideFlags: 0 @@ -434,11 +451,11 @@ MonoBehaviour: vectorObservationSpaceType: 1 brainType: 0 CoreBrains: - - {fileID: 1141029800} - - {fileID: 1718413264} - - {fileID: 589256681} - - {fileID: 967284145} - instanceID: 25512 + - {fileID: 355383963} + - {fileID: 1368353555} + - {fileID: 1611747288} + - {fileID: 1663417967} + instanceID: 44260 --- !u!1 &425632980 GameObject: m_ObjectHideFlags: 0 @@ -529,18 +546,6 @@ Transform: m_PrefabParentObject: {fileID: 4554001968342878, guid: 1b9d167a4e71146a883212e4f08bda88, type: 2} m_PrefabInternal: {fileID: 666265353} ---- !u!114 &589256681 -MonoBehaviour: - m_ObjectHideFlags: 0 - m_PrefabParentObject: {fileID: 0} - m_PrefabInternal: {fileID: 0} - m_GameObject: {fileID: 0} - m_Enabled: 1 - m_EditorHideFlags: 0 - m_Script: {fileID: 11500000, guid: 35813a1be64e144f887d7d5f15b963fa, type: 3} - m_Name: (Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone) - m_EditorClassIdentifier: - brain: {fileID: 393360180} --- !u!1 &636540579 GameObject: m_ObjectHideFlags: 0 @@ -1048,30 +1053,7 @@ Prefab: m_RemovedComponents: [] m_ParentPrefab: {fileID: 100100000, guid: 1b9d167a4e71146a883212e4f08bda88, type: 2} m_IsPrefabParent: 0 ---- !u!114 &967284145 -MonoBehaviour: - m_ObjectHideFlags: 0 - m_PrefabParentObject: {fileID: 0} - m_PrefabInternal: {fileID: 0} - m_GameObject: {fileID: 0} - m_Enabled: 1 - m_EditorHideFlags: 0 - m_Script: {fileID: 11500000, guid: 8b23992c8eb17439887f5e944bf04a40, type: 3} - m_Name: (Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone) - m_EditorClassIdentifier: - broadcast: 1 - graphModel: {fileID: 4900000, guid: 24ef4b74625ba4c53b782ec33ad9f7d6, type: 3} - graphScope: - graphPlaceholders: [] - BatchSizePlaceholderName: batch_size - VectorObservationPlacholderName: vector_observation - RecurrentInPlaceholderName: recurrent_in - RecurrentOutPlaceholderName: recurrent_out - VisualObservationPlaceholderName: [] - ActionPlaceholderName: action - PreviousActionPlaceholderName: prev_action - brain: {fileID: 393360180} ---- !u!114 &1141029800 +--- 
!u!114 &1368353555 MonoBehaviour: m_ObjectHideFlags: 0 m_PrefabParentObject: {fileID: 0} @@ -1079,15 +1061,11 @@ MonoBehaviour: m_GameObject: {fileID: 0} m_Enabled: 1 m_EditorHideFlags: 0 - m_Script: {fileID: 11500000, guid: 41e9bda8f3cf1492fa74926a530f6f70, type: 3} - m_Name: (Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone) + m_Script: {fileID: 11500000, guid: 943466ab374444748a364f9d6c3e2fe2, type: 3} + m_Name: (Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone) m_EditorClassIdentifier: broadcast: 1 - keyContinuousPlayerActions: [] - axisContinuousPlayerActions: [] - discretePlayerActions: [] - defaultAction: 0 - brain: {fileID: 393360180} + brain: {fileID: 0} --- !u!1001 &1387246280 Prefab: m_ObjectHideFlags: 0 @@ -1255,7 +1233,7 @@ Camera: m_Enabled: 1 serializedVersion: 2 m_ClearFlags: 2 - m_BackGroundColor: {r: 0.5043253, g: 0.5998091, b: 0.64705884, a: 0} + m_BackGroundColor: {r: 0.39609292, g: 0.49962592, b: 0.6509434, a: 0} m_NormalizedViewPortRect: serializedVersion: 2 x: 0 @@ -1852,6 +1830,41 @@ MeshFilter: m_PrefabInternal: {fileID: 0} m_GameObject: {fileID: 1591997500} m_Mesh: {fileID: 10202, guid: 0000000000000000e000000000000000, type: 0} +--- !u!114 &1611747288 +MonoBehaviour: + m_ObjectHideFlags: 0 + m_PrefabParentObject: {fileID: 0} + m_PrefabInternal: {fileID: 0} + m_GameObject: {fileID: 0} + m_Enabled: 1 + m_EditorHideFlags: 0 + m_Script: {fileID: 11500000, guid: 35813a1be64e144f887d7d5f15b963fa, type: 3} + m_Name: (Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone) + m_EditorClassIdentifier: + brain: {fileID: 393360180} +--- !u!114 &1663417967 +MonoBehaviour: + m_ObjectHideFlags: 0 + m_PrefabParentObject: {fileID: 0} + m_PrefabInternal: {fileID: 0} + m_GameObject: {fileID: 0} + m_Enabled: 1 + m_EditorHideFlags: 0 + m_Script: {fileID: 11500000, guid: 8b23992c8eb17439887f5e944bf04a40, type: 3} + m_Name: (Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone) + m_EditorClassIdentifier: + broadcast: 1 + graphModel: {fileID: 4900000, guid: 24ef4b74625ba4c53b782ec33ad9f7d6, type: 3} + graphScope: + graphPlaceholders: [] + BatchSizePlaceholderName: batch_size + VectorObservationPlacholderName: vector_observation + RecurrentInPlaceholderName: recurrent_in + RecurrentOutPlaceholderName: recurrent_out + VisualObservationPlaceholderName: [] + ActionPlaceholderName: action + PreviousActionPlaceholderName: prev_action + brain: {fileID: 393360180} --- !u!1 &1706224190 GameObject: m_ObjectHideFlags: 0 @@ -1884,19 +1897,6 @@ Transform: m_Father: {fileID: 0} m_RootOrder: 3 m_LocalEulerAnglesHint: {x: 0, y: 0, z: 0} ---- !u!114 &1718413264 -MonoBehaviour: - m_ObjectHideFlags: 0 - m_PrefabParentObject: {fileID: 0} - m_PrefabInternal: {fileID: 0} - m_GameObject: {fileID: 0} - m_Enabled: 
1 - m_EditorHideFlags: 0 - m_Script: {fileID: 11500000, guid: 943466ab374444748a364f9d6c3e2fe2, type: 3} - m_Name: (Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone) - m_EditorClassIdentifier: - broadcast: 1 - brain: {fileID: 0} --- !u!1001 &1720875983 Prefab: m_ObjectHideFlags: 0 diff --git a/unity-environment/Assets/ML-Agents/Examples/Pyramids/Scenes/Pyramids.unity b/unity-environment/Assets/ML-Agents/Examples/Pyramids/Scenes/Pyramids.unity index 55e7b7c233..01e6852fcc 100644 --- a/unity-environment/Assets/ML-Agents/Examples/Pyramids/Scenes/Pyramids.unity +++ b/unity-environment/Assets/ML-Agents/Examples/Pyramids/Scenes/Pyramids.unity @@ -168,11 +168,11 @@ MonoBehaviour: vectorObservationSpaceType: 1 brainType: 0 CoreBrains: - - {fileID: 1640524635} - - {fileID: 724168002} - - {fileID: 1696069838} - - {fileID: 1856086268} - instanceID: 19778 + - {fileID: 719369990} + - {fileID: 696311630} + - {fileID: 1700719683} + - {fileID: 1602944013} + instanceID: 40746 --- !u!1001 &14183653 Prefab: m_ObjectHideFlags: 0 @@ -774,7 +774,7 @@ Prefab: m_RemovedComponents: [] m_ParentPrefab: {fileID: 100100000, guid: bd804431e808a492bb5658bcd296e58e, type: 2} m_IsPrefabParent: 0 ---- !u!114 &724168002 +--- !u!114 &696311630 MonoBehaviour: m_ObjectHideFlags: 0 m_PrefabParentObject: {fileID: 0} @@ -783,10 +783,47 @@ MonoBehaviour: m_Enabled: 1 m_EditorHideFlags: 0 m_Script: {fileID: 11500000, guid: 943466ab374444748a364f9d6c3e2fe2, type: 3} - m_Name: (Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone) + m_Name: 
(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone) m_EditorClassIdentifier: broadcast: 1 brain: {fileID: 7561199} +--- !u!114 &719369990 +MonoBehaviour: + m_ObjectHideFlags: 0 + m_PrefabParentObject: {fileID: 0} + m_PrefabInternal: {fileID: 0} + m_GameObject: {fileID: 0} + m_Enabled: 1 + m_EditorHideFlags: 0 + m_Script: {fileID: 11500000, guid: 41e9bda8f3cf1492fa74926a530f6f70, type: 3} + m_Name: 
(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone) + m_EditorClassIdentifier: + broadcast: 1 + keyContinuousPlayerActions: + - key: 119 + index: 0 + value: 1 + - key: 97 + index: 1 + value: -1 + - key: 100 + index: 1 + value: 1 + - key: 115 + index: 0 + value: -1 + axisContinuousPlayerActions: [] + discretePlayerActions: + - key: 119 + value: 0 + - key: 115 + value: 1 + - key: 97 + value: 3 + - key: 100 + value: 2 + defaultAction: -1 + brain: {fileID: 7561199} --- !u!1 &762086410 GameObject: m_ObjectHideFlags: 0 @@ -1099,7 +1136,7 @@ Camera: m_Enabled: 1 serializedVersion: 2 m_ClearFlags: 2 - m_BackGroundColor: {r: 0.5199359, g: 0.6937596, b: 0.8679245, a: 0} + m_BackGroundColor: {r: 0.39609292, g: 0.49962592, b: 0.6509434, a: 0} m_NormalizedViewPortRect: serializedVersion: 2 x: 0 @@ -1495,6 +1532,29 @@ Transform: m_Father: {fileID: 0} m_RootOrder: 2 m_LocalEulerAnglesHint: {x: 0, y: 0, z: 0} +--- !u!114 &1602944013 +MonoBehaviour: + m_ObjectHideFlags: 0 + m_PrefabParentObject: {fileID: 0} + m_PrefabInternal: {fileID: 0} + m_GameObject: {fileID: 0} + m_Enabled: 1 + m_EditorHideFlags: 0 + m_Script: {fileID: 11500000, guid: 8b23992c8eb17439887f5e944bf04a40, type: 3} + m_Name: 
(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone) + m_EditorClassIdentifier: + broadcast: 1 + graphModel: {fileID: 4900000, guid: 97f59608051e548d9a79803894260d13, type: 3} + graphScope: + graphPlaceholders: [] + BatchSizePlaceholderName: batch_size + VectorObservationPlacholderName: vector_observation + RecurrentInPlaceholderName: recurrent_in + RecurrentOutPlaceholderName: recurrent_out + VisualObservationPlaceholderName: [] + ActionPlaceholderName: action + PreviousActionPlaceholderName: prev_action + brain: {fileID: 7561199} --- !u!1001 &1613855850 Prefab: m_ObjectHideFlags: 0 @@ -1572,44 +1632,7 @@ Prefab: m_RemovedComponents: [] m_ParentPrefab: {fileID: 100100000, guid: bd804431e808a492bb5658bcd296e58e, type: 2} m_IsPrefabParent: 0 ---- !u!114 &1640524635 -MonoBehaviour: - m_ObjectHideFlags: 0 - m_PrefabParentObject: {fileID: 0} - m_PrefabInternal: {fileID: 0} - m_GameObject: {fileID: 0} - m_Enabled: 1 - m_EditorHideFlags: 0 - m_Script: {fileID: 11500000, guid: 41e9bda8f3cf1492fa74926a530f6f70, type: 3} - m_Name: 
(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone) - m_EditorClassIdentifier: - broadcast: 1 - keyContinuousPlayerActions: - - key: 119 - index: 0 - value: 1 - - key: 97 - index: 1 - value: -1 - - key: 100 - index: 1 - value: 1 - - key: 115 - index: 0 - value: -1 - axisContinuousPlayerActions: [] - discretePlayerActions: - - key: 119 - value: 0 - - key: 115 - value: 1 - - key: 97 - value: 3 - - key: 100 - value: 2 - defaultAction: -1 - brain: {fileID: 7561199} ---- !u!114 &1696069838 +--- !u!114 &1700719683 MonoBehaviour: m_ObjectHideFlags: 0 m_PrefabParentObject: {fileID: 0} @@ -1618,31 +1641,8 @@ MonoBehaviour: m_Enabled: 1 m_EditorHideFlags: 0 m_Script: {fileID: 11500000, guid: 35813a1be64e144f887d7d5f15b963fa, type: 3} - m_Name: 
(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone) - m_EditorClassIdentifier: - brain: {fileID: 7561199} ---- !u!114 &1856086268 -MonoBehaviour: - m_ObjectHideFlags: 0 - m_PrefabParentObject: {fileID: 0} - m_PrefabInternal: {fileID: 0} - m_GameObject: {fileID: 0} - m_Enabled: 1 - m_EditorHideFlags: 0 - m_Script: {fileID: 11500000, guid: 8b23992c8eb17439887f5e944bf04a40, type: 3} - m_Name: 
(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone) + m_Name: (Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone) m_EditorClassIdentifier: - broadcast: 1 - graphModel: {fileID: 4900000, guid: 97f59608051e548d9a79803894260d13, type: 3} - graphScope: - graphPlaceholders: [] - BatchSizePlaceholderName: batch_size - 
VectorObservationPlacholderName: vector_observation - RecurrentInPlaceholderName: recurrent_in - RecurrentOutPlaceholderName: recurrent_out - VisualObservationPlaceholderName: [] - ActionPlaceholderName: action - PreviousActionPlaceholderName: prev_action brain: {fileID: 7561199} --- !u!1001 &2127934646 Prefab: diff --git a/unity-environment/Assets/ML-Agents/Examples/Pyramids/Scenes/PyramidsIL.unity b/unity-environment/Assets/ML-Agents/Examples/Pyramids/Scenes/PyramidsIL.unity index 40e5280edc..86d2fbb17a 100644 --- a/unity-environment/Assets/ML-Agents/Examples/Pyramids/Scenes/PyramidsIL.unity +++ b/unity-environment/Assets/ML-Agents/Examples/Pyramids/Scenes/PyramidsIL.unity @@ -4313,7 +4313,7 @@ Camera: m_Enabled: 1 serializedVersion: 2 m_ClearFlags: 2 - m_BackGroundColor: {r: 0.5199359, g: 0.6937596, b: 0.8679245, a: 0} + m_BackGroundColor: {r: 0.39609292, g: 0.49962592, b: 0.6509434, a: 0} m_NormalizedViewPortRect: serializedVersion: 2 x: 0 diff --git a/unity-environment/Assets/ML-Agents/Examples/Pyramids/Scenes/VisualPyramids.unity b/unity-environment/Assets/ML-Agents/Examples/Pyramids/Scenes/VisualPyramids.unity index 3550a4e645..922045e36a 100644 --- a/unity-environment/Assets/ML-Agents/Examples/Pyramids/Scenes/VisualPyramids.unity +++ b/unity-environment/Assets/ML-Agents/Examples/Pyramids/Scenes/VisualPyramids.unity @@ -464,7 +464,7 @@ Camera: m_Enabled: 1 serializedVersion: 2 m_ClearFlags: 2 - m_BackGroundColor: {r: 0.5199359, g: 0.6937596, b: 0.8679245, a: 0} + m_BackGroundColor: {r: 0.39609292, g: 0.49962592, b: 0.6509434, a: 0} m_NormalizedViewPortRect: serializedVersion: 2 x: 0 diff --git a/unity-environment/Assets/ML-Agents/Examples/Walker/Scenes/Walker.unity b/unity-environment/Assets/ML-Agents/Examples/Walker/Scenes/Walker.unity index cc0d5337cc..6b2711ff41 100644 --- a/unity-environment/Assets/ML-Agents/Examples/Walker/Scenes/Walker.unity +++ b/unity-environment/Assets/ML-Agents/Examples/Walker/Scenes/Walker.unity @@ -253,11 +253,11 @@ MonoBehaviour: vectorObservationSpaceType: 1 brainType: 0 CoreBrains: - - {fileID: 314425644} - - {fileID: 1352504948} - - {fileID: 1230045554} - - {fileID: 548565157} - instanceID: 64514 + - {fileID: 245734037} + - {fileID: 1489685079} + - {fileID: 944836540} + - {fileID: 404442764} + instanceID: 44496 --- !u!1001 &155890235 Prefab: m_ObjectHideFlags: 0 @@ -309,12 +309,29 @@ Prefab: m_RemovedComponents: [] m_ParentPrefab: {fileID: 100100000, guid: 94dced9d2186d4a76b970fb18ef6d7a6, type: 2} m_IsPrefabParent: 0 +--- !u!114 &245734037 +MonoBehaviour: + m_ObjectHideFlags: 0 + m_PrefabParentObject: {fileID: 0} + m_PrefabInternal: {fileID: 0} + m_GameObject: {fileID: 0} + m_Enabled: 1 + m_EditorHideFlags: 0 + m_Script: {fileID: 11500000, guid: 41e9bda8f3cf1492fa74926a530f6f70, type: 3} + m_Name: 
(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone) + m_EditorClassIdentifier: + broadcast: 1 + keyContinuousPlayerActions: [] + axisContinuousPlayerActions: [] + discretePlayerActions: [] + defaultAction: 0 + brain: {fileID: 48753806} --- !u!4 &251893594 stripped Transform: m_PrefabParentObject: {fileID: 4333477265252406, guid: 94dced9d2186d4a76b970fb18ef6d7a6, type: 2} m_PrefabInternal: {fileID: 1821407821} ---- !u!114 &314425644 +--- !u!114 &404442764 MonoBehaviour: m_ObjectHideFlags: 0 m_PrefabParentObject: {fileID: 0} @@ -322,13 +339,20 @@ MonoBehaviour: m_GameObject: {fileID: 0} m_Enabled: 1 m_EditorHideFlags: 0 - m_Script: {fileID: 11500000, guid: 41e9bda8f3cf1492fa74926a530f6f70, type: 3} - m_Name: (Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone) + m_Script: {fileID: 11500000, guid: 8b23992c8eb17439887f5e944bf04a40, type: 3} + m_Name: 
(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone) m_EditorClassIdentifier: broadcast: 1 - continuousPlayerActions: [] - discretePlayerActions: [] - defaultAction: 0 + graphModel: {fileID: 4900000, guid: 8d3f9a4b927984343b18c82559165047, type: 3} + graphScope: + graphPlaceholders: [] + BatchSizePlaceholderName: batch_size + VectorObservationPlacholderName: vector_observation + RecurrentInPlaceholderName: recurrent_in + RecurrentOutPlaceholderName: recurrent_out + VisualObservationPlaceholderName: [] + ActionPlaceholderName: action + PreviousActionPlaceholderName: prev_action brain: {fileID: 48753806} --- !u!1 &459314944 GameObject: @@ -368,7 +392,7 @@ Camera: m_Enabled: 1 serializedVersion: 2 m_ClearFlags: 2 - m_BackGroundColor: {r: 0.5043253, g: 0.5998091, b: 0.64705884, a: 0} + m_BackGroundColor: {r: 0.39609292, g: 0.49962592, b: 0.6509434, a: 0} m_NormalizedViewPortRect: serializedVersion: 2 x: 0 @@ -401,13 +425,13 @@ Transform: m_PrefabParentObject: {fileID: 0} m_PrefabInternal: {fileID: 0} m_GameObject: {fileID: 459314944} - m_LocalRotation: {x: 0.105809934, y: -0.5228518, z: 0.065579966, w: 0.84328496} + m_LocalRotation: {x: 0.1131026, y: -0.41461477, z: 0.05200237, w: 0.9014422} m_LocalPosition: {x: -474.12155, y: 10.422336, z: -262.80988} m_LocalScale: {x: 1, y: 1, z: 1} m_Children: [] m_Father: {fileID: 0} m_RootOrder: 1 - m_LocalEulerAnglesHint: {x: 10, y: -64.1, z: 0} + m_LocalEulerAnglesHint: {x: 14.302001, y: -49.4, z: -0.002} --- !u!1 &528661458 GameObject: m_ObjectHideFlags: 0 @@ -522,29 +546,6 @@ Prefab: m_RemovedComponents: [] m_ParentPrefab: {fileID: 100100000, guid: 94dced9d2186d4a76b970fb18ef6d7a6, type: 2} m_IsPrefabParent: 0 ---- !u!114 &548565157 -MonoBehaviour: - m_ObjectHideFlags: 0 - m_PrefabParentObject: {fileID: 0} - m_PrefabInternal: {fileID: 0} - m_GameObject: {fileID: 0} - m_Enabled: 1 - m_EditorHideFlags: 0 - m_Script: {fileID: 11500000, guid: 8b23992c8eb17439887f5e944bf04a40, type: 3} - m_Name: 
(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone) - m_EditorClassIdentifier: - broadcast: 1 - graphModel: {fileID: 4900000, guid: 8d3f9a4b927984343b18c82559165047, type: 3} - graphScope: - graphPlaceholders: [] - BatchSizePlaceholderName: batch_size - VectorObservationPlacholderName: vector_observation - RecurrentInPlaceholderName: recurrent_in - RecurrentOutPlaceholderName: recurrent_out - VisualObservationPlaceholderName: [] - ActionPlaceholderName: action - PreviousActionPlaceholderName: prev_action - brain: {fileID: 48753806} --- !u!1001 &660230787 Prefab: m_ObjectHideFlags: 0 @@ -742,6 +743,18 @@ Prefab: m_RemovedComponents: [] m_ParentPrefab: {fileID: 100100000, guid: 94dced9d2186d4a76b970fb18ef6d7a6, type: 2} m_IsPrefabParent: 0 +--- !u!114 &944836540 +MonoBehaviour: + m_ObjectHideFlags: 0 + m_PrefabParentObject: {fileID: 0} + m_PrefabInternal: {fileID: 0} + m_GameObject: {fileID: 0} + m_Enabled: 1 + m_EditorHideFlags: 0 + m_Script: {fileID: 11500000, guid: 35813a1be64e144f887d7d5f15b963fa, type: 3} + m_Name: (Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone) + m_EditorClassIdentifier: + brain: {fileID: 48753806} --- !u!1001 &952648926 Prefab: m_ObjectHideFlags: 0 @@ -948,31 +961,6 @@ Transform: m_Father: {fileID: 528661459} m_RootOrder: 2 m_LocalEulerAnglesHint: {x: 0, y: 90, z: 0} ---- !u!114 &1230045554 -MonoBehaviour: - m_ObjectHideFlags: 0 - m_PrefabParentObject: {fileID: 0} - m_PrefabInternal: {fileID: 0} - m_GameObject: {fileID: 0} 
-  m_Enabled: 1
-  m_EditorHideFlags: 0
-  m_Script: {fileID: 11500000, guid: 35813a1be64e144f887d7d5f15b963fa, type: 3}
-  m_Name: (Clone)
-  m_EditorClassIdentifier: 
-  brain: {fileID: 48753806}
---- !u!114 &1352504948
-MonoBehaviour:
-  m_ObjectHideFlags: 0
-  m_PrefabParentObject: {fileID: 0}
-  m_PrefabInternal: {fileID: 0}
-  m_GameObject: {fileID: 0}
-  m_Enabled: 1
-  m_EditorHideFlags: 0
-  m_Script: {fileID: 11500000, guid: 943466ab374444748a364f9d6c3e2fe2, type: 3}
-  m_Name: (Clone)
-  m_EditorClassIdentifier: 
-  broadcast: 1
-  brain: {fileID: 48753806}
 --- !u!1 &1409355320
 GameObject:
   m_ObjectHideFlags: 0
@@ -1162,6 +1150,19 @@ Prefab:
   m_RemovedComponents: []
   m_ParentPrefab: {fileID: 100100000, guid: 94dced9d2186d4a76b970fb18ef6d7a6, type: 2}
   m_IsPrefabParent: 0
+--- !u!114 &1489685079
+MonoBehaviour:
+  m_ObjectHideFlags: 0
+  m_PrefabParentObject: {fileID: 0}
+  m_PrefabInternal: {fileID: 0}
+  m_GameObject: {fileID: 0}
+  m_Enabled: 1
+  m_EditorHideFlags: 0
+  m_Script: {fileID: 11500000, guid: 943466ab374444748a364f9d6c3e2fe2, type: 3}
+  m_Name: (Clone)
+  m_EditorClassIdentifier: 
+  broadcast: 1
+  brain: {fileID: 48753806}
 --- !u!1001 &1518272637
 Prefab:
   m_ObjectHideFlags: 0
diff --git a/unity-environment/Assets/ML-Agents/Scripts/Academy.cs b/unity-environment/Assets/ML-Agents/Scripts/Academy.cs
index 07bb1d2553..cbcc5dcc38 100755
--- a/unity-environment/Assets/ML-Agents/Scripts/Academy.cs
+++ b/unity-environment/Assets/ML-Agents/Scripts/Academy.cs
@@ -10,7 +10,7 @@
 /**
  * Welcome to Unity Machine Learning Agents (ML-Agents).
  *
- * ML-Agents contains five entities: Academy, Brain, Agent, Communicator and
+ * The ML-Agents toolkit contains five entities: Academy, Brain, Agent, Communicator and
  * Python API. The academy, and all its brains and connected agents live within
  * a learning environment (herein called Environment), while the communicator
  * manages the communication between the learning environment and the Python
diff --git a/unity-environment/Assets/ML-Agents/Scripts/Agent.cs b/unity-environment/Assets/ML-Agents/Scripts/Agent.cs
index d28ecac270..12226ab654 100755
--- a/unity-environment/Assets/ML-Agents/Scripts/Agent.cs
+++ b/unity-environment/Assets/ML-Agents/Scripts/Agent.cs
@@ -236,22 +236,32 @@ public abstract class Agent : MonoBehaviour
     /// their own experience.
     int stepCount;
 
-    // Flag to signify that an agent has been reset but the fact that it is
-    // done has not been communicated (required for On Demand Decisions).
+    /// Flag to signify that an agent has been reset but the fact that it is
+    /// done has not been communicated (required for On Demand Decisions).
     bool hasAlreadyReset;
 
-    // Flag to signify that an agent is done and should not reset until
-    // the fact that it is done has been communicated.
+    /// Flag to signify that an agent is done and should not reset until
+    /// the fact that it is done has been communicated.
     bool terminate;
 
     /// Unique identifier each agent receives at initialization. It is used
     /// to separate between different agents in the environment.
     int id;
 
+    /// Array of Texture2D used to render to from render buffer before
+    /// transforming into float tensor.
+    Texture2D[] textureArray;
+
     /// MonoBehaviour function that is called when the attached GameObject
     /// becomes enabled or active.
     void OnEnable()
     {
+        textureArray = new Texture2D[agentParameters.agentCameras.Count];
+        for (int i = 0; i < brain.brainParameters.cameraResolutions.Length; i++)
+        {
+            textureArray[i] = new Texture2D(brain.brainParameters.cameraResolutions[i].width,
+                brain.brainParameters.cameraResolutions[i].height, TextureFormat.RGB24, false);
+        }
         id = gameObject.GetInstanceID();
         Academy academy = Object.FindObjectOfType<Academy>() as Academy;
         OnEnableHelper(academy);
@@ -561,10 +571,12 @@ void SendInfoToBrain()
         for (int i = 0; i < brain.brainParameters.cameraResolutions.Length; i++)
         {
-            info.visualObservations.Add(ObservationToTexture(
+            ObservationToTexture(
                 agentParameters.agentCameras[i],
                 param.cameraResolutions[i].width,
-                param.cameraResolutions[i].height));
+                param.cameraResolutions[i].height,
+                ref textureArray[i]);
+            info.visualObservations.Add(textureArray[i]);
         }
 
         info.reward = reward;
@@ -926,37 +938,41 @@ void MakeRequests(int academyStepCounter)
     /// Converts a camera and corresponding resolution to a 2D texture.
     /// </summary>
     /// <returns>The 2D texture.</returns>
-    /// <param name="camera">Camera.</param>
+    /// <param name="obsCamera">Camera.</param>
     /// <param name="width">Width of resulting 2D texture.</param>
     /// <param name="height">Height of resulting 2D texture.</param>
-    public static Texture2D ObservationToTexture(Camera camera, int width, int height)
+    /// <param name="texture2D">Texture2D to render to.</param>
+    public static void ObservationToTexture(Camera obsCamera, int width, int height, ref Texture2D texture2D)
     {
-        Rect oldRec = camera.rect;
-        camera.rect = new Rect(0f, 0f, 1f, 1f);
+        Rect oldRec = obsCamera.rect;
+        obsCamera.rect = new Rect(0f, 0f, 1f, 1f);
         var depth = 24;
         var format = RenderTextureFormat.Default;
         var readWrite = RenderTextureReadWrite.Default;
         var tempRT = RenderTexture.GetTemporary(width, height, depth, format, readWrite);
-        var tex = new Texture2D(width, height, TextureFormat.RGB24, false);
+
+        if (width != texture2D.width || height != texture2D.height)
+        {
+            texture2D.Resize(width, height);
+        }
         var prevActiveRT = RenderTexture.active;
-        var prevCameraRT = camera.targetTexture;
+        var prevCameraRT = obsCamera.targetTexture;
 
         // render to offscreen texture (readonly from CPU side)
         RenderTexture.active = tempRT;
-        camera.targetTexture = tempRT;
+        obsCamera.targetTexture = tempRT;
 
-        camera.Render();
+        obsCamera.Render();
 
-        tex.ReadPixels(new Rect(0, 0, tex.width, tex.height), 0, 0);
-        tex.Apply();
-        camera.targetTexture = prevCameraRT;
-        camera.rect = oldRec;
+        texture2D.ReadPixels(new Rect(0, 0, texture2D.width, texture2D.height), 0, 0);
+        texture2D.Apply();
+        obsCamera.targetTexture = prevCameraRT;
+        obsCamera.rect = oldRec;
         RenderTexture.active = prevActiveRT;
         RenderTexture.ReleaseTemporary(tempRT);
-        return tex;
     }
 }
}
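The Agent.cs changes above replace the per-step `new Texture2D(...)` allocation in `ObservationToTexture` with a `ref` parameter backed by the pre-allocated `textureArray`, so each camera renders into the same texture on every decision step instead of creating a fresh native texture that accumulates faster than the garbage collector reclaims it. A minimal sketch of that reuse pattern, assuming a plain Unity `MonoBehaviour` outside the toolkit (the class name, method name, and the 84x84 resolution are illustrative, not part of ML-Agents):

```csharp
using UnityEngine;

public class CameraCaptureExample : MonoBehaviour
{
    // Allocated once and reused for every capture, mirroring the
    // allocate-once pattern in the diff above.
    Texture2D capture;

    void Start()
    {
        capture = new Texture2D(84, 84, TextureFormat.RGB24, false);
    }

    // Renders 'cam' into a temporary RenderTexture and reads the pixels
    // back into the reused Texture2D instead of newing one per frame.
    public void Capture(Camera cam)
    {
        var tempRT = RenderTexture.GetTemporary(capture.width, capture.height, 24);
        var prevActive = RenderTexture.active;
        var prevTarget = cam.targetTexture;

        cam.targetTexture = tempRT;
        RenderTexture.active = tempRT;
        cam.Render();

        // ReadPixels copies from the currently active RenderTexture.
        capture.ReadPixels(new Rect(0, 0, capture.width, capture.height), 0, 0);
        capture.Apply();

        // Restore state so the camera keeps rendering normally.
        cam.targetTexture = prevTarget;
        RenderTexture.active = prevActive;
        RenderTexture.ReleaseTemporary(tempRT);
    }
}
```

Saving and restoring `RenderTexture.active` and the camera's `targetTexture` matters because `ReadPixels` reads from whatever render texture is active, and the capture should leave the camera's on-screen rendering untouched.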
diff --git a/unity-environment/Assets/ML-Agents/Scripts/CoreBrainInternal.cs b/unity-environment/Assets/ML-Agents/Scripts/CoreBrainInternal.cs
index 493a814fd1..7ac54c24b8 100644
--- a/unity-environment/Assets/ML-Agents/Scripts/CoreBrainInternal.cs
+++ b/unity-environment/Assets/ML-Agents/Scripts/CoreBrainInternal.cs
@@ -562,36 +562,37 @@ public void OnInspector()
         else
             pixels = 3;
 
         float[,,,] result = new float[batchSize, height, width, pixels];
+        float[] resultTemp = new float[batchSize * height * width * pixels];
+        int hwp = height * width * pixels;
+        int wp = width * pixels;
 
         for (int b = 0; b < batchSize; b++)
         {
             Color32[] cc = textures[b].GetPixels32();
-            for (int w = 0; w < width; w++)
+            for (int h = height - 1; h >= 0; h--)
             {
-                for (int h = 0; h < height; h++)
+                for (int w = 0; w < width; w++)
                 {
-                    Color32 currentPixel = cc[h * width + w];
+                    Color32 currentPixel = cc[(height - h - 1) * width + w];
                     if (!blackAndWhite)
                     {
                         // For Color32, the r, g and b values are between
                         // 0 and 255.
-                        result[b, textures[b].height - h - 1, w, 0] =
-                            currentPixel.r / 255.0f;
-                        result[b, textures[b].height - h - 1, w, 1] =
-                            currentPixel.g / 255.0f;
-                        result[b, textures[b].height - h - 1, w, 2] =
-                            currentPixel.b / 255.0f;
+                        resultTemp[b * hwp + h * wp + w * pixels] = currentPixel.r / 255.0f;
+                        resultTemp[b * hwp + h * wp + w * pixels + 1] = currentPixel.g / 255.0f;
+                        resultTemp[b * hwp + h * wp + w * pixels + 2] = currentPixel.b / 255.0f;
                     }
                     else
                     {
-                        result[b, textures[b].height - h - 1, w, 0] =
+                        resultTemp[b * hwp + h * wp + w * pixels] =
                             (currentPixel.r + currentPixel.g + currentPixel.b)
-                            / 3;
+                            / 3f / 255.0f;
                     }
                 }
             }
         }
+        System.Buffer.BlockCopy(resultTemp, 0, result, 0, batchSize * hwp * sizeof(float));
         return result;
     }
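The CoreBrainInternal.cs rewrite above stops indexing the `float[,,,]` tensor once per channel. It instead writes into a flat staging array using precomputed strides (`hwp = height * width * pixels`, `wp = width * pixels`) and copies the whole batch into the multidimensional array with a single `System.Buffer.BlockCopy` call; this is legal because .NET multidimensional arrays of primitives are laid out contiguously in row-major order and `BlockCopy` copies raw bytes. A self-contained sketch of the same technique (the shapes, names, and jagged `pixels` input are illustrative):

```csharp
using System;

class FlattenExample
{
    // Fills a [batch, height, width, channels] tensor via a flat staging
    // array, then block-copies the bytes into the 4-D array in one call.
    static float[,,,] Fill(byte[][] pixels, int batch, int h, int w, int c)
    {
        var result = new float[batch, h, w, c];
        var temp = new float[batch * h * w * c];
        int hwc = h * w * c, wc = w * c;

        for (int b = 0; b < batch; b++)
            for (int y = 0; y < h; y++)
                for (int x = 0; x < w; x++)
                    for (int k = 0; k < c; k++)
                        // The flat index mirrors the row-major layout of
                        // float[,,,], so the bytes line up after the copy.
                        temp[b * hwc + y * wc + x * c + k] =
                            pixels[b][y * wc + x * c + k] / 255.0f;

        // One bulk byte copy instead of per-element 4-D indexing.
        Buffer.BlockCopy(temp, 0, result, 0, temp.Length * sizeof(float));
        return result;
    }
}
```

Each `float[,,,]` element access pays multidimensional bounds checks and address arithmetic, so hoisting the strides and batching the copy removes per-pixel overhead on the hot path that runs once per visual observation per step.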
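The black-and-white branch also fixes a quiet bug: the old code divided the summed byte channels by the integer literal `3`, which truncates, and never normalized by 255, so grayscale observations landed in [0, 255] while color observations were in [0, 1]. Dividing by `3f` and then `255.0f` puts both paths on the same scale, as this small standalone example shows:

```csharp
using System;

class GrayscaleNormalization
{
    static void Main()
    {
        byte r = 200, g = 100, b = 50; // byte arithmetic promotes to int

        // Old behavior: integer division truncates (350 / 3 == 116) and
        // the result is never scaled, so values stay in [0, 255].
        float truncated = (r + g + b) / 3;

        // Fixed behavior: float division plus normalization to [0, 1].
        float normalized = (r + g + b) / 3f / 255.0f;

        Console.WriteLine($"{truncated} vs {normalized:F4}"); // 116 vs 0.4575
    }
}
```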