Add experimental simulations_factory test and corresponding implementations for the next_acting, action_spec, and resolve methods of the experimental game master's switch act component.

PiperOrigin-RevId: 721772977
Change-Id: Ia70c5fe737586dcf22503c364da6c727f54bb6f8
google-deepmind · Jan 31, 2025 · f8a1724 · f8a1724
1 parent 537aa2e
commit f8a1724
Show file tree

Hide file tree

Showing 4 changed files with 223 additions and 10 deletions.
diff --git a/concordia/components/game_master/experimental/switch_act.py b/concordia/components/game_master/experimental/switch_act.py
@@ -14,7 +14,6 @@
 
 """A game master acting component with specific calls per action type."""
 
-
 from collections.abc import Sequence
 
 from concordia.document import interactive_document
@@ -26,6 +25,7 @@
 from concordia.utils import helper_functions
 from typing_extensions import override
 
+
 DEFAULT_PRE_ACT_KEY = 'Act'
 
 
@@ -44,6 +44,7 @@ def __init__(
       self,
       model: language_model.LanguageModel,
       clock: game_clock.GameClock,
+      entity_names: Sequence[str],
       component_order: Sequence[str] | None = None,
       pre_act_key: str = DEFAULT_PRE_ACT_KEY,
       logging_channel: logging.LoggingChannel = logging.NoOpLoggingChannel,
@@ -53,6 +54,7 @@ def __init__(
     Args:
       model: The language model to use for generating the action attempt.
       clock: the game clock is needed to know when is the current time
+      entity_names: sequence of entity names to choose from.
       component_order: The order in which the component contexts will be
         assembled when calling the act component. If None, the contexts will be
         assembled in the iteration order of the `ComponentContextMapping` passed
@@ -72,6 +74,7 @@ def __init__(
     """
     self._model = model
     self._clock = clock
+    self._entity_names = entity_names
     if component_order is None:
       self._component_order = None
     else:
@@ -118,25 +121,41 @@ def _next_acting(
       self,
       contexts: entity_component.ComponentContextMapping,
       action_spec: entity_lib.ActionSpec) -> str:
-    if 'initiative' in contexts:
-      return str(contexts['initiative'])
+    context = self._context_for_action(contexts)
+    if 'Initiative:' in contexts:
+      return str(contexts['Initiative:'])
     else:
-      return ''
+      # YOLO case
+      chain_of_thought = interactive_document.InteractiveDocument(self._model)
+      chain_of_thought.statement(context)
+      next_entity_index = chain_of_thought.multiple_choice_question(
+          question='Who is next?', answers=self._entity_names)
+      return self._entity_names[next_entity_index]
 
   def _next_entity_action_spec(
       self,
       contexts: entity_component.ComponentContextMapping,
       action_spec: entity_lib.ActionSpec) -> str:
+    """Produce the action spec string for the entity that acts next.
+
+    If the `next_action_spec` key is present in `contexts`, an empty string is
+    returned (the conversion of that context to a string is still commented
+    out). Otherwise the language model is asked, in two steps, to describe
+    the upcoming decision and then to format it as an action spec string.
+
+    Args:
+      contexts: The component contexts available for this call.
+      action_spec: Not read by this method.
+
+    Returns:
+      An empty string when `next_action_spec` is in `contexts`; otherwise the
+      model's formatted answer, prefixed with 'type: free' when the answer
+      does not contain 'type:'.
+    """
+    # NOTE(review): computed up front although the first branch below returns
+    # without using it.
+    context = self._context_for_action(contexts)
     if 'next_action_spec' in contexts:
       # action_spec_string = _convert_to_string(
       #     next_action_spec['scene_type'].action_spec)
       return ''
     else:
       # YOLO case
-      # Ask the GM first what kind of choice it is.
+      chain_of_thought = interactive_document.InteractiveDocument(self._model)
+      chain_of_thought.statement(context)
+      # The answer is discarded here; presumably it stays in the document as
+      # context for the formatting question below -- TODO confirm.
+      _ = chain_of_thought.open_question(
+          question='Who is next to act and what kind of decision do they face?')
       # Then ask the GM to reformat their answer in whatever string format can
       # be used by the engine and its parser.
-      return ''
+      chain_of_thought.statement(
+          'Example formatted action specs:\n"type: free"\n"type: choice"')
+      next_action_spec_string = chain_of_thought.open_question(
+          question='Format the decision type as an action spec.')
+      # NOTE(review): the fallback prefix is concatenated with no separator,
+      # so an answer 'x' becomes 'type: freex' -- confirm the parser expects it.
+      if 'type:' not in next_action_spec_string:
+        next_action_spec_string = 'type: free' + next_action_spec_string
+      return next_action_spec_string
 
   def _resolve(
       self,
@@ -145,7 +164,12 @@ def _resolve(
     if 'resolution' in contexts:
       return contexts['resolution']
     else:
-      return ''
+      chain_of_thought = interactive_document.InteractiveDocument(self._model)
+      context = self._context_for_action(contexts)
+      chain_of_thought.statement(context)
+      resolution = chain_of_thought.open_question(
+          question='As a result of the above, what happens next?')
+      return resolution
 
   @override
   def get_action_attempt(

diff --git a/concordia/factory/environment/experimental/simulation.py b/concordia/factory/environment/experimental/simulation.py
@@ -21,6 +21,7 @@
 from concordia.associative_memory import importance_function
 from concordia.clocks import game_clock
 from concordia.components import agent as components_lib
+from concordia.components.agent import memory_component
 from concordia.components.game_master import experimental as gm_components_lib
 from concordia.environment.experimental import engine as engine_lib
 from concordia.environment.experimental.engines import synchronous
@@ -124,6 +125,7 @@ def build_simulation(
   act_component = gm_components_lib.switch_act.SwitchAct(
       model=model,
       clock=clock,
+      entity_names=player_names,
       component_order=component_order,
   )
 
@@ -139,24 +141,71 @@ def build_simulation(
   return env, game_master_memory, game_master
 
 
+def create_log(
+    *,
+    model: language_model.LanguageModel,
+    scenes: Sequence[scene_lib.ExperimentalSceneSpec],
+    summarize_entire_episode: bool = True,
+) -> str:
+  """Create an HTML log of the simulation.
+
+  Args:
+    model: The language model to use.
+    scenes: Sequence of scenes.
+    summarize_entire_episode: Optionally, summarize the entire episode. This may
+      load a lot of tokens into a language model all at once and in some cases
+      exceed the model's context window and cause it to crash.
+
+  Returns:
+    An HTML string log of the simulation.
+  """
+  memories_per_scene = []
+  for scene in scenes:
+    scene_type = scene.scene_type
+    scene_game_master = scene_type.game_master
+    scene_memories = scene_game_master.get_component(
+        memory_component.DEFAULT_MEMORY_COMPONENT_NAME,
+        type_=memory_component.MemoryComponent).get_all_memories_as_text()
+    memories_per_scene.append('\n'.join(scene_memories))
+
+  if summarize_entire_episode:
+    detailed_story = '\n'.join(memories_per_scene)
+    episode_summary = model.sample_text(
+        f'Sequence of events:\n{detailed_story}'
+        + '\nNarratively summarize the above temporally ordered '
+        + 'sequence of events. Write it as a news report. Summary:\n',
+        max_tokens=3500,
+        terminators=(),
+    )
+  else:
+    episode_summary = ''
+
+  return episode_summary
+
+
 def run_simulation(
+    model: language_model.LanguageModel,
     players: Sequence[entity_agent_with_logging.EntityAgentWithLogging],
     clock: game_clock.MultiIntervalClock,
     scenes: Sequence[scene_lib.ExperimentalSceneSpec],
     verbose: bool = False,
+    summarize_entire_episode_in_log: bool = True,
     compute_metrics: Callable[[Mapping[str, str]], None] | None = None,
-) -> None:
+) -> str:
   """Run a simulation.
 
   Args:
+    model: The language model to use.
     players: The players.
     clock: The clock of the run.
     scenes: Sequence of scenes to simulate.
     verbose: Whether or not to print verbose debug information.
+    summarize_entire_episode_in_log: Optionally, include summaries of the full
+      episode in the log.
     compute_metrics: Optionally, a function to compute metrics.
 
   Returns:
-    None
+    string of the log of the simulation.
   """
   # Run the simulation.
   runner.run_scenes(
@@ -166,3 +215,9 @@ def run_simulation(
       verbose=verbose,
       compute_metrics=compute_metrics,
   )
+  result_log = create_log(
+      model=model,
+      scenes=scenes,
+      summarize_entire_episode=summarize_entire_episode_in_log,
+  )
+  return result_log
diff --git a/concordia/factory/environment/experimental/simulations_test.py b/concordia/factory/environment/experimental/simulations_test.py
@@ -0,0 +1,134 @@
+# Copyright 2024 DeepMind Technologies Limited.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Test environment (game master) factories.
+"""
+
+import datetime
+
+from absl.testing import absltest
+from absl.testing import parameterized
+from concordia.agents import entity_agent_with_logging
+from concordia.associative_memory import associative_memory
+from concordia.associative_memory import formative_memories
+from concordia.associative_memory import importance_function
+from concordia.clocks import game_clock
+from concordia.components import agent as agent_components
+from concordia.environment.experimental.engines import synchronous
+from concordia.factory.environment.experimental import simulation
+from concordia.language_model import no_language_model
+from concordia.typing import entity as entity_lib
+from concordia.typing import scene as scene_lib
+import numpy as np
+
+
+# Maps the `simulation_factory_name` test parameter to the factory module
+# under test.
+ENVIRONMENT_FACTORIES = {
+    'simulation': simulation,
+}
+
+
+def _embedder(text: str):
+  del text
+  return np.random.rand(16)
+
+
+class EnvironmentFactoriesTest(parameterized.TestCase):
+
+  @parameterized.named_parameters(
+      dict(testcase_name='simulation',
+           simulation_factory_name='simulation'),
+  )
+  def test_simulation_factory(self, simulation_factory_name: str):
+    simulation_factory = ENVIRONMENT_FACTORIES[simulation_factory_name]
+    model = no_language_model.NoLanguageModel()
+    importance_model_gm = importance_function.ConstantImportanceModel()
+    setup_time = datetime.datetime.now()
+    clock = game_clock.MultiIntervalClock(
+        start=setup_time,
+        step_sizes=[datetime.timedelta(hours=1),
+                    datetime.timedelta(minutes=10)])
+    act_component = agent_components.concat_act_component.ConcatActComponent(
+        model=model,
+        clock=clock,
+        component_order=[],
+    )
+    player_a = entity_agent_with_logging.EntityAgentWithLogging(
+        agent_name='Rakshit',
+        act_component=act_component,
+        context_components={},
+    )
+
+    players = [player_a]
+
+    env, mem, game_master = simulation_factory.build_simulation(
+        model=model,
+        embedder=_embedder,
+        importance_model=importance_model_gm,
+        clock=clock,
+        players=players,
+        shared_memories=[],
+    )
+    self.assertIsInstance(env, synchronous.Synchronous)
+    self.assertIsInstance(mem, associative_memory.AssociativeMemory)
+    self.assertIsInstance(game_master,
+                          entity_agent_with_logging.EntityAgentWithLogging)
+
+    free_scenes = [
+        scene_lib.ExperimentalSceneSpec(
+            scene_type=scene_lib.ExperimentalSceneTypeSpec(
+                name='day',
+                game_master=game_master,
+                engine=env),
+            start_time=setup_time,
+            participant_configs=[
+                formative_memories.AgentConfig(name='Rakshit')],
+            num_rounds=1,
+        ),
+    ]
+
+    free_results_log = simulation.run_simulation(
+        model=model,
+        players=players,
+        clock=clock,
+        scenes=free_scenes,
+    )
+    self.assertIsInstance(free_results_log, str)
+
+    choice_scenes = [
+        scene_lib.ExperimentalSceneSpec(
+            scene_type=scene_lib.ExperimentalSceneTypeSpec(
+                name='night',
+                game_master=game_master,
+                engine=env,
+                action_spec=entity_lib.choice_action_spec(
+                    call_to_action='Pick x or y',
+                    options=['x', 'y']),
+            ),
+            start_time=setup_time,
+            participant_configs=[
+                formative_memories.AgentConfig(name='Rakshit')],
+            num_rounds=1,
+        ),
+    ]
+
+    choice_results_log = simulation.run_simulation(
+        model=model,
+        players=players,
+        clock=clock,
+        scenes=choice_scenes,
+    )
+    self.assertIsInstance(choice_results_log, str)
+
+if __name__ == '__main__':
+  absltest.main()
diff --git a/examples/modular/environment/experimental.py b/examples/modular/environment/experimental.py
@@ -428,9 +428,9 @@ def __call__(self) -> None:
       html_results_log: browseable log of the simulation in HTML format
     """
     simulation_factory.run_simulation(
+        model=self._model,
         players=self._all_players,
         clock=self._clock,
         scenes=self._scenes,
         verbose=True,
     )
-