Skip to content

Commit

Permalink
fix: always propagate visit counts, separately from rewards (#48)
Browse files (browse the repository at this point in the history)
Co-authored-by: sweagent <[email protected]>
  • Loading branch information
mnskim and sweagent authored Jan 27, 2025
1 parent 6a85465 commit 78b360f
Showing 1 changed file with 21 additions and 13 deletions.
34 changes: 21 additions & 13 deletions moatless/search_tree.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -361,24 +361,32 @@ def _simulate(self, node: Node):
raise # Re-raise to abort the entire search

     def _backpropagate(self, node: Node):
-        """Backpropagate the reward up the tree."""
-
-        if not node.reward:
+        """Backpropagate both visits and rewards up the tree."""
+
+        # Always update visit counts, separately from reward propagation
+        current = node
+        while current is not None:
+            current.visits += 1
+            current = current.parent
+
+        # Only propagate rewards if they exist
+        if node.reward:
+            current = node
+            reward = node.reward.value
+            while current is not None:
+                if not current.value:
+                    current.value = reward
+                else:
+                    current.value += reward
+                current = current.parent
+
+        else:
             self.log(
                 logger.info,
-                f"Node{node.node_id} has no evaluation. Skipping backpropagation.",
+                f"Node{node.node_id} has no evaluation. Skipping reward backpropagation.",
             )
-            return

-        reward = node.reward.value
-        while node is not None:
-            node.visits += 1
-            if not node.value:
-                node.value = reward
-            else:
-                node.value += reward
-            node = node.parent

def get_best_trajectory(self) -> Node | None:
"""
Get the best finished trajectory to return
Expand Down

0 comments on commit 78b360f

Please sign in to comment.