From 78b360f6b5143a08f679788091d1abdf93aebf96 Mon Sep 17 00:00:00 2001 From: Minsoo Kim <48931129+mnskim@users.noreply.github.com> Date: Mon, 27 Jan 2025 11:06:14 -0800 Subject: [PATCH] fix: always propagate visit counts, separately from rewards (#48) Co-authored-by: sweagent --- moatless/search_tree.py | 34 +++++++++++++++++++++------------- 1 file changed, 21 insertions(+), 13 deletions(-) diff --git a/moatless/search_tree.py b/moatless/search_tree.py index 501ef8af..6318b564 100644 --- a/moatless/search_tree.py +++ b/moatless/search_tree.py @@ -361,24 +361,32 @@ def _simulate(self, node: Node): raise # Re-raise to abort the entire search def _backpropagate(self, node: Node): - """Backpropagate the reward up the tree.""" - - if not node.reward: + """Backpropagate both visits and rewards up the tree.""" + + # Always update visit counts, separately from reward propagation + current = node + while current is not None: + current.visits += 1 + current = current.parent + + # Only propagate rewards if they exist + if node.reward: + current = node + reward = node.reward.value + while current is not None: + if not current.value: + current.value = reward + else: + current.value += reward + current = current.parent + + else: self.log( logger.info, - f"Node{node.node_id} has no evaluation. Skipping backpropagation.", + f"Node{node.node_id} has no evaluation. Skipping reward backpropagation.", ) return - reward = node.reward.value - while node is not None: - node.visits += 1 - if not node.value: - node.value = reward - else: - node.value += reward - node = node.parent - def get_best_trajectory(self) -> Node | None: """ Get the best finished trajectory to return