diff --git a/skythought/skythought_evals/inference_and_check.py b/skythought/skythought_evals/inference_and_check.py
index 387e1c5..64e2e6b 100644
--- a/skythought/skythought_evals/inference_and_check.py
+++ b/skythought/skythought_evals/inference_and_check.py
@@ -464,7 +464,7 @@ def perform_inference_and_save(
                 )
                 token_usages.append(token_usage_for_response)
                 completion_token += token_usage_for_response["completion_tokens"]
-                response_entries.append(response_entry)
+                response_entries.append(response_entry.to_dict())
 
             completion_token /= args.n
             prompt_token = response.num_input_tokens
diff --git a/skythought/skythought_evals/util/response.py b/skythought/skythought_evals/util/response.py
index 0c34b8f..c4d41ee 100644
--- a/skythought/skythought_evals/util/response.py
+++ b/skythought/skythought_evals/util/response.py
@@ -89,7 +89,7 @@ class SingleParsedResponse:
     correctness: Optional[bool] = None
     reason: Optional[str] = None
 
-    def as_dict(self):
+    def to_dict(self):
         return {
             "content": self.content,
             "correctness": self.correctness,