From 26fcee4e2d0a79d14c409bcb46184be6b8b39d84 Mon Sep 17 00:00:00 2001
From: Rohan Julka <rohanjulka19@gmail.com>
Date: Thu, 20 Jun 2024 16:32:51 +0100
Subject: [PATCH] Use connected lists for tape data structure

Instead of a single list use mutliple connected lists to store elements.
This allows to dynamically increase the size of tape without the need of
relocating elements.
---
 benchmark/AlgorithmicComplexity.cpp          |   2 +-
 demos/CustomTypeNumDiff.cpp                  |   2 +-
 include/clad/Differentiator/Differentiator.h |   5 +-
 include/clad/Differentiator/NewTape.h        | 119 +++++++++++++++++++
 test/NumericalDiff/PureCentralDiffCalls.C    |   2 +-
 5 files changed, 126 insertions(+), 4 deletions(-)
 create mode 100644 include/clad/Differentiator/NewTape.h

diff --git a/benchmark/AlgorithmicComplexity.cpp b/benchmark/AlgorithmicComplexity.cpp
index 9717cb0cd..15089a24e 100644
--- a/benchmark/AlgorithmicComplexity.cpp
+++ b/benchmark/AlgorithmicComplexity.cpp
@@ -19,7 +19,7 @@ static void BM_NumericGausP(benchmark::State& state) {
   double p[] = {1, 2, 3, 4, 5};
   double dx[5] = {0, 0, 0, 0, 0};
   double dp[5] = {0, 0, 0, 0, 0};
-  clad::tape<clad::array_ref<double>> results = {};
+  clad::old_tape<clad::array_ref<double>> results = {};
   int dim = 5;
   results.emplace_back(dx, dim);
   results.emplace_back(dp, dim);
diff --git a/demos/CustomTypeNumDiff.cpp b/demos/CustomTypeNumDiff.cpp
index 0f92c22f9..33bc6adb1 100644
--- a/demos/CustomTypeNumDiff.cpp
+++ b/demos/CustomTypeNumDiff.cpp
@@ -134,7 +134,7 @@ int main() {
   // This is how we return the derivative with respect to all arguments.
   // The order of being placed in this tape should be the same as the order of
   // the arguments being passed to the function.
-  clad::tape<clad::array_ref<
+  clad::old_tape<clad::array_ref<
       double /*This should be the return value of the function you want to differentiate.*/>>
       grad = {};
   // Place the l-value reference of the variables in the tape.
diff --git a/include/clad/Differentiator/Differentiator.h b/include/clad/Differentiator/Differentiator.h
index cca1cd5cf..690e232ed 100644
--- a/include/clad/Differentiator/Differentiator.h
+++ b/include/clad/Differentiator/Differentiator.h
@@ -14,6 +14,7 @@
 #include "DynamicGraph.h"
 #include "FunctionTraits.h"
 #include "Matrix.h"
+#include "NewTape.h"
 #include "NumericalDiff.h"
 #include "Tape.h"
 
@@ -47,7 +48,9 @@ inline CUDA_HOST_DEVICE unsigned int GetLength(const char* code) {
 #endif
 
 /// Tape type used for storing values in reverse-mode AD inside loops.
-template <typename T> using tape = tape_impl<T>;
+template <typename T> using tape = new_tape_impl<T>;
+
+template <typename T> using old_tape = tape_impl<T>;
 
 /// Add value to the end of the tape, return the same value.
 template <typename T, typename... ArgsT>
diff --git a/include/clad/Differentiator/NewTape.h b/include/clad/Differentiator/NewTape.h
new file mode 100644
index 000000000..743ca5dd3
--- /dev/null
+++ b/include/clad/Differentiator/NewTape.h
@@ -0,0 +1,119 @@
+#ifndef CLAD_DIFFERENTIATOR_NEWTAPE_H
+#define CLAD_DIFFERENTIATOR_NEWTAPE_H
+
+#include <cassert>
+#include <cstdio>
+#include <type_traits>
+#include <utility>
+
+#include "clad/Differentiator/CladConfig.h"
+
+namespace clad {
+
+static const int capacity = 32;
+
+template <typename T> class Block {
+public:
+  T data[capacity];
+  Block<T>* next;
+  Block<T>* prev;
+  using pointer = T*;
+  using iterator = pointer;
+
+  CUDA_HOST_DEVICE Block() {
+  }
+
+  CUDA_HOST_DEVICE ~Block() { destroy(block_begin(), block_end()); }
+
+  Block(const Block& other) = delete;
+  Block& operator=(const Block& other) = delete;
+
+  Block(Block&& other) = delete;
+  Block& operator=(const Block&& other) = delete;
+
+  CUDA_HOST_DEVICE iterator block_begin() { return data; }
+
+  CUDA_HOST_DEVICE iterator block_end() { return data + capacity; }
+
+  template <typename It> using value_type_of = decltype(*std::declval<It>());
+
+  template <typename It>
+  static typename std::enable_if<
+      !std::is_trivially_destructible<value_type_of<It>>::value>::type
+  destroy(It B, It E) {
+    for (It I = E - 1; I >= B; --I)
+      I->~value_type_of<It>();
+  }
+
+  template <typename It>
+  static typename std::enable_if<
+      std::is_trivially_destructible<value_type_of<It>>::value>::type
+      CUDA_HOST_DEVICE
+      destroy(It B, It E) {}
+};
+
+template <typename T> class new_tape_impl {
+  using NonConstT = typename std::remove_cv<T>::type;
+
+  Block<NonConstT>* m_cur_block = nullptr;
+  std::size_t m_size = 0;
+
+public:
+  new_tape_impl() = default;
+
+  ~new_tape_impl() { }
+
+  new_tape_impl(new_tape_impl& other) = delete;
+  new_tape_impl operator=(new_tape_impl& other) = delete;
+
+  new_tape_impl(new_tape_impl&& other) = delete;
+  new_tape_impl& operator=(new_tape_impl&& other) = delete;
+
+  template <typename... ArgsT>
+
+  CUDA_HOST_DEVICE void emplace_back(ArgsT&&... args) {
+    if (!m_cur_block || m_size >= capacity) {
+      Block<NonConstT>* prev_block = m_cur_block;
+      m_cur_block =  static_cast<Block<NonConstT>*>(::operator new(sizeof(Block<NonConstT>)));
+      if (prev_block != nullptr) {
+        prev_block->next = m_cur_block;
+        m_cur_block->prev = prev_block;
+      }
+      m_size = 0;
+    }
+    m_size += 1;
+    ::new (const_cast<void*>(static_cast<const volatile void*>(end())))
+        T(std::forward<ArgsT>(args)...);
+  }
+
+  [[nodiscard]] CUDA_HOST_DEVICE std::size_t size() const { return m_size; }
+
+  CUDA_HOST_DEVICE T* end() { return m_cur_block->data + (m_size - 1); }
+
+  CUDA_HOST_DEVICE T& back() {
+    assert(m_size || m_cur_block->prev);
+    return *end();
+  }
+
+  CUDA_HOST_DEVICE void pop_back() {
+    assert(m_size || m_cur_block->prev);
+    m_size -= 1;
+    if (m_size == 0) {
+      Block<NonConstT>* temp = m_cur_block;
+      m_cur_block = m_cur_block->prev;
+      // delete temp;
+      m_size = capacity;
+    }
+  }
+
+  void destroy() {
+    while (m_cur_block != nullptr) {
+      Block<NonConstT>* prev_block = m_cur_block->prev;
+      delete m_cur_block;
+      m_cur_block = prev_block;
+    }
+  }
+};
+} // namespace clad
+
+#endif // CLAD_DIFFERENTIATOR_NEWTAPE_H
diff --git a/test/NumericalDiff/PureCentralDiffCalls.C b/test/NumericalDiff/PureCentralDiffCalls.C
index 399e7f784..8038229b6 100644
--- a/test/NumericalDiff/PureCentralDiffCalls.C
+++ b/test/NumericalDiff/PureCentralDiffCalls.C
@@ -73,7 +73,7 @@ int main() { // expected-no-diagnostics
   printf("Result is = %f\n", func1_res); // CHECK-EXEC: Result is = 2.000000
 
   // Gradients, derivative wrt all args
-  clad::tape<clad::array_ref<double>> grad = {};
+  clad::old_tape<clad::array_ref<double>> grad = {};
   grad.emplace_back(dx, 3);
   grad.emplace_back(&dy);
   grad.emplace_back(&dz);