Skip to content

Commit

Permalink
Remove ScratchpadAllocator
Browse files: browse the repository at this point in the history
Signed-off-by: Joaquin Anton Guirao <[email protected]>
  • Loading branch information
jantonguirao committed Feb 5, 2025
1 parent c548356 commit 69e69e4
Show file tree
Hide file tree
Showing 29 changed files with 159 additions and 327 deletions.
7 changes: 3 additions & 4 deletions dali/benchmark/slice_kernel_bench.cu
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#include "dali/test/tensor_test_utils.h"
#include "dali/test/test_tensors.h"
#include "dali/kernels/scratch.h"
#include "dali/kernels/dynamic_scratchpad.h"

namespace dali {

Expand Down Expand Up @@ -76,10 +77,8 @@ class SliceBenchGPU : public DALIBenchmark {

auto req = kernel.Setup(ctx, in_tv, args_vec);

kernels::ScratchpadAllocator scratch_alloc;
scratch_alloc.Reserve(req.scratch_sizes);
auto scratchpad = scratch_alloc.GetScratchpad();
ctx.scratchpad = &scratchpad;
DynamicScratchpad dyn_scratchpad({}, AccessOrder(ctx.gpu.stream));
ctx.scratchpad = &dyn_scratchpad;

kernel.Run(ctx, out_tv, in_tv, args_vec);
CUDA_CALL(cudaStreamSynchronize(ctx.gpu.stream));
Expand Down
8 changes: 3 additions & 5 deletions dali/kernels/imgproc/convolution/convolution_cpu_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
#include "dali/test/tensor_test_utils.h"
#include "dali/test/test_tensors.h"
#include "dali/kernels/imgproc/convolution/baseline_convolution.h"
#include "dali/kernels/dynamic_scratchpad.h"

namespace dali {
namespace kernels {
Expand Down Expand Up @@ -302,11 +303,8 @@ struct ConvolutionCpuKernelTest : public ::testing::Test {
Kernel kernel;

auto req = kernel.Setup(ctx, in_.shape, k_win_.num_elements());
// this is painful
ScratchpadAllocator scratch_alloc;
scratch_alloc.Reserve(req.scratch_sizes);
auto scratchpad = scratch_alloc.GetScratchpad();
ctx.scratchpad = &scratchpad;
DynamicScratchpad dyn_scratchpad({}, AccessOrder::host());
ctx.scratchpad = &dyn_scratchpad;

testing::BaselineConvolve(baseline_out_, baseline_in_, k_win_, T::axis, T::window_size / 2);
TransformCase tranform(out_, baseline_out_, T::in_place);
Expand Down
17 changes: 8 additions & 9 deletions dali/kernels/imgproc/convolution/convolution_gpu_test.cu
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
#include "dali/kernels/scratch.h"
#include "dali/test/tensor_test_utils.h"
#include "dali/test/test_tensors.h"
#include "dali/kernels/dynamic_scratchpad.h"

namespace dali {
namespace kernels {
Expand Down Expand Up @@ -158,7 +159,14 @@ struct ConvolutionGpuKernelTest : public ::testing::Test {

void RunTest() {
KernelContext ctx_cpu, ctx_gpu;

ctx_gpu.gpu.stream = 0;
DynamicScratchpad dyn_scratchpad_gpu({}, AccessOrder(ctx_gpu.gpu.stream));
ctx_gpu.scratchpad = &dyn_scratchpad_gpu;

DynamicScratchpad dyn_scratchpad_cpu({}, AccessOrder::host());
ctx_cpu.scratchpad = &dyn_scratchpad_cpu;

KernelCpu kernel_cpu;
KernelGpu kernel_gpu;

Expand All @@ -175,21 +183,12 @@ struct ConvolutionGpuKernelTest : public ::testing::Test {
int window_size = shape_window[sample][0];
auto req = kernel_cpu.Setup(ctx_cpu, data_shape[sample], window_size);

ScratchpadAllocator scratch_alloc;
scratch_alloc.Reserve(req.scratch_sizes);
auto scratchpad = scratch_alloc.GetScratchpad();
ctx_cpu.scratchpad = &scratchpad;

kernel_cpu.Run(ctx_cpu, baseline_out_[sample], baseline_in_[sample], k_win_[sample],
transform.GetCpuTransform(sample));
}

auto req = kernel_gpu.Setup(ctx_gpu, in_.shape, shape_window);

ScratchpadAllocator scratch_alloc;
scratch_alloc.Reserve(req.scratch_sizes);
auto scratchpad = scratch_alloc.GetScratchpad();
ctx_gpu.scratchpad = &scratchpad;
auto gpu_epilogue = transform.GetGpuEpilogue();
kernel_gpu.Run(ctx_gpu, out_, in_, k_win_, span<const int>{}, gpu_epilogue);

Expand Down
21 changes: 7 additions & 14 deletions dali/kernels/imgproc/convolution/laplacian_cpu_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
#include "dali/kernels/scratch.h"
#include "dali/test/tensor_test_utils.h"
#include "dali/test/test_tensors.h"
#include "dali/kernels/dynamic_scratchpad.h"

namespace dali {
namespace kernels {
Expand Down Expand Up @@ -144,11 +145,8 @@ struct LaplacianCpuKernelTest : public ::testing::Test {
KernelContext ctx = {};

auto req = kernel.Setup(ctx, in_.shape, lapl_params_.window_sizes);

ScratchpadAllocator scratch_alloc;
scratch_alloc.Reserve(req.scratch_sizes);
auto scratchpad = scratch_alloc.GetScratchpad();
ctx.scratchpad = &scratchpad;
DynamicScratchpad dyn_scratchpad({}, AccessOrder::host());
ctx.scratchpad = &dyn_scratchpad;

kernel.Run(ctx, out_, in_, lapl_params_.tensor_windows, uniform_array<axes>(1.f));
CompareOut(out_, kernel_);
Expand Down Expand Up @@ -287,10 +285,8 @@ struct LaplacianCpuTest : public ::testing::Test {
auto vol = volume(shape_);
for (int axis = 0; axis < axes; axis++) {
auto req = kernel.Setup(ctx, shape_, lapl_params_.window_sizes[axis]);
ScratchpadAllocator scratch_alloc;
scratch_alloc.Reserve(req.scratch_sizes);
auto scratchpad = scratch_alloc.GetScratchpad();
ctx.scratchpad = &scratchpad;
DynamicScratchpad dyn_scratchpad({}, AccessOrder::host());
ctx.scratchpad = &dyn_scratchpad;
kernel.Run(ctx, intermediate_, in_, lapl_params_.tensor_windows[axis]);
for (int i = 0; i < vol; i++) {
baseline_acc_.data[i] += weights_[axis] * intermediate_.data[i];
Expand Down Expand Up @@ -322,11 +318,8 @@ struct LaplacianCpuTest : public ::testing::Test {
Kernel kernel;
KernelContext ctx = {};
auto req = kernel.Setup(ctx, in_.shape, lapl_params_.window_sizes);
ScratchpadAllocator scratch_alloc;
scratch_alloc.Reserve(req.scratch_sizes);
auto scratchpad = scratch_alloc.GetScratchpad();
ctx.scratchpad = &scratchpad;

DynamicScratchpad dyn_scratchpad({}, AccessOrder::host());
ctx.scratchpad = &dyn_scratchpad;
kernel.Run(ctx, out_, in_, lapl_params_.tensor_windows, weights_);
RunBaseline();
CompareOut(out_, baseline_out_);
Expand Down
37 changes: 13 additions & 24 deletions dali/kernels/imgproc/convolution/separable_convolution_cpu_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
#include "dali/kernels/scratch.h"
#include "dali/test/tensor_test_utils.h"
#include "dali/test/test_tensors.h"
#include "dali/kernels/dynamic_scratchpad.h"

namespace dali {
namespace kernels {
Expand Down Expand Up @@ -55,10 +56,8 @@ TEST(SeparableConvolutionTest, Axes1WithChannels) {

auto req = kernel.Setup(ctx, data_shape[0], window_dims);

ScratchpadAllocator scratch_alloc;
scratch_alloc.Reserve(req.scratch_sizes);
auto scratchpad = scratch_alloc.GetScratchpad();
ctx.scratchpad = &scratchpad;
DynamicScratchpad dyn_scratchpad({}, AccessOrder::host());
ctx.scratchpad = &dyn_scratchpad;

kernel.Run(ctx, out_v, in_v,
uniform_array<1, TensorView<StorageCPU, const float, 1>>(kernel_window_v));
Expand Down Expand Up @@ -95,10 +94,8 @@ TEST(SeparableConvolutionTest, Axes1NoChannels) {

auto req = kernel.Setup(ctx, data_shape[0].first<1>(), window_dims);

ScratchpadAllocator scratch_alloc;
scratch_alloc.Reserve(req.scratch_sizes);
auto scratchpad = scratch_alloc.GetScratchpad();
ctx.scratchpad = &scratchpad;
DynamicScratchpad dyn_scratchpad({}, AccessOrder::host());
ctx.scratchpad = &dyn_scratchpad;

kernel.Run(ctx, out_v, in_v,
uniform_array<1, TensorView<StorageCPU, const float, 1>>(kernel_window_v));
Expand Down Expand Up @@ -144,10 +141,8 @@ TEST(SeparableConvolutionTest, Axes2WithChannels) {

auto req = kernel.Setup(ctx, data_shape[0], window_dims);

ScratchpadAllocator scratch_alloc;
scratch_alloc.Reserve(req.scratch_sizes);
auto scratchpad = scratch_alloc.GetScratchpad();
ctx.scratchpad = &scratchpad;
DynamicScratchpad dyn_scratchpad({}, AccessOrder::host());
ctx.scratchpad = &dyn_scratchpad;

kernel.Run(ctx, out_v, in_v, {kernel_window_0_v, kernel_window_1_v});
testing::BaselineConvolve(interm_v, in_v, kernel_window_1_v, 1, window_dims[1] / 2);
Expand Down Expand Up @@ -194,10 +189,8 @@ TEST(SeparableConvolutionTest, Axes2NoChannels) {

auto req = kernel.Setup(ctx, data_shape[0].first<2>(), window_dims);

ScratchpadAllocator scratch_alloc;
scratch_alloc.Reserve(req.scratch_sizes);
auto scratchpad = scratch_alloc.GetScratchpad();
ctx.scratchpad = &scratchpad;
DynamicScratchpad dyn_scratchpad({}, AccessOrder::host());
ctx.scratchpad = &dyn_scratchpad;

kernel.Run(ctx, out_v, in_v, {kernel_window_0_v, kernel_window_1_v});
testing::BaselineConvolve(interm_v, baseline_in_v, kernel_window_1_v, 1, window_dims[1] / 2);
Expand Down Expand Up @@ -249,10 +242,8 @@ TEST(SeparableConvolutionTest, Axes3WithChannels) {

auto req = kernel.Setup(ctx, data_shape[0], window_dims);

ScratchpadAllocator scratch_alloc;
scratch_alloc.Reserve(req.scratch_sizes);
auto scratchpad = scratch_alloc.GetScratchpad();
ctx.scratchpad = &scratchpad;
DynamicScratchpad dyn_scratchpad({}, AccessOrder::host());
ctx.scratchpad = &dyn_scratchpad;

kernel.Run(ctx, out_v, in_v, {kernel_window_0_v, kernel_window_1_v, kernel_window_2_v});

Expand Down Expand Up @@ -306,10 +297,8 @@ TEST(SeparableConvolutionTest, Axes3NoChannels) {

auto req = kernel.Setup(ctx, data_shape[0].first<3>(), window_dims);

ScratchpadAllocator scratch_alloc;
scratch_alloc.Reserve(req.scratch_sizes);
auto scratchpad = scratch_alloc.GetScratchpad();
ctx.scratchpad = &scratchpad;
DynamicScratchpad dyn_scratchpad({}, AccessOrder::host());
ctx.scratchpad = &dyn_scratchpad;

kernel.Run(ctx, out_v, in_v, {kernel_window_0_v, kernel_window_1_v, kernel_window_2_v});

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
#include "dali/kernels/scratch.h"
#include "dali/test/tensor_test_utils.h"
#include "dali/test/test_tensors.h"
#include "dali/kernels/dynamic_scratchpad.h"

namespace dali {
namespace kernels {
Expand Down Expand Up @@ -116,14 +117,11 @@ class SepearableConvolutionGpuTestImpl {
KernelContext ctx_gpu, ctx_cpu;

ctx_gpu.gpu.stream = 0;
DynamicScratchpad dyn_scratchpad_gpu({}, AccessOrder(ctx_gpu.gpu.stream));
ctx_gpu.scratchpad = &dyn_scratchpad_gpu;

auto req_gpu = kernel_gpu.Setup(ctx_gpu, data_shape_, window_dims_);

ScratchpadAllocator scratch_alloc;
scratch_alloc.Reserve(req_gpu.scratch_sizes);
auto scratchpad = scratch_alloc.GetScratchpad();
ctx_gpu.scratchpad = &scratchpad;

kernel_gpu.Run(ctx_gpu, out_gpu_v, in_gpu_v, window_v);
int nsamples = in_gpu_v.num_samples();

Expand All @@ -145,10 +143,8 @@ class SepearableConvolutionGpuTestImpl {
for (int frame = 0; frame < seq_elements; frame++) {
auto req_cpu = kernel_cpu.Setup(ctx_cpu, element_shape, window_dims);

ScratchpadAllocator scratch_alloc;
scratch_alloc.Reserve(req_cpu.scratch_sizes);
auto scratchpad = scratch_alloc.GetScratchpad();
ctx_cpu.scratchpad = &scratchpad;
DynamicScratchpad dyn_scratchpad_cpu({}, AccessOrder::host());
ctx_cpu.scratchpad = &dyn_scratchpad_cpu;

std::array<TensorView<StorageCPU, const float, 1>, kAxes> windows;
for (int axis = 0; axis < kAxes; axis++) {
Expand Down
13 changes: 5 additions & 8 deletions dali/kernels/imgproc/pointwise/linear_transformation_gpu_test.cu
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
#include "dali/kernels/imgproc/pointwise/linear_transformation_gpu.h"
#include "dali/test/cv_mat_utils.h"
#include "dali/kernels/imgproc/roi.h"
#include "dali/kernels/dynamic_scratchpad.h"

namespace dali {
namespace kernels {
Expand Down Expand Up @@ -158,10 +159,8 @@ TYPED_TEST(LinearTransformationGpuTest, run_test) {

auto reqs = kernel.Setup(ctx, in, make_cspan(this->vmat_), make_cspan(this->vvec_));

ScratchpadAllocator sa;
sa.Reserve(reqs.scratch_sizes);
auto scratchpad = sa.GetScratchpad();
ctx.scratchpad = &scratchpad;
DynamicScratchpad dyn_scratchpad({}, AccessOrder(ctx.gpu.stream));
ctx.scratchpad = &dyn_scratchpad;

OutListGPU<typename TypeParam::Out, kNDims> out(
this->output_, reqs.output_shapes[0].template to_static<kNDims>());
Expand All @@ -186,10 +185,8 @@ TYPED_TEST(LinearTransformationGpuTest, run_test_with_roi) {
make_cspan(this->vmat_), make_cspan(this->vvec_),
make_cspan(this->rois_));

ScratchpadAllocator sa;
sa.Reserve(reqs.scratch_sizes);
auto scratchpad = sa.GetScratchpad();
ctx.scratchpad = &scratchpad;
DynamicScratchpad dyn_scratchpad({}, AccessOrder(ctx.gpu.stream));
ctx.scratchpad = &dyn_scratchpad;

OutListGPU<typename TypeParam::Out, kNDims> out(
this->output_, reqs.output_shapes[0].template to_static<kNDims>());
Expand Down
8 changes: 4 additions & 4 deletions dali/kernels/imgproc/pointwise/multiply_add_gpu_test.cu
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
#include "dali/test/tensor_test_utils.h"
#include "dali/kernels/test/kernel_test_utils.h"
#include "dali/kernels/imgproc/pointwise/multiply_add_gpu.h"
#include "dali/kernels/dynamic_scratchpad.h"

namespace dali {
namespace kernels {
Expand Down Expand Up @@ -152,10 +153,9 @@ TYPED_TEST(MultiplyAddGpuTest, run_test) {

auto reqs = kernel.Setup(ctx, in, this->addends_, this->multipliers_);

ScratchpadAllocator sa;
sa.Reserve(reqs.scratch_sizes);
auto scratchpad = sa.GetScratchpad();
ctx.scratchpad = &scratchpad;
DynamicScratchpad dyn_scratchpad({}, AccessOrder(ctx.gpu.stream));
ctx.scratchpad = &dyn_scratchpad;

kernel.Run(ctx, out, in, this->addends_, this->multipliers_);
CUDA_CALL(cudaDeviceSynchronize());

Expand Down
7 changes: 3 additions & 4 deletions dali/kernels/reduce/reduce_all_gpu_test.cu
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
#include "dali/core/util.h"
#include "dali/test/tensor_test_utils.h"
#include "dali/test/test_tensors.h"
#include "dali/kernels/dynamic_scratchpad.h"

namespace dali {
namespace kernels {
Expand Down Expand Up @@ -231,10 +232,8 @@ void ReduceAllGPUTest<Reduction>::TestReduceAllKernel(int min_size, int max_size

auto req = kernel.Setup(ctx, in_view_gpu);

ScratchpadAllocator sa;
sa.Reserve(req.scratch_sizes);
auto scratchpad = sa.GetScratchpad();
ctx.scratchpad = &scratchpad;
DynamicScratchpad dyn_scratchpad({}, AccessOrder(ctx.gpu.stream));
ctx.scratchpad = &dyn_scratchpad;

ASSERT_EQ(req.output_shapes[0], out_shape);

Expand Down
8 changes: 3 additions & 5 deletions dali/kernels/reduce/reduce_gpu_test.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
#include "dali/kernels/reduce/reduce_test.h"
#include "dali/test/test_tensors.h"
#include "dali/test/tensor_test_utils.h"
#include "dali/kernels/dynamic_scratchpad.h"

namespace dali {
namespace kernels {
Expand All @@ -37,7 +38,6 @@ struct ReductionKernelTest {
TestTensorList<Out> out, ref;

KernelContext ctx;
ScratchpadAllocator sa;
std::mt19937_64 rng{12345};


Expand All @@ -49,12 +49,10 @@ struct ReductionKernelTest {
Args &&...args) {
in.reshape(in_shape);
ref.reshape(ref_out_shape);
ctx.gpu.stream = 0;
auto req = kernel.Setup(ctx, in_shape, axes, keep_dims, batch, std::forward<Args>(args)...);
ASSERT_EQ(req.output_shapes.size(), 1), req;
ASSERT_EQ(req.output_shapes[0], ref_out_shape), req;
out.reshape(ref_out_shape);
sa.Reserve(req.scratch_sizes);
return req;
}

Expand All @@ -66,9 +64,9 @@ struct ReductionKernelTest {

template <typename... Args>
void Run(Args &&...args) {
auto scratchpad = sa.GetScratchpad();
ctx.gpu.stream = 0;
ctx.scratchpad = &scratchpad;
DynamicScratchpad dyn_scratchpad({}, AccessOrder(ctx.gpu.stream));
ctx.scratchpad = &dyn_scratchpad;
kernel.Run(ctx, out.gpu(stream()), in.gpu(stream()), std::forward<Args>(args)...);
}

Expand Down
7 changes: 3 additions & 4 deletions dali/kernels/signal/dct/dct_cpu_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#include "dali/test/tensor_test_utils.h"
#include "dali/test/test_tensors.h"
#include "dali/kernels/signal/dct/dct_test.h"
#include "dali/kernels/dynamic_scratchpad.h"

namespace dali {
namespace kernels {
Expand Down Expand Up @@ -89,10 +90,8 @@ TEST_P(Dct1DCpuTest, DctTest) {

KernelRequirements reqs = kernel.Setup(ctx, in_view_, args, axis_);

ScratchpadAllocator scratch_alloc;
scratch_alloc.Reserve(reqs.scratch_sizes);
auto scratchpad = scratch_alloc.GetScratchpad();
ctx.scratchpad = &scratchpad;
DynamicScratchpad dyn_scratchpad({}, AccessOrder::host());
ctx.scratchpad = &dyn_scratchpad;

TensorShape<> expected_out_shape = in_shape;

Expand Down
Loading

0 comments on commit 69e69e4

Please sign in to comment.