Skip to content

Commit

Permalink
code update and cornell box brightness fix
Browse files Browse the repository at this point in the history
  • Loading branch information
eduardz1 committed Feb 16, 2025
1 parent b9249ec commit 491baae
Show file tree
Hide file tree
Showing 5 changed files with 137 additions and 129 deletions.
2 changes: 0 additions & 2 deletions apps/main.cu
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
#include "cuda_path_tracer/project.cuh"
#include <CLI/CLI.hpp>

// TODO(eduard): cornell_box.json is missing the back wall but it looks kinda
// nice like this
auto main(int argc, char **argv) -> int {
CLI::App app{"CUDA Path Tracer"};
argv = app.ensure_utf8(argv);
Expand Down
2 changes: 1 addition & 1 deletion examples/cornell_box.json
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@
"v": [0, 0, -105],
"material": {
"type": "light",
"color": [40, 40, 30]
"color": [30, 30, 20]
}
},
{
Expand Down
12 changes: 7 additions & 5 deletions include/cuda_path_tracer/camera.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -34,13 +34,15 @@ struct CameraParams {
};

// NOLINTBEGIN(cppcoreguidelines-avoid-magic-numbers, readability-magic-numbers)
static constexpr uint16_t HIGH_QUALITY_DEPTH = 50;
using HighQuality = CameraParams<dim3(8, 8), 128, 64, 50, true, curandState_t>;
static constexpr uint16_t HIGH_QUALITY_DEPTH = 100;
using HighQuality = CameraParams<dim3(8, 8), 1024, 16, HIGH_QUALITY_DEPTH,
false, curandStatePhilox4_32_10_t>;
static constexpr uint16_t MEDIUM_QUALITY_DEPTH = 20;
using MediumQuality =
CameraParams<dim3(8, 8), 128, 16, 10, true, curandState_t>;
using MediumQuality = CameraParams<dim3(8, 8), 256, 8, MEDIUM_QUALITY_DEPTH,
true, curandStatePhilox4_32_10_t>;
static constexpr uint16_t LOW_QUALITY_DEPTH = 10;
using LowQuality = CameraParams<dim3(8, 8), 32, 8, 5, true, curandState_t>;
using LowQuality = CameraParams<dim3(8, 8), 64, 4, LOW_QUALITY_DEPTH, true,
curandStatePhilox4_32_10_t>;
// NOLINTEND(cppcoreguidelines-avoid-magic-numbers, readability-magic-numbers)

class CameraInterface {
Expand Down
114 changes: 88 additions & 26 deletions include/cuda_path_tracer/camera.inl
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,6 @@ template <typename State, uint16_t Depth>
__device__ auto getColor(const Ray &ray,
const cuda::std::span<const Shape> shapes,
State &state, const Color background) -> Color {
// TODO(eduard): make use of shared memory
// TODO(eduard): talk in the report that the bounces of the rays create
// inherent control divergence
Vec3 throughput{1.0F};
Expand Down Expand Up @@ -126,6 +125,74 @@ __device__ auto getColor(const Ray &ray,
return {color};
}

/**
 * @brief Traces four rays side by side, bounce by bounce, and returns the
 * color accumulated along each of them.
 *
 * Every ray keeps its own throughput, accumulated color and "active" flag,
 * while all four draw their random numbers from the same `state`. A ray is
 * retired when `hitShapes` reports no hit (the background, weighted by the
 * ray's throughput, is added first), when `tryScatter` reports that nothing
 * was scattered, or when it is terminated by Russian roulette. The bounce loop
 * stops after `Depth` iterations or as soon as no ray is active anymore.
 *
 * @tparam State type of the random state handed to `tryScatter` and
 * `curand_uniform`
 * @tparam Depth maximum number of bounces evaluated for each ray
 * @param rays the four rays to trace
 * @param shapes shapes tested for intersection through `hitShapes`
 * @param state random state shared by the four rays (taken by reference)
 * @param background color contributed by a ray that does not hit any shape
 * @return the four accumulated colors, in the same order as `rays`
 */
template <typename State, uint16_t Depth>
__device__ auto
get4Colors(const std::tuple<Ray, Ray, Ray, Ray> &rays,
           const cuda::std::span<const Shape> shapes, State &state,
           const Color background) -> std::tuple<Color, Color, Color, Color> {

  cuda::std::array<Vec3, 4> throughput = {Vec3{1.0F}, Vec3{1.0F}, Vec3{1.0F},
                                          Vec3{1.0F}};
  cuda::std::array<Vec3, 4> colors = {Vec3{0.0F}, Vec3{0.0F}, Vec3{0.0F},
                                      Vec3{0.0F}};
  cuda::std::array<Ray, 4> currents = {std::get<0>(rays), std::get<1>(rays),
                                       std::get<2>(rays), std::get<3>(rays)};
  cuda::std::array<bool, 4> active = {true, true, true, true};
  // Number of `true` entries in `active`; lets the bounce loop exit early.
  int active_count = 4;

  for (int d = 0; d < Depth && active_count > 0; d++) {
    cuda::std::array<HitInfo, 4> hits;

#pragma unroll
    for (int r = 0; r < 4; r++) {
      if (!active[r]) {
        continue;
      }

      const bool hit = hitShapes(currents[r], shapes, hits[r]);

      if (!hit) {
        // The ray left the scene: add the background and retire the ray.
        colors[r] += throughput[r] * background;
        active[r] = false;
        active_count--;
        continue;
      }

      const auto emitted = getEmittedColor<State, Depth>(hits[r]);
      colors[r] += throughput[r] * emitted;

      Ray scattered;
      Color attenuation;
      const bool scatter = tryScatter<State>(currents[r], hits[r], attenuation,
                                             scattered, state);

      if (!scatter) {
        // Nothing was scattered, so the path ends here.
        active[r] = false;
        active_count--;
        continue;
      }

      throughput[r] *= attenuation;
      currents[r] = scattered;

      // Russian roulette
      if (d > MIN_DEPTH) {
        const float max_channel =
            std::max({throughput[r].x, throughput[r].y, throughput[r].z});
        // The survival probability has to stay a probability. Without the
        // clamp a throughput above 1 can never be terminated by the test
        // below (curand_uniform never exceeds 1), yet it would still be
        // divided by a value larger than its real survival probability, which
        // darkens the estimate.
        const float p = std::min(max_channel, 1.0F);
        // curand_uniform excludes 0, so p == 0 always terminates the ray and
        // the division below never sees a zero divisor.
        if (curand_uniform(&state) > p) {
          active[r] = false;
          active_count--;
          continue;
        }
        // Compensate the surviving paths for the terminated ones.
        throughput[r] /= p;
      }
    }
  }

  return {Color{colors[0]}, Color{colors[1]}, Color{colors[2]},
          Color{colors[3]}};
}

/**
* @brief Kernel for rendering the image, works by calculating the pixel index
* in the image, computing the Ray that goes from the camera's origin to the
Expand Down Expand Up @@ -166,6 +233,7 @@ renderImage(const uint16_t width, const uint16_t height,

curand_init(SEED, index + (stream_index * width * height), 0, &states);

constexpr auto SAMPLE_SCALE = 1.0F / static_cast<float>(NumSamples);
auto color = Vec3{};

// Evaluates at compile time the type of the state
Expand All @@ -177,21 +245,20 @@ renderImage(const uint16_t width, const uint16_t height,
const auto ray = get4Rays(origin, pixel00, deltaU, deltaV, defocusDiskU,
defocusDiskV, defocusAngle, x, y, states);

color += getColor<State, Depth>(cuda::std::get<0>(ray), shapes, states,
background);
color += getColor<State, Depth>(cuda::std::get<1>(ray), shapes, states,
background);
color += getColor<State, Depth>(cuda::std::get<2>(ray), shapes, states,
background);
color += getColor<State, Depth>(cuda::std::get<3>(ray), shapes, states,
background);
const auto colors =
get4Colors<State, Depth>(ray, shapes, states, background);

color += (cuda::std::get<0>(colors) + cuda::std::get<1>(colors) +
cuda::std::get<2>(colors) + cuda::std::get<3>(colors)) *
SAMPLE_SCALE;
}
} else {
for (auto s = 0; s < NumSamples; s++) {
const auto ray = getRay(origin, pixel00, deltaU, deltaV, defocusDiskU,
defocusDiskV, defocusAngle, x, y, states);

color += getColor<State, Depth>(ray, shapes, states, background);
color += getColor<State, Depth>(ray, shapes, states, background) *
SAMPLE_SCALE;
}
}

Expand All @@ -200,7 +267,7 @@ renderImage(const uint16_t width, const uint16_t height,

template <uint16_t NumImages>
__global__ void averagePixels(const uint16_t width, const uint16_t height,
const uint16_t padded_width, const float scale,
const uint16_t padded_width,
const cuda::std::span<Vec3> images,
cuda::std::span<uchar4> image_out) {
const auto x = blockIdx.x * blockDim.x + threadIdx.x;
Expand All @@ -213,12 +280,13 @@ __global__ void averagePixels(const uint16_t width, const uint16_t height,
const auto output_idx = y * width + x;
const auto padded_idx = y * padded_width + x;

constexpr float IMAGE_SCALE = 1.0F / static_cast<float>(NumImages);
auto sum = Vec3{};
for (auto img = 0; img < NumImages; img++) {
sum += images[img * (padded_width * height) + padded_idx];
}

image_out[output_idx] = Color(sum * scale).correctGamma().to8Bit();
image_out[output_idx] = Color(sum * IMAGE_SCALE).correctGamma().to8Bit();
}

// Align to prevent control divergence
Expand Down Expand Up @@ -280,8 +348,6 @@ Camera<Params>::render(const std::shared_ptr<Scene> &scene,
const auto padded_height = getPaddedSize(height);
const auto num_padded_pixels = padded_width * padded_height;

constexpr float render_scale = 1.0F / (NUM_IMAGES * NUM_SAMPLES);

assert(image.size() == static_cast<size_t>(width * height) &&
("Image size does not match the scene's width and height. Actual: " +
std::to_string(image.size()) +
Expand All @@ -304,7 +370,7 @@ Camera<Params>::render(const std::shared_ptr<Scene> &scene,
// Calling cudaGetLastError() here to clear any previous errors
CUDA_ERROR_CHECK(cudaGetLastError());
for (auto i = 0; i < NUM_IMAGES; i++) {
renderImage<State, NUM_IMAGES, DEPTH>
renderImage<State, NUM_SAMPLES, DEPTH>
<<<grid, BLOCK_SIZE, 0, streams.at(i)>>>(
padded_width, padded_height,
image_3d_span.subspan(i * num_padded_pixels), origin, pixel00,
Expand All @@ -322,7 +388,6 @@ __host__ void Camera<Params>::averageRenderedImages(
thrust::universal_host_pinned_vector<uchar4> &output,
const cuda::std::span<Vec3> &images, const uint16_t width,
const uint16_t height, const uint16_t padded_width) {
constexpr float render_scale = 1.0F / (NUM_IMAGES * NUM_SAMPLES);

if constexpr (AVG_WITH_THRUST) {
const auto num_pixels = output.size();
Expand All @@ -335,10 +400,11 @@ __host__ void Camera<Params>::averageRenderedImages(
const auto padded_idx = row * padded_width + col;

Vec3 sum{};
constexpr float IMAGE_SCALE = 1.0F / static_cast<float>(NUM_IMAGES);
for (int img = 0; img < NUM_IMAGES; img++) {
sum += images[img * (padded_width * height) + padded_idx];
}
return Color(sum * render_scale).correctGamma().to8Bit();
return Color(sum * IMAGE_SCALE).correctGamma().to8Bit();
});
} else {
const dim3 grid(std::ceil(width / BLOCK_SIZE.x),
Expand All @@ -347,8 +413,8 @@ __host__ void Camera<Params>::averageRenderedImages(
cuda::std::span<uchar4> output_span{thrust::raw_pointer_cast(output.data()),
output.size()};

averagePixels<NUM_IMAGES><<<grid, BLOCK_SIZE>>>(
width, height, padded_width, render_scale, images, output_span);
averagePixels<NUM_IMAGES><<<grid, BLOCK_SIZE>>>(width, height, padded_width,
images, output_span);
CUDA_ERROR_CHECK(cudaDeviceSynchronize());
CUDA_ERROR_CHECK(cudaGetLastError());
}
Expand Down Expand Up @@ -395,24 +461,20 @@ __host__ auto CameraBuilder<Params>::defocusAngle(const float defocusAngle)
}

template <typename Params>
__host__ auto
CameraBuilder<Params>::focusDistance(const float focusDistance)
__host__ auto CameraBuilder<Params>::focusDistance(const float focusDistance)
-> CameraBuilder<Params> & {
this->camera.focusDistance = focusDistance;
return *this;
}

template <typename Params>
__host__ auto
CameraBuilder<Params>::background(const Color &background)
__host__ auto CameraBuilder<Params>::background(const Color &background)
-> CameraBuilder<Params> & {
this->camera.background = background;
return *this;
}

template <typename Params>
__host__ auto
CameraBuilder<Params>::build() -> Camera<Params> {
__host__ auto CameraBuilder<Params>::build() -> Camera<Params> {
return this->camera;
}

Loading

0 comments on commit 491baae

Please sign in to comment.