Skip to content

Commit

Permalink
FIX proper dimensions grid(height,depth),block(width)
Browse files Browse the repository at this point in the history
  • Loading branch information
plattenschieber committed Mar 17, 2015
1 parent e0c1bf4 commit 324a78f
Showing 1 changed file with 21 additions and 21 deletions.
42 changes: 21 additions & 21 deletions hscgv_uebung_4/lbm.cu
Original file line number Diff line number Diff line change
Expand Up @@ -38,35 +38,35 @@ __constant__ float d_omega;
size_t __device__ index(int i, int j, int k) {

if(PeriodicBoundaries) {
i = (i+gridDim.x)%gridDim.x;
j = (j+blockDim.x)%blockDim.x;
k = (k+blockDim.y)%blockDim.y;
i = (i+blockDim.x)%blockDim.x;
j = (j+gridDim.x)%gridDim.x;
k = (k+gridDim.y)%gridDim.y;
}
return i + blockDim.x*(j + blockDim.y*size_t(k));
return i + blockDim.x*(j + gridDim.x*size_t(k));
}

size_t __device__ index(int i, int j, int k, int l) {
if(PeriodicBoundaries) {
i = (i+gridDim.x)%gridDim.x;
j = (j+blockDim.x)%blockDim.x;
k = (k+blockDim.y)%blockDim.y;
i = (i+blockDim.x)%blockDim.x;
j = (j+gridDim.x)%gridDim.x;
k = (k+gridDim.y)%gridDim.y;
}
#ifdef INNER_INDEX_DISTRIBUTION
return l + Q*(i + blockDim.x*(j + blockDim.y*size_t(k)));
return l + Q*(i + blockDim.x*(j + gridDim.x*size_t(k)));
#else
return i + blockDim.x*(j + blockDim.y*(size_t(k) + gridDim.x*l));
return i + blockDim.x*(j + gridDim.x*(size_t(k) + gridDim.y*l));
#endif
}

__global__ void collideCudaKernel(float *d_cellsCur, char *d_flags, float3 *d_velocity) {
// get the current thread position
int i = threadIdx.x;
int j = threadIdx.y;
int k = blockIdx.x;
int j = blockIdx.x;
int k = blockIdx.y;

// in case we have no periodic boundaries, the threads on the edges don't have anything to do
if (!PeriodicBoundaries) {
if (i==0 || i==blockDim.x-1 || j==0 || j==blockDim.y-1 || k==0 || k==gridDim.x-1)
if (i==0 || i==blockDim.x-1 || j==0 || j==gridDim.x-1 || k==0 || k==gridDim.y-1)
return;
}

Expand Down Expand Up @@ -109,12 +109,12 @@ __global__ void collideCudaKernel(float *d_cellsCur, char *d_flags, float3 *d_ve
__global__ void streamCudaKernel(float *d_cellsCur, float *d_cellsLast, char *d_flags) {
// get the current thread position
int i = threadIdx.x;
int j = threadIdx.y;
int k = blockIdx.x;
int j = blockIdx.x;
int k = blockIdx.y;

// in case we have no periodic boundaries, the threads on the edges don't have anything to do
if (!PeriodicBoundaries) {
if (i==0 || i==blockDim.x-1 || j==0 || j==blockDim.y-1 || k==0 || k==gridDim.x-1)
if (i==0 || i==blockDim.x-1 || j==0 || j==gridDim.x-1 || k==0 || k==gridDim.y-1)
return;
}

Expand All @@ -137,8 +137,8 @@ __global__ void streamCudaKernel(float *d_cellsCur, float *d_cellsLast, char *d_
__global__ void analyzeCudaKernel(float *d_cellsCur, char *d_flags, float *d_density, float3 *d_u, float3 *d_velocity) {
// get the current thread position
int i = threadIdx.x;
int j = threadIdx.y;
int k = blockIdx.x;
int j = blockIdx.x;
int k = blockIdx.y;

// compute density and velocity in cell
float density = 0.f;
Expand Down Expand Up @@ -182,25 +182,25 @@ void LBMD3Q19::initializeCuda() {

//! collide implementation with CUDA
void LBMD3Q19::collideCuda() {
collideCudaKernel<<<dim3(m_width),dim3(m_height,m_depth)>>>(d_cells[m_current], d_flags, d_velocity);
collideCudaKernel<<<dim3(m_height, m_depth),dim3(m_width)>>>(d_cells[m_current], d_flags, d_velocity);
}

//! streaming with CUDA
void LBMD3Q19::streamCuda() {
streamCudaKernel<<<dim3(m_width),dim3(m_height,m_depth)>>>(d_cells[m_current], d_cells[!m_current], d_flags);
streamCudaKernel<<<dim3(m_height, m_depth),dim3(m_width)>>>(d_cells[m_current], d_cells[!m_current], d_flags);
}

//! compute densities and velocities with CUDA
void LBMD3Q19::analyzeCuda() {
analyzeCudaKernel<<<dim3(m_width),dim3(m_height,m_depth)>>>(d_cells[m_current], d_flags, d_density, d_u, d_velocity);
analyzeCudaKernel<<<dim3(m_height, m_depth),dim3(m_width)>>>(d_cells[m_current], d_flags, d_density, d_u, d_velocity);
// we need to copy back the analyzed data to the host
gpuErrchk (cudaMemcpy(m_u, d_u, sizeof(float3) * m_width * m_height * m_depth, cudaMemcpyDeviceToHost));
gpuErrchk (cudaMemcpy(m_density, d_density, sizeof(float) * m_width * m_height * m_depth, cudaMemcpyDeviceToHost));
}

//! compute minimum and maximum density and velocity with CUDA
void LBMD3Q19::minMaxCuda() {
minMaxCudaKernel<<<dim3(m_width),dim3(m_height,m_depth)>>>();
minMaxCudaKernel<<<dim3(m_height, m_depth),dim3(m_width)>>>();
}

//! very dumb function that copies cells back to host
Expand Down

0 comments on commit 324a78f

Please sign in to comment.