Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Nonbonded kernel #194

Open
wants to merge 7 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Empty file added src/0�@�
Empty file.
852 changes: 799 additions & 53 deletions src/cuda.jl

Large diffs are not rendered by default.

15 changes: 4 additions & 11 deletions src/energy.jl
Original file line number Diff line number Diff line change
Expand Up @@ -247,28 +247,21 @@ function specific_pe(atoms, coords, velocities, boundary, energy_units, sils_1_a
return pe
end

function potential_energy(sys::System{D, true, T}, neighbors, step_n::Integer=0;
function potential_energy(sys::System{D, true, T}, neighbors, step_n::Integer=0,
buffers=init_forces_buffer(ustrip_vec.(zero(sys.coords)), sys.coords, 1);
n_threads::Integer=Threads.nthreads()) where {D, T}
pe_vec_nounits = CUDA.zeros(T, 1)
val_ft = Val(T)

pairwise_inters_nonl = filter(!use_neighbors, values(sys.pairwise_inters))
if length(pairwise_inters_nonl) > 0
nbs = NoNeighborList(length(sys))
pairwise_pe_gpu!(pe_vec_nounits, sys.coords, sys.velocities, sys.atoms, sys.boundary,
pairwise_inters_nonl, nbs, step_n, sys.energy_units, val_ft)
pairwise_pe_gpu!(pe_vec_nounits, buffers, sys, pairwise_inters_nonl, nbs, step_n)
end

pairwise_inters_nl = filter(use_neighbors, values(sys.pairwise_inters))
if length(pairwise_inters_nl) > 0
if isnothing(neighbors)
error("an interaction uses the neighbor list but neighbors is nothing")
end
if length(neighbors) > 0
nbs = @view neighbors.list[1:neighbors.n]
pairwise_pe_gpu!(pe_vec_nounits, sys.coords, sys.velocities, sys.atoms, sys.boundary,
pairwise_inters_nl, nbs, step_n, sys.energy_units, val_ft)
end
pairwise_pe_gpu!(pe_vec_nounits, buffers, sys, pairwise_inters_nl, nothing, step_n)
end

for inter_list in values(sys.specific_inter_lists)
Expand Down
46 changes: 27 additions & 19 deletions src/force.jl
Original file line number Diff line number Diff line change
Expand Up @@ -115,16 +115,29 @@ Base.:+(x::SpecificForce2Atoms, y::SpecificForce2Atoms) = SpecificForce2Atoms(x.
Base.:+(x::SpecificForce3Atoms, y::SpecificForce3Atoms) = SpecificForce3Atoms(x.f1 + y.f1, x.f2 + y.f2, x.f3 + y.f3)
Base.:+(x::SpecificForce4Atoms, y::SpecificForce4Atoms) = SpecificForce4Atoms(x.f1 + y.f1, x.f2 + y.f2, x.f3 + y.f3, x.f4 + y.f4)

function init_forces_buffer(forces_nounits, n_threads)
function init_forces_buffer(forces_nounits, ::AbstractArray{SVector{D, C}}, n_threads) where {D, C}
if n_threads == 1
return nothing
else
return [similar(forces_nounits) for _ in 1:n_threads]
end
end

function init_forces_buffer(forces_nounits::CuArray{SVector{D, T}}, n_threads) where {D, T}
return CUDA.zeros(T, D, length(forces_nounits))
struct ForcesBuffer{F, C}
fs_mat::F
box_mins::C
box_maxs::C
Morton_seq::CuArray{Int32}
Peppone98 marked this conversation as resolved.
Show resolved Hide resolved
end

function init_forces_buffer(forces_nounits::CuArray{SVector{D, T}}, ::AbstractArray{SVector{D, C}}, n_threads) where {D, T, C}
N = length(forces_nounits)
n_blocks = cld(N, 32)
fs_mat = CUDA.zeros(T, D, N)
box_mins = CUDA.zeros(C, n_blocks, D)
box_maxs = CUDA.zeros(C, n_blocks, D)
Morton_seq = CUDA.zeros(Int32, N)
return ForcesBuffer(fs_mat, box_mins, box_maxs, Morton_seq)
end

"""
Expand All @@ -139,7 +152,7 @@ end

function forces(sys, neighbors, step_n::Integer=0; n_threads::Integer=Threads.nthreads())
forces_nounits = ustrip_vec.(zero(sys.coords))
forces_buffer = init_forces_buffer(forces_nounits, n_threads)
forces_buffer = init_forces_buffer(forces_nounits, sys.coords, n_threads)
forces_nounits!(forces_nounits, sys, neighbors, forces_buffer, step_n; n_threads=n_threads)
return forces_nounits .* sys.force_units
end
Expand Down Expand Up @@ -347,36 +360,29 @@ function specific_forces!(fs_nounits, atoms, coords, velocities, boundary, force
end

function forces_nounits!(fs_nounits, sys::System{D, true, T}, neighbors,
fs_mat=CUDA.zeros(T, D, length(sys)), step_n::Integer=0;
buffers, step_n::Integer=0;
n_threads::Integer=Threads.nthreads()) where {D, T}
fill!(fs_mat, zero(T))
fill!(buffers.fs_mat, zero(T))
val_ft = Val(T)

pairwise_inters_nonl = filter(!use_neighbors, values(sys.pairwise_inters))
if length(pairwise_inters_nonl) > 0
nbs = NoNeighborList(length(sys))
pairwise_force_gpu!(fs_mat, sys.coords, sys.velocities, sys.atoms, sys.boundary, pairwise_inters_nonl,
nbs, step_n, sys.force_units, val_ft)
n = length(sys)
nbs = NoNeighborList(n)
pairwise_force_gpu!(buffers, sys, pairwise_inters_nonl, nbs, step_n)
end

pairwise_inters_nl = filter(use_neighbors, values(sys.pairwise_inters))
if length(pairwise_inters_nl) > 0
if isnothing(neighbors)
error("an interaction uses the neighbor list but neighbors is nothing")
end
if length(neighbors) > 0
nbs = @view neighbors.list[1:neighbors.n]
pairwise_force_gpu!(fs_mat, sys.coords, sys.velocities, sys.atoms, sys.boundary, pairwise_inters_nl,
nbs, step_n, sys.force_units, val_ft)
end
pairwise_force_gpu!(buffers, sys, pairwise_inters_nl, nothing, step_n)
end

for inter_list in values(sys.specific_inter_lists)
specific_force_gpu!(fs_mat, inter_list, sys.coords, sys.velocities, sys.atoms,
specific_force_gpu!(buffers.fs_mat, inter_list, sys.coords, sys.velocities, sys.atoms,
sys.boundary, step_n, sys.force_units, val_ft)
end

fs_nounits .= reinterpret(SVector{D, T}, vec(fs_mat))
fs_nounits .= reinterpret(SVector{D, T}, vec(buffers.fs_mat))

for inter in values(sys.general_inters)
fs_gen = AtomsCalculators.forces(sys, inter; neighbors=neighbors, step_n=step_n,
Expand All @@ -387,3 +393,5 @@ function forces_nounits!(fs_nounits, sys::System{D, true, T}, neighbors,

return fs_nounits
end


33 changes: 32 additions & 1 deletion src/neighbors.jl
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@ export
find_neighbors,
DistanceNeighborFinder,
TreeNeighborFinder,
CellListMapNeighborFinder
CellListMapNeighborFinder,
GPUNeighborFinder

"""
use_neighbors(inter)
Expand Down Expand Up @@ -64,6 +65,29 @@ function DistanceNeighborFinder(;
eligible, dist_cutoff, special, n_steps, zero(eligible))
end

mutable struct GPUNeighborFinder{B, D}
Peppone98 marked this conversation as resolved.
Show resolved Hide resolved
eligible::B
dist_cutoff::D
special::B
n_steps_reorder::Int
initialized::Bool
end

function GPUNeighborFinder(;
eligible,
dist_cutoff,
special=zero(eligible),
n_steps_reorder=10,
initialized=false)
return GPUNeighborFinder{typeof(eligible), typeof(dist_cutoff)}(
eligible, dist_cutoff, special, n_steps_reorder, initialized)
end

find_neighbors(sys::System{D, true}, nf::GPUNeighborFinder, current_neighbors=nothing, step_n::Integer=0, initialized::Bool=false; kwargs...) where D = nothing

find_neighbors(sys::System, nf::GPUNeighborFinder, args...; kwargs...) = nothing


function find_neighbors(sys::System{D, false},
nf::DistanceNeighborFinder,
current_neighbors=nothing,
Expand Down Expand Up @@ -365,3 +389,10 @@ function Base.show(io::IO, neighbor_finder::Union{DistanceNeighborFinder,
println(io, " n_steps = " , neighbor_finder.n_steps)
print( io, " dist_cutoff = ", neighbor_finder.dist_cutoff)
end

function Base.show(io::IO, neighbor_finder::Union{GPUNeighborFinder})
println(io, typeof(neighbor_finder))
println(io, " Size of eligible matrix = " , size(neighbor_finder.eligible))
println(io, " n_steps_reorder = " , neighbor_finder.n_steps_reorder)
print( io, " dist_cutoff = ", neighbor_finder.dist_cutoff)
end
40 changes: 28 additions & 12 deletions src/setup.jl
Original file line number Diff line number Diff line change
Expand Up @@ -887,14 +887,15 @@ function System(coord_file::AbstractString,
end
coords = wrap_coords.(coords, (boundary_used,))

if gpu || !use_cell_list
neighbor_finder = DistanceNeighborFinder(
eligible=(gpu ? CuArray(eligible) : eligible),
special=(gpu ? CuArray(special) : special),
n_steps=10,
if gpu
neighbor_finder = GPUNeighborFinder(
eligible=CuArray(eligible),
dist_cutoff=T(dist_neighbors),
special=CuArray(special),
n_steps_reorder=10,
initialized=false,
)
else
elseif use_cell_list
neighbor_finder = CellListMapNeighborFinder(
eligible=eligible,
special=special,
Expand All @@ -903,6 +904,13 @@ function System(coord_file::AbstractString,
unit_cell=boundary_used,
dist_cutoff=T(dist_neighbors),
)
else
neighbor_finder = DistanceNeighborFinder(
eligible=eligible,
special=special,
n_steps=10,
dist_cutoff=T(dist_neighbors),
)
end
if gpu
atoms = CuArray([atoms_abst...])
Expand Down Expand Up @@ -1276,14 +1284,15 @@ function System(T::Type,
end
specific_inter_lists = tuple(specific_inter_array...)

if gpu || !use_cell_list
neighbor_finder = DistanceNeighborFinder(
eligible=(gpu ? CuArray(eligible) : eligible),
special=(gpu ? CuArray(special) : special),
n_steps=10,
if gpu
neighbor_finder = GPUNeighborFinder(
eligible=CuArray(eligible),
dist_cutoff=T(dist_neighbors),
special=CuArray(special),
n_steps_reorder=10,
initialized=false,
)
else
elseif use_cell_list
neighbor_finder = CellListMapNeighborFinder(
eligible=eligible,
special=special,
Expand All @@ -1292,6 +1301,13 @@ function System(T::Type,
unit_cell=boundary_used,
dist_cutoff=T(dist_neighbors),
)
else
neighbor_finder = DistanceNeighborFinder(
eligible=eligible,
special=special,
n_steps=10,
dist_cutoff=T(dist_neighbors),
)
end
if gpu
atoms = CuArray([atoms_abst...])
Expand Down
37 changes: 27 additions & 10 deletions src/simulators.jl
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,10 @@ Custom simulators should implement this function.
coords_copy = similar(sys.coords)
F_nounits = ustrip_vec.(similar(sys.coords))
F = F_nounits .* sys.force_units
forces_buffer = init_forces_buffer(F_nounits, n_threads)
forces_buffer = init_forces_buffer(F_nounits, sys.coords, n_threads)
F_nounits = forces_nounits!(F_nounits, sys, neighbors, forces_buffer, 0;
Peppone98 marked this conversation as resolved.
Show resolved Hide resolved
n_threads=n_threads)


for step_n in 1:sim.max_steps
F_nounits .= forces_nounits!(F_nounits, sys, neighbors, forces_buffer, step_n;
Expand Down Expand Up @@ -141,11 +144,13 @@ end
sys.coords .= wrap_coords.(sys.coords, (sys.boundary,))
!iszero(sim.remove_CM_motion) && remove_CM_motion!(sys)
neighbors = find_neighbors(sys, sys.neighbor_finder; n_threads=n_threads)
forces_t = forces(sys, neighbors; n_threads=n_threads)
forces_nounits_t = ustrip_vec.(zero(sys.coords))
forces_buffer = init_forces_buffer(forces_nounits_t, sys.coords, n_threads)
forces_nounits_t = forces_nounits!(forces_nounits_t, sys, neighbors, forces_buffer, 0; n_threads=n_threads)
forces_t = forces_nounits_t .* sys.force_units
accels_t = forces_t ./ masses(sys)
forces_nounits_t_dt = ustrip_vec.(similar(sys.coords))
forces_t_dt = forces_nounits_t_dt .* sys.force_units
forces_buffer = init_forces_buffer(forces_nounits_t_dt, n_threads)
accels_t_dt = zero(accels_t)
apply_loggers!(sys, neighbors, 0, run_loggers; n_threads=n_threads, current_forces=forces_t)
using_constraints = length(sys.constraints) > 0
Expand Down Expand Up @@ -231,7 +236,9 @@ end
apply_loggers!(sys, neighbors, 0, run_loggers; n_threads=n_threads)
forces_nounits_t = ustrip_vec.(similar(sys.coords))
forces_t = forces_nounits_t .* sys.force_units
forces_buffer = init_forces_buffer(forces_nounits_t, n_threads)
forces_buffer = init_forces_buffer(forces_nounits_t, sys.coords, n_threads)
forces_nounits_t = forces_nounits!(forces_nounits_t, sys, neighbors, forces_buffer, 0;
Peppone98 marked this conversation as resolved.
Show resolved Hide resolved
n_threads=n_threads)
accels_t = forces_t ./ masses(sys)
using_constraints = length(sys.constraints) > 0
if using_constraints
Expand Down Expand Up @@ -306,7 +313,9 @@ StormerVerlet(; dt, coupling=NoCoupling()) = StormerVerlet(dt, coupling)
coords_last, coords_copy = similar(sys.coords), similar(sys.coords)
forces_nounits_t = ustrip_vec.(similar(sys.coords))
forces_t = forces_nounits_t .* sys.force_units
forces_buffer = init_forces_buffer(forces_nounits_t, n_threads)
forces_buffer = init_forces_buffer(forces_nounits_t, sys.coords, n_threads)
forces_nounits_t = forces_nounits!(forces_nounits_t, sys, neighbors, forces_buffer, 0;
n_threads=n_threads)
accels_t = forces_t ./ masses(sys)
using_constraints = length(sys.constraints) > 0

Expand Down Expand Up @@ -389,7 +398,9 @@ end
apply_loggers!(sys, neighbors, 0, run_loggers; n_threads=n_threads)
forces_nounits_t = ustrip_vec.(similar(sys.coords))
forces_t = forces_nounits_t .* sys.force_units
forces_buffer = init_forces_buffer(forces_nounits_t, n_threads)
forces_buffer = init_forces_buffer(forces_nounits_t, sys.coords, n_threads)
forces_nounits_t = forces_nounits!(forces_nounits_t, sys, neighbors, forces_buffer, 0;
n_threads=n_threads)
accels_t = forces_t ./ masses(sys)
noise = similar(sys.velocities)
using_constraints = length(sys.constraints) > 0
Expand Down Expand Up @@ -497,7 +508,9 @@ end
apply_loggers!(sys, neighbors, 0, run_loggers; n_threads=n_threads)
forces_nounits_t = ustrip_vec.(similar(sys.coords))
forces_t = forces_nounits_t .* sys.force_units
forces_buffer = init_forces_buffer(forces_nounits_t, n_threads)
forces_buffer = init_forces_buffer(forces_nounits_t, sys.coords, n_threads)
forces_nounits_t = forces_nounits!(forces_nounits_t, sys, neighbors, forces_buffer, 0;
n_threads=n_threads)
accels_t = forces_t ./ masses(sys)
noise = similar(sys.velocities)

Expand Down Expand Up @@ -617,7 +630,9 @@ end
apply_loggers!(sys, neighbors, 0, run_loggers; n_threads=n_threads)
forces_nounits_t = ustrip_vec.(similar(sys.coords))
forces_t = forces_nounits_t .* sys.force_units
forces_buffer = init_forces_buffer(forces_nounits_t, n_threads)
forces_buffer = init_forces_buffer(forces_nounits_t, sys.coords, n_threads)
forces_nounits_t = forces_nounits!(forces_nounits_t, sys, neighbors, forces_buffer, 0;
n_threads=n_threads)
accels_t = forces_t ./ masses(sys)
noise = similar(sys.velocities)

Expand Down Expand Up @@ -685,11 +700,13 @@ end
sys.coords .= wrap_coords.(sys.coords, (sys.boundary,))
!iszero(sim.remove_CM_motion) && remove_CM_motion!(sys)
neighbors = find_neighbors(sys, sys.neighbor_finder; n_threads=n_threads)
forces_t = forces(sys, neighbors; n_threads=n_threads)
forces_nounits_t = ustrip_vec.(zero(sys.coords))
forces_buffer = init_forces_buffer(forces_nounits_t, sys.coords, n_threads)
forces_nounits_t = forces_nounits!(forces_nounits_t, sys, neighbors, forces_buffer, 0; n_threads=n_threads)
forces_t = forces_nounits_t .* sys.force_units
accels_t = forces_t ./ masses(sys)
forces_nounits_t_dt = ustrip_vec.(similar(sys.coords))
forces_t_dt = forces_nounits_t_dt .* sys.force_units
forces_buffer = init_forces_buffer(forces_nounits_t_dt, n_threads)
accels_t_dt = zero(accels_t)
apply_loggers!(sys, neighbors, 0, run_loggers; n_threads=n_threads, current_forces=forces_t)
v_half = zero(sys.velocities)
Expand Down
15 changes: 11 additions & 4 deletions test/basic.jl
Original file line number Diff line number Diff line change
Expand Up @@ -184,10 +184,17 @@
# Mark all pairs as ineligible for pairwise interactions and check that the
# potential energy from the specific interactions does not change on scaling
no_nbs = falses(length(sys), length(sys))
sys.neighbor_finder = DistanceNeighborFinder(
eligible=(gpu ? CuArray(no_nbs) : no_nbs),
dist_cutoff=1.0u"nm",
)
if gpu
sys.neighbor_finder = GPUNeighborFinder(
eligible=(gpu ? CuArray(no_nbs) : no_nbs),
dist_cutoff=1.0u"nm",
)
else
sys.neighbor_finder = DistanceNeighborFinder(
eligible=(gpu ? CuArray(no_nbs) : no_nbs),
dist_cutoff=1.0u"nm",
)
end
coords_start = copy(sys.coords)
pe_start = potential_energy(sys, find_neighbors(sys))
scale_factor = 1.02
Expand Down
Loading
Loading