From 78046b12910e3afc5f5ec84dc76f1ddd78ccf8fc Mon Sep 17 00:00:00 2001
From: albert-de-montserrat <albert.de.montserrat@gmail.com>
Date: Tue, 10 Oct 2023 14:09:27 +0200
Subject: [PATCH 1/3] fix typos

---
 src/JustPIC.jl | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/JustPIC.jl b/src/JustPIC.jl
index b870d1f..73abd0d 100644
--- a/src/JustPIC.jl
+++ b/src/JustPIC.jl
@@ -40,8 +40,8 @@ const backend = @load_preference("backend", "Threads_Float64_2D")
 
 const TA = if occursin("CUDA", backend) 
     JustPIC.CUDA.CuArray 
-elseif occursin("AMGPU", backend) 
-    JustPIC.ADMGPU.ROCArray
+elseif occursin("AMDGPU", backend) 
+    JustPIC.AMDGPU.ROCArray
 else
     Array
 end

From ac73811da12c16f3cf98db872c16e1c81367d218 Mon Sep 17 00:00:00 2001
From: albert-de-montserrat <albert.de.montserrat@gmail.com>
Date: Tue, 10 Oct 2023 14:10:17 +0200
Subject: [PATCH 2/3] replace mapreduce by generated function

---
 src/Particles/advection.jl | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/src/Particles/advection.jl b/src/Particles/advection.jl
index c8032ed..deef57f 100644
--- a/src/Particles/advection.jl
+++ b/src/Particles/advection.jl
@@ -271,8 +271,11 @@ function inner_limits(grid::NTuple{N,T}) where {N,T}
     end
 end
 
-@inline function check_local_limits(
+@generated function check_local_limits(
     local_lims::NTuple{N,T1}, p::NTuple{N,T2}
 ) where {N,T1,T2}
-    return mapreduce(x -> x[1][1] < x[2] < x[1][2], &, zip(local_lims, p))
+    quote
+        Base.@nexprs $N i -> !(local_lims[i][1] < p[i] < local_lims[i][2]) && return false
+        return true
+    end
 end

From a0a76b3dcd8acfa30dd7a6fa8a439495b93c14dc Mon Sep 17 00:00:00 2001
From: albert-de-montserrat <albert.de.montserrat@gmail.com>
Date: Tue, 10 Oct 2023 14:10:30 +0200
Subject: [PATCH 3/3] AMDGPU script

---
 scripts/temperature_advection_AMDGPU.jl | 98 +++++++++++++++++++++++++
 1 file changed, 98 insertions(+)
 create mode 100644 scripts/temperature_advection_AMDGPU.jl

diff --git a/scripts/temperature_advection_AMDGPU.jl b/scripts/temperature_advection_AMDGPU.jl
new file mode 100644
index 0000000..656af58
--- /dev/null
+++ b/scripts/temperature_advection_AMDGPU.jl
@@ -0,0 +1,98 @@
+using JustPIC
+
+# set_backend("AMDGPU_Float64_2D") # need to restart session if this changes
+
+using CellArrays
+using ParallelStencil
+using CairoMakie
+@init_parallel_stencil(AMDGPU, Float64, 2)
+
+function init_particles(nxcell, max_xcell, min_xcell, x, y, dx, dy, nx, ny)
+    ni = nx, ny
+    ncells = nx * ny
+    np = max_xcell * ncells
+    px, py = ntuple(_ -> @rand(ni..., celldims=(max_xcell,)) , Val(2))
+
+    inject = @fill(false, nx, ny, eltype=Bool)
+    index = @fill(false, ni..., celldims=(max_xcell,), eltype=Bool) 
+    
+    @parallel_indices (i, j) function fill_coords_index(px, py, index, x, y, dx, dy, nxcell)
+        # lower-left corner of the cell
+        x0, y0 = x[i], y[j]
+        # fill index array
+        for l in 1:nxcell
+            @cell px[l, i, j] = x0 + dx * (@cell(px[l, i, j]) * 0.9 + 0.05)
+            @cell py[l, i, j] = y0 + dy * (@cell(py[l, i, j]) * 0.9 + 0.05)
+            @cell index[l, i, j] = true
+        end
+        return nothing
+    end
+
+    @parallel (1:nx, 1:ny) fill_coords_index(px, py, index, x, y, dx, dy, nxcell) 
+
+    return Particles(
+        (px, py), index, inject, nxcell, max_xcell, min_xcell, np, (nx, ny)
+    )
+end
+
+function expand_range(x::AbstractRange)
+    dx = x[2] - x[1]
+    n = length(x)
+    x1, x2 = extrema(x)
+    xI = round(x1-dx; sigdigits=5)
+    xF = round(x2+dx; sigdigits=5)
+    range(xI, xF, length=n+2)
+end
+
+# Analytical flow solution
+vx_stream(x, y) =  250 * sin(π*x) * cos(π*y)
+vy_stream(x, y) = -250 * cos(π*x) * sin(π*y)
+g(x) = Point2f(
+    vx_stream(x[1], x[2]),
+    vy_stream(x[1], x[2])
+)
+
+function main()
+    # Initialize particles -------------------------------
+    nxcell, max_xcell, min_xcell = 24, 24, 24
+    n = 256
+    nx = ny = n-1
+    Lx = Ly = 1.0
+    # nodal vertices
+    xvi = xv, yv = range(0, Lx, length=n), range(0, Ly, length=n)
+    dxi = dx, dy = xv[2] - xv[1], yv[2] - yv[1]
+    # nodal centers
+    xc, yc = range(0+dx/2, Lx-dx/2, length=n-1), range(0+dy/2, Ly-dy/2, length=n-1)
+    # staggered grid velocity nodal locations
+    grid_vx = xv, expand_range(yc)
+    grid_vy = expand_range(xc), yv
+
+    particles = init_particles(
+        nxcell, max_xcell, min_xcell, xvi..., dxi..., nx, ny
+    )
+
+    # Cell fields -------------------------------
+    Vx = TA([vx_stream(x, y) for x in grid_vx[1], y in grid_vx[2]])
+    Vy = TA([vy_stream(x, y) for x in grid_vy[1], y in grid_vy[2]])
+    T  = TA([y for x in xv, y in yv])
+    V  = Vx, Vy
+
+    dt = min(dx / maximum(abs.(Vx)),  dy / maximum(abs.(Vy)))
+
+    # Advection test
+    particle_args = pT, = init_cell_arrays(particles, Val(1))
+    grid2particle!(pT, xvi, T, particles.coords)
+    
+    niter = 150
+    for _ in 1:niter
+        advection_RK!(particles, V, grid_vx, grid_vy, dt, 2 / 3)
+        shuffle_particles!(particles, xvi, particle_args)
+        particle2grid!(T, pT, xvi, particles.coords)
+    end
+
+    f, ax, = heatmap(xvi..., Array(T), colormap=:batlow)
+    streamplot!(ax, g, xvi...)
+    f
+end
+
+f = main()