From 49c590a511f61128ddc0149ee8b48f5615f0268a Mon Sep 17 00:00:00 2001 From: "Steven G. Johnson" Date: Fri, 14 Feb 2014 17:04:08 -0500 Subject: [PATCH] new DFT api --- src/dct.jl | 103 +++++++ src/fft.jl | 781 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 884 insertions(+) create mode 100644 src/dct.jl create mode 100644 src/fft.jl diff --git a/src/dct.jl b/src/dct.jl new file mode 100644 index 0000000..53c8f6b --- /dev/null +++ b/src/dct.jl @@ -0,0 +1,103 @@ +# This file is a part of Julia. License is MIT: http://julialang.org/license + +# (This is part of the FFTW module.) + +export dct, idct, dct!, idct!, plan_dct, plan_idct, plan_dct!, plan_idct! + +# Discrete cosine transforms (type II/III) via FFTW's r2r transforms; +# we follow the Matlab convention and adopt a unitary normalization here. +# Unlike Matlab we compute the multidimensional transform by default, +# similar to the Julia fft functions. + +type DCTPlan{T<:fftwNumber,K,inplace} <: Plan{T} + plan::r2rFFTWPlan{T} + r::Array{UnitRange{Int}} # array of indices for rescaling + nrm::Float64 # normalization factor + region::Dims # dimensions being transformed + pinv::DCTPlan{T} + DCTPlan(plan,r,nrm,region) = new(plan,r,nrm,region) +end + +size(p::DCTPlan) = size(p.plan) + +function show{T,K,inplace}(io::IO, p::DCTPlan{T,K,inplace}) + print(io, inplace ? "FFTW in-place " : "FFTW ", + K == REDFT10 ? "DCT (DCT-II)" : "IDCT (DCT-III)", " plan for ") + showfftdims(io, p.plan.sz, p.plan.istride, eltype(p)) +end + +for (pf, pfr, K, inplace) in ((:plan_dct, :plan_r2r, REDFT10, false), + (:plan_dct!, :plan_r2r!, REDFT10, true), + (:plan_idct, :plan_r2r, REDFT01, false), + (:plan_idct!, :plan_r2r!, REDFT01, true)) + @eval function $pf{T<:fftwNumber}(X::StridedArray{T}, region; kws...) + r = [1:n for n in size(X)] + nrm = sqrt(0.5^length(region) * normalization(X,region)) + DCTPlan{T,$K,$inplace}($pfr(X, $K, region; kws...), r, nrm, + ntuple(i -> Int(region[i]), length(region))) + end +end + +function plan_inv{T,K,inplace}(p::DCTPlan{T,K,inplace}) + X = Array(T, p.plan.sz) + iK = inv_kind[K] + DCTPlan{T,iK,inplace}(inplace ? + plan_r2r!(X, iK, p.region, flags=p.plan.flags) : + plan_r2r(X, iK, p.region, flags=p.plan.flags), + p.r, p.nrm, p.region) +end + +for f in (:dct, :dct!, :idct, :idct!) + pf = symbol(string("plan_", f)) + @eval begin + $f{T<:fftwNumber}(x::AbstractArray{T}) = $pf(x) * x + $f{T<:fftwNumber}(x::AbstractArray{T}, region) = $pf(x, region) * x + $pf(x::AbstractArray; kws...) = $pf(x, 1:ndims(x); kws...) + $f{T<:Real}(x::AbstractArray{T}, region=1:ndims(x)) = $f(fftwfloat(x), region) + $pf{T<:Real}(x::AbstractArray{T}, region; kws...) = $pf(fftwfloat(x), region; kws...) + $pf{T<:Complex}(x::AbstractArray{T}, region; kws...) = $pf(fftwcomplex(x), region; kws...) + end +end + +const sqrthalf = sqrt(0.5) +const sqrt2 = sqrt(2.0) +const onerange = 1:1 + +function A_mul_B!{T}(y::StridedArray{T}, p::DCTPlan{T,REDFT10}, + x::StridedArray{T}) + assert_applicable(p.plan, x, y) + unsafe_execute!(p.plan, x, y) + scale!(y, p.nrm) + r = p.r + for d in p.region + oldr = r[d] + r[d] = onerange + y[r...] *= sqrthalf + r[d] = oldr + end + return y +end + +# note: idct changes input data +function A_mul_B!{T}(y::StridedArray{T}, p::DCTPlan{T,REDFT01}, + x::StridedArray{T}) + assert_applicable(p.plan, x, y) + scale!(x, p.nrm) + r = p.r + for d in p.region + oldr = r[d] + r[d] = onerange + x[r...] *= sqrt2 + r[d] = oldr + end + unsafe_execute!(p.plan, x, y) + return y +end + +*{T}(p::DCTPlan{T,REDFT10,false}, x::StridedArray{T}) = + A_mul_B!(Array(T, p.plan.osz), p, x) + +*{T}(p::DCTPlan{T,REDFT01,false}, x::StridedArray{T}) = + A_mul_B!(Array(T, p.plan.osz), p, copy(x)) # need copy to preserve input + +*{T,K}(p::DCTPlan{T,K,true}, x::StridedArray{T}) = A_mul_B!(x, p, x) diff --git a/src/fft.jl b/src/fft.jl new file mode 100644 index 0000000..539e15f --- /dev/null +++ b/src/fft.jl @@ -0,0 +1,781 @@ +# This file is a part of Julia. License is MIT: http://julialang.org/license + +module FFTW + +import ..DFT: fft, bfft, ifft, rfft, brfft, irfft, plan_fft, plan_bfft, plan_ifft, plan_rfft, plan_brfft, plan_irfft, fft!, bfft!, ifft!, plan_fft!, plan_bfft!, plan_ifft!, Plan, rfft_output_size, brfft_output_size, plan_inv, normalization, ScaledPlan + +import Base: show, *, convert, unsafe_convert, size, strides, ndims, pointer, A_mul_B! + +export r2r, r2r!, plan_r2r, plan_r2r! + +export export_wisdom, import_wisdom, import_system_wisdom, forget_wisdom, + MEASURE, DESTROY_INPUT, UNALIGNED, CONSERVE_MEMORY, EXHAUSTIVE, + PRESERVE_INPUT, PATIENT, ESTIMATE, WISDOM_ONLY, NO_TIMELIMIT, + R2HC, HC2R, DHT, REDFT00, REDFT01, REDFT10, REDFT11, + RODFT00, RODFT01, RODFT10, RODFT11, + fftwNumber, fftwReal, fftwComplex, flops + +## FFT: Implement fft by calling fftw. + +const libfftw = Base.libfftw_name +const libfftwf = Base.libfftwf_name + +const version = convert(VersionNumber, split(bytestring(cglobal((:fftw_version,Base.DFT.FFTW.libfftw), UInt8)), "-")[2]) + +## Direction of FFT + +const FORWARD = -1 +const BACKWARD = 1 + +## FFTW Flags from fftw3.h + +const MEASURE = UInt32(0) +const DESTROY_INPUT = UInt32(1 << 0) +const UNALIGNED = UInt32(1 << 1) +const CONSERVE_MEMORY = UInt32(1 << 2) +const EXHAUSTIVE = UInt32(1 << 3) # NO_EXHAUSTIVE is default +const PRESERVE_INPUT = UInt32(1 << 4) # cancels DESTROY_INPUT +const PATIENT = UInt32(1 << 5) # IMPATIENT is default +const ESTIMATE = UInt32(1 << 6) +const WISDOM_ONLY = UInt32(1 << 21) +const NO_SIMD = UInt32(1 << 17) # disable SIMD, useful for benchmarking + +## R2R transform kinds + +const R2HC = 0 +const HC2R = 1 +const DHT = 2 +const REDFT00 = 3 +const REDFT01 = 4 +const REDFT10 = 5 +const REDFT11 = 6 +const RODFT00 = 7 +const RODFT01 = 8 +const RODFT10 = 9 +const RODFT11 = 10 + +let k2s = Dict{Int,ASCIIString}(R2HC => "R2HC", HC2R => "HC2R", DHT => "DHT", REDFT00 => "REDFT00", REDFT01 => "REDFT01", REDFT10 => "REDFT10", REDFT11 => "REDFT11", RODFT00 => "RODFT00", RODFT01 => "RODFT01", RODFT10 => "RODFT10", RODFT11 => "RODFT11") + global kind2string + kind2string(k::Integer) = k2s[Int(k)] +end + +# FFTW floating-point types: + +typealias fftwNumber Union(Float64,Float32,Complex128,Complex64) +typealias fftwReal Union(Float64,Float32) +typealias fftwComplex Union(Complex128,Complex64) +typealias fftwDouble Union(Float64,Complex128) +typealias fftwSingle Union(Float32,Complex64) +typealias fftwTypeDouble Union(Type{Float64},Type{Complex128}) +typealias fftwTypeSingle Union(Type{Float32},Type{Complex64}) + +# For ESTIMATE plans, FFTW allows one to pass NULL for the array pointer, +# since it is not written to. Hence, it is convenient to create an +# array-like type that carries a size and a stride like a "real" array +# but which is converted to C_NULL as a pointer. +immutable FakeArray{T} + sz::Dims + st::Dims +end +size(a::FakeArray) = a.sz +strides(a::FakeArray) = a.st +ndims(a::FakeArray) = length(a.sz) +unsafe_convert{T}(::Type{Ptr{T}}, a::FakeArray{T}) = convert(Ptr{T}, C_NULL) +pointer{T}(a::FakeArray{T}) = convert(Ptr{T}, C_NULL) +FakeArray(T, sz::Dims) = FakeArray{T}(sz, colmajorstrides(sz)) +FakeArray(T, sz::Int...) = FakeArray(T, sz) +fakesimilar(flags, X, T) = flags & ESTIMATE != 0 ? FakeArray(T, size(X)) : Array(T, size(X)) +alignment_of(A::FakeArray) = Int32(0) +typealias StridedArrayish{T} Union(StridedArray{T}, FakeArray{T}) + +## Julia wrappers around FFTW functions + +# Wisdom + +# Import and export wisdom to/from a single file for all precisions, +# which is more user-friendly than requiring the user to call a +# separate routine depending on the fp precision of the plans. This +# requires a bit of trickness since we have to (a) use the libc file +# I/O routines with fftw_export_wisdom_to_file/import_wisdom_from_file +# (b) we need 256 bytes of space padding between the wisdoms to work +# around FFTW's internal file i/o buffering [see the BUFSZ constant in +# FFTW's api/import-wisdom-from-file.c file]. + +function export_wisdom(fname::AbstractString) + f = ccall(:fopen, Ptr{Void}, (Cstring,Ptr{UInt8}), fname, "w") + systemerror("could not open wisdom file $fname for writing", f == C_NULL) + ccall((:fftw_export_wisdom_to_file,libfftw), Void, (Ptr{Void},), f) + ccall(:fputs, Int32, (Ptr{UInt8},Ptr{Void}), " "^256, f) + ccall((:fftwf_export_wisdom_to_file,libfftwf), Void, (Ptr{Void},), f) + ccall(:fclose, Void, (Ptr{Void},), f) +end + +function import_wisdom(fname::AbstractString) + f = ccall(:fopen, Ptr{Void}, (Cstring,Ptr{UInt8}), fname, "r") + systemerror("could not open wisdom file $fname for reading", f == C_NULL) + if ccall((:fftw_import_wisdom_from_file,libfftw),Int32,(Ptr{Void},),f)==0|| + ccall((:fftwf_import_wisdom_from_file,libfftwf),Int32,(Ptr{Void},),f)==0 + error("failed to import wisdom from $fname") + end + ccall(:fclose, Void, (Ptr{Void},), f) +end + +function import_system_wisdom() + if ccall((:fftw_import_system_wisdom,libfftw), Int32, ()) == 0 || + ccall((:fftwf_import_system_wisdom,libfftwf), Int32, ()) == 0 + error("failed to import system wisdom") + end +end + +function forget_wisdom() + ccall((:fftw_forget_wisdom,libfftw), Void, ()) + ccall((:fftwf_forget_wisdom,libfftwf), Void, ()) +end + +# Threads + +let initialized = false + global set_num_threads + function set_num_threads(nthreads::Integer) + if !initialized + # must re-initialize FFTW if any FFTW routines have been called + ccall((:fftw_cleanup,libfftw), Void, ()) + ccall((:fftwf_cleanup,libfftwf), Void, ()) + stat = ccall((:fftw_init_threads,libfftw), Int32, ()) + statf = ccall((:fftwf_init_threads,libfftwf), Int32, ()) + if stat == 0 || statf == 0 + error("could not initialize FFTW threads") + end + initialized = true + end + ccall((:fftw_plan_with_nthreads,libfftw), Void, (Int32,), nthreads) + ccall((:fftwf_plan_with_nthreads,libfftwf), Void, (Int32,), nthreads) + end +end + +# pointer type for fftw_plan (opaque pointer) + +immutable fftw_plan_struct end +typealias PlanPtr Ptr{fftw_plan_struct} + +# Planner timelimits + +const NO_TIMELIMIT = -1.0 # from fftw3.h + +set_timelimit(precision::fftwTypeDouble,seconds) = + ccall((:fftw_set_timelimit,libfftw), Void, (Float64,), seconds) + +set_timelimit(precision::fftwTypeSingle,seconds) = + ccall((:fftwf_set_timelimit,libfftwf), Void, (Float64,), seconds) + +# Array alignment mod 16: +# FFTW plans may depend on the alignment of the array mod 16 bytes, +# i.e. the address mod 16 of the first element of the array, in order +# to exploit SIMD operations. Julia arrays are, by default, aligned +# to 16-byte boundaries (address mod 16 == 0), but this may not be +# true for data imported from external C code, or for SubArrays. +# Use the undocumented routine fftw_alignment_of to determine the +# alignment of a given pointer modulo whatever FFTW needs; this +# function will be documented in FFTW 3.3.4. + + +if Base.libfftw_name == "libmkl_rt" + alignment_of{T<:fftwDouble}(A::StridedArray{T}) = + convert(Int32, convert(Int64, pointer(A)) % 16) + alignment_of{T<:fftwSingle}(A::StridedArray{T}) = + convert(Int32, convert(Int64, pointer(A)) % 16) +else + alignment_of{T<:fftwDouble}(A::StridedArray{T}) = + ccall((:fftw_alignment_of, libfftw), Int32, (Ptr{T},), A) + alignment_of{T<:fftwSingle}(A::StridedArray{T}) = + ccall((:fftwf_alignment_of, libfftwf), Int32, (Ptr{T},), A) +end + +# FFTWPlan (low-level) + +# low-level storage of the FFTW plan, along with the information +# needed to determine whether it is applicable. We need to put +# this into a type to support a finalizer on the fftw_plan. +# K is FORWARD/BACKWARD for forward/backward or r2c/c2r plans, respectively. +# For r2r plans, K is a tuple of the transform kinds along each dimension. +abstract FFTWPlan{T<:fftwNumber,K,inplace} <: Plan{T} +for P in (:cFFTWPlan, :rFFTWPlan, :r2rFFTWPlan) # complex, r2c/c2r, and r2r + @eval begin + type $P{T<:fftwNumber,K,inplace} <: FFTWPlan{T,K,inplace} + plan::PlanPtr + sz::Dims # size of array on which plan operates (Int tuple) + osz::Dims # size of output array (Int tuple) + istride::Dims # strides of input + ostride::Dims # strides of output + ialign::Int32 # alignment mod 16 of input + oalign::Int32 # alignment mod 16 of input + flags::UInt32 # planner flags + region::Any # region (iterable) of dims that are transormed + pinv::ScaledPlan + function $P(plan::PlanPtr, sz::Dims, osz::Dims, + istride::Dims, ostride::Dims, + ialign::Int32, oalign::Int32, flags::Integer, R) + p = new(plan,sz,osz,istride,ostride,ialign,oalign,flags,R) + finalizer(p, destroy_plan) + return p + end + end + $P{T<:fftwNumber}(plan::PlanPtr, flags::Integer, R::Any, X::StridedArrayish{T}, Y::StridedArrayish, K) = $P{T,K,pointer(X) == pointer(Y)}(plan, size(X), size(Y), strides(X), strides(Y), alignment_of(X), alignment_of(Y), flags, R) + end +end + +size(p::FFTWPlan) = p.sz + +unsafe_convert(::Type{PlanPtr}, p::FFTWPlan) = p.plan + +destroy_plan{T<:fftwDouble}(plan::FFTWPlan{T}) = + ccall((:fftw_destroy_plan,libfftw), Void, (PlanPtr,), plan) + +destroy_plan{T<:fftwSingle}(plan::FFTWPlan{T}) = + ccall((:fftwf_destroy_plan,libfftwf), Void, (PlanPtr,), plan) + +cost{T<:fftwDouble}(plan::FFTWPlan{T}) = + ccall((:fftw_cost,libfftw), Float64, (PlanPtr,), plan) +cost{T<:fftwSingle}(plan::FFTWPlan{T}) = + ccall((:fftwf_cost,libfftwf), Float64, (PlanPtr,), plan) + +let add=[0.0], mul=[0.0], fma=[0.0] + global arithmetic_ops + function arithmetic_ops{T<:fftwDouble}(plan::FFTWPlan{T}) + ccall((:fftw_flops,libfftw), Void, + (PlanPtr,Ptr{Float64},Ptr{Float64},Ptr{Float64}), + plan, add, mul, fma) + (int64(add[1]), int64(mul[1]), int64(fma[1])) + end + function arithmetic_ops{T<:fftwSingle}(plan::FFTWPlan{T}) + ccall((:fftwf_flops,libfftwf), Void, + (PlanPtr,Ptr{Float64},Ptr{Float64},Ptr{Float64}), + plan, add, mul, fma) + (int64(add[1]), int64(mul[1]), int64(fma[1])) + end +end +flops(plan::FFTWPlan) = let ops=arithmetic_ops(plan) + ops[1] + ops[2] + 2*ops[3] # add + mul + 2*fma +end + +# Pretty-printing plans + +function showfftdims(io, sz::Dims, istride::Dims, T) + if isempty(sz) + print(io, "0-dimensional") + elseif length(sz) == 1 + print(io, sz[1], "-element") + else + print(io, join(sz, "x")) + end + if istride == colmajorstrides(sz) + print(io, " array of ", T) + else + print(io, " $istride-strided array of ", T) + end +end + +# The sprint_plan function was released in FFTW 3.3.4 +sprint_plan_{T<:fftwDouble}(plan::FFTWPlan{T}) = + ccall((:fftw_sprint_plan,libfftw), Ptr{UInt8}, (PlanPtr,), plan) +sprint_plan_{T<:fftwSingle}(plan::FFTWPlan{T}) = + ccall((:fftwf_sprint_plan,libfftwf), Ptr{UInt8}, (PlanPtr,), plan) +function sprint_plan(plan::FFTWPlan) + pointer_to_string(sprint_plan_(plan), true) +end + +function show{T,K,inplace}(io::IO, p::cFFTWPlan{T,K,inplace}) + print(io, inplace ? "FFTW in-place " : "FFTW ", + K < 0 ? "forward" : "backward", " plan for ") + showfftdims(io, p.sz, p.istride, T) + version >= v"3.3.4" && print(io, "\n", sprint_plan(p)) +end + +function show{T,K,inplace}(io::IO, p::rFFTWPlan{T,K,inplace}) + print(io, inplace ? "FFTW in-place " : "FFTW ", + K < 0 ? "real-to-complex" : "complex-to-real", + " plan for ") + showfftdims(io, p.sz, p.istride, T) + version >= v"3.3.4" && print(io, "\n", sprint_plan(p)) +end + +function show{T,K,inplace}(io::IO, p::r2rFFTWPlan{T,K,inplace}) + print(io, inplace ? "FFTW in-place r2r " : "FFTW r2r ") + if isempty(K) + print(io, "0-dimensional") + elseif K == ntuple(i -> K[1], length(K)) + print(io, kind2string(K[1])) + if length(K) > 1 + print(io, "^", length(K)) + end + else + print(io, join(map(kind2string, K), "x")) + end + print(io, " plan for ") + showfftdims(io, p.sz, p.istride, T) + version >= v"3.3.4" && print(io, "\n", sprint_plan(p)) +end + +# Check whether a FFTWPlan is applicable to a given input array, and +# throw an informative error if not: +function assert_applicable{T}(p::FFTWPlan{T}, X::StridedArray{T}) + if size(X) != p.sz + throw(ArgumentError("FFTW plan applied to wrong-size array")) + elseif strides(X) != p.istride + throw(ArgumentError("FFTW plan applied to wrong-strides array")) + elseif alignment_of(X) != p.ialign || p.flags & UNALIGNED != 0 + throw(ArgumentError("FFTW plan applied to array with wrong memory alignment")) + end +end + +function assert_applicable{T,K,inplace}(p::FFTWPlan{T,K,inplace}, X::StridedArray{T}, Y::StridedArray) + assert_applicable(p, X) + if size(Y) != p.osz + throw(ArgumentError("FFTW plan applied to wrong-size output")) + elseif strides(Y) != p.ostride + throw(ArgumentError("FFTW plan applied to wrong-strides output")) + elseif alignment_of(Y) != p.oalign || p.flags & UNALIGNED != 0 + throw(ArgumentError("FFTW plan applied to output with wrong memory alignment")) + elseif inplace != (pointer(X) == pointer(Y)) + throw(ArgumentError(string("FFTW ", + inplace ? "in-place" : "out-of-place", + " plan applied to ", + inplace ? "out-of-place" : "in-place", + " data"))) + end +end + +# strides for a column-major (Julia-style) array of size == sz +colmajorstrides(sz) = isempty(sz) ? () : tuple(1,cumprod(Int[sz[1:end-1]...])...) + +# Execute + +unsafe_execute!{T<:fftwDouble}(plan::FFTWPlan{T}) = + ccall((:fftw_execute,libfftw), Void, (PlanPtr,), plan) + +unsafe_execute!{T<:fftwSingle}(plan::FFTWPlan{T}) = + ccall((:fftwf_execute,libfftwf), Void, (PlanPtr,), plan) + +unsafe_execute!{T<:fftwDouble}(plan::cFFTWPlan{T}, + X::StridedArray{T}, Y::StridedArray{T}) = + ccall((:fftw_execute_dft,libfftw), Void, + (PlanPtr,Ptr{T},Ptr{T}), plan, X, Y) + +unsafe_execute!{T<:fftwSingle}(plan::cFFTWPlan{T}, + X::StridedArray{T}, Y::StridedArray{T}) = + ccall((:fftwf_execute_dft,libfftwf), Void, + (PlanPtr,Ptr{T},Ptr{T}), plan, X, Y) + +unsafe_execute!(plan::rFFTWPlan{Float64,FORWARD}, + X::StridedArray{Float64}, Y::StridedArray{Complex128}) = + ccall((:fftw_execute_dft_r2c,libfftw), Void, + (PlanPtr,Ptr{Float64},Ptr{Complex128}), plan, X, Y) + +unsafe_execute!(plan::rFFTWPlan{Float32,FORWARD}, + X::StridedArray{Float32}, Y::StridedArray{Complex64}) = + ccall((:fftwf_execute_dft_r2c,libfftwf), Void, + (PlanPtr,Ptr{Float32},Ptr{Complex64}), plan, X, Y) + +unsafe_execute!(plan::rFFTWPlan{Complex128,BACKWARD}, + X::StridedArray{Complex128}, Y::StridedArray{Float64}) = + ccall((:fftw_execute_dft_c2r,libfftw), Void, + (PlanPtr,Ptr{Complex128},Ptr{Float64}), plan, X, Y) + +unsafe_execute!(plan::rFFTWPlan{Complex64,BACKWARD}, + X::StridedArray{Complex64}, Y::StridedArray{Float32}) = + ccall((:fftwf_execute_dft_c2r,libfftwf), Void, + (PlanPtr,Ptr{Complex64},Ptr{Float32}), plan, X, Y) + +unsafe_execute!{T<:fftwDouble}(plan::r2rFFTWPlan{T}, + X::StridedArray{T}, Y::StridedArray{T}) = + ccall((:fftw_execute_r2r,libfftw), Void, + (PlanPtr,Ptr{T},Ptr{T}), plan, X, Y) + +unsafe_execute!{T<:fftwSingle}(plan::r2rFFTWPlan{T}, + X::StridedArray{T}, Y::StridedArray{T}) = + ccall((:fftwf_execute_r2r,libfftwf), Void, + (PlanPtr,Ptr{T},Ptr{T}), plan, X, Y) + +# NOTE ON GC (garbage collection): +# The FFTWPlan has a finalizer so that gc will destroy the plan, +# which is necessary for gc to work with plan_fft. However, +# even when we are creating a single-use FFTWPlan [e.g. for fftn(x)], +# we intentionally do NOT call destroy_plan explicitly, and instead +# wait for garbage collection. The reason is that, in the common +# case where the user calls fft(x) a second time soon afterwards, +# if destroy_plan has not yet been called then FFTW will internally +# re-use the table of trigonometric constants from the first plan. + +# Compute dims and howmany for FFTW guru planner +function dims_howmany(X::StridedArrayish, Y::StridedArrayish, + sz::Array{Int,1}, region) + reg = [region...] + if length(unique(reg)) < length(reg) + throw(ArgumentError("each dimension can be transformed at most once")) + end + ist = [strides(X)...] + ost = [strides(Y)...] + dims = [sz[reg] ist[reg] ost[reg]]' + oreg = [1:ndims(X);] + oreg[reg] = 0 + oreg = filter(d -> d > 0, oreg) + howmany = [sz[oreg] ist[oreg] ost[oreg]]' + return (dims, howmany) +end + +# check & convert kinds into int32 array with same length as region +function fix_kinds(region, kinds) + if length(kinds) != length(region) + if length(kinds) > length(region) + throw(ArgumentError("too many transform kinds")) + else + if length(kinds) == 0 + throw(ArgumentError("must supply a transform kind")) + end + k = Array(Int32, length(region)) + k[1:length(kinds)] = [kinds...] + k[length(kinds)+1:end] = kinds[end] + kinds = k + end + else + kinds = Int32[kinds...] + end + for i = 1:length(kinds) + if kinds[i] < 0 || kinds[i] > 10 + throw(ArgumentError("invalid transform kind")) + end + end + return kinds +end + +# low-level FFTWPlan creation (for internal use in FFTW module) + +for (Tr,Tc,fftw,lib) in ((:Float64,:Complex128,"fftw",libfftw), + (:Float32,:Complex64,"fftwf",libfftwf)) + + @eval function cFFTWPlan(X::StridedArrayish{$Tc}, Y::StridedArrayish{$Tc}, + region, direction::Integer, + flags::Integer, timelimit::Real) + set_timelimit($Tr, timelimit) + R = copy(region) + dims, howmany = dims_howmany(X, Y, [size(X)...], R) + plan = ccall(($(string(fftw,"_plan_guru64_dft")),$lib), + PlanPtr, + (Int32, Ptr{Int}, Int32, Ptr{Int}, + Ptr{$Tc}, Ptr{$Tc}, Int32, UInt32), + size(dims,2), dims, size(howmany,2), howmany, + X, Y, direction, flags) + set_timelimit($Tr, NO_TIMELIMIT) + if plan == C_NULL + error("FFTW could not create plan") # shouldn't normally happen + end + return cFFTWPlan(plan, flags, R, X, Y, + direction < 0 ? FORWARD : BACKWARD) + end + + @eval function rFFTWPlan(X::StridedArrayish{$Tr}, Y::StridedArrayish{$Tc}, + region, flags::Integer, timelimit::Real) + R = copy(region) + region = circshift([region...],-1) # FFTW halves last dim + set_timelimit($Tr, timelimit) + dims, howmany = dims_howmany(X, Y, [size(X)...], region) + plan = ccall(($(string(fftw,"_plan_guru64_dft_r2c")),$lib), + PlanPtr, + (Int32, Ptr{Int}, Int32, Ptr{Int}, + Ptr{$Tr}, Ptr{$Tc}, UInt32), + size(dims,2), dims, size(howmany,2), howmany, + X, Y, flags) + set_timelimit($Tr, NO_TIMELIMIT) + if plan == C_NULL + error("FFTW could not create plan") # shouldn't normally happen + end + return rFFTWPlan(plan, flags, R, X, Y, FORWARD) + end + + @eval function rFFTWPlan(X::StridedArrayish{$Tc}, Y::StridedArrayish{$Tr}, + region, flags::Integer, timelimit::Real) + R = copy(region) + region = circshift([region...],-1) # FFTW halves last dim + set_timelimit($Tr, timelimit) + dims, howmany = dims_howmany(X, Y, [size(Y)...], region) + plan = ccall(($(string(fftw,"_plan_guru64_dft_c2r")),$lib), + PlanPtr, + (Int32, Ptr{Int}, Int32, Ptr{Int}, + Ptr{$Tc}, Ptr{$Tr}, UInt32), + size(dims,2), dims, size(howmany,2), howmany, + X, Y, flags) + set_timelimit($Tr, NO_TIMELIMIT) + if plan == C_NULL + error("FFTW could not create plan") # shouldn't normally happen + end + return rFFTWPlan(plan, flags, R, X, Y, BACKWARD) + end + + @eval function r2rFFTWPlan(X::StridedArrayish{$Tr}, Y::StridedArrayish{$Tr}, + region, kinds, + flags::Integer, timelimit::Real) + R = copy(region) + knd = fix_kinds(region, kinds) + set_timelimit($Tr, timelimit) + dims, howmany = dims_howmany(X, Y, [size(X)...], region) + plan = ccall(($(string(fftw,"_plan_guru64_r2r")),$lib), + PlanPtr, + (Int32, Ptr{Int}, Int32, Ptr{Int}, + Ptr{$Tr}, Ptr{$Tr}, Ptr{Int32}, UInt32), + size(dims,2), dims, size(howmany,2), howmany, + X, Y, knd, flags) + set_timelimit($Tr, NO_TIMELIMIT) + if plan == C_NULL + error("FFTW could not create plan") # shouldn't normally happen + end + return r2rFFTWPlan(plan, flags, R, X, Y, tuple(map(Int,knd)...)) + end + + # support r2r transforms of complex = transforms of real & imag parts + @eval function r2rFFTWPlan(X::StridedArrayish{$Tc}, Y::StridedArrayish{$Tc}, + region, kinds, + flags::Integer, timelimit::Real) + R = copy(region) + knd = fix_kinds(region, kinds) + set_timelimit($Tr, timelimit) + dims, howmany = dims_howmany(X, Y, [size(X)...], region) + dims[2:3, 1:size(dims,2)] *= 2 + howmany[2:3, 1:size(howmany,2)] *= 2 + howmany = [howmany [2,1,1]] # append loop over real/imag parts + plan = ccall(($(string(fftw,"_plan_guru64_r2r")),$lib), + PlanPtr, + (Int32, Ptr{Int}, Int32, Ptr{Int}, + Ptr{$Tc}, Ptr{$Tc}, Ptr{Int32}, UInt32), + size(dims,2), dims, size(howmany,2), howmany, + X, Y, knd, flags) + set_timelimit($Tr, NO_TIMELIMIT) + if plan == C_NULL + error("FFTW could not create plan") # shouldn't normally happen + end + return r2rFFTWPlan(plan, flags, R, X, Y, tuple(map(Int,knd)...)) + end + +end + +# Convert arrays of numeric types to FFTW-supported packed complex-float types +# (FIXME: is there a way to use the Julia promotion rules more cleverly here?) +fftwcomplex{T<:fftwComplex}(X::StridedArray{T}) = X +fftwcomplex{T<:fftwReal}(X::AbstractArray{T}) = + copy!(Array(typeof(complex(one(T))), size(X)), X) +fftwcomplex{T<:Real}(X::AbstractArray{T}) = copy!(Array(Complex128, size(X)),X) +fftwcomplex{T<:Complex}(X::AbstractArray{T}) = + copy!(Array(Complex128, size(X)), X) +fftwfloat{T<:fftwReal}(X::StridedArray{T}) = X +fftwfloat{T<:Real}(X::AbstractArray{T}) = copy!(Array(Float64, size(X)), X) +fftwfloat{T<:Complex}(X::AbstractArray{T}) = fftwcomplex(X) + +for (f,direction) in ((:fft,FORWARD), (:bfft,BACKWARD)) + plan_f = symbol("plan_",f) + plan_f! = symbol("plan_",f,"!") + idirection = -direction + @eval begin + function $plan_f{T<:fftwComplex}(X::StridedArray{T}, region; + flags::Integer=ESTIMATE, + timelimit::Real=NO_TIMELIMIT) + cFFTWPlan(X, fakesimilar(flags, X, T), region, + $direction, flags,timelimit)::cFFTWPlan{T,$direction,false} + end + + function $plan_f!{T<:fftwComplex}(X::StridedArray{T}, region; + flags::Integer=ESTIMATE, + timelimit::Real=NO_TIMELIMIT) + cFFTWPlan(X, X, region, $direction, flags, timelimit)::cFFTWPlan{T,$direction,true} + end + $plan_f{T<:fftwComplex}(X::StridedArray{T}; kws...) = + $plan_f(X, 1:ndims(X); kws...) + $plan_f!{T<:fftwComplex}(X::StridedArray{T}; kws...) = + $plan_f!(X, 1:ndims(X); kws...) + + function plan_inv{T<:fftwComplex,inplace}(p::cFFTWPlan{T,$direction,inplace}) + X = Array(T, p.sz) + Y = inplace ? X : fakesimilar(p.flags, X, T) + ScaledPlan(cFFTWPlan(X, Y, p.region, $idirection, + p.flags, NO_TIMELIMIT)::cFFTWPlan{T,$idirection,inplace}, + normalization(X, p.region)) + end + end +end + +function A_mul_B!{T}(y::StridedArray{T}, p::cFFTWPlan{T}, x::StridedArray{T}) + assert_applicable(p, x, y) + unsafe_execute!(p, x, y) + return y +end + +function *{T,K,N}(p::cFFTWPlan{T,K,false}, x::StridedArray{T,N}) + assert_applicable(p, x) + y = Array(T, p.osz)::Array{T,N} + unsafe_execute!(p, x, y) + return y +end + +function *{T,K}(p::cFFTWPlan{T,K,true}, x::StridedArray{T}) + assert_applicable(p, x) + unsafe_execute!(p, x, x) + return x +end + +# rfft/brfft and planned variants. No in-place version for now. + +for (Tr,Tc) in ((:Float32,:Complex64),(:Float64,:Complex128)) + # Note: use $FORWARD and $BACKWARD below because of issue #9775 + @eval begin + function plan_rfft(X::StridedArray{$Tr}, region; + flags::Integer=ESTIMATE, timelimit::Real=NO_TIMELIMIT) + osize = rfft_output_size(X, region) + Y = flags&ESTIMATE != 0 ? FakeArray($Tc,osize...) : Array($Tc,osize...) + rFFTWPlan(X, Y, region, flags, timelimit)::rFFTWPlan{$Tr,$FORWARD,false} + end + + function plan_brfft(X::StridedArray{$Tc}, d::Integer, region; + flags::Integer=ESTIMATE, timelimit::Real=NO_TIMELIMIT) + osize = brfft_output_size(X, d, region) + Y = flags&ESTIMATE != 0 ? FakeArray($Tr,osize...) : Array($Tr,osize...) + + # FFTW currently doesn't support PRESERVE_INPUT for + # multidimensional out-of-place c2r transforms, so + # we have to handle 1d and >1d cases separately with a copy. Ugh. + if length(region) <= 1 + rFFTWPlan(X, Y, region, flags | PRESERVE_INPUT, timelimit) + else + Xc = copy(X) + rFFTWPlan(X, Y, region, flags, timelimit) + end::rFFTWPlan{$Tc,$BACKWARD,false} + end + + plan_rfft(X::StridedArray{$Tr};kws...)=plan_rfft(X,1:ndims(X);kws...) + plan_brfft(X::StridedArray{$Tr};kws...)=plan_brfft(X,1:ndims(X);kws...) + + function plan_inv(p::rFFTWPlan{$Tr,$FORWARD,false}) + X = Array($Tr, p.sz) + Y = p.flags&ESTIMATE != 0 ? FakeArray($Tc,p.osz) : Array($Tc,p.osz) + ScaledPlan(rFFTWPlan(Y, X, p.region, + length(p.region)<=1 ? p.flags | PRESERVE_INPUT + : p.flags, NO_TIMELIMIT)::rFFTWPlan{$Tc,$BACKWARD,false}, + normalization(X, p.region)) + end + + function plan_inv(p::rFFTWPlan{$Tc,$BACKWARD,false}) + X = Array($Tc, p.sz) + Y = p.flags&ESTIMATE != 0 ? FakeArray($Tr,p.osz) : Array($Tr,p.osz) + ScaledPlan(rFFTWPlan(Y, X, p.region, p.flags, NO_TIMELIMIT)::rFFTWPlan{$Tr,$FORWARD,false}, + normalization(Y, p.region)) + end + + function A_mul_B!(y::StridedArray{$Tc}, p::rFFTWPlan{$Tr,$FORWARD}, x::StridedArray{$Tr}) + assert_applicable(p, x, y) + unsafe_execute!(p, x, y) + return y + end + function A_mul_B!(y::StridedArray{$Tr}, p::rFFTWPlan{$Tc,$BACKWARD}, x::StridedArray{$Tc}) + assert_applicable(p, x, y) + unsafe_execute!(p, x, y) # note: may overwrite x as well as y! + return y + end + + function *{N}(p::rFFTWPlan{$Tr,$FORWARD,false}, x::StridedArray{$Tr,N}) + assert_applicable(p, x) + y = Array($Tc, p.osz)::Array{$Tc,N} + unsafe_execute!(p, x, y) + return y + end + + function *{N}(p::rFFTWPlan{$Tc,$BACKWARD,false}, x::StridedArray{$Tc,N}) + if p.flags & PRESERVE_INPUT != 0 + assert_applicable(p, x) + y = Array($Tr, p.osz)::Array{$Tr,N} + unsafe_execute!(p, x, y) + else # need to make a copy to avoid overwriting x + xc = copy(x) + assert_applicable(p, xc) + y = Array($Tr, p.osz)::Array{$Tr,N} + unsafe_execute!(p, xc, y) + end + return y + end + end +end + +# FFTW r2r transforms (low-level interface) + +for f in (:r2r, :r2r!) + pf = symbol("plan_", f) + @eval begin + $f{T<:fftwNumber}(x::AbstractArray{T}, kinds) = $pf(x, kinds) * x + $f{T<:fftwNumber}(x::AbstractArray{T}, kinds, region) = $pf(x, kinds, region) * x + $pf(x::AbstractArray, kinds; kws...) = $pf(x, kinds, 1:ndims(x); kws...) + $f{T<:Real}(x::AbstractArray{T}, kinds, region=1:ndims(x)) = $f(fftwfloat(x), kinds, region) + $pf{T<:Real}(x::AbstractArray{T}, kinds, region; kws...) = $pf(fftwfloat(x), kinds, region; kws...) + $f{T<:Complex}(x::AbstractArray{T}, kinds, region=1:ndims(x)) = $f(fftwcomplex(x), kinds, region) + $pf{T<:Complex}(x::AbstractArray{T}, kinds, region; kws...) = $pf(fftwcomplex(x), kinds, region; kws...) + end +end + +function plan_r2r{T<:fftwNumber}(X::StridedArray{T}, kinds, region; + flags::Integer=ESTIMATE, + timelimit::Real=NO_TIMELIMIT) + r2rFFTWPlan(X, fakesimilar(flags, X, T), region, kinds, flags, timelimit) +end + +function plan_r2r!{T<:fftwNumber}(X::StridedArray{T}, kinds, region; + flags::Integer=ESTIMATE, + timelimit::Real=NO_TIMELIMIT) + r2rFFTWPlan(X, X, region, kinds, flags, timelimit) +end + +# mapping from r2r kind to the corresponding inverse transform +const inv_kind = Dict{Int,Int}(R2HC => HC2R, HC2R => R2HC, DHT => DHT, + REDFT00 => REDFT00, + REDFT01 => REDFT10, REDFT10 => REDFT01, + REDFT11 => REDFT11, + RODFT00 => RODFT00, + RODFT01 => RODFT10, RODFT10 => RODFT01, + RODFT11 => RODFT11) + +# r2r inverses are normalized to 1/N, where N is a "logical" size +# the transform with length n and kind k: +function logical_size(n::Integer, k::Integer) + k <= DHT && return n + k == REDFT00 && return 2(n-1) + k == RODFT00 && return 2(n+1) + return 2n +end + +function plan_inv{T<:fftwNumber,K,inplace}(p::r2rFFTWPlan{T,K,inplace}) + X = Array(T, p.sz) + iK = fix_kinds(p.region, [inv_kind[k] for k in K]) + Y = inplace ? X : fakesimilar(p.flags, X, T) + ScaledPlan(r2rFFTWPlan(X, Y, p.region, iK, + p.flags, NO_TIMELIMIT), + normalization(real(T), + map(logical_size, [p.sz...][[p.region...]], iK), + 1:length(iK))) +end + +function A_mul_B!{T}(y::StridedArray{T}, p::r2rFFTWPlan{T}, x::StridedArray{T}) + assert_applicable(p, x, y) + unsafe_execute!(p, x, y) + return y +end + +function *{T,K,N}(p::r2rFFTWPlan{T,K,false}, x::StridedArray{T,N}) + assert_applicable(p, x) + y = Array(T, p.osz)::Array{T,N} + unsafe_execute!(p, x, y) + return y +end + +function *{T,K}(p::r2rFFTWPlan{T,K,true}, x::StridedArray{T}) + assert_applicable(p, x) + unsafe_execute!(p, x, x) + return x +end + +include("dct.jl") + +end # module