-
-
Notifications
You must be signed in to change notification settings - Fork 5.4k
/
reinterpretarray.jl
368 lines (336 loc) · 14.2 KB
/
reinterpretarray.jl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
# This file is a part of Julia. License is MIT: https://julialang.org/license
"""
Gives a reinterpreted view (of element type T) of the underlying array (of element type S).
If the size of `T` differs from the size of `S`, the array will be compressed/expanded in
the first dimension.
"""
struct ReinterpretArray{T,N,S,A<:AbstractArray{S, N}} <: AbstractArray{T, N}
parent::A
readable::Bool
writable::Bool
global reinterpret
function reinterpret(::Type{T}, a::A) where {T,N,S,A<:AbstractArray{S, N}}
function throwbits(::Type{S}, ::Type{T}, ::Type{U}) where {S,T,U}
@_noinline_meta
throw(ArgumentError("cannot reinterpret `$(S)` `$(T)`, type `$(U)` is not a bits type"))
end
function throwsize0(::Type{S}, ::Type{T})
@_noinline_meta
throw(ArgumentError("cannot reinterpret a zero-dimensional `$(S)` array to `$(T)` which is of a different size"))
end
function thrownonint(::Type{S}, ::Type{T}, dim)
@_noinline_meta
throw(ArgumentError("""
cannot reinterpret an `$(S)` array to `$(T)` whose first dimension has size `$(dim)`.
The resulting array would have non-integral first dimension.
"""))
end
function throwaxes1(::Type{S}, ::Type{T}, ax1)
@_noinline_meta
throw(ArgumentError("cannot reinterpret a `$(S)` array to `$(T)` when the first axis is $ax1. Try reshaping first."))
end
isbitstype(T) || throwbits(S, T, T)
isbitstype(S) || throwbits(S, T, S)
(N != 0 || sizeof(T) == sizeof(S)) || throwsize0(S, T)
if N != 0 && sizeof(S) != sizeof(T)
ax1 = axes(a)[1]
dim = length(ax1)
rem(dim*sizeof(S),sizeof(T)) == 0 || thrownonint(S, T, dim)
first(ax1) == 1 || throwaxes1(S, T, ax1)
end
readable = array_subpadding(T, S)
writable = array_subpadding(S, T)
new{T, N, S, A}(a, readable, writable)
end
end
reinterpret(::Type{T}, a::ReinterpretArray) where {T} = reinterpret(T, a.parent)
# Definition of StridedArray
StridedFastContiguousSubArray{T,N,A<:DenseArray} = FastContiguousSubArray{T,N,A}
StridedReinterpretArray{T,N,A<:Union{DenseArray,StridedFastContiguousSubArray}} = ReinterpretArray{T,N,S,A} where S
StridedReshapedArray{T,N,A<:Union{DenseArray,StridedFastContiguousSubArray,StridedReinterpretArray}} = ReshapedArray{T,N,A}
StridedSubArray{T,N,A<:Union{DenseArray,StridedReshapedArray,StridedReinterpretArray},
I<:Tuple{Vararg{Union{RangeIndex, ReshapedUnitRange, AbstractCartesianIndex}}}} = SubArray{T,N,A,I}
StridedArray{T,N} = Union{DenseArray{T,N}, StridedSubArray{T,N}, StridedReshapedArray{T,N}, StridedReinterpretArray{T,N}}
StridedVector{T} = StridedArray{T,1}
StridedMatrix{T} = StridedArray{T,2}
StridedVecOrMat{T} = Union{StridedVector{T}, StridedMatrix{T}}
# the definition of strides for Array{T,N} is tuple() if N = 0, otherwise it is
# a tuple containing 1 and a cumulative product of the first N-1 sizes
# this definition is also used for StridedReshapedArray and StridedReinterpretedArray
# which have the same memory storage as Array
stride(a::Union{DenseArray,StridedReshapedArray,StridedReinterpretArray}, i::Int) = _stride(a, i)
function stride(a::ReinterpretArray, i::Int)
a.parent isa StridedArray || ArgumentError("Parent must be strided.") |> throw
return _stride(a, i)
end
function _stride(a, i)
if i > ndims(a)
return length(a)
end
s = 1
for n = 1:(i-1)
s *= size(a, n)
end
return s
end
function strides(a::ReinterpretArray)
a.parent isa StridedArray || ArgumentError("Parent must be strided.") |> throw
size_to_strides(1, size(a)...)
end
strides(a::Union{DenseArray,StridedReshapedArray,StridedReinterpretArray}) = size_to_strides(1, size(a)...)
function check_readable(a::ReinterpretArray{T, N, S} where N) where {T,S}
# See comment in check_writable
if !a.readable && !array_subpadding(T, S)
throw(PaddingError(T, S))
end
end
function check_writable(a::ReinterpretArray{T, N, S} where N) where {T,S}
# `array_subpadding` is relatively expensive (compared to a simple arrayref),
# so it is cached in the array. However, it is computable at compile time if,
# inference has the types available. By using this form of the check, we can
# get the best of both worlds for the success case. If the types were not
# available to inference, we simply need to check the field (relatively cheap)
# and if they were we should be able to fold this check away entirely.
if !a.writable && !array_subpadding(S, T)
throw(PaddingError(T, S))
end
end
IndexStyle(a::ReinterpretArray) = IndexStyle(a.parent)
parent(a::ReinterpretArray) = a.parent
dataids(a::ReinterpretArray) = dataids(a.parent)
unaliascopy(a::ReinterpretArray{T}) where {T} = reinterpret(T, unaliascopy(a.parent))
function size(a::ReinterpretArray{T,N,S} where {N}) where {T,S}
psize = size(a.parent)
size1 = div(psize[1]*sizeof(S), sizeof(T))
tuple(size1, tail(psize)...)
end
size(a::ReinterpretArray{T,0}) where {T} = ()
function axes(a::ReinterpretArray{T,N,S} where {N}) where {T,S}
paxs = axes(a.parent)
f, l = first(paxs[1]), length(paxs[1])
size1 = div(l*sizeof(S), sizeof(T))
tuple(oftype(paxs[1], f:f+size1-1), tail(paxs)...)
end
axes(a::ReinterpretArray{T,0}) where {T} = ()
elsize(::Type{<:ReinterpretArray{T}}) where {T} = sizeof(T)
unsafe_convert(::Type{Ptr{T}}, a::ReinterpretArray{T,N,S} where N) where {T,S} = Ptr{T}(unsafe_convert(Ptr{S},a.parent))
@inline @propagate_inbounds getindex(a::ReinterpretArray{T,0}) where {T} = reinterpret(T, a.parent[])
@inline @propagate_inbounds getindex(a::ReinterpretArray) = a[1]
@inline @propagate_inbounds function getindex(a::ReinterpretArray{T,N,S}, inds::Vararg{Int, N}) where {T,N,S}
check_readable(a)
_getindex_ra(a, inds[1], tail(inds))
end
@inline @propagate_inbounds function getindex(a::ReinterpretArray{T,N,S}, i::Int) where {T,N,S}
check_readable(a)
if isa(IndexStyle(a), IndexLinear)
return _getindex_ra(a, i, ())
end
# Convert to full indices here, to avoid needing multiple conversions in
# the loop in _getindex_ra
inds = _to_subscript_indices(a, i)
_getindex_ra(a, inds[1], tail(inds))
end
@inline _memcpy!(dst, src, n) = ccall(:memcpy, Cvoid, (Ptr{UInt8}, Ptr{UInt8}, Csize_t), dst, src, n)
@inline @propagate_inbounds function _getindex_ra(a::ReinterpretArray{T,N,S}, i1::Int, tailinds::TT) where {T,N,S,TT}
# Make sure to match the scalar reinterpret if that is applicable
if sizeof(T) == sizeof(S) && (fieldcount(T) + fieldcount(S)) == 0
return reinterpret(T, a.parent[i1, tailinds...])
else
@boundscheck checkbounds(a, i1, tailinds...)
ind_start, sidx = divrem((i1-1)*sizeof(T), sizeof(S))
t = Ref{T}()
s = Ref{S}()
GC.@preserve t s begin
tptr = Ptr{UInt8}(unsafe_convert(Ref{T}, t))
sptr = Ptr{UInt8}(unsafe_convert(Ref{S}, s))
# Optimizations that avoid branches
if sizeof(T) % sizeof(S) == 0
# T is bigger than S and contains an integer number of them
n = sizeof(T) ÷ sizeof(S)
for i = 1:n
s[] = a.parent[ind_start + i, tailinds...]
_memcpy!(tptr + (i-1)*sizeof(S), sptr, sizeof(S))
end
elseif sizeof(S) % sizeof(T) == 0
# S is bigger than T and contains an integer number of them
s[] = a.parent[ind_start + 1, tailinds...]
_memcpy!(tptr, sptr + sidx, sizeof(T))
else
i = 1
nbytes_copied = 0
# This is a bit complicated to deal with partial elements
# at both the start and the end. LLVM will fold as appropriate,
# once it knows the data layout
while nbytes_copied < sizeof(T)
s[] = a.parent[ind_start + i, tailinds...]
nb = min(sizeof(S) - sidx, sizeof(T)-nbytes_copied)
_memcpy!(tptr + nbytes_copied, sptr + sidx, nb)
nbytes_copied += nb
sidx = 0
i += 1
end
end
end
return t[]
end
end
@inline @propagate_inbounds setindex!(a::ReinterpretArray{T,0,S} where T, v) where {S} = (a.parent[] = reinterpret(S, v))
@inline @propagate_inbounds setindex!(a::ReinterpretArray, v) = (a[1] = v)
@inline @propagate_inbounds function setindex!(a::ReinterpretArray{T,N,S}, v, inds::Vararg{Int, N}) where {T,N,S}
check_writable(a)
_setindex_ra!(a, v, inds[1], tail(inds))
end
@inline @propagate_inbounds function setindex!(a::ReinterpretArray{T,N,S}, v, i::Int) where {T,N,S}
check_writable(a)
if isa(IndexStyle(a), IndexLinear)
return _setindex_ra!(a, v, i, ())
end
inds = _to_subscript_indices(a, i)
_setindex_ra!(a, v, inds[1], tail(inds))
end
@inline @propagate_inbounds function _setindex_ra!(a::ReinterpretArray{T,N,S}, v, i1::Int, tailinds::TT) where {T,N,S,TT}
v = convert(T, v)::T
# Make sure to match the scalar reinterpret if that is applicable
if sizeof(T) == sizeof(S) && (fieldcount(T) + fieldcount(S)) == 0
return setindex!(a.parent, reinterpret(S, v), i1, tailinds...)
else
@boundscheck checkbounds(a, i1, tailinds...)
ind_start, sidx = divrem((i1-1)*sizeof(T), sizeof(S))
t = Ref{T}(v)
s = Ref{S}()
GC.@preserve t s begin
tptr = Ptr{UInt8}(unsafe_convert(Ref{T}, t))
sptr = Ptr{UInt8}(unsafe_convert(Ref{S}, s))
# Optimizations that avoid branches
if sizeof(T) % sizeof(S) == 0
# T is bigger than S and contains an integer number of them
n = sizeof(T) ÷ sizeof(S)
for i = 0:n-1
_memcpy!(sptr, tptr + i*sizeof(S), sizeof(S))
a.parent[ind_start + i + 1, tailinds...] = s[]
end
elseif sizeof(S) % sizeof(T) == 0
# S is bigger than T and contains an integer number of them
s[] = a.parent[ind_start + 1, tailinds...]
_memcpy!(sptr + sidx, tptr, sizeof(T))
a.parent[ind_start + 1, tailinds...] = s[]
else
nbytes_copied = 0
i = 1
# Deal with any partial elements at the start. We'll have to copy in the
# element from the original array and overwrite the relevant parts
if sidx != 0
s[] = a.parent[ind_start + i, tailinds...]
nb = min((sizeof(S) - sidx) % UInt, sizeof(T) % UInt)
_memcpy!(sptr + sidx, tptr, nb)
nbytes_copied += nb
a.parent[ind_start + i, tailinds...] = s[]
i += 1
sidx = 0
end
# Deal with the main body of elements
while nbytes_copied < sizeof(T) && (sizeof(T) - nbytes_copied) > sizeof(S)
nb = min(sizeof(S), sizeof(T) - nbytes_copied)
_memcpy!(sptr, tptr + nbytes_copied, nb)
nbytes_copied += nb
a.parent[ind_start + i, tailinds...] = s[]
i += 1
end
# Deal with trailing partial elements
if nbytes_copied < sizeof(T)
s[] = a.parent[ind_start + i, tailinds...]
nb = min(sizeof(S), sizeof(T) - nbytes_copied)
_memcpy!(sptr, tptr + nbytes_copied, nb)
a.parent[ind_start + i, tailinds...] = s[]
end
end
end
end
return a
end
# Padding
struct Padding
offset::Int
size::Int
end
function intersect(p1::Padding, p2::Padding)
start = max(p1.offset, p2.offset)
stop = min(p1.offset + p1.size, p2.offset + p2.size)
Padding(start, max(0, stop-start))
end
struct PaddingError
S::Type
T::Type
end
function showerror(io::IO, p::PaddingError)
print(io, "Padding of type $(p.S) is not compatible with type $(p.T).")
end
"""
CyclePadding(padding, total_size)
Cylces an iterator of `Padding` structs, restarting the padding at `total_size`.
E.g. if `padding` is all the padding in a struct and `total_size` is the total
aligned size of that array, `CyclePadding` will correspond to the padding in an
infinite vector of such structs.
"""
struct CyclePadding{P}
padding::P
total_size::Int
end
eltype(::Type{<:CyclePadding}) = Padding
IteratorSize(::Type{<:CyclePadding}) = IsInfinite()
isempty(cp::CyclePadding) = isempty(cp.padding)
function iterate(cp::CyclePadding)
y = iterate(cp.padding)
y === nothing && return nothing
y[1], (0, y[2])
end
function iterate(cp::CyclePadding, state::Tuple)
y = iterate(cp.padding, tail(state)...)
y === nothing && return iterate(cp, (state[1]+cp.total_size,))
Padding(y[1].offset+state[1], y[1].size), (state[1], tail(y)...)
end
"""
Compute the location of padding in a type.
"""
function padding(T)
padding = Padding[]
last_end::Int = 0
for i = 1:fieldcount(T)
offset = fieldoffset(T, i)
fT = fieldtype(T, i)
if offset != last_end
push!(padding, Padding(offset, offset-last_end))
end
last_end = offset + sizeof(fT)
end
padding
end
function CyclePadding(T::DataType)
a, s = datatype_alignment(T), sizeof(T)
as = s + (a - (s % a)) % a
pad = padding(T)
s != as && push!(pad, Padding(s, as - s))
CyclePadding(pad, as)
end
using .Iterators: Stateful
@pure function array_subpadding(S, T)
checked_size = 0
lcm_size = lcm(sizeof(S), sizeof(T))
s, t = Stateful{<:Any, Any}(CyclePadding(S)),
Stateful{<:Any, Any}(CyclePadding(T))
isempty(t) && return true
isempty(s) && return false
while checked_size < lcm_size
# Take padding in T
pad = popfirst!(t)
# See if there's corresponding padding in S
while true
ps = peek(s)
ps.offset > pad.offset && return false
intersect(ps, pad) == pad && break
popfirst!(s)
end
checked_size = pad.offset + pad.size
end
return true
end