-
-
Notifications
You must be signed in to change notification settings - Fork 5.5k
/
pipeline-o2-broadcast.jl
123 lines (100 loc) · 4.69 KB
/
pipeline-o2-broadcast.jl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
# This file is a part of Julia. License is MIT: https://julialang.org/license
# RUN: julia --startup-file=no -O2 --check-bounds=auto %s %t -O && llvm-link -S %t/* | FileCheck %s
# RUN: julia --startup-file=no -O3 --check-bounds=auto %s %t -O && llvm-link -S %t/* | FileCheck %s
include(joinpath("..", "testhelpers", "llvmpasses.jl"))
# COM: Check broadcasted outer product is vectorized
# COM: Float32
# CHECK: @japi1_prod_v_vT
# COM: load <[[VSCALE:(vscale x )?]][[VEC_FACTOR:[0-9]+]] x float>
# COM: fmul <[[VSCALE]][[VEC_FACTOR]] x float>
# CHECK: fmul <[[VSCALE:(vscale x )?]][[VEC_FACTOR:[0-9]+]] x float>
# CHECK: store <[[VSCALE]][[VEC_FACTOR]] x float>
# COM: Float64
# CHECK: @japi1_prod_v_vT
# COM: load <[[VSCALE:(vscale x )?]][[VEC_FACTOR:[0-9]+]] x double>
# COM: fmul <[[VSCALE]][[VEC_FACTOR]] x double>
# CHECK: fmul <[[VSCALE:(vscale x )?]][[VEC_FACTOR:[0-9]+]] x double>
# CHECK: store <[[VSCALE]][[VEC_FACTOR]] x double>
# COM: Int32
# CHECK: @japi1_prod_v_vT
# COM: load <[[VSCALE:(vscale x )?]][[VEC_FACTOR:[0-9]+]] x i32>
# COM: mul <[[VSCALE]][[VEC_FACTOR]] x i32>
# CHECK: mul <[[VSCALE:(vscale x )?]][[VEC_FACTOR:[0-9]+]] x i32>
# CHECK: store <[[VSCALE]][[VEC_FACTOR]] x i32>
# COM: Int64
# CHECK: @japi1_prod_v_vT
# COM: load <[[VSCALE:(vscale x )?]][[VEC_FACTOR:[0-9]+]] x i64>
# COM: mul <[[VSCALE]][[VEC_FACTOR]] x i64>
# CHECK: mul <[[VSCALE:(vscale x )?]][[VEC_FACTOR:[0-9]+]] x i64>
# CHECK: store <[[VSCALE]][[VEC_FACTOR]] x i64>
# Store the broadcasted product of `x` and the adjoint of `y` into `R`, in place,
# and return the result of that assignment.
# The function name is matched by the `@japi1_prod_v_vT` patterns above, so it
# must stay as is.
function prod_v_vT(R, x, y)
    # `@.` dots both the assignment and the `*` call; the postfix adjoint is left alone.
    return @. R = x * y'
end
# COM: Check broadcasted product of an adjoint (row) vector and a column vector is vectorized
# COM: Float32
# CHECK: @japi1_prod_vT_v
# COM: load <[[VSCALE:(vscale x )?]][[VEC_FACTOR:[0-9]+]] x float>
# COM: fmul <[[VSCALE]][[VEC_FACTOR]] x float>
# CHECK: fmul <[[VSCALE:(vscale x )?]][[VEC_FACTOR:[0-9]+]] x float>
# CHECK: store <[[VSCALE]][[VEC_FACTOR]] x float>
# COM: Float64
# CHECK: @japi1_prod_vT_v
# COM: load <[[VSCALE:(vscale x )?]][[VEC_FACTOR:[0-9]+]] x double>
# COM: fmul <[[VSCALE]][[VEC_FACTOR]] x double>
# CHECK: fmul <[[VSCALE:(vscale x )?]][[VEC_FACTOR:[0-9]+]] x double>
# CHECK: store <[[VSCALE]][[VEC_FACTOR]] x double>
# COM: Int32
# CHECK: @japi1_prod_vT_v
# COM: load <[[VSCALE:(vscale x )?]][[VEC_FACTOR:[0-9]+]] x i32>
# COM: mul <[[VSCALE]][[VEC_FACTOR]] x i32>
# CHECK: mul <[[VSCALE:(vscale x )?]][[VEC_FACTOR:[0-9]+]] x i32>
# CHECK: store <[[VSCALE]][[VEC_FACTOR]] x i32>
# COM: Int64
# CHECK: @japi1_prod_vT_v
# COM: load <[[VSCALE:(vscale x )?]][[VEC_FACTOR:[0-9]+]] x i64>
# COM: mul <[[VSCALE]][[VEC_FACTOR]] x i64>
# CHECK: mul <[[VSCALE:(vscale x )?]][[VEC_FACTOR:[0-9]+]] x i64>
# CHECK: store <[[VSCALE]][[VEC_FACTOR]] x i64>
# Store the broadcasted product of the adjoint of `x` and `y` into `R`, in place,
# and return the result of that assignment.
# The function name is matched by the `@japi1_prod_vT_v` patterns above, so it
# must stay as is.
function prod_vT_v(R, x, y)
    # `@.` dots both the assignment and the `*` call; the postfix adjoint is left alone.
    return @. R = x' * y
end
# COM: Check broadcasted multiplications are vectorized
# COM: Float32
# CHECK: @japi1_prod_v_M_vT
# COM: load <[[VSCALE:(vscale x )?]][[VEC_FACTOR:[0-9]+]] x float>
# COM: fmul <[[VSCALE]][[VEC_FACTOR]] x float>
# XFAIL-CHECK: fmul <[[VSCALE:(vscale x )?]][[VEC_FACTOR:[0-9]+]] x float>
# XFAIL-CHECK: store <[[VSCALE]][[VEC_FACTOR]] x float>
# COM: Float64
# CHECK: @japi1_prod_v_M_vT
# COM: load <[[VSCALE:(vscale x )?]][[VEC_FACTOR:[0-9]+]] x double>
# COM: fmul <[[VSCALE]][[VEC_FACTOR]] x double>
# XFAIL-CHECK: fmul <[[VSCALE:(vscale x )?]][[VEC_FACTOR:[0-9]+]] x double>
# XFAIL-CHECK: store <[[VSCALE]][[VEC_FACTOR]] x double>
# COM: Int32
# CHECK: @japi1_prod_v_M_vT
# COM: load <[[VSCALE:(vscale x )?]][[VEC_FACTOR:[0-9]+]] x i32>
# COM: mul <[[VSCALE]][[VEC_FACTOR]] x i32>
# XFAIL-CHECK: mul <[[VSCALE:(vscale x )?]][[VEC_FACTOR:[0-9]+]] x i32>
# XFAIL-CHECK: store <[[VSCALE]][[VEC_FACTOR]] x i32>
# COM: Int64
# CHECK: @japi1_prod_v_M_vT
# COM: load <[[VSCALE:(vscale x )?]][[VEC_FACTOR:[0-9]+]] x i64>
# COM: mul <[[VSCALE]][[VEC_FACTOR]] x i64>
# XFAIL-CHECK: mul <[[VSCALE:(vscale x )?]][[VEC_FACTOR:[0-9]+]] x i64>
# XFAIL-CHECK: store <[[VSCALE]][[VEC_FACTOR]] x i64>
# Store the broadcasted product of `x`, `M` and the adjoint of `y` into `R`,
# in place, and return the result of that assignment.
# The function name is matched by the `@japi1_prod_v_M_vT` patterns above, so it
# must stay as is.
function prod_v_M_vT(R, x, M, y)
    # The parentheses keep two nested binary multiplications (first `x` with `M`,
    # then with `y'`) instead of a single three-argument `*` call.
    return @. R = (x * M) * y'
end
# Emit every kernel once per element type. The order is significant: the
# FileCheck patterns above are matched sequentially, so emission goes function by
# function and, within each function, Float32, Float64, Int32, Int64.

# Two-vector kernels: destination matrix plus two vectors of the same element type.
for kernel in (prod_v_vT, prod_vT_v), T in (Float32, Float64, Int32, Int64)
    emit(kernel, Matrix{T}, Vector{T}, Vector{T})
end

# Three-operand kernel: destination matrix, vector, matrix, vector.
for T in (Float32, Float64, Int32, Int64)
    emit(prod_v_M_vT, Matrix{T}, Vector{T}, Matrix{T}, Vector{T})
end