-- convolution.vhd
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;
use ieee.math_real.all;
library util;
use util.math_pkg.all;
-- Interface of the binary convolution unit.
--
-- The "behavioral" architecture registers (islv_data xnor islv_weights) when
-- isl_valid is '1', counts the ones of that product in small slices and hands
-- the slice counts to util.adder_tree, whose result is driven on oslv_data
-- together with osl_valid.
-- NOTE(review): util.adder_tree is not part of this file; it presumably sums
-- the slice counts, so oslv_data would be the number of matching data/weight
-- bits -- confirm against the adder_tree source.
entity convolution is
generic (
-- TODO: input bitwidth, for now = 1
-- Together with C_INPUT_CHANNEL this fixes the width of the data and weight
-- vectors: C_KERNEL_SIZE * C_KERNEL_SIZE * C_INPUT_CHANNEL bits each.
C_KERNEL_SIZE : integer range 1 to 7 := 3;
-- Channel factor of the data/weight vector width (see C_KERNEL_SIZE).
C_INPUT_CHANNEL : integer := 1
);
port (
-- Clock; the architecture's process acts on its rising edge.
isl_clk : in std_logic;
-- '1' marks islv_data / islv_weights as valid in the current clock cycle.
isl_valid : in std_logic;
-- Input bits, one bit per kernel position and channel.
islv_data : in std_logic_vector(C_KERNEL_SIZE * C_KERNEL_SIZE * C_INPUT_CHANNEL - 1 downto 0);
-- Weight bits, same width and bit order as islv_data (combined bitwise by xnor).
islv_weights : in std_logic_vector(C_KERNEL_SIZE * C_KERNEL_SIZE * C_INPUT_CHANNEL - 1 downto 0);
-- Result of the adder tree. The width comes from the integer log2 of
-- util.math_pkg applied to (number of input bits + 1).
-- NOTE(review): presumably a rounded-up log2 so that the value "all bits
-- match" still fits -- confirm in util.math_pkg.
oslv_data : out std_logic_vector(log2(C_KERNEL_SIZE * C_KERNEL_SIZE * C_INPUT_CHANNEL + 1) - 1 downto 0);
-- Valid flag forwarded from util.adder_tree for oslv_data.
osl_valid : out std_logic
);
end entity convolution;
-- Behavioral implementation of the binary convolution.
--
-- Processing steps (all registered on the rising edge of isl_clk):
--   1. isl_valid = '1'  : slv_product takes (islv_data xnor islv_weights) in its
--                         upper bits; the lower bits are zero padding up to a
--                         multiple of C_PARALLEL_POPCOUNT.
--   2. sl_popcount = '1': the ones of every C_PARALLEL_POPCOUNT-bit slice of
--                         slv_product are counted and packed into slv_popcount.
--   3. sl_add = '1'     : util.adder_tree takes slv_popcount and produces
--                         oslv_data / osl_valid.
-- NOTE(review): util.adder_tree is not shown here; it presumably sums the
-- C_SPLIT slice counts -- its latency has to be looked up in its own source.
architecture behavioral of convolution is

  -- Number of product bits that are counted together in one slice.
  constant C_PARALLEL_POPCOUNT : integer := 4;

  -- Bits needed for one slice count (value range 0 to C_PARALLEL_POPCOUNT).
  -- The math_real log2 is selected by its full name so that it cannot be
  -- confused with the integer log2 of util.math_pkg.
  constant C_POPCOUNT_BITWIDTH : integer :=
    integer(ceil(ieee.math_real.log2(real(C_PARALLEL_POPCOUNT + 1))));

  -- Number of slices needed to cover all input bits (rounded up).
  constant C_SPLIT : integer := integer(ceil(real(islv_data'length) / real(C_PARALLEL_POPCOUNT)));

  -- Input width rounded up to a whole number of slices.
  constant C_PADDED_BITWIDTH : integer := C_PARALLEL_POPCOUNT * C_SPLIT;

  -- Number of zero bits appended below the product; 0 if the input width is
  -- already a multiple of C_PARALLEL_POPCOUNT.
  constant C_PADDING : integer := C_PADDED_BITWIDTH - islv_data'length;

  -- Stage enables: sl_popcount starts the slice counting, sl_add starts the
  -- adder tree.
  signal sl_popcount : std_logic := '0';
  signal sl_add      : std_logic := '0';

  -- Zero-padded xnor product of data and weights.
  signal slv_product : std_logic_vector(C_PADDED_BITWIDTH - 1 downto 0) := (others => '0');

  -- Slice counts, C_POPCOUNT_BITWIDTH bits each, slice 0 in the lowest bits.
  signal slv_popcount : std_logic_vector(C_SPLIT * C_POPCOUNT_BITWIDTH - 1 downto 0);

  -- Adder tree outputs, forwarded unchanged to the entity ports.
  signal sl_valid_out : std_logic := '0';
  signal slv_data_out : std_logic_vector(oslv_data'range);

begin

  -- Combines the C_SPLIT slice counts into the final result.
  i_adder_tree : entity util.adder_tree
    generic map (
      C_INPUT_COUNT     => C_SPLIT,
      C_INPUT_BITWIDTH  => C_POPCOUNT_BITWIDTH,
      C_OUTPUT_BITWIDTH => oslv_data'length
    )
    port map (
      isl_clk   => isl_clk,
      isl_valid => sl_add,
      islv_data => slv_popcount,
      oslv_data => slv_data_out,
      osl_valid => sl_valid_out
    );

  -- Registers the xnor product and, one clock cycle later, the slice counts.
  proc_convolution : process (isl_clk) is

    -- Running count of the ones within the slice currently evaluated.
    variable v_usig_popcount : unsigned(C_POPCOUNT_BITWIDTH - 1 downto 0);

  begin

    if (rising_edge(isl_clk)) then
      -- Both enables are single-cycle pulses: default to '0' and override below.
      sl_popcount <= '0';
      sl_add      <= '0';

      if (isl_valid = '1') then
        -- or map directly to hardware (islv_weights as constant)
        -- pad zeros for the adder tree
        -- The product is written to the upper bits and everything else is
        -- cleared. This places the bits exactly like "product & zeros", but
        -- needs no padding aggregate, which would have the null range
        -- "-1 downto 0" whenever C_PADDING = 0.
        slv_product                                     <= (others => '0');
        slv_product(slv_product'high downto C_PADDING) <= islv_data xnor islv_weights;
        sl_popcount                                     <= '1';
      end if;

      -- If using bram, one would be needed for each adder stage.
      if (sl_popcount = '1') then

        for slice in 0 to C_SPLIT - 1 loop

          v_usig_popcount := (others => '0');

          for i in 0 to C_PARALLEL_POPCOUNT - 1 loop
            if (slv_product(i + slice * C_PARALLEL_POPCOUNT) = '1') then
              v_usig_popcount := v_usig_popcount + 1;
            end if;
          end loop;

          slv_popcount((slice + 1) * C_POPCOUNT_BITWIDTH - 1 downto slice * C_POPCOUNT_BITWIDTH) <= std_logic_vector(v_usig_popcount);

        end loop;

        sl_add <= '1';
      end if;
    end if;

  end process proc_convolution;

  oslv_data <= slv_data_out;
  osl_valid <= sl_valid_out;

end architecture behavioral;