Skip to content

Commit

Permalink
convolution: implement support for 8 bit input
Browse files Browse the repository at this point in the history
  • Loading branch information
marph91 committed Mar 23, 2021
1 parent 601f5b5 commit 04d10e6
Show file tree
Hide file tree
Showing 6 changed files with 124 additions and 61 deletions.
11 changes: 4 additions & 7 deletions playground/04_custom_toplevel.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,11 +80,10 @@ def update(self, previous_layer_info):

# thresholds
input_channel_bitwidth = previous_layer_info["bitwidth"]
bitwidth = (
math.ceil(
math.log2(input_channel * input_channel_bitwidth * kernel_size ** 2 + 1)
)
* output_channel
bitwidth = output_channel * (
input_channel_bitwidth
+ math.ceil(math.log2(input_channel * kernel_size ** 2 + 1))
+ 1
)
thresholds = "".join([str(randint(0, 1)) for _ in range(bitwidth)])
self.constants["C_THRESHOLDS"] = Parameter(
Expand Down Expand Up @@ -319,7 +318,6 @@ def __init__(
"height": image_height,
}

# TODO: input_channel
self.input_data_signal = Parameter(
f"slv_data_{self.previous_layer_info['name']}",
f"std_logic_vector(8 - 1 downto 0)",
Expand All @@ -335,7 +333,6 @@ def __init__(
library cnn_lib;
library util;"""

# TODO: input_channel
self.entity = f"""
entity bnn is
generic (
Expand Down
45 changes: 33 additions & 12 deletions sim/test_convolution.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from dataclasses import dataclass
from math import log2
import pathlib
from random import randint
from typing import List
Expand All @@ -20,29 +21,45 @@

@cocotb.test()
async def run_test(dut):
input_channel = dut.C_INPUT_CHANNEL.value.integer
window_size = dut.C_KERNEL_SIZE.value.integer ** 2
input_length = input_channel * window_size

# input is unsigned 1 or 8 bit, output is signed
input_channel_bitwidth = dut.C_INPUT_CHANNEL_BITWIDTH.value.integer
output_bitwidth = (
input_channel_bitwidth + int(log2(window_size * input_channel)) + 1
)

@dataclass
class Testcase:
input_activations: List[int]
input_weights: List[int]

@property
def input_activations_int(self) -> int:
return concatenate_integers(self.input_activations)
return concatenate_integers(
self.input_activations, bitwidth=input_channel_bitwidth
)

@property
def input_weights_int(self) -> int:
return concatenate_integers(self.input_weights)

@property
def output_data(self) -> int:
ones_count = 0
# 1 bit activations -> xnor, popcount
if input_channel_bitwidth == 1:
ones_count = 0
for act, weight in zip(self.input_activations, self.input_weights):
ones_count = ones_count + (not (act ^ weight))
return ones_count
# >1 bit activations -> multiplication
product = 0
for act, weight in zip(self.input_activations, self.input_weights):
ones_count = ones_count + (not (act ^ weight))
return ones_count
product = product + act * (-1 if weight == 0 else 1)
return product

input_channel = dut.C_INPUT_CHANNEL.value.integer
window_size = dut.C_KERNEL_SIZE.value.integer ** 2
input_length = input_channel * window_size
cases = (
Testcase([0] * input_length, [0] * input_length),
Testcase([0] * input_length, [1] * input_length),
Expand Down Expand Up @@ -70,17 +87,21 @@ def output_data(self) -> int:
while dut.osl_valid.value.integer == 0:
dut.isl_valid <= 0
await tick.wait()
assert (
dut.oslv_data.value.integer == case.output_data
), f"{dut.oslv_data.value.integer} /= {case.output_data}"

output_int = from_fixedint(
dut.oslv_data.value.integer, output_bitwidth, is_unsigned=False
)
assert output_int == case.output_data, f"{output_int} /= {case.output_data}"


@pytest.mark.parametrize("kernel_size", range(2, 7))
@pytest.mark.parametrize("kernel_size", (1, 2, 3, 5, 7))
@pytest.mark.parametrize("input_channel", (1, 4, 9))
def test_convolution(kernel_size, input_channel):
@pytest.mark.parametrize("input_channel_bitwidth", (1, 8))
def test_convolution(kernel_size, input_channel, input_channel_bitwidth):
generics = {
"C_KERNEL_SIZE": kernel_size,
"C_INPUT_CHANNEL": input_channel,
"C_INPUT_CHANNEL_BITWIDTH": input_channel_bitwidth,
}
run(
vhdl_sources=get_files(
Expand Down
5 changes: 4 additions & 1 deletion sim/test_window_convolution_activation.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,9 @@ def get_threshold(self):

return concatenate_integers(
self.replace_minus(threshold),
bitwidth=math.ceil(math.log2(kernel_size[0] ** 2 * image_shape[2] + 1)),
bitwidth=bitwidth
+ math.ceil(math.log2(kernel_size[0] ** 2 * image_shape[2] + 1))
+ 1,
)

cases = (
Expand Down Expand Up @@ -214,6 +216,7 @@ def get_threshold(self):


# Don't run the full test matrix. Only the most common configs.
# TODO: Add test for 8 bit input.
@pytest.mark.parametrize(
"kernel_size,stride,input_channel,output_channel",
[
Expand Down
114 changes: 77 additions & 37 deletions src/convolution.vhd
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ library ieee;
use ieee.math_real.all;

library util;
use util.array_pkg.all;
use util.math_pkg.all;

entity convolution is
Expand All @@ -16,52 +17,42 @@ entity convolution is
port (
isl_clk : in std_logic;
isl_valid : in std_logic;
islv_data : in std_logic_vector(C_KERNEL_SIZE * C_KERNEL_SIZE * C_INPUT_CHANNEL * C_INPUT_CHANNEL_BITWIDTH - 1 downto 0);
islv_weights : in std_logic_vector(C_KERNEL_SIZE * C_KERNEL_SIZE * C_INPUT_CHANNEL - 1 downto 0);
oslv_data : out std_logic_vector(log2(C_KERNEL_SIZE * C_KERNEL_SIZE * C_INPUT_CHANNEL * C_INPUT_CHANNEL_BITWIDTH + 1) - 1 downto 0);
islv_data : in std_logic_vector(C_KERNEL_SIZE ** 2 * C_INPUT_CHANNEL * C_INPUT_CHANNEL_BITWIDTH - 1 downto 0);
islv_weights : in std_logic_vector(C_KERNEL_SIZE ** 2 * C_INPUT_CHANNEL - 1 downto 0);
oslv_data : out std_logic_vector(C_INPUT_CHANNEL_BITWIDTH + log2(C_KERNEL_SIZE ** 2 * C_INPUT_CHANNEL + 1) downto 0);
osl_valid : out std_logic
);
end entity convolution;

architecture behavioral of convolution is

constant C_PARALLEL_POPCOUNT : integer := 4;
constant C_SPLIT : integer := integer(ceil(real(islv_data'length) / real(C_PARALLEL_POPCOUNT)));
constant C_PADDED_BITWIDTH : integer := C_PARALLEL_POPCOUNT * C_SPLIT;

signal sl_add : std_logic := '0';
signal sl_popcount : std_logic := '0';
signal slv_product : std_logic_vector(C_PADDED_BITWIDTH - 1 downto 0) := (others => '0');
signal slv_popcount : std_logic_vector(C_SPLIT * 3 - 1 downto 0);

signal sl_valid_out : std_logic := '0';
signal slv_data_out : std_logic_vector(oslv_data'range);

begin

i_adder_tree : entity util.adder_tree
generic map (
C_INPUT_COUNT => C_SPLIT,
C_INPUT_BITWIDTH => 3,
C_OUTPUT_BITWIDTH => oslv_data'length
)
port map (
isl_clk => isl_clk,
isl_valid => sl_add,
islv_data => slv_popcount,
oslv_data => slv_data_out,
osl_valid => sl_valid_out
);

gen_matrix_multiplication : if C_INPUT_CHANNEL_BITWIDTH = 1 generate
constant C_PARALLEL_POPCOUNT : integer := 4;
constant C_SPLIT : integer := integer(ceil(real(islv_data'length) / real(C_PARALLEL_POPCOUNT)));
constant C_INPUT_BITWIDTH_ADDER : integer := log2(C_PARALLEL_POPCOUNT + 1);
constant C_PADDED_BITWIDTH_PRODUCT : integer := C_PARALLEL_POPCOUNT * C_SPLIT;

signal sl_add : std_logic := '0';
signal sl_popcount : std_logic := '0';
signal slv_product : std_logic_vector(C_PADDED_BITWIDTH_PRODUCT - 1 downto 0) := (others => '0');
signal slv_popcount : std_logic_vector(C_SPLIT * C_INPUT_BITWIDTH_ADDER - 1 downto 0);

signal slv_data_adder : std_logic_vector(C_INPUT_BITWIDTH_ADDER + log2(C_KERNEL_SIZE ** 2 * C_INPUT_CHANNEL + 1) - 1 downto 0);
begin

proc_xnor_popcount : process (isl_clk) is

variable v_usig_popcount : unsigned(2 downto 0);
variable v_usig_popcount_total : unsigned(oslv_data'range);
variable v_usig_popcount : unsigned(2 downto 0);

begin

report to_string(slv_data_out'length) & " " & to_string(slv_data_adder'length);

if (rising_edge(isl_clk)) then
sl_popcount <= '0';
sl_add <= '0';
Expand All @@ -82,7 +73,7 @@ begin
v_usig_popcount := v_usig_popcount + 1;
end if;
end loop;
slv_popcount((slice + 1) * 3 - 1 downto slice * 3) <= std_logic_vector(v_usig_popcount);
slv_popcount((slice + 1) * C_INPUT_BITWIDTH_ADDER - 1 downto slice * C_INPUT_BITWIDTH_ADDER) <= std_logic_vector(v_usig_popcount);
end loop;

sl_add <= '1';
Expand All @@ -91,31 +82,80 @@ begin

end process proc_xnor_popcount;

i_adder_tree : entity util.adder_tree
generic map (
C_INPUT_COUNT => C_SPLIT,
C_INPUT_BITWIDTH => C_INPUT_BITWIDTH_ADDER,
C_OUTPUT_BITWIDTH => slv_data_adder'length
)
port map (
isl_clk => isl_clk,
isl_valid => sl_add,
islv_data => slv_popcount,
oslv_data => slv_data_adder,
osl_valid => sl_valid_out
);

-- The adder output is unsigned, but the entity output is signed.
-- The adder output bitwidth is too big because of the grouping, so it can be resized.
slv_data_out <= std_logic_vector(resize(signed('0' & slv_data_adder), slv_data_out'length));

else generate
-- + 1, because of multiplication by +-1
constant C_PRODUCT_BITWIDTH : integer := C_INPUT_CHANNEL_BITWIDTH + 1;
signal sl_add : std_logic := '0';
signal slv_product : std_logic_vector(C_KERNEL_SIZE ** 2 * C_INPUT_CHANNEL * C_PRODUCT_BITWIDTH - 1 downto 0);

begin

gen_input : for input_channel in 0 to C_INPUT_CHANNEL - 1 generate

proc_add_sign : process (isl_clk) is

variable v_usig_popcount : unsigned(2 downto 0);
variable v_usig_popcount_total : unsigned(oslv_data'range);
variable v_int_index : integer;
variable v_slv_input_datum : std_logic_vector(C_PRODUCT_BITWIDTH - 1 downto 0);
variable v_slv_output_datum : std_logic_vector(C_PRODUCT_BITWIDTH - 1 downto 0);

begin

if (rising_edge(isl_clk)) then
sl_popcount <= '0';
sl_add <= '0';
sl_add <= '0';

if (isl_valid = '1') then
-- islv_data * +-1
-- assign slices to adder tree
-- extend adder tree by signed addition
sl_add <= '1';
for ch in 0 to C_INPUT_CHANNEL - 1 loop
for k in 0 to C_KERNEL_SIZE ** 2 - 1 loop
v_int_index := k + ch * C_KERNEL_SIZE ** 2;
v_slv_input_datum := '0' & get_slice(islv_data, v_int_index, C_PRODUCT_BITWIDTH - 1);
-- Calculate product, i.e. input data * +-1
if (islv_weights(v_int_index) = '1') then
v_slv_output_datum := v_slv_input_datum;
else
v_slv_output_datum := std_logic_vector(-signed(v_slv_input_datum));
end if;
slv_product((v_int_index + 1) * C_PRODUCT_BITWIDTH - 1 downto v_int_index * C_PRODUCT_BITWIDTH) <= v_slv_output_datum;
end loop;
end loop;
sl_add <= '1';
end if;
end if;

end process proc_add_sign;

i_adder_tree : entity util.adder_tree
generic map (
C_INPUT_COUNT => C_KERNEL_SIZE ** 2 * C_INPUT_CHANNEL,
C_INPUT_BITWIDTH => C_PRODUCT_BITWIDTH,
C_UNSIGNED => 0,
C_OUTPUT_BITWIDTH => oslv_data'length
)
port map (
isl_clk => isl_clk,
isl_valid => sl_add,
islv_data => slv_product,
oslv_data => slv_data_out,
osl_valid => sl_valid_out
);

end generate gen_input;

end generate gen_matrix_multiplication;
Expand Down
4 changes: 3 additions & 1 deletion src/util/adder_tree.vhd
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,9 @@ architecture rtl of adder_tree is
v_sum_init := (others => (others => '0'));

-- Pad with zeros to widen from input bitwidth to output bitwidth.
assert C_OUTPUT_BITWIDTH >= C_INPUT_BITWIDTH + C_STAGES; -- Input gets extended by 1 bit at each stage.
-- Input gets extended by 1 bit at each stage.
assert C_OUTPUT_BITWIDTH >= C_INPUT_BITWIDTH + C_STAGES
report "required bitwidth: " & to_string(C_INPUT_BITWIDTH + C_STAGES) & ", actual bitwidth: " & to_string(C_OUTPUT_BITWIDTH);
v_input_datum := (others => '0');

for i in 0 to C_INPUT_COUNT - 1 loop
Expand Down
6 changes: 3 additions & 3 deletions src/window_convolution_activation.vhd
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ entity window_convolution_activation is
islv_data : in std_logic_vector(C_INPUT_CHANNEL * C_INPUT_CHANNEL_BITWIDTH - 1 downto 0);
-- islv_weights and islv_threshold are constants
islv_weights : in std_logic_vector(C_KERNEL_SIZE * C_KERNEL_SIZE * C_INPUT_CHANNEL * C_OUTPUT_CHANNEL - 1 downto 0);
islv_threshold : in std_logic_vector(C_OUTPUT_CHANNEL * log2(C_KERNEL_SIZE ** 2 * C_INPUT_CHANNEL * C_INPUT_CHANNEL_BITWIDTH + 1) - 1 downto 0);
islv_threshold : in std_logic_vector(C_OUTPUT_CHANNEL * (C_INPUT_CHANNEL_BITWIDTH + log2(C_KERNEL_SIZE ** 2 * C_INPUT_CHANNEL + 1) + 1) - 1 downto 0);
oslv_data : out std_logic_vector(C_OUTPUT_CHANNEL * C_OUTPUT_CHANNEL_BITWIDTH - 1 downto 0);
osl_valid : out std_logic
);
Expand All @@ -40,13 +40,13 @@ end entity window_convolution_activation;
architecture behavioral of window_convolution_activation is

signal sl_valid_window_ctrl : std_logic := '0';
signal slv_data_window_ctrl : std_logic_vector(C_KERNEL_SIZE * C_KERNEL_SIZE * C_INPUT_CHANNEL * C_INPUT_CHANNEL_BITWIDTH - 1 downto 0);
signal slv_data_window_ctrl : std_logic_vector(C_KERNEL_SIZE ** 2 * C_INPUT_CHANNEL * C_INPUT_CHANNEL_BITWIDTH - 1 downto 0);

signal slv_valid_convolution : std_logic_vector(C_OUTPUT_CHANNEL - 1 downto 0);

type t_slv_array_1d is array(natural range <>) of std_logic_vector;

constant C_POST_CONVOLUTION_BITWIDTH : integer := log2(C_KERNEL_SIZE ** 2 * C_INPUT_CHANNEL * C_INPUT_CHANNEL_BITWIDTH + 1);
constant C_POST_CONVOLUTION_BITWIDTH : integer := C_INPUT_CHANNEL_BITWIDTH + log2(C_KERNEL_SIZE ** 2 * C_INPUT_CHANNEL + 1) + 1;
signal a_data_convolution : t_slv_array_1d(0 to C_OUTPUT_CHANNEL - 1)(C_POST_CONVOLUTION_BITWIDTH - 1 downto 0);

signal slv_valid_batch_normalization : std_logic_vector(C_OUTPUT_CHANNEL - 1 downto 0);
Expand Down

0 comments on commit 04d10e6

Please sign in to comment.