Skip to content

Commit

Permalink
Add partial support for multi-bit input
Browse files Browse the repository at this point in the history
Especially useful for the first layer.
  • Loading branch information
marph91 committed Mar 20, 2021
1 parent acee4ac commit 8ad56a7
Show file tree
Hide file tree
Showing 3 changed files with 120 additions and 76 deletions.
80 changes: 49 additions & 31 deletions playground/04_custom_toplevel.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,47 +48,59 @@ def update(self, previous_layer_info):

self.data_signal = Parameter(
f"slv_data_{self.info['name']}",
f"std_logic_vector(C_OUTPUT_CHANNEL_{self.info['name']} * C_OUTPUT_CHANNEL_BITWIDTH_{self.info['name']} - 1 downto 0)",
f"std_logic_vector(C_OUTPUT_CHANNEL_{self.info['name'].upper()} * C_OUTPUT_CHANNEL_BITWIDTH_{self.info['name'].upper()} - 1 downto 0)",
)
self.signals.append(self.data_signal)

# channel
self.info["channel"] = int(self.constants["C_OUTPUT_CHANNEL"].value)
input_channel = previous_layer_info["channel"]
self.info["bitwidth"] = int(self.constants["C_OUTPUT_CHANNEL_BITWIDTH"].value)
self.constants["C_INPUT_CHANNEL"] = Parameter(
f"C_INPUT_CHANNEL_{self.info['name']}",
f"C_INPUT_CHANNEL_{self.info['name'].upper()}",
"integer",
previous_layer_info["channel"],
)
self.constants["C_INPUT_CHANNEL_BITWIDTH"] = Parameter(
f"C_INPUT_CHANNEL_BITWIDTH_{self.info['name'].upper()}",
"integer",
previous_layer_info["bitwidth"],
)

# weights
kernel_size = int(self.constants["C_KERNEL_SIZE"].value)
output_channel = int(self.constants["C_OUTPUT_CHANNEL"].value)
input_channel = previous_layer_info["channel"]
output_channel = self.info["channel"]
bitwidth = input_channel * output_channel * kernel_size ** 2
weights = "".join([str(randint(0, 1)) for _ in range(bitwidth)])
self.constants["C_WEIGHTS"] = Parameter(
f"C_WEIGHTS_{self.info['name']}",
f"C_WEIGHTS_{self.info['name'].upper()}",
f"std_logic_vector({bitwidth} - 1 downto 0)",
f'"{weights}"',
)

# thresholds
input_channel_bitwidth = previous_layer_info["bitwidth"]
bitwidth = (
math.ceil(math.log2(input_channel * kernel_size ** 2 + 1)) * output_channel
math.ceil(
math.log2(input_channel * input_channel_bitwidth * kernel_size ** 2 + 1)
)
* output_channel
)
thresholds = "".join([str(randint(0, 1)) for _ in range(bitwidth)])
self.constants["C_THRESHOLDS"] = Parameter(
f"C_THRESHOLDS_{self.info['name']}",
f"C_THRESHOLDS_{self.info['name'].upper()}",
f"std_logic_vector({bitwidth} - 1 downto 0)",
f'"{thresholds}"',
)

# calculate new image size
self.constants["C_IMG_WIDTH"] = Parameter(
f"C_IMG_WIDTH_{self.info['name']}",
f"C_IMG_WIDTH_{self.info['name'].upper()}",
"integer",
str(previous_layer_info["width"]),
)
self.constants["C_IMG_HEIGHT"] = Parameter(
f"C_IMG_HEIGHT_{self.info['name']}",
f"C_IMG_HEIGHT_{self.info['name'].upper()}",
"integer",
str(previous_layer_info["height"]),
)
Expand All @@ -111,13 +123,13 @@ def get_instance(self):
C_KERNEL_SIZE => {self.constants["C_KERNEL_SIZE"].name},
C_STRIDE => {self.constants["C_STRIDE"].name},
C_INPUT_CHANNEL => {self.constants["C_INPUT_CHANNEL"].name},
C_OUTPUT_CHANNEL => {self.constants["C_OUTPUT_CHANNEL"].name},
C_INPUT_CHANNEL => {self.constants["C_INPUT_CHANNEL"].name},
C_INPUT_CHANNEL_BITWIDTH => {self.constants["C_INPUT_CHANNEL_BITWIDTH"].name},
C_OUTPUT_CHANNEL => {self.constants["C_OUTPUT_CHANNEL"].name},
C_OUTPUT_CHANNEL_BITWIDTH => {self.constants["C_OUTPUT_CHANNEL_BITWIDTH"].name},
C_IMG_WIDTH => {self.constants["C_IMG_WIDTH"].name},
C_IMG_HEIGHT => {self.constants["C_IMG_HEIGHT"].name},
C_OUTPUT_CHANNEL_BITWIDTH => {self.constants["C_OUTPUT_CHANNEL_BITWIDTH"].name}
C_IMG_HEIGHT => {self.constants["C_IMG_HEIGHT"].name}
)
port map (
isl_clk => isl_clk,
Expand Down Expand Up @@ -245,14 +257,21 @@ def new_size(prevous_size, kernel_size, stride, padding=0):

class Bnn:
def __init__(
self, image_height, image_width, input_channel, output_classes, output_bitwidth
self,
image_height,
image_width,
input_channel,
input_bitwidth,
output_classes,
output_bitwidth,
):
self.layers = []
self.output_classes = output_classes
self.output_bitwidth = output_bitwidth
self.previous_layer_info = {
"name": "in",
"channel": input_channel,
"bitwidth": input_bitwidth,
"width": image_width,
"height": image_height,
}
Expand All @@ -275,11 +294,15 @@ def __init__(
# TODO: input_channel
self.entity = f"""
entity bnn is
generic (
C_INPUT_CHANNEL : integer := {self.previous_layer_info["channel"]};
C_INPUT_CHANNEL_BITWIDTH : integer := {self.previous_layer_info["bitwidth"]}
);
port (
isl_clk : in std_logic;
isl_start : in std_logic;
isl_valid : in std_logic;
islv_data : in std_logic_vector(8 - 1 downto 0);
islv_data : in std_logic_vector(C_INPUT_CHANNEL * C_INPUT_CHANNEL_BITWIDTH - 1 downto 0);
oslv_data : out std_logic_vector({self.output_classes * self.output_bitwidth} - 1 downto 0);
osl_valid : out std_logic;
osl_finish : out std_logic
Expand All @@ -296,18 +319,6 @@ def to_vhdl(self):
implementation = []

declarations.append("-- input signals")
declarations.append(
parameter_to_vhdl(
"constant",
[
Parameter(
"C_INPUT_CHANNEL",
"integer",
self.previous_layer_info["channel"],
)
],
)
)
declarations.append(
parameter_to_vhdl(
"signal", [self.input_data_signal, self.input_control_signal]
Expand Down Expand Up @@ -350,11 +361,18 @@ def to_vhdl(self):


if __name__ == "__main__":
input_channel = 1
input_channel_bitwidth = 8
output_channel = 64
output_channel_bitwidth = 8
b = Bnn(8, 8, 1, output_channel, output_channel_bitwidth)
bn = BatchNormalization("000", [])
b.add_layer(bn)
b = Bnn(
8,
8,
input_channel,
input_channel_bitwidth,
output_channel,
output_channel_bitwidth,
)
c = Convolution(
"aaa",
[
Expand Down
98 changes: 62 additions & 36 deletions src/convolution.vhd
Original file line number Diff line number Diff line change
Expand Up @@ -9,17 +9,16 @@ library util;

entity convolution is
generic (
-- TODO: input bitwidth, for now = 1

C_KERNEL_SIZE : integer range 1 to 7 := 3;
C_INPUT_CHANNEL : integer := 1
C_KERNEL_SIZE : integer range 1 to 7 := 3;
C_INPUT_CHANNEL : integer := 1;
C_INPUT_CHANNEL_BITWIDTH : integer := 1
);
port (
isl_clk : in std_logic;
isl_valid : in std_logic;
islv_data : in std_logic_vector(C_KERNEL_SIZE * C_KERNEL_SIZE * C_INPUT_CHANNEL - 1 downto 0);
islv_data : in std_logic_vector(C_KERNEL_SIZE * C_KERNEL_SIZE * C_INPUT_CHANNEL * C_INPUT_CHANNEL_BITWIDTH - 1 downto 0);
islv_weights : in std_logic_vector(C_KERNEL_SIZE * C_KERNEL_SIZE * C_INPUT_CHANNEL - 1 downto 0);
oslv_data : out std_logic_vector(log2(C_KERNEL_SIZE * C_KERNEL_SIZE * C_INPUT_CHANNEL + 1) - 1 downto 0);
oslv_data : out std_logic_vector(log2(C_KERNEL_SIZE * C_KERNEL_SIZE * C_INPUT_CHANNEL * C_INPUT_CHANNEL_BITWIDTH + 1) - 1 downto 0);
osl_valid : out std_logic
);
end entity convolution;
Expand All @@ -30,10 +29,6 @@ architecture behavioral of convolution is
constant C_SPLIT : integer := integer(ceil(real(islv_data'length) / real(C_PARALLEL_POPCOUNT)));
constant C_PADDED_BITWIDTH : integer := C_PARALLEL_POPCOUNT * C_SPLIT;

type t_ones_count is array(natural range<>) of unsigned(oslv_data'range);

signal a_ones_count : t_ones_count(0 to C_SPLIT);

signal sl_add : std_logic := '0';
signal sl_popcount : std_logic := '0';
signal slv_product : std_logic_vector(C_PADDED_BITWIDTH - 1 downto 0) := (others => '0');
Expand All @@ -58,41 +53,72 @@ begin
osl_valid => sl_valid_out
);

proc_convolution : process (isl_clk) is
gen_matrix_multiplication : if C_INPUT_CHANNEL_BITWIDTH = 1 generate

variable v_usig_popcount : unsigned(2 downto 0);
variable v_usig_popcount_total : unsigned(oslv_data'range);
proc_xnor_popcount : process (isl_clk) is

begin
variable v_usig_popcount : unsigned(2 downto 0);
variable v_usig_popcount_total : unsigned(oslv_data'range);

if (rising_edge(isl_clk)) then
sl_popcount <= '0';
sl_add <= '0';
begin

if (isl_valid = '1') then
-- or map directly to hardware (islv_weights as constant)
-- pad zeros for the adder tree
slv_product <= (islv_data xnor islv_weights) & (slv_product'length - islv_data'length - 1 downto 0 => '0');
sl_popcount <= '1';
end if;
if (rising_edge(isl_clk)) then
sl_popcount <= '0';
sl_add <= '0';

-- If using bram, one would be needed for each adder stage.
if (sl_popcount = '1') then
for slice in 0 to slv_product'length / C_PARALLEL_POPCOUNT - 1 loop
v_usig_popcount := (others => '0');
for i in 0 to C_PARALLEL_POPCOUNT - 1 loop
if (slv_product(i + slice * C_PARALLEL_POPCOUNT) = '1') then
v_usig_popcount := v_usig_popcount + 1;
end if;
if (isl_valid = '1') then
-- or map directly to hardware (islv_weights as constant)
-- pad zeros for the adder tree
slv_product <= (islv_data xnor islv_weights) & (slv_product'length - islv_data'length - 1 downto 0 => '0');
sl_popcount <= '1';
end if;

-- If using bram, one would be needed for each adder stage.
if (sl_popcount = '1') then
for slice in 0 to slv_product'length / C_PARALLEL_POPCOUNT - 1 loop
v_usig_popcount := (others => '0');
for i in 0 to C_PARALLEL_POPCOUNT - 1 loop
if (slv_product(i + slice * C_PARALLEL_POPCOUNT) = '1') then
v_usig_popcount := v_usig_popcount + 1;
end if;
end loop;
slv_popcount((slice + 1) * 3 - 1 downto slice * 3) <= std_logic_vector(v_usig_popcount);
end loop;
slv_popcount((slice + 1) * 3 - 1 downto slice * 3) <= std_logic_vector(v_usig_popcount);
end loop;

sl_add <= '1';
sl_add <= '1';
end if;
end if;
end if;

end process proc_convolution;
end process proc_xnor_popcount;

else generate

-- Multi-bit input path: this generate loop sits in the "else" branch of
-- gen_matrix_multiplication, i.e. it is elaborated when
-- C_INPUT_CHANNEL_BITWIDTH is not 1. One copy of proc_add_sign is created
-- for every input channel (0 to C_INPUT_CHANNEL - 1).
-- NOTE(review): every generated copy of the process assigns sl_popcount and
-- sl_add, so for C_INPUT_CHANNEL > 1 these signals get several drivers --
-- confirm this is intended before relying on this branch.
gen_input : for input_channel in 0 to C_INPUT_CHANNEL - 1 generate

-- Clocked process that currently only produces the sl_add strobe; the
-- actual arithmetic is described by the comments inside but not coded yet.
proc_add_sign : process (isl_clk) is

-- NOTE(review): neither variable is referenced in the process body below.
variable v_usig_popcount : unsigned(2 downto 0);
variable v_usig_popcount_total : unsigned(oslv_data'range);

begin

if (rising_edge(isl_clk)) then
-- default assignments: both strobes are low unless overridden below
sl_popcount <= '0';
sl_add <= '0';

if (isl_valid = '1') then
-- planned steps (no code for them in this process yet):
-- islv_data * +-1
-- assign slices to adder tree
-- extend adder tree by signed addition
-- overrides the default above, so sl_add is high in the cycle after
-- isl_valid was sampled high
sl_add <= '1';
end if;
end if;

end process proc_add_sign;

end generate gen_input;

end generate gen_matrix_multiplication;

oslv_data <= slv_data_out;
osl_valid <= sl_valid_out;
Expand Down
18 changes: 9 additions & 9 deletions src/window_convolution_activation.vhd
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,11 @@ library window_ctrl_lib;

entity window_convolution_activation is
generic (
-- TODO: input bitwidth, for now = 1

C_KERNEL_SIZE : integer range 1 to 7 := 3;
C_STRIDE : integer := 1;

C_INPUT_CHANNEL : integer := 4;
C_INPUT_CHANNEL_BITWIDTH : integer := 1;
C_OUTPUT_CHANNEL : integer := 8;
C_OUTPUT_CHANNEL_BITWIDTH : integer range 1 to 32 := 1;

Expand All @@ -28,10 +27,10 @@ entity window_convolution_activation is
isl_clk : in std_logic;
isl_start : in std_logic;
isl_valid : in std_logic;
islv_data : in std_logic_vector(C_INPUT_CHANNEL - 1 downto 0);
islv_data : in std_logic_vector(C_INPUT_CHANNEL * C_INPUT_CHANNEL_BITWIDTH - 1 downto 0);
-- islv_weights and islv_threshold are constants
islv_weights : in std_logic_vector(C_KERNEL_SIZE * C_KERNEL_SIZE * C_INPUT_CHANNEL * C_OUTPUT_CHANNEL - 1 downto 0);
islv_threshold : in std_logic_vector(C_OUTPUT_CHANNEL * log2(C_KERNEL_SIZE ** 2 * C_INPUT_CHANNEL + 1) - 1 downto 0);
islv_threshold : in std_logic_vector(C_OUTPUT_CHANNEL * log2(C_KERNEL_SIZE ** 2 * C_INPUT_CHANNEL * C_INPUT_CHANNEL_BITWIDTH + 1) - 1 downto 0);
oslv_data : out std_logic_vector(C_OUTPUT_CHANNEL * C_OUTPUT_CHANNEL_BITWIDTH - 1 downto 0);
osl_valid : out std_logic
);
Expand All @@ -40,13 +39,13 @@ end entity window_convolution_activation;
architecture behavioral of window_convolution_activation is

signal sl_valid_window_ctrl : std_logic := '0';
signal slv_data_window_ctrl : std_logic_vector(C_KERNEL_SIZE * C_KERNEL_SIZE * C_INPUT_CHANNEL - 1 downto 0);
signal slv_data_window_ctrl : std_logic_vector(C_KERNEL_SIZE * C_KERNEL_SIZE * C_INPUT_CHANNEL * C_INPUT_CHANNEL_BITWIDTH - 1 downto 0);

signal slv_valid_convolution : std_logic_vector(C_OUTPUT_CHANNEL - 1 downto 0);

type t_slv_array_1d is array(natural range <>) of std_logic_vector;

constant C_POST_CONVOLUTION_BITWIDTH : integer := log2(C_KERNEL_SIZE ** 2 * C_INPUT_CHANNEL + 1);
constant C_POST_CONVOLUTION_BITWIDTH : integer := log2(C_KERNEL_SIZE ** 2 * C_INPUT_CHANNEL * C_INPUT_CHANNEL_BITWIDTH + 1);
signal a_data_convolution : t_slv_array_1d(0 to C_OUTPUT_CHANNEL - 1)(C_POST_CONVOLUTION_BITWIDTH - 1 downto 0);

signal slv_valid_batch_normalization : std_logic_vector(C_OUTPUT_CHANNEL - 1 downto 0);
Expand Down Expand Up @@ -77,7 +76,7 @@ begin

i_window_ctrl : entity window_ctrl_lib.window_ctrl
generic map (
C_BITWIDTH => 1 * C_INPUT_CHANNEL,
C_BITWIDTH => C_INPUT_CHANNEL * C_INPUT_CHANNEL_BITWIDTH,
C_CH_IN => 1,
C_CH_OUT => 1,
C_IMG_WIDTH => C_IMG_WIDTH,
Expand All @@ -100,8 +99,9 @@ begin

i_convolution : entity cnn_lib.convolution
generic map (
C_KERNEL_SIZE => C_KERNEL_SIZE,
C_INPUT_CHANNEL => C_INPUT_CHANNEL
C_KERNEL_SIZE => C_KERNEL_SIZE,
C_INPUT_CHANNEL => C_INPUT_CHANNEL,
C_INPUT_CHANNEL_BITWIDTH => C_INPUT_CHANNEL_BITWIDTH
)
port map (
isl_clk => isl_clk,
Expand Down

0 comments on commit 8ad56a7

Please sign in to comment.