Add partial support for multi-bit input

Especially useful for the first layer.
marph91 · Mar 20, 2021 · 8ad56a7
1 parent acee4ac
commit 8ad56a7
Show file tree

Hide file tree

Showing 3 changed files with 120 additions and 76 deletions.
diff --git a/playground/04_custom_toplevel.py b/playground/04_custom_toplevel.py
@@ -48,47 +48,59 @@ def update(self, previous_layer_info):
 
  self.data_signal = Parameter(
  f"slv_data_{self.info['name']}",
- f"std_logic_vector(C_OUTPUT_CHANNEL_{self.info['name']} * C_OUTPUT_CHANNEL_BITWIDTH_{self.info['name']} - 1 downto 0)",
+ f"std_logic_vector(C_OUTPUT_CHANNEL_{self.info['name'].upper()} * C_OUTPUT_CHANNEL_BITWIDTH_{self.info['name'].upper()} - 1 downto 0)",
  )
  self.signals.append(self.data_signal)
 
  # channel
  self.info["channel"] = int(self.constants["C_OUTPUT_CHANNEL"].value)
- input_channel = previous_layer_info["channel"]
+ self.info["bitwidth"] = int(self.constants["C_OUTPUT_CHANNEL_BITWIDTH"].value)
  self.constants["C_INPUT_CHANNEL"] = Parameter(
- f"C_INPUT_CHANNEL_{self.info['name']}",
+ f"C_INPUT_CHANNEL_{self.info['name'].upper()}",
  "integer",
  previous_layer_info["channel"],
  )
+ self.constants["C_INPUT_CHANNEL_BITWIDTH"] = Parameter(
+ f"C_INPUT_CHANNEL_BITWIDTH_{self.info['name'].upper()}",
+ "integer",
+ previous_layer_info["bitwidth"],
+ )
 
+ # weights
  kernel_size = int(self.constants["C_KERNEL_SIZE"].value)
- output_channel = int(self.constants["C_OUTPUT_CHANNEL"].value)
+ input_channel = previous_layer_info["channel"]
+ output_channel = self.info["channel"]
  bitwidth = input_channel * output_channel * kernel_size ** 2
  weights = "".join([str(randint(0, 1)) for _ in range(bitwidth)])
  self.constants["C_WEIGHTS"] = Parameter(
- f"C_WEIGHTS_{self.info['name']}",
+ f"C_WEIGHTS_{self.info['name'].upper()}",
  f"std_logic_vector({bitwidth} - 1 downto 0)",
  f'"{weights}"',
  )
 
+ # thresholds
+ input_channel_bitwidth = previous_layer_info["bitwidth"]
  bitwidth = (
- math.ceil(math.log2(input_channel * kernel_size ** 2 + 1)) * output_channel
+ math.ceil(
+ math.log2(input_channel * input_channel_bitwidth * kernel_size ** 2 + 1)
+ )
+ * output_channel
  )
  thresholds = "".join([str(randint(0, 1)) for _ in range(bitwidth)])
  self.constants["C_THRESHOLDS"] = Parameter(
- f"C_THRESHOLDS_{self.info['name']}",
+ f"C_THRESHOLDS_{self.info['name'].upper()}",
  f"std_logic_vector({bitwidth} - 1 downto 0)",
  f'"{thresholds}"',
  )
 
  # calculate new image size
  self.constants["C_IMG_WIDTH"] = Parameter(
- f"C_IMG_WIDTH_{self.info['name']}",
+ f"C_IMG_WIDTH_{self.info['name'].upper()}",
  "integer",
  str(previous_layer_info["width"]),
  )
  self.constants["C_IMG_HEIGHT"] = Parameter(
- f"C_IMG_HEIGHT_{self.info['name']}",
+ f"C_IMG_HEIGHT_{self.info['name'].upper()}",
  "integer",
  str(previous_layer_info["height"]),
  )
@@ -111,13 +123,13 @@ def get_instance(self):
  C_KERNEL_SIZE => {self.constants["C_KERNEL_SIZE"].name},
  C_STRIDE => {self.constants["C_STRIDE"].name},
 
- C_INPUT_CHANNEL => {self.constants["C_INPUT_CHANNEL"].name},
- C_OUTPUT_CHANNEL => {self.constants["C_OUTPUT_CHANNEL"].name},
+ C_INPUT_CHANNEL => {self.constants["C_INPUT_CHANNEL"].name},
+ C_INPUT_CHANNEL_BITWIDTH => {self.constants["C_INPUT_CHANNEL_BITWIDTH"].name},
+ C_OUTPUT_CHANNEL => {self.constants["C_OUTPUT_CHANNEL"].name},
+ C_OUTPUT_CHANNEL_BITWIDTH => {self.constants["C_OUTPUT_CHANNEL_BITWIDTH"].name},
 
  C_IMG_WIDTH => {self.constants["C_IMG_WIDTH"].name},
- C_IMG_HEIGHT => {self.constants["C_IMG_HEIGHT"].name},
-
- C_OUTPUT_CHANNEL_BITWIDTH => {self.constants["C_OUTPUT_CHANNEL_BITWIDTH"].name}
+ C_IMG_HEIGHT => {self.constants["C_IMG_HEIGHT"].name}
  )
  port map (
  isl_clk => isl_clk,
@@ -245,14 +257,21 @@ def new_size(prevous_size, kernel_size, stride, padding=0):
 
 class Bnn:
  def __init__(
- self, image_height, image_width, input_channel, output_classes, output_bitwidth
+ self,
+ image_height,
+ image_width,
+ input_channel,
+ input_bitwidth,
+ output_classes,
+ output_bitwidth,
  ):
  self.layers = []
  self.output_classes = output_classes
  self.output_bitwidth = output_bitwidth
  self.previous_layer_info = {
  "name": "in",
  "channel": input_channel,
+ "bitwidth": input_bitwidth,
  "width": image_width,
  "height": image_height,
  }
@@ -275,11 +294,15 @@ def __init__(
  # TODO: input_channel
  self.entity = f"""
 entity bnn is
+ generic (
+ C_INPUT_CHANNEL : integer := {self.previous_layer_info["channel"]};
+ C_INPUT_CHANNEL_BITWIDTH : integer := {self.previous_layer_info["bitwidth"]}
+ );
  port (
  isl_clk : in std_logic;
  isl_start : in std_logic;
  isl_valid : in std_logic;
- islv_data : in std_logic_vector(8 - 1 downto 0);
+ islv_data : in std_logic_vector(C_INPUT_CHANNEL * C_INPUT_CHANNEL_BITWIDTH - 1 downto 0);
  oslv_data : out std_logic_vector({self.output_classes * self.output_bitwidth} - 1 downto 0);
  osl_valid : out std_logic;
  osl_finish : out std_logic
@@ -296,18 +319,6 @@ def to_vhdl(self):
  implementation = []
 
  declarations.append("-- input signals")
- declarations.append(
- parameter_to_vhdl(
- "constant",
- [
- Parameter(
- "C_INPUT_CHANNEL",
- "integer",
- self.previous_layer_info["channel"],
- )
- ],
- )
- )
  declarations.append(
  parameter_to_vhdl(
  "signal", [self.input_data_signal, self.input_control_signal]
@@ -350,11 +361,18 @@ def to_vhdl(self):
 
 
 if __name__ == "__main__":
+ input_channel = 1
+ input_channel_bitwidth = 8
  output_channel = 64
  output_channel_bitwidth = 8
- b = Bnn(8, 8, 1, output_channel, output_channel_bitwidth)
- bn = BatchNormalization("000", [])
- b.add_layer(bn)
+ b = Bnn(
+ 8,
+ 8,
+ input_channel,
+ input_channel_bitwidth,
+ output_channel,
+ output_channel_bitwidth,
+ )
  c = Convolution(
  "aaa",
  [

diff --git a/src/convolution.vhd b/src/convolution.vhd
@@ -9,17 +9,16 @@ library util;
 
 entity convolution is
  generic (
- -- TODO: input bitwidth, for now = 1
-
- C_KERNEL_SIZE : integer range 1 to 7 := 3;
- C_INPUT_CHANNEL : integer := 1
+ C_KERNEL_SIZE : integer range 1 to 7 := 3;
+ C_INPUT_CHANNEL : integer := 1;
+ C_INPUT_CHANNEL_BITWIDTH : integer := 1
  );
  port (
  isl_clk : in std_logic;
  isl_valid : in std_logic;
- islv_data : in std_logic_vector(C_KERNEL_SIZE * C_KERNEL_SIZE * C_INPUT_CHANNEL - 1 downto 0);
+ islv_data : in std_logic_vector(C_KERNEL_SIZE * C_KERNEL_SIZE * C_INPUT_CHANNEL * C_INPUT_CHANNEL_BITWIDTH - 1 downto 0);
  islv_weights : in std_logic_vector(C_KERNEL_SIZE * C_KERNEL_SIZE * C_INPUT_CHANNEL - 1 downto 0);
- oslv_data : out std_logic_vector(log2(C_KERNEL_SIZE * C_KERNEL_SIZE * C_INPUT_CHANNEL + 1) - 1 downto 0);
+ oslv_data : out std_logic_vector(log2(C_KERNEL_SIZE * C_KERNEL_SIZE * C_INPUT_CHANNEL * C_INPUT_CHANNEL_BITWIDTH + 1) - 1 downto 0);
  osl_valid : out std_logic
  );
 end entity convolution;
@@ -30,10 +29,6 @@ architecture behavioral of convolution is
  constant C_SPLIT : integer := integer(ceil(real(islv_data'length) / real(C_PARALLEL_POPCOUNT)));
  constant C_PADDED_BITWIDTH : integer := C_PARALLEL_POPCOUNT * C_SPLIT;
 
- type t_ones_count is array(natural range<>) of unsigned(oslv_data'range);
-
- signal a_ones_count : t_ones_count(0 to C_SPLIT);
-
  signal sl_add : std_logic := '0';
  signal sl_popcount : std_logic := '0';
  signal slv_product : std_logic_vector(C_PADDED_BITWIDTH - 1 downto 0) := (others => '0');
@@ -58,41 +53,72 @@ begin
  osl_valid => sl_valid_out
  );
 
- proc_convolution : process (isl_clk) is
+ gen_matrix_multiplication : if C_INPUT_CHANNEL_BITWIDTH = 1 generate
 
- variable v_usig_popcount : unsigned(2 downto 0);
- variable v_usig_popcount_total : unsigned(oslv_data'range);
+ proc_xnor_popcount : process (isl_clk) is
 
- begin
+ variable v_usig_popcount : unsigned(2 downto 0);
+ variable v_usig_popcount_total : unsigned(oslv_data'range);
 
- if (rising_edge(isl_clk)) then
- sl_popcount <= '0';
- sl_add <= '0';
+ begin
 
- if (isl_valid = '1') then
- -- or map directly to hardware (islv_weights as constant)
- -- pad zeros for the adder tree
- slv_product <= (islv_data xnor islv_weights) & (slv_product'length - islv_data'length - 1 downto 0 => '0');
- sl_popcount <= '1';
- end if;
+ if (rising_edge(isl_clk)) then
+ sl_popcount <= '0';
+ sl_add <= '0';
 
- -- If using bram, one would be needed for each adder stage.
- if (sl_popcount = '1') then
- for slice in 0 to slv_product'length / C_PARALLEL_POPCOUNT - 1 loop
- v_usig_popcount := (others => '0');
- for i in 0 to C_PARALLEL_POPCOUNT - 1 loop
- if (slv_product(i + slice * C_PARALLEL_POPCOUNT) = '1') then
- v_usig_popcount := v_usig_popcount + 1;
- end if;
+ if (isl_valid = '1') then
+ -- or map directly to hardware (islv_weights as constant)
+ -- pad zeros for the adder tree
+ slv_product <= (islv_data xnor islv_weights) & (slv_product'length - islv_data'length - 1 downto 0 => '0');
+ sl_popcount <= '1';
+ end if;
+
+ -- If using bram, one would be needed for each adder stage.
+ if (sl_popcount = '1') then
+ for slice in 0 to slv_product'length / C_PARALLEL_POPCOUNT - 1 loop
+ v_usig_popcount := (others => '0');
+ for i in 0 to C_PARALLEL_POPCOUNT - 1 loop
+ if (slv_product(i + slice * C_PARALLEL_POPCOUNT) = '1') then
+ v_usig_popcount := v_usig_popcount + 1;
+ end if;
+ end loop;
+ slv_popcount((slice + 1) * 3 - 1 downto slice * 3) <= std_logic_vector(v_usig_popcount);
  end loop;
- slv_popcount((slice + 1) * 3 - 1 downto slice * 3) <= std_logic_vector(v_usig_popcount);
- end loop;
 
- sl_add <= '1';
+ sl_add <= '1';
+ end if;
  end if;
- end if;
 
- end process proc_convolution;
+ end process proc_xnor_popcount;
+
+ else generate
+
+ gen_input : for input_channel in 0 to C_INPUT_CHANNEL - 1 generate
+
+ proc_add_sign : process (isl_clk) is
+
+ variable v_usig_popcount : unsigned(2 downto 0);
+ variable v_usig_popcount_total : unsigned(oslv_data'range);
+
+ begin
+
+ if (rising_edge(isl_clk)) then
+ sl_popcount <= '0';
+ sl_add <= '0';
+
+ if (isl_valid = '1') then
+ -- islv_data * +-1
+ -- assign slices to adder tree
+ -- extend adder tree by signed addition
+ sl_add <= '1';
+ end if;
+ end if;
+
+ end process proc_add_sign;
+
+ end generate gen_input;
+
+ end generate gen_matrix_multiplication;
 
  oslv_data <= slv_data_out;
  osl_valid <= sl_valid_out;

diff --git a/src/window_convolution_activation.vhd b/src/window_convolution_activation.vhd
@@ -12,12 +12,11 @@ library window_ctrl_lib;
 
 entity window_convolution_activation is
  generic (
- -- TODO: input bitwidth, for now = 1
-
  C_KERNEL_SIZE : integer range 1 to 7 := 3;
  C_STRIDE : integer := 1;
 
  C_INPUT_CHANNEL : integer := 4;
+ C_INPUT_CHANNEL_BITWIDTH : integer := 1;
  C_OUTPUT_CHANNEL : integer := 8;
  C_OUTPUT_CHANNEL_BITWIDTH : integer range 1 to 32 := 1;
 
@@ -28,10 +27,10 @@ entity window_convolution_activation is
  isl_clk : in std_logic;
  isl_start : in std_logic;
  isl_valid : in std_logic;
- islv_data : in std_logic_vector(C_INPUT_CHANNEL - 1 downto 0);
+ islv_data : in std_logic_vector(C_INPUT_CHANNEL * C_INPUT_CHANNEL_BITWIDTH - 1 downto 0);
  -- islv_weights and islv_threshold are constants
  islv_weights : in std_logic_vector(C_KERNEL_SIZE * C_KERNEL_SIZE * C_INPUT_CHANNEL * C_OUTPUT_CHANNEL - 1 downto 0);
- islv_threshold : in std_logic_vector(C_OUTPUT_CHANNEL * log2(C_KERNEL_SIZE ** 2 * C_INPUT_CHANNEL + 1) - 1 downto 0);
+ islv_threshold : in std_logic_vector(C_OUTPUT_CHANNEL * log2(C_KERNEL_SIZE ** 2 * C_INPUT_CHANNEL * C_INPUT_CHANNEL_BITWIDTH + 1) - 1 downto 0);
  oslv_data : out std_logic_vector(C_OUTPUT_CHANNEL * C_OUTPUT_CHANNEL_BITWIDTH - 1 downto 0);
  osl_valid : out std_logic
  );
@@ -40,13 +39,13 @@ end entity window_convolution_activation;
 architecture behavioral of window_convolution_activation is
 
  signal sl_valid_window_ctrl : std_logic := '0';
- signal slv_data_window_ctrl : std_logic_vector(C_KERNEL_SIZE * C_KERNEL_SIZE * C_INPUT_CHANNEL - 1 downto 0);
+ signal slv_data_window_ctrl : std_logic_vector(C_KERNEL_SIZE * C_KERNEL_SIZE * C_INPUT_CHANNEL * C_INPUT_CHANNEL_BITWIDTH - 1 downto 0);
 
  signal slv_valid_convolution : std_logic_vector(C_OUTPUT_CHANNEL - 1 downto 0);
 
  type t_slv_array_1d is array(natural range <>) of std_logic_vector;
 
- constant C_POST_CONVOLUTION_BITWIDTH : integer := log2(C_KERNEL_SIZE ** 2 * C_INPUT_CHANNEL + 1);
+ constant C_POST_CONVOLUTION_BITWIDTH : integer := log2(C_KERNEL_SIZE ** 2 * C_INPUT_CHANNEL * C_INPUT_CHANNEL_BITWIDTH + 1);
  signal a_data_convolution : t_slv_array_1d(0 to C_OUTPUT_CHANNEL - 1)(C_POST_CONVOLUTION_BITWIDTH - 1 downto 0);
 
  signal slv_valid_batch_normalization : std_logic_vector(C_OUTPUT_CHANNEL - 1 downto 0);
@@ -77,7 +76,7 @@ begin
 
  i_window_ctrl : entity window_ctrl_lib.window_ctrl
  generic map (
- C_BITWIDTH => 1 * C_INPUT_CHANNEL,
+ C_BITWIDTH => C_INPUT_CHANNEL * C_INPUT_CHANNEL_BITWIDTH,
  C_CH_IN => 1,
  C_CH_OUT => 1,
  C_IMG_WIDTH => C_IMG_WIDTH,
@@ -100,8 +99,9 @@ begin
 
  i_convolution : entity cnn_lib.convolution
  generic map (
- C_KERNEL_SIZE => C_KERNEL_SIZE,
- C_INPUT_CHANNEL => C_INPUT_CHANNEL
+ C_KERNEL_SIZE => C_KERNEL_SIZE,
+ C_INPUT_CHANNEL => C_INPUT_CHANNEL,
+ C_INPUT_CHANNEL_BITWIDTH => C_INPUT_CHANNEL_BITWIDTH
  )
  port map (
  isl_clk => isl_clk,