Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Have you ever tried batch normalization? #20

Closed
twmht opened this issue Aug 6, 2018 · 4 comments
Closed

Have you ever tried batch normalization? #20

twmht opened this issue Aug 6, 2018 · 4 comments

Comments

@twmht
Copy link

twmht commented Aug 6, 2018

@happynear

I tried to add batch normalization on your modified resnet20, but the loss became 87.3365. As far as I know, BN helps learning more quickly, Is it possible to add batch normalization with amsoftmax?

Here is the prototxt

layer {
  name: "input"
  type: "Input"
  top: "data"
  input_param {
    shape {
      dim: 1
      dim: 3
      dim: 160
      dim: 160
    }
  }
}
layer {
  name: "conv1_1"
  type: "Convolution"
  bottom: "data"
  top: "conv1_1"
  param {
    lr_mult: 1.0
    decay_mult: 1.0
  }
  param {
    lr_mult: 2.0
    decay_mult: 0
  }
  convolution_param {
    num_output: 64
    pad: 1
    kernel_size: 3
    stride: 2
    weight_filler {
      type: "xavier"
    }
  }
}
layer {
  name: "conv1_1/bn"
  type: "BatchNorm"
  bottom: "conv1_1"
  top: "conv1_1"
  param {
    lr_mult: 0
  }
  param {
    lr_mult: 0
  }
  param {
    lr_mult: 0
  }
}
layer {
  name: "conv1_1/scale"
  type: "Scale"
  bottom: "conv1_1"
  top: "conv1_1"
  param {
    lr_mult: 1
    decay_mult: 0
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  scale_param {
    bias_term: true
  }
}
layer {
  name: "relu1_1"
  type: "PReLU"
  bottom: "conv1_1"
  top: "conv1_1"
}
layer {
  name: "conv1_2"
  type: "Convolution"
  bottom: "conv1_1"
  top: "conv1_2"
  param {
    lr_mult: 1.0
    decay_mult: 1.0
  }
  param {
    lr_mult: 2.0
    decay_mult: 0
  }
  convolution_param {
    num_output: 64
    pad: 1
    kernel_size: 3
    stride: 1
    weight_filler {
      type: "xavier"
    }
  }
}
layer {
  name: "conv1_2/bn"
  type: "BatchNorm"
  bottom: "conv1_2"
  top: "conv1_2"
  param {
    lr_mult: 0
  }
  param {
    lr_mult: 0
  }
  param {
    lr_mult: 0
  }
}
layer {
  name: "conv1_2/scale"
  type: "Scale"
  bottom: "conv1_2"
  top: "conv1_2"
  param {
    lr_mult: 1
    decay_mult: 0
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  scale_param {
    bias_term: true
  }
}
layer {
  name: "relu1_2"
  type: "PReLU"
  bottom: "conv1_2"
  top: "conv1_2"
}
layer {
  name: "conv1_3"
  type: "Convolution"
  bottom: "conv1_2"
  top: "conv1_3"
  param {
    lr_mult: 1.0
    decay_mult: 1.0
  }
  param {
    lr_mult: 2.0
    decay_mult: 0
  }
  convolution_param {
    num_output: 64
    pad: 1
    kernel_size: 3
    stride: 1
    weight_filler {
      type: "xavier"
    }
  }
}
layer {
  name: "conv1_3/bn"
  type: "BatchNorm"
  bottom: "conv1_3"
  top: "conv1_3"
  param {
    lr_mult: 0
  }
  param {
    lr_mult: 0
  }
  param {
    lr_mult: 0
  }
}
layer {
  name: "conv1_3/scale"
  type: "Scale"
  bottom: "conv1_3"
  top: "conv1_3"
  param {
    lr_mult: 1
    decay_mult: 0
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  scale_param {
    bias_term: true
  }
}
layer {
  name: "relu1_3"
  type: "PReLU"
  bottom: "conv1_3"
  top: "conv1_3"
}
layer {
  name: "res1_3"
  type: "Eltwise"
  bottom: "conv1_1"
  bottom: "conv1_3"
  top: "res1_3"
}
layer {
  name: "conv2_1"
  type: "Convolution"
  bottom: "res1_3"
  top: "conv2_1"
  param {
    lr_mult: 1.0
    decay_mult: 1.0
  }
  param {
    lr_mult: 2.0
    decay_mult: 0
  }
  convolution_param {
    num_output: 128
    pad: 1
    kernel_size: 3
    stride: 2
    weight_filler {
      type: "xavier"
    }
  }
}
layer {
  name: "conv2_1/bn"
  type: "BatchNorm"
  bottom: "conv2_1"
  top: "conv2_1"
  param {
    lr_mult: 0
  }
  param {
    lr_mult: 0
  }
  param {
    lr_mult: 0
  }
}
layer {
  name: "conv2_1/scale"
  type: "Scale"
  bottom: "conv2_1"
  top: "conv2_1"
  param {
    lr_mult: 1
    decay_mult: 0
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  scale_param {
    bias_term: true
  }
}
layer {
  name: "relu2_1"
  type: "PReLU"
  bottom: "conv2_1"
  top: "conv2_1"
}
layer {
  name: "conv2_2"
  type: "Convolution"
  bottom: "conv2_1"
  top: "conv2_2"
  param {
    lr_mult: 1.0
    decay_mult: 1.0
  }
  param {
    lr_mult: 2.0
    decay_mult: 0
  }
  convolution_param {
    num_output: 128
    pad: 1
    kernel_size: 3
    stride: 1
    weight_filler {
      type: "xavier"
    }
  }
}
layer {
  name: "conv2_2/bn"
  type: "BatchNorm"
  bottom: "conv2_2"
  top: "conv2_2"
  param {
    lr_mult: 0
  }
  param {
    lr_mult: 0
  }
  param {
    lr_mult: 0
  }
}
layer {
  name: "conv2_2/scale"
  type: "Scale"
  bottom: "conv2_2"
  top: "conv2_2"
  param {
    lr_mult: 1
    decay_mult: 0
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  scale_param {
    bias_term: true
  }
}
layer {
  name: "relu2_2"
  type: "PReLU"
  bottom: "conv2_2"
  top: "conv2_2"
}
layer {
  name: "conv2_3"
  type: "Convolution"
  bottom: "conv2_2"
  top: "conv2_3"
  param {
    lr_mult: 1.0
    decay_mult: 1.0
  }
  param {
    lr_mult: 2.0
    decay_mult: 0
  }
  convolution_param {
    num_output: 128
    pad: 1
    kernel_size: 3
    stride: 1
    weight_filler {
      type: "xavier"
    }
  }
}
layer {
  name: "conv2_3/bn"
  type: "BatchNorm"
  bottom: "conv2_3"
  top: "conv2_3"
  param {
    lr_mult: 0
  }
  param {
    lr_mult: 0
  }
  param {
    lr_mult: 0
  }
}
layer {
  name: "conv2_3/scale"
  type: "Scale"
  bottom: "conv2_3"
  top: "conv2_3"
  param {
    lr_mult: 1
    decay_mult: 0
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  scale_param {
    bias_term: true
  }
}
layer {
  name: "relu2_3"
  type: "PReLU"
  bottom: "conv2_3"
  top: "conv2_3"
}
layer {
  name: "res2_3"
  type: "Eltwise"
  bottom: "conv2_1"
  bottom: "conv2_3"
  top: "res2_3"
}
layer {
  name: "conv2_4"
  type: "Convolution"
  bottom: "res2_3"
  top: "conv2_4"
  param {
    lr_mult: 1.0
    decay_mult: 1.0
  }
  param {
    lr_mult: 2.0
    decay_mult: 0
  }
  convolution_param {
    num_output: 128
    pad: 1
    kernel_size: 3
    stride: 1
    weight_filler {
      type: "xavier"
    }
  }
}
layer {
  name: "conv2_4/bn"
  type: "BatchNorm"
  bottom: "conv2_4"
  top: "conv2_4"
  param {
    lr_mult: 0
  }
  param {
    lr_mult: 0
  }
  param {
    lr_mult: 0
  }
}
layer {
  name: "conv2_4/scale"
  type: "Scale"
  bottom: "conv2_4"
  top: "conv2_4"
  param {
    lr_mult: 1
    decay_mult: 0
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  scale_param {
    bias_term: true
  }
}
layer {
  name: "relu2_4"
  type: "PReLU"
  bottom: "conv2_4"
  top: "conv2_4"
}
layer {
  name: "conv2_5"
  type: "Convolution"
  bottom: "conv2_4"
  top: "conv2_5"
  param {
    lr_mult: 1.0
    decay_mult: 1.0
  }
  param {
    lr_mult: 2.0
    decay_mult: 0
  }
  convolution_param {
    num_output: 128
    pad: 1
    kernel_size: 3
    stride: 1
    weight_filler {
      type: "xavier"
    }
  }
}
layer {
  name: "conv2_5/bn"
  type: "BatchNorm"
  bottom: "conv2_5"
  top: "conv2_5"
  param {
    lr_mult: 0
  }
  param {
    lr_mult: 0
  }
  param {
    lr_mult: 0
  }
}
layer {
  name: "conv2_5/scale"
  type: "Scale"
  bottom: "conv2_5"
  top: "conv2_5"
  param {
    lr_mult: 1
    decay_mult: 0
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  scale_param {
    bias_term: true
  }
}
layer {
  name: "relu2_5"
  type: "PReLU"
  bottom: "conv2_5"
  top: "conv2_5"
}
layer {
  name: "res2_5"
  type: "Eltwise"
  bottom: "res2_3"
  bottom: "conv2_5"
  top: "res2_5"
}
layer {
  name: "conv3_1"
  type: "Convolution"
  bottom: "res2_5"
  top: "conv3_1"
  param {
    lr_mult: 1.0
    decay_mult: 1.0
  }
  param {
    lr_mult: 2.0
    decay_mult: 0
  }
  convolution_param {
    num_output: 256
    pad: 1
    kernel_size: 3
    stride: 2
    weight_filler {
      type: "xavier"
    }
  }
}
layer {
  name: "conv3_1/bn"
  type: "BatchNorm"
  bottom: "conv3_1"
  top: "conv3_1"
  param {
    lr_mult: 0
  }
  param {
    lr_mult: 0
  }
  param {
    lr_mult: 0
  }
}
layer {
  name: "conv3_1/scale"
  type: "Scale"
  bottom: "conv3_1"
  top: "conv3_1"
  param {
    lr_mult: 1
    decay_mult: 0
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  scale_param {
    bias_term: true
  }
}
layer {
  name: "relu3_1"
  type: "PReLU"
  bottom: "conv3_1"
  top: "conv3_1"
}
layer {
  name: "conv3_2"
  type: "Convolution"
  bottom: "conv3_1"
  top: "conv3_2"
  param {
    lr_mult: 1.0
    decay_mult: 1.0
  }
  param {
    lr_mult: 2.0
    decay_mult: 0
  }
  convolution_param {
    num_output: 256
    pad: 1
    kernel_size: 3
    stride: 1
    weight_filler {
      type: "xavier"
    }
  }
}
layer {
  name: "conv3_2/bn"
  type: "BatchNorm"
  bottom: "conv3_2"
  top: "conv3_2"
  param {
    lr_mult: 0
  }
  param {
    lr_mult: 0
  }
  param {
    lr_mult: 0
  }
}
layer {
  name: "conv3_2/scale"
  type: "Scale"
  bottom: "conv3_2"
  top: "conv3_2"
  param {
    lr_mult: 1
    decay_mult: 0
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  scale_param {
    bias_term: true
  }
}
layer {
  name: "relu3_2"
  type: "PReLU"
  bottom: "conv3_2"
  top: "conv3_2"
}
layer {
  name: "conv3_3"
  type: "Convolution"
  bottom: "conv3_2"
  top: "conv3_3"
  param {
    lr_mult: 1.0
    decay_mult: 1.0
  }
  param {
    lr_mult: 2.0
    decay_mult: 0
  }
  convolution_param {
    num_output: 256
    pad: 1
    kernel_size: 3
    stride: 1
    weight_filler {
      type: "xavier"
    }
  }
}
layer {
  name: "conv3_3/bn"
  type: "BatchNorm"
  bottom: "conv3_3"
  top: "conv3_3"
  param {
    lr_mult: 0
  }
  param {
    lr_mult: 0
  }
  param {
    lr_mult: 0
  }
}
layer {
  name: "conv3_3/scale"
  type: "Scale"
  bottom: "conv3_3"
  top: "conv3_3"
  param {
    lr_mult: 1
    decay_mult: 0
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  scale_param {
    bias_term: true
  }
}
layer {
  name: "relu3_3"
  type: "PReLU"
  bottom: "conv3_3"
  top: "conv3_3"
}
layer {
  name: "res3_3"
  type: "Eltwise"
  bottom: "conv3_1"
  bottom: "conv3_3"
  top: "res3_3"
}
layer {
  name: "conv3_4"
  type: "Convolution"
  bottom: "res3_3"
  top: "conv3_4"
  param {
    lr_mult: 1.0
    decay_mult: 1.0
  }
  param {
    lr_mult: 2.0
    decay_mult: 0
  }
  convolution_param {
    num_output: 256
    pad: 1
    kernel_size: 3
    stride: 1
    weight_filler {
      type: "xavier"
    }
  }
}
layer {
  name: "conv3_4/bn"
  type: "BatchNorm"
  bottom: "conv3_4"
  top: "conv3_4"
  param {
    lr_mult: 0
  }
  param {
    lr_mult: 0
  }
  param {
    lr_mult: 0
  }
}
layer {
  name: "conv3_4/scale"
  type: "Scale"
  bottom: "conv3_4"
  top: "conv3_4"
  param {
    lr_mult: 1
    decay_mult: 0
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  scale_param {
    bias_term: true
  }
}
layer {
  name: "relu3_4"
  type: "PReLU"
  bottom: "conv3_4"
  top: "conv3_4"
}
layer {
  name: "conv3_5"
  type: "Convolution"
  bottom: "conv3_4"
  top: "conv3_5"
  param {
    lr_mult: 1.0
    decay_mult: 1.0
  }
  param {
    lr_mult: 2.0
    decay_mult: 0
  }
  convolution_param {
    num_output: 256
    pad: 1
    kernel_size: 3
    stride: 1
    weight_filler {
      type: "xavier"
    }
  }
}
layer {
  name: "conv3_5/bn"
  type: "BatchNorm"
  bottom: "conv3_5"
  top: "conv3_5"
  param {
    lr_mult: 0
  }
  param {
    lr_mult: 0
  }
  param {
    lr_mult: 0
  }
}
layer {
  name: "conv3_5/scale"
  type: "Scale"
  bottom: "conv3_5"
  top: "conv3_5"
  param {
    lr_mult: 1
    decay_mult: 0
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  scale_param {
    bias_term: true
  }
}
layer {
  name: "relu3_5"
  type: "PReLU"
  bottom: "conv3_5"
  top: "conv3_5"
}
layer {
  name: "res3_5"
  type: "Eltwise"
  bottom: "res3_3"
  bottom: "conv3_5"
  top: "res3_5"
}
layer {
  name: "conv3_6"
  type: "Convolution"
  bottom: "res3_5"
  top: "conv3_6"
  param {
    lr_mult: 1.0
    decay_mult: 1.0
  }
  param {
    lr_mult: 2.0
    decay_mult: 0
  }
  convolution_param {
    num_output: 256
    pad: 1
    kernel_size: 3
    stride: 1
    weight_filler {
      type: "xavier"
    }
  }
}
layer {
  name: "conv3_6/bn"
  type: "BatchNorm"
  bottom: "conv3_6"
  top: "conv3_6"
  param {
    lr_mult: 0
  }
  param {
    lr_mult: 0
  }
  param {
    lr_mult: 0
  }
}
layer {
  name: "conv3_6/scale"
  type: "Scale"
  bottom: "conv3_6"
  top: "conv3_6"
  param {
    lr_mult: 1
    decay_mult: 0
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  scale_param {
    bias_term: true
  }
}
layer {
  name: "relu3_6"
  type: "PReLU"
  bottom: "conv3_6"
  top: "conv3_6"
}
layer {
  name: "conv3_7"
  type: "Convolution"
  bottom: "conv3_6"
  top: "conv3_7"
  param {
    lr_mult: 1.0
    decay_mult: 1.0
  }
  param {
    lr_mult: 2.0
    decay_mult: 0
  }
  convolution_param {
    num_output: 256
    pad: 1
    kernel_size: 3
    stride: 1
    weight_filler {
      type: "xavier"
    }
  }
}
layer {
  name: "conv3_7/bn"
  type: "BatchNorm"
  bottom: "conv3_7"
  top: "conv3_7"
  param {
    lr_mult: 0
  }
  param {
    lr_mult: 0
  }
  param {
    lr_mult: 0
  }
}
layer {
  name: "conv3_7/scale"
  type: "Scale"
  bottom: "conv3_7"
  top: "conv3_7"
  param {
    lr_mult: 1
    decay_mult: 0
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  scale_param {
    bias_term: true
  }
}
layer {
  name: "relu3_7"
  type: "PReLU"
  bottom: "conv3_7"
  top: "conv3_7"
}
layer {
  name: "res3_7"
  type: "Eltwise"
  bottom: "res3_5"
  bottom: "conv3_7"
  top: "res3_7"
}
layer {
  name: "conv3_8"
  type: "Convolution"
  bottom: "res3_7"
  top: "conv3_8"
  param {
    lr_mult: 1.0
    decay_mult: 1.0
  }
  param {
    lr_mult: 2.0
    decay_mult: 0
  }
  convolution_param {
    num_output: 256
    pad: 1
    kernel_size: 3
    stride: 1
    weight_filler {
      type: "xavier"
    }
  }
}
layer {
  name: "conv3_8/bn"
  type: "BatchNorm"
  bottom: "conv3_8"
  top: "conv3_8"
  param {
    lr_mult: 0
  }
  param {
    lr_mult: 0
  }
  param {
    lr_mult: 0
  }
}
layer {
  name: "conv3_8/scale"
  type: "Scale"
  bottom: "conv3_8"
  top: "conv3_8"
  param {
    lr_mult: 1
    decay_mult: 0
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  scale_param {
    bias_term: true
  }
}
layer {
  name: "relu3_8"
  type: "PReLU"
  bottom: "conv3_8"
  top: "conv3_8"
}
layer {
  name: "conv3_9"
  type: "Convolution"
  bottom: "conv3_8"
  top: "conv3_9"
  param {
    lr_mult: 1.0
    decay_mult: 1.0
  }
  param {
    lr_mult: 2.0
    decay_mult: 0
  }
  convolution_param {
    num_output: 256
    pad: 1
    kernel_size: 3
    stride: 1
    weight_filler {
      type: "xavier"
    }
  }
}
layer {
  name: "conv3_9/bn"
  type: "BatchNorm"
  bottom: "conv3_9"
  top: "conv3_9"
  param {
    lr_mult: 0
  }
  param {
    lr_mult: 0
  }
  param {
    lr_mult: 0
  }
}
layer {
  name: "conv3_9/scale"
  type: "Scale"
  bottom: "conv3_9"
  top: "conv3_9"
  param {
    lr_mult: 1
    decay_mult: 0
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  scale_param {
    bias_term: true
  }
}
layer {
  name: "relu3_9"
  type: "PReLU"
  bottom: "conv3_9"
  top: "conv3_9"
}
layer {
  name: "res3_9"
  type: "Eltwise"
  bottom: "res3_7"
  bottom: "conv3_9"
  top: "res3_9"
}
layer {
  name: "conv4_1"
  type: "Convolution"
  bottom: "res3_9"
  top: "conv4_1"
  param {
    lr_mult: 1.0
    decay_mult: 1.0
  }
  param {
    lr_mult: 2.0
    decay_mult: 0
  }
  convolution_param {
    num_output: 512
    pad: 1
    kernel_size: 3
    stride: 2
    weight_filler {
      type: "xavier"
    }
  }
}
layer {
  name: "conv4_1/bn"
  type: "BatchNorm"
  bottom: "conv4_1"
  top: "conv4_1"
  param {
    lr_mult: 0
  }
  param {
    lr_mult: 0
  }
  param {
    lr_mult: 0
  }
}
layer {
  name: "conv4_1/scale"
  type: "Scale"
  bottom: "conv4_1"
  top: "conv4_1"
  param {
    lr_mult: 1
    decay_mult: 0
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  scale_param {
    bias_term: true
  }
}
layer {
  name: "relu4_1"
  type: "PReLU"
  bottom: "conv4_1"
  top: "conv4_1"
}
layer {
  name: "conv4_2"
  type: "Convolution"
  bottom: "conv4_1"
  top: "conv4_2"
  param {
    lr_mult: 1.0
    decay_mult: 1.0
  }
  param {
    lr_mult: 2.0
    decay_mult: 0
  }
  convolution_param {
    num_output: 512
    pad: 1
    kernel_size: 3
    stride: 1
    weight_filler {
      type: "xavier"
    }
  }
}
layer {
  name: "conv4_2/bn"
  type: "BatchNorm"
  bottom: "conv4_2"
  top: "conv4_2"
  param {
    lr_mult: 0
  }
  param {
    lr_mult: 0
  }
  param {
    lr_mult: 0
  }
}
layer {
  name: "conv4_2/scale"
  type: "Scale"
  bottom: "conv4_2"
  top: "conv4_2"
  param {
    lr_mult: 1
    decay_mult: 0
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  scale_param {
    bias_term: true
  }
}
layer {
  name: "relu4_2"
  type: "PReLU"
  bottom: "conv4_2"
  top: "conv4_2"
}
layer {
  name: "conv4_3"
  type: "Convolution"
  bottom: "conv4_2"
  top: "conv4_3"
  param {
    lr_mult: 1.0
    decay_mult: 1.0
  }
  param {
    lr_mult: 2.0
    decay_mult: 0
  }
  convolution_param {
    num_output: 512
    pad: 1
    kernel_size: 3
    stride: 1
    weight_filler {
      type: "xavier"
    }
  }
}
layer {
  name: "conv4_3/bn"
  type: "BatchNorm"
  bottom: "conv4_3"
  top: "conv4_3"
  param {
    lr_mult: 0
  }
  param {
    lr_mult: 0
  }
  param {
    lr_mult: 0
  }
}
layer {
  name: "conv4_3/scale"
  type: "Scale"
  bottom: "conv4_3"
  top: "conv4_3"
  param {
    lr_mult: 1
    decay_mult: 0
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  scale_param {
    bias_term: true
  }
}
layer {
  name: "relu4_3"
  type: "PReLU"
  bottom: "conv4_3"
  top: "conv4_3"
}
layer {
  name: "res4_3"
  type: "Eltwise"
  bottom: "conv4_1"
  bottom: "conv4_3"
  top: "res4_3"
}
layer {
  name: "fc5"
  type: "InnerProduct"
  bottom: "res4_3"
  top: "fc5"
  inner_product_param {
    num_output: 512
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}

@happynear
Copy link
Owner

Please also add BN after fc5. I've tried that. It could converge.
Adding Batch Normalization would improve the results. It is recommended to do so if you want to get higher scores.

@twmht
Copy link
Author

twmht commented Aug 6, 2018

@happynear

Actually I have tried added BN after fc5 and fc6 (Maybe fc6 is not necessary to normalize), but the loss is still 87.3365. What is your learning rate?

I will try to remove the BN after fc6, so keep this issue open:)

@twmht
Copy link
Author

twmht commented Aug 7, 2018

@happynear

I have added BN after fc5, but the loss is still 87.3365. I also tried to reduce the learning rate to 0.01, but still not work.

Any advise?

thank you

@twmht
Copy link
Author

twmht commented Aug 7, 2018

ok. it seems that there are some problems with my own folk caffe. Now problem solved.

@twmht twmht closed this as completed Aug 7, 2018
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

2 participants