Commit c17226a
running estimation of exp and var in batch norm
zhongwen committed May 15, 2015
1 parent d32c38f commit c17226a
Showing 1 changed file with 26 additions and 10 deletions.
36 changes: 26 additions & 10 deletions src/layer/batch_norm_layer-inl.hpp
@@ -17,11 +17,13 @@ class BatchNormLayer : public ILayer<xpu> {
     init_slope_ = 1.0f;
     init_bias_ = 0.0f;
     eps_ = 1e-10f;
+    bn_momentum_ = 0.9f;
   }
   virtual void SetParam(const char *name, const char* val) {
     if (!strcmp(name, "init_slope")) init_slope_ = atof(val);
     if (!strcmp(name, "init_bias")) init_bias_ = atof(val);
     if (!strcmp(name, "eps")) eps_ = atof(val);
+    if (!strcmp(name, "bn_momentum")) bn_momentum_ = atof(val);
   }
   virtual void ApplyVisitor(typename ILayer<xpu>::IVisitor *pvisitor) {
     pvisitor->Visit("wmat", slope_, gslope_);
@@ -54,19 +56,28 @@ class BatchNormLayer : public ILayer<xpu> {
     wtf_.Resize(slope_.shape_);
     bias_.Resize(slope_.shape_);
     gbias_.Resize(slope_.shape_);
+    running_exp_.Resize(slope_.shape_);
+    running_var_.Resize(slope_.shape_);
     gslope_ = 0.0f;
     gbias_ = 0.0f;
     gexp_ = 0.0f;
     gvar_ = 0.0f;
+    running_exp_ = 0.0f;
+    running_var_ = 0.0f;
     slope_ = init_slope_;
     bias_ = init_bias_;
   }
   virtual void SaveModel(utils::IStream &fo) const{
     slope_.SaveBinary(fo);
     bias_.SaveBinary(fo);
+    running_exp_.SaveBinary(fo);
+    running_var_.SaveBinary(fo);
   }
   virtual void LoadModel(utils::IStream &fi){
     slope_.LoadBinary(fi);
     bias_.LoadBinary(fi);
+    running_exp_.LoadBinary(fi);
+    running_var_.LoadBinary(fi);
     gslope_.Resize(slope_.shape_);
     exp_.Resize(slope_.shape_);
     gexp_.Resize(slope_.shape_);
@@ -89,6 +100,8 @@ class BatchNormLayer : public ILayer<xpu> {
     wtf_.set_stream(stream);
     bias_.set_stream(stream);
     gbias_.set_stream(stream);
+    running_exp_.set_stream(stream);
+    running_var_.set_stream(stream);
   }
   virtual void OnBatchSizeChanged(const std::vector<Node<xpu>*> &nodes_in,
                                   const std::vector<Node<xpu>*> &nodes_out,
@@ -112,26 +125,26 @@ class BatchNormLayer : public ILayer<xpu> {
         in = (in - broadcast<1>(exp_, in.shape_)) /
             F<op::square_root>(broadcast<1>(var_ + eps_, in_shape_));
         out = in * broadcast<1>(slope_, in.shape_) + broadcast<1>(bias_, in.shape_);
+        running_exp_ = running_exp_ * bn_momentum_ + exp_ * (1 - bn_momentum_);
+        running_var_ = running_var_ * bn_momentum_ + var_ * (1 - bn_momentum_);
       } else {
         exp_ = scale * sumall_except_dim<3>(in);
         var_ = scale * sumall_except_dim<3>(F<op::square>(in - broadcast<3>(exp_, in.shape_)));
         in = (in - broadcast<3>(exp_, in.shape_)) /
             F<op::square_root>(broadcast<3>(var_ + eps_, in_shape_));
         out = in * broadcast<3>(slope_, in.shape_) + broadcast<3>(bias_, in.shape_);
+        running_exp_ = running_exp_ * bn_momentum_ + exp_ * (1 - bn_momentum_);
+        running_var_ = running_var_ * bn_momentum_ + var_ * (1 - bn_momentum_);
       }
     } else {
       if (in.size(1) != 1) {
         exp_ = scale * sumall_except_dim<1>(in);
         var_ = scale * sumall_except_dim<1>(F<op::square>(in - broadcast<1>(exp_, in.shape_)));
-        out = broadcast<1>(slope_ / F<op::square_root>(var_ + eps_), in.shape_) *
-            in + broadcast<1>(bias_ - (slope_ * exp_) /
-            F<op::square_root>(var_ + eps_), in.shape_);
+        out = broadcast<1>(slope_ / F<op::square_root>(running_var_ + eps_), in.shape_) *
+            in + broadcast<1>(bias_ - (slope_ * running_exp_) /
+            F<op::square_root>(running_var_ + eps_), in.shape_);
       } else {
         exp_ = scale * sumall_except_dim<3>(in);
         var_ = scale * sumall_except_dim<3>(F<op::square>(in - broadcast<3>(exp_, in.shape_)));
-        out = broadcast<3>(slope_ / F<op::square_root>(var_ + eps_), in.shape_) *
-            in + broadcast<3>(bias_ - (slope_ * exp_) /
-            F<op::square_root>(var_ + eps_), in.shape_);
+        out = broadcast<3>(slope_ / F<op::square_root>(running_var_ + eps_), in.shape_) *
+            in + broadcast<3>(bias_ - (slope_ * running_exp_) /
+            F<op::square_root>(running_var_ + eps_), in.shape_);
       }
     }
   }
@@ -191,9 +204,12 @@ class BatchNormLayer : public ILayer<xpu> {
   mshadow::TensorContainer<xpu, 1> gexp_;
   mshadow::TensorContainer<xpu, 1> var_;
   mshadow::TensorContainer<xpu, 1> gvar_;
+  mshadow::TensorContainer<xpu, 1> running_exp_;
+  mshadow::TensorContainer<xpu, 1> running_var_;
   float init_slope_;
   float init_bias_;
   float eps_;
+  float bn_momentum_;
 }; // class BatchNormLayer

 } // namespace layer
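In short, the commit keeps an exponential moving average of the per-channel batch mean (exp_) and variance during training, updated as running = running * bn_momentum + batch_stat * (1 - bn_momentum), persists both buffers in SaveModel/LoadModel, and normalizes with these running estimates instead of the current batch's statistics at inference time. For reference, here is a minimal, self-contained C++ sketch of that scheme; it is an illustration only, not the mshadow layer above, and the names BatchNorm1d, Forward, and is_train are made up for this example.

// Sketch of batch normalization with running mean/variance estimates,
// mirroring the update rule added by this commit. Hypothetical names;
// not the cxxnet/mshadow implementation.
#include <cmath>
#include <cstdio>
#include <vector>

struct BatchNorm1d {
  float momentum = 0.9f;  // mirrors bn_momentum_ = 0.9f in the diff
  float eps = 1e-10f;     // mirrors eps_ = 1e-10f
  std::vector<float> slope, bias;               // learned scale/shift
  std::vector<float> running_exp, running_var;  // EMA of mean and variance

  explicit BatchNorm1d(size_t channels)
      : slope(channels, 1.0f), bias(channels, 0.0f),
        running_exp(channels, 0.0f), running_var(channels, 0.0f) {}

  // x is batch-major: (*x)[n][c]. Normalizes in place.
  void Forward(std::vector<std::vector<float>> *x, bool is_train) {
    const size_t batch = x->size();
    const size_t channels = slope.size();
    for (size_t c = 0; c < channels; ++c) {
      float mean, var;
      if (is_train) {
        // Per-batch statistics, as in the training branch of the diff.
        mean = 0.0f;
        for (size_t n = 0; n < batch; ++n) mean += (*x)[n][c];
        mean /= batch;
        var = 0.0f;
        for (size_t n = 0; n < batch; ++n) {
          float d = (*x)[n][c] - mean;
          var += d * d;
        }
        var /= batch;
        // The running estimate this commit adds:
        // running = running * momentum + batch_stat * (1 - momentum)
        running_exp[c] = running_exp[c] * momentum + mean * (1 - momentum);
        running_var[c] = running_var[c] * momentum + var * (1 - momentum);
      } else {
        // Inference uses the accumulated running statistics, so the output
        // for an example no longer depends on what else is in the batch.
        mean = running_exp[c];
        var = running_var[c];
      }
      for (size_t n = 0; n < batch; ++n) {
        (*x)[n][c] =
            slope[c] * (((*x)[n][c] - mean) / std::sqrt(var + eps)) + bias[c];
      }
    }
  }
};

int main() {
  BatchNorm1d bn(2);
  std::vector<std::vector<float>> train_batch = {{1.0f, 10.0f}, {3.0f, 30.0f}};
  bn.Forward(&train_batch, /*is_train=*/true);  // updates running buffers
  std::vector<std::vector<float>> test = {{2.0f, 20.0f}};
  bn.Forward(&test, /*is_train=*/false);        // uses the running estimates
  std::printf("%f %f\n", test[0][0], test[0][1]);
  return 0;
}

One consequence worth noting: the diff initializes running_exp_ and running_var_ to zero, so with the default bn_momentum of 0.9 the running estimates are biased toward zero early in training and only approach the true statistics after many batches have been averaged in.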
