Merge pull request dmlc#155 from zhongwen/batch_norm
Running estimation of the mean (exp) and variance (var) in the BN layer.
zhongwen committed May 24, 2015
2 parents 584d562 + 2f64601 commit 9cafe1b
Showing 3 changed files with 54 additions and 10 deletions.
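In short, the patch keeps running estimates of each channel's mean (running_exp_) and variance (running_var_), blended with weight bn_momentum_ during training, and normalizes with those estimates instead of the current batch's statistics at test time when the moving average is enabled. A minimal scalar sketch of that bookkeeping, with hypothetical names rather than the mshadow-based cxxnet code:

// Illustrative sketch only -- a scalar stand-in for the per-channel tensors in this commit.
// RunningStats, Update, and Normalize are hypothetical names, not cxxnet APIs.
#include <cmath>
#include <cstdio>

struct RunningStats {
  float running_mean = 0.0f;   // plays the role of running_exp_
  float running_var  = 0.0f;   // plays the role of running_var_
  float momentum     = 0.9f;   // plays the role of bn_momentum_
  float eps          = 1e-10f; // plays the role of eps_

  // Training: blend the current batch's statistics into the running estimates.
  void Update(float batch_mean, float batch_var) {
    running_mean = running_mean * momentum + batch_mean * (1.0f - momentum);
    running_var  = running_var  * momentum + batch_var  * (1.0f - momentum);
  }

  // Test time (moving average enabled): normalize with the running estimates.
  float Normalize(float x, float slope, float bias) const {
    return slope * (x - running_mean) / std::sqrt(running_var + eps) + bias;
  }
};

int main() {
  RunningStats s;
  s.Update(0.5f, 2.0f);  // statistics of batch 1
  s.Update(0.7f, 1.5f);  // statistics of batch 2
  std::printf("running mean %.3f, running var %.3f\n", s.running_mean, s.running_var);
  std::printf("normalized %.3f\n", s.Normalize(1.0f, 1.0f, 0.0f));
  return 0;
}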
59 changes: 50 additions & 9 deletions src/layer/batch_norm_layer-inl.hpp
@@ -10,18 +10,20 @@
namespace cxxnet {
namespace layer {

template<typename xpu, bool moving_avg>
class BatchNormLayer : public ILayer<xpu> {
public:
BatchNormLayer(mshadow::Random<xpu> *p_rnd) : prnd_(p_rnd) {
init_slope_ = 1.0f;
init_bias_ = 0.0f;
eps_ = 1e-10f;
bn_momentum_ = 0.9f;
}
virtual void SetParam(const char *name, const char* val) {
if (!strcmp(name, "init_slope")) init_slope_ = atof(val);
if (!strcmp(name, "init_bias")) init_bias_ = atof(val);
if (!strcmp(name, "eps")) eps_ = atof(val);
if (!strcmp(name, "bn_momentum")) bn_momentum_ = atof(val);
}
virtual void ApplyVisitor(typename ILayer<xpu>::IVisitor *pvisitor) {
pvisitor->Visit("wmat", slope_, gslope_);
@@ -54,7 +56,14 @@ class BatchNormLayer : public ILayer<xpu> {
wtf_.Resize(slope_.shape_);
bias_.Resize(slope_.shape_);
gbias_.Resize(slope_.shape_);
if (moving_avg) {
running_exp_.Resize(slope_.shape_);
running_var_.Resize(slope_.shape_);
running_exp_ = 0.0f;
running_var_ = 0.0f;
}
gslope_ = 0.0f;
gbias_ = 0.0f;
gexp_ = 0.0f;
gvar_ = 0.0f;
slope_ = init_slope_;
@@ -63,10 +72,18 @@
virtual void SaveModel(utils::IStream &fo) const{
slope_.SaveBinary(fo);
bias_.SaveBinary(fo);
if (moving_avg) {
running_exp_.SaveBinary(fo);
running_var_.SaveBinary(fo);
}
}
virtual void LoadModel(utils::IStream &fi){
slope_.LoadBinary(fi);
bias_.LoadBinary(fi);
if (moving_avg) {
running_exp_.LoadBinary(fi);
running_var_.LoadBinary(fi);
}
gslope_.Resize(slope_.shape_);
exp_.Resize(slope_.shape_);
gexp_.Resize(slope_.shape_);
@@ -89,6 +106,10 @@ class BatchNormLayer : public ILayer<xpu> {
wtf_.set_stream(stream);
bias_.set_stream(stream);
gbias_.set_stream(stream);
if (moving_avg) {
running_exp_.set_stream(stream);
running_var_.set_stream(stream);
}
}
virtual void OnBatchSizeChanged(const std::vector<Node<xpu>*> &nodes_in,
const std::vector<Node<xpu>*> &nodes_out,
@@ -119,19 +140,36 @@
F<op::square_root>(broadcast<3>(var_ + eps_, in_shape_));
out = in * broadcast<3>(slope_, in.shape_) + broadcast<3>(bias_, in.shape_);
}
if (moving_avg) {
running_exp_ = running_exp_ * bn_momentum_ + exp_ * (1 - bn_momentum_);
running_var_ = running_var_ * bn_momentum_ + var_ * (1 - bn_momentum_);
}
} else {
  if (in.size(1) != 1) {
    if (moving_avg) {
      out = broadcast<1>(slope_ / F<op::square_root>(running_var_ + eps_), in.shape_) *
          in + broadcast<1>(bias_ - (slope_ * running_exp_) /
                            F<op::square_root>(running_var_ + eps_), in.shape_);
    } else {
      exp_ = scale * sumall_except_dim<1>(in);
      var_ = scale * sumall_except_dim<1>(F<op::square>(in - broadcast<1>(exp_, in.shape_)));
      out = broadcast<1>(slope_ / F<op::square_root>(var_ + eps_), in.shape_) *
          in + broadcast<1>(bias_ - (slope_ * exp_) /
                            F<op::square_root>(var_ + eps_), in.shape_);
    }
  } else {
    if (moving_avg) {
      out = broadcast<3>(slope_ / F<op::square_root>(running_var_ + eps_), in.shape_) *
          in + broadcast<3>(bias_ - (slope_ * running_exp_) /
                            F<op::square_root>(running_var_ + eps_), in.shape_);
    } else {
      exp_ = scale * sumall_except_dim<3>(in);
      var_ = scale * sumall_except_dim<3>(F<op::square>(in - broadcast<3>(exp_, in.shape_)));
      out = broadcast<3>(slope_ / F<op::square_root>(var_ + eps_), in.shape_) *
          in + broadcast<3>(bias_ - (slope_ * exp_) /
                            F<op::square_root>(var_ + eps_), in.shape_);
    }
  }
}
}
@@ -191,9 +229,12 @@ class BatchNormLayer : public ILayer<xpu> {
mshadow::TensorContainer<xpu, 1> gexp_;
mshadow::TensorContainer<xpu, 1> var_;
mshadow::TensorContainer<xpu, 1> gvar_;
mshadow::TensorContainer<xpu, 1> running_exp_;
mshadow::TensorContainer<xpu, 1> running_var_;
float init_slope_;
float init_bias_;
float eps_;
float bn_momentum_;
}; // class BatchNormLayer

} // namespace layer
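The test-time branch above multiplies the input by slope_ / sqrt(running_var_ + eps_) and adds bias_ - slope_ * running_exp_ / sqrt(running_var_ + eps_); that is the usual slope * (x - mean) / sqrt(var + eps) + bias normalization with the running statistics folded into one per-channel scale and shift. A standalone scalar check of that algebra (plain C++, not cxxnet code; the constants are made up):

// Illustrative sketch only: folding the running statistics into a single
// scale/shift gives the same result as the direct normalization formula.
#include <cmath>
#include <cstdio>

int main() {
  const float slope = 1.5f, bias = 0.2f;  // learned scale/shift (gamma, beta)
  const float running_mean = 0.4f;        // stands in for running_exp_
  const float running_var = 2.0f;         // stands in for running_var_
  const float eps = 1e-10f;               // stands in for eps_

  const float scale = slope / std::sqrt(running_var + eps);
  const float shift = bias - slope * running_mean / std::sqrt(running_var + eps);

  const float x = 1.0f;
  const float direct = slope * (x - running_mean) / std::sqrt(running_var + eps) + bias;
  const float folded = scale * x + shift;
  std::printf("direct %.6f, folded %.6f\n", direct, folded);  // the two agree
  return 0;
}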
2 changes: 2 additions & 0 deletions src/layer/layer.h
@@ -312,6 +312,7 @@ const int kChConcat = 28;
const int kPRelu = 29;
const int kBatchNorm = 30;
const int kFixConnect = 31;
const int kBatchNorm_no_ma = 32;
/*! \brief gap used to encode pairtest layer */
const int kPairTestGap = 1024;
/*! \brief use integer to encode layer types */
@@ -350,6 +351,7 @@ inline LayerType GetLayerType(const char *type) {
if (!strcmp(type, "ch_concat")) return kChConcat;
if (!strcmp(type, "prelu")) return kPRelu;
if (!strcmp(type, "batch_norm")) return kBatchNorm;
if (!strcmp(type, "batch_norm_no_ma")) return kBatchNorm_no_ma;
#if CXXNET_USE_CAFFE_ADAPTOR
if (!strcmp(type, "caffe")) return kCaffe;
#endif
3 changes: 2 additions & 1 deletion src/layer/layer_impl-inl.hpp
@@ -65,7 +65,8 @@ ILayer<xpu>* CreateLayer_(LayerType type,
case kInsanity: return new InsanityLayer<xpu>(p_rnd);
case kInsanityPooling: return new InsanityPoolingLayer<mshadow::red::maximum, kMaxPooling, xpu>(p_rnd);
case kPRelu: return new PReluLayer<xpu>(p_rnd);
case kBatchNorm: return new BatchNormLayer<xpu, true>(p_rnd);
case kBatchNorm_no_ma: return new BatchNormLayer<xpu, false>(p_rnd);
case kLpLoss: return new LpLossLayer<xpu>(label_info);
case kMultiLogistic: return new MultiLogisticLayer<xpu>(label_info);
#if CXXNET_USE_CAFFE_ADAPTOR
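The factory above maps the two layer names onto the two instantiations of the template: "batch_norm" keeps the moving averages (BatchNormLayer<xpu, true>), while "batch_norm_no_ma" always normalizes with the statistics of the current batch (BatchNormLayer<xpu, false>). A self-contained toy sketch of this compile-time dispatch pattern, with hypothetical names rather than cxxnet code:

// Illustrative sketch only: a bool template parameter selected from a string,
// mirroring how CreateLayer_ picks BatchNormLayer<xpu, true/false> above.
#include <cstring>
#include <cstdio>

template<bool moving_avg>
struct ToyBatchNorm {
  static void Describe() {
    std::printf("moving average %s\n", moving_avg ? "enabled" : "disabled");
  }
};

inline void CreateToyLayer(const char *type) {
  if (!std::strcmp(type, "batch_norm")) return ToyBatchNorm<true>::Describe();
  if (!std::strcmp(type, "batch_norm_no_ma")) return ToyBatchNorm<false>::Describe();
  std::printf("unknown layer type: %s\n", type);
}

int main() {
  CreateToyLayer("batch_norm");        // the default, with running statistics
  CreateToyLayer("batch_norm_no_ma");  // opts out of the moving average
  return 0;
}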
