Skip to content

Commit

Permalink
Fixed a random cropping/mirroring bug (keeping same cropping/mirrorin…
Browse files Browse the repository at this point in the history
…g for images within a same video clip)
  • Loading branch information
Chuck Cho committed Mar 29, 2016
1 parent 313fd24 commit 1f2f559
Show file tree
Hide file tree
Showing 2 changed files with 72 additions and 20 deletions.
44 changes: 41 additions & 3 deletions include/caffe/data_transformer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,11 @@ class DataTransformer {
*/
void InitRand();

/**
* @brief Seeds the random number generator with the given seed
*/
void SetRandFromSeed(const unsigned int rng_seed);

/**
* @brief Applies the transformation defined in the data layer's
* transform_param block to the data.
Expand Down Expand Up @@ -67,7 +72,7 @@ class DataTransformer {
*/
void Transform(const vector<cv::Mat> & mat_vector,
Blob<Dtype>* transformed_blob,
const bool is_video=false);
const bool is_video = false);

/**
* @brief Applies the transformation defined in the data layer's
Expand All @@ -81,10 +86,38 @@ class DataTransformer {
* @param force_no_mean
* If 3D mean cube was already subtracted, mean subtraction should not
* happen again for individual images.
* @param is_video
* A flag to reuse the same random seed so that croppings/mirrorings and
* other data augmentations are replicated for all images within the same video clip
*/
void Transform(const cv::Mat& cv_img,
Blob<Dtype>* transformed_blob,
const bool force_no_mean=false);
const bool force_no_mean = false,
const bool is_video = false);

// ------------------------------------------------------------------
// Dextro custom functions
// Actual function that applies the transform, given the offset
// of the crop
void Transform(const cv::Mat& cv_img, Blob<Dtype>* transformed_blob,
int h_off, int w_off, bool is_seg, const bool force_no_mean = false);

/**
* @brief Applies the transformation defined in the data layer's
* transform_param block to a pair of cv::Mat.
* Ignores mean subtraction for second cv::Mat
* Useful for an (image, label) pair
* Appends seg as last channel of image
*
* @param cv_img
* 1-channel cv::Mat containing the data to be transformed.
* @param transformed_blob
* This is destination blob. It can be part of top blob's data if
* set_cpu_data() is used. See image_seg_data_layer.cpp for an example.
*/
void TransformImageSeg(const cv::Mat& cv_img, const cv::Mat & cv_seg,
Blob<Dtype>* transformed_data);
// ------------------------------------------------------------------
#endif // USE_OPENCV

/**
Expand Down Expand Up @@ -157,8 +190,13 @@ class DataTransformer {
// Transformation parameters
TransformationParameter param_;


shared_ptr<Caffe::RNG> rng_;

// ------------------------------------------------------------------
// rng_seed_ is used to replicate random cropping/mirroring across all images
// within the same video clip
unsigned int rng_seed_;

Phase phase_;
Blob<Dtype> data_mean_;
vector<Dtype> mean_values_;
Expand Down
48 changes: 31 additions & 17 deletions src/caffe/data_transformer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -213,7 +213,7 @@ void DataTransformer<Dtype>::Transform(const vector<cv::Mat> & mat_vector,
const int length = transformed_blob->shape(2);
const int height = transformed_blob->shape(3);
const int width = transformed_blob->shape(4);
bool c3d_mean_subtraction_applied;
bool mean_cube_subtracted = false;

// if mean cube file is given, 3d mean cube should be subtracted at this
// level
Expand All @@ -225,15 +225,17 @@ void DataTransformer<Dtype>::Transform(const vector<cv::Mat> & mat_vector,
CHECK_EQ(mat_vector[0].rows, data_mean_.height());
CHECK_EQ(mat_vector[0].cols, data_mean_.width());
mean = data_mean_.mutable_cpu_data();
if (1 != length && length == data_mean_.length()) {
c3d_mean_subtraction_applied = true;
if ((1 != length) && (length == data_mean_.length())) {
mean_cube_subtracted = true;
} else {
CHECK_EQ(1, data_mean_.channels()); // will be applied later
c3d_mean_subtraction_applied = false;
CHECK_EQ(1, data_mean_.length()); // will be applied later
}
} else {
c3d_mean_subtraction_applied = false;
}

// mirroring and random cropping should not be done on each image
// individually, as all images come from the same video clip
rng_seed_ = caffe_rng_rand();

CHECK_GT(mat_num, 0) << "There is no MAT to add";
CHECK_EQ(num, 1) << "First dimension (batch number) must be 1";
CHECK_EQ(mat_num, length) <<
Expand All @@ -248,16 +250,16 @@ void DataTransformer<Dtype>::Transform(const vector<cv::Mat> & mat_vector,
vector<int> indices(5);
for (int item_id = 0; item_id < mat_num; ++item_id) {
cv::Mat cv_img = mat_vector[item_id].clone();
CHECK(mat_vector[item_id].depth() == CV_8U) << "Image data type must be unsigned byte";
CHECK(mat_vector[item_id].depth() == CV_8U) << "Image data type must " <<
"be unsigned byte";

// mean cube subtraction for C3D data
if (c3d_mean_subtraction_applied) {
if (mean_cube_subtracted) {
for (int c = 0; c < cv_img.channels(); ++c) {
for (int h = 0; h < cv_img.rows; ++h) {
for (int w = 0; w < cv_img.cols; ++w) {
int mean_index = ((c * length + item_id) * height + h) * width + w;
//LOG(INFO) << "h="<<h<<", w="<<w<<", c="<<c<<"mean_index="<<mean_index;
//LOG(INFO) << "mean[]="<< mean[mean_index];
int mean_index = ((c * length + item_id) * height + h) * width
+ w;
cv_img.at<cv::Vec3b>(h, w)[c] -= mean[mean_index];
}
}
Expand All @@ -271,7 +273,8 @@ void DataTransformer<Dtype>::Transform(const vector<cv::Mat> & mat_vector,
indices[4] = 0;
int offset = transformed_blob->offset(indices);
uni_blob.set_cpu_data(transformed_blob->mutable_cpu_data() + offset);
Transform(cv_img, &uni_blob, c3d_mean_subtraction_applied);

Transform(cv_img, &uni_blob, mean_cube_subtracted, true);
}
} else {
const int mat_num = mat_vector.size();
Expand All @@ -295,7 +298,8 @@ void DataTransformer<Dtype>::Transform(const vector<cv::Mat> & mat_vector,
template<typename Dtype>
void DataTransformer<Dtype>::Transform(const cv::Mat& cv_img,
Blob<Dtype>* transformed_blob,
const bool force_no_mean) {
const bool force_no_mean,
const bool is_video) {
const int crop_size = param_.crop_size();
const int img_channels = cv_img.channels();
const int img_height = cv_img.rows;
Expand Down Expand Up @@ -341,6 +345,11 @@ void DataTransformer<Dtype>::Transform(const cv::Mat& cv_img,
}
}

// For videos, re-use random seed to replicate randomness
// (e.g. same croppings, mirrorings)
if (is_video)
SetRandFromSeed(rng_seed_);

int h_off = 0;
int w_off = 0;
cv::Mat cv_cropped_img = cv_img;
Expand Down Expand Up @@ -586,9 +595,9 @@ vector<int> DataTransformer<Dtype>::InferBlobShape(
vector<int> tmp_shape = InferBlobShape(mat_vector, false);
CHECK_EQ(tmp_shape.size(), 4) << "A mat_vector must be 4-dimensional";
vector<int> shape(5);
shape[0] = 1; // num of batches
shape[1] = tmp_shape[1]; // num of channels
shape[2] = num; // this is actually "length" or "depth" of C3D blob
shape[0] = 1; // num of batches
shape[1] = tmp_shape[1]; // num of channels
shape[2] = num; // this is actually "length" of C3D blob
shape[3] = tmp_shape[2];
shape[4] = tmp_shape[3];
return shape;
Expand Down Expand Up @@ -617,6 +626,11 @@ void DataTransformer<Dtype>::InitRand() {
}
}

// Replaces the transformer's random number generator with a new one
// constructed from rng_seed.
template <typename Dtype>
void DataTransformer<Dtype>::SetRandFromSeed(const unsigned int rng_seed) {
  // Build the freshly seeded generator first, then swap it into rng_; the
  // previously held generator (if any) is released when the local goes out
  // of scope.
  shared_ptr<Caffe::RNG> seeded_rng(new Caffe::RNG(rng_seed));
  rng_.swap(seeded_rng);
}

template <typename Dtype>
int DataTransformer<Dtype>::Rand(int n) {
CHECK(rng_);
Expand Down

0 comments on commit 1f2f559

Please sign in to comment.