Fix incorrect results from Eigen-based IRFFT kernel when input dimens…

…ions exceed fft_length. Also fixes fft_ops_test msan failures for rank > 1 IRFFTs. The temporary buffer used for computing the (rank - 1) outer IFFTs is not fully initialized. Instead, only compute the (rank - 1) outer IFFTs over the valid positions of the temporary buffer. PiperOrigin-RevId: 158772209
TeeeJay · Jun 12, 2017 · 1bb1c0d · 1bb1c0d
1 parent cb94f36
commit 1bb1c0d
Show file tree

Hide file tree

Showing 3 changed files with 56 additions and 47 deletions.
diff --git a/tensorflow/core/kernels/fft_ops.cc b/tensorflow/core/kernels/fft_ops.cc
@@ -124,19 +124,21 @@ class FFTCPU : public FFTBase {
  void DoFFT(OpKernelContext* ctx, const Tensor& in, uint64* fft_shape,
  Tensor* out) override {
  // Create the axes (which are always trailing).
- auto axes = Eigen::ArrayXi::LinSpaced(FFTRank, 1, FFTRank);
+ const auto axes = Eigen::ArrayXi::LinSpaced(FFTRank, 1, FFTRank);
  auto device = ctx->eigen_device<CPUDevice>();
 
  if (!IsReal()) {
  auto input = (Tensor(in)).flat_inner_dims<complex64, FFTRank + 1>();
  // Compute the FFT using eigen.
  auto output = out->flat_inner_dims<complex64, FFTRank + 1>();
- output.device(device) = input.template fft < Eigen::BothParts,
- Forward ? Eigen::FFT_FORWARD : Eigen::FFT_REVERSE > (axes);
+ constexpr auto direction =
+ Forward ? Eigen::FFT_FORWARD : Eigen::FFT_REVERSE;
+ output.device(device) =
+ input.template fft<Eigen::BothParts, direction>(axes);
  } else {
  if (IsForward()) {
  auto input = (Tensor(in)).flat_inner_dims<float, FFTRank + 1>();
- auto input_dims = input.dimensions();
+ const auto input_dims = input.dimensions();
 
  // Slice input to fft_shape on its inner-most dimensions.
  Eigen::DSizes<Eigen::DenseIndex, FFTRank + 1> input_slice_sizes;
@@ -163,60 +165,75 @@ class FFTCPU : public FFTBase {
  output.device(device) =
  full_fft.slice(zero_start_indices, output.dimensions());
  } else {
- // Reconstruct the full fft and take the inverse.
+ // Reconstruct the full FFT and take the inverse.
  auto input = ((Tensor)in).flat_inner_dims<complex64, FFTRank + 1>();
  auto output = out->flat_inner_dims<float, FFTRank + 1>();
+ const auto input_dims = input.dimensions();
 
- auto sizes = input.dimensions();
-
- // Calculate the shape of full-fft temporary tensor.
- TensorShape fullShape;
- fullShape.AddDim(sizes[0]);
+ // Calculate the shape of the temporary tensor for the full FFT and the
+ // region we will slice from input given fft_shape. We slice input to
+ // fft_shape on its inner-most dimensions, except the last (which we
+ // slice to fft_shape[-1] / 2 + 1).
+ Eigen::DSizes<Eigen::DenseIndex, FFTRank + 1> input_slice_sizes;
+ input_slice_sizes[0] = input_dims[0];
+ TensorShape full_fft_shape;
+ full_fft_shape.AddDim(input_dims[0]);
  for (auto i = 1; i <= FFTRank; i++) {
- fullShape.AddDim(fft_shape[i - 1]);
+ input_slice_sizes[i] =
+ i == FFTRank ? fft_shape[i - 1] / 2 + 1 : fft_shape[i - 1];
+ full_fft_shape.AddDim(fft_shape[i - 1]);
  }
 
  Tensor temp;
  OP_REQUIRES_OK(ctx, ctx->allocate_temp(DataTypeToEnum<complex64>::v(),
- fullShape, &temp));
+ full_fft_shape, &temp));
  auto full_fft = temp.flat_inner_dims<complex64, FFTRank + 1>();
 
  // Calculate the starting point and range of the source of
  // negative frequency part.
- auto negSizes = input.dimensions();
- negSizes[FFTRank] = fft_shape[FFTRank - 1] - sizes[FFTRank];
- Eigen::DSizes<Eigen::DenseIndex, FFTRank + 1> negTargetIndices;
- negTargetIndices[FFTRank] = sizes[FFTRank];
-
- Eigen::DSizes<Eigen::DenseIndex, FFTRank + 1> startIndices,
- negStartIndices;
- negStartIndices[FFTRank] = 1;
-
- full_fft.slice(startIndices, sizes) = input.slice(startIndices, sizes);
-
- // First, conduct FFT on outer dimensions.
- auto outerAxes = Eigen::ArrayXi::LinSpaced(FFTRank - 1, 1, FFTRank - 1);
- full_fft = full_fft.template fft<Eigen::BothParts, Eigen::FFT_REVERSE>(
- outerAxes);
+ auto neg_sizes = input_slice_sizes;
+ neg_sizes[FFTRank] =
+ fft_shape[FFTRank - 1] - input_slice_sizes[FFTRank];
+ Eigen::DSizes<Eigen::DenseIndex, FFTRank + 1> neg_target_indices;
+ neg_target_indices[FFTRank] = input_slice_sizes[FFTRank];
+
+ const Eigen::DSizes<Eigen::DenseIndex, FFTRank + 1> start_indices;
+ Eigen::DSizes<Eigen::DenseIndex, FFTRank + 1> neg_start_indices;
+ neg_start_indices[FFTRank] = 1;
+
+ full_fft.slice(start_indices, input_slice_sizes).device(device) =
+ input.slice(start_indices, input_slice_sizes);
+
+ // First, conduct IFFTs on outer dimensions. We save computation (and
+ // avoid touching uninitialized memory) by slicing full_fft to the
+ // subregion we wrote input to.
+ if (FFTRank > 1) {
+ const auto outer_axes =
+ Eigen::ArrayXi::LinSpaced(FFTRank - 1, 1, FFTRank - 1);
+ full_fft.slice(start_indices, input_slice_sizes).device(device) =
+ full_fft.slice(start_indices, input_slice_sizes)
+ .template fft<Eigen::BothParts, Eigen::FFT_REVERSE>(
+ outer_axes);
+ }
 
- // Reconstruct the full fft by appending reversed and conjugated
+ // Reconstruct the full FFT by appending reversed and conjugated
  // spectrum as the negative frequency part.
- Eigen::array<bool, FFTRank + 1> reversedAxis;
+ Eigen::array<bool, FFTRank + 1> reverse_last_axis;
  for (auto i = 0; i <= FFTRank; i++) {
- reversedAxis[i] = i == FFTRank;
+ reverse_last_axis[i] = i == FFTRank;
  }
 
- if (negSizes[FFTRank] != 0) {
- full_fft.slice(negTargetIndices, negSizes) =
- full_fft.slice(negStartIndices, negSizes)
- .reverse(reversedAxis)
+ if (neg_sizes[FFTRank] != 0) {
+ full_fft.slice(neg_target_indices, neg_sizes).device(device) =
+ full_fft.slice(neg_start_indices, neg_sizes)
+ .reverse(reverse_last_axis)
  .conjugate();
  }
 
- auto innerAxis = Eigen::array<int, 1>{FFTRank};
+ auto inner_axis = Eigen::array<int, 1>{FFTRank};
  output.device(device) =
  full_fft.template fft<Eigen::RealPart, Eigen::FFT_REVERSE>(
- innerAxis);
+ inner_axis);
  }
  }
  }

diff --git a/tensorflow/python/kernel_tests/BUILD b/tensorflow/python/kernel_tests/BUILD
@@ -2185,7 +2185,6 @@ cuda_py_test(
  "//tensorflow/python:spectral_ops",
  ],
  shard_count = 3,
- tags = ["nomsan"], # b/62067589
 )
 
 cuda_py_test(

diff --git a/tensorflow/python/kernel_tests/fft_ops_test.py b/tensorflow/python/kernel_tests/fft_ops_test.py
@@ -351,20 +351,13 @@ def testFftLength(self):
  # Test truncation (FFT size < dimensions).
  fft_length = (size - 2,) * rank
  self._CompareForward(r2c.astype(np.float32), rank, fft_length)
- if test.is_gpu_available():
- # TODO(rjryan): figure out why eigen kernel gives wrong answer.
- self._CompareBackward(c2r.astype(np.complex64), rank, fft_length)
+ self._CompareBackward(c2r.astype(np.complex64), rank, fft_length)
 
  # Confirm it works with unknown shapes as well.
  self._CompareForward(r2c.astype(np.float32), rank, fft_length,
  use_placeholder=True)
- if test.is_gpu_available():
- # TODO(rjryan): figure out why eigen kernel gives wrong answer.
- self._CompareBackward(
- c2r.astype(np.complex64),
- rank,
- fft_length,
- use_placeholder=True)
+ self._CompareBackward(c2r.astype(np.complex64), rank, fft_length,
+ use_placeholder=True)
 
  # Test padding (FFT size > dimensions).
  fft_length = (size + 2,) * rank