Skip to content

Commit

Permalink
Add GPU acceleration (timsainb#14)
Browse files Browse the repository at this point in the history
* added pad clipping flag

* added tensorflow backend

* updated docstrings

* updated readme

* updated readme

* updated package version

* updated readme

* updated readme

* updated website

* updated testing

* updated testing and black formatting

* update testing

* added test requirements

* updated travis yml to install requirements-txt

* updated travis yml

* updated test-req
  • Loading branch information
timsainb authored Jun 11, 2019
1 parent 3cfc83e commit db94fe2
Show file tree
Hide file tree
Showing 11 changed files with 616 additions and 134 deletions.
5 changes: 4 additions & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,7 @@ python:
script:
- pytest --cov=noisereduce/
after_success:
- coveralls
- coveralls
install:
- pip install -r requirements.txt
- pip install -r requirements-test.txt
4 changes: 4 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@
## Installation
`pip install noisereduce`

*noisereduce optionally uses Tensorflow as a backend to speed up FFT and gaussian convolution. It is not listed in requirements.txt because (1) it is optional and (2) tensorflow-gpu and tensorflow (cpu) are both compatible with this package. The package requires Tensorflow 2+ for all tensorflow operations.*

## Usage
(see notebooks)

Expand All @@ -45,6 +47,8 @@ win_length (int): Each frame of audio is windowed by `window()`. The window will
hop_length (int): number of audio frames between STFT columns.
n_std_thresh (int): how many standard deviations louder than the mean dB of the noise (at each frequency level) to be considered signal
prop_decrease (float): To what extent should you decrease noise (1 = all, 0 = none)
pad_clipping (bool): Pad the signals with zeros to ensure that the reconstructed data is the same length as the input data
use_tensorflow (bool): Use tensorflow as a backend for convolution and fft to speed up computation
verbose (bool): Whether to plot the steps of the algorithm
```
<div style="text-align:center">
Expand Down
2 changes: 1 addition & 1 deletion environment.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
name: birdbrain
name: noisereduce
channels:
- conda-forge
- defaults
Expand Down
Binary file modified noisereduce/._noisereduce.py
Binary file not shown.
141 changes: 127 additions & 14 deletions noisereduce/noisereduce.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,73 @@
import librosa
from noisereduce.plotting import plot_reduction_steps
from tqdm.autonotebook import tqdm
import warnings

# Tensorflow is an optional backend: import it if present, otherwise warn and
# carry on so the librosa/scipy code paths stay usable.
try:
    import tensorflow as tf
except ImportError:
    warnings.warn(
        "Tensorflow is not installed and cannot be used for GPU accelerated STFT"
    )
else:
    try:
        _gpus = tf.config.experimental.list_physical_devices("GPU")
    except AttributeError:
        # This tensorflow build does not expose the device-listing API; skip
        # the report instead of failing the whole module import.
        _gpus = None
    if _gpus is not None:
        print("GPUs available: {}".format(_gpus))
    # Parse the whole major component ("10.1.0" -> 10) rather than only the
    # first character of the version string.
    if int(tf.__version__.split(".")[0]) < 2:
        warnings.warn(
            "Tensorflow version is below 2.0, some GPU accelerated functionality may not work"
        )


def _stft(y, n_fft, hop_length, win_length, use_tensorflow=False):
    """Compute the STFT of ``y`` with the selected backend

    Arguments:
        y {[type]} -- signal handed to the backend STFT
        n_fft {[type]} -- forwarded unchanged to the backend
        hop_length {[type]} -- forwarded unchanged to the backend
        win_length {[type]} -- forwarded unchanged to the backend
    Keyword Arguments:
        use_tensorflow {bool} -- call ``_stft_tensorflow`` instead of ``librosa.stft`` (default: {False})
    """
    if not use_tensorflow:
        return librosa.stft(
            y=y, n_fft=n_fft, hop_length=hop_length, win_length=win_length, center=True
        )
    return _stft_tensorflow(y, n_fft, hop_length, win_length)


def _istft(y, n_fft, hop_length, win_length, use_tensorflow=False):
    """Invert the STFT ``y`` with the selected backend

    Arguments:
        y {[type]} -- STFT matrix to invert
        n_fft {[type]} -- only used by the tensorflow backend
        hop_length {[type]} -- forwarded unchanged to the backend
        win_length {[type]} -- forwarded unchanged to the backend
    Keyword Arguments:
        use_tensorflow {bool} -- call ``_istft_tensorflow`` instead of ``librosa.istft`` (default: {False})
    """
    if use_tensorflow:
        # the tensorflow helper receives the transposed matrix
        return _istft_tensorflow(y.T, n_fft, hop_length, win_length)
    # Pass hop_length/win_length by keyword: newer librosa releases declare
    # them keyword-only, and keywords are accepted by older releases as well.
    return librosa.istft(y, hop_length=hop_length, win_length=win_length)

def _stft(y, n_fft, hop_length, win_length):
return librosa.stft(y=y, n_fft=n_fft, hop_length=hop_length, win_length=win_length)

def _stft_librosa(y, n_fft, hop_length, win_length):
    """Compute the STFT of ``y`` with ``librosa.stft`` (``center=True``)"""
    stft_kwargs = {
        "y": y,
        "n_fft": n_fft,
        "hop_length": hop_length,
        "win_length": win_length,
        "center": True,
    }
    return librosa.stft(**stft_kwargs)


def _istft(y, hop_length, win_length):
def _istft_librosa(y, hop_length, win_length):
    """Invert the STFT matrix ``y`` with ``librosa.istft``"""
    # Keywords instead of positionals: newer librosa releases declare
    # hop_length/win_length keyword-only; older releases accept both forms.
    return librosa.istft(y, hop_length=hop_length, win_length=win_length)


def _stft_tensorflow(y, n_fft, hop_length, win_length):
    """Compute the STFT of ``y`` with ``tf.signal.stft``

    Uses a Hann window and ``pad_end=True``; the result is converted to a
    numpy array and transposed before being returned.
    """
    frames = tf.signal.stft(
        y,
        frame_length=win_length,
        frame_step=hop_length,
        fft_length=n_fft,
        window_fn=tf.signal.hann_window,
        pad_end=True,
    )
    return frames.numpy().T


def _istft_tensorflow(y, n_fft, hop_length, win_length):
    """Invert the STFT ``y`` with ``tf.signal.inverse_stft``

    ``y`` is cast to complex64 first; the result is returned as a numpy array.
    """
    stfts = y.astype(np.complex64)
    recovered = tf.signal.inverse_stft(stfts, win_length, hop_length, n_fft)
    return recovered.numpy()


def _amp_to_db(x):
    """Convert amplitudes ``x`` to dB with librosa (ref=1.0, amin=1e-20, top_db=80.0)"""
    db_kwargs = {"ref": 1.0, "amin": 1e-20, "top_db": 80.0}
    return librosa.core.amplitude_to_db(x, **db_kwargs)

Expand All @@ -31,9 +88,7 @@ def update_pbar(pbar, message):

def _smoothing_filter(n_grad_freq, n_grad_time):
"""Generates a filter to smooth the mask for the spectrogram
[description]
Arguments:
n_grad_freq {[type]} -- [how many frequency channels to smooth over with the mask.]
n_grad_time {[type]} -- [how many time channels to smooth over with the mask.]
Expand All @@ -58,10 +113,8 @@ def _smoothing_filter(n_grad_freq, n_grad_time):


def mask_signal(sig_stft_db, sig_mask, mask_gain_dB, sig_stft):
"""[summary]
[description]
""" Reduces amplitude of time/frequency regions of a spectrogram based upon a mask
Arguments:
sig_stft_db {[type]} -- spectrogram of signal in dB
sig_mask {[type]} -- mask to apply to signal
Expand All @@ -83,6 +136,33 @@ def mask_signal(sig_stft_db, sig_mask, mask_gain_dB, sig_stft):
return sig_stft_amp, sig_stft_db_masked


def convolve_gaussian(sig_mask, smoothing_filter, use_tensorflow=False):
    """ Smooths a mask (or any image) by convolving it with a filter
    Arguments:
        sig_mask {[type]} -- The signal mask
        smoothing_filter {[type]} -- the filter to convolve
    Keyword Arguments:
        use_tensorflow {bool} -- use tf.nn.conv2d instead of scipy.signal.fftconvolve (default: {False})
    Returns:
        the convolved mask (scipy path: same shape as sig_mask, mode="same")
    """
    if not use_tensorflow:
        return scipy.signal.fftconvolve(sig_mask, smoothing_filter, mode="same")

    # gain applied to the filter on the tensorflow path only:
    # (size of the filter's second axis - 1) / 2 + 1
    # NOTE(review): the reason for this rescaling is not evident from here.
    gain = (np.shape(smoothing_filter)[1] - 1) / 2 + 1
    kernel = (smoothing_filter * gain)[:, :, tf.newaxis, tf.newaxis].astype("float32")
    # NOTE(review): tf.nn.conv2d defaults to NHWC input, so this indexing puts
    # the mask's first axis in the batch position -- confirm that is intended.
    img = sig_mask[:, :, tf.newaxis, tf.newaxis].astype("float32")
    convolved = tf.nn.conv2d(img, kernel, strides=[1, 1, 1, 1], padding="SAME")
    return convolved.numpy().squeeze()


def reduce_noise(
audio_clip,
noise_clip,
Expand All @@ -93,6 +173,8 @@ def reduce_noise(
hop_length=512,
n_std_thresh=1.5,
prop_decrease=1.0,
pad_clipping=True,
use_tensorflow=False,
verbose=False,
):
"""Remove noise from audio based upon a clip containing only noise
Expand All @@ -107,6 +189,8 @@ def reduce_noise(
hop_length (int):number audio of frames between STFT columns.
n_std_thresh (int): how many standard deviations louder than the mean dB of the noise (at each frequency level) to be considered signal
prop_decrease (float): To what extent should you decrease noise (1 = all, 0 = none)
pad_clipping (bool): Pad the signals with zeros to ensure that the reconstructed data is equal length to the data
use_tensorflow (bool): Use tensorflow as a backend for convolution and fft to speed up computation
verbose (bool): Whether to plot the steps of the algorithm
Returns:
Expand All @@ -120,7 +204,9 @@ def reduce_noise(

update_pbar(pbar, "STFT on noise")
# STFT over noise
noise_stft = _stft(noise_clip, n_fft, hop_length, win_length)
noise_stft = _stft(
noise_clip, n_fft, hop_length, win_length, use_tensorflow=use_tensorflow
)
noise_stft_db = _amp_to_db(np.abs(noise_stft)) # convert to dB
# Calculate statistics over noise
update_pbar(pbar, "STFT on signal")
Expand All @@ -129,7 +215,15 @@ def reduce_noise(
noise_thresh = mean_freq_noise + std_freq_noise * n_std_thresh
# STFT over signal
update_pbar(pbar, "STFT on signal")
sig_stft = _stft(audio_clip, n_fft, hop_length, win_length)

# pad signal with zeros to avoid extra frames being clipped if desired
if pad_clipping:
nsamp = len(audio_clip)
audio_clip = np.pad(audio_clip, [0, hop_length], mode="constant")

sig_stft = _stft(
audio_clip, n_fft, hop_length, win_length, use_tensorflow=use_tensorflow
)
sig_stft_db = _amp_to_db(np.abs(sig_stft))
update_pbar(pbar, "Generate mask")
# Calculate value to mask dB to
Expand All @@ -145,19 +239,38 @@ def reduce_noise(
update_pbar(pbar, "Smooth mask")
# Create a smoothing filter for the mask in time and frequency
smoothing_filter = _smoothing_filter(n_grad_freq, n_grad_time)

# convolve the mask with a smoothing filter
sig_mask = convolve_gaussian(sig_mask, smoothing_filter, use_tensorflow)

sig_mask = scipy.signal.fftconvolve(sig_mask, smoothing_filter, mode="same")
sig_mask = sig_mask * prop_decrease
update_pbar(pbar, "Apply mask")
# mask the signal

sig_stft_amp, sig_stft_db_masked = mask_signal(
sig_stft_db, sig_mask, mask_gain_dB, sig_stft
)

update_pbar(pbar, "Recover signal")
# recover the signal
recovered_signal = _istft(sig_stft_amp, hop_length, win_length)
recovered_signal = _istft(
sig_stft_amp, n_fft, hop_length, win_length, use_tensorflow=use_tensorflow
)
# fix the recovered signal length if padding signal
if pad_clipping:
recovered_signal = librosa.util.fix_length(recovered_signal, nsamp)

recovered_spec = _amp_to_db(
np.abs(_stft(recovered_signal, n_fft, hop_length, win_length))
np.abs(
_stft(
recovered_signal,
n_fft,
hop_length,
win_length,
use_tensorflow=use_tensorflow,
)
)
)
if verbose:
plot_reduction_steps(
Expand Down
15 changes: 15 additions & 0 deletions noisereduce/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
import numpy as np


def int16_to_float32(data):
    """ Converts int16 wav samples to float32 by dividing by 32768

    Arguments:
        data -- array-like of samples within the int16 range
    Returns:
        float32 numpy array equal to data / 32768
    Raises:
        ValueError -- if any sample has a magnitude above 32768
    """
    data = np.asarray(data)
    # An empty clip has no peak to check (np.max would raise on it). Min/max
    # are compared directly because np.abs wraps for int16 value -32768.
    if data.size and (np.max(data) > 32768 or np.min(data) < -32768):
        raise ValueError("Data has values above 32768")
    return (data / 32768.0).astype("float32")


def float32_to_int16(data):
    """ Converts float wav samples to int16 by scaling by 32767

    Data whose peak magnitude exceeds 1 is first normalised by that peak so
    the scaled values fit in int16.

    Arguments:
        data -- array-like of float samples
    Returns:
        int16 numpy array
    """
    data = np.asarray(data)
    # Use the peak magnitude, not the signed maximum: samples below -1 would
    # otherwise skip normalisation and overflow int16 after scaling.
    peak = np.max(np.abs(data)) if data.size else 0
    if peak > 1:
        data = data / peak
    return np.array(data * 32767).astype("int16")
Loading

0 comments on commit db94fe2

Please sign in to comment.