Skip to content

Commit

Permalink
Python 3.7 compatibility; add stft.py to remove librosa dependency.
Browse files Browse the repository at this point in the history
  • Loading branch information
dpwe committed Nov 23, 2018
1 parent f578570 commit 4c96b91
Show file tree
Hide file tree
Showing 6 changed files with 134 additions and 39 deletions.
12 changes: 10 additions & 2 deletions audfprint.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,14 @@
import hash_table # My hash_table implementation


# Pick the timer by major Python version: time.process_time on Python 3,
# time.clock on Python 2.  A conditional expression only evaluates the branch
# it selects, so the attribute for the other version is never looked up.
time_clock = (time.process_time if sys.version_info[0] >= 3
              else time.clock)


def filename_list_iterator(filelist, wavdir, wavext, listflag):
""" Iterator to yeild all the filenames, possibly interpreting them
as list files, prepending wavdir """
Expand Down Expand Up @@ -395,7 +403,7 @@ def main(argv):
report = setup_reporter(args)

# Keep track of elapsed time via time_clock (processor time on Python 3, not wall time)
initticks = time.clock()
initticks = time_clock()

# Command line sanity.
if args["--maxtimebits"]:
Expand Down Expand Up @@ -472,7 +480,7 @@ def main(argv):
skip_existing=args['--skip-existing'],
strip_prefix=args['--wavdir'])

elapsedtime = time.clock() - initticks
elapsedtime = time_clock() - initticks
if analyzer and analyzer.soundfiletotaldur > 0.:
print("Processed "
+ "%d files (%.1f s total dur) in %.1f s sec = %.3f x RT" \
Expand Down
21 changes: 8 additions & 13 deletions audfprint_analyze.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,18 +14,13 @@
import struct # For reading/writing hashes to file
import time # For glob2hashtable, localtester

import librosa
import numpy as np
import scipy.signal

import audio_read
import hash_table # For utility, glob2hashtable
import stft

try:
# noinspection PyUnresolvedReferences,PyUnboundLocalVariable
xrange(0) # Py2
except NameError:
xrange = range # Py3

# ############### Globals ############### #
# Special extension indicating precomputed fingerprint
Expand Down Expand Up @@ -275,9 +270,9 @@ def find_peaks(self, d, sr):
a_dec = (1 - 0.01 * (self.density * np.sqrt(self.n_hop / 352.8) / 35)) ** (1 / OVERSAMP)
# Take spectrogram
mywin = np.hanning(self.n_fft + 2)[1:-1]
sgram = np.abs(librosa.stft(d, n_fft=self.n_fft,
hop_length=self.n_hop,
window=mywin))
sgram = np.abs(stft.stft(d, n_fft=self.n_fft,
hop_length=self.n_hop,
window=mywin))
sgrammax = np.max(sgram)
if sgrammax > 0.0:
sgram = np.log(np.maximum(sgram, np.max(sgram) / 1e6))
Expand All @@ -300,7 +295,7 @@ def find_peaks(self, d, sr):
# build a list of peaks we ended up with
scols = np.shape(sgram)[1]
pklist = []
for col in xrange(scols):
for col in range(scols):
for bin_ in np.nonzero(peaks[:, col])[0]:
pklist.append((col, bin_))
return pklist
Expand All @@ -318,15 +313,15 @@ def peaks2landmarks(self, pklist):
# Find column of the final peak in the list
scols = pklist[-1][0] + 1
# Convert (col, bin) list into peaks_at[col] lists
peaks_at = [[] for _ in xrange(scols)]
peaks_at = [[] for _ in range(scols)]
for (col, bin_) in pklist:
peaks_at[col].append(bin_)

# Build list of landmarks <starttime F1 endtime F2>
for col in xrange(scols):
for col in range(scols):
for peak in peaks_at[col]:
pairsthispeak = 0
for col2 in xrange(col + self.mindt,
for col2 in range(col + self.mindt,
min(scols, col + self.targetdt)):
if pairsthispeak < self.maxpairsperpeak:
for peak2 in peaks_at[col2]:
Expand Down
18 changes: 11 additions & 7 deletions audfprint_match.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,15 +11,19 @@
import time

import psutil
import matplotlib.pyplot as plt
import librosa
import librosa.display
import numpy as np
import scipy.signal

# Don't sweat failure to import graphics support: plotting is optional.
# Catch Exception rather than using a bare except so that KeyboardInterrupt
# and SystemExit raised during these (slow) imports still propagate.
try:
    import matplotlib.pyplot as plt
    import librosa.display
except Exception:
    pass

import audfprint_analyze # for localtest and illustrate
import audio_read

import stft

def process_info():
rss = usrtime = 0
Expand Down Expand Up @@ -420,9 +424,9 @@ def illustrate_match(self, analyzer, ht, filename):
# Make the spectrogram
# d, sr = librosa.load(filename, sr=analyzer.target_sr)
d, sr = audio_read.audio_read(filename, sr=analyzer.target_sr, channels=1)
sgram = np.abs(librosa.stft(d, n_fft=analyzer.n_fft,
hop_length=analyzer.n_hop,
window=np.hanning(analyzer.n_fft + 2)[1:-1]))
sgram = np.abs(stft.stft(d, n_fft=analyzer.n_fft,
hop_length=analyzer.n_hop,
window=np.hanning(analyzer.n_fft + 2)[1:-1]))
sgram = 20.0 * np.log10(np.maximum(sgram, np.max(sgram) / 1e6))
sgram = sgram - np.mean(sgram)
# High-pass filter onset emphasis
Expand Down
27 changes: 11 additions & 16 deletions hash_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,26 +13,21 @@
import math
import os
import random
import sys

import numpy as np
import scipy.io

try:
import cPickle as pickle # Py2
except ImportError:
if sys.version_info[0] >= 3:
# Python 3 specific definitions
import pickle # Py3

try:
# noinspection PyUnresolvedReferences,PyUnboundLocalVariable
xrange(0) # Py2
except NameError:
xrange = range # Py3

try:
# noinspection PyUnresolvedReferences,PyUnboundLocalVariable
basestring # Py2
except NameError:
basestring = (str, bytes) # Py3
pickle_options = {'encoding': 'latin1'}
else:
# Python 2 specific definitions
import cPickle as pickle # Py2
pickle_options = {}


# Current format version
HT_VERSION = 20170724
Expand Down Expand Up @@ -164,7 +159,7 @@ def get_hits(self, hashes):
maxtimemask = (1 << self.maxtimebits) - 1
hashmask = (1 << self.hashbits) - 1
# Fill in
for ix in xrange(nhashes):
for ix in range(nhashes):
time_ = hashes[ix][0]
hash_ = hashmask & hashes[ix][1]
nids = min(self.depth, self.counts[hash_])
Expand Down Expand Up @@ -221,7 +216,7 @@ def load_pkl(self, name, file_object=None):
f = file_object
else:
f = gzip.open(name, 'rb')
temp = pickle.load(f)
temp = pickle.load(f, **pickle_options)
if temp.ht_version < HT_OLD_COMPAT_VERSION:
raise ValueError('Version of ' + name + ' is ' + str(temp.ht_version)
+ ' which is not at least ' +
Expand Down
1 change: 0 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
librosa
numpy
scipy
docopt
Expand Down
94 changes: 94 additions & 0 deletions stft.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
"""Provide stft to avoid librosa dependency.
This implementation is based on routines from
https://github.com/tensorflow/models/blob/master/research/audioset/mel_features.py
"""

from __future__ import division

import numpy as np


def frame(data, window_length, hop_length):
    """Convert array into a sequence of successive possibly overlapping frames.

    An n-dimensional array of shape (num_samples, ...) is converted into an
    (n+1)-D array of shape (num_frames, window_length, ...), where each frame
    starts hop_length points after the preceding one.

    This is accomplished using stride_tricks, so the original data is not
    copied.  However, there is no zero-padding, so any incomplete frames at
    the end are not included.

    Args:
      data: np.array of dimension N >= 1.
      window_length: Number of samples in each frame.
      hop_length: Advance (in samples) between each window.

    Returns:
      (N+1)-D np.array with as many rows as there are complete frames that
      can be extracted.  If data holds fewer than window_length samples the
      result has zero rows.
    """
    num_samples = data.shape[0]
    # Floor division of a negative numerator can push the count below zero
    # when the input is much shorter than one window; clamp so that case
    # yields an empty result instead of an invalid negative dimension.
    num_frames = max(0, 1 + ((num_samples - window_length) // hop_length))
    shape = (num_frames, window_length) + data.shape[1:]
    # Stepping one frame advances hop_length samples along the first axis;
    # within a frame the original strides are reused unchanged.
    strides = (data.strides[0] * hop_length,) + data.strides
    return np.lib.stride_tricks.as_strided(data, shape=shape, strides=strides)


def periodic_hann(window_length):
    """Calculate a "periodic" Hann window.

    The classic Hann window is defined as a raised cosine that starts and
    ends on zero, and where every value appears twice, except the middle
    point for an odd-length window.  Matlab calls this a "symmetric" window
    and np.hanning() returns it.  However, for Fourier analysis, this
    actually represents just over one cycle of a period N-1 cosine, and
    thus is not compactly expressed on a length-N Fourier basis.  Instead,
    it's better to use a raised cosine that ends just before the final
    zero value - i.e. a complete cycle of a period-N cosine.  Matlab
    calls this a "periodic" window.  This routine calculates it.

    Args:
      window_length: The number of points in the returned window.

    Returns:
      A 1D np.array containing the periodic hann window.  Lengths below 1
      give an empty array.
    """
    if window_length < 1:
        # Negative lengths already produced an empty array via np.arange;
        # handle zero the same way instead of dividing by zero below.
        return np.zeros(0)
    radians_per_sample = 2 * np.pi / window_length
    return 0.5 - (0.5 * np.cos(radians_per_sample *
                               np.arange(window_length)))


def stft(signal, n_fft, hop_length=None, window=None):
    """Calculate the short-time Fourier transform.

    Args:
      signal: 1D np.array of the input time-domain signal.
      n_fft: Size of the FFT to apply.
      hop_length: Advance (in samples) between each frame passed to FFT.
        Defaults to half the window length.
      window: Length of each block of samples to pass to FFT, or vector of
        window values.  Defaults to n_fft.  A scalar (Python or NumPy
        number) is treated as a length, and a periodic Hann window of that
        length is generated.

    Returns:
      2D np.array where each column contains the complex values of the
      n_fft // 2 + 1 unique values of the FFT for the corresponding frame of
      input samples ("spectrogram transposition").
    """
    if window is None:
        window = n_fft
    if np.ndim(window) == 0:
        # window holds the window length (possibly as a NumPy scalar such as
        # np.int64), so the actual window still has to be built.
        window = periodic_hann(int(window))
    window_length = len(window)
    if not hop_length:
        hop_length = window_length // 2
    # Default librosa STFT behavior: reflect-pad n_fft // 2 samples at each
    # end of the signal before framing.
    pad_mode = 'reflect'
    signal = np.pad(signal, (n_fft // 2), mode=pad_mode)
    frames = frame(signal, window_length, hop_length)
    # Apply the window to every frame (broadcast over the frame axis).  A
    # caller-supplied window vector is used exactly as given; only the
    # generated default is a periodic Hann.
    windowed_frames = frames * window
    # rfft works along the last axis with length n_fft; transpose so that
    # each column corresponds to one frame.
    return np.fft.rfft(windowed_frames, n_fft).transpose()

0 comments on commit 4c96b91

Please sign in to comment.