Python 3.7 compatibility; add stft.py to remove librosa dependency.

aldycool · Nov 23, 2018 · 4c96b91 · 4c96b91
1 parent f578570
commit 4c96b91
Show file tree

Hide file tree

Showing 6 changed files with 134 additions and 39 deletions.
diff --git a/audfprint.py b/audfprint.py
@@ -23,6 +23,14 @@
 import hash_table # My hash_table implementation
 
 
+# Select the timer once at import time: time.process_time on Python 3
+# (time.clock was removed in Python 3.8), time.clock on Python 2, which
+# has no process_time.
+time_clock = time.process_time if sys.version_info[0] >= 3 else time.clock
+
+
 def filename_list_iterator(filelist, wavdir, wavext, listflag):
  """ Iterator to yeild all the filenames, possibly interpreting them
  as list files, prepending wavdir """
@@ -395,7 +403,7 @@ def main(argv):
  report = setup_reporter(args)
 
  # Keep track of wall time
- initticks = time.clock()
+ initticks = time_clock()
 
  # Command line sanity.
  if args["--maxtimebits"]:
@@ -472,7 +480,7 @@ def main(argv):
  skip_existing=args['--skip-existing'],
  strip_prefix=args['--wavdir'])
 
- elapsedtime = time.clock() - initticks
+ elapsedtime = time_clock() - initticks
  if analyzer and analyzer.soundfiletotaldur > 0.:
  print("Processed "
  + "%d files (%.1f s total dur) in %.1f s sec = %.3f x RT" \

diff --git a/audfprint_analyze.py b/audfprint_analyze.py
@@ -14,18 +14,13 @@
 import struct # For reading/writing hashes to file
 import time # For glob2hashtable, localtester
 
-import librosa
 import numpy as np
 import scipy.signal
 
 import audio_read
 import hash_table # For utility, glob2hashtable
+import stft
 
-try:
- # noinspection PyUnresolvedReferences,PyUnboundLocalVariable
- xrange(0) # Py2
-except NameError:
- xrange = range # Py3
 
 # ############### Globals ############### #
 # Special extension indicating precomputed fingerprint
@@ -275,9 +270,9 @@ def find_peaks(self, d, sr):
  a_dec = (1 - 0.01 * (self.density * np.sqrt(self.n_hop / 352.8) / 35)) ** (1 / OVERSAMP)
  # Take spectrogram
  mywin = np.hanning(self.n_fft + 2)[1:-1]
- sgram = np.abs(librosa.stft(d, n_fft=self.n_fft,
-  hop_length=self.n_hop,
-  window=mywin))
+ sgram = np.abs(stft.stft(d, n_fft=self.n_fft,
+ hop_length=self.n_hop,
+ window=mywin))
  sgrammax = np.max(sgram)
  if sgrammax > 0.0:
  sgram = np.log(np.maximum(sgram, np.max(sgram) / 1e6))
@@ -300,7 +295,7 @@ def find_peaks(self, d, sr):
  # build a list of peaks we ended up with
  scols = np.shape(sgram)[1]
  pklist = []
- for col in xrange(scols):
+ for col in range(scols):
  for bin_ in np.nonzero(peaks[:, col])[0]:
  pklist.append((col, bin_))
  return pklist
@@ -318,15 +313,15 @@ def peaks2landmarks(self, pklist):
  # Find column of the final peak in the list
  scols = pklist[-1][0] + 1
  # Convert (col, bin) list into peaks_at[col] lists
- peaks_at = [[] for _ in xrange(scols)]
+ peaks_at = [[] for _ in range(scols)]
  for (col, bin_) in pklist:
  peaks_at[col].append(bin_)
 
  # Build list of landmarks <starttime F1 endtime F2>
- for col in xrange(scols):
+ for col in range(scols):
  for peak in peaks_at[col]:
  pairsthispeak = 0
- for col2 in xrange(col + self.mindt,
+ for col2 in range(col + self.mindt,
  min(scols, col + self.targetdt)):
  if pairsthispeak < self.maxpairsperpeak:
  for peak2 in peaks_at[col2]:

diff --git a/audfprint_match.py b/audfprint_match.py
@@ -11,15 +11,19 @@
 import time
 
 import psutil
-import matplotlib.pyplot as plt
-import librosa
-import librosa.display
 import numpy as np
 import scipy.signal
 
+# Don't sweat failure to import graphics support.
+try:
+    import matplotlib.pyplot as plt
+    import librosa.display
+except Exception:
+    # Plotting support is optional, so any ordinary import-time failure
+    # (missing package, unusable display backend, ...) is ignored here.
+    # Catching Exception rather than using a bare except lets
+    # KeyboardInterrupt and SystemExit still propagate.
+    pass
+
 import audfprint_analyze # for localtest and illustrate
 import audio_read
-
+import stft
 
 def process_info():
  rss = usrtime = 0
@@ -420,9 +424,9 @@ def illustrate_match(self, analyzer, ht, filename):
  # Make the spectrogram
  # d, sr = librosa.load(filename, sr=analyzer.target_sr)
  d, sr = audio_read.audio_read(filename, sr=analyzer.target_sr, channels=1)
- sgram = np.abs(librosa.stft(d, n_fft=analyzer.n_fft,
-  hop_length=analyzer.n_hop,
-  window=np.hanning(analyzer.n_fft + 2)[1:-1]))
+ sgram = np.abs(stft.stft(d, n_fft=analyzer.n_fft,
+ hop_length=analyzer.n_hop,
+ window=np.hanning(analyzer.n_fft + 2)[1:-1]))
  sgram = 20.0 * np.log10(np.maximum(sgram, np.max(sgram) / 1e6))
  sgram = sgram - np.mean(sgram)
  # High-pass filter onset emphasis

diff --git a/hash_table.py b/hash_table.py
@@ -13,26 +13,21 @@
 import math
 import os
 import random
+import sys
 
 import numpy as np
 import scipy.io
 
-try:
- import cPickle as pickle # Py2
-except ImportError:
+if sys.version_info[0] >= 3:
+ # Python 3 specific definitions
  import pickle # Py3
-
-try:
- # noinspection PyUnresolvedReferences,PyUnboundLocalVariable
- xrange(0) # Py2
-except NameError:
- xrange = range # Py3
-
-try:
- # noinspection PyUnresolvedReferences,PyUnboundLocalVariable
- basestring # Py2
-except NameError:
  basestring = (str, bytes) # Py3
+ # Extra keyword arguments forwarded to pickle.load(). Per the Python
+ # pickle documentation, encoding='latin1' is required to unpickle NumPy
+ # arrays that were pickled by Python 2.
+ pickle_options = {'encoding': 'latin1'}
+else:
+ # Python 2 specific definitions
+ import cPickle as pickle # Py2
+ # Python 2's pickle.load() takes no encoding argument, so pass nothing.
+ pickle_options = {}
+
 
 # Current format version
 HT_VERSION = 20170724
@@ -164,7 +159,7 @@ def get_hits(self, hashes):
  maxtimemask = (1 << self.maxtimebits) - 1
  hashmask = (1 << self.hashbits) - 1
  # Fill in
- for ix in xrange(nhashes):
+ for ix in range(nhashes):
  time_ = hashes[ix][0]
  hash_ = hashmask & hashes[ix][1]
  nids = min(self.depth, self.counts[hash_])
@@ -221,7 +216,7 @@ def load_pkl(self, name, file_object=None):
  f = file_object
  else:
  f = gzip.open(name, 'rb')
- temp = pickle.load(f)
+ temp = pickle.load(f, **pickle_options)
  if temp.ht_version < HT_OLD_COMPAT_VERSION:
  raise ValueError('Version of ' + name + ' is ' + str(temp.ht_version)
  + ' which is not at least ' +

diff --git a/requirements.txt b/requirements.txt
@@ -1,4 +1,3 @@
-librosa
 numpy
 scipy
 docopt

diff --git a/stft.py b/stft.py
@@ -0,0 +1,94 @@
+"""Provide stft to avoid librosa dependency. 
+
+This implementation is based on routines from 
+https://github.com/tensorflow/models/blob/master/research/audioset/mel_features.py
+"""
+
+from __future__ import division
+
+import numpy as np
+
+
+def frame(data, window_length, hop_length):
+    """Convert array into a sequence of successive possibly overlapping frames.
+
+    An n-dimensional array of shape (num_samples, ...) is converted into an
+    (n+1)-D array of shape (num_frames, window_length, ...), where each frame
+    starts hop_length points after the preceding one.
+
+    This is accomplished using stride_tricks, so the original data is not
+    copied. The result is therefore a view whose frames share memory with
+    data (and with each other when hop_length < window_length); copy it
+    before writing to it. There is no zero-padding, so any incomplete frames
+    at the end are not included, and an input shorter than one window yields
+    zero frames.
+
+    Args:
+      data: np.array of dimension N >= 1.
+      window_length: Number of samples in each frame.
+      hop_length: Advance (in samples) between each window.
+
+    Returns:
+      (N+1)-D np.array with as many rows as there are complete frames that
+      can be extracted (possibly zero).
+    """
+    num_samples = data.shape[0]
+    # Clamp at zero: for inputs much shorter than window_length the floor
+    # division makes the count negative, and as_strided rejects negative
+    # dimensions instead of returning an empty result.
+    num_frames = max(0, 1 + ((num_samples - window_length) // hop_length))
+    shape = (num_frames, window_length) + data.shape[1:]
+    # Stepping one frame advances hop_length samples along axis 0; inside a
+    # frame the original strides apply unchanged.
+    strides = (data.strides[0] * hop_length,) + data.strides
+    return np.lib.stride_tricks.as_strided(data, shape=shape, strides=strides)
+
+
+def periodic_hann(window_length):
+    """Return a "periodic" Hann window of the requested length.
+
+    The symmetric Hann window (what np.hanning() returns, and what Matlab
+    calls "symmetric") is a raised cosine that both starts and ends on zero.
+    Over N points that is slightly more than one cycle of a period N-1
+    cosine, which does not sit compactly on a length-N Fourier basis. The
+    "periodic" variant computed here is exactly one cycle of a period-N
+    raised cosine, i.e. it stops one sample before the closing zero.
+
+    Args:
+      window_length: The number of points in the returned window.
+
+    Returns:
+      A 1D np.array containing the periodic hann window.
+    """
+    # Phase advances by 2*pi/window_length per sample, so the cosine
+    # completes one full cycle over exactly window_length points.
+    radians_per_sample = 2 * np.pi / window_length
+    phase = radians_per_sample * np.arange(window_length)
+    raised_cosine = 0.5 - (0.5 * np.cos(phase))
+    return raised_cosine
+
+
+def stft(signal, n_fft, hop_length=None, window=None):
+    """Calculate the short-time Fourier transform.
+
+    The signal is reflect-padded by n_fft // 2 samples at each end, cut into
+    overlapping frames of the window's length, multiplied by the window, and
+    each frame is passed through a real FFT of size n_fft.
+
+    Args:
+      signal: 1D np.array of the input time-domain signal.
+      n_fft: Size of the FFT to apply.
+      hop_length: Advance (in samples) between each frame passed to FFT.
+        Defaults to half the window length.
+      window: Length of each block of samples to pass to FFT (a Python or
+        NumPy number, in which case a periodic Hann window of that length is
+        generated), or a vector of window values to use as given. Defaults
+        to n_fft.
+
+    Returns:
+      2D np.array where each column contains the complex values of the
+      n_fft/2+1 unique values of the FFT for the corresponding frame of
+      input samples ("spectrogram transposition").
+    """
+    if window is None:
+        window = n_fft
+    # NumPy scalar types are listed explicitly: np.int64 and friends are not
+    # subclasses of Python int, so without them a NumPy-typed length would
+    # fall through to len() below and raise TypeError.
+    if isinstance(window, (int, float, np.integer, np.floating)):
+        # window holds the window length, need to make the actual window.
+        window = periodic_hann(int(window))
+    window_length = len(window)
+    if not hop_length:
+        hop_length = window_length // 2
+    # Default librosa STFT behavior.
+    pad_mode = 'reflect'
+    signal = np.pad(signal, (n_fft // 2), mode=pad_mode)
+    frames = frame(signal, window_length, hop_length)
+    # Apply the window to every frame. A caller-supplied window vector is
+    # used as given; the periodic Hann is only generated above when a bare
+    # length (or nothing) was passed.
+    windowed_frames = frames * window
+    # rfft works along the last axis (one spectrum per frame); transposing
+    # puts frequency bins on rows and frames on columns.
+    return np.fft.rfft(windowed_frames, n_fft).transpose()