Melody_extraction.py

#!/usr/bin/env python
# encoding: utf-8
"""
Author: Yuan-Ping Chen
Date: 2016/02/06
----------------------------------------------------------------------
Melody Extractor: extract melody contour from audio file.
----------------------------------------------------------------------
Args:
    input_files:    Audio files to be processed. 
                    Only the wav files would be considered.
    output_dir:     Directory for storing the results.

Optional args:
    Please refer to --help.
----------------------------------------------------------------------
Returns:
    Raw melody contour:         Text file of estimated melody contour
                                in Hz with extension of .raw.melody.
    Pitch confidence:           Text file of the per-frame pitch confidence
                                with extension of .pitch_confidence.
    MIDI-scale melody contour:  Text file of estimated melody contour
                                in MIDI with extension of .MIDI.melody.
    Smoothed melody contour:    Text file of moving-averaged estimated
                                melody contour in MIDI scale with extension
                                of .MIDI.smooth.melody.

"""
import glob, os, sys
import numpy as np
from essentia.standard import *
from GuitarTranscription_parameters import *

def mean_filter(data, kernel_size=9):
    """
    Smooth the melody contour with a moving-average filter.

    The input is treated as zero outside its boundaries, so the values
    close to both ends are attenuated towards zero.

    :param data:            the input one-dimensional array-like sequence
                            to be processed. It is not modified.
    :param kernel_size:     the kernel size of the moving-average filter,
                            must be >= 1.
    :returns:               processed data, always with the same number of
                            samples as the input (an empty array for an
                            empty input).
    :raises ValueError:     if kernel_size is smaller than 1.
    """
    if kernel_size < 1:
        raise ValueError('kernel_size must be a positive integer')

    samples = np.asarray(data)
    if samples.size == 0:
        # np.convolve refuses empty operands; nothing to smooth anyway.
        return np.zeros(0, dtype=float)

    kernel = np.ones(kernel_size) / kernel_size
    # mode='same' returns max(len(data), kernel_size) samples, i.e. MORE
    # samples than the input when the contour is shorter than the kernel.
    # Take the full convolution and cut the window that 'same' uses for
    # the regular case, so the output length always equals the input length.
    full = np.convolve(samples, kernel, mode='full')
    start = (kernel_size - 1) - kernel_size // 2
    return full[start:start + samples.size]


def hertz2midi(melody_contour):
    """
    Convert pitch sequence from hertz to MIDI scale.

    The conversion is MIDI = 69 + 12 * log2(f / 440). Frames whose
    frequency is not strictly positive (zero, negative or NaN) have no
    defined pitch and are mapped to 0.

    :param melody_contour: array-like pitch sequence in Hz. It is not
                           modified.
    :returns             : new array with the melody contour in MIDI scale.

    """
    hertz = np.asarray(melody_contour)
    if hertz.dtype.kind != 'f':
        # integer input would truncate the fractional MIDI values
        hertz = hertz.astype(float)

    midi = np.zeros_like(hertz)
    # Take the logarithm only where it is defined: this avoids the
    # divide-by-zero warning for 0 Hz frames and the NaN values that
    # negative frequencies would otherwise produce.
    voiced = hertz > 0
    midi[voiced] = 12 * np.log2(hertz[voiced] / 440.0) + 69

    return midi

def parse_input_files(input_files, ext='.wav'):
    """
    Collect the files with the given extension from a file or directory.

    :param input_files: path of a single file, or of a directory whose
                        direct children are searched (not recursive).
    :param ext:         the string of file extension, e.g. '.wav'.
    :returns:           a list of strings of file names; sorted when a
                        directory was given, empty when nothing matches.

    """
    from os.path import basename, isdir, join
    import fnmatch
    import glob

    # check what we have (file/path)
    if isdir(input_files):
        # use all files with the requested extension in the given path;
        # sorted because glob returns entries in arbitrary order
        files = sorted(
            fnmatch.filter(glob.glob(join(input_files, '*')), '*' + ext))
    elif basename(input_files).endswith(ext):
        # a single file was given; the name has to END with the extension,
        # merely containing it (e.g. 'take.wav.bak') is not enough
        files = [input_files]
    else:
        files = []

    # single-argument print calls behave the same on Python 2 and 3
    print('  Input files: ')
    for f in files:
        print('     %s' % f)
    return files

def parser(argv=None):
    """
    Parses the command line arguments.

    :param argv: optional list of argument strings to parse instead of the
                 process command line; with None argparse falls back to
                 sys.argv[1:].
    :returns:    argparse.Namespace with the attributes input_files,
                 output_dir, fs, hs, sr, maxf0, fi, minf0, ks and gu.

    """
    import argparse
    # define parser
    p = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter, description="""
    If invoked without any parameters, the software (S1) extracts the melody
     contour, tracks notes and finds the timestamps where notes intersect
     continuously ascending/descending pitch sequences in the given files.
     The pipeline is as follows:

        S1.1 Extract melody contour
        S1.2 Note tracking
        S1.3 Find continuously ascending/descending (CAD) F0 sequence patterns
        S1.4 Find intersection of note and pattern
             (Candidate selection of {bend,slide,pull-off,hammer-on,normal})
    """)
    # general options
    p.add_argument('input_files', type=str, metavar='input_files',
                   help='files to be processed')
    p.add_argument('output_dir', type=str, metavar='output_dir',
                   help='output directory.')
    # NOTE(review): main() in this file reads only input_files, output_dir,
    # fs, maxf0, minf0 and gu; hs, sr, fi and ks are parsed but not used
    # there.
    p.add_argument('-fs', '--frameSize', type=int, dest='fs', default=2048,
                   help="the frame size for computing pitch salience")
    p.add_argument('-hs', '--hopSize', type=int, dest='hs', default=256,
                   help="the hop size with which the pitch salience "
                        "function was computed.")
    p.add_argument('-sr', '--sampleRate', type=int, dest='sr', default=44100,
                   help="the sampling rate of the audio signal [Hz].")
    p.add_argument('-maxf0', '--maxf0', type=int, dest='maxf0', default=20000,
                   help="the maximum allowed frequency for salience function "
                        "peaks (ignore contours with peaks above) [Hz].")
    p.add_argument('-fi', '--filterIteration', type=int, dest='fi', default=2,
                   help="number of iterations for the octave errors / pitch "
                        "outlier filtering process")
    p.add_argument('-minf0', '--minf0', type=int, dest='minf0', default=82,
                   help="the minimum allowed frequency for salience function "
                        "peaks (ignore contours with peaks below) [Hz].")
    p.add_argument('-ks', '--kernelSize', type=int, dest='ks', default=5,
                   help="the kernel size of the moving-average filter for "
                        "smoothing the estimated melody contour.")
    # guessUnvoiced is on by default, so -gu is a no-op kept for symmetry;
    # -no-gu is the switch that actually changes the value.
    p.add_argument('-gu', '--guessUnvoiced', action='store_true', dest='gu',
                   default=True,
                   help="estimate pitch for non-voiced segments by using "
                        "non-salient contours when no salient ones are "
                        "present in a frame.")
    p.add_argument('-no-gu', '--no-guessUnvoiced', action='store_false',
                   dest='gu', help="turn off the guessUnvoiced.")
    # version ('%(prog)s' is expanded by argparse to the program name)
    p.add_argument('--version', action='version',
                   version='%(prog)s 1.03 (2016-03-13)')
    # parse arguments
    return p.parse_args(argv)
    

def main(args):
    """
    Run the melody extraction on every input file and save the results.

    For each collected file four text files are written to args.output_dir,
    named after the part of the file name before its first dot:
    <name>.raw.melody (contour in Hz), <name>.pitch_confidence,
    <name>.MIDI.melody (contour in MIDI scale) and <name>.MIDI.smooth.melody
    (moving-averaged MIDI contour).

    :param args: namespace with the attributes input_files, output_dir,
                 gu, fs, maxf0 and minf0 (see parser()).

    """
    # NOTE(review): args.hs, args.sr, args.fi and args.ks are not read here;
    # contour_hop, contour_sr, filterIterations and mean_filter_size are used
    # instead -- presumably provided by GuitarTranscription_parameters, TODO
    # confirm whether the command line options should take precedence.
    print('============================')
    print('Running melody extraction...')
    print('============================')
    # parse and list files to be processed
    files = parse_input_files(args.input_files)

    # create result directory
    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir)
    # literal spacing reproduces the output of the former print statement
    print('  Output directory:  \n     %s' % args.output_dir)

    # processing
    for f in files:
        # common prefix of all result files of this input: output directory
        # plus the file name up to its first dot
        name = os.path.basename(f).split('.')[0]
        out_prefix = os.path.join(args.output_dir, name)

        # S1.1 initiate MELODIA
        pcm = PitchMelodia(harmonicWeight=harmonicWeight, minDuration=minDuration,
            binResolution=binResolution, guessUnvoiced=args.gu, frameSize=args.fs,
            hopSize=contour_hop, maxFrequency=args.maxf0, minFrequency=args.minf0,
            filterIterations=filterIterations, magnitudeThreshold=magnitudeThreshold,
            sampleRate=contour_sr, peakDistributionThreshold=peakDistributionThreshold)
        audio = MonoLoader(filename=f)()
        # run MELODIA
        melody_contour, pitchConfidence = pcm(audio)
        # save result: raw melody contour
        np.savetxt(out_prefix + '.raw.melody', melody_contour, fmt='%s')
        # save result: pitch confidence
        np.savetxt(out_prefix + '.pitch_confidence', pitchConfidence, fmt='%s')
        # convert Hz to MIDI scale
        melody_contour = hertz2midi(melody_contour)
        # save result: MIDI-scale melody contour
        np.savetxt(out_prefix + '.MIDI.melody', melody_contour, fmt='%s')
        # moving averaging filtering
        melody_contour = mean_filter(melody_contour, kernel_size=mean_filter_size)
        # save result: MIDI-scaled smoothed melody contour
        np.savetxt(out_prefix + '.MIDI.smooth.melody', melody_contour, fmt='%s')

if __name__ == '__main__':
    # Script entry point: read the options from the command line, then run
    # the extraction with them.
    cli_args = parser()
    main(cli_args)