Last active
December 14, 2015 17:25
-
-
Save matbra/bf9c53f86f2ef388de90 to your computer and use it in GitHub Desktop.
This gist contains code that is part of a programming assignment (Peer Assessment 10a) for the Coursera course "Audio Signal Processing for Music Applications".
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def sineModelMultiRes(x, fs, w, N, t, B): | |
""" | |
Analysis/synthesis of a sound using the sinusoidal model, without sine tracking | |
x: input array sound, w: analysis window, N: size of complex spectrum, t: threshold in negative dB | |
returns y: output array sound | |
This is an extension of the original sineModel() function that supports different DFT | |
resolutions/window lengths in different subbands. | |
w is assumed to be a Python list of N_w windows | |
N is a Python list of (corresponding) DFT lengths | |
B is a Python list with (N_w - 1) elements, containing the frequencies where | |
the bands are split (0 Hz and fs/2 are always included) | |
Hence, not only three bands are supported but multiple, if desired. | |
""" | |
# determine the half analysis window lengths | |
hM1 = np.zeros(len(w)) | |
hM2 = np.zeros(len(w)) | |
for idx_w, cur_w in enumerate(w): | |
hM1[idx_w] = int(math.floor((cur_w.size+1)/2)) # half analysis window size by rounding | |
hM2[idx_w] = int(math.floor(cur_w.size/2)) # half analysis window size by floor | |
N_bands = len(w) | |
Ns = 512 # FFT size for synthesis (even) | |
H = Ns/4 # Hop size used for analysis and synthesis | |
hNs = Ns/2 # half of synthesis FFT size | |
pin = np.max(hM1) # init sound pointer in middle of anal window | |
pend = x.size - max(hNs, np.max(hM1)) # last sample to start a frame | |
# fftbuffer = [np.zeros(n) for n in N] # initialize buffer for FFT | |
yw = np.zeros(Ns) # initialize output sound frame | |
y = np.zeros(x.size) # initialize output array | |
for cur_w in w: # normalize analysis window | |
cur_w /= sum(cur_w) | |
sw = np.zeros(Ns) # initialize synthesis window | |
ow = triang(2*H) # triangular window | |
sw[hNs-H:hNs+H] = ow # add triangular window | |
bh = blackmanharris(Ns) # blackmanharris window | |
bh = bh / sum(bh) # normalized blackmanharris window | |
sw[hNs-H:hNs+H] = sw[hNs-H:hNs+H] / bh[hNs-H:hNs+H] # normalized synthesis window | |
while pin<pend: # while input sound pointer is within sound | |
#-----analysis----- | |
iploc_overall = [] | |
ipmag_overall = [] | |
ipphase_overall = [] | |
ipfreq_overall = [] | |
for idx_band in range(N_bands): | |
f_lower = 0 if idx_band==0 else B[idx_band-1] | |
f_upper = fs/2 if idx_band==N_bands-1 else B[idx_band] | |
x1 = x[pin-hM1[idx_band]:pin+hM2[idx_band]] | |
mX, pX = DFT.dftAnal(x1, w[idx_band], N[idx_band]) # compute dft | |
ploc = UF.peakDetection(mX, t) # detect locations of peaks | |
pmag = mX[ploc] # get the magnitude of the peaks | |
iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc) # refine peak values by interpolation | |
ipfreq = fs*iploc/float(N[idx_band]) # convert peak locations to Hertz | |
idx_freqs_in_cur_band = np.where((ipfreq >= f_lower) & (ipfreq < f_upper)) | |
iploc_overall.append(iploc[idx_freqs_in_cur_band]) | |
ipmag_overall.append(ipmag[idx_freqs_in_cur_band]) | |
ipphase_overall.append(ipphase[idx_freqs_in_cur_band]) | |
ipfreq_overall.append(ipfreq[idx_freqs_in_cur_band]) | |
#-----synthesis----- | |
Y = UF.genSpecSines(ipfreq, ipmag, ipphase, Ns, fs) # generate sines in the spectrum | |
fftbuffer = np.real(ifft(Y)) # compute inverse FFT | |
yw[:hNs-1] = fftbuffer[hNs+1:] # undo zero-phase window | |
yw[hNs-1:] = fftbuffer[:hNs+1] | |
y[pin-hNs:pin+hNs] += sw*yw # overlap-add and apply a synthesis window | |
pin += H # advance sound pointer | |
return y |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment