Common signal representations

Many problems in signal and image processing consist in estimating a signal of interest from noisy and/or incomplete measurements. Such problems can be addressed by exploiting characteristics or regularities of natural signals and images, which can be identified or highlighted using specific signal representations. In this applied course, we will discuss commonly used signal and image representations.

Basics

Signal

Signal processing starts with a sensor that acquires an analog signal characterizing a physical phenomenon. For instance, a microphone converts an acoustic wave into an electric signal.

Image credit: Meinard Müller, “Fundamentals of Music Processing”, Springer 2015

Digital signal

For digital processing, the signal is sampled and quantized.

Image credit: Meinard Müller, “Fundamentals of Music Processing”, Springer 2015

Below, we import the library soundfile (as sf), and read the information of the bird-thrush-nightingale-03.wav file located in the audio folder using the sf.info function. If you want to know more about this function, just run sf.info? in a code cell.
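As a minimal sketch, that cell might look as follows:

```python
import soundfile as sf

# Read the file metadata (duration, sample rate, format) without loading the samples.
info = sf.info("audio/bird-thrush-nightingale-03.wav")
print(info)
```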

We can also use the IPython package to listen to this audio signal.
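A one-liner is enough for this, for instance:

```python
import IPython.display as ipd

# Embed an audio player in the notebook, reading directly from the file.
ipd.Audio("audio/bird-thrush-nightingale-03.wav")
```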

Question: Using the variable info, compute the number of samples in this audio signal. You will verify your answer by loading the audio signal using the sf.read function of soundfile.

Answer: The number of samples is given by the duration of the signal (in seconds) multiplied by the sample rate (in Hz).
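In code, this might look as follows (a sketch assuming a mono file, using the attributes of soundfile's info object):

```python
import soundfile as sf

info = sf.info("audio/bird-thrush-nightingale-03.wav")
n_samples = round(info.duration * info.samplerate)  # duration (s) x sample rate (Hz)

# Verify by loading the signal: for a mono file, one sample per frame.
x, sr = sf.read("audio/bird-thrush-nightingale-03.wav")
assert len(x) == n_samples == info.frames
```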

The above information tells us that this file is a WAV file using a 16-bit pulse-code modulation (PCM) representation. In particular, it means that the signal is quantized using 16 bits per sample. The amplitude of the signal is therefore represented using $2^{16} = 65 536$ values.

Representations of signals in the discrete time and frequency domains

Time-domain representation

To develop models and algorithms in signal processing we need to work with mathematical notations or representations of the physical signals we acquire in the real world.

Let's start with the simplest representation, in the time domain. We denote by $x(t) \in \mathbb{R}$ a signal whose samples are real valued and which is defined in the discrete-time domain $t \in \mathbb{Z}$.

In signal processing, we can define different classes of signals, which allows us to define different transformations of the signals. For instance, we can assume that the sum of the absolute values of the signal samples is finite, i.e., $\sum\limits_{t \in \mathbb{Z}} |x(t)| < \infty.$ This assumption allows us to define the discrete-time Fourier transform (DTFT). Understanding the DTFT first is useful to understand a more practical transform called the discrete Fourier transform (DFT) (spoiler: the DFT is a sampled version of the DTFT). But our goal is not to create signal processing heroes in this course, so we will jump directly to the DFT. Don't be too disappointed though; you can have a look at the appendices to learn about the DTFT and its relation with the DFT.

Real-world signals

So let's consider a really practical situation, where we acquire signals during a limited period of time and using sensors with limited dynamics. This implies that the signals are of finite length, amplitude, and energy. We therefore always have:

$$ \sum\limits_{t \in \mathbb{Z}} |x(t)| = \sum\limits_{t=0}^{T-1} |x(t)| < \infty. $$

We can therefore also represent a signal as a vector $\mathbf{x} \in \mathbb{R}^T$, where the entry of index $t \in \{0,...,T-1 \}$ contains the sample $x(t)$.

Question: Aren't you puzzled by some incoherence between what we've seen before about digital signals and this mathematical representation of supposedly real-world signals? In 1976, the British statistician George Box wrote that "all models are wrong, but some are useful." The above model of real-world signals is wrong, but definitely useful in many daily-life applications.

Answer: The amplitude is quantized and not real valued.

The waveform

So, a signal is actually a function from the discrete-time domain to the amplitude domain: $x : \mathbb{Z} \rightarrow \mathbb{R}$, or if you prefer $x : \{0,...,T-1\} \rightarrow \mathbb{R}$. Can we look at the graph of this function? Of course we can, and this is called the waveform.

For audio signals, the waveform represents the deviation over time of the air pressure (with respect to the average air pressure) at the microphone location: it is a pressure-time plot. Let's look at the waveform of our bird recording.
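A minimal sketch of the plotting cell:

```python
import numpy as np
import matplotlib.pyplot as plt
import soundfile as sf

x, sr = sf.read("audio/bird-thrush-nightingale-03.wav")
t = np.arange(len(x)) / sr          # time axis in seconds
plt.plot(t, x)
plt.xlabel("time (s)")
plt.ylabel("amplitude")
plt.show()
```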

Question: Why is the signal centered around 0? What pressure does this amplitude correspond to?

Answer: The audio signal results from the vibration of a physical system, which causes oscillations of the surrounding air particles. These should eventually go back to their equilibrium position, corresponding to the average air pressure.

Let's look at another, maybe simpler signal. In the next cell we load an electrocardiogram (ECG) and plot the corresponding waveform. Electrocardiography is a method to record heart activity by using electrodes to measure electrical changes on the skin. An ECG thus corresponds to the recording of a heart's electrical activity, and the waveform is a voltage-time plot.

The image below shows a schematic representation of a normal ECG. According to Wikipedia, normal rhythm produces four entities – a P wave, a QRS complex, a T wave, and a U wave – that each have a fairly unique pattern.

Image credit: Wikipedia.

An ECG captures multiple repetitions of this pattern, corresponding to several cardiac cycles. Unfortunately, a real ECG is quite different from this schematic representation, as it contains other periodic components that make it difficult to visualize the above patterns. Decomposing the signal into different components associated with different frequencies is very useful to remove such perturbations. This decomposition is achieved using the discrete Fourier transform (DFT).

Frequency domain representation

Discrete Fourier transform (DFT)

Let $x(t) \in \mathbb{R}$ be a signal of finite support $ \{ 0,...,T-1 \} $. The discrete Fourier transform (DFT) of order $F \in \mathbb{N}$ is defined by:

$$ X(f) = \sum\limits_{t=0}^{T-1} x(t) \exp\left(-j 2 \pi \frac{f t}{F}\right), \qquad f \in \{0,...,F-1\}.$$

If $F > T$, the signal is zero-padded and if $F < T$ it is truncated. In other words, we artificially change the length $T$ of the signal so that in the end it is equal to the DFT order $F$.

The inverse DFT is defined by:

$$ x(t) = \frac{1}{F} \sum\limits_{f=0}^{F-1} X(f) \exp\left(+j 2 \pi \frac{f t}{F}\right), \qquad t \in \mathbb{Z}. $$

Alternatively but equivalently, we could define the DFT by normalizing both the direct and the inverse transforms by $\frac{1}{\sqrt{F}}$, instead of normalizing the inverse DFT by $\frac{1}{F}$.

For real-valued signals, the DFT is complex valued; it gives the spectrum of the signal. So we will generally look at the modulus of the DFT, which is called the magnitude spectrum, or at the squared modulus, which is called the power spectrum. The argument of the DFT is called the phase spectrum.
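With numpy, and denoting by `x` the signal loaded above, these spectra can be computed as follows (a sketch):

```python
import numpy as np

X = np.fft.fft(x)           # DFT, of order F = len(x) by default
magnitude = np.abs(X)       # magnitude spectrum
power = np.abs(X) ** 2      # power spectrum
phase = np.angle(X)         # phase spectrum
```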

Hermitian symmetry

Let's have a look at the magnitude and power spectrum of our ECG signal.

We observe in the above spectra a form of symmetry around the middle of the frequency axis. This comes from an important property of the DFT of real-valued signals, which is called the Hermitian symmetry and is defined by:

$$X(F-f) = X^*(f), \qquad f \in \{0,...,F-1\},$$

where $\cdot^*$ denotes the complex conjugate.

Let's print the values of the DFT at the zero frequency ($f=0$) and at the Nyquist frequency ($f= F//2$), assuming that $F$ is even.
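For instance (a sketch, reusing the DFT `X` computed above):

```python
F = len(X)
print(X[0])       # zero frequency: imaginary part is exactly 0
print(X[F // 2])  # Nyquist frequency (F even): imaginary part is exactly 0
```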

We observe that the DFT coefficients at the zero and Nyquist frequencies are real-valued, i.e., the imaginary part is exactly zero. This can be easily verified theoretically as follows:

$$ X(0) = \sum\limits_{t=0}^{T-1} x(t) \in \mathbb{R}, \qquad X(F/2) = \sum\limits_{t=0}^{T-1} x(t) e^{-j \pi t} = \sum\limits_{t=0}^{T-1} x(t) (-1)^t \in \mathbb{R}. $$

The DFT can consequently be split into four parts:

  1. The zero-frequency coefficient $X(0)$, which is real valued;
  2. The $F/2-1$ coefficients $X(f)$ for $f = 1,...,F/2-1$, which are sometimes called the positive frequencies;
  3. The Nyquist-frequency coefficient $X(F/2)$, which is real valued;
  4. The $F/2-1$ coefficients $X(f)$ for $f = F/2+1,...,F-1$, which are sometimes called the negative frequencies.

The Hermitian symmetry property tells us that the negative-frequency coefficients are equal to the conjugate of the positive-frequency coefficients when we reverse the frequency axis. Let's verify this.
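A possible verification cell (sketch, reusing `X` and `F` from above):

```python
import numpy as np

pos = X[1:F // 2]             # positive frequencies
neg = X[F // 2 + 1:][::-1]    # negative frequencies, with the frequency axis reversed
print(np.allclose(neg, np.conj(pos)))  # True: X(F - f) = X*(f)
```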

Wow, this was harsh signal processing stuff. Don't worry, what you simply need to remember is the following:

Real-valued signals have a Hermitian-symmetric spectrum, so we usually keep only the non-redundant part of the spectrum, up to the Nyquist frequency. Here we go.
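In numpy, this is exactly what `np.fft.rfft` computes:

```python
import numpy as np

X_half = np.fft.rfft(x)       # only the F//2 + 1 non-redundant coefficients
print(len(x), len(X_half))    # T and T//2 + 1
```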

We see that in this signal, most of the energy is concentrated in the low frequencies, so let's zoom in a bit more. We also plot the waveform again for discussion.

Notice that we have a difference of about 60 dB between the most and least energetic frequency components of this signal. When we divide the amplitude of a signal by 2, we lose $20 \log_{10}(2) \approx 6$ dB, so a 60 dB decrease is quite significant.

Filtering of an ECG signal

We can make other observations from this power spectrum:

In order to use the ECG for diagnosis, we would like to eliminate these frequency components, since they are not associated with heart activity. This is a filtering task in signal processing. The naive approach would simply consist in zeroing the DFT coefficients at the frequencies we want to discard, and reconstructing the time-domain signal by inverse DFT.

Exercise: Complete the cell below to implement this naive filtering approach.

Solution
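The solution cell is not reproduced here, but a minimal sketch could look as follows, assuming `x` is the ECG signal, `fs` its sample rate, and `f_cut` a hypothetical cutoff frequency (in Hz) above which everything is discarded; the actual frequencies to remove depend on the observations above:

```python
import numpy as np

X = np.fft.fft(x)
freqs = np.fft.fftfreq(len(x), d=1 / fs)  # frequency (Hz) of each DFT bin
X[np.abs(freqs) > f_cut] = 0              # zero the bins and their negative mirrors
x_filt = np.real(np.fft.ifft(X))          # back to the time domain
```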

Let's plot the results. What do you observe?

This simple brick-wall filter is known to cause ringing artifacts in the reconstructed time-domain signal. You should probably ask a real signal processing hero to help you with the filter design, which is beyond the scope of this course.

Geometric interpretation and alternatives to the DFT

To finish this part, let's discuss a geometric interpretation of the DFT. This is probably what you should really remember about the DFT, because it shares strong connections with other aspects of signal representations that will be discussed in the following lessons.

Let $\mathbf{x} \in \mathbb{R}^T$ be the vector representation of the signal $x(t)$, $t \in \{ 0,...,T-1 \}$, and let $\mathbf{u}_f \in \mathbb{C}^T$ be the vector of entries $\mathbf{u}_f(t) = \frac{1}{\sqrt{F}} \exp\left(+j 2 \pi \frac{f t}{F}\right)$, $t \in \{ 0,...,T-1 \}$. This vector can be regarded as a complex sinusoid of frequency $f/F$, using Euler's formula:

$$ e^{+j 2 \pi \frac{f t}{F}} = \cos\left( 2 \pi \frac{f t}{F} \right) + j \sin\left( 2 \pi \frac{f t}{F} \right). $$

Then the (normalized) DFT can be expressed as inner products (on the complex vector space $\mathbb{C}^T$) between the signal $\mathbf{x}$ and complex sinusoids $\mathbf{u}_f$ of different frequencies:

$$ X(f) = \langle \mathbf{x}, \mathbf{u}_f \rangle = \mathbf{u}_f^H \mathbf{x}, \qquad f \in \{0,...,F-1\}, $$

where $\cdot^H$ denotes Hermitian transpose (transpose + complex conjugate).

If we assume that $F = T$ (after zero-padding or truncation), the vector $\check{\mathbf{x}} \in \mathbb{C}^F$ of DFT coefficients is obtained by multiplying the signal $\mathbf{x} \in \mathbb{R}^T$ with the DFT matrix $\mathbf{U} \in \mathbb{C}^{F \times T}$ where each row of index $f \in \{0,...,F-1\}$ is defined by ${\mathbf{u}_f^H \in \mathbb{C}^T}$:

$$ \check{\mathbf{x}} = \mathbf{U} \mathbf{x}, \qquad (\mathbf{U})_{f,:} = \mathbf{u}_f^H. $$
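A quick numerical check of this matrix formulation (a sketch, with the normalized convention):

```python
import numpy as np

T = F = 8
t = np.arange(T)
f = np.arange(F)[:, None]
U = np.exp(-2j * np.pi * f * t / F) / np.sqrt(F)       # row f of U is u_f^H
x = np.random.randn(T)
assert np.allclose(U @ x, np.fft.fft(x) / np.sqrt(F))  # matches the (normalized) DFT
assert np.allclose(U.conj().T @ U, np.eye(F))          # U is unitary
```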

The figure below shows the real (left) and imaginary (right) parts of the DFT matrix:

Image credit: Meinard Müller, “Fundamentals of Music Processing”, Springer 2015

Question: Can you explain why we observe white horizontal lines on the right figure?

Answer: These correspond to the zero and Nyquist frequencies, see discussion above.

Now let's focus on the inverse DFT. If we go back to its definition (in the normalized case):

$$ x(t) = \frac{1}{\sqrt{F}} \sum\limits_{f=0}^{F-1} X(f) \exp\left(+j 2 \pi \frac{f t}{F}\right), \qquad t \in \mathbb{Z}, $$

we see that the time-domain signal is represented as a linear combination of complex sinusoids. How come this operation can lead to a real-valued time-domain signal? Well, this is precisely due to the Hermitian symmetry property of the DFT of real-valued signals.

Finally, it can be shown that the unit-norm vectors $\{\mathbf{u}_f \in \mathbb{C}^T\}_{f=0}^{F-1}$ are orthogonal and span $\mathbb{C}^T$, so they form an orthonormal basis of $\mathbb{C}^T$ and the DFT is just a change of basis. The DFT matrix is therefore unitary and we have

$$ \mathbf{U}^H \check{\mathbf{x}} = \mathbf{U}^H \mathbf{U} \mathbf{x} = \mathbf{x}.$$

In summary, the DFT gives us the decomposition of the time-domain signal onto the set of complex sinusoids $\{\mathbf{u}_f, f \in \{0,...,F-1\}\}$, and the inverse DFT reconstructs the time-domain signal as a linear combination of the same complex sinusoids, weighted by the DFT coefficients: $$ \mathbf{x} = \sum\limits_{f=0}^{F-1} X(f) \mathbf{u}_f = \sum\limits_{f=0}^{F-1} \langle \mathbf{x}, \mathbf{u}_f \rangle \mathbf{u}_f. $$

There exist alternatives to Fourier's complex sinusoids for spectral analysis. The above geometric interpretation remains valid: these alternatives simply consist in replacing the complex sinusoids by other functions that also form an orthonormal basis of $\mathbb{C}^T$ (or $\mathbb{R}^T$). For instance, using cosine functions defined by $$ \mathbf{u}_f(t) = \sqrt{\frac{2}{T}} \cos\left[ \frac{\pi}{T} \left(f + \frac{1}{2}\right) \left(t + \frac{1}{2}\right) \right] $$ leads to the discrete cosine transform (DCT) of type IV. The DCT (or variants of the DCT) is very popular in data compression; for instance, it is used in the JPEG and MP3 compression standards. We will come back to this at the end of the notebook.
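As a quick check that this is also an orthonormal change of basis, scipy's type-4 DCT with `norm="ortho"` implements the cosine atoms above (a sketch):

```python
import numpy as np
from scipy.fft import dct, idct

x = np.random.randn(64)
c = dct(x, type=4, norm="ortho")                      # analysis: inner products with cosines
assert np.allclose(idct(c, type=4, norm="ortho"), x)  # synthesis: perfect reconstruction
assert np.isclose(np.sum(x ** 2), np.sum(c ** 2))     # orthonormality preserves energy
```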

In the next section, we will discuss some limitations of the DFT and how they can be addressed using time-frequency representations.

Time-frequency representation

Consider the problem of automatic music transcription, i.e., estimating a musical score given an audio recording. Below is an example of a musical score (with more annotations than usual, for the sake of the example). A musical score is used by musicians to indicate a succession of audio events (notes) with indicators of pitch, dynamics, tempo, and timbre. Tempo and rhythm relate to time (measured in seconds), pitch and timbre relate to frequency (measured in Hertz), and dynamics relates to intensity or power (measured in decibels).

Let's consider a piano recording of "Au clair de la lune" (the melody corresponding to the above musical score) and plot the signal representations that we are familiar with, the waveform and the power spectrum.

Question: In order to solve our automatic music transcription task, we would like to compute a representation that highlights the characteristics of the signal along the same dimensions as in the musical score. Explain what is missing in the waveform and power spectrum representations.

Answer:

In the waveform, the frequency information is missing. In the power spectrum, the frequency information is "averaged" over the entire time domain, such that local phenomena (here, individual musical notes) become global in the DFT.

Fourier analysis is insufficient when the frequency characteristics of the signal vary with time. We speak of non-stationarity. The non-stationarity can be slow, for instance in the case of a vibrato in music, where the frequency characteristics of the sound (notably its pitch) slowly vary with time. Or it can be fast, for instance in the case of a percussive or more generally transient sound.

To compute a representation that highlights the characteristics of the signal along time, frequency, and intensity, we need a local DFT. This is called the short-time Fourier transform (STFT) and leads to a representation called the spectrogram.

The short-time Fourier transform (STFT) is obtained by computing the discrete Fourier transform (DFT) on short overlapping smoothed windows of the signal, as illustrated below.

We are going to define the STFT properly.

Windowing

Let $x(t) \in \mathbb{R}$ be a signal defined in the time domain $t \in \mathbb{Z}$. A frame of the signal is defined for all $n \in \mathbb{Z}$ by:

$$ x_n(t) = x(t + nH) w_a(t),$$

where

$x(t + nH)$ is a shifted version of $x(t)$, where the shift is equal to $n$ times the analysis hop size $H$;

$w_a(t)$ is an analysis window of finite time support $\{0,...,L-1\}$.

Question: Assume $x(t) \neq 0$ only for $0 \le t \le T-1 $, what is the time support of $x(t + nH)$? If $n > 0$, is the signal shifted left or right?

Answer: $x(t + nH) \neq 0$ for $0 \le t + nH \le T-1 \Leftrightarrow - nH \le t \le T-1 - nH$. If $n > 0$ the signal is shifted left.

The time support of the frame $x_n(t)$ is the same as that of the analysis window $w_a(t)$. In audio signal processing, we typically choose a short analysis window (between 10 and 100 ms) so that we can assume the stationarity of the signal over its time support; we speak of local stationarity.

Remark: In the above definition of the frame, we shift the signal $x(t)$ by a number of samples $nH$ instead of shifting the analysis window $w_a(t)$, as represented in the above figure. This definition corresponds to the band-pass convention. There exists another definition, in the low-pass convention, where the analysis window is shifted instead.

Direct STFT

The STFT is defined for all time-frequency points $(f,n) \in \{0,...,F-1\} \times \mathbb{Z}$ by the DFT of the frame $x_n(t)$:

$$ X(f,n) = \sum_{t \in \mathbb{Z}} x_n(t) \exp\left(-j2\pi \frac{ft}{F}\right),$$

where $F$ is the order of the DFT, which can be chosen as $F=L$, the length of the analysis window. The STFT inherits the properties of the DFT: it is complex valued, and the STFT of a real-valued signal exhibits Hermitian symmetry. Thus, as for the DFT, we usually keep only the non-redundant part of the spectrum.

In practice, the time support of the STFT is finite (because the time-domain signal is of finite length $T$), such that we can denote by $\mathbf{X} \in \mathbb{C}^{F \times N}$ the STFT matrix, where $F$ is the number of frequency bins and $N$ the number of time frames (which can be computed from $T$, $L$ and $H$). Then, $(\mathbf{X})_{f,n} = X(f,n)$.

We are going to implement the STFT using the sine window, which is actually the square root of the Hann window (there exist many different windows with different properties):

$$ w_a(t) = \sin\left( \frac{\pi}{L} \left(t + \frac{1}{2}\right) \right), \qquad t = 0,..., L-1. $$

Read the code in the following cells to understand how the STFT is computed.
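Those cells are not reproduced here, but a condensed sketch of the same computation (band-pass convention, $F = L$, keeping only full frames) could be:

```python
import numpy as np

def stft(x, L=1024, H=512):
    w = np.sin(np.pi / L * (np.arange(L) + 0.5))   # sine analysis window
    N = 1 + (len(x) - L) // H                      # number of full frames
    frames = np.stack([w * x[n * H:n * H + L] for n in range(N)], axis=1)
    return np.fft.rfft(frames, axis=0)             # keep the non-redundant part

# X = stft(x); power_spectrogram = np.abs(X) ** 2
```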

Below, we display the resulting power spectrogram.

Question: Explain what you observe in this spectrogram representation by comparing it with the corresponding musical score. Propose a simple method to estimate the musical score from this representation.

Answer: We observe the succession of musical notes. For instance, we see that the audio recording starts with three repetitions of the same note. Indeed, a note is characterized by its fundamental frequency (also called the pitch), which corresponds to the frequency of the first peak in its spectrum. We could therefore estimate the musical score by peak picking from the spectrogram representation.

Inverse STFT and overlap-add

The inverse STFT is computed by taking the inverse DFT of the spectra at all time indices and by a procedure called overlap-add.

For each time frame $n \in \mathbb{Z}$ of the STFT, we first compute the inverse DFT:

$$ \hat{x}_n(t) = \frac{1}{F}\sum_{f=0}^{F-1} X(f,n) \exp\left(+ j2\pi \frac{ft}{F}\right).$$

The inverse STFT is then computed by overlap-add, for all $t \in \mathbb{Z}$:

$$ \hat{x}(t) = \sum_{n \in \mathbb{Z} } w_s(t-nH) \hat{x}_n(t-nH),$$

where $w_s(t)$ is a smooth synthesis window with the same support as the analysis window.

Read the following cells that implement the inverse STFT.
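Again as a sketch, an inverse STFT matching the `stft` function above (same sine window for synthesis):

```python
import numpy as np

def istft(X, L=1024, H=512):
    w = np.sin(np.pi / L * (np.arange(L) + 0.5))   # sine synthesis window
    N = X.shape[1]                                 # number of time frames
    x_hat = np.zeros((N - 1) * H + L)
    for n in range(N):                             # overlap-add
        x_hat[n * H:n * H + L] += w * np.fft.irfft(X[:, n], n=L)
    return x_hat
```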

Question: Plot the difference between the original audio signal and its reconstruction after direct and inverse STFT. What do you conclude?

Answer: The amplitude of the error is about $10^{-16}$, i.e., machine precision, indicating that the reconstruction is perfect up to numerical errors.

Perfect reconstruction

We can show that perfect reconstruction is achieved, i.e., $\hat{x}(t) = x(t)$ for all $t \in \mathbb{Z}$, if the analysis and synthesis windows satisfy:

$$ \sum_{n \in \mathbb{Z} } w_a(t-nH)w_s(t-nH) = 1.$$

Hint for the proof (only for the brave): $\displaystyle \frac{1}{F} \sum_{f=0}^{F-1} \exp\left( -j 2 \pi \frac{ft}{F} \right) = 1$ if $t=0$, and $0$ if $t \in \mathbb{Z}^*$. It corresponds to the sum of the $F$-th roots of unity, and this result can be shown by recognizing a sum of the successive terms of a geometric sequence.

This condition is satisfied for the sine window we have used for both analysis and synthesis ($w_a(t) = w_s(t)$) and with a 50% overlap ($H = L/2$). We are going to verify this by defining an arbitrary number of time frames $N = 10$ for the overlap-add operation, such that $n=0,...,N-1$, and by computing the overlap-add operation in the last equation.
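Here is a possible sketch of this verification:

```python
import numpy as np

L, H, N = 1024, 512, 10                        # 50% overlap: H = L/2
w = np.sin(np.pi / L * (np.arange(L) + 0.5))   # sine window (analysis = synthesis)
ola = np.zeros((N - 1) * H + L)
for n in range(N):
    ola[n * H:n * H + L] += w * w              # w_a(t - nH) * w_s(t - nH)
print(np.allclose(ola[H:-H], 1.0))             # True: condition holds away from the edges
```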

We indeed observe that, except at the edges, the perfect reconstruction condition is verified. In practice, when computing the STFT, we will do some preprocessing (zero-padding) to deal with the edges. When computing the inverse STFT, we will apply the corresponding postprocessing (removing the first and last samples) to ensure perfect reconstruction. Note that we could also simply divide the output of the inverse STFT by the overlap-add $\sum_n w_a(t-nH) w_s(t-nH)$.

Effect of the analysis window length and time-frequency resolution

In the following, we will use librosa, a Python package for music and audio analysis, to compute spectrograms.

Run the following cells to analyze and listen to the recording of a glockenspiel. In particular, we will plot the spectrogram with different lengths for the analysis window.
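A sketch of those cells (the file name is an assumption):

```python
import numpy as np
import librosa
import librosa.display
import matplotlib.pyplot as plt

x, sr = librosa.load("audio/glockenspiel.wav", sr=None)  # hypothetical file name
fig, axes = plt.subplots(1, 3, figsize=(15, 4))
for ax, L in zip(axes, [256, 1024, 4096]):               # analysis window lengths
    S = np.abs(librosa.stft(x, n_fft=L, hop_length=L // 2)) ** 2
    librosa.display.specshow(librosa.power_to_db(S, ref=np.max), sr=sr,
                             hop_length=L // 2, x_axis="time", y_axis="hz", ax=ax)
    ax.set_title(f"L = {L}")
plt.show()
```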

When a glockenspiel bar is struck with a mallet, it vibrates at a fundamental frequency while emitting a series of overtones, which are generally not harmonic (i.e., not integer multiples of the fundamental frequency).

Question: Describe the structure of the spectrogram and comment on the effect of the analysis window length.

Answer:

The spectrogram essentially consists of horizontal and vertical lines. Vertical lines correspond to the strikes of the instrument bars with a mallet, which produce transient sounds. Horizontal lines correspond to the vibration of the bars, which produces steady sounds.

As the analysis window length increases, the vertical lines become thinner and the horizontal lines become thicker. This effect characterizes the trade-off between time and frequency resolution. With a short analysis window, we have a good time resolution, allowing us to precisely localize events in time (the bar being struck), but we cannot at the same time precisely localize the events in frequency (the pitch of the note). With a longer analysis window, it is the contrary: we can precisely localize the events in frequency, but not in time.

To further understand this notion of resolution, read the corresponding section in the appendix. Note also that while the time-frequency resolution is fixed with the STFT, the wavelet transform automatically adapts the time-frequency resolution. With the wavelet transform, the time resolution increases with the frequency, which corresponds to the intuition that high-frequency (i.e., rapidly-varying) signal components need a better time resolution than low-frequency (i.e., slowly-varying) ones.

Another important observation that can be made by looking at the spectrogram concerns the distribution of the energy. We see that the energy in this signal is strongly localized in time or frequency, and most of the time-frequency points are actually black, which corresponds to very small values of the spectrogram. Audio signals tend to be sparse in the time-frequency domain; we will come back to this notion of sparsity later in the course.

Interpretation as the decomposition over a set of time-frequency atoms

Remember, we've seen that the computation of the DFT of a signal can be interpreted as the signal's decomposition over a set of complex sinusoids. And the time-domain signal can be reconstructed by linearly combining the complex sinusoids.

Similarly, we can see the time-frequency analysis of a signal $\mathbf{x} \in \mathbb{R}^T$ as its decomposition over a dictionary (or a set) of time-frequency atoms $\{\boldsymbol{\psi}_{fn} \in \mathbb{C}^T\}_{(f,n) \in \{0,...,F-1\}\times \mathbb{Z}}$:

$$ X(f,n) = \langle \mathbf{x},\boldsymbol{\psi}_{fn} \rangle = \boldsymbol{\psi}_{fn}^H \mathbf{x} = \sum_{t \in \mathbb{Z}} x(t) \psi_{fn}^*(t).$$

In the case of the STFT (and using the normalized definition of the DFT), the time-frequency atom $\psi_{fn}(t)$ is a temporally localized frame of the complex sinusoid at frequency $f$:

$$ \psi_{fn}(t) = \frac{1}{\sqrt{L}} w(t-nH) \exp\left(+j2\pi\frac{f(t-nH)}{L}\right).$$

Inverse STFT then consists in reconstructing the time-domain signal by linear combination of the time-frequency atoms:

$$ \hat{\mathbf{x}} = \sum_{f=0}^{F-1} \sum_{n \in \mathbb{Z}} X(f,n) \boldsymbol{\psi}_{fn} = \sum_{f=0}^{F-1} \sum_{n \in \mathbb{Z}} \langle \mathbf{x},\boldsymbol{\psi}_{fn} \rangle \boldsymbol{\psi}_{fn}.$$

Using different time-frequency atoms leads to different transforms. For instance, the modified discrete cosine transform (MDCT) is a local version of the DCT-IV that uses frames of cosine functions:

$$ \psi_{fn}(t) = \frac{4}{\sqrt{L}} w(t-nH) \cos\left(\frac{2\pi}{L} \left(f+\frac{1}{2}\right) \left(t-nH+\frac{1}{2}+ \frac{L}{4}\right) \right).$$

MDCT is the most widely used lossy compression technique in audio data compression. It is employed in most modern audio coding standards, including MP3.

Beyond audio spectrograms

We have been focusing on spectrograms of audio signals, but time-frequency representations such as the spectrogram are routinely used in countless applications, such as:

Spectrograms are frequently used as the input of machine listening systems for the automatic retrieval of information from audio signals (e.g., voice activity detection, speech recognition, music transcription, speech emotion recognition, sound event classification), with applications in smartphones, smart speakers, robotics, home automation, smart cities, etc. A spectrogram representation is indeed much more discriminative of the constitutive elements of the sound than the raw waveform. Look for instance at the waveforms and spectrograms of the sounds of different musical instruments below:

In the deep learning course, you will for instance develop a system for in-home monitoring of the autonomy of elderly people, using spectrograms and deep neural networks.

Image, structure in the DCT domain, and compression

In a future course, we will see how the specific structure of audio signals in the time-frequency domain can be used to perform audio source separation, that is to separate different sounds from the observation of their mixture. Today, let's focus on the structure of natural images.

From 1D to 2D signal transformations

So far, we have been discussing representations of 1D signals, for instance based on the DFT or the DCT of stationary signals. We have seen that these transforms basically consist of representing a signal $\mathbf{x} \in \mathbb{R}^T$ as a linear combination of basis signals $\{\mathbf{u}_f \in \mathbb{C}^T\}_{f=0}^{F-1}$ (complex sinusoids for the DFT and cosine functions for the DCT). We have been focusing on 1D signals that represent the evolution of a quantity (e.g., the air pressure deviation) over time. However, an image is a 2D signal that represents the evolution of a quantity (e.g., light intensity) over two spatial axes, so how do we extend the 1D signal transformations to the 2D case?

The simplest approach consists of generating 2D basis signals by taking the outer product of 1D basis signals. From the basis $\{\mathbf{u}_f\}_{f=0}^{F-1}$ of $\mathbb{C}^T$ we can build a basis $\left\{\mathbf{u}_{f_1, f_2}\right\}_{f_1=0, f_2=0}^{F-1, F-1}$ of $\mathbb{C}^{T \times T}$ defined by: $$ \mathbf{u}_{f_1, f_2} = \mathbf{u}_{f_1} \mathbf{u}_{f_2}^H \in \mathbb{C}^{T \times T}, $$ or equivalently $$ \mathbf{u}_{f_1, f_2}(t_1, t_2) = \mathbf{u}_{f_1}(t_1) \mathbf{u}_{f_2}(t_2)^*, \qquad t_1, t_2 =0,...,T-1. $$

In the next cell, we compute the 2D DCT (type II) of a grayscale image and plot the image, its log-magnitude spectrum (logarithm of the absolute value of the DCT coefficients), and the image reconstruction after inverse DCT.
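A sketch of that cell, using scipy's multidimensional DCT (the image path is an assumption):

```python
import numpy as np
import matplotlib.pyplot as plt
from scipy.fft import dctn, idctn

im = plt.imread("images/example.png").astype(float)   # hypothetical grayscale image
im_dct = dctn(im, type=2, norm="ortho")               # 2D DCT-II
im_rec = idctn(im_dct, type=2, norm="ortho")          # inverse 2D DCT

fig, axes = plt.subplots(1, 3, figsize=(12, 4))
axes[0].imshow(im, cmap="gray"); axes[0].set_title("image")
axes[1].imshow(np.log10(np.abs(im_dct) + 1e-12)); axes[1].set_title("log-magnitude DCT")
axes[2].imshow(im_rec, cmap="gray"); axes[2].set_title("reconstruction")
plt.show()
```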

Question: How many pixels do we have in this image?

Answer: 256 x 256 = 65 536 pixels

Structure of the image in the DCT domain

We are now going to see that the image has a specific structure in the DCT domain that it does not have in the original spatial domain, and which can be used for image compression, denoising, or more generally reconstruction of corrupted images (e.g., deblurring, inpainting, super-resolution, demosaicing, etc.). Today, we will focus on image compression.

Question: Complete the next cell to compare the histograms of the absolute value of the image and DCT coefficients. What do you observe?

Answer: The DCT representation is sparse: most DCT coefficients are (close to) zero.
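A sketch of the comparison (reusing `im` and `im_dct` from above):

```python
import numpy as np
import matplotlib.pyplot as plt

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 3))
ax1.hist(np.abs(im).ravel(), bins=100)       # pixel values: spread out
ax1.set_title("image")
ax2.hist(np.abs(im_dct).ravel(), bins=100)   # DCT coefficients: peaked at zero
ax2.set_title("DCT coefficients")
plt.show()
```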

In the next cell, we plot the sorted (decreasing order) absolute values of the image and DCT coefficients (the y-axis is logarithmic).

JPEG image compression

Exercise

The JPEG compression standard leverages the sparsity of the image in the DCT domain. It works by thresholding the DCT coefficients to keep only the largest ones; coefficients smaller than a given threshold are set to zero, and a zero coefficient only takes 1 bit to encode. Complete the next cell to compare the reconstructions of the image after keeping only 1, 5, 10 and 20% of the DCT coefficients with the largest absolute values (the other coefficients are set to zero). You will use im_DCT_flat_sort to determine the value of the threshold.
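A possible sketch of the solution, assuming im_DCT_flat_sort holds the absolute values of the DCT coefficients sorted in decreasing order (as in the notebook):

```python
import numpy as np
from scipy.fft import idctn

for ratio in [0.01, 0.05, 0.10, 0.20]:
    k = int(ratio * im_dct.size)                      # number of coefficients to keep
    thresh = im_DCT_flat_sort[k - 1]                  # k-th largest absolute value
    im_dct_thr = np.where(np.abs(im_dct) >= thresh, im_dct, 0.0)
    im_rec = idctn(im_dct_thr, type=2, norm="ortho")  # reconstruct, then display/compare
```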

The previous image is actually a non-stationary 2D signal. Just as we defined the STFT as a local DFT to analyze non-stationary 1D signals, we can define a local DCT to analyze non-stationary 2D images. This is what the JPEG compression standard actually does: it computes the DCT on image blocks of 8x8 pixels.

Exercise

Apply the same compression technique as in the previous cell but using a block-processing approach, i.e., computing the DCT on 8x8 blocks. Compare the results with the previous global approach, quantitatively and qualitatively. You will use the peak signal-to-noise ratio (PSNR) to quantify the reconstruction quality of the image; see the sketch below.
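A sketch of the block-processing approach and of the PSNR computation (the helper names are illustrative, not from the notebook):

```python
import numpy as np
from scipy.fft import dctn, idctn

def blockwise(im, B=8, inverse=False):
    """Apply the 2D DCT-II (or its inverse) independently on B x B blocks."""
    f = idctn if inverse else dctn
    out = np.zeros_like(im)
    for i in range(0, im.shape[0], B):
        for j in range(0, im.shape[1], B):
            out[i:i + B, j:j + B] = f(im[i:i + B, j:j + B], type=2, norm="ortho")
    return out

def psnr(ref, rec, peak=1.0):
    """Peak signal-to-noise ratio in dB (peak: maximal pixel value, assumed 1 here)."""
    return 10 * np.log10(peak ** 2 / np.mean((ref - rec) ** 2))
```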

Take-home messages

To conclude, let us stress that without signal processing, the phase vocoder would not exist, Auto-Tune would not exist, and Jul would not have become in 2020 the best-selling artist in the history of French rap.

Appendices

Discrete-time Fourier transform

Definition

Let $x(t) \in \mathbb{R}$ be a signal defined in the time domain $t \in \mathbb{Z}$ such that $\sum\limits_{t \in \mathbb{Z}} |x(t)| < \infty$.

The discrete-time Fourier transform is defined by

$$ \hat{x}(\nu) = \sum\limits_{t \in \mathbb{Z}} x(t) e^{-j 2 \pi \nu t}, $$

where $\nu$ is called the normalized frequency.

By definition, the DTFT is periodic with period 1:

$$\hat{x}(\nu + 1) = \sum\limits_{t \in \mathbb{Z}} x(t) e^{-j 2 \pi (\nu +1) t} = \sum\limits_{t \in \mathbb{Z}} x(t) e^{-j 2 \pi \nu t} e^{-j 2 \pi t} = \sum\limits_{t \in \mathbb{Z}} x(t) e^{-j 2 \pi \nu t} = \hat{x}(\nu). $$

We thus limit its representation to the interval $\nu \in [-0.5, 0.5[$ or $\nu \in [0, 1[$.

The inverse discrete-time Fourier transform is defined by:

$$ x(t) = \int_{-1/2}^{1/2} \hat{x}(\nu) e^{+j 2 \pi \nu t} d\nu.$$

Properties of the DTFT (time domain $\leftrightarrow$ frequency domain)

Real-world signals

In practice, we acquire signals during a limited period of time and using sensors with limited dynamics. The consequence is that the signals are of finite length, amplitude, and energy. We therefore always have:

$$ \sum\limits_{t \in \mathbb{Z}} |x(t)| = \sum\limits_{t=0}^{T-1} |x(t)| < \infty. $$

We could therefore also represent this signal as a vector $\mathbf{x} \in \mathbb{R}^T$, where the entry of index $t \in \{0,...,T-1 \}$ contains the sample $x(t)$.

Sampling the DTFT gives the DFT

In practice, we also work with a finite subset of frequencies. We fix a number of frequencies $F \in \mathbb{N}$ and define

$$ \nu_f = \frac{f}{F} \in [0, 1[, \qquad f \in \{0,...,F-1\}.$$

By sampling the DTFT of a signal of finite length $T$ we obtain the discrete Fourier transform (DFT):

$$ \hat{x}(\nu_f) = \sum\limits_{t \in \mathbb{Z}} x(t) e^{-j 2 \pi \frac{f}{F} t} = \sum\limits_{t=0}^{T-1} x(t) e^{-j 2 \pi \frac{f}{F} t} = X(f). $$

A consequence of this sampling is the periodization of the time-domain signal, which is also why we need to assume a finite-support signal when defining the DFT.

DTFT and DFT of simple signals, precision and resolution

Constant signal

We consider the following finite-length constant signal defined for $t \in \mathbb{Z}$ by

$$ x(t) = \begin{cases} 1 &\text{if } t \in \{0,...,T-1\} \\ 0 &\text{otherwise}. \end{cases} $$

Its DTFT is given by:

$$ \hat{x}(\nu) = \frac{\sin(\pi \nu T)}{\sin(\pi \nu)} e^{-j\pi\nu(T-1)}. $$

In the next cell, we plot the magnitude spectrum of this signal computed with the DTFT and the DFT, for different lengths $T$. We observe a spectrum with the shape of a cardinal sine, with a principal lobe of width $2/T$.
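A sketch of that cell, using zero-padding to sample the DTFT densely:

```python
import numpy as np
import matplotlib.pyplot as plt

T = 32
x = np.ones(T)
X_dft = np.abs(np.fft.fft(x))            # DFT of order F = T
X_dtft = np.abs(np.fft.fft(x, n=8192))   # dense sampling of the DTFT via zero-padding
plt.plot(np.arange(8192) / 8192, X_dtft, label="DTFT")
plt.stem(np.arange(T) / T, X_dft, linefmt="C1-", markerfmt="C1o", label="DFT")
plt.xlabel(r"normalized frequency $\nu$")
plt.legend()
plt.show()
```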

We can conclude that the shorter the time-domain signal, the larger the principal lobe, and the lower the resolution.

Sinusoid

We now consider the following finite-length sinusoid defined for $t \in \mathbb{Z}$ by

$$ x(t) = \begin{cases} \sin (2 \pi \nu_0 t) &\text{if } t \in \{0,...,T-1\} \\ 0 &\text{otherwise}, \end{cases} $$

where $\nu_0 = 0.1$ and $T \in \{16,32,64\}$.

In the previous code cell, uncomment the appropriate lines (CTRL + '/') to plot the magnitude spectrum of this sinusoid, for the different lengths. You should observe two principal lobes centered around $\nu_0$ and $- \nu_0$. The same principle applies regarding the resolution: the more periods we observe, the "more certain" we are about the pure tone frequency.

Note that the blue curve is actually an interpolation of the DFT of order $F = 128$. Increasing the order of the DFT simply increases the precision, i.e., the number of points used to sample the DTFT; it does not change the resolution.

Sum of sinusoids

We now consider the following sum of two sinusoids with close normalized frequencies:

$$ \forall t \in \mathbb{Z}, \qquad x(t) = \begin{cases} \sin (2 \pi \nu_0 t) + \sin (2 \pi \nu_1 t) &\text{if } t \in \{0,...,T-1\} \\ 0 &\text{otherwise} \end{cases} $$

where

In the previous code cell, uncomment the appropriate lines (CTRL + '/') to plot the magnitude spectrum of this signal, for the different lengths. You should observe that for $T=16$, the resolution is not sufficient to separate the two frequencies.

References

Parts of this notebook were inspired by or adapted from the following resources: