%matplotlib inline
import matplotlib.pyplot as plt
plt.style.use('bmh')

import numpy as np

def make_sine_wave(f0, sampling_frequency, frame_size, phase=0):
    """Build one frame of a unit-amplitude sinusoid.

    :param f0: float, frequency of the sinusoid (cycles per second)
    :param sampling_frequency: int, number of samples per second
    :param frame_size: int, number of samples in frame
    :param phase: float, offset added to the argument of the sine (radians)
    :return:
        - waveform - ndarray of waveform
    """
    # Time stamp of every sample, starting at zero.
    sample_times = np.arange(frame_size) / sampling_frequency
    angle = 2 * np.pi * f0 * sample_times + phase
    return np.sin(angle)


def make_harmonic_wave(f0, sampling_frequency, frame_size, n_harmonics=10):
    """Sum the first ``n_harmonics`` multiples of f0, each weighted by 1/order.

    The partial at ``k * f0`` is scaled by ``f0 / (k * f0)`` and is generated
    with a phase offset equal to its own frequency value, so the partials do
    not all start in phase.

    :param f0: float, fundamental frequency
    :param sampling_frequency: int, number of samples per second
    :param frame_size: int, number of samples in frame
    :param n_harmonics: int, number of harmonics to add
    :return:
        - waveform - ndarray of waveform
    """
    total = np.zeros((frame_size,), dtype=float)
    for order in range(1, n_harmonics + 1):
        partial_freq = f0 * order
        weight = f0 / partial_freq
        total += weight * make_sine_wave(
            partial_freq, sampling_frequency, frame_size, phase=partial_freq)
    return total

# Analysis parameters shared by the rest of the notebook.
sample_freq = 22050 # Hz
frame_size = 2048  # number of samples in one analysis frame
time_vector = np.arange(frame_size) / sample_freq  # sample instants in seconds
# Test signal: 20 partials at multiples of 440 Hz (fundamental included).
signal = make_harmonic_wave(440, sample_freq, frame_size, n_harmonics=20)

# Show the frame in the time domain.
fig, ax = plt.subplots()
ax.plot(time_vector, signal)
ax.set_xlabel('time (s)')
ax.set_title('time signal')

Text(0.5, 1.0, 'time signal')

# Taper the frame with a Hamming window before transforming it.
windowed_signal = np.hamming(frame_size) * signal
dt = 1/sample_freq  # sampling period in seconds
freq_vector = np.fft.rfftfreq(frame_size, d=dt)  # frequency of each rfft bin
X = np.fft.rfft(windowed_signal)
# Natural log of the magnitude spectrum; this is the input to the cepstrum.
log_X = np.log(np.abs(X))

# Plot the log-magnitude spectrum against frequency.
fig, ax = plt.subplots()
ax.plot(freq_vector, log_X)
ax.set_xlabel('frequency (Hz)')
ax.set_title('Fourier spectrum')

Text(0.5, 1.0, 'Fourier spectrum')

# Treat the log-magnitude spectrum as a signal and Fourier-transform it again.
cepstrum = np.fft.rfft(log_X)
df = freq_vector[1] - freq_vector[0]  # spacing between adjacent frequency bins
# Using df as the sample spacing makes rfftfreq return the quefrency axis.
quefrency_vector = np.fft.rfftfreq(log_X.size, df)

# Plot the cepstrum magnitude against quefrency.
fig, ax = plt.subplots()
ax.plot(quefrency_vector, np.abs(cepstrum))
ax.set_xlabel('quefrency (s)')
ax.set_title('cepstrum')

Text(0.5, 1.0, 'cepstrum')

# Same cepstrum plot, with a vertical marker at 1/440 s, the period of the
# 440 Hz fundamental used to build `signal`.
fig, ax = plt.subplots()
ax.vlines(1/440, 0, np.max(np.abs(cepstrum)), alpha=.2, lw=3, label='expected peak')
ax.plot(quefrency_vector, np.abs(cepstrum))
ax.set_xlabel('quefrency (s)')
ax.set_title('cepstrum')
ax.legend()

<matplotlib.legend.Legend at 0x176f49011d0>

import matplotlib.collections as collections

# Cepstrum plot with the expected peak and the quefrency band that corresponds
# to pitches between 82 Hz and 640 Hz shaded in green.
fig, ax = plt.subplots()
cepstrum_magnitude = np.abs(cepstrum)
ax.vlines(1/440, 0, cepstrum_magnitude.max(), alpha=.2, lw=3, label='expected peak')
ax.plot(quefrency_vector, cepstrum_magnitude)
valid = (quefrency_vector > 1/640) & (quefrency_vector <= 1/82)
# BrokenBarHCollection.span_where was deprecated in Matplotlib 3.7 and removed
# in later releases; fill_between(..., where=...) is the documented
# replacement and already adds the patch to the axes.
collection = ax.fill_between(
    quefrency_vector, 0, cepstrum_magnitude.max(), where=valid,
    facecolor='green', alpha=0.5, label='valid pitches')
ax.set_xlabel('quefrency (s)')
ax.set_title('cepstrum')
ax.legend()

<matplotlib.legend.Legend at 0x176f4962ba8>

def compute_cepstrum(signal, sample_freq):
    """Compute the cepstrum of a single frame.

    The frame is Hamming-windowed, transformed with a real FFT, reduced to its
    log-magnitude spectrum and transformed with a second real FFT.

    :param signal: array-like, one frame of real-valued samples
    :param sample_freq: number of samples per second
    :return:
        - quefrency_vector - ndarray, quefrency of each cepstrum bin
        - cepstrum - complex ndarray, real FFT of the log-magnitude spectrum
    """
    signal = np.asarray(signal)
    frame_size = signal.size
    windowed_signal = np.hamming(frame_size) * signal
    dt = 1/sample_freq
    freq_vector = np.fft.rfftfreq(frame_size, d=dt)
    magnitude = np.abs(np.fft.rfft(windowed_signal))
    # Floor the magnitude at the smallest positive normal float so that bins
    # that are exactly zero (e.g. a silent frame) do not produce log(0) = -inf,
    # which would turn the whole cepstrum into NaN.
    log_X = np.log(np.maximum(magnitude, np.finfo(float).tiny))
    cepstrum = np.fft.rfft(log_X)
    # The frequency-bin spacing acts as the "sample spacing" of the second
    # transform, which gives the quefrency axis.
    df = freq_vector[1] - freq_vector[0]
    quefrency_vector = np.fft.rfftfreq(log_X.size, df)
    return quefrency_vector, cepstrum

def cepstrum_f0_detection(signal, sample_freq, fmin=82, fmax=640):
    """Estimate the fundamental frequency of a frame from its cepstrum.

    The estimate is the inverse of the quefrency at which the cepstrum
    magnitude is largest, searching only quefrencies in ``(1/fmax, 1/fmin]``.

    :param signal: ndarray, one frame of samples
    :param sample_freq: number of samples per second
    :param fmin: lowest pitch considered
    :param fmax: highest pitch considered
    :return:
        - f0 - estimated fundamental frequency
    :raises ValueError: if no cepstrum bin lies inside the search range
    """
    quefrency_vector, cepstrum = compute_cepstrum(signal, sample_freq)
    # Restrict the peak search to quefrencies of plausible pitches.
    valid = (quefrency_vector > 1/fmax) & (quefrency_vector <= 1/fmin)
    if not valid.any():
        # np.argmax on an empty selection would raise a ValueError with an
        # unhelpful message; report the actual cause instead.
        raise ValueError(
            f'no quefrency bin lies in the search range for fmin={fmin}, fmax={fmax}')
    candidate_quefrencies = quefrency_vector[valid]
    peak_index = np.argmax(np.abs(cepstrum)[valid])
    return 1/candidate_quefrencies[peak_index]

# Estimate the pitch of `signal` using the default fmin/fmax search range.
cepstrum_f0_detection(signal, sample_freq)

441.43066406249994

# Estimate the pitch of a 440 Hz tone built from 1 up to 29 partials.
harmonics = np.arange(1, 30)
f0s = []
for n_harmonics in harmonics:
    signal = make_harmonic_wave(440, sample_freq, frame_size, n_harmonics)
    f0s.append(cepstrum_f0_detection(signal, sample_freq))

# Plot each estimate against the number of partials, with the true 440 Hz
# drawn as a horizontal reference line.
fig, ax = plt.subplots()
ax.hlines(440, 0, harmonics.max(), label='expected', alpha=.2)
ax.plot(harmonics, f0s, '.')
ax.legend()

<matplotlib.legend.Legend at 0x176f49f67b8>

# Sweep the true fundamental over 100 values from 83 Hz to 639 Hz and record
# the cepstrum-based estimate for each one.
f0s = np.linspace(83, 639, num=100)
cepstrum_f0s = []
for f0 in f0s:
    signal = make_harmonic_wave(f0, sample_freq, frame_size, n_harmonics=10)
    cepstrum_f0s.append(cepstrum_f0_detection(signal, sample_freq))

# Estimated versus true f0; the line f0s vs f0s marks a perfect estimate.
fig, ax = plt.subplots()
ax.plot(f0s, cepstrum_f0s, '.')
ax.plot(f0s, f0s, label='expected', alpha=.2)
ax.legend()
ax.set_xlabel('true f0 (Hz)')
ax.set_ylabel('cepstrum based f0 (Hz)')

Text(0, 0.5, 'cepstrum based f0 (Hz)')

def plot_cepstrum(signal, sample_freq, expected_f0, ax=None, fmin=82, fmax=640):
    """Plot the cepstrum magnitude of a frame with pitch-search annotations.

    Draws the cepstrum magnitude, a vertical line at ``1/expected_f0`` and a
    green band over the quefrencies in ``(1/fmax, 1/fmin]``.

    :param signal: ndarray, one frame of samples
    :param sample_freq: number of samples per second
    :param expected_f0: pitch whose period is marked as the expected peak
    :param ax: matplotlib axes to draw on; a new figure is created when None
    :param fmin: lowest pitch of the shaded search range
    :param fmax: highest pitch of the shaded search range
    """
    quefrency_vector, cepstrum = compute_cepstrum(signal, sample_freq)
    magnitude = np.abs(cepstrum)
    peak = magnitude.max()
    if ax is None:
        _, ax = plt.subplots()
    ax.vlines(1/expected_f0, 0, peak, alpha=1, lw=3, label='expected peak')
    ax.plot(quefrency_vector, magnitude)
    valid = (quefrency_vector > 1/fmax) & (quefrency_vector <= 1/fmin)
    # BrokenBarHCollection.span_where was deprecated in Matplotlib 3.7 and
    # removed in later releases; fill_between(..., where=...) is the documented
    # replacement and adds the patch to the axes itself.
    ax.fill_between(
        quefrency_vector, 0, peak, where=valid, facecolor='green',
        alpha=0.4, label='valid pitches')
    ax.set_xlabel('quefrency (s)')
    ax.legend()

# Two test tones near the ends of the search range: 83 Hz and 500 Hz.
low_pitch = make_harmonic_wave(83, sample_freq, frame_size)
high_pitch = make_harmonic_wave(500, sample_freq, frame_size)

# Stack the two cepstra for comparison, each with its expected peak marked.
fig, (ax1, ax2) = plt.subplots(nrows=2, figsize=(8, 6))
ax1.set_title('low pitch')
ax2.set_title('high pitch')
plot_cepstrum(low_pitch, sample_freq, 83, ax=ax1)
plot_cepstrum(high_pitch, sample_freq, 500, ax=ax2)
plt.tight_layout()

# Same 600 Hz tone at the same sampling frequency, with frames of
# frame_size and 2 * frame_size samples.
less_samples = make_harmonic_wave(600, sample_freq, frame_size)
more_samples = make_harmonic_wave(600, sample_freq, 2 * frame_size)

# Each title reports the pitch estimated from the corresponding frame.
fig, (ax1, ax2) = plt.subplots(nrows=2, figsize=(8, 6))
ax1.set_title(f'less samples, pitch {cepstrum_f0_detection(less_samples, sample_freq):.3f}')
ax2.set_title(f'more samples, pitch {cepstrum_f0_detection(more_samples, sample_freq):.3f}')
plot_cepstrum(less_samples, sample_freq, 600, ax=ax1)
plot_cepstrum(more_samples, sample_freq, 600, ax=ax2)
plt.tight_layout()

# Same 600 Hz tone and frame length, sampled at sample_freq and at
# 2 * sample_freq.
low_sample_freq = make_harmonic_wave(600, sample_freq, frame_size)
high_sample_freq = make_harmonic_wave(600, 2 * sample_freq, frame_size)

# Each title reports the pitch estimated from the corresponding frame. The
# titles name the quantity actually varied here (sampling frequency), and the
# first panel plots this cell's own low_sample_freq signal.
fig, (ax1, ax2) = plt.subplots(nrows=2, figsize=(8, 6))
ax1.set_title(f'low sampling frequency, pitch {cepstrum_f0_detection(low_sample_freq, sample_freq):.3f}')
ax2.set_title(f'high sampling frequency, pitch {cepstrum_f0_detection(high_sample_freq, 2 * sample_freq):.3f}')
plot_cepstrum(low_sample_freq, sample_freq, 600, ax=ax1)
plot_cepstrum(high_sample_freq, 2 * sample_freq, 600, ax=ax2)
plt.tight_layout()

# Repeat the true-f0 sweep at sample_freq and at 2 * sample_freq, drawing one
# panel per sampling frequency.
fig, (ax1, ax2) = plt.subplots(ncols=2, figsize=(10, 5))
for ax, sample_freq_var in zip([ax1, ax2], 
                               [sample_freq, 2 * sample_freq]):
    f0s = np.linspace(83, 639, num=100)
    cepstrum_f0s = []
    for f0 in f0s:
        # Generate and analyse the tone at the same sampling frequency.
        signal = make_harmonic_wave(f0, sample_freq_var, frame_size, n_harmonics=10)
        cepstrum_f0s.append(cepstrum_f0_detection(signal, sample_freq_var))
    # Estimated versus true f0; the line f0s vs f0s marks a perfect estimate.
    ax.plot(f0s, cepstrum_f0s, '.')
    ax.plot(f0s, f0s, label='expected', alpha=.2)
    ax.legend()
    ax.set_xlabel('true f0 (Hz)')
    ax.set_ylabel('cepstrum based f0 (Hz)')
    ax.set_title(f'sampling frequency {sample_freq_var} Hz')

A Short Tutorial on Cepstral Analysis for Pitch-tracking

What is a cepstrum?

Extracting a pitch

Evaluating the algorithm's performance

Comments