YouTubeVideo('VSgafsbA6Wg')

from IPython.display import YouTubeVideo

YouTubeVideo('vUF6kvh5GJQ')

!youtube-dl vUF6kvh5GJQ

[youtube] vUF6kvh5GJQ: Downloading webpage
[youtube] vUF6kvh5GJQ: Downloading video info webpage
[youtube] vUF6kvh5GJQ: Extracting video information
[youtube] vUF6kvh5GJQ: Downloading MPD manifest
WARNING: Requested formats are incompatible for merge and will be merged into mkv.
[download] Opera singer Vocal Folds-vUF6kvh5GJQ.mkv has already been downloaded and merged

import moviepy.editor as mpy

# Open the video file that youtube-dl downloaded and merged (see the
# download log above for the file name).
clip = mpy.VideoFileClip('Opera singer Vocal Folds-vUF6kvh5GJQ.mkv')

# Audio sampling rate of the clip and its samples as a NumPy array
# (one row per sample, one column per channel).
sample_rate = clip.audio.fps
audio_data = clip.audio.to_soundarray()

audio_data.shape

(5026518, 2)

sample_rate

44100

import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

# Plot the raw waveform of every audio channel against time.
fig, ax = plt.subplots(figsize=(14, 4))
# Time axis: sample index divided by the sampling rate.
# The builtin ``float`` is used as dtype because the ``np.float`` alias was
# deprecated in NumPy 1.20 and removed in 1.24.
t = np.arange(audio_data.shape[0], dtype=float) / sample_rate
ax.plot(t, audio_data)

[<matplotlib.lines.Line2D at 0x117b75ac8>,
 <matplotlib.lines.Line2D at 0x10b48b4a8>]

# Length of each FFT segment: 1/25 of a second worth of samples.
# NFFT is a sample count, so it must be an integer; true division (`/`)
# would produce a float on Python 3.
NFFT = int(sample_rate // 25)

fig, ax = plt.subplots(figsize=(14, 4))
# Average the channels (axis 1) into a single signal before computing the
# magnitude spectrogram. The returned `spectrum`, `freqs` and `time` arrays
# are reused later to render the per-frame spectrogram overlay.
spectrum, freqs, time, im = ax.specgram(audio_data.mean(axis=1), NFFT=NFFT, pad_to=4096, Fs=sample_rate,
                                        noverlap=512, mode='magnitude', cmap='plasma')
fig.colorbar(im)

<matplotlib.colorbar.Colorbar at 0x117d7e588>

# DPI used when sizing matplotlib figures so that they render at the video's
# pixel dimensions (figure inches = pixels / dpi).
# NOTE(review): 226.98 looks like a display-specific value - confirm its origin.
my_dpi = 226.98

# Frame size of the source clip; clip.size is unpacked as (width, height).
width, height = clip.size
width, height

(320, 240)

from moviepy.editor import VideoClip
from moviepy.video.io.bindings import mplfig_to_npimage

# Sample points at which the demo curve is evaluated.
x = np.linspace(-2, 2, 200)

# Length of the demo animation; passed to VideoClip as its duration and used
# by make_frame as the period of the moving sine term.
duration = 2

# figsize is expressed in inches, so dividing the pixel dimensions by the DPI
# makes the rendered figure match the video frame size.
fig, ax = plt.subplots(figsize=(width/my_dpi, height/my_dpi), dpi=my_dpi)
def make_frame(t):
    """Render one frame of the demo animation for time ``t``.

    Redraws the shared ``ax`` with ``sinc(x**2)`` plus a sine wave whose
    phase advances by one full cycle over ``duration``, then returns the
    shared ``fig`` converted to an image array by ``mplfig_to_npimage``.
    """
    # Phase offset of the sine term at this point in the animation.
    phase = 2*np.pi/duration * t
    curve = np.sinc(x**2) + np.sin(x + phase)

    # Start from an empty axes so earlier frames do not accumulate.
    ax.clear()
    ax.plot(x, curve, lw=3)
    # Fixed limits, re-applied on every frame after the axes is cleared.
    ax.set_ylim(-1.5, 2.5)
    return mplfig_to_npimage(fig)

# Clip whose frames are produced by calling make_frame(t).
animation = VideoClip(make_frame, duration=duration)
# Close the figure; make_frame keeps drawing on it through the fig/ax
# references. Presumably this stops the notebook from also showing the
# figure as a static plot - TODO confirm.
plt.close(fig)
# Render the clip at 20 fps and embed it in the notebook, looping and autoplaying.
animation.ipython_display(fps=20, loop=True, autoplay=True)

 98%|█████████▊| 40/41 [00:02<00:00, 15.43it/s]

make_frame(0).shape

(240, 320, 3)

plt.imshow(make_frame(0))

<matplotlib.image.AxesImage at 0x1182b6be0>

clip.get_frame(0).shape

(240, 320, 3)

plt.imshow(clip.get_frame(0))

<matplotlib.image.AxesImage at 0x118a4ecc0>

# Constant-valued array laid out like a video frame: clip.size is
# (width, height), so it is reversed to get (height, width).
# NOTE(review): 0.6 presumably acts as the overlay's opacity - confirm
# against MoviePy's mask semantics.
mask_array = np.ones(clip.size[::-1]) * 0.6

# Wrap the array as a mask clip so it can be attached with set_mask().
mask = mpy.ImageClip(img=mask_array,ismask=True)

# Preview: the masked demo animation layered over the first two seconds of
# the original video.
stack = mpy.CompositeVideoClip([clip.subclip(t_start=0, t_end=2), animation.set_mask(mask)], use_bgclip=True)

mpy.ipython_display(stack, loop=True, autoplay=True)

 98%|█████████▊| 50/51 [00:02<00:00, 17.60it/s]

# Figure sized to the video frame in pixels (inches = pixels / dpi).
fig, ax = plt.subplots(figsize=(width/my_dpi, height/my_dpi), dpi=my_dpi, )
# Boolean mask of the spectrogram columns whose time stamp is less than 1 away from 0.
selection = np.abs(0 - time) < 1.
# Boolean mask of the frequency bins below 10000; make_specgram reads this global.
freq_selection = freqs < 10000
# Initial draw of the magnitude spectrum converted to decibels (20 * log10).
# Unlike make_specgram, this draw uses every frequency bin, not freq_selection.
ax.pcolorfast(time[selection], freqs, 20 * np.log10(spectrum[:, selection]))
ax.set_xticklabels([])
ax.set_yticklabels([])
# Lay out the figure with zero padding; this runs once, before any frame is rendered.
plt.tight_layout(pad=0)
def make_specgram(t):
    """Render the spectrogram overlay frame for video time ``t``.

    Draws, on the shared ``ax``, the columns of ``spectrum`` whose time stamp
    is less than 1 away from ``t``, restricted to the frequency bins picked
    by ``freq_selection`` and converted to decibels. All axis decorations are
    hidden, and the shared ``fig`` is returned as an image array.
    """
    ax.clear()

    # Columns of the spectrogram that fall inside the window around t.
    in_window = np.abs(t - time) < 1.
    magnitudes = spectrum[freq_selection, :][:, in_window]
    ax.pcolorfast(time[in_window], freqs[freq_selection], 20 * np.log10(magnitudes))

    # Strip every decoration so only the image itself is rendered.
    ax.axis('off')
    ax.set_frame_on(False)
    for axis in (ax.get_xaxis(), ax.get_yaxis()):
        axis.set_visible(False)

    return mplfig_to_npimage(fig)

# Spectrogram clip generated frame by frame with make_specgram, given the
# same duration and audio track as the original video.
specgram_animation = VideoClip(make_specgram).set_duration(clip.duration).set_audio(clip.audio)
# Close the figure; make_specgram still draws on it through the fig/ax references.
plt.close(fig)

# Layer the masked spectrogram over the original video and write the
# 30 s - 90 s excerpt to output.mp4.
stack = mpy.CompositeVideoClip([clip, specgram_animation.set_mask(mask)], use_bgclip=True)
stack.subclip(t_start=30, t_end=90).write_videofile('output.mp4')

[MoviePy] >>>> Building video output.mp4
[MoviePy] Writing audio in outputTEMP_MPY_wvf_snd.mp3

100%|██████████| 1323/1323 [00:01<00:00, 995.20it/s]

[MoviePy] Done.
[MoviePy] Writing video output.mp4

100%|█████████▉| 1500/1501 [02:47<00:00,  9.31it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: output.mp4

YouTubeVideo('VSgafsbA6Wg')

Annotating a Vocal Folds YouTube Video with a Spectrogram using Moviepy

Finding a good vocal folds video

Extracting the audio

Visualizing the sound

Rendering the sound on top of the original video

Conclusions

Comments