You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
206 lines
6.8 KiB
Plaintext
206 lines
6.8 KiB
Plaintext
TITLE:: FluidMelBands
|
|
SUMMARY:: A Perceptually Spread Spectral Contour Descriptor in Real-Time
|
|
CATEGORIES:: Libraries>FluidDecomposition
|
|
RELATED:: Guides/FluCoMa, Guides/FluidDecomposition, Classes/FluidMFCC
|
|
|
|
DESCRIPTION::
|
|
This class implements a spectral shape descriptor where the amplitude is given for a number of equally spread perceptual bands. The spread is based on the Mel scale (https://en.wikipedia.org/wiki/Mel_scale), which is one of the first attempts to mimic pitch perception scientifically. This implementation allows the range and number of bands to be selected dynamically. It is part of the LINK:: Guides/FluidDecomposition:: of LINK:: Guides/FluCoMa::. For more explanations, learning material, and discussions on its musicianly uses, visit http://www.flucoma.org/
|
|
|
|
The process will return a multichannel control stream of size STRONG::maxNumBands::, which will be repeated if no change happens within the algorithm, i.e. when the hopSize is larger than the server's kr period.
|
|
|
|
CLASSMETHODS::
|
|
|
|
METHOD:: kr
|
|
The audio rate in, control rate out version of the object.
|
|
|
|
ARGUMENT:: in
|
|
The audio to be processed.
|
|
|
|
ARGUMENT:: numBands
|
|
The number of bands that will be perceptually equally distributed between STRONG::minFreq:: and STRONG::maxFreq::. It is limited by the STRONG::maxNumBands:: parameter. When the number is smaller than the maximum, the output is zero-padded.
|
|
|
|
ARGUMENT:: minFreq
|
|
The lower boundary of the lowest band of the model, in Hz.
|
|
|
|
ARGUMENT:: maxFreq
|
|
The upper boundary of the highest band of the model, in Hz.
|
|
|
|
ARGUMENT:: maxNumBands
|
|
The maximum number of Mel bands that can be modelled. This sets the number of channels of the output, and therefore cannot be modulated.
|
|
|
|
ARGUMENT:: normalize
|
|
This flag enables the scaling of the output to preserve the energy of the window. It is on (1) by default.
|
|
|
|
ARGUMENT:: windowSize
|
|
The window size. As spectral description relies on spectral frames, we need to decide what precision we give it spectrally and temporally, in line with Gabor Uncertainty principles. http://www.subsurfwiki.org/wiki/Gabor_uncertainty
|
|
|
|
ARGUMENT:: hopSize
|
|
The window hop size. As spectral description relies on spectral frames, we need to move the window forward. It can be any size but low overlap will create audible artefacts. The -1 default value will default to half of windowSize (overlap of 2).
|
|
|
|
ARGUMENT:: fftSize
|
|
The inner FFT/IFFT size. It should be at least 4 samples long, at least the size of the window, and a power of 2. Making it larger allows an oversampling of the spectral precision. The -1 default value will use the next power of 2 equal to or above the windowSize.
|
|
|
|
ARGUMENT:: maxFFTSize
|
|
The maximum size the FFT can be. Memory for it is allocated at instantiation time, so this cannot be modulated.
|
|
|
|
RETURNS::
|
|
A KR signal of STRONG::maxNumBands:: channels, giving the measured amplitudes for each band. The latency is windowSize.
|
|
|
|
|
|
EXAMPLES::
|
|
|
|
code::
|
|
//create a monitoring bus for the descriptors
|
|
b = Bus.new(\control,0,40);
|
|
|
|
//create a monitoring window for the values
|
|
|
|
(
|
|
w = Window("Mel Bands Monitor", Rect(10, 10, 620, 320)).front;
|
|
a = MultiSliderView(w,Rect(10, 10, 600, 300)).elasticMode_(1).isFilled_(1);
|
|
)
|
|
|
|
//run the window updating routine.
|
|
(
|
|
~winRange = 0.1;
|
|
r = Routine {
|
|
{
|
|
b.get({ arg val;
|
|
{
|
|
if(w.isClosed.not) {
|
|
a.value = val/~winRange;
|
|
}
|
|
}.defer
|
|
});
|
|
0.01.wait;
|
|
}.loop
|
|
}.play
|
|
)
|
|
|
|
//play a simple sound to observe the values
|
|
(
|
|
x = {
|
|
var source = SinOsc.ar(LFTri.kr(0.1).exprange(80,800),0,0.1);
|
|
Out.kr(b,FluidMelBands.kr(source,maxNumBands:40));
|
|
source.dup;
|
|
}.play;
|
|
)
|
|
|
|
// free this source
|
|
x.free
|
|
|
|
// load a more exciting one
|
|
c = Buffer.read(s,File.realpath(FluidMelBands.class.filenameSymbol).dirname.withTrailingSlash ++ "../AudioFiles/Tremblay-AaS-SynthTwoVoices-M.wav");
|
|
|
|
// analyse with parameters to be changed
|
|
(
|
|
x = {arg bands = 40, low = 20, high = 20000;
|
|
var source = PlayBuf.ar(1,c,loop:1);
|
|
Out.kr(b,FluidMelBands.kr(source, bands, low, high, 40) / 10);
|
|
source.dup;
|
|
}.play;
|
|
)
|
|
|
|
//set the winRange to a more informative value
|
|
~winRange = 0.05;
|
|
|
|
// observe the number of bands. The unused ones at the top are not updated
|
|
x.set(\bands,20)
|
|
|
|
// back to the full range
|
|
x.set(\bands,40)
|
|
|
|
// focus all the bands on a mid range: nothing to see!
|
|
x.set(\low,320, \high, 800)
|
|
|
|
// focusing on the low end shows the fft resolution issue. One could restart the analysis with a larger fft to show more precision
|
|
x.set(\low,20, \high, 160)
|
|
|
|
// back to full range
|
|
x.set(\low,20, \high, 20000)
|
|
|
|
// free everything
|
|
x.free;b.free;c.free;r.stop;
|
|
::
|
|
|
|
STRONG::A musical example: a perceptually spread vocoder::
|
|
|
|
CODE::
|
|
//load a source and define control bus for the resynthesis cluster
|
|
(
|
|
b = Bus.control(s,40);
|
|
c = Buffer.read(s,File.realpath(FluidMelBands.class.filenameSymbol).dirname.withTrailingSlash ++ "../AudioFiles/Nicol-LoopE-M.wav");
|
|
d = Group.new;
|
|
)
|
|
|
|
//play the source and send the analysis on the control bus
|
|
(
|
|
x = {
|
|
arg dry = 0.2;
|
|
var source = PlayBuf.ar(1,c,loop:1);
|
|
Out.kr(b,FluidMelBands.kr(source,maxNumBands:40));
|
|
Out.ar(0, DelayN.ar(source,delaytime:1024*SampleDur.ir,mul:dry));
|
|
}.play;
|
|
)
|
|
|
|
// set the dry playback volume
|
|
x.set(\dry, 0.5)
|
|
|
|
|
|
// create a cluster of sines tuned on each MelBand center frequency, as a sort of vocoder.
|
|
(
|
|
var lowMel = 1127.010498 * ((20/700) + 1).log;
|
|
var highMel = 1127.010498 * ((20000/700) + 1).log;
|
|
var rangeMel = highMel - lowMel;
|
|
var stepMel = rangeMel / 41;
|
|
40.do({
|
|
arg i;
|
|
var freqMel = (stepMel * (i +1)) + lowMel;
|
|
var freq = ((freqMel/ 1127.01048).exp - 1 ) * 700;
|
|
{SinOsc.ar(freq,mul:Lag.kr(In.kr(b,40)[i],512*SampleDur.ir,0.5))}.play(d,1,addAction:\addToTail);
|
|
});
|
|
)
|
|
|
|
// free all
|
|
d.free; x.free; b.free; c.free;
|
|
|
|
/////////////////////////////////////
|
|
// instantiate a more dynamic vocoder:
|
|
// MouseX defines the bottom frequency and MouseY defines the top frequency, between which the 40 bands of analysis and synthesis are perceptually equally spread
|
|
|
|
// the bus, source and group
|
|
(
|
|
b = Bus.control(s,40);
|
|
c = Buffer.read(s,File.realpath(FluidMelBands.class.filenameSymbol).dirname.withTrailingSlash ++ "../AudioFiles/Nicol-LoopE-M.wav");
|
|
d = Group.new;
|
|
)
|
|
|
|
// the modified source
|
|
(
|
|
x = {
|
|
arg dry = 0.2;
|
|
var source = PlayBuf.ar(1,c,loop:1);
|
|
Out.kr(b,FluidMelBands.kr(source,maxNumBands:40,minFreq:MouseX.kr().exprange(20,600),maxFreq:MouseY.kr().exprange(650,20000)));
|
|
Out.ar(0, DelayN.ar(source,delaytime:1024*SampleDur.ir,mul:dry));
|
|
}.play;
|
|
)
|
|
|
|
// the modified vocoder
|
|
(
|
|
40.do({
|
|
arg i;
|
|
{
|
|
var lowMel = 1127.010498 * ((MouseX.kr().exprange(20,600)/700) + 1).log;
|
|
var highMel = 1127.010498 * ((MouseY.kr().exprange(650,20000)/700) + 1).log;
|
|
var rangeMel = highMel - lowMel;
|
|
var stepMel = rangeMel / 41;
|
|
var freqMel = (stepMel * (i +1)) + lowMel;
|
|
var freq = ((freqMel/ 1127.01048).exp - 1 ) * 700;
|
|
SinOsc.ar(freq,mul:Lag.kr(In.kr(b,40)[i],512*SampleDur.ir,0.5))}.play(d,1,addAction:\addToTail);
|
|
});
|
|
)
|
|
|
|
// free all
|
|
d.free; x.free; b.free; c.free;
|
|
::
|