// Lookup in a KDTree using melbands
// Demonstration of a massively parallel approach to batch processing swiftly in SC
s.options.numBuffers = 16384 //The method below for doing the analysis quickly needs lots of buffers
s.reboot
//Step 0: Make a corpus
//We'll jam together some random flucoma sounds for illustrative purposes
//Get some files
(
~audioexamples_path = FluidFilesPath()+/+"*.wav";
~allTheSounds = SoundFile.collect(~audioexamples_path);
~testSounds = ~allTheSounds;
~testSounds.do{|f| f.path.postln}; // print out the files that are loaded
)
//Load the files into individual buffers:
(
~audio_buffers = ~testSounds.collect{|f|
Buffer.readChannel(
server: s,
path:f.path,
channels:[0],
action:{("Loaded" + f.path).postln;}
)
};
)
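// Optional sketch: once the asynchronous loads have completed, post each buffer's duration in seconds
// so we can see that the corpus actually arrived. This only uses plain Buffer and String methods.
~audio_buffers.do{|b,i| ("% : % s".format(~testSounds[i].path.basename, (b.numFrames / b.sampleRate).round(0.01))).postln};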
//Do a segmentation of each buffer, in parallel
(
fork{
~index_buffers = ~audio_buffers.collect{Buffer.new};
s.sync;
~count = ~audio_buffers.size;
~audio_buffers.do{|src,i|
FluidBufOnsetSlice.process(
server:s,
source:src,
indices:~index_buffers[i],
metric: 9,
threshold:0.2,
minSliceLength: 17,
action:{
(~testSounds[i].path ++ ":" + ~index_buffers[i].numFrames + "slices").postln;
~count = ~count - 1;
if(~count == 0){"Done slicing".postln};
}
);
}
}
)
// we now have an array of index buffers, one per source buffer, each containing the segmentation points as frame positions
// this allows us to make an array of sizes
~index_buffers.collect{|b| b.numFrames}.sum
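// Optional sketch: the same information broken down per file, posting how many onsets were detected
// in each source buffer (assumes the slicing above has finished; plain Buffer methods only)
~index_buffers.do{|b,i| ("% : % onsets".format(~testSounds[i].path.basename, b.numFrames)).postln};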
//For each of these segments, let's make a datapoint using the mean melbands.
// There are a number of ways of skinning this cat w/r/t telling the server what to do, but here we want to minimize traffic between language and server, and also produce understandable code
//First, we'll grab the onset points as language-side arrays, then scroll through each slice getting the mean melbands
(
// - a dataset to keep the mean melbands in
~mels = FluidDataSet(s);
// - a dictionary to keep the slice points in for later playback
~slices = Dictionary();
//The code below (as well as needing lots of buffers) creates lots of threads, so we need a big scheduling queue
~clock = TempoClock(queueSize:8192);
)
// Do the Mel analysis in a cunning parallel fashion
(
{
var counter, remaining;
var condition = Condition.new; // used to create a test condition to pause the routine ...
var index_arrays = Dictionary();
"Process started. Please wait.".postln;
~total_slice_count = ~index_buffers.collect{|b| b.numFrames}.sum + ~index_buffers.size; //we get one extra slice per buffer (from gluing on the start and end points below)
~featurebuffers = ~total_slice_count.collect{Buffer.new}; // create a buffer per slice
//Make our dictionary FluidDataSet-shaped
~slices.put("cols",3);//[bufnum,start,end] for each slice
~slices.put("data",Dictionary());
//Collect each set of onsets into a language side array and store them in a dict
~index_buffers.do{|b,i| // iterate over the input buffer array
{
b.loadToFloatArray( // load to language side array
action:{|indices|
//Glue the first and last samples of the buffer on to the index list, and place in dictionary with the
//Buffer object as a key
index_arrays.put(~audio_buffers[i], Array.newFrom([0] ++ indices ++ (~audio_buffers[i].numFrames - 1)));
if(i==(~index_buffers.size-1)) {condition.unhang};
}
)
}.fork(stackSize:~total_slice_count);
};
condition.hang; //Pause until all the callbacks above have completed
"Arrays loaded. Starting on the analysis, please wait.".postln;
//For each of these lists of points, we want to scroll over the indices in pairs and get some mel bands
counter = 0;
remaining = ~total_slice_count;
s.sync;
// now iterate over Dict and calc melbands
index_arrays.keysValuesDo{|buffer, indices|
indices.doAdjacentPairs{|start,end,num|
var analysis = Routine({|counter|
FluidBufMelBands.processBlocking(
server:s,
source:buffer,
startFrame:start,
numFrames:(end-1) - start,
features:~featurebuffers[counter],
action:{
remaining = remaining - 1;
if(remaining == 0) { ~numMelBands = ~featurebuffers[0].numChannels;condition.unhang };
}
);
});
~slices["data"].put(counter,[buffer.bufnum,start,end]);
//I'm spawning new threads to wait for the analysis callback from the server. The final callback will un-hang this thread
analysis.value(counter); //Done differently to other blocks because I need to pass in the value of counter
counter = counter + 1;
}
};
condition.hang;
"Analysis of % slices done.\n".postf(~total_slice_count);
}.fork(clock:~clock);
)
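// Optional sanity check (a sketch): once the analysis has finished, each feature buffer holds the
// melband analysis of one slice, with ~numMelBands channels. Buffer:query just posts the buffer's
// frames, channels and sample rate to the post window.
(
~numMelBands.postln;
~featurebuffers[0].query;
)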
// Run stats on each mel buffer
// create a stats buffer for each of the slices
~statsbuffers = ~total_slice_count.collect{Buffer.new}; // create one stats buffer per slice - to be filled with (40 mel bands * 7 stats)
// run stats on all the buffers
(
{
var remaining = ~total_slice_count;
~featurebuffers.do{|buffer,i|
FluidBufStats.processBlocking(
server:s,
source:buffer,
stats:~statsbuffers[i],
action:{
remaining = remaining - 1;
if(remaining == 0) { "done".postln};
}
);
};
}.fork(clock:~clock);
)
~featurebuffers.size
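// Optional check (a sketch): by default FluidBufStats writes 7 statistics per input channel
// (mean, standard deviation, skewness, kurtosis and low/mid/high percentiles), so each stats
// buffer should report 7 frames and ~numMelBands channels
~statsbuffers[0].query;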
//Flatten each stats buffer into a data point
~flatbuffers = ~total_slice_count.collect{Buffer.new};// create an array of flatten stats
(
{
var remaining = ~total_slice_count;
~statsbuffers.do{|buffer,i|
FluidBufFlatten.processBlocking(
server:s,
source:buffer,
destination:~flatbuffers[i],
action:{
remaining = remaining - 1;
if(remaining == 0) { "Got flat points".postln; };
}
);
};
}.fork(clock:~clock);
)
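// Optional check (a sketch): each flattened buffer should now contain ~numMelBands * 7 values in
// total, which is the single-point shape that FluidDataSet expects below
~flatbuffers[0].query;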
//Ram each flat point into a data set. At this point we have more data than we need, but we'll prune in a moment
(
"Filling dataset".postln;
~mels.clear;
~flatbuffers.do{|buf,i|
~mels.addPoint(i,buf);
};
~mels.print;
)
// Prune & standardise
// Tidy up the temp arrays of buffers we do not need anymore
(
"Cleaning".postln;
(~featurebuffers ++ ~statsbuffers ++ ~flatbuffers).do{|buf| buf.free};
)
//Above we sneakily made a dictionary of slice data for playback (bufnum,start,end). Let's throw it in a dataset
~slicedata = FluidDataSet(s); // will hold slice data (bufnum,start,end) for playback
//dict -> dataset
(
~slicedata.load(~slices);
~slicedata.print;
)
// Step 1. Let's prune and standardize before fitting to a tree
(
~meanmels = FluidDataSet(s);//will hold pruned mel data
~stdmels = FluidDataSet(s);//will hold standardised, pruned mel data
~standardizer = FluidStandardize(s);
~pruner = FluidDataSetQuery(s);
~tree = FluidKDTree(s,numNeighbours:10);//we have to decide the number of neighbours when we make the tree (boo!)
)
//Prune, standardize and fit KDTree
(
{
~meanmels.clear;
~stdmels.clear;
~pruner.addRange(0,~numMelBands).transform(~mels,~meanmels); //prune with a 'query' -- keep only the first ~numMelBands columns (the means) and write them to ~meanmels
~standardizer.fitTransform(~meanmels,~stdmels);
~tree.fit(~stdmels,{"KDTree ready".postln});
}.fork(clock:~clock);
)
~meanmels.print
//Step 2: Set the FluidStandardize and FluidKDTree up for listening
//set the buffers and busses needed
(
~stdInputPoint = Buffer.alloc(s,40);
~stdOutputPoint = Buffer.alloc(s,40);
~treeOutputPoint = Buffer.alloc(s,3 * 10);//numNeighbours x triples of bufnum,start,end
)
// let's play a random sound (to make sure we understand our data structure!)
(
{
var randPoint, buf, start, stop, dur;
randPoint = ~slices["data"].keys.asArray.scramble[0]; // a good way of getting a random key - note that identifiers coming back from the KDTree later are strings and need recasting
buf= ~slices["data"][randPoint][0];
start = ~slices["data"][randPoint][1];
stop = ~slices["data"][randPoint][2];
dur = stop - start;
BufRd.ar(1,buf, Line.ar(start,stop,dur/s.sampleRate, doneAction: 2), 0, 2);
}.play
)
// Query KD tree
// a target sound from outside our dataset
~inBuf = Buffer.readChannel(s, Platform.resourceDir +/+ "sounds/a11wlk01.wav", numFrames:15000, channels:[0]);
~inBuf.play
//OR one from within (but just the beginning so beware of the difference!)
~inBuf = Buffer.alloc(s,15000);
~randomSlice = ~slices["data"].keys.asArray.scramble[0];
~audio_buffers[~slices["data"][~randomSlice][0]].copyData(~inBuf,srcStartAt: ~slices["data"][~randomSlice][1], numSamples: 15000.min(~slices["data"][~randomSlice][2] - (~slices["data"][~randomSlice][1])));
~inBuf.play
// now try getting a point, playing it, grabbing nearest neighbour and playing it ...
(
~inBufMels = Buffer(s);
~inBufStats = Buffer(s);
~inBufFlat = Buffer(s);
~inBufComp = Buffer(s);
~inBufStand = Buffer(s);
)
// FluidBufCompose is the buffer version of FluidDataSetQuery: here it copies just the first ~numMelBands frames (the means) of the flattened stats
(
FluidBufMelBands.process(s, ~inBuf, features: ~inBufMels, action: {
FluidBufStats.process(s, ~inBufMels, stats:~inBufStats, action: {
FluidBufFlatten.process(s, ~inBufStats, destination:~inBufFlat, action: {
FluidBufCompose.process(s, ~inBufFlat, numFrames: ~numMelBands, destination: ~inBufComp, action: {
~standardizer.transformPoint(~inBufComp, ~inBufStand, {
~tree.kNearest(~inBufStand,action:{ |a|a.postln;~nearest = a;})
})
})
})
})
})
)
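// A quick look at what came back (a sketch): ~nearest holds the identifiers of the nearest slices,
// returned as strings, which is why the playback code below recasts them with .asInteger
(
~nearest.postln;
~nearest.collect{|id| ~slices["data"][id.asInteger]}.postln; // the [bufnum, start, end] triple for each neighbour
)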
// play back the nearest neighbours in order
(
fork{
~nearest.do{|i|
var buf, start, stop, dur;
buf= ~slices["data"][i.asInteger][0];
start = ~slices["data"][i.asInteger][1];
stop = ~slices["data"][i.asInteger][2];
dur = (stop - start)/ s.sampleRate;
{BufRd.ar(1,buf, Line.ar(start,stop,dur, doneAction: 2), 0, 2);}.play;
i.postln;
dur.wait;
};
}
)
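// Optional cleanup sketch for when you are done experimenting: free the remaining buffers.
// The names below are the ones defined above; adjust the list if you skipped any step.
(
(~audio_buffers ++ ~index_buffers ++ [~inBuf, ~inBufMels, ~inBufStats, ~inBufFlat, ~inBufComp,
	~inBufStand, ~stdInputPoint, ~stdOutputPoint, ~treeOutputPoint]).do{|b| b.free};
)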