From bd79448be45dc13fd9cfe1e6e1f999cbf4204662 Mon Sep 17 00:00:00 2001
From: Pierre Alexandre Tremblay
Date: Tue, 15 Dec 2020 16:54:44 +0000
Subject: [PATCH] Owen's massive parallelisation, customised and commented by PA

---
 .../13-massive-parallelisation-example.scd    | 332 ++++++++++++++++++
 1 file changed, 332 insertions(+)
 create mode 100644 release-packaging/Examples/dataset/1-learning examples/13-massive-parallelisation-example.scd

diff --git a/release-packaging/Examples/dataset/1-learning examples/13-massive-parallelisation-example.scd b/release-packaging/Examples/dataset/1-learning examples/13-massive-parallelisation-example.scd
new file mode 100644
index 0000000..acf0323
--- /dev/null
+++ b/release-packaging/Examples/dataset/1-learning examples/13-massive-parallelisation-example.scd
@@ -0,0 +1,332 @@
+// Lookup in a KDTree using melbands
+// Demonstration of a massive parallel approach to batch process swiftly in SC
+
+s.options.numBuffers = 16384 //The method below for doing the analysis quickly needs lots of buffers
+s.reboot
+
+//Step 0: Make a corpus
+
+//We'll jam together some random flucoma sounds for illustrative purposes
+//Get some files
+(
+~audioexamples_path = File.realpath(FluidBufMelBands.class.filenameSymbol).dirname.withTrailingSlash +/+ "../AudioFiles/*.wav";
+~allTheSounds = SoundFile.collect(~audioexamples_path);
+~testSounds = ~allTheSounds;
+~testSounds.do{|f| f.path.postln}; // print out the files that are loaded
+)
+
+//Load the files into individual buffers:
+(
+~audio_buffers = ~testSounds.collect{|f|
+	Buffer.readChannel(
+		server: s,
+		path:f.path,
+		channels:[0],
+		action:{("Loaded" + f.path).postln;}
+	)
+};
+)
+
+//Do a segmentation of each buffer, in parallel
+(
+fork{
+	~index_buffers = ~audio_buffers.collect{Buffer.new};
+	s.sync;
+	~count = ~audio_buffers.size;
+	~audio_buffers.do{|src,i|
+		FluidBufOnsetSlice.process(
+			server:s,
+			source:src,
+			indices:~index_buffers[i],
+			metric: 9,
+			threshold:0.2,
+			minSliceLength: 17,
+			action:{
+				(~testSounds[i].path ++ ":" + ~index_buffers[i].numFrames + "slices").postln;
+				~count = ~count - 1;
+				if(~count == 0){"Done slicing".postln};
+			}
+		);
+	}
+}
+)
+
+// we now have an array of index buffers, one per source buffer, each containing the segmentation points as frame positions
+// summing their sizes gives the total number of onsets found
+~index_buffers.collect{|b| b.numFrames}.sum
+
+//For each of these segments, let's make a datapoint using the mean melbands.
+// There's a number of ways of skinning this cat w/r/t telling the server what to do, but here we want to minimize traffic between language and server, and also produce understandable code
+
+//First, we'll grab the onset points as language-side arrays, then scroll through each slice getting the mean melbands
+(
+// - a dataset to keep the mean melbands in
+~mels = FluidDataSet(s);
+// - a dictionary to keep the slice points in for later playback
+~slices = Dictionary();
+//The code below (as well as needing lots of buffers), creates lots of threads and we need a big ass scheduling queue
+~clock = TempoClock(queueSize:8192);
+)
+
+
+// Do the Mel analysis in a cunning parallel fashion
+(
+{
+	var counter, remaining;
+	var condition = Condition.new; // used to pause this routine until the server callbacks below have completed
+	var index_arrays = Dictionary(); // audio Buffer -> language-side array of slice boundaries
+
+	"Process started. Please wait.".postln;
+
+	~total_slice_count = ~index_buffers.collect{|b| b.numFrames}.sum + ~index_buffers.size; //each buffer yields one more slice than it has onsets
+	~featurebuffers = ~total_slice_count.collect{Buffer.new}; // create a buffer per slice
+
+	//Make our dictionary FluidDataSet-shaped
+	~slices.put("cols",3);//[bufnum,start,end] for each slice
+	~slices.put("data",Dictionary());
+
+	//Collect each set of onsets into a language side array and store them in a dict
+	~index_buffers.do{|b,i| // iterate over the index buffers
+		{
+			b.loadToFloatArray( // load to language side array
+				action:{|indices|
+					//Glue the first and last samples of the buffer on to the index list, and place in dictionary with the
+					//Buffer object as a key
+
+					index_arrays.put(~audio_buffers[i], Array.newFrom([0] ++ indices ++ (~audio_buffers[i].numFrames - 1)));
+
+					if(index_arrays.size == ~index_buffers.size) {condition.unhang}; // resume only once every callback has stored its array, whatever order they arrive in
+				}
+			)
+		}.fork(stackSize:~total_slice_count);
+	};
+	condition.hang; //Pause until all the callbacks above have completed
+	"Arrays loaded. Starting on the analysis, please wait.".postln;
+
+	//For each of these lists of points, we want to scroll over the indices in pairs and get some mel bands
+	counter = 0;
+	remaining = ~total_slice_count;
+
+	s.sync;
+
+	// now iterate over Dict and calc melbands
+
+	index_arrays.keysValuesDo{|buffer, indices|
+		indices.doAdjacentPairs{|start,end,num|
+			var analysis = Routine({|counter|
+
+				FluidBufMelBands.processBlocking(
+					server:s,
+					source:buffer,
+					startFrame:start,
+					numFrames:(end-1) - start,
+					features:~featurebuffers[counter],
+					action:{
+						remaining = remaining - 1;
+						if(remaining == 0) { ~numMelBands = ~featurebuffers[0].numChannels;condition.unhang };
+					}
+				);
+			});
+
+			~slices["data"].put(counter,[buffer.bufnum,start,end]);
+
+			//I'm spawning new threads to wait for the analysis callback from the server. The final callback will un-hang this thread
+			analysis.value(counter); //Done differently to other blocks because I need to pass in the value of counter
+			counter = counter + 1;
+		}
+	};
+	condition.hang;
+	"Analysis of % slices done.\n".postf(~total_slice_count);
+}.fork(clock:~clock);
+)
+
+
+// Run stats on each mel buffer
+
+// create a stats buffer for each of the slices
+~statsbuffers = ~total_slice_count.collect{Buffer.new}; // create n Slices buffers - to be filled with (40 mel bands * 7 stats)
+
+// run stats on all the buffers
+(
+{
+	var remaining = ~total_slice_count;
+	~featurebuffers.do{|buffer,i|
+		FluidBufStats.processBlocking(
+			server:s,
+			source:buffer,
+			stats:~statsbuffers[i],
+			action:{
+				remaining = remaining - 1;
+				if(remaining == 0) { "done".postln};
+			}
+		);
+	};
+}.fork(clock:~clock);
+)
+
+~featurebuffers.size
+
+//Flatten each stats buffer into a data point
+~flatbuffers = ~total_slice_count.collect{Buffer.new};// create an array of flatten stats
+
+(
+{
+	var remaining = ~total_slice_count;
+	~statsbuffers.do{|buffer,i|
+		FluidBufFlatten.processBlocking(
+			server:s,
+			source:buffer,
+			destination:~flatbuffers[i],
+			action:{
+				remaining = remaining - 1;
+				if(remaining == 0) { "Got flat points".postln; };
+			}
+		);
+	};
+}.fork(clock:~clock);
+)
+
+
+//Ram each flat point into a data set. At this point we have more data than we need, but we'll prune in a moment
+(
+"Filling dataset".postln;
+~mels.clear;
+
+// ~flatbuffers = flatbuffers;
+~flatbuffers.do{|buf,i|
+	~mels.addPoint(i,buf);
+};
+
+~mels.print;
+)
+
+
+// Prune & standardise
+
+// Tidy up the temp arrays of buffers we do not need anymore
+
+(
+"Cleaning".postln;
+(~featurebuffers ++ ~statsbuffers ++ ~flatbuffers).do{|buf| buf.free};
+)
+
+//Above we sneakily made a dictionary of slice data for playback (bufnum,start,end). Let's throw it in a dataset
+(
+~slicedata = FluidDataSet(s); // will hold slice data (bufnum,start,end) for playback
+)
+
+//dict -> dataset
+(
+~slicedata.load(~slices);
+~slicedata.print;
+)
+
+// Step 1. Let's prune and standardize before fitting to a tree
+(
+~meanmels = FluidDataSet(s);//will hold pruned mel data
+~stdmels = FluidDataSet(s);//will hold standardised, pruned mel data
+~standardizer = FluidStandardize(s);
+~pruner = FluidDataSetQuery(s);
+~tree = FluidKDTree(s,numNeighbours:10,lookupDataSet:~slicedata);//we have to supply the lookup data set when we make the tree (boo!)
+)
+
+//Prune, standardize and fit KDTree
+(
+{
+	~meanmels.clear;
+	~stdmels.clear;
+	~pruner.addRange(0,~numMelBands).transform(~mels,~meanmels); //prune with a 'query' -- so this keeps the first ~numMelBands values (the mean mels) and drops the rest
+	~standardizer.fitTransform(~meanmels,~stdmels);
+	~tree.fit(~stdmels,{"KDTree ready".postln});
+}.fork(clock:~clock);
+)
+
+~meanmels.print
+
+//Step 2: Set the FluidStandardizer and FluidKDTree up for listening
+//set the buffers and busses needed
+(
+~stdInputPoint = Buffer.alloc(s,40);
+~stdOutputPoint = Buffer.alloc(s,40);
+~treeOutputPoint = Buffer.alloc(s,3 * 10);//numNeighbours x triples of bufnum,start,end
+)
+
+
+// let's play a random sound (to make sure we understand our data structure!)
+(
+{
+	var randPoint, buf, start, stop, dur;
+
+	randPoint = ~slices["data"].keys.asArray.scramble[0]; // a good way of getting a random key (original note: "recast as strong" - presumably "string")
+
+	buf= ~slices["data"][randPoint][0];
+	start = ~slices["data"][randPoint][1];
+	stop = ~slices["data"][randPoint][2];
+
+	dur = stop - start;
+
+	BufRd.ar(1,buf, Line.ar(start,stop,dur/s.sampleRate, doneAction: 2), 0, 2);
+}.play
+)
+
+
+// Query KD tree
+
+// a target sound from outside our dataset
+~inBuf = Buffer.readChannel(s, Platform.resourceDir +/+ "sounds/a11wlk01.wav", numFrames:15000, channels:[0]);
+~inBuf.play
+
+//OR one from within
+~inBuf = Buffer.alloc(s,15000);
+~randomSlice = ~slices["data"].keys.asArray.scramble[0];
+~audio_buffers.detect{|b| b.bufnum == ~slices["data"][~randomSlice][0]}.copyData(~inBuf,srcStartAt: ~slices["data"][~randomSlice][1], numSamples: 15000.min(~slices["data"][~randomSlice][2] - (~slices["data"][~randomSlice][1]))); // the slice stores a bufnum, so look the source buffer up by bufnum rather than using it as an array index
+~inBuf.play
+
+// now try getting a point, playing it, grabbing nearest neighbour and playing it ...
+
+(
+~inBufMels = Buffer(s);
+~inBufStats = Buffer(s);
+~inBufFlat = Buffer(s);
+~inBufComp = Buffer(s);
+~inBufStand = Buffer(s);
+)
+
+// FluidBufCompose is the buffer version of FluidDataSetQuery
+
+(
+FluidBufMelBands.process(s, ~inBuf, features: ~inBufMels, action: {
+	FluidBufStats.process(s, ~inBufMels, stats:~inBufStats, action: {
+		FluidBufFlatten.process(s, ~inBufStats, ~inBufFlat, action: {
+			FluidBufCompose.process(s, ~inBufFlat, numFrames: ~numMelBands, destination: ~inBufComp, action: {
+				~standardizer.transformPoint(~inBufComp, ~inBufStand, {
+					~tree.kNearest(~inBufStand,{ |a|a.postln;~nearest = a;})
+				})
+			})
+		})
+	})
+})
+)
+
+// playback nearest in order
+(
+fork{
+	~nearest.do{|i|
+		var buf, start, stop, dur;
+
+		buf= ~slices["data"][i.asInteger][0];
+		start = ~slices["data"][i.asInteger][1];
+		stop = ~slices["data"][i.asInteger][2];
+		dur = (stop - start)/ s.sampleRate;
+		{BufRd.ar(1,buf, Line.ar(start,stop,dur, doneAction: 2), 0, 2);}.play;
+
+		i.postln;
+		dur.wait;
+	};
+}
+)
+
+
+
+
+