TITLE:: FluidPCA
summary:: Dimensionality Reduction with Principal Component Analysis
categories:: Dimensionality Reduction, Data Processing
related:: Classes/FluidMDS, Classes/FluidDataSet

DESCRIPTION::
Reduces the dimensionality of the data in a link::Classes/FluidDataSet:: using Principal Component Analysis. For background on the technique, see https://scikit-learn.org/stable/modules/decomposition.html#principal-component-analysis-pca

CLASSMETHODS::

METHOD:: new
Make a new instance

ARGUMENT:: server
The server on which to run this model

INSTANCEMETHODS::

PRIVATE:: init

METHOD:: fit
Train this model on a link::Classes/FluidDataSet:: but don't transform the data

ARGUMENT:: dataset
A link::Classes/FluidDataSet:: to analyse

ARGUMENT:: k
The number of dimensions to reduce to

ARGUMENT:: action
Run when done

METHOD:: transform
Given a trained model, apply the reduction to a source link::Classes/FluidDataSet:: and write to a destination. Source and destination can be the same dataset.

ARGUMENT:: sourceDataset
Source data, or the dataset name

ARGUMENT:: destDataset
Destination data, or the dataset name

ARGUMENT:: action
Run when done

METHOD:: fitTransform
link::Classes/FluidPCA#fit:: and link::Classes/FluidPCA#transform:: in a single pass

ARGUMENT:: sourceDataset
Source data, or the dataset name

ARGUMENT:: destDataset
Destination data, or the dataset name

ARGUMENT:: k
The number of dimensions to reduce to

ARGUMENT:: action
Run when done

METHOD:: transformPoint
Given a trained model, transform the data point in a link::Classes/Buffer:: and write to an output link::Classes/Buffer:: (see the sketch at the end of the examples below)

ARGUMENT:: sourceBuffer
Input data

ARGUMENT:: destBuffer
Output data

ARGUMENT:: action
Run when done

EXAMPLES::

code::
s.boot;

//Preliminaries: we want some audio, a couple of FluidDataSets, some Buffers, a FluidStandardize and a FluidPCA
(
~audiofile = File.realpath(FluidBufPitch.class.filenameSymbol).dirname +/+ "../AudioFiles/Tremblay-ASWINE-ScratchySynth-M.wav";
~raw = FluidDataSet(s,\pca_help_12D);
~reduced = FluidDataSet(s,\pca_help_2D);
~audio = Buffer.read(s,~audiofile);
~mfcc_feature = Buffer.new(s);
~stats = Buffer.new(s);
~datapoint = Buffer.alloc(s,12);
~standardizer = FluidStandardize(s);
~pca = FluidPCA(s);
)

// Do an MFCC analysis on the audio, which gives us 13 coefficients per frame; we throw the 0th away
// (by starting the stats at channel 1), leaving 12 dimensions. Divide the time series into 100 chunks,
// take the mean of each chunk and add it as a point to the 'raw' FluidDataSet
(
~raw.clear;
~reduced.clear;
FluidBufMFCC.process(s,~audio,features:~mfcc_feature,action:{
	"MFCC analysis complete. Doing stats".postln;
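	// FluidBufStats writes 7 statistics per channel as 7 frames of the ~stats buffer
	// (mean, standard deviation, skewness, kurtosis, minimum, median, maximum).
	// loadToFloatArray returns that buffer frame-interleaved, so with startChan:1 the
	// first 12 values are the per-channel means used as one 12D data point below.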
	fork{
		var chunkLen = (~mfcc_feature.numFrames / 100).asInteger;
		100.do{ |i|
			s.sync;
			FluidBufStats.process(s,~mfcc_feature,startFrame:i*chunkLen,numFrames:chunkLen,startChan:1,stats:~stats,action:{
				~stats.loadToFloatArray(action:{ |statsdata|
					~datapoint.setn(0,statsdata[0..11]); // the per-channel means of this chunk
					s.sync;
					("Adding point " ++ i).postln;
					~raw.addPoint(i,~datapoint);
				})
			});
			if(i == 99){"Analysis done, dataset ready".postln};
		}
	}
});
)

//First standardize our dataset, so that the MFCC dimensions are on commensurate scales
//Then apply the PCA in-place on the standardized data
//Download the dataset contents into an array for plotting
(
~standardizer.fit(~raw,{
	~standardizer.transform(~raw,~reduced,{
		~pca.fitTransform(~reduced,~reduced,2,{
			~reducedarray = Array.new(100);
			fork{
				100.do{ |i|
					~reduced.getPoint(i,~datapoint,{
						~datapoint.loadToFloatArray(action:{ |a| ~reducedarray.add(Array.newFrom(a)) })
					});
					s.sync;
					if(i == 99){"Data downloaded".postln};
				}
			}
		});
	});
});
)

//Visualise the 2D projection of our original 12D data
(
d = ~reducedarray.flop.collect{ |x| x.normalize }; // transpose to [x coordinates, y coordinates], each scaled to 0..1
// d = [20.collect{1.0.rand}, 20.collect{1.0.rand}]; // uncomment to test the plot with random data
w = Window("scatter", Rect(128, 64, 200, 200));
w.drawFunc = {
	Pen.use {
		d[0].size.do{ |i|
			var x = (d[0][i] * 200);
			var y = (d[1][i] * 200);
			var r = Rect(x, y, 5, 5);
			Pen.fillColor = Color.blue;
			Pen.fillOval(r);
		}
	}
};
w.refresh;
w.front;
)
::
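
Once fitted, the model can also project individual points on the fly with link::Classes/FluidPCA#transformPoint::. Below is a minimal sketch, assuming the example above has been run so that code::~standardizer:: and code::~pca:: are trained; it also assumes link::Classes/FluidStandardize:: offers a matching code::transformPoint:: method, and the buffer names code::~in_point::, code::~std_point:: and code::~point2D:: are illustrative.

code::
(
~in_point = Buffer.alloc(s, 12);  // a 12D input point (illustrative name)
~std_point = Buffer.alloc(s, 12); // the standardized version of that point (illustrative name)
~point2D = Buffer.alloc(s, 2);    // the 2D projection (illustrative name)
// fetch an existing 12D point from the raw dataset as test input, standardize it with the
// already-fitted FluidStandardize, then project it with the trained FluidPCA
~raw.getPoint(0, ~in_point, {
	~standardizer.transformPoint(~in_point, ~std_point, {
		~pca.transformPoint(~std_point, ~point2D, {
			~point2D.loadToFloatArray(action: { |a| ("2D projection: " ++ a).postln });
		});
	});
});
)
::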