From b58360d4d41665f46d0145a0d96af6d960bde137 Mon Sep 17 00:00:00 2001
From: Owen Green
Date: Wed, 20 May 2020 12:09:18 +0100
Subject: [PATCH] FluidPCA and FluidMDS updates and help files

---
 release-packaging/Classes/FluidMDS.sc        |  18 +-
 release-packaging/Classes/FluidPCA.sc        |  17 +-
 .../HelpSource/Classes/FluidMDS.schelp       | 139 +++++++++++++++
 .../HelpSource/Classes/FluidPCA.schelp       | 163 ++++++++++++++++++
 4 files changed, 322 insertions(+), 15 deletions(-)
 create mode 100644 release-packaging/HelpSource/Classes/FluidMDS.schelp
 create mode 100644 release-packaging/HelpSource/Classes/FluidPCA.schelp

diff --git a/release-packaging/Classes/FluidMDS.sc b/release-packaging/Classes/FluidMDS.sc
index 9912b61..57139d3 100644
--- a/release-packaging/Classes/FluidMDS.sc
+++ b/release-packaging/Classes/FluidMDS.sc
@@ -1,17 +1,23 @@
 FluidMDS : FluidManipulationClient {
-	var id;
+	classvar < manhattan = 0;
+	classvar < euclidean = 1;
+	classvar < sqeuclidean = 2;
+	classvar < max = 3;
+	classvar < min = 4;
+	classvar < kl = 5;
+	classvar < cosine = 6;
 
-	*new {|server|
+	*new {|server|
 		var uid = UniqueID.next;
-		^super.new(server,uid).init(uid);
+		^super.new(server,uid)!?{|inst|inst.init(uid);inst}
 	}
 
 	init {|uid|
 		id = uid;
 	}
 
-	fitTransform{|sourceDataset, k, dist, destDataset, action|
-		this.pr_sendMsg(\fitTransform,[sourceDataset.asString, k, dist, destDataset.asString],action);
-	}
+	fitTransform{|sourceDataset, destDataset, k, dist, action|
+		this.prSendMsg(\fitTransform,[sourceDataset.asSymbol, destDataset.asSymbol, k, dist],action);
+	}
 
 }
diff --git a/release-packaging/Classes/FluidPCA.sc b/release-packaging/Classes/FluidPCA.sc
index ef7cf7a..29c4be8 100644
--- a/release-packaging/Classes/FluidPCA.sc
+++ b/release-packaging/Classes/FluidPCA.sc
@@ -11,15 +11,15 @@ FluidPCA : FluidManipulationClient {
 	}
 
 	fit{|dataset, k, action|
-		this.prSendMsg(\fit,[dataset.asString, k],action);
+		this.prSendMsg(\fit,[dataset.asSymbol, k],action);
 	}
 
 	transform{|sourceDataset, destDataset, action|
-		this.prSendMsg(\transform,[sourceDataset.asString, destDataset.asString],action);
+		this.prSendMsg(\transform,[sourceDataset.asSymbol, destDataset.asSymbol],action);
 	}
 
-	fitTransform{|sourceDataset, k, destDataset, action|
-		this.prSendMsg(\fitTransform,[sourceDataset.asString,k, destDataset.asString],action);
+	fitTransform{|sourceDataset, destDataset, k, action|
+		this.prSendMsg(\fitTransform,[sourceDataset.asSymbol, destDataset.asSymbol, k],action);
 	}
 
 
@@ -28,11 +28,10 @@ FluidPCA : FluidManipulationClient {
 	}
 
 
 	cols {|action|
-		this.prSendMsg(\cols,[],action,[numbers(FluidMessageResponse,_,1,_)]);
-	}
-	rows {|action|
-		this.prSendMsg(\rows,[],action,[numbers(FluidMessageResponse,_,1,_)]);
+		action ?? {action = postit};
+
+		this.prSendMsg(\cols,[],action,[numbers(FluidMessageResponse,_,1,_)]);
 	}
 
 	read{|filename,action|
@@ -43,4 +42,4 @@
 		this.prSendMsg(\write,[filename],action);
 	}
 
-}
+}
diff --git a/release-packaging/HelpSource/Classes/FluidMDS.schelp b/release-packaging/HelpSource/Classes/FluidMDS.schelp
new file mode 100644
index 0000000..d69d282
--- /dev/null
+++ b/release-packaging/HelpSource/Classes/FluidMDS.schelp
@@ -0,0 +1,139 @@
+TITLE:: FluidMDS
+summary:: Dimensionality Reduction with Multidimensional Scaling
+categories:: Dimensionality Reduction, Data Processing
+related:: Classes/FluidPCA, Classes/FluidDataSet
+
+DESCRIPTION::
+Reduce the dimensionality of a link::Classes/FluidDataSet:: using Multidimensional Scaling (MDS). See https://scikit-learn.org/stable/modules/manifold.html#multi-dimensional-scaling-mds
+
+CLASSMETHODS::
+
+METHOD:: new
+Make a new instance
+ARGUMENT:: server
+The server on which to run this model
+
+METHOD:: euclidean
+Euclidean distance (default)
+
+METHOD:: sqeuclidean
+Squared Euclidean distance
+
+METHOD:: manhattan
+Manhattan distance
+
+METHOD:: max
+Minkowski max distance
+
+METHOD:: min
+Minkowski min distance
+
+METHOD:: kl
+Symmetric Kullback-Leibler divergence (only makes sense with non-negative data)
+
+METHOD:: cosine
+Cosine distance
+
+INSTANCEMETHODS::
+
+PRIVATE:: init
+
+METHOD:: fitTransform
+Fit the model to a link::Classes/FluidDataSet:: and write the new projected data to a destination FluidDataSet.
+ARGUMENT:: sourceDataset
+Source data, or the dataset name
+ARGUMENT:: destDataset
+Destination data, or the dataset name
+ARGUMENT:: k
+The number of dimensions to reduce to
+ARGUMENT:: dist
+The distance metric to use (integer, 0-6, see flags above)
+ARGUMENT:: action
+Run when done
+
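+DISCUSSION::
+A minimal sketch of a call (not a complete example: it assumes code::~src:: and code::~dst:: are FluidDataSets that already exist and that code::~src:: holds some points). The distance flags above are plain integer constants, passed as the code::dist:: argument:
+code::
+~mds = FluidMDS(s);
+FluidMDS.manhattan.postln; // -> 0
+~mds.fitTransform(~src, ~dst, 2, FluidMDS.manhattan, {"reduced to 2 dimensions".postln});
+::
+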
+EXAMPLES::
+
+code::
+//Preliminaries: we want some audio, a couple of FluidDataSets, some Buffers, a FluidStandardize and a FluidMDS
+(
+~audiofile = File.realpath(FluidBufPitch.class.filenameSymbol).dirname +/+ "../AudioFiles/Tremblay-ASWINE-ScratchySynth-M.wav";
+~raw = FluidDataSet(s,\mds_help_12D);
+~reduced = FluidDataSet(s,\mds_help_2D);
+~audio = Buffer.read(s,~audiofile);
+~mfcc_feature = Buffer.new(s);
+~stats = Buffer.new(s);
+~datapoint = Buffer.alloc(s,12);
+~standardizer = FluidStandardize(s);
+~mds = FluidMDS(s);
+)
+
+// Run an MFCC analysis on the audio, which gives us 13 coefficients per frame; we'll throw the 0th away
+// Divide the time series into 100 chunks, take the mean of each chunk and add this as a point to
+// the 'raw' FluidDataSet
+(
+~raw.clear;
+~reduced.clear;
+FluidBufMFCC.process(s,~audio,features:~mfcc_feature,action:{
+	"MFCC analysis complete. Doing stats".postln;
+	fork{
+		var chunkLen = (~mfcc_feature.numFrames / 100).asInteger;
+		100.do{ |i|
+			s.sync;
+			FluidBufStats.process(s,~mfcc_feature,startFrame:i*chunkLen,numFrames:chunkLen,startChan:1, stats:~stats, action:{
+				~stats.loadToFloatArray(action:{ |statsdata|
+					//frame 0 of the stats buffer holds the 12 per-channel means
+					~datapoint.setn(0,statsdata[0..11]);
+					s.sync;
+					("Adding point " ++ i).postln;
+					~raw.addPoint(i,~datapoint);
+				})
+			});
+			if(i == 99) {"Analysis done, dataset ready".postln}
+		}
+	}
+});
+)
+
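+// (Optional) sanity check: fetch one point back from the dataset and post it.
+// This is only a sketch - it assumes the analysis above has finished, so point "0" exists.
+(
+~raw.getPoint(0, ~datapoint, {
+	~datapoint.loadToFloatArray(action:{ |a| "Point 0:".postln; a.postln });
+});
+)
+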
Doing stats".postln; + fork{ + var chunkLen = (~mfcc_feature.numFrames / 100).asInteger; + 100.do{ |i| + s.sync; FluidBufStats.process(s,~mfcc_feature,startFrame:i*chunkLen,numFrames:chunkLen,startChan:1, stats:~stats, action:{ + ~stats.loadToFloatArray(action:{ |statsdata| + [statsdata[0],statsdata[1]].postln; + ~datapoint.setn(0,[statsdata[0],statsdata[1]]); + s.sync; + ("Adding point" ++ i).postln; + ~raw.addPoint(i,~datapoint); + }) + }); + if(i == 99) {"Analysis done, dataset ready".postln} + } + } +}); +) + +//First standardize our dataset, so that the MFCC dimensions are on comensurate scales +//Then apply the MDS in-place on the standardized data to get 2 dimensions, using a Euclidean distance metric +//Download the dataset contents into an array for plotting +( +~standardizer.fit(~raw); +~standardizer.transform(~raw, ~reduced); +~mds.fitTransform(~raw,~reduced,2, FluidMDS.euclidean); +~reducedarray= Array.new(100); +fork{ + 100.do{|i| + ~reduced.getPoint(i,~datapoint,{ + + ~datapoint.loadToFloatArray(action:{|a| ~reducedarray.add(Array.newFrom(a))}) + }); + s.sync; + if(i==99){"Data downloaded".postln}; + } +} +) + +//Visualise the 2D projection of our original 12D data +( +d = ~reducedarray.flatten(1).unlace.deepCollect(1, { |x| x.normalize}); +// d = [20.collect{1.0.rand}, 20.collect{1.0.rand}]; +w = Window("scatter", Rect(128, 64, 200, 200)); +w.drawFunc = { + Pen.use { + d[0].size.do{|i| + var x = (d[0][i]*200); + var y = (d[1][i]*200); + var r = Rect(x,y,5,5); + Pen.fillColor = Color.blue; + Pen.fillOval(r); + } + } +}; +w.refresh; +w.front; +) + +:: diff --git a/release-packaging/HelpSource/Classes/FluidPCA.schelp b/release-packaging/HelpSource/Classes/FluidPCA.schelp new file mode 100644 index 0000000..d21fe7f --- /dev/null +++ b/release-packaging/HelpSource/Classes/FluidPCA.schelp @@ -0,0 +1,163 @@ +TITLE:: FluidPCA +summary:: Dimensionality Reduction with Principal Component Analysis +categories:: Dimensionality Reduction, Data Processing +related:: Classes/FluidMDS, Classes/FluidDataSet + +DESCRIPTION:: + +https://scikit-learn.org/stable/modules/decomposition.html#principal-component-analysis-pca + +CLASSMETHODS:: + +METHOD:: new +Make a new instance +ARGUMENT:: server +The server on which to run this model + +INSTANCEMETHODS:: + +PRIVATE:: init + +METHOD:: fit +Train this model on a link::Classes/FluidDataSet:: but don't transform the data +ARGUMENT:: dataset +A link::Classes/FluidDataSet:: to analyse +ARGUMENT:: k +The number of dimensions to reduce to +ARGUMENT:: action +Run when done + +METHOD:: transform +Given a trained model, apply the reduction to a source link::Classes/FluidDataSet:: and write to a destination. 
+METHOD:: cols
+Return the dimensionality of the data the model was trained on
+ARGUMENT:: action
+Run when done, taking the number of columns as an argument. If nil, defaults to posting to the post window
+
+METHOD:: read
+Read a saved model from a JSON file on disk
+ARGUMENT:: filename
+The absolute path of the JSON file to read
+ARGUMENT:: action
+A function to run when the file has been read
+
+METHOD:: write
+Write the model to disk as a JSON file.
+ARGUMENT:: filename
+Absolute path for the new file
+ARGUMENT:: action
+A function to run when the file has been written
+
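+DISCUSSION::
+A sketch of saving a fitted model and loading it into a fresh instance (the file name is only an example, and code::~pca2:: is a hypothetical second instance):
+code::
+~pca.write(Platform.defaultTempDir +/+ "pca_help_model.json", { "model saved".postln });
+~pca2 = FluidPCA(s);
+~pca2.read(Platform.defaultTempDir +/+ "pca_help_model.json", {
+	~pca2.cols{ |n| ("model trained on " ++ n ++ " dimensions").postln }
+});
+::
+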
Doing stats".postln; + fork{ + var chunkLen = (~mfcc_feature.numFrames / 100).asInteger; + 100.do{ |i| + s.sync; FluidBufStats.process(s,~mfcc_feature,startFrame:i*chunkLen,numFrames:chunkLen,startChan:1, stats:~stats, action:{ + ~stats.loadToFloatArray(action:{ |statsdata| + [statsdata[0],statsdata[1]].postln; + ~datapoint.setn(0,[statsdata[0],statsdata[1]]); + s.sync; + ("Adding point" ++ i).postln; + ~raw.addPoint(i,~datapoint); + }) + }); + if(i == 99) {"Analysis done, dataset ready".postln} + } + } +}); +) + +//First standardize our dataset, so that the MFCC dimensions are on comensurate scales +//Then apply the PCA in-place on the standardized data +//Download the dataset contents into an array for plotting +( +~standardizer.fit(~raw); +~standardizer.transform(~raw, ~reduced); +~pca.fitTransform(~raw,~reduced,2); +~reducedarray= Array.new(100); +fork{ + 100.do{|i| + ~reduced.getPoint(i,~datapoint,{ + + ~datapoint.loadToFloatArray(action:{|a| ~reducedarray.add(Array.newFrom(a))}) + }); + s.sync; + if(i==99){"Data downloaded".postln}; + } +} +) + + +//Visualise the 2D projection of our original 12D data +( +d = ~reducedarray.flatten(1).unlace.deepCollect(1, { |x| x.normalize}); +// d = [20.collect{1.0.rand}, 20.collect{1.0.rand}]; +w = Window("scatter", Rect(128, 64, 200, 200)); +w.drawFunc = { + Pen.use { + d[0].size.do{|i| + var x = (d[0][i]*200); + var y = (d[1][i]*200); + var r = Rect(x,y,5,5); + Pen.fillColor = Color.blue; + Pen.fillOval(r); + } + } +}; +w.refresh; +w.front; +) + +::