FluidPCA and FluidMDS updates and help files

nix
Owen Green 6 years ago
parent a109652612
commit b58360d4d4

@ -1,17 +1,23 @@
FluidMDS : FluidManipulationClient {
var id;
// Distance-metric flags for the `dist` argument of fitTransform.
// Every metric needs its own index: cosine used to share 5 with kl, so
// asking for FluidMDS.cosine silently selected the KL divergence instead.
// The help file documents the range as 0-6.
// NOTE(review): confirm the order matches the server-side metric enum.
classvar < manhattan = 0;
classvar < euclidean = 1;
classvar < sqeuclidean = 2;
classvar < max = 3;
classvar < min = 4;
classvar < kl = 5;
classvar < cosine = 6;
*new {|server|
*new {|server|
var uid = UniqueID.next;
^super.new(server,uid).init(uid);
^super.new(server,uid)!?{|inst|inst.init(uid);inst}
}
// Private initialiser: stores the unique id generated in *new on the instance.
init {|uid|
id = uid;
}
fitTransform{|sourceDataset, k, dist, destDataset, action|
this.pr_sendMsg(\fitTransform,[sourceDataset.asString, k, dist, destDataset.asString],action);
}
fitTransform{|sourceDataset, destDataset, k, dist, action|
this.prSendMsg(\fitTransform,[sourceDataset.asSymbol, destDataset.asSymbol, k, dist],action);
}
}

@ -11,15 +11,15 @@ FluidPCA : FluidManipulationClient {
}
fit{|dataset, k, action|
this.prSendMsg(\fit,[dataset.asString, k],action);
this.prSendMsg(\fit,[dataset.asSymbol, k],action);
}
transform{|sourceDataset, destDataset, action|
this.prSendMsg(\transform,[sourceDataset.asString, destDataset.asString],action);
this.prSendMsg(\transform,[sourceDataset.asSymbol, destDataset.asSymbol],action);
}
fitTransform{|sourceDataset, k, destDataset, action|
this.prSendMsg(\fitTransform,[sourceDataset.asString,k, destDataset.asString],action);
fitTransform{|sourceDataset, destDataset, k, action|
this.prSendMsg(\fitTransform,[sourceDataset.asSymbol, destDataset.asSymbol, k],action);
}
@ -28,11 +28,10 @@ FluidPCA : FluidManipulationClient {
}
cols {|action|
this.prSendMsg(\cols,[],action,[numbers(FluidMessageResponse,_,1,_)]);
}
rows {|action|
this.prSendMsg(\rows,[],action,[numbers(FluidMessageResponse,_,1,_)]);
action ?? {action = postit};
this.prSendMsg(\cols,[],action,[numbers(FluidMessageResponse,_,1,_)]);
}
read{|filename,action|
@ -43,4 +42,4 @@ FluidPCA : FluidManipulationClient {
this.prSendMsg(\write,[filename],action);
}
}
}

@ -0,0 +1,139 @@
TITLE:: FluidMDS
summary:: Dimensionality Reduction with Multidimensional Scaling
categories:: Dimensionality Reduction, Data Processing
related:: Classes/FluidPCA, Classes/FluidDataSet
DESCRIPTION::
https://scikit-learn.org/stable/modules/manifold.html#multi-dimensional-scaling-mds
CLASSMETHODS::
METHOD:: new
Make a new instance
ARGUMENT:: server
The server on which to run this model
METHOD:: euclidean
Euclidean distance (default)
METHOD:: sqeuclidean
Squared Euclidean distance
METHOD:: manhattan
Manhattan distance
METHOD:: max
Minkowski max
METHOD:: min
Minkowski min
METHOD:: kl
Symmetric Kullback-Leibler divergence (only makes sense with non-negative data)
METHOD:: cosine
Cosine distance
INSTANCEMETHODS::
PRIVATE:: init
METHOD:: fitTransform
Fit the model to a link::Classes/FluidDataSet:: and write the new projected data to a destination FluidDataSet.
ARGUMENT:: sourceDataset
Source data, or the dataset name
ARGUMENT:: destDataset
Destination data, or the dataset name
ARGUMENT:: k
The number of dimensions to reduce to
ARGUMENT:: dist
The distance metric to use (integer, 0-6, see flags above)
ARGUMENT:: action
Run when done
EXAMPLES::
code::
//Preliminaries: we want some audio, a couple of FluidDataSets, some Buffers, a FluidStandardize and a FluidMDS
(
// Path to a demo sound file, resolved relative to the FluidBufPitch class file
~audiofile = File.realpath(FluidBufPitch.class.filenameSymbol).dirname +/+ "../AudioFiles/Tremblay-ASWINE-ScratchySynth-M.wav";
// Two named datasets on server s: one for the analysis points, one for the reduced output
~raw = FluidDataSet(s,\mds_help_12D);
~reduced = FluidDataSet(s,\mds_help_2D);
// Source audio plus work buffers for the MFCC features and their statistics
~audio = Buffer.read(s,~audiofile);
~mfcc_feature = Buffer.new(s);
~stats = Buffer.new(s);
// 12-frame buffer used to move a single point in and out of the datasets
~datapoint = Buffer.alloc(s,12);
// The two models used below
~standardizer = FluidStandardize(s);
~mds = FluidMDS(s);
)
// Do a mfcc analysis on the audio, which gives us 13 points, and we'll throw the 0th away
// Divide the time series in to 100, and take the mean of each segment and add this as a point to
// the 'raw' FluidDataSet
(
// Start from empty datasets. The original cleared ~norm, which is never
// assigned in this example, so the call was made on nil; ~reduced is the
// second dataset that actually exists here.
~raw.clear;
~reduced.clear;
FluidBufMFCC.process(s,~audio,features:~mfcc_feature,action:{
	"MFCC analysis.complete. Doing stats".postln;
	fork{
		// Number of MFCC frames in each of the 100 segments
		var chunkLen = (~mfcc_feature.numFrames / 100).asInteger;
		100.do{ |i|
			// startChan:1 skips channel 0 (the 0th coefficient is thrown away)
			s.sync; FluidBufStats.process(s,~mfcc_feature,startFrame:i*chunkLen,numFrames:chunkLen,startChan:1, stats:~stats, action:{
				~stats.loadToFloatArray(action:{ |statsdata|
					[statsdata[0],statsdata[1]].postln;
					~datapoint.setn(0,[statsdata[0],statsdata[1]]);
					s.sync;
					("Adding point" ++ i).postln;
					// Store this segment's point under label i
					~raw.addPoint(i,~datapoint);
				})
			});
			if(i == 99) {"Analysis done, dataset ready".postln}
		}
	}
});
)
//First standardize our dataset, so that the MFCC dimensions are on commensurate scales
//Then apply the MDS in-place on the standardized data to get 2 dimensions, using a Euclidean distance metric
//Download the dataset contents into an array for plotting
(
~standardizer.fit(~raw);
// The standardized copy of ~raw is written into ~reduced ...
~standardizer.transform(~raw, ~reduced);
// ... and MDS then reduces that standardized data in place. The original
// passed ~raw as the source, which threw the standardization away.
~mds.fitTransform(~reduced,~reduced,2, FluidMDS.euclidean);
// Language-side copy of the reduced points, filled in below for plotting
~reducedarray= Array.new(100);
fork{
	100.do{|i|
		// Fetch point i into ~datapoint, then copy the buffer contents to the language
		~reduced.getPoint(i,~datapoint,{
			~datapoint.loadToFloatArray(action:{|a| ~reducedarray.add(Array.newFrom(a))})
		});
		s.sync;
		if(i==99){"Data downloaded".postln};
	}
}
)
//Visualise the 2D projection of our original 12D data
(
// Flatten the downloaded points, split them into one array per dimension (unlace),
// and normalize each of those arrays
d = ~reducedarray.flatten(1).unlace.deepCollect(1, { |x| x.normalize});
// d = [20.collect{1.0.rand}, 20.collect{1.0.rand}];
w = Window("scatter", Rect(128, 64, 200, 200));
w.drawFunc = {
Pen.use {
d[0].size.do{|i|
// Scale the normalized coordinates by 200 to match the 200x200 window
var x = (d[0][i]*200);
var y = (d[1][i]*200);
// Each point is drawn as a 5x5 blue oval
var r = Rect(x,y,5,5);
Pen.fillColor = Color.blue;
Pen.fillOval(r);
}
}
};
w.refresh;
w.front;
)
::

@ -0,0 +1,163 @@
TITLE:: FluidPCA
summary:: Dimensionality Reduction with Principal Component Analysis
categories:: Dimensionality Reduction, Data Processing
related:: Classes/FluidMDS, Classes/FluidDataSet
DESCRIPTION::
https://scikit-learn.org/stable/modules/decomposition.html#principal-component-analysis-pca
CLASSMETHODS::
METHOD:: new
Make a new instance
ARGUMENT:: server
The server on which to run this model
INSTANCEMETHODS::
PRIVATE:: init
METHOD:: fit
Train this model on a link::Classes/FluidDataSet:: but don't transform the data
ARGUMENT:: dataset
A link::Classes/FluidDataSet:: to analyse
ARGUMENT:: k
The number of dimensions to reduce to
ARGUMENT:: action
Run when done
METHOD:: transform
Given a trained model, apply the reduction to a source link::Classes/FluidDataSet:: and write to a destination. The source and destination can be the same dataset.
ARGUMENT:: sourceDataset
Source data, or the dataset name
ARGUMENT:: destDataset
Destination data, or the dataset name
ARGUMENT:: action
Run when done
METHOD:: fitTransform
link::Classes/FluidPCA#fit:: and link::Classes/FluidPCA#transform:: in a single pass
ARGUMENT:: sourceDataset
Source data, or the dataset name
ARGUMENT:: destDataset
Destination data, or the dataset name
ARGUMENT:: k
The number of dimensions to reduce to
ARGUMENT:: action
Run when done
METHOD:: transformPoint
Given a trained model, transform the data point in a link::Classes/Buffer:: and write to an output
ARGUMENT:: sourceBuffer
Input data
ARGUMENT:: destBuffer
Output data
ARGUMENT:: action
Run when done
METHOD:: cols
Return the dimensionality of the data the model was trained on
ARGUMENT:: action
Run when done, taking the number of columns as an argument. If nil, defaults to posting to window
METHOD:: read
Read a data set from a JSON file on disk
ARGUMENT:: filename
The absolute path of the JSON file to read
ARGUMENT:: action
A function to run when the file has been read
METHOD:: write
Write the data set to disk as a JSON file.
ARGUMENT:: filename
Absolute path for the new file
ARGUMENT:: action
A function to run when the file has been written
EXAMPLES::
code::
s.boot;
//Preliminaries: we want some audio, a couple of FluidDataSets, some Buffers, a FluidStandardize and a FluidPCA
(
// Path to a demo sound file, resolved relative to the FluidBufPitch class file
~audiofile = File.realpath(FluidBufPitch.class.filenameSymbol).dirname +/+ "../AudioFiles/Tremblay-ASWINE-ScratchySynth-M.wav";
// Two named datasets on server s: one for the analysis points, one for the reduced output
~raw = FluidDataSet(s,\pca_help_12D);
~reduced = FluidDataSet(s,\pca_help_2D);
// Source audio plus work buffers for the MFCC features and their statistics
~audio = Buffer.read(s,~audiofile);
~mfcc_feature = Buffer.new(s);
~stats = Buffer.new(s);
// 12-frame buffer used to move a single point in and out of the datasets
~datapoint = Buffer.alloc(s,12);
// The two models used below
~standardizer = FluidStandardize(s);
~pca = FluidPCA(s);
)
// Do a mfcc analysis on the audio, which gives us 13 points, and we'll throw the 0th away
// Divide the time series in to 100, and take the mean of each segment and add this as a point to
// the 'raw' FluidDataSet
(
// Start from empty datasets. The original cleared ~norm, which is never
// assigned in this example, so the call was made on nil; ~reduced is the
// second dataset that actually exists here.
~raw.clear;
~reduced.clear;
FluidBufMFCC.process(s,~audio,features:~mfcc_feature,action:{
	"MFCC analysis.complete. Doing stats".postln;
	fork{
		// Number of MFCC frames in each of the 100 segments
		var chunkLen = (~mfcc_feature.numFrames / 100).asInteger;
		100.do{ |i|
			// startChan:1 skips channel 0 (the 0th coefficient is thrown away)
			s.sync; FluidBufStats.process(s,~mfcc_feature,startFrame:i*chunkLen,numFrames:chunkLen,startChan:1, stats:~stats, action:{
				~stats.loadToFloatArray(action:{ |statsdata|
					[statsdata[0],statsdata[1]].postln;
					~datapoint.setn(0,[statsdata[0],statsdata[1]]);
					s.sync;
					("Adding point" ++ i).postln;
					// Store this segment's point under label i
					~raw.addPoint(i,~datapoint);
				})
			});
			if(i == 99) {"Analysis done, dataset ready".postln}
		}
	}
});
)
//First standardize our dataset, so that the MFCC dimensions are on commensurate scales
//Then apply the PCA in-place on the standardized data
//Download the dataset contents into an array for plotting
(
~standardizer.fit(~raw);
// The standardized copy of ~raw is written into ~reduced ...
~standardizer.transform(~raw, ~reduced);
// ... and PCA then reduces that standardized data in place. The original
// passed ~raw as the source, which threw the standardization away.
~pca.fitTransform(~reduced,~reduced,2);
// Language-side copy of the reduced points, filled in below for plotting
~reducedarray= Array.new(100);
fork{
	100.do{|i|
		// Fetch point i into ~datapoint, then copy the buffer contents to the language
		~reduced.getPoint(i,~datapoint,{
			~datapoint.loadToFloatArray(action:{|a| ~reducedarray.add(Array.newFrom(a))})
		});
		s.sync;
		if(i==99){"Data downloaded".postln};
	}
}
)
//Visualise the 2D projection of our original 12D data
(
// Flatten the downloaded points, split them into one array per dimension (unlace),
// and normalize each of those arrays
d = ~reducedarray.flatten(1).unlace.deepCollect(1, { |x| x.normalize});
// d = [20.collect{1.0.rand}, 20.collect{1.0.rand}];
w = Window("scatter", Rect(128, 64, 200, 200));
w.drawFunc = {
Pen.use {
d[0].size.do{|i|
// Scale the normalized coordinates by 200 to match the 200x200 window
var x = (d[0][i]*200);
var y = (d[1][i]*200);
// Each point is drawn as a 5x5 blue oval
var r = Rect(x,y,5,5);
Pen.fillColor = Color.blue;
Pen.fillOval(r);
}
}
};
w.refresh;
w.front;
)
::
Loading…
Cancel
Save