TITLE:: FluidPCA
summary:: Dimensionality Reduction with Principal Component Analysis
categories:: Dimensionality Reduction, Data Processing
related:: Classes/FluidMDS, Classes/FluidDataSet
DESCRIPTION::
Principal Component Analysis of a link::Classes/FluidDataSet::. PCA is a linear dimensionality reduction technique: it projects the data onto the orthogonal directions of greatest variance, so that the first code::k:: components preserve as much of the original variance as possible. For more background, see https://scikit-learn.org/stable/modules/decomposition.html#principal-component-analysis-pca
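A minimal quick-start sketch (the dataset names here are placeholders; see the examples below for a full workflow):
code::
// assumes ~highDim is an existing, populated FluidDataSet
~lowDim = FluidDataSet(s, \pca_quickstart);
~pca = FluidPCA(s);
~pca.fitTransform(~highDim, ~lowDim, 2, { "reduced to 2 dimensions".postln });
::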
CLASSMETHODS::
METHOD:: new
Make a new instance
ARGUMENT:: server
The server on which to run this model
INSTANCEMETHODS::
PRIVATE:: init
METHOD:: fit
Train this model on a link::Classes/FluidDataSet:: but don't transform the data
ARGUMENT:: dataset
A link::Classes/FluidDataSet:: to analyse
ARGUMENT:: k
The number of dimensions to reduce to
ARGUMENT:: action
Run when done
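For example, assuming code::~dataset:: is a populated link::Classes/FluidDataSet:::
code::
~pca = FluidPCA(s);
~pca.fit(~dataset, 2, { "model fitted, ready to transform".postln });
::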
METHOD:: transform
Given a trained model, apply the reduction to a source link::Classes/FluidDataSet:: and write to a destination link::Classes/FluidDataSet::. The source and destination can be the same dataset, in which case the reduction is applied in place.
ARGUMENT:: sourceDataset
Source data, or the dataset name
ARGUMENT:: destDataset
Destination data, or the dataset name
ARGUMENT:: action
Run when done
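A sketch, assuming code::~pca:: has already been fitted and code::~src:: and code::~dst:: are existing link::Classes/FluidDataSet::s (hypothetical names):
code::
// write the reduction of ~src into ~dst
~pca.transform(~src, ~dst, { "transformed".postln });
// passing the same dataset as source and destination applies the reduction in place
::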
METHOD:: fitTransform
link::Classes/FluidPCA#-fit:: and link::Classes/FluidPCA#-transform:: in a single pass
ARGUMENT:: sourceDataset
Source data, or the dataset name
ARGUMENT:: destDataset
Destination data, or the dataset name
ARGUMENT:: k
The number of dimensions to reduce to
ARGUMENT:: action
Run when done
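For example, fitting on a source and writing a 2-dimensional reduction to a destination in one call (hypothetical names):
code::
~pca.fitTransform(~src, ~dst, 2, { "fitted and transformed".postln });
::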
METHOD:: transformPoint
Given a trained model, transform the data point in a source link::Classes/Buffer:: and write the result to an output link::Classes/Buffer::
ARGUMENT:: sourceBuffer
Input data
ARGUMENT:: destBuffer
Output data
ARGUMENT:: action
Run when done
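A sketch, assuming code::~pca:: was trained on 12-dimensional data:
code::
~inPoint = Buffer.loadCollection(s, 0.5.dup(12)); // one 12-dimensional point
~outPoint = Buffer.new(s);
~pca.transformPoint(~inPoint, ~outPoint, {
    ~outPoint.loadToFloatArray(action: { |a| a.postln }); // the reduced point
});
::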
METHOD:: cols
Return the dimensionality of the data the model was trained on
ARGUMENT:: action
Run when done, taking the number of columns as an argument. If nil, defaults to posting the result to the post window
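For example:
code::
~pca.cols({ |d| "trained on % dimensions".format(d).postln });
::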
METHOD:: read
Read a trained model from a JSON file on disk
ARGUMENT:: filename
The absolute path of the JSON file to read
ARGUMENT:: action
A function to run when the file has been read
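A sketch, assuming a model was previously saved to this (hypothetical) path:
code::
~pca2 = FluidPCA(s);
~pca2.read(Platform.userHomeDir +/+ "pca_model.json", { "model loaded".postln });
::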
METHOD:: write
Write the trained model to disk as a JSON file.
ARGUMENT:: filename
Absolute path for the new file
ARGUMENT:: action
A function to run when the file has been written
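For example, saving a fitted model (the path is a placeholder):
code::
~pca.write(Platform.userHomeDir +/+ "pca_model.json", { "model saved".postln });
::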
EXAMPLES::
code::
s.boot;
// Preliminaries: we want some audio, a couple of FluidDataSets, some Buffers, a FluidStandardize and a FluidPCA
(
~audiofile = File.realpath(FluidBufPitch.class.filenameSymbol).dirname +/+ "../AudioFiles/Tremblay-ASWINE-ScratchySynth-M.wav";
~raw = FluidDataSet(s, \pca_help_12D);
~reduced = FluidDataSet(s, \pca_help_2D);
~audio = Buffer.read(s, ~audiofile);
~mfcc_feature = Buffer.new(s);
~stats = Buffer.new(s);
~datapoint = Buffer.alloc(s, 12);
~standardizer = FluidStandardize(s);
~pca = FluidPCA(s);
)
// Do an MFCC analysis on the audio, which gives us 13 coefficients per frame; we throw the
// 0th away (startChan: 1), leaving 12 dimensions. Divide the time series into 100 chunks,
// take the mean of each chunk, and add it as a point to the 'raw' FluidDataSet
(
~raw.clear;
~reduced.clear;
FluidBufMFCC.process(s, ~audio, features: ~mfcc_feature, action: {
    "MFCC analysis complete. Doing stats".postln;
    fork{
        var chunkLen = (~mfcc_feature.numFrames / 100).asInteger;
        100.do{ |i|
            s.sync;
            FluidBufStats.process(s, ~mfcc_feature, startFrame: i * chunkLen, numFrames: chunkLen, startChan: 1, stats: ~stats, action: {
                ~stats.loadToFloatArray(action: { |statsdata|
                    // frame 0 of the stats buffer holds the mean of each of the 12 channels
                    ~datapoint.setn(0, statsdata[0..11]);
                    s.sync;
                    ("Adding point " ++ i).postln;
                    ~raw.addPoint(i, ~datapoint);
                })
            });
            if(i == 99) { "Analysis done, dataset ready".postln }
        }
    }
});
)
// First standardize our dataset, so that the MFCC dimensions are on commensurate scales
// Then apply the PCA in place on the standardized data
// Download the dataset contents into an array for plotting
(
~standardizer.fit(~raw);
~standardizer.transform(~raw, ~reduced);
~pca.fitTransform(~reduced, ~reduced, 2);
~reducedarray = Array.new(100);
fork{
    100.do{ |i|
        ~reduced.getPoint(i, ~datapoint, {
            ~datapoint.loadToFloatArray(action: { |a| ~reducedarray.add(Array.newFrom(a)) })
        });
        s.sync;
        if(i == 99) { "Data downloaded".postln };
    }
}
)
// Visualise the 2D projection of our original 12D data
(
d = ~reducedarray.flatten(1).unlace.deepCollect(1, { |x| x.normalize });
// d = [20.collect{ 1.0.rand }, 20.collect{ 1.0.rand }]; // dummy data for testing the plot
w = Window("scatter", Rect(128, 64, 200, 200));
w.drawFunc = {
    Pen.use {
        d[0].size.do{ |i|
            var x = d[0][i] * 200;
            var y = d[1][i] * 200;
            var r = Rect(x, y, 5, 5);
            Pen.fillColor = Color.blue;
            Pen.fillOval(r);
        }
    }
};
w.refresh;
w.front;
)
::