You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
190 lines
5.7 KiB
Plaintext
190 lines
5.7 KiB
Plaintext
TITLE:: FluidKMeans
|
|
summary:: Cluster data points with K-Means
|
|
categories:: FluidManipulation
|
|
related:: Classes/FluidDataSet, Classes/FluidLabelSet, Classes/FluidKNNClassifier, Classes/FluidKNNRegressor
|
|
|
|
DESCRIPTION::
|
|
Uses the K-Means algorithm to learn clusters from a link::Classes/FluidDataSet::
|
|
|
|
https://scikit-learn.org/stable/tutorial/statistical_inference/unsupervised_learning.html#clustering-grouping-observations-together
|
|
|
|
CLASSMETHODS::
|
|
|
|
METHOD:: new
|
|
Construct a new K-Means model on the passed server. The parameters code::numClusters:: and code::maxIter:: can be modulated on an existing instance.
|
|
ARGUMENT:: server
|
|
If nil will use Server.default.
|
|
ARGUMENT:: numClusters
|
|
The number of clusters to classify data into.
|
|
ARGUMENT:: maxIter
|
|
The maximum number of iterations the algorithm will use whilst fitting.
|
|
|
|
|
|
INSTANCEMETHODS::
|
|
|
|
PRIVATE::k
|
|
|
|
METHOD:: fit
|
|
Identify code::numClusters:: clusters in a link::Classes/FluidDataSet::
|
|
ARGUMENT:: dataSet
|
|
A link::Classes/FluidDataSet:: of data points.
|
|
ARGUMENT:: action
|
|
A function to run when fitting is complete, taking as its argument an array with the number of data points for each cluster.
|
|
|
|
METHOD:: predict
|
|
Given a trained object, write the cluster ID for each data point in a link::Classes/FluidDataSet:: to a link::Classes/FluidLabelSet::
|
|
ARGUMENT:: dataSet
|
|
a link::Classes/FluidDataSet:: containing the data to predict.
|
|
ARGUMENT:: labelSet
|
|
a link::Classes/FluidLabelSet:: to retrieve the predicted clusters.
|
|
ARGUMENT:: action
|
|
A function to run when the server responds.
|
|
|
|
METHOD:: fitPredict
|
|
Run link::Classes/FluidKMeans#-fit:: and link::Classes/FluidKMeans#-predict:: in a single pass: i.e. train the model on the incoming link::Classes/FluidDataSet:: and then return the learned clustering to the passed link::Classes/FluidLabelSet::
|
|
ARGUMENT:: dataSet
|
|
a link::Classes/FluidDataSet:: containing the data to fit and predict.
|
|
ARGUMENT:: labelSet
|
|
a link::Classes/FluidLabelSet:: to retrieve the predicted clusters.
|
|
ARGUMENT:: action
|
|
A function to run when the server responds, taking as its argument an array with the number of data points for each cluster.
|
|
|
|
METHOD:: predictPoint
|
|
Given a trained object, return the cluster ID for a data point in a link::Classes/Buffer::
|
|
ARGUMENT:: buffer
|
|
a link::Classes/Buffer:: containing a data point.
|
|
ARGUMENT:: action
|
|
A function to run when the server responds, taking the ID of the cluster as its argument.
|
|
|
|
METHOD:: predict
|
|
Report cluster assignments for previously unseen data.
|
|
ARGUMENT:: dataSet
|
|
A link::Classes/FluidDataSet:: of data points.
|
|
ARGUMENT:: labelSet
|
|
A link::Classes/FluidLabelSet:: to contain assignments.
|
|
ARGUMENT:: action
|
|
A function to run when complete, taking an array of the counts for each cluster as its argument.
|
|
|
|
|
|
EXAMPLES::
|
|
Optionally select an audio output device first, e.g. code::Server.default.options.outDevice = "Built-in Output"::
|
|
code::
|
|
|
|
(
|
|
//Make some clumped 2D points and place into a DataSet
|
|
~points = (4.collect{
|
|
64.collect{(1.sum3rand) + [1,-1].choose}.clump(2)
|
|
}).flatten(1) * 0.5;
|
|
fork{
|
|
~dataSet = FluidDataSet.new(s,\kmeans_help_rand2d);
|
|
d = Dictionary.with(
|
|
*[\cols -> 2,\data -> Dictionary.newFrom(
|
|
~points.collect{|x, i| [i, x]}.flatten)]);
|
|
s.sync;
|
|
~dataSet.load(d, {~dataSet.print});
|
|
}
|
|
)
|
|
|
|
|
|
// Create a KMeans instance and a LabelSet for the cluster labels in the server
|
|
~clusters = FluidLabelSet(s);
|
|
~kmeans = FluidKMeans(s);
|
|
|
|
// Fit into 4 clusters
|
|
(
|
|
~kmeans.fitPredict(~dataSet,~clusters,action: {|c|
|
|
"Fitted.\n # Points in each cluster:".postln;
|
|
c.do{|x,i|
|
|
("Cluster" + i + "->" + x.asInteger + "points").postln;
|
|
}
|
|
});
|
|
)
|
|
|
|
// Cols of kmeans should match DataSet, size is the number of clusters
|
|
|
|
~kmeans.cols;
|
|
~kmeans.size;
|
|
~kmeans.dump;
|
|
|
|
~clusters.getLabel(0,{|clusterID|
|
|
(0.asString+clusterID).postln;
|
|
});
|
|
|
|
// Retrieve labels of clustered points
|
|
(
|
|
~assignments = Array.new(128);
|
|
fork{
|
|
128.do{ |i|
|
|
~clusters.getLabel(i,{|clusterID|
|
|
(i.asString+clusterID).postln;
|
|
~assignments.add(clusterID)
|
|
});
|
|
s.sync;
|
|
}
|
|
}
|
|
)
|
|
|
|
//or faster by sorting the IDs
|
|
~clusters.dump{|x|~assignments = x.at("data").atAll(x.at("data").keys.asArray.sort{|a,b|a.asInteger < b.asInteger}).flatten.postln;}
|
|
|
|
//Visualise: we're hoping to see colours neatly mapped to quadrants...
|
|
(
|
|
d = ((~points + 1) * 0.5).flatten(1).unlace;
|
|
w = Window("scatter", Rect(128, 64, 200, 200));
|
|
~colours = [Color.blue,Color.red,Color.green,Color.magenta];
|
|
w.drawFunc = {
|
|
Pen.use {
|
|
d[0].size.do{|i|
|
|
var x = (d[0][i]*200);
|
|
var y = (d[1][i]*200);
|
|
var r = Rect(x,y,5,5);
|
|
Pen.fillColor = ~colours[~assignments[i].asInteger];
|
|
Pen.fillOval(r);
|
|
}
|
|
}
|
|
};
|
|
w.refresh;
|
|
w.front;
|
|
)
|
|
|
|
// single point prediction on an arbitrary value
|
|
~inbuf = Buffer.loadCollection(s,0.5.dup);
|
|
~kmeans.predictPoint(~inbuf,{|x|x.postln;});
|
|
::
|
|
|
|
subsection:: Queries in a Synth
|
|
|
|
This is the equivalent of link::Classes/FluidKMeans#-predictPoint::, but wholly on the server.
|
|
|
|
code::
|
|
|
|
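// outputPoint.getToFloatArray(action:{|a|a.postln}) // leftover debug line: outputPoint is a local var of the synth function below and is not defined at this level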
|
|
(
|
|
{
|
|
var trig = Impulse.kr(5);
|
|
var point = WhiteNoise.kr(1.dup);
|
|
var inputPoint = LocalBuf(2);
|
|
var outputPoint = LocalBuf(1);
|
|
Poll.kr(trig, point, [\pointX,\pointY]);
|
|
point.collect{ |p,i| BufWr.kr([p],inputPoint,i)};
|
|
~kmeans.kr(trig,inputPoint,outputPoint);
|
|
Poll.kr(trig,BufRd.kr(1,outputPoint,0,interpolation:0),\cluster);
|
|
}.play;
|
|
)
|
|
|
|
// to sonify the output, here are random values alternating quadrant, generated more quickly as the cursor moves rightwards
|
|
(
|
|
{
|
|
var trig = Impulse.kr(MouseX.kr(0,1).exprange(0.5,ControlRate.ir / 2));
|
|
var step = Stepper.kr(trig,max:3);
|
|
var point = TRand.kr(-0.1, [0.1, 0.1], trig) + [step.mod(2).linlin(0,1,-0.6,0.6),step.div(2).linlin(0,1,-0.6,0.6)] ;
|
|
var inputPoint = LocalBuf(2);
|
|
var outputPoint = LocalBuf(1);
|
|
point.collect{|p,i| BufWr.kr([p],inputPoint,i)};
|
|
~kmeans.kr(trig,inputPoint,outputPoint);
|
|
SinOsc.ar((BufRd.kr(1,outputPoint,0,interpolation:0) + 69).midicps,mul: 0.1);
|
|
}.play;
|
|
)
|
|
|
|
::
|