flucoma-sc/release-packaging/HelpSource/Classes/FluidKMeans.schelp

TITLE:: FluidKMeans
summary:: Cluster data points with K-Means
categories:: FluidManipulation
related:: Classes/FluidDataSet, Classes/FluidLabelSet, Classes/FluidKNNClassifier, Classes/FluidKNNRegressor

DESCRIPTION::
Uses the K-Means algorithm to learn clusters from a link::Classes/FluidDataSet::

https://scikit-learn.org/stable/tutorial/statistical_inference/unsupervised_learning.html#clustering-grouping-observations-together

CLASSMETHODS::

METHOD:: new
Construct a new K-Means model on the passed server.
ARGUMENT:: server
If nil will use Server.default.

INSTANCEMETHODS::

PRIVATE::k

METHOD:: fit
Identify code::k:: clusters in a link::Classes/FluidDataSet::
ARGUMENT:: dataset
A link::Classes/FluidDataSet:: of data points.
ARGUMENT:: k
The number of clusters to identify in the data set.
ARGUMENT:: maxIter
Maximum number of iterations.
ARGUMENT:: action
A function to run when fitting is complete, taking as its argument an array with the number of data points for each cluster.

METHOD:: predict
Given a trained object, return the cluster ID for each data point in a dataset to a label set.
ARGUMENT:: dataset
a link::Classes/FluidDataSet:: containing the data to predict.
ARGUMENT:: labelset
a link::Classes/FluidLabelSet:: to retrieve the predicted clusters.
ARGUMENT:: action
A function to run when the server responds.

METHOD:: fitPredict
Run link::Classes/FluidKMeans#-fit:: and link::Classes/FluidKMeans#-predict:: in a single pass: i.e. train the model on the incoming link::Classes/FluidDataSet:: and then return the learned clustering to the passed link::Classes/FluidLabelSet::.
ARGUMENT:: dataset
a link::Classes/FluidDataSet:: containing the data to fit and predict.
ARGUMENT:: labelset
a link::Classes/FluidLabelSet:: to retrieve the predicted clusters.
ARGUMENT:: k
The number of clusters.
ARGUMENT:: maxIter
Maximum number of iterations.
ARGUMENT:: action
A function to run when the server responds.

METHOD:: predictPoint
Given a trained object, return the cluster ID for a data point in a link::Classes/Buffer::
ARGUMENT:: buffer
a link::Classes/Buffer:: containing a data point.
ARGUMENT:: action
A function to run when the server responds, taking the ID of the cluster as its argument.

METHOD:: predict
Report cluster assignments for previously unseen data.
ARGUMENT:: dataset
A link::Classes/FluidDataSet:: of data points.
ARGUMENT:: labelset
A link::Classes/FluidLabelSet:: to contain assignments.
ARGUMENT:: action
A function to run when complete, taking an array of the counts for each category as its argument.


EXAMPLES::
code::Server.default.options.outDevice = "Built-in Output"::
code::

(
//Make some clumped 2D points and place into a dataset
// 4 groups of 64 random values, each in -1..1 and shifted by +1 or -1 so the
// values clump around +-1; each group is paired into 32 [x, y] points, then
// the groups are flattened into one list of 128 points and scaled by 0.5
~points = (4.collect{
		       64.collect{(1.sum3rand) + [1,-1].choose}.clump(2)
	       }).flatten(1) * 0.5;
fork{
    ~dataset = FluidDataSet.new(s,\kmeans_help_rand2d);
    // Dictionary handed to load: \cols is 2 (the points are 2D) and \data maps
    // each point's index to its [x, y] pair
    d = Dictionary.with(
        *[\cols -> 2,\data -> Dictionary.newFrom(
			~points.collect{|x, i| [i, x]}.flatten)]);
    // wait for the server to catch up before loading
    s.sync;
    // print the dataset once loading has completed
    ~dataset.load(d, {~dataset.print});
}
)


// Create a KMeans instance and a LabelSet for the cluster labels in the server
// (~clusters is the label set handed to fitPredict in the next step)
~clusters = FluidLabelSet(s,\kmeans_help_clusters);
~kmeans = FluidKMeans(s);

// Fit the dataset into 4 clusters and write the assignments to ~clusters.
// The action receives one entry per cluster: the number of points assigned to it.
(
~kmeans.fitPredict(~dataset, ~clusters, 4, action: {|counts|
	"Fitted.\n # Points in each cluster:".postln;
	counts.do{|count, index|
		("Cluster" + index + "->" + count.asInteger + "points").postln;
	}
});
)

// Cols of kmeans should match dataset, size is the number of clusters
// (so after the fit above: 2 columns, 4 clusters)
~kmeans.cols;
~kmeans.size;
// NOTE(review): what dump reports is not shown in this file — presumably the fitted model; confirm
~kmeans.dump;

// Retrieve labels of clustered points
// Asks the label set for the label of each of the 128 points in turn, posting
// "<index> <clusterID>" and collecting the IDs into ~assignments in index order.
(
~assignments = Array.new(128);
fork{
    128.do{ |i|
        ~clusters.getLabel(i,{|clusterID|
            (i.asString+clusterID).postln;
            // add may return a new array when capacity is exceeded, so always
            // keep its result rather than relying on in-place growth
            ~assignments = ~assignments.add(clusterID)
        });
        // wait for the server's reply before requesting the next label,
        // so the IDs arrive in index order
        s.sync;
    }
}
)

//or faster: dump the whole label set once, then order the labels by numeric ID
(
~clusters.dump{|dict|
	var labels = dict.at("data");
	// IDs are compared as integers so that e.g. 10 sorts after 9
	var sortedIDs = labels.keys.asArray.sort{|a,b| a.asInteger < b.asInteger};
	~assignments = labels.atAll(sortedIDs).flatten.postln;
};
)

//Visualise: we're hoping to see colours neatly mapped to quadrants...
(
// rescale the points from -1..1 to 0..1, then split into a list of xs and a list of ys
d = ((~points + 1) * 0.5).flatten(1).unlace;
// one colour per cluster ID
~colours = [Color.blue,Color.red,Color.green,Color.magenta];
w = Window("scatter", Rect(128, 64, 200, 200));
w.drawFunc = {
	Pen.use {
		d[0].size.do{|n|
			// scale the unit-range coordinates up to the 200x200 window
			var dot = Rect(d[0][n]*200, d[1][n]*200, 5, 5);
			Pen.fillColor = ~colours[~assignments[n].asInteger];
			Pen.fillOval(dot);
		}
	}
};
w.refresh;
w.front;
)

//Querying on the server using busses and buffers:
//This is the equivalent of predictPoint, but wholly on the server
//FluidKMeans is accessed via its own synth, so we need to use
//a bus to communicate with it. The inBus receives a trigger to query, using data
//from inBuffer; a trigger is then sent to outBus with the prediction in outBuffer
(
~ib = Bus.audio(s); // input bus must be audio (for now)
~ob = Bus.control(s); //output bus can be kr
~tempPoint = Buffer.alloc(s,1,2); // 1 frame x 2 channels: scratch buffer the synths write each point into
~inpPoint = Buffer.alloc(s,2); // 2 frames: the flattened point, used as FluidKMeans' inBuffer
~outPoint = Buffer.alloc(s,1); // 1 frame: used as FluidKMeans' outBuffer for the prediction
)

// Post the two values currently held in the scratch buffer
~tempPoint.getn(0,2,{|x|x.post})

// Post the two values currently held in the query input buffer
~inpPoint.getn(0,2,{|x|x.post})

//We make two Synths. One, before FluidKMeans, generates a random point and sends
//a trigger to query. The second, after FluidKMeans, gives us the predicted cluster,
//triggering updates from the outBus
(
//Set properties on FluidKMeans:
~kmeans.inBus_(~ib).outBus_(~ob).inBuffer_(~inpPoint).outBuffer_(~outPoint);
//pitching
{
	// one query per second
	var trig = Impulse.kr(1);
	// two channels of control-rate noise: the random 2D point
	var point = WhiteNoise.kr(1.dup);
	var copied;
	// write the point into the 1-frame, 2-channel scratch buffer
	BufWr.kr(point,~tempPoint,0);
	// post the point's coordinates on each trigger
	Poll.kr(trig, point, [\pointX,\pointY]);
	// on each trigger, flatten the scratch buffer into ~inpPoint; Done flags when the copy has finished
	copied = Done.kr(FluidBufFlatten.kr(~tempPoint, ~inpPoint, trig: trig));
	// the rising edge of 'copied' becomes the audio-rate query trigger on the input bus
	Out.ar(~ib.index,K2A.ar(copied > Delay1.kr(copied)));
}.play(~kmeans.synth,addAction:\addBefore);
//catching
{
	// on each trigger from the output bus, latch the value in ~outPoint and post it as \cluster
	Poll.kr(In.kr(~ob),Latch.kr(BufRd.kr(1,~outPoint,0,interpolation:0),In.kr(~ob)),\cluster);
}.play(~kmeans.synth,addAction:\addAfter);
)

// to sonify the output, here are random values alternating quadrant.
(
//Set properties on FluidKMeans:
~kmeans.inBus_(~ib).outBus_(~ob).inBuffer_(~inpPoint).outBuffer_(~outPoint);
//pitching
{
	var count, trig, point, copied;
	// query rate follows the mouse's X position, mapped exponentially to 0.5..1000 triggers per second (and posted)
	trig = Impulse.kr(MouseX.kr(0,1).exprange(0.5,1000).poll);
	// steps through 0, 1, 2, 3 on successive triggers
	count = Stepper.kr(trig,0,0,3);
	// noise of amplitude 0.1 around a centre of (+-0.5, +-0.5) selected by count, sampled on each trigger
	point = Latch.kr(WhiteNoise.ar([0.1,0.1],[count.div(2)-0.5,count.mod(2)-0.5]),trig);
	// write the point into the scratch buffer
	BufWr.kr(point,~tempPoint,0);
	// on each trigger, flatten the scratch buffer into ~inpPoint; Done flags when the copy has finished
	copied = Done.kr(FluidBufFlatten.kr(~tempPoint, ~inpPoint, trig: trig));
	// the rising edge of 'copied' becomes the audio-rate query trigger on the input bus
	Out.ar(~ib.index,K2A.ar(copied > Delay1.kr(copied)));
	// last expression, so this is the signal the synth plays: the trigger impulses scaled by 0.1
	trig*0.1;
}.play(~kmeans.synth,addAction:\addBefore);
//catching
{
	// the latched cluster ID plus 69 is treated as a MIDI note number and sets the sine's pitch
	SinOsc.ar((Latch.kr(BufRd.kr(1,~outPoint,0,interpolation:0),In.kr(~ob)) + 69).midicps,mul: 0.1);
}.play(~kmeans.synth,addAction:\addAfter);
)
::