From 48ee95f77c1be5f404f52eca0c164b1a6d218ef1 Mon Sep 17 00:00:00 2001
From: Gerard <g.roma@hud.ac.uk>
Date: Thu, 11 Jun 2020 19:53:20 +0100
Subject: [PATCH] KMeans review

---
 release-packaging/Classes/FluidKMeans.sc      |  56 +++++----
 .../HelpSource/Classes/FluidKMeans.schelp     | 106 ++++++++----------
 2 files changed, 81 insertions(+), 81 deletions(-)

diff --git a/release-packaging/Classes/FluidKMeans.sc b/release-packaging/Classes/FluidKMeans.sc
index 462f2f7..5ebd7cb 100644
--- a/release-packaging/Classes/FluidKMeans.sc
+++ b/release-packaging/Classes/FluidKMeans.sc
@@ -1,33 +1,43 @@
 FluidKMeans : FluidManipulationClient {
 
-    var <>k;
+	var <>k; // cluster count stored by fit/fitPredict and read back by predict
 
-    *new {|server|
-  		var uid = UniqueID.next;
-  		^super.new(server,uid)!?{|inst|inst.init(uid);inst}
-  	}
+	*new {|server| // build an instance for the given server, tagged with a fresh UniqueID
+		var uid = UniqueID.next;
+		^super.new(server,uid)!?{|inst|inst.init(uid);inst} // only when super.new is non-nil: init, then return the instance
+	}
 
-  	init {|uid|
-  		id = uid;
-  	}
+	init {|uid| // store the id allocated in *new
+		id = uid;
+	}
 
-    fit{|dataset,k, maxIter = 100, buffer, action|
-       buffer = buffer ? -1;
-        this.k = k;
-		// this.prSendMsg(\fit,[dataset.asSymbol, k,maxIter, buffer.asUGenInput],action,[numbers(FluidMessageResponse,_,k,_)]);
-	    this.prSendMsg(\fit,[dataset.asSymbol, k,maxIter],action,[numbers(FluidMessageResponse,_,k,_)]);
+	fit{|dataset,k, maxIter = 100, action| // send \fit with the dataset name, k and maxIter
+		this.k = k; // remembered so a later predict can hand the same k to its reply parser
+		this.prSendMsg(\fit,
+			[dataset.asSymbol, k,maxIter], action,
+			[numbers(FluidMessageResponse,_,k,_)] // partial application with k fixed; presumably parses k numbers from the reply - confirm in FluidMessageResponse
+		);
 	}
 
-    fitPredict{|dataset,labelset, k, maxIter = 100, action|
-        this.k = k;
-		this.prSendMsg(\fitPredict,[dataset.asSymbol,labelset.asSymbol,  k,maxIter],action,[numbers(FluidMessageResponse,_,k,_)]);
-    }
+	fitPredict{|dataset, labelset, k, maxIter = 100, action| // send \fitPredict with dataset and labelset names, k and maxIter
+		this.k = k; // same bookkeeping as fit
+		this.prSendMsg(\fitPredict,
+			[dataset.asSymbol,labelset.asSymbol, k,maxIter],
+			action,[numbers(FluidMessageResponse,_,k,_)]
+		);
+	}
 
-    predict{ |dataset, labelset,action|
-        this.prSendMsg(\predict,[dataset.asSymbol, labelset.asSymbol],action,[numbers(FluidMessageResponse,_,this.k,_)]);
-    }
+	predict{ |dataset, labelset,action| // send \predict with dataset and labelset names
+		this.prSendMsg(\predict,
+			[dataset.asSymbol, labelset.asSymbol], action,
+			[numbers(FluidMessageResponse,_,this.k,_)] // uses the k stored by the last fit/fitPredict; k is nil if neither has been called
+		);
+	}
 
-    predictPoint { |buffer, action|
-        this.prSendMsg(\predictPoint,[buffer.asUGenInput],action,[number(FluidMessageResponse,_,_)]);
-    }
+	predictPoint { |buffer, action| // send \predictPoint for the point held in buffer
+		this.prSendMsg(\predictPoint,
+			[buffer.asUGenInput], action,
+			[number(FluidMessageResponse,_,_)] // singular `number`, unlike the `numbers` parser used by the other methods
+		);
+	}
 }
diff --git a/release-packaging/HelpSource/Classes/FluidKMeans.schelp b/release-packaging/HelpSource/Classes/FluidKMeans.schelp
index 83908b4..6912197 100644
--- a/release-packaging/HelpSource/Classes/FluidKMeans.schelp
+++ b/release-packaging/HelpSource/Classes/FluidKMeans.schelp
@@ -1,7 +1,7 @@
 TITLE:: FluidKMeans
 summary:: Cluster data points with K-Means
 categories:: FluidManipulation
-related:: Classes/FluidDataSet, Classes/FluidLabelSet, Classes/FluidKNN
+related:: Classes/FluidDataSet, Classes/FluidLabelSet, Classes/FluidKNNClassifier, Classes/FluidKNNRegressor
 
 DESCRIPTION::
 Uses the K-Means algorithm to learn clusters from a link::Classes/FluidDataSet::
@@ -11,9 +11,9 @@ https://scikit-learn.org/stable/tutorial/statistical_inference/unsupervised_lear
 CLASSMETHODS::
 
 METHOD:: new
-Construct a new K Means model on the passed server
+Construct a new K Means model on the passed server.
 ARGUMENT:: server
-If nil will use Server.default
+If nil will use Server.default.
 
 INSTANCEMETHODS::
 
@@ -22,118 +22,108 @@ PRIVATE::k
 METHOD:: fit
 Identify code::k:: clusters in a link::Classes/FluidDataSet::
 ARGUMENT:: dataset
-A link::Classes/FluidDataSet:: of data points
+A link::Classes/FluidDataSet:: of data points.
 ARGUMENT:: k
-The number of clusters to identify in the data set
+The number of clusters to identify in the data set.
 ARGUMENT:: maxIter
-Maximum number of iterations to use partitioning the data
-ARGUMENT:: buffer
-Seed centroids for clusters WARNING:: Not yet implemented ::
+Maximum number of iterations.
 ARGUMENT:: action
-A function to run when fitting is complete, taking as its argument an array with the number of data points for each cluster
+A function to run when fitting is complete, taking as its argument an array with the number of data points for each cluster.
 
 METHOD:: predict
 Given a trained object, return the cluster ID for each data point in a dataset to a label set.
 ARGUMENT:: dataset
-a link::Classes/FluidDataSet:: containing the data to predict
+a link::Classes/FluidDataSet:: containing the data to predict.
 ARGUMENT:: labelset
-a link::Classes/FluidLabelSet:: to reveive the predicted clusters
+a link::Classes/FluidLabelSet:: to receive the predicted clusters.
 ARGUMENT:: action
-A function to run when the server responds
+A function to run when the server responds.
 
 METHOD:: fitPredict
 Run link::Classes/FluidKMeans#*fit:: and link::Classes/FluidKMeans#*predict:: in a single pass: i.e. train the model on the incoming link::Classes/FluidDataSet:: and then return the learned clustering to the passed link::Classes/FluidLabelSet::
 ARGUMENT:: dataset
-a link::Classes/FluidDataSet:: containing the data to fit and predict
+a link::Classes/FluidDataSet:: containing the data to fit and predict.
 ARGUMENT:: labelset
-a link::Classes/FluidLabelSet:: to reveive the predicted clusters
+a link::Classes/FluidLabelSet:: to receive the predicted clusters.
 ARGUMENT:: k
-The number of clusters to identify in the data set
+The number of clusters.
 ARGUMENT:: maxIter
-Maximum number of iterations to use partitioning the data
+Maximum number of iterations.
 ARGUMENT:: action
 A function to run when the server responds
 
 METHOD:: predictPoint
 Given a trained object, return the cluster ID for a data point in a link::Classes/Buffer::
 ARGUMENT:: buffer
-a link::Classes/Buffer:: containing a data point
+a link::Classes/Buffer:: containing a data point.
 ARGUMENT:: action
-A function to run when the server responds, taking the ID of the cluser as its argument
+A function to run when the server responds, taking the ID of the cluster as its argument.
 
 METHOD:: predict
-Report cluster assignments for previously unseen data
+Report cluster assignments for previously unseen data.
 ARGUMENT:: dataset
-A link::Classes/FluidDataSet:: of data points
+A link::Classes/FluidDataSet:: of data points.
 ARGUMENT:: labelset
-A link::Classes/FluidLabelSet:: to contain assigments
+A link::Classes/FluidLabelSet:: to contain assignments.
 ARGUMENT:: action
-A function to run when complete, taking an array of the counts for each catgegory as its argument
+A function to run when complete, taking an array of the counts for each category as its argument.
 
 
 EXAMPLES::
 Server.default.options.outDevice = "Built-in Output"
 code::
 
-//A dataset for our points, a labelset for cluster labels
 (
-~dataset= FluidDataSet(s,\kdtree_help_rand2d);
-
-~clusters = FluidLabelSet(s,\kmeans_help_clusters);
-)
-
 //Make some clumped 2D points and place into a dataset
-(
-~points = (4.collect{64.collect{(1.sum3rand) + [1,-1].choose}.clump(2)}).flatten(1) * 0.5;
-~dataset.clear;
-~tmpbuf = Buffer.alloc(s,2);
+~points = (4.collect{
+		       64.collect{(1.sum3rand) + [1,-1].choose}.clump(2)
+	       }).flatten(1) * 0.5;
 fork{
+    ~dataset = FluidDataSet.new(s,\kmeans_help_rand2d);
+    d = Dictionary.with(
+        *[\cols -> 2,\data -> Dictionary.newFrom(
+			~points.collect{|x, i| [i, x]}.flatten)]);
     s.sync;
-    ~points.do{|x,i|
-        (""++(i+1)++"/128").postln;
-        ~tmpbuf.setn(0,x);
-        ~dataset.addPoint(i,~tmpbuf);
-        s.sync
-    }
+    ~dataset.load(d, {~dataset.print});
 }
 )
 
-//Make a new k means model, fit it to the dataset and return the discovered clusters to a labelset
-(
-fork{
-	~clusters.clear;
-	~kmeans = FluidKMeans(s);
-    s.sync;
-	~kmeans.fitPredict(~dataset,~clusters, 4,action: {|c|
+
+// Create a KMeans instance and a LabelSet for the cluster labels on the server
+~clusters = FluidLabelSet(s,\kmeans_help_clusters);
+~kmeans = FluidKMeans(s);
+
+// Fit into 4 clusters
+~kmeans.fitPredict(~dataset,~clusters, 4, action: {|c|
 		"Fitted.\n # Points in each cluster:".postln;
 		c.do{|x,i|
 			("Cluster" + i + "->" + x.asInteger + "points").postln;
 		}
 	});
-}
-)
 
-//Dims of kmeans should match dataset
-~kmeans.cols
 
-//Return labels of clustered points
+// Cols of kmeans should match dataset, size is the number of clusters
+~kmeans.cols;
+~kmeans.size;
+~kmeans.dump;
+
+// Retrieve labels of clustered points
 (
 ~assignments = Array.new(128);
 fork{
-	128.do{ |i|
-		~clusters.getLabel(i,{|clusterID|
-			(i.asString+clusterID).postln;
-			~assignments.add(clusterID)
-		});
-		s.sync;
-	}
+    128.do{ |i|
+        ~clusters.getLabel(i,{|clusterID|
+            (i.asString+clusterID).postln;
+            ~assignments.add(clusterID)
+        });
+        s.sync;
+    }
 }
 )
 
 //Visualise: we're hoping to see colours neatly mapped to quandrants...
 (
 d = ((~points + 1) * 0.5).flatten(1).unlace;
-// d = [20.collect{1.0.rand}, 20.collect{1.0.rand}];
 w = Window("scatter", Rect(128, 64, 200, 200));
 ~colours = [Color.blue,Color.red,Color.green,Color.magenta];
 w.drawFunc = {