aboutsummaryrefslogtreecommitdiff
path: root/km-init-forgy.c
diff options
context:
space:
mode:
author Paul Duncan <pabs@pablotron.org> 2019-02-03 18:36:52 -0500
committer Paul Duncan <pabs@pablotron.org> 2019-02-03 18:36:52 -0500
commit 33a722132491ebdd31722f0cada0f81f6b082282 (patch)
tree 389ae93db5d0a1ff8085ea5f38026d5e060deea1 /km-init-forgy.c
parent aa74bd04f66217ff4d617924630b13d721578159 (diff)
download kmeans-33a722132491ebdd31722f0cada0f81f6b082282.tar.bz2
kmeans-33a722132491ebdd31722f0cada0f81f6b082282.zip
cluster init refactoring, fix best sort, add km_score()
Diffstat (limited to 'km-init-forgy.c')
-rw-r--r-- km-init-forgy.c 51
1 files changed, 51 insertions, 0 deletions
diff --git a/km-init-forgy.c b/km-init-forgy.c
new file mode 100644
index 0000000..687c528
--- /dev/null
+++ b/km-init-forgy.c
@@ -0,0 +1,51 @@
+#include <stdbool.h> // bool
+#include <string.h> // memset()
+#include "util.h"
+#include "km.h"
+
+// Initialize cluster set `cs` with `num_clusters` clusters by copying
+// the floats of randomly chosen rows of `set` (Forgy initialization).
+//
+// Each cluster row has the same number of floats as `set` and a single
+// int, which is set to zero.  Row offsets come from
+// km_rand_fill_sizes() and are reduced modulo the number of rows in
+// `set`; this function does not check the chosen rows for duplicates.
+//
+// Returns false if `set` has no rows, if the random row offsets could
+// not be generated, or if `cs` could not be initialized.  Otherwise
+// returns the result of pushing the cluster rows onto `cs`.
+bool
+km_init_forgy(
+  km_set_t * const cs,
+  const size_t num_clusters,
+  const km_set_t * const set,
+  km_rand_t * const rs
+) {
+  const size_t num_floats = set->shape.num_floats,
+               stride = sizeof(float) * num_floats;
+
+  // an empty set has no rows to pick from, and would make the modulo
+  // in the loop below divide by zero (undefined behavior)
+  if (!set->num_rows) {
+    // return failure
+    return false;
+  }
+
+  // init cluster shape
+  const km_shape_t shape = {
+    .num_floats = num_floats,
+    .num_ints = 1,
+  };
+
+  // get random row offsets
+  // FIXME: this and the arrays below are stack-allocated VLAs sized by
+  // caller input; they should probably be heap-allocated
+  size_t rows[num_clusters];
+  if (!km_rand_fill_sizes(rs, num_clusters, rows)) {
+    // return failure
+    return false;
+  }
+
+  // generate random cluster centers: copy the floats of each chosen
+  // row into consecutive num_floats-sized slots
+  float floats[num_floats * num_clusters];
+  for (size_t i = 0; i < num_clusters; i++) {
+    // clamp random offset to a valid row number
+    const size_t row_num = rows[i] % set->num_rows;
+    const float * const row_floats = km_set_get_row(set, row_num);
+    memcpy(floats + i * num_floats, row_floats, stride);
+  }
+
+  // one zeroed int per cluster (matches shape.num_ints above)
+  int ints[num_clusters];
+  memset(ints, 0, sizeof(ints));
+
+  // init cluster set
+  if (!km_set_init(cs, &shape, num_clusters)) {
+    // return failure
+    return false;
+  }
+
+  // add data, return result
+  return km_set_push(cs, num_clusters, floats, ints);
+}