aboutsummaryrefslogtreecommitdiff
path: root/km-init-kmeans.c
diff options
context:
space:
mode:
Diffstat (limited to 'km-init-kmeans.c')
-rw-r--r--km-init-kmeans.c94
1 files changed, 94 insertions, 0 deletions
diff --git a/km-init-kmeans.c b/km-init-kmeans.c
new file mode 100644
index 0000000..e49f879
--- /dev/null
+++ b/km-init-kmeans.c
@@ -0,0 +1,94 @@
+#include <stdbool.h> // bool
+#include <string.h> // memset()
+#include <float.h> // FLT_MAX
+#include "util.h"
+#include "km.h"
+
+static const float *
+get_random_row(
+ const km_set_t * const set,
+ km_rand_t * const rs
+) {
+ // get random offset
+ size_t ofs = 0;
+ if (!km_rand_fill_sizes(rs, 1, &ofs)) {
+ die("km_rand_fill_sizes()");
+ }
+
+ return km_set_get_row(set, ofs % set->num_rows);
+}
+
+// init a cluster set of num_clusters by picking the first cluster from
+// the set of points at random
+// random initial points from the set
+bool
+km_init_kmeans(
+ km_set_t * const cs,
+ const size_t num_clusters,
+ const km_set_t * const set,
+ km_rand_t * const rs
+) {
+ const size_t num_floats = set->shape.num_floats,
+ stride = sizeof(float) * num_floats;
+
+ // row values (filled below)
+ float floats[num_floats * num_clusters];
+
+ // pick first row randomly
+ {
+ // get random row
+ const float * const vals = get_random_row(set, rs);
+
+ // copy row values to floats buffer
+ memcpy(floats, vals, stride);
+ }
+
+ for (size_t i = 1; i < num_clusters;) {
+ // get random row
+ const float * const vals = get_random_row(set, rs);
+
+ // calculate squared distance to nearest cluster
+ float min_d2 = FLT_MAX;
+ for (size_t j = 0; j < i; j++) {
+ const float d2 = distance_squared(num_floats, vals, floats + j * stride);
+ if (d2 < min_d2) {
+ min_d2 = d2;
+ }
+ }
+
+ // get a random floating point value
+ float rand_val = 0;
+ if (!km_rand_fill(rs, 1, &rand_val)) {
+ // return failure
+ return false;
+ }
+
+ // check random value
+ if (rand_val < min_d2) {
+ // copy row values
+ memcpy(floats + i * num_floats, vals, stride);
+
+ // increment cluster count
+ i++;
+ }
+ }
+
+ // FIXME: should probably be heap-allocated
+ int ints[num_clusters];
+ memset(ints, 0, sizeof(ints));
+
+ // init cluster shape
+ const km_shape_t shape = {
+ .num_floats = num_floats,
+ .num_ints = 1,
+ };
+
+ // init cluster set
+ if (!km_set_init(cs, &shape, num_clusters)) {
+ // return failure
+ return false;
+ }
+
+ // add data, return result
+ return km_set_push(cs, num_clusters, floats, ints);
+}