diff options
Diffstat (limited to 'km-init-kmeans.c')
-rw-r--r-- | km-init-kmeans.c | 94 |
1 files changed, 94 insertions, 0 deletions
diff --git a/km-init-kmeans.c b/km-init-kmeans.c new file mode 100644 index 0000000..e49f879 --- /dev/null +++ b/km-init-kmeans.c @@ -0,0 +1,94 @@ +#include <stdbool.h> // bool +#include <string.h> // memset() +#include <float.h> // FLT_MAX +#include "util.h" +#include "km.h" + +static const float * +get_random_row( + const km_set_t * const set, + km_rand_t * const rs +) { + // get random offset + size_t ofs = 0; + if (!km_rand_fill_sizes(rs, 1, &ofs)) { + die("km_rand_fill_sizes()"); + } + + return km_set_get_row(set, ofs % set->num_rows); +} + +// init a cluster set of num_clusters by picking the first cluster from +// the set of points at random +// random initial points from the set +bool +km_init_kmeans( + km_set_t * const cs, + const size_t num_clusters, + const km_set_t * const set, + km_rand_t * const rs +) { + const size_t num_floats = set->shape.num_floats, + stride = sizeof(float) * num_floats; + + // row values (filled below) + float floats[num_floats * num_clusters]; + + // pick first row randomly + { + // get random row + const float * const vals = get_random_row(set, rs); + + // copy row values to floats buffer + memcpy(floats, vals, stride); + } + + for (size_t i = 1; i < num_clusters;) { + // get random row + const float * const vals = get_random_row(set, rs); + + // calculate squared distance to nearest cluster + float min_d2 = FLT_MAX; + for (size_t j = 0; j < i; j++) { + const float d2 = distance_squared(num_floats, vals, floats + j * stride); + if (d2 < min_d2) { + min_d2 = d2; + } + } + + // get a random floating point value + float rand_val = 0; + if (!km_rand_fill(rs, 1, &rand_val)) { + // return failure + return false; + } + + // check random value + if (rand_val < min_d2) { + // copy row values + memcpy(floats + i * num_floats, vals, stride); + + // increment cluster count + i++; + } + } + + // FIXME: should probably be heap-allocated + int ints[num_clusters]; + memset(ints, 0, sizeof(ints)); + + // init cluster shape + const km_shape_t shape = { + .num_floats = num_floats, + .num_ints = 1, + }; + + // init cluster set + if (!km_set_init(cs, &shape, num_clusters)) { + // return failure + return false; + } + + // add data, return result + return km_set_push(cs, num_clusters, floats, ints); +} |