aboutsummaryrefslogtreecommitdiff
path: root/km-init-kmeans.c
diff options
context:
space:
mode:
authorPaul Duncan <pabs@pablotron.org>2019-02-05 00:22:15 -0500
committerPaul Duncan <pabs@pablotron.org>2019-02-05 00:22:15 -0500
commitf557d1f49a2914c6084dd18efc783395228d8ce0 (patch)
tree51111fc899f01956cbab948b87c241478576870d /km-init-kmeans.c
parentb5065ea43cb13c0b553874305b53963176c70f59 (diff)
downloadkmeans-f557d1f49a2914c6084dd18efc783395228d8ce0.tar.bz2
kmeans-f557d1f49a2914c6084dd18efc783395228d8ce0.zip
mv *.[hc] src/
Diffstat (limited to 'km-init-kmeans.c')
-rw-r--r--km-init-kmeans.c124
1 files changed, 0 insertions, 124 deletions
diff --git a/km-init-kmeans.c b/km-init-kmeans.c
deleted file mode 100644
index f0c1b5f..0000000
--- a/km-init-kmeans.c
+++ /dev/null
@@ -1,124 +0,0 @@
-#include <stdbool.h> // bool
-#include <string.h> // memset()
-#include <float.h> // FLT_MAX
-#include "util.h"
-#include "km.h"
-
-// sum the squared distance of every row in set from this point
-static inline float
-sum_distance_squared(
- const float * const floats,
- const km_set_t * const set
-) {
- float r = 0;
-
- // sum squared distances
- for (size_t i = 0; i < set->num_rows; i++) {
- const float * const vals = km_set_get_row(set, i);
- r += distance_squared(set->shape.num_floats, floats, vals);
- }
-
- // return result
- return r;
-}
-
-static const float *
-get_random_row(
- const km_set_t * const set,
- km_rand_t * const rs
-) {
- // get random offset
- size_t ofs = 0;
- if (!km_rand_get_sizes(rs, 1, &ofs)) {
- die("km_rand_fill_sizes()");
- }
-
- return km_set_get_row(set, ofs % set->num_rows);
-}
-
-// init a cluster set of num_clusters by picking the first cluster from
-// the set of points at random
-// random initial points from the set
-bool
-km_init_kmeans(
- km_set_t * const cs,
- const size_t num_clusters,
- const km_set_t * const set,
- km_rand_t * const rs
-) {
- const size_t num_floats = set->shape.num_floats,
- stride = sizeof(float) * num_floats;
-
- // row values (filled below)
- float sums[num_clusters];
- float floats[num_floats * num_clusters];
-
- // pick first row randomly
- {
- // get random row
- const float * const vals = get_random_row(set, rs);
-
- // copy row values to floats buffer
- memcpy(floats, vals, stride);
-
- // copy row values to floats buffer
- sums[0] = sum_distance_squared(floats, set);
- }
-
- for (size_t i = 1; i < num_clusters;) {
- // get random row
- const float * const vals = get_random_row(set, rs);
-
- // calculate squared distance to nearest cluster
- size_t ofs = 0;
- float min_d2 = FLT_MAX;
- for (size_t j = 0; j < i; j++) {
- const float d2 = distance_squared(num_floats, vals, floats + j * stride);
- if (d2 < min_d2) {
- ofs = j;
- min_d2 = d2;
- }
- }
-
- // get a random floating point value
- float rand_val = 0;
- if (!km_rand_get_floats(rs, 1, &rand_val)) {
- // return failure
- return false;
- }
-
- // check random value
- if (rand_val / sums[ofs] < min_d2) {
- // get destination
- float * const dst = floats + i * num_floats;
-
- // copy row values
- memcpy(dst, vals, stride);
-
- // calculate total cluster distance
- sums[i] = sum_distance_squared(dst, set);
-
- // increment cluster count
- i++;
- }
- }
-
- // FIXME: should probably be heap-allocated
- int ints[num_clusters];
- memset(ints, 0, sizeof(ints));
-
- // init cluster shape
- const km_shape_t shape = {
- .num_floats = num_floats,
- .num_ints = 1,
- };
-
- // init cluster set
- if (!km_set_init(cs, &shape, num_clusters)) {
- // return failure
- return false;
- }
-
- // add data, return result
- return km_set_push(cs, num_clusters, floats, ints);
-}