diff options
Diffstat (limited to 'km-init-kmeans.c')
-rw-r--r-- | km-init-kmeans.c | 124 |
1 files changed, 0 insertions, 124 deletions
diff --git a/km-init-kmeans.c b/km-init-kmeans.c deleted file mode 100644 index f0c1b5f..0000000 --- a/km-init-kmeans.c +++ /dev/null @@ -1,124 +0,0 @@ -#include <stdbool.h> // bool -#include <string.h> // memset() -#include <float.h> // FLT_MAX -#include "util.h" -#include "km.h" - -// sum the squared distance of every row in set from this point -static inline float -sum_distance_squared( - const float * const floats, - const km_set_t * const set -) { - float r = 0; - - // sum squared distances - for (size_t i = 0; i < set->num_rows; i++) { - const float * const vals = km_set_get_row(set, i); - r += distance_squared(set->shape.num_floats, floats, vals); - } - - // return result - return r; -} - -static const float * -get_random_row( - const km_set_t * const set, - km_rand_t * const rs -) { - // get random offset - size_t ofs = 0; - if (!km_rand_get_sizes(rs, 1, &ofs)) { - die("km_rand_fill_sizes()"); - } - - return km_set_get_row(set, ofs % set->num_rows); -} - -// init a cluster set of num_clusters by picking the first cluster from -// the set of points at random -// random initial points from the set -bool -km_init_kmeans( - km_set_t * const cs, - const size_t num_clusters, - const km_set_t * const set, - km_rand_t * const rs -) { - const size_t num_floats = set->shape.num_floats, - stride = sizeof(float) * num_floats; - - // row values (filled below) - float sums[num_clusters]; - float floats[num_floats * num_clusters]; - - // pick first row randomly - { - // get random row - const float * const vals = get_random_row(set, rs); - - // copy row values to floats buffer - memcpy(floats, vals, stride); - - // copy row values to floats buffer - sums[0] = sum_distance_squared(floats, set); - } - - for (size_t i = 1; i < num_clusters;) { - // get random row - const float * const vals = get_random_row(set, rs); - - // calculate squared distance to nearest cluster - size_t ofs = 0; - float min_d2 = FLT_MAX; - for (size_t j = 0; j < i; j++) { - const float d2 = distance_squared(num_floats, vals, floats + j * stride); - if (d2 < min_d2) { - ofs = j; - min_d2 = d2; - } - } - - // get a random floating point value - float rand_val = 0; - if (!km_rand_get_floats(rs, 1, &rand_val)) { - // return failure - return false; - } - - // check random value - if (rand_val / sums[ofs] < min_d2) { - // get destination - float * const dst = floats + i * num_floats; - - // copy row values - memcpy(dst, vals, stride); - - // calculate total cluster distance - sums[i] = sum_distance_squared(dst, set); - - // increment cluster count - i++; - } - } - - // FIXME: should probably be heap-allocated - int ints[num_clusters]; - memset(ints, 0, sizeof(ints)); - - // init cluster shape - const km_shape_t shape = { - .num_floats = num_floats, - .num_ints = 1, - }; - - // init cluster set - if (!km_set_init(cs, &shape, num_clusters)) { - // return failure - return false; - } - - // add data, return result - return km_set_push(cs, num_clusters, floats, ints); -} |