#include // bool #include // memset() #include // FLT_MAX #include "util.h" #include "km.h" static const float * get_random_row( const km_set_t * const set, km_rand_t * const rs ) { // get random offset size_t ofs = 0; if (!km_rand_fill_sizes(rs, 1, &ofs)) { die("km_rand_fill_sizes()"); } return km_set_get_row(set, ofs % set->num_rows); } // init a cluster set of num_clusters by picking the first cluster from // the set of points at random // random initial points from the set bool km_init_kmeans( km_set_t * const cs, const size_t num_clusters, const km_set_t * const set, km_rand_t * const rs ) { const size_t num_floats = set->shape.num_floats, stride = sizeof(float) * num_floats; // row values (filled below) float floats[num_floats * num_clusters]; // pick first row randomly { // get random row const float * const vals = get_random_row(set, rs); // copy row values to floats buffer memcpy(floats, vals, stride); } for (size_t i = 1; i < num_clusters;) { // get random row const float * const vals = get_random_row(set, rs); // calculate squared distance to nearest cluster float min_d2 = FLT_MAX; for (size_t j = 0; j < i; j++) { const float d2 = distance_squared(num_floats, vals, floats + j * stride); if (d2 < min_d2) { min_d2 = d2; } } // get a random floating point value float rand_val = 0; if (!km_rand_fill(rs, 1, &rand_val)) { // return failure return false; } // check random value if (rand_val < min_d2) { // copy row values memcpy(floats + i * num_floats, vals, stride); // increment cluster count i++; } } // FIXME: should probably be heap-allocated int ints[num_clusters]; memset(ints, 0, sizeof(ints)); // init cluster shape const km_shape_t shape = { .num_floats = num_floats, .num_ints = 1, }; // init cluster set if (!km_set_init(cs, &shape, num_clusters)) { // return failure return false; } // add data, return result return km_set_push(cs, num_clusters, floats, ints); }