1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
|
#include <stdbool.h> // bool
#include <string.h> // memset()
#include <float.h> // FLT_MAX
#include "util.h"
#include "km.h"
static const float *
get_random_row(
const km_set_t * const set,
km_rand_t * const rs
) {
// get random offset
size_t ofs = 0;
if (!km_rand_get_sizes(rs, 1, &ofs)) {
die("km_rand_fill_sizes()");
}
return km_set_get_row(set, ofs % set->num_rows);
}
// init a cluster set of num_clusters by picking the first cluster from
// the set of points at random
// random initial points from the set
bool
km_init_kmeans(
km_set_t * const cs,
const size_t num_clusters,
const km_set_t * const set,
km_rand_t * const rs
) {
const size_t num_floats = set->shape.num_floats,
stride = sizeof(float) * num_floats;
// row values (filled below)
float floats[num_floats * num_clusters];
// pick first row randomly
{
// get random row
const float * const vals = get_random_row(set, rs);
// copy row values to floats buffer
memcpy(floats, vals, stride);
}
for (size_t i = 1; i < num_clusters;) {
// get random row
const float * const vals = get_random_row(set, rs);
// calculate squared distance to nearest cluster
float min_d2 = FLT_MAX;
for (size_t j = 0; j < i; j++) {
const float d2 = distance_squared(num_floats, vals, floats + j * stride);
if (d2 < min_d2) {
min_d2 = d2;
}
}
// get a random floating point value
float rand_val = 0;
if (!km_rand_get_floats(rs, 1, &rand_val)) {
// return failure
return false;
}
// check random value
if (rand_val < min_d2) {
// copy row values
memcpy(floats + i * num_floats, vals, stride);
// increment cluster count
i++;
}
}
// FIXME: should probably be heap-allocated
int ints[num_clusters];
memset(ints, 0, sizeof(ints));
// init cluster shape
const km_shape_t shape = {
.num_floats = num_floats,
.num_ints = 1,
};
// init cluster set
if (!km_set_init(cs, &shape, num_clusters)) {
// return failure
return false;
}
// add data, return result
return km_set_push(cs, num_clusters, floats, ints);
}
|