aboutsummaryrefslogtreecommitdiff
path: root/km.h
blob: 642560b176b5306eca99271d1ca888e79f836864 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
#ifndef KM_H
#define KM_H

#include <stddef.h> // size_t
#include <stdint.h> // uint8_t
#include <stdio.h> // FILE

// forward typedef for callbacks
typedef struct km_rand_src_t_ km_rand_src_t;

// random number source callbacks
typedef struct {
  _Bool (*fill)(km_rand_src_t * const, const size_t, float * const);
  void (*fini)(km_rand_src_t * const);
} km_rand_src_cbs_t;

// random number source
struct km_rand_src_t_ {
  const km_rand_src_cbs_t *cbs;
  void *data;
};

// fill buffer with N random floats
_Bool km_rand_src_fill(km_rand_src_t * const, const size_t, float * const);

// finalize random source
void km_rand_src_fini(km_rand_src_t * const);

// init system random source (uses system rand())
void km_rand_src_system_init(km_rand_src_t *);

// shape of data set
typedef struct {
  size_t num_floats,
         num_ints;
} km_shape_t;

typedef enum {
  KM_SET_STATE_INIT,
  KM_SET_STATE_NORMALIZED,
  KM_SET_STATE_FINI,
  KM_SET_STATE_LAST,
} km_set_state_t;

// data set
typedef struct {
  km_set_state_t state;

  float *floats,
        *bounds;
  int *ints;

  km_shape_t shape;

  size_t num_rows,
         capacity;
} km_set_t;

// init data set with shape and initial size
_Bool km_set_init(km_set_t * const, const km_shape_t *, const size_t);
// finalize data set
void km_set_fini(km_set_t * const);

// append rows to data set, growing set if necessary
_Bool km_set_push(km_set_t *, const size_t, const float *, const int *);

// normalize data set
_Bool km_set_normalize(km_set_t * const);

// get pointer to data set row
float *km_set_get_row(const km_set_t *, const size_t);

typedef struct {
  float d2;
  size_t cluster;
} km_row_map_t;

typedef struct {
  const float *means;
  const float *variances;
  const size_t num_clusters;
} km_solve_stats_t;

// init a set with num_clusters clusters of shape num_floats by picking
// random cluster centers
_Bool km_set_init_rand_clusters(
  km_set_t *,
  const size_t,
  const size_t,
  km_rand_src_t *
);

typedef struct {
  void (*on_step)(const km_set_t *, const km_row_map_t *, void *);
  void (*on_stats)(const km_set_t *, const km_solve_stats_t *, void *);
} km_solve_cbs_t;

// use k-means to iteratively update the cluster centroids until there
// are no more changes to the centroids
_Bool
km_solve(
  km_set_t * const,
  const km_set_t * const,
  const km_solve_cbs_t * const,
  void * const
);

typedef struct {
  const km_set_t * const cluster_set;

  const float mean_distance,
              mean_variance,
              mean_cluster_size;

  const size_t num_clusters,
               num_empty_clusters;
} km_find_data_t;

typedef _Bool (*km_find_init_cb_t)(
  km_set_t * const cluster_set,
  const size_t num_floats,
  const size_t num_clusters,
  void *cb_data
);

typedef _Bool (*km_find_fini_cb_t)(
  km_set_t * const cluster_set,
  void *cb_data
);

typedef void (*km_find_data_cb_t)(
  const km_find_data_t * const,
  void *
);

typedef struct {
  size_t max_clusters;
  size_t num_tests;
  km_rand_src_t *rand_src;

  km_find_init_cb_t on_init;
  km_find_fini_cb_t on_fini;
  km_find_data_cb_t on_data;
} km_find_cbs_t;

// repeatedly test different cluster sizes and report results
_Bool
km_find(
  const km_set_t * const set,
  const km_find_cbs_t * const cbs,
  void * const cb_data
);

// render set to rgb buffer
void
km_set_draw(
  const km_set_t * const set,
  uint8_t * const rgb,
  const size_t width,
  const size_t height,
  const uint32_t color
);

// load callbacks
typedef struct {
  _Bool (*on_shape)(const km_shape_t * const, void *);
  _Bool (*on_row)(const float *, const int *, void *);
  void (*on_error)(const char * const, void *);
} km_load_cbs_t;

// load set from file
_Bool
km_load(
  FILE * const fh,
  const km_load_cbs_t * const cbs,
  void * const cb_data
);

_Bool
km_load_path(
  const char * const path,
  const km_load_cbs_t * const cbs,
  void * const cb_data
);

#endif /* KM_H */