From f557d1f49a2914c6084dd18efc783395228d8ce0 Mon Sep 17 00:00:00 2001 From: Paul Duncan Date: Tue, 5 Feb 2019 00:22:15 -0500 Subject: mv *.[hc] src/ --- src/km.h | 230 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 230 insertions(+) create mode 100644 src/km.h (limited to 'src/km.h') diff --git a/src/km.h b/src/km.h new file mode 100644 index 0000000..5a34261 --- /dev/null +++ b/src/km.h @@ -0,0 +1,230 @@ +#ifndef KM_H +#define KM_H + +#include // size_t +#include // uint8_t +#include // FILE + +// forward typedef for callbacks +typedef struct km_rand_t_ km_rand_t; + +// random number source callbacks +typedef struct { + _Bool (*get_floats)(km_rand_t * const, const size_t, float * const); + _Bool (*get_sizes)(km_rand_t * const, const size_t, size_t * const); + void (*fini)(km_rand_t * const); +} km_rand_cbs_t; + +// random number source +struct km_rand_t_ { + const km_rand_cbs_t *cbs; + void *data; +}; + +// fill buffer with N random floats +_Bool km_rand_get_floats(km_rand_t * const, const size_t, float * const); + +// fill buffer with N random size_ts +_Bool km_rand_get_sizes(km_rand_t * const, const size_t, size_t * const); + +// finalize random source +void km_rand_fini(km_rand_t * const); + +// init libc rng (use libc rand()) +void km_rand_init_libc(km_rand_t *); + +// init file source rng (e.g. "/dev/urandom") +_Bool km_rand_init_path(km_rand_t * const, const char *); + +// init erand48 rng (use erand48()) +_Bool km_rand_init_erand48(km_rand_t * const, const uint64_t); + +// shape of data set +typedef struct { + size_t num_floats, + num_ints; +} km_shape_t; + +typedef enum { + KM_SET_STATE_INIT, + KM_SET_STATE_NORMALIZED, + KM_SET_STATE_FINI, + KM_SET_STATE_LAST, +} km_set_state_t; + +// data set +typedef struct { + km_set_state_t state; + + float *floats, + *bounds; + int *ints; + + km_shape_t shape; + + size_t num_rows, + capacity; +} km_set_t; + +// init data set with shape and initial size +_Bool km_set_init(km_set_t * const, const km_shape_t *, const size_t); +// finalize data set +void km_set_fini(km_set_t * const); + +// append rows to data set, growing set if necessary +_Bool km_set_push(km_set_t *, const size_t, const float *, const int *); + +// deep copy data set +_Bool km_set_copy(km_set_t * const, const km_set_t * const); + +// normalize data set +_Bool km_set_normalize(km_set_t * const); + +// get pointer to data set row +float *km_set_get_row(const km_set_t *, const size_t); + +typedef struct { + float d2, + d2_near; + size_t cluster; +} km_row_map_t; + +typedef struct { + const float sum; + + const float silouette; + + const float *mean_dists; + const float *mean_nears; + + const size_t num_clusters; +} km_solve_stats_t; + +typedef struct { + void (*on_step)(const km_set_t *, const km_row_map_t *, void *); + void (*on_stats)(const km_set_t *, const km_solve_stats_t *, void *); +} km_solve_cbs_t; + +// use k-means to iteratively update the cluster centroids until there +// are no more changes to the centroids +_Bool +km_solve( + km_set_t * const, + const km_set_t * const, + const km_solve_cbs_t * const, + void * const +); + +typedef struct { + const km_set_t * const cluster_set; + + float distance_sum, + silouette, + mean_cluster_size; + + const size_t num_clusters, + num_empty_clusters; +} km_find_data_t; + +typedef _Bool (*km_find_init_cb_t)( + km_set_t * const cluster_set, + const size_t num_clusters, + const km_set_t * const data_set, + void *cb_data +); + +typedef _Bool (*km_find_fini_cb_t)( + km_set_t * const cluster_set, + void *cb_data +); + +typedef _Bool (*km_find_best_cb_t)( + const float score, + const km_set_t * const cluster_set, + void *cb_data +); + +typedef void (*km_find_data_cb_t)( + const km_find_data_t * const, + void * +); + +typedef struct { + // maximum number of clusters + size_t max_clusters; + + // maximum number of tests per cluster + size_t num_tests; + + km_find_init_cb_t on_init; + km_find_fini_cb_t on_fini; + km_find_data_cb_t on_data; + km_find_best_cb_t on_best; +} km_find_cbs_t; + +// repeatedly test different cluster sizes and report results +_Bool +km_find( + const km_set_t * const set, + const km_find_cbs_t * const cbs, + void * const cb_data +); + +// render set to rgb buffer +void +km_set_draw( + const km_set_t * const set, + uint8_t * const rgb, + const size_t width, + const size_t height, + const int dot_size, + const uint32_t color +); + +// load callbacks +typedef struct { + _Bool (*on_shape)(const km_shape_t * const, void *); + _Bool (*on_row)(const float *, const int *, void *); + void (*on_error)(const char * const, void *); +} km_load_cbs_t; + +// load set from file handle +_Bool +km_load( + FILE * const fh, + const km_load_cbs_t * const cbs, + void * const cb_data +); + +// load set from path +_Bool km_load_path( + const char * const path, + const km_load_cbs_t * const cbs, + void * const cb_data +); + +typedef enum { + // rand: init cluster set by picking random centroids + KM_INIT_TYPE_RAND, + + // forgy: init cluster set by picking random rows from set + KM_INIT_TYPE_FORGY, + + // kmeans++: ... + KM_INIT_TYPE_KMEANS, + + KM_INIT_TYPE_LAST, +} km_init_type_t; + +km_init_type_t km_init_get_type(const char * const); + +// init cluster set using given method +_Bool km_init( + km_set_t * const, + const km_init_type_t, + const size_t, + const km_set_t * const, + km_rand_t * const +); + +#endif /* KM_H */ -- cgit v1.2.3