#ifndef KM_H #define KM_H #include // size_t #include // uint8_t #include // FILE // forward typedef for callbacks typedef struct km_rand_t_ km_rand_t; // random number source callbacks typedef struct { _Bool (*get_floats)(km_rand_t * const, const size_t, float * const); _Bool (*get_sizes)(km_rand_t * const, const size_t, size_t * const); void (*fini)(km_rand_t * const); } km_rand_cbs_t; // random number source struct km_rand_t_ { const km_rand_cbs_t *cbs; void *data; }; // fill buffer with N random floats _Bool km_rand_get_floats(km_rand_t * const, const size_t, float * const); // fill buffer with N random size_ts _Bool km_rand_get_sizes(km_rand_t * const, const size_t, size_t * const); // finalize random source void km_rand_fini(km_rand_t * const); // init libc rng (use libc rand()) void km_rand_init_libc(km_rand_t *); // init file source rng (e.g. "/dev/urandom") _Bool km_rand_init_path(km_rand_t * const, const char *); // init erand48 rng (use erand48()) _Bool km_rand_init_erand48(km_rand_t * const, const uint64_t); // shape of data set typedef struct { size_t num_floats, num_ints; } km_shape_t; typedef enum { KM_SET_STATE_INIT, KM_SET_STATE_NORMALIZED, KM_SET_STATE_FINI, KM_SET_STATE_LAST, } km_set_state_t; // data set typedef struct { km_set_state_t state; float *floats, *bounds; int *ints; km_shape_t shape; size_t num_rows, capacity; } km_set_t; // init data set with shape and initial size _Bool km_set_init(km_set_t * const, const km_shape_t *, const size_t); // finalize data set void km_set_fini(km_set_t * const); // append rows to data set, growing set if necessary _Bool km_set_push(km_set_t *, const size_t, const float *, const int *); // deep copy data set _Bool km_set_copy(km_set_t * const, const km_set_t * const); // normalize data set _Bool km_set_normalize(km_set_t * const); // get pointer to data set row float *km_set_get_row(const km_set_t *, const size_t); // get pointer to data set row ints int *km_set_get_row_ints(const km_set_t *, const size_t); typedef struct { float d2, d2_near; size_t cluster; } km_row_map_t; typedef struct { const float sum; const float silouette; const float *mean_dists; const float *mean_nears; const size_t num_clusters; } km_solve_stats_t; typedef struct { void (*on_step)(const km_set_t *, const km_row_map_t *, void *); void (*on_stats)(const km_set_t *, const km_solve_stats_t *, void *); } km_solve_cbs_t; // use k-means to iteratively update the cluster centroids until there // are no more changes to the centroids _Bool km_solve( km_set_t * const, const km_set_t * const, const km_solve_cbs_t * const, void * const ); typedef struct { const km_set_t * const cluster_set; float distance_sum, silouette, mean_cluster_size; const size_t num_clusters, num_empty_clusters; } km_find_data_t; typedef _Bool (*km_find_init_cb_t)( km_set_t * const cluster_set, const size_t num_clusters, const km_set_t * const data_set, void *cb_data ); typedef _Bool (*km_find_fini_cb_t)( km_set_t * const cluster_set, void *cb_data ); typedef _Bool (*km_find_best_cb_t)( const float score, const km_set_t * const cluster_set, void *cb_data ); typedef void (*km_find_data_cb_t)( const km_find_data_t * const, void * ); typedef struct { // maximum number of clusters size_t max_clusters; // maximum number of tests per cluster size_t num_tests; km_find_init_cb_t on_init; km_find_fini_cb_t on_fini; km_find_data_cb_t on_data; km_find_best_cb_t on_best; } km_find_cbs_t; // repeatedly test different cluster sizes and report results _Bool km_find( const km_set_t * const set, const km_find_cbs_t * const cbs, void * const cb_data ); // render set to rgb buffer void km_set_draw( const km_set_t * const set, uint8_t * const rgb, const size_t width, const size_t height, const int dot_size, const uint32_t color ); // load callbacks typedef struct { _Bool (*on_shape)(const km_shape_t * const, void *); _Bool (*on_row)(const float *, const int *, void *); void (*on_error)(const char * const, void *); } km_load_cbs_t; // load set from file handle _Bool km_load( FILE * const fh, const km_load_cbs_t * const cbs, void * const cb_data ); // print set to file handle _Bool km_set_print( const km_set_t * const, FILE * const fh ); // load set from path _Bool km_load_path( const char * const path, const km_load_cbs_t * const cbs, void * const cb_data ); typedef enum { // rand: init cluster set by picking random centroids KM_INIT_TYPE_RAND, // forgy: init cluster set by picking random rows from set KM_INIT_TYPE_FORGY, // kmeans++: ... KM_INIT_TYPE_KMEANS, KM_INIT_TYPE_LAST, } km_init_type_t; km_init_type_t km_init_get_type(const char * const); // init cluster set using given method _Bool km_init( km_set_t * const, const km_init_type_t, const size_t, const km_set_t * const, km_rand_t * const ); #endif /* KM_H */