#include // bool #include // fprintf() #include // EXIT_{FAILURE,SUCCESS} #include // exit() #include // memset() #include // fabsf() #define STB_IMAGE_WRITE_IMPLEMENTATION #define STB_ONLY_PNG #include "stb_image_write.h" #include "util.h" #include "km.h" // maximum number of clusters to test #define MAX_CLUSTERS 10 // number of tests to run on each cluster size #define NUM_TESTS 100 #define MAX_BEST 10 // output image size #define IM_WIDTH 128 #define IM_HEIGHT 128 #define IM_STRIDE (3 * IM_WIDTH) // static output image data buffer static uint8_t im_data[3 * IM_WIDTH * IM_HEIGHT]; typedef struct { float score; km_set_t set; } best_item_t; typedef struct { // cluster initialization method km_init_type_t init_type; // random number source km_rand_t rs; struct { float distance, silhouette, cluster_size; size_t num_empty; } rows[MAX_CLUSTERS - 2]; // best clusters best_item_t best[MAX_BEST]; size_t num_best; } ctx_t; static inline size_t ctx_get_max_best( const ctx_t * const ctx ) { return MIN(ctx->num_best, MAX_BEST); } static int best_score_cmp( const void * const ap, const void * const bp ) { const best_item_t * const a = ap; const best_item_t * const b = bp; return (a->score > b->score) ? 1 : -1; } static void ctx_best_sort( ctx_t * const ctx ) { // sort best sets by ascending score (worst to best) qsort( ctx->best, ctx_get_max_best(ctx), sizeof(best_item_t), best_score_cmp ); } static bool load_on_shape( const km_shape_t * const shape, void * const cb_data ) { km_set_t * const set = cb_data; // D("shape: %zu floats, %zu ints", shape->num_floats, shape->num_ints); // init set if (!km_set_init(set, shape, 1000)) { die("km_set_init() failed"); } // return success return true; } static bool load_on_row( const float * const floats, const int * const ints, void * const cb_data ) { km_set_t * const set = cb_data; // push row if (!km_set_push(set, 1, floats, ints)) { die("km_set_push_rows() failed"); } // return success return true; } static void load_on_error( const char * const err, void * const cb_data ) { UNUSED(cb_data); die("load failed: %s", err); } static const km_load_cbs_t LOAD_CBS = { .on_shape = load_on_shape, .on_row = load_on_row, .on_error = load_on_error, }; static bool find_on_init( km_set_t * const cs, const size_t num_clusters, const km_set_t * const set, void *cb_data ) { ctx_t * const ctx = cb_data; return km_init(cs, ctx->init_type, num_clusters, set, &(ctx->rs)); } static bool find_on_fini( km_set_t * const cs, void *cb_data ) { UNUSED(cb_data); km_set_fini(cs); return true; } static void find_on_data( const km_find_data_t * const data, void *cb_data ) { ctx_t * const ctx = cb_data; const size_t ofs = data->num_clusters - 2; ctx->rows[ofs].distance += data->distance_sum; ctx->rows[ofs].silhouette += data->silhouette; ctx->rows[ofs].cluster_size += data->mean_cluster_size; ctx->rows[ofs].num_empty += data->num_empty_clusters; } static bool find_on_best( const float score, const km_set_t * const cs, void *cb_data ) { ctx_t * const ctx = cb_data; D("new best: score = %0.3f, num_clusters = %zu", score, cs->num_rows); // get pointer to destination set // (note: data->best is a ring buffer) const size_t ofs = ctx->num_best % MAX_BEST; ctx->best[ofs].score = score; km_set_t * const dst = &(ctx->best[ofs].set); if (ctx->num_best >= MAX_BEST) { // finalize old best data set km_set_fini(dst); } // copy data set to best ring buffer if (!km_set_copy(dst, cs)) { die("km_set_copy()"); } // increment best count ctx->num_best++; // return success return true; } // init find config static const km_find_cbs_t FIND_CBS = { .max_clusters = MAX_CLUSTERS, .num_tests = NUM_TESTS, .on_init = find_on_init, .on_fini = find_on_fini, .on_data = find_on_data, .on_best = find_on_best, }; static void ctx_save_png( const ctx_t * const ctx, const char * const png_path, const km_set_t * const set ) { // clear image data to white memset(im_data, 0xff, sizeof(im_data)); // draw data points km_set_draw(set, im_data, IM_WIDTH, IM_HEIGHT, 1, 0x000000); if (!stbi_write_png(png_path, IM_WIDTH, IM_HEIGHT, 3, im_data, IM_STRIDE)) { die("stbi_write_png(\"%s\")", png_path); } // draw best centroids in green const km_set_t * const best_set = &(ctx->best[ctx_get_max_best(ctx) - 1].set); km_set_draw(best_set, im_data, IM_WIDTH, IM_HEIGHT, 3, 0x00ff00); // save png if (!stbi_write_png(png_path, IM_WIDTH, IM_HEIGHT, 3, im_data, IM_STRIDE)) { die("stbi_write_png(\"%s\")", png_path); } } static void ctx_best_print( const ctx_t * const ctx, FILE * const fh ) { if (!km_set_print(&(ctx->best[ctx_get_max_best(ctx) - 1].set), fh)) { die("km_set_print()"); } } static const char USAGE_FORMAT[] = "Usage: %s [init] [data_path] \n" "\n" "Arguments:\n" "* init: Cluster init method (one of \"rand\" or \"set\").\n" "* data_path: Path to input data file.\n" "* png_path: Path to output PNG (optional).\n" ""; int main(int argc, char *argv[]) { // check command-line arguments if (argc < 3) { fprintf(stderr, USAGE_FORMAT, argv[0]); return EXIT_FAILURE; } // get command-line arguments const char * const init_type_name = argv[1]; const char * const data_path = argv[2]; const char * const png_path = (argc > 3) ? argv[3] : NULL; // init rng srand(getpid()); const int rand_seed = rand(); // init context ctx_t ctx; memset(&ctx, 0, sizeof(ctx_t)); ctx.init_type = km_init_get_type(init_type_name); // init ctx rng km_rand_init_erand48(&(ctx.rs), rand_seed); // init data set km_set_t set; if (!km_load_path(data_path, &LOAD_CBS, &set)) { die("km_load_path(\"%s\") failed", data_path); } // normalize data set if (!km_set_normalize(&set)) { die("km_set_normalize() failed"); } // find best solutions if (!km_find(&set, &FIND_CBS, &ctx)) { die("km_find()"); } // sort best results from lowest to highest ctx_best_sort(&ctx); // print best ctx_best_print(&ctx, stdout); if (png_path) { // save png of normalized data set and best clusters ctx_save_png(&ctx, png_path, &set); } // finalize data set km_set_fini(&set); // return success return 0; }