diff options
author | Paul Duncan <pabs@pablotron.org> | 2019-02-04 22:43:47 -0500 |
---|---|---|
committer | Paul Duncan <pabs@pablotron.org> | 2019-02-04 22:43:47 -0500 |
commit | dbebf993978def0134af9d0b98a9cdb028551e9b (patch) | |
tree | de5a688d1a8229443ee05f5077805881730689da /main.c | |
parent | cb7823eaa631c43ed2f9620c30e9fbbbe574bd41 (diff) | |
download | kmeans-dbebf993978def0134af9d0b98a9cdb028551e9b.tar.bz2 kmeans-dbebf993978def0134af9d0b98a9cdb028551e9b.zip |
use silhouette method for scoring
Diffstat (limited to 'main.c')
-rw-r--r-- | main.c | 32 |
1 file changed, 15 insertions, 17 deletions
@@ -13,7 +13,7 @@ #include "km.h" #define MAX_CLUSTERS 10 -#define NUM_TESTS 100 +#define NUM_TESTS 300 #define MAX_BEST 10 #define IM_WIDTH 128 @@ -34,9 +34,9 @@ typedef struct { struct { float distance, - variance, + silouette, cluster_size; - size_t num_empty_clusters; + size_t num_empty; } rows[MAX_CLUSTERS - 2]; // best clusters @@ -62,7 +62,7 @@ ctx_best_sort( // sort best sets by ascending score (worst to best) qsort( ctx->best, - (ctx->num_best % MAX_BEST), + MIN(ctx->num_best, MAX_BEST), sizeof(best_item_t), best_score_cmp ); @@ -162,10 +162,10 @@ find_on_data( ctx_t * const ctx = cb_data; const size_t ofs = data->num_clusters - 2; - ctx->rows[ofs].distance += data->mean_distance; - ctx->rows[ofs].variance += data->mean_variance; + ctx->rows[ofs].distance += data->distance_sum; + ctx->rows[ofs].silouette += data->silouette; ctx->rows[ofs].cluster_size += data->mean_cluster_size; - ctx->rows[ofs].num_empty_clusters += data->num_empty_clusters; + ctx->rows[ofs].num_empty += data->num_empty_clusters; } static bool @@ -184,7 +184,6 @@ find_on_best( if (ctx->num_best >= MAX_BEST) { // finalize old best data set - // D("finalizing old best %zu", ctx->num_best); km_set_fini(dst); } @@ -220,17 +219,15 @@ ctx_csv_print_row( ) { const size_t num_clusters = i + 2; const float mean_distance = ctx->rows[i].distance / NUM_TESTS, - mean_variance = ctx->rows[i].variance / NUM_TESTS, mean_cluster_size = ctx->rows[i].cluster_size / NUM_TESTS, - mean_empty = 1.0 * ctx->rows[i].num_empty_clusters / NUM_TESTS, - score = km_score(mean_distance, mean_empty); + mean_empty = 1.0 * ctx->rows[i].num_empty / NUM_TESTS, + score = ctx->rows[i].silouette / NUM_TESTS; // print result - fprintf(fh, "%zu,%0.3f,%0.3f,%0.3f,%0.3f,%0.3f\n", + fprintf(fh, "%zu,%0.3f,%0.3f,%0.3f,%0.3f\n", num_clusters, score, mean_distance, - mean_variance, mean_cluster_size, mean_empty ); @@ -242,7 +239,7 @@ ctx_csv_print( FILE * const fh ) { // print headers - fprintf(fh, 
"#,score,distance,variance,cluster_size,empty_clusters\n"); + fprintf(fh, "#,score,distance,size,empty\n"); // print rows for (size_t i = 0; i < MAX_CLUSTERS - 2; i++) { @@ -260,12 +257,13 @@ save_on_best( const float score, void * const cb_data ) { + const ctx_t * const ctx = cb_data; UNUSED(score); - UNUSED(cb_data); // convert rank to channel brightness const uint8_t ch = 0x33 + (0xff - 0x33) * (1.0 * rank + 1) / (MAX_BEST); - const uint32_t color = (ch & 0xff) << 16; + const uint8_t shift = (rank == MIN(ctx->num_best, MAX_BEST) - 1) ? 8 : 16; + const uint32_t color = (ch & 0xff) << shift; // const uint32_t color = 0xff0000; // D("rank = %zu, score = %0.3f, size = %zu, color = %06x", rank, score, set->num_rows, color); @@ -289,7 +287,7 @@ ctx_save_png( } // draw best cluster points - ctx_best_each(ctx, save_on_best, NULL); + ctx_best_each(ctx, save_on_best, (void*) ctx); // save png if (!stbi_write_png(png_path, IM_WIDTH, IM_HEIGHT, 3, im_data, IM_STRIDE)) { |