about | summary | refs | log | tree | commit | diff
path: root/main.c
diff options
context:
space:
mode:
author Paul Duncan <pabs@pablotron.org> 2019-02-04 22:43:47 -0500
committer Paul Duncan <pabs@pablotron.org> 2019-02-04 22:43:47 -0500
commit dbebf993978def0134af9d0b98a9cdb028551e9b (patch)
tree de5a688d1a8229443ee05f5077805881730689da /main.c
parent cb7823eaa631c43ed2f9620c30e9fbbbe574bd41 (diff)
download kmeans-dbebf993978def0134af9d0b98a9cdb028551e9b.tar.bz2
kmeans-dbebf993978def0134af9d0b98a9cdb028551e9b.zip
use silhouette method for scoring
Diffstat (limited to 'main.c')
-rw-r--r-- main.c 32
1 files changed, 15 insertions, 17 deletions
diff --git a/main.c b/main.c
index 5629cd5..ed46af7 100644
--- a/main.c
+++ b/main.c
@@ -13,7 +13,7 @@
#include "km.h"
#define MAX_CLUSTERS 10
-#define NUM_TESTS 100
+#define NUM_TESTS 300
#define MAX_BEST 10
#define IM_WIDTH 128
@@ -34,9 +34,9 @@ typedef struct {
struct {
float distance,
- variance,
+ silouette,
cluster_size;
- size_t num_empty_clusters;
+ size_t num_empty;
} rows[MAX_CLUSTERS - 2];
// best clusters
@@ -62,7 +62,7 @@ ctx_best_sort(
// sort best sets by ascending score (worst to best)
qsort(
ctx->best,
- (ctx->num_best % MAX_BEST),
+ MIN(ctx->num_best, MAX_BEST),
sizeof(best_item_t),
best_score_cmp
);
@@ -162,10 +162,10 @@ find_on_data(
ctx_t * const ctx = cb_data;
const size_t ofs = data->num_clusters - 2;
- ctx->rows[ofs].distance += data->mean_distance;
- ctx->rows[ofs].variance += data->mean_variance;
+ ctx->rows[ofs].distance += data->distance_sum;
+ ctx->rows[ofs].silouette += data->silouette;
ctx->rows[ofs].cluster_size += data->mean_cluster_size;
- ctx->rows[ofs].num_empty_clusters += data->num_empty_clusters;
+ ctx->rows[ofs].num_empty += data->num_empty_clusters;
}
static bool
@@ -184,7 +184,6 @@ find_on_best(
if (ctx->num_best >= MAX_BEST) {
// finalize old best data set
- // D("finalizing old best %zu", ctx->num_best);
km_set_fini(dst);
}
@@ -220,17 +219,15 @@ ctx_csv_print_row(
) {
const size_t num_clusters = i + 2;
const float mean_distance = ctx->rows[i].distance / NUM_TESTS,
- mean_variance = ctx->rows[i].variance / NUM_TESTS,
mean_cluster_size = ctx->rows[i].cluster_size / NUM_TESTS,
- mean_empty = 1.0 * ctx->rows[i].num_empty_clusters / NUM_TESTS,
- score = km_score(mean_distance, mean_empty);
+ mean_empty = 1.0 * ctx->rows[i].num_empty / NUM_TESTS,
+ score = ctx->rows[i].silouette / NUM_TESTS;
// print result
- fprintf(fh, "%zu,%0.3f,%0.3f,%0.3f,%0.3f,%0.3f\n",
+ fprintf(fh, "%zu,%0.3f,%0.3f,%0.3f,%0.3f\n",
num_clusters,
score,
mean_distance,
- mean_variance,
mean_cluster_size,
mean_empty
);
@@ -242,7 +239,7 @@ ctx_csv_print(
FILE * const fh
) {
// print headers
- fprintf(fh, "#,score,distance,variance,cluster_size,empty_clusters\n");
+ fprintf(fh, "#,score,distance,size,empty\n");
// print rows
for (size_t i = 0; i < MAX_CLUSTERS - 2; i++) {
@@ -260,12 +257,13 @@ save_on_best(
const float score,
void * const cb_data
) {
+ const ctx_t * const ctx = cb_data;
UNUSED(score);
- UNUSED(cb_data);
// convert rank to channel brightness
const uint8_t ch = 0x33 + (0xff - 0x33) * (1.0 * rank + 1) / (MAX_BEST);
- const uint32_t color = (ch & 0xff) << 16;
+ const uint8_t shift = (rank == MIN(ctx->num_best, MAX_BEST) - 1) ? 8 : 16;
+ const uint32_t color = (ch & 0xff) << shift;
// const uint32_t color = 0xff0000;
// D("rank = %zu, score = %0.3f, size = %zu, color = %06x", rank, score, set->num_rows, color);
@@ -289,7 +287,7 @@ ctx_save_png(
}
// draw best cluster points
- ctx_best_each(ctx, save_on_best, NULL);
+ ctx_best_each(ctx, save_on_best, (void*) ctx);
// save png
if (!stbi_write_png(png_path, IM_WIDTH, IM_HEIGHT, 3, im_data, IM_STRIDE)) {