aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Paul Duncan <pabs@pablotron.org> 2019-02-03 15:39:45 -0500
committer Paul Duncan <pabs@pablotron.org> 2019-02-03 15:39:45 -0500
commit a30b8be1ede707f417df6afa5736f6fb75124588 (patch)
tree d87ad4273ce9e68b1b7a22ef088ff2fdd569a133
parent df06fe6084b1c1673348050309a3a0cb99b6634f (diff)
download kmeans-a30b8be1ede707f417df6afa5736f6fb75124588.tar.bz2
kmeans-a30b8be1ede707f417df6afa5736f6fb75124588.zip
sort best, make png optional, clean up debug statements
-rw-r--r-- main.c 76
-rw-r--r-- util.h 1
2 files changed, 52 insertions, 25 deletions
diff --git a/main.c b/main.c
index 4d86b09..73d5655 100644
--- a/main.c
+++ b/main.c
@@ -21,6 +21,11 @@
#define IM_STRIDE (3 * IM_WIDTH)
typedef struct {
+ float score;
+ km_set_t set;
+} best_item_t;
+
+typedef struct {
km_rand_t rs;
struct {
@@ -30,31 +35,45 @@ typedef struct {
size_t num_empty_clusters;
} rows[MAX_CLUSTERS - 2];
- km_set_t best[MAX_BEST];
+ best_item_t best[MAX_BEST];
size_t num_best;
} find_t;
+/**
+ * qsort() comparator for best_item_t entries.
+ *
+ * Orders entries by descending score: returns a negative value when
+ * a's score is greater than b's, a positive value when it is less,
+ * and zero when neither is greater (equal scores).
+ *
+ * NOTE(review): the call site in main() describes the sorted result as
+ * "lowest to highest", but this comparator places higher scores first.
+ * The direction is left unchanged here -- confirm which is intended.
+ */
+static int
+best_score_cmp(
+  const void * const ap,
+  const void * const bp
+) {
+  const best_item_t * const a = ap;
+  const best_item_t * const b = bp;
+
+  // report ties as 0 so that cmp(a, b) and cmp(b, a) agree, which
+  // qsort() requires of its comparison function
+  return (a->score < b->score) - (a->score > b->score);
+}
+
+/**
+ * Sort the populated entries of find_data->best in place, using
+ * best_score_cmp() to define the order.
+ *
+ * find_on_best() writes to best[num_best % MAX_BEST] and checks for
+ * num_best >= MAX_BEST, so num_best may reach or exceed the capacity of
+ * the array.  The number of populated entries is therefore
+ * MIN(num_best, MAX_BEST), the same bound find_each_best() walks.
+ */
+static void
+find_sort_best(
+  find_t * const find_data
+) {
+  // count of populated slots; (num_best % MAX_BEST) is the ring-buffer
+  // write index, and using it as the count would sort nothing whenever
+  // num_best is a multiple of MAX_BEST
+  const size_t num_items = MIN(find_data->num_best, MAX_BEST);
+
+  // NOTE(review): the resulting order is whatever best_score_cmp()
+  // defines; confirm it matches the intended worst-to-best order
+  qsort(
+    find_data->best,
+    num_items,
+    sizeof(find_data->best[0]),
+    best_score_cmp
+  );
+}
+
static void
find_each_best(
const find_t * const find_data,
- void (*on_best)(const km_set_t * const, const size_t, void *),
+ void (*on_best)(const km_set_t * const, const size_t, const float, void *),
void * const cb_data
) {
- // if the number of best sets is greater than MAX_BEST, then
- // find_data->best is actually a ring buffer
- const bool is_ring = find_data->num_best >= MAX_BEST;
-
- if (!on_best) {
- return;
- }
-
- // walk best sets
- for (size_t i = 0; i < MAX_BEST; i++) {
- // calculate set offset
- const size_t ofs = i + (is_ring ? (find_data->num_best + 1) : 0);
-
- // emit set
- on_best(find_data->best + (ofs % MAX_BEST), i, cb_data);
+ if (on_best) {
+ // walk best sets and emit each one
+ for (size_t i = 0; i < MIN(find_data->num_best, MAX_BEST); i++) {
+ on_best(&(find_data->best[i].set), i, find_data->best[i].score, cb_data);
+ }
}
}
@@ -65,7 +84,7 @@ load_on_shape(
) {
km_set_t * const set = cb_data;
- fprintf(stderr, "DEBUG: shape = { %zu, %zu }\n", shape->num_floats, shape->num_ints);
+ D("shape: %zu floats, %zu ints", shape->num_floats, shape->num_ints);
// init set
if (!km_set_init(set, shape, 100)) {
@@ -150,13 +169,13 @@ find_on_best(
const km_set_t * const cs,
void *cb_data
) {
- find_t *find_data = cb_data;
+ find_t * const find_data = cb_data;
D("best score = %0.3f, num_clusters = %zu", score, cs->num_rows);
// get pointer to destination set
// (note: data->best is a ring buffer)
- km_set_t *dst = find_data->best + (find_data->num_best % MAX_BEST);
+ km_set_t *dst = &(find_data->best[find_data->num_best % MAX_BEST].set);
if (find_data->num_best >= MAX_BEST) {
// finalize old best data set
@@ -247,15 +266,17 @@ static void
save_on_best(
const km_set_t * const set,
const size_t rank,
+ const float score,
void * const cb_data
) {
UNUSED(cb_data);
+
// convert rank to channel brightness
const uint8_t ch = 0x66 + (0xff - 0x66) * (1.0 * rank) / (MAX_BEST - 1);
const uint32_t color = (ch & 0xff) << 16;
// const uint32_t color = 0xff0000;
- D("rank = %zu, color = %u", rank, color);
+ D("rank = %zu, score = %0.3f, size = %zu, color = %06x", rank, score, set->num_rows, color);
// draw clusters
km_set_draw(set, im_data, IM_WIDTH, IM_HEIGHT, 3, color);
@@ -288,7 +309,7 @@ save_png(
int main(int argc, char *argv[]) {
// check command-line
if (argc < 2) {
- fprintf(stderr, "Usage: %s <data>\n", argv[0]);
+ fprintf(stderr, "Usage: %s <data_path> <png_path>\n", argv[0]);
return EXIT_FAILURE;
}
@@ -310,7 +331,7 @@ int main(int argc, char *argv[]) {
die("km_set_normalize() failed");
}
- // find best solution
+ // find best solutions
if (!km_find(&set, &FIND_CBS, &find_data)) {
die("km_find()");
}
@@ -318,8 +339,13 @@ int main(int argc, char *argv[]) {
// print csv
print_csv(&find_data);
- // save png of normalized data set and best clusters
- save_png("data.png", &set, &find_data);
+ // sort best results from lowest to highest
+ find_sort_best(&find_data);
+
+ if (argc > 2) {
+ // save png of normalized data set and best clusters
+ save_png(argv[2], &set, &find_data);
+ }
// finalize data set
km_set_fini(&set);
diff --git a/util.h b/util.h
index eeda5a7..cef238c 100644
--- a/util.h
+++ b/util.h
@@ -4,6 +4,7 @@
#include <stdio.h> // fprintf()
#include <stdlib.h> // exit()
+#define MIN(a, b) ((a) < (b) ? (a) : (b))
#define MAX(a, b) ((a) > (b) ? (a) : (b))
#define UNUSED(a) ((void) (a))