diff options
Diffstat (limited to 'km-find.c')
-rw-r--r-- | km-find.c | 16 |
1 files changed, 11 insertions, 5 deletions
@@ -6,7 +6,8 @@ #define MIN_CLUSTER_DISTANCE 0.0001 typedef struct { - float distance_sum, + float sum, + mean_sum, variance_sum; size_t num_empty_clusters; } find_solve_data_t; @@ -17,7 +18,7 @@ find_get_mean_distance( const size_t num_clusters ) { const bool num_filled = num_clusters - d->num_empty_clusters; - return num_filled ? (d->distance_sum / num_filled) : 0; + return num_filled ? (d->mean_sum / num_filled) : 0; } static float @@ -55,12 +56,15 @@ find_solve_on_stats( find_solve_data_t * const solve_data = cb_data; UNUSED(set); + // save total sum + solve_data->sum = stats->sum; + // calculate numerator for the average distance across all clusters in // this test for (size_t i = 0; i < stats->num_clusters; i++) { if (fabsf(stats->means[i]) > MIN_CLUSTER_DISTANCE) { // increment mean count - solve_data->distance_sum += stats->means[i]; + solve_data->mean_sum += stats->means[i]; solve_data->variance_sum += stats->variances[i]; } else { // increment empty cluster count @@ -107,7 +111,8 @@ km_find( // init solve data find_solve_data_t solve_data = { - .distance_sum = 0, + .sum = 0, + .mean_sum = 0, .variance_sum = 0, .num_empty_clusters = 0, }; @@ -122,6 +127,7 @@ km_find( const km_find_data_t result = { .cluster_set = &cs, .num_clusters = i, + .distance_sum = solve_data.sum, .mean_distance = find_get_mean_distance(&solve_data, i), .mean_variance = find_get_mean_variance(&solve_data, i), .mean_cluster_size = find_get_mean_cluster_size(&cs), @@ -132,7 +138,7 @@ km_find( cbs->on_data(&result, cb_data); // score result - float score = km_score(result.mean_distance, result.num_empty_clusters); + float score = km_score(result.distance_sum, result.num_empty_clusters); if (score > best_score) { // emit new best result |