diff options
Diffstat (limited to 'km-find.c')
-rw-r--r-- | km-find.c | 54 |
1 file changed, 9 insertions, 45 deletions
@@ -7,31 +7,11 @@ typedef struct { float sum, - mean_sum, - variance_sum; - size_t num_empty_clusters; + silouette; } find_solve_data_t; static float -find_get_mean_distance( - const find_solve_data_t * const d, - const size_t num_clusters -) { - const bool num_filled = num_clusters - d->num_empty_clusters; - return num_filled ? (d->mean_sum / num_filled) : 0; -} - -static float -find_get_mean_variance( - const find_solve_data_t * const d, - const size_t num_clusters -) { - const bool num_filled = num_clusters - d->num_empty_clusters; - return num_filled ? (d->variance_sum / num_filled) : 0; -} - -static float -find_get_mean_cluster_size( +get_mean_cluster_size( const km_set_t * const set ) { float sum = 0; @@ -56,21 +36,9 @@ find_solve_on_stats( find_solve_data_t * const solve_data = cb_data; UNUSED(set); - // save total sum + // save total sum and silouette solve_data->sum = stats->sum; - - // calculate numerator for the average distance across all clusters in - // this test - for (size_t i = 0; i < stats->num_clusters; i++) { - if (fabsf(stats->means[i]) > MIN_CLUSTER_DISTANCE) { - // increment mean count - solve_data->mean_sum += stats->means[i]; - solve_data->variance_sum += stats->variances[i]; - } else { - // increment empty cluster count - solve_data->num_empty_clusters++; - } - } + solve_data->silouette = stats->silouette; } static const km_solve_cbs_t @@ -111,10 +79,8 @@ km_find( // init solve data find_solve_data_t solve_data = { - .sum = 0, - .mean_sum = 0, - .variance_sum = 0, - .num_empty_clusters = 0, + .sum = 0, + .silouette = 0, }; // solve test @@ -128,17 +94,15 @@ km_find( .cluster_set = &cs, .num_clusters = i, .distance_sum = solve_data.sum, - .mean_distance = find_get_mean_distance(&solve_data, i), - .mean_variance = find_get_mean_variance(&solve_data, i), - .mean_cluster_size = find_get_mean_cluster_size(&cs), - .num_empty_clusters = solve_data.num_empty_clusters, + .silouette = solve_data.silouette, + .mean_cluster_size = 
get_mean_cluster_size(&cs), }; // emit result cbs->on_data(&result, cb_data); // score result - float score = km_score(result.distance_sum, result.num_empty_clusters); + const float score = solve_data.silouette; if (score > best_score) { // emit new best result |