about | summary | refs | log | tree | commit | diff
path: root/km-find.c
diff options
context:
space:
mode:
Diffstat (limited to 'km-find.c')
-rw-r--r-- km-find.c 54
1 file changed, 9 insertions, 45 deletions
diff --git a/km-find.c b/km-find.c
index 7cd24f6..3716123 100644
--- a/km-find.c
+++ b/km-find.c
@@ -7,31 +7,11 @@
typedef struct {
float sum,
- mean_sum,
- variance_sum;
- size_t num_empty_clusters;
+ silouette;
} find_solve_data_t;
static float
-find_get_mean_distance(
- const find_solve_data_t * const d,
- const size_t num_clusters
-) {
- const bool num_filled = num_clusters - d->num_empty_clusters;
- return num_filled ? (d->mean_sum / num_filled) : 0;
-}
-
-static float
-find_get_mean_variance(
- const find_solve_data_t * const d,
- const size_t num_clusters
-) {
- const bool num_filled = num_clusters - d->num_empty_clusters;
- return num_filled ? (d->variance_sum / num_filled) : 0;
-}
-
-static float
-find_get_mean_cluster_size(
+get_mean_cluster_size(
const km_set_t * const set
) {
float sum = 0;
@@ -56,21 +36,9 @@ find_solve_on_stats(
find_solve_data_t * const solve_data = cb_data;
UNUSED(set);
- // save total sum
+ // save total sum and silhouette
solve_data->sum = stats->sum;
-
- // calculate numerator for the average distance across all clusters in
- // this test
- for (size_t i = 0; i < stats->num_clusters; i++) {
- if (fabsf(stats->means[i]) > MIN_CLUSTER_DISTANCE) {
- // increment mean count
- solve_data->mean_sum += stats->means[i];
- solve_data->variance_sum += stats->variances[i];
- } else {
- // increment empty cluster count
- solve_data->num_empty_clusters++;
- }
- }
+ solve_data->silouette = stats->silouette;
}
static const km_solve_cbs_t
@@ -111,10 +79,8 @@ km_find(
// init solve data
find_solve_data_t solve_data = {
- .sum = 0,
- .mean_sum = 0,
- .variance_sum = 0,
- .num_empty_clusters = 0,
+ .sum = 0,
+ .silouette = 0,
};
// solve test
@@ -128,17 +94,15 @@ km_find(
.cluster_set = &cs,
.num_clusters = i,
.distance_sum = solve_data.sum,
- .mean_distance = find_get_mean_distance(&solve_data, i),
- .mean_variance = find_get_mean_variance(&solve_data, i),
- .mean_cluster_size = find_get_mean_cluster_size(&cs),
- .num_empty_clusters = solve_data.num_empty_clusters,
+ .silouette = solve_data.silouette,
+ .mean_cluster_size = get_mean_cluster_size(&cs),
};
// emit result
cbs->on_data(&result, cb_data);
// score result
- float score = km_score(result.distance_sum, result.num_empty_clusters);
+ const float score = solve_data.silouette;
if (score > best_score) {
// emit new best result