aboutsummaryrefslogtreecommitdiff
path: root/km.c
diff options
context:
space:
mode:
authorPaul Duncan <pabs@pablotron.org>2019-02-03 04:56:01 -0500
committerPaul Duncan <pabs@pablotron.org>2019-02-03 04:56:01 -0500
commitb3d3cebbc3dae2c975b9bb0f0c77f5dc845d7fb8 (patch)
tree00797946805b2eb24896bb6da7a1f4090f55fef9 /km.c
parent1e3dc2401263a436a32377e207352ffbe6984e8f (diff)
downloadkmeans-b3d3cebbc3dae2c975b9bb0f0c77f5dc845d7fb8.tar.bz2
kmeans-b3d3cebbc3dae2c975b9bb0f0c77f5dc845d7fb8.zip
mv km{,-set}.c
Diffstat (limited to 'km.c')
-rw-r--r--km.c249
1 files changed, 0 insertions, 249 deletions
diff --git a/km.c b/km.c
deleted file mode 100644
index 5b9ae4c..0000000
--- a/km.c
+++ /dev/null
@@ -1,249 +0,0 @@
-#include <stdbool.h> // bool
-#include <stdint.h> // size_t
-#include <string.h> // memcpy()
-#include <stdlib.h> // rand()
-#include "util.h"
-#include "km.h"
-
-#define MIN_ROWS (4096 / sizeof(float))
-
-// grow data set
-static bool
-km_set_grow(
- km_set_t * const set,
- const size_t capacity
-) {
- // alloc floats
- const size_t num_floats = set->shape.num_floats * capacity;
- float * const floats = malloc(sizeof(float) * num_floats);
- if (!floats) {
- // return failure
- return false;
- }
-
- // alloc ints
- const size_t num_ints = set->shape.num_ints * capacity;
- int * const ints = malloc(sizeof(int) * num_ints);
- if (!ints) {
- // return failure
- return false;
- }
-
- // update set
- set->floats = floats;
- set->ints = ints;
- set->capacity = capacity;
-
- // return success
- return true;
-}
-
-// init data set with shape and initial size
-bool
-km_set_init(
- km_set_t * const set,
- const km_shape_t * const shape,
- const size_t row_capacity
-) {
- // alloc bounds
- float * const bounds = malloc(2 * sizeof(float) * shape->num_floats);
- if (!bounds) {
- // return error
- return false;
- }
-
- set->state = KM_SET_STATE_INIT;
- set->floats = NULL;
- set->ints = NULL;
- set->shape = *shape;
- set->num_rows = 0;
- set->capacity = 0;
- set->bounds = bounds;
-
- return km_set_grow(set, MAX(MIN_ROWS, row_capacity + 1));
-}
-
-// finalize data set
-void
-km_set_fini(km_set_t * const set) {
- if (set->state == KM_SET_STATE_FINI) {
- return;
- }
-
- if (set->bounds) {
- // free bounds
- free(set->bounds);
- set->bounds = NULL;
- }
-
- if (set->floats) {
- // free floats
- free(set->floats);
- set->floats = NULL;
- }
-
- if (set->ints) {
- // free ints
- free(set->ints);
- set->ints = NULL;
- }
-
- // shrink capacity
- set->capacity = 0;
-
- // set state
- set->state = KM_SET_STATE_FINI;
-}
-
-// append rows to data set, growing set if necessary
-bool
-km_set_push(
- km_set_t * const set,
- const size_t num_rows,
- const float * const floats,
- const int * const ints
-) {
- // check state
- if (set->state != KM_SET_STATE_INIT) {
- // return failure
- return false;
- }
-
- const size_t capacity_needed = set->num_rows + num_rows;
- // FIXME: potential overflow here
- if (capacity_needed >= set->capacity) {
- // crappy growth algorithm
- const size_t new_capacity = 2 * capacity_needed + 1;
-
- // resize storage
- if (!km_set_grow(set, MAX(MIN_ROWS, new_capacity))) {
- return false;
- }
- }
-
- // copy floats
- const size_t num_floats = set->shape.num_floats;
- if (num_floats > 0) {
- float * const dst = set->floats + num_floats * set->num_rows;
- const size_t stride = sizeof(float) * num_floats;
-
- // copy floats
- memcpy(dst, floats, stride * num_rows);
-
- if (!set->num_rows) {
- // there were no rows, so populate bounds with first row
- memcpy(set->bounds, floats, stride);
- memcpy(set->bounds + num_floats, floats, stride);
- }
-
- for (size_t i = 0; i < num_rows; i++) {
- for (size_t j = 0; j < num_floats; j++) {
- const float val = floats[i * num_floats + j];
-
- if (val < set->bounds[j]) {
- // update min bound
- set->bounds[j] = val;
- }
-
- if (val > set->bounds[num_floats + j]) {
- // update max bound
- set->bounds[num_floats + j] = val;
- }
- }
- }
- }
-
- // copy ints
- const size_t num_ints = set->shape.num_ints;
- if (num_ints > 0) {
- int * const dst = set->ints + num_ints * set->num_rows;
- const size_t stride = sizeof(int) * num_ints;
-
- // copy ints
- memcpy(dst, ints, stride * num_rows);
- }
-
- // increment row count
- set->num_rows += num_rows;
-
- // return success
- return true;
-}
-
-bool
-km_set_normalize(
- km_set_t * const set
-) {
- const size_t num_floats = set->shape.num_floats;
-
- // check set state
- if (set->state != KM_SET_STATE_INIT) {
- // return failure
- return false;
- }
-
- // normalize values
- for (size_t i = 0; i < set->num_rows; i++) {
- for (size_t j = 0; j < num_floats; j++) {
- const size_t ofs = i * num_floats + j;
- const float val = set->floats[ofs],
- min = set->bounds[j],
- max = set->bounds[num_floats + j];
-
- // normalize and write value
- set->floats[ofs] = (val - min) / (max - min);
- }
- }
-
- // set state
- set->state = KM_SET_STATE_NORMALIZED;
-
- // return success
- return true;
-}
-
-// get row from data set
-float *
-km_set_get_row(
- const km_set_t * const set,
- const size_t i
-) {
- const size_t num_floats = set->shape.num_floats;
- return set->floats + i * num_floats;
-}
-
-// init a set with num_clusters clusters of shape num_floats by picking
-// random cluster centers
-bool
-km_set_init_rand_clusters(
- km_set_t * const cs,
- const size_t num_floats,
- const size_t num_clusters,
- km_rand_src_t * const rs
-) {
- // init cluster shape
- const km_shape_t shape = {
- .num_floats = num_floats,
- .num_ints = 1,
- };
-
- // generate random cluster centers
- float floats[num_floats * num_clusters];
- if (!km_rand_src_fill(rs, num_floats * num_clusters, floats)) {
- // return failure
- return false;
- }
-
- // FIXME: should probably be heap-allocated
- int ints[num_clusters];
- memset(ints, 0, sizeof(ints));
-
- // init cluster set
- if (!km_set_init(cs, &shape, num_clusters)) {
- // return failure
- return false;
- }
-
- // add data, return result
- return km_set_push(cs, num_clusters, floats, ints);
-}