diff options
author | Paul Duncan <pabs@pablotron.org> | 2019-02-03 04:56:01 -0500 |
---|---|---|
committer | Paul Duncan <pabs@pablotron.org> | 2019-02-03 04:56:01 -0500 |
commit | b3d3cebbc3dae2c975b9bb0f0c77f5dc845d7fb8 (patch) | |
tree | 00797946805b2eb24896bb6da7a1f4090f55fef9 /km.c | |
parent | 1e3dc2401263a436a32377e207352ffbe6984e8f (diff) | |
download | kmeans-b3d3cebbc3dae2c975b9bb0f0c77f5dc845d7fb8.tar.bz2 kmeans-b3d3cebbc3dae2c975b9bb0f0c77f5dc845d7fb8.zip |
mv km{,-set}.c
Diffstat (limited to 'km.c')
-rw-r--r-- | km.c | 249 |
1 files changed, 0 insertions, 249 deletions
@@ -1,249 +0,0 @@ -#include <stdbool.h> // bool -#include <stdint.h> // size_t -#include <string.h> // memcpy() -#include <stdlib.h> // rand() -#include "util.h" -#include "km.h" - -#define MIN_ROWS (4096 / sizeof(float)) - -// grow data set -static bool -km_set_grow( - km_set_t * const set, - const size_t capacity -) { - // alloc floats - const size_t num_floats = set->shape.num_floats * capacity; - float * const floats = malloc(sizeof(float) * num_floats); - if (!floats) { - // return failure - return false; - } - - // alloc ints - const size_t num_ints = set->shape.num_ints * capacity; - int * const ints = malloc(sizeof(int) * num_ints); - if (!ints) { - // return failure - return false; - } - - // update set - set->floats = floats; - set->ints = ints; - set->capacity = capacity; - - // return success - return true; -} - -// init data set with shape and initial size -bool -km_set_init( - km_set_t * const set, - const km_shape_t * const shape, - const size_t row_capacity -) { - // alloc bounds - float * const bounds = malloc(2 * sizeof(float) * shape->num_floats); - if (!bounds) { - // return error - return false; - } - - set->state = KM_SET_STATE_INIT; - set->floats = NULL; - set->ints = NULL; - set->shape = *shape; - set->num_rows = 0; - set->capacity = 0; - set->bounds = bounds; - - return km_set_grow(set, MAX(MIN_ROWS, row_capacity + 1)); -} - -// finalize data set -void -km_set_fini(km_set_t * const set) { - if (set->state == KM_SET_STATE_FINI) { - return; - } - - if (set->bounds) { - // free bounds - free(set->bounds); - set->bounds = NULL; - } - - if (set->floats) { - // free floats - free(set->floats); - set->floats = NULL; - } - - if (set->ints) { - // free ints - free(set->ints); - set->ints = NULL; - } - - // shrink capacity - set->capacity = 0; - - // set state - set->state = KM_SET_STATE_FINI; -} - -// append rows to data set, growing set if necessary -bool -km_set_push( - km_set_t * const set, - const size_t num_rows, - const float * const floats, - const int * const ints -) { - // check state - if (set->state != KM_SET_STATE_INIT) { - // return failure - return false; - } - - const size_t capacity_needed = set->num_rows + num_rows; - // FIXME: potential overflow here - if (capacity_needed >= set->capacity) { - // crappy growth algorithm - const size_t new_capacity = 2 * capacity_needed + 1; - - // resize storage - if (!km_set_grow(set, MAX(MIN_ROWS, new_capacity))) { - return false; - } - } - - // copy floats - const size_t num_floats = set->shape.num_floats; - if (num_floats > 0) { - float * const dst = set->floats + num_floats * set->num_rows; - const size_t stride = sizeof(float) * num_floats; - - // copy floats - memcpy(dst, floats, stride * num_rows); - - if (!set->num_rows) { - // there were no rows, so populate bounds with first row - memcpy(set->bounds, floats, stride); - memcpy(set->bounds + num_floats, floats, stride); - } - - for (size_t i = 0; i < num_rows; i++) { - for (size_t j = 0; j < num_floats; j++) { - const float val = floats[i * num_floats + j]; - - if (val < set->bounds[j]) { - // update min bound - set->bounds[j] = val; - } - - if (val > set->bounds[num_floats + j]) { - // update max bound - set->bounds[num_floats + j] = val; - } - } - } - } - - // copy ints - const size_t num_ints = set->shape.num_ints; - if (num_ints > 0) { - int * const dst = set->ints + num_ints * set->num_rows; - const size_t stride = sizeof(int) * num_ints; - - // copy ints - memcpy(dst, ints, stride * num_rows); - } - - // increment row count - set->num_rows += num_rows; - - // return success - return true; -} - -bool -km_set_normalize( - km_set_t * const set -) { - const size_t num_floats = set->shape.num_floats; - - // check set state - if (set->state != KM_SET_STATE_INIT) { - // return failure - return false; - } - - // normalize values - for (size_t i = 0; i < set->num_rows; i++) { - for (size_t j = 0; j < num_floats; j++) { - const size_t ofs = i * num_floats + j; - const float val = set->floats[ofs], - min = set->bounds[j], - max = set->bounds[num_floats + j]; - - // normalize and write value - set->floats[ofs] = (val - min) / (max - min); - } - } - - // set state - set->state = KM_SET_STATE_NORMALIZED; - - // return success - return true; -} - -// get row from data set -float * -km_set_get_row( - const km_set_t * const set, - const size_t i -) { - const size_t num_floats = set->shape.num_floats; - return set->floats + i * num_floats; -} - -// init a set with num_clusters clusters of shape num_floats by picking -// random cluster centers -bool -km_set_init_rand_clusters( - km_set_t * const cs, - const size_t num_floats, - const size_t num_clusters, - km_rand_src_t * const rs -) { - // init cluster shape - const km_shape_t shape = { - .num_floats = num_floats, - .num_ints = 1, - }; - - // generate random cluster centers - float floats[num_floats * num_clusters]; - if (!km_rand_src_fill(rs, num_floats * num_clusters, floats)) { - // return failure - return false; - } - - // FIXME: should probably be heap-allocated - int ints[num_clusters]; - memset(ints, 0, sizeof(ints)); - - // init cluster set - if (!km_set_init(cs, &shape, num_clusters)) { - // return failure - return false; - } - - // add data, return result - return km_set_push(cs, num_clusters, floats, ints); -} |