aboutsummaryrefslogtreecommitdiff
path: root/src/km-set.c
diff options
context:
space:
mode:
authorPaul Duncan <pabs@pablotron.org>2019-02-05 00:22:15 -0500
committerPaul Duncan <pabs@pablotron.org>2019-02-05 00:22:15 -0500
commitf557d1f49a2914c6084dd18efc783395228d8ce0 (patch)
tree51111fc899f01956cbab948b87c241478576870d /src/km-set.c
parentb5065ea43cb13c0b553874305b53963176c70f59 (diff)
downloadkmeans-f557d1f49a2914c6084dd18efc783395228d8ce0.tar.bz2
kmeans-f557d1f49a2914c6084dd18efc783395228d8ce0.zip
mv *.[hc] src/
Diffstat (limited to 'src/km-set.c')
-rw-r--r--src/km-set.c267
1 files changed, 267 insertions, 0 deletions
diff --git a/src/km-set.c b/src/km-set.c
new file mode 100644
index 0000000..69ae6c0
--- /dev/null
+++ b/src/km-set.c
@@ -0,0 +1,267 @@
+#include <stdbool.h> // bool
+#include <stdint.h> // size_t
+#include <string.h> // memcpy()
+#include <stdlib.h> // rand()
+#include <errno.h> // errno
+#include "util.h"
+#include "km.h"
+
+#define MIN_ROWS (4096 / sizeof(float))
+
+// grow data set
+static bool
+km_set_grow(
+ km_set_t * const set,
+ const size_t capacity
+) {
+ float *floats = NULL;
+ const size_t floats_size = sizeof(float) * set->shape.num_floats * capacity;
+ // fprintf(stderr, "floats_size = %zu\n", floats_size);
+ if (floats_size > 0) {
+ // alloc floats
+ floats = realloc(set->floats, floats_size);
+ if (!floats) {
+ // return failure
+ return false;
+ }
+ }
+
+ int *ints = NULL;
+ const size_t ints_size = sizeof(int) * set->shape.num_ints * capacity;
+ // fprintf(stderr, "ints_size = %zu\n", ints_size);
+ if (ints_size > 0) {
+ // alloc ints
+ ints = realloc(set->ints, ints_size);
+ if (!ints) {
+ // return failure
+ return false;
+ }
+ }
+
+ // update set
+ set->floats = floats;
+ set->ints = ints;
+ set->capacity = capacity;
+
+ // return success
+ return true;
+}
+
+// init data set with shape and initial size
+bool
+km_set_init(
+ km_set_t * const set,
+ const km_shape_t * const shape,
+ const size_t row_capacity
+) {
+ // alloc bounds
+ float * const bounds = malloc(2 * sizeof(float) * shape->num_floats);
+ if (!bounds) {
+ // return error
+ return false;
+ }
+
+ set->state = KM_SET_STATE_INIT;
+ set->floats = NULL;
+ set->ints = NULL;
+ set->shape = *shape;
+ set->num_rows = 0;
+ set->capacity = 0;
+ set->bounds = bounds;
+
+ return km_set_grow(set, MAX(MIN_ROWS, row_capacity + 1));
+}
+
+// finalize data set
+void
+km_set_fini(km_set_t * const set) {
+ if (set->state == KM_SET_STATE_FINI) {
+ return;
+ }
+
+ if (set->bounds) {
+ // free bounds
+ free(set->bounds);
+ set->bounds = NULL;
+ }
+
+ if (set->floats) {
+ // free floats
+ free(set->floats);
+ set->floats = NULL;
+ }
+
+ if (set->ints) {
+ // free ints
+ free(set->ints);
+ set->ints = NULL;
+ }
+
+ // shrink capacity
+ set->capacity = 0;
+
+ // set state
+ set->state = KM_SET_STATE_FINI;
+}
+
+// append rows to data set, growing set if necessary
+bool
+km_set_push(
+ km_set_t * const set,
+ const size_t num_rows,
+ const float * const floats,
+ const int * const ints
+) {
+ // check state
+ if (set->state != KM_SET_STATE_INIT) {
+ // return failure
+ return false;
+ }
+
+ const size_t capacity_needed = set->num_rows + num_rows;
+ // FIXME: potential overflow here
+ if (capacity_needed >= set->capacity) {
+ // crappy growth algorithm
+ const size_t new_capacity = 2 * capacity_needed + 1;
+
+ // resize storage
+ if (!km_set_grow(set, MAX(MIN_ROWS, new_capacity))) {
+ return false;
+ }
+ }
+
+ // copy floats
+ const size_t num_floats = set->shape.num_floats;
+ if (num_floats > 0) {
+ float * const dst = set->floats + num_floats * set->num_rows;
+ const size_t stride = sizeof(float) * num_floats;
+
+ // copy floats
+ memcpy(dst, floats, stride * num_rows);
+
+ if (!set->num_rows) {
+ // there were no rows, so populate bounds with first row
+ memcpy(set->bounds, floats, stride);
+ memcpy(set->bounds + num_floats, floats, stride);
+ }
+
+ for (size_t i = 0; i < num_rows; i++) {
+ for (size_t j = 0; j < num_floats; j++) {
+ const float val = floats[i * num_floats + j];
+
+ if (val < set->bounds[j]) {
+ // update min bound
+ set->bounds[j] = val;
+ }
+
+ if (val > set->bounds[num_floats + j]) {
+ // update max bound
+ set->bounds[num_floats + j] = val;
+ }
+ }
+ }
+ }
+
+ // copy ints
+ const size_t num_ints = set->shape.num_ints;
+ if (num_ints > 0) {
+ int * const dst = set->ints + num_ints * set->num_rows;
+ const size_t stride = sizeof(int) * num_ints;
+
+ // copy ints
+ memcpy(dst, ints, stride * num_rows);
+ }
+
+ // increment row count
+ set->num_rows += num_rows;
+
+ // return success
+ return true;
+}
+
+bool
+km_set_copy(
+ km_set_t * const dst,
+ const km_set_t * const src
+) {
+ if (src->state != KM_SET_STATE_INIT && src->state != KM_SET_STATE_NORMALIZED) {
+ // return failure
+ D("invalid state");
+ return false;
+ }
+
+ // init dst set
+ if (!km_set_init(dst, &(src->shape), src->num_rows)) {
+ // return failure
+ D("km_set_init()");
+ return false;
+ }
+
+ // copy floats
+ if (src->shape.num_floats > 0) {
+ const size_t stride = sizeof(float) * src->shape.num_floats;
+
+ // copy floats
+ memcpy(dst->floats, src->floats, stride * src->num_rows);
+
+ // copy bounds
+ memcpy(dst->bounds, src->bounds, 2 * stride);
+ }
+
+ // copy ints
+ const size_t num_ints = src->shape.num_ints;
+ if (num_ints > 0) {
+ const size_t stride = sizeof(int) * num_ints;
+
+ // copy ints
+ memcpy(dst->ints, src->ints, stride * src->num_rows);
+ }
+
+ // increment row count
+ dst->num_rows = src->num_rows;
+
+ // return success
+ return true;
+}
+
+bool
+km_set_normalize(
+ km_set_t * const set
+) {
+ const size_t num_floats = set->shape.num_floats;
+
+ // check set state
+ if (set->state != KM_SET_STATE_INIT) {
+ // return failure
+ return false;
+ }
+
+ // normalize values
+ for (size_t i = 0; i < set->num_rows; i++) {
+ for (size_t j = 0; j < num_floats; j++) {
+ const size_t ofs = i * num_floats + j;
+ const float val = set->floats[ofs],
+ min = set->bounds[j],
+ max = set->bounds[num_floats + j];
+
+ // normalize and write value
+ set->floats[ofs] = (val - min) / (max - min);
+ }
+ }
+
+ // set state
+ set->state = KM_SET_STATE_NORMALIZED;
+
+ // return success
+ return true;
+}
+
+// get row from data set
+float *
+km_set_get_row(
+ const km_set_t * const set,
+ const size_t i
+) {
+ const size_t num_floats = set->shape.num_floats;
+ return set->floats + i * num_floats;
+}