diff options
author | Paul Duncan <pabs@pablotron.org> | 2019-02-05 00:22:15 -0500 |
---|---|---|
committer | Paul Duncan <pabs@pablotron.org> | 2019-02-05 00:22:15 -0500 |
commit | f557d1f49a2914c6084dd18efc783395228d8ce0 (patch) | |
tree | 51111fc899f01956cbab948b87c241478576870d /src/km-set.c | |
parent | b5065ea43cb13c0b553874305b53963176c70f59 (diff) | |
download | kmeans-f557d1f49a2914c6084dd18efc783395228d8ce0.tar.bz2 kmeans-f557d1f49a2914c6084dd18efc783395228d8ce0.zip |
mv *.[hc] src/
Diffstat (limited to 'src/km-set.c')
-rw-r--r-- | src/km-set.c | 267 |
1 files changed, 267 insertions, 0 deletions
diff --git a/src/km-set.c b/src/km-set.c new file mode 100644 index 0000000..69ae6c0 --- /dev/null +++ b/src/km-set.c @@ -0,0 +1,267 @@ +#include <stdbool.h> // bool +#include <stdint.h> // size_t +#include <string.h> // memcpy() +#include <stdlib.h> // rand() +#include <errno.h> // errno +#include "util.h" +#include "km.h" + +#define MIN_ROWS (4096 / sizeof(float)) + +// grow data set +static bool +km_set_grow( + km_set_t * const set, + const size_t capacity +) { + float *floats = NULL; + const size_t floats_size = sizeof(float) * set->shape.num_floats * capacity; + // fprintf(stderr, "floats_size = %zu\n", floats_size); + if (floats_size > 0) { + // alloc floats + floats = realloc(set->floats, floats_size); + if (!floats) { + // return failure + return false; + } + } + + int *ints = NULL; + const size_t ints_size = sizeof(int) * set->shape.num_ints * capacity; + // fprintf(stderr, "ints_size = %zu\n", ints_size); + if (ints_size > 0) { + // alloc ints + ints = realloc(set->ints, ints_size); + if (!ints) { + // return failure + return false; + } + } + + // update set + set->floats = floats; + set->ints = ints; + set->capacity = capacity; + + // return success + return true; +} + +// init data set with shape and initial size +bool +km_set_init( + km_set_t * const set, + const km_shape_t * const shape, + const size_t row_capacity +) { + // alloc bounds + float * const bounds = malloc(2 * sizeof(float) * shape->num_floats); + if (!bounds) { + // return error + return false; + } + + set->state = KM_SET_STATE_INIT; + set->floats = NULL; + set->ints = NULL; + set->shape = *shape; + set->num_rows = 0; + set->capacity = 0; + set->bounds = bounds; + + return km_set_grow(set, MAX(MIN_ROWS, row_capacity + 1)); +} + +// finalize data set +void +km_set_fini(km_set_t * const set) { + if (set->state == KM_SET_STATE_FINI) { + return; + } + + if (set->bounds) { + // free bounds + free(set->bounds); + set->bounds = NULL; + } + + if (set->floats) { + // free floats + free(set->floats); + set->floats = NULL; + } + + if (set->ints) { + // free ints + free(set->ints); + set->ints = NULL; + } + + // shrink capacity + set->capacity = 0; + + // set state + set->state = KM_SET_STATE_FINI; +} + +// append rows to data set, growing set if necessary +bool +km_set_push( + km_set_t * const set, + const size_t num_rows, + const float * const floats, + const int * const ints +) { + // check state + if (set->state != KM_SET_STATE_INIT) { + // return failure + return false; + } + + const size_t capacity_needed = set->num_rows + num_rows; + // FIXME: potential overflow here + if (capacity_needed >= set->capacity) { + // crappy growth algorithm + const size_t new_capacity = 2 * capacity_needed + 1; + + // resize storage + if (!km_set_grow(set, MAX(MIN_ROWS, new_capacity))) { + return false; + } + } + + // copy floats + const size_t num_floats = set->shape.num_floats; + if (num_floats > 0) { + float * const dst = set->floats + num_floats * set->num_rows; + const size_t stride = sizeof(float) * num_floats; + + // copy floats + memcpy(dst, floats, stride * num_rows); + + if (!set->num_rows) { + // there were no rows, so populate bounds with first row + memcpy(set->bounds, floats, stride); + memcpy(set->bounds + num_floats, floats, stride); + } + + for (size_t i = 0; i < num_rows; i++) { + for (size_t j = 0; j < num_floats; j++) { + const float val = floats[i * num_floats + j]; + + if (val < set->bounds[j]) { + // update min bound + set->bounds[j] = val; + } + + if (val > set->bounds[num_floats + j]) { + // update max bound + set->bounds[num_floats + j] = val; + } + } + } + } + + // copy ints + const size_t num_ints = set->shape.num_ints; + if (num_ints > 0) { + int * const dst = set->ints + num_ints * set->num_rows; + const size_t stride = sizeof(int) * num_ints; + + // copy ints + memcpy(dst, ints, stride * num_rows); + } + + // increment row count + set->num_rows += num_rows; + + // return success + return true; +} + +bool +km_set_copy( + km_set_t * const dst, + const km_set_t * const src +) { + if (src->state != KM_SET_STATE_INIT && src->state != KM_SET_STATE_NORMALIZED) { + // return failure + D("invalid state"); + return false; + } + + // init dst set + if (!km_set_init(dst, &(src->shape), src->num_rows)) { + // return failure + D("km_set_init()"); + return false; + } + + // copy floats + if (src->shape.num_floats > 0) { + const size_t stride = sizeof(float) * src->shape.num_floats; + + // copy floats + memcpy(dst->floats, src->floats, stride * src->num_rows); + + // copy bounds + memcpy(dst->bounds, src->bounds, 2 * stride); + } + + // copy ints + const size_t num_ints = src->shape.num_ints; + if (num_ints > 0) { + const size_t stride = sizeof(int) * num_ints; + + // copy ints + memcpy(dst->ints, src->ints, stride * src->num_rows); + } + + // increment row count + dst->num_rows = src->num_rows; + + // return success + return true; +} + +bool +km_set_normalize( + km_set_t * const set +) { + const size_t num_floats = set->shape.num_floats; + + // check set state + if (set->state != KM_SET_STATE_INIT) { + // return failure + return false; + } + + // normalize values + for (size_t i = 0; i < set->num_rows; i++) { + for (size_t j = 0; j < num_floats; j++) { + const size_t ofs = i * num_floats + j; + const float val = set->floats[ofs], + min = set->bounds[j], + max = set->bounds[num_floats + j]; + + // normalize and write value + set->floats[ofs] = (val - min) / (max - min); + } + } + + // set state + set->state = KM_SET_STATE_NORMALIZED; + + // return success + return true; +} + +// get row from data set +float * +km_set_get_row( + const km_set_t * const set, + const size_t i +) { + const size_t num_floats = set->shape.num_floats; + return set->floats + i * num_floats; +} |