From b3d3cebbc3dae2c975b9bb0f0c77f5dc845d7fb8 Mon Sep 17 00:00:00 2001 From: Paul Duncan Date: Sun, 3 Feb 2019 04:56:01 -0500 Subject: mv km{,-set}.c --- Makefile | 2 +- km-set.c | 249 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ km.c | 249 --------------------------------------------------------------- 3 files changed, 250 insertions(+), 250 deletions(-) create mode 100644 km-set.c delete mode 100644 km.c diff --git a/Makefile b/Makefile index 6ddfbc0..61e8f9e 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ APP=km-test CFLAGS=-W -Wall -Wextra -pedantic -std=c11 -O2 -OBJS=km.o km-draw.o km-load.o km-find.o km-rand-src.o km-solve.o main.o +OBJS=km-set.o km-draw.o km-load.o km-find.o km-rand-src.o km-solve.o main.o LIBS=-lm .PHONY=all clean diff --git a/km-set.c b/km-set.c new file mode 100644 index 0000000..5b9ae4c --- /dev/null +++ b/km-set.c @@ -0,0 +1,249 @@ +#include // bool +#include // size_t +#include // memcpy() +#include // rand() +#include "util.h" +#include "km.h" + +#define MIN_ROWS (4096 / sizeof(float)) + +// grow data set +static bool +km_set_grow( + km_set_t * const set, + const size_t capacity +) { + // alloc floats + const size_t num_floats = set->shape.num_floats * capacity; + float * const floats = malloc(sizeof(float) * num_floats); + if (!floats) { + // return failure + return false; + } + + // alloc ints + const size_t num_ints = set->shape.num_ints * capacity; + int * const ints = malloc(sizeof(int) * num_ints); + if (!ints) { + // return failure + return false; + } + + // update set + set->floats = floats; + set->ints = ints; + set->capacity = capacity; + + // return success + return true; +} + +// init data set with shape and initial size +bool +km_set_init( + km_set_t * const set, + const km_shape_t * const shape, + const size_t row_capacity +) { + // alloc bounds + float * const bounds = malloc(2 * sizeof(float) * shape->num_floats); + if (!bounds) { + // return error + return false; + } + + set->state = KM_SET_STATE_INIT; + set->floats = NULL; + set->ints = NULL; + set->shape = *shape; + set->num_rows = 0; + set->capacity = 0; + set->bounds = bounds; + + return km_set_grow(set, MAX(MIN_ROWS, row_capacity + 1)); +} + +// finalize data set +void +km_set_fini(km_set_t * const set) { + if (set->state == KM_SET_STATE_FINI) { + return; + } + + if (set->bounds) { + // free bounds + free(set->bounds); + set->bounds = NULL; + } + + if (set->floats) { + // free floats + free(set->floats); + set->floats = NULL; + } + + if (set->ints) { + // free ints + free(set->ints); + set->ints = NULL; + } + + // shrink capacity + set->capacity = 0; + + // set state + set->state = KM_SET_STATE_FINI; +} + +// append rows to data set, growing set if necessary +bool +km_set_push( + km_set_t * const set, + const size_t num_rows, + const float * const floats, + const int * const ints +) { + // check state + if (set->state != KM_SET_STATE_INIT) { + // return failure + return false; + } + + const size_t capacity_needed = set->num_rows + num_rows; + // FIXME: potential overflow here + if (capacity_needed >= set->capacity) { + // crappy growth algorithm + const size_t new_capacity = 2 * capacity_needed + 1; + + // resize storage + if (!km_set_grow(set, MAX(MIN_ROWS, new_capacity))) { + return false; + } + } + + // copy floats + const size_t num_floats = set->shape.num_floats; + if (num_floats > 0) { + float * const dst = set->floats + num_floats * set->num_rows; + const size_t stride = sizeof(float) * num_floats; + + // copy floats + memcpy(dst, floats, stride * num_rows); + + if (!set->num_rows) { + // there were no rows, so populate bounds with first row + memcpy(set->bounds, floats, stride); + memcpy(set->bounds + num_floats, floats, stride); + } + + for (size_t i = 0; i < num_rows; i++) { + for (size_t j = 0; j < num_floats; j++) { + const float val = floats[i * num_floats + j]; + + if (val < set->bounds[j]) { + // update min bound + set->bounds[j] = val; + } + + if (val > set->bounds[num_floats + j]) { + // update max bound + set->bounds[num_floats + j] = val; + } + } + } + } + + // copy ints + const size_t num_ints = set->shape.num_ints; + if (num_ints > 0) { + int * const dst = set->ints + num_ints * set->num_rows; + const size_t stride = sizeof(int) * num_ints; + + // copy ints + memcpy(dst, ints, stride * num_rows); + } + + // increment row count + set->num_rows += num_rows; + + // return success + return true; +} + +bool +km_set_normalize( + km_set_t * const set +) { + const size_t num_floats = set->shape.num_floats; + + // check set state + if (set->state != KM_SET_STATE_INIT) { + // return failure + return false; + } + + // normalize values + for (size_t i = 0; i < set->num_rows; i++) { + for (size_t j = 0; j < num_floats; j++) { + const size_t ofs = i * num_floats + j; + const float val = set->floats[ofs], + min = set->bounds[j], + max = set->bounds[num_floats + j]; + + // normalize and write value + set->floats[ofs] = (val - min) / (max - min); + } + } + + // set state + set->state = KM_SET_STATE_NORMALIZED; + + // return success + return true; +} + +// get row from data set +float * +km_set_get_row( + const km_set_t * const set, + const size_t i +) { + const size_t num_floats = set->shape.num_floats; + return set->floats + i * num_floats; +} + +// init a set with num_clusters clusters of shape num_floats by picking +// random cluster centers +bool +km_set_init_rand_clusters( + km_set_t * const cs, + const size_t num_floats, + const size_t num_clusters, + km_rand_src_t * const rs +) { + // init cluster shape + const km_shape_t shape = { + .num_floats = num_floats, + .num_ints = 1, + }; + + // generate random cluster centers + float floats[num_floats * num_clusters]; + if (!km_rand_src_fill(rs, num_floats * num_clusters, floats)) { + // return failure + return false; + } + + // FIXME: should probably be heap-allocated + int ints[num_clusters]; + memset(ints, 0, sizeof(ints)); + + // init cluster set + if (!km_set_init(cs, &shape, num_clusters)) { + // return failure + return false; + } + + // add data, return result + return km_set_push(cs, num_clusters, floats, ints); +} diff --git a/km.c b/km.c deleted file mode 100644 index 5b9ae4c..0000000 --- a/km.c +++ /dev/null @@ -1,249 +0,0 @@ -#include // bool -#include // size_t -#include // memcpy() -#include // rand() -#include "util.h" -#include "km.h" - -#define MIN_ROWS (4096 / sizeof(float)) - -// grow data set -static bool -km_set_grow( - km_set_t * const set, - const size_t capacity -) { - // alloc floats - const size_t num_floats = set->shape.num_floats * capacity; - float * const floats = malloc(sizeof(float) * num_floats); - if (!floats) { - // return failure - return false; - } - - // alloc ints - const size_t num_ints = set->shape.num_ints * capacity; - int * const ints = malloc(sizeof(int) * num_ints); - if (!ints) { - // return failure - return false; - } - - // update set - set->floats = floats; - set->ints = ints; - set->capacity = capacity; - - // return success - return true; -} - -// init data set with shape and initial size -bool -km_set_init( - km_set_t * const set, - const km_shape_t * const shape, - const size_t row_capacity -) { - // alloc bounds - float * const bounds = malloc(2 * sizeof(float) * shape->num_floats); - if (!bounds) { - // return error - return false; - } - - set->state = KM_SET_STATE_INIT; - set->floats = NULL; - set->ints = NULL; - set->shape = *shape; - set->num_rows = 0; - set->capacity = 0; - set->bounds = bounds; - - return km_set_grow(set, MAX(MIN_ROWS, row_capacity + 1)); -} - -// finalize data set -void -km_set_fini(km_set_t * const set) { - if (set->state == KM_SET_STATE_FINI) { - return; - } - - if (set->bounds) { - // free bounds - free(set->bounds); - set->bounds = NULL; - } - - if (set->floats) { - // free floats - free(set->floats); - set->floats = NULL; - } - - if (set->ints) { - // free ints - free(set->ints); - set->ints = NULL; - } - - // shrink capacity - set->capacity = 0; - - // set state - set->state = KM_SET_STATE_FINI; -} - -// append rows to data set, growing set if necessary -bool -km_set_push( - km_set_t * const set, - const size_t num_rows, - const float * const floats, - const int * const ints -) { - // check state - if (set->state != KM_SET_STATE_INIT) { - // return failure - return false; - } - - const size_t capacity_needed = set->num_rows + num_rows; - // FIXME: potential overflow here - if (capacity_needed >= set->capacity) { - // crappy growth algorithm - const size_t new_capacity = 2 * capacity_needed + 1; - - // resize storage - if (!km_set_grow(set, MAX(MIN_ROWS, new_capacity))) { - return false; - } - } - - // copy floats - const size_t num_floats = set->shape.num_floats; - if (num_floats > 0) { - float * const dst = set->floats + num_floats * set->num_rows; - const size_t stride = sizeof(float) * num_floats; - - // copy floats - memcpy(dst, floats, stride * num_rows); - - if (!set->num_rows) { - // there were no rows, so populate bounds with first row - memcpy(set->bounds, floats, stride); - memcpy(set->bounds + num_floats, floats, stride); - } - - for (size_t i = 0; i < num_rows; i++) { - for (size_t j = 0; j < num_floats; j++) { - const float val = floats[i * num_floats + j]; - - if (val < set->bounds[j]) { - // update min bound - set->bounds[j] = val; - } - - if (val > set->bounds[num_floats + j]) { - // update max bound - set->bounds[num_floats + j] = val; - } - } - } - } - - // copy ints - const size_t num_ints = set->shape.num_ints; - if (num_ints > 0) { - int * const dst = set->ints + num_ints * set->num_rows; - const size_t stride = sizeof(int) * num_ints; - - // copy ints - memcpy(dst, ints, stride * num_rows); - } - - // increment row count - set->num_rows += num_rows; - - // return success - return true; -} - -bool -km_set_normalize( - km_set_t * const set -) { - const size_t num_floats = set->shape.num_floats; - - // check set state - if (set->state != KM_SET_STATE_INIT) { - // return failure - return false; - } - - // normalize values - for (size_t i = 0; i < set->num_rows; i++) { - for (size_t j = 0; j < num_floats; j++) { - const size_t ofs = i * num_floats + j; - const float val = set->floats[ofs], - min = set->bounds[j], - max = set->bounds[num_floats + j]; - - // normalize and write value - set->floats[ofs] = (val - min) / (max - min); - } - } - - // set state - set->state = KM_SET_STATE_NORMALIZED; - - // return success - return true; -} - -// get row from data set -float * -km_set_get_row( - const km_set_t * const set, - const size_t i -) { - const size_t num_floats = set->shape.num_floats; - return set->floats + i * num_floats; -} - -// init a set with num_clusters clusters of shape num_floats by picking -// random cluster centers -bool -km_set_init_rand_clusters( - km_set_t * const cs, - const size_t num_floats, - const size_t num_clusters, - km_rand_src_t * const rs -) { - // init cluster shape - const km_shape_t shape = { - .num_floats = num_floats, - .num_ints = 1, - }; - - // generate random cluster centers - float floats[num_floats * num_clusters]; - if (!km_rand_src_fill(rs, num_floats * num_clusters, floats)) { - // return failure - return false; - } - - // FIXME: should probably be heap-allocated - int ints[num_clusters]; - memset(ints, 0, sizeof(ints)); - - // init cluster set - if (!km_set_init(cs, &shape, num_clusters)) { - // return failure - return false; - } - - // add data, return result - return km_set_push(cs, num_clusters, floats, ints); -} -- cgit v1.2.3