From 159c42498365913f6ed400e13c77798d041a7d43 Mon Sep 17 00:00:00 2001 From: Paul Duncan Date: Mon, 4 Feb 2019 15:33:35 -0500 Subject: add rand-{path,erand48}, minor fixes --- Makefile | 3 +- km-init-kmeans.c | 34 +++++++++++- km-rand-erand48.c | 89 ++++++++++++++++++++++++++++++++ km-rand-libc.c | 56 ++++++++++++++++++++ km-rand-path.c | 152 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ km-rand.c | 53 ------------------- km.h | 10 +++- main.c | 6 ++- 8 files changed, 343 insertions(+), 60 deletions(-) create mode 100644 km-rand-erand48.c create mode 100644 km-rand-libc.c create mode 100644 km-rand-path.c diff --git a/Makefile b/Makefile index 99aebc9..0372ecf 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,8 @@ APP=km-test CFLAGS=-W -Wall -Wextra -Werror -pedantic -std=c11 -O2 OBJS=km-set.o km-draw.o km-load.o km-find.o km-rand.o km-solve.o \ - km-init-rand.o km-init-forgy.o km-init-kmeans.o km-init.o main.o + km-init-rand.o km-init-forgy.o km-init-kmeans.o km-init.o main.o \ + km-rand-libc.o km-rand-path.o km-rand-erand48.o LIBS=-lm .PHONY=all clean diff --git a/km-init-kmeans.c b/km-init-kmeans.c index 61d67a7..f0c1b5f 100644 --- a/km-init-kmeans.c +++ b/km-init-kmeans.c @@ -4,6 +4,24 @@ #include "util.h" #include "km.h" +// sum the squared distance of every row in set from this point +static inline float +sum_distance_squared( + const float * const floats, + const km_set_t * const set +) { + float r = 0; + + // sum squared distances + for (size_t i = 0; i < set->num_rows; i++) { + const float * const vals = km_set_get_row(set, i); + r += distance_squared(set->shape.num_floats, floats, vals); + } + + // return result + return r; +} + static const float * get_random_row( const km_set_t * const set, @@ -32,6 +50,7 @@ km_init_kmeans( stride = sizeof(float) * num_floats; // row values (filled below) + float sums[num_clusters]; float floats[num_floats * num_clusters]; // pick first row randomly @@ -41,6 +60,9 @@ km_init_kmeans( // copy row values to floats buffer memcpy(floats, vals, stride); + + // copy row values to floats buffer + sums[0] = sum_distance_squared(floats, set); } for (size_t i = 1; i < num_clusters;) { @@ -48,10 +70,12 @@ km_init_kmeans( const float * const vals = get_random_row(set, rs); // calculate squared distance to nearest cluster + size_t ofs = 0; float min_d2 = FLT_MAX; for (size_t j = 0; j < i; j++) { const float d2 = distance_squared(num_floats, vals, floats + j * stride); if (d2 < min_d2) { + ofs = j; min_d2 = d2; } } @@ -64,9 +88,15 @@ km_init_kmeans( } // check random value - if (rand_val < min_d2) { + if (rand_val / sums[ofs] < min_d2) { + // get destination + float * const dst = floats + i * num_floats; + // copy row values - memcpy(floats + i * num_floats, vals, stride); + memcpy(dst, vals, stride); + + // calculate total cluster distance + sums[i] = sum_distance_squared(dst, set); // increment cluster count i++; diff --git a/km-rand-erand48.c b/km-rand-erand48.c new file mode 100644 index 0000000..de84b93 --- /dev/null +++ b/km-rand-erand48.c @@ -0,0 +1,89 @@ +#include // bool +#define _DEFAULT_SOURCE +#include // drand48_r() +#include // fopen() +#include "util.h" +#include "km.h" + +// get N get_floats +static bool +on_get_floats( + km_rand_t * const rs, + const size_t num_vals, + float * const vals +) { + unsigned short * const data = rs->data; + + // generate results + for (size_t i = 0; i < num_vals; i++) { + vals[i] = erand48(data); + } + + // return success + return true; +} + +// fill sizes callback for system random source +static bool +on_get_sizes( + km_rand_t * const rs, + const size_t num_vals, + size_t * const vals +) { + unsigned short * const data = rs->data; + + // generate results + for (size_t i = 0; i < num_vals; i++) { + vals[i] = nrand48(data); + } + + // return success + return true; +} + +static void +on_fini( + km_rand_t * const rs +) { + unsigned short * data = rs->data; + + // free rng data + free(data); + + // clear pointer + rs->data = NULL; +} + +// path random source callbacks +static const km_rand_cbs_t +ERAND48_RAND_CBS = { + .get_floats = on_get_floats, + .get_sizes = on_get_sizes, + .fini = on_fini, +}; + +// init system random source (uses system rand()) +bool +km_rand_init_erand48( + km_rand_t * const rs, + const uint64_t seed +) { + // alloc rng data + unsigned short * const data = malloc(3 * sizeof(unsigned short)); + if (!data) { + // return failure + return false; + } + + // populate seed + data[0] = seed & 0xffff; + data[1] = (seed >> 16) & 0xffff; + data[2] = (seed >> 32) & 0xffff; + + // populate context + rs->cbs = &ERAND48_RAND_CBS; + rs->data = data; + + // return success + return true; +} diff --git a/km-rand-libc.c b/km-rand-libc.c new file mode 100644 index 0000000..da1e4b0 --- /dev/null +++ b/km-rand-libc.c @@ -0,0 +1,56 @@ +#include // bool +#include "util.h" +#include "km.h" + +// libc random source get_floats +static bool +on_get_floats( + km_rand_t * const rs, + const size_t num_vals, + float * const vals +) { + UNUSED(rs); + + // generate results + for (size_t i = 0; i < num_vals; i++) { + vals[i] = 1.0 * rand() / RAND_MAX; + } + + // return success + return true; +} + +// fill sizes callback for system random source +static bool +on_get_sizes( + km_rand_t * const rs, + const size_t num_vals, + size_t * const vals +) { + UNUSED(rs); + + // generate results + for (size_t i = 0; i < num_vals; i++) { + vals[i] = rand(); + } + + // return success + return true; +} + +// system random source callbacks +static const km_rand_cbs_t +LIBC_RAND_CBS = { + .get_floats = on_get_floats, + .get_sizes = on_get_sizes, + .fini = NULL, +}; + +// init system random source (uses system rand()) +void +km_rand_init_libc( + km_rand_t * const rs +) { + rs->cbs = &LIBC_RAND_CBS; + rs->data = NULL; +} diff --git a/km-rand-path.c b/km-rand-path.c new file mode 100644 index 0000000..6597038 --- /dev/null +++ b/km-rand-path.c @@ -0,0 +1,152 @@ +#include // bool +#include // memset() +#include // fopen() +#include "util.h" +#include "km.h" + +#define MAX_BUF_ITEMS (1 << 20) + +typedef struct { + FILE *fh; + size_t ofs; + uint64_t buf[MAX_BUF_ITEMS]; +} ctx_t; + +static inline bool +refill( + ctx_t * const ctx +) { + ctx->ofs = 0; + // D("reading %zu bytes", sizeof(ctx->buf)); + return !!fread(ctx->buf, sizeof(ctx->buf), 1, ctx->fh); +} + +static bool +next( + ctx_t * const ctx, + uint64_t * const r +) { + if ((ctx->ofs >= MAX_BUF_ITEMS - 1) && !refill(ctx)) { + // return failure + return false; + } + + // get next value, increment offset + *r = ctx->buf[ctx->ofs]; + ctx->ofs++; + + // return success + return true; +} + +// get N get_floats +static bool +on_get_floats( + km_rand_t * const rs, + const size_t num_vals, + float * const vals +) { + ctx_t * const ctx = rs->data; + + // generate results + for (size_t i = 0; i < num_vals; i++) { + uint64_t v; + + if (!next(ctx, &v)) { + // return failure + return false; + } + + // add value + vals[i] = 1.0 * v / RAND_MAX; + } + + // return success + return true; +} + +// fill sizes callback for system random source +static bool +on_get_sizes( + km_rand_t * const rs, + const size_t num_vals, + size_t * const vals +) { + ctx_t * const ctx = rs->data; + + // generate results + for (size_t i = 0; i < num_vals; i++) { + uint64_t v; + + if (!next(ctx, &v)) { + // return failure + return false; + } + + // add value + vals[i] = v; + } + + // return success + return true; +} + +static void +on_fini( + km_rand_t * const rs +) { + ctx_t * const ctx = rs->data; + + // close file + fclose(ctx->fh); + + // free context + free(ctx); + + rs->data = NULL; +} + +// path random source callbacks +static const km_rand_cbs_t +PATH_RAND_CBS = { + .get_floats = on_get_floats, + .get_sizes = on_get_sizes, + .fini = on_fini, +}; + +// init system random source (uses system rand()) +bool +km_rand_init_path( + km_rand_t * const rs, + const char * const path +) { + // open file + FILE *fh = fopen(path, "rb"); + if (!fh) { + // return failure + return false; + } + + // alloc context + ctx_t * const ctx = malloc(sizeof(ctx_t)); + if (!ctx) { + // return failure + return false; + } + + // populate context + ctx->fh = fh; + + // fill buffer + if (!refill(ctx)) { + // return failure + return false; + } + + // populate random source + rs->cbs = &PATH_RAND_CBS; + rs->data = ctx; + + // return success + return true; +} diff --git a/km-rand.c b/km-rand.c index bb3a2cc..e777069 100644 --- a/km-rand.c +++ b/km-rand.c @@ -31,56 +31,3 @@ km_rand_fini( rs->cbs->fini(rs); } } - -// libc random source get_floats -static bool -rand_libc_get_floats( - km_rand_t * const rs, - const size_t num_vals, - float * const vals -) { - UNUSED(rs); - - // generate results - for (size_t i = 0; i < num_vals; i++) { - vals[i] = 1.0 * rand() / RAND_MAX; - } - - // return success - return true; -} - -// fill sizes callback for system random source -static bool -rand_libc_get_sizes( - km_rand_t * const rs, - const size_t num_vals, - size_t * const vals -) { - UNUSED(rs); - - // generate results - for (size_t i = 0; i < num_vals; i++) { - vals[i] = rand(); - } - - // return success - return true; -} - -// system random source callbacks -static const km_rand_cbs_t -RAND_LIBC_CBS = { - .get_floats = rand_libc_get_floats, - .get_sizes = rand_libc_get_sizes, - .fini = NULL, -}; - -// init system random source (uses system rand()) -void -km_rand_init_libc( - km_rand_t * const rs -) { - rs->cbs = &RAND_LIBC_CBS; - rs->data = NULL; -} diff --git a/km.h b/km.h index 7858933..097026e 100644 --- a/km.h +++ b/km.h @@ -30,12 +30,15 @@ _Bool km_rand_get_sizes(km_rand_t * const, const size_t, size_t * const); // finalize random source void km_rand_fini(km_rand_t * const); -// init libc random source (use libc rand()) +// init libc rng (use libc rand()) void km_rand_init_libc(km_rand_t *); -// init random file source (e.g. "/dev/urandom") +// init file source rng (e.g. "/dev/urandom") _Bool km_rand_init_path(km_rand_t * const, const char *); +// init erand48 rng (use erand48()) +_Bool km_rand_init_erand48(km_rand_t * const, const uint64_t); + // shape of data set typedef struct { size_t num_floats, @@ -141,7 +144,10 @@ typedef void (*km_find_data_cb_t)( ); typedef struct { + // maximum number of clusters size_t max_clusters; + + // maximum number of tests per cluster size_t num_tests; km_find_init_cb_t on_init; diff --git a/main.c b/main.c index 8e94e87..5629cd5 100644 --- a/main.c +++ b/main.c @@ -264,7 +264,7 @@ save_on_best( UNUSED(cb_data); // convert rank to channel brightness - const uint8_t ch = 0x66 + (0xff - 0x66) * (1.0 * rank) / (MAX_BEST - 1); + const uint8_t ch = 0x33 + (0xff - 0x33) * (1.0 * rank + 1) / (MAX_BEST); const uint32_t color = (ch & 0xff) << 16; // const uint32_t color = 0xff0000; // D("rank = %zu, score = %0.3f, size = %zu, color = %06x", rank, score, set->num_rows, color); @@ -324,9 +324,11 @@ int main(int argc, char *argv[]) { // init context ctx_t ctx; memset(&ctx, 0, sizeof(ctx_t)); - km_rand_init_libc(&(ctx.rs)); ctx.init_type = km_init_get_type(init_type_name); + // init ctx rng + km_rand_init_erand48(&(ctx.rs), rand()); + // init data set km_set_t set; if (!km_load_path(data_path, &LOAD_CBS, &set)) { -- cgit v1.2.3