aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPaul Duncan <pabs@pablotron.org>2019-02-04 15:33:35 -0500
committerPaul Duncan <pabs@pablotron.org>2019-02-04 15:33:35 -0500
commit159c42498365913f6ed400e13c77798d041a7d43 (patch)
tree96cc3b14bbbbd44dc5173e89b85cd2c8228e86e4
parentf4a38b43d43f9395d6042d234a5e0ada7455ace1 (diff)
downloadkmeans-159c42498365913f6ed400e13c77798d041a7d43.tar.bz2
kmeans-159c42498365913f6ed400e13c77798d041a7d43.zip
add rand-{path,erand48}, minor fixes
-rw-r--r--Makefile3
-rw-r--r--km-init-kmeans.c34
-rw-r--r--km-rand-erand48.c89
-rw-r--r--km-rand-libc.c56
-rw-r--r--km-rand-path.c152
-rw-r--r--km-rand.c53
-rw-r--r--km.h10
-rw-r--r--main.c6
8 files changed, 343 insertions, 60 deletions
diff --git a/Makefile b/Makefile
index 99aebc9..0372ecf 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,8 @@
APP=km-test
CFLAGS=-W -Wall -Wextra -Werror -pedantic -std=c11 -O2
OBJS=km-set.o km-draw.o km-load.o km-find.o km-rand.o km-solve.o \
- km-init-rand.o km-init-forgy.o km-init-kmeans.o km-init.o main.o
+ km-init-rand.o km-init-forgy.o km-init-kmeans.o km-init.o main.o \
+ km-rand-libc.o km-rand-path.o km-rand-erand48.o
LIBS=-lm
.PHONY=all clean
diff --git a/km-init-kmeans.c b/km-init-kmeans.c
index 61d67a7..f0c1b5f 100644
--- a/km-init-kmeans.c
+++ b/km-init-kmeans.c
@@ -4,6 +4,24 @@
#include "util.h"
#include "km.h"
+// sum the squared distance of every row in set from this point
+static inline float
+sum_distance_squared(
+ const float * const floats,
+ const km_set_t * const set
+) {
+ float r = 0;
+
+ // sum squared distances
+ for (size_t i = 0; i < set->num_rows; i++) {
+ const float * const vals = km_set_get_row(set, i);
+ r += distance_squared(set->shape.num_floats, floats, vals);
+ }
+
+ // return result
+ return r;
+}
+
static const float *
get_random_row(
const km_set_t * const set,
@@ -32,6 +50,7 @@ km_init_kmeans(
stride = sizeof(float) * num_floats;
// row values (filled below)
+ float sums[num_clusters];
float floats[num_floats * num_clusters];
// pick first row randomly
@@ -41,6 +60,9 @@ km_init_kmeans(
// copy row values to floats buffer
memcpy(floats, vals, stride);
+
+ // copy row values to floats buffer
+ sums[0] = sum_distance_squared(floats, set);
}
for (size_t i = 1; i < num_clusters;) {
@@ -48,10 +70,12 @@ km_init_kmeans(
const float * const vals = get_random_row(set, rs);
// calculate squared distance to nearest cluster
+ size_t ofs = 0;
float min_d2 = FLT_MAX;
for (size_t j = 0; j < i; j++) {
const float d2 = distance_squared(num_floats, vals, floats + j * stride);
if (d2 < min_d2) {
+ ofs = j;
min_d2 = d2;
}
}
@@ -64,9 +88,15 @@ km_init_kmeans(
}
// check random value
- if (rand_val < min_d2) {
+ if (rand_val / sums[ofs] < min_d2) {
+ // get destination
+ float * const dst = floats + i * num_floats;
+
// copy row values
- memcpy(floats + i * num_floats, vals, stride);
+ memcpy(dst, vals, stride);
+
+ // calculate total cluster distance
+ sums[i] = sum_distance_squared(dst, set);
// increment cluster count
i++;
diff --git a/km-rand-erand48.c b/km-rand-erand48.c
new file mode 100644
index 0000000..de84b93
--- /dev/null
+++ b/km-rand-erand48.c
@@ -0,0 +1,89 @@
+#include <stdbool.h> // bool
+#define _DEFAULT_SOURCE
+#include <stdlib.h> // drand48_r()
+#include <stdio.h> // fopen()
+#include "util.h"
+#include "km.h"
+
+// get N get_floats
+static bool
+on_get_floats(
+ km_rand_t * const rs,
+ const size_t num_vals,
+ float * const vals
+) {
+ unsigned short * const data = rs->data;
+
+ // generate results
+ for (size_t i = 0; i < num_vals; i++) {
+ vals[i] = erand48(data);
+ }
+
+ // return success
+ return true;
+}
+
+// fill sizes callback for system random source
+static bool
+on_get_sizes(
+ km_rand_t * const rs,
+ const size_t num_vals,
+ size_t * const vals
+) {
+ unsigned short * const data = rs->data;
+
+ // generate results
+ for (size_t i = 0; i < num_vals; i++) {
+ vals[i] = nrand48(data);
+ }
+
+ // return success
+ return true;
+}
+
+static void
+on_fini(
+ km_rand_t * const rs
+) {
+ unsigned short * data = rs->data;
+
+ // free rng data
+ free(data);
+
+ // clear pointer
+ rs->data = NULL;
+}
+
+// path random source callbacks
+static const km_rand_cbs_t
+ERAND48_RAND_CBS = {
+ .get_floats = on_get_floats,
+ .get_sizes = on_get_sizes,
+ .fini = on_fini,
+};
+
+// init system random source (uses system rand())
+bool
+km_rand_init_erand48(
+ km_rand_t * const rs,
+ const uint64_t seed
+) {
+ // alloc rng data
+ unsigned short * const data = malloc(3 * sizeof(unsigned short));
+ if (!data) {
+ // return failure
+ return false;
+ }
+
+ // populate seed
+ data[0] = seed & 0xffff;
+ data[1] = (seed >> 16) & 0xffff;
+ data[2] = (seed >> 32) & 0xffff;
+
+ // populate context
+ rs->cbs = &ERAND48_RAND_CBS;
+ rs->data = data;
+
+ // return success
+ return true;
+}
diff --git a/km-rand-libc.c b/km-rand-libc.c
new file mode 100644
index 0000000..da1e4b0
--- /dev/null
+++ b/km-rand-libc.c
@@ -0,0 +1,56 @@
+#include <stdbool.h> // bool
+#include "util.h"
+#include "km.h"
+
+// libc random source get_floats
+static bool
+on_get_floats(
+ km_rand_t * const rs,
+ const size_t num_vals,
+ float * const vals
+) {
+ UNUSED(rs);
+
+ // generate results
+ for (size_t i = 0; i < num_vals; i++) {
+ vals[i] = 1.0 * rand() / RAND_MAX;
+ }
+
+ // return success
+ return true;
+}
+
+// fill sizes callback for system random source
+static bool
+on_get_sizes(
+ km_rand_t * const rs,
+ const size_t num_vals,
+ size_t * const vals
+) {
+ UNUSED(rs);
+
+ // generate results
+ for (size_t i = 0; i < num_vals; i++) {
+ vals[i] = rand();
+ }
+
+ // return success
+ return true;
+}
+
+// system random source callbacks
+static const km_rand_cbs_t
+LIBC_RAND_CBS = {
+ .get_floats = on_get_floats,
+ .get_sizes = on_get_sizes,
+ .fini = NULL,
+};
+
+// init system random source (uses system rand())
+void
+km_rand_init_libc(
+ km_rand_t * const rs
+) {
+ rs->cbs = &LIBC_RAND_CBS;
+ rs->data = NULL;
+}
diff --git a/km-rand-path.c b/km-rand-path.c
new file mode 100644
index 0000000..6597038
--- /dev/null
+++ b/km-rand-path.c
@@ -0,0 +1,152 @@
+#include <stdbool.h> // bool
+#include <string.h> // memset()
+#include <stdio.h> // fopen()
+#include "util.h"
+#include "km.h"
+
+#define MAX_BUF_ITEMS (1 << 20)
+
+typedef struct {
+ FILE *fh;
+ size_t ofs;
+ uint64_t buf[MAX_BUF_ITEMS];
+} ctx_t;
+
+static inline bool
+refill(
+ ctx_t * const ctx
+) {
+ ctx->ofs = 0;
+ // D("reading %zu bytes", sizeof(ctx->buf));
+ return !!fread(ctx->buf, sizeof(ctx->buf), 1, ctx->fh);
+}
+
+static bool
+next(
+ ctx_t * const ctx,
+ uint64_t * const r
+) {
+ if ((ctx->ofs >= MAX_BUF_ITEMS - 1) && !refill(ctx)) {
+ // return failure
+ return false;
+ }
+
+ // get next value, increment offset
+ *r = ctx->buf[ctx->ofs];
+ ctx->ofs++;
+
+ // return success
+ return true;
+}
+
+// get N get_floats
+static bool
+on_get_floats(
+ km_rand_t * const rs,
+ const size_t num_vals,
+ float * const vals
+) {
+ ctx_t * const ctx = rs->data;
+
+ // generate results
+ for (size_t i = 0; i < num_vals; i++) {
+ uint64_t v;
+
+ if (!next(ctx, &v)) {
+ // return failure
+ return false;
+ }
+
+ // add value
+ vals[i] = 1.0 * v / RAND_MAX;
+ }
+
+ // return success
+ return true;
+}
+
+// fill sizes callback for system random source
+static bool
+on_get_sizes(
+ km_rand_t * const rs,
+ const size_t num_vals,
+ size_t * const vals
+) {
+ ctx_t * const ctx = rs->data;
+
+ // generate results
+ for (size_t i = 0; i < num_vals; i++) {
+ uint64_t v;
+
+ if (!next(ctx, &v)) {
+ // return failure
+ return false;
+ }
+
+ // add value
+ vals[i] = v;
+ }
+
+ // return success
+ return true;
+}
+
+static void
+on_fini(
+ km_rand_t * const rs
+) {
+ ctx_t * const ctx = rs->data;
+
+ // close file
+ fclose(ctx->fh);
+
+ // free context
+ free(ctx);
+
+ rs->data = NULL;
+}
+
+// path random source callbacks
+static const km_rand_cbs_t
+PATH_RAND_CBS = {
+ .get_floats = on_get_floats,
+ .get_sizes = on_get_sizes,
+ .fini = on_fini,
+};
+
+// init system random source (uses system rand())
+bool
+km_rand_init_path(
+ km_rand_t * const rs,
+ const char * const path
+) {
+ // open file
+ FILE *fh = fopen(path, "rb");
+ if (!fh) {
+ // return failure
+ return false;
+ }
+
+ // alloc context
+ ctx_t * const ctx = malloc(sizeof(ctx_t));
+ if (!ctx) {
+ // return failure
+ return false;
+ }
+
+ // populate context
+ ctx->fh = fh;
+
+ // fill buffer
+ if (!refill(ctx)) {
+ // return failure
+ return false;
+ }
+
+ // populate random source
+ rs->cbs = &PATH_RAND_CBS;
+ rs->data = ctx;
+
+ // return success
+ return true;
+}
diff --git a/km-rand.c b/km-rand.c
index bb3a2cc..e777069 100644
--- a/km-rand.c
+++ b/km-rand.c
@@ -31,56 +31,3 @@ km_rand_fini(
rs->cbs->fini(rs);
}
}
-
-// libc random source get_floats
-static bool
-rand_libc_get_floats(
- km_rand_t * const rs,
- const size_t num_vals,
- float * const vals
-) {
- UNUSED(rs);
-
- // generate results
- for (size_t i = 0; i < num_vals; i++) {
- vals[i] = 1.0 * rand() / RAND_MAX;
- }
-
- // return success
- return true;
-}
-
-// fill sizes callback for system random source
-static bool
-rand_libc_get_sizes(
- km_rand_t * const rs,
- const size_t num_vals,
- size_t * const vals
-) {
- UNUSED(rs);
-
- // generate results
- for (size_t i = 0; i < num_vals; i++) {
- vals[i] = rand();
- }
-
- // return success
- return true;
-}
-
-// system random source callbacks
-static const km_rand_cbs_t
-RAND_LIBC_CBS = {
- .get_floats = rand_libc_get_floats,
- .get_sizes = rand_libc_get_sizes,
- .fini = NULL,
-};
-
-// init system random source (uses system rand())
-void
-km_rand_init_libc(
- km_rand_t * const rs
-) {
- rs->cbs = &RAND_LIBC_CBS;
- rs->data = NULL;
-}
diff --git a/km.h b/km.h
index 7858933..097026e 100644
--- a/km.h
+++ b/km.h
@@ -30,12 +30,15 @@ _Bool km_rand_get_sizes(km_rand_t * const, const size_t, size_t * const);
// finalize random source
void km_rand_fini(km_rand_t * const);
-// init libc random source (use libc rand())
+// init libc rng (use libc rand())
void km_rand_init_libc(km_rand_t *);
-// init random file source (e.g. "/dev/urandom")
+// init file source rng (e.g. "/dev/urandom")
_Bool km_rand_init_path(km_rand_t * const, const char *);
+// init erand48 rng (use erand48())
+_Bool km_rand_init_erand48(km_rand_t * const, const uint64_t);
+
// shape of data set
typedef struct {
size_t num_floats,
@@ -141,7 +144,10 @@ typedef void (*km_find_data_cb_t)(
);
typedef struct {
+ // maximum number of clusters
size_t max_clusters;
+
+ // maximum number of tests per cluster
size_t num_tests;
km_find_init_cb_t on_init;
diff --git a/main.c b/main.c
index 8e94e87..5629cd5 100644
--- a/main.c
+++ b/main.c
@@ -264,7 +264,7 @@ save_on_best(
UNUSED(cb_data);
// convert rank to channel brightness
- const uint8_t ch = 0x66 + (0xff - 0x66) * (1.0 * rank) / (MAX_BEST - 1);
+ const uint8_t ch = 0x33 + (0xff - 0x33) * (1.0 * rank + 1) / (MAX_BEST);
const uint32_t color = (ch & 0xff) << 16;
// const uint32_t color = 0xff0000;
// D("rank = %zu, score = %0.3f, size = %zu, color = %06x", rank, score, set->num_rows, color);
@@ -324,9 +324,11 @@ int main(int argc, char *argv[]) {
// init context
ctx_t ctx;
memset(&ctx, 0, sizeof(ctx_t));
- km_rand_init_libc(&(ctx.rs));
ctx.init_type = km_init_get_type(init_type_name);
+ // init ctx rng
+ km_rand_init_erand48(&(ctx.rs), rand());
+
// init data set
km_set_t set;
if (!km_load_path(data_path, &LOAD_CBS, &set)) {