summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- main.c 115
1 files changed, 71 insertions, 44 deletions
diff --git a/main.c b/main.c
index 66f4395..9d34f6d 100644
--- a/main.c
+++ b/main.c
@@ -1,5 +1,8 @@
#include <stdbool.h>
#include <stdlib.h>
+#ifdef _OPENMP
+#include <omp.h>
+#endif /* _OPENMP */
#include <openssl/rand.h>
#include <GL/glew.h>
@@ -17,18 +20,18 @@
// logging macros
#define LOG_C(...) SDL_LogCritical(SDL_LOG_CATEGORY_APPLICATION, __VA_ARGS__)
#define LOG_W(...) SDL_LogWarn(SDL_LOG_CATEGORY_APPLICATION, __VA_ARGS__)
-#define LOG_V(...) SDL_LogVerbose(SDL_LOG_CATEGORY_APPLICATION, __VA_ARGS__)
#define LOG_I(...) SDL_LogInfo(SDL_LOG_CATEGORY_APPLICATION, __VA_ARGS__)
#define LOG_D(...) SDL_LogDebug(SDL_LOG_CATEGORY_APPLICATION, __VA_ARGS__)
+#define LOG_V(...) SDL_LogVerbose(SDL_LOG_CATEGORY_APPLICATION, __VA_ARGS__)
-// number of time records to keep (must be power of two - 1)
-#define MAX_TIMES 0xFF
+// number of time records to keep (must be power of two)
+#define MAX_TIMES 0x100
// custom SDL_USEREVENT code for timer
#define EVENT_CODE_TIMER 0
// use compute shader to update particles?
-#define USE_COMPUTE_SHADER true
+#define USE_COMPUTE_SHADER false
// enable gl debug
#define USE_GL_DEBUG false
@@ -37,7 +40,7 @@
#define NUM_PARTICLES 1024
// number of compute shader workgroups
-#define WORKGROUP_SIZE 1
+#define WORKGROUP_SIZE 1024
typedef struct {
const GLuint type;
@@ -54,6 +57,7 @@ typedef struct {
} times[MAX_TIMES];
uint32_t times_ofs;
+ // flags
bool done,
fullscreen;
} context_t;
@@ -66,6 +70,12 @@ verts[] = {
0.0, -1, 0,
};
+typedef struct {
+ float positions[4 * NUM_PARTICLES],
+ velocities[4 * NUM_PARTICLES],
+ colors[4 * NUM_PARTICLES];
+} particles_t;
+
// shared particle shader storage buffer definition
#define PARTICLE_BUFFER_DECLARATION \
"layout (std430, binding = 1) buffer particles {\n" \
@@ -88,7 +98,7 @@ cs_src[] =
"\n"
"void main() {\n"
" mediump uint i = gl_GlobalInvocationID.x;\n"
- " positions[i].xy += 0.016 * velocities[i].xy;\n"
+ " positions[i] += 0.016 * velocities[i];\n"
"}\n";
// vertex shader
@@ -250,36 +260,49 @@ init_particles(void) {
exit(EXIT_FAILURE);
}
- // populate data
- float data[3 * 4 * NUM_PARTICLES];
+ // populate particle data
+ particles_t data;
+
#pragma omp parallel for
for (int i = 0; i < NUM_PARTICLES; i++) {
- float x = 2.0 * seeds[5 * i + 0] / 0xFF - 1.0,
- y = 2.0 * seeds[5 * i + 1] / 0xFF - 1.0;
-
- // LOG_D("%f,%f", x, y);
-
- // populate position
- data[4 * i + 0] = x;
- data[4 * i + 1] = y;
- data[4 * i + 2] = 0.0;
- data[4 * i + 3] = 1.0;
-
- // populate random velocity
- data[4 * (NUM_PARTICLES + i) + 0] = 2.0 * seeds[5 * i + 2] / 0xFF - 1.0;
- data[4 * (NUM_PARTICLES + i) + 1] = 2.0 * seeds[5 * i + 3] / 0xFF - 1.0;
-
- // populate colors
- uint8_t cs = seeds[5 * i + 4];
- data[4 * (2 * NUM_PARTICLES + i) + 0] = color_map(cs & 0x3);
- data[4 * (2 * NUM_PARTICLES + i) + 1] = color_map((cs >> 2) & 0x3);
- data[4 * (2 * NUM_PARTICLES + i) + 2] = color_map((cs >> 4) & 0x3);
- data[4 * (2 * NUM_PARTICLES + i) + 3] = 1.0;
+ // calculate position
+ const float px = 2.0 * seeds[5 * i + 0] / 0xFF - 1.0,
+ py = 2.0 * seeds[5 * i + 1] / 0xFF - 1.0;
+
+ // LOG_D("%f,%f", px, py);
+
+ // set position
+ data.positions[4 * i + 0] = px;
+ data.positions[4 * i + 1] = py;
+ data.positions[4 * i + 2] = 0.0;
+ data.positions[4 * i + 3] = 1.0;
+
+ // calculate velocity
+ const float vx = 2.0 * seeds[5 * i + 2] / 0xFF - 1.0,
+ vy = 2.0 * seeds[5 * i + 3] / 0xFF - 1.0;
+
+ // set velocity
+ data.velocities[4 * i + 0] = vx;
+ data.velocities[4 * i + 1] = vy;
+ data.velocities[4 * i + 2] = 0;
+ data.velocities[4 * i + 3] = 0;
+
+ // calculate color
+ const uint8_t cs = seeds[5 * i + 4];
+ const float r = color_map(cs & 0x3),
+ g = color_map((cs >> 2) & 0x3),
+ b = color_map((cs >> 4) & 0x3);
+
+ // set color
+ data.colors[4 * i + 0] = r;
+ data.colors[4 * i + 1] = g;
+ data.colors[4 * i + 2] = b;
+ data.colors[4 * i + 3] = 1.0;
}
// populate buffer
glBindBuffer(GL_SHADER_STORAGE_BUFFER, ssbo);
- glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(data), data, GL_DYNAMIC_DRAW);
+ glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(data), &data, GL_DYNAMIC_DRAW);
glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
// return buffer
@@ -305,10 +328,10 @@ update_particles(
glBindBuffer(GL_SHADER_STORAGE_BUFFER, ssbo);
// map buffer
- float *data = glMapBufferRange(
+ particles_t *data = glMapBufferRange(
GL_SHADER_STORAGE_BUFFER,
0,
- 3 * NUM_PARTICLES * 4 * sizeof(float),
+ sizeof(particles_t),
GL_MAP_READ_BIT | GL_MAP_WRITE_BIT
);
@@ -316,19 +339,19 @@ update_particles(
#pragma omp parallel for
for (int i = 0; i < NUM_PARTICLES; i++) {
// update x coordinate
- data[4 * i + 0] += 0.016 * data[4 * (NUM_PARTICLES + i) + 0];
- if (data[4 * i + 0] < -1.1) {
- data[4 * i + 0] += 2.2;
- } else if (data[4 * i + 0] > 1.1) {
- data[4 * i + 0] -= 2.2;
+ data->positions[4 * i + 0] += 0.016 * data->velocities[4 * i + 0];
+ if (data->positions[4 * i + 0] < -1.1) {
+ data->positions[4 * i + 0] += 2.2;
+ } else if (data->positions[4 * i + 0] > 1.1) {
+ data->positions[4 * i + 0] -= 2.2;
}
// update y coordinate
- data[4 * i + 1] += 0.016 * data[4 * (NUM_PARTICLES + i) + 1];
- if (data[4 * i + 1] < -1.1) {
- data[4 * i + 1] += 2.2;
- } else if (data[4 * i + 1] > 1.1) {
- data[4 * i + 1] -= 2.2;
+ data->positions[4 * i + 1] += 0.016 * data->velocities[4 * i + 1];
+ if (data->positions[4 * i + 1] < -1.1) {
+ data->positions[4 * i + 1] += 2.2;
+ } else if (data->positions[4 * i + 1] > 1.1) {
+ data->positions[4 * i + 1] -= 2.2;
}
}
@@ -362,6 +385,10 @@ ctx_init(context_t * const ctx) {
exit(EXIT_FAILURE);
}
+ #ifdef _OPENMP
+ LOG_I("OpenMP enabled. max_threads = %d", omp_get_max_threads());
+ #endif /* _OPENMP */
+
// create window
ctx->win = SDL_CreateWindow(
"Compute Test",
@@ -563,7 +590,7 @@ int main(int argc, char *argv[]) {
context_t ctx;
ctx_init(&ctx);
- // init timer (5 seconds)
+ // init timer (5 second interval)
SDL_TimerID timer_id = SDL_AddTimer(5000, timer_cb, NULL);
// generate vertex array
@@ -624,7 +651,7 @@ int main(int argc, char *argv[]) {
// save end time
ctx.times[ctx.times_ofs].draw_end = SDL_GetTicks();
- ctx.times_ofs = (ctx.times_ofs + 1) & MAX_TIMES;
+ ctx.times_ofs = (ctx.times_ofs + 1) & (MAX_TIMES - 1);
// handle events
handle_events(&ctx);