diff options
-rw-r--r-- | main.c | 115 |
1 files changed, 71 insertions, 44 deletions
@@ -1,5 +1,8 @@ #include <stdbool.h> #include <stdlib.h> +#ifdef _OPENMP +#include <omp.h> +#endif /* _OPENMP */ #include <openssl/rand.h> #include <GL/glew.h> @@ -17,18 +20,18 @@ // logging macros #define LOG_C(...) SDL_LogCritical(SDL_LOG_CATEGORY_APPLICATION, __VA_ARGS__) #define LOG_W(...) SDL_LogWarn(SDL_LOG_CATEGORY_APPLICATION, __VA_ARGS__) -#define LOG_V(...) SDL_LogVerbose(SDL_LOG_CATEGORY_APPLICATION, __VA_ARGS__) #define LOG_I(...) SDL_LogInfo(SDL_LOG_CATEGORY_APPLICATION, __VA_ARGS__) #define LOG_D(...) SDL_LogDebug(SDL_LOG_CATEGORY_APPLICATION, __VA_ARGS__) +#define LOG_V(...) SDL_LogVerbose(SDL_LOG_CATEGORY_APPLICATION, __VA_ARGS__) -// number of time records to keep (must be power of two - 1) -#define MAX_TIMES 0xFF +// number of time records to keep (must be power of two) +#define MAX_TIMES 0x100 // custom SDL_USEREVENT code for timer #define EVENT_CODE_TIMER 0 // use compute shader to update particles? -#define USE_COMPUTE_SHADER true +#define USE_COMPUTE_SHADER false // enable gl debug #define USE_GL_DEBUG false @@ -37,7 +40,7 @@ #define NUM_PARTICLES 1024 // number of compute shader workgroups -#define WORKGROUP_SIZE 1 +#define WORKGROUP_SIZE 1024 typedef struct { const GLuint type; @@ -54,6 +57,7 @@ typedef struct { } times[MAX_TIMES]; uint32_t times_ofs; + // flags bool done, fullscreen; } context_t; @@ -66,6 +70,12 @@ verts[] = { 0.0, -1, 0, }; +typedef struct { + float positions[4 * NUM_PARTICLES], + velocities[4 * NUM_PARTICLES], + colors[4 * NUM_PARTICLES]; +} particles_t; + // shared particle shader storage buffer definition #define PARTICLE_BUFFER_DECLARATION \ "layout (std430, binding = 1) buffer particles {\n" \ @@ -88,7 +98,7 @@ cs_src[] = "\n" "void main() {\n" " mediump uint i = gl_GlobalInvocationID.x;\n" - " positions[i].xy += 0.016 * velocities[i].xy;\n" + " positions[i] += 0.016 * velocities[i];\n" "}\n"; // vertex shader @@ -250,36 +260,49 @@ init_particles(void) { exit(EXIT_FAILURE); } - // populate data - float data[3 * 4 * NUM_PARTICLES]; + // populate particle data + particles_t data; + #pragma omp parallel for for (int i = 0; i < NUM_PARTICLES; i++) { - float x = 2.0 * seeds[5 * i + 0] / 0xFF - 1.0, - y = 2.0 * seeds[5 * i + 1] / 0xFF - 1.0; - - // LOG_D("%f,%f", x, y); - - // populate position - data[4 * i + 0] = x; - data[4 * i + 1] = y; - data[4 * i + 2] = 0.0; - data[4 * i + 3] = 1.0; - - // populate random velocity - data[4 * (NUM_PARTICLES + i) + 0] = 2.0 * seeds[5 * i + 2] / 0xFF - 1.0; - data[4 * (NUM_PARTICLES + i) + 1] = 2.0 * seeds[5 * i + 3] / 0xFF - 1.0; - - // populate colors - uint8_t cs = seeds[5 * i + 4]; - data[4 * (2 * NUM_PARTICLES + i) + 0] = color_map(cs & 0x3); - data[4 * (2 * NUM_PARTICLES + i) + 1] = color_map((cs >> 2) & 0x3); - data[4 * (2 * NUM_PARTICLES + i) + 2] = color_map((cs >> 4) & 0x3); - data[4 * (2 * NUM_PARTICLES + i) + 3] = 1.0; + // calculate position + const float px = 2.0 * seeds[5 * i + 0] / 0xFF - 1.0, + py = 2.0 * seeds[5 * i + 1] / 0xFF - 1.0; + + // LOG_D("%f,%f", px, py); + + // set position + data.positions[4 * i + 0] = px; + data.positions[4 * i + 1] = py; + data.positions[4 * i + 2] = 0.0; + data.positions[4 * i + 3] = 1.0; + + // calculate velocity + const float vx = 2.0 * seeds[5 * i + 2] / 0xFF - 1.0, + vy = 2.0 * seeds[5 * i + 3] / 0xFF - 1.0; + + // set velocity + data.velocities[4 * i + 0] = vx; + data.velocities[4 * i + 1] = vy; + data.velocities[4 * i + 2] = 0; + data.velocities[4 * i + 3] = 0; + + // calculate color + const uint8_t cs = seeds[5 * i + 4]; + const float r = color_map(cs & 0x3), + g = color_map((cs >> 2) & 0x3), + b = color_map((cs >> 4) & 0x3); + + // set color + data.colors[4 * i + 0] = r; + data.colors[4 * i + 1] = g; + data.colors[4 * i + 2] = b; + data.colors[4 * i + 3] = 1.0; } // populate buffer glBindBuffer(GL_SHADER_STORAGE_BUFFER, ssbo); - glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(data), data, GL_DYNAMIC_DRAW); + glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(data), &data, GL_DYNAMIC_DRAW); glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0); // return buffer @@ -305,10 +328,10 @@ update_particles( glBindBuffer(GL_SHADER_STORAGE_BUFFER, ssbo); // map buffer - float *data = glMapBufferRange( + particles_t *data = glMapBufferRange( GL_SHADER_STORAGE_BUFFER, 0, - 3 * NUM_PARTICLES * 4 * sizeof(float), + sizeof(particles_t), GL_MAP_READ_BIT | GL_MAP_WRITE_BIT ); @@ -316,19 +339,19 @@ update_particles( #pragma omp parallel for for (int i = 0; i < NUM_PARTICLES; i++) { // update x coordinate - data[4 * i + 0] += 0.016 * data[4 * (NUM_PARTICLES + i) + 0]; - if (data[4 * i + 0] < -1.1) { - data[4 * i + 0] += 2.2; - } else if (data[4 * i + 0] > 1.1) { - data[4 * i + 0] -= 2.2; + data->positions[4 * i + 0] += 0.016 * data->velocities[4 * i + 0]; + if (data->positions[4 * i + 0] < -1.1) { + data->positions[4 * i + 0] += 2.2; + } else if (data->positions[4 * i + 0] > 1.1) { + data->positions[4 * i + 0] -= 2.2; } // update y coordinate - data[4 * i + 1] += 0.016 * data[4 * (NUM_PARTICLES + i) + 1]; - if (data[4 * i + 1] < -1.1) { - data[4 * i + 1] += 2.2; - } else if (data[4 * i + 1] > 1.1) { - data[4 * i + 1] -= 2.2; + data->positions[4 * i + 1] += 0.016 * data->velocities[4 * i + 1]; + if (data->positions[4 * i + 1] < -1.1) { + data->positions[4 * i + 1] += 2.2; + } else if (data->positions[4 * i + 1] > 1.1) { + data->positions[4 * i + 1] -= 2.2; } } @@ -362,6 +385,10 @@ ctx_init(context_t * const ctx) { exit(EXIT_FAILURE); } + #ifdef _OPENMP + LOG_I("OpenMP enabled. max_threads = %d", omp_get_max_threads()); + #endif /* _OPENMP */ + // create window ctx->win = SDL_CreateWindow( "Compute Test", @@ -563,7 +590,7 @@ int main(int argc, char *argv[]) { context_t ctx; ctx_init(&ctx); - // init timer (5 seconds) + // init timer (5 second interval) SDL_TimerID timer_id = SDL_AddTimer(5000, timer_cb, NULL); // generate vertex array @@ -624,7 +651,7 @@ int main(int argc, char *argv[]) { // save end time ctx.times[ctx.times_ofs].draw_end = SDL_GetTicks(); - ctx.times_ofs = (ctx.times_ofs + 1) & MAX_TIMES; + ctx.times_ofs = (ctx.times_ofs + 1) & (MAX_TIMES - 1); // handle events handle_events(&ctx); |