diff options
| -rw-r--r-- | main.c | 115 | 
1 files changed, 71 insertions, 44 deletions
@@ -1,5 +1,8 @@  #include <stdbool.h>  #include <stdlib.h> +#ifdef _OPENMP +#include <omp.h> +#endif /* _OPENMP */  #include <openssl/rand.h>  #include <GL/glew.h> @@ -17,18 +20,18 @@  // logging macros  #define LOG_C(...) SDL_LogCritical(SDL_LOG_CATEGORY_APPLICATION, __VA_ARGS__)  #define LOG_W(...) SDL_LogWarn(SDL_LOG_CATEGORY_APPLICATION, __VA_ARGS__) -#define LOG_V(...) SDL_LogVerbose(SDL_LOG_CATEGORY_APPLICATION, __VA_ARGS__)  #define LOG_I(...) SDL_LogInfo(SDL_LOG_CATEGORY_APPLICATION, __VA_ARGS__)  #define LOG_D(...) SDL_LogDebug(SDL_LOG_CATEGORY_APPLICATION, __VA_ARGS__) +#define LOG_V(...) SDL_LogVerbose(SDL_LOG_CATEGORY_APPLICATION, __VA_ARGS__) -// number of time records to keep (must be power of two - 1) -#define MAX_TIMES 0xFF +// number of time records to keep (must be power of two) +#define MAX_TIMES 0x100  // custom SDL_USEREVENT code for timer  #define EVENT_CODE_TIMER 0  // use compute shader to update particles? -#define USE_COMPUTE_SHADER true +#define USE_COMPUTE_SHADER false  // enable gl debug  #define USE_GL_DEBUG false @@ -37,7 +40,7 @@  #define NUM_PARTICLES 1024  // number of compute shader workgroups -#define WORKGROUP_SIZE 1 +#define WORKGROUP_SIZE 1024  typedef struct {    const GLuint type; @@ -54,6 +57,7 @@ typedef struct {    } times[MAX_TIMES];    uint32_t times_ofs; +  // flags    bool done,         fullscreen;  } context_t; @@ -66,6 +70,12 @@ verts[] = {     0.0, -1, 0,  }; +typedef struct { +  float positions[4 * NUM_PARTICLES], +        velocities[4 * NUM_PARTICLES], +        colors[4 * NUM_PARTICLES]; +} particles_t; +  // shared particle shader storage buffer definition  #define PARTICLE_BUFFER_DECLARATION \    "layout (std430, binding = 1) buffer particles {\n"   \ @@ -88,7 +98,7 @@ cs_src[] =    "\n"    "void main() {\n"    "  mediump uint i = gl_GlobalInvocationID.x;\n" -  "  positions[i].xy += 0.016 * velocities[i].xy;\n" +  "  positions[i] += 0.016 * velocities[i];\n"    "}\n";  // vertex shader @@ -250,36 +260,49 @@ init_particles(void) {      exit(EXIT_FAILURE);    } -  // populate data -  float data[3 * 4 * NUM_PARTICLES]; +  // populate particle data +  particles_t data; +    #pragma omp parallel for    for (int i = 0; i < NUM_PARTICLES; i++) { -    float x = 2.0 * seeds[5 * i + 0] / 0xFF - 1.0, -          y = 2.0 * seeds[5 * i + 1] / 0xFF - 1.0; - -    // LOG_D("%f,%f", x, y); - -    // populate position -    data[4 * i + 0] = x; -    data[4 * i + 1] = y; -    data[4 * i + 2] = 0.0; -    data[4 * i + 3] = 1.0; - -    // populate random velocity -    data[4 * (NUM_PARTICLES + i) + 0] = 2.0 * seeds[5 * i + 2] / 0xFF - 1.0; -    data[4 * (NUM_PARTICLES + i) + 1] = 2.0 * seeds[5 * i + 3] / 0xFF - 1.0; - -    // populate colors -    uint8_t cs = seeds[5 * i + 4]; -    data[4 * (2 * NUM_PARTICLES + i) + 0] = color_map(cs & 0x3); -    data[4 * (2 * NUM_PARTICLES + i) + 1] = color_map((cs >> 2) & 0x3); -    data[4 * (2 * NUM_PARTICLES + i) + 2] = color_map((cs >> 4) & 0x3); -    data[4 * (2 * NUM_PARTICLES + i) + 3] = 1.0; +    // calculate position +    const float px = 2.0 * seeds[5 * i + 0] / 0xFF - 1.0, +                py = 2.0 * seeds[5 * i + 1] / 0xFF - 1.0; + +    // LOG_D("%f,%f", px, py); + +    // set position +    data.positions[4 * i + 0] = px; +    data.positions[4 * i + 1] = py; +    data.positions[4 * i + 2] = 0.0; +    data.positions[4 * i + 3] = 1.0; + +    // calculate velocity +    const float vx = 2.0 * seeds[5 * i + 2] / 0xFF - 1.0, +                vy = 2.0 * seeds[5 * i + 3] / 0xFF - 1.0; + +    // set velocity +    data.velocities[4 * i + 0] = vx; +    data.velocities[4 * i + 1] = vy; +    data.velocities[4 * i + 2] = 0; +    data.velocities[4 * i + 3] = 0; + +    // calculate color +    const uint8_t cs = seeds[5 * i + 4]; +    const float r = color_map(cs & 0x3), +                g = color_map((cs >> 2) & 0x3), +                b = color_map((cs >> 4) & 0x3); + +    // set color +    data.colors[4 * i + 0] = r; +    data.colors[4 * i + 1] = g; +    data.colors[4 * i + 2] = b; +    data.colors[4 * i + 3] = 1.0;    }    // populate buffer    glBindBuffer(GL_SHADER_STORAGE_BUFFER, ssbo); -  glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(data), data, GL_DYNAMIC_DRAW); +  glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(data), &data, GL_DYNAMIC_DRAW);    glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);    // return buffer @@ -305,10 +328,10 @@ update_particles(      glBindBuffer(GL_SHADER_STORAGE_BUFFER, ssbo);      // map buffer -    float *data = glMapBufferRange( +    particles_t *data = glMapBufferRange(        GL_SHADER_STORAGE_BUFFER,        0, -      3 * NUM_PARTICLES * 4 * sizeof(float), +      sizeof(particles_t),        GL_MAP_READ_BIT | GL_MAP_WRITE_BIT      ); @@ -316,19 +339,19 @@ update_particles(      #pragma omp parallel for      for (int i = 0; i < NUM_PARTICLES; i++) {        // update x coordinate -      data[4 * i + 0] += 0.016 * data[4 * (NUM_PARTICLES + i) + 0]; -      if (data[4 * i + 0] < -1.1) { -        data[4 * i + 0] += 2.2; -      } else if (data[4 * i + 0] > 1.1) { -        data[4 * i + 0] -= 2.2; +      data->positions[4 * i + 0] += 0.016 * data->velocities[4 * i + 0]; +      if (data->positions[4 * i + 0] < -1.1) { +        data->positions[4 * i + 0] += 2.2; +      } else if (data->positions[4 * i + 0] > 1.1) { +        data->positions[4 * i + 0] -= 2.2;        }        // update y coordinate -      data[4 * i + 1] += 0.016 * data[4 * (NUM_PARTICLES + i) + 1]; -      if (data[4 * i + 1] < -1.1) { -        data[4 * i + 1] += 2.2; -      } else if (data[4 * i + 1] > 1.1) { -        data[4 * i + 1] -= 2.2; +      data->positions[4 * i + 1] += 0.016 * data->velocities[4 * i + 1]; +      if (data->positions[4 * i + 1] < -1.1) { +        data->positions[4 * i + 1] += 2.2; +      } else if (data->positions[4 * i + 1] > 1.1) { +        data->positions[4 * i + 1] -= 2.2;        }      } @@ -362,6 +385,10 @@ ctx_init(context_t * const ctx) {      exit(EXIT_FAILURE);    } +  #ifdef _OPENMP +  LOG_I("OpenMP enabled.  max_threads = %d", omp_get_max_threads()); +  #endif /* _OPENMP */ +    // create window    ctx->win = SDL_CreateWindow(      "Compute Test", @@ -563,7 +590,7 @@ int main(int argc, char *argv[]) {    context_t ctx;    ctx_init(&ctx); -  // init timer (5 seconds) +  // init timer (5 second interval)    SDL_TimerID timer_id = SDL_AddTimer(5000, timer_cb, NULL);    // generate vertex array @@ -624,7 +651,7 @@ int main(int argc, char *argv[]) {      // save end time      ctx.times[ctx.times_ofs].draw_end = SDL_GetTicks(); -    ctx.times_ofs = (ctx.times_ofs + 1) & MAX_TIMES; +    ctx.times_ofs = (ctx.times_ofs + 1) & (MAX_TIMES - 1);      // handle events      handle_events(&ctx);  | 
