#include #include #ifdef _OPENMP #include #endif /* _OPENMP */ #include #include #include #define UNUSED(a) ((void) (a)) // string utility macros #define SS(v) #v #define S(v) SS(v) // default log level #define DEFAULT_LOG_PRIORITY SDL_LOG_PRIORITY_VERBOSE // logging macros #define LOG_C(...) SDL_LogCritical(SDL_LOG_CATEGORY_APPLICATION, __VA_ARGS__) #define LOG_W(...) SDL_LogWarn(SDL_LOG_CATEGORY_APPLICATION, __VA_ARGS__) #define LOG_I(...) SDL_LogInfo(SDL_LOG_CATEGORY_APPLICATION, __VA_ARGS__) #define LOG_D(...) SDL_LogDebug(SDL_LOG_CATEGORY_APPLICATION, __VA_ARGS__) #define LOG_V(...) SDL_LogVerbose(SDL_LOG_CATEGORY_APPLICATION, __VA_ARGS__) // number of time records to keep (must be power of two) #define MAX_TIMES 0x100 // custom SDL_USEREVENT code for timer #define EVENT_CODE_TIMER 0 // use compute shader to update particles? // (if false, map the buffer and populate it on the cpu) #define USE_COMPUTE_SHADER true // enable gl debug #define USE_GL_DEBUG false // number of particles #define NUM_PARTICLES 2048 // size of compute shader workgroups #define WORKGROUP_SIZE 1024 typedef struct { const GLuint type; const char *src; } shader_t; typedef struct { SDL_Window *win; // times ring buffer struct { uint32_t draw_start, draw_end; } times[MAX_TIMES]; uint32_t times_ofs; // flags bool done, fullscreen; } context_t; // triangle vertices static const float verts[] = { -1, 1, 0, 1, 1, 0, 0.0, -1, 0, }; typedef struct { float x, y; } vec2_t; typedef struct { vec2_t positions[NUM_PARTICLES], velocities[NUM_PARTICLES]; uint32_t colors[NUM_PARTICLES]; struct { float r, g, b, a; } palette[256]; } particles_t; // shared particle shader storage buffer definition #define PARTICLE_BUFFER \ "layout (std430, binding = 1) buffer particles {\n" \ " mediump vec2 positions[" S(NUM_PARTICLES) "];\n" \ " mediump vec2 velocities[" S(NUM_PARTICLES) "];\n" \ " mediump uint colors[" S(NUM_PARTICLES) "];\n" \ " mediump vec4 palette[256];\n" \ "};\n" // real compute shader static const char cs_src[] = "#version 450 core\n" #if USE_GL_DEBUG "#pragma debug(on)\n" #endif /* USE_GL_DEBUG */ "\n" "layout(local_size_x = " S(WORKGROUP_SIZE) ") in;\n" "\n" PARTICLE_BUFFER "\n" "uniform mediump float delta;\n" "\n" "void main() {\n" " mediump uint i = gl_GlobalInvocationID.x;\n" " positions[i] += delta * velocities[i];\n" "\n" " if (positions[i].x > 1.1) {\n" " positions[i].x -= 2.2;\n" " } else if (positions[i].x < -1.1) {\n" " positions[i].x += 2.2;\n" " }\n" "\n" " if (positions[i].y > 1.1) {\n" " positions[i].y -= 2.2;\n" " } else if (positions[i].y < -1.1) {\n" " positions[i].y += 2.2;\n" " }\n" "}\n"; // vertex shader static const char vs_src[] = "#version 450 core\n" "\n" PARTICLE_BUFFER "\n" "layout (location = 0) in mediump vec3 pos;\n" "out mediump flat int instance_id;\n" "\n" "void main() {\n" " gl_Position = vec4(positions[gl_InstanceID], 0, 1) + 0.025 * vec4(pos, 1);\n" " instance_id = gl_InstanceID;\n" "}\n"; // fragment shader static const char fs_src[] = "#version 450 core\n" "\n" PARTICLE_BUFFER "\n" "in mediump flat int instance_id;\n" "out mediump vec4 color;\n" "\n" "uniform mediump float time;\n" "\n" "void main() {\n" " mediump float alpha = 0.5f + 0.5f * (0.5f + 0.5 * sin(time * 3.141));\n" " color = palette[colors[instance_id]] * alpha;\n" "}\n"; static const shader_t COMPUTE_SHADERS[] = { { GL_COMPUTE_SHADER, cs_src }, }; static const shader_t RENDER_SHADERS[] = { { GL_VERTEX_SHADER, vs_src }, { GL_FRAGMENT_SHADER, fs_src }, }; static void debug_cb( GLenum source, GLenum type, GLuint id, GLenum severity, GLsizei length, const GLchar *message, const void *arg ) { UNUSED(source); UNUSED(type); UNUSED(id); UNUSED(severity); UNUSED(length); UNUSED(arg); LOG_D("GL: %s", message); } static GLuint compile_shader( const GLenum type, const char *src ) { // create shader GLuint r = glCreateShader(type); // compile shader // LOG_D("src = \"%s\"", src); glShaderSource(r, 1, &src, NULL); glCompileShader(r); // check for error GLint ok; glGetShaderiv(r, GL_COMPILE_STATUS, &ok); if (!ok) { GLchar log[1024]; glGetShaderInfoLog(r, sizeof(log), NULL, log); LOG_C("glCompileShader() failed: %s", log); exit(EXIT_FAILURE); } // return result return r; } static GLuint link_program( const size_t num_shaders, const shader_t *shaders ) { // create program GLuint r = glCreateProgram(); // compile shaders // (FIXME: check num_shaders < 8) GLuint ids[8]; if (num_shaders >= 8) { LOG_C("too many shaders: %lu", num_shaders); exit(EXIT_FAILURE); } // create and attach each shader for (size_t i = 0; i < num_shaders; i++) { // compile shader ids[i] = compile_shader(shaders[i].type, shaders[i].src); // attach shader glAttachShader(r, ids[i]); } // link program glLinkProgram(r); // check link status GLint ok; glGetProgramiv(r, GL_LINK_STATUS, &ok); if (!ok) { GLchar log[1024]; glGetProgramInfoLog(r, sizeof(log), NULL, log); LOG_C("glLinkProgram() failed: %s", log); exit(EXIT_FAILURE); } // delete shaders for (size_t i = 0; i < num_shaders; i++) { glDeleteShader(ids[i]); } // return result return r; } static float color_map(uint8_t bits) { switch (bits & 0x3) { case 0x3: return 1.0; case 0x2: return 0.75; case 0x1: return 0.5; default: return 0.25; } } static void init_palette( particles_t * const data ) { // generate palette seed data uint8_t seeds[256]; if (!RAND_bytes(seeds, 256)) { LOG_C("RAND_bytes() failed"); exit(EXIT_FAILURE); } #pragma omp parallel for for (int i = 0; i < 256; i++) { // get seed const uint8_t cs = seeds[i]; // set color data->palette[i].r = color_map(cs & 0x3); data->palette[i].g = color_map((cs >> 2) & 0x3); data->palette[i].b = color_map((cs >> 4) & 0x3); data->palette[i].a = 1.0; } } static GLuint init_particles(void) { // generate seed data uint8_t seeds[5 * NUM_PARTICLES]; if (!RAND_bytes(seeds, 5 * NUM_PARTICLES)) { LOG_C("RAND_bytes() failed"); exit(EXIT_FAILURE); } // populate particle data particles_t data; #pragma omp parallel for for (int i = 0; i < NUM_PARTICLES; i++) { // set position data.positions[i].x = 2.0 * seeds[5 * i + 0] / 0xFF - 1.0; data.positions[i].y = 2.0 * seeds[5 * i + 1] / 0xFF - 1.0; // set velocity data.velocities[i].x = 2.0 * seeds[5 * i + 2] / 0xFF - 1.0; data.velocities[i].y = 2.0 * seeds[5 * i + 3] / 0xFF - 1.0; // set color data.colors[i] = seeds[5 * i + 4] & 0x3f; } // generate palette init_palette(&data); // create shader storage buffer GLuint ssbo; glGenBuffers(1, &ssbo); // populate buffer glBindBuffer(GL_SHADER_STORAGE_BUFFER, ssbo); glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(particles_t), &data, GL_DYNAMIC_DRAW); glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0); // return buffer return ssbo; } #define GL(op) do { \ op; \ /* get last GL error */ \ GLenum err = glGetError(); \ \ if (err != GL_NO_ERROR) { \ switch (err) { \ case GL_OUT_OF_MEMORY: \ LOG_C("GL Error: out of memory"); \ break; \ case GL_INVALID_FRAMEBUFFER_OPERATION: \ LOG_C("GL Error: invalid framebuffer operation"); \ break; \ case GL_INVALID_ENUM: \ LOG_C("GL Error: invalid enum"); \ break; \ case GL_INVALID_VALUE: \ LOG_C("GL Error: invalid value"); \ break; \ case GL_INVALID_OPERATION: \ LOG_C("GL Error: invalid operation"); \ break; \ default: \ LOG_C("GL Error: unknown error: %u", err); \ } \ exit(EXIT_FAILURE); \ } \ } while (0) static void update_particles( const GLuint ssbo, const GLuint compute_prog, const GLint u_delta, const float delta ) { if (USE_COMPUTE_SHADER) { UNUSED(ssbo); glUseProgram(compute_prog); glUniform1f(u_delta, delta); glDispatchCompute(NUM_PARTICLES / WORKGROUP_SIZE, 1, 1); glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT); } else { UNUSED(compute_prog); UNUSED(u_delta); // bind shader storage buffer glBindBuffer(GL_SHADER_STORAGE_BUFFER, ssbo); // map buffer particles_t *data = glMapBufferRange( GL_SHADER_STORAGE_BUFFER, 0, sizeof(particles_t), GL_MAP_READ_BIT | GL_MAP_WRITE_BIT ); // update particle positions #pragma omp parallel for for (int i = 0; i < NUM_PARTICLES; i++) { // update x coordinate data->positions[i].x += delta * data->velocities[i].x; if (data->positions[i].x < -1.1) { data->positions[i].x += 2.2; } else if (data->positions[i].x > 1.1) { data->positions[i].x -= 2.2; } // update y coordinate data->positions[i].y += delta * data->velocities[i].y; if (data->positions[i].y < -1.1) { data->positions[i].y += 2.2; } else if (data->positions[i].y > 1.1) { data->positions[i].y -= 2.2; } } // unmap and unbind data buffer glUnmapBuffer(GL_SHADER_STORAGE_BUFFER); glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0); } } static void update_viewport( SDL_Window *win ) { int w, h; // get window drawable size SDL_GL_GetDrawableSize(win, &w, &h); // set viewport size glViewport(0, 0, w, h); } static void ctx_init(context_t * const ctx) { // set log level SDL_LogSetPriority(SDL_LOG_CATEGORY_APPLICATION, DEFAULT_LOG_PRIORITY); // init sdl if (SDL_Init(SDL_INIT_VIDEO | SDL_INIT_TIMER) < 0) { LOG_C("SDL_Init() failed: %s", SDL_GetError()); exit(EXIT_FAILURE); } #ifdef _OPENMP LOG_I("OpenMP enabled. max_threads = %d", omp_get_max_threads()); #endif /* _OPENMP */ // create window ctx->win = SDL_CreateWindow( "Compute Test", SDL_WINDOWPOS_UNDEFINED, SDL_WINDOWPOS_UNDEFINED, 640, 480, SDL_WINDOW_OPENGL | SDL_WINDOW_RESIZABLE ); // check for error if (!ctx->win) { LOG_C("SDL_CreateWindow() failed: %s", SDL_GetError()); exit(EXIT_FAILURE); } // set gl context majory version hint if (SDL_GL_SetAttribute( SDL_GL_CONTEXT_PROFILE_MASK, SDL_GL_CONTEXT_PROFILE_CORE ) < 0) { LOG_C("SDL_GL_SetAttribute() failed: %s", SDL_GetError()); exit(EXIT_FAILURE); } // set gl context majory version hint if (SDL_GL_SetAttribute(SDL_GL_CONTEXT_MAJOR_VERSION, 4) < 0) { LOG_C("SDL_GL_SetAttribute() failed: %s", SDL_GetError()); exit(EXIT_FAILURE); } // set gl context minor version hint if (SDL_GL_SetAttribute(SDL_GL_CONTEXT_MINOR_VERSION, 5) < 0) { LOG_C("SDL_GL_SetAttribute() failed: %s", SDL_GetError()); exit(EXIT_FAILURE); } if (USE_GL_DEBUG) { // set debug context if (SDL_GL_SetAttribute(SDL_GL_CONTEXT_FLAGS, SDL_GL_CONTEXT_DEBUG_FLAG) < 0) { LOG_C("SDL_GL_SetAttribute() failed: %s", SDL_GetError()); exit(EXIT_FAILURE); } } // create GL context SDL_GLContext context = SDL_GL_CreateContext(ctx->win); if (!context) { LOG_C("SDL_GL_CreateContext() failed: %s", SDL_GetError()); exit(EXIT_FAILURE); } // init glew // (do this immediately after GL context init) { GLenum err = glewInit(); if (err != GLEW_OK) { LOG_C("glewInit() failed: %s", glewGetErrorString(err)); exit(EXIT_FAILURE); } } if (USE_GL_DEBUG) { // enable debugging glEnable(GL_DEBUG_OUTPUT); glDebugMessageCallback(debug_cb, ctx); } // print workgroup size limits { int sizes[3]; for (int i = 0; i < 3; i++) { glGetIntegeri_v(GL_MAX_COMPUTE_WORK_GROUP_SIZE, i, sizes + i); } // log workgroup sizes LOG_I("workgroup_sizes = [%d, %d, %d]", sizes[0], sizes[1], sizes[2]); } // print shader storage block size limit { GLint val; glGetIntegerv(GL_MAX_SHADER_STORAGE_BLOCK_SIZE, &val); // log size limit LOG_I("GL_MAX_SHADER_STORAGE_BLOCK_SIZE = %d", val); } // set viewport size // (after GLEW init) update_viewport(ctx->win); // set swap interval // (after context init) if (SDL_GL_SetSwapInterval(1) < 0) { LOG_C("SDL_GL_SetSwapInterval() failed: %s", SDL_GetError()); exit(EXIT_FAILURE); } // init times ring buffer memset(ctx->times, 0, sizeof(ctx->times)); ctx->times_ofs = 0; // init flags ctx->done = false; ctx->fullscreen = false; } static void handle_events( context_t * const ctx ) { SDL_Event ev; while (SDL_PollEvent(&ev)) { switch (ev.type) { case SDL_QUIT: ctx->done = true; break; case SDL_KEYDOWN: switch (ev.key.keysym.sym) { case SDLK_q: case SDLK_ESCAPE: ctx->done = true; break; case SDLK_F11: // toggle fullscreen ctx->fullscreen = !ctx->fullscreen; if (SDL_SetWindowFullscreen( ctx->win, ctx->fullscreen ? SDL_WINDOW_FULLSCREEN_DESKTOP : 0 ) < 0) { // log warning LOG_W("SDL_SetWindowFullscreen() failed: %s", SDL_GetError()); ctx->fullscreen = !ctx->fullscreen; } break; } break; case SDL_WINDOWEVENT: switch (ev.window.event) { case SDL_WINDOWEVENT_SIZE_CHANGED: { // map event to window SDL_Window *ev_win = SDL_GetWindowFromID(ev.window.windowID); if (ev_win == ctx->win) { // update viewport update_viewport(ctx->win); } else { LOG_W("ignoring SIZE_CHANGED event from unknown window"); } } break; } break; case SDL_USEREVENT: if (ev.user.code == EVENT_CODE_TIMER) { uint32_t sum = 0; // #pragma omp parallel for reduction(+:sum) for (int i = 0; i < MAX_TIMES; i++) { sum += ctx->times[i].draw_end - ctx->times[i].draw_start; } // print fps LOG_I("fps: %f", 1.0 * sum / MAX_TIMES); } break; } } } static Uint32 timer_cb( Uint32 interval, void *arg ) { SDL_Event ev; UNUSED(arg); // build synthetic event ev.type = SDL_USEREVENT; ev.user.type = SDL_USEREVENT; ev.user.code = EVENT_CODE_TIMER; ev.user.data1 = NULL; ev.user.data2 = NULL; // add event to queue SDL_PushEvent(&ev); // return interval return interval; } int main(int argc, char *argv[]) { UNUSED(argc); UNUSED(argv); // init context context_t ctx; ctx_init(&ctx); // init timer (5 second interval) SDL_TimerID timer_id = SDL_AddTimer(5000, timer_cb, NULL); // generate shader storage buffer GLuint ssbo = init_particles(); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, ssbo); // generate vertex array GLuint vao; glGenVertexArrays(1, &vao); glBindVertexArray(vao); // generate vertex buffer GLuint vbo; glGenBuffers(1, &vbo); glBindBuffer(GL_ARRAY_BUFFER, vbo); glBufferData(GL_ARRAY_BUFFER, sizeof(verts), verts, GL_STATIC_DRAW); glVertexAttribPointer(0, 3, GL_FLOAT, GL_FALSE, 3 * sizeof(GLfloat), (GLvoid*) 0); glEnableVertexAttribArray(0); // unbind vao glBindVertexArray(0); // link compute program, get uniform GLuint compute_prog = link_program(1, COMPUTE_SHADERS); GLint u_delta = glGetUniformLocation(compute_prog, "delta"); // link render program, get uniform GLuint render_prog = link_program(2, RENDER_SHADERS); GLint u_time = glGetUniformLocation(render_prog, "time"); // main loop while (!ctx.done) { // get start time, then calculate delta const uint32_t now = SDL_GetTicks(); const uint32_t last_times_ofs = (ctx.times_ofs - 1) & (MAX_TIMES - 1); const float delta = (now - ctx.times[last_times_ofs].draw_start) / 1000.0; // save start time ctx.times[ctx.times_ofs].draw_start = now; // update particles update_particles(ssbo, compute_prog, u_delta, delta); // clear screen glClearColor(0, 0, 0, 1); glClear(GL_COLOR_BUFFER_BIT); // use render program, set uniform glUseProgram(render_prog); glUniform1f(u_time, now / 1000.0); // draw glBindVertexArray(vao); glDrawArraysInstanced(GL_TRIANGLES, 0, 3, NUM_PARTICLES); glBindVertexArray(0); // swap buffer SDL_GL_SwapWindow(ctx.win); // save end time ctx.times[ctx.times_ofs].draw_end = SDL_GetTicks(); ctx.times_ofs = (ctx.times_ofs + 1) & (MAX_TIMES - 1); // handle events handle_events(&ctx); } // remove timer if (SDL_RemoveTimer(timer_id) == SDL_FALSE) { LOG_W("SDL_RemoveTimer() failed"); } // delete GL context SDL_GLContext context = SDL_GL_GetCurrentContext(); if (context) { SDL_GL_DeleteContext(context); } // destroy window SDL_DestroyWindow(ctx.win); // return success return EXIT_SUCCESS; }