SSBOの使い方 (compute shader)
やっとSSBOの上手い使い方を見つけた。
今回の例で書いてみる。
vertex shader と compute shader の両方に
// One particle record: a single vec4 position.
struct Particle{ vec4 pos; };
// Shader storage block with std430 layout at binding point 7, exposing an
// unsized array of particles named `par`.
layout(std430, binding=7) buffer particles{ Particle par[]; };
これを書く。
compute shaderでは
// Use the x component of the global invocation id as the particle index.
uint id = gl_GlobalInvocationID.x;
// Write this invocation's particle position into the storage buffer.
par[id].pos = vec4(0,0,0,1);
とかする。
vertex shaderでは
// Use the vertex id as the particle index.
int id = gl_VertexID;
// Read the position stored in the buffer and output it as the vertex position.
gl_Position = par[id].pos;
とかする。glDrawElementsInstanced などのインスタンス描画で、インスタンスごとに1要素を読みたい時は、gl_VertexID の代わりに gl_InstanceID を添字に使うことになる。
バッファーのバインドの仕方は、簡単。
# Create one buffer object name.
ssbo = glGenBuffers(1)
glBindBuffer(GL_SHADER_STORAGE_BUFFER, ssbo)
# Allocate 4 * 4 * max_num bytes (one 4-component, 4-byte-per-component
# position per particle); None means no initial data is uploaded.
glBufferData(GL_SHADER_STORAGE_BUFFER, 4 * 4 * max_num, None, GL_STATIC_DRAW)
glUseProgram(program);
# Attach the buffer to indexed binding point 7, the same index as
# `binding=7` in the shader-side block declaration.
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 7, ssbo)
glUseProgram(computeProg)
# NOTE(review): indexed buffer bindings appear to be context state rather than
# per-program state, so this second call is probably redundant - confirm.
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 7, ssbo)
ここに登場する7
は任意の値でよい（ただし GL_MAX_SHADER_STORAGE_BUFFER_BINDINGS 未満であること）。この数字はshaderの中の
layout(std430, binding=7) buffer ....
と同じにする。
もうこれで、Transform Feedback を書く気は起こらない。
# SSBO particle demo (Windows only, PyOpenGL + ctypes).
#
# A compute shader writes one vec4 position per particle into a shader
# storage buffer (binding point 7); the vertex shader reads the same buffer
# through gl_VertexID and the particles are drawn as GL_POINTS.
from OpenGL.GL import *
from OpenGL.WGL import *
from ctypes import *
from ctypes.wintypes import *
import sys

# Vertex shader: fetches the particle position from the SSBO and applies a
# fixed camera (lookAt) and perspective projection.
vsh = """
#version 430

struct Particle{
    vec4 pos;
};

layout(std430, binding=7) buffer particles{
    Particle par[];
};

uniform vec2 resolution;

mat4 perspective(float fov, float aspect, float near, float far)
{
    float v = 1./tan(radians(fov/2.)), u = v/aspect, w = near-far;
    // Column-major. The last element must be 0 so that clip-space w is
    // exactly -z_eye.
    return mat4(
        u, 0, 0, 0,
        0, v, 0, 0,
        0, 0, (near+far)/w, -1,
        0, 0, near*far*2./w, 0
    );
}

mat4 lookAt(vec3 eye, vec3 center, vec3 up)
{
    vec3 w = normalize(eye - center);
    vec3 u = normalize(cross(up, w));
    vec3 v = normalize(cross(w, u));
    // The translation column is (-u.eye, -v.eye, -w.eye), one term per axis.
    return mat4(
        u.x, v.x, w.x, 0,
        u.y, v.y, w.y, 0,
        u.z, v.z, w.z, 0,
        -dot(u, eye), -dot(v, eye), -dot(w, eye), 1
    );
}

void main(void){
    mat4 pMat = perspective(45.0, resolution.x / resolution.y, 0.1, 200.0);
    vec3 camera = vec3(0,5,10);
    vec3 center = vec3(0,0,0);
    mat4 vMat = lookAt(camera, center, vec3(0,1,0));
    gl_Position = pMat*vMat*par[gl_VertexID].pos;
}
"""

# Fragment shader: every point is drawn plain white.
fsh = """
#version 430

out vec4 fragColor;

void main()
{
    fragColor = vec4(1.0);
}
"""

# Compute shader: one invocation per particle, 128 invocations per work group.
# The work-group size here must match LOCAL_SIZE below.
csh = """
#version 430

struct Particle{
    vec4 pos;
};

layout(std430, binding=7) buffer particles{
    Particle par[];
};

uniform float time;
uniform uint max_num;

layout(local_size_x = 128, local_size_y = 1, local_size_z = 1) in;

#define PI 3.14159265359
#define PI2 ( PI * 2.0 )

vec2 rotate( in vec2 p, in float t )
{
    return p * cos( -t ) + vec2( p.y, -p.x ) * sin( -t );
}

float hash(float n)
{
    return fract(sin(n)*753.5453123);
}

void main(){
    uint id = gl_GlobalInvocationID.x;
    // The dispatch is rounded up to whole work groups, so the surplus
    // invocations of the last group must not write past the buffer end.
    if (id >= max_num) return;

    float theta = hash(float(id)*0.3123887) * PI2 + time;
    par[id].pos.x = cos(theta)+1.5;
    par[id].pos.y = sin(theta)*1.8;
    par[id].pos.z = 0.0;
    par[id].pos.w = 1.0;
    par[id].pos.xz = rotate(par[id].pos.xz, hash(float(id)*0.5123)*PI2);
    par[id].pos.xyz *= 2.0;
}
"""

winmm = windll.winmm
kernel32 = windll.kernel32
user32 = windll.user32

XRES = 640
YRES = 480

WS_OVERLAPPEDWINDOW = 0xcf0000
WS_VISIBLE = 0x10000000
PM_REMOVE = 1
WM_NCLBUTTONDOWN = 161
HTCLOSE = 20
VK_ESCAPE = 27
PFD_SUPPORT_OPENGL = 32
PFD_DOUBLEBUFFER = 1

# Must equal local_size_x in the compute shader source above.
LOCAL_SIZE = 128
# Binding point shared by `layout(std430, binding=7)` in both shaders.
SSBO_BINDING = 7


def _log_text(log):
    """Return a GL info log as str whether the binding returned bytes or str."""
    return log.decode() if isinstance(log, bytes) else str(log)


def _build_program(*stages):
    """Compile and link a GL program from (source, shader_type) pairs.

    Raises RuntimeError carrying the driver's info log when a stage fails to
    compile or the program fails to link.
    """
    prog = glCreateProgram()
    for source, kind in stages:
        stage = glCreateShader(kind)
        glShaderSource(stage, source)
        glCompileShader(stage)
        if glGetShaderiv(stage, GL_COMPILE_STATUS) != GL_TRUE:
            raise RuntimeError(_log_text(glGetShaderInfoLog(stage)))
        glAttachShader(prog, stage)
    glLinkProgram(prog)
    if glGetProgramiv(prog, GL_LINK_STATUS) != GL_TRUE:
        raise RuntimeError(_log_text(glGetProgramInfoLog(prog)))
    return prog


# --- Window and GL context creation ---------------------------------------
hWnd = user32.CreateWindowExA(0,0xC018,0,WS_OVERLAPPEDWINDOW|WS_VISIBLE,30,30,XRES,YRES,0,0,0,0)
hdc = user32.GetDC(hWnd)
user32.SetForegroundWindow(hWnd)
pfd = PIXELFORMATDESCRIPTOR(0,1,PFD_SUPPORT_OPENGL|PFD_DOUBLEBUFFER,32,0,0,0,0,0,0,0,0,0,0,0,0,0,32,0,0,0,0,0,0,0)
SetPixelFormat(hdc, ChoosePixelFormat(hdc, pfd), pfd)
hGLrc = wglCreateContext(hdc)
wglMakeCurrent(hdc, hGLrc)

max_num = 50000

glClearColor(0, 0, 0, 1)
glEnable(GL_CULL_FACE)
glCullFace(GL_BACK)
glEnable(GL_DEPTH_TEST)
glDepthFunc(GL_LEQUAL)

# --- Render program ----------------------------------------------------------
program = _build_program((vsh, GL_VERTEX_SHADER), (fsh, GL_FRAGMENT_SHADER))
glUseProgram(program)
glUniform2f(glGetUniformLocation(program, "resolution"), XRES , YRES)

# --- Compute program ---------------------------------------------------------
computeProg = _build_program((csh, GL_COMPUTE_SHADER))
glUseProgram(computeProg)
glUniform1ui(glGetUniformLocation(computeProg, "max_num"), max_num)
# Looked up once here instead of once per frame.
time_location = glGetUniformLocation(computeProg, "time")

# --- Particle storage buffer -------------------------------------------------
ssbo = glGenBuffers(1)
glBindBuffer(GL_SHADER_STORAGE_BUFFER, ssbo)
# 16 bytes per particle (one vec4). The buffer is rewritten by the GPU every
# frame and read back by the GPU, hence the DYNAMIC_COPY usage hint.
glBufferData(GL_SHADER_STORAGE_BUFFER, 4 * 4 * max_num, None, GL_DYNAMIC_COPY)
# Indexed buffer bindings belong to the context, not to a program object, so a
# single bind serves both the compute and the render program.
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, SSBO_BINDING, ssbo)

# Round up so that every particle is covered even when max_num is not a
# multiple of the work-group size (the shader discards the surplus).
num_groups = (max_num + LOCAL_SIZE - 1) // LOCAL_SIZE

duration = 60
msg = MSG()
lpmsg = pointer(msg)
zero = winmm.timeGetTime()
done = False
fps, cnt, s0 = 0, 0, 0

# --- Main loop ---------------------------------------------------------------
while not done:
    while user32.PeekMessageA(lpmsg, 0, 0, 0, PM_REMOVE):
        if msg.message == WM_NCLBUTTONDOWN and msg.wParam == HTCLOSE:
            done = True
        user32.DispatchMessageA(lpmsg)
    if user32.GetAsyncKeyState(VK_ESCAPE):
        done = True

    # Elapsed time in seconds since start-up.
    t = (winmm.timeGetTime() - zero)*0.001

    # Update all particle positions on the GPU.
    glUseProgram(computeProg)
    glUniform1f(time_location, t)
    glDispatchCompute(num_groups, 1, 1)
    # Make the compute shader's buffer writes visible to the vertex shader's
    # storage-buffer reads issued by the draw call below.
    glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT)

    # Draw the particles straight from the storage buffer.
    glUseProgram(program)
    glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT)
    glDrawArrays(GL_POINTS, 0, max_num)
    SwapBuffers(hdc)

    # Frames-per-second counter, refreshed roughly once per second.
    cnt += 1
    if t - s0 > 1:
        fps = cnt
        cnt = 0
        s0 = t
    sys.stdout.write("\r FPS : %d TIME : %f" %(fps,t))
    sys.stdout.flush()

    if t > duration:
        done = True

# --- Teardown ----------------------------------------------------------------
wglMakeCurrent(0, 0)
wglDeleteContext(hGLrc)
user32.ReleaseDC(hWnd, hdc)
user32.PostQuitMessage(0)
user32.DestroyWindow(hWnd)
scriptの中で音楽を使わないのにwinmm
を使っているのは、winmm.timeGetTime()
で得られる時間の精度が良いからです。