Compute Shaderを書いてみた
pythonを使って、compute shaderを書いてみた。
compute shaderでtextureを作り、fragment shaderでtextureとして貼り付けてます。とりあえず、変数をどう取り扱うかのテストを兼ねてます。
from OpenGL.GL import *
from OpenGL.WGL import *
from ctypes import *
from ctypes.wintypes import *
import sys

# Vertex shader: derives the four corners of a full-screen quad from
# gl_VertexID, so no vertex buffer is needed.
vsh = """
#version 430
void main() {
    gl_Position = vec4(ivec2(gl_VertexID & 1, gl_VertexID >> 1 & 1)*2-1, 1, 1);
}
"""

# Fragment shader: samples the texture written by the compute shader,
# using the fragment position normalized by `resolution`.
fsh = """
#version 430
uniform vec2 resolution;
uniform sampler2D tex2d;
out vec4 fragColor;
void main() {
    vec2 p = gl_FragCoord.xy/ resolution;
    fragColor = texture(tex2d, p);
}
"""

# Compute shader: one invocation per texel; draws a diamond outline
# (|x| + |y| = 1) into destTex.
csh = """
#version 430
uniform vec2 resolution;
uniform float time;
writeonly uniform image2D destTex;
layout(local_size_x=16, local_size_y=16) in;
void main() {
    vec2 fragCoord = vec2(gl_GlobalInvocationID.xy);
    vec2 p = (fragCoord * 2.0 - resolution) / resolution.y;
    vec3 col= vec3(0.1);
    float de = abs(abs(p.x) + abs(p.y) - 1.0);
    col = vec3(mix(vec3(1,0.8,0.1),col,smoothstep(0.0,0.005,de)));
    imageStore( destTex, ivec2(gl_GlobalInvocationID.xy), vec4(col,0) );
}
"""


def _log_text(log):
    """Return an OpenGL info log as str, whether it arrives as bytes or str."""
    return log.decode() if isinstance(log, bytes) else str(log)


def _compile_shader(source, stage):
    """Compile `source` as a shader of type `stage`; raise RuntimeError on failure."""
    shader = glCreateShader(stage)
    glShaderSource(shader, source)
    glCompileShader(shader)
    if glGetShaderiv(shader, GL_COMPILE_STATUS) != GL_TRUE:
        raise RuntimeError(_log_text(glGetShaderInfoLog(shader)))
    return shader


def _link_program(*stages):
    """Build a program from (source, stage) pairs; raise RuntimeError if linking fails."""
    prog = glCreateProgram()
    for source, stage in stages:
        glAttachShader(prog, _compile_shader(source, stage))
    glLinkProgram(prog)
    # A shader can compile cleanly and still fail to link, so check both.
    if glGetProgramiv(prog, GL_LINK_STATUS) != GL_TRUE:
        raise RuntimeError(_log_text(glGetProgramInfoLog(prog)))
    return prog


winmm = windll.winmm
kernel32 = windll.kernel32
user32 = windll.user32

XRES = 640
YRES = 480

WS_OVERLAPPEDWINDOW = 0xcf0000
WS_VISIBLE = 0x10000000
PM_REMOVE = 1
WM_NCLBUTTONDOWN = 161
HTCLOSE = 20
VK_ESCAPE = 27
PFD_SUPPORT_OPENGL = 32
PFD_DOUBLEBUFFER = 1

# NOTE(review): 0xC018 is passed where CreateWindowExA expects a window class
# name/atom -- presumably a predefined system class; confirm.
hWnd = user32.CreateWindowExA(0,0xC018,0,WS_OVERLAPPEDWINDOW|WS_VISIBLE,30,30,XRES,YRES,0,0,0,0)
hdc = user32.GetDC(hWnd)
user32.SetForegroundWindow(hWnd)
pfd = PIXELFORMATDESCRIPTOR(0,1,PFD_SUPPORT_OPENGL|PFD_DOUBLEBUFFER,32,0,0,0,0,0,0,0,0,0,0,0,0,0,32,0,0,0,0,0,0,0)
SetPixelFormat(hdc, ChoosePixelFormat(hdc, pfd), pfd)
hGLrc = wglCreateContext(hdc)
wglMakeCurrent(hdc, hGLrc)

try:
    glClearColor(0, 0, 0, 1)
    glEnable(GL_CULL_FACE)
    glCullFace(GL_BACK)
    glEnable(GL_DEPTH_TEST)
    glDepthFunc(GL_LEQUAL)

    # Render program (vertex + fragment).
    program = _link_program((vsh, GL_VERTEX_SHADER), (fsh, GL_FRAGMENT_SHADER))
    glUseProgram(program)
    glUniform2f(glGetUniformLocation(program, "resolution"), XRES , YRES)

    # Compute program.
    computeProg = _link_program((csh, GL_COMPUTE_SHADER))
    glUseProgram(computeProg)
    glUniform2f(glGetUniformLocation(computeProg, "resolution"), XRES , YRES)

    # One RGBA32F texture, bound both as image unit 0 (compute writes) and
    # as texture unit 0 (fragment shader reads).
    texture = glGenTextures(1)
    glActiveTexture(GL_TEXTURE0)
    glBindTexture(GL_TEXTURE_2D, texture)
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR)
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR)
    glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA32F, XRES, YRES, 0, GL_RGBA, GL_FLOAT, None)
    glBindImageTexture(0, texture, 0, GL_FALSE, 0, GL_WRITE_ONLY, GL_RGBA32F)

    glUseProgram(computeProg)
    glUniform1i(glGetUniformLocation(computeProg, "destTex"), 0)
    # The location never changes after linking, so look it up once.
    time_loc = glGetUniformLocation(computeProg, "time")
    glUseProgram(program)
    glUniform1i(glGetUniformLocation(program, "tex2d"), 0)

    duration = 60
    msg = MSG()
    lpmsg = pointer(msg)
    zero = winmm.timeGetTime()
    done = False
    fps, cnt, s0 = 0, 0, 0
    while not done:
        # Drain pending window messages; a click on the close button ends the loop.
        while user32.PeekMessageA(lpmsg, 0, 0, 0, PM_REMOVE):
            if (msg.message == WM_NCLBUTTONDOWN and msg.wParam == HTCLOSE):
                done = True
            user32.DispatchMessageA(lpmsg)
        if user32.GetAsyncKeyState(VK_ESCAPE):
            done = True

        t = (winmm.timeGetTime() - zero)*0.001

        # Fill the texture with the compute shader.
        glUseProgram(computeProg)
        glUniform1f(time_loc, t)
        glDispatchCompute(XRES//16, YRES//16, 1)
        # imageStore writes are not automatically visible to later texture
        # fetches; the barrier orders them before the draw below.
        glMemoryBarrier(GL_TEXTURE_FETCH_BARRIER_BIT)

        # Draw the texture on a full-screen quad.
        glUseProgram(program)
        glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT)
        glDrawArrays(GL_TRIANGLE_STRIP, 0, 4)
        SwapBuffers(hdc)

        # Update the FPS counter roughly once per second.
        cnt += 1
        if (t - s0 > 1):
            fps = cnt
            cnt = 0
            s0 = t
        sys.stdout.write("\r FPS : %d TIME : %f" %(fps,t))
        sys.stdout.flush()

        if (t > duration):
            done = True
finally:
    # Release the GL context, DC and window even if setup or rendering raised.
    wglMakeCurrent(0, 0)
    wglDeleteContext(hGLrc)
    user32.ReleaseDC(hWnd, hdc)
    user32.PostQuitMessage(0)
    user32.DestroyWindow(hWnd)
compute shaderに登場してきたglDispatchCompute(XRES//16, YRES//16, 1)
とshaderの中のlayout(local_size_x=16, local_size_y=16) in;
について調べてみます。
この2つの関係を手探りで調べていきます。
glDispatchCompute(XRES//1, YRES//1, 1) -- layout(local_size_x=1, local_size_y=1) in; -- FPS 60
glDispatchCompute(XRES//16, YRES//16, 1) -- layout(local_size_x=16, local_size_y=16) in; -- FPS 60
glDispatchCompute(XRES//32, YRES//32, 1) -- layout(local_size_x=32, local_size_y=32) in; -- FPS 60
glDispatchCompute(XRES//32, YRES//32, 1) -- layout(local_size_x=16, local_size_y=16) in; -- 画面表示が4分の1
もうちょい、GPUの負荷が増えるソースを書いて、又試してみます。
画面で座標を取るのは、
vec2 fragCoord = vec2(gl_GlobalInvocationID.xy); vec2 p = (fragCoord * 2.0 - resolution) / resolution.y;
で良さそう。
gl_GlobalInvocationID
はuvec3
みたい。