Skip to content

📄🌔 예제. 유니티 컴퓨트 셰이더

Notifications You must be signed in to change notification settings


Folders and files

Last commit message
Last commit date

Latest commit



1 Commit

Repository files navigation


  • 일반 렌더링 파이프라인과 별도로 그래픽 카드에서 실행되는 프로그램.
  • 그래픽스와 관련이 없는 환경에서도 GPU의 성능을 활용할 수 있는 API 필요
    • GPGPU API 등장 (General-purpose computing on graphics processing units)
OpenCL Khronos Group
CUDA C Nvidia
DirectCompute Microsoft
Platform Graphic Library
Windows OpenGL 4.3 / Vulkan / DirectX 11, 12 / Shader Model 5.0
Linux OpenGL 4.3 / Vulkan
macOS Metal
Android OpenGL ES 3.1 / Vulkan
iOS Metal
렌더 파이프라인 독립
VS > HS > DS > GS > PS Compute Shader

Group / Thread / Kernel

Group 쓰레드의 그룹. 동시 쓰레드 실행 .Dispatch
Thread 커널 실행하는 단위 [numthreads]
Kernel GPU가 처리하는 함수 #pragma kernel
SV_GroupID .Dispatch(3, 2, 1)
SV_GroupThreadID [numthreads(2, 2, 1)]
SV_DispatchThreadID SV_GroupID * numthreads + SV_GroupThreadID
SV_GroupIndex [0, (numthreadsX * numthreadsY * numthreadsZ) - 1]


/// ==================== .cs
_shader.Dispatch(kernelIndex, 3, 2, 1);

/// ==================== .compute
[numthreads(2, 2, 1)]

uint3 groupID    : SV_GroupID
uint3 threadID   : SV_GroupThreadID
uint3 dispatchID : SV_DispatchThreadID
uint  groupIndex : SV_GroupIndex

// SV_DispatchThreadID = SV_GroupID * numthreads + SV_GroupThreadID
dispatchID == groupID * uint3(2, 2, 1) + threadID;

// SV_GroupIndex =   SV_GroupThreadID.z * numthreads.x * numthreads.y
//                 + SV_GroupThreadID.y * numthreads.x
//                 + SV_GroupThreadID.x
groupIndex == threadID.z * 2 * 2 + threadID.y * 2 + threadID.x;

uint index = dispatchID.x + dispatchID.y * 6;

_Buffer[index] = index;
//  0  1  2  3  4  5
//  6  7  8  9 10 11
// 12 13 14 15 16 17
// 18 19 20 21 22 23

_Buffer[index] = groupID.x;   | _Buffer[index] = groupID.y;
//  0  0  1  1  2  2          |  0  0  0  0  0  0
//  0  0  1  1  2  2          |  0  0  0  0  0  0
//  0  0  1  1  2  2          |  1  1  1  1  1  1
//  0  0  1  1  2  2          |  1  1  1  1  1  1
_Buffer[index] = threadID.x;  | _Buffer[index] = threadID.y;
//  0  1  0  1  0  1          |  0  0  0  0  0  0
//  0  1  0  1  0  1          |  1  1  1  1  1  1
//  0  1  0  1  0  1          |  0  0  0  0  0  0
//  0  1  0  1  0  1          |  1  1  1  1  1  1
_Buffer[index] = dispatchID.x;| _Buffer[index] = dispatchID.y
//  0  1  2  3  4  5          |  0  0  0  0  0  0
//  0  1  2  3  4  5          |  1  1  1  1  1  1
//  0  1  2  3  4  5          |  2  2  2  2  2  2
//  0  1  2  3  4  5          |  3  3  3  3  3  3

_Buffer[index] = groupIndex;
//  0  1  0  1  0  1
//  2  3  2  3  2  3
//  0  1  0  1  0  1
//  2  3  2  3  2  3


#pragma kernel CSMain
RWTexture2D<float4> Result;

void CSMain (uint3 id : SV_DispatchThreadID)
    Result[id.xy] = float4(1, 0, 0, 1);
RenderTexture renderTexture;
Material material;
ComputeShader shader;

// Shader Model 5.0 level pixel or compute shaders can write into arbitrary locations of some textures, called "unordered access views" in UsingDX11GL3Features
renderTexture.enableRandomWrite = true;
// Universal's Unlit using `_BaseMap` as Main Texture.
material.SetTexture("_MainTex", renderTexture);

// .compute: #pragma kernel CSMain
int kernelHandle = shader.FindKernel("CSMain");
// .compute: RWTexture2D<float4> Result;
shader.SetTexture(kernelHandle, "Result", renderTexture); 
// .cs
shader.Dispatch(KernelHandle, 3, 2, 1);

// .compute
[numthreads(4, 4, 1)]
void CSMain (uint3 id : SV_DispatchThreadID)

// [total]
// (3 * 2 * 1) * (4 * 4 * 1) = 96 threads.
new RenderTexture(width: 256, height: 256, depth: 0);
// 256 x 256
// (0, 256)    (256, 256)
//      +-------+
//      |   .   |
//      |   .   |
//      +-------+
// (0,   0)    (256,   0)

// GroupID
// Dispatch(int kernelIndex, int threadGroupsX, int threadGroupsY, int threadGroupsZ);
shader.Dispatch(kernelHandle, 256 /  8, 256 /  8, 1);
shader.Dispatch(kernelHandle, 256 / 16, 256 / 16, 1);
// 32 * 32 * 1 | 256 / 8  = 32
// 16 * 16 * 1 | 256 / 16 = 16

// ThreadID
[numthreads(8, 8, 1)]
void CSMain (uint3 id : SV_DispatchThreadID)
// 8 * 8 * 1

// DispatchThreadID
// 32 * 8 = 256  | ThreadGroup * NumThreads
// 16 * 8 = 128  | ThreadGroup * NumThreads

// (0, 256)    (256, 256)
//      +-------+
//      |   .   |
//      |   .   |
//      +-------+
// (0,   0)    (256,   0)
// (0, 128)    (128, 128)
//      +---+
//      |   |
//      +---+
// (0,   0)    (128,   0)
// (32 * 32 * 1) * (8 * 8 * 1) = 65536
// (16 * 16 * 1) * (8 * 8 * 1) = 16384


  • 구조체 넘길때 사용
StructuredBuffer<T> 읽기 가능
RWStructuredBuffer<T> 읽기/쓰기 가능
AppendStructuredBuffer<T> push ComputeBufferType.Append
ConsumeStructuredBuffer<T> pop


cmd.Blit(_CameraSource, _Source);

// Bad
// _feature.shader.Dispatch(_handleKernel, _groupSize.x, _groupSize.y, 1);

// Good
cmd.DispatchCompute(_feature.shader, _handleKernel, _groupSize.x, _groupSize.y, 1);

cmd.Blit(_Output, _CameraSource);
// Bad - gamma
// textureToMake = new RenderTexture(
//     _texSize.x / divide,
//     _texSize.y / divide,
//     0)
// {
//     enableRandomWrite = true
// };
// textureToMake.Create();

// Good - linear
textureToMake = new RenderTexture(
    _texSize.x / divide,
    _texSize.y / divide,
    format: RenderTextureFormat.ARGB32,
    readWrite: RenderTextureReadWrite.Linear)
    enableRandomWrite = true


// shared
#pragma kernel CS_1
#pragma kernel CS_2

shared Texture2D<float4> _Source;
shared RWTexture2D<float4> _Output;
StructuredBuffer<Particle> _ParticleBuffer;

struct VStoFS
    float4 positionNDC  : SV_POSITION;
    float psize         : PSIZE;

// vertexID
//   point : 0
//   triangle : [0, 1, 2]
VStoFS vert(uint vertexID : SV_VertexID, uint instanceID : SV_InstanceID)
  Particle p = _ParticleBuffer[instanceID];



📄🌔 예제. 유니티 컴퓨트 셰이더




