Refactor GPU and Metal langauges in their own separate renderers (tin…

…ygrad#2033) * Refactor GPU and Metal langauges in their own separate renderers * remove CStyleLanguage imports * move renderers too
JonathanPlasse · Oct 10, 2023 · 71d93ff · 71d93ff
1 parent f139060
commit 71d93ff
Show file tree

Hide file tree

Showing 4 changed files with 41 additions and 16 deletions.
diff --git a/tinygrad/renderer/metal.py b/tinygrad/renderer/metal.py
@@ -0,0 +1,16 @@
+import functools
+from tinygrad.renderer.cstyle import uops_to_cstyle, CStyleLanguage
+
+class MetalLanguage(CStyleLanguage):
+ kernel_prefix = "#include <metal_stdlib>\nusing namespace metal;\nkernel "
+ buffer_prefix = "device "
+ smem_prefix = "threadgroup "
+ arg_int_prefix = "constant int&"
+ barrier = "threadgroup_barrier(mem_flags::mem_threadgroup);"
+ float4 = "float4"
+ uses_ptr_arithmetic=True
+ gid = [f"gid.{chr(120+i)}" for i in range(3)]
+ lid = [f"lid.{chr(120+i)}" for i in range(3)]
+ extra_args = ['uint3 gid [[threadgroup_position_in_grid]]', 'uint3 lid [[thread_position_in_threadgroup]]']
+
+MetalRenderer = functools.partial(uops_to_cstyle, MetalLanguage())
diff --git a/tinygrad/renderer/opencl.py b/tinygrad/renderer/opencl.py
@@ -0,0 +1,19 @@
+import functools
+from tinygrad.helpers import dtypes
+from tinygrad.renderer.cstyle import uops_to_cstyle, CStyleLanguage
+
+type_map = { dtypes.uint8: "uchar", dtypes.uint32: "uint", dtypes.uint64: "ulong" }
+class OpenCLLanguage(CStyleLanguage):
+ kernel_prefix = "__kernel "
+ buffer_prefix = "__global "
+ smem_align = "__attribute__ ((aligned (16))) "
+ smem_prefix = "__local "
+ arg_int_prefix = "const int"
+ half_prekernel = "#pragma OPENCL EXTENSION cl_khr_fp16 : enable"
+ barrier = "barrier(CLK_LOCAL_MEM_FENCE);"
+ float4 = "(float4)"
+ gid = [f'get_group_id({i})' for i in range(3)]
+ lid = [f'get_local_id({i})' for i in range(3)]
+ uses_vload=True
+
+OpenCLRenderer = functools.partial(uops_to_cstyle, OpenCLLanguage())
diff --git a/tinygrad/runtime/ops_gpu.py b/tinygrad/runtime/ops_gpu.py
@@ -1,13 +1,13 @@
 from __future__ import annotations
-import pathlib, functools
+import pathlib
 import numpy as np
 import pyopencl as cl # type: ignore
 from typing import Optional, List
 from tinygrad.helpers import DEBUG, getenv, prod, ImageDType, OSX, fromimport
 from tinygrad.ops import Compiled
+from tinygrad.renderer.opencl import OpenCLRenderer
 from tinygrad.runtime.lib import RawBufferCopyInOut, LRUAllocator, RawBufferTransfer
 from tinygrad.codegen.kernel import LinearizerOptions
-from tinygrad.renderer.cstyle import uops_to_cstyle, CStyleLanguage
 
 OSX_TIMING_RATIO = (125/3) if OSX else 1.0 # see test/external/external_osx_profiling.py to determine this ratio. it's in like GPU clocks or something
 
@@ -103,9 +103,4 @@ def __call__(self, global_size, local_size, *bufs, wait=False) -> Optional[float
  return None
  return None
 
-renderer = functools.partial(uops_to_cstyle, CStyleLanguage(
- kernel_prefix = "__kernel ", buffer_prefix = "__global ", smem_align = "__attribute__ ((aligned (16))) ", smem_prefix = "__local ", arg_int_prefix = "const int",
- half_prekernel = "#pragma OPENCL EXTENSION cl_khr_fp16 : enable",
- barrier = "barrier(CLK_LOCAL_MEM_FENCE);", float4 = "(float4)",
- gid = [f'get_group_id({i})' for i in range(3)], lid = [f'get_local_id({i})' for i in range(3)], uses_vload=True))
-GPUBuffer = Compiled(CLBuffer, LinearizerOptions(), renderer, CLProgram, CL.synchronize)
+GPUBuffer = Compiled(CLBuffer, LinearizerOptions(), OpenCLRenderer, CLProgram, CL.synchronize)
diff --git a/tinygrad/runtime/ops_metal.py b/tinygrad/runtime/ops_metal.py
@@ -1,11 +1,11 @@
 # pip3 install pyobjc-framework-Metal pyobjc-framework-Cocoa pyobjc-framework-libdispatch
-import os, subprocess, pathlib, functools, ctypes
+import os, subprocess, pathlib, ctypes
 import Metal, Cocoa, libdispatch # type: ignore
 from typing import List, Any, Tuple
 from tinygrad.codegen.kernel import LinearizerOptions
-from tinygrad.renderer.cstyle import uops_to_cstyle, CStyleLanguage
 from tinygrad.helpers import prod, getenv, DEBUG, DType, dtypes
 from tinygrad.ops import Compiled, ASTRunner, BasicBatchExecutor
+from tinygrad.renderer.metal import MetalRenderer
 from tinygrad.runtime.lib import RawBufferMapped, LRUAllocator
 
 METAL_XCODE = getenv("METAL_XCODE")
@@ -100,9 +100,4 @@ def __call__(self, global_size, local_size, *bufs, wait=False):
  return command_buffer.GPUEndTime() - command_buffer.GPUStartTime()
  METAL.mtl_buffers_in_flight.append(command_buffer)
 
-renderer = functools.partial(uops_to_cstyle, CStyleLanguage(
- kernel_prefix = "#include <metal_stdlib>\nusing namespace metal;\nkernel ", buffer_prefix = "device ", smem_prefix = "threadgroup ", arg_int_prefix = "constant int&",
- barrier = "threadgroup_barrier(mem_flags::mem_threadgroup);", float4 = "float4", uses_ptr_arithmetic=True,
- gid = [f"gid.{chr(120+i)}" for i in range(3)], lid = [f"lid.{chr(120+i)}" for i in range(3)],
- extra_args = ['uint3 gid [[threadgroup_position_in_grid]]', 'uint3 lid [[thread_position_in_threadgroup]]']))
-MetalBuffer = Compiled(RawMetalBuffer, LinearizerOptions(device="METAL"), renderer, MetalProgram, METAL.synchronize, MetalBatchExecutor)
+MetalBuffer = Compiled(RawMetalBuffer, LinearizerOptions(device="METAL"), MetalRenderer, MetalProgram, METAL.synchronize, MetalBatchExecutor)