forked from LeeSureman/Flat-Lattice-Transformer
gpu_utils.py · 111 lines (100 loc) · 3.87 KB
from subprocess import Popen, PIPE
from distutils import spawn
import os
import math
import random
import time
import sys
import platform
__version__ = '1.4.0'


class GPU:
    """A single GPU as reported by nvidia-smi: memory values are in MiB,
    load and memoryUtil are fractions in [0, 1]."""
    def __init__(self, ID, uuid, load, memoryTotal, memoryUsed, memoryFree, driver, gpu_name, serial, display_mode,
                 display_active, temp_gpu):
        self.id = ID
        self.uuid = uuid
        self.load = load
        self.memoryUtil = float(memoryUsed) / float(memoryTotal)
        self.memoryTotal = memoryTotal
        self.memoryUsed = memoryUsed
        self.memoryFree = memoryFree
        self.driver = driver
        self.name = gpu_name
        self.serial = serial
        self.display_mode = display_mode
        self.display_active = display_active
        self.temperature = temp_gpu


def safeFloatCast(strNumber):
    # Convert a string to float; non-numeric values reported by nvidia-smi
    # (e.g. "[Not Supported]") become NaN instead of raising.
    try:
        number = float(strNumber)
    except ValueError:
        number = float('nan')
    return number


def getGPUs():
    if platform.system() == "Windows":
        # If the platform is Windows and nvidia-smi
        # could not be found from the environment path,
        # try to find it from system drive with default installation path
        nvidia_smi = spawn.find_executable('nvidia-smi')
        if nvidia_smi is None:
            nvidia_smi = "%s\\Program Files\\NVIDIA Corporation\\NVSMI\\nvidia-smi.exe" % os.environ['systemdrive']
    else:
        nvidia_smi = "nvidia-smi"

    # Get ID, processing and memory utilization for all GPUs
    try:
        p = Popen([nvidia_smi,
                   "--query-gpu=index,uuid,utilization.gpu,memory.total,memory.used,memory.free,driver_version,name,gpu_serial,display_active,display_mode,temperature.gpu",
                   "--format=csv,noheader,nounits"], stdout=PIPE)
        stdout, stderror = p.communicate()
    except Exception:
        return []
    output = stdout.decode('UTF-8')

    # Parse output: one CSV line per device, fields in the same order as the query above
    lines = output.split(os.linesep)
    numDevices = len(lines) - 1
    GPUs = []
    for g in range(numDevices):
        line = lines[g]
        vals = line.split(', ')
        for i in range(12):
            if i == 0:
                deviceIds = int(vals[i])
            elif i == 1:
                uuid = vals[i]
            elif i == 2:
                gpuUtil = safeFloatCast(vals[i]) / 100
            elif i == 3:
                memTotal = safeFloatCast(vals[i])
            elif i == 4:
                memUsed = safeFloatCast(vals[i])
            elif i == 5:
                memFree = safeFloatCast(vals[i])
            elif i == 6:
                driver = vals[i]
            elif i == 7:
                gpu_name = vals[i]
            elif i == 8:
                serial = vals[i]
            elif i == 9:
                display_active = vals[i]
            elif i == 10:
                display_mode = vals[i]
            elif i == 11:
                temp_gpu = safeFloatCast(vals[i])
        GPUs.append(GPU(deviceIds, uuid, gpuUtil, memTotal, memUsed, memFree, driver, gpu_name, serial, display_mode,
                        display_active, temp_gpu))
    return GPUs  # (deviceIds, gpuUtil, memUtil)


def getAvailabilityGPU(GPUs, maxLoad=0.5, maxMemory=0.5, memoryFree=0, includeNan=False, excludeID=[], excludeUUID=[]):
    # Determine which GPUs are available: enough free memory, load and memory
    # utilization below the given thresholds, and not explicitly excluded.
    GPUavailability = [1 if (gpu.memoryFree >= memoryFree)
                       and (gpu.load < maxLoad or (includeNan and math.isnan(gpu.load)))
                       and (gpu.memoryUtil < maxMemory or (includeNan and math.isnan(gpu.memoryUtil)))
                       and ((gpu.id not in excludeID) and (gpu.uuid not in excludeUUID)) else 0
                       for gpu in GPUs]
    gpus = [gpu for gpu, use in zip(GPUs, GPUavailability) if use]
    return gpus


if __name__ == '__main__':
    gpus = getGPUs()
    print(gpus)
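    # Example (a sketch added for illustration, not part of the original file):
    # a common pattern in training scripts is to pick an idle device with
    # getAvailabilityGPU and pin it via CUDA_VISIBLE_DEVICES before any CUDA
    # context is created; the thresholds below are illustrative assumptions.
    available = getAvailabilityGPU(gpus, maxLoad=0.3, maxMemory=0.3)
    if available:
        os.environ['CUDA_VISIBLE_DEVICES'] = str(available[0].id)
        print('Selected GPU %d (%s)' % (available[0].id, available[0].name))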