|
|
|
@ -282,15 +282,15 @@ def watch(session, fsdev, iface, tmpfs_color, other_cache_color): |
|
|
|
flags = fcntl.fcntl(sys.stdin, fcntl.F_GETFL) |
|
|
|
flags = fcntl.fcntl(sys.stdin, fcntl.F_GETFL) |
|
|
|
fcntl.fcntl(sys.stdin, fcntl.F_SETFL, flags | os.O_NONBLOCK) |
|
|
|
fcntl.fcntl(sys.stdin, fcntl.F_SETFL, flags | os.O_NONBLOCK) |
|
|
|
|
|
|
|
|
|
|
|
readlist = [psar.stdout, sys.stdin] |
|
|
|
|
|
|
|
if pgpu: |
|
|
|
|
|
|
|
readlist.append(pgpu.stdout) |
|
|
|
|
|
|
|
# Gather data from sar output |
|
|
|
# Gather data from sar output |
|
|
|
curr_gpu_util = 0 |
|
|
|
curr_gpu_util = 0 |
|
|
|
curr_gpu_mem = 0 |
|
|
|
curr_gpu_mem = 0 |
|
|
|
|
|
|
|
|
|
|
|
while 1: |
|
|
|
while 1: |
|
|
|
# Await sar output or a command sent from command handler in sargraph.py |
|
|
|
# Await sar output or a command sent from command handler in sargraph.py |
|
|
|
|
|
|
|
readlist = [psar.stdout, sys.stdin] |
|
|
|
|
|
|
|
if pgpu: |
|
|
|
|
|
|
|
readlist.append(pgpu.stdout) |
|
|
|
rlist, _, _ = select.select(readlist, [], [], 0.25) |
|
|
|
rlist, _, _ = select.select(readlist, [], [], 0.25) |
|
|
|
now = datetime.datetime.now() |
|
|
|
now = datetime.datetime.now() |
|
|
|
if sys.stdin in rlist: |
|
|
|
if sys.stdin in rlist: |
|
|
|
@ -394,12 +394,26 @@ def watch(session, fsdev, iface, tmpfs_color, other_cache_color): |
|
|
|
|
|
|
|
|
|
|
|
if pgpu and pgpu.stdout in rlist: |
|
|
|
if pgpu and pgpu.stdout in rlist: |
|
|
|
line = pgpu.stdout.readline().decode('utf-8') |
|
|
|
line = pgpu.stdout.readline().decode('utf-8') |
|
|
|
curr_gpu_util, curr_gpu_mem = [ |
|
|
|
if pgpu.poll() is not None: |
|
|
|
int(val.strip()) for val in line.split(', ') |
|
|
|
print("nvidia-smi stopped working, reason:") |
|
|
|
] |
|
|
|
print(line) |
|
|
|
if MAX_USED_GPU_RAM < curr_gpu_mem: |
|
|
|
print(f"Error code: {pgpu.returncode}") |
|
|
|
MAX_USED_GPU_RAM = curr_gpu_mem |
|
|
|
print("Closing the GPU statistics collection") |
|
|
|
TOTAL_GPU_LOAD += curr_gpu_util |
|
|
|
pgpu = None |
|
|
|
|
|
|
|
else: |
|
|
|
|
|
|
|
try: |
|
|
|
|
|
|
|
curr_gpu_util, curr_gpu_mem = [ |
|
|
|
|
|
|
|
int(val.strip()) for val in line.split(', ') |
|
|
|
|
|
|
|
] |
|
|
|
|
|
|
|
if MAX_USED_GPU_RAM < curr_gpu_mem: |
|
|
|
|
|
|
|
MAX_USED_GPU_RAM = curr_gpu_mem |
|
|
|
|
|
|
|
TOTAL_GPU_LOAD += curr_gpu_util |
|
|
|
|
|
|
|
except ValueError: |
|
|
|
|
|
|
|
print(f"nvidia-smi error readout: {line}") |
|
|
|
|
|
|
|
if "Unknown Error" in line: |
|
|
|
|
|
|
|
# No valid readouts from now on, let's terminate current nvidia-smi session |
|
|
|
|
|
|
|
pgpu.terminate() |
|
|
|
|
|
|
|
pgpu = None |
|
|
|
|
|
|
|
|
|
|
|
line = [ |
|
|
|
line = [ |
|
|
|
timestamp, |
|
|
|
timestamp, |
|
|
|
|