#!/usr/bin/env python3

#
# (c) 2019-2023 Antmicro
# License: Apache-2.0
#

"""Run ``sar`` (and ``nvidia-smi`` when available) and log samples to a file.

Samples are appended to ``<session>.txt``. Commands (labels, save, quit)
arrive on stdin, and a summary line is appended before graphs are produced
through the ``graph`` module.
"""

import datetime
import fcntl
import os
import re
import select
import signal
import subprocess
import sys
import time

import graph

from common import *

# Set to 1 when the main loop in watch() should terminate
die = 0

# Initialize summary variables
SAMPLE_NUMBER = 0
TOTAL_RAM = 0
START_DATE = ""
END_DATE = ""
TOTAL_LOAD = 0.0
MAX_USED_RAM = 0
MAX_USED_FS = 0
TOTAL_FS = 0
MAX_TX = 0
MAX_RX = 0
START_TX = 0
START_RX = 0
END_TX = 0
END_RX = 0
TOTAL_GPU_LOAD = 0.0
TOTAL_GPU_RAM = 0
MAX_USED_GPU_RAM = 0

FS_NAME = None
FS_SAR_INDEX = None

IFACE_NAME = None
IFACE_SAR_INDEX = None


# Handle SIGTERM
def kill_handler(a, b):
    """Signal handler: ask the main loop in watch() to stop.

    Both arguments (passed by the ``signal`` module) are ignored.
    """
    global die
    die = 1


# Read a single table from sar output
def read_table(f):
    """Read one whitespace-separated table from the binary stream ``f``.

    Blank lines before the header are skipped. The first header cell is
    renamed to ``'time'``. Rows are consumed until the first blank line (or
    end of stream).

    Returns:
        dict mapping each column title to the list of its string values.

    Raises:
        EOFError: the stream ended before any header line was found.
    """
    # Find the header
    while True:
        raw = f.readline()
        if not raw:
            # readline() returns b"" forever at EOF; without this check the
            # loop would spin endlessly once the producer has exited.
            raise EOFError("stream ended while waiting for a table header")
        header = raw.decode().split()
        if header:
            break

    # The first column is always just time
    header[0] = 'time'

    table = {title: [] for title in header}

    # Read rows
    while True:
        row = f.readline().decode().split()
        if not row:
            break
        for i, value in enumerate(row):
            table[header[i]].append(value)

    return table


# Read received/sent bytes from a given interface's sys stats
def read_iface_stats(iface):
    """Return ``(rx, tx)`` read from ``/sys/class/net/<iface>/statistics``.

    The values are extracted with ``scan`` from ``common`` using ``int`` as
    the converter.
    """
    with open(f"/sys/class/net/{iface}/statistics/rx_bytes") as f:
        rx = scan(r"(\d+)", int, f.readline())
    with open(f"/sys/class/net/{iface}/statistics/tx_bytes") as f:
        tx = scan(r"(\d+)", int, f.readline())
    return rx, tx


# Initialize 'data.txt' where the data is dumped
def initialize(session, machine):
    """Create ``<session>.txt`` and write its header line.

    Args:
        session: base name of the output file (``.txt`` is appended).
        machine: first line of sar output; the first two space-separated
            words are stored as the machine name and the number inside the
            ``(N CPU)`` group as the CPU count.

    Side effects:
        Sets ``TOTAL_RAM`` from ``/proc/meminfo`` and, when ``nvidia-smi``
        succeeds, ``TOTAL_GPU_RAM``.
    """
    global TOTAL_RAM
    global TOTAL_GPU_RAM

    with open("/proc/meminfo") as f:
        TOTAL_RAM = int(scan(r"MemTotal:\s+(\d+)", float, f.read()))

    uname = machine.split(" ")[0:2]
    uname = f"{uname[0]} {uname[1]}"

    cpus = int(machine.split(" CPU)")[0].split("(")[-1])

    cpu_name = "unknown"

    with open("/proc/cpuinfo") as f:
        for line in f:
            if "model name" in line:
                cpu_name = line.replace("\n", "").split(": ")[1]
                break

    header = [
        f"# sargraph version: {SARGRAPH_VERSION}",
        f"pid: {os.getpid()}",
        f"machine: {uname}",
        f"cpu count: {cpus}",
        f"cpu: {cpu_name}"
    ]

    # GPU information is best-effort: any failure is reported and ignored
    try:
        pgpu = subprocess.run(
            [
                'nvidia-smi',
                '--query-gpu=name,driver_version,memory.total',
                '--format=csv,noheader,nounits'
            ],
            capture_output=True
        )
        if pgpu.returncode == 0:
            gpuname, gpudriver, memory_total = pgpu.stdout.decode('utf-8').rsplit(', ', 2)
            header.extend([
                f"gpu: {gpuname}",
                f"gpu driver: {gpudriver}"
            ])
            TOTAL_GPU_RAM = int(memory_total)
    except Exception as e:
        print(e)

    with open(f"{session}.txt", "w") as f:
        print(*header, sep=", ", file=f)


# Add a summary comment to 'data.txt'
def summarize(session):
    """Append a one-line summary of the gathered samples to ``<session>.txt``.

    Nothing is written when no sample has been collected yet.
    """
    # Is there anything to be summarized?
    if SAMPLE_NUMBER == 0:
        return

    average_load = TOTAL_LOAD / float(SAMPLE_NUMBER)
    max_used_ram = MAX_USED_RAM * 1024.0
    total_ram = TOTAL_RAM * 1024.0
    max_used_fs = MAX_USED_FS * 1024.0 * 1024.0
    total_fs = TOTAL_FS * 1024 * 1024
    max_tx = MAX_TX / 128  # kB/s to Mb/s
    max_rx = MAX_RX / 128  # kB/s to Mb/s
    total_tx = END_TX - START_TX
    total_rx = END_RX - START_RX

    sdt = datetime.datetime.strptime(START_DATE, '%Y-%m-%d %H:%M:%S')
    edt = datetime.datetime.strptime(END_DATE, '%Y-%m-%d %H:%M:%S')
    delta_t = (edt - sdt).total_seconds()

    summary = [
        f"# total ram: {total_ram:.2f} B",
        f"total disk space: {total_fs:.2f} B",
        f"max ram used: {max_used_ram:.2f} B",
        f"max disk used: {max_used_fs:.2f} B",
        f"average load: {average_load:.2f} %",
        f"observed disk: {FS_NAME}",
        f"max received: {max_rx:.2f} Mb/s",
        f"max sent: {max_tx:.2f} Mb/s",
        f"observed network: {IFACE_NAME}",
        f"duration: {delta_t} seconds",
        f"total received: {total_rx} b",
        f"total sent: {total_tx} b"
    ]

    if TOTAL_GPU_RAM != 0:
        summary.extend([
            f"total gpu ram: {TOTAL_GPU_RAM * 1024 * 1024:.2f} B",  # default units are MiB
            f"max gpu ram used: {MAX_USED_GPU_RAM * 1024 * 1024:.2f} B",  # default units are MiB
            f"average gpu load: {TOTAL_GPU_LOAD / SAMPLE_NUMBER:.2f} %"
        ])

    with open(f"{session}.txt", "a") as f:
        print(*summary, sep=", ", file=f)


# Pick the default device when the user did not name one
def _index_of_largest(values):
    """Return the index of the first largest positive value in ``values``.

    Falls back to index 0 when ``values`` is empty or nothing is positive.
    """
    best_index, best_value = 0, 0
    for index, value in enumerate(values):
        if best_value < value:
            best_index, best_value = index, value
    return best_index


# Run sar and gather data from it
def watch(session, fsdev, iface):
    """Collect sar (and optional GPU) samples until asked to stop.

    Args:
        session: base name of the output file (``<session>.txt``).
        fsdev: filesystem name to observe as listed in sar's ``FILESYSTEM``
            column; when falsy, the filesystem with the largest
            used+free size in the first sample is chosen.
        iface: network interface to observe as listed in sar's ``IFACE``
            column; when falsy, the interface with the highest ``rxkB/s``
            in the first sample is chosen.

    The loop ends on a ``command:q:`` line from stdin, on SIGTERM, or when
    sar's output stream ends. If no graph was produced by a command, the
    summary is written and a graph is produced on exit.
    """
    global SAMPLE_NUMBER, START_DATE, END_DATE
    global TOTAL_LOAD, TOTAL_RAM, MAX_USED_RAM
    global TOTAL_FS, MAX_USED_FS, FS_NAME, FS_SAR_INDEX
    global MAX_RX, MAX_TX, START_RX, START_TX, END_RX, END_TX
    global IFACE_NAME, IFACE_SAR_INDEX
    global TOTAL_GPU_LOAD, MAX_USED_GPU_RAM
    global die

    # Was a graph already produced by save command from sargraph?
    dont_plot = False

    # Work on a copy so this process's own environment is left untouched
    my_env = os.environ.copy()
    my_env["S_TIME_FORMAT"] = "ISO"
    p = run_or_fail("sar", "-F", "-u", "-r", "-n", "DEV", "1", stdout=subprocess.PIPE, env=my_env)

    # subprocess for GPU data fetching in the background
    try:
        pgpu = subprocess.Popen(
            [
                'nvidia-smi',
                '--query-gpu=utilization.gpu,memory.used',
                '--format=csv,noheader,nounits',
                '-l',
                '1'
            ],
            stdout=subprocess.PIPE,
            env=my_env
        )
    except OSError:
        # nvidia-smi is missing or not executable: run without GPU data
        pgpu = None

    machine = p.stdout.readline().decode()
    initialize(session, machine)
    p.stdout.readline()

    signal.signal(signal.SIGTERM, kill_handler)

    # Make stdin nonblocking to continue working when no command is sent
    flags = fcntl.fcntl(sys.stdin, fcntl.F_GETFL)
    fcntl.fcntl(sys.stdin, fcntl.F_SETFL, flags | os.O_NONBLOCK)

    readlist = [p.stdout, sys.stdin]
    if pgpu:
        readlist.append(pgpu.stdout)

    # Gather data from sar output
    curr_gpu_util = 0
    curr_gpu_mem = 0

    while 1:
        # Await sar output or a command sent from command handler in sargraph.py
        rlist, _, _ = select.select(readlist, [], [], 0.25)
        now = datetime.datetime.now()

        if sys.stdin in rlist:
            label_line = sys.stdin.readline().replace("\n", "")
            if label_line.startswith("command:"):
                label_line = label_line[len("command:"):]
                if label_line.startswith("q:"):
                    label_line = label_line[len("q:"):]

                    summarize(session)
                    if label_line == "none":
                        pass
                    elif label_line:
                        graph.graph(session, label_line)
                    elif not dont_plot:
                        graph.graph(session)
                    dont_plot = True
                    die = 1
                    break
                elif label_line.startswith("s:"):
                    label_line = label_line[len("s:"):]

                    dont_plot = True

                    if label_line != "none":
                        summarize(session)
                        if not label_line:
                            graph.graph(session)
                        else:
                            graph.graph(session, label_line)
            elif label_line.startswith('label:'):
                label_line = label_line[len('label:'):]
                with open(f"{session}.txt", "a") as f:
                    timestamp = now.strftime("%Y-%m-%d-%H:%M:%S")
                    print(f"# {timestamp} label: {label_line}", file=f)

        if p.stdout not in rlist:
            continue

        date = now.strftime("%Y-%m-%d")
        daytime = now.strftime("%H:%M:%S")

        # Read the whole sample (CPU, RAM, network, FS tables) before
        # touching any totals, so a truncated sample never leaves the
        # summary variables half-updated.
        try:
            cpu_data = read_table(p.stdout)
            ram_data = read_table(p.stdout)
            net_data = read_table(p.stdout)
            fs_data = read_table(p.stdout)
        except EOFError:
            # sar has exited; stop sampling and finish up below
            break

        # Process CPU data
        if START_DATE == "":
            START_DATE = date + " " + daytime
        TOTAL_LOAD += stof(cpu_data["%user"][0])
        SAMPLE_NUMBER += 1

        # Process RAM data
        kbmemused = int(ram_data['kbmemused'][0])
        if TOTAL_RAM == 0:
            TOTAL_RAM = kbmemused + int(ram_data['kbmemfree'][0])
        if MAX_USED_RAM < kbmemused:
            MAX_USED_RAM = kbmemused

        # Process network data
        if IFACE_SAR_INDEX is None:
            if iface:
                IFACE_SAR_INDEX = net_data['IFACE'].index(iface)
            else:
                IFACE_SAR_INDEX = _index_of_largest(
                    stof(rate) for rate in net_data['rxkB/s']
                )
        if IFACE_NAME is None:
            IFACE_NAME = net_data['IFACE'][IFACE_SAR_INDEX]
        if START_RX <= 0 or START_TX <= 0:
            START_RX, START_TX = read_iface_stats(IFACE_NAME)
        END_RX, END_TX = read_iface_stats(IFACE_NAME)
        rx_rate = stof(net_data['rxkB/s'][IFACE_SAR_INDEX])
        tx_rate = stof(net_data['txkB/s'][IFACE_SAR_INDEX])
        if MAX_RX < rx_rate:
            MAX_RX = rx_rate
        if MAX_TX < tx_rate:
            MAX_TX = tx_rate

        # Process FS data
        if FS_SAR_INDEX is None:
            if fsdev:
                FS_SAR_INDEX = fs_data['FILESYSTEM'].index(fsdev)
            else:
                FS_SAR_INDEX = _index_of_largest(
                    stof(free) + stof(used)
                    for free, used in zip(fs_data['MBfsfree'], fs_data['MBfsused'])
                )
        if FS_NAME is None:
            FS_NAME = fs_data["FILESYSTEM"][FS_SAR_INDEX]
        if TOTAL_FS == 0:
            TOTAL_FS = (stof(fs_data['MBfsused'][FS_SAR_INDEX]) +
                        stof(fs_data['MBfsfree'][FS_SAR_INDEX]))
        fs_used = int(fs_data['MBfsused'][FS_SAR_INDEX])
        if MAX_USED_FS < fs_used:
            MAX_USED_FS = fs_used

        END_DATE = date + " " + daytime
        timestamp = date + "-" + daytime

        if pgpu and pgpu.stdout in rlist:
            gpu_line = pgpu.stdout.readline().decode('utf-8')
            if not gpu_line:
                # nvidia-smi has exited; an EOF pipe is always "readable",
                # so stop polling it and keep the last known values.
                readlist.remove(pgpu.stdout)
            else:
                try:
                    gpu_util, gpu_mem = [
                        int(val.strip()) for val in gpu_line.split(', ')
                    ]
                except ValueError:
                    # Malformed line: keep the previous values
                    pass
                else:
                    curr_gpu_util, curr_gpu_mem = gpu_util, gpu_mem
                    if MAX_USED_GPU_RAM < curr_gpu_mem:
                        MAX_USED_GPU_RAM = curr_gpu_mem
                    TOTAL_GPU_LOAD += curr_gpu_util

        with open(f"{session}.txt", "a") as f:
            line = [
                timestamp,
                cpu_data['%user'][0],
                ram_data['%memused'][0],
                fs_data['%fsused'][FS_SAR_INDEX],
                rx_rate / 128,  # kB/s to Mb/s
                tx_rate / 128   # kB/s to Mb/s
            ]
            if pgpu and TOTAL_GPU_RAM != 0:
                line.extend([
                    f'{curr_gpu_util:.2f}',
                    f'{curr_gpu_mem / TOTAL_GPU_RAM * 100.0:.2f}'
                ])
            print(*line, file=f)

        if die:
            break

    # This runs if we were stopped by SIGTERM and no plot was made so far
    if not dont_plot:
        summarize(session)
        graph.graph(session)