Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Integrate Broadcom Devices in amd-smi framework #71

Open
wants to merge 11 commits into
base: amd-staging
Choose a base branch
from
1,430 changes: 946 additions & 484 deletions amdsmi_cli/amdsmi_commands.py

Large diffs are not rendered by default.

343 changes: 339 additions & 4 deletions amdsmi_cli/amdsmi_helpers.py

Large diffs are not rendered by default.

99 changes: 98 additions & 1 deletion amdsmi_cli/amdsmi_logger.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,10 @@ def _convert_json_to_tabular(self, json_object: Dict[str, any], dynamic=False):
elif key == 'gpu':
stored_gpu = string_value
table_values += string_value.rjust(3)
elif key == 'brcm_nic':
table_values += string_value.rjust(3)
elif key == 'brcm_switch':
table_values += string_value.rjust(3)
elif key == 'timestamp':
stored_timestamp = string_value
table_values += string_value.rjust(10) + ' '
Expand All @@ -135,6 +139,27 @@ def _convert_json_to_tabular(self, json_object: Dict[str, any], dynamic=False):
table_values += string_value.rjust(12)
elif key in ['pcie_replay']:
table_values += string_value.rjust(13)
#BRCM Device Metrics
#NIC
elif key == "NIC_TEMP_CURRENT":
table_values += string_value.rjust(21)
elif key == "NIC_TEMP_CRIT_ALARM":
table_values += string_value.rjust(22)
elif key == "NIC_TEMP_EMERGENCY_ALARM":
table_values += string_value.rjust(26)
elif key == "NIC_TEMP_SHUTDOWN_ALARM":
table_values += string_value.rjust(25)
elif key == "NIC_TEMP_MAX_ALARM":
table_values += string_value.rjust(20)
#SWITCH
elif key == "CURRENT_LINK_SPEED":
table_values += string_value.rjust(20)
elif key == "MAX_LINK_SPEED":
table_values += string_value.rjust(16)
elif key == "CURRENT_LINK_WIDTH":
table_values += string_value.rjust(20)
elif key == "MAX_LINK_WIDTH":
table_values += string_value.rjust(16)
# Only for handling topology tables
elif 'gpu_' in key:
table_values += string_value.ljust(13)
Expand Down Expand Up @@ -218,7 +243,7 @@ def _convert_json_to_human_readable(self, json_object: Dict[str, any]):
# Increase tabbing for device arguments by pulling them out of the main dictionary and assigning them to an empty string
tabbed_dictionary = {}
for key, value in capitalized_json.items():
if key not in ["GPU", "CPU", "CORE"]:
if key not in ["GPU", "CPU", "CORE","BRCM_NIC","BRCM_SWITCH"]:
tabbed_dictionary[key] = value

for key, value in tabbed_dictionary.items():
Expand Down Expand Up @@ -332,6 +357,30 @@ def store_output(self, device_handle, argument, data):
"""
gpu_id = self.helpers.get_gpu_id_from_device_handle(device_handle)
self._store_output_amdsmi(gpu_id=gpu_id, argument=argument, data=data)
def store_nic_output(self, device_handle, argument, data):
    """ Resolve the NIC id for a device handle, then record the output
        params:
            device_handle - device handle object of the target NIC
            argument (str) - key the data is stored under
            data (dict | list) - data to store against argument
        return:
            Nothing
    """
    # The helper maps the opaque handle to the id used in the output.
    resolved_nic_id = self.helpers.get_nic_id_from_device_handle(device_handle)
    self._store_nic_output_amdsmi(
        argument=argument,
        data=data,
        nic_id=resolved_nic_id,
    )


def store_switch_output(self, device_handle, argument, data):
    """ Convert device handle to switch id and store output
        params:
            device_handle - device handle object of the target switch
            argument (str) - key the data is stored under
            data (dict | list) - data to store against argument
        return:
            Nothing
    """
    # The helper maps the opaque handle to the id used in the output.
    switch_id = self.helpers.get_switch_id_from_device_handle(device_handle)
    self._store_switch_output_amdsmi(switch_id=switch_id, argument=argument, data=data)


def store_cpu_output(self, device_handle, argument, data):
Expand Down Expand Up @@ -424,7 +473,55 @@ def _store_output_amdsmi(self, gpu_id, argument, data):
self.output[argument] = data
else:
raise amdsmi_cli_exceptions(self, "Invalid output format given, only json, csv, and human_readable supported")

def _store_nic_output_amdsmi(self, nic_id, argument, data):
    """ Record one piece of NIC data in self.output for the active format
        params:
            nic_id - NIC id; stored as int under the 'brcm_nic' key
            argument (str) - key to store data under; 'values' and
                'timestamp' get special handling (see below)
            data (dict | list) - data to store against argument
        return:
            Nothing

        json / human readable: a dict passed as 'values' is merged into the
        output, anything else is stored under argument.
        csv: 'values' data or any dict is flattened with self.flatten_dict
        and merged, anything else is stored under argument.
        Any other output format raises.
    """
    if argument == 'timestamp':
        # Insert the key now so timestamp is the first element in the output;
        # the generic store further down may then assign data to the same key.
        self.output['timestamp'] = int(time.time())

    json_like = self.is_json_format() or self.is_human_readable_format()
    if not (json_like or self.is_csv_format()):
        raise amdsmi_cli_exceptions(self, "Invalid output format given, only json, csv, and human_readable supported")

    # The device id goes in before the payload so it precedes it in the output.
    self.output['brcm_nic'] = int(nic_id)

    if json_like:
        if argument == 'values' and isinstance(data, dict):
            self.output.update(data)
            return
    elif argument == 'values' or isinstance(data, dict):
        self.output.update(self.flatten_dict(data))
        return

    self.output[argument] = data


def _store_switch_output_amdsmi(self, switch_id, argument, data):
    """ Record one piece of switch data in self.output for the active format
        params:
            switch_id - switch id; stored as int under the 'brcm_switch' key
            argument (str) - key to store data under; 'values' and
                'timestamp' get special handling (see below)
            data (dict | list) - data to store against argument
        return:
            Nothing

        json / human readable: a dict passed as 'values' is merged into the
        output, anything else is stored under argument.
        csv: 'values' data or any dict is flattened with self.flatten_dict
        and merged, anything else is stored under argument.
        Any other output format raises.
    """
    if argument == 'timestamp':
        # Insert the key now so timestamp is the first element in the output;
        # the generic store further down may then assign data to the same key.
        self.output['timestamp'] = int(time.time())

    json_like = self.is_json_format() or self.is_human_readable_format()
    if not (json_like or self.is_csv_format()):
        raise amdsmi_cli_exceptions(self, "Invalid output format given, only json, csv, and human_readable supported")

    # The device id goes in before the payload so it precedes it in the output.
    self.output['brcm_switch'] = int(switch_id)

    if json_like:
        if argument == 'values' and isinstance(data, dict):
            self.output.update(data)
            return
    elif argument == 'values' or isinstance(data, dict):
        self.output.update(self.flatten_dict(data))
        return

    self.output[argument] = data



def _store_output_rocmsmi(self, gpu_id, argument, data):
if self.is_json_format():
Expand Down
77 changes: 76 additions & 1 deletion amdsmi_cli/amdsmi_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,17 @@ def __init__(self, version, list, static, firmware, bad_pages, metric,
else:
self.gpu_choices = {}
self.gpu_choices_str = ""

if self.helpers.is_amdgpu_initialized():
self.nic_choices, self.nic_choices_str = self.helpers.get_nic_choices()
else:
self.nic_choices = {}
self.nic_choices_str = ""
if self.helpers.is_amdgpu_initialized():
self.switch_choices, self.switch_choices_str = self.helpers.get_switch_choices()
else:
self.switch_choices = {}
self.switch_choices_str = ""

if self.helpers.is_amd_hsmp_initialized():
self.cpu_choices, self.cpu_choices_str = self.helpers.get_cpu_choices()
Expand Down Expand Up @@ -349,6 +360,62 @@ def __call__(self, parser, args, values, option_string=None):
True, False, False)

return _GPUSelectAction


def _nic_select(self, nic_choices):
    """ Custom argparse action to return the device handle(s) for the nic(s) selected
        This will set the destination (args.nic) to a list of 1 or more device handles
        If the selection cannot be resolved an amdsmi_cli_exceptions error is raised:
            AmdSmiMissingParameterValueException - the helper reported an empty selection ('')
            AmdSmiDeviceNotFoundException - otherwise, carrying the value the helper reported
        params:
            nic_choices (dict) - selectable NIC choices; the "all" entry, if
                present, is removed from this dict the first time the action runs
        return:
            argparse.Action subclass to pass as action= to add_argument
    """
    amdsmi_helpers = self.helpers

    class _NICSelectAction(argparse.Action):
        # Evaluated once, when the action class is created.
        ouputformat = self.helpers.get_output_format()

        # Checks the values
        def __call__(self, parser, args, values, option_string=None):
            # Drop the "all" entry before resolving; pop() tolerates it
            # being absent (e.g. on a repeated invocation).
            nic_choices.pop("all", None)
            status, selected_device_handles = amdsmi_helpers.get_device_handles_from_nic_selections(
                nic_selections=values,
                nic_choices=nic_choices)
            if status:
                setattr(args, self.dest, selected_device_handles)
                return

            # On failure the helper returns the offending value in place of handles.
            if selected_device_handles == '':
                raise amdsmi_cli_exceptions.AmdSmiMissingParameterValueException("--nic", _NICSelectAction.ouputformat)
            raise amdsmi_cli_exceptions.AmdSmiDeviceNotFoundException(selected_device_handles, _NICSelectAction.ouputformat)

    return _NICSelectAction


def _switch_select(self, switch_choices):
    """ Custom argparse action to return the device handle(s) for the switch(es) selected
        This will set the destination (args.switch) to a list of 1 or more device handles
        If the selection cannot be resolved an amdsmi_cli_exceptions error is raised:
            AmdSmiMissingParameterValueException - the helper reported an empty selection ('')
            AmdSmiDeviceNotFoundException - otherwise, carrying the value the helper reported
        params:
            switch_choices (dict) - selectable switch choices; the "all" entry, if
                present, is removed from this dict the first time the action runs
        return:
            argparse.Action subclass to pass as action= to add_argument
    """
    amdsmi_helpers = self.helpers

    class _switchSelectAction(argparse.Action):
        # Evaluated once, when the action class is created.
        ouputformat = self.helpers.get_output_format()

        # Checks the values
        def __call__(self, parser, args, values, option_string=None):
            # Drop the "all" entry before resolving; pop() tolerates it
            # being absent (e.g. on a repeated invocation).
            switch_choices.pop("all", None)
            status, selected_device_handles = amdsmi_helpers.get_device_handles_from_switch_selections(
                switch_selections=values,
                switch_choices=switch_choices)
            if status:
                setattr(args, self.dest, selected_device_handles)
                return

            # On failure the helper returns the offending value in place of handles.
            if selected_device_handles == '':
                raise amdsmi_cli_exceptions.AmdSmiMissingParameterValueException("--switch", _switchSelectAction.ouputformat)
            raise amdsmi_cli_exceptions.AmdSmiDeviceNotFoundException(selected_device_handles, _switchSelectAction.ouputformat)

    return _switchSelectAction


def _cpu_select(self, cpu_choices):
Expand Down Expand Up @@ -471,6 +538,8 @@ def _validate_cpu_core(self, value):
def _add_device_arguments(self, subcommand_parser: argparse.ArgumentParser, required=False):
# Device arguments help text
gpu_help = f"Select a GPU ID, BDF, or UUID from the possible choices:\n{self.gpu_choices_str}"
nic_help = f"Select a NIC ID, BDF, or UUID from the possible choices:\n{self.nic_choices_str}"
switch_help = f"Select a SWITCH ID, BDF, or UUID from the possible choices:\n{self.switch_choices_str}"
vf_help = "Gets general information about the specified VF (timeslice, fb info, …).\
\nAvailable only on virtualization OSs"
cpu_help = f"Select a CPU ID from the possible choices:\n{self.cpu_choices_str}"
Expand All @@ -483,7 +552,13 @@ def _add_device_arguments(self, subcommand_parser: argparse.ArgumentParser, requ
if self.helpers.is_amdgpu_initialized():
device_args.add_argument('-g', '--gpu', action=self._gpu_select(self.gpu_choices),
nargs='+', help=gpu_help)

if self.helpers.is_amdgpu_initialized():
device_args.add_argument('-bn', '--nic', action=self._nic_select(self.nic_choices),
nargs='+', help=nic_help)

if self.helpers.is_amdgpu_initialized():
device_args.add_argument('-bs', '--switch', action=self._switch_select(self.switch_choices),
nargs='+', help=switch_help)
if self.helpers.is_amd_hsmp_initialized():
device_args.add_argument('-U', '--cpu', type=self._validate_cpu_core,
action=self._cpu_select(self.cpu_choices),
Expand Down
5 changes: 5 additions & 0 deletions example/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,11 @@ add_executable(${SMI_NODRM_EXAMPLE_EXE} "amd_smi_nodrm_example.cc")
target_link_libraries(${SMI_NODRM_EXAMPLE_EXE} ${AMD_SMI_TARGET})
add_dependencies(${SMI_NODRM_EXAMPLE_EXE} ${AMD_SMI_TARGET})

# Example executable "amd_smi_list", built from amd_smi_list.cc and linked
# against ${AMD_SMI_TARGET}; add_dependencies makes that target build first.
set(SMI_LIST_EXAMPLE_EXE "amd_smi_list")
add_executable(${SMI_LIST_EXAMPLE_EXE} "amd_smi_list.cc")
target_link_libraries(${SMI_LIST_EXAMPLE_EXE} ${AMD_SMI_TARGET})
add_dependencies(${SMI_LIST_EXAMPLE_EXE} ${AMD_SMI_TARGET})

if(ENABLE_ESMI_LIB)
set(ESMI_SAMPLE_EXE "amd_smi_esmi_ex")
add_executable(${ESMI_SAMPLE_EXE} "amdsmi_esmi_intg_example.cc")
Expand Down
Loading
Loading