├── .editorconfig ├── .github ├── CODEOWNERS ├── ISSUE_TEMPLATE │ ├── bug_report.md │ ├── docs.md │ └── rfe.md ├── PULL_REQUEST_TEMPLATE.md ├── config │ ├── markdown_link.json │ └── markdown_style.yaml ├── dependabot.yml └── workflows │ ├── backport.yaml │ ├── license-check.yaml │ ├── link-check.yaml │ ├── shellcheck.yaml │ ├── shfmt.yaml │ ├── spellcheck.yaml │ ├── stale.yml │ └── style-check.yaml ├── .gitignore ├── .license_check.yaml ├── .shellcheck ├── .spelling ├── Jenkinsfile.github ├── LICENSE ├── Makefile ├── README.md ├── RELEASE_NOTES.md ├── api ├── README.md ├── bos.md ├── bss.md ├── capmc.md ├── cfs.md ├── console.md ├── firmware-action.md ├── hbtd.md ├── hmnfd.md ├── ims.md ├── index.md ├── nls.md ├── power-control.md ├── scsd.md ├── sls.md ├── smd.md ├── sts.md └── tapms-operator.md ├── background ├── README.md ├── certificate_authority.md ├── index.md ├── ncn_bios.md ├── ncn_boot_workflow.md ├── ncn_firmware.md ├── ncn_images.md ├── ncn_kdump.md ├── ncn_kernel.md ├── ncn_mounts_and_filesystems.md ├── ncn_networking.md ├── ncn_operating_system_releases.md └── ncn_plan_of_record.md ├── docs-csm.spec ├── gen-api.sh ├── glossary.md ├── img ├── 3rd │ └── redfish.png ├── CEC_Display_Controls_CEC_Actions.svg ├── CEC_Front_Panel.svg ├── Management_Network_Connections_Liquid_Cooled.png ├── SBPS_Architecture_Diagram.jpg ├── SBPS_flow_diagram.jpg ├── UAN_transition_CSM_1.2.png ├── bmc-reboot-ilo.png ├── bmc-virtual-media-boot-gigabyte.png ├── bmc-virtual-media-gigabyte-settings.png ├── bmc-virtual-media-ilo.png ├── bmc-virtual-media-intel-menu.png ├── bmc-virtual-media-intel-mounted.png ├── bmc-virtual-media-intel.png ├── bmc-virtual-media-settings-gigabyte.png ├── bmc-virtual-media-start-gigabyte.png ├── boot-flow.jpg ├── cmos1.png ├── cmos2.png ├── cmos3.png ├── fw-gb-1.png ├── fw-gb-2.png ├── fw-gb-3.png ├── fw-gb-4.png ├── fw-ilo-1.png ├── fw-ilo-2.png ├── fw-ilo-3.png ├── fw-ilo-4.png ├── fw-ilo-5.png ├── install │ └── shcd-hmn-tab-unexpected-data.png ├── 
network │ ├── AGG-SHCD.png │ ├── CDU-.png │ ├── CDU-CMM-SHCD.png │ ├── CDU-SHCD.png │ ├── CDU-Wiring.png │ ├── CECLAG.png │ ├── DL325-back.png │ ├── DL385-back.png │ ├── GigaIntel_application.png │ ├── GigaIntel_storage.png │ ├── Gigaintel_Master.png │ ├── Gigaintel_UAN.png │ ├── Gigaintel_Worker.png │ ├── HPE_Master.png │ ├── HPE_Storage.png │ ├── HPE_Storage_large.png │ ├── HPE_UAN.png │ ├── HPE_Worker.png │ ├── Leaf-SHCD.png │ ├── Leaf-Wiring.png │ ├── SHCD-40G_10G.png │ ├── Spine-SHCD.png │ ├── XL645D-back.png │ ├── XL675D-back.png │ ├── bican_cable.png │ ├── can-diagram.png │ ├── edge_shcd.png │ ├── gigabyte-master.png │ ├── gigabyte-storage.png │ ├── gigabyte-uan.png │ ├── gigabyte-worker.png │ └── management_network │ │ ├── architecture_comparison.png │ │ ├── exascale.png │ │ ├── large.png │ │ ├── mclag_link_ha.png │ │ ├── medium.png │ │ ├── member_power_failure.png │ │ ├── network_traffic_pattern.png │ │ ├── scenario-a-topology.png │ │ ├── scenario-b-topology.png │ │ ├── small.png │ │ ├── vsx_isl_ha.png │ │ └── vsx_split.png ├── operations │ ├── Add_Client_in_Keycloak.png │ ├── AutomationFrameworkWorkflow.png │ ├── BGP_Peering.png │ ├── BMC_Firmware_Dashboard.png │ ├── BMC_Firmware_Information_Login_Page.png │ ├── BSS_Missing_an_Artifact.png │ ├── Boot_Flow.png │ ├── CAN_CHN_27_Subnet.png │ ├── CAN_Dual-Spine_Configuration.png │ ├── CAN_Point_to_Point.png │ ├── CAN_Single_Gateway.png │ ├── CA_Certificate_Settings.png │ ├── CDU_Circuit_Breakers.png │ ├── CDU_Circuit_Breakers.svg │ ├── CFS_Automated_Session_Flow.png │ ├── CFS_Single_Session_Flow.png │ ├── CMN_25_Subnet.png │ ├── CN_POST_Call.png │ ├── CN_Setup_Menu.png │ ├── DHCP_Helper.png │ ├── DNS_architecture.png │ ├── Dashboard_Remote_Control.png │ ├── EEPROM_Error_Dropping_to_Dracut_Emergency_Shell.png │ ├── ErrorDashboard1.png │ ├── ErrorDashboard2.png │ ├── ExternalDNS.png │ ├── Full_UEFI_Prompt.png │ ├── GossTestsDashboard.png │ ├── GossWorkflow.png │ ├── 
Intel_Integrated_BMC_Console_Launch_Console_Button.png │ ├── Intel_Integrated_BMC_Console_Login_Page.png │ ├── Intel_Integrated_BMC_Console_Remote_Control_Tab.png │ ├── Intel_Integrated_BMC_Console_Summary_Page.png │ ├── Java_Control_Panel_Security_Tab_Linux.png │ ├── Java_Control_Panel_Security_Tab_MAC.png │ ├── Java_Control_Panel_Security_Tab_Windows.png │ ├── Java_iKVM_Viewer.png │ ├── KVM_Main_Menu.png │ ├── KVM_Options.png │ ├── KVM_Unit_Opened.png │ ├── KVM_Unit_Pulled_Out.png │ ├── Keychain_Access_Utility.png │ ├── Keycloak_Admin-role_Mapper.png │ ├── Keycloak_Client_Details.png │ ├── Keycloak_Client_Secret.png │ ├── Keycloak_Client_Settings.png │ ├── Keycloak_Client_Token_Lifetime.png │ ├── Keycloak_Gatekeeper_Client.png │ ├── Keycloak_Global_Session_Lifetime.png │ ├── Keycloak_Global_Token_Lifetime.png │ ├── Keycloak_add_nexus_permission.png │ ├── Keycloak_client_scopes_table.png │ ├── Keykloak_Gatekeeper_Settings.png │ ├── Kibana_Clusterstor_Logs-Dropdown.png │ ├── Kibana_Discover_Dashboard.png │ ├── Kibana_Results_Time_Period.png │ ├── LDAP_User_Federation_Mappers.png │ ├── Liquid_Cooled_Cabinet_Front.svg │ ├── Liquid_Cooled_Cabinet_PDU.svg │ ├── Liquid_Cooled_Cabinet_Rear.svg │ ├── Liquid_Cooled_Cabinet_Slot_Numbering_Front.svg │ ├── Liquid_Cooled_Cabinet_Slot_Numbering_Rear.svg │ ├── Liquid_Cooled_TDS_Cabinet_PDU.svg │ ├── Management_NCN_Interfaces_and_VLANs.png │ ├── Mellanox_SN2700.png │ ├── Nexus_Admin_Account.png │ ├── Nexus_Anonymous_Account.png │ ├── Nexus_Browse_Page.png │ ├── Nexus_Compact_Task.png │ ├── Nexus_Create_Task.png │ ├── Nexus_Delete_Asset.png │ ├── Nexus_Delete_Repository.png │ ├── Nexus_New_Task.png │ ├── Nexus_Repodata_Attributes.png │ ├── Nexus_Repodata_Attributes_After.png │ ├── Nexus_Repodata_Summary.png │ ├── Nexus_Repodata_Summary_After.png │ ├── Nexus_Repository_Admin_Page.png │ ├── Nexus_Repository_List.png │ ├── Nexus_Task_Confirmation.png │ ├── Nexus_Task_Detail.png │ ├── Nexus_Task_Detail_Last_Run.png │ ├── 
Nexus_Task_Location.png │ ├── Nexus_Task_Type_Selection.png │ ├── Nexus_Tasks_Page.png │ ├── Nexus_Web_UI.png │ ├── Node_Console.png │ ├── PKI_Infrastructure.png │ ├── PSU_Status.svg │ ├── Security_Infrastructure.png │ ├── Simple_UEFI_Prompt.png │ ├── Sma_dashboard_console_hostname.png │ ├── Sma_dashboard_home_page.png │ ├── Sma_dashboard_index_dropdown.png │ ├── Sma_dashboard_time_period.png │ ├── TFTP_without_a_Route_Back_to_the_Node.png │ ├── TimingDashboard.png │ ├── VictoriaMetrics_Arcitecture.jpg │ ├── Wireshark_Healthy_DHCP_Discover_Sequence.png │ ├── add-node.gif │ ├── boot_orchestration │ │ ├── bos_v1_boot.gif │ │ ├── bos_v1_reconfigure.gif │ │ ├── bos_v1_shutdown.gif │ │ ├── bos_v2_boot.drawio │ │ ├── bos_v2_boot.png │ │ ├── bos_v2_reboot.drawio │ │ ├── bos_v2_reboot.png │ │ ├── bos_v2_shutdown.drawio │ │ ├── bos_v2_shutdown.png │ │ ├── bos_v2_status_transitions.drawio │ │ └── bos_v2_status_transitions.png │ ├── branch_workflow.png │ ├── crus_upgrade.gif │ ├── diagram_csm_stack_upgrade_04022025.png │ ├── diagram_upgrade_csm_manually_and_additional_products_with_IUF_101524.png │ ├── diagram_upgrade_csm_with_IUF_101524.png │ ├── dns.svg │ ├── gitea_repositories.png │ ├── gitea_repository_settings.png │ ├── gitea_repository_visibility.png │ ├── image-create.gif │ ├── image-customize.gif │ ├── iuf_initial_install_workflow_update_hsn_01132025.png │ └── remove-nodes.gif ├── shcd-rack-example.png ├── spit-services.jpg ├── upgrade-deploy.jpg └── usb-flow.jpg ├── index.md ├── install ├── README.md ├── collect_mac_addresses_for_ncns.md ├── collecting_bmc_mac_addresses.md ├── collecting_ncn_mac_addresses.md ├── configure_administrative_access.md ├── configure_management_network.md ├── create_application_node_config_yaml.md ├── create_cabinets_yaml.md ├── create_hmn_connections_json.md ├── create_ncn_metadata_csv.md ├── create_switch_metadata_csv.md ├── create_system_configuration_using_cluster_discovery_service.md ├── create_system_configuration_using_shcd.md ├── 
csm_installation_failure.md ├── deploy_final_non-compute_node.md ├── deploy_non-compute_nodes.md ├── index.md ├── install_csm_services.md ├── livecd │ ├── Access_LiveCD_USB_Device_After_Reboot.md │ ├── Boot_LiveCD_RemoteISO.md │ ├── Boot_LiveCD_USB.md │ ├── Reinstall_LiveCD_USB.md │ └── Reset_root_Password_on_a_LiveCD_USB.md ├── pre-installation.md ├── prepare_compute_nodes.md ├── prepare_site_init.md ├── re-installation.md ├── scripts │ ├── backup-pit-data.sh │ ├── craycli_init.py │ ├── csm_services │ │ ├── install.yaml │ │ └── steps │ │ │ ├── 1.initialize_bootstrap_registry.yaml │ │ │ ├── 2.create_site_init_secret.yaml │ │ │ ├── 3.deploy_sealed_secret_decryption_key.yaml │ │ │ ├── 4.deploy_csm_application_and_services.yaml │ │ │ ├── 5.setup_nexus.yaml │ │ │ ├── 6.set_management_NCN_to_use_unbound.yaml │ │ │ └── 7.docs_csm_upload_rebuild_templates.yaml │ ├── generate-customization-secrets.sh │ └── install-goss-tests.sh ├── shcd_hmn_connections_rules.md ├── troubleshooting_ceph_csi.md ├── troubleshooting_installation.md ├── troubleshooting_pxe_boot.md ├── troubleshooting_unused_drives_on_storage_nodes.md └── troubleshooting_utility_storage_node_installation.md ├── introduction ├── README.md ├── csm_overview.md ├── deprecated_features │ ├── CAPMC_Deprecation_Notice.md │ ├── README.md │ └── index.md ├── documentation_conventions.md ├── img │ ├── github_heading_icon.png │ ├── github_heading_navigation.png │ ├── html_docs_csm_version_selection.png │ ├── html_heading_icon.png │ └── html_heading_navigation.png ├── index.md ├── templates │ └── disclaimers.md └── viewing_csm_documentation.md ├── operations ├── Backup_HMS.md ├── CSM_product_management │ ├── Change_Passwords_and_Credentials.md │ ├── Configure_CSM_Packages_with_CFS.md │ ├── Configure_Keycloak_Account.md │ ├── Configure_the_root_Password_and_SSH_Keys_in_Vault.md │ ├── Post_Install_Customizations.md │ ├── Redeploying_a_Chart.md │ ├── Remove_Artifacts_from_Product_Installations.md │ ├── 
Set_Up_Passwordless_SSH.md │ └── Validate_Signed_RPMs.md ├── Component_Names_xnames.md ├── README.md ├── Restore_HMS.md ├── System_Recovery │ ├── PBS_Service_Recovery.md │ ├── Slurm_Service_Recovery.md │ └── System_Recovery.md ├── argo │ ├── Using_Argo_Workflows.md │ └── Using_the_Argo_UI.md ├── artifact_management │ ├── Artifact_Management.md │ ├── Generate_Temporary_S3_Credentials.md │ ├── Manage_Artifacts_with_the_Cray_CLI.md │ └── Use_S3_Libraries_and_Clients.md ├── bare_metal │ ├── Bare-Metal.md │ └── Change_River_BMC_Credentials.md ├── boot_orchestration │ ├── BOS_API_Versions.md │ ├── BOS_Services.md │ ├── BOS_Workflows.md │ ├── Boot_Issue_Symptom_Node_Console_or_Logs_Indicate_that_the_Server_Response_has_Timed_Out.md │ ├── Boot_Issue_Symptom_Node_HSN_Interface_Does_Not_Appear_or_Shows_No_Link_Detected.md │ ├── Boot_Orchestration.md │ ├── Boot_UANs.md │ ├── Cheatsheet.md │ ├── Component_Status.md │ ├── Components.md │ ├── Compute_Node_Boot_Issue_Symptom_Duplicate_Address_Warnings_and_Declined_DHCP_Offers_in_Logs.md │ ├── Compute_Node_Boot_Issue_Symptom_Message_About_Invalid_EEPROM_Checksum_in_Node_Console_or_Log.md │ ├── Compute_Node_Boot_Issue_Symptom_Node_is_Not_Able_to_Download_the_Required_Artifacts.md │ ├── Compute_Node_Boot_Sequence.md │ ├── Create_a_Session_Template_to_Boot_Compute_Nodes_with_SBPS.md │ ├── Customize_iPXE_Binary_Names.md │ ├── Determine_Which_BOS_Session_Booted_A_Node.md │ ├── Edit_the_iPXE_Embedded_Boot_Script.md │ ├── Exporting_and_Importing_BOS_Data.md │ ├── Exporting_and_Importing_BSS_Data.md │ ├── Healthy_Compute_Node_Boot_Process.md │ ├── Kernel_Boot_Parameters.md │ ├── Limit_the_Scope_of_a_BOS_Session.md │ ├── Log_File_Locations_and_Ports_Used_in_Compute_Node_Boot_Troubleshooting.md │ ├── Manage_a_BOS_Session.md │ ├── Manage_a_Session_Template.md │ ├── Multi_tenancy_with_BOS.md │ ├── Node_Boot_Root_Cause_Analysis.md │ ├── Options.md │ ├── Redeploy_the_IPXE_and_TFTP_Services.md │ ├── Rolling_Upgrades.md │ ├── Session_Templates.md 
│ ├── Sessions.md │ ├── Stage_Changes_with_BOS.md │ ├── Tools_for_Resolving_Boot_Issues.md │ ├── Troubleshoot_Compute_Node_Boot_Issues_Related_to_Dynamic_Host_Configuration_Protocol_DHCP.md │ ├── Troubleshoot_Compute_Node_Boot_Issues_Related_to_Trivial_File_Transfer_Protocol_TFTP.md │ ├── Troubleshoot_Compute_Node_Boot_Issues_Related_to_Unified_Extensible_Firmware_Interface_UEFI.md │ ├── Troubleshoot_Compute_Node_Boot_Issues_Related_to_the_Boot_Script_Service_BSS.md │ ├── Troubleshoot_Compute_Node_Boot_Issues_Using_Kubernetes.md │ ├── Troubleshoot_UAN_Boot_Issues.md │ ├── Upload_Node_Boot_Information_to_Boot_Script_Service_BSS.md │ └── View_the_Status_of_a_BOS_Session.md ├── cani │ ├── Add_A_Blade_To_A_Cabinet_In_SLS.md │ └── Add_A_Cabinet_To_SLS.md ├── configuration_management │ ├── ARP_cache_tuning.md │ ├── Accessing_Sat_Bootprep_Files.md │ ├── Adding_Additional_Inventory.md │ ├── Ansible_Execution_Environments.md │ ├── Ansible_Log_Collection.md │ ├── Automatic_Configuration_Management.md │ ├── Automatic_Session_Deletion_with_session_ttl.md │ ├── Backup_and_Restore_VCS_Data.md │ ├── CFS_Commands_Cheat_Sheet.md │ ├── CFS_Components.md │ ├── CFS_Configurations.md │ ├── CFS_Flow_Diagrams.md │ ├── CFS_Global_Options.md │ ├── CFS_Key_Management.md │ ├── CFS_Session_Inventory.md │ ├── CFS_Sessions.md │ ├── CFS_Sources.md │ ├── Change_the_Ansible_Verbosity.md │ ├── Configuration_Management.md │ ├── Configure_Ansible.md │ ├── Configure_HSN_NIC_Bonding.md │ ├── Create_a_Node_Personalization_CFS_Session.md │ ├── Create_an_Image_Customization_CFS_Session.md │ ├── Create_and_Populate_a_VCS_Configuration_Repository.md │ ├── Customize_Configuration_Values.md │ ├── Differences_Between_the_V2_and_V3_CFS_APIs.md │ ├── Enable_Ansible_Profiling.md │ ├── Exporting_and_Importing_CFS_Data.md │ ├── Git_Operations.md │ ├── Management_Node_Image_Customization.md │ ├── Management_Node_Personalization.md │ ├── Managing_Sensitive_Tenant_Information_in_VCS_with_SOPS.md │ ├── 
Paging_CFS_Records.md │ ├── Set_Limits_for_a_Configuration_Session.md │ ├── Specifying_Hosts_and_Groups.md │ ├── Target_Ansible_Tasks_for_Image_Customization.md │ ├── Track_the_Status_of_a_Session.md │ ├── Troubleshoot_CFS_Issues.md │ ├── Troubleshoot_CFS_Session_Failed.md │ ├── Troubleshoot_CFS_Session_Failing_to_Complete.md │ ├── Troubleshoot_CFS_Sessions_Failing_to_Start.md │ ├── Update_a_CFS_Configuration.md │ ├── Update_the_Privacy_Settings_for_Gitea_Configuration_Content_Repositories.md │ ├── VCS_Administrative_User.md │ ├── VCS_Branching_Strategy.md │ ├── Version_Control_Service_VCS.md │ ├── View_Configuration_Session_Logs.md │ ├── Write_Ansible_Code_for_CFS.md │ └── iSCSI_SBPS_Node_Personalization.md ├── configure_cray_cli.md ├── conman │ ├── Access_Compute_Node_Logs.md │ ├── Access_Console_Log_Data_Via_the_System_Monitoring_Framework_SMF.md │ ├── Complete_Reset_of_the_Console_Services.md │ ├── ConMan.md │ ├── Configure_Log_Rotation.md │ ├── Console_Services_Troubleshooting_Guide.md │ ├── Disable_ConMan_After_System_Software_Installation.md │ ├── Establish_a_Serial_Connection_to_NCNs.md │ ├── Log_in_to_a_Node_Using_ConMan.md │ ├── Manage_Node_Consoles.md │ ├── Troubleshoot_ConMan_Asking_for_Password_on_SSH_Connection.md │ ├── Troubleshoot_ConMan_Blocking_Access_to_a_Node_BMC.md │ ├── Troubleshoot_ConMan_Failing_to_Connect_to_a_Console.md │ └── Troubleshoot_ConMan_Node_Pod_Stuck_Terminating.md ├── firmware │ ├── FASUpdate_Script.md │ ├── FAS_Admin_Procedures.md │ ├── FAS_CLI.md │ ├── FAS_DBCleanup.md │ ├── FAS_Filters.md │ ├── FAS_Images_Backup_Restore.md │ ├── FAS_Paradise.md │ ├── FAS_Recipes.md │ ├── FAS_Update_iLO5_2.78.md │ ├── FAS_Use_Cases.md │ ├── Firmware_Upgrade_using_SPP_on_HPE_ProLiant_Servers.md │ ├── Update_Firmware_with_FAS.md │ ├── Updating_Firmware_m001.md │ ├── Updating_Firmware_without_FAS.md │ └── Upload_Olympus_BMC_Recovery_Firmware_into_TFTP_Server.md ├── hardware_state_manager │ ├── Add_a_Switch_to_the_HSM_Database.md │ ├── 
Add_an_NCN_to_the_HSM_Database.md │ ├── Component_Group_Members.md │ ├── Component_Groups_and_Partitions.md │ ├── Component_Memberships.md │ ├── Component_Partition_Members.md │ ├── Create_a_Backup_of_the_HSM_Postgres_Database.md │ ├── HSM_Backup_User_Data.md │ ├── HSM_Roles_and_Subroles.md │ ├── Hardware_Management_Services_HMS_Locking_API.md │ ├── Hardware_State_Manager.md │ ├── Hardware_State_Manager_HSM_State_and_Flag_Fields.md │ ├── Lock_and_Unlock_Management_Nodes.md │ ├── Manage_Component_Groups.md │ ├── Manage_Component_Partitions.md │ ├── Manage_HMS_Locks.md │ ├── Restore_HSM_Postgres_from_Backup.md │ ├── Restore_HSM_Postgres_without_a_Backup.md │ ├── Set_BMC_Management_Role.md │ └── scripts │ │ └── backup_smd_postgres.sh ├── hmcollector │ └── adjust_hmcollector_resource_limits_requests.md ├── hpe_pdu │ └── hpe_pdu_admin_procedures.md ├── image_management │ ├── Build_a_New_UAN_Image_Using_the_Default_Recipe.md │ ├── Build_an_Image_Using_IMS_REST_Service.md │ ├── Configure_IMS_to_Use_DKMS.md │ ├── Configure_IMS_to_validate_rpms.md │ ├── Configure_a_Remote_Build_Node.md │ ├── Convert_TGZ_Archives_to_SquashFS_Images.md │ ├── Customize_an_Image_Root_Using_IMS.md │ ├── Delete_or_Recover_Deleted_IMS_Content.md │ ├── Exporting_and_Importing_IMS_Data.md │ ├── Image_Job_Performance.md │ ├── Image_Management.md │ ├── Image_Management_Workflows.md │ ├── Import_External_Image_to_IMS.md │ ├── Import_NCN_Image_to_IMS.md │ ├── Troubleshoot_Large_Image.md │ ├── Troubleshoot_Remote_Build_Node.md │ ├── Troubleshoot_zypper_interaction.md │ ├── Upload_and_Register_an_Image_Recipe.md │ └── Working_With_aarch64_Images.md ├── index.md ├── iscsi_sbps │ └── iscsi_sbps.md ├── iuf │ ├── IUF.md │ ├── examples │ │ ├── iuf_abort.md │ │ ├── iuf_activity.md │ │ ├── iuf_list_activities.md │ │ ├── iuf_list_stages.md │ │ ├── iuf_restart.md │ │ ├── iuf_resume.md │ │ ├── iuf_run.md │ │ └── iuf_workflow.md │ ├── stages │ │ ├── deliver_product.md │ │ ├── deploy_product.md │ │ ├── 
managed_nodes_rollout.md │ │ ├── management_nodes_rollout.md │ │ ├── post_install_check.md │ │ ├── post_install_service_check.md │ │ ├── pre_install_check.md │ │ ├── prepare_images.md │ │ ├── process_media.md │ │ ├── update_cfs_config.md │ │ └── update_vcs_config.md │ └── workflows │ │ ├── admin_directory.md │ │ ├── backup.md │ │ ├── configuration.md │ │ ├── configuration_of_SFM.md │ │ ├── deploy_product.md │ │ ├── image_preparation.md │ │ ├── install_or_upgrade_additional_products_with_iuf.md │ │ ├── managed_rollout.md │ │ ├── management_rollout.md │ │ ├── preparation.md │ │ ├── product_delivery.md │ │ ├── slingshot_management_network_switch_updates.md │ │ ├── upgrade_csm_and_additional_products_with_iuf.md │ │ └── validate_deployment.md ├── kubernetes │ ├── About_Kubernetes_Taints_and_Labels.md │ ├── About_Postgres.md │ ├── About_etcd.md │ ├── About_kubectl.md │ ├── Backups_for_Etcd_Clusters_Running_in_Kubernetes.md │ ├── Cert_Renewal_for_Kubernetes_and_Bare_Metal_EtcD.md │ ├── Check_for_and_Clear_etcd_Cluster_Alarms.md │ ├── Check_the_Health_of_etcd_Clusters.md │ ├── Clear_Space_in_an_etcd_Cluster_Database.md │ ├── Configure_kubectl_Credentials_to_Access_the_Kubernetes_APIs.md │ ├── Containerd.md │ ├── Create_a_Manual_Backup_of_a_Healthy_Bare-Metal_etcd_Cluster.md │ ├── Create_a_Manual_Backup_of_a_Healthy_etcd_Cluster.md │ ├── Determine_if_Pods_are_Hitting_Resource_Limits.md │ ├── Disaster_Recovery_Postgres.md │ ├── Fix_Failed_to_start_etcd_on_Master.md │ ├── Increase_Kafka_Pod_Resource_Limits.md │ ├── Increase_PVC_size_in_an_etcd_Cluster_Database.md │ ├── Increase_Pod_Resource_Limits.md │ ├── Kubernetes.md │ ├── Kubernetes_Networking.md │ ├── Kubernetes_Storage.md │ ├── Kyverno.md │ ├── Pod_Resource_Limits.md │ ├── Rebuild_Unhealthy_etcd_Clusters.md │ ├── Recover_from_Postgres_WAL_Event.md │ ├── Repopulate_Data_in_etcd_Clusters_When_Rebuilding_Them.md │ ├── Report_the_Endpoint_Status_for_etcd_Clusters.md │ ├── 
Restore_Bare-Metal_etcd_Clusters_from_an_S3_Snapshot.md │ ├── Restore_Postgres.md │ ├── Restore_an_etcd_Cluster_from_a_Backup.md │ ├── Retrieve_Cluster_Health_Information_Using_Kubernetes.md │ ├── TDS_Lower_CPU_Requests.md │ ├── Troubleshoot_Intermittent_503s.md │ ├── Troubleshoot_Postgres_Database.md │ ├── View_Postgres_Information_for_System_Databases.md │ └── encryption │ │ ├── README.md │ │ └── index.md ├── multi-tenancy │ ├── CrayHncManager.md │ ├── Create_a_Tenant.md │ ├── ExampleWorkflow.md │ ├── GlobalTenantHooks.md │ ├── Modify_a_Tenant.md │ ├── Overview.md │ ├── Remove_a_Tenant.md │ ├── SlurmOperator.md │ ├── Tapms.md │ ├── TenantAdminConfig.md │ ├── Vault.md │ ├── hpe_slingshot_network_operator.md │ └── images │ │ ├── groupmembership.png │ │ ├── keycloakclient.png │ │ ├── oidctoken.png │ │ └── usergroups.png ├── network │ ├── Access_to_System_Management_Services.md │ ├── Connect_to_Switch_Over_USB_Serial_Cable.md │ ├── Connect_to_the_HPE_Cray_EX_Environment.md │ ├── Create_a_CSM_Configuration_Upgrade_Plan.md │ ├── Default_IP_Address_Ranges.md │ ├── Gateway_Testing.md │ ├── Network.md │ ├── customer_accessible_networks │ │ ├── Connect_to_the_CMN_CAN.md │ │ ├── Customer_Accessible_Networks.md │ │ ├── Dual_Spine_Configuration.md │ │ ├── Externally_Exposed_Services.md │ │ ├── Troubleshoot_CMN_Issues.md │ │ ├── bi-can_arista_aruba_config.md │ │ ├── bi-can_arista_metallb_peering.md │ │ └── can_to_chn │ │ │ ├── README.md │ │ │ ├── index.md │ │ │ ├── network │ │ │ ├── chn_enable.md │ │ │ └── network_upgrade_1.2_to_1.3.md │ │ │ └── scripts │ │ │ ├── bss │ │ │ ├── bss_remove_can.py │ │ │ └── post-bootparameters.sh │ │ │ ├── sls │ │ │ ├── add_computes_to_chn.py │ │ │ ├── csm_can_to_chn │ │ │ │ ├── __init__.py │ │ │ │ └── sls_updates.py │ │ │ ├── sls_can_to_chn.py │ │ │ ├── sls_del_can.py │ │ │ └── sls_utils │ │ │ │ ├── Managers.py │ │ │ │ ├── Networks.py │ │ │ │ ├── README.md │ │ │ │ ├── Reservations.py │ │ │ │ ├── __init__.py │ │ │ │ ├── index.md │ │ │ │ ├── 
ipam.py │ │ │ │ ├── json_utils.py │ │ │ │ └── schemas │ │ │ │ ├── sls_networks_schema.json │ │ │ │ ├── sls_reservations_schema.json │ │ │ │ └── sls_subnets_schema.json │ │ │ └── util │ │ │ └── update-customizations-network.sh │ ├── dhcp │ │ ├── Customize_boot_file.md │ │ ├── DHCP.md │ │ └── Troubleshoot_DHCP_Issues.md │ ├── dns │ │ ├── DNS.md │ │ ├── Manage_the_DNS_Unbound_Resolver.md │ │ ├── PowerDNS_Configuration.md │ │ ├── PowerDNS_migration.md │ │ ├── Troubleshoot_Common_DNS_Issues.md │ │ └── Troubleshoot_PowerDNS.md │ ├── external_dns │ │ ├── External_DNS.md │ │ ├── External_DNS_Failing_to_Discover_Services_Workaround.md │ │ ├── External_DNS_csi_config_init_Input_Values.md │ │ ├── Ingress_Routing.md │ │ ├── Troubleshoot_DNS_Configuration_Issues.md │ │ ├── Troubleshoot_Systems_Not_Provisioned_with_External_IP_Addresses.md │ │ └── Update_the_cmn-external-dns_Value_Post-Installation.md │ ├── management_network │ │ ├── README.md │ │ ├── added_hardware.md │ │ ├── apply_custom_config.md │ │ ├── apply_switch_configurations.md │ │ ├── aruba │ │ │ ├── 8021x.md │ │ │ ├── README.md │ │ │ ├── acl.md │ │ │ ├── arp.md │ │ │ ├── backup.md │ │ │ ├── bgp_basic.md │ │ │ ├── bluetooth.md │ │ │ ├── cable_diagnostics.md │ │ │ ├── check_bgp_and_metallb.md │ │ │ ├── check_current_dhcp_leases.md │ │ │ ├── check_dhcp_lease_is_getting_allocated.md │ │ │ ├── check_hsm.md │ │ │ ├── check_kea_dhcp_logs.md │ │ │ ├── classifier_policies.md │ │ │ ├── compute_uan_application_nodes.md │ │ │ ├── dhcp_decline.md │ │ │ ├── dns-client.md │ │ │ ├── domain_name.md │ │ │ ├── duplicate_ip.md │ │ │ ├── exec_banner.md │ │ │ ├── hostname.md │ │ │ ├── igmp.md │ │ │ ├── index.md │ │ │ ├── initial_prioritization.md │ │ │ ├── intro.md │ │ │ ├── key_features.md │ │ │ ├── lag.md │ │ │ ├── lldp.md │ │ │ ├── locator_led.md │ │ │ ├── loopback.md │ │ │ ├── mac_auth.md │ │ │ ├── management_interface.md │ │ │ ├── management_network_configuration_example.md │ │ │ ├── management_network_function_in_detail.md │ │ │ ├── 
mclag_isl_ha.md │ │ │ ├── mclag_link_ha.md │ │ │ ├── mclag_power_failure.md │ │ │ ├── mclag_split.md │ │ │ ├── mlag.md │ │ │ ├── motd.md │ │ │ ├── msdp.md │ │ │ ├── mstp.md │ │ │ ├── native_vlan.md │ │ │ ├── ncn_tcpdump.md │ │ │ ├── ncns_on_install.md │ │ │ ├── network_naming_function.md │ │ │ ├── network_topologies.md │ │ │ ├── network_traffic_pattern.md │ │ │ ├── notice.md │ │ │ ├── ntp.md │ │ │ ├── ospfv2.md │ │ │ ├── physical_interfaces.md │ │ │ ├── pim.md │ │ │ ├── port_mirroring.md │ │ │ ├── port_security.md │ │ │ ├── queuing_and_scheduling.md │ │ │ ├── radius.md │ │ │ ├── reboot_pxe_fail.md │ │ │ ├── redundant_power_supplies.md │ │ │ ├── remote_logging.md │ │ │ ├── requirements_and_optional_configuration.md │ │ │ ├── routed_interface.md │ │ │ ├── scenario-a.md │ │ │ ├── scenario-b.md │ │ │ ├── snmp-agent.md │ │ │ ├── snmp-community.md │ │ │ ├── snmp_trap.md │ │ │ ├── snmpv3_users.md │ │ │ ├── spine_leaf_architecture.md │ │ │ ├── spine_leaf_architecture2.md │ │ │ ├── ssh.md │ │ │ ├── static_routing.md │ │ │ ├── status_of_cray-dhcp-kea_pods.md │ │ │ ├── tacacs.md │ │ │ ├── test_tftp_traffic.md │ │ │ ├── typical_VSX_configuration.md │ │ │ ├── typical_edge_port_configuration.md │ │ │ ├── typical_mclag_port_configuration.md │ │ │ ├── udld.md │ │ │ ├── upgrade.md │ │ │ ├── verify-switches_are_forwarding_dhcp_traffic.md │ │ │ ├── verify_bgp.md │ │ │ ├── verify_dhcp_traffic_on_workers.md │ │ │ ├── verify_route_to_tftp.md │ │ │ ├── vlan.md │ │ │ ├── vlan_interface.md │ │ │ ├── vlan_trunking_8021q.md │ │ │ ├── vsf.md │ │ │ ├── vsx.md │ │ │ ├── vsx_architecture.md │ │ │ ├── vsx_switch_replacement.md │ │ │ ├── vsx_sync.md │ │ │ ├── web-ui.md │ │ │ └── zeroize.md │ │ ├── aruba_split_cables.md │ │ ├── backup_custom_configurations.md │ │ ├── bican_support_matrix.md │ │ ├── bican_switch_configuration.md │ │ ├── bican_technical_details.md │ │ ├── bican_technical_summary.md │ │ ├── bonded_uan.md │ │ ├── cable_management_network_servers.md │ │ ├── canu │ │ │ ├── README.md │ │ 
│ ├── canu_installation.md │ │ │ ├── canu_validation_error.md │ │ │ ├── canu_verify_generate_compare_switch_configuration.md │ │ │ ├── custom_config.md │ │ │ ├── index.md │ │ │ ├── initializing_canu.md │ │ │ ├── introduction_to_canu.md │ │ │ ├── quick_start_guide_to_canu.md │ │ │ ├── uninstall_canu.md │ │ │ ├── update_canu_from_csm_tarball.md │ │ │ └── using_canu_to_generate_full_network_config.md │ │ ├── canu_ansible_inventory.md │ │ ├── canu_install_update.md │ │ ├── collect_data.md │ │ ├── config_management.md │ │ ├── configure_snmp.md │ │ ├── dell │ │ │ ├── README.md │ │ │ ├── acl.md │ │ │ ├── arp.md │ │ │ ├── backup.md │ │ │ ├── dns-client.md │ │ │ ├── domain_name.md │ │ │ ├── hostname.md │ │ │ ├── igmp.md │ │ │ ├── index.md │ │ │ ├── lag.md │ │ │ ├── lldp.md │ │ │ ├── locator_led.md │ │ │ ├── loopback.md │ │ │ ├── management_interface.md │ │ │ ├── mstp.md │ │ │ ├── ntp.md │ │ │ ├── physical_interfaces.md │ │ │ ├── qos.md │ │ │ ├── remote_logging.md │ │ │ ├── reset.md │ │ │ ├── snmp-community.md │ │ │ ├── snmpv3_users.md │ │ │ ├── ssh.md │ │ │ ├── system_images.md │ │ │ ├── upgrade.md │ │ │ ├── vlan.md │ │ │ ├── vlan_interface.md │ │ │ └── vlan_trunking_8021q.md │ │ ├── edge_switch_cabling_guide.md │ │ ├── firmware │ │ │ └── update_management_network_firmware.md │ │ ├── fresh_install.md │ │ ├── generate_switch_configs.md │ │ ├── hardware │ │ │ └── ex2500.md │ │ ├── img │ │ │ ├── architecture_comparison.png │ │ │ ├── aruba_arista.png │ │ │ ├── cmn_plus_can.png │ │ │ ├── cmn_plus_chn.png │ │ │ ├── customer_access_overview.png │ │ │ ├── exascale.png │ │ │ ├── intro.png │ │ │ ├── large.png │ │ │ ├── mclag_link_ha.png │ │ │ ├── medium.png │ │ │ ├── member_power_failure.png │ │ │ ├── network_traffic_pattern.png │ │ │ ├── scenario-a-topology.png │ │ │ ├── scenario-b-topology.png │ │ │ ├── shcd_example.png │ │ │ ├── small.png │ │ │ ├── tds_can_overview.png │ │ │ ├── vsx_isl_ha.png │ │ │ └── vsx_split.png │ │ ├── index.md │ │ ├── manual_switch_config.md │ │ ├── 
mellanox │ │ │ ├── README.md │ │ │ ├── acl.md │ │ │ ├── arp.md │ │ │ ├── backup.md │ │ │ ├── bgp_basic.md │ │ │ ├── cable_diagnostics.md │ │ │ ├── check_bgp_and_metallb.md │ │ │ ├── check_current_dhcp_leases.md │ │ │ ├── check_dhcp_lease_is_getting_allocated.md │ │ │ ├── check_hsm.md │ │ │ ├── check_kea_dhcp_logs.md │ │ │ ├── compute_uan_application_nodes.md │ │ │ ├── dhcp_decline.md │ │ │ ├── dns-client.md │ │ │ ├── domain_name.md │ │ │ ├── duplicate_ip.md │ │ │ ├── exec_banner.md │ │ │ ├── hostname.md │ │ │ ├── igmp.md │ │ │ ├── index.md │ │ │ ├── ip_filter.md │ │ │ ├── key_features.md │ │ │ ├── lag.md │ │ │ ├── large.md │ │ │ ├── lldp.md │ │ │ ├── loopback.md │ │ │ ├── management_interface.md │ │ │ ├── management_network_configuration_example.md │ │ │ ├── management_network_function_in_detail.md │ │ │ ├── medium.md │ │ │ ├── mlag.md │ │ │ ├── mlag_architecture.md │ │ │ ├── mlag_switch.md │ │ │ ├── mstp.md │ │ │ ├── native_vlan.md │ │ │ ├── ncn_tcpdump.md │ │ │ ├── ncns_on_install.md │ │ │ ├── network_naming_function.md │ │ │ ├── network_traffic_pattern.md │ │ │ ├── ntp.md │ │ │ ├── ospfv2.md │ │ │ ├── physical_interfaces.md │ │ │ ├── pim.md │ │ │ ├── reboot_pxe_fail.md │ │ │ ├── remote_logging.md │ │ │ ├── requirements_and_optional_configuration.md │ │ │ ├── routed_interface.md │ │ │ ├── scenario-a.md │ │ │ ├── scenario-b.md │ │ │ ├── small.md │ │ │ ├── snmp_community.md │ │ │ ├── snmpv3_users.md │ │ │ ├── spine_leaf_architecture.md │ │ │ ├── spine_leaf_architecture2.md │ │ │ ├── spine_leaf_architecture3.md │ │ │ ├── ssh.md │ │ │ ├── static_mac.md │ │ │ ├── static_routing.md │ │ │ ├── status_of_cray-dhcp-kea_pods.md │ │ │ ├── system_images.md │ │ │ ├── test_tftp_traffic.md │ │ │ ├── typical_mlag_port_configuration.md │ │ │ ├── typical_mlag_switch_configuration.md │ │ │ ├── upgrade.md │ │ │ ├── verify-switches_are_forwarding_dhcp_traffic.md │ │ │ ├── verify_bgp.md │ │ │ ├── verify_dhcp_traffic_on_workers.md │ │ │ ├── verify_route_to_tftp.md │ │ │ ├── 
very_large.md │ │ │ ├── vlan.md │ │ │ ├── vlan_interface.md │ │ │ ├── vlan_trunking_8021q.md │ │ │ └── web-ui.md │ │ ├── network_tests.md │ │ ├── reinstall.md │ │ ├── replace_switch.md │ │ ├── saving_config.md │ │ ├── snmp_exporter_configs.md │ │ ├── transceiver_cable_guide.md │ │ ├── transceiver_example.md │ │ ├── validate_cabling.md │ │ ├── validate_shcd.md │ │ ├── validate_switch_configs.md │ │ └── wipe_mgmt_switches.md │ └── metallb_bgp │ │ ├── Check_BGP_Status_and_Reset_Sessions.md │ │ ├── MetalLB_Configuration.md │ │ ├── MetalLB_in_BGP-Mode.md │ │ ├── Troubleshoot_BGP_not_Accepting_Routes_from_MetalLB.md │ │ └── Troubleshoot_Services_without_an_Allocated_IP_Address.md ├── node_management │ ├── Access_and_Update_the_Settings_for_Replacement_NCNs.md │ ├── Add_Remove_Replace_NCNs │ │ ├── Add_NCN_Data.md │ │ ├── Add_Remove_Replace_NCNs.md │ │ ├── Add_Switch_Config.md │ │ ├── Allocate_NCN_IP_Addresses.md │ │ ├── Boot_NCN.md │ │ ├── Collect_NCN_MAC_Addresses.md │ │ ├── Redeploy_Services.md │ │ ├── Remove_NCN_Data.md │ │ ├── Remove_NCN_from_Role.md │ │ ├── Remove_Switch_Config.md │ │ ├── Update_Firmware.md │ │ ├── Update_NCN_BIOS_TPM_State.md │ │ ├── Validate_Health.md │ │ └── Validate_NCN.md │ ├── Add_TLS_Certificates_to_BMCs.md │ ├── Add_a_Standard_Rack_Node.md │ ├── Add_additional_Air-Cooled_Cabinets_to_a_System.md │ ├── Add_additional_Liquid-Cooled_Cabinets_to_a_System.md │ ├── Adding_a_Liquid-cooled_blade_to_a_System.md │ ├── Adding_a_Liquid-cooled_blade_to_a_System_Using_SAT.md │ ├── Build_NCN_Images_Locally.md │ ├── Change_Java_Security_Settings.md │ ├── Change_Settings_for_HMS_Collector_Polling_of_Air_Cooled_Nodes.md │ ├── Check_and_Set_the_metalno-wipe_Setting_on_NCNs.md │ ├── Check_the_BMC_Failover_Mode.md │ ├── Clear_Space_in_Root_File_System_on_Worker_Nodes.md │ ├── Configuration_of_NCN_Bonding.md │ ├── Configure_NTP_on_NCNs.md │ ├── Customize_Disk_Hardware.md │ ├── Customize_PCIe_Hardware.md │ ├── Defragment_NID_Numbering.md │ ├── Disable_Nodes.md │ ├── 
Dump_a_Non-Compute_Node.md │ ├── Enable_Nodes.md │ ├── Enable_Passwordless_Connections_to_Liquid_Cooled_Node_BMCs.md │ ├── Enable_ipmi_access_on_HPE_iLO_BMCs.md │ ├── Find_Node_Type_and_Manufacturer.md │ ├── Launch_a_Virtual_KVM_on_Gigabyte_Nodes.md │ ├── Launch_a_Virtual_KVM_on_Intel_Nodes.md │ ├── Move_a_Standard_Rack_Node.md │ ├── Move_a_Standard_Rack_Node_SameRack_SameHSNPorts.md │ ├── Move_a_liquid-cooled_blade_within_a_System.md │ ├── NCN_Identify_Drives_Using_ledctl.md │ ├── NCN_NIC_Replacement.md │ ├── NCN_Network_Troubleshooting.md │ ├── Node_Management.md │ ├── Node_Management_Workflows.md │ ├── Reboot_NCNs.md │ ├── Rebuild_NCNs │ │ ├── Final_Validation_Steps.md │ │ ├── Identify_Nodes_and_Update_Metadata.md │ │ ├── Post_Rebuild_Storage_Node_Validation.md │ │ ├── Power_Cycle_and_Rebuild_Nodes.md │ │ ├── Prepare_Storage_Nodes.md │ │ ├── Re-add_Storage_Node_to_Ceph.md │ │ ├── Rebuild_NCNs.md │ │ └── Validate_Boot_Loader.md │ ├── Removing_a_Liquid-cooled_blade_from_a_System.md │ ├── Removing_a_Liquid-cooled_blade_from_a_System_Using_SAT.md │ ├── Removing_a_Standard_Node_from_a_System.md │ ├── Replace_a_Compute_Blade.md │ ├── Replace_a_Compute_Blade_Using_SAT.md │ ├── Replace_a_Standard_Rack_Node.md │ ├── Replacing_Foxconn_User_Pass.md │ ├── Repurpose_Compute_as_UAN.md │ ├── Reset_Credentials_on_Redfish_Devices_for_Reinstallation.md │ ├── S3FS_Usage_and_Guidelines.md │ ├── Set_Gigabyte_Node_BMC_to_Factory_Defaults.md │ ├── Swap_a_Compute_Blade_with_a_Different_System.md │ ├── Swap_a_Compute_Blade_with_a_Different_System_Using_SAT.md │ ├── Switch_PXE_Boot_From_Onboard_NICs_to_PCIe.md │ ├── TLS_Certificates_for_Redfish_BMCs.md │ ├── Troubleshoot_Interfaces_with_IP_Address_Issues.md │ ├── Troubleshoot_Issues_with_Redfish_Endpoint_Discovery.md │ ├── Troubleshoot_Loss_of_Console_Connections_and_Logs_on_Gigabyte_Nodes.md │ ├── Update_Compute_Node_Mellanox_HSN_NIC_Firmware.md │ ├── Update_the_Gigabyte_Node_BIOS_Time.md │ ├── Update_the_HPE_Node_BIOS_Time.md │ ├── 
Updating_Cabinet_Routes_on_Management_NCNs.md │ ├── Use_the_Physical_KVM.md │ ├── Verify_Node_Removal.md │ ├── View_BIOS_Logs_for_Liquid_Cooled_Nodes.md │ ├── Wipe_NCN_Disks.md │ └── clear_gigabyte_cmos.md ├── observability │ └── Observability.md ├── package_repository_management │ ├── Manage_Repositories_with_Nexus.md │ ├── Nexus_Configuration.md │ ├── Nexus_Deployment.md │ ├── Nexus_Export_and_Restore.md │ ├── Nexus_Service_Recovery.md │ ├── Nexus_Space_Cleanup.md │ ├── Package_Repository_Management.md │ ├── Package_Repository_Management_with_Nexus.md │ ├── Repair_Blobstore.md │ ├── Repair_Yum_Repository_Metadata.md │ ├── Restrict_Admin_Privileges_in_Nexus.md │ └── Troubleshoot_Nexus.md ├── power_management │ ├── Cray_Advanced_Platform_Monitoring_and_Control_CAPMC.md │ ├── Node_Card_Power_Management.md │ ├── Power_Control_Service_PCS.md │ ├── Power_Off_Compute_Cabinets.md │ ├── Power_Off_Management_Cabinets.md │ ├── Power_Off_Storage_Cabinets.md │ ├── Power_Off_the_External_Lustre_File_System.md │ ├── Power_On_Compute_Cabinets.md │ ├── Power_On_and_Boot_Managed_Nodes.md │ ├── Power_On_and_Start_the_Management_Kubernetes_Cluster.md │ ├── Power_On_the_External_Lustre_File_System.md │ ├── Prepare_the_System_for_Power_Off.md │ ├── Recover_from_a_Liquid_Cooled_Cabinet_EPO_Event.md │ ├── Save_Management_Network_Switch_Configurations.md │ ├── Set_the_Turbo_Boost_Limit.md │ ├── Shut_Down_and_Power_Off_Managed_Nodes.md │ ├── Shut_Down_and_Power_Off_the_Management_Kubernetes_Cluster.md │ ├── Standard_Rack_Node_Power_Management.md.save │ ├── System_Power_Off_Procedures.md │ ├── System_Power_On_Procedures.md │ ├── User_Access_to_Compute_Node_Power_Data.md │ └── power_management.md ├── resiliency │ ├── Recreate_StatefulSet_Pods_on_Another_Node.md │ ├── Resilience_of_System_Management_Services.md │ ├── Resiliency.md │ ├── Resiliency_Testing_Procedure.md │ └── Restore_System_Functionality_if_a_Kubernetes_Worker_Node_is_Down.md ├── security_and_authentication │ ├── 
API_Authorization.md │ ├── Access_the_Keycloak_User_Management_UI.md │ ├── Add_LDAP_User_Federation.md │ ├── Add_Root_Service_Account_for_Gigabyte_Controllers.md │ ├── Audit_Logs.md │ ├── Authenticate_an_Account_with_the_Command_Line.md │ ├── Backup_and_Restore_Vault_Clusters.md │ ├── Certificate_Types.md │ ├── Change_Air-Cooled_Node_BMC_Credentials.md │ ├── Change_Credentials_on_ServerTech_PDUs.md │ ├── Change_EX_Liquid-Cooled_Cabinet_Global_Default_Password.md │ ├── Change_Keycloak_Token_Lifetime.md │ ├── Change_NCN_Image_Root_Password_and_SSH_Keys.md │ ├── Change_NCN_Image_Root_Password_and_SSH_Keys_on_PIT_Node.md │ ├── Change_Root_Passwords_for_Compute_Nodes.md │ ├── Change_the_Keycloak_Admin_Password.md │ ├── Change_the_LDAP_Server_IP_Address_for_Existing_LDAP_Server_Content.md │ ├── Change_the_LDAP_Server_IP_Address_for_New_LDAP_Server_Content.md │ ├── Configure_Keycloak_for_LDAPAD_Authentication.md │ ├── Configure_root_user_on_HPE_iLO_BMCs.md │ ├── Configure_the_RSA_Plugin_in_Keycloak.md │ ├── Create_Internal_Groups_in_the_Keycloak_Shasta_Realm.md │ ├── Create_Internal_User_Accounts_in_the_Keycloak_Shasta_Realm.md │ ├── Create_a_Backup_of_the_Keycloak_Postgres_Database.md │ ├── Create_a_Service_Account_in_Keycloak.md │ ├── Default_Keycloak_Realms_Accounts_and_Clients.md │ ├── Delete_Internal_User_Accounts_from_the_Keycloak_Shasta_Realm.md │ ├── Get_a_Long-lived_Token_for_a_Service_Account.md │ ├── HashiCorp_Vault.md │ ├── Keycloak_Operations.md │ ├── Keycloak_Service_Recovery.md │ ├── Keycloak_User_Localization.md │ ├── Keycloak_User_Management_with_Kcadm.md │ ├── Make_HTTPS_Requests_from_Sources_Outside_the_Management_Kubernetes_Cluster.md │ ├── Manage_Sealed_Secrets.md │ ├── Manage_System_Passwords.md │ ├── PKI_Certificate_Authority_CA.md │ ├── PKI_Services.md │ ├── Preserve_Username_Capitalization_for_Users_Exported_from_Keycloak.md │ ├── Provisioning_a_Liquid-Cooled_EX_Cabinet_CEC_with_Default_Credentials.md │ ├── Public_Key_Infrastructure_PKI.md │ ├── 
Recovering_from_Mismatched_BMC_Credentials.md │ ├── Remove_Internal_Groups_from_the_Keycloak_Shasta_Realm.md │ ├── Remove_the_Email_Mapper_from_the_LDAP_User_Federation.md │ ├── Remove_the_LDAP_User_Federation_from_Keycloak.md │ ├── Resync_Keycloak_Users_to_Compute_Nodes.md │ ├── Retrieve_an_Authentication_Token.md │ ├── Retrieve_the_Client_Secret_for_Service_Accounts.md │ ├── SOPS.md │ ├── SSH_Keys.md │ ├── System_Security_and_Authentication.md │ ├── Transport_Layer_Security_for_Ingress_Services.md │ ├── Troubleshoot_Common_Vault_Cluster_Issues.md │ ├── Troubleshoot_Kyverno_Configuration_manually.md │ ├── Update_Default_Air-Cooled_BMC_and_Leaf_BMC_Switch_SNMP_Credentials.md │ ├── Update_Default_ServerTech_PDU_Credentials_used_by_the_Redfish_Translation_Service.md │ ├── Update_NCN_Passwords.md │ ├── Update_Root_Secrets_In_Vault.md │ ├── Updating_the_Liquid-Cooled_EX_Cabinet_Default_Credentials_after_a_CEC_Password_Change.md │ └── Vault_Service_Recovery.md ├── spire │ ├── Enable_TPM_node_attestation.md │ ├── Restore_Missing_Spire_Metadata.md │ ├── Restore_Spire_Postgres_without_a_Backup.md │ ├── Spire_Service_Recovery.md │ ├── Troubleshoot_Spire_Failing_to_Start_on_NCNs.md │ ├── Update_Spire_Intermediate_CA_Certificate.md │ └── xname_validation.md ├── system_admin_toolkit │ ├── README.md │ ├── SAT_Uninstall_and_Downgrade.md │ ├── SAT_on_an_External_System.md │ ├── about_sat │ │ ├── Introduction_to_SAT.md │ │ ├── README.md │ │ ├── SAT_Command_Overview.md │ │ ├── SAT_Dependencies.md │ │ └── SAT_in_CSM.md │ ├── configuration │ │ ├── Authenticate_SAT_Commands.md │ │ ├── Configure_Multi-tenancy_Optional.md │ │ ├── Generate_SAT_S3_Credentials.md │ │ ├── README.md │ │ └── Set_System_Revision_Information.md │ └── usage │ │ ├── Change_BOS_Version.md │ │ ├── Change_CFS_Version.md │ │ ├── Configure_Retries.md │ │ ├── README.md │ │ ├── SAT_Bootprep.md │ │ └── SAT_and_IUF.md ├── system_configuration_service │ ├── Configure_BMC_and_Controller_Parameters_with_scsd.md │ ├── 
Manage_Parameters_with_the_scsd_Service.md │ ├── Set_BMC_Credentials.md │ └── System_Configuration_Service.md ├── system_layout_service │ ├── Add_Liquid-Cooled_Cabinets_To_SLS.md │ ├── Add_UAN_CAN_IP_Addresses_to_SLS.md │ ├── Add_an_alias_to_a_service.md │ ├── Create_a_Backup_of_the_SLS_Postgres_Database.md │ ├── Dump_SLS_Information.md │ ├── Load_SLS_Database_with_Dump_File.md │ ├── Restore_SLS_Postgres_Database_from_Backup.md │ ├── Restore_SLS_Postgres_without_an_Existing_Backup.md │ ├── System_Layout_Service_SLS.md │ └── Update_SLS_with_UAN_Aliases.md ├── system_management_health │ ├── Access_System_Management_Health_Services.md │ ├── Configure_Prometheus_Email_Alert_Notifications.md │ ├── E1000_SMART_data_configuration.md │ ├── Grafana_Dashboards_by_Component.md │ ├── Grok-Exporter_Error.md │ ├── Prometheus_Kafka_Error.md │ ├── Remove_Kiali.md │ ├── System_Management_Health.md │ ├── System_Management_Health_Checks_and_Alerts.md │ ├── Troubleshoot_Grafana_Dashboard.md │ ├── Troubleshoot_Prometheus_Alerts.md │ ├── uan_node_exporter_configs.md │ └── victoriametrics.md ├── utility_storage │ ├── Add_Ceph_Node.md │ ├── Add_Ceph_OSDs.md │ ├── Adjust_Ceph_Pool_Quotas.md │ ├── Alternate_Storage_Pools.md │ ├── Ceph_Daemon_Memory_Profiling.md │ ├── Ceph_Deep_Scrubs.md │ ├── Ceph_Health_States.md │ ├── Ceph_Orchestrator_Usage.md │ ├── Ceph_Service_Check_Script_Usage.md │ ├── Ceph_Storage_Types.md │ ├── Ceph_upgrade_tool_Usage.md │ ├── Cephadm_Reference_Material.md │ ├── Collect_Information_About_the_Ceph_Cluster.md │ ├── Dump_Ceph_Crash_Data.md │ ├── Identify_Ceph_Latency_Issues.md │ ├── Manage_Ceph_Services.md │ ├── Remove_Ceph_Node.md │ ├── Restore_Corrupt_Nexus.md │ ├── Shrink_Ceph_OSDs.md │ ├── Troubleshoot_Ceph-Mon_Processes_Stopping_and_Exceeding_Max_Restarts.md │ ├── Troubleshoot_Ceph_FS_Client_Connectivity_issues.md │ ├── Troubleshoot_Ceph_MDS_reporting_slow_requests_and_failure_on_client.md │ ├── Troubleshoot_Ceph_New_RGW_Deployment_Failing.md │ ├── 
Troubleshoot_Ceph_OSDs_Not_Created.md │ ├── Troubleshoot_Ceph_OSDs_Reporting_Full.md │ ├── Troubleshoot_Ceph_Services_Not_Starting.md │ ├── Troubleshoot_Failure_to_Get_Ceph_Health.md │ ├── Troubleshoot_HEALTH_ERR_Module_devicehealth.md │ ├── Troubleshoot_Insufficient_Standby_MDS_Daemons_Available.md │ ├── Troubleshoot_Large_Object_Map_Objects_in_Ceph_Health.md │ ├── Troubleshoot_Pods_Multi-Attach_Error.md │ ├── Troubleshoot_Pools_Have_Many_More_Objects_Per_Pg_Than_Average.md │ ├── Troubleshoot_RGW_Health_Check_Fail.md │ ├── Troubleshoot_S3FS_Mounts.md │ ├── Troubleshoot_System_Clock_Skew.md │ ├── Troubleshoot_a_Down_OSD.md │ ├── Troubleshoot_an_Unresponsive_S3_Endpoint.md │ ├── Troubleshoot_ceph_image_with_none_tag.md │ ├── Utility_Storage.md │ └── update_ceph_node_exporter_config.md └── validate_csm_health.md ├── runLint.sh ├── scripts ├── .shellspec ├── CASMINST-1309.sh ├── CASMINST-2015.sh ├── configure_snmp_monitor.py ├── csm_rbd_tool.tar.gz ├── cubs_tool.py ├── ensure_testing_rpms.sh ├── mount-admin-tools-bucket.sh ├── nexus-export.sh ├── nexus-restore.sh ├── nexus-space-usage.sh ├── operations │ ├── bifurcated_can │ │ └── bican_route.py │ ├── boot_script_service │ │ ├── bss-restore-bootparameters.sh │ │ └── bss-update-ids-etags.py │ ├── ceph │ │ ├── bootstrap_osd_on_removable_disk.sh │ │ ├── disable_local_registry.sh │ │ └── redeploy_monitoring_stack_to_nexus.sh │ ├── configuration │ │ ├── apply_csm_configuration.sh │ │ ├── backup_vcs.sh │ │ ├── bash_lib │ │ │ ├── common.sh │ │ │ └── vcs.sh │ │ ├── export_bos_data.sh │ │ ├── export_cfs_data.py │ │ ├── export_cfs_data.sh │ │ ├── export_ims_data.py │ │ ├── get_git.py │ │ ├── import_bos_data.py │ │ ├── import_bos_data.sh │ │ ├── import_cfs_data.py │ │ ├── import_cfs_data.sh │ │ ├── import_ims_data.py │ │ ├── json_str_list_diff.py │ │ ├── make_cfs_config.py │ │ ├── monitor_comp_cfs_config_status.py │ │ ├── python_lib │ │ │ ├── __init__.py │ │ │ ├── api_requests.py │ │ │ ├── args.py │ │ │ ├── bos.py │ │ │ ├── 
bos_cli.py │ │ │ ├── bss.py │ │ │ ├── cfs.py │ │ │ ├── cfs_import_export.py │ │ │ ├── common.py │ │ │ ├── csm_root_secret.py │ │ │ ├── hsm.py │ │ │ ├── ims.py │ │ │ ├── ims_import_export │ │ │ │ ├── __init__.py │ │ │ │ ├── defs.py │ │ │ │ ├── exceptions.py │ │ │ │ ├── export_options.py │ │ │ │ ├── exported_data.py │ │ │ │ ├── import_options.py │ │ │ │ ├── ims_data.py │ │ │ │ ├── ims_deleted_data.py │ │ │ │ ├── ims_export.py │ │ │ │ ├── ims_import.py │ │ │ │ ├── s3_bucket_info.py │ │ │ │ ├── s3_bucket_listings.py │ │ │ │ ├── s3_data.py │ │ │ │ └── s3_helper.py │ │ │ ├── k8s.py │ │ │ ├── logger.py │ │ │ ├── product_catalog.py │ │ │ ├── root_ssh_config.py │ │ │ ├── s3.py │ │ │ ├── types.py │ │ │ ├── vault.py │ │ │ └── vcs.py │ │ ├── re_run_node_personalization.sh │ │ ├── replace_ssh_keys.sh │ │ ├── restore_ssh_config_from_vault.py │ │ ├── restore_ssh_keys.sh │ │ ├── restore_vcs.sh │ │ ├── update_ims_data_files.py │ │ ├── write_root_secrets_to_vault.py │ │ ├── write_ssh_config_to_vault.py │ │ └── write_sw_admin_pw_to_vault.py │ ├── etcd │ │ └── take-etcd-manual-backups.sh │ ├── firmware │ │ ├── FASBackupImages.py │ │ ├── FASUpdate.py │ │ ├── FASrmActions.sh │ │ ├── FASrmSnapshots.sh │ │ ├── recipes │ │ │ ├── cray_chassisBMC_BMC.json │ │ │ ├── cray_nodeBMC_BMC.json │ │ │ ├── cray_nodeBMC_node0AccFPGA0.json │ │ │ ├── cray_nodeBMC_node0AccVBIOS.json │ │ │ ├── cray_nodeBMC_node0BIOS.json │ │ │ ├── cray_nodeBMC_node0ManagementEthernet.json │ │ │ ├── cray_nodeBMC_node1AccFPGA0.json │ │ │ ├── cray_nodeBMC_node1BIOS.json │ │ │ ├── cray_nodeBMC_node1ManagementEthernet.json │ │ │ ├── cray_nodeBMC_node2BIOS.json │ │ │ ├── cray_nodeBMC_node2ManagementEthernet.json │ │ │ ├── cray_nodeBMC_node3BIOS.json │ │ │ ├── cray_nodeBMC_node3ManagementEthernet.json │ │ │ ├── cray_nodeBMC_nodeAccFPGA0.json │ │ │ ├── cray_nodeBMC_nodeAccUC.json │ │ │ ├── cray_nodeBMC_nodeBIOS.json │ │ │ ├── cray_nodeBMC_nodeManagementEthernet.json │ │ │ ├── cray_routerBMC_BMC.json │ │ │ ├── 
foxconn_nodeBMC_bios.json │ │ │ ├── foxconn_nodeBMC_bmc.json │ │ │ ├── foxconn_nodeBMC_erot.json │ │ │ ├── foxconn_nodeBMC_fpga.json │ │ │ ├── foxconn_nodeBMC_pld.json │ │ │ ├── gigabyte_nodeBMC_BIOS.json │ │ │ ├── gigabyte_nodeBMC_BMC.json │ │ │ ├── hpe_nodeBMC_iLO5.json │ │ │ ├── hpe_nodeBMC_iLO6.json │ │ │ └── hpe_nodeBMC_systemRom.json │ │ └── upload_nvidia_images_tftp.py │ ├── gateway-test │ │ ├── cn-gateway-test.sh │ │ ├── gateway-test-defn.yaml │ │ ├── gateway-test.py │ │ ├── ncn-gateway-test.sh │ │ └── uan-gateway-test.sh │ ├── hardware_state_manager │ │ ├── FoxconnUserPass.py │ │ ├── updateroles.py │ │ ├── verifygroups.py │ │ ├── verifylocks.py │ │ ├── verifymembership.py │ │ ├── verifypartitions.py │ │ └── verifyroles.py │ ├── known-issues.sh │ ├── kubernetes │ │ ├── encryption.sh │ │ └── latest_chart_manifest.sh │ ├── node_management │ │ ├── Add_Remove_Replace_NCNs │ │ │ ├── add_management_ncn.py │ │ │ ├── mac_collection_script.ipxe │ │ │ ├── ncn_add_pre-req.py │ │ │ ├── ncn_status.py │ │ │ ├── remove_management_ncn.py │ │ │ ├── sls_utils │ │ │ │ ├── Managers.py │ │ │ │ ├── Networks.py │ │ │ │ ├── Reservations.py │ │ │ │ ├── __init__.py │ │ │ │ ├── ipam.py │ │ │ │ ├── json_utils.py │ │ │ │ └── schemas │ │ │ │ │ ├── hold │ │ │ │ │ ├── sls_full.schema.json.hold │ │ │ │ │ ├── sls_hardware.schema.json.hold │ │ │ │ │ ├── sls_networks_schema.json.hold │ │ │ │ │ └── sls_subnet_schema.json.hold │ │ │ │ │ ├── sls_networks_schema.json │ │ │ │ │ ├── sls_reservations_schema.json │ │ │ │ │ └── sls_subnets_schema.json │ │ │ └── update_customizations.sh │ │ ├── Add_River_Cabinets │ │ │ ├── update_ncn_etc_hosts.py │ │ │ └── verify_bmc_credentials.sh │ │ ├── allocate_uan_ip.py │ │ ├── assign-ncn-images.sh │ │ ├── defragment_nids.py │ │ ├── delete_bmc_subscriptions.py │ │ ├── ncn-image-modification.sh │ │ ├── ncn-ims-image-upload.sh │ │ ├── remove_gigabyte_cmc.sh │ │ ├── remove_standard_rack_node.sh │ │ └── update-ncn-cabinet-routes.sh │ ├── pyscripts │ │ ├── .gitignore │ 
│ ├── pyscripts │ │ │ ├── __init__.py │ │ │ ├── cli.py │ │ │ ├── commands │ │ │ │ ├── __init__.py │ │ │ │ ├── cmd_test_bican_external.py │ │ │ │ ├── cmd_test_bican_internal.py │ │ │ │ ├── test_bican_external │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── can_toggle_tests_external.yaml │ │ │ │ │ ├── chn_toggle_tests_external.yaml │ │ │ │ │ └── test_bican_external.py │ │ │ │ └── test_bican_internal │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── can_toggle_tests.yaml │ │ │ │ │ ├── chn_toggle_tests.yaml │ │ │ │ │ └── test_bican_internal.py │ │ │ └── core │ │ │ │ ├── __init__.py │ │ │ │ ├── csm_api_utils.py │ │ │ │ ├── log_config.py │ │ │ │ └── ssh │ │ │ │ ├── __init__.py │ │ │ │ ├── ssh_connection.py │ │ │ │ ├── ssh_host.py │ │ │ │ └── ssh_targets.py │ │ ├── setup.py │ │ └── start.py │ ├── system_layout_service │ │ ├── add_cdu_switch.py │ │ ├── add_liquid_cooled_cabinet.py │ │ ├── backup_sls_postgres.sh │ │ ├── inspect_sls_cabinets.py │ │ └── restore_sls_postgres_from_backup.sh │ └── xnamevalidation.sh ├── patch-ceph-runcmd.py ├── remove_orphaned_artifacts.py ├── repair-ceph-latency.sh ├── spec │ ├── encryption_spec.sh │ └── spec_helper.sh └── upload_ceph_images_to_nexus.sh ├── troubleshooting ├── BMC_SSH_key_manual_fixup.md ├── README.md ├── cms_barebones_image_boot.md ├── debugging_with_hms_pprof_images.md ├── dhcp_runbook.md ├── dns_runbook.md ├── error_rolling_back_service_chart_with_etcd.md ├── hms_ct_manual_run.md ├── image_projection_inconsistent_across_nodes.md ├── incrementally_configuring_images.md ├── index.md ├── interpreting_hms_health_check_results.md ├── known_issues │ ├── CFS_Component_With_Zero_Length_ID.md │ ├── Gigabyte_BMC_Missing_Redfish_Data.md │ ├── Istio-Proxy_failing_with_too_many_open_files.md │ ├── Keycloak_Error_Cannot_read_properties.md │ ├── Nexus_Fail_Authentication_with_Keycloak_Users.md │ ├── SLS_Not_Working_During_Node_Rebuild.md │ ├── VCS_Password_With_Illegal_Characters.md │ ├── admin_client_auth_not_found.md │ ├── antero_node_NID_allocation.md │ 
├── ceph_osd_latency.md │ ├── cfs-api_pods_in_CLBO_state.md │ ├── component_power_state_mismatch.md │ ├── cray-console-node_pods_in_CrashLoopBackOff.md │ ├── craycli_403_forbidden_errors.md │ ├── discovery_job_not_creating_redfish_endpoints.md │ ├── flags_set_for_nodes_in_hsm.md │ ├── helm_chart_deploy_timeouts.md │ ├── hpe_systems_not_transitioning_power_state.md │ ├── ims_images_orphaned_in_s3.md │ ├── issues_with_ncn_health_checks.md │ ├── kubectl_logs_no_space_left_on_device.md │ ├── mellanox_lacp_individual.md │ ├── missing_binaries_in_aarch64_images.md │ ├── ncn_resource_checks.md │ ├── parry_peak_console_boot_errors.md │ ├── postgres_database_recovery.md │ ├── product_catalog_upgrade_error.md │ ├── sms_health_check.md │ ├── spire_database_airgap_configuration.md │ ├── spire_database_lookup_error.md │ ├── spire_pod_initializing.md │ ├── ssl_certificate_validation_issues.md │ ├── test_failures_no_discovered_computes_in_hsm.md │ └── velero_version_mismatch.md ├── kubernetes │ ├── Kubernetes_Kube_apiserver_failing.md │ ├── Kubernetes_Log_File_Locations.md │ ├── Kubernetes_Pods_Failing_to_Mount_PVCs.md │ ├── Kubernetes_Troubleshooting_Information.md │ ├── Troubleshoot_Kubernetes_Node_NotReady.md │ ├── Troubleshoot_Kubernetes_Pods_Not_Starting.md │ ├── Troubleshoot_Liveliness_Readiness_Probe_Failures.md │ └── Troubleshoot_Unresponsive_kubectl_Commands.md ├── pxe_runbook.md └── scripts │ ├── kea_unbound_check.sh │ └── remove_label_from_etcd_cluster.sh ├── update_product_stream ├── README.md └── index.md ├── upgrade ├── Prepare_for_Upgrade_to_Next_CSM_Major_Version.md ├── README.md ├── Upgrade_Management_Nodes_and_CSM_Services.md ├── Upgrade_Only_CSM_with_iuf.md ├── Validate_CSM_Health_During_Upgrade.md ├── index.md ├── manual_ncn_upgrade.md ├── resource_material │ ├── README.md │ ├── index.md │ ├── k8s │ │ └── worker-reference.md │ └── storage │ │ └── cephadm-reference.md └── scripts │ ├── ceph │ ├── ceph-services-stage2.sh │ ├── ceph-upgrade-monitoring-stack.sh │ 
├── ceph-upgrade-tool.py │ ├── create_rgw_buckets.sh │ ├── csm-1.5-new-buckets.yml │ └── lib │ │ ├── ceph-health.sh │ │ ├── ceph-image-pull.sh │ │ ├── ceph-install-dashboard.sh │ │ ├── ceph-orch-tasks.sh │ │ ├── ceph-upgrade-init.sh │ │ ├── ceph-upgrade-mdss.sh │ │ ├── ceph-upgrade-mgrs.sh │ │ ├── ceph-upgrade-mons.sh │ │ ├── ceph-upgrade-osds.sh │ │ ├── ceph-upgrade-rgws.sh │ │ ├── ceph-upgrade-step1.sh │ │ ├── cephadm-keys.sh │ │ ├── convert-radosgw.sh │ │ ├── k8s-scale-utils.sh │ │ ├── mark_step_complete.sh │ │ └── update_bss_metadata.sh │ ├── cfs │ └── wait_for_configuration.sh │ ├── common │ ├── k8s-common.sh │ ├── ncn-common.sh │ ├── ncn-rebuild-common.sh │ ├── restart-cfs.sh │ └── upgrade-state.sh │ ├── k8s │ ├── apply-coredns-pod-affinity.sh │ ├── deploy_charts_post_k8s_upgrade.sh │ ├── failover-leader.sh │ ├── fix-kube-prometheus-alerts.sh │ ├── move-pod.sh │ ├── promote-initial-master.sh │ ├── remove-k8s-node.sh │ ├── tds_lower_cpu_requests.sh │ ├── upgrade_control_plane.sh │ └── upgrade_k8s.sh │ ├── rebuild │ ├── ncn-rebuild-master-nodes.sh │ └── prerequisites.sh │ ├── sls │ ├── README.SLS_Upgrade.md │ ├── csm_1_2_upgrade │ │ ├── __init__.py │ │ └── sls_updates.py │ ├── expert_mode.md │ ├── sls_updater.py_technical_details.md │ ├── sls_updater_csm_1.2.py │ └── sls_utils │ │ ├── Managers.py │ │ ├── Networks.py │ │ ├── README.md │ │ ├── Reservations.py │ │ ├── __init__.py │ │ ├── index.md │ │ ├── ipam.py │ │ ├── json_utils.py │ │ └── schemas │ │ ├── hold │ │ ├── sls_full.schema.json.hold │ │ ├── sls_hardware.schema.json.hold │ │ ├── sls_networks_schema.json.hold │ │ └── sls_subnet_schema.json.hold │ │ ├── sls_networks_schema.json │ │ ├── sls_reservations_schema.json │ │ └── sls_subnets_schema.json │ ├── upgrade │ ├── Upgrade_automation.md │ ├── cleanup.py │ ├── cleanup.sh │ ├── csm-upgrade.sh │ ├── label-istio-resources.sh │ ├── ncn-upgrade-master-nodes.sh │ ├── ncn-upgrade-worker-storage-nodes.sh │ ├── prepare-assets.sh │ ├── prerequisites.sh │ ├── 
rollout-restart.sh │ ├── tds_cpu_requests.yaml │ ├── upload-ncn-images.sh │ └── util │ │ ├── extract_chart_manifest.py │ │ ├── fix-postgres.sh │ │ ├── pre-upgrade-status.sh │ │ ├── sysmgmt-health-upgrade.sh │ │ ├── update-customizations.sh │ │ ├── upgrade-test-rpms.sh │ │ └── verify-k8s-nodes-upgraded.sh │ └── upgrade_ntp_timezone_metadata.sh └── workflows ├── cilium ├── cilium-live-migration.j2 └── generateCiliumLiveMigration.py ├── iuf ├── hooks │ └── master-host-hook-script.yaml ├── operations │ ├── add-product-to-product-catalog.yaml │ ├── extract-release-distributions.yaml │ ├── ims-upload.yaml │ ├── loftsman-manifest-deploy.yaml │ ├── loftsman-manifest-upload │ │ └── loftsman-manifest-upload-template.yaml │ ├── managed-nodes │ │ └── managed-nodes-rollout.yaml │ ├── management-nodes-rollout │ │ ├── management-m001-rollout.yaml │ │ ├── management-storage-nodes-reboot.yaml │ │ ├── management-storage-nodes-rollout.yaml │ │ ├── management-two-master-nodes-rollout.yaml │ │ ├── management-worker-nodes-reboot.yaml │ │ ├── management-worker-nodes-rollout.yaml │ │ └── verify-ncn-images-configs.yaml │ ├── nexus-setup │ │ ├── README.md │ │ ├── cleanup-nexus-admin-credential.yaml │ │ ├── nexus-docker-upload-template.yaml │ │ ├── nexus-get-prerequisites-template.yaml │ │ ├── nexus-helm-upload-template.yaml │ │ ├── nexus-rpm-upload-template.yaml │ │ └── nexus-setup-template.yaml │ ├── operation-record-time.yaml │ ├── post-install-check-end.yaml │ ├── post-install-check-start.yaml │ ├── post-install-service-check-end.yaml │ ├── post-install-service-check-start.yaml │ ├── preflight-checks-for-services.yaml │ ├── prepare-images │ │ ├── prepare-managed-images-template.yaml │ │ └── prepare-management-images-template.yaml │ ├── s3-upload │ │ └── s3-upload-template.yaml │ ├── update-cfs-config │ │ ├── update-managed-cfs-config.yaml │ │ └── update-management-cfs-config.yaml │ ├── vcs-update │ │ └── vcs-update-working-branch.yaml │ └── vcs-upload │ │ ├── README.md │ │ └── 
vcs-upload-content.yaml ├── samples │ ├── global_params_example.json │ └── global_params_schema.yaml └── stages.yaml ├── ncn ├── hooks │ ├── after-each │ │ ├── update-bss-no-wipe.yaml │ │ ├── update-node-label.yaml │ │ └── wait-for-cfs-after-rebuild.yaml │ ├── before-all │ │ ├── install-csi.yaml │ │ └── move-critical-singleton-pods.yaml │ └── before-each │ │ ├── ensure-critical-pods-are-running.yaml │ │ ├── ensure-etcd-pods-are-healthy.yaml │ │ ├── ensure-pg-pods-are-healthy.yaml │ │ └── force-time-sync.yaml ├── storage │ ├── storage.rebuild.yaml │ └── storage.upgrade.yaml └── worker │ ├── worker.reboot.yaml │ └── worker.rebuild.yaml ├── scripts └── upload-rebuild-templates.sh ├── templates ├── add-labels.yaml ├── after-all-hooks.yaml ├── after-each-hooks.yaml ├── base │ ├── echo.template.argo.yaml │ ├── iufBase.template.argo.yaml │ ├── kubectlAndCurl.template.argo.yaml │ ├── sat-general-iuf.template.argo.yaml │ ├── ssh.template.argo.yaml │ └── update-product-catalog.template.argo.yaml ├── before-all-hooks.yaml ├── before-each-hooks.yaml ├── cilium.deploy.yaml ├── cilium.migrate.yaml ├── cilium.post-migrate.yaml ├── drain-worker.yaml ├── ncn-reboot-common.yaml ├── post-cilium-migrate-healthcheck.yaml ├── post-rebuild-worker.yaml ├── reboot-worker.yaml ├── set-bss-images-cfs-config.yaml ├── set-no-wipe.yaml ├── set-rd.live.dir-overlay.reset.yaml ├── storage.add-node-to-ceph.yaml ├── storage.add-to-haproxy-keepalived.yaml ├── storage.backup-ceph-data.yaml ├── storage.before-all.yaml ├── storage.before-each.yaml ├── storage.ceph-health-check.yaml ├── storage.ceph-upgrade.yaml ├── storage.create-rgw-buckets.yaml ├── storage.drain.yaml ├── storage.goss-tests.yaml ├── storage.post-reboot.yaml ├── storage.reboot.yaml ├── storage.shutdown-services.yaml ├── storage.upgrade-pre-shutdown.yaml ├── storage.wait-for-cfs.yaml ├── time-sync.yaml ├── wait-for-sls.yaml └── wipe-and-reboot-worker.yaml └── update_tags.sh /.editorconfig: 
-------------------------------------------------------------------------------- 1 | root = true 2 | 3 | [*] 4 | charset = utf-8 5 | end_of_line = lf 6 | indent_style = space 7 | insert_final_newline = true 8 | trim_trailing_spaces = true 9 | trim_trailing_whitespace = true 10 | 11 | # 4 space indentation 12 | [*.{py,go}] 13 | indent_size = 4 14 | 15 | # 2 space indentation 16 | [*.{json,yml,yaml}] 17 | indent_size = 2 18 | 19 | [*.sh] 20 | indent_size = 2 21 | 22 | [*.{adoc,md}] 23 | trim_trailing_whitespace = false 24 | indent_size = 4 25 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: 🐛 Report a bug 3 | about: Create a report to help us improve 4 | --- 5 | 6 | ##### SUMMARY 7 | 8 | ##### ISSUE TYPE 9 | - Bug Report 10 | 11 | ##### STEPS TO REPRODUCE 12 | 13 | 1. 14 | 2. 15 | 3. 16 | 4. 17 | 18 | ##### EXPECTED RESULTS 19 | 20 | 21 | ##### ACTUAL RESULTS 22 | 23 | 24 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/docs.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: 📝 Documentation Report 3 | about: Ask us about documentation 4 | --- 5 | 6 | ##### SUMMARY 7 | 8 | 9 | ##### ISSUE TYPE 10 | - Documentation Report 11 | 12 | ##### ADDITIONAL INFORMATION 13 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/rfe.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: ✨ Request for enhancement (RFE) 3 | about: Suggest an idea for this project 4 | --- 5 | 6 | ##### SUMMARY 7 | 8 | ##### ISSUE TYPE 9 | - RFE 10 | 11 | ##### ADDITIONAL INFORMATION 12 | -------------------------------------------------------------------------------- /api/README.md: 
-------------------------------------------------------------------------------- 1 | # REST API Documentation 2 | * [Boot Orchestration Service v2](./bos.md) 3 | * [Boot Script Service v1](./bss.md) 4 | * [Cray Advanced Platform Monitoring and Control (CAPMC) v3](./capmc.md) 5 | * [Configuration Framework Service v1](./cfs.md) 6 | * [Console Service v1](./console.md) 7 | * [Firmware Action Service v1](./firmware-action.md) 8 | * [Heartbeat Tracker Service v1](./hbtd.md) 9 | * [HMS Notification Fanout Daemon v1](./hmnfd.md) 10 | * [Image Management Service v3](./ims.md) 11 | * [NCN Lifecycle Service v1](./nls.md) 12 | * [Power Control Service (PCS) v1](./power-control.md) 13 | * [System Configuration Service v1](./scsd.md) 14 | * [System Layout Service v2](./sls.md) 15 | * [Hardware State Manager API v2](./smd.md) 16 | * [Cray STS Token Generator v1](./sts.md) 17 | * [TAPMS Tenant Status API v1](./tapms-operator.md) 18 | -------------------------------------------------------------------------------- /api/index.md: -------------------------------------------------------------------------------- 1 | ./README.md -------------------------------------------------------------------------------- /background/index.md: -------------------------------------------------------------------------------- 1 | ./README.md -------------------------------------------------------------------------------- /img/3rd/redfish.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/3rd/redfish.png -------------------------------------------------------------------------------- /img/Management_Network_Connections_Liquid_Cooled.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/Management_Network_Connections_Liquid_Cooled.png 
-------------------------------------------------------------------------------- /img/SBPS_Architecture_Diagram.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/SBPS_Architecture_Diagram.jpg -------------------------------------------------------------------------------- /img/SBPS_flow_diagram.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/SBPS_flow_diagram.jpg -------------------------------------------------------------------------------- /img/UAN_transition_CSM_1.2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/UAN_transition_CSM_1.2.png -------------------------------------------------------------------------------- /img/bmc-reboot-ilo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/bmc-reboot-ilo.png -------------------------------------------------------------------------------- /img/bmc-virtual-media-boot-gigabyte.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/bmc-virtual-media-boot-gigabyte.png -------------------------------------------------------------------------------- /img/bmc-virtual-media-gigabyte-settings.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/bmc-virtual-media-gigabyte-settings.png 
-------------------------------------------------------------------------------- /img/bmc-virtual-media-ilo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/bmc-virtual-media-ilo.png -------------------------------------------------------------------------------- /img/bmc-virtual-media-intel-menu.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/bmc-virtual-media-intel-menu.png -------------------------------------------------------------------------------- /img/bmc-virtual-media-intel-mounted.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/bmc-virtual-media-intel-mounted.png -------------------------------------------------------------------------------- /img/bmc-virtual-media-intel.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/bmc-virtual-media-intel.png -------------------------------------------------------------------------------- /img/bmc-virtual-media-settings-gigabyte.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/bmc-virtual-media-settings-gigabyte.png -------------------------------------------------------------------------------- /img/bmc-virtual-media-start-gigabyte.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/bmc-virtual-media-start-gigabyte.png 
-------------------------------------------------------------------------------- /img/boot-flow.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/boot-flow.jpg -------------------------------------------------------------------------------- /img/cmos1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/cmos1.png -------------------------------------------------------------------------------- /img/cmos2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/cmos2.png -------------------------------------------------------------------------------- /img/cmos3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/cmos3.png -------------------------------------------------------------------------------- /img/fw-gb-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/fw-gb-1.png -------------------------------------------------------------------------------- /img/fw-gb-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/fw-gb-2.png -------------------------------------------------------------------------------- /img/fw-gb-3.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/fw-gb-3.png -------------------------------------------------------------------------------- /img/fw-gb-4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/fw-gb-4.png -------------------------------------------------------------------------------- /img/fw-ilo-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/fw-ilo-1.png -------------------------------------------------------------------------------- /img/fw-ilo-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/fw-ilo-2.png -------------------------------------------------------------------------------- /img/fw-ilo-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/fw-ilo-3.png -------------------------------------------------------------------------------- /img/fw-ilo-4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/fw-ilo-4.png -------------------------------------------------------------------------------- /img/fw-ilo-5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/fw-ilo-5.png -------------------------------------------------------------------------------- /img/install/shcd-hmn-tab-unexpected-data.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/install/shcd-hmn-tab-unexpected-data.png -------------------------------------------------------------------------------- /img/network/AGG-SHCD.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/network/AGG-SHCD.png -------------------------------------------------------------------------------- /img/network/CDU-.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/network/CDU-.png -------------------------------------------------------------------------------- /img/network/CDU-CMM-SHCD.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/network/CDU-CMM-SHCD.png -------------------------------------------------------------------------------- /img/network/CDU-SHCD.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/network/CDU-SHCD.png -------------------------------------------------------------------------------- /img/network/CDU-Wiring.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/network/CDU-Wiring.png -------------------------------------------------------------------------------- /img/network/CECLAG.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/network/CECLAG.png -------------------------------------------------------------------------------- /img/network/DL325-back.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/network/DL325-back.png -------------------------------------------------------------------------------- /img/network/DL385-back.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/network/DL385-back.png -------------------------------------------------------------------------------- /img/network/GigaIntel_application.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/network/GigaIntel_application.png -------------------------------------------------------------------------------- /img/network/GigaIntel_storage.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/network/GigaIntel_storage.png -------------------------------------------------------------------------------- /img/network/Gigaintel_Master.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/network/Gigaintel_Master.png -------------------------------------------------------------------------------- /img/network/Gigaintel_UAN.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/network/Gigaintel_UAN.png -------------------------------------------------------------------------------- /img/network/Gigaintel_Worker.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/network/Gigaintel_Worker.png -------------------------------------------------------------------------------- /img/network/HPE_Master.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/network/HPE_Master.png -------------------------------------------------------------------------------- /img/network/HPE_Storage.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/network/HPE_Storage.png -------------------------------------------------------------------------------- /img/network/HPE_Storage_large.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/network/HPE_Storage_large.png -------------------------------------------------------------------------------- /img/network/HPE_UAN.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/network/HPE_UAN.png -------------------------------------------------------------------------------- /img/network/HPE_Worker.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/network/HPE_Worker.png -------------------------------------------------------------------------------- /img/network/Leaf-SHCD.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/network/Leaf-SHCD.png -------------------------------------------------------------------------------- /img/network/Leaf-Wiring.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/network/Leaf-Wiring.png -------------------------------------------------------------------------------- /img/network/SHCD-40G_10G.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/network/SHCD-40G_10G.png -------------------------------------------------------------------------------- /img/network/Spine-SHCD.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/network/Spine-SHCD.png -------------------------------------------------------------------------------- /img/network/XL645D-back.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/network/XL645D-back.png -------------------------------------------------------------------------------- /img/network/XL675D-back.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/network/XL675D-back.png 
-------------------------------------------------------------------------------- /img/network/bican_cable.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/network/bican_cable.png -------------------------------------------------------------------------------- /img/network/can-diagram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/network/can-diagram.png -------------------------------------------------------------------------------- /img/network/edge_shcd.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/network/edge_shcd.png -------------------------------------------------------------------------------- /img/network/gigabyte-master.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/network/gigabyte-master.png -------------------------------------------------------------------------------- /img/network/gigabyte-storage.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/network/gigabyte-storage.png -------------------------------------------------------------------------------- /img/network/gigabyte-uan.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/network/gigabyte-uan.png -------------------------------------------------------------------------------- 
/img/network/gigabyte-worker.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/network/gigabyte-worker.png -------------------------------------------------------------------------------- /img/network/management_network/architecture_comparison.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/network/management_network/architecture_comparison.png -------------------------------------------------------------------------------- /img/network/management_network/exascale.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/network/management_network/exascale.png -------------------------------------------------------------------------------- /img/network/management_network/large.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/network/management_network/large.png -------------------------------------------------------------------------------- /img/network/management_network/mclag_link_ha.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/network/management_network/mclag_link_ha.png -------------------------------------------------------------------------------- /img/network/management_network/medium.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/network/management_network/medium.png 
-------------------------------------------------------------------------------- /img/network/management_network/member_power_failure.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/network/management_network/member_power_failure.png -------------------------------------------------------------------------------- /img/network/management_network/network_traffic_pattern.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/network/management_network/network_traffic_pattern.png -------------------------------------------------------------------------------- /img/network/management_network/scenario-a-topology.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/network/management_network/scenario-a-topology.png -------------------------------------------------------------------------------- /img/network/management_network/scenario-b-topology.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/network/management_network/scenario-b-topology.png -------------------------------------------------------------------------------- /img/network/management_network/small.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/network/management_network/small.png -------------------------------------------------------------------------------- /img/network/management_network/vsx_isl_ha.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/network/management_network/vsx_isl_ha.png -------------------------------------------------------------------------------- /img/network/management_network/vsx_split.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/network/management_network/vsx_split.png -------------------------------------------------------------------------------- /img/operations/Add_Client_in_Keycloak.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/operations/Add_Client_in_Keycloak.png -------------------------------------------------------------------------------- /img/operations/AutomationFrameworkWorkflow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/operations/AutomationFrameworkWorkflow.png -------------------------------------------------------------------------------- /img/operations/BGP_Peering.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/operations/BGP_Peering.png -------------------------------------------------------------------------------- /img/operations/BMC_Firmware_Dashboard.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/operations/BMC_Firmware_Dashboard.png -------------------------------------------------------------------------------- 
/img/operations/BMC_Firmware_Information_Login_Page.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/operations/BMC_Firmware_Information_Login_Page.png -------------------------------------------------------------------------------- /img/operations/BSS_Missing_an_Artifact.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/operations/BSS_Missing_an_Artifact.png -------------------------------------------------------------------------------- /img/operations/Boot_Flow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/operations/Boot_Flow.png -------------------------------------------------------------------------------- /img/operations/CAN_CHN_27_Subnet.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/operations/CAN_CHN_27_Subnet.png -------------------------------------------------------------------------------- /img/operations/CAN_Dual-Spine_Configuration.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/operations/CAN_Dual-Spine_Configuration.png -------------------------------------------------------------------------------- /img/operations/CAN_Point_to_Point.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/operations/CAN_Point_to_Point.png 
-------------------------------------------------------------------------------- /img/operations/CAN_Single_Gateway.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/operations/CAN_Single_Gateway.png -------------------------------------------------------------------------------- /img/operations/CA_Certificate_Settings.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/operations/CA_Certificate_Settings.png -------------------------------------------------------------------------------- /img/operations/CDU_Circuit_Breakers.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/operations/CDU_Circuit_Breakers.png -------------------------------------------------------------------------------- /img/operations/CFS_Automated_Session_Flow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/operations/CFS_Automated_Session_Flow.png -------------------------------------------------------------------------------- /img/operations/CFS_Single_Session_Flow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/operations/CFS_Single_Session_Flow.png -------------------------------------------------------------------------------- /img/operations/CMN_25_Subnet.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/operations/CMN_25_Subnet.png -------------------------------------------------------------------------------- /img/operations/CN_POST_Call.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/operations/CN_POST_Call.png -------------------------------------------------------------------------------- /img/operations/CN_Setup_Menu.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/operations/CN_Setup_Menu.png -------------------------------------------------------------------------------- /img/operations/DHCP_Helper.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/operations/DHCP_Helper.png -------------------------------------------------------------------------------- /img/operations/DNS_architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/operations/DNS_architecture.png -------------------------------------------------------------------------------- /img/operations/Dashboard_Remote_Control.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/operations/Dashboard_Remote_Control.png -------------------------------------------------------------------------------- /img/operations/EEPROM_Error_Dropping_to_Dracut_Emergency_Shell.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/operations/EEPROM_Error_Dropping_to_Dracut_Emergency_Shell.png -------------------------------------------------------------------------------- /img/operations/ErrorDashboard1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/operations/ErrorDashboard1.png -------------------------------------------------------------------------------- /img/operations/ErrorDashboard2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/operations/ErrorDashboard2.png -------------------------------------------------------------------------------- /img/operations/ExternalDNS.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/operations/ExternalDNS.png -------------------------------------------------------------------------------- /img/operations/Full_UEFI_Prompt.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/operations/Full_UEFI_Prompt.png -------------------------------------------------------------------------------- /img/operations/GossTestsDashboard.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/operations/GossTestsDashboard.png -------------------------------------------------------------------------------- /img/operations/GossWorkflow.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/operations/GossWorkflow.png -------------------------------------------------------------------------------- /img/operations/Intel_Integrated_BMC_Console_Launch_Console_Button.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/operations/Intel_Integrated_BMC_Console_Launch_Console_Button.png -------------------------------------------------------------------------------- /img/operations/Intel_Integrated_BMC_Console_Login_Page.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/operations/Intel_Integrated_BMC_Console_Login_Page.png -------------------------------------------------------------------------------- /img/operations/Intel_Integrated_BMC_Console_Remote_Control_Tab.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/operations/Intel_Integrated_BMC_Console_Remote_Control_Tab.png -------------------------------------------------------------------------------- /img/operations/Intel_Integrated_BMC_Console_Summary_Page.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/operations/Intel_Integrated_BMC_Console_Summary_Page.png -------------------------------------------------------------------------------- /img/operations/Java_Control_Panel_Security_Tab_Linux.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/operations/Java_Control_Panel_Security_Tab_Linux.png -------------------------------------------------------------------------------- /img/operations/Java_Control_Panel_Security_Tab_MAC.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/operations/Java_Control_Panel_Security_Tab_MAC.png -------------------------------------------------------------------------------- /img/operations/Java_Control_Panel_Security_Tab_Windows.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/operations/Java_Control_Panel_Security_Tab_Windows.png -------------------------------------------------------------------------------- /img/operations/Java_iKVM_Viewer.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/operations/Java_iKVM_Viewer.png -------------------------------------------------------------------------------- /img/operations/KVM_Main_Menu.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/operations/KVM_Main_Menu.png -------------------------------------------------------------------------------- /img/operations/KVM_Options.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/operations/KVM_Options.png -------------------------------------------------------------------------------- /img/operations/KVM_Unit_Opened.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/operations/KVM_Unit_Opened.png -------------------------------------------------------------------------------- /img/operations/KVM_Unit_Pulled_Out.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/operations/KVM_Unit_Pulled_Out.png -------------------------------------------------------------------------------- /img/operations/Keychain_Access_Utility.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/operations/Keychain_Access_Utility.png -------------------------------------------------------------------------------- /img/operations/Keycloak_Admin-role_Mapper.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/operations/Keycloak_Admin-role_Mapper.png -------------------------------------------------------------------------------- /img/operations/Keycloak_Client_Details.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/operations/Keycloak_Client_Details.png -------------------------------------------------------------------------------- /img/operations/Keycloak_Client_Secret.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/operations/Keycloak_Client_Secret.png 
-------------------------------------------------------------------------------- /img/operations/Keycloak_Client_Settings.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/operations/Keycloak_Client_Settings.png -------------------------------------------------------------------------------- /img/operations/Keycloak_Client_Token_Lifetime.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/operations/Keycloak_Client_Token_Lifetime.png -------------------------------------------------------------------------------- /img/operations/Keycloak_Gatekeeper_Client.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/operations/Keycloak_Gatekeeper_Client.png -------------------------------------------------------------------------------- /img/operations/Keycloak_Global_Session_Lifetime.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/operations/Keycloak_Global_Session_Lifetime.png -------------------------------------------------------------------------------- /img/operations/Keycloak_Global_Token_Lifetime.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/operations/Keycloak_Global_Token_Lifetime.png -------------------------------------------------------------------------------- /img/operations/Keycloak_add_nexus_permission.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/operations/Keycloak_add_nexus_permission.png -------------------------------------------------------------------------------- /img/operations/Keycloak_client_scopes_table.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/operations/Keycloak_client_scopes_table.png -------------------------------------------------------------------------------- /img/operations/Keykloak_Gatekeeper_Settings.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/operations/Keykloak_Gatekeeper_Settings.png -------------------------------------------------------------------------------- /img/operations/Kibana_Clusterstor_Logs-Dropdown.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/operations/Kibana_Clusterstor_Logs-Dropdown.png -------------------------------------------------------------------------------- /img/operations/Kibana_Discover_Dashboard.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/operations/Kibana_Discover_Dashboard.png -------------------------------------------------------------------------------- /img/operations/Kibana_Results_Time_Period.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/operations/Kibana_Results_Time_Period.png -------------------------------------------------------------------------------- 
/img/operations/LDAP_User_Federation_Mappers.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/operations/LDAP_User_Federation_Mappers.png -------------------------------------------------------------------------------- /img/operations/Management_NCN_Interfaces_and_VLANs.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/operations/Management_NCN_Interfaces_and_VLANs.png -------------------------------------------------------------------------------- /img/operations/Mellanox_SN2700.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/operations/Mellanox_SN2700.png -------------------------------------------------------------------------------- /img/operations/Nexus_Admin_Account.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/operations/Nexus_Admin_Account.png -------------------------------------------------------------------------------- /img/operations/Nexus_Anonymous_Account.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/operations/Nexus_Anonymous_Account.png -------------------------------------------------------------------------------- /img/operations/Nexus_Browse_Page.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/operations/Nexus_Browse_Page.png 
-------------------------------------------------------------------------------- /img/operations/Nexus_Compact_Task.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/operations/Nexus_Compact_Task.png -------------------------------------------------------------------------------- /img/operations/Nexus_Create_Task.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/operations/Nexus_Create_Task.png -------------------------------------------------------------------------------- /img/operations/Nexus_Delete_Asset.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/operations/Nexus_Delete_Asset.png -------------------------------------------------------------------------------- /img/operations/Nexus_Delete_Repository.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/operations/Nexus_Delete_Repository.png -------------------------------------------------------------------------------- /img/operations/Nexus_New_Task.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/operations/Nexus_New_Task.png -------------------------------------------------------------------------------- /img/operations/Nexus_Repodata_Attributes.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/operations/Nexus_Repodata_Attributes.png -------------------------------------------------------------------------------- /img/operations/Nexus_Repodata_Attributes_After.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/operations/Nexus_Repodata_Attributes_After.png -------------------------------------------------------------------------------- /img/operations/Nexus_Repodata_Summary.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/operations/Nexus_Repodata_Summary.png -------------------------------------------------------------------------------- /img/operations/Nexus_Repodata_Summary_After.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/operations/Nexus_Repodata_Summary_After.png -------------------------------------------------------------------------------- /img/operations/Nexus_Repository_Admin_Page.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/operations/Nexus_Repository_Admin_Page.png -------------------------------------------------------------------------------- /img/operations/Nexus_Repository_List.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/operations/Nexus_Repository_List.png -------------------------------------------------------------------------------- 
/img/operations/Nexus_Task_Confirmation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/operations/Nexus_Task_Confirmation.png -------------------------------------------------------------------------------- /img/operations/Nexus_Task_Detail.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/operations/Nexus_Task_Detail.png -------------------------------------------------------------------------------- /img/operations/Nexus_Task_Detail_Last_Run.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/operations/Nexus_Task_Detail_Last_Run.png -------------------------------------------------------------------------------- /img/operations/Nexus_Task_Location.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/operations/Nexus_Task_Location.png -------------------------------------------------------------------------------- /img/operations/Nexus_Task_Type_Selection.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/operations/Nexus_Task_Type_Selection.png -------------------------------------------------------------------------------- /img/operations/Nexus_Tasks_Page.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/operations/Nexus_Tasks_Page.png 
-------------------------------------------------------------------------------- /img/operations/Nexus_Web_UI.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/operations/Nexus_Web_UI.png -------------------------------------------------------------------------------- /img/operations/Node_Console.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/operations/Node_Console.png -------------------------------------------------------------------------------- /img/operations/PKI_Infrastructure.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/operations/PKI_Infrastructure.png -------------------------------------------------------------------------------- /img/operations/Security_Infrastructure.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/operations/Security_Infrastructure.png -------------------------------------------------------------------------------- /img/operations/Simple_UEFI_Prompt.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/operations/Simple_UEFI_Prompt.png -------------------------------------------------------------------------------- /img/operations/Sma_dashboard_console_hostname.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/operations/Sma_dashboard_console_hostname.png -------------------------------------------------------------------------------- /img/operations/Sma_dashboard_home_page.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/operations/Sma_dashboard_home_page.png -------------------------------------------------------------------------------- /img/operations/Sma_dashboard_index_dropdown.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/operations/Sma_dashboard_index_dropdown.png -------------------------------------------------------------------------------- /img/operations/Sma_dashboard_time_period.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/operations/Sma_dashboard_time_period.png -------------------------------------------------------------------------------- /img/operations/TFTP_without_a_Route_Back_to_the_Node.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/operations/TFTP_without_a_Route_Back_to_the_Node.png -------------------------------------------------------------------------------- /img/operations/TimingDashboard.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/operations/TimingDashboard.png -------------------------------------------------------------------------------- 
/img/operations/VictoriaMetrics_Arcitecture.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/operations/VictoriaMetrics_Arcitecture.jpg -------------------------------------------------------------------------------- /img/operations/Wireshark_Healthy_DHCP_Discover_Sequence.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/operations/Wireshark_Healthy_DHCP_Discover_Sequence.png -------------------------------------------------------------------------------- /img/operations/add-node.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/operations/add-node.gif -------------------------------------------------------------------------------- /img/operations/boot_orchestration/bos_v1_boot.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/operations/boot_orchestration/bos_v1_boot.gif -------------------------------------------------------------------------------- /img/operations/boot_orchestration/bos_v1_reconfigure.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/operations/boot_orchestration/bos_v1_reconfigure.gif -------------------------------------------------------------------------------- /img/operations/boot_orchestration/bos_v1_shutdown.gif: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/operations/boot_orchestration/bos_v1_shutdown.gif -------------------------------------------------------------------------------- /img/operations/boot_orchestration/bos_v2_boot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/operations/boot_orchestration/bos_v2_boot.png -------------------------------------------------------------------------------- /img/operations/boot_orchestration/bos_v2_reboot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/operations/boot_orchestration/bos_v2_reboot.png -------------------------------------------------------------------------------- /img/operations/boot_orchestration/bos_v2_shutdown.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/operations/boot_orchestration/bos_v2_shutdown.png -------------------------------------------------------------------------------- /img/operations/boot_orchestration/bos_v2_status_transitions.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/operations/boot_orchestration/bos_v2_status_transitions.png -------------------------------------------------------------------------------- /img/operations/branch_workflow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/operations/branch_workflow.png 
-------------------------------------------------------------------------------- /img/operations/crus_upgrade.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/operations/crus_upgrade.gif -------------------------------------------------------------------------------- /img/operations/diagram_csm_stack_upgrade_04022025.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/operations/diagram_csm_stack_upgrade_04022025.png -------------------------------------------------------------------------------- /img/operations/diagram_upgrade_csm_manually_and_additional_products_with_IUF_101524.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/operations/diagram_upgrade_csm_manually_and_additional_products_with_IUF_101524.png -------------------------------------------------------------------------------- /img/operations/diagram_upgrade_csm_with_IUF_101524.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/operations/diagram_upgrade_csm_with_IUF_101524.png -------------------------------------------------------------------------------- /img/operations/gitea_repositories.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/operations/gitea_repositories.png -------------------------------------------------------------------------------- /img/operations/gitea_repository_settings.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/operations/gitea_repository_settings.png -------------------------------------------------------------------------------- /img/operations/gitea_repository_visibility.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/operations/gitea_repository_visibility.png -------------------------------------------------------------------------------- /img/operations/image-create.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/operations/image-create.gif -------------------------------------------------------------------------------- /img/operations/image-customize.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/operations/image-customize.gif -------------------------------------------------------------------------------- /img/operations/iuf_initial_install_workflow_update_hsn_01132025.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/operations/iuf_initial_install_workflow_update_hsn_01132025.png -------------------------------------------------------------------------------- /img/operations/remove-nodes.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/operations/remove-nodes.gif 
-------------------------------------------------------------------------------- /img/shcd-rack-example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/shcd-rack-example.png -------------------------------------------------------------------------------- /img/spit-services.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/spit-services.jpg -------------------------------------------------------------------------------- /img/upgrade-deploy.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/upgrade-deploy.jpg -------------------------------------------------------------------------------- /img/usb-flow.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/img/usb-flow.jpg -------------------------------------------------------------------------------- /index.md: -------------------------------------------------------------------------------- 1 | ./README.md -------------------------------------------------------------------------------- /install/index.md: -------------------------------------------------------------------------------- 1 | ./README.md -------------------------------------------------------------------------------- /install/livecd/Reset_root_Password_on_a_LiveCD_USB.md: -------------------------------------------------------------------------------- 1 | # Reset root Password on a LiveCD USB 2 | 3 | It may become desirable to clear the password on the LiveCD. 
4 | 5 | The root password is preserved within the COW partition at `cow:rw/etc/shadow`. This is the 6 | modified copy of the `/etc/shadow` file used by the operating system. 7 | 8 | If a site/user needs to reset/clear the password for `root`, they can mount their USB on another 9 | machine and remove this file from the COW partition. When next booting from the USB it will 10 | reinitialize to an empty password for `root`, and again at next login it will require the password 11 | to be changed. 12 | 13 | Clear the password (macOS or Linux): 14 | 15 | ```bash 16 | mount -vL cow /mnt 17 | sudo rm -fv /mnt/rw/etc/shadow 18 | umount -v /mnt 19 | ``` 20 | -------------------------------------------------------------------------------- /introduction/deprecated_features/CAPMC_Deprecation_Notice.md: -------------------------------------------------------------------------------- 1 | # CAPMC Deprecation Notice 2 | 3 | CAPMC is deprecated, starting in CSM 1.5, and may be removed in the future. 4 | It has been replaced with the [Power Control Service (PCS)](../../glossary.md#power-control-service-pcs). 5 | Everyone is encouraged to transition to PCS as soon as possible. 6 | 7 | See [PCS API](../../api/power-control.md) for more information about PCS API. 
8 | 9 | Here is a list of deprecated CAPMC API (CLI) endpoints: 10 | 11 | * `/get_xname_status` 12 | * `/xname_reinit` 13 | * `/xname_on` 14 | * `/xname_off` 15 | * `/get_power_cap` 16 | * `/get_power_cap_capabilities` 17 | * `/set_power_cap` 18 | * `/health` 19 | * `/liveness` 20 | * `/readiness` 21 | -------------------------------------------------------------------------------- /introduction/deprecated_features/index.md: -------------------------------------------------------------------------------- 1 | ./README.md -------------------------------------------------------------------------------- /introduction/img/github_heading_icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/introduction/img/github_heading_icon.png -------------------------------------------------------------------------------- /introduction/img/github_heading_navigation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/introduction/img/github_heading_navigation.png -------------------------------------------------------------------------------- /introduction/img/html_docs_csm_version_selection.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/introduction/img/html_docs_csm_version_selection.png -------------------------------------------------------------------------------- /introduction/img/html_heading_icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/introduction/img/html_heading_icon.png -------------------------------------------------------------------------------- 
/introduction/img/html_heading_navigation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/introduction/img/html_heading_navigation.png -------------------------------------------------------------------------------- /introduction/index.md: -------------------------------------------------------------------------------- 1 | ./README.md -------------------------------------------------------------------------------- /introduction/templates/disclaimers.md: -------------------------------------------------------------------------------- 1 | # Templates 2 | 3 | Copy and paste these into other markdown. 4 | 5 | > ***🚨 DRAFT 🚨*** 6 | > 7 | > This document is in development by the owners and has been labeled as a *DRAFT*. 8 | > 9 | > The contents of this document are considered for advanced system users only, and may require 10 | > familiarity with Linux and/or the relevant, mentioned application(s). 11 | > 12 | > **❗Proceed with caution❗** 13 | -------------------------------------------------------------------------------- /operations/System_Recovery/PBS_Service_Recovery.md: -------------------------------------------------------------------------------- 1 | # PBS Service Recovery 2 | 3 | The following covers restoring PBS data. 4 | 5 | ## Prerequisites 6 | 7 | - The system is fully installed and has transitioned off of the LiveCD. 8 | - All activities required for site maintenance are complete. 9 | - A backup or export of the data already exists. 10 | 11 | ## Service recovery for PBS 12 | 13 | To restore Portable Batch System data from a backup, see *Restore PBS home directory from a backup* in the 14 | **HPE Cray Supercomputing User Services Software Administration Guide: CSM on HPE Cray EX Systems (S-8063)**. 15 | 16 | After restoring PBS data from backup, check that the procedure was successful. 17 | 18 | 1. 
(`uan#`) Check that accounting records were successfully restored. 19 | 20 | ```bash 21 | qstat -x 22 | ``` 23 | 24 | 1. (`uan#`) Check that queued jobs were successfully restored. 25 | 26 | ```bash 27 | qstat 28 | ``` 29 | 30 | 1. (`uan#`) Check that node states were successfully restored. 31 | 32 | ```bash 33 | pbsnodes -a 34 | ``` 35 | -------------------------------------------------------------------------------- /operations/System_Recovery/System_Recovery.md: -------------------------------------------------------------------------------- 1 | # Beta Procedures for System Recovery 2 | 3 | The table below provides links to the recovery procedures available for critical services. 4 | 5 | | Service | Recovery procedures | 6 | |:---------|:-----------------------------------------------------------------------------------------| 7 | | Vault | [Vault Service Recovery](../security_and_authentication/Vault_Service_Recovery.md) | 8 | | Keycloak | [Keycloak Service Recovery](../security_and_authentication/Keycloak_Service_Recovery.md) | 9 | | Spire | [Spire Service Recovery](../spire/Spire_Service_Recovery.md) | 10 | | Nexus | [Nexus Service Recovery](../package_repository_management/Nexus_Service_Recovery.md) | 11 | | Slurm | [Slurm Service Recovery](Slurm_Service_Recovery.md) | 12 | | PBS | [PBS Service Recovery](PBS_Service_Recovery.md) | 13 | -------------------------------------------------------------------------------- /operations/artifact_management/Use_S3_Libraries_and_Clients.md: -------------------------------------------------------------------------------- 1 | # Use S3 Libraries and Clients 2 | 3 | Several command line clients and language-specific libraries are available in addition to the Simple Storage Service \(S3\) RESTful API. 4 | Developers and system administrators can interact with artifacts in the S3 object store with these tools. 
5 | 6 | To learn more, refer to the following links: 7 | 8 | - [S3 Python client](https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/s3.html) 9 | - [S3 Go client](https://docs.aws.amazon.com/sdk-for-go/api/service/s3/) 10 | - [Amazon Web Services \(AWS\) S3 CLI documentation](https://docs.aws.amazon.com/cli/latest/userguide/cli-chap-welcome.html) 11 | -------------------------------------------------------------------------------- /operations/boot_orchestration/Boot_Issue_Symptom_Node_HSN_Interface_Does_Not_Appear_or_Shows_No_Link_Detected.md: -------------------------------------------------------------------------------- 1 | # Boot Issue Symptom: Node HSN Interface Does Not Appear or Shows No Link Detected 2 | 3 | A node may fail to boot if the HSN interface is experiencing issues, or if it is not able to detect any links. 4 | 5 | ### Symptom 6 | 7 | The node's HSN interface does not appear in the output of the `ip addr` command, or the output of the `ethtool INTERFACE` command shows no link detected. 8 | 9 | ### Resolution 10 | 11 | Reseat the node's PCIe card. 12 | 13 | -------------------------------------------------------------------------------- /operations/boot_orchestration/Log_File_Locations_and_Ports_Used_in_Compute_Node_Boot_Troubleshooting.md: -------------------------------------------------------------------------------- 1 | # Log File Locations and Ports Used in Compute Node Boot Troubleshooting 2 | 3 | This section includes the port IDs and log file locations of components associated with the node boot process. 4 | 5 | ### Log File Locations 6 | 7 | The log file locations for ConMan, DHCP, and TFTP are listed below. 8 | 9 | - ConMan logs are located within the `conman` pod at `/var/log/conman.log`.
10 | - DHCP: 11 | 12 | ```bash 13 | kubectl logs DHCP_POD_ID 14 | ``` 15 | 16 | - TFTP: 17 | 18 | ```bash 19 | kubectl logs -n services TFTP_POD_ID 20 | ``` 21 | 22 | ### Port IDs 23 | 24 | The following table includes the port IDs for DHCP and TFTP. 25 | 26 | | Component | Port | 27 | |-------------|------| 28 | | DHCP server | 67 | 29 | | DHCP client | 68 | 30 | | TFTP server | 69 | 31 | 32 | -------------------------------------------------------------------------------- /operations/configuration_management/Git_Operations.md: -------------------------------------------------------------------------------- 1 | # Git Operations 2 | 3 | Use the `git` command to manage repository content in the Version Control Service \(VCS\). 4 | 5 | Once a repository is cloned, the git command line tool is available to interact with a repository from VCS. 6 | The `git` command is used for making commits, creating new branches, and pushing new branches, tags, and commits to the remote repository stored in VCS. 7 | 8 | When pushing changes to the VCS server using the `crayvcs` user, input the password retrieved from the Kubernetes secret as the credentials. 9 | See [VCS Administrative User](VCS_Administrative_User.md) for more information. 10 | 11 | ```bash 12 | git push 13 | ``` 14 | 15 | Enter the appropriate credentials when prompted: 16 | 17 | ```text 18 | Username for 'https://api-gw-service-nmn.local': crayvcs 19 | Password for 'https://crayvcs@api-gw-service-nmn.local': 20 | ``` 21 | 22 | For more information on how to use the Git command line tools, refer to the external [Git User Manual](https://git-scm.com/docs/user-manual.html). 
23 | -------------------------------------------------------------------------------- /operations/hardware_state_manager/Hardware_State_Manager.md: -------------------------------------------------------------------------------- 1 | # Hardware State Manager \(HSM\) 2 | 3 | The Hardware State Manager \(HSM\) monitors and interrogates hardware components in the HPE Cray EX system, tracking hardware state and inventory information, and making it available via REST queries and message bus events when changes occur. 4 | 5 | In the CSM 0.9.3 release, v1 of the HSM API has begun its deprecation process in favor of the new HSM v2 API. Refer to the HSM API documentation for more information on the changes. 6 | 7 | -------------------------------------------------------------------------------- /operations/index.md: -------------------------------------------------------------------------------- 1 | ./README.md -------------------------------------------------------------------------------- /operations/iuf/examples/iuf_abort.md: -------------------------------------------------------------------------------- 1 | # `iuf abort` Examples 2 | 3 | (`ncn-m001#`) Abort activity `admin.05-15`, allowing the current stage to complete. 4 | 5 | ```bash 6 | iuf -a admin.05-15 abort 7 | ``` 8 | 9 | --- 10 | 11 | (`ncn-m001#`) Abort activity `admin.05-15` immediately, terminating all in progress operations. 12 | 13 | ```bash 14 | iuf -a admin.05-15 abort -f 15 | ``` 16 | 17 | (`ncn-m001#`) Abort activity `admin.05-15` immediately and add a comment to the activity log. 18 | 19 | ```bash 20 | iuf -i input.yaml abort -f "Aborting the activity" 21 | ``` 22 | -------------------------------------------------------------------------------- /operations/iuf/examples/iuf_list_activities.md: -------------------------------------------------------------------------------- 1 | # `iuf list-activities` Examples 2 | 3 | (`ncn-m001#`) List all activities present on the system. 
4 | 5 | ```bash 6 | iuf list-activities 7 | ``` 8 | 9 | Example output: 10 | 11 | ```text 12 | fullstack-install-230217 13 | fullstack-upgrade-230222 14 | shs-upgrade-230311 15 | ``` 16 | -------------------------------------------------------------------------------- /operations/iuf/examples/iuf_resume.md: -------------------------------------------------------------------------------- 1 | # `iuf resume` Examples 2 | 3 | (`ncn-m001#`) Begin executing stages `process-media` through `deliver-product` for activity `admin-230126`. 4 | 5 | ```bash 6 | iuf -a admin-230126 run -b process-media -e deliver-product 7 | ``` 8 | 9 | (`ncn-m001#`) Forcefully abort activity `admin-230126` while it is still executing, causing the current stage to fail immediately. 10 | 11 | ```bash 12 | iuf -a admin-230126 abort -f 13 | ``` 14 | 15 | (`ncn-m001#`) Resume activity `admin-230126` to re-execute any failed or aborted steps in the most recent stage of the IUF session specified earlier via `iuf run` and then execute any remaining steps that were not run prior 16 | to the execution of `iuf abort`. 17 | 18 | ```bash 19 | iuf -a admin-230126 resume 20 | ``` 21 | 22 | (`ncn-m001#`) Resume activity `admin-230126` and add a comment to the activity log. 23 | 24 | ```bash 25 | iuf -a admin-230126 resume "resuming activity" 26 | ``` 27 | -------------------------------------------------------------------------------- /operations/iuf/workflows/backup.md: -------------------------------------------------------------------------------- 1 | # Backup 2 | 3 | This section describes procedures that back up critical state in case it becomes necessary to fall back to previous configurations and software. 4 | 5 | - [1. Backup](#1-backup) 6 | - [1.1 Slingshot Fabric Manager](#11-slingshot-fabric-manager) 7 | - [2. Next steps](#2-next-steps) 8 | 9 | ## 1. Backup 10 | 11 | ### 1.1 Slingshot Fabric Manager 12 | 13 | Create a backup of the HPE Slingshot Fabric Manager prior to proceeding with the workflow.
Refer to the "Create a backup of a Fabric Manager configuration" section in the _HPE Slingshot Installation Guide for CSM_ 14 | for details on how to perform this operation. 15 | 16 | Once this step has completed: 17 | 18 | - Slingshot Fabric Manager content has been backed up 19 | 20 | ## 2. Next steps 21 | 22 | - If performing an upgrade that includes upgrading CSM and additional products with IUF, 23 | return to the [Upgrade CSM and additional products with IUF](upgrade_csm_and_additional_products_with_iuf.md) 24 | workflow to continue the upgrade. 25 | -------------------------------------------------------------------------------- /operations/iuf/workflows/configuration_of_SFM.md: -------------------------------------------------------------------------------- 1 | # Configuration of the Slingshot Fabric Manager 2 | 3 | This section configures the Slingshot Fabric Manager. 4 | 5 | - [1. Manual configuration of the Slingshot Fabric Manager](#1-manual-configuration-of-the-slingshot-fabric-manager) 6 | - [2. Next steps](#2-next-steps) 7 | 8 | ## 1. Manual configuration of the Slingshot Fabric Manager 9 | 10 | Instructions to configure the HPE Slingshot Fabric Manager are provided in the 11 | "Install HPE Slingshot Fabric Manager software" section of the _HPE Slingshot Installation Guide for CSM_. 12 | 13 | For systems with Slingshot NICs, also follow the instructions in 14 | the "Installing 200Gbps NIC host software" section of the _HPE Slingshot Installation Guide for CSM_. 15 | 16 | Once this step has completed: 17 | 18 | - The Slingshot Fabric Manager is configured 19 | 20 | ## 2. Next steps 21 | 22 | Return to the 23 | [Install or upgrade additional products with IUF](install_or_upgrade_additional_products_with_iuf.md) 24 | workflow to continue the install or upgrade. 
25 | -------------------------------------------------------------------------------- /operations/kubernetes/About_Kubernetes_Taints_and_Labels.md: -------------------------------------------------------------------------------- 1 | # About Kubernetes Taints and Labels 2 | 3 | Kubernetes labels control node affinity, which is the property of pods that attracts them to a set of nodes. On the other hand, Kubernetes taints enable a node to repel a set of pods. In addition, pods can have tolerations for taints to allow them to run on nodes with certain taints. 4 | 5 | Taints are controlled with the `kubectl taint nodes` command, while node labels for various nodes can be customized with a configmap that contains the desired values. For a description of how to modify the default node labels, refer to the Customer Access Network (CAN) documentation. 6 | 7 | The list of existing labels can be retrieved using the following command: 8 | 9 | ```bash 10 | kubectl get nodes --show-labels 11 | ``` 12 | 13 | To learn more, refer to [https://kubernetes.io/](https://kubernetes.io/). 14 | 15 | -------------------------------------------------------------------------------- /operations/kubernetes/About_Postgres.md: -------------------------------------------------------------------------------- 1 | # About Postgres 2 | 3 | The system uses PostgreSQL \(known as Postgres\) as a database solution. Postgres databases use SQL language to store and manage databases on the system. 4 | 5 | To learn more about Postgres, see [https://www.postgresql.org/docs/](https://www.postgresql.org/docs/). 6 | 7 | The Patroni tool can be used to manage and maintain information in a Postgres database. It handles tasks such as listing cluster members and the replication status, configuring and restarting databases, and more. For more information about this tool, refer to [Troubleshoot Postgres Database](Troubleshoot_Postgres_Database.md).
8 | 9 | -------------------------------------------------------------------------------- /operations/kubernetes/About_kubectl.md: -------------------------------------------------------------------------------- 1 | # About kubectl 2 | 3 | `kubectl` is a CLI that can be used to run commands against a Kubernetes cluster. The format of the `kubectl` command is shown below: 4 | 5 | ```bash 6 | kubectl COMMAND RESOURCE_TYPE RESOURCE_NAME FLAGS 7 | ``` 8 | 9 | An example of using `kubectl` to retrieve information about a pod is shown below: 10 | 11 | ```bash 12 | kubectl get pod POD_NAME1 POD_NAME2 13 | ``` 14 | 15 | `kubectl` is installed by default on the non-compute node \(NCN\) image. To learn more about `kubectl`, refer to [https://kubernetes.io/docs](https://kubernetes.io/docs) 16 | -------------------------------------------------------------------------------- /operations/kubernetes/Increase_Kafka_Pod_Resource_Limits.md: -------------------------------------------------------------------------------- 1 | # Increase Kafka Pod Resource Limits 2 | 3 | For larger scale systems, the Kafka resource limits may need to be increased. See [Increase Pod Resource Limits](Increase_Pod_Resource_Limits.md) for details on how to increase limits. 4 | 5 | **Increase Kafka Resource Limits Example** 6 | 7 | For a 1500 compute node system, increasing the cpu count to 6 and memory limits to 128G should be adequate. 8 | 9 | -------------------------------------------------------------------------------- /operations/kubernetes/Kubernetes_Storage.md: -------------------------------------------------------------------------------- 1 | # Kubernetes Storage 2 | 3 | Data belonging to micro-services in the management cluster is managed through persistent storage, 4 | which provides reliable and resilient data protection for containers running in the Kubernetes cluster. 
5 | 6 | The backing storage for this service is currently provided by JBOD disks that are spread across several 7 | nodes of the management cluster. These node disks are managed by Ceph, and are exposed to containers in 8 | the form of persistent volumes. 9 | -------------------------------------------------------------------------------- /operations/kubernetes/encryption/index.md: -------------------------------------------------------------------------------- 1 | README.md -------------------------------------------------------------------------------- /operations/multi-tenancy/hpe_slingshot_network_operator.md: -------------------------------------------------------------------------------- 1 | # HPE Slingshot Network Operator 2 | 3 | Starting in the HPE Slingshot 2.3.0 release, the HPE Slingshot Network Operator is installed as part of the Fabric Manager install. 4 | It is a Kubernetes operator that is designed to support multi-tenancy in CSM 1.6 and later releases. 5 | 6 | For more information on the HPE Slingshot Network Operator, see the "HPE Slingshot Network Operator for CSM Multi-Tenancy" section in the _HPE Slingshot Administration Guide_. Search for this document on the [HPE Support Center](https://support.hpe.com/hpesc/public/home). 
7 | 8 | The HPE Slingshot documentation outlines several critical tasks, including: 9 | 10 | - Enabling the HPE Slingshot Network Operator 11 | - Creating HPE Slingshot tenants 12 | - Modifying HPE Slingshot tenants 13 | - Updating VNI and tenant node component names (xnames) 14 | - Removing HPE Slingshot tenants 15 | -------------------------------------------------------------------------------- /operations/multi-tenancy/images/groupmembership.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/operations/multi-tenancy/images/groupmembership.png -------------------------------------------------------------------------------- /operations/multi-tenancy/images/keycloakclient.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/operations/multi-tenancy/images/keycloakclient.png -------------------------------------------------------------------------------- /operations/multi-tenancy/images/oidctoken.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/operations/multi-tenancy/images/oidctoken.png -------------------------------------------------------------------------------- /operations/multi-tenancy/images/usergroups.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/operations/multi-tenancy/images/usergroups.png -------------------------------------------------------------------------------- /operations/network/customer_accessible_networks/can_to_chn/index.md: -------------------------------------------------------------------------------- 1 | README.md 
-------------------------------------------------------------------------------- /operations/network/customer_accessible_networks/can_to_chn/scripts/sls/sls_utils/README.md: -------------------------------------------------------------------------------- 1 | # `sls_utils` Library 2 | 3 | This is a reusable Python library for safely interacting with SLS network data (in JSON format). 4 | 5 | The library has been tested against Python version 3.6 and up. 6 | -------------------------------------------------------------------------------- /operations/network/customer_accessible_networks/can_to_chn/scripts/sls/sls_utils/index.md: -------------------------------------------------------------------------------- 1 | # `sls_utils` Library 2 | 3 | This is a reusable Python library for safely interacting with SLS network data (in JSON format). 4 | 5 | The library has been tested against Python version 3.6 and up. 6 | -------------------------------------------------------------------------------- /operations/network/management_network/added_hardware.md: -------------------------------------------------------------------------------- 1 | # Added Hardware 2 | 3 | Follow this procedure when new hardware is added to the system. 4 | 5 | ## Procedure 6 | 7 | 1. Validate the SHCD. 8 | 9 | The SHCD defines the topology of a Shasta system and is needed when generating switch configurations. 10 | 11 | Refer to [Validate the SHCD](validate_shcd.md). 12 | 13 | 1. Generate the switch configuration file(s). 14 | 15 | Refer to [Generate Switch Configs](generate_switch_configs.md). 16 | 17 | 1. Check the differences between the generated configurations and the configurations on the system. 18 | 19 | Refer to [Validate Switch Configs](validate_switch_configs.md). 20 | 21 | 1. Run a suite of tests against the management network switches. 22 | 23 | Refer to [Network Tests](network_tests.md).
24 | -------------------------------------------------------------------------------- /operations/network/management_network/aruba/arp.md: -------------------------------------------------------------------------------- 1 | # Address Resolution Protocol (ARP) 2 | 3 | ARP is commonly used for mapping IPv4 addresses to MAC addresses. 4 | 5 | ## Procedure 6 | 7 | 1. Configure static ARP on an interface. 8 | 9 | ```text 10 | switch(config-if)# arp ipv4 IP-ADDR mac MAC-ADDR 11 | ``` 12 | 13 | 1. Show commands to validate functionality: 14 | 15 | ```text 16 | show arp 17 | ``` 18 | 19 | ## Expected Results 20 | 21 | 1. Administrators are able to ping the connected device 22 | 1. Administrators can view the ARP entries 23 | 24 | [Back to Index](README.md) 25 | -------------------------------------------------------------------------------- /operations/network/management_network/aruba/compute_uan_application_nodes.md: -------------------------------------------------------------------------------- 1 | # Verify Computes/UANs/Application Nodes 2 | 3 | If the computes make it past PXE and go into the PXE shell, verify DNS and connectivity. 4 | 5 | ``` 6 | iPXE> dhcp 7 | Configuring (net0 98:03:9b:a8:60:88).................. No configuration methods succeeded (http://ipxe.org/040ee186) 8 | Configuring (net1 b4:2e:99:be:1a:37)...... ok 9 | ``` 10 | 11 | ## Procedure 12 | 13 | 1. Verify DNS: 14 | 15 | ``` 16 | iPXE> show dns 17 | ``` 18 | 19 | Example output: 20 | 21 | ``` 22 | net1.dhcp/dns:ipv4 = 10.92.100.225 23 | ``` 24 | 25 | 1.
Verify connectivity: 26 | 27 | ``` 28 | iPXE> nslookup address api-gw-service-nmn.local 29 | iPXE> echo ${address} 30 | 10.92.100.71 31 | ``` 32 | 33 | [Back to Index](../README.md) 34 | -------------------------------------------------------------------------------- /operations/network/management_network/aruba/dns-client.md: -------------------------------------------------------------------------------- 1 | # Configure Domain Name Service (DNS) Clients 2 | 3 | The Domain Name Service (DNS) translates domain and host names to and from IP addresses. 4 | A DNS client resolves hostnames to IP addresses by querying assigned DNS servers for the appropriate IP address. 5 | 6 | ## Configuration Commands 7 | 8 | Configure the switch to resolve queries via a DNS server: 9 | 10 | ```text 11 | ip dns server-address IP-ADDR [vrf VRF] 12 | ``` 13 | 14 | Configure a domain name: 15 | 16 | ```text 17 | ip dns domain-name NAME 18 | ``` 19 | 20 | Show commands to validate functionality: 21 | 22 | ```text 23 | show ip dns 24 | ``` 25 | 26 | ## Expected Results 27 | 28 | 1. Administrators can configure the DNS client 29 | 1. The output is correct 30 | 1. Administrators can ping the device 31 | 32 | [Back to Index](../README.md) 33 | -------------------------------------------------------------------------------- /operations/network/management_network/aruba/domain_name.md: -------------------------------------------------------------------------------- 1 | # Configure Domain Names 2 | 3 | A domain name is a name to identify the person, group, or organization that controls the devices within an area. An example of a domain name could be `us.cray.com`. 4 | 5 | ## Configuration Commands 6 | 7 | Create a domain name: 8 | 9 | ``` 10 | switch(config)# domain-name NAME 11 | ``` 12 | 13 | Show commands to validate functionality: 14 | 15 | ``` 16 | show domain-name 17 | ``` 18 | 19 | ## Expected Results 20 | 21 | 1. Administrators can configure the domain name 22 | 1.
The output of all show commands is correct 23 | 24 | [Back to Index](../README.md) 25 | -------------------------------------------------------------------------------- /operations/network/management_network/aruba/exec_banner.md: -------------------------------------------------------------------------------- 1 | # Configure Exec Banners 2 | 3 | Exec banners are custom messages displayed to users attempting to connect to the management interfaces. Multiple lines of text can be stored using a custom delimiter to mark the end of message. 4 | 5 | ## Configuration Commands 6 | 7 | Create a banner: 8 | 9 | ``` 10 | switch(config)# banner DELIM 11 | ``` 12 | 13 | Show commands to validate functionality: 14 | 15 | ``` 16 | show banner 17 | ``` 18 | 19 | ## Example Output 20 | 21 | ``` 22 | switch(config)# banner exec $ 23 | Enter a new banner, when you are done enter a new line containing only your 24 | chosen delimiter. 25 | (banner-motd)# This is an example of a custom banner 26 | (banner-motd)# that spans multiple lines. 27 | (banner-motd)# $ 28 | switch(config)# do show banner exec 29 | ``` 30 | 31 | ## Expected Results 32 | 33 | 1. Administrators can create the Exec banner 34 | 2. The output of the Exec banner looks correct 35 | 36 | [Back to Index](../README.md) 37 | -------------------------------------------------------------------------------- /operations/network/management_network/aruba/hostname.md: -------------------------------------------------------------------------------- 1 | # Configure Hostnames 2 | 3 | A hostname is a human-friendly name used to identify a device. An example of a hostname could be the name "Test." 
4 | 5 | ## Configuration Commands 6 | 7 | Create a hostname: 8 | 9 | ``` 10 | switch(config)# hostname 11 | ``` 12 | 13 | Show commands to validate functionality: 14 | 15 | ``` 16 | show hostname 17 | ``` 18 | 19 | ## Example Output 20 | 21 | ``` 22 | switch(config)# hostname switch-test 23 | show hostname 24 | switch-test 25 | ``` 26 | 27 | ## Expected Results 28 | 29 | 1. Administrators can configure the hostname 30 | 2. The output of all show commands is correct 31 | 32 | [Back to Index](../README.md) 33 | 34 | -------------------------------------------------------------------------------- /operations/network/management_network/aruba/igmp.md: -------------------------------------------------------------------------------- 1 | # Configure Internet Group Management Protocol (IGMP) 2 | 3 | The Internet Group Management Protocol (IGMP) is a communications protocol used by hosts and adjacent routers on IP networks to establish multicast group memberships. The host joins a multicast-group by sending a join request message towards the network router, and responds to queries sent from the network router by dispatching a join report. 4 | 5 | General notes: 6 | 7 | * In ArubaOS-CX igmp snooping is disabled by default 8 | * IGMP v3 is used by default, supported configuration allows v2 and v3 9 | 10 | ## Configuration Commands 11 | 12 | ``` 13 | switch(config)# interface vlan 1 14 | switch(config-if-vlan)# igmp 15 | ``` 16 | 17 | ## Expected Results 18 | 19 | `show ip igmp-snooping vlan 1` should show IGMP enabled on the VLAN, but no IGMP Querier set.
20 | 21 | [Back to Index](../README.md) 22 | -------------------------------------------------------------------------------- /operations/network/management_network/aruba/index.md: -------------------------------------------------------------------------------- 1 | ./README.md -------------------------------------------------------------------------------- /operations/network/management_network/aruba/loopback.md: -------------------------------------------------------------------------------- 1 | # Loopback Interface 2 | 3 | Loopbacks are essentially internal virtual interfaces. Loopback interfaces are not bound to a physical port and are used for device management and routing protocols. 4 | 5 | ## Configuration Commands 6 | 7 | ```text 8 | switch(config)# interface loopback LOOPBACK 9 | switch(config-loopback-if)# ip address IP-ADDR/ 10 | ``` 11 | 12 | ## Example Output 13 | 14 | ```text 15 | switch(config)# interface loopback 1 16 | switch(config-loopback-if)# ip address 99.99.99.1/32 17 | switch(config-loopback-if)# end 18 | show run interface loopback1 19 | interface loopback1 20 | no shutdown 21 | ip address 99.99.99.1/32 22 | exit 23 | show ip interface loopback1 24 | Interface loopback1 is up 25 | Admin state is up 26 | Hardware: Loopback 27 | IPv4 address 99.99.99.1/32 28 | ``` 29 | 30 | ## Expected Results 31 | 32 | 1. Administrators can create a loopback interface 33 | 2. Administrators can give a loopback interface an IP address 34 | 3. Administrators can validate the configuration using the `show` commands. 
35 | 36 | [Back to Index](../README.md) 37 | -------------------------------------------------------------------------------- /operations/network/management_network/aruba/mclag_isl_ha.md: -------------------------------------------------------------------------------- 1 | # VSX: ISL HA 2 | 3 | The intent here is to showcase an inter-switch-link (ISL) link failover scenario where one of the two links between spine switches goes down, but ISL is still connected with single link. 4 | 5 | The following image is a visualization of disconnected ISL link: 6 | 7 | ![](../../../../img/network/management_network/vsx_isl_ha.png) 8 | 9 | The following things are expected to be seen in this scenario: 10 | 11 | * After disconnecting one ISL, the VSX functionality should not be affected 12 | * A small percentage of packets will be dropped when disconnecting the cable where traffic is flowing; A sub second value is expected during this event 13 | * When connecting back the cable, the hashing needs to be recalculated and some packets may be dropped during this event as well; A sub second value is expected during this event 14 | 15 | [Back to Index](../README.md) 16 | -------------------------------------------------------------------------------- /operations/network/management_network/aruba/mclag_power_failure.md: -------------------------------------------------------------------------------- 1 | # VSX: Member Power Failure 2 | 3 | The intent here is to showcase a complete member failure scenario where the spine-01 switch is completely down. 4 | 5 | The following is a visualization of the powered down spine-01: 6 | 7 | ![](../../../../img/network/management_network/member_power_failure.png) 8 | 9 | The following is expected in this scenario: 10 | 11 | * After disconnecting the power supply from one member the other member should be able to detect the member is down and continue a normal operation without any problems. 
12 | * If traffic was originally flowing through the member that was shut down, a small percentage of packets may be dropped. A sub second value is expected during this event. 13 | * When restoring the power, the hashing needs to be recalculated and some packets may be dropped during this event as well. A sub second value is expected during this event. 14 | 15 | [Back to Index](../README.md) 16 | -------------------------------------------------------------------------------- /operations/network/management_network/aruba/mclag_split.md: -------------------------------------------------------------------------------- 1 | # VSX: Split 2 | 3 | The intent here is to showcase a complete inter-switch-link (ISL) link failure scenario where both of the ISL links between spine switches go down. 4 | 5 | The following is a visualization of a disconnected ISL link and how the traffic pattern would look: 6 | 7 | ![](../../../../img/network/management_network/vsx_split.png) 8 | 9 | The following is expected in this scenario: 10 | 11 | * After disconnecting both ISL Links and Keepalive is up and properly configured the VSX Secondary Switch should put all its MCLAGs into lacp-blocked state and traffic should only flow through VSX Primary. 12 | * VSX Primary switch should continue to operate without any problems. 13 | * If traffic was originally flowing through secondary VSX member a small percentage of packets may be dropped when disconnecting the ISL. A sub second value is expected during this event. 14 | * When connecting back ISL link, the hashing needs to be recalculated and some packets may be dropped during this event as well. A sub second value is expected during this event.
15 | 16 | [Back to Index](../README.md) 17 | -------------------------------------------------------------------------------- /operations/network/management_network/aruba/network_naming_function.md: -------------------------------------------------------------------------------- 1 | # Network Types – Naming and Segment Function 2 | 3 | The following table provides an overview of the different network services defined inside of the spine and leaf architectures. 4 | 5 | | *********** | Administration: Hardware | Administration: Cloud/Job | Customer: Jobs | Customer: Administration | Storage | 6 | |:---------------------|:---------------------------:|:-------------------------:|:-----------------------:|:---------------------------:|---------------------:| 7 | | Full name | Hardware Management Network | Node Management Network | Customer Access Network | Customer Management Network | Storage User Network | 8 | | Short name / acronym | HMN | NMN | CAN | CMN | SUN | 9 | 10 | [Back to Index](../README.md) 11 | 12 | -------------------------------------------------------------------------------- /operations/network/management_network/aruba/network_topologies.md: -------------------------------------------------------------------------------- 1 | # Network Topologies 2 | 3 | The following images are example network topologies for systems of various sizes. 
4 | 5 | ### Very Large 6 | 7 | ![](../../../../img/network/management_network/exascale.png) 8 | 9 | ### Large 10 | 11 | ![](../../../../img/network/management_network/large.png) 12 | 13 | ### Medium 14 | 15 | ![](../../../../img/network/management_network/medium.png) 16 | 17 | ### Small 18 | 19 | ![](../../../../img/network/management_network/small.png) -------------------------------------------------------------------------------- /operations/network/management_network/aruba/network_traffic_pattern.md: -------------------------------------------------------------------------------- 1 | # Network Traffic Pattern 2 | 3 | ![Network traffic pattern](../img/network_traffic_pattern.png) 4 | 5 | Internal networks: 6 | 7 | * Node Management Network (NMN) - Provides the internal control plane for systems management and jobs control 8 | * Hardware Management Network (HMN) - Provides internal access to system baseboard management controllers (BMC/iLO) and other lower-level hardware access 9 | 10 | External and Edge networks: 11 | 12 | * Customer Management Network (CMN) - Provides customer access from the site to the system for administrators 13 | * Customer Access Network (CAN) or Customer High Speed Network (CHN) provide: 14 | * Customer access from the site to the system for job control and jobs data movement 15 | * Access from the system to the site for network services like DNS, LDAP, and more 16 | 17 | [Back to index](README.md). 18 | -------------------------------------------------------------------------------- /operations/network/management_network/aruba/notice.md: -------------------------------------------------------------------------------- 1 | © 2021 Hewlett Packard Enterprise Development LP 2 | 3 | ## Notices 4 | 5 | The information contained herein is subject to change without notice. The only warranties for Hewlett Packard Enterprise products and services are set forth in the express warranty statements accompanying such products and services. 
Nothing herein should be construed as constituting an additional warranty. Hewlett Packard Enterprise shall not be liable for technical or editorial errors or omissions contained herein. 6 | 7 | Confidential computer software: Valid license from Hewlett Packard Enterprise required for possession, use, or copying. Consistent with FAR 12.211 and 12.212, Commercial Computer Software, Computer Software Documentation, and Technical Data for Commercial Items are licensed to the U.S. Government under vendor's standard commercial license. 8 | 9 | Links to third-party websites take you outside the Hewlett Packard Enterprise website. Hewlett Packard Enterprise has no control over and is not responsible for information outside the Hewlett Packard Enterprise website. 10 | 11 | -------------------------------------------------------------------------------- /operations/network/management_network/aruba/physical_interfaces.md: -------------------------------------------------------------------------------- 1 | # Physical Interfaces 2 | 3 | Configure the physical interfaces for a switch. 4 | 5 | ## Configuration Commands 6 | 7 | Enable the interface: 8 | 9 | ```text 10 | switch(config)# interface IFACE 11 | switch(config-if)# no shutdown 12 | ``` 13 | 14 | Show commands to validate functionality: 15 | 16 | ```text 17 | show interface IFACE [transceiver|brief|dom|extended] 18 | ``` 19 | 20 | ## Expected Results 21 | 22 | 1. The switch recognizes the transceiver without errors 23 | 2. Administrators can enter the interface context for the port and enable it 24 | 3. Administrators can establish a link with a partner 25 | 4. 
Administrators can pass traffic as expected 26 | 27 | [Back to Index](../README.md) 28 | -------------------------------------------------------------------------------- /operations/network/management_network/aruba/redundant_power_supplies.md: -------------------------------------------------------------------------------- 1 | # Redundant Power Supplies 2 | 3 | There are no configuration commands for switch power supply functionality. 4 | 5 | > **`NOTE`** HA will be covered in HA section. 6 | 7 | Show commands to validate functionality: 8 | 9 | ```bash 10 | show environment power-supply 11 | ``` 12 | 13 | ## Expected Results 14 | 15 | 1. Validate the switch recognizes the additional power supplies 16 | 2. Validate system remains powered after removing power from all but one power supply 17 | 3. Validate all power supplies are operational 18 | 19 | ## Example Output 20 | 21 | ```bash 22 | show environment power-supply 23 | Product Serial PSU 24 | Wattage 25 | Mbr/PSU Number Number Status 26 | --------------------------------------------------------- 27 | 1/1 JL372A M031SS004TAPC OK 2701 28 | 1/2 JL372A M031SS004UAPC OK 2430 29 | 1/3 N/A N/A Absent 0 30 | 1/4 N/A N/A Absent 0 31 | ``` 32 | 33 | [Back to Index](../README.md) 34 | -------------------------------------------------------------------------------- /operations/network/management_network/aruba/remote_logging.md: -------------------------------------------------------------------------------- 1 | # Remote Logging 2 | 3 | Configure remote logging to view log files from the switch on a remote server. This functionality is enabled by syslog. 4 | 5 | > **`NOTE`** The default facility is three (DAEMON). 6 | 7 | ## Configuration Commands 8 | 9 | Configure logging: 10 | 11 | ```text 12 | switch(config)# logging IP-ADDR 13 | ``` 14 | 15 | ## Expected Results 16 | 17 | 1. Administrators can configure remote logging 18 | 1. 
Administrators can see the log files from the switch on the remote server 19 | 20 | [Back to Index](../README.md) 21 | -------------------------------------------------------------------------------- /operations/network/management_network/aruba/routed_interface.md: -------------------------------------------------------------------------------- 1 | # Routed Interfaces 2 | 3 | For platforms 8400 and 83xx: By default, all interfaces are configured as routed interfaces with support for both IPv4 and IPv6. 4 | 5 | For platforms 6400 and 6300: By default, all interfaces are configured as access ports on VLAN 1. 6 | 7 | ## Configuration Commands 8 | 9 | Give an interface an IP address: 10 | 11 | ```text 12 | switch(config-if)# ip address IP-ADDR/MASK 13 | ``` 14 | 15 | Show commands to validate functionality: 16 | 17 | ```text 18 | show interface IFACE 19 | ``` 20 | 21 | ## Expected Results 22 | 23 | 1. Administrators are able to configure an IP address on the interface 24 | 2. Administrators can configure an IP address on the connected network client 25 | 3. The interface is up, and you can validate the IP address and subnet are correct 26 | 4. Administrators can ping from the switch to the client and from the client to the switch 27 | 28 | [Back to Index](../README.md) 29 | 30 | -------------------------------------------------------------------------------- /operations/network/management_network/aruba/scenario-a.md: -------------------------------------------------------------------------------- 1 | # Scenario A: Network Connection via Management Network 2 | 3 | The example here covers outside connections achieved with the management network.
4 | 5 | ### Summary 6 | 7 | * Create a new VRF 8 | * Move interfaces to the new VRF 9 | * Create a new BGP process for the new VRF 10 | * Set up the edge router 11 | * Configure MetalLB 12 | * Verification step for BGP routes 13 | * Configure default route for workers 14 | * Verification of external communication 15 | 16 | ### Topology 17 | 18 | The following is an example topology: 19 | 20 | ![Scenario A topology](../../../../img/network/management_network/scenario-a-topology.png) 21 | 22 | [Back to Index](../README.md) 23 | -------------------------------------------------------------------------------- /operations/network/management_network/aruba/scenario-b.md: -------------------------------------------------------------------------------- 1 | # Scenario B: Network Connection via High-Speed Network 2 | 3 | This example covers outside connections achieved via the high-speed network. 4 | 5 | ### Summary 6 | 7 | * Create a new VRF 8 | * Move interfaces to the new VRF 9 | * Create a new BGP process for the new VRF 10 | * Set up the edge router 11 | * Configure MetalLB 12 | * Verification step for BGP routes 13 | * Configure default route for workers 14 | * Verification of external communication 15 | 16 | ### Topology 17 | 18 | The following is an example topology: 19 | 20 | ![Scenario B topology](../../../../img/network/management_network/scenario-b-topology.png) 21 | 22 | [Back to Index](../README.md) 23 | -------------------------------------------------------------------------------- /operations/network/management_network/aruba/spine_leaf_architecture2.md: -------------------------------------------------------------------------------- 1 | # Spine-leaf Architecture 2 | 3 | How does a spine-leaf architecture differ from traditional network designs? 4 | Traditionally, data center networks were based on a three-tier model: 5 | 6 | 1. Access switches connect to servers 7 | 1. Leaf or distribution switches provide redundant connections to access switches 8 | 1.
Core switches provide fast transport between leaf switches, typically connected in a redundant pair for high availability 9 | 10 | At the most basic level, a spine-leaf architecture collapses one of these tiers, as depicted in these diagrams. 11 | 12 | ![Architecture comparison](../img/architecture_comparison.png) 13 | 14 | Other common differences in spine-leaf topologies include: 15 | 16 | * The removal of Spanning Tree Protocol (STP) where feasible 17 | * A scale-out vs. scale-up of infrastructure 18 | 19 | [Back to index](README.md). 20 | -------------------------------------------------------------------------------- /operations/network/management_network/aruba/test_tftp_traffic.md: -------------------------------------------------------------------------------- 1 | # Test TFTP Traffic (Aruba Only) 2 | 3 | TFTP traffic can be tested by attempting to download the ipxe.efi binary. 4 | 5 | Log into the leaf switch and try to download the iPXE binary. 6 | 7 | This requires that the leaf switch can talk to the TFTP server "10.92.100.60". 8 | 9 | ```text 10 | start-shell 11 | sw-leaf-001:~$ sudo su 12 | sw-leaf-001:/home/tftp 10.92.100.60 13 | tftp> get ipxe.efi 14 | Received 1007200 bytes in 2.2 seconds 15 | tftp> get ipxe.efi 16 | Received 1007200 bytes in 2.2 seconds 17 | tftp> get ipxe.efi 18 | Received 1007200 bytes in 2.2 seconds 19 | ``` 20 | 21 | The ipxe.efi binary is downloaded three times in a row in this example. 22 | 23 | [Back to Index](../README.md) 24 | -------------------------------------------------------------------------------- /operations/network/management_network/aruba/typical_edge_port_configuration.md: -------------------------------------------------------------------------------- 1 | # Typical Edge Port Configuration 2 | 3 | The following is a very basic configuration for devices that are single homed to the network. For instance, network ILO cards, BMCs, PDUs, and so on. 4 | 5 | 6 | 7 | 19 | 20 | 21 | 33 | 34 |
8 |
 9 | Leaf-01
10 | interface 1/1/47
11 |     no shutdown
12 |     mtu 9198
13 |     description HMN
14 |     no routing
15 |     vlan access 4
16 |     spanning-tree bpdu-guard
17 |     spanning-tree port-type admin-edge
18 | 
22 |
23 | Leaf-02
24 | interface 1/1/47
25 |     no shutdown
26 |     mtu 9198
27 |     description BMC
28 |     no routing
29 |     vlan access 4
30 |     spanning-tree bpdu-guard
31 |     spanning-tree port-type admin-edge
32 | 
35 | 36 | [Back to Index](../README.md) 37 | -------------------------------------------------------------------------------- /operations/network/management_network/aruba/verify-switches_are_forwarding_dhcp_traffic.md: -------------------------------------------------------------------------------- 1 | # Verify the Switches are Forwarding DHCP Traffic 2 | 3 | If this point is reached and PXE booting is still not possible, it is likely the IP-Helper is broken on the switch. 4 | 5 | [Back to index](README.md). 6 | -------------------------------------------------------------------------------- /operations/network/management_network/aruba/verify_bgp.md: -------------------------------------------------------------------------------- 1 | # Verify BGP 2 | 3 | Verify the BGP neighbors are in the established state on BOTH the switches. 4 | 5 | ## Procedure 6 | 7 | 1. Check Aruba BGP status. 8 | 9 | ```bash 10 | show bgp ipv4 u s 11 | ``` 12 | 13 | Example output: 14 | 15 | ``` 16 | VRF : default 17 | BGP Summary 18 | ----------- 19 | Local AS : 65533 BGP Router Identifier : 10.252.0.3 20 | Peers : 4 Log Neighbor Changes : No 21 | Cfg. Hold Time : 180 Cfg. Keep Alive : 60 22 | Confederation Id : 0 23 | 24 | Neighbor Remote-AS MsgRcvd MsgSent Up/Down Time State AdminStatus 25 | 10.252.0.2 65533 45052 45044 02m:02w:02d Established Up 26 | 10.252.1.7 65533 78389 90090 02m:02w:02d Established Up 27 | 10.252.1.8 65533 78384 90059 02m:02w:02d Established Up 28 | 10.252.1.9 65533 78389 90108 02m:02w:02d Established Up 29 | ``` 30 | 31 | [Back to Index](../README.md) 32 | -------------------------------------------------------------------------------- /operations/network/management_network/aruba/verify_route_to_tftp.md: -------------------------------------------------------------------------------- 1 | # Verify Route to TFTP 2 | 3 | On **BOTH** Aruba switches, a single route to the TFTP server 10.92.100.60 is needed. The configuration may differ on the system in use. 
4 | 5 | This is needed because there are issues with Aruba ECMP hashing and TFTP traffic. 6 | 7 | ```bash 8 | show ip route 10.92.100.60 9 | ``` 10 | 11 | Example output: 12 | 13 | ``` 14 | Displaying ipv4 routes selected for forwarding 15 | 16 | '[x/y]' denotes [distance/metric] 17 | 18 | 10.92.100.60/32, vrf default, tag 0 19 | via 10.252.1.9, [70/0], bgp 20 | ``` 21 | 22 | This route can be a static route or a BGP route that is pinned to a single worker. The 1.4.2 patch introduced the BGP pinned route. 23 | 24 | Verify that you can ping the next hop of this route. For example, in the example above we would ping 10.252.1.9. If this is not reachable, this is the problem. 25 | 26 | [Back to Index](../README.md) 27 | -------------------------------------------------------------------------------- /operations/network/management_network/canu/README.md: -------------------------------------------------------------------------------- 1 | # CSM Automatic Network Utility 2 | 3 | CSM Automatic Network Utility (CANU) is a tool used to generate/validate/test the Shasta management network. 
4 | 5 | * [Introduction to CANU](introduction_to_canu.md) 6 | * [Official Documentation](https://github.com/Cray-HPE/canu) 7 | * [Quick Start Guide](quick_start_guide_to_canu.md) 8 | * [Install CANU](canu_installation.md) 9 | * [Update CANU From CSM Tarball](update_canu_from_csm_tarball.md) 10 | * [Initialize CANU](initializing_canu.md) 11 | * [Verify, generate, or compare switch configurations](canu_verify_generate_compare_switch_configuration.md) 12 | * [Generate full network configuration](using_canu_to_generate_full_network_config.md) 13 | * [Uninstall CANU](uninstall_canu.md) 14 | * [CANU Validation Error](canu_validation_error.md) 15 | * [Using CANU Custom Configs](custom_config.md) 16 | -------------------------------------------------------------------------------- /operations/network/management_network/canu/canu_installation.md: -------------------------------------------------------------------------------- 1 | # CANU Installation 2 | 3 | ## Prerequisites 4 | 5 | In order to run CANU, both `python3` and `pip3` must be installed. 6 | 7 | ## Installation 8 | 9 | 1. Install `pip3`, if it is not already installed. 10 | 11 | ```ShellSession 12 | pip3 install --editable 13 | ``` 14 | 15 | 1. Install the development build of CANU. 16 | 17 | ```ShellSession 18 | python3 setup.py develop --user 19 | ``` 20 | 21 | [Back to Index](README.md) 22 | -------------------------------------------------------------------------------- /operations/network/management_network/canu/index.md: -------------------------------------------------------------------------------- 1 | ./README.md -------------------------------------------------------------------------------- /operations/network/management_network/canu/introduction_to_canu.md: -------------------------------------------------------------------------------- 1 | # Introduction to CANU 2 | 3 | The CSM Automatic Network Utility (CANU) guides administrators through the installation of new Shasta networks. 
CANU helps ensure that 4 | the installation follows best practices and helps administrators set up a supported configuration. 5 | 6 | The following are some of the tasks that CANU can perform: 7 | 8 | * Check if the management switches on a Shasta network meet the firmware version requirements 9 | * Check the cabling status of the management switches on a Shasta network using LLDP. 10 | * Use a CANU-generated configuration to compare an existing network configuration against the best practice configuration. 11 | 12 | CANU reads switch version information from the `canu.yaml` file in the root directory. 13 | 14 | Additional information can be found in the [CANU documentation](https://github.com/Cray-HPE/canu). 15 | 16 | If doing a CSM install or upgrade, a CANU RPM is located in the release tarball. For more information, see this procedure: [Update CANU From CSM Tarball](update_canu_from_csm_tarball.md) 17 | 18 | [Back to Index](README.md) 19 | -------------------------------------------------------------------------------- /operations/network/management_network/canu/uninstall_canu.md: -------------------------------------------------------------------------------- 1 | # Uninstall CANU 2 | 3 | Uninstalling CANU can be achieved by: 4 | 5 | ```ShellSession 6 | pip3 uninstall canu 7 | ``` 8 | -------------------------------------------------------------------------------- /operations/network/management_network/canu/update_canu_from_csm_tarball.md: -------------------------------------------------------------------------------- 1 | # Update CANU From CSM Release Tarball 2 | 3 | If doing a CSM install or upgrade, the release tarball contains a CANU RPM. It can be extracted and installed using the following steps. 4 | 5 | ## Procedure 6 | 7 | 1. Display the current CANU version. 8 | 9 | ```bash 10 | canu --version 11 | ``` 12 | 13 | 1. 
Set the `TARBALL` variable to the path and filename of the CSM release tarball: 14 | 15 | ```bash 16 | TARBALL=/your/path/here/csm-version.tar.gz 17 | ``` 18 | 19 | 1. Extract the CANU RPM from the tarball: 20 | 21 | ```bash 22 | tar -xzvf "$TARBALL" --wildcards "*/canu*.rpm" 23 | ``` 24 | 25 | Output should look similar to the following: 26 | 27 | ```text 28 | csm-1.2.0-beta.81/rpm/cray/csm/sle-15sp2/x86_64/canu-1.2.1-1.x86_64.rpm 29 | ``` 30 | 31 | 1. Note the path to the RPM from the output of the previous command, and install it: 32 | 33 | ```bash 34 | rpm -Uvh PATH_TO_CANU_RPM 35 | ``` 36 | 37 | 1. Display the new CANU version. 38 | 39 | ```bash 40 | canu --version 41 | ``` 42 | -------------------------------------------------------------------------------- /operations/network/management_network/dell/acl.md: -------------------------------------------------------------------------------- 1 | # Configure Access Control Lists (ACLs) 2 | 3 | ACLs are used to help improve network performance and restrict network usage by creating policies to eliminate 4 | unwanted IP traffic by filtering packets where they enter the switch on layer 2 and layer 3 interfaces. 5 | An ACL is an ordered list of one or more access control list entries (ACEs) prioritized by sequence number. 6 | An incoming packet is matched sequentially against each entry in an ACL. 7 | 8 | ## Configuration Commands 9 | 10 | Create an ACL: 11 | 12 | ```text 13 | ip access-list name 14 | permit ip 1.1.1.0/24 any 15 | ``` 16 | 17 | Show commands to validate functionality: 18 | 19 | ```text 20 | show ip access-list name 21 | ``` 22 | 23 | [Back to Index](../README.md) 24 | -------------------------------------------------------------------------------- /operations/network/management_network/dell/arp.md: -------------------------------------------------------------------------------- 1 | # Configure Address Resolution Protocol (ARP) 2 | 3 | ARP is commonly used for mapping IPv4 addresses to MAC addresses.
4 | 5 | ## Configuration Commands 6 | 7 | Configure static ARP on an interface: 8 | 9 | ```text 10 | ip arp ipv4 IP-ADDR mac MAC-ADDR 11 | ``` 12 | 13 | Show commands to validate functionality: 14 | 15 | ```text 16 | show ip arp 17 | ``` 18 | 19 | ## Expected Results 20 | 21 | 1. Administrators are able to ping the connected device 22 | 2. Administrators can view the ARP entries 23 | 24 | [Back to Index](../README.md) 25 | -------------------------------------------------------------------------------- /operations/network/management_network/dell/backup.md: -------------------------------------------------------------------------------- 1 | # Back Up a Switch Configuration 2 | 3 | The following command copies the running configuration or the startup configuration to a remote location as a file. 4 | 5 | ```console 6 | copy running-configuration {config://filepath | home://filepath | 7 | ftp://userid:passwd@hostip/filepath | scp://userid:passwd@hostip/filepath | 8 | sftp://userid:passwd@hostip/filepath | tftp://hostip/filepath} 9 | ``` 10 | 11 | [Back to Index](../README.md) 12 | -------------------------------------------------------------------------------- /operations/network/management_network/dell/dns-client.md: -------------------------------------------------------------------------------- 1 | # Configure Domain Name System (DNS) Client 2 | 3 | The Domain Name System (DNS) translates domain and host names to and from IP addresses. 4 | A DNS client resolves hostnames to IP addresses by querying assigned DNS servers for the appropriate IP address. 5 | 6 | ## Configuration Commands 7 | 8 | Enter a domain name in CONFIGURATION mode (up to 64 alphanumeric characters): 9 | 10 | ```text 11 | ip domain-name NAME 12 | ``` 13 | 14 | Add names to complete unqualified host names in CONFIGURATION mode: 15 | 16 | ```text 17 | ip domain-list NAME 18 | ``` 19 | 20 | ## Expected Results 21 | 22 | 1. Administrators can configure the DNS client 23 | 2. 
The output is correct 24 | 3. Administrators can ping the device 25 | [Back to Index](../README.md) 26 | -------------------------------------------------------------------------------- /operations/network/management_network/dell/domain_name.md: -------------------------------------------------------------------------------- 1 | # Configure Domain Name 2 | 3 | A domain name is a name to identify the person, group, or organization that controls the devices within an area. 4 | An example of a domain name could be `us.cray.com`. 5 | 6 | ## Configuration Commands 7 | 8 | Create a domain name: 9 | 10 | ```text 11 | domain-name NAME 12 | ``` 13 | 14 | Show commands to validate functionality: 15 | 16 | ```text 17 | show domain-name 18 | ``` 19 | 20 | ## Expected Results 21 | 22 | 1. Administrators can configure the domain name 23 | 2. The output of all `show` commands is correct 24 | 25 | [Back to Index](../README.md) 26 | -------------------------------------------------------------------------------- /operations/network/management_network/dell/hostname.md: -------------------------------------------------------------------------------- 1 | # Configure Hostnames 2 | 3 | A hostname is a human-friendly name used to identify a device. An example of a hostname could be the name `Test`. 4 | 5 | ## Configuration Commands 6 | 7 | Create a hostname: 8 | 9 | ```text 10 | hostname NAME 11 | ``` 12 | 13 | Show commands to validate functionality: 14 | 15 | ```text 16 | show hostname 17 | ``` 18 | 19 | ## Example Output 20 | 21 | ```text 22 | hostname switch-test 23 | show hostname 24 | 25 | ``` 26 | 27 | ## Expected Results 28 | 29 | 1. Administrators can configure the hostname 30 | 2. 
The output of all `show` commands is correct 31 | 32 | [Back to Index](../README.md) 33 | -------------------------------------------------------------------------------- /operations/network/management_network/dell/igmp.md: -------------------------------------------------------------------------------- 1 | # Configure Internet Group Management Protocol (IGMP) 2 | 3 | The Internet Group Management Protocol (IGMP) is a communications protocol used by hosts and adjacent routers on IP networks to establish multicast group memberships. 4 | The host joins a multicast group by sending a join request message towards the network router, and responds to queries sent from the network router by dispatching a join report. 5 | 6 | ## Configuration Command 7 | 8 | ```console 9 | ip igmp snooping enable 10 | ``` 11 | 12 | ## Expected Results 13 | 14 | `show ip igmp-snooping vlan 1` should show IGMP enabled on the VLAN, but no IGMP Querier set. 15 | 16 | [Back to Index](../README.md) 17 | -------------------------------------------------------------------------------- /operations/network/management_network/dell/index.md: -------------------------------------------------------------------------------- 1 | ./README.md -------------------------------------------------------------------------------- /operations/network/management_network/dell/lag.md: -------------------------------------------------------------------------------- 1 | # Configure Link Aggregation Group (LAG) 2 | 3 | Link aggregation allows administrators to assign multiple physical links to one logical link that 4 | functions as a single, higher-speed link providing dramatically increased bandwidth.
5 | 6 | ## Configuration Commands 7 | 8 | Create and configure the LAG interface: 9 | 10 | ```text 11 | interface port-channel 10 12 | no shutdown 13 | ``` 14 | 15 | Associate member links with the LAG interface: 16 | 17 | `interface IFACE` 18 | 19 | ```text 20 | interface ethernet 1/1/1 21 | channel-group 10 22 | ``` 23 | 24 | To enable LACP on the LAG: 25 | 26 | ```text 27 | interface ethernet 1/1/1 28 | switch(conf-if-eth1/1/1)# channel-group 10 mode active 29 | ``` 30 | 31 | Show commands to validate functionality: 32 | 33 | ```text 34 | show interface port-channel 35 | ``` 36 | 37 | ## Expected Results 38 | 39 | 1. Administrators can create and configure a LAG 40 | 2. Administrators can add ports to a LAG 41 | 3. Administrators can configure a LAG interface 42 | 43 | [Back to Index](../README.md) 44 | -------------------------------------------------------------------------------- /operations/network/management_network/dell/locator_led.md: -------------------------------------------------------------------------------- 1 | # Configure Locator LED 2 | 3 | The Locator LED is an LED in the front of the chassis that can turn on or flash. 4 | This is a useful feature when guiding someone to the switch during a "remote hands" situation, 5 | such as asking an engineer to run a cable to the switch. 6 | 7 | ## Configuration Commands 8 | 9 | Enable LED: 10 | 11 | ```text 12 | location-led system 1 on 13 | ``` 14 | 15 | Disable LED: 16 | 17 | ```text 18 | location-led system 1 off 19 | ``` 20 | 21 | ## Expected Results 22 | 23 | 1. The Locator LED is in the off state 24 | 2. The Locator LED is now flashing 25 | 26 | [Back to Index](../README.md) 27 | -------------------------------------------------------------------------------- /operations/network/management_network/dell/loopback.md: -------------------------------------------------------------------------------- 1 | # Configure Loopback Interface 2 | 3 | Loopbacks can be thought of as internal virtual interfaces.
Loopback interfaces are not bound to a physical port 4 | and are used for device management and routing protocols. 5 | 6 | ## Configuration Commands 7 | 8 | ```text 9 | interface loopback LOOPBACK 10 | ip address IP-ADDR/MASK 11 | ``` 12 | 13 | ## Expected Results 14 | 15 | 1. Create a loopback interface. 16 | 1. Give a loopback interface an IP address. 17 | 1. Validate the configuration using the `show` commands. 18 | 19 | [Back to Index](../README.md) 20 | -------------------------------------------------------------------------------- /operations/network/management_network/dell/mstp.md: -------------------------------------------------------------------------------- 1 | # Configure Multiple Spanning Tree Protocol (MSTP) 2 | 3 | MSTP (802.1s) ensures that only one active path exists between any two nodes in a spanning-tree instance. 4 | A spanning-tree instance comprises a unique set of VLANs. MSTP instances significantly improve network 5 | resource utilization while maintaining a loop-free environment. 6 | 7 | ## Configuration Commands 8 | 9 | (`sw#`) Enable MSTP (default mode for spanning-tree): 10 | 11 | ```text 12 | spanning-tree mode mst 13 | name my-mstp-region 14 | revision 0 15 | ``` 16 | 17 | (`sw#`) Show commands to validate functionality: 18 | 19 | ```text 20 | show spanning-tree mst 21 | ``` 22 | 23 | ## Expected Results 24 | 25 | 1. Spanning-tree mode is configured 26 | 2. Spanning-tree is enabled; if loops are detected, ports should go into the blocking state 27 | 3. Spanning-tree splits traffic domain between two DUTs 28 | 29 | [Back to Index](../README.md) 30 | -------------------------------------------------------------------------------- /operations/network/management_network/dell/physical_interfaces.md: -------------------------------------------------------------------------------- 1 | # Configure Physical Interfaces 2 | 3 | Ethernet port interfaces are enabled by default.
4 | 5 | ## Configuration Commands 6 | 7 | Enable the interface: 8 | 9 | ```text 10 | interface ethernet 1/1/1 11 | no shutdown 12 | ``` 13 | 14 | Disable the interface: 15 | 16 | ```text 17 | interface ethernet 1/1/1 18 | shutdown 19 | ``` 20 | 21 | Show commands to validate functionality: 22 | 23 | ```text 24 | show configuration 25 | ``` 26 | 27 | ## Expected Results 28 | 29 | 1. The switch recognizes the transceiver without errors 30 | 2. Administrators can enter the interface context for the port and enable it 31 | 3. Administrators can establish a link with a partner 32 | 4. Administrators can pass traffic as expected 33 | 34 | [Back to Index](../README.md) 35 | -------------------------------------------------------------------------------- /operations/network/management_network/dell/qos.md: -------------------------------------------------------------------------------- 1 | # Configure QoS 2 | 3 | Network traffic is processed based on classification and policies that are created and applied to the traffic. 4 | 5 | QoS trust is by default disabled. 6 | 7 | ## Configuration Commands 8 | 9 | Create a `dot1p` trust map: 10 | 11 | ```text 12 | trust dot1p-map dot1p-trust-map 13 | switch(config-tmap-dot1p-map)# 14 | ``` 15 | 16 | Define the set of values to match the class: 17 | 18 | ```text 19 | qos-group 3 dot1p 0-4 20 | qos-group 5 dot1p 5-7 21 | ``` 22 | 23 | Apply the map on a specific interface or on global level: 24 | 25 | ```text 26 | trust-map dot1p dot1p-trust-map 27 | trust-map dot1p dot1p-trust-map 28 | ``` 29 | 30 | [Back to Index](../README.md) 31 | -------------------------------------------------------------------------------- /operations/network/management_network/dell/remote_logging.md: -------------------------------------------------------------------------------- 1 | # Configure Remote Logging 2 | 3 | Configure remote logging to view log files from the switch on a remote server. This functionality is enabled by syslog. 
4 | 5 | ## Configuration Commands 6 | 7 | Configure logging: 8 | 9 | ```text 10 | logging server dell.com severity log-info 11 | ``` 12 | 13 | ## Expected Results 14 | 15 | 1. Administrators can configure remote logging 16 | 2. Administrators can see the log files from the switch on the remote server 17 | 18 | [Back to Index](../README.md) 19 | -------------------------------------------------------------------------------- /operations/network/management_network/dell/reset.md: -------------------------------------------------------------------------------- 1 | # Reset Dell Switch Configuration 2 | 3 | How to reset the Dell switch configuration: 4 | 5 | ```text 6 | delete startup-config 7 | ``` 8 | 9 | [Back to Index](../README.md) 10 | -------------------------------------------------------------------------------- /operations/network/management_network/dell/ssh.md: -------------------------------------------------------------------------------- 1 | 2 | # Configure Secure Shell (SSH) 3 | 4 | SSH server enables an SSH client to make a secure and encrypted connection to a switch. 5 | Currently, switches support SSH version 2.0 only. 6 | The user authentication mechanisms supported for SSH are public key authentication and password authentication (RADIUS, TACACS+, or locally stored password). 7 | Secure File Transfer Protocol (SFTP) provides file transfer. 8 | SSH Server and `sftp-client` via the `copy` command are supported for managing the router. 9 | 10 | ## Configuration Commands 11 | 12 | The SSH server is enabled by default. 13 | 14 | To disable the SSH server: 15 | 16 | ```text 17 | no ip ssh server enable 18 | ``` 19 | 20 | ## Expected Results 21 | 22 | 1. Administrators can create the user account 23 | 2. Administrators can generate working SSH keys 24 | 3. The output of the `show` commands is correct 25 | 4.
Administrators can successfully connect to the switch via an SSH client using SSH 2.0 26 | 27 | [Back to Index](../README.md) 28 | -------------------------------------------------------------------------------- /operations/network/management_network/dell/system_images.md: -------------------------------------------------------------------------------- 1 | # Configure System Images 2 | 3 | Dell switches support active and standby images. 4 | 5 | ## Configuration Commands 6 | 7 | Copy an image from a local server: 8 | 9 | ```text 10 | image download ftp://admin@1.1.1.1:/image.bin 11 | ``` 12 | 13 | Install image: 14 | 15 | ```text 16 | image install file-url 17 | ``` 18 | 19 | Show commands to validate functionality: 20 | 21 | ```text 22 | show boot detail 23 | ``` 24 | 25 | ## Expected Results 26 | 27 | 1. Administrators can upload an image to the switch 28 | 2. Administrators can boot into the uploaded image 29 | 3. Administrators can see they are running the uploaded image 30 | 31 | [Back to Index](../README.md) 32 | -------------------------------------------------------------------------------- /operations/network/management_network/dell/upgrade.md: -------------------------------------------------------------------------------- 1 | # Perform an Upgrade on Dell Switches 2 | 3 | How to perform an upgrade on the Dell switches. 
4 | 5 | ## Configuration Commands 6 | 7 | Download the new software image: 8 | 9 | ```text 10 | image download file-url 11 | ``` 12 | 13 | View the current software download status: 14 | 15 | ```text 16 | show image status 17 | ``` 18 | 19 | Install the software image: 20 | 21 | ```text 22 | image install image-url 23 | ``` 24 | 25 | View the status of the current software install: 26 | 27 | ```text 28 | show image status 29 | ``` 30 | 31 | Change the next boot partition to the standby partition: 32 | 33 | ```text 34 | boot system standby 35 | ``` 36 | 37 | Reload the new software image: 38 | 39 | ```text 40 | reload 41 | ``` 42 | 43 | [Back to Index](../README.md) 44 | -------------------------------------------------------------------------------- /operations/network/management_network/dell/vlan.md: -------------------------------------------------------------------------------- 1 | # Configure Virtual Local Area Networks (VLANs) 2 | 3 | VLANs allow for the logical grouping of switch interfaces, enabling communication as if all connected devices were on the same isolated network. 4 | 5 | ## Configuration Commands 6 | 7 | Create a VLAN: 8 | 9 | ```text 10 | interface vlan VLAN 11 | ``` 12 | 13 | Show commands to validate functionality: 14 | 15 | ```text 16 | show vlan [VLAN] 17 | ``` 18 | 19 | ## Expected Results 20 | 21 | 1. Administrators can create a VLAN 22 | 2. Administrators can assign a VLAN to the physical interface 23 | 24 | [Back to Index](../README.md) 25 | -------------------------------------------------------------------------------- /operations/network/management_network/dell/vlan_interface.md: -------------------------------------------------------------------------------- 1 | # Configure VLAN Interface 2 | 3 | The switch also supports classic L3 VLAN interfaces. 4 | 5 | ## Configuration Commands 6 | 7 | Configure the VLAN: 8 | 9 | ```text 10 | vlan VLAN 11 | ``` 12 | 13 | The default mode of any VLAN is L2 only.
To enable L3 functionality, run `no shutdown` on the VLAN: 14 | 15 | ```text 16 | interface vlan 2 17 | no shutdown 18 | ``` 19 | 20 | Show commands to validate functionality: 21 | 22 | ```text 23 | show interface vlan 24 | ``` 25 | 26 | ## Expected Results 27 | 28 | 1. Administrators can configure the VLAN 29 | 2. Administrators can enable the interface and associate it with the VLAN 30 | 3. Administrators can create an IP-enabled VLAN interface, and it is up 31 | 4. Administrators validate the configuration is correct 32 | 5. Administrators can ping from the switch to the client and from the client to the switch 33 | 34 | [Back to Index](../README.md) 35 | -------------------------------------------------------------------------------- /operations/network/management_network/img/architecture_comparison.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/operations/network/management_network/img/architecture_comparison.png -------------------------------------------------------------------------------- /operations/network/management_network/img/aruba_arista.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/operations/network/management_network/img/aruba_arista.png -------------------------------------------------------------------------------- /operations/network/management_network/img/cmn_plus_can.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/operations/network/management_network/img/cmn_plus_can.png -------------------------------------------------------------------------------- /operations/network/management_network/img/cmn_plus_chn.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/operations/network/management_network/img/cmn_plus_chn.png -------------------------------------------------------------------------------- /operations/network/management_network/img/customer_access_overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/operations/network/management_network/img/customer_access_overview.png -------------------------------------------------------------------------------- /operations/network/management_network/img/exascale.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/operations/network/management_network/img/exascale.png -------------------------------------------------------------------------------- /operations/network/management_network/img/intro.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/operations/network/management_network/img/intro.png -------------------------------------------------------------------------------- /operations/network/management_network/img/large.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/operations/network/management_network/img/large.png -------------------------------------------------------------------------------- /operations/network/management_network/img/mclag_link_ha.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/operations/network/management_network/img/mclag_link_ha.png -------------------------------------------------------------------------------- /operations/network/management_network/img/medium.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/operations/network/management_network/img/medium.png -------------------------------------------------------------------------------- /operations/network/management_network/img/member_power_failure.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/operations/network/management_network/img/member_power_failure.png -------------------------------------------------------------------------------- /operations/network/management_network/img/network_traffic_pattern.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/operations/network/management_network/img/network_traffic_pattern.png -------------------------------------------------------------------------------- /operations/network/management_network/img/scenario-a-topology.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/operations/network/management_network/img/scenario-a-topology.png -------------------------------------------------------------------------------- /operations/network/management_network/img/scenario-b-topology.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/operations/network/management_network/img/scenario-b-topology.png -------------------------------------------------------------------------------- /operations/network/management_network/img/shcd_example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/operations/network/management_network/img/shcd_example.png -------------------------------------------------------------------------------- /operations/network/management_network/img/small.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/operations/network/management_network/img/small.png -------------------------------------------------------------------------------- /operations/network/management_network/img/tds_can_overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/operations/network/management_network/img/tds_can_overview.png -------------------------------------------------------------------------------- /operations/network/management_network/img/vsx_isl_ha.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/operations/network/management_network/img/vsx_isl_ha.png -------------------------------------------------------------------------------- /operations/network/management_network/img/vsx_split.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/operations/network/management_network/img/vsx_split.png -------------------------------------------------------------------------------- /operations/network/management_network/index.md: -------------------------------------------------------------------------------- 1 | ./README.md -------------------------------------------------------------------------------- /operations/network/management_network/mellanox/arp.md: -------------------------------------------------------------------------------- 1 | # Address resolution protocol (ARP) 2 | 3 | ARP is commonly used for mapping IPv4 addresses to MAC addresses. Static ARP addresses are only supported on management interfaces. 4 | 5 | Relevant Configuration 6 | 7 | Configure static ARP on an interface 8 | 9 | ``` 10 | Switch (config) # interface mgmt0 11 | switch(config interface mgmt0)# arp ipv4 IP-ADDR mac MAC-ADDR 12 | ``` 13 | 14 | Show Commands to Validate Functionality 15 | 16 | ``` 17 | show ip arp 18 | ``` 19 | 20 | Expected Results 21 | 22 | * Step 1: You are able to ping the connected device 23 | * Step 2: You can view the ARP entries 24 | 25 | [Back to Index](../README.md) 26 | 27 | -------------------------------------------------------------------------------- /operations/network/management_network/mellanox/backup.md: -------------------------------------------------------------------------------- 1 | # Backing up switch configuration 2 | 3 | Backing up current configuration of the switch in text format 4 | Example 5 | 6 | To create a new text-based configuration file, complete the following steps: 7 | 8 | Log in to the switch as Admin.
9 | 10 | Type the following command: 11 | 12 | ``` 13 | switch (config) # configuration text generate active running save my-filename 14 | ``` 15 | 16 | To upload a text-based configuration file from a switch to an external file server, complete the following steps: 17 | 18 | ``` 19 | switch (config) # configuration text file my-filename upload 20 | scp://root@my-server/root/tmp/my-filename 21 | ``` 22 | 23 | [Back to Index](../README.md) 24 | -------------------------------------------------------------------------------- /operations/network/management_network/mellanox/check_dhcp_lease_is_getting_allocated.md: -------------------------------------------------------------------------------- 1 | # Check DHCP lease is getting allocated 2 | 3 | * Check the KEA logs and verify that the lease is getting allocated. 4 | 5 | ``` 6 | kubectl logs -n services pod/$(kubectl get -n services pods | grep kea | head -n1 | cut -f 1 -d ' ') -c cray-dhcp-kea 7 | ``` 8 | 9 | 2021-04-21 00:13:05.416 INFO [kea-dhcp4.leases/24.139710796402304] DHCP4_LEASE_ ***ALLOC*** [hwtype=1 02:23:28:01:30:10], cid=[00:78:39:30:30:30:63:31:73:30:62:31], tid=0x21f2433a: lease 10.104.0.23 has been allocated for 300 seconds 10 | 11 | * Here we can see that KEA is allocating a lease to 10.104.0.23. 12 | * The lease MUST say DHCP4_LEASE_ALLOC, if it says DHCP4_LEASE_ADVERT, there is likely a problem. Restarting KEA will fix this issue most of the time. 
13 | 14 | 2021-06-21 16:44:31.124 INFO [kea-dhcp4.leases/18.139837089017472] DHCP4_LEASE_ ***ADVERT*** [hwtype=1 14:02:ec:d9:79:88], cid=[no info], tid=0xe87fad10: lease 10.252.1.16 will be advertised 15 | 16 | [Back to Index](../README.md) 17 | -------------------------------------------------------------------------------- /operations/network/management_network/mellanox/compute_uan_application_nodes.md: -------------------------------------------------------------------------------- 1 | # Computes/UANs/Application Nodes 2 | 3 | If the Computes make it past PXE and go into the PXE shell you can verify DNS and connectivity. 4 | 5 | ``` 6 | iPXE> dhcp 7 | Configuring (net0 98:03:9b:a8:60:88).................. No configuration methods succeeded (http://ipxe.org/040ee186) 8 | Configuring (net1 b4:2e:99:be:1a:37)...... ok 9 | ``` 10 | 11 | ``` 12 | iPXE> show dns 13 | net1.dhcp/dns:ipv4 = 10.92.100.225 14 | ``` 15 | 16 | ``` 17 | iPXE> nslookup address api-gw-service-nmn.local 18 | iPXE> echo ${address} 19 | 10.92.100.71 20 | ``` 21 | 22 | [Back to Index](../README.md) 23 | -------------------------------------------------------------------------------- /operations/network/management_network/mellanox/dns-client.md: -------------------------------------------------------------------------------- 1 | # Domain name system (DNS) client 2 | 3 | The Domain Name System (DNS) translates domain and host names to and from IP addresses. A DNS client resolves hostnames to IP addresses by querying assigned DNS servers for the appropriate IP address. 
4 | 5 | Relevant Configuration 6 | 7 | Configure the switch to resolve queries via a DNS server 8 | 9 | ``` 10 | switch(config)# ip name-server 11 | ``` 12 | 13 | Configure a domain name 14 | 15 | ``` 16 | switch(config)# ip domain-list mydomain2.com 17 | ``` 18 | 19 | Show Commands to Validate Functionality 20 | 21 | ``` 22 | show hosts 23 | ``` 24 | 25 | Expected Results 26 | 27 | * Step 1: You can configure the DNS client 28 | * Step 2: The output is correct 29 | * Step 3: You can ping the device 30 | 31 | [Back to Index](../README.md) 32 | -------------------------------------------------------------------------------- /operations/network/management_network/mellanox/domain_name.md: -------------------------------------------------------------------------------- 1 | # Domain name 2 | 3 | A domain name is a name to identify the person, group, or organization that controls the devices within an area. An example of a domain name could be us.cray.com 4 | 5 | Relevant Configuration 6 | 7 | Creating a domain name 8 | 9 | ``` 10 | switch(config)# ip map-hostname 11 | ``` 12 | 13 | Show Commands to Validate Functionality 14 | 15 | ``` 16 | show hosts 17 | ``` 18 | 19 | Expected Results 20 | 21 | * Step 1: You can configure the domain name 22 | * Step 2: The output of all show commands is correct 23 | 24 | [Back to Index](../README.md) 25 | -------------------------------------------------------------------------------- /operations/network/management_network/mellanox/exec_banner.md: -------------------------------------------------------------------------------- 1 | # Exec banners 2 | 3 | Banners are custom messages displayed to users attempting to connect to the management interfaces. MOTD banners are displayed pre-login while exec banners are displayed post-login. Multiple lines of text can be stored using a custom delimiter to mark the end of message. 4 | 5 | Relevant Configuration 6 | 7 | Create a banner. 
8 | 9 | ``` 10 | switch(config)# banner motd Testing 11 | ``` 12 | 13 | Show Commands to Validate Functionality. 14 | 15 | ``` 16 | show banner 17 | ``` 18 | 19 | Example Output 20 | 21 | ``` 22 | ufmapl [ mgmt-sa ] (config) # show banner 23 | Banners: 24 | MOTD: 25 | Mellanox UFM Appliance 26 | 27 | Login: 28 | Mellanox MLNX-OS UFM Appliance Management 29 | ``` 30 | 31 | Expected Results: 32 | 33 | * Step 1: You can create the banner 34 | * Step 2: The output of the banner looks correct 35 | 36 | [Back to Index](../README.md) 37 | -------------------------------------------------------------------------------- /operations/network/management_network/mellanox/hostname.md: -------------------------------------------------------------------------------- 1 | # Hostname 2 | 3 | A hostname is a human-friendly name used to identify a device. An example of a hostname could be the name "Test." 4 | 5 | Relevant Configuration 6 | 7 | Creating a hostname 8 | 9 | ``` 10 | switch(config)# hostname 11 | ``` 12 | 13 | Show Commands to Validate Functionality 14 | 15 | ``` 16 | show hosts 17 | ``` 18 | 19 | Expected Results 20 | 21 | * Step 1: You can configure the hostname 22 | * Step 2: The output of all show commands is correct 23 | 24 | [Back to Index](../README.md) 25 | -------------------------------------------------------------------------------- /operations/network/management_network/mellanox/igmp.md: -------------------------------------------------------------------------------- 1 | # IGMP 2 | 3 | The Internet Group Management Protocol (IGMP) is a communications protocol used by hosts and adjacent routers on IP networks to establish multicast group memberships. The host joins a multicast group by sending a join request message towards the network router, and responds to queries sent from the network router by dispatching a join report. 4 | 5 | Relevant Configuration 6 | 7 | Enable IGMP snooping globally.
Run: 8 | 9 | ``` 10 | switch (config) # ip igmp snooping 11 | ``` 12 | 13 | Enable IGMP snooping on a VLAN. Run: 14 | 15 | ``` 16 | switch (config) # vlan 2 17 | switch (config vlan 2) # ip igmp snooping 18 | ``` 19 | 20 | (Optional) Verify the IGMP snooping querier configuration. Run: 21 | 22 | ``` 23 | switch (config vlan 10)# show ip igmp snooping querier 24 | ``` 25 | 26 | [Back to Index](../README.md) 27 | -------------------------------------------------------------------------------- /operations/network/management_network/mellanox/index.md: -------------------------------------------------------------------------------- 1 | ./README.md -------------------------------------------------------------------------------- /operations/network/management_network/mellanox/large.md: -------------------------------------------------------------------------------- 1 | # Large 2 | 3 | ![Large network](../img/large.png) 4 | 5 | [Back to index](README.md). 6 | -------------------------------------------------------------------------------- /operations/network/management_network/mellanox/lldp.md: -------------------------------------------------------------------------------- 1 | # Link layer discovery protocol (LLDP) 2 | 3 | LLDP is used to advertise the device's identity and abilities and read other devices connected to the same network. Note: LLDP is enabled by default. 
4 | 5 | Relevant Configuration 6 | 7 | Enable lldp 8 | 9 | ``` 10 | switch(config)# lldp 11 | ``` 12 | 13 | Enable lldp on interface 14 | 15 | ``` 16 | switch (config interface ethernet 1/1) # lldp receive 17 | switch (config interface ethernet 1/1) # lldp transmit 18 | ``` 19 | 20 | Show Commands to Validate Functionality 21 | 22 | ``` 23 | show lldp local 24 | ``` 25 | 26 | Expected Results 27 | 28 | * Step 1: Link status between the peer devices is UP 29 | * Step 2: LLDP is enabled 30 | * Step 3: Local device LLDP Information is displayed 31 | * Step 4: Remote device LLDP information is displayed 32 | * Step 5: LLDP statistics are displayed 33 | 34 | [Back to Index](../README.md) 35 | -------------------------------------------------------------------------------- /operations/network/management_network/mellanox/loopback.md: -------------------------------------------------------------------------------- 1 | # Loopback interface 2 | 3 | You can think of loopbacks as internal virtual interfaces. Loopback interfaces are not bound to a physical port and are used for device management and routing protocols. 4 | 5 | Relevant Configuration 6 | 7 | Create a loopback interface. Run: 8 | 9 | ``` 10 | switch (config)# interface loopback 2 11 | switch (config interface loopback 2)# 12 | ``` 13 | 14 | Configure an IP address on the loopback interface. Run: 15 | 16 | ``` 17 | switch (config interface loopback 2)# ip address 20.20.20.20 /32 18 | ``` 19 | 20 | Show Commands to Validate Functionality 21 | 22 | ``` 23 | show interfaces loopback 2 24 | ``` 25 | 26 | Expected Results 27 | 28 | * Step 1: You can create a loopback interface 29 | * Step 2: You can give a loopback interface an IP address 30 | * Step 3: You can validate the configuration using the show commands. 
31 | 32 | [Back to Index](../README.md) 33 | 34 | -------------------------------------------------------------------------------- /operations/network/management_network/mellanox/management_interface.md: -------------------------------------------------------------------------------- 1 | # Management interface 2 | 3 | The management interface can be used to gain remote management access to the switch. The management interface is accessible using the `mgmt` VRF and is separate from the data plane 4 | interfaces, which are in the `default` VRF. Mellanox switches support out-of-band (OOB) dedicated interfaces (e.g. `mgmt0`, `mgmt1`) and in-band dedicated interfaces. 5 | 6 | (`sw#`) Enter configuration mode. 7 | 8 | ```console 9 | enable 10 | configure terminal 11 | ``` 12 | 13 | (`sw#`) Disable setting IP addresses using the DHCP using the following command in configuration mode: 14 | 15 | ```console 16 | no interface mgmt0 dhcp 17 | ``` 18 | 19 | (`sw#`) Define the interface IP address statically using the following command in configuration mode: 20 | 21 | ```console 22 | interface mgmt0 ip address 23 | ``` 24 | 25 | (`sw#`) Show interface information. 26 | 27 | ```console 28 | show interface mgmt 29 | ``` 30 | -------------------------------------------------------------------------------- /operations/network/management_network/mellanox/medium.md: -------------------------------------------------------------------------------- 1 | # Medium 2 | 3 | ![Medium network](../img/medium.png) 4 | 5 | [Back to index](README.md). 6 | -------------------------------------------------------------------------------- /operations/network/management_network/mellanox/mlag_architecture.md: -------------------------------------------------------------------------------- 1 | # MLAG (Multi-Chassis LAG) 2 | 3 | Is a type of Link Aggregation Group where ports from single device such as server terminate on two separate switches providing switch-level redundancy. 
4 | 5 | What are the benefits of MLAG? 6 | 7 | * Increased bandwidth achieved by dual connection to node. 8 | 9 | * High availability (HA) for servers while allowing full use of the bandwidth of both links 10 | 11 | * To achieve HA on a switch level without the use of STP 12 | 13 | Key limitations of MLAG in Mellanox: 14 | 15 | * Only one MLAG domain supported per device 16 | 17 | * Maximum number of devices in MLAG domain is two switches. 18 | 19 | * At least one port per switch (in MLAG domain) MUST be reserved for inter-switch link. 20 | 21 | More details, requirements and limitations on Mellanox devices can be found at: 22 | 23 | [https://docs.mellanox.com/display/ONYXv381174/MLAG](https://docs.mellanox.com/display/ONYXv381174/MLAG) 24 | 25 | [Back to Index](../README.md) 26 | -------------------------------------------------------------------------------- /operations/network/management_network/mellanox/mstp.md: -------------------------------------------------------------------------------- 1 | # Multiple spanning tree protocol (MSTP) 2 | 3 | MSTP (802.1s) ensures that only one active path exists between any two nodes in a spanning-tree instance. 4 | A spanning-tree instance comprises a unique set of VLANs. MSTP instances significantly improve network 5 | resource utilization while maintaining a loop-free environment. 6 | 7 | ## Configuration commands 8 | 9 | (`sw#`) Enable MSTP (default mode for spanning-tree) 10 | 11 | ```text 12 | spanning-tree 13 | spanning-tree mode mstp 14 | spanning-tree mst revision 1 15 | spanning-tree mst name mellanox 16 | ``` 17 | 18 | Show commands to validate functionality 19 | 20 | ```text 21 | show spanning-tree 22 | ``` 23 | 24 | ## Expected results 25 | 26 | 1. Spanning-tree mode is configured 27 | 1. Spanning-tree is enabled; if loops are detected, ports should go into the blocking state. 28 | 1.
Spanning-tree splits traffic domain between two DUTs 29 | 30 | [Back to Index](../README.md) 31 | -------------------------------------------------------------------------------- /operations/network/management_network/mellanox/ncn_tcpdump.md: -------------------------------------------------------------------------------- 1 | # TCPDUMP 2 | 3 | If your host is not getting an IP address you can run a packet capture to see if DHCP traffic is being transmitted. 4 | 5 | On `ncn-w001` or a worker/manager with `kubectl`, run: 6 | 7 | ``` 8 | tcpdump -w dhcp.pcap -envli bond0.nmn0 port 67 or port 68 9 | ``` 10 | 11 | This will create a file named `dhcp.pcap` in your current directory. It will collect all DHCP traffic on the interface you specify; in this example, we are looking for DHCP traffic on interface bond0.nmn0 (10.252.0.0/17). 12 | 13 | To view the DHCP traffic, run: 14 | 15 | ``` 16 | tcpdump -r dhcp.pcap -v -n 17 | ``` 18 | 19 | The output may be very long so you might have to use filters. 20 | 21 | If you want to do a tcpdump for a certain MAC address you can run: 22 | 23 | ``` 24 | tcpdump -i eth0 -vvv -s 1500 '((port 67 or port 68) and (udp[38:4] = 0x993b7030))' 25 | ``` 26 | 27 | Note: This example is using the MAC of b4:2e:99:3b:70:30 and will show the output on your terminal and not save to a file. 28 | 29 | [Back to Index](../README.md) 30 | -------------------------------------------------------------------------------- /operations/network/management_network/mellanox/network_naming_function.md: -------------------------------------------------------------------------------- 1 | # Network types – Naming and segment Function 2 | 3 | Description 4 | 5 | Below is an overview of the different network services defined within our spine-leaf architecture.
6 | 7 | | *********** | Administration: Hardware | Administration: Cloud/Job | Customer: Jobs | Customer: Administration | Storage | 8 | |:---------------------|:---------------------------:|:-------------------------:|:-----------------------:|:---------------------------:|---------------------:| 9 | | Full name | Hardware Management Network | Node Management Network | Customer Access Network | Customer Management Network | Storage User Network | 10 | | Short name / acronym | HMN | NMN | CAN | CMN | SUN | 11 | 12 | [Back to Index](../README.md) 13 | 14 | -------------------------------------------------------------------------------- /operations/network/management_network/mellanox/network_traffic_pattern.md: -------------------------------------------------------------------------------- 1 | # Network traffic pattern inside of the system 2 | 3 | ![](../../../../img/network/management_network/network_traffic_pattern.png) 4 | 5 | Internal Networks: 6 | 7 | * Node Management Network (NMN) - Provides the internal control plane for systems management and jobs control. 8 | * Hardware Management Network (HMN) - Provides internal access to system baseboard management controllers (BMC/iLO) and other lower-level hardware access. 9 | 10 | External and Edge Networks: 11 | 12 | * Customer Management Network (CMN) - Provides customer access from the Site to the System for administrators. 13 | * Customer Access Network (CAN) or Customer High Speed Network (CHN) provide: 14 | * Customer access from the site to the System for job control and jobs data movement. 15 | * Access from the System to the Site for network services like DNS, LDAP, etc. 
16 | 17 | [Back to Index](../README.md) 18 | -------------------------------------------------------------------------------- /operations/network/management_network/mellanox/physical_interfaces.md: -------------------------------------------------------------------------------- 1 | # Physical interfaces 2 | 3 | Interfaces in Mellanox are enabled by default. 4 | 5 | Relevant Configuration 6 | 7 | Enter interface context 8 | 9 | ``` 10 | switch (config) # interface ethernet 1/1 11 | ``` 12 | 13 | Show Commands to Validate Functionality 14 | 15 | ``` 16 | show interfaces ethernet 1/1 17 | ``` 18 | 19 | Expected Results 20 | 21 | * Step 1: You can enter the interface context for the port 22 | * Step 2: You can establish a link with a partner 23 | * Step 3: You can pass traffic as expected 24 | 25 | [Back to Index](../README.md) 26 | -------------------------------------------------------------------------------- /operations/network/management_network/mellanox/remote_logging.md: -------------------------------------------------------------------------------- 1 | # Remote logging 2 | 3 | "In its most simplistic terms, the syslog protocol provides a transport to allow a machine to send event notification messages across IP networks to event message collectors - also known as syslog servers." 
–rfc3164 4 | 5 | Note: the default facility is 3 (DAEMON) 6 | 7 | Relevant Configuration 8 | 9 | Configure logging 10 | 11 | ``` 12 | switch(config)# logging [trap { | override class priority }] 13 | ``` 14 | 15 | Expected Results 16 | 17 | * Step 1: You can configure remote logging 18 | * Step 2: You can see the log files from the switch on the remote server 19 | 20 | [Back to Index](../README.md) 21 | -------------------------------------------------------------------------------- /operations/network/management_network/mellanox/routed_interface.md: -------------------------------------------------------------------------------- 1 | # Routed interfaces 2 | 3 | By default, Mellanox interfaces are set as "switchports", which allow L2 communication. To change a port to routed-only, you must disable L2 functionality. 4 | 5 | Relevant Configuration 6 | 7 | Disable L2 functionality 8 | 9 | ``` 10 | Switch (config) # interface ethernet 1/4 11 | Switch (config-int) no switchport force 12 | ``` 13 | 14 | Give an interface an IP address 15 | 16 | ``` 17 | switch (config) # interface ethernet 1/14 ip address 192.168.75.1/31 18 | primary 19 | ``` 20 | 21 | Show Commands to Validate Functionality 22 | 23 | ``` 24 | show ethernet interface IFACE 25 | ``` 26 | 27 | Expected Results 28 | 29 | * Step 1: You are able to configure an IP address on the interface 30 | * Step 2: You can configure an IP address on the connected network client 31 | * Step 3: The interface is up, and you can validate the IP address and subnet are correct 32 | * Step 4: You can ping from the switch to the client and from the client to the switch 33 | 34 | [Back to Index](../README.md) 35 | 36 | -------------------------------------------------------------------------------- /operations/network/management_network/mellanox/scenario-a.md: -------------------------------------------------------------------------------- 1 | # Scenario A: network connection via management network 2 | 3 | Description 4 | The
example here covers outside connections achieved via management network. 5 | 6 | ### Summary 7 | * Create a new VRF 8 | * Move interfaces to the new VRF 9 | * Create a new BGP process for the new VRF 10 | * Setup the edge router 11 | * Configure MetalLB 12 | * Verification step for BGP routes 13 | * Configure default route for workers 14 | * Verification of external communication 15 | 16 | ### Topology 17 | 18 | ![](../../../../img/network/management_network/scenario-a-topology.png) 19 | 20 | [Back to Index](../README.md) 21 | -------------------------------------------------------------------------------- /operations/network/management_network/mellanox/scenario-b.md: -------------------------------------------------------------------------------- 1 | # Scenario B: network connection via high speed network 2 | 3 | Description 4 | The example here covers outside connections achieved via highspeed network. 5 | 6 | ### Summary 7 | * Create a new VRF 8 | * Move interfaces to the new VRF 9 | * Create a new BGP process for the new VRF 10 | * Setup the edge router 11 | * Configure MetalLB 12 | * Verification step for BGP routes 13 | * Configure default route for workers 14 | * Verification of external communication 15 | 16 | ### Topology 17 | 18 | ![](../../../../img/network/management_network/scenario-b-topology.png) 19 | 20 | [Back to Index](../README.md) 21 | -------------------------------------------------------------------------------- /operations/network/management_network/mellanox/small.md: -------------------------------------------------------------------------------- 1 | # Small 2 | 3 | ![Small network](../img/small.png) 4 | 5 | [Back to index](README.md). 
6 | -------------------------------------------------------------------------------- /operations/network/management_network/mellanox/snmp_community.md: -------------------------------------------------------------------------------- 1 | # SNMPv2c community 2 | 3 | The switch supports SNMPv2c community-based security for Read-Only access. 4 | 5 | Relevant Configuration 6 | 7 | Configure an SNMPv2c community name 8 | 9 | Enable SNMP 10 | 11 | ``` 12 | switch(config)# snmp-server community private rw 13 | ``` 14 | 15 | Configure a SNMPv2c trap receiver host 16 | 17 | ``` 18 | switch(config)# snmp-server host IP-ADDR version v2c [community NAME] 19 | ``` 20 | 21 | Show Commands to Validate Functionality 22 | 23 | ``` 24 | show snmp 25 | ``` 26 | 27 | Expected Results 28 | 29 | * Step 1: You can configure the community name 30 | * Step 2: You can bind the SNMP server to the default VRF 31 | * Step 3: You can connect from the workstation using the community name 32 | 33 | [Back to Index](../README.md) 34 | -------------------------------------------------------------------------------- /operations/network/management_network/mellanox/snmpv3_users.md: -------------------------------------------------------------------------------- 1 | # Mellanox SNMPv3 users 2 | 3 | SNMPv3 supports cryptographic security by a combination of authenticating and encrypting the SNMP protocol packets over the network. Read-Only access is currently supported. The admin user can add or remove SNMPv3 users. 
4 | 5 | ## Relevant Configuration 6 | 7 | Configure a new SNMPv3 user (Minimum 8 characters for passwords) 8 | 9 | ```console 10 | switch(config)# snmp-server user testuser v3 capability admin 11 | switch(config)# snmp-server user testuser v3 enable 12 | switch(config)# snmp-server user testuser v3 enable sets 13 | switch(config)# snmp-server user testuser v3 encrypted auth md5 xxxxxxx priv des xxxxxxx 14 | switch(config)# snmp-server user testuser v3 require-privacy 15 | ``` 16 | 17 | Show Commands to Validate Functionality 18 | 19 | ```console 20 | show snmp user 21 | ``` 22 | 23 | [Back to Index](../README.md) 24 | -------------------------------------------------------------------------------- /operations/network/management_network/mellanox/spine_leaf_architecture.md: -------------------------------------------------------------------------------- 1 | # Spine-leaf architecture 2 | 3 | Description 4 | 5 | The network design used in the majority of our supercomputer installations is a spine-leaf architecture. In more sizeable systems, we also utilize a super-spine layer to accommodate the number of spines that connect the network to provide additional HA capabilities. 6 | 7 | # What is Spine-Leaf Architecture? 8 | A spine-leaf architecture is a data center network topology that consists of two switching layers—a spine and a leaf. The leaf layer consists of access switches that aggregate traffic from servers and connect directly into the spine or network core. Spine switches interconnect all leaf switches in a full-mesh topology. 9 | 10 | [Back to Index](../README.md) 11 | -------------------------------------------------------------------------------- /operations/network/management_network/mellanox/spine_leaf_architecture2.md: -------------------------------------------------------------------------------- 1 | # Spine-leaf architecture 2 | 3 | How does a spine-leaf architecture differ from traditional network designs? 
4 | Traditionally, data center networks were based on a three-tier model: 5 | 6 | 1. Access switches connect to servers 7 | 2. Aggregation or distribution switches provide redundant connections to access switches 8 | 3. Core switches provide fast transport between aggregation switches, typically connected in a redundant pair for high availability 9 | 10 | At the most basic level, a spine-leaf architecture collapses one of these tiers, as depicted in these diagrams. 11 | 12 | ![](../../../../img/network/management_network/architecture_comparison.png) 13 | 14 | Other common differences in spine-leaf topologies include: 15 | 16 | * The removal of Spanning Tree Protocol (STP) where feasible 17 | * A scale-out vs. scale-up of infrastructure 18 | 19 | [Back to Index](../README.md) 20 | -------------------------------------------------------------------------------- /operations/network/management_network/mellanox/static_mac.md: -------------------------------------------------------------------------------- 1 | # MAC address table 2 | 3 | You can configure static MAC addresses for unicast traffic. This feature improves security and reduces unknown unicast flooding. 4 | 5 | To configure a unicast static MAC address: 6 | 7 | ``` 8 | Switch (config) # mac-address-table static unicast MAC-ADDR vlan VLAN-ID interface ethernet SLOT/PORT 9 | ``` 10 | 11 | For example: 12 | 13 | ``` 14 | switch (config) # mac-address-table static 00:11:22:33:44:55 vlan 1 interface ethernet 1/1 15 | ``` 16 | 17 | [Back to Index](../README.md) 18 | 19 | -------------------------------------------------------------------------------- /operations/network/management_network/mellanox/static_routing.md: -------------------------------------------------------------------------------- 1 | # Static routing 2 | 3 | "Static routing is manually performed by the network administrator. The administrator is responsible for discovering and propagating routes through the network. 
These definitions are manually programmed in every routing device in the environment. After a device has been configured, it simply forwards packets out the predetermined ports. There is no communication between routers regarding the current topology of the network." –IBM Redbook, TCP/IP 4 | 5 | Relevant Configuration 6 | 7 | ``` 8 | switch(config)# ip route vrf default 0.0.0.0/0 null0 9 | ``` 10 | 11 | Show Commands to Validate Functionality 12 | 13 | ``` 14 | show ip route 15 | ``` 16 | 17 | Expected Results 18 | 19 | * Step 1: You can configure a static route on the DUT 20 | * Step 2: You can validate using the show command(s) above 21 | * Step 3: You can ping the connected device 22 | 23 | [Back to Index](../README.md) 24 | -------------------------------------------------------------------------------- /operations/network/management_network/mellanox/system_images.md: -------------------------------------------------------------------------------- 1 | # System images 2 | 3 | Mellanox switches can hold two firmware images. These images, once uploaded, are called the Running and Image available for install. 
4 | 5 | Relevant Configuration 6 | 7 | Copy an image from a local server using SCP 8 | 9 | ``` 10 | switch (config)#image delete XXX // --> delete old images, if exist 11 | switch (config)#image fetch scp://root:password@server/path-to-image/image-X86_64-3.4.2002.img 12 | switch (config)#image install image-X86_64-3.4.2002.img 13 | ``` 14 | 15 | Boot the switch into the new firmware 16 | 17 | ``` 18 | switch (config)#image boot next 19 | switch (config)#configuration write 20 | switch (config)#reload 21 | ``` 22 | 23 | Show Commands to Validate Functionality 24 | 25 | ``` 26 | show version 27 | ``` 28 | 29 | Expected Results 30 | 31 | * Step 1: You can upload an image to the switch 32 | * Step 2: You can see the versions of code for the primary and secondary images 33 | * Step 3: You can boot into the uploaded image 34 | * Step 4: You can see you are running the uploaded image 35 | 36 | [Back to Index](../README.md) 37 | 38 | -------------------------------------------------------------------------------- /operations/network/management_network/mellanox/test_tftp_traffic.md: -------------------------------------------------------------------------------- 1 | # Test TFTP traffic (Aruba Only) 2 | 3 | * You can test the TFTP traffic by trying to download the ipxe.efi binary. 4 | * Log in to the leaf switch and try to download the iPXE binary. 5 | * This requires that the leaf switch can talk to the TFTP server "10.92.100.60" 6 | 7 | ``` 8 | start-shell 9 | sw-leaf-001:~$ sudo su 10 | sw-leaf-001:/home/tftp 10.92.100.60 11 | tftp> get ipxe.efi 12 | Received 1007200 bytes in 2.2 seconds 13 | tftp> get ipxe.efi 14 | Received 1007200 bytes in 2.2 seconds 15 | tftp> get ipxe.efi 16 | Received 1007200 bytes in 2.2 seconds 17 | ``` 18 | 19 | You can see here that the ipxe.efi binary is downloaded three times in a row. 
20 | 21 | [Back to Index](../README.md) 22 | -------------------------------------------------------------------------------- /operations/network/management_network/mellanox/typical_mlag_switch_configuration.md: -------------------------------------------------------------------------------- 1 | # Typical configuration of MLAG between switches 2 | 3 | The intent here is to showcase a very basic MLAG configuration between two spine switches. 4 | 5 | 6 | 7 | 15 | 16 | 17 | 25 | 26 |
8 |
 9 | mlag-vip cray-mlag-domain ip 192.168.255.242 /29 force
10 | no mlag shutdown
11 | mlag system-mac 00:00:5E:00:01:01
12 | interface port-channel 100 ipl 1
13 | interface vlan 4000 ipl 1 peer-address 192.168.255.253
14 | 
18 |
19 | mlag-vip cray-mlag-domain ip 192.168.255.242 /29 force
20 | no mlag shutdown
21 | mlag system-mac 00:00:5E:00:01:5D
22 | interface port-channel 100 ipl 1
23 | interface vlan 4000 ipl 1 peer-address 192.168.255.254
24 | 
27 | 28 | [Back to Index](../README.md) 29 | -------------------------------------------------------------------------------- /operations/network/management_network/mellanox/verify-switches_are_forwarding_dhcp_traffic.md: -------------------------------------------------------------------------------- 1 | # Verify the switches are forwarding DHCP traffic 2 | 3 | If you made it this far and still cannot pxe boot, you may have run into the IP-Helper breaking on the switch. 4 | 5 | [Back to Index](../README.md) 6 | -------------------------------------------------------------------------------- /operations/network/management_network/mellanox/verify_bgp.md: -------------------------------------------------------------------------------- 1 | # Verify BGP 2 | 3 | Verify the BGP neighbors are in the established state on BOTH the switches. 4 | 5 | How to check Aruba BGP status: 6 | 7 | ``` 8 | show bgp ipv4 u s 9 | 10 | VRF : default 11 | BGP Summary 12 | ----------- 13 | Local AS : 65533 BGP Router Identifier : 10.252.0.3 14 | Peers : 4 Log Neighbor Changes : No 15 | Cfg. Hold Time : 180 Cfg. Keep Alive : 60 16 | Confederation Id : 0 17 | 18 | Neighbor Remote-AS MsgRcvd MsgSent Up/Down Time State AdminStatus 19 | 10.252.0.2 65533 45052 45044 02m:02w:02d Established Up 20 | 10.252.1.7 65533 78389 90090 02m:02w:02d Established Up 21 | 10.252.1.8 65533 78384 90059 02m:02w:02d Established Up 22 | 10.252.1.9 65533 78389 90108 02m:02w:02d Established Up 23 | ``` 24 | 25 | [Back to Index](../README.md) 26 | -------------------------------------------------------------------------------- /operations/network/management_network/mellanox/verify_route_to_tftp.md: -------------------------------------------------------------------------------- 1 | # Verify route to TFTP 2 | 3 | On BOTH Aruba switches we need a single route to the TFTP server 10.92.100.60 (your configuration may differ). 4 | 5 | This is needed because there are issues with Aruba ECMP hashing and TFTP traffic. 
6 | 7 | ``` 8 | show ip route 10.92.100.60 9 | 10 | Displaying ipv4 routes selected for forwarding 11 | 12 | '[x/y]' denotes [distance/metric] 13 | 14 | 10.92.100.60/32, vrf default, tag 0 15 | via 10.252.1.9, [70/0], bgp 16 | ``` 17 | 18 | * This route can be a static route or a BGP route that is pinned to a single worker. (1.4.2 patch introduces the BGP pinned route) 19 | * Verify that you can ping the next hop of this route. 20 | * In the example above, the next hop to ping is 10.252.1.9. If it is not reachable, then that is the cause of the problem. 21 | 22 | [Back to Index](../README.md) 23 | -------------------------------------------------------------------------------- /operations/network/management_network/mellanox/very_large.md: -------------------------------------------------------------------------------- 1 | # Very Large (Exascale) 2 | 3 | ![Exascale network](../img/exascale.png) 4 | 5 | [Back to index](README.md). 6 | -------------------------------------------------------------------------------- /operations/network/management_network/mellanox/vlan_interface.md: -------------------------------------------------------------------------------- 1 | # VLAN interface 2 | 3 | The switch also supports classic L3 VLAN interfaces. 
4 | 5 | Relevant Configuration 6 | 7 | Configure the VLAN 8 | 9 | ``` 10 | switch (config) # vlan 6 11 | switch (config vlan 6) # 12 | ``` 13 | 14 | Create and enable the VLAN interface, and assign it an IP address 15 | 16 | ``` 17 | switch(config vlan 6)# ip address 10.1.0.2/16 18 | ``` 19 | 20 | Show Commands to Validate Functionality 21 | 22 | ``` 23 | show vlan 24 | ``` 25 | 26 | Expected Results 27 | 28 | * Step 1: You can configure the VLAN 29 | * Step 2: You can enable the interface and associate it with the VLAN 30 | * Step 3: You can create an IP-enabled VLAN interface, and it is up 31 | * Step 4: You validate the configuration is correct 32 | * Step 5: You can ping from the switch to the client and from the client to the switch 33 | 34 | [Back to Index](../README.md) 35 | -------------------------------------------------------------------------------- /operations/network/management_network/mellanox/web-ui.md: -------------------------------------------------------------------------------- 1 | # Web user interface (WebUI) 2 | 3 | A web-based management user interface provides a visual representation of a subset of the current switch configuration and states. The Web-UI allows for easy access from modern browsers to modify some aspects of the configuration. 
4 | 5 | Relevant Configuration 6 | 7 | Enable the WebUI 8 | 9 | ``` 10 | switch(config)# web enable 11 | ``` 12 | 13 | Configure REST API 14 | 15 | ``` 16 | switch(config)# web enable http|https 17 | ``` 18 | 19 | Show Commands to Validate Functionality 20 | 21 | ``` 22 | show web 23 | ``` 24 | 25 | Expected Results 26 | 27 | * Step 1: You can connect the management interface to a private network 28 | * Step 2: You can enable web-management 29 | * Step 3: You can connect to the IP address from a browser login to the management menu 30 | 31 | [Back to Index](../README.md) 32 | -------------------------------------------------------------------------------- /operations/network/management_network/reinstall.md: -------------------------------------------------------------------------------- 1 | # Reinstall 2 | 3 | Reinstall the same CSM version. 4 | 5 | ***Before continuing with install***, make sure that CANU is running the most current version: 6 | 7 | [Install/Upgrade CANU](canu_install_update.md) 8 | 9 | > **CAUTION:** All of these steps should be done using an out-of-band connection. This process is disruptive and will require downtime. 10 | 11 | ## Procedure 12 | 13 | 1. If the switches being reinstalled are already in the right CSM version, no configuration changes should be required. 14 | 15 | 2. Check the differences between generated configurations and the configurations on the system. 16 | 17 | Refer to [Validate switch configurations](validate_switch_configs.md). 18 | 19 | 3. Run a suite of tests against the management network switches. 20 | 21 | Refer to [Network tests](network_tests.md). 22 | -------------------------------------------------------------------------------- /operations/network/management_network/replace_switch.md: -------------------------------------------------------------------------------- 1 | # Replace Switch 2 | 3 | > **CAUTION:** Do not plug in a switch that is not configured. 
This can cause unpredictable behavior and network outages. 4 | 5 | ## Prerequisites 6 | 7 | - Out-of-band access to the switches (console). 8 | - GA generated switch configuration or backed-up switch configuration exists. 9 | - [Generate Switch Configurations](generate_switch_configs.md) 10 | - [Configuration Management](config_management.md) 11 | 12 | ### Procedure 13 | 14 | The following steps are required to replace a switch. 15 | 16 | 1. Update firmware on new switch. 17 | 18 | See [Update Management Network Firmware](firmware/update_management_network_firmware.md). 19 | 20 | 1. Apply the configuration. 21 | 22 | See [Apply Switch Configurations](apply_switch_configurations.md). 23 | 24 | 1. Unplug all the network and power cables and remove the failed switch. 25 | 26 | 1. Plug in the network cables. 27 | 28 | 1. Plug in the power cables. 29 | -------------------------------------------------------------------------------- /operations/node_management/Add_Remove_Replace_NCNs/Remove_Switch_Config.md: -------------------------------------------------------------------------------- 1 | # Remove Switch Configuration for NCN 2 | 3 | ## Description 4 | 5 | Update the network switches for the NCN that was removed. 6 | 7 | ## Procedure 8 | 9 | ### Update Networking to Remove NCN 10 | 11 | Details coming soon. 12 | 13 | ## Next Step 14 | 15 | Proceed to the next step to [Redeploy Services](Redeploy_Services.md) or return to the main [Add, Remove, Replace, or Move NCNs](Add_Remove_Replace_NCNs.md) page. 16 | -------------------------------------------------------------------------------- /operations/node_management/Add_Remove_Replace_NCNs/Update_Firmware.md: -------------------------------------------------------------------------------- 1 | # Update Firmware 2 | 3 | ## Description 4 | 5 | Use FAS to update the firmware and set the BMC password. 6 | 7 | ## Procedure 8 | 9 | See [Update Firmware](../../firmware/Update_Firmware_with_FAS.md). 
10 | 11 | Proceed to the next step to [Update NCN BIOS TPM State](Update_NCN_BIOS_TPM_State.md) or return to the main [Add, Remove, Replace, or Move NCNs](Add_Remove_Replace_NCNs.md) page. 12 | -------------------------------------------------------------------------------- /operations/node_management/Find_Node_Type_and_Manufacturer.md: -------------------------------------------------------------------------------- 1 | # Find Node Type and Manufacturer 2 | 3 | There are three different vendors providing nodes for air-cooled cabinets, which are Gigabyte, Intel, and HPE. The Hardware State Manager \(HSM\) contains the information required to determine which type of air-cooled node is installed. The endpoint returned in the HSM command can be used to determine the manufacturer. 4 | 5 | HPE nodes contain the /redfish/v1/Systems/1 endpoint: 6 | 7 | ``` 8 | cray hsm inventory componentEndpoints describe XNAME --format json | jq '.RedfishURL' 9 | "x3000c0s18b0/redfish/v1/Systems/1" 10 | ``` 11 | 12 | Gigabyte nodes contain the /redfish/v1/Systems/Self endpoint: 13 | 14 | ``` 15 | cray hsm inventory componentEndpoints describe XNAME --format json | jq '.RedfishURL' 16 | "x3000c0s7b0/redfish/v1/Systems/Self" 17 | ``` 18 | 19 | Intel nodes contain the /redfish/v1/Systems/SERIAL\_NUMBER endpoint: 20 | 21 | ``` 22 | cray hsm inventory componentEndpoints describe XNAME --format json | jq '.RedfishURL' 23 | "x3000c0s15b0/redfish/v1/Systems/BQWT92000021" 24 | ``` 25 | 26 | -------------------------------------------------------------------------------- /operations/node_management/NCN_Identify_Drives_Using_ledctl.md: -------------------------------------------------------------------------------- 1 | # NCN Drive Identification 2 | 3 | Basic usage for the ledmon/ledctl software for drive identification using the drive LEDs. 
4 | 5 | ## Usage 6 | 7 | Turn on LED locator beacon 8 | 9 | ```bash 10 | ledctl locate=/dev/ 11 | ``` 12 | 13 | Turn off LED locator beacon 14 | 15 | ```bash 16 | ledctl locate_off=/dev/ 17 | ``` 18 | -------------------------------------------------------------------------------- /operations/node_management/Rebuild_NCNs/Validate_Boot_Loader.md: -------------------------------------------------------------------------------- 1 | # Validate Boot Loader 2 | 3 | Perform the following steps **on `ncn-m001`**. 4 | 5 | 1. (`ncn-m001#`) Run the script to ensure the local `BOOTRAID` has a valid kernel, `initrd`, and `grub.cfg`. 6 | 7 | ```bash 8 | pdsh -b -w $(grep -oP 'ncn-\w\d+' /etc/hosts | sort -u | tr -t '\n' ',') ' 9 | /opt/cray/tests/install/ncn/scripts/check_bootloader.sh 10 | ' 11 | ``` 12 | 13 | If the script fails because of 'Host key verification' failures, then follow the documentation to [Apply root SSH keys to NCNs](../../security_and_authentication/SSH_Keys.md#procedure-apply-root-ssh-keys-to-ncns-standalone). 14 | 15 | ## Next Step 16 | 17 | If executing this procedure as part of an NCN rebuild, return to the main [Rebuild NCNs](Rebuild_NCNs.md#storage-node) page and proceed with the next step. 18 | -------------------------------------------------------------------------------- /operations/node_management/Replace_a_Standard_Rack_Node.md: -------------------------------------------------------------------------------- 1 | # Replace a Standard rack node from a System 2 | 3 | This procedure will replace a standard node in an HPE Cray EX system. 4 | 5 | ## Procedure 6 | 7 | 1. Follow the [Removing a Standard Node from a System](Removing_a_Standard_Node_from_a_System.md) procedure to remove the node from the system. 8 | 1. Follow the [Add a Standard Rack Node](Add_a_Standard_Rack_Node.md) procedure to add the replacement node to the system. 
9 | -------------------------------------------------------------------------------- /operations/node_management/Repurpose_Compute_as_UAN.md: -------------------------------------------------------------------------------- 1 | # Repurpose a Compute Node as a UAN 2 | 3 | It is possible to repurpose a compute node to be used as a User Access Node (UAN). This is typically done when the processor type of the compute node is not yet available in a UAN server. 4 | 5 | For more information, see the **Repurposing a Compute Node as a UAN** section of the *HPE Cray User Access Node (UAN) Software Administration Guide (`S-8033`)*. 6 | -------------------------------------------------------------------------------- /operations/node_management/Use_the_Physical_KVM.md: -------------------------------------------------------------------------------- 1 | # Use the Physical KVM 2 | 3 | For those who prefer to stand in front of the system and use a physically connected keyboard, mouse, and monitor, Cray provides a rack-mount-extendable KVM unit installed in rack unit slot 23 \(RU23\) of the management cabinet. It is connected to the first non-compute node \(NCN\) by default. 4 | 5 | To use it, pull it out and raise the lid. 6 | 7 | ![KVM Unit Pulled Out](../../img/operations/KVM_Unit_Pulled_Out.png "KVM Unit Pulled Out") 8 | 9 | ![KVM Unit Opened](../../img/operations/KVM_Unit_Opened.png "KVM Unit Opened") 10 | 11 | To bring up the main menu \(shown in following figure\), press **Prnt Scrn**. 12 | 13 | Each node in the system \(except ClusterStor\) appears in the main menu associated with a port. The first NCN is port 01, the other three NCNs are ports 02–04, and the compute nodes are 05–08. 14 | 15 | To move to any node in the system, use the arrow keys and press **Enter**. The login screen for that node will appear. 
16 | 17 | ![KVM Main Menu](../../img/operations/KVM_Main_Menu.png "KVM Main Menu") 18 | 19 | -------------------------------------------------------------------------------- /operations/package_repository_management/Package_Repository_Management.md: -------------------------------------------------------------------------------- 1 | # Package Repository Management 2 | 3 | Repositories are added to systems to extend the system functionality beyond what is initially delivered. The Sonatype Nexus Repository Manager is the primary method for 4 | repository management. Nexus hosts the Yum, Docker, `raw`, and Helm repositories for software and firmware content. 5 | 6 | Refer to the following for more information about Nexus: 7 | 8 | - [The official Sonatype documentation](https://help.sonatype.com/repomanager3) 9 | - [Manage Repositories with Nexus](Manage_Repositories_with_Nexus.md) 10 | -------------------------------------------------------------------------------- /operations/power_management/Power_Off_Storage_Cabinets.md: -------------------------------------------------------------------------------- 1 | # Power Off Storage Cabinets 2 | 3 | Power off storage nodes and management switches in standard racks. 4 | 5 | ## Power off standard rack PDU circuit breakers 6 | 7 | **CAUTION:** The Lustre or Spectrum Scale (GPFS) file systems on nodes and switches in storage cabinets should only 8 | be powered off when it has been confirmed that the file systems have been cleanly shut down. See the procedures in 9 | [Power Off the External File Systems](System_Power_Off_Procedures.md#Power_off_the_External_File_systems). 10 | 11 | 1. Set each cabinet PDU circuit breaker to `OFF`. 12 | 13 | A slotted screwdriver may be required to open PDU circuit breakers. 14 | 15 | 1. To power off Motivair liquid-cooled chilled doors and CDUs, locate the power off switch on the CDU control panel and set it to `OFF`. 
16 | 17 | Refer to vendor documentation for the chilled-door cooling system for power control procedures when chilled doors are installed on standard racks. 18 | 19 | ## Next step 20 | 21 | Return to [System Power Off Procedures](System_Power_Off_Procedures.md) and continue with next step. 22 | -------------------------------------------------------------------------------- /operations/resiliency/Resiliency.md: -------------------------------------------------------------------------------- 1 | # Resiliency 2 | 3 | HPE Cray EX systems are designed so that system management services \(SMS\) are fully resilient and that there is no single point of failure. 4 | 5 | -------------------------------------------------------------------------------- /operations/security_and_authentication/Change_the_LDAP_Server_IP_Address_for_New_LDAP_Server_Content.md: -------------------------------------------------------------------------------- 1 | # Change the LDAP Server IP Address for New LDAP Server Content 2 | 3 | Delete the old LDAP user federation and create a new one. This procedure should only be done if the LDAP server is being replaced by a different LDAP server that has different contents. 4 | 5 | Refer to [Change the LDAP Server IP Address for Existing LDAP Server Content](Change_the_LDAP_Server_IP_Address_for_Existing_LDAP_Server_Content.md) if the new LDAP server content matches the previous LDAP server content. 6 | 7 | ## Prerequisites 8 | 9 | The LDAP server is being replaced by a different LDAP server that has different contents. For example, different users and groups. 10 | 11 | ## Procedure 12 | 13 | 1. Remove the LDAP user federation from Keycloak. 14 | 15 | Follow the procedure in [Remove the LDAP User Federation from Keycloak](Remove_the_LDAP_User_Federation_from_Keycloak.md). 16 | 17 | 1. Re-add the LDAP user federation in Keycloak. 18 | 19 | Follow the procedure in [Add LDAP User Federation](Add_LDAP_User_Federation.md). 
20 | -------------------------------------------------------------------------------- /operations/security_and_authentication/Transport_Layer_Security_for_Ingress_Services.md: -------------------------------------------------------------------------------- 1 | # Transport Layer Security \(TLS\) for Ingress Services 2 | 3 | The Istio Secure Gateway and Keycloak Gatekeeper services utilize Cert-manager for their Transport Layer Security \(TLS\) certificate and private key. Certificate custom resource definitions are deployed as part of Helm Charts for these services. 4 | 5 | To view properties of the Istio Secure Gateway certificate: 6 | 7 | ```bash 8 | kubectl describe certificate -n istio-system ingress-gateway-cert 9 | ``` 10 | 11 | To view the properties of the Keycloak Gatekeeper certificate: 12 | 13 | ```bash 14 | kubectl describe certificate -n services keycloak-gatekeeper 15 | ``` 16 | 17 | An outstanding bug in the Keycloak Gatekeeper service prevents it from updating its TLS certificate and key material upon Cert-manager renewal. Thus, it may be necessary to monitor the situation and proactively renew/force reload Keycloak Gatekeeper. 
18 | 19 | -------------------------------------------------------------------------------- /operations/security_and_authentication/Troubleshoot_Kyverno_Configuration_manually.md: -------------------------------------------------------------------------------- 1 | # Troubleshoot Kyverno configuration manually 2 | 3 | ## Check Kyverno pods 4 | 5 | (`ncn-mw#`) Run the following script to verify that the expected Kyverno pods are running: 6 | 7 | ```bash 8 | /opt/cray/tests/install/livecd/scripts/k8s_kyverno_pods_running.sh -p 9 | ``` 10 | 11 | ## Check Kyverno policy report 12 | 13 | (`ncn-mw#`) Run the following script in order to check the Kyverno policy report for any failures, warnings, errors, and skipped policies: 14 | 15 | ```bash 16 | /opt/cray/tests/install/livecd/scripts/k8s_kyverno_polr_list.sh -p 17 | ``` 18 | 19 | ## More information 20 | 21 | See [Kyverno](../kubernetes/Kyverno.md). 22 | -------------------------------------------------------------------------------- /operations/system_admin_toolkit/about_sat/README.md: -------------------------------------------------------------------------------- 1 | # About SAT 2 | 3 | - [Introduction to SAT](Introduction_to_SAT.md) 4 | - [SAT Command Overview](SAT_Command_Overview.md) 5 | - [SAT in CSM](SAT_in_CSM.md) 6 | - [SAT Dependencies](SAT_Dependencies.md) 7 | -------------------------------------------------------------------------------- /operations/system_admin_toolkit/configuration/Configure_Multi-tenancy_Optional.md: -------------------------------------------------------------------------------- 1 | # Configure Multi-tenancy (Optional) 2 | 3 | SAT supports supplying tenant information to CSM services in order to allow 4 | tenant admins to use SAT within their tenant. By default, the tenant name is 5 | not set, and SAT will not send any tenant information with its requests to 6 | CSM services. Configure the tenant name either in the SAT configuration file 7 | or on the command line. 
8 | 9 | ## Configure the Tenant Name in the SAT Configuration File 10 | 11 | Set the tenant name in the SAT configuration file using the 12 | `api_gateway.tenant_name` option. 13 | 14 | Here is an example: 15 | 16 | ```toml 17 | [api_gateway] 18 | tenant_name = "my_tenant" 19 | ``` 20 | 21 | ## Configure the Tenant Name on the Command Line 22 | 23 | Set the tenant name for each `sat` invocation using the `--tenant-name` 24 | option. The `--tenant-name` option must be specified before the subcommand 25 | name. 26 | 27 | (`ncn-m001#`) Here is an example: 28 | 29 | ```bash 30 | sat --tenant-name=my_tenant status 31 | ``` 32 | -------------------------------------------------------------------------------- /operations/system_admin_toolkit/configuration/README.md: -------------------------------------------------------------------------------- 1 | # SAT Configuration 2 | 3 | - [Authenticate SAT Commands](Authenticate_SAT_Commands.md) 4 | - [Generate SAT S3 Credentials](Generate_SAT_S3_Credentials.md) 5 | - [Set System Revision Information](Set_System_Revision_Information.md) 6 | - [Configure Multi-tenancy (Optional)](Configure_Multi-tenancy_Optional.md) 7 | -------------------------------------------------------------------------------- /operations/system_admin_toolkit/usage/README.md: -------------------------------------------------------------------------------- 1 | # SAT Usage 2 | 3 | - [SAT Bootprep](SAT_Bootprep.md) 4 | - [SAT and IUF](SAT_and_IUF.md) 5 | - [Change BOS Version](Change_BOS_Version.md) 6 | - [Change CFS Version](Change_CFS_Version.md) 7 | - [Configure Retries to API requests in SAT](Configure_Retries.md) 8 | -------------------------------------------------------------------------------- /operations/system_layout_service/Dump_SLS_Information.md: -------------------------------------------------------------------------------- 1 | # Dump SLS Information 2 | 3 | Perform a dump of the System Layout Service \(SLS\) database. 
4 | 5 | This procedure will create the file `sls_dump.json` in the current directory. 6 | 7 | This procedure preserves the information stored in SLS when backing up or reinstalling the system. 8 | 9 | ## Prerequisites 10 | 11 | - The Cray Command Line Interface is configured. See [Configure the Cray CLI](../configure_cray_cli.md). 12 | - This procedure requires administrative privileges. 13 | 14 | ## Procedure 15 | 16 | (`ncn-mw#`) Perform the SLS dump. 17 | The SLS dump will be stored in the `sls_dump.json` file. The `sls_dump.json` file is required to perform the SLS load state operation. 18 | 19 | ```bash 20 | cray sls dumpstate list --format json > sls_dump.json 21 | ``` 22 | -------------------------------------------------------------------------------- /operations/system_layout_service/Load_SLS_Database_with_Dump_File.md: -------------------------------------------------------------------------------- 1 | # Load SLS Database with Dump File 2 | 3 | Load the contents of the SLS dump file to restore SLS to the state of the system at the time of the dump. This will upload and overwrite the current SLS database with the contents of the SLS dump file. 4 | 5 | Use this procedure to restore SLS data after a system re-install. 6 | 7 | ## Prerequisites 8 | 9 | - The System Layout Service \(SLS\) database has been dumped. See [Dump SLS Information](Dump_SLS_Information.md) for more information. 10 | - The Cray Command Line Interface is configured. See [Configure the Cray CLI](../configure_cray_cli.md). 11 | - This procedure requires administrative privileges. 12 | 13 | ## Procedure 14 | 15 | (`ncn-mw#`) Load the dump file into SLS. 16 | This will upload and overwrite the current SLS database with the contents of the posted file. 
17 | 18 | ```bash 19 | cray sls loadstate create sls_dump.json 20 | ``` 21 | -------------------------------------------------------------------------------- /operations/system_management_health/Prometheus_Kafka_Error.md: -------------------------------------------------------------------------------- 1 | # `prometheus-kafka-adapter` errors during installation 2 | 3 | ## Symptom 4 | 5 | On a fresh install of CSM, the Prometheus log has errors similar to the following: 6 | 7 | ```text 8 | ts=2022-12-05T13:35:53.495Z caller=dedupe.go:112 component=remote level=warn remote_name=2eb187 9 | url=http://prometheus-kafka-adapter.sma.svc.cluster.local:80/receive msg="Failed to send batch, retrying" 10 | err="Post \"http://prometheus-kafka-adapter.sma.svc.cluster.local:80/receive\": 11 | dial tcp: lookup prometheus-kafka-adapter.sma.svc.cluster.local on 10.16.0.10:53: no such host" 12 | ``` 13 | 14 | ## Explanation 15 | 16 | This Kafka service does not exist, because the [System Monitoring Application (SMA)](../../glossary.md#system-monitoring-application-sma) 17 | has not been installed yet. As a result, the retry errors shown above are logged. Prometheus can operate without SMA Kafka and it will 18 | periodically retry the connection to Kafka. These errors will be logged until SMA is installed. Therefore, if they are seen before SMA is 19 | installed, then disregard them. 20 | -------------------------------------------------------------------------------- /operations/utility_storage/Troubleshoot_Failure_to_Get_Ceph_Health.md: -------------------------------------------------------------------------------- 1 | # Troubleshoot Failure to Get Ceph Health 2 | 3 | Inspect Ceph commands that are failing by looking into the Ceph monitor logs \(ceph-mon\). For example, the monitor logs can help determine any issues causing the `ceph -s` command to hang. 4 | 5 | Troubleshoot Ceph commands failing to run and determine how to make them operational again. 
These commands need to be operational to obtain critical information about the Ceph cluster on the system. 6 | 7 | ## Prerequisites 8 | 9 | This procedure requires admin privileges. 10 | 11 | ## Procedure 12 | 13 | 1. Verify the node being used is running ceph-mon. 14 | 15 | 1. Verify ceph-mon processes are running on the first three NCN storage nodes. 16 | 17 | - See [Manage Ceph Services](Manage_Ceph_Services.md) for more information. 18 | 19 | If more than three storage nodes exist, check the output of `ceph orch ps` for more information. 20 | 21 | 1. Check ceph-mon logs to see if the cluster is out of quorum. 22 | 23 | Verify the issue is resolved by rerunning the Ceph command that failed. 24 | 25 | -------------------------------------------------------------------------------- /scripts/.shellspec: -------------------------------------------------------------------------------- 1 | --require spec_helper 2 | 3 | ## Default kcov (coverage) options 4 | # --kcov-options "--include-path=. 
--path-strip-level=1" 5 | # --kcov-options "--include-pattern=.sh" 6 | # --kcov-options "--exclude-pattern=/.shellspec,/spec/,/coverage/,/report/" 7 | 8 | ## Example: Include script "myprog" with no extension 9 | # --kcov-options "--include-pattern=.sh,myprog" 10 | 11 | ## Example: Only specified files/directories 12 | # --kcov-options "--include-pattern=myprog,/lib/" 13 | -------------------------------------------------------------------------------- /scripts/csm_rbd_tool.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/scripts/csm_rbd_tool.tar.gz -------------------------------------------------------------------------------- /scripts/operations/configuration/python_lib/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cray-HPE/docs-csm/89f87d6b49de375941a9b3b616524b936246208b/scripts/operations/configuration/python_lib/__init__.py -------------------------------------------------------------------------------- /scripts/operations/firmware/recipes/cray_chassisBMC_BMC.json: -------------------------------------------------------------------------------- 1 | { 2 | "inventoryHardwareFilter": { 3 | "manufacturer": "cray" 4 | }, 5 | "stateComponentFilter": { 6 | "deviceTypes": [ 7 | "chassisBMC" 8 | ] 9 | }, 10 | "targetFilter": { 11 | "targets": [ 12 | "BMC" 13 | ] 14 | }, 15 | "command": { 16 | "version": "latest", 17 | "tag": "default", 18 | "overrideDryrun": false, 19 | "restoreNotPossibleOverride": true, 20 | "timeLimit": 1000, 21 | "description": "Upgrade of Cray Chassis Controllers" 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /scripts/operations/firmware/recipes/cray_nodeBMC_BMC.json: -------------------------------------------------------------------------------- 1 | { 2 | 
"inventoryHardwareFilter": { 3 | "manufacturer": "cray" 4 | }, 5 | "stateComponentFilter": { 6 | "deviceTypes": [ 7 | "nodeBMC" 8 | ] 9 | }, 10 | "targetFilter": { 11 | "targets": [ 12 | "BMC" 13 | ] 14 | }, 15 | "command": { 16 | "version": "latest", 17 | "tag": "default", 18 | "overrideDryrun": false, 19 | "restoreNotPossibleOverride": true, 20 | "timeLimit": 1000, 21 | "description": "Upgrade of Olympus node BMCs" 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /scripts/operations/firmware/recipes/cray_nodeBMC_node0AccFPGA0.json: -------------------------------------------------------------------------------- 1 | { 2 | "inventoryHardwareFilter": { 3 | "manufacturer": "cray" 4 | }, 5 | "stateComponentFilter": { 6 | "deviceTypes": [ 7 | "nodeBMC" 8 | ] 9 | }, 10 | "targetFilter": { 11 | "targets": [ 12 | "Node0.AccFPGA0" 13 | ] 14 | }, 15 | "command": { 16 | "version": "latest", 17 | "tag": "default", 18 | "overrideDryrun": false, 19 | "restoreNotPossibleOverride": true, 20 | "timeLimit": 1000, 21 | "description": "Upgrade of Node Redstone FPGA" 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /scripts/operations/firmware/recipes/cray_nodeBMC_node0AccVBIOS.json: -------------------------------------------------------------------------------- 1 | { 2 | "inventoryHardwareFilter": { 3 | "manufacturer": "cray" 4 | }, 5 | "stateComponentFilter": { 6 | "deviceTypes": [ 7 | "nodeBMC" 8 | ] 9 | }, 10 | "targetFilter": { 11 | "targets": [ 12 | "Node0.AccVBIOS" 13 | ] 14 | }, 15 | "command": { 16 | "version": "latest", 17 | "tag": "default", 18 | "overrideDryrun": false, 19 | "restoreNotPossibleOverride": true, 20 | "timeLimit": 1000, 21 | "description": "Upgrade of Node0 AccVBIOS" 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /scripts/operations/firmware/recipes/cray_nodeBMC_node0BIOS.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "inventoryHardwareFilter": { 3 | "manufacturer": "cray" 4 | }, 5 | "stateComponentFilter": { 6 | "deviceTypes": [ 7 | "nodeBMC" 8 | ] 9 | }, 10 | "targetFilter": { 11 | "targets": [ 12 | "Node0.BIOS" 13 | ] 14 | }, 15 | "command": { 16 | "version": "latest", 17 | "tag": "default", 18 | "overrideDryrun": false, 19 | "restoreNotPossibleOverride": true, 20 | "timeLimit": 1000, 21 | "description": "Upgrade of Node0 BIOS" 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /scripts/operations/firmware/recipes/cray_nodeBMC_node0ManagementEthernet.json: -------------------------------------------------------------------------------- 1 | { 2 | "inventoryHardwareFilter": { 3 | "manufacturer": "cray" 4 | }, 5 | "stateComponentFilter": { 6 | "deviceTypes": [ 7 | "nodeBMC" 8 | ] 9 | }, 10 | "targetFilter": { 11 | "targets": [ 12 | "Node0.ManagementEthernet" 13 | ] 14 | }, 15 | "command": { 16 | "version": "latest", 17 | "tag": "default", 18 | "overrideDryrun": false, 19 | "restoreNotPossibleOverride": true, 20 | "timeLimit": 1000, 21 | "description": "Upgrade of Node0 Management Ethernet" 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /scripts/operations/firmware/recipes/cray_nodeBMC_node1AccFPGA0.json: -------------------------------------------------------------------------------- 1 | { 2 | "inventoryHardwareFilter": { 3 | "manufacturer": "cray" 4 | }, 5 | "stateComponentFilter": { 6 | "deviceTypes": [ 7 | "nodeBMC" 8 | ] 9 | }, 10 | "targetFilter": { 11 | "targets": [ 12 | "Node1.AccFPGA0" 13 | ] 14 | }, 15 | "command": { 16 | "version": "latest", 17 | "tag": "default", 18 | "overrideDryrun": false, 19 | "restoreNotPossibleOverride": true, 20 | "timeLimit": 1000, 21 | "description": "Upgrade of Node Redstone FPGA" 22 | } 23 | } 24 | 
-------------------------------------------------------------------------------- /scripts/operations/firmware/recipes/cray_nodeBMC_node1BIOS.json: -------------------------------------------------------------------------------- 1 | { 2 | "inventoryHardwareFilter": { 3 | "manufacturer": "cray" 4 | }, 5 | "stateComponentFilter": { 6 | "deviceTypes": [ 7 | "nodeBMC" 8 | ] 9 | }, 10 | "targetFilter": { 11 | "targets": [ 12 | "Node1.BIOS" 13 | ] 14 | }, 15 | "command": { 16 | "version": "latest", 17 | "tag": "default", 18 | "overrideDryrun": false, 19 | "restoreNotPossibleOverride": true, 20 | "timeLimit": 1000, 21 | "description": "Upgrade of Node1 BIOS" 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /scripts/operations/firmware/recipes/cray_nodeBMC_node1ManagementEthernet.json: -------------------------------------------------------------------------------- 1 | { 2 | "inventoryHardwareFilter": { 3 | "manufacturer": "cray" 4 | }, 5 | "stateComponentFilter": { 6 | "deviceTypes": [ 7 | "nodeBMC" 8 | ] 9 | }, 10 | "targetFilter": { 11 | "targets": [ 12 | "Node1.ManagementEthernet" 13 | ] 14 | }, 15 | "command": { 16 | "version": "latest", 17 | "tag": "default", 18 | "overrideDryrun": false, 19 | "restoreNotPossibleOverride": true, 20 | "timeLimit": 1000, 21 | "description": "Upgrade of Node1 Management Ethernet" 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /scripts/operations/firmware/recipes/cray_nodeBMC_node2BIOS.json: -------------------------------------------------------------------------------- 1 | { 2 | "inventoryHardwareFilter": { 3 | "manufacturer": "cray" 4 | }, 5 | "stateComponentFilter": { 6 | "deviceTypes": [ 7 | "nodeBMC" 8 | ] 9 | }, 10 | "targetFilter": { 11 | "targets": [ 12 | "Node2.BIOS" 13 | ] 14 | }, 15 | "command": { 16 | "version": "latest", 17 | "tag": "default", 18 | "overrideDryrun": false, 19 | "restoreNotPossibleOverride": true, 20 | 
"timeLimit": 1000, 21 | "description": "Upgrade of Node2 BIOS" 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /scripts/operations/firmware/recipes/cray_nodeBMC_node2ManagementEthernet.json: -------------------------------------------------------------------------------- 1 | { 2 | "inventoryHardwareFilter": { 3 | "manufacturer": "cray" 4 | }, 5 | "stateComponentFilter": { 6 | "deviceTypes": [ 7 | "nodeBMC" 8 | ] 9 | }, 10 | "targetFilter": { 11 | "targets": [ 12 | "Node2.ManagementEthernet" 13 | ] 14 | }, 15 | "command": { 16 | "version": "latest", 17 | "tag": "default", 18 | "overrideDryrun": false, 19 | "restoreNotPossibleOverride": true, 20 | "timeLimit": 1000, 21 | "description": "Upgrade of Node2 Management Ethernet" 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /scripts/operations/firmware/recipes/cray_nodeBMC_node3BIOS.json: -------------------------------------------------------------------------------- 1 | { 2 | "inventoryHardwareFilter": { 3 | "manufacturer": "cray" 4 | }, 5 | "stateComponentFilter": { 6 | "deviceTypes": [ 7 | "nodeBMC" 8 | ] 9 | }, 10 | "targetFilter": { 11 | "targets": [ 12 | "Node3.BIOS" 13 | ] 14 | }, 15 | "command": { 16 | "version": "latest", 17 | "tag": "default", 18 | "overrideDryrun": false, 19 | "restoreNotPossibleOverride": true, 20 | "timeLimit": 1000, 21 | "description": "Upgrade of Node3 BIOS" 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /scripts/operations/firmware/recipes/cray_nodeBMC_node3ManagementEthernet.json: -------------------------------------------------------------------------------- 1 | { 2 | "inventoryHardwareFilter": { 3 | "manufacturer": "cray" 4 | }, 5 | "stateComponentFilter": { 6 | "deviceTypes": [ 7 | "nodeBMC" 8 | ] 9 | }, 10 | "targetFilter": { 11 | "targets": [ 12 | "Node3.ManagementEthernet" 13 | ] 14 | }, 15 | "command": { 16 | "version": 
"latest", 17 | "tag": "default", 18 | "overrideDryrun": false, 19 | "restoreNotPossibleOverride": true, 20 | "timeLimit": 1000, 21 | "description": "Upgrade of Node3 Management Ethernet" 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /scripts/operations/firmware/recipes/cray_nodeBMC_nodeAccFPGA0.json: -------------------------------------------------------------------------------- 1 | { 2 | "inventoryHardwareFilter": { 3 | "manufacturer": "cray" 4 | }, 5 | "stateComponentFilter": { 6 | "deviceTypes": [ 7 | "nodeBMC" 8 | ] 9 | }, 10 | "targetFilter": { 11 | "targets": [ 12 | "Node0.AccFPGA0", 13 | "Node1.AccFPGA0" 14 | ] 15 | }, 16 | "command": { 17 | "version": "latest", 18 | "tag": "default", 19 | "overrideDryrun": false, 20 | "restoreNotPossibleOverride": true, 21 | "timeLimit": 1000, 22 | "description": "Upgrade of Node Redstone FPGA" 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /scripts/operations/firmware/recipes/cray_nodeBMC_nodeAccUC.json: -------------------------------------------------------------------------------- 1 | { 2 | "inventoryHardwareFilter": { 3 | "manufacturer": "cray" 4 | }, 5 | "stateComponentFilter": { 6 | "deviceTypes": [ 7 | "nodeBMC" 8 | ] 9 | }, 10 | "targetFilter": { 11 | "targets": [ 12 | "Node0.AccUC" 13 | ] 14 | }, 15 | "command": { 16 | "version": "latest", 17 | "tag": "default", 18 | "overrideDryrun": false, 19 | "restoreNotPossibleOverride": true, 20 | "timeLimit": 1000, 21 | "description": "Upgrade of Node AccUC" 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /scripts/operations/firmware/recipes/cray_nodeBMC_nodeBIOS.json: -------------------------------------------------------------------------------- 1 | { 2 | "inventoryHardwareFilter": { 3 | "manufacturer": "cray" 4 | }, 5 | "stateComponentFilter": { 6 | "deviceTypes": [ 7 | "nodeBMC" 8 | ] 9 | }, 10 | "targetFilter": 
{ 11 | "targets": [ 12 | "Node0.BIOS", 13 | "Node1.BIOS", 14 | "Node2.BIOS", 15 | "Node3.BIOS" 16 | ] 17 | }, 18 | "command": { 19 | "version": "latest", 20 | "tag": "default", 21 | "overrideDryrun": false, 22 | "restoreNotPossibleOverride": true, 23 | "timeLimit": 1000, 24 | "description": "Upgrade of Node BIOS" 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /scripts/operations/firmware/recipes/cray_nodeBMC_nodeManagementEthernet.json: -------------------------------------------------------------------------------- 1 | { 2 | "inventoryHardwareFilter": { 3 | "manufacturer": "cray" 4 | }, 5 | "stateComponentFilter": { 6 | "deviceTypes": [ 7 | "nodeBMC" 8 | ] 9 | }, 10 | "targetFilter": { 11 | "targets": [ 12 | "Node0.ManagementEthernet", 13 | "Node1.ManagementEthernet", 14 | "Node2.ManagementEthernet", 15 | "Node3.ManagementEthernet" 16 | ] 17 | }, 18 | "command": { 19 | "version": "latest", 20 | "tag": "default", 21 | "overrideDryrun": false, 22 | "restoreNotPossibleOverride": true, 23 | "timeLimit": 1000, 24 | "description": "Upgrade of Node Management Ethernet" 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /scripts/operations/firmware/recipes/cray_routerBMC_BMC.json: -------------------------------------------------------------------------------- 1 | { 2 | "inventoryHardwareFilter": { 3 | "manufacturer": "cray" 4 | }, 5 | "stateComponentFilter": { 6 | "deviceTypes": [ 7 | "routerBMC" 8 | ] 9 | }, 10 | "targetFilter": { 11 | "targets": [ 12 | "BMC" 13 | ] 14 | }, 15 | "command": { 16 | "version": "latest", 17 | "tag": "default", 18 | "overrideDryrun": false, 19 | "restoreNotPossibleOverride": true, 20 | "timeLimit": 1000, 21 | "description": "Upgrade of Router BMC" 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /scripts/operations/firmware/recipes/foxconn_nodeBMC_bios.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "inventoryHardwareFilter": { 3 | "manufacturer": "foxconn" 4 | }, 5 | "stateComponentFilter": { 6 | "deviceTypes": [ 7 | "NodeBMC" 8 | ] 9 | }, 10 | "targetFilter": { 11 | "targets": [ 12 | "bios_active" 13 | ] 14 | }, 15 | "command": { 16 | "version": "latest", 17 | "tag": "default", 18 | "overrideDryrun": false, 19 | "restoreNotPossibleOverride": true, 20 | "timeLimit": 1000, 21 | "description": "Upgrade of Foxconn node bios_active" 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /scripts/operations/firmware/recipes/foxconn_nodeBMC_bmc.json: -------------------------------------------------------------------------------- 1 | { 2 | "inventoryHardwareFilter": { 3 | "manufacturer": "foxconn" 4 | }, 5 | "stateComponentFilter": { 6 | "deviceTypes": [ 7 | "NodeBMC" 8 | ] 9 | }, 10 | "targetFilter": { 11 | "targets": [ 12 | "bmc_active" 13 | ] 14 | }, 15 | "command": { 16 | "version": "latest", 17 | "tag": "default", 18 | "overrideDryrun": false, 19 | "restoreNotPossibleOverride": true, 20 | "timeLimit": 1000, 21 | "description": "Upgrade of Foxconn node bmc_active" 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /scripts/operations/firmware/recipes/foxconn_nodeBMC_erot.json: -------------------------------------------------------------------------------- 1 | { 2 | "inventoryHardwareFilter": { 3 | "manufacturer": "foxconn" 4 | }, 5 | "stateComponentFilter": { 6 | "deviceTypes": [ 7 | "NodeBMC" 8 | ] 9 | }, 10 | "targetFilter": { 11 | "targets": [ 12 | "erot_active" 13 | ] 14 | }, 15 | "command": { 16 | "version": "latest", 17 | "tag": "default", 18 | "overrideDryrun": false, 19 | "restoreNotPossibleOverride": true, 20 | "timeLimit": 1000, 21 | "description": "Upgrade of Foxconn node erot_active" 22 | } 23 | } 24 | 
-------------------------------------------------------------------------------- /scripts/operations/firmware/recipes/foxconn_nodeBMC_fpga.json: -------------------------------------------------------------------------------- 1 | { 2 | "inventoryHardwareFilter": { 3 | "manufacturer": "foxconn" 4 | }, 5 | "stateComponentFilter": { 6 | "deviceTypes": [ 7 | "NodeBMC" 8 | ] 9 | }, 10 | "targetFilter": { 11 | "targets": [ 12 | "fpga_active" 13 | ] 14 | }, 15 | "command": { 16 | "version": "latest", 17 | "tag": "default", 18 | "overrideDryrun": false, 19 | "restoreNotPossibleOverride": true, 20 | "timeLimit": 1000, 21 | "description": "Upgrade of Foxconn node fpga_active" 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /scripts/operations/firmware/recipes/foxconn_nodeBMC_pld.json: -------------------------------------------------------------------------------- 1 | { 2 | "inventoryHardwareFilter": { 3 | "manufacturer": "foxconn" 4 | }, 5 | "stateComponentFilter": { 6 | "deviceTypes": [ 7 | "NodeBMC" 8 | ] 9 | }, 10 | "targetFilter": { 11 | "targets": [ 12 | "pld_active" 13 | ] 14 | }, 15 | "command": { 16 | "version": "latest", 17 | "tag": "default", 18 | "overrideDryrun": false, 19 | "restoreNotPossibleOverride": true, 20 | "timeLimit": 1000, 21 | "description": "Upgrade of Foxconn node pld_active" 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /scripts/operations/firmware/recipes/gigabyte_nodeBMC_BIOS.json: -------------------------------------------------------------------------------- 1 | { 2 | "inventoryHardwareFilter": { 3 | "manufacturer": "gigabyte" 4 | }, 5 | "stateComponentFilter": { 6 | "deviceTypes": [ 7 | "nodeBMC" 8 | ] 9 | }, 10 | "targetFilter": { 11 | "targets": [ 12 | "BIOS" 13 | ] 14 | }, 15 | "command": { 16 | "version": "latest", 17 | "tag": "default", 18 | "overrideDryrun": false, 19 | "restoreNotPossibleOverride": true, 20 | "timeLimit": 4000, 21 
| "description": "Upgrade of Gigabyte node BIOS" 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /scripts/operations/firmware/recipes/gigabyte_nodeBMC_BMC.json: -------------------------------------------------------------------------------- 1 | { 2 | "inventoryHardwareFilter": { 3 | "manufacturer": "gigabyte" 4 | }, 5 | "stateComponentFilter": { 6 | "deviceTypes": [ 7 | "nodeBMC" 8 | ] 9 | }, 10 | "targetFilter": { 11 | "targets": [ 12 | "BMC" 13 | ] 14 | }, 15 | "command": { 16 | "version": "latest", 17 | "tag": "default", 18 | "overrideDryrun": false, 19 | "restoreNotPossibleOverride": true, 20 | "timeLimit": 4000, 21 | "description": "Upgrade of Gigabyte node BMCs" 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /scripts/operations/firmware/recipes/hpe_nodeBMC_iLO5.json: -------------------------------------------------------------------------------- 1 | { 2 | "inventoryHardwareFilter": { 3 | "manufacturer": "hpe" 4 | }, 5 | "stateComponentFilter": { 6 | "deviceTypes": [ 7 | "nodeBMC" 8 | ] 9 | }, 10 | "targetFilter": { 11 | "targets": [ 12 | "iLO 5" 13 | ] 14 | }, 15 | "command": { 16 | "version": "latest", 17 | "tag": "default", 18 | "overrideDryrun": false, 19 | "restoreNotPossibleOverride": true, 20 | "timeLimit": 1000, 21 | "description": "Upgrade of HPE node iLO 5" 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /scripts/operations/firmware/recipes/hpe_nodeBMC_iLO6.json: -------------------------------------------------------------------------------- 1 | { 2 | "inventoryHardwareFilter": { 3 | "manufacturer": "hpe" 4 | }, 5 | "stateComponentFilter": { 6 | "deviceTypes": [ 7 | "nodeBMC" 8 | ] 9 | }, 10 | "targetFilter": { 11 | "targets": [ 12 | "iLO 6" 13 | ] 14 | }, 15 | "command": { 16 | "version": "latest", 17 | "tag": "default", 18 | "overrideDryrun": false, 19 | "restoreNotPossibleOverride": true, 
20 | "timeLimit": 1000, 21 | "description": "Upgrade of HPE node iLO 6" 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /scripts/operations/firmware/recipes/hpe_nodeBMC_systemRom.json: -------------------------------------------------------------------------------- 1 | { 2 | "inventoryHardwareFilter": { 3 | "manufacturer": "hpe" 4 | }, 5 | "stateComponentFilter": { 6 | "deviceTypes": [ 7 | "NodeBMC" 8 | ] 9 | }, 10 | "targetFilter": { 11 | "targets": [ 12 | "System ROM" 13 | ] 14 | }, 15 | "command": { 16 | "version": "latest", 17 | "tag": "default", 18 | "overrideDryrun": false, 19 | "restoreNotPossibleOverride": true, 20 | "timeLimit": 1000, 21 | "description": "Upgrade of HPE node System Rom" 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /scripts/operations/node_management/Add_Remove_Replace_NCNs/sls_utils/schemas/hold/sls_hardware.schema.json.hold: -------------------------------------------------------------------------------- 1 | { 2 | "title": "SLS Hardware Schema - TODO", 3 | "type": "object", 4 | "properties": { 5 | "Hardware": { 6 | "description": "Hardware Data Subsection", 7 | "type": "object" 8 | } 9 | } 10 | } 11 | -------------------------------------------------------------------------------- /scripts/operations/pyscripts/.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__ 2 | data.json 3 | -------------------------------------------------------------------------------- /troubleshooting/index.md: -------------------------------------------------------------------------------- 1 | ./README.md -------------------------------------------------------------------------------- /troubleshooting/known_issues/helm_chart_deploy_timeouts.md: -------------------------------------------------------------------------------- 1 | # Helm Chart Deploy Timeouts 2 | 3 | There are times when installing CSM 
Services (either during fresh install or upgrade) when some helm charts may take longer than five minutes (default) to deploy. 4 | Several charts known to take longer than five minutes have been modified to allow more time, but this page can be used to manually increase this timeout if needed. 5 | 6 | ## Edit the manifest used by Loftsman 7 | 8 | Locate the chart which is taking too long to deploy in the manifest (typically `platform.yaml` or `sysmgmt.yaml`), and add the `timeout` field at the same level as `name` in the manifest: 9 | 10 | ```text 11 | - name: cray-precache-images 12 | source: csm-algol60 13 | version: 0.5.2 14 | namespace: nexus 15 | timeout: 20m0s <------------- 16 | ``` 17 | 18 | ## Continue with the CSM Services install 19 | 20 | After having changed this setting in the manifest, re-running the install (or upgrade) script should successfully deploy the chart(s). 21 | -------------------------------------------------------------------------------- /troubleshooting/known_issues/kubectl_logs_no_space_left_on_device.md: -------------------------------------------------------------------------------- 1 | # Known issue: `kubectl logs -f` returns no space left on device 2 | 3 | On some systems, running `kubectl logs -n <namespace> -f <pod>` returns `no space left on device`. 4 | This can be caused by a lower limit for the `sysctl` setting `fs.inotify.max_user_watches` (defaults to `65536`) in some kernel releases. 5 | This can be fixed by increasing this setting. Note that later versions of the kernel increase this setting by default. 6 | 7 | ## Fix 8 | 9 | Run the following command from a master node. Be sure to change the `-w ncn-w[001-0..]` argument to reflect all of the worker nodes for the system: 10 | 11 | ```bash 12 | pdsh -w ncn-w[001-0..] 'sysctl -w fs.inotify.max_user_watches=524288' 13 | ``` 14 | 15 | Once the `sysctl` command is complete, the `kubectl logs` command should again follow the log for that pod. 
16 | -------------------------------------------------------------------------------- /troubleshooting/known_issues/mellanox_lacp_individual.md: -------------------------------------------------------------------------------- 1 | # Mellanox `lacp-individual` Limitations 2 | 3 | ## Description 4 | 5 | In some failover/maintenance scenarios, administrators may want to shut down one port of the bond on an NCN. 6 | Because of the way Mellanox handles `lacp-individual` mode, the ports need to be shut down from the switch instead of the NCN. 7 | 8 | ## Fix 9 | 10 | Shut down the port on the switch instead of the NCN. 11 | -------------------------------------------------------------------------------- /update_product_stream/index.md: -------------------------------------------------------------------------------- 1 | ./README.md -------------------------------------------------------------------------------- /upgrade/index.md: -------------------------------------------------------------------------------- 1 | ./README.md -------------------------------------------------------------------------------- /upgrade/resource_material/README.md: -------------------------------------------------------------------------------- 1 | # Resource Materials 2 | 3 | Files in this directory (and its sub-directories) are provided as reference material in 4 | support of the automated scripts which are intended to execute as much as possible during 5 | the upgrade process. 
6 | 7 | ## Topics 8 | 9 | * [Worker Reference](k8s/worker-reference.md) 10 | * [Cephadm Reference](storage/cephadm-reference.md) 11 | -------------------------------------------------------------------------------- /upgrade/resource_material/index.md: -------------------------------------------------------------------------------- 1 | ./README.md -------------------------------------------------------------------------------- /upgrade/resource_material/k8s/worker-reference.md: -------------------------------------------------------------------------------- 1 | # Worker-Specific Manual Steps 2 | 3 | 1. Confirm the CFS `configuration_status` for **all** worker nodes before shutting down this worker node. 4 | 5 | (`ncn#`) If the following command reports that the state is `pending`, then the administrator should tail the logs of the CFS pod running on that node 6 | in order to watch the job finish before rebooting this node. If the state is `failed` for this node, then this indicates that the failed CFS job state 7 | preceded this worker rebuild, and that it can be addressed independent of rebuilding this worker. 8 | 9 | This example uses `ncn-w002`. 10 | 11 | ```bash 12 | NODE=ncn-w002 13 | XNAME=$(ssh "${NODE}" cat /etc/cray/xname) 14 | cray cfs v3 components describe "${XNAME}" --format json | jq .configuration_status 15 | ``` 16 | 17 | Example output: 18 | 19 | ```json 20 | "configured" 21 | ``` 22 | -------------------------------------------------------------------------------- /upgrade/scripts/sls/sls_utils/README.md: -------------------------------------------------------------------------------- 1 | # `sls_utils` Library 2 | 3 | This is a reusable Python library for safely interacting with SLS network data (in JSON format). 4 | 5 | The library has been tested against Python version 3.6 and up. 
6 | -------------------------------------------------------------------------------- /upgrade/scripts/sls/sls_utils/index.md: -------------------------------------------------------------------------------- 1 | ./README.md -------------------------------------------------------------------------------- /upgrade/scripts/sls/sls_utils/schemas/hold/sls_hardware.schema.json.hold: -------------------------------------------------------------------------------- 1 | { 2 | "title": "SLS Hardware Schema - TODO", 3 | "type": "object", 4 | "properties": { 5 | "Hardware": { 6 | "description": "Hardware Data Subsection", 7 | "type": "object" 8 | } 9 | } 10 | } -------------------------------------------------------------------------------- /upgrade/scripts/sls/sls_utils/schemas/sls_reservations_schema.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "http://json-schema.org/draft-07/schema", 3 | "$id": "https://github.com/Cray-HPE/hms-sls/schemas/sls_reservations_schema.json", 4 | "title": "SLS Reservations Schema", 5 | "IPReservations": { 6 | "type": "array", 7 | "items": { 8 | "type": "object", 9 | "properties": { 10 | "Name": { 11 | "type": "string" 12 | }, 13 | "IPAddress": { 14 | "type": "string", 15 | "format": "ipv4" 16 | }, 17 | "Aliases": { 18 | "type": "array", 19 | "items": { 20 | "type": "string" 21 | } 22 | }, 23 | "Comment": { 24 | "type": "string" 25 | } 26 | }, 27 | "additionalProperties": false, 28 | "required": [ 29 | "Name", 30 | "IPAddress" 31 | ] 32 | } 33 | } 34 | } --------------------------------------------------------------------------------