├── llm_agent ├── __init__.py ├── log_config │ ├── __init__.py │ ├── logger_settings.json │ └── logger_setup.py ├── webex_chat │ ├── __init__.py │ ├── chat_api_client.py │ ├── bot.py │ └── ai_command.py ├── pyats_connector │ ├── __init__.py │ ├── api │ │ ├── __init__ .py │ │ ├── interface_operations.py │ │ ├── interface_config.py │ │ ├── device_health_state.py │ │ ├── routing.py │ │ ├── isis.py │ │ └── interface_state.py │ ├── inventory.py │ ├── connection_methods.py │ └── connection_handler.py ├── config │ ├── global_settings.json │ ├── pyats_testbed.yaml │ └── global_settings.py ├── utils │ └── text_utils.py ├── langchain_tools │ ├── lc_inventory.py │ ├── lc_interface_operations.py │ ├── lc_interface_config.py │ ├── lc_isis.py │ ├── lc_tools_list.py │ ├── lc_routing.py │ ├── lc_device_health_state.py │ └── lc_interface_state.py ├── fastAPI │ └── models.py ├── llm │ ├── prompts.py │ └── agent.py └── app.py ├── .dockerignore ├── test_llm_agent ├── __init__.py ├── test_settings.json ├── test_pyats_inventory.py ├── load_test_settings.py ├── test_pyats_device_health_state.py ├── test_pyats_interface_config.py ├── test_pyats_isis.py ├── test_pyats_routing.py ├── test_pyats_interface_operations.py └── test_pyats_interface_state.py ├── telegraf ├── requirements.txt ├── config │ ├── cat8000v-0_settings.json │ ├── cat8000v-1_settings.json │ ├── cat8000v-2_settings.json │ ├── grpc.conf │ ├── telegraf.conf │ └── netconf.conf └── dockerfile ├── img ├── cml.png ├── grafana1.png ├── grafana2.png ├── llm_flow.png ├── components.png ├── webex_bot1.png ├── webex_bot2.png ├── webex_bot3.png └── webex_bot4.png ├── llm_agent.requirements.txt ├── .vscode ├── settings.json ├── tasks.json └── launch.json ├── grafana ├── config │ ├── policies.yaml │ ├── dashboard.yaml │ ├── datasources.yaml │ ├── grafana.ini │ ├── contact_points.yaml │ ├── message_template.yaml │ ├── alerts.yaml │ └── dashboard.json └── dockerfile ├── .env.local ├── Makefile ├── llm_agent.Dockerfile ├── influxdb └── dockerfile ├── LICENSE ├── docker-compose.yml ├── .gitignore ├── README.md └── cml └── topology.yaml /llm_agent/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.dockerignore: -------------------------------------------------------------------------------- 1 | **/__pycache__ -------------------------------------------------------------------------------- /test_llm_agent/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /llm_agent/log_config/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /llm_agent/webex_chat/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /telegraf/requirements.txt: -------------------------------------------------------------------------------- 1 | ncpeek -------------------------------------------------------------------------------- /llm_agent/pyats_connector/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /llm_agent/pyats_connector/api/__init__ .py: 
-------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /img/cml.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jillesca/AI-Network-Troubleshooting-PoC/HEAD/img/cml.png -------------------------------------------------------------------------------- /img/grafana1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jillesca/AI-Network-Troubleshooting-PoC/HEAD/img/grafana1.png -------------------------------------------------------------------------------- /img/grafana2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jillesca/AI-Network-Troubleshooting-PoC/HEAD/img/grafana2.png -------------------------------------------------------------------------------- /img/llm_flow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jillesca/AI-Network-Troubleshooting-PoC/HEAD/img/llm_flow.png -------------------------------------------------------------------------------- /img/components.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jillesca/AI-Network-Troubleshooting-PoC/HEAD/img/components.png -------------------------------------------------------------------------------- /img/webex_bot1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jillesca/AI-Network-Troubleshooting-PoC/HEAD/img/webex_bot1.png -------------------------------------------------------------------------------- /img/webex_bot2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jillesca/AI-Network-Troubleshooting-PoC/HEAD/img/webex_bot2.png -------------------------------------------------------------------------------- /img/webex_bot3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jillesca/AI-Network-Troubleshooting-PoC/HEAD/img/webex_bot3.png -------------------------------------------------------------------------------- /img/webex_bot4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jillesca/AI-Network-Troubleshooting-PoC/HEAD/img/webex_bot4.png -------------------------------------------------------------------------------- /test_llm_agent/test_settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "device": "cat8000v-0", 3 | "interface_name": "GigabitEthernet2" 4 | } -------------------------------------------------------------------------------- /llm_agent.requirements.txt: -------------------------------------------------------------------------------- 1 | langchain 2 | langchain-openai 3 | openai 4 | pyats[full] 5 | webex_bot 6 | uvicorn 7 | fastapi 8 | colorlog -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "python.testing.pytestArgs": ["ai-agent-for-network-issues"], 3 | "python.testing.unittestEnabled": false, 4 | "python.testing.pytestEnabled": true 5 | } 6 | 
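Note on the test setup: the VS Code settings above enable pytest, while the scripts under test_llm_agent/ are written as __main__ entry points that print live pyATS output. A minimal pytest-style smoke test is sketched below; it is not part of the repository. It assumes it is run from the repository root (so the relative paths in test_settings.json and global_settings.json resolve), that the packages in llm_agent.requirements.txt are installed, and that the environment variables required by llm_agent/config/global_settings.py (WEBEX_TEAMS_ACCESS_TOKEN, WEBEX_APPROVED_USERS_MAIL, OPENAI_API_KEY) are exported. The file name is hypothetical.

# Hypothetical test_llm_agent/test_inventory_smoke.py -- illustrative sketch, not a repository file.
from test_llm_agent.load_test_settings import DEVICE_NAME, INTERFACE_NAME
from llm_agent.pyats_connector.inventory import get_devices_from_inventory


def test_device_under_test_is_in_testbed():
    # test_settings.json names the device the other scripts exercise;
    # it should be defined in llm_agent/config/pyats_testbed.yaml.
    assert DEVICE_NAME in get_devices_from_inventory()


def test_interface_under_test_is_named():
    # Guard against an empty interface_name in test_settings.json.
    assert INTERFACE_NAME

Loading the testbed only parses the YAML, so these checks do not require reachability to the routers themselves.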
-------------------------------------------------------------------------------- /grafana/config/policies.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: 1 2 | policies: 3 | - orgId: 1 4 | receiver: webhooks 5 | 6 | group_wait: 0s 7 | group_interval: 10m 8 | repeat_interval: 15m 9 | -------------------------------------------------------------------------------- /llm_agent/config/global_settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "host_url": "0.0.0.0", 3 | "llm_http_port": 5001, 4 | "testbed_file": "llm_agent/config/pyats_testbed.yaml", 5 | "logging_config_file": "llm_agent/log_config/logger_settings.json" 6 | } -------------------------------------------------------------------------------- /grafana/config/dashboard.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: 1 2 | 3 | providers: 4 | - name: 'influxdb' 5 | orgId: 1 6 | folder: '' 7 | type: file 8 | disableDeletion: false 9 | editable: true 10 | options: 11 | path: /etc/grafana/provisioning/dashboards -------------------------------------------------------------------------------- /telegraf/config/cat8000v-0_settings.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "host": "10.10.20.215", 4 | "port": 830, 5 | "username": "cisco", 6 | "password": "cisco", 7 | "hostkey_verify": "False", 8 | "device_params": { 9 | "name": "iosxe" 10 | }, 11 | "hostname": "cat8000v-0" 12 | } 13 | ] -------------------------------------------------------------------------------- /telegraf/config/cat8000v-1_settings.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "host": "10.10.20.216", 4 | "port": 830, 5 | "username": "cisco", 6 | "password": "cisco", 7 | "hostkey_verify": "False", 8 | "device_params": { 9 | "name": "iosxe" 10 | }, 11 | "hostname": "cat8000v-1" 12 | } 13 | ] -------------------------------------------------------------------------------- /telegraf/config/cat8000v-2_settings.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "host": "10.10.20.217", 4 | "port": 830, 5 | "username": "cisco", 6 | "password": "cisco", 7 | "hostkey_verify": "False", 8 | "device_params": { 9 | "name": "iosxe" 10 | }, 11 | "hostname": "cat8000v-2" 12 | } 13 | ] -------------------------------------------------------------------------------- /test_llm_agent/test_pyats_inventory.py: -------------------------------------------------------------------------------- 1 | """ 2 | This script is used to test the get_devices_from_inventory function 3 | """ 4 | 5 | from pprint import pprint as pp 6 | 7 | 8 | from llm_agent.pyats_connector.inventory import ( 9 | get_devices_from_inventory, 10 | ) 11 | 12 | if __name__ == "__main__": 13 | pp(get_devices_from_inventory()) 14 | -------------------------------------------------------------------------------- /grafana/config/datasources.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: 1 2 | 3 | datasources: 4 | - name: InfluxDB_v2_Flux 5 | type: influxdb 6 | access: proxy 7 | url: http://influxdb:8086 8 | jsonData: 9 | version: Flux 10 | organization: telemtry-org 11 | defaultBucket: mdt_grpc 12 | tlsSkipVerify: true 13 | secureJsonData: 14 | token: $GRAFANA_TOKEN 15 | -------------------------------------------------------------------------------- 
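For reference, the datasource above points at the same InfluxDB instance Telegraf writes to, so a quick way to confirm telemetry is arriving before debugging Grafana panels is to query the bucket directly. The sketch below is not part of the repository; it assumes the influxdb-client Python package is installed on the host, reuses the org/bucket/token values provisioned through .env.local (the org name is spelled "telemtry-org" throughout the project), and targets the 8086 port published in docker-compose.yml.

# Quick Flux query against the telemetry bucket (illustrative sketch).
from influxdb_client import InfluxDBClient

URL = "http://localhost:8086"  # use http://influxdb:8086 from inside the compose network
ORG = "telemtry-org"           # must match the provisioned org name exactly
TOKEN = "<INFLUXDB_ADMIN_TOKEN from .env.local>"

with InfluxDBClient(url=URL, token=TOKEN, org=ORG) as client:
    # Count the points written to the mdt_grpc bucket over the last 10 minutes.
    tables = client.query_api().query(
        'from(bucket: "mdt_grpc") |> range(start: -10m) |> count()'
    )
    for table in tables:
        for record in table.records:
            print(record.get_field(), record.get_value())

If the query returns no tables, check the Telegraf gRPC input on port 57500 before looking at the Grafana dashboards.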
/telegraf/config/grpc.conf: -------------------------------------------------------------------------------- 1 | [[inputs.cisco_telemetry_mdt]] 2 | transport = "grpc" 3 | service_address = ":57500" 4 | 5 | [[processors.enum]] 6 | [[processors.enum.mapping]] 7 | field = "state" 8 | namepass = "Cisco-IOS-XE-isis-oper:isis-oper-data/isis-instance/isis-neighbor" 9 | dest = "status_code" 10 | default = 0 11 | 12 | [processors.enum.mapping.value_mappings] 13 | isis-adj-up = 1 14 | -------------------------------------------------------------------------------- /grafana/config/grafana.ini: -------------------------------------------------------------------------------- 1 | [paths] 2 | logs = /tmp/grafana 3 | 4 | [security] 5 | admin_user = admin 6 | admin_password = admin 7 | 8 | # Default UI theme ("dark" or "light" or "system") 9 | default_theme = light 10 | 11 | # [log] 12 | # mode = console file 13 | # level = debug 14 | 15 | # [log.console] 16 | # level = debug 17 | # format = console 18 | 19 | # # For "file" mode only 20 | [log.file] 21 | level = debug 22 | format = text 23 | -------------------------------------------------------------------------------- /telegraf/config/telegraf.conf: -------------------------------------------------------------------------------- 1 | # Global Agent Configuration 2 | [agent] 3 | flush_interval = "10s" 4 | interval = "10s" 5 | debug = true 6 | quiet = false 7 | logtarget = "file" 8 | logfile = "/tmp/telegraf-grpc.log" 9 | logfile_rotation_max_size = "0" 10 | hostname = "telegraf-collector" 11 | 12 | [[outputs.influxdb_v2]] 13 | bucket = "$TELEGRAF_BUCKET" 14 | organization = "$TELEGRAF_ORG" 15 | urls = ["http://influxdb:8086"] 16 | token = "$TELEGRAF_TOKEN" -------------------------------------------------------------------------------- /llm_agent/pyats_connector/inventory.py: -------------------------------------------------------------------------------- 1 | """ 2 | Script to retrieve the devices from the inventory using the pyATS framework. 3 | """ 4 | 5 | from pyats.topology import loader 6 | 7 | from llm_agent.config.global_settings import TESTBED_FILE 8 | 9 | 10 | def get_devices_from_inventory() -> list: 11 | """ 12 | Retrieves a list of devices from the inventory. 13 | 14 | Returns: 15 | list: A list of device names. 16 | """ 17 | topology = loader.load(TESTBED_FILE) 18 | return list(topology.devices.names) 19 | -------------------------------------------------------------------------------- /test_llm_agent/load_test_settings.py: -------------------------------------------------------------------------------- 1 | """ 2 | Load test settings from JSON file. 3 | """ 4 | 5 | import json 6 | 7 | TEST_SETTINGS_FILE = "test_llm_agent/test_settings.json" 8 | 9 | 10 | def load_json_file(json_file: str) -> dict: 11 | """ 12 | Load JSON file. 13 | """ 14 | with open(json_file, encoding="utf-8") as f: 15 | return json.load(f) 16 | 17 | 18 | test_config = load_json_file(json_file=TEST_SETTINGS_FILE) 19 | DEVICE_NAME = test_config.get("device") 20 | INTERFACE_NAME = test_config.get("interface_name") 21 | -------------------------------------------------------------------------------- /llm_agent/utils/text_utils.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | 4 | def output_to_json(data: str) -> str: 5 | """ 6 | Convert python to JSON string. 7 | """ 8 | return json.dumps(data) 9 | 10 | 11 | def remove_white_spaces(string: str) -> str: 12 | """ 13 | Removes extra white spaces from a string. 
14 | """ 15 | return " ".join(string.split()) 16 | 17 | 18 | def load_json_file(json_file: str) -> dict: 19 | """ 20 | Load JSON file. 21 | """ 22 | with open(json_file, encoding="utf-8") as f: 23 | return json.load(f) 24 | -------------------------------------------------------------------------------- /test_llm_agent/test_pyats_device_health_state.py: -------------------------------------------------------------------------------- 1 | """ 2 | This script is used to test the device health state operations. 3 | """ 4 | 5 | from pprint import pprint as pp 6 | from test_llm_agent.load_test_settings import DEVICE_NAME 7 | 8 | from llm_agent.pyats_connector.api.device_health_state import ( 9 | health_cpu, 10 | health_memory, 11 | health_logging, 12 | ) 13 | 14 | 15 | if __name__ == "__main__": 16 | pp(health_cpu(device_name=DEVICE_NAME)) 17 | pp(health_memory(device_name=DEVICE_NAME)) 18 | pp(health_logging(device_name=DEVICE_NAME)) 19 | -------------------------------------------------------------------------------- /llm_agent/langchain_tools/lc_inventory.py: -------------------------------------------------------------------------------- 1 | """ 2 | Wrapper functions to work with langchain tools and openAI 3 | """ 4 | 5 | from langchain.agents import tool 6 | 7 | from llm_agent.utils.text_utils import output_to_json 8 | from llm_agent.pyats_connector.inventory import ( 9 | get_devices_from_inventory, 10 | ) 11 | 12 | 13 | @tool 14 | def get_devices_list_available() -> list: 15 | """ 16 | Retrieves the list of valid available devices. 17 | 18 | Returns: 19 | A list representation of the available devices. 20 | """ 21 | return output_to_json(get_devices_from_inventory()) 22 | -------------------------------------------------------------------------------- /test_llm_agent/test_pyats_interface_config.py: -------------------------------------------------------------------------------- 1 | """ 2 | This script is used to test the interface configuration 3 | """ 4 | 5 | from pprint import pprint as pp 6 | from test_llm_agent.load_test_settings import DEVICE_NAME, INTERFACE_NAME 7 | 8 | from llm_agent.pyats_connector.api.interface_config import ( 9 | interface_running_config, 10 | interfaces_description, 11 | ) 12 | 13 | 14 | if __name__ == "__main__": 15 | pp( 16 | interface_running_config( 17 | device_name=DEVICE_NAME, interface_name=INTERFACE_NAME 18 | ) 19 | ) 20 | 21 | pp(interfaces_description(device_name=DEVICE_NAME)) 22 | -------------------------------------------------------------------------------- /.vscode/tasks.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": "2.0.0", 3 | "tasks": [ 4 | { 5 | "type": "docker-build", 6 | "label": "docker-build", 7 | "platform": "python", 8 | "dockerBuild": { 9 | "tag": "aiagentfornetworkissues:latest", 10 | "dockerfile": "${workspaceFolder}/Dockerfile", 11 | "context": "${workspaceFolder}", 12 | "pull": true 13 | } 14 | }, 15 | { 16 | "type": "docker-run", 17 | "label": "docker-run: debug", 18 | "dependsOn": ["docker-build"], 19 | "python": { 20 | "file": "llm_agent/pyats_tools/pyats_connection.py" 21 | } 22 | } 23 | ] 24 | } 25 | -------------------------------------------------------------------------------- /test_llm_agent/test_pyats_isis.py: -------------------------------------------------------------------------------- 1 | """ 2 | This script is used to test the interface state 3 | """ 4 | 5 | from pprint import pprint as pp 6 | from test_llm_agent.load_test_settings import DEVICE_NAME 7 | 8 
| from llm_agent.pyats_connector.api.isis import ( 9 | isis_neighbors, 10 | isis_interface_events, 11 | isis_interfaces, 12 | ) 13 | 14 | VRF = "default" 15 | INTERFACES_NAME = ["GigabitEthernet1", "GigabitEthernet2"] 16 | 17 | if __name__ == "__main__": 18 | pp(isis_neighbors(device_name=DEVICE_NAME)) 19 | 20 | pp(isis_interface_events(device_name=DEVICE_NAME)) 21 | 22 | pp(isis_interfaces(device_name=DEVICE_NAME, vrf_name=VRF)) 23 | -------------------------------------------------------------------------------- /test_llm_agent/test_pyats_routing.py: -------------------------------------------------------------------------------- 1 | """ 2 | This script is used to test the interface state 3 | """ 4 | 5 | from pprint import pprint as pp 6 | from test_llm_agent.load_test_settings import DEVICE_NAME 7 | 8 | from llm_agent.pyats_connector.api.routing import ( 9 | vrfs_present, 10 | interface_interfaces_under_vrf, 11 | route_entries, 12 | ) 13 | 14 | VRF_DEFAULT = "default" 15 | 16 | if __name__ == "__main__": 17 | pp(vrfs_present(device_name=DEVICE_NAME)) 18 | 19 | pp( 20 | interface_interfaces_under_vrf( 21 | device_name=DEVICE_NAME, vrf_name=VRF_DEFAULT 22 | ) 23 | ) 24 | 25 | pp(route_entries(device_name=DEVICE_NAME)) 26 | -------------------------------------------------------------------------------- /.env.local: -------------------------------------------------------------------------------- 1 | TAG=local 2 | BUCKET=mdt_grpc 3 | ORG=telemtry-org 4 | TOKEN=oZSZ0CnbZcwKouMQ3JfR7n7K0vAuMNFaa0rymxpQ6jGQU8ZqHjiHb9Vu5OVf23CeLR7Tytuzg8doVQTo_n_3Vg== 5 | 6 | GRAFANA_VERSION=10.4.2 7 | GRAFANA_TAG=${TAG} 8 | GRAFANA_TOKEN=${TOKEN} 9 | 10 | INFLUXDB_VERSION=2.7.6 11 | INFLUXDB_TAG=${TAG} 12 | INFLUXDB_ORG=${ORG} 13 | INFLUXDB_MODE=setup 14 | INFLUXDB_BUCKET=${BUCKET} 15 | INFLUXDB_USERNAME=admin 16 | INFLUXDB_PASSWORD=admin123 17 | INFLUXDB_ADMIN_TOKEN=${TOKEN} 18 | 19 | TELEGRAF_VERSION=1.30.1 20 | TELEGRAF_TAG=${TAG} 21 | TELEGRAF_ORG=${ORG} 22 | TELEGRAF_TOKEN=${TOKEN} 23 | TELEGRAF_BUCKET=${BUCKET} 24 | 25 | LLM_HTTP_PORT=5001 26 | LLM_TAG="local" 27 | 28 | PYTHONPATH=$PYTHONPATH:. 
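LLM_HTTP_PORT above (5001) is where the llm_agent FastAPI service listens, and it is the port Grafana's webhook contact point posts alerts to (grafana/config/contact_points.yaml uses http://llm_agent:$LLM_HTTP_PORT/alert). A hand-crafted POST such as the sketch below can exercise that flow without waiting for a real alert. It is not part of the repository; it assumes the requests package is available on the host and that app.py exposes the /alert route with a body shaped like GrafanaWebhookMessage in llm_agent/fastAPI/models.py. The alert contents are illustrative.

# Simulate a Grafana alert against the running llm_agent container (illustrative sketch).
import requests

payload = {
    "alerts": [
        {
            "status": "firing",
            "annotations": {"summary": "ISIS adjacency down on cat8000v-0"},
            "startsAt": "2024-01-01T00:00:00Z",
            "endsAt": "0001-01-01T00:00:00Z",
            "dashboardURL": "http://localhost:3000/d/example",
            "panelURL": "http://localhost:3000/d/example?viewPanel=1",
        }
    ],
    "commonAnnotations": {},
    "title": "Grafana Alert",
    "status": "firing",
    "state": "alerting",
    "message": "Network issue detected",
}

response = requests.post("http://localhost:5001/alert", json=payload, timeout=120)
print(response.status_code, response.text)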
-------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | include .env.local 2 | include .env 3 | export 4 | 5 | build-tig: 6 | $(MAKE) clean-tig 7 | docker compose up --build --detach telegraf influxdb grafana 8 | 9 | run-tig: 10 | $(MAKE) clean-tig 11 | docker compose up --detach telegraf influxdb grafana 12 | 13 | build-llm: 14 | $(MAKE) clean-llm 15 | docker compose up --build --detach llm_agent 16 | 17 | run-llm: 18 | $(MAKE) clean-llm 19 | docker compose up --detach llm_agent 20 | $(MAKE) follow 21 | 22 | follow: 23 | docker compose logs --follow llm_agent 24 | 25 | cli: 26 | docker compose exec llm_agent bash 27 | 28 | clean-tig: 29 | -docker compose down telegraf influxdb grafana 30 | -docker compose rm -f telegraf influxdb grafana 31 | 32 | clean-llm: 33 | -docker compose down llm_agent 34 | -docker compose rm -f llm_agent 35 | -------------------------------------------------------------------------------- /test_llm_agent/test_pyats_interface_operations.py: -------------------------------------------------------------------------------- 1 | """ 2 | This script is used to test interface operations 3 | """ 4 | 5 | from pprint import pprint as pp 6 | from test_llm_agent.load_test_settings import DEVICE_NAME, INTERFACE_NAME 7 | 8 | from llm_agent.pyats_connector.api.interface_operations import ( 9 | shut_interface, 10 | unshut_interface, 11 | ) 12 | from llm_agent.pyats_connector.api.interface_state import interfaces_status 13 | 14 | 15 | if __name__ == "__main__": 16 | pp(interfaces_status(device_name=DEVICE_NAME)) 17 | 18 | pp(shut_interface(device_name=DEVICE_NAME, interface_name=INTERFACE_NAME)) 19 | 20 | pp(interfaces_status(device_name=DEVICE_NAME)) 21 | 22 | pp( 23 | unshut_interface( 24 | device_name=DEVICE_NAME, interface_name=INTERFACE_NAME 25 | ) 26 | ) 27 | pp(interfaces_status(device_name=DEVICE_NAME)) 28 | -------------------------------------------------------------------------------- /.vscode/launch.json: -------------------------------------------------------------------------------- 1 | { 2 | // Use IntelliSense to learn about possible attributes. 3 | // Hover to view descriptions of existing attributes. 
4 | // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 5 | "version": "0.2.0", 6 | "configurations": [ 7 | { 8 | "name": "Python: Current File", 9 | "type": "python", 10 | "request": "launch", 11 | "program": "${file}", 12 | "console": "integratedTerminal", 13 | "justMyCode": true 14 | }, 15 | { 16 | "name": "Docker: Python - General", 17 | "type": "docker", 18 | "request": "launch", 19 | "preLaunchTask": "docker-run: debug", 20 | "python": { 21 | "pathMappings": [ 22 | { 23 | "localRoot": "${workspaceFolder}", 24 | "remoteRoot": "/app" 25 | } 26 | ], 27 | "projectType": "general" 28 | } 29 | } 30 | ] 31 | } 32 | -------------------------------------------------------------------------------- /llm_agent.Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.12.2 as base 2 | 3 | COPY llm_agent.requirements.txt /tmp/requirements.txt 4 | 5 | RUN pip install --upgrade pip && \ 6 | pip install -r /tmp/requirements.txt 7 | 8 | FROM base AS app 9 | 10 | EXPOSE 5001 11 | 12 | ARG OPENAI_API_KEY \ 13 | GRAFANA_WEB_HOOK \ 14 | LANGCHAIN_API_KEY \ 15 | LANGCHAIN_PROJECT \ 16 | WEBEX_ROOM_ID \ 17 | WEBEX_TEAMS_ACCESS_TOKEN \ 18 | WEBEX_APPROVED_USERS_MAIL 19 | 20 | ENV OPENAI_API_KEY=${OPENAI_API_KEY} \ 21 | LANGCHAIN_API_KEY=${LANGCHAIN_API_KEY} \ 22 | LANGCHAIN_PROJECT=${LANGCHAIN_PROJECT} \ 23 | GRAFANA_WEB_HOOK=${GRAFANA_WEB_HOOK} \ 24 | WEBEX_ROOM_ID=${WEBEX_ROOM_ID} \ 25 | PYTHONPATH=:/llm_agent \ 26 | WEBEX_TEAMS_ACCESS_TOKEN=${WEBEX_TEAMS_ACCESS_TOKEN} \ 27 | WEBEX_APPROVED_USERS_MAIL=${WEBEX_APPROVED_USERS_MAIL} 28 | 29 | COPY llm_agent /llm_agent 30 | 31 | ENTRYPOINT [ "python", "-u", "/llm_agent/app.py" ] 32 | # ENTRYPOINT ["sh", "-c", "while :; do sleep 1; done"] -------------------------------------------------------------------------------- /llm_agent/fastAPI/models.py: -------------------------------------------------------------------------------- 1 | """ 2 | This module contains Pydantic models for handling webhook messages in a FastAPI application. 3 | """ 4 | 5 | from pydantic import BaseModel 6 | 7 | 8 | class Message(BaseModel): 9 | """ 10 | This class represents a message model. 11 | """ 12 | 13 | message: str 14 | 15 | 16 | class AlertAnnotations(BaseModel): 17 | """ 18 | This class represents the annotations of an alert. 19 | """ 20 | 21 | summary: str 22 | 23 | 24 | class Alert(BaseModel): 25 | """ 26 | This class represents an alert. 27 | """ 28 | 29 | status: str 30 | annotations: AlertAnnotations 31 | startsAt: str 32 | endsAt: str 33 | dashboardURL: str 34 | panelURL: str 35 | 36 | 37 | class GrafanaWebhookMessage(BaseModel): 38 | """ 39 | This class represents a Grafana webhook message. 
40 | """ 41 | 42 | alerts: list[Alert] 43 | commonAnnotations: dict 44 | title: str 45 | status: str 46 | state: str 47 | message: str 48 | -------------------------------------------------------------------------------- /influxdb/dockerfile: -------------------------------------------------------------------------------- 1 | ARG INFLUXDB_VERSION 2 | FROM influxdb:${INFLUXDB_VERSION} 3 | 4 | EXPOSE 8086 5 | 6 | ARG DOCKER_INFLUXDB_INIT_ORG \ 7 | DOCKER_INFLUXDB_INIT_MODE \ 8 | DOCKER_INFLUXDB_INIT_BUCKET \ 9 | DOCKER_INFLUXDB_INIT_USERNAME \ 10 | DOCKER_INFLUXDB_INIT_PASSWORD \ 11 | DOCKER_INFLUXDB_INIT_ADMIN_TOKEN 12 | 13 | ENV DOCKER_INFLUXDB_INIT_ORG=${DOCKER_INFLUXDB_INIT_ORG} \ 14 | DOCKER_INFLUXDB_INIT_MODE=${DOCKER_INFLUXDB_INIT_MODE} \ 15 | DOCKER_INFLUXDB_INIT_BUCKET=${DOCKER_INFLUXDB_INIT_BUCKET} \ 16 | DOCKER_INFLUXDB_INIT_USERNAME=${DOCKER_INFLUXDB_INIT_USERNAME} \ 17 | DOCKER_INFLUXDB_INIT_PASSWORD=${DOCKER_INFLUXDB_INIT_PASSWORD} \ 18 | DOCKER_INFLUXDB_INIT_ADMIN_TOKEN=${DOCKER_INFLUXDB_INIT_ADMIN_TOKEN} 19 | 20 | # Run from root dir 21 | # docker build --file influxdb.dockerfile --tag influxdb:test . 22 | # docker run -itd -p 8086:8086 --volume influxdb:/var/lib/influxdb2 --name influxdb --add-host host.docker.internal:host-gateway influxdb:test 23 | # docker exec -it influxdb bash -------------------------------------------------------------------------------- /grafana/config/contact_points.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: 1 2 | contactPoints: 3 | - orgId: 1 4 | name: webhooks 5 | receivers: 6 | - uid: b29cd573-af3b-432b-b465-52a8479add88 7 | type: webhook 8 | settings: 9 | httpMethod: POST 10 | message: Issue detected with network telemetry 11 | title: Grafana Alert 12 | url: $GRAFANA_WEB_HOOK 13 | disableResolveMessage: false 14 | - uid: bd8139a5-44e1-4db1-b114-8fc9e1e25c23 15 | type: webhook 16 | settings: 17 | httpMethod: POST 18 | message: Network issue detected 19 | title: Automatic Network Alert 20 | url: http://llm_agent:$LLM_HTTP_PORT/alert 21 | disableResolveMessage: false 22 | - uid: f727420e-064c-49c1-aff8-cc8831f85a88 23 | type: webex 24 | settings: 25 | bot_token: $WEBEX_TEAMS_ACCESS_TOKEN 26 | room_id: $WEBEX_ROOM_ID 27 | message: '{{ template "webex.message" . 
}}' 28 | disableResolveMessage: false 29 | -------------------------------------------------------------------------------- /grafana/config/message_template.yaml: -------------------------------------------------------------------------------- 1 | # Based on 2 | # https://softwaremill.com/mastering-alerts-in-grafana-kubernetes-provisioning-and-slack-notifications/ 3 | templates: 4 | - orgID: 1 5 | name: webex.message 6 | template: | 7 | {{ define "webex.print_alert" -}} 8 | 9 | {{ if .Annotations }} 10 | 📝 **Summary:** {{ .Annotations.summary}} 11 | {{ if .Annotations.description }} 12 | **Description:** {{ .Annotations.description }} 13 | {{ end }} 14 | {{ end }} 15 | 16 | Link(s): 17 | {{ if .DashboardURL }} 18 | - 📊 [Grafana Dashboard]({{ .DashboardURL }}) 19 | {{ end }} 20 | {{ if .PanelURL }} 21 | - 📈 [Panel Dashboard]({{ .PanelURL }}) 22 | {{ end }} 23 | {{ if .SilenceURL }} 24 | - 🔕 [Silence this alert]({{ .SilenceURL }}) 25 | {{- end }} 26 | {{- end }} 27 | 28 | {{ define "webex.message" -}} 29 | {{ if .Alerts.Firing }} 30 | ### 🚨 Network Alert 🚨 31 | 🔥 Firing alert(s): {{ len .Alerts.Firing }} 32 | 33 | {{- range $i, $alert := .Alerts.Firing }} 34 | {{ template "webex.print_alert" $alert }} 35 | 36 | --- 37 | 38 | {{- end }} 39 | {{- end }} 40 | {{- end }} 41 | -------------------------------------------------------------------------------- /llm_agent/langchain_tools/lc_interface_operations.py: -------------------------------------------------------------------------------- 1 | """ 2 | Wrapper functions to work with langchain tools and openAI 3 | """ 4 | 5 | from langchain.agents import tool 6 | 7 | from llm_agent.utils.text_utils import output_to_json 8 | from llm_agent.pyats_connector.api.interface_operations import ( 9 | shut_interface, 10 | unshut_interface, 11 | ) 12 | 13 | 14 | @tool 15 | def action_shut_interface(device_name: str, interface_name: str) -> None: 16 | """ 17 | Shut down an interface on a device. 18 | 19 | Args: 20 | device_name (str): Must come from the function get_devices_list_available 21 | interface_name (str): The name of the interface to shut down. 22 | 23 | Returns: 24 | None 25 | """ 26 | return output_to_json(shut_interface(device_name, interface_name)) 27 | 28 | 29 | @tool 30 | def action_unshut_interface(device_name: str, interface_name: str) -> None: 31 | """ 32 | Bring up (unshut) an interface on a device. 33 | 34 | Args: 35 | device_name (str): Must come from the function get_devices_list_available 36 | interface_name (str): The name of the interface to bring back up.
37 | 38 | Returns: 39 | None 40 | """ 41 | return output_to_json(unshut_interface(device_name, interface_name)) 42 | -------------------------------------------------------------------------------- /test_llm_agent/test_pyats_interface_state.py: -------------------------------------------------------------------------------- 1 | """ 2 | This script is used to test the interface state 3 | """ 4 | 5 | from pprint import pprint as pp 6 | from test_llm_agent.load_test_settings import DEVICE_NAME, INTERFACE_NAME 7 | 8 | from llm_agent.pyats_connector.api.interface_state import ( 9 | interfaces_status, 10 | single_interface_status, 11 | interfaces_information, 12 | interface_admin_status, 13 | verify_state_up, 14 | interface_events, 15 | ) 16 | 17 | INTERFACES_NAME = ["GigabitEthernet1", "GigabitEthernet2"] 18 | 19 | if __name__ == "__main__": 20 | pp(interfaces_status(device_name=DEVICE_NAME)) 21 | 22 | pp( 23 | single_interface_status( 24 | device_name=DEVICE_NAME, interface_name=INTERFACE_NAME 25 | ) 26 | ) 27 | 28 | pp( 29 | interfaces_information( 30 | device_name=DEVICE_NAME, interfaces_name=INTERFACES_NAME 31 | ) 32 | ) 33 | 34 | pp( 35 | interface_admin_status( 36 | device_name=DEVICE_NAME, interface_name=INTERFACE_NAME 37 | ) 38 | ) 39 | 40 | pp(verify_state_up(device_name=DEVICE_NAME, interface_name=INTERFACE_NAME)) 41 | 42 | pp( 43 | interface_events( 44 | device_name=DEVICE_NAME, interface_name=INTERFACE_NAME 45 | ) 46 | ) 47 | -------------------------------------------------------------------------------- /llm_agent/pyats_connector/api/interface_operations.py: -------------------------------------------------------------------------------- 1 | """ 2 | Script to perform operations on a device interface using the pyATS framework. 3 | """ 4 | 5 | from llm_agent.pyats_connector.connection_methods import api_connect 6 | 7 | 8 | def shut_interface(device_name: str, interface_name: str) -> dict: 9 | """ 10 | Shuts down the specified interface on the given device. 11 | 12 | Args: 13 | device_name (str): The name of the device. 14 | interface_name (str): The name of the interface to shut down. 15 | 16 | Returns: 17 | dict: A dictionary containing the result of the operation. 18 | """ 19 | return api_connect( 20 | device_name=device_name, 21 | method="shut_interface", 22 | args=interface_name, 23 | ) 24 | 25 | 26 | def unshut_interface(device_name: str, interface_name: str) -> dict: 27 | """ 28 | Unshuts the specified interface on the given device. 29 | 30 | Args: 31 | device_name (str): The name of the device. 32 | interface_name (str): The name of the interface to unshut. 33 | 34 | Returns: 35 | dict: A dictionary containing the result of the operation. 
36 | 37 | """ 38 | return api_connect( 39 | device_name=device_name, 40 | method="unshut_interface", 41 | args=interface_name, 42 | ) 43 | -------------------------------------------------------------------------------- /llm_agent/langchain_tools/lc_interface_config.py: -------------------------------------------------------------------------------- 1 | """ 2 | Wrapper functions to work with langchain tools and openAI 3 | """ 4 | 5 | from langchain.agents import tool 6 | 7 | from llm_agent.utils.text_utils import output_to_json 8 | from llm_agent.pyats_connector.api.interface_config import ( 9 | interface_running_config, 10 | interfaces_description, 11 | ) 12 | 13 | 14 | @tool 15 | def get_interface_running_config( 16 | device_name: str, interface_name: str 17 | ) -> dict: 18 | """ 19 | Get the running config of a single interface on a device. 20 | 21 | Args: 22 | device_name (str): Must come from the function get_devices_list_available 23 | interface_name (str): The name of the interface. 24 | 25 | Returns: 26 | dict: The running configuration of the specified interface. 27 | """ 28 | 29 | return output_to_json( 30 | interface_running_config(device_name, interface_name) 31 | ) 32 | 33 | 34 | @tool 35 | def get_interfaces_description(device_name: str) -> dict: 36 | """ 37 | Get the description of the interfaces per device. 38 | 39 | Args: 40 | device_name (str): Must come from the function get_devices_list_available 41 | interface_name (str): The name of the interface. 42 | 43 | Returns: 44 | dict: A dictionary containing the status of the interface. 45 | """ 46 | return output_to_json(interfaces_description(device_name)) 47 | -------------------------------------------------------------------------------- /llm_agent/log_config/logger_settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": 1, 3 | "disable_existing_loggers": false, 4 | "formatters": { 5 | "detailed": { 6 | "format": "%(asctime)s.%(msecs)03d: %(name)s-%(levelname)s: %(message)s", 7 | "datefmt": "%Y-%m-%d %H:%M:%S" 8 | }, 9 | "colored": { 10 | "()": "colorlog.ColoredFormatter", 11 | "format": "%(green)s%(asctime)s.%(msecs)03d:%(reset)s %(bold)s%(log_color)s[%(levelname)s]%(reset)s %(blue)s[%(name)s]:%(lineno)d%(reset)s %(message)s", 12 | "datefmt": "%Y-%m-%d %H:%M:%S", 13 | "log_colors": { 14 | "DEBUG": "cyan", 15 | "INFO": "black", 16 | "WARNING": "yellow", 17 | "ERROR": "red", 18 | "CRITICAL": "red" 19 | } 20 | } 21 | }, 22 | "handlers": { 23 | "stdout": { 24 | "class": "colorlog.StreamHandler", 25 | "level": "INFO", 26 | "formatter": "colored" 27 | }, 28 | "stderr": { 29 | "class": "colorlog.StreamHandler", 30 | "level": "INFO", 31 | "formatter": "colored" 32 | }, 33 | "file": { 34 | "class": "logging.handlers.RotatingFileHandler", 35 | "level": "ERROR", 36 | "formatter": "detailed", 37 | "filename": "llm_agent/logs/llm_agent.log", 38 | "maxBytes": 10000000, 39 | "backupCount": 3 40 | } 41 | }, 42 | "loggers": { 43 | "llm_agent": { 44 | "level": "INFO", 45 | "handlers": [ 46 | "stdout", 47 | "stderr", 48 | "file" 49 | ], 50 | "propagate": true 51 | } 52 | } 53 | } -------------------------------------------------------------------------------- /llm_agent/pyats_connector/api/interface_config.py: -------------------------------------------------------------------------------- 1 | """ 2 | Script to retrieve the configuration of a device interface using the pyATS framework. 
3 | """ 4 | 5 | from llm_agent.pyats_connector.connection_methods import api_connect, parse_connect 6 | 7 | 8 | def interface_running_config(device_name: str, interface_name: str) -> dict: 9 | """ 10 | Get the running config of a single interface on a device. 11 | 12 | Args: 13 | device_name (str): The name of the device. Must come from the function get_devices_list_available. 14 | interface_name (str): The name of the interface. 15 | 16 | Returns: 17 | dict: The running configuration of the specified interface. 18 | """ 19 | return api_connect( 20 | device_name=device_name, 21 | method="get_interface_running_config", 22 | args=interface_name, 23 | ) 24 | 25 | 26 | def interfaces_description(device_name: str) -> dict: 27 | """ 28 | Get the description of the interfaces per device. 29 | 30 | Args: 31 | device_name (str): The name of the device. Must come from the function get_devices_list_available. 32 | 33 | Returns: 34 | dict: A dictionary containing the description of the interfaces. If there is an error getting the description, 35 | the value "ERROR_GETTING_INTERFACES_DESCRIPTION" will be returned. 36 | """ 37 | result = parse_connect( 38 | device_name=device_name, string_to_parse="show interfaces description" 39 | ) 40 | 41 | return result.get("interfaces", "ERROR_GETTING_INTERFACES_DESCRIPTION") 42 | -------------------------------------------------------------------------------- /grafana/dockerfile: -------------------------------------------------------------------------------- 1 | ARG GRAFANA_VERSION 2 | FROM grafana/grafana-oss:${GRAFANA_VERSION} 3 | 4 | EXPOSE 3000 5 | 6 | ARG GRAFANA_TOKEN \ 7 | LLM_HTTP_PORT \ 8 | WEBEX_ROOM_ID \ 9 | GRAFANA_WEB_HOOK \ 10 | WEBEX_TEAMS_ACCESS_TOKEN 11 | 12 | ENV GRAFANA_TOKEN=${GRAFANA_TOKEN} \ 13 | LLM_HTTP_PORT=${LLM_HTTP_PORT} \ 14 | WEBEX_ROOM_ID=${WEBEX_ROOM_ID} \ 15 | GRAFANA_WEB_HOOK=${GRAFANA_WEB_HOOK} \ 16 | WEBEX_TEAMS_ACCESS_TOKEN=${WEBEX_TEAMS_ACCESS_TOKEN} 17 | 18 | COPY config/ /tmp/config/ 19 | 20 | USER root 21 | 22 | RUN mv /tmp/config/grafana.ini /etc/grafana/grafana.ini \ 23 | && mv /tmp/config/alerts.yaml /etc/grafana/provisioning/alerting/alerts.yaml \ 24 | && mv /tmp/config/policies.yaml /etc/grafana/provisioning/alerting/policies.yaml \ 25 | && mv /tmp/config/dashboard.yaml /etc/grafana/provisioning/dashboards/dashboard.yaml \ 26 | && mv /tmp/config/dashboard.json /etc/grafana/provisioning/dashboards/dashboard.json \ 27 | && mv /tmp/config/datasources.yaml /etc/grafana/provisioning/datasources/datasources.yaml \ 28 | && mv /tmp/config/contact_points.yaml /etc/grafana/provisioning/alerting/contact_points.yaml \ 29 | && mv /tmp/config/message_template.yaml /etc/grafana/provisioning/alerting/message_template.yaml \ 30 | && echo 'alias ll="ls -al"' >> ~/.bashrc 31 | 32 | USER grafana 33 | 34 | # Run from root directory 35 | # docker build --file grafana.dockerfile --tag grafana:test . 36 | # docker run -itd -p 3000:3000 --name grafana --add-host host.docker.internal:host-gateway grafana:test 37 | # docker exec -it grafana bash 38 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2024, Jesus Illescas 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are met: 7 | 8 | 1. 
Redistributions of source code must retain the above copyright notice, this 9 | list of conditions and the following disclaimer. 10 | 11 | 2. Redistributions in binary form must reproduce the above copyright notice, 12 | this list of conditions and the following disclaimer in the documentation 13 | and/or other materials provided with the distribution. 14 | 15 | 3. Neither the name of the copyright holder nor the names of its 16 | contributors may be used to endorse or promote products derived from 17 | this software without specific prior written permission. 18 | 19 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 20 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 22 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 23 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 25 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 26 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 27 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 | -------------------------------------------------------------------------------- /llm_agent/webex_chat/chat_api_client.py: -------------------------------------------------------------------------------- 1 | """ 2 | This module contains the function to send a message to the chat API. 3 | """ 4 | 5 | import time 6 | import requests 7 | 8 | from llm_agent.config.global_settings import ( 9 | HOST_URL, 10 | LLM_HTTP_PORT, 11 | ) 12 | from llm_agent.log_config.logger_setup import logger 13 | 14 | FASTAPI_REST_PATH = "chat" 15 | MAX_NUMBER_OF_TRIES_TO_CONNECT = 11 16 | 17 | 18 | def send_message_to_chat_api(message: str) -> str: 19 | """ 20 | Sends a message to the chat API and returns the response. 21 | 22 | Args: 23 | message (str): The message to send. 24 | 25 | Returns: 26 | str: The response from the API, or an error message if the request failed. 27 | """ 28 | url = f"http://{HOST_URL}:{LLM_HTTP_PORT}/{FASTAPI_REST_PATH}" 29 | data = {"message": message} 30 | for retries in range(MAX_NUMBER_OF_TRIES_TO_CONNECT): 31 | logger.debug("Attempting to connect: %s", retries) 32 | try: 33 | response = requests.post(url, json=data, timeout=120) 34 | if response.status_code == 200: 35 | return response.json() 36 | else: 37 | logger.error( 38 | "send_message_to_chat_api: Error connecting to fastAPI: http status code: %s, http response: %s, url: %s", 39 | response.status_code, 40 | response.text, 41 | url, 42 | ) 43 | except requests.exceptions.RequestException as e: 44 | logger.error("send_message_to_chat_api: Error from fastAPI: %s", e) 45 | time.sleep(0.5) 46 | 47 | return ( 48 | "Ouch, I got an error from fastAPI server, can't connect to the LLM." 
49 | ) 50 | -------------------------------------------------------------------------------- /telegraf/dockerfile: -------------------------------------------------------------------------------- 1 | ARG TELEGRAF_VERSION 2 | FROM telegraf:${TELEGRAF_VERSION} as base 3 | 4 | ENV PYTHONUNBUFFERED=1 \ 5 | PIP_NO_CACHE_DIR=true \ 6 | PIP_ROOT_USER_ACTION=ignore \ 7 | PIP_NO_WARN_SCRIPT_LOCATION=0 \ 8 | PIP_DISABLE_ROOT_WARNING=ignore 9 | 10 | RUN apt update \ 11 | && apt install -y python3 python3-venv \ 12 | && python3 -m venv /opt/venv 13 | 14 | ENV PATH="/opt/venv/bin:$PATH" 15 | 16 | COPY requirements.txt /tmp/requirements.txt 17 | 18 | RUN python3 -m pip install --upgrade pip \ 19 | && pip install -r /tmp/requirements.txt \ 20 | && rm /tmp/requirements.txt \ 21 | && echo 'alias ll="ls -al"' >> ~/.bashrc 22 | 23 | FROM base as telegraf 24 | 25 | EXPOSE 57500 26 | 27 | ARG TELEGRAF_ORG \ 28 | TELEGRAF_TOKEN \ 29 | TELEGRAF_BUCKET 30 | 31 | ENV TELEGRAF_ORG=${TELEGRAF_ORG} \ 32 | TELEGRAF_TOKEN=${TELEGRAF_TOKEN} \ 33 | TELEGRAF_BUCKET=${TELEGRAF_BUCKET} 34 | 35 | COPY config/ /tmp/config/ 36 | 37 | RUN mv /tmp/config/telegraf.conf /etc/telegraf/telegraf.conf \ 38 | && mv /tmp/config/netconf.conf /etc/telegraf/telegraf.d/netconf.conf \ 39 | && mv /tmp/config/grpc.conf /etc/telegraf/telegraf.d/grpc.conf \ 40 | && mv /tmp/config/cat8000v-0_settings.json /home/cat8000v-0_settings.json \ 41 | && mv /tmp/config/cat8000v-1_settings.json /home/cat8000v-1_settings.json \ 42 | && mv /tmp/config/cat8000v-2_settings.json /home/cat8000v-2_settings.json \ 43 | && rm -rf /tmp/config 44 | 45 | # Run from root dir 46 | # docker build --target telegraf --file telegraf.dockerfile --tag telegraf:test . 47 | # docker run -itd -p 57500:57500 --name telegraf --add-host host.docker.internal:host-gateway telegraf:test 48 | # docker exec -it telegraf bash 49 | # tail -F /tmp/telegraf-grpc.log -------------------------------------------------------------------------------- /llm_agent/langchain_tools/lc_isis.py: -------------------------------------------------------------------------------- 1 | """ 2 | Wrapper functions to work with langchain tools and openAI 3 | """ 4 | 5 | from langchain.agents import tool 6 | 7 | from llm_agent.utils.text_utils import output_to_json 8 | from llm_agent.pyats_connector.api.isis import ( 9 | isis_neighbors, 10 | isis_interface_events, 11 | isis_interfaces, 12 | ) 13 | 14 | 15 | @tool 16 | def verify_active_isis_neighbors(device_name: str) -> dict: 17 | """ 18 | Retrieves the ISIS neighbors for a given device. Neighbors down are not included. 19 | 20 | Args: 21 | device_name (str): Must come from the function get_devices_list_available 22 | 23 | Returns: 24 | dict: A dictionary containing the ISIS neighbors information. 25 | """ 26 | return output_to_json(isis_neighbors(device_name)) 27 | 28 | 29 | @tool 30 | def get_isis_interface_events(device_name: str) -> dict: 31 | """ 32 | Retrieves ISIS interface events for a given device. 33 | 34 | Args: 35 | device_name (str): Must come from the function get_devices_list_available 36 | 37 | Returns: 38 | dict: A dictionary containing the ISIS interface events. 39 | """ 40 | return output_to_json(isis_interface_events(device_name)) 41 | 42 | 43 | @tool 44 | def get_isis_interface_information( 45 | device_name: str, vrf_name: str = "default" 46 | ) -> list: 47 | """ 48 | Retrieves the ISIS interfaces for a given device and VRF. 
49 | 50 | Args: 51 | device_name (str): Must come from the function get_devices_list_available 52 | vrf_name (str, optional): The name of the VRF. Defaults to "default". 53 | 54 | Returns: 55 | list: A list of ISIS interfaces. 56 | 57 | """ 58 | return output_to_json(isis_interfaces(device_name, vrf_name)) 59 | -------------------------------------------------------------------------------- /llm_agent/langchain_tools/lc_tools_list.py: -------------------------------------------------------------------------------- 1 | """ 2 | Import all the tools from the langchain_tools folder 3 | """ 4 | 5 | from llm_agent.langchain_tools.lc_inventory import get_devices_list_available 6 | from llm_agent.langchain_tools.lc_device_health_state import ( 7 | get_health_memory, 8 | get_health_cpu, 9 | get_health_logging, 10 | ) 11 | from llm_agent.langchain_tools.lc_interface_config import ( 12 | get_interface_running_config, 13 | get_interfaces_description, 14 | ) 15 | from llm_agent.langchain_tools.lc_interface_operations import ( 16 | action_shut_interface, 17 | action_unshut_interface, 18 | ) 19 | from llm_agent.langchain_tools.lc_interface_state import ( 20 | get_interfaces_status, 21 | get_single_interface_status, 22 | get_interface_information, 23 | get_interface_admin_status, 24 | verify_interface_state_up, 25 | get_interface_events, 26 | ) 27 | from llm_agent.langchain_tools.lc_isis import ( 28 | verify_active_isis_neighbors, 29 | get_isis_interface_events, 30 | get_isis_interface_information, 31 | ) 32 | from llm_agent.langchain_tools.lc_routing import ( 33 | get_vrf_present, 34 | get_interface_interfaces_under_vrf, 35 | get_routing_routes, 36 | ) 37 | 38 | tools = [ 39 | get_devices_list_available, 40 | get_health_memory, 41 | get_health_cpu, 42 | get_health_logging, 43 | get_interface_running_config, 44 | action_shut_interface, 45 | action_unshut_interface, 46 | get_interfaces_status, 47 | get_single_interface_status, 48 | get_interface_information, 49 | get_interfaces_description, 50 | get_interface_admin_status, 51 | verify_interface_state_up, 52 | get_interface_events, 53 | verify_active_isis_neighbors, 54 | get_isis_interface_events, 55 | get_isis_interface_information, 56 | get_vrf_present, 57 | get_interface_interfaces_under_vrf, 58 | get_routing_routes, 59 | ] 60 | -------------------------------------------------------------------------------- /llm_agent/config/pyats_testbed.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | cat8000v-0: 3 | type: router 4 | os: iosxe 5 | credentials: 6 | default: 7 | username: cisco 8 | password: cisco 9 | connections: 10 | cli: 11 | protocol: ssh 12 | ip: 10.10.20.215 13 | netconf: 14 | class: yang.connector.Netconf 15 | port: 830 16 | ip: 10.10.20.215 17 | 18 | cat8000v-1: 19 | type: router 20 | alias: uut 21 | os: iosxe 22 | credentials: 23 | default: 24 | username: cisco 25 | password: cisco 26 | connections: 27 | cli: 28 | protocol: ssh 29 | ip: 10.10.20.216 30 | netconf: 31 | class: yang.connector.Netconf 32 | port: 830 33 | ip: 10.10.20.216 34 | 35 | cat8000v-2: 36 | type: router 37 | os: iosxe 38 | credentials: 39 | default: 40 | username: cisco 41 | password: cisco 42 | connections: 43 | cli: 44 | protocol: ssh 45 | ip: 10.10.20.217 46 | netconf: 47 | class: yang.connector.Netconf 48 | port: 830 49 | ip: 10.10.20.217 50 | 51 | # sandbox-iosxe-latest-1.cisco.com: 52 | # type: router 53 | # os: iosxe 54 | # credentials: 55 | # default: 56 | # username: admin 57 | # password: C1sco12345 58 | # 
connections: 59 | # cli: 60 | # protocol: ssh 61 | # ip: sandbox-iosxe-latest-1.cisco.com 62 | # netconf: 63 | # class: yang.connector.Netconf 64 | # port: 830 65 | # ip: sandbox-iosxe-latest-1.cisco.com 66 | 67 | # sandbox-iosxr-1.cisco.com: 68 | # type: "router" 69 | # os: "iosxr" 70 | # platform: "iosxrv" 71 | # alias: "xr" 72 | # credentials: 73 | # default: 74 | # username: admin 75 | # password: C1sco12345 76 | # connections: 77 | # cli: 78 | # protocol: ssh 79 | # ip: "sandbox-iosxr-1.cisco.com" 80 | -------------------------------------------------------------------------------- /llm_agent/langchain_tools/lc_routing.py: -------------------------------------------------------------------------------- 1 | """ 2 | Wrapper functions to work with langchain tools and openAI 3 | """ 4 | 5 | from langchain.agents import tool 6 | 7 | from llm_agent.utils.text_utils import output_to_json 8 | from llm_agent.pyats_connector.api.routing import ( 9 | vrfs_present, 10 | interface_interfaces_under_vrf, 11 | route_entries, 12 | ) 13 | 14 | 15 | @tool 16 | def get_vrf_present(device_name: str) -> list: 17 | """ 18 | Get all vrfs from device 19 | 20 | Args: 21 | device_name (str): Must come from the function get_devices_list_available 22 | 23 | Returns: 24 | list: List of vrfs present on the device 25 | """ 26 | return output_to_json(vrfs_present(device_name)) 27 | 28 | 29 | @tool 30 | def get_interface_interfaces_under_vrf( 31 | device_name: str, vrf_name: str = None 32 | ) -> list: 33 | """ 34 | Get interfaces configured under specific Vrf 35 | 36 | Args: 37 | device_name (str): Must come from the function get_devices_list_available 38 | vrf_name (str, optional): Name of the VRF. Defaults to None. 39 | 40 | Returns: 41 | list: List of interfaces configured under the specified VRF 42 | """ 43 | return output_to_json( 44 | interface_interfaces_under_vrf(device_name, vrf_name) 45 | ) 46 | 47 | 48 | @tool 49 | def get_routing_routes( 50 | device_name: str, vrf_name: str = None, address_family: str = "ipv4" 51 | ) -> dict: 52 | """ 53 | TODO: Need to reduce the amount of inrormation returned 54 | Execute 'show ip route vrf ' and retrieve the routes 55 | 56 | Args: 57 | device_name (str): Must come from the function get_devices_list_available 58 | vrf_name (str, optional): The name of the VRF. Defaults to None. 59 | address_family (str, optional): The address family name. Defaults to "ipv4". 60 | 61 | Returns: 62 | dict: A dictionary containing the received routes. 
63 | """ 64 | return output_to_json(route_entries(device_name, vrf_name, address_family)) 65 | -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | services: 2 | llm_agent: 3 | build: 4 | context: ./ 5 | dockerfile: llm_agent.Dockerfile 6 | args: 7 | OPENAI_API_KEY: ${OPENAI_API_KEY} 8 | GRAFANA_WEB_HOOK: ${GRAFANA_WEB_HOOK} 9 | LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY} 10 | LANGCHAIN_PROJECT: ${LANGCHAIN_PROJECT} 11 | WEBEX_ROOM_ID: ${WEBEX_ROOM_ID} 12 | WEBEX_TEAMS_ACCESS_TOKEN: ${WEBEX_TEAMS_ACCESS_TOKEN} 13 | WEBEX_APPROVED_USERS_MAIL: ${WEBEX_APPROVED_USERS_MAIL} 14 | container_name: llm_agent 15 | ports: 16 | - "5001:5001" 17 | 18 | grafana: 19 | build: 20 | context: ./grafana 21 | args: 22 | LLM_HTTP_PORT: ${LLM_HTTP_PORT} 23 | GRAFANA_TOKEN: ${GRAFANA_TOKEN} 24 | WEBEX_ROOM_ID: ${WEBEX_ROOM_ID} 25 | GRAFANA_VERSION: ${GRAFANA_VERSION} 26 | GRAFANA_WEB_HOOK: ${GRAFANA_WEB_HOOK} 27 | WEBEX_TEAMS_ACCESS_TOKEN: ${WEBEX_TEAMS_ACCESS_TOKEN} 28 | container_name: grafana 29 | ports: 30 | - "3000:3000" 31 | 32 | influxdb: 33 | build: 34 | context: ./influxdb 35 | args: 36 | INFLUXDB_VERSION: ${INFLUXDB_VERSION} 37 | DOCKER_INFLUXDB_INIT_MODE: ${INFLUXDB_MODE} 38 | DOCKER_INFLUXDB_INIT_USERNAME: ${INFLUXDB_USERNAME} 39 | DOCKER_INFLUXDB_INIT_PASSWORD: ${INFLUXDB_PASSWORD} 40 | DOCKER_INFLUXDB_INIT_ORG: ${INFLUXDB_ORG} 41 | DOCKER_INFLUXDB_INIT_BUCKET: ${INFLUXDB_BUCKET} 42 | DOCKER_INFLUXDB_INIT_ADMIN_TOKEN: ${INFLUXDB_ADMIN_TOKEN} 43 | container_name: influxdb 44 | ports: 45 | - "8086:8086" 46 | volumes: 47 | - influxdb:/var/lib/influxdb2 48 | 49 | telegraf: 50 | build: 51 | context: ./telegraf 52 | args: 53 | TELEGRAF_VERSION: ${TELEGRAF_VERSION} 54 | TELEGRAF_ORG: ${TELEGRAF_ORG} 55 | TELEGRAF_TOKEN: ${TELEGRAF_TOKEN} 56 | TELEGRAF_BUCKET: ${TELEGRAF_BUCKET} 57 | container_name: telegraf 58 | ports: 59 | - "57500:57500" 60 | 61 | volumes: 62 | influxdb: 63 | -------------------------------------------------------------------------------- /llm_agent/langchain_tools/lc_device_health_state.py: -------------------------------------------------------------------------------- 1 | """ 2 | Wrapper functions to work with langchain tools and openAI 3 | """ 4 | 5 | from langchain.agents import tool 6 | 7 | from llm_agent.utils.text_utils import output_to_json 8 | from llm_agent.pyats_connector.api.device_health_state import ( 9 | health_cpu, 10 | health_memory, 11 | health_logging, 12 | ) 13 | 14 | 15 | @tool 16 | def get_health_memory(device_name: str) -> dict: 17 | """ 18 | Retrieves the memory health information for a given device. 19 | 20 | Args: 21 | device_name (str): Must come from the function get_devices_list_available 22 | 23 | Returns: 24 | dict: A dictionary containing the memory health information. Empty is good. 25 | """ 26 | return output_to_json(health_memory(device_name)) 27 | 28 | 29 | @tool 30 | def get_health_cpu(device_name: str) -> dict: 31 | """ 32 | Retrieves the CPU health information for a given device. 33 | 34 | Args: 35 | device_name (str): Must come from the function get_devices_list_available 36 | 37 | Returns: 38 | dict: A dictionary containing the CPU health information. Empty is good. 39 | """ 40 | return output_to_json(health_cpu(device_name)) 41 | 42 | 43 | @tool 44 | def get_health_logging( 45 | device_name: str, 46 | keywords: list[str] = None, 47 | ) -> dict: 48 | """ 49 | Retrieves health logging information from a device. 
50 | 51 | Args: 52 | device_name (str): Must come from the function get_devices_list_available 53 | keywords (list[str], optional): List of keywords to filter the health logging information. 54 | Defaults to traceback, error, down and adjchange. 55 | 56 | Returns: 57 | dict: The health logging information in JSON format. 58 | """ 59 | if keywords is None: 60 | keywords = [ 61 | "traceback", 62 | "Traceback", 63 | "TRACEBACK", 64 | "rror", 65 | "own", 66 | "ADJCHANGE", 67 | ] 68 | return output_to_json(health_logging(device_name, keywords)) 69 | -------------------------------------------------------------------------------- /llm_agent/config/global_settings.py: -------------------------------------------------------------------------------- 1 | """ 2 | This module is responsible for loading global settings for the application. 3 | 4 | It imports necessary modules and functions, 5 | defines a function for retrieving environment variables, 6 | and sets several global variables. 7 | 8 | Variables: 9 | GLOBAL_SETTINGS_FILE (str): The path to the global settings JSON file. 10 | global_config (dict): The loaded global settings from the JSON file. 11 | TESTBED_FILE (str): The path to the testbed file, retrieved from the global settings. 12 | HOST_URL (str): The host URL for the application, retrieved from the global settings. 13 | PORT (int): The port for the application, retrieved from the global settings. 14 | WEBEX_TEAMS_ACCESS_TOKEN (str): The access token for Webex Teams, retrieved from an environment variable. 15 | WEBEX_APPROVED_USERS_MAIL (str): The approved users mail for Webex, retrieved from an environment variable. 16 | OPENAI_API_KEY (str): The API key for OpenAI, retrieved from an environment variable. 17 | """ 18 | 19 | import os 20 | from llm_agent.utils.text_utils import load_json_file 21 | 22 | GLOBAL_SETTINGS_FILE = "llm_agent/config/global_settings.json" 23 | 24 | 25 | def get_environment_variable(envvar: str) -> str: 26 | """ 27 | Retrieve the value of an environment variable. 28 | If the environment variable is not set, raise an exception. 29 | """ 30 | value = os.getenv(envvar) 31 | if value is None: 32 | raise EnvironmentError( 33 | f"The required environment variable {envvar} is not set." 34 | ) 35 | return value 36 | 37 | 38 | global_config = load_json_file(json_file=GLOBAL_SETTINGS_FILE) 39 | 40 | HOST_URL = global_config.get("host_url") 41 | LLM_HTTP_PORT = global_config.get("llm_http_port") 42 | TESTBED_FILE = global_config.get("testbed_file") 43 | LOGGING_CONFIG_FILE = global_config.get("logging_config_file") 44 | 45 | WEBEX_TEAMS_ACCESS_TOKEN = get_environment_variable("WEBEX_TEAMS_ACCESS_TOKEN") 46 | 47 | WEBEX_APPROVED_USERS_MAIL = get_environment_variable( 48 | "WEBEX_APPROVED_USERS_MAIL" 49 | ) 50 | 51 | OPENAI_API_KEY = get_environment_variable("OPENAI_API_KEY") 52 | -------------------------------------------------------------------------------- /llm_agent/pyats_connector/api/device_health_state.py: -------------------------------------------------------------------------------- 1 | """ 2 | Script to retrieve the health state of a device using the pyATS framework. 3 | """ 4 | 5 | from llm_agent.pyats_connector.connection_methods import api_connect 6 | 7 | 8 | def health_memory(device_name: str) -> dict: 9 | """ 10 | Retrieves the memory health information for a given device. 11 | 12 | Args: 13 | device_name (str): Must come from the function get_devices_list_available 14 | 15 | Returns: 16 | dict: A dictionary containing the memory health information. 
Empty is good. 17 | """ 18 | result = api_connect(device_name=device_name, method="health_memory") 19 | if not result["health_data"]: 20 | return {"message": "No memory health issues detected on the device"} 21 | return result 22 | 23 | 24 | def health_cpu(device_name: str) -> dict: 25 | """ 26 | Retrieves the CPU health information for a given device. 27 | 28 | Args: 29 | device_name (str): Must come from the function get_devices_list_available 30 | 31 | Returns: 32 | dict: A dictionary containing the CPU health information. Empty is good. 33 | """ 34 | result = api_connect(device_name=device_name, method="health_cpu") 35 | if not result["health_data"]: 36 | return {"message": "No CPU health issues detected on the device"} 37 | return result 38 | 39 | 40 | def health_logging(device_name: str, keywords: list[str] = None) -> dict: 41 | """ 42 | Retrieves health logging information from a device. 43 | 44 | Args: 45 | device_name (str): Must come from the function get_devices_list_available 46 | keywords (list[str], optional): List of keywords to filter the health logging information. 47 | Defaults to traceback, error, down and adjchange. 48 | 49 | Returns: 50 | dict: The health logging information in JSON format. 51 | """ 52 | if keywords is None: 53 | keywords = [ 54 | "traceback", 55 | "Traceback", 56 | "TRACEBACK", 57 | "rror", 58 | "own", 59 | "ADJCHANGE", 60 | ] 61 | 62 | result = api_connect( 63 | device_name=device_name, 64 | method="health_logging", 65 | args={"keywords": keywords}, 66 | ) 67 | 68 | if not result["health_data"]: 69 | return {"message": "No issues detected on the logs of the device"} 70 | return result 71 | -------------------------------------------------------------------------------- /llm_agent/pyats_connector/api/routing.py: -------------------------------------------------------------------------------- 1 | """ 2 | Script to retrieve routing information from a device using the pyATS framework. 3 | """ 4 | 5 | from llm_agent.pyats_connector.connection_methods import api_connect 6 | 7 | 8 | def vrfs_present(device_name: str) -> list: 9 | """ 10 | Get all vrfs from device 11 | 12 | Args: 13 | device_name (str): Must come from the function get_devices_list_available 14 | 15 | Returns: 16 | list: List of vrfs present on the device. If no vrfs are found, returns ["NO_VRFs_FOUND"]. 17 | """ 18 | result = api_connect( 19 | device_name=device_name, 20 | method="get_vrf_vrfs", 21 | ) 22 | if not result: 23 | return [f"NO_VRFs_FOUND on device {device_name}"] 24 | return list(result.keys()) 25 | 26 | 27 | def interface_interfaces_under_vrf(device_name: str, vrf_name: str) -> list: 28 | """ 29 | Get interfaces configured under specific Vrf 30 | 31 | Args: 32 | device_name (str): Must come from the function get_devices_list_available 33 | vrf_name (str, optional): Name of the VRF. Defaults to None. 34 | 35 | Returns: 36 | list: List of interfaces configured under the specified VRF 37 | """ 38 | result = api_connect( 39 | device_name=device_name, 40 | method="get_interface_interfaces_under_vrf", 41 | args=vrf_name, 42 | ) 43 | if not result: 44 | return [ 45 | f"NO_INTERFACES_FOUND_FOR_VRF: {vrf_name} on DEVICE {device_name}" 46 | ] 47 | return result 48 | 49 | 50 | def route_entries( 51 | device_name: str, vrf_name: str = None, address_family: str = "ipv4" 52 | ) -> dict: 53 | """ 54 | Execute 'show ip route vrf ' and retrieve the routes. 55 | 56 | Args: 57 | device_name (str): The name of the device. Must come from the function get_devices_list_available. 
58 | vrf_name (str, optional): The name of the VRF. Defaults to None. 59 | address_family (str, optional): The address family name. Defaults to "ipv4". 60 | 61 | Returns: 62 | dict: A dictionary containing the received routes. 63 | """ 64 | result = api_connect( 65 | device_name=device_name, 66 | method="get_routing_routes", 67 | args={"vrf": vrf_name, "address_family": address_family}, 68 | ) 69 | if not result: 70 | return {"error": f"NO_ROUTES_FOUND_FOR_VRF_{vrf_name}"} 71 | return result 72 | -------------------------------------------------------------------------------- /llm_agent/log_config/logger_setup.py: -------------------------------------------------------------------------------- 1 | """ 2 | Example taken from 3 | https://github.com/mCodingLLC/VideosSampleCode/blob/master/videos/135_modern_logging/main.py 4 | 5 | See youtube video associated with this code: 6 | https://youtu.be/9L77QExPmI0 7 | """ 8 | 9 | import os 10 | import pathlib 11 | import logging.config 12 | from llm_agent.utils.text_utils import load_json_file 13 | from llm_agent.config.global_settings import ( 14 | LOGGING_CONFIG_FILE, 15 | ) 16 | 17 | 18 | def setup_logging(): 19 | """ 20 | Set up logging configuration. 21 | 22 | This function loads the logging configuration from a JSON file, 23 | ensures the log file exists, and configures the logging module. 24 | 25 | Returns: 26 | logger: The logger object. 27 | 28 | """ 29 | config_file = pathlib.Path(LOGGING_CONFIG_FILE) 30 | config = load_json_file(config_file) 31 | 32 | log_file = _get_log_file_name(config) 33 | _ensure_log_file_exists(log_file) 34 | 35 | logging.config.dictConfig(config) 36 | 37 | return logging.getLogger() 38 | 39 | 40 | def _ensure_log_file_exists(filename): 41 | """ 42 | Ensures a log file exists. If it doesn't, creates it. 43 | 44 | Args: 45 | filename (str): The path and name of the log file. 46 | 47 | Returns: 48 | None 49 | """ 50 | if not os.path.exists(filename): 51 | os.makedirs(os.path.dirname(filename), exist_ok=True) 52 | with open(filename, "w", encoding="utf-8"): 53 | pass 54 | 55 | 56 | def _get_log_file_name(config): 57 | """ 58 | Extracts the log file name from the configuration. 59 | 60 | Args: 61 | config (dict): The logging configuration. 62 | 63 | Returns: 64 | str: The log file name. 65 | """ 66 | return config["handlers"]["file"]["filename"] 67 | 68 | 69 | logger = setup_logging() 70 | logger.debug("Setting up logging using %s", LOGGING_CONFIG_FILE) 71 | 72 | 73 | def main(): 74 | """ 75 | It sets up the logger and demonstrates logging at different levels. 
76 | """ 77 | logger.debug("debug message", extra={"x": "hello"}) 78 | logger.info("info message") 79 | logger.warning("warning message") 80 | logger.error("error message") 81 | logger.critical("critical message") 82 | try: 83 | 1 / 0 84 | except ZeroDivisionError: 85 | logger.exception("exception message") 86 | 87 | 88 | if __name__ == "__main__": 89 | main() 90 | -------------------------------------------------------------------------------- /telegraf/config/netconf.conf: -------------------------------------------------------------------------------- 1 | [[inputs.exec]] 2 | commands = ["python3 -m ncpeek --device-settings=/home/cat8000v-0_settings.json --xml-filter=Cisco-IOS-XE-interfaces-oper.xml"] 3 | json_name_key = "field" 4 | tag_keys = ["name", "device", "ip"] 5 | timeout = "60s" 6 | data_format = "json" 7 | 8 | [[inputs.exec]] 9 | commands = ["python3 -m ncpeek --device-settings=/home/cat8000v-1_settings.json --xml-filter=Cisco-IOS-XE-interfaces-oper.xml"] 10 | json_name_key = "field" 11 | tag_keys = ["name", "device", "ip"] 12 | timeout = "60s" 13 | data_format = "json" 14 | 15 | [[inputs.exec]] 16 | commands = ["python3 -m ncpeek --device-settings=/home/cat8000v-2_settings.json --xml-filter=Cisco-IOS-XE-interfaces-oper.xml"] 17 | json_name_key = "field" 18 | tag_keys = ["name", "device", "ip"] 19 | timeout = "60s" 20 | data_format = "json" 21 | 22 | [[inputs.exec]] 23 | commands = ["python3 -m ncpeek --device-settings=/home/cat8000v-0_settings.json --xml-filter=Cisco-IOS-XE-memory-oper.xml"] 24 | json_name_key = "field" 25 | tag_keys = ["name", "device", "ip"] 26 | timeout = "60s" 27 | data_format = "json" 28 | 29 | [[inputs.exec]] 30 | commands = ["python3 -m ncpeek --device-settings=/home/cat8000v-1_settings.json --xml-filter=Cisco-IOS-XE-memory-oper.xml"] 31 | json_name_key = "field" 32 | tag_keys = ["name", "device", "ip"] 33 | timeout = "60s" 34 | data_format = "json" 35 | 36 | [[inputs.exec]] 37 | commands = ["python3 -m ncpeek --device-settings=/home/cat8000v-2_settings.json --xml-filter=Cisco-IOS-XE-memory-oper.xml"] 38 | json_name_key = "field" 39 | tag_keys = ["name", "device", "ip"] 40 | timeout = "60s" 41 | data_format = "json" 42 | 43 | [[inputs.exec]] 44 | commands = ["python3 -m ncpeek --device-settings=/home/cat8000v-0_settings.json --xpath-filter=http://cisco.com/ns/yang/Cisco-IOS-XE-isis-oper:/isis-oper-data/isis-instance"] 45 | json_name_key = "field" 46 | tag_keys = ["name", "device", "ip"] 47 | timeout = "60s" 48 | data_format = "json" 49 | 50 | [[inputs.exec]] 51 | commands = ["python3 -m ncpeek --device-settings=/home/cat8000v-1_settings.json --xpath-filter=http://cisco.com/ns/yang/Cisco-IOS-XE-isis-oper:/isis-oper-data/isis-instance"] 52 | json_name_key = "field" 53 | tag_keys = ["name", "device", "ip"] 54 | timeout = "60s" 55 | data_format = "json" 56 | 57 | [[inputs.exec]] 58 | commands = ["python3 -m ncpeek --device-settings=/home/cat8000v-2_settings.json --xpath-filter=http://cisco.com/ns/yang/Cisco-IOS-XE-isis-oper:/isis-oper-data/isis-instance"] 59 | json_name_key = "field" 60 | tag_keys = ["name", "device", "ip"] 61 | timeout = "60s" 62 | data_format = "json" 63 | -------------------------------------------------------------------------------- /llm_agent/llm/prompts.py: -------------------------------------------------------------------------------- 1 | """ 2 | This module contains the prompts used by the AI agent. 
3 | """ 4 | 5 | SYSTEM_PROMPT = """ 6 | As a helpful AI assistant, your role is to troubleshoot issues on network devices on behalf of users. Follow these guidelines: 7 | 1. Always use the 'get_devices_list_available' function to obtain the list of available device names. Do not use this function to connect to the devices. Once you have the correct device name, use it as an input to the appropriate network functions to retrieve information from the device. 8 | 2. Anticipate user errors in device names. Use the list obtained from the 'get_devices_list_available' function to find the closest match to the user's input. 9 | 3. You may receive multiple alerts at a time. Analyze each alert and consider if they could be related to a previous one. Consider previous steps taken, current issues, and how all events relate to each other when deciding on the next steps. 10 | 4. Verify if the alert is active or a false positive. 11 | 5. After obtaining the correct device name, the first step in troubleshooting is to review the logs of the device. Correlate the log messages with the alert to see if they are related. 12 | 6. Always review the CPU and memory usage of the device to see if the device is under stress. 13 | 7. Then review the status of the interfaces to discard any layer 1 or layer 2 issues. 14 | 8. Always use available network functions to gather device information and provide insights. 15 | 9. Always execute network functions directly, don't just print the commands that would be used. 16 | 10. Always use real devices and information. If a device doesn't exist, inform the user and stop the process. 17 | 11. Always use the interface description of the devices to find out to which device is directly connected to. 18 | 12. Limit connection attempts to a device to two. If unsuccessful, stop the process. 19 | 13. Always provide a summary of the alert received, so users know why are you contacting them. 20 | 14. Always provide a summary with actionable steps for the user to resolve the issue, then apply the steps you suggest directly. You are free to grab any information you need as long as you don't do configuration changes. Don't wait for the user to tell you to start. 21 | 15. If you need to perform a configuration change, always ask the user permision before doing so. Provide a summary of the changes you are going to make and ask for confirmation, why you need to do it, what configuration you are going to change and what are the expected results. 22 | 16. Present results in markdown format. 23 | 17. Must use as much as possible many emojis that are relevant to your messages to make them more human-friendly. 24 | """ 25 | -------------------------------------------------------------------------------- /llm_agent/app.py: -------------------------------------------------------------------------------- 1 | """ 2 | This module defines the FastAPI application and the endpoints for the chat API. 3 | 4 | It imports necessary modules and functions, sets up logging, 5 | loads global settings, and initializes the chat agent and the Webex bot manager. 6 | 7 | It defines a Pydantic model for the message data and two POST endpoints: 8 | one for sending messages to the chat agent and another for processing alerts. 
9 | """ 10 | 11 | import threading 12 | import uvicorn 13 | from fastapi import FastAPI 14 | 15 | from llm_agent.config.global_settings import ( 16 | HOST_URL, 17 | LLM_HTTP_PORT, 18 | ) 19 | 20 | from llm_agent.llm.agent import LLMChatAgent 21 | from llm_agent.webex_chat.bot import WebexBotManager 22 | from llm_agent.log_config.logger_setup import logger 23 | from llm_agent.fastAPI.models import Message, GrafanaWebhookMessage 24 | 25 | 26 | logger.info("Starting the FastAPI application.") 27 | 28 | app = FastAPI() 29 | chat_agent = LLMChatAgent() 30 | webex_bot_manager = WebexBotManager() 31 | 32 | 33 | @app.post("/chat") 34 | def chat_to_llm(message: Message) -> str: 35 | """ 36 | Process the given message and return a response from the chat agent. 37 | 38 | Args: 39 | message (Message): The message to be processed. 40 | 41 | Returns: 42 | str: The response from the chat agent. 43 | """ 44 | logger.info("WEBEX_MESSAGE_SENT_TO_LLM: %s", message.message) 45 | return chat_agent.chat(message.message) 46 | 47 | 48 | @app.post("/alert") 49 | async def alert(message: GrafanaWebhookMessage) -> dict: 50 | """ 51 | This function receives a webhook alert and starts processing it. 52 | Grafana sends a webhook empty as a keepalive. 53 | 'Firing' is used to identify a real alert. 54 | """ 55 | logger.info("WEBHOOK_MESSAGE_RECEIVED: %s", message) 56 | if message.status.lower() == "firing": 57 | process_alert(message) 58 | return {"status": "success"} 59 | 60 | 61 | def process_alert(message: Message) -> None: 62 | """ 63 | This function sends the alert to the LLM 64 | and sends the result of the initial analysis to the Webex room. 65 | """ 66 | notification = chat_agent.notification(message) 67 | notify(notification) 68 | 69 | 70 | def notify(notification: str) -> None: 71 | """ 72 | Sends a notification message. 73 | """ 74 | logger.info("SENDING_NOTIFICATON_TO_WEBEX: %s", notification) 75 | webex_bot_manager.send_notification(notification) 76 | 77 | 78 | if __name__ == "__main__": 79 | threading.Thread( 80 | target=uvicorn.run, 81 | args=("app:app",), 82 | kwargs={"host": HOST_URL, "port": LLM_HTTP_PORT}, 83 | ).start() 84 | webex_bot_manager.run() 85 | -------------------------------------------------------------------------------- /llm_agent/pyats_connector/connection_methods.py: -------------------------------------------------------------------------------- 1 | """ 2 | This module provides functions for connecting to devices using PyATSConnection API. 3 | """ 4 | 5 | from typing import Optional, Union, Dict 6 | 7 | from llm_agent.log_config.logger_setup import logger 8 | from llm_agent.pyats_connector.connection_handler import PyATSConnection 9 | 10 | 11 | def api_connect( 12 | device_name: str, 13 | method: str, 14 | args: Optional[Union[str, Dict[str, str]]] = None, 15 | ) -> any: 16 | """ 17 | Connects to a device using PyATSConnection API and executes a specified method. 18 | 19 | Args: 20 | device_name (str): The name of the device to connect to. 21 | method (str): The name of the method to execute on the device. 22 | args (dict, optional): A dictionary of arguments to pass to the method. Defaults to None. 23 | 24 | Returns: 25 | dict: A dictionary containing the result of the method execution or an exception if an error occurs. 
26 | """ 27 | logger.info("EXECUTING METHOD: %s, DEVICE: %s", method, device_name) 28 | logger.debug("ARGS: %s", args) 29 | with PyATSConnection(device_name=device_name) as device_connection: 30 | method_to_call = getattr(device_connection.api, method) 31 | try: 32 | if isinstance(args, dict): 33 | return method_to_call(**args) 34 | elif isinstance(args, str): 35 | return method_to_call(args) 36 | elif isinstance(args, list): 37 | return method_to_call(args) 38 | else: 39 | return method_to_call() 40 | except Exception as e: 41 | logger.error("api_connect error executing method: %s", e) 42 | return {method.__name__: e} 43 | 44 | 45 | def parse_connect(device_name: str, string_to_parse: str) -> any: 46 | """ 47 | Connects to a device using PyATSConnection parse and executes a specified method. 48 | 49 | Args: 50 | device_name (str): The name of the device to connect to. 51 | method (str): The name of the method to execute on the device. 52 | args (dict, optional): A dictionary of arguments to pass to the method. Defaults to None. 53 | 54 | Returns: 55 | dict: A dictionary containing the result of the method execution or an exception if an error occurs. 56 | """ 57 | logger.info("Parsing: %s, DEVICE: %s", string_to_parse, device_name) 58 | with PyATSConnection(device_name=device_name) as device_connection: 59 | method = getattr(device_connection, "parse") 60 | try: 61 | return method(string_to_parse) 62 | except Exception as e: 63 | logger.error("parse_connect error executing method: %s", e) 64 | return {method.__name__: e} 65 | -------------------------------------------------------------------------------- /llm_agent/pyats_connector/api/isis.py: -------------------------------------------------------------------------------- 1 | """ 2 | This module contains the ISIS API functions. 3 | """ 4 | 5 | from llm_agent.pyats_connector.connection_methods import ( 6 | parse_connect, 7 | ) 8 | 9 | 10 | def isis_neighbors(device_name: str) -> dict: 11 | """ 12 | Retrieves the ISIS neighbors for a given device. Neighbors down are not included. 13 | 14 | Args: 15 | device_name (str): Must come from the function get_devices_list_available 16 | 17 | Returns: 18 | dict: A dictionary containing the ISIS neighbors information. 19 | """ 20 | try: 21 | return parse_connect( 22 | device_name=device_name, 23 | string_to_parse="show isis neighbors", 24 | ) 25 | except Exception: 26 | return {"error": f"NO_ISIS_NEIGHBORS_FOUND_ON: {device_name}"} 27 | 28 | 29 | def isis_interface_events(device_name: str) -> dict: 30 | """ 31 | Retrieves ISIS interface events for a given device. 32 | 33 | Args: 34 | device_name (str): Must come from the function get_devices_list_available 35 | 36 | Returns: 37 | dict: A dictionary containing the ISIS interface events. 38 | """ 39 | try: 40 | return parse_connect( 41 | device_name=device_name, 42 | string_to_parse="show isis lsp-log", 43 | ) 44 | except Exception: 45 | return {"error": f"NO_ISIS_CONFIGURED_ON: {device_name}"} 46 | 47 | 48 | def isis_interfaces(device_name: str, vrf_name: str = "default") -> list: 49 | """ 50 | Retrieves the ISIS interfaces for a given device and VRF. 51 | 52 | Args: 53 | device_name (str): Must come from the function get_devices_list_available 54 | vrf_name (str, optional): The name of the VRF. Defaults to "default". 55 | 56 | Returns: 57 | list: A list of ISIS interfaces. 
58 | 59 | """ 60 | try: 61 | result = parse_connect( 62 | device_name=device_name, 63 | string_to_parse="show ip protocols", 64 | ) 65 | except Exception: 66 | return [ 67 | f"NO_ISIS_INTERFACES_FOUND VRF: {vrf_name}, DEVICE: {device_name}" 68 | ] 69 | 70 | intf_isis = _extract_isis_interfaces(data=result) 71 | return intf_isis.get( 72 | vrf_name, 73 | f"NO_ISIS_INTERFACES_FOUND VRF: {vrf_name}, DEVICE: {device_name}", 74 | ) 75 | 76 | 77 | def _extract_isis_interfaces(data: dict) -> dict: 78 | isis_data = data.get("protocols", {}).get("isis", {}).get("vrf", {}) 79 | result = {} 80 | for vrf, vrf_data in isis_data.items(): 81 | interfaces = ( 82 | vrf_data.get("address_family", {}) 83 | .get("ipv4", {}) 84 | .get("instance", {}) 85 | .get("default", {}) 86 | .get("configured_interfaces") 87 | ) 88 | if interfaces is not None: 89 | result[vrf] = interfaces 90 | return result 91 | -------------------------------------------------------------------------------- /llm_agent/webex_chat/bot.py: -------------------------------------------------------------------------------- 1 | """ 2 | This module provides functionality for interacting with Webex Teams. 3 | 4 | The main class is `WebexBotManager`, which encapsulates the logic for creating a bot and sending notifications. 5 | 6 | This module is based on the idea from: https://github.com/fbradyirl/webex_bot 7 | """ 8 | 9 | import logging 10 | from webexteamssdk import WebexTeamsAPI 11 | from webex_bot.webex_bot import WebexBot 12 | from llm_agent.webex_chat.ai_command import AiCommand 13 | from llm_agent.config.global_settings import ( 14 | WEBEX_APPROVED_USERS_MAIL, 15 | WEBEX_TEAMS_ACCESS_TOKEN, 16 | ) 17 | 18 | webexteamssdk_logger = logging.getLogger("webexteamssdk") 19 | webexteamssdk_logger.setLevel(logging.CRITICAL) 20 | 21 | webex_bot_logger = logging.getLogger("webex_bot") 22 | webex_bot_logger.setLevel(logging.CRITICAL) 23 | 24 | 25 | def get_webex_room_id(webex_api: WebexTeamsAPI) -> str: 26 | """ 27 | Retrieve the ID of the first room that contains the specified username in its title. 28 | 29 | TODO: This is a hacky way, only works if there is only one room with the boot 30 | """ 31 | all_rooms = webex_api.rooms.list() 32 | room_id = [room.id for room in all_rooms] 33 | return room_id[0] 34 | 35 | 36 | class WebexBotManager: 37 | """ 38 | This class encapsulates the logic for creating a Webex bot and sending notifications. 39 | """ 40 | 41 | def __init__(self): 42 | 43 | self.bot = self._create_bot() 44 | self.webex_api = self._initialize_webex_api() 45 | self._add_commands() 46 | 47 | def _create_bot(self) -> WebexBot: 48 | """ 49 | Create a new Webex bot. 50 | 51 | :return: The created Webex bot. 52 | """ 53 | return WebexBot( 54 | teams_bot_token=WEBEX_TEAMS_ACCESS_TOKEN, 55 | approved_users=[WEBEX_APPROVED_USERS_MAIL], 56 | bot_name="my-buddy", 57 | include_demo_commands=False, 58 | ) 59 | 60 | def _initialize_webex_api(self) -> WebexTeamsAPI: 61 | """ 62 | Get the Webex API object. 63 | 64 | :return: The Webex API object. 65 | """ 66 | return WebexTeamsAPI(access_token=WEBEX_TEAMS_ACCESS_TOKEN) 67 | 68 | def _add_commands(self) -> None: 69 | self.bot.commands.clear() 70 | self.bot.add_command(AiCommand()) 71 | self.bot.help_command = AiCommand() 72 | 73 | def send_notification(self, message: str) -> None: 74 | """ 75 | Send a message to a specified room. 76 | 77 | :param message: The message to send. 
78 | """ 79 | room_id = get_webex_room_id(self.webex_api) 80 | self.webex_api.messages.create(roomId=room_id, markdown=message) 81 | 82 | def run(self): 83 | """ 84 | Start the bot process. 85 | """ 86 | self.bot.run() 87 | 88 | 89 | if __name__ == "__main__": 90 | webex_bot_manager = WebexBotManager() 91 | webex_bot_manager.run() 92 | -------------------------------------------------------------------------------- /llm_agent/pyats_connector/connection_handler.py: -------------------------------------------------------------------------------- 1 | """ 2 | This module provides a class for managing pyATS connections. 3 | """ 4 | 5 | import logging 6 | from dataclasses import dataclass 7 | from typing import Optional 8 | 9 | 10 | from pyats.topology import loader, Device 11 | 12 | from llm_agent.pyats_connector.inventory import get_devices_from_inventory 13 | from llm_agent.config.global_settings import TESTBED_FILE 14 | from llm_agent.log_config.logger_setup import logger 15 | 16 | 17 | NUMBER_OF_TRIES_TO_CONNECT = 10 18 | 19 | 20 | @dataclass 21 | class PyATSConnection: 22 | """ 23 | A class to manage pyATS connections. 24 | """ 25 | 26 | device_name: str 27 | testbed_file: str = TESTBED_FILE 28 | device_pyats: Optional[Device] = None 29 | 30 | def __enter__(self): 31 | logger.debug("CREATING INSTANCE") 32 | self._establish_connection() 33 | return self.device_pyats 34 | 35 | def _establish_connection(self) -> Device: 36 | """ 37 | Establish a connection to a device using pyATS. 38 | """ 39 | 40 | logger.debug("LOADING DEVICES") 41 | self._load_devices_from_testbed() 42 | self._connection_handler() 43 | self._set_device_settings() 44 | 45 | def _load_devices_from_testbed(self) -> None: 46 | testbed = loader.load(self.testbed_file) 47 | try: 48 | self.device_pyats = testbed.devices[self.device_name] 49 | except KeyError as exc: 50 | logger.error( 51 | "_load_devices_from_testbed error: device not found in testbed %s, error: %s", 52 | self.device_name, 53 | exc, 54 | ) 55 | devices_available = get_devices_from_inventory() 56 | raise KeyError( 57 | f"Device {self.device_name} not found in testbed. 
Devices available are: {devices_available}" 58 | ) from exc 59 | 60 | def _set_device_settings(self) -> None: 61 | # So the connection close faster 62 | self.device_pyats.settings.GRACEFUL_DISCONNECT_WAIT_SEC = 0 63 | self.device_pyats.settings.POST_DISCONNECT_WAIT_SEC = 0 64 | 65 | def _connection_handler(self) -> None: 66 | for _ in range(NUMBER_OF_TRIES_TO_CONNECT): 67 | try: 68 | self._connect_to_device() 69 | break 70 | except ConnectionError as e: 71 | logger.error("PyATSConnection connection failed: %s", e) 72 | 73 | def _connect_to_device(self) -> None: 74 | logger.debug("ESTABLISHING CONNECTION to %s", self.device_name) 75 | self.device_pyats.connect( 76 | mit=True, 77 | via="cli", 78 | learn_hostname=True, 79 | connection_timeout=10, 80 | log_stdout=self._get_logging_level(), 81 | ) 82 | 83 | def _get_logging_level(self) -> bool: 84 | return logger.getEffectiveLevel() == logging.DEBUG 85 | 86 | def __exit__(self, exc_type, exc_val, exc_tb): 87 | logger.debug("CLOSING CONNECTION") 88 | if exc_type is not None: 89 | logger.error( 90 | "PyATSConnection while exiting an error occurred: %s", exc_val 91 | ) 92 | self.device_pyats.disconnect() 93 | logger.debug("CONNECTION CLOSED") 94 | 95 | return False 96 | -------------------------------------------------------------------------------- /llm_agent/langchain_tools/lc_interface_state.py: -------------------------------------------------------------------------------- 1 | """ 2 | Wrapper functions to work with langchain tools and openAI 3 | """ 4 | 5 | from langchain.agents import tool 6 | 7 | from llm_agent.utils.text_utils import output_to_json 8 | from llm_agent.pyats_connector.api.interface_state import ( 9 | interfaces_status, 10 | single_interface_status, 11 | interfaces_information, 12 | interface_admin_status, 13 | verify_state_up, 14 | interface_events, 15 | ) 16 | 17 | 18 | @tool 19 | def get_interfaces_status(device_name: str) -> dict: 20 | """ 21 | Get the status of interfaces on a device. 22 | 23 | Args: 24 | device_name (str): Must come from the function get_devices_list_available 25 | 26 | Returns: 27 | dict: A dictionary containing the status of the interfaces on the device. 28 | """ 29 | return output_to_json(interfaces_status(device_name)) 30 | 31 | 32 | @tool 33 | def get_single_interface_status(device_name: str, interface_name: str) -> dict: 34 | """ 35 | Get the status of a single interface on a device. 36 | 37 | Args: 38 | device_name (str): Must come from the function get_devices_list_available 39 | interface_name (str): The name of the interface. 40 | 41 | Returns: 42 | dict: A dictionary containing the status of the interface. 43 | """ 44 | return output_to_json(single_interface_status(device_name, interface_name)) 45 | 46 | 47 | @tool 48 | def get_interface_information( 49 | device_name: str, interfaces_name: list[str] 50 | ) -> list[dict]: 51 | """ 52 | Get interface information from device for a list of interfaces 53 | 54 | Args: 55 | device_name (str): Must come from the function get_devices_list_available 56 | interfaces_name (list[str]): A list of interface names 57 | 58 | Returns: 59 | list[dict]: A list of dictionaries containing interface information 60 | """ 61 | return output_to_json(interfaces_information(device_name, interfaces_name)) 62 | 63 | 64 | @tool 65 | def get_interface_admin_status(device_name: str, interface_name: str) -> str: 66 | """ 67 | Get the administrative status of a single interface on a device. 
68 | 69 | Args: 70 | device_name (str): Must come from the function get_devices_list_available 71 | interface_name (str): The name of the interface. 72 | 73 | Returns: 74 | str: The administrative status of the interface. 75 | 76 | """ 77 | return output_to_json(interface_admin_status(device_name, interface_name)) 78 | 79 | 80 | @tool 81 | def verify_interface_state_up(device_name: str, interface_name: str) -> bool: 82 | """ 83 | Verify interface state is up and line protocol is up 84 | 85 | Args: 86 | device_name (str): Must come from the function get_devices_list_available 87 | interface_name (str): The name of the interface 88 | 89 | Returns: 90 | bool: True if the interface state is up and line protocol is up, False otherwise 91 | """ 92 | return output_to_json(verify_state_up(device_name, interface_name)) 93 | 94 | 95 | @tool 96 | def get_interface_events(device_name: str, interface_name: str) -> dict: 97 | """ 98 | Retrieves the events for a specific interface on a device. 99 | 100 | Args: 101 | device_name (str): Must come from the function get_devices_list_available 102 | interface_name (str): The name of the interface. 103 | 104 | Returns: 105 | dict: A dictionary containing the events for the specified interface. 106 | """ 107 | return output_to_json(interface_events(device_name, interface_name)) 108 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | tmp.* 2 | 3 | # Byte-compiled / optimized / DLL files 4 | __pycache__/ 5 | *.py[cod] 6 | *$py.class 7 | 8 | # C extensions 9 | *.so 10 | 11 | # Distribution / packaging 12 | .Python 13 | build/ 14 | develop-eggs/ 15 | dist/ 16 | downloads/ 17 | eggs/ 18 | .eggs/ 19 | lib/ 20 | lib64/ 21 | parts/ 22 | sdist/ 23 | var/ 24 | wheels/ 25 | share/python-wheels/ 26 | *.egg-info/ 27 | .installed.cfg 28 | *.egg 29 | MANIFEST 30 | 31 | # PyInstaller 32 | # Usually these files are written by a python script from a template 33 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 34 | *.manifest 35 | *.spec 36 | 37 | # Installer logs 38 | pip-log.txt 39 | pip-delete-this-directory.txt 40 | 41 | # Unit test / coverage reports 42 | htmlcov/ 43 | .tox/ 44 | .nox/ 45 | .coverage 46 | .coverage.* 47 | .cache 48 | nosetests.xml 49 | coverage.xml 50 | *.cover 51 | *.py,cover 52 | .hypothesis/ 53 | .pytest_cache/ 54 | cover/ 55 | 56 | # Translations 57 | *.mo 58 | *.pot 59 | 60 | # Django stuff: 61 | *.log 62 | local_settings.py 63 | db.sqlite3 64 | db.sqlite3-journal 65 | 66 | # Flask stuff: 67 | instance/ 68 | .webassets-cache 69 | 70 | # Scrapy stuff: 71 | .scrapy 72 | 73 | # Sphinx documentation 74 | docs/_build/ 75 | 76 | # PyBuilder 77 | .pybuilder/ 78 | target/ 79 | 80 | # Jupyter Notebook 81 | .ipynb_checkpoints 82 | 83 | # IPython 84 | profile_default/ 85 | ipython_config.py 86 | 87 | # pyenv 88 | # For a library or package, you might want to ignore these files since the code is 89 | # intended to run in multiple environments; otherwise, check them in: 90 | # .python-version 91 | 92 | # pipenv 93 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 94 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 95 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 96 | # install all needed dependencies. 
97 | #Pipfile.lock 98 | 99 | # poetry 100 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 101 | # This is especially recommended for binary packages to ensure reproducibility, and is more 102 | # commonly ignored for libraries. 103 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 104 | #poetry.lock 105 | 106 | # pdm 107 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 108 | #pdm.lock 109 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 110 | # in version control. 111 | # https://pdm.fming.dev/#use-with-ide 112 | .pdm.toml 113 | 114 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 115 | __pypackages__/ 116 | 117 | # Celery stuff 118 | celerybeat-schedule 119 | celerybeat.pid 120 | 121 | # SageMath parsed files 122 | *.sage.py 123 | 124 | # Environments 125 | .env 126 | .venv 127 | env/ 128 | venv/ 129 | ENV/ 130 | env.bak/ 131 | venv.bak/ 132 | 133 | # Spyder project settings 134 | .spyderproject 135 | .spyproject 136 | 137 | # Rope project settings 138 | .ropeproject 139 | 140 | # mkdocs documentation 141 | /site 142 | 143 | # mypy 144 | .mypy_cache/ 145 | .dmypy.json 146 | dmypy.json 147 | 148 | # Pyre type checker 149 | .pyre/ 150 | 151 | # pytype static type analyzer 152 | .pytype/ 153 | 154 | # Cython debug symbols 155 | cython_debug/ 156 | 157 | # PyCharm 158 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 159 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 160 | # and can be added to the global gitignore or merged into this file. For a more nuclear 161 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 162 | #.idea/ 163 | -------------------------------------------------------------------------------- /llm_agent/pyats_connector/api/interface_state.py: -------------------------------------------------------------------------------- 1 | """ 2 | Script to retrieve the status of a device interface using the pyATS framework. 3 | """ 4 | 5 | from llm_agent.pyats_connector.connection_methods import api_connect, parse_connect 6 | 7 | 8 | def interfaces_status(device_name: str) -> dict: 9 | """ 10 | Retrieves the status of interfaces on a given device. 11 | 12 | Args: 13 | device_name (str): The name of the device. 14 | 15 | Returns: 16 | dict: A dictionary containing the status of interfaces on the device. 17 | """ 18 | return api_connect( 19 | device_name=device_name, 20 | method="get_interfaces_status", 21 | ) 22 | 23 | 24 | def single_interface_status(device_name: str, interface_name: str) -> dict: 25 | """ 26 | Retrieves the status of a single interface on a device. 27 | 28 | Args: 29 | device_name (str): The name of the device. 30 | interface_name (str): The name of the interface. 31 | 32 | Returns: 33 | dict: A dictionary containing the status of the interface. If the interface is not found, 34 | the value "ERROR_INTERFACE_NOT_FOUND" is returned. 
35 | """ 36 | result = parse_connect( 37 | device_name=device_name, 38 | string_to_parse=f"show interfaces {interface_name}", 39 | ) 40 | return result.get(interface_name, "ERROR_INTERFACE_NOT_FOUND") 41 | 42 | 43 | def interfaces_information( 44 | device_name: str, interfaces_name: list[str] 45 | ) -> str: 46 | """ 47 | Retrieves interface information for the specified device and interfaces. 48 | TODO: Need to reduce the amount of information returned 49 | 50 | Args: 51 | device_name (str): The name of the device. 52 | interfaces_name (list[str]): A list of interface names. 53 | 54 | Returns: 55 | str: The interface information. 56 | 57 | """ 58 | return api_connect( 59 | device_name=device_name, 60 | method="get_interface_information", 61 | args=interfaces_name, 62 | ) 63 | 64 | 65 | def interface_admin_status(device_name: str, interface_name: str) -> str: 66 | """ 67 | Retrieves the administrative status of a network interface on a device. 68 | 69 | Args: 70 | device_name (str): The name or IP address of the device. 71 | interface_name (str): The name of the network interface. 72 | 73 | Returns: 74 | str: A message indicating the administrative status of the network interface. 75 | 76 | """ 77 | result = api_connect( 78 | device_name=device_name, 79 | method="get_interface_admin_status", 80 | args=interface_name, 81 | ) 82 | return f"Interface {interface_name} on device {device_name} is set to: {result} (Admin Status)" 83 | 84 | 85 | def verify_state_up(device_name: str, interface_name: str) -> bool: 86 | """ 87 | Verify if the interface state is up on a given device. 88 | 89 | Args: 90 | device_name (str): The name of the device to connect to. 91 | interface_name (str): The name of the interface to verify. 92 | 93 | Returns: 94 | str: A message indicating whether the interface state is up or not. 95 | """ 96 | result = api_connect( 97 | device_name=device_name, 98 | method="verify_interface_state_up", 99 | args=interface_name, 100 | ) 101 | state = "UP" if result else "NOT UP" 102 | return f"The interface {interface_name} on device {device_name} is {state}" 103 | 104 | 105 | def interface_events(device_name: str, interface_name: str) -> dict: 106 | """ 107 | Retrieve interface events for a specific device and interface. 108 | 109 | Args: 110 | device_name (str): The name of the device. 111 | interface_name (str): The name of the interface. 112 | 113 | Returns: 114 | dict: A dictionary containing the parsed interface events. 115 | 116 | """ 117 | return parse_connect( 118 | device_name=device_name, 119 | string_to_parse=f"show logging | i {interface_name}", 120 | ) 121 | -------------------------------------------------------------------------------- /llm_agent/llm/agent.py: -------------------------------------------------------------------------------- 1 | """ 2 | The LLMChatAgent class is responsible for handling the chat interactions with the LLM. 
3 | """ 4 | 5 | import logging 6 | from pydantic import ValidationError 7 | from langchain_openai import ChatOpenAI 8 | from langchain.agents import AgentExecutor 9 | from langchain.agents.format_scratchpad import ( 10 | format_to_openai_function_messages, 11 | ) 12 | from langchain.agents.output_parsers import OpenAIFunctionsAgentOutputParser 13 | from langchain.memory import ConversationBufferMemory 14 | from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder 15 | from langchain_core.utils.function_calling import convert_to_openai_function 16 | 17 | from unicon.core.errors import ConnectionError 18 | 19 | from llm_agent.llm.prompts import SYSTEM_PROMPT 20 | from llm_agent.log_config.logger_setup import logger 21 | from llm_agent.langchain_tools.lc_tools_list import tools 22 | from llm_agent.utils.text_utils import remove_white_spaces, output_to_json 23 | from llm_agent.fastAPI.models import GrafanaWebhookMessage 24 | 25 | httpx_logger = logging.getLogger("httpx") 26 | httpx_logger.setLevel(logging.ERROR) 27 | 28 | NOTIFICATION_PROMPT = """ 29 | This is a network alert, not a user message. 30 | """ 31 | 32 | MEMORY_KEY = "chat_history" 33 | 34 | LLM_MODEL = "chatgpt-4o-latest" 35 | 36 | 37 | class LLMChatAgent: 38 | def __init__(self) -> None: 39 | self._create_agent() 40 | 41 | def _create_agent(self) -> None: 42 | prompt = ChatPromptTemplate.from_messages( 43 | [ 44 | ( 45 | "system", 46 | remove_white_spaces(string=SYSTEM_PROMPT), 47 | ), 48 | MessagesPlaceholder(variable_name=MEMORY_KEY), 49 | ("user", "{input}"), 50 | MessagesPlaceholder(variable_name="agent_scratchpad"), 51 | ] 52 | ) 53 | 54 | llm = ChatOpenAI(model=LLM_MODEL, temperature=0) 55 | llm_with_tools = llm.bind( 56 | functions=[convert_to_openai_function(t) for t in tools] 57 | ) 58 | 59 | agent = ( 60 | { 61 | "input": lambda x: x["input"], 62 | "agent_scratchpad": lambda x: format_to_openai_function_messages( 63 | x["intermediate_steps"] 64 | ), 65 | "chat_history": lambda x: x["chat_history"], 66 | } 67 | | prompt 68 | | llm_with_tools 69 | | OpenAIFunctionsAgentOutputParser() 70 | ) 71 | 72 | memory = ConversationBufferMemory( 73 | memory_key="chat_history", return_messages=True 74 | ) 75 | self.agent_executor = AgentExecutor( 76 | agent=agent, tools=tools, verbose=True, memory=memory 77 | ) 78 | 79 | def _agent_executor(self, message: str) -> str: 80 | return self.agent_executor.invoke({"input": message})["output"] 81 | 82 | def chat(self, message: str, attempts: int = 0) -> str: 83 | """ 84 | TODO: There a potential loop here. If agent is not able to connect to the device, 85 | the agent will try to connect again to the device. This can go on forever. 86 | The agent stoppped at 3 attempts to connect to the device. 87 | """ 88 | logger.debug("CHAT_SENT_TO_LLM: %s", message) 89 | try: 90 | return self._agent_executor(message) 91 | except (ValidationError, ConnectionError, KeyError) as e: 92 | if attempts < 2: 93 | if isinstance(e, ValidationError): 94 | msg = f"ERROR: You missed a parameter invoking the function, See for the information missing: {e}" 95 | elif isinstance(e, ConnectionError): 96 | msg = f"ERROR: Unable to connect. {e}" 97 | else: # KeyError 98 | msg = f"ERROR: You provided an empty value or a device that doesn't exists. 
{e}" 99 | logger.error(msg) 100 | return self.chat(msg, attempts + 1) 101 | else: 102 | logger.error("Uncatched error: %s", e) 103 | return f"ERROR: {e}" 104 | 105 | def notification(self, message: GrafanaWebhookMessage) -> str: 106 | """ 107 | Sends a notification to the LLM agent. 108 | 109 | Args: 110 | message (GrafanaWebhookMessage): The message containing the notification details. 111 | 112 | Returns: 113 | str: The response from the LLM agent. 114 | 115 | """ 116 | notification = { 117 | "system_instructions": remove_white_spaces( 118 | string=NOTIFICATION_PROMPT 119 | ), 120 | "network_alert": message.model_dump(), 121 | } 122 | 123 | return self.chat(output_to_json(notification), attempts=0) 124 | 125 | 126 | if __name__ == "__main__": 127 | agent = LLMChatAgent() 128 | chat = agent.chat("can you check the interfaces on the cat8000v-0 device?") 129 | print(chat) 130 | print("#" * 80, "\n") 131 | chat = agent.chat("can you check if the isis is configured?") 132 | print(chat) 133 | print("#" * 80, "\n") 134 | chat = agent.chat("what vrfs I have there?") 135 | print(chat) 136 | print("#" * 80, "\n") 137 | chat = agent.chat( 138 | "please provide a summary of all activities I asked you to check in our conversation" 139 | ) 140 | print(chat) 141 | print("#" * 80, "\n") 142 | -------------------------------------------------------------------------------- /llm_agent/webex_chat/ai_command.py: -------------------------------------------------------------------------------- 1 | """ 2 | This module represents a command to interact with an AI based on OpenAI's. 3 | """ 4 | 5 | from webex_bot.models.command import Command 6 | from webex_bot.formatting import quote_info 7 | from webex_bot.models.response import response_from_adaptive_card 8 | from webexteamssdk.models.cards.actions import OpenUrl 9 | from webexteamssdk.models.cards import ( 10 | Colors, 11 | TextBlock, 12 | FontWeight, 13 | FontSize, 14 | Column, 15 | AdaptiveCard, 16 | ColumnSet, 17 | Image, 18 | ImageSize, 19 | Fact, 20 | FactSet, 21 | ) 22 | 23 | from llm_agent.log_config.logger_setup import logger 24 | from llm_agent.webex_chat.chat_api_client import send_message_to_chat_api 25 | 26 | 27 | OPENAI_ICON = "https://github.com/fbradyirl/fbradyirl.github.io/raw/master/static/img/OpenAI_logo-100x70-rounded.png" 28 | CARD_CALLBACK_MORE_INFO = "help" 29 | 30 | 31 | class AiCommand(Command): 32 | """ 33 | Represents a command to interact with an AI based on OpenAI's. 34 | 35 | This command allows users to interact with an AI by sending a message prompt. 36 | The AI will generate a response using OpenAI's models, and the response 37 | will be sent back to the user. 38 | 39 | Attributes: 40 | command_keyword (str): The keyword used to trigger this command. 41 | help_message (str): A brief description of the command. 42 | chained_commands (list): A list of additional commands to be executed after this command. 43 | 44 | Methods: 45 | execute(message, attachment_actions, activity): 46 | Executes the command by sending the message prompt to the AI and returning the response. 47 | """ 48 | 49 | def __init__(self): 50 | super().__init__( 51 | command_keyword="my-buddy", 52 | help_message="Interact with an AI based on OpenAI's", 53 | chained_commands=[AiMoreInfoCallback()], 54 | ) 55 | 56 | def execute(self, message, attachment_actions, activity): 57 | """ 58 | Executes the command by sending the message prompt to the AI and returning the response. 59 | 60 | Args: 61 | message (str): The message prompt to be sent to the AI. 
62 | attachment_actions: Actions related to attachments (not specified in the code). 63 | activity: The activity related to the command execution (not specified in the code). 64 | 65 | Returns: 66 | list: A list containing the response generated by the AI. 67 | """ 68 | logger.debug("Message received from user: %s", message) 69 | response = send_message_to_chat_api(message=message) 70 | logger.debug("LLM response: %s", response) 71 | return [quote_info(response)] 72 | 73 | 74 | class AiMoreInfoCallback(Command): 75 | """ 76 | Represents a command for providing more information about the AI bot. 77 | 78 | This command displays an adaptive card with information about the bot, 79 | including its version, purpose, and a link to the OpenAI website. 80 | 81 | Args: 82 | Command (class): The base class for all commands. 83 | 84 | Attributes: 85 | card_callback_keyword (str): The keyword for identifying this command. 86 | delete_previous_message (bool): Whether to delete the previous message before executing the command. 87 | """ 88 | 89 | def __init__(self): 90 | super().__init__( 91 | card_callback_keyword=CARD_CALLBACK_MORE_INFO, 92 | delete_previous_message=False, 93 | ) 94 | 95 | def execute(self, message, attachment_actions, activity): 96 | bot_version_info = "Ask me something 🤙" 97 | 98 | bot_facts = [] 99 | 100 | heading = TextBlock( 101 | "LLM my-buddy", 102 | weight=FontWeight.BOLDER, 103 | wrap=True, 104 | size=FontSize.LARGE, 105 | ) 106 | subtitle = TextBlock( 107 | bot_version_info, 108 | wrap=True, 109 | size=FontSize.SMALL, 110 | color=Colors.LIGHT, 111 | ) 112 | 113 | image = Image(url=OPENAI_ICON, size=ImageSize.AUTO) 114 | 115 | header_column = Column(items=[heading, subtitle], width=2) 116 | header_image_column = Column( 117 | items=[image], 118 | width=1, 119 | ) 120 | 121 | max_tokens_info_textblock = TextBlock( 122 | "I'm an AI that can connect to network devices and run commands.", 123 | wrap=True, 124 | size=FontSize.SMALL, 125 | color=Colors.LIGHT, 126 | ) 127 | 128 | temp_info_textblock = TextBlock( 129 | "**I'm here to help**", 130 | wrap=True, 131 | size=FontSize.SMALL, 132 | color=Colors.LIGHT, 133 | ) 134 | 135 | card = AdaptiveCard( 136 | body=[ 137 | ColumnSet(columns=[header_column, header_image_column]), 138 | FactSet(facts=bot_facts), 139 | ColumnSet( 140 | columns=[ 141 | Column( 142 | items=[ 143 | temp_info_textblock, 144 | max_tokens_info_textblock, 145 | ], 146 | width=2, 147 | ) 148 | ] 149 | ), 150 | ], 151 | actions=[ 152 | OpenUrl(url="https://platform.openai.com", title="openai.com") 153 | ], 154 | ) 155 | 156 | return response_from_adaptive_card(card) 157 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # AI-Network-Troubleshooting-PoC 2 | 3 | ![Docker Version](https://img.shields.io/badge/Docker-26.0+-blue) 4 | 5 | > [!IMPORTANT] 6 | > See [oncall-netops-tig-pyats-demo](https://github.com/jillesca/oncall-netops-tig-pyats-demo) which is a newer and better version of this demo. No further updates will be made to this repository. 7 | 8 | This demo is built to showcase how AI might assist you in troubleshooting network issues. It was presented at Cisco Developer Days 2024 and API Days Paris 2024. Check out the [recording from Developer Days](https://youtu.be/0Uef42agBso?si=dpgQxlmdCcFZ5-Lz) to see how this solution works.
9 | 10 | The components used by this demo are: 11 | 12 | ![components used](img/components.png) 13 | 14 | - Virtual IOS-XE devices running ISIS. 15 | - The [CML Devnet sandbox](https://developer.cisco.com/site/sandbox/) was used to build the lab. Grab a reservation. 16 |   - `developer` / `C1sco12345` 17 | - [ncpeek.](https://github.com/jillesca/ncpeek) An `ncclient` wrapper I wrote, used as a NETCONF client for Telegraf. 18 | - TIG stack with Docker `20.10+` 🐳 19 | - Telegraf uses `ncpeek` to pull telemetry data from network devices. 20 | - Grafana fires a webhook when an alarm is detected. 🚨 21 | - FastAPI. 22 | - Hosts the LLM. 23 | - Interacts with PyATS & Webex. 24 | - PyATS. Provides a framework to interact with network devices. 🛠️ 25 | - [Webex_bot](https://github.com/fbradyirl/webex_bot) used to interact with the LLM. 🤖 26 | - OpenAI LLM. 🧠 27 | - `chatgpt-4o-latest` is used. 🚀 28 | 29 | > [!IMPORTANT] 30 | > Run the containers locally on your system instead of using the sandbox VM. Use the sandbox CML to run the CML topology with the XE devices. 31 | 32 | ## Flow 33 | 34 | When an alert is triggered in Grafana, a webhook is sent, prompting the LLM to initiate an analysis of the alert and establish connections with network devices to identify the root cause of the issue, following a plan the LLM creates. 35 | 36 | Once the initial analysis is complete, the LLM presents a concise summary of its findings to the users, along with actionable items. 37 | 38 | ![llm flow](img/llm_flow.png) 39 | 40 | ## 🎬 Demo 41 | 42 | For this demo, [one alarm was created.](grafana/config/alerts.yaml) 43 | 44 | ```bash 45 | if avgNeighbors(30sec) < avgNeighbors(30min) : send Alarm 46 | ``` 47 | 48 | > When the average number of ISIS neighbors over a 30-second window _**is less than**_ the average number of ISIS neighbors over a 30-minute window, the alarm triggers a webhook for the LLM. 49 | 50 | This signals that a stable ISIS neighbor that had been up for the last 30 minutes was lost, and it allows the alarm to work with any number `N` of ISIS neighbors. 51 | 52 | ## 🛠️ Prepare Demo 53 | 54 | ### Requirements 55 | 56 | - This demo utilizes Compose V2 for container orchestration, which means you should use `docker compose` instead of `docker-compose`. If you haven't already, please [update your Docker client.](https://docs.docker.com/compose/releases/migrate/#how-do-i-switch-to-compose-v2) 57 | 58 | - Remove the default CML topology from the CML sandbox and [upload the topology](cml/topology.yaml) used for this demo. 59 | 60 | ### 🔑 Environment variables 61 | 62 | Environment variables are injected through [the Makefile at the root of the project.](Makefile) 63 | 64 | #### 📌 Mandatory variables 65 | 66 | > [!IMPORTANT] 67 | > For the demo to work, you **must** set the following environment variables. 68 | 69 | **Create** a `.env` file in the root directory of the project and include the following environment variables. This `.env` file is utilized by the [Makefile.](Makefile#L2) 70 | 71 | ```bash 72 | OPENAI_API_KEY= 73 | WEBEX_TEAMS_ACCESS_TOKEN= 74 | WEBEX_APPROVED_USERS_MAIL= 75 | WEBEX_USERNAME= 76 | WEBEX_ROOM_ID= 77 | ``` 78 | 79 | > [!NOTE] 80 | > The Webex variables are only needed if you interact with the LLM using Webex. However, you need to modify the Python code accordingly. 81 | 82 | If you prefer to use another client, you need to: 83 | 84 | - Modify the [notify function](llm_agent/app.py#L59) to accommodate your client.
85 | - Remove or comment out [the start of the Webex bot](llm_agent/app.py#L73) 86 | - Communicate with the LLM using its REST API. See [send_message_to_chat_api](llm_agent/webex_chat/chat_api_client.py#L13) for an example, and the request sketch at the end of this README. 📡 87 | 88 | ##### 📝 Webex considerations 89 | 90 | To get your Webex token, go to the Webex developer portal and create a bot. 91 | 92 | The easiest way to get the `WEBEX_ROOM_ID` is to open a room with your bot in the Webex app. Once you have your room, you can get the `WEBEX_ROOM_ID` with the [list rooms API,](https://developer.webex.com/docs/api/v1/rooms/list-rooms) using the token created before. 93 | 94 | #### 📌 Optional Variables 95 | 96 | For testing, you can use the `GRAFANA_WEB_HOOK` env var to send webhooks to a different site for inspection. 97 | 98 | If you have access to [smith.langchain.com](https://smith.langchain.com/) (recommended for viewing LLM operations), add your project ID and API key. 99 | 100 | ```bash 101 | GRAFANA_WEB_HOOK= 102 | LANGCHAIN_PROJECT= 103 | LANGCHAIN_API_KEY= 104 | LANGCHAIN_TRACING_V2=true 105 | LANGCHAIN_ENDPOINT=https://api.smith.langchain.com 106 | ``` 107 | 108 | #### Why must `.env.local` and `.env` both be present? 109 | 110 | For this demo to function correctly, additional environment variables are required by the containers. 111 | 112 | The [.env.local file](.env.local) serves as a template with some predefined environment variables. 113 | 114 | The `.env` file, which is ignored by git, is used to store sensitive API keys, preventing accidental commits of sensitive data. 115 | 116 | In a production environment, ensure that your environment variables are not included in your git repository to maintain security. 117 | 118 | ### 🚀 Start the topology 119 | 120 | This demo uses a CML instance from the [Cisco DevNet sandbox](https://developer.cisco.com/site/sandbox/). You can also use a dedicated CML instance or an NSO sandbox. 🏖️ 121 | 122 | After acquiring your sandbox, stop the default topology and wipe it out. 🧹 123 | 124 | Then, import the [topology file](cml/topology.yaml) used for this demo and start the lab. 125 | 126 | ### 📦 TIG Stack 127 | 128 | The TIG stack requires Docker and IP reachability to the CML instance. For this demo, I used the sandbox VM `10.10.20.50`. 129 | 130 | The first time, build the TIG stack. 131 | 132 | ```bash 133 | make build-tig 134 | ``` 135 | 136 | For subsequent runs of the TIG stack, you can just run the containers. 137 | 138 | ```bash 139 | make run-tig 140 | ``` 141 | 142 | ### 🚦 Verifying Telemetry on Telegraf, InfluxDB, Grafana 143 | 144 | **Telegraf** 145 | 146 | - Logs: On `10.10.20.50` use `docker exec -it telegraf bash`, then `tail -F /tmp/telegraf-grpc.log`. 147 | - [See config defined here](telegraf/dockerfile#L30) 148 | 149 | **InfluxDB** 150 | 151 | - Log in with the credentials `admin`/`admin123` 152 | 153 | **Grafana** 154 | 155 | - Log in with the credentials `admin`/`admin` 156 | - Navigate to `General > Network Telemetry` to see the Grafana dashboard. 157 | 158 | ### 🏁 Starting the LLM 159 | 160 | The [llm_agent directory](llm_agent/) provides the entry point for the application, the [app file.](llm_agent/app.py) 161 | 162 | The LLM container runs on the sandbox VM `10.10.20.50`. 163 | 164 | ```bash 165 | make run-llm 166 | ``` 167 | 168 | ## 🎮 Running the Demo 169 | 170 | ![network topology](/img/cml.png) 171 | 172 | The demo involves shutting down one interface, causing an `ISIS` failure, and allowing the LLM to diagnose the issue and implement a fix.
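As a minimal sketch (assuming console or SSH access to the routers), the failure can be introduced from the `cat8000v-0` CLI by shutting the interface that faces `cat8000v-2`; reverting it with `no shutdown` brings the adjacency back once the demo is done:

```bash
! On cat8000v-0 – introduce the failure used in this demo
configure terminal
 interface GigabitEthernet5
  shutdown
 end
```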
173 | 174 | In the images below, `GigabitEthernet5` was shut down on `cat8000v-0`, resulting in the loss of its ISIS adjacency with `cat8000v-2`. 175 | 176 | You can watch the [recorded demo here.](https://app.vidcast.io/share/1bb750ef-76ef-4fa5-9f2f-442a82151463) 177 | 178 | > [!NOTE] 179 | > The recording was done as a backup demo. It doesn't have audio or instructions. 180 | 181 | On Grafana, you can observe the ISIS count decreasing and triggering an alarm. 182 | 183 | ![grafana alarm](img/grafana1.png) 184 | ![grafana alarm 2](img/grafana2.png) 185 | 186 | Next, you will receive a Webex notification from Grafana, and the LLM will receive the webhook. The webhook triggers the LLM to start looking at what the issue is and how to resolve it. 187 | 188 | ![llm thinking 1](img/webex_bot1.png) 189 | ![llm thinking 2](img/webex_bot2.png) 190 | ![llm thinking 3](img/webex_bot3.png) 191 | ![llm thinking 4](img/webex_bot4.png) 192 | 193 | ## 📝 Notes 194 | 195 | - Tokens can run out easily with NETCONF, so it is highly important to filter what is sent to the AI. 196 | - Repeated alarms are suppressed by Grafana; this is controlled by [the Grafana policy file.](grafana/config/policies.yaml) 197 | - If you are testing continuously, run `make run-tig` to destroy and re-create the TIG containers. 198 | - This isn't an ideal scenario, but a proper solution wasn't found within the given time. 199 | - From time to time, the answers from the LLM are lost and not sent to Webex. You can find them in the terminal output. 200 | - This is the third iteration of this exercise. The first one was [Cisco Live Amsterdam 2024.](https://github.com/jillesca/CLEUR-DEVNET-3707) 201 | - The main differences are the use of a Makefile, Docker Compose, and the refactoring of the LLM agent code for a better separation of concerns.
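## 📡 Optional: calling the agent over REST

If you skip the Webex bot (see the note under Mandatory variables), you can talk to the agent directly through the `/chat` endpoint defined in [app.py](llm_agent/app.py). The sketch below is illustrative only: it assumes the container is reachable on the sandbox VM `10.10.20.50` with the `5001` port published in [docker-compose.yml](docker-compose.yml), and that the `Message` model carries a single `message` field, as `chat_to_llm` reads it. Adjust the host, port, and payload to your setup.

```bash
# Hypothetical request against the /chat endpoint of the llm_agent container
curl -X POST http://10.10.20.50:5001/chat \
  -H "Content-Type: application/json" \
  -d '{"message": "check the ISIS neighbors on cat8000v-0"}'
```

The endpoint returns the agent's answer as a plain string.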
202 | -------------------------------------------------------------------------------- /grafana/config/alerts.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: 1 2 | groups: 3 | - orgId: 1 4 | name: isis_neighbor_down 5 | folder: isis_neighbor_down 6 | interval: 10s 7 | rules: 8 | - uid: cd2069c2-6524-4164-b51a-e3fcc8f74aa9 9 | title: cat8000v-0 isis neighbors 10 | condition: E 11 | data: 12 | - refId: A 13 | relativeTimeRange: 14 | from: 1800 15 | to: 0 16 | datasourceUid: P5697886F9CA74929 17 | model: 18 | intervalMs: 1000 19 | maxDataPoints: 43200 20 | query: |- 21 | from(bucket: "mdt_grpc") 22 | |> range(start: v.timeRangeStart, stop: v.timeRangeStop) 23 | |> filter(fn: (r) => r["_field"] == "isis_neighbors_count") 24 | |> filter(fn: (r) => r["device"] == "cat8000v-0") 25 | |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false) 26 | |> yield(name: "mean") 27 | refId: A 28 | - refId: B 29 | relativeTimeRange: 30 | from: 30 31 | to: 0 32 | datasourceUid: P5697886F9CA74929 33 | model: 34 | datasource: 35 | type: influxdb 36 | uid: P5697886F9CA74929 37 | hide: false 38 | intervalMs: 1000 39 | maxDataPoints: 43200 40 | query: |- 41 | from(bucket: "mdt_grpc") 42 | |> range(start: v.timeRangeStart, stop: v.timeRangeStop) 43 | |> filter(fn: (r) => r["_field"] == "isis_neighbors_count") 44 | |> filter(fn: (r) => r["device"] == "cat8000v-0") 45 | |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false) 46 | |> yield(name: "mean") 47 | refId: B 48 | - refId: C 49 | relativeTimeRange: 50 | from: 1800 51 | to: 0 52 | datasourceUid: __expr__ 53 | model: 54 | conditions: 55 | - evaluator: 56 | params: 57 | - 0 58 | - 0 59 | type: gt 60 | operator: 61 | type: and 62 | query: 63 | params: [] 64 | reducer: 65 | params: [] 66 | type: avg 67 | type: query 68 | datasource: 69 | name: Expression 70 | type: __expr__ 71 | uid: __expr__ 72 | expression: A 73 | hide: false 74 | intervalMs: 1000 75 | maxDataPoints: 43200 76 | reducer: mean 77 | refId: C 78 | type: reduce 79 | - refId: D 80 | relativeTimeRange: 81 | from: 30 82 | to: 0 83 | datasourceUid: __expr__ 84 | model: 85 | conditions: 86 | - evaluator: 87 | params: 88 | - 0 89 | - 0 90 | type: gt 91 | operator: 92 | type: and 93 | query: 94 | params: [] 95 | reducer: 96 | params: [] 97 | type: avg 98 | type: query 99 | datasource: 100 | name: Expression 101 | type: __expr__ 102 | uid: __expr__ 103 | expression: B 104 | hide: false 105 | intervalMs: 1000 106 | maxDataPoints: 43200 107 | reducer: last 108 | refId: D 109 | settings: 110 | mode: replaceNN 111 | replaceWithValue: 0 112 | type: reduce 113 | - refId: E 114 | relativeTimeRange: 115 | from: 600 116 | to: 0 117 | datasourceUid: __expr__ 118 | model: 119 | conditions: 120 | - evaluator: 121 | params: 122 | - 0 123 | - 0 124 | type: gt 125 | operator: 126 | type: and 127 | query: 128 | params: [] 129 | reducer: 130 | params: [] 131 | type: avg 132 | type: query 133 | datasource: 134 | name: Expression 135 | type: __expr__ 136 | uid: __expr__ 137 | expression: $C > $D 138 | hide: false 139 | intervalMs: 1000 140 | maxDataPoints: 43200 141 | refId: E 142 | type: math 143 | dashboardUid: d44174ca-56c5-41dc-9451-2c858cc0566b 144 | panelId: 12 145 | noDataState: OK 146 | execErrState: Error 147 | for: 10s 148 | annotations: 149 | __dashboardUid__: d44174ca-56c5-41dc-9451-2c858cc0566b 150 | __panelId__: "12" 151 | description: One ISIS neighbor on cat8000v-0 has been detected down 152 | summary: cat8000v-0 ISIS Neighbor 
down 153 | labels: 154 | device: cat8000v-0 155 | isPaused: false 156 | - uid: b2663051-b248-46c4-9c61-d4b67ee0ef12 157 | title: cat8000v-1 isis neighbors 158 | condition: E 159 | data: 160 | - refId: A 161 | relativeTimeRange: 162 | from: 1800 163 | to: 0 164 | datasourceUid: P5697886F9CA74929 165 | model: 166 | intervalMs: 1000 167 | maxDataPoints: 43200 168 | query: |- 169 | from(bucket: "mdt_grpc") 170 | |> range(start: v.timeRangeStart, stop: v.timeRangeStop) 171 | |> filter(fn: (r) => r["_field"] == "isis_neighbors_count") 172 | |> filter(fn: (r) => r["device"] == "cat8000v-1") 173 | |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false) 174 | |> yield(name: "mean") 175 | refId: A 176 | - refId: B 177 | relativeTimeRange: 178 | from: 30 179 | to: 0 180 | datasourceUid: P5697886F9CA74929 181 | model: 182 | datasource: 183 | type: influxdb 184 | uid: P5697886F9CA74929 185 | intervalMs: 1000 186 | maxDataPoints: 43200 187 | query: |- 188 | from(bucket: "mdt_grpc") 189 | |> range(start: v.timeRangeStart, stop: v.timeRangeStop) 190 | |> filter(fn: (r) => r["_field"] == "isis_neighbors_count") 191 | |> filter(fn: (r) => r["device"] == "cat8000v-1") 192 | |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false) 193 | |> yield(name: "mean") 194 | refId: B 195 | - refId: C 196 | relativeTimeRange: 197 | from: 1800 198 | to: 0 199 | datasourceUid: __expr__ 200 | model: 201 | conditions: 202 | - evaluator: 203 | params: 204 | - 0 205 | - 0 206 | type: gt 207 | operator: 208 | type: and 209 | query: 210 | params: [] 211 | reducer: 212 | params: [] 213 | type: avg 214 | type: query 215 | datasource: 216 | name: Expression 217 | type: __expr__ 218 | uid: __expr__ 219 | expression: A 220 | intervalMs: 1000 221 | maxDataPoints: 43200 222 | reducer: mean 223 | refId: C 224 | type: reduce 225 | - refId: D 226 | relativeTimeRange: 227 | from: 30 228 | to: 0 229 | datasourceUid: __expr__ 230 | model: 231 | conditions: 232 | - evaluator: 233 | params: 234 | - 0 235 | - 0 236 | type: gt 237 | operator: 238 | type: and 239 | query: 240 | params: [] 241 | reducer: 242 | params: [] 243 | type: avg 244 | type: query 245 | datasource: 246 | name: Expression 247 | type: __expr__ 248 | uid: __expr__ 249 | expression: B 250 | intervalMs: 1000 251 | maxDataPoints: 43200 252 | reducer: last 253 | refId: D 254 | settings: 255 | mode: replaceNN 256 | replaceWithValue: 0 257 | type: reduce 258 | - refId: E 259 | relativeTimeRange: 260 | from: 600 261 | to: 0 262 | datasourceUid: __expr__ 263 | model: 264 | conditions: 265 | - evaluator: 266 | params: 267 | - 0 268 | - 0 269 | type: gt 270 | operator: 271 | type: and 272 | query: 273 | params: [] 274 | reducer: 275 | params: [] 276 | type: avg 277 | type: query 278 | datasource: 279 | name: Expression 280 | type: __expr__ 281 | uid: __expr__ 282 | expression: $C > $D 283 | intervalMs: 1000 284 | maxDataPoints: 43200 285 | refId: E 286 | type: math 287 | dashboardUid: d44174ca-56c5-41dc-9451-2c858cc0566b 288 | panelId: 12 289 | noDataState: OK 290 | execErrState: Error 291 | for: 10s 292 | annotations: 293 | __dashboardUid__: d44174ca-56c5-41dc-9451-2c858cc0566b 294 | __panelId__: "12" 295 | description: One ISIS neighbor on cat8000v-1 has been detected down 296 | summary: cat8000v-1 ISIS Neighbor down 297 | labels: 298 | device: cat8000v-1 299 | isPaused: false 300 | - uid: af154e35-a02d-4735-a807-f3c052d999dc 301 | title: cat8000v-2 isis neighbors 302 | condition: E 303 | data: 304 | - refId: A 305 | relativeTimeRange: 306 | from: 1800 
307 | to: 0 308 | datasourceUid: P5697886F9CA74929 309 | model: 310 | intervalMs: 1000 311 | maxDataPoints: 43200 312 | query: |- 313 | from(bucket: "mdt_grpc") 314 | |> range(start: v.timeRangeStart, stop: v.timeRangeStop) 315 | |> filter(fn: (r) => r["_field"] == "isis_neighbors_count") 316 | |> filter(fn: (r) => r["device"] == "cat8000v-2") 317 | |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false) 318 | |> yield(name: "mean") 319 | refId: A 320 | - refId: B 321 | relativeTimeRange: 322 | from: 30 323 | to: 0 324 | datasourceUid: P5697886F9CA74929 325 | model: 326 | datasource: 327 | type: influxdb 328 | uid: P5697886F9CA74929 329 | intervalMs: 1000 330 | maxDataPoints: 43200 331 | query: |- 332 | from(bucket: "mdt_grpc") 333 | |> range(start: v.timeRangeStart, stop: v.timeRangeStop) 334 | |> filter(fn: (r) => r["_field"] == "isis_neighbors_count") 335 | |> filter(fn: (r) => r["device"] == "cat8000v-2") 336 | |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false) 337 | |> yield(name: "mean") 338 | refId: B 339 | - refId: C 340 | relativeTimeRange: 341 | from: 1800 342 | to: 0 343 | datasourceUid: __expr__ 344 | model: 345 | conditions: 346 | - evaluator: 347 | params: 348 | - 0 349 | - 0 350 | type: gt 351 | operator: 352 | type: and 353 | query: 354 | params: [] 355 | reducer: 356 | params: [] 357 | type: avg 358 | type: query 359 | datasource: 360 | name: Expression 361 | type: __expr__ 362 | uid: __expr__ 363 | expression: A 364 | intervalMs: 1000 365 | maxDataPoints: 43200 366 | reducer: mean 367 | refId: C 368 | type: reduce 369 | - refId: D 370 | relativeTimeRange: 371 | from: 30 372 | to: 0 373 | datasourceUid: __expr__ 374 | model: 375 | conditions: 376 | - evaluator: 377 | params: 378 | - 0 379 | - 0 380 | type: gt 381 | operator: 382 | type: and 383 | query: 384 | params: [] 385 | reducer: 386 | params: [] 387 | type: avg 388 | type: query 389 | datasource: 390 | name: Expression 391 | type: __expr__ 392 | uid: __expr__ 393 | expression: B 394 | intervalMs: 1000 395 | maxDataPoints: 43200 396 | reducer: last 397 | refId: D 398 | settings: 399 | mode: replaceNN 400 | replaceWithValue: 0 401 | type: reduce 402 | - refId: E 403 | relativeTimeRange: 404 | from: 600 405 | to: 0 406 | datasourceUid: __expr__ 407 | model: 408 | conditions: 409 | - evaluator: 410 | params: 411 | - 0 412 | - 0 413 | type: gt 414 | operator: 415 | type: and 416 | query: 417 | params: [] 418 | reducer: 419 | params: [] 420 | type: avg 421 | type: query 422 | datasource: 423 | name: Expression 424 | type: __expr__ 425 | uid: __expr__ 426 | expression: $C > $D 427 | intervalMs: 1000 428 | maxDataPoints: 43200 429 | refId: E 430 | type: math 431 | dashboardUid: d44174ca-56c5-41dc-9451-2c858cc0566b 432 | panelId: 12 433 | noDataState: OK 434 | execErrState: Error 435 | for: 10s 436 | annotations: 437 | __dashboardUid__: d44174ca-56c5-41dc-9451-2c858cc0566b 438 | __panelId__: "12" 439 | description: One ISIS neighbor on cat8000v-2 has been detected down 440 | summary: cat8000v-2 ISIS Neighbor down 441 | labels: 442 | device: cat8000v-2 443 | isPaused: false 444 | -------------------------------------------------------------------------------- /grafana/config/dashboard.json: -------------------------------------------------------------------------------- 1 | { 2 | "annotations": { 3 | "list": [ 4 | { 5 | "builtIn": 1, 6 | "datasource": { 7 | "type": "grafana", 8 | "uid": "-- Grafana --" 9 | }, 10 | "enable": true, 11 | "hide": true, 12 | "iconColor": "rgba(0, 211, 255, 1)", 
13 | "name": "Annotations & Alerts", 14 | "type": "dashboard" 15 | } 16 | ] 17 | }, 18 | "editable": true, 19 | "fiscalYearStartMonth": 0, 20 | "graphTooltip": 0, 21 | "links": [], 22 | "liveNow": false, 23 | "panels": [ 24 | { 25 | "datasource": { 26 | "type": "influxdb", 27 | "uid": "P5697886F9CA74929" 28 | }, 29 | "fieldConfig": { 30 | "defaults": { 31 | "color": { 32 | "mode": "palette-classic" 33 | }, 34 | "custom": { 35 | "axisBorderShow": false, 36 | "axisCenteredZero": false, 37 | "axisColorMode": "text", 38 | "axisLabel": "", 39 | "axisPlacement": "auto", 40 | "barAlignment": 0, 41 | "drawStyle": "line", 42 | "fillOpacity": 0, 43 | "gradientMode": "none", 44 | "hideFrom": { 45 | "legend": false, 46 | "tooltip": false, 47 | "viz": false 48 | }, 49 | "insertNulls": false, 50 | "lineInterpolation": "linear", 51 | "lineWidth": 1, 52 | "pointSize": 5, 53 | "scaleDistribution": { 54 | "type": "linear" 55 | }, 56 | "showPoints": "auto", 57 | "spanNulls": false, 58 | "stacking": { 59 | "group": "A", 60 | "mode": "none" 61 | }, 62 | "thresholdsStyle": { 63 | "mode": "off" 64 | } 65 | }, 66 | "mappings": [], 67 | "thresholds": { 68 | "mode": "absolute", 69 | "steps": [ 70 | { 71 | "color": "green", 72 | "value": null 73 | }, 74 | { 75 | "color": "red", 76 | "value": 80 77 | } 78 | ] 79 | }, 80 | "unitScale": true 81 | }, 82 | "overrides": [] 83 | }, 84 | "gridPos": { 85 | "h": 8, 86 | "w": 12, 87 | "x": 0, 88 | "y": 0 89 | }, 90 | "id": 12, 91 | "options": { 92 | "legend": { 93 | "calcs": [], 94 | "displayMode": "list", 95 | "placement": "bottom", 96 | "showLegend": true 97 | }, 98 | "tooltip": { 99 | "mode": "single", 100 | "sort": "none" 101 | } 102 | }, 103 | "targets": [ 104 | { 105 | "datasource": { 106 | "type": "influxdb", 107 | "uid": "P5697886F9CA74929" 108 | }, 109 | "query": "from(bucket: \"mdt_grpc\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"http://cisco.com/ns/yang/Cisco-IOS-XE-isis-oper:/isis-oper-data/isis-instance\")\n |> filter(fn: (r) => r[\"_field\"] == \"isis_neighbors_count\")\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"mean\")", 110 | "refId": "A" 111 | } 112 | ], 113 | "title": "ISIS neighbors count", 114 | "type": "timeseries" 115 | }, 116 | { 117 | "datasource": { 118 | "type": "influxdb", 119 | "uid": "P5697886F9CA74929" 120 | }, 121 | "fieldConfig": { 122 | "defaults": { 123 | "color": { 124 | "mode": "palette-classic" 125 | }, 126 | "custom": { 127 | "axisBorderShow": false, 128 | "axisCenteredZero": false, 129 | "axisColorMode": "text", 130 | "axisLabel": "", 131 | "axisPlacement": "auto", 132 | "barAlignment": 0, 133 | "drawStyle": "line", 134 | "fillOpacity": 0, 135 | "gradientMode": "none", 136 | "hideFrom": { 137 | "legend": false, 138 | "tooltip": false, 139 | "viz": false 140 | }, 141 | "insertNulls": false, 142 | "lineInterpolation": "linear", 143 | "lineWidth": 1, 144 | "pointSize": 5, 145 | "scaleDistribution": { 146 | "type": "linear" 147 | }, 148 | "showPoints": "auto", 149 | "spanNulls": false, 150 | "stacking": { 151 | "group": "A", 152 | "mode": "none" 153 | }, 154 | "thresholdsStyle": { 155 | "mode": "off" 156 | } 157 | }, 158 | "mappings": [], 159 | "thresholds": { 160 | "mode": "absolute", 161 | "steps": [ 162 | { 163 | "color": "green", 164 | "value": null 165 | }, 166 | { 167 | "color": "red", 168 | "value": 80 169 | } 170 | ] 171 | }, 172 | "unitScale": true 173 | }, 174 | "overrides": [] 175 | }, 176 | "gridPos": { 177 | "h": 8, 
178 | "w": 12, 179 | "x": 12, 180 | "y": 0 181 | }, 182 | "id": 9, 183 | "options": { 184 | "legend": { 185 | "calcs": [], 186 | "displayMode": "list", 187 | "placement": "bottom", 188 | "showLegend": true 189 | }, 190 | "tooltip": { 191 | "mode": "single", 192 | "sort": "none" 193 | } 194 | }, 195 | "targets": [ 196 | { 197 | "datasource": { 198 | "type": "influxdb", 199 | "uid": "P5697886F9CA74929" 200 | }, 201 | "query": "from(bucket: \"mdt_grpc\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_field\"] == \"operational_status\")\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"mean\")", 202 | "refId": "A" 203 | } 204 | ], 205 | "title": "Interface Oper status", 206 | "type": "timeseries" 207 | }, 208 | { 209 | "datasource": { 210 | "type": "influxdb", 211 | "uid": "P5697886F9CA74929" 212 | }, 213 | "fieldConfig": { 214 | "defaults": { 215 | "color": { 216 | "mode": "palette-classic" 217 | }, 218 | "custom": { 219 | "axisBorderShow": false, 220 | "axisCenteredZero": false, 221 | "axisColorMode": "text", 222 | "axisLabel": "", 223 | "axisPlacement": "auto", 224 | "barAlignment": 0, 225 | "drawStyle": "line", 226 | "fillOpacity": 0, 227 | "gradientMode": "none", 228 | "hideFrom": { 229 | "legend": false, 230 | "tooltip": false, 231 | "viz": false 232 | }, 233 | "insertNulls": false, 234 | "lineInterpolation": "linear", 235 | "lineWidth": 1, 236 | "pointSize": 5, 237 | "scaleDistribution": { 238 | "type": "linear" 239 | }, 240 | "showPoints": "auto", 241 | "spanNulls": false, 242 | "stacking": { 243 | "group": "A", 244 | "mode": "none" 245 | }, 246 | "thresholdsStyle": { 247 | "mode": "off" 248 | } 249 | }, 250 | "mappings": [], 251 | "thresholds": { 252 | "mode": "absolute", 253 | "steps": [ 254 | { 255 | "color": "green", 256 | "value": null 257 | }, 258 | { 259 | "color": "red", 260 | "value": 80 261 | } 262 | ] 263 | }, 264 | "unitScale": true 265 | }, 266 | "overrides": [] 267 | }, 268 | "gridPos": { 269 | "h": 8, 270 | "w": 12, 271 | "x": 0, 272 | "y": 8 273 | }, 274 | "id": 8, 275 | "options": { 276 | "legend": { 277 | "calcs": [], 278 | "displayMode": "list", 279 | "placement": "bottom", 280 | "showLegend": true 281 | }, 282 | "tooltip": { 283 | "mode": "single", 284 | "sort": "none" 285 | } 286 | }, 287 | "targets": [ 288 | { 289 | "datasource": { 290 | "type": "influxdb", 291 | "uid": "P5697886F9CA74929" 292 | }, 293 | "query": "from(bucket: \"mdt_grpc\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"Cisco-IOS-XE-interfaces-oper.xml\")\n |> filter(fn: (r) => r[\"_field\"] == \"rx-kbps\" or r[\"_field\"] == \"tx-kbps\")\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"mean\")", 294 | "refId": "A" 295 | } 296 | ], 297 | "title": "Interface Traffic tx/rx kbps", 298 | "type": "timeseries" 299 | }, 300 | { 301 | "datasource": { 302 | "type": "influxdb", 303 | "uid": "P5697886F9CA74929" 304 | }, 305 | "fieldConfig": { 306 | "defaults": { 307 | "color": { 308 | "mode": "palette-classic" 309 | }, 310 | "custom": { 311 | "axisBorderShow": false, 312 | "axisCenteredZero": false, 313 | "axisColorMode": "text", 314 | "axisLabel": "", 315 | "axisPlacement": "auto", 316 | "barAlignment": 0, 317 | "drawStyle": "line", 318 | "fillOpacity": 0, 319 | "gradientMode": "none", 320 | "hideFrom": { 321 | "legend": false, 322 | "tooltip": false, 323 | "viz": false 324 | }, 325 | "insertNulls": false, 326 | 
"lineInterpolation": "linear", 327 | "lineWidth": 1, 328 | "pointSize": 5, 329 | "scaleDistribution": { 330 | "type": "linear" 331 | }, 332 | "showPoints": "auto", 333 | "spanNulls": false, 334 | "stacking": { 335 | "group": "A", 336 | "mode": "none" 337 | }, 338 | "thresholdsStyle": { 339 | "mode": "off" 340 | } 341 | }, 342 | "mappings": [], 343 | "thresholds": { 344 | "mode": "absolute", 345 | "steps": [ 346 | { 347 | "color": "green", 348 | "value": null 349 | }, 350 | { 351 | "color": "red", 352 | "value": 80 353 | } 354 | ] 355 | }, 356 | "unitScale": true 357 | }, 358 | "overrides": [] 359 | }, 360 | "gridPos": { 361 | "h": 8, 362 | "w": 12, 363 | "x": 12, 364 | "y": 8 365 | }, 366 | "id": 6, 367 | "options": { 368 | "legend": { 369 | "calcs": [], 370 | "displayMode": "list", 371 | "placement": "bottom", 372 | "showLegend": true 373 | }, 374 | "tooltip": { 375 | "mode": "single", 376 | "sort": "none" 377 | } 378 | }, 379 | "targets": [ 380 | { 381 | "datasource": { 382 | "type": "influxdb", 383 | "uid": "P5697886F9CA74929" 384 | }, 385 | "query": "from(bucket: \"mdt_grpc\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"Cisco-IOS-XE-memory-oper.xml\")\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"mean\")", 386 | "refId": "from(bucket: \"mdt_grpc\") |> range(start: v.timeRangeStart, stop: v.timeRangeStop) |> filter(fn: (r) => r[\"_measurement\"] == \"Cisco-IOS-XE-memory-oper.xml\") |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false) |> yield(name: \"mean\")" 387 | } 388 | ], 389 | "title": "Memory Usage", 390 | "type": "timeseries" 391 | }, 392 | { 393 | "datasource": { 394 | "type": "influxdb", 395 | "uid": "P5697886F9CA74929" 396 | }, 397 | "fieldConfig": { 398 | "defaults": { 399 | "color": { 400 | "mode": "palette-classic" 401 | }, 402 | "custom": { 403 | "axisBorderShow": false, 404 | "axisCenteredZero": false, 405 | "axisColorMode": "text", 406 | "axisLabel": "", 407 | "axisPlacement": "auto", 408 | "barAlignment": 0, 409 | "drawStyle": "line", 410 | "fillOpacity": 0, 411 | "gradientMode": "none", 412 | "hideFrom": { 413 | "legend": false, 414 | "tooltip": false, 415 | "viz": false 416 | }, 417 | "insertNulls": false, 418 | "lineInterpolation": "linear", 419 | "lineWidth": 1, 420 | "pointSize": 5, 421 | "scaleDistribution": { 422 | "type": "linear" 423 | }, 424 | "showPoints": "auto", 425 | "spanNulls": false, 426 | "stacking": { 427 | "group": "A", 428 | "mode": "none" 429 | }, 430 | "thresholdsStyle": { 431 | "mode": "off" 432 | } 433 | }, 434 | "mappings": [], 435 | "thresholds": { 436 | "mode": "absolute", 437 | "steps": [ 438 | { 439 | "color": "green", 440 | "value": null 441 | }, 442 | { 443 | "color": "red", 444 | "value": 80 445 | } 446 | ] 447 | }, 448 | "unitScale": true 449 | }, 450 | "overrides": [] 451 | }, 452 | "gridPos": { 453 | "h": 8, 454 | "w": 12, 455 | "x": 12, 456 | "y": 16 457 | }, 458 | "id": 3, 459 | "options": { 460 | "legend": { 461 | "calcs": [], 462 | "displayMode": "list", 463 | "placement": "bottom", 464 | "showLegend": true 465 | }, 466 | "tooltip": { 467 | "mode": "single", 468 | "sort": "none" 469 | } 470 | }, 471 | "targets": [ 472 | { 473 | "datasource": { 474 | "type": "influxdb", 475 | "uid": "P5697886F9CA74929" 476 | }, 477 | "query": "from(bucket: \"mdt_grpc\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_field\"] == \"in-crc-errors\" or r[\"_field\"] == \"in_errors\" or 
r[\"_field\"] == \"out_errors\" or r[\"_field\"] == \"in-discards\" or r[\"_field\"] == \"out-discards\" or r[\"_field\"] == \"in-unknown-protos\")\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"mean\")", 478 | "refId": "A" 479 | } 480 | ], 481 | "title": "Interface Errors", 482 | "type": "timeseries" 483 | } 484 | ], 485 | "refresh": "5s", 486 | "schemaVersion": 39, 487 | "tags": [], 488 | "templating": { 489 | "list": [ 490 | { 491 | "current": { 492 | "isNone": true, 493 | "selected": false, 494 | "text": "None", 495 | "value": "" 496 | }, 497 | "datasource": { 498 | "type": "influxdb", 499 | "uid": "P5697886F9CA74929" 500 | }, 501 | "definition": "", 502 | "description": "name of the devices available", 503 | "hide": 0, 504 | "includeAll": false, 505 | "label": "devices", 506 | "multi": false, 507 | "name": "devices", 508 | "options": [], 509 | "query": "", 510 | "refresh": 1, 511 | "regex": "", 512 | "skipUrlSync": false, 513 | "sort": 0, 514 | "type": "query" 515 | } 516 | ] 517 | }, 518 | "time": { 519 | "from": "now-5m", 520 | "to": "now" 521 | }, 522 | "timepicker": {}, 523 | "timezone": "", 524 | "title": "Network Telemetry", 525 | "uid": "d44174ca-56c5-41dc-9451-2c858cc0566b", 526 | "version": 1, 527 | "weekStart": "" 528 | } -------------------------------------------------------------------------------- /cml/topology.yaml: -------------------------------------------------------------------------------- 1 | lab: 2 | description: "" 3 | notes: "" 4 | title: Lab at Tue 08:57 AM 5 | version: 0.2.0 6 | links: 7 | - id: l0 8 | n1: n0 9 | n2: n1 10 | i1: i0 11 | i2: i0 12 | label: bridge-to-sandbox-port<->sandbox-backend-port0 13 | - id: l1 14 | n1: n1 15 | n2: n4 16 | i1: i1 17 | i2: i1 18 | label: sandbox-backend-port1<->cat8000v-0-GigabitEthernet1 19 | - id: l2 20 | n1: n1 21 | n2: n5 22 | i1: i2 23 | i2: i1 24 | label: sandbox-backend-port2<->cat8000v-1-GigabitEthernet1 25 | - id: l3 26 | n1: n1 27 | n2: n2 28 | i1: i3 29 | i2: i0 30 | label: sandbox-backend-port3<->server-ens2 31 | - id: l4 32 | n1: n1 33 | n2: n3 34 | i1: i4 35 | i2: i0 36 | label: sandbox-backend-port4<->client-ens2 37 | - id: l5 38 | n1: n4 39 | n2: n5 40 | i1: i2 41 | i2: i2 42 | label: cat8000v-0-GigabitEthernet2<->cat8000v-1-GigabitEthernet2 43 | - id: l6 44 | n1: n2 45 | n2: n4 46 | i1: i1 47 | i2: i3 48 | label: server-ens3<->cat8000v-0-GigabitEthernet3 49 | - id: l7 50 | n1: n5 51 | n2: n3 52 | i1: i3 53 | i2: i1 54 | label: cat8000v-1-GigabitEthernet3<->client-ens3 55 | - id: l8 56 | n1: n4 57 | n2: n5 58 | i1: i4 59 | i2: i4 60 | label: cat8000v-0-GigabitEthernet4<->cat8000v-1-GigabitEthernet4 61 | - id: l9 62 | n1: n6 63 | n2: n1 64 | i1: i1 65 | i2: i5 66 | label: cat8000v-2-GigabitEthernet1<->sandbox-backend-port5 67 | - id: l10 68 | n1: n6 69 | n2: n4 70 | i1: i2 71 | i2: i5 72 | label: cat8000v-2-GigabitEthernet2<->cat8000v-0-GigabitEthernet5 73 | - id: l11 74 | n1: n6 75 | n2: n5 76 | i1: i3 77 | i2: i5 78 | label: cat8000v-2-GigabitEthernet3<->cat8000v-1-GigabitEthernet5 79 | nodes: 80 | - boot_disk_size: null 81 | configuration: bridge0 82 | cpu_limit: null 83 | cpus: null 84 | data_volume: null 85 | hide_links: false 86 | id: n0 87 | image_definition: null 88 | label: bridge-to-sandbox 89 | node_definition: external_connector 90 | ram: null 91 | tags: [] 92 | x: 120 93 | y: -200 94 | interfaces: 95 | - id: i0 96 | label: port 97 | slot: 0 98 | type: physical 99 | - boot_disk_size: null 100 | configuration: ums-f8612dfc-e6 101 | cpu_limit: null 102 
| cpus: null 103 | data_volume: null 104 | hide_links: false 105 | id: n1 106 | image_definition: null 107 | label: sandbox-backend 108 | node_definition: unmanaged_switch 109 | ram: null 110 | tags: [] 111 | x: -80 112 | y: -80 113 | interfaces: 114 | - id: i0 115 | label: port0 116 | slot: 0 117 | type: physical 118 | - id: i1 119 | label: port1 120 | slot: 1 121 | type: physical 122 | - id: i2 123 | label: port2 124 | slot: 2 125 | type: physical 126 | - id: i3 127 | label: port3 128 | slot: 3 129 | type: physical 130 | - id: i4 131 | label: port4 132 | slot: 4 133 | type: physical 134 | - id: i5 135 | label: port5 136 | slot: 5 137 | type: physical 138 | - id: i6 139 | label: port6 140 | slot: 6 141 | type: physical 142 | - id: i7 143 | label: port7 144 | slot: 7 145 | type: physical 146 | - boot_disk_size: null 147 | configuration: |- 148 | #cloud-config 149 | hostname: server 150 | manage_etc_hosts: True 151 | system_info: 152 | default_user: 153 | name: cisco 154 | password: cisco 155 | chpasswd: { expire: False } 156 | ssh_pwauth: True 157 | ssh_authorized_keys: 158 | - VIRL-USER-SSH-PUBLIC-KEY 159 | write_files: 160 | - path: /etc/netplan/50-cloud-init.yaml 161 | content: | 162 | network: 163 | version: 2 164 | ethernets: 165 | ens3: 166 | match: 167 | name: ens3 168 | addresses: 169 | - 172.16.101.11/24 170 | routes: 171 | - to: 172.16.0.0/16 172 | via: 172.16.101.1 173 | set-name: ens3 174 | ens2: 175 | match: 176 | name: ens2 177 | addresses: 178 | - 10.10.20.179/24 179 | gateway4: 10.10.20.254 180 | set-name: ens2 181 | runcmd: 182 | - netplan apply 183 | - sed -i '/^\s*PasswordAuthentication\s\+no/d' /etc/ssh/sshd_config 184 | - echo "PasswordAuthentication yes" >> /etc/ssh/sshd_config 185 | - echo "UseDNS no" >> /etc/ssh/sshd_config 186 | - systemctl restart sshd 187 | cpu_limit: null 188 | cpus: null 189 | data_volume: null 190 | hide_links: false 191 | id: n2 192 | image_definition: null 193 | label: server 194 | node_definition: ubuntu 195 | ram: null 196 | tags: [] 197 | x: -560 198 | y: 120 199 | interfaces: 200 | - id: i0 201 | label: ens2 202 | slot: 0 203 | type: physical 204 | - id: i1 205 | label: ens3 206 | slot: 1 207 | type: physical 208 | - boot_disk_size: null 209 | configuration: |- 210 | #cloud-config 211 | hostname: client 212 | manage_etc_hosts: True 213 | system_info: 214 | default_user: 215 | name: cisco 216 | password: cisco 217 | chpasswd: { expire: False } 218 | ssh_pwauth: True 219 | ssh_authorized_keys: 220 | - VIRL-USER-SSH-PUBLIC-KEY 221 | write_files: 222 | - path: /etc/netplan/50-cloud-init.yaml 223 | content: | 224 | network: 225 | version: 2 226 | ethernets: 227 | ens3: 228 | match: 229 | name: ens3 230 | addresses: 231 | - 172.16.102.11/24 232 | routes: 233 | - to: 172.16.0.0/16 234 | via: 172.16.102.1 235 | set-name: ens3 236 | ens2: 237 | match: 238 | name: ens2 239 | addresses: 240 | - 10.10.20.180/24 241 | gateway4: 10.10.20.254 242 | set-name: ens2 243 | runcmd: 244 | - netplan apply 245 | - sed -i '/^\s*PasswordAuthentication\s\+no/d' /etc/ssh/sshd_config 246 | - echo "PasswordAuthentication yes" >> /etc/ssh/sshd_config 247 | - echo "UseDNS no" >> /etc/ssh/sshd_config 248 | - systemctl restart sshd 249 | cpu_limit: null 250 | cpus: null 251 | data_volume: null 252 | hide_links: false 253 | id: n3 254 | image_definition: null 255 | label: client 256 | node_definition: ubuntu 257 | ram: null 258 | tags: [] 259 | x: 400 260 | y: 120 261 | interfaces: 262 | - id: i0 263 | label: ens2 264 | slot: 0 265 | type: physical 266 | - id: i1 267 | 
label: ens3 268 | slot: 1 269 | type: physical 270 | - boot_disk_size: null 271 | configuration: |- 272 | ! 273 | hostname cat8000v-0 274 | ! 275 | no logging console 276 | enable password cisco 277 | ! 278 | aaa new-model 279 | ! 280 | aaa authentication login default local 281 | aaa authorization exec default local 282 | ! 283 | aaa session-id common 284 | ! 285 | ! 286 | username cisco privilege 15 secret 9 $9$iVecEqVTUJzHUk$EO2BfGoo4I8.wW.QanPw2rSxwy9NJt6kc3xFNEFLYSA 287 | username admin privilege 15 password 0 Cisco123 288 | ! 289 | interface GigabitEthernet1 290 | description mgmt to port1.sandbox-backend 291 | ip address 10.10.20.215 255.255.255.0 292 | negotiation auto 293 | no shutdown 294 | ! 295 | interface GigabitEthernet2 296 | description Connected to cat8000v-1 GigabitEthernet2 297 | ip address 10.1.1.1 255.255.255.0 298 | ip router isis 299 | negotiation auto 300 | no shutdown 301 | ! 302 | interface GigabitEthernet3 303 | description not used 304 | no ip address 305 | no shutdown 306 | negotiation auto 307 | ! 308 | interface GigabitEthernet4 309 | description Connected to cat8000v-1 GigabitEthernet4 310 | ip address 10.2.2.1 255.255.255.0 311 | ip router isis 312 | negotiation auto 313 | no shutdown 314 | ! 315 | interface GigabitEthernet5 316 | description Connected to cat8000v-2 GigabitEthernet2 317 | ip address 10.3.3.1 255.255.255.0 318 | ip router isis 319 | negotiation auto 320 | no shutdown 321 | ! 322 | router isis 323 | net 49.0001.0000.0000.000a.00 324 | is-type level-1 325 | ! 326 | no ip http server 327 | ip http secure-server 328 | ! 329 | ip route 0.0.0.0 0.0.0.0 10.10.20.254 330 | ! 331 | line con 0 332 | logging synchronous 333 | exec-timeout 0 0 334 | password cisco 335 | stopbits 1 336 | line aux 0 337 | line vty 0 4 338 | logging synchronous 339 | exec-timeout 0 0 340 | password cisco 341 | length 0 342 | transport input telnet ssh 343 | ! 344 | telemetry ietf subscription 1010 345 | encoding encode-kvgpb 346 | filter xpath /process-cpu-ios-xe-oper:cpu-usage/cpu-utilization/five-seconds 347 | source-address 10.10.20.215 348 | stream yang-push 349 | update-policy periodic 2000 350 | receiver ip address 10.10.20.50 57500 protocol grpc-tcp 351 | netconf-yang 352 | restconf 353 | ! 354 | cpu_limit: null 355 | cpus: null 356 | data_volume: null 357 | hide_links: false 358 | id: n4 359 | image_definition: null 360 | label: cat8000v-0 361 | node_definition: cat8000v 362 | ram: null 363 | tags: [] 364 | x: -320 365 | y: 120 366 | interfaces: 367 | - id: i0 368 | label: Loopback0 369 | type: loopback 370 | - id: i1 371 | label: GigabitEthernet1 372 | slot: 0 373 | type: physical 374 | - id: i2 375 | label: GigabitEthernet2 376 | slot: 1 377 | type: physical 378 | - id: i3 379 | label: GigabitEthernet3 380 | slot: 2 381 | type: physical 382 | - id: i4 383 | label: GigabitEthernet4 384 | slot: 3 385 | type: physical 386 | - id: i5 387 | label: GigabitEthernet5 388 | slot: 4 389 | type: physical 390 | - boot_disk_size: null 391 | configuration: |- 392 | ! 393 | hostname cat8000v-1 394 | ! 395 | no logging console 396 | enable password cisco 397 | ! 398 | aaa new-model 399 | ! 400 | aaa authentication login default local 401 | aaa authorization exec default local 402 | ! 403 | aaa session-id common 404 | ! 405 | ! 406 | username cisco privilege 15 secret 9 $9$iVecEqVTUJzHUk$EO2BfGoo4I8.wW.QanPw2rSxwy9NJt6kc3xFNEFLYSA 407 | username admin privilege 15 password 0 Cisco123 408 | ! 409 | ! 
410 | interface GigabitEthernet1 411 | description mgmt to port2.sandbox-backend 412 | ip address 10.10.20.216 255.255.255.0 413 | negotiation auto 414 | no shutdown 415 | ! 416 | interface GigabitEthernet2 417 | description Connected to cat8000v-0 GigabitEthernet2 418 | ip address 10.1.1.2 255.255.255.0 419 | ip router isis 420 | negotiation auto 421 | no shutdown 422 | ! 423 | interface GigabitEthernet3 424 | description Connected to end host 425 | ip address 172.16.102.1 255.255.255.0 426 | negotiation auto 427 | no shutdown 428 | ! 429 | interface GigabitEthernet4 430 | description Connected to cat8000v-0 GigabitEthernet4 431 | ip address 10.2.2.2 255.255.255.0 432 | ip router isis 433 | negotiation auto 434 | no shutdown 435 | ! 436 | interface GigabitEthernet5 437 | description Connected to cat8000v-2 GigabitEthernet3 438 | ip address 10.4.4.1 255.255.255.0 439 | ip router isis 440 | negotiation auto 441 | no shutdown 442 | ! 443 | router isis 444 | net 49.0001.0000.0000.000b.00 445 | is-type level-1 446 | ! 447 | no ip http server 448 | ip http secure-server 449 | ! 450 | ip route 0.0.0.0 0.0.0.0 10.10.20.254 451 | ! 452 | line con 0 453 | logging synchronous 454 | exec-timeout 0 0 455 | password cisco 456 | stopbits 1 457 | line aux 0 458 | line vty 0 4 459 | logging synchronous 460 | exec-timeout 0 0 461 | password cisco 462 | transport input telnet ssh 463 | ! 464 | ! 465 | telemetry ietf subscription 1010 466 | encoding encode-kvgpb 467 | filter xpath /process-cpu-ios-xe-oper:cpu-usage/cpu-utilization/five-seconds 468 | source-address 10.10.20.216 469 | stream yang-push 470 | update-policy periodic 2000 471 | receiver ip address 10.10.20.50 57500 protocol grpc-tcp 472 | netconf-yang 473 | restconf 474 | ! 475 | cpu_limit: null 476 | cpus: null 477 | data_volume: null 478 | hide_links: false 479 | id: n5 480 | image_definition: null 481 | label: cat8000v-1 482 | node_definition: cat8000v 483 | ram: null 484 | tags: [] 485 | x: 120 486 | y: 120 487 | interfaces: 488 | - id: i0 489 | label: Loopback0 490 | type: loopback 491 | - id: i1 492 | label: GigabitEthernet1 493 | slot: 0 494 | type: physical 495 | - id: i2 496 | label: GigabitEthernet2 497 | slot: 1 498 | type: physical 499 | - id: i3 500 | label: GigabitEthernet3 501 | slot: 2 502 | type: physical 503 | - id: i4 504 | label: GigabitEthernet4 505 | slot: 3 506 | type: physical 507 | - id: i5 508 | label: GigabitEthernet5 509 | slot: 4 510 | type: physical 511 | - boot_disk_size: null 512 | configuration: |- 513 | ! 514 | hostname cat8000v-2 515 | ! 516 | logging console 517 | enable password cisco 518 | ! 519 | aaa new-model 520 | ! 521 | aaa authentication login default local 522 | aaa authorization exec default local 523 | ! 524 | aaa session-id common 525 | ! 526 | ! 527 | username cisco privilege 15 secret 9 $9$iVecEqVTUJzHUk$EO2BfGoo4I8.wW.QanPw2rSxwy9NJt6kc3xFNEFLYSA 528 | username admin privilege 15 password 0 Cisco123 529 | ! 530 | ! 531 | interface GigabitEthernet1 532 | description mgmt to port5.sandbox-backend 533 | ip address 10.10.20.217 255.255.255.0 534 | negotiation auto 535 | no shutdown 536 | ! 537 | interface GigabitEthernet2 538 | description Connected to cat8000v-0 GigabitEthernet5 539 | ip address 10.3.3.2 255.255.255.0 540 | ip router isis 541 | negotiation auto 542 | no shutdown 543 | ! 544 | interface GigabitEthernet3 545 | description Connected to cat8000v-1 GigabitEthernet5 546 | ip address 10.4.4.2 255.255.255.0 547 | ip router isis 548 | negotiation auto 549 | no shutdown 550 | ! 
551 | router isis 552 | net 49.0001.0000.0000.000c.00 553 | is-type level-1 554 | ! 555 | no ip http server 556 | ip http secure-server 557 | ! 558 | ip route 0.0.0.0 0.0.0.0 10.10.20.254 559 | ! 560 | line con 0 561 | logging synchronous 562 | exec-timeout 0 0 563 | password cisco 564 | stopbits 1 565 | line aux 0 566 | line vty 0 4 567 | logging synchronous 568 | exec-timeout 0 0 569 | password cisco 570 | transport input telnet ssh 571 | ! 572 | ! 573 | telemetry ietf subscription 1010 574 | encoding encode-kvgpb 575 | filter xpath /process-cpu-ios-xe-oper:cpu-usage/cpu-utilization/five-seconds 576 | source-address 10.10.20.216 577 | stream yang-push 578 | update-policy periodic 2000 579 | receiver ip address 10.10.20.50 57500 protocol grpc-tcp 580 | netconf-yang 581 | restconf 582 | ! 583 | cpu_limit: null 584 | cpus: null 585 | data_volume: null 586 | hide_links: false 587 | id: n6 588 | image_definition: null 589 | label: cat8000v-2 590 | node_definition: cat8000v 591 | ram: null 592 | tags: [] 593 | x: -80 594 | y: 40 595 | interfaces: 596 | - id: i0 597 | label: Loopback0 598 | type: loopback 599 | - id: i1 600 | label: GigabitEthernet1 601 | slot: 0 602 | type: physical 603 | - id: i2 604 | label: GigabitEthernet2 605 | slot: 1 606 | type: physical 607 | - id: i3 608 | label: GigabitEthernet3 609 | slot: 2 610 | type: physical 611 | - id: i4 612 | label: GigabitEthernet4 613 | slot: 3 614 | type: physical 615 | annotations: [] 616 | --------------------------------------------------------------------------------