├── README.md ├── cluster-health.sh ├── deploy-without-application.sh ├── deploy-without-correct-resources.py ├── deploy-without-limits.sh └── deployment-health.sh /README.md: -------------------------------------------------------------------------------- 1 | # Kubernetes Scripts 2 | 3 | These Scripts allow you to troubleshoot and check the health status of the cluster and deployments 4 | They allow you to gather these information 5 | 6 | - Cluster resources 7 | - Cluster Nodes status 8 | - Nodes Conditions 9 | - Pods per Nodes 10 | - Worker Nodes Per Availability Zones 11 | - Cluster Node Types 12 | - Pods not in running or completed status 13 | - Top Pods according to Memory Limits 14 | - Top Pods according to CPU Limits 15 | - Number of Pods 16 | - Pods Status 17 | - Max Pods restart count 18 | - Readiness of Pods 19 | - Pods Average Utilization 20 | - Top Pods according to CPU Utilization 21 | - Top Pods according to Memory Utilization 22 | - Pods Distribution per Nodes 23 | - Node Distribution per Availability Zone 24 | - Deployments without correct resources (Memory or CPU) 25 | - Deployments without Limits 26 | - Deployments without Application configured in Labels 27 | 28 | -------------------------------------------------------------------------------- /cluster-health.sh: -------------------------------------------------------------------------------- 1 | ########################################################################################### 2 | # Script to check the health status of the cluster and report the objects and resources # 3 | ########################################################################################### 4 | 5 | #!/bin/bash 6 | 7 | # Colors 8 | RED='\033[0;31m' 9 | GREEN='\033[0;32m' 10 | YELLOW='\033[0;33m' 11 | BLUE='\033[0;36m' 12 | PLAIN='\033[0m' 13 | bold=$(tput bold) 14 | normal=$(tput sgr0) 15 | 16 | deploy="$2" 17 | namespace="$1" 18 | 19 | cluster_objects() { 20 | echo -e "\e[44mCollecting Information from the Cluster:\e[21m" 21 | deployments=$(kubectl get deployment --all-namespaces | grep -v NAMESPACE | wc -l) 22 | pods=$(kubectl get po --all-namespaces | grep -v NAMESPACE | wc -l) 23 | services=$(kubectl get svc --all-namespaces | grep -v NAMESPACE | wc -l) 24 | ingresses=$(kubectl get ing --all-namespaces | grep -v NAMESPACE | wc -l) 25 | statefulset=$(kubectl get statefulset --all-namespaces | grep -v NAMESPACE | wc -l) 26 | postgresql=$(kubectl get postgresql --all-namespaces | grep -v NAMESPACE | wc -l) 27 | daemonset=$(kubectl get daemonset --all-namespaces | grep -v NAMESPACE | wc -l) 28 | replicaset=$(kubectl get rs --all-namespaces | grep -v NAMESPACE | wc -l) 29 | serviceaccount=$(kubectl get sa --all-namespaces | grep -v NAMESPACE | wc -l) 30 | storageclass=$(kubectl get sc --all-namespaces | grep -v NAMESPACE | wc -l) 31 | PodDistrubtion=$(kubectl get pdb --all-namespaces | grep -v NAMESPACE | wc -l) 32 | CustomResources=$(kubectl get crd --all-namespaces | grep -v NAMESPACE | wc -l) 33 | cronjobs=$(kubectl get cronjobs --all-namespaces | grep -v NAMESPACE | wc -l) 34 | persistancevolumes=$(kubectl get pv --all-namespaces | grep -v NAMESPACE | wc -l) 35 | volumeclaims=$(kubectl get pvc --all-namespaces | grep -v NAMESPACE | wc -l) 36 | hpa=$(kubectl get hpa --all-namespaces | grep -v NAMESPACE | wc -l) 37 | echo -e "\e[1m\e[39mCluster Resources:\e[21m" 38 | echo -e "${BLUE}"Deployments" :${GREEN}$deployments" 39 | echo -e "${BLUE}"Services" :${GREEN}$services" 40 | echo -e "${BLUE}"Ingresses" :${GREEN}$ingresses" 41 | echo -e "${BLUE}"StatefulSets" :${GREEN}$statefulset" 42 | echo -e "${BLUE}"Pods" :${GREEN}$pods" 43 | echo -e "${BLUE}"DaemonSets" :${GREEN}$daemonset" 44 | echo -e "${BLUE}"ReplicaSets" :${GREEN}$replicaset" 45 | echo -e "${BLUE}"StorageClasses" :${GREEN}$storageclass" 46 | echo -e "${BLUE}"CronJobs" :${GREEN}$cronjobs" 47 | echo -e "${BLUE}"PostgreSQL" :${GREEN}$postgresql" 48 | echo -e "${BLUE}"CustomResources" :${GREEN}$CustomResources" 49 | echo -e "${BLUE}"HorizontalPodAutoscaler" :${GREEN}$hpa" 50 | echo -e "${BLUE}"PersistanceVolumes" :${GREEN}$persistancevolumes" 51 | echo -e "${BLUE}"VolumeClaims" :${GREEN}$volumeclaims" 52 | 53 | } 54 | 55 | cluster_nodes() { 56 | nodes=$(kubectl get nodes | grep -v NAME | wc -l) 57 | worker=$(kubectl get nodes | grep -v NAME | grep worker | wc -l) 58 | master=$(kubectl get nodes | grep -v NAME | grep master | wc -l) 59 | node_status=$(for i in $(kubectl get node | grep -v NAME | awk {'print $2'} | sort -u); do echo "$i";done) 60 | echo -e "\e[1m\e[39mCluster Node Status:\e[21m" 61 | echo -e "${BLUE}"ALL Nodes" :${GREEN}$nodes" 62 | echo -e "${BLUE}"Worker Nodes" :${GREEN}$worker" 63 | echo -e "${BLUE}"Master Nodes" :${GREEN}$master" 64 | echo -e "${BLUE}"Nodes Status" :${GREEN}$node_status" 65 | echo -e "\e[1m\e[39mNodes Conditions:\e[21m" 66 | echo -e "${BLUE}$(kubectl describe node | grep kubelet | awk {'print $15'} | sort -u)" 67 | echo -e "\e[1m\e[39mPods Per Node:\e[21m" 68 | for node in $(kubectl get node | grep -v NAME | awk {'print $1'}) 69 | do pod_per_node=$(kubectl get pods --all-namespaces --field-selector spec.nodeName=$node -o wide | wc -l) 70 | echo -e "${BLUE}"$node" \t :${GREEN}$pod_per_node" 71 | done 72 | # Nodes Per AZ 73 | a=$(kubectl get node -l failure-domain.beta.kubernetes.io/zone=eu-central-1a | grep -v NAME | grep -v master | wc -l) 74 | b=$(kubectl get node -l failure-domain.beta.kubernetes.io/zone=eu-central-1b | grep -v NAME | grep -v master | wc -l) 75 | c=$(kubectl get node -l failure-domain.beta.kubernetes.io/zone=eu-central-1c | grep -v NAME | grep -v master | wc -l) 76 | echo -e "\e[1m\e[39mWorker Nodes per AZ:\e[21m" 77 | echo -e "${BLUE}"eu-central-1a" \t :${GREEN}$a" 78 | echo -e "${BLUE}"eu-central-1b" \t :${GREEN}$b" 79 | echo -e "${BLUE}"eu-central-1c" \t :${GREEN}$c" 80 | #Node Types 81 | types=$(kubectl describe node | grep beta.kubernetes.io/instance-type | cut -d"=" -f2 | sort | uniq -c | awk -F$'\t' {'print $2 $1'}) 82 | echo -e "\e[1m\e[39mCluster Node Types:\e[21m" 83 | echo -e "\e[34m$types" 84 | } 85 | 86 | pod_with_issues() { 87 | echo -e "\e[1m\e[39mPods not in Running or Completed State:\e[21m" 88 | kubectl get pods --all-namespaces --field-selector=status.phase!=Running | grep -v Completed 89 | } 90 | 91 | top_mem_pods() { 92 | echo -e "\e[1m\e[39mTop Pods According to Memory Limits:\e[21m" 93 | for node in $(kubectl get node | awk {'print $1'} | grep -v NAME) 94 | do kubectl describe node $node | sed -n "/Non-terminated Pods/,/Allocated resources/p"| grep -P -v "terminated|Allocated|Namespace" 95 | done | grep '[0-9]G' | awk -v OFS=' \t' '{if ($9 >= '2Gi') print "\033[0;36m"$2," ", "\033[0;31m"$9}' | sort -k2 -r | column -t 96 | 97 | } 98 | top_cpu_pods() { 99 | echo -e "\e[1m\e[39mTop Pods According to CPU Limits:\e[21m" 100 | for node in $(kubectl get node | awk {'print $1'} | grep -v NAME) 101 | do kubectl describe node $node | sed -n "/Non-terminated Pods/,/Allocated resources/p" | grep -P -v "terminated|Allocated|Namespace" 102 | done | awk -v OFS=' \t' '{if ($5 ~/^[2-9]+$/) print "\033[0;36m"$2, "\033[0;31m"$5}' | sort -k2 -r | column -t 103 | } 104 | 105 | clear 106 | cluster_objects 107 | cluster_nodes 108 | pod_with_issues 109 | top_mem_pods 110 | top_cpu_pods 111 | 112 | -------------------------------------------------------------------------------- /deploy-without-application.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | echo -e "\e[1m\e[39mThese Deployments do not have Application in the Labels\e[21m" 4 | namespaces=$(kubectl get ns | awk {'print $1'} | grep -v NAME) 5 | for namespace in $namespaces 6 | do 7 | echo -e "\e[1m\e[39mNamespace: $namespace\e[21m" 8 | deployments=$(kubectl get deploy -n $namespace | awk {'print $1'} | grep -v NAME) 9 | for deploy in $deployments 10 | do 11 | 12 | var=$(kubectl get deployment -n ${namespace} --output=json ${deploy} | \ 13 | jq -j '.metadata.labels | to_entries | .[] | "\(.key)=\(.value),"') 14 | labels=${var%?} 15 | 16 | echo $labels | grep application &> /dev/null 17 | status=$? 18 | if [ $status -ne 0 ] 19 | then 20 | echo $deploy 21 | fi 22 | 23 | 24 | done 25 | done 26 | -------------------------------------------------------------------------------- /deploy-without-correct-resources.py: -------------------------------------------------------------------------------- 1 | from kubernetes import client, config, watch 2 | from kubernetes.client.rest import ApiException 3 | from kubernetes.client.api_client import ApiClient 4 | import sys 5 | import ast 6 | 7 | config.load_kube_config() 8 | api = client.CoreV1Api() 9 | deploymentAPI = client.ExtensionsV1beta1Api() 10 | 11 | 12 | def check_deployments(): 13 | try: 14 | namespaces = api.list_namespace().items 15 | matches = [] 16 | deployments_without_correct_resources = [] 17 | for namespace in namespaces: 18 | deployments = deploymentAPI.list_namespaced_deployment(namespace.metadata.name).items 19 | for deploy in deployments: 20 | deployment = deploy.metadata.name 21 | containers = deploy.spec.template.spec.containers 22 | for cont in containers: 23 | r = cont.resources 24 | r = ast.literal_eval(str(r)) 25 | if "requests" not in r: 26 | matches.append(deployment) 27 | elif "limits" not in r: 28 | matches.append(deployment) 29 | elif r.get("requests") == None: 30 | matches.append(deployment) 31 | elif r.get("limits") == None: 32 | matches.append(deployment) 33 | elif r.get("requests").get("cpu") == None: 34 | matches.append(deployment) 35 | elif r.get("requests").get("memory") == None: 36 | matches.append(deployment) 37 | elif r.get("limits").get("memory") == None: 38 | matches.append(deployment) 39 | for match in matches: 40 | if match not in deployments_without_correct_resources: 41 | deployments_without_correct_resources.append(match) 42 | for d in deployments_without_correct_resources: 43 | print(d) 44 | 45 | except ApiException as e: 46 | print("Exception when calling the function: %s\n" % e) 47 | 48 | 49 | if __name__ == '__main__': 50 | check_deployments() 51 | -------------------------------------------------------------------------------- /deploy-without-limits.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | echo -e "\e[1m\e[39mThese Deployments that do not have Limits configured\e[21m" 4 | namespaces=$(kubectl get ns | awk {'print $1'} | grep -v NAME) 5 | for namespace in $namespaces 6 | do 7 | echo -e "\e[1m\e[39mNamespace: $namespace\e[21m" 8 | deployments=$(kubectl get deploy -n $namespace | awk {'print $1'} | grep -v NAME) 9 | for deploy in $deployments 10 | do 11 | 12 | var=$(kubectl get deployment -n ${namespace} --output=json ${deploy} | \ 13 | jq -j '.spec.selector.matchLabels | to_entries | .[] | "\(.key)=\(.value),"') 14 | selector=${var%?} 15 | 16 | if [ $((kubectl get po -n ${namespace} -l ${selector} | grep -v NAME | wc -l) 2> /dev/null ) -eq 0 ] 17 | then 18 | continue 19 | fi 20 | cpulimit=$(kubectl describe node | grep $(kubectl get po -n ${namespace} -l ${selector} | grep -v NAME | \ 21 | awk {'print $1'} | head -n1) | awk {'print $5'} | grep -Ev "^$" | sort -u | \ 22 | awk '{ if ($0 ~ /[0-9]*m/) print $0; else print $0*1000;}' | sed 's/[^0-9]*//g') 23 | if [ $cpulimit -eq 0 ] 24 | then 25 | echo $deploy 26 | fi 27 | 28 | done 29 | done 30 | -------------------------------------------------------------------------------- /deployment-health.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | ###################################################################################### 4 | # Script to check the health status of Deployment and resources assosiated with it # 5 | # #################################################################################### 6 | 7 | 8 | # Colors 9 | RED='\033[0;31m' 10 | GREEN='\033[0;32m' 11 | YELLOW='\033[0;33m' 12 | BLUE='\033[0;36m' 13 | PLAIN='\033[0m' 14 | bold=$(tput bold) 15 | normal=$(tput sgr0) 16 | 17 | deploy="$2" 18 | namespace="$1" 19 | 20 | if [ $# -ne 2 ]; then 21 | echo "usage: $0 " 22 | exit 1 23 | fi 24 | 25 | var=$(kubectl get deployment -n ${namespace} --output=json ${deploy} | \ 26 | jq -j '.spec.selector.matchLabels | to_entries | .[] | "\(.key)=\(.value),"') 27 | selector=${var%?} 28 | 29 | pod_status() { 30 | no_of_pods=$(kubectl get po -n $namespace -l $selector | grep -v NAME | wc -l) 31 | if [[ $no_of_pods -eq 0 ]] 32 | then 33 | echo "Deployment $deploy has 0 replicas" 34 | exit 0 35 | fi 36 | pods_status=$(for i in $(kubectl get po -n $namespace -l $selector | grep -v NAME | awk {'print $3'} | sort -u); do echo "$i";done) 37 | restart_count=$(kubectl get po -n $namespace -l $selector | grep -v NAME | awk {'print $4'} | grep -v RESTARTS | sort -ur | awk 'FNR <= 1') 38 | echo -e "${BLUE}"Number of Pods" :${GREEN}$no_of_pods" 39 | echo -e "${BLUE}"Pods Status" :${GREEN}$pods_status" 40 | echo -e "${BLUE}"MAX Pod Restart Count" :${GREEN}$restart_count" 41 | readiness() { 42 | r=$(kubectl get po -n $namespace | grep $deploy | grep -vE '1/1|2/2|3/3|4/4|5/5|6/6|7/7' &> /dev/null ) 43 | if [[ $? -ne 0 ]] 44 | then 45 | echo -e "${BLUE}"Readiness" :${GREEN}"ALL Pods are Ready"" 46 | else 47 | echo -e "${BLUE}"Readiness" :${RED}"You have some Pods not ready "" 48 | fi 49 | } 50 | readiness 51 | 52 | } 53 | pod_distribution() { 54 | echo -e "\e[1m\e[39mPod Distribution per Node\e[21m" 55 | for nodes in $(kubectl get po -n $namespace -l $selector -o wide | grep $deploy | awk {'print $7'} | sort -u) 56 | do 57 | echo -e "${BLUE}$nodes \t \t :${GREEN}$(kubectl describe node $nodes | grep $deploy | wc -l)" 58 | done 59 | echo -e "\e[1m\e[39mNode Distribution per Availability Zone\e[21m" 60 | node_dist=$(for node in $(kubectl get po -n $namespace -l $selector -o wide | grep $deploy | awk {'print $7'} | sort -u) 61 | do kubectl get node --show-labels $node 62 | done | awk {'print $6'} | grep -v LABELS) 63 | a=$(echo $node_dist | grep -o eu-central-1a | wc -l) 64 | b=$(echo $node_dist | grep -o eu-central-1b | wc -l) 65 | c=$(echo $node_dist | grep -o eu-central-1c | wc -l) 66 | echo -e "${BLUE}"eu-central-1a" \t \t :${GREEN}$a" 67 | echo -e "${BLUE}"eu-central-1b" \t \t :${GREEN}$b" 68 | echo -e "${BLUE}"eu-central-1c" \t \t :${GREEN}$c" 69 | 70 | } 71 | 72 | pod_utilization() { 73 | 74 | cpulimit=$(kubectl describe node | grep $(kubectl get po -n ${namespace} -l ${selector} | grep -v NAME | \ 75 | awk {'print $1'} | head -n1) | awk {'print $5'} | grep -Ev "^$" | sort -u | \ 76 | awk '{ if ($0 ~ /[0-9]*m/) print $0; else print $0*1000;}' | sed 's/[^0-9]*//g') 77 | 78 | memlimit=$(kubectl describe node | grep $(kubectl get po -n ${namespace} -l ${selector} | grep -v NAME | \ 79 | awk {'print $1'} | head -n1) | awk {'print $9'} | grep -Ev "^$" | sort -u | \ 80 | awk '{ if ($0 ~ /[0-9]*Gi/) print $0*1024; else if ($0 ~ /[0-9]*G/) print $0*1000; \ 81 | else if ($0 ~ /[0-9]*M/ || $0 ~ /[0-9]*Mi/) print $0 ; else print $0}' | sed 's/[^0-9]*//g') 82 | dcores=$(kubectl top pods -n $namespace | grep $deploy | awk {'print $2'} | sed 's/[^0-9]*//g' | awk '{n += $1}; END{print n}') 83 | dmem=$(kubectl top pods -n $namespace | grep $deploy | awk {'print $3'} | sed 's/[^0-9]*//g' | awk '{n += $1}; END{print n}') 84 | 85 | 86 | if [ $cpulimit -eq 0 ] 87 | then 88 | echo -e "\e[1m\e[33mWARN: Pods do not have CPU Limits\e[21m" 89 | else 90 | echo -e "\e[1m\e[39mAverage Utilization \e[21m" 91 | deploymentcpu=$(bc <<< "scale=2;$dcores/($cpulimit*$no_of_pods)*100") 92 | echo -e "${BLUE}"CPU Utilization" :${GREEN}$deploymentcpu%" 93 | if [ $memlimit -ne 0 ] 94 | then 95 | deploymentmem=$(bc <<< "scale=2;$dmem/($memlimit*$no_of_pods)*100") 96 | echo -e "${BLUE}"Memory Utilization" :${GREEN}$deploymentmem%" 97 | fi 98 | echo -e "\e[1m\e[39mTop Pods CPU Utilization\e[21m" 99 | kubectl top pods -n $namespace -l $selector | grep -v NAME| \ 100 | awk 'FNR <= 5' | awk {'print $1,$2'}| awk '$2=($2/'$cpulimit')*100"%"' | \ 101 | awk '{printf $1 " " "%0.2f\n",$2}' | sort -k2 -r | \ 102 | awk -v OFS='\t' '{if ($2 >= 80) print "\033[0;36m"$1," ", "\033[0;31m"":"$2"%"; else print "\033[0;36m"$1," ","\033[0;32m"":"$2"%";}' 103 | fi 104 | if [ $memlimit -eq 0 ] 105 | then 106 | echo -e "\e[1m\e[33mWARN: Pods do not have Memory Limits\e[21m" 107 | else 108 | echo -e "\e[1m\e[39mTop Pods Memory Utilization\e[21m" 109 | kubectl top pods -n $namespace -l $selector | grep -v NAME | \ 110 | awk 'FNR <= 5' | awk {'print $1,$3'} | awk '$2=($2/'$memlimit')*100"%"' | \ 111 | awk '{printf $1 " " "%0.2f\n",$2}' | sort -k2 -r | \ 112 | awk -v OFS=' \t' '{if ($2 >= 80) print "\033[0;36m"$1," ", "\033[0;31m"":"$2"%"; else print "\033[0;36m"$1," ","\033[0;32m"":"$2"%";}' 113 | fi 114 | } 115 | 116 | clear 117 | kubectl get deploy $deploy -n $namespace &> /dev/null 118 | status=$? 119 | if [ $status -ne 0 ]; then 120 | echo -e "Deployment $deploy not exist. \nPlease make sure you provide the correct deployment name and the correct namespace" 121 | exit $status 122 | fi 123 | echo -e "\e[1m\e[39mChecking Deployment $deploy...\e[21m" 124 | pod_status 125 | pod_utilization 126 | pod_distribution 127 | 128 | 129 | --------------------------------------------------------------------------------