├── doc ├── images │ ├── arch1.png │ ├── arch2.png │ └── k8s-lsf-logos.png ├── IBM_Spectrum_Computing_Cloud_Pak_Quickstart_Guide.pdf └── LSF_Operator │ ├── service_account.yaml │ ├── role_binding.yaml │ ├── clusterrolebinding1.yaml │ ├── clusterrolebinding2.yaml │ ├── scc.yaml │ ├── clusterrole.yaml │ ├── role.yaml │ ├── operator.yaml │ ├── example-pod-sched.yaml │ ├── lsf_v1beta1_lsfcluster_crd.yaml │ ├── example-lsf.yaml │ ├── README-pod-sched.md │ └── README.md ├── examples ├── Resource_Sharing │ ├── cleanup.sh │ ├── templateJob.yml │ ├── runtest.sh │ ├── completions.sh │ └── README.md ├── Run_Limits │ ├── cleanup.sh │ ├── templateJob.yml │ ├── runtest.sh │ └── README.md ├── Run_Windows │ ├── cleanup.sh │ ├── completions.sh │ ├── templateJob.yml │ ├── runtest.sh │ └── README.md ├── Pod_Priority_and_Preemption │ ├── cleanup.sh │ ├── high-priority.yml │ ├── low-priority-preempt.yml │ ├── low-priority-non-preempt.yml │ ├── priority-test-preempt.sh │ ├── priority-test-non-preempt.sh │ ├── completions.sh │ └── README.md ├── Pod_Dependencies │ ├── cleanup.sh │ ├── completions.sh │ ├── templateJob.yml │ ├── j1.yaml │ ├── j2.yaml │ ├── j3.yaml │ ├── j4.yaml │ ├── j5.yaml │ ├── README.md │ └── runtest.sh ├── README.md ├── distributed-helloworld.py └── distributed-mnist.py ├── README.md └── LICENSE /doc/images/arch1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBMSpectrumComputing/lsf-kubernetes/HEAD/doc/images/arch1.png -------------------------------------------------------------------------------- /doc/images/arch2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBMSpectrumComputing/lsf-kubernetes/HEAD/doc/images/arch2.png -------------------------------------------------------------------------------- /doc/images/k8s-lsf-logos.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBMSpectrumComputing/lsf-kubernetes/HEAD/doc/images/k8s-lsf-logos.png -------------------------------------------------------------------------------- /doc/IBM_Spectrum_Computing_Cloud_Pak_Quickstart_Guide.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBMSpectrumComputing/lsf-kubernetes/HEAD/doc/IBM_Spectrum_Computing_Cloud_Pak_Quickstart_Guide.pdf -------------------------------------------------------------------------------- /examples/Resource_Sharing/cleanup.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | if [ ! -d jobtmp ]; then 4 | echo "No pods" 5 | exit 0 6 | fi 7 | cd jobtmp 8 | for i in $(ls *yaml); do 9 | kubectl delete -f $i 10 | rm -rf $i 11 | done 12 | -------------------------------------------------------------------------------- /examples/Run_Limits/cleanup.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | if [ ! -d jobtmp ]; then 4 | echo "No jobtmp" 5 | exit 1 6 | fi 7 | cd jobtmp 8 | for i in $(ls rljob*yaml); do 9 | kubectl delete -f $i 10 | rm -rf $i 11 | done 12 | -------------------------------------------------------------------------------- /examples/Run_Windows/cleanup.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | if [ ! 
-d jobtmp ]; then 4 | echo "No jobtmp" 5 | exit 1 6 | fi 7 | cd jobtmp 8 | for i in $(ls rwjob*yaml); do 9 | kubectl delete -f $i 10 | rm -rf $i 11 | done 12 | -------------------------------------------------------------------------------- /examples/Pod_Priority_and_Preemption/cleanup.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | TEMPLATE=templateJob.yml 4 | if [ ! -d jobtmp ]; then 5 | echo "No jobs" 6 | exit 0 7 | fi 8 | cd jobtmp 9 | for i in $(ls *.yaml); do 10 | kubectl delete -f $i 11 | rm -rf $i 12 | done 13 | rm -rf j.tmp 14 | -------------------------------------------------------------------------------- /examples/Pod_Dependencies/cleanup.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | DIRTY=$(kubectl get pods |grep -c jdpod- 2>/dev/null) 4 | if [ $DIRTY -ne 0 ]; then 5 | kubectl delete -f j1.yaml >/dev/null 2>&1 6 | kubectl delete -f j2.yaml >/dev/null 2>&1 7 | kubectl delete -f j3.yaml >/dev/null 2>&1 8 | kubectl delete -f j4.yaml >/dev/null 2>&1 9 | kubectl delete -f j5.yaml >/dev/null 2>&1 10 | fi 11 | 12 | -------------------------------------------------------------------------------- /examples/Run_Windows/completions.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # This script shows the completion rate of the jobs in the system 4 | 5 | P3CNT=0 6 | rm -rf j.tmp 7 | 8 | echo "time,done_pods" 9 | while [ true ]; do 10 | NOW=$(date -u) 11 | kubectl get jobs |grep rwjob |grep '1/1' > j.tmp 12 | P3JOBS=$(grep rwjob j.tmp |wc -l) 13 | 14 | DONE3=$(expr $P3JOBS - $P3CNT) 15 | echo "$NOW,$DONE3" 16 | P3CNT=$P3JOBS 17 | sleep 300 18 | done 19 | 20 | -------------------------------------------------------------------------------- /examples/Pod_Dependencies/completions.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # This script shows the completion rate of the jobs in the system 4 | 5 | P3CNT=0 6 | rm -rf j.tmp 7 | 8 | echo "time,done_pods" 9 | while [ true ]; do 10 | NOW=$(date -u) 11 | kubectl get jobs |grep jdpod |grep '1/1' > j.tmp 12 | P3JOBS=$(grep jdpod j.tmp |wc -l) 13 | 14 | DONE3=$(expr $P3JOBS - $P3CNT) 15 | echo "$NOW,$DONE3" 16 | P3CNT=$P3JOBS 17 | sleep 300 18 | done 19 | 20 | -------------------------------------------------------------------------------- /examples/Run_Limits/templateJob.yml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Pod 3 | metadata: 4 | name: rljob-SEQ 5 | annotations: 6 | lsf.ibm.com/queue: "normal" 7 | spec: 8 | schedulerName: lsf 9 | containers: 10 | - name: longjob 11 | image: ubuntu:latest 12 | imagePullPolicy: IfNotPresent 13 | command: ["sleep", "3600"] 14 | resources: 15 | requests: 16 | cpu: 1 17 | memory: 128M 18 | limits: 19 | cpu: 1 20 | memory: 256M 21 | 22 | restartPolicy: Never 23 | -------------------------------------------------------------------------------- /examples/Pod_Dependencies/templateJob.yml: -------------------------------------------------------------------------------- 1 | apiVersion: batch/v1 2 | kind: Job 3 | metadata: 4 | name: jdpod-SEQ 5 | spec: 6 | template: 7 | metadata: 8 | name: jdpod-SEQ 9 | annotations: 10 | lsf.ibm.com/queue: "night" 11 | spec: 12 | schedulerName: lsf 13 | containers: 14 | - name: bigjob 15 | image: ubuntu:latest 16 | command: ["sleep", "60"] 17 | resources: 18 | requests: 19 | cpu: 1 
20 | memory: 128M 21 | limits: 22 | cpu: 1 23 | memory: 256M 24 | 25 | restartPolicy: Never 26 | -------------------------------------------------------------------------------- /doc/LSF_Operator/service_account.yaml: -------------------------------------------------------------------------------- 1 | #-------------------------------------------------------- 2 | # Copyright IBM Corp. 2020. All rights reserved. 3 | # US Government Users Restricted Rights - Use, duplication or disclosure 4 | # restricted by GSA ADP Schedule Contract with IBM Corp. 5 | #-------------------------------------------------------- 6 | apiVersion: v1 7 | kind: ServiceAccount 8 | metadata: 9 | name: ibm-lsf-operator 10 | labels: 11 | app.kubernetes.io/name: "ibm-spectrum-lsf" 12 | app.kubernetes.io/managed-by: "lsfclusters.lsf.spectrumcomputing.ibm.com" 13 | app.kubernetes.io/instance: "lsf" 14 | release: "lsf" 15 | 16 | -------------------------------------------------------------------------------- /examples/Pod_Dependencies/j1.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: batch/v1 2 | kind: Job 3 | metadata: 4 | name: jdpod-1 5 | spec: 6 | template: 7 | metadata: 8 | name: jdpod-1 9 | annotations: 10 | lsf.ibm.com/queue: "normal" 11 | spec: 12 | schedulerName: lsf 13 | containers: 14 | - name: bigjob 15 | image: ubuntu:latest 16 | imagePullPolicy: IfNotPresent 17 | command: ["sleep", "120"] 18 | resources: 19 | requests: 20 | cpu: 1 21 | memory: 128M 22 | limits: 23 | cpu: 1 24 | memory: 256M 25 | 26 | restartPolicy: Never 27 | -------------------------------------------------------------------------------- /examples/Pod_Dependencies/j2.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: batch/v1 2 | kind: Job 3 | metadata: 4 | name: jdpod-2 5 | spec: 6 | template: 7 | metadata: 8 | name: jdpod-2 9 | annotations: 10 | lsf.ibm.com/queue: "normal" 11 | spec: 12 | schedulerName: lsf 13 | containers: 14 | - name: bigjob 15 | image: ubuntu:latest 16 | imagePullPolicy: IfNotPresent 17 | command: ["sleep", "300"] 18 | resources: 19 | requests: 20 | cpu: 1 21 | memory: 128M 22 | limits: 23 | cpu: 1 24 | memory: 256M 25 | 26 | restartPolicy: Never 27 | -------------------------------------------------------------------------------- /examples/Run_Windows/templateJob.yml: -------------------------------------------------------------------------------- 1 | apiVersion: batch/v1 2 | kind: Job 3 | metadata: 4 | name: rwjob-SEQ 5 | spec: 6 | template: 7 | metadata: 8 | name: rwjob-SEQ 9 | annotations: 10 | lsf.ibm.com/queue: "night" 11 | spec: 12 | schedulerName: lsf 13 | containers: 14 | - name: bigjob 15 | image: ubuntu:latest 16 | imagePullPolicy: IfNotPresent 17 | command: ["sleep", "3600"] 18 | resources: 19 | requests: 20 | cpu: 1 21 | memory: 128M 22 | limits: 23 | cpu: 1 24 | memory: 256M 25 | 26 | restartPolicy: Never 27 | -------------------------------------------------------------------------------- /examples/Pod_Dependencies/j3.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: batch/v1 2 | kind: Job 3 | metadata: 4 | name: jdpod-3 5 | spec: 6 | template: 7 | metadata: 8 | name: jdpod-3 9 | annotations: 10 | lsf.ibm.com/queue: "normal" 11 | lsf.ibm.com/dependency: "JOBDEP" 12 | spec: 13 | schedulerName: lsf 14 | containers: 15 | - name: bigjob 16 | image: ubuntu:latest 17 | imagePullPolicy: IfNotPresent 18 | command: ["sleep", "60"] 19 | resources: 20 | 
requests: 21 | cpu: 1 22 | memory: 128M 23 | limits: 24 | cpu: 1 25 | memory: 256M 26 | 27 | restartPolicy: Never 28 | -------------------------------------------------------------------------------- /examples/Pod_Dependencies/j4.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: batch/v1 2 | kind: Job 3 | metadata: 4 | name: jdpod-4 5 | spec: 6 | template: 7 | metadata: 8 | name: jdpod-4 9 | annotations: 10 | lsf.ibm.com/queue: "normal" 11 | lsf.ibm.com/dependency: "JOBDEP" 12 | spec: 13 | schedulerName: lsf 14 | containers: 15 | - name: bigjob 16 | image: ubuntu:latest 17 | imagePullPolicy: IfNotPresent 18 | command: ["sleep", "60"] 19 | resources: 20 | requests: 21 | cpu: 1 22 | memory: 128M 23 | limits: 24 | cpu: 1 25 | memory: 256M 26 | 27 | restartPolicy: Never 28 | -------------------------------------------------------------------------------- /examples/Pod_Dependencies/j5.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: batch/v1 2 | kind: Job 3 | metadata: 4 | name: jdpod-5 5 | spec: 6 | template: 7 | metadata: 8 | name: jdpod-5 9 | annotations: 10 | lsf.ibm.com/queue: "normal" 11 | lsf.ibm.com/dependency: "JOBDEP" 12 | spec: 13 | schedulerName: lsf 14 | containers: 15 | - name: bigjob 16 | image: ubuntu:latest 17 | imagePullPolicy: IfNotPresent 18 | command: ["sleep", "60"] 19 | resources: 20 | requests: 21 | cpu: 1 22 | memory: 128M 23 | limits: 24 | cpu: 1 25 | memory: 256M 26 | 27 | restartPolicy: Never 28 | -------------------------------------------------------------------------------- /doc/LSF_Operator/role_binding.yaml: -------------------------------------------------------------------------------- 1 | #-------------------------------------------------------- 2 | # Copyright IBM Corp. 2020. All rights reserved. 3 | # US Government Users Restricted Rights - Use, duplication or disclosure 4 | # restricted by GSA ADP Schedule Contract with IBM Corp. 5 | #-------------------------------------------------------- 6 | # 7 | kind: RoleBinding 8 | apiVersion: rbac.authorization.k8s.io/v1 9 | metadata: 10 | name: ibm-lsf-operator 11 | labels: 12 | app.kubernetes.io/name: "ibm-spectrum-lsf" 13 | app.kubernetes.io/managed-by: "lsfclusters.lsf.spectrumcomputing.ibm.com" 14 | app.kubernetes.io/instance: "lsf" 15 | release: "lsf" 16 | subjects: 17 | - kind: ServiceAccount 18 | name: ibm-lsf-operator 19 | roleRef: 20 | kind: Role 21 | name: ibm-lsf-operator 22 | apiGroup: rbac.authorization.k8s.io 23 | -------------------------------------------------------------------------------- /examples/Pod_Priority_and_Preemption/high-priority.yml: -------------------------------------------------------------------------------- 1 | # This is a template for a high priority job. 2 | # It will start 10 pods in the "priority" queue. 
3 | # Each will run for 60 seoonds 4 | apiVersion: batch/v1 5 | kind: Job 6 | metadata: 7 | name: ppnp-high-SEQ 8 | spec: 9 | parallelism: 10 10 | completions: 10 11 | template: 12 | metadata: 13 | name: ppnp-high-SEQ 14 | annotations: 15 | lsf.ibm.com/queue: "priority" 16 | spec: 17 | schedulerName: lsf 18 | containers: 19 | - name: bigjob 20 | image: ubuntu:latest 21 | imagePullPolicy: IfNotPresent 22 | command: ["sleep", "60"] 23 | resources: 24 | requests: 25 | cpu: 1 26 | memory: 128M 27 | limits: 28 | cpu: 1 29 | memory: 128M 30 | 31 | restartPolicy: Never 32 | -------------------------------------------------------------------------------- /examples/Resource_Sharing/templateJob.yml: -------------------------------------------------------------------------------- 1 | # This is the template for the job. It will start 10 pods 2 | # in the "normal" queue. 3 | apiVersion: batch/v1 4 | kind: Job 5 | metadata: 6 | name: sharepod-FSGRP-SEQ 7 | spec: 8 | parallelism: 10 9 | completions: 10 10 | template: 11 | metadata: 12 | name: sharepod-FSGRP-SEQ 13 | annotations: 14 | lsf.ibm.com/fairshareGroup: "FSGRP" 15 | lsf.ibm.com/queue: "normal" 16 | spec: 17 | schedulerName: lsf 18 | containers: 19 | - name: sharingjob 20 | image: ubuntu:latest 21 | imagePullPolicy: IfNotPresent 22 | command: ["sleep", "60"] 23 | resources: 24 | requests: 25 | cpu: 1 26 | memory: 128M 27 | limits: 28 | cpu: 1 29 | memory: 256M 30 | 31 | restartPolicy: Never 32 | -------------------------------------------------------------------------------- /examples/Pod_Priority_and_Preemption/low-priority-preempt.yml: -------------------------------------------------------------------------------- 1 | # This is a template for a preemptible low priority job. 2 | # It will start 10 pods in the "idle" queue. 3 | # Each will run for 3600 seoonds 4 | apiVersion: batch/v1 5 | kind: Job 6 | metadata: 7 | name: ppnp-low-SEQ 8 | spec: 9 | parallelism: 10 10 | completions: 10 11 | template: 12 | metadata: 13 | name: ppnp-low-SEQ 14 | annotations: 15 | lsf.ibm.com/queue: "idle" 16 | spec: 17 | schedulerName: lsf 18 | containers: 19 | - name: sleepjob 20 | image: ubuntu:latest 21 | imagePullPolicy: IfNotPresent 22 | command: ["sleep", "3600"] 23 | resources: 24 | requests: 25 | cpu: 1 26 | memory: 128M 27 | limits: 28 | cpu: 1 29 | memory: 128M 30 | 31 | restartPolicy: Never 32 | -------------------------------------------------------------------------------- /examples/Pod_Priority_and_Preemption/low-priority-non-preempt.yml: -------------------------------------------------------------------------------- 1 | # This is a template for a preemptible low priority job. 2 | # It will start 10 pods in the "normal" queue. 
3 | # Each will run for 60 seoonds 4 | apiVersion: batch/v1 5 | kind: Job 6 | metadata: 7 | name: ppnp-low-SEQ 8 | spec: 9 | parallelism: 10 10 | completions: 10 11 | template: 12 | metadata: 13 | name: ppnp-low-SEQ 14 | annotations: 15 | lsf.ibm.com/queue: "normal" 16 | spec: 17 | schedulerName: lsf 18 | containers: 19 | - name: sleepjob 20 | image: ubuntu:latest 21 | imagePullPolicy: IfNotPresent 22 | command: ["sleep", "60"] 23 | resources: 24 | requests: 25 | cpu: 1 26 | memory: 128M 27 | limits: 28 | cpu: 1 29 | memory: 128M 30 | 31 | restartPolicy: Never 32 | -------------------------------------------------------------------------------- /doc/LSF_Operator/clusterrolebinding1.yaml: -------------------------------------------------------------------------------- 1 | #-------------------------------------------------------- 2 | # Copyright IBM Corp. 2020. All rights reserved. 3 | # US Government Users Restricted Rights - Use, duplication or disclosure 4 | # restricted by GSA ADP Schedule Contract with IBM Corp. 5 | #-------------------------------------------------------- 6 | # 7 | apiVersion: rbac.authorization.k8s.io/v1 8 | kind: ClusterRoleBinding 9 | metadata: 10 | name: ibm-lsf-operator 11 | labels: 12 | app.kubernetes.io/name: "ibm-spectrum-lsf" 13 | app.kubernetes.io/managed-by: "lsfclusters.lsf.spectrumcomputing.ibm.com" 14 | app.kubernetes.io/instance: "lsf" 15 | release: "lsf" 16 | roleRef: 17 | kind: ClusterRole 18 | name: ibm-lsf-operator 19 | apiGroup: rbac.authorization.k8s.io 20 | subjects: 21 | - kind: ServiceAccount 22 | name: ibm-lsf-operator 23 | # Use your namespace 24 | namespace: ibm-lsf-project 25 | -------------------------------------------------------------------------------- /doc/LSF_Operator/clusterrolebinding2.yaml: -------------------------------------------------------------------------------- 1 | #-------------------------------------------------------- 2 | # Copyright IBM Corp. 2020. All rights reserved. 3 | # US Government Users Restricted Rights - Use, duplication or disclosure 4 | # restricted by GSA ADP Schedule Contract with IBM Corp. 5 | #-------------------------------------------------------- 6 | # 7 | apiVersion: rbac.authorization.k8s.io/v1 8 | kind: ClusterRoleBinding 9 | metadata: 10 | name: ibm-lsf-operator-kube-scheduler-crb 11 | labels: 12 | app.kubernetes.io/name: "ibm-spectrum-lsf" 13 | app.kubernetes.io/managed-by: "lsfclusters.lsf.spectrumcomputing.ibm.com" 14 | app.kubernetes.io/instance: "lsf" 15 | release: "lsf" 16 | 17 | roleRef: 18 | kind: ClusterRole 19 | name: system:kube-scheduler 20 | apiGroup: rbac.authorization.k8s.io 21 | 22 | subjects: 23 | - kind: ServiceAccount 24 | name: ibm-lsf-operator 25 | # User your namespace 26 | namespace: ibm-lsf-project 27 | -------------------------------------------------------------------------------- /examples/Pod_Priority_and_Preemption/priority-test-preempt.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # Template for the low priority job 4 | TEMPLATE1=low-priority-preempt.yml 5 | # Template for the high priority jobs 6 | TEMPLATE2=high-priority.yml 7 | 8 | if [ ! -d jobtmp ]; then 9 | mkdir jobtmp 10 | fi 11 | 12 | echo "This script will create a large number of low priority test jobs in the idle queue. 13 | It will then create some high priority pods in the priority queue. 14 | 15 | It will be necessary to saturate the cluster with the low priority jobs to begin. 
16 | The number of jobs needed is approximately (Number of cores) / 2. 17 | How many jobs do you want to submit? 18 | " 19 | 20 | read NUMJOBS 21 | echo "Creating $NUMJOBS low priority jobs. This will take some time" 22 | echo "" 23 | echo "In another shell you may wish to start the completions.sh script to gather data" 24 | sleep 5 25 | 26 | for i in $(seq 1 $NUMJOBS); do 27 | sed -e s:SEQ:$i:g < $TEMPLATE1 > jobtmp/ppnp1-job$i.yaml 28 | kubectl create -f jobtmp/ppnp1-job$i.yaml 29 | done 30 | 31 | echo "The low priority jobs should now be filling the available resources." 32 | echo "Waiting for 30 seconds to allow the pods to get started." 33 | sleep 30 34 | 35 | echo "Now starting the high priority jobs." 36 | S=$(expr $NUMJOBS + 1) 37 | E=$(expr $NUMJOBS + $S) 38 | 39 | for i in $(seq $S $E); do 40 | # sed -e s:SEQ:$i:g < $TEMPLATE1 > jobtmp/ppnp1-job$i.yaml 41 | sed -e s:SEQ:$i:g < $TEMPLATE2 > jobtmp/ppnp2-job$i.yaml 42 | # kubectl create -f jobtmp/ppnp1-job$i.yaml 43 | kubectl create -f jobtmp/ppnp2-job$i.yaml 44 | done 45 | 46 | -------------------------------------------------------------------------------- /examples/Resource_Sharing/runtest.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | TEMPLATE=templateJob.yml 4 | if [ ! -d jobtmp ]; then 5 | mkdir jobtmp 6 | fi 7 | 8 | clear 9 | echo "" 10 | echo "This script will demonstrate fairshare groups 11 | It will submit jobs to the gold, silver and bronze fairshare groups 12 | When there is resource contention we will see that the pods 13 | in the gold group get proportionately more resources than the 14 | other pods, as seen by their completion rates 15 | 16 | Each job will start 10 pods. Enough pods need to be started 17 | to cause resource contention. The number of jobs needed is 18 | approximately = (Number of cores in the cluster) / 2 19 | How many jobs do you want to submit? 20 | " 21 | read NUMJOBS 22 | echo "Creating $NUMJOBS jobs." 23 | echo "" 24 | echo "In another shell you may wish to start the completions.sh script to gather data" 25 | sleep 5 26 | 27 | for i in $(seq 1 $NUMJOBS); do 28 | # Create a Bronze job 29 | FSGRP=bronze 30 | sed -e s:SEQ:$i:g < $TEMPLATE > jobtmp/sharepod-${FSGRP}-$i.yaml 31 | sed -i -s s:FSGRP:${FSGRP}:g jobtmp/sharepod-${FSGRP}-$i.yaml 32 | kubectl create -f jobtmp/sharepod-${FSGRP}-$i.yaml 33 | 34 | # Create a Silver job 35 | FSGRP=silver 36 | sed -e s:SEQ:$i:g < $TEMPLATE > jobtmp/sharepod-${FSGRP}-$i.yaml 37 | sed -i -s s:FSGRP:${FSGRP}:g jobtmp/sharepod-${FSGRP}-$i.yaml 38 | kubectl create -f jobtmp/sharepod-${FSGRP}-$i.yaml 39 | 40 | # Create a Gold job 41 | FSGRP=gold 42 | sed -e s:SEQ:$i:g < $TEMPLATE > jobtmp/sharepod-${FSGRP}-$i.yaml 43 | sed -i -s s:FSGRP:${FSGRP}:g jobtmp/sharepod-${FSGRP}-$i.yaml 44 | kubectl create -f jobtmp/sharepod-${FSGRP}-$i.yaml 45 | done 46 | -------------------------------------------------------------------------------- /examples/Pod_Priority_and_Preemption/priority-test-non-preempt.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # Template for the low priority job 4 | TEMPLATE1=low-priority-non-preempt.yml 5 | # Template for the high priority jobs 6 | TEMPLATE2=high-priority.yml 7 | 8 | if [ ! -d jobtmp ]; then 9 | mkdir jobtmp 10 | fi 11 | 12 | clear 13 | 14 | echo "This script will create a large number of low priority test jobs in the normal queue. 15 | It will then create some high priority pods in the priority queue.
16 | 17 | It will be necessary to saturate the cluster with the low priority jobs to begin. 18 | The number of jobs needed is approximately 3x(Number of workers). 19 | How many jobs do you want to submit? 20 | " 21 | 22 | read NUMJOBS 23 | echo "Creating $NUMJOBS low priority jobs. This will take some time" 24 | echo "" 25 | echo "In another shell you may wish to start the completions.sh script to gather data" 26 | sleep 5 27 | 28 | for i in $(seq 1 $NUMJOBS); do 29 | sed -e s:SEQ:$i:g < $TEMPLATE1 > jobtmp/ppnp1-job$i.yaml 30 | kubectl create -f jobtmp/ppnp1-job$i.yaml 31 | done 32 | 33 | echo "The low priority jobs should now be filling the available resources." 34 | echo "Waiting for 30 seconds to allow the pods to get started." 35 | sleep 30 36 | 37 | echo "Now starting the high priority jobs. In parallel we will also continue" 38 | echo "to submit low priority jobs." 39 | S=$(expr $NUMJOBS + 1) 40 | E=$(expr $NUMJOBS + $S) 41 | for i in $(seq $S $E); do 42 | sed -e s:SEQ:$i:g < $TEMPLATE1 > jobtmp/ppnp1-job$i.yaml 43 | sed -e s:SEQ:$i:g < $TEMPLATE2 > jobtmp/ppnp2-job$i.yaml 44 | kubectl create -f jobtmp/ppnp1-job$i.yaml 45 | kubectl create -f jobtmp/ppnp2-job$i.yaml 46 | done 47 | 48 | 49 | -------------------------------------------------------------------------------- /examples/Resource_Sharing/completions.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # This script shows the completion rate of the jobs in the system 4 | 5 | OUTFILE=test-output.csv 6 | rm -rf $OUTFILE 7 | echo "Data is stored in $OUTFILE" 8 | echo "Time,GoldDone,SilverDone,BronzeDone,GoldRun,SilverRun,BronzeRun,%gold,%silver,%bronze" 9 | echo "Time,GoldDone,SilverDone,BronzeDone,GoldRun,SilverRun,BronzeRun,%gold,%silver,%bronze" > $OUTFILE 10 | 11 | TRUN=0 12 | while [ true ]; do 13 | NOW=$(date +%H:%M:%S) 14 | kubectl get pods 2>/dev/null |grep sharepod- > j.tmp 15 | GDONE=$(grep gold j.tmp 2>/dev/null |grep -c Completed) 16 | SDONE=$(grep silver j.tmp 2>/dev/null |grep -c Completed) 17 | BDONE=$(grep bronze j.tmp 2>/dev/null |grep -c Completed) 18 | 19 | GRUN=$(grep gold j.tmp 2>/dev/null |grep -c Running) 20 | SRUN=$(grep silver j.tmp 2>/dev/null |grep -c Running) 21 | BRUN=$(grep bronze j.tmp 2>/dev/null |grep -c Running) 22 | 23 | TOTDONE=$(( $GDONE + $SDONE + $BDONE )) 24 | TOTRUN=$(( $GRUN + $SRUN + $BRUN )) 25 | if [ $TOTRUN -eq 0 ]; then 26 | PCGRUN=0 27 | PCSRUN=0 28 | PCBRUN=0 29 | else 30 | PCGRUN=$(( $GRUN * 100 / $TOTRUN )) 31 | PCSRUN=$(( $SRUN * 100 / $TOTRUN )) 32 | PCBRUN=$(( $BRUN * 100 / $TOTRUN )) 33 | fi 34 | 35 | echo "$NOW,$GDONE,$SDONE,$BDONE,$GRUN,$SRUN,$BRUN,$PCGRUN,$PCSRUN,$PCBRUN" 36 | echo "$NOW,$GDONE,$SDONE,$BDONE,$GRUN,$SRUN,$BRUN,$PCGRUN,$PCSRUN,$PCBRUN" >> $OUTFILE 37 | 38 | if [ "$TRUN" = "0" -a $TOTRUN -gt 0 ]; then 39 | TRUN=1 40 | fi 41 | if [ "$TRUN" = "1" -a $TOTRUN -eq 0 ]; then 42 | echo "Test completed" 43 | exit 0 44 | fi 45 | sleep 10 46 | done 47 | 48 | -------------------------------------------------------------------------------- /examples/README.md: -------------------------------------------------------------------------------- 1 | # LSF - Kubernetes Examples 2 | 3 | This directory contains several examples that demonstrate some of the features of the LSF-Kubernetes integration. Each directory contains tests that show how a particular feature works. Download and run the tests that are of interest. The directories contain the sample pods along with scripts and instructions. 
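All of the examples follow the same basic pattern: a pod or job template that sets `schedulerName: lsf` so that the LSF scheduler places the pod, plus one or more `lsf.ibm.com/*` annotations (queue, fairshare group, dependency) that select the policy being demonstrated. The following is a minimal sketch of that pattern, closely based on the template pods used in these tests; the pod name `lsf-example` and container name `main` are placeholders, not part of any test:

```
apiVersion: v1
kind: Pod
metadata:
  name: lsf-example               # hypothetical name, not used by any test
  annotations:
    lsf.ibm.com/queue: "normal"   # the LSF queue the pod is submitted to
spec:
  schedulerName: lsf              # hand placement of this pod to the LSF scheduler
  containers:
  - name: main
    image: ubuntu:latest
    imagePullPolicy: IfNotPresent
    command: ["sleep", "60"]
    resources:
      requests:
        cpu: 1
        memory: 128M
      limits:
        cpu: 1
        memory: 256M
  restartPolicy: Never
```

Each test's template adds the annotation relevant to the feature it demonstrates, as described in the list below.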
4 | 5 | * **Pod_Dependencies** - This test provides an example of the workflow feature that is provided by this integration. The workflow feature allows you to define workflows to perform complex multi-pod operations. 6 | * **Pod_Priority_and_Preemption** - This directory provides some tests that will help you explore the pod priority capabilities, which allow you to run pods with higher priorities before those with lower priorities. They also show how high priority pods can kill lower priority pods to free resources. 7 | * **Resource_Sharing** - Resource contention happens; what will you do about it? This test looks at one of the sharing policies that this integration provides. It demonstrates how to align business priorities with the resources that are available so that the more critical pods have the resources they deserve. 8 | * **Run_Limits** - Sometimes a broken application may hold resources when they should be freed. This test provides an example of run limits, which can be used to free resources from misbehaving pods. 9 | * **Run_Windows** - Not every pod should run immediately. Sometimes it is better to queue pods until the night or weekend so that resources are available for more business-significant work. This test shows how to construct and test run windows. 10 | 11 | Follow the instructions in the README in each directory. 12 | 13 | 14 | -------------------------------------------------------------------------------- /doc/LSF_Operator/scc.yaml: -------------------------------------------------------------------------------- 1 | ############################################################################## 2 | # Licensed Materials - Property of IBM. 3 | # Copyright IBM Corporation 2019. All Rights Reserved. 4 | # U.S. Government Users Restricted Rights - Use, duplication or disclosure 5 | # restricted by GSA ADP Schedule Contract with IBM Corp. 6 | # 7 | # Contributors: 8 | # IBM Corporation - initial version 9 | ############################################################################### 10 | # 11 | apiVersion: security.openshift.io/v1 12 | kind: SecurityContextConstraints 13 | metadata: 14 | annotations: 15 | kubernetes.io/description: 'this allows access to many privileged and host 16 | features and the ability to run as any user, any group, any fsGroup, and with 17 | any SELinux context. WARNING: this is only for hpac.'
18 | name: ibm-lsf-scc 19 | labels: 20 | app.kubernetes.io/name: "ibm-spectrum-lsf" 21 | app.kubernetes.io/managed-by: "lsfclusters.lsf.spectrumcomputing.ibm.com" 22 | app.kubernetes.io/instance: "lsf" 23 | release: "lsf" 24 | defaultAddCapabilities: null 25 | fsGroup: 26 | type: RunAsAny 27 | groups: 28 | - system:cluster-admins 29 | - system:nodes 30 | - system:masters 31 | allowHostIPC: false 32 | allowHostNetwork: false 33 | allowHostPID: false 34 | allowHostPorts: false 35 | allowPrivilegeEscalation: true 36 | allowPrivilegedContainer: false 37 | allowedCapabilities: 38 | - KILL 39 | - SETUID 40 | - SETGID 41 | - CHOWN 42 | - SETPCAP 43 | - NET_BIND_SERVICE 44 | - DAC_OVERRIDE 45 | - SYS_ADMIN 46 | - AUDIT_WRITE 47 | - SYS_TTY_CONFIG 48 | allowedUnsafeSysctls: 49 | - '*' 50 | priority: null 51 | readOnlyRootFilesystem: false 52 | requiredDropCapabilities: 53 | - MKNOD 54 | - NET_RAW 55 | - SYS_CHROOT 56 | - SETFCAP 57 | - FOWNER 58 | - FSETID 59 | runAsUser: 60 | type: RunAsAny 61 | seLinuxContext: 62 | type: RunAsAny 63 | seccompProfiles: 64 | - '*' 65 | supplementalGroups: 66 | type: RunAsAny 67 | volumes: 68 | - '*' 69 | users: 70 | - system:admin 71 | - system:serviceaccount:MyNameSpace:ibm-lsf-operator 72 | -------------------------------------------------------------------------------- /examples/Pod_Priority_and_Preemption/completions.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # This script shows the completion rate of the jobs in the system 4 | 5 | NAMESPACE=ibm-lsf-tp 6 | MASTERPOD=$(kubectl get pods -n ${NAMESPACE} |grep ibm-spectrum-computing-prod-master |awk '{ print $1 }') 7 | 8 | if [ "$MASTERPOD" = "" ]; then 9 | echo "Could not locate the master pod. Looking for a pod name containing" 10 | echo "\"ibm-spectrum-computing-prod-master\" in the currnet namespace." 11 | exit 1 12 | fi 13 | 14 | OUTFILE=test-output.csv 15 | rm -rf $OUTFILE 16 | BQLINES=( ) 17 | CURR_IFS=$IFS 18 | LINE=( ) 19 | echo "Time,Qname,Qnjobs,Qpend,Qrun,.(repeat for all queues)" 20 | echo "Time,Priority,Priority-NJobs,Priority-Pend,Priority-Run,Normal,Normal-NJobs,Normal-Pend,Normal-Run,Idle,Idle-NJobs,Idle-Pend,Idle-Run,Night,Night-NJobs,Night-Pend,Night-Run" 21 | echo "Time,Priority,Priority-NJobs,Priority-Pend,Priority-Run,Normal,Normal-NJobs,Normal-Pend,Normal-Run,Idle,Idle-NJobs,Idle-Pend,Idle-Run,Night,Night-NJobs,Night-Pend,Night-Run" > $OUTFILE 22 | 23 | TRUN=0 24 | while [ true ]; do 25 | NOW=$(date +%H:%M:%S) 26 | BQOUT=$(kubectl exec $MASTERPOD -n ${NAMESPACE} -- /bin/sh -c ". 
/etc/profile.d/lsf.sh ;bqueues") 27 | IFS=$'\n' 28 | BQLINES=( $BQOUT ) 29 | OUT="$NOW" 30 | TOTALJOBS=0 31 | for ((i=1; i < ${#BQLINES[*]}; i++)); do 32 | # echo "Line: ${BQLINES[$i]}" 33 | IFS=$' \t' 34 | LINE=( ${BQLINES[$i]} ) 35 | if [ -z "$LINE" ]; then 36 | continue 37 | fi 38 | QNAME="${LINE[0]}" 39 | QNJOBS="${LINE[7]}" 40 | QPEND="${LINE[8]}" 41 | QRUN="${LINE[9]}" 42 | # echo "$NOW,$QNAME,$QNJOBS,$QPEND,$QRUN" 43 | OUT="${OUT},$QNAME,$QNJOBS,$QPEND,$QRUN" 44 | TOTALJOBS=$(( $TOTALJOBS + $QNJOBS )) 45 | if [ "$TRUN" = "0" -a $TOTALJOBS -ge 0 ]; then 46 | TRUN=1 47 | fi 48 | if [ "$TRUN" = "1" -a $TOTALJOBS -eq 0 ]; then 49 | echo "Test completed" 50 | exit 0 51 | fi 52 | done 53 | echo "$OUT" 54 | echo "$OUT" >> $OUTFILE 55 | sleep 10 56 | done 57 | 58 | -------------------------------------------------------------------------------- /examples/Run_Limits/runtest.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | NAMESPACE=ibm-lsf-tp 4 | 5 | OUTFILE=test-output.csv 6 | rm -rf $OUTFILE 7 | 8 | MASTERPOD=$(kubectl get pods -n ${NAMESPACE} |grep ibm-spectrum-computing-prod-master |awk '{ print $1 }') 9 | 10 | if [ "$MASTERPOD" = "" ]; then 11 | echo "Could not locate the master pod. Looking for a pod name containing" 12 | echo "\"ibm-spectrum-computing-prod-master\" in the currnet namespace." 13 | exit 1 14 | fi 15 | 16 | if [ -d jobtmp ]; then 17 | echo "Cleaning up if needed" 18 | cd jobtmp 19 | for i in $(ls rljob*yaml); do 20 | kubectl delete -f $i 2>&1 >/dev/null 21 | echo -n "." 22 | rm -rf $i 23 | done 24 | echo "" 25 | cd .. 26 | fi 27 | echo "" 28 | 29 | echo "This script will create long running test jobs in the normal queue. 30 | The jobs will run until the RUNLIMIT is reached and will then be killed 31 | 32 | " 33 | 34 | RUNLIM=$(kubectl exec $MASTERPOD -n ${NAMESPACE} -- /bin/sh -c ". /etc/profile.d/lsf.sh ;bqueues -l normal |grep RUNLIMIT") 35 | if [ "${RUNLIM}" = "" ]; then 36 | echo "The normal queue does not have the RUNLIMIT defined." 37 | echo "Cannot run the test." 38 | exit 1 39 | fi 40 | 41 | TEMPLATE=templateJob.yml 42 | if [ ! -d jobtmp ]; then 43 | mkdir jobtmp 44 | fi 45 | 46 | for i in $(seq 1 10); do 47 | sed -e s:SEQ:$i:g < $TEMPLATE > jobtmp/rljob-$i.yaml 48 | kubectl create -f jobtmp/rljob-$i.yaml 49 | done 50 | 51 | echo "" 52 | echo "Ten pods have been created in the 'normal' queue." 53 | echo "Script will now check every 10 seconds but only report changes till done." 54 | echo "Data is stored in $OUTFILE" 55 | echo "" 56 | echo "Worker_Time(HH:MM:SS),Num_Pend_Pods,Num_Run_Pods,Num_Terminating_Pods" 57 | OLDOUT="" 58 | OUT="" 59 | while [ true ]; do 60 | kubectl get pods 2>/dev/null |grep rljob- > j.tmp 61 | JTERM=$(grep -c Terminating j.tmp 2>/dev/null) 62 | JRUN=$(egrep -c 'ContainerCreating|Running' j.tmp 2>/dev/null) 63 | JPEND=$(grep -c Pending j.tmp 2>/dev/null) 64 | NOW=$(date +%H:%M:%S) 65 | OUT="$JPEND,$JRUN,$JTERM" 66 | if [ "$OUT" != "$OLDOUT" ]; then 67 | echo "$NOW,$OUT" 68 | echo "$NOW,$OUT" >> $OUTFILE 69 | OLDOUT=$OUT 70 | fi 71 | if [ $JRUN -eq 0 -a $JTERM -eq 0 -a $JPEND -eq 0 ]; then 72 | echo "Test complete" 73 | rm -rf jobtmp j.tmp 74 | exit 0 75 | fi 76 | sleep 10 77 | done 78 | -------------------------------------------------------------------------------- /doc/LSF_Operator/clusterrole.yaml: -------------------------------------------------------------------------------- 1 | #-------------------------------------------------------- 2 | # Copyright IBM Corp. 2020. 
All rights reserved. 3 | # US Government Users Restricted Rights - Use, duplication or disclosure 4 | # restricted by GSA ADP Schedule Contract with IBM Corp. 5 | #-------------------------------------------------------- 6 | # 7 | # This ClusterRole should be used in conjunction with the system:kube-scheduler 8 | # ClusterRole to grant the scheduler the permissions it needs to operate 9 | # 10 | apiVersion: rbac.authorization.k8s.io/v1 11 | kind: ClusterRole 12 | metadata: 13 | name: ibm-lsf-operator 14 | labels: 15 | app.kubernetes.io/name: "ibm-spectrum-lsf" 16 | app.kubernetes.io/managed-by: "lsfclusters.lsf.spectrumcomputing.ibm.com" 17 | app.kubernetes.io/instance: "lsf" 18 | release: "lsf" 19 | rules: 20 | - apiGroups: 21 | - "" 22 | resources: 23 | - namespaces 24 | - pods 25 | - resourcequotas 26 | - nodes 27 | verbs: 28 | - list 29 | - update 30 | - get 31 | - watch 32 | - patch 33 | - create 34 | - apiGroups: 35 | - batch 36 | resources: 37 | - "*" 38 | verbs: 39 | - list 40 | - update 41 | - get 42 | - watch 43 | - patch 44 | - create 45 | - apiGroups: 46 | - ibm.com 47 | resources: 48 | - paralleljobs 49 | - resourceplans 50 | verbs: 51 | - "*" 52 | - apiGroups: 53 | - ibm.com 54 | resources: 55 | - "*" 56 | verbs: 57 | - update 58 | - apiGroups: 59 | - apiextensions.k8s.io 60 | attributeRestrictions: null 61 | resources: 62 | - customresourcedefinitions 63 | verbs: 64 | - "*" 65 | - apiGroups: 66 | - storage.k8s.io 67 | resources: 68 | - storageclasses 69 | verbs: 70 | - watch 71 | - get 72 | - list 73 | - apiGroups: 74 | - security.openshift.io 75 | resources: 76 | - securitycontextconstraints 77 | verbs: 78 | - use 79 | - apiGroups: 80 | - "" 81 | attributeRestrictions: null 82 | resources: 83 | - configmaps 84 | verbs: 85 | - list 86 | - watch 87 | - get 88 | - apiGroups: 89 | - "" 90 | attributeRestrictions: null 91 | resources: 92 | - events 93 | verbs: 94 | - list 95 | - watch 96 | - get 97 | - create 98 | - apiGroups: 99 | - "rbac.authorization.k8s.io" 100 | resources: 101 | - clusterrolebindings 102 | - clusterroles 103 | verbs: 104 | - list 105 | - watch 106 | - get 107 | - create 108 | - patch 109 | -------------------------------------------------------------------------------- /examples/Run_Windows/runtest.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | OUTFILE=test-output.csv 4 | rm -rf $OUTFILE 5 | 6 | NAMESPACE=ibm-lsf-tp 7 | MASTERPOD=$(kubectl get pods -n ${NAMESPACE} |grep ibm-spectrum-computing-prod-master |awk '{ print $1 }') 8 | 9 | if [ "$MASTERPOD" = "" ]; then 10 | echo "Could not locate the master pod. Looking for a pod name containing" 11 | echo "\"ibm-spectrum-computing-prod-master\" in the currnet namespace." 12 | exit 1 13 | fi 14 | 15 | if [ -d jobtmp ]; then 16 | echo "Cleaning up if needed" 17 | cd jobtmp 18 | for i in $(ls rwjob*yaml); do 19 | kubectl delete -f $i 20 | rm -rf $i 21 | done 22 | cd .. 23 | fi 24 | echo "" 25 | 26 | echo "This script will create a number of test jobs in the night queue. 27 | The jobs will remain in the pending state till the run window is open 28 | 29 | " 30 | 31 | PDATE=$(kubectl exec $MASTERPOD -n ${NAMESPACE} -- /bin/sh -c "date") 32 | NOW=$(date) 33 | echo "You time is: $NOW" 34 | echo "Pod time is: $PDATE" 35 | echo "" 36 | 37 | RUNWIN=$(kubectl exec $MASTERPOD -n ${NAMESPACE} -- /bin/sh -c ". 
/etc/profile.d/lsf.sh ;bqueues -l night |grep RUN_WINDOW") 38 | echo "The night queue run window currently has: $RUNWIN" 39 | echo "The format is: opentime-closetime" 40 | echo "Where the time is expressed as weekday:hour:minute or hour:minute" 41 | echo "and weekday 0 equals Sunday." 42 | echo "" 43 | 44 | TEMPLATE=templateJob.yml 45 | if [ ! -d jobtmp ]; then 46 | mkdir jobtmp 47 | fi 48 | 49 | for i in $(seq 1 10); do 50 | sed -e s:SEQ:$i:g < $TEMPLATE > jobtmp/rwjob-$i.yaml 51 | kubectl create -f jobtmp/rwjob-$i.yaml 52 | done 53 | 54 | echo "" 55 | echo "Ten pods have been created in the 'night' queue." 56 | echo "Script will now check every 1 minute but only report changes till done." 57 | echo "Data is stored in $OUTFILE" 58 | echo "" 59 | echo "Worker_Time(HH:MM:SS),Container_Time(HH:MM:SS),Num_Pend_Pods,Num_Run_Pods,Num_Complete_Pods" 60 | OLDOUT="" 61 | OUT="" 62 | while [ true ]; do 63 | kubectl get pods 2>/dev/null |grep rwjob- > j.tmp 64 | JDONE=$(grep -c Completed j.tmp 2>/dev/null) 65 | JRUN=$(egrep -c 'ContainerCreating|Running' j.tmp 2>/dev/null) 66 | JPEND=$(grep -c Pending j.tmp 2>/dev/null) 67 | PDATE=$(kubectl exec $MASTERPOD -n ${NAMESPACE} -- /bin/sh -c "date +%H:%M:%S") 68 | NOW=$(date +%H:%M:%S) 69 | OUT="$JPEND,$JRUN,$JDONE" 70 | if [ "$OUT" != "$OLDOUT" ]; then 71 | echo "$NOW,$PDATE,$OUT" 72 | echo "$NOW,$PDATE,$OUT" >> $OUTFILE 73 | OLDOUT=$OUT 74 | fi 75 | if [ $JDONE -eq 10 ]; then 76 | echo "Test complete" 77 | exit 0 78 | fi 79 | sleep 60 80 | done 81 | -------------------------------------------------------------------------------- /doc/LSF_Operator/role.yaml: -------------------------------------------------------------------------------- 1 | #-------------------------------------------------------- 2 | # Copyright IBM Corp. 2020. All rights reserved. 3 | # US Government Users Restricted Rights - Use, duplication or disclosure 4 | # restricted by GSA ADP Schedule Contract with IBM Corp. 
5 | #-------------------------------------------------------- 6 | # 7 | apiVersion: rbac.authorization.k8s.io/v1 8 | kind: Role 9 | metadata: 10 | name: ibm-lsf-operator 11 | labels: 12 | app.kubernetes.io/name: "ibm-spectrum-lsf" 13 | app.kubernetes.io/managed-by: "lsfclusters.lsf.spectrumcomputing.ibm.com" 14 | app.kubernetes.io/instance: "lsf" 15 | release: "lsf" 16 | rules: 17 | - apiGroups: 18 | - "" 19 | resources: 20 | - pods 21 | - services 22 | - services/finalizers 23 | - endpoints 24 | - persistentvolumeclaims 25 | - events 26 | - configmaps 27 | - secrets 28 | - serviceaccounts 29 | - roles 30 | - rolebindings 31 | verbs: 32 | - '*' 33 | - apiGroups: 34 | - "" 35 | resources: 36 | - nodes 37 | verbs: 38 | - list 39 | - get 40 | - watch 41 | - apiGroups: 42 | - security.openshift.io 43 | resources: 44 | - securitycontextconstraints 45 | verbs: 46 | - use 47 | - apiGroups: 48 | - route.openshift.io 49 | resources: 50 | - routes 51 | verbs: 52 | - list 53 | - get 54 | - create 55 | - update 56 | - patch 57 | - apiGroups: 58 | - apps 59 | - extensions 60 | resources: 61 | - deployments 62 | - daemonsets 63 | - replicasets 64 | - statefulsets 65 | verbs: 66 | - '*' 67 | - apiGroups: 68 | - monitoring.coreos.com 69 | resources: 70 | - servicemonitors 71 | verbs: 72 | - get 73 | - create 74 | - apiGroups: 75 | - apps 76 | resourceNames: 77 | - ibm-lsf-operator 78 | resources: 79 | - deployments/finalizers 80 | verbs: 81 | - update 82 | - apiGroups: 83 | - apps 84 | resources: 85 | - replicasets 86 | verbs: 87 | - get 88 | - apiGroups: 89 | - lsf.spectrumcomputing.ibm.com 90 | resources: 91 | - '*' 92 | verbs: 93 | - '*' 94 | - apiGroups: 95 | - extensions 96 | - networking.k8s.io 97 | - crd.projectcalico.org 98 | resources: 99 | - networkpolicies 100 | verbs: 101 | - get 102 | - list 103 | - create 104 | - patch 105 | - update 106 | - apiGroups: 107 | - extensions 108 | - networking.k8s.io 109 | resources: 110 | - ingresses 111 | verbs: 112 | - get 113 | - list 114 | - create 115 | - patch 116 | - update 117 | # Needed to create the role 118 | - apiGroups: 119 | - rbac.authorization.k8s.io 120 | resources: 121 | - roles 122 | - rolebindings 123 | - clusterroles 124 | - clusterrolebindings 125 | verbs: 126 | - list 127 | - get 128 | - create 129 | - patch 130 | - update 131 | - apiGroups: 132 | - networking.k8s.io 133 | resources: 134 | - networkpolicies 135 | verbs: 136 | - list 137 | - get 138 | - create 139 | - patch 140 | - update 141 | -------------------------------------------------------------------------------- /examples/Pod_Dependencies/README.md: -------------------------------------------------------------------------------- 1 | # Pod Dependences (Workflows) 2 | 3 | It is possible to sequence pods so that workflows can be constructed. 4 | A pod can be defined such that it will only run once the pod(s) it depends on has completed. For example: 5 | ``` 6 | Pod 1 Extracts data from a data source 7 | Pod 2 Takes the data from Pod 1 and scrubs it and performs some transformation 8 | Pod 3 Takes the output of Pod 2 and stores it in a database 9 | ``` 10 | This test case will demonstrate a workflow with two processing streams that are 11 | eventually combined by the last pod. The workflow will look like: 12 | ``` 13 | 14 | jdpod-1 --> jdpod-3 --> jdpod-4 -\ 15 | && --> jdpod-5 16 | jdpod-2 ----------------------------/ 17 | 18 | ``` 19 | 20 | Job 3 only runs once job 1 finishes, job 4 only runs after job 3, and job 5 only 21 | runs when both job 2 and job 4 have finished. 
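As a concrete sketch, a dependent job in this test differs from an ordinary one only by the `lsf.ibm.com/dependency` annotation. The manifest below mirrors `j3.yaml`, with the dependency already filled in; `default/jdpod-1-wdz92` is an example pod name taken from the sample output further down, and the real name is discovered at run time by the test script, as the next sections show:

```
apiVersion: batch/v1
kind: Job
metadata:
  name: jdpod-3
spec:
  template:
    metadata:
      name: jdpod-3
      annotations:
        lsf.ibm.com/queue: "normal"
        # run only after the pod backing job jdpod-1 has completed
        lsf.ibm.com/dependency: "done(default/jdpod-1-wdz92)"
    spec:
      schedulerName: lsf
      containers:
      - name: bigjob
        image: ubuntu:latest
        imagePullPolicy: IfNotPresent
        command: ["sleep", "60"]
      restartPolicy: Never
```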
22 | 23 | ## How It Works 24 | 25 | Workflows are created by adding an annotation to the pod to define its dependencies, e.g. 26 | ``` 27 | lsf.ibm.com/dependency: "done(default/jdpod-2-x24gk) \&\& done(default/jdpod-4-d8q9r)" 28 | ``` 29 | 30 | The dependency expression can be a single job, or an expression of multiple conditions with the following operators: 31 | * "&&" (AND) 32 | * "||" (OR) 33 | * "!" (NOT) 34 | 35 | Parentheses can also be used to indicate the order of operations. 36 | 37 | 38 | ## Running the Test 39 | 40 | The test can be started by running: 41 | ``` 42 | $ ./runtest.sh 43 | ``` 44 | 45 | It will describe the workflow it will run, and show the dependency annotation that it is using, e.g. 46 | ``` 47 | This script will create a simple workflow as follows: 48 | 49 | jdpod-1 --> jdpod-3 --> jdpod-4 -\ 50 | && --> jdpod-5 51 | jdpod-2 ----------------------------/ 52 | 53 | Pods jdpod-1 and jdpod-2 will start immediately. 54 | Pod jdpod-3 will wait for jdpod-1 to finish, and jdpod-4 will 55 | wait for jdpod-3 to finish. 56 | Pod jdpod-5 will wait for jdpod-4 and jdpod-2 to finish before 57 | starting. 58 | 59 | 60 | Creating pods jdpod-1 and jdpod-2 61 | job.batch/jdpod-1 created 62 | job.batch/jdpod-2 created 63 | 64 | Starting job jdpod-3 with dependency annotation: 65 | lsf.ibm.com/dependency: "done(default/jdpod-1-wdz92)" 66 | 67 | job.batch/jdpod-3 created 68 | 69 | Starting job jdpod-4 with dependency annotation: 70 | lsf.ibm.com/dependency: "done(default/jdpod-3-jp899)" 71 | 72 | job.batch/jdpod-4 created 73 | 74 | Starting job jdpod-5 with dependency annotation: 75 | lsf.ibm.com/dependency: "done(default/jdpod-2-x24gk) \&\& done(default/jdpod-4-d8q9r)" 76 | 77 | job.batch/jdpod-5 created 78 | ``` 79 | 80 | To follow the workflow as it runs, use: 81 | ``` 82 | $ watch -n 5 "kubectl get jobs |grep jdpod" 83 | ``` 84 | or for more details: 85 | ``` 86 | $ watch -n 5 "kubectl get pods |grep jdpod" 87 | ``` 88 | 89 | The workflow will complete as described above. 90 | 91 | ## Pod Dependencies Conclusion 92 | The pod dependency annotation allows for the construction of pod workflows. The dependency annotation supports several logical operations and parentheses to control the evaluation of the expression. 93 | -------------------------------------------------------------------------------- /examples/Pod_Dependencies/runtest.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | OUTFILE=test-output.csv 4 | rm -rf $OUTFILE 5 | 6 | # Namespace for LSF 7 | NAMESPACE=ibm-lsf-tp 8 | MASTERPOD=$(kubectl get pods -n ${NAMESPACE} |grep ibm-spectrum-computing-prod-master |awk '{ print $1 }') 9 | 10 | clear 11 | 12 | echo "This script will create a simple workflow as follows: 13 | 14 | jdpod-1 --> jdpod-3 --> jdpod-4 -\\ 15 | && --> jdpod-5 16 | jdpod-2 ----------------------------/ 17 | 18 | Pods jdpod-1 and jdpod-2 will start immediately. 19 | Pod jdpod-3 will wait for jdpod-1 to finish, and jdpod-4 will 20 | wait for jdpod-3 to finish. 21 | Pod jdpod-5 will wait for jdpod-4 and jdpod-2 to finish before 22 | starting.
23 | 24 | " 25 | 26 | DIRTY=$(kubectl get pods |grep -c jdpod- 2>/dev/null) 27 | if [ $DIRTY -ne 0 ]; then 28 | kubectl delete -f j1.yaml >/dev/null 2>&1 29 | kubectl delete -f j2.yaml >/dev/null 2>&1 30 | kubectl delete -f j3.yaml >/dev/null 2>&1 31 | kubectl delete -f j4.yaml >/dev/null 2>&1 32 | kubectl delete -f j5.yaml >/dev/null 2>&1 33 | fi 34 | 35 | echo "Creating pods jdpod-1 and jdpod-2" 36 | kubectl create -f j1.yaml 37 | kubectl create -f j2.yaml 38 | 39 | # echo " 40 | #Extracting pod names from the jobs jdpod-1 and jdpod-2 41 | #" 42 | NAMESP=$(kubectl describe job jdpod-1 |grep 'Namespace:' |awk '{ print $2 }') 43 | JOB1ID=$(kubectl get pods |grep jdpod-1- |awk '{ print $1 }') 44 | JOB2ID=$(kubectl get pods |grep jdpod-2- |awk '{ print $1 }') 45 | 46 | JOBDEP="done(${NAMESP}/${JOB1ID})" 47 | echo " 48 | Starting job jdpod-3 with dependency annotation: 49 | lsf.ibm.com/dependency: \"${JOBDEP}\" 50 | " 51 | sed -e s:JOBDEP:"${JOBDEP}":g < j3.yaml > j3-run.yaml 52 | kubectl create -f j3-run.yaml 53 | rm -rf j3-run.yaml 54 | 55 | #echo " 56 | #Extracting pod names from the job jdpod-3 57 | #" 58 | 59 | JOB3ID=$(kubectl get pods |grep jdpod-3- |awk '{ print $1 }') 60 | JOBDEP="done(${NAMESP}/${JOB3ID})" 61 | echo " 62 | Starting job jdpod-4 with dependency annotation: 63 | lsf.ibm.com/dependency: \"${JOBDEP}\" 64 | " 65 | sed -e "s:JOBDEP:${JOBDEP}:g" < j4.yaml > j4-run.yaml 66 | kubectl create -f j4-run.yaml 67 | rm -rf j4-run.yaml 68 | 69 | #echo " 70 | #Extracting pod names from the job jdpod-4 71 | #" 72 | 73 | JOB4ID=$(kubectl get pods |grep jdpod-4- |awk '{ print $1 }') 74 | JOBDEP="done(${NAMESP}/${JOB2ID}) \&\& done(${NAMESP}/${JOB4ID})" 75 | echo " 76 | Starting job jdpod-5 with dependency annotation: 77 | lsf.ibm.com/dependency: \"${JOBDEP}\" 78 | " 79 | sed -e "s:JOBDEP:${JOBDEP}:g" < j5.yaml > j5-run.yaml 80 | kubectl create -f j5-run.yaml 81 | rm -rf j5-run.yaml 82 | 83 | 84 | exit 0 85 | 86 | for i in $(seq 1 10); do 87 | sed -e s:SEQ:$i:g < $TEMPLATE > jobtmp/jdpod-$i.yaml 88 | kubectl create -f jobtmp/jdpod-$i.yaml 89 | done 90 | 91 | echo "" 92 | echo "Ten pods have been created in the 'night' queue." 93 | echo "Script will now check every 1 minute but only report changes till done." 94 | echo "Data is stored in $OUTFILE" 95 | echo "" 96 | echo "Worker_Time(HH:MM:SS),Container_Time(HH:MM:SS),Num_Pend_Pods,Num_Run_Pods,Num_Complete_Pods" 97 | OLDOUT="" 98 | OUT="" 99 | while [ true ]; do 100 | kubectl get pods 2>/dev/null |grep jdpod- > j.tmp 101 | JDONE=$(grep -c Completed j.tmp 2>/dev/null) 102 | JRUN=$(egrep -c 'ContainerCreating|Running' j.tmp 2>/dev/null) 103 | JPEND=$(grep -c Pending j.tmp 2>/dev/null) 104 | PDATE=$(kubectl exec $MASTERPOD -n ${NAMESPACE} -- /bin/sh -c "date +%H:%M:%S") 105 | NOW=$(date +%H:%M:%S) 106 | OUT="$JPEND,$JRUN,$JDONE" 107 | if [ "$OUT" != "$OLDOUT" ]; then 108 | echo "$NOW,$PDATE,$OUT" 109 | echo "$NOW,$PDATE,$OUT" >> $OUTFILE 110 | OLDOUT=$OUT 111 | fi 112 | if [ $JDONE -eq 10 ]; then 113 | echo "Test complete" 114 | exit 0 115 | fi 116 | sleep 60 117 | done 118 | -------------------------------------------------------------------------------- /examples/Run_Limits/README.md: -------------------------------------------------------------------------------- 1 | # Run Limits 2 | 3 | **NOTE: Unfortunately last minute changes to the Kubernetes integration will prevent this test from running properly. Look for resolution in future releases.** 4 | 5 | Run limits allow you to impose restrictions on how long a pod is allowed to 6 | run. 
When a pod reaches the limit, it is terminated. This allows resources 7 | to be freed from pods that are running uncharacteristically long. 8 | This test will modify the "normal" queue and add a RUNLIMIT. The RUNLIMIT 9 | is the maximum number of minutes a pod is allowed to run. We will then submit 10 | a few long running pods and allow them to be terminated when the RUNLIMIT 11 | is reached. 12 | 13 | **NOTE: The test uses a pod, rather than a Kubernetes job. The job controller restarts terminated pods.** 14 | 15 | ## Adding the RUNLIMIT 16 | The RUNLIMIT is defined in the queue definitions. This configuration file is stored as a 17 | configMap, so you can edit it using "kubectl". Use the following procedure to add the RUNLIMIT. 18 | 19 | 1. Determine the configMap name: 20 | ``` 21 | $ kubectl get cm 22 | ``` 23 | Look for a configMap with "ibm-spectrum-computing-prod-queues" in the name. 24 | 25 | 2. Edit the configMap discovered from above: 26 | ``` 27 | kubectl edit cm myname-ibm-spectrum-computing-prod-queues 28 | ``` 29 | 30 | 3. Find the "normal" queue section. It will look something like: 31 | ``` 32 | Begin Queue 33 | QUEUE_NAME = normal 34 | PRIORITY = 30 35 | DESCRIPTION = For normal low priority pods 36 | End Queue 37 | ``` 38 | 39 | 4. Add the RUNLIMIT before the end of the normal queue definition, e.g. 40 | ``` 41 | RUNLIMIT = 2 42 | ``` 43 | 44 | **NOTE: When editing the configMap the spacing is significant. Do not change the format.** 45 | 46 | 5. Wait for 2 minutes for the configuration change to be applied to your cluster. 47 | 48 | 6. Run the test. Remember to remove the RUNLIMIT after the test is complete. 49 | 50 | 51 | ## Running the Test 52 | The test pods will run for 60 minutes; however, the RUNLIMIT has been set to 53 | 2 minutes. We will see the pods start, then in 2 minutes they will start to 54 | terminate. To run the test: 55 | 1. Add the RUNLIMIT to the *normal* queue as described above. 56 | 57 | 2. Run the test script: 58 | ``` 59 | $ ./runtest.sh 60 | 61 | This script will create long running test jobs in the normal queue. 62 | The jobs will run until the RUNLIMIT is reached and will then be killed 63 | 64 | 65 | pod/rljob-1 created 66 | pod/rljob-2 created 67 | pod/rljob-3 created 68 | pod/rljob-4 created 69 | pod/rljob-5 created 70 | pod/rljob-6 created 71 | pod/rljob-7 created 72 | pod/rljob-8 created 73 | pod/rljob-9 created 74 | pod/rljob-10 created 75 | 76 | Ten pods have been created in the 'normal' queue. 77 | Script will now check every 10 seconds but only report changes till done. 78 | Data is stored in test-output.csv 79 | 80 | ``` 81 | 82 | 3.
Analyze the results 83 | 84 | ### Analyzing the Results 85 | The script will check every 10 seconds to see the state of the "rljob-\*" pods and will output in the following format: 86 | ``` 87 | 13:22:40,0,0,0 88 | ``` 89 | Where the comma-delimited columns are: 90 | * Worker time (HH:MM:SS) 91 | * Number of pending pods from this test 92 | * Number of running pods from this test 93 | * Number of terminating pods from this test 94 | 95 | Provided the RUNLIMIT is set correctly, we will see something like: 96 | ``` 97 | 13:22:40, 4, 6, 0 98 | 13:22:50, 0, 10, 0 <-- All pods are running 99 | 13:24:46, 0, 6, 4 <-- 2 minutes later pods start terminating 100 | 13:24:56, 0, 1, 9 101 | 13:25:06, 0, 0, 10 <-- All "long running" pods are terminated 102 | 13:25:17, 0, 0, 9 103 | 13:25:27, 0, 0, 4 104 | 13:25:37, 0, 0, 1 105 | 13:25:47, 0, 0, 0 106 | Test complete 107 | ``` 108 | 109 | 110 | ## Conclusion 111 | We have seen that we can define a RUNLIMIT that allows us to recover the resources that are held by a misbehaving pod. In this test we saw that once the pod had run for a "long time" it was automatically killed. We also saw that this behaviour is defined at the queue level, so only those pods submitted to the *normal* queue had this policy applied. 112 | 113 | -------------------------------------------------------------------------------- /doc/LSF_Operator/operator.yaml: -------------------------------------------------------------------------------- 1 | #-------------------------------------------------------- 2 | # Copyright IBM Corp. 2020. All rights reserved. 3 | # US Government Users Restricted Rights - Use, duplication or disclosure 4 | # restricted by GSA ADP Schedule Contract with IBM Corp. 5 | #-------------------------------------------------------- 6 | # 7 | apiVersion: apps/v1 8 | kind: Deployment 9 | metadata: 10 | name: ibm-lsf-operator 11 | labels: 12 | app.kubernetes.io/name: "ibm-spectrum-lsf" 13 | app.kubernetes.io/managed-by: "lsfclusters.lsf.spectrumcomputing.ibm.com" 14 | app.kubernetes.io/instance: "lsf" 15 | release: "lsf" 16 | lsfversion: "10.1.0.9" 17 | spec: 18 | replicas: 1 19 | selector: 20 | matchLabels: 21 | name: ibm-lsf-operator 22 | template: 23 | metadata: 24 | labels: 25 | name: ibm-lsf-operator 26 | app.kubernetes.io/name: "ibm-spectrum-lsf" 27 | app.kubernetes.io/managed-by: "lsfclusters.lsf.spectrumcomputing.ibm.com" 28 | app.kubernetes.io/instance: "lsf" 29 | release: "lsf" 30 | lsfversion: "10.1.0.9" 31 | annotations: 32 | productID: IBM LSF CE 33 | productName: IBM Spectrum LSF Community Edition 34 | productVersion: 10.1.0.9 35 | 36 | spec: 37 | serviceAccountName: ibm-lsf-operator 38 | hostIPC: false 39 | hostNetwork: false 40 | hostPID: false 41 | securityContext: 42 | runAsUser: 1001 43 | runAsNonRoot: true 44 | affinity: 45 | nodeAffinity: 46 | requiredDuringSchedulingIgnoredDuringExecution: 47 | nodeSelectorTerms: 48 | - matchExpressions: 49 | - key: beta.kubernetes.io/arch 50 | operator: In 51 | values: 52 | - amd64 53 | - ppc64le 54 | 55 | containers: 56 | - name: ansible 57 | command: 58 | - /usr/local/bin/ao-logs 59 | - /tmp/ansible-operator/runner 60 | - stdout 61 | # Replace this with the built image name 62 | image: "ibmcom/lsfce-operator:1.0.1r02" 63 | imagePullPolicy: "Always" 64 | volumeMounts: 65 | - mountPath: /tmp/ansible-operator/runner 66 | name: runner 67 | readOnly: true 68 | env: 69 | - name: "ANSIBLE_VERBOSITY" 70 | value: "3" 71 | resources: 72 | limits: 73 | cpu: "1" 74 | memory: 1G 75 | requests: 76 | cpu: "100m" 77 | memory:
256M 78 | livenessProbe: 79 | exec: 80 | command: 81 | - ls 82 | - /tmp/ansible-operator/runner 83 | initialDelaySeconds: 10 84 | periodSeconds: 30 85 | readinessProbe: 86 | exec: 87 | command: 88 | - ls 89 | - /tmp/ansible-operator/runner 90 | initialDelaySeconds: 10 91 | periodSeconds: 30 92 | 93 | securityContext: 94 | allowPrivilegeEscalation: true 95 | capabilities: 96 | drop: 97 | - ALL 98 | privileged: false 99 | readOnlyRootFilesystem: false 100 | - name: operator 101 | # Replace this with the built image name 102 | image: "ibmcom/lsfce-operator:1.0.1r02" 103 | imagePullPolicy: "Always" 104 | volumeMounts: 105 | - mountPath: /tmp/ansible-operator/runner 106 | name: runner 107 | env: 108 | - name: WATCH_NAMESPACE 109 | valueFrom: 110 | fieldRef: 111 | fieldPath: metadata.namespace 112 | - name: POD_NAME 113 | valueFrom: 114 | fieldRef: 115 | fieldPath: metadata.name 116 | - name: OPERATOR_NAME 117 | value: "ibm-lsf-operator" 118 | resources: 119 | limits: 120 | cpu: "1" 121 | memory: 1G 122 | requests: 123 | cpu: "100m" 124 | memory: 256M 125 | securityContext: 126 | allowPrivilegeEscalation: true 127 | capabilities: 128 | drop: 129 | - ALL 130 | privileged: false 131 | readOnlyRootFilesystem: false 132 | livenessProbe: 133 | exec: 134 | command: 135 | - ls 136 | - /tmp/ansible-operator/runner 137 | initialDelaySeconds: 10 138 | periodSeconds: 30 139 | readinessProbe: 140 | exec: 141 | command: 142 | - ls 143 | - /tmp/ansible-operator/runner 144 | initialDelaySeconds: 10 145 | periodSeconds: 30 146 | volumes: 147 | - name: runner 148 | emptyDir: {} 149 | -------------------------------------------------------------------------------- /doc/LSF_Operator/example-pod-sched.yaml: -------------------------------------------------------------------------------- 1 | #-------------------------------------------------------- 2 | # Copyright IBM Corp. 2020. All rights reserved. 3 | # US Government Users Restricted Rights - Use, duplication or disclosure 4 | # restricted by GSA ADP Schedule Contract with IBM Corp. 5 | #-------------------------------------------------------- 6 | # 7 | apiVersion: lsf.spectrumcomputing.ibm.com/v1beta1 8 | kind: LSFCluster 9 | metadata: 10 | name: example-pod-sched 11 | labels: 12 | app.kubernetes.io/name: "ibm-spectrum-lsf" 13 | app.kubernetes.io/managed-by: "lsfclusters.lsf.spectrumcomputing.ibm.com" 14 | app.kubernetes.io/instance: "lsf" 15 | release: "lsf-podscheduler" 16 | 17 | spec: 18 | # Indicate acceptance of the Licenses 19 | # The licenses are available from this site: 20 | # http://www-03.ibm.com/software/sla/sladb.nsf 21 | # Use the search option to find IBM Spectrum LSF CE 22 | # Indicate acceptance of the Licenses 23 | licenseAccepted: false 24 | 25 | # Provide the name of the service account you wich to use 26 | serviceAccount: ibm-lsf-sa 27 | 28 | cluster: 29 | # The operator can deploy lsf in two different modes: 30 | # lsf - LSF is deployed as a cluster within K8s 31 | # podscheduler - LSF enhances the pod scheduling capabilities 32 | # of K8s. 
33 | lsfrole: podscheduler 34 | 35 | clustername: pod-sched 36 | 37 | # PersistentVolumeClaim (Storage volume) patrameters 38 | pvc: 39 | dynamicStorage: false 40 | storageClass: "" 41 | selectorLabel: "lsfvol" 42 | selectorValue: "lsfvol" 43 | size: "10G" 44 | 45 | master: 46 | image: "ibmcom/lsfce-master:10.1.0.9r02" 47 | imagePullPolicy: "Always" 48 | 49 | # The placement variables control how the pods will be placed 50 | placement: 51 | # includeLabel - Optional label to apply to hosts that 52 | # should be allowed to run the compute pod 53 | includeLabel: "" 54 | 55 | # excludeLabel - Is a label to apply to hosts to prevent 56 | # them from being used to host the compute pod 57 | excludeLabel: "excludelsf" 58 | 59 | # Taints can be used to control which nodes are available 60 | # to the LSF and Kubernetes scheduler. If used these 61 | # parameters are used to allow the LSF pods to run on 62 | # tainted nodes. When not defined the K8s master nodes 63 | # will be used to host the master pod. 64 | # 65 | # tolerateName - Optional name of the taint that has been 66 | # applied to a node 67 | # tolerateValue - The value given to the taint 68 | # tolerateEffect - The effect of the taint 69 | # 70 | tolerateName: "" 71 | tolerateValue: "" 72 | tolerateEffect: NoExecute 73 | 74 | # Define the number of this type of pod you want to have running. 75 | # Valid values for a master are 0 or 1. 76 | replicas: 1 77 | 78 | # These are the Memory and CPU allocations for the pod. 79 | # Set the memory and cpu requests and limits to the same values 80 | # to get a guarenteed QoS. 81 | resources: 82 | requests: 83 | cpu: "4" 84 | memory: "8G" 85 | limits: 86 | cpu: "4" 87 | memory: "8G" 88 | 89 | # There can be a number of different compute pods running. Each pod 90 | # type supporting different applications. Define a list of compute 91 | # pods and there characteristics here. 92 | computes: 93 | # This is the start of a specific compute pod type. The name 94 | # given should be short and without spaces 95 | - name: "RHEL7" 96 | 97 | # Define where to get the image from 98 | image: "ibmcom/lsfce-comp:10.1.0.9r02" 99 | imagePullPolicy: "Always" 100 | 101 | # The placement variables control how the pods will be placed 102 | placement: 103 | # includeLabel - Optional label to apply to hosts that 104 | # should be allowed to run the compute pod 105 | includeLabel: "" 106 | 107 | # excludeLabel - Is a label to apply to hosts to prevent 108 | # them from being used to host the compute pod 109 | excludeLabel: "excludelsf" 110 | 111 | # Taints can be used to control which nodes are available 112 | # to the LSF and Kubernetes scheduler. If used these 113 | # parameters are used to allow the LSF pods to run on 114 | # tainted nodes. When not defined the K8s master nodes 115 | # will be used to host the master pod. 
116 | # 117 | # tolerateName - Optional name of the taint that has been 118 | # applied to a node 119 | # tolerateValue - The value given to the taint 120 | # tolerateEffect - The effect of the taint 121 | # 122 | tolerateName: "" 123 | tolerateValue: "" 124 | tolerateEffect: NoExecute 125 | 126 | resources: 127 | requests: 128 | cpu: "200m" 129 | memory: "1G" 130 | limits: 131 | cpu: "200m" 132 | memory: "1G" 133 | 134 | -------------------------------------------------------------------------------- /examples/Resource_Sharing/README.md: -------------------------------------------------------------------------------- 1 | # Resource Sharing Test 2 | 3 | This test case will look at a simple example of sharing resources between competing 4 | pods. This differs from the priority test cases, where highest priority pods were given 5 | preference. In this case we will look at how to deal with pods that are associated with 6 | different groups or Lines of Business (LOBs) who want to share the Kubernetes cluster. 7 | How do you control how the resources are shared? This test 8 | will look at a simple way to share those resources. Other more advanced configurations 9 | are possible using project groups and their limits, but are beyond the scope of this 10 | test. 11 | 12 | Normally when there is no resource contention the resources can be used by 13 | whichever group or LOB needs them, so they get results sooner. 14 | To trigger this case we will need to create enough demand between different 15 | groups so that there is contention for resources to run pods. 16 | 17 | The test case will use the default fairshareGroups that are provided at installation: 18 | * gold 19 | * silver 20 | * bronze 21 | The fairshare groups are modifiable, but will not be covered in this test. 22 | 23 | These groups define the proportion of resources each will receive when there is 24 | contention. The **gold** group has 9 shares, **silver** has 3 and **bronze** has 1. 25 | From this we would expect that the gold group will get about 70% of the resources 26 | while silver will get about 23%, and bronze the rest (see the worked split after the test steps below). 27 | 28 | 29 | ## Running the Test 30 | The test should be started with a cluster with no job pods. Use the scripts provided 31 | to run the test. The test will create jobs with 10 pods per job. The jobs will be 32 | either gold, silver or bronze. The **completions.sh** script will monitor the progress 33 | and log the results in **test-output.csv**. 34 | 35 | 1. Start the test by running **runtest.sh**. It will ask for the number of jobs to create. It will create 10 pods per job with each pod using 1 CPU core. You will want to create enough jobs to create contention long enough for the policy to re-balance the resource assignment. 36 | 37 | 2. In another window run **completions.sh**. It will gather the data that we can analyze. 38 | 39 | 3. Wait for **completions.sh** to exit, then analyze the results. 
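Before looking at the data, the split we should expect under full contention follows directly from the default share values described above (gold 9, silver 3, bronze 1):
```
gold   : 9 / (9 + 3 + 1) = 9/13  ~ 69%
silver : 3 / (9 + 3 + 1) = 3/13  ~ 23%
bronze : 1 / (9 + 3 + 1) = 1/13  ~  8%
```
These are roughly the percentages to look for in the %gold, %silv and %bronze columns once the cluster is saturated.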
40 | 41 | 42 | ## Analyzing the Results 43 | 44 | When the test is first run the **completions.sh** script will output something like: 45 | ``` 46 | 13:29:10,0,0,0,0,0,0,0,0,0 47 | ``` 48 | 49 | The output has the following columns: 50 | * Time hh:mm:ss 51 | * Number of **gold** group pods done 52 | * Number of **silver** group pods done 53 | * Number of **bronze** group pods done 54 | * Number of **gold** group pods running 55 | * Number of **silver** group pods running 56 | * Number of **bronze** group pods running 57 | * Percentage of **gold** group pods running 58 | * Percentage of **silver** group pods running 59 | * Percentage of **bronze** group pods running 60 | 61 | Initially, as the pods are started there is no contention, so we see the pods being started 62 | in the order they are received and the number of pods running in each group is roughly 63 | equal e.g. 64 | ``` 65 | Time, GDone,SDone,BDone,GRun,SRun,BRun,%gold,%silv,%bronze 66 | 13:29:20, 0, 0, 0, 1, 0, 0, 100, 0, 0 67 | 13:29:31, 0, 0, 0, 2, 7, 4, 15, 53, 30 68 | 13:29:41, 0, 0, 0, 9, 12, 7, 32, 42, 25 69 | 13:29:51, 0, 0, 0, 16, 15, 13, 36, 34, 29 70 | 13:30:02, 0, 0, 0, 20, 20, 18, 34, 34, 31 71 | 13:30:12, 0, 0, 0, 21, 24, 23, 30, 35, 33 72 | ``` 73 | 74 | However, as the test progresses we start to get contention and see that the gold 75 | group is getting more than silver, and silver is getting more than bronze e.g. 76 | ``` 77 | Time, GDone,SDone,BDone,GRun,SRun,BRun,%gold,%silv,%bronze 78 | 13:32:08, 60, 35, 34, 46, 16, 4, 69, 24, 6 79 | 13:32:19, 76, 38, 34, 42, 19, 6, 62, 28, 8 80 | 13:32:33, 83, 40, 34, 44, 20, 7, 61, 28, 9 81 | ``` 82 | 83 | There was an equal number of jobs in each group, but the gold group is getting more 84 | resources to run. As a result the gold group jobs are completing more quickly and 85 | eventually we see that all gold jobs are done. Now the contention is only between 86 | the silver and bronze groups e.g. 87 | ``` 88 | Time, GDone,SDone,BDone,GRun,SRun,BRun,%gold,%silv,%bronze 89 | 13:37:04, 299, 111, 56, 1, 49, 17, 1, 73, 25 90 | 13:37:14, 300, 121, 59, 0, 51, 16, 0, 76, 23 <-- No gold pods 91 | 13:37:25, 300, 131, 63, 0, 50, 16, 0, 75, 24 92 | ``` 93 | 94 | The silver group pods then complete, leaving the resources to the bronze group. 95 | 96 | ## Conclusion 97 | When there is no contention for resources, users will be able to run whatever number 98 | of pods they like. However, this is not desirable when resources are limited. 99 | The fairshare groups provide a way to resolve resource contention between different 100 | competing groups or users. The built-in groups provide a way for administrators to 101 | manage the portion of resources that users and groups will get. 
Administrators 102 | can set the sharing policy based on business priority, and ensure that business 103 | critical processes get the share of the resources they deserve 104 | 105 | -------------------------------------------------------------------------------- /examples/distributed-helloworld.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | import math 4 | import tensorflow as tf 5 | from kubernetes import client, config 6 | import socket 7 | import fcntl 8 | import struct 9 | import time 10 | import threading 11 | 12 | # these globals are used to bootstrap the TF cluster 13 | myindex = -99 14 | ps_hosts = [] 15 | worker_hosts = [] 16 | 17 | def get_ip_address(ifname): 18 | s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) 19 | return socket.inet_ntoa(fcntl.ioctl( 20 | s.fileno(), 21 | 0x8915, # SIOCGIFADDR 22 | struct.pack('256s', ifname[:15]) 23 | )[20:24]) 24 | 25 | def startup(): 26 | global ps_hosts 27 | global worker_hosts 28 | global myindex 29 | # The ip address of the pod is used to determine the index of 30 | # the pod in the ClusterSpec. 31 | myip = get_ip_address('eth0') 32 | 33 | # Get the namespace from the service account 34 | with open ("/run/secrets/kubernetes.io/serviceaccount/namespace", "r") as nsfile: 35 | namespace = nsfile.readline() 36 | 37 | # Get a list of pods that are part of this job 38 | # Since all pods may not be ready yet. The code will sleep and loop 39 | # until all pod IPs are available. 40 | config.load_incluster_config() 41 | ready = False 42 | while not ready: 43 | ready = True 44 | allpods = [] 45 | v1 = client.CoreV1Api() 46 | podlist = v1.list_namespaced_pod(namespace, label_selector="lsf.ibm.com/jobId="+os.environ["LSB_JOBID"]) 47 | for pod in podlist.items: 48 | if pod.status.pod_ip == None: 49 | ready = False 50 | time.sleep(1) 51 | continue 52 | else: 53 | allpods.append(pod.status.pod_ip) 54 | 55 | # Now that the pod list is complete. Get ready for cluster spec generation 56 | # by sorting the pod list by IP address. 57 | allpods = sorted(allpods, key=lambda ip: socket.inet_aton(ip)) 58 | print "allpods " + str(allpods) 59 | 60 | # Build the cluster configuration. 61 | # Keep track of which index in the cluster spec 62 | # corresponds to me. 63 | ix = 0 64 | for pod in allpods: 65 | ps_hosts.append(pod + ":2221") 66 | worker_hosts.append(pod + ":2222") 67 | if pod == myip: 68 | myindex = ix 69 | ix = ix + 1 70 | print "startup done. myindex: "+str(myindex)+", ps_hosts: "+str(ps_hosts)+", worker_hosts: "+str(worker_hosts) 71 | 72 | def run_ps(): 73 | global ps_hosts 74 | global worker_hosts 75 | global myindex 76 | print "ps_hosts: "+str(ps_hosts)+", myindex: "+str(myindex) 77 | cluster = tf.train.ClusterSpec({"ps": ps_hosts, "worker": worker_hosts}) 78 | server = tf.train.Server(cluster, 79 | job_name="ps", 80 | task_index=myindex) 81 | 82 | # to enable the parameter server to exit gracefully, make some queues that 83 | # workers can write to, to indicate that they are done. when a parameter 84 | # server sees that all workers are done, then it will exit. 
85 | with tf.device('/job:ps/task:%d' % myindex): 86 | queue = tf.FIFOQueue(cluster.num_tasks('worker'), tf.int32, shared_name='done_queue%d' % myindex) 87 | 88 | # wait for the queue to be filled 89 | with tf.Session(server.target) as sess: 90 | for i in range(cluster.num_tasks('worker')): 91 | sess.run(queue.dequeue()) 92 | print('ps:%d received "done" from worker:%d' % (myindex, i)) 93 | print('ps:%d quitting' % myindex) 94 | 95 | def run_worker(): 96 | global ps_hosts 97 | global worker_hosts 98 | global myindex 99 | cluster = tf.train.ClusterSpec({"ps": ps_hosts, "worker": worker_hosts}) 100 | server = tf.train.Server(cluster, 101 | job_name="worker", 102 | task_index=myindex) 103 | 104 | # Assigns ops to the local worker by default. 105 | with tf.device(tf.train.replica_device_setter( 106 | worker_device="/job:worker/task:%s" % myindex, 107 | cluster=cluster)): 108 | 109 | # set up some queue to notify the ps tasks when it time to exit 110 | stop_queues = [] 111 | # create a shared queue on the worker which is visible on /job:ps/task:%d 112 | for i in range(cluster.num_tasks('ps')): 113 | with tf.device('/job:ps/task:%d' % i): 114 | stop_queues.append(tf.FIFOQueue(cluster.num_tasks('worker'), tf.int32, shared_name='done_queue%d' % i).enqueue(1)) 115 | 116 | # Create a "supervisor", which oversees the training process. 117 | sv = tf.train.Supervisor(is_chief=(myindex==0)) 118 | 119 | # The supervisor takes care of session initialization, restoring from 120 | # a checkpoint, and closing when done or an error occurs. 121 | with sv.managed_session(server.target) as sess: 122 | 123 | print "*********************" 124 | print "Hello from worker %d!" % myindex 125 | print "*********************" 126 | 127 | # notify the parameter servers that its time to exit. 128 | for op in stop_queues: 129 | sess.run(op) 130 | 131 | # Ask for all the services to stop. 132 | sv.stop() 133 | 134 | if __name__ == "__main__": 135 | # Each pod is both a parameter server and a worker 136 | # Each runs in a different thread. 137 | startup() 138 | threads = [ 139 | threading.Thread(target=run_ps), 140 | threading.Thread(target=run_worker)] 141 | for thread in threads: 142 | thread.start() 143 | for thread in threads: 144 | thread.join() 145 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | ![Logos](doc/images/k8s-lsf-logos.png) 3 | 4 | # LSF-Kubernetes 5 | 6 | ## Introduction 7 | LSF-Kubernetes integration delivers three key capabilities: 8 | * Effectively manages highly variable demands in workloads within a finite supply of resources 9 | * Provides improved service levels for different consumers and workloads in a shared multitenant environment 10 | * Optimizes the usage of expensive resources such as general-purpose graphics processing units (GPGPUs) to help ensure that they are allocated the most important work 11 | 12 | ## Deployment options 13 | 14 | ### LSF as a scheduler for Kubernetes 15 | 16 | Users wanting to have a better scheduler for Kubernetes, or wanting parallel or elastic job support should use the Tech Preview. 17 | The preview is available until July 1, 2020. For more information about the integration, refer to the [Quick Start Guide](https://github.com/IBMSpectrumComputing/lsf-kubernetes/blob/master/doc/IBM_Spectrum_Computing_Cloud_Pak_Quickstart_Guide.pdf). 18 | 19 | To download the package, visit the [IBM website](http://ibm.biz/LSFKubernetes). 
20 | 21 | ### Kubernetes add-on for LSF 22 | 23 | LSF users have access to the Kubernetes add-on for LSF in fix pack 9. LSF users are able to use LSF's **bsub** CLI to submit work, but can also use Kubernetes **kubectl** CLI to submit pods. Sample pod definitions are included in the Quickstart Guide in the **doc** directory. 24 | 25 | 26 | ## Overview 27 | LSF-Kubernetes integration builds on IBM Spectrum Computing's rich heritage in workload management and orchestration in demanding high performance computing and enterprise environments. With this strong foundation, IBM Spectrum Computing brings a wide range of workload management capabilities that include: 28 | * Multilevel priority queues and preemption 29 | * Fairshare among projects and namespaces 30 | * Resource reservation 31 | * Dynamic load-balancing 32 | * Topology-aware scheduling 33 | * Capabilty to schedule GPU jobs with consideration for CPU or GPU topology 34 | * Parallel and elastic jobs 35 | * Time-windows 36 | * Time-based configuration 37 | * Advanced reservation 38 | * Workflows 39 | * Multi-cluster workload management (roadmap) 40 | 41 | ### Improved workload prioritization and management 42 | IBM Spectrum Computing adds robust workload orchestration and prioritization capabilities to Kubernetes environments, such as IBM Cloud Private, or OpenShift. IBM Cloud Private is an application platform for developing and managing on-premises, containerized applications. It is an integrated environment for managing containers that includes the container orchestrator Kubernetes, a private image repository, a management console, and monitoring frameworks. 43 | While the Kubernetes scheduler employs a basic “first come, first served" method for processing workloads, IBM Spectrum Computing enables organizations to effectively prioritize and manage workloads based on business priorities and objectives. 44 | 45 | ### Key capabilities of IBM Spectrum Computing 46 | **Workload Orchestration** 47 | Kubernetes provides effective orchestration of workloads as long as there is capacity. In the public cloud, the environment can usually be enlarged to help ensure that there is always capacity in response to workload demands. However, in an on-premises deployment, resources are ultimately finite. For workloads that dynamically create Kubernetes pods (such as Jenkins, Jupyter Hub, Apache Spark, Tensorflow, ETL, and so on), the default "first come, first served" orchestration policy is not sufficient to help ensure that important business workloads process first or get resources before less important workloads. The LSF-Kubernetes integration prioritizes access to the resources for key business processes and lower priority workloads are queued until resources can be made available. 48 | 49 | **Service Level Management** 50 | In a multitenant environment where there is competition for resources, workloads (users, user groups, projects, and namespaces) can be assigned to different service levels that help ensure the right workload gets access to the right resource at the right time. This function prioritizes workloads and allocates a minimum number of resources for each service class. In addition to service levels, workloads can also be subject to prioritization and multilevel fairshare policies, which maintain correct prioritization of workloads within the same Service Level Agreement (SLA). 51 | 52 | **Resource Optimization** 53 | Environments are rarely homogeneous. 
There might be some servers with additional memory or some might have GPGPUs or additional capabilities. Running workloads on these servers that do not require those capabilities can block or delay workloads that do require additional functions. IBM Spectrum Computing provides multiple polices such as multilevel fairshare and service level management, enabling the optimization of resources based on business policy rather than by users competing for resources. 54 | 55 | ## Features 56 | - Advanced GPU scheduling policies like NVlink affinity. 57 | - Pod co-scheduling 58 | - Fairshare 59 | - Reservation / backfill (avoid job starvation) 60 | - Sophisticated limit policies 61 | - Queue prioritization 62 | - Scalability / throughput 63 | - Resource ownership policies 64 | - Integration with LSF add-ons: RTM, Application Center, Process Manager 65 | 66 | ## Architecture 67 | 68 | ![Architecture](doc/images/arch2.png) 69 | 70 | ## Articles and Blogs 71 | 72 | - [Bridging HPC and Cloud Native Development with Kubernetes](https://www.hpcwire.com/solution_content/ibm/cross-industry/bridging-hpc-and-cloud-native-development-with-kubernetes/), Khalid Ahmed, 2019-Apr-16, HPCWire 73 | 74 | ## Support 75 | 76 | Support is available on the IBM Cloud Tech public slack. The channel name is `#icplsf-tp-support`. To get an invite to the workspace, [click here](http://ibm.biz/BdsHmN). 77 | 78 | Support is also available through email to LSF-Inquiry@ca.ibm.com 79 | 80 | ## Deployment options 81 | 82 | ### LSF as a scheduler for Kubernetes 83 | 84 | A tech preview is available for Kubernetes users. The preview is available until July 1, 2020. For more information about the integration, refer to the [Quick Start Guide](https://github.com/IBMSpectrumComputing/lsf-kubernetes/blob/master/doc/IBM_Spectrum_Computing_Cloud_Pak_Quickstart_Guide.pdf). 85 | 86 | To download the package, visit the [IBM website](https://epwt-www.mybluemix.net/software/support/trial/cst/welcomepage.wss?siteId=548&tabId=1091&w=1&_ga=2.248881479.1380170776.1576783491-1713667621.1568643347). 87 | 88 | ### Kubernetes add-on for LSF 89 | 90 | LSF users have access to the Kubernetes add-on for LSF in fix pack 9. LSF users are able to use LSF's **bsub** CLI to submit work, but can also use Kubernetes **kubectl** CLI to submit pods. Sample pod definitions are included in the Quickstart Guide in the **doc** directory. 
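For a sense of what such a pod definition looks like, the example job templates in this repository hand scheduling over to LSF with `schedulerName: lsf` and pick a queue with the `lsf.ibm.com/queue` annotation. Below is a minimal sketch along those lines; the pod name, image and command are illustrative only and are not taken from the Quickstart Guide:
```
apiVersion: v1
kind: Pod
metadata:
  name: lsf-hello                  # illustrative name
  annotations:
    lsf.ibm.com/queue: "normal"    # target the normal queue
spec:
  schedulerName: lsf               # let the LSF scheduler place this pod
  restartPolicy: Never
  containers:
  - name: hello
    image: ubuntu:latest
    command: ["echo", "scheduled by LSF"]
```
Submitting it with `kubectl apply -f` queues the pod with LSF instead of the default Kubernetes scheduler.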
91 | 92 | -------------------------------------------------------------------------------- /examples/distributed-mnist.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | import math 4 | import tensorflow as tf 5 | from tensorflow.examples.tutorials.mnist import input_data 6 | from kubernetes import client, config 7 | import socket 8 | import fcntl 9 | import struct 10 | import time 11 | import threading 12 | 13 | # some hardcoded parameters for the training 14 | hidden_units = 100 15 | data_dir = "/tmp/mnist-data" 16 | batch_size = 100 17 | IMAGE_PIXELS = 28 18 | 19 | # these globals are used to bootstrap the TF cluster 20 | myindex = -99 21 | ps_hosts = [] 22 | worker_hosts = [] 23 | 24 | def get_ip_address(ifname): 25 | s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) 26 | return socket.inet_ntoa(fcntl.ioctl( 27 | s.fileno(), 28 | 0x8915, # SIOCGIFADDR 29 | struct.pack('256s', ifname[:15]) 30 | )[20:24]) 31 | 32 | def startup(): 33 | global ps_hosts 34 | global worker_hosts 35 | global myindex 36 | # The ip address of the pod is used to determine the index of 37 | # the pod in the ClusterSpec. 38 | myip = get_ip_address('eth0') 39 | 40 | # Get the namespace from the service account 41 | with open ("/run/secrets/kubernetes.io/serviceaccount/namespace", "r") as nsfile: 42 | namespace = nsfile.readline() 43 | 44 | # Get a list of pods that are part of this job 45 | # Since all pods may not be ready yet. The code will sleep and loop 46 | # until all pod IPs are available. 47 | config.load_incluster_config() 48 | ready = False 49 | while not ready: 50 | ready = True 51 | allpods = [] 52 | v1 = client.CoreV1Api() 53 | podlist = v1.list_namespaced_pod(namespace, label_selector="lsf.ibm.com/jobId="+os.environ["LSB_JOBID"]) 54 | for pod in podlist.items: 55 | if pod.status.pod_ip == None: 56 | ready = False 57 | time.sleep(1) 58 | continue 59 | else: 60 | allpods.append(pod.status.pod_ip) 61 | 62 | # Now that the pod list is complete. Get ready for cluster spec generation 63 | # by sorting the pod list by IP address. 64 | allpods = sorted(allpods, key=lambda ip: socket.inet_aton(ip)) 65 | print "allpods " + str(allpods) 66 | 67 | # Build the cluster configuration. 68 | # Keep track of which index in the cluster spec 69 | # corresponds to me. 70 | ix = 0 71 | for pod in allpods: 72 | ps_hosts.append(pod + ":2221") 73 | worker_hosts.append(pod + ":2222") 74 | if pod == myip: 75 | myindex = ix 76 | ix = ix + 1 77 | print "startup done. myindex: "+str(myindex)+", ps_hosts: "+str(ps_hosts)+", worker_hosts: "+str(worker_hosts) 78 | 79 | def run_ps(): 80 | global ps_hosts 81 | global worker_hosts 82 | global myindex 83 | print "ps_hosts: "+str(ps_hosts)+", myindex: "+str(myindex) 84 | cluster = tf.train.ClusterSpec({"ps": ps_hosts, "worker": worker_hosts}) 85 | server = tf.train.Server(cluster, 86 | job_name="ps", 87 | task_index=myindex) 88 | 89 | # to enable the parameter server to exit gracefully, make some queues that 90 | # workers can write to, to indicate that they are done. when a parameter 91 | # server sees that all workers are done, then it will exit. 
92 | with tf.device('/job:ps/task:%d' % myindex): 93 | queue = tf.FIFOQueue(cluster.num_tasks('worker'), tf.int32, shared_name='done_queue%d' % myindex) 94 | 95 | # wait for the queue to be filled 96 | with tf.Session(server.target) as sess: 97 | for i in range(cluster.num_tasks('worker')): 98 | sess.run(queue.dequeue()) 99 | print('ps:%d received "done" from worker:%d' % (myindex, i)) 100 | print('ps:%d quitting' % myindex) 101 | 102 | def run_worker(): 103 | global ps_hosts 104 | global worker_hosts 105 | global myindex 106 | cluster = tf.train.ClusterSpec({"ps": ps_hosts, "worker": worker_hosts}) 107 | server = tf.train.Server(cluster, 108 | job_name="worker", 109 | task_index=myindex) 110 | 111 | # Assigns ops to the local worker by default. 112 | with tf.device(tf.train.replica_device_setter( 113 | worker_device="/job:worker/task:%s" % myindex, 114 | cluster=cluster)): 115 | 116 | # Variables of the hidden layer 117 | hid_w = tf.Variable( 118 | tf.truncated_normal([IMAGE_PIXELS * IMAGE_PIXELS, hidden_units], 119 | stddev=1.0 / IMAGE_PIXELS), name="hid_w") 120 | hid_b = tf.Variable(tf.zeros([hidden_units]), name="hid_b") 121 | 122 | # Variables of the softmax layer 123 | sm_w = tf.Variable( 124 | tf.truncated_normal([hidden_units, 10], 125 | stddev=1.0 / math.sqrt(hidden_units)), 126 | name="sm_w") 127 | sm_b = tf.Variable(tf.zeros([10]), name="sm_b") 128 | 129 | x = tf.placeholder(tf.float32, [None, IMAGE_PIXELS * IMAGE_PIXELS]) 130 | y_ = tf.placeholder(tf.float32, [None, 10]) 131 | 132 | hid_lin = tf.nn.xw_plus_b(x, hid_w, hid_b) 133 | hid = tf.nn.relu(hid_lin) 134 | 135 | y = tf.nn.softmax(tf.nn.xw_plus_b(hid, sm_w, sm_b)) 136 | loss = -tf.reduce_sum(y_ * tf.log(tf.clip_by_value(y, 1e-10, 1.0))) 137 | 138 | global_step = tf.Variable(0) 139 | 140 | train_op = tf.train.AdagradOptimizer(0.01).minimize( 141 | loss, global_step=global_step) 142 | 143 | saver = tf.train.Saver() 144 | summary_op = tf.summary.merge_all() 145 | init_op = tf.initialize_all_variables() 146 | 147 | # set up some queue to notify the ps tasks when it time to exit 148 | stop_queues = [] 149 | # create a shared queue on the worker which is visible on /job:ps/task:%d 150 | for i in range(cluster.num_tasks('ps')): 151 | with tf.device('/job:ps/task:%d' % i): 152 | stop_queues.append(tf.FIFOQueue(cluster.num_tasks('worker'), tf.int32, shared_name='done_queue%d' % i).enqueue(1)) 153 | 154 | # Create a "supervisor", which oversees the training process. 155 | sv = tf.train.Supervisor(is_chief=(myindex==0), 156 | logdir="/tmp/train_logs", 157 | init_op=init_op, 158 | summary_op=summary_op, 159 | saver=saver, 160 | global_step=global_step, 161 | save_model_secs=600) 162 | 163 | mnist = input_data.read_data_sets(data_dir, one_hot=True) 164 | 165 | # The supervisor takes care of session initialization, restoring from 166 | # a checkpoint, and closing when done or an error occurs. 167 | with sv.managed_session(server.target) as sess: 168 | # Loop until the supervisor shuts down or 1000000 steps have completed. 169 | step = 0 170 | while not sv.should_stop() and step < 1000000: 171 | # Run a training step asynchronously. 172 | # See `tf.train.SyncReplicasOptimizer` for additional details on how to 173 | # perform *synchronous* training. 
174 | 175 | batch_xs, batch_ys = mnist.train.next_batch(batch_size) 176 | train_feed = {x: batch_xs, y_: batch_ys} 177 | 178 | _, step = sess.run([train_op, global_step], feed_dict=train_feed) 179 | if step % 100 == 0: 180 | print "Done step %d" % step 181 | 182 | # notify the parameter servers that its time to exit. 183 | for op in stop_queues: 184 | sess.run(op) 185 | 186 | # Ask for all the services to stop. 187 | sv.stop() 188 | 189 | if __name__ == "__main__": 190 | # Each pod is both a parameter server and a worker 191 | # Each runs in a different thread. 192 | startup() 193 | threads = [ 194 | threading.Thread(target=run_ps), 195 | threading.Thread(target=run_worker)] 196 | for thread in threads: 197 | thread.start() 198 | for thread in threads: 199 | thread.join() 200 | -------------------------------------------------------------------------------- /doc/LSF_Operator/lsf_v1beta1_lsfcluster_crd.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apiextensions.k8s.io/v1beta1 2 | kind: CustomResourceDefinition 3 | metadata: 4 | name: lsfclusters.lsf.spectrumcomputing.ibm.com 5 | labels: 6 | app.kubernetes.io/name: "ibm-spectrum-lsf" 7 | app.kubernetes.io/managed-by: "lsfclusters.lsf.spectrumcomputing.ibm.com" 8 | app.kubernetes.io/instance: "lsfcrd" 9 | release: "lsf" 10 | spec: 11 | group: lsf.spectrumcomputing.ibm.com 12 | names: 13 | kind: LSFCluster 14 | listKind: LSFClusterList 15 | plural: lsfclusters 16 | singular: lsfcluster 17 | shortNames: 18 | - lsfs 19 | scope: Namespaced 20 | subresources: 21 | status: {} 22 | version: v1beta1 23 | versions: 24 | - name: v1beta1 25 | served: true 26 | storage: true 27 | validation: 28 | openAPIV3Schema: 29 | properties: 30 | apiVersion: 31 | type: string 32 | description: "apiVersion defines the versioned schema of this representation of an object" 33 | kind: 34 | type: string 35 | description: "This CRD is for the LSFCluster kind" 36 | metadata: 37 | description: "Metadata for this specific instance" 38 | properties: 39 | name: 40 | type: string 41 | description: "Name to assign to this LSF cluster" 42 | spec: 43 | description: "Contains specifications for an LSF cluster" 44 | properties: 45 | namespace: 46 | type: string 47 | description: "The namespace to deploy in" 48 | serviceAccount: 49 | type: string 50 | description: "The name of the service account for the LSF cluster to use to access the K8s API" 51 | licenseAccepted: 52 | type: boolean 53 | description: "Indicate acceptance of the licences" 54 | cluster: 55 | description: "These are cluster wide configuration parameters" 56 | properties: 57 | lsfrole: 58 | type: string 59 | description: "Either lsf or podscheduler" 60 | clustername: 61 | type: string 62 | description: "Prefix for objects created for this cluster" 63 | administrators: 64 | description: "A list of LSF administrators" 65 | type: array 66 | items: 67 | type: string 68 | pvc: 69 | description: "Storage for failover" 70 | properties: 71 | dynamicStorage: 72 | type: boolean 73 | storageClass: 74 | type: string 75 | selectorLabel: 76 | type: string 77 | selectorValue: 78 | type: string 79 | size: 80 | type: string 81 | volumes: 82 | description: "List of additional volumes to mpount to all LSF pods" 83 | type: array 84 | items: 85 | properties: 86 | name: 87 | type: string 88 | mount: 89 | type: string 90 | selectorLabel: 91 | type: string 92 | selectorValue: 93 | type: string 94 | accessModes: 95 | type: string 96 | size: 97 | type: string 98 | userauth: 99 | 
description: "Configutation for providing user authentication within pods" 100 | properties: 101 | authconfigargs: 102 | type: string 103 | configs: 104 | type: array 105 | items: 106 | properties: 107 | name: 108 | type: string 109 | filename: 110 | type: string 111 | starts: 112 | description: "List of daemons to start for authentication" 113 | type: array 114 | items: 115 | type: string 116 | master: 117 | properties: 118 | image: 119 | type: string 120 | imagePullPolicy: 121 | type: string 122 | replicas: 123 | type: integer 124 | minimum: 0 125 | maximum: 1 126 | placement: 127 | properties: 128 | includeLabel: 129 | type: string 130 | excludeLabel: 131 | type: string 132 | tolerateName: 133 | type: string 134 | tolerateValue: 135 | type: string 136 | tolerateEffect: 137 | type: string 138 | resources: 139 | properties: 140 | requests: 141 | properties: 142 | cpu: 143 | type: string 144 | memory: 145 | type: string 146 | limits: 147 | properties: 148 | cpu: 149 | type: string 150 | memory: 151 | type: string 152 | mountList: 153 | type: array 154 | items: 155 | type: string 156 | gui: 157 | properties: 158 | image: 159 | type: string 160 | imagePullPolicy: 161 | type: string 162 | replicas: 163 | type: integer 164 | minimum: 0 165 | maximum: 1 166 | db: 167 | properties: 168 | image: 169 | type: string 170 | imagePullPolicy: 171 | type: string 172 | cpu: 173 | type: string 174 | memory: 175 | type: string 176 | passwordSecret: 177 | type: string 178 | placement: 179 | properties: 180 | includeLabel: 181 | type: string 182 | excludeLabel: 183 | type: string 184 | tolerateName: 185 | type: string 186 | tolerateValue: 187 | type: string 188 | tolerateEffect: 189 | type: string 190 | resources: 191 | properties: 192 | requests: 193 | properties: 194 | cpu: 195 | type: string 196 | memory: 197 | type: string 198 | limits: 199 | properties: 200 | cpu: 201 | type: string 202 | memory: 203 | type: string 204 | mountList: 205 | type: array 206 | items: 207 | type: string 208 | 209 | computes: 210 | type: array 211 | items: 212 | properties: 213 | image: 214 | type: string 215 | imagePullPolicy: 216 | type: string 217 | replicas: 218 | type: integer 219 | minimum: 0 220 | maximum: 10 221 | placement: 222 | properties: 223 | includeLabel: 224 | type: string 225 | excludeLabel: 226 | type: string 227 | tolerateName: 228 | type: string 229 | tolerateValue: 230 | type: string 231 | tolerateEffect: 232 | type: string 233 | resources: 234 | properties: 235 | requests: 236 | properties: 237 | cpu: 238 | type: string 239 | memory: 240 | type: string 241 | limits: 242 | properties: 243 | cpu: 244 | type: string 245 | memory: 246 | type: string 247 | mountList: 248 | type: array 249 | items: 250 | type: string 251 | 252 | -------------------------------------------------------------------------------- /examples/Run_Windows/README.md: -------------------------------------------------------------------------------- 1 | # Run Windows 2 | 3 | The run windows allow you to control when pods will be allowed to run. This is useful for sharing the kubernestes resources between different workloads. The resources can be used to run different types of pods based on the time of day, and day of the week. 4 | 5 | In this test we will use the "night" queue. Job pods will be annotated with the "night" queue. We will see that the pods remain in a pending state, until the run window opens. 
6 | 7 | **NOTE: The pods are set to UTC time, so you need to account for this** 8 | 9 | ## Changing the Run Window 10 | The run windows are defined in the queue definitions. This configuration file is stored as a 11 | configMap, so you can edit it using "kubectl". Use the following proceedure to edit the run window. 12 | 13 | 1. Determine the configMap name: 14 | ``` 15 | $ kubectl get cm 16 | ``` 17 | Look for a configMap with "ibm-spectrum-computing-prod-queues" in the name. 18 | 19 | 2. Edit the configMap discovered from above: 20 | ``` 21 | kubectl edit cm myname-ibm-spectrum-computing-prod-queues 22 | ``` 23 | 24 | 3. Find the "night" queue section. It will look something like: 25 | ``` 26 | Begin Queue 27 | QUEUE_NAME = night 28 | PRIORITY = 15 29 | RUN_WINDOW = 5:19:00-1:8:30 20:00-8:30 30 | r1m = 0.8/2.5 31 | FAIRSHARE = USER_SHARES[[gold,10] [silver,4] [bronze,1]] 32 | DESCRIPTION = For large heavy duty pods, running during off hours 33 | End Queue 34 | ``` 35 | 36 | 4. Edit the "RUN_WINDOW", and change the time. In my case I want to get the results quicker so I changed it to: 37 | ``` 38 | RUN_WINDOW = 5:19:00-1:8:30 14:20-8:30 39 | ``` 40 | 41 | 5. Run the test. 42 | 43 | **NOTE: When editing the configMap the spacing is significant. Do not change the format.** 44 | 45 | ## Other Window Options 46 | The RUN_WINDOW defines when pods will be allowed to run. When the window opens the pods will be started, and when the window closes the pods will be killed. There is another option that allows the pods to start when the run window opens, and to run to completion even if the run window is closed. This is the "DISPATCH_WINDOW". Replacing the "RUN_WINDOW" with "DISPATCH_WINDOW" will cause it to allow started pod to run to completion. 47 | Changing the configMap from: 48 | ``` 49 | RUN_WINDOW = 20:00-8:30 50 | ``` 51 | to: 52 | ``` 53 | DISPATCH_WINDOW = 20:00-8:30 54 | ``` 55 | Will allow the started pods to run to completion. 56 | 57 | 58 | ## Running the Test 59 | There are several tests we can run. We can: 60 | 1. Show what happens when the "RUN_WINDOW" opens. 61 | 2. Show what happens when the "RUN_WINDOW" closes. 62 | 3. Show what happens when the "DISPATCH_WINDOW" closes. 63 | Which one we run will be determined by the current "RUN_WINDOW" / "DISPATCH_WINDOW" value. 64 | 65 | 66 | ### Test 1: The RUN_WINDOW Opens 67 | Before running the test edit the RUN_WINDOW as described above and set it to open 10 minutes in the future. Run the "runtest.sh" script to start the test. It will output the time on the current host, and on the master pod. You will see something like: 68 | ``` 69 | # ./runtest.sh 70 | This script will create a number of test jobs in the night queue. 71 | The jobs will remain in the pending state till the run window is open 72 | 73 | 74 | You time is: Tue Jun 25 14:23:34 EDT 2019 75 | Pod time is: Tue Jun 25 18:23:34 UTC 2019 76 | 77 | The night queue run window currently has: RUN_WINDOW: 5:19:00-1:8:30 20:00-8:30 78 | The format is: opentime-closetime 79 | Where the time is expressed as weekday:hour:minute or hour:minute 80 | and weekday 0 equals Sunday. 81 | ``` 82 | 83 | We can see that there is a 4 hour time difference between the worker and the pod. 
If we wanted the run window to open at 6:00pm we would need to change the run window to: 84 | ``` 85 | RUN_WINDOW: 5:19:00-1:8:30 22:00-8:30 86 | ``` 87 | We might also want to change when it closes and the weekend window so: 88 | ``` 89 | RUN_WINDOW: 5:22:00-1:12:30 22:00-12:30 90 | ``` 91 | 92 | The script will create 10 jobs in the night queue. It will then poll every minute to see the state of the pods, but will only output when there is a change. 93 | 94 | Allow the script to complete. It should finish about 2 minutes after the run window opens. 95 | 96 | 97 | ### Analyzing the Results 98 | The script will check every minute to see the state of the "rwjob-*" pods and will output in the following format: 99 | ``` 100 | 14:23:41,18:23:41,10,0,0 101 | ``` 102 | Where the comma delimited columns are: 103 | * Worker time (HH:MM:SS) 104 | * Container time (HH:MM:SS) 105 | * Number of pending pods from this test 106 | * Number of running pods from this test 107 | * Number of completed pods from this test 108 | 109 | Provided the RUN_WINDOW was set correctly we may see something like: 110 | ``` 111 | 15:27:40,19:27:40, 10, 0, 0 112 | 15:31:42,19:31:42, 0, 10, 0 113 | 15:32:43,19:32:43, 0, 6, 4 114 | 15:33:43,19:33:43, 0, 0, 10 115 | Test complete 116 | ``` 117 | 118 | My RUN_WINDOW was "RUN_WINDOW = 5:19:00-1:8:30 19:30-8:30" so at 19:30 the pods would be allowed to start. From the output we see that by 19:31:42 all pods were running, so the RUN_WINDOW opening caused the pending pods to run. 119 | 120 | 121 | ### Test 2: The RUN_WINDOW Closes 122 | In this test we will evaluate what happens when the RUN_WINDOW closes. We expect that any pods that are running will be killed and returned to a Pending state. 123 | 124 | To run this test we must: 125 | 1. Modify the test job "templateJob.yml" so that it runs a lot longer. This is so we have enough time to see the run window close. Edit the "templateJob.yml" file and locate the line: 126 | ``` 127 | command: ["sleep", "60"] 128 | ``` 129 | and change to 130 | ``` 131 | command: ["sleep", "3600"] 132 | ``` 133 | 134 | 2. Edit the "night" queue and add an additional parameter. 135 | ``` 136 | JOB_CONTROLS = SUSPEND[SIGTERM] 137 | ``` 138 | It should look something like: 139 | ``` 140 | Begin Queue 141 | QUEUE_NAME = night 142 | PRIORITY = 15 143 | RUN_WINDOW = 5:19:00-1:8:30 19:30-20:00 144 | r1m = 0.8/2.5 145 | JOB_CONTROLS = SUSPEND[SIGTERM] 146 | FAIRSHARE = USER_SHARES[[gold,10] [silver,4] [bronze,1]] 147 | DESCRIPTION = For large heavy duty pods, running during off hours 148 | End Queue 149 | ``` 150 | 151 | 3. Edit the RUN_WINDOW and change it so that the run window open-time is a few minutes in the past, and the close time to 10 minutes in the future. 152 | 153 | 4. Run the test script: "runtest.sh" 154 | 155 | 156 | ### Analyze Test 2 Results 157 | In this case the pending jobs quickly move to running, so we see 10 jobs in the running column. If you see 10 pending jobs after a minute, check the run window open time. The output will look something like: 158 | ``` 159 | 15:56:16,19:56:16, 10, 0, 0 160 | 15:58:18,19:58:18, 0, 10, 0 161 | 16:00:19,20:00:19, 10, 0, 0 162 | ``` 163 | In my case the RUN_WINDOW was: 164 | ``` 165 | RUN_WINDOW: 5:19:00-1:8:30 19:30-20:00 166 | ``` 167 | When the script is run there are initially 10 pending jobs. You may find that you have some running. In the next poll all of the pods are running. 168 | 169 | At 20:00 the run window closes, and we see that window closes. 
If this were not a container it would be suspended, but since containers do not support suspend, they are killed and go back to a pending state e.g. 170 | ``` 171 | rwjob-1-g8hxz 0/1 Pending 0 12m 172 | : 173 | rwjob-10-77ffw 0/1 Pending 0 12m 174 | ``` 175 | 176 | Once the run window re-opens the pods will be scheduled again. 177 | 178 | 179 | ### Test 3: The DISPATCH_WINDOW Closes 180 | This test case is the same as the previous one, except we change the RUN_WINDOW to DISPATCH_WINDOW. Here we will see that the pods continue to run even though the window has closed. 181 | 182 | To run this test we must: 183 | 1. Modify the test job "templateJob.yml" so that it runs a lot longer. This is so we have enough time to see the run window close. Edit the "templateJob.yml" file and locate the line: 184 | ``` 185 | command: ["sleep", "60"] 186 | ``` 187 | and change to 188 | ``` 189 | command: ["sleep", "3600"] 190 | ``` 191 | 192 | 2. Edit the "night" queue and replace 193 | ``` 194 | RUN_WINDOW = {Some value} 195 | ``` 196 | with 197 | ``` 198 | DISPATCH_WINDOW = {Some value} 199 | ``` 200 | It should look something like: 201 | ``` 202 | Begin Queue 203 | QUEUE_NAME = night 204 | PRIORITY = 15 205 | DISPATCH_WINDOW = 5:19:00-1:8:30 19:30-20:20 206 | r1m = 0.8/2.5 207 | JOB_CONTROLS = SUSPEND[SIGTERM] 208 | FAIRSHARE = USER_SHARES[[gold,10] [silver,4] [bronze,1]] 209 | DESCRIPTION = For large heavy duty pods, running during off hours 210 | End Queue 211 | ``` 212 | 213 | 3. Edit the DISPATCH_WINDOW and change it so that the dispatch window open-time is a few minutes in the past, and the close time is 10 minutes in the future. 214 | 215 | 4. Run the test script: "runtest.sh" 216 | 217 | ### Analyze Test 3 Results 218 | In this case the pending jobs quickly move to running, so we see 10 jobs in the running column. If you see 10 pending jobs after a minute, check the run window open time. The output will look something like: 219 | ``` 220 | 16:28:03,20:28:03, 10, 0, 0 221 | 16:29:04,20:29:04, 0, 10, 0 222 | 17:29:05,21:29:05, 0, 0, 10 223 | Test Complete 224 | ``` 225 | In my case the DISPATCH_WINDOW was: 226 | ``` 227 | DISPATCH_WINDOW: 5:19:00-1:8:30 19:30-20:35 228 | ``` 229 | When the script is run there are initially 10 pending jobs. You may find that you have some running. In the next poll all of the pods are running. 230 | 231 | At 20:35 the dispatch window closes. This time, since it is the DISPATCH_WINDOW, no action is taken on the running pods and, given time, they complete. 232 | 233 | ## Conclusion 234 | We have seen that we can define time periods where pods will be allowed to run. We can use this to define queues for processing workload at certain times of the day/week. We have also seen that we can control what happens to running pods when that time period ends. We can have running pods run to completion, or have them terminate and go back to a pending state. 235 | 236 | 237 | -------------------------------------------------------------------------------- /examples/Pod_Priority_and_Preemption/README.md: -------------------------------------------------------------------------------- 1 | # Pod Priority and Preemption 2 | 3 | These test cases look at pods with different priorities and how to handle resource contention. 4 | When there are enough resources for everyone there is no contention, but when there is contention, 5 | or some processes need to run as soon as possible, what should happen? 
6 | 7 | These tests will show that the enhanced scheduler provides the concept of pod priority, expressed 8 | through queues. The default configuration has the following queues: 9 | * priority 10 | * normal 11 | * idle 12 | * night 13 | 14 | The test jobs will start 10 pods per job, with each pod needing 1 core. If you are running on 15 | VMs or small machines you may need to adjust the job template yaml files to suit your environment. 16 | 17 | ## Test case 1: Priority Pods Submitted to an Already busy Cluster 18 | In this test the cluster will be loaded with many pods of a normal priority. Once the 19 | cluster is saturated with jobs, high priority pods will be created. In this case the 20 | normal priority pods that are already running will be allowed to run to completion; 21 | however, the high priority jobs will be started before any new normal priority pods 22 | will be started. 23 | 24 | Two scripts are provided to run this test: 25 | * **priority-test-non-preempt.sh** 26 | * **completions.sh** 27 | 28 | ### Running the Test 29 | 30 | 1. Start the test by running **priority-test-non-preempt.sh**. It will ask for the number of jobs to create. It will create 10 pods per job with each pod using 1 CPU core. You will want to create enough jobs so that all cores are used. 31 | 32 | 2. In another window run **completions.sh**. It will gather the queue name, number of jobs in that queue, number of pending jobs and number of running jobs. 33 | 34 | 3. Wait for all pods to complete (when the number of jobs drops to 0), then analyze the results. 35 | 36 | ### Analyzing the Results 37 | 38 | When the test is first run the **completions.sh** script will output something like: 39 | ``` 40 | 15:42:00,priority,0,0,0,normal,0,0,0,idle,0,0,0,night,0,0,0 41 | ``` 42 | The output has the following columns: 43 | * Time hh:mm:ss 44 | * Queue Name (priority) 45 | * Number of pods in the priority queue 46 | * Number of pending pods in the priority queue 47 | * Number of running pods in the priority queue 48 | * Queue Name (normal) 49 | * Number of pods in the normal queue 50 | * Number of pending pods in the normal queue 51 | * Number of running pods in the normal queue 52 | * Queue Name (idle) 53 | * Number of pods in the idle queue 54 | * Number of pending pods in the idle queue 55 | * Number of running pods in the idle queue 56 | * Queue Name (night) 57 | * Number of pods in the night queue 58 | * Number of pending pods in the night queue 59 | * Number of running pods in the night queue 60 | 61 | As the pods are submitted the number of pods in the normal queue will increase e.g. 62 | ``` 63 | 15:42:31,priority,0,0,0,normal, 0, 0, 0, idle,0,0,0,night,0,0,0 64 | 15:42:42,priority,0,0,0,normal, 119, 15, 104, idle,0,0,0,night,0,0,0 65 | 15:42:52,priority,0,0,0,normal, 200, 81, 119, idle,0,0,0,night,0,0,0 66 | ``` 67 | In this case it starts with 0 pods and quickly goes to 200 jobs, with 119 running. 68 | This cluster can run about 120 pods, so all the resources are used. 69 | 70 | Shortly, high priority pods will be submitted. When they start the number of pods in the 71 | priority queue will go up e.g. 72 | ``` 73 | 15:43:13,priority, 0, 0, 0,normal,200,81,119,idle,0,0,0,night,0,0,0 74 | 15:43:23,priority, 57, 57, 0,normal,260,141,119,idle,0,0,0,night,0,0,0 75 | 15:43:34,priority, 120, 120, 0,normal,330,211,119,idle,0,0,0,night,0,0,0 76 | ``` 77 | Note the priority jobs do not start immediately. This is because all of the resources 78 | are currently running the normal priority jobs. 
The next test case will introduce 79 | pod preemption. 80 | 81 | As pods in the normal queue complete those resources are then used to run the pods 82 | in the priority queue. No pods in the normal queue will be run. In the data the 83 | number of running pods in the normal queue will drop to 0, while the number of jobs 84 | in the priority queue goes to the limit. It will look something like: 85 | ``` 86 | Run Run 87 | 15:43:44,priority, 189, 188, 1,normal, 390, 271, 119, idle,0,0,0,night,0,0,0 88 | 15:44:05,priority, 210, 184, 26,normal, 384, 291, 93, idle,0,0,0,night,0,0,0 89 | 15:44:26,priority, 210, 159, 51,normal, 357, 291, 66, idle,0,0,0,night,0,0,0 90 | 15:44:47,priority, 210, 134, 76,normal, 333, 291, 42, idle,0,0,0,night,0,0,0 91 | 15:45:07,priority, 203, 106, 97,normal, 314, 291, 23, idle,0,0,0,night,0,0,0 92 | 15:45:28,priority, 193, 81, 112,normal, 299, 291, 8, idle,0,0,0,night,0,0,0 93 | 15:45:49,priority, 175, 57, 118,normal, 291, 291, 0, idle,0,0,0,night,0,0,0 94 | ``` 95 | Once the priority pods start to finish, and there are no more pending priority 96 | jobs to run, the normal priority pods will be given resources to run, and the output will 97 | look something like: 98 | ``` 99 | Run Run 100 | 15:46:41,priority, 122, 3, 119, normal, 291, 291, 0, idle,0,0,0,night,0,0,0 101 | 15:47:22,priority, 75, 0, 75, normal, 291, 249, 42, idle,0,0,0,night,0,0,0 102 | 15:48:04,priority, 34, 0, 34, normal, 286, 202, 84, idle,0,0,0,night,0,0,0 103 | 15:48:46,priority, 10, 0, 10, normal, 272, 164, 108, idle,0,0,0,night,0,0,0 104 | 15:49:06,priority, 0, 0, 0, normal, 259, 140, 119, idle,0,0,0,night,0,0,0 105 | ``` 106 | 107 | ### Test case 1: Priority Pods Submitted to an Already busy Cluster Conclusion 108 | From the test results we can see that the scheduler provides the ability to assign 109 | priority to pods, and that pods with higher priority will be given preference when 110 | resources for running more pods are available. The next test case will do the 111 | same, except that when there is resource contention the lower priority pods will be killed 112 | to free resources for the higher priority pods. 113 | 114 | 115 | ## Test case 2: Priority Pods Submitted to a Busy Cluster with Preemption 116 | In this test the cluster will be loaded with many pods of idle priority. Once the 117 | cluster is saturated with jobs, high priority pods will be created. In this case the 118 | idle priority pods that are already running will be killed to free their resources 119 | so that the high priority pods can get the resources sooner. 120 | 121 | Two scripts are provided to run this test: 122 | * **priority-test-preempt.sh** 123 | * **completions.sh** 124 | 125 | Additional configuration is needed to run this test. Currently the MXJ parameter needs 126 | to be set in the scheduler configuration for this test to run. The MXJ value is the 127 | maximum number of jobs that the scheduler will send to a worker at any time. 128 | The out-of-box value is not set, because it is dependent on the hardware in the cluster. 129 | Use the procedure below to set the MXJ value. 130 | 131 | 1. Locate the master pod by looking for **ibm-spectrum-computing-prod-master** in the list of pods e.g. 132 | ``` 133 | $ kubectl get pods |grep ibm-spectrum-computing-prod-master 134 | lsf-ibm-spectrum-computing-prod-master-56b55d6dc8-84gcj 1/1 Running 0 3d19h 135 | ``` 136 | 137 | 2. Connect to the management pod e.g. 138 | ``` 139 | $ kubectl exec -ti lsf-ibm-spectrum-computing-prod-master-56b55d6dc8-84gcj bash 140 | ``` 141 | 142 | 3. 
Edit the lsb.hosts configuration file 143 | ``` 144 | LSF POD [root@lsfmaster /]# vi /opt/ibm/lsfsuite/lsf/conf/lsbatch/myCluster/configdir/lsb.hosts 145 | ``` 146 | 147 | 4. Modify the **default** entry from: 148 | ``` 149 | default () () () () () () (Y) # Example 150 | ``` 151 | to 152 | ``` 153 | default 25 () () () () () (Y) # Example 154 | ``` 155 | ** NOTE: If your workers have few cores, you may need to use a MXJ lower than 25.** 156 | 157 | 5. Save the file and trigger reconfiguration. 158 | ``` 159 | LSF POD [root@lsfmaster /]# badmin mbdrestart 160 | ``` 161 | 162 | ### Run the Test 163 | 164 | 1. Start the test by running **priority-test-preempt.sh**. It will ask for the number of jobs to create. It will create 10 pods per job with each pod using 1 CPU core. You will want to create enough jobs so that all cores are used. 165 | 2. In another window run **completions.sh**. It will gather the queue name, number of jobs in that queue, number of pending jobs and number of running jobs. 166 | 167 | 3. Wait for all priority pods to complete (when the number of jobs drops to 0), than analyze the results. The idle queue jobs will run for an hour. It is not necessary to wait for them to all complete. 168 | 169 | 170 | ### Analyzing the Results 171 | 172 | When the test is first run the **completions.sh** script will output something like: 173 | ``` 174 | NJobs,Pend,Run NJobs,Pend, Run 175 | 14:17:29,priority, 0, 0, 0,..,idle, 0, 0, 0 176 | 14:17:40,priority, 0, 0, 0,..,idle, 61, 9, 52 177 | 14:17:50,priority, 0, 0, 0,..,idle, 140, 40, 100 178 | ``` 179 | ** NOTE: Extra fields have been removed for clarity ** 180 | 181 | Above we see that the idle queue goes from 0 to 140 pods. The idle queue pods are long running. 182 | In the absence of higher priority jobs these will run for an hour, but in our test we see high 183 | priority pods being submitted e.g. 184 | ``` 185 | NJobs,Pend,Run NJobs,Pend, Run 186 | 14:18:12,priority, 0, 0, 0,..,idle, 140, 40, 100 187 | 14:18:23,priority, 56, 41, 5,..,idle, 125, 40, 85 <-- Preemption starts 188 | 14:18:33,priority, 121, 98, 18,..,idle, 120, 43, 77 189 | ``` 190 | We see almost immediately that the number of running pods in the idle queue starts to drop. 191 | This is because the priority jobs have preference to the point where pods in the idle queue 192 | will be killed to free resources for the priority pods. 193 | 194 | This process continues until the number of pending pods in the priority queue reaches zero. 195 | Once this happens more resources become free to run pods from the idle queue, and you start 196 | to see the number of running pods from the idle rise e.g. 197 | ``` 198 | NJobs,Pend,Run NJobs,Pend, Run 199 | 14:21:18,priority, 92, 4, 87,..,idle, 139, 127, 12 200 | 14:21:29,priority, 91, 1, 89,..,idle, 139, 130, 9 201 | 14:21:39,priority, 88, 0, 88,..,idle, 140, 128, 12 <-- Resources available for idle queue 202 | 14:21:49,priority, 73, 0, 73,..,idle, 140, 116, 24 203 | ``` 204 | If there are enough priority jobs you may see that the number of running pods from the idle queue 205 | drops to 0. 206 | 207 | 208 | ### Test case 2: Priority Pods Submitted to a Busy Cluster with Preemption Conclusion 209 | From the test results we can see that the scheduler provides the ability to assign 210 | priority to pods, and that pods with higher priority will be given preference when 211 | resources for running more pods are available. 
We also see that the scheduler may 212 | choose to kill pods of lower priority in order to free resources quicker for higher 213 | priority pods. 214 | 215 | 216 | 217 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. 
For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /doc/LSF_Operator/example-lsf.yaml: -------------------------------------------------------------------------------- 1 | #-------------------------------------------------------- 2 | # Copyright IBM Corp. 2020. All rights reserved. 3 | # US Government Users Restricted Rights - Use, duplication or disclosure 4 | # restricted by GSA ADP Schedule Contract with IBM Corp. 
5 | #-------------------------------------------------------- 6 | # 7 | apiVersion: lsf.spectrumcomputing.ibm.com/v1beta1 8 | kind: LSFCluster 9 | metadata: 10 | name: example-lsfcluster 11 | labels: 12 | app.kubernetes.io/name: "ibm-spectrum-lsf" 13 | app.kubernetes.io/managed-by: "lsfclusters.lsf.spectrumcomputing.ibm.com" 14 | app.kubernetes.io/instance: "lsf" 15 | release: "lsf" 16 | 17 | spec: 18 | # Indicate acceptance of the Licenses 19 | # The licenses are available from this site: 20 | # http://www-03.ibm.com/software/sla/sladb.nsf 21 | # Use the search option to find IBM Spectrum LSF CE 22 | licenseAccepted: false 23 | 24 | # Provide the name of the service account you wish to use 25 | serviceAccount: my-new-sa 26 | 27 | cluster: 28 | # The operator can deploy lsf in two different modes: 29 | # lsf - LSF is deployed as a cluster within K8s 30 | # podscheduler - LSF enhances the pod scheduling capabilities 31 | # of K8s. 32 | lsfrole: lsf 33 | 34 | clustername: mylsf2 35 | 36 | # The administrators is a list of usernames that can perform 37 | # LSF administrative functions using the LSF GUI. 38 | # 39 | # administrators: 40 | # - someuser 41 | 42 | # PersistentVolumeClaim (Storage volume) parameters 43 | pvc: 44 | dynamicStorage: false 45 | storageClass: "" 46 | selectorLabel: "lsfvol" 47 | selectorValue: "lsfvol" 48 | size: "20G" 49 | 50 | # The cluster needs to access users home directories and applications 51 | # This section allows you to define the PersistentVolume to use to 52 | # access them. 53 | # 54 | # volumes: 55 | # - name: "Home" 56 | # mount: "/home" 57 | # selectorLabel: "realhome" 58 | # selectorValue: "realhome" 59 | # accessModes: "ReadWriteMany" 60 | # size: "9G" 61 | # - name: "Applications" 62 | # mount: "/apps" 63 | # selectorLabel: "apps" 64 | # selectorValue: "apps" 65 | # accessModes: "ReadOnlyMany" 66 | # size: "" 67 | 68 | # This section is for configuring username resolution 69 | # The pods will call "authconfig" to setup the authentication 70 | # It can be used with the authentication schemes that "authconfig" 71 | # supports. 72 | # 73 | # userauth: 74 | # Configs are a list of secrets that will be passed to the 75 | # running pod as configuration files. This is how to pass 76 | # certificates to the authentication daemons. The secret has 77 | # a name and value and is created using: 78 | # kubectl create secret generic test-secret --from-literal=filename=filecontents 79 | # The actual filename in the pod is the filename from the configs 80 | # list below plus the filename from the command above. 81 | # configs: 82 | # - name: "test-secret" 83 | # filename: "/etc/test/test-secret" 84 | 85 | # These are the arguments to invoke the "authconfig" command 86 | # with. This will generate the needed configuration files. 87 | # NOTE: The "--nostart" argument will be added. 88 | # authconfigargs: "--enableldap --enableldapauth --ldapserver=ldap://172.16.2.2/,ldap://172.16.2.3/ --ldapbasedn=dc=platformlab,dc=ibm,dc=com --update" 89 | 90 | # List the daemons to start, e.g. 
nslcd, and sssd 91 | # starts: 92 | # - /usr/sbin/nslcd 93 | 94 | 95 | master: 96 | image: "ibmcom/lsfce-master:10.1.0.9r02" 97 | imagePullPolicy: "Always" 98 | 99 | # The placement variables control how the pods will be placed 100 | placement: 101 | # includeLabel - Optional label to apply to hosts that 102 | # should be allowed to run the compute pod 103 | includeLabel: "" 104 | 105 | # excludeLabel - Is a label to apply to hosts to prevent 106 | # them from being used to host the compute pod 107 | excludeLabel: "excludelsf" 108 | 109 | # Taints can be used to control which nodes are available 110 | # to the LSF and Kubernetes scheduler. If used these 111 | # parameters are used to allow the LSF pods to run on 112 | # tainted nodes. When not defined the K8s master nodes 113 | # will be used to host the master pod. 114 | # 115 | # tolerateName - Optional name of the taint that has been 116 | # applied to a node 117 | # tolerateValue - The value given to the taint 118 | # tolerateEffect - The effect of the taint 119 | # 120 | tolerateName: "" 121 | tolerateValue: "" 122 | tolerateEffect: NoExecute 123 | 124 | # Define the number of this type of pod you want to have running. 125 | # Valid values for a master are 0 or 1. 126 | replicas: 1 127 | 128 | # These are the Memory and CPU allocations for the pod. 129 | # Set the memory and cpu requests and limits to the same values 130 | # to get a guaranteed QoS. 131 | resources: 132 | requests: 133 | cpu: "1" 134 | memory: "1G" 135 | limits: 136 | cpu: "1" 137 | memory: "1G" 138 | 139 | # The master pod will typically need to get access to the user data 140 | # and home directory. The mountList provides a way to access 141 | # directories from the OS running on the Kubernetes worker nodes. 142 | # This will be translated into hostPath volumeMounts for the pod. 143 | # For example 144 | # listing '/home' will cause /home from the worker OS to be 145 | # mounted in the running container, allowing users to access there 146 | # home directory (assuming automounter is setup). 147 | #mountList: 148 | # - /home 149 | 150 | gui: 151 | image: "ibmcom/lsfce-gui:10.1.0.9r02" 152 | imagePullPolicy: "Always" 153 | 154 | # Database parameters 155 | db: 156 | image: "mariadb:10.4" 157 | imagePullPolicy: "IfNotPresent" 158 | cpu: 250m 159 | memory: 256M 160 | passwordSecret: "db-pass" 161 | 162 | # The placement variables control how the pods will be placed 163 | placement: 164 | includeLabel: "" 165 | excludeLabel: "excludelsf" 166 | tolerateName: "" 167 | tolerateValue: "" 168 | tolerateEffect: NoExecute 169 | 170 | # Define the number of this type of pod you want to have running. 171 | # Valid values for a master are 0 or 1. 172 | replicas: 1 173 | 174 | # These are the Memory and CPU allocations for the pod. 175 | # Set the memory and cpu requests and limits to the same values 176 | # to get a guaranteed QoS. 177 | resources: 178 | requests: 179 | cpu: "2" 180 | memory: "16G" 181 | limits: 182 | cpu: "2" 183 | memory: "16G" 184 | 185 | #mountList: 186 | # - /home 187 | 188 | 189 | # There can be a number of different compute pods running. Each pod 190 | # type supporting different applications. Define a list of compute 191 | # pods and there characteristics here. 192 | computes: 193 | # This is the start of a specific compute pod type. The name 194 | # given should be short and without spaces 195 | - name: "RHEL7" 196 | 197 | # A meaningful description should be given to the pod. 
It should 198 | # describe what applications this pod is capable of running 199 | description: "Compute pods for running app1 and app2" 200 | 201 | # The compute pods will provide the resources for running 202 | # various workloads. Resources listed here will be assigned 203 | # to the pods in LSF 204 | provider: 205 | - rhel7 206 | - app1 207 | - app2 208 | 209 | # Define where to get the image from 210 | image: "ibmcom/lsfce-comp:10.1.0.9r02" 211 | imagePullPolicy: "Always" 212 | 213 | # Define the number of this type of pod you want to have running 214 | replicas: 1 215 | 216 | # The placement variables control how the pods will be placed 217 | placement: 218 | # includeLabel - Optional label to apply to hosts that 219 | # should be allowed to run the compute pod 220 | includeLabel: "" 221 | 222 | # excludeLabel - Is a label to apply to hosts to prevent 223 | # them from being used to host the compute pod 224 | excludeLabel: "excludelsf" 225 | 226 | # Taints can be used to control which nodes are available 227 | # to the LSF and Kubernetes scheduler. If used these 228 | # parameters are used to allow the LSF pods to run on 229 | # tainted nodes. When not defined the K8s master nodes 230 | # will be used to host the master pod. 231 | # 232 | # tolerateName - Optional name of the taint that has been 233 | # applied to a node 234 | # tolerateValue - The value given to the taint 235 | # tolerateEffect - The effect of the taint 236 | # 237 | tolerateName: "" 238 | tolerateValue: "" 239 | tolerateEffect: NoExecute 240 | 241 | resources: 242 | requests: 243 | cpu: "2" 244 | memory: "1G" 245 | limits: 246 | cpu: "2" 247 | memory: "1G" 248 | # gpu - defines if the compute pod should be created with 249 | # support for the GPU. Valid values are "yes|no". 250 | # If set to "yes", the NVIDIA___ environment variables will be 251 | # set and the nodeSelectorTerms will include the 252 | # openshift.com/gpu-accelerator key with operator Exists. 253 | # The seLinuxOptions will set the type to 'nvidia_container_t' 254 | # and the SYS_ADMIN capability will be added for GPU mode switching 255 | gpu: "no" 256 | 257 | # The application running in the pods will typically need to get 258 | # access to the user data and even application binaries. The 259 | # mountList provides a way to access directories from the OS 260 | # running on the Kubernetes worker nodes. This will be translated 261 | # into hostPath volumeMounts for the pod to access. For example 262 | # listing '/home' will cause /home from the worker OS to be 263 | # mounted in the running container, allowing users to access there 264 | # home directory (assuming automounter is setup). 265 | #mountList: 266 | # - /home 267 | # - /apps 268 | 269 | # Create other compute types here to support other applications. 
270 | # - name: "RHEL6" 271 | # description: "Compute pods for Fluent and Nastran" 272 | # image: "lsf-comp-rhel6:10.1.0.9" 273 | # imagePullPolicy: "Always" 274 | # replicas: 1 275 | # placement: 276 | # includeLabel: "" 277 | # excludeLabel: "excludelsf" 278 | # resources: 279 | # requests: 280 | # cpu: "2" 281 | # memory: "1G" 282 | # limits: 283 | # cpu: "2" 284 | # memory: "1G" 285 | # gpu: "no" 286 | #mountList: 287 | # - /home 288 | # - /apps 289 | # provider: 290 | # - rhel6 291 | # - nastran 292 | # - fluent 293 | 294 | 295 | # - name: "RHEL6wGPU" 296 | # description: "Compute pods for Fluent and Nastran" 297 | # image: "docker-registry.default.svc:5000/ibm-lsf-project/lsf-comp-rhel6:10.1.0.9" 298 | # imagePullPolicy: "Always" 299 | # replicas: 0 300 | # placement: 301 | # includeLabel: "" 302 | # excludeLabel: "excludelsf" 303 | # tolerateName: "" 304 | # tolerateValue: "" 305 | # tolerateEffect: NoExecute 306 | # resources: 307 | # requests: 308 | # cpu: "2" 309 | # memory: "1G" 310 | # limits: 311 | # cpu: "2" 312 | # memory: "1G" 313 | # gpu: "yes" 314 | #mountList: 315 | # - /home 316 | # - /apps 317 | # provider: 318 | # - nastran 319 | # - rhel6 320 | 321 | -------------------------------------------------------------------------------- /doc/LSF_Operator/README-pod-sched.md: -------------------------------------------------------------------------------- 1 | [![IBM Spectrum Computing Technical Preview](https://github.com/IBMSpectrumComputing/lsf-hybrid-cloud/blob/master/Spectrum_icon.png)](https://www.ibm.com/support/knowledgecenter/SSWRJV/product_welcome_spectrum_lsf.html) 2 | 3 | # IBM Spectrum LSF Deployed as an Enhanced Pod Scheduler 4 | 5 | **NOTE: This is a technical preview and will expire Sept 30th, 2020** 6 | 7 | ## Introduction 8 | IBM Spectrum LSF Deployed as an **Enhanced Pod Scheduler** delivers three key capabilities: 9 | * Effectively manages highly variable demands in workloads within a finite supply of resources 10 | * Provides improved service levels for different consumers and workloads in a shared multitenant environment 11 | * Optimizes the usage of expensive resources such as general-purpose graphics processing units (GPGPUs) to help ensure that they are allocated the most important work 12 | 13 | ### Overview 14 | **Enhanced Pod Scheduler** Technical Preview builds on IBM Spectrum Computing’s rich heritage in workload management and orchestration in demanding high performance computing and enterprise environments. With this strong foundation, **Enhanced Pod Scheduler** brings a wide range of workload management capabilities that include: 15 | * Multilevel priority queues and pre-emption 16 | * Fair share among projects and namespaces 17 | * Resource reservation 18 | * Dynamic load-balancing 19 | * Topology-aware scheduling 20 | * Capability to schedule GPU jobs with consideration for CPU or GPU topology 21 | * Parallel and elastic jobs 22 | * Time-windows 23 | * Time-based configuration 24 | * Advanced reservation 25 | * Workflows 26 | 27 | ### Improved workload prioritization and management 28 | **Enhanced Pod Scheduler** adds robust workload orchestration and prioritization capabilities to Kubernetes clusters. 29 | Kubernetes provides an application platform for developing and managing on-premises, containerized applications. 
30 | While the Kubernetes scheduler employs a basic “first come, first served" method for processing workloads, 31 | **Enhanced Pod Scheduler** enables organizations to effectively prioritize and manage workloads based on business priorities and objectives. 32 | 33 | ### Key capabilities of IBM Spectrum Computing Technical Preview 34 | #### Workload Orchestration 35 | Kubernetes provides effective orchestration of workloads if there is capacity. 36 | In the public cloud, the environment can usually be enlarged to help ensure that there is always capacity in response to workload demands. 37 | However, in an on-premises deployment of Kubernetes, resources are ultimately finite. 38 | For workloads that dynamically create Kubernetes pods (such as Jenkins, Jupyter Hub, Apache Spark, TensorFlow, ETL, and so on), 39 | the default "first come, first served" orchestration policy is not enough to help ensure that important business workloads process first or get resources before less important workloads. 40 | **Enhanced Pod Scheduler** prioritizes access to the resources for key business processes and lower priority workloads are queued until resources can be made available. 41 | 42 | #### Service Level Management 43 | In a multitenant environment where there is competition for resources, workloads (users, user groups, projects, and namespaces) can be assigned to different service levels that help ensure the right workload gets access to the right resource at the right time. This function prioritizes workloads and allocates a minimum number of resources for each service class. In addition to service levels, workloads can also be subject to prioritization and multilevel fair share policies, which maintain correct prioritization of workloads within the same Service Level Agreement (SLA). 44 | 45 | #### Resource Optimization 46 | Environments are rarely homogeneous. There might be some servers with additional memory, or some might have GPGPUs or additional capabilities. Running workloads on these servers that do not require those capabilities can block or delay workloads that do require additional functions. **Enhanced Pod Scheduler** provides multiple polices such as multilevel fair share and service level management, enabling the optimization of resources based on business policy rather than by users competing for resources. 47 | 48 | 49 | # **Enhanced Pod Scheduler** Job Scheduler Spec Reference and Examples 50 | This section outlines how to use the new capabilities. 51 | 52 | Additional examples and the most current pod specification annotations are available [here.](https://github.com/IBMSpectrumComputing/lsf-kubernetes) 53 | Questions can be posted but do not post any confidential information. 54 | 55 | More information on the LSF job submission options and configuration can be found [here](https://www.ibm.com/support/knowledgecenter/SSWRJV_10.1.0/lsf_welcome/lsf_kc_cluster_ops.html). 56 | 57 | ## Job Scheduler Spec Reference 58 | Deploying the **Enhanced Pod Scheduler** enables job control extensions for the pods started in any namespace. 
The table below lists the pod spec fields that are available: 59 | 60 | | Pod Spec Field | Description | LSF job submission option | 61 | | ------------------------------- | ------------------------------------ | ----------------- | 62 | | `*metadata.name` | A name to assign to the job | `Job Name (-J)` | 63 | | `++lsf.ibm.com/project` | A project name to assign to job | `Project Name (-P)` | 64 | | `++lsf.ibm.com/application` | An application profile to use | `Application Profile (-app)`| 65 | | `++lsf.ibm.com/gpu` | The GPU requirements for the job | `GPU requirement (-gpu)` | 66 | | `++lsf.ibm.com/queue` | The name of the job queue to run the job in | `Queue (-q)` | 67 | | `++lsf.ibm.com/jobGroup` | The job group to put the job in | `Job Group (-g)` | 68 | | `++lsf.ibm.com/fairshareGroup` | The fair share group to use to share resources between jobs | `Fair share Group (-G)` | 69 | | `++lsf.ibm.com/user` | The user to run applications as, and for accounting | `Job submission user` | 70 | | `++lsf.ibm.com/reservation` | Reserve the resources prior to running job | `Advanced Reservation (-U)` | 71 | | `++lsf.ibm.com/serviceClass` | The jobs service class | `Service Class (-sla)` | 72 | | `spec.containers[].resources.requests.memory` | The amount of memory to reserve for the job | `Memory Reservation (-R "rusage[mem=...]")` | 73 | | `*spec.schedulerName` | Set to "lsf" | N/A | 74 | 75 | **NOTE: * - in pod specification section: spec.template, ++ - in pod specification section: spec.template.metadata.annotations** 76 | 77 | These capabilities are accessed by modifying the pod specifications for jobs. Below are some samples of how to configure jobs to access the new capabilities. 78 | 79 | ### Job Scheduler Example 1 80 | This example uses the new scheduler for the placement of the workload. The placement request will be routed to the LSF scheduler for queuing and placement. 81 | 82 | ```yaml 83 | apiVersion: batch/v1 84 | kind: Job 85 | metadata: 86 | name: myjob-k8s-115 87 | spec: 88 | template: 89 | metadata: 90 | name: myjob-001 91 | spec: 92 | schedulerName: lsf # This directs scheduling to the LSF Scheduler 93 | containers: 94 | - name: ubuntutest 95 | image: ubuntu 96 | command: ["sleep", "60"] 97 | resources: 98 | requests: 99 | memory: 5Gi 100 | restartPolicy: Never 101 | ``` 102 | Here we have just told Kubernetes to use **lsf** as the job scheduler. The LSF job scheduler can 103 | then apply its policies to choose when and where the job will run. 104 | 105 | ### Job Scheduler Example 2 106 | Additional parameters can be added to the pod yaml file to control the job. The example below adds 107 | some additional annotations for controlling the job. The `lsf.ibm.com/queue: "normal"` tells the scheduler to use the `normal` queue. By default, there are four queues available: 108 | - priority - This is for high priority jobs 109 | - normal - This is for normal jobs 110 | - idle - These are for jobs that can only run if there are idle resources 111 | - night - These are for jobs that are only allowed to run at night 112 | 113 | Additional queues can be added by modifying the **lsb.queues** configMap. 114 | 115 | The `lsf.ibm.com/fairshareGroup: "gold"` tells the scheduler which fair share group this job belongs to. By default, the following groups have been configured: 116 | - gold 117 | - silver 118 | - bronze 119 | 120 | These groups allow the user to modify how the resources are shared. Some groups may have a higher allocation of resources and can use a better fairshareGroup. 
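If you are unsure which fair share groups your deployment actually defines, you can check the **lsb.users** configMap before submitting. A minimal sketch; the configMap name shown here assumes the default chart naming with a release named `lsf` and may differ in your deployment:
```
$ kubectl get configmap |grep users
$ kubectl get configmap lsf-ibm-spectrum-computing-prod-users -o yaml
```
The full Example 2 specification follows.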
121 | 122 | ```yaml 123 | apiVersion: batch/v1 124 | kind: Job 125 | metadata: 126 | name: myjob-001 127 | spec: 128 | template: 129 | metadata: 130 | name: myjob-001 131 | # The following annotations provide additional scheduling 132 | # information to better place the pods on the worker nodes 133 | # NOTE: Some of these require additional configuration to work 134 | annotations: 135 | lsf.ibm.com/project: "big-project-1000" 136 | lsf.ibm.com/queue: "normal" 137 | lsf.ibm.com/jobGroup: "/my-group" 138 | lsf.ibm.com/fairshareGroup: "gold" 139 | spec: 140 | # This directs scheduling to the LSF Scheduler 141 | schedulerName: lsf 142 | containers: 143 | - name: ubuntutest 144 | image: ubuntu 145 | command: ["sleep", "60"] 146 | restartPolicy: Never 147 | ``` 148 | In the example above the annotations provide the LSF scheduler more information about the job and how it should be run. 149 | 150 | ### Important Example About Pod Users 151 | Users that submit a job through Kubernetes typically are trusted to run services 152 | and workloads as other users. For example, the pod specifications allow the pod to run as other users e.g. 153 | ```yaml 154 | apiVersion: batch/v1 155 | kind: Job 156 | metadata: 157 | name: myjob-uid1003-0002 158 | spec: 159 | template: 160 | metadata: 161 | name: myjob-uid1003-0002 162 | spec: 163 | schedulerName: lsf 164 | containers: 165 | - name: ubuntutest 166 | image: ubuntu 167 | command: ["id"] 168 | restartPolicy: Never 169 | securityContext: 170 | runAsUser: 1003 171 | fsGroup: 100 172 | runAsGroup: 1001 173 | ``` 174 | In the above example the pod would run as UID 1003, and produce the following output: 175 | ```sh 176 | uid=1003(billy) gid=0(root) groups=0(root),1001(users) 177 | ``` 178 | **Note the GID and groups.** 179 | Care should be taken to limit who can create pods. LSF administrators can safely allow users to run pods using the LSF Connector for Kubernetes. 180 | This allows users to launch pod jobs on Kubernetes without exposing the Kubernetes `runAsUser` feature. 181 | 182 | ## Parallel Jobs 183 | The chart includes a new Custom Resource Definition (CRD) for parallel jobs. This simplifies the creation of parallel jobs in Kubernetes. The ParallelJob CRD describes the resource requirements for parallel jobs with multiple tasks on K8s. 184 | The ParallelJob controller daemon is responsible to create separate Pods for each task described 185 | in the ParallelJob CRD. 186 | 187 | The CRD supports both job-level and task-level scheduling terms which can satisfy common scheduling 188 | needs over all the Pods in the same job or individual need for each Pod. At the same time, one 189 | can also specify all the Pod Spec policies for the Pod defined in the ParallelJob CRD. 190 | 191 | ### Job level terms 192 | 193 | ParallelJob CRD supports the following job-level terms to describe the resource requirements apply for 194 | all the Pods in the same parallel job. 195 | 196 | * spec.description: the human readable description words attached to the parallel job 197 | * spec.resizable: the valid values are "true" or "false", which determines whether the Pods in the parallel job should be co-scheduling together. Specifically, a resizable job can be started with a few Pods got enough resources, while a non-resizable job must get enough resources for all the Pods before starting any Pods. 198 | * spec.headerTask: typical parallel jobs (e.g. Spark, MPI, Distributed TensorFlow) run a "driver" task to co-ordinate or work as a central sync point for the left tasks. 
This term can be used to specify the name of such "driver" task in a parallel job. It will make sure the header task can be scheduled and started before or at the same time with other non-header tasks. 199 | * spec.placement: this term supports multiple sub-terms which can satisfy various task distribution policies, such as co-allocating multiple tasks on the same host or zone, or evenly distribute the same number of tasks across allocated hosts. This term can be defined in both job-level and task-group level. 200 | 201 | Currently, this term supports the following placement policies. The example defines a "same" policy in job-level to enforce all the tasks belong to the parallel job co-allocated to the nodes in the same zone. 202 | 203 | ``` 204 | sameTerm: node | rack | zone 205 | spanTerms: 206 | - topologyKey: node 207 | taskTile: #tasks_per_topology 208 | ``` 209 | To use the topology keys, you must define the following host-based resources in your LSF configuration files. Examples are as follows. 210 | 211 | lsf.shared: 212 | 213 | ``` 214 | Begin Resource 215 | RESOURCENAME TYPE INTERVAL INCREASING DESCRIPTION 216 | ... 217 | kube_name String () () (Kubernetes node name) 218 | rack_name String () () (Kubernetes node rack name) 219 | zone_name String () () (Kubernetes node zone name) 220 | End Resource 221 | ``` 222 | 223 | lsf.cluster: 224 | ``` 225 | Begin Host 226 | HOSTNAME model type server RESOURCES 227 | ... 228 | ICPHost01 ! ! 1 (kube_name=172.29.14.7 rack_name=blade1 zone_name=Florida) 229 | End Host 230 | ``` 231 | 232 | * spec.priority: this term is used to specify job priority number which can rank the parallel job with other jobs submitted by the same user. The default maximum number can be supported by LSF is 100. 233 | 234 | ### Task level terms 235 | 236 | The tasks are grouped by the common resource requirements of replicas. 237 | 238 | * spec.taskGroups[].spec.replica: this term defines the number of tasks in current task group 239 | * spec.taskGroups[].spec.placement: this term shares the same syntax with the one defined at job level. 240 | The second task group in the example defines an alternative "span" like placement policy, which can either put 4 replicas across two nodes or on the same node. 241 | * spec.taskGroups[].spec.template.spec: the Pod Spec shares the same syntax supported by your K8s cluster. For example, you can specify the nodeSelector to filter node labels during scheduling. 242 | 243 | ### LSF specific annotations 244 | 245 | The annotations defined at job-level can support job control extensions with prefix of "lsf.ibm.com/" listed in [here](#Job-Scheduler-Spec-Reference). The resource requirements conflict of the following extensions is described as follows. 246 | 247 | * lsf.ibm.com/gpu: Number of GPUs to be requested on each host (-gpu). This term will be ignored when the Pod explicitly request nvidia.com/gpu resource in ParallelJob CRD. 248 | * lsf.ibm.com/minCurrent: Not supported by ParallelJob CRD. All of replicas must get allocation at the same time for non-resizable job. For resizable job, once header task got allocation, the job can be started no matter whether other tasks can get allocation at 249 | the same time. 250 | 251 | ## Submit ParallelJob CRD 252 | 253 | The following example submission script describes a parallel job which have two replicas (tasks) in total. 
254 | 255 | ``` 256 | $ cat example.yaml 257 | apiVersion: ibm.com/v1alpha1 258 | kind: ParallelJob 259 | metadata: 260 | name: double-tasks-parallel 261 | namespace: default 262 | labels: 263 | lable1: example2 264 | spec: 265 | name: double-tasks-parallel 266 | description: This is a parallel job with two tasks to be running on the same node. 267 | headerTask: group0 268 | priority: 100 269 | schedulerName: lsf 270 | taskGroups: 271 | - metadata: 272 | name: group0 273 | spec: 274 | placement: 275 | sameTerm: node 276 | spanTerms: 277 | - topologyKey: node 278 | taskTile: 2 279 | replica: 2 280 | template: 281 | spec: 282 | containers: 283 | - args: 284 | image: ubuntu 285 | command: ["sleep", "30"] 286 | name: task1 287 | resources: 288 | limits: 289 | cpu: 1 290 | requests: 291 | cpu: 1 292 | memory: 200Mi 293 | restartPolicy: Never 294 | ``` 295 | 296 | Sample jobs may also be found on GitHub: https://github.com/IBMSpectrumComputing/lsf-kubernetes 297 | 298 | 299 | ### Monitor ParallelJob CRD 300 | 301 | Use the following command to monitor the status of a parallel job submitted using ParallelJob CRD. It will give the Job Status together with the counters of its Pods in various Pod phases as Task Status. 302 | 303 | When the Job is in Pending status, the command shows the Job Pending Reason of corresponding LSF control job. 304 | 305 | ``` 306 | > kubectl describe pj 307 | Name: parallel-job 308 | Namespace: default 309 | Annotations: 310 | API Version: ibm.com/v1alpha1 311 | Kind: ParallelJob 312 | ... 313 | ... 314 | Status: 315 | Job Pending Reason: "New job is waiting for scheduling;" 316 | Job Status: Pending 317 | Task Status: 318 | Unknown: 0 319 | Failed: 0 320 | Pending: 5 321 | Running: 0 322 | Succeeded: 0 323 | ``` 324 | 325 | The LSF control job ID is attached as a Pod label named lsf.ibm.com/jobId on each Pod. Several special Pod labels are attached to record the information of its parallel job belongs to. 326 | 327 | ``` 328 | > kubectl describe po 329 | Name: double-tasks-parallel-kflb9 330 | Namespace: default 331 | Priority: 0 332 | PriorityClassName: 333 | Node: 334 | Labels: controller-uid=de751862-9114-11e9-864a-3440b5c56250 335 | lsf.ibm.com/jobId=2762 336 | parallelJob.name=double-tasks-parallel 337 | parallelJob.taskGroup.index=1 338 | parallelJob.taskGroup.name=group1 339 | Annotations: lsf.ibm.com/pendingReason: "New job is waiting for scheduling;" 340 | ... 341 | ... 342 | ``` 343 | 344 | ## Host Maintenance 345 | It may be necessary to remove a machine from operation, perhaps to apply patches to the Operating System. 346 | To do this it is necessary to stop the machine from accepting any new workload. This is done by running: 347 | ```sh 348 | kubectl drain --ignore-daemonsets {Name of Node} 349 | ``` 350 | If you check the node status it will look something like: 351 | ```sh 352 | 10.10.10.12 Ready,SchedulingDisabled worker 5d1h v1.12.4+-ee 353 | ``` 354 | The **SchedulingDisabled** status indicates that the scheduler will ignore this host. 355 | 356 | Once the maintenance is complete the machine can be returned to use by running: 357 | ```sh 358 | kubectl uncordon {Name of Node} 359 | ``` 360 | The **SchedulingDisabled** status will be removed from the machine and pods will be scheduled on it. 361 | 362 | 363 | ## Backups 364 | Configuration and state information is stored in the persistent volume claim. 365 | Backups of that data should be performed periodically. 
The state information 366 | can become stale very fast as users work is submitted and finished. Some 367 | job state data will be lost for jobs submitted between the last backup and 368 | current time. 369 | 370 | > A reliable filesystem is critical to minimize job state loss. 371 | 372 | Dynamic provisioning of the persistent volume is discouraged because of the difficulty 373 | in locating the correct resource to backup. Pre-creating a persistent volume claim, 374 | or labeling a persistent volume, for the deployment to use provides the easiest 375 | way to locates the storage to backup. 376 | 377 | Restoring from a backup will require restarting the manager processes. Use the procedure 378 | below to reconfigure the entire cluster after restoring files. 379 | 1. Locate the master pod by looking for **master** in the list of pods e.g. 380 | ``` 381 | $ kubectl get pods |grep master 382 | lsf-ibm-spectrum-computing-prod-master-56b55d6dc8-84gcj 1/1 Running 0 3d19h 383 | ``` 384 | 385 | 2. Connect to the management pod e.g. 386 | ``` 387 | $ kubectl exec -ti lsf-ibm-spectrum-computing-prod-master-56b55d6dc8-84gcj bash 388 | ``` 389 | 390 | 3. Run the command to re-read the configuration files 391 | ``` 392 | LSF POD [root@lsfmaster /]# cd /opt/ibm/lsfsuite/lsf/conf 393 | LSF POD [root@lsfmaster /opt/ibm/lsfsuite/lsf/conf]# ./trigger-reconfig.sh 394 | ``` 395 | 396 | 4. Wait for a minute and try some commands to see if the cluster is functioning okay e.g. 397 | ``` 398 | LSF POD [root@lsfmaster /]# lsid 399 | IBM Spectrum LSF Connetor for Kubernetes 10.1.0.0, Oct 17 2019 400 | Copyright International Business Machines Corp. 1992, 2016. 401 | US Government Users Restricted Rights - Use, duplication or disclosure restricted by GSA ADP Schedule Contract with IBM Corp. 402 | 403 | My cluster name is myCluster 404 | My master name is lsfmaster 405 | ``` 406 | > Command should report the software versions and manager hostname. 407 | 408 | ``` 409 | LSF POD [root@lsfmaster /]# bhosts 410 | HOST_NAME STATUS JL/U MAX NJOBS RUN SSUSP USUSP RSV 411 | lsfmaster closed - 0 0 0 0 0 0 412 | worker-10-10-10-10 ok - - 0 0 0 0 0 413 | worker-10-10-10-11 ok - - 0 0 0 0 0 414 | worker-10-10-10-12 ok - - 0 0 0 0 0 415 | ``` 416 | > Host status should be **ok**, except for the lsfmaster, which will be **closed**. 417 | 418 | ``` 419 | LSF POD [root@lsfmaster /]# bqueues 420 | QUEUE_NAME PRIO STATUS MAX JL/U JL/P JL/H NJOBS PEND RUN SUSP 421 | priority 43 Open:Active - - - - 0 0 0 0 422 | normal 30 Open:Active - - - - 0 0 0 0 423 | idle 20 Open:Active - - - - 0 0 0 0 424 | night 1 Open:Inact - - - - 0 0 0 0 425 | ``` 426 | > Queues should be open. 427 | 428 | 429 | ## Changing the Schedulers Configuration 430 | The scheduler stores policy configuration in the persistent volume claim used by the manager pod. 431 | Additional configuration for the queues and fairshareGroups is stored in configMaps. 432 | The default configuration can be changed, and information about the file formats is available [here.](https://www.ibm.com/support/knowledgecenter/SSWRJV_10.1.0/lsf_welcome/lsf_kc_cluster_ops.html) 433 | 434 | What follows is an overview of how to change both the configuration stored in the persistent volume claim, and the configMaps. 
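Before editing anything, it can help to confirm where the configuration for your deployment lives. A minimal sketch; the `ibm-spectrum-computing` name fragment assumes the default chart naming and may differ in your deployment:
```
$ kubectl get configmap |grep ibm-spectrum-computing
$ kubectl get pvc
```
The configMaps listed are the ones described in the sections below; the persistent volume claim is the one that should be included in your backups.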
435 | 436 | ### Changing Configuration Files 437 | Changing the scheduler configuration requires: 438 | * Connecting to the manager pod 439 | * Changing the configuration file(s) 440 | * Reconfiguring the scheduler 441 | 442 | To connect the manager pod, use the following procedure: 443 | 1. Locate the master pod by looking for **master** in the list of pods e.g. 444 | ``` 445 | $ kubectl get pods --namespace {Namespace used to deploy chart} |grep ibm-spectrum-computing-prod-master 446 | lsf-ibm-spectrum-computing-prod-master-56b55d6dc8-84gcj 1/1 Running 0 3d19h 447 | ``` 448 | 449 | 2. Connect to the management pod e.g. 450 | ``` 451 | $ kubectl exec -ti lsf-ibm-spectrum-computing-prod-master-56b55d6dc8-84gcj bash 452 | ``` 453 | 454 | The configuration files are located in: `/opt/ibm/lsfsuite/lsf/conf` 455 | 456 | The directory has the following files in it: 457 | ``` 458 | conf/cshrc.lsf 459 | conf/profile.lsf 460 | conf/hosts 461 | conf/lsf.conf <-- This is exposed as a configmap 462 | conf/lsf.cluster.myCluster 463 | conf/lsf.shared 464 | conf/lsf.task 465 | conf/lsbatch/myCluster/configdir/lsb.users <-- This is exposed as a configmap 466 | conf/lsbatch/myCluster/configdir/lsb.nqsmaps 467 | conf/lsbatch/myCluster/configdir/lsb.reasons 468 | conf/lsbatch/myCluster/configdir/lsb.hosts <-- This is exposed as a configmap 469 | conf/lsbatch/myCluster/configdir/lsb.serviceclasses 470 | conf/lsbatch/myCluster/configdir/lsb.resources <-- This is exposed as a configmap 471 | conf/lsbatch/myCluster/configdir/lsb.modules 472 | conf/lsbatch/myCluster/configdir/lsb.threshold 473 | conf/lsbatch/myCluster/configdir/lsb.applications <-- This is exposed as a configmap 474 | conf/lsbatch/myCluster/configdir/lsb.globalpolicies 475 | conf/lsbatch/myCluster/configdir/lsb.params 476 | conf/lsbatch/myCluster/configdir/lsb.paralleljobs <-- This is exposed as a configmap 477 | conf/lsbatch/myCluster/configdir/lsb.queues <-- This is exposed as a configmap 478 | ``` 479 | 480 | **NOTE: Do not directly edit the configmap files, otherwise you will lose your changes.** 481 | 482 | Find the file you want to change and modify it. 483 | 484 | After changing the configuration files(s) it is necessary to trigger the scheduler to re-read the configuration. 485 | This will not affect running or pending workload. From within the management pod do the following: 486 | 487 | 1. Run the command to reconfigure the base 488 | ``` 489 | LSF POD [root@lsfmaster /]# lsadmin reconfig 490 | 491 | Checking configuration files ... 492 | 493 | No errors found. 494 | 495 | Restart only the master candidate hosts? [y/n] y 496 | Restart LIM on ...... done 497 | 498 | ``` 499 | To reconfigure the base on all nodes use: 500 | ``` 501 | LSF POD [root@lsfmaster /]# lsadmin reconfig all 502 | ``` 503 | 504 | 2. Run the command to re-read the scheduler configuration. 505 | ``` 506 | LSF POD [root@lsfmaster /]# badmin mbdrestart 507 | 508 | Checking configuration files ... 509 | 510 | There are warning errors. 511 | 512 | Do you want to see detailed messages? [y/n] y 513 | Apr 22 13:14:49 2019 22437 4 10.1 orderQueueGroups(): File /opt/ibm/lsfsuite/lsf/conf/lsbatch/myCluster/configdir/lsb.queues: Priority value <20> of queue falls in the range of priorities defined for the queues that use the same cross-queue fairshare/absolute priority scheduling policy. The priority value of queue has been set to 1 514 | --------------------------------------------------------- 515 | No fatal errors found. 
516 | Warning: Some configuration parameters may be incorrect. 517 | They are either ignored or replaced by default values. 518 | 519 | Do you want to restart MBD? [y/n] 520 | ``` 521 | 522 | Here we see there is an error. The initial configuration will not have errors, but it is instructive to see what they might look like. 523 | 524 | 3. If errors are seen, correct them, and retry the command to check that 525 | the errors have been corrected. 526 | 527 | 528 | ### Changing the ConfigMap Files 529 | Several configuration files are exposed as configMaps. They are: 530 | * **lsf.conf** - This is the configuration file for the scheduler 531 | * **lsb.applications** - This contains the application templates that simplify the submission of jobs 532 | * **lsb.hosts** - This contians the host properties 533 | * **lsb.paralleljobs** - This contains the parameters used by parallel jobs 534 | * **lsb.queues** - This contains the queue definitions 535 | * **lsb.resources** - This contains the resource definitions 536 | * **lsb.users** - This contains the users and user groups for configuring fairshare 537 | 538 | They can be edited in the GUI, or using the following commands: 539 | ```bash 540 | $ kubectl get configmap 541 | ``` 542 | This will list all the config maps. 543 | ```bash 544 | $ kubectl edit configmap lsf-ibm-spectrum-computing-prod-queues 545 | ``` 546 | ** NOTE: You will see additional metadata associated with the configmap. Do not change this. ** 547 | 548 | Changes to the configMaps will be automatically applied to the cluster. Errors in the 549 | configMaps will cause the scheduler to revert to a default configuration. To check 550 | for errors use the procedure in the above section to test for errors, but remember that 551 | changes to the **lsb.users** and **lsb.queues** have to be done by editing the configmap. 552 | 553 | 554 | ## Upgrading the Cluster 555 | Upgrading the cluster requires several steps to ensure that there is little disruption to the running pods. 556 | Use the following procedure: 557 | 558 | 1. Determine the master pod and connect to it. 559 | ``` 560 | $ kubectl get pods |grep master 561 | lsf-ibm-spectrum-computing-prod-master-56b55d6dc8-99999 1/1 Running 0 3d19h 562 | $ kubectl exec -ti lsf-ibm-spectrum-computing-prod-master-56b55d6dc8-99999 bash 563 | ``` 564 | 565 | 2. List the queues in the cluster with: 566 | ``` 567 | LSF POD [root@lsfmaster /]# bqueues 568 | QUEUE_NAME PRIO STATUS MAX JL/U JL/P JL/H NJOBS PEND RUN SUSP 569 | priority 43 Open:Active - - - - 0 0 0 0 570 | normal 30 Open:Active - - - - 0 153 570 0 571 | idle 20 Open:Active - - - - 0 0 0 0 572 | night 15 Open:Inact - - - - 0 0 0 0 573 | ``` 574 | 575 | 3. Close the queues to stop new pods from starting 576 | ``` 577 | LSF POD [root@lsfmaster /]# badmin qclose {Name of Queue} 578 | ``` 579 | Repeat this for all the queues. 580 | 581 | 4. Watch the number of running jobs by running the **bqueues** command. 582 | ``` 583 | LSF POD [root@lsfmaster /]# bqueues 584 | QUEUE_NAME PRIO STATUS MAX JL/U JL/P JL/H NJOBS PEND RUN SUSP 585 | priority 43 Open:Active - - - - 0 0 0 0 586 | normal 30 Open:Active - - - - 0 397 0 0 587 | idle 20 Open:Active - - - - 0 0 0 0 588 | night 15 Open:Inact - - - - 0 0 0 0 589 | ``` 590 | Wait for the number of **RUN** jobs to drop to 0. 591 | 592 | 5. Once enough of the pods have finished the cluster can be upgraded. 593 | 594 | 6. Once the cluster has been upgraded connect to the master pod and check the queue state. 
If needed reopen the queues with: 595 | ``` 596 | LSF POD [root@lsfmaster /]# badmin qopen {Name of Queue} 597 | ``` 598 | 599 | 600 | ## Copyright and trademark information 601 | © Copyright IBM Corporation 2019 602 | U.S. Government Users Restricted Rights - Use, duplication or disclosure restricted by GSA ADP Schedule Contract with IBM Corp. 603 | IBM®, the IBM logo and ibm.com® are trademarks of International Business Machines Corp., registered in many jurisdictions worldwide. Other product and service names might be trademarks of IBM or other companies. A current list of IBM trademarks is available on the Web at "Copyright and trademark information" at [www.ibm.com/legal/copytrade.shtml](https://www.ibm.com/legal/copytrade.shtml). 604 | 605 | 606 | -------------------------------------------------------------------------------- /doc/LSF_Operator/README.md: -------------------------------------------------------------------------------- 1 | [![IBM Spectrum LSF](https://github.com/IBMSpectrumComputing/lsf-hybrid-cloud/blob/master/Spectrum_icon.png)](https://www.ibm.com/support/knowledgecenter/SSWRJV/product_welcome_spectrum_lsf.html) 2 | 3 | # IBM Spectrum LSF 4 | 5 | ## Introduction 6 | IBM Spectrum LSF (LSF) is a powerful workload management platform for demanding, distributed HPC environments. It provides a comprehensive set of intelligent, policy-driven scheduling features that enables full utilization of your compute infrastructure resources and ensure optimal application performance. 7 | 8 | **NOTE: This is a technical preview. It will expire September 30th, 2020** 9 | 10 | ### Overview 11 | LSF Technical Preview builds on IBM Spectrum Computings rich heritage in workload management and orchestration in demanding high performance computing and enterprise environments. With this strong foundation, LSF Technical Preview brings a wide range of workload management capabilities that include: 12 | * Multilevel priority queues and pre-emption 13 | * Fair share among projects and namespaces 14 | * Resource reservation 15 | * Dynamic load-balancing 16 | * Topology-aware scheduling 17 | * Capability to schedule GPU jobs with consideration for CPU or GPU topology 18 | * Parallel and elastic jobs 19 | * Time-windows 20 | * Time-based configuration 21 | * Advanced reservation 22 | * Workflows 23 | 24 | LSF is deployed by the LSF Operator. The LSF Operator can deploy fully functional LSF clusters on Kubernetes, or it can deploy a modified LSF cluster that provides enhanced scheduling capabilities in Kubernetes including parallel job support. The LSF operator will install LSF Community Edition (CE). LSF CE is a free version of LSF. An unrestricted version is also available. Email: LSF-Inquiry@ca.ibm.com for more information. 25 | 26 | 27 | #### LSF on Kubernetes 28 | LSF is a powerful workload management system for distributed computing environments. LSF provides a comprehensive set of intelligent, policy-driven scheduling features that enable you to utilize 29 | all your compute infrastructure resources and ensure optimal application performance. 30 | 31 | The LSF Operator can deploy multiple LSF CE clusters on Kubernetes. The LSF Cluster also includes a restricted version of LSF Application Center. Application Center provides a Graphical User Interface (GUI) that users can use to submit and manage jobs. Once deployed users can login to the GUI using there normal UNIX account. 
There home directories and application directories are made available so users can easily run the applications they want. 32 | 33 | The LSF cluster can support multiple Linux OS's, and tools are provided [here](https://github.com/IBMSpectrumComputing/lsf-kubernetes) to assist in building custom images to support your applications. 34 | 35 | 36 | #### Enhanced Pod Scheduler 37 | The Enhanced pod scheduler deployment of LSF Technical Preview adds robust workload orchestration and prioritization capabilities to Kubernetes clusters. Kubernetes provides an application platform for developing and managing on-premises, containerized applications. While the Kubernetes scheduler employs a basic “first come, first served" method for processing workloads, LSF enables organizations to effectively prioritize and manage workloads based on business priorities and objectives. It provides the following key capabilities: 38 | 39 | ##### Workload Orchestration 40 | Kubernetes provides effective orchestration of workloads if there is capacity. In the public cloud, the environment can usually be enlarged to help ensure that there is always capacity in response to workload demands. However, in an on-premises deployment of Kubernetes, resources are ultimately finite. For workloads that dynamically create Kubernetes pods (such as Jenkins, Jupyter Hub, Apache Spark, TensorFlow, ETL, and so on), the default "first come, first served" orchestration policy is not sufficient to help ensure that important business workloads process first or get resources before less important workloads. LSF Technical Preview prioritizes access to the resources for key business processes and lower priority workloads are queued until resources can be made available. 41 | 42 | ##### Service Level Management 43 | In a multitenant environment where there is competition for resources, workloads (users, user groups, projects, and namespaces) can be assigned to different service levels that help ensure the right workload gets access to the right resource at the right time. This function prioritizes workloads and allocates a minimum number of resources for each service class. In addition to service levels, workloads can also be subject to prioritization and multilevel fair share policies, which maintain correct prioritization of workloads within the same Service Level Agreement (SLA). 44 | 45 | ##### Resource Optimization 46 | Environments are rarely homogeneous. There might be some servers with additional memory, or some might have GPGPUs or additional capabilities. Running workloads on these servers that do not require those capabilities can block or delay workloads that do require additional functions. LSF Technical Preview provides multiple polices such as multilevel fair share and service level management, enabling the optimization of resources based on business policy rather than by users competing for resources. 47 | 48 | **NOTE: Only one instance of the Enhanced Pod Scheduler can be deployed at a time, because it extends the Kubernetes APIs with new functions.** 49 | 50 | 51 | ## Details 52 | The LSF cluster is deployed by the LSF operator. One operator can deploy one LSF cluster in the same namespace. 
When deploying an LSF on Kubernetes cluster, it will create the following items in the namespace: 53 | * An LSF master pod 54 | * An LSF GUI pod (For LSF on Kubernetes clusters) 55 | * A variety of LSF compute pods (limited to 8 in CE) 56 | The cluster configuration supports the addition of multiple application and data directories to the pods. The LSF cluster can also be configured to allow users to log in to the LSF GUI and submit work as themselves. 57 | 58 | When deploying an Enhanced Pod Scheduler, it will create the following items in the namespace: 59 | * An LSF master pod 60 | * LSF compute pods on all available worker nodes (limited to 9 in CE) 61 | 62 | 63 | ## Prerequisites 64 | The following are needed to deploy an LSF cluster: 65 | * Cluster Administrator access 66 | * A persistent volume for LSF state storage. 67 | 68 | For the LSF on Kubernetes clusters, a secret is needed for storing the database password. 69 | For the LSF on Kubernetes clusters, the following are recommended: 70 | * LDAP/NIS/YP authentication server 71 | * Persistent Volumes for users' home directories 72 | * Persistent Volumes for application binaries 73 | 74 | ## Resources Required 75 | The resources needed will depend on which type of installation is deployed. For LSF on Kubernetes, LSF manages the resources that it is provided. Those resources will also govern the number and size of jobs it can concurrently run. The resources that are provided to the compute pods will be available for running jobs. The compute pods should be as large as possible. The minimal requirements for an LSF on Kubernetes cluster are: 76 | * Operator Pod: 77 | - 256 MB of RAM 78 | - 200m of CPU 79 | * LSF Master Pod: 80 | - 1 GB of RAM 81 | - 1 CPU core 82 | * LSF GUI Pod: 83 | - 16 GB of RAM 84 | - 2 CPU cores 85 | * LSF Compute Pod: 86 | - 1 GB of RAM 87 | - 2 CPU cores 88 | 89 | A recommended cluster would be the same except for the Compute pods, which should be larger. For example: 90 | * 32 GB of RAM 91 | * 16 CPU cores 92 | 93 | Production versions would use many more compute pods with even larger CPU and memory allocations. 94 | 95 | When deploying LSF as an Enhanced Pod Scheduler, the workload consists of Kubernetes pods, which do not run inside the LSF Compute Pods. In this type of deployment, the Enhanced Pod Scheduler should be deployed with minimal resources, for example: 96 | * Operator Pod: 97 | - 256 MB of RAM 98 | - 200m of CPU 99 | * LSF Master Pod: 100 | - 8 GB of RAM 101 | - 4 CPU cores 102 | * LSF Compute Pod: 103 | - 1 GB of RAM 104 | - 200m of CPU 105 | 106 | 107 | ## Limitations 108 | This operator deploys LSF Community Edition (CE). CE is limited to 10 pods with no more than 64 cores per pod. 109 | 110 | No encryption of the data at rest or in motion is provided by this deployment. It is up to the administrator to configure storage encryption and IPSEC to secure the data. 111 | 112 | **NOTE: The CPU resources should use the same values for resources.requests.cpu and resources.limits.cpu, and likewise for resources.requests.memory and resources.limits.memory. This is so the pods have a guaranteed QoS.** 113 | 114 | # SecurityContextConstraints Requirements 115 | LSF on OpenShift requires the [`privileged`](https://ibm.biz/cpkspec-scc) Security Context Constraint (SCC); however, a tighter SCC is provided below and is recommended instead of `privileged`.
The custom SecurityContextConstraints below should be used where possible: 116 | ``` 117 | # Security Context Constraint for WMLA Pod Scheduler 118 | allowHostDirVolumePlugin: true 119 | allowHostIPC: false 120 | allowHostNetwork: false 121 | allowHostPID: false 122 | allowHostPorts: false 123 | allowPrivilegeEscalation: true 124 | allowPrivilegedContainer: false 125 | allowedCapabilities: 126 | - KILL 127 | - SETUID 128 | - SETGID 129 | - CHOWN 130 | - SETPCAP 131 | - NET_BIND_SERVICE 132 | - DAC_OVERRIDE 133 | - SYS_ADMIN 134 | - SYS_TTY_CONFIG 135 | allowedUnsafeSysctls: 136 | - '*' 137 | apiVersion: security.openshift.io/v1 138 | defaultAddCapabilities: null 139 | fsGroup: 140 | type: RunAsAny 141 | groups: 142 | - system:cluster-admins 143 | - system:nodes 144 | - system:masters 145 | kind: SecurityContextConstraints 146 | metadata: 147 | annotations: 148 | kubernetes.io/description: 'This allows the LSF daemons to run as root and start 149 | workloads as the user that submitted the jobs.' 150 | name: ibm-lsf-scc 151 | priority: null 152 | readOnlyRootFilesystem: false 153 | requiredDropCapabilities: 154 | - MKNOD 155 | - NET_RAW 156 | - SYS_CHROOT 157 | - SETFCAP 158 | - AUDIT_WRITE 159 | - FOWNER 160 | - FSETID 161 | runAsUser: 162 | type: RunAsAny 163 | seLinuxContext: 164 | type: RunAsAny 165 | seccompProfiles: 166 | - '*' 167 | supplementalGroups: 168 | type: RunAsAny 169 | users: 170 | - system:admin 171 | volumes: 172 | - '*' 173 | 174 | ``` 175 | It may be downloaded from [here.](https://github.com/IBMSpectrumComputing/lsf-kubernetes/blob/master/doc/LSF_Operator/scc.yaml) 176 | 177 | ## Installing the Operator 178 | The LSF operator must be installed to install the LSF cluster. Instructions for installing on OpenShift and Kubernetes are below. 179 | 180 | The following steps should be performed by the cluster administrator. The yaml files used below are available from [here.](https://github.com/IBMSpectrumComputing/lsf-kubernetes/tree/master/doc/LSF_Operator) The images are hosted on [Docker Hub](https://hub.docker.com/repository/docker/ibmcom/lsfce-operator). 181 | 182 | The following steps need to be performed manually: 183 | 1. Create a namespace. This namespace will be used by both the operator and the cluster deployed by it. 184 | It is recommended that no other pods use this namespace. 185 | ``` 186 | kubectl create namespace {Your Namespace} 187 | ``` 188 | 2. Create the CRD: 189 | ``` 190 | kubectl create -f lsf_v1beta1_lsfcluster_crd.yaml -n {Your Namespace} 191 | ``` 192 | 3. Create a SCC with the namespace from above 193 | ``` 194 | sed -i -e 's:MyNameSpace:{Your Namespace}:g' scc.yaml 195 | kubectl create -f scc.yaml 196 | ``` 197 | NOTE: If the SCC has been created before for another LSF cluster deployment then you will need to **edit** the SCC and add in the service account for this cluster e.g. 198 | ``` 199 | kubectl edit scc ibm-lsf-scc 200 | ``` 201 | In the **users** section add another entry for this namespace e.g. 202 | ``` 203 | users: 204 | - system:admin 205 | - system:serviceaccount:othernamespace:ibm-lsf-operator 206 | - system:serviceaccount:{Your Namespace}:ibm-lsf-operator 207 | ``` 208 | 4. Create a service account with the needed RBAC policies 209 | ``` 210 | kubectl create -f service_account.yaml -n {Your Namespace} 211 | kubectl create -f role.yaml -n {Your Namespace} 212 | kubectl create -f role_binding.yaml -n {Your Namespace} 213 | ``` 214 | 5. 
Modify the clusterrolebindings and set the namespace to the correct value in the **clusterrolebinding1.yaml** and **clusterrolebinding2.yaml** files. The clusterrolebindings allow the operator to deploy LSF as an enhanced pod scheduler for Kubernetes. 215 | 6. Create the clusterrole and clusterrolebindings: 216 | ``` 217 | kubectl create -f clusterrole.yaml 218 | kubectl create -f clusterrolebinding1.yaml 219 | kubectl create -f clusterrolebinding2.yaml 220 | ``` 221 | 7. Deploy the operator 222 | ``` 223 | kubectl create -f operator.yaml -n {Your Namespace} 224 | ``` 225 | **NOTE: It is assumed that the Kubernetes cluster is allowed to pull images from [Docker hub.](https://hub.docker.com/repository/docker/ibmcom/lsfce-operator) If this is not the case the images will have to be staged on an internal registry, and the operator.yaml file modified to use the alternate imagename.** 226 | 227 | #### Moving the Images to an Internal Registry 228 | If [Docker hub.](https://hub.docker.com/repository/docker/ibmcom/lsfce-operator) is not accessible from the Kubernetes/OpenShift cluster, it will be necessary to relocate the images to an internal registry. The procedure below documents how to do that. 229 | 1. Login to a machine that has `docker` or `podman` installed and that has unrestricted access to Docker Hub. 230 | 2. Pull down the needed images e.g. 231 | ``` bash 232 | docker pull ibmcom/lsfce-operator:1.0.1 233 | docker pull ibmcom/lsfce-master:10.1.0.9 234 | docker pull ibmcom/lsfce-comp:10.1.0.9 235 | docker pull ibmcom/lsfce-gui:10.1.0.9 236 | ``` 237 | The lsfce-gui image may be omitted if you only intend to deploy the Enhanced Pod Scheduler. 238 | 3. Save the images e.g. 239 | ``` bash 240 | docker save ibmcom/lsfce-operator:1.0.1 -o lsfce-operator-1.0.1.img 241 | docker save ibmcom/lsfce-master:10.1.0.9 -o lsfce-master-10.1.0.9.img 242 | docker save ibmcom/lsfce-comp:10.1.0.9 -o lsfce-comp-10.1.0.9.img 243 | docker save ibmcom/lsfce-gui:10.1.0.9 -o lsfce-gui-10.1.0.9.img 244 | ``` 245 | 4. Move the images to a machine that has access to the internal registry 246 | 5. Load the images e.g. 247 | ``` 248 | docker load -i lsfce-operator-1.0.1.img 249 | docker load -i lsfce-master-10.1.0.9.img 250 | docker load -i lsfce-comp-10.1.0.9.img 251 | docker load -i lsfce-gui-10.1.0.9.img 252 | ``` 253 | 6. Tag the images with the internal registry name. Use your registry and project name. 254 | ``` 255 | docker tag ibmcom/lsfce-operator:1.0.1 MyRegistry/MyProject/lsfce-operator:1.0.1 256 | docker tag ibmcom/lsfce-master:10.1.0.9 MyRegistry/MyProject/lsfce-master:10.1.0.9 257 | docker tag ibmcom/lsfce-comp:10.1.0.9 MyRegistry/MyProject/lsfce-comp:10.1.0.9 258 | docker tag ibmcom/lsfce-gui:10.1.0.9 MyRegistry/MyProject/lsfce-gui:10.1.0.9 259 | ``` 260 | 7. Push the images to the internal registry. Remember to login to this registry first. 261 | ``` 262 | docker push MyRegistry/MyProject/lsfce-operator:1.0.1 263 | docker push MyRegistry/MyProject/lsfce-master:10.1.0.9 264 | docker push MyRegistry/MyProject/lsfce-comp:10.1.0.9 265 | docker push MyRegistry/MyProject/lsfce-gui:10.1.0.9 266 | ``` 267 | 268 | #### Verify the Operator is Running 269 | Use the following to verify the operator is running and ready to accept requests. OpenShift user can verify the operator is running by navigating to `Operators` then `Installed Operators`. The status should be **InstallSucceeded**. 
Kubernetes users can run the following to see the operator state: 270 | ``` 271 | kubectl get pods -n {Your Namespace} 272 | ``` 273 | When the operator is ready, it should have a **Running** status and **2/2** containers ready, e.g. 274 | ``` 275 | NAME READY STATUS RESTARTS AGE 276 | ibm-lsf-operator-5a83545d69-mdd7r 2/2 Running 0 2d 277 | ``` 278 | If the operator is not ready within a minute, check the logs by running: 279 | ``` 280 | kubectl logs -c operator -n {Your Namespace} {Name of operator pod} 281 | ``` 282 | A typical problem is a missing (cluster)rolebinding. 283 | 284 | Once the operator is deployed, the administrator can construct an LSFCluster specification file and use it with the operator to deploy an LSF cluster. 285 | 286 | 287 | ### Deleting the LSF Operator 288 | The cluster administrator can delete the LSF Operator using the following procedure: 289 | ``` 290 | kubectl delete -f operator.yaml -n {Your Namespace} 291 | kubectl delete -f clusterrolebinding1.yaml 292 | kubectl delete -f clusterrolebinding2.yaml 293 | kubectl delete -f role_binding.yaml -n {Your Namespace} 294 | kubectl delete -f role.yaml -n {Your Namespace} 295 | kubectl delete -f service_account.yaml -n {Your Namespace} 296 | kubectl delete -f lsf_v1beta1_lsfcluster_crd.yaml -n {Your Namespace} 297 | ``` 298 | 299 | **NOTE: The clusterrole and SCC are global and may be used by other clusters. They can only be deleted when no other LSF operators are deployed.** 300 | 301 | OpenShift users can delete the LSF operator from the GUI. 302 | 303 | ## Storage 304 | A Persistent Volume (PV) should be created before deploying the cluster. A dynamic volume is not recommended because the configuration and job state are persisted on the volume. Backing up this volume backs up the cluster. Consult the storage configuration documentation to set up the PV. The sample definition below is for an NFS-based persistent volume. Note the use of labels to identify this PV. These can be used when deploying the LSF cluster to control which statically created PV to use. 305 | ```yaml 306 | apiVersion: v1 307 | kind: PersistentVolume 308 | metadata: 309 | name: mylsfvol 310 | labels: 311 | lsfvol: "lsfvol" 312 | spec: 313 | capacity: 314 | storage: 10Gi 315 | accessModes: 316 | - ReadWriteMany 317 | persistentVolumeReclaimPolicy: "Retain" 318 | nfs: 319 | # FIXME: Use your NFS server's IP and export 320 | server: 10.1.1.1 321 | path: "/export/stuff" 322 | ``` 323 | Save the definition and replace the `server` and `path` values to match your NFS server. Note the labels. These are used to make sure that this volume is used with the cluster deployment. The configuration files and state information are stored in this volume. 324 | 325 | 326 | ## Deploying an LSF Cluster with the LSF Operator 327 | Deploying the LSF cluster with the operator requires an LSF cluster specification file. Sample files are available. Select the type of cluster you wish to deploy: 328 | * For **LSF on Kubernetes** clusters start with [this template.](https://github.com/IBMSpectrumComputing/lsf-kubernetes/blob/master/doc/LSF_Operator/example-lsf.yaml) 329 | * For **Enhanced Pod Scheduler** clusters start with [this template.](https://github.com/IBMSpectrumComputing/lsf-kubernetes/blob/master/doc/LSF_Operator/example-pod-sched.yaml) 330 | If Docker Hub is not accessible, and the procedure for moving the images to an internal registry was used, then the sample templates should be edited and the `image` entries updated with the internal registry name.
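For example, a minimal sketch of that edit, assuming the template was copied locally, still references the `ibmcom/` images from Docker Hub, and that `MyRegistry/MyProject` is a placeholder for your internal registry path:
```bash
# Point every image reference in the sample template at the internal registry
sed -i -e 's:ibmcom/:MyRegistry/MyProject/:g' example-lsf.yaml

# Review the resulting image entries before deploying
grep 'image:' example-lsf.yaml
```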
331 | 332 | This file is structured into functional parts: 333 | 1. **Cluster** - This contains configuration for the entire cluster. The setting here are applied to all pods. It defines the type of LSF cluster it will deploy. It defines the storage volume for the cluster to use. For **LSF on Kubernetes** clusters it includes configuration for setting up user authentication, so that ordinary users can login to the LSF GUI and submit work, and settings for accessing additional volumes for users home directories and applications. 334 | 2. **Master** - This provides the parameters for deploying the LSF master pod. It has the typical controls for the image and resources along with controls to control the placement of the pod. 335 | 3. **GUI** - This provides the parameters for deploying the LSF GUI pod. It only is used with the **LSF on Kubernetes** cluster. It has the typical controls for the image and resources along with controls for placement of the pod. 336 | 4. **Computes** - This is a list of LSF compute pod types. The cluster can have more than one OS software stack. This way the compute images can be tailored for the workload it needs to run. Each compute type can specify the image to use, along with the number of replicas, and the type of resources that this pod supports. For example, you might have some pods with a RHEL 7 software stack, and another with CentOS 6. A small compute image is provided. The sample image is based of the Red Hat UBI image. Instructions on building your own images are [here.](https://github.com/IBMSpectrumComputing/lsf-kubernetes/tree/master/doc/LSF_Operator) 337 | 338 | 339 | ## Configuration 340 | The LSF operator uses a configuration file to deploy the LSF cluster. Start with the sample files provided above and edit them for your specific needs. The instructions below provide more details on how to prepare the file. 341 | 342 | Use the instructions below to modify the cluster for your needs. Edit the file. 343 | 1. Set the name of the LSF cluster. Here it is `example-lsfcluster`. 344 | ```yaml 345 | metadata: 346 | name: example-lsfcluster 347 | ``` 348 | 349 | 2. Read the licenses and indicate acceptance by setting the `licenseAccepted` flag to `true`. The licenses are available from [http://www-03.ibm.com/software/sla/sladb.nsf](http://www-03.ibm.com/software/sla/sladb.nsf) 350 | ```yaml 351 | spec: 352 | # Indicate acceptance of the Licenses 353 | # The licenses are available from this site: 354 | # http://www-03.ibm.com/software/sla/sladb.nsf 355 | # Use the search option to find IBM Spectrum LSF CE 356 | licenseAccepted: false 357 | ``` 358 | 359 | 3. Set the namespace to the same namespace that the operator is deployed in. The `serviceAccount` can be left as is. 360 | ```yaml 361 | # Use your own namespace from the steps above 362 | namespace: ibm-lsf-project 363 | ``` 364 | 365 | 4. Set the type of cluster to deploy, either `lsf` or `podscheduler`. 366 | ```yaml 367 | spec: 368 | cluster: 369 | # The operator can deploy lsf in two different modes: 370 | # lsf - LSF is deployed as a cluster within K8s 371 | # podscheduler - LSF enhances the pod scheduling capabilities 372 | # of K8s. 373 | lsfrole: lsf 374 | ``` 375 | 376 | 5. Set the name of the cluster. This will be used as a prefix to many of the objects the operator will create 377 | ```yaml 378 | spec: 379 | cluster: 380 | clustername: mylsf 381 | ``` 382 | 383 | 6. Provide the storage parameters for the LSF cluster. Using an existing PersistentVolume (PV) is recommended. 
Label the PV with your own label and label value, and use the label as the `selectorLabel` below, and the the label value as the `selectorValue` below. If dynamic storage is to be used set `dynamicStorage` to true and specify the `storageClass`. 384 | ```yaml 385 | spec: 386 | cluster: 387 | # PersistentVolumeClaim (Storage volume) parameters 388 | pvc: 389 | dynamicStorage: false 390 | storageClass: "" 391 | selectorLabel: "lsfvol" 392 | selectorValue: "lsfvol" 393 | size: "10G" 394 | ``` 395 | 396 | 7. For **LSF on Kubernetes** clusters one or more users need to be designated as LSF administrators. These users will be able to perform LSF administrative functions using the GUI. Provide a list of the UNIX usernames to use as administrators. 397 | ```yaml 398 | spec: 399 | cluster: 400 | administrators: 401 | - someUNIXuser 402 | - someOtherUser 403 | ``` 404 | 405 | 8. For **LSF on Kubernetes** clusters the pods in the cluster will need to access user data and applications. The **volumes** section provides a way to connect existing PVs to the LSF cluster pods. Define PVs for the data and application binaries and add as many as needed for your site e.g. 406 | ```yaml 407 | volumes: 408 | - name: "Home" 409 | mount: "/home" 410 | selectorLabel: "realhome" 411 | selectorValue: "realhome" 412 | accessModes: "ReadWriteMany" 413 | size: "" 414 | - name: "Applications" 415 | mount: "/apps" 416 | selectorLabel: "apps" 417 | selectorValue: "apps" 418 | accessModes: "ReadOnlyMany" 419 | size: "" 420 | ``` 421 | **NOTE: When creating the PVs to use as volumes in the cluster do NOT set the `Reclaim Policy` to `Recycle`. This would cause Kubernetes to delete everything in the PV when the LSF cluster is deleted.** 422 | 423 | 9. For **LSF on Kubernetes** clusters users need to login to the LSF GUI to submit work you will need to define the configuration for the pod authentication. Inside the pods the entrypoint script will run **authconfig** to generate the needed configuration files. The **userauth** section allows you to: 424 | - Define the arguments to the authconfig command 425 | - Provide any configuration files needed by the authentication daemons 426 | - List any daemons that should be started for authentication. 427 | Edit the **userauth** section and define your configuration. It may be necessary to test the configuration. This can be done by logging into the master pod and running the following commands to verify that user authentication is functioning: 428 | ```bash 429 | # getent passwd 430 | # getent group 431 | ``` 432 | When the user authentication is functioning correctly these will provide the passwd and group contents. 433 | ```yaml 434 | spec: 435 | cluster: 436 | # This section is for configuring username resolution 437 | # The pods will call "authconfig" to setup the authentication 438 | # It can be used with the authentication schemes that "authconfig" 439 | # supports. 440 | userauth: 441 | # Configs are a list of secrets that will be passed to the 442 | # running pod as configuration files. This is how to pass 443 | # certificates to the authentication daemons. The secret has 444 | # a name and value and are created using: 445 | # kubectl create secret generic test-secret --from-literal=filename=filecontents 446 | # The actual filename in the pod is the filename from the configs 447 | # list below plus the filename from the command above. 
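      # As an illustration only (hypothetical secret and key names), a CA
      # certificate for an LDAP daemon could also be supplied from a file:
      #   kubectl create secret generic test-secret \
      #     --from-file=ldap-ca.crt=./ldap-ca.crt
      # The daemons would then find the certificate under the "filename"
      # path given in the configs entry below.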
448 | configs: 449 | - name: "test-secret" 450 | filename: "/etc/test/test-secret" 451 | 452 | # These are the arguments to invoke the "authconfig" command 453 | # with. This will generate the needed configuration files. 454 | # NOTE: The "--nostart" argument will be added. 455 | authconfigargs: "--enableldap --enableldapauth --ldapserver=ldap://10.10.10.10/,ldap://10.10.10.11/ --ldapbasedn=dc=mygroup,dc=company,dc=com --update" 456 | 457 | # List the daemons to start, e.g. nslcd, sssd, etc 458 | starts: 459 | - /usr/sbin/nslcd 460 | ``` 461 | 462 | 10. Placement options are provided for all the pods. They can be used to control where the pods will be placed. The `includeLabel` is used to place the pods on worker nodes that have that label. The `excludeLabel` has the opposite effect. Worker nodes that have the `excludeLabel` will not be used for running the LSF pods. Taints can also be used to taint worker nodes so that the kube-scheduler will not normally use those worker nodes for running pods. This can be used to grant the LSF cluster exclusive use of a worker node. To have a worker node exclusively for the LSF cluster taint the node and use the taint name, value and effect in the placement.tolerate... section e.g. 463 | ```yaml 464 | spec: 465 | master: # The GUI and Computes have the same controls 466 | # The placement variables control how the pods will be placed 467 | placement: 468 | # includeLabel - Optional label to apply to hosts that 469 | # should be allowed to run the compute pod 470 | includeLabel: "" 471 | 472 | # excludeLabel - Is a label to apply to hosts to prevent 473 | # them from being used to host the compute pod 474 | excludeLabel: "excludelsf" 475 | 476 | # Taints can be used to control which nodes are available 477 | # to the LSF and Kubernetes scheduler. If used these 478 | # parameters are used to allow the LSF pods to run on 479 | # tainted nodes. When not defined the K8s master nodes 480 | # will be used to host the master pod. 481 | # 482 | # tolerateName - Optional name of the taint that has been 483 | # applied to a node 484 | # tolerateValue - The value given to the taint 485 | # tolerateEffect - The effect of the taint 486 | # 487 | tolerateName: "" 488 | tolerateValue: "" 489 | tolerateEffect: NoExecute 490 | ``` 491 | 492 | 11. The `image` and `imagePullPolicy` control where and how the images are pulled. The free images are hosted on docker hub. If you are building your own images, or pulling from an internal registry change the `image` value to your internal registry 493 | ```yaml 494 | spec: 495 | master: # The GUI and Computes will have similar configuration 496 | image: "MyRegistry/MyProject/lsfce-master:10.1.0.9" 497 | imagePullPolicy: "Always" 498 | ``` 499 | 500 | 12. The `resources` section defines how much memory and CPU to assign to each pod. LSF will only use the resources provided to its pods, so the pods should be sized to allow the largest LSF job to run. Conversely **Enhanced Pod Scheduler** pods are sized for the minimum resource consumption. The defaults are good for **Enhanced Pod Scheduler**, however for **LSF on Kubernetes** clusters the `computes` `memory` and `cpu` should be increased as large as possible. 
501 | ```yaml 502 | computes: 503 | - name: "Name of this collection of compute pods" 504 | resources: 505 | # Change the cpu and memory values to as large as possible 506 | requests: 507 | cpu: "2" 508 | memory: "1G" 509 | limits: 510 | cpu: "2" 511 | memory: "1G" 512 | # Define the number of this type of pod you want to have running 513 | replicas: 1 514 | ``` 515 | 516 | 13. For **LSF on Kubernetes** clusters an alternate way pods can access data and applications is using the **mountList**. This mounts the list of provided paths from the host into the pod. The path must exist on the worker node. This is not available on OpenShift. 517 | ```yaml 518 | mountList: 519 | - /usr/local 520 | ``` 521 | 522 | 14. For **LSF on Kubernetes** clusters the LSF GUI uses a database. The GUI container communicates with the database container with the aid of a password. The password is provided via a secret. The name of the secret is provided in the LSF cluster spec file as: 523 | ```yaml 524 | spec: 525 | gui: 526 | db: 527 | passwordSecret: "db-pass" 528 | ``` 529 | The secret needs to be created prior to deploying the cluster. Replace the **MyPasswordString** with your password in the command below to generate the secret: 530 | ```bash 531 | kubectl create secret generic db-pass --from-literal=MYSQL_ROOT_PASSWORD=MyPasswordString 532 | ``` 533 | If using the OpenShift GUI create a Key/Value secret by setting the secret name, and using the key `MYSQL_ROOT_PASSWORD`. The value must be provided from a file that has the value in it. 534 | 535 | 15. For **LSF on Kubernetes** clusters the cluster can have more than one OS software stack. This is defined in the `computes` list. This way the compute images can be tailored for the workload it needs to run. Each compute type can specify the image to use, along with the number of replicas, and the type of resources that this pod supports. For example, you might have some pods with a RHEL 7 software stack, and another with CentOS 6. A small compute image is provided. Instructions on building your own images are [here.](https://github.com/IBMSpectrumComputing/lsf-kubernetes/tree/master/doc/LSF_Operator) The images should be pushed to an internal registry, and the `image` files updated for that compute type. Each compute type provides a different software stack for the applications. The `provides` is used to construct LSF resource groups, so that a user can submit a job and request the correct software stack for the application e.g. 536 | ```yaml 537 | spec: 538 | computes: 539 | - name: "MyRHEL7" 540 | # A meaningful description should be given to the pod. It should 541 | # describe what applications this pod is capable of running 542 | description: "Compute pods for Openfoam" 543 | 544 | # Content removed for clarity 545 | 546 | # The compute pods will provide the resources for running 547 | # various workloads. Resources listed here will be assigned 548 | # to the pods in LSF 549 | provider: 550 | - rhel7 551 | - openfoam 552 | 553 | - name: "TheNextComputeType" 554 | # The other compute type goes here 555 | ``` 556 | 557 | ### Deploying the Cluster 558 | The cluster is deployed by creating an instance of a **lsfcluster**. Use the file from above to deploy the cluster e.g. OpenShift users can deploy the cluster from the GUI by providing the yaml file created in the above steps. 
Kubernetes users can use the following: 559 | ```bash 560 | kubectl create -n {Your namespace} -f example-lsf.yaml 561 | ``` 562 | To check the progress of the deployment, run: 563 | ```bash 564 | kubectl get lsfclusters -n {Your namespace} 565 | kubectl get pods -n {Your namespace} 566 | ``` 567 | There should be a minimum of 4 pod types, but you may have more. 568 | ```bash 569 | NAME READY STATUS RESTARTS AGE 570 | dept-a-lsf-gui-58f6ccfdb-49x8f 2/2 Running 0 4d 571 | dept-a-lsf-master-85dbdbf6c8-sv7jr 1/1 Running 0 4d 572 | dept-a-lsf-rhel7-55f8c44cfb-vmjz8 3/3 Running 0 4d 573 | dept-a-lsf-centos6-5ac8c43cfa-fdfh2 4/4 Running 0 4d 574 | ibm-lsf-operator-5b84545b69-mdd7r 2/2 Running 0 4d 575 | ``` 576 | Only one **Enhanced Pod Scheduler** cluster can be deployed at a time. Deploying more than one will have unpredictable effects. 577 | 578 | ## Debugging your Yaml File 579 | As you are testing the LSF cluster, you may find that the pods are not created. This is usually caused by an issue in the yaml file. To debug, you can use the following commands to see what went wrong: 580 | ``` 581 | kubectl get pods |grep ibm-lsf-operator 582 | ``` 583 | This is the operator pod. You will need the name for the following steps. 584 | 585 | To see the Ansible logs run: 586 | ``` 587 | kubectl logs -c ansible {Pod name from above} 588 | ``` 589 | A successful run looks something like: 590 | ``` 591 | 592 | 593 | PLAY RECAP ********************************************************************* 594 | localhost : ok=28 changed=0 unreachable=0 failed=0 skipped=18 rescued=0 ignored=0 595 | ``` 596 | The failed count should be 0. If not, look for the failed task. This will provide a clue as to which parameter may be in error. 597 | 598 | If the log only shows: 599 | ``` 600 | Setting up watches. Beware: since -r was given, this may take a while! 601 | Watches established. 602 | ``` 603 | Either: 604 | - The cluster has not been created. Run: **oc get lsfclusters** to check. 605 | - The operator is polling for changes and has not woken up yet. Give it 30 seconds. 606 | - The operator has failed to initialize. Run: **oc logs -c operator {Operator Pod}** 607 | 608 | Another common issue is forgetting to create the database secret. When this happens, the GUI pod in the LSF on Kubernetes cluster will be stuck in a pending state. To resolve it, create the secret and re-create the cluster. 609 | 610 | ## Deleting an LSF Cluster 611 | The LSF cluster can be deleted by running: 612 | ```bash 613 | kubectl get lsfclusters -n {Your namespace} 614 | ``` 615 | This gets the name of the LSF cluster that has been deployed in this namespace. Use the name to delete the cluster, e.g. 616 | ```bash 617 | kubectl delete lsfcluster -n {Your namespace} {Your LSF Cluster name from above} 618 | ``` 619 | **NOTE: The storage may still be bound. If needed, release the storage before redeploying the cluster.** 620 | 621 | 622 | ## Accessing the Cluster 623 | How to access the cluster depends on which cluster is deployed. When the **LSF on Kubernetes** cluster is deployed, it will create a route on OpenShift or an ingress on Kubernetes. On OpenShift, navigate to `Networking`, then `Routes`, and locate the `lsf-route`. The `Location` is the URL of the LSF Application Center GUI. If authentication is set up properly, you should be able to log in using your UNIX account. 624 | 625 | The **Enhanced Pod Scheduler** cluster does not have a graphical user interface.
Instructions on how to access the features are [documented here.](https://github.com/IBMSpectrumComputing/lsf-kubernetes/blob/master/doc/LSF_Operator/README-pod-sched.md) 626 | 627 | 628 | ## Cluster Maintenance 629 | LSF Documentation is available [here.](https://www.ibm.com/support/knowledgecenter/SSWRJV_10.1.0/lsf_welcome/lsf_kc_cluster_ops.html) This documentation covers how to configure LSF. Additional documentation for managing the **Enhanced Pod Scheduler** is [documented here.](https://github.com/IBMSpectrumComputing/lsf-kubernetes/blob/master/doc/LSF_Operator/README-pod-sched.md) 630 | 631 | Although hosted on Kubernetes, **LSF on Kubernetes** clusters can be administered as outlined in the documentation. Where possible, worker nodes should be tainted to prevent other services from triggering eviction of an LSF pod. Should an LSF pod be evicted, the jobs running on it will be marked as failed, and users will have to re-run them. To manage the LSF master, you can connect to it using the following procedure: 632 | 1. Get a list of pods in the namespace/project 633 | ```bash 634 | kubectl get pods -n {namespace} 635 | ``` 636 | 2. From the list of pods, locate the LSF master pod. It will have `-master-` in the pod name, e.g. 637 | ```bash 638 | NAME READY STATUS RESTARTS AGE 639 | ibm-lsf-operator-6c49bcbc56-94csr 2/2 Running 0 26h 640 | lsf-gui-5cfb995c8c-twvhd 2/2 Running 0 34m 641 | lsf-master-5bb89b5f6-ntmmb 1/1 Running 0 34m 642 | lsf-rhel7-88b64f5f-nd4mb 1/1 Running 0 34m 643 | lsf-rhel7-88b64f5f-xl9jf 1/1 Running 0 34m 644 | ``` 645 | 3. Run an interactive shell on the LSF master pod, e.g. 646 | ```bash 647 | kubectl exec -ti -n {namespace} lsf-master-5bb89b5f6-ntmmb bash 648 | ``` 649 | 4. A Bash shell will start on the master pod, and you can run LSF commands, e.g. 650 | ```bash 651 | LSF POD [root:/]# lsid 652 | IBM Spectrum LSF Community Edition 10.1.0.0, Feb 21 2020 653 | Copyright IBM Corp. 1992, 2016. All rights reserved. 654 | US Government Users Restricted Rights - Use, duplication or disclosure restricted by GSA ADP Schedule Contract with IBM Corp. 655 | 656 | My cluster name is myCluster 657 | My master name is lsfmaster 658 | ``` 659 | With a shell on the LSF master, you can now manage it like a normal LSF cluster. 660 | 661 | ## Backups 662 | Configuration and state information is stored in the persistent volume claim (PVC). Backups of that data should be performed periodically. The state information can become stale very fast as users' work is submitted and finished. Some job state data will be lost for jobs submitted between the last backup and 663 | current time. 664 | 665 | > A reliable filesystem is critical to minimize job state loss. 666 | 667 | Dynamic provisioning of the PV is discouraged because of the difficulty in locating the correct resource to back up. Pre-creating a PVC, or labeling a PV, for the deployment to use provides the easiest way to locate the storage to back up. 668 | 669 | **NOTE: The reclaim policy should be set to "Retain", otherwise the data will be removed should the cluster be deleted.** 670 | 671 | Use the following procedure to back up the job and state data. 672 | 1. Determine the master pod and connect to it. 673 | ``` 674 | $ kubectl get pods |grep lsf-master 675 | lsf-master-56b55d6dc8-99999 1/1 Running 0 3d19h 676 | $ kubectl exec -ti lsf-master-56b55d6dc8-99999 bash 677 | ``` 678 | 679 | 2.
List the queues in the cluster with: 680 | ``` 681 | LSF POD [root@lsfmaster /]# bqueues 682 | QUEUE_NAME PRIO STATUS MAX JL/U JL/P JL/H NJOBS PEND RUN SUSP 683 | priority 43 Open:Active - - - - 0 0 0 0 684 | normal 30 Open:Active - - - - 0 153 570 0 685 | idle 20 Open:Active - - - - 0 0 0 0 686 | night 15 Open:Inact - - - - 0 0 0 0 687 | ``` 688 | 689 | 3. Close the queues to stop new jobs from starting 690 | ``` 691 | LSF POD [root@lsfmaster /]# badmin qclose {Name of Queue} 692 | ``` 693 | Repeat this for all the queues. 694 | 695 | 4. Watch the number of running jobs by running the **bqueues** command. 696 | ``` 697 | LSF POD [root@lsfmaster /]# bqueues 698 | QUEUE_NAME PRIO STATUS MAX JL/U JL/P JL/H NJOBS PEND RUN SUSP 699 | priority 43 Open:Active - - - - 0 0 0 0 700 | normal 30 Open:Active - - - - 0 397 0 0 701 | idle 20 Open:Active - - - - 0 0 0 0 702 | night 15 Open:Inact - - - - 0 0 0 0 703 | ``` 704 | Wait for the number of **RUN** jobs to drop to 0. 705 | 706 | 5. Once enough of the pods have finished, the LSF cluster can be backed up. This is done by backing up the PV contents to another location or media. 707 | 708 | 6. When the backup is complete, the jobs can be started again with: 709 | ``` 710 | LSF POD [root@lsfmaster /]# badmin qopen {Name of Queue} 711 | ``` 712 | Repeat this for all the queues. 713 | 714 | 715 | ## Restoring from a Backup 716 | Restoring from a backup requires restoring the backed-up data prior to starting the LSF cluster. The data should be restored into a PV created and labeled for LSF use. Once the data has been restored into the PV, the LSF cluster can be created using that PV as the cluster's PV. The LSF master will start and read the jobs and configuration from the files. 717 | 718 | ## Upgrading the Cluster 719 | Upgrading the cluster requires several steps to ensure that there is little disruption to the running pods. Use the following procedure: 720 | 721 | 1. Determine the master pod and connect to it. 722 | ``` 723 | $ kubectl get pods |grep lsf-master 724 | lsf-master-56b55d6dc8-99999 1/1 Running 0 3d19h 725 | $ kubectl exec -ti lsf-master-56b55d6dc8-99999 bash 726 | ``` 727 | 728 | 2. List the queues in the cluster with: 729 | ``` 730 | LSF POD [root@lsfmaster /]# bqueues 731 | QUEUE_NAME PRIO STATUS MAX JL/U JL/P JL/H NJOBS PEND RUN SUSP 732 | priority 43 Open:Active - - - - 0 0 0 0 733 | normal 30 Open:Active - - - - 0 153 570 0 734 | idle 20 Open:Active - - - - 0 0 0 0 735 | night 15 Open:Inact - - - - 0 0 0 0 736 | ``` 737 | 738 | 3. Close the queues to stop new jobs from starting 739 | ``` 740 | LSF POD [root@lsfmaster /]# badmin qclose {Name of Queue} 741 | ``` 742 | Repeat this for all the queues. 743 | 744 | 4. Watch the number of running jobs by running the **bqueues** command. 745 | ``` 746 | LSF POD [root@lsfmaster /]# bqueues 747 | QUEUE_NAME PRIO STATUS MAX JL/U JL/P JL/H NJOBS PEND RUN SUSP 748 | priority 43 Open:Active - - - - 0 0 0 0 749 | normal 30 Open:Active - - - - 0 397 0 0 750 | idle 20 Open:Active - - - - 0 0 0 0 751 | night 15 Open:Inact - - - - 0 0 0 0 752 | ``` 753 | Wait for the number of **RUN** jobs to drop to 0. 754 | 755 | 5. Once enough of the pods have finished, the LSF cluster can be upgraded. The job data is stored on the persistent volume (PV). The PV must have been created with the **Retain** reclaim policy. If it was created with the **Recycle** reclaim policy, then the PV contents need to be backed up to a new PV with a **Retain** reclaim policy.
The old LSF cluster can then be deleted, and the new one deployed. 756 | 757 | 6. Once the chart has been upgraded connect to the master pod and check the queue state. If needed reopen the queues with: 758 | ``` 759 | LSF POD [root@lsfmaster /]# badmin qopen {Name of Queue} 760 | ``` 761 | 762 | 763 | ## Copyright and trademark information 764 | © Copyright IBM Corporation 2019 765 | U.S. Government Users Restricted Rights - Use, duplication or disclosure restricted by GSA ADP Schedule Contract with IBM Corp. 766 | IBM®, the IBM logo and ibm.com® are trademarks of International Business Machines Corp., registered in many jurisdictions worldwide. Other product and service names might be trademarks of IBM or other companies. A current list of IBM trademarks is available on the Web at "Copyright and trademark information" at [www.ibm.com/legal/copytrade.shtml](https://www.ibm.com/legal/copytrade.shtml). 767 | --------------------------------------------------------------------------------