├── .gitignore ├── R ├── README.md ├── job_wrapper.sh └── myRscript.R ├── README.md ├── abaqus ├── Balldrop_phillips_benchmark.inp └── test_job.sh ├── alphafold ├── README.md ├── kernal.json └── run_alphafold_cle.sh ├── ansys ├── HKHLR-HowTo-Ansys_Fluent.pdf ├── README.md ├── run_cfx.sh ├── run_fluent_gpu.sh └── run_fluent_mpi_journal.sh ├── array ├── README.md ├── command_line ├── inputs.txt ├── slurm-406233_1.out ├── slurm-406233_10.out ├── slurm-406233_2.out ├── slurm-406233_3.out ├── slurm-406233_4.out ├── slurm-406233_5.out ├── slurm-406233_6.out ├── slurm-406233_7.out ├── slurm-406233_8.out ├── slurm-406233_9.out └── test_job.sh ├── basic ├── slurm-1217.out ├── slurm-1218.out ├── slurm-1222.out ├── slurm-666.out ├── slurm-667.out ├── slurm-668.out ├── slurm-669.out ├── statsdept_test_job.sh ├── test_job.sh └── test_pbs.sh ├── blast └── test_job.sh ├── castep └── job.sh ├── checkpoint ├── blcr │ ├── slurm-2963303.out │ ├── slurm-2963306.out │ └── test_job.sh └── dmtcp │ ├── README │ ├── ccr_buffalo │ ├── README.txt │ ├── slurm_dmtcp_ompi_tcp │ ├── slurm_dmtcp_openmp │ └── slurm_dmtcp_serial │ ├── slurm_launch.job │ ├── slurm_rstr.job │ ├── stampede │ ├── README │ ├── slurm_launch.job │ └── slurm_rstr.job │ └── ucr-hpcc │ └── single │ ├── ckpts │ ├── ckpt_bash_afc8ad-40000-3ea0c9317bade2.dmtcp │ ├── ckpt_bash_afc8ad-40000-3ea0c9317bade2_files │ │ ├── count.sh_99078 │ │ └── fd-info.txt │ ├── ckpt_sleep_afc8ad-105000-5b7f491c.dmtcp │ ├── ckpt_sleep_afc8ad-110000-5b7f4957.dmtcp │ ├── ckpt_sleep_afc8ad-115000-5b7f4994.dmtcp │ ├── ckpt_sleep_afc8ad-120000-5b7f49d0.dmtcp │ ├── ckpt_sleep_afc8ad-125000-5b7f4a0c.dmtcp │ ├── ckpt_sleep_afc8ad-130000-5b7f4a49.dmtcp │ ├── ckpt_sleep_afc8ad-135000-5b7f4a85.dmtcp │ └── dmtcp_restart_script_afc8ad-40000-3ea0c918c3d461.sh │ ├── count.sh │ ├── slurm_launch.job │ └── slurm_rstr.job ├── configs ├── active_users.sh ├── cgroup.conf ├── layout.d │ └── power.conf ├── slurm.conf ├── slurmdbd.conf ├── topology.conf └── update_slurm.sh ├── dedalus ├── README.md └── ivp_2d_rayleigh_benard │ ├── job.sh │ ├── plot_snapshots.py │ └── rayleigh_benard.py ├── depend ├── README.md ├── slurm-180.out ├── slurm-181.out ├── slurm-182.out └── test_job.sh ├── espresso ├── README.md ├── espresso.sh └── espresso_intel.sh ├── folddock └── README.md ├── galaxy └── README.md ├── gaussian ├── README.md ├── cpu_job.sh ├── cpu_job_g09.sh ├── gpu_2xp100_job.sh ├── gpu_4xk80_job.sh └── gpu_8xk80_job.sh ├── hmmer ├── README.md ├── pipeline │ ├── 01_hmmscan321_pfam34.sh │ ├── 01_hmmscan33_pfam31.sh │ ├── 01_hmmscan33_pfam34.sh │ ├── 02_hmmsearch_COX1.sh │ └── 03_hmmsearch_MPI.sh └── query │ ├── download.sh │ └── query.pep ├── interactive └── README.txt ├── jupyter ├── README.md ├── jupyter-notebook-3523021.log ├── jupyter-notebook-5205779.log ├── notebook.html ├── notebook.ipynb ├── notebook.nbconvert.ipynb ├── submit_jupyter.sh └── submit_notebook.sh ├── mathematica ├── README.md ├── submission_script.sh └── test.m ├── matlab ├── Getting_Started_With_Serial_And_Parallel_MATLAB.pdf ├── README.md ├── simple_args │ ├── command_line │ ├── job_script.sh │ ├── matlabCode.m │ ├── slurm-376816.out │ └── slurm-396765.out ├── submission_script.sh └── submission_script2.sh ├── mpi ├── R │ ├── slurm-688508.out │ ├── snow-test.R │ └── snow-test.sh ├── SUBMIT.txt ├── a.out ├── fortran │ ├── fhello_world_mpi │ └── fhello_world_mpi.F90 ├── hello-mpi ├── hello-mpi.cpp ├── mpiTest ├── mpiTest.c ├── mpiTest_mpich ├── slurm-1880596.out └── slurm-201.out ├── multi_steps └── SUBMIT.txt ├── python ├── 
README.md ├── job_py_wrapper.sh └── myPyscript.py ├── rstudio-server ├── README.md └── start-rserver.sh ├── singularity ├── deepvariant │ └── README.md ├── galaxy │ ├── README.md │ └── start_galaxy.sh ├── mariadb │ ├── README.md │ ├── create_mysql_db.sh │ └── start_mariadb.sh ├── metaerg │ ├── README.md │ └── metaerg_job.sh ├── mongo │ └── README.md └── orthomcl │ ├── README.md │ └── orthomcl_job.sh ├── spark └── spark_job.sh ├── stata ├── README.md ├── submit.sh └── test.do ├── vasp └── run.sh ├── vnc ├── READMD.md └── vnc_job.sh └── workshop ├── README.md └── SBATCH.sh /.gitignore: -------------------------------------------------------------------------------- 1 | *~ 2 | *.swp 3 | *.swo 4 | -------------------------------------------------------------------------------- /R/README.md: -------------------------------------------------------------------------------- 1 | # R 2 | 3 | Here is a basic example of how you can submit R code to the cluster. 4 | 5 | Make sure your `job_wrapper.sh` and `myRscript.R` files are in the same directory, and then submit your wrapper from that directory: 6 | 7 | 1. Make example directory 8 | 9 | ```bash 10 | mkdir ~/R_example 11 | cd ~/R_example 12 | ``` 13 | 14 | 2. Download example scripts 15 | 16 | ```bash 17 | wget https://raw.githubusercontent.com/ucr-hpcc/hpcc_slurm_examples/master/R/job_wrapper.sh 18 | wget https://raw.githubusercontent.com/ucr-hpcc/hpcc_slurm_examples/master/R/myRscript.R 19 | ``` 20 | 21 | 3. Submit wrapper 22 | 23 | ``` 24 | sbatch job_wrapper.sh 25 | ``` 26 | 27 | > NOTE: When using a real R script, you will need to adjust the `SBATCH` resource requests within `job_wrapper.sh` before submitting it. 28 | -------------------------------------------------------------------------------- /R/job_wrapper.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -l 2 | 3 | #SBATCH --nodes=1 4 | #SBATCH --ntasks=1 5 | #SBATCH --cpus-per-task=1 6 | #SBATCH --mem=10G 7 | #SBATCH --time=1-00:15:00 # 1 day and 15 minutes 8 | #SBATCH --mail-user=useremail@address.com 9 | #SBATCH --mail-type=ALL 10 | #SBATCH --job-name="R Example" 11 | #SBATCH -p epyc # You can use any of the following; epyc, intel, batch, highmem, gpu 12 | 13 | # The latest R is loaded by default 14 | # However, if you want to use a different version, then do so here 15 | #module load R 16 | 17 | # Use Rscript to run R script 18 | Rscript myRscript.R 19 | -------------------------------------------------------------------------------- /R/myRscript.R: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env Rscript 2 | 3 | # Some calculation 4 | data=1+1 5 | 6 | # Save output to csv 7 | write.csv(data,'myResults.csv', row.names=FALSE) 8 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # hpcc_slurm_examples 2 | This is a collection of Slurm examples that can be used on the HPC Cluster at the University of California, Riverside.
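Most of the example directories follow the same pattern: download the job script, adjust its `#SBATCH` resource requests for your work, and submit it with `sbatch`. A minimal sketch of that workflow, using the R wrapper above as the illustration (any of the other scripts work the same way):

```bash
# Download one of the example job scripts
wget https://raw.githubusercontent.com/ucr-hpcc/hpcc_slurm_examples/master/R/job_wrapper.sh

# Adjust the #SBATCH lines (CPUs, memory, time, partition, email) for your workload
vim job_wrapper.sh

# Submit the job and check on it
sbatch job_wrapper.sh
squeue -u $USER
```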
3 | 4 | ## Credit 5 | Some examples borrowed from: 6 | 7 | [http://www.nersc.gov/users/computational-systems/cori/running-jobs/example-batch-scripts/](http://www.nersc.gov/users/computational-systems/cori/running-jobs/example-batch-scripts/) 8 | 9 | [https://docs.ycrc.yale.edu/clusters-at-yale/guides/jupyter/](https://docs.ycrc.yale.edu/clusters-at-yale/guides/jupyter/) 10 | 11 | [https://www.dursi.ca/post/spark-in-hpc-clusters.html](https://www.dursi.ca/post/spark-in-hpc-clusters.html) 12 | -------------------------------------------------------------------------------- /abaqus/test_job.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -l 2 | 3 | #SBATCH --nodes=1 4 | #SBATCH --ntasks=1 5 | #SBATCH --cpus-per-task=5 6 | #SBATCH --mem=10G 7 | #SBATCH --time=1-00:15:00 # 1 day and 15 minutes 8 | #SBATCH --mail-user=useremail@address.com 9 | #SBATCH --mail-type=ALL 10 | #SBATCH --job-name="just_a_test" 11 | #SBATCH -p epyc # You can use any of the following; epyc, intel, batch, highmem, gpu 12 | 13 | # Unset this variable, since Abaqus errors when it is set 14 | export SLURM_GTIDS= 15 | 16 | # Load software 17 | module load abaqus 18 | 19 | # Run abaqus 20 | abaqus job="Ball" input=./Balldrop_phillips_benchmark.inp interactive 21 | 22 | # Other useful options for abaqus: 23 | # parallel_mode=MPI 24 | # mp_mode={mpi | threads} 25 | # gpus=number-of-gpgpus 26 | # memory=memory-size 27 | # interactive 28 | # scratch=scratch-dir 29 | # timeout=co-simulation timeout value in seconds 30 | 31 | -------------------------------------------------------------------------------- /alphafold/README.md: -------------------------------------------------------------------------------- 1 | # AlphaFold 2 | 3 | ## Running 4 | 5 | ### Cluster 6 | 7 | In order to run AlphaFold, you need to utilize the installed workflow under a `Singularity` container. 8 | 9 | The [run_alphafold_cle.sh](run_alphafold_cle.sh) file is an example of running AlphaFold on the HPCC. 10 | 11 | Once downloaded and altered to your preferences, you can submit this script as a job, like so: 12 | 13 | ```bash 14 | # Download 15 | wget https://raw.githubusercontent.com/ucr-hpcc/hpcc_slurm_examples/master/alphafold/run_alphafold_cle.sh 16 | 17 | # Edit 18 | vim run_alphafold_cle.sh 19 | 20 | # Submit 21 | sbatch run_alphafold_cle.sh 22 | ``` 23 | 24 | ## Jupyter 25 | 26 | ### JupyterHub 27 | 28 | The `Singularity` container can also be used within our [Jupyter](https://jupyter.hpcc.ucr.edu) service (limited CPU and RAM, and no GPUs). 29 | 30 | All that is required is that you download the [kernal.json](kernal.json) file and place it under the following directory: 31 | 32 | ```bash 33 | # Download 34 | wget https://raw.githubusercontent.com/ucr-hpcc/hpcc_slurm_examples/master/alphafold/kernal.json 35 | 36 | # Create directory 37 | mkdir -p ~/.local/share/jupyter/kernels/alphafold 38 | 39 | # Move the kernel file into place (the downloaded file is named kernal.json) 40 | mv kernal.json ~/.local/share/jupyter/kernels/alphafold/kernel.json 41 | ``` 42 | 43 | ### Jupyter Job 44 | 45 | If your job requires heavy resources or GPUs, then you will need to submit your own `Jupyter` notebook server on the cluster ([Jupyter: as a job](https://github.com/ucr-hpcc/hpcc_slurm_examples/tree/master/jupyter#interactively-as-a-job)).
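A rough sketch of what such a job script might look like is below. It assumes the Singularity and container paths from [kernal.json](kernal.json) above, and that the image provides the `jupyter` command alongside `ipykernel`; the submit scripts in the jupyter directory linked above are the maintained recipe for actually connecting to the notebook.

```bash
#!/bin/bash -l
#SBATCH --ntasks=4
#SBATCH --mem=24gb
#SBATCH --time=0-08:00:00
#SBATCH -p gpu
#SBATCH --gres=gpu:1
#SBATCH --job-name=alphafold_jupyter

# Paths taken from kernal.json; adjust if your install differs
SINGULARITY=/opt/linux/centos/7.x/x86_64/pkgs/singularity/3.7.3/bin/singularity
SIF=/opt/linux/centos/7.x/x86_64/pkgs/alphafold/2.0.0/alphafold.sif

# Assumes the container ships the jupyter CLI under /opt/conda
$SINGULARITY exec --nv $SIF /opt/conda/bin/jupyter notebook \
    --no-browser --ip=$(hostname -s) --port=8888
```

Once the job starts, tunnel from your workstation to the node and port reported in the job log, as described in the jupyter examples.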
46 | 47 | 48 | 49 | -------------------------------------------------------------------------------- /alphafold/kernal.json: -------------------------------------------------------------------------------- 1 | { 2 | "language": "python", 3 | "argv": ["/opt/linux/centos/7.x/x86_64/pkgs/singularity/3.7.3/bin/singularity", 4 | "exec", 5 | "/opt/linux/centos/7.x/x86_64/pkgs/alphafold/2.0.0/alphafold.sif", 6 | "/opt/conda/bin/python", 7 | "-m", 8 | "ipykernel", 9 | "-f", 10 | "{connection_file}" 11 | ], 12 | "display_name": "AlphaFold (2.0.0)" 13 | } 14 | -------------------------------------------------------------------------------- /alphafold/run_alphafold_cle.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/bash 2 | #SBATCH --ntasks=24 3 | #SBATCH -N 1 4 | #SBATCH --mem=48gb 5 | #SBATCH --time=1-00:00:00 6 | #SBATCH -p gpu 7 | #SBATCH --gres=gpu:1 8 | #SBATCH --out=logs/alphafold.%A.log 9 | #SBATCH -J calp_alpha 10 | ##SBATCH --mail-type=END # notifications for job done & fail 11 | ##SBATCH --mail-user=cassande@ucr.edu # send-to address 12 | ##SBATCH -D /rhome/cassande/shared/projects/Caulerpa/alphafold_test/ 13 | 14 | # Path to directory of supporting data, the databases! 15 | data_dir=/srv/projects/db/alphafold 16 | DOWNLOAD_DIR=$data_dir 17 | 18 | # Path to a directory that will store the results 19 | output_dir="${PWD}/CLENT_006666_model" 20 | 21 | # Names of models to use (a comma separated list) 22 | model_names=model_1 23 | 24 | # Path to a FASTA file containing one sequence 25 | fasta_path="${PWD}/query.fasta" 26 | 27 | # Last template date to consider in model in (ISO-8601 format - i.e. YYYY-MM-DD) 28 | max_template_date=2020-08-12 29 | 30 | # Enable NVIDIA runtime to run with GPUs (default: True) 31 | use_gpu=true 32 | 33 | # OpenMM threads (default: all available cores) 34 | openmm_threads=24 35 | 36 | # Comma separated list of devices to pass to 'CUDA_VISIBLE_DEVICES' (default: 0) 37 | gpu_devices=0 38 | 39 | # Choose preset model configuration - no ensembling and smaller genetic database config (reduced_dbs), no ensembling and full genetic database config (full_dbs) or full genetic database config and 8 model ensemblings (casp14) 40 | preset=full_dbs 41 | 42 | # Run multiple JAX model evaluations to obtain a timing that excludes the compilation time, which should be more indicative of the time required for inferencing many proteins (default: 'False') 43 | benchmark=false 44 | 45 | # Manually set CUDA devices 46 | #export SINGULARITYENV_CUDA_VISIBLE_DEVICES=-1 47 | #if [[ "$use_gpu" == true ]] ; then 48 | # export SINGULARITYENV_CUDA_VISIBLE_DEVICES=0 49 | 50 | # if [[ "$gpu_devices" ]] ; then 51 | # export SINGULARITYENV_CUDA_VISIBLE_DEVICES=$gpu_devices 52 | # fi 53 | #fi 54 | 55 | # OpenMM threads control 56 | #if [[ "$openmm_threads" ]] ; then 57 | # export SINGULARITYENV_OPENMM_CPU_THREADS=$openmm_threads 58 | #fi 59 | 60 | # TensorFlow control 61 | #export SINGULARITYENV_TF_FORCE_UNIFIED_MEMORY='1' 62 | 63 | # JAX control 64 | #export SINGULARITYENV_XLA_PYTHON_CLIENT_MEM_FRACTION='4.0' 65 | 66 | # Path and user config (change me if required) 67 | bfd_database_path=$data_dir/bfd/bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt 68 | small_bfd_database_path=$data_dir/small_bfd/bfd-first_non_consensus_sequences.fasta 69 | mgnify_database_path=$data_dir/mgnify/mgy_clusters_2018_12.fa 70 | template_mmcif_dir=$data_dir/pdb_mmcif/mmcif_files/ 71 | obsolete_pdbs_path=$data_dir/pdb_mmcif/obsolete.dat 72 | 
pdb70_database_path=$data_dir/pdb70/pdb70 73 | uniclust30_database_path=$data_dir/uniclust30/uniclust30_2018_08/uniclust30_2018_08 74 | uniref90_database_path=$data_dir/uniref90/uniref90.fasta 75 | 76 | # Binary path defaults should work within singularity 77 | #hhblits_binary_path=$(which hhblits) 78 | #hhsearch_binary_path=$(which hhsearch) 79 | #jackhmmer_binary_path=$(which jackhmmer) 80 | #kalign_binary_path=$(which kalign) 81 | 82 | # Load alphafold 83 | module load alphafold/2.1.2 84 | 85 | # Load scratch 86 | module load workspace/scratch 87 | export SINGULARITY_BIND="${SCRATCH}:/tmp" 88 | 89 | # Run alphafold container with nvidia support 90 | singularity run --bind ${data_dir} --nv $ALPHAFOLD_SING \ 91 | --bfd_database_path=$bfd_database_path \ 92 | --mgnify_database_path=$mgnify_database_path \ 93 | --template_mmcif_dir=$template_mmcif_dir \ 94 | --obsolete_pdbs_path=$obsolete_pdbs_path \ 95 | --pdb70_database_path=$pdb70_database_path \ 96 | --uniclust30_database_path=$uniclust30_database_path \ 97 | --uniref90_database_path=$uniref90_database_path \ 98 | --data_dir=$data_dir \ 99 | --output_dir=$output_dir \ 100 | --fasta_paths=$fasta_path \ 101 | --model_names=$model_names \ 102 | --max_template_date=$max_template_date \ 103 | --preset=$preset \ 104 | --benchmark=$benchmark \ 105 | --logtostderr 106 | 107 | -------------------------------------------------------------------------------- /ansys/HKHLR-HowTo-Ansys_Fluent.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ucr-hpcc/hpcc_slurm_examples/74b84cb0c85dedf1a096d03e6e9e515ae36a8c22/ansys/HKHLR-HowTo-Ansys_Fluent.pdf -------------------------------------------------------------------------------- /ansys/README.md: -------------------------------------------------------------------------------- 1 | # Ansys Examples 2 | Each `*.sh` file here can be used as a template. 3 | Modify as needed and then submit it to the cluster using the `sbatch` command.
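For example, to use the CFX template below (the raw GitHub URL follows the same pattern as the other examples in this repository):

```bash
# Download the template
wget https://raw.githubusercontent.com/ucr-hpcc/hpcc_slurm_examples/master/ansys/run_cfx.sh

# Edit the #SBATCH resource requests and the solver arguments for your model
vim run_cfx.sh

# Submit to the cluster
sbatch run_cfx.sh
```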
4 | -------------------------------------------------------------------------------- /ansys/run_cfx.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -l 2 | 3 | #SBATCH --nodes=1 # 1 node 4 | #SBATCH --ntasks=16 # 16 Cores 5 | #SBATCH --mem-per-cpu=50G # 50 GB of RAM 6 | #SBATCH --time=7-00:00:00 # 7 days 7 | #SBATCH --output=my.stdout # Standard output file 8 | #SBATCH --mail-user=useremail@address.com # Your email 9 | #SBATCH --mail-type=ALL # Send mail on start,fail,complete 10 | #SBATCH --job-name="CFX Job" # Name of Job 11 | #SBATCH -p epyc # Use epyc nodes 12 | 13 | # Load ansys 14 | module load ansys 15 | 16 | # Do work 17 | cfx5solve -partition 16 -s 51200M -scat 1.5x -def Transient.def -ini Transient.res 18 | 19 | -------------------------------------------------------------------------------- /ansys/run_fluent_gpu.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -l 2 | 3 | #SBATCH --nodes=1 # 1 node 4 | #SBATCH --ntasks=16 # 16 Cores 5 | #SBATCH --mem-per-cpu=50G # 50 GB of RAM 6 | #SBATCH --time=7-00:00:00 # 7 days 7 | #SBATCH --output=my.stdout # Standard output file 8 | #SBATCH --mail-user=email@domain.com # Your email 9 | #SBATCH --mail-type=ALL # Send mail on start,fail,complete 10 | #SBATCH --job-name="Fluent Job" # Name of Job 11 | #SBATCH -p gpu # Use gpu nodes 12 | #SBATCH --gres=gpu:1 # Use 1 gpu 13 | 14 | # Load ansys 15 | module load ansys 16 | 17 | # ToDo 18 | # Need examples 19 | # Here is a good reference: 20 | #https://www.sharcnet.ca/Software/Ansys/16.2.3/en-us/help/flu_ug/flu_ug_sec_parallel_unix_command.html 21 | 22 | # Usage 23 | #fluent version -tnprocs [-gpgpu=ngpgpus ] [-pinterconnect ] [-mpi=mpi_type ] -cnf=hosts_file 24 | 25 | -------------------------------------------------------------------------------- /ansys/run_fluent_mpi_journal.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -l 2 | #SBATCH --ntasks=64 3 | #SBATCH -t 20:00:00 4 | #SBATCH --mem-per-cpu=6000 5 | 6 | module load intel 7 | module load ansys 8 | 9 | #Get a unique temporary filename to use for our nodelist 10 | FLUENTNODEFILE=$(mktemp) 11 | 12 | #Output the nodes to our nodelist file 13 | scontrol show hostnames > $FLUENTNODEFILE 14 | 15 | #Display to us the nodes being used 16 | echo "Running on nodes:" 17 | cat $FLUENTNODEFILE 18 | 19 | #Run fluent with the requested number of tasks on the assigned nodes 20 | fluent 3ddp -g -t $SLURM_NTASKS -mpi=intel -ssh -cnf="$FLUENTNODEFILE" -i YOUR_JOU_FILE 21 | 22 | #Clean up 23 | rm $FLUENTNODEFILE 24 | -------------------------------------------------------------------------------- /array/README.md: -------------------------------------------------------------------------------- 1 | # Array Job 2 | 3 | You can consider using an array job if you want to submit many jobs that look identical, except for the input. 4 | 5 | 6 | First write a job script that would work for a single input (for example, by removing a bash loop over inputs). 7 | Then use the `${SLURM_ARRAY_TASK_ID}` environment variable to control which input you should be processing. 8 | 9 | After that you need to submit the job using the `array` option, like so: 10 | 11 | ```bash 12 | sbatch --array=1-10 test_job.sh 13 | ``` 14 | 15 | This will copy the job into 10 tasks and the numbers 1 through 10 will each be used for the `$SLURM_ARRAY_TASK_ID` variable within each task.
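For instance, inside the job script the task ID can be used to pull one line out of an input list; this is exactly what [test_job.sh](test_job.sh), shown in the Examples section below, does:

```bash
# Select line number ${SLURM_ARRAY_TASK_ID} from inputs.txt,
# so task 1 gets "a", task 2 gets "b", and so on
input=$(sed -n "${SLURM_ARRAY_TASK_ID}p" inputs.txt)
echo "$input"
```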
16 | 17 | You can also control how many tasks are processed at the same time with the following syntax: 18 | 19 | ```bash 20 | sbatch --array=1-10%2 test_job.sh 21 | ``` 22 | 23 | This will only allow 2 out of 10 tasks to run at the same time. 24 | 25 | # Examples 26 | 27 | Here is an example of a [test_job.sh](test_job.sh) submission script and an [inputs.txt](inputs.txt) file to demonstrate how the `{SLURM_ARRAY_TASK_ID}` environment variable can be used to pull the correct input. 28 | 29 | -------------------------------------------------------------------------------- /array/command_line: -------------------------------------------------------------------------------- 1 | sbatch --array=1-10 -N 1 test_job.sh 2 | -------------------------------------------------------------------------------- /array/inputs.txt: -------------------------------------------------------------------------------- 1 | a 2 | b 3 | c 4 | d 5 | e 6 | f 7 | g 8 | h 9 | i 10 | j 11 | -------------------------------------------------------------------------------- /array/slurm-406233_1.out: -------------------------------------------------------------------------------- 1 | Processing task 1 2 | a 3 | -------------------------------------------------------------------------------- /array/slurm-406233_10.out: -------------------------------------------------------------------------------- 1 | Processing task 10 2 | j 3 | -------------------------------------------------------------------------------- /array/slurm-406233_2.out: -------------------------------------------------------------------------------- 1 | Processing task 2 2 | b 3 | -------------------------------------------------------------------------------- /array/slurm-406233_3.out: -------------------------------------------------------------------------------- 1 | Processing task 3 2 | c 3 | -------------------------------------------------------------------------------- /array/slurm-406233_4.out: -------------------------------------------------------------------------------- 1 | Processing task 4 2 | d 3 | -------------------------------------------------------------------------------- /array/slurm-406233_5.out: -------------------------------------------------------------------------------- 1 | Processing task 5 2 | e 3 | -------------------------------------------------------------------------------- /array/slurm-406233_6.out: -------------------------------------------------------------------------------- 1 | Processing task 6 2 | f 3 | -------------------------------------------------------------------------------- /array/slurm-406233_7.out: -------------------------------------------------------------------------------- 1 | Processing task 7 2 | g 3 | -------------------------------------------------------------------------------- /array/slurm-406233_8.out: -------------------------------------------------------------------------------- 1 | Processing task 8 2 | h 3 | -------------------------------------------------------------------------------- /array/slurm-406233_9.out: -------------------------------------------------------------------------------- 1 | Processing task 9 2 | i 3 | -------------------------------------------------------------------------------- /array/test_job.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #SBATCH --nodes=1 3 | #SBATCH --ntasks=1 4 | #SBATCH --mem-per-cpu=1G 5 | #SBATCH --time=0-00:15:00 # 15 minutes 6 | ##SBATCH --output=my.stdout 7 | ##SBATCH 
--mail-user=jhayes@ucr.edu 8 | ##SBATCH --mail-type=ALL 9 | ##SBATCH --job-name="just_a_test" 10 | 11 | echo "Processing task ${SLURM_ARRAY_TASK_ID}" 12 | 13 | # Get input value from file 14 | input=$(sed -n "${SLURM_ARRAY_TASK_ID}p" inputs.txt) 15 | 16 | # Run a command with input, replace "echo" with real command 17 | echo $input 18 | -------------------------------------------------------------------------------- /basic/slurm-1217.out: -------------------------------------------------------------------------------- 1 | Mon Dec 12 13:34:06 PST 2016 2 | i01 3 | -------------------------------------------------------------------------------- /basic/slurm-1218.out: -------------------------------------------------------------------------------- 1 | Mon Dec 12 13:35:24 PST 2016 2 | i02 3 | -------------------------------------------------------------------------------- /basic/slurm-1222.out: -------------------------------------------------------------------------------- 1 | Mon Dec 12 14:22:24 PST 2016 2 | Mon Dec 12 14:22:24 PST 2016 3 | Mon Dec 12 14:22:24 PST 2016 4 | Mon Dec 12 14:22:24 PST 2016 5 | Mon Dec 12 14:22:24 PST 2016 6 | Mon Dec 12 14:22:24 PST 2016 7 | Mon Dec 12 14:22:24 PST 2016 8 | Mon Dec 12 14:22:24 PST 2016 9 | Mon Dec 12 14:22:24 PST 2016 10 | Mon Dec 12 14:22:24 PST 2016 11 | Mon Dec 12 14:22:24 PST 2016 12 | Mon Dec 12 14:22:24 PST 2016 13 | Mon Dec 12 14:22:24 PST 2016 14 | Mon Dec 12 14:22:24 PST 2016 15 | Mon Dec 12 14:22:24 PST 2016 16 | Mon Dec 12 14:22:24 PST 2016 17 | Mon Dec 12 14:22:24 PST 2016 18 | Mon Dec 12 14:22:24 PST 2016 19 | Mon Dec 12 14:22:24 PST 2016 20 | Mon Dec 12 14:22:24 PST 2016 21 | Mon Dec 12 14:22:24 PST 2016 22 | Mon Dec 12 14:22:24 PST 2016 23 | Mon Dec 12 14:22:24 PST 2016 24 | Mon Dec 12 14:22:24 PST 2016 25 | Mon Dec 12 14:22:24 PST 2016 26 | Mon Dec 12 14:22:24 PST 2016 27 | Mon Dec 12 14:22:24 PST 2016 28 | Mon Dec 12 14:22:24 PST 2016 29 | Mon Dec 12 14:22:24 PST 2016 30 | Mon Dec 12 14:22:24 PST 2016 31 | Mon Dec 12 14:22:24 PST 2016 32 | Mon Dec 12 14:22:24 PST 2016 33 | Mon Dec 12 14:22:24 PST 2016 34 | Mon Dec 12 14:22:24 PST 2016 35 | Mon Dec 12 14:22:24 PST 2016 36 | Mon Dec 12 14:22:24 PST 2016 37 | Mon Dec 12 14:22:24 PST 2016 38 | Mon Dec 12 14:22:24 PST 2016 39 | Mon Dec 12 14:22:24 PST 2016 40 | Mon Dec 12 14:22:24 PST 2016 41 | Mon Dec 12 14:22:24 PST 2016 42 | Mon Dec 12 14:22:24 PST 2016 43 | Mon Dec 12 14:22:24 PST 2016 44 | Mon Dec 12 14:22:24 PST 2016 45 | Mon Dec 12 14:22:24 PST 2016 46 | Mon Dec 12 14:22:24 PST 2016 47 | Mon Dec 12 14:22:24 PST 2016 48 | Mon Dec 12 14:22:24 PST 2016 49 | Mon Dec 12 14:22:24 PST 2016 50 | Mon Dec 12 14:22:24 PST 2016 51 | Mon Dec 12 14:22:24 PST 2016 52 | Mon Dec 12 14:22:24 PST 2016 53 | Mon Dec 12 14:22:24 PST 2016 54 | Mon Dec 12 14:22:24 PST 2016 55 | Mon Dec 12 14:22:24 PST 2016 56 | Mon Dec 12 14:22:24 PST 2016 57 | Mon Dec 12 14:22:24 PST 2016 58 | Mon Dec 12 14:22:24 PST 2016 59 | Mon Dec 12 14:22:24 PST 2016 60 | Mon Dec 12 14:22:24 PST 2016 61 | Mon Dec 12 14:22:24 PST 2016 62 | Mon Dec 12 14:22:24 PST 2016 63 | Mon Dec 12 14:22:24 PST 2016 64 | Mon Dec 12 14:22:24 PST 2016 65 | slurmstepd-i02: error: execve(): slep: No such file or directory 66 | slurmstepd-i02: error: execve(): slep: No such file or directory 67 | slurmstepd-i02: error: execve(): slep: No such file or directory 68 | slurmstepd-i02: error: execve(): slep: No such file or directory 69 | slurmstepd-i02: error: execve(): slep: No such file or directory 70 | slurmstepd-i02: error: execve(): slep: No such 
file or directory 71 | slurmstepd-i02: error: execve(): slep: No such file or directory 72 | slurmstepd-i02: error: execve(): slep: No such file or directory 73 | slurmstepd-i02: error: execve(): slep: No such file or directory 74 | slurmstepd-i02: error: execve(): slep: No such file or directory 75 | slurmstepd-i02: error: execve(): slep: No such file or directory 76 | slurmstepd-i02: error: execve(): slep: No such file or directory 77 | slurmstepd-i02: error: execve(): slep: No such file or directory 78 | slurmstepd-i02: error: execve(): slep: No such file or directory 79 | slurmstepd-i02: error: execve(): slep: No such file or directory 80 | slurmstepd-i02: error: execve(): slep: No such file or directory 81 | slurmstepd-i02: error: execve(): slep: No such file or directory 82 | slurmstepd-i02: error: execve(): slep: No such file or directory 83 | slurmstepd-i02: error: execve(): slep: No such file or directory 84 | slurmstepd-i02: error: execve(): slep: No such file or directory 85 | slurmstepd-i02: error: execve(): slep: No such file or directory 86 | slurmstepd-i02: error: execve(): slep: No such file or directory 87 | slurmstepd-i02: error: execve(): slep: No such file or directory 88 | slurmstepd-i02: error: execve(): slep: No such file or directory 89 | slurmstepd-i02: error: execve(): slep: No such file or directory 90 | slurmstepd-i02: error: execve(): slep: No such file or directory 91 | slurmstepd-i02: error: execve(): slep: No such file or directory 92 | slurmstepd-i02: error: execve(): slep: No such file or directory 93 | slurmstepd-i02: error: execve(): slep: No such file or directory 94 | slurmstepd-i02: error: execve(): slep: No such file or directory 95 | slurmstepd-i02: error: execve(): slep: No such file or directory 96 | slurmstepd-i02: error: execve(): slep: No such file or directory 97 | slurmstepd-i02: error: execve(): slep: No such file or directory 98 | slurmstepd-i02: error: execve(): slep: No such file or directory 99 | slurmstepd-i02: error: execve(): slep: No such file or directory 100 | slurmstepd-i02: error: execve(): slep: No such file or directory 101 | slurmstepd-i02: error: execve(): slep: No such file or directory 102 | slurmstepd-i02: error: execve(): slep: No such file or directory 103 | slurmstepd-i02: error: execve(): slep: No such file or directory 104 | slurmstepd-i02: error: execve(): slep: No such file or directory 105 | slurmstepd-i02: error: execve(): slep: No such file or directory 106 | slurmstepd-i02: error: execve(): slep: No such file or directory 107 | slurmstepd-i02: error: execve(): slep: No such file or directory 108 | slurmstepd-i02: error: execve(): slep: No such file or directory 109 | slurmstepd-i02: error: execve(): slep: No such file or directory 110 | slurmstepd-i02: error: execve(): slep: No such file or directory 111 | slurmstepd-i02: error: execve(): slep: No such file or directory 112 | slurmstepd-i02: error: execve(): slep: No such file or directory 113 | slurmstepd-i02: error: execve(): slep: No such file or directory 114 | slurmstepd-i02: error: execve(): slep: No such file or directory 115 | slurmstepd-i02: error: execve(): slep: No such file or directory 116 | slurmstepd-i02: error: execve(): slep: No such file or directory 117 | slurmstepd-i02: error: execve(): slep: No such file or directory 118 | slurmstepd-i02: error: execve(): slep: No such file or directory 119 | slurmstepd-i02: error: execve(): slep: No such file or directory 120 | slurmstepd-i02: error: execve(): slep: No such file or directory 121 | 
slurmstepd-i02: error: execve(): slep: No such file or directory 122 | slurmstepd-i02: error: execve(): slep: No such file or directory 123 | slurmstepd-i02: error: execve(): slep: No such file or directory 124 | slurmstepd-i02: error: execve(): slep: No such file or directory 125 | slurmstepd-i02: error: execve(): slep: No such file or directory 126 | slurmstepd-i02: error: execve(): slep: No such file or directory 127 | slurmstepd-i02: error: execve(): slep: No such file or directory 128 | slurmstepd-i02: error: execve(): slep: No such file or directory 129 | srun: error: i02: tasks 0-63: Exited with exit code 2 130 | i02 131 | i02 132 | i02 133 | i02 134 | i02 135 | i02 136 | i02 137 | i02 138 | i02 139 | i02 140 | i02 141 | i02 142 | i02 143 | i02 144 | i02 145 | i02 146 | i02 147 | i02 148 | i02 149 | i02 150 | i02 151 | i02 152 | i02 153 | i02 154 | i02 155 | i02 156 | i02 157 | i02 158 | i02 159 | i02 160 | i02 161 | i02 162 | i02 163 | i02 164 | i02 165 | i02 166 | i02 167 | i02 168 | i02 169 | i02 170 | i02 171 | i02 172 | i02 173 | i02 174 | i02 175 | i02 176 | i02 177 | i02 178 | i02 179 | i02 180 | i02 181 | i02 182 | i02 183 | i02 184 | i02 185 | i02 186 | i02 187 | i02 188 | i02 189 | i02 190 | i02 191 | i02 192 | i02 193 | i02 194 | -------------------------------------------------------------------------------- /basic/slurm-666.out: -------------------------------------------------------------------------------- 1 | Thu Nov 24 01:36:29 PST 2016 2 | /rhome/jhayes/iigb/slurm/basic 3 | i01 4 | -------------------------------------------------------------------------------- /basic/slurm-667.out: -------------------------------------------------------------------------------- 1 | Thu Nov 24 01:37:26 PST 2016 2 | Currently Loaded Modulefiles: 3 | 1) vim/7.4.1952 6) ggobi/2.1.11 4 | 2) tmux/2.2 7) R/3.3.0 5 | 3) python/2.7.5 8) perl/5.20.2 6 | 4) slurm/16.05.4 9) less-highlight/1.0 7 | 5) openmpi/2.0.1-slurm-16.05.4 10) iigb_utilities/1 8 | i01 9 | -------------------------------------------------------------------------------- /basic/slurm-668.out: -------------------------------------------------------------------------------- 1 | Thu Nov 24 01:37:55 PST 2016 2 | Currently Loaded Modulefiles: 3 | 1) vim/7.4.1952 6) ggobi/2.1.11 4 | 2) tmux/2.2 7) R/3.3.0 5 | 3) python/2.7.5 8) perl/5.20.2 6 | 4) slurm/16.05.4 9) less-highlight/1.0 7 | 5) openmpi/2.0.1-slurm-16.05.4 10) iigb_utilities/1 8 | i01 9 | -------------------------------------------------------------------------------- /basic/slurm-669.out: -------------------------------------------------------------------------------- 1 | Thu Nov 24 01:38:24 PST 2016 2 | /rhome/jhayes/iigb/slurm/basic 3 | Currently Loaded Modulefiles: 4 | 1) vim/7.4.1952 6) ggobi/2.1.11 5 | 2) tmux/2.2 7) R/3.3.0 6 | 3) python/2.7.5 8) perl/5.20.2 7 | 4) slurm/16.05.4 9) less-highlight/1.0 8 | 5) openmpi/2.0.1-slurm-16.05.4 10) iigb_utilities/1 9 | i01 10 | -------------------------------------------------------------------------------- /basic/statsdept_test_job.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #SBATCH --nodes=1 3 | #SBATCH --ntasks=1 4 | #SBATCH --cpus-per-task=1 5 | #SBATCH -p statsdept 6 | #SBATCH --mem-per-cpu=1G 7 | #SBATCH --time=0-00:15:00 # 15 minutes 8 | ##SBATCH --mail-user=email@address.com 9 | ##SBATCH --mail-type=ALL 10 | ##SBATCH --job-name="just_a_test" 11 | 12 | date 13 | sleep 60 14 | hostname 15 | 
-------------------------------------------------------------------------------- /basic/test_job.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #SBATCH --nodes=1 3 | #SBATCH --ntasks=1 4 | #SBATCH --cpus-per-task=1 5 | #SBATCH -p epyc 6 | #SBATCH --mem-per-cpu=1G 7 | #SBATCH --time=0-00:15:00 # 15 minutes 8 | ##SBATCH --mail-user=email@address.com 9 | ##SBATCH --mail-type=ALL 10 | ##SBATCH --job-name="just_a_test" 11 | 12 | date 13 | sleep 60 14 | hostname 15 | -------------------------------------------------------------------------------- /basic/test_pbs.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | #PBS -N test_job 3 | #PBS -l nodes=1,walltime=01:00:00 4 | #PBS -q batch 5 | 6 | date 7 | sleep 60 8 | hostname 9 | -------------------------------------------------------------------------------- /blast/test_job.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -l 2 | 3 | #SBATCH -c 10 4 | #SBATCH --mem=10g 5 | #SBATCH --time=2:00:00 6 | #SBATCH -p short 7 | 8 | module load ncbi-blast 9 | cd ~/bigdata/Projects/blast_fasta/ 10 | blastp -num_threads 10 dsg sdgsdg dhfdh 11 | 12 | -------------------------------------------------------------------------------- /castep/job.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -l 2 | #SBATCH -N 1 3 | #SBATCH -c 1 4 | #SBATCH -n 4 5 | #SBATCH -p short 6 | #SBATCH --time=10:00 7 | 8 | # Ensure cleanworking dir 9 | rm -rf ~/bigdata/Projects/castep 10 | mkdir -p ~/bigdata/Projects/castep 11 | 12 | # Move to working dir 13 | cd ~/bigdata/Projects/castep 14 | 15 | # Get data 16 | wget http://www.castep.org/files/Si2.tgz 17 | 18 | # Extract data 19 | tar -xf Si2.tgz 20 | 21 | # Move to data 22 | cd Si2 23 | 24 | # Clear default modules 25 | module purge 26 | # Load common modules 27 | module load slurm hpcc_user_utils 28 | 29 | # Load module based on CPU type 30 | if [[ $(cpu_type) == "intel" ]] || [[ $(cpu_type) == "xeon" ]]; then 31 | echo "Loading intel" 32 | module load castep/19.11_intel-2017 33 | else 34 | echo "Loading gcc" 35 | module load castep/19.11_gcc-8.3.0 36 | fi 37 | 38 | # Run with mpi 39 | mpirun -n 4 castep.mpi Si2 40 | -------------------------------------------------------------------------------- /checkpoint/blcr/slurm-2963303.out: -------------------------------------------------------------------------------- 1 | srun_cr: error while loading shared libraries: libcr.so.0: cannot open shared object file: No such file or directory 2 | 1 3 | srun_cr: error while loading shared libraries: libcr.so.0: cannot open shared object file: No such file or directory 4 | 2 5 | srun_cr: error while loading shared libraries: libcr.so.0: cannot open shared object file: No such file or directory 6 | 3 7 | srun_cr: error while loading shared libraries: libcr.so.0: cannot open shared object file: No such file or directory 8 | 4 9 | srun_cr: error while loading shared libraries: libcr.so.0: cannot open shared object file: No such file or directory 10 | 5 11 | srun_cr: error while loading shared libraries: libcr.so.0: cannot open shared object file: No such file or directory 12 | 6 13 | srun_cr: error while loading shared libraries: libcr.so.0: cannot open shared object file: No such file or directory 14 | 7 15 | srun_cr: error while loading shared libraries: libcr.so.0: cannot open shared object file: No such file or directory 16 | 8 17 | 
srun_cr: error while loading shared libraries: libcr.so.0: cannot open shared object file: No such file or directory 18 | 9 19 | srun_cr: error while loading shared libraries: libcr.so.0: cannot open shared object file: No such file or directory 20 | 10 21 | srun_cr: error while loading shared libraries: libcr.so.0: cannot open shared object file: No such file or directory 22 | 11 23 | srun_cr: error while loading shared libraries: libcr.so.0: cannot open shared object file: No such file or directory 24 | 12 25 | srun_cr: error while loading shared libraries: libcr.so.0: cannot open shared object file: No such file or directory 26 | 13 27 | srun_cr: error while loading shared libraries: libcr.so.0: cannot open shared object file: No such file or directory 28 | 14 29 | srun_cr: error while loading shared libraries: libcr.so.0: cannot open shared object file: No such file or directory 30 | 15 31 | slurmstepd-i09: error: *** JOB 2963303 ON i09 CANCELLED AT 2018-08-08T16:56:23 *** 32 | -------------------------------------------------------------------------------- /checkpoint/blcr/slurm-2963306.out: -------------------------------------------------------------------------------- 1 | srun_cr: fatal: failed to initialize libcr: Function not implemented 2 | 1 3 | srun_cr: fatal: failed to initialize libcr: Function not implemented 4 | 2 5 | srun_cr: fatal: failed to initialize libcr: Function not implemented 6 | 3 7 | srun_cr: fatal: failed to initialize libcr: Function not implemented 8 | 4 9 | srun_cr: fatal: failed to initialize libcr: Function not implemented 10 | 5 11 | srun_cr: fatal: failed to initialize libcr: Function not implemented 12 | 6 13 | slurmstepd-i25: error: *** JOB 2963306 ON i25 CANCELLED AT 2018-08-08T17:05:47 *** 14 | -------------------------------------------------------------------------------- /checkpoint/blcr/test_job.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -l 2 | 3 | module load blcr 4 | 5 | count=0 6 | while [ 1 -eq 1 ]; do 7 | count=$(($count+1)) 8 | sleep $count 9 | srun_cr 10 | echo "$count" 11 | done 12 | 13 | echo "Completed $count" 14 | -------------------------------------------------------------------------------- /checkpoint/dmtcp/README: -------------------------------------------------------------------------------- 1 | When using DMTCP, you will need two submit scripts: one for 2 | launching under checkpoint control, and one for restarting 3 | from a crashed job. 4 | 5 | For example, for SLURM, you would modify slurm_launch.job to change 6 | "" in the line for dmtcp_launch. The default script 7 | does not automatically checkpoint. Search on "dmtcp_command" for 8 | instructions on how to use it to manually request a checkpoint. 9 | Do "dmtcp_command -h" to see the options for "dmtcp_command". 10 | Alternatively, search on "start_coordinator" in slurm_launch.job, 11 | and add "-i 3600" to create a checkpoint every 3600 seconds (every hour). 12 | "dmtcp_coordinator -h" and "dmtcp_launch -h" also exist. 13 | When ready, execute the SLURM command: 14 | sbatch slurm_launch.job 15 | 16 | Upon checkpointing, a script, dmtcp_restart_script.sh, will be saved 17 | in the local directory, along with the checkpoint image files. 18 | 19 | When restarting, slurm_rstr.job assumes that the script 20 | dmtcp_restart_script.sh is in the local directory. 21 | The default for the restart script is for manually requested 22 | checkpointing. 
See the above instructions and "dmtcp_restart -h" 23 | for setting checkpoints at regular time intervals. Modify 24 | slurm_rstr.job if automatic checkpointing is desired. 25 | Finally, it suffices to run: 26 | sbatch slurm_rstr.job 27 | -------------------------------------------------------------------------------- /checkpoint/dmtcp/ccr_buffalo/README.txt: -------------------------------------------------------------------------------- 1 | This scipts was provided by L. Shawn Matott 2 | Center for Computational Research (CCR) University of Buffalo 3 | while deploying DMTCP on CCR rush cluster 4 | (http://ccr.buffalo.edu/support/research_facilities/general_compute.html) 5 | -------------------------------------------------------------------------------- /checkpoint/dmtcp/ccr_buffalo/slurm_dmtcp_ompi_tcp: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #SBATCH --time=01:00:00 3 | #SBATCH --nodes=2 4 | #SBATCH --cpus-per-task=1 5 | #SBATCH --tasks-per-node=8 6 | #SBATCH --mail-user=your_user_name@buffalo.edu 7 | #SBATCH --mail-type=END 8 | #SBATCH --job-name=dmtcp 9 | #SBATCH --output=output.out 10 | #SBATCH --error=output.err 11 | #SBATCH --partition=debug 12 | 13 | # spit out some basic SLURM information 14 | echo "SLURM_JOBID = "$SLURM_JOB_ID 15 | echo "SLURM_SUBMIT_DIR = "$SLURM_SUBMIT_DIR 16 | echo "SLURM_NODELIST = "`nodeset -e $SLURM_NODELIST` 17 | echo "SLURM_NPROCS = "$SLURM_NPROCS 18 | echo "SLURM_NNODES = "$SLURM_NNODES 19 | echo "SLURM_CPUS_PER_TASK = "$SLURM_CPUS_PER_TASK 20 | echo "SLURMTMPDIR = "$SLURMTMPDIR 21 | 22 | module load dmtcp/2.2.1-r2777 23 | #module load openmpi/gcc-4.4.7/1.8.0 24 | module load openmpi/gcc-4.4.6/1.6.5 25 | module list 26 | ulimit -s unlimited 27 | 28 | # 29 | # How long to run the application before checkpointing. 30 | # After checkpointing, the application will be shut down. 31 | # Users will typically want to set this to occur a bit before 32 | # the job's walltime expires. 33 | # 34 | CHECKPOINT_TIME=1m 35 | 36 | # EXE is the name of the application/executable 37 | # ARGS is any command-line args 38 | # OUTFILE is the file where stdout will be redirected 39 | # ERRFILE if the file where stderr will be redirected 40 | EXE=$SLURM_SUBMIT_DIR/MonteCarloIntegration 41 | ARGS="1200000000 0 8" 42 | OUTFILE=Integrals.out 43 | ERRFILE=Integrals.err 44 | 45 | # This script with auto-sense whether to perform a checkpoint 46 | # or restart operation. Set FORCE_CHECKPOINT to yes if you 47 | # DO NOT want to restart even if a restart script is located 48 | # in the working directory. 49 | FORCE_CHECKPOINT=No 50 | 51 | # ************************************************************************************************* 52 | # ************************************************************************************************* 53 | # Users should not have to change anything beyond this point! 54 | # ************************************************************************************************* 55 | # ************************************************************************************************* 56 | export DMTCP_TMPDIR=$SLURM_SUBMIT_DIR 57 | 58 | # configure openmpi environment to use tcp 59 | export OMPI_MCA_mtl=^psm 60 | export OMPI_MCA_btl=self,tcp 61 | 62 | # ================================================================================================= 63 | # start_coordinator() 64 | # Routine provided by Artem Polyakov 65 | # 66 | # Start dmtcp coordinator on launching node. 
Free TCP port is automatically allocated. 67 | # this function creates dmtcp_command.$JOBID script that serves like a wrapper around 68 | # dmtcp_command that tunes it on exact dmtcp_coordinator (it's hostname and port) 69 | # instead of typing "dmtcp_command -h -p " 70 | # you just type "dmtcp_command.$JOBID " and talk to coordinator of JOBID job 71 | # ================================================================================================= 72 | start_coordinator() 73 | { 74 | fname=dmtcp_command.$SLURM_JOBID 75 | h=`hostname` 76 | echo "dmtcp_coordinator --daemon --exit-on-last -p 0 --port-file $fname $@ 1>/dev/null 2>&1" 77 | dmtcp_coordinator --daemon --exit-on-last -p 0 --port-file $fname $@ 1>/dev/null 2>&1 78 | 79 | while true; do 80 | if [ -f "$fname" ]; then 81 | p=`cat $fname` 82 | if [ -n "$p" ]; then 83 | # try to communicate ? dmtcp_command -p $p l 84 | break 85 | fi 86 | fi 87 | done 88 | 89 | # Create dmtcp_command wrapper for easy communication with coordinator 90 | p=`cat $fname` 91 | chmod +x $fname 92 | echo "#!/bin/bash" > $fname 93 | echo >> $fname 94 | echo "export PATH=$PATH" >> $fname 95 | echo "export DMTCP_HOST=$h" >> $fname 96 | echo "export DMTCP_COORD_PORT=$p" >> $fname 97 | echo "dmtcp_command \$@" >> $fname 98 | 99 | # Setup local environment for DMTCP 100 | export DMTCP_COORD_HOST=$h 101 | export DMTCP_COORD_PORT=$p 102 | } 103 | 104 | echo "Launching dmtcp coordintor daemon" 105 | echo "start_coordinator --exit-after-ckpt" 106 | start_coordinator --exit-after-ckpt 107 | 108 | # convert checkpoint time to seconds 109 | nTics=`echo $CHECKPOINT_TIME | \ 110 | sed 's/m/ \* 60/g' | \ 111 | sed 's/h/ \* 3600/g' | \ 112 | sed 's/d/ \* 86400/g' | \ 113 | sed 's/s//g' | \ 114 | bc | \ 115 | awk '{ printf("%d\n", $1); }'` 116 | echo "Checkpointing will commence after $nTics seconds" 117 | 118 | tic=`date +%s` 119 | if [[ -f ./dmtcp_restart_script.sh ]] && [[ "${FORCE_CHECKPOINT}" == "No" ]]; then 120 | echo "Restarting application under dmtcp control" 121 | echo "./dmtcp_restart_script.sh -h $DMTCP_COORD_HOST -p $DMTCP_COORD_PORT -i $nTics 1>>$OUTFILE 2>>$ERRFILE" 122 | ./dmtcp_restart_script.sh -h $DMTCP_COORD_HOST -p $DMTCP_COORD_PORT -i $nTics 1>>${OUTFILE}.${SLURM_JOB_ID} 2>>${ERRFILE}.${SLURM_JOB_ID} 123 | cat ${OUTFILE}.${SLURM_JOB_ID} >> ${OUTFILE} 124 | rm -f ${OUTFILE}.${SLURM_JOB_ID} 125 | cat ${ERRFILE}.${SLURM_JOB_ID} >> ${ERRFILE} 126 | rm -f ${ERRFILE}.${SLURM_JOB_ID} 127 | else 128 | # clear output and error files 129 | echo "" > ${OUTFILE} 130 | echo "" > ${ERRFILE} 131 | echo "Launching application under dmtcp control" 132 | echo "srun dmtcp_launch --quiet --rm -i $nTics $EXE $ARGS 1>${OUTFILE} 2>${ERRFILE}" 133 | srun dmtcp_launch --quiet --rm -i $nTics $EXE $ARGS 1>${OUTFILE} 2>${ERRFILE} 134 | fi 135 | toc=`date +%s` 136 | 137 | elapsedTime=`expr $toc - $tic` 138 | overheadTime=`expr $elapsedTime - $nTics` 139 | if [ "$overheadTime" -lt "0" ]; then 140 | overheadTime=0 141 | echo "All done - no checkpoint was required." 
142 | else 143 | echo "All done - checkpoint files are listed below:" 144 | ls -1 *.dmtcp 145 | fi 146 | 147 | echo "Elapsed Time = $elapsedTime seconds" 148 | echo "Checkpoint Overhead = $overheadTime seconds" 149 | 150 | -------------------------------------------------------------------------------- /checkpoint/dmtcp/ccr_buffalo/slurm_dmtcp_openmp: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #SBATCH --time=01:00:00 3 | #SBATCH --nodes=1 4 | #SBATCH --cpus-per-task=8 5 | #SBATCH --tasks-per-node=1 6 | #SBATCH --mail-user=your_user_name@buffalo.edu 7 | #SBATCH --mail-type=END 8 | #SBATCH --job-name=dmtcp 9 | #SBATCH --output=output.out 10 | #SBATCH --error=output.err 11 | #SBATCH --partition=debug 12 | 13 | # spit out some basic SLURM information 14 | echo "SLURM_JOBID = "$SLURM_JOB_ID 15 | echo "SLURM_SUBMIT_DIR = "$SLURM_SUBMIT_DIR 16 | echo "SLURM_NODELIST = "`nodeset -e $SLURM_NODELIST` 17 | echo "SLURM_NPROCS = "$SLURM_NPROCS 18 | echo "SLURM_NNODES = "$SLURM_NNODES 19 | echo "SLURM_CPUS_PER_TASK = "$SLURM_CPUS_PER_TASK 20 | echo "SLURMTMPDIR = "$SLURMTMPDIR 21 | 22 | # configure OpenMP environment 23 | export OMP_NUM_THREADS=$SLURM_CPUS_PER_TASK 24 | 25 | module load dmtcp/2.2.1-r2777 26 | module list 27 | ulimit -s unlimited 28 | 29 | # 30 | # How long to run the application before checkpointing. 31 | # After checkpointing, the application will be shut down. 32 | # Users will typically want to set this to occur a bit before 33 | # the job's walltime expires. 34 | # 35 | CHECKPOINT_TIME=1m 36 | 37 | # EXE is the name of the application/executable 38 | # ARGS is any command-line args 39 | # OUTFILE is the file where stdout will be redirected 40 | # ERRFILE if the file where stderr will be redirected 41 | EXE=$SLURM_SUBMIT_DIR/MonteCarloIntegration 42 | ARGS="1200000000 0 8" 43 | OUTFILE=Integrals.out 44 | ERRFILE=Integrals.err 45 | 46 | # This script with auto-sense whether to perform a checkpoint 47 | # or restart operation. Set FORCE_CHECKPOINT to yes if you 48 | # DO NOT want to restart even if a restart script is located 49 | # in the working directory. 50 | FORCE_CHECKPOINT=No 51 | 52 | # ************************************************************************************************* 53 | # ************************************************************************************************* 54 | # Users should not have to change anything beyond this point! 55 | # ************************************************************************************************* 56 | # ************************************************************************************************* 57 | export DMTCP_TMPDIR=$SLURM_SUBMIT_DIR 58 | 59 | # ================================================================================================= 60 | # start_coordinator() 61 | # Routine provided by Artem Polyakov 62 | # 63 | # Start dmtcp coordinator on launching node. Free TCP port is automatically allocated. 
64 | # this function creates dmtcp_command.$JOBID script that serves like a wrapper around 65 | # dmtcp_command that tunes it on exact dmtcp_coordinator (it's hostname and port) 66 | # instead of typing "dmtcp_command -h -p " 67 | # you just type "dmtcp_command.$JOBID " and talk to coordinator of JOBID job 68 | # ================================================================================================= 69 | start_coordinator() 70 | { 71 | fname=dmtcp_command.$SLURM_JOBID 72 | h=`hostname` 73 | echo "dmtcp_coordinator --daemon --exit-on-last -p 0 --port-file $fname $@ 1>/dev/null 2>&1" 74 | dmtcp_coordinator --daemon --exit-on-last -p 0 --port-file $fname $@ 1>/dev/null 2>&1 75 | 76 | while true; do 77 | if [ -f "$fname" ]; then 78 | p=`cat $fname` 79 | if [ -n "$p" ]; then 80 | # try to communicate ? dmtcp_command -p $p l 81 | break 82 | fi 83 | fi 84 | done 85 | 86 | # Create dmtcp_command wrapper for easy communication with coordinator 87 | p=`cat $fname` 88 | chmod +x $fname 89 | echo "#!/bin/bash" > $fname 90 | echo >> $fname 91 | echo "export PATH=$PATH" >> $fname 92 | echo "export DMTCP_HOST=$h" >> $fname 93 | echo "export DMTCP_COORD_PORT=$p" >> $fname 94 | echo "dmtcp_command \$@" >> $fname 95 | 96 | # Setup local environment for DMTCP 97 | export DMTCP_COORD_HOST=$h 98 | export DMTCP_COORD_PORT=$p 99 | } 100 | 101 | echo "Launching dmtcp coordintor daemon" 102 | echo "start_coordinator --exit-after-ckpt" 103 | start_coordinator --exit-after-ckpt 104 | 105 | # convert checkpoint time to seconds 106 | nTics=`echo $CHECKPOINT_TIME | \ 107 | sed 's/m/ \* 60/g' | \ 108 | sed 's/h/ \* 3600/g' | \ 109 | sed 's/d/ \* 86400/g' | \ 110 | sed 's/s//g' | \ 111 | bc | \ 112 | awk '{ printf("%d\n", $1); }'` 113 | echo "Checkpointing will commence after $nTics seconds" 114 | 115 | tic=`date +%s` 116 | if [[ -f ./dmtcp_restart_script.sh ]] && [[ "${FORCE_CHECKPOINT}" == "No" ]]; then 117 | echo "Restarting application under dmtcp control" 118 | echo "./dmtcp_restart_script.sh -h $DMTCP_COORD_HOST -p $DMTCP_COORD_PORT -i $nTics 1>>$OUTFILE 2>>$ERRFILE" 119 | ./dmtcp_restart_script.sh -h $DMTCP_COORD_HOST -p $DMTCP_COORD_PORT -i $nTics 1>>${OUTFILE}.${SLURM_JOB_ID} 2>>${ERRFILE}.${SLURM_JOB_ID} 120 | cat ${OUTFILE}.${SLURM_JOB_ID} >> ${OUTFILE} 121 | rm -f ${OUTFILE}.${SLURM_JOB_ID} 122 | cat ${ERRFILE}.${SLURM_JOB_ID} >> ${ERRFILE} 123 | rm -f ${ERRFILE}.${SLURM_JOB_ID} 124 | else 125 | # clear output and error files 126 | echo "" > ${OUTFILE} 127 | echo "" > ${ERRFILE} 128 | echo "Launching application under dmtcp control" 129 | echo "srun dmtcp_launch --quiet --rm -i $nTics $EXE $ARGS 1>${OUTFILE} 2>${ERRFILE}" 130 | srun dmtcp_launch --quiet --rm -i $nTics $EXE $ARGS 1>${OUTFILE} 2>${ERRFILE} 131 | fi 132 | toc=`date +%s` 133 | 134 | elapsedTime=`expr $toc - $tic` 135 | overheadTime=`expr $elapsedTime - $nTics` 136 | if [ "$overheadTime" -lt "0" ]; then 137 | overheadTime=0 138 | echo "All done - no checkpoint was required." 
139 | else 140 | echo "All done - checkpoint files are listed below:" 141 | ls -1 *.dmtcp 142 | fi 143 | 144 | echo "Elapsed Time = $elapsedTime seconds" 145 | echo "Checkpoint Overhead = $overheadTime seconds" 146 | -------------------------------------------------------------------------------- /checkpoint/dmtcp/ccr_buffalo/slurm_dmtcp_serial: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #SBATCH --time=01:00:00 3 | #SBATCH --nodes=1 4 | #SBATCH --cpus-per-task=1 5 | #SBATCH --tasks-per-node=1 6 | #SBATCH --mail-user=your_user_name@buffalo.edu 7 | #SBATCH --mail-type=END 8 | #SBATCH --job-name=dmtcp 9 | #SBATCH --output=output.out 10 | #SBATCH --error=output.err 11 | #SBATCH --partition=debug 12 | 13 | # spit out some basic SLURM information 14 | echo "SLURM_JOBID = "$SLURM_JOB_ID 15 | echo "SLURM_SUBMIT_DIR = "$SLURM_SUBMIT_DIR 16 | echo "SLURM_NODELIST = "`nodeset -e $SLURM_NODELIST` 17 | echo "SLURM_NPROCS = "$SLURM_NPROCS 18 | echo "SLURM_NNODES = "$SLURM_NNODES 19 | echo "SLURMTMPDIR = "$SLURMTMPDIR 20 | 21 | module load dmtcp/2.2.1-r2777 22 | module list 23 | ulimit -s unlimited 24 | 25 | # 26 | # How long to run the application before checkpointing. 27 | # After checkpointing, the application will be shut down. 28 | # Users will typically want to set this to occur a bit before 29 | # the job's walltime expires. 30 | # 31 | CHECKPOINT_TIME=1m 32 | 33 | # EXE is the name of the application/executable 34 | # ARGS is any command-line args 35 | # OUTFILE is the file where stdout will be redirected 36 | # ERRFILE if the file where stderr will be redirected 37 | EXE=$SLURM_SUBMIT_DIR/MonteCarloIntegration 38 | ARGS="100000000 0" 39 | OUTFILE=Integrals.out 40 | ERRFILE=Integrals.err 41 | 42 | # This script with auto-sense whether to perform a checkpoint 43 | # or restart operation. Set FORCE_CHECKPOINT to yes if you 44 | # DO NOT want to restart even if a restart script is located 45 | # in the working directory. 46 | FORCE_CHECKPOINT=No 47 | 48 | # ************************************************************************************************* 49 | # ************************************************************************************************* 50 | # Users should not have to change anything beyond this point! 51 | # ************************************************************************************************* 52 | # ************************************************************************************************* 53 | export DMTCP_TMPDIR=$SLURM_SUBMIT_DIR 54 | 55 | # ================================================================================================= 56 | # start_coordinator() 57 | # Routine provided by Artem Polyakov 58 | # 59 | # Start dmtcp coordinator on launching node. Free TCP port is automatically allocated. 
60 | # this function creates dmtcp_command.$JOBID script that serves like a wrapper around 61 | # dmtcp_command that tunes it on exact dmtcp_coordinator (it's hostname and port) 62 | # instead of typing "dmtcp_command -h -p " 63 | # you just type "dmtcp_command.$JOBID " and talk to coordinator of JOBID job 64 | # ================================================================================================= 65 | start_coordinator() 66 | { 67 | fname=dmtcp_command.$SLURM_JOBID 68 | h=`hostname` 69 | echo "dmtcp_coordinator --daemon --exit-on-last -p 0 --port-file $fname $@ 1>/dev/null 2>&1" 70 | dmtcp_coordinator --daemon --exit-on-last -p 0 --port-file $fname $@ 1>/dev/null 2>&1 71 | 72 | while true; do 73 | if [ -f "$fname" ]; then 74 | p=`cat $fname` 75 | if [ -n "$p" ]; then 76 | # try to communicate ? dmtcp_command -p $p l 77 | break 78 | fi 79 | fi 80 | done 81 | 82 | # Create dmtcp_command wrapper for easy communication with coordinator 83 | p=`cat $fname` 84 | chmod +x $fname 85 | echo "#!/bin/bash" > $fname 86 | echo >> $fname 87 | echo "export PATH=$PATH" >> $fname 88 | echo "export DMTCP_COORD_HOST=$h" >> $fname 89 | echo "export DMTCP_COORD_PORT=$p" >> $fname 90 | echo "dmtcp_command \$@" >> $fname 91 | 92 | # Setup local environment for DMTCP 93 | export DMTCP_COORD_HOST=$h 94 | export DMTCP_COORD_PORT=$p 95 | } 96 | 97 | echo "Launching dmtcp coordintor daemon" 98 | echo "start_coordinator --exit-after-ckpt" 99 | start_coordinator --exit-after-ckpt 100 | 101 | # convert checkpoint time to seconds 102 | nTics=`echo $CHECKPOINT_TIME | \ 103 | sed 's/m/ \* 60/g' | \ 104 | sed 's/h/ \* 3600/g' | \ 105 | sed 's/d/ \* 86400/g' | \ 106 | sed 's/s//g' | \ 107 | bc | \ 108 | awk '{ printf("%d\n", $1); }'` 109 | echo "Checkpointing will commence after $nTics seconds" 110 | 111 | tic=`date +%s` 112 | if [[ -f ./dmtcp_restart_script.sh ]] && [[ "${FORCE_CHECKPOINT}" == "No" ]]; then 113 | echo "Restarting application under dmtcp control" 114 | echo "./dmtcp_restart_script.sh -h $DMTCP_COORD_HOST -p $DMTCP_COORD_PORT -i $nTics 1>>$OUTFILE 2>>$ERRFILE" 115 | ./dmtcp_restart_script.sh -h $DMTCP_COORD_HOST -p $DMTCP_COORD_PORT -i $nTics 1>>${OUTFILE}.${SLURM_JOB_ID} 2>>${ERRFILE}.${SLURM_JOB_ID} 116 | cat ${OUTFILE}.${SLURM_JOB_ID} >> ${OUTFILE} 117 | rm -f ${OUTFILE}.${SLURM_JOB_ID} 118 | cat ${ERRFILE}.${SLURM_JOB_ID} >> ${ERRFILE} 119 | rm -f ${ERRFILE}.${SLURM_JOB_ID} 120 | else 121 | # clear output and error files 122 | echo "" > ${OUTFILE} 123 | echo "" > ${ERRFILE} 124 | echo "Launching application under dmtcp control" 125 | echo "srun dmtcp_launch --quiet --rm -i $nTics $EXE $ARGS 1>${OUTFILE} 2>${ERRFILE}" 126 | srun dmtcp_launch --quiet --rm -i $nTics $EXE $ARGS 1>${OUTFILE} 2>${ERRFILE} 127 | fi 128 | toc=`date +%s` 129 | 130 | elapsedTime=`expr $toc - $tic` 131 | overheadTime=`expr $elapsedTime - $nTics` 132 | if [ "$overheadTime" -lt "0" ]; then 133 | overheadTime=0 134 | echo "All done - no checkpoint was required." 
135 | else 136 | echo "All done - checkpoint files are listed below:" 137 | ls -1 *.dmtcp 138 | fi 139 | 140 | echo "Elapsed Time = $elapsedTime seconds" 141 | echo "Checkpoint Overhead = $overheadTime seconds" 142 | -------------------------------------------------------------------------------- /checkpoint/dmtcp/slurm_launch.job: -------------------------------------------------------------------------------- 1 | #!/bin/bash -l 2 | #SBATCH -p short # change to proper partition name or remove 3 | #SBATCH --time=00:15:00 # put proper time of reservation here 4 | #SBATCH --nodes=2 # number of nodes 5 | #SBATCH --ntasks=8 # number of total tasks 6 | ##SBATCH --cpus-per-task=2 # number of cpus per task 7 | ##SBATCH --ntasks-per-node=4 # processes per node 8 | #SBATCH --mem=24000 # memory resource 9 | #SBATCH --job-name="dmtcp_job" # change to your job name 10 | #SBATCH --switches=1 # Try to localize IB traffice on a single switch 11 | #SBATCH --output=dmtcp.out # change to proper file name or remove for defaults 12 | # ? Any other batch options ? 13 | 14 | # ? Any module that need to be loaded ? 15 | module unload openmpi 16 | module load mpich 17 | module load dmtcp 18 | 19 | #----------------------------- Set up DMTCP environment for a job ------------# 20 | 21 | ############################################################################### 22 | # Start DMTCP coordinator on the launching node. Free TCP port is automatically 23 | # allocated. This function creates a dmtcp_command.$JOBID script, which serves 24 | # as a wrapper around dmtcp_command. The script tunes dmtcp_command for the 25 | # exact dmtcp_coordinator (its hostname and port). Instead of typing 26 | # "dmtcp_command -h -p ", 27 | # you just type "dmtcp_command.$JOBID " and talk to the coordinator 28 | # for JOBID job. 29 | ############################################################################### 30 | 31 | start_coordinator() 32 | { 33 | ############################################################ 34 | # For debugging when launching a custom coordinator, uncomment 35 | # the following lines and provide the proper host and port for 36 | # the coordinator. 37 | ############################################################ 38 | # export DMTCP_COORD_HOST=$h 39 | # export DMTCP_COORD_PORT=$p 40 | # return 41 | 42 | fname=dmtcp_command.$SLURM_JOBID 43 | h=`hostname` 44 | 45 | check_coordinator=`which dmtcp_coordinator` 46 | if [ -z "$check_coordinator" ]; then 47 | echo "No dmtcp_coordinator found. Check your DMTCP installation and PATH settings." 48 | exit 0 49 | fi 50 | 51 | dmtcp_coordinator --daemon --exit-on-last -p 0 --port-file $fname $@ 1>/dev/null 2>&1 52 | 53 | while true; do 54 | if [ -f "$fname" ]; then 55 | p=`cat $fname` 56 | if [ -n "$p" ]; then 57 | # try to communicate ? dmtcp_command -p $p l 58 | break 59 | fi 60 | fi 61 | done 62 | 63 | # Create dmtcp_command wrapper for easy communication with coordinator 64 | p=`cat $fname` 65 | chmod +x $fname 66 | echo "#!/bin/bash" > $fname 67 | echo >> $fname 68 | echo "export PATH=$PATH" >> $fname 69 | echo "export DMTCP_COORD_HOST=$h" >> $fname 70 | echo "export DMTCP_COORD_PORT=$p" >> $fname 71 | echo "dmtcp_command \$@" >> $fname 72 | 73 | # Set up local environment for DMTCP 74 | export DMTCP_COORD_HOST=$h 75 | export DMTCP_COORD_PORT=$p 76 | 77 | } 78 | 79 | ################################################################################### 80 | # Print out the SLURM job information. Remove this if you don't need it. 
81 | ################################################################################### 82 | 83 | # Print out the SLURM job information. Remove this if you don't need it. 84 | echo "SLURM_JOBID="$SLURM_JOBID 85 | echo "SLURM_JOB_NODELIST"=$SLURM_JOB_NODELIST 86 | echo "SLURM_NNODES"=$SLURM_NNODES 87 | echo "SLURMTMPDIR="$SLURMTMPDIR 88 | echo "working directory = "$SLURM_SUBMIT_DIR 89 | 90 | # changedir to workdir 91 | cd $SLURM_SUBMIT_DIR 92 | 93 | 94 | #----------------------------------- Set up job environment ------------------# 95 | 96 | ############################################################################### 97 | # Load all nessesary modules or export PATH/LD_LIBRARY_PATH/etc here. 98 | # Make sure that the prefix for the DMTCP install path is in PATH 99 | # and LD_LIBRARY_PATH. 100 | ############################################################################### 101 | 102 | # **** IF USING Open MPI 1.8, SEE COMMENT BELOW **** 103 | # module load openmpi 104 | ############################################################################### 105 | # For Open MPI 1.8, if using InfiniBand, uncomment the following statement 106 | # export OMPI_MCA_mpi_leave_pinned=0 107 | # This could prevent a bug due to interaction with memalign() and ptmalloc2() 108 | # on restart. 109 | ############################################################################### 110 | 111 | # export PATH=/bin:$PATH 112 | # export LD_LIBRARY_PATH=/lib:$LD_LIBRARY_PATH 113 | 114 | #------------------------------------- Launch application ---------------------# 115 | 116 | ################################################################################ 117 | # 1. Start DMTCP coordinator 118 | ################################################################################ 119 | 120 | start_coordinator -i 60 # ... 121 | 122 | 123 | ################################################################################ 124 | # 2. Launch application 125 | # 2.1. If you use mpiexec/mpirun to launch an application, use the following 126 | # command line: 127 | # $ dmtcp_launch --rm mpiexec ./ 128 | # 2.2. If you use PMI1 to launch an application, use the following command line: 129 | # $ srun dmtcp_launch --rm ./ 130 | # Note: PMI2 is not supported yet. 131 | # 2.3. If you use the Stampede supercomputer at Texas Advanced Computing Center 132 | # (TACC), use ibrun command to launch the application (--rm is not required): 133 | # $ ibrun dmtcp_launch ./ 134 | ################################################################################ 135 | 136 | #dmtcp_launch --rm mpirun --mca btl self,tcp ./ 137 | #dmtcp_launch --rm mpirun --mca btl self,tcp ~/bigdata/Projects/iigb/slurm/mpi/mpiTest_mpich 138 | dmtcp_launch --rm ./count.sh 139 | -------------------------------------------------------------------------------- /checkpoint/dmtcp/slurm_rstr.job: -------------------------------------------------------------------------------- 1 | #!/bin/bash -l 2 | 3 | #SBATCH -p short # change to proper partition name or remove 4 | #SBATCH --time=00:15:00 # put proper time of reservation here 5 | ##SBATCH --nodes=2 # number of nodes 6 | #SBATCH --ntasks=8 # number of tasks 7 | ##SBATCH --cpus-per-task # number of cpus per task 8 | ##SBATCH --ntasks-per-node=4 # processes per node 9 | #SBATCH --mem=24000 # memory resource 10 | #SBATCH --job-name="dmtcp_job" # change to your job name 11 | #SBATCH --output=dmtcp.out # change to proper file name or remove for defaults 12 | # ? Any other batch options ? 
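# For example (optional and site-dependent), e-mail notification directives
# like those used in the other example scripts in this repo could be added here:
##SBATCH --mail-user=user@example.com   # placeholder address
##SBATCH --mail-type=ALL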
13 | 14 | #----------------------------- Set up DMTCP environment for a job ------------# 15 | # ? Any module that need to be loaded ? 16 | module unload openmpi 17 | module load mpich 18 | module load dmtcp 19 | 20 | ############################################################################### 21 | # Start DMTCP coordinator on the launching node. Free TCP port is automatically 22 | # allocated. This function creates a dmtcp_command.$JOBID script, which serves 23 | # as a wrapper around dmtcp_command. The script tunes dmtcp_command for the 24 | # exact dmtcp_coordinator (its hostname and port). Instead of typing 25 | # "dmtcp_command -h -p ", 26 | # you just type "dmtcp_command.$JOBID " and talk to the coordinator 27 | # for JOBID job. 28 | ############################################################################### 29 | 30 | start_coordinator() 31 | { 32 | ############################################################ 33 | # For debugging when launching a custom coordinator, uncomment 34 | # the following lines and provide the proper host and port for 35 | # the coordinator. 36 | ############################################################ 37 | # export DMTCP_COORD_HOST=$h 38 | # export DMTCP_COORD_PORT=$p 39 | # return 40 | 41 | fname=dmtcp_command.$SLURM_JOBID 42 | h=`hostname` 43 | 44 | check_coordinator=`which dmtcp_coordinator` 45 | if [ -z "$check_coordinator" ]; then 46 | echo "No dmtcp_coordinator found. Check your DMTCP installation and PATH settings" 47 | exit 0 48 | fi 49 | 50 | dmtcp_coordinator --daemon --exit-on-last -p 0 --port-file $fname $@ 1>/dev/null 2>&1 51 | 52 | while true; do 53 | if [ -f "$fname" ]; then 54 | p=`cat $fname` 55 | if [ -n "$p" ]; then 56 | # try to communicate ? dmtcp_command -p $p l 57 | break 58 | fi 59 | fi 60 | done 61 | 62 | # Create a dmtcp_command wrapper for easy communication with the coordinator. 63 | p=`cat $fname` 64 | chmod +x $fname 65 | echo "#!/bin/bash" > $fname 66 | echo >> $fname 67 | echo "export PATH=$PATH" >> $fname 68 | echo "export DMTCP_COORD_HOST=$h" >> $fname 69 | echo "export DMTCP_COORD_PORT=$p" >> $fname 70 | echo "dmtcp_command \$@" >> $fname 71 | 72 | # Set up local environment for DMTCP 73 | export DMTCP_COORD_HOST=$h 74 | export DMTCP_COORD_PORT=$p 75 | 76 | } 77 | 78 | #----------------------- Some rutine steps and information output -------------------------# 79 | 80 | ################################################################################### 81 | # Print out the SLURM job information. Remove this if you don't need it. 82 | ################################################################################### 83 | echo "SLURM_JOBID="$SLURM_JOBID 84 | echo "SLURM_JOB_NODELIST"=$SLURM_JOB_NODELIST 85 | echo "SLURM_NNODES"=$SLURM_NNODES 86 | echo "SLURMTMPDIR="$SLURMTMPDIR 87 | echo "working directory = "$SLURM_SUBMIT_DIR 88 | 89 | # changedir to workdir 90 | cd $SLURM_SUBMIT_DIR 91 | 92 | #----------------------------------- Set up job environment ------------------# 93 | 94 | ############################################################################### 95 | # Load all nessesary modules or export PATH/LD_LIBRARY_PATH/etc here. 96 | # Make sure that the prefix for the DMTCP install path is in PATH 97 | # and LD_LIBRARY_PATH. 
98 | ############################################################################### 99 | 100 | # module load openmpi 101 | # export PATH=/bin:$PATH 102 | # export LD_LIBRARY_PATH=/lib:$LD_LIBRARY_PATH 103 | 104 | ############################################################################### 105 | # If you use the Stampede supercomputer at Texas Advanced Computing Center 106 | # (TACC), add the following: 107 | # HOSTFILE=hostfile 108 | # echo "SLURM_JOB_NODELIST" | scontrol show hostname > $HOSTFILE 109 | ############################################################################### 110 | 111 | #------------------------------------- Launch application ---------------------# 112 | 113 | ################################################################################ 114 | # 1. Start DMTCP coordinator 115 | ################################################################################ 116 | 117 | start_coordinator # -i 120 ... 118 | 119 | ################################################################################ 120 | # 2. Restart application 121 | ################################################################################ 122 | 123 | /bin/bash ./dmtcp_restart_script.sh -h $DMTCP_COORD_HOST -p $DMTCP_COORD_PORT 124 | 125 | ############################################################################### 126 | # If you use the Stampede supercomputer at Texas Advanced Computing Center 127 | # (TACC), add the --hostfile option: 128 | # /bin/bash ./dmtcp_restart_script.sh -h $DMTCP_COORD_HOST -p $DMTCP_COORD_PORT\ 129 | # --hostfile $HOSTFILE 130 | ############################################################################### 131 | -------------------------------------------------------------------------------- /checkpoint/dmtcp/stampede/README: -------------------------------------------------------------------------------- 1 | For users who want to use DMTCP for large-scale applications on the 2 | Stampede supercomputer at Texas Advanced Computing Center (TACC), 3 | these scripts will automatically set up the coordinator on a 4 | separate node, launch the application, and restart from the 5 | previous checkpoint if needed. 6 | -------------------------------------------------------------------------------- /checkpoint/dmtcp/stampede/slurm_launch.job: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Put your SLURM options here 3 | #SBATCH --time=00:30:00 # put proper time of reservation here 4 | #SBATCH --nodes=129 # number of nodes, where one node is the coordinator node, and the rest are the compute nodes 5 | #SBATCH --ntasks-per-node=16 # processes per node 6 | #SBATCH --job-name="example" 7 | #SBATCH --output=example.std 8 | #SBATCH --error=example.err 9 | #SBATCH --partition=normal 10 | #SBATCH -A project_number 11 | # ? Any other batch options ? 12 | 13 | # Start a dmtcp coordinator on launch node. A free TCP port is automatically allocated. 14 | # This function creates a dmtcp_command.$JOBID script that serves as a wrapper around 15 | # dmtcp_command that tunes it for the exact dmtcp_coordinator (its hostname and port). 16 | # Instead of typing "dmtcp_command -h -p ", 17 | # one just types "dmtcp_command.$JOBID " and talks to the coordinator of JOBID job. 18 | 19 | start_coordinator() 20 | { 21 | fname=dmtcp_command.$SLURM_JOBID 22 | h=`hostname` 23 | 24 | check_coordinator=`which dmtcp_coordinator` 25 | if [ -z "$check_coordinator" ]; then 26 | echo "No dmtcp_coordinator found. 
Check your DMTCP installation and PATH settings" 27 | exit 0 28 | fi 29 | 30 | dmtcp_coordinator --daemon --exit-on-last -p 0 --port-file $fname $@ 1>/dev/null 2>&1 31 | 32 | while true; do 33 | if [ -f "$fname" ]; then 34 | p=`cat $fname` 35 | if [ -n "$p" ]; then 36 | break 37 | fi 38 | fi 39 | done 40 | 41 | # Create dmtcp_command wrapper for easy communication with coordinator 42 | p=`cat $fname` 43 | chmod +x $fname 44 | echo "#!/bin/bash" > $fname 45 | echo >> $fname 46 | echo "export PATH=$PATH" >> $fname 47 | echo "export DMTCP_COORD_HOST=$h" >> $fname 48 | echo "export DMTCP_COORD_PORT=$p" >> $fname 49 | echo "dmtcp_command \$@" >> $fname 50 | 51 | export DMTCP_COORD_HOST=$h 52 | export DMTCP_COORD_PORT=$p 53 | } 54 | 55 | 56 | # Print out SLURM job information. Remove it if you don't need it 57 | echo "SLURM_JOBID="$SLURM_JOBID 58 | echo "SLURM_JOB_NODELIST"=$SLURM_JOB_NODELIST 59 | echo "SLURM_NNODES"=$SLURM_NNODES 60 | echo "SLURMTMPDIR="$SLURMTMPDIR 61 | echo "working directory = "$SLURM_SUBMIT_DIR 62 | 63 | # Change directory to workdir 64 | cd $SLURM_SUBMIT_DIR 65 | 66 | # Some initial setup like 67 | module load intel/15.0.2 68 | 69 | # DMTCP settings 70 | DMTCP_PATH="$WORK/dmtcp" 71 | export PATH="$DMTCP_PATH/bin/:$PATH" 72 | 73 | DMTCP_NODE=1 74 | NUM=$SLURM_CPUS_ON_NODE 75 | 76 | if [ -s hosts ] 77 | then 78 | rm hosts 79 | fi 80 | 81 | #Create hostfiles 82 | position=1 83 | for i in `scontrol show hostnames $SLURM_NODELIST` 84 | do 85 | if [ $position -gt $DMTCP_NODE ]; then 86 | echo $i:$NUM >>hosts 87 | fi 88 | let "position++" 89 | done 90 | 91 | if [ -s hosts ] 92 | then 93 | echo -e "App hostfile created\n" 94 | fi 95 | 96 | let APP_NODES=$SLURM_NNODES-$DMTCP_NODE 97 | let NUM_APP=$APP_NODES*$NUM 98 | 99 | start_coordinator # -i 120 ... 100 | 101 | echo "mpirun_rsh -np $NUM_APP -hostfile hosts -export dmtcp_launch --ckpt-signal 10 --ib ./a.out" 102 | 103 | mpirun_rsh -np $NUM_APP -hostfile hosts -export dmtcp_launch --ckpt-signal 10 --ib ./a.out 104 | -------------------------------------------------------------------------------- /checkpoint/dmtcp/stampede/slurm_rstr.job: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Put your SLURM options here 3 | #SBATCH --time=00:30:00 # put proper time of reservation here 4 | #SBATCH --nodes=129 # number of nodes, where one node is the coordinator node, and the rest are the compute nodes 5 | #SBATCH --ntasks-per-node=16 # processes per node 6 | #SBATCH --job-name="restart" 7 | #SBATCH --output=restart.std 8 | #SBATCH --error=restart.err 9 | #SBATCH --partition=normal 10 | #SBATCH -A project_number 11 | 12 | # Start a dmtcp coordinator on launch node. A free TCP port is automatically allocated. 13 | # This function creates a dmtcp_command.$JOBID script that serves as a wrapper around 14 | # dmtcp_command that tunes it for the exact dmtcp_coordinator (its hostname and port). 15 | # Instead of typing "dmtcp_command -h -p ", 16 | # you just types "dmtcp_command.$JOBID " and talks to the coordinator of JOBID job. 17 | 18 | start_coordinator() 19 | { 20 | fname=dmtcp_command.$SLURM_JOBID 21 | h=`hostname` 22 | 23 | check_coordinator=`which dmtcp_coordinator` 24 | if [ -z "$check_coordinator" ]; then 25 | echo "No dmtcp_coordinator found. 
Check your DMTCP installation and PATH settings" 26 | exit 0 27 | fi 28 | 29 | dmtcp_coordinator --daemon --exit-on-last -p 0 --port-file $fname $@ 1>/dev/null 2>&1 30 | 31 | while true; do 32 | if [ -f "$fname" ]; then 33 | p=`cat $fname` 34 | if [ -n "$p" ]; then 35 | break 36 | fi 37 | fi 38 | done 39 | 40 | # Create dmtcp_command wrapper for easy communication with coordinator 41 | p=`cat $fname` 42 | chmod +x $fname 43 | echo "#!/bin/bash" > $fname 44 | echo >> $fname 45 | echo "export PATH=$PATH" >> $fname 46 | echo "export DMTCP_COORD_HOST=$h" >> $fname 47 | echo "export DMTCP_COORD_PORT=$p" >> $fname 48 | echo "dmtcp_command \$@" >> $fname 49 | 50 | # Setup local environment for DMTCP 51 | export DMTCP_COORD_HOST=$h 52 | export DMTCP_COORD_PORT=$p 53 | } 54 | 55 | 56 | # Print out SLURM job information. Remove it if you don't need it 57 | echo "SLURM_JOBID="$SLURM_JOBID 58 | echo "SLURM_JOB_NODELIST"=$SLURM_JOB_NODELIST 59 | echo "SLURM_NNODES"=$SLURM_NNODES 60 | echo "SLURMTMPDIR="$SLURMTMPDIR 61 | echo "working directory = "$SLURM_SUBMIT_DIR 62 | 63 | # Change directory to workdir 64 | cd $SLURM_SUBMIT_DIR 65 | HOSTFILE=hostfile 66 | 67 | scontrol show hostname | tail -n +2 > $HOSTFILE 68 | 69 | # Some initial setup like 70 | DMTCP_PATH="$WORK/dmtcp" 71 | export PATH="$DMTCP_PATH/bin/:$PATH" 72 | 73 | start_coordinator # -i 120 ... 74 | 75 | echo "/bin/bash ./dmtcp_restart_script.sh -h $DMTCP_COORD_HOST -p $DMTCP_COORD_PORT --hostfile $HOSTFILE" 76 | 77 | /bin/bash ./dmtcp_restart_script.sh -h $DMTCP_COORD_HOST -p $DMTCP_COORD_PORT --hostfile $HOSTFILE 78 | 79 | rm $HOSTFILE 80 | -------------------------------------------------------------------------------- /checkpoint/dmtcp/ucr-hpcc/single/ckpts/ckpt_bash_afc8ad-40000-3ea0c9317bade2.dmtcp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ucr-hpcc/hpcc_slurm_examples/74b84cb0c85dedf1a096d03e6e9e515ae36a8c22/checkpoint/dmtcp/ucr-hpcc/single/ckpts/ckpt_bash_afc8ad-40000-3ea0c9317bade2.dmtcp -------------------------------------------------------------------------------- /checkpoint/dmtcp/ucr-hpcc/single/ckpts/ckpt_bash_afc8ad-40000-3ea0c9317bade2_files/count.sh_99078: -------------------------------------------------------------------------------- 1 | #!/bin/bash -l 2 | 3 | count=0 4 | echo "$count" > count.log 5 | while [ 1 -eq 1 ]; do 6 | count=$(($count+1)) 7 | sleep 10 8 | echo "$count" >> count.log 9 | done 10 | 11 | echo "Completed $count" 12 | -------------------------------------------------------------------------------- /checkpoint/dmtcp/ucr-hpcc/single/ckpts/ckpt_bash_afc8ad-40000-3ea0c9317bade2_files/fd-info.txt: -------------------------------------------------------------------------------- 1 | count.sh_99078:/bigdata/operations/jhayes/Projects/iigb/slurm/checkpoint/dmtcp/ucr-hpcc/single/count.sh 2 | -------------------------------------------------------------------------------- /checkpoint/dmtcp/ucr-hpcc/single/ckpts/ckpt_sleep_afc8ad-105000-5b7f491c.dmtcp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ucr-hpcc/hpcc_slurm_examples/74b84cb0c85dedf1a096d03e6e9e515ae36a8c22/checkpoint/dmtcp/ucr-hpcc/single/ckpts/ckpt_sleep_afc8ad-105000-5b7f491c.dmtcp -------------------------------------------------------------------------------- /checkpoint/dmtcp/ucr-hpcc/single/ckpts/ckpt_sleep_afc8ad-110000-5b7f4957.dmtcp: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/ucr-hpcc/hpcc_slurm_examples/74b84cb0c85dedf1a096d03e6e9e515ae36a8c22/checkpoint/dmtcp/ucr-hpcc/single/ckpts/ckpt_sleep_afc8ad-110000-5b7f4957.dmtcp -------------------------------------------------------------------------------- /checkpoint/dmtcp/ucr-hpcc/single/ckpts/ckpt_sleep_afc8ad-115000-5b7f4994.dmtcp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ucr-hpcc/hpcc_slurm_examples/74b84cb0c85dedf1a096d03e6e9e515ae36a8c22/checkpoint/dmtcp/ucr-hpcc/single/ckpts/ckpt_sleep_afc8ad-115000-5b7f4994.dmtcp -------------------------------------------------------------------------------- /checkpoint/dmtcp/ucr-hpcc/single/ckpts/ckpt_sleep_afc8ad-120000-5b7f49d0.dmtcp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ucr-hpcc/hpcc_slurm_examples/74b84cb0c85dedf1a096d03e6e9e515ae36a8c22/checkpoint/dmtcp/ucr-hpcc/single/ckpts/ckpt_sleep_afc8ad-120000-5b7f49d0.dmtcp -------------------------------------------------------------------------------- /checkpoint/dmtcp/ucr-hpcc/single/ckpts/ckpt_sleep_afc8ad-125000-5b7f4a0c.dmtcp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ucr-hpcc/hpcc_slurm_examples/74b84cb0c85dedf1a096d03e6e9e515ae36a8c22/checkpoint/dmtcp/ucr-hpcc/single/ckpts/ckpt_sleep_afc8ad-125000-5b7f4a0c.dmtcp -------------------------------------------------------------------------------- /checkpoint/dmtcp/ucr-hpcc/single/ckpts/ckpt_sleep_afc8ad-130000-5b7f4a49.dmtcp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ucr-hpcc/hpcc_slurm_examples/74b84cb0c85dedf1a096d03e6e9e515ae36a8c22/checkpoint/dmtcp/ucr-hpcc/single/ckpts/ckpt_sleep_afc8ad-130000-5b7f4a49.dmtcp -------------------------------------------------------------------------------- /checkpoint/dmtcp/ucr-hpcc/single/ckpts/ckpt_sleep_afc8ad-135000-5b7f4a85.dmtcp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ucr-hpcc/hpcc_slurm_examples/74b84cb0c85dedf1a096d03e6e9e515ae36a8c22/checkpoint/dmtcp/ucr-hpcc/single/ckpts/ckpt_sleep_afc8ad-135000-5b7f4a85.dmtcp -------------------------------------------------------------------------------- /checkpoint/dmtcp/ucr-hpcc/single/ckpts/dmtcp_restart_script_afc8ad-40000-3ea0c918c3d461.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -m # turn on job control 4 | 5 | #This script launches all the restarts in the background. 6 | #Suggestions for editing: 7 | # 1. For those processes executing on the localhost, remove 8 | # 'ssh from the start of the line. 9 | # 2. If using ssh, verify that ssh does not require passwords or other 10 | # prompts. 11 | # 3. Verify that the dmtcp_restart command is in your path on all hosts, 12 | # otherwise set the dmt_rstr_cmd appropriately. 13 | # 4. Verify DMTCP_COORD_HOST and DMTCP_COORD_PORT match the location of 14 | # the dmtcp_coordinator. If necessary, add 15 | # 'DMTCP_COORD_PORT=' after 16 | # 'DMTCP_COORD_HOST=<...>'. 17 | # 5. Remove the '&' from a line if that process reads STDIN. 18 | # If multiple processes read STDIN then prefix the line with 19 | # 'xterm -hold -e' and put '&' at the end of the line. 20 | # 6. 
Processes on same host can be restarted with single dmtcp_restart 21 | # command. 22 | 23 | 24 | check_local() 25 | { 26 | worker_host=$1 27 | unset is_local_node 28 | worker_ip=$(gethostip -d $worker_host 2> /dev/null) 29 | if [ -z "$worker_ip" ]; then 30 | worker_ip=$(nslookup $worker_host | grep -A1 'Name:' | grep 'Address:' | sed -e 's/Address://' -e 's/ //' -e 's/ //') 31 | fi 32 | if [ -z "$worker_ip" ]; then 33 | worker_ip=$(getent ahosts $worker_host |grep "^[0-9]\+\.[0-9]\+\.[0-9]\+\.[0-9]\+ *STREAM" | cut -d' ' -f1) 34 | fi 35 | if [ -z "$worker_ip" ]; then 36 | echo Could not find ip-address for $worker_host. Exiting... 37 | exit 1 38 | fi 39 | ifconfig_path=$(which ifconfig) 40 | if [ -z "$ifconfig_path" ]; then 41 | ifconfig_path="/sbin/ifconfig" 42 | fi 43 | output=$($ifconfig_path -a | grep "inet addr:.*${worker_ip} .*Bcast") 44 | if [ -n "$output" ]; then 45 | is_local_node=1 46 | else 47 | is_local_node=0 48 | fi 49 | } 50 | 51 | 52 | pass_slurm_helper_contact() 53 | { 54 | LOCAL_FILES="$1" 55 | # Create temp directory if needed 56 | if [ -n "$DMTCP_TMPDIR" ]; then 57 | CURRENT_TMPDIR=$DMTCP_TMPDIR/dmtcp-`whoami`@`hostname` 58 | elif [ -n "$TMPDIR" ]; then 59 | CURRENT_TMPDIR=$TMPDIR/dmtcp-`whoami`@`hostname` 60 | else 61 | CURRENT_TMPDIR=/tmp/dmtcp-`whoami`@`hostname` 62 | fi 63 | if [ ! -d "$CURRENT_TMPDIR" ]; then 64 | mkdir -p $CURRENT_TMPDIR 65 | fi 66 | # Create files with SLURM environment 67 | for CKPT_FILE in $LOCAL_FILES; do 68 | SUFFIX=${CKPT_FILE%%.dmtcp} 69 | SLURM_ENV_FILE=$CURRENT_TMPDIR/slurm_env_${SUFFIX##*_} 70 | echo "DMTCP_SRUN_HELPER_ADDR=$DMTCP_SRUN_HELPER_ADDR" >> $SLURM_ENV_FILE 71 | done 72 | } 73 | 74 | 75 | usage_str='USAGE: 76 | dmtcp_restart_script.sh [OPTIONS] 77 | 78 | OPTIONS: 79 | --coord-host, -h, (environment variable DMTCP_COORD_HOST): 80 | Hostname where dmtcp_coordinator is running 81 | --coord-port, -p, (environment variable DMTCP_COORD_PORT): 82 | Port where dmtcp_coordinator is running 83 | --hostfile : 84 | Provide a hostfile (One host per line, "#" indicates comments) 85 | --ckptdir, -d, (environment variable DMTCP_CHECKPOINT_DIR): 86 | Directory to store checkpoint images 87 | (default: use the same directory used in previous checkpoint) 88 | --restartdir, -d, (environment variable DMTCP_RESTART_DIR): 89 | Directory to read checkpoint images from 90 | --tmpdir, -t, (environment variable DMTCP_TMPDIR): 91 | Directory to store temporary files (default: $TMDPIR or /tmp) 92 | --no-strict-checking: 93 | Disable uid checking for checkpoint image. This allows the 94 | checkpoint image to be restarted by a different user than the one 95 | that created it. And suppress warning about running as root. 96 | (environment variable DMTCP_DISABLE_STRICT_CHECKING) 97 | --interval, -i, (environment variable DMTCP_CHECKPOINT_INTERVAL): 98 | Time in seconds between automatic checkpoints 99 | (Default: Use pre-checkpoint value) 100 | --coord-logfile PATH (environment variable DMTCP_COORD_LOG_FILENAME 101 | Coordinator will dump its logs to the given file 102 | --help: 103 | Print this message and exit.' 
104 | 105 | 106 | ckpt_timestamp="Thu Aug 23 17:00:13 2018" 107 | 108 | remote_shell_cmd="ssh" 109 | 110 | coord_host=$DMTCP_COORD_HOST 111 | if test -z "$DMTCP_COORD_HOST"; then 112 | coord_host=i11 113 | fi 114 | 115 | coord_port=$DMTCP_COORD_PORT 116 | if test -z "$DMTCP_COORD_PORT"; then 117 | coord_port=46172 118 | fi 119 | 120 | checkpoint_interval=$DMTCP_CHECKPOINT_INTERVAL 121 | if test -z "$DMTCP_CHECKPOINT_INTERVAL"; then 122 | checkpoint_interval=60 123 | fi 124 | export DMTCP_CHECKPOINT_INTERVAL=${checkpoint_interval} 125 | 126 | if [ $# -gt 0 ]; then 127 | while [ $# -gt 0 ] 128 | do 129 | if [ $1 = "--help" ]; then 130 | echo "$usage_str" 131 | exit 132 | elif [ $# -ge 1 ]; then 133 | case "$1" in 134 | --coord-host|--host|-h) 135 | coord_host="$2" 136 | shift; shift;; 137 | --coord-port|--port|-p) 138 | coord_port="$2" 139 | shift; shift;; 140 | --coord-logfile) 141 | DMTCP_COORD_LOGFILE="$2" 142 | shift; shift;; 143 | --hostfile) 144 | hostfile="$2" 145 | if [ ! -f "$hostfile" ]; then 146 | echo "ERROR: hostfile $hostfile not found" 147 | exit 148 | fi 149 | shift; shift;; 150 | --restartdir|-d) 151 | DMTCP_RESTART_DIR=$2 152 | shift; shift;; 153 | --ckptdir|-d) 154 | DMTCP_CKPT_DIR=$2 155 | shift; shift;; 156 | --tmpdir|-t) 157 | DMTCP_TMPDIR=$2 158 | shift; shift;; 159 | --no-strict-checking) 160 | noStrictChecking="--no-strict-checking" 161 | shift;; 162 | --interval|-i) 163 | checkpoint_interval=$2 164 | shift; shift;; 165 | *) 166 | echo "$0: unrecognized option '$1'. See correct usage below" 167 | echo "$usage_str" 168 | exit;; 169 | esac 170 | elif [ $1 = "--help" ]; then 171 | echo "$usage_str" 172 | exit 173 | else 174 | echo "$0: Incorrect usage. See correct usage below" 175 | echo 176 | echo "$usage_str" 177 | exit 178 | fi 179 | done 180 | fi 181 | 182 | dmt_rstr_cmd=/bigdata/operations/pkgadmin/opt/linux/centos/7.x/x86_64/pkgs/dmtcp/2.5.2/bin/dmtcp_restart 183 | which $dmt_rstr_cmd > /dev/null 2>&1 || dmt_rstr_cmd=dmtcp_restart 184 | which $dmt_rstr_cmd > /dev/null 2>&1 || echo "$0: $dmt_rstr_cmd not found" 185 | which $dmt_rstr_cmd > /dev/null 2>&1 || exit 1 186 | 187 | # Number of hosts in the computation = 1 188 | # Number of processes in the computation = 2 189 | 190 | # SYNTAX: 191 | # :: :: ... : 192 | # Host names and filenames must not include ':' 193 | # At most one fg (foreground) mode allowed; it must be last. 194 | # 'maybexterm' and 'maybebg' are set from . 195 | worker_ckpts=' 196 | :: i11 :bg: ckpts/ckpt_sleep_afc8ad-135000-5b7f4a85.dmtcp ckpts/ckpt_bash_afc8ad-40000-3ea0c9317bade2.dmtcp : ssh 197 | ' 198 | 199 | # Check for resource manager 200 | ibrun_path=$(which ibrun 2> /dev/null) 201 | if [ ! 
-n "$ibrun_path" ]; then 202 | discover_rm_path=$(which dmtcp_discover_rm) 203 | if [ -n "$discover_rm_path" ]; then 204 | eval $(dmtcp_discover_rm -t) 205 | srun_path=$(which srun 2> /dev/null) 206 | llaunch=`which dmtcp_rm_loclaunch` 207 | if [ $RES_MANAGER = "SLURM" ] && [ -n "$srun_path" ]; then 208 | eval $(dmtcp_discover_rm -n "$worker_ckpts") 209 | if [ -n "$DMTCP_DISCOVER_RM_ERROR" ]; then 210 | echo "Restart error: $DMTCP_DISCOVER_RM_ERROR" 211 | echo "Allocated resources: $manager_resources" 212 | exit 0 213 | fi 214 | export DMTCP_REMLAUNCH_NODES=$DMTCP_REMLAUNCH_NODES 215 | bound=$(($DMTCP_REMLAUNCH_NODES - 1)) 216 | srun_nnodes=0 217 | srun_ntasks=0 218 | for i in $(seq 0 $bound); do 219 | eval "val=\${DMTCP_REMLAUNCH_${i}_SLOTS}" 220 | #skip allocated-but-not-used nodes (dmtcp_discover_rm returns 0) 221 | test "$val" = "0" && continue 222 | srun_nnodes=$(( $srun_nnodes + 1 )) 223 | export DMTCP_REMLAUNCH_${i}_SLOTS="$val" 224 | bound2=$(($val - 1)) 225 | for j in $(seq 0 $bound2); do 226 | srun_ntasks=$(( $srun_ntasks + 1 )) 227 | eval "ckpts=\${DMTCP_REMLAUNCH_${i}_${j}}" 228 | export DMTCP_REMLAUNCH_${i}_${j}="$ckpts" 229 | done 230 | done 231 | if [ "$DMTCP_DISCOVER_PM_TYPE" = "HYDRA" ]; then 232 | export DMTCP_SRUN_HELPER_SYNCFILE=`mktemp ./tmp.XXXXXXXXXX` 233 | rm $DMTCP_SRUN_HELPER_SYNCFILE 234 | dmtcp_srun_helper -r $srun_path "$llaunch" 235 | if [ ! -f $DMTCP_SRUN_HELPER_SYNCFILE ]; then 236 | echo "Error launching application" 237 | exit 1 238 | fi 239 | # export helper contact info 240 | . $DMTCP_SRUN_HELPER_SYNCFILE 241 | pass_slurm_helper_contact "$DMTCP_LAUNCH_CKPTS" 242 | rm $DMTCP_SRUN_HELPER_SYNCFILE 243 | dmtcp_restart --join-coordinator --coord-host $DMTCP_COORD_HOST --coord-port $DMTCP_COORD_PORT $DMTCP_LAUNCH_CKPTS 244 | else 245 | DMTCP_REMLAUNCH_0_0="$DMTCP_REMLAUNCH_0_0 $DMTCP_LAUNCH_CKPTS" 246 | $srun_path --nodes=$srun_nnodes --ntasks=$srun_ntasks "$llaunch" 247 | fi 248 | exit 0 249 | elif [ $RES_MANAGER = "TORQUE" ]; then 250 | #eval $(dmtcp_discover_rm "$worker_ckpts") 251 | #if [ -n "$new_worker_ckpts" ]; then 252 | # worker_ckpts="$new_worker_ckpts" 253 | #fi 254 | eval $(dmtcp_discover_rm -n "$worker_ckpts") 255 | if [ -n "$DMTCP_DISCOVER_RM_ERROR" ]; then 256 | echo "Restart error: $DMTCP_DISCOVER_RM_ERROR" 257 | echo "Allocated resources: $manager_resources" 258 | exit 0 259 | fi 260 | arguments="PATH=$PATH DMTCP_COORD_HOST=$DMTCP_COORD_HOST DMTCP_COORD_PORT=$DMTCP_COORD_PORT" 261 | arguments=$arguments" DMTCP_CHECKPOINT_INTERVAL=$DMTCP_CHECKPOINT_INTERVAL" 262 | arguments=$arguments" DMTCP_TMPDIR=$DMTCP_TMPDIR" 263 | arguments=$arguments" DMTCP_REMLAUNCH_NODES=$DMTCP_REMLAUNCH_NODES" 264 | bound=$(($DMTCP_REMLAUNCH_NODES - 1)) 265 | for i in $(seq 0 $bound); do 266 | eval "val=\${DMTCP_REMLAUNCH_${i}_SLOTS}" 267 | arguments=$arguments" DMTCP_REMLAUNCH_${i}_SLOTS=\"$val\"" 268 | bound2=$(($val - 1)) 269 | for j in $(seq 0 $bound2); do 270 | eval "ckpts=\${DMTCP_REMLAUNCH_${i}_${j}}" 271 | arguments=$arguments" DMTCP_REMLAUNCH_${i}_${j}=\"$ckpts\"" 272 | done 273 | done 274 | pbsdsh -u "$llaunch" "$arguments" 275 | exit 0 276 | fi 277 | fi 278 | fi 279 | 280 | 281 | worker_ckpts_regexp=\ 282 | '[^:]*::[ \t\n]*\([^ \t\n]\+\)[ \t\n]*:\([a-z]\+\):[ \t\n]*\([^:]\+\)[ \t\n]*:\([^:]\+\)' 283 | 284 | worker_hosts=$(\ 285 | echo $worker_ckpts | sed -e 's/'"$worker_ckpts_regexp"'/\1 /g') 286 | restart_modes=$(\ 287 | echo $worker_ckpts | sed -e 's/'"$worker_ckpts_regexp"'/: \2/g') 288 | ckpt_files_groups=$(\ 289 | echo $worker_ckpts | sed -e 
's/'"$worker_ckpts_regexp"'/: \3/g') 290 | remote_cmd=$(\ 291 | echo $worker_ckpts | sed -e 's/'"$worker_ckpts_regexp"'/: \4/g') 292 | 293 | if [ ! -z "$hostfile" ]; then 294 | worker_hosts=$(\ 295 | cat "$hostfile" | sed -e 's/#.*//' -e 's/[ \t\r]*//' -e '/^$/ d') 296 | fi 297 | 298 | localhost_ckpt_files_group= 299 | 300 | num_worker_hosts=$(echo $worker_hosts | wc -w) 301 | 302 | maybejoin= 303 | if [ "$num_worker_hosts" != "1" ]; then 304 | maybejoin='--join-coordinator' 305 | fi 306 | 307 | for worker_host in $worker_hosts 308 | do 309 | 310 | ckpt_files_group=$(\ 311 | echo $ckpt_files_groups | sed -e 's/[^:]*:[ \t\n]*\([^:]*\).*/\1/') 312 | ckpt_files_groups=$(echo $ckpt_files_groups | sed -e 's/[^:]*:[^:]*//') 313 | 314 | mode=$(echo $restart_modes | sed -e 's/[^:]*:[ \t\n]*\([^:]*\).*/\1/') 315 | restart_modes=$(echo $restart_modes | sed -e 's/[^:]*:[^:]*//') 316 | 317 | remote_shell_cmd=$(echo $remote_cmd | sed -e 's/[^:]*:[ \t\n]*\([^:]*\).*/\1/') 318 | remote_cmd=$(echo $remote_cmd | sed -e 's/[^:]*:[^:]*//') 319 | 320 | maybexterm= 321 | maybebg= 322 | case $mode in 323 | bg) maybebg='bg';; 324 | xterm) maybexterm=xterm;; 325 | fg) ;; 326 | *) echo "WARNING: Unknown Mode";; 327 | esac 328 | 329 | if [ -z "$ckpt_files_group" ]; then 330 | break; 331 | fi 332 | 333 | new_ckpt_files_group="" 334 | for tmp in $ckpt_files_group 335 | do 336 | if [ ! -z "$DMTCP_RESTART_DIR" ]; then 337 | tmp=$DMTCP_RESTART_DIR/$(basename $tmp) 338 | fi 339 | new_ckpt_files_group="$new_ckpt_files_group $tmp" 340 | done 341 | 342 | tmpdir= 343 | if [ ! -z "$DMTCP_TMPDIR" ]; then 344 | tmpdir="--tmpdir $DMTCP_TMPDIR" 345 | fi 346 | 347 | coord_logfile= 348 | if [ ! -z "$DMTCP_COORD_LOGFILE" ]; then 349 | coord_logfile="--coord-logfile $DMTCP_COORD_LOGFILE" 350 | fi 351 | 352 | check_local $worker_host 353 | if [ "$is_local_node" -eq 1 -o "$num_worker_hosts" == "1" ]; then 354 | localhost_ckpt_files_group="$new_ckpt_files_group $localhost_ckpt_files_group" 355 | continue 356 | fi 357 | if [ -z $maybebg ]; then 358 | $maybexterm /usr/bin/$remote_shell_cmd -t "$worker_host" \ 359 | $dmt_rstr_cmd --coord-host "$coord_host" --cord-port "$coord_port"\ 360 | $ckpt_dir --join-coordinator --interval "$checkpoint_interval" $tmpdir \ 361 | $new_ckpt_files_group 362 | else 363 | $maybexterm /usr/bin/$remote_shell_cmd "$worker_host" \ 364 | "/bin/sh -c '$dmt_rstr_cmd --coord-host $coord_host --coord-port $coord_port $coord_logfile\ 365 | $ckpt_dir --join-coordinator --interval "$checkpoint_interval" $tmpdir \ 366 | $new_ckpt_files_group'" & 367 | fi 368 | 369 | done 370 | 371 | if [ -n "$localhost_ckpt_files_group" ]; then 372 | exec $dmt_rstr_cmd --coord-host "$coord_host" --coord-port "$coord_port" $coord_logfile \ 373 | $ckpt_dir $maybejoin --interval "$checkpoint_interval" $tmpdir $noStrictChecking $localhost_ckpt_files_group 374 | fi 375 | 376 | #wait for them all to finish 377 | wait 378 | -------------------------------------------------------------------------------- /checkpoint/dmtcp/ucr-hpcc/single/count.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -l 2 | 3 | count=0 4 | #echo "$count" > count.log 5 | while [ 1 -eq 1 ]; do 6 | count=$(($count+1)) 7 | sleep 10 8 | echo "$count" #>> count.log 9 | done 10 | 11 | echo "Completed $count" 12 | -------------------------------------------------------------------------------- /checkpoint/dmtcp/ucr-hpcc/single/slurm_launch.job: -------------------------------------------------------------------------------- 1 
| #!/bin/bash -l 2 | #SBATCH -p short 3 | #SBATCH --job-name=count 4 | #SBATCH --nodes=1 5 | #SBATCH --ntasks=2 6 | #SBATCH --time=15:00 7 | #SBATCH --mem=1gb 8 | #SBATCH --output=count.txt 9 | 10 | module load dmtcp 11 | 12 | dmtcp_launch --new-coordinator --ckptdir ckpts --rm --interval 60 ./count.sh 13 | 14 | -------------------------------------------------------------------------------- /checkpoint/dmtcp/ucr-hpcc/single/slurm_rstr.job: -------------------------------------------------------------------------------- 1 | #!/bin/bash -l 2 | #SBATCH -p short 3 | #SBATCH --job-name=count 4 | #SBATCH --nodes=1 5 | #SBATCH --ntasks=2 6 | #SBATCH --time=15:00 7 | #SBATCH --mem=1gb 8 | #SBATCH --output=count_rstrt.txt 9 | 10 | # Load modules 11 | module load dmtcp 12 | 13 | dmtcp_restart_script=$(ls -t ckpts/dmtcp_restart_script_*.sh | head -1) 14 | 15 | # Start DMTCP ################ 16 | dmtcp_coordinator --daemon --port 0 --port-file /tmp/port 17 | export DMTCP_COORD_HOST=`hostname` 18 | export DMTCP_COORD_PORT=$( NOTE: This assumes you already know the basics of job management. 4 | > If not, then please take the time to read through [HPC Cluster Jobs](https://hpcc.ucr.edu/manuals/hpc_cluster/jobs/). 5 | > And/or review the `Intro to HPCC` video from our [Events](https://hpcc.ucr.edu/events/small/) page. 6 | 7 | Running Dedalus on the cluster is similar to running any other software: you need to create a job submission script that acts as a wrapper for your Python code. 8 | Look at the [job.sh](ivp_2d_rayleigh_benard/job.sh) submission script as an example. 9 | 10 | When running Dedalus on the cluster you need to use `mpiexec`; however, you do not need to pass the number of parallel processes to `mpiexec`, since this is determined by your `Slurm` resource request. 11 | 12 | For example, we run Dedalus Python code like so: 13 | 14 | ```bash 15 | mpiexec python3 rayleigh_benard.py 16 | ``` 17 | 18 | Notice the omission of the `-n` flag above, compared to the following: 19 | 20 | ```bash 21 | mpiexec -n 4 python3 rayleigh_benard.py 22 | ``` 23 | 24 | We do not need the `-n` MPI flag because our version of `OpenMPI` is compiled against `Slurm`. 25 | Thus, the `--ntasks` or `-n` Slurm flag determines the number of parallel MPI processes and is automatically passed to `mpiexec`. 26 | 27 | To scale this to more parallel processes, we just increase the number of tasks: 28 | 29 | ``` 30 | #SBATCH --ntasks=32 31 | ``` 32 | 33 | Since most nodes are capable of 64 parallel processes, we could request up to 64 tasks at most. 34 | However, requesting more than 32 tasks may increase the queue wait time; the trade-off here is up to you. 35 | 36 | If you need more than 64 parallel processes, you can increase the number of nodes: 37 | 38 | ```bash 39 | #SBATCH -N 2 40 | #SBATCH --ntasks=64 41 | ``` 42 | 43 | The above would request 64 total parallel processes, but it would distribute them across 2 nodes. 44 | 45 | The maximum number of parallel processes you can request is determined by your Slurm limits. 46 | You can check your Slurm limits with the following command: 47 | 48 | ```bash 49 | slurm_limits 50 | ``` 51 | 52 | Look for the `cpu=X` notation corresponding to the partition you are submitting to, 53 | where `X` is the maximum number of parallel processes for your account on a particular partition. 
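Putting these pieces together, a larger two-node run might use a submission script like the sketch below. This is only a sketch: the `epyc` partition name, the memory request, and the 128-task count are placeholder values that you would adjust to your own limits and problem size.

```bash
#!/bin/bash -l

#SBATCH -p epyc            # placeholder partition; use one you have access to
#SBATCH -N 2               # two nodes
#SBATCH --ntasks=128       # example: 64 MPI tasks per node
#SBATCH -c 1               # one CPU per MPI task
#SBATCH --mem=50gb         # per-node memory; adjust to your problem size

# Load dedalus
module load dedalus

# Disable threading
export OMP_NUM_THREADS=1

# Run dedalus code; the MPI process count comes from --ntasks above
mpiexec python3 rayleigh_benard.py
```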
54 | 55 | Lastly, do not forget to disable threading before calling `mpiexec`: 56 | 57 | ```bash 58 | export OMP_NUM_THREADS=1 59 | ``` 60 | -------------------------------------------------------------------------------- /dedalus/ivp_2d_rayleigh_benard/job.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -l 2 | 3 | #SBATCH -N 1 4 | #SBATCH -n 4 5 | #SBATCH -c 1 6 | #SBATCH -p short 7 | #SBATCH --mem=50gb 8 | 9 | # Load dedalus 10 | module load dedalus 11 | 12 | # Disable threading 13 | export OMP_NUM_THREADS=1 14 | 15 | # Run dedalus code 16 | mpiexec python3 rayleigh_benard.py 17 | mpiexec python3 plot_snapshots.py snapshots/*.h5 18 | -------------------------------------------------------------------------------- /dedalus/ivp_2d_rayleigh_benard/plot_snapshots.py: -------------------------------------------------------------------------------- 1 | """ 2 | Plot 2D cartesian snapshots. 3 | 4 | Usage: 5 | plot_snapshots.py ... [--output=] 6 | 7 | Options: 8 | --output= Output directory [default: ./frames] 9 | 10 | """ 11 | 12 | import h5py 13 | import numpy as np 14 | import matplotlib 15 | matplotlib.use('Agg') 16 | import matplotlib.pyplot as plt 17 | from dedalus.extras import plot_tools 18 | 19 | 20 | def main(filename, start, count, output): 21 | """Save plot of specified tasks for given range of analysis writes.""" 22 | 23 | # Plot settings 24 | tasks = ['buoyancy', 'vorticity'] 25 | scale = 1.5 26 | dpi = 200 27 | title_func = lambda sim_time: 't = {:.3f}'.format(sim_time) 28 | savename_func = lambda write: 'write_{:06}.png'.format(write) 29 | 30 | # Layout 31 | nrows, ncols = 2, 1 32 | image = plot_tools.Box(4, 1) 33 | pad = plot_tools.Frame(0.3, 0, 0, 0) 34 | margin = plot_tools.Frame(0.2, 0.1, 0, 0) 35 | 36 | # Create multifigure 37 | mfig = plot_tools.MultiFigure(nrows, ncols, image, pad, margin, scale) 38 | fig = mfig.figure 39 | 40 | # Plot writes 41 | with h5py.File(filename, mode='r') as file: 42 | for index in range(start, start+count): 43 | for n, task in enumerate(tasks): 44 | # Build subfigure axes 45 | i, j = divmod(n, ncols) 46 | axes = mfig.add_axes(i, j, [0, 0, 1, 1]) 47 | # Call 3D plotting helper, slicing in time 48 | dset = file['tasks'][task] 49 | plot_tools.plot_bot_3d(dset, 0, index, axes=axes, title=task, even_scale=True, visible_axes=False) 50 | # Add time title 51 | title = title_func(file['scales/sim_time'][index]) 52 | title_height = 1 - 0.5 * mfig.margin.top / mfig.fig.y 53 | fig.suptitle(title, x=0.44, y=title_height, ha='left') 54 | # Save figure 55 | savename = savename_func(file['scales/write_number'][index]) 56 | savepath = output.joinpath(savename) 57 | fig.savefig(str(savepath), dpi=dpi) 58 | fig.clear() 59 | plt.close(fig) 60 | 61 | 62 | if __name__ == "__main__": 63 | 64 | import pathlib 65 | from docopt import docopt 66 | from dedalus.tools import logging 67 | from dedalus.tools import post 68 | from dedalus.tools.parallel import Sync 69 | 70 | args = docopt(__doc__) 71 | 72 | output_path = pathlib.Path(args['--output']).absolute() 73 | # Create output directory if needed 74 | with Sync() as sync: 75 | if sync.comm.rank == 0: 76 | if not output_path.exists(): 77 | output_path.mkdir() 78 | post.visit_writes(args[''], main, output=output_path) 79 | 80 | -------------------------------------------------------------------------------- /dedalus/ivp_2d_rayleigh_benard/rayleigh_benard.py: -------------------------------------------------------------------------------- 1 | """ 2 | Dedalus script 
simulating 2D horizontally-periodic Rayleigh-Benard convection. 3 | This script demonstrates solving a 2D cartesian initial value problem. It can 4 | be ran serially or in parallel, and uses the built-in analysis framework to save 5 | data snapshots to HDF5 files. The `plot_snapshots.py` script can be used to 6 | produce plots from the saved data. It should take about a cpu-minute to run. 7 | 8 | The problem is non-dimensionalized using the box height and freefall time, so 9 | the resulting thermal diffusivity and viscosity are related to the Prandtl 10 | and Rayleigh numbers as: 11 | 12 | kappa = (Rayleigh * Prandtl)**(-1/2) 13 | nu = (Rayleigh / Prandtl)**(-1/2) 14 | 15 | For incompressible hydro with two boundaries, we need two tau terms for each the 16 | velocity and buoyancy. Here we choose to use a first-order formulation, putting 17 | one tau term each on auxiliary first-order gradient variables and the others in 18 | the PDE, and lifting them all to the first derivative basis. This formulation puts 19 | a tau term in the divergence constraint, as required for this geometry. 20 | 21 | To run and plot using e.g. 4 processes: 22 | $ mpiexec -n 4 python3 rayleigh_benard.py 23 | $ mpiexec -n 4 python3 plot_snapshots.py snapshots/*.h5 24 | """ 25 | 26 | import numpy as np 27 | import dedalus.public as d3 28 | import logging 29 | logger = logging.getLogger(__name__) 30 | 31 | 32 | # Parameters 33 | Lx, Lz = 4, 1 34 | Nx, Nz = 256, 64 35 | Rayleigh = 2e6 36 | Prandtl = 1 37 | dealias = 3/2 38 | stop_sim_time = 50 39 | timestepper = d3.RK222 40 | max_timestep = 0.125 41 | dtype = np.float64 42 | 43 | # Bases 44 | coords = d3.CartesianCoordinates('x', 'z') 45 | dist = d3.Distributor(coords, dtype=dtype) 46 | xbasis = d3.RealFourier(coords['x'], size=Nx, bounds=(0, Lx), dealias=dealias) 47 | zbasis = d3.ChebyshevT(coords['z'], size=Nz, bounds=(0, Lz), dealias=dealias) 48 | 49 | # Fields 50 | p = dist.Field(name='p', bases=(xbasis,zbasis)) 51 | b = dist.Field(name='b', bases=(xbasis,zbasis)) 52 | u = dist.VectorField(coords, name='u', bases=(xbasis,zbasis)) 53 | tau_p = dist.Field(name='tau_p') 54 | tau_b1 = dist.Field(name='tau_b1', bases=xbasis) 55 | tau_b2 = dist.Field(name='tau_b2', bases=xbasis) 56 | tau_u1 = dist.VectorField(coords, name='tau_u1', bases=xbasis) 57 | tau_u2 = dist.VectorField(coords, name='tau_u2', bases=xbasis) 58 | 59 | # Substitutions 60 | kappa = (Rayleigh * Prandtl)**(-1/2) 61 | nu = (Rayleigh / Prandtl)**(-1/2) 62 | x, z = dist.local_grids(xbasis, zbasis) 63 | ex, ez = coords.unit_vector_fields(dist) 64 | lift_basis = zbasis.derivative_basis(1) 65 | lift = lambda A: d3.Lift(A, lift_basis, -1) 66 | grad_u = d3.grad(u) + ez*lift(tau_u1) # First-order reduction 67 | grad_b = d3.grad(b) + ez*lift(tau_b1) # First-order reduction 68 | 69 | # Problem 70 | # First-order form: "div(f)" becomes "trace(grad_f)" 71 | # First-order form: "lap(f)" becomes "div(grad_f)" 72 | problem = d3.IVP([p, b, u, tau_p, tau_b1, tau_b2, tau_u1, tau_u2], namespace=locals()) 73 | problem.add_equation("trace(grad_u) + tau_p = 0") 74 | problem.add_equation("dt(b) - kappa*div(grad_b) + lift(tau_b2) = - u@grad(b)") 75 | problem.add_equation("dt(u) - nu*div(grad_u) + grad(p) - b*ez + lift(tau_u2) = - u@grad(u)") 76 | problem.add_equation("b(z=0) = Lz") 77 | problem.add_equation("u(z=0) = 0") 78 | problem.add_equation("b(z=Lz) = 0") 79 | problem.add_equation("u(z=Lz) = 0") 80 | problem.add_equation("integ(p) = 0") # Pressure gauge 81 | 82 | # Solver 83 | solver = problem.build_solver(timestepper) 84 | 
solver.stop_sim_time = stop_sim_time 85 | 86 | # Initial conditions 87 | b.fill_random('g', seed=42, distribution='normal', scale=1e-3) # Random noise 88 | b['g'] *= z * (Lz - z) # Damp noise at walls 89 | b['g'] += Lz - z # Add linear background 90 | 91 | # Analysis 92 | snapshots = solver.evaluator.add_file_handler('snapshots', sim_dt=0.25, max_writes=50) 93 | snapshots.add_task(b, name='buoyancy') 94 | snapshots.add_task(-d3.div(d3.skew(u)), name='vorticity') 95 | 96 | # CFL 97 | CFL = d3.CFL(solver, initial_dt=max_timestep, cadence=10, safety=0.5, threshold=0.05, 98 | max_change=1.5, min_change=0.5, max_dt=max_timestep) 99 | CFL.add_velocity(u) 100 | 101 | # Flow properties 102 | flow = d3.GlobalFlowProperty(solver, cadence=10) 103 | flow.add_property(np.sqrt(u@u)/nu, name='Re') 104 | 105 | # Main loop 106 | startup_iter = 10 107 | try: 108 | logger.info('Starting main loop') 109 | while solver.proceed: 110 | timestep = CFL.compute_timestep() 111 | solver.step(timestep) 112 | if (solver.iteration-1) % 10 == 0: 113 | max_Re = flow.max('Re') 114 | logger.info('Iteration=%i, Time=%e, dt=%e, max(Re)=%f' %(solver.iteration, solver.sim_time, timestep, max_Re)) 115 | except: 116 | logger.error('Exception raised, triggering end of main loop.') 117 | raise 118 | finally: 119 | solver.log_stats() 120 | 121 | -------------------------------------------------------------------------------- /depend/README.md: -------------------------------------------------------------------------------- 1 | # Basic Example 2 | 3 | ``` 4 | sbatch --parsable first_job.sh 5 | 5383495 6 | sbatch --dependency=after:5383495 second_job.sh 7 | ``` 8 | 9 | # Scripted Example 10 | One way to script dependencies is to nest submissions (a job submitting a job): 11 | 12 | ```bash 13 | sbatch first_job.sh 14 | ``` 15 | 16 | Contents of `first_job.sh`: 17 | 18 | ```bash 19 | #!/bin/bash 20 | #SBATCH -p short 21 | #SBATCH --mem=1G 22 | #SBATCH --ntasks=1 23 | 24 | sbatch -p short --mem=1G --ntasks=1 --dependency=after:$SLURM_JOB_ID second_job.sh 25 | 26 | # Do some work 27 | sleep 60 28 | 29 | ``` 30 | 31 | # Complex Example 32 | This example is a simple linear chain of dependancies (max 3 jobs): 33 | 34 | ```batch 35 | sbatch test_job.sh 36 | ``` 37 | 38 | -------------------------------------------------------------------------------- /depend/slurm-180.out: -------------------------------------------------------------------------------- 1 | hi from 180 2 | submitted 181 3 | 180 done 4 | -------------------------------------------------------------------------------- /depend/slurm-181.out: -------------------------------------------------------------------------------- 1 | hi from 181 2 | submitted 182 3 | 181 done 4 | -------------------------------------------------------------------------------- /depend/slurm-182.out: -------------------------------------------------------------------------------- 1 | hi from 182 2 | 182 done 3 | -------------------------------------------------------------------------------- /depend/test_job.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -l 2 | #SBATCH --nodes=1 3 | #SBATCH --ntasks=1 4 | #SBATCH --mem-per-cpu=1G 5 | #SBATCH --time=0-00:15:00 # 15 minutes 6 | ##SBATCH --output=my.stdout 7 | ##SBATCH --mail-user=jhayes@ucr.edu 8 | ##SBATCH --mail-type=ALL 9 | ##SBATCH --job-name="just_a_test" 10 | 11 | : ${job_number:="1"} # set job_nubmer to 1 if it is undefined 12 | job_number_max=3 13 | 14 | echo "hi from ${SLURM_JOB_ID}" 15 | 16 | if 
[[ ${job_number} -lt ${job_number_max} ]] 17 | then 18 | (( job_number++ )) 19 | next_jobid=$(sbatch --export=job_number=${job_number} -d afterok:${SLURM_JOB_ID} test_job.sh | awk '{print $4}') 20 | echo "submitted ${next_jobid}" 21 | fi 22 | 23 | sleep 15 24 | echo "${SLURM_JOB_ID} done" 25 | -------------------------------------------------------------------------------- /espresso/README.md: -------------------------------------------------------------------------------- 1 | ## Espresso 2 | 3 | 4 | Submission scripts found here are for the openmpi complied version of espresso (espresso.sh) and intel complied version of espresso (espresso_intel.sh) 5 | 6 | How to run them: 7 | 8 | ```bash 9 | sbatch espresso.sh 10 | ``` 11 | 12 | ```bash 13 | sbatch espresso_intel.sh 14 | ``` 15 | 16 | 17 | ### Example input file 18 | 19 | input_file.in 20 | 21 | * you would have to edit the pseudo_dir directive to match your folders 22 | 23 | ```bash 24 | &control 25 | calculation = 'vc-relax' 26 | outdir = '_work' 27 | pseudo_dir = '/rhome/forsythc/bigdata/example-repos/qe/psp' 28 | prefix = 'pref' 29 | / 30 | &system 31 | ibrav = 0 32 | nat = 1 33 | ntyp = 1 34 | ecutwfc = 100 35 | occupations = 'smearing' 36 | smearing = 'fermi-dirac' 37 | degauss = 0.030 38 | 39 | / 40 | &electrons 41 | conv_thr = 1.0d-8 42 | mixing_mode= 'plain' 43 | diagonalization = 'david' 44 | / 45 | &ions 46 | ion_dynamics = 'bfgs' ! default 47 | / 48 | &cell 49 | cell_dynamics = 'bfgs' ! default 50 | press_conv_thr = 0.5D0 ! default 51 | / 52 | ATOMIC_SPECIES 53 | Cu 63.546 Cu_pseudo_dojo__oncv_lda.upf 54 | CELL_PARAMETERS angstrom 55 | 2.60 0.00 0.00 56 | 0.00 2.60 0.00 57 | 0.00 0.00 2.60 58 | ATOMIC_POSITIONS crystal 59 | Cu 0.000 0.000 0.000 60 | K_POINTS automatic 61 | 6 6 6 0 0 0 62 | ``` 63 | -------------------------------------------------------------------------------- /espresso/espresso.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -l 2 | #SBATCH --nodes=1 3 | #SBATCH --ntasks=6 4 | #SBATCH --mem-per-cpu=1G 5 | 6 | export OMP_NUM_THREADS=1 7 | 8 | module load espresso/6.3 9 | 10 | mpirun pw.x -in input_file.in >& output_file.out 11 | -------------------------------------------------------------------------------- /espresso/espresso_intel.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -l 2 | #SBATCH --nodes=1 3 | #SBATCH --ntasks=6 4 | #SBATCH --mem-per-cpu=1G 5 | 6 | 7 | export OMP_NUM_THREADS=1 8 | 9 | module load espresso/6.3_intel 10 | module load intel 11 | 12 | mpirun pw.x -in input_file.in >& output_file.out 13 | -------------------------------------------------------------------------------- /folddock/README.md: -------------------------------------------------------------------------------- 1 | # Folddock 2 | 3 | ## Installation 4 | You will need to clone the folddock using 5 | ``` 6 | git clone https://gitlab.com/ElofssonLab/FoldDock.git 7 | ``` 8 | ## Running 9 | ### Databases and Input information 10 | Due to optimized MSA usage in the **FoldDock** protocol, it is sufficient to run only two iterations of HHblits against uniclust30_2018_08 with the options: -E 0.001 -all -oa3m -n 2 11 | 12 | Get Uniclust30 here: http://wwwuser.gwdg.de/~compbiol/uniclust/2018_08/ 13 | 14 | We recommend running this step on CPU and the actual folding on GPU. 15 | 16 | 1. 
For each of the two chains, run HHblits against Uniclust30 using 17 | ``` 18 | # Load hhsuite 19 | module load hhsuite 20 | 21 | FASTAFILE=#Path to fasta file of chain 22 | UNICLUST30=#Path to Uniclust30 23 | OUTNAME="CHAINID.a3m" 24 | hhblits -i $FASTAFILE -d $UNICLUST30 -E 0.001 -all -oa3m $OUTNAME 25 | ``` 26 | 2. Create two input MSAs (paired and fused) from the HHblits results for each chain 27 | 28 | **Paired** 29 | ``` 30 | A3M1=#Path to a3m from chain 1 (from step 1) 31 | A3M2=#Path to a3m from chain 2 (from step 1) 32 | MGF=0.9 #The max gap fraction allowed in the sequences 33 | OUTNAME="CHAINID1-CHAINID2_paired.a3m" 34 | python3 ./data/marks/hhblits/oxmatch.py --a3m1 $A3M1 --a3m2 $A3M2 --max_gap_fraction $MGF --outname $OUTNAME 35 | ``` 36 | 37 | **Fused** 38 | ``` 39 | A3M1=#Path to a3m from chain 1 (from step 1) 40 | A3M2=#Path to a3m from chain 2 (from step 1) 41 | MGF=0.9 #The max gap fraction allowed in the sequences 42 | OUTNAME="CHAINID1-CHAINID2_fused.a3m" 43 | python3 ./data/marks/hhblits/fuse_msas.py --a3m1 $A3M1 --a3m2 $A3M2 --max_gap_fraction $MGF --outname $OUTNAME 44 | ``` 45 | 46 | ## Predicting 47 | 48 | **Chain Break and Fasta** 49 | ``` 50 | CB=100 #Get chain break: Length of chain 1 51 | # E.g. seq1='AAA', seq2='BBB', catseq=AAABBB (the sequence that should be in the fasta file) and CB=3 52 | FASTAFILE=#Path to file with concatenated fasta sequences. 53 | ``` 54 | 55 | **MSA paths** 56 | ``` 57 | PAIREDMSA=#Path to paired MSA 58 | FUSEDMSA=#Path to fused MSA 59 | MSAS="$PAIREDMSA,$FUSEDMSA" #Comma separated list of msa paths 60 | ``` 61 | 62 | **AF2 CONFIGURATION** 63 | ``` 64 | # This is inside the folder of FoldDock that you clone in the Installation section 65 | AFHOME='./Alphafold2/alphafold/' # Path of alphafold directory in FoldDock 66 | PARAM=#Path to AF2 params 67 | OUTFOLDER=# Path where AF2 generates its output folder structure 68 | 69 | PRESET='full_dbs' #Choose preset model configuration - no ensembling (full_dbs) and (reduced_dbs) or 8 model ensemblings (casp14). 70 | MAX_RECYCLES=10 #max_recycles (default=3) 71 | MODEL_NAME='model_1' 72 | ``` 73 | 74 | **Run AF2** 75 | This step is recommended to run on GPU as the folding will be much more efficient. 76 | NOTE! Depending on your structure, large amounts of RAM may be required 77 | The run mode option here is "--fold_only" 78 | 79 | ``` 80 | cd $AFHOME 81 | 82 | # Load Scratch 83 | module load workspace/scratch 84 | export SINGULARITY_BIND="/scratch:/tmp" 85 | 86 | # Path to directory of supporting data, the databases! 87 | data_dir=/srv/projects/db/alphafold 88 | 89 | # Load Alphafold 90 | module load alphafold 91 | 92 | singularity exec --nv --bind ${data_dir} $ALPHAFOLD_SING \ 93 | python3 $AFHOME/run_alphafold.py \ 94 | --fasta_paths=$FASTAFILE \ 95 | --msas=$MSAS \ 96 | --chain_break_list=$CB \ 97 | --output_dir=$OUTFOLDER \ 98 | --model_names=$MODEL_NAME \ 99 | --data_dir=$PARAM \ 100 | --fold_only \ 101 | --uniref90_database_path='' \ 102 | --mgnify_database_path='' \ 103 | --bfd_database_path='' \ 104 | --uniclust30_database_path='' \ 105 | --pdb70_database_path='' \ 106 | --template_mmcif_dir='' \ 107 | --obsolete_pdbs_path='' \ 108 | --preset=$PRESET \ 109 | --max_recycles=$MAX_RECYCLES 110 | ``` 111 | -------------------------------------------------------------------------------- /galaxy/README.md: -------------------------------------------------------------------------------- 1 | # Galaxy 2 | 3 | Outlined below is how to install Galaxy using conda and then run subsequent jobs. 
4 | Please note that differing versions of conda may cause issues; please stay with the default `miniconda2`. 5 | 6 | ## Prep 7 | 8 | Since Galaxy can get very large, configure conda to install environments under your bigdata directory, as described here: [Conda Configure](https://hpcc.ucr.edu/manuals_linux-cluster_package-manage.html#configure). 9 | 10 | ## Request Job 11 | 12 | We need to do the install from a job, so let's request one: 13 | 14 | ```bash 15 | srun -p short -c 4 --mem=10g --pty bash -l 16 | ``` 17 | 18 | ## Install 19 | 20 | Now that we have a job, run the following to install Galaxy: 21 | 22 | ```bash 23 | mkdir -p ~/bigdata/galaxy/ 24 | cd ~/bigdata/galaxy/ 25 | git clone -b release_20.05 https://github.com/galaxyproject/galaxy.git 20.05 26 | cd 20.05 27 | sh scripts/common_startup.sh 28 | exit 29 | ``` 30 | 31 | ## Run Galaxy 32 | 33 | To run Galaxy, we will need to submit a new job, like this: 34 | 35 | ```bash 36 | sbatch -p short -c 4 --mem=10g --wrap='cd ~/bigdata/galaxy/20.05; ./run.sh start; sleep infinity;' 37 | ``` 38 | -------------------------------------------------------------------------------- /gaussian/README.md: -------------------------------------------------------------------------------- 1 | # Gaussian 2 | 3 | Here are various examples of job submission scripts for Gaussian 9 and 16. 4 | 5 | More information regarding general job submission can be found [here](https://hpcc.ucr.edu/manuals/hpc_cluster/jobs/#submitting-jobs). 6 | 7 | ## CPU 8 | 9 | ``` 10 | # Download example 11 | wget https://raw.githubusercontent.com/ucr-hpcc/hpcc_slurm_examples/master/gaussian/cpu_job.sh 12 | 13 | # Make changes as needed 14 | vim cpu_job.sh 15 | 16 | # Submit job 17 | sbatch cpu_job.sh 18 | ``` 19 | 20 | ## GPU 21 | 22 | For GPU jobs, there are several hardware configurations: 23 | 24 | | Type | Qty | 25 | ------|------ 26 | | P100 | 2 | 27 | | K80 | 4 | 28 | | K80 | 8 | 29 | 30 | Choose the correct example submission script to match the hardware you wish to use. 
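If you are unsure which GPU model a node provides, you can query Slurm before picking a script. This is a minimal sketch; the exact GRES strings reported depend on the cluster's Slurm configuration:

```
# List GPU partition nodes with their generic resources (GRES), e.g. gpu:p100:2 or gpu:k80:4
sinfo -p gpu -o '%N %G'
```

Then download and adapt the example that matches, e.g. for the 2 x P100 nodes: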
31 | 32 | ``` 33 | # Download 2 x P100 example 34 | wget https://raw.githubusercontent.com/ucr-hpcc/hpcc_slurm_examples/master/gaussian/gpu_2xp100_job.sh 35 | 36 | # Make changes as needed 37 | vim gpu_2xp100_job.sh 38 | 39 | # Submit job 40 | sbatch gpu_2xp100_job.sh 41 | ``` 42 | -------------------------------------------------------------------------------- /gaussian/cpu_job.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -l 2 | 3 | #SBATCH -c 10 4 | #SBATCH --mem=10g 5 | #SBATCH --time=2:00:00 6 | #SBATCH -p short 7 | 8 | # Load software based on CPU 9 | if [[ $(cpu_type) == "intel" ]] || [[ $(cpu_type) == "xeon" ]]; then 10 | module load gaussian/16_AVX2 11 | else 12 | module load gaussian/16_SSE4 13 | fi 14 | 15 | # Set scratch directory 16 | module load workspace/scratch 17 | export GAUSS_SCRDIR=${SCRATCH} 18 | 19 | # Move to working directory 20 | cd ~/bigdata/Projects/gaussian/ 21 | 22 | # Run Gaussian 23 | g16 ch4_opt.gjf 24 | -------------------------------------------------------------------------------- /gaussian/cpu_job_g09.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -l 2 | 3 | #SBATCH -c 10 4 | #SBATCH --mem=10g 5 | #SBATCH --time=2:00:00 6 | #SBATCH -p short 7 | 8 | # Load software 9 | module load gaussian/9_SSE3 10 | 11 | # Set scratch directory 12 | module load workspace/scratch 13 | export GAUSS_SCRDIR=${SCRATCH} 14 | 15 | # Move to working directory 16 | cd ~/bigdata/Projects/gaussian/ 17 | 18 | # Run Gaussian 19 | g09 ch4_opt.gjf 20 | -------------------------------------------------------------------------------- /gaussian/gpu_2xp100_job.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -l 2 | 3 | #SBATCH -c 64 # Request all CPUs, use only: floor(AVAIL_RAM_GB/9) 4 | #SBATCH --mem=180g # Request RAM, calculated by: floor(AVAIL_RAM_GB/9)*9 5 | #SBATCH --time=2-00:00:00 # Run for 2 days 6 | #SBATCH -p gpu # Submit to GPU partition 7 | #SBATCH --gpus=2 # Request 2 GPUs 8 | ##SBATCH --nodelist=gpu01 # Request specific node 9 | #SBATCH --exclude=gpu[01-04] # Exclude heterogeneous nodes 10 | #SBATCH --exclusive # This job gets whole node 11 | 12 | # Load software 13 | module load gaussian/16_AVX2 14 | 15 | # Create temp directory 16 | module load workspace/scratch 17 | export GAUSS_SCRDIR=${SCRATCH} 18 | 19 | # Move to working directory 20 | cd ~/bigdata/Projects/gaussian/gpu/ 21 | 22 | # Run Gaussian on specific CPUs 23 | g16 -c="0-20" -m="189GB" -g="0-1=0,16" ch4_opt.gjf 24 | -------------------------------------------------------------------------------- /gaussian/gpu_4xk80_job.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -l 2 | 3 | #SBATCH -c 32 # Request all CPUs, use only: floor(AVAIL_RAM_GB/9) 4 | #SBATCH --mem=108g # Request RAM, calculated by: floor(AVAIL_RAM_GB/9)*9 5 | #SBATCH --time=2-00:00:00 # Run for 2 days 6 | #SBATCH -p gpu # Submit to GPU partition 7 | #SBATCH --gpus=4 # Request 4 GPUs 8 | ##SBATCH --nodelist=gpu01 # Request specific node 9 | #SBATCH --exclude=gpu[03-05] # Exclude heterogeneous nodes 10 | #SBATCH --exclusive # This job gets whole node 11 | 12 | # Load software 13 | module load gaussian/16_AVX2 14 | 15 | # Use auto temp directory 16 | module load workspace/scratch 17 | export GAUSS_SCRDIR=${SCRATCH} 18 | 19 | # Move to working directory 20 | cd ~/bigdata/Projects/gaussian/gpu/ 21 | 22 | # Run Gaussian on specific CPUs 23 | g16 
-c="0-5,8-13" -m="108GB" -g="0-3=1-2,8-9" ch4_opt.gjf 24 | -------------------------------------------------------------------------------- /gaussian/gpu_8xk80_job.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -l 2 | 3 | #SBATCH -c 48 # Request all CPUs, use only: floor(AVAIL_RAM_GB/9) 4 | #SBATCH --mem=432g # Request RAM, calculated by: floor(AVAIL_RAM_GB/9)*9 5 | #SBATCH --time=2-00:00:00 # Run for 2 days 6 | #SBATCH -p gpu # Submit to GPU partition 7 | #SBATCH --gpus=8 # Request 8 GPUs 8 | ##SBATCH --nodelist=gpu01 # Request specific node 9 | #SBATCH --exclude=gpu[01-02],gpu05 # Exclude heterogeneous nodes 10 | #SBATCH --exclusive # This job gets whole node 11 | 12 | # Load software 13 | module load gaussian/16_AVX2 14 | 15 | # Use auto temp directory 16 | module load workspace/scratch 17 | export GAUSS_SCRDIR=${SCRATCH} 18 | 19 | # Move to working directory 20 | cd ~/bigdata/Projects/gaussian/gpu/ 21 | 22 | # Run Gaussian on specific CPUs 23 | g16 -c="0-48" -m="432GB" -g="0-7=0-1,24-25,12-13,36-37" ch4_opt.gjf 24 | -------------------------------------------------------------------------------- /hmmer/README.md: -------------------------------------------------------------------------------- 1 | 2 | # DESCRIPTION 3 | 4 | Show how to use HMMER, and MPI HMMER, on the HPCC system 5 | 6 | # EXAMPLES 7 | 8 | Query files are in the `query` directory - there is a script `download.sh` which demonstrates how the data were downloaded from the UniProt db. 9 | 10 | All the scripts are in the [pipeline](pipeline) folder so you can browse the working code. 11 | 12 | ## Basic usage for Pfam DB searching 13 | 14 | 1. Run hmmscan of a db of proteins against the Pfam database using the default Pfam; this defaults to using the default hmmer software (3.2.1). 15 | 16 | Note that the latest version is 3.3, so it is a good idea to also specify a version in your module load to be explicit about the version you want 17 | 18 | ``` 19 | sbatch -p short pipeline/01_hmmscan321_pfam34.sh 20 | ``` 21 | 22 | 2. Run hmmscan as above but use a specific version of HMMer so we can use 3.3 23 | 24 | ``` 25 | sbatch -p short pipeline/01_hmmscan33_pfam34.sh 26 | ``` 27 | 28 | 3. Run hmmscan with an older version of Pfam (eg let's use version 31.0) 29 | 30 | ``` 31 | sbatch -p short pipeline/01_hmmscan33_pfam31.sh 32 | ``` 33 | 34 | If you compare these results you'll see the E-values and results in the `domtbl` files are not different between HMMer versions (that's good!) but if you compare different DB versions the values will have changed slightly. 35 | 36 | ``` 37 | # compare the diff Pfam DB versions 38 | diff results/hmmscan33_pfam31.domtbl results/hmmscan33_pfam34.domtbl 39 | 40 | # compare the diff HMMer versions - only things different are the version numbers and date run 41 | diff results/hmmscan321_pfam34.domtbl results/hmmscan33_pfam34.domtbl 42 | ``` 43 | 44 | ## Fetch an HMM 45 | 46 | Let's get a specific HMM module from the DB and also search that HMM against a database of proteins using hmmsearch. 47 | This can be done with some cut-and-paste command lines, shown below 48 | 49 | ``` 50 | module load hmmer/3.3 51 | module load db-pfam/34.0 52 | 53 | hmmfetch $PFAM_DB/Pfam-A.hmm COX1 > COX1.hmm 54 | ``` 55 | 56 | 1. 
Here's a script which requests a specific HMM from Pfam DB and then searches it against a db of proteins 57 | 58 | ``` 59 | sbatch -p short pipeline/02_hmmsearch_COX1.sh 60 | ``` 61 | 62 | ## Run MPI HMMscan 63 | 64 | Following Sean Eddy's input on ways to take advantage of MPI speedup and ways to maximize fast running of HMMer 65 | http://cryptogenomicon.org/hmmscan-vs-hmmsearch-speed-the-numerology.html 66 | 67 | Here is a script which will startup an MPI job, we are going to run hmmsearch instead of hmmscan and show how MPI can be used. 68 | This example is for a few proteins only, but the real speedup would be seen with a large genome or translated metagenome. 69 | 70 | See the script [pipeline/03_hmmsearch_MPI.sh](pipeline/03_hmmsearch_MPI.sh) for more details but it uses the `srun` command when launching the hmmsearch but the resources requested are in the `#SBATCH` or cmdline requested options which set the number of CPUs to use. 71 | 72 | The current example is a bit of a toy one but if you want to compare you can try running against a large protein DB and see the performance differences to standard multithreaded runs of hmmsearch or hmmscan searches. 73 | ``` 74 | sbatch -p short pipeline/03_hmmsearch_MPI.sh 75 | ``` 76 | 77 | AUTHORS 78 | ====== 79 | Jason Stajich - jason.stajich[AT]ucr.edu 80 | -------------------------------------------------------------------------------- /hmmer/pipeline/01_hmmscan321_pfam34.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/bash 2 | 3 | #SBATCH -p short -N 1 -n 4 --mem 2gb --out hmmscan321_pfam34.log 4 | 5 | module load hmmer/3.2.1 6 | module load db-pfam/34.0 7 | 8 | mkdir -p results 9 | hmmscan --cut_ga --domtbl results/hmmscan321_pfam34.domtbl $PFAM_DB/Pfam-A.hmm query/query.pep > results/hmmscan321_pfam34.hmmer 10 | -------------------------------------------------------------------------------- /hmmer/pipeline/01_hmmscan33_pfam31.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/bash 2 | 3 | #SBATCH -p short -N 1 -n 4 --mem 2gb --out hmmscan33_pfam31.log 4 | 5 | module load hmmer/3.3 6 | module load db-pfam/31.0 7 | 8 | mkdir -p results 9 | hmmscan --cut_ga --domtbl results/hmmscan33_pfam31.domtbl $PFAM_DB/Pfam-A.hmm query/query.pep > results/hmmscan33_pfam31.hmmer 10 | -------------------------------------------------------------------------------- /hmmer/pipeline/01_hmmscan33_pfam34.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/bash 2 | 3 | #SBATCH -p short -N 1 -n 4 --mem 2gb --out hmmscan33_pfam34.log 4 | 5 | module load hmmer/3.3 6 | module load db-pfam/34.0 7 | 8 | mkdir -p results 9 | hmmscan --cut_ga --domtbl results/hmmscan33_pfam34.domtbl $PFAM_DB/Pfam-A.hmm query/query.pep > results/hmmscan33_pfam34.hmmer 10 | -------------------------------------------------------------------------------- /hmmer/pipeline/02_hmmsearch_COX1.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/bash 2 | 3 | #SBATCH -p short -N 1 -n 4 --mem 2gb --out hmmsearch_COX1.log 4 | 5 | module load hmmer/3.3 6 | module load db-pfam/34.0 7 | 8 | hmmfetch $PFAM_DB/Pfam-A.hmm COX1 > COX1.hmm 9 | mkdir -p results 10 | hmmsearch --cut_ga --domtbl results/hmmsearch_COX1.domtbl COX1.hmm query/query.pep > results/hmmsearch_COX1.hmmer 11 | -------------------------------------------------------------------------------- /hmmer/pipeline/03_hmmsearch_MPI.sh: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/bash 2 | 3 | #SBATCH -p short --ntasks 72 --mem 32gb --out hmmsearch_mpi_db.%A.log 4 | 5 | module load hmmer/3.3-mpi 6 | module load db-pfam/34.0 7 | 8 | mkdir -p results 9 | 10 | time srun hmmsearch --mpi --cut_ga --domtbl results/hmmsearch_MPI.domtbl $PFAM_DB/Pfam-A.hmm query/query.pep > results/hmmsearch_MPI.hmmer 11 | -------------------------------------------------------------------------------- /hmmer/query/download.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/bash 2 | curl -O https://www.uniprot.org/uniprot/Q8RXC8.fasta 3 | curl -O https://www.uniprot.org/uniprot/P49791.fasta 4 | curl -O https://www.uniprot.org/uniprot/A0A2H4MYE5.fasta 5 | 6 | cat *.fasta > query.pep 7 | -------------------------------------------------------------------------------- /hmmer/query/query.pep: -------------------------------------------------------------------------------- 1 | >tr|A0A2H4MYE5|A0A2H4MYE5_9BIVA Cytochrome c oxidase subunit 1 (Fragment) OS=Mytilus sp. BOSL3 OX=2050256 GN=COI PE=3 SV=1 2 | GVWGGLFGASLSLMIRMQLGHPGAVFLKSDWFYNVVVTTHALMMIFFAVMPILIGAFGNW 3 | LIPLLVGGKDMIYPRMNNLSYWLSPNALYLLMLSFSTDKGVGAGWTIYPPLSVYPYHSGP 4 | SMDVLIVSLHLAGLSSLVGAINFASTNKNMPVLEMKGERAELYVLSISVTAVLLIISIPV 5 | LGGGITMILFDRNFNT 6 | >sp|P49791|NU153_RAT Nuclear pore complex protein Nup153 OS=Rattus norvegicus OX=10116 GN=Nup153 PE=1 SV=1 7 | MASGAGGIGGGGGGGKIRTRRCHQGPVKPYQQGRPQHQGILSRVTESVKNIVPGWLQRYF 8 | NKSENACSCSVNADEVPRWPENREDEREIYVDENTNTDDGRTTPEPTGSNTEEPSTTSTA 9 | SNYPDVLTRPSLHRSHLNFSVLESPALHCQPSTSSAFPIGSSGFSLVKEIKDSTSQHDDD 10 | NISTTSGFSSRASEKDIAVSKNTSLPPLWSPEAERSHSLSQHTAISSKKPAFNLSAFGTL 11 | STSLGNSSILKTSQLGDSPFYPGKTTYGGAAAAVRQNKVRSTPYQAPVRRQMKAKQLNAQ 12 | SYGVTSSTARRILQSLEKMSSPLADAKRIPSAVSSPLNSPLDRSGIDSTVFQAKKEKVDS 13 | QYPPVQRLMTPKPVSIATNRTVYFKPSLTPSGDLRKTNQRIDKKNSTVDEKNISRQNREQ 14 | ESGFSYPNFSIPAANGLSSGVGGGGGKMRRERTTHFVASKPSEEEEVEVPLLPQISLPIS 15 | SSSLPTFNFSSPAISAASSSSVSPSQPLSNKVQMTSLGSTGNPVFTFSSPIVKSTQADVL 16 | PPASIGFTFSVPLAKTELSGPNSSSETVLSSSVTAQDNTVVNSSSSKKRSAPCEDPFTPA 17 | KILREGSVLDILKTPGFMSPKVDSPALQPTTTSSIVYTRPAISTFSSSGVEFGESLKAGS 18 | SWQCDTCLLQNKVTDNKCIACQAAKLPLKETAKQTGIGTPSKSDKPASTSGTGFGDKFKP 19 | AIGTWDCDTCLVQNKPEAVKCVACETPKPGTGVKRALPLTVASESPVTASSSTTVTTGTL 20 | GFGDKFKRPVGSWECPVCCVSNKAEDSRCVSCTSEKPGLVSASSSNSVPVSLPSGGCLGL 21 | DKFKKPEGSWDCEVCLVQNKADSTKCIACESAKPGTKSEFKGFGTSSSLNPAPSAFKFGI 22 | PSSSSGLSQTFTSTGNFKFGDQGGFKLGTSSDSGSTNTMNTNFKFPKPTGDFKFGVLPDS 23 | KPEEIKNDSKNDNFQFGPSSGLSNPASSAPFQFGVSTLGQQEKKEELPQSSSAGFSFGAG 24 | VANPSSAAIDTTVTSENKSGFNFGTIDTKSVSVTPFTYKTTEAKKEDASATKGGFTFGKV 25 | DSAALSSPSMFVLGRTEEKQQEPVTSTSLVFGKKADNEEPKCQPVFSFGNSEQTKDESSS 26 | KPTFSFSVAKPSVKESDQLAKATFAFGNQTNTTTDQGAAKPAFSFLNSSSSSSSTPATSS 27 | SASIFGSSTSSSSPPVAAFVFGQASNPVSSSAFGNSAESSTSQPLLFPQDGKPATTSSTA 28 | SAAPPFVFGTGASSNSTVSSGFTFGATTTSSSSGSFFVFGTGHSAPSASPAFGANQTPTF 29 | GQSQGASQPNPPSFGSISSSTALFSAGSQPVPPPTFGTVSSSSQPPVFGQQPSQSAFGSG 30 | TANASSVFQFGSSTTNFNFTNNNPSGVFTFGASPSTPAAAAQPSGSGGFSFSQSPASFTV 31 | GSNGKNMFSSSGTSVSGRKIKTAVRRKK 32 | >sp|Q8RXC8|RBK2_ARATH Receptor-like cytosolic serine/threonine-protein kinase RBK2 OS=Arabidopsis thaliana OX=3702 GN=RBK2 PE=1 SV=1 33 | MNSASAHDLRLLEVDKEKQDPKSPRGALEACLTRCSISSASSSSDDPPPNREAIDNADAD 34 | TDVQCKNHRASSNWGKFFKLWKRRSMKRLSSFPPLSGAAPPIIKQNKSADPNMNGMVLHD 35 | IYDFQSSLQNFSISDIEIATDNFSPENIIGRGGYADVYQGILPEGKLIAVKRLTKGTPDE 36 | QTAEFLSELGIIAHVDHPNTAKFIGCCIEGGMHLVFRLSPLGSLGSLLHGPSKYKLTWSR 37 | 
RYNVALGTADGLVYLHEGCQRRIIHRDIKADNILLTEDFQPQICDFGLAKWLPKQLTHHN 38 | VSKFEGTFGYFAPEYFMHGIVDEKTDVFAFGVLLLELITGHPALDESQQSLVLWAKPLLE 39 | RKAIKELVDPSLGDEYNREELIRLTSTASLCIDQSSLLRPRMSQVVELLLGHEDVVMTPR 40 | EAKIKMMQRTYSEELLDSVEYNSTKYLGDLDRIREVALAS 41 | -------------------------------------------------------------------------------- /interactive/README.txt: -------------------------------------------------------------------------------- 1 | srun --pty bash -l 2 | -------------------------------------------------------------------------------- /jupyter/README.md: -------------------------------------------------------------------------------- 1 | # Jupyter Notebooks 2 | 3 | ## Usage 4 | 5 | There are 3 ways to run Jupyter Notebooks: 6 | 1. __[JupyterHub](https://jupyter.hpcc.ucr.edu) server__ 7 | 8 | This method is the easiest, however resources are limited, so it should only be used for light testing. 9 | 10 | 2. __Interactively as a Job__ 11 | 12 | This method is the most difficult, however it provides a way to request more resources than JupyterHub. 13 | 14 | 3. __Non-Interactively as a Job__ 15 | 16 | This method is not difficult, in fact it is the same method we use for submitting most jobs on the cluster. 17 | 18 | ## Workflow 19 | 20 | The suggested workflow would be to do light development from the __[JupyterHub](https://jupyter.hpcc.ucr.edu) server__ and when you have a polished Jupyter Notebook you can submit it __non-interactively as a job__ via `sbatch`. 21 | 22 | The __Interactively as a Job__ method should only be used in extreme situations, when exploring or testing is not possible from the __[JupyterHub](https://jupyter.hpcc.ucr.edu) server__. 23 | 24 | ## Interactively as a Job 25 | 26 | This method provides a web-based interactive development environment (IDE) similar to the [JupyterHub](https://jupyter.hpcc.ucr.edu) server, however you are able to request more compute resources. 27 | 28 | First review the following method, [HPCC Web Browser Access](https://hpcc.ucr.edu/manuals_linux-cluster_jobs.html#web-browser-access). After you have read through that, you can proceed with this example. 29 | 30 | Download the Jupyter submission script: 31 | ```bash 32 | wget https://raw.githubusercontent.com/ucr-hpcc/hpcc_slurm_examples/master/jupyter/submit_jupyter.sh 33 | ``` 34 | 35 | Edit the script with proper Slurm resources: 36 | ```bash 37 | vim submit_jupyter.sh 38 | ``` 39 | 40 | Submit the Jupyter job: 41 | ```bash 42 | sbatch submit_jupyter.sh 43 | ``` 44 | 45 | Check for Jupyter job start time: 46 | ```bash 47 | squeue -u $USER -o '%i %j %S %Z' 48 | ``` 49 | 50 | If your job has already started, then check the log, which will contain the remainder of your instructions: 51 | ``` 52 | cat jupyter-notebook-12345678.log 53 | ``` 54 | Replace `12345678` with your real `JOBID` from the previous step. 
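The log typically ends with SSH tunnel instructions and a URL that contains a login token. As a rough sketch (the node name, port, and SSH server for your job are printed in the log and will differ), the tunnel command run from your local machine looks like:

```bash
# Replace NODE and PORT with the values shown in your jupyter-notebook-<JOBID>.log
ssh -N -L PORT:NODE:PORT yourusername@secure.hpcc.ucr.edu
```

Once the tunnel is running, open the URL from the log in your local browser, replacing the node name with `localhost`.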
55 | 56 | ## Non-Interactive as a Job 57 | 58 | Download the Jupyter Notebook (or create your own): 59 | ```bash 60 | wget https://raw.githubusercontent.com/ucr-hpcc/hpcc_slurm_examples/master/jupyter/notebook.ipynb 61 | ``` 62 | 63 | Download the notebook submission script: 64 | ```bash 65 | wget https://raw.githubusercontent.com/ucr-hpcc/hpcc_slurm_examples/master/jupyter/submit_notebook.sh 66 | ``` 67 | 68 | Edit script with proper Slurm resources, and options for your notebook: 69 | ```bash 70 | vim submit_notebook.sh 71 | ``` 72 | 73 | Submit the notebook job: 74 | ```bash 75 | sbatch submit_notebook.sh 76 | ``` 77 | 78 | Check the state of your job: 79 | ```bash 80 | squeue -u $USER 81 | ``` 82 | 83 | If your job has started, then you can check the log to verfiy that your script is running: 84 | ``` 85 | cat jupyter-notebook-12345678.log 86 | ``` 87 | Replace `12345678` with your real `JOBID` from the previous step. 88 | 89 | Once the job has completed, you can download and view your HTML or Notebook results from the [JupyterHub](https://jupyter.hpcc.ucr.edu) server. 90 | -------------------------------------------------------------------------------- /jupyter/jupyter-notebook-3523021.log: -------------------------------------------------------------------------------- 1 | [NbConvertApp] Converting notebook notebook.ipynb to html 2 | [NbConvertApp] Writing 569303 bytes to notebook.html 3 | -------------------------------------------------------------------------------- /jupyter/jupyter-notebook-5205779.log: -------------------------------------------------------------------------------- 1 | 2 | MacOS or linux terminal command to create your ssh tunnel: 3 | ssh -N -L 8726:i16:8726 jhayes@cluster.hpcc.ucr.edu 4 | 5 | MS Windows MobaXterm info: 6 | 7 | Forwarded port:same as remote port 8 | Remote server: i16 9 | Remote port: 8726 10 | SSH server: ib.hpc.yale.edu 11 | SSH login: jhayes 12 | SSH port: 22 13 | 14 | 15 | PLEASE USE GENERATED URL BELOW IN BROWSER 16 | YOU MUST REPLACE 'i16' with 'localhost' 17 | [I 12:40:22.789 NotebookApp] JupyterLab beta preview extension loaded from /opt/linux/centos/7.x/x86_64/pkgs/anaconda3/4.5.4/lib/python3.6/site-packages/jupyterlab 18 | [I 12:40:22.790 NotebookApp] JupyterLab application directory is /bigdata/operations/pkgadmin/opt/linux/centos/7.x/x86_64/pkgs/anaconda3/4.5.4/share/jupyter/lab 19 | [I 12:40:22.803 NotebookApp] Serving notebooks from local directory: /bigdata/operations/jhayes/Projects/slurm/jupyter 20 | [I 12:40:22.803 NotebookApp] 0 active kernels 21 | [I 12:40:22.803 NotebookApp] The Jupyter Notebook is running at: 22 | [I 12:40:22.803 NotebookApp] http://i16:8726/?token=ae2e43d9c45aac71240bdb33774dd3ddb75484ba1a70a205 23 | [I 12:40:22.803 NotebookApp] Use Control-C to stop this server and shut down all kernels (twice to skip confirmation). 
24 | [C 12:40:22.806 NotebookApp] 25 | 26 | Copy/paste this URL into your browser when you connect for the first time, 27 | to login with a token: 28 | http://i16:8726/?token=ae2e43d9c45aac71240bdb33774dd3ddb75484ba1a70a205&token=ae2e43d9c45aac71240bdb33774dd3ddb75484ba1a70a205 29 | -------------------------------------------------------------------------------- /jupyter/notebook.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "print(\"Hello World\")" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": null, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "x=1+1" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": null, 24 | "metadata": {}, 25 | "outputs": [], 26 | "source": [ 27 | "print(x)" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": null, 33 | "metadata": {}, 34 | "outputs": [], 35 | "source": [] 36 | } 37 | ], 38 | "metadata": { 39 | "kernelspec": { 40 | "display_name": "Python 3", 41 | "language": "python", 42 | "name": "python3" 43 | }, 44 | "language_info": { 45 | "codemirror_mode": { 46 | "name": "ipython", 47 | "version": 3 48 | }, 49 | "file_extension": ".py", 50 | "mimetype": "text/x-python", 51 | "name": "python", 52 | "nbconvert_exporter": "python", 53 | "pygments_lexer": "ipython3", 54 | "version": "3.7.9" 55 | } 56 | }, 57 | "nbformat": 4, 58 | "nbformat_minor": 4 59 | } 60 | -------------------------------------------------------------------------------- /jupyter/notebook.nbconvert.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "execution": { 8 | "iopub.execute_input": "2021-03-23T01:28:45.130193Z", 9 | "iopub.status.busy": "2021-03-23T01:28:45.129531Z", 10 | "iopub.status.idle": "2021-03-23T01:28:45.133742Z", 11 | "shell.execute_reply": "2021-03-23T01:28:45.132895Z" 12 | } 13 | }, 14 | "outputs": [ 15 | { 16 | "name": "stdout", 17 | "output_type": "stream", 18 | "text": [ 19 | "Hello World\n" 20 | ] 21 | } 22 | ], 23 | "source": [ 24 | "print(\"Hello World\")" 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": 2, 30 | "metadata": { 31 | "execution": { 32 | "iopub.execute_input": "2021-03-23T01:28:45.138745Z", 33 | "iopub.status.busy": "2021-03-23T01:28:45.138086Z", 34 | "iopub.status.idle": "2021-03-23T01:28:45.141155Z", 35 | "shell.execute_reply": "2021-03-23T01:28:45.140535Z" 36 | } 37 | }, 38 | "outputs": [], 39 | "source": [ 40 | "x=1+1" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": 3, 46 | "metadata": { 47 | "execution": { 48 | "iopub.execute_input": "2021-03-23T01:28:45.146185Z", 49 | "iopub.status.busy": "2021-03-23T01:28:45.145522Z", 50 | "iopub.status.idle": "2021-03-23T01:28:45.148990Z", 51 | "shell.execute_reply": "2021-03-23T01:28:45.148296Z" 52 | } 53 | }, 54 | "outputs": [ 55 | { 56 | "name": "stdout", 57 | "output_type": "stream", 58 | "text": [ 59 | "2\n" 60 | ] 61 | } 62 | ], 63 | "source": [ 64 | "print(x)" 65 | ] 66 | }, 67 | { 68 | "cell_type": "code", 69 | "execution_count": null, 70 | "metadata": {}, 71 | "outputs": [], 72 | "source": [] 73 | } 74 | ], 75 | "metadata": { 76 | "kernelspec": { 77 | "display_name": "Python 3", 78 | "language": "python", 79 | "name": "python3" 80 | }, 81 | "language_info": { 82 | 
"codemirror_mode": { 83 | "name": "ipython", 84 | "version": 3 85 | }, 86 | "file_extension": ".py", 87 | "mimetype": "text/x-python", 88 | "name": "python", 89 | "nbconvert_exporter": "python", 90 | "pygments_lexer": "ipython3", 91 | "version": "3.7.9" 92 | } 93 | }, 94 | "nbformat": 4, 95 | "nbformat_minor": 4 96 | } 97 | -------------------------------------------------------------------------------- /jupyter/submit_jupyter.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -l 2 | #SBATCH --partition=short 3 | #SBATCH --nodes=1 4 | #SBATCH --ntasks=1 5 | #SBATCH --mem=1G 6 | #SBATCH --time=1:00:00 7 | #SBATCH --job-name=jupyter-notebook 8 | #SBATCH --output=jupyter-notebook-%J.log 9 | 10 | # Change to HOME dir to give access to all folders within Jupyter-Lab 11 | cd $HOME 12 | 13 | # Jupyter vars 14 | XDG_RUNTIME_DIR="" 15 | 16 | # Get tunneling info 17 | port=$(shuf -i8000-9999 -n1) 18 | node=$(hostname -s) 19 | user=$(whoami) 20 | cluster=$(hostname -f | awk -F"." '{print $2}') 21 | 22 | # Print tunneling instructions jupyter-log 23 | echo -e " 24 | MacOS or linux terminal command to create your ssh tunnel: 25 | ssh -NL ${port}:${node}:${port} ${user}@secure.hpcc.ucr.edu 26 | 27 | MS Windows MobaXterm info: 28 | 29 | Forwarded port:same as remote port 30 | Remote server: ${node} 31 | Remote port: ${port} 32 | SSH server: secure.hpcc.ucr.edu 33 | SSH login: $user 34 | SSH port: 22 35 | " 36 | 37 | #################################################### 38 | # Load modules or activate conda environments here # 39 | #################################################### 40 | 41 | # You can activate your own conda env with Jupyter 42 | #module load miniconda3 43 | #conda activate jupyter 44 | #OR 45 | # Load the pre installed system version 46 | module load jupyterlab 47 | 48 | # Print instructions to user 49 | echo -e "PLEASE USE GENERATED URL BELOW IN BROWSER\nYOU MUST REPLACE '${node}' with 'localhost'" 50 | 51 | # Launch Jupyter lab or notebook 52 | jupyter-lab --no-browser --port=${port} --ip=${node} 53 | #jupyter-notebook --no-browser --port=${port} --ip=${node} 54 | -------------------------------------------------------------------------------- /jupyter/submit_notebook.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -l 2 | #SBATCH --partition=short 3 | #SBATCH --nodes=1 4 | #SBATCH --ntasks=1 5 | #SBATCH --mem=1G 6 | #SBATCH --time=1:00:00 7 | #SBATCH --job-name=jupyter-notebook 8 | #SBATCH --output=jupyter-notebook-%J.log 9 | 10 | # Load and base conda 11 | module load miniconda3 12 | 13 | # Load jupyterlab 14 | module load jupyterlab 15 | # OR activare your own conda environment 16 | #conda activate myCondaEnv 17 | 18 | # Execute the notebook and generate HTML (notebook.html) as output file 19 | jupyter nbconvert --to html --execute notebook.ipynb 20 | # OR execute the notebook and generate another notebook (notebook.nbconvert.ipynb) as output file 21 | #jupyter nbconvert --to notebook --execute notebook.ipynb 22 | 23 | # There are many output formats, list all possible options with this 24 | #jupyter nbconvert --help-all 25 | -------------------------------------------------------------------------------- /mathematica/README.md: -------------------------------------------------------------------------------- 1 | # Activation 2 | You will have to setup the mathematica license via a License server named "mathlm". 3 | 4 | 1. 
Configure [X-Forward](https://hpcc.ucr.edu/manuals_linux-basics_intro.html#how-to-get-access) or [VNC](https://hpcc.ucr.edu/manuals_linux-cluster_jobs.html#desktop-environments) method. 5 | 2. Log back into the cluster (with X-Forwarding or via VNC) and run mathematica: 6 | 7 | ``` 8 | module load mathematica 9 | mathematica 10 | ``` 11 | 12 | 3. Do not use a license file or key, but rather click on the bottom button `Other ways to activate`. 13 | 4. Then click the option `Connect to a license server`. 14 | 5. After that you should enter the name "mathlm" in the field and click `activate`. 15 | 16 | -------------------------------------------------------------------------------- /mathematica/submission_script.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -l 2 | 3 | #SBATCH --nodes=1 4 | #SBATCH --ntasks=1 5 | #SBATCH --cpus-per-task=1 6 | #SBATCH --mem=1G 7 | #SBATCH --time=1-00:15:00 # 1 day and 15 minutes 8 | ##SBATCH --mail-user=useremail@address.com 9 | ##SBATCH --mail-type=ALL 10 | #SBATCH --job-name="just_a_test" 11 | #SBATCH -p epyc # You can use any of the following; epyc, intel, batch, highmem, gpu 12 | 13 | module load mathematica/11.3 14 | 15 | math -noprompt -run '< 50] 8 | Exit[] 9 | -------------------------------------------------------------------------------- /matlab/Getting_Started_With_Serial_And_Parallel_MATLAB.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ucr-hpcc/hpcc_slurm_examples/74b84cb0c85dedf1a096d03e6e9e515ae36a8c22/matlab/Getting_Started_With_Serial_And_Parallel_MATLAB.pdf -------------------------------------------------------------------------------- /matlab/README.md: -------------------------------------------------------------------------------- 1 | 2 | ## Simple Method: Submitting Job Script 3 | 4 | To submit a job from the cluster, you can use [submission_script.sh](submission_script.sh) or [submission_script2.sh](submission_script2.sh) as a starting point. 5 | 6 | ## Copy examples 7 | 8 | You can simply copy the text from [submission_script.sh](submission_script.sh) and paste it into a file on the cluster via `nano`/`vim`. 9 | 10 | Or you can use the `wget` command to transfer the file from this repo to the cluster directly: 11 | 12 | 1. [Login to the cluster](https://hpcc.ucr.edu/manuals_linux-basics_intro#how-to-get-access) 13 | 2. Once logged in you can run 14 | ```bash 15 | wget https://raw.githubusercontent.com/ucr-hpcc/hpcc_slurm_examples/master/matlab/submission_script.sh 16 | ``` 17 | 18 | ## Advanced Method: Submitting Job From Matlab 19 | 20 | To submit a job from your matlab program to the cluster, you can view [Getting_Started_With_Serial_And_Parallel_MATLAB.pdf](Getting_Started_With_Serial_And_Parallel_MATLAB.pdf). 21 | If you are getting an error when running **configCluster**, run **rehash toolboxcache** and then run **configCluster** again. 
22 | -------------------------------------------------------------------------------- /matlab/simple_args/command_line: -------------------------------------------------------------------------------- 1 | sbatch job_script.sh 45 90 2 | -------------------------------------------------------------------------------- /matlab/simple_args/job_script.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -l 2 | #SBATCH -p short 3 | 4 | module load matlab 5 | 6 | matlab -nodisplay -nodesktop -r "var1=$1;var2=$2" < matlabCode.m 7 | 8 | -------------------------------------------------------------------------------- /matlab/simple_args/matlabCode.m: -------------------------------------------------------------------------------- 1 | disp(var1) 2 | disp(var2) 3 | 4 | var3=var1+var2 5 | 6 | disp(var3) 7 | -------------------------------------------------------------------------------- /matlab/simple_args/slurm-376816.out: -------------------------------------------------------------------------------- 1 | 2 | < M A T L A B (R) > 3 | Copyright 1984-2021 The MathWorks, Inc. 4 | R2021b Update 2 (9.11.0.1837725) 64-bit (glnxa64) 5 | December 14, 2021 6 | 7 | 8 | To get started, type doc. 9 | For product information, visit www.mathworks.com. 10 | 11 | 12 | var1 = 13 | 14 | "SomeValue" 15 | 16 | >> Var SomeValue 17 | >> -------------------------------------------------------------------------------- /matlab/simple_args/slurm-396765.out: -------------------------------------------------------------------------------- 1 | 2 | < M A T L A B (R) > 3 | Copyright 1984-2021 The MathWorks, Inc. 4 | R2021b Update 2 (9.11.0.1837725) 64-bit (glnxa64) 5 | December 14, 2021 6 | 7 | 8 | To get started, type doc. 9 | For product information, visit www.mathworks.com. 
10 | 11 | 12 | var2 = 13 | 14 | 90 15 | 16 | >> 45 17 | 18 | >> 90 19 | 20 | >> >> 21 | var3 = 22 | 23 | 135 24 | 25 | >> >> 135 26 | 27 | >> -------------------------------------------------------------------------------- /matlab/submission_script.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -l 2 | 3 | #SBATCH --nodes=1 4 | #SBATCH --ntasks=1 5 | #SBATCH --cpus-per-task=1 6 | #SBATCH --mem-per-cpu=1G 7 | #SBATCH --time=2:00:00 8 | ##SBATCH --mail-user=youremail@address.com 9 | ##SBATCH --mail-type=ALL 10 | #SBATCH --job-name="just_a_test" 11 | #SBATCH -p short # You can use any of the following; epyc, intel, batch, highmem, gpu 12 | 13 | 14 | # Load matlab 15 | module load matlab 16 | 17 | # Send Matlab code to Matlab 18 | matlab -nodisplay -nodesktop < my_matlab_program.m 19 | 20 | # You can also capture the output in a log, like this 21 | #matlab -nodisplay -nosplash < my_matlab_program.m > matlab_run.log 22 | 23 | -------------------------------------------------------------------------------- /matlab/submission_script2.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -l 2 | 3 | #SBATCH --nodes=1 4 | #SBATCH --ntasks=1 5 | #SBATCH --cpus-per-task=1 6 | #SBATCH --mem-per-cpu=100G 7 | #SBATCH --time=5-00:00:00 # 5 day and 00 minutes 8 | #SBATCH --output=my.stdout 9 | ##SBATCH --mail-user=youremail@address.com 10 | ##SBATCH --mail-type=ALL 11 | #SBATCH --job-name="HT_QOptica_1e3" 12 | #SBATCH -p epyc # You can use any of the following; epyc, intel, batch, highmem, gpu 13 | 14 | 15 | # Print current date 16 | date 17 | 18 | # Load matlab 19 | module load matlab/r2018a 20 | matlab -nodisplay -nosplash run.log 21 | -------------------------------------------------------------------------------- /mpi/R/slurm-688508.out: -------------------------------------------------------------------------------- 1 | [1] "We are assigned 9 processes" 2 | 9 slaves are spawned successfully. 0 failed. 3 | [1] "Hello from i33 with CPU type x86_64" "Hello from i33 with CPU type x86_64" 4 | [3] "Hello from i33 with CPU type x86_64" "Hello from i33 with CPU type x86_64" 5 | [5] "Hello from i33 with CPU type x86_64" "Hello from i33 with CPU type x86_64" 6 | [7] "Hello from i33 with CPU type x86_64" "Hello from i33 with CPU type x86_64" 7 | [9] "Hello from i33 with CPU type x86_64" 8 | [1] -2177.028 9 | user system elapsed 10 | 5.220 0.296 5.514 11 | slurmstepd-i33: error: *** JOB 688508 ON i33 CANCELLED AT 2017-05-04T11:49:44 DUE TO TIME LIMIT *** 12 | mpirun: Forwarding signal 18 to job 13 | -------------------------------------------------------------------------------- /mpi/R/snow-test.R: -------------------------------------------------------------------------------- 1 | ## 2 | # Source: http://www.umbc.edu/hpcf/resources-tara/how-to-run-R.html 3 | # filename: snow-test.R 4 | # 5 | # SNOW quick ref: http://www.sfu.ca/~sblay/R/snow.html 6 | # 7 | # Notes: 8 | # - Library loading order matters 9 | # - system.time([function]) is an easy way to test optimizations 10 | # - parApply is snow parallel version of 'apply' 11 | # 12 | ## 13 | 14 | library(Rmpi) 15 | library(snow) 16 | 17 | # Initialize SNOW using MPI communication. The first line will get the number of 18 | # MPI processes the scheduler assigned to us. 
Everything else is standard SNOW 19 | np <- mpi.universe.size() - 1 20 | print(paste('We are assigned',np, 'processes')) 21 | cluster <- makeMPIcluster(np) 22 | 23 | # Print the hostname for each cluster member 24 | sayhello <- function() { 25 | info <- Sys.info()[c("nodename", "machine")] 26 | paste("Hello from", info[1], "with CPU type", info[2]) 27 | } 28 | 29 | names <- clusterCall(cluster, sayhello) 30 | print(unlist(names)) 31 | 32 | # Compute row sums in parallel using all processes, then a grand sum at the end 33 | # on the master process 34 | parallelSum <- function(m, n) { 35 | A <- matrix(rnorm(m*n), nrow = m, ncol = n) 36 | # Parallelize the summation 37 | row.sums <- parApply(cluster, A, 1, sum) 38 | print(sum(row.sums)) 39 | } 40 | 41 | # Run the operation over different size matricies 42 | system.time(parallelSum(5000, 5000)) 43 | 44 | # Always stop your cluster and exit MPI to ensure resources are properly freed 45 | stopCluster(cluster) 46 | mpi.exit() 47 | -------------------------------------------------------------------------------- /mpi/R/snow-test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # These bash and R script examples came from here 4 | # https://rcc.uchicago.edu/docs/software/environments/R/index.html#snow 5 | 6 | #SBATCH --job-name=snow-test 7 | #SBATCH --nodes=1 8 | #SBATCH --ntasks=10 9 | #SBATCH --time=10 10 | 11 | module load R/3.4.0 12 | module load openmpi 13 | 14 | # Always use -n 1 for the snow package. It uses Rmpi internally to spawn 15 | # additional processes dynamically 16 | mpirun -np 1 Rscript snow-test.R 17 | 18 | -------------------------------------------------------------------------------- /mpi/SUBMIT.txt: -------------------------------------------------------------------------------- 1 | srun --pty -n 10 bash -l 2 | 3 | OR 4 | 5 | srun -p short --ntasks 64 -N 2 --switches=1@1-00:00:00 --pty bash -l 6 | 7 | -------------------------------------------------------------------------------- /mpi/a.out: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ucr-hpcc/hpcc_slurm_examples/74b84cb0c85dedf1a096d03e6e9e515ae36a8c22/mpi/a.out -------------------------------------------------------------------------------- /mpi/fortran/fhello_world_mpi: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ucr-hpcc/hpcc_slurm_examples/74b84cb0c85dedf1a096d03e6e9e515ae36a8c22/mpi/fortran/fhello_world_mpi -------------------------------------------------------------------------------- /mpi/fortran/fhello_world_mpi.F90: -------------------------------------------------------------------------------- 1 | program hello_world 2 | use mpi 3 | implicit none 4 | integer ( kind = 4 ) error 5 | integer ( kind = 4 ) id 6 | integer p 7 | character(len=MPI_MAX_PROCESSOR_NAME) :: name 8 | integer clen 9 | integer, allocatable :: mype(:) 10 | real ( kind = 8 ) wtime 11 | 12 | call MPI_Init ( error ) 13 | call MPI_Comm_size ( MPI_COMM_WORLD, p, error ) 14 | call MPI_Comm_rank ( MPI_COMM_WORLD, id, error ) 15 | if ( id == 0 ) then 16 | 17 | wtime = MPI_Wtime ( ) 18 | 19 | write ( *, '(a)' ) ' ' 20 | write ( *, '(a)' ) 'HELLO_MPI - Master process:' 21 | write ( *, '(a)' ) ' FORTRAN90/MPI version' 22 | write ( *, '(a)' ) ' ' 23 | write ( *, '(a)' ) ' An MPI test program.' 
24 | write ( *, '(a)' ) ' ' 25 | write ( *, '(a,i8)' ) ' The number of processes is ', p 26 | write ( *, '(a)' ) ' ' 27 | 28 | end if 29 | 30 | call MPI_GET_PROCESSOR_NAME(NAME, CLEN, ERROR) 31 | 32 | write ( *, '(a)' ) ' ' 33 | write ( *, '(a,i8,a,a)' ) ' Process ', id, ' says "Hello, world!" ',name(1:clen) 34 | 35 | call MPI_Finalize ( error ) 36 | end program 37 | -------------------------------------------------------------------------------- /mpi/hello-mpi: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ucr-hpcc/hpcc_slurm_examples/74b84cb0c85dedf1a096d03e6e9e515ae36a8c22/mpi/hello-mpi -------------------------------------------------------------------------------- /mpi/hello-mpi.cpp: -------------------------------------------------------------------------------- 1 | # include 2 | # include 3 | # include 4 | # include 5 | # include 6 | 7 | using namespace std; 8 | 9 | int main ( int argc, char *argv[] ); 10 | void timestamp ( ); 11 | 12 | //****************************************************************************80 13 | 14 | int main ( int argc, char *argv[] ) 15 | 16 | //****************************************************************************80 17 | // 18 | // Purpose: 19 | // 20 | // MAIN is the main program for HELLO_MPI. 21 | // 22 | // Discussion: 23 | // 24 | // This is a simple MPI test program. 25 | // Each process prints out a "Hello, world!" message. 26 | // The master process also prints out a short message. 27 | // 28 | // Modified to use the C MPI bindings, 14 June 2016. 29 | // 30 | // Licensing: 31 | // 32 | // This code is distributed under the GNU LGPL license. 33 | // 34 | // Modified: 35 | // 36 | // 14 June 2016 37 | // 38 | // Author: 39 | // 40 | // John Burkardt 41 | // 42 | // Reference: 43 | // 44 | // William Gropp, Ewing Lusk, Anthony Skjellum, 45 | // Using MPI: Portable Parallel Programming with the 46 | // Message-Passing Interface, 47 | // Second Edition, 48 | // MIT Press, 1999, 49 | // ISBN: 0262571323, 50 | // LC: QA76.642.G76. 51 | // 52 | //76 int provided; 53 | //77 MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &provided); 54 | //78 MPI_Barrier(MPI_COMM_WORLD); 55 | //79 56 | //80 MPI_Comm_rank(MPI_COMM_WORLD, &nodeId); 57 | //81 MPI_Comm_size(MPI_COMM_WORLD, &numtasks); 58 | // 59 | // 60 | { 61 | int id; 62 | int ierr; 63 | int p; 64 | double wtime; 65 | // 66 | // Initialize MPI. 67 | // 68 | ierr = MPI_Init ( &argc, &argv ); 69 | // 70 | // Get the number of processes. 71 | // 72 | ierr = MPI_Comm_size ( MPI_COMM_WORLD, &p ); 73 | // 74 | // Get the individual process ID. 75 | // 76 | ierr = MPI_Comm_rank ( MPI_COMM_WORLD, &id ); 77 | // 78 | // Process 0 prints an introductory message. 79 | // 80 | if ( id == 0 ) 81 | { 82 | timestamp ( ); 83 | cout << "\n"; 84 | cout << "HELLO_MPI - Master process:\n"; 85 | cout << " C++/MPI version\n"; 86 | cout << " An MPI example program.\n"; 87 | cout << "\n"; 88 | cout << " The number of processes is " << p << "\n"; 89 | cout << "\n"; 90 | } 91 | // 92 | // Every process prints a hello. 93 | // 94 | if ( id == 0 ) 95 | { 96 | wtime = MPI_Wtime ( ); 97 | } 98 | cout << " Process " << id << " says 'Hello, world!'\n"; 99 | // 100 | // Process 0 says goodbye. 101 | // 102 | if ( id == 0 ) 103 | { 104 | wtime = MPI_Wtime ( ) - wtime; 105 | cout << " Elapsed wall clock time = " << wtime << " seconds.\n"; 106 | } 107 | // 108 | // Terminate MPI. 109 | // 110 | MPI_Finalize ( ); 111 | // 112 | // Terminate. 
113 | // 114 | if ( id == 0 ) 115 | { 116 | cout << "\n"; 117 | cout << "HELLO_MPI:\n"; 118 | cout << " Normal end of execution.\n"; 119 | cout << "\n"; 120 | timestamp ( ); 121 | } 122 | return 0; 123 | } 124 | //****************************************************************************80 125 | 126 | void timestamp ( ) 127 | 128 | //****************************************************************************80 129 | // 130 | // Purpose: 131 | // 132 | // TIMESTAMP prints the current YMDHMS date as a time stamp. 133 | // 134 | // Example: 135 | // 136 | // 31 May 2001 09:45:54 AM 137 | // 138 | // Licensing: 139 | // 140 | // This code is distributed under the GNU LGPL license. 141 | // 142 | // Modified: 143 | // 144 | // 08 July 2009 145 | // 146 | // Author: 147 | // 148 | // John Burkardt 149 | // 150 | // Parameters: 151 | // 152 | // None 153 | // 154 | { 155 | # define TIME_SIZE 40 156 | 157 | static char time_buffer[TIME_SIZE]; 158 | const struct std::tm *tm_ptr; 159 | size_t len; 160 | std::time_t now; 161 | 162 | now = std::time ( NULL ); 163 | tm_ptr = std::localtime ( &now ); 164 | 165 | len = std::strftime ( time_buffer, TIME_SIZE, "%d %B %Y %I:%M:%S %p", tm_ptr ); 166 | 167 | std::cout << time_buffer << "\n"; 168 | 169 | return; 170 | # undef TIME_SIZE 171 | } 172 | -------------------------------------------------------------------------------- /mpi/mpiTest: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ucr-hpcc/hpcc_slurm_examples/74b84cb0c85dedf1a096d03e6e9e515ae36a8c22/mpi/mpiTest -------------------------------------------------------------------------------- /mpi/mpiTest.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | int main(int argc, char *argv[]) { 6 | int numprocs, rank, namelen; 7 | char processor_name[MPI_MAX_PROCESSOR_NAME]; 8 | 9 | MPI_Init(&argc, &argv); 10 | MPI_Comm_size(MPI_COMM_WORLD, &numprocs); 11 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); 12 | MPI_Get_processor_name(processor_name, &namelen); 13 | 14 | printf("Hello World! from process %d out of %d on %s\n", rank, numprocs, processor_name); 15 | 16 | MPI_Finalize(); 17 | } 18 | -------------------------------------------------------------------------------- /mpi/mpiTest_mpich: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ucr-hpcc/hpcc_slurm_examples/74b84cb0c85dedf1a096d03e6e9e515ae36a8c22/mpi/mpiTest_mpich -------------------------------------------------------------------------------- /mpi/slurm-1880596.out: -------------------------------------------------------------------------------- 1 | Process Process 1 says 'Hello, world!' 2 | 2 says 'Hello, world!' 3 | 28 March 2018 12:15:18 PM 4 | 5 | HELLO_MPI - Master process: 6 | C++/MPI version 7 | An MPI example program. 8 | 9 | The number of processes is 3 10 | 11 | Process 0 says 'Hello, world!' 12 | Elapsed wall clock time = 1.14441e-05 seconds. 13 | 14 | HELLO_MPI: 15 | Normal end of execution. 16 | 17 | 28 March 2018 12:15:18 PM 18 | -------------------------------------------------------------------------------- /mpi/slurm-201.out: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------- 2 | An ORTE daemon has unexpectedly failed after launch and before 3 | communicating back to mpirun. 
This could be caused by a number 4 | of factors, including an inability to create a connection back 5 | to mpirun due to a lack of common network interfaces and/or no 6 | route found between them. Please check network connectivity 7 | (including firewalls and network routing requirements). 8 | -------------------------------------------------------------------------- 9 | -------------------------------------------------------------------------------- /multi_steps/SUBMIT.txt: -------------------------------------------------------------------------------- 1 | # First, request resources 2 | salloc -N 10 bash 3 | # After resources have been granted, you can issue srun commands to run on the cluster 4 | srun hostname 5 | -------------------------------------------------------------------------------- /python/README.md: -------------------------------------------------------------------------------- 1 | # Python 2 | 3 | This is a basic example of how to submit Python code to the cluster. 4 | 5 | Make sure your `job_py_wrapper.sh` and `myPyscript.py` files are in the same directory, and then submit your wrapper from that directory: 6 | 7 | ```bash 8 | # Make example directory 9 | mkdir ~/py_example 10 | cd ~/py_example 11 | 12 | # Download example scripts 13 | wget https://raw.githubusercontent.com/ucr-hpcc/hpcc_slurm_examples/master/python/job_py_wrapper.sh 14 | wget https://raw.githubusercontent.com/ucr-hpcc/hpcc_slurm_examples/master/python/myPyscript.py 15 | 16 | # Submit wrapper 17 | sbatch job_py_wrapper.sh 18 | ``` 19 | -------------------------------------------------------------------------------- /python/job_py_wrapper.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -l 2 | 3 | #SBATCH --nodes=1 4 | #SBATCH --ntasks=1 5 | #SBATCH --cpus-per-task=1 6 | #SBATCH --mem=10G 7 | #SBATCH --time=1-00:15:00 # 1 day and 15 minutes 8 | #SBATCH --mail-user=useremail@address.com 9 | #SBATCH --mail-type=ALL 10 | #SBATCH --job-name="Python Example" 11 | #SBATCH -p epyc # You can use any of the following; epyc, intel, batch, highmem, gpu 12 | 13 | # A version of Python from miniconda2 is loaded by default 14 | # However, if you want to use a different version, then do so here 15 | #module unload miniconda2; module load anaconda3 16 | 17 | # Optionally you can activate a conda environment if you have created one 18 | #conda activate python3 19 | 20 | # Use Python3 to run Python script 21 | python3 myPyscript.py 22 | -------------------------------------------------------------------------------- /python/myPyscript.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Use pandas for data handling 4 | import pandas as pd 5 | 6 | # Some calculation 7 | data=1+1 8 | 9 | # Convert int to data frame 10 | df=pd.DataFrame([data]) 11 | 12 | # Save data frame to csv 13 | df.to_csv('myPyresults.csv') 14 | -------------------------------------------------------------------------------- /rstudio-server/README.md: -------------------------------------------------------------------------------- 1 | # RStudio Server 2 | 3 | First review the following method, [HPCC Web Browser Access](https://hpcc.ucr.edu/manuals_linux-cluster_jobs.html#web-browser-access). 4 | After you have read through that, you can proceed with this example. 5 | 6 | ## Interactive 7 | 8 | The easiest method is to run the RStudio Server launcher interactively. 
9 | 10 | First request an interactive job: 11 | 12 | ```bash 13 | srun --partition=short --mem=8gb --cpus-per-task=2 --ntasks=1 --time=2:00:00 --pty bash -l 14 | ``` 15 | 16 | Then load the latest versions of `R` and `RStudio Server` from module system: 17 | 18 | ```bash 19 | module unload R 20 | module load R/4.1.2 # Or latest version 21 | module load rstudio-server/2022.02.0-443 # Or latest version 22 | ``` 23 | 24 | Lastly, start the RStudio Server by running the launcher script: 25 | 26 | ```sh 27 | start-rserver.sh 28 | ``` 29 | 30 | ## Non-Interactive 31 | 32 | Alternativly as you can start an RStudio Server under a non-interactive job, like so: 33 | 34 | ```bash 35 | sbatch -p short -c 4 --time=2:00:00 --mem=10g --wrap='module unload R; module load R/4.1.2; module load rstudio-server/2022.02.0-443; start-rserver.sh' --output='rstudio-%J.out' 36 | ``` 37 | 38 | These are minimal resources, for only 2 hours, so you may need to adjust them. 39 | When the job starts, you can look at the slurm log to check which node it is running on and how to setup your SSH tunnel: 40 | 41 | ```bash 42 | cat rstudio*.out 43 | ``` 44 | 45 | ## Custom Launcher (EXPERT) 46 | 47 | If you want to modify the RStudio Server launch script, you can download a copy from here and modify it. 48 | 49 | Request resource on a compute node: 50 | 51 | ```bash 52 | srun -p epyc --time=8:00:00 --mem=10gb --cpus-per-task=1 --pty bash -l 53 | ``` 54 | 55 | Download startup script: 56 | 57 | ```bash 58 | wget https://raw.githubusercontent.com/ucr-hpcc/hpcc_slurm_examples/master/rstudio-server/start-rserver.sh 59 | ``` 60 | 61 | Allow execute permissions and then modify as needed: 62 | 63 | ```bash 64 | chmod u+x start-rserver.sh 65 | vim start-rserver.sh 66 | ``` 67 | 68 | Start RStudio with script: 69 | 70 | ```bash 71 | ./start-rserver.sh 72 | ``` 73 | 74 | Follow instructions given on screen. 75 | -------------------------------------------------------------------------------- /rstudio-server/start-rserver.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -l 2 | 3 | # Load rstudio-server 4 | module load rstudio-server 5 | 6 | # Get script directory and go there 7 | CWD="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null && pwd )" 8 | 9 | # Setup user-specific secure cookie key 10 | USER=`whoami` 11 | COOKIE_KEY_PATH=/tmp/${USER}_secure-cookie-key 12 | rm -f $COOKIE_KEY_PATH 13 | mkdir -p $(dirname $COOKIE_KEY_PATH) 14 | 15 | #python -c 'import uuid; print(uuid.uuid4())' > $COOKIE_KEY_PATH 16 | uuid > $COOKIE_KEY_PATH 17 | chmod 600 $COOKIE_KEY_PATH 18 | 19 | export RETICULATE_PYTHON=$(which python3) 20 | 21 | # Generate password 22 | export RSTUDIO_PASSWORD=$(date +%s | sha256sum | base64 | head -c 16 ; echo) 23 | 24 | # Get random port 25 | PORT=$(shuf -i8000-9999 -n1) 26 | 27 | # Print info 28 | echo -e "\nSetup your tunnel like so: 29 | \tssh -NL $PORT:$HOSTNAME:$PORT $USER@cluster.hpcc.ucr.edu 30 | This command will hang, it does not produce any output. 31 | 32 | Next open your internet browser to: 33 | \thttp://localhost:$PORT 34 | 35 | Use the following credentials to login: 36 | \tusername: $USER 37 | \tpassword: $RSTUDIO_PASSWORD 38 | 39 | For more information regarding SSH tunnels, vists here: 40 | \thttps://hpcc.ucr.edu/manuals_linux-cluster_jobs.html#web-browser-access 41 | 42 | For help please email: support@hpcc.ucr.edu 43 | 44 | Now running RStudio Server... 
45 | " 46 | 47 | RSTUDIO_PASSWORD=$RSTUDIO_PASSWORD rserver \ 48 | --server-daemonize=0 \ 49 | --rsession-path="$CWD/rsession.sh" \ 50 | --secure-cookie-key-file=$COOKIE_KEY_PATH \ 51 | --auth-encrypt-password=1 \ 52 | --rsession-which-r=$(which R) \ 53 | --www-port=$PORT \ 54 | --auth-none=0 \ 55 | --auth-pam-helper="$CWD/rstudio_auth" \ 56 | 57 | -------------------------------------------------------------------------------- /singularity/deepvariant/README.md: -------------------------------------------------------------------------------- 1 | # DeepVariant 2 | 3 | Load the module 4 | 5 | ```bash 6 | module load deepvariant 7 | ``` 8 | 9 | Execute the run_deepvariant script within the Singularity container 10 | 11 | ```bash 12 | singularity exec \ 13 | -B "YOUR_INPUT_DIR":"/input" \ 14 | -B "YOUR_OUTPUT_DIR:/output" \ 15 | $DEEPVARIANT_IMG \ 16 | /opt/deepvariant/bin/run_deepvariant \ 17 | --model_type=WGS \ **Replace this string with exactly one of the following [WGS,WES,PACBIO]** 18 | --ref=/input/YOUR_REF \ 19 | --reads=/input/YOUR_BAM \ 20 | --output_vcf=/output/YOUR_OUTPUT_VCF \ 21 | --output_gvcf=/output/YOUR_OUTPUT_GVCF \ 22 | --num_shards=$(nproc) **This will use all your cores to run make_examples. Feel free to change.** 23 | ``` 24 | 25 | -------------------------------------------------------------------------------- /singularity/galaxy/README.md: -------------------------------------------------------------------------------- 1 | # Galaxy 2 | 3 | > Deprecated: Singularity is not required to install Galaxy, use conda instead [Galaxy via Conda](../../galaxy). 4 | 5 | ## Prep Workspace 6 | 7 | Since `Galaxy` requires write access to the `config` and `database` directories, we need to copy them out of the container. 8 | Once we have copies of these directories we will then mount them inside the container. 9 | 10 | ### Create Galaxy Home 11 | 12 | Create a home base for Galaxy: 13 | 14 | ``` 15 | mkdir -p bigdata/galaxy/20.05 16 | cd bigdata/galaxy/20.05 17 | ``` 18 | 19 | #### Copy Files from Container 20 | 21 | Copy the databases and configs from inside the container to the Galaxy home: 22 | 23 | ``` 24 | module load galaxy 25 | singularity exec $GALAXY_IMG rsync -r /opt/galaxy/20.05/config/ config 26 | singularity exec $GALAXY_IMG rsync -r /opt/galaxy/20.05/database/ database 27 | ``` 28 | 29 | #### Configure Galaxy 30 | 31 | Now that we have a writable copy of the configuration files and databases, we can make some changes. 
32 | Open the config and modify the port and IP address where Galaxy will start: 33 | 34 | ``` 35 | PORT=$(shuf -i8000-9999 -n1) 36 | sed -i "s/^\s*http: .*/ http: 0.0.0.0:$PORT/" config/galaxy.yml 37 | grep '^\s*http:' config/galaxy.yml 38 | ``` 39 | 40 | We also want to add our HPCC username as an administrator of Galaxy: 41 | 42 | ``` 43 | sed -i "s/^\s*#*admin_users: .*/ admin_users: ${USER}/" config/galaxy.yml 44 | grep '^\s.*admin_users' config/galaxy.yml 45 | ``` 46 | 47 | ## Run Galaxy Job 48 | 49 | ### Startup Script 50 | 51 | Download the startup script: 52 | 53 | ``` 54 | wget https://raw.githubusercontent.com/ucr-hpcc/hpcc_slurm_examples/master/singularity/galaxy/start_galaxy.sh 55 | ``` 56 | 57 | Use nano or vim to edit the script to use the proper paths: 58 | 59 | ``` 60 | vim start_galaxy.sh 61 | ``` 62 | 63 | Then submit the script like so: 64 | 65 | ``` 66 | sbatch -p short -c 24 --mem=100gb start_galaxy.sh 67 | ``` 68 | 69 | ## SSH Tunnel 70 | 71 | After the Galaxy job has started, collect the node and port details and follow these instructions: [Web Browser Access](https://hpcc.ucr.edu/manuals_linux-cluster_jobs.html#web-browser-access). 72 | -------------------------------------------------------------------------------- /singularity/galaxy/start_galaxy.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -l 2 | 3 | # Load Galaxy 4 | module load galaxy 5 | 6 | # Set Galaxy Home 7 | GALAXY_HOME=~/bigdata/galaxy/20.05 8 | 9 | singularity exec \ 10 | -B $GALAXY_HOME/database:/opt/galaxy/20.05/database \ 11 | -B $GALAXY_HOME/config:/opt/galaxy/20.05/config \ 12 | $GALAXY_IMG \ 13 | /opt/galaxy/20.05/run.sh 14 | -------------------------------------------------------------------------------- /singularity/mariadb/README.md: -------------------------------------------------------------------------------- 1 | # MariaDB & MySQL 2 | ## Initialize Database 3 | The easiest way to create a SQL database is to run the following script: 4 | 5 | ```bash 6 | create_mysql_db 7 | ``` 8 | 9 | Or you can download the latest version from here: [create_mysql_db.sh](create_mysql_db.sh) 10 | This will walk you through the steps to create your own database using a singularity container. 11 | 12 | ## Starting Database 13 | Once you have completed that, you should be able to submit a job to start your database. 14 | 15 | First download the startup job script from here: [start_mariadb.sh](start_mariadb.sh) and modify where necessary. 16 | 17 | Then submit this just like any other job, like so: 18 | 19 | ```bash 20 | sbatch start_mariadb.sh 21 | ``` 22 | 23 | After running the startup job you should have a log file called `db_host_port.txt` in the directory where you created your `mariadb.sif` file. 24 | This text file should contain the host and the port where your job is running, which can be used for your database connections. 
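For example, you can read that file and open a client connection to the running database. This is a minimal sketch, and it assumes the database name you chose during `create_mysql_db` (the default is `workdb`):

```bash
# Read the node and port written by start_mariadb.sh
read DB_HOST DB_PORT < db_host_port.txt

# Connect with the MySQL client; you will be prompted for the password you set
mysql -u $USER -h $DB_HOST -P $DB_PORT -p workdb
```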
25 | -------------------------------------------------------------------------------- /singularity/mariadb/create_mysql_db.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -l 2 | 3 | module load singularity 4 | 5 | # Get path to db files 6 | echo -en "\nEnter path for new database files [${PWD}/mysql]: " 7 | read db_path 8 | if [[ -z $db_path ]]; then 9 | db_path=${PWD}/mysql 10 | fi 11 | echo -e "Using DB path: ${db_path}\n" 12 | 13 | # Check if db_path is valid 14 | if [[ -d $db_path ]]; then 15 | echo -e "ERROR: Database path ${db_path} already exists\n" 16 | exit 1 17 | else 18 | echo -e "Creating directory ${db_path}\n" 19 | mkdir -p ${db_path} && cd ${db_path} 20 | fi 21 | 22 | echo -n "Enter name for database [workdb]: " 23 | read db_name 24 | if [[ -z $db_name ]]; then 25 | db_name="workdb" 26 | fi 27 | echo -e "Using DB name: ${db_name}\n" 28 | 29 | # Get DB password 30 | echo -n "Enter MySQL Password: " 31 | read -s db_pass 32 | echo -e "\n" 33 | 34 | # Get port (hope it is not used) 35 | db_port=$(shuf -i3000-3999 -n1) 36 | 37 | # Make sure a password was given (Add more robust checks here) 38 | if [[ -z $db_pass ]]; then 39 | echo "ERROR: You cannot have an empty MySQL password." 40 | exit 1 41 | fi 42 | 43 | # Remote login so that sudo is not required for build 44 | echo "Attempting remote login..." 45 | if [[ ! -f ~/.singularity/remote.yaml ]] || [[ ! -s ~/.singularity/remote.yaml ]]; then 46 | singularity remote login; EXITCODE=$? 47 | fi 48 | 49 | # If remote login failed, advise user 50 | if [[ $EXITCODE -ne 0 ]]; then 51 | echo -e "\n 52 | ERROR: singularity remote login failed. 53 | Delete ~/.singularity/remote.yaml. 54 | Then generate new access token here https://cloud.sylabs.io/auth. 55 | Then try this script again with the new token.\n" 56 | exit 1 57 | fi 58 | 59 | # Create singularity definition 60 | cat << EOF > mariadb.def 61 | Bootstrap: docker 62 | From: mariadb:10.3.9 63 | 64 | %post 65 | # Your username 66 | YOUR_USERNAME="${USER}" 67 | 68 | sed -ie "s/^#user.*/user = ${USER}/" /etc/mysql/my.cnf 69 | sed -ie "s/^port.*/port = ${db_port}/" /etc/mysql/my.cnf 70 | chmod 1777 /run/mysqld 71 | 72 | %runscript 73 | exec "mysqld" "$@" 74 | 75 | %startscript 76 | exec "mysqld_safe" 77 | 78 | EOF 79 | 80 | # Build singularity image 81 | OLD_SINGULARITY_BINDPATH=$SINGULARITY_BINDPATH 82 | unset SINGULARITY_BINDPATH 83 | singularity build --remote mariadb.sif mariadb.def 84 | SINGULARITY_BINDPATH=$OLD_SINGULARITY_BINDPATH 85 | unset OLD_SINGULARITY_BINDPATH 86 | # Create directory where db files live 87 | mkdir db 88 | 89 | # Initialize db 90 | singularity exec --writable-tmpfs -B db/:/var/lib/mysql mariadb.sif 'mysql_install_db' &> /dev/null 91 | ERROR_CODE=$? 92 | if [[ $ERROR_CODE -ne 0 ]]; then 93 | echo "ERROR: Database failed to initialize." 94 | exit 1 95 | fi 96 | 97 | # Create newuser script 98 | cat << EOF > newuser.sh 99 | #!/bin/sh 100 | 101 | # Start mysql 102 | mysqld_safe --datadir=/var/lib/mysql & 103 | MYSQL_PID=\$! 
104 | 105 | # Give mysql time to startup 106 | sleep 10 107 | 108 | # Create work db and new user 109 | mysql -u root mysql < newuser.sql 110 | 111 | # Secure mysql 112 | #mysql_secure_installation 113 | 114 | # Update root password 115 | mysqladmin -u root --port ${db_port} password "${db_pass}" 116 | 117 | # Stop mysql 118 | echo \$(date "+%y%m%d %T") "mysqld_safe Shutting down" 119 | mysqladmin -u root --port=${db_port} --password="${db_pass}" shutdown 2>/dev/null || kill -9 \$MYSQL_PID 120 | echo \$(date "+%y%m%d %T") "mysqld_safe Shutdown" 121 | 122 | EOF 123 | 124 | # Create newuser SQL file 125 | cat << EOF > newuser.sql 126 | CREATE DATABASE ${db_name}; 127 | CREATE USER '${USER}'@'%' IDENTIFIED BY "${db_pass}"; 128 | GRANT ALL PRIVILEGES ON *.* TO ${USER}@'%' WITH GRANT OPTION; 129 | FLUSH PRIVILEGES; 130 | EOF 131 | 132 | # Make sure this is executable 133 | chmod u+x newuser.sh 134 | 135 | # Create newuser 136 | singularity exec --writable-tmpfs -B db/:/var/lib/mysql mariadb.sif "${PWD}/newuser.sh" 137 | rm -f newuser.sh newuser.sql 138 | 139 | # Stop mysql service 140 | #mysqladmin -u $USER -h $HOSTNAME --password="$db_pass" shutdown 141 | 142 | # Create service instance 143 | echo -e "\n 144 | ########################################################################################## 145 | # IMPORTANT NOTES !!! 146 | ########################################################################################## 147 | 148 | # Make sure you have loaded singularity: 149 | module load singularity 150 | 151 | # To start your service, do the following: 152 | cd ${PWD} 153 | singularity instance start --writable-tmpfs -B db/:/var/lib/mysql mariadb.sif mysqldb 154 | 155 | # To list your running services, run the following: 156 | singularity instance list 157 | 158 | # To connect to your running service, run the following: 159 | mysql -u ${USER} -h ${HOSTNAME} -P ${db_port} -p ${db_name} 160 | 161 | # To stop your service, run the following: 162 | singularity instance stop mysqldb 163 | 164 | # To get a debug shell into your service, do the following: 165 | cd ${PWD} 166 | singularity instance stop mysqldb 167 | singularity shell --writable-tmpfs -B db/:/var/lib/mysql mariadb.sif 168 | 169 | ##########################################################################################\n" 170 | 171 | -------------------------------------------------------------------------------- /singularity/mariadb/start_mariadb.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -l 2 | #SBATCH -p epyc 3 | #SBATCH --time=7-00:00:00 4 | #SBATCH -c 2 5 | #SBATCH --mem=10g 6 | 7 | ############################################################################ 8 | # NOTE: # 9 | # Make sure you have already run and completed the create_mysql_db command # 10 | ############################################################################ 11 | 12 | # Load singularity 13 | module load singularity 14 | 15 | # Move to where your mariadb.sif image lives 16 | cd ~/bigdata/mysql/ 17 | 18 | # Get port and host name info and save it to a file 19 | PORT=$(singularity exec --writable-tmpfs -B db/:/var/lib/mysql mariadb.sif grep -oP '^port = \K\d{4}' /etc/mysql/my.cnf | head -1) 20 | echo $HOSTNAME $PORT > db_host_port.txt 21 | 22 | # Start your mariadb like a service 23 | singularity exec --writable-tmpfs -B db/:/var/lib/mysql mariadb.sif /usr/bin/mysqld_safe 24 | -------------------------------------------------------------------------------- /singularity/metaerg/README.md: 
-------------------------------------------------------------------------------- 1 | # Singularity Metaerg Container 2 | Singularity container built from docker image: [https://github.com/xiaoli-dong/metaerg](https://github.com/xiaoli-dong/metaerg) 3 | 4 | ## Setup 5 | Choose where you would like your analysis to be saved, typically a sub-directory under bigdata: 6 | 7 | ```bash 8 | mkdir ~/bigdata/metaerg 9 | cd ~/bigdata/metaerg 10 | ``` 11 | 12 | Ensure that you have a directory called `data` in the current directory: 13 | 14 | ```bash 15 | mkdir data 16 | ``` 17 | 18 | Also ensure that you have a fasta file called `contig.fasta` in the `data` directory: 19 | 20 | ```bash 21 | cp /path/to/contig.fasta data/contig.fasta 22 | ``` 23 | 24 | ## Script 25 | In order to submit this job in a non-interactive way, we will need to create a submission script. 26 | Download the submission script and edit it based on your needs: 27 | 28 | ```bash 29 | wget https://raw.githubusercontent.com/ucr-hpcc/hpcc_slurm_examples/master/singularity/metaerg/metaerg_job.sh 30 | nano metaerg_job.sh # You could also use vim/emacs or another text editor 31 | ``` 32 | 33 | ## Submit 34 | Once you have set up your data directory and updated your submission script, you can submit your job to the cluster with the following command: 35 | 36 | ```bash 37 | sbatch metaerg_job.sh 38 | ``` 39 | 40 | -------------------------------------------------------------------------------- /singularity/metaerg/metaerg_job.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -l 2 | 3 | #SBATCH --nodes=1 4 | #SBATCH --ntasks=1 5 | #SBATCH --cpus-per-task=8 6 | #SBATCH -p epyc 7 | #SBATCH --mem-per-cpu=8G 8 | #SBATCH --time=1-00:15:00 # 1 Day and 15 minutes 9 | ##SBATCH --mail-user=emailaddress@mail.com 10 | ##SBATCH --mail-type=ALL 11 | #SBATCH --job-name="Metaerg_Sing" 12 | 13 | # Load Modules 14 | module load metaerg # This auto loads singularity 15 | 16 | # Create DB, this only needs to be done once 17 | singularity exec -B data:/data $METAERG_IMG setup_db.pl -o /data -v 132 18 | 19 | # Execute script in singularity container 20 | singularity exec -B data:/data $METAERG_IMG metaerg.pl --dbdir /data/db --outdir /data/my_metaerg_output /data/contig.fasta 21 | 22 | -------------------------------------------------------------------------------- /singularity/mongo/README.md: -------------------------------------------------------------------------------- 1 | # MongoDB 2 | 3 | ## Configure 4 | 5 | First load the module: 6 | 7 | ```bash 8 | module load mongo/4.2.0 9 | ``` 10 | 11 | Create and move to data location: 12 | 13 | ```bash 14 | mkdir -p ~/bigdata/mongo/data 15 | cd ~/bigdata/mongo 16 | ``` 17 | 18 | You can run mongo simply like this; however, it produces NUMA node warnings: 19 | 20 | ```bash 21 | singularity run -B data:/data/db $MONGO_IMG 22 | ``` 23 | 24 | Run mongo the first time with NUMA node support and without auth: 25 | 26 | ```bash 27 | singularity instance start -B data:/data/db $MONGO_IMG numactl --interleave=all mongod 28 | ``` 29 | 30 | From the same terminal, connect to your mongodb service like so: 31 | ```bash 32 | singularity exec -B data:/data/db $MONGO_IMG mongo 33 | ``` 34 | 35 | Then create an admin user with the following: 36 | ``` 37 | use admin 38 | db.createUser( 39 | { 40 | user: 'admin', 41 | pwd: passwordPrompt(), 42 | roles: [ { role: 'root', db: 'admin' } ] 43 | } 44 | ); 45 | ``` 46 | 47 | For more information regarding user access, please refer to the following: 48 | 
[https://docs.mongodb.com/manual/tutorial/enable-authentication/#create-the-user-administrator](https://docs.mongodb.com/manual/tutorial/enable-authentication/#create-the-user-administrator) 49 | 50 | Then stop the mongod service: 51 | 52 | ```bash 53 | singularity instance stop numactl 54 | ``` 55 | 56 | ## Within the same job 57 | 58 | You can now start it again from within a job, like so: 59 | 60 | ```bash 61 | module load mongo 62 | cd ~/bigdata/mongo 63 | singularity instance start -B data:/data/db $MONGO_IMG numactl --interleave=all mongod --auth 64 | ``` 65 | 66 | > NOTE: This does not seem to be working within the latest version of mongo. 67 | 68 | ## Separate Jobs 69 | 70 | Or as a separate job like this: 71 | 72 | ``` 73 | sbatch -p short --wrap 'module load mongo; cd ~/bigdata/mongo; singularity exec -B data:/data/db $MONGO_IMG numactl --interleave=all mongod --bind_ip_all --auth;' 74 | ``` 75 | 76 | Lastly, connect to the mongod service, from the same job, with the following: 77 | 78 | ```bash 79 | singularity exec -B data:/data/db $MONGO_IMG mongo --authenticationDatabase "admin" -u "admin" -p 80 | ``` 81 | 82 | Or connect to mongod from a different node like so: 83 | 84 | ```bash 85 | singularity exec -B data:/data/db $MONGO_IMG mongo --host NodeName --authenticationDatabase "admin" -u "admin" -p 86 | ``` 87 | 88 | -------------------------------------------------------------------------------- /singularity/orthomcl/README.md: -------------------------------------------------------------------------------- 1 | # Database inside of job 2 | Refer to [../mariadb/start_mariadb.sh](../mariadb/start_mariadb.sh) 3 | 4 | # Database outside of job 5 | Refer to [orthomcl_job.sh](orthomcl_job.sh) 6 | -------------------------------------------------------------------------------- /singularity/orthomcl/orthomcl_job.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | #SBATCH --ntasks=1 4 | #SBATCH --cpus-per-task=1 5 | #SBATCH --mem=10G 6 | #SBATCH --mail-type=ALL 7 | #SBATCH --mail-user=useremail@address.com 8 | #SBATCH --time=4:00:00 9 | #SBATCH --job-name=orthomcl 10 | #SBATCH -p epyc 11 | 12 | module load singularity 13 | module load orthomcl 14 | 15 | # Go to database directory 16 | cd ~/bigdata/mysql 17 | 18 | # Start Database 19 | PORT=$(singularity exec --writable-tmpfs -B db/:/var/lib/mysql mariadb.sif grep -oP '^port = \K\d{4}' /etc/mysql/my.cnf | head -1) 20 | singularity instance start --writable-tmpfs -B db/:/var/lib/mysql mariadb.sif mysqldb 21 | sleep 10 22 | 23 | # Move to bigdata 24 | cd ~/bigdata/ 25 | 26 | # Update Orthomcl.config 27 | sed -i "s/^dbConnectString.*$/dbConnectString=dbi:mysql:orthomcl:${HOSTNAME}:${PORT}/" orthomcl/orthomcl.config 28 | 29 | # Run orthomcl 30 | orthomclInstallSchema orthomcl/orthomcl.config orthomcl/install_schema.log 31 | 32 | # Stop Database 33 | singularity instance stop mysqldb 34 | -------------------------------------------------------------------------------- /spark/spark_job.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -l 2 | 3 | #SBATCH -p short 4 | #SBATCH --nodes=3 5 | #SBATCH --cpus-per-task=8 6 | #SBATCH --ntasks-per-node=1 7 | #SBATCH --time=0:20:00 8 | #SBATCH --job-name=spark-test 9 | 10 | ########################################################## 11 | # PBS version was pulled from here:                      # 12 | # https://www.dursi.ca/post/spark-in-hpc-clusters.html   # 13 | ########################################################## 14 | 
15 | # Get names of allocated nodes 16 | nodes=($( scontrol show hostnames $SLURM_NODELIST )) 17 | nnodes=${#nodes[@]} 18 | last=$(( $nnodes - 1 )) 19 | 20 | # Move to directory where job was submitted from 21 | cd $SLURM_SUBMIT_DIR 22 | 23 | # Assign location to Spark home var 24 | export SPARK_HOME=/rhome/jhayes/shared/pkgs/spark/2.4.0-bin-hadoop2.7 25 | # Log into first node and start master Spark process 26 | ssh ${nodes[0]}.ib.hpcc.ucr.edu "module load java/8u45; cd ${SPARK_HOME}; ./sbin/start-master.sh" 27 | sparkmaster="spark://${nodes[0]}:7077" 28 | 29 | # Assign location to scratch var 30 | SCRATCH=~/bigdata/Projects/spark/ 31 | # Create work directory 32 | mkdir -p ${SCRATCH}/work 33 | # Remove old logs, if they exist 34 | rm -f ${SCRATCH}/work/nohup*.out 35 | 36 | # On each node, start Spark worker 37 | for i in $( seq 0 $last ); do 38 | ssh ${nodes[$i]}.ib.hpcc.ucr.edu "cd ${SPARK_HOME}; module load java/8u45; nohup ./bin/spark-class org.apache.spark.deploy.worker.Worker ${sparkmaster} &> ${SCRATCH}/work/nohup-${nodes[$i]}.out" & 39 | done 40 | 41 | # Remove the old results directory, if it exists 42 | rm -rf ${SCRATCH}/wordcounts 43 | 44 | # Create Spark Python code to be worked 45 | cat > sparkscript.py < NOTE: When submitting a real STATA job, you will need to adjust the `#SBATCH` resource requests within the `submit.sh` before submitting it. 28 | -------------------------------------------------------------------------------- /stata/submit.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -l 2 | 3 | #SBATCH --nodes=1 4 | #SBATCH --ntasks=1 5 | #SBATCH --cpus-per-task=1 6 | #SBATCH --mem-per-cpu=1G 7 | #SBATCH --time=15:00 # 15 minutes 8 | #SBATCH --mail-type=ALL 9 | #SBATCH --job-name="just_a_test" 10 | #SBATCH -p short # This is the default partition, you can use any of the following; intel, batch, highmem, gpu 11 | 12 | # Load modules 13 | module load stata 14 | 15 | # do work 16 | stata < test.do 17 | 18 | -------------------------------------------------------------------------------- /stata/test.do: -------------------------------------------------------------------------------- 1 | gen f=43 2 | list f 3 | -------------------------------------------------------------------------------- /vasp/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -l 2 | 3 | #SBATCH --nodes=1 4 | #SBATCH --cpus-per-task=1 5 | #SBATCH --ntasks=16 6 | #SBATCH --mem=10G 7 | ##SBATCH --mail-user=email@address.com 8 | ##SBATCH --mail-type=ALL 9 | #SBATCH --time=7-00:00:00 10 | #SBATCH --job-name="vasp" 11 | #SBATCH -p epyc 12 | 13 | module -s load vasp/5.4.1_oneapi-2022.1.2.146 14 | export OMP_NUM_THREADS=1 15 | ulimit -s unlimited 16 | mpirun -n 16 vasp_std 17 | 18 | -------------------------------------------------------------------------------- /vnc/READMD.md: -------------------------------------------------------------------------------- 1 | # VNC 2 | 3 | Submit this job like so: 4 | 5 | ```bash 6 | sbatch vnc_job.sh 7 | ``` 8 | 9 | Then check to see if the job is running: 10 | 11 | ```bash 12 | squeue -u $USER 13 | ``` 14 | 15 | Once the job has started, check the Slurm log to see which port and compute node are used: 16 | 17 | ```bash 18 | cat vnc_job-*.out 19 | ``` 20 | 21 | -------------------------------------------------------------------------------- /vnc/vnc_job.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | sbatch -o 'vnc_job-%j.out' -p 
epyc -c 4 --time 2:00:00 --wrap='vncserver -fg' 4 | 5 | -------------------------------------------------------------------------------- /workshop/README.md: -------------------------------------------------------------------------------- 1 | # hpcc_workshop_challenge 2 | 3 | Try this small challenge to see how much you learned from the presentation! 4 | 5 | This sbatch script will load the `hpcc_workshop/2.0` module and generate an output file in the `output` folder. 6 | You will need to change permissions on this file in order to read its contents. 7 | The secret message can be viewed by running `workshop_challenge ./output/secret_message.txt`. 8 | -------------------------------------------------------------------------------- /workshop/SBATCH.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -l 2 | 3 | #SBATCH --nodes=1 4 | #SBATCH --ntasks=1 5 | #SBATCH --cpus-per-task=1 6 | #SBATCH --mem=1G 7 | #SBATCH --time=00:05:00 8 | #SBATCH --mail-user=useremail@address.com 9 | #SBATCH --mail-type=ALL 10 | #SBATCH --job-name="workshop_test" 11 | #SBATCH -p short 12 | 13 | module purge 14 | module load hpcc_workshop/2.0 15 | module load miniconda3 16 | 17 | mkdir -p output 18 | rm -rf output/secret_message.txt 19 | create_output_file > output/secret_message.txt 20 | 21 | chmod 000 output/secret_message.txt 22 | 23 | --------------------------------------------------------------------------------
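
After the workshop job completes, the secret message file exists but is unreadable, since the script deliberately sets its mode to 000. A minimal sketch of the follow-up steps, assuming `SBATCH.sh` was submitted from the `workshop` directory and that the `workshop_challenge` command is provided by the `hpcc_workshop/2.0` module:

```bash
# Minimal sketch: make the output readable again, then decode it.
# Assumes the job was submitted from this directory and that the
# hpcc_workshop/2.0 module provides the workshop_challenge command.
module load hpcc_workshop/2.0
chmod u+r output/secret_message.txt        # SBATCH.sh sets mode 000 on purpose
workshop_challenge ./output/secret_message.txt
```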