├── .gitignore ├── R ├── README.md ├── job_wrapper.sh └── myRscript.R ├── README.md ├── abaqus ├── Balldrop_phillips_benchmark.inp └── test_job.sh ├── alphafold ├── README.md ├── kernal.json └── run_alphafold_cle.sh ├── ansys ├── HKHLR-HowTo-Ansys_Fluent.pdf ├── README.md ├── run_cfx.sh ├── run_fluent_gpu.sh └── run_fluent_mpi_journal.sh ├── array ├── README.md ├── command_line ├── inputs.txt ├── slurm-406233_1.out ├── slurm-406233_10.out ├── slurm-406233_2.out ├── slurm-406233_3.out ├── slurm-406233_4.out ├── slurm-406233_5.out ├── slurm-406233_6.out ├── slurm-406233_7.out ├── slurm-406233_8.out ├── slurm-406233_9.out └── test_job.sh ├── basic ├── slurm-1217.out ├── slurm-1218.out ├── slurm-1222.out ├── slurm-666.out ├── slurm-667.out ├── slurm-668.out ├── slurm-669.out ├── statsdept_test_job.sh ├── test_job.sh └── test_pbs.sh ├── blast └── test_job.sh ├── castep └── job.sh ├── checkpoint ├── blcr │ ├── slurm-2963303.out │ ├── slurm-2963306.out │ └── test_job.sh └── dmtcp │ ├── README │ ├── ccr_buffalo │ ├── README.txt │ ├── slurm_dmtcp_ompi_tcp │ ├── slurm_dmtcp_openmp │ └── slurm_dmtcp_serial │ ├── slurm_launch.job │ ├── slurm_rstr.job │ ├── stampede │ ├── README │ ├── slurm_launch.job │ └── slurm_rstr.job │ └── ucr-hpcc │ └── single │ ├── ckpts │ ├── ckpt_bash_afc8ad-40000-3ea0c9317bade2.dmtcp │ ├── ckpt_bash_afc8ad-40000-3ea0c9317bade2_files │ │ ├── count.sh_99078 │ │ └── fd-info.txt │ ├── ckpt_sleep_afc8ad-105000-5b7f491c.dmtcp │ ├── ckpt_sleep_afc8ad-110000-5b7f4957.dmtcp │ ├── ckpt_sleep_afc8ad-115000-5b7f4994.dmtcp │ ├── ckpt_sleep_afc8ad-120000-5b7f49d0.dmtcp │ ├── ckpt_sleep_afc8ad-125000-5b7f4a0c.dmtcp │ ├── ckpt_sleep_afc8ad-130000-5b7f4a49.dmtcp │ ├── ckpt_sleep_afc8ad-135000-5b7f4a85.dmtcp │ └── dmtcp_restart_script_afc8ad-40000-3ea0c918c3d461.sh │ ├── count.sh │ ├── slurm_launch.job │ └── slurm_rstr.job ├── configs ├── active_users.sh ├── cgroup.conf ├── layout.d │ └── power.conf ├── slurm.conf ├── slurmdbd.conf ├── topology.conf └── update_slurm.sh ├── dedalus ├── README.md └── ivp_2d_rayleigh_benard │ ├── job.sh │ ├── plot_snapshots.py │ └── rayleigh_benard.py ├── depend ├── README.md ├── slurm-180.out ├── slurm-181.out ├── slurm-182.out └── test_job.sh ├── espresso ├── README.md ├── espresso.sh └── espresso_intel.sh ├── folddock └── README.md ├── galaxy └── README.md ├── gaussian ├── README.md ├── cpu_job.sh ├── cpu_job_g09.sh ├── gpu_2xp100_job.sh ├── gpu_4xk80_job.sh └── gpu_8xk80_job.sh ├── hmmer ├── README.md ├── pipeline │ ├── 01_hmmscan321_pfam34.sh │ ├── 01_hmmscan33_pfam31.sh │ ├── 01_hmmscan33_pfam34.sh │ ├── 02_hmmsearch_COX1.sh │ └── 03_hmmsearch_MPI.sh └── query │ ├── download.sh │ └── query.pep ├── interactive └── README.txt ├── jupyter ├── README.md ├── jupyter-notebook-3523021.log ├── jupyter-notebook-5205779.log ├── notebook.html ├── notebook.ipynb ├── notebook.nbconvert.ipynb ├── submit_jupyter.sh └── submit_notebook.sh ├── mathematica ├── README.md ├── submission_script.sh └── test.m ├── matlab ├── Getting_Started_With_Serial_And_Parallel_MATLAB.pdf ├── README.md ├── simple_args │ ├── command_line │ ├── job_script.sh │ ├── matlabCode.m │ ├── slurm-376816.out │ └── slurm-396765.out ├── submission_script.sh └── submission_script2.sh ├── mpi ├── R │ ├── slurm-688508.out │ ├── snow-test.R │ └── snow-test.sh ├── SUBMIT.txt ├── a.out ├── fortran │ ├── fhello_world_mpi │ └── fhello_world_mpi.F90 ├── hello-mpi ├── hello-mpi.cpp ├── mpiTest ├── mpiTest.c ├── mpiTest_mpich ├── slurm-1880596.out └── slurm-201.out ├── multi_steps └── SUBMIT.txt ├── python ├── 
README.md ├── job_py_wrapper.sh └── myPyscript.py ├── rstudio-server ├── README.md └── start-rserver.sh ├── singularity ├── deepvariant │ └── README.md ├── galaxy │ ├── README.md │ └── start_galaxy.sh ├── mariadb │ ├── README.md │ ├── create_mysql_db.sh │ └── start_mariadb.sh ├── metaerg │ ├── README.md │ └── metaerg_job.sh ├── mongo │ └── README.md └── orthomcl │ ├── README.md │ └── orthomcl_job.sh ├── spark └── spark_job.sh ├── stata ├── README.md ├── submit.sh └── test.do ├── vasp └── run.sh ├── vnc ├── READMD.md └── vnc_job.sh └── workshop ├── README.md └── SBATCH.sh /.gitignore: -------------------------------------------------------------------------------- 1 | *~ 2 | *.swp 3 | *.swo 4 | -------------------------------------------------------------------------------- /R/README.md: -------------------------------------------------------------------------------- 1 | # R 2 | 3 | Here is a basic example of how you can submit R code to the cluster. 4 | 5 | Make sure your `job_wrapper.sh` and `myRscript.R` files are in the same directory, and then submit your wrapper from that directory: 6 | 7 | 1. Make example directory 8 | 9 | ```bash 10 | mkdir ~/R_example 11 | cd ~/R_example 12 | ``` 13 | 14 | 2. Download example scripts 15 | 16 | ```bash 17 | wget https://raw.githubusercontent.com/ucr-hpcc/hpcc_slurm_examples/master/R/job_wrapper.sh 18 | wget https://raw.githubusercontent.com/ucr-hpcc/hpcc_slurm_examples/master/R/myRscript.R 19 | ``` 20 | 21 | 3. Submit wrapper 22 | 23 | ``` 24 | sbatch job_wrapper.sh 25 | ``` 26 | 27 | > NOTE: When using a real R script, you will need to adjust the `SBATCH` resource requests within `job_wrapper.sh` before submitting it. 28 | -------------------------------------------------------------------------------- /R/job_wrapper.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -l 2 | 3 | #SBATCH --nodes=1 4 | #SBATCH --ntasks=1 5 | #SBATCH --cpus-per-task=1 6 | #SBATCH --mem=10G 7 | #SBATCH --time=1-00:15:00 # 1 day and 15 minutes 8 | #SBATCH --mail-user=useremail@address.com 9 | #SBATCH --mail-type=ALL 10 | #SBATCH --job-name="R Example" 11 | #SBATCH -p epyc # You can use any of the following; epyc, intel, batch, highmem, gpu 12 | 13 | # The latest R is loaded by default 14 | # However, if you want to use a different version, then do so here 15 | #module load R 16 | 17 | # Use Rscript to run R script 18 | Rscript myRscript.R 19 | -------------------------------------------------------------------------------- /R/myRscript.R: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env Rscript 2 | 3 | # Some calculation 4 | data=1+1 5 | 6 | # Save output to csv 7 | write.csv(data,'myResults.csv', row.names=FALSE) 8 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # hpcc_slurm_examples 2 | This is a collection of Slurm examples that can be used on the HPC Cluster at the University of California, Riverside.
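Most of the example directories follow the same pattern: download the job script, adjust its `#SBATCH` resource requests for your work, and submit it with `sbatch`. A minimal sketch of that workflow, using the R wrapper above as the illustration (any of the other scripts work the same way):

```bash
# Download one of the example job scripts
wget https://raw.githubusercontent.com/ucr-hpcc/hpcc_slurm_examples/master/R/job_wrapper.sh

# Adjust the #SBATCH lines (CPUs, memory, time, partition, email) for your workload
vim job_wrapper.sh

# Submit the job and check on it
sbatch job_wrapper.sh
squeue -u $USER
```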
3 | 4 | ## Credit 5 | Some examples borrowed from: 6 | 7 | [http://www.nersc.gov/users/computational-systems/cori/running-jobs/example-batch-scripts/](http://www.nersc.gov/users/computational-systems/cori/running-jobs/example-batch-scripts/) 8 | 9 | [https://docs.ycrc.yale.edu/clusters-at-yale/guides/jupyter/](https://docs.ycrc.yale.edu/clusters-at-yale/guides/jupyter/) 10 | 11 | [https://www.dursi.ca/post/spark-in-hpc-clusters.html](https://www.dursi.ca/post/spark-in-hpc-clusters.html) 12 | -------------------------------------------------------------------------------- /abaqus/test_job.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -l 2 | 3 | #SBATCH --nodes=1 4 | #SBATCH --ntasks=1 5 | #SBATCH --cpus-per-task=5 6 | #SBATCH --mem=10G 7 | #SBATCH --time=1-00:15:00 # 1 day and 15 minutes 8 | #SBATCH --mail-user=useremail@address.com 9 | #SBATCH --mail-type=ALL 10 | #SBATCH --job-name="just_a_test" 11 | #SBATCH -p epyc # You can use any of the following; epyc, intel, batch, highmem, gpu 12 | 13 | # Unset this variable, since Abaqus errors when it is set 14 | export SLURM_GTIDS= 15 | 16 | # Load software 17 | module load abaqus 18 | 19 | # Run abaqus 20 | abaqus job="Ball" input=./Balldrop_phillips_benchmark.inp interactive 21 | 22 | # Other useful options for abaqus: 23 | # parallel_mode=MPI 24 | # mp_mode={mpi | threads} 25 | # gpus=number-of-gpgpus 26 | # memory=memory-size 27 | # interactive 28 | # scratch=scratch-dir 29 | # timeout=co-simulation timeout value in seconds 30 | 31 | -------------------------------------------------------------------------------- /alphafold/README.md: -------------------------------------------------------------------------------- 1 | # AlphaFold 2 | 3 | ## Running 4 | 5 | ### Cluster 6 | 7 | In order to run AlphaFold, you need to utilize the installed workflow under a `Singularity` container. 8 | 9 | The [run_alphafold_cle.sh](run_alphafold_cle.sh) file is an example of running AlphaFold on the HPCC. 10 | 11 | Once downloaded and altered to your preferences, you can submit this script as a job, like so: 12 | 13 | ```bash 14 | # Download 15 | wget https://raw.githubusercontent.com/ucr-hpcc/hpcc_slurm_examples/master/alphafold/run_alphafold_cle.sh 16 | 17 | # Edit 18 | vim run_alphafold_cle.sh 19 | 20 | # Submit 21 | sbatch run_alphafold_cle.sh 22 | ``` 23 | 24 | ## Jupyter 25 | 26 | ### JupyterHub 27 | 28 | The `Singularity` container can also be used within our [Jupyter](https://jupyter.hpcc.ucr.edu) service (limited CPU and RAM, and no GPUs). 29 | 30 | All that is required is that you download the [kernal.json](kernal.json) file and place it under the following directory: 31 | 32 | ```bash 33 | # Download 34 | wget https://raw.githubusercontent.com/ucr-hpcc/hpcc_slurm_examples/master/alphafold/kernal.json 35 | 36 | # Create directory 37 | mkdir -p ~/.local/share/jupyter/kernels/alphafold 38 | 39 | # Move the kernel file into place (the downloaded file is named kernal.json) 40 | mv kernal.json ~/.local/share/jupyter/kernels/alphafold/kernel.json 41 | ``` 42 | 43 | ### Jupyter Job 44 | 45 | If your job requires heavy resources or GPUs, then you will need to submit your own `Jupyter` notebook server on the cluster ([Jupyter: as a job](https://github.com/ucr-hpcc/hpcc_slurm_examples/tree/master/jupyter#interactively-as-a-job)).
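A rough sketch of what such a job script might look like is below. It assumes the Singularity and container paths from [kernal.json](kernal.json) above, and that the image provides the `jupyter` command alongside `ipykernel`; the submit scripts in the jupyter directory linked above are the maintained recipe for actually connecting to the notebook.

```bash
#!/bin/bash -l
#SBATCH --ntasks=4
#SBATCH --mem=24gb
#SBATCH --time=0-08:00:00
#SBATCH -p gpu
#SBATCH --gres=gpu:1
#SBATCH --job-name=alphafold_jupyter

# Paths taken from kernal.json; adjust if your install differs
SINGULARITY=/opt/linux/centos/7.x/x86_64/pkgs/singularity/3.7.3/bin/singularity
SIF=/opt/linux/centos/7.x/x86_64/pkgs/alphafold/2.0.0/alphafold.sif

# Assumes the container ships the jupyter CLI under /opt/conda
$SINGULARITY exec --nv $SIF /opt/conda/bin/jupyter notebook \
    --no-browser --ip=$(hostname -s) --port=8888
```

Once the job starts, tunnel from your workstation to the node and port reported in the job log, as described in the jupyter examples.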
46 | 47 | 48 | 49 | -------------------------------------------------------------------------------- /alphafold/kernal.json: -------------------------------------------------------------------------------- 1 | { 2 | "language": "python", 3 | "argv": ["/opt/linux/centos/7.x/x86_64/pkgs/singularity/3.7.3/bin/singularity", 4 | "exec", 5 | "/opt/linux/centos/7.x/x86_64/pkgs/alphafold/2.0.0/alphafold.sif", 6 | "/opt/conda/bin/python", 7 | "-m", 8 | "ipykernel", 9 | "-f", 10 | "{connection_file}" 11 | ], 12 | "display_name": "AlphaFold (2.0.0)" 13 | } 14 | -------------------------------------------------------------------------------- /alphafold/run_alphafold_cle.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/bash 2 | #SBATCH --ntasks=24 3 | #SBATCH -N 1 4 | #SBATCH --mem=48gb 5 | #SBATCH --time=1-00:00:00 6 | #SBATCH -p gpu 7 | #SBATCH --gres=gpu:1 8 | #SBATCH --out=logs/alphafold.%A.log 9 | #SBATCH -J calp_alpha 10 | ##SBATCH --mail-type=END # notifications for job done & fail 11 | ##SBATCH --mail-user=cassande@ucr.edu # send-to address 12 | ##SBATCH -D /rhome/cassande/shared/projects/Caulerpa/alphafold_test/ 13 | 14 | # Path to directory of supporting data, the databases! 15 | data_dir=/srv/projects/db/alphafold 16 | DOWNLOAD_DIR=$data_dir 17 | 18 | # Path to a directory that will store the results 19 | output_dir="${PWD}/CLENT_006666_model" 20 | 21 | # Names of models to use (a comma separated list) 22 | model_names=model_1 23 | 24 | # Path to a FASTA file containing one sequence 25 | fasta_path="${PWD}/query.fasta" 26 | 27 | # Last template date to consider in model in (ISO-8601 format - i.e. YYYY-MM-DD) 28 | max_template_date=2020-08-12 29 | 30 | # Enable NVIDIA runtime to run with GPUs (default: True) 31 | use_gpu=true 32 | 33 | # OpenMM threads (default: all available cores) 34 | openmm_threads=24 35 | 36 | # Comma separated list of devices to pass to 'CUDA_VISIBLE_DEVICES' (default: 0) 37 | gpu_devices=0 38 | 39 | # Choose preset model configuration - no ensembling and smaller genetic database config (reduced_dbs), no ensembling and full genetic database config (full_dbs) or full genetic database config and 8 model ensemblings (casp14) 40 | preset=full_dbs 41 | 42 | # Run multiple JAX model evaluations to obtain a timing that excludes the compilation time, which should be more indicative of the time required for inferencing many proteins (default: 'False') 43 | benchmark=false 44 | 45 | # Manually set CUDA devices 46 | #export SINGULARITYENV_CUDA_VISIBLE_DEVICES=-1 47 | #if [[ "$use_gpu" == true ]] ; then 48 | # export SINGULARITYENV_CUDA_VISIBLE_DEVICES=0 49 | 50 | # if [[ "$gpu_devices" ]] ; then 51 | # export SINGULARITYENV_CUDA_VISIBLE_DEVICES=$gpu_devices 52 | # fi 53 | #fi 54 | 55 | # OpenMM threads control 56 | #if [[ "$openmm_threads" ]] ; then 57 | # export SINGULARITYENV_OPENMM_CPU_THREADS=$openmm_threads 58 | #fi 59 | 60 | # TensorFlow control 61 | #export SINGULARITYENV_TF_FORCE_UNIFIED_MEMORY='1' 62 | 63 | # JAX control 64 | #export SINGULARITYENV_XLA_PYTHON_CLIENT_MEM_FRACTION='4.0' 65 | 66 | # Path and user config (change me if required) 67 | bfd_database_path=$data_dir/bfd/bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt 68 | small_bfd_database_path=$data_dir/small_bfd/bfd-first_non_consensus_sequences.fasta 69 | mgnify_database_path=$data_dir/mgnify/mgy_clusters_2018_12.fa 70 | template_mmcif_dir=$data_dir/pdb_mmcif/mmcif_files/ 71 | obsolete_pdbs_path=$data_dir/pdb_mmcif/obsolete.dat 72 | 
pdb70_database_path=$data_dir/pdb70/pdb70 73 | uniclust30_database_path=$data_dir/uniclust30/uniclust30_2018_08/uniclust30_2018_08 74 | uniref90_database_path=$data_dir/uniref90/uniref90.fasta 75 | 76 | # Binary path defaults should work within singularity 77 | #hhblits_binary_path=$(which hhblits) 78 | #hhsearch_binary_path=$(which hhsearch) 79 | #jackhmmer_binary_path=$(which jackhmmer) 80 | #kalign_binary_path=$(which kalign) 81 | 82 | # Load alphafold 83 | module load alphafold/2.1.2 84 | 85 | # Load scratch 86 | module load workspace/scratch 87 | export SINGULARITY_BIND="${SCRATCH}:/tmp" 88 | 89 | # Run alphafold container with nvidia support 90 | singularity run --bind ${data_dir} --nv $ALPHAFOLD_SING \ 91 | --bfd_database_path=$bfd_database_path \ 92 | --mgnify_database_path=$mgnify_database_path \ 93 | --template_mmcif_dir=$template_mmcif_dir \ 94 | --obsolete_pdbs_path=$obsolete_pdbs_path \ 95 | --pdb70_database_path=$pdb70_database_path \ 96 | --uniclust30_database_path=$uniclust30_database_path \ 97 | --uniref90_database_path=$uniref90_database_path \ 98 | --data_dir=$data_dir \ 99 | --output_dir=$output_dir \ 100 | --fasta_paths=$fasta_path \ 101 | --model_names=$model_names \ 102 | --max_template_date=$max_template_date \ 103 | --preset=$preset \ 104 | --benchmark=$benchmark \ 105 | --logtostderr 106 | 107 | -------------------------------------------------------------------------------- /ansys/HKHLR-HowTo-Ansys_Fluent.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ucr-hpcc/hpcc_slurm_examples/74b84cb0c85dedf1a096d03e6e9e515ae36a8c22/ansys/HKHLR-HowTo-Ansys_Fluent.pdf -------------------------------------------------------------------------------- /ansys/README.md: -------------------------------------------------------------------------------- 1 | # Ansys Examples 2 | Each `*.sh` file here can be used as a template. 3 | Modify as needed and then submit it to the cluster using the `sbatch` command.
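For example, to use the CFX template below (the raw GitHub URL follows the same pattern as the other examples in this repository):

```bash
# Download the template
wget https://raw.githubusercontent.com/ucr-hpcc/hpcc_slurm_examples/master/ansys/run_cfx.sh

# Edit the #SBATCH resource requests and the solver arguments for your model
vim run_cfx.sh

# Submit to the cluster
sbatch run_cfx.sh
```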
4 | -------------------------------------------------------------------------------- /ansys/run_cfx.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -l 2 | 3 | #SBATCH --nodes=1 # 1 node 4 | #SBATCH --ntasks=16 # 16 Cores 5 | #SBATCH --mem-per-cpu=50G # 50 GB of RAM 6 | #SBATCH --time=7-00:00:00 # 7 days 7 | #SBATCH --output=my.stdout # Standard output file 8 | #SBATCH --mail-user=useremail@address.com # Your email 9 | #SBATCH --mail-type=ALL # Send mail on start,fail,complete 10 | #SBATCH --job-name="CFX Job" # Name of Job 11 | #SBATCH -p epyc # Use epyc nodes 12 | 13 | # Load ansys 14 | module load ansys 15 | 16 | # Do work 17 | cfx5solve -partition 16 -s 51200M -scat 1.5x -def Transient.def -ini Transient.res 18 | 19 | -------------------------------------------------------------------------------- /ansys/run_fluent_gpu.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -l 2 | 3 | #SBATCH --nodes=1 # 1 node 4 | #SBATCH --ntasks=16 # 16 Cores 5 | #SBATCH --mem-per-cpu=50G # 50 GB of RAM 6 | #SBATCH --time=7-00:00:00 # 7 days 7 | #SBATCH --output=my.stdout # Standard output file 8 | #SBATCH --mail-user=email@domain.com # Your email 9 | #SBATCH --mail-type=ALL # Send mail on start,fail,complete 10 | #SBATCH --job-name="Fluent Job" # Name of Job 11 | #SBATCH -p gpu # Use gpu nodes 12 | #SBATCH --gres=gpu:1 # Use 1 gpu 13 | 14 | # Load ansys 15 | module load ansys 16 | 17 | # ToDo 18 | # Need examples 19 | # Here is a good reference: 20 | #https://www.sharcnet.ca/Software/Ansys/16.2.3/en-us/help/flu_ug/flu_ug_sec_parallel_unix_command.html 21 | 22 | # Usage 23 | #fluent version -tnprocs [-gpgpu=ngpgpus ] [-pinterconnect ] [-mpi=mpi_type ] -cnf=hosts_file 24 | 25 | -------------------------------------------------------------------------------- /ansys/run_fluent_mpi_journal.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -l 2 | #SBATCH --ntasks=64 3 | #SBATCH -t 20:00:00 4 | #SBATCH --mem-per-cpu=6000 5 | 6 | module load intel 7 | module load ansys 8 | 9 | #Get a unique temporary filename to use for our nodelist 10 | FLUENTNODEFILE=$(mktemp) 11 | 12 | #Output the nodes to our nodelist file 13 | scontrol show hostnames > $FLUENTNODEFILE 14 | 15 | #Display to us the nodes being used 16 | echo "Running on nodes:" 17 | cat $FLUENTNODEFILE 18 | 19 | #Run fluent with the requested number of tasks on the assigned nodes 20 | fluent 3ddp -g -t $SLURM_NTASKS -mpi=intel -ssh -cnf="$FLUENTNODEFILE" -i YOUR_JOU_FILE 21 | 22 | #Clean up 23 | rm $FLUENTNODEFILE 24 | -------------------------------------------------------------------------------- /array/README.md: -------------------------------------------------------------------------------- 1 | # Array Job 2 | 3 | You can consider using an array job if you want to submit many jobs that look identical, except for the input. 4 | 5 | 6 | First write a job script that would work for a single input (for example, by removing a bash loop over inputs). 7 | Then use the `${SLURM_ARRAY_TASK_ID}` environment variable to control which input you should be processing. 8 | 9 | After that you need to submit the job using the `array` option, like so: 10 | 11 | ```bash 12 | sbatch --array=1-10 test_job.sh 13 | ``` 14 | 15 | This will copy the job into 10 tasks and the numbers 1 through 10 will each be used for the `$SLURM_ARRAY_TASK_ID` variable within each task.
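For instance, inside the job script the task ID can be used to pull one line out of an input list; this is exactly what [test_job.sh](test_job.sh), shown in the Examples section below, does:

```bash
# Select line number ${SLURM_ARRAY_TASK_ID} from inputs.txt,
# so task 1 gets "a", task 2 gets "b", and so on
input=$(sed -n "${SLURM_ARRAY_TASK_ID}p" inputs.txt)
echo "$input"
```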
16 | 17 | You can also control how many tasks are processed at the same time with the following syntax: 18 | 19 | ```bash 20 | sbatch --array=1-10%2 test_job.sh 21 | ``` 22 | 23 | This will only allow 2 out of 10 tasks to run at the same time. 24 | 25 | # Examples 26 | 27 | Here is an example of a [test_job.sh](test_job.sh) submission script and an [inputs.txt](inputs.txt) file to demonstrate how the `{SLURM_ARRAY_TASK_ID}` environment variable can be used to pull the correct input. 28 | 29 | -------------------------------------------------------------------------------- /array/command_line: -------------------------------------------------------------------------------- 1 | sbatch --array=1-10 -N 1 test_job.sh 2 | -------------------------------------------------------------------------------- /array/inputs.txt: -------------------------------------------------------------------------------- 1 | a 2 | b 3 | c 4 | d 5 | e 6 | f 7 | g 8 | h 9 | i 10 | j 11 | -------------------------------------------------------------------------------- /array/slurm-406233_1.out: -------------------------------------------------------------------------------- 1 | Processing task 1 2 | a 3 | -------------------------------------------------------------------------------- /array/slurm-406233_10.out: -------------------------------------------------------------------------------- 1 | Processing task 10 2 | j 3 | -------------------------------------------------------------------------------- /array/slurm-406233_2.out: -------------------------------------------------------------------------------- 1 | Processing task 2 2 | b 3 | -------------------------------------------------------------------------------- /array/slurm-406233_3.out: -------------------------------------------------------------------------------- 1 | Processing task 3 2 | c 3 | -------------------------------------------------------------------------------- /array/slurm-406233_4.out: -------------------------------------------------------------------------------- 1 | Processing task 4 2 | d 3 | -------------------------------------------------------------------------------- /array/slurm-406233_5.out: -------------------------------------------------------------------------------- 1 | Processing task 5 2 | e 3 | -------------------------------------------------------------------------------- /array/slurm-406233_6.out: -------------------------------------------------------------------------------- 1 | Processing task 6 2 | f 3 | -------------------------------------------------------------------------------- /array/slurm-406233_7.out: -------------------------------------------------------------------------------- 1 | Processing task 7 2 | g 3 | -------------------------------------------------------------------------------- /array/slurm-406233_8.out: -------------------------------------------------------------------------------- 1 | Processing task 8 2 | h 3 | -------------------------------------------------------------------------------- /array/slurm-406233_9.out: -------------------------------------------------------------------------------- 1 | Processing task 9 2 | i 3 | -------------------------------------------------------------------------------- /array/test_job.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #SBATCH --nodes=1 3 | #SBATCH --ntasks=1 4 | #SBATCH --mem-per-cpu=1G 5 | #SBATCH --time=0-00:15:00 # 15 minutes 6 | ##SBATCH --output=my.stdout 7 | ##SBATCH 
--mail-user=jhayes@ucr.edu 8 | ##SBATCH --mail-type=ALL 9 | ##SBATCH --job-name="just_a_test" 10 | 11 | echo "Processing task ${SLURM_ARRAY_TASK_ID}" 12 | 13 | # Get input value from file 14 | input=$(sed -n "${SLURM_ARRAY_TASK_ID}p" inputs.txt) 15 | 16 | # Run a command with input, replace "echo" with real command 17 | echo $input 18 | -------------------------------------------------------------------------------- /basic/slurm-1217.out: -------------------------------------------------------------------------------- 1 | Mon Dec 12 13:34:06 PST 2016 2 | i01 3 | -------------------------------------------------------------------------------- /basic/slurm-1218.out: -------------------------------------------------------------------------------- 1 | Mon Dec 12 13:35:24 PST 2016 2 | i02 3 | -------------------------------------------------------------------------------- /basic/slurm-1222.out: -------------------------------------------------------------------------------- 1 | Mon Dec 12 14:22:24 PST 2016 2 | Mon Dec 12 14:22:24 PST 2016 3 | Mon Dec 12 14:22:24 PST 2016 4 | Mon Dec 12 14:22:24 PST 2016 5 | Mon Dec 12 14:22:24 PST 2016 6 | Mon Dec 12 14:22:24 PST 2016 7 | Mon Dec 12 14:22:24 PST 2016 8 | Mon Dec 12 14:22:24 PST 2016 9 | Mon Dec 12 14:22:24 PST 2016 10 | Mon Dec 12 14:22:24 PST 2016 11 | Mon Dec 12 14:22:24 PST 2016 12 | Mon Dec 12 14:22:24 PST 2016 13 | Mon Dec 12 14:22:24 PST 2016 14 | Mon Dec 12 14:22:24 PST 2016 15 | Mon Dec 12 14:22:24 PST 2016 16 | Mon Dec 12 14:22:24 PST 2016 17 | Mon Dec 12 14:22:24 PST 2016 18 | Mon Dec 12 14:22:24 PST 2016 19 | Mon Dec 12 14:22:24 PST 2016 20 | Mon Dec 12 14:22:24 PST 2016 21 | Mon Dec 12 14:22:24 PST 2016 22 | Mon Dec 12 14:22:24 PST 2016 23 | Mon Dec 12 14:22:24 PST 2016 24 | Mon Dec 12 14:22:24 PST 2016 25 | Mon Dec 12 14:22:24 PST 2016 26 | Mon Dec 12 14:22:24 PST 2016 27 | Mon Dec 12 14:22:24 PST 2016 28 | Mon Dec 12 14:22:24 PST 2016 29 | Mon Dec 12 14:22:24 PST 2016 30 | Mon Dec 12 14:22:24 PST 2016 31 | Mon Dec 12 14:22:24 PST 2016 32 | Mon Dec 12 14:22:24 PST 2016 33 | Mon Dec 12 14:22:24 PST 2016 34 | Mon Dec 12 14:22:24 PST 2016 35 | Mon Dec 12 14:22:24 PST 2016 36 | Mon Dec 12 14:22:24 PST 2016 37 | Mon Dec 12 14:22:24 PST 2016 38 | Mon Dec 12 14:22:24 PST 2016 39 | Mon Dec 12 14:22:24 PST 2016 40 | Mon Dec 12 14:22:24 PST 2016 41 | Mon Dec 12 14:22:24 PST 2016 42 | Mon Dec 12 14:22:24 PST 2016 43 | Mon Dec 12 14:22:24 PST 2016 44 | Mon Dec 12 14:22:24 PST 2016 45 | Mon Dec 12 14:22:24 PST 2016 46 | Mon Dec 12 14:22:24 PST 2016 47 | Mon Dec 12 14:22:24 PST 2016 48 | Mon Dec 12 14:22:24 PST 2016 49 | Mon Dec 12 14:22:24 PST 2016 50 | Mon Dec 12 14:22:24 PST 2016 51 | Mon Dec 12 14:22:24 PST 2016 52 | Mon Dec 12 14:22:24 PST 2016 53 | Mon Dec 12 14:22:24 PST 2016 54 | Mon Dec 12 14:22:24 PST 2016 55 | Mon Dec 12 14:22:24 PST 2016 56 | Mon Dec 12 14:22:24 PST 2016 57 | Mon Dec 12 14:22:24 PST 2016 58 | Mon Dec 12 14:22:24 PST 2016 59 | Mon Dec 12 14:22:24 PST 2016 60 | Mon Dec 12 14:22:24 PST 2016 61 | Mon Dec 12 14:22:24 PST 2016 62 | Mon Dec 12 14:22:24 PST 2016 63 | Mon Dec 12 14:22:24 PST 2016 64 | Mon Dec 12 14:22:24 PST 2016 65 | slurmstepd-i02: error: execve(): slep: No such file or directory 66 | slurmstepd-i02: error: execve(): slep: No such file or directory 67 | slurmstepd-i02: error: execve(): slep: No such file or directory 68 | slurmstepd-i02: error: execve(): slep: No such file or directory 69 | slurmstepd-i02: error: execve(): slep: No such file or directory 70 | slurmstepd-i02: error: execve(): slep: No such 
file or directory 71 | slurmstepd-i02: error: execve(): slep: No such file or directory 72 | slurmstepd-i02: error: execve(): slep: No such file or directory 73 | slurmstepd-i02: error: execve(): slep: No such file or directory 74 | slurmstepd-i02: error: execve(): slep: No such file or directory 75 | slurmstepd-i02: error: execve(): slep: No such file or directory 76 | slurmstepd-i02: error: execve(): slep: No such file or directory 77 | slurmstepd-i02: error: execve(): slep: No such file or directory 78 | slurmstepd-i02: error: execve(): slep: No such file or directory 79 | slurmstepd-i02: error: execve(): slep: No such file or directory 80 | slurmstepd-i02: error: execve(): slep: No such file or directory 81 | slurmstepd-i02: error: execve(): slep: No such file or directory 82 | slurmstepd-i02: error: execve(): slep: No such file or directory 83 | slurmstepd-i02: error: execve(): slep: No such file or directory 84 | slurmstepd-i02: error: execve(): slep: No such file or directory 85 | slurmstepd-i02: error: execve(): slep: No such file or directory 86 | slurmstepd-i02: error: execve(): slep: No such file or directory 87 | slurmstepd-i02: error: execve(): slep: No such file or directory 88 | slurmstepd-i02: error: execve(): slep: No such file or directory 89 | slurmstepd-i02: error: execve(): slep: No such file or directory 90 | slurmstepd-i02: error: execve(): slep: No such file or directory 91 | slurmstepd-i02: error: execve(): slep: No such file or directory 92 | slurmstepd-i02: error: execve(): slep: No such file or directory 93 | slurmstepd-i02: error: execve(): slep: No such file or directory 94 | slurmstepd-i02: error: execve(): slep: No such file or directory 95 | slurmstepd-i02: error: execve(): slep: No such file or directory 96 | slurmstepd-i02: error: execve(): slep: No such file or directory 97 | slurmstepd-i02: error: execve(): slep: No such file or directory 98 | slurmstepd-i02: error: execve(): slep: No such file or directory 99 | slurmstepd-i02: error: execve(): slep: No such file or directory 100 | slurmstepd-i02: error: execve(): slep: No such file or directory 101 | slurmstepd-i02: error: execve(): slep: No such file or directory 102 | slurmstepd-i02: error: execve(): slep: No such file or directory 103 | slurmstepd-i02: error: execve(): slep: No such file or directory 104 | slurmstepd-i02: error: execve(): slep: No such file or directory 105 | slurmstepd-i02: error: execve(): slep: No such file or directory 106 | slurmstepd-i02: error: execve(): slep: No such file or directory 107 | slurmstepd-i02: error: execve(): slep: No such file or directory 108 | slurmstepd-i02: error: execve(): slep: No such file or directory 109 | slurmstepd-i02: error: execve(): slep: No such file or directory 110 | slurmstepd-i02: error: execve(): slep: No such file or directory 111 | slurmstepd-i02: error: execve(): slep: No such file or directory 112 | slurmstepd-i02: error: execve(): slep: No such file or directory 113 | slurmstepd-i02: error: execve(): slep: No such file or directory 114 | slurmstepd-i02: error: execve(): slep: No such file or directory 115 | slurmstepd-i02: error: execve(): slep: No such file or directory 116 | slurmstepd-i02: error: execve(): slep: No such file or directory 117 | slurmstepd-i02: error: execve(): slep: No such file or directory 118 | slurmstepd-i02: error: execve(): slep: No such file or directory 119 | slurmstepd-i02: error: execve(): slep: No such file or directory 120 | slurmstepd-i02: error: execve(): slep: No such file or directory 121 | 
slurmstepd-i02: error: execve(): slep: No such file or directory 122 | slurmstepd-i02: error: execve(): slep: No such file or directory 123 | slurmstepd-i02: error: execve(): slep: No such file or directory 124 | slurmstepd-i02: error: execve(): slep: No such file or directory 125 | slurmstepd-i02: error: execve(): slep: No such file or directory 126 | slurmstepd-i02: error: execve(): slep: No such file or directory 127 | slurmstepd-i02: error: execve(): slep: No such file or directory 128 | slurmstepd-i02: error: execve(): slep: No such file or directory 129 | srun: error: i02: tasks 0-63: Exited with exit code 2 130 | i02 131 | i02 132 | i02 133 | i02 134 | i02 135 | i02 136 | i02 137 | i02 138 | i02 139 | i02 140 | i02 141 | i02 142 | i02 143 | i02 144 | i02 145 | i02 146 | i02 147 | i02 148 | i02 149 | i02 150 | i02 151 | i02 152 | i02 153 | i02 154 | i02 155 | i02 156 | i02 157 | i02 158 | i02 159 | i02 160 | i02 161 | i02 162 | i02 163 | i02 164 | i02 165 | i02 166 | i02 167 | i02 168 | i02 169 | i02 170 | i02 171 | i02 172 | i02 173 | i02 174 | i02 175 | i02 176 | i02 177 | i02 178 | i02 179 | i02 180 | i02 181 | i02 182 | i02 183 | i02 184 | i02 185 | i02 186 | i02 187 | i02 188 | i02 189 | i02 190 | i02 191 | i02 192 | i02 193 | i02 194 | -------------------------------------------------------------------------------- /basic/slurm-666.out: -------------------------------------------------------------------------------- 1 | Thu Nov 24 01:36:29 PST 2016 2 | /rhome/jhayes/iigb/slurm/basic 3 | i01 4 | -------------------------------------------------------------------------------- /basic/slurm-667.out: -------------------------------------------------------------------------------- 1 | Thu Nov 24 01:37:26 PST 2016 2 | Currently Loaded Modulefiles: 3 | 1) vim/7.4.1952 6) ggobi/2.1.11 4 | 2) tmux/2.2 7) R/3.3.0 5 | 3) python/2.7.5 8) perl/5.20.2 6 | 4) slurm/16.05.4 9) less-highlight/1.0 7 | 5) openmpi/2.0.1-slurm-16.05.4 10) iigb_utilities/1 8 | i01 9 | -------------------------------------------------------------------------------- /basic/slurm-668.out: -------------------------------------------------------------------------------- 1 | Thu Nov 24 01:37:55 PST 2016 2 | Currently Loaded Modulefiles: 3 | 1) vim/7.4.1952 6) ggobi/2.1.11 4 | 2) tmux/2.2 7) R/3.3.0 5 | 3) python/2.7.5 8) perl/5.20.2 6 | 4) slurm/16.05.4 9) less-highlight/1.0 7 | 5) openmpi/2.0.1-slurm-16.05.4 10) iigb_utilities/1 8 | i01 9 | -------------------------------------------------------------------------------- /basic/slurm-669.out: -------------------------------------------------------------------------------- 1 | Thu Nov 24 01:38:24 PST 2016 2 | /rhome/jhayes/iigb/slurm/basic 3 | Currently Loaded Modulefiles: 4 | 1) vim/7.4.1952 6) ggobi/2.1.11 5 | 2) tmux/2.2 7) R/3.3.0 6 | 3) python/2.7.5 8) perl/5.20.2 7 | 4) slurm/16.05.4 9) less-highlight/1.0 8 | 5) openmpi/2.0.1-slurm-16.05.4 10) iigb_utilities/1 9 | i01 10 | -------------------------------------------------------------------------------- /basic/statsdept_test_job.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #SBATCH --nodes=1 3 | #SBATCH --ntasks=1 4 | #SBATCH --cpus-per-task=1 5 | #SBATCH -p statsdept 6 | #SBATCH --mem-per-cpu=1G 7 | #SBATCH --time=0-00:15:00 # 15 minutes 8 | ##SBATCH --mail-user=email@address.com 9 | ##SBATCH --mail-type=ALL 10 | ##SBATCH --job-name="just_a_test" 11 | 12 | date 13 | sleep 60 14 | hostname 15 | 
-------------------------------------------------------------------------------- /basic/test_job.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #SBATCH --nodes=1 3 | #SBATCH --ntasks=1 4 | #SBATCH --cpus-per-task=1 5 | #SBATCH -p epyc 6 | #SBATCH --mem-per-cpu=1G 7 | #SBATCH --time=0-00:15:00 # 15 minutes 8 | ##SBATCH --mail-user=email@address.com 9 | ##SBATCH --mail-type=ALL 10 | ##SBATCH --job-name="just_a_test" 11 | 12 | date 13 | sleep 60 14 | hostname 15 | -------------------------------------------------------------------------------- /basic/test_pbs.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | #PBS -N test_job 3 | #PBS -l nodes=1,walltime=01:00:00 4 | #PBS -q batch 5 | 6 | date 7 | sleep 60 8 | hostname 9 | -------------------------------------------------------------------------------- /blast/test_job.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -l 2 | 3 | #SBATCH -c 10 4 | #SBATCH --mem=10g 5 | #SBATCH --time=2:00:00 6 | #SBATCH -p short 7 | 8 | module load ncbi-blast 9 | cd ~/bigdata/Projects/blast_fasta/ 10 | blastp -num_threads 10 dsg sdgsdg dhfdh 11 | 12 | -------------------------------------------------------------------------------- /castep/job.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -l 2 | #SBATCH -N 1 3 | #SBATCH -c 1 4 | #SBATCH -n 4 5 | #SBATCH -p short 6 | #SBATCH --time=10:00 7 | 8 | # Ensure cleanworking dir 9 | rm -rf ~/bigdata/Projects/castep 10 | mkdir -p ~/bigdata/Projects/castep 11 | 12 | # Move to working dir 13 | cd ~/bigdata/Projects/castep 14 | 15 | # Get data 16 | wget http://www.castep.org/files/Si2.tgz 17 | 18 | # Extract data 19 | tar -xf Si2.tgz 20 | 21 | # Move to data 22 | cd Si2 23 | 24 | # Clear default modules 25 | module purge 26 | # Load common modules 27 | module load slurm hpcc_user_utils 28 | 29 | # Load module based on CPU type 30 | if [[ $(cpu_type) == "intel" ]] || [[ $(cpu_type) == "xeon" ]]; then 31 | echo "Loading intel" 32 | module load castep/19.11_intel-2017 33 | else 34 | echo "Loading gcc" 35 | module load castep/19.11_gcc-8.3.0 36 | fi 37 | 38 | # Run with mpi 39 | mpirun -n 4 castep.mpi Si2 40 | -------------------------------------------------------------------------------- /checkpoint/blcr/slurm-2963303.out: -------------------------------------------------------------------------------- 1 | srun_cr: error while loading shared libraries: libcr.so.0: cannot open shared object file: No such file or directory 2 | 1 3 | srun_cr: error while loading shared libraries: libcr.so.0: cannot open shared object file: No such file or directory 4 | 2 5 | srun_cr: error while loading shared libraries: libcr.so.0: cannot open shared object file: No such file or directory 6 | 3 7 | srun_cr: error while loading shared libraries: libcr.so.0: cannot open shared object file: No such file or directory 8 | 4 9 | srun_cr: error while loading shared libraries: libcr.so.0: cannot open shared object file: No such file or directory 10 | 5 11 | srun_cr: error while loading shared libraries: libcr.so.0: cannot open shared object file: No such file or directory 12 | 6 13 | srun_cr: error while loading shared libraries: libcr.so.0: cannot open shared object file: No such file or directory 14 | 7 15 | srun_cr: error while loading shared libraries: libcr.so.0: cannot open shared object file: No such file or directory 16 | 8 17 | 
srun_cr: error while loading shared libraries: libcr.so.0: cannot open shared object file: No such file or directory 18 | 9 19 | srun_cr: error while loading shared libraries: libcr.so.0: cannot open shared object file: No such file or directory 20 | 10 21 | srun_cr: error while loading shared libraries: libcr.so.0: cannot open shared object file: No such file or directory 22 | 11 23 | srun_cr: error while loading shared libraries: libcr.so.0: cannot open shared object file: No such file or directory 24 | 12 25 | srun_cr: error while loading shared libraries: libcr.so.0: cannot open shared object file: No such file or directory 26 | 13 27 | srun_cr: error while loading shared libraries: libcr.so.0: cannot open shared object file: No such file or directory 28 | 14 29 | srun_cr: error while loading shared libraries: libcr.so.0: cannot open shared object file: No such file or directory 30 | 15 31 | slurmstepd-i09: error: *** JOB 2963303 ON i09 CANCELLED AT 2018-08-08T16:56:23 *** 32 | -------------------------------------------------------------------------------- /checkpoint/blcr/slurm-2963306.out: -------------------------------------------------------------------------------- 1 | srun_cr: fatal: failed to initialize libcr: Function not implemented 2 | 1 3 | srun_cr: fatal: failed to initialize libcr: Function not implemented 4 | 2 5 | srun_cr: fatal: failed to initialize libcr: Function not implemented 6 | 3 7 | srun_cr: fatal: failed to initialize libcr: Function not implemented 8 | 4 9 | srun_cr: fatal: failed to initialize libcr: Function not implemented 10 | 5 11 | srun_cr: fatal: failed to initialize libcr: Function not implemented 12 | 6 13 | slurmstepd-i25: error: *** JOB 2963306 ON i25 CANCELLED AT 2018-08-08T17:05:47 *** 14 | -------------------------------------------------------------------------------- /checkpoint/blcr/test_job.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -l 2 | 3 | module load blcr 4 | 5 | count=0 6 | while [ 1 -eq 1 ]; do 7 | count=$(($count+1)) 8 | sleep $count 9 | srun_cr 10 | echo "$count" 11 | done 12 | 13 | echo "Completed $count" 14 | -------------------------------------------------------------------------------- /checkpoint/dmtcp/README: -------------------------------------------------------------------------------- 1 | When using DMTCP, you will need two submit scripts: one for 2 | launching under checkpoint control, and one for restarting 3 | from a crashed job. 4 | 5 | For example, for SLURM, you would modify slurm_launch.job to change 6 | "" in the line for dmtcp_launch. The default script 7 | does not automatically checkpoint. Search on "dmtcp_command" for 8 | instructions on how to use it to manually request a checkpoint. 9 | Do "dmtcp_command -h" to see the options for "dmtcp_command". 10 | Alternatively, search on "start_coordinator" in slurm_launch.job, 11 | and add "-i 3600" to create a checkpoint every 3600 seconds (every hour). 12 | "dmtcp_coordinator -h" and "dmtcp_launch -h" also exist. 13 | When ready, execute the SLURM command: 14 | sbatch slurm_launch.job 15 | 16 | Upon checkpointing, a script, dmtcp_restart_script.sh, will be saved 17 | in the local directory, along with the checkpoint image files. 18 | 19 | When restarting, slurm_rstr.job assumes that the script 20 | dmtcp_restart_script.sh is in the local directory. 21 | The default for the restart script is for manually requested 22 | checkpointing. 
See the above instructions and "dmtcp_restart -h" 23 | for setting checkpoints at regular time intervals. Modify 24 | slurm_rstr.job if automatic checkpointing is desired. 25 | Finally, it suffices to run: 26 | sbatch slurm_rstr.job 27 | -------------------------------------------------------------------------------- /checkpoint/dmtcp/ccr_buffalo/README.txt: -------------------------------------------------------------------------------- 1 | This scipts was provided by L. Shawn Matott 2 | Center for Computational Research (CCR) University of Buffalo 3 | while deploying DMTCP on CCR rush cluster 4 | (http://ccr.buffalo.edu/support/research_facilities/general_compute.html) 5 | -------------------------------------------------------------------------------- /checkpoint/dmtcp/ccr_buffalo/slurm_dmtcp_ompi_tcp: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #SBATCH --time=01:00:00 3 | #SBATCH --nodes=2 4 | #SBATCH --cpus-per-task=1 5 | #SBATCH --tasks-per-node=8 6 | #SBATCH --mail-user=your_user_name@buffalo.edu 7 | #SBATCH --mail-type=END 8 | #SBATCH --job-name=dmtcp 9 | #SBATCH --output=output.out 10 | #SBATCH --error=output.err 11 | #SBATCH --partition=debug 12 | 13 | # spit out some basic SLURM information 14 | echo "SLURM_JOBID = "$SLURM_JOB_ID 15 | echo "SLURM_SUBMIT_DIR = "$SLURM_SUBMIT_DIR 16 | echo "SLURM_NODELIST = "`nodeset -e $SLURM_NODELIST` 17 | echo "SLURM_NPROCS = "$SLURM_NPROCS 18 | echo "SLURM_NNODES = "$SLURM_NNODES 19 | echo "SLURM_CPUS_PER_TASK = "$SLURM_CPUS_PER_TASK 20 | echo "SLURMTMPDIR = "$SLURMTMPDIR 21 | 22 | module load dmtcp/2.2.1-r2777 23 | #module load openmpi/gcc-4.4.7/1.8.0 24 | module load openmpi/gcc-4.4.6/1.6.5 25 | module list 26 | ulimit -s unlimited 27 | 28 | # 29 | # How long to run the application before checkpointing. 30 | # After checkpointing, the application will be shut down. 31 | # Users will typically want to set this to occur a bit before 32 | # the job's walltime expires. 33 | # 34 | CHECKPOINT_TIME=1m 35 | 36 | # EXE is the name of the application/executable 37 | # ARGS is any command-line args 38 | # OUTFILE is the file where stdout will be redirected 39 | # ERRFILE if the file where stderr will be redirected 40 | EXE=$SLURM_SUBMIT_DIR/MonteCarloIntegration 41 | ARGS="1200000000 0 8" 42 | OUTFILE=Integrals.out 43 | ERRFILE=Integrals.err 44 | 45 | # This script with auto-sense whether to perform a checkpoint 46 | # or restart operation. Set FORCE_CHECKPOINT to yes if you 47 | # DO NOT want to restart even if a restart script is located 48 | # in the working directory. 49 | FORCE_CHECKPOINT=No 50 | 51 | # ************************************************************************************************* 52 | # ************************************************************************************************* 53 | # Users should not have to change anything beyond this point! 54 | # ************************************************************************************************* 55 | # ************************************************************************************************* 56 | export DMTCP_TMPDIR=$SLURM_SUBMIT_DIR 57 | 58 | # configure openmpi environment to use tcp 59 | export OMPI_MCA_mtl=^psm 60 | export OMPI_MCA_btl=self,tcp 61 | 62 | # ================================================================================================= 63 | # start_coordinator() 64 | # Routine provided by Artem Polyakov 65 | # 66 | # Start dmtcp coordinator on launching node. 
Free TCP port is automatically allocated. 67 | # this function creates dmtcp_command.$JOBID script that serves like a wrapper around 68 | # dmtcp_command that tunes it on exact dmtcp_coordinator (it's hostname and port) 69 | # instead of typing "dmtcp_command -h -p " 70 | # you just type "dmtcp_command.$JOBID " and talk to coordinator of JOBID job 71 | # ================================================================================================= 72 | start_coordinator() 73 | { 74 | fname=dmtcp_command.$SLURM_JOBID 75 | h=`hostname` 76 | echo "dmtcp_coordinator --daemon --exit-on-last -p 0 --port-file $fname $@ 1>/dev/null 2>&1" 77 | dmtcp_coordinator --daemon --exit-on-last -p 0 --port-file $fname $@ 1>/dev/null 2>&1 78 | 79 | while true; do 80 | if [ -f "$fname" ]; then 81 | p=`cat $fname` 82 | if [ -n "$p" ]; then 83 | # try to communicate ? dmtcp_command -p $p l 84 | break 85 | fi 86 | fi 87 | done 88 | 89 | # Create dmtcp_command wrapper for easy communication with coordinator 90 | p=`cat $fname` 91 | chmod +x $fname 92 | echo "#!/bin/bash" > $fname 93 | echo >> $fname 94 | echo "export PATH=$PATH" >> $fname 95 | echo "export DMTCP_HOST=$h" >> $fname 96 | echo "export DMTCP_COORD_PORT=$p" >> $fname 97 | echo "dmtcp_command \$@" >> $fname 98 | 99 | # Setup local environment for DMTCP 100 | export DMTCP_COORD_HOST=$h 101 | export DMTCP_COORD_PORT=$p 102 | } 103 | 104 | echo "Launching dmtcp coordintor daemon" 105 | echo "start_coordinator --exit-after-ckpt" 106 | start_coordinator --exit-after-ckpt 107 | 108 | # convert checkpoint time to seconds 109 | nTics=`echo $CHECKPOINT_TIME | \ 110 | sed 's/m/ \* 60/g' | \ 111 | sed 's/h/ \* 3600/g' | \ 112 | sed 's/d/ \* 86400/g' | \ 113 | sed 's/s//g' | \ 114 | bc | \ 115 | awk '{ printf("%d\n", $1); }'` 116 | echo "Checkpointing will commence after $nTics seconds" 117 | 118 | tic=`date +%s` 119 | if [[ -f ./dmtcp_restart_script.sh ]] && [[ "${FORCE_CHECKPOINT}" == "No" ]]; then 120 | echo "Restarting application under dmtcp control" 121 | echo "./dmtcp_restart_script.sh -h $DMTCP_COORD_HOST -p $DMTCP_COORD_PORT -i $nTics 1>>$OUTFILE 2>>$ERRFILE" 122 | ./dmtcp_restart_script.sh -h $DMTCP_COORD_HOST -p $DMTCP_COORD_PORT -i $nTics 1>>${OUTFILE}.${SLURM_JOB_ID} 2>>${ERRFILE}.${SLURM_JOB_ID} 123 | cat ${OUTFILE}.${SLURM_JOB_ID} >> ${OUTFILE} 124 | rm -f ${OUTFILE}.${SLURM_JOB_ID} 125 | cat ${ERRFILE}.${SLURM_JOB_ID} >> ${ERRFILE} 126 | rm -f ${ERRFILE}.${SLURM_JOB_ID} 127 | else 128 | # clear output and error files 129 | echo "" > ${OUTFILE} 130 | echo "" > ${ERRFILE} 131 | echo "Launching application under dmtcp control" 132 | echo "srun dmtcp_launch --quiet --rm -i $nTics $EXE $ARGS 1>${OUTFILE} 2>${ERRFILE}" 133 | srun dmtcp_launch --quiet --rm -i $nTics $EXE $ARGS 1>${OUTFILE} 2>${ERRFILE} 134 | fi 135 | toc=`date +%s` 136 | 137 | elapsedTime=`expr $toc - $tic` 138 | overheadTime=`expr $elapsedTime - $nTics` 139 | if [ "$overheadTime" -lt "0" ]; then 140 | overheadTime=0 141 | echo "All done - no checkpoint was required." 
142 | else 143 | echo "All done - checkpoint files are listed below:" 144 | ls -1 *.dmtcp 145 | fi 146 | 147 | echo "Elapsed Time = $elapsedTime seconds" 148 | echo "Checkpoint Overhead = $overheadTime seconds" 149 | 150 | -------------------------------------------------------------------------------- /checkpoint/dmtcp/ccr_buffalo/slurm_dmtcp_openmp: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #SBATCH --time=01:00:00 3 | #SBATCH --nodes=1 4 | #SBATCH --cpus-per-task=8 5 | #SBATCH --tasks-per-node=1 6 | #SBATCH --mail-user=your_user_name@buffalo.edu 7 | #SBATCH --mail-type=END 8 | #SBATCH --job-name=dmtcp 9 | #SBATCH --output=output.out 10 | #SBATCH --error=output.err 11 | #SBATCH --partition=debug 12 | 13 | # spit out some basic SLURM information 14 | echo "SLURM_JOBID = "$SLURM_JOB_ID 15 | echo "SLURM_SUBMIT_DIR = "$SLURM_SUBMIT_DIR 16 | echo "SLURM_NODELIST = "`nodeset -e $SLURM_NODELIST` 17 | echo "SLURM_NPROCS = "$SLURM_NPROCS 18 | echo "SLURM_NNODES = "$SLURM_NNODES 19 | echo "SLURM_CPUS_PER_TASK = "$SLURM_CPUS_PER_TASK 20 | echo "SLURMTMPDIR = "$SLURMTMPDIR 21 | 22 | # configure OpenMP environment 23 | export OMP_NUM_THREADS=$SLURM_CPUS_PER_TASK 24 | 25 | module load dmtcp/2.2.1-r2777 26 | module list 27 | ulimit -s unlimited 28 | 29 | # 30 | # How long to run the application before checkpointing. 31 | # After checkpointing, the application will be shut down. 32 | # Users will typically want to set this to occur a bit before 33 | # the job's walltime expires. 34 | # 35 | CHECKPOINT_TIME=1m 36 | 37 | # EXE is the name of the application/executable 38 | # ARGS is any command-line args 39 | # OUTFILE is the file where stdout will be redirected 40 | # ERRFILE if the file where stderr will be redirected 41 | EXE=$SLURM_SUBMIT_DIR/MonteCarloIntegration 42 | ARGS="1200000000 0 8" 43 | OUTFILE=Integrals.out 44 | ERRFILE=Integrals.err 45 | 46 | # This script with auto-sense whether to perform a checkpoint 47 | # or restart operation. Set FORCE_CHECKPOINT to yes if you 48 | # DO NOT want to restart even if a restart script is located 49 | # in the working directory. 50 | FORCE_CHECKPOINT=No 51 | 52 | # ************************************************************************************************* 53 | # ************************************************************************************************* 54 | # Users should not have to change anything beyond this point! 55 | # ************************************************************************************************* 56 | # ************************************************************************************************* 57 | export DMTCP_TMPDIR=$SLURM_SUBMIT_DIR 58 | 59 | # ================================================================================================= 60 | # start_coordinator() 61 | # Routine provided by Artem Polyakov 62 | # 63 | # Start dmtcp coordinator on launching node. Free TCP port is automatically allocated. 
64 | # this function creates dmtcp_command.$JOBID script that serves like a wrapper around 65 | # dmtcp_command that tunes it on exact dmtcp_coordinator (it's hostname and port) 66 | # instead of typing "dmtcp_command -h -p " 67 | # you just type "dmtcp_command.$JOBID " and talk to coordinator of JOBID job 68 | # ================================================================================================= 69 | start_coordinator() 70 | { 71 | fname=dmtcp_command.$SLURM_JOBID 72 | h=`hostname` 73 | echo "dmtcp_coordinator --daemon --exit-on-last -p 0 --port-file $fname $@ 1>/dev/null 2>&1" 74 | dmtcp_coordinator --daemon --exit-on-last -p 0 --port-file $fname $@ 1>/dev/null 2>&1 75 | 76 | while true; do 77 | if [ -f "$fname" ]; then 78 | p=`cat $fname` 79 | if [ -n "$p" ]; then 80 | # try to communicate ? dmtcp_command -p $p l 81 | break 82 | fi 83 | fi 84 | done 85 | 86 | # Create dmtcp_command wrapper for easy communication with coordinator 87 | p=`cat $fname` 88 | chmod +x $fname 89 | echo "#!/bin/bash" > $fname 90 | echo >> $fname 91 | echo "export PATH=$PATH" >> $fname 92 | echo "export DMTCP_HOST=$h" >> $fname 93 | echo "export DMTCP_COORD_PORT=$p" >> $fname 94 | echo "dmtcp_command \$@" >> $fname 95 | 96 | # Setup local environment for DMTCP 97 | export DMTCP_COORD_HOST=$h 98 | export DMTCP_COORD_PORT=$p 99 | } 100 | 101 | echo "Launching dmtcp coordintor daemon" 102 | echo "start_coordinator --exit-after-ckpt" 103 | start_coordinator --exit-after-ckpt 104 | 105 | # convert checkpoint time to seconds 106 | nTics=`echo $CHECKPOINT_TIME | \ 107 | sed 's/m/ \* 60/g' | \ 108 | sed 's/h/ \* 3600/g' | \ 109 | sed 's/d/ \* 86400/g' | \ 110 | sed 's/s//g' | \ 111 | bc | \ 112 | awk '{ printf("%d\n", $1); }'` 113 | echo "Checkpointing will commence after $nTics seconds" 114 | 115 | tic=`date +%s` 116 | if [[ -f ./dmtcp_restart_script.sh ]] && [[ "${FORCE_CHECKPOINT}" == "No" ]]; then 117 | echo "Restarting application under dmtcp control" 118 | echo "./dmtcp_restart_script.sh -h $DMTCP_COORD_HOST -p $DMTCP_COORD_PORT -i $nTics 1>>$OUTFILE 2>>$ERRFILE" 119 | ./dmtcp_restart_script.sh -h $DMTCP_COORD_HOST -p $DMTCP_COORD_PORT -i $nTics 1>>${OUTFILE}.${SLURM_JOB_ID} 2>>${ERRFILE}.${SLURM_JOB_ID} 120 | cat ${OUTFILE}.${SLURM_JOB_ID} >> ${OUTFILE} 121 | rm -f ${OUTFILE}.${SLURM_JOB_ID} 122 | cat ${ERRFILE}.${SLURM_JOB_ID} >> ${ERRFILE} 123 | rm -f ${ERRFILE}.${SLURM_JOB_ID} 124 | else 125 | # clear output and error files 126 | echo "" > ${OUTFILE} 127 | echo "" > ${ERRFILE} 128 | echo "Launching application under dmtcp control" 129 | echo "srun dmtcp_launch --quiet --rm -i $nTics $EXE $ARGS 1>${OUTFILE} 2>${ERRFILE}" 130 | srun dmtcp_launch --quiet --rm -i $nTics $EXE $ARGS 1>${OUTFILE} 2>${ERRFILE} 131 | fi 132 | toc=`date +%s` 133 | 134 | elapsedTime=`expr $toc - $tic` 135 | overheadTime=`expr $elapsedTime - $nTics` 136 | if [ "$overheadTime" -lt "0" ]; then 137 | overheadTime=0 138 | echo "All done - no checkpoint was required." 
139 | else 140 | echo "All done - checkpoint files are listed below:" 141 | ls -1 *.dmtcp 142 | fi 143 | 144 | echo "Elapsed Time = $elapsedTime seconds" 145 | echo "Checkpoint Overhead = $overheadTime seconds" 146 | -------------------------------------------------------------------------------- /checkpoint/dmtcp/ccr_buffalo/slurm_dmtcp_serial: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #SBATCH --time=01:00:00 3 | #SBATCH --nodes=1 4 | #SBATCH --cpus-per-task=1 5 | #SBATCH --tasks-per-node=1 6 | #SBATCH --mail-user=your_user_name@buffalo.edu 7 | #SBATCH --mail-type=END 8 | #SBATCH --job-name=dmtcp 9 | #SBATCH --output=output.out 10 | #SBATCH --error=output.err 11 | #SBATCH --partition=debug 12 | 13 | # spit out some basic SLURM information 14 | echo "SLURM_JOBID = "$SLURM_JOB_ID 15 | echo "SLURM_SUBMIT_DIR = "$SLURM_SUBMIT_DIR 16 | echo "SLURM_NODELIST = "`nodeset -e $SLURM_NODELIST` 17 | echo "SLURM_NPROCS = "$SLURM_NPROCS 18 | echo "SLURM_NNODES = "$SLURM_NNODES 19 | echo "SLURMTMPDIR = "$SLURMTMPDIR 20 | 21 | module load dmtcp/2.2.1-r2777 22 | module list 23 | ulimit -s unlimited 24 | 25 | # 26 | # How long to run the application before checkpointing. 27 | # After checkpointing, the application will be shut down. 28 | # Users will typically want to set this to occur a bit before 29 | # the job's walltime expires. 30 | # 31 | CHECKPOINT_TIME=1m 32 | 33 | # EXE is the name of the application/executable 34 | # ARGS is any command-line args 35 | # OUTFILE is the file where stdout will be redirected 36 | # ERRFILE if the file where stderr will be redirected 37 | EXE=$SLURM_SUBMIT_DIR/MonteCarloIntegration 38 | ARGS="100000000 0" 39 | OUTFILE=Integrals.out 40 | ERRFILE=Integrals.err 41 | 42 | # This script with auto-sense whether to perform a checkpoint 43 | # or restart operation. Set FORCE_CHECKPOINT to yes if you 44 | # DO NOT want to restart even if a restart script is located 45 | # in the working directory. 46 | FORCE_CHECKPOINT=No 47 | 48 | # ************************************************************************************************* 49 | # ************************************************************************************************* 50 | # Users should not have to change anything beyond this point! 51 | # ************************************************************************************************* 52 | # ************************************************************************************************* 53 | export DMTCP_TMPDIR=$SLURM_SUBMIT_DIR 54 | 55 | # ================================================================================================= 56 | # start_coordinator() 57 | # Routine provided by Artem Polyakov 58 | # 59 | # Start dmtcp coordinator on launching node. Free TCP port is automatically allocated. 
60 | # this function creates dmtcp_command.$JOBID script that serves like a wrapper around 61 | # dmtcp_command that tunes it on exact dmtcp_coordinator (it's hostname and port) 62 | # instead of typing "dmtcp_command -h -p " 63 | # you just type "dmtcp_command.$JOBID " and talk to coordinator of JOBID job 64 | # ================================================================================================= 65 | start_coordinator() 66 | { 67 | fname=dmtcp_command.$SLURM_JOBID 68 | h=`hostname` 69 | echo "dmtcp_coordinator --daemon --exit-on-last -p 0 --port-file $fname $@ 1>/dev/null 2>&1" 70 | dmtcp_coordinator --daemon --exit-on-last -p 0 --port-file $fname $@ 1>/dev/null 2>&1 71 | 72 | while true; do 73 | if [ -f "$fname" ]; then 74 | p=`cat $fname` 75 | if [ -n "$p" ]; then 76 | # try to communicate ? dmtcp_command -p $p l 77 | break 78 | fi 79 | fi 80 | done 81 | 82 | # Create dmtcp_command wrapper for easy communication with coordinator 83 | p=`cat $fname` 84 | chmod +x $fname 85 | echo "#!/bin/bash" > $fname 86 | echo >> $fname 87 | echo "export PATH=$PATH" >> $fname 88 | echo "export DMTCP_COORD_HOST=$h" >> $fname 89 | echo "export DMTCP_COORD_PORT=$p" >> $fname 90 | echo "dmtcp_command \$@" >> $fname 91 | 92 | # Setup local environment for DMTCP 93 | export DMTCP_COORD_HOST=$h 94 | export DMTCP_COORD_PORT=$p 95 | } 96 | 97 | echo "Launching dmtcp coordintor daemon" 98 | echo "start_coordinator --exit-after-ckpt" 99 | start_coordinator --exit-after-ckpt 100 | 101 | # convert checkpoint time to seconds 102 | nTics=`echo $CHECKPOINT_TIME | \ 103 | sed 's/m/ \* 60/g' | \ 104 | sed 's/h/ \* 3600/g' | \ 105 | sed 's/d/ \* 86400/g' | \ 106 | sed 's/s//g' | \ 107 | bc | \ 108 | awk '{ printf("%d\n", $1); }'` 109 | echo "Checkpointing will commence after $nTics seconds" 110 | 111 | tic=`date +%s` 112 | if [[ -f ./dmtcp_restart_script.sh ]] && [[ "${FORCE_CHECKPOINT}" == "No" ]]; then 113 | echo "Restarting application under dmtcp control" 114 | echo "./dmtcp_restart_script.sh -h $DMTCP_COORD_HOST -p $DMTCP_COORD_PORT -i $nTics 1>>$OUTFILE 2>>$ERRFILE" 115 | ./dmtcp_restart_script.sh -h $DMTCP_COORD_HOST -p $DMTCP_COORD_PORT -i $nTics 1>>${OUTFILE}.${SLURM_JOB_ID} 2>>${ERRFILE}.${SLURM_JOB_ID} 116 | cat ${OUTFILE}.${SLURM_JOB_ID} >> ${OUTFILE} 117 | rm -f ${OUTFILE}.${SLURM_JOB_ID} 118 | cat ${ERRFILE}.${SLURM_JOB_ID} >> ${ERRFILE} 119 | rm -f ${ERRFILE}.${SLURM_JOB_ID} 120 | else 121 | # clear output and error files 122 | echo "" > ${OUTFILE} 123 | echo "" > ${ERRFILE} 124 | echo "Launching application under dmtcp control" 125 | echo "srun dmtcp_launch --quiet --rm -i $nTics $EXE $ARGS 1>${OUTFILE} 2>${ERRFILE}" 126 | srun dmtcp_launch --quiet --rm -i $nTics $EXE $ARGS 1>${OUTFILE} 2>${ERRFILE} 127 | fi 128 | toc=`date +%s` 129 | 130 | elapsedTime=`expr $toc - $tic` 131 | overheadTime=`expr $elapsedTime - $nTics` 132 | if [ "$overheadTime" -lt "0" ]; then 133 | overheadTime=0 134 | echo "All done - no checkpoint was required." 
135 | else 136 | echo "All done - checkpoint files are listed below:" 137 | ls -1 *.dmtcp 138 | fi 139 | 140 | echo "Elapsed Time = $elapsedTime seconds" 141 | echo "Checkpoint Overhead = $overheadTime seconds" 142 | -------------------------------------------------------------------------------- /checkpoint/dmtcp/slurm_launch.job: -------------------------------------------------------------------------------- 1 | #!/bin/bash -l 2 | #SBATCH -p short # change to proper partition name or remove 3 | #SBATCH --time=00:15:00 # put proper time of reservation here 4 | #SBATCH --nodes=2 # number of nodes 5 | #SBATCH --ntasks=8 # number of total tasks 6 | ##SBATCH --cpus-per-task=2 # number of cpus per task 7 | ##SBATCH --ntasks-per-node=4 # processes per node 8 | #SBATCH --mem=24000 # memory resource 9 | #SBATCH --job-name="dmtcp_job" # change to your job name 10 | #SBATCH --switches=1 # Try to localize IB traffice on a single switch 11 | #SBATCH --output=dmtcp.out # change to proper file name or remove for defaults 12 | # ? Any other batch options ? 13 | 14 | # ? Any module that need to be loaded ? 15 | module unload openmpi 16 | module load mpich 17 | module load dmtcp 18 | 19 | #----------------------------- Set up DMTCP environment for a job ------------# 20 | 21 | ############################################################################### 22 | # Start DMTCP coordinator on the launching node. Free TCP port is automatically 23 | # allocated. This function creates a dmtcp_command.$JOBID script, which serves 24 | # as a wrapper around dmtcp_command. The script tunes dmtcp_command for the 25 | # exact dmtcp_coordinator (its hostname and port). Instead of typing 26 | # "dmtcp_command -h -p ", 27 | # you just type "dmtcp_command.$JOBID " and talk to the coordinator 28 | # for JOBID job. 29 | ############################################################################### 30 | 31 | start_coordinator() 32 | { 33 | ############################################################ 34 | # For debugging when launching a custom coordinator, uncomment 35 | # the following lines and provide the proper host and port for 36 | # the coordinator. 37 | ############################################################ 38 | # export DMTCP_COORD_HOST=$h 39 | # export DMTCP_COORD_PORT=$p 40 | # return 41 | 42 | fname=dmtcp_command.$SLURM_JOBID 43 | h=`hostname` 44 | 45 | check_coordinator=`which dmtcp_coordinator` 46 | if [ -z "$check_coordinator" ]; then 47 | echo "No dmtcp_coordinator found. Check your DMTCP installation and PATH settings." 48 | exit 0 49 | fi 50 | 51 | dmtcp_coordinator --daemon --exit-on-last -p 0 --port-file $fname $@ 1>/dev/null 2>&1 52 | 53 | while true; do 54 | if [ -f "$fname" ]; then 55 | p=`cat $fname` 56 | if [ -n "$p" ]; then 57 | # try to communicate ? dmtcp_command -p $p l 58 | break 59 | fi 60 | fi 61 | done 62 | 63 | # Create dmtcp_command wrapper for easy communication with coordinator 64 | p=`cat $fname` 65 | chmod +x $fname 66 | echo "#!/bin/bash" > $fname 67 | echo >> $fname 68 | echo "export PATH=$PATH" >> $fname 69 | echo "export DMTCP_COORD_HOST=$h" >> $fname 70 | echo "export DMTCP_COORD_PORT=$p" >> $fname 71 | echo "dmtcp_command \$@" >> $fname 72 | 73 | # Set up local environment for DMTCP 74 | export DMTCP_COORD_HOST=$h 75 | export DMTCP_COORD_PORT=$p 76 | 77 | } 78 | 79 | ################################################################################### 80 | # Print out the SLURM job information. Remove this if you don't need it. 
81 | ################################################################################### 82 | 83 | # Print out the SLURM job information. Remove this if you don't need it. 84 | echo "SLURM_JOBID="$SLURM_JOBID 85 | echo "SLURM_JOB_NODELIST"=$SLURM_JOB_NODELIST 86 | echo "SLURM_NNODES"=$SLURM_NNODES 87 | echo "SLURMTMPDIR="$SLURMTMPDIR 88 | echo "working directory = "$SLURM_SUBMIT_DIR 89 | 90 | # changedir to workdir 91 | cd $SLURM_SUBMIT_DIR 92 | 93 | 94 | #----------------------------------- Set up job environment ------------------# 95 | 96 | ############################################################################### 97 | # Load all nessesary modules or export PATH/LD_LIBRARY_PATH/etc here. 98 | # Make sure that the prefix for the DMTCP install path is in PATH 99 | # and LD_LIBRARY_PATH. 100 | ############################################################################### 101 | 102 | # **** IF USING Open MPI 1.8, SEE COMMENT BELOW **** 103 | # module load openmpi 104 | ############################################################################### 105 | # For Open MPI 1.8, if using InfiniBand, uncomment the following statement 106 | # export OMPI_MCA_mpi_leave_pinned=0 107 | # This could prevent a bug due to interaction with memalign() and ptmalloc2() 108 | # on restart. 109 | ############################################################################### 110 | 111 | # export PATH=/bin:$PATH 112 | # export LD_LIBRARY_PATH=/lib:$LD_LIBRARY_PATH 113 | 114 | #------------------------------------- Launch application ---------------------# 115 | 116 | ################################################################################ 117 | # 1. Start DMTCP coordinator 118 | ################################################################################ 119 | 120 | start_coordinator -i 60 # ... 121 | 122 | 123 | ################################################################################ 124 | # 2. Launch application 125 | # 2.1. If you use mpiexec/mpirun to launch an application, use the following 126 | # command line: 127 | # $ dmtcp_launch --rm mpiexec ./ 128 | # 2.2. If you use PMI1 to launch an application, use the following command line: 129 | # $ srun dmtcp_launch --rm ./ 130 | # Note: PMI2 is not supported yet. 131 | # 2.3. If you use the Stampede supercomputer at Texas Advanced Computing Center 132 | # (TACC), use ibrun command to launch the application (--rm is not required): 133 | # $ ibrun dmtcp_launch ./ 134 | ################################################################################ 135 | 136 | #dmtcp_launch --rm mpirun --mca btl self,tcp ./ 137 | #dmtcp_launch --rm mpirun --mca btl self,tcp ~/bigdata/Projects/iigb/slurm/mpi/mpiTest_mpich 138 | dmtcp_launch --rm ./count.sh 139 | -------------------------------------------------------------------------------- /checkpoint/dmtcp/slurm_rstr.job: -------------------------------------------------------------------------------- 1 | #!/bin/bash -l 2 | 3 | #SBATCH -p short # change to proper partition name or remove 4 | #SBATCH --time=00:15:00 # put proper time of reservation here 5 | ##SBATCH --nodes=2 # number of nodes 6 | #SBATCH --ntasks=8 # number of tasks 7 | ##SBATCH --cpus-per-task # number of cpus per task 8 | ##SBATCH --ntasks-per-node=4 # processes per node 9 | #SBATCH --mem=24000 # memory resource 10 | #SBATCH --job-name="dmtcp_job" # change to your job name 11 | #SBATCH --output=dmtcp.out # change to proper file name or remove for defaults 12 | # ? Any other batch options ? 
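# For example (optional and site-dependent), e-mail notification directives
# like those used in the other example scripts in this repo could be added here:
##SBATCH --mail-user=user@example.com   # placeholder address
##SBATCH --mail-type=ALL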
13 | 14 | #----------------------------- Set up DMTCP environment for a job ------------# 15 | # ? Any module that need to be loaded ? 16 | module unload openmpi 17 | module load mpich 18 | module load dmtcp 19 | 20 | ############################################################################### 21 | # Start DMTCP coordinator on the launching node. Free TCP port is automatically 22 | # allocated. This function creates a dmtcp_command.$JOBID script, which serves 23 | # as a wrapper around dmtcp_command. The script tunes dmtcp_command for the 24 | # exact dmtcp_coordinator (its hostname and port). Instead of typing 25 | # "dmtcp_command -h -p ", 26 | # you just type "dmtcp_command.$JOBID " and talk to the coordinator 27 | # for JOBID job. 28 | ############################################################################### 29 | 30 | start_coordinator() 31 | { 32 | ############################################################ 33 | # For debugging when launching a custom coordinator, uncomment 34 | # the following lines and provide the proper host and port for 35 | # the coordinator. 36 | ############################################################ 37 | # export DMTCP_COORD_HOST=$h 38 | # export DMTCP_COORD_PORT=$p 39 | # return 40 | 41 | fname=dmtcp_command.$SLURM_JOBID 42 | h=`hostname` 43 | 44 | check_coordinator=`which dmtcp_coordinator` 45 | if [ -z "$check_coordinator" ]; then 46 | echo "No dmtcp_coordinator found. Check your DMTCP installation and PATH settings" 47 | exit 0 48 | fi 49 | 50 | dmtcp_coordinator --daemon --exit-on-last -p 0 --port-file $fname $@ 1>/dev/null 2>&1 51 | 52 | while true; do 53 | if [ -f "$fname" ]; then 54 | p=`cat $fname` 55 | if [ -n "$p" ]; then 56 | # try to communicate ? dmtcp_command -p $p l 57 | break 58 | fi 59 | fi 60 | done 61 | 62 | # Create a dmtcp_command wrapper for easy communication with the coordinator. 63 | p=`cat $fname` 64 | chmod +x $fname 65 | echo "#!/bin/bash" > $fname 66 | echo >> $fname 67 | echo "export PATH=$PATH" >> $fname 68 | echo "export DMTCP_COORD_HOST=$h" >> $fname 69 | echo "export DMTCP_COORD_PORT=$p" >> $fname 70 | echo "dmtcp_command \$@" >> $fname 71 | 72 | # Set up local environment for DMTCP 73 | export DMTCP_COORD_HOST=$h 74 | export DMTCP_COORD_PORT=$p 75 | 76 | } 77 | 78 | #----------------------- Some rutine steps and information output -------------------------# 79 | 80 | ################################################################################### 81 | # Print out the SLURM job information. Remove this if you don't need it. 82 | ################################################################################### 83 | echo "SLURM_JOBID="$SLURM_JOBID 84 | echo "SLURM_JOB_NODELIST"=$SLURM_JOB_NODELIST 85 | echo "SLURM_NNODES"=$SLURM_NNODES 86 | echo "SLURMTMPDIR="$SLURMTMPDIR 87 | echo "working directory = "$SLURM_SUBMIT_DIR 88 | 89 | # changedir to workdir 90 | cd $SLURM_SUBMIT_DIR 91 | 92 | #----------------------------------- Set up job environment ------------------# 93 | 94 | ############################################################################### 95 | # Load all nessesary modules or export PATH/LD_LIBRARY_PATH/etc here. 96 | # Make sure that the prefix for the DMTCP install path is in PATH 97 | # and LD_LIBRARY_PATH. 
98 | ############################################################################### 99 | 100 | # module load openmpi 101 | # export PATH=/bin:$PATH 102 | # export LD_LIBRARY_PATH=/lib:$LD_LIBRARY_PATH 103 | 104 | ############################################################################### 105 | # If you use the Stampede supercomputer at Texas Advanced Computing Center 106 | # (TACC), add the following: 107 | # HOSTFILE=hostfile 108 | # echo "SLURM_JOB_NODELIST" | scontrol show hostname > $HOSTFILE 109 | ############################################################################### 110 | 111 | #------------------------------------- Launch application ---------------------# 112 | 113 | ################################################################################ 114 | # 1. Start DMTCP coordinator 115 | ################################################################################ 116 | 117 | start_coordinator # -i 120 ... 118 | 119 | ################################################################################ 120 | # 2. Restart application 121 | ################################################################################ 122 | 123 | /bin/bash ./dmtcp_restart_script.sh -h $DMTCP_COORD_HOST -p $DMTCP_COORD_PORT 124 | 125 | ############################################################################### 126 | # If you use the Stampede supercomputer at Texas Advanced Computing Center 127 | # (TACC), add the --hostfile option: 128 | # /bin/bash ./dmtcp_restart_script.sh -h $DMTCP_COORD_HOST -p $DMTCP_COORD_PORT\ 129 | # --hostfile $HOSTFILE 130 | ############################################################################### 131 | -------------------------------------------------------------------------------- /checkpoint/dmtcp/stampede/README: -------------------------------------------------------------------------------- 1 | For users who want to use DMTCP for large-scale applications on the 2 | Stampede supercomputer at Texas Advanced Computing Center (TACC), 3 | these scripts will automatically set up the coordinator on a 4 | separate node, launch the application, and restart from the 5 | previous checkpoint if needed. 6 | -------------------------------------------------------------------------------- /checkpoint/dmtcp/stampede/slurm_launch.job: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Put your SLURM options here 3 | #SBATCH --time=00:30:00 # put proper time of reservation here 4 | #SBATCH --nodes=129 # number of nodes, where one node is the coordinator node, and the rest are the compute nodes 5 | #SBATCH --ntasks-per-node=16 # processes per node 6 | #SBATCH --job-name="example" 7 | #SBATCH --output=example.std 8 | #SBATCH --error=example.err 9 | #SBATCH --partition=normal 10 | #SBATCH -A project_number 11 | # ? Any other batch options ? 12 | 13 | # Start a dmtcp coordinator on launch node. A free TCP port is automatically allocated. 14 | # This function creates a dmtcp_command.$JOBID script that serves as a wrapper around 15 | # dmtcp_command that tunes it for the exact dmtcp_coordinator (its hostname and port). 16 | # Instead of typing "dmtcp_command -h -p ", 17 | # one just types "dmtcp_command.$JOBID " and talks to the coordinator of JOBID job. 18 | 19 | start_coordinator() 20 | { 21 | fname=dmtcp_command.$SLURM_JOBID 22 | h=`hostname` 23 | 24 | check_coordinator=`which dmtcp_coordinator` 25 | if [ -z "$check_coordinator" ]; then 26 | echo "No dmtcp_coordinator found. 
Check your DMTCP installation and PATH settings" 27 | exit 0 28 | fi 29 | 30 | dmtcp_coordinator --daemon --exit-on-last -p 0 --port-file $fname $@ 1>/dev/null 2>&1 31 | 32 | while true; do 33 | if [ -f "$fname" ]; then 34 | p=`cat $fname` 35 | if [ -n "$p" ]; then 36 | break 37 | fi 38 | fi 39 | done 40 | 41 | # Create dmtcp_command wrapper for easy communication with coordinator 42 | p=`cat $fname` 43 | chmod +x $fname 44 | echo "#!/bin/bash" > $fname 45 | echo >> $fname 46 | echo "export PATH=$PATH" >> $fname 47 | echo "export DMTCP_COORD_HOST=$h" >> $fname 48 | echo "export DMTCP_COORD_PORT=$p" >> $fname 49 | echo "dmtcp_command \$@" >> $fname 50 | 51 | export DMTCP_COORD_HOST=$h 52 | export DMTCP_COORD_PORT=$p 53 | } 54 | 55 | 56 | # Print out SLURM job information. Remove it if you don't need it 57 | echo "SLURM_JOBID="$SLURM_JOBID 58 | echo "SLURM_JOB_NODELIST"=$SLURM_JOB_NODELIST 59 | echo "SLURM_NNODES"=$SLURM_NNODES 60 | echo "SLURMTMPDIR="$SLURMTMPDIR 61 | echo "working directory = "$SLURM_SUBMIT_DIR 62 | 63 | # Change directory to workdir 64 | cd $SLURM_SUBMIT_DIR 65 | 66 | # Some initial setup like 67 | module load intel/15.0.2 68 | 69 | # DMTCP settings 70 | DMTCP_PATH="$WORK/dmtcp" 71 | export PATH="$DMTCP_PATH/bin/:$PATH" 72 | 73 | DMTCP_NODE=1 74 | NUM=$SLURM_CPUS_ON_NODE 75 | 76 | if [ -s hosts ] 77 | then 78 | rm hosts 79 | fi 80 | 81 | #Create hostfiles 82 | position=1 83 | for i in `scontrol show hostnames $SLURM_NODELIST` 84 | do 85 | if [ $position -gt $DMTCP_NODE ]; then 86 | echo $i:$NUM >>hosts 87 | fi 88 | let "position++" 89 | done 90 | 91 | if [ -s hosts ] 92 | then 93 | echo -e "App hostfile created\n" 94 | fi 95 | 96 | let APP_NODES=$SLURM_NNODES-$DMTCP_NODE 97 | let NUM_APP=$APP_NODES*$NUM 98 | 99 | start_coordinator # -i 120 ... 100 | 101 | echo "mpirun_rsh -np $NUM_APP -hostfile hosts -export dmtcp_launch --ckpt-signal 10 --ib ./a.out" 102 | 103 | mpirun_rsh -np $NUM_APP -hostfile hosts -export dmtcp_launch --ckpt-signal 10 --ib ./a.out 104 | -------------------------------------------------------------------------------- /checkpoint/dmtcp/stampede/slurm_rstr.job: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Put your SLURM options here 3 | #SBATCH --time=00:30:00 # put proper time of reservation here 4 | #SBATCH --nodes=129 # number of nodes, where one node is the coordinator node, and the rest are the compute nodes 5 | #SBATCH --ntasks-per-node=16 # processes per node 6 | #SBATCH --job-name="restart" 7 | #SBATCH --output=restart.std 8 | #SBATCH --error=restart.err 9 | #SBATCH --partition=normal 10 | #SBATCH -A project_number 11 | 12 | # Start a dmtcp coordinator on launch node. A free TCP port is automatically allocated. 13 | # This function creates a dmtcp_command.$JOBID script that serves as a wrapper around 14 | # dmtcp_command that tunes it for the exact dmtcp_coordinator (its hostname and port). 15 | # Instead of typing "dmtcp_command -h -p ", 16 | # you just types "dmtcp_command.$JOBID " and talks to the coordinator of JOBID job. 17 | 18 | start_coordinator() 19 | { 20 | fname=dmtcp_command.$SLURM_JOBID 21 | h=`hostname` 22 | 23 | check_coordinator=`which dmtcp_coordinator` 24 | if [ -z "$check_coordinator" ]; then 25 | echo "No dmtcp_coordinator found. 
Check your DMTCP installation and PATH settings" 26 | exit 0 27 | fi 28 | 29 | dmtcp_coordinator --daemon --exit-on-last -p 0 --port-file $fname $@ 1>/dev/null 2>&1 30 | 31 | while true; do 32 | if [ -f "$fname" ]; then 33 | p=`cat $fname` 34 | if [ -n "$p" ]; then 35 | break 36 | fi 37 | fi 38 | done 39 | 40 | # Create dmtcp_command wrapper for easy communication with coordinator 41 | p=`cat $fname` 42 | chmod +x $fname 43 | echo "#!/bin/bash" > $fname 44 | echo >> $fname 45 | echo "export PATH=$PATH" >> $fname 46 | echo "export DMTCP_COORD_HOST=$h" >> $fname 47 | echo "export DMTCP_COORD_PORT=$p" >> $fname 48 | echo "dmtcp_command \$@" >> $fname 49 | 50 | # Setup local environment for DMTCP 51 | export DMTCP_COORD_HOST=$h 52 | export DMTCP_COORD_PORT=$p 53 | } 54 | 55 | 56 | # Print out SLURM job information. Remove it if you don't need it 57 | echo "SLURM_JOBID="$SLURM_JOBID 58 | echo "SLURM_JOB_NODELIST"=$SLURM_JOB_NODELIST 59 | echo "SLURM_NNODES"=$SLURM_NNODES 60 | echo "SLURMTMPDIR="$SLURMTMPDIR 61 | echo "working directory = "$SLURM_SUBMIT_DIR 62 | 63 | # Change directory to workdir 64 | cd $SLURM_SUBMIT_DIR 65 | HOSTFILE=hostfile 66 | 67 | scontrol show hostname | tail -n +2 > $HOSTFILE 68 | 69 | # Some initial setup like 70 | DMTCP_PATH="$WORK/dmtcp" 71 | export PATH="$DMTCP_PATH/bin/:$PATH" 72 | 73 | start_coordinator # -i 120 ... 74 | 75 | echo "/bin/bash ./dmtcp_restart_script.sh -h $DMTCP_COORD_HOST -p $DMTCP_COORD_PORT --hostfile $HOSTFILE" 76 | 77 | /bin/bash ./dmtcp_restart_script.sh -h $DMTCP_COORD_HOST -p $DMTCP_COORD_PORT --hostfile $HOSTFILE 78 | 79 | rm $HOSTFILE 80 | -------------------------------------------------------------------------------- /checkpoint/dmtcp/ucr-hpcc/single/ckpts/ckpt_bash_afc8ad-40000-3ea0c9317bade2.dmtcp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ucr-hpcc/hpcc_slurm_examples/74b84cb0c85dedf1a096d03e6e9e515ae36a8c22/checkpoint/dmtcp/ucr-hpcc/single/ckpts/ckpt_bash_afc8ad-40000-3ea0c9317bade2.dmtcp -------------------------------------------------------------------------------- /checkpoint/dmtcp/ucr-hpcc/single/ckpts/ckpt_bash_afc8ad-40000-3ea0c9317bade2_files/count.sh_99078: -------------------------------------------------------------------------------- 1 | #!/bin/bash -l 2 | 3 | count=0 4 | echo "$count" > count.log 5 | while [ 1 -eq 1 ]; do 6 | count=$(($count+1)) 7 | sleep 10 8 | echo "$count" >> count.log 9 | done 10 | 11 | echo "Completed $count" 12 | -------------------------------------------------------------------------------- /checkpoint/dmtcp/ucr-hpcc/single/ckpts/ckpt_bash_afc8ad-40000-3ea0c9317bade2_files/fd-info.txt: -------------------------------------------------------------------------------- 1 | count.sh_99078:/bigdata/operations/jhayes/Projects/iigb/slurm/checkpoint/dmtcp/ucr-hpcc/single/count.sh 2 | -------------------------------------------------------------------------------- /checkpoint/dmtcp/ucr-hpcc/single/ckpts/ckpt_sleep_afc8ad-105000-5b7f491c.dmtcp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ucr-hpcc/hpcc_slurm_examples/74b84cb0c85dedf1a096d03e6e9e515ae36a8c22/checkpoint/dmtcp/ucr-hpcc/single/ckpts/ckpt_sleep_afc8ad-105000-5b7f491c.dmtcp -------------------------------------------------------------------------------- /checkpoint/dmtcp/ucr-hpcc/single/ckpts/ckpt_sleep_afc8ad-110000-5b7f4957.dmtcp: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/ucr-hpcc/hpcc_slurm_examples/74b84cb0c85dedf1a096d03e6e9e515ae36a8c22/checkpoint/dmtcp/ucr-hpcc/single/ckpts/ckpt_sleep_afc8ad-110000-5b7f4957.dmtcp -------------------------------------------------------------------------------- /checkpoint/dmtcp/ucr-hpcc/single/ckpts/ckpt_sleep_afc8ad-115000-5b7f4994.dmtcp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ucr-hpcc/hpcc_slurm_examples/74b84cb0c85dedf1a096d03e6e9e515ae36a8c22/checkpoint/dmtcp/ucr-hpcc/single/ckpts/ckpt_sleep_afc8ad-115000-5b7f4994.dmtcp -------------------------------------------------------------------------------- /checkpoint/dmtcp/ucr-hpcc/single/ckpts/ckpt_sleep_afc8ad-120000-5b7f49d0.dmtcp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ucr-hpcc/hpcc_slurm_examples/74b84cb0c85dedf1a096d03e6e9e515ae36a8c22/checkpoint/dmtcp/ucr-hpcc/single/ckpts/ckpt_sleep_afc8ad-120000-5b7f49d0.dmtcp -------------------------------------------------------------------------------- /checkpoint/dmtcp/ucr-hpcc/single/ckpts/ckpt_sleep_afc8ad-125000-5b7f4a0c.dmtcp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ucr-hpcc/hpcc_slurm_examples/74b84cb0c85dedf1a096d03e6e9e515ae36a8c22/checkpoint/dmtcp/ucr-hpcc/single/ckpts/ckpt_sleep_afc8ad-125000-5b7f4a0c.dmtcp -------------------------------------------------------------------------------- /checkpoint/dmtcp/ucr-hpcc/single/ckpts/ckpt_sleep_afc8ad-130000-5b7f4a49.dmtcp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ucr-hpcc/hpcc_slurm_examples/74b84cb0c85dedf1a096d03e6e9e515ae36a8c22/checkpoint/dmtcp/ucr-hpcc/single/ckpts/ckpt_sleep_afc8ad-130000-5b7f4a49.dmtcp -------------------------------------------------------------------------------- /checkpoint/dmtcp/ucr-hpcc/single/ckpts/ckpt_sleep_afc8ad-135000-5b7f4a85.dmtcp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ucr-hpcc/hpcc_slurm_examples/74b84cb0c85dedf1a096d03e6e9e515ae36a8c22/checkpoint/dmtcp/ucr-hpcc/single/ckpts/ckpt_sleep_afc8ad-135000-5b7f4a85.dmtcp -------------------------------------------------------------------------------- /checkpoint/dmtcp/ucr-hpcc/single/ckpts/dmtcp_restart_script_afc8ad-40000-3ea0c918c3d461.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -m # turn on job control 4 | 5 | #This script launches all the restarts in the background. 6 | #Suggestions for editing: 7 | # 1. For those processes executing on the localhost, remove 8 | # 'ssh from the start of the line. 9 | # 2. If using ssh, verify that ssh does not require passwords or other 10 | # prompts. 11 | # 3. Verify that the dmtcp_restart command is in your path on all hosts, 12 | # otherwise set the dmt_rstr_cmd appropriately. 13 | # 4. Verify DMTCP_COORD_HOST and DMTCP_COORD_PORT match the location of 14 | # the dmtcp_coordinator. If necessary, add 15 | # 'DMTCP_COORD_PORT=' after 16 | # 'DMTCP_COORD_HOST=<...>'. 17 | # 5. Remove the '&' from a line if that process reads STDIN. 18 | # If multiple processes read STDIN then prefix the line with 19 | # 'xterm -hold -e' and put '&' at the end of the line. 20 | # 6. 
Processes on same host can be restarted with single dmtcp_restart 21 | # command. 22 | 23 | 24 | check_local() 25 | { 26 | worker_host=$1 27 | unset is_local_node 28 | worker_ip=$(gethostip -d $worker_host 2> /dev/null) 29 | if [ -z "$worker_ip" ]; then 30 | worker_ip=$(nslookup $worker_host | grep -A1 'Name:' | grep 'Address:' | sed -e 's/Address://' -e 's/ //' -e 's/ //') 31 | fi 32 | if [ -z "$worker_ip" ]; then 33 | worker_ip=$(getent ahosts $worker_host |grep "^[0-9]\+\.[0-9]\+\.[0-9]\+\.[0-9]\+ *STREAM" | cut -d' ' -f1) 34 | fi 35 | if [ -z "$worker_ip" ]; then 36 | echo Could not find ip-address for $worker_host. Exiting... 37 | exit 1 38 | fi 39 | ifconfig_path=$(which ifconfig) 40 | if [ -z "$ifconfig_path" ]; then 41 | ifconfig_path="/sbin/ifconfig" 42 | fi 43 | output=$($ifconfig_path -a | grep "inet addr:.*${worker_ip} .*Bcast") 44 | if [ -n "$output" ]; then 45 | is_local_node=1 46 | else 47 | is_local_node=0 48 | fi 49 | } 50 | 51 | 52 | pass_slurm_helper_contact() 53 | { 54 | LOCAL_FILES="$1" 55 | # Create temp directory if needed 56 | if [ -n "$DMTCP_TMPDIR" ]; then 57 | CURRENT_TMPDIR=$DMTCP_TMPDIR/dmtcp-`whoami`@`hostname` 58 | elif [ -n "$TMPDIR" ]; then 59 | CURRENT_TMPDIR=$TMPDIR/dmtcp-`whoami`@`hostname` 60 | else 61 | CURRENT_TMPDIR=/tmp/dmtcp-`whoami`@`hostname` 62 | fi 63 | if [ ! -d "$CURRENT_TMPDIR" ]; then 64 | mkdir -p $CURRENT_TMPDIR 65 | fi 66 | # Create files with SLURM environment 67 | for CKPT_FILE in $LOCAL_FILES; do 68 | SUFFIX=${CKPT_FILE%%.dmtcp} 69 | SLURM_ENV_FILE=$CURRENT_TMPDIR/slurm_env_${SUFFIX##*_} 70 | echo "DMTCP_SRUN_HELPER_ADDR=$DMTCP_SRUN_HELPER_ADDR" >> $SLURM_ENV_FILE 71 | done 72 | } 73 | 74 | 75 | usage_str='USAGE: 76 | dmtcp_restart_script.sh [OPTIONS] 77 | 78 | OPTIONS: 79 | --coord-host, -h, (environment variable DMTCP_COORD_HOST): 80 | Hostname where dmtcp_coordinator is running 81 | --coord-port, -p, (environment variable DMTCP_COORD_PORT): 82 | Port where dmtcp_coordinator is running 83 | --hostfile : 84 | Provide a hostfile (One host per line, "#" indicates comments) 85 | --ckptdir, -d, (environment variable DMTCP_CHECKPOINT_DIR): 86 | Directory to store checkpoint images 87 | (default: use the same directory used in previous checkpoint) 88 | --restartdir, -d, (environment variable DMTCP_RESTART_DIR): 89 | Directory to read checkpoint images from 90 | --tmpdir, -t, (environment variable DMTCP_TMPDIR): 91 | Directory to store temporary files (default: $TMDPIR or /tmp) 92 | --no-strict-checking: 93 | Disable uid checking for checkpoint image. This allows the 94 | checkpoint image to be restarted by a different user than the one 95 | that created it. And suppress warning about running as root. 96 | (environment variable DMTCP_DISABLE_STRICT_CHECKING) 97 | --interval, -i, (environment variable DMTCP_CHECKPOINT_INTERVAL): 98 | Time in seconds between automatic checkpoints 99 | (Default: Use pre-checkpoint value) 100 | --coord-logfile PATH (environment variable DMTCP_COORD_LOG_FILENAME 101 | Coordinator will dump its logs to the given file 102 | --help: 103 | Print this message and exit.' 
104 | 105 | 106 | ckpt_timestamp="Thu Aug 23 17:00:13 2018" 107 | 108 | remote_shell_cmd="ssh" 109 | 110 | coord_host=$DMTCP_COORD_HOST 111 | if test -z "$DMTCP_COORD_HOST"; then 112 | coord_host=i11 113 | fi 114 | 115 | coord_port=$DMTCP_COORD_PORT 116 | if test -z "$DMTCP_COORD_PORT"; then 117 | coord_port=46172 118 | fi 119 | 120 | checkpoint_interval=$DMTCP_CHECKPOINT_INTERVAL 121 | if test -z "$DMTCP_CHECKPOINT_INTERVAL"; then 122 | checkpoint_interval=60 123 | fi 124 | export DMTCP_CHECKPOINT_INTERVAL=${checkpoint_interval} 125 | 126 | if [ $# -gt 0 ]; then 127 | while [ $# -gt 0 ] 128 | do 129 | if [ $1 = "--help" ]; then 130 | echo "$usage_str" 131 | exit 132 | elif [ $# -ge 1 ]; then 133 | case "$1" in 134 | --coord-host|--host|-h) 135 | coord_host="$2" 136 | shift; shift;; 137 | --coord-port|--port|-p) 138 | coord_port="$2" 139 | shift; shift;; 140 | --coord-logfile) 141 | DMTCP_COORD_LOGFILE="$2" 142 | shift; shift;; 143 | --hostfile) 144 | hostfile="$2" 145 | if [ ! -f "$hostfile" ]; then 146 | echo "ERROR: hostfile $hostfile not found" 147 | exit 148 | fi 149 | shift; shift;; 150 | --restartdir|-d) 151 | DMTCP_RESTART_DIR=$2 152 | shift; shift;; 153 | --ckptdir|-d) 154 | DMTCP_CKPT_DIR=$2 155 | shift; shift;; 156 | --tmpdir|-t) 157 | DMTCP_TMPDIR=$2 158 | shift; shift;; 159 | --no-strict-checking) 160 | noStrictChecking="--no-strict-checking" 161 | shift;; 162 | --interval|-i) 163 | checkpoint_interval=$2 164 | shift; shift;; 165 | *) 166 | echo "$0: unrecognized option '$1'. See correct usage below" 167 | echo "$usage_str" 168 | exit;; 169 | esac 170 | elif [ $1 = "--help" ]; then 171 | echo "$usage_str" 172 | exit 173 | else 174 | echo "$0: Incorrect usage. See correct usage below" 175 | echo 176 | echo "$usage_str" 177 | exit 178 | fi 179 | done 180 | fi 181 | 182 | dmt_rstr_cmd=/bigdata/operations/pkgadmin/opt/linux/centos/7.x/x86_64/pkgs/dmtcp/2.5.2/bin/dmtcp_restart 183 | which $dmt_rstr_cmd > /dev/null 2>&1 || dmt_rstr_cmd=dmtcp_restart 184 | which $dmt_rstr_cmd > /dev/null 2>&1 || echo "$0: $dmt_rstr_cmd not found" 185 | which $dmt_rstr_cmd > /dev/null 2>&1 || exit 1 186 | 187 | # Number of hosts in the computation = 1 188 | # Number of processes in the computation = 2 189 | 190 | # SYNTAX: 191 | # :: :: ... : 192 | # Host names and filenames must not include ':' 193 | # At most one fg (foreground) mode allowed; it must be last. 194 | # 'maybexterm' and 'maybebg' are set from . 195 | worker_ckpts=' 196 | :: i11 :bg: ckpts/ckpt_sleep_afc8ad-135000-5b7f4a85.dmtcp ckpts/ckpt_bash_afc8ad-40000-3ea0c9317bade2.dmtcp : ssh 197 | ' 198 | 199 | # Check for resource manager 200 | ibrun_path=$(which ibrun 2> /dev/null) 201 | if [ ! 
-n "$ibrun_path" ]; then 202 | discover_rm_path=$(which dmtcp_discover_rm) 203 | if [ -n "$discover_rm_path" ]; then 204 | eval $(dmtcp_discover_rm -t) 205 | srun_path=$(which srun 2> /dev/null) 206 | llaunch=`which dmtcp_rm_loclaunch` 207 | if [ $RES_MANAGER = "SLURM" ] && [ -n "$srun_path" ]; then 208 | eval $(dmtcp_discover_rm -n "$worker_ckpts") 209 | if [ -n "$DMTCP_DISCOVER_RM_ERROR" ]; then 210 | echo "Restart error: $DMTCP_DISCOVER_RM_ERROR" 211 | echo "Allocated resources: $manager_resources" 212 | exit 0 213 | fi 214 | export DMTCP_REMLAUNCH_NODES=$DMTCP_REMLAUNCH_NODES 215 | bound=$(($DMTCP_REMLAUNCH_NODES - 1)) 216 | srun_nnodes=0 217 | srun_ntasks=0 218 | for i in $(seq 0 $bound); do 219 | eval "val=\${DMTCP_REMLAUNCH_${i}_SLOTS}" 220 | #skip allocated-but-not-used nodes (dmtcp_discover_rm returns 0) 221 | test "$val" = "0" && continue 222 | srun_nnodes=$(( $srun_nnodes + 1 )) 223 | export DMTCP_REMLAUNCH_${i}_SLOTS="$val" 224 | bound2=$(($val - 1)) 225 | for j in $(seq 0 $bound2); do 226 | srun_ntasks=$(( $srun_ntasks + 1 )) 227 | eval "ckpts=\${DMTCP_REMLAUNCH_${i}_${j}}" 228 | export DMTCP_REMLAUNCH_${i}_${j}="$ckpts" 229 | done 230 | done 231 | if [ "$DMTCP_DISCOVER_PM_TYPE" = "HYDRA" ]; then 232 | export DMTCP_SRUN_HELPER_SYNCFILE=`mktemp ./tmp.XXXXXXXXXX` 233 | rm $DMTCP_SRUN_HELPER_SYNCFILE 234 | dmtcp_srun_helper -r $srun_path "$llaunch" 235 | if [ ! -f $DMTCP_SRUN_HELPER_SYNCFILE ]; then 236 | echo "Error launching application" 237 | exit 1 238 | fi 239 | # export helper contact info 240 | . $DMTCP_SRUN_HELPER_SYNCFILE 241 | pass_slurm_helper_contact "$DMTCP_LAUNCH_CKPTS" 242 | rm $DMTCP_SRUN_HELPER_SYNCFILE 243 | dmtcp_restart --join-coordinator --coord-host $DMTCP_COORD_HOST --coord-port $DMTCP_COORD_PORT $DMTCP_LAUNCH_CKPTS 244 | else 245 | DMTCP_REMLAUNCH_0_0="$DMTCP_REMLAUNCH_0_0 $DMTCP_LAUNCH_CKPTS" 246 | $srun_path --nodes=$srun_nnodes --ntasks=$srun_ntasks "$llaunch" 247 | fi 248 | exit 0 249 | elif [ $RES_MANAGER = "TORQUE" ]; then 250 | #eval $(dmtcp_discover_rm "$worker_ckpts") 251 | #if [ -n "$new_worker_ckpts" ]; then 252 | # worker_ckpts="$new_worker_ckpts" 253 | #fi 254 | eval $(dmtcp_discover_rm -n "$worker_ckpts") 255 | if [ -n "$DMTCP_DISCOVER_RM_ERROR" ]; then 256 | echo "Restart error: $DMTCP_DISCOVER_RM_ERROR" 257 | echo "Allocated resources: $manager_resources" 258 | exit 0 259 | fi 260 | arguments="PATH=$PATH DMTCP_COORD_HOST=$DMTCP_COORD_HOST DMTCP_COORD_PORT=$DMTCP_COORD_PORT" 261 | arguments=$arguments" DMTCP_CHECKPOINT_INTERVAL=$DMTCP_CHECKPOINT_INTERVAL" 262 | arguments=$arguments" DMTCP_TMPDIR=$DMTCP_TMPDIR" 263 | arguments=$arguments" DMTCP_REMLAUNCH_NODES=$DMTCP_REMLAUNCH_NODES" 264 | bound=$(($DMTCP_REMLAUNCH_NODES - 1)) 265 | for i in $(seq 0 $bound); do 266 | eval "val=\${DMTCP_REMLAUNCH_${i}_SLOTS}" 267 | arguments=$arguments" DMTCP_REMLAUNCH_${i}_SLOTS=\"$val\"" 268 | bound2=$(($val - 1)) 269 | for j in $(seq 0 $bound2); do 270 | eval "ckpts=\${DMTCP_REMLAUNCH_${i}_${j}}" 271 | arguments=$arguments" DMTCP_REMLAUNCH_${i}_${j}=\"$ckpts\"" 272 | done 273 | done 274 | pbsdsh -u "$llaunch" "$arguments" 275 | exit 0 276 | fi 277 | fi 278 | fi 279 | 280 | 281 | worker_ckpts_regexp=\ 282 | '[^:]*::[ \t\n]*\([^ \t\n]\+\)[ \t\n]*:\([a-z]\+\):[ \t\n]*\([^:]\+\)[ \t\n]*:\([^:]\+\)' 283 | 284 | worker_hosts=$(\ 285 | echo $worker_ckpts | sed -e 's/'"$worker_ckpts_regexp"'/\1 /g') 286 | restart_modes=$(\ 287 | echo $worker_ckpts | sed -e 's/'"$worker_ckpts_regexp"'/: \2/g') 288 | ckpt_files_groups=$(\ 289 | echo $worker_ckpts | sed -e 
's/'"$worker_ckpts_regexp"'/: \3/g') 290 | remote_cmd=$(\ 291 | echo $worker_ckpts | sed -e 's/'"$worker_ckpts_regexp"'/: \4/g') 292 | 293 | if [ ! -z "$hostfile" ]; then 294 | worker_hosts=$(\ 295 | cat "$hostfile" | sed -e 's/#.*//' -e 's/[ \t\r]*//' -e '/^$/ d') 296 | fi 297 | 298 | localhost_ckpt_files_group= 299 | 300 | num_worker_hosts=$(echo $worker_hosts | wc -w) 301 | 302 | maybejoin= 303 | if [ "$num_worker_hosts" != "1" ]; then 304 | maybejoin='--join-coordinator' 305 | fi 306 | 307 | for worker_host in $worker_hosts 308 | do 309 | 310 | ckpt_files_group=$(\ 311 | echo $ckpt_files_groups | sed -e 's/[^:]*:[ \t\n]*\([^:]*\).*/\1/') 312 | ckpt_files_groups=$(echo $ckpt_files_groups | sed -e 's/[^:]*:[^:]*//') 313 | 314 | mode=$(echo $restart_modes | sed -e 's/[^:]*:[ \t\n]*\([^:]*\).*/\1/') 315 | restart_modes=$(echo $restart_modes | sed -e 's/[^:]*:[^:]*//') 316 | 317 | remote_shell_cmd=$(echo $remote_cmd | sed -e 's/[^:]*:[ \t\n]*\([^:]*\).*/\1/') 318 | remote_cmd=$(echo $remote_cmd | sed -e 's/[^:]*:[^:]*//') 319 | 320 | maybexterm= 321 | maybebg= 322 | case $mode in 323 | bg) maybebg='bg';; 324 | xterm) maybexterm=xterm;; 325 | fg) ;; 326 | *) echo "WARNING: Unknown Mode";; 327 | esac 328 | 329 | if [ -z "$ckpt_files_group" ]; then 330 | break; 331 | fi 332 | 333 | new_ckpt_files_group="" 334 | for tmp in $ckpt_files_group 335 | do 336 | if [ ! -z "$DMTCP_RESTART_DIR" ]; then 337 | tmp=$DMTCP_RESTART_DIR/$(basename $tmp) 338 | fi 339 | new_ckpt_files_group="$new_ckpt_files_group $tmp" 340 | done 341 | 342 | tmpdir= 343 | if [ ! -z "$DMTCP_TMPDIR" ]; then 344 | tmpdir="--tmpdir $DMTCP_TMPDIR" 345 | fi 346 | 347 | coord_logfile= 348 | if [ ! -z "$DMTCP_COORD_LOGFILE" ]; then 349 | coord_logfile="--coord-logfile $DMTCP_COORD_LOGFILE" 350 | fi 351 | 352 | check_local $worker_host 353 | if [ "$is_local_node" -eq 1 -o "$num_worker_hosts" == "1" ]; then 354 | localhost_ckpt_files_group="$new_ckpt_files_group $localhost_ckpt_files_group" 355 | continue 356 | fi 357 | if [ -z $maybebg ]; then 358 | $maybexterm /usr/bin/$remote_shell_cmd -t "$worker_host" \ 359 | $dmt_rstr_cmd --coord-host "$coord_host" --cord-port "$coord_port"\ 360 | $ckpt_dir --join-coordinator --interval "$checkpoint_interval" $tmpdir \ 361 | $new_ckpt_files_group 362 | else 363 | $maybexterm /usr/bin/$remote_shell_cmd "$worker_host" \ 364 | "/bin/sh -c '$dmt_rstr_cmd --coord-host $coord_host --coord-port $coord_port $coord_logfile\ 365 | $ckpt_dir --join-coordinator --interval "$checkpoint_interval" $tmpdir \ 366 | $new_ckpt_files_group'" & 367 | fi 368 | 369 | done 370 | 371 | if [ -n "$localhost_ckpt_files_group" ]; then 372 | exec $dmt_rstr_cmd --coord-host "$coord_host" --coord-port "$coord_port" $coord_logfile \ 373 | $ckpt_dir $maybejoin --interval "$checkpoint_interval" $tmpdir $noStrictChecking $localhost_ckpt_files_group 374 | fi 375 | 376 | #wait for them all to finish 377 | wait 378 | -------------------------------------------------------------------------------- /checkpoint/dmtcp/ucr-hpcc/single/count.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -l 2 | 3 | count=0 4 | #echo "$count" > count.log 5 | while [ 1 -eq 1 ]; do 6 | count=$(($count+1)) 7 | sleep 10 8 | echo "$count" #>> count.log 9 | done 10 | 11 | echo "Completed $count" 12 | -------------------------------------------------------------------------------- /checkpoint/dmtcp/ucr-hpcc/single/slurm_launch.job: -------------------------------------------------------------------------------- 1 
| #!/bin/bash -l 2 | #SBATCH -p short 3 | #SBATCH --job-name=count 4 | #SBATCH --nodes=1 5 | #SBATCH --ntasks=2 6 | #SBATCH --time=15:00 7 | #SBATCH --mem=1gb 8 | #SBATCH --output=count.txt 9 | 10 | module load dmtcp 11 | 12 | dmtcp_launch --new-coordinator --ckptdir ckpts --rm --interval 60 ./count.sh 13 | 14 | -------------------------------------------------------------------------------- /checkpoint/dmtcp/ucr-hpcc/single/slurm_rstr.job: -------------------------------------------------------------------------------- 1 | #!/bin/bash -l 2 | #SBATCH -p short 3 | #SBATCH --job-name=count 4 | #SBATCH --nodes=1 5 | #SBATCH --ntasks=2 6 | #SBATCH --time=15:00 7 | #SBATCH --mem=1gb 8 | #SBATCH --output=count_rstrt.txt 9 | 10 | # Load modules 11 | module load dmtcp 12 | 13 | dmtcp_restart_script=$(ls -t ckpts/dmtcp_restart_script_*.sh | head -1) 14 | 15 | # Start DMTCP ################ 16 | dmtcp_coordinator --daemon --port 0 --port-file /tmp/port 17 | export DMTCP_COORD_HOST=`hostname` 18 | export DMTCP_COORD_PORT=$( NOTE: This assumes you already know the basics of job management. 4 | > If not, then please take the time to read through [HPC Cluster Jobs](https://hpcc.ucr.edu/manuals/hpc_cluster/jobs/). 5 | > And/or review the `Intro to HPCC` video from our [Events](https://hpcc.ucr.edu/events/small/) page. 6 | 7 | Running Dedalus on the cluster is similar to running any other software: you need to create a job submission script that acts as a wrapper for your Python code. 8 | Look at the [job.sh](ivp_2d_rayleigh_benard/job.sh) submission script as an example. 9 | 10 | When running Dedalus on the cluster you need to use `mpiexec`; however, you do not need to pass the number of parallel processes to `mpiexec`, since this is determined by your `Slurm` resource request. 11 | 12 | For example, we run Dedalus Python code like so: 13 | 14 | ```bash 15 | mpiexec python3 rayleigh_benard.py 16 | ``` 17 | 18 | Notice the omission of the `-n` flag above, compared to the following: 19 | 20 | ```bash 21 | mpiexec -n 4 python3 rayleigh_benard.py 22 | ``` 23 | 24 | We do not need the `-n` MPI flag because our version of `OpenMPI` is compiled against `Slurm`. 25 | Thus, the `--ntasks` or `-n` Slurm flag determines the number of parallel MPI processes and is automatically passed to `mpiexec`. 26 | 27 | To scale this to more parallel processes, we just increase the number of tasks: 28 | 29 | ``` 30 | #SBATCH --ntasks=32 31 | ``` 32 | 33 | Since most nodes are capable of 64 parallel processes, we could request up to 64 tasks at most. 34 | However, requesting more than 32 tasks may increase the queue wait time; the trade-off here is up to you. 35 | 36 | If you need more than 64 parallel processes, you can increase the number of nodes: 37 | 38 | ```bash 39 | #SBATCH -N 2 40 | #SBATCH --ntasks=64 41 | ``` 42 | 43 | The above would request 64 total parallel processes, but it would distribute them across 2 nodes. 44 | 45 | The maximum number of parallel processes you can request is determined by your Slurm limits. 46 | You can check your Slurm limits with the following command: 47 | 48 | ```bash 49 | slurm_limits 50 | ``` 51 | 52 | Look for the `cpu=X` notation corresponding to the partition you are submitting to, 53 | where `X` is the maximum number of parallel processes for your account on a particular partition. 
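Putting these pieces together, a larger two-node run might use a submission script like the sketch below. This is only a sketch: the `epyc` partition name, the memory request, and the 128-task count are placeholder values that you would adjust to your own limits and problem size.

```bash
#!/bin/bash -l

#SBATCH -p epyc            # placeholder partition; use one you have access to
#SBATCH -N 2               # two nodes
#SBATCH --ntasks=128       # example: 64 MPI tasks per node
#SBATCH -c 1               # one CPU per MPI task
#SBATCH --mem=50gb         # per-node memory; adjust to your problem size

# Load dedalus
module load dedalus

# Disable threading
export OMP_NUM_THREADS=1

# Run dedalus code; the MPI process count comes from --ntasks above
mpiexec python3 rayleigh_benard.py
```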
54 | 55 | Lastly, do not forget to disable threading before calling `mpiexec`: 56 | 57 | ```bash 58 | export OMP_NUM_THREADS=1 59 | ``` 60 | -------------------------------------------------------------------------------- /dedalus/ivp_2d_rayleigh_benard/job.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -l 2 | 3 | #SBATCH -N 1 4 | #SBATCH -n 4 5 | #SBATCH -c 1 6 | #SBATCH -p short 7 | #SBATCH --mem=50gb 8 | 9 | # Load dedalus 10 | module load dedalus 11 | 12 | # Disable threading 13 | export OMP_NUM_THREADS=1 14 | 15 | # Run dedalus code 16 | mpiexec python3 rayleigh_benard.py 17 | mpiexec python3 plot_snapshots.py snapshots/*.h5 18 | -------------------------------------------------------------------------------- /dedalus/ivp_2d_rayleigh_benard/plot_snapshots.py: -------------------------------------------------------------------------------- 1 | """ 2 | Plot 2D cartesian snapshots. 3 | 4 | Usage: 5 | plot_snapshots.py ... [--output=] 6 | 7 | Options: 8 | --output= Output directory [default: ./frames] 9 | 10 | """ 11 | 12 | import h5py 13 | import numpy as np 14 | import matplotlib 15 | matplotlib.use('Agg') 16 | import matplotlib.pyplot as plt 17 | from dedalus.extras import plot_tools 18 | 19 | 20 | def main(filename, start, count, output): 21 | """Save plot of specified tasks for given range of analysis writes.""" 22 | 23 | # Plot settings 24 | tasks = ['buoyancy', 'vorticity'] 25 | scale = 1.5 26 | dpi = 200 27 | title_func = lambda sim_time: 't = {:.3f}'.format(sim_time) 28 | savename_func = lambda write: 'write_{:06}.png'.format(write) 29 | 30 | # Layout 31 | nrows, ncols = 2, 1 32 | image = plot_tools.Box(4, 1) 33 | pad = plot_tools.Frame(0.3, 0, 0, 0) 34 | margin = plot_tools.Frame(0.2, 0.1, 0, 0) 35 | 36 | # Create multifigure 37 | mfig = plot_tools.MultiFigure(nrows, ncols, image, pad, margin, scale) 38 | fig = mfig.figure 39 | 40 | # Plot writes 41 | with h5py.File(filename, mode='r') as file: 42 | for index in range(start, start+count): 43 | for n, task in enumerate(tasks): 44 | # Build subfigure axes 45 | i, j = divmod(n, ncols) 46 | axes = mfig.add_axes(i, j, [0, 0, 1, 1]) 47 | # Call 3D plotting helper, slicing in time 48 | dset = file['tasks'][task] 49 | plot_tools.plot_bot_3d(dset, 0, index, axes=axes, title=task, even_scale=True, visible_axes=False) 50 | # Add time title 51 | title = title_func(file['scales/sim_time'][index]) 52 | title_height = 1 - 0.5 * mfig.margin.top / mfig.fig.y 53 | fig.suptitle(title, x=0.44, y=title_height, ha='left') 54 | # Save figure 55 | savename = savename_func(file['scales/write_number'][index]) 56 | savepath = output.joinpath(savename) 57 | fig.savefig(str(savepath), dpi=dpi) 58 | fig.clear() 59 | plt.close(fig) 60 | 61 | 62 | if __name__ == "__main__": 63 | 64 | import pathlib 65 | from docopt import docopt 66 | from dedalus.tools import logging 67 | from dedalus.tools import post 68 | from dedalus.tools.parallel import Sync 69 | 70 | args = docopt(__doc__) 71 | 72 | output_path = pathlib.Path(args['--output']).absolute() 73 | # Create output directory if needed 74 | with Sync() as sync: 75 | if sync.comm.rank == 0: 76 | if not output_path.exists(): 77 | output_path.mkdir() 78 | post.visit_writes(args[''], main, output=output_path) 79 | 80 | -------------------------------------------------------------------------------- /dedalus/ivp_2d_rayleigh_benard/rayleigh_benard.py: -------------------------------------------------------------------------------- 1 | """ 2 | Dedalus script 
simulating 2D horizontally-periodic Rayleigh-Benard convection. 3 | This script demonstrates solving a 2D cartesian initial value problem. It can 4 | be ran serially or in parallel, and uses the built-in analysis framework to save 5 | data snapshots to HDF5 files. The `plot_snapshots.py` script can be used to 6 | produce plots from the saved data. It should take about a cpu-minute to run. 7 | 8 | The problem is non-dimensionalized using the box height and freefall time, so 9 | the resulting thermal diffusivity and viscosity are related to the Prandtl 10 | and Rayleigh numbers as: 11 | 12 | kappa = (Rayleigh * Prandtl)**(-1/2) 13 | nu = (Rayleigh / Prandtl)**(-1/2) 14 | 15 | For incompressible hydro with two boundaries, we need two tau terms for each the 16 | velocity and buoyancy. Here we choose to use a first-order formulation, putting 17 | one tau term each on auxiliary first-order gradient variables and the others in 18 | the PDE, and lifting them all to the first derivative basis. This formulation puts 19 | a tau term in the divergence constraint, as required for this geometry. 20 | 21 | To run and plot using e.g. 4 processes: 22 | $ mpiexec -n 4 python3 rayleigh_benard.py 23 | $ mpiexec -n 4 python3 plot_snapshots.py snapshots/*.h5 24 | """ 25 | 26 | import numpy as np 27 | import dedalus.public as d3 28 | import logging 29 | logger = logging.getLogger(__name__) 30 | 31 | 32 | # Parameters 33 | Lx, Lz = 4, 1 34 | Nx, Nz = 256, 64 35 | Rayleigh = 2e6 36 | Prandtl = 1 37 | dealias = 3/2 38 | stop_sim_time = 50 39 | timestepper = d3.RK222 40 | max_timestep = 0.125 41 | dtype = np.float64 42 | 43 | # Bases 44 | coords = d3.CartesianCoordinates('x', 'z') 45 | dist = d3.Distributor(coords, dtype=dtype) 46 | xbasis = d3.RealFourier(coords['x'], size=Nx, bounds=(0, Lx), dealias=dealias) 47 | zbasis = d3.ChebyshevT(coords['z'], size=Nz, bounds=(0, Lz), dealias=dealias) 48 | 49 | # Fields 50 | p = dist.Field(name='p', bases=(xbasis,zbasis)) 51 | b = dist.Field(name='b', bases=(xbasis,zbasis)) 52 | u = dist.VectorField(coords, name='u', bases=(xbasis,zbasis)) 53 | tau_p = dist.Field(name='tau_p') 54 | tau_b1 = dist.Field(name='tau_b1', bases=xbasis) 55 | tau_b2 = dist.Field(name='tau_b2', bases=xbasis) 56 | tau_u1 = dist.VectorField(coords, name='tau_u1', bases=xbasis) 57 | tau_u2 = dist.VectorField(coords, name='tau_u2', bases=xbasis) 58 | 59 | # Substitutions 60 | kappa = (Rayleigh * Prandtl)**(-1/2) 61 | nu = (Rayleigh / Prandtl)**(-1/2) 62 | x, z = dist.local_grids(xbasis, zbasis) 63 | ex, ez = coords.unit_vector_fields(dist) 64 | lift_basis = zbasis.derivative_basis(1) 65 | lift = lambda A: d3.Lift(A, lift_basis, -1) 66 | grad_u = d3.grad(u) + ez*lift(tau_u1) # First-order reduction 67 | grad_b = d3.grad(b) + ez*lift(tau_b1) # First-order reduction 68 | 69 | # Problem 70 | # First-order form: "div(f)" becomes "trace(grad_f)" 71 | # First-order form: "lap(f)" becomes "div(grad_f)" 72 | problem = d3.IVP([p, b, u, tau_p, tau_b1, tau_b2, tau_u1, tau_u2], namespace=locals()) 73 | problem.add_equation("trace(grad_u) + tau_p = 0") 74 | problem.add_equation("dt(b) - kappa*div(grad_b) + lift(tau_b2) = - u@grad(b)") 75 | problem.add_equation("dt(u) - nu*div(grad_u) + grad(p) - b*ez + lift(tau_u2) = - u@grad(u)") 76 | problem.add_equation("b(z=0) = Lz") 77 | problem.add_equation("u(z=0) = 0") 78 | problem.add_equation("b(z=Lz) = 0") 79 | problem.add_equation("u(z=Lz) = 0") 80 | problem.add_equation("integ(p) = 0") # Pressure gauge 81 | 82 | # Solver 83 | solver = problem.build_solver(timestepper) 84 | 
solver.stop_sim_time = stop_sim_time 85 | 86 | # Initial conditions 87 | b.fill_random('g', seed=42, distribution='normal', scale=1e-3) # Random noise 88 | b['g'] *= z * (Lz - z) # Damp noise at walls 89 | b['g'] += Lz - z # Add linear background 90 | 91 | # Analysis 92 | snapshots = solver.evaluator.add_file_handler('snapshots', sim_dt=0.25, max_writes=50) 93 | snapshots.add_task(b, name='buoyancy') 94 | snapshots.add_task(-d3.div(d3.skew(u)), name='vorticity') 95 | 96 | # CFL 97 | CFL = d3.CFL(solver, initial_dt=max_timestep, cadence=10, safety=0.5, threshold=0.05, 98 | max_change=1.5, min_change=0.5, max_dt=max_timestep) 99 | CFL.add_velocity(u) 100 | 101 | # Flow properties 102 | flow = d3.GlobalFlowProperty(solver, cadence=10) 103 | flow.add_property(np.sqrt(u@u)/nu, name='Re') 104 | 105 | # Main loop 106 | startup_iter = 10 107 | try: 108 | logger.info('Starting main loop') 109 | while solver.proceed: 110 | timestep = CFL.compute_timestep() 111 | solver.step(timestep) 112 | if (solver.iteration-1) % 10 == 0: 113 | max_Re = flow.max('Re') 114 | logger.info('Iteration=%i, Time=%e, dt=%e, max(Re)=%f' %(solver.iteration, solver.sim_time, timestep, max_Re)) 115 | except: 116 | logger.error('Exception raised, triggering end of main loop.') 117 | raise 118 | finally: 119 | solver.log_stats() 120 | 121 | -------------------------------------------------------------------------------- /depend/README.md: -------------------------------------------------------------------------------- 1 | # Basic Example 2 | 3 | ``` 4 | sbatch --parsable first_job.sh 5 | 5383495 6 | sbatch --dependency=after:5383495 second_job.sh 7 | ``` 8 | 9 | # Scripted Example 10 | One way to script dependencies is to nest submissions (a job submitting a job): 11 | 12 | ```bash 13 | sbatch first_job.sh 14 | ``` 15 | 16 | Contents of `first_job.sh`: 17 | 18 | ```bash 19 | #!/bin/bash 20 | #SBATCH -p short 21 | #SBATCH --mem=1G 22 | #SBATCH --ntasks=1 23 | 24 | sbatch -p short --mem=1G --ntasks=1 --dependency=after:$SLURM_JOB_ID second_job.sh 25 | 26 | # Do some work 27 | sleep 60 28 | 29 | ``` 30 | 31 | # Complex Example 32 | This example is a simple linear chain of dependancies (max 3 jobs): 33 | 34 | ```batch 35 | sbatch test_job.sh 36 | ``` 37 | 38 | -------------------------------------------------------------------------------- /depend/slurm-180.out: -------------------------------------------------------------------------------- 1 | hi from 180 2 | submitted 181 3 | 180 done 4 | -------------------------------------------------------------------------------- /depend/slurm-181.out: -------------------------------------------------------------------------------- 1 | hi from 181 2 | submitted 182 3 | 181 done 4 | -------------------------------------------------------------------------------- /depend/slurm-182.out: -------------------------------------------------------------------------------- 1 | hi from 182 2 | 182 done 3 | -------------------------------------------------------------------------------- /depend/test_job.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -l 2 | #SBATCH --nodes=1 3 | #SBATCH --ntasks=1 4 | #SBATCH --mem-per-cpu=1G 5 | #SBATCH --time=0-00:15:00 # 15 minutes 6 | ##SBATCH --output=my.stdout 7 | ##SBATCH --mail-user=jhayes@ucr.edu 8 | ##SBATCH --mail-type=ALL 9 | ##SBATCH --job-name="just_a_test" 10 | 11 | : ${job_number:="1"} # set job_nubmer to 1 if it is undefined 12 | job_number_max=3 13 | 14 | echo "hi from ${SLURM_JOB_ID}" 15 | 16 | if 
[[ ${job_number} -lt ${job_number_max} ]] 17 | then 18 | (( job_number++ )) 19 | next_jobid=$(sbatch --export=job_number=${job_number} -d afterok:${SLURM_JOB_ID} test_job.sh | awk '{print $4}') 20 | echo "submitted ${next_jobid}" 21 | fi 22 | 23 | sleep 15 24 | echo "${SLURM_JOB_ID} done" 25 | -------------------------------------------------------------------------------- /espresso/README.md: -------------------------------------------------------------------------------- 1 | ## Espresso 2 | 3 | 4 | Submission scripts found here are for the openmpi complied version of espresso (espresso.sh) and intel complied version of espresso (espresso_intel.sh) 5 | 6 | How to run them: 7 | 8 | ```bash 9 | sbatch espresso.sh 10 | ``` 11 | 12 | ```bash 13 | sbatch espresso_intel.sh 14 | ``` 15 | 16 | 17 | ### Example input file 18 | 19 | input_file.in 20 | 21 | * you would have to edit the pseudo_dir directive to match your folders 22 | 23 | ```bash 24 | &control 25 | calculation = 'vc-relax' 26 | outdir = '_work' 27 | pseudo_dir = '/rhome/forsythc/bigdata/example-repos/qe/psp' 28 | prefix = 'pref' 29 | / 30 | &system 31 | ibrav = 0 32 | nat = 1 33 | ntyp = 1 34 | ecutwfc = 100 35 | occupations = 'smearing' 36 | smearing = 'fermi-dirac' 37 | degauss = 0.030 38 | 39 | / 40 | &electrons 41 | conv_thr = 1.0d-8 42 | mixing_mode= 'plain' 43 | diagonalization = 'david' 44 | / 45 | &ions 46 | ion_dynamics = 'bfgs' ! default 47 | / 48 | &cell 49 | cell_dynamics = 'bfgs' ! default 50 | press_conv_thr = 0.5D0 ! default 51 | / 52 | ATOMIC_SPECIES 53 | Cu 63.546 Cu_pseudo_dojo__oncv_lda.upf 54 | CELL_PARAMETERS angstrom 55 | 2.60 0.00 0.00 56 | 0.00 2.60 0.00 57 | 0.00 0.00 2.60 58 | ATOMIC_POSITIONS crystal 59 | Cu 0.000 0.000 0.000 60 | K_POINTS automatic 61 | 6 6 6 0 0 0 62 | ``` 63 | -------------------------------------------------------------------------------- /espresso/espresso.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -l 2 | #SBATCH --nodes=1 3 | #SBATCH --ntasks=6 4 | #SBATCH --mem-per-cpu=1G 5 | 6 | export OMP_NUM_THREADS=1 7 | 8 | module load espresso/6.3 9 | 10 | mpirun pw.x -in input_file.in >& output_file.out 11 | -------------------------------------------------------------------------------- /espresso/espresso_intel.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -l 2 | #SBATCH --nodes=1 3 | #SBATCH --ntasks=6 4 | #SBATCH --mem-per-cpu=1G 5 | 6 | 7 | export OMP_NUM_THREADS=1 8 | 9 | module load espresso/6.3_intel 10 | module load intel 11 | 12 | mpirun pw.x -in input_file.in >& output_file.out 13 | -------------------------------------------------------------------------------- /folddock/README.md: -------------------------------------------------------------------------------- 1 | # Folddock 2 | 3 | ## Installation 4 | You will need to clone the folddock using 5 | ``` 6 | git clone https://gitlab.com/ElofssonLab/FoldDock.git 7 | ``` 8 | ## Running 9 | ### Databases and Input information 10 | Due to optimized MSA usage in the **FoldDock** protocol, it is sufficient to run only two iterations of HHblits against uniclust30_2018_08 with the options: -E 0.001 -all -oa3m -n 2 11 | 12 | Get Uniclust30 here: http://wwwuser.gwdg.de/~compbiol/uniclust/2018_08/ 13 | 14 | We recommend running this step on CPU and the actual folding on GPU. 15 | 16 | 1. 
For each of the two chains, run HHblits against Uniclust30 using 17 | ``` 18 | # Load hhsuite 19 | module load hhsuite 20 | 21 | FASTAFILE=#Path to fasta file of chain 22 | UNICLUST30=#Path to Uniclust30 23 | OUTNAME="CHAINID.a3m" 24 | hhblits -i $FASTAFILE -d $UNICLUST30 -E 0.001 -all -oa3m $OUTNAME 25 | ``` 26 | 2. Create two input MSAs (paired and fused) from the HHblits results for each chain 27 | 28 | **Paired** 29 | ``` 30 | A3M1=#Path to a3m from chain 1 (from step 1) 31 | A3M2=#Path to a3m from chain 2 (from step 1) 32 | MGF=0.9 #The max gap fraction allowed in the sequences 33 | OUTNAME="CHAINID1-CHAINID2_paired.a3m" 34 | python3 ./data/marks/hhblits/oxmatch.py --a3m1 $A3M1 --a3m2 $A3M2 --max_gap_fraction $MGF --outname $OUTNAME 35 | ``` 36 | 37 | **Fused** 38 | ``` 39 | A3M1=#Path to a3m from chain 1 (from step 1) 40 | A3M2=#Path to a3m from chain 2 (from step 1) 41 | MGF=0.9 #The max gap fraction allowed in the sequences 42 | OUTNAME="CHAINID1-CHAINID2_fused.a3m" 43 | python3 ./data/marks/hhblits/fuse_msas.py --a3m1 $A3M1 --a3m2 $A3M2 --max_gap_fraction $MGF --outname $OUTNAME 44 | ``` 45 | 46 | ## Predicting 47 | 48 | **Chain Break and Fasta** 49 | ``` 50 | CB=100 #Get chain break: Length of chain 1 51 | # E.g. seq1='AAA', seq2='BBB', catseq=AAABBB (the sequence that should be in the fasta file) and CB=3 52 | FASTAFILE=#Path to file with concatenated fasta sequences. 53 | ``` 54 | 55 | **MSA paths** 56 | ``` 57 | PAIREDMSA=#Path to paired MSA 58 | FUSEDMSA=#Path to fused MSA 59 | MSAS="$PAIREDMSA,$FUSEDMSA" #Comma separated list of msa paths 60 | ``` 61 | 62 | **AF2 CONFIGURATION** 63 | ``` 64 | # This is inside the folder of FoldDock that you clone in the Installation section 65 | AFHOME='./Alphafold2/alphafold/' # Path of alphafold directory in FoldDock 66 | PARAM=#Path to AF2 params 67 | OUTFOLDER=# Path where AF2 generates its output folder structure 68 | 69 | PRESET='full_dbs' #Choose preset model configuration - no ensembling (full_dbs) and (reduced_dbs) or 8 model ensemblings (casp14). 70 | MAX_RECYCLES=10 #max_recycles (default=3) 71 | MODEL_NAME='model_1' 72 | ``` 73 | 74 | **Run AF2** 75 | This step is recommended to run on GPU as the folding will be much more efficient. 76 | NOTE! Depending on your structure, large amounts of RAM may be required 77 | The run mode option here is "--fold_only" 78 | 79 | ``` 80 | cd $AFHOME 81 | 82 | # Load Scratch 83 | module load workspace/scratch 84 | export SINGULARITY_BIND="/scratch:/tmp" 85 | 86 | # Path to directory of supporting data, the databases! 87 | data_dir=/srv/projects/db/alphafold 88 | 89 | # Load Alphafold 90 | module load alphafold 91 | 92 | singularity exec --nv --bind ${data_dir} $ALPHAFOLD_SING \ 93 | python3 $AFHOME/run_alphafold.py \ 94 | --fasta_paths=$FASTAFILE \ 95 | --msas=$MSAS \ 96 | --chain_break_list=$CB \ 97 | --output_dir=$OUTFOLDER \ 98 | --model_names=$MODEL_NAME \ 99 | --data_dir=$PARAM \ 100 | --fold_only \ 101 | --uniref90_database_path='' \ 102 | --mgnify_database_path='' \ 103 | --bfd_database_path='' \ 104 | --uniclust30_database_path='' \ 105 | --pdb70_database_path='' \ 106 | --template_mmcif_dir='' \ 107 | --obsolete_pdbs_path='' \ 108 | --preset=$PRESET \ 109 | --max_recycles=$MAX_RECYCLES 110 | ``` 111 | -------------------------------------------------------------------------------- /galaxy/README.md: -------------------------------------------------------------------------------- 1 | # Galaxy 2 | 3 | Outlined below is how to install Galaxy using conda and then run subsequent jobs. 
4 | Please note that differing versions of conda may cause issues; please stay with the default `miniconda2`. 5 | 6 | ## Prep 7 | 8 | Since Galaxy can get very large, configure conda to install environments under your bigdata directory, as described here: [Conda Configure](https://hpcc.ucr.edu/manuals_linux-cluster_package-manage.html#configure). 9 | 10 | ## Request Job 11 | 12 | We need to do the install from a job, so let's request one: 13 | 14 | ```bash 15 | srun -p short -c 4 --mem=10g --pty bash -l 16 | ``` 17 | 18 | ## Install 19 | 20 | Now that we have a job, run the following to install Galaxy: 21 | 22 | ```bash 23 | mkdir -p ~/bigdata/galaxy/ 24 | cd ~/bigdata/galaxy/ 25 | git clone -b release_20.05 https://github.com/galaxyproject/galaxy.git 20.05 26 | cd 20.05 27 | sh scripts/common_startup.sh 28 | exit 29 | ``` 30 | 31 | ## Run Galaxy 32 | 33 | To run Galaxy, we will need to submit a new job, like this: 34 | 35 | ```bash 36 | sbatch -p short -c 4 --mem=10g --wrap='cd ~/bigdata/galaxy/20.05; ./run.sh start; sleep infinity;' 37 | ``` 38 | -------------------------------------------------------------------------------- /gaussian/README.md: -------------------------------------------------------------------------------- 1 | # Gaussian 2 | 3 | Here are various examples of job submission scripts for Gaussian 9 and 16. 4 | 5 | More information regarding general job submission can be found [here](https://hpcc.ucr.edu/manuals/hpc_cluster/jobs/#submitting-jobs). 6 | 7 | ## CPU 8 | 9 | ``` 10 | # Download example 11 | wget https://raw.githubusercontent.com/ucr-hpcc/hpcc_slurm_examples/master/gaussian/cpu_job.sh 12 | 13 | # Make changes as needed 14 | vim cpu_job.sh 15 | 16 | # Submit job 17 | sbatch cpu_job.sh 18 | ``` 19 | 20 | ## GPU 21 | 22 | For GPU jobs, there are several hardware configurations: 23 | 24 | | Type | Qty | 25 | ------|------ 26 | | P100 | 2 | 27 | | K80 | 4 | 28 | | K80 | 8 | 29 | 30 | Choose the correct example submission script to match the hardware you wish to use. 
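If you are unsure which GPU model a node provides, you can query Slurm before picking a script. This is a minimal sketch; the exact GRES strings reported depend on the cluster's Slurm configuration:

```
# List GPU partition nodes with their generic resources (GRES), e.g. gpu:p100:2 or gpu:k80:4
sinfo -p gpu -o '%N %G'
```

Then download and adapt the example that matches, e.g. for the 2 x P100 nodes: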
31 | 32 | ``` 33 | # Download 2 x P100 example 34 | wget https://raw.githubusercontent.com/ucr-hpcc/hpcc_slurm_examples/master/gaussian/gpu_2xp100_job.sh 35 | 36 | # Make changes as needed 37 | vim gpu_2xp100_job.sh 38 | 39 | # Submit job 40 | sbatch gpu_2xp100_job.sh 41 | ``` 42 | -------------------------------------------------------------------------------- /gaussian/cpu_job.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -l 2 | 3 | #SBATCH -c 10 4 | #SBATCH --mem=10g 5 | #SBATCH --time=2:00:00 6 | #SBATCH -p short 7 | 8 | # Load software based on CPU 9 | if [[ $(cpu_type) == "intel" ]] || [[ $(cpu_type) == "xeon" ]]; then 10 | module load gaussian/16_AVX2 11 | else 12 | module load gaussian/16_SSE4 13 | fi 14 | 15 | # Set scratch directory 16 | module load workspace/scratch 17 | export GAUSS_SCRDIR=${SCRATCH} 18 | 19 | # Move to working directory 20 | cd ~/bigdata/Projects/gaussian/ 21 | 22 | # Run Gaussian 23 | g16 ch4_opt.gjf 24 | -------------------------------------------------------------------------------- /gaussian/cpu_job_g09.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -l 2 | 3 | #SBATCH -c 10 4 | #SBATCH --mem=10g 5 | #SBATCH --time=2:00:00 6 | #SBATCH -p short 7 | 8 | # Load software 9 | module load gaussian/9_SSE3 10 | 11 | # Set scratch directory 12 | module load workspace/scratch 13 | export GAUSS_SCRDIR=${SCRATCH} 14 | 15 | # Move to working directory 16 | cd ~/bigdata/Projects/gaussian/ 17 | 18 | # Run Gaussian 19 | g09 ch4_opt.gjf 20 | -------------------------------------------------------------------------------- /gaussian/gpu_2xp100_job.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -l 2 | 3 | #SBATCH -c 64 # Request all CPUs, use only: floor(AVAIL_RAM_GB/9) 4 | #SBATCH --mem=180g # Request RAM, calculated by: floor(AVAIL_RAM_GB/9)*9 5 | #SBATCH --time=2-00:00:00 # Run for 2 days 6 | #SBATCH -p gpu # Submit to GPU partition 7 | #SBATCH --gpus=2 # Request 2 GPUs 8 | ##SBATCH --nodelist=gpu01 # Request specific node 9 | #SBATCH --exclude=gpu[01-04] # Exclude heterogeneous nodes 10 | #SBATCH --exclusive # This job gets whole node 11 | 12 | # Load software 13 | module load gaussian/16_AVX2 14 | 15 | # Create temp directory 16 | module load workspace/scratch 17 | export GAUSS_SCRDIR=${SCRATCH} 18 | 19 | # Move to working directory 20 | cd ~/bigdata/Projects/gaussian/gpu/ 21 | 22 | # Run Gaussian on specific CPUs 23 | g16 -c="0-20" -m="189GB" -g="0-1=0,16" ch4_opt.gjf 24 | -------------------------------------------------------------------------------- /gaussian/gpu_4xk80_job.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -l 2 | 3 | #SBATCH -c 32 # Request all CPUs, use only: floor(AVAIL_RAM_GB/9) 4 | #SBATCH --mem=108g # Request RAM, calculated by: floor(AVAIL_RAM_GB/9)*9 5 | #SBATCH --time=2-00:00:00 # Run for 2 days 6 | #SBATCH -p gpu # Submit to GPU partition 7 | #SBATCH --gpus=4 # Request 4 GPUs 8 | ##SBATCH --nodelist=gpu01 # Request specific node 9 | #SBATCH --exclude=gpu[03-05] # Exclude heterogeneous nodes 10 | #SBATCH --exclusive # This job gets whole node 11 | 12 | # Load software 13 | module load gaussian/16_AVX2 14 | 15 | # Use auto temp directory 16 | module load workspace/scratch 17 | export GAUSS_SCRDIR=${SCRATCH} 18 | 19 | # Move to working directory 20 | cd ~/bigdata/Projects/gaussian/gpu/ 21 | 22 | # Run Gaussian on specific CPUs 23 | g16 
-c="0-5,8-13" -m="108GB" -g="0-3=1-2,8-9" ch4_opt.gjf 24 | -------------------------------------------------------------------------------- /gaussian/gpu_8xk80_job.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -l 2 | 3 | #SBATCH -c 48 # Request all CPUs, use only: floor(AVAIL_RAM_GB/9) 4 | #SBATCH --mem=432g # Request RAM, calculated by: floor(AVAIL_RAM_GB/9)*9 5 | #SBATCH --time=2-00:00:00 # Run for 2 days 6 | #SBATCH -p gpu # Submit to GPU partition 7 | #SBATCH --gpus=8 # Request 8 GPUs 8 | ##SBATCH --nodelist=gpu01 # Request specific node 9 | #SBATCH --exclude=gpu[01-02],gpu05 # Exclude heterogeneous nodes 10 | #SBATCH --exclusive # This job gets whole node 11 | 12 | # Load software 13 | module load gaussian/16_AVX2 14 | 15 | # Use auto temp directory 16 | module load workspace/scratch 17 | export GAUSS_SCRDIR=${SCRATCH} 18 | 19 | # Move to working directory 20 | cd ~/bigdata/Projects/gaussian/gpu/ 21 | 22 | # Run Gaussian on specific CPUs 23 | g16 -c="0-48" -m="432GB" -g="0-7=0-1,24-25,12-13,36-37" ch4_opt.gjf 24 | -------------------------------------------------------------------------------- /hmmer/README.md: -------------------------------------------------------------------------------- 1 | 2 | # DESCRIPTION 3 | 4 | Show how to use HMMER, and MPI HMMER, on the HPCC system 5 | 6 | # EXAMPLES 7 | 8 | Query files are in the `query` directory - there is a script `download.sh` which demonstrates how the data were downloaded from the UniProt db. 9 | 10 | All the scripts are in the [pipeline](pipeline) folder so you can browse the working code. 11 | 12 | ## Basic usage for Pfam DB searching 13 | 14 | 1. Run hmmscan of a db of proteins against the Pfam database using the default Pfam; this defaults to using the default hmmer software (3.2.1). 15 | 16 | Note that the latest version is 3.3, so it is a good idea to also specify a version in your module load to be explicit about the version you want 17 | 18 | ``` 19 | sbatch -p short pipeline/01_hmmscan321_pfam34.sh 20 | ``` 21 | 22 | 2. Run hmmscan as above but use a specific version of HMMer so we can use 3.3 23 | 24 | ``` 25 | sbatch -p short pipeline/01_hmmscan33_pfam34.sh 26 | ``` 27 | 28 | 3. Run hmmscan with an older version of Pfam (eg let's use version 31.0) 29 | 30 | ``` 31 | sbatch -p short pipeline/01_hmmscan33_pfam31.sh 32 | ``` 33 | 34 | If you compare these results you'll see the E-values and results in the `domtbl` files are not different between HMMer versions (that's good!) but if you compare different DB versions the values will have changed slightly. 35 | 36 | ``` 37 | # compare the diff Pfam DB versions 38 | diff results/hmmscan33_pfam31.domtbl results/hmmscan33_pfam34.domtbl 39 | 40 | # compare the diff HMMer versions - only things different are the version numbers and date run 41 | diff results/hmmscan321_pfam34.domtbl results/hmmscan33_pfam34.domtbl 42 | ``` 43 | 44 | ## Fetch an HMM 45 | 46 | Let's get a specific HMM module from the DB and also search that HMM against a database of proteins using hmmsearch. 47 | This can be done with some cut-and-paste command lines, shown below 48 | 49 | ``` 50 | module load hmmer/3.3 51 | module load db-pfam/34.0 52 | 53 | hmmfetch $PFAM_DB/Pfam-A.hmm COX1 > COX1.hmm 54 | ``` 55 | 56 | 1. 
Here's a script which requests a specific HMM from Pfam DB and then searches it against a db of proteins 57 | 58 | ``` 59 | sbatch -p short pipeline/02_hmmsearch_COX1.sh 60 | ``` 61 | 62 | ## Run MPI HMMscan 63 | 64 | Following Sean Eddy's input on ways to take advantage of MPI speedup and ways to maximize fast running of HMMer 65 | http://cryptogenomicon.org/hmmscan-vs-hmmsearch-speed-the-numerology.html 66 | 67 | Here is a script which will startup an MPI job, we are going to run hmmsearch instead of hmmscan and show how MPI can be used. 68 | This example is for a few proteins only, but the real speedup would be seen with a large genome or translated metagenome. 69 | 70 | See the script [pipeline/03_hmmsearch_MPI.sh](pipeline/03_hmmsearch_MPI.sh) for more details but it uses the `srun` command when launching the hmmsearch but the resources requested are in the `#SBATCH` or cmdline requested options which set the number of CPUs to use. 71 | 72 | The current example is a bit of a toy one but if you want to compare you can try running against a large protein DB and see the performance differences to standard multithreaded runs of hmmsearch or hmmscan searches. 73 | ``` 74 | sbatch -p short pipeline/03_hmmsearch_MPI.sh 75 | ``` 76 | 77 | AUTHORS 78 | ====== 79 | Jason Stajich - jason.stajich[AT]ucr.edu 80 | -------------------------------------------------------------------------------- /hmmer/pipeline/01_hmmscan321_pfam34.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/bash 2 | 3 | #SBATCH -p short -N 1 -n 4 --mem 2gb --out hmmscan321_pfam34.log 4 | 5 | module load hmmer/3.2.1 6 | module load db-pfam/34.0 7 | 8 | mkdir -p results 9 | hmmscan --cut_ga --domtbl results/hmmscan321_pfam34.domtbl $PFAM_DB/Pfam-A.hmm query/query.pep > results/hmmscan321_pfam34.hmmer 10 | -------------------------------------------------------------------------------- /hmmer/pipeline/01_hmmscan33_pfam31.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/bash 2 | 3 | #SBATCH -p short -N 1 -n 4 --mem 2gb --out hmmscan33_pfam31.log 4 | 5 | module load hmmer/3.3 6 | module load db-pfam/31.0 7 | 8 | mkdir -p results 9 | hmmscan --cut_ga --domtbl results/hmmscan33_pfam31.domtbl $PFAM_DB/Pfam-A.hmm query/query.pep > results/hmmscan33_pfam31.hmmer 10 | -------------------------------------------------------------------------------- /hmmer/pipeline/01_hmmscan33_pfam34.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/bash 2 | 3 | #SBATCH -p short -N 1 -n 4 --mem 2gb --out hmmscan33_pfam34.log 4 | 5 | module load hmmer/3.3 6 | module load db-pfam/34.0 7 | 8 | mkdir -p results 9 | hmmscan --cut_ga --domtbl results/hmmscan33_pfam34.domtbl $PFAM_DB/Pfam-A.hmm query/query.pep > results/hmmscan33_pfam34.hmmer 10 | -------------------------------------------------------------------------------- /hmmer/pipeline/02_hmmsearch_COX1.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/bash 2 | 3 | #SBATCH -p short -N 1 -n 4 --mem 2gb --out hmmsearch_COX1.log 4 | 5 | module load hmmer/3.3 6 | module load db-pfam/34.0 7 | 8 | hmmfetch $PFAM_DB/Pfam-A.hmm COX1 > COX1.hmm 9 | mkdir -p results 10 | hmmsearch --cut_ga --domtbl results/hmmsearch_COX1.domtbl COX1.hmm query/query.pep > results/hmmsearch_COX1.hmmer 11 | -------------------------------------------------------------------------------- /hmmer/pipeline/03_hmmsearch_MPI.sh: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/bash 2 | 3 | #SBATCH -p short --ntasks 72 --mem 32gb --out hmmsearch_mpi_db.%A.log 4 | 5 | module load hmmer/3.3-mpi 6 | module load db-pfam/34.0 7 | 8 | mkdir -p results 9 | 10 | time srun hmmsearch --mpi --cut_ga --domtbl results/hmmsearch_MPI.domtbl $PFAM_DB/Pfam-A.hmm query/query.pep > results/hmmsearch_MPI.hmmer 11 | -------------------------------------------------------------------------------- /hmmer/query/download.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/bash 2 | curl -O https://www.uniprot.org/uniprot/Q8RXC8.fasta 3 | curl -O https://www.uniprot.org/uniprot/P49791.fasta 4 | curl -O https://www.uniprot.org/uniprot/A0A2H4MYE5.fasta 5 | 6 | cat *.fasta > query.pep 7 | -------------------------------------------------------------------------------- /hmmer/query/query.pep: -------------------------------------------------------------------------------- 1 | >tr|A0A2H4MYE5|A0A2H4MYE5_9BIVA Cytochrome c oxidase subunit 1 (Fragment) OS=Mytilus sp. BOSL3 OX=2050256 GN=COI PE=3 SV=1 2 | GVWGGLFGASLSLMIRMQLGHPGAVFLKSDWFYNVVVTTHALMMIFFAVMPILIGAFGNW 3 | LIPLLVGGKDMIYPRMNNLSYWLSPNALYLLMLSFSTDKGVGAGWTIYPPLSVYPYHSGP 4 | SMDVLIVSLHLAGLSSLVGAINFASTNKNMPVLEMKGERAELYVLSISVTAVLLIISIPV 5 | LGGGITMILFDRNFNT 6 | >sp|P49791|NU153_RAT Nuclear pore complex protein Nup153 OS=Rattus norvegicus OX=10116 GN=Nup153 PE=1 SV=1 7 | MASGAGGIGGGGGGGKIRTRRCHQGPVKPYQQGRPQHQGILSRVTESVKNIVPGWLQRYF 8 | NKSENACSCSVNADEVPRWPENREDEREIYVDENTNTDDGRTTPEPTGSNTEEPSTTSTA 9 | SNYPDVLTRPSLHRSHLNFSVLESPALHCQPSTSSAFPIGSSGFSLVKEIKDSTSQHDDD 10 | NISTTSGFSSRASEKDIAVSKNTSLPPLWSPEAERSHSLSQHTAISSKKPAFNLSAFGTL 11 | STSLGNSSILKTSQLGDSPFYPGKTTYGGAAAAVRQNKVRSTPYQAPVRRQMKAKQLNAQ 12 | SYGVTSSTARRILQSLEKMSSPLADAKRIPSAVSSPLNSPLDRSGIDSTVFQAKKEKVDS 13 | QYPPVQRLMTPKPVSIATNRTVYFKPSLTPSGDLRKTNQRIDKKNSTVDEKNISRQNREQ 14 | ESGFSYPNFSIPAANGLSSGVGGGGGKMRRERTTHFVASKPSEEEEVEVPLLPQISLPIS 15 | SSSLPTFNFSSPAISAASSSSVSPSQPLSNKVQMTSLGSTGNPVFTFSSPIVKSTQADVL 16 | PPASIGFTFSVPLAKTELSGPNSSSETVLSSSVTAQDNTVVNSSSSKKRSAPCEDPFTPA 17 | KILREGSVLDILKTPGFMSPKVDSPALQPTTTSSIVYTRPAISTFSSSGVEFGESLKAGS 18 | SWQCDTCLLQNKVTDNKCIACQAAKLPLKETAKQTGIGTPSKSDKPASTSGTGFGDKFKP 19 | AIGTWDCDTCLVQNKPEAVKCVACETPKPGTGVKRALPLTVASESPVTASSSTTVTTGTL 20 | GFGDKFKRPVGSWECPVCCVSNKAEDSRCVSCTSEKPGLVSASSSNSVPVSLPSGGCLGL 21 | DKFKKPEGSWDCEVCLVQNKADSTKCIACESAKPGTKSEFKGFGTSSSLNPAPSAFKFGI 22 | PSSSSGLSQTFTSTGNFKFGDQGGFKLGTSSDSGSTNTMNTNFKFPKPTGDFKFGVLPDS 23 | KPEEIKNDSKNDNFQFGPSSGLSNPASSAPFQFGVSTLGQQEKKEELPQSSSAGFSFGAG 24 | VANPSSAAIDTTVTSENKSGFNFGTIDTKSVSVTPFTYKTTEAKKEDASATKGGFTFGKV 25 | DSAALSSPSMFVLGRTEEKQQEPVTSTSLVFGKKADNEEPKCQPVFSFGNSEQTKDESSS 26 | KPTFSFSVAKPSVKESDQLAKATFAFGNQTNTTTDQGAAKPAFSFLNSSSSSSSTPATSS 27 | SASIFGSSTSSSSPPVAAFVFGQASNPVSSSAFGNSAESSTSQPLLFPQDGKPATTSSTA 28 | SAAPPFVFGTGASSNSTVSSGFTFGATTTSSSSGSFFVFGTGHSAPSASPAFGANQTPTF 29 | GQSQGASQPNPPSFGSISSSTALFSAGSQPVPPPTFGTVSSSSQPPVFGQQPSQSAFGSG 30 | TANASSVFQFGSSTTNFNFTNNNPSGVFTFGASPSTPAAAAQPSGSGGFSFSQSPASFTV 31 | GSNGKNMFSSSGTSVSGRKIKTAVRRKK 32 | >sp|Q8RXC8|RBK2_ARATH Receptor-like cytosolic serine/threonine-protein kinase RBK2 OS=Arabidopsis thaliana OX=3702 GN=RBK2 PE=1 SV=1 33 | MNSASAHDLRLLEVDKEKQDPKSPRGALEACLTRCSISSASSSSDDPPPNREAIDNADAD 34 | TDVQCKNHRASSNWGKFFKLWKRRSMKRLSSFPPLSGAAPPIIKQNKSADPNMNGMVLHD 35 | IYDFQSSLQNFSISDIEIATDNFSPENIIGRGGYADVYQGILPEGKLIAVKRLTKGTPDE 36 | QTAEFLSELGIIAHVDHPNTAKFIGCCIEGGMHLVFRLSPLGSLGSLLHGPSKYKLTWSR 37 | 
RYNVALGTADGLVYLHEGCQRRIIHRDIKADNILLTEDFQPQICDFGLAKWLPKQLTHHN 38 | VSKFEGTFGYFAPEYFMHGIVDEKTDVFAFGVLLLELITGHPALDESQQSLVLWAKPLLE 39 | RKAIKELVDPSLGDEYNREELIRLTSTASLCIDQSSLLRPRMSQVVELLLGHEDVVMTPR 40 | EAKIKMMQRTYSEELLDSVEYNSTKYLGDLDRIREVALAS 41 | -------------------------------------------------------------------------------- /interactive/README.txt: -------------------------------------------------------------------------------- 1 | srun --pty bash -l 2 | -------------------------------------------------------------------------------- /jupyter/README.md: -------------------------------------------------------------------------------- 1 | # Jupyter Notebooks 2 | 3 | ## Usage 4 | 5 | There are 3 ways to run Jupyter Notebooks: 6 | 1. __[JupyterHub](https://jupyter.hpcc.ucr.edu) server__ 7 | 8 | This method is the easiest, however resources are limited, so it should only be used for light testing. 9 | 10 | 2. __Interactively as a Job__ 11 | 12 | This method is the most difficult, however it provides a way to request more resources than JupyterHub. 13 | 14 | 3. __Non-Interactively as a Job__ 15 | 16 | This method is not difficult, in fact it is the same method we use for submitting most jobs on the cluster. 17 | 18 | ## Workflow 19 | 20 | The suggested workflow would be to do light development from the __[JupyterHub](https://jupyter.hpcc.ucr.edu) server__ and when you have a polished Jupyter Notebook you can submit it __non-interactively as a job__ via `sbatch`. 21 | 22 | The __Interactively as a Job__ method should only be used in extreme situations, when exploring or testing is not possible from the __[JupyterHub](https://jupyter.hpcc.ucr.edu) server__. 23 | 24 | ## Interactively as a Job 25 | 26 | This method provides a web-based interactive development environment (IDE) similar to the [JupyterHub](https://jupyter.hpcc.ucr.edu) server, however you are able to request more compute resources. 27 | 28 | First review the following method, [HPCC Web Browser Access](https://hpcc.ucr.edu/manuals_linux-cluster_jobs.html#web-browser-access). After you have read through that, you can proceed with this example. 29 | 30 | Download the Jupyter submission script: 31 | ```bash 32 | wget https://raw.githubusercontent.com/ucr-hpcc/hpcc_slurm_examples/master/jupyter/submit_jupyter.sh 33 | ``` 34 | 35 | Edit the script with proper Slurm resources: 36 | ```bash 37 | vim submit_jupyter.sh 38 | ``` 39 | 40 | Submit the Jupyter job: 41 | ```bash 42 | sbatch submit_jupyter.sh 43 | ``` 44 | 45 | Check for Jupyter job start time: 46 | ```bash 47 | squeue -u $USER -o '%i %j %S %Z' 48 | ``` 49 | 50 | If your job has already started, then check the log, which will contain the remainder of your instructions: 51 | ``` 52 | cat jupyter-notebook-12345678.log 53 | ``` 54 | Replace `12345678` with your real `JOBID` from the previous step. 
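The log typically ends with SSH tunnel instructions and a URL that contains a login token. As a rough sketch (the node name, port, and SSH server for your job are printed in the log and will differ), the tunnel command run from your local machine looks like:

```bash
# Replace NODE and PORT with the values shown in your jupyter-notebook-<JOBID>.log
ssh -N -L PORT:NODE:PORT yourusername@secure.hpcc.ucr.edu
```

Once the tunnel is running, open the URL from the log in your local browser, replacing the node name with `localhost`.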
55 | 56 | ## Non-Interactive as a Job 57 | 58 | Download the Jupyter Notebook (or create your own): 59 | ```bash 60 | wget https://raw.githubusercontent.com/ucr-hpcc/hpcc_slurm_examples/master/jupyter/notebook.ipynb 61 | ``` 62 | 63 | Download the notebook submission script: 64 | ```bash 65 | wget https://raw.githubusercontent.com/ucr-hpcc/hpcc_slurm_examples/master/jupyter/submit_notebook.sh 66 | ``` 67 | 68 | Edit script with proper Slurm resources, and options for your notebook: 69 | ```bash 70 | vim submit_notebook.sh 71 | ``` 72 | 73 | Submit the notebook job: 74 | ```bash 75 | sbatch submit_notebook.sh 76 | ``` 77 | 78 | Check the state of your job: 79 | ```bash 80 | squeue -u $USER 81 | ``` 82 | 83 | If your job has started, then you can check the log to verfiy that your script is running: 84 | ``` 85 | cat jupyter-notebook-12345678.log 86 | ``` 87 | Replace `12345678` with your real `JOBID` from the previous step. 88 | 89 | Once the job has completed, you can download and view your HTML or Notebook results from the [JupyterHub](https://jupyter.hpcc.ucr.edu) server. 90 | -------------------------------------------------------------------------------- /jupyter/jupyter-notebook-3523021.log: -------------------------------------------------------------------------------- 1 | [NbConvertApp] Converting notebook notebook.ipynb to html 2 | [NbConvertApp] Writing 569303 bytes to notebook.html 3 | -------------------------------------------------------------------------------- /jupyter/jupyter-notebook-5205779.log: -------------------------------------------------------------------------------- 1 | 2 | MacOS or linux terminal command to create your ssh tunnel: 3 | ssh -N -L 8726:i16:8726 jhayes@cluster.hpcc.ucr.edu 4 | 5 | MS Windows MobaXterm info: 6 | 7 | Forwarded port:same as remote port 8 | Remote server: i16 9 | Remote port: 8726 10 | SSH server: ib.hpc.yale.edu 11 | SSH login: jhayes 12 | SSH port: 22 13 | 14 | 15 | PLEASE USE GENERATED URL BELOW IN BROWSER 16 | YOU MUST REPLACE 'i16' with 'localhost' 17 | [I 12:40:22.789 NotebookApp] JupyterLab beta preview extension loaded from /opt/linux/centos/7.x/x86_64/pkgs/anaconda3/4.5.4/lib/python3.6/site-packages/jupyterlab 18 | [I 12:40:22.790 NotebookApp] JupyterLab application directory is /bigdata/operations/pkgadmin/opt/linux/centos/7.x/x86_64/pkgs/anaconda3/4.5.4/share/jupyter/lab 19 | [I 12:40:22.803 NotebookApp] Serving notebooks from local directory: /bigdata/operations/jhayes/Projects/slurm/jupyter 20 | [I 12:40:22.803 NotebookApp] 0 active kernels 21 | [I 12:40:22.803 NotebookApp] The Jupyter Notebook is running at: 22 | [I 12:40:22.803 NotebookApp] http://i16:8726/?token=ae2e43d9c45aac71240bdb33774dd3ddb75484ba1a70a205 23 | [I 12:40:22.803 NotebookApp] Use Control-C to stop this server and shut down all kernels (twice to skip confirmation). 
24 | [C 12:40:22.806 NotebookApp] 25 | 26 | Copy/paste this URL into your browser when you connect for the first time, 27 | to login with a token: 28 | http://i16:8726/?token=ae2e43d9c45aac71240bdb33774dd3ddb75484ba1a70a205&token=ae2e43d9c45aac71240bdb33774dd3ddb75484ba1a70a205 29 | -------------------------------------------------------------------------------- /jupyter/notebook.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "print(\"Hello World\")" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": null, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "x=1+1" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": null, 24 | "metadata": {}, 25 | "outputs": [], 26 | "source": [ 27 | "print(x)" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": null, 33 | "metadata": {}, 34 | "outputs": [], 35 | "source": [] 36 | } 37 | ], 38 | "metadata": { 39 | "kernelspec": { 40 | "display_name": "Python 3", 41 | "language": "python", 42 | "name": "python3" 43 | }, 44 | "language_info": { 45 | "codemirror_mode": { 46 | "name": "ipython", 47 | "version": 3 48 | }, 49 | "file_extension": ".py", 50 | "mimetype": "text/x-python", 51 | "name": "python", 52 | "nbconvert_exporter": "python", 53 | "pygments_lexer": "ipython3", 54 | "version": "3.7.9" 55 | } 56 | }, 57 | "nbformat": 4, 58 | "nbformat_minor": 4 59 | } 60 | -------------------------------------------------------------------------------- /jupyter/notebook.nbconvert.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "execution": { 8 | "iopub.execute_input": "2021-03-23T01:28:45.130193Z", 9 | "iopub.status.busy": "2021-03-23T01:28:45.129531Z", 10 | "iopub.status.idle": "2021-03-23T01:28:45.133742Z", 11 | "shell.execute_reply": "2021-03-23T01:28:45.132895Z" 12 | } 13 | }, 14 | "outputs": [ 15 | { 16 | "name": "stdout", 17 | "output_type": "stream", 18 | "text": [ 19 | "Hello World\n" 20 | ] 21 | } 22 | ], 23 | "source": [ 24 | "print(\"Hello World\")" 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": 2, 30 | "metadata": { 31 | "execution": { 32 | "iopub.execute_input": "2021-03-23T01:28:45.138745Z", 33 | "iopub.status.busy": "2021-03-23T01:28:45.138086Z", 34 | "iopub.status.idle": "2021-03-23T01:28:45.141155Z", 35 | "shell.execute_reply": "2021-03-23T01:28:45.140535Z" 36 | } 37 | }, 38 | "outputs": [], 39 | "source": [ 40 | "x=1+1" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": 3, 46 | "metadata": { 47 | "execution": { 48 | "iopub.execute_input": "2021-03-23T01:28:45.146185Z", 49 | "iopub.status.busy": "2021-03-23T01:28:45.145522Z", 50 | "iopub.status.idle": "2021-03-23T01:28:45.148990Z", 51 | "shell.execute_reply": "2021-03-23T01:28:45.148296Z" 52 | } 53 | }, 54 | "outputs": [ 55 | { 56 | "name": "stdout", 57 | "output_type": "stream", 58 | "text": [ 59 | "2\n" 60 | ] 61 | } 62 | ], 63 | "source": [ 64 | "print(x)" 65 | ] 66 | }, 67 | { 68 | "cell_type": "code", 69 | "execution_count": null, 70 | "metadata": {}, 71 | "outputs": [], 72 | "source": [] 73 | } 74 | ], 75 | "metadata": { 76 | "kernelspec": { 77 | "display_name": "Python 3", 78 | "language": "python", 79 | "name": "python3" 80 | }, 81 | "language_info": { 82 | 
"codemirror_mode": { 83 | "name": "ipython", 84 | "version": 3 85 | }, 86 | "file_extension": ".py", 87 | "mimetype": "text/x-python", 88 | "name": "python", 89 | "nbconvert_exporter": "python", 90 | "pygments_lexer": "ipython3", 91 | "version": "3.7.9" 92 | } 93 | }, 94 | "nbformat": 4, 95 | "nbformat_minor": 4 96 | } 97 | -------------------------------------------------------------------------------- /jupyter/submit_jupyter.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -l 2 | #SBATCH --partition=short 3 | #SBATCH --nodes=1 4 | #SBATCH --ntasks=1 5 | #SBATCH --mem=1G 6 | #SBATCH --time=1:00:00 7 | #SBATCH --job-name=jupyter-notebook 8 | #SBATCH --output=jupyter-notebook-%J.log 9 | 10 | # Change to HOME dir to give access to all folders within Jupyter-Lab 11 | cd $HOME 12 | 13 | # Jupyter vars 14 | XDG_RUNTIME_DIR="" 15 | 16 | # Get tunneling info 17 | port=$(shuf -i8000-9999 -n1) 18 | node=$(hostname -s) 19 | user=$(whoami) 20 | cluster=$(hostname -f | awk -F"." '{print $2}') 21 | 22 | # Print tunneling instructions jupyter-log 23 | echo -e " 24 | MacOS or linux terminal command to create your ssh tunnel: 25 | ssh -NL ${port}:${node}:${port} ${user}@secure.hpcc.ucr.edu 26 | 27 | MS Windows MobaXterm info: 28 | 29 | Forwarded port:same as remote port 30 | Remote server: ${node} 31 | Remote port: ${port} 32 | SSH server: secure.hpcc.ucr.edu 33 | SSH login: $user 34 | SSH port: 22 35 | " 36 | 37 | #################################################### 38 | # Load modules or activate conda environments here # 39 | #################################################### 40 | 41 | # You can activate your own conda env with Jupyter 42 | #module load miniconda3 43 | #conda activate jupyter 44 | #OR 45 | # Load the pre installed system version 46 | module load jupyterlab 47 | 48 | # Print instructions to user 49 | echo -e "PLEASE USE GENERATED URL BELOW IN BROWSER\nYOU MUST REPLACE '${node}' with 'localhost'" 50 | 51 | # Launch Jupyter lab or notebook 52 | jupyter-lab --no-browser --port=${port} --ip=${node} 53 | #jupyter-notebook --no-browser --port=${port} --ip=${node} 54 | -------------------------------------------------------------------------------- /jupyter/submit_notebook.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -l 2 | #SBATCH --partition=short 3 | #SBATCH --nodes=1 4 | #SBATCH --ntasks=1 5 | #SBATCH --mem=1G 6 | #SBATCH --time=1:00:00 7 | #SBATCH --job-name=jupyter-notebook 8 | #SBATCH --output=jupyter-notebook-%J.log 9 | 10 | # Load and base conda 11 | module load miniconda3 12 | 13 | # Load jupyterlab 14 | module load jupyterlab 15 | # OR activare your own conda environment 16 | #conda activate myCondaEnv 17 | 18 | # Execute the notebook and generate HTML (notebook.html) as output file 19 | jupyter nbconvert --to html --execute notebook.ipynb 20 | # OR execute the notebook and generate another notebook (notebook.nbconvert.ipynb) as output file 21 | #jupyter nbconvert --to notebook --execute notebook.ipynb 22 | 23 | # There are many output formats, list all possible options with this 24 | #jupyter nbconvert --help-all 25 | -------------------------------------------------------------------------------- /mathematica/README.md: -------------------------------------------------------------------------------- 1 | # Activation 2 | You will have to setup the mathematica license via a License server named "mathlm". 3 | 4 | 1. 
Configure [X-Forward](https://hpcc.ucr.edu/manuals_linux-basics_intro.html#how-to-get-access) or [VNC](https://hpcc.ucr.edu/manuals_linux-cluster_jobs.html#desktop-environments) method. 5 | 2. Log back into the cluster (with X-Forwarding or via VNC) and run mathematica: 6 | 7 | ``` 8 | module load mathematica 9 | mathematica 10 | ``` 11 | 12 | 3. Do not use a license file or key, but rather click on the bottom button `Other ways to activate`. 13 | 4. Then click the option `Connect to a license server`. 14 | 5. After that you should enter the name "mathlm" in the field and click `activate`. 15 | 16 | -------------------------------------------------------------------------------- /mathematica/submission_script.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -l 2 | 3 | #SBATCH --nodes=1 4 | #SBATCH --ntasks=1 5 | #SBATCH --cpus-per-task=1 6 | #SBATCH --mem=1G 7 | #SBATCH --time=1-00:15:00 # 1 day and 15 minutes 8 | ##SBATCH --mail-user=useremail@address.com 9 | ##SBATCH --mail-type=ALL 10 | #SBATCH --job-name="just_a_test" 11 | #SBATCH -p epyc # You can use any of the following; epyc, intel, batch, highmem, gpu 12 | 13 | module load mathematica/11.3 14 | 15 | math -noprompt -run '< 50] 8 | Exit[] 9 | -------------------------------------------------------------------------------- /matlab/Getting_Started_With_Serial_And_Parallel_MATLAB.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ucr-hpcc/hpcc_slurm_examples/74b84cb0c85dedf1a096d03e6e9e515ae36a8c22/matlab/Getting_Started_With_Serial_And_Parallel_MATLAB.pdf -------------------------------------------------------------------------------- /matlab/README.md: -------------------------------------------------------------------------------- 1 | 2 | ## Simple Method: Submitting Job Script 3 | 4 | To submit a job from the cluster, you can use [submission_script.sh](submission_script.sh) or [submission_script2.sh](submission_script2.sh) as a starting point. 5 | 6 | ## Copy examples 7 | 8 | You can simply copy the text from [submission_script.sh](submission_script.sh) and paste it into a file on the cluster via `nano`/`vim`. 9 | 10 | Or you can use the `wget` command to transfer the file from this repo to the cluster directly: 11 | 12 | 1. [Login to the cluster](https://hpcc.ucr.edu/manuals_linux-basics_intro#how-to-get-access) 13 | 2. Once logged in you can run 14 | ```bash 15 | wget https://raw.githubusercontent.com/ucr-hpcc/hpcc_slurm_examples/master/matlab/submission_script.sh 16 | ``` 17 | 18 | ## Advanced Method: Submitting Job From Matlab 19 | 20 | To submit a job from your matlab program to the cluster, you can view [Getting_Started_With_Serial_And_Parallel_MATLAB.pdf](Getting_Started_With_Serial_And_Parallel_MATLAB.pdf). 21 | If you are getting an error when running **configCluster**, run **rehash toolboxcache** and then run **configCluster** again. 
22 | -------------------------------------------------------------------------------- /matlab/simple_args/command_line: -------------------------------------------------------------------------------- 1 | sbatch job_script.sh 45 90 2 | -------------------------------------------------------------------------------- /matlab/simple_args/job_script.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -l 2 | #SBATCH -p short 3 | 4 | module load matlab 5 | 6 | matlab -nodisplay -nodesktop -r "var1=$1;var2=$2" < matlabCode.m 7 | 8 | -------------------------------------------------------------------------------- /matlab/simple_args/matlabCode.m: -------------------------------------------------------------------------------- 1 | disp(var1) 2 | disp(var2) 3 | 4 | var3=var1+var2 5 | 6 | disp(var3) 7 | -------------------------------------------------------------------------------- /matlab/simple_args/slurm-376816.out: -------------------------------------------------------------------------------- 1 | 2 | < M A T L A B (R) > 3 | Copyright 1984-2021 The MathWorks, Inc. 4 | R2021b Update 2 (9.11.0.1837725) 64-bit (glnxa64) 5 | December 14, 2021 6 | 7 | 8 | To get started, type doc. 9 | For product information, visit www.mathworks.com. 10 | 11 | 12 | var1 = 13 | 14 | "SomeValue" 15 | 16 | >> Var SomeValue 17 | >> -------------------------------------------------------------------------------- /matlab/simple_args/slurm-396765.out: -------------------------------------------------------------------------------- 1 | 2 | < M A T L A B (R) > 3 | Copyright 1984-2021 The MathWorks, Inc. 4 | R2021b Update 2 (9.11.0.1837725) 64-bit (glnxa64) 5 | December 14, 2021 6 | 7 | 8 | To get started, type doc. 9 | For product information, visit www.mathworks.com. 
10 | 11 | 12 | var2 = 13 | 14 | 90 15 | 16 | >> 45 17 | 18 | >> 90 19 | 20 | >> >> 21 | var3 = 22 | 23 | 135 24 | 25 | >> >> 135 26 | 27 | >> -------------------------------------------------------------------------------- /matlab/submission_script.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -l 2 | 3 | #SBATCH --nodes=1 4 | #SBATCH --ntasks=1 5 | #SBATCH --cpus-per-task=1 6 | #SBATCH --mem-per-cpu=1G 7 | #SBATCH --time=2:00:00 8 | ##SBATCH --mail-user=youremail@address.com 9 | ##SBATCH --mail-type=ALL 10 | #SBATCH --job-name="just_a_test" 11 | #SBATCH -p short # You can use any of the following; epyc, intel, batch, highmem, gpu 12 | 13 | 14 | # Load matlab 15 | module load matlab 16 | 17 | # Send Matlab code to Matlab 18 | matlab -nodisplay -nodesktop < my_matlab_program.m 19 | 20 | # You can also capture the output in a log, like this 21 | #matlab -nodisplay -nosplash < my_matlab_program.m > matlab_run.log 22 | 23 | -------------------------------------------------------------------------------- /matlab/submission_script2.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -l 2 | 3 | #SBATCH --nodes=1 4 | #SBATCH --ntasks=1 5 | #SBATCH --cpus-per-task=1 6 | #SBATCH --mem-per-cpu=100G 7 | #SBATCH --time=5-00:00:00 # 5 day and 00 minutes 8 | #SBATCH --output=my.stdout 9 | ##SBATCH --mail-user=youremail@address.com 10 | ##SBATCH --mail-type=ALL 11 | #SBATCH --job-name="HT_QOptica_1e3" 12 | #SBATCH -p epyc # You can use any of the following; epyc, intel, batch, highmem, gpu 13 | 14 | 15 | # Print current date 16 | date 17 | 18 | # Load matlab 19 | module load matlab/r2018a 20 | matlab -nodisplay -nosplash run.log 21 | -------------------------------------------------------------------------------- /mpi/R/slurm-688508.out: -------------------------------------------------------------------------------- 1 | [1] "We are assigned 9 processes" 2 | 9 slaves are spawned successfully. 0 failed. 3 | [1] "Hello from i33 with CPU type x86_64" "Hello from i33 with CPU type x86_64" 4 | [3] "Hello from i33 with CPU type x86_64" "Hello from i33 with CPU type x86_64" 5 | [5] "Hello from i33 with CPU type x86_64" "Hello from i33 with CPU type x86_64" 6 | [7] "Hello from i33 with CPU type x86_64" "Hello from i33 with CPU type x86_64" 7 | [9] "Hello from i33 with CPU type x86_64" 8 | [1] -2177.028 9 | user system elapsed 10 | 5.220 0.296 5.514 11 | slurmstepd-i33: error: *** JOB 688508 ON i33 CANCELLED AT 2017-05-04T11:49:44 DUE TO TIME LIMIT *** 12 | mpirun: Forwarding signal 18 to job 13 | -------------------------------------------------------------------------------- /mpi/R/snow-test.R: -------------------------------------------------------------------------------- 1 | ## 2 | # Source: http://www.umbc.edu/hpcf/resources-tara/how-to-run-R.html 3 | # filename: snow-test.R 4 | # 5 | # SNOW quick ref: http://www.sfu.ca/~sblay/R/snow.html 6 | # 7 | # Notes: 8 | # - Library loading order matters 9 | # - system.time([function]) is an easy way to test optimizations 10 | # - parApply is snow parallel version of 'apply' 11 | # 12 | ## 13 | 14 | library(Rmpi) 15 | library(snow) 16 | 17 | # Initialize SNOW using MPI communication. The first line will get the number of 18 | # MPI processes the scheduler assigned to us. 
Everything else is standard SNOW 19 | np <- mpi.universe.size() - 1 20 | print(paste('We are assigned',np, 'processes')) 21 | cluster <- makeMPIcluster(np) 22 | 23 | # Print the hostname for each cluster member 24 | sayhello <- function() { 25 | info <- Sys.info()[c("nodename", "machine")] 26 | paste("Hello from", info[1], "with CPU type", info[2]) 27 | } 28 | 29 | names <- clusterCall(cluster, sayhello) 30 | print(unlist(names)) 31 | 32 | # Compute row sums in parallel using all processes, then a grand sum at the end 33 | # on the master process 34 | parallelSum <- function(m, n) { 35 | A <- matrix(rnorm(m*n), nrow = m, ncol = n) 36 | # Parallelize the summation 37 | row.sums <- parApply(cluster, A, 1, sum) 38 | print(sum(row.sums)) 39 | } 40 | 41 | # Run the operation over different size matricies 42 | system.time(parallelSum(5000, 5000)) 43 | 44 | # Always stop your cluster and exit MPI to ensure resources are properly freed 45 | stopCluster(cluster) 46 | mpi.exit() 47 | -------------------------------------------------------------------------------- /mpi/R/snow-test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # These bash and R script examples came from here 4 | # https://rcc.uchicago.edu/docs/software/environments/R/index.html#snow 5 | 6 | #SBATCH --job-name=snow-test 7 | #SBATCH --nodes=1 8 | #SBATCH --ntasks=10 9 | #SBATCH --time=10 10 | 11 | module load R/3.4.0 12 | module load openmpi 13 | 14 | # Always use -n 1 for the snow package. It uses Rmpi internally to spawn 15 | # additional processes dynamically 16 | mpirun -np 1 Rscript snow-test.R 17 | 18 | -------------------------------------------------------------------------------- /mpi/SUBMIT.txt: -------------------------------------------------------------------------------- 1 | srun --pty -n 10 bash -l 2 | 3 | OR 4 | 5 | srun -p short --ntasks 64 -N 2 --switches=1@1-00:00:00 --pty bash -l 6 | 7 | -------------------------------------------------------------------------------- /mpi/a.out: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ucr-hpcc/hpcc_slurm_examples/74b84cb0c85dedf1a096d03e6e9e515ae36a8c22/mpi/a.out -------------------------------------------------------------------------------- /mpi/fortran/fhello_world_mpi: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ucr-hpcc/hpcc_slurm_examples/74b84cb0c85dedf1a096d03e6e9e515ae36a8c22/mpi/fortran/fhello_world_mpi -------------------------------------------------------------------------------- /mpi/fortran/fhello_world_mpi.F90: -------------------------------------------------------------------------------- 1 | program hello_world 2 | use mpi 3 | implicit none 4 | integer ( kind = 4 ) error 5 | integer ( kind = 4 ) id 6 | integer p 7 | character(len=MPI_MAX_PROCESSOR_NAME) :: name 8 | integer clen 9 | integer, allocatable :: mype(:) 10 | real ( kind = 8 ) wtime 11 | 12 | call MPI_Init ( error ) 13 | call MPI_Comm_size ( MPI_COMM_WORLD, p, error ) 14 | call MPI_Comm_rank ( MPI_COMM_WORLD, id, error ) 15 | if ( id == 0 ) then 16 | 17 | wtime = MPI_Wtime ( ) 18 | 19 | write ( *, '(a)' ) ' ' 20 | write ( *, '(a)' ) 'HELLO_MPI - Master process:' 21 | write ( *, '(a)' ) ' FORTRAN90/MPI version' 22 | write ( *, '(a)' ) ' ' 23 | write ( *, '(a)' ) ' An MPI test program.' 
24 | write ( *, '(a)' ) ' ' 25 | write ( *, '(a,i8)' ) ' The number of processes is ', p 26 | write ( *, '(a)' ) ' ' 27 | 28 | end if 29 | 30 | call MPI_GET_PROCESSOR_NAME(NAME, CLEN, ERROR) 31 | 32 | write ( *, '(a)' ) ' ' 33 | write ( *, '(a,i8,a,a)' ) ' Process ', id, ' says "Hello, world!" ',name(1:clen) 34 | 35 | call MPI_Finalize ( error ) 36 | end program 37 | -------------------------------------------------------------------------------- /mpi/hello-mpi: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ucr-hpcc/hpcc_slurm_examples/74b84cb0c85dedf1a096d03e6e9e515ae36a8c22/mpi/hello-mpi -------------------------------------------------------------------------------- /mpi/hello-mpi.cpp: -------------------------------------------------------------------------------- 1 | # include 2 | # include 3 | # include 4 | # include 5 | # include 6 | 7 | using namespace std; 8 | 9 | int main ( int argc, char *argv[] ); 10 | void timestamp ( ); 11 | 12 | //****************************************************************************80 13 | 14 | int main ( int argc, char *argv[] ) 15 | 16 | //****************************************************************************80 17 | // 18 | // Purpose: 19 | // 20 | // MAIN is the main program for HELLO_MPI. 21 | // 22 | // Discussion: 23 | // 24 | // This is a simple MPI test program. 25 | // Each process prints out a "Hello, world!" message. 26 | // The master process also prints out a short message. 27 | // 28 | // Modified to use the C MPI bindings, 14 June 2016. 29 | // 30 | // Licensing: 31 | // 32 | // This code is distributed under the GNU LGPL license. 33 | // 34 | // Modified: 35 | // 36 | // 14 June 2016 37 | // 38 | // Author: 39 | // 40 | // John Burkardt 41 | // 42 | // Reference: 43 | // 44 | // William Gropp, Ewing Lusk, Anthony Skjellum, 45 | // Using MPI: Portable Parallel Programming with the 46 | // Message-Passing Interface, 47 | // Second Edition, 48 | // MIT Press, 1999, 49 | // ISBN: 0262571323, 50 | // LC: QA76.642.G76. 51 | // 52 | //76 int provided; 53 | //77 MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &provided); 54 | //78 MPI_Barrier(MPI_COMM_WORLD); 55 | //79 56 | //80 MPI_Comm_rank(MPI_COMM_WORLD, &nodeId); 57 | //81 MPI_Comm_size(MPI_COMM_WORLD, &numtasks); 58 | // 59 | // 60 | { 61 | int id; 62 | int ierr; 63 | int p; 64 | double wtime; 65 | // 66 | // Initialize MPI. 67 | // 68 | ierr = MPI_Init ( &argc, &argv ); 69 | // 70 | // Get the number of processes. 71 | // 72 | ierr = MPI_Comm_size ( MPI_COMM_WORLD, &p ); 73 | // 74 | // Get the individual process ID. 75 | // 76 | ierr = MPI_Comm_rank ( MPI_COMM_WORLD, &id ); 77 | // 78 | // Process 0 prints an introductory message. 79 | // 80 | if ( id == 0 ) 81 | { 82 | timestamp ( ); 83 | cout << "\n"; 84 | cout << "HELLO_MPI - Master process:\n"; 85 | cout << " C++/MPI version\n"; 86 | cout << " An MPI example program.\n"; 87 | cout << "\n"; 88 | cout << " The number of processes is " << p << "\n"; 89 | cout << "\n"; 90 | } 91 | // 92 | // Every process prints a hello. 93 | // 94 | if ( id == 0 ) 95 | { 96 | wtime = MPI_Wtime ( ); 97 | } 98 | cout << " Process " << id << " says 'Hello, world!'\n"; 99 | // 100 | // Process 0 says goodbye. 101 | // 102 | if ( id == 0 ) 103 | { 104 | wtime = MPI_Wtime ( ) - wtime; 105 | cout << " Elapsed wall clock time = " << wtime << " seconds.\n"; 106 | } 107 | // 108 | // Terminate MPI. 109 | // 110 | MPI_Finalize ( ); 111 | // 112 | // Terminate. 
113 | // 114 | if ( id == 0 ) 115 | { 116 | cout << "\n"; 117 | cout << "HELLO_MPI:\n"; 118 | cout << " Normal end of execution.\n"; 119 | cout << "\n"; 120 | timestamp ( ); 121 | } 122 | return 0; 123 | } 124 | //****************************************************************************80 125 | 126 | void timestamp ( ) 127 | 128 | //****************************************************************************80 129 | // 130 | // Purpose: 131 | // 132 | // TIMESTAMP prints the current YMDHMS date as a time stamp. 133 | // 134 | // Example: 135 | // 136 | // 31 May 2001 09:45:54 AM 137 | // 138 | // Licensing: 139 | // 140 | // This code is distributed under the GNU LGPL license. 141 | // 142 | // Modified: 143 | // 144 | // 08 July 2009 145 | // 146 | // Author: 147 | // 148 | // John Burkardt 149 | // 150 | // Parameters: 151 | // 152 | // None 153 | // 154 | { 155 | # define TIME_SIZE 40 156 | 157 | static char time_buffer[TIME_SIZE]; 158 | const struct std::tm *tm_ptr; 159 | size_t len; 160 | std::time_t now; 161 | 162 | now = std::time ( NULL ); 163 | tm_ptr = std::localtime ( &now ); 164 | 165 | len = std::strftime ( time_buffer, TIME_SIZE, "%d %B %Y %I:%M:%S %p", tm_ptr ); 166 | 167 | std::cout << time_buffer << "\n"; 168 | 169 | return; 170 | # undef TIME_SIZE 171 | } 172 | -------------------------------------------------------------------------------- /mpi/mpiTest: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ucr-hpcc/hpcc_slurm_examples/74b84cb0c85dedf1a096d03e6e9e515ae36a8c22/mpi/mpiTest -------------------------------------------------------------------------------- /mpi/mpiTest.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | int main(int argc, char *argv[]) { 6 | int numprocs, rank, namelen; 7 | char processor_name[MPI_MAX_PROCESSOR_NAME]; 8 | 9 | MPI_Init(&argc, &argv); 10 | MPI_Comm_size(MPI_COMM_WORLD, &numprocs); 11 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); 12 | MPI_Get_processor_name(processor_name, &namelen); 13 | 14 | printf("Hello World! from process %d out of %d on %s\n", rank, numprocs, processor_name); 15 | 16 | MPI_Finalize(); 17 | } 18 | -------------------------------------------------------------------------------- /mpi/mpiTest_mpich: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ucr-hpcc/hpcc_slurm_examples/74b84cb0c85dedf1a096d03e6e9e515ae36a8c22/mpi/mpiTest_mpich -------------------------------------------------------------------------------- /mpi/slurm-1880596.out: -------------------------------------------------------------------------------- 1 | Process Process 1 says 'Hello, world!' 2 | 2 says 'Hello, world!' 3 | 28 March 2018 12:15:18 PM 4 | 5 | HELLO_MPI - Master process: 6 | C++/MPI version 7 | An MPI example program. 8 | 9 | The number of processes is 3 10 | 11 | Process 0 says 'Hello, world!' 12 | Elapsed wall clock time = 1.14441e-05 seconds. 13 | 14 | HELLO_MPI: 15 | Normal end of execution. 16 | 17 | 28 March 2018 12:15:18 PM 18 | -------------------------------------------------------------------------------- /mpi/slurm-201.out: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------- 2 | An ORTE daemon has unexpectedly failed after launch and before 3 | communicating back to mpirun. 
This could be caused by a number 4 | of factors, including an inability to create a connection back 5 | to mpirun due to a lack of common network interfaces and/or no 6 | route found between them. Please check network connectivity 7 | (including firewalls and network routing requirements). 8 | -------------------------------------------------------------------------- 9 | -------------------------------------------------------------------------------- /multi_steps/SUBMIT.txt: -------------------------------------------------------------------------------- 1 | # First, request resources 2 | salloc -N 10 bash 3 | # After resources have been granted, you can issue srun commands to run on the cluster 4 | srun hostname 5 | -------------------------------------------------------------------------------- /python/README.md: -------------------------------------------------------------------------------- 1 | # Python 2 | 3 | This is a basic example of how to submit Python code to the cluster. 4 | 5 | Make sure your `job_py_wrapper.sh` and `myPyscript.py` files are in the same directory, and then submit your wrapper from that directory: 6 | 7 | ```bash 8 | # Make example directory 9 | mkdir ~/py_example 10 | cd ~/py_example 11 | 12 | # Download example scripts 13 | wget https://raw.githubusercontent.com/ucr-hpcc/hpcc_slurm_examples/master/python/job_py_wrapper.sh 14 | wget https://raw.githubusercontent.com/ucr-hpcc/hpcc_slurm_examples/master/python/myPyscript.py 15 | 16 | # Submit wrapper 17 | sbatch job_py_wrapper.sh 18 | ``` 19 | -------------------------------------------------------------------------------- /python/job_py_wrapper.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -l 2 | 3 | #SBATCH --nodes=1 4 | #SBATCH --ntasks=1 5 | #SBATCH --cpus-per-task=1 6 | #SBATCH --mem=10G 7 | #SBATCH --time=1-00:15:00 # 1 day and 15 minutes 8 | #SBATCH --mail-user=useremail@address.com 9 | #SBATCH --mail-type=ALL 10 | #SBATCH --job-name="Python Example" 11 | #SBATCH -p epyc # You can use any of the following; epyc, intel, batch, highmem, gpu 12 | 13 | # A version of Python from miniconda2 is loaded by default 14 | # However, if you want to use a different version, then do so here 15 | #module unload miniconda2; module load anaconda3 16 | 17 | # Optionally you can activate a conda environment if you have created one 18 | #conda activate python3 19 | 20 | # Use Python3 to run Python script 21 | python3 myPyscript.py 22 | -------------------------------------------------------------------------------- /python/myPyscript.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Use pandas for data handling 4 | import pandas as pd 5 | 6 | # Some calculation 7 | data=1+1 8 | 9 | # Convert int to data frame 10 | df=pd.DataFrame([data]) 11 | 12 | # Save data frame to csv 13 | df.to_csv('myPyresults.csv') 14 | -------------------------------------------------------------------------------- /rstudio-server/README.md: -------------------------------------------------------------------------------- 1 | # RStudio Server 2 | 3 | First review the following method, [HPCC Web Browser Access](https://hpcc.ucr.edu/manuals_linux-cluster_jobs.html#web-browser-access). 4 | After you have read through that, you can proceed with this example. 5 | 6 | ## Interactive 7 | 8 | The easiest method is to run the RStudio Server launcher interactively. 
9 | 10 | First request an interactive job: 11 | 12 | ```bash 13 | srun --partition=short --mem=8gb --cpus-per-task=2 --ntasks=1 --time=2:00:00 --pty bash -l 14 | ``` 15 | 16 | Then load the latest versions of `R` and `RStudio Server` from module system: 17 | 18 | ```bash 19 | module unload R 20 | module load R/4.1.2 # Or latest version 21 | module load rstudio-server/2022.02.0-443 # Or latest version 22 | ``` 23 | 24 | Lastly, start the RStudio Server by running the launcher script: 25 | 26 | ```sh 27 | start-rserver.sh 28 | ``` 29 | 30 | ## Non-Interactive 31 | 32 | Alternativly as you can start an RStudio Server under a non-interactive job, like so: 33 | 34 | ```bash 35 | sbatch -p short -c 4 --time=2:00:00 --mem=10g --wrap='module unload R; module load R/4.1.2; module load rstudio-server/2022.02.0-443; start-rserver.sh' --output='rstudio-%J.out' 36 | ``` 37 | 38 | These are minimal resources, for only 2 hours, so you may need to adjust them. 39 | When the job starts, you can look at the slurm log to check which node it is running on and how to setup your SSH tunnel: 40 | 41 | ```bash 42 | cat rstudio*.out 43 | ``` 44 | 45 | ## Custom Launcher (EXPERT) 46 | 47 | If you want to modify the RStudio Server launch script, you can download a copy from here and modify it. 48 | 49 | Request resource on a compute node: 50 | 51 | ```bash 52 | srun -p epyc --time=8:00:00 --mem=10gb --cpus-per-task=1 --pty bash -l 53 | ``` 54 | 55 | Download startup script: 56 | 57 | ```bash 58 | wget https://raw.githubusercontent.com/ucr-hpcc/hpcc_slurm_examples/master/rstudio-server/start-rserver.sh 59 | ``` 60 | 61 | Allow execute permissions and then modify as needed: 62 | 63 | ```bash 64 | chmod u+x start-rserver.sh 65 | vim start-rserver.sh 66 | ``` 67 | 68 | Start RStudio with script: 69 | 70 | ```bash 71 | ./start-rserver.sh 72 | ``` 73 | 74 | Follow instructions given on screen. 75 | -------------------------------------------------------------------------------- /rstudio-server/start-rserver.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -l 2 | 3 | # Load rstudio-server 4 | module load rstudio-server 5 | 6 | # Get script directory and go there 7 | CWD="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null && pwd )" 8 | 9 | # Setup user-specific secure cookie key 10 | USER=`whoami` 11 | COOKIE_KEY_PATH=/tmp/${USER}_secure-cookie-key 12 | rm -f $COOKIE_KEY_PATH 13 | mkdir -p $(dirname $COOKIE_KEY_PATH) 14 | 15 | #python -c 'import uuid; print(uuid.uuid4())' > $COOKIE_KEY_PATH 16 | uuid > $COOKIE_KEY_PATH 17 | chmod 600 $COOKIE_KEY_PATH 18 | 19 | export RETICULATE_PYTHON=$(which python3) 20 | 21 | # Generate password 22 | export RSTUDIO_PASSWORD=$(date +%s | sha256sum | base64 | head -c 16 ; echo) 23 | 24 | # Get random port 25 | PORT=$(shuf -i8000-9999 -n1) 26 | 27 | # Print info 28 | echo -e "\nSetup your tunnel like so: 29 | \tssh -NL $PORT:$HOSTNAME:$PORT $USER@cluster.hpcc.ucr.edu 30 | This command will hang, it does not produce any output. 31 | 32 | Next open your internet browser to: 33 | \thttp://localhost:$PORT 34 | 35 | Use the following credentials to login: 36 | \tusername: $USER 37 | \tpassword: $RSTUDIO_PASSWORD 38 | 39 | For more information regarding SSH tunnels, vists here: 40 | \thttps://hpcc.ucr.edu/manuals_linux-cluster_jobs.html#web-browser-access 41 | 42 | For help please email: support@hpcc.ucr.edu 43 | 44 | Now running RStudio Server... 
45 | " 46 | 47 | RSTUDIO_PASSWORD=$RSTUDIO_PASSWORD rserver \ 48 | --server-daemonize=0 \ 49 | --rsession-path="$CWD/rsession.sh" \ 50 | --secure-cookie-key-file=$COOKIE_KEY_PATH \ 51 | --auth-encrypt-password=1 \ 52 | --rsession-which-r=$(which R) \ 53 | --www-port=$PORT \ 54 | --auth-none=0 \ 55 | --auth-pam-helper="$CWD/rstudio_auth" \ 56 | 57 | -------------------------------------------------------------------------------- /singularity/deepvariant/README.md: -------------------------------------------------------------------------------- 1 | # DeepVariant 2 | 3 | Load the module 4 | 5 | ```bash 6 | module load deepvariant 7 | ``` 8 | 9 | Execute the run_deepvariant script within the Singularity container 10 | 11 | ```bash 12 | singularity exec \ 13 | -B "YOUR_INPUT_DIR":"/input" \ 14 | -B "YOUR_OUTPUT_DIR:/output" \ 15 | $DEEPVARIANT_IMG \ 16 | /opt/deepvariant/bin/run_deepvariant \ 17 | --model_type=WGS \ **Replace this string with exactly one of the following [WGS,WES,PACBIO]** 18 | --ref=/input/YOUR_REF \ 19 | --reads=/input/YOUR_BAM \ 20 | --output_vcf=/output/YOUR_OUTPUT_VCF \ 21 | --output_gvcf=/output/YOUR_OUTPUT_GVCF \ 22 | --num_shards=$(nproc) **This will use all your cores to run make_examples. Feel free to change.** 23 | ``` 24 | 25 | -------------------------------------------------------------------------------- /singularity/galaxy/README.md: -------------------------------------------------------------------------------- 1 | # Galaxy 2 | 3 | > Deprecated: Singularity is not required to install Galaxy, use conda instead [Galaxy via Conda](../../galaxy). 4 | 5 | ## Prep Workspace 6 | 7 | Since `Galaxy` requires write access to the `config` and `database` directories, we need to copy them out of the container. 8 | Once we have copies of these directories we will then mount them inside the container. 9 | 10 | ### Create Galaxy Home 11 | 12 | Create a home base for Galaxy: 13 | 14 | ``` 15 | mkdir -p bigdata/galaxy/20.05 16 | cd bigdata/galaxy/20.05 17 | ``` 18 | 19 | #### Copy Files from Container 20 | 21 | Copy the databases and configs from inside the container to the Galaxy home: 22 | 23 | ``` 24 | module load galaxy 25 | singularity exec $GALAXY_IMG rsync -r /opt/galaxy/20.05/config/ config 26 | singularity exec $GALAXY_IMG rsync -r /opt/galaxy/20.05/database/ database 27 | ``` 28 | 29 | #### Configure Galaxy 30 | 31 | Now that we have a writable copy of the configuration files and databases, we can make some changes. 
32 | Open the config and modify the port and IP address where Galaxy will start: 33 | 34 | ``` 35 | PORT=$(shuf -i8000-9999 -n1) 36 | sed -i "s/^\s*http: .*/ http: 0.0.0.0:$PORT/" config/galaxy.yml 37 | grep '^\s*http:' config/galaxy.yml 38 | ``` 39 | 40 | We also want to add our HPCC username as an administrator of Galaxy: 41 | 42 | ``` 43 | sed -i "s/^\s*#*admin_users: .*/ admin_users: ${USER}/" config/galaxy.yml 44 | grep '^\s.*admin_users' config/galaxy.yml 45 | ``` 46 | 47 | ## Run Galaxy Job 48 | 49 | ### Startup Script 50 | 51 | Download the startup script: 52 | 53 | ``` 54 | wget https://raw.githubusercontent.com/ucr-hpcc/hpcc_slurm_examples/master/singularity/galaxy/start_galaxy.sh 55 | ``` 56 | 57 | Use nano or vim to edit the script to use the proper paths: 58 | 59 | ``` 60 | vim start_galaxy.sh 61 | ``` 62 | 63 | Then submit the script like so: 64 | 65 | ``` 66 | sbatch -p short -c 24 --mem=100gb start_galaxy.sh 67 | ``` 68 | 69 | ## SSH Tunnel 70 | 71 | After the Galaxy job has started, collect the node and port details and follow these instructions: [Web Browser Access](https://hpcc.ucr.edu/manuals_linux-cluster_jobs.html#web-browser-access). 72 | -------------------------------------------------------------------------------- /singularity/galaxy/start_galaxy.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -l 2 | 3 | # Load Galaxy 4 | module load galaxy 5 | 6 | # Set Galaxy Home 7 | GALAXY_HOME=~/bigdata/galaxy/20.05 8 | 9 | singularity exec \ 10 | -B $GALAXY_HOME/database:/opt/galaxy/20.05/database \ 11 | -B $GALAXY_HOME/config:/opt/galaxy/20.05/config \ 12 | $GALAXY_IMG \ 13 | /opt/galaxy/20.05/run.sh 14 | -------------------------------------------------------------------------------- /singularity/mariadb/README.md: -------------------------------------------------------------------------------- 1 | # MariaDB & MySQL 2 | ## Initialize Database 3 | The easiest way to create a SQL database is to run the following script: 4 | 5 | ```bash 6 | create_mysql_db 7 | ``` 8 | 9 | Or you can download the latest version from here: [create_mysql_db.sh](create_mysql_db.sh) 10 | This will walk you through the steps to create your own database using a singularity container. 11 | 12 | ## Starting Database 13 | Once you have completed that, you should be able to submit a job to start your database. 14 | 15 | First download the startup job script from here: [start_mariadb.sh](start_mariadb.sh) and modify where necessary. 16 | 17 | Then submit this just like any other job, like so: 18 | 19 | ```bash 20 | sbatch start_mariadb.sh 21 | ``` 22 | 23 | After running the startup job you should have a log file called `db_host_port.txt` in the directory where you created your `mariadb.sif` file. 24 | This text file should contain the host and the port where your job is running, which can be used for your database connections. 
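For example, you can read that file and open a client connection to the running database. This is a minimal sketch, and it assumes the database name you chose during `create_mysql_db` (the default is `workdb`):

```bash
# Read the node and port written by start_mariadb.sh
read DB_HOST DB_PORT < db_host_port.txt

# Connect with the MySQL client; you will be prompted for the password you set
mysql -u $USER -h $DB_HOST -P $DB_PORT -p workdb
```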
25 | -------------------------------------------------------------------------------- /singularity/mariadb/create_mysql_db.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -l 2 | 3 | module load singularity 4 | 5 | # Get path to db files 6 | echo -en "\nEnter path for new database files [${PWD}/mysql]: " 7 | read db_path 8 | if [[ -z $db_path ]]; then 9 | db_path=${PWD}/mysql 10 | fi 11 | echo -e "Using DB path: ${db_path}\n" 12 | 13 | # Check if db_path is valid 14 | if [[ -d $db_path ]]; then 15 | echo -e "ERROR: Database path ${db_path} already exists\n" 16 | exit 1 17 | else 18 | echo -e "Creating directory ${db_path}\n" 19 | mkdir -p ${db_path} && cd ${db_path} 20 | fi 21 | 22 | echo -n "Enter name for database [workdb]: " 23 | read db_name 24 | if [[ -z $db_name ]]; then 25 | db_name="workdb" 26 | fi 27 | echo -e "Using DB name: ${db_name}\n" 28 | 29 | # Get DB password 30 | echo -n "Enter MySQL Password: " 31 | read -s db_pass 32 | echo -e "\n" 33 | 34 | # Get port (hope it is not used) 35 | db_port=$(shuf -i3000-3999 -n1) 36 | 37 | # Make sure a password was given (Add more robust checks here) 38 | if [[ -z $db_pass ]]; then 39 | echo "ERROR: You cannot have an empty MySQL password." 40 | exit 1 41 | fi 42 | 43 | # Remote login so that sudo is not required for build 44 | echo "Attempting remote login..." 45 | if [[ ! -f ~/.singularity/remote.yaml ]] || [[ ! -s ~/.singularity/remote.yaml ]]; then 46 | singularity remote login; EXITCODE=$? 47 | fi 48 | 49 | # If remote login failed, advise user 50 | if [[ $EXITCODE -ne 0 ]]; then 51 | echo -e "\n 52 | ERROR: singularity remote login failed. 53 | Delete ~/.singularity/remote.yaml. 54 | Then generate new access token here https://cloud.sylabs.io/auth. 55 | Then try this script again with the new token.\n" 56 | exit 1 57 | fi 58 | 59 | # Create singularity definition 60 | cat << EOF > mariadb.def 61 | Bootstrap: docker 62 | From: mariadb:10.3.9 63 | 64 | %post 65 | # Your username 66 | YOUR_USERNAME="${USER}" 67 | 68 | sed -ie "s/^#user.*/user = ${USER}/" /etc/mysql/my.cnf 69 | sed -ie "s/^port.*/port = ${db_port}/" /etc/mysql/my.cnf 70 | chmod 1777 /run/mysqld 71 | 72 | %runscript 73 | exec "mysqld" "$@" 74 | 75 | %startscript 76 | exec "mysqld_safe" 77 | 78 | EOF 79 | 80 | # Build singularity image 81 | OLD_SINGULARITY_BINDPATH=$SINGULARITY_BINDPATH 82 | unset SINGULARITY_BINDPATH 83 | singularity build --remote mariadb.sif mariadb.def 84 | SINGULARITY_BINDPATH=$OLD_SINGULARITY_BINDPATH 85 | unset OLD_SINGULARITY_BINDPATH 86 | # Create directory where db files live 87 | mkdir db 88 | 89 | # Initialize db 90 | singularity exec --writable-tmpfs -B db/:/var/lib/mysql mariadb.sif 'mysql_install_db' &> /dev/null 91 | ERROR_CODE=$? 92 | if [[ $ERROR_CODE -ne 0 ]]; then 93 | echo "ERROR: Database failed to initialize." 94 | exit 1 95 | fi 96 | 97 | # Create newuser script 98 | cat << EOF > newuser.sh 99 | #!/bin/sh 100 | 101 | # Start mysql 102 | mysqld_safe --datadir=/var/lib/mysql & 103 | MYSQL_PID=\$! 
104 | 105 | # Give mysql time to startup 106 | sleep 10 107 | 108 | # Create work db and new user 109 | mysql -u root mysql < newuser.sql 110 | 111 | # Secure mysql 112 | #mysql_secure_installation 113 | 114 | # Update root password 115 | mysqladmin -u root --port ${db_port} password "${db_pass}" 116 | 117 | # Stop mysql 118 | echo \$(date "+%y%m%d %T") "mysqld_safe Shutting down" 119 | mysqladmin -u root --port=${db_port} --password="${db_pass}" shutdown 2>/dev/null || kill -9 \$MYSQL_PID 120 | echo \$(date "+%y%m%d %T") "mysqld_safe Shutdown" 121 | 122 | EOF 123 | 124 | # Create newuser SQL file 125 | cat << EOF > newuser.sql 126 | CREATE DATABASE ${db_name}; 127 | CREATE USER '${USER}'@'%' IDENTIFIED BY "${db_pass}"; 128 | GRANT ALL PRIVILEGES ON *.* TO ${USER}@'%' WITH GRANT OPTION; 129 | FLUSH PRIVILEGES; 130 | EOF 131 | 132 | # Make sure this is executable 133 | chmod u+x newuser.sh 134 | 135 | # Create newuser 136 | singularity exec --writable-tmpfs -B db/:/var/lib/mysql mariadb.sif "${PWD}/newuser.sh" 137 | rm -f newuser.sh newuser.sql 138 | 139 | # Stop mysql service 140 | #mysqladmin -u $USER -h $HOSTNAME --password="$db_pass" shutdown 141 | 142 | # Create service instance 143 | echo -e "\n 144 | ########################################################################################## 145 | # IMPORTANT NOTES !!! 146 | ########################################################################################## 147 | 148 | # Make sure you have loaded singularity: 149 | module load singularity 150 | 151 | # To start your service, do the following: 152 | cd ${PWD} 153 | singularity instance start --writable-tmpfs -B db/:/var/lib/mysql mariadb.sif mysqldb 154 | 155 | # To list your running services, run the following: 156 | singularity instance list 157 | 158 | # To connect to your running service, run the following: 159 | mysql -u ${USER} -h ${HOSTNAME} -P ${db_port} -p ${db_name} 160 | 161 | # To stop your service, run the following: 162 | singularity instance stop mysqldb 163 | 164 | # To get a debug shell into your service, do the following: 165 | cd ${PWD} 166 | singularity instance stop mysqldb 167 | singularity shell --writable-tmpfs -B db/:/var/lib/mysql mariadb.sif 168 | 169 | ##########################################################################################\n" 170 | 171 | -------------------------------------------------------------------------------- /singularity/mariadb/start_mariadb.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -l 2 | #SBATCH -p epyc 3 | #SBATCH --time=7-00:00:00 4 | #SBATCH -c 2 5 | #SBATCH --mem=10g 6 | 7 | ############################################################################ 8 | # NOTE: # 9 | # Make sure you have already run and completed the create_mysql_db command # 10 | ############################################################################ 11 | 12 | # Load singularity 13 | module load singularity 14 | 15 | # Move to where your mariadb.sif image lives 16 | cd ~/bigdata/mysql/ 17 | 18 | # Get port and host name info and save it to a file 19 | PORT=$(singularity exec --writable-tmpfs -B db/:/var/lib/mysql mariadb.sif grep -oP '^port = \K\d{4}' /etc/mysql/my.cnf | head -1) 20 | echo $HOSTNAME $PORT > db_host_port.txt 21 | 22 | # Start your mariadb like a service 23 | singularity exec --writable-tmpfs -B db/:/var/lib/mysql mariadb.sif /usr/bin/mysqld_safe 24 | -------------------------------------------------------------------------------- /singularity/metaerg/README.md: 
-------------------------------------------------------------------------------- 1 | # Singularity Metaerg Container 2 | Singularity container built from docker image: [https://github.com/xiaoli-dong/metaerg](https://github.com/xiaoli-dong/metaerg) 3 | 4 | ## Setup 5 | Choose where you would like your analysis to be saved, typically a sub-directory under bigdata: 6 | 7 | ```bash 8 | mkdir ~/bigdata/metaerg 9 | cd ~/bigdata/metaerg 10 | ``` 11 | 12 | Ensure that you have a directory called `data` in the current directory: 13 | 14 | ```bash 15 | mkdir data 16 | ``` 17 | 18 | Also ensure that you have a fasta file called `contig.fasta` in the `data` directory: 19 | 20 | ```bash 21 | cp /path/to/contig.fasta data/contig.fasta 22 | ``` 23 | 24 | ## Script 25 | In order to submit this job in a non-interactive way, we will need to create a submission script. 26 | Download the submission script and edit it based on your needs: 27 | 28 | ```bash 29 | wget https://raw.githubusercontent.com/ucr-hpcc/hpcc_slurm_examples/master/singularity/metaerg/metaerg_job.sh 30 | nano metaerg_job.sh # You could also use vim/emacs or another text editor 31 | ``` 32 | 33 | ## Submit 34 | Once you have set up your data directory and updated your submission script, you can submit your job to the cluster with the following command: 35 | 36 | ```bash 37 | sbatch metaerg_job.sh 38 | ``` 39 | 40 | -------------------------------------------------------------------------------- /singularity/metaerg/metaerg_job.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -l 2 | 3 | #SBATCH --nodes=1 4 | #SBATCH --ntasks=1 5 | #SBATCH --cpus-per-task=8 6 | #SBATCH -p epyc 7 | #SBATCH --mem-per-cpu=8G 8 | #SBATCH --time=1-00:15:00 # 1 Day and 15 minutes 9 | ##SBATCH --mail-user=emailaddress@mail.com 10 | ##SBATCH --mail-type=ALL 11 | #SBATCH --job-name="Metaerg_Sing" 12 | 13 | # Load Modules 14 | module load metaerg # This auto loads singularity 15 | 16 | # Create DB, this only needs to be done once 17 | singularity exec -B data:/data $METAERG_IMG setup_db.pl -o /data -v 132 18 | 19 | # Execute script in singularity container 20 | singularity exec -B data:/data $METAERG_IMG metaerg.pl --dbdir /data/db --outdir /data/my_metaerg_output /data/contig.fasta 21 | 22 | -------------------------------------------------------------------------------- /singularity/mongo/README.md: -------------------------------------------------------------------------------- 1 | # MongoDB 2 | 3 | ## Configure 4 | 5 | First load the module: 6 | 7 | ```bash 8 | module load mongo/4.2.0 9 | ``` 10 | 11 | Create and move to data location: 12 | 13 | ```bash 14 | mkdir -p ~/bigdata/mongo/data 15 | cd ~/bigdata/mongo 16 | ``` 17 | 18 | You can run mongo simply like this; however, it produces NUMA node warnings: 19 | 20 | ```bash 21 | singularity run -B data:/data/db $MONGO_IMG 22 | ``` 23 | 24 | Run mongo the first time with NUMA node support and without auth: 25 | 26 | ```bash 27 | singularity instance start -B data:/data/db $MONGO_IMG numactl --interleave=all mongod 28 | ``` 29 | 30 | From the same terminal, connect to your mongodb service like so: 31 | ```bash 32 | singularity exec -B data:/data/db $MONGO_IMG mongo 33 | ``` 34 | 35 | Then create an admin user with the following: 36 | ``` 37 | use admin 38 | db.createUser( 39 | { 40 | user: 'admin', 41 | pwd: passwordPrompt(), 42 | roles: [ { role: 'root', db: 'admin' } ] 43 | } 44 | ); 45 | ``` 46 | 47 | For more information regarding user access, please refer to the following: 48 | 
[https://docs.mongodb.com/manual/tutorial/enable-authentication/#create-the-user-administrator](https://docs.mongodb.com/manual/tutorial/enable-authentication/#create-the-user-administrator) 49 | 50 | Then stop the mongod service: 51 | 52 | ```bash 53 | singularity instance stop numactl 54 | ``` 55 | 56 | ## Within the same job 57 | 58 | You can now start it again from within a job, like so: 59 | 60 | ```bash 61 | module load mongo 62 | cd ~/bigdata/mongo 63 | singularity instance start -B data:/data/db $MONGO_IMG numactl --interleave=all mongod --auth 64 | ``` 65 | 66 | > NOTE: This does not seem to be working within the latest version of mongo. 67 | 68 | ## Separate Jobs 69 | 70 | Or as a separate job like this: 71 | 72 | ``` 73 | sbatch -p short --wrap 'module load mongo; cd ~/bigdata/mongo; singularity exec -B data:/data/db $MONGO_IMG numactl --interleave=all mongod --bind_ip_all --auth;' 74 | ``` 75 | 76 | Lastly, connect to the mongod service, from the same job, with the following: 77 | 78 | ```bash 79 | singularity exec -B data:/data/db $MONGO_IMG mongo --authenticationDatabase "admin" -u "admin" -p 80 | ``` 81 | 82 | Or connect to mongod from a different node like so: 83 | 84 | ```bash 85 | singularity exec -B data:/data/db $MONGO_IMG mongo --host NodeName --authenticationDatabase "admin" -u "admin" -p 86 | ``` 87 | 88 | -------------------------------------------------------------------------------- /singularity/orthomcl/README.md: -------------------------------------------------------------------------------- 1 | # Database inside of job 2 | Refer to [../mariadb/start_mariadb.sh](../mariadb/start_mariadb.sh) 3 | 4 | # Database outside of job 5 | Refer to [orthomcl_job.sh](orthomcl_job.sh) 6 | -------------------------------------------------------------------------------- /singularity/orthomcl/orthomcl_job.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | #SBATCH --ntasks=1 4 | #SBATCH --cpus-per-task=1 5 | #SBATCH --mem=10G 6 | #SBATCH --mail-type=ALL 7 | #SBATCH --mail-user=useremail@address.com 8 | #SBATCH --time=4:00:00 9 | #SBATCH --job-name=orthomcl 10 | #SBATCH -p epyc 11 | 12 | module load singularity 13 | module load orthomcl 14 | 15 | # Go to database directory 16 | cd ~/bigdata/mysql 17 | 18 | # Start Database 19 | PORT=$(singularity exec --writable-tmpfs -B db/:/var/lib/mysql mariadb.sif grep -oP '^port = \K\d{4}' /etc/mysql/my.cnf | head -1) 20 | singularity instance start --writable-tmpfs -B db/:/var/lib/mysql mariadb.sif mysqldb 21 | sleep 10 22 | 23 | # Move to bigdata 24 | cd ~/bigdata/ 25 | 26 | # Update Orthomcl.config 27 | sed -i "s/^dbConnectString.*$/dbConnectString=dbi:mysql:orthomcl:${HOSTNAME}:${PORT}/" orthomcl/orthomcl.config 28 | 29 | # Run orthomcl 30 | orthomclInstallSchema orthomcl/orthomcl.config orthomcl/install_schema.log 31 | 32 | # Stop Database 33 | singularity instance stop mysqldb 34 | -------------------------------------------------------------------------------- /spark/spark_job.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -l 2 | 3 | #SBATCH -p short 4 | #SBATCH --nodes=3 5 | #SBATCH --cpus-per-task=8 6 | #SBATCH --ntasks-per-node=1 7 | #SBATCH --time=0:20:00 8 | #SBATCH --job-name=spark-test 9 | 10 | ########################################################## 11 | # PBS version was pulled from here:                      # 12 | # https://www.dursi.ca/post/spark-in-hpc-clusters.html   # 13 | ########################################################## 14 | 
15 | # Get names of allocated nodes 16 | nodes=($( scontrol show hostnames $SLURM_NODELIST )) 17 | nnodes=${#nodes[@]} 18 | last=$(( $nnodes - 1 )) 19 | 20 | # Move to directory where job was submitted from 21 | cd $SLURM_SUBMIT_DIR 22 | 23 | # Assign location to Spark home var 24 | export SPARK_HOME=/rhome/jhayes/shared/pkgs/spark/2.4.0-bin-hadoop2.7 25 | # Log into first node and start master Spark process 26 | ssh ${nodes[0]}.ib.hpcc.ucr.edu "module load java/8u45; cd ${SPARK_HOME}; ./sbin/start-master.sh" 27 | sparkmaster="spark://${nodes[0]}:7077" 28 | 29 | # Assign location to scratch var 30 | SCRATCH=~/bigdata/Projects/spark/ 31 | # Create work directory 32 | mkdir -p ${SCRATCH}/work 33 | # Remove old logs, if they exist 34 | rm -f ${SCRATCH}/work/nohup*.out 35 | 36 | # On each node, start Spark worker 37 | for i in $( seq 0 $last ); do 38 | ssh ${nodes[$i]}.ib.hpcc.ucr.edu "cd ${SPARK_HOME}; module load java/8u45; nohup ./bin/spark-class org.apache.spark.deploy.worker.Worker ${sparkmaster} &> ${SCRATCH}/work/nohup-${nodes[$i]}.out" & 39 | done 40 | 41 | # Remove the old results directory, if it exists 42 | rm -rf ${SCRATCH}/wordcounts 43 | 44 | # Create Spark Python code to be worked 45 | cat > sparkscript.py < NOTE: When submitting a real STATA job, you will need to adjust the `#SBATCH` resource requests within the `submit.sh` before submitting it. 28 | -------------------------------------------------------------------------------- /stata/submit.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -l 2 | 3 | #SBATCH --nodes=1 4 | #SBATCH --ntasks=1 5 | #SBATCH --cpus-per-task=1 6 | #SBATCH --mem-per-cpu=1G 7 | #SBATCH --time=15:00 # 15 minutes 8 | #SBATCH --mail-type=ALL 9 | #SBATCH --job-name="just_a_test" 10 | #SBATCH -p short # This is the default partition, you can use any of the following; intel, batch, highmem, gpu 11 | 12 | # Load modules 13 | module load stata 14 | 15 | # do work 16 | stata < test.do 17 | 18 | -------------------------------------------------------------------------------- /stata/test.do: -------------------------------------------------------------------------------- 1 | gen f=43 2 | list f 3 | -------------------------------------------------------------------------------- /vasp/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -l 2 | 3 | #SBATCH --nodes=1 4 | #SBATCH --cpus-per-task=1 5 | #SBATCH --ntasks=16 6 | #SBATCH --mem=10G 7 | ##SBATCH --mail-user=email@address.com 8 | ##SBATCH --mail-type=ALL 9 | #SBATCH --time=7-00:00:00 10 | #SBATCH --job-name="vasp" 11 | #SBATCH -p epyc 12 | 13 | module -s load vasp/5.4.1_oneapi-2022.1.2.146 14 | export OMP_NUM_THREADS=1 15 | ulimit -s unlimited 16 | mpirun -n 16 vasp_std 17 | 18 | -------------------------------------------------------------------------------- /vnc/READMD.md: -------------------------------------------------------------------------------- 1 | # VNC 2 | 3 | Submit this job like so: 4 | 5 | ```bash 6 | sbatch vnc_job.sh 7 | ``` 8 | 9 | Then check to see if the job is running: 10 | 11 | ```bash 12 | squeue -u $USER 13 | ``` 14 | 15 | Once the job has started, check the Slurm log to see which port and compute node are used: 16 | 17 | ```bash 18 | cat vnc_job-*.out 19 | ``` 20 | 21 | -------------------------------------------------------------------------------- /vnc/vnc_job.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | sbatch -o 'vnc_job-%j.out' -p 
epyc -c 4 --time 2:00:00 --wrap='vncserver -fg' 4 | 5 | -------------------------------------------------------------------------------- /workshop/README.md: -------------------------------------------------------------------------------- 1 | # hpcc_workshop_challenge 2 | 3 | Try this small challenge to see how much you learned from the presentation! 4 | 5 | This sbatch script will load the `hpcc_workshop/2.0` module and generate an output file in the `output` folder. 6 | You will need to change permissions on this file in order to read its contents. 7 | The secret message can be viewed by running `workshop_challenge ./output/secret_message.txt`. 8 | -------------------------------------------------------------------------------- /workshop/SBATCH.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -l 2 | 3 | #SBATCH --nodes=1 4 | #SBATCH --ntasks=1 5 | #SBATCH --cpus-per-task=1 6 | #SBATCH --mem=1G 7 | #SBATCH --time=00:05:00 8 | #SBATCH --mail-user=useremail@address.com 9 | #SBATCH --mail-type=ALL 10 | #SBATCH --job-name="workshop_test" 11 | #SBATCH -p short 12 | 13 | module purge 14 | module load hpcc_workshop/2.0 15 | module load miniconda3 16 | 17 | mkdir -p output 18 | rm -rf output/secret_message.txt 19 | create_output_file > output/secret_message.txt 20 | 21 | chmod 000 output/secret_message.txt 22 | 23 | --------------------------------------------------------------------------------
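
After the workshop job completes, the secret message file exists but is unreadable, since the script deliberately sets its mode to 000. A minimal sketch of the follow-up steps, assuming `SBATCH.sh` was submitted from the `workshop` directory and that the `workshop_challenge` command is provided by the `hpcc_workshop/2.0` module:

```bash
# Minimal sketch: make the output readable again, then decode it.
# Assumes the job was submitted from this directory and that the
# hpcc_workshop/2.0 module provides the workshop_challenge command.
module load hpcc_workshop/2.0
chmod u+r output/secret_message.txt        # SBATCH.sh sets mode 000 on purpose
workshop_challenge ./output/secret_message.txt
```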