├── .gitignore ├── R ├── README.md ├── parallel_caret_example │ ├── parallelR_Caret.R │ └── parallelR_Caret.slrm ├── parallel_example │ ├── parallelR.R │ ├── parallelR.slrm │ ├── parallelR_compare.R │ └── parallelR_compare.slrm └── serial_example │ ├── serialR.R │ └── serialR.slrm ├── README.rst ├── gpu └── helloworld │ ├── README.md │ ├── helloworld.cu │ ├── helloworld.slrm │ └── pi.cu ├── io ├── README.md ├── analyze_iodata.py ├── create_iodata.py ├── create_iodata.sh └── iotest.sh ├── misc └── gpg-batch-script.sh ├── mpi ├── hello_mpi │ ├── README.md │ ├── hello_mpi.c │ └── hello_mpi.slrm └── hello_mpi_fortran │ ├── README.md │ ├── hello_mpi_fortran.f90 │ └── hello_mpi_fortran.slrm ├── ngrams ├── README.md ├── array.sh ├── combine-counts.py ├── count-multi.py ├── count.py └── generate.py ├── openmp └── hello_omp │ ├── README.md │ ├── hello_omp.c │ └── hello_omp.slrm ├── postgres ├── build_postgres_image.sh ├── run_postgres_example.sh ├── test_postgres_read.py └── test_postgres_write.py ├── python ├── multiprocessing │ ├── index.rst │ ├── multiprocessing_demo.py │ └── multiprocessing_demo.slrm ├── python_openmp │ ├── README.md │ ├── python_openmp.py │ └── python_openmp.slrm └── simple │ └── simple.py ├── scip ├── 2018 │ ├── README.md │ ├── high-level-languages-ex01 │ │ ├── testRK4.R │ │ ├── testRK4.m │ │ ├── testRK4.py │ │ └── testRK4.slrm │ └── high-level-languages-ex02 │ │ ├── fit_R.R │ │ ├── fit_R.slrm │ │ ├── fit_matlab.m │ │ ├── fit_matlab.slrm │ │ ├── fit_python.py │ │ └── fit_python.slrm └── README.md └── slurm ├── index.rst ├── memory-use.py ├── pi-gpu.cu ├── pi-mpi.c ├── pi-mpi.py ├── pi-mpi.slrm ├── pi-mpi4py.slrm ├── pi-openmp.c ├── pi-sharedmemory.slrm ├── pi.py └── pi_aggregation.py /.gitignore: -------------------------------------------------------------------------------- 1 | a.out 2 | *.out -------------------------------------------------------------------------------- /R/README.md: -------------------------------------------------------------------------------- 1 | # R in Triton 2 | 3 | These examples describe how to run R in the Triton cluster 4 | 5 | ## R serial example 6 | 7 | There are two examples. First runs a simple R script that trains a Caret model. Second runs the R-benchmark-25.R benchmark script. 8 | 9 | Caret model is from Caret examples by Tobias Kind: https://github.com/tobigithub/caret-machine-learning/blob/master/caret-cv/caret-cv-simple.R 10 | 11 | R-benchmark-25.R script from rbenchmarki repository: https://github.com/rbenchmark/benchmarks/tree/master/R-benchmark-25 12 | 13 | Usage: 14 | ```bash 15 | sbatch serialR.slrm 16 | sbatch serialR.slrm 17 | ``` 18 | 19 | ## R parallel example 20 | 21 | This example runs a non-vectorized code in serial and parallel with 1 to 4 cpus. 22 | 23 | Code is adapted from an excellent blog post in: http://www.parallelr.com/r-with-parallel-computing/ 24 | 25 | Raw code of the ExplicitParallel.R example is available in: https://github.com/PatricZhao/ParallelR/blob/master/PP_for_COS/ExplicitParallel.R 26 | 27 | Usage: 28 | ```shell 29 | sbatch parallelR.slrm 30 | ``` 31 | 32 | ## R parallel example using Caret 33 | 34 | This example runs a Caret training model in serial and parallel with 4 cpus. 
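The key step in the parallel version is registering a `doParallel` backend before calling `train()`; caret then runs its resampling loop on the registered workers. A minimal sketch of the idea (assuming the `caret` and `doParallel` packages are available, and sizing the worker pool from `SLURM_CPUS_PER_TASK`):

```r
library(caret)
library(doParallel)

data(BloodBrain)

# Use the CPUs allocated by Slurm (falls back to 1 if the variable is unset)
cores <- as.integer(Sys.getenv("SLURM_CPUS_PER_TASK", "1"))
cl <- makeCluster(cores)
registerDoParallel(cl)

# train() automatically uses the registered parallel backend for resampling
fit <- train(bbbDescr, logBBB, "knn")

stopCluster(cl)
registerDoSEQ()
```

See `parallel_caret_example/parallelR_Caret.R` for the full script, which also times the serial and parallel runs.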
35 | 36 | Code is based on a Caret example by Tobias Kind: https://github.com/tobigithub/caret-machine-learning/wiki/caret-ml-parallel 37 | 38 | Raw code is available in: https://github.com/tobigithub/caret-machine-learning/blob/master/caret-parallel/caret-parallel-train.R 39 | 40 | Usage: 41 | ```shell 42 | sbatch parallelR_Caret.slrm 43 | ``` 44 | -------------------------------------------------------------------------------- /R/parallel_caret_example/parallelR_Caret.R: -------------------------------------------------------------------------------- 1 | # Adapted from caret-parallel-train.R in Caret examples 2 | # to Triton by Simo Tuomisto, 2017 3 | 4 | # Original docstring: 5 | # Run multiple caret models in parallel using lapply 6 | # https://github.com/tobigithub/caret-machine-learning 7 | # Tobias Kind (2015) 8 | 9 | 10 | # Get the number of cores to use from command line or from SLURM_CPUS_PER_TASK 11 | library("optparse") 12 | 13 | option_list = list( 14 | make_option(c("-c", "--cores"), type="integer", default=NULL, 15 | help="Number of cpus to use", metavar="integer")); 16 | 17 | opt_parser = OptionParser(option_list=option_list); 18 | opt = parse_args(opt_parser); 19 | 20 | if (is.null(opt$cores)) { 21 | cores <- as.integer(Sys.getenv("SLURM_CPUS_PER_TASK")) 22 | if (is.na(cores)) { 23 | cores <- 1 24 | } 25 | } else { 26 | cores <- opt$cores 27 | } 28 | message("Number of cores used: ",cores) 29 | 30 | # ------------------------------------------------------------------------- 31 | # FIRST sequential code (not parallel one CPU core): 32 | # ------------------------------------------------------------------------- 33 | 34 | 35 | require(caret); data(BloodBrain); set.seed(123) 36 | 37 | 38 | message('Running Caret training in serial fashion') 39 | system.time(fit1 <- train(bbbDescr, logBBB, "knn")) 40 | fit1 41 | 42 | # ------------------------------------------------------------------------- 43 | # SECOND parallel register 4 cores (no worries if you only have 2) 44 | # train the caret model in parallel 45 | # ------------------------------------------------------------------------- 46 | 47 | message('Running Caret training in parallel fashion') 48 | library(doParallel) 49 | cl <- makeCluster(cores) 50 | registerDoParallel(cl) 51 | 52 | require(caret); data(BloodBrain); set.seed(123) 53 | 54 | system.time(fit1 <- train(bbbDescr, logBBB, "knn")) 55 | fit1 56 | 57 | stopCluster(cl) 58 | registerDoSEQ() 59 | 60 | ### END 61 | -------------------------------------------------------------------------------- /R/parallel_caret_example/parallelR_Caret.slrm: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #SBATCH -p short 3 | #SBATCH -t 00:20:00 4 | #SBATCH --nodes=1 5 | #SBATCH --ntasks=1 6 | #SBATCH --cpus-per-task=4 7 | #SBATCH --mem=8G 8 | #SBATCH -o parallelR_Caret.out 9 | module load R 10 | 11 | srun Rscript parallelR_Caret.R 12 | -------------------------------------------------------------------------------- /R/parallel_example/parallelR.R: -------------------------------------------------------------------------------- 1 | 2 | # This code is part of the ParallelR blog ExplicitParallel example. 
3 | # Adapted to work in Triton by Simo Tuomisto, 2017 4 | # See the blog post in http://www.parallelr.com/r-with-parallel-computing/ 5 | # Blog's examples are available in GitHub: https://github.com/patriczhao/ParallelR 6 | 7 | # Original author docstring: 8 | # Examples for the R and Parallel Computing blog in COS website (cos.name) 9 | # Author: Peng Zhao, 8/30/2016 10 | 11 | # Get the number of cores to use from command line or from SLURM_CPUS_PER_TASK 12 | library("optparse") 13 | 14 | option_list = list( 15 | make_option(c("-c", "--cores"), type="integer", default=NULL, 16 | help="Number of cpus to use", metavar="integer")); 17 | 18 | opt_parser = OptionParser(option_list=option_list); 19 | opt = parse_args(opt_parser); 20 | 21 | if (is.null(opt$cores)) { 22 | cores <- as.integer(Sys.getenv("SLURM_CPUS_PER_TASK")) 23 | if (is.na(cores)) { 24 | cores <- 1 25 | } 26 | } else { 27 | cores <- opt$cores 28 | } 29 | message("Number of cores used: ",cores) 30 | 31 | 32 | # Generate data 33 | message("Generating data") 34 | len <- 1e6 35 | a <- runif(len, -10, 10) 36 | a[sample(len, 100,replace=TRUE)] <- 0 37 | 38 | b <- runif(len, -10, 10) 39 | c <- runif(len, -10, 10) 40 | 41 | # Not vectorized function 42 | solve.quad.eq <- function(a, b, c) 43 | { 44 | # Not validate eqution: a and b are almost ZERO 45 | if(abs(a) < 1e-8 && abs(b) < 1e-8) return(c(NA, NA) ) 46 | 47 | # Not quad equation 48 | if(abs(a) < 1e-8 && abs(b) > 1e-8) return(c(-c/b, NA)) 49 | 50 | # No Solution 51 | if(b*b - 4*a*c < 0) return(c(NA,NA)) 52 | 53 | # Return solutions 54 | x.delta <- sqrt(b*b - 4*a*c) 55 | x1 <- (-b + x.delta)/(2*a) 56 | x2 <- (-b - x.delta)/(2*a) 57 | 58 | return(c(x1, x2)) 59 | } 60 | 61 | ############################################################################################# 62 | # *apple style 63 | ############################################################################################## 64 | # serial code 65 | message("Running lapply") 66 | system.time( 67 | res1.s <- lapply(1:len, FUN = function(x) { solve.quad.eq(a[x], b[x], c[x])}) 68 | ) 69 | 70 | # parallel 71 | message("Running mcapply with parallel-package") 72 | library(parallel) 73 | # multicores on Linux 74 | system.time( 75 | res1.p <- mclapply(1:len, FUN = function(x) { solve.quad.eq(a[x], b[x], c[x])}, mc.cores = cores) 76 | ) 77 | 78 | 79 | # cluster 80 | message("Running Cluster parLapply with parallel-package") 81 | cl <- makeCluster(cores) 82 | clusterExport(cl, c('solve.quad.eq', 'a', 'b', 'c')) 83 | system.time( 84 | res1.p <- parLapply(cl, 1:len, function(x) { solve.quad.eq(a[x], b[x], c[x]) }) 85 | ) 86 | stopCluster(cl) 87 | 88 | 89 | ########################################################################################## 90 | # For style 91 | ########################################################################################### 92 | # serial code 93 | message("Running serial for-loop") 94 | res2.s <- matrix(0, nrow=len, ncol = 2) 95 | system.time( 96 | for(i in 1:len) { 97 | res2.s[i,] <- solve.quad.eq(a[i], b[i], c[i]) 98 | } 99 | ) 100 | 101 | # foreach 102 | library(foreach) 103 | library(doParallel) 104 | 105 | # Real physical cores in my computer 106 | cl <- makeCluster(cores) 107 | registerDoParallel(cl, cores=cores) 108 | 109 | # clusterSplit are very convience to split data but it takes lots of extra memory 110 | # chunks <- clusterSplit(cl, 1:len) 111 | 112 | # split data by ourselves 113 | chunk.size <- len/cores 114 | 115 | message("Running parallel for loop with foreach- and 
doParallel-packages") 116 | system.time( 117 | res2.p <- foreach(i=1:cores, .combine='rbind') %dopar% 118 | { # local data for results 119 | res <- matrix(0, nrow=chunk.size, ncol=2) 120 | for(x in ((i-1)*chunk.size+1):(i*chunk.size)) { 121 | res[x - (i-1)*chunk.size,] <- solve.quad.eq(a[x], b[x], c[x]) 122 | } 123 | # return local results 124 | res 125 | } 126 | ) 127 | 128 | stopImplicitCluster() 129 | stopCluster(cl) 130 | -------------------------------------------------------------------------------- /R/parallel_example/parallelR.slrm: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #SBATCH -p short 3 | #SBATCH -t 00:20:00 4 | #SBATCH --nodes=1 5 | #SBATCH --ntasks=1 6 | #SBATCH --cpus-per-task=4 7 | #SBATCH --mem=3G 8 | #SBATCH -o parallelR.out 9 | 10 | module load R 11 | 12 | echo 'Running parallel R example:' 13 | 14 | srun Rscript parallelR.R 15 | -------------------------------------------------------------------------------- /R/parallel_example/parallelR_compare.R: -------------------------------------------------------------------------------- 1 | 2 | # This code is part of the ParallelR blog ExplicitParallel example. 3 | # Adapted to work in Triton by Simo Tuomisto, 2017 4 | # See the blog post in http://www.parallelr.com/r-with-parallel-computing/ 5 | # Blog's examples are available in GitHub: https://github.com/patriczhao/ParallelR 6 | 7 | # Original author docstring: 8 | # Examples for the R and Parallel Computing blog in COS website (cos.name) 9 | # Author: Peng Zhao, 8/30/2016 10 | 11 | # Get the number of cores to use from command line or from SLURM_CPUS_PER_TASK 12 | library("optparse") 13 | 14 | option_list = list( 15 | make_option(c("-c", "--cores"), type="integer", default=NULL, 16 | help="Number of cpus to use", metavar="integer")); 17 | 18 | opt_parser = OptionParser(option_list=option_list); 19 | opt = parse_args(opt_parser); 20 | 21 | if (is.null(opt$cores)) { 22 | cores <- as.integer(Sys.getenv("SLURM_CPUS_PER_TASK")) 23 | if (is.na(cores)) { 24 | cores <- 1 25 | } 26 | } else { 27 | cores <- opt$cores 28 | } 29 | message('Running benchmark with ',cores,' processes') 30 | 31 | # Generate data 32 | message("Generating data") 33 | len <- 2e6 34 | a <- runif(len, -10, 10) 35 | a[sample(len, 100,replace=TRUE)] <- 0 36 | 37 | b <- runif(len, -10, 10) 38 | c <- runif(len, -10, 10) 39 | 40 | # Not vectorized function 41 | solve.quad.eq <- function(a, b, c) 42 | { 43 | # Not validate eqution: a and b are almost ZERO 44 | if(abs(a) < 1e-8 && abs(b) < 1e-8) return(c(NA, NA) ) 45 | 46 | # Not quad equation 47 | if(abs(a) < 1e-8 && abs(b) > 1e-8) return(c(-c/b, NA)) 48 | 49 | # No Solution 50 | if(b*b - 4*a*c < 0) return(c(NA,NA)) 51 | 52 | # Return solutions 53 | x.delta <- sqrt(b*b - 4*a*c) 54 | x1 <- (-b + x.delta)/(2*a) 55 | x2 <- (-b - x.delta)/(2*a) 56 | 57 | return(c(x1, x2)) 58 | } 59 | 60 | ############################################################################################# 61 | # *apple style 62 | ############################################################################################## 63 | # serial code 64 | 65 | library(rbenchmark) 66 | 67 | benchmark( 68 | 'lapply' = { 69 | res1.s <- lapply(1:len, FUN = function(x) { solve.quad.eq(a[x], b[x], c[x])}) 70 | }, 71 | # parallel 72 | # multicores on Linux 73 | 'mcapply' = { 74 | library(parallel) 75 | res1.p <- mclapply(1:len, FUN = function(x) { solve.quad.eq(a[x], b[x], c[x])}, mc.cores = cores) 76 | }, 77 | 'parLapply' = { 78 | library(parallel) 
79 | cl <- makeCluster(cores) 80 | clusterExport(cl, c('solve.quad.eq', 'a', 'b', 'c')) 81 | res1.p <- parLapply(cl, 1:len, function(x) { solve.quad.eq(a[x], b[x], c[x]) }) 82 | stopCluster(cl) 83 | }, 84 | 'for' = { 85 | res2.s <- matrix(0, nrow=len, ncol = 2) 86 | for(i in 1:len) { 87 | res2.s[i,] <- solve.quad.eq(a[i], b[i], c[i]) 88 | } 89 | }, 90 | 'foreach/dopar' = { 91 | library(foreach) 92 | library(doParallel) 93 | cl <- makeCluster(cores) 94 | registerDoParallel(cl, cores=cores) 95 | chunk.size <- len/cores 96 | res2.p <- foreach(i=1:cores, .combine='rbind') %dopar% 97 | { # local data for results 98 | res <- matrix(0, nrow=chunk.size, ncol=2) 99 | for(x in ((i-1)*chunk.size+1):(i*chunk.size)) { 100 | res[x - (i-1)*chunk.size,] <- solve.quad.eq(a[x], b[x], c[x]) 101 | } 102 | # return local results 103 | res 104 | } 105 | stopImplicitCluster() 106 | stopCluster(cl) 107 | }, 108 | replications=1, 109 | columns = c("test", "elapsed", "relative", "user.self", "sys.self") 110 | ) 111 | 112 | -------------------------------------------------------------------------------- /R/parallel_example/parallelR_compare.slrm: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #SBATCH -p short 3 | #SBATCH -t 00:20:00 4 | #SBATCH --nodes=1 5 | #SBATCH --ntasks=1 6 | #SBATCH --cpus-per-task=8 7 | #SBATCH --mem=16G 8 | #SBATCH -o parallelR_compare.out 9 | 10 | module restore R_eb 11 | 12 | echo 'Running parallel R comparison:' 13 | 14 | for cores in 2 4 8 ; do 15 | srun Rscript parallelR_compare.R -c $cores 16 | done 17 | -------------------------------------------------------------------------------- /R/serial_example/serialR.R: -------------------------------------------------------------------------------- 1 | # Run simple cross-validation method with caret and knn 2 | # https://github.com/tobigithub/caret-machine-learning 3 | # Tobias Kind (2015) 4 | 5 | # Single example, no cross-validation 6 | require(caret); data(BloodBrain); set.seed(123); 7 | fit1 <- train(bbbDescr, logBBB, "knn"); fit1 8 | 9 | # cross-validation example with method boot 10 | require(caret); data(BloodBrain); set.seed(123); 11 | tc <- trainControl(method="boot") 12 | fit1 <- train(bbbDescr, logBBB, trControl=tc, method="knn"); fit1 13 | 14 | 15 | ### END 16 | -------------------------------------------------------------------------------- /R/serial_example/serialR.slrm: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #SBATCH -p short 3 | #SBATCH -t 00:20:00 4 | #SBATCH --ntasks=1 5 | #SBATCH --mem=3G 6 | #SBATCH -o serialR.out 7 | 8 | 9 | module load R 10 | 11 | echo 'Running a simple serial R example:' 12 | 13 | srun Rscript serialR.R 14 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | Triton Examples 2 | =============== 3 | 4 | .. warning:: This section is under development. 5 | 6 | This repository contains examples scripts that can be run in Aalto 7 | University's Triton-cluster, but it will be useful to others as well. 8 | 9 | .. toctree:: 10 | 11 | slurm/index 12 | 13 | * ``gpu/``: GPU usage, compiling CUDA directly. 14 | * ``openmp/``: compiling and running OpenMP* ``python/``: Basic Python scripts 15 | * ``openmpi/``: compiling and running MPI 16 | * ``R/``: Basic R scripts 17 | * ``scip/``: Material related to *Scientific Computing In Practice* lecture series. 
18 | * ``slurm/``: Basic submit scripts and programs 19 | 20 | 21 | This repository can be found at 22 | https://github.com/AaltoSciComp/hpc-examples and embedded into 23 | https://scicomp.aalto.fi/ . 24 | -------------------------------------------------------------------------------- /gpu/helloworld/README.md: -------------------------------------------------------------------------------- 1 | # gpu/helloworld 2 | 3 | For up to date instructions, see [SciComp GPU page](https://scicomp.aalto.fi/triton/tut/gpu/). 4 | 5 | Running the example in the gpu queue: 6 | ```sh 7 | sbatch helloworld.slrm 8 | ``` 9 | -------------------------------------------------------------------------------- /gpu/helloworld/helloworld.cu: -------------------------------------------------------------------------------- 1 | #include "stdio.h" 2 | 3 | __global__ void cuda_hello(int* a){ 4 | // blockIdx has values between 0 and 4 5 | printf("Hello World from GPU a[%d]=%d \n", blockIdx.x, a[blockIdx.x]); 6 | } 7 | 8 | int main(void) { 9 | int* d_a; 10 | 11 | // Allocates an array of 5 integers 12 | cudaMalloc(&d_a, 5*sizeof(int)); 13 | 14 | // Runs 5 instances of kernel cuda_hello in parallel 15 | cuda_hello<<<5, 1>>>(d_a); 16 | 17 | // This is needed for the printf in the kernel to display 18 | cudaDeviceSynchronize(); 19 | 20 | printf("Hello from outside GPU\n"); 21 | return 0; 22 | } 23 | -------------------------------------------------------------------------------- /gpu/helloworld/helloworld.slrm: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #SBATCH --time=00:05:00 3 | #SBATCH --job-name=helloworld 4 | #SBATCH --mem-per-cpu=500M 5 | #SBATCH --cpus-per-task=1 6 | #SBATCH --gres=gpu:1 7 | #SBATCH --output=helloworld.out 8 | 9 | module load cuda 10 | nvcc helloworld.cu -o helloworld 11 | ./helloworld 12 | -------------------------------------------------------------------------------- /gpu/helloworld/pi.cu: -------------------------------------------------------------------------------- 1 | // Using CUDA device to calculate pi 2 | #include 3 | #include 4 | 5 | #define NBIN 10000000 // Number of bins 6 | #define NUM_BLOCK 30 // Number of thread blocks 7 | #define NUM_THREAD 8 // Number of threads per block 8 | int tid; 9 | float pi = 0; 10 | 11 | // Kernel that executes on the CUDA device 12 | __global__ void cal_pi(float *sum, int nbin, float step, int nthreads, int nblocks) { 13 | int i; 14 | float x; 15 | int idx = blockIdx.x*blockDim.x+threadIdx.x; // Sequential thread index across the blocks 16 | for (i=idx; i< nbin; i+=nthreads*nblocks) { 17 | x = (i+0.5)*step; 18 | sum[idx] += 4.0/(1.0+x*x); 19 | } 20 | } 21 | 22 | // Main routine that executes on the host 23 | int main(void) { 24 | dim3 dimGrid(NUM_BLOCK,1,1); // Grid dimensions 25 | dim3 dimBlock(NUM_THREAD,1,1); // Block dimensions 26 | float *sumHost, *sumDev; // Pointer to host & device arrays 27 | 28 | float step = 1.0/NBIN; // Step size 29 | size_t size = NUM_BLOCK*NUM_THREAD*sizeof(float); //Array memory size 30 | sumHost = (float *)malloc(size); // Allocate array on host 31 | cudaMalloc((void **) &sumDev, size); // Allocate array on device 32 | // Initialize array in device to 0 33 | cudaMemset(sumDev, 0, size); 34 | // Do calculation on device 35 | cal_pi <<>> (sumDev, NBIN, step, NUM_THREAD, NUM_BLOCK); // call CUDA kernel 36 | // Retrieve result from device and store it in host array 37 | cudaMemcpy(sumHost, sumDev, size, cudaMemcpyDeviceToHost); 38 | for(tid=0; tid 11 | #include 12 | 13 | 
int main(int argc, char** argv) { 14 | MPI_Init(NULL, NULL); // initialize the MPI 15 | int world_size; 16 | MPI_Comm_size(MPI_COMM_WORLD, &world_size); // number of processes 17 | int world_rank; 18 | MPI_Comm_rank(MPI_COMM_WORLD, &world_rank); // rank of the process 19 | char processor_name[MPI_MAX_PROCESSOR_NAME]; 20 | int name_len; 21 | MPI_Get_processor_name(processor_name, &name_len); // processor name 22 | printf("Hello world from processor %s, rank %d" 23 | " out of %d processors\n", processor_name, world_rank, world_size); 24 | MPI_Finalize(); // finalize the MPI 25 | } 26 | -------------------------------------------------------------------------------- /mpi/hello_mpi/hello_mpi.slrm: -------------------------------------------------------------------------------- 1 | #!/bin/bash -l 2 | #SBATCH --time=00:05:00 3 | #SBATCH --mem-per-cpu=500M 4 | #SBATCH --nodes=1 5 | #SBATCH --ntasks-per-node=4 6 | #SBATCH --output=hello_mpi.out 7 | 8 | module load gcc/11.3.0 9 | module load openmpi/4.1.5 10 | 11 | mpicc -o hello_mpi hello_mpi.c 12 | 13 | srun hello_mpi 14 | -------------------------------------------------------------------------------- /mpi/hello_mpi_fortran/README.md: -------------------------------------------------------------------------------- 1 | # mpi/hello_mpi 2 | 3 | For up to date instructions, see 4 | [SciComp page on parallel computing](https://scicomp.aalto.fi/triton/tut/parallel/). 5 | 6 | Running the example in the queue: 7 | ```sh 8 | sbatch hello_mpi_fortran.slrm 9 | ``` 10 | -------------------------------------------------------------------------------- /mpi/hello_mpi_fortran/hello_mpi_fortran.f90: -------------------------------------------------------------------------------- 1 | ! Hello World MPI 2 | ! 3 | ! Compile on Triton with: 4 | ! 5 | ! module load gcc 6 | ! module load openmpi 7 | ! mpifort hello_mpi_fortran.f90 -o hello_mpi_fortran 8 | ! 9 | ! 10 | ! Simo Tuomisto, 2021 11 | ! 12 | 13 | program hello 14 | include 'mpif.h' 15 | integer world_size, rank, ierror, tag, status(MPI_STATUS_SIZE) 16 | 17 | call MPI_INIT(ierror) ! Initialize the MPI 18 | call MPI_COMM_SIZE(MPI_COMM_WORLD, world_size, ierror) ! Number of processes 19 | call MPI_COMM_RANK(MPI_COMM_WORLD, rank, ierror) ! Rank of the process 20 | print *, 'Hello world from processor ', rank, ' out of ', & 21 | world_size , ' processors' 22 | call MPI_FINALIZE(ierror) ! Finalize the MPI 23 | end 24 | -------------------------------------------------------------------------------- /mpi/hello_mpi_fortran/hello_mpi_fortran.slrm: -------------------------------------------------------------------------------- 1 | #!/bin/bash -l 2 | #SBATCH --time=00:05:00 3 | #SBATCH --mem-per-cpu=500M 4 | #SBATCH --nodes=1 5 | #SBATCH --ntasks-per-node=4 6 | #SBATCH --output=hello_mpi_fortran.out 7 | 8 | module load gcc/11.3.0 9 | module load openmpi/4.1.5 10 | 11 | mpifort hello_mpi_fortran.f90 -o hello_mpi_fortran 12 | 13 | srun hello_mpi_fortran 14 | -------------------------------------------------------------------------------- /ngrams/README.md: -------------------------------------------------------------------------------- 1 | # ngram calculation HPC sample program 2 | 3 | These are data-based example scripts for a HPC cluster. They are made 4 | quickly and designed to be easy to understand (though it's not 5 | intended that people look at the code) and show interesting problems 6 | you might face when using a cluster. 7 | 8 | For the most part, see the help text of the programs for how to use 9 | them. 
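For example, the counting script documents all of its options via `--help` (paths here assume you are in the repository root):

```console
$ python3 ngrams/count.py --help
```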
10 | 11 | https://en.wikipedia.org/wiki/N-gram 12 | 13 | 14 | 15 | ## Data 16 | 17 | Any text data will work, but this especially uses public-domain sample 18 | data from Project Gutenberg: 19 | 20 | - Original: https://zenodo.org/records/5783256 21 | - Reprocessed, first 100 books: https://users.aalto.fi/~darstr1/public/Gutenberg-Fiction-first100.zip 22 | - Reprocessed, first 1000 books: https://users.aalto.fi/~darstr1/public/Gutenberg-Fiction-first1000.zip 23 | 24 | On the Triton cluster, these are available in 25 | `/scratch/shareddata/teaching/`. 26 | 27 | A unique feature is that it can read `.txt` files from zipfiles 28 | without needing to decompress the zipfile. 29 | 30 | 31 | 32 | ## count.py and count-multi.py 33 | 34 | Reads in text files and outputs ngrams found within them. count-multi 35 | is a version that uses multiprocessing with the --threads option 36 | (though it's processes, not threads). 37 | 38 | It can operate with characters (the default) or with words using 39 | `--words`. Word mode uses much more memory. 40 | 41 | Example: 42 | 43 | ```console 44 | $ python3 ngrams/count.py -n 2 --stop 10 /scratch/shareddata/teaching/Gutenberg-Fiction.zip 45 | Loaded 18738 files from /scratch/shareddata/teaching/Gutenberg-Fiction.zip 46 | 100731 ["e", " "] 47 | 88982 [" ", "t"] 48 | 82653 ["h", "e"] 49 | 82206 ["t", "h"] 50 | ``` 51 | 52 | ```console 53 | $ python3 ngrams/count.py -n 2 /scratch/shareddata/teaching/Gutenberg-Fiction.zip -o 2grams-all.out 54 | Loaded 18738 files from /scratch/shareddata/teaching/Gutenberg-Fiction.zip 55 | ``` 56 | 57 | 58 | 59 | ## combine-counts.py 60 | 61 | Reads multiple count files and outputs one count file combining them. 62 | 63 | Example: 64 | 65 | ```console 66 | $ python3 ngrams/combine-counts.py array-2grams_*.out -o 2grams-all.out 67 | ``` 68 | 69 | 70 | 71 | ## generate.py 72 | 73 | Uses a count file to generate text based on predictions using the 74 | ngrams (for n>=2). This doesn't work well and is extremely 75 | inefficient, but probably everyone can understand what it's doing if 76 | you make an analogy with how LLMs predict the next word. 77 | 78 | Example: 79 | 80 | ```console 81 | $ python3 ngrams/generate.py 2grams-all.out 82 | ``` 83 | 84 | 85 | 86 | ## Analysis 87 | 88 | - Increasing ngram size increases the memory use. Ngrams=1 is only 89 | character/word frequencies and thus is probably CPU and I/O bound. 90 | 91 | - This isn't exactly CPU bound but it does use a lot of CPU. 92 | 93 | - It works very well with array jobs and has built-in options to do that. 94 | 95 | - Speed of reading data does matter. Reading directly from the 96 | Zipfile (via Python - no extraction) gives some speedup, as long as 97 | you don't have to open the zipfile every time. 98 | 99 | - The multiprocessing version isn't much faster, since it spends so 100 | much time moving data around and merging the counts internally. 101 | 102 | - The reading/writing of the count files takes a large amount of the 103 | time and shows the importance of good data formats. 104 | 105 | - The MaxRSS indication in the multiprocessing version may be wrong 106 | (the syscall for children claims to only return MaxRSS for the child 107 | with the most memory usage, and may always be zero).
108 | 109 | - (and more) 110 | -------------------------------------------------------------------------------- /ngrams/array.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #SBATCH --mem=50G 3 | #SBATCH --array=0-20 4 | #SBATCH --time=0-6 5 | #SBATCH --job-name=words-array 6 | 7 | mkdir -p /scratch/work/$USER/ngrams-output/ 8 | 9 | python3 ngrams/count.py /scratch/work/darstr1/data/Gutenberg-Fiction.zip -n 3 --words --start=$SLURM_ARRAY_TASK_ID --step=20 -o /scratch/work/$USER/ngrams-output/ngrams3-words-all-array_$SLURM_ARRAY_TASK_ID.out 10 | 11 | # Combine 12 | #python3 ngrams/combine-counts.py /scratch/work/$USER/ngrams-output/ngrams3-words-all-array_* -o /scratch/work/$USER/ngrams-output/ngrams3-words-all.out 13 | -------------------------------------------------------------------------------- /ngrams/combine-counts.py: -------------------------------------------------------------------------------- 1 | """Combine counts files 2 | 3 | Reads in multiple files and writes out a new count file. 4 | 5 | """ 6 | 7 | import argparse 8 | import collections 9 | import json 10 | import sys 11 | 12 | import argparse 13 | parser = argparse.ArgumentParser() 14 | parser.add_argument('countfile', nargs='+', help="Files with counts") 15 | parser.add_argument('--verbose', '-v', action='store_true') 16 | parser.add_argument('--output', '-o',) 17 | args = parser.parse_args() 18 | 19 | ngrams_total = collections.Counter() 20 | for file_ in args.countfile: 21 | if args.verbose: 22 | print(file_, file=sys.stderr) 23 | for line in open(file_): 24 | if not line.strip(): 25 | continue 26 | count, data = line.split(' ', 1) 27 | count = int(count) 28 | data = json.loads(data) 29 | ngrams_total[tuple(data)] += count 30 | 31 | # Save output to file if requested, otherwise print to stdout 32 | output = sys.stdout 33 | if args.output: 34 | if args.verbose: 35 | print('Writing to', args.output, file=sys.stderr) 36 | output = open(args.output, 'w') 37 | # Print the output 38 | for ngram, count in ngrams_total.most_common(): 39 | print(count, json.dumps(ngram), file=output) 40 | -------------------------------------------------------------------------------- /ngrams/count-multi.py: -------------------------------------------------------------------------------- 1 | """Count ngrams from input text files (multiprocessing version). 2 | 3 | This reads in text files and computes n-grams based on words or 4 | characters. It's designed to be a HPC example, not for serious use. 5 | 6 | The multiprocessing version is slower than the non-multiprocessing 7 | version (this is something for you to think about). 8 | 9 | n-grams are tuples such as ("the", "book", "is") or ("t", "h", "e"). It 10 | writes output to standard output, or the file given. Output format a 11 | plain-text file with: 12 | 13 | COUNT ["word1", "word2"] 14 | 15 | """ 16 | from __future__ import print_function 17 | 18 | import collections 19 | import io 20 | import itertools 21 | import json 22 | import multiprocessing 23 | import os 24 | import re 25 | import resource 26 | import sys 27 | import time 28 | import zipfile 29 | 30 | 31 | 32 | def nwise(iterable, n): 33 | """Like itertools.pairwise but for arbitrary n. 
34 | 35 | Creates groups of n: 36 | 37 | 1: [a, b, c, d, e, f] -> [a, b, c, d, e, f] 38 | 2: [a, b, c, d, e, f] -> [ab, bc, cd, de, ef] 39 | 3: [a, b, c, d, e, f] -> [abc, bcd, cde, def] 40 | """ 41 | if n <= 0: 42 | raise ValueError(f"n must be a positive integer (was {n})") 43 | iterator = iter(iterable) 44 | try: 45 | ngram = tuple(next(iterator) for _ in range(n)) 46 | # RuntimeError raised within the above when it runs out of data. 47 | # Does this mask other errors though? 48 | except RuntimeError: 49 | return 50 | yield ngram 51 | for next_ in iterator: 52 | ngram = ngram[1:] + (next_, ) 53 | yield ngram 54 | 55 | def _openzip(zip, name): 56 | return lambda: io.TextIOWrapper(z.open(name), 'utf8') 57 | 58 | def opendir(dir_): 59 | """Open either a zipfile (*.txt within it), directory/*.txt, or an individual file. 60 | 61 | Returns a list of the files within the zipfile or directory, or the 62 | filename if a single filename is given. The list contains 63 | (filename, function_that_opens_the _file), so that the process_file 64 | function can handle both zipfiles and normal files the same way. 65 | """ 66 | # Zipfiles 67 | if dir_.endswith('.zip'): 68 | z = zipfile.ZipFile(dir_) 69 | file_list = [ #(z, name) 70 | f'zip::{dir_}::{name}' 71 | for name in z.namelist() 72 | if name.endswith('.txt') ] 73 | print(f'Found {len(file_list)} files in {dir_}', file=sys.stderr) 74 | # Directories 75 | elif os.path.isdir(dir_): 76 | file_list = [ #(name, lambda name=name: open(name, 'r')) 77 | os.path.join(dir_, name) 78 | for name in os.listdir(dir_) 79 | if name.endswith('.txt') ] 80 | print(f'Found {len(file_list)} files in {dir_}', file=sys.stderr) 81 | # regular files 82 | else: 83 | file_list = [ dir_ ] 84 | return file_list 85 | 86 | ZIPFILE_CACHE = { } 87 | def process_file(x): 88 | """Return ngrams from a given filename. 89 | 90 | filename: filename of this file. Only used for printing, since it 91 | might be a relative path inside of a zipfile. 92 | 93 | data: a function which returns the file data. This exists so that 94 | this function doesn't need to care if it's reading from a zipfile or 95 | normal file. 96 | 97 | args: arguments from the argument parser. 98 | """ 99 | filename, args = x 100 | if args.verbose: 101 | print(filename, file=sys.stderr) 102 | # Open the zipfile and cache the open zipfile object. 103 | if filename.startswith('zip::'): 104 | _, archive, element = filename.split('::', maxsplit=2) 105 | if archive in ZIPFILE_CACHE: 106 | z = ZIPFILE_CACHE[archive] 107 | else: 108 | #z = zipfile.ZipFile(archive) 109 | z = ZIPFILE_CACHE[archive] = zipfile.ZipFile(archive) 110 | data = z.open(element).read().decode() 111 | #if isinstance(filename, tuple): 112 | # name, data = filename 113 | # if isinstance(name, zipfile.ZipFile): 114 | # z, name = name, data 115 | # data = z.open(name).read() 116 | # #z = zipfile.ZipFile(archive) 117 | # data = data.decode() 118 | else: 119 | data = open(filename, 'r').read() 120 | data = data.lower() 121 | ngrams = collections.Counter() # Making a new Counter here may be inefficient. 122 | # Split by words if needed. Use a regular expression for this. 123 | if args.words: 124 | data = (m[0] for m in re.finditer(r'[a-zA-Z_-]+', data)) 125 | # For every ngram, increment its count 126 | for ngram in nwise(data, args.n): 127 | ngrams[ngram] += 1 128 | 129 | return ngrams 130 | 131 | 132 | 133 | def arg_int_auto(x): 134 | """Argparse argument type helper. 
135 | 136 | Returns an integer, or the value from the environment variable 137 | SLURM_CPUS_PER_TASK if the value is 'auto'. 138 | """ 139 | if x == 'auto': 140 | return int(os.environ['SLURM_CPUS_PER_TASK']) 141 | else: 142 | return int(x) 143 | 144 | 145 | 146 | def main(): 147 | start = time.time() 148 | import argparse 149 | parser = argparse.ArgumentParser() 150 | parser.add_argument('input', nargs='+', help="Inputs, should be text files. Can be a zipfile or directory (in which case, files ending in .txt inside the zip/directory will be used), or directly filenames (in which case the file will be used)") 151 | parser.add_argument('-n', type=int, default=1, help="Size of n-grams. -n 1 is simple frequencies.") 152 | parser.add_argument('--output', '-o', help="Write output to this filename, otherwise print output to stdout.") 153 | parser.add_argument('--threads', '-t', type=arg_int_auto, default=1, help="Number of threads to usez for the analysis. If 'auto', then detect from the SLURM_CPUS_PER_TASK environment variable. If an integer 1 or greater, use multiprocessing with that many processes. If zero, do not use multiprocessing at all. Default: 1") 154 | parser.add_argument('--words', action='store_true', help="If given, use word-based ngram mode instead of character-based. Word-based mode only uses ascii letters and removes most other punctuation and special characters.") 155 | parser.add_argument('--verbose', '-v', action='store_true', help="Print more to stderr, for example the filenames that are being read.") 156 | group_selection = parser.add_argument_group("selection", "Selecting files. After a list of all files is created, apply these slice operations like you do to a list: list[start:stop:step]. Files are not sorted and used in the order given, order in the zipfile, or order the natural (unsorted) order the operating system returns them from the directory list. The directory order is unpredictable but usually the same if the directory isn't touched.") 157 | group_selection.add_argument('--start', type=int, default=None, help="Select starting file with a Python slice operation 'file_list[start:]'. The start is included, and counts go from 0. For examaple, --start=1 skips the first (0th) file.") 158 | group_selection.add_argument('--stop', type=int, default=None, help="Select stop file with file_list[:stop]. The stop is NOT included, and the counts go from zero. For example, to limit to the first 100 files (0, 1, ..., 99), use --stop=100 .") 159 | group_selection.add_argument('--step', type=int, default=None, help="Select every STEP file. For example, --start=0 --step=10 selects files 0, 10, 20, etc. and --start=1 --step=10 selects files 1, 11, 21, etc.") 160 | args = parser.parse_args() 161 | 162 | # Read the filelist from a zipfile, OR from a directory. Accumulate 163 | # a list of all files across all arguments. 164 | filelist = sum((opendir(input) for input in args.input), []) 165 | filelist = itertools.islice(filelist, args.start, args.stop, args.step) 166 | 167 | # Process every file and accumulate the counts. 
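    # With a nonzero --threads a multiprocessing.Pool is used below:
    # imap_unordered merges results as soon as any worker finishes, and
    # chunksize=10 hands filenames to workers in batches to reduce
    # inter-process communication overhead.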
168 | ngrams_total = collections.Counter() 169 | if args.threads is not None and args.threads != 0: 170 | print(f'Using multiprocessing.Pool with {args.threads} processes', file=sys.stderr) 171 | pool = multiprocessing.Pool(args.threads) 172 | for result in pool.imap_unordered(process_file, 173 | ( 174 | (name, args) 175 | #((name[1], name[0].open(name[1]).read()), args) if isinstance(name, tuple) else (name, args) 176 | for name in filelist), 177 | chunksize=10, 178 | ): 179 | ngrams_total.update(result) 180 | pool.close() 181 | pool.join() 182 | else: 183 | for filename in filelist: 184 | ngrams_total.update(process_file((filename, args))) 185 | 186 | # Save output to file if requested, otherwise print to stdout 187 | output = sys.stdout 188 | if args.output: 189 | output = open(args.output, 'w') 190 | for ngram, count in ngrams_total.most_common(): 191 | print(count, json.dumps(ngram), file=output) 192 | 193 | # Print the summary and performance information to stderr 194 | print(f'{args.n}-grams: {len(ngrams_total)}', file=sys.stderr) 195 | print(file=sys.stderr) 196 | rusage_s = resource.getrusage(resource.RUSAGE_SELF) 197 | rusage_c = resource.getrusage(resource.RUSAGE_CHILDREN) 198 | print(f'Walltime {time.time() - start:.2f} s', file=sys.stderr) 199 | print(f'User time: {rusage_s.ru_utime + rusage_c.ru_utime:.2f} s ({rusage_s.ru_utime:.2f} + {rusage_c.ru_utime:.2f})', file=sys.stderr) 200 | print(f'System time: {rusage_s.ru_stime + rusage_c.ru_stime:.2f} s ({rusage_s.ru_stime:.2f} + {rusage_c.ru_stime:.2f})', file=sys.stderr) 201 | print(f'MaxRSS: {(rusage_s.ru_maxrss + rusage_c.ru_maxrss)/2**20:.3f} GiB ({rusage_s.ru_maxrss/2**20:.3f} + {rusage_c.ru_maxrss/2**20:.3f})', file=sys.stderr) 202 | 203 | if __name__ == '__main__': 204 | main() 205 | -------------------------------------------------------------------------------- /ngrams/count.py: -------------------------------------------------------------------------------- 1 | """Count ngrams from input text files. 2 | 3 | This reads in text files and computes n-grams based on words or 4 | characters. It's designed to be a HPC example, not for serious use. 5 | 6 | n-grams are tuples such as ("the", "book", "is") or ("t", "h", "e"). It 7 | writes output to standard output, or the file given. Output format a 8 | plain-text file with: 9 | 10 | COUNT ["word1", "word2"] 11 | 12 | """ 13 | from __future__ import print_function 14 | 15 | import collections 16 | import io 17 | import itertools 18 | import json 19 | import multiprocessing 20 | import os 21 | import re 22 | import resource 23 | import sys 24 | import time 25 | import zipfile 26 | 27 | 28 | 29 | def nwise(iterable, n): 30 | """Like itertools.pairwise but for arbitrary n. 31 | 32 | Creates groups of n: 33 | 34 | 1: [a, b, c, d, e, f] -> [a, b, c, d, e, f] 35 | 2: [a, b, c, d, e, f] -> [ab, bc, cd, de, ef] 36 | 3: [a, b, c, d, e, f] -> [abc, bcd, cde, def] 37 | """ 38 | if n <= 0: 39 | raise ValueError(f"n must be a positive integer (was {n})") 40 | iterator = iter(iterable) 41 | try: 42 | ngram = tuple(next(iterator) for _ in range(n)) 43 | # RuntimeError raised within the above when it runs out of data. 44 | # Does this mask other errors though? 45 | except RuntimeError: 46 | return 47 | yield ngram 48 | for next_ in iterator: 49 | ngram = ngram[1:] + (next_, ) 50 | yield ngram 51 | 52 | 53 | 54 | def opendir(dir_): 55 | """Open either a zipfile (*.txt within it), directory/*.txt, or an individual file. 
56 | 57 | Returns a list of the files within the zipfile or directory, or the 58 | filename if a single filename is given. The list contains 59 | (filename, function_that_opens_the _file), so that the process_file 60 | function can handle both zipfiles and normal files the same way. 61 | """ 62 | # Zipfiles 63 | if dir_.endswith('.zip'): 64 | z = zipfile.ZipFile(dir_) 65 | file_list = [ (name, lambda name=name: io.TextIOWrapper(z.open(name), 'utf8')) 66 | for name in z.namelist() 67 | if name.endswith('.txt') ] 68 | print(f'Found {len(file_list)} files in {dir_}', file=sys.stderr) 69 | # Directories 70 | elif os.path.isdir(dir_): 71 | file_list = [ (name, lambda name=name, dir_=dir_: open(os.path.join(dir_, name), 'r')) 72 | for name in os.listdir(dir_) 73 | if name.endswith('.txt') ] 74 | print(f'Found {len(file_list)} files in {dir_}', file=sys.stderr) 75 | # regular files 76 | else: 77 | file_list = [ dir_, lambda name=dir_: open(name, 'r') ] 78 | return file_list 79 | 80 | 81 | 82 | def process_file(filename, data, args): 83 | """Return ngrams from a given filename. 84 | 85 | filename: filename of this file. Only used for printing, since it 86 | might be a relative path inside of a zipfile. 87 | 88 | data: a function which returns the file data. This exists so that 89 | this function doesn't need to care if it's reading from a zipfile or 90 | normal file. 91 | 92 | args: arguments from the argument parser. 93 | """ 94 | if args.verbose: 95 | print(filename, file=sys.stderr) 96 | # Get our raw data from wherever it is 97 | data = data().read() 98 | data = data.lower() 99 | ngrams = collections.Counter() # Making a new Counter here may be inefficient. 100 | # Split by words if needed. Use a regular expression for this. 101 | if args.words: 102 | data = (m[0] for m in re.finditer(r'[a-zA-Z_-]+', data)) 103 | # For every ngram, increment its count 104 | for ngram in nwise(data, args.n): 105 | ngrams[ngram] += 1 106 | 107 | return ngrams 108 | 109 | 110 | 111 | def main(): 112 | start = time.time() 113 | import argparse 114 | parser = argparse.ArgumentParser() 115 | parser.add_argument('input', nargs='+', help="Inputs, should be text files. Can be a zipfile or directory (in which case, files ending in .txt inside the zip/directory will be used), or directly filenames (in which case the file will be used)") 116 | parser.add_argument('-n', type=int, default=1, help="Size of n-grams. -n 1 is simple frequencies.") 117 | parser.add_argument('--output', '-o', help="Write output to this filename, otherwise print output to stdout.") 118 | parser.add_argument('--words', action='store_true', help="If given, use word-based ngram mode instead of character-based. Word-based mode only uses ascii letters and removes most other punctuation and special characters.") 119 | parser.add_argument('--verbose', '-v', action='store_true', help="Print more to stderr, for example the filenames that are being read.") 120 | group_selection = parser.add_argument_group("selection", "Selecting files. After a list of all files is created, apply these slice operations like you do to a list: list[start:stop:step]. Files are not sorted and used in the order given, order in the zipfile, or order the natural (unsorted) order the operating system returns them from the directory list. The directory order is unpredictable but usually the same if the directory isn't touched.") 121 | group_selection.add_argument('--start', type=int, default=None, help="Select starting file with a Python slice operation 'file_list[start:]'. 
The start is included, and counts go from 0. For examaple, --start=1 skips the first (0th) file.") 122 | group_selection.add_argument('--stop', type=int, default=None, help="Select stop file with file_list[:stop]. The stop is NOT included, and the counts go from zero. For example, to limit to the first 100 files (0, 1, ..., 99), use --stop=100 .") 123 | group_selection.add_argument('--step', type=int, default=None, help="Select every STEP file. For example, --start=0 --step=10 selects files 0, 10, 20, etc. and --start=1 --step=10 selects files 1, 11, 21, etc.") 124 | args = parser.parse_args() 125 | 126 | # Read the filelist from a zipfile, OR from a directory. Accumulate 127 | # a list of all files across all arguments. 128 | filelist = sum((opendir(input) for input in args.input), []) 129 | filelist = itertools.islice(filelist, args.start, args.stop, args.step) 130 | 131 | # Process every file and accumulate the counts. 132 | ngrams_total = collections.Counter() 133 | for filename, data in filelist: 134 | ngrams_total.update(process_file(filename, data, args)) 135 | 136 | # Save output to file if requested, otherwise print to stdout 137 | output = sys.stdout 138 | if args.output: 139 | output = open(args.output, 'w') 140 | for ngram, count in ngrams_total.most_common(): 141 | print(count, json.dumps(ngram), file=output) 142 | 143 | # Print the summary and performance information to stderr 144 | print(f'{args.n}-grams: {len(ngrams_total)}', file=sys.stderr) 145 | print(file=sys.stderr) 146 | rusage_s = resource.getrusage(resource.RUSAGE_SELF) 147 | rusage_c = resource.getrusage(resource.RUSAGE_CHILDREN) 148 | print(f'Walltime {time.time() - start:.2f} s', file=sys.stderr) 149 | print(f'User time: {rusage_s.ru_utime + rusage_c.ru_utime:.2f} s ({rusage_s.ru_utime:.2f} + {rusage_c.ru_utime:.2f})', file=sys.stderr) 150 | print(f'System time: {rusage_s.ru_stime + rusage_c.ru_stime:.2f} s ({rusage_s.ru_stime:.2f} + {rusage_c.ru_stime:.2f})', file=sys.stderr) 151 | print(f'MaxRSS: {(rusage_s.ru_maxrss + rusage_c.ru_maxrss)/2**20:.3f} GiB ({rusage_s.ru_maxrss/2**20:.3f} + {rusage_c.ru_maxrss/2**20:.3f})', file=sys.stderr) 152 | 153 | if __name__ == '__main__': 154 | main() 155 | -------------------------------------------------------------------------------- /ngrams/generate.py: -------------------------------------------------------------------------------- 1 | """Use ngrams to create text. 2 | 3 | This uses ngrams to predict next words. It takes makes a mapping of 4 | (the first (n-1) parts of the ngrams) to (the last element of the 5 | ngram), and uses this to predict. It's not very good but shows a minimal use. 6 | 7 | """ 8 | 9 | import ast 10 | import argparse 11 | import collections 12 | import json 13 | import random 14 | import sys 15 | 16 | import argparse 17 | parser = argparse.ArgumentParser() 18 | parser.add_argument('countfile', nargs='+', help="Files with counts, as generated from count.py") 19 | #parser.add_argument('--output', '-o',) 20 | parser.add_argument('--words', action='store_true', help="Use 'words-mode' instead of character-mode.") 21 | parser.add_argument('--count', '-c', type=int, default=100, help="Generate this many followups.") 22 | parser.add_argument('--limit-in', type=int, help="Limit ", "Stop reading from the files in after this ngrams.") 23 | parser.add_argument('--count-threshold', type=int, help="Don't read in ngrams with fewer than this many occurrances. 
Used to cut down memory use and load time.") 24 | parser.add_argument('--verbose', '-v', action='count', default=0) 25 | args = parser.parse_args() 26 | 27 | ngrams = collections.defaultdict(collections.Counter) 28 | for file_ in args.countfile: 29 | if args.verbose >= 1: 30 | print(f"Reading {file_}", file=sys.stderr) 31 | for line in open(file_): 32 | if not line.strip(): 33 | continue 34 | count, data = line.split(' ', 1) 35 | count = int(count) 36 | if args.count_threshold and count < args.count_threshold: 37 | # count files are sorted by count, so every later line is below the threshold too 38 | break 39 | data = json.loads(data) 40 | ngrams[tuple(data[:-1])][data[-1]] += count 41 | if args.limit_in and len(ngrams) > args.limit_in: 42 | break 43 | if args.limit_in and len(ngrams) > args.limit_in: 44 | break 45 | 46 | # A random starting (n-1) gram. 47 | start = random.choice(list(ngrams)) 48 | if args.words: 49 | print(' '.join(start), end=' ') 50 | else: 51 | print(''.join(start), end='') 52 | 53 | 54 | for i in range(args.count): 55 | if start not in ngrams: 56 | print() 57 | print(f"cannot continue from {start}") 58 | break 59 | elif args.verbose >= 2: 60 | print(f"{start} has {len(ngrams[start])} possibilities", file=sys.stderr) 61 | ngrams_next = ngrams[start] 62 | next_ = random.choices(population=list(ngrams_next.keys()), weights=ngrams_next.values()) 63 | next_ = next_[0] 64 | if args.words: 65 | print(next_, end=' ') 66 | else: 67 | print(next_, end='') 68 | start = start[1:] + (next_, ) 69 | print() 70 | -------------------------------------------------------------------------------- /openmp/hello_omp/README.md: -------------------------------------------------------------------------------- 1 | # openmp/hello_omp 2 | 3 | For up to date instructions, see 4 | [SciComp page on parallel computing](https://scicomp.aalto.fi/triton/tut/parallel/).
5 | 6 | Compiling the example with OpenMP: 7 | ```sh 8 | module load gcc/9.2.0 9 | gcc -fopenmp -O2 -g hello_omp.c -o hello_omp 10 | ``` 11 | 12 | Compiling the example without OpenMP: 13 | ```sh 14 | module load gcc/9.2.0 15 | gcc -O2 -g hello_omp.c -o hello_omp 16 | ``` 17 | 18 | Running the example in the queue: 19 | ```sh 20 | sbatch hello_omp.slrm 21 | ``` 22 | or 23 | ```sh 24 | module load gcc/9.2.0 25 | export OMP_PROC_BIND=true 26 | srun -c 2 ./hello_omp 27 | ``` 28 | -------------------------------------------------------------------------------- /openmp/hello_omp/hello_omp.c: -------------------------------------------------------------------------------- 1 | /* Hello World OpenMP 2 | * 3 | * Compile on Triton as: 4 | * gcc -fopenmp hello_omp.c -o hello_omp 5 | * 6 | * degtyai1, Wed, 28 May 2014 12:47:47 +0300 7 | * tuomiss1, Mon, 08 Jun 2020 8 | * 9 | */ 10 | 11 | #include 12 | #if defined(_OPENMP) 13 | #include 14 | #endif 15 | 16 | int main(void) { 17 | #if defined(_OPENMP) 18 | #pragma omp parallel 19 | printf("Hello, world from thread %d.\n", omp_get_thread_num()); 20 | #else 21 | printf("Hello, world.\n"); 22 | #endif 23 | return 0; 24 | } 25 | -------------------------------------------------------------------------------- /openmp/hello_omp/hello_omp.slrm: -------------------------------------------------------------------------------- 1 | #!/bin/bash -l 2 | #SBATCH --time=00:05:00 3 | #SBATCH --mem=500M 4 | #SBATCH --cpus-per-task=4 5 | #SBATCH --output=hello_omp.out 6 | 7 | module load gcc/9.2.0 8 | 9 | export OMP_PROC_BIND=true 10 | srun hello_omp 11 | -------------------------------------------------------------------------------- /postgres/build_postgres_image.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # Build a postgres image using singularity 4 | # 5 | 6 | export SINGULARITY_CACHEDIR=/tmp/$USER/singularity_cache 7 | mkdir -p $SINGULARITY_CACHEDIR 8 | 9 | singularity pull docker://library/postgres:latest 10 | -------------------------------------------------------------------------------- /postgres/run_postgres_example.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #SBATCH --output=postgres_example.out 3 | #SBATCH --time=00:30:00 4 | #SBATCH --mem=2G 5 | 6 | # Quit if any errors occur 7 | 8 | set -e 9 | 10 | # Create directories for postgresql to store data 11 | 12 | mkdir -p var/{lib,run} 13 | 14 | # Run postgres in a singularity image, forward output to files, catch PID for process 15 | 16 | singularity run --env POSTGRES_PASSWORD=mysecretpassword --env LC_ALL=C --env PGPORT=5433 -B ${PWD}/var/lib:/var/lib/postgresql -B ${PWD}/var/run:/var/run postgres_latest.sif 2> postgres.err 1> postgres.out & 17 | POSTGRES_PID=$! 
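# $! is the PID of the most recently started background command (the
# singularity run above); it is saved so the trap below can stop the
# database server when the job exits.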
18 | 19 | # Give postgres few seconds to initialize 20 | 21 | sleep 5 22 | 23 | # Set up a trap so that postgres will be killed when job finishes 24 | 25 | trap "kill $POSTGRES_PID ; exit" TERM EXIT 26 | 27 | # Create test environment 28 | 29 | module load miniconda 30 | 31 | mamba create -n sqlalchemy_test -q -y python sqlalchemy psycopg2 32 | 33 | source activate sqlalchemy_test 34 | 35 | # Run test connection to postgresql 36 | 37 | echo 'Testing postgres writing:' 38 | 39 | python test_postgres_write.py 40 | 41 | echo 'Testing postgres reading:' 42 | 43 | python test_postgres_read.py 44 | 45 | # Remove test environment 46 | 47 | source deactivate 48 | 49 | mamba env remove -n sqlalchemy_test 50 | -------------------------------------------------------------------------------- /postgres/test_postgres_read.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # 3 | # Quick postgres read test based on https://docs.sqlalchemy.org/en/14/orm/quickstart.html 4 | # 5 | from sqlalchemy import Column 6 | from sqlalchemy import ForeignKey 7 | from sqlalchemy import Integer 8 | from sqlalchemy import String 9 | from sqlalchemy.orm import declarative_base 10 | from sqlalchemy.orm import relationship 11 | 12 | #### Define metadata for our tables 13 | 14 | Base = declarative_base() 15 | 16 | class User(Base): 17 | __tablename__ = "user_account" 18 | id = Column(Integer, primary_key=True) 19 | name = Column(String(30)) 20 | fullname = Column(String) 21 | addresses = relationship( 22 | "Address", back_populates="user", cascade="all, delete-orphan" 23 | ) 24 | def __repr__(self): 25 | return f"User(id={self.id!r}, name={self.name!r}, fullname={self.fullname!r})" 26 | 27 | class Address(Base): 28 | __tablename__ = "address" 29 | id = Column(Integer, primary_key=True) 30 | email_address = Column(String, nullable=False) 31 | user_id = Column(Integer, ForeignKey("user_account.id"), nullable=False) 32 | user = relationship("User", back_populates="addresses") 33 | def __repr__(self): 34 | return f"Address(id={self.id!r}, email_address={self.email_address!r})" 35 | 36 | # Set authentication parameters 37 | 38 | user = 'postgres' 39 | 40 | password = 'mysecretpassword' 41 | 42 | # Define connection engine 43 | 44 | from sqlalchemy import create_engine 45 | 46 | engine = create_engine(f"postgresql://{user}:{password}@localhost:5433/postgres", echo=True, future=True) 47 | 48 | # Read test data from database 49 | 50 | from sqlalchemy.orm import Session 51 | from sqlalchemy import select 52 | 53 | session = Session(engine) 54 | 55 | stmt = select(User).where(User.name.in_(["spongebob", "sandy"])) 56 | 57 | for user in session.scalars(stmt): 58 | print(user) 59 | -------------------------------------------------------------------------------- /postgres/test_postgres_write.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # 3 | # Quick postgres write test based on https://docs.sqlalchemy.org/en/14/orm/quickstart.html 4 | # 5 | from sqlalchemy import Column 6 | from sqlalchemy import ForeignKey 7 | from sqlalchemy import Integer 8 | from sqlalchemy import String 9 | from sqlalchemy.orm import declarative_base 10 | from sqlalchemy.orm import relationship 11 | 12 | #### Define metadata for our tables 13 | 14 | Base = declarative_base() 15 | 16 | class User(Base): 17 | __tablename__ = "user_account" 18 | id = Column(Integer, primary_key=True) 19 | name = Column(String(30)) 20 | fullname = 
Column(String) 21 | addresses = relationship( 22 | "Address", back_populates="user", cascade="all, delete-orphan" 23 | ) 24 | def __repr__(self): 25 | return f"User(id={self.id!r}, name={self.name!r}, fullname={self.fullname!r})" 26 | 27 | class Address(Base): 28 | __tablename__ = "address" 29 | id = Column(Integer, primary_key=True) 30 | email_address = Column(String, nullable=False) 31 | user_id = Column(Integer, ForeignKey("user_account.id"), nullable=False) 32 | user = relationship("User", back_populates="addresses") 33 | def __repr__(self): 34 | return f"Address(id={self.id!r}, email_address={self.email_address!r})" 35 | 36 | # Set authentication parameters 37 | 38 | user = 'postgres' 39 | 40 | password = 'mysecretpassword' 41 | 42 | # Define connection engine 43 | 44 | from sqlalchemy import create_engine 45 | 46 | engine = create_engine(f"postgresql://{user}:{password}@localhost:5433/postgres", echo=True, future=True) 47 | 48 | # Write metadata 49 | 50 | Base.metadata.create_all(engine) 51 | 52 | # Write test data to database 53 | 54 | from sqlalchemy.orm import Session 55 | 56 | with Session(engine) as session: 57 | spongebob = User( 58 | name="spongebob", 59 | fullname="Spongebob Squarepants", 60 | addresses=[Address(email_address="spongebob@sqlalchemy.org")], 61 | ) 62 | sandy = User( 63 | name="sandy", 64 | fullname="Sandy Cheeks", 65 | addresses=[ 66 | Address(email_address="sandy@sqlalchemy.org"), 67 | Address(email_address="sandy@squirrelpower.org"), 68 | ], 69 | ) 70 | patrick = User(name="patrick", fullname="Patrick Star") 71 | session.add_all([spongebob, sandy, patrick]) 72 | session.commit() 73 | 74 | -------------------------------------------------------------------------------- /python/multiprocessing/index.rst: -------------------------------------------------------------------------------- 1 | Python multiprocessing 2 | ====================== 3 | 4 | 5 | With Python multiprocessing pools, you have to set the number of CPUs that 6 | the multiprocessing pool uses, otherwise it will try to use every CPU on 7 | the node - even though you haven't requested every CPU. It will 8 | be constrained to the few processors you requested, but will still try to 9 | use them all, which is inefficient. 10 | 11 | The main point is to use the Slurm ``SLURM_CPUS_PER_TASK`` environment 12 | variable to set the number of processors. 13 | 14 | 15 | The Python file: 16 | 17 | .. literalinclude:: multiprocessing_demo.py 18 | 19 | The Slurm script: 20 | 21 | 22 | .. literalinclude:: multiprocessing_demo.slrm 23 | 24 | 25 | -------------------------------------------------------------------------------- /python/multiprocessing/multiprocessing_demo.py: -------------------------------------------------------------------------------- 1 | """Demonstration of multiprocessing integrated with Slurm 2 | """ 3 | 4 | import multiprocessing 5 | import os 6 | 7 | # Detect the number of CPUs we have available. If in slurm, use the SLURM_CPUS_PER_TASK environment variable, which Slurm sets. 8 | if 'SLURM_CPUS_PER_TASK' in os.environ: 9 | cpus = int(os.environ['SLURM_CPUS_PER_TASK']) 10 | print("Detected %s CPUs through slurm"%cpus) 11 | else: 12 | # None means that it will auto-detect based on os.cpu_count() 13 | cpus = None 14 | print("Running on default number of CPUs (default: all=%s)"%os.cpu_count()) 15 | 16 | 17 | def my_work(i): 18 | """This is a pointless function that uses a few CPU-seconds.
19 | """ 20 | print("Running thread %s"%i) 21 | for x in range(10000000): 22 | x ** 2 23 | return i 24 | 25 | # Start the pool with the number of CPUs we found above, or default 26 | # value. 27 | with multiprocessing.Pool(cpus) as p: 28 | print(p.map(my_work, [1, 2, 3, 4, 5, 6, 7, 8, 9, 10])) 29 | -------------------------------------------------------------------------------- /python/multiprocessing/multiprocessing_demo.slrm: -------------------------------------------------------------------------------- 1 | #!/bin/bash -l 2 | #SBATCH -t 00:10:00 3 | #SBATCH --cpus-per-task=2 4 | #SBATCH --mem-per-cpu=1G 5 | 6 | module purge 7 | module load anaconda 8 | 9 | echo "Running on: $HOSTNAME with $SLURM_CPUS_PER_TASK processors" 10 | 11 | srun python multiprocessing_demo.py 12 | 13 | -------------------------------------------------------------------------------- /python/python_openmp/README.md: -------------------------------------------------------------------------------- 1 | # python/python_openmp 2 | 3 | For up to date instructions, see 4 | [SciComp page on parallel computing](https://scicomp.aalto.fi/triton/tut/parallel/) 5 | and 6 | [SciComp page on Python](https://scicomp.aalto.fi/triton/apps/python/). 7 | 8 | 9 | Running the example in the queue: 10 | ```sh 11 | sbatch python_openmp.slrm 12 | ``` 13 | or 14 | ```sh 15 | module load anaconda/2020-03-tf2 16 | srun -c 2 --time=00:05:00 python python_openmp.py 17 | ``` 18 | -------------------------------------------------------------------------------- /python/python_openmp/python_openmp.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import os 3 | from time import time 4 | import numpy as np 5 | 6 | print('Using %d processors' % int(os.getenv('SLURM_CPUS_PER_TASK',1))) 7 | print('Using %d threads' % int(os.getenv('OMP_NUM_THREADS', 1))) 8 | print('Using %d tasks' % int(os.getenv('SLURM_NTASKS', 1))) 9 | 10 | nrounds = 5 11 | 12 | t_start = time() 13 | 14 | for i in range(nrounds): 15 | a = np.random.random([2000,2000]) 16 | a = a + a.T 17 | b = np.linalg.pinv(a) 18 | 19 | t_delta = time() - t_start 20 | 21 | print('Seconds taken to invert %d symmetric 2000x2000 matrices: %f' % (nrounds, t_delta)) 22 | -------------------------------------------------------------------------------- /python/python_openmp/python_openmp.slrm: -------------------------------------------------------------------------------- 1 | #!/bin/bash -l 2 | #SBATCH -t 00:10:00 3 | #SBATCH --ntasks=1 4 | #SBATCH --cpus-per-task=2 5 | #SBATCH --mem-per-cpu=1G 6 | #SBATCH -o python_openmp.out 7 | 8 | module load anaconda/2020-03-tf2 9 | 10 | export OMP_PROC_BIND=true 11 | 12 | echo 'Running on: '$HOSTNAME 13 | 14 | srun python python_openmp.py 15 | -------------------------------------------------------------------------------- /python/simple/simple.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | #SBATCH -p interactive 3 | #SBATCH -t 00:5:00 4 | 5 | from __future__ import print_function 6 | 7 | import os 8 | print(os.environ) 9 | 10 | # If you have an array job, you can access it this way: 11 | print(os.environ['SLURM_ARRAY_TASK_ID']) 12 | -------------------------------------------------------------------------------- /scip/2018/README.md: -------------------------------------------------------------------------------- 1 | # SCIP 2018 2 | 3 | This folder contains material from the [Crash Course for Computational 
Scientists](http://science-it.aalto.fi/scip/scip-summer-kickstart-2018/) 4 | -------------------------------------------------------------------------------- /scip/2018/high-level-languages-ex01/testRK4.R: -------------------------------------------------------------------------------- 1 | rk4 <- function(f, x0, y0, x1, n) { 2 | vx <- double(n + 1) 3 | vy <- double(n + 1) 4 | vx[1] <- x <- x0 5 | vy[1] <- y <- y0 6 | h <- (x1 - x0)/n 7 | for(i in 1:n) { 8 | k1 <- h*f(x, y) 9 | k2 <- h*f(x + 0.5*h, y + 0.5*k1) 10 | k3 <- h*f(x + 0.5*h, y + 0.5*k2) 11 | k4 <- h*f(x + h, y + k3) 12 | vx[i + 1] <- x <- x0 + i*h 13 | vy[i + 1] <- y <- y + (k1 + k2 + k2 + k3 + k3 + k4)/6 14 | } 15 | cbind(vx, vy) 16 | } 17 | 18 | sol <- rk4(function(x, y) x*sqrt(y), 0, 1, 10, 100) 19 | cbind(sol, sol[, 2] - (4 + sol[, 1]^2)^2/16)[seq(1, 101, 10), ] 20 | -------------------------------------------------------------------------------- /scip/2018/high-level-languages-ex01/testRK4.m: -------------------------------------------------------------------------------- 1 | function testRK4 2 | t = 0:0.1:10; 3 | y = 0.0625.*(t.^2+4).^2; 4 | [trk4, yrk4] = RK4(t); 5 | fprintf('Time\t\tExactVal\tRK4Val\t\tRK4Error\n') 6 | for k = 1:10:length(t) 7 | fprintf('%.f\t\t%7.3f\t\t%7.3f\t\t%7.3g\n', t(k), y(k), ... 8 | yrk4(k), abs(y(k)-yrk4(k))) 9 | end 10 | end 11 | 12 | function [t, y] = RK4(t) 13 | dydt = @(tVal,yVal)tVal*sqrt(yVal); 14 | y = zeros(size(t)); 15 | y(1) = 1; 16 | for k = 1:length(t)-1 17 | dt = t(k+1)-t(k); 18 | dy1 = dt*dydt(t(k), y(k)); 19 | dy2 = dt*dydt(t(k)+0.5*dt, y(k)+0.5*dy1); 20 | dy3 = dt*dydt(t(k)+0.5*dt, y(k)+0.5*dy2); 21 | dy4 = dt*dydt(t(k)+dt, y(k)+dy3); 22 | y(k+1) = y(k)+(dy1+2*dy2+2*dy3+dy4)/6; 23 | end 24 | end 25 | -------------------------------------------------------------------------------- /scip/2018/high-level-languages-ex01/testRK4.py: -------------------------------------------------------------------------------- 1 | def RK4(f): 2 | return lambda t, y, dt: ( 3 | lambda dy1: ( 4 | lambda dy2: ( 5 | lambda dy3: ( 6 | lambda dy4: (dy1 + 2*dy2 + 2*dy3 + dy4)/6 7 | )( dt * f( t + dt , y + dy3 ) ) 8 | )( dt * f( t + dt/2, y + dy2/2 ) ) 9 | )( dt * f( t + dt/2, y + dy1/2 ) ) 10 | )( dt * f( t , y ) ) 11 | 12 | def theory(t): return (t**2 + 4)**2 /16 13 | 14 | from math import sqrt 15 | dy = RK4(lambda t, y: t*sqrt(y)) 16 | 17 | t, y, dt = 0., 1., .1 18 | while t <= 10: 19 | if abs(round(t) - t) < 1e-5: 20 | print("y(%2.1f)\t= %4.6f \t error: %4.6g" % ( t, y, abs(y - theory(t)))) 21 | t, y = t + dt, y + dy( t, y, dt ) 22 | -------------------------------------------------------------------------------- /scip/2018/high-level-languages-ex01/testRK4.slrm: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | ## 4 | ## Write SBATCH directives here 5 | ## 6 | 7 | ## Run MATLAB example ## 8 | 9 | module purge 10 | # Load the correct MATLAB-module here 11 | module list 12 | 13 | srun matlab -nojvm -r 'testRK4(); exit()' 14 | 15 | ## Run Python example ## 16 | 17 | module purge 18 | # Load the correct Python-module here 19 | module list 20 | 21 | srun python testRK4.py 22 | 23 | ## Run R example ## 24 | 25 | module purge 26 | # Load the correct R-module here 27 | module list 28 | 29 | srun Rscript testRK4.R 30 | -------------------------------------------------------------------------------- /scip/2018/high-level-languages-ex02/fit_R.R: -------------------------------------------------------------------------------- 1 | x <- c(0, 1, 2, 3, 4, 5, 6, 7, 
8, 9, 10) 2 | y <- c(1, 6, 17, 34, 57, 86, 121, 162, 209, 262, 321) 3 | coef(lm(y ~ x + I(x^2))) 4 | -------------------------------------------------------------------------------- /scip/2018/high-level-languages-ex02/fit_R.slrm: -------------------------------------------------------------------------------- 1 | #!/bin/bash -l 2 | #SBATCH -p short 3 | #SBATCH -t 00:05:00 4 | #SBATCH -n 1 5 | #SBATCH --mem-per-cpu=100 6 | #SBATCH -o fit_R.out 7 | 8 | module load r 9 | 10 | ## 11 | ## Create R call here 12 | ## 13 | -------------------------------------------------------------------------------- /scip/2018/high-level-languages-ex02/fit_matlab.m: -------------------------------------------------------------------------------- 1 | x = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]; 2 | y = [1, 6, 17, 34, 57, 86, 121, 162, 209, 262, 321]; 3 | polyfit(x,y,2) 4 | -------------------------------------------------------------------------------- /scip/2018/high-level-languages-ex02/fit_matlab.slrm: -------------------------------------------------------------------------------- 1 | #!/bin/bash -l 2 | #SBATCH -p short 3 | #SBATCH -t 00:05:00 4 | #SBATCH -n 1 5 | #SBATCH --mem-per-cpu=100 6 | #SBATCH -o fit_matlab.out 7 | 8 | module load matlab 9 | 10 | ## 11 | ## Create matlab call here 12 | ## 13 | -------------------------------------------------------------------------------- /scip/2018/high-level-languages-ex02/fit_python.py: -------------------------------------------------------------------------------- 1 | import numpy 2 | x = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] 3 | y = [1, 6, 17, 34, 57, 86, 121, 162, 209, 262, 321] 4 | coeffs = numpy.polyfit(x,y,deg=2) 5 | print(coeffs) 6 | -------------------------------------------------------------------------------- /scip/2018/high-level-languages-ex02/fit_python.slrm: -------------------------------------------------------------------------------- 1 | #!/bin/bash -l 2 | #SBATCH -p short 3 | #SBATCH -t 00:05:00 4 | #SBATCH -n 1 5 | #SBATCH --mem-per-cpu=100 6 | #SBATCH -o fit_python.out 7 | 8 | module load anaconda3 9 | 10 | ## 11 | ## Create Python call here 12 | ## 13 | -------------------------------------------------------------------------------- /scip/README.md: -------------------------------------------------------------------------------- 1 | # SCIP 2 | 3 | This folder contains material from various Scientific Computing In Practice-courses [[1]](http://science-it.aalto.fi/scip/). 4 | -------------------------------------------------------------------------------- /slurm/index.rst: -------------------------------------------------------------------------------- 1 | Slurm examples 2 | ============== 3 | 4 | .. 
literalinclude:: memory-use.py 5 | -------------------------------------------------------------------------------- /slurm/memory-use.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | from __future__ import print_function 3 | import argparse 4 | from time import sleep 5 | import gc 6 | import platform 7 | import resource 8 | 9 | if __name__ == "__main__": 10 | 11 | parser = argparse.ArgumentParser() 12 | parser.add_argument('mem', metavar="memory", 13 | help="Use this much memory") 14 | parser.add_argument('--sleep', 15 | help="Sleep this many seconds", type=int) 16 | args = parser.parse_args() 17 | 18 | # calculate the amount of memory requested in bytes 19 | mem = args.mem.lower() 20 | 21 | if mem.endswith('b'): 22 | mem = int(mem[:-1]) 23 | 24 | elif mem.endswith('k'): 25 | mem = int(mem[:-1])*1000**1 26 | 27 | elif mem.endswith('m'): 28 | mem = int(mem[:-1])*1000**2 29 | 30 | elif mem.endswith('g'): 31 | mem = int(mem[:-1])*1000**3 32 | 33 | elif mem.endswith('t'): 34 | mem = int(mem[:-1])*1000**4 35 | 36 | else: 37 | mem = int(mem) 38 | 39 | print("Trying to use %d bytes of memory" %mem) 40 | 41 | allocated = 1 42 | array = [bytearray(1)] 43 | 44 | while True: 45 | array.append(bytearray(allocated)) 46 | allocated *= 2 47 | gc.collect() 48 | actual_bytes = resource.getrusage( 49 | resource.RUSAGE_SELF).ru_maxrss*(1024 if \ 50 | platform.system() == 'Linux' else 1) 51 | print("Using %d bytes so far (allocated: %s)" 52 | %(actual_bytes, allocated)) 53 | if actual_bytes > mem: 54 | break 55 | 56 | if args.sleep: 57 | time = args.sleep 58 | sleep(time) 59 | -------------------------------------------------------------------------------- /slurm/pi-gpu.cu: -------------------------------------------------------------------------------- 1 | #include "cuda_runtime.h" 2 | #include "device_launch_parameters.h" 3 | #include <stdio.h> 4 | #include <stdint.h> 5 | #include <curand.h> 6 | #include <curand_kernel.h> 7 | 8 | // Setup random number generator 9 | __global__ void setup_rng(curandState *random_states, uint64_t seed) 10 | { 11 | int tid = threadIdx.x + blockIdx.x * blockDim.x; 12 | curand_init(seed, tid, 0, &random_states[tid]); 13 | } 14 | 15 | // Throw nthrows darts per thread and count hits inside the unit circle 16 | __global__ void throw_dart(curandState *random_states, int *nthrows, uint64_t *hits) 17 | { 18 | int tid = threadIdx.x + blockIdx.x * blockDim.x; 19 | float random_x, random_y; 20 | curandState random_state = random_states[tid]; 21 | 22 | hits[tid] = 0; 23 | for (int i=0; i<nthrows[tid]; i++) { 24 | random_x = curand_uniform(&random_state); 25 | random_y = curand_uniform(&random_state); 26 | if (random_x*random_x + random_y*random_y < 1.0f) 27 | hits[tid]++; 28 | } 29 | random_states[tid] = random_state; 30 | } 31 | 32 | int main(int argc, char *argv[]) 33 | { 34 | 35 | // Number of stochastic trials 36 | long N=10000000; 37 | if (argc > 1) 38 | sscanf(argv[1], "%ld", &N); 39 | printf("Calculating pi using %ld stochastic trials\n", N); 40 | 41 | // Initialize variables 42 | int count, device; 43 | 44 | int *nthrows, *nthrows_gpu; 45 | uint64_t seed = 5; 46 | uint64_t *hits, *hits_gpu; 47 | curandState* random_states; 48 | uint64_t total_hits; 49 | float pi; 50 | 51 | // Run 512 blocks 52 | int blocks = 512; 53 | // Run 128 threads per block. 
54 | int threads = 128; 55 | 56 | int batch_size = blocks * threads; 57 | 58 | // Select device 59 | cudaGetDeviceCount(&count); 60 | cudaGetDevice(&device); 61 | 62 | // Allocate memory 63 | hits = (uint64_t*) malloc(batch_size*sizeof(uint64_t)); 64 | nthrows = (int *) malloc(batch_size*sizeof(int)); 65 | cudaMalloc(&hits_gpu, batch_size*sizeof(uint64_t)); 66 | cudaMalloc(&nthrows_gpu, batch_size*sizeof(int)); 67 | cudaMalloc(&random_states, batch_size*sizeof(curandState)); 68 | 69 | // Calculate how many throws we want per thread 70 | for (int i=0; i>>(random_states, seed); 82 | 83 | // Throw darts 84 | throw_dart<<>>(random_states, nthrows_gpu, hits_gpu); 85 | 86 | // Copy hits to host RAM 87 | cudaMemcpy(hits, hits_gpu, batch_size*sizeof(uint64_t), cudaMemcpyDeviceToHost); 88 | 89 | // Calculate the total number of hits 90 | total_hits = 0; 91 | for (int i=0; i 2 | #include 3 | #include 4 | 5 | int main(int argc, char **argv) { 6 | // MPI init 7 | int size, rank, hostname_len; 8 | char hostname[MPI_MAX_PROCESSOR_NAME]; 9 | MPI_Init(NULL, NULL); 10 | MPI_Comm_size(MPI_COMM_WORLD, &size); 11 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); 12 | MPI_Get_processor_name(hostname, &hostname_len); 13 | // pi init 14 | long N=10000000; 15 | if (argc > 1) 16 | sscanf(argv[1], "%ld", &N); 17 | if (rank == 0) 18 | printf("Calculating pi using %ld stochastic trials\n", N); 19 | long N_rank = N / size; 20 | printf("%s: This is rank %d doing %ld trials\n", hostname, rank, N_rank); 21 | 22 | // Seed 23 | unsigned int seed = 5; 24 | seed += rank*5000; 25 | 26 | // Calculate trials 27 | double x, y; 28 | long i; 29 | long rank_count = 0; 30 | 31 | for (i=0; i 1: 34 | n_task = int(n / size) 35 | else: 36 | n_task = n 37 | 38 | t0 = time.perf_counter() 39 | _, n_inside_circle = sample(n_task) 40 | t = time.perf_counter() - t0 41 | 42 | print(f"before gather: rank {rank}, hostname {hostname}, n_inside_circle: {n_inside_circle}") 43 | n_inside_circle = comm.gather(n_inside_circle, root=0) 44 | print(f"after gather: rank {rank}, hostname {hostname}, n_inside_circle: {n_inside_circle}") 45 | 46 | if rank == 0: 47 | pi_estimate = 4.0 * sum(n_inside_circle) / n 48 | print( 49 | f"\nnumber of darts: {n}, estimate: {pi_estimate}, time spent: {t:.2} seconds" 50 | ) 51 | -------------------------------------------------------------------------------- /slurm/pi-mpi.slrm: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #SBATCH --time=00:10:00 3 | #SBATCH --mem=1G 4 | #SBATCH --output=pi-mpi.out 5 | #SBATCH --nodes=1 6 | #SBATCH --ntasks=2 7 | 8 | module load gcc/11.3.0 9 | module load openmpi/4.1.5 10 | 11 | mpicc -o pi-mpi pi-mpi.c 12 | 13 | srun ./pi-mpi 1000000 14 | -------------------------------------------------------------------------------- /slurm/pi-mpi4py.slrm: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #SBATCH --time=00:10:00 3 | #SBATCH --mem=2G 4 | #SBATCH --output=pi-mpi4py.out 5 | #SBATCH --ntasks=4 6 | 7 | module purge 8 | module load anaconda 9 | 10 | mpirun python pi-mpi.py 11 | -------------------------------------------------------------------------------- /slurm/pi-openmp.c: -------------------------------------------------------------------------------- 1 | // Compile with -fopenmp 2 | #include 3 | #include 4 | 5 | int main(int argc, char *argv[]) { 6 | long N=10000000, i; 7 | long count =0 ; 8 | //int chunk = 1000; 9 | double pi, x, y; 10 | 11 | unsigned int seed = 5; 12 | 13 | if (argc > 1) 
14 | sscanf(argv[1], "%ld", &N); 15 | printf("Calculating pi using %ld stochastic trials\n", N); 16 | 17 | // Bug: does not seed per-thread. 18 | #pragma omp parallel for private(i, x,y) firstprivate(seed) reduction(+:count) 19 | for (i=0; i 1: 34 | # Compute how much will be done in each worker. 35 | iterations_serial = int(serial*iterations) 36 | iterations_parallel = iterations - iterations_serial 37 | iterations_per_worker = iterations_parallel//nprocs 38 | print("Using %d processes (%d iterations each)" % \ 39 | (nprocs, iterations_per_worker), file=sys.stderr) 40 | if serial > 0: 41 | print("... and %d iterations in serial"%iterations_serial) 42 | 43 | # Basic setup and accumulators 44 | in_circle_points = 0 45 | iters_actual = 0 46 | random_gen = random.Random(seed) 47 | 48 | # Parallel part 49 | # Starts worker processes 50 | if serial > 0: 51 | print("Beginning parallel part") 52 | pool = Pool(processes=nprocs) 53 | seeds = [random_gen.randint(0, 2**32 - 1) for _ in range(nprocs)] 54 | iters_per_worker = [iterations_per_worker]*nprocs 55 | iters_actual += sum(iters_per_worker) 56 | # This is the actual calculation: 57 | in_circle_points =+ sum(pool.map(pic_wrapper, zip(iters_per_worker, seeds))) 58 | pool.close() 59 | 60 | # Serial part 61 | if serial > 0: 62 | print("Beginning serial part") 63 | iters_actual += iterations_serial 64 | # This is the actual calculation: 65 | in_circle_points += pic_wrapper((iterations_serial, random_gen.randint(0, 2**32 - 1))) 66 | 67 | # Returns pi and in-circle points (successes) 68 | return in_circle_points*4/iters_actual, in_circle_points 69 | else: 70 | in_circle_points = points_in_circle(iterations, seed) 71 | return in_circle_points*4/iterations, in_circle_points 72 | 73 | 74 | def estimate_pi_vectorized(iterations, seed): 75 | batch_size = int(1e5) 76 | print("Calculating pi via %d stochastic trials (vectorized version)" % iterations, 77 | file=sys.stderr) 78 | 79 | rng = numpy.random.RandomState(seed) 80 | 81 | iterations_left = iterations 82 | in_circle_points = 0 83 | 84 | while iterations_left > 0: 85 | if iterations_left < batch_size: 86 | batch_size = iterations_left 87 | x,y = rng.random_sample(size=(2,batch_size)) 88 | in_circle_points += numpy.sum(numpy.sqrt(x*x + y*y) < 1.0) 89 | iterations_left -= batch_size 90 | 91 | return in_circle_points*4/iterations, in_circle_points 92 | 93 | 94 | if __name__ == "__main__": 95 | parser = argparse.ArgumentParser() 96 | parser.add_argument('--nprocs', type=int, help="Number of nprocs, " 97 | "using multiprocessing", default=1) 98 | parser.add_argument('--seed', type=int, help="Random seed", default=42) 99 | parser.add_argument('--sleep', type=int, help="Sleep this many seconds") 100 | parser.add_argument('--optimized', action='store_true', help="Run an optimized vectorized version of the code") 101 | parser.add_argument('--serial', type=float, default=0.0, 102 | help="This fraction [0.0--1.0] of iterations to be run serial.") 103 | parser.add_argument('iters', type=int, help="Number of iterations") 104 | args = parser.parse_args() 105 | 106 | if args.serial < 0.0 or args.serial > 1.0: 107 | print("ERROR: --serial should be a fraction from 0.0 to 1.0 (not percent). 
(given: %s)"%args.serial) 108 | sys.exit(1) 109 | 110 | if args.optimized and args.serial: 111 | print("ERROR: --serial cannot be used in conjunction with --optimized") 112 | sys.exit(1) 113 | 114 | if args.optimized and not numpy_available: 115 | print("ERROR: --optimized can only be used when numpy is available") 116 | sys.exit(1) 117 | 118 | # Calculate pi and number of in-circle points (successes) 119 | if args.optimized: 120 | pi, successes = estimate_pi_vectorized(args.iters, args.seed) 121 | else: 122 | pi, successes = estimate_pi(args.iters, args.seed, args.nprocs, serial=args.serial) 123 | # Sleep 124 | if args.sleep: 125 | time.sleep(args.sleep) 126 | 127 | # Write to a JSON file 128 | result = {"pi_estimate":pi, "iterations":args.iters, "successes":int(successes)} 129 | json.dump(result, sys.stdout) 130 | sys.stdout.write('\n') 131 | -------------------------------------------------------------------------------- /slurm/pi_aggregation.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """Aggregation script for Pi estimations 3 | 4 | This script aggregates the results produced 5 | by running ``pi.py``. The output is in the same 6 | format as that of ``pi.py``. 7 | 8 | Example: 9 | If results of ``pi.py`` are ouput to files ``result1.json``, 10 | ``result2.json``, etc. then you can calculate weighted 11 | average of the estimates by:: 12 | 13 | $ python pi_aggregation.py result1.json result2.json ... 14 | 15 | The result would be a more accurate estimation of Pi. 16 | 17 | """ 18 | 19 | from __future__ import print_function, division 20 | import json 21 | import sys 22 | 23 | def calculate_average_pi(filenames): 24 | total_successes = 0 25 | total_iterations = 0 26 | for filename in filenames: 27 | with open(filename, 'r') as f: 28 | estimation = json.load(f) 29 | total_successes += estimation["successes"] 30 | total_iterations += estimation["iterations"] 31 | return total_successes, total_iterations 32 | 33 | if __name__ == "__main__": 34 | if len(sys.argv) < 2: 35 | sys.exit("USAGE: {} file1.json file2.json ...".format(sys.argv[0])) 36 | successes, iterations = calculate_average_pi(sys.argv[1:]) 37 | result = {"successes": successes, "iterations": iterations, 38 | "pi_estimate": successes*4/iterations} 39 | json.dump(result, sys.stdout) 40 | --------------------------------------------------------------------------------