├── .gitignore ├── R ├── README.md ├── parallel_caret_example │ ├── parallelR_Caret.R │ └── parallelR_Caret.slrm ├── parallel_example │ ├── parallelR.R │ ├── parallelR.slrm │ ├── parallelR_compare.R │ └── parallelR_compare.slrm └── serial_example │ ├── serialR.R │ └── serialR.slrm ├── README.rst ├── gpu └── helloworld │ ├── README.md │ ├── helloworld.cu │ ├── helloworld.slrm │ └── pi.cu ├── io ├── README.md ├── analyze_iodata.py ├── create_iodata.py ├── create_iodata.sh └── iotest.sh ├── misc └── gpg-batch-script.sh ├── mpi ├── hello_mpi │ ├── README.md │ ├── hello_mpi.c │ └── hello_mpi.slrm └── hello_mpi_fortran │ ├── README.md │ ├── hello_mpi_fortran.f90 │ └── hello_mpi_fortran.slrm ├── ngrams ├── README.md ├── array.sh ├── combine-counts.py ├── count-multi.py ├── count.py └── generate.py ├── openmp └── hello_omp │ ├── README.md │ ├── hello_omp.c │ └── hello_omp.slrm ├── postgres ├── build_postgres_image.sh ├── run_postgres_example.sh ├── test_postgres_read.py └── test_postgres_write.py ├── python ├── multiprocessing │ ├── index.rst │ ├── multiprocessing_demo.py │ └── multiprocessing_demo.slrm ├── python_openmp │ ├── README.md │ ├── python_openmp.py │ └── python_openmp.slrm └── simple │ └── simple.py ├── scip ├── 2018 │ ├── README.md │ ├── high-level-languages-ex01 │ │ ├── testRK4.R │ │ ├── testRK4.m │ │ ├── testRK4.py │ │ └── testRK4.slrm │ └── high-level-languages-ex02 │ │ ├── fit_R.R │ │ ├── fit_R.slrm │ │ ├── fit_matlab.m │ │ ├── fit_matlab.slrm │ │ ├── fit_python.py │ │ └── fit_python.slrm └── README.md └── slurm ├── index.rst ├── memory-use.py ├── pi-gpu.cu ├── pi-mpi.c ├── pi-mpi.py ├── pi-mpi.slrm ├── pi-mpi4py.slrm ├── pi-openmp.c ├── pi-sharedmemory.slrm ├── pi.py └── pi_aggregation.py /.gitignore: -------------------------------------------------------------------------------- 1 | a.out 2 | *.out -------------------------------------------------------------------------------- /R/README.md: -------------------------------------------------------------------------------- 1 | # R in Triton 2 | 3 | These examples describe how to run R in the Triton cluster 4 | 5 | ## R serial example 6 | 7 | There are two examples. First runs a simple R script that trains a Caret model. Second runs the R-benchmark-25.R benchmark script. 8 | 9 | Caret model is from Caret examples by Tobias Kind: https://github.com/tobigithub/caret-machine-learning/blob/master/caret-cv/caret-cv-simple.R 10 | 11 | R-benchmark-25.R script from rbenchmarki repository: https://github.com/rbenchmark/benchmarks/tree/master/R-benchmark-25 12 | 13 | Usage: 14 | ```bash 15 | sbatch serialR.slrm 16 | sbatch serialR.slrm 17 | ``` 18 | 19 | ## R parallel example 20 | 21 | This example runs a non-vectorized code in serial and parallel with 1 to 4 cpus. 22 | 23 | Code is adapted from an excellent blog post in: http://www.parallelr.com/r-with-parallel-computing/ 24 | 25 | Raw code of the ExplicitParallel.R example is available in: https://github.com/PatricZhao/ParallelR/blob/master/PP_for_COS/ExplicitParallel.R 26 | 27 | Usage: 28 | ```shell 29 | sbatch parallelR.slrm 30 | ``` 31 | 32 | ## R parallel example using Caret 33 | 34 | This example runs a Caret training model in serial and parallel with 4 cpus. 
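The key step in the parallel version is registering a `doParallel` backend before calling `train()`; caret then runs its resampling loop on the registered workers. A minimal sketch of the idea (assuming the `caret` and `doParallel` packages are available, and sizing the worker pool from `SLURM_CPUS_PER_TASK`):

```r
library(caret)
library(doParallel)

data(BloodBrain)

# Use the CPUs allocated by Slurm (falls back to 1 if the variable is unset)
cores <- as.integer(Sys.getenv("SLURM_CPUS_PER_TASK", "1"))
cl <- makeCluster(cores)
registerDoParallel(cl)

# train() automatically uses the registered parallel backend for resampling
fit <- train(bbbDescr, logBBB, "knn")

stopCluster(cl)
registerDoSEQ()
```

See `parallel_caret_example/parallelR_Caret.R` for the full script, which also times the serial and parallel runs.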
35 | 36 | Code is based on a Caret example by Tobias Kind: https://github.com/tobigithub/caret-machine-learning/wiki/caret-ml-parallel 37 | 38 | Raw code is available in: https://github.com/tobigithub/caret-machine-learning/blob/master/caret-parallel/caret-parallel-train.R 39 | 40 | Usage: 41 | ```shell 42 | sbatch parallelR_Caret.slrm 43 | ``` 44 | -------------------------------------------------------------------------------- /R/parallel_caret_example/parallelR_Caret.R: -------------------------------------------------------------------------------- 1 | # Adapted from caret-parallel-train.R in Caret examples 2 | # to Triton by Simo Tuomisto, 2017 3 | 4 | # Original docstring: 5 | # Run multiple caret models in parallel using lapply 6 | # https://github.com/tobigithub/caret-machine-learning 7 | # Tobias Kind (2015) 8 | 9 | 10 | # Get the number of cores to use from command line or from SLURM_CPUS_PER_TASK 11 | library("optparse") 12 | 13 | option_list = list( 14 | make_option(c("-c", "--cores"), type="integer", default=NULL, 15 | help="Number of cpus to use", metavar="integer")); 16 | 17 | opt_parser = OptionParser(option_list=option_list); 18 | opt = parse_args(opt_parser); 19 | 20 | if (is.null(opt$cores)) { 21 | cores <- as.integer(Sys.getenv("SLURM_CPUS_PER_TASK")) 22 | if (is.na(cores)) { 23 | cores <- 1 24 | } 25 | } else { 26 | cores <- opt$cores 27 | } 28 | message("Number of cores used: ",cores) 29 | 30 | # ------------------------------------------------------------------------- 31 | # FIRST sequential code (not parallel one CPU core): 32 | # ------------------------------------------------------------------------- 33 | 34 | 35 | require(caret); data(BloodBrain); set.seed(123) 36 | 37 | 38 | message('Running Caret training in serial fashion') 39 | system.time(fit1 <- train(bbbDescr, logBBB, "knn")) 40 | fit1 41 | 42 | # ------------------------------------------------------------------------- 43 | # SECOND parallel register 4 cores (no worries if you only have 2) 44 | # train the caret model in parallel 45 | # ------------------------------------------------------------------------- 46 | 47 | message('Running Caret training in parallel fashion') 48 | library(doParallel) 49 | cl <- makeCluster(cores) 50 | registerDoParallel(cl) 51 | 52 | require(caret); data(BloodBrain); set.seed(123) 53 | 54 | system.time(fit1 <- train(bbbDescr, logBBB, "knn")) 55 | fit1 56 | 57 | stopCluster(cl) 58 | registerDoSEQ() 59 | 60 | ### END 61 | -------------------------------------------------------------------------------- /R/parallel_caret_example/parallelR_Caret.slrm: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #SBATCH -p short 3 | #SBATCH -t 00:20:00 4 | #SBATCH --nodes=1 5 | #SBATCH --ntasks=1 6 | #SBATCH --cpus-per-task=4 7 | #SBATCH --mem=8G 8 | #SBATCH -o parallelR_Caret.out 9 | module load R 10 | 11 | srun Rscript parallelR_Caret.R 12 | -------------------------------------------------------------------------------- /R/parallel_example/parallelR.R: -------------------------------------------------------------------------------- 1 | 2 | # This code is part of the ParallelR blog ExplicitParallel example. 
3 | # Adapted to work in Triton by Simo Tuomisto, 2017 4 | # See the blog post in http://www.parallelr.com/r-with-parallel-computing/ 5 | # Blog's examples are available in GitHub: https://github.com/patriczhao/ParallelR 6 | 7 | # Original author docstring: 8 | # Examples for the R and Parallel Computing blog in COS website (cos.name) 9 | # Author: Peng Zhao, 8/30/2016 10 | 11 | # Get the number of cores to use from command line or from SLURM_CPUS_PER_TASK 12 | library("optparse") 13 | 14 | option_list = list( 15 | make_option(c("-c", "--cores"), type="integer", default=NULL, 16 | help="Number of cpus to use", metavar="integer")); 17 | 18 | opt_parser = OptionParser(option_list=option_list); 19 | opt = parse_args(opt_parser); 20 | 21 | if (is.null(opt$cores)) { 22 | cores <- as.integer(Sys.getenv("SLURM_CPUS_PER_TASK")) 23 | if (is.na(cores)) { 24 | cores <- 1 25 | } 26 | } else { 27 | cores <- opt$cores 28 | } 29 | message("Number of cores used: ",cores) 30 | 31 | 32 | # Generate data 33 | message("Generating data") 34 | len <- 1e6 35 | a <- runif(len, -10, 10) 36 | a[sample(len, 100,replace=TRUE)] <- 0 37 | 38 | b <- runif(len, -10, 10) 39 | c <- runif(len, -10, 10) 40 | 41 | # Not vectorized function 42 | solve.quad.eq <- function(a, b, c) 43 | { 44 | # Not validate eqution: a and b are almost ZERO 45 | if(abs(a) < 1e-8 && abs(b) < 1e-8) return(c(NA, NA) ) 46 | 47 | # Not quad equation 48 | if(abs(a) < 1e-8 && abs(b) > 1e-8) return(c(-c/b, NA)) 49 | 50 | # No Solution 51 | if(b*b - 4*a*c < 0) return(c(NA,NA)) 52 | 53 | # Return solutions 54 | x.delta <- sqrt(b*b - 4*a*c) 55 | x1 <- (-b + x.delta)/(2*a) 56 | x2 <- (-b - x.delta)/(2*a) 57 | 58 | return(c(x1, x2)) 59 | } 60 | 61 | ############################################################################################# 62 | # *apple style 63 | ############################################################################################## 64 | # serial code 65 | message("Running lapply") 66 | system.time( 67 | res1.s <- lapply(1:len, FUN = function(x) { solve.quad.eq(a[x], b[x], c[x])}) 68 | ) 69 | 70 | # parallel 71 | message("Running mcapply with parallel-package") 72 | library(parallel) 73 | # multicores on Linux 74 | system.time( 75 | res1.p <- mclapply(1:len, FUN = function(x) { solve.quad.eq(a[x], b[x], c[x])}, mc.cores = cores) 76 | ) 77 | 78 | 79 | # cluster 80 | message("Running Cluster parLapply with parallel-package") 81 | cl <- makeCluster(cores) 82 | clusterExport(cl, c('solve.quad.eq', 'a', 'b', 'c')) 83 | system.time( 84 | res1.p <- parLapply(cl, 1:len, function(x) { solve.quad.eq(a[x], b[x], c[x]) }) 85 | ) 86 | stopCluster(cl) 87 | 88 | 89 | ########################################################################################## 90 | # For style 91 | ########################################################################################### 92 | # serial code 93 | message("Running serial for-loop") 94 | res2.s <- matrix(0, nrow=len, ncol = 2) 95 | system.time( 96 | for(i in 1:len) { 97 | res2.s[i,] <- solve.quad.eq(a[i], b[i], c[i]) 98 | } 99 | ) 100 | 101 | # foreach 102 | library(foreach) 103 | library(doParallel) 104 | 105 | # Real physical cores in my computer 106 | cl <- makeCluster(cores) 107 | registerDoParallel(cl, cores=cores) 108 | 109 | # clusterSplit are very convience to split data but it takes lots of extra memory 110 | # chunks <- clusterSplit(cl, 1:len) 111 | 112 | # split data by ourselves 113 | chunk.size <- len/cores 114 | 115 | message("Running parallel for loop with foreach- and 
doParallel-packages") 116 | system.time( 117 | res2.p <- foreach(i=1:cores, .combine='rbind') %dopar% 118 | { # local data for results 119 | res <- matrix(0, nrow=chunk.size, ncol=2) 120 | for(x in ((i-1)*chunk.size+1):(i*chunk.size)) { 121 | res[x - (i-1)*chunk.size,] <- solve.quad.eq(a[x], b[x], c[x]) 122 | } 123 | # return local results 124 | res 125 | } 126 | ) 127 | 128 | stopImplicitCluster() 129 | stopCluster(cl) 130 | -------------------------------------------------------------------------------- /R/parallel_example/parallelR.slrm: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #SBATCH -p short 3 | #SBATCH -t 00:20:00 4 | #SBATCH --nodes=1 5 | #SBATCH --ntasks=1 6 | #SBATCH --cpus-per-task=4 7 | #SBATCH --mem=3G 8 | #SBATCH -o parallelR.out 9 | 10 | module load R 11 | 12 | echo 'Running parallel R example:' 13 | 14 | srun Rscript parallelR.R 15 | -------------------------------------------------------------------------------- /R/parallel_example/parallelR_compare.R: -------------------------------------------------------------------------------- 1 | 2 | # This code is part of the ParallelR blog ExplicitParallel example. 3 | # Adapted to work in Triton by Simo Tuomisto, 2017 4 | # See the blog post in http://www.parallelr.com/r-with-parallel-computing/ 5 | # Blog's examples are available in GitHub: https://github.com/patriczhao/ParallelR 6 | 7 | # Original author docstring: 8 | # Examples for the R and Parallel Computing blog in COS website (cos.name) 9 | # Author: Peng Zhao, 8/30/2016 10 | 11 | # Get the number of cores to use from command line or from SLURM_CPUS_PER_TASK 12 | library("optparse") 13 | 14 | option_list = list( 15 | make_option(c("-c", "--cores"), type="integer", default=NULL, 16 | help="Number of cpus to use", metavar="integer")); 17 | 18 | opt_parser = OptionParser(option_list=option_list); 19 | opt = parse_args(opt_parser); 20 | 21 | if (is.null(opt$cores)) { 22 | cores <- as.integer(Sys.getenv("SLURM_CPUS_PER_TASK")) 23 | if (is.na(cores)) { 24 | cores <- 1 25 | } 26 | } else { 27 | cores <- opt$cores 28 | } 29 | message('Running benchmark with ',cores,' processes') 30 | 31 | # Generate data 32 | message("Generating data") 33 | len <- 2e6 34 | a <- runif(len, -10, 10) 35 | a[sample(len, 100,replace=TRUE)] <- 0 36 | 37 | b <- runif(len, -10, 10) 38 | c <- runif(len, -10, 10) 39 | 40 | # Not vectorized function 41 | solve.quad.eq <- function(a, b, c) 42 | { 43 | # Not validate eqution: a and b are almost ZERO 44 | if(abs(a) < 1e-8 && abs(b) < 1e-8) return(c(NA, NA) ) 45 | 46 | # Not quad equation 47 | if(abs(a) < 1e-8 && abs(b) > 1e-8) return(c(-c/b, NA)) 48 | 49 | # No Solution 50 | if(b*b - 4*a*c < 0) return(c(NA,NA)) 51 | 52 | # Return solutions 53 | x.delta <- sqrt(b*b - 4*a*c) 54 | x1 <- (-b + x.delta)/(2*a) 55 | x2 <- (-b - x.delta)/(2*a) 56 | 57 | return(c(x1, x2)) 58 | } 59 | 60 | ############################################################################################# 61 | # *apple style 62 | ############################################################################################## 63 | # serial code 64 | 65 | library(rbenchmark) 66 | 67 | benchmark( 68 | 'lapply' = { 69 | res1.s <- lapply(1:len, FUN = function(x) { solve.quad.eq(a[x], b[x], c[x])}) 70 | }, 71 | # parallel 72 | # multicores on Linux 73 | 'mcapply' = { 74 | library(parallel) 75 | res1.p <- mclapply(1:len, FUN = function(x) { solve.quad.eq(a[x], b[x], c[x])}, mc.cores = cores) 76 | }, 77 | 'parLapply' = { 78 | library(parallel) 
79 | cl <- makeCluster(cores) 80 | clusterExport(cl, c('solve.quad.eq', 'a', 'b', 'c')) 81 | res1.p <- parLapply(cl, 1:len, function(x) { solve.quad.eq(a[x], b[x], c[x]) }) 82 | stopCluster(cl) 83 | }, 84 | 'for' = { 85 | res2.s <- matrix(0, nrow=len, ncol = 2) 86 | for(i in 1:len) { 87 | res2.s[i,] <- solve.quad.eq(a[i], b[i], c[i]) 88 | } 89 | }, 90 | 'foreach/dopar' = { 91 | library(foreach) 92 | library(doParallel) 93 | cl <- makeCluster(cores) 94 | registerDoParallel(cl, cores=cores) 95 | chunk.size <- len/cores 96 | res2.p <- foreach(i=1:cores, .combine='rbind') %dopar% 97 | { # local data for results 98 | res <- matrix(0, nrow=chunk.size, ncol=2) 99 | for(x in ((i-1)*chunk.size+1):(i*chunk.size)) { 100 | res[x - (i-1)*chunk.size,] <- solve.quad.eq(a[x], b[x], c[x]) 101 | } 102 | # return local results 103 | res 104 | } 105 | stopImplicitCluster() 106 | stopCluster(cl) 107 | }, 108 | replications=1, 109 | columns = c("test", "elapsed", "relative", "user.self", "sys.self") 110 | ) 111 | 112 | -------------------------------------------------------------------------------- /R/parallel_example/parallelR_compare.slrm: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #SBATCH -p short 3 | #SBATCH -t 00:20:00 4 | #SBATCH --nodes=1 5 | #SBATCH --ntasks=1 6 | #SBATCH --cpus-per-task=8 7 | #SBATCH --mem=16G 8 | #SBATCH -o parallelR_compare.out 9 | 10 | module restore R_eb 11 | 12 | echo 'Running parallel R comparison:' 13 | 14 | for cores in 2 4 8 ; do 15 | srun Rscript parallelR_compare.R -c $cores 16 | done 17 | -------------------------------------------------------------------------------- /R/serial_example/serialR.R: -------------------------------------------------------------------------------- 1 | # Run simple cross-validation method with caret and knn 2 | # https://github.com/tobigithub/caret-machine-learning 3 | # Tobias Kind (2015) 4 | 5 | # Single example, no cross-validation 6 | require(caret); data(BloodBrain); set.seed(123); 7 | fit1 <- train(bbbDescr, logBBB, "knn"); fit1 8 | 9 | # cross-validation example with method boot 10 | require(caret); data(BloodBrain); set.seed(123); 11 | tc <- trainControl(method="boot") 12 | fit1 <- train(bbbDescr, logBBB, trControl=tc, method="knn"); fit1 13 | 14 | 15 | ### END 16 | -------------------------------------------------------------------------------- /R/serial_example/serialR.slrm: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #SBATCH -p short 3 | #SBATCH -t 00:20:00 4 | #SBATCH --ntasks=1 5 | #SBATCH --mem=3G 6 | #SBATCH -o serialR.out 7 | 8 | 9 | module load R 10 | 11 | echo 'Running a simple serial R example:' 12 | 13 | srun Rscript serialR.R 14 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | Triton Examples 2 | =============== 3 | 4 | .. warning:: This section is under development. 5 | 6 | This repository contains examples scripts that can be run in Aalto 7 | University's Triton-cluster, but it will be useful to others as well. 8 | 9 | .. toctree:: 10 | 11 | slurm/index 12 | 13 | * ``gpu/``: GPU usage, compiling CUDA directly. 14 | * ``openmp/``: compiling and running OpenMP* ``python/``: Basic Python scripts 15 | * ``openmpi/``: compiling and running MPI 16 | * ``R/``: Basic R scripts 17 | * ``scip/``: Material related to *Scientific Computing In Practice* lecture series. 
18 | * ``slurm/``: Basic submit scripts and programs 19 | 20 | 21 | This repository can be found at 22 | https://github.com/AaltoSciComp/hpc-examples and embedded into 23 | https://scicomp.aalto.fi/ . 24 | -------------------------------------------------------------------------------- /gpu/helloworld/README.md: -------------------------------------------------------------------------------- 1 | # gpu/helloworld 2 | 3 | For up to date instructions, see [SciComp GPU page](https://scicomp.aalto.fi/triton/tut/gpu/). 4 | 5 | Running the example in the gpu queue: 6 | ```sh 7 | sbatch helloworld.slrm 8 | ``` 9 | -------------------------------------------------------------------------------- /gpu/helloworld/helloworld.cu: -------------------------------------------------------------------------------- 1 | #include "stdio.h" 2 | 3 | __global__ void cuda_hello(int* a){ 4 | // blockIdx has values between 0 and 4 5 | printf("Hello World from GPU a[%d]=%d \n", blockIdx.x, a[blockIdx.x]); 6 | } 7 | 8 | int main(void) { 9 | int* d_a; 10 | 11 | // Allocates an array of 5 integers 12 | cudaMalloc(&d_a, 5*sizeof(int)); 13 | 14 | // Runs 5 instances of kernel cuda_hello in parallel 15 | cuda_hello<<<5, 1>>>(d_a); 16 | 17 | // This is needed for the printf in the kernel to display 18 | cudaDeviceSynchronize(); 19 | 20 | printf("Hello from outside GPU\n"); 21 | return 0; 22 | } 23 | -------------------------------------------------------------------------------- /gpu/helloworld/helloworld.slrm: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #SBATCH --time=00:05:00 3 | #SBATCH --job-name=helloworld 4 | #SBATCH --mem-per-cpu=500M 5 | #SBATCH --cpus-per-task=1 6 | #SBATCH --gres=gpu:1 7 | #SBATCH --output=helloworld.out 8 | 9 | module load cuda 10 | nvcc helloworld.cu -o helloworld 11 | ./helloworld 12 | -------------------------------------------------------------------------------- /gpu/helloworld/pi.cu: -------------------------------------------------------------------------------- 1 | // Using CUDA device to calculate pi 2 | #include 3 | #include 4 | 5 | #define NBIN 10000000 // Number of bins 6 | #define NUM_BLOCK 30 // Number of thread blocks 7 | #define NUM_THREAD 8 // Number of threads per block 8 | int tid; 9 | float pi = 0; 10 | 11 | // Kernel that executes on the CUDA device 12 | __global__ void cal_pi(float *sum, int nbin, float step, int nthreads, int nblocks) { 13 | int i; 14 | float x; 15 | int idx = blockIdx.x*blockDim.x+threadIdx.x; // Sequential thread index across the blocks 16 | for (i=idx; i< nbin; i+=nthreads*nblocks) { 17 | x = (i+0.5)*step; 18 | sum[idx] += 4.0/(1.0+x*x); 19 | } 20 | } 21 | 22 | // Main routine that executes on the host 23 | int main(void) { 24 | dim3 dimGrid(NUM_BLOCK,1,1); // Grid dimensions 25 | dim3 dimBlock(NUM_THREAD,1,1); // Block dimensions 26 | float *sumHost, *sumDev; // Pointer to host & device arrays 27 | 28 | float step = 1.0/NBIN; // Step size 29 | size_t size = NUM_BLOCK*NUM_THREAD*sizeof(float); //Array memory size 30 | sumHost = (float *)malloc(size); // Allocate array on host 31 | cudaMalloc((void **) &sumDev, size); // Allocate array on device 32 | // Initialize array in device to 0 33 | cudaMemset(sumDev, 0, size); 34 | // Do calculation on device 35 | cal_pi <<>> (sumDev, NBIN, step, NUM_THREAD, NUM_BLOCK); // call CUDA kernel 36 | // Retrieve result from device and store it in host array 37 | cudaMemcpy(sumHost, sumDev, size, cudaMemcpyDeviceToHost); 38 | for(tid=0; tid 11 | #include 12 | 13 | 
int main(int argc, char** argv) { 14 | MPI_Init(NULL, NULL); // initialize the MPI 15 | int world_size; 16 | MPI_Comm_size(MPI_COMM_WORLD, &world_size); // number of processes 17 | int world_rank; 18 | MPI_Comm_rank(MPI_COMM_WORLD, &world_rank); // rank of the process 19 | char processor_name[MPI_MAX_PROCESSOR_NAME]; 20 | int name_len; 21 | MPI_Get_processor_name(processor_name, &name_len); // processor name 22 | printf("Hello world from processor %s, rank %d" 23 | " out of %d processors\n", processor_name, world_rank, world_size); 24 | MPI_Finalize(); // finalize the MPI 25 | } 26 | -------------------------------------------------------------------------------- /mpi/hello_mpi/hello_mpi.slrm: -------------------------------------------------------------------------------- 1 | #!/bin/bash -l 2 | #SBATCH --time=00:05:00 3 | #SBATCH --mem-per-cpu=500M 4 | #SBATCH --nodes=1 5 | #SBATCH --ntasks-per-node=4 6 | #SBATCH --output=hello_mpi.out 7 | 8 | module load gcc/11.3.0 9 | module load openmpi/4.1.5 10 | 11 | mpicc -o hello_mpi hello_mpi.c 12 | 13 | srun hello_mpi 14 | -------------------------------------------------------------------------------- /mpi/hello_mpi_fortran/README.md: -------------------------------------------------------------------------------- 1 | # mpi/hello_mpi 2 | 3 | For up to date instructions, see 4 | [SciComp page on parallel computing](https://scicomp.aalto.fi/triton/tut/parallel/). 5 | 6 | Running the example in the queue: 7 | ```sh 8 | sbatch hello_mpi_fortran.slrm 9 | ``` 10 | -------------------------------------------------------------------------------- /mpi/hello_mpi_fortran/hello_mpi_fortran.f90: -------------------------------------------------------------------------------- 1 | ! Hello World MPI 2 | ! 3 | ! Compile on Triton with: 4 | ! 5 | ! module load gcc 6 | ! module load openmpi 7 | ! mpifort hello_mpi_fortran.f90 -o hello_mpi_fortran 8 | ! 9 | ! 10 | ! Simo Tuomisto, 2021 11 | ! 12 | 13 | program hello 14 | include 'mpif.h' 15 | integer world_size, rank, ierror, tag, status(MPI_STATUS_SIZE) 16 | 17 | call MPI_INIT(ierror) ! Initialize the MPI 18 | call MPI_COMM_SIZE(MPI_COMM_WORLD, world_size, ierror) ! Number of processes 19 | call MPI_COMM_RANK(MPI_COMM_WORLD, rank, ierror) ! Rank of the process 20 | print *, 'Hello world from processor ', rank, ' out of ', & 21 | world_size , ' processors' 22 | call MPI_FINALIZE(ierror) ! Finalize the MPI 23 | end 24 | -------------------------------------------------------------------------------- /mpi/hello_mpi_fortran/hello_mpi_fortran.slrm: -------------------------------------------------------------------------------- 1 | #!/bin/bash -l 2 | #SBATCH --time=00:05:00 3 | #SBATCH --mem-per-cpu=500M 4 | #SBATCH --nodes=1 5 | #SBATCH --ntasks-per-node=4 6 | #SBATCH --output=hello_mpi_fortran.out 7 | 8 | module load gcc/11.3.0 9 | module load openmpi/4.1.5 10 | 11 | mpifort hello_mpi_fortran.f90 -o hello_mpi_fortran 12 | 13 | srun hello_mpi_fortran 14 | -------------------------------------------------------------------------------- /ngrams/README.md: -------------------------------------------------------------------------------- 1 | # ngram calculation HPC sample program 2 | 3 | These are data-based example scripts for a HPC cluster. They are made 4 | quickly and designed to be easy to understand (though it's not 5 | intended that people look at the code) and show interesting problems 6 | you might face when using a cluster. 7 | 8 | For the most part, see the help text of the programs for how to use 9 | them. 
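For example, the counting script documents all of its options via `--help` (paths here assume you are in the repository root):

```console
$ python3 ngrams/count.py --help
```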
10 | 11 | https://en.wikipedia.org/wiki/N-gram 12 | 13 | 14 | 15 | ## Data 16 | 17 | Any text data will work, but this especially uses public-domain sample 18 | data from Project Gutenberg: 19 | 20 | - Original: https://zenodo.org/records/5783256 21 | - Reprocessed, first 100 books: https://users.aalto.fi/~darstr1/public/Gutenberg-Fiction-first100.zip 22 | - Reprocessed, first 1000 books: https://users.aalto.fi/~darstr1/public/Gutenberg-Fiction-first1000.zip 23 | 24 | On the Triton cluster, these are available in 25 | `/scratch/shareddata/teaching/`. 26 | 27 | A unique feature is that it can read `.txt` files from zipfiles 28 | without needing to decompress the zipfile. 29 | 30 | 31 | 32 | ## count.py and count-multi.py 33 | 34 | Reads in text files and outputs ngrams found within them. count-multi 35 | is a version that uses multiprocessing with the --threads option 36 | (though it's processes, not threads). 37 | 38 | It can operate with characters (the default) or with words using 39 | `--words`. Word mode uses much more memory. 40 | 41 | Example: 42 | 43 | ```console 44 | $ python3 ngrams/count.py -n 2 --stop 10 /scratch/shareddata/teaching/Gutenberg-Fiction.zip 45 | Loaded 18738 files from /scratch/shareddata/teaching/Gutenberg-Fiction.zip 46 | 100731 ["e", " "] 47 | 88982 [" ", "t"] 48 | 82653 ["h", "e"] 49 | 82206 ["t", "h"] 50 | ``` 51 | 52 | ```console 53 | $ python3 ngrams/count.py -n 2 /scratch/shareddata/teaching/Gutenberg-Fiction.zip -o 2grams-all.out 54 | Loaded 18738 files from /scratch/shareddata/teaching/Gutenberg-Fiction.zip 55 | ``` 56 | 57 | 58 | 59 | ## combine-counts.py 60 | 61 | Reads multiple count files and outputs one count file combining them. 62 | 63 | Example: 64 | 65 | ```console 66 | $ python3 ngrams/combine-counts.py array-2grams_*.out -o 2grams-all.out 67 | ``` 68 | 69 | 70 | 71 | ## generate.py 72 | 73 | Uses a count file to generate text based on predictions using the 74 | ngrams (for n>=2). This doesn't work well and is extremely 75 | inefficient, but probably everyone can understand what it's doing if 76 | you make an analogy with how LLMs predict the next word. 77 | 78 | Example: 79 | 80 | ```console 81 | $ python3 ngrams/generate.py 2grams-all.out 82 | ``` 83 | 84 | 85 | 86 | ## Analysis 87 | 88 | - Increasing ngram size increases the memory use. Ngrams=1 is only 89 | character/word frequencies and thus is probably CPU and I/O bound. 90 | 91 | - This isn't exactly CPU bound but it does use a lot of CPU. 92 | 93 | - It works very well with array jobs and has built-in options to do that. 94 | 95 | - Speed of reading data does matter. Reading directly from the 96 | Zipfile (via Python - no extraction) gives some speedup, as long as 97 | you don't have to open the zipfile every time. 98 | 99 | - The multiprocessing version isn't much faster, since it spends so 100 | much time moving data around and merging the counts internally. 101 | 102 | - The reading/writing of the count files takes a large amount of the 103 | time and shows the importance of good data formats. 104 | 105 | - The MaxRSS indication in the multiprocessing version may be wrong 106 | (the syscall for children claims to only return MaxRSS for the child 107 | with the most memory usage, and may always be zero).
108 | 109 | - (and more) 110 | -------------------------------------------------------------------------------- /ngrams/array.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #SBATCH --mem=50G 3 | #SBATCH --array=0-20 4 | #SBATCH --time=0-6 5 | #SBATCH --job-name=words-array 6 | 7 | mkdir -p /scratch/work/$USER/ngrams-output/ 8 | 9 | python3 ngrams/count.py /scratch/work/darstr1/data/Gutenberg-Fiction.zip -n 3 --words --start=$SLURM_ARRAY_TASK_ID --step=20 -o /scratch/work/$USER/ngrams-output/ngrams3-words-all-array_$SLURM_ARRAY_TASK_ID.out 10 | 11 | # Combine 12 | #python3 ngrams/combine-counts.py /scratch/work/$USER/ngrams-output/ngrams3-words-all-array_* -o /scratch/work/$USER/ngrams-output/ngrams3-words-all.out 13 | -------------------------------------------------------------------------------- /ngrams/combine-counts.py: -------------------------------------------------------------------------------- 1 | """Combine counts files 2 | 3 | Reads in multiple files and writes out a new count file. 4 | 5 | """ 6 | 7 | import argparse 8 | import collections 9 | import json 10 | import sys 11 | 12 | import argparse 13 | parser = argparse.ArgumentParser() 14 | parser.add_argument('countfile', nargs='+', help="Files with counts") 15 | parser.add_argument('--verbose', '-v', action='store_true') 16 | parser.add_argument('--output', '-o',) 17 | args = parser.parse_args() 18 | 19 | ngrams_total = collections.Counter() 20 | for file_ in args.countfile: 21 | if args.verbose: 22 | print(file_, file=sys.stderr) 23 | for line in open(file_): 24 | if not line.strip(): 25 | continue 26 | count, data = line.split(' ', 1) 27 | count = int(count) 28 | data = json.loads(data) 29 | ngrams_total[tuple(data)] += count 30 | 31 | # Save output to file if requested, otherwise print to stdout 32 | output = sys.stdout 33 | if args.output: 34 | if args.verbose: 35 | print('Writing to', args.output, file=sys.stderr) 36 | output = open(args.output, 'w') 37 | # Print the output 38 | for ngram, count in ngrams_total.most_common(): 39 | print(count, json.dumps(ngram), file=output) 40 | -------------------------------------------------------------------------------- /ngrams/count-multi.py: -------------------------------------------------------------------------------- 1 | """Count ngrams from input text files (multiprocessing version). 2 | 3 | This reads in text files and computes n-grams based on words or 4 | characters. It's designed to be a HPC example, not for serious use. 5 | 6 | The multiprocessing version is slower than the non-multiprocessing 7 | version (this is something for you to think about). 8 | 9 | n-grams are tuples such as ("the", "book", "is") or ("t", "h", "e"). It 10 | writes output to standard output, or the file given. Output format a 11 | plain-text file with: 12 | 13 | COUNT ["word1", "word2"] 14 | 15 | """ 16 | from __future__ import print_function 17 | 18 | import collections 19 | import io 20 | import itertools 21 | import json 22 | import multiprocessing 23 | import os 24 | import re 25 | import resource 26 | import sys 27 | import time 28 | import zipfile 29 | 30 | 31 | 32 | def nwise(iterable, n): 33 | """Like itertools.pairwise but for arbitrary n. 
34 | 35 | Creates groups of n: 36 | 37 | 1: [a, b, c, d, e, f] -> [a, b, c, d, e, f] 38 | 2: [a, b, c, d, e, f] -> [ab, bc, cd, de, ef] 39 | 3: [a, b, c, d, e, f] -> [abc, bcd, cde, def] 40 | """ 41 | if n <= 0: 42 | raise ValueError(f"n must be a positive integer (was {n})") 43 | iterator = iter(iterable) 44 | try: 45 | ngram = tuple(next(iterator) for _ in range(n)) 46 | # RuntimeError raised within the above when it runs out of data. 47 | # Does this mask other errors though? 48 | except RuntimeError: 49 | return 50 | yield ngram 51 | for next_ in iterator: 52 | ngram = ngram[1:] + (next_, ) 53 | yield ngram 54 | 55 | def _openzip(zip, name): 56 | return lambda: io.TextIOWrapper(z.open(name), 'utf8') 57 | 58 | def opendir(dir_): 59 | """Open either a zipfile (*.txt within it), directory/*.txt, or an individual file. 60 | 61 | Returns a list of the files within the zipfile or directory, or the 62 | filename if a single filename is given. The list contains 63 | (filename, function_that_opens_the _file), so that the process_file 64 | function can handle both zipfiles and normal files the same way. 65 | """ 66 | # Zipfiles 67 | if dir_.endswith('.zip'): 68 | z = zipfile.ZipFile(dir_) 69 | file_list = [ #(z, name) 70 | f'zip::{dir_}::{name}' 71 | for name in z.namelist() 72 | if name.endswith('.txt') ] 73 | print(f'Found {len(file_list)} files in {dir_}', file=sys.stderr) 74 | # Directories 75 | elif os.path.isdir(dir_): 76 | file_list = [ #(name, lambda name=name: open(name, 'r')) 77 | os.path.join(dir_, name) 78 | for name in os.listdir(dir_) 79 | if name.endswith('.txt') ] 80 | print(f'Found {len(file_list)} files in {dir_}', file=sys.stderr) 81 | # regular files 82 | else: 83 | file_list = [ dir_ ] 84 | return file_list 85 | 86 | ZIPFILE_CACHE = { } 87 | def process_file(x): 88 | """Return ngrams from a given filename. 89 | 90 | filename: filename of this file. Only used for printing, since it 91 | might be a relative path inside of a zipfile. 92 | 93 | data: a function which returns the file data. This exists so that 94 | this function doesn't need to care if it's reading from a zipfile or 95 | normal file. 96 | 97 | args: arguments from the argument parser. 98 | """ 99 | filename, args = x 100 | if args.verbose: 101 | print(filename, file=sys.stderr) 102 | # Open the zipfile and cache the open zipfile object. 103 | if filename.startswith('zip::'): 104 | _, archive, element = filename.split('::', maxsplit=2) 105 | if archive in ZIPFILE_CACHE: 106 | z = ZIPFILE_CACHE[archive] 107 | else: 108 | #z = zipfile.ZipFile(archive) 109 | z = ZIPFILE_CACHE[archive] = zipfile.ZipFile(archive) 110 | data = z.open(element).read().decode() 111 | #if isinstance(filename, tuple): 112 | # name, data = filename 113 | # if isinstance(name, zipfile.ZipFile): 114 | # z, name = name, data 115 | # data = z.open(name).read() 116 | # #z = zipfile.ZipFile(archive) 117 | # data = data.decode() 118 | else: 119 | data = open(filename, 'r').read() 120 | data = data.lower() 121 | ngrams = collections.Counter() # Making a new Counter here may be inefficient. 122 | # Split by words if needed. Use a regular expression for this. 123 | if args.words: 124 | data = (m[0] for m in re.finditer(r'[a-zA-Z_-]+', data)) 125 | # For every ngram, increment its count 126 | for ngram in nwise(data, args.n): 127 | ngrams[ngram] += 1 128 | 129 | return ngrams 130 | 131 | 132 | 133 | def arg_int_auto(x): 134 | """Argparse argument type helper. 
135 | 136 | Returns an integer, or the value from the environment variable 137 | SLURM_CPUS_PER_TASK if the value is 'auto'. 138 | """ 139 | if x == 'auto': 140 | return int(os.environ['SLURM_CPUS_PER_TASK']) 141 | else: 142 | return int(x) 143 | 144 | 145 | 146 | def main(): 147 | start = time.time() 148 | import argparse 149 | parser = argparse.ArgumentParser() 150 | parser.add_argument('input', nargs='+', help="Inputs, should be text files. Can be a zipfile or directory (in which case, files ending in .txt inside the zip/directory will be used), or directly filenames (in which case the file will be used)") 151 | parser.add_argument('-n', type=int, default=1, help="Size of n-grams. -n 1 is simple frequencies.") 152 | parser.add_argument('--output', '-o', help="Write output to this filename, otherwise print output to stdout.") 153 | parser.add_argument('--threads', '-t', type=arg_int_auto, default=1, help="Number of threads to usez for the analysis. If 'auto', then detect from the SLURM_CPUS_PER_TASK environment variable. If an integer 1 or greater, use multiprocessing with that many processes. If zero, do not use multiprocessing at all. Default: 1") 154 | parser.add_argument('--words', action='store_true', help="If given, use word-based ngram mode instead of character-based. Word-based mode only uses ascii letters and removes most other punctuation and special characters.") 155 | parser.add_argument('--verbose', '-v', action='store_true', help="Print more to stderr, for example the filenames that are being read.") 156 | group_selection = parser.add_argument_group("selection", "Selecting files. After a list of all files is created, apply these slice operations like you do to a list: list[start:stop:step]. Files are not sorted and used in the order given, order in the zipfile, or order the natural (unsorted) order the operating system returns them from the directory list. The directory order is unpredictable but usually the same if the directory isn't touched.") 157 | group_selection.add_argument('--start', type=int, default=None, help="Select starting file with a Python slice operation 'file_list[start:]'. The start is included, and counts go from 0. For examaple, --start=1 skips the first (0th) file.") 158 | group_selection.add_argument('--stop', type=int, default=None, help="Select stop file with file_list[:stop]. The stop is NOT included, and the counts go from zero. For example, to limit to the first 100 files (0, 1, ..., 99), use --stop=100 .") 159 | group_selection.add_argument('--step', type=int, default=None, help="Select every STEP file. For example, --start=0 --step=10 selects files 0, 10, 20, etc. and --start=1 --step=10 selects files 1, 11, 21, etc.") 160 | args = parser.parse_args() 161 | 162 | # Read the filelist from a zipfile, OR from a directory. Accumulate 163 | # a list of all files across all arguments. 164 | filelist = sum((opendir(input) for input in args.input), []) 165 | filelist = itertools.islice(filelist, args.start, args.stop, args.step) 166 | 167 | # Process every file and accumulate the counts. 
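    # With a nonzero --threads a multiprocessing.Pool is used below:
    # imap_unordered merges results as soon as any worker finishes, and
    # chunksize=10 hands filenames to workers in batches to reduce
    # inter-process communication overhead.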
168 | ngrams_total = collections.Counter() 169 | if args.threads is not None and args.threads != 0: 170 | print(f'Using multiprocessing.Pool with {args.threads} processes', file=sys.stderr) 171 | pool = multiprocessing.Pool(args.threads) 172 | for result in pool.imap_unordered(process_file, 173 | ( 174 | (name, args) 175 | #((name[1], name[0].open(name[1]).read()), args) if isinstance(name, tuple) else (name, args) 176 | for name in filelist), 177 | chunksize=10, 178 | ): 179 | ngrams_total.update(result) 180 | pool.close() 181 | pool.join() 182 | else: 183 | for filename in filelist: 184 | ngrams_total.update(process_file((filename, args))) 185 | 186 | # Save output to file if requested, otherwise print to stdout 187 | output = sys.stdout 188 | if args.output: 189 | output = open(args.output, 'w') 190 | for ngram, count in ngrams_total.most_common(): 191 | print(count, json.dumps(ngram), file=output) 192 | 193 | # Print the summary and performance information to stderr 194 | print(f'{args.n}-grams: {len(ngrams_total)}', file=sys.stderr) 195 | print(file=sys.stderr) 196 | rusage_s = resource.getrusage(resource.RUSAGE_SELF) 197 | rusage_c = resource.getrusage(resource.RUSAGE_CHILDREN) 198 | print(f'Walltime {time.time() - start:.2f} s', file=sys.stderr) 199 | print(f'User time: {rusage_s.ru_utime + rusage_c.ru_utime:.2f} s ({rusage_s.ru_utime:.2f} + {rusage_c.ru_utime:.2f})', file=sys.stderr) 200 | print(f'System time: {rusage_s.ru_stime + rusage_c.ru_stime:.2f} s ({rusage_s.ru_stime:.2f} + {rusage_c.ru_stime:.2f})', file=sys.stderr) 201 | print(f'MaxRSS: {(rusage_s.ru_maxrss + rusage_c.ru_maxrss)/2**20:.3f} GiB ({rusage_s.ru_maxrss/2**20:.3f} + {rusage_c.ru_maxrss/2**20:.3f})', file=sys.stderr) 202 | 203 | if __name__ == '__main__': 204 | main() 205 | -------------------------------------------------------------------------------- /ngrams/count.py: -------------------------------------------------------------------------------- 1 | """Count ngrams from input text files. 2 | 3 | This reads in text files and computes n-grams based on words or 4 | characters. It's designed to be a HPC example, not for serious use. 5 | 6 | n-grams are tuples such as ("the", "book", "is") or ("t", "h", "e"). It 7 | writes output to standard output, or the file given. Output format a 8 | plain-text file with: 9 | 10 | COUNT ["word1", "word2"] 11 | 12 | """ 13 | from __future__ import print_function 14 | 15 | import collections 16 | import io 17 | import itertools 18 | import json 19 | import multiprocessing 20 | import os 21 | import re 22 | import resource 23 | import sys 24 | import time 25 | import zipfile 26 | 27 | 28 | 29 | def nwise(iterable, n): 30 | """Like itertools.pairwise but for arbitrary n. 31 | 32 | Creates groups of n: 33 | 34 | 1: [a, b, c, d, e, f] -> [a, b, c, d, e, f] 35 | 2: [a, b, c, d, e, f] -> [ab, bc, cd, de, ef] 36 | 3: [a, b, c, d, e, f] -> [abc, bcd, cde, def] 37 | """ 38 | if n <= 0: 39 | raise ValueError(f"n must be a positive integer (was {n})") 40 | iterator = iter(iterable) 41 | try: 42 | ngram = tuple(next(iterator) for _ in range(n)) 43 | # RuntimeError raised within the above when it runs out of data. 44 | # Does this mask other errors though? 45 | except RuntimeError: 46 | return 47 | yield ngram 48 | for next_ in iterator: 49 | ngram = ngram[1:] + (next_, ) 50 | yield ngram 51 | 52 | 53 | 54 | def opendir(dir_): 55 | """Open either a zipfile (*.txt within it), directory/*.txt, or an individual file. 
56 | 57 | Returns a list of the files within the zipfile or directory, or the 58 | filename if a single filename is given. The list contains 59 | (filename, function_that_opens_the _file), so that the process_file 60 | function can handle both zipfiles and normal files the same way. 61 | """ 62 | # Zipfiles 63 | if dir_.endswith('.zip'): 64 | z = zipfile.ZipFile(dir_) 65 | file_list = [ (name, lambda name=name: io.TextIOWrapper(z.open(name), 'utf8')) 66 | for name in z.namelist() 67 | if name.endswith('.txt') ] 68 | print(f'Found {len(file_list)} files in {dir_}', file=sys.stderr) 69 | # Directories 70 | elif os.path.isdir(dir_): 71 | file_list = [ (name, lambda name=name, dir_=dir_: open(os.path.join(dir_, name), 'r')) 72 | for name in os.listdir(dir_) 73 | if name.endswith('.txt') ] 74 | print(f'Found {len(file_list)} files in {dir_}', file=sys.stderr) 75 | # regular files 76 | else: 77 | file_list = [ dir_, lambda name=dir_: open(name, 'r') ] 78 | return file_list 79 | 80 | 81 | 82 | def process_file(filename, data, args): 83 | """Return ngrams from a given filename. 84 | 85 | filename: filename of this file. Only used for printing, since it 86 | might be a relative path inside of a zipfile. 87 | 88 | data: a function which returns the file data. This exists so that 89 | this function doesn't need to care if it's reading from a zipfile or 90 | normal file. 91 | 92 | args: arguments from the argument parser. 93 | """ 94 | if args.verbose: 95 | print(filename, file=sys.stderr) 96 | # Get our raw data from wherever it is 97 | data = data().read() 98 | data = data.lower() 99 | ngrams = collections.Counter() # Making a new Counter here may be inefficient. 100 | # Split by words if needed. Use a regular expression for this. 101 | if args.words: 102 | data = (m[0] for m in re.finditer(r'[a-zA-Z_-]+', data)) 103 | # For every ngram, increment its count 104 | for ngram in nwise(data, args.n): 105 | ngrams[ngram] += 1 106 | 107 | return ngrams 108 | 109 | 110 | 111 | def main(): 112 | start = time.time() 113 | import argparse 114 | parser = argparse.ArgumentParser() 115 | parser.add_argument('input', nargs='+', help="Inputs, should be text files. Can be a zipfile or directory (in which case, files ending in .txt inside the zip/directory will be used), or directly filenames (in which case the file will be used)") 116 | parser.add_argument('-n', type=int, default=1, help="Size of n-grams. -n 1 is simple frequencies.") 117 | parser.add_argument('--output', '-o', help="Write output to this filename, otherwise print output to stdout.") 118 | parser.add_argument('--words', action='store_true', help="If given, use word-based ngram mode instead of character-based. Word-based mode only uses ascii letters and removes most other punctuation and special characters.") 119 | parser.add_argument('--verbose', '-v', action='store_true', help="Print more to stderr, for example the filenames that are being read.") 120 | group_selection = parser.add_argument_group("selection", "Selecting files. After a list of all files is created, apply these slice operations like you do to a list: list[start:stop:step]. Files are not sorted and used in the order given, order in the zipfile, or order the natural (unsorted) order the operating system returns them from the directory list. The directory order is unpredictable but usually the same if the directory isn't touched.") 121 | group_selection.add_argument('--start', type=int, default=None, help="Select starting file with a Python slice operation 'file_list[start:]'. 
The start is included, and counts go from 0. For examaple, --start=1 skips the first (0th) file.") 122 | group_selection.add_argument('--stop', type=int, default=None, help="Select stop file with file_list[:stop]. The stop is NOT included, and the counts go from zero. For example, to limit to the first 100 files (0, 1, ..., 99), use --stop=100 .") 123 | group_selection.add_argument('--step', type=int, default=None, help="Select every STEP file. For example, --start=0 --step=10 selects files 0, 10, 20, etc. and --start=1 --step=10 selects files 1, 11, 21, etc.") 124 | args = parser.parse_args() 125 | 126 | # Read the filelist from a zipfile, OR from a directory. Accumulate 127 | # a list of all files across all arguments. 128 | filelist = sum((opendir(input) for input in args.input), []) 129 | filelist = itertools.islice(filelist, args.start, args.stop, args.step) 130 | 131 | # Process every file and accumulate the counts. 132 | ngrams_total = collections.Counter() 133 | for filename, data in filelist: 134 | ngrams_total.update(process_file(filename, data, args)) 135 | 136 | # Save output to file if requested, otherwise print to stdout 137 | output = sys.stdout 138 | if args.output: 139 | output = open(args.output, 'w') 140 | for ngram, count in ngrams_total.most_common(): 141 | print(count, json.dumps(ngram), file=output) 142 | 143 | # Print the summary and performance information to stderr 144 | print(f'{args.n}-grams: {len(ngrams_total)}', file=sys.stderr) 145 | print(file=sys.stderr) 146 | rusage_s = resource.getrusage(resource.RUSAGE_SELF) 147 | rusage_c = resource.getrusage(resource.RUSAGE_CHILDREN) 148 | print(f'Walltime {time.time() - start:.2f} s', file=sys.stderr) 149 | print(f'User time: {rusage_s.ru_utime + rusage_c.ru_utime:.2f} s ({rusage_s.ru_utime:.2f} + {rusage_c.ru_utime:.2f})', file=sys.stderr) 150 | print(f'System time: {rusage_s.ru_stime + rusage_c.ru_stime:.2f} s ({rusage_s.ru_stime:.2f} + {rusage_c.ru_stime:.2f})', file=sys.stderr) 151 | print(f'MaxRSS: {(rusage_s.ru_maxrss + rusage_c.ru_maxrss)/2**20:.3f} GiB ({rusage_s.ru_maxrss/2**20:.3f} + {rusage_c.ru_maxrss/2**20:.3f})', file=sys.stderr) 152 | 153 | if __name__ == '__main__': 154 | main() 155 | -------------------------------------------------------------------------------- /ngrams/generate.py: -------------------------------------------------------------------------------- 1 | """Use ngrams to create text. 2 | 3 | This uses ngrams to predict next words. It takes makes a mapping of 4 | (the first (n-1) parts of the ngrams) to (the last element of the 5 | ngram), and uses this to predict. It's not very good but shows a minimal use. 6 | 7 | """ 8 | 9 | import ast 10 | import argparse 11 | import collections 12 | import json 13 | import random 14 | import sys 15 | 16 | import argparse 17 | parser = argparse.ArgumentParser() 18 | parser.add_argument('countfile', nargs='+', help="Files with counts, as generated from count.py") 19 | #parser.add_argument('--output', '-o',) 20 | parser.add_argument('--words', action='store_true', help="Use 'words-mode' instead of character-mode.") 21 | parser.add_argument('--count', '-c', type=int, default=100, help="Generate this many followups.") 22 | parser.add_argument('--limit-in', type=int, help="Limit ", "Stop reading from the files in after this ngrams.") 23 | parser.add_argument('--count-threshold', type=int, help="Don't read in ngrams with fewer than this many occurrances. 
Used to cut down memory use and load time.") 24 | parser.add_argument('--verbose', '-v', action='count', default=0) 25 | args = parser.parse_args() 26 | 27 | ngrams = collections.defaultdict(collections.Counter) 28 | for file_ in args.countfile: 29 | if args.verbose >= 1: 30 | print(f"Reading {file_}", file=sys.stderr) 31 | for line in open(file_): 32 | if not line.strip(): 33 | continue 34 | count, data = line.split(' ', 1) 35 | count = int(count) 36 | if args.count_threshold and count < args.count_threshold: 37 | # count files are sorted by count, so every later line is below the threshold too 38 | break 39 | data = json.loads(data) 40 | ngrams[tuple(data[:-1])][data[-1]] += count 41 | if args.limit_in and len(ngrams) > args.limit_in: 42 | break 43 | if args.limit_in and len(ngrams) > args.limit_in: 44 | break 45 | 46 | # A random starting (n-1) gram. 47 | start = random.choice(list(ngrams)) 48 | if args.words: 49 | print(' '.join(start), end=' ') 50 | else: 51 | print(''.join(start), end='') 52 | 53 | 54 | for i in range(args.count): 55 | if start not in ngrams: 56 | print() 57 | print(f"cannot continue from {start}") 58 | break 59 | elif args.verbose >= 2: 60 | print(f"{start} has {len(ngrams[start])} possibilities", file=sys.stderr) 61 | ngrams_next = ngrams[start] 62 | next_ = random.choices(population=list(ngrams_next.keys()), weights=ngrams_next.values()) 63 | next_ = next_[0] 64 | if args.words: 65 | print(next_, end=' ') 66 | else: 67 | print(next_, end='') 68 | start = start[1:] + (next_, ) 69 | print() 70 | -------------------------------------------------------------------------------- /openmp/hello_omp/README.md: -------------------------------------------------------------------------------- 1 | # openmp/hello_omp 2 | 3 | For up to date instructions, see 4 | [SciComp page on parallel computing](https://scicomp.aalto.fi/triton/tut/parallel/).
5 | 6 | Compiling the example with OpenMP: 7 | ```sh 8 | module load gcc/9.2.0 9 | gcc -fopenmp -O2 -g hello_omp.c -o hello_omp 10 | ``` 11 | 12 | Compiling the example without OpenMP: 13 | ```sh 14 | module load gcc/9.2.0 15 | gcc -O2 -g hello_omp.c -o hello_omp 16 | ``` 17 | 18 | Running the example in the queue: 19 | ```sh 20 | sbatch hello_omp.slrm 21 | ``` 22 | or 23 | ```sh 24 | module load gcc/9.2.0 25 | export OMP_PROC_BIND=true 26 | srun -c 2 ./hello_omp 27 | ``` 28 | -------------------------------------------------------------------------------- /openmp/hello_omp/hello_omp.c: -------------------------------------------------------------------------------- 1 | /* Hello World OpenMP 2 | * 3 | * Compile on Triton as: 4 | * gcc -fopenmp hello_omp.c -o hello_omp 5 | * 6 | * degtyai1, Wed, 28 May 2014 12:47:47 +0300 7 | * tuomiss1, Mon, 08 Jun 2020 8 | * 9 | */ 10 | 11 | #include 12 | #if defined(_OPENMP) 13 | #include 14 | #endif 15 | 16 | int main(void) { 17 | #if defined(_OPENMP) 18 | #pragma omp parallel 19 | printf("Hello, world from thread %d.\n", omp_get_thread_num()); 20 | #else 21 | printf("Hello, world.\n"); 22 | #endif 23 | return 0; 24 | } 25 | -------------------------------------------------------------------------------- /openmp/hello_omp/hello_omp.slrm: -------------------------------------------------------------------------------- 1 | #!/bin/bash -l 2 | #SBATCH --time=00:05:00 3 | #SBATCH --mem=500M 4 | #SBATCH --cpus-per-task=4 5 | #SBATCH --output=hello_omp.out 6 | 7 | module load gcc/9.2.0 8 | 9 | export OMP_PROC_BIND=true 10 | srun hello_omp 11 | -------------------------------------------------------------------------------- /postgres/build_postgres_image.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # Build a postgres image using singularity 4 | # 5 | 6 | export SINGULARITY_CACHEDIR=/tmp/$USER/singularity_cache 7 | mkdir -p $SINGULARITY_CACHEDIR 8 | 9 | singularity pull docker://library/postgres:latest 10 | -------------------------------------------------------------------------------- /postgres/run_postgres_example.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #SBATCH --output=postgres_example.out 3 | #SBATCH --time=00:30:00 4 | #SBATCH --mem=2G 5 | 6 | # Quit if any errors occur 7 | 8 | set -e 9 | 10 | # Create directories for postgresql to store data 11 | 12 | mkdir -p var/{lib,run} 13 | 14 | # Run postgres in a singularity image, forward output to files, catch PID for process 15 | 16 | singularity run --env POSTGRES_PASSWORD=mysecretpassword --env LC_ALL=C --env PGPORT=5433 -B ${PWD}/var/lib:/var/lib/postgresql -B ${PWD}/var/run:/var/run postgres_latest.sif 2> postgres.err 1> postgres.out & 17 | POSTGRES_PID=$! 
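# $! is the PID of the most recently started background command (the
# singularity run above); it is saved so the trap below can stop the
# database server when the job exits.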
18 | 19 | # Give postgres few seconds to initialize 20 | 21 | sleep 5 22 | 23 | # Set up a trap so that postgres will be killed when job finishes 24 | 25 | trap "kill $POSTGRES_PID ; exit" TERM EXIT 26 | 27 | # Create test environment 28 | 29 | module load miniconda 30 | 31 | mamba create -n sqlalchemy_test -q -y python sqlalchemy psycopg2 32 | 33 | source activate sqlalchemy_test 34 | 35 | # Run test connection to postgresql 36 | 37 | echo 'Testing postgres writing:' 38 | 39 | python test_postgres_write.py 40 | 41 | echo 'Testing postgres reading:' 42 | 43 | python test_postgres_read.py 44 | 45 | # Remove test environment 46 | 47 | source deactivate 48 | 49 | mamba env remove -n sqlalchemy_test 50 | -------------------------------------------------------------------------------- /postgres/test_postgres_read.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # 3 | # Quick postgres read test based on https://docs.sqlalchemy.org/en/14/orm/quickstart.html 4 | # 5 | from sqlalchemy import Column 6 | from sqlalchemy import ForeignKey 7 | from sqlalchemy import Integer 8 | from sqlalchemy import String 9 | from sqlalchemy.orm import declarative_base 10 | from sqlalchemy.orm import relationship 11 | 12 | #### Define metadata for our tables 13 | 14 | Base = declarative_base() 15 | 16 | class User(Base): 17 | __tablename__ = "user_account" 18 | id = Column(Integer, primary_key=True) 19 | name = Column(String(30)) 20 | fullname = Column(String) 21 | addresses = relationship( 22 | "Address", back_populates="user", cascade="all, delete-orphan" 23 | ) 24 | def __repr__(self): 25 | return f"User(id={self.id!r}, name={self.name!r}, fullname={self.fullname!r})" 26 | 27 | class Address(Base): 28 | __tablename__ = "address" 29 | id = Column(Integer, primary_key=True) 30 | email_address = Column(String, nullable=False) 31 | user_id = Column(Integer, ForeignKey("user_account.id"), nullable=False) 32 | user = relationship("User", back_populates="addresses") 33 | def __repr__(self): 34 | return f"Address(id={self.id!r}, email_address={self.email_address!r})" 35 | 36 | # Set authentication parameters 37 | 38 | user = 'postgres' 39 | 40 | password = 'mysecretpassword' 41 | 42 | # Define connection engine 43 | 44 | from sqlalchemy import create_engine 45 | 46 | engine = create_engine(f"postgresql://{user}:{password}@localhost:5433/postgres", echo=True, future=True) 47 | 48 | # Read test data from database 49 | 50 | from sqlalchemy.orm import Session 51 | from sqlalchemy import select 52 | 53 | session = Session(engine) 54 | 55 | stmt = select(User).where(User.name.in_(["spongebob", "sandy"])) 56 | 57 | for user in session.scalars(stmt): 58 | print(user) 59 | -------------------------------------------------------------------------------- /postgres/test_postgres_write.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # 3 | # Quick postgres write test based on https://docs.sqlalchemy.org/en/14/orm/quickstart.html 4 | # 5 | from sqlalchemy import Column 6 | from sqlalchemy import ForeignKey 7 | from sqlalchemy import Integer 8 | from sqlalchemy import String 9 | from sqlalchemy.orm import declarative_base 10 | from sqlalchemy.orm import relationship 11 | 12 | #### Define metadata for our tables 13 | 14 | Base = declarative_base() 15 | 16 | class User(Base): 17 | __tablename__ = "user_account" 18 | id = Column(Integer, primary_key=True) 19 | name = Column(String(30)) 20 | fullname = 
Column(String) 21 | addresses = relationship( 22 | "Address", back_populates="user", cascade="all, delete-orphan" 23 | ) 24 | def __repr__(self): 25 | return f"User(id={self.id!r}, name={self.name!r}, fullname={self.fullname!r})" 26 | 27 | class Address(Base): 28 | __tablename__ = "address" 29 | id = Column(Integer, primary_key=True) 30 | email_address = Column(String, nullable=False) 31 | user_id = Column(Integer, ForeignKey("user_account.id"), nullable=False) 32 | user = relationship("User", back_populates="addresses") 33 | def __repr__(self): 34 | return f"Address(id={self.id!r}, email_address={self.email_address!r})" 35 | 36 | # Set authentication parameters 37 | 38 | user = 'postgres' 39 | 40 | password = 'mysecretpassword' 41 | 42 | # Define connection engine 43 | 44 | from sqlalchemy import create_engine 45 | 46 | engine = create_engine(f"postgresql://{user}:{password}@localhost:5433/postgres", echo=True, future=True) 47 | 48 | # Write metadata 49 | 50 | Base.metadata.create_all(engine) 51 | 52 | # Write test data to database 53 | 54 | from sqlalchemy.orm import Session 55 | 56 | with Session(engine) as session: 57 | spongebob = User( 58 | name="spongebob", 59 | fullname="Spongebob Squarepants", 60 | addresses=[Address(email_address="spongebob@sqlalchemy.org")], 61 | ) 62 | sandy = User( 63 | name="sandy", 64 | fullname="Sandy Cheeks", 65 | addresses=[ 66 | Address(email_address="sandy@sqlalchemy.org"), 67 | Address(email_address="sandy@squirrelpower.org"), 68 | ], 69 | ) 70 | patrick = User(name="patrick", fullname="Patrick Star") 71 | session.add_all([spongebob, sandy, patrick]) 72 | session.commit() 73 | 74 | -------------------------------------------------------------------------------- /python/multiprocessing/index.rst: -------------------------------------------------------------------------------- 1 | Python multiprocessing 2 | ====================== 3 | 4 | 5 | With Python multiprocessing pools, you have to set the number of CPUs that 6 | the multiprocessing pool uses, otherwise it will try to use every CPU on 7 | the node - even though you haven't requested every CPU. It will 8 | be constrained to the few processors you requested, but will still try to 9 | use them all, which is inefficient. 10 | 11 | The main point is to use the Slurm ``SLURM_CPUS_PER_TASK`` environment 12 | variable to set the number of processors. 13 | 14 | 15 | The Python file: 16 | 17 | .. literalinclude:: multiprocessing_demo.py 18 | 19 | The Slurm script: 20 | 21 | 22 | .. literalinclude:: multiprocessing_demo.slrm 23 | 24 | 25 | -------------------------------------------------------------------------------- /python/multiprocessing/multiprocessing_demo.py: -------------------------------------------------------------------------------- 1 | """Demonstration of multiprocessing integrated with Slurm 2 | """ 3 | 4 | import multiprocessing 5 | import os 6 | 7 | # Detect the number of CPUs we have available. If in slurm, use the SLURM_CPUS_PER_TASK environment variable, which Slurm sets. 8 | if 'SLURM_CPUS_PER_TASK' in os.environ: 9 | cpus = int(os.environ['SLURM_CPUS_PER_TASK']) 10 | print("Detected %s CPUs through slurm"%cpus) 11 | else: 12 | # None means that it will auto-detect based on os.cpu_count() 13 | cpus = None 14 | print("Running on default number of CPUs (default: all=%s)"%os.cpu_count()) 15 | 16 | 17 | def my_work(i): 18 | """This is a pointless function that uses a few CPU-seconds.
19 | """ 20 | print("Running thread %s"%i) 21 | for x in range(10000000): 22 | x ** 2 23 | return i 24 | 25 | # Start the pool with the number of CPUs we found above, or default 26 | # value. 27 | with multiprocessing.Pool(cpus) as p: 28 | print(p.map(my_work, [1, 2, 3, 4, 5, 6, 7, 8, 9, 10])) 29 | -------------------------------------------------------------------------------- /python/multiprocessing/multiprocessing_demo.slrm: -------------------------------------------------------------------------------- 1 | #!/bin/bash -l 2 | #SBATCH -t 00:10:00 3 | #SBATCH --cpus-per-task=2 4 | #SBATCH --mem-per-cpu=1G 5 | 6 | module purge 7 | module load anaconda 8 | 9 | echo "Running on: $HOSTNAME with $SLURM_CPUS_PER_TASK processors" 10 | 11 | srun python multiprocessing_demo.py 12 | 13 | -------------------------------------------------------------------------------- /python/python_openmp/README.md: -------------------------------------------------------------------------------- 1 | # python/python_openmp 2 | 3 | For up to date instructions, see 4 | [SciComp page on parallel computing](https://scicomp.aalto.fi/triton/tut/parallel/) 5 | and 6 | [SciComp page on Python](https://scicomp.aalto.fi/triton/apps/python/). 7 | 8 | 9 | Running the example in the queue: 10 | ```sh 11 | sbatch python_openmp.slrm 12 | ``` 13 | or 14 | ```sh 15 | module load anaconda/2020-03-tf2 16 | srun -c 2 --time=00:05:00 python python_openmp.py 17 | ``` 18 | -------------------------------------------------------------------------------- /python/python_openmp/python_openmp.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import os 3 | from time import time 4 | import numpy as np 5 | 6 | print('Using %d processors' % int(os.getenv('SLURM_CPUS_PER_TASK',1))) 7 | print('Using %d threads' % int(os.getenv('OMP_NUM_THREADS', 1))) 8 | print('Using %d tasks' % int(os.getenv('SLURM_NTASKS', 1))) 9 | 10 | nrounds = 5 11 | 12 | t_start = time() 13 | 14 | for i in range(nrounds): 15 | a = np.random.random([2000,2000]) 16 | a = a + a.T 17 | b = np.linalg.pinv(a) 18 | 19 | t_delta = time() - t_start 20 | 21 | print('Seconds taken to invert %d symmetric 2000x2000 matrices: %f' % (nrounds, t_delta)) 22 | -------------------------------------------------------------------------------- /python/python_openmp/python_openmp.slrm: -------------------------------------------------------------------------------- 1 | #!/bin/bash -l 2 | #SBATCH -t 00:10:00 3 | #SBATCH --ntasks=1 4 | #SBATCH --cpus-per-task=2 5 | #SBATCH --mem-per-cpu=1G 6 | #SBATCH -o python_openmp.out 7 | 8 | module load anaconda/2020-03-tf2 9 | 10 | export OMP_PROC_BIND=true 11 | 12 | echo 'Running on: '$HOSTNAME 13 | 14 | srun python python_openmp.py 15 | -------------------------------------------------------------------------------- /python/simple/simple.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | #SBATCH -p interactive 3 | #SBATCH -t 00:5:00 4 | 5 | from __future__ import print_function 6 | 7 | import os 8 | print(os.environ) 9 | 10 | # If you have an array job, you can access it this way: 11 | print(os.environ['SLURM_ARRAY_TASK_ID']) 12 | -------------------------------------------------------------------------------- /scip/2018/README.md: -------------------------------------------------------------------------------- 1 | # SCIP 2018 2 | 3 | This folder contains material from the [Crash Course for Computational 
Scientists](http://science-it.aalto.fi/scip/scip-summer-kickstart-2018/) 4 | -------------------------------------------------------------------------------- /scip/2018/high-level-languages-ex01/testRK4.R: -------------------------------------------------------------------------------- 1 | rk4 <- function(f, x0, y0, x1, n) { 2 | vx <- double(n + 1) 3 | vy <- double(n + 1) 4 | vx[1] <- x <- x0 5 | vy[1] <- y <- y0 6 | h <- (x1 - x0)/n 7 | for(i in 1:n) { 8 | k1 <- h*f(x, y) 9 | k2 <- h*f(x + 0.5*h, y + 0.5*k1) 10 | k3 <- h*f(x + 0.5*h, y + 0.5*k2) 11 | k4 <- h*f(x + h, y + k3) 12 | vx[i + 1] <- x <- x0 + i*h 13 | vy[i + 1] <- y <- y + (k1 + k2 + k2 + k3 + k3 + k4)/6 14 | } 15 | cbind(vx, vy) 16 | } 17 | 18 | sol <- rk4(function(x, y) x*sqrt(y), 0, 1, 10, 100) 19 | cbind(sol, sol[, 2] - (4 + sol[, 1]^2)^2/16)[seq(1, 101, 10), ] 20 | -------------------------------------------------------------------------------- /scip/2018/high-level-languages-ex01/testRK4.m: -------------------------------------------------------------------------------- 1 | function testRK4 2 | t = 0:0.1:10; 3 | y = 0.0625.*(t.^2+4).^2; 4 | [trk4, yrk4] = RK4(t); 5 | fprintf('Time\t\tExactVal\tRK4Val\t\tRK4Error\n') 6 | for k = 1:10:length(t) 7 | fprintf('%.f\t\t%7.3f\t\t%7.3f\t\t%7.3g\n', t(k), y(k), ... 8 | yrk4(k), abs(y(k)-yrk4(k))) 9 | end 10 | end 11 | 12 | function [t, y] = RK4(t) 13 | dydt = @(tVal,yVal)tVal*sqrt(yVal); 14 | y = zeros(size(t)); 15 | y(1) = 1; 16 | for k = 1:length(t)-1 17 | dt = t(k+1)-t(k); 18 | dy1 = dt*dydt(t(k), y(k)); 19 | dy2 = dt*dydt(t(k)+0.5*dt, y(k)+0.5*dy1); 20 | dy3 = dt*dydt(t(k)+0.5*dt, y(k)+0.5*dy2); 21 | dy4 = dt*dydt(t(k)+dt, y(k)+dy3); 22 | y(k+1) = y(k)+(dy1+2*dy2+2*dy3+dy4)/6; 23 | end 24 | end 25 | -------------------------------------------------------------------------------- /scip/2018/high-level-languages-ex01/testRK4.py: -------------------------------------------------------------------------------- 1 | def RK4(f): 2 | return lambda t, y, dt: ( 3 | lambda dy1: ( 4 | lambda dy2: ( 5 | lambda dy3: ( 6 | lambda dy4: (dy1 + 2*dy2 + 2*dy3 + dy4)/6 7 | )( dt * f( t + dt , y + dy3 ) ) 8 | )( dt * f( t + dt/2, y + dy2/2 ) ) 9 | )( dt * f( t + dt/2, y + dy1/2 ) ) 10 | )( dt * f( t , y ) ) 11 | 12 | def theory(t): return (t**2 + 4)**2 /16 13 | 14 | from math import sqrt 15 | dy = RK4(lambda t, y: t*sqrt(y)) 16 | 17 | t, y, dt = 0., 1., .1 18 | while t <= 10: 19 | if abs(round(t) - t) < 1e-5: 20 | print("y(%2.1f)\t= %4.6f \t error: %4.6g" % ( t, y, abs(y - theory(t)))) 21 | t, y = t + dt, y + dy( t, y, dt ) 22 | -------------------------------------------------------------------------------- /scip/2018/high-level-languages-ex01/testRK4.slrm: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | ## 4 | ## Write SBATCH directives here 5 | ## 6 | 7 | ## Run MATLAB example ## 8 | 9 | module purge 10 | # Load the correct MATLAB-module here 11 | module list 12 | 13 | srun matlab -nojvm -r 'testRK4(); exit()' 14 | 15 | ## Run Python example ## 16 | 17 | module purge 18 | # Load the correct Python-module here 19 | module list 20 | 21 | srun python testRK4.py 22 | 23 | ## Run R example ## 24 | 25 | module purge 26 | # Load the correct R-module here 27 | module list 28 | 29 | srun Rscript testRK4.R 30 | -------------------------------------------------------------------------------- /scip/2018/high-level-languages-ex02/fit_R.R: -------------------------------------------------------------------------------- 1 | x <- c(0, 1, 2, 3, 4, 5, 6, 7, 
8, 9, 10) 2 | y <- c(1, 6, 17, 34, 57, 86, 121, 162, 209, 262, 321) 3 | coef(lm(y ~ x + I(x^2))) 4 | -------------------------------------------------------------------------------- /scip/2018/high-level-languages-ex02/fit_R.slrm: -------------------------------------------------------------------------------- 1 | #!/bin/bash -l 2 | #SBATCH -p short 3 | #SBATCH -t 00:05:00 4 | #SBATCH -n 1 5 | #SBATCH --mem-per-cpu=100 6 | #SBATCH -o fit_R.out 7 | 8 | module load r 9 | 10 | ## 11 | ## Create R call here 12 | ## 13 | -------------------------------------------------------------------------------- /scip/2018/high-level-languages-ex02/fit_matlab.m: -------------------------------------------------------------------------------- 1 | x = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]; 2 | y = [1, 6, 17, 34, 57, 86, 121, 162, 209, 262, 321]; 3 | polyfit(x,y,2) 4 | -------------------------------------------------------------------------------- /scip/2018/high-level-languages-ex02/fit_matlab.slrm: -------------------------------------------------------------------------------- 1 | #!/bin/bash -l 2 | #SBATCH -p short 3 | #SBATCH -t 00:05:00 4 | #SBATCH -n 1 5 | #SBATCH --mem-per-cpu=100 6 | #SBATCH -o fit_matlab.out 7 | 8 | module load matlab 9 | 10 | ## 11 | ## Create matlab call here 12 | ## 13 | -------------------------------------------------------------------------------- /scip/2018/high-level-languages-ex02/fit_python.py: -------------------------------------------------------------------------------- 1 | import numpy 2 | x = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] 3 | y = [1, 6, 17, 34, 57, 86, 121, 162, 209, 262, 321] 4 | coeffs = numpy.polyfit(x,y,deg=2) 5 | print(coeffs) 6 | -------------------------------------------------------------------------------- /scip/2018/high-level-languages-ex02/fit_python.slrm: -------------------------------------------------------------------------------- 1 | #!/bin/bash -l 2 | #SBATCH -p short 3 | #SBATCH -t 00:05:00 4 | #SBATCH -n 1 5 | #SBATCH --mem-per-cpu=100 6 | #SBATCH -o fit_python.out 7 | 8 | module load anaconda3 9 | 10 | ## 11 | ## Create Python call here 12 | ## 13 | -------------------------------------------------------------------------------- /scip/README.md: -------------------------------------------------------------------------------- 1 | # SCIP 2 | 3 | This folder contains material from various Scientific Computing In Practice-courses [[1]](http://science-it.aalto.fi/scip/). 4 | -------------------------------------------------------------------------------- /slurm/index.rst: -------------------------------------------------------------------------------- 1 | Slurm examples 2 | ============== 3 | 4 | .. 
literalinclude:: memory-use.py 5 | -------------------------------------------------------------------------------- /slurm/memory-use.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | from __future__ import print_function 3 | import argparse 4 | from time import sleep 5 | import gc 6 | import platform 7 | import resource 8 | 9 | if __name__ == "__main__": 10 | 11 | parser = argparse.ArgumentParser() 12 | parser.add_argument('mem', metavar="memory", 13 | help="Use this much memory") 14 | parser.add_argument('--sleep', 15 | help="Sleep this many seconds", type=int) 16 | args = parser.parse_args() 17 | 18 | # calculate the amount of memory requested in bytes 19 | mem = args.mem.lower() 20 | 21 | if mem.endswith('b'): 22 | mem = int(mem[:-1]) 23 | 24 | elif mem.endswith('k'): 25 | mem = int(mem[:-1])*1000**1 26 | 27 | elif mem.endswith('m'): 28 | mem = int(mem[:-1])*1000**2 29 | 30 | elif mem.endswith('g'): 31 | mem = int(mem[:-1])*1000**3 32 | 33 | elif mem.endswith('t'): 34 | mem = int(mem[:-1])*1000**4 35 | 36 | else: 37 | mem = int(mem) 38 | 39 | print("Trying to use %d bytes of memory" %mem) 40 | 41 | allocated = 1 42 | array = [bytearray(1)] 43 | 44 | while True: 45 | array.append(bytearray(allocated)) 46 | allocated *= 2 47 | gc.collect() 48 | actual_bytes = resource.getrusage( 49 | resource.RUSAGE_SELF).ru_maxrss*(1024 if \ 50 | platform.system() == 'Linux' else 1) 51 | print("Using %d bytes so far (allocated: %s)" 52 | %(actual_bytes, allocated)) 53 | if actual_bytes > mem: 54 | break 55 | 56 | if args.sleep: 57 | time = args.sleep 58 | sleep(time) 59 | -------------------------------------------------------------------------------- /slurm/pi-gpu.cu: -------------------------------------------------------------------------------- 1 | #include "cuda_runtime.h" 2 | #include "device_launch_parameters.h" 3 | #include <stdio.h> 4 | #include <stdint.h> 5 | #include <curand.h> 6 | #include <curand_kernel.h> 7 | 8 | // Setup random number generator 9 | __global__ void setup_rng(curandState *random_states, uint64_t seed) 10 | { 11 | int tid = threadIdx.x + blockIdx.x * blockDim.x; 12 | curand_init(seed, tid, 0, &random_states[tid]); 13 | } 14 | 15 | // Throw nthrows darts per thread and count hits inside the unit circle 16 | __global__ void throw_dart(curandState *random_states, int *nthrows, uint64_t *hits) 17 | { 18 | int tid = threadIdx.x + blockIdx.x * blockDim.x; 19 | float random_x, random_y; 20 | curandState random_state = random_states[tid]; 21 | 22 | hits[tid] = 0; 23 | for (int i=0; i<nthrows[tid]; i++) { 24 | random_x = curand_uniform(&random_state); 25 | random_y = curand_uniform(&random_state); 26 | if (random_x*random_x + random_y*random_y < 1.0f) 27 | hits[tid]++; 28 | } 29 | random_states[tid] = random_state; 30 | } 31 | 32 | int main(int argc, char *argv[]) 33 | { 34 | 35 | // Number of stochastic trials 36 | long N=10000000; 37 | if (argc > 1) 38 | sscanf(argv[1], "%ld", &N); 39 | printf("Calculating pi using %ld stochastic trials\n", N); 40 | 41 | // Initialize variables 42 | int count, device; 43 | 44 | int *nthrows, *nthrows_gpu; 45 | uint64_t seed = 5; 46 | uint64_t *hits, *hits_gpu; 47 | curandState* random_states; 48 | uint64_t total_hits; 49 | float pi; 50 | 51 | // Run 512 blocks 52 | int blocks = 512; 53 | // Run 128 threads per block. 
54 | int threads = 128; 55 | 56 | int batch_size = blocks * threads; 57 | 58 | // Select device 59 | cudaGetDeviceCount(&count); 60 | cudaGetDevice(&device); 61 | 62 | // Allocate memory 63 | hits = (uint64_t*) malloc(batch_size*sizeof(uint64_t)); 64 | nthrows = (int *) malloc(batch_size*sizeof(int)); 65 | cudaMalloc(&hits_gpu, batch_size*sizeof(uint64_t)); 66 | cudaMalloc(&nthrows_gpu, batch_size*sizeof(int)); 67 | cudaMalloc(&random_states, batch_size*sizeof(curandState)); 68 | 69 | // Calculate how many throws we want per thread 70 | for (int i=0; i>>(random_states, seed); 82 | 83 | // Throw darts 84 | throw_dart<<>>(random_states, nthrows_gpu, hits_gpu); 85 | 86 | // Copy hits to host RAM 87 | cudaMemcpy(hits, hits_gpu, batch_size*sizeof(uint64_t), cudaMemcpyDeviceToHost); 88 | 89 | // Calculate the total number of hits 90 | total_hits = 0; 91 | for (int i=0; i 2 | #include 3 | #include 4 | 5 | int main(int argc, char **argv) { 6 | // MPI init 7 | int size, rank, hostname_len; 8 | char hostname[MPI_MAX_PROCESSOR_NAME]; 9 | MPI_Init(NULL, NULL); 10 | MPI_Comm_size(MPI_COMM_WORLD, &size); 11 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); 12 | MPI_Get_processor_name(hostname, &hostname_len); 13 | // pi init 14 | long N=10000000; 15 | if (argc > 1) 16 | sscanf(argv[1], "%ld", &N); 17 | if (rank == 0) 18 | printf("Calculating pi using %ld stochastic trials\n", N); 19 | long N_rank = N / size; 20 | printf("%s: This is rank %d doing %ld trials\n", hostname, rank, N_rank); 21 | 22 | // Seed 23 | unsigned int seed = 5; 24 | seed += rank*5000; 25 | 26 | // Calculate trials 27 | double x, y; 28 | long i; 29 | long rank_count = 0; 30 | 31 | for (i=0; i 1: 34 | n_task = int(n / size) 35 | else: 36 | n_task = n 37 | 38 | t0 = time.perf_counter() 39 | _, n_inside_circle = sample(n_task) 40 | t = time.perf_counter() - t0 41 | 42 | print(f"before gather: rank {rank}, hostname {hostname}, n_inside_circle: {n_inside_circle}") 43 | n_inside_circle = comm.gather(n_inside_circle, root=0) 44 | print(f"after gather: rank {rank}, hostname {hostname}, n_inside_circle: {n_inside_circle}") 45 | 46 | if rank == 0: 47 | pi_estimate = 4.0 * sum(n_inside_circle) / n 48 | print( 49 | f"\nnumber of darts: {n}, estimate: {pi_estimate}, time spent: {t:.2} seconds" 50 | ) 51 | -------------------------------------------------------------------------------- /slurm/pi-mpi.slrm: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #SBATCH --time=00:10:00 3 | #SBATCH --mem=1G 4 | #SBATCH --output=pi-mpi.out 5 | #SBATCH --nodes=1 6 | #SBATCH --ntasks=2 7 | 8 | module load gcc/11.3.0 9 | module load openmpi/4.1.5 10 | 11 | mpicc -o pi-mpi pi-mpi.c 12 | 13 | srun ./pi-mpi 1000000 14 | -------------------------------------------------------------------------------- /slurm/pi-mpi4py.slrm: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #SBATCH --time=00:10:00 3 | #SBATCH --mem=2G 4 | #SBATCH --output=pi-mpi4py.out 5 | #SBATCH --ntasks=4 6 | 7 | module purge 8 | module load anaconda 9 | 10 | mpirun python pi-mpi.py 11 | -------------------------------------------------------------------------------- /slurm/pi-openmp.c: -------------------------------------------------------------------------------- 1 | // Compile with -fopenmp 2 | #include 3 | #include 4 | 5 | int main(int argc, char *argv[]) { 6 | long N=10000000, i; 7 | long count =0 ; 8 | //int chunk = 1000; 9 | double pi, x, y; 10 | 11 | unsigned int seed = 5; 12 | 13 | if (argc > 1) 
14 | sscanf(argv[1], "%ld", &N); 15 | printf("Calculating pi using %ld stochastic trials\n", N); 16 | 17 | // Bug: does not seed per-thread. 18 | #pragma omp parallel for private(i, x,y) firstprivate(seed) reduction(+:count) 19 | for (i=0; i 1: 34 | # Compute how much will be done in each worker. 35 | iterations_serial = int(serial*iterations) 36 | iterations_parallel = iterations - iterations_serial 37 | iterations_per_worker = iterations_parallel//nprocs 38 | print("Using %d processes (%d iterations each)" % \ 39 | (nprocs, iterations_per_worker), file=sys.stderr) 40 | if serial > 0: 41 | print("... and %d iterations in serial"%iterations_serial) 42 | 43 | # Basic setup and accumulators 44 | in_circle_points = 0 45 | iters_actual = 0 46 | random_gen = random.Random(seed) 47 | 48 | # Parallel part 49 | # Starts worker processes 50 | if serial > 0: 51 | print("Beginning parallel part") 52 | pool = Pool(processes=nprocs) 53 | seeds = [random_gen.randint(0, 2**32 - 1) for _ in range(nprocs)] 54 | iters_per_worker = [iterations_per_worker]*nprocs 55 | iters_actual += sum(iters_per_worker) 56 | # This is the actual calculation: 57 | in_circle_points =+ sum(pool.map(pic_wrapper, zip(iters_per_worker, seeds))) 58 | pool.close() 59 | 60 | # Serial part 61 | if serial > 0: 62 | print("Beginning serial part") 63 | iters_actual += iterations_serial 64 | # This is the actual calculation: 65 | in_circle_points += pic_wrapper((iterations_serial, random_gen.randint(0, 2**32 - 1))) 66 | 67 | # Returns pi and in-circle points (successes) 68 | return in_circle_points*4/iters_actual, in_circle_points 69 | else: 70 | in_circle_points = points_in_circle(iterations, seed) 71 | return in_circle_points*4/iterations, in_circle_points 72 | 73 | 74 | def estimate_pi_vectorized(iterations, seed): 75 | batch_size = int(1e5) 76 | print("Calculating pi via %d stochastic trials (vectorized version)" % iterations, 77 | file=sys.stderr) 78 | 79 | rng = numpy.random.RandomState(seed) 80 | 81 | iterations_left = iterations 82 | in_circle_points = 0 83 | 84 | while iterations_left > 0: 85 | if iterations_left < batch_size: 86 | batch_size = iterations_left 87 | x,y = rng.random_sample(size=(2,batch_size)) 88 | in_circle_points += numpy.sum(numpy.sqrt(x*x + y*y) < 1.0) 89 | iterations_left -= batch_size 90 | 91 | return in_circle_points*4/iterations, in_circle_points 92 | 93 | 94 | if __name__ == "__main__": 95 | parser = argparse.ArgumentParser() 96 | parser.add_argument('--nprocs', type=int, help="Number of nprocs, " 97 | "using multiprocessing", default=1) 98 | parser.add_argument('--seed', type=int, help="Random seed", default=42) 99 | parser.add_argument('--sleep', type=int, help="Sleep this many seconds") 100 | parser.add_argument('--optimized', action='store_true', help="Run an optimized vectorized version of the code") 101 | parser.add_argument('--serial', type=float, default=0.0, 102 | help="This fraction [0.0--1.0] of iterations to be run serial.") 103 | parser.add_argument('iters', type=int, help="Number of iterations") 104 | args = parser.parse_args() 105 | 106 | if args.serial < 0.0 or args.serial > 1.0: 107 | print("ERROR: --serial should be a fraction from 0.0 to 1.0 (not percent). 
(given: %s)"%args.serial) 108 | sys.exit(1) 109 | 110 | if args.optimized and args.serial: 111 | print("ERROR: --serial cannot be used in conjunction with --optimized") 112 | sys.exit(1) 113 | 114 | if args.optimized and not numpy_available: 115 | print("ERROR: --optimized can only be used when numpy is available") 116 | sys.exit(1) 117 | 118 | # Calculate pi and number of in-circle points (successes) 119 | if args.optimized: 120 | pi, successes = estimate_pi_vectorized(args.iters, args.seed) 121 | else: 122 | pi, successes = estimate_pi(args.iters, args.seed, args.nprocs, serial=args.serial) 123 | # Sleep 124 | if args.sleep: 125 | time.sleep(args.sleep) 126 | 127 | # Write to a JSON file 128 | result = {"pi_estimate":pi, "iterations":args.iters, "successes":int(successes)} 129 | json.dump(result, sys.stdout) 130 | sys.stdout.write('\n') 131 | -------------------------------------------------------------------------------- /slurm/pi_aggregation.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """Aggregation script for Pi estimations 3 | 4 | This script aggregates the results produced 5 | by running ``pi.py``. The output is in the same 6 | format as that of ``pi.py``. 7 | 8 | Example: 9 | If results of ``pi.py`` are ouput to files ``result1.json``, 10 | ``result2.json``, etc. then you can calculate weighted 11 | average of the estimates by:: 12 | 13 | $ python pi_aggregation.py result1.json result2.json ... 14 | 15 | The result would be a more accurate estimation of Pi. 16 | 17 | """ 18 | 19 | from __future__ import print_function, division 20 | import json 21 | import sys 22 | 23 | def calculate_average_pi(filenames): 24 | total_successes = 0 25 | total_iterations = 0 26 | for filename in filenames: 27 | with open(filename, 'r') as f: 28 | estimation = json.load(f) 29 | total_successes += estimation["successes"] 30 | total_iterations += estimation["iterations"] 31 | return total_successes, total_iterations 32 | 33 | if __name__ == "__main__": 34 | if len(sys.argv) < 2: 35 | sys.exit("USAGE: {} file1.json file2.json ...".format(sys.argv[0])) 36 | successes, iterations = calculate_average_pi(sys.argv[1:]) 37 | result = {"successes": successes, "iterations": iterations, 38 | "pi_estimate": successes*4/iterations} 39 | json.dump(result, sys.stdout) 40 | --------------------------------------------------------------------------------