├── Assignment_1
│   ├── Makefile
│   ├── README.md
│   ├── adding_a_test.md
│   ├── bench
│   ├── graph.r
│   ├── main.c
│   ├── run
│   ├── tests.c
│   ├── tests.h
│   ├── util.c
│   ├── util.h
│   ├── worker.c
│   └── worker.h
├── Assignment_2
│   ├── DEBUG.md
│   ├── Makefile
│   ├── README.md
│   ├── adding_a_test.md
│   ├── bench
│   ├── graph.r
│   ├── main.c
│   ├── run
│   ├── tests.c
│   ├── tests.h
│   ├── util.c
│   ├── util.h
│   ├── worker.c
│   └── worker.h
├── Assignment_3
│   ├── DEBUG.md
│   ├── Makefile
│   ├── README.md
│   ├── adding_a_test.md
│   ├── bench
│   ├── graph.r
│   ├── main.c
│   ├── run
│   ├── tests.c
│   ├── tests.h
│   ├── util.c
│   ├── util.h
│   ├── worker.c
│   └── worker.h
├── Assignment_4
│   ├── Documentation
│   │   ├── cheatsheet.txt
│   │   ├── explanation.txt
│   │   ├── recipes.txt
│   │   └── references.txt
│   ├── HERD_WISHLIST.md
│   ├── MAINTAINERS
│   ├── README.md
│   ├── check
│   ├── failgraph
│   ├── graph
│   ├── linux-kernel-hardware.cat
│   ├── linux-kernel.bell
│   ├── linux-kernel.cat
│   ├── linux-kernel.cfg
│   ├── linux-kernel.def
│   ├── litmus-tests
│   │   ├── CO+wx+wx+rx-rx+rx-rx1.litmus
│   │   ├── CO+wx+wx+rx-rx+rx-rx2.litmus
│   │   ├── CO+wx+wx+rx-rx1.litmus
│   │   ├── CO+wx+wx+rx-rx2.litmus
│   │   ├── CO+wx-wx+rx-rx1.litmus
│   │   ├── CO+wx-wx+rx-rx2.litmus
│   │   ├── IRIWish+rx-ry+wx+wy+ry-rx1.litmus
│   │   ├── MP+wx-wy+ry-rx1.litmus
│   │   ├── MP+wx-wy+ry-rx2.litmus
│   │   └── MP+wy-wx+rx-ry+ry-rx1.litmus
│   ├── lock.cat
│   └── memory_model_README
├── answers.md
├── answers_data
│   ├── 20180205_214850.jpg
│   ├── bench_t24_2018-02-05_01-36-40.csv_ticks_00accesses.pdf
│   ├── bench_t24_2018-02-05_01-36-40.csv_ticks_08accesses.pdf
│   ├── bench_t24_2018-02-05_01-36-40.csv_ticks_800accesses.pdf
│   ├── bench_t24_2018-02-05_01-36-40.csv_ticks_80accesses.pdf
│   ├── bench_t24_2018-02-05_04-33-36.csv_ticks_00accesses.pdf
│   ├── bench_t24_2018-02-05_04-33-36.csv_ticks_08accesses.pdf
│   ├── bench_t24_2018-02-05_04-33-36.csv_ticks_800accesses.pdf
│   └── bench_t24_2018-02-05_04-33-36.csv_ticks_80accesses.pdf
├── git-tutorial.html
├── index.html
├── javascripts
│   └── scale.fix.js
├── params.json
└── stylesheets
    ├── github-light.css
    └── styles.css

/Assignment_1/Makefile:
--------------------------------------------------------------------------------
# TUNABLE PARAMETERS
#DEBUG = TRUE
PRINT_WORKER = 0
PRINT_PARENT = 1
PRINT_VERBOSE = 0
PRINT_CSV = 1

WARN = -Wno-attributes -Wno-format-extra-args -Wno-implicit-function-declaration
CPUS = $(shell grep 'processor[[:space:]]*:' /proc/cpuinfo | wc -l)
JOBS = $(shell echo "$(CPUS) - 1" | bc)
MAKEFLAGS += "-j $(JOBS)"
CLEAN_COMMENTS = grep -v '^\# [[:digit:]]\+ '
INSERT_NEWLINES = sed 's/__N3WLN__/\n/g'
SKIP_NEWLINES = sed 's/__N3WLN__//g'
REINDENT = echo -e "G=gg\n:wq\n" | vim
CCOPTS1 = -pthread -I/u/theod/builds/include -DPRINT_WORKER=$(PRINT_WORKER) -DPRINT_PARENT=$(PRINT_PARENT) -DPRINT_VERBOSE=$(PRINT_VERBOSE) -DPRINT_CSV=$(PRINT_CSV)
CCOPTS2 = -L/u/theod/builds/lib -lurcu-qsbr

ifeq ($(DEBUG),TRUE)
CC1 = nice gcc -E $(CCOPTS1) $(WARN)
CC2 = nice gcc -g $(CCOPTS1) $(WARN)
else
CC = nice gcc $(CCOPTS1) $(WARN) -D__N3WLN__=
endif

OBJS = util.o worker.o tests.o

# make uses the first goal as the default; you can override this with
#.DEFAULT_GOAL := all
all: test

util: util.h util.c
ifeq ($(DEBUG),TRUE)
	$(CC1) $@.c | $(CLEAN_COMMENTS) | $(INSERT_NEWLINES) > $@CPP.c
	#$(REINDENT) $@CPP.c
	$(CC2) -xc -c -o $@.o $@CPP.c
else
	$(CC) $@.c -c -o $@.o
endif

worker: worker.c tests.h util
ifeq ($(DEBUG),TRUE)
	$(CC1) $@.c | $(CLEAN_COMMENTS) | $(INSERT_NEWLINES) > $@CPP.c
	#$(REINDENT) $@CPP.c
	$(CC2) -xc -c -o $@.o $@CPP.c
else
	$(CC) $@.c -c -o $@.o
endif

tests: util tests.c tests.h
ifeq ($(DEBUG),TRUE)
	$(CC1) $@.c | $(CLEAN_COMMENTS) | $(INSERT_NEWLINES) > $@CPP.c
	#$(REINDENT) $@CPP.c
	$(CC2) -xc -c -o $@.o $@CPP.c
else
	$(CC) $@.c -c -o $@.o
endif

test: main.c util worker tests
ifeq ($(DEBUG),TRUE)
	$(CC1) main.c | $(CLEAN_COMMENTS) | $(INSERT_NEWLINES) > mainCPP.c
	#$(REINDENT) $@CPP.c
	$(CC2) -o test mainCPP.c $(OBJS) $(CCOPTS2)
else
	$(CC) -o test main.c $(OBJS) $(CCOPTS2)
endif

.PHONY: clean
clean:
	rm -f $(OBJS) $(wildcard *CPP.c) test
--------------------------------------------------------------------------------
/Assignment_1/adding_a_test.md:
--------------------------------------------------------------------------------
1. Update test_names and increment n_tests in tests.c.
2. Add any applicable new operations (with unique numbers) under "// OPERATIONS" in tests.h.
3. Add a test (with a unique number) under "// TESTS" in tests.h.
4. Add declarations for any applicable global data in tests.h under "// GLOBAL DATA" and initialize as needed under "// GLOBAL DATA" in tests.c.
5. Add any new per-thread data to ti_data_in in util.h.
6. Add a test case under "// initialize test data" in worker.c to initialize the test data in parallel.
7. Add a test case under "// warm the cache" in worker.c.
8. Add cases for new operations under "// enter critical section" in worker.c.
9. Add cases for new operations under "// critical section body" (if applicable) in worker.c.
10. Add cases for new operations under "// leave critical section" in worker.c.
11. If the parent needs to do some checking/work after each run, add a test case under "// if applicable, check results of run and reset test state" in tests.c.

--------------------------------------------------------------------------------
/Assignment_1/bench:
--------------------------------------------------------------------------------
#!/bin/bash

set -x

if [ "$#" -ne 3 ]; then
    echo "Usage: ./bench MAX_THREADS N_OPS N_RUNS"
    exit 1
fi

MAX_THREADS="$1"
N_OPS="$2"
N_RUNS="$3"

mkdir -p data

# reserve at least 2 CPUs for other users
CPUS=$(echo "$(grep -c 'processor' /proc/cpuinfo) - 2" | bc)

if [ $MAX_THREADS -gt $CPUS ]; then
    echo "Too many threads, we only have $CPUS CPUs" >&2
    exit 1
fi

FILE="bench_t${MAX_THREADS}_$(date +%F_%H-%M-%S).csv"
DATA="data/${FILE}"

time ./test $MAX_THREADS $CPUS $N_OPS $N_RUNS | grep -o '[^ ,]*,[^ ,]*,[^ ,]*,[^ ,]*,[^ ,]*,[^ ,]*' > ${DATA}

# generate graphs
Rscript graph.r ${DATA}
--------------------------------------------------------------------------------
/Assignment_1/graph.r:
--------------------------------------------------------------------------------
library(ggplot2)
library(scales)

args <- commandArgs(TRUE)
name <- args[1]
#name <- "data/test_my_spinlock.csv"
d <- read.csv(file=name,head=TRUE,sep=",")
d$ticks_per_thread <- d$ticks / d$nt
d$nt <- factor(d$nt)

#tpt_by_test_and_nt <- with(d, aggregate(ticks_per_thread ~ test + nt, d, median))
ticks_by_test_and_nt <- with(d, aggregate(ticks ~ test + nt + critical_section_accesses, d, median))

n_breaks <- 40

is_graphable <- function(s) { return (! grepl("_nograph", s)) }

gen_graph <- function(data, xc, yc, f, name, n_breaks, lbs){
    data <- subset(data, f(test))
    ggplot(data=data, aes_string(xc, yc, group="test")) +
        geom_line (data=data, aes_string(colour="test")) +
        geom_point(data=data, aes_string(shape="test",colour="test")) +
        scale_y_continuous(breaks = pretty_breaks(n_breaks)) +
        lbs

    path <- paste0(name,".pdf")
    print(paste0("generating ", path))
    ggsave(path, width=16, height=9, units="in", limitsize=FALSE)
}

#gen_graph(subset(tpt_by_test_and_nt, critical_section_accesses == 0), "nt", "ticks_per_thread", is_graphable, paste0(name, "_tpt"), n_breaks,
#          labs(title = "Per-thread Throughput vs Number of Threads (no memory accesses during critical sections)",
#               x = "Number of Threads",
#               y = "Total duration / Number of Threads"))

gen_graph(subset(ticks_by_test_and_nt, critical_section_accesses == 0), "nt", "ticks", is_graphable, paste0(name, "_ticks_0accesses"), n_breaks,
          labs(title = "Test Duration vs Number of Threads (no critical section accesses)",
               x = "Number of Threads",
               y = "Test Duration"))

gen_graph(subset(ticks_by_test_and_nt, critical_section_accesses == 8), "nt", "ticks", is_graphable, paste0(name, "_ticks_8accesses"), n_breaks,
          labs(title = "Test Duration vs Number of Threads (1 cache line updated during critical section)",
               x = "Number of Threads",
               y = "Test Duration"))

gen_graph(subset(ticks_by_test_and_nt, critical_section_accesses == 80), "nt", "ticks", is_graphable, paste0(name, "_ticks_80accesses"), n_breaks,
          labs(title = "Test Duration vs Number of Threads (10 cache lines updated during critical section)",
               x = "Number of Threads",
               y = "Test Duration"))
--------------------------------------------------------------------------------
/Assignment_1/main.c:
--------------------------------------------------------------------------------
#include <stdio.h>
#include <inttypes.h>

#include "tests.h"
#include "util.h"

uint64_t n_threads, n_cpus, n_runs, n_ops;

void show_usage(int argc, char **argv)
{
    printf("Usage: %s N_THREADS N_CPUS N_OPS N_RUNS\n", argv[0]);
}

int main(int argc, char** argv) {
    int err, ret;

    if (argc != 5) {
        show_usage(argc, argv);
        return -1;
    }

    err = sscanf(argv[1], "%" PRIu64, &n_threads);
    if (err != 1) {
        show_usage(argc, argv);
        return -1;
    }

    err = sscanf(argv[2], "%" PRIu64, &n_cpus);
    if (err != 1) {
        show_usage(argc, argv);
        return -1;
    }

    err = sscanf(argv[3], "%" PRIu64, &n_ops);
    if (err != 1) {
        show_usage(argc, argv);
        return -1;
    }

    err = sscanf(argv[4], "%" PRIu64, &n_runs);
    if (err != 1) {
        show_usage(argc, argv);
        return -1;
    }

    if (n_ops < n_threads) {
        printf("error: N_OPS < N_THREADS. We need at least 1 operation per thread.\n");
        return -1;
    }


    //tests_single();

    tests_multi();

    return 0;
}
--------------------------------------------------------------------------------
/Assignment_1/run:
--------------------------------------------------------------------------------
#!/bin/bash

set -x

if [ "$#" -ne 3 ]; then
    echo "Usage: ./run MAX_THREADS N_OPS N_RUNS"
    exit 1
fi

MAX_THREADS="$1"
N_OPS="$2"
N_RUNS="$3"

# reserve at least 2 CPUs for other users
CPUS=$(echo "$(grep -c 'processor' /proc/cpuinfo) - 2" | bc)

time ./test $MAX_THREADS $CPUS $N_OPS $N_RUNS
--------------------------------------------------------------------------------
/Assignment_1/tests.c:
--------------------------------------------------------------------------------
#include <stdlib.h>

#include "tests.h"
#include "util.h"
#include "worker.h"

void parent_announce(uint64_t phase, int t_i) __attribute__((always_inline));
void parent_spin(ti_data_in *datas, uint64_t phase, int t_i) __attribute__((always_inline));
int parent_announce_then_spin(ti_data_in *datas, uint64_t phase1, uint64_t phase2, int t_i) __attribute__((always_inline));

// GLOBAL DATA

pthread_spinlock_t pthreads_spinlock;
volatile uint64_t my_spinlock = UNLOCKED;
volatile uint64_t my_spinlock_shared_counter = 0;
volatile uint64_t completed_phase = INIT_PHASE;
uint64_t global_buffer[ACCESS_BUFFER_SIZE];

uint64_t n_tests = 11;

// append "_nograph" to hide a test in the generated graphs
char *test_names[] = { "spin_try_lock_correctness_nograph"
                     , "spin_lock_correctness_nograph"
                     , "spin_wait_lock_correctness_nograph"
                     , "spin_read_lock_correctness_nograph"
                     , "spin_experimental_lock_correctness_nograph"
                     , "pthread_spin_lock"
                     , "spin_try_lock_nograph"
                     , "spin_lock"
                     , "spin_wait_lock"
                     , "spin_read_lock"
                     , "spin_experimental_lock"
                     };

// skip tests with 0s, each value corresponds to the test in the same position
// in test_names above
uint64_t test_on[] = { 1
                     , 0
                     , 0
                     , 0
                     , 0
                     , 0
                     , 0
                     , 0
                     , 0
                     , 0
                     , 0
                     };

op **oss = NULL;
uint64_t n_accesses = 3;
uint64_t accessesv[] = { 0 // no accesses
                       , CACHE_LINE
                       , 10 * CACHE_LINE
                       //, L1_DATA_CACHE
                       };

void tests_multi() {
    int i,j,k,all_ops,test;
    pthread_t ts[n_threads];
    ti_data_in datas[n_threads];
    oss = malloc(sizeof(op*) * n_tests);
    for(test=0; test
--------------------------------------------------------------------------------
/Assignment_1/tests.h:
--------------------------------------------------------------------------------
#ifndef TESTS_H
#define TESTS_H

#include <pthread.h>

#include "util.h"

// GLOBAL DATA

extern pthread_spinlock_t pthreads_spinlock;
extern volatile uint64_t my_spinlock;
extern volatile uint64_t my_spinlock_shared_counter;

#define UNLOCKED 0
#define LOCKED 1

// OPERATIONS
// must all be unique

#define PTHREAD_SPIN_LOCK_LOCK_OP 1
#define SPIN_LOCK_LOCK_OP 2
#define SPIN_LOCK_INC_OP 3
#define SPIN_WAIT_LOCK_LOCK_OP 4
#define SPIN_WAIT_LOCK_INC_OP 5
#define SPIN_READ_LOCK_LOCK_OP 6
#define SPIN_READ_LOCK_INC_OP 7
#define SPIN_TRY_LOCK_LOCK_OP 8
#define SPIN_TRY_LOCK_INC_OP 9
#define SPIN_EXPERIMENTAL_LOCK_LOCK_OP 10
#define SPIN_EXPERIMENTAL_LOCK_INC_OP 11

// PHASES

#define INIT_PHASE 0
#define READY_PHASE 1
#define WARMED_PHASE 2
#define OPS_PHASE 3

// TESTS
// must be the sequence of natural numbers from 0 to the
// total number of tests - 1.

#define SPIN_TRY_LOCK_CORRECTNESS_TEST 0
#define SPIN_LOCK_CORRECTNESS_TEST 1
#define SPIN_WAIT_LOCK_CORRECTNESS_TEST 2
#define SPIN_READ_LOCK_CORRECTNESS_TEST 3
#define SPIN_EXPERIMENTAL_LOCK_CORRECTNESS_TEST 4
#define PTHREAD_SPIN_LOCK_TEST 5
#define SPIN_TRY_LOCK_TEST 6
#define SPIN_LOCK_TEST 7
#define SPIN_WAIT_LOCK_TEST 8
#define SPIN_READ_LOCK_TEST 9
#define SPIN_EXPERIMENTAL_LOCK_TEST 10

#define PASTE31(x, y, z) x ## y ## z
#define PASTE3(x, y, z) PASTE31(x, y, z)

#define PASTE21(x, y) x ## y
#define PASTE2(x, y) PASTE21(x, y)

#define STR_VALUE(arg) #arg
#define FUNCTION_NAME(name) STR_VALUE(name)

#define TIME_AND_CHECK(uid, t_i, run, fcall, label, dst, ret) __N3WLN__ \
do { __N3WLN__ \
    uint64_t (PASTE3(pre,__LINE__,uid)), (PASTE3(post,__LINE__,uid)); __N3WLN__ \
    struct timeval (PASTE3(tv1,__LINE__,uid)), (PASTE3(tv2,__LINE__,uid)); __N3WLN__ \
    double (PASTE3(secs,__LINE__,uid)); __N3WLN__ \
    unsigned int (PASTE3(ui,__LINE__,uid)); __N3WLN__ \
    printf_parent("%d: run %d: about to call %s [%s]:\n", t_i, run, FUNCTION_NAME(fcall), label); __N3WLN__ \
    gettimeofday(&(PASTE3(tv1,__LINE__,uid)), NULL); __N3WLN__ \
    (PASTE3(pre,__LINE__,uid)) = __rdtscp(&(PASTE3(ui,__LINE__,uid))); __N3WLN__ \
    cmm_barrier(); __N3WLN__ \
    ret = fcall; __N3WLN__ \
    cmm_barrier(); __N3WLN__ \
    (PASTE3(post,__LINE__,uid)) = __rdtscp(&(PASTE3(ui,__LINE__,uid))); __N3WLN__ \
    gettimeofday(&(PASTE3(tv2,__LINE__,uid)), NULL); __N3WLN__ \
    dst = (PASTE3(post,__LINE__,uid)) - (PASTE3(pre,__LINE__,uid)); __N3WLN__ \
    (PASTE3(secs,__LINE__,uid)) = (double) ((PASTE3(tv2,__LINE__,uid)).tv_usec - (PASTE3(tv1,__LINE__,uid)).tv_usec) / 1000000 + (double) ((PASTE3(tv2,__LINE__,uid)).tv_sec - (PASTE3(tv1,__LINE__,uid)).tv_sec); __N3WLN__ \
    printf_parent("%d: run %d: call to %s [%s] took %" PRIu64 " TSC ticks, %f seconds, returned %d\n", t_i, run, FUNCTION_NAME(fcall), label, dst, (PASTE3(secs,__LINE__,uid)), ret); __N3WLN__ \
    if(!ret) { __N3WLN__ \
        printf_parent("%d: run %d: call succeeded (returned 0)\n", t_i, run); __N3WLN__ \
    } __N3WLN__ \
    else { __N3WLN__ \
        printf_parent("%d: run %d: call failed (returned %d)\n", t_i, run, ret); __N3WLN__ \
    } __N3WLN__ \
} while (0) __N3WLN__ \

#endif
--------------------------------------------------------------------------------
/Assignment_1/util.c:
--------------------------------------------------------------------------------
#include "util.h"

unsigned int next_aff = 0;

pthread_mutex_t affinity_mutex = PTHREAD_MUTEX_INITIALIZER;

void set_affinity(void)
{
    cpu_set_t mask;
    int cpu;
    int ret;

    ret = pthread_mutex_lock(&affinity_mutex);
    if (ret) {
        perror("Error in pthread mutex lock");
        exit(-1);
    }

    if(next_aff >= n_cpus) {
        perror("Ran out of CPUs, reduce n_threads");
        exit(-1);
    }
    cpu = next_aff++;
    ret = pthread_mutex_unlock(&affinity_mutex);
    if (ret) {
        perror("Error in pthread mutex unlock");
        exit(-1);
    }
    CPU_ZERO(&mask);
    CPU_SET(cpu, &mask);
    sched_setaffinity(0, sizeof(mask), &mask);
#if defined(PRINT_AFFINITIES)
    printf_verbose("set affinity %d\n", cpu);
#endif
}

--------------------------------------------------------------------------------
/Assignment_1/util.h:
--------------------------------------------------------------------------------
#ifndef UTIL_H
#define UTIL_H

#define _GNU_SOURCE
#include
#include
#include
#include
#include
#include
#define URCU_INLINE_SMALL_FUNCTIONS
#include
#include
#include

#define printf_error(fmt, args...) \
    do { \
        printf(fmt, args); fflush(stdout); \
    } while (0)

#define printf_verbose(fmt, args...) \
    do { \
        if (PRINT_VERBOSE) { printf(fmt, args); fflush(stdout); } \
    } while (0)

#define printf_parent(fmt, args...) \
    do { \
        if (PRINT_PARENT) { printf(fmt, args); fflush(stdout); } \
    } while (0)

#define printf_worker(fmt, args...) \
    do { \
        if (PRINT_WORKER) { printf(fmt, args); fflush(stdout); } \
    } while (0)

#define printf_csv(fmt, args...) \
    do { \
        if (PRINT_CSV) { printf(fmt, args); fflush(stdout); } \
    } while (0)

// If *ptr == old, atomically store new to *ptr and return old.
// Otherwise, return the value of *ptr without changing it.
#define lockcmpxchgq(ptr, old, new) __N3WLN__ \
    ({ __N3WLN__ \
        uint64_t __ret; __N3WLN__ \
        uint64_t __old = (old); __N3WLN__ \
        uint64_t __new = (new); __N3WLN__ \
        volatile uint64_t *__ptr = (ptr); __N3WLN__ \
        asm volatile("lock; cmpxchgq %2,%1" __N3WLN__ \
                     : "=a" (__ret), "+m" (*__ptr) __N3WLN__ \
                     : "r" (__new), "0" (__old) __N3WLN__ \
                     : "memory"); __N3WLN__ \
        __ret; __N3WLN__ \
    })


// Store new to *ptr, and return the immediately previous value in *ptr's
// coherence order. Excerpt from a comment in glibc from
// nptl/pthread_spin_lock.c:
// xchgq usually takes less instructions than
// lockcmpxchg. On the other hand,
// lockcmpxchg potentially generates less bus traffic when the lock is locked.
#define xchgq(ptr, new) __N3WLN__ \
    ({ __N3WLN__ \
        uint64_t __new = (new); __N3WLN__ \
        volatile uint64_t *__ptr = (ptr); __N3WLN__ \
        asm volatile ("xchgq %0, %1" __N3WLN__ \
                      : "=r" (__new), "=m" (*__ptr) __N3WLN__ \
                      : "0" (__new), "m" (*__ptr) __N3WLN__ \
                      : "memory" ); __N3WLN__ \
        __new; __N3WLN__ \
    })

typedef struct {
    uint64_t operation;
    uint64_t params[4];
} op;

#define CACHE_LINE 8          // 8-byte words in a cache line
#define L1_DATA_CACHE 4096    // 8-byte words in the 32KB L1 cache on Ivy Bridge
#define ACCESS_BUFFER_SIZE (2 * L1_DATA_CACHE)

typedef struct {
    int i;
    int n_ops_i;
    volatile uint64_t phase;
    volatile uint64_t my_spinlock_counter;
    struct drand48_data rand_state;
    // uint64_t buffer[ACCESS_BUFFER_SIZE];
    uint64_t *buffer;
    uint64_t buffer_cur;
} ti_data_in;

void set_affinity(void);

extern volatile uint64_t completed_phase;
extern uint64_t n_threads, n_cpus, n_ops, n_runs, n_tests, n_accesses;
extern op **oss;
extern char *test_names[];
extern uint64_t test_on[];
extern uint64_t accessesv[];
extern uint64_t global_buffer[];

// On Intel, the busy-wait-nop instruction is called "pause",
// which is actually represented as a nop with the rep prefix.
// On processors before the P4 this behaves as a nop; on P4 and
// later it might do something clever like yield to another
// hyperthread. In any case, Intel recommends putting one
// of these in a spin lock loop.
#define spin_pause() do { __asm__ __volatile__ ("rep; nop"); } while (0)
#define nop() do { __asm__ __volatile__ ("nop"); } while (0)

#define access(i, d, n_words) do { \
    for((i)=0; (i)<(n_words); ++(i), (d)->buffer_cur = ((d)->buffer_cur + 1) % ACCESS_BUFFER_SIZE) { \
        _CMM_STORE_SHARED((d)->buffer[(d)->buffer_cur], 1 + (CMM_LOAD_SHARED((d)->buffer[(d)->buffer_cur]))); \
    } \
} while(0)

#if defined(PRINT_SPIN)
#define spin_until(p, phase, t_i) __N3WLN__ \
    do { __N3WLN__ \
        while(CMM_LOAD_SHARED(*p) != phase) { spin_pause(); } __N3WLN__ \
        printf_verbose("%d: finished spin waiting for phase %d\n", t_i, phase); __N3WLN__ \
        fflush(stdout); __N3WLN__ \
    } while (0) __N3WLN__ \

#else
#define spin_until(p, phase, t_i) __N3WLN__ \
    do { __N3WLN__ \
        while(CMM_LOAD_SHARED(*p) != phase) { spin_pause(); } __N3WLN__ \
    } while (0) __N3WLN__ \

#endif

#if defined(PRINT_SPIN)
#define sleep_until(p, phase, t_i) __N3WLN__ \
    do { __N3WLN__ \
        while(CMM_LOAD_SHARED(*p) != phase) { usleep(100); spin_pause(); } __N3WLN__ \
        printf_verbose("%d: finished spin waiting for phase %d\n", t_i, phase); __N3WLN__ \
        fflush(stdout); __N3WLN__ \
    } while (0) __N3WLN__ \

#else
#define sleep_until(p, phase, t_i) __N3WLN__ \
    do { __N3WLN__ \
        while(CMM_LOAD_SHARED(*p) != phase) { usleep(100); spin_pause(); } __N3WLN__ \
    } while (0) __N3WLN__ \

#endif

#endif
--------------------------------------------------------------------------------
/Assignment_1/worker.c:
--------------------------------------------------------------------------------
#include "tests.h"
#include "util.h"
#include "worker.h"

void announce(volatile uint64_t *p, uint64_t phase, int t_i) __attribute__((always_inline));
void announce_then_spin(volatile uint64_t *p, uint64_t phase, int t_i) __attribute__((always_inline));

void* ti(void *data) {
    int i, m, tryret, ret, run, nt, si, ei, test, accesses;
    op *cur, *start, *end;
    ti_data_in *d = (ti_data_in*)data;
    pthread_spinlock_t pthreads_spinlock_copy;
    uint64_t my_spinlock_copy;
    op op_copy;

    // get this thread running on its own CPU
    set_affinity();

    for(test=0; testi == n_threads - 1 ? oss[test] + n_ops : oss[test] + (d->i + 1) * m);
    switch (test) {
    case SPIN_TRY_LOCK_CORRECTNESS_TEST:
        for(cur = oss[test] + d->i * m; curi * m; curi * m; curi * m; curi * m; curi * m; curi * m; curi * m; curi * m; curi * m; curi * m; curphase, READY_PHASE, d->i);

    // run the test for each number of threads
    for(nt=1; nt<=n_threads; ++nt) {
        for(accesses=0; accesses<n_accesses; ++accesses) {
            if(d->i > nt - 1) {
                // this worker is inactive for this nt, just hang out without consuming
                // a lot of resources

                printf_worker("%d: nt is %d, skipping this round\n", d->i, nt);

                // no warming to do, say I'm done and wait for parent to say that everyone
                // else is done
                announce(&d->phase, WARMED_PHASE, d->i);
                //sleep_until(&completed_phase, WARMED_PHASE, d->i);
                spin_until(&completed_phase, WARMED_PHASE, d->i);

                // no ops to do, say I'm done and wait for parent to say that everyone
                // else is done
                announce(&d->phase, OPS_PHASE, d->i);
                //sleep_until(&completed_phase, OPS_PHASE, d->i);
                spin_until(&completed_phase, OPS_PHASE, d->i);
            }
            else {
                // this worker is active for this nt, let's run some tests!

                m = n_ops / nt;
                si = d->i * m;
                start = oss[test] + si;
                ei = si + m + ( d->i == nt - 1 ? n_ops - si - m : 0 ) - 1;
                end = oss[test] + ei;

                printf_worker("%d: nt is %d, running ops %d-%d [0x%" PRIx64 "-0x%" PRIx64 "]\n", d->i, nt, si, ei, (uint64_t)start, (uint64_t)end);

                // warm the cache
                // TODO does warming the ops help?
                for(cur = start; cur <= end; ++cur) { op_copy = *cur; }

                switch (test) {
                case PTHREAD_SPIN_LOCK_TEST:
                    pthreads_spinlock_copy = pthreads_spinlock;
                    break;
                case SPIN_TRY_LOCK_CORRECTNESS_TEST:
                case SPIN_LOCK_CORRECTNESS_TEST:
                case SPIN_WAIT_LOCK_CORRECTNESS_TEST:
                case SPIN_READ_LOCK_CORRECTNESS_TEST:
                case SPIN_EXPERIMENTAL_LOCK_CORRECTNESS_TEST:
                case SPIN_TRY_LOCK_TEST:
                case SPIN_LOCK_TEST:
                case SPIN_WAIT_LOCK_TEST:
                case SPIN_READ_LOCK_TEST:
                case SPIN_EXPERIMENTAL_LOCK_TEST:
                    my_spinlock_copy = my_spinlock;
                    break;
                default:
                    printf("Undefined test %d\n", test);
                    break;
                }

                cmm_smp_mb(); // make sure warming loads happen before announcement

                announce_then_spin(&d->phase, WARMED_PHASE, d->i);

                // run the ops!
                for(cur = start, i=si; cur <= end; ++cur, ++i) {
                    printf_worker("%d: running op %d\n", d->i, i);

                    // enter critical section
                    switch (cur->operation) {
                    case PTHREAD_SPIN_LOCK_LOCK_OP:
                        pthread_spin_lock(&pthreads_spinlock);
                        break;
                    case SPIN_TRY_LOCK_LOCK_OP:
                    case SPIN_TRY_LOCK_INC_OP:
                        tryret = spin_try_lock(&my_spinlock);
                        break;
                    case SPIN_LOCK_LOCK_OP:
                    case SPIN_LOCK_INC_OP:
                        spin_lock(&my_spinlock);
                        break;
                    case SPIN_WAIT_LOCK_LOCK_OP:
                    case SPIN_WAIT_LOCK_INC_OP:
                        spin_wait_lock(&my_spinlock);
                        break;
                    case SPIN_READ_LOCK_LOCK_OP:
                    case SPIN_READ_LOCK_INC_OP:
                        spin_read_lock(&my_spinlock);
                        break;
                    case SPIN_EXPERIMENTAL_LOCK_LOCK_OP:
                    case SPIN_EXPERIMENTAL_LOCK_INC_OP:
                        spin_experimental_lock(&my_spinlock);
                        break;
                    default:
                        printf_error("Undefined operation %" PRIu64 "\n", cur->operation);
                        exit(-1);
                        break;
                    }

                    // critical section body

                    // * simulate work by accessing memory
                    access(i, d, accessesv[accesses]);

                    // * do per-operation critical section work
                    switch(cur->operation) {
                    case SPIN_LOCK_INC_OP:
                    case SPIN_WAIT_LOCK_INC_OP:
                    case SPIN_READ_LOCK_INC_OP:
                    case SPIN_EXPERIMENTAL_LOCK_INC_OP:
                        ++my_spinlock_shared_counter;
                        ++d->my_spinlock_counter;
                        break;
                    case SPIN_TRY_LOCK_INC_OP:
                        if (tryret) {
                            ++my_spinlock_shared_counter;
                            ++d->my_spinlock_counter;
                        }
                        break;
                    }

                    // leave critical section
                    switch (cur->operation) {
                    case PTHREAD_SPIN_LOCK_LOCK_OP:
                        pthread_spin_unlock(&pthreads_spinlock);
                        break;
                    case SPIN_LOCK_LOCK_OP:
                    case SPIN_LOCK_INC_OP:
                    case SPIN_WAIT_LOCK_LOCK_OP:
                    case SPIN_WAIT_LOCK_INC_OP:
                    case SPIN_READ_LOCK_LOCK_OP:
                    case SPIN_READ_LOCK_INC_OP:
                    case SPIN_EXPERIMENTAL_LOCK_LOCK_OP:
                    case SPIN_EXPERIMENTAL_LOCK_INC_OP:
                        spin_unlock(&my_spinlock);
                        break;
                    case SPIN_TRY_LOCK_LOCK_OP:
                    case SPIN_TRY_LOCK_INC_OP:
                        if(tryret) { spin_unlock(&my_spinlock); }
                        break;
                    }

                }

                // I'm done with my ops, say so and wait for parent to say that
                // everyone else is done
                announce_then_spin(&d->phase, OPS_PHASE, d->i);
            }
        }
    }
    }
    announce_then_spin(&d->phase, INIT_PHASE, d->i);
    }

    return data;
}

// BEGIN ASSIGNMENT SECTION

// spin_try_lock
int spin_try_lock(volatile uint64_t *lock) {
    // TODO
}

// spin_lock
void spin_lock(volatile uint64_t *lock) {
    // TODO
}

// spin_wait_lock
void spin_wait_lock(volatile uint64_t *lock) {
    // TODO
}

// spin_read_lock
void spin_read_lock(volatile uint64_t *lock) {
    // TODO
}

// spin_experimental_lock
void spin_experimental_lock(volatile uint64_t *lock) {
    // TODO
}


void spin_unlock(volatile uint64_t *lock) {
    // TODO
}

// END ASSIGNMENT SECTION

void announce(volatile uint64_t *p, uint64_t phase, int t_i)
{
    _CMM_STORE_SHARED(*p, phase);

    cmm_smp_mb(); // necessary?

    printf_worker("%d: announced finished with phase %" PRIu64 "\n", t_i, phase);
}

void announce_then_spin(volatile uint64_t *p, uint64_t phase, int t_i)
{
    announce(p, phase, t_i);
    spin_until(&completed_phase, phase, t_i);
}

--------------------------------------------------------------------------------
/Assignment_1/worker.h:
--------------------------------------------------------------------------------
#ifndef WORKER_H
#define WORKER_H

#include "util.h"

void* ti(void *data);

int spin_try_lock(volatile uint64_t *lock) __attribute__((always_inline));
void spin_lock(volatile uint64_t *lock) __attribute__((always_inline));
void spin_wait_lock(volatile uint64_t *lock) __attribute__((always_inline));
void spin_read_lock(volatile uint64_t *lock) __attribute__((always_inline));
void spin_unlock(volatile uint64_t *lock) __attribute__((always_inline));
void spin_experimental_lock(volatile uint64_t *lock) __attribute__((always_inline));
#endif
--------------------------------------------------------------------------------
/Assignment_2/DEBUG.md:
--------------------------------------------------------------------------------
# A brief primer on debugging

A variety of strange things can happen as a result of bugs in these implementations.
Often, I find that I've made a mistake somewhere, and the test harness hangs in the
middle of a particular run, cores spinning at 100%, but no progress being made.
I've found it helpful to build with debugging symbols and run the test harness with `gdb`
so I can inspect the program state when it gets stuck.

## Walkthrough

To get this set up, uncomment the line
```make
#DEBUG = TRUE
```
in `Makefile`, then rebuild with `make`.

NOTE: Your code will sometimes have _different performance characteristics_
when built this way.
Once you've resolved or learned what you needed from 19 | debugging, remember to comment that line back out and run `make` again before 20 | benchmarking. 21 | 22 | Next, run the test harness attached to gdb, e.g. 23 | ```bash 24 | gdb --args ./test 4 28 8 1 25 | ``` 26 | 27 | When the test harness hits the loop, stop the program and return control to 28 | `gdb` with a `SIGINT` (`CTRL-c`). Check out your threads with 29 | ```gdb 30 | info threads 31 | ``` 32 | select one that looks like it's running your code, and switch to it, e.g. for thread 2 33 | ```gdb 34 | t 2 35 | ``` 36 | and see where we are in the code. 37 | ```gdb 38 | list 39 | ``` 40 | NOTE: This listing has macros expanded, and expansions often look very 41 | different from the macro name. Sometimes this has the benefit of helping you 42 | figure out exactly where the program counter is in an expansion of a multi-line 43 | macro, other times it just makes it hard to read the code. Perhaps a future 44 | assignment will include a way to turn this off :) 45 | 46 | Once we know where the program counter is (which must be code along the path of 47 | the infinite loop, if this thread is in fact looping forever), it might be 48 | helpful to print the values of some variables in scope, e.g. for `ticket_lock()` 49 | ```gdb 50 | p *lock 51 | ``` 52 | ```gdb 53 | p my_number 54 | ``` 55 | or for `abql_nosharing_lock()` 56 | ```gdb 57 | p *my_place 58 | ``` 59 | ```gdb 60 | p flags[0] 61 | ``` 62 | 63 | Usually I do this, and then stare at the implementation for a while in my editor, trying 64 | to figure out how the state ended up this way. 65 | 66 | ## Other things to do 67 | 68 | ### Inspect frames of calling functions 69 | 70 | If you want to inspect values in scope in a function call further up the stack, 71 | you can check out the stack, 72 | ```gdb 73 | bt 74 | ``` 75 | pick a different frame to inspect, and load it, e.g. 
for frame 2 76 | ```gdb 77 | f 2 78 | ``` 79 | 80 | and print some variables in scope in the calling function. 81 | 82 | ### Breakpoints/Watchpoints and Invariants 83 | 84 | You can set breakpoints and watchpoints, and do other GDB things more or less 85 | as usual. If you are feeling paranoid about the impact of making a system call 86 | when you hit a watchpoint/breakpoint, or can just check what you want more 87 | easily in C, you can add to the code a runtime invariant that loops forever if 88 | violated. 89 | 90 | Instead of a breakpoint: 91 | ```c 92 | while(1) {} 93 | ``` 94 | 95 | Instead of a watchpoint: 96 | ```c 97 | if(!invariant) { while(1) {} } 98 | ``` 99 | 100 | Run it through `gdb` and see if any threads get stuck in this particular loop. 101 | If one did, you can print some values in scope at that line and figure out how 102 | the invariant was violated. The disadvantage of this approach, of course, is 103 | that it's harder to resume execution afterwards (if you care about that).
104 | -------------------------------------------------------------------------------- /Assignment_2/Makefile: -------------------------------------------------------------------------------- 1 | # TUNABLE PARAMETERS 2 | #DEBUG = TRUE 3 | PRINT_WORKER = 0 4 | PRINT_PARENT = 1 5 | PRINT_VERBOSE = 0 6 | PRINT_CSV = 1 7 | 8 | WARN = -Wno-attributes -Wno-format-extra-args -Wno-implicit-function-declaration 9 | CPUS = $(shell grep 'processor[[:space:]]*:' /proc/cpuinfo | wc -l) 10 | JOBS = $(shell echo "$(CPUS) - 1" | bc) 11 | MAKEFLAGS += "-j $(JOBS)" 12 | CLEAN_COMMENTS = grep -v '^\# [[:digit:]]\+ ' 13 | INSERT_NEWLINES = sed 's/__N3WLN__/\n/g' 14 | SKIP_NEWLINES = sed 's/__N3WLN__//g' 15 | REINDENT = echo -e "G=gg\n:wq\n" | vim 16 | CCOPTS1 = -pthread -I/u/theod/builds/include -DPRINT_WORKER=$(PRINT_WORKER) -DPRINT_PARENT=$(PRINT_PARENT) -DPRINT_VERBOSE=$(PRINT_VERBOSE) -DPRINT_CSV=$(PRINT_CSV) 17 | CCOPTS2 = -L/u/theod/builds/lib -lurcu-qsbr 18 | 19 | ifeq ($(DEBUG),TRUE) 20 | CC1 = nice gcc -E $(CCOPTS1) $(WARN) 21 | CC2 = nice gcc -g $(CCOPTS1) $(WARN) 22 | else 23 | CC = nice gcc $(CCOPTS1) $(WARN) -D__N3WLN__= 24 | endif 25 | 26 | OBJS = util.o worker.o tests.o 27 | 28 | # make makes the first goal the default, you can override this with 29 | #.DEFAULT_GOAL := all 30 | all: test 31 | 32 | util: util.h util.c 33 | ifeq ($(DEBUG),TRUE) 34 | $(CC1) $@.c | $(CLEAN_COMMENTS) | $(INSERT_NEWLINES) > $@CPP.c 35 | #$(REINDENT) $@CPP.c 36 | $(CC2) -xc -c -o $@.o $@CPP.c 37 | else 38 | $(CC) $@.c -c -o $@.o 39 | endif 40 | 41 | worker: worker.c worker.h tests.h util 42 | ifeq ($(DEBUG),TRUE) 43 | $(CC1) $@.c | $(CLEAN_COMMENTS) | $(INSERT_NEWLINES) > $@CPP.c 44 | #$(REINDENT) $@CPP.c 45 | $(CC2) -xc -c -o $@.o $@CPP.c 46 | else 47 | $(CC) $@.c -c -o $@.o 48 | endif 49 | 50 | tests: util tests.c tests.h 51 | ifeq ($(DEBUG),TRUE) 52 | $(CC1) $@.c | $(CLEAN_COMMENTS) | $(INSERT_NEWLINES) > $@CPP.c 53 | #$(REINDENT) $@CPP.c 54 | $(CC2) -xc -c -o $@.o $@CPP.c 55 | else 56 | 
$(CC) $@.c -c -o $@.o 57 | endif 58 | 59 | test: main.c util worker tests 60 | ifeq ($(DEBUG),TRUE) 61 | $(CC1) main.c | $(CLEAN_COMMENTS) | $(INSERT_NEWLINES) > mainCPP.c 62 | #$(REINDENT) $@CPP.c 63 | $(CC2) -o test mainCPP.c $(OBJS) $(CCOPTS2) 64 | else 65 | $(CC) -o test main.c $(OBJS) $(CCOPTS2) 66 | endif 67 | 68 | .PHONY: clean 69 | clean: 70 | rm -f $(OBJS) $(wildcard *CPP.c) test 71 | -------------------------------------------------------------------------------- /Assignment_2/adding_a_test.md: -------------------------------------------------------------------------------- 1 | 1. Update test_names and increment n_tests in tests.c. 2 | 2. Add any applicable new operations (with unique numbers) under "// OPERATIONS" in tests.h. 3 | 3. Add a test (with a unique number) under "// TESTS" in tests.h. 4 | 4. Add declarations for any applicable global data in tests.h under "// GLOBAL DATA" and initialize as needed under "// GLOBAL DATA" in tests.c, (or in tests_multi() in tests.c, if the initialization depends on runtime parameters). 5 | 5. Add any new per-thread data to ti_data_in in util.h 6 | 6. Add a test case under "// initialize test data" in worker.c to initialize the test data in parallel. 7 | 7. Add any needed per-test-run initialization under "// per-test-run initialization" in worker.c. 8 | 8. Add a test case under "// warm the cache" in worker.c. 9 | 9. Add cases for new operations under "// enter critical section" in worker.c. 10 | 10. Add cases for new operations under "// critical section body" (if applicable) in worker.c. 11 | 11. Add cases for new operations under "// leave critical section" in worker.c. 12 | 12. 
If parent needs to do some checking/work after each run, add a test case under "// if applicable, check results of run and reset test state" in tests.c 13 | 14 | -------------------------------------------------------------------------------- /Assignment_2/bench: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | 5 | if [ "$#" -ne 3 ]; then 6 | echo "Usage: ./bench MAX_THREADS N_OPS N_RUNS" 7 | exit 1 8 | fi 9 | 10 | MAX_THREADS="$1" 11 | N_OPS="$2" 12 | N_RUNS="$3" 13 | 14 | mkdir -p data 15 | 16 | # reserve at least 2 CPUs for other users 17 | CPUS=$(echo "$(grep -c 'processor' /proc/cpuinfo) - 2" | bc) 18 | 19 | if [ $MAX_THREADS -gt $CPUS ]; then 20 | echo "Too many threads, we only have $CPUS CPUs" >&2 21 | exit 1 22 | fi 23 | 24 | FILE="bench_t${MAX_THREADS}_$(date +%F_%H-%M-%S).csv" 25 | DATA="data/${FILE}" 26 | 27 | time ./test $MAX_THREADS $CPUS $N_OPS $N_RUNS | grep -o '[^ ,]*,[^ ,]*,[^ ,]*,[^ ,]*,[^ ,]*,[^ ,]*' > ${DATA} 28 | 29 | # generate graphs 30 | Rscript graph.r ${DATA} 31 | -------------------------------------------------------------------------------- /Assignment_2/graph.r: -------------------------------------------------------------------------------- 1 | library(ggplot2) 2 | library(scales) 3 | 4 | args <- commandArgs(TRUE) 5 | name <- args[1] 6 | #name <- "data/test_my_spinlock.csv" 7 | d <- read.csv(file=name,head=TRUE,sep=",") 8 | d$ticks_per_thread <- d$ticks / d$nt 9 | d$nt <- factor(d$nt) 10 | 11 | #tpt_by_test_and_nt <- with(d, aggregate(ticks_per_thread ~ test + nt, d, median)) 12 | ticks_by_test_and_nt <- with(d, aggregate(ticks ~ test + nt + critical_section_accesses, d, median)) 13 | 14 | n_breaks <- 40 15 | 16 | is_graphable <- function(s) { return (! 
grepl("_nograph", s)) } 17 | 18 | gen_graph <- function(data, xc, yc, f, name, n_breaks, lbs){ 19 | data <- subset(data, f(test)) 20 | ggplot(data=data, aes_string(xc, yc, group="test")) + 21 | geom_line (data=data, aes_string(colour="test")) + 22 | geom_point(data=data, aes_string(shape="test",colour="test")) + 23 | scale_y_continuous(breaks = pretty_breaks(n_breaks)) + 24 | lbs 25 | 26 | path <- paste0(name,".pdf") 27 | print(paste0("generating ", path)) 28 | ggsave(path, width=16, height=9, units="in", limitsize=FALSE) 29 | } 30 | 31 | #gen_graph(subset(tpt_by_test_and_nt, critical_section_accesses == 0), "nt", "ticks_per_thread", is_graphable, paste0(name, "_tpt"), n_breaks, 32 | # labs(title = "Per-thread Throughput vs Number of Threads (no memory accesses during critical sections)", 33 | # x = "Number of Threads", 34 | # y = "Total duration / Number of Threads")) 35 | 36 | gen_graph(subset(ticks_by_test_and_nt, critical_section_accesses == 0), "nt", "ticks", is_graphable, paste0(name, "_ticks_00accesses"), n_breaks, 37 | labs(title = "Test Duration vs Number of Threads (empty critical section)", 38 | x = "Number of Threads", 39 | y = "Test Duration")) 40 | 41 | gen_graph(subset(ticks_by_test_and_nt, critical_section_accesses == 8), "nt", "ticks", is_graphable, paste0(name, "_ticks_08accesses"), n_breaks, 42 | labs(title = "Test Duration vs Number of Threads (1 cache line updated during critical section)", 43 | x = "Number of Threads", 44 | y = "Test Duration")) 45 | 46 | gen_graph(subset(ticks_by_test_and_nt, critical_section_accesses == 80), "nt", "ticks", is_graphable, paste0(name, "_ticks_80accesses"), n_breaks, 47 | labs(title = "Test Duration vs Number of Threads (10 cache lines updated during critical section)", 48 | x = "Number of Threads", 49 | y = "Test Duration")) 50 | -------------------------------------------------------------------------------- /Assignment_2/main.c: -------------------------------------------------------------------------------- 
1 | #include <stdio.h> 2 | #include <inttypes.h> 3 | 4 | #include "tests.h" 5 | #include "util.h" 6 | 7 | uint64_t n_threads, n_cpus, n_runs, n_ops; 8 | 9 | void show_usage(int argc, char **argv) 10 | { 11 | printf("Usage: %s N_THREADS N_CPUS N_OPS N_RUNS\n", argv[0]); 12 | } 13 | 14 | int main(int argc, char** argv) { 15 | int err, ret; 16 | 17 | if (argc != 5) { 18 | show_usage(argc, argv); 19 | return -1; 20 | } 21 | 22 | err = sscanf(argv[1], "%" PRIu64, &n_threads); 23 | if (err != 1) { 24 | show_usage(argc, argv); 25 | return -1; 26 | } 27 | 28 | err = sscanf(argv[2], "%" PRIu64, &n_cpus); 29 | if (err != 1) { 30 | show_usage(argc, argv); 31 | return -1; 32 | } 33 | 34 | err = sscanf(argv[3], "%" PRIu64, &n_ops); 35 | if (err != 1) { 36 | show_usage(argc, argv); 37 | return -1; 38 | } 39 | 40 | err = sscanf(argv[4], "%" PRIu64, &n_runs); 41 | if (err != 1) { 42 | show_usage(argc, argv); 43 | return -1; 44 | } 45 | 46 | if (n_ops < n_threads) { 47 | printf("error: N_OPS < N_THREADS. We need at least 1 operation per thread.\n"); 48 | return -1; 49 | } 50 | 51 | 52 | //tests_single(); 53 | 54 | tests_multi(); 55 | 56 | return 0; 57 | } 58 | -------------------------------------------------------------------------------- /Assignment_2/run: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | 5 | if [ "$#" -ne 3 ]; then 6 | echo "Usage: ./run MAX_THREADS N_OPS N_RUNS" 7 | exit 1 8 | fi 9 | 10 | MAX_THREADS="$1" 11 | N_OPS="$2" 12 | N_RUNS="$3" 13 | 14 | # reserve at least 2 CPUs for other users 15 | CPUS=$(echo "$(grep -c 'processor' /proc/cpuinfo) - 2" | bc) 16 | 17 | time ./test $MAX_THREADS $CPUS $N_OPS $N_RUNS 18 | -------------------------------------------------------------------------------- /Assignment_2/tests.c: -------------------------------------------------------------------------------- 1 | #include <stdlib.h> 2 | 3 | #include "tests.h" 4 | #include "util.h" 5 | #include "worker.h" 6 | 7 | void parent_announce(uint64_t
phase, int t_i) __attribute__((always_inline)); 8 | void parent_spin(ti_data_in *datas, uint64_t phase, int t_i) __attribute__((always_inline)); 9 | int parent_announce_then_spin(ti_data_in *datas, uint64_t phase1, uint64_t phase2, int t_i) __attribute__((always_inline)); 10 | 11 | // GLOBAL DATA (statically determined initial values) 12 | 13 | pthread_spinlock_t pthreads_spinlock; 14 | volatile uint64_t my_spinlock = UNLOCKED; 15 | volatile uint64_t my_spinlock_shared_counter = 0; 16 | volatile uint64_t completed_phase = INIT_PHASE; 17 | uint64_t global_buffer[ACCESS_BUFFER_SIZE]; 18 | 19 | // statically initialize global data for abql_sharing 20 | 21 | volatile uint64_t queue_last_sharing = 0; 22 | volatile flag_sharing *flags_sharing = NULL; // filled in in tests_multi() 23 | 24 | // statically initialize global data for abql_nosharing 25 | 26 | volatile uint64_t queue_last_nosharing = 0; 27 | volatile flag_nosharing *flags_nosharing = NULL; // filled in in tests_multi() 28 | 29 | // statically initialize global data for mcs_sharing 30 | volatile mcs_sharing mcs_global_sharing = (mcs_sharing){ .next = 0, .locked = UNLOCKED }; 31 | volatile mcs_sharing *mcss_sharing = NULL; // filled in in tests_multi() 32 | 33 | // statically initialize global data for mcs_nosharing 34 | 35 | volatile mcs_nosharing mcs_global_nosharing = (mcs_nosharing){ .next = 0, .locked = UNLOCKED }; 36 | volatile mcs_nosharing *mcss_nosharing = NULL; // filled in in tests_multi() 37 | 38 | // statically initialize global data for ticket 39 | volatile ticket_state my_ticket_lock = (ticket_state){ .next = 0, .owner = 0}; 40 | 41 | uint64_t n_tests = 21; 42 | char *test_names[] = { "spin_try_lock_correctness_nograph" 43 | , "spin_lock_correctness_nograph" 44 | , "spin_wait_lock_correctness_nograph" 45 | , "spin_read_lock_correctness_nograph" 46 | , "spin_experimental_lock_correctness_nograph" 47 | , "ticket_correctness_nograph" 48 | , "abql_sharing_correctness_nograph" 49 | , 
"abql_nosharing_correctness_nograph" 50 | , "mcs_sharing_correctness_nograph" 51 | , "mcs_nosharing_correctness_nograph" 52 | 53 | , "pthread_spin_lock" 54 | 55 | , "spin_try_lock_nograph" 56 | , "spin_lock" 57 | , "spin_wait_lock" 58 | , "spin_read_lock" 59 | , "spin_experimental_lock" 60 | , "ticket_lock" 61 | , "abql_sharing_lock" 62 | , "abql_nosharing_lock" 63 | , "mcs_sharing_lock" 64 | , "mcs_nosharing_lock" 65 | }; 66 | 67 | // skip tests with 0s, each value corresponds to the test in the same position 68 | // in test_names above 69 | uint64_t test_on[] = { 0 // spin_try_lock_correctness_nograph 70 | , 0 // spin_lock_correctness_nograph 71 | , 0 // spin_wait_lock_correctness_nograph 72 | , 0 // spin_read_lock_correctness_nograph 73 | , 0 // spin_experimental_lock_correctness_nograph 74 | , 1 // ticket_correctness_nograph 75 | , 0 // abql_sharing_correctness_nograph 76 | , 0 // abql_nosharing_correctness_nograph 77 | , 0 // mcs_sharing_correctness_nograph 78 | , 0 // mcs_nosharing_correctness_nograph 79 | 80 | , 0 // pthread_spin_lock 81 | 82 | , 0 // spin_try_lock_nograph 83 | , 0 // spin_lock 84 | , 0 // spin_wait_lock 85 | , 0 // spin_read_lock 86 | , 0 // spin_experimental_lock 87 | , 0 // ticket_lock 88 | , 0 // abql_sharing_lock 89 | , 0 // abql_nosharing_lock 90 | , 0 // mcs_sharing_lock 91 | , 0 // mcs_nosharing_lock 92 | }; 93 | 94 | op **oss = NULL; 95 | uint64_t n_accesses = 3; 96 | uint64_t accessesv[] = { 0 // no accesses 97 | , CACHE_LINE 98 | , 10 * CACHE_LINE 99 | //, L1_DATA_CACHE 100 | }; 101 | 102 | void tests_multi() { 103 | int i,j,k,all_ops,test; 104 | pthread_t ts[n_threads]; 105 | ti_data_in datas[n_threads]; 106 | oss = malloc(sizeof(op*) * n_tests); 107 | for(test=0; test 5 | 6 | #include "util.h" 7 | 8 | // GLOBAL DATA 9 | 10 | extern pthread_spinlock_t pthreads_spinlock; 11 | extern volatile uint64_t my_spinlock; 12 | extern volatile uint64_t my_spinlock_shared_counter; 13 | 14 | // define a type for ticket 15 | typedef struct { 
16 | uint64_t next; 17 | uint64_t padding[7]; 18 | uint64_t owner; 19 | } ticket_state; 20 | 21 | // declare shared data for ticket 22 | 23 | extern volatile ticket_state my_ticket_lock __attribute__((aligned (64))); 24 | 25 | // define a type for abql_sharing 26 | typedef struct { 27 | uint64_t val; 28 | } flag_sharing; 29 | 30 | // declare shared data for abql_sharing 31 | 32 | extern volatile uint64_t queue_last_sharing; 33 | extern volatile flag_sharing *flags_sharing; 34 | 35 | // TODO define a type for abql_nosharing (alter the definition below) 36 | typedef struct { 37 | uint64_t val; 38 | } flag_nosharing; 39 | 40 | // declare shared data for abql_nosharing 41 | 42 | extern volatile uint64_t queue_last_nosharing; 43 | extern volatile flag_nosharing *flags_nosharing; 44 | 45 | // define a type for mcs_sharing 46 | typedef struct { 47 | uint64_t next; 48 | uint64_t locked; 49 | } mcs_sharing; 50 | 51 | // declare shared data for mcs_sharing 52 | 53 | extern volatile mcs_sharing mcs_global_sharing; 54 | extern volatile mcs_sharing *mcss_sharing; 55 | 56 | // TODO define a type for mcs_nosharing (alter the definition below) 57 | typedef struct { 58 | uint64_t next; 59 | uint64_t locked; 60 | } mcs_nosharing; 61 | 62 | // declare shared data for mcs_nosharing 63 | 64 | extern volatile mcs_nosharing mcs_global_nosharing __attribute__((aligned (64))); 65 | extern volatile mcs_nosharing *mcss_nosharing; 66 | 67 | // LOCK VALUES 68 | 69 | #define UNLOCKED 0 70 | #define LOCKED 1 71 | 72 | #define HAS_LOCK 2 73 | #define MUST_WAIT 3 74 | 75 | // OPERATIONS 76 | // must all be unique 77 | 78 | #define PTHREAD_SPIN_LOCK_LOCK_OP 1 79 | #define SPIN_LOCK_LOCK_OP 2 80 | #define SPIN_LOCK_INC_OP 3 81 | #define SPIN_WAIT_LOCK_LOCK_OP 4 82 | #define SPIN_WAIT_LOCK_INC_OP 5 83 | #define SPIN_READ_LOCK_LOCK_OP 6 84 | #define SPIN_READ_LOCK_INC_OP 7 85 | #define SPIN_TRY_LOCK_LOCK_OP 8 86 | #define SPIN_TRY_LOCK_INC_OP 9 87 | #define SPIN_EXPERIMENTAL_LOCK_LOCK_OP 10 88 | 
#define SPIN_EXPERIMENTAL_LOCK_INC_OP 11 89 | #define ABQL_SHARING_LOCK_OP 12 90 | #define ABQL_SHARING_INC_OP 13 91 | #define ABQL_NOSHARING_LOCK_OP 14 92 | #define ABQL_NOSHARING_INC_OP 15 93 | #define MCS_SHARING_LOCK_OP 16 94 | #define MCS_SHARING_INC_OP 17 95 | #define MCS_NOSHARING_LOCK_OP 18 96 | #define MCS_NOSHARING_INC_OP 19 97 | #define TICKET_LOCK_OP 20 98 | #define TICKET_INC_OP 21 99 | 100 | // PHASES 101 | 102 | #define INIT_PHASE 0 103 | #define READY_PHASE 1 104 | #define WARMED_PHASE 2 105 | #define OPS_PHASE 3 106 | 107 | // TESTS 108 | // must be the sequence of natural numbers from 0 to the 109 | // total number of tests - 1. 110 | 111 | #define SPIN_TRY_LOCK_CORRECTNESS_TEST 0 112 | #define SPIN_LOCK_CORRECTNESS_TEST 1 113 | #define SPIN_WAIT_LOCK_CORRECTNESS_TEST 2 114 | #define SPIN_READ_LOCK_CORRECTNESS_TEST 3 115 | #define SPIN_EXPERIMENTAL_LOCK_CORRECTNESS_TEST 4 116 | #define TICKET_CORRECTNESS_TEST 5 117 | #define ABQL_SHARING_CORRECTNESS_TEST 6 118 | #define ABQL_NOSHARING_CORRECTNESS_TEST 7 119 | #define MCS_SHARING_CORRECTNESS_TEST 8 120 | #define MCS_NOSHARING_CORRECTNESS_TEST 9 121 | #define PTHREAD_SPIN_LOCK_TEST 10 122 | #define SPIN_TRY_LOCK_TEST 11 123 | #define SPIN_LOCK_TEST 12 124 | #define SPIN_WAIT_LOCK_TEST 13 125 | #define SPIN_READ_LOCK_TEST 14 126 | #define SPIN_EXPERIMENTAL_LOCK_TEST 15 127 | #define TICKET_TEST 16 128 | #define ABQL_SHARING_TEST 17 129 | #define ABQL_NOSHARING_TEST 18 130 | #define MCS_SHARING_TEST 19 131 | #define MCS_NOSHARING_TEST 20 132 | 133 | #define PASTE31(x, y, z) x ## y ## z 134 | #define PASTE3(x, y, z) PASTE31(x, y, z) 135 | 136 | #define PASTE21(x, y) x ## y 137 | #define PASTE2(x, y) PASTE21(x, y) 138 | 139 | #define STR_VALUE(arg) #arg 140 | #define FUNCTION_NAME(name) STR_VALUE(name) 141 | 142 | #define TIME_AND_CHECK(uid, t_i, run, fcall, label, dst, ret) __N3WLN__ \ 143 | do { __N3WLN__ \ 144 | uint64_t (PASTE3(pre,__LINE__,uid)), (PASTE3(post,__LINE__,uid)); __N3WLN__ \ 145 | 
struct timeval (PASTE3(tv1,__LINE__,uid)), (PASTE3(tv2,__LINE__,uid)); __N3WLN__ \ 146 | double (PASTE3(secs,__LINE__,uid)); __N3WLN__ \ 147 | unsigned int (PASTE3(ui,__LINE__,uid)); __N3WLN__ \ 148 | printf_parent("%d: run %d: about to call %s [%s]:\n", t_i, run, FUNCTION_NAME(fcall), label); __N3WLN__ \ 149 | gettimeofday(&(PASTE3(tv1,__LINE__,uid)), NULL); __N3WLN__ \ 150 | (PASTE3(pre,__LINE__,uid)) = __rdtscp(&(PASTE3(ui,__LINE__,uid))); __N3WLN__ \ 151 | cmm_barrier(); __N3WLN__ \ 152 | ret = fcall; __N3WLN__ \ 153 | cmm_barrier(); __N3WLN__ \ 154 | (PASTE3(post,__LINE__,uid)) = __rdtscp(&(PASTE3(ui,__LINE__,uid))); __N3WLN__ \ 155 | gettimeofday(&(PASTE3(tv2,__LINE__,uid)), NULL); __N3WLN__ \ 156 | dst = (PASTE3(post,__LINE__,uid)) - (PASTE3(pre,__LINE__,uid)); __N3WLN__ \ 157 | (PASTE3(secs,__LINE__,uid)) = (double) ((PASTE3(tv2,__LINE__,uid)).tv_usec - (PASTE3(tv1,__LINE__,uid)).tv_usec) / 1000000 + (double) ((PASTE3(tv2,__LINE__,uid)).tv_sec - (PASTE3(tv1,__LINE__,uid)).tv_sec); __N3WLN__ \ 158 | printf_parent("%d: run %d: call to %s [%s] took %" PRIu64 " TSC ticks, %f seconds, returned %d\n", t_i, run, FUNCTION_NAME(fcall), label, dst, (PASTE3(secs,__LINE__,uid)), ret); __N3WLN__ \ 159 | if(!ret) { __N3WLN__ \ 160 | printf_parent("%d: run %d: call succeeded (returned 0)\n", t_i, run); __N3WLN__ \ 161 | } __N3WLN__ \ 162 | else { __N3WLN__ \ 163 | printf_parent("%d: run %d: call failed (returned %d)\n", t_i, run, ret); __N3WLN__ \ 164 | } __N3WLN__ \ 165 | } while (0) __N3WLN__ \ 166 | 167 | #endif 168 | -------------------------------------------------------------------------------- /Assignment_2/util.c: -------------------------------------------------------------------------------- 1 | #include "util.h" 2 | 3 | unsigned int next_aff = 0; 4 | 5 | pthread_mutex_t affinity_mutex = PTHREAD_MUTEX_INITIALIZER; 6 | 7 | void set_affinity(void) 8 | { 9 | cpu_set_t mask; 10 | int cpu; 11 | int ret; 12 | 13 | ret = pthread_mutex_lock(&affinity_mutex); 14 | if 
(ret) { 15 | perror("Error in pthread mutex lock"); 16 | exit(-1); 17 | } 18 | 19 | if(next_aff >= n_cpus) { 20 | perror("Ran out of CPUs, reduce n_threads"); 21 | exit(-1); 22 | } 23 | cpu = next_aff++; 24 | ret = pthread_mutex_unlock(&affinity_mutex); 25 | if (ret) { 26 | perror("Error in pthread mutex unlock"); 27 | exit(-1); 28 | } 29 | CPU_ZERO(&mask); 30 | CPU_SET(cpu, &mask); 31 | sched_setaffinity(0, sizeof(mask), &mask); 32 | #if defined(PRINT_AFFINITIES) 33 | printf_verbose("set affinity %d\n", cpu); 34 | #endif 35 | } 36 | 37 | -------------------------------------------------------------------------------- /Assignment_2/util.h: -------------------------------------------------------------------------------- 1 | #ifndef UTIL_H 2 | #define UTIL_H 3 | 4 | #define _GNU_SOURCE 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #define URCU_INLINE_SMALL_FUNCTIONS 12 | #include 13 | #include 14 | #include 15 | 16 | #define printf_error(fmt, args...) \ 17 | do { \ 18 | printf(fmt, args); fflush(stdout); \ 19 | } while (0) 20 | 21 | #define printf_verbose(fmt, args...) \ 22 | do { \ 23 | if (PRINT_VERBOSE) { printf(fmt, args); fflush(stdout); } \ 24 | } while (0) 25 | 26 | #define printf_parent(fmt, args...) \ 27 | do { \ 28 | if (PRINT_PARENT) { printf(fmt, args); fflush(stdout); } \ 29 | } while (0) 30 | 31 | #define printf_worker(fmt, args...) \ 32 | do { \ 33 | if (PRINT_WORKER) { printf(fmt, args); fflush(stdout); } \ 34 | } while (0) 35 | 36 | #define printf_csv(fmt, args...) \ 37 | do { \ 38 | if (PRINT_CSV) { printf(fmt, args); fflush(stdout); } \ 39 | } while (0) 40 | 41 | // If *ptr == old, atomically store new to *ptr and return old. 42 | // Otherwise, return the value of *ptr without changing it. 
43 | #define lockcmpxchgq(ptr, old, new) __N3WLN__ \ 44 | ({ __N3WLN__ \ 45 | uint64_t __ret; __N3WLN__ \ 46 | uint64_t __old = (old); __N3WLN__ \ 47 | uint64_t __new = (new); __N3WLN__ \ 48 | volatile uint64_t *__ptr = (ptr); __N3WLN__ \ 49 | asm volatile("lock; cmpxchgq %2,%1" __N3WLN__ \ 50 | : "=a" (__ret), "+m" (*__ptr) __N3WLN__ \ 51 | : "r" (__new), "0" (__old) __N3WLN__ \ 52 | : "memory"); __N3WLN__ \ 53 | __ret; __N3WLN__ \ 54 | }) 55 | 56 | // Store new to *ptr, and return the immediately previous value in *ptr's 57 | // coherence order. Excerpt from a comment in glibc from 58 | // nptl/pthread_spin_lock.c: 59 | // xchgq usually takes less instructions than 60 | // lockcmpxchg. On the other hand, 61 | // lockcmpxchg potentially generates less bus traffic when the lock is locked. 62 | #define xchgq(ptr, new) __N3WLN__ \ 63 | ({ __N3WLN__ \ 64 | uint64_t __new = (new); __N3WLN__ \ 65 | volatile uint64_t *__ptr = (ptr); __N3WLN__ \ 66 | asm volatile ("xchgq %0, %1" __N3WLN__ \ 67 | : "=r" (__new), "=m" (*__ptr) __N3WLN__ \ 68 | : "0" (__new), "m" (*__ptr) __N3WLN__ \ 69 | : "memory" ); __N3WLN__ \ 70 | __new; __N3WLN__ \ 71 | }) 72 | 73 | // atomically add val to *ptr, returning the pre-add value of *ptr. 
74 | #define lockxaddq(ptr, val) __N3WLN__ \ 75 | ({ __N3WLN__ \ 76 | uint64_t __val = (val); __N3WLN__ \ 77 | volatile uint64_t *__ptr = (ptr); __N3WLN__ \ 78 | asm volatile("lock; xaddq %0, %1" __N3WLN__ \ 79 | : "+r" (__val), "+m" (*__ptr) __N3WLN__ \ 80 | : : "memory" ); __N3WLN__ \ 81 | __val; __N3WLN__ \ 82 | }) 83 | 84 | typedef struct { 85 | uint64_t operation; 86 | // uint64_t params[4]; 87 | } op; 88 | 89 | #define CACHE_LINE 8 // 8-byte words in a cache line 90 | #define L1_DATA_CACHE 4096 // 8-byte words in the 32KB L1 cache on Ivy Bridge 91 | #define ACCESS_BUFFER_SIZE (2 * L1_DATA_CACHE) 92 | 93 | typedef struct { 94 | int i; 95 | int n_ops_i; 96 | volatile uint64_t phase; 97 | volatile uint64_t my_spinlock_counter; 98 | struct drand48_data rand_state; 99 | // uint64_t buffer[ACCESS_BUFFER_SIZE]; 100 | uint64_t *buffer; 101 | uint64_t buffer_cur; 102 | } ti_data_in; 103 | 104 | void set_affinity(void); 105 | 106 | extern volatile uint64_t completed_phase; 107 | extern uint64_t n_threads, n_cpus, n_ops, n_runs, n_tests, n_accesses; 108 | extern op **oss; 109 | extern char *test_names[]; 110 | extern uint64_t test_on[]; 111 | extern uint64_t accessesv[]; 112 | extern uint64_t global_buffer[]; 113 | 114 | // On Intel, the busy-wait-nop instruction is called "pause", 115 | // which is actually represented as a nop with the rep prefix. 116 | // On processors before the P4 this behaves as a nop; on P4 and 117 | // later it might do something clever like yield to another 118 | // hyperthread. In any case, Intel recommends putting one 119 | // of these in a spin lock loop. 
120 | #define spin_pause() do { __asm__ __volatile__ ("rep; nop"); } while (0) 121 | #define nop() do { __asm__ __volatile__ ("nop"); } while (0) 122 | 123 | #define access(i, d, n_words) do { \ 124 | for((i)=0; (i)<(n_words); ++(i), (d)->buffer_cur = ((d)->buffer_cur + 1) % ACCESS_BUFFER_SIZE) { \ 125 | _CMM_STORE_SHARED((d)->buffer[(d)->buffer_cur], 1 + (CMM_LOAD_SHARED((d)->buffer[(d)->buffer_cur]))); \ 126 | } \ 127 | } while(0) 128 | 129 | #if defined(PRINT_SPIN) 130 | #define spin_until(p, phase, t_i) __N3WLN__ \ 131 | do { __N3WLN__ \ 132 | while(CMM_LOAD_SHARED(*p) != phase) { spin_pause(); } __N3WLN__ \ 133 | printf_verbose("%d: finished spin waiting for phase %d\n", t_i, phase); __N3WLN__ \ 134 | fflush(stdout); __N3WLN__ \ 135 | } while (0) __N3WLN__ \ 136 | 137 | #else 138 | #define spin_until(p, phase, t_i) __N3WLN__ \ 139 | do { __N3WLN__ \ 140 | while(CMM_LOAD_SHARED(*p) != phase) { spin_pause(); } __N3WLN__ \ 141 | } while (0) __N3WLN__ \ 142 | 143 | #endif 144 | 145 | #if defined(PRINT_SPIN) 146 | #define sleep_until(p, phase, t_i) __N3WLN__ \ 147 | do { __N3WLN__ \ 148 | while(CMM_LOAD_SHARED(*p) != phase) { usleep(100); spin_pause(); } __N3WLN__ \ 149 | printf_verbose("%d: finished spin waiting for phase %d\n", t_i, phase); __N3WLN__ \ 150 | fflush(stdout); __N3WLN__ \ 151 | } while (0) __N3WLN__ \ 152 | 153 | #else 154 | #define sleep_until(p, phase, t_i) __N3WLN__ \ 155 | do { __N3WLN__ \ 156 | while(CMM_LOAD_SHARED(*p) != phase) { usleep(100); spin_pause(); } __N3WLN__ \ 157 | } while (0) __N3WLN__ \ 158 | 159 | #endif 160 | 161 | #endif 162 | -------------------------------------------------------------------------------- /Assignment_2/worker.c: -------------------------------------------------------------------------------- 1 | #include "tests.h" 2 | #include "util.h" 3 | #include "worker.h" 4 | 5 | void announce(volatile uint64_t *p, uint64_t phase, int t_i) __attribute__((always_inline)); 6 | void announce_then_spin(volatile uint64_t *p, 
uint64_t phase, int t_i) __attribute__((always_inline)); 7 | 8 | void* ti(void *data) { 9 | int i, m, tryret, ret, run, nt, si, ei, test, accesses; 10 | op *cur, *start, *end; 11 | ti_data_in *d = (ti_data_in*)data; 12 | pthread_spinlock_t pthreads_spinlock_copy; 13 | uint64_t my_spinlock_copy; 14 | volatile uint64_t my_place; 15 | op op_copy; 16 | mcs_sharing my_mcs_sharing_copy; 17 | mcs_nosharing my_mcs_nosharing_copy; 18 | ticket_state my_ticket_lock_copy; 19 | 20 | // get this thread running on its own CPU 21 | set_affinity(); 22 | 23 | for(test=0; testi == n_threads - 1 ? oss[test] + n_ops : oss[test] + (d->i + 1) * m); 29 | switch (test) { 30 | case SPIN_TRY_LOCK_CORRECTNESS_TEST: 31 | for(cur = oss[test] + d->i * m; curi * m; curi * m; curi * m; curi * m; curi * m; curi * m; curi * m; curi * m; curi * m; curi * m; curi * m; curi * m; curi * m; curi * m; curi * m; curi * m; curi * m; curi * m; curi * m; curi * m; curphase, READY_PHASE, d->i); 142 | 143 | // run the test for each number of threads 144 | for(nt=1; nt<=n_threads; ++nt) { 145 | for(accesses=0; accessesi > nt - 1) { 147 | // this worker is inactive for this nt, just hang out without consuming 148 | // a lot of resources 149 | 150 | printf_worker("%d: nt is %d, skipping this round\n", d->i, nt); 151 | 152 | // no warming to do, say I'm done and wait for parent to say that everyone 153 | // else is done 154 | announce(&d->phase, WARMED_PHASE, d->i); 155 | //sleep_until(&completed_phase, WARMED_PHASE, d->i); 156 | spin_until(&completed_phase, WARMED_PHASE, d->i); 157 | 158 | // no ops to do, say I'm done and wait for parent to say that everyone 159 | // else is done 160 | announce(&d->phase, OPS_PHASE, d->i); 161 | //sleep_until(&completed_phase, OPS_PHASE, d->i); 162 | spin_until(&completed_phase, OPS_PHASE, d->i); 163 | } 164 | else { 165 | // this worker is active for this nt, let's run some tests! 
166 | 167 | // per-test-run initialization 168 | m = n_ops / nt; 169 | si = d->i * m; 170 | start = oss[test] + si; 171 | ei = si + m + ( d->i == nt - 1 ? n_ops - si - m : 0 ) - 1; 172 | end = oss[test] + ei; 173 | 174 | printf_worker("%d: nt is %d, running ops %d-%d [0x%" PRIx64 "-0x%" PRIx64 "]\n", d->i, nt, si, ei, (uint64_t)start, (uint64_t)end); 175 | 176 | // warm the cache 177 | // TODO does warming the ops help? 178 | for(cur = start; cur <= end; ++cur) { op_copy = *cur; } 179 | 180 | switch (test) { 181 | case PTHREAD_SPIN_LOCK_TEST: 182 | pthreads_spinlock_copy = pthreads_spinlock; 183 | break; 184 | case SPIN_TRY_LOCK_CORRECTNESS_TEST: 185 | case SPIN_LOCK_CORRECTNESS_TEST: 186 | case SPIN_WAIT_LOCK_CORRECTNESS_TEST: 187 | case SPIN_READ_LOCK_CORRECTNESS_TEST: 188 | case SPIN_EXPERIMENTAL_LOCK_CORRECTNESS_TEST: 189 | case SPIN_TRY_LOCK_TEST: 190 | case SPIN_LOCK_TEST: 191 | case SPIN_WAIT_LOCK_TEST: 192 | case SPIN_READ_LOCK_TEST: 193 | case SPIN_EXPERIMENTAL_LOCK_TEST: 194 | my_spinlock_copy = my_spinlock; 195 | break; 196 | case ABQL_SHARING_CORRECTNESS_TEST: 197 | case ABQL_SHARING_TEST: 198 | for(i=0; iphase, WARMED_PHASE, d->i); 224 | 225 | // run the ops! 
226 | for(cur = start, i=si; cur <= end; ++cur, ++i) { 227 | printf_worker("%d: running op %d\n", d->i, i); 228 | 229 | // enter critical section 230 | switch (cur->operation) { 231 | case PTHREAD_SPIN_LOCK_LOCK_OP: 232 | pthread_spin_lock(&pthreads_spinlock); 233 | break; 234 | case SPIN_TRY_LOCK_LOCK_OP: 235 | case SPIN_TRY_LOCK_INC_OP: 236 | tryret = spin_try_lock(&my_spinlock); 237 | break; 238 | case SPIN_LOCK_LOCK_OP: 239 | case SPIN_LOCK_INC_OP: 240 | spin_lock(&my_spinlock); 241 | break; 242 | case SPIN_WAIT_LOCK_LOCK_OP: 243 | case SPIN_WAIT_LOCK_INC_OP: 244 | spin_wait_lock(&my_spinlock); 245 | break; 246 | case SPIN_READ_LOCK_LOCK_OP: 247 | case SPIN_READ_LOCK_INC_OP: 248 | spin_read_lock(&my_spinlock); 249 | break; 250 | case SPIN_EXPERIMENTAL_LOCK_LOCK_OP: 251 | case SPIN_EXPERIMENTAL_LOCK_INC_OP: 252 | spin_experimental_lock(&my_spinlock); 253 | break; 254 | case ABQL_SHARING_LOCK_OP: 255 | case ABQL_SHARING_INC_OP: 256 | abql_sharing_lock(&my_place, &queue_last_sharing, flags_sharing, n_threads); 257 | break; 258 | case ABQL_NOSHARING_LOCK_OP: 259 | case ABQL_NOSHARING_INC_OP: 260 | abql_nosharing_lock(&my_place, &queue_last_nosharing, flags_nosharing, n_threads); 261 | break; 262 | case MCS_SHARING_LOCK_OP: 263 | case MCS_SHARING_INC_OP: 264 | mcs_sharing_lock(&mcs_global_sharing, &mcss_sharing[d->i]); 265 | break; 266 | case MCS_NOSHARING_LOCK_OP: 267 | case MCS_NOSHARING_INC_OP: 268 | mcs_nosharing_lock(&mcs_global_nosharing, &mcss_nosharing[d->i]); 269 | break; 270 | case TICKET_LOCK_OP: 271 | case TICKET_INC_OP: 272 | ticket_lock(&my_ticket_lock); 273 | break; 274 | default: 275 | printf_error("Undefined operation %" PRIu64 "\n", cur->operation); 276 | exit(-1); 277 | break; 278 | } 279 | 280 | // critical section body 281 | 282 | // * simulate work by accessing memory 283 | access(i, d, accessesv[accesses]); 284 | 285 | // * do per-operation critical section work 286 | switch(cur->operation) { 287 | case SPIN_LOCK_INC_OP: 288 | case 
SPIN_WAIT_LOCK_INC_OP: 289 | case SPIN_READ_LOCK_INC_OP: 290 | case SPIN_EXPERIMENTAL_LOCK_INC_OP: 291 | case ABQL_SHARING_INC_OP: 292 | case ABQL_NOSHARING_INC_OP: 293 | case MCS_SHARING_INC_OP: 294 | case MCS_NOSHARING_INC_OP: 295 | case TICKET_INC_OP: 296 | ++my_spinlock_shared_counter; 297 | ++d->my_spinlock_counter; 298 | break; 299 | case SPIN_TRY_LOCK_INC_OP: 300 | if (tryret) { 301 | ++my_spinlock_shared_counter; 302 | ++d->my_spinlock_counter; 303 | } 304 | break; 305 | } 306 | 307 | // leave critical section 308 | switch (cur->operation) { 309 | case PTHREAD_SPIN_LOCK_LOCK_OP: 310 | pthread_spin_unlock(&pthreads_spinlock); 311 | break; 312 | case SPIN_LOCK_LOCK_OP: 313 | case SPIN_LOCK_INC_OP: 314 | case SPIN_WAIT_LOCK_LOCK_OP: 315 | case SPIN_WAIT_LOCK_INC_OP: 316 | case SPIN_READ_LOCK_LOCK_OP: 317 | case SPIN_READ_LOCK_INC_OP: 318 | case SPIN_EXPERIMENTAL_LOCK_LOCK_OP: 319 | case SPIN_EXPERIMENTAL_LOCK_INC_OP: 320 | spin_unlock(&my_spinlock); 321 | break; 322 | case SPIN_TRY_LOCK_LOCK_OP: 323 | case SPIN_TRY_LOCK_INC_OP: 324 | if(tryret) { spin_unlock(&my_spinlock); } 325 | break; 326 | case ABQL_SHARING_LOCK_OP: 327 | case ABQL_SHARING_INC_OP: 328 | abql_sharing_unlock(&my_place, flags_sharing, n_threads); 329 | break; 330 | case ABQL_NOSHARING_LOCK_OP: 331 | case ABQL_NOSHARING_INC_OP: 332 | abql_nosharing_unlock(&my_place, flags_nosharing, n_threads); 333 | break; 334 | case MCS_SHARING_LOCK_OP: 335 | case MCS_SHARING_INC_OP: 336 | mcs_sharing_unlock(&mcs_global_sharing, &mcss_sharing[d->i]); 337 | break; 338 | case MCS_NOSHARING_LOCK_OP: 339 | case MCS_NOSHARING_INC_OP: 340 | mcs_nosharing_unlock(&mcs_global_nosharing, &mcss_nosharing[d->i]); 341 | break; 342 | case TICKET_LOCK_OP: 343 | case TICKET_INC_OP: 344 | ticket_unlock(&my_ticket_lock); 345 | break; 346 | } 347 | 348 | } 349 | 350 | // I'm done with my ops, say so and wait for parent to say that 351 | // everyone else is done 352 | announce_then_spin(&d->phase, OPS_PHASE, d->i); 353 | } 354 
| } 355 | } 356 | } 357 | announce_then_spin(&d->phase, INIT_PHASE, d->i); 358 | } 359 | 360 | return data; 361 | } 362 | 363 | // spin_try_lock 364 | int spin_try_lock(volatile uint64_t *lock) { 365 | // TODO 366 | } 367 | 368 | // spin_lock 369 | void spin_lock(volatile uint64_t *lock) { 370 | // TODO 371 | } 372 | 373 | // spin_wait_lock 374 | void spin_wait_lock(volatile uint64_t *lock) { 375 | // TODO 376 | } 377 | 378 | // spin_read_lock 379 | void spin_read_lock(volatile uint64_t *lock) { 380 | // TODO 381 | } 382 | 383 | // spin_experimental_lock 384 | void spin_experimental_lock(volatile uint64_t *lock) { 385 | // TODO 386 | } 387 | 388 | 389 | void spin_unlock(volatile uint64_t *lock) { 390 | // TODO 391 | } 392 | 393 | // BEGIN ASSIGNMENT SECTION 394 | 395 | void ticket_lock(volatile ticket_state *lock) { 396 | // TODO 397 | } 398 | 399 | void ticket_unlock(volatile ticket_state *lock) { 400 | // TODO 401 | } 402 | 403 | void abql_sharing_lock(volatile uint64_t *my_place, volatile uint64_t *queue_last, 404 | volatile flag_sharing *flags, uint64_t n_threads) { 405 | // TODO 406 | } 407 | 408 | void abql_nosharing_lock(volatile uint64_t *my_place, volatile uint64_t *queue_last, 409 | volatile flag_nosharing *flags, uint64_t n_threads) { 410 | // TODO 411 | } 412 | 413 | void abql_sharing_unlock(volatile uint64_t *my_place, volatile flag_sharing *flags, uint64_t n_threads) { 414 | // TODO 415 | } 416 | 417 | void abql_nosharing_unlock(volatile uint64_t *my_place, volatile flag_nosharing *flags, uint64_t n_threads) { 418 | // TODO 419 | } 420 | 421 | void mcs_sharing_lock(volatile mcs_sharing *global_lock, volatile mcs_sharing *local_lock) { 422 | // TODO 423 | } 424 | 425 | void mcs_nosharing_lock(volatile mcs_nosharing *global_lock, volatile mcs_nosharing *local_lock) { 426 | // TODO 427 | } 428 | 429 | void mcs_sharing_unlock(volatile mcs_sharing *global_lock, volatile mcs_sharing *local_lock) { 430 | // TODO 431 | } 432 | 433 | void 
mcs_nosharing_unlock(volatile mcs_nosharing *global_lock, volatile mcs_nosharing *local_lock) 434 | { 435 | // TODO 436 | } 437 | 438 | // END ASSIGNMENT SECTION 439 | 440 | void announce(volatile uint64_t *p, uint64_t phase, int t_i) 441 | { 442 | _CMM_STORE_SHARED(*p, phase); 443 | 444 | cmm_smp_mb(); // necessary? 445 | 446 | printf_worker("%d: announced finished with phase %" PRIu64 "\n", t_i, phase); 447 | } 448 | 449 | void announce_then_spin(volatile uint64_t *p, uint64_t phase, int t_i) 450 | { 451 | announce(p, phase, t_i); 452 | spin_until(&completed_phase, phase, t_i); 453 | } 454 | 455 | -------------------------------------------------------------------------------- /Assignment_2/worker.h: -------------------------------------------------------------------------------- 1 | #ifndef WORKER_H 2 | #define WORKER_H 3 | 4 | #include "util.h" 5 | 6 | void* ti(void *data); 7 | 8 | int spin_try_lock(volatile uint64_t *lock) __attribute__((always_inline)); 9 | void spin_lock(volatile uint64_t *lock) __attribute__((always_inline)); 10 | void spin_wait_lock(volatile uint64_t *lock) __attribute__((always_inline)); 11 | void spin_read_lock(volatile uint64_t *lock) __attribute__((always_inline)); 12 | void spin_unlock(volatile uint64_t *lock) __attribute__((always_inline)); 13 | void spin_experimental_lock(volatile uint64_t *lock) __attribute__((always_inline)); 14 | void abql_sharing_lock(volatile uint64_t *my_place, volatile uint64_t *queue_last, 15 | volatile flag_sharing *flags, uint64_t n_threads) __attribute__((always_inline)); 16 | void abql_sharing_unlock(volatile uint64_t *my_place, volatile flag_sharing *flags, 17 | uint64_t n_threads) __attribute__((always_inline)); 18 | void abql_nosharing_lock(volatile uint64_t *my_place, volatile uint64_t *queue_last, 19 | volatile flag_nosharing *flags, uint64_t n_threads) __attribute__((always_inline)); 20 | void abql_nosharing_unlock(volatile uint64_t *my_place, volatile flag_nosharing *flags, 21 | uint64_t 
n_threads) __attribute__((always_inline)); 22 | void mcs_sharing_lock(volatile mcs_sharing *global_lock, volatile mcs_sharing *local_lock) __attribute__((always_inline)); 23 | void mcs_nosharing_lock(volatile mcs_nosharing *global_lock, volatile mcs_nosharing *local_lock) __attribute__((always_inline)); 24 | void mcs_sharing_unlock(volatile mcs_sharing *global_lock, volatile mcs_sharing *local_lock) __attribute__((always_inline)); 25 | void mcs_nosharing_unlock(volatile mcs_nosharing *global_lock, volatile mcs_nosharing *local_lock) __attribute__((always_inline)); 26 | void ticket_lock(volatile ticket_state *lock) __attribute__((always_inline)); 27 | void ticket_unlock(volatile ticket_state *lock) __attribute__((always_inline)); 28 | #endif 29 | -------------------------------------------------------------------------------- /Assignment_3/DEBUG.md: -------------------------------------------------------------------------------- 1 | # A brief primer on debugging 2 | 3 | A variety of strange things can happen as a result of bugs in these implementations. 4 | Often, I find that I've made a mistake somewhere, and the test harness hangs in the 5 | middle of a particular run, cores spinning at 100%, but no progress being made. 6 | I've found it helpful to build with debugging symbols, and run the test harness with `gdb` 7 | so I can inspect the program state when it gets stuck. 8 | 9 | ## Walkthrough 10 | 11 | To get this set up, uncomment the line 12 | ```make 13 | #DEBUG = TRUE 14 | ``` 15 | in `Makefile`, then rebuild with `make`. 16 | 17 | NOTE: Your code will sometimes have _different performance characteristics_ 18 | when built this way. Once you've resolved or learned what you needed from 19 | debugging, remember to comment that line back out and run `make` again before 20 | benchmarking. 21 | 22 | Next, run the test harness attached to gdb, e.g. 
23 | ```bash 24 | gdb --args ./test 4 28 8 1 100 25 | ``` 26 | 27 | When the test harness hits the loop, stop the program and return control to 28 | `gdb` with a `SIGINT` (`CTRL-c`). Check out your threads with 29 | ```gdb 30 | info threads 31 | ``` 32 | select one that looks like it's running your code, and switch to it, e.g. for thread 2 33 | ```gdb 34 | t 2 35 | ``` 36 | and see where we are in the code. 37 | ```gdb 38 | list 39 | ``` 40 | NOTE: This listing has macros expanded, and expansions often look very 41 | different from the macro name. Sometimes this has the benefit of helping you 42 | figure out exactly where the program counter is in an expansion of a multi-line 43 | macro, other times it just makes it hard to read the code. Perhaps a future 44 | assignment will include a way to turn this off :) 45 | 46 | Once we know where the program counter is (which must be code along the path of 47 | the infinite loop, if this thread is in fact looping forever), it might be 48 | helpful to print the values of some variables in scope, e.g. for `ticket_lock()` 49 | ```gdb 50 | p *lock 51 | ``` 52 | ```gdb 53 | p my_number 54 | ``` 55 | or for `abql_nosharing_lock()` 56 | ```gdb 57 | p *my_place 58 | ``` 59 | ```gdb 60 | p flags[0] 61 | ``` 62 | 63 | Usually I do this, and then stare at the implementation for a while in my editor, trying 64 | to figure out how the state ended up this way. 65 | 66 | ## Other things to do 67 | 68 | ### Inspect frames of calling functions 69 | 70 | If you want to inspect values in scope in a function call further up the stack, 71 | you can check out the stack, 72 | ```gdb 73 | bt 74 | ``` 75 | pick a different frame to inspect, and load it, e.g. for frame 2 76 | ```gdb 77 | f 2 78 | ``` 79 | 80 | and print some variables in scope in the calling function. 81 | 82 | ### Breakpoints/Watchpoints and Invariants 83 | 84 | You can set breakpoints and watchpoints, and do other GDB things more or less 85 | as usual.
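For example, to stop the moment any thread writes the shared counter that the increment tests bump (`my_spinlock_shared_counter` is a global in this harness), set a watchpoint from the `gdb` prompt:
```gdb
watch my_spinlock_shared_counter
```
GDB reports the old and new values each time the watchpoint fires, which is handy for spotting a lost update.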
If you are feeling paranoid about the impact of making a system call 86 | when you hit a watchpoint/breakpoint, or can just check what you want more 87 | easily in C, you can add to the code a runtime invariant that loops forever 88 | when violated. 89 | 90 | Instead of a breakpoint: 91 | ```c 92 | while(1) {} 93 | ``` 94 | 95 | Instead of a watchpoint: 96 | ```c 97 | if(!invariant) { while(1) {} } 98 | ``` 99 | 100 | Run it through `gdb` and see if any threads get stuck in this particular loop. 101 | If one did, you can print some values in scope at that line, and figure out how 102 | the invariant was violated. The disadvantage of this approach, of course, is 103 | that it's harder to resume execution afterwards (if you care about that). 104 | -------------------------------------------------------------------------------- /Assignment_3/Makefile: -------------------------------------------------------------------------------- 1 | # TUNABLE PARAMETERS 2 | #DEBUG = TRUE 3 | PRINT_WORKER = 0 4 | PRINT_PARENT = 1 5 | PRINT_VERBOSE = 0 6 | PRINT_CSV = 1 7 | 8 | WARN = -Wno-attributes -Wno-format-extra-args -Wno-implicit-function-declaration 9 | CPUS = $(shell grep 'processor[[:space:]]*:' /proc/cpuinfo | wc -l) 10 | JOBS = $(shell echo "$(CPUS) - 1" | bc) 11 | MAKEFLAGS += "-j $(JOBS)" 12 | CLEAN_COMMENTS = grep -v '^\# [[:digit:]]\+ ' 13 | INSERT_NEWLINES = sed 's/__N3WLN__/\n/g' 14 | SKIP_NEWLINES = sed 's/__N3WLN__//g' 15 | REINDENT = echo -e "G=gg\n:wq\n" | vim 16 | CCOPTS1 = -pthread -I/u/theod/builds/include -DPRINT_WORKER=$(PRINT_WORKER) -DPRINT_PARENT=$(PRINT_PARENT) -DPRINT_VERBOSE=$(PRINT_VERBOSE) -DPRINT_CSV=$(PRINT_CSV) 17 | # turn off all compiler optimizations 18 | #CCOPTS1 += -O0 19 | # use locally built urcu library instead of system-wide 20 | #CCOPTS2 = -L/u/theod/builds/lib -lurcu-qsbr 21 | CCOPTS2 = -lurcu-qsbr 22 | 23 | ifeq ($(DEBUG),TRUE) 24 | CC1 = nice gcc -E $(CCOPTS1) $(WARN) 25 | CC2 = nice gcc -g $(CCOPTS1) $(WARN) 26 | else 27 | CC = nice
gcc $(CCOPTS1) $(WARN) -D__N3WLN__= 28 | endif 29 | 30 | OBJS = util.o worker.o tests.o 31 | 32 | # make uses the first goal as the default; you can override this with 33 | #.DEFAULT_GOAL := all 34 | all: test 35 | 36 | util: util.h util.c 37 | ifeq ($(DEBUG),TRUE) 38 | $(CC1) $@.c | $(CLEAN_COMMENTS) | $(INSERT_NEWLINES) > $@CPP.c 39 | #$(REINDENT) $@CPP.c 40 | $(CC2) -xc -c -o $@.o $@CPP.c 41 | else 42 | $(CC) $@.c -c -o $@.o 43 | endif 44 | 45 | worker: worker.c worker.h tests.h util 46 | ifeq ($(DEBUG),TRUE) 47 | $(CC1) $@.c | $(CLEAN_COMMENTS) | $(INSERT_NEWLINES) > $@CPP.c 48 | #$(REINDENT) $@CPP.c 49 | $(CC2) -xc -c -o $@.o $@CPP.c 50 | else 51 | $(CC) $@.c -c -o $@.o 52 | endif 53 | 54 | tests: util tests.c tests.h 55 | ifeq ($(DEBUG),TRUE) 56 | $(CC1) $@.c | $(CLEAN_COMMENTS) | $(INSERT_NEWLINES) > $@CPP.c 57 | #$(REINDENT) $@CPP.c 58 | $(CC2) -xc -c -o $@.o $@CPP.c 59 | else 60 | $(CC) $@.c -c -o $@.o 61 | endif 62 | 63 | test: main.c util worker tests 64 | ifeq ($(DEBUG),TRUE) 65 | $(CC1) main.c | $(CLEAN_COMMENTS) | $(INSERT_NEWLINES) > mainCPP.c 66 | #$(REINDENT) $@CPP.c 67 | $(CC2) -o test mainCPP.c $(OBJS) $(CCOPTS2) 68 | else 69 | $(CC) -o test main.c $(OBJS) $(CCOPTS2) 70 | endif 71 | 72 | .PHONY: clean 73 | clean: 74 | rm -f $(OBJS) $(wildcard *CPP.c) test 75 | -------------------------------------------------------------------------------- /Assignment_3/adding_a_test.md: -------------------------------------------------------------------------------- 1 | 1. Update test_names and increment n_tests in tests.c. 2 | 2. Add any applicable new operations (with unique numbers) under "// OPERATIONS" in tests.h. 3 | 3. Add a test (with a unique number) under "// TESTS" in tests.h. 4 | 4. Add declarations for any applicable global data in tests.h under "// GLOBAL DATA" and initialize as needed under "// GLOBAL DATA" in tests.c (or in tests_multi() in tests.c, if the initialization depends on runtime parameters). 5 | 5.
Add any new per-thread data to ti_data_in in util.h 6 | 6. Add a test case under "// initialize test data" in worker.c to initialize the test data in parallel. 7 | 7. Add any needed per-test-run initialization under "// per-test-run initialization" in worker.c. 8 | 8. Add a test case under "// warm the cache" in worker.c. 9 | 9. Add cases for new operations under "// enter critical section" in worker.c. 10 | 10. Add cases for new operations under "// critical section body" (if applicable) in worker.c. 11 | 11. Add cases for new operations under "// leave critical section" in worker.c. 12 | 12. If parent needs to do some checking/work after each run, add a test case under "// if applicable, check results of run and reset test state" in tests.c 13 | 14 | -------------------------------------------------------------------------------- /Assignment_3/bench: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | 5 | if [ "$#" -ne 4 ]; then 6 | echo "Usage: ./bench MAX_THREADS N_OPS N_RUNS N_POOL_LINES" 7 | exit 1 8 | fi 9 | 10 | MAX_THREADS="$1" 11 | N_OPS="$2" 12 | N_RUNS="$3" 13 | N_POOL_LINES="$4" 14 | 15 | mkdir -p data 16 | 17 | # reserve at least 2 CPUs for other users 18 | CPUS=$(echo "$(grep -c 'processor' /proc/cpuinfo) - 2" | bc) 19 | 20 | if [ $MAX_THREADS -gt $CPUS ]; then 21 | echo "Too many threads, we only have $CPUS CPUs" >&2 22 | exit 1 23 | fi 24 | 25 | FILE="bench_t${MAX_THREADS}_$(date +%F_%H-%M-%S).csv" 26 | DATA="data/${FILE}" 27 | 28 | time ./test $MAX_THREADS $CPUS $N_OPS $N_RUNS $N_POOL_LINES | grep -o '[^ ,]*,[^ ,]*,[^ ,]*,[^ ,]*,[^ ,]*,[^ ,]*' > ${DATA} 29 | 30 | # generate graphs 31 | Rscript graph.r ${DATA} 32 | -------------------------------------------------------------------------------- /Assignment_3/graph.r: -------------------------------------------------------------------------------- 1 | library(ggplot2) 2 | library(scales) 3 | 4 | args <- commandArgs(TRUE) 5 | name <- 
args[1] 6 | #name <- "data/test_my_spinlock.csv" 7 | d <- read.csv(file=name,head=TRUE,sep=",") 8 | d$ticks_per_thread <- d$ticks / d$nt 9 | d$nt <- factor(d$nt) 10 | 11 | #tpt_by_test_and_nt <- with(d, aggregate(ticks_per_thread ~ test + nt, d, median)) 12 | ticks_by_test_and_nt <- with(d, aggregate(ticks ~ test + nt + critical_section_accesses, d, median)) 13 | 14 | n_breaks <- 40 15 | 16 | is_graphable <- function(s) { return (! grepl("_nograph", s)) } 17 | 18 | gen_graph <- function(data, xc, yc, f, name, n_breaks, lbs){ 19 | data <- subset(data, f(test)) 20 | if(nrow(data) == 0) { 21 | print(paste0("No data for ",name,", not generating a graph.")) 22 | return () 23 | } 24 | ggplot(data=data, aes_string(xc, yc, group="test")) + 25 | geom_line (data=data, aes_string(colour="test")) + 26 | geom_point(data=data, aes_string(shape="test",colour="test")) + 27 | scale_y_continuous(breaks = pretty_breaks(n_breaks)) + 28 | lbs 29 | 30 | path <- paste0(name,".pdf") 31 | print(paste0("generating ", path)) 32 | ggsave(path, width=16, height=9, units="in", limitsize=FALSE) 33 | } 34 | 35 | #gen_graph(subset(tpt_by_test_and_nt, critical_section_accesses == 0), "nt", "ticks_per_thread", is_graphable, paste0(name, "_tpt"), n_breaks, 36 | # labs(title = "Per-thread Throughput vs Number of Threads (no memory accesses during critical sections)", 37 | # x = "Number of Threads", 38 | # y = "Total duration / Number of Threads")) 39 | 40 | gen_graph(subset(ticks_by_test_and_nt, critical_section_accesses == 0), "nt", "ticks", is_graphable, paste0(name, "_ticks_00accesses"), n_breaks, 41 | labs(title = "Test Duration vs Number of Threads (empty critical section)", 42 | x = "Number of Threads", 43 | y = "Test Duration")) 44 | 45 | gen_graph(subset(ticks_by_test_and_nt, critical_section_accesses == 8), "nt", "ticks", is_graphable, paste0(name, "_ticks_08accesses"), n_breaks, 46 | labs(title = "Test Duration vs Number of Threads (1 cache line updated during critical section)", 47 | x = 
"Number of Threads", 48 | y = "Test Duration")) 49 | 50 | gen_graph(subset(ticks_by_test_and_nt, critical_section_accesses == 80), "nt", "ticks", is_graphable, paste0(name, "_ticks_80accesses"), n_breaks, 51 | labs(title = "Test Duration vs Number of Threads (10 cache lines updated during critical section)", 52 | x = "Number of Threads", 53 | y = "Test Duration")) 54 | -------------------------------------------------------------------------------- /Assignment_3/main.c: -------------------------------------------------------------------------------- 1 | #include <stdio.h> 2 | #include <inttypes.h> 3 | 4 | #include "tests.h" 5 | #include "util.h" 6 | 7 | uint64_t n_threads, n_cpus, n_runs, n_ops, n_pool_lines; 8 | 9 | void show_usage(int argc, char **argv) 10 | { 11 | printf("Usage: %s N_THREADS N_CPUS N_OPS N_RUNS N_POOL_LINES\n", argv[0]); 12 | } 13 | 14 | int main(int argc, char** argv) { 15 | int err; 16 | 17 | if (argc != 6) { 18 | show_usage(argc, argv); 19 | return -1; 20 | } 21 | 22 | err = sscanf(argv[1], "%" PRIu64, &n_threads); 23 | if (err != 1) { 24 | show_usage(argc, argv); 25 | return -1; 26 | } 27 | 28 | err = sscanf(argv[2], "%" PRIu64, &n_cpus); 29 | if (err != 1) { 30 | show_usage(argc, argv); 31 | return -1; 32 | } 33 | 34 | err = sscanf(argv[3], "%" PRIu64, &n_ops); 35 | if (err != 1) { 36 | show_usage(argc, argv); 37 | return -1; 38 | } 39 | 40 | err = sscanf(argv[4], "%" PRIu64, &n_runs); 41 | if (err != 1) { 42 | show_usage(argc, argv); 43 | return -1; 44 | } 45 | 46 | err = sscanf(argv[5], "%" PRIu64, &n_pool_lines); 47 | if (err != 1) { 48 | show_usage(argc, argv); 49 | return -1; 50 | } 51 | 52 | if (n_ops < n_threads) { 53 | printf("error: N_OPS < N_THREADS.
We need at least 1 operation per thread.\n"); 54 | return -1; 55 | } 56 | 57 | 58 | //tests_single(); 59 | 60 | tests_multi(); 61 | 62 | return 0; 63 | } 64 | -------------------------------------------------------------------------------- /Assignment_3/run: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | 5 | if [ "$#" -ne 4 ]; then 6 | echo "Usage: ./run MAX_THREADS N_OPS N_RUNS N_POOL_LINES" 7 | exit 1 8 | fi 9 | 10 | 11 | MAX_THREADS="$1" 12 | N_OPS="$2" 13 | N_RUNS="$3" 14 | N_POOL_LINES="$4" 15 | 16 | # reserve at least 2 CPUs for other users 17 | CPUS=$(echo "$(grep -c 'processor' /proc/cpuinfo) - 2" | bc) 18 | 19 | time ./test $MAX_THREADS $CPUS $N_OPS $N_RUNS $N_POOL_LINES 20 | -------------------------------------------------------------------------------- /Assignment_3/tests.c: -------------------------------------------------------------------------------- 1 | #include <stdlib.h> 2 | 3 | #include "tests.h" 4 | #include "util.h" 5 | #include "worker.h" 6 | 7 | void parent_announce(uint64_t phase, int t_i) __attribute__((always_inline)); 8 | void parent_spin(ti_data_in *datas, uint64_t phase, int t_i) __attribute__((always_inline)); 9 | int parent_announce_then_spin(ti_data_in *datas, uint64_t phase1, uint64_t phase2, int t_i) __attribute__((always_inline)); 10 | 11 | // GLOBAL DATA (statically determined initial values) 12 | 13 | pthread_spinlock_t pthreads_spinlock; 14 | volatile uint64_t my_spinlock = UNLOCKED; 15 | volatile uint64_t my_spinlock_shared_counter = 0; 16 | volatile uint64_t completed_phase = INIT_PHASE; 17 | uint64_t global_buffer[ACCESS_BUFFER_SIZE]; 18 | uint64_t *global_dequeues = NULL; 19 | ti_data_in *global_datas = NULL; 20 | pool_meta *global_pool_meta = NULL; 21 | uint64_t *global_pool_data = NULL; 22 | 23 | // statically initialize global data for ticket 24 | 25 | volatile ticket_state my_ticket_lock = (ticket_state){ .next = 0, .owner = 0}; 26 | 27 | // statically
initialize global data for abql_sharing 28 | 29 | volatile uint64_t queue_last_sharing = 0; 30 | volatile flag_sharing *flags_sharing = NULL; // filled in in tests_multi() 31 | 32 | // statically initialize global data for abql_nosharing 33 | 34 | volatile uint64_t queue_last_nosharing = 0; 35 | volatile flag_nosharing *flags_nosharing = NULL; // filled in in tests_multi() 36 | 37 | // statically initialize global data for mcs_sharing 38 | volatile mcs_sharing mcs_global_sharing = (mcs_sharing){ .next = 0, .locked = UNLOCKED }; 39 | volatile mcs_sharing *mcss_sharing = NULL; // filled in in tests_multi() 40 | 41 | // statically initialize global data for mcs_nosharing 42 | 43 | volatile mcs_nosharing mcs_global_nosharing = (mcs_nosharing){ .next = 0, .locked = UNLOCKED }; 44 | volatile mcs_nosharing *mcss_nosharing = NULL; // filled in in tests_multi() 45 | 46 | // TODO statically initialize global data for coarse_queue (change if you'd 47 | // like to use a lock that's represented by something besides a word 48 | // initialized to UNLOCKED) 49 | 50 | volatile coarse_queue my_coarse_queue = (coarse_queue){ .head = 0, .tail = 0, .lock = UNLOCKED }; 51 | 52 | // statically initialize global data for nb_queue 53 | 54 | volatile nb_queue my_nb_queue = (nb_queue){ .head = 0, .tail = 0 }; 55 | 56 | uint64_t n_tests = 27; 57 | char *test_names[] = { "spin_try_lock_correctness_nograph" 58 | , "spin_lock_correctness_nograph" 59 | , "spin_wait_lock_correctness_nograph" 60 | , "spin_read_lock_correctness_nograph" 61 | , "spin_experimental_lock_correctness_nograph" 62 | , "ticket_correctness_nograph" 63 | , "abql_sharing_correctness_nograph" 64 | , "abql_nosharing_correctness_nograph" 65 | , "mcs_sharing_correctness_nograph" 66 | , "mcs_nosharing_correctness_nograph" 67 | , "coarse_queue_correctness_nograph" 68 | , "nb_queue_correctness_nograph" 69 | 70 | , "pthread_spin_lock" 71 | 72 | , "spin_try_lock_nograph" 73 | , "spin_lock" 74 | , "spin_wait_lock" 75 | , 
"spin_read_lock" 76 | , "spin_experimental_lock" 77 | , "ticket_lock" 78 | , "abql_sharing_lock" 79 | , "abql_nosharing_lock" 80 | , "mcs_sharing_lock" 81 | , "mcs_nosharing_lock" 82 | , "coarse_queue" 83 | , "coarse_queue_enq" 84 | , "nb_queue" 85 | , "nb_queue_enq" 86 | }; 87 | 88 | // skip tests with 0s, each value corresponds to the test in the same position 89 | // in test_names above 90 | uint64_t test_on[] = { 0 // spin_try_lock_correctness_nograph 91 | , 0 // spin_lock_correctness_nograph 92 | , 0 // spin_wait_lock_correctness_nograph 93 | , 0 // spin_read_lock_correctness_nograph 94 | , 0 // spin_experimental_lock_correctness_nograph 95 | , 0 // ticket_correctness_nograph 96 | , 0 // abql_sharing_correctness_nograph 97 | , 0 // abql_nosharing_correctness_nograph 98 | , 0 // mcs_sharing_correctness_nograph 99 | , 0 // mcs_nosharing_correctness_nograph 100 | , 1 // coarse_queue_correctness_nograph 101 | , 0 // nb_queue_correctness_nograph 102 | 103 | , 0 // pthread_spin_lock 104 | 105 | , 0 // spin_try_lock_nograph 106 | , 0 // spin_lock 107 | , 0 // spin_wait_lock 108 | , 0 // spin_read_lock 109 | , 0 // spin_experimental_lock 110 | , 0 // ticket_lock 111 | , 0 // abql_sharing_lock 112 | , 0 // abql_nosharing_lock 113 | , 0 // mcs_sharing_lock 114 | , 0 // mcs_nosharing_lock 115 | , 0 // coarse_queue 116 | , 0 // coarse_queue_enq 117 | , 0 // nb_queue 118 | , 0 // nb_queue_enq 119 | }; 120 | 121 | uint64_t accesses_on[] = { 1 // spin_try_lock_correctness_nograph 122 | , 1 // spin_lock_correctness_nograph 123 | , 1 // spin_wait_lock_correctness_nograph 124 | , 1 // spin_read_lock_correctness_nograph 125 | , 1 // spin_experimental_lock_correctness_nograph 126 | , 1 // ticket_correctness_nograph 127 | , 1 // abql_sharing_correctness_nograph 128 | , 1 // abql_nosharing_correctness_nograph 129 | , 1 // mcs_sharing_correctness_nograph 130 | , 1 // mcs_nosharing_correctness_nograph 131 | , 0 // coarse_queue_correctness_nograph 132 | , 0 // 
nb_queue_correctness_nograph 133 | 134 | , 1 // pthread_spin_lock 135 | 136 | , 1 // spin_try_lock_nograph 137 | , 1 // spin_lock 138 | , 1 // spin_wait_lock 139 | , 1 // spin_read_lock 140 | , 1 // spin_experimental_lock 141 | , 1 // ticket_lock 142 | , 1 // abql_sharing_lock 143 | , 1 // abql_nosharing_lock 144 | , 1 // mcs_sharing_lock 145 | , 1 // mcs_nosharing_lock 146 | , 0 // coarse_queue 147 | , 0 // coarse_queue_enq 148 | , 0 // nb_queue 149 | , 0 // nb_queue_enq 150 | }; 151 | op **oss = NULL; 152 | uint64_t n_accesses = 3; 153 | uint64_t accessesv[] = { 0 // no accesses 154 | , CACHE_LINE 155 | , 10 * CACHE_LINE 156 | //, L1_DATA_CACHE 157 | }; 158 | 159 | uint64_t coarse_count(volatile coarse_queue *queue) { 160 | uint64_t n = 0; 161 | uint64_t head = CMM_LOAD_SHARED(queue->head); 162 | 163 | if(head != 0) { 164 | while(head != 0) { 165 | ++n; 166 | head = CMM_LOAD_SHARED(((coarse_node*)get_qptr(head))->next); 167 | } 168 | } 169 | 170 | return n; 171 | } 172 | 173 | uint64_t nb_count(volatile nb_queue *queue) { 174 | uint64_t n = 0; 175 | uint64_t head = CMM_LOAD_SHARED(queue->head); 176 | 177 | if(head != 0) { 178 | while(head != 0) { 179 | ++n; 180 | head = CMM_LOAD_SHARED(((nb_node*)get_qptr(head))->next); 181 | } 182 | --n; // account for dummy node 183 | } 184 | 185 | return n; 186 | } 187 | 188 | void tests_multi() { 189 | int i,j,k,all_ops,test; 190 | pthread_t ts[n_threads]; 191 | ti_data_in datas[n_threads]; 192 | global_datas = datas; 193 | oss = malloc(sizeof(op*) * n_tests); 194 | for(test=0; test 5 | 6 | #include "util.h" 7 | 8 | // global data 9 | 10 | extern pthread_spinlock_t pthreads_spinlock; 11 | extern volatile uint64_t my_spinlock; 12 | extern volatile uint64_t my_spinlock_shared_counter; 13 | 14 | // define a type for ticket 15 | typedef struct { 16 | uint64_t next; 17 | uint64_t padding[7]; 18 | uint64_t owner; 19 | } ticket_state; 20 | 21 | // declare shared data for ticket 22 | 23 | extern volatile ticket_state my_ticket_lock 
__attribute__((aligned (CACHE_LINE_BYTES))); 24 | 25 | // define a type for abql_sharing 26 | typedef struct { 27 | uint64_t val; 28 | } flag_sharing; 29 | 30 | // declare shared data for abql_sharing 31 | 32 | extern volatile uint64_t queue_last_sharing; 33 | extern volatile flag_sharing *flags_sharing; 34 | 35 | // define a type for abql_nosharing (alter the definition below) 36 | typedef struct { 37 | uint64_t val; 38 | uint64_t padding[7]; 39 | } flag_nosharing; 40 | 41 | // declare shared data for abql_nosharing 42 | 43 | extern volatile uint64_t queue_last_nosharing; 44 | extern volatile flag_nosharing *flags_nosharing; 45 | 46 | // define a type for mcs_sharing 47 | typedef struct { 48 | uint64_t next; 49 | uint64_t locked; 50 | } mcs_sharing; 51 | 52 | // declare shared data for mcs_sharing 53 | 54 | extern volatile mcs_sharing mcs_global_sharing; 55 | extern volatile mcs_sharing *mcss_sharing; 56 | 57 | // define a type for mcs_nosharing (alter the definition below) 58 | typedef struct { 59 | uint64_t next; 60 | uint64_t padding[7]; 61 | uint64_t locked; 62 | } mcs_nosharing; 63 | 64 | // declare shared data for mcs_nosharing 65 | 66 | extern volatile mcs_nosharing mcs_global_nosharing __attribute__((aligned (CACHE_LINE_BYTES))); 67 | extern volatile mcs_nosharing *mcss_nosharing; 68 | 69 | // TODO define types for coarse_node and coarse_queue (change the type of lock 70 | // in coarse_queue if you'd like to use a lock that's represented by something 71 | // besides a word, e.g. 
mcs_nosharing) 72 | 73 | typedef struct { 74 | uint64_t val; 75 | uint64_t next; 76 | uint64_t padding[6]; 77 | } coarse_node; 78 | 79 | typedef struct { 80 | uint64_t head; 81 | uint64_t tail; 82 | uint64_t lock; 83 | } coarse_queue; 84 | 85 | // declare shared data for coarse_queue 86 | 87 | extern volatile coarse_queue my_coarse_queue __attribute__((aligned (CACHE_LINE_BYTES))); 88 | 89 | // define types for nb_node and nb_queue 90 | 91 | typedef struct { 92 | uint64_t val; 93 | uint64_t next; 94 | uint64_t padding[6]; 95 | } nb_node; 96 | 97 | typedef struct { 98 | uint64_t head; 99 | uint64_t padding[7]; 100 | uint64_t tail; 101 | uint64_t padding1[7]; 102 | } nb_queue; 103 | 104 | // declare shared data for nb_queue 105 | 106 | extern volatile nb_queue my_nb_queue __attribute__((aligned (CACHE_LINE_BYTES))); 107 | 108 | // LOCK VALUES 109 | 110 | #define UNLOCKED 0 111 | #define LOCKED 1 112 | 113 | #define HAS_LOCK 2 114 | #define MUST_WAIT 3 115 | 116 | // OPERATIONS 117 | // must all be unique 118 | 119 | #define PTHREAD_SPIN_LOCK_LOCK_OP 1 120 | #define SPIN_LOCK_LOCK_OP 2 121 | #define SPIN_LOCK_INC_OP 3 122 | #define SPIN_WAIT_LOCK_LOCK_OP 4 123 | #define SPIN_WAIT_LOCK_INC_OP 5 124 | #define SPIN_READ_LOCK_LOCK_OP 6 125 | #define SPIN_READ_LOCK_INC_OP 7 126 | #define SPIN_TRY_LOCK_LOCK_OP 8 127 | #define SPIN_TRY_LOCK_INC_OP 9 128 | #define SPIN_EXPERIMENTAL_LOCK_LOCK_OP 10 129 | #define SPIN_EXPERIMENTAL_LOCK_INC_OP 11 130 | #define ABQL_SHARING_LOCK_OP 12 131 | #define ABQL_SHARING_INC_OP 13 132 | #define ABQL_NOSHARING_LOCK_OP 14 133 | #define ABQL_NOSHARING_INC_OP 15 134 | #define MCS_SHARING_LOCK_OP 16 135 | #define MCS_SHARING_INC_OP 17 136 | #define MCS_NOSHARING_LOCK_OP 18 137 | #define MCS_NOSHARING_INC_OP 19 138 | #define TICKET_LOCK_OP 20 139 | #define TICKET_INC_OP 21 140 | #define COARSE_QUEUE_ENQ_OP 22 141 | #define COARSE_QUEUE_DEQ_OP 23 142 | #define COARSE_QUEUE_DEQ_REC_OP 24 143 | #define NB_QUEUE_ENQ_OP 25 144 | #define 
NB_QUEUE_DEQ_OP 26 145 | #define NB_QUEUE_DEQ_REC_OP 27 146 | 147 | // PHASES 148 | 149 | #define INIT_PHASE 0 150 | #define READY_PHASE 1 151 | #define WARMED_PHASE 2 152 | #define OPS_PHASE 3 153 | #define CHECK_PHASE 4 154 | 155 | // TESTS 156 | // must be the sequence of natural numbers from 0 to the 157 | // total number of tests - 1. 158 | 159 | #define SPIN_TRY_LOCK_CORRECTNESS_TEST 0 160 | #define SPIN_LOCK_CORRECTNESS_TEST 1 161 | #define SPIN_WAIT_LOCK_CORRECTNESS_TEST 2 162 | #define SPIN_READ_LOCK_CORRECTNESS_TEST 3 163 | #define SPIN_EXPERIMENTAL_LOCK_CORRECTNESS_TEST 4 164 | #define TICKET_CORRECTNESS_TEST 5 165 | #define ABQL_SHARING_CORRECTNESS_TEST 6 166 | #define ABQL_NOSHARING_CORRECTNESS_TEST 7 167 | #define MCS_SHARING_CORRECTNESS_TEST 8 168 | #define MCS_NOSHARING_CORRECTNESS_TEST 9 169 | #define COARSE_QUEUE_CORRECTNESS_TEST 10 170 | #define NB_QUEUE_CORRECTNESS_TEST 11 171 | #define PTHREAD_SPIN_LOCK_TEST 12 172 | #define SPIN_TRY_LOCK_TEST 13 173 | #define SPIN_LOCK_TEST 14 174 | #define SPIN_WAIT_LOCK_TEST 15 175 | #define SPIN_READ_LOCK_TEST 16 176 | #define SPIN_EXPERIMENTAL_LOCK_TEST 17 177 | #define TICKET_TEST 18 178 | #define ABQL_SHARING_TEST 19 179 | #define ABQL_NOSHARING_TEST 20 180 | #define MCS_SHARING_TEST 21 181 | #define MCS_NOSHARING_TEST 22 182 | #define COARSE_QUEUE_TEST 23 183 | #define COARSE_QUEUE_ENQ_TEST 24 184 | #define NB_QUEUE_TEST 25 185 | #define NB_QUEUE_ENQ_TEST 26 186 | 187 | #define PASTE31(x, y, z) x ## y ## z 188 | #define PASTE3(x, y, z) PASTE31(x, y, z) 189 | 190 | #define PASTE21(x, y) x ## y 191 | #define PASTE2(x, y) PASTE21(x, y) 192 | 193 | #define STR_VALUE(arg) #arg 194 | #define FUNCTION_NAME(name) STR_VALUE(name) 195 | 196 | #define TIME_AND_CHECK(uid, t_i, run, fcall, label, dst, ret) __N3WLN__ \ 197 | do { __N3WLN__ \ 198 | uint64_t (PASTE3(pre,__LINE__,uid)), (PASTE3(post,__LINE__,uid)); __N3WLN__ \ 199 | struct timeval (PASTE3(tv1,__LINE__,uid)), (PASTE3(tv2,__LINE__,uid)); __N3WLN__ \ 200 | 
double (PASTE3(secs,__LINE__,uid)); __N3WLN__ \ 201 | unsigned int (PASTE3(ui,__LINE__,uid)); __N3WLN__ \ 202 | printf_parent("%d: run %d: about to call %s [%s]:\n", t_i, run, FUNCTION_NAME(fcall), label); __N3WLN__ \ 203 | gettimeofday(&(PASTE3(tv1,__LINE__,uid)), NULL); __N3WLN__ \ 204 | (PASTE3(pre,__LINE__,uid)) = __rdtscp(&(PASTE3(ui,__LINE__,uid))); __N3WLN__ \ 205 | cmm_barrier(); __N3WLN__ \ 206 | ret = fcall; __N3WLN__ \ 207 | cmm_barrier(); __N3WLN__ \ 208 | (PASTE3(post,__LINE__,uid)) = __rdtscp(&(PASTE3(ui,__LINE__,uid))); __N3WLN__ \ 209 | gettimeofday(&(PASTE3(tv2,__LINE__,uid)), NULL); __N3WLN__ \ 210 | dst = (PASTE3(post,__LINE__,uid)) - (PASTE3(pre,__LINE__,uid)); __N3WLN__ \ 211 | (PASTE3(secs,__LINE__,uid)) = (double) ((PASTE3(tv2,__LINE__,uid)).tv_usec - (PASTE3(tv1,__LINE__,uid)).tv_usec) / 1000000 + (double) ((PASTE3(tv2,__LINE__,uid)).tv_sec - (PASTE3(tv1,__LINE__,uid)).tv_sec); __N3WLN__ \ 212 | printf_parent("%d: run %d: call to %s [%s] took %" PRIu64 " TSC ticks, %f seconds, returned %d\n", t_i, run, FUNCTION_NAME(fcall), label, dst, (PASTE3(secs,__LINE__,uid)), ret); __N3WLN__ \ 213 | if(!ret) { __N3WLN__ \ 214 | printf_parent("%d: run %d: call succeeded (returned 0)\n", t_i, run); __N3WLN__ \ 215 | } __N3WLN__ \ 216 | else { __N3WLN__ \ 217 | printf_parent("%d: run %d: call failed (returned %d)\n", t_i, run, ret); __N3WLN__ \ 218 | } __N3WLN__ \ 219 | } while (0) __N3WLN__ \ 220 | 221 | #endif 222 | -------------------------------------------------------------------------------- /Assignment_3/util.c: -------------------------------------------------------------------------------- 1 | #include "util.h" 2 | 3 | unsigned int next_aff = 0; 4 | 5 | pthread_mutex_t affinity_mutex = PTHREAD_MUTEX_INITIALIZER; 6 | 7 | void set_affinity(void) 8 | { 9 | cpu_set_t mask; 10 | int cpu; 11 | int ret; 12 | 13 | ret = pthread_mutex_lock(&affinity_mutex); 14 | if (ret) { 15 | perror("Error in pthread mutex lock"); 16 | exit(-1); 17 | } 18 | 19 | 
if(next_aff >= n_cpus) { 20 | perror("Ran out of CPUs, reduce n_threads"); 21 | exit(-1); 22 | } 23 | cpu = next_aff++; 24 | ret = pthread_mutex_unlock(&affinity_mutex); 25 | if (ret) { 26 | perror("Error in pthread mutex unlock"); 27 | exit(-1); 28 | } 29 | CPU_ZERO(&mask); 30 | CPU_SET(cpu, &mask); 31 | sched_setaffinity(0, sizeof(mask), &mask); 32 | #if defined(PRINT_AFFINITIES) 33 | printf_verbose("set affinity %d\n", cpu); 34 | #endif 35 | } 36 | 37 | #define in_pool(line) (global_pool_data <= (line) && (line) <= global_pool_data + (n_pool_lines - 1) * CACHE_LINE) 38 | 39 | uint64_t pool_allocate(ti_data_in *d, uint64_t *p) { 40 | uint64_t i = d->pool_cur; 41 | uint64_t addr, ret, seq; 42 | do { 43 | if (CMM_LOAD_SHARED(d->pool_meta[i].status) == FREE) { 44 | _CMM_STORE_SHARED(d->pool_meta[i].status, USED); 45 | d->pool_cur = (i + 1) % d->pool_lines; 46 | addr = (uint64_t)(d->pool_data + i * CACHE_LINE); 47 | seq = d->pool_meta[i].sequence_number; 48 | if (addr % CACHE_LINE_BYTES != 0) { 49 | printf_error("%d: Allocator isn't giving back cache-line-aligned addresses. Tried to return %" PRIx64 ", sequence number %" PRIu64 "\n",d->i,addr,seq); 50 | while(1); 51 | } 52 | if (!in_pool((uint64_t*)get_qptr(addr))) { 53 | printf_error("%d: Why am I allocating memory outside the pool? Tried to return %" PRIx64 "\n",d->i,addr); 54 | while(1); 55 | } 56 | ret = addr | seq; 57 | //printf("%d: allocated %" PRIx64 ", sequence number %" PRIu64 "\n", d->i, addr, d->pool_meta[i].sequence_number); 58 | _CMM_STORE_SHARED(*p, ret); 59 | // TODO why is returning uint64_t broken? It sets the top half of the word for no reason, 60 | // works fine if I instead store to an address as above. 
61 | return 1; 62 | } 63 | i = (i + 1) % d->pool_lines; 64 | } while (i != d->pool_cur); 65 | 66 | printf_error("%d: Ran out of memory in my allocation pool, crank up pool size or reduce number of operations.\n", d->i); 67 | exit(-1); 68 | } 69 | 70 | // this is okay, because the freer will always be unique (nobody else dequeued 71 | // that node), and the allocation happens in enqueue, which will always happen 72 | // before the corresponding dequeue. 73 | void pool_free(ti_data_in *d, uint64_t *line) { 74 | if(!in_pool(line)) { 75 | printf_error("%d: Tried to free a line %" PRIx64 " not from the pool, failing.\n", d->i, (uint64_t)line); 76 | exit(-1); 77 | } 78 | 79 | uint64_t i = (uint64_t)(line - global_pool_data) / CACHE_LINE; 80 | if(global_pool_data + i * CACHE_LINE != line) { 81 | printf_error("%d: pool_free isn't lining up lines right\n",d->i); 82 | while(1); 83 | } 84 | if(CMM_LOAD_SHARED(global_pool_meta[i].status) != USED) { 85 | printf_error("%d: Tried to free a line %" PRIx64 " not in use, failing.\n", d->i, (uint64_t)line); 86 | while(1); 87 | exit(-1); 88 | } 89 | 90 | uint64_t sequence_number = global_pool_meta[i].sequence_number; 91 | if (sequence_number >= 63) { 92 | _CMM_STORE_SHARED(global_pool_meta[i].status, DEAD); 93 | } 94 | else { 95 | _CMM_STORE_SHARED(global_pool_meta[i].sequence_number, sequence_number + 1); 96 | // atomically update status at the end, so the next allocator finds the 97 | // correct sequence number. 
98 | _CMM_STORE_SHARED(global_pool_meta[i].status, FREE); 99 | } 100 | 101 | return; 102 | } 103 | -------------------------------------------------------------------------------- /Assignment_3/util.h: -------------------------------------------------------------------------------- 1 | #ifndef UTIL_H 2 | #define UTIL_H 3 | 4 | #define _GNU_SOURCE 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #define URCU_INLINE_SMALL_FUNCTIONS 12 | #include 13 | #include 14 | #include 15 | 16 | #define printf_error(fmt, args...) \ 17 | do { \ 18 | printf(fmt, args); fflush(stdout); \ 19 | } while (0) 20 | 21 | #define printf_verbose(fmt, args...) \ 22 | do { \ 23 | if (PRINT_VERBOSE) { printf(fmt, args); fflush(stdout); } \ 24 | } while (0) 25 | 26 | #define printf_parent(fmt, args...) \ 27 | do { \ 28 | if (PRINT_PARENT) { printf(fmt, args); fflush(stdout); } \ 29 | } while (0) 30 | 31 | #define printf_worker(fmt, args...) \ 32 | do { \ 33 | if (PRINT_WORKER) { printf(fmt, args); fflush(stdout); } \ 34 | } while (0) 35 | 36 | #define printf_csv(fmt, args...) \ 37 | do { \ 38 | if (PRINT_CSV) { printf(fmt, args); fflush(stdout); } \ 39 | } while (0) 40 | 41 | // If *ptr == old, atomically store new to *ptr and return old. 42 | // Otherwise, return the value of *ptr without changing it. 43 | #define lockcmpxchgq(ptr, old, new) __N3WLN__ \ 44 | ({ __N3WLN__ \ 45 | uint64_t __ret; __N3WLN__ \ 46 | uint64_t __old = (old); __N3WLN__ \ 47 | uint64_t __new = (new); __N3WLN__ \ 48 | volatile uint64_t *__ptr = (ptr); __N3WLN__ \ 49 | asm volatile("lock; cmpxchgq %2,%1" __N3WLN__ \ 50 | : "=a" (__ret), "+m" (*__ptr) __N3WLN__ \ 51 | : "r" (__new), "0" (__old) __N3WLN__ \ 52 | : "memory"); __N3WLN__ \ 53 | __ret; __N3WLN__ \ 54 | }) 55 | 56 | // Store new to *ptr, and return the immediately previous value in *ptr's 57 | // coherence order. 
Excerpt from a comment in glibc from 58 | // nptl/pthread_spin_lock.c: 59 | // xchgq usually takes less instructions than 60 | // lockcmpxchg. On the other hand, 61 | // lockcmpxchg potentially generates less bus traffic when the lock is locked. 62 | #define xchgq(ptr, new) __N3WLN__ \ 63 | ({ __N3WLN__ \ 64 | uint64_t __new = (new); __N3WLN__ \ 65 | volatile uint64_t *__ptr = (ptr); __N3WLN__ \ 66 | asm volatile ("xchgq %0, %1" __N3WLN__ \ 67 | : "=r" (__new), "=m" (*__ptr) __N3WLN__ \ 68 | : "0" (__new), "m" (*__ptr) __N3WLN__ \ 69 | : "memory" ); __N3WLN__ \ 70 | __new; __N3WLN__ \ 71 | }) 72 | 73 | // atomically add val to *ptr, returning the pre-add value of *ptr. 74 | #define lockxaddq(ptr, val) __N3WLN__ \ 75 | ({ __N3WLN__ \ 76 | uint64_t __val = (val); __N3WLN__ \ 77 | volatile uint64_t *__ptr = (ptr); __N3WLN__ \ 78 | asm volatile("lock; xaddq %0, %1" __N3WLN__ \ 79 | : "+r" (__val), "+m" (*__ptr) __N3WLN__ \ 80 | : : "memory" ); __N3WLN__ \ 81 | __val; __N3WLN__ \ 82 | }) 83 | 84 | typedef struct { 85 | uint64_t operation; 86 | uint64_t params[1]; 87 | } op; 88 | 89 | #define CACHE_LINE 8 // 8-byte words in a cache line 90 | #define CACHE_LINE_BYTES (8 * CACHE_LINE) 91 | #define L1_DATA_CACHE 4096 // 8-byte words in the 32KB L1 cache on Ivy Bridge 92 | #define ACCESS_BUFFER_SIZE (2 * L1_DATA_CACHE) 93 | 94 | #define POOL_SIZE (L1_DATA_CACHE / 4) // each hyperthread uses no more than half of its cores 95 | // L1 data cache for allocated memory. 
we still need 96 | // space for ops and allocation metadata (and the actual 97 | // data concurrent operations touch), after all 98 | #define POOL_LINES (POOL_SIZE / 8) 99 | 100 | #define FREE 1 101 | #define USED 2 102 | #define DEAD 3 103 | 104 | typedef struct { 105 | uint64_t status; 106 | uint64_t sequence_number; 107 | } pool_meta; 108 | 109 | typedef struct { 110 | int i; 111 | int n_ops_i; 112 | volatile uint64_t phase; 113 | volatile uint64_t my_spinlock_counter; 114 | struct drand48_data rand_state; 115 | // for critical section access simulation buffer 116 | uint64_t *buffer; 117 | uint64_t buffer_cur; 118 | // for per-thread allocation pool 119 | uint64_t pool_cur; 120 | uint64_t pool_lines; 121 | pool_meta *pool_meta; 122 | uint64_t *pool_data; 123 | // for testing queue implementations 124 | uint64_t *dequeues; 125 | uint64_t n_dequeues; 126 | uint64_t n_enqueues; 127 | uint64_t *ti_dequeues_curs; 128 | } ti_data_in; 129 | 130 | void set_affinity(void); 131 | 132 | extern volatile uint64_t completed_phase; 133 | extern uint64_t n_threads, n_cpus, n_ops, n_runs, n_tests, n_accesses, n_pool_lines; 134 | extern op **oss; 135 | extern char *test_names[]; 136 | extern uint64_t test_on[]; 137 | extern uint64_t accesses_on[]; 138 | extern uint64_t accessesv[]; 139 | extern uint64_t global_buffer[]; 140 | extern pool_meta *global_pool_meta; 141 | extern uint64_t *global_pool_data; 142 | extern uint64_t *global_dequeues; 143 | extern ti_data_in *global_datas; 144 | 145 | // blank out the lower-order bits in a 64-bit pointer mashed together with a 146 | // sequence number to obtain a pointer we can use. 147 | // 0xffffffffffffffc0 is 58 1s followed by 6 0s, since we're pointing to 64-byte 148 | // objects (cache lines), so we don't need the bottom 6 bits (2^6 == 64) and can 149 | // use them to store the sequence number. 
150 | #define get_qptr(ptr) ((ptr) & 0xffffffffffffffc0) 151 | 152 | // On Intel, the busy-wait-nop instruction is called "pause", 153 | // which is actually represented as a nop with the rep prefix. 154 | // On processors before the P4 this behaves as a nop; on P4 and 155 | // later it might do something clever like yield to another 156 | // hyperthread. In any case, Intel recommends putting one 157 | // of these in a spin lock loop. 158 | #define spin_pause() do { __asm__ __volatile__ ("rep; nop"); } while (0) 159 | #define nop() do { __asm__ __volatile__ ("nop"); } while (0) 160 | 161 | #define access(i, d, n_words) do { \ 162 | for((i)=0; (i)<(n_words); ++(i), (d)->buffer_cur = ((d)->buffer_cur + 1) % ACCESS_BUFFER_SIZE) { \ 163 | _CMM_STORE_SHARED((d)->buffer[(d)->buffer_cur], 1 + (CMM_LOAD_SHARED((d)->buffer[(d)->buffer_cur]))); \ 164 | } \ 165 | } while(0) 166 | 167 | #if defined(PRINT_SPIN) 168 | #define spin_until(p, phase, t_i) __N3WLN__ \ 169 | do { __N3WLN__ \ 170 | while(CMM_LOAD_SHARED(*p) != phase) { spin_pause(); } __N3WLN__ \ 171 | printf_verbose("%d: finished spin waiting for phase %d\n", t_i, phase); __N3WLN__ \ 172 | fflush(stdout); __N3WLN__ \ 173 | } while (0) __N3WLN__ \ 174 | 175 | #else 176 | #define spin_until(p, phase, t_i) __N3WLN__ \ 177 | do { __N3WLN__ \ 178 | while(CMM_LOAD_SHARED(*p) != phase) { spin_pause(); } __N3WLN__ \ 179 | } while (0) __N3WLN__ \ 180 | 181 | #endif 182 | 183 | #if defined(PRINT_SPIN) 184 | #define sleep_until(p, phase, t_i) __N3WLN__ \ 185 | do { __N3WLN__ \ 186 | while(CMM_LOAD_SHARED(*p) != phase) { usleep(100); spin_pause(); } __N3WLN__ \ 187 | printf_verbose("%d: finished spin waiting for phase %d\n", t_i, phase); __N3WLN__ \ 188 | fflush(stdout); __N3WLN__ \ 189 | } while (0) __N3WLN__ \ 190 | 191 | #else 192 | #define sleep_until(p, phase, t_i) __N3WLN__ \ 193 | do { __N3WLN__ \ 194 | while(CMM_LOAD_SHARED(*p) != phase) { usleep(100); spin_pause(); } __N3WLN__ \ 195 | } while (0) __N3WLN__ \ 196 | 197 | 
#endif 198 | 199 | #endif 200 | -------------------------------------------------------------------------------- /Assignment_3/worker.h: -------------------------------------------------------------------------------- 1 | #ifndef WORKER_H 2 | #define WORKER_H 3 | 4 | #include "util.h" 5 | 6 | void* ti(void *data); 7 | 8 | int spin_try_lock(volatile uint64_t *lock) __attribute__((always_inline)); 9 | void spin_lock(volatile uint64_t *lock) __attribute__((always_inline)); 10 | void spin_wait_lock(volatile uint64_t *lock) __attribute__((always_inline)); 11 | void spin_read_lock(volatile uint64_t *lock) __attribute__((always_inline)); 12 | void spin_unlock(volatile uint64_t *lock) __attribute__((always_inline)); 13 | void spin_experimental_lock(volatile uint64_t *lock) __attribute__((always_inline)); 14 | void abql_sharing_lock(volatile uint64_t *my_place, volatile uint64_t *queue_last, 15 | volatile flag_sharing *flags, uint64_t n_threads) __attribute__((always_inline)); 16 | void abql_sharing_unlock(volatile uint64_t *my_place, volatile flag_sharing *flags, 17 | uint64_t n_threads) __attribute__((always_inline)); 18 | void abql_nosharing_lock(volatile uint64_t *my_place, volatile uint64_t *queue_last, 19 | volatile flag_nosharing *flags, uint64_t n_threads) __attribute__((always_inline)); 20 | void abql_nosharing_unlock(volatile uint64_t *my_place, volatile flag_nosharing *flags, 21 | uint64_t n_threads) __attribute__((always_inline)); 22 | void mcs_sharing_lock(volatile mcs_sharing *global_lock, volatile mcs_sharing *local_lock) __attribute__((always_inline)); 23 | void mcs_nosharing_lock(volatile mcs_nosharing *global_lock, volatile mcs_nosharing *local_lock) __attribute__((always_inline)); 24 | void mcs_sharing_unlock(volatile mcs_sharing *global_lock, volatile mcs_sharing *local_lock) __attribute__((always_inline)); 25 | void mcs_nosharing_unlock(volatile mcs_nosharing *global_lock, volatile mcs_nosharing *local_lock) __attribute__((always_inline)); 26 | void 
ticket_lock(volatile ticket_state *lock) __attribute__((always_inline)); 27 | void ticket_unlock(volatile ticket_state *lock) __attribute__((always_inline)); 28 | void coarse_enqueue(ti_data_in *d, volatile coarse_queue *queue, uint64_t value) __attribute__((always_inline)); 29 | int coarse_dequeue(ti_data_in *d, volatile coarse_queue *queue, uint64_t *ret) __attribute__((always_inline)); 30 | void nb_enqueue(ti_data_in *d, volatile nb_queue *queue, uint64_t value) __attribute__((always_inline)); 31 | int nb_dequeue(ti_data_in *d, volatile nb_queue *queue, uint64_t *ret) __attribute__((always_inline)); 32 | #endif 33 | -------------------------------------------------------------------------------- /Assignment_4/Documentation/cheatsheet.txt: -------------------------------------------------------------------------------- 1 | Prior Operation Subsequent Operation 2 | --------------- --------------------------- 3 | C Self R W RMW Self R W DR DW RMW SV 4 | __ ---- - - --- ---- - - -- -- --- -- 5 | 6 | Store, e.g., WRITE_ONCE() Y Y 7 | Load, e.g., READ_ONCE() Y Y Y 8 | Unsuccessful RMW operation Y Y Y 9 | smp_read_barrier_depends() Y Y Y 10 | *_dereference() Y Y Y Y 11 | Successful *_acquire() R Y Y Y Y Y Y 12 | Successful *_release() C Y Y Y W Y 13 | smp_rmb() Y R Y Y R 14 | smp_wmb() Y W Y Y W 15 | smp_mb() & synchronize_rcu() CP Y Y Y Y Y Y Y Y 16 | Successful full non-void RMW CP Y Y Y Y Y Y Y Y Y Y Y 17 | smp_mb__before_atomic() CP Y Y Y a a a a Y 18 | smp_mb__after_atomic() CP a a Y Y Y Y Y 19 | 20 | 21 | Key: C: Ordering is cumulative 22 | P: Ordering propagates 23 | R: Read, for example, READ_ONCE(), or read portion of RMW 24 | W: Write, for example, WRITE_ONCE(), or write portion of RMW 25 | Y: Provides ordering 26 | a: Provides ordering given intervening RMW atomic operation 27 | DR: Dependent read (address dependency) 28 | DW: Dependent write (address, data, or control dependency) 29 | RMW: Atomic read-modify-write operation 30 | SV: Same-variable access 31 | 
-------------------------------------------------------------------------------- /Assignment_4/Documentation/references.txt: -------------------------------------------------------------------------------- 1 | This document provides background reading for memory models and related 2 | tools. These documents are aimed at kernel hackers who are interested 3 | in memory models. 4 | 5 | 6 | Hardware manuals and models 7 | =========================== 8 | 9 | o SPARC International Inc. (Ed.). 1994. "The SPARC Architecture 10 | Reference Manual Version 9". SPARC International Inc. 11 | 12 | o Compaq Computer Corporation (Ed.). 2002. "Alpha Architecture 13 | Reference Manual". Compaq Computer Corporation. 14 | 15 | o Intel Corporation (Ed.). 2002. "A Formal Specification of Intel 16 | Itanium Processor Family Memory Ordering". Intel Corporation. 17 | 18 | o Intel Corporation (Ed.). 2002. "Intel 64 and IA-32 Architectures 19 | Software Developer's Manual". Intel Corporation. 20 | 21 | o Peter Sewell, Susmit Sarkar, Scott Owens, Francesco Zappa Nardelli, 22 | and Magnus O. Myreen. 2010. "x86-TSO: A Rigorous and Usable 23 | Programmer's Model for x86 Multiprocessors". Commun. ACM 53, 7 24 | (July 2010), 89-97. http://doi.acm.org/10.1145/1785414.1785443 25 | 26 | o IBM Corporation (Ed.). 2009. "Power ISA Version 2.06". IBM 27 | Corporation. 28 | 29 | o ARM Ltd. (Ed.). 2009. "ARM Barrier Litmus Tests and Cookbook". 30 | ARM Ltd. 31 | 32 | o Susmit Sarkar, Peter Sewell, Jade Alglave, Luc Maranget, and 33 | Derek Williams. 2011. "Understanding POWER Multiprocessors". In 34 | Proceedings of the 32nd ACM SIGPLAN Conference on Programming 35 | Language Design and Implementation (PLDI '11). ACM, New York, 36 | NY, USA, 175-186. 37 | 38 | o Susmit Sarkar, Kayvan Memarian, Scott Owens, Mark Batty, 39 | Peter Sewell, Luc Maranget, Jade Alglave, and Derek Williams. 40 | 2012. "Synchronising C/C++ and POWER". 
In Proceedings of the 33rd 41 | ACM SIGPLAN Conference on Programming Language Design and 42 | Implementation (PLDI '12). ACM, New York, NY, USA, 311-322. 43 | 44 | o ARM Ltd. (Ed.). 2014. "ARM Architecture Reference Manual (ARMv8, 45 | for ARMv8-A architecture profile)". ARM Ltd. 46 | 47 | o Imagination Technologies, LTD. 2015. "MIPS(R) Architecture 48 | For Programmers, Volume II-A: The MIPS64(R) Instruction 49 | Set Reference Manual". Imagination Technologies, 50 | LTD. https://imgtec.com/?do-download=4302. 51 | 52 | o Shaked Flur, Kathryn E. Gray, Christopher Pulte, Susmit 53 | Sarkar, Ali Sezgin, Luc Maranget, Will Deacon, and Peter 54 | Sewell. 2016. "Modelling the ARMv8 Architecture, Operationally: 55 | Concurrency and ISA". In Proceedings of the 43rd Annual ACM 56 | SIGPLAN-SIGACT Symposium on Principles of Programming Languages 57 | (POPL '16). ACM, New York, NY, USA, 608-621. 58 | 59 | o Shaked Flur, Susmit Sarkar, Christopher Pulte, Kyndylan Nienhuis, 60 | Luc Maranget, Kathryn E. Gray, Ali Sezgin, Mark Batty, and Peter 61 | Sewell. 2017. "Mixed-size Concurrency: ARM, POWER, C/C++11, 62 | and SC". In Proceedings of the 44th ACM SIGPLAN Symposium on 63 | Principles of Programming Languages (POPL 2017). ACM, New York, 64 | NY, USA, 429-442. 65 | 66 | 67 | Linux-kernel memory model 68 | ========================= 69 | 70 | o Andrea Parri, Alan Stern, Luc Maranget, Paul E. McKenney, 71 | and Jade Alglave. 2017. "A formal model of 72 | Linux-kernel memory ordering - companion webpage". 73 | http://moscova.inria.fr/~maranget/cats7/linux/. (2017). [Online; 74 | accessed 30-January-2017]. 75 | 76 | o Jade Alglave, Luc Maranget, Paul E. McKenney, Andrea Parri, and 77 | Alan Stern. 2017. "A formal kernel memory-ordering model (part 1)". 78 | Linux Weekly News. https://lwn.net/Articles/718628/ 79 | 80 | o Jade Alglave, Luc Maranget, Paul E. McKenney, Andrea Parri, and 81 | Alan Stern. 2017. "A formal kernel memory-ordering model (part 2)". 82 | Linux Weekly News. 
https://lwn.net/Articles/720550/ 83 | 84 | 85 | Memory-model tooling 86 | ==================== 87 | 88 | o Daniel Jackson. 2002. "Alloy: A Lightweight Object Modelling 89 | Notation". ACM Trans. Softw. Eng. Methodol. 11, 2 (April 2002), 90 | 256–290. http://doi.acm.org/10.1145/505145.505149 91 | 92 | o Jade Alglave, Luc Maranget, and Michael Tautschnig. 2014. "Herding 93 | Cats: Modelling, Simulation, Testing, and Data Mining for Weak 94 | Memory". ACM Trans. Program. Lang. Syst. 36, 2, Article 7 (July 95 | 2014), 7:1–7:74 pages. 96 | 97 | o Jade Alglave, Patrick Cousot, and Luc Maranget. 2016. "Syntax and 98 | semantics of the weak consistency model specification language 99 | cat". CoRR abs/1608.07531 (2016). http://arxiv.org/abs/1608.07531 100 | 101 | 102 | Memory-model comparisons 103 | ======================== 104 | 105 | o Paul E. McKenney, Ulrich Weigand, Andrea Parri, and Boqun 106 | Feng. 2016. "Linux-Kernel Memory Model". (6 June 2016). 107 | http://open-std.org/JTC1/SC22/WG21/docs/papers/2016/p0124r2.html. 108 | -------------------------------------------------------------------------------- /Assignment_4/HERD_WISHLIST.md: -------------------------------------------------------------------------------- 1 | * Print which check in the model failed, ideally for a wiring that satisfies 2 | the condition but is rejected by the model. 3 | * 4 | -------------------------------------------------------------------------------- /Assignment_4/MAINTAINERS: -------------------------------------------------------------------------------- 1 | LINUX KERNEL MEMORY MODEL 2 | M: Alan Stern 3 | M: Andrea Parri 4 | M: Will Deacon 5 | M: Peter Zijlstra 6 | M: Boqun Feng 7 | M: Nicholas Piggin 8 | M: David Howells 9 | M: Jade Alglave 10 | M: Luc Maranget 11 | M: "Paul E. 
McKenney" 12 | L: linux-kernel@vger.kernel.org 13 | S: Supported 14 | T: git git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu.git 15 | F: tools/memory-model/ 16 | -------------------------------------------------------------------------------- /Assignment_4/check: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | herd7 -conf linux-kernel.cfg "$1" 4 | -------------------------------------------------------------------------------- /Assignment_4/failgraph: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | LITMUS="$1" 4 | BNAME=$(basename "$LITMUS") 5 | NAME="${BNAME%.litmus}" 6 | DOT="${NAME}.dot" 7 | PDF="${NAME}.pdf" 8 | PDOT="dot/$DOT" 9 | PPDF="pdf/$PDF" 10 | 11 | mkdir -p dot pdf 12 | rm -f "$PDOT" 13 | herd7 -conf linux-kernel.cfg -skipchecks happens-before,propagation,coherence,rcu -o dot "$LITMUS" 14 | #herd7 -conf linux-kernel.cfg -skipchecks coherence -o dot "$LITMUS" 15 | if [ -s "$PDOT" ]; then 16 | dot -Tpdf "$PDOT" -o "$PPDF" 17 | echo "generated $PPDF" 18 | else 19 | echo "graph generation failed" 20 | fi 21 | -------------------------------------------------------------------------------- /Assignment_4/graph: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | LITMUS="$1" 4 | BNAME=$(basename "$LITMUS") 5 | NAME="${BNAME%.litmus}" 6 | DOT="${NAME}.dot" 7 | PDF="${NAME}.pdf" 8 | PDOT="dot/$DOT" 9 | PPDF="pdf/$PDF" 10 | 11 | mkdir -p dot pdf 12 | rm -f "$PDOT" 13 | herd7 -conf linux-kernel.cfg -o dot "$LITMUS" 14 | if [ -s "$PDOT" ]; then 15 | dot -Tpdf "$PDOT" -o "$PPDF" 16 | echo "generated $PPDF" 17 | else 18 | echo "graph generation failed" 19 | fi 20 | -------------------------------------------------------------------------------- /Assignment_4/linux-kernel-hardware.cat: -------------------------------------------------------------------------------- 1 | // 
SPDX-License-Identifier: GPL-2.0+ 2 | (* 3 | * Copyright (C) 2015 Jade Alglave , 4 | * Copyright (C) 2016 Luc Maranget for Inria 5 | * Copyright (C) 2017 Alan Stern , 6 | * Andrea Parri 7 | *) 8 | 9 | "Linux kernel hardware memory model" 10 | 11 | (* 12 | * File "lock.cat" handles locks and is experimental. 13 | * It can be replaced by include "cos.cat" for tests that do not use locks. 14 | *) 15 | 16 | include "lock.cat" 17 | 18 | (*******************) 19 | (* Basic relations *) 20 | (*******************) 21 | 22 | (* Fences *) 23 | let rb-dep = [R] ; fencerel(Rb_dep) ; [R] 24 | let rmb = [R \ Noreturn] ; fencerel(Rmb) ; [R] 25 | let wmb = [W] ; fencerel(Wmb) ; [W] 26 | let mb = ([M] ; fencerel(Mb) ; [M]) | 27 | ([M] ; fencerel(Before_atomic) ; [RMW] ; po? ; [M]) | 28 | ([M] ; po? ; [RMW] ; fencerel(After_atomic) ; [M]) | 29 | ([M] ; po? ; [LKW] ; fencerel(After_spinlock) ; [M]) 30 | let gp = po ; [Sync] ; po? 31 | 32 | let strong-fence = mb | gp 33 | 34 | (* Release Acquire *) 35 | let acq-po = [Acquire] ; po ; [M] 36 | let po-rel = [M] ; po ; [Release] 37 | let rfi-rel-acq = [Release] ; rfi ; [Acquire] 38 | 39 | (**********************************) 40 | (* Fundamental coherence ordering *) 41 | (**********************************) 42 | 43 | (* Sequential Consistency Per Variable *) 44 | let com = rf | co | fr 45 | acyclic po-loc | com as coherence 46 | 47 | (* Atomic Read-Modify-Write *) 48 | empty rmw & (fre ; coe) as atomic 49 | 50 | (**********************************) 51 | (* Instruction execution ordering *) 52 | (**********************************) 53 | 54 | (* Preserved Program Order *) 55 | let dep = addr | data 56 | let rwdep = (dep | ctrl) ; [W] 57 | let addrpo = addr ; po ; [W] 58 | let overwrite = co | fr 59 | let to-w = rwdep | addrpo | (overwrite & int) 60 | let rdw = po-loc & (fre ; rfe) 61 | let detour = po-loc & (coe ; rfe) 62 | let rrdep = addr | (dep ; rfi) | rdw 63 | let strong-rrdep = rrdep+ & rb-dep 64 | let to-r = strong-rrdep | 
rfi-rel-acq 65 | let fence = strong-fence | wmb | po-rel | rmb | acq-po 66 | let ppo = (rrdep* ; (to-r | to-w | fence)) | rdw | detour 67 | 68 | (* Happens Before *) 69 | let A-cumul(r) = rfe? ; r 70 | let cumul-fence = A-cumul(strong-fence | po-rel) | wmb 71 | 72 | let rec prop = (overwrite & ext)? ; cumul-fence ; hb* 73 | and hb = ppo | rfe | ((hb* ; prop) & int) 74 | acyclic hb as happens-before 75 | 76 | (****************************************) 77 | (* Write and fence propagation ordering *) 78 | (****************************************) 79 | 80 | (* Propagation *) 81 | let prop2 = prop | ((overwrite & ext)? ; rfe?) 82 | let pb = (prop2 ; strong-fence ; hb*) | ([W] ; prop ; [W]) 83 | acyclic pb as propagation 84 | 85 | (*******) 86 | (* RCU *) 87 | (*******) 88 | 89 | let rscs = po ; crit^-1 ; po? 90 | let link = hb* ; pb* ; prop2 91 | 92 | (* Chains that affect the RCU grace-period guarantee *) 93 | let gp-link = gp ; link 94 | let rscs-link = rscs ; link 95 | 96 | let rec rcu-path = 97 | gp-link | 98 | (gp-link ; rscs-link) | 99 | (rscs-link ; gp-link) | 100 | (rcu-path ; rcu-path) | 101 | (gp-link ; rcu-path ; rscs-link) | 102 | (rscs-link ; rcu-path ; gp-link) 103 | 104 | irreflexive rcu-path as rcu 105 | -------------------------------------------------------------------------------- /Assignment_4/linux-kernel.bell: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: GPL-2.0+ 2 | (* 3 | * Copyright (C) 2015 Jade Alglave , 4 | * Copyright (C) 2016 Luc Maranget for Inria 5 | * Copyright (C) 2017 Alan Stern , 6 | * Andrea Parri 7 | * 8 | * An earlier version of this file appears in the companion webpage for 9 | * "Frightening small children and disconcerting grown-ups: Concurrency 10 | * in the Linux kernel" by Alglave, Maranget, McKenney, Parri, and Stern, 11 | * which is to appear in ASPLOS 2018. 
12 | *) 13 | 14 | "Linux kernel memory model" 15 | 16 | enum Accesses = 'once (*READ_ONCE,WRITE_ONCE,ACCESS_ONCE*) || 17 | 'release (*smp_store_release*) || 18 | 'acquire (*smp_load_acquire*) || 19 | 'noreturn (* R of non-return RMW *) 20 | instructions R[{'once,'acquire,'noreturn}] 21 | instructions W[{'once,'release}] 22 | instructions RMW[{'once,'acquire,'release}] 23 | 24 | enum Barriers = 'wmb (*smp_wmb*) || 25 | 'rmb (*smp_rmb*) || 26 | 'mb (*smp_mb*) || 27 | 'rb_dep (*smp_read_barrier_depends*) || 28 | 'rcu_read_lock (*rcu_read_lock*) || 29 | 'rcu_read_unlock (*rcu_read_unlock*) || 30 | 'sync (*synchronize_rcu*) || 31 | 'before_atomic (*smp_mb__before_atomic*) || 32 | 'after_atomic (*smp_mb__after_atomic*) || 33 | 'after_spinlock (*smp_mb__after_spinlock*) 34 | instructions F[Barriers] 35 | 36 | (* Compute matching pairs of nested Rcu_read_lock and Rcu_read_unlock *) 37 | let matched = let rec 38 | unmatched-locks = Rcu_read_lock \ domain(matched) 39 | and unmatched-unlocks = Rcu_read_unlock \ range(matched) 40 | and unmatched = unmatched-locks | unmatched-unlocks 41 | and unmatched-po = [unmatched] ; po ; [unmatched] 42 | and unmatched-locks-to-unlocks = 43 | [unmatched-locks] ; po ; [unmatched-unlocks] 44 | and matched = matched | (unmatched-locks-to-unlocks \ 45 | (unmatched-po ; unmatched-po)) 46 | in matched 47 | 48 | (* Validate nesting *) 49 | flag ~empty Rcu_read_lock \ domain(matched) as unbalanced-rcu-locking 50 | flag ~empty Rcu_read_unlock \ range(matched) as unbalanced-rcu-locking 51 | 52 | (* Outermost level of nesting only *) 53 | let crit = matched \ (po^-1 ; matched ; po^-1) 54 | -------------------------------------------------------------------------------- /Assignment_4/linux-kernel.cat: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: GPL-2.0+ 2 | (* 3 | * Copyright (C) 2015 Jade Alglave , 4 | * Copyright (C) 2016 Luc Maranget for Inria 5 | * Copyright (C) 2017 Alan Stern , 
6 | * Andrea Parri 7 | * 8 | * An earlier version of this file appears in the companion webpage for 9 | * "Frightening small children and disconcerting grown-ups: Concurrency 10 | * in the Linux kernel" by Alglave, Maranget, McKenney, Parri, and Stern, 11 | * which is to appear in ASPLOS 2018. 12 | *) 13 | 14 | "Linux kernel memory model" 15 | 16 | (* 17 | * File "lock.cat" handles locks and is experimental. 18 | * It can be replaced by include "cos.cat" for tests that do not use locks. 19 | *) 20 | 21 | (* include "lock.cat" *) 22 | include "cos.cat" 23 | 24 | (*******************) 25 | (* Basic relations *) 26 | (*******************) 27 | 28 | (* Fences *) 29 | let rb-dep = [R] ; fencerel(Rb_dep) ; [R] 30 | let rmb = [R \ Noreturn] ; fencerel(Rmb) ; [R \ Noreturn] 31 | let wmb = [W] ; fencerel(Wmb) ; [W] 32 | let mb = ([M] ; fencerel(Mb) ; [M]) | 33 | ([M] ; fencerel(Before_atomic) ; [RMW] ; po? ; [M]) | 34 | ([M] ; po? ; [RMW] ; fencerel(After_atomic) ; [M]) | 35 | ([M] ; po? ; [LKW] ; fencerel(After_spinlock) ; [M]) 36 | let gp = po ; [Sync] ; po? 
37 | 38 | let strong-fence = mb | gp 39 | 40 | (* Release Acquire *) 41 | let acq-po = [Acquire] ; po ; [M] 42 | let po-rel = [M] ; po ; [Release] 43 | let rfi-rel-acq = [Release] ; rfi ; [Acquire] 44 | 45 | (**********************************) 46 | (* Fundamental coherence ordering *) 47 | (**********************************) 48 | 49 | (* Sequential Consistency Per Variable *) 50 | let com = rf | co | fr 51 | let cb = po-loc | com 52 | acyclic cb as coherence 53 | 54 | (* Atomic Read-Modify-Write *) 55 | empty rmw & (fre ; coe) as atomic 56 | 57 | (**********************************) 58 | (* Instruction execution ordering *) 59 | (**********************************) 60 | 61 | (* Preserved Program Order *) 62 | let dep = addr | data 63 | let rwdep = (dep | ctrl) ; [W] 64 | let overwrite = co | fr 65 | let to-w = rwdep | (overwrite & int) 66 | let rrdep = addr | (dep ; rfi) 67 | let strong-rrdep = rrdep+ & rb-dep 68 | let to-r = strong-rrdep | rfi-rel-acq 69 | let fence = strong-fence | wmb | po-rel | rmb | acq-po 70 | let ppo = rrdep* ; (to-r | to-w | fence) 71 | 72 | (* Propagation: Ordering from release operations and strong fences. *) 73 | let A-cumul(r) = rfe? ; r 74 | let cumul-fence = A-cumul(strong-fence | po-rel) | wmb 75 | let prop = (overwrite & ext)? ; cumul-fence* ; rfe? 76 | let prop-irrefl = prop \ id 77 | 78 | (* 79 | * Happens Before: Ordering from the passage of time. 80 | * No fences needed here for prop because relation confined to one process. 81 | *) 82 | let prop-local = prop-irrefl & int 83 | let hb = ppo | rfe | prop-local 84 | acyclic hb as happens-before 85 | 86 | (****************************************) 87 | (* Write and fence propagation ordering *) 88 | (****************************************) 89 | 90 | (* Propagation: Each non-rf link needs a strong fence. 
*) 91 | let pb = prop ; strong-fence ; hb* 92 | acyclic pb as propagation 93 | 94 | (*******) 95 | (* RCU *) 96 | (*******) 97 | 98 | (* 99 | * Effect of read-side critical section proceeds from the rcu_read_lock() 100 | * onward on the one hand and from the rcu_read_unlock() backwards on the 101 | * other hand. 102 | *) 103 | let rscs = po ; crit^-1 ; po? 104 | 105 | (* 106 | * The synchronize_rcu() strong fence is special in that it can order not 107 | * one but two non-rf relations, but only in conjunction with an RCU 108 | * read-side critical section. 109 | *) 110 | let link = hb* ; pb* ; prop 111 | 112 | (* Chains that affect the RCU grace-period guarantee *) 113 | let gp-link = gp ; link 114 | let rscs-link = rscs ; link 115 | 116 | (* 117 | * A cycle containing at least as many grace periods as RCU read-side 118 | * critical sections is forbidden. 119 | *) 120 | let rec rcu-path = 121 | gp-link | 122 | (gp-link ; rscs-link) | 123 | (rscs-link ; gp-link) | 124 | (rcu-path ; rcu-path) | 125 | (gp-link ; rcu-path ; rscs-link) | 126 | (rscs-link ; rcu-path ; gp-link) 127 | 128 | irreflexive rcu-path as rcu 129 | -------------------------------------------------------------------------------- /Assignment_4/linux-kernel.cfg: -------------------------------------------------------------------------------- 1 | macros linux-kernel.def 2 | bell linux-kernel.bell 3 | model linux-kernel.cat 4 | graph columns 5 | squished true 6 | showevents noregs 7 | movelabel true 8 | fontsize 8 9 | xscale 2.0 10 | yscale 1.5 11 | arrowsize 0.8 12 | showinitrf false 13 | showfinalrf false 14 | showinitwrites false 15 | splines spline 16 | pad 0.1 17 | edgeattr hb,color,indigo 18 | edgeattr co,color,blue 19 | edgeattr mb,color,darkgreen 20 | edgeattr wmb,color,darkgreen 21 | edgeattr rmb,color,darkgreen 22 | doshow ppo,data,addr,ctrl,co,cb,hb 23 | show prop 24 | -------------------------------------------------------------------------------- /Assignment_4/linux-kernel.def: 
-------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: GPL-2.0+ 2 | // 3 | // An earlier version of this file appears in the companion webpage for 4 | // "Frightening small children and disconcerting grown-ups: Concurrency 5 | // in the Linux kernel" by Alglave, Maranget, McKenney, Parri, and Stern, 6 | // which is to appear in ASPLOS 2018. 7 | 8 | // ONCE 9 | READ_ONCE(X) __load{once}(X) 10 | WRITE_ONCE(X,V) { __store{once}(X,V); } 11 | 12 | // Release Acquire and friends 13 | smp_store_release(X,V) { __store{release}(*X,V); } 14 | smp_load_acquire(X) __load{acquire}(*X) 15 | rcu_assign_pointer(X,V) { __store{release}(X,V); } 16 | lockless_dereference(X) __load{lderef}(X) 17 | rcu_dereference(X) __load{deref}(X) 18 | 19 | // Fences 20 | smp_mb() { __fence{mb} ; } 21 | smp_rmb() { __fence{rmb} ; } 22 | smp_wmb() { __fence{wmb} ; } 23 | smp_read_barrier_depends() { __fence{rb_dep}; } 24 | smp_mb__before_atomic() { __fence{before_atomic} ; } 25 | smp_mb__after_atomic() { __fence{after_atomic} ; } 26 | smp_mb__after_spinlock() { __fence{after_spinlock} ; } 27 | 28 | // Exchange 29 | xchg(X,V) __xchg{mb}(X,V) 30 | xchg_relaxed(X,V) __xchg{once}(X,V) 31 | xchg_release(X,V) __xchg{release}(X,V) 32 | xchg_acquire(X,V) __xchg{acquire}(X,V) 33 | cmpxchg(X,V,W) __cmpxchg{mb}(X,V,W) 34 | cmpxchg_relaxed(X,V,W) __cmpxchg{once}(X,V,W) 35 | cmpxchg_acquire(X,V,W) __cmpxchg{acquire}(X,V,W) 36 | cmpxchg_release(X,V,W) __cmpxchg{release}(X,V,W) 37 | 38 | // Spinlocks 39 | spin_lock(X) { __lock(X) ; } 40 | spin_unlock(X) { __unlock(X) ; } 41 | spin_trylock(X) __trylock(X) 42 | 43 | // RCU 44 | rcu_read_lock() { __fence{rcu_read_lock}; } 45 | rcu_read_unlock() { __fence{rcu_read_unlock};} 46 | synchronize_rcu() { __fence{sync}; } 47 | synchronize_rcu_expedited() { __fence{sync}; } 48 | 49 | // Atomic 50 | atomic_read(X) READ_ONCE(*X) 51 | atomic_set(X,V) { WRITE_ONCE(*X,V) ; } 52 | atomic_read_acquire(X) smp_load_acquire(X) 53 
| atomic_set_release(X,V) { smp_store_release(X,V); } 54 | 55 | atomic_add(V,X) { __atomic_op(X,+,V) ; } 56 | atomic_sub(V,X) { __atomic_op(X,-,V) ; } 57 | atomic_inc(X) { __atomic_op(X,+,1) ; } 58 | atomic_dec(X) { __atomic_op(X,-,1) ; } 59 | 60 | atomic_add_return(V,X) __atomic_op_return{mb}(X,+,V) 61 | atomic_add_return_relaxed(V,X) __atomic_op_return{once}(X,+,V) 62 | atomic_add_return_acquire(V,X) __atomic_op_return{acquire}(X,+,V) 63 | atomic_add_return_release(V,X) __atomic_op_return{release}(X,+,V) 64 | atomic_fetch_add(V,X) __atomic_fetch_op{mb}(X,+,V) 65 | atomic_fetch_add_relaxed(V,X) __atomic_fetch_op{once}(X,+,V) 66 | atomic_fetch_add_acquire(V,X) __atomic_fetch_op{acquire}(X,+,V) 67 | atomic_fetch_add_release(V,X) __atomic_fetch_op{release}(X,+,V) 68 | 69 | atomic_inc_return(X) __atomic_op_return{mb}(X,+,1) 70 | atomic_inc_return_relaxed(X) __atomic_op_return{once}(X,+,1) 71 | atomic_inc_return_acquire(X) __atomic_op_return{acquire}(X,+,1) 72 | atomic_inc_return_release(X) __atomic_op_return{release}(X,+,1) 73 | atomic_fetch_inc(X) __atomic_fetch_op{mb}(X,+,1) 74 | atomic_fetch_inc_relaxed(X) __atomic_fetch_op{once}(X,+,1) 75 | atomic_fetch_inc_acquire(X) __atomic_fetch_op{acquire}(X,+,1) 76 | atomic_fetch_inc_release(X) __atomic_fetch_op{release}(X,+,1) 77 | 78 | atomic_sub_return(V,X) __atomic_op_return{mb}(X,-,V) 79 | atomic_sub_return_relaxed(V,X) __atomic_op_return{once}(X,-,V) 80 | atomic_sub_return_acquire(V,X) __atomic_op_return{acquire}(X,-,V) 81 | atomic_sub_return_release(V,X) __atomic_op_return{release}(X,-,V) 82 | atomic_fetch_sub(V,X) __atomic_fetch_op{mb}(X,-,V) 83 | atomic_fetch_sub_relaxed(V,X) __atomic_fetch_op{once}(X,-,V) 84 | atomic_fetch_sub_acquire(V,X) __atomic_fetch_op{acquire}(X,-,V) 85 | atomic_fetch_sub_release(V,X) __atomic_fetch_op{release}(X,-,V) 86 | 87 | atomic_dec_return(X) __atomic_op_return{mb}(X,-,1) 88 | atomic_dec_return_relaxed(X) __atomic_op_return{once}(X,-,1) 89 | atomic_dec_return_acquire(X) 
__atomic_op_return{acquire}(X,-,1) 90 | atomic_dec_return_release(X) __atomic_op_return{release}(X,-,1) 91 | atomic_fetch_dec(X) __atomic_fetch_op{mb}(X,-,1) 92 | atomic_fetch_dec_relaxed(X) __atomic_fetch_op{once}(X,-,1) 93 | atomic_fetch_dec_acquire(X) __atomic_fetch_op{acquire}(X,-,1) 94 | atomic_fetch_dec_release(X) __atomic_fetch_op{release}(X,-,1) 95 | 96 | atomic_xchg(X,V) __xchg{mb}(X,V) 97 | atomic_xchg_relaxed(X,V) __xchg{once}(X,V) 98 | atomic_xchg_release(X,V) __xchg{release}(X,V) 99 | atomic_xchg_acquire(X,V) __xchg{acquire}(X,V) 100 | atomic_cmpxchg(X,V,W) __cmpxchg{mb}(X,V,W) 101 | atomic_cmpxchg_relaxed(X,V,W) __cmpxchg{once}(X,V,W) 102 | atomic_cmpxchg_acquire(X,V,W) __cmpxchg{acquire}(X,V,W) 103 | atomic_cmpxchg_release(X,V,W) __cmpxchg{release}(X,V,W) 104 | 105 | atomic_sub_and_test(V,X) __atomic_op_return{mb}(X,-,V) == 0 106 | atomic_dec_and_test(X) __atomic_op_return{mb}(X,-,1) == 0 107 | atomic_inc_and_test(X) __atomic_op_return{mb}(X,+,1) == 0 108 | atomic_add_negative(V,X) __atomic_op_return{mb}(X,+,V) < 0 109 | -------------------------------------------------------------------------------- /Assignment_4/litmus-tests/CO+wx+wx+rx-rx+rx-rx1.litmus: -------------------------------------------------------------------------------- 1 | C CO+wx+wx+rx-rx+rx-rx1 2 | 3 | {} 4 | 5 | P0(int *x) 6 | { 7 | WRITE_ONCE(*x, 1); 8 | } 9 | 10 | P1(int *x) 11 | { 12 | WRITE_ONCE(*x, 2); 13 | } 14 | 15 | P2(int *x) 16 | { 17 | int r0; 18 | int r1; 19 | 20 | r0 = READ_ONCE(*x); 21 | r1 = READ_ONCE(*x); 22 | } 23 | 24 | P3(int *x) 25 | { 26 | int r2; 27 | int r3; 28 | 29 | r2 = READ_ONCE(*x); 30 | r3 = READ_ONCE(*x); 31 | } 32 | 33 | exists (2:r0=1 /\ 2:r1=2 /\ 3:r2=1 /\ 3:r3=2) 34 | -------------------------------------------------------------------------------- /Assignment_4/litmus-tests/CO+wx+wx+rx-rx+rx-rx2.litmus: -------------------------------------------------------------------------------- 1 | C CO+wx+wx+rx-rx+rx-rx2 2 | 3 | {} 4 | 5 | P0(int *x) 6 | { 7 
| WRITE_ONCE(*x, 1); 8 | } 9 | 10 | P1(int *x) 11 | { 12 | WRITE_ONCE(*x, 2); 13 | } 14 | 15 | P2(int *x) 16 | { 17 | int r0; 18 | int r1; 19 | 20 | r0 = READ_ONCE(*x); 21 | r1 = READ_ONCE(*x); 22 | } 23 | 24 | P3(int *x) 25 | { 26 | int r2; 27 | int r3; 28 | 29 | r2 = READ_ONCE(*x); 30 | r3 = READ_ONCE(*x); 31 | } 32 | 33 | exists (2:r0=1 /\ 2:r1=2 /\ 3:r2=2 /\ 3:r3=1) 34 | -------------------------------------------------------------------------------- /Assignment_4/litmus-tests/CO+wx+wx+rx-rx1.litmus: -------------------------------------------------------------------------------- 1 | C CO+wx+wx+rx-rx1 2 | 3 | {} 4 | 5 | P0(int *x) 6 | { 7 | WRITE_ONCE(*x, 1); 8 | } 9 | 10 | P1(int *x) 11 | { 12 | WRITE_ONCE(*x, 2); 13 | } 14 | 15 | P2(int *x) 16 | { 17 | int r0; 18 | int r1; 19 | 20 | r0 = READ_ONCE(*x); 21 | r1 = READ_ONCE(*x); 22 | } 23 | 24 | exists (2:r0=2 /\ 2:r1=1) 25 | -------------------------------------------------------------------------------- /Assignment_4/litmus-tests/CO+wx+wx+rx-rx2.litmus: -------------------------------------------------------------------------------- 1 | C CO+wx+wx+rx-rx2 2 | 3 | {} 4 | 5 | P0(int *x) 6 | { 7 | WRITE_ONCE(*x, 1); 8 | } 9 | 10 | P1(int *x) 11 | { 12 | WRITE_ONCE(*x, 2); 13 | } 14 | 15 | P2(int *x) 16 | { 17 | int r0; 18 | int r1; 19 | 20 | r0 = READ_ONCE(*x); 21 | r1 = READ_ONCE(*x); 22 | } 23 | 24 | exists (2:r0=1 /\ 2:r1=2) 25 | -------------------------------------------------------------------------------- /Assignment_4/litmus-tests/CO+wx-wx+rx-rx1.litmus: -------------------------------------------------------------------------------- 1 | C CO+wx-wx+rx-rx1 2 | 3 | {} 4 | 5 | P0(int *x) 6 | { 7 | WRITE_ONCE(*x, 1); 8 | WRITE_ONCE(*x, 2); 9 | } 10 | 11 | P1(int *x, int *y) 12 | { 13 | int r0; 14 | int r1; 15 | 16 | r0 = READ_ONCE(*x); 17 | r1 = READ_ONCE(*x); 18 | } 19 | 20 | exists (1:r0=1 /\ 1:r1=2) 21 | -------------------------------------------------------------------------------- 
/Assignment_4/litmus-tests/CO+wx-wx+rx-rx2.litmus: -------------------------------------------------------------------------------- 1 | C CO+wx-wx+rx-rx2 2 | 3 | {} 4 | 5 | P0(int *x) 6 | { 7 | WRITE_ONCE(*x, 1); 8 | WRITE_ONCE(*x, 2); 9 | } 10 | 11 | P1(int *x, int *y) 12 | { 13 | int r0; 14 | int r1; 15 | 16 | r0 = READ_ONCE(*x); 17 | r1 = READ_ONCE(*x); 18 | } 19 | 20 | exists (1:r0=2 /\ 1:r1=1) 21 | -------------------------------------------------------------------------------- /Assignment_4/litmus-tests/IRIWish+rx-ry+wx+wy+ry-rx1.litmus: -------------------------------------------------------------------------------- 1 | C IRIWish+rx-ry+wx+wy+ry-rx1 2 | 3 | {} 4 | 5 | P0(int *x, int *y) 6 | { 7 | int r0; 8 | int r1; 9 | 10 | r0 = READ_ONCE(*x); 11 | smp_rmb(); 12 | r1 = READ_ONCE(*y); 13 | } 14 | 15 | P1(int *x) 16 | { 17 | WRITE_ONCE(*x, 1); 18 | } 19 | 20 | P2(int *y) 21 | { 22 | WRITE_ONCE(*y, 1); 23 | } 24 | 25 | P3(int *x, int *y) 26 | { 27 | int r2; 28 | int r3; 29 | 30 | r2 = READ_ONCE(*y); 31 | smp_rmb(); 32 | r3 = READ_ONCE(*x); 33 | } 34 | 35 | exists (0:r0=1 /\ 0:r1=0 /\ 3:r2=1 /\ 3:r3=0) 36 | -------------------------------------------------------------------------------- /Assignment_4/litmus-tests/MP+wx-wy+ry-rx1.litmus: -------------------------------------------------------------------------------- 1 | C MP+wx-wy+ry-rx1 2 | 3 | {} 4 | 5 | P0(int *x, int *y) 6 | { 7 | WRITE_ONCE(*x, 1); 8 | WRITE_ONCE(*y, 1); 9 | } 10 | 11 | P1(int *x, int *y) 12 | { 13 | int r0; 14 | int r1; 15 | 16 | r0 = READ_ONCE(*y); 17 | r1 = READ_ONCE(*x); 18 | } 19 | 20 | exists (1:r0=0 /\ 1:r1=1) 21 | 22 | -------------------------------------------------------------------------------- /Assignment_4/litmus-tests/MP+wx-wy+ry-rx2.litmus: -------------------------------------------------------------------------------- 1 | C MP+wx-wy+ry-rx2 2 | 3 | {} 4 | 5 | P0(int *x, int *y) 6 | { 7 | WRITE_ONCE(*x, 1); 8 | WRITE_ONCE(*y, 1); 9 | } 10 | 11 | P1(int *x, int *y) 12 | { 13 
| int r0; 14 | int r1; 15 | 16 | r0 = READ_ONCE(*y); 17 | r1 = READ_ONCE(*x); 18 | } 19 | 20 | exists (1:r0=1 /\ 1:r1=0) 21 | -------------------------------------------------------------------------------- /Assignment_4/litmus-tests/MP+wy-wx+rx-ry+ry-rx1.litmus: -------------------------------------------------------------------------------- 1 | C MP+wy-wx+rx-ry+ry-rx1 2 | 3 | {} 4 | 5 | P0(int *x, int *y) 6 | { 7 | WRITE_ONCE(*y, 1); 8 | WRITE_ONCE(*x, 1); 9 | } 10 | 11 | P1(int *x, int *y) 12 | { 13 | int r0; 14 | int r1; 15 | 16 | r0 = READ_ONCE(*x); 17 | smp_rmb(); 18 | r1 = READ_ONCE(*y); 19 | } 20 | 21 | P2(int *x, int *y) 22 | { 23 | int r2; 24 | int r3; 25 | 26 | r2 = READ_ONCE(*y); 27 | smp_rmb(); 28 | r3 = READ_ONCE(*x); 29 | } 30 | 31 | exists (1:r0=1 /\ 1:r1=0 /\ 2:r2=1 /\ 2:r3=0) 32 | -------------------------------------------------------------------------------- /Assignment_4/lock.cat: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: GPL-2.0+ 2 | (* 3 | * Copyright (C) 2016 Luc Maranget for Inria 4 | * Copyright (C) 2017 Alan Stern 5 | *) 6 | 7 | (* Generate coherence orders and handle lock operations *) 8 | 9 | include "cross.cat" 10 | 11 | (* From lock reads to their partner lock writes *) 12 | let lk-rmw = ([LKR] ; po-loc ; [LKW]) \ (po ; po) 13 | let rmw = rmw | lk-rmw 14 | 15 | (* 16 | * A paired LKR must always see an unlocked value; spin_lock() calls nested 17 | * inside a critical section (for the same lock) always deadlock. 
18 | *) 19 | empty ([LKW] ; po-loc ; [domain(lk-rmw)]) \ (po-loc ; [UL] ; po-loc) 20 | as lock-nest 21 | 22 | (* The litmus test is invalid if an LKW event is not part of an RMW pair *) 23 | flag ~empty LKW \ range(lk-rmw) as unpaired-LKW 24 | 25 | (* This will be allowed if we implement spin_is_locked() *) 26 | flag ~empty LKR \ domain(lk-rmw) as unpaired-LKR 27 | 28 | (* There should be no R or W accesses to spinlocks *) 29 | let ALL-LOCKS = LKR | LKW | UL | LF 30 | flag ~empty [M \ IW] ; loc ; [ALL-LOCKS] as mixed-lock-accesses 31 | 32 | (* The final value of a spinlock should not be tested *) 33 | flag ~empty [FW] ; loc ; [ALL-LOCKS] as lock-final 34 | 35 | 36 | (* 37 | * Put lock operations in their appropriate classes, but leave UL out of W 38 | * until after the co relation has been generated. 39 | *) 40 | let R = R | LKR | LF 41 | let W = W | LKW 42 | 43 | let Release = Release | UL 44 | let Acquire = Acquire | LKR 45 | 46 | 47 | (* Match LKW events to their corresponding UL events *) 48 | let critical = ([LKW] ; po-loc ; [UL]) \ (po-loc ; [LKW | UL] ; po-loc) 49 | 50 | flag ~empty UL \ range(critical) as unmatched-unlock 51 | 52 | (* Allow up to one unmatched LKW per location; more must deadlock *) 53 | let UNMATCHED-LKW = LKW \ domain(critical) 54 | empty ([UNMATCHED-LKW] ; loc ; [UNMATCHED-LKW]) \ id as unmatched-locks 55 | 56 | 57 | (* rfi for LF events: link each LKW to the LF events in its critical section *) 58 | let rfi-lf = ([LKW] ; po-loc ; [LF]) \ ([LKW] ; po-loc ; [UL] ; po-loc) 59 | 60 | (* rfe for LF events *) 61 | let all-possible-rfe-lf = 62 | (* 63 | * Given an LF event r, compute the possible rfe edges for that event 64 | * (all those starting from LKW events in other threads), 65 | * and then convert that relation to a set of single-edge relations. 
66 | *) 67 | let possible-rfe-lf r = 68 | let pair-to-relation p = p ++ 0 69 | in map pair-to-relation ((LKW * {r}) & loc & ext) 70 | (* Do this for each LF event r that isn't in rfi-lf *) 71 | in map possible-rfe-lf (LF \ range(rfi-lf)) 72 | 73 | (* Generate all rf relations for LF events *) 74 | with rfe-lf from cross(all-possible-rfe-lf) 75 | let rf = rf | rfi-lf | rfe-lf 76 | 77 | 78 | (* Generate all co relations, including LKW events but not UL *) 79 | let co0 = co0 | ([IW] ; loc ; [LKW]) | 80 | (([LKW] ; loc ; [UNMATCHED-LKW]) \ [UNMATCHED-LKW]) 81 | include "cos-opt.cat" 82 | let W = W | UL 83 | let M = R | W 84 | 85 | (* Merge UL events into co *) 86 | let co = (co | critical | (critical^-1 ; co))+ 87 | let coe = co & ext 88 | let coi = co & int 89 | 90 | (* Merge LKR events into rf *) 91 | let rf = rf | ([IW | UL] ; singlestep(co) ; lk-rmw^-1) 92 | let rfe = rf & ext 93 | let rfi = rf & int 94 | 95 | let fr = rf^-1 ; co 96 | let fre = fr & ext 97 | let fri = fr & int 98 | 99 | show co,rf,fr 100 | -------------------------------------------------------------------------------- /Assignment_4/memory_model_README: -------------------------------------------------------------------------------- 1 | ========================= 2 | LINUX KERNEL MEMORY MODEL 3 | ========================= 4 | 5 | ============ 6 | INTRODUCTION 7 | ============ 8 | 9 | This directory contains the memory model of the Linux kernel, written 10 | in the "cat" language and executable by the (externally provided) 11 | "herd7" simulator, which exhaustively explores the state space of 12 | small litmus tests. 13 | 14 | In addition, the "klitmus7" tool (also externally provided) may be used 15 | to convert a litmus test to a Linux kernel module, which in turn allows 16 | that litmus test to be exercised within the Linux kernel. 
17 | 18 | 19 | ============ 20 | REQUIREMENTS 21 | ============ 22 | 23 | The "herd7" and "klitmus7" tools must be downloaded separately: 24 | 25 | https://github.com/herd/herdtools7 26 | 27 | See "herdtools7/INSTALL.md" for installation instructions. 28 | 29 | Alternatively, Abhishek Bhardwaj has kindly provided a Docker image 30 | of these tools at "abhishek40/memory-model". Abhishek suggests the 31 | following commands to install and use this image: 32 | 33 | - Users should install Docker for their distribution. 34 | - docker run -itd abhishek40/memory-model 35 | - docker attach 36 | 37 | Gentoo users might wish to make use of Patrick McLean's package: 38 | 39 | https://gitweb.gentoo.org/repo/gentoo.git/tree/dev-util/herdtools7 40 | 41 | These packages may not be up-to-date with respect to the GitHub 42 | repository. 43 | 44 | 45 | ================== 46 | BASIC USAGE: HERD7 47 | ================== 48 | 49 | The memory model is used, in conjunction with "herd7", to exhaustively 50 | explore the state space of small litmus tests. 51 | 52 | For example, to run SB+mbonceonces.litmus against the memory model: 53 | 54 | $ herd7 -conf linux-kernel.cfg litmus-tests/SB+mbonceonces.litmus 55 | 56 | Here is the corresponding output: 57 | 58 | Test SB+mbonceonces Allowed 59 | States 3 60 | 0:r0=0; 1:r0=1; 61 | 0:r0=1; 1:r0=0; 62 | 0:r0=1; 1:r0=1; 63 | No 64 | Witnesses 65 | Positive: 0 Negative: 3 66 | Condition exists (0:r0=0 /\ 1:r0=0) 67 | Observation SB+mbonceonces Never 0 3 68 | Time SB+mbonceonces 0.01 69 | Hash=d66d99523e2cac6b06e66f4c995ebb48 70 | 71 | The "Positive: 0 Negative: 3" and the "Never 0 3" each indicate that 72 | this litmus test's "exists" clause can not be satisfied. 73 | 74 | See "herd7 -help" or "herdtools7/doc/" for more information. 75 | 76 | 77 | ===================== 78 | BASIC USAGE: KLITMUS7 79 | ===================== 80 | 81 | The "klitmus7" tool converts a litmus test into a Linux kernel module, 82 | which may then be loaded and run. 
83 | 84 | For example, to run SB+mbonceonces.litmus against hardware: 85 | 86 | $ mkdir mymodules 87 | $ klitmus7 -o mymodules litmus-tests/SB+mbonceonces.litmus 88 | $ cd mymodules ; make 89 | $ sudo sh run.sh 90 | 91 | The corresponding output includes: 92 | 93 | Test SB+mbonceonces Allowed 94 | Histogram (3 states) 95 | 644580 :>0:r0=1; 1:r0=0; 96 | 644328 :>0:r0=0; 1:r0=1; 97 | 711092 :>0:r0=1; 1:r0=1; 98 | No 99 | Witnesses 100 | Positive: 0, Negative: 2000000 101 | Condition exists (0:r0=0 /\ 1:r0=0) is NOT validated 102 | Hash=d66d99523e2cac6b06e66f4c995ebb48 103 | Observation SB+mbonceonces Never 0 2000000 104 | Time SB+mbonceonces 0.16 105 | 106 | The "Positive: 0 Negative: 2000000" and the "Never 0 2000000" indicate 107 | that during two million trials, the state specified in this litmus 108 | test's "exists" clause was not reached. 109 | 110 | And, as with "herd7", please see "klitmus7 -help" or "herdtools7/doc/" 111 | for more information. 112 | 113 | 114 | ==================== 115 | DESCRIPTION OF FILES 116 | ==================== 117 | 118 | Documentation/cheatsheet.txt 119 | Quick-reference guide to the Linux-kernel memory model. 120 | 121 | Documentation/explanation.txt 122 | Describes the memory model in detail. 123 | 124 | Documentation/recipes.txt 125 | Lists common memory-ordering patterns. 126 | 127 | Documentation/references.txt 128 | Provides background reading. 129 | 130 | linux-kernel.bell 131 | Categorizes the relevant instructions, including memory 132 | references, memory barriers, atomic read-modify-write operations, 133 | lock acquisition/release, and RCU operations. 134 | 135 | More formally, this file (1) lists the subtypes of the various 136 | event types used by the memory model and (2) performs RCU 137 | read-side critical section nesting analysis. 138 | 139 | linux-kernel.cat 140 | Specifies what reorderings are forbidden by memory references, 141 | memory barriers, atomic read-modify-write operations, and RCU. 
142 | 143 | More formally, this file specifies what executions are forbidden 144 | by the memory model. Allowed executions are those which 145 | satisfy the model's "coherence", "atomic", "happens-before", 146 | "propagation", and "rcu" axioms, which are defined in the file. 147 | 148 | linux-kernel.cfg 149 | Convenience file that gathers the common-case herd7 command-line 150 | arguments. 151 | 152 | linux-kernel.def 153 | Maps from C-like syntax to herd7's internal litmus-test 154 | instruction-set architecture. 155 | 156 | litmus-tests 157 | Directory containing a few representative litmus tests, which 158 | are listed in litmus-tests/README. A great deal more litmus 159 | tests are available at https://github.com/paulmckrcu/litmus. 160 | 161 | lock.cat 162 | Provides a front-end analysis of lock acquisition and release, 163 | for example, associating a lock acquisition with the preceding 164 | and following releases and checking for self-deadlock. 165 | 166 | More formally, this file defines a performance-enhanced scheme 167 | for generation of the possible reads-from and coherence order 168 | relations on the locking primitives. 169 | 170 | README 171 | This file. 172 | 173 | 174 | =========== 175 | LIMITATIONS 176 | =========== 177 | 178 | The Linux-kernel memory model has the following limitations: 179 | 180 | 1. Compiler optimizations are not modeled. Of course, the use 181 | of READ_ONCE() and WRITE_ONCE() limits the compiler's ability 182 | to optimize, but there is Linux-kernel code that uses bare C 183 | memory accesses. Handling this code is on the to-do list. 184 | For more information, see Documentation/explanation.txt (in 185 | particular, the "THE PROGRAM ORDER RELATION: po AND po-loc" 186 | and "A WARNING" sections). 187 | 188 | 2. Multiple access sizes for a single variable are not supported, 189 | and neither are misaligned or partially overlapping accesses. 190 | 191 | 3. Exceptions and interrupts are not modeled. 
In some cases, 192 | this limitation can be overcome by modeling the interrupt or 193 | exception with an additional process. 194 | 195 | 4. I/O such as MMIO or DMA is not supported. 196 | 197 | 5. Self-modifying code (such as that found in the kernel's 198 | alternatives mechanism, function tracer, Berkeley Packet Filter 199 | JIT compiler, and module loader) is not supported. 200 | 201 | 6. Complete modeling of all variants of atomic read-modify-write 202 | operations, locking primitives, and RCU is not provided. 203 | For example, call_rcu() and rcu_barrier() are not supported. 204 | However, a substantial amount of support is provided for these 205 | operations, as shown in the linux-kernel.def file. 206 | 207 | The "herd7" tool has some additional limitations of its own, apart from 208 | the memory model: 209 | 210 | 1. Non-trivial data structures such as arrays or structures are 211 | not supported. However, pointers are supported, allowing trivial 212 | linked lists to be constructed. 213 | 214 | 2. Dynamic memory allocation is not supported, although this can 215 | be worked around in some cases by supplying multiple statically 216 | allocated variables. 217 | 218 | Some of these limitations may be overcome in the future, but others are 219 | more likely to be addressed by incorporating the Linux-kernel memory model 220 | into other tools. 
221 | -------------------------------------------------------------------------------- /answers_data/20180205_214850.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anthezium/CS510-Advanced-Topics-in-Concurrency/9b9b570d42e8abb89cd2f2329a593e59cff48000/answers_data/20180205_214850.jpg -------------------------------------------------------------------------------- /answers_data/bench_t24_2018-02-05_01-36-40.csv_ticks_00accesses.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anthezium/CS510-Advanced-Topics-in-Concurrency/9b9b570d42e8abb89cd2f2329a593e59cff48000/answers_data/bench_t24_2018-02-05_01-36-40.csv_ticks_00accesses.pdf -------------------------------------------------------------------------------- /answers_data/bench_t24_2018-02-05_01-36-40.csv_ticks_08accesses.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anthezium/CS510-Advanced-Topics-in-Concurrency/9b9b570d42e8abb89cd2f2329a593e59cff48000/answers_data/bench_t24_2018-02-05_01-36-40.csv_ticks_08accesses.pdf -------------------------------------------------------------------------------- /answers_data/bench_t24_2018-02-05_01-36-40.csv_ticks_800accesses.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anthezium/CS510-Advanced-Topics-in-Concurrency/9b9b570d42e8abb89cd2f2329a593e59cff48000/answers_data/bench_t24_2018-02-05_01-36-40.csv_ticks_800accesses.pdf -------------------------------------------------------------------------------- /answers_data/bench_t24_2018-02-05_01-36-40.csv_ticks_80accesses.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/anthezium/CS510-Advanced-Topics-in-Concurrency/9b9b570d42e8abb89cd2f2329a593e59cff48000/answers_data/bench_t24_2018-02-05_01-36-40.csv_ticks_80accesses.pdf -------------------------------------------------------------------------------- /answers_data/bench_t24_2018-02-05_04-33-36.csv_ticks_00accesses.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anthezium/CS510-Advanced-Topics-in-Concurrency/9b9b570d42e8abb89cd2f2329a593e59cff48000/answers_data/bench_t24_2018-02-05_04-33-36.csv_ticks_00accesses.pdf -------------------------------------------------------------------------------- /answers_data/bench_t24_2018-02-05_04-33-36.csv_ticks_08accesses.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anthezium/CS510-Advanced-Topics-in-Concurrency/9b9b570d42e8abb89cd2f2329a593e59cff48000/answers_data/bench_t24_2018-02-05_04-33-36.csv_ticks_08accesses.pdf -------------------------------------------------------------------------------- /answers_data/bench_t24_2018-02-05_04-33-36.csv_ticks_800accesses.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anthezium/CS510-Advanced-Topics-in-Concurrency/9b9b570d42e8abb89cd2f2329a593e59cff48000/answers_data/bench_t24_2018-02-05_04-33-36.csv_ticks_800accesses.pdf -------------------------------------------------------------------------------- /answers_data/bench_t24_2018-02-05_04-33-36.csv_ticks_80accesses.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anthezium/CS510-Advanced-Topics-in-Concurrency/9b9b570d42e8abb89cd2f2329a593e59cff48000/answers_data/bench_t24_2018-02-05_04-33-36.csv_ticks_80accesses.pdf -------------------------------------------------------------------------------- /git-tutorial.html: 
-------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | CS510 Advanced Topics in Concurrency, Winter 2018 7 | 8 | 9 | 10 | 11 | 14 | 15 | 16 |
17 |
18 |

CS510 Advanced Topics in Concurrency

19 |

Portland State University, Winter 2018

20 | 21 |

View the Project on GitHub anthezium/CS510-Advanced-Topics-in-Concurrency

22 |
23 |
24 |

Git and Github Tutorial

25 |

Note: Using Git and GitHub is completely optional. Your grade will not suffer in any way if you decide not to use them. If you choose not to use Git and/or GitHub for the CS510 project, feel free to get all the files via the zip or tarball download links on the left. Clicking one of those links will download the files to your computer, and you can then upload the files you need to the babbage server in the normal way. If you need help with that, come to office hours or make an appointment and we'll tackle it one-on-one.

26 |

Git is a version control system. It lets you keep a complete history of your files and of every change you have made to them. Handy! We won't be using most of Git's features, but there is one feature in particular that you might find useful.

27 |

First things first: you need to have git installed on your machine. If you log into your babbage account and type git --version, you should see something like git version 2.9.3. Great news! Git is already installed on babbage. If you're trying to install Git on your personal computer, visit GitHub's help pages.

28 |

Once git is installed on your preferred machine, only one step is required to get all the files in the repository (git-speak for a project's code and its history). If, for instance, you're logged into babbage, simply go to your home directory with cd and then type git clone https://github.com/anthezium/CS510-Advanced-Topics-in-Concurrency.git. This will put all the files into a new directory called CS510-Advanced-Topics-in-Concurrency. Type git clone --help for other options and more information.

29 |

The next thing you'll want to do is to create a branch where you can do your work. How your branches are set up is up to you. Maybe you want to have a single branch where you will put all of your work for all of your assignments. Maybe you'll want a branch for each assignment. In either case, first make sure you are on the master branch and that you have the latest version of the code. Go to the CS510-Advanced-Topics-in-Concurrency folder (or whatever you've named that) and type git checkout master and then git pull. This will put you on the master branch and will pull in any changes that I've made to the master branch since you last checked. Then to create a new branch type git checkout -b my-work. Feel free to change "my-work" to any name you'd like. NOTE: Please do not create a new remote branch or make any pull requests since this will make all of your work visible to everyone.
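Collected in one place, here is a dry run of the branch workflow described above. To keep it self-contained it uses a throwaway local repository rather than the course repo; for real work, run the checkout/pull/branch commands inside your CS510-Advanced-Topics-in-Concurrency clone (the branch name my-work is just an example):

```shell
# Dry run of "create a branch and work on it" in a throwaway repo,
# so nothing here touches GitHub.
demo=$(mktemp -d)
cd "$demo"
git init -q .
git -c user.email=you@example.com -c user.name=You \
    commit -q --allow-empty -m "initial commit"
git checkout -q -b my-work        # create and switch to a work branch
git rev-parse --abbrev-ref HEAD   # prints the branch you are now on: my-work
```

In your real clone, the equivalent sequence is git checkout master, git pull, then git checkout -b my-work.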

30 |

To make things a bit easier, I'll have you submit your work via email, per the directions on the assignment pages, so you won't have to concern yourself with any other features of git for CS510. I'm happy to help with git-related questions if I have some extra time, though.

31 |
32 |
33 |

CS510 TA: Ted Cooper
34 | I will try to answer emails within 24 hours.
35 | Office Hours: TBD, and by appointment
36 | Please make sure you've signed up for the class mailing list 37 |

38 |
-------------------------------------------------------------------------------- /index.html: --------------------------------------------------------------------------------
CS510 Advanced Topics in Concurrency, Winter 2018

CS510 Advanced Topics in Concurrency


Portland State University, Winter 2018


View the Project on GitHub anthezium/CS510-Advanced-Topics-in-Concurrency


CS510 Course Project


Throughout the term, you will develop real concurrent code, ranging from synchronization primitives to concurrent operations on data structures.


Because many of the mechanisms we will be using are platform-dependent, it is important that everyone use the same environment. To that end, we ask that students work remotely on the CS department's general-purpose Linux platform, babbage.cs.pdx.edu. Babbage is a 30-core x86-64 virtual server running Ubuntu Linux with a variety of useful software pre-installed. It is hosted on a 20-core physical server, gnat, with Intel(R) Xeon(R) E5-2670 v2 @ 2.50GHz CPUs and hyperthreading enabled, providing a total of 40 hardware threads. Gnat also hosts tanto and wakizashi in addition to babbage, so we need to be mindful citizens of a shared computing environment and always leave some cores unused for the OS and other users.
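Since you'll be sizing thread counts against a shared machine, it's worth checking how many logical CPUs the host actually exposes before launching benchmarks; the assignment Makefiles do something similar by grepping /proc/cpuinfo. A quick Linux-specific sketch (note the two counts can differ under CPU-affinity limits, e.g. in containers):

```shell
# Count the logical CPUs this host exposes, two ways.
# `nproc` respects CPU-affinity limits; /proc/cpuinfo lists every
# "processor" entry the kernel knows about.
nproc
grep -c '^processor' /proc/cpuinfo
```

Whatever number you get, leave a few cores free for the OS and other users, as requested above.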


To connect to babbage, you will need an SSH client (I suggest Terminal or iTerm2 for macOS, mosh for Linux/Cygwin, and PuTTY for Windows) and an MCECS resource account, which is separate from your ODIN account and is maintained by the CAT. If you do not have an MCECS account, please visit the CS Tutors in FAB 88.


All code will be written in the C programming language, possibly with a small portion in GNU Assembler. We assume you have some knowledge of C; if you need to brush up, we recommend The C Programming Language by Kernighan and Ritchie. A copy of this book is available from the CS Tutors for checkout while you are in the CS lounge (FAB 88).


This project also assumes some knowledge of working on the Unix/Linux command-line, especially the use of the GNU Compiler Collection (gcc), GNU Make (make), and the GNU Project Debugger (gdb). If you need a refresher, consider attending the workshops offered by the CS Tutors near the beginning of the term. If you need additional assistance, contact the TA (Ted) or the course mailing list.


Assignments


The project is divided into the following assignments, roughly increasing in complexity. Each assignment is due a week after the relevant subject matter is introduced in class, so it is highly recommended that you start each one as early as possible!


Grading


The main goal of this project is to give you hands-on experience with the implementation and performance characteristics of some of the ideas and mechanisms we will be encountering throughout the course.


With that in mind, we will use the project primarily as an educational tool rather than an assessment tool. Your project assignments will not be graded, so they will not contribute directly to your overall grade. However, the midterm and final exams will contain questions that relate to the project assignments, in addition to questions that test understanding of the concepts covered in the lectures and course reading material. It is therefore important that you complete the project assignments and learn from them; they should not be considered optional!


NEW! For an intro to git, see the git tutorial (git-tutorial.html).


CS510 TA: Ted Cooper
I will try to answer emails within 24 hours.
Office Hours: 12:30-13:30 on Tuesdays and 3:20-4:20 on Thursdays in the fishbowl, and by appointment
Please make sure you've signed up for the class mailing list.

-------------------------------------------------------------------------------- /javascripts/scale.fix.js: --------------------------------------------------------------------------------
var metas = document.getElementsByTagName('meta');
var i;
if (navigator.userAgent.match(/iPhone/i)) {
  for (i=0; i