├── .gitignore ├── .gitmodules ├── README.md ├── apps └── flexkvs │ ├── Makefile │ ├── bench │ ├── benchmark.h │ ├── commandline.c │ ├── main-flexnic.c │ ├── main.c │ ├── rng.c │ ├── rng.h │ └── workload.c │ ├── common │ └── include │ │ └── protocol_binary.h │ ├── flexkvs.conf │ ├── kvsbench.conf │ ├── server │ ├── hashtable.c │ ├── ialloc.c │ ├── iokvs.h │ ├── jenkins_hash.c │ ├── main-flexnic.c │ ├── main-ll.c │ ├── main-ll2.c │ ├── main.c │ └── settings.c │ └── unlink_socks.sh ├── build.sh ├── microbenchmarks ├── Makefile ├── gups-hotset-move.c ├── gups-random.c ├── gups-small.c ├── gups.c ├── gups.h ├── run-instantaneous.sh ├── run-random.sh ├── run-threads.sh ├── run.sh ├── test.c └── zipf.c └── src ├── Makefile ├── fifo.c ├── fifo.h ├── hemem.c ├── hemem.h ├── interpose.c ├── interpose.h ├── pebs.c ├── pebs.h ├── policies ├── hemem-mmgr.c ├── hemem-mmgr.h ├── lru.c ├── lru.h ├── paging.c ├── paging.h ├── simple.c └── simple.h ├── spsc-ring.c ├── spsc-ring.h ├── timer.c ├── timer.h ├── userfaultfd.h └── uthash.h /.gitignore: -------------------------------------------------------------------------------- 1 | *.o 2 | *.so 3 | *.txt 4 | *.patch 5 | *.data 6 | *.sh 7 | *.csv 8 | *.pdf 9 | *.py 10 | microbenchmarks/gups 11 | microbenchmarks/gups-pebs 12 | microbenchmarks/gups-lru 13 | microbenchmarks/gups-simple 14 | microbenchmarks/gups-lru-swap 15 | microbenchmarks/gups-random 16 | microbenchmarks/gups-hotset-move 17 | memsim/mmgr_simple 18 | memsim/mmgr_linux 19 | memsim/mmgr_simple_mmm 20 | memsim/mmgr_hemem 21 | apps/ 22 | logs/ 23 | results/ 24 | scripts/ 25 | *tags 26 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "apps/silo"] 2 | path = apps/silo 3 | url = https://ajaustin@bitbucket.org/ajaustin/silo.git 4 | [submodule "apps/gapbs"] 5 | path = apps/gapbs 6 | url = https://ajaustin@bitbucket.org/ajaustin/gapbs.git 
7 | [submodule "Hoard"] 8 | path = Hoard 9 | url = https://github.com/emeryberger/Hoard.git 10 | [submodule "linux"] 11 | path = linux 12 | url = ../hemem-linux 13 | [submodule "syscall_intercept"] 14 | path = syscall_intercept 15 | url = https://github.com/pmem/syscall_intercept 16 | [submodule "capstone"] 17 | path = capstone 18 | url = https://github.com/capstone-engine/capstone 19 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # HeMem 2 | 3 | This document describes the artifact for our [SOSP 2021 paper](https://dl.acm.org/doi/10.1145/3477132.3483550 "SOSP 2021 paper") on HeMem. HeMem is a tiered main memory management system designed from scratch for commercially available NVM and the big data applications that use it. HeMem manages tiered memory asynchronously, batching and amortizing memory access tracking, migration, and associated TLB synchronization overheads. HeMem monitors application memory use by sampling memory access via CPU events, rather than page tables. This allows HeMem to scale to terabytes of memory, keeping small and ephemeral data structures in fast memory, and allocating scarce, asymmetric NVM bandwidth according to access patterns. Finally, HeMem is flexible by placing per-application memory management policy at user-level. 
4 | 5 | ## Overview 6 | 7 | * `apps/` contains the application benchmarks evaluated with HeMem 8 | * `microbenchmarks/` contains the GUPS microbenchmark used to evaluate HeMem 9 | * `src/` contains the source code of HeMem 10 | * `src/policies` contains extra memory policies used for testing HeMem, such as a page-table based LRU policy 11 | * `Hoard/` contains the Hoard memory allocator that HeMem depends on 12 | * `linux/` contains the linux kernel version required to run HeMem 13 | 14 | ### Building and Running HeMem 15 | 16 | #### Setup 17 | 18 | You may set up HeMem to run on your own machine provided you have Intel Optane NVM. HeMem uses `/dev/dax` files to represent DRAM and NVM. Some additional setup is required for setting up the DRAM and NVM `/dev/dax` files to run HeMem. 19 | 20 | To set up the `/dev/dax` file representing DRAM, follow the instructions [here](https://pmem.io/2016/02/22/pm-emulation.html "here") in order to reserve a block of DRAM at machine startup to represent the DRAM `/dev/dax` file. HeMem reserves its 140GB of DRAM in this way (enough for its 128GB of reserved DRAM plus some metadata needed for `ndctl`). If your machine has multiple NUMA nodes, ensure that the block of DRAM you reserve is located on the same NUMA node that has NVM. **Do not follow the last set of instructions from pmem.io on setting up a file system on the reserved DRAM.** Instead, set up a `/dev/dax` file to represent it: 21 | 22 | 1. First, determine the name of the namespace representing the reserved DRAM: 23 | 24 | `ndctl list --human` 25 | 26 | 2. You should see your reserved DRAM. If multiple namespaces are listed, some represent NVM namespaces (described below). You should be able to differentiate the DRAM namespace based on size. Your DRAM namespace is likely in `fsdax` mode. 
Change the namespace over to `devdax` mode using the following command (in this example, the DRAM namespace is called `namespace0.0`): 27 | 28 | `sudo ndctl create-namespace -f -e namespace0.0 --mode=devdax --align 2M` 29 | 30 | 3. Make note of the `chardev` name of the DRAM `/dev/dax` file. This will be used to tell HeMem which `/dev/dax` file represents DRAM. If this is different from `dax0.0`, then you will need to edit the `src/hemem.h` file `DRAMPATH` macro to point it towards your actual DRAM `/dev/dax` file. 31 | 32 | To set up the `/dev/dax` file representing NVM, ensure that your machine has NVM in App Direct mode. If you do not already have namespaces representing NVM, then you will need to create them. Follow these steps: 33 | 34 | 1. List the regions available on your machine: 35 | 36 | `ndctl list --regions --human` 37 | 38 | 2. Note which regions represent NVM. You can differentiate them from the reserved DRAM region based on size or via the `persistence_domain` field, which, for NVM, will read `memory_controller`. Pick the region that is on the same NUMA node as your reserved DRAM. In this example, this is "region1". Create a namespace over this region: 39 | 40 | `ndctl create-namespace --region=1 --mode=devdax` 41 | 42 | 3. Make note of the `chardev` name of the NVM `/dev/dax` file. This will be used to tell HeMem which `/dev/dax` file represents NVM. If this is different from `dax1.0`, then you will need to edit the `src/hemem.h` file `NVMPATH` macro to point it towards your actual NVM `/dev/dax` file. 43 | 44 | 45 | #### Building 46 | 47 | To build HeMem, you must first build the linux kernel HeMem depends on. Build, install, and run the kernel located in the `linux/` directory. 48 | 49 | Next, HeMem depends on Hoard. Follow the instructions to build the Hoard library located in the `Hoard/` directory. 50 | 51 | HeMem also depends on libsyscall_intercept to intercept memory allocation system calls. 
Follow the instructions to build and install libsyscall_intercept [here](https://github.com/pmem/syscall_intercept). 52 | 53 | Once the proper kernel version is running, the `/dev/dax` files have been set up, and all dependencies have been installed, HeMem can be built with the supplied Makefile by typing `make` from the `src/` directory. 54 | 55 | #### Running 56 | 57 | You will likely need to add the paths to the build HeMem library and the Hoard library to your LD_LIBRARY_PATH variable: 58 | 59 | `export LD_LIBRARY_PATH=path/to/hemem/lib:/path/to/Hoard/lib:$LD_LIBRARY_PATH` 60 | 61 | You may also need to increase the number of allowed mmap ranges: 62 | 63 | `echo 1000000 > /proc/sys/vm/max_map_count` 64 | 65 | HeMem requires the user be root in order to run. Applications can either be linked with Hemem or run unmodified via the `LD_PRELOAD` environment variable: 66 | 67 | `LD_PRELOAD=/path/to/hemem/lib.so ./foo [args]` 68 | 69 | ### Microbenchmarks 70 | 71 | A Makefile is provided to build the GUPS microbenchmarks. 72 | 73 | To reproduce the Uniform GUPS results, run the `run-random.sh` script. Results will be printed to the `random.txt` file. The throughput results shown in the paper are the "GUPS" lines. 74 | 75 | To reproduce the Hotset GUPS results, run the `run.sh` script. Results will be printed to the `results.txt` file. The throughput results shown in the paper are the "GUPS" lines. 76 | 77 | To reproduce the Instantaneous GUPS results, run the `run-instantaneous.sh` script. Results will be printed to the `tot_gups.txt` file. 78 | 79 | ### Application Benchmarks 80 | 81 | Applications tested with HeMem are located in the `apps/` directory. 82 | 83 | #### Silo 84 | 85 | The Silo application can be found in the `apps/silo_hemem/silo` directory.. Run the provided `run_batch.sh` script. Results will be in the `batch/results.txt` file. The reported throughput numbers are numbers in the first column of the file. 
86 | 87 | #### FlexKVS 88 | 89 | The FlexKVS application can be found in the `apps/flexkvs` directory. These results require a separate machine for the clients. 90 | 91 | #### GapBS 92 | 93 | The GapBS application can be found in the `apps/gapbs` directory. To run the BC algorithm reported in the paper, you may run the following command: 94 | 95 | `LD_PRELOAD=/path/to/hemem/lib ./bc -g ` 96 | 97 | which will run the bc algorithm with HeMem on a graph with 2^scale vertices. 98 | 99 | -------------------------------------------------------------------------------- /apps/flexkvs/Makefile: -------------------------------------------------------------------------------- 1 | FLEXKVS_COMMON=hashtable.o ialloc.o jenkins_hash.o settings.o 2 | BENCH_COMMON=rng.o commandline.o workload.o 3 | 4 | TAS_CODE?=/home/tstamler/tas 5 | HEMEM_CODE=/home/amanda/hemem/ 6 | 7 | CFLAGS = -std=gnu99 -g -Wall -Icommon/include -O3 -march=native \ 8 | -I$(TAS_CODE)/include -I$(TAS_CODE)/lib/tas/include 9 | #CFLAGS += -DDEL_TEST 10 | LDFLAGS = -pthread -g -O3 11 | LDLIBS = -lrt -lm -lpthread -lrt -ldl 12 | 13 | include ../common/Makefile.mtcp 14 | 15 | 16 | all: flexkvs kvsbench 17 | all-sockets: flexkvs kvsbench 18 | all-mtcp: flexkvs-mtcp kvsbench-mtcp 19 | all-ll: flexkvs-ll 20 | 21 | flexkvs: $(addprefix server/,$(FLEXKVS_COMMON) main.o) 22 | $(CC) $(LDFLAGS) -o $@ $^ $(LDLIBS) -L$(HEMEM_CODE) -lhemem 23 | 24 | kvsbench: $(addprefix bench/,$(BENCH_COMMON) main.o) \ 25 | $(TAS_CODE)/lib/utils/timeout.o $(TAS_CODE)/lib/utils/utils.o 26 | $(CC) $(LDFLAGS) -o $@ $^ $(LDLIBS) 27 | 28 | flexkvs-ll: $(addprefix server/,$(FLEXKVS_COMMON) main-ll.o) \ 29 | $(TAS_CODE)/lib/libtas.so 30 | $(CC) $(LDFLAGS) -o $@ $^ $(LDLIBS) 31 | 32 | flexkvs-mtcp: $(addprefix server/,$(FLEXKVS_COMMON:.o=.mtcp.o) main.mtcp.o) 33 | $(CC) $(LDFLAGS) $(MTCP_LDFLAGS) -o $@ $^ $(LDLIBS) $(MTCP_LDLIBS) 34 | 35 | kvsbench-mtcp: $(addprefix bench/,$(BENCH_COMMON:.o=.mtcp.o) main.mtcp.o) \ 36 | ../utils/utils.o 37 | $(CC) 
$(LDFLAGS) $(MTCP_LDFLAGS) -o $@ $^ $(LDLIBS) $(MTCP_LDLIBS) 38 | 39 | %.mtcp.o: %.c 40 | $(CC) $(CFLAGS) $(MTCP_CFLAGS) -c -o $@ $< 41 | 42 | clean: 43 | rm -f */*.o flexkvs kvsbench flexkvs-mtcp kvsbench-mtcp flexkvs-ll 44 | 45 | .PHONY: all all-sockets all-mtcp all-ll clean 46 | -------------------------------------------------------------------------------- /apps/flexkvs/bench/benchmark.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2019 University of Washington, Max Planck Institute for 3 | * Software Systems, and The University of Texas at Austin 4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining 6 | * a copy of this software and associated documentation files (the 7 | * "Software"), to deal in the Software without restriction, including 8 | * without limitation the rights to use, copy, modify, merge, publish, 9 | * distribute, sublicense, and/or sell copies of the Software, and to 10 | * permit persons to whom the Software is furnished to do so, subject to 11 | * the following conditions: 12 | * 13 | * The above copyright notice and this permission notice shall be 14 | * included in all copies or substantial portions of the Software. 15 | * 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 17 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 19 | * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 20 | * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 21 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 22 | * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
23 | */ 24 | 25 | #include 26 | 27 | #include "rng.h" 28 | 29 | #define DEL_RATIO 0.7 30 | 31 | enum key_dist { 32 | DIST_UNIFORM, 33 | DIST_ZIPF, 34 | }; 35 | 36 | struct settings { 37 | uint32_t dstip; 38 | uint16_t dstport; 39 | uint16_t threads; 40 | uint16_t conns; 41 | uint16_t pending; 42 | 43 | uint32_t keynum; 44 | union { 45 | struct { 46 | double s; 47 | } zipf; 48 | } keydistparams; 49 | double get_prob; 50 | enum key_dist keydist; 51 | uint64_t key_seed; 52 | uint64_t op_seed; 53 | uint32_t request_gap; 54 | uint32_t warmup_time; 55 | uint32_t cooldown_time; 56 | uint32_t run_time; 57 | uint16_t keysize; 58 | uint16_t valuesize; 59 | 60 | uint8_t batchsize; 61 | 62 | bool keybased; 63 | }; 64 | 65 | struct key { 66 | void *key; 67 | size_t keylen; 68 | double cdf; 69 | }; 70 | 71 | struct workload { 72 | struct rng op_rng; 73 | struct key *keys; 74 | size_t keys_num; 75 | }; 76 | 77 | struct workload_core { 78 | struct rng rng; 79 | }; 80 | 81 | enum workload_op { 82 | WL_OP_GET, 83 | WL_OP_SET, 84 | WL_OP_DELETE 85 | }; 86 | 87 | extern struct settings settings; 88 | 89 | enum error_ids { 90 | ERR_SUCCESS, 91 | ERR_KEY_ENOENT, 92 | ERR_KEY_EEXIST, 93 | ERR_E2BIG, 94 | ERR_EINVAL, 95 | ERR_NOT_STORED, 96 | ERR_DELTA_BADVAL, 97 | ERR_UNKNOWN_CMD, 98 | ERR_ENOMEM, 99 | ERR_OTHER, 100 | ERR_MAX, 101 | }; 102 | 103 | void print_usage(void); 104 | void init_settings(struct settings *s); 105 | int parse_settings(int argc, char *argv[], struct settings *s); 106 | 107 | bool trace_open(const char *path); 108 | bool trace_init(void); 109 | void trace_request_get(uint8_t thread, struct key *key, uint16_t id); 110 | void trace_request_set(uint8_t thread, struct key *key, uint32_t valsz, 111 | uint16_t id); 112 | void trace_response(uint8_t thread, uint16_t id, uint8_t err); 113 | void trace_flush(uint8_t thread); 114 | 115 | void workload_init(struct workload *wl); 116 | void workload_adjust(struct workload *wl, struct workload *wl2); 117 | void 
workload_core_init(struct workload *wl, struct workload_core *wc); 118 | void workload_op(struct workload *wl, struct workload_core *wc, struct key **k, 119 | enum workload_op *op); 120 | -------------------------------------------------------------------------------- /apps/flexkvs/bench/commandline.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2019 University of Washington, Max Planck Institute for 3 | * Software Systems, and The University of Texas at Austin 4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining 6 | * a copy of this software and associated documentation files (the 7 | * "Software"), to deal in the Software without restriction, including 8 | * without limitation the rights to use, copy, modify, merge, publish, 9 | * distribute, sublicense, and/or sell copies of the Software, and to 10 | * permit persons to whom the Software is furnished to do so, subject to 11 | * the following conditions: 12 | * 13 | * The above copyright notice and this permission notice shall be 14 | * included in all copies or substantial portions of the Software. 15 | * 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 17 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 19 | * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 20 | * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 21 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 22 | * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
23 | */ 24 | 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | 32 | #include "benchmark.h" 33 | 34 | void print_usage(void) 35 | { 36 | fprintf(stderr, 37 | "./benchmark [options] dst-ip:dst-port\n" 38 | "Options:\n" 39 | " -t, --threads=COUNT Number of sending threads [default 1].\n" 40 | " -C, --connns=COUNT # connections / thread [default 1].\n" 41 | " -p, --pending=NUM Number of pend. req/conn. [default 1].\n" 42 | " -k, --key-size=BYTES Key size in bytes [default 32].\n" 43 | " -n, --key-num=COUNT Number of keys [default 1000].\n" 44 | " -u, --key-uniform Uniform key distribution [default]\n" 45 | " -z, --key-zipf=S Zipf key distribution;\n" 46 | " S is the zipf parameter.\n" 47 | " -v, --val-size=BYTES Value size in bytes [default 1024].\n" 48 | " -g, --get-prob=PROB Probability of GET Reqs. [default .9].\n" 49 | " -T, --time=SECS Measurement time in [s]. [default 10].\n" 50 | " -w, --warmup=SECS Warmup time [s]. [default 5].\n" 51 | " -c, --cooldown=SECS Cooldown time [s]. 
[default 5].\n" 52 | " -s, --key-seed=SEED Seed for key PRG.\n" 53 | " -o, --op-seed=SEED Seed for operation PRG.\n" 54 | " -r, --trace=FILE Write operation trace to file.\n" 55 | " -K, --keysteer Key-based steering.\n"); 56 | } 57 | 58 | void init_settings(struct settings *s) 59 | { 60 | s->threads = 1; 61 | s->conns = 1; 62 | s->pending = 1; 63 | s->keysize = 32; 64 | s->keynum = 1000; 65 | s->keydist = DIST_UNIFORM; 66 | s->valuesize = 1024; 67 | s->get_prob = 0.9; 68 | s->warmup_time = 5; 69 | s->cooldown_time = 5; 70 | s->run_time = 10; 71 | s->request_gap = 100 * 1000; 72 | s->key_seed = 0x123457890123ULL; 73 | s->op_seed = 0x987654321098ULL; 74 | s->keybased = false; 75 | s->batchsize = 32; 76 | } 77 | 78 | int parse_settings(int argc, char *argv[], struct settings *s) 79 | { 80 | static struct option long_opts[] = { 81 | {"threads", required_argument, NULL, 't'}, 82 | {"conns", required_argument, NULL, 'C'}, 83 | {"pending", required_argument, NULL, 'p'}, 84 | {"key-size", required_argument, NULL, 'k'}, 85 | {"key-num", required_argument, NULL, 'n'}, 86 | {"key-uniform", no_argument, NULL, 'u'}, 87 | {"key-zipf", required_argument, NULL, 'z'}, 88 | {"val-size", required_argument, NULL, 'v'}, 89 | {"get-prob", required_argument, NULL, 'g'}, 90 | {"time", required_argument, NULL, 'T'}, 91 | {"warmup", required_argument, NULL, 'w'}, 92 | {"cooldown", required_argument, NULL, 'c'}, 93 | {"delay", required_argument, NULL, 'd'}, 94 | {"key-seed", required_argument, NULL, 's'}, 95 | {"op-seed", required_argument, NULL, 'o'}, 96 | {"keysteer", no_argument, NULL, 'K'}, 97 | }; 98 | static const char *short_opts = "t:C:p:k:n:uz:v:g:T:w:c:d:s:o:r:K"; 99 | int c, opt_idx, done = 0; 100 | char *end; 101 | 102 | while (!done) { 103 | c = getopt_long(argc, argv, short_opts, long_opts, &opt_idx); 104 | switch (c) { 105 | case 't': 106 | s->threads = strtoul(optarg, &end, 10); 107 | if (!*optarg || *end || s->threads < 1) { 108 | fprintf(stderr, "threads needs to be a 
positive " 109 | "integer\n"); 110 | return -1; 111 | } 112 | break; 113 | case 'C': 114 | s->conns = strtoul(optarg, &end, 10); 115 | if (!*optarg || *end || s->conns < 1) { 116 | fprintf(stderr, "conns needs to be a positive " 117 | "integer\n"); 118 | return -1; 119 | } 120 | break; 121 | case 'p': 122 | s->pending = strtoul(optarg, &end, 10); 123 | if (!*optarg || *end || s->pending < 1) { 124 | fprintf(stderr, "pending needs to be a positive " 125 | "integer\n"); 126 | return -1; 127 | } 128 | break; 129 | 130 | case 'k': 131 | s->keysize = strtoul(optarg, &end, 10); 132 | if (!*optarg || *end || s->keysize < 1) { 133 | fprintf(stderr, "Key size needs to be a positive " 134 | "integer\n"); 135 | return -1; 136 | } 137 | break; 138 | case 'n': 139 | s->keynum = strtoul(optarg, &end, 10); 140 | if (!*optarg || *end || s->keynum < 1) { 141 | fprintf(stderr, "Key count needs to be a positive " 142 | "integer\n"); 143 | return -1; 144 | } 145 | break; 146 | case 'v': 147 | s->valuesize = strtoul(optarg, &end, 10); 148 | if (!*optarg || *end || s->valuesize < 1) { 149 | fprintf(stderr, "Value size needs to be a positive " 150 | "integer\n"); 151 | return -1; 152 | } 153 | break; 154 | 155 | case 'u': 156 | s->keydist = DIST_UNIFORM; 157 | break; 158 | case 'z': 159 | s->keydist = DIST_ZIPF; 160 | s->keydistparams.zipf.s = strtod(optarg, &end); 161 | if (!*optarg || *end) { 162 | fprintf(stderr, "Zipf parameter needs to be a floating " 163 | "point number.\n"); 164 | return -1; 165 | } 166 | break; 167 | case'g': 168 | s->get_prob = strtod(optarg, &end); 169 | if (!*optarg || *end || s->get_prob < 0 || s->get_prob > 1) { 170 | fprintf(stderr, "GET probability needs to be a floating " 171 | "point number between 0 and 1.\n"); 172 | return -1; 173 | } 174 | break; 175 | case 'T': 176 | s->run_time = strtoul(optarg, &end, 10); 177 | if (!*optarg || *end || s->run_time < 1) { 178 | fprintf(stderr, "Run time needs to be a positive " 179 | "integer\n"); 180 | return -1; 
181 | } 182 | break; 183 | case 'w': 184 | s->warmup_time = strtoul(optarg, &end, 10); 185 | if (!*optarg || *end) { 186 | fprintf(stderr, "Warmup time needs to be a positive " 187 | "integer\n"); 188 | return -1; 189 | } 190 | break; 191 | case 'c': 192 | s->cooldown_time = strtoul(optarg, &end, 10); 193 | if (!*optarg || *end) { 194 | fprintf(stderr, "Cool down time needs to be a positive " 195 | "integer\n"); 196 | return -1; 197 | } 198 | break; 199 | case 'd': 200 | s->request_gap = strtoul(optarg, &end, 10); 201 | if (!*optarg || *end) { 202 | fprintf(stderr, "Delay needs to be a positive " 203 | "integer\n"); 204 | return -1; 205 | } 206 | break; 207 | case 's': 208 | s->key_seed = strtoull(optarg, &end, 0); 209 | if (!*optarg || *end) { 210 | fprintf(stderr, "Key seed needs to be an integer.\n"); 211 | return -1; 212 | } 213 | break; 214 | case 'o': 215 | s->op_seed = strtoull(optarg, &end, 0); 216 | if (!*optarg || *end) { 217 | fprintf(stderr, "Op seed needs to be an integer.\n"); 218 | return -1; 219 | } 220 | break; 221 | case 'K': 222 | settings.keybased = true; 223 | break; 224 | case -1: 225 | done = 1; 226 | break; 227 | case '?': 228 | return -1; 229 | default: 230 | abort(); 231 | } 232 | } 233 | 234 | if (optind + 1 != argc) { 235 | return -1; 236 | } 237 | 238 | /* separate ip and port at colon */ 239 | if ((end = strchr(argv[optind], ':')) == NULL) { 240 | fprintf(stderr, "Colon separating IP and port not found\n"); 241 | return -1; 242 | } 243 | *end = '\0'; 244 | end++; 245 | 246 | /* parse ip */ 247 | if (util_parse_ipv4(argv[optind], &s->dstip) != 0) { 248 | fprintf(stderr, "Parsing ip address failed\n"); 249 | return -1; 250 | } 251 | 252 | /* parse port */ 253 | s->dstport = strtoul(end, NULL, 10); 254 | 255 | // TODO: ensure key size / key num combination is valid 256 | 257 | return 0; 258 | } 259 | -------------------------------------------------------------------------------- /apps/flexkvs/bench/rng.c: 
-------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2019 University of Washington, Max Planck Institute for 3 | * Software Systems, and The University of Texas at Austin 4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining 6 | * a copy of this software and associated documentation files (the 7 | * "Software"), to deal in the Software without restriction, including 8 | * without limitation the rights to use, copy, modify, merge, publish, 9 | * distribute, sublicense, and/or sell copies of the Software, and to 10 | * permit persons to whom the Software is furnished to do so, subject to 11 | * the following conditions: 12 | * 13 | * The above copyright notice and this permission notice shall be 14 | * included in all copies or substantial portions of the Software. 15 | * 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 17 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 19 | * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 20 | * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 21 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 22 | * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
23 | */ 24 | 25 | #include "rng.h" 26 | 27 | static const uint64_t a = 0x5deece66dULL; 28 | static const uint64_t c = 0xb; 29 | static const uint64_t m = 1ULL << 48; 30 | 31 | void rng_init(struct rng *rng, uint64_t seed) 32 | { 33 | rng->seed = (seed ^ a) % m; 34 | } 35 | 36 | uint32_t rng_gen32(struct rng *rng) 37 | { 38 | uint64_t next; 39 | next = (a * rng->seed + c) % m; 40 | rng->seed = next; 41 | return next >> 16; 42 | } 43 | 44 | double rng_gend(struct rng *rng) 45 | { 46 | // This is what Java seems to do 47 | uint64_t x = 48 | (((uint64_t) rng_gen32(rng) >> 6) << 27) + (rng_gen32(rng) >> 5); 49 | return x / ((double) (1ULL << 53)); 50 | } 51 | 52 | void rng_gen(struct rng *rng, void *buf, size_t size) 53 | { 54 | uint32_t x; 55 | while (size >= 4) { 56 | * ((uint32_t *) buf) = rng_gen32(rng); 57 | buf = (void*) ((uintptr_t) buf + 4); 58 | size -= 4; 59 | } 60 | 61 | x = rng_gen32(rng); 62 | while (size > 0) { 63 | * ((uint8_t *) buf) = x >> 24; 64 | x <<= 8; 65 | buf = (void*) ((uintptr_t) buf + 1); 66 | size--; 67 | } 68 | 69 | } 70 | 71 | -------------------------------------------------------------------------------- /apps/flexkvs/bench/rng.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2019 University of Washington, Max Planck Institute for 3 | * Software Systems, and The University of Texas at Austin 4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining 6 | * a copy of this software and associated documentation files (the 7 | * "Software"), to deal in the Software without restriction, including 8 | * without limitation the rights to use, copy, modify, merge, publish, 9 | * distribute, sublicense, and/or sell copies of the Software, and to 10 | * permit persons to whom the Software is furnished to do so, subject to 11 | * the following conditions: 12 | * 13 | * The above copyright notice and this permission notice shall be 14 | * included in all copies or 
substantial portions of the Software. 15 | * 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 17 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 19 | * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 20 | * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 21 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 22 | * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 23 | */ 24 | 25 | #ifndef RNG_H_ 26 | #define RNG_H_ 27 | 28 | #include 29 | #include 30 | 31 | struct rng { 32 | uint64_t seed; 33 | }; 34 | 35 | void rng_init(struct rng *rng, uint64_t seed); 36 | uint32_t rng_gen32(struct rng *rng); 37 | /** Double uniform between 0 and 1 */ 38 | double rng_gend(struct rng *rng); 39 | void rng_gen(struct rng *rng, void *buf, size_t size); 40 | 41 | #endif // ndef RNG_H_ 42 | -------------------------------------------------------------------------------- /apps/flexkvs/bench/workload.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2019 University of Washington, Max Planck Institute for 3 | * Software Systems, and The University of Texas at Austin 4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining 6 | * a copy of this software and associated documentation files (the 7 | * "Software"), to deal in the Software without restriction, including 8 | * without limitation the rights to use, copy, modify, merge, publish, 9 | * distribute, sublicense, and/or sell copies of the Software, and to 10 | * permit persons to whom the Software is furnished to do so, subject to 11 | * the following conditions: 12 | * 13 | * The above copyright notice and this permission notice shall be 14 | * included in all copies or substantial portions of the Software. 
15 | * 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 17 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 19 | * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 20 | * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 21 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 22 | * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 23 | */ 24 | 25 | #include 26 | #include 27 | #include 28 | #include 29 | 30 | #include "benchmark.h" 31 | #include "rng.h" 32 | 33 | static struct key *generate_keys(struct rng *rng, size_t n, size_t ks); 34 | static void distribute_uniform(struct key *keys, size_t n); 35 | static void distribute_zipf(struct key *keys, size_t n, double s); 36 | static struct key *draw_key(struct key *keys, size_t n, struct rng *rng); 37 | 38 | 39 | void workload_init(struct workload *wl) 40 | { 41 | struct rng key_rng; 42 | 43 | /* prepare rngs and distributions for keys */ 44 | rng_init(&key_rng, settings.key_seed); 45 | rng_init(&wl->op_rng, settings.op_seed); 46 | wl->keys = generate_keys(&key_rng, settings.keynum, settings.keysize); 47 | wl->keys_num = settings.keynum; 48 | if (settings.keydist == DIST_UNIFORM) { 49 | distribute_uniform(wl->keys, wl->keys_num); 50 | } else { 51 | distribute_zipf(wl->keys, wl->keys_num, settings.keydistparams.zipf.s); 52 | } 53 | } 54 | 55 | void workload_adjust(struct workload *wl, struct workload *wl2) 56 | { 57 | struct rng key_rng; 58 | 59 | /* prepare rngs and distributions for keys */ 60 | wl->keys = generate_keys(&key_rng, settings.keynum * (1-DEL_RATIO), settings.keysize); 61 | wl2->keys = generate_keys(&key_rng, settings.keynum * (DEL_RATIO), settings.keysize); 62 | wl->keys_num = settings.keynum * (1-DEL_RATIO); 63 | wl2->keys_num = settings.keynum * DEL_RATIO; 64 | if (settings.keydist == DIST_UNIFORM) { 65 | 
distribute_uniform(wl->keys, wl->keys_num); 66 | distribute_uniform(wl2->keys, wl2->keys_num); 67 | } else { 68 | distribute_zipf(wl->keys, wl->keys_num, settings.keydistparams.zipf.s); 69 | distribute_zipf(wl2->keys, wl2->keys_num, settings.keydistparams.zipf.s); 70 | } 71 | } 72 | 73 | 74 | void workload_core_init(struct workload *wl, struct workload_core *wc) 75 | { 76 | rng_init(&wc->rng, 77 | ((uint64_t) rng_gen32(&wl->op_rng) << 16) ^ rng_gen32(&wl->op_rng)); 78 | } 79 | 80 | void workload_op(struct workload *wl, struct workload_core *wc, struct key **k, 81 | enum workload_op *op) 82 | { 83 | if (rng_gend(&wc->rng) <= settings.get_prob) { 84 | *op = WL_OP_GET; 85 | } else { 86 | *op = WL_OP_SET; 87 | } 88 | *k = draw_key(wl->keys, wl->keys_num, &wc->rng); 89 | } 90 | 91 | /** Generate n keys (no distribution set) */ 92 | static struct key *generate_keys(struct rng *rng, size_t n, size_t keysz) 93 | { 94 | size_t i; 95 | struct key *k = malloc(n * sizeof(*k) + n * keysz); 96 | uint8_t *keys = (uint8_t *) (k + n); 97 | if (k == NULL) { 98 | abort(); 99 | } 100 | for (i = 0; i < n; i++) { 101 | rng_gen(rng, keys, keysz); 102 | k[i].key = keys; 103 | k[i].keylen = keysz; 104 | keys += keysz; 105 | } 106 | 107 | // TODO: Fix duplicates 108 | return k; 109 | } 110 | 111 | /** Distribute keys uniformly */ 112 | static void distribute_uniform(struct key *keys, size_t n) 113 | { 114 | size_t i; 115 | double p = (double) 1 / (double) n; 116 | double sum = 0; 117 | for (i = 0; i < n; i++) { 118 | sum += p; 119 | keys[i].cdf = sum; 120 | } 121 | } 122 | 123 | /** Distribute keys according to zipf distribution with parameter s. 
*/ 124 | static void distribute_zipf(struct key *keys, size_t n, double s) 125 | { 126 | size_t i; 127 | double c = 0; 128 | double sum = 0; 129 | 130 | for (i = 0; i < n; i++) { 131 | c += 1 / pow(i + 1, s); 132 | } 133 | 134 | for (i = 0; i < n; i++) { 135 | sum += 1 / pow(i + 1, s) / c; 136 | keys[i].cdf = sum; 137 | } 138 | } 139 | 140 | /** Binary search helper (returns -1 to go left, 0 if found, 1 to go right). */ 141 | static inline int key_in_range(struct key *keys, size_t n, size_t i, double x) 142 | { 143 | double cdf = keys[i].cdf; 144 | //printf("key in range n=%lu i=%lu x=%lf cdf=%lf\n", n, i, x, cdf); 145 | if (x < cdf) { 146 | if (i == 0) { 147 | return 0; 148 | } else { 149 | return (x <= keys[i - 1].cdf ? -1 : 0); 150 | } 151 | } else if (x > cdf) { 152 | if (i == n - 1) { 153 | // Already at right-most value (could happen due to rounding errors 154 | // when generating the distribution) 155 | return 0; 156 | } else { 157 | return 1; 158 | } 159 | } else { 160 | return 0; 161 | } 162 | } 163 | 164 | /** Draw a key at random, according to the configured distribution. */ 165 | static struct key *draw_key(struct key *keys, size_t n, struct rng *rng) 166 | { 167 | double x = rng_gend(rng); 168 | size_t l, r, mid = 0; 169 | int res; 170 | //printf("draw_key(n=%lu)\n", n); 171 | 172 | l = 0; 173 | r = n - 1; 174 | while (l < r) { 175 | mid = (l + r) / 2; 176 | res = key_in_range(keys, n, mid, x); 177 | if (res < 0) { 178 | r = mid - 1; 179 | } else if (res > 0) { 180 | l = mid + 1; 181 | } else { 182 | break; 183 | } 184 | } 185 | 186 | return keys + mid; 187 | } 188 | -------------------------------------------------------------------------------- /apps/flexkvs/flexkvs.conf: -------------------------------------------------------------------------------- 1 | ############### mtcp configuration file ############### 2 | 3 | # The underlying I/O module you want to use. Please 4 | # enable only one out of the two. 
5 | #io = psio 6 | #io = netmap 7 | io = dpdk 8 | 9 | # No. of cores setting (enabling this option will override 10 | # the `cpu' config for those applications that accept 11 | # num_cores as command line arguments) 12 | # 13 | # e.g. in case ./epserver is executed with `-N 4', the 14 | # mtcp core will still invoke 8 mTCP threads if the 15 | # following line is uncommented. 16 | num_cores = 4 17 | 18 | # Number of memory channels per processor socket (dpdk-only) 19 | num_mem_ch = 2 20 | 21 | # Enable multi-process support (under development) 22 | #multiprocess = 0 master 23 | #multiprocess = 1 24 | 25 | # Used port (please adjust accordingly) 26 | #------ PSIO ports -------# 27 | #port = xge0 xge1 28 | #port = xge1 29 | #------ DPDK ports -------# 30 | port = dpdk0 dpdk1 ens2f1 31 | #port = dpdk0 dpdk1 32 | #port = dpdk0:0 33 | #port = dpdk0:1 34 | 35 | # Maximum concurrency per core 36 | max_concurrency = 512 37 | 38 | # Maximum number of socket buffers per core 39 | # Set this to small value if there are many idle connections 40 | max_num_buffers = 512 41 | #max_num_buffers = 32 42 | 43 | # Receive buffer size of sockets 44 | rcvbuf = 8192 45 | #rcvbuf = 1048576 46 | 47 | # Send buffer size of sockets 48 | sndbuf = 8192 49 | #sndbuf = 1048576 50 | 51 | # TCP timeout seconds 52 | # (tcp_timeout = -1 can disable the timeout check) 53 | tcp_timeout = 1 54 | 55 | # TCP timewait seconds 56 | tcp_timewait = 0 57 | 58 | # Interface to print stats (please adjust accordingly) 59 | # You can enable multiple ports in separate lines 60 | #------ PSIO ports -------# 61 | #stat_print = xge0 62 | #stat_print = xge1 63 | #------ DPDK ports -------# 64 | #stat_print = dpdk1 65 | #stat_print = dpdk0:0 66 | #stat_print = dpdk0:1 67 | #stat_print = dpdk1 68 | 69 | ####################################################### 70 | -------------------------------------------------------------------------------- /apps/flexkvs/kvsbench.conf: 
-------------------------------------------------------------------------------- 1 | ############### mtcp configuration file ############### 2 | 3 | # The underlying I/O module you want to use. Please 4 | # enable only one out of the two. 5 | #io = psio 6 | #io = netmap 7 | io = dpdk 8 | 9 | # No. of cores setting (enabling this option will override 10 | # the `cpu' config for those applications that accept 11 | # num_cores as command line arguments) 12 | # 13 | # e.g. in case ./epserver is executed with `-N 4', the 14 | # mtcp core will still invoke 8 mTCP threads if the 15 | # following line is uncommented. 16 | num_cores = 1 17 | 18 | # Number of memory channels per processor socket (dpdk-only) 19 | num_mem_ch = 2 20 | 21 | # Enable multi-process support (under development) 22 | #multiprocess = 0 master 23 | #multiprocess = 1 24 | 25 | # Used port (please adjust accordingly) 26 | #------ PSIO ports -------# 27 | #port = xge0 xge1 28 | #port = xge1 29 | #------ DPDK ports -------# 30 | port = dpdk0 dpdk1 ens2f1 31 | #port = dpdk0 dpdk1 32 | #port = dpdk0:0 33 | #port = dpdk0:1 34 | 35 | # Maximum concurrency per core 36 | max_concurrency = 512 37 | 38 | # Maximum number of socket buffers per core 39 | # Set this to small value if there are many idle connections 40 | max_num_buffers = 512 41 | #max_num_buffers = 32 42 | 43 | # Receive buffer size of sockets 44 | rcvbuf = 8192 45 | #rcvbuf = 1048576 46 | 47 | # Send buffer size of sockets 48 | sndbuf = 8192 49 | #sndbuf = 1048576 50 | 51 | # TCP timeout seconds 52 | # (tcp_timeout = -1 can disable the timeout check) 53 | tcp_timeout = 1 54 | 55 | # TCP timewait seconds 56 | tcp_timewait = 0 57 | 58 | # Interface to print stats (please adjust accordingly) 59 | # You can enable multiple ports in separate lines 60 | #------ PSIO ports -------# 61 | #stat_print = xge0 62 | #stat_print = xge1 63 | #------ DPDK ports -------# 64 | #stat_print = dpdk1 65 | #stat_print = dpdk0:0 66 | #stat_print = dpdk0:1 67 | #stat_print 
= dpdk1 68 | 69 | ####################################################### 70 | -------------------------------------------------------------------------------- /apps/flexkvs/server/hashtable.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2019 University of Washington, Max Planck Institute for 3 | * Software Systems, and The University of Texas at Austin 4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining 6 | * a copy of this software and associated documentation files (the 7 | * "Software"), to deal in the Software without restriction, including 8 | * without limitation the rights to use, copy, modify, merge, publish, 9 | * distribute, sublicense, and/or sell copies of the Software, and to 10 | * permit persons to whom the Software is furnished to do so, subject to 11 | * the following conditions: 12 | * 13 | * The above copyright notice and this permission notice shall be 14 | * included in all copies or substantial portions of the Software. 15 | * 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 17 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 19 | * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 20 | * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 21 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 22 | * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
23 | */ 24 | 25 | #include 26 | #include 27 | #include 28 | 29 | #include "iokvs.h" 30 | 31 | #define HASHTABLE_POWER 31 32 | #define TABLESZ(p) (1ULL << (p)) 33 | 34 | _Static_assert(sizeof(pthread_spinlock_t) == 4, "Bad spinlock size"); 35 | 36 | #define BUCKET_NITEMS 5 37 | 38 | //#define NOHTLOCKS 1 39 | 40 | struct hash_bucket { 41 | struct item *items[BUCKET_NITEMS]; 42 | uint32_t hashes[BUCKET_NITEMS]; 43 | pthread_spinlock_t lock; 44 | } __attribute__((packed)); 45 | 46 | _Static_assert(sizeof(struct hash_bucket) == 64, "Bad hash bucket size"); 47 | 48 | /******************************************************************************/ 49 | /* Hashtable */ 50 | 51 | static size_t nbuckets; 52 | static struct hash_bucket *buckets; 53 | 54 | void hasht_init(void) 55 | { 56 | size_t i; 57 | 58 | nbuckets = TABLESZ(HASHTABLE_POWER); 59 | printf("allocing %zu buckets for %zu bytes\n", nbuckets, nbuckets * sizeof(*buckets)); 60 | buckets = calloc(nbuckets + 1, sizeof(*buckets)); 61 | buckets = (struct hash_bucket *) (((uintptr_t) buckets + 63) & ~63ULL); 62 | if (buckets == NULL) { 63 | perror("Allocating item hash table failed"); 64 | abort(); 65 | } 66 | 67 | for (i = 0; i < nbuckets; i++) { 68 | if (pthread_spin_init(&buckets[i].lock, 0) != 0) { 69 | perror("Initializing spin lock failed"); 70 | abort(); 71 | } 72 | } 73 | } 74 | 75 | 76 | static inline bool item_key_matches(struct item *it, const void *key, 77 | size_t klen) 78 | { 79 | return klen == it->keylen && !__builtin_memcmp(item_key(it), key, klen); 80 | } 81 | 82 | static inline bool item_hkey_matches(struct item *it, const void *key, 83 | size_t klen, uint32_t hv) 84 | { 85 | return it->hv == hv && item_key_matches(it, key, klen); 86 | } 87 | 88 | #if 0 89 | void hasht_prefetch1(uint32_t hv) 90 | { 91 | rte_prefetch0(buckets + (hv % nbuckets)); 92 | } 93 | 94 | void hasht_prefetch2(uint32_t hv) 95 | { 96 | struct hash_bucket *b; 97 | size_t i; 98 | 99 | b = buckets + (hv % nbuckets); 100 | for (i = 
0; i < BUCKET_NITEMS; i++) { 101 | if (b->items[i] != NULL && b->hashes[i] == hv) { 102 | rte_prefetch0(b->items[i]); 103 | } 104 | } 105 | } 106 | #endif 107 | 108 | 109 | struct item *hasht_get(const void *key, size_t klen, uint32_t hv) 110 | { 111 | struct hash_bucket *b; 112 | struct item *it; 113 | size_t i; 114 | 115 | b = buckets + (hv % nbuckets); 116 | #ifndef NOHTLOCKS 117 | pthread_spin_lock(&b->lock); 118 | #endif 119 | 120 | for (i = 0; i < BUCKET_NITEMS; i++) { 121 | if (b->items[i] != NULL && b->hashes[i] == hv) { 122 | it = b->items[i]; 123 | if (item_key_matches(it, key, klen)) { 124 | goto done; 125 | } 126 | } 127 | } 128 | it = b->items[BUCKET_NITEMS - 1]; 129 | if (it != NULL) { 130 | it = it->next; 131 | while (it != NULL && !item_hkey_matches(it, key, klen, hv)) { 132 | it = it->next; 133 | } 134 | } 135 | done: 136 | if (it != NULL) { 137 | item_ref(it); 138 | } 139 | #ifndef NOHTLOCKS 140 | pthread_spin_unlock(&b->lock); 141 | #endif 142 | return it; 143 | } 144 | 145 | 146 | void hasht_put(struct item *nit, struct item *cas) 147 | { 148 | struct hash_bucket *b; 149 | struct item *it, *prev; 150 | size_t i, di; 151 | bool has_direct = false; 152 | uint32_t hv = nit->hv; 153 | void *key = item_key(nit); 154 | size_t klen = nit->keylen; 155 | 156 | 157 | b = buckets + (hv % nbuckets); 158 | #ifndef NOHTLOCKS 159 | pthread_spin_lock(&b->lock); 160 | #endif 161 | 162 | // Check if we need to replace an existing item 163 | for (i = 0; i < BUCKET_NITEMS; i++) { 164 | if (b->items[i] == NULL) { 165 | has_direct = true; 166 | di = i; 167 | } else if (b->hashes[i] == hv) { 168 | it = b->items[i]; 169 | if (item_key_matches(it, key, klen)) { 170 | // Were doing a compare and set 171 | if (cas != NULL && cas != it) { 172 | goto done; 173 | } 174 | assert(nit != it); 175 | item_ref(nit); 176 | nit->next = it->next; 177 | b->items[i] = nit; 178 | item_unref(it); 179 | goto done; 180 | } 181 | } 182 | } 183 | 184 | if (cas != NULL) { 185 | goto done; 186 
| } 187 | 188 | item_ref(nit); 189 | 190 | // Note it does not match, otherwise we would have already bailed in the for 191 | // loop 192 | it = b->items[BUCKET_NITEMS - 1]; 193 | if (it != NULL) { 194 | prev = it; 195 | it = it->next; 196 | while (it != NULL && !item_hkey_matches(it, key, klen, hv)) { 197 | prev = it; 198 | it = it->next; 199 | } 200 | 201 | if (it != NULL) { 202 | nit->next = it->next; 203 | prev->next = nit; 204 | item_unref(it); 205 | goto done; 206 | } 207 | } 208 | 209 | // We did not find an existing entry to replace, just stick it in wherever 210 | // we find room 211 | if (!has_direct) { 212 | di = BUCKET_NITEMS - 1; 213 | } 214 | nit->next = b->items[di]; 215 | b->hashes[di] = hv; 216 | b->items[di] = nit; 217 | 218 | done: 219 | #ifndef NOHTLOCKS 220 | pthread_spin_unlock(&b->lock); 221 | #endif 222 | return; 223 | } 224 | 225 | 226 | -------------------------------------------------------------------------------- /apps/flexkvs/server/ialloc.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2019 University of Washington, Max Planck Institute for 3 | * Software Systems, and The University of Texas at Austin 4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining 6 | * a copy of this software and associated documentation files (the 7 | * "Software"), to deal in the Software without restriction, including 8 | * without limitation the rights to use, copy, modify, merge, publish, 9 | * distribute, sublicense, and/or sell copies of the Software, and to 10 | * permit persons to whom the Software is furnished to do so, subject to 11 | * the following conditions: 12 | * 13 | * The above copyright notice and this permission notice shall be 14 | * included in all copies or substantial portions of the Software. 
15 | * 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 17 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 19 | * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 20 | * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 21 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 22 | * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 23 | */ 24 | 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | 32 | #include "iokvs.h" 33 | 34 | 35 | #define SF_INACTIVE 1 36 | #define SF_CLEANED 4 37 | 38 | struct segment_header { 39 | void *data; 40 | struct segment_header *next; 41 | struct segment_header *prev; 42 | uint32_t offset; 43 | uint32_t freed; 44 | uint32_t size; 45 | uint32_t flags; 46 | }; 47 | 48 | static struct segment_header *free_segments; 49 | static pthread_spinlock_t segalloc_lock; 50 | static void *seg_base; 51 | static struct segment_header **seg_headers; 52 | static size_t seg_alloced; 53 | 54 | #ifdef BARRELFISH 55 | void *mem_base; 56 | uint64_t mem_base_phys; 57 | #endif 58 | 59 | void ialloc_init(void) 60 | { 61 | pthread_spin_init(&segalloc_lock, 0); 62 | free_segments = NULL; 63 | size_t total; 64 | 65 | seg_alloced = 0; 66 | total = settings.segsize * settings.segmaxnum; 67 | printf("Allocating %lu bytes\n", (long unsigned int) total); 68 | 69 | #ifdef BARRELFISH 70 | { 71 | errval_t r; 72 | struct capref cap; 73 | struct frame_identity id; 74 | 75 | r = myt_alloc_map(VREGION_FLAGS_READ_WRITE, total, &seg_base, &cap); 76 | if (err_is_fail(r)) { 77 | USER_PANIC_ERR(r, "Preallocating failed"); 78 | } 79 | 80 | r = invoke_frame_identify(cap, &id); 81 | if (err_is_fail(r)) { 82 | USER_PANIC_ERR(r, "identify failed"); 83 | } 84 | 85 | mem_base = seg_base; 86 | mem_base_phys = id.base; 87 | } 88 | #else 89 | //if ((seg_base = mmap(NULL, 
total, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB | MAP_POPULATE, 90 | //if ((seg_base = mmap(NULL, total, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, 91 | // -1, 0)) == MAP_FAILED) 92 | if ((seg_base = mmap(NULL, total, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, 93 | -1, 0)) == MAP_FAILED) 94 | { 95 | perror("mmap() of segments base failed"); 96 | abort(); 97 | } 98 | printf("seg base %p through %p\n", seg_base, seg_base + total); 99 | #endif 100 | if ((seg_headers = calloc(settings.segmaxnum, sizeof(*seg_headers))) == 101 | NULL) 102 | { 103 | perror("Allocating segment header array failed"); 104 | abort(); 105 | } 106 | } 107 | 108 | static struct segment_header *segment_alloc(void) 109 | { 110 | struct segment_header *h = NULL; 111 | void *data; 112 | size_t i, segsz; 113 | 114 | /* Try to get a segment from the freelist */ 115 | if (free_segments != NULL) { 116 | pthread_spin_lock(&segalloc_lock); 117 | if (free_segments != NULL) { 118 | h = free_segments; 119 | free_segments = h->next; 120 | } 121 | pthread_spin_unlock(&segalloc_lock); 122 | 123 | if (h != NULL) { 124 | goto init_h; 125 | } 126 | } 127 | 128 | /* Check if there are still unallocated segments (note: unlocked) */ 129 | i = seg_alloced; 130 | if (i >= settings.segmaxnum) { 131 | pthread_spin_unlock(&segalloc_lock); 132 | return NULL; 133 | } 134 | 135 | /* If there is a possiblity that there are still unallocated segments, let's 136 | * go for it. 
*/ 137 | pthread_spin_lock(&segalloc_lock); 138 | i = seg_alloced; 139 | if (i >= settings.segmaxnum) { 140 | pthread_spin_unlock(&segalloc_lock); 141 | return NULL; 142 | } 143 | 144 | seg_alloced++; 145 | pthread_spin_unlock(&segalloc_lock); 146 | 147 | segsz = settings.segsize; 148 | data = (void *) ((uintptr_t) seg_base + segsz * i); 149 | #ifndef BARRELFISH 150 | if (mprotect(data, settings.segsize, PROT_READ | PROT_WRITE) != 0) { 151 | perror("mprotect failed"); 152 | /* TODO: check what to do here */ 153 | return NULL; 154 | } 155 | #endif 156 | 157 | h = malloc(sizeof(*h)); 158 | if (h == NULL) { 159 | /* TODO: check what to do here */ 160 | return NULL; 161 | } 162 | seg_headers[i] = h; 163 | 164 | h->size = segsz; 165 | h->data = data; 166 | //printf("allocating segment with size %zu and data at %p\n", segsz, data); 167 | init_h: 168 | h->offset = 0; 169 | h->flags = 0; 170 | h->freed = 0; 171 | return h; 172 | } 173 | 174 | static inline struct segment_header *segment_from_part(void *data) 175 | { 176 | size_t i = ((uintptr_t) data - (uintptr_t) seg_base) / settings.segsize; 177 | assert(i < settings.segmaxnum); 178 | return seg_headers[i]; 179 | } 180 | 181 | static void segment_free(struct segment_header *h) 182 | { 183 | pthread_spin_lock(&segalloc_lock); 184 | h->offset = 0; 185 | h->next = free_segments; 186 | free_segments = h; 187 | pthread_spin_unlock(&segalloc_lock); 188 | } 189 | 190 | static void segment_item_free(struct segment_header *h, size_t total) 191 | { 192 | if (h->size != __sync_add_and_fetch(&h->freed, total)) { 193 | return; 194 | } 195 | } 196 | 197 | static struct item *segment_item_alloc(struct segment_header *h, size_t total) 198 | { 199 | struct item *it = (struct item *) ((uintptr_t) h->data + h->offset); 200 | size_t avail; 201 | 202 | /* Not enough room in this segment */ 203 | avail = h->size - h->offset; 204 | if (avail == 0) { 205 | return NULL; 206 | } else if (avail < total) { 207 | if (avail >= sizeof(struct item)) { 
208 | it->refcount = 0; 209 | /* needed for log scan */ 210 | it->keylen = avail - sizeof(struct item); 211 | it->vallen = 0; 212 | } 213 | segment_item_free(h, avail); 214 | h->offset += avail; 215 | return NULL; 216 | } 217 | 218 | /* Ordering here is important */ 219 | it->refcount = 1; 220 | 221 | h->offset += total; 222 | 223 | return it; 224 | } 225 | 226 | 227 | void ialloc_init_allocator(struct item_allocator *ia) 228 | { 229 | struct segment_header *h; 230 | 231 | memset(ia, 0, sizeof(*ia)); 232 | 233 | if ((h = segment_alloc()) == NULL) { 234 | fprintf(stderr, "Allocating segment failed\n"); 235 | abort(); 236 | } 237 | h->next = NULL; 238 | ia->cur = h; 239 | ia->oldest = h; 240 | 241 | if ((h = segment_alloc()) == NULL) { 242 | fprintf(stderr, "Allocating reserved segment failed\n"); 243 | abort(); 244 | } 245 | h->next = NULL; 246 | ia->reserved = h; 247 | 248 | printf("Initializing allocator: %lu\n", (unsigned long) (settings.segcqsize * 249 | sizeof(*ia->cleanup_queue))); 250 | ia->cleanup_queue = calloc(settings.segcqsize, sizeof(*ia->cleanup_queue)); 251 | ia->cq_head = ia->cq_tail = 0; 252 | ia->cleaning = NULL; 253 | } 254 | 255 | struct item *ialloc_alloc(struct item_allocator *ia, size_t total, bool cleanup) 256 | { 257 | struct segment_header *h, *old; 258 | struct item *it; 259 | assert(total < settings.segsize); 260 | 261 | /* If the reserved segment is currently active, only allocations for cleanup 262 | * are allowed */ 263 | if (ia->reserved == NULL && !cleanup) { 264 | printf("Only cleanup!\n"); 265 | return NULL; 266 | } 267 | 268 | old = ia->cur; 269 | if ((it = segment_item_alloc(old, total)) != NULL) { 270 | return it; 271 | } 272 | 273 | if ((h = segment_alloc()) == NULL) { 274 | /* We're currently doing cleanup, and still have the reserved segment 275 | * then that can be used now */ 276 | if (cleanup && ia->reserved != NULL) { 277 | h = ia->reserved; 278 | ia->reserved = NULL; 279 | } else { 280 | printf("Fail 2!\n"); 281 | return 
NULL; 282 | } 283 | } 284 | old->next = h; 285 | h->next = NULL; 286 | /* Mark old segment as GC-able */ 287 | old->flags |= SF_INACTIVE; 288 | ia->cur = h; 289 | 290 | it = segment_item_alloc(h, total); 291 | if (it == NULL) { 292 | printf("Fail 3!\n"); 293 | return NULL; 294 | } 295 | return it; 296 | } 297 | 298 | void ialloc_free(struct item *it, size_t total) 299 | { 300 | struct segment_header *h = segment_from_part(it); 301 | segment_item_free(h, total); 302 | } 303 | 304 | void ialloc_free_dont_need(struct item *it, size_t total) 305 | { 306 | struct segment_header *h = segment_from_part(it); 307 | segment_item_free(h, total); 308 | if (madvise(it, item_totalsz(it), MADV_DONTNEED) != 0){ 309 | perror("madvise"); 310 | } 311 | } 312 | 313 | struct item *ialloc_cleanup_item(struct item_allocator *ia, bool idle) 314 | { 315 | size_t i; 316 | struct item *it; 317 | 318 | if (!idle) { 319 | if (ia->cleanup_count >= 32) { 320 | return NULL; 321 | } 322 | ia->cleanup_count++; 323 | } 324 | 325 | i = ia->cq_head; 326 | it = ia->cleanup_queue[i]; 327 | if (it != NULL) { 328 | ia->cleanup_queue[i] = NULL; 329 | ia->cq_head = (i + 1) % settings.segcqsize; 330 | } 331 | if (ia->reserved == NULL) { 332 | ia->reserved = segment_alloc(); 333 | } 334 | return it; 335 | } 336 | 337 | void ialloc_cleanup_nextrequest(struct item_allocator *ia) 338 | { 339 | ia->cleanup_count = 0; 340 | } 341 | 342 | void ialloc_maintenance(struct item_allocator *ia) 343 | { 344 | #if 0 345 | struct segment_header *h, *prev, *next, *cand; 346 | struct item *it, **cq = ia->cleanup_queue; 347 | size_t off, size, idx; 348 | double cand_ratio, ratio; 349 | void *data; 350 | 351 | /* Check if we can now free some segments? 
While we're at it, we can also 352 | * look for a candidate to be cleaned */ 353 | h = ia->oldest; 354 | prev = NULL; 355 | cand = NULL; 356 | cand_ratio = 0; 357 | while (h != NULL && (h->flags & SF_INACTIVE) == SF_INACTIVE) { 358 | next = h->next; 359 | /* Done with this segment? */ 360 | if (h->freed == h->size) { 361 | if (prev == NULL) { 362 | ia->oldest = h->next; 363 | } else { 364 | prev->next = h->next; 365 | } 366 | segment_free(h); 367 | h = prev; 368 | } else { 369 | /* Otherwise we also look for the next cleanup candidate if 370 | * necessary */ 371 | ratio = (double) h->freed / h->size; 372 | if (ratio >= 0.5 && ratio > cand_ratio) { 373 | cand_ratio = ratio; 374 | cand = h; 375 | } 376 | } 377 | prev = h; 378 | h = next; 379 | } 380 | 381 | /* Check if we're currently working on cleaning a segment */ 382 | h = ia->cleaning; 383 | off = ia->clean_offset; 384 | size = (h == NULL ? 0 : h->size); 385 | if (h == NULL || off == size) { 386 | h = cand; 387 | ia->cleaning = h; 388 | off = ia->clean_offset = 0; 389 | } 390 | 391 | /* No segments to clean, that's great! */ 392 | if (h == NULL) { 393 | return; 394 | } 395 | 396 | /* Enqueue clean requests to worker untill we run out or the queue is filled 397 | * up */ 398 | idx = ia->cq_tail; 399 | data = h->data; 400 | while (off < size && cq[idx] == NULL) { 401 | it = (struct item *) ((uintptr_t) data + off); 402 | if (size - off < sizeof(struct item)) { 403 | off = size; 404 | break; 405 | } 406 | if (item_tryref(it)) { 407 | cq[idx] = it; 408 | idx = (idx + 1) % settings.segcqsize; 409 | } 410 | off += item_totalsz(it); 411 | } 412 | ia->cq_tail = idx; 413 | ia->clean_offset = off; 414 | #endif 415 | struct segment_header *h, *prev, *next, *cand; 416 | struct item *it, **cq = ia->cleanup_queue; 417 | size_t off, size, idx; 418 | double cand_ratio, ratio; 419 | void *data; 420 | 421 | /* Check if we can now free some segments? 
While we're at it, we can also 422 | * look for a candidate to be cleaned */ 423 | cand = NULL; 424 | cand_ratio = 0; 425 | h = ia->oldest; 426 | prev = NULL; 427 | /* We stop before the last segment in the list, and if we hit any 428 | * non-inactive segments. This prevents us from having to touch the cur 429 | * pointers. */ 430 | while (h != NULL && h->next != NULL && 431 | (h->flags & SF_INACTIVE) == SF_INACTIVE) 432 | { 433 | next = h->next; 434 | ratio = (double) h->freed / h->size; 435 | /* Done with this segment? */ 436 | if (h->freed == h->size) { 437 | if (prev == NULL) { 438 | ia->oldest = h->next; 439 | } else { 440 | prev->next = h->next; 441 | } 442 | segment_free(h); 443 | h = prev; 444 | } else if ((h->flags & SF_CLEANED) != SF_CLEANED) { 445 | /* Otherwise we also look for the next cleanup candidate if 446 | * necessary */ 447 | ratio = (double) h->freed / h->size; 448 | if (ratio >= settings.clean_ratio && ratio > cand_ratio) { 449 | cand_ratio = ratio; 450 | cand = h; 451 | } 452 | } 453 | prev = h; 454 | h = next; 455 | } 456 | 457 | /* Check if we're currently working on cleaning a segment */ 458 | h = ia->cleaning; 459 | off = ia->clean_offset; 460 | size = (h == NULL ? 0 : h->size); 461 | if (h == NULL || off == size) { 462 | h = cand; 463 | ia->cleaning = h; 464 | off = ia->clean_offset = 0; 465 | if (h != NULL) { 466 | h->flags |= SF_CLEANED; 467 | } 468 | } 469 | 470 | /* No segments to clean, that's great! 
*/ 471 | if (h == NULL) { 472 | return; 473 | } 474 | 475 | /* Enqueue clean requests to worker untill we run out or the queue is filled 476 | * up */ 477 | idx = ia->cq_tail; 478 | data = h->data; 479 | while (off < size && cq[idx] == NULL) { 480 | it = (struct item *) ((uintptr_t) data + off); 481 | if (size - off < sizeof(struct item)) { 482 | off = size; 483 | break; 484 | } 485 | if (item_tryref(it)) { 486 | cq[idx] = it; 487 | idx = (idx + 1) % settings.segcqsize; 488 | } 489 | off += item_totalsz(it); 490 | } 491 | ia->cq_tail = idx; 492 | ia->clean_offset = off; 493 | 494 | } 495 | -------------------------------------------------------------------------------- /apps/flexkvs/server/iokvs.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2019 University of Washington, Max Planck Institute for 3 | * Software Systems, and The University of Texas at Austin 4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining 6 | * a copy of this software and associated documentation files (the 7 | * "Software"), to deal in the Software without restriction, including 8 | * without limitation the rights to use, copy, modify, merge, publish, 9 | * distribute, sublicense, and/or sell copies of the Software, and to 10 | * permit persons to whom the Software is furnished to do so, subject to 11 | * the following conditions: 12 | * 13 | * The above copyright notice and this permission notice shall be 14 | * included in all copies or substantial portions of the Software. 15 | * 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 17 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
19 | * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 20 | * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 21 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 22 | * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 23 | */ 24 | 25 | #ifndef IOKVS_H_ 26 | #define IOKVS_H_ 27 | 28 | #define GNU_SOURCE_ 29 | #include 30 | #include 31 | #include 32 | #include 33 | 34 | 35 | /******************************************************************************/ 36 | /* Settings */ 37 | 38 | /** Configurable settings */ 39 | struct settings { 40 | /** Size of log segments in bytes */ 41 | size_t segsize; 42 | /** Maximal number of segments to use */ 43 | size_t segmaxnum; 44 | /** Size of seqment clean queue */ 45 | size_t segcqsize; 46 | /** Segment cleaning ratio */ 47 | double clean_ratio; 48 | /** UDP port to listen on */ 49 | uint16_t udpport; 50 | /** Verbosity for log messages. */ 51 | uint8_t verbose; 52 | /** Number of cores */ 53 | uint8_t numcores; 54 | /** Config file */ 55 | char *config_file; 56 | }; 57 | 58 | /** Global settings */ 59 | extern struct settings settings; 60 | 61 | /** Initialize global settings from command-line. */ 62 | int settings_init(int argc, char *argv[]); 63 | 64 | 65 | /******************************************************************************/ 66 | /* Hash table operations */ 67 | 68 | /** Initialize hash table. */ 69 | void hasht_init(void); 70 | 71 | /** Prefetch hash table slot */ 72 | void hasht_prefetch1(uint32_t hv); 73 | 74 | /** Prefetch matching items */ 75 | void hasht_prefetch2(uint32_t hv); 76 | 77 | /** 78 | * Lookup key in hash table. 
79 | * @param key Key 80 | * @param klen Length of key in bytes 81 | * @param hv Hash of key 82 | * @return Pointer to item or NULL 83 | */ 84 | struct item *hasht_get(const void *key, size_t klen, uint32_t hv); 85 | 86 | /** 87 | * Insert item into hash table 88 | * @param it Item 89 | * @param cas If != NULL, will only store `it' if cas is the object currently 90 | * stored for the key (compare and set). 91 | */ 92 | void hasht_put(struct item *it, struct item *cas); 93 | 94 | 95 | /******************************************************************************/ 96 | /* Item Allocation */ 97 | struct segment_header; 98 | struct item; 99 | /** 100 | * Item allocator struct. Should be considered to be opaque outside ialloc.c 101 | * 102 | * This struct is slightly ugly, as it is split up into 3 parts to reduce false 103 | * sharing as much as possible. 104 | */ 105 | struct item_allocator { 106 | /***********************************************************/ 107 | /* Part 1: mostly read-only for maintenance and worker */ 108 | 109 | /* Reserved segment for log cleaning in case we run out */ 110 | struct segment_header *reserved; 111 | /* Queue for communication */ 112 | struct item **cleanup_queue; 113 | 114 | uint8_t pad_0[48]; 115 | /***********************************************************/ 116 | /* Part 2: Only accessed by worker threads */ 117 | 118 | /* Current segment */ 119 | struct segment_header *cur; 120 | /* Head pointer in cleanup queue */ 121 | size_t cq_head; 122 | /* Clenanup counter, limits mandatory cleanup per request */ 123 | size_t cleanup_count; 124 | 125 | uint8_t pad_1[40]; 126 | /***********************************************************/ 127 | /* Part 3: Only accessed by maintenance threads */ 128 | 129 | /* Oldest segment */ 130 | struct segment_header *oldest; 131 | /* Tail pointer for cleanup queue */ 132 | size_t cq_tail; 133 | /* */ 134 | struct segment_header *cleaning; 135 | /* */ 136 | size_t clean_offset; 137 | }; 138 | 139 | 
_Static_assert(offsetof(struct item_allocator, cur) % 64 == 0, 140 | "Alignment in struct item_allocator broken 1"); 141 | _Static_assert(offsetof(struct item_allocator, oldest) % 64 == 0, 142 | "Alignment in struct item_allocator broken 2"); 143 | 144 | /** Initialize item allocation. Prepares memory regions etc. */ 145 | void ialloc_init(void); 146 | 147 | /** Initialize an item allocator instance. */ 148 | void ialloc_init_allocator(struct item_allocator *ia); 149 | 150 | /** 151 | * Allocate an item. 152 | * 153 | * Note this function has two modes: cleanup and non-cleanup. In cleanup mode, 154 | * the allocator will use the segment reserved for log cleanup if no other 155 | * allocation is possible, otherwise it will just return NULL and leave the 156 | * reserved segment untouched. 157 | * 158 | * @param ia Allocator instance 159 | * @param total Total number of bytes (includes item struct) 160 | * @param cleanup true if this allocation is for a cleanup operation 161 | * @return Allocated item or NULL. 162 | */ 163 | struct item *ialloc_alloc(struct item_allocator *ia, size_t total, 164 | bool cleanup); 165 | 166 | /** 167 | * Free an item. 168 | * @param it Item 169 | * @param total Total number of bytes (includes item struct) 170 | */ 171 | void ialloc_free(struct item *it, size_t total); 172 | void ialloc_free_dont_need(struct item *it, size_t total); 173 | 174 | /** 175 | * Get item from cleanup queue for this allocator. 176 | * @param ia Allocator instance 177 | * @param idle true if there are currently no pending requests, false otherwise 178 | * @return Item or NULL 179 | */ 180 | struct item *ialloc_cleanup_item(struct item_allocator *ia, bool idle); 181 | 182 | /** 183 | * Resets per-request cleanup counters. Should be called when a new request is 184 | * ready to be processed before calling ialloc_cleanup_item. 
185 | */ 186 | void ialloc_cleanup_nextrequest(struct item_allocator *ia); 187 | 188 | /** 189 | * Dispatch log cleanup operations for this instance, if required. To be called 190 | * from maintenance thread. 191 | */ 192 | void ialloc_maintenance(struct item_allocator *ia); 193 | 194 | 195 | /******************************************************************************/ 196 | /* Items */ 197 | 198 | /** 199 | * Item. 200 | * The item struct is immediately followed by first the key, and then the 201 | * associated value. 202 | */ 203 | struct item { 204 | /** Next item in the hash chain. */ 205 | struct item *next; 206 | /** Hash value for this item */ 207 | uint32_t hv; 208 | /** Length of value in bytes */ 209 | uint32_t vallen; 210 | /** Reference count */ 211 | volatile uint16_t refcount; 212 | /** Length of key in bytes */ 213 | uint16_t keylen; 214 | /** Flags (currently unused, but provides padding) */ 215 | uint32_t flags; 216 | }; 217 | 218 | /** Get pointer to the item's key */ 219 | static inline void *item_key(struct item *it) 220 | { 221 | return it + 1; 222 | } 223 | 224 | /** Get pointer to the item's value */ 225 | static inline void *item_value(struct item *it) 226 | { 227 | return (void *) ((uintptr_t) (it + 1) + it->keylen); 228 | } 229 | 230 | /** Total number of bytes for this item (includes item struct) */ 231 | static inline size_t item_totalsz(struct item *it) 232 | { 233 | return sizeof(*it) + it->vallen + it->keylen; 234 | } 235 | 236 | /** Increment item's refcount (original refcount must not be 0). */ 237 | static inline void item_ref(struct item *it) 238 | { 239 | uint16_t old; 240 | old = __sync_add_and_fetch(&it->refcount, 1); 241 | assert(old != 1); 242 | } 243 | 244 | /** 245 | * Increment item's refcount if it is not zero. 246 | * @return true if the refcount was increased, false otherwise. 
247 | */ 248 | static inline bool item_tryref(struct item *it) 249 | { 250 | uint16_t c; 251 | do { 252 | c = it->refcount; 253 | if (c == 0) { 254 | return false; 255 | } 256 | } while (!__sync_bool_compare_and_swap(&it->refcount, c, c + 1)); 257 | return true; 258 | } 259 | 260 | /** 261 | * Decrement item's refcount, and free item if refcount = 0. 262 | * The original refcount must be > 0. 263 | */ 264 | static inline void item_unref(struct item *it) 265 | { 266 | uint16_t c; 267 | assert(it->refcount > 0); 268 | if ((c = __sync_sub_and_fetch(&it->refcount, 1)) == 0) { 269 | ialloc_free(it, item_totalsz(it)); 270 | } 271 | } 272 | 273 | /** Wrapper for transport code */ 274 | static inline void myt_item_release(void *it) 275 | { 276 | item_unref(it); 277 | } 278 | 279 | 280 | 281 | 282 | uint32_t jenkins_hash(const void *key, size_t length); 283 | 284 | #endif // ndef IOKVS_H_ 285 | -------------------------------------------------------------------------------- /apps/flexkvs/server/jenkins_hash.c: -------------------------------------------------------------------------------- 1 | /* -*- Mode: C; tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- */ 2 | /* 3 | * Hash table 4 | * 5 | * The hash function used here is by Bob Jenkins, 1996: 6 | * 7 | * "By Bob Jenkins, 1996. bob_jenkins@burtleburtle.net. 8 | * You may use this code any way you wish, private, educational, 9 | * or commercial. It's free." 10 | * 11 | */ 12 | #include "iokvs.h" 13 | 14 | #define ENDIAN_LITTLE 1 15 | 16 | /* 17 | * Since the hash function does bit manipulation, it needs to know 18 | * whether it's big or little-endian. ENDIAN_LITTLE and ENDIAN_BIG 19 | * are set in the configure script. 
20 | */ 21 | #if ENDIAN_BIG == 1 22 | # define HASH_LITTLE_ENDIAN 0 23 | # define HASH_BIG_ENDIAN 1 24 | #else 25 | # if ENDIAN_LITTLE == 1 26 | # define HASH_LITTLE_ENDIAN 1 27 | # define HASH_BIG_ENDIAN 0 28 | # else 29 | # define HASH_LITTLE_ENDIAN 0 30 | # define HASH_BIG_ENDIAN 0 31 | # endif 32 | #endif 33 | 34 | #define rot(x,k) (((x)<<(k)) ^ ((x)>>(32-(k)))) 35 | 36 | /* 37 | ------------------------------------------------------------------------------- 38 | mix -- mix 3 32-bit values reversibly. 39 | 40 | This is reversible, so any information in (a,b,c) before mix() is 41 | still in (a,b,c) after mix(). 42 | 43 | If four pairs of (a,b,c) inputs are run through mix(), or through 44 | mix() in reverse, there are at least 32 bits of the output that 45 | are sometimes the same for one pair and different for another pair. 46 | This was tested for: 47 | * pairs that differed by one bit, by two bits, in any combination 48 | of top bits of (a,b,c), or in any combination of bottom bits of 49 | (a,b,c). 50 | * "differ" is defined as +, -, ^, or ~^. For + and -, I transformed 51 | the output delta to a Gray code (a^(a>>1)) so a string of 1's (as 52 | is commonly produced by subtraction) look like a single 1-bit 53 | difference. 54 | * the base values were pseudorandom, all zero but one bit set, or 55 | all zero plus a counter that starts at zero. 56 | 57 | Some k values for my "a-=c; a^=rot(c,k); c+=b;" arrangement that 58 | satisfy this are 59 | 4 6 8 16 19 4 60 | 9 15 3 18 27 15 61 | 14 9 3 7 17 3 62 | Well, "9 15 3 18 27 15" didn't quite get 32 bits diffing 63 | for "differ" defined as + with a one-bit base and a two-bit delta. I 64 | used http://burtleburtle.net/bob/hash/avalanche.html to choose 65 | the operations, constants, and arrangements of the variables. 66 | 67 | This does not achieve avalanche. There are input bits of (a,b,c) 68 | that fail to affect some output bits of (a,b,c), especially of a. 
The 69 | most thoroughly mixed value is c, but it doesn't really even achieve 70 | avalanche in c. 71 | 72 | This allows some parallelism. Read-after-writes are good at doubling 73 | the number of bits affected, so the goal of mixing pulls in the opposite 74 | direction as the goal of parallelism. I did what I could. Rotates 75 | seem to cost as much as shifts on every machine I could lay my hands 76 | on, and rotates are much kinder to the top and bottom bits, so I used 77 | rotates. 78 | ------------------------------------------------------------------------------- 79 | */ 80 | #define mix(a,b,c) \ 81 | { \ 82 | a -= c; a ^= rot(c, 4); c += b; \ 83 | b -= a; b ^= rot(a, 6); a += c; \ 84 | c -= b; c ^= rot(b, 8); b += a; \ 85 | a -= c; a ^= rot(c,16); c += b; \ 86 | b -= a; b ^= rot(a,19); a += c; \ 87 | c -= b; c ^= rot(b, 4); b += a; \ 88 | } 89 | 90 | /* 91 | ------------------------------------------------------------------------------- 92 | final -- final mixing of 3 32-bit values (a,b,c) into c 93 | 94 | Pairs of (a,b,c) values differing in only a few bits will usually 95 | produce values of c that look totally different. This was tested for 96 | * pairs that differed by one bit, by two bits, in any combination 97 | of top bits of (a,b,c), or in any combination of bottom bits of 98 | (a,b,c). 99 | * "differ" is defined as +, -, ^, or ~^. For + and -, I transformed 100 | the output delta to a Gray code (a^(a>>1)) so a string of 1's (as 101 | is commonly produced by subtraction) look like a single 1-bit 102 | difference. 103 | * the base values were pseudorandom, all zero but one bit set, or 104 | all zero plus a counter that starts at zero. 
105 | 106 | These constants passed: 107 | 14 11 25 16 4 14 24 108 | 12 14 25 16 4 14 24 109 | and these came close: 110 | 4 8 15 26 3 22 24 111 | 10 8 15 26 3 22 24 112 | 11 8 15 26 3 22 24 113 | ------------------------------------------------------------------------------- 114 | */ 115 | #define final(a,b,c) \ 116 | { \ 117 | c ^= b; c -= rot(b,14); \ 118 | a ^= c; a -= rot(c,11); \ 119 | b ^= a; b -= rot(a,25); \ 120 | c ^= b; c -= rot(b,16); \ 121 | a ^= c; a -= rot(c,4); \ 122 | b ^= a; b -= rot(a,14); \ 123 | c ^= b; c -= rot(b,24); \ 124 | } 125 | 126 | #if HASH_LITTLE_ENDIAN == 1 127 | uint32_t jenkins_hash( 128 | const void *key, /* the key to hash */ 129 | size_t length) /* length of the key */ 130 | { 131 | uint32_t a,b,c; /* internal state */ 132 | union { const void *ptr; size_t i; } u; /* needed for Mac Powerbook G4 */ 133 | 134 | /* Set up the internal state */ 135 | a = b = c = 0xdeadbeef + ((uint32_t)length) + 0; 136 | 137 | u.ptr = key; 138 | if (HASH_LITTLE_ENDIAN && ((u.i & 0x3) == 0)) { 139 | const uint32_t *k = key; /* read 32-bit chunks */ 140 | #ifdef VALGRIND 141 | const uint8_t *k8; 142 | #endif /* ifdef VALGRIND */ 143 | 144 | /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */ 145 | while (length > 12) 146 | { 147 | a += k[0]; 148 | b += k[1]; 149 | c += k[2]; 150 | mix(a,b,c); 151 | length -= 12; 152 | k += 3; 153 | } 154 | 155 | /*----------------------------- handle the last (probably partial) block */ 156 | /* 157 | * "k[2]&0xffffff" actually reads beyond the end of the string, but 158 | * then masks off the part it's not allowed to read. Because the 159 | * string is aligned, the masked-off tail is in the same word as the 160 | * rest of the string. Every machine with memory protection I've seen 161 | * does it on word boundaries, so is OK with this. But VALGRIND will 162 | * still catch it and complain. The masking trick does make the hash 163 | * noticeably faster for short strings (like English words). 
164 | */ 165 | #ifndef VALGRIND 166 | 167 | switch(length) 168 | { 169 | case 12: c+=k[2]; b+=k[1]; a+=k[0]; break; 170 | case 11: c+=k[2]&0xffffff; b+=k[1]; a+=k[0]; break; 171 | case 10: c+=k[2]&0xffff; b+=k[1]; a+=k[0]; break; 172 | case 9 : c+=k[2]&0xff; b+=k[1]; a+=k[0]; break; 173 | case 8 : b+=k[1]; a+=k[0]; break; 174 | case 7 : b+=k[1]&0xffffff; a+=k[0]; break; 175 | case 6 : b+=k[1]&0xffff; a+=k[0]; break; 176 | case 5 : b+=k[1]&0xff; a+=k[0]; break; 177 | case 4 : a+=k[0]; break; 178 | case 3 : a+=k[0]&0xffffff; break; 179 | case 2 : a+=k[0]&0xffff; break; 180 | case 1 : a+=k[0]&0xff; break; 181 | case 0 : return c; /* zero length strings require no mixing */ 182 | } 183 | 184 | #else /* make valgrind happy */ 185 | 186 | k8 = (const uint8_t *)k; 187 | switch(length) 188 | { 189 | case 12: c+=k[2]; b+=k[1]; a+=k[0]; break; 190 | case 11: c+=((uint32_t)k8[10])<<16; /* fall through */ 191 | case 10: c+=((uint32_t)k8[9])<<8; /* fall through */ 192 | case 9 : c+=k8[8]; /* fall through */ 193 | case 8 : b+=k[1]; a+=k[0]; break; 194 | case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */ 195 | case 6 : b+=((uint32_t)k8[5])<<8; /* fall through */ 196 | case 5 : b+=k8[4]; /* fall through */ 197 | case 4 : a+=k[0]; break; 198 | case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */ 199 | case 2 : a+=((uint32_t)k8[1])<<8; /* fall through */ 200 | case 1 : a+=k8[0]; break; 201 | case 0 : return c; /* zero length strings require no mixing */ 202 | } 203 | 204 | #endif /* !valgrind */ 205 | 206 | } else if (HASH_LITTLE_ENDIAN && ((u.i & 0x1) == 0)) { 207 | const uint16_t *k = key; /* read 16-bit chunks */ 208 | const uint8_t *k8; 209 | 210 | /*--------------- all but last block: aligned reads and different mixing */ 211 | while (length > 12) 212 | { 213 | a += k[0] + (((uint32_t)k[1])<<16); 214 | b += k[2] + (((uint32_t)k[3])<<16); 215 | c += k[4] + (((uint32_t)k[5])<<16); 216 | mix(a,b,c); 217 | length -= 12; 218 | k += 6; 219 | } 220 | 221 | 
/*----------------------------- handle the last (probably partial) block */ 222 | k8 = (const uint8_t *)k; 223 | switch(length) 224 | { 225 | case 12: c+=k[4]+(((uint32_t)k[5])<<16); 226 | b+=k[2]+(((uint32_t)k[3])<<16); 227 | a+=k[0]+(((uint32_t)k[1])<<16); 228 | break; 229 | case 11: c+=((uint32_t)k8[10])<<16; /* @fallthrough */ 230 | case 10: c+=k[4]; /* @fallthrough@ */ 231 | b+=k[2]+(((uint32_t)k[3])<<16); 232 | a+=k[0]+(((uint32_t)k[1])<<16); 233 | break; 234 | case 9 : c+=k8[8]; /* @fallthrough */ 235 | case 8 : b+=k[2]+(((uint32_t)k[3])<<16); 236 | a+=k[0]+(((uint32_t)k[1])<<16); 237 | break; 238 | case 7 : b+=((uint32_t)k8[6])<<16; /* @fallthrough */ 239 | case 6 : b+=k[2]; 240 | a+=k[0]+(((uint32_t)k[1])<<16); 241 | break; 242 | case 5 : b+=k8[4]; /* @fallthrough */ 243 | case 4 : a+=k[0]+(((uint32_t)k[1])<<16); 244 | break; 245 | case 3 : a+=((uint32_t)k8[2])<<16; /* @fallthrough */ 246 | case 2 : a+=k[0]; 247 | break; 248 | case 1 : a+=k8[0]; 249 | break; 250 | case 0 : return c; /* zero length strings require no mixing */ 251 | } 252 | 253 | } else { /* need to read the key one byte at a time */ 254 | const uint8_t *k = key; 255 | 256 | /*--------------- all but the last block: affect some 32 bits of (a,b,c) */ 257 | while (length > 12) 258 | { 259 | a += k[0]; 260 | a += ((uint32_t)k[1])<<8; 261 | a += ((uint32_t)k[2])<<16; 262 | a += ((uint32_t)k[3])<<24; 263 | b += k[4]; 264 | b += ((uint32_t)k[5])<<8; 265 | b += ((uint32_t)k[6])<<16; 266 | b += ((uint32_t)k[7])<<24; 267 | c += k[8]; 268 | c += ((uint32_t)k[9])<<8; 269 | c += ((uint32_t)k[10])<<16; 270 | c += ((uint32_t)k[11])<<24; 271 | mix(a,b,c); 272 | length -= 12; 273 | k += 12; 274 | } 275 | 276 | /*-------------------------------- last block: affect all 32 bits of (c) */ 277 | switch(length) /* all the case statements fall through */ 278 | { 279 | case 12: c+=((uint32_t)k[11])<<24; 280 | case 11: c+=((uint32_t)k[10])<<16; 281 | case 10: c+=((uint32_t)k[9])<<8; 282 | case 9 : c+=k[8]; 283 | 
case 8 : b+=((uint32_t)k[7])<<24; 284 | case 7 : b+=((uint32_t)k[6])<<16; 285 | case 6 : b+=((uint32_t)k[5])<<8; 286 | case 5 : b+=k[4]; 287 | case 4 : a+=((uint32_t)k[3])<<24; 288 | case 3 : a+=((uint32_t)k[2])<<16; 289 | case 2 : a+=((uint32_t)k[1])<<8; 290 | case 1 : a+=k[0]; 291 | break; 292 | case 0 : return c; /* zero length strings require no mixing */ 293 | } 294 | } 295 | 296 | final(a,b,c); 297 | return c; /* zero length strings require no mixing */ 298 | } 299 | 300 | #elif HASH_BIG_ENDIAN == 1 301 | /* 302 | * hashbig(): 303 | * This is the same as hashword() on big-endian machines. It is different 304 | * from hashlittle() on all machines. hashbig() takes advantage of 305 | * big-endian byte ordering. 306 | */ 307 | uint32_t jenkins_hash( const void *key, size_t length) 308 | { 309 | uint32_t a,b,c; 310 | union { const void *ptr; size_t i; } u; /* to cast key to (size_t) happily */ 311 | 312 | /* Set up the internal state */ 313 | a = b = c = 0xdeadbeef + ((uint32_t)length) + 0; 314 | 315 | u.ptr = key; 316 | if (HASH_BIG_ENDIAN && ((u.i & 0x3) == 0)) { 317 | const uint32_t *k = key; /* read 32-bit chunks */ 318 | #ifdef VALGRIND 319 | const uint8_t *k8; 320 | #endif /* ifdef VALGRIND */ 321 | 322 | /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */ 323 | while (length > 12) 324 | { 325 | a += k[0]; 326 | b += k[1]; 327 | c += k[2]; 328 | mix(a,b,c); 329 | length -= 12; 330 | k += 3; 331 | } 332 | 333 | /*----------------------------- handle the last (probably partial) block */ 334 | /* 335 | * "k[2]<<8" actually reads beyond the end of the string, but 336 | * then shifts out the part it's not allowed to read. Because the 337 | * string is aligned, the illegal read is in the same word as the 338 | * rest of the string. Every machine with memory protection I've seen 339 | * does it on word boundaries, so is OK with this. But VALGRIND will 340 | * still catch it and complain. 
The masking trick does make the hash 341 | * noticeably faster for short strings (like English words). 342 | */ 343 | #ifndef VALGRIND 344 | 345 | switch(length) 346 | { 347 | case 12: c+=k[2]; b+=k[1]; a+=k[0]; break; 348 | case 11: c+=k[2]&0xffffff00; b+=k[1]; a+=k[0]; break; 349 | case 10: c+=k[2]&0xffff0000; b+=k[1]; a+=k[0]; break; 350 | case 9 : c+=k[2]&0xff000000; b+=k[1]; a+=k[0]; break; 351 | case 8 : b+=k[1]; a+=k[0]; break; 352 | case 7 : b+=k[1]&0xffffff00; a+=k[0]; break; 353 | case 6 : b+=k[1]&0xffff0000; a+=k[0]; break; 354 | case 5 : b+=k[1]&0xff000000; a+=k[0]; break; 355 | case 4 : a+=k[0]; break; 356 | case 3 : a+=k[0]&0xffffff00; break; 357 | case 2 : a+=k[0]&0xffff0000; break; 358 | case 1 : a+=k[0]&0xff000000; break; 359 | case 0 : return c; /* zero length strings require no mixing */ 360 | } 361 | 362 | #else /* make valgrind happy */ 363 | 364 | k8 = (const uint8_t *)k; 365 | switch(length) /* all the case statements fall through */ 366 | { 367 | case 12: c+=k[2]; b+=k[1]; a+=k[0]; break; 368 | case 11: c+=((uint32_t)k8[10])<<8; /* fall through */ 369 | case 10: c+=((uint32_t)k8[9])<<16; /* fall through */ 370 | case 9 : c+=((uint32_t)k8[8])<<24; /* fall through */ 371 | case 8 : b+=k[1]; a+=k[0]; break; 372 | case 7 : b+=((uint32_t)k8[6])<<8; /* fall through */ 373 | case 6 : b+=((uint32_t)k8[5])<<16; /* fall through */ 374 | case 5 : b+=((uint32_t)k8[4])<<24; /* fall through */ 375 | case 4 : a+=k[0]; break; 376 | case 3 : a+=((uint32_t)k8[2])<<8; /* fall through */ 377 | case 2 : a+=((uint32_t)k8[1])<<16; /* fall through */ 378 | case 1 : a+=((uint32_t)k8[0])<<24; break; 379 | case 0 : return c; 380 | } 381 | 382 | #endif /* !VALGRIND */ 383 | 384 | } else { /* need to read the key one byte at a time */ 385 | const uint8_t *k = key; 386 | 387 | /*--------------- all but the last block: affect some 32 bits of (a,b,c) */ 388 | while (length > 12) 389 | { 390 | a += ((uint32_t)k[0])<<24; 391 | a += ((uint32_t)k[1])<<16; 392 | a += 
((uint32_t)k[2])<<8; 393 | a += ((uint32_t)k[3]); 394 | b += ((uint32_t)k[4])<<24; 395 | b += ((uint32_t)k[5])<<16; 396 | b += ((uint32_t)k[6])<<8; 397 | b += ((uint32_t)k[7]); 398 | c += ((uint32_t)k[8])<<24; 399 | c += ((uint32_t)k[9])<<16; 400 | c += ((uint32_t)k[10])<<8; 401 | c += ((uint32_t)k[11]); 402 | mix(a,b,c); 403 | length -= 12; 404 | k += 12; 405 | } 406 | 407 | /*-------------------------------- last block: affect all 32 bits of (c) */ 408 | switch(length) /* all the case statements fall through */ 409 | { 410 | case 12: c+=k[11]; 411 | case 11: c+=((uint32_t)k[10])<<8; 412 | case 10: c+=((uint32_t)k[9])<<16; 413 | case 9 : c+=((uint32_t)k[8])<<24; 414 | case 8 : b+=k[7]; 415 | case 7 : b+=((uint32_t)k[6])<<8; 416 | case 6 : b+=((uint32_t)k[5])<<16; 417 | case 5 : b+=((uint32_t)k[4])<<24; 418 | case 4 : a+=k[3]; 419 | case 3 : a+=((uint32_t)k[2])<<8; 420 | case 2 : a+=((uint32_t)k[1])<<16; 421 | case 1 : a+=((uint32_t)k[0])<<24; 422 | break; 423 | case 0 : return c; 424 | } 425 | } 426 | 427 | final(a,b,c); 428 | return c; 429 | } 430 | #else /* HASH_XXX_ENDIAN == 1 */ 431 | #error Must define HASH_BIG_ENDIAN or HASH_LITTLE_ENDIAN 432 | #endif /* HASH_XXX_ENDIAN == 1 */ 433 | -------------------------------------------------------------------------------- /apps/flexkvs/server/settings.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2019 University of Washington, Max Planck Institute for 3 | * Software Systems, and The University of Texas at Austin 4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining 6 | * a copy of this software and associated documentation files (the 7 | * "Software"), to deal in the Software without restriction, including 8 | * without limitation the rights to use, copy, modify, merge, publish, 9 | * distribute, sublicense, and/or sell copies of the Software, and to 10 | * permit persons to whom the Software is furnished to do so, subject to 11 
| * the following conditions: 12 | * 13 | * The above copyright notice and this permission notice shall be 14 | * included in all copies or substantial portions of the Software. 15 | * 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 17 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 19 | * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 20 | * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 21 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 22 | * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 23 | */ 24 | 25 | #include 26 | #include 27 | 28 | #include "iokvs.h" 29 | 30 | struct settings settings; 31 | 32 | int settings_init(int argc, char *argv[]) 33 | { 34 | settings.udpport = 11211; 35 | settings.verbose = 1; 36 | //settings.segsize = 256 * 1024; 37 | //settings.segsize = 128 * 65536; 38 | settings.segsize = 1024 * 1024 * 1024; 39 | //settings.segmaxnum = 4096; 40 | //settings.segmaxnum = 64 * 4096; 41 | settings.segmaxnum = 700; 42 | //settings.segcqsize = 32 * 1024; 43 | settings.segcqsize = 1500; 44 | settings.clean_ratio = 0.8; 45 | //settings.clean_ratio = 1.1; 46 | 47 | if (argc != 3) { 48 | fprintf(stderr, "Usage: flexkvs CONFIG THREADS\n"); 49 | return -1; 50 | } 51 | 52 | settings.numcores = atoi(argv[2]); 53 | settings.config_file = argv[1]; 54 | return 0; 55 | } 56 | -------------------------------------------------------------------------------- /apps/flexkvs/unlink_socks.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | for i in {0..15}; do unlink kvs_sock$i; done 4 | -------------------------------------------------------------------------------- /build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -x 3 | 4 | SCRIPTDIR="$(dirname 
"$(readlink -f "$0")")" 5 | PREFIX="$HOME"/.local 6 | 7 | export PKG_CONFIG_PATH="$PREFIX"/lib64/pkgconfig C_INCLUDE_PATH="$PREFIX"/include LIBRARY_PATH="$PREFIX"/lib64 8 | 9 | # capstone 10 | cd "$SCRIPTDIR"/capstone 11 | # rm -rf build 12 | cmake -S . -B build -DCMAKE_INSTALL_PREFIX="$PREFIX" -DCMAKE_BUILD_TYPE=Release && cmake --build build --config Release --parallel --target=install 13 | 14 | # syscall_intercept 15 | cd "$SCRIPTDIR"/syscall_intercept 16 | # rm -rf build 17 | cmake -S . -B build -DCMAKE_INSTALL_PREFIX="$PREFIX" -DCMAKE_BUILD_TYPE=Release && cmake --build build --config Release --parallel --target=install 18 | 19 | # hoard 20 | cd "$SCRIPTDIR"/Hoard/src 21 | make clean && make -j 22 | cp libhoard.so "$PREFIX"/lib64 23 | 24 | # hemem 25 | cd "$SCRIPTDIR"/src 26 | make clean && make -j 27 | cp libhemem.so "$PREFIX"/lib64 28 | 29 | -------------------------------------------------------------------------------- /microbenchmarks/Makefile: -------------------------------------------------------------------------------- 1 | CC = gcc 2 | CFLAGS = -g -Wall -O3 3 | #CFLAGS = -g3 -Wall -O0 4 | INCLUDES = -I../linux/usr/include 5 | LIBS = -lm -lpthread 6 | 7 | default: gups-pebs gups-random gups-hotset-move 8 | 9 | all: gups-pebs gups-random gups-simple gups-lru gups-lru-swap gups-hotset-move #gups 10 | 11 | gups-random: gups-random.o 12 | $(CC) $(CFLAGS) $(INCLUDES) -o gups-random gups-random.o zipf.o $(LIBS) -L../src/ -lhemem 13 | 14 | gups-small: gups-small.o 15 | $(CC) $(CFLAGS) $(INCLUDES) -o gups-small gups-small.o ../timer.o $(LIBS) 16 | 17 | gups: gups.o 18 | $(CC) $(CFLAGS) $(INCLUDES) -o gups gups.o zipf.o ../timer.o $(LIBS) 19 | 20 | gups-pebs: gups.o 21 | $(CC) $(CFLAGS) $(INCLUDES) -o gups-pebs gups.o zipf.o $(LIBS) -L../src/ -lhemem 22 | 23 | gups-hotset-move: gups-hotset-move.o 24 | $(CC) $(CFLAGS) $(INCLUDES) -o gups-hotset-move gups-hotset-move.o zipf.o $(LIBS) -L../src/ -lhemem 25 | 26 | gups-simple: gups.o 27 | $(CC) $(CFLAGS) 
$(INCLUDES) -o gups-simple gups.o zipf.o $(LIBS) -L../src/ -lhemem-simple 28 | 29 | gups-lru: gups.o 30 | $(CC) $(CFLAGS) $(INCLUDES) -o gups-lru gups.o zipf.o $(LIBS) -L../src/ -lhemem-lru 31 | 32 | gups-lru-swap: gups.o 33 | $(CC) $(CFLAGS) $(INCLUDES) -o gups-lru-swap gups.o zipf.o $(LIBS) -L../src/ -lhemem-lru-swap 34 | 35 | gups-random.o: gups-random.c zipf.c gups.h 36 | $(CC) $(CFLAGS) $(INCLUDES) -c gups-random.c zipf.c 37 | 38 | gups.o: gups.c zipf.c gups.h 39 | $(CC) $(CFLAGS) $(INCLUDES) -c gups.c zipf.c 40 | 41 | gups-hotset-move.o: gups-hotset-move.c zipf.c gups.h 42 | $(CC) $(CFLAGS) $(INCLUDES) -c gups-hotset-move.c zipf.c 43 | 44 | clean: 45 | $(RM) *.o gups gups-hotset-move gups-lru-swap gups-lru gups-simple gups-random gups-pebs 46 | -------------------------------------------------------------------------------- /microbenchmarks/gups-hotset-move.c: -------------------------------------------------------------------------------- 1 | /* 2 | * ===================================================================================== 3 | * 4 | * Filename: gups.c 5 | * 6 | * Description: 7 | * 8 | * Version: 1.0 9 | * Created: 02/21/2018 02:36:27 PM 10 | * Revision: none 11 | * Compiler: gcc 12 | * 13 | * Author: YOUR NAME (), 14 | * Organization: 15 | * 16 | * ===================================================================================== 17 | */ 18 | 19 | #define _GNU_SOURCE 20 | 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include 32 | #include 33 | #include 34 | #include 35 | #include 36 | 37 | #include "../src/timer.h" 38 | #include "../src/hemem.h" 39 | 40 | 41 | #include "gups.h" 42 | 43 | #define MAX_THREADS 64 44 | 45 | #define GUPS_PAGE_SIZE (4 * 1024) 46 | #define PAGE_NUM 3 47 | #define PAGES 2048 48 | 49 | #ifdef HOTSPOT 50 | extern uint64_t hotset_start; 51 | extern double hotset_fraction; 52 | #endif 53 | 54 | int threads; 55 | 56 | bool 
move_hotset1 = false; 57 | 58 | uint64_t hot_start = 0; 59 | uint64_t hotsize = 0; 60 | 61 | struct gups_args { 62 | int tid; // thread id 63 | uint64_t *indices; // array of indices to access 64 | void* field; // pointer to start of thread's region 65 | uint64_t iters; // iterations to perform 66 | uint64_t size; // size of region 67 | uint64_t elt_size; // size of elements 68 | uint64_t hot_start; // start of hot set 69 | uint64_t hotsize; // size of hot set 70 | }; 71 | 72 | 73 | static inline uint64_t rdtscp(void) 74 | { 75 | uint32_t eax, edx; 76 | // why is "ecx" in clobber list here, anyway? -SG&MH,2017-10-05 77 | __asm volatile ("rdtscp" : "=a" (eax), "=d" (edx) :: "ecx", "memory"); 78 | return ((uint64_t)edx << 32) | eax; 79 | } 80 | 81 | uint64_t thread_gups[MAX_THREADS]; 82 | 83 | static unsigned long updates, nelems; 84 | 85 | bool stop = false; 86 | 87 | static void *timing_thread() 88 | { 89 | uint64_t tic = -1; 90 | bool printed1 = false; 91 | for (;;) { 92 | tic++; 93 | if (tic >= 150 && tic < 300) { 94 | if (!printed1) { 95 | move_hotset1 = true; 96 | fprintf(stderr, "moved hotset1\n"); 97 | printed1 = true; 98 | } 99 | } 100 | if (tic >= 250) { 101 | stop = true; 102 | } 103 | sleep(1); 104 | } 105 | return 0; 106 | } 107 | 108 | uint64_t tot_updates = 0; 109 | 110 | static void *print_instantaneous_gups() 111 | { 112 | FILE *tot; 113 | uint64_t tot_gups, tot_last_second_gups = 0; 114 | 115 | 116 | tot = fopen("tot_gups.txt", "w"); 117 | if (tot == NULL) { 118 | perror("fopen"); 119 | } 120 | 121 | for (;;) { 122 | tot_gups = 0; 123 | for (int i = 0; i < threads; i++) { 124 | tot_gups += thread_gups[i]; 125 | } 126 | fprintf(tot, "%.10f\n", (1.0 * (abs(tot_gups - tot_last_second_gups))) / (1.0e9)); 127 | tot_updates += abs(tot_gups - tot_last_second_gups); 128 | tot_last_second_gups = tot_gups; 129 | sleep(1); 130 | } 131 | 132 | return NULL; 133 | } 134 | 135 | 136 | static uint64_t lfsr_fast(uint64_t lfsr) 137 | { 138 | lfsr ^= lfsr >> 7; 139 | 
lfsr ^= lfsr << 9; 140 | lfsr ^= lfsr >> 13; 141 | return lfsr; 142 | } 143 | 144 | char *filename = "indices1.txt"; 145 | 146 | FILE *hotsetfile = NULL; 147 | 148 | static void *do_gups(void *arguments) 149 | { 150 | //printf("do_gups entered\n"); 151 | struct gups_args *args = (struct gups_args*)arguments; 152 | uint64_t *field = (uint64_t*)(args->field); 153 | uint64_t i; 154 | uint64_t index1, index2; 155 | uint64_t elt_size = args->elt_size; 156 | char data[elt_size]; 157 | uint64_t lfsr; 158 | uint64_t hot_num; 159 | uint64_t tmp; 160 | uint64_t start, end; 161 | uint64_t before_accesses = 0; 162 | 163 | srand(args->tid); 164 | lfsr = rand(); 165 | 166 | index1 = 0; 167 | index2 = 0; 168 | 169 | fprintf(hotsetfile, "Thread %d region: %p - %p\thot set: %p - %p\n", args->tid, field, field + (args->size * elt_size), field + args->hot_start, field + args->hot_start + (args->hotsize * elt_size)); 170 | 171 | for (i = 0; i < args->iters; i++) { 172 | hot_num = lfsr_fast(lfsr) % 100; 173 | if (hot_num < 90) { 174 | lfsr = lfsr_fast(lfsr); 175 | index1 = args->hot_start + (lfsr % args->hotsize); 176 | if (move_hotset1) { 177 | if ((index1 < (args->hotsize / 4))) { 178 | index1 += args->hotsize; 179 | } 180 | } 181 | else { 182 | if ((index1 < (args->hotsize / 4))) { 183 | before_accesses++; 184 | } 185 | } 186 | start = rdtscp(); 187 | if (elt_size == 8) { 188 | uint64_t tmp = field[index1]; 189 | tmp = tmp + i; 190 | field[index1] = tmp; 191 | } 192 | else { 193 | memcpy(data, &field[index1 * elt_size], elt_size); 194 | memset(data, data[0] + i, elt_size); 195 | memcpy(&field[index1 * elt_size], data, elt_size); 196 | } 197 | end = rdtscp(); 198 | } 199 | else { 200 | lfsr = lfsr_fast(lfsr); 201 | index2 = lfsr % (args->size); 202 | start = rdtscp(); 203 | if (elt_size == 8) { 204 | uint64_t tmp = field[index2]; 205 | tmp = tmp + i; 206 | field[index2] = tmp; 207 | } 208 | else { 209 | memcpy(data, &field[index2 * elt_size], elt_size); 210 | memset(data, data[0] + 
i, elt_size); 211 | memcpy(&field[index2 * elt_size], data, elt_size); 212 | } 213 | end = rdtscp(); 214 | } 215 | 216 | if (i % 10000 == 0) { 217 | thread_gups[args->tid] += 10000; 218 | } 219 | 220 | if (stop) { 221 | break; 222 | } 223 | } 224 | 225 | fprintf(stderr, "before_accesses: %lu\n", before_accesses); 226 | 227 | //fclose(timefile); 228 | return 0; 229 | } 230 | 231 | int main(int argc, char **argv) 232 | { 233 | unsigned long expt; 234 | unsigned long size, elt_size; 235 | unsigned long tot_hot_size; 236 | int log_hot_size; 237 | struct timeval starttime, stoptime; 238 | double secs, gups; 239 | int i; 240 | void *p; 241 | struct gups_args** ga; 242 | pthread_t t[MAX_THREADS]; 243 | 244 | if (argc != 6) { 245 | fprintf(stderr, "Usage: %s [threads] [updates per thread] [exponent] [data size (bytes)] [noremap/remap]\n", argv[0]); 246 | fprintf(stderr, " threads\t\t\tnumber of threads to launch\n"); 247 | fprintf(stderr, " updates per thread\t\tnumber of updates per thread\n"); 248 | fprintf(stderr, " exponent\t\t\tlog size of region\n"); 249 | fprintf(stderr, " data size\t\t\tsize of data in array (in bytes)\n"); 250 | fprintf(stderr, " hot size\t\t\tlog size of hot set\n"); 251 | return 0; 252 | } 253 | 254 | gettimeofday(&starttime, NULL); 255 | 256 | threads = atoi(argv[1]); 257 | assert(threads <= MAX_THREADS); 258 | ga = (struct gups_args**)malloc(threads * sizeof(struct gups_args*)); 259 | 260 | updates = atol(argv[2]); 261 | updates -= updates % 256; 262 | expt = atoi(argv[3]); 263 | assert(expt > 8); 264 | assert(updates > 0 && (updates % 256 == 0)); 265 | size = (unsigned long)(1) << expt; 266 | size -= (size % 256); 267 | assert(size > 0 && (size % 256 == 0)); 268 | elt_size = atoi(argv[4]); 269 | log_hot_size = atof(argv[5]); 270 | tot_hot_size = (unsigned long)(1) << log_hot_size; 271 | 272 | fprintf(stderr, "%lu updates per thread (%d threads)\n", updates, threads); 273 | fprintf(stderr, "field of 2^%lu (%lu) bytes\n", expt, size); 274 | 
fprintf(stderr, "%ld byte element size (%ld elements total)\n", elt_size, size / elt_size); 275 | 276 | p = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS | MAP_HUGETLB | MAP_POPULATE, -1, 0); 277 | if (p == MAP_FAILED) { 278 | perror("mmap"); 279 | assert(0); 280 | } 281 | 282 | gettimeofday(&stoptime, NULL); 283 | fprintf(stderr, "Init took %.4f seconds\n", elapsed(&starttime, &stoptime)); 284 | fprintf(stderr, "Region address: %p - %p\t size: %ld\n", p, (p + size), size); 285 | 286 | nelems = (size / threads) / elt_size; // number of elements per thread 287 | fprintf(stderr, "Elements per thread: %lu\n", nelems); 288 | 289 | memset(thread_gups, 0, sizeof(thread_gups)); 290 | 291 | hotsetfile = fopen("hotsets.txt", "w"); 292 | if (hotsetfile == NULL) { 293 | perror("fopen"); 294 | assert(0); 295 | } 296 | 297 | gettimeofday(&stoptime, NULL); 298 | secs = elapsed(&starttime, &stoptime); 299 | fprintf(stderr, "Initialization time: %.4f seconds.\n", secs); 300 | 301 | //hemem_start_timing(); 302 | 303 | hot_start = 0; 304 | hotsize = (tot_hot_size / threads) / elt_size; 305 | //printf("hot_start: %p\thot_end: %p\thot_size: %lu\n", p + hot_start, p + hot_start + (hotsize * elt_size), hotsize); 306 | 307 | gettimeofday(&starttime, NULL); 308 | for (i = 0; i < threads; i++) { 309 | //printf("starting thread [%d]\n", i); 310 | ga[i] = (struct gups_args*)malloc(sizeof(struct gups_args)); 311 | ga[i]->tid = i; 312 | ga[i]->field = p + (i * nelems * elt_size); 313 | ga[i]->iters = updates; 314 | ga[i]->size = nelems; 315 | ga[i]->elt_size = elt_size; 316 | ga[i]->hot_start = 0; // hot set at start of thread's region 317 | ga[i]->hotsize = hotsize; 318 | } 319 | 320 | // run through gups once to touch all memory 321 | // spawn gups worker threads 322 | for (i = 0; i < threads; i++) { 323 | int r = pthread_create(&t[i], NULL, do_gups, (void*)ga[i]); 324 | assert(r == 0); 325 | } 326 | 327 | // wait for worker threads 328 | for (i = 0; i < threads; i++) { 
329 | int r = pthread_join(t[i], NULL); 330 | assert(r == 0); 331 | } 332 | //hemem_print_stats(); 333 | 334 | gettimeofday(&stoptime, NULL); 335 | 336 | secs = elapsed(&starttime, &stoptime); 337 | //printf("Elapsed time: %.4f seconds.\n", secs); 338 | gups = threads * ((double)updates) / (secs * 1.0e9); 339 | //printf("GUPS = %.10f\n", gups); 340 | memset(thread_gups, 0, sizeof(thread_gups)); 341 | 342 | filename = "indices2.txt"; 343 | 344 | pthread_t print_thread; 345 | int pt = pthread_create(&print_thread, NULL, print_instantaneous_gups, NULL); 346 | assert(pt == 0); 347 | 348 | 349 | pthread_t timer_thread; 350 | int tt = pthread_create(&timer_thread, NULL, timing_thread, NULL); 351 | assert (tt == 0); 352 | 353 | fprintf(stderr, "Timing.\n"); 354 | gettimeofday(&starttime, NULL); 355 | 356 | //hemem_clear_stats(); 357 | // spawn gups worker threads 358 | for (i = 0; i < threads; i++) { 359 | ga[i]->iters = updates * 2; 360 | int r = pthread_create(&t[i], NULL, do_gups, (void*)ga[i]); 361 | assert(r == 0); 362 | } 363 | 364 | // wait for worker threads 365 | for (i = 0; i < threads; i++) { 366 | int r = pthread_join(t[i], NULL); 367 | assert(r == 0); 368 | } 369 | gettimeofday(&stoptime, NULL); 370 | //hemem_print_stats(); 371 | //hemem_clear_stats(); 372 | 373 | secs = elapsed(&starttime, &stoptime); 374 | printf("Elapsed time: %.4f seconds.\n", secs); 375 | gups = ((double)tot_updates) / (secs * 1.0e9); 376 | printf("GUPS = %.10f\n", gups); 377 | 378 | memset(thread_gups, 0, sizeof(thread_gups)); 379 | 380 | #if 0 381 | #ifdef HOTSPOT 382 | filename = "indices3.txt"; 383 | move_hotset = true; 384 | 385 | printf("Timing.\n"); 386 | gettimeofday(&starttime, NULL); 387 | 388 | // spawn gups worker threads 389 | for (i = 0; i < threads; i++) { 390 | int r = pthread_create(&t[i], NULL, do_gups, (void*)ga[i]); 391 | assert(r == 0); 392 | } 393 | 394 | // wait for worker threads 395 | for (i = 0; i < threads; i++) { 396 | int r = pthread_join(t[i], NULL); 397 | 
assert(r == 0); 398 | } 399 | 400 | gettimeofday(&stoptime, NULL); 401 | 402 | secs = elapsed(&starttime, &stoptime); 403 | printf("Elapsed time: %.4f seconds.\n", secs); 404 | gups = threads * ((double)updates) / (secs * 1.0e9); 405 | printf("GUPS = %.10f\n", gups); 406 | 407 | //hemem_print_stats(); 408 | #endif 409 | #endif 410 | 411 | //hemem_stop_timing(); 412 | 413 | for (i = 0; i < threads; i++) { 414 | //free(ga[i]->indices); 415 | free(ga[i]); 416 | } 417 | free(ga); 418 | 419 | //getchar(); 420 | 421 | munmap(p, size); 422 | 423 | return 0; 424 | } 425 | 426 | -------------------------------------------------------------------------------- /microbenchmarks/gups-random.c: -------------------------------------------------------------------------------- 1 | /* 2 | * ===================================================================================== 3 | * 4 | * Filename: gups.c 5 | * 6 | * Description: 7 | * 8 | * Version: 1.0 9 | * Created: 02/21/2018 02:36:27 PM 10 | * Revision: none 11 | * Compiler: gcc 12 | * 13 | * Author: YOUR NAME (), 14 | * Organization: 15 | * 16 | * ===================================================================================== 17 | */ 18 | 19 | #define _GNU_SOURCE 20 | 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include 32 | #include 33 | #include 34 | #include 35 | #include 36 | 37 | #include "../src/timer.h" 38 | #include "../src/hemem.h" 39 | 40 | 41 | #include "gups.h" 42 | 43 | #define MAX_THREADS 64 44 | 45 | #define GUPS_PAGE_SIZE (4 * 1024) 46 | #define PAGE_NUM 3 47 | #define PAGES 2048 48 | 49 | #ifdef HOTSPOT 50 | extern uint64_t hotset_start; 51 | extern double hotset_fraction; 52 | #endif 53 | 54 | int threads; 55 | 56 | uint64_t hot_start = 0; 57 | uint64_t hotsize = 0; 58 | uint64_t hot_offset_page = 0; 59 | bool move_hotset = false; 60 | 61 | struct gups_args { 62 | int tid; // thread id 63 | uint64_t 
*indices; // array of indices to access 64 | void* field; // pointer to start of thread's region 65 | uint64_t iters; // iterations to perform 66 | uint64_t size; // size of region 67 | uint64_t elt_size; // size of elements 68 | uint64_t hot_start; // start of hot set 69 | uint64_t hotsize; // size of hot set 70 | }; 71 | 72 | 73 | static inline uint64_t rdtscp(void) 74 | { 75 | uint32_t eax, edx; 76 | // why is "ecx" in clobber list here, anyway? -SG&MH,2017-10-05 77 | __asm volatile ("rdtscp" : "=a" (eax), "=d" (edx) :: "ecx", "memory"); 78 | return ((uint64_t)edx << 32) | eax; 79 | } 80 | 81 | //uint64_t thread_gups[MAX_THREADS]; 82 | 83 | static unsigned long updates, nelems; 84 | 85 | static void *print_instantaneous_gups() 86 | { 87 | uint64_t last_second_gups[threads]; 88 | FILE *f[threads]; 89 | char fname[20]; 90 | 91 | for (int i = 0; i < threads; i++) { 92 | last_second_gups[i] = 0; 93 | snprintf(fname, 20, "gups_%d.txt", i); 94 | //printf("file name: %s\n", fname); 95 | f[i] = fopen(fname, "w"); 96 | if (f[i] == NULL) { 97 | perror("fopen"); 98 | assert(0); 99 | } 100 | } 101 | 102 | for (;;) { 103 | for (int i = 0; i < threads; i++) { 104 | //fprintf(f[i], "%.10f\n", (1.0 * (abs(thread_gups[i] - last_second_gups[i]))) / (1.0e9)); 105 | //last_second_gups[i] = thread_gups[i]; 106 | } 107 | sleep(1); 108 | //printf("GUPS: %.10f\n", (1.0 * (abs(thread_gups[0]- last_second_gups))) / (1.0e9)); 109 | //last_second_gups = thread_gups[0]; 110 | //sleep(1); 111 | } 112 | 113 | return NULL; 114 | } 115 | 116 | 117 | static uint64_t lfsr_fast(uint64_t lfsr) 118 | { 119 | lfsr ^= lfsr >> 7; 120 | lfsr ^= lfsr << 9; 121 | lfsr ^= lfsr >> 13; 122 | return lfsr; 123 | } 124 | 125 | char *filename = "indices1.txt"; 126 | 127 | FILE *hotsetfile = NULL; 128 | 129 | bool hotset_only = false; 130 | 131 | static void *prefill_hotset(void* arguments) 132 | { 133 | struct gups_args *args = (struct gups_args*)arguments; 134 | uint64_t *field = (uint64_t*)(args->field); 135 | 
uint64_t i; 136 | uint64_t index1; 137 | uint64_t elt_size = args->elt_size; 138 | char data[elt_size]; 139 | 140 | index1 = 0; 141 | 142 | for (i = 0; i < args->hotsize; i++) { 143 | index1 = i; 144 | if (elt_size == 8) { 145 | uint64_t tmp = field[index1]; 146 | tmp = tmp + i; 147 | field[index1] = tmp; 148 | } 149 | else { 150 | memcpy(data, &field[index1 * elt_size], elt_size); 151 | memset(data, data[0] + i, elt_size); 152 | memcpy(&field[index1 * elt_size], data, elt_size); 153 | } 154 | } 155 | return 0; 156 | 157 | } 158 | 159 | static void *do_gups(void *arguments) 160 | { 161 | //printf("do_gups entered\n"); 162 | struct gups_args *args = (struct gups_args*)arguments; 163 | uint64_t *field = (uint64_t*)(args->field); 164 | uint64_t i; 165 | uint64_t index1, index2; 166 | uint64_t elt_size = args->elt_size; 167 | char data[elt_size]; 168 | uint64_t lfsr; 169 | uint64_t hot_num; 170 | uint64_t offset; 171 | uint64_t start, end; 172 | 173 | srand(args->tid); 174 | lfsr = rand(); 175 | 176 | index1 = 0; 177 | index2 = 0; 178 | 179 | fprintf(hotsetfile, "Thread %d region: %p - %p\thot set: %p - %p\n", args->tid, field, field + (args->size * elt_size), field + args->hot_start, field + args->hot_start + (args->hotsize * elt_size)); 180 | 181 | for (i = 0; i < args->iters; i++) { 182 | hot_num = lfsr_fast(lfsr) % 100; 183 | lfsr = lfsr_fast(lfsr); 184 | index2 = lfsr % (args->size); 185 | start = rdtscp(); 186 | if (elt_size == 8) { 187 | uint64_t tmp = field[index2]; 188 | tmp = tmp + i; 189 | field[index2] = tmp; 190 | } 191 | else { 192 | memcpy(data, &field[index2 * elt_size], elt_size); 193 | memset(data, data[0] + i, elt_size); 194 | memcpy(&field[index2 * elt_size], data, elt_size); 195 | } 196 | end = rdtscp(); 197 | } 198 | 199 | return 0; 200 | } 201 | 202 | int main(int argc, char **argv) 203 | { 204 | unsigned long expt; 205 | unsigned long size, elt_size; 206 | unsigned long tot_hot_size; 207 | int log_hot_size; 208 | struct timeval starttime, 
stoptime; 209 | double secs, gups; 210 | int i; 211 | void *p; 212 | struct gups_args** ga; 213 | pthread_t t[MAX_THREADS]; 214 | 215 | if (argc != 6) { 216 | fprintf(stderr, "Usage: %s [threads] [updates per thread] [exponent] [data size (bytes)] [noremap/remap]\n", argv[0]); 217 | fprintf(stderr, " threads\t\t\tnumber of threads to launch\n"); 218 | fprintf(stderr, " updates per thread\t\tnumber of updates per thread\n"); 219 | fprintf(stderr, " exponent\t\t\tlog size of region\n"); 220 | fprintf(stderr, " data size\t\t\tsize of data in array (in bytes)\n"); 221 | fprintf(stderr, " hot size\t\t\tlog size of hot set\n"); 222 | return 0; 223 | } 224 | 225 | gettimeofday(&starttime, NULL); 226 | 227 | threads = atoi(argv[1]); 228 | assert(threads <= MAX_THREADS); 229 | ga = (struct gups_args**)malloc(threads * sizeof(struct gups_args*)); 230 | 231 | updates = atol(argv[2]); 232 | updates -= updates % 256; 233 | expt = atoi(argv[3]); 234 | assert(expt > 8); 235 | assert(updates > 0 && (updates % 256 == 0)); 236 | size = (unsigned long)(1) << expt; 237 | size -= (size % 256); 238 | assert(size > 0 && (size % 256 == 0)); 239 | elt_size = atoi(argv[4]); 240 | log_hot_size = atof(argv[5]); 241 | tot_hot_size = (unsigned long)(1) << log_hot_size; 242 | 243 | fprintf(stderr, "%lu updates per thread (%d threads)\n", updates, threads); 244 | fprintf(stderr, "field of 2^%lu (%lu) bytes\n", expt, size); 245 | fprintf(stderr, "%ld byte element size (%ld elements total)\n", elt_size, size / elt_size); 246 | 247 | p = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS | MAP_HUGETLB | MAP_POPULATE, -1, 0); 248 | if (p == MAP_FAILED) { 249 | perror("mmap"); 250 | assert(0); 251 | } 252 | 253 | gettimeofday(&stoptime, NULL); 254 | fprintf(stderr, "Init took %.4f seconds\n", elapsed(&starttime, &stoptime)); 255 | fprintf(stderr, "Region address: %p - %p\t size: %ld\n", p, (p + size), size); 256 | 257 | nelems = (size / threads) / elt_size; // number of elements per 
thread 258 | fprintf(stderr, "Elements per thread: %lu\n", nelems); 259 | 260 | //memset(thread_gups, 0, sizeof(thread_gups)); 261 | 262 | hotsetfile = fopen("hotsets.txt", "w"); 263 | if (hotsetfile == NULL) { 264 | perror("fopen"); 265 | assert(0); 266 | } 267 | 268 | gettimeofday(&stoptime, NULL); 269 | secs = elapsed(&starttime, &stoptime); 270 | fprintf(stderr, "Initialization time: %.4f seconds.\n", secs); 271 | 272 | //hemem_start_timing(); 273 | 274 | //pthread_t print_thread; 275 | //int pt = pthread_create(&print_thread, NULL, print_instantaneous_gups, NULL); 276 | //assert(pt == 0); 277 | 278 | 279 | hot_start = 0; 280 | hotsize = (tot_hot_size / threads) / elt_size; 281 | //printf("hot_start: %p\thot_end: %p\thot_size: %lu\n", p + hot_start, p + hot_start + (hotsize * elt_size), hotsize); 282 | 283 | gettimeofday(&starttime, NULL); 284 | for (i = 0; i < threads; i++) { 285 | //printf("starting thread [%d]\n", i); 286 | ga[i] = (struct gups_args*)malloc(sizeof(struct gups_args)); 287 | ga[i]->tid = i; 288 | ga[i]->field = p + (i * nelems * elt_size); 289 | ga[i]->iters = updates; 290 | ga[i]->size = nelems; 291 | ga[i]->elt_size = elt_size; 292 | ga[i]->hot_start = 0; // hot set at start of thread's region 293 | ga[i]->hotsize = hotsize; 294 | } 295 | 296 | if (!hotset_only) { 297 | for (i = 0; i < threads; i++) { 298 | int r = pthread_create(&t[i], NULL, prefill_hotset, (void*)ga[i]); 299 | assert(r == 0); 300 | } 301 | // wait for worker threads 302 | for (i = 0; i < threads; i++) { 303 | int r = pthread_join(t[i], NULL); 304 | assert(r == 0); 305 | } 306 | } 307 | 308 | // run through gups once to touch all memory 309 | // spawn gups worker threads 310 | for (i = 0; i < threads; i++) { 311 | int r = pthread_create(&t[i], NULL, do_gups, (void*)ga[i]); 312 | assert(r == 0); 313 | } 314 | 315 | // wait for worker threads 316 | for (i = 0; i < threads; i++) { 317 | int r = pthread_join(t[i], NULL); 318 | assert(r == 0); 319 | } 320 | 
//hemem_print_stats(); 321 | 322 | gettimeofday(&stoptime, NULL); 323 | 324 | secs = elapsed(&starttime, &stoptime); 325 | //printf("Elapsed time: %.4f seconds.\n", secs); 326 | gups = threads * ((double)updates) / (secs * 1.0e9); 327 | //printf("GUPS = %.10f\n", gups); 328 | //memset(thread_gups, 0, sizeof(thread_gups)); 329 | 330 | filename = "indices2.txt"; 331 | 332 | fprintf(stderr, "Timing.\n"); 333 | gettimeofday(&starttime, NULL); 334 | 335 | //hemem_clear_stats(); 336 | // spawn gups worker threads 337 | for (i = 0; i < threads; i++) { 338 | int r = pthread_create(&t[i], NULL, do_gups, (void*)ga[i]); 339 | assert(r == 0); 340 | } 341 | 342 | // wait for worker threads 343 | for (i = 0; i < threads; i++) { 344 | int r = pthread_join(t[i], NULL); 345 | assert(r == 0); 346 | } 347 | gettimeofday(&stoptime, NULL); 348 | //hemem_print_stats(); 349 | //hemem_clear_stats(); 350 | 351 | secs = elapsed(&starttime, &stoptime); 352 | printf("Elapsed time: %.4f seconds.\n", secs); 353 | gups = threads * ((double)updates) / (secs * 1.0e9); 354 | printf("GUPS = %.10f\n", gups); 355 | 356 | //memset(thread_gups, 0, sizeof(thread_gups)); 357 | #if 0 358 | #ifdef HOTSPOT 359 | move_hotset = true; 360 | hot_offset_page = hotsize / GUPS_PAGE_SIZE; 361 | //hot_start = (16UL * 1024UL * 1024UL * 1024UL) / elt_size; // 16GB to the right; 362 | printf("hot_start: %lu\thot_size: %lu\n", hot_start, hotsize); 363 | printf("hot_offset_page: %lu\n", hot_offset_page); 364 | 365 | filename = "indices3.txt"; 366 | 367 | printf("Timing.\n"); 368 | gettimeofday(&starttime, NULL); 369 | 370 | // spawn gups worker threads 371 | for (i = 0; i < threads; i++) { 372 | ga[i]->tid = i; 373 | ga[i]->iters = updates; 374 | ga[i]->size = nelems; 375 | ga[i]->elt_size = elt_size; 376 | int r = pthread_create(&t[i], NULL, do_gups, (void*)ga[i]); 377 | assert(r == 0); 378 | } 379 | 380 | // wait for worker threads 381 | for (i = 0; i < threads; i++) { 382 | int r = pthread_join(t[i], NULL); 383 | 
assert(r == 0); 384 | } 385 | 386 | gettimeofday(&stoptime, NULL); 387 | 388 | secs = elapsed(&starttime, &stoptime); 389 | printf("Elapsed time: %.4f seconds.\n", secs); 390 | gups = threads * ((double)updates) / (secs * 1.0e9); 391 | printf("GUPS = %.10f\n", gups); 392 | 393 | //hemem_print_stats(); 394 | #endif 395 | #endif 396 | 397 | //hemem_stop_timing(); 398 | 399 | for (i = 0; i < threads; i++) { 400 | //free(ga[i]->indices); 401 | free(ga[i]); 402 | } 403 | free(ga); 404 | 405 | //getchar(); 406 | 407 | munmap(p, size); 408 | 409 | return 0; 410 | } 411 | 412 | 413 | -------------------------------------------------------------------------------- /microbenchmarks/gups-small.c: -------------------------------------------------------------------------------- 1 | /* 2 | * ===================================================================================== 3 | * 4 | * Filename: gups.c 5 | * 6 | * Description: 7 | * 8 | * Version: 1.0 9 | * Created: 02/21/2018 02:36:27 PM 10 | * Revision: none 11 | * Compiler: gcc 12 | * 13 | * Author: YOUR NAME (), 14 | * Organization: 15 | * 16 | * ===================================================================================== 17 | */ 18 | 19 | #define _GNU_SOURCE 20 | 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include 32 | #include 33 | #include 34 | #include 35 | #include 36 | #include 37 | #include 38 | #include 39 | #include 40 | #include 41 | 42 | #include "../timer.h" 43 | 44 | uint64_t size = 2 * 1024 * 1024; 45 | uint64_t elt_size = 8; 46 | uint64_t iters = 1000000000; 47 | 48 | int dramfd, nvmfd, uffd; 49 | void *dram_devdax_mmap, *nvm_devdax_mmap; 50 | 51 | bool migrating = false; 52 | 53 | void wp_page(uint64_t addr) 54 | { 55 | struct uffdio_writeprotect wp; 56 | int ret; 57 | 58 | assert(addr != 0); 59 | 60 | wp.range.start = addr; 61 | wp.range.len = 2 * 1024 * 1024; 62 | wp.mode = 
UFFDIO_WRITEPROTECT_MODE_WP; 63 | ret = ioctl(uffd, UFFDIO_WRITEPROTECT, &wp); 64 | 65 | if (ret < 0) { 66 | perror("uffdio writeprotect"); 67 | assert(0); 68 | } 69 | } 70 | 71 | void migrate(uint64_t va, bool migrate_down) 72 | { 73 | void *newptr; 74 | uint64_t pagesize; 75 | void *old_addr, *new_addr; 76 | 77 | migrating = true; 78 | 79 | pagesize = 2 * 1024 * 1024; 80 | 81 | if (migrate_down) { 82 | old_addr = dram_devdax_mmap; 83 | new_addr = nvm_devdax_mmap; 84 | } 85 | else { 86 | old_addr = nvm_devdax_mmap; 87 | new_addr = dram_devdax_mmap; 88 | } 89 | 90 | memcpy(new_addr, old_addr, pagesize); 91 | 92 | if (migrate_down) { 93 | newptr = mmap((void*)va, pagesize, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE | MAP_FIXED, nvmfd, 0); 94 | } 95 | else { 96 | newptr = mmap((void*)va, pagesize, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE | MAP_FIXED, dramfd, 0); 97 | } 98 | 99 | if (newptr == MAP_FAILED) { 100 | perror("newptr mmap"); 101 | assert(0); 102 | } 103 | if (newptr != (void*)va) { 104 | fprintf(stderr, "mapped address is not same as faulting address\n"); 105 | } 106 | 107 | // re-register new mmap region with userfaultfd 108 | struct uffdio_register uffdio_register; 109 | uffdio_register.range.start = (uint64_t)newptr; 110 | uffdio_register.range.len = pagesize; 111 | uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING | UFFDIO_REGISTER_MODE_WP; 112 | uffdio_register.ioctls = 0; 113 | if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register) == -1) { 114 | perror("ioctl uffdio_register"); 115 | assert(0); 116 | } 117 | 118 | migrating = false; 119 | } 120 | 121 | void *do_migration(void *addr) 122 | { 123 | for (;;) { 124 | wp_page((uint64_t)addr); 125 | migrate((uint64_t)addr, true); 126 | //usleep(50000); 127 | 128 | wp_page((uint64_t)addr); 129 | migrate((uint64_t)addr, false); 130 | //usleep(50000); 131 | } 132 | 133 | return NULL; 134 | } 135 | 136 | void handle_wp_fault(uint64_t page_boundry) 137 | { 138 | fprintf(stderr, "encountered a wp 
fault, waiting for migration..."); 139 | while (migrating) { 140 | // just wait for migrating to be done 141 | } 142 | fprintf(stderr, "done\n"); 143 | } 144 | 145 | void *handle_fault() 146 | { 147 | static struct uffd_msg *msg; 148 | ssize_t nread; 149 | uint64_t fault_addr; 150 | uint64_t fault_flags; 151 | uint64_t page_boundry; 152 | struct uffdio_range range; 153 | int ret; 154 | 155 | for (;;) { 156 | struct pollfd pollfd; 157 | int pollres; 158 | pollfd.fd = uffd; 159 | pollfd.events = POLLIN; 160 | 161 | pollres = poll(&pollfd, 1, -1); 162 | 163 | switch (pollres) { 164 | case -1: 165 | perror("poll"); 166 | assert(0); 167 | case 0: 168 | fprintf(stderr, "poll read 0\n"); 169 | continue; 170 | case 1: 171 | break; 172 | default: 173 | fprintf(stderr, "unexpected poll result\n"); 174 | assert(0); 175 | } 176 | 177 | if (pollfd.revents & POLLERR) { 178 | fprintf(stderr, "pollerr\n"); 179 | assert(0); 180 | } 181 | 182 | if (!pollfd.revents & POLLIN) { 183 | continue; 184 | } 185 | 186 | nread = read(uffd, msg, sizeof(struct uffd_msg)); 187 | if (nread == 0) { 188 | fprintf(stderr, "EOF on userfaultfd\n"); 189 | assert(0); 190 | } 191 | 192 | if (nread < 0) { 193 | if (errno == EAGAIN) { 194 | continue; 195 | } 196 | perror("read"); 197 | assert(0); 198 | } 199 | 200 | if ((nread != sizeof(struct uffd_msg))) { 201 | fprintf(stderr, "invalid msg size: [%ld]\n", nread); 202 | assert(0); 203 | } 204 | 205 | if (msg->event & UFFD_EVENT_PAGEFAULT) { 206 | fault_addr = (uint64_t)msg->arg.pagefault.address; 207 | fault_flags = msg->arg.pagefault.flags; 208 | 209 | // allign faulting address to page boundry 210 | // huge page boundry in this case due to dax allignment 211 | page_boundry = fault_addr & ~((2 * 1024 * 1024) - 1); 212 | 213 | if (fault_flags & UFFD_PAGEFAULT_FLAG_WP) { 214 | handle_wp_fault(page_boundry); 215 | } 216 | else { 217 | assert(!"page faults with MAP_POPULATE should not happen\n"); 218 | } 219 | 220 | // wake the faulting thread 221 | 
range.start = (uint64_t)page_boundry; 222 | range.len = 2 * 1024 * 1024; 223 | 224 | ret = ioctl(uffd, UFFDIO_WAKE, &range); 225 | 226 | if (ret < 0) { 227 | perror("uffdio wake"); 228 | assert(0); 229 | } 230 | } 231 | else if (msg->event & UFFD_EVENT_UNMAP){ 232 | fprintf(stderr, "Received an unmap event\n"); 233 | assert(0); 234 | } 235 | else if (msg->event & UFFD_EVENT_REMOVE) { 236 | fprintf(stderr, "received a remove event\n"); 237 | assert(0); 238 | } 239 | else { 240 | fprintf(stderr, "received a non page fault event\n"); 241 | assert(0); 242 | } 243 | } 244 | } 245 | 246 | 247 | static uint64_t lfsr_fast(uint64_t lfsr) 248 | { 249 | lfsr ^= lfsr >> 7; 250 | lfsr ^= lfsr << 9; 251 | lfsr ^= lfsr >> 13; 252 | return lfsr; 253 | } 254 | 255 | static void *do_gups(void *argument) 256 | { 257 | //printf("do_gups entered\n"); 258 | char *field = (char*)(argument); 259 | uint64_t i; 260 | uint64_t index; 261 | char data[elt_size]; 262 | uint64_t lfsr; 263 | 264 | srand(0); 265 | lfsr = rand(); 266 | 267 | for (i = 0; i < iters; i++) { 268 | lfsr = lfsr_fast(lfsr); 269 | index = lfsr % (size / elt_size); 270 | memcpy(data, &field[index * elt_size], elt_size); 271 | memset(data, data[0] + i, elt_size); 272 | } 273 | return 0; 274 | } 275 | 276 | int main(int argc, char **argv) 277 | { 278 | struct timeval starttime, stoptime; 279 | double secs, gups; 280 | void *p; 281 | uint64_t nelems; 282 | pthread_t gups_thread, fault_thread, migrate_thread; 283 | 284 | uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK); 285 | if (uffd == -1) { 286 | perror("uffd"); 287 | assert(0); 288 | } 289 | 290 | struct uffdio_api uffdio_api; 291 | uffdio_api.api = UFFD_API; 292 | uffdio_api.features = UFFD_FEATURE_PAGEFAULT_FLAG_WP | UFFD_FEATURE_MISSING_SHMEM | UFFD_FEATURE_MISSING_HUGETLBFS;// | UFFD_FEATURE_EVENT_UNMAP | UFFD_FEATURE_EVENT_REMOVE; 293 | uffdio_api.ioctls = 0; 294 | if (ioctl(uffd, UFFDIO_API, &uffdio_api) == -1) { 295 | perror("ioctl uffdio_api"); 296 | 
assert(0); 297 | } 298 | 299 | dramfd = open("/dev/dax0.0", O_RDWR); 300 | if (dramfd < 0) { 301 | perror("dram open"); 302 | } 303 | assert(dramfd >= 0); 304 | 305 | nvmfd = open("/dev/dax1.0", O_RDWR); 306 | if (nvmfd < 0) { 307 | perror("nvm open"); 308 | } 309 | assert(nvmfd >= 0); 310 | 311 | dram_devdax_mmap = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, dramfd, 0); 312 | if (dram_devdax_mmap == MAP_FAILED) { 313 | perror("dram devdax mmap"); 314 | assert(0); 315 | } 316 | 317 | nvm_devdax_mmap = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, nvmfd, 0); 318 | if (nvm_devdax_mmap == MAP_FAILED) { 319 | perror("nvm devdax mmap"); 320 | assert(0); 321 | } 322 | 323 | gettimeofday(&starttime, NULL); 324 | 325 | fprintf(stderr, "%ld updates per thread (1 threads)\n", iters); 326 | fprintf(stderr, "field of 2^21 (%lu) bytes\n", size); 327 | fprintf(stderr, "8 byte element size (%ld elements total)\n", size / elt_size); 328 | 329 | p = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, dramfd, 0); 330 | if (p == MAP_FAILED) { 331 | perror("mmap"); 332 | assert(0); 333 | } 334 | 335 | struct uffdio_register uffdio_register; 336 | uffdio_register.range.start = (uint64_t)p; 337 | uffdio_register.range.len = size; 338 | uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING | UFFDIO_REGISTER_MODE_WP; 339 | uffdio_register.ioctls = 0; 340 | if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register) == -1) { 341 | perror("ioctl uffdio_register"); 342 | assert(0); 343 | } 344 | 345 | int r = pthread_create(&fault_thread, NULL, handle_fault, NULL); 346 | assert(r == 0); 347 | 348 | r = pthread_create(&migrate_thread, NULL, do_migration, p); 349 | assert(r == 0); 350 | 351 | gettimeofday(&stoptime, NULL); 352 | fprintf(stderr, "Init took %.4f seconds\n", elapsed(&starttime, &stoptime)); 353 | fprintf(stderr, "Region address: %p\t size: %ld\n", p, size); 354 | 355 | nelems = size / elt_size; // number of elements per thread 
356 | fprintf(stderr, "Elements per thread: %lu\n", nelems); 357 | 358 | gettimeofday(&starttime, NULL); 359 | 360 | // run through gups once to touch all memory 361 | // spawn gups worker thread 362 | r = pthread_create(&gups_thread, NULL, do_gups, (void*)p); 363 | assert(r == 0); 364 | 365 | r = pthread_join(gups_thread, NULL); 366 | assert(r == 0); 367 | 368 | gettimeofday(&stoptime, NULL); 369 | 370 | secs = elapsed(&starttime, &stoptime); 371 | printf("Elapsed time: %.4f seconds.\n", secs); 372 | gups = ((double)iters) / (secs * 1.0e9); 373 | printf("GUPS = %.10f\n", gups); 374 | 375 | fprintf(stderr, "Timing.\n"); 376 | gettimeofday(&starttime, NULL); 377 | 378 | r = pthread_create(&gups_thread, NULL, do_gups, (void*)p); 379 | assert(r == 0); 380 | 381 | r = pthread_join(gups_thread, NULL); 382 | assert(r == 0); 383 | 384 | gettimeofday(&stoptime, NULL); 385 | 386 | secs = elapsed(&starttime, &stoptime); 387 | printf("Elapsed time: %.4f seconds.\n", secs); 388 | gups = ((double)iters) / (secs * 1.0e9); 389 | printf("GUPS = %.10f\n", gups); 390 | 391 | printf("Timing.\n"); 392 | gettimeofday(&starttime, NULL); 393 | 394 | // spawn gups worker threads 395 | r = pthread_create(&gups_thread, NULL, do_gups, (void*)p); 396 | assert(r == 0); 397 | 398 | r = pthread_join(gups_thread, NULL); 399 | assert(r == 0); 400 | 401 | gettimeofday(&stoptime, NULL); 402 | 403 | secs = elapsed(&starttime, &stoptime); 404 | printf("Elapsed time: %.4f seconds.\n", secs); 405 | gups = ((double)iters) / (secs * 1.0e9); 406 | printf("GUPS = %.10f\n", gups); 407 | 408 | munmap(p, size); 409 | 410 | return 0; 411 | } 412 | 413 | -------------------------------------------------------------------------------- /microbenchmarks/gups.c: -------------------------------------------------------------------------------- 1 | /* 2 | * ===================================================================================== 3 | * 4 | * Filename: gups.c 5 | * 6 | * Description: 7 | * 8 | * Version: 1.0 
9 | * Created: 02/21/2018 02:36:27 PM 10 | * Revision: none 11 | * Compiler: gcc 12 | * 13 | * Author: YOUR NAME (), 14 | * Organization: 15 | * 16 | * ===================================================================================== 17 | */ 18 | 19 | #define _GNU_SOURCE 20 | 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include 32 | #include 33 | #include 34 | #include 35 | #include 36 | 37 | #include "../src/timer.h" 38 | #include "../src/hemem.h" 39 | 40 | 41 | #include "gups.h" 42 | 43 | #define MAX_THREADS 64 44 | 45 | #define GUPS_PAGE_SIZE (4 * 1024) 46 | #define PAGE_NUM 3 47 | #define PAGES 2048 48 | 49 | #ifdef HOTSPOT 50 | extern uint64_t hotset_start; 51 | extern double hotset_fraction; 52 | #endif 53 | 54 | int threads; 55 | 56 | uint64_t hot_start = 0; 57 | uint64_t hotsize = 0; 58 | uint64_t hot_offset_page = 0; 59 | bool move_hotset = false; 60 | 61 | struct gups_args { 62 | int tid; // thread id 63 | uint64_t *indices; // array of indices to access 64 | void* field; // pointer to start of thread's region 65 | uint64_t iters; // iterations to perform 66 | uint64_t size; // size of region 67 | uint64_t elt_size; // size of elements 68 | uint64_t hot_start; // start of hot set 69 | uint64_t hotsize; // size of hot set 70 | }; 71 | 72 | 73 | static inline uint64_t rdtscp(void) 74 | { 75 | uint32_t eax, edx; 76 | // why is "ecx" in clobber list here, anyway? 
-SG&MH,2017-10-05 77 | __asm volatile ("rdtscp" : "=a" (eax), "=d" (edx) :: "ecx", "memory"); 78 | return ((uint64_t)edx << 32) | eax; 79 | } 80 | 81 | //uint64_t thread_gups[MAX_THREADS]; 82 | 83 | static unsigned long updates, nelems; 84 | 85 | static void *print_instantaneous_gups() 86 | { 87 | uint64_t last_second_gups[threads]; 88 | FILE *f[threads]; 89 | char fname[20]; 90 | 91 | for (int i = 0; i < threads; i++) { 92 | last_second_gups[i] = 0; 93 | snprintf(fname, 20, "gups_%d.txt", i); 94 | //printf("file name: %s\n", fname); 95 | f[i] = fopen(fname, "w"); 96 | if (f[i] == NULL) { 97 | perror("fopen"); 98 | assert(0); 99 | } 100 | } 101 | 102 | for (;;) { 103 | for (int i = 0; i < threads; i++) { 104 | //fprintf(f[i], "%.10f\n", (1.0 * (abs(thread_gups[i] - last_second_gups[i]))) / (1.0e9)); 105 | //last_second_gups[i] = thread_gups[i]; 106 | } 107 | sleep(1); 108 | //printf("GUPS: %.10f\n", (1.0 * (abs(thread_gups[0]- last_second_gups))) / (1.0e9)); 109 | //last_second_gups = thread_gups[0]; 110 | //sleep(1); 111 | } 112 | 113 | return NULL; 114 | } 115 | 116 | 117 | static uint64_t lfsr_fast(uint64_t lfsr) 118 | { 119 | lfsr ^= lfsr >> 7; 120 | lfsr ^= lfsr << 9; 121 | lfsr ^= lfsr >> 13; 122 | return lfsr; 123 | } 124 | 125 | char *filename = "indices1.txt"; 126 | 127 | FILE *hotsetfile = NULL; 128 | 129 | bool hotset_only = false; 130 | 131 | static void *prefill_hotset(void* arguments) 132 | { 133 | struct gups_args *args = (struct gups_args*)arguments; 134 | uint64_t *field = (uint64_t*)(args->field); 135 | uint64_t i; 136 | uint64_t index1; 137 | uint64_t elt_size = args->elt_size; 138 | char data[elt_size]; 139 | 140 | index1 = 0; 141 | 142 | for (i = 0; i < args->hotsize; i++) { 143 | index1 = i; 144 | if (elt_size == 8) { 145 | uint64_t tmp = field[index1]; 146 | tmp = tmp + i; 147 | field[index1] = tmp; 148 | } 149 | else { 150 | memcpy(data, &field[index1 * elt_size], elt_size); 151 | memset(data, data[0] + i, elt_size); 152 | 
memcpy(&field[index1 * elt_size], data, elt_size); 153 | } 154 | } 155 | return 0; 156 | 157 | } 158 | 159 | static void *do_gups(void *arguments) 160 | { 161 | //printf("do_gups entered\n"); 162 | struct gups_args *args = (struct gups_args*)arguments; 163 | uint64_t *field = (uint64_t*)(args->field); 164 | uint64_t i; 165 | uint64_t index1, index2; 166 | uint64_t elt_size = args->elt_size; 167 | char data[elt_size]; 168 | uint64_t lfsr; 169 | uint64_t hot_num; 170 | uint64_t offset; 171 | uint64_t start, end; 172 | 173 | srand(args->tid); 174 | lfsr = rand(); 175 | 176 | index1 = 0; 177 | index2 = 0; 178 | 179 | fprintf(hotsetfile, "Thread %d region: %p - %p\thot set: %p - %p\n", args->tid, field, field + (args->size * elt_size), field + args->hot_start, field + args->hot_start + (args->hotsize * elt_size)); 180 | 181 | for (i = 0; i < args->iters; i++) { 182 | hot_num = lfsr_fast(lfsr) % 100; 183 | if (hot_num < 90) { 184 | lfsr = lfsr_fast(lfsr); 185 | index1 = args->hot_start + (lfsr % args->hotsize); 186 | start = rdtscp(); 187 | if (elt_size == 8) { 188 | uint64_t tmp = field[index1]; 189 | tmp = tmp + i; 190 | field[index1] = tmp; 191 | } 192 | else { 193 | memcpy(data, &field[index1 * elt_size], elt_size); 194 | memset(data, data[0] + i, elt_size); 195 | memcpy(&field[index1 * elt_size], data, elt_size); 196 | } 197 | end = rdtscp(); 198 | } 199 | else { 200 | lfsr = lfsr_fast(lfsr); 201 | index2 = lfsr % (args->size); 202 | start = rdtscp(); 203 | if (elt_size == 8) { 204 | uint64_t tmp = field[index2]; 205 | tmp = tmp + i; 206 | field[index2] = tmp; 207 | } 208 | else { 209 | memcpy(data, &field[index2 * elt_size], elt_size); 210 | memset(data, data[0] + i, elt_size); 211 | memcpy(&field[index2 * elt_size], data, elt_size); 212 | } 213 | end = rdtscp(); 214 | } 215 | } 216 | 217 | return 0; 218 | } 219 | 220 | int main(int argc, char **argv) 221 | { 222 | unsigned long expt; 223 | unsigned long size, elt_size; 224 | unsigned long tot_hot_size; 225 | int 
log_hot_size; 226 | struct timeval starttime, stoptime; 227 | double secs, gups; 228 | int i; 229 | void *p; 230 | struct gups_args** ga; 231 | pthread_t t[MAX_THREADS]; 232 | 233 | if (argc != 6) { 234 | fprintf(stderr, "Usage: %s [threads] [updates per thread] [exponent] [data size (bytes)] [noremap/remap]\n", argv[0]); 235 | fprintf(stderr, " threads\t\t\tnumber of threads to launch\n"); 236 | fprintf(stderr, " updates per thread\t\tnumber of updates per thread\n"); 237 | fprintf(stderr, " exponent\t\t\tlog size of region\n"); 238 | fprintf(stderr, " data size\t\t\tsize of data in array (in bytes)\n"); 239 | fprintf(stderr, " hot size\t\t\tlog size of hot set\n"); 240 | return 0; 241 | } 242 | 243 | gettimeofday(&starttime, NULL); 244 | 245 | threads = atoi(argv[1]); 246 | assert(threads <= MAX_THREADS); 247 | ga = (struct gups_args**)malloc(threads * sizeof(struct gups_args*)); 248 | 249 | updates = atol(argv[2]); 250 | updates -= updates % 256; 251 | expt = atoi(argv[3]); 252 | assert(expt > 8); 253 | assert(updates > 0 && (updates % 256 == 0)); 254 | size = (unsigned long)(1) << expt; 255 | size -= (size % 256); 256 | assert(size > 0 && (size % 256 == 0)); 257 | elt_size = atoi(argv[4]); 258 | log_hot_size = atof(argv[5]); 259 | tot_hot_size = (unsigned long)(1) << log_hot_size; 260 | 261 | fprintf(stderr, "%lu updates per thread (%d threads)\n", updates, threads); 262 | fprintf(stderr, "field of 2^%lu (%lu) bytes\n", expt, size); 263 | fprintf(stderr, "%ld byte element size (%ld elements total)\n", elt_size, size / elt_size); 264 | 265 | p = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS | MAP_HUGETLB | MAP_POPULATE, -1, 0); 266 | if (p == MAP_FAILED) { 267 | perror("mmap"); 268 | assert(0); 269 | } 270 | 271 | gettimeofday(&stoptime, NULL); 272 | fprintf(stderr, "Init took %.4f seconds\n", elapsed(&starttime, &stoptime)); 273 | fprintf(stderr, "Region address: %p - %p\t size: %ld\n", p, (p + size), size); 274 | 275 | nelems = (size / 
threads) / elt_size; // number of elements per thread 276 | fprintf(stderr, "Elements per thread: %lu\n", nelems); 277 | 278 | //memset(thread_gups, 0, sizeof(thread_gups)); 279 | 280 | hotsetfile = fopen("hotsets.txt", "w"); 281 | if (hotsetfile == NULL) { 282 | perror("fopen"); 283 | assert(0); 284 | } 285 | 286 | gettimeofday(&stoptime, NULL); 287 | secs = elapsed(&starttime, &stoptime); 288 | fprintf(stderr, "Initialization time: %.4f seconds.\n", secs); 289 | 290 | //hemem_start_timing(); 291 | 292 | //pthread_t print_thread; 293 | //int pt = pthread_create(&print_thread, NULL, print_instantaneous_gups, NULL); 294 | //assert(pt == 0); 295 | 296 | 297 | hot_start = 0; 298 | hotsize = (tot_hot_size / threads) / elt_size; 299 | //printf("hot_start: %p\thot_end: %p\thot_size: %lu\n", p + hot_start, p + hot_start + (hotsize * elt_size), hotsize); 300 | 301 | gettimeofday(&starttime, NULL); 302 | for (i = 0; i < threads; i++) { 303 | //printf("starting thread [%d]\n", i); 304 | ga[i] = (struct gups_args*)malloc(sizeof(struct gups_args)); 305 | ga[i]->tid = i; 306 | ga[i]->field = p + (i * nelems * elt_size); 307 | ga[i]->iters = updates; 308 | ga[i]->size = nelems; 309 | ga[i]->elt_size = elt_size; 310 | ga[i]->hot_start = 0; // hot set at start of thread's region 311 | ga[i]->hotsize = hotsize; 312 | } 313 | 314 | if (hotset_only) { 315 | for (i = 0; i < threads; i++) { 316 | int r = pthread_create(&t[i], NULL, prefill_hotset, (void*)ga[i]); 317 | assert(r == 0); 318 | } 319 | // wait for worker threads 320 | for (i = 0; i < threads; i++) { 321 | int r = pthread_join(t[i], NULL); 322 | assert(r == 0); 323 | } 324 | } 325 | 326 | // run through gups once to touch all memory 327 | // spawn gups worker threads 328 | for (i = 0; i < threads; i++) { 329 | int r = pthread_create(&t[i], NULL, do_gups, (void*)ga[i]); 330 | assert(r == 0); 331 | } 332 | 333 | // wait for worker threads 334 | for (i = 0; i < threads; i++) { 335 | int r = pthread_join(t[i], NULL); 336 | 
assert(r == 0); 337 | } 338 | //hemem_print_stats(); 339 | 340 | gettimeofday(&stoptime, NULL); 341 | 342 | secs = elapsed(&starttime, &stoptime); 343 | printf("Elapsed time: %.4f seconds.\n", secs); 344 | gups = threads * ((double)updates) / (secs * 1.0e9); 345 | printf("GUPS = %.10f\n", gups); 346 | //memset(thread_gups, 0, sizeof(thread_gups)); 347 | 348 | filename = "indices2.txt"; 349 | 350 | fprintf(stderr, "Timing.\n"); 351 | gettimeofday(&starttime, NULL); 352 | 353 | //hemem_clear_stats(); 354 | // spawn gups worker threads 355 | for (i = 0; i < threads; i++) { 356 | int r = pthread_create(&t[i], NULL, do_gups, (void*)ga[i]); 357 | assert(r == 0); 358 | } 359 | 360 | // wait for worker threads 361 | for (i = 0; i < threads; i++) { 362 | int r = pthread_join(t[i], NULL); 363 | assert(r == 0); 364 | } 365 | gettimeofday(&stoptime, NULL); 366 | //hemem_print_stats(); 367 | //hemem_clear_stats(); 368 | 369 | secs = elapsed(&starttime, &stoptime); 370 | printf("Elapsed time: %.4f seconds.\n", secs); 371 | gups = threads * ((double)updates) / (secs * 1.0e9); 372 | printf("GUPS = %.10f\n", gups); 373 | 374 | //memset(thread_gups, 0, sizeof(thread_gups)); 375 | #if 0 376 | #ifdef HOTSPOT 377 | move_hotset = true; 378 | hot_offset_page = hotsize / GUPS_PAGE_SIZE; 379 | //hot_start = (16UL * 1024UL * 1024UL * 1024UL) / elt_size; // 16GB to the right; 380 | printf("hot_start: %lu\thot_size: %lu\n", hot_start, hotsize); 381 | printf("hot_offset_page: %lu\n", hot_offset_page); 382 | 383 | filename = "indices3.txt"; 384 | 385 | printf("Timing.\n"); 386 | gettimeofday(&starttime, NULL); 387 | 388 | // spawn gups worker threads 389 | for (i = 0; i < threads; i++) { 390 | ga[i]->tid = i; 391 | ga[i]->iters = updates; 392 | ga[i]->size = nelems; 393 | ga[i]->elt_size = elt_size; 394 | int r = pthread_create(&t[i], NULL, do_gups, (void*)ga[i]); 395 | assert(r == 0); 396 | } 397 | 398 | // wait for worker threads 399 | for (i = 0; i < threads; i++) { 400 | int r = 
pthread_join(t[i], NULL); 401 | assert(r == 0); 402 | } 403 | 404 | gettimeofday(&stoptime, NULL); 405 | 406 | secs = elapsed(&starttime, &stoptime); 407 | printf("Elapsed time: %.4f seconds.\n", secs); 408 | gups = threads * ((double)updates) / (secs * 1.0e9); 409 | printf("GUPS = %.10f\n", gups); 410 | 411 | //hemem_print_stats(); 412 | #endif 413 | #endif 414 | 415 | FILE* pebsfile = fopen("pebs.txt", "w+"); 416 | assert(pebsfile != NULL); 417 | for (uint64_t addr = (uint64_t)p; addr < (uint64_t)p + size; addr += (2*1024*1024)) { 418 | struct hemem_page *pg = get_hemem_page(addr); 419 | assert(pg != NULL); 420 | if (pg != NULL) { 421 | fprintf(pebsfile, "0x%lx:\t%lu\t%lu\t%lu\n", pg->va, pg->tot_accesses[DRAMREAD], pg->tot_accesses[NVMREAD], pg->tot_accesses[WRITE]); 422 | } 423 | } 424 | 425 | //hemem_stop_timing(); 426 | 427 | for (i = 0; i < threads; i++) { 428 | //free(ga[i]->indices); 429 | free(ga[i]); 430 | } 431 | free(ga); 432 | 433 | //getchar(); 434 | 435 | munmap(p, size); 436 | 437 | return 0; 438 | } 439 | 440 | 441 | -------------------------------------------------------------------------------- /microbenchmarks/gups.h: -------------------------------------------------------------------------------- 1 | /* 2 | * ===================================================================================== 3 | * 4 | * Filename: gups.h 5 | * 6 | * Description: i 7 | * 8 | * Version: 1.0 9 | * Created: 02/17/2020 09:13:33 AM 10 | * Revision: none 11 | * Compiler: gcc 12 | * 13 | * Author: YOUR NAME (), 14 | * Organization: 15 | * 16 | * ===================================================================================== 17 | */ 18 | #ifndef GUPS_H 19 | #define GUPS_H 20 | 21 | #define INDEX_FILE "logs/indices.txt" 22 | 23 | //#define ZIPFIAN 24 | #define HOTSPOT 25 | //#define UNIFORM_RANDOM 26 | 27 | void calc_indices(unsigned long* indices, unsigned long updates, unsigned long nelems); 28 | 29 | #ifdef HOTSPOT 30 | extern uint64_t hotset_start; 31 | 
extern double hotset_fraction; 32 | #endif 33 | 34 | #endif 35 | -------------------------------------------------------------------------------- /microbenchmarks/run-instantaneous.sh: -------------------------------------------------------------------------------- 1 | numactl -N0 -m0 -- ./gups-hotset-move 16 1000000000 39 8 34 2 | -------------------------------------------------------------------------------- /microbenchmarks/run-random.sh: -------------------------------------------------------------------------------- 1 | #clear-caches 2 | echo "=== 30 ===" > random.txt 3 | numactl -N0 -m0 -- ./gups-random 16 1000000000 30 8 30 >> random.txt 4 | #clear-caches 5 | echo "=== 31 ===" >> random.txt 6 | numactl -N0 -m0 -- ./gups-random 16 1000000000 31 8 31 >> random.txt 7 | #clear-caches 8 | echo "=== 32 ===" >> random.txt 9 | numactl -N0 -m0 -- ./gups-random 16 1000000000 32 8 32 >> random.txt 10 | #clear-caches 11 | echo "=== 33 ===" >> random.txt 12 | numactl -N0 -m0 -- ./gups-random 16 1000000000 33 8 33 >> random.txt 13 | #clear-caches 14 | echo "=== 34 ===" >> random.txt 15 | numactl -N0 -m0 -- ./gups-random 16 1000000000 34 8 34 >> random.txt 16 | #clear-caches 17 | echo "=== 35 ===" >> random.txt 18 | numactl -N0 -m0 -- ./gups-random 16 1000000000 35 8 35 >> random.txt 19 | #clear-caches 20 | echo "=== 36 ===" >> random.txt 21 | numactl -N0 -m0 -- ./gups-random 16 1000000000 36 8 36 >> random.txt 22 | #clear-caches 23 | echo "=== 37 ===" >> random.txt 24 | numactl -N0 -m0 -- ./gups-random 16 1000000000 37 8 37 >> random.txt 25 | #clear-caches 26 | echo "=== 38 ===" >> random.txt 27 | numactl -N0 -m0 -- ./gups-random 16 1000000000 38 8 38 >> random.txt 28 | #clear-caches 29 | echo "=== 39 ===" >> random.txt 30 | numactl -N0 -m0 -- ./gups-random 16 1000000000 39 8 39 >> random.txt 31 | -------------------------------------------------------------------------------- /microbenchmarks/run-threads.sh: 
-------------------------------------------------------------------------------- 1 | #clear-caches 2 | echo "=== 1 ===" > threads.txt 3 | numactl -N0 --preferred=0 -- ./gups-hotset-move 1 1000000000 39 8 34 >> threads.txt 4 | #clear-caches 5 | echo "=== 2 ===" >> threads.txt 6 | numactl -N0 --preferred=0 -- ./gups-hotset-move 2 1000000000 39 8 34 >> threads.txt 7 | #clear-caches 8 | echo "=== 4 ===" >> threads.txt 9 | numactl -N0 --preferred=0 -- ./gups-hotset-move 4 1000000000 39 8 34 >> threads.txt 10 | #clear-caches 11 | echo "=== 8 ===" >> threads.txt 12 | numactl -N0 --preferred=0 -- ./gups-hotset-move 8 1000000000 39 8 34 >> threads.txt 13 | #clear-caches 14 | echo "=== 16 ===" >> threads.txt 15 | numactl -N0 --preferred=0 -- ./gups-hotset-move 16 1000000000 39 8 34 >> threads.txt 16 | #clear-caches 17 | echo "=== 20 ===" >> threads.txt 18 | numactl -N0 --preferred=0 -- ./gups-hotset-move 16 1000000000 39 8 34 >> threads.txt 19 | #clear-caches 20 | echo "=== 21 ===" >> threads.txt 21 | numactl -N0 --preferred=0 -- ./gups-hotset-move 16 1000000000 39 8 34 >> threads.txt 22 | #clear-caches 23 | echo "=== 22 ===" >> threads.txt 24 | numactl -N0 --preferred=0 -- ./gups-hotset-move 16 1000000000 39 8 34 >> threads.txt 25 | #clear-caches 26 | echo "=== 23 ===" >> threads.txt 27 | numactl -N0 --preferred=0 -- ./gups-hotset-move 16 1000000000 39 8 34 >> threads.txt 28 | #clear-caches 29 | echo "=== 24 ===" >> threads.txt 30 | numactl -N0 --preferred=0 -- ./gups-hotset-move 16 1000000000 39 8 34 >> threads.txt 31 | #clear-caches 32 | -------------------------------------------------------------------------------- /microbenchmarks/run.sh: -------------------------------------------------------------------------------- 1 | #clear-caches 2 | echo "=== 30 ===" >> results.txt 3 | numactl -N0 -m0 -- ./gups-pebs 16 1000000000 39 8 30 >> results.txt 4 | #clear-caches 5 | echo "=== 31 ===" >> results.txt 6 | numactl -N0 -m0 -- ./gups-pebs 16 1000000000 39 8 31 >> results.txt 7 
| #clear-caches 8 | echo "=== 32 ===" >> results.txt 9 | numactl -N0 -m0 -- ./gups-pebs 16 1000000000 39 8 32 >> results.txt 10 | #clear-caches 11 | echo "=== 33 ===" >> results.txt 12 | numactl -N0 -m0 -- ./gups-pebs 16 1000000000 39 8 33 >> results.txt 13 | #clear-caches 14 | echo "=== 34 ===" >> results.txt 15 | numactl -N0 -m0 -- ./gups-pebs 16 1000000000 39 8 34 >> results.txt 16 | #clear-caches 17 | echo "=== 35 ===" >> results.txt 18 | numactl -N0 -m0 -- ./gups-pebs 16 1000000000 39 8 35 >> results.txt 19 | #clear-caches 20 | echo "=== 36 ===" >> results.txt 21 | numactl -N0 -m0 -- ./gups-pebs 16 1000000000 39 8 36 >> results.txt 22 | #clear-caches 23 | echo "=== 37 ===" >> results.txt 24 | numactl -N0 -m0 -- ./gups-pebs 16 1000000000 39 8 37 >> results.txt 25 | #clear-caches 26 | echo "=== 38 ===" >> results.txt 27 | numactl -N0 -m0 -- ./gups-pebs 16 1000000000 39 8 38 >> results.txt 28 | -------------------------------------------------------------------------------- /microbenchmarks/test.c: -------------------------------------------------------------------------------- 1 | /* 2 | * ===================================================================================== 3 | * 4 | * Filename: test.c 5 | * 6 | * Description: 7 | * 8 | * Version: 1.0 9 | * Created: 03/17/2020 06:24:25 AM 10 | * Revision: none 11 | * Compiler: gcc 12 | * 13 | * Author: YOUR NAME (), 14 | * Organization: 15 | * 16 | * ===================================================================================== 17 | */ 18 | #define _GNU_SOURCE 19 | 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include 32 | #include 33 | #include 34 | 35 | #include "../timer.h" 36 | #include "../hemem.h" 37 | 38 | #define KB(x) ((uint64_t)x * 1024) 39 | #define MB(x) (KB(x) * 1024) 40 | #define GB(x) (MB(x) * 1024) 41 | 42 | #define SIZE (GB(256)) 43 | 44 | int main(int argc, char **argv) 45 | 
{ 46 | void *p; 47 | uint64_t i; 48 | uint64_t *region; 49 | uint64_t nelems; 50 | struct timeval start, end; 51 | uint64_t startval; 52 | 53 | if (argc != 2) { 54 | printf("usage: %s val\n", argv[0]); 55 | return 0; 56 | } 57 | 58 | startval = atoi(argv[1]); 59 | 60 | p = mmap(NULL, SIZE, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0); 61 | if (p == MAP_FAILED) { 62 | perror("mmap"); 63 | assert(0); 64 | } 65 | 66 | region = (uint64_t*)p; 67 | nelems = (SIZE / sizeof(uint64_t)); 68 | printf("there are %lu elements\n", nelems); 69 | 70 | gettimeofday(&start, NULL); 71 | for (i = 0; i < nelems; i++) { 72 | region[i] = startval; 73 | if (region[i] != startval) { 74 | assert(region[i] == startval); 75 | } 76 | } 77 | gettimeofday(&end, NULL); 78 | printf("init region took %.4f seconds\n", elapsed(&start, &end)); 79 | hemem_print_stats(); 80 | 81 | for (i = 0; i < nelems; i++) { 82 | if (region[i] != startval) { 83 | assert(region[i] == startval); 84 | } 85 | } 86 | hemem_print_stats(); 87 | 88 | gettimeofday(&start, NULL); 89 | for (i = 0; i < nelems; i++) { 90 | region[i] = region[i] + 2; 91 | if (region[i] != startval + 2) { 92 | assert(region[i] == startval + 2); 93 | } 94 | } 95 | gettimeofday(&end, NULL); 96 | printf("calc region took %.4f seconds\n", elapsed(&start, &end)); 97 | hemem_print_stats(); 98 | 99 | for (i = 0; i < nelems; i++) { 100 | if (region[i] != startval + 2) { 101 | assert(region[i] == startval + 2); 102 | } 103 | } 104 | hemem_print_stats(); 105 | 106 | return 0; 107 | } 108 | -------------------------------------------------------------------------------- /microbenchmarks/zipf.c: -------------------------------------------------------------------------------- 1 | /* 2 | * ===================================================================================== 3 | * 4 | * Filename: zipf.c 5 | * 6 | * Description: 7 | * 8 | * Version: 1.0 9 | * Created: 05/06/2019 11:24:53 AM 10 | * Revision: none 11 | * Compiler: gcc 12 | * 13 | * 
Author: YOUR NAME (), 14 | * Organization: 15 | * 16 | * ===================================================================================== 17 | */ 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | 24 | #include "gups.h" 25 | 26 | #ifdef ZIPFIAN 27 | 28 | static const double ZETAN = 26.46902820178302; 29 | static const double ZIPFIAN_CONSTANT = 0.99; 30 | static unsigned long min, max, itemcount; 31 | static unsigned long items, base, countforzeta; 32 | static double zipfianconstant, alpha, zetan, eta, theta, zeta2theta; 33 | static unsigned long lastVal; 34 | static int allowitemdecrease = 0; 35 | static const long FNV_OFFSET_BASIS_64 = 0xCBF29CE484222325L; 36 | static const long FNV_PRIME_64 = 1099511628211L; 37 | 38 | static unsigned long fnvhash64(unsigned long val) { 39 | long hashval = FNV_OFFSET_BASIS_64; 40 | 41 | for (int i = 0; i < 8; i++) { 42 | long octet = val & 0x00ff; 43 | val = val >> 8; 44 | 45 | hashval = hashval ^ octet; 46 | hashval = hashval * FNV_PRIME_64; 47 | } 48 | 49 | return (unsigned long)abs(hashval); 50 | } 51 | 52 | static double _zetastatic(unsigned long st, unsigned long n, double theta, double initialsum) 53 | { 54 | double sum = initialsum; 55 | for (unsigned long i = st; i < n; i++) { 56 | sum += 1 / (pow(i + 1, theta)); 57 | } 58 | return sum; 59 | } 60 | 61 | static double _zeta(unsigned long st, unsigned long n, double thetaVal, double initialsum) 62 | { 63 | countforzeta = n; 64 | return _zetastatic(st, n, thetaVal, initialsum); 65 | } 66 | 67 | static double zetastatic(unsigned long n, double theta) 68 | { 69 | return _zetastatic(0, n, theta, 0); 70 | } 71 | 72 | static double zeta(unsigned long n, double thetaVal) 73 | { 74 | countforzeta = n; 75 | return zetastatic(n, thetaVal); 76 | 77 | } 78 | 79 | static unsigned long nextValue(unsigned long itemcount) 80 | { 81 | if (itemcount != countforzeta) { 82 | if (itemcount > countforzeta) { 83 | printf("recomputing zeta due to item increase\n"); 84 | 
zetan = _zeta(countforzeta, itemcount, theta, zetan); 85 | eta = (1 - pow(2.0 / items, 1 - theta)) / (1 - zeta2theta / zetan); 86 | } else if (itemcount < countforzeta) { /* FIX: was '>', duplicating the first branch's condition and making this item-decrease recompute path (see the printf below) unreachable; YCSB's ZipfianGenerator uses '<' here */ 87 | printf("recomputing zeta due to item decrease (warning: slow)\n"); 88 | zetan = zeta(itemcount, theta); 89 | eta = (1 - pow(2.0 / items, 1 - theta)) / (1 - zeta2theta / zetan); 90 | } 91 | } 92 | 93 | double u = (double)rand() / RAND_MAX; 94 | double uz = u * zetan; 95 | 96 | if (uz < 1.0) { 97 | return base; 98 | } 99 | 100 | if (uz < 1.0 + pow(0.5, theta)) { 101 | return base + 1; 102 | } 103 | 104 | unsigned long ret = base + (unsigned long)((itemcount) * pow(eta * u - eta + 1, alpha)); 105 | lastVal = ret; 106 | return ret; 107 | } 108 | 109 | void calc_indices(unsigned long* indices, unsigned long updates, unsigned long nelems) 110 | { 111 | FILE* f; 112 | unsigned long i; /* FIX: was unsigned int; must cover the full range of 'updates' (unsigned long) */ 113 | 114 | assert(!"Not thread-safe"); 115 | 116 | f = fopen(INDEX_FILE, "w"); 117 | if (f == NULL) { 118 | perror("fopen"); 119 | assert(0); 120 | } 121 | assert(indices != NULL); 122 | 123 | // init zipfian distribution variables 124 | min = 0; 125 | max = nelems - 1; 126 | itemcount = max - min + 1; 127 | items = max - min + 1; 128 | base = min; 129 | zipfianconstant = ZIPFIAN_CONSTANT; 130 | theta = zipfianconstant; 131 | zeta2theta = zeta(2, theta); 132 | 133 | alpha = 1.0 / (1.0 - theta); 134 | zetan = ZETAN; 135 | countforzeta = items; 136 | eta = (1 - pow(2.0 / items, 1 - theta)) / (1 - zeta2theta / zetan); 137 | nextValue(nelems); 138 | 139 | for (i = 0; i < updates; i++) { 140 | unsigned long ret = nextValue(nelems); 141 | ret = min + fnvhash64(ret) % itemcount; 142 | lastVal = ret; 143 | indices[i] = ret; 144 | //fprintf(f, "%d\n", indices[i]); 145 | } 146 | 147 | fclose(f); 148 | } 149 | 150 | #elif defined HOTSPOT 151 | 152 | #define RAND_WITHIN(x) (((double)rand_r(&seed) / RAND_MAX) * (x)) 153 | 154 | uint64_t hotset_start = 0; 155 | double hotset_fraction = 0.1; 156 | static double hotset_prob = 0.9; 157 
| 158 | void calc_indices(unsigned long* indices, unsigned long updates, unsigned long nelems) 159 | { 160 | unsigned long i; /* FIX: was int; comparing a signed int against 'updates' (unsigned long) wraps/overflows before reaching bounds > INT_MAX */ 161 | uint64_t hotset_size = (uint64_t)(hotset_fraction * nelems); 162 | unsigned int seed = 0; 163 | 164 | assert(hotset_start + hotset_size <= nelems); 165 | 166 | printf("hotset start: %lu\thotset size: %lu\thotset probability: %f\n", hotset_start, hotset_size, hotset_prob); 167 | 168 | /* srand(0); */ 169 | 170 | for (i = 0; i < updates; i++) { 171 | if (RAND_WITHIN(1) < hotset_prob) { 172 | indices[i] = hotset_start + (uint64_t)RAND_WITHIN(hotset_size); 173 | } 174 | else { 175 | indices[i] = (uint64_t)RAND_WITHIN(nelems); 176 | } 177 | } 178 | } 179 | 180 | #else // UNIFORM_RANDOM 181 | 182 | void calc_indices(unsigned long* indices, unsigned long updates, unsigned long nelems) 183 | { 184 | unsigned long i; /* FIX: was unsigned int; must cover the full range of 'updates' (unsigned long) */ 185 | assert(indices != NULL); 186 | unsigned int seed = 0; 187 | 188 | /* srand(0); */ 189 | 190 | for (i = 0; i < updates; i++) { 191 | indices[i] = rand_r(&seed) % nelems; 192 | } 193 | } 194 | 195 | #endif 196 | -------------------------------------------------------------------------------- /src/Makefile: -------------------------------------------------------------------------------- 1 | CC = gcc 2 | CFLAGS = -g -Wall -O3 -fPIC 3 | #CFLAGS = -g3 -Wall -O0 -fPIC 4 | LDFLAGS = -shared -Wl,--allow-multiple-definition -Wl,-rpath,'$$ORIGIN' 5 | INCLUDES = 6 | LIBS = -lm -lpthread 7 | HEMEM_LIBS = $(LIBS) -ldl -lsyscall_intercept -lcapstone -lhoard 8 | 9 | default: libhemem.so 10 | 11 | all: hemem-libs 12 | 13 | hemem-libs: libhemem-lru.so libhemem-simple.so libhemem-lru-swap.so libhemem.so 14 | 15 | libhemem.so: hemem.o pebs.o timer.o interpose.o fifo.o spsc-ring.o 16 | $(CC) $(LDFLAGS) -o libhemem.so hemem.o timer.o interpose.o pebs.o fifo.o spsc-ring.o $(HEMEM_LIBS) 17 | 18 | libhemem-lru.so: policies/hemem-lru.o policies/lru.o timer.o interpose.o policies/paging.o fifo.o spsc-ring.o 19 | $(CC) $(LDFLAGS) -o libhemem-lru.so 
policies/hemem-lru.o timer.o policies/lru.o interpose.o policies/paging.o fifo.o spsc-ring.o $(HEMEM_LIBS) 20 | 21 | libhemem-simple.so: policies/hemem-simple.o policies/simple.o timer.o interpose.o policies/paging.o fifo.o spsc-ring.o 22 | $(CC) $(LDFLAGS) -o libhemem-simple.so policies/hemem-simple.o timer.o policies/simple.o interpose.o policies/paging.o fifo.o spsc-ring.o $(HEMEM_LIBS) 23 | 24 | libhemem-lru-swap.so: policies/hemem-lru-swap.o policies/lru_swap.o timer.o interpose.o policies/paging.o fifo.o spsc-ring.o 25 | $(CC) $(LDFLAGS) -o libhemem-lru-swap.so policies/hemem-lru-swap.o timer.o policies/lru_swap.o interpose.o policies/paging.o fifo.o spsc-ring.o $(HEMEM_LIBS) 26 | 27 | hemem.o: hemem.c hemem.h pebs.h interpose.h fifo.h spsc-ring.h 28 | $(CC) $(CFLAGS) $(INCLUDES) -D ALLOC_HEMEM -c hemem.c -o hemem.o 29 | 30 | policies/hemem-lru.o: hemem.c hemem.h policies/lru.h interpose.h policies/paging.h fifo.h spsc-ring.h 31 | $(CC) $(CFLAGS) $(INCLUDES) -D ALLOC_LRU -c hemem.c -o policies/hemem-lru.o 32 | 33 | policies/hemem-simple.o: hemem.c hemem.h policies/simple.h interpose.h policies/paging.h fifo.h spsc-ring.h 34 | $(CC) $(CFLAGS) $(INCLUDES) -D ALLOC_SIMPLE -c hemem.c -o policies/hemem-simple.o 35 | 36 | policies/hemem-lru-swap.o: hemem.c hemem.h policies/lru.h interpose.h policies/paging.h fifo.h spsc-ring.h 37 | $(CC) $(CFLAGS) $(INCLUDES) -D ALLOC_LRU -D LRU_SWAP -c hemem.c -o policies/hemem-lru-swap.o 38 | 39 | interpose.o: interpose.c interpose.h hemem.h 40 | $(CC) $(CFLAGS) $(INCLUDES) -c interpose.c 41 | 42 | timer.o: timer.c timer.h 43 | $(CC) $(CFLAGS) $(INCLUDES) -c timer.c 44 | 45 | policies/lru.o: policies/lru.c policies/lru.h hemem.h fifo.h spsc-ring.h 46 | $(CC) $(CFLAGS) $(INCLUDES) -D ALLOC_LRU -c policies/lru.c -o policies/lru.o 47 | 48 | policies/simple.o: policies/simple.c policies/simple.h hemem.h spsc-ring.h 49 | $(CC) $(CFLAGS) $(INCLUDES) -c policies/simple.c -o policies/simple.o 50 | 51 | policies/lru_swap.o: policies/lru.c 
policies/lru.h hemem.h fifo.h spsc-ring.h 52 | $(CC) $(CFLAGS) $(INCLUDES) -D LRU_SWAP -D ALLOC_LRU -c policies/lru.c -o policies/lru_swap.o 53 | 54 | policies/paging.o: policies/paging.c policies/paging.h 55 | $(CC) $(CFLAGS) $(INCLUDES) -c policies/paging.c -o policies/paging.o 56 | 57 | pebs.o: pebs.c pebs.h hemem.h fifo.h 58 | $(CC) $(CFLAGS) $(INCLUDES) -c pebs.c 59 | 60 | fifo.o: fifo.c fifo.h hemem.h 61 | $(CC) $(CFLAGS) $(INCLUDES) -c fifo.c 62 | 63 | spsc-ring.o: spsc-ring.c spsc-ring.h 64 | $(CC) $(CFLAGS) $(INCLUDES) -c spsc-ring.c 65 | 66 | clean: 67 | $(RM) *.o *.so policies/*.o 68 | -------------------------------------------------------------------------------- /src/fifo.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "hemem.h" 5 | #include "fifo.h" 6 | 7 | void enqueue_fifo(struct fifo_list *queue, struct hemem_page *entry) 8 | { 9 | pthread_mutex_lock(&(queue->list_lock)); 10 | assert(entry->prev == NULL); 11 | entry->next = queue->first; 12 | if(queue->first != NULL) { 13 | assert(queue->first->prev == NULL); 14 | queue->first->prev = entry; 15 | } else { 16 | assert(queue->last == NULL); 17 | assert(queue->numentries == 0); 18 | queue->last = entry; 19 | } 20 | 21 | queue->first = entry; 22 | entry->list = queue; 23 | queue->numentries++; 24 | pthread_mutex_unlock(&(queue->list_lock)); 25 | } 26 | 27 | struct hemem_page *dequeue_fifo(struct fifo_list *queue) 28 | { 29 | pthread_mutex_lock(&(queue->list_lock)); 30 | struct hemem_page *ret = queue->last; 31 | 32 | if(ret == NULL) { 33 | //assert(queue->numentries == 0); 34 | pthread_mutex_unlock(&(queue->list_lock)); 35 | return ret; 36 | } 37 | 38 | queue->last = ret->prev; 39 | if(queue->last != NULL) { 40 | queue->last->next = NULL; 41 | } else { 42 | queue->first = NULL; 43 | } 44 | 45 | ret->prev = ret->next = NULL; 46 | ret->list = NULL; 47 | assert(queue->numentries > 0); 48 | queue->numentries--; 49 | 
pthread_mutex_unlock(&(queue->list_lock)); 50 | 51 | return ret; 52 | } 53 | 54 | void page_list_remove_page(struct fifo_list *list, struct hemem_page *page) 55 | { 56 | pthread_mutex_lock(&(list->list_lock)); 57 | if (list->first == NULL) { 58 | assert(list->last == NULL); 59 | assert(list->numentries == 0); 60 | pthread_mutex_unlock(&(list->list_lock)); 61 | LOG("page_list_remove_page: list was empty!\n"); 62 | return; 63 | } 64 | 65 | if (list->first == page) { 66 | list->first = page->next; 67 | } 68 | 69 | if (list->last == page) { 70 | list->last = page->prev; 71 | } 72 | 73 | if (page->next != NULL) { 74 | page->next->prev = page->prev; 75 | } 76 | 77 | if (page->prev != NULL) { 78 | page->prev->next = page->next; 79 | } 80 | 81 | assert(list->numentries > 0); 82 | list->numentries--; 83 | page->next = NULL; 84 | page->prev = NULL; 85 | page->list = NULL; 86 | pthread_mutex_unlock(&(list->list_lock)); 87 | } 88 | 89 | struct hemem_page* next_page(struct fifo_list *list, struct hemem_page *page) 90 | { 91 | struct hemem_page* next_page = NULL; 92 | 93 | pthread_mutex_lock(&(list->list_lock)); 94 | if (page == NULL) { 95 | next_page = list->last; 96 | } 97 | else { 98 | next_page = page->prev; 99 | assert(page->list == list); 100 | } 101 | pthread_mutex_unlock(&(list->list_lock)); 102 | 103 | return next_page; 104 | } 105 | -------------------------------------------------------------------------------- /src/fifo.h: -------------------------------------------------------------------------------- 1 | #ifndef HEMEM_FIFO_H 2 | #define HEMEM_FIFO_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | #include "hemem.h" 11 | 12 | struct fifo_list { 13 | struct hemem_page *first, *last; 14 | pthread_mutex_t list_lock; 15 | size_t numentries; 16 | }; 17 | 18 | 19 | void enqueue_fifo(struct fifo_list *list, struct hemem_page *page); 20 | struct hemem_page* dequeue_fifo(struct fifo_list *list); 21 | void page_list_remove_page(struct fifo_list 
*list, struct hemem_page *page); 22 | struct hemem_page* next_page(struct fifo_list *list, struct hemem_page *page); 23 | 24 | #endif 25 | 26 | -------------------------------------------------------------------------------- /src/hemem.h: -------------------------------------------------------------------------------- 1 | #ifndef HEMEM_H 2 | 3 | #define HEMEM_H 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | #ifndef __cplusplus 15 | #include 16 | #else 17 | #include 18 | #define _Atomic(X) std::atomic< X > 19 | #endif 20 | 21 | #ifdef __cplusplus 22 | extern "C" { 23 | #endif 24 | 25 | #ifdef ALLOC_LRU 26 | #include "policies/lru.h" 27 | #endif 28 | 29 | #ifdef ALLOC_SIMPLE 30 | #include "policies/simple.h" 31 | #endif 32 | 33 | #include "pebs.h" 34 | #include "timer.h" 35 | #include "interpose.h" 36 | #include "uthash.h" 37 | #include "fifo.h" 38 | 39 | //#define HEMEM_DEBUG 40 | //#define USE_PEBS 41 | //#define STATS_THREAD 42 | 43 | #define NUM_CHANNS 2 44 | // #define USE_DMA 45 | #define SIZE_PER_DMA_REQUEST (1024*1024) 46 | 47 | #define MEM_BARRIER() __sync_synchronize() 48 | 49 | #define NVMSIZE (3L* (1024L * 1024L * 1024L)) 50 | #define DRAMSIZE (5L* (1024L * 1024L * 1024L)) 51 | #define SMALLALLOCSIZE (1L << 30) 52 | 53 | #define DRAMPATH "/dev/dax0.0" 54 | #define NVMPATH "/dev/dax1.0" 55 | 56 | //#define PAGE_SIZE (1024 * 1024 * 1024) 57 | //#define PAGE_SIZE (2 * (1024 * 1024)) 58 | #define BASEPAGE_SIZE (4UL * 1024UL) 59 | #define HUGEPAGE_SIZE (2UL * 1024UL * 1024UL) 60 | #define GIGAPAGE_SIZE (1024UL * 1024UL * 1024UL) 61 | #define PAGE_SIZE HUGEPAGE_SIZE 62 | 63 | #define FASTMEM_PAGES ((DRAMSIZE) / (PAGE_SIZE)) 64 | #define SLOWMEM_PAGES ((NVMSIZE) / (PAGE_SIZE)) 65 | 66 | #define BASEPAGE_MASK (BASEPAGE_SIZE - 1) 67 | #define HUGEPAGE_MASK (HUGEPAGE_SIZE - 1) 68 | #define GIGAPAGE_MASK (GIGAPAGE_SIZE - 1) 69 | 70 | #define BASE_PFN_MASK (BASEPAGE_MASK ^ UINT64_MAX) 71 | 
#define HUGE_PFN_MASK (HUGEPAGE_MASK ^ UINT64_MAX) 72 | #define GIGA_PFN_MASK (GIGAPAGE_MASK ^ UINT64_MAX) 73 | 74 | #define FAULT_THREAD_CPU (0) 75 | #define STATS_THREAD_CPU (3) 76 | 77 | FILE *hememlogf; 78 | //#define LOG(...) fprintf(stderr, __VA_ARGS__) 79 | //#define LOG(...) fprintf(hememlogf, __VA_ARGS__) 80 | #define LOG(str, ...) while(0) {} 81 | 82 | FILE *timef; 83 | extern bool timing; 84 | 85 | static inline void log_time(const char* fmt, ...) 86 | { 87 | if (timing) { 88 | va_list args; 89 | va_start(args, fmt); 90 | vfprintf(timef, fmt, args); 91 | va_end(args); 92 | } 93 | } 94 | 95 | 96 | //#define LOG_TIME(str, ...) log_time(str, __VA_ARGS__) 97 | //#define LOG_TIME(str, ...) fprintf(timef, str, __VA_ARGS__) 98 | #define LOG_TIME(str, ...) while(0) {} 99 | 100 | FILE *statsf; 101 | #define LOG_STATS(str, ...) fprintf(stderr, str, __VA_ARGS__) 102 | //#define LOG_STATS(str, ...) fprintf(statsf, str, __VA_ARGS__) 103 | //#define LOG_STATS(str, ...) while (0) {} 104 | 105 | #if defined (ALLOC_HEMEM) 106 | #define pagefault(...) pebs_pagefault(__VA_ARGS__) 107 | #define paging_init(...) pebs_init(__VA_ARGS__) 108 | #define mmgr_remove(...) pebs_remove_page(__VA_ARGS__) 109 | #define mmgr_stats(...) pebs_stats(__VA_ARGS__) 110 | #define policy_shutdown(...) pebs_shutdown(__VA_ARGS__) 111 | #elif defined (ALLOC_LRU) 112 | #define pagefault(...) lru_pagefault(__VA_ARGS__) 113 | #define paging_init(...) lru_init(__VA_ARGS__) 114 | #define mmgr_remove(...) lru_remove_page(__VA_ARGS__) 115 | #define mmgr_stats(...) lru_stats(__VA_ARGS__) 116 | #define policy_shutdown(...) while(0) {} 117 | #elif defined (ALLOC_SIMPLE) 118 | #define pagefault(...) simple_pagefault(__VA_ARGS__) 119 | #define paging_init(...) simple_init(__VA_ARGS__) 120 | #define mmgr_remove(...) simple_remove_page(__VA_ARGS__) 121 | #define mmgr_stats(...) simple_stats(__VA_ARGS__) 122 | #define policy_shutdown(...) 
while(0) {} 123 | #endif 124 | 125 | 126 | #define MAX_UFFD_MSGS (1) 127 | #define MAX_COPY_THREADS (4) 128 | 129 | extern uint64_t cr3; 130 | extern int dramfd; 131 | extern int nvmfd; 132 | extern int devmemfd; 133 | extern bool is_init; 134 | extern uint64_t missing_faults_handled; 135 | extern uint64_t migrations_up; 136 | extern uint64_t migrations_down; 137 | extern __thread bool internal_malloc; 138 | extern __thread bool old_internal_call; 139 | extern __thread bool internal_call; 140 | extern __thread bool internal_munmap; 141 | extern void* devmem_mmap; 142 | 143 | enum memtypes { 144 | FASTMEM = 0, 145 | SLOWMEM = 1, 146 | NMEMTYPES, 147 | }; 148 | 149 | enum pagetypes { 150 | HUGEP = 0, 151 | BASEP = 1, 152 | NPAGETYPES 153 | }; 154 | 155 | struct hemem_page { 156 | uint64_t va; 157 | uint64_t devdax_offset; 158 | bool in_dram; 159 | enum pagetypes pt; 160 | volatile bool migrating; 161 | bool present; 162 | bool written; 163 | bool hot; 164 | uint64_t naccesses; 165 | uint64_t migrations_up, migrations_down; 166 | uint64_t local_clock; 167 | bool ring_present; 168 | uint64_t accesses[NPBUFTYPES]; 169 | uint64_t tot_accesses[NPBUFTYPES]; 170 | pthread_mutex_t page_lock; 171 | 172 | UT_hash_handle hh; 173 | struct hemem_page *next, *prev; 174 | struct fifo_list *list; 175 | }; 176 | 177 | static inline uint64_t pt_to_pagesize(enum pagetypes pt) 178 | { 179 | switch(pt) { 180 | case HUGEP: return HUGEPAGE_SIZE; 181 | case BASEP: return BASEPAGE_SIZE; 182 | default: assert(!"Unknown page type"); 183 | } 184 | } 185 | 186 | static inline enum pagetypes pagesize_to_pt(uint64_t pagesize) 187 | { 188 | switch (pagesize) { 189 | case BASEPAGE_SIZE: return BASEP; 190 | case HUGEPAGE_SIZE: return HUGEP; 191 | default: assert(!"Unknown page ssize"); 192 | } 193 | } 194 | 195 | void log_init(); 196 | void hemem_init(); 197 | void hemem_stop(); 198 | void* hemem_mmap(void *addr, size_t length, int prot, int flags, int fd, off_t offset); 199 | int hemem_munmap(void* 
addr, size_t length); 200 | void *handle_fault(); 201 | void hemem_migrate_up(struct hemem_page *page, uint64_t dram_offset); 202 | void hemem_migrate_down(struct hemem_page *page, uint64_t nvm_offset); 203 | void hemem_wp_page(struct hemem_page *page, bool protect); 204 | void hemem_promote_pages(uint64_t addr); 205 | void hemem_demote_pages(uint64_t addr); 206 | 207 | #ifdef ALLOC_LRU 208 | void hemem_clear_bits(struct hemem_page *page); 209 | uint64_t hemem_get_bits(struct hemem_page *page); 210 | void hemem_tlb_shootdown(uint64_t va); 211 | #endif 212 | 213 | struct hemem_page* get_hemem_page(uint64_t va); 214 | 215 | void hemem_print_stats(); 216 | void hemem_clear_stats(); 217 | 218 | void hemem_start_timing(void); 219 | void hemem_stop_timing(void); 220 | 221 | #ifdef __cplusplus 222 | } 223 | #endif 224 | 225 | #define max(a,b) ({ __typeof__ (a) _a = (a); __typeof__ (b) _b = (b); _a > _b ? _a : _b; }) 226 | #define min(a,b) ({ __typeof__ (a) _a = (a); __typeof__ (b) _b = (b); _a < _b ? 
_a : _b; }) 227 | 228 | #endif /* HEMEM_H */ 229 | -------------------------------------------------------------------------------- /src/interpose.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #define __USE_GNU 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | #include "hemem.h" 14 | #include "interpose.h" 15 | 16 | void* (*libc_mmap)(void *addr, size_t length, int prot, int flags, int fd, off_t offset) = NULL; 17 | int (*libc_munmap)(void *addr, size_t length) = NULL; 18 | void* (*libc_malloc)(size_t size) = NULL; 19 | void (*libc_free)(void* ptr) = NULL; 20 | 21 | static int mmap_filter(void *addr, size_t length, int prot, int flags, int fd, off_t offset, uint64_t *result) 22 | { 23 | //ensure_init(); 24 | 25 | //TODO: figure out which mmap calls should go to libc vs hemem 26 | // non-anonymous mappings should probably go to libc (e.g., file mappings) 27 | if (((flags & MAP_ANONYMOUS) != MAP_ANONYMOUS) && !((fd == dramfd) || (fd == nvmfd) || (fd == devmemfd))) { 28 | LOG("hemem interpose: calling libc mmap due to non-anonymous, non-devdax mapping: mmap(0x%lx, %ld, %x, %x, %d, %ld)\n", (uint64_t)addr, length, prot, flags, fd, offset); 29 | return 1; 30 | } 31 | 32 | if ((flags & MAP_STACK) == MAP_STACK) { 33 | // pthread mmaps are called with MAP_STACK 34 | LOG("hemem interpose: calling libc mmap due to stack mapping: mmap(0x%lx, %ld, %x, %x, %d, %ld)\n", (uint64_t)addr, length, prot, flags, fd, offset); 35 | return 1; 36 | } 37 | 38 | //if (((flags & MAP_NORESERVE) == MAP_NORESERVE)) { 39 | // thread stack is called without swap space reserved, so we can probably ignore these 40 | //fprintf(stderr, "hemem interpose: calling libc mmap due to non-swap space reserved mapping: mmap(0x%lx, %ld, %x, %x, %d, %ld)\n", (uint64_t)addr, length, prot, flags, fd, offset); 41 | //return 1; 42 | //} 43 | 44 | if ((fd == dramfd) || (fd == nvmfd) 
|| (fd == devmemfd)) { 45 | //LOG("hemem interpose: calling libc mmap due to hemem devdax mapping\n"); 46 | return 1; 47 | } 48 | 49 | if (internal_call) { 50 | LOG("hemem interpose: calling libc mmap due to internal memory call: mmap(0x%lx, %ld, %x, %x, %d, %ld)\n", (uint64_t)addr, length, prot, flags, fd, offset); 51 | return 1; 52 | } 53 | 54 | if (!is_init) { 55 | //LOG("hemem interpose: calling libc mmap due to hemem init in progress\n"); 56 | return 1; 57 | } 58 | 59 | LOG("hemem interpose calling libc mmap due to small allocation size: mmap(0x%lx, %ld, %x, %x, %d, %ld)\n", (uint64_t)addr, length, prot, flags, fd, offset); 60 | if (length < SMALLALLOCSIZE) { 61 | return 1; 62 | } 63 | 64 | LOG("hemem interpose: calling hemem mmap(0x%lx, %ld, %x, %x, %d, %ld)\n", (uint64_t)addr, length, prot, flags, fd, offset); 65 | if ((*result = (uint64_t)hemem_mmap(addr, length, prot, flags, fd, offset)) == (uint64_t)MAP_FAILED) { 66 | // hemem failed for some reason, try libc 67 | LOG("hemem mmap failed\n\tmmap(0x%lx, %ld, %x, %x, %d, %ld)\n", (uint64_t)addr, length, prot, flags, fd, offset); 68 | } 69 | return 0; 70 | } 71 | 72 | 73 | static int munmap_filter(void *addr, size_t length, uint64_t* result) 74 | { 75 | //ensure_init(); 76 | 77 | //TODO: figure out which munmap calls should go to libc vs hemem 78 | 79 | if (internal_call) { 80 | return 1; 81 | } 82 | 83 | if ((*result = hemem_munmap(addr, length)) == -1) { 84 | LOG("hemem munmap failed\n\tmunmap(0x%lx, %ld)\n", (uint64_t)addr, length); 85 | } 86 | return 0; 87 | } 88 | 89 | 90 | static void* bind_symbol(const char *sym) 91 | { 92 | void *ptr; 93 | if ((ptr = dlsym(RTLD_NEXT, sym)) == NULL) { 94 | fprintf(stderr, "hemem memory manager interpose: dlsym failed (%s)\n", sym); 95 | abort(); 96 | } 97 | return ptr; 98 | } 99 | 100 | static int hook(long syscall_number, long arg0, long arg1, long arg2, long arg3, long arg4, long arg5, long *result) 101 | { 102 | if (syscall_number == SYS_mmap) { 103 | return 
mmap_filter((void*)arg0, (size_t)arg1, (int)arg2, (int)arg3, (int)arg4, (off_t)arg5, (uint64_t*)result); 104 | } else if (syscall_number == SYS_munmap){ 105 | return munmap_filter((void*)arg0, (size_t)arg1, (uint64_t*)result); 106 | } else { 107 | // ignore non-mmap system calls 108 | return 1; 109 | } 110 | } 111 | 112 | static __attribute__((constructor)) void init(void) 113 | { 114 | log_init(); 115 | 116 | libc_mmap = bind_symbol("mmap"); 117 | libc_munmap = bind_symbol("munmap"); 118 | libc_malloc = bind_symbol("malloc"); 119 | libc_free = bind_symbol("free"); 120 | intercept_hook_point = hook; 121 | 122 | hemem_init(); 123 | } 124 | 125 | static __attribute__((destructor)) void hemem_shutdown(void) 126 | { 127 | hemem_stop(); 128 | } 129 | 130 | /* 131 | void* malloc(size_t size) 132 | { 133 | void* ret; 134 | if(libc_malloc == NULL) { 135 | libc_malloc = bind_symbol("malloc"); 136 | } 137 | assert(libc_malloc != NULL); 138 | ret = libc_malloc(size); 139 | return ret; 140 | } 141 | 142 | void free(void* ptr) 143 | { 144 | if(libc_free == NULL) { 145 | libc_free = bind_symbol("free"); 146 | } 147 | assert(libc_free != NULL); 148 | libc_free(ptr); 149 | } 150 | */ 151 | -------------------------------------------------------------------------------- /src/interpose.h: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | // function pointers to libc functions 4 | extern void* (*libc_mmap)(void *addr, size_t length, int prot, int flags, int fd, off_t offset); 5 | extern int (*libc_munmap)(void *addr, size_t length); 6 | extern void* (*libc_malloc)(size_t size); 7 | extern void (*libc_free)(void* p); 8 | 9 | 10 | -------------------------------------------------------------------------------- /src/pebs.h: -------------------------------------------------------------------------------- 1 | #ifndef HEMEM_PEBS_H 2 | #define HEMEM_PEBS_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | 
#include "hemem.h" 11 | 12 | #define PEBS_KSWAPD_INTERVAL (10000) // in us (10ms) 13 | #define PEBS_KSWAPD_MIGRATE_RATE (10UL * 1024UL * 1024UL * 1024UL) // 10GB 14 | #define HOT_READ_THRESHOLD (4) 15 | #define HOT_WRITE_THRESHOLD (4) 16 | #define PEBS_COOLING_THRESHOLD (10) 17 | 18 | #define HOT_RING_REQS_THRESHOLD (1024*1024) 19 | #define COLD_RING_REQS_THRESHOLD (128) 20 | #define CAPACITY (16*1024*1024) 21 | #define COOLING_PAGES (8192) 22 | 23 | #define PEBS_NPROCS 4 24 | #define PERF_PAGES (1 + (1 << 14)) // Has to be == 1+2^n 25 | //#define SAMPLE_PERIOD 10007 26 | #define SAMPLE_PERIOD 19997 27 | //#define SAMPLE_FREQ 100 28 | 29 | 30 | #define SCANNING_THREAD_CPU (FAULT_THREAD_CPU + 1) 31 | #define MIGRATION_THREAD_CPU (SCANNING_THREAD_CPU + 1) 32 | 33 | struct perf_sample { 34 | struct perf_event_header header; 35 | __u64 ip; 36 | __u32 pid, tid; /* if PERF_SAMPLE_TID */ 37 | __u64 addr; /* if PERF_SAMPLE_ADDR */ 38 | __u64 weight; /* if PERF_SAMPLE_WEIGHT */ 39 | /* __u64 data_src; /\* if PERF_SAMPLE_DATA_SRC *\/ */ 40 | }; 41 | 42 | enum pbuftype { 43 | DRAMREAD = 0, 44 | NVMREAD = 1, 45 | WRITE = 2, 46 | NPBUFTYPES 47 | }; 48 | 49 | void *pebs_kswapd(); 50 | struct hemem_page* pebs_pagefault(void); 51 | struct hemem_page* pebs_pagefault_unlocked(void); 52 | void pebs_init(void); 53 | void pebs_remove_page(struct hemem_page *page); 54 | void pebs_stats(); 55 | void pebs_shutdown(); 56 | 57 | #endif /* HEMEM_LRU_MODIFIED_H */ 58 | -------------------------------------------------------------------------------- /src/policies/hemem-mmgr.h: -------------------------------------------------------------------------------- 1 | #ifndef HEMEM_MMGR_H 2 | #define HEMEM_MMGR_H 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | #include "hemem.h" 9 | #include "paging.h" 10 | 11 | #define HEMEM_INTERVAL 10000ULL // in us 12 | 13 | #define HEMEM_FASTFREE (DRAMSIZE / 10) 14 | #define HEMEM_COOL_RATE (10ULL * 1024ULL * 1024ULL * 1024ULL) 15 | #define HEMEM_THAW_RATE 
(NVMSIZE + DRAMSIZE) 16 | 17 | #define FASTMEM_HUGE_PAGES ((DRAMSIZE) / (HUGEPAGE_SIZE)) 18 | #define FASTMEM_BASE_PAGES ((DRAMSIZE) / (BASEPAGE_SIZE)) 19 | 20 | #define SLOWMEM_HUGE_PAGES ((NVMSIZE) / (HUGEPAGE_SIZE)) 21 | #define SLOWMEM_BASE_PAGES ((NVMSIZE) / (BASEPAGE_SIZE)) 22 | 23 | struct mmgr_node { 24 | struct hemem_page *page; 25 | uint64_t accesses, tot_accesses; 26 | uint64_t offset; 27 | struct mmgr_node *next, *prev; 28 | struct mmgr_list *list; 29 | }; 30 | 31 | struct mmgr_list { 32 | struct mmgr_node *first; 33 | struct mmgr_node *last; 34 | size_t numentries; 35 | pthread_mutex_t list_lock; 36 | }; 37 | 38 | void *mmgr_kswapd(void); 39 | struct hemem_page* hemem_mmgr_pagefault(); 40 | struct hemem_page* hemem_mmgr_pagefault_unlocked(); 41 | void hemem_mmgr_init(void); 42 | void hemem_mmgr_remove_page(struct hemem_page *page); 43 | void hemem_mmgr_stats(); 44 | 45 | #endif 46 | -------------------------------------------------------------------------------- /src/policies/lru.h: -------------------------------------------------------------------------------- 1 | #ifndef HEMEM_LRU_H 2 | #define HEMEM_LRU_H 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | #include "../hemem.h" 9 | #include "paging.h" 10 | 11 | 12 | #define KSCAND_INTERVAL (50000) // in us (20ms) 13 | #define KSWAPD_INTERVAL (1000000) // in us (1s) 14 | #define KSWAPD_MIGRATE_RATE (50UL * 1024UL * 1024UL * 1024UL) // 50GB 15 | 16 | void *lru_kswapd(); 17 | struct hemem_page* lru_pagefault(void); 18 | struct hemem_page* lru_pagefault_unlocked(void); 19 | void lru_init(void); 20 | void lru_remove_page(struct hemem_page *page); 21 | void lru_stats(); 22 | 23 | 24 | #endif /* HEMEM_LRU_MODIFIED_H */ 25 | -------------------------------------------------------------------------------- /src/policies/paging.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 
#include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | 21 | #include "../hemem.h" 22 | #include "../timer.h" 23 | #include "paging.h" 24 | #include "../interpose.h" 25 | 26 | #if 0 27 | 28 | uint64_t* va_to_pa(uint64_t va) 29 | { 30 | uint64_t pt_base = ((uint64_t)(cr3 & ADDRESS_MASK)); 31 | uint64_t *pgd; 32 | uint64_t *pud; 33 | uint64_t *pmd; 34 | uint64_t *pte; 35 | uint64_t pgd_offset; 36 | uint64_t pud_offset; 37 | uint64_t pmd_offset; 38 | uint64_t pte_offset; 39 | uint64_t pgd_entry; 40 | uint64_t pud_entry; 41 | uint64_t pmd_entry; 42 | uint64_t pte_entry; 43 | 44 | pgd = (devmem_mmap + pt_base); 45 | pgd_offset = (((va) >> HEMEM_PGDIR_SHIFT) & (HEMEM_PTRS_PER_PGD - 1)); 46 | assert(pgd_offset < PAGE_SIZE); 47 | pgd_entry = *(pgd + pgd_offset); 48 | LOG("pgd_entry: %lx\n", pgd_entry); 49 | if (!((pgd_entry & HEMEM_PRESENT_FLAG) == HEMEM_PRESENT_FLAG)) { 50 | LOG("hemem_va_to_pa: pgd not present: %016lx\n", pgd_entry); 51 | assert(0); 52 | } 53 | 54 | pud = (uint64_t*)(pgd_entry & ADDRESS_MASK); 55 | pud_offset = (((va) >> HEMEM_PUD_SHIFT) & (HEMEM_PTRS_PER_PUD - 1)); 56 | assert(pud_offset < PAGE_SIZE); 57 | pud_entry = *(pud + pud_offset); 58 | LOG("pud_entry: %lx\n", pud_entry); 59 | if (!((pud_entry & HEMEM_PRESENT_FLAG) == HEMEM_PRESENT_FLAG)) { 60 | LOG("hemem_va_to_pa: pud not present: %016lx\n", pud_entry); 61 | assert(0); 62 | } 63 | 64 | pmd = (uint64_t*)(pud_entry & ADDRESS_MASK); 65 | pmd_offset = (((va) >> HEMEM_PMD_SHIFT) & (HEMEM_PTRS_PER_PMD - 1)); 66 | assert(pmd_offset < PAGE_SIZE); 67 | pmd_entry = *(pmd + pmd_offset); 68 | LOG("pmd_entry: %lx\n", pmd_entry); 69 | if (!((pmd_entry & HEMEM_PRESENT_FLAG) == HEMEM_PRESENT_FLAG)) { 70 | LOG("hemem_va_to_pa: pmd not present: %016lx\n", pmd_entry); 71 | assert(0); 72 | } 73 | 74 | if ((pmd_entry & HEMEM_HUGEPAGE_FLAG) == HEMEM_HUGEPAGE_FLAG) { 75 | return pmd + pmd_offset; 76 | } 77 | 
78 | pte = (uint64_t*)(pmd_entry & ADDRESS_MASK); 79 | pte_offset = (((va) >> HEMEM_PAGE_SHIFT) & (HEMEM_PTRS_PER_PTE - 1)); 80 | assert(pte_offset < PAGE_SIZE); 81 | pte_entry = *(pte + pte_offset); 82 | LOG("pte_entry: %lx\n", pte_entry); 83 | if (!((pte_entry & HEMEM_PRESENT_FLAG) == HEMEM_PRESENT_FLAG)) { 84 | LOG("hemem_va_to_pa: pte not present: %016lx\n", pte_entry); 85 | assert(0); 86 | } 87 | 88 | return pte + pte_offset; 89 | } 90 | 91 | void clear_bit(uint64_t va, uint64_t bit) 92 | { 93 | uint64_t pt_base = ((uint64_t)(cr3 & ADDRESS_MASK)); 94 | uint64_t *pgd; 95 | uint64_t *pud; 96 | uint64_t *pmd; 97 | uint64_t *pte; 98 | uint64_t pgd_offset; 99 | uint64_t pud_offset; 100 | uint64_t pmd_offset; 101 | uint64_t pte_offset; 102 | uint64_t *pgd_entry; 103 | uint64_t *pud_entry; 104 | uint64_t *pmd_entry; 105 | uint64_t *pte_entry; 106 | 107 | pgd = (devmem_mmap + pt_base); 108 | pgd_offset = (((va) >> HEMEM_PGDIR_SHIFT) & (HEMEM_PTRS_PER_PGD - 1)); 109 | assert(pgd_offset < PAGE_SIZE); 110 | pgd_entry = (pgd + pgd_offset); 111 | LOG("pgd_entry: %lx\n", *pgd_entry); 112 | if (!((*pgd_entry & HEMEM_PRESENT_FLAG) == HEMEM_PRESENT_FLAG)) { 113 | LOG("clear_bit: pgd not present: %016lx\n", *pgd_entry); 114 | //assert(0); 115 | return; 116 | } 117 | 118 | pud = (uint64_t*)(*pgd_entry & ADDRESS_MASK); 119 | pud_offset = (((va) >> HEMEM_PUD_SHIFT) & (HEMEM_PTRS_PER_PUD - 1)); 120 | assert(pud_offset < PAGE_SIZE); 121 | pud_entry = (pud + pud_offset); 122 | LOG("pud_entry: %lx\n", *pud_entry); 123 | if (!((*pud_entry & HEMEM_PRESENT_FLAG) == HEMEM_PRESENT_FLAG)) { 124 | LOG("clear_bit: pud not present: %016lx\n", *pud_entry); 125 | //assert(0); 126 | return; 127 | } 128 | 129 | pmd = (uint64_t*)(*pud_entry & ADDRESS_MASK); 130 | pmd_offset = (((va) >> HEMEM_PMD_SHIFT) & (HEMEM_PTRS_PER_PMD - 1)); 131 | assert(pmd_offset < PAGE_SIZE); 132 | pmd_entry = (pmd + pmd_offset); 133 | LOG("pmd_entry: %lx\n", *pmd_entry); 134 | if (!((*pmd_entry & HEMEM_PRESENT_FLAG) == 
HEMEM_PRESENT_FLAG)) { 135 | LOG("clear_bit: pmd not present: %016lx\n", *pmd_entry); 136 | //assert(0); 137 | return; 138 | } 139 | 140 | if ((*pmd_entry & HEMEM_HUGEPAGE_FLAG) == HEMEM_HUGEPAGE_FLAG) { 141 | *pmd_entry = *pmd_entry & ~bit; 142 | return; 143 | } 144 | 145 | pte = (uint64_t*)(*pmd_entry & ADDRESS_MASK); 146 | pte_offset = (((va) >> HEMEM_PAGE_SHIFT) & (HEMEM_PTRS_PER_PTE - 1)); 147 | assert(pte_offset < PAGE_SIZE); 148 | pte_entry = (pte + pte_offset); 149 | LOG("pte_entry: %lx\n", *pte_entry); 150 | if (!((*pte_entry & HEMEM_PRESENT_FLAG) == HEMEM_PRESENT_FLAG)) { 151 | LOG("clear_bit: pte not present: %016lx\n", *pte_entry); 152 | //assert(0); 153 | return; 154 | } 155 | 156 | *pte_entry = *pte_entry & ~bit; 157 | } 158 | 159 | uint64_t get_bit(uint64_t va, uint64_t bit) 160 | { 161 | uint64_t pt_base = ((uint64_t)(cr3 & ADDRESS_MASK)); 162 | uint64_t *pgd; 163 | uint64_t *pud; 164 | uint64_t *pmd; 165 | uint64_t *pte; 166 | uint64_t pgd_offset; 167 | uint64_t pud_offset; 168 | uint64_t pmd_offset; 169 | uint64_t pte_offset; 170 | uint64_t *pgd_entry; 171 | uint64_t *pud_entry; 172 | uint64_t *pmd_entry; 173 | uint64_t *pte_entry; 174 | 175 | pgd = (devmem_mmap + pt_base); 176 | pgd_offset = (((va) >> HEMEM_PGDIR_SHIFT) & (HEMEM_PTRS_PER_PGD - 1)); 177 | assert(pgd_offset < PAGE_SIZE); 178 | pgd_entry = (pgd + pgd_offset); 179 | LOG("pgd_entry: %lx\n", *pgd_entry); 180 | if (!((*pgd_entry & HEMEM_PRESENT_FLAG) == HEMEM_PRESENT_FLAG)) { 181 | LOG("set_bit: pgd not present: %016lx\n", *pgd_entry); 182 | //assert(0); 183 | return 0; 184 | } 185 | 186 | pud = (uint64_t*)(*pgd_entry & ADDRESS_MASK); 187 | pud_offset = (((va) >> HEMEM_PUD_SHIFT) & (HEMEM_PTRS_PER_PUD - 1)); 188 | assert(pud_offset < PAGE_SIZE); 189 | pud_entry = (pud + pud_offset); 190 | LOG("pud_entry: %lx\n", *pud_entry); 191 | if (!((*pud_entry & HEMEM_PRESENT_FLAG) == HEMEM_PRESENT_FLAG)) { 192 | LOG("set_bit: pud not present: %016lx\n", *pud_entry); 193 | //assert(0); 194 | return 
0; 195 | } 196 | 197 | pmd = (uint64_t*)(*pud_entry & ADDRESS_MASK); 198 | pmd_offset = (((va) >> HEMEM_PMD_SHIFT) & (HEMEM_PTRS_PER_PMD - 1)); 199 | assert(pmd_offset < PAGE_SIZE); 200 | pmd_entry = (pmd + pmd_offset); 201 | LOG("pmd_entry: %lx\n", *pmd_entry); 202 | if (!((*pmd_entry & HEMEM_PRESENT_FLAG) == HEMEM_PRESENT_FLAG)) { 203 | LOG("set_bit: pmd not present: %016lx\n", *pmd_entry); 204 | //assert(0); 205 | return 0; 206 | } 207 | 208 | if ((*pmd_entry & HEMEM_HUGEPAGE_FLAG) == HEMEM_HUGEPAGE_FLAG) { 209 | return *pmd_entry & bit; 210 | } 211 | 212 | pte = (uint64_t*)(*pmd_entry & ADDRESS_MASK); 213 | pte_offset = (((va) >> HEMEM_PAGE_SHIFT) & (HEMEM_PTRS_PER_PTE - 1)); 214 | assert(pte_offset < PAGE_SIZE); 215 | pte_entry = (pte + pte_offset); 216 | LOG("pte_entry: %lx\n", *pte_entry); 217 | if (!((*pte_entry & HEMEM_PRESENT_FLAG) == HEMEM_PRESENT_FLAG)) { 218 | LOG("set_bit: pte not present: %016lx\n", *pte_entry); 219 | //assert(0); 220 | return 0; 221 | } 222 | 223 | return *pte_entry & bit; 224 | } 225 | 226 | 227 | void clear_accessed_bit(uint64_t va) 228 | { 229 | clear_bit(va, HEMEM_ACCESSED_FLAG); 230 | } 231 | 232 | 233 | uint64_t get_accessed_bit(uint64_t va) 234 | { 235 | return get_bit(va, HEMEM_ACCESSED_FLAG); 236 | } 237 | 238 | 239 | void clear_dirty_bit(uint64_t va) 240 | { 241 | clear_bit(va, HEMEM_DIRTY_FLAG); 242 | } 243 | 244 | 245 | uint64_t get_dirty_bit(uint64_t va) 246 | { 247 | return get_bit(va, HEMEM_DIRTY_FLAG); 248 | } 249 | 250 | #endif 251 | FILE *ptes, *pdes, *pdtpes, *pml4es, *valid; 252 | 253 | 254 | void scan_fourth_level(uint64_t pde, bool clear_flag, uint64_t flag) 255 | { 256 | uint64_t *ptable4_ptr; 257 | uint64_t *pte_ptr; 258 | uint64_t pte; 259 | 260 | ptable4_ptr = libc_mmap(NULL, PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, devmemfd, pde & ADDRESS_MASK); 261 | if (ptable4_ptr == MAP_FAILED) { 262 | perror("third level page table mmap"); 263 | assert(0); 264 | } 265 | 266 | pte_ptr = 
(uint64_t*)ptable4_ptr; 267 | for (int i = 0; i < 512; i++) { 268 | pte = *pte_ptr; 269 | fprintf(ptes, "%016lx\n", pte); 270 | 271 | if (((pte & FLAGS_MASK) & HEMEM_PAGE_WALK_FLAGS) == HEMEM_PAGE_WALK_FLAGS) { 272 | if (((pte & FLAGS_MASK) & HEMEM_PWTPCD_FLAGS) == 0) { 273 | fprintf(valid, "pte[%x]: %016lx\n", i, pte); 274 | 275 | if (clear_flag) { 276 | pte = pte & ~flag; 277 | } 278 | } 279 | } 280 | 281 | pte_ptr++; 282 | } 283 | 284 | munmap(ptable4_ptr, PAGE_SIZE); 285 | } 286 | 287 | 288 | void scan_third_level(uint64_t pdtpe, bool clear_flag, uint64_t flag) 289 | { 290 | uint64_t *ptable3_ptr; 291 | uint64_t *pde_ptr; 292 | uint64_t pde; 293 | 294 | ptable3_ptr = libc_mmap(NULL, PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, devmemfd, pdtpe & ADDRESS_MASK); 295 | if (ptable3_ptr == MAP_FAILED) { 296 | perror("third level page table mmap"); 297 | assert(0); 298 | } 299 | 300 | pde_ptr = (uint64_t*)ptable3_ptr; 301 | for (int i = 0; i < 512; i++) { 302 | pde = *pde_ptr; 303 | fprintf(pdes, "%016lx\n", pde); 304 | 305 | if (((pde & FLAGS_MASK) & HEMEM_PAGE_WALK_FLAGS) == HEMEM_PAGE_WALK_FLAGS) { 306 | if (((pde & FLAGS_MASK) & HEMEM_PWTPCD_FLAGS) == 0) { 307 | fprintf(valid, "pde[%x]: %016lx\n", i, pde); 308 | scan_fourth_level(pde, clear_flag, flag); 309 | } 310 | } 311 | 312 | pde_ptr++; 313 | } 314 | 315 | munmap(ptable3_ptr, PAGE_SIZE); 316 | } 317 | 318 | 319 | void scan_second_level(uint64_t pml4e, bool clear_flag, uint64_t flag) 320 | { 321 | uint64_t *ptable2_ptr; 322 | uint64_t *pdtpe_ptr; 323 | uint64_t pdtpe; 324 | 325 | ptable2_ptr = libc_mmap(NULL, PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, devmemfd, pml4e & ADDRESS_MASK); 326 | if (ptable2_ptr == MAP_FAILED) { 327 | perror("second level page table mmap"); 328 | assert(0); 329 | } 330 | 331 | pdtpe_ptr = (uint64_t*)ptable2_ptr; 332 | for (int i = 0; i < 512; i++) { 333 | pdtpe = *pdtpe_ptr; 334 | fprintf(pdtpes, "%016lx\n", pdtpe); 335 | 336 | if (((pdtpe & 
FLAGS_MASK) & HEMEM_PAGE_WALK_FLAGS) == HEMEM_PAGE_WALK_FLAGS) { 337 | if (((pdtpe & FLAGS_MASK) & HEMEM_PWTPCD_FLAGS) == 0) { 338 | fprintf(valid, "pdtpe[%x]: %016lx\n", i, pdtpe); 339 | scan_third_level(pdtpe, clear_flag, flag); 340 | } 341 | } 342 | 343 | pdtpe_ptr++; 344 | } 345 | 346 | munmap(ptable2_ptr, PAGE_SIZE); 347 | } 348 | 349 | 350 | void _scan_pagetable(bool clear_flag, uint64_t flag) 351 | { 352 | int *rootptr; 353 | uint64_t *pml4e_ptr; 354 | uint64_t pml4e; 355 | 356 | pml4es = fopen("logs/pml4es.txt", "w+"); 357 | if (pml4es == NULL) { 358 | perror("pml4e file open"); 359 | assert(0); 360 | } 361 | 362 | pdtpes = fopen("logs/pdtpes.txt", "w+"); 363 | if (pdtpes == NULL) { 364 | perror("pdtpes open"); 365 | assert(0); 366 | } 367 | 368 | pdes = fopen("logs/pdes.txt", "w+"); 369 | if (pdes == NULL) { 370 | perror("pdes open"); 371 | assert(0); 372 | } 373 | 374 | ptes = fopen("logs/ptes.txt", "w+"); 375 | if (ptes == NULL) { 376 | perror("ptes open"); 377 | assert(0); 378 | } 379 | 380 | valid = fopen("logs/valid.txt", "w+"); 381 | if (valid == NULL) { 382 | perror("valid open"); 383 | assert(0); 384 | } 385 | 386 | rootptr = libc_mmap(NULL, PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, devmemfd, cr3 & ADDRESS_MASK); 387 | if (rootptr == MAP_FAILED) { 388 | perror("/dev/mem mmap"); 389 | assert(0); 390 | } 391 | 392 | pml4e_ptr = (uint64_t*)rootptr; 393 | for (int i = 0; i < 512; i++) { 394 | pml4e = *pml4e_ptr; 395 | fprintf(pml4es, "%016lx\n", pml4e); 396 | 397 | if (((pml4e & FLAGS_MASK) & HEMEM_PAGE_WALK_FLAGS) == HEMEM_PAGE_WALK_FLAGS) { 398 | if (((pml4e & FLAGS_MASK) & HEMEM_PWTPCD_FLAGS) == 0) { 399 | fprintf(valid, "pml4e[%x]: %016lx\n", i, pml4e); 400 | scan_second_level(pml4e, clear_flag, flag); 401 | } 402 | } 403 | pml4e_ptr++; 404 | } 405 | 406 | munmap(rootptr, PAGE_SIZE); 407 | } 408 | 409 | void scan_pagetable() 410 | { 411 | _scan_pagetable(false, 0); 412 | } 413 | 414 | #ifdef EXAMINE_PGTABLES 415 | void 
*examine_pagetables() 416 | { 417 | FILE *maps; 418 | int pagemaps; 419 | FILE *kpageflags; 420 | char *line = NULL; 421 | ssize_t nread; 422 | size_t len; 423 | uint64_t vm_start, vm_end; 424 | int n, num_pages; 425 | long index; 426 | off_t o; 427 | ssize_t t; 428 | struct pagemapEntry entry; 429 | int maps_copy; 430 | ssize_t nwritten; 431 | FILE *pfn_file; 432 | uint64_t num_pfn = 0; 433 | 434 | maps = fopen("/proc/self/maps", "r"); 435 | if (maps == NULL) { 436 | perror("/proc/self/maps fopen"); 437 | assert(0); 438 | } 439 | 440 | pagemaps = open("/proc/self/pagemap", O_RDONLY); 441 | if (pagemaps == -1) { 442 | perror("/proc/self/pagemap fopen"); 443 | ignore_this_mamp = true; 444 | assert(0); 445 | } 446 | 447 | maps_copy = open("logs/maps.txt", O_CREAT | O_RDWR); 448 | if (maps_copy == -1) { 449 | perror("map.txt open"); 450 | assert(0); 451 | } 452 | 453 | kpageflags = fopen("/proc/kpageflags", "r"); 454 | if (kpageflags == NULL) { 455 | perror("/proc/kpageflags fopen"); 456 | assert(0); 457 | } 458 | 459 | pfn_file = fopen("logs/pfn.txt", "w+"); 460 | if (pfn_file == NULL) { 461 | perror("pfn.txt open"); 462 | assert(0); 463 | } 464 | 465 | nread = getline(&line, &len, maps); 466 | while (nread != -1) { 467 | nwritten = write(maps_copy, line, nread); 468 | if (nwritten < 0) { 469 | perror("maps_copy write"); 470 | assert(0); 471 | } 472 | if (strstr(line, DRAMPATH) != NULL) { 473 | n = sscanf(line, "%lX-%lX", &vm_start, &vm_end); 474 | if (n != 2) { 475 | fprintf(stderr, "error, invalid line: %s\n", line); 476 | assert(0); 477 | } 478 | 479 | num_pages = (vm_end - vm_start) / PAGE_SIZE; 480 | if (num_pages > 0) { 481 | index = (vm_start / PAGE_SIZE) * sizeof(uint64_t); 482 | 483 | o = lseek(pagemaps, index, SEEK_SET); 484 | if (o != index) { 485 | perror("pagemaps lseek"); 486 | assert(0); 487 | } 488 | 489 | while (num_pages > 0) { 490 | uint64_t pfn; 491 | t = read(pagemaps, &pfn, sizeof(uint64_t)); 492 | if (t < 0) { 493 | perror("pagemaps read"); 494 
| assert(0); 495 | } 496 | 497 | entry.pfn = pfn & 0x7ffffffffffff; 498 | entry.soft_dirty = (pfn >> 55) & 1; 499 | entry.exclusive = (pfn >> 56) & 1; 500 | entry.file_page = (pfn >> 61) & 1; 501 | entry.swapped = (pfn >> 62) & 1; 502 | entry.present = (pfn >> 63) & 1; 503 | 504 | fprintf(pfn_file, "DRAM: %016lX\n", (entry.pfn * sysconf(_SC_PAGESIZE))); 505 | num_pages--; 506 | num_pfn++; 507 | } 508 | } 509 | } 510 | else if (strstr(line, NVMPATH) != NULL) { 511 | n = sscanf(line, "%lX-%lX", &vm_start, &vm_end); 512 | if (n != 2) { 513 | fprintf(stderr, "error, invalid line: %s\n", line); 514 | assert(0); 515 | } 516 | 517 | num_pages = (vm_end - vm_start) / PAGE_SIZE; 518 | if (num_pages > 0) { 519 | index = (vm_start / PAGE_SIZE) * sizeof(uint64_t); 520 | 521 | o = lseek(pagemaps, index, SEEK_SET); 522 | if (o != index) { 523 | perror("pagemaps lseek"); 524 | assert(0); 525 | } 526 | 527 | while (num_pages > 0) { 528 | uint64_t pfn; 529 | t = read(pagemaps, &pfn, sizeof(uint64_t)); 530 | if (t < 0) { 531 | perror("pagemaps read"); 532 | assert(0); 533 | } 534 | 535 | entry.pfn = pfn & 0x7ffffffffffff; 536 | entry.soft_dirty = (pfn >> 55) & 1; 537 | entry.exclusive = (pfn >> 56) & 1; 538 | entry.file_page = (pfn >> 61) & 1; 539 | entry.swapped = (pfn >> 62) & 1; 540 | entry.present = (pfn >> 63) & 1; 541 | 542 | fprintf(pfn_file, "NVM: %016lX\n", (entry.pfn * sysconf(_SC_PAGE_SIZE))); 543 | num_pages--; 544 | num_pfn++; 545 | } 546 | } 547 | } 548 | nread = getline(&line, &len, maps); 549 | } 550 | 551 | fclose(maps); 552 | close(pagemaps); 553 | fclose(kpageflags); 554 | close(maps_copy); 555 | fclose(pfn_file); 556 | 557 | return 0; 558 | } 559 | #endif 560 | 561 | -------------------------------------------------------------------------------- /src/policies/paging.h: -------------------------------------------------------------------------------- 1 | #ifndef HEMEM_PAGING_H 2 | #define HEMEM_PAGING_H 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | #include 
"../hemem.h" 9 | 10 | 11 | #define ADDRESS_MASK ((uint64_t)0x00000ffffffff000UL) 12 | #define FLAGS_MASK ((uint64_t)0x0000000000000fffUL) 13 | 14 | #define HEMEM_PRESENT_FLAG ((uint64_t)0x0000000000000001UL) 15 | #define HEMEM_WRITE_FLAG ((uint64_t)0x0000000000000002UL) 16 | #define HEMEM_USER_FLAG ((uint64_t)0x0000000000000004UL) 17 | #define HEMEM_PWT_FLAG ((uint64_t)0x0000000000000008UL) 18 | #define HEMEM_PCD_FLAG ((uint64_t)0x0000000000000010UL) 19 | #define HEMEM_ACCESSED_FLAG ((uint64_t)0x0000000000000020UL) 20 | #define HEMEM_DIRTY_FLAG ((uint64_t)0x0000000000000040UL) 21 | #define HEMEM_HUGEPAGE_FLAG ((uint64_t)0x0000000000000080UL) 22 | 23 | 24 | #define HEMEM_PAGE_WALK_FLAGS (HEMEM_PRESENT_FLAG | \ 25 | HEMEM_WRITE_FLAG | \ 26 | HEMEM_USER_FLAG | \ 27 | HEMEM_ACCESSED_FLAG | \ 28 | HEMEM_DIRTY_FLAG) 29 | 30 | #define HEMEM_PWTPCD_FLAGS (HEMEM_PWT_FLAG | HEMEM_PCD_FLAG) 31 | 32 | #define HEMEM_PGDIR_SHIFT 39 33 | #define HEMEM_PTRS_PER_PGD 512 34 | #define HEMEM_PUD_SHIFT 30 35 | #define HEMEM_PTRS_PER_PUD 512 36 | #define HEMEM_PMD_SHIFT 21 37 | #define HEMEM_PTRS_PER_PMD 512 38 | #define HEMEM_PAGE_SHIFT 12 39 | #define HEMEM_PTRS_PER_PTE 512 40 | 41 | //#define EXAMINE_PGTABLES 42 | 43 | 44 | void scan_pagetable(); 45 | void _scan_pagetable(bool clear_flag, uint64_t flag); 46 | 47 | //void clear_accessed_bit(uint64_t pa); 48 | //uint64_t get_accessed_bit(uint64_t pa); 49 | //void clear_dirty_bit(uint64_t pa); 50 | //uint64_t get_dirty_bit(uint64_t pa); 51 | // 52 | //uint64_t* va_to_pa(uint64_t va); 53 | 54 | #ifdef EXAMINE_PGTABLES 55 | 56 | struct pagemapEntry { 57 | uint64_t pfn : 54; 58 | unsigned int soft_dirty : 1; 59 | unsigned int exclusive : 1; 60 | unsigned int file_page : 1; 61 | unsigned int swapped : 1; 62 | unsigned int present : 1; 63 | }; 64 | 65 | void *examine_pagetables(); 66 | 67 | #endif /*EXAMINE_PGTABLES*/ 68 | 69 | #endif /* HEMEM_PAGING_H */ 70 | 71 | 
-------------------------------------------------------------------------------- /src/policies/simple.c: -------------------------------------------------------------------------------- 1 | /* 2 | * ===================================================================================== 3 | * 4 | * Filename: simple.c 5 | * 6 | * Description: 7 | * 8 | * Version: 1.0 9 | * Created: 02/04/2020 09:58:58 AM 10 | * Revision: none 11 | * Compiler: gcc 12 | * 13 | * Author: YOUR NAME (), 14 | * Organization: 15 | * 16 | * ===================================================================================== 17 | */ 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | 27 | #include "../hemem.h" 28 | #include "paging.h" 29 | #include "../timer.h" 30 | #include "../fifo.h" 31 | 32 | uint64_t fastmem = 0; 33 | uint64_t slowmem = 0; 34 | bool slowmem_switch = false; 35 | 36 | static struct fifo_list dram_free, nvm_free; 37 | 38 | void simple_remove_page(struct hemem_page *page) 39 | { 40 | if (page->in_dram) { 41 | page->present = false; 42 | enqueue_fifo(&dram_free, page); 43 | fastmem -= PAGE_SIZE; 44 | } 45 | else { 46 | page->present = false; 47 | enqueue_fifo(&nvm_free, page); 48 | slowmem -= PAGE_SIZE; 49 | } 50 | } 51 | 52 | struct hemem_page* simple_pagefault(void) 53 | { 54 | struct timeval start, end; 55 | struct hemem_page *page; 56 | 57 | gettimeofday(&start, NULL); 58 | 59 | page = dequeue_fifo(&dram_free); 60 | if (page != NULL) { 61 | assert(!page->present); 62 | page->present = true; 63 | fastmem += PAGE_SIZE; 64 | } 65 | else { 66 | assert(slowmem < NVMSIZE); 67 | page = dequeue_fifo(&nvm_free); 68 | 69 | assert(page != NULL); 70 | assert(!page->present); 71 | 72 | page->present = true; 73 | slowmem += PAGE_SIZE; 74 | } 75 | gettimeofday(&end, NULL); 76 | LOG_TIME("mem_policy_allocate_page: %f s\n", elapsed(&start, &end)); 77 | 78 | return page; 79 | } 80 | 81 | void simple_init(void) 82 | { 83 | 
pthread_mutex_init(&(dram_free.list_lock), NULL); 84 | for (int i = 0; i < DRAMSIZE / PAGE_SIZE; i++) { 85 | struct hemem_page *p = calloc(1, sizeof(struct hemem_page)); 86 | p->devdax_offset = i * PAGE_SIZE; 87 | p->present = false; 88 | p->in_dram = true; 89 | p->pt = pagesize_to_pt(PAGE_SIZE); 90 | pthread_mutex_init(&(p->page_lock), NULL); 91 | enqueue_fifo(&dram_free, p); 92 | } 93 | 94 | pthread_mutex_init(&(nvm_free.list_lock), NULL); 95 | for (int i = 0; i < NVMSIZE / PAGE_SIZE; i++) { 96 | struct hemem_page *p = calloc(1, sizeof(struct hemem_page)); 97 | p->devdax_offset = i * PAGE_SIZE; 98 | p->present = false; 99 | p->in_dram = false; 100 | p->pt = pagesize_to_pt(PAGE_SIZE); 101 | pthread_mutex_init(&(p->page_lock), NULL); 102 | enqueue_fifo(&nvm_free, p); 103 | } 104 | LOG("Memory management policy is simple\n"); 105 | } 106 | 107 | void simple_stats() 108 | { 109 | LOG_STATS("\tfastmem_allocated: [%ld]\tslowmem_allocated: [%ld]\n", fastmem, slowmem); 110 | } 111 | -------------------------------------------------------------------------------- /src/policies/simple.h: -------------------------------------------------------------------------------- 1 | /* 2 | * ===================================================================================== 3 | * 4 | * Filename: simple.h 5 | * 6 | * Description: 7 | * 8 | * Version: 1.0 9 | * Created: 02/04/2020 09:56:26 AM 10 | * Revision: none 11 | * Compiler: gcc 12 | * 13 | * Author: YOUR NAME (), 14 | * Organization: 15 | * 16 | * ===================================================================================== 17 | */ 18 | #ifndef HEMEM_SIMPLE_H 19 | #define HEMEM_SIMPLE_H 20 | 21 | #include 22 | #include 23 | 24 | #include "../hemem.h" 25 | #include "paging.h" 26 | 27 | struct hemem_page* simple_pagefault(void); 28 | void simple_init(void); 29 | void simple_remove_page(struct hemem_page *page); 30 | void simple_stats(); 31 | 32 | #endif // HEMEM_SIMPLE_H 33 | 
/* -------------------------------------------------------------------------- */
/* src/spsc-ring.c / spsc-ring.h -- ring buffer of uint64_t* slots.           */
/*                                                                            */
/* head is the next slot to write, tail the next slot to read.  One slot is   */
/* always kept empty to distinguish "full" from "empty", so a buffer created  */
/* with N slots stores at most N-1 elements.  ring_buf_put() overwrites the   */
/* oldest element when full; ring_buf_put2() refuses instead.                 */
/* -------------------------------------------------------------------------- */
#include <stdint.h>
#include <stddef.h>
#include <stdbool.h>
#include <stdlib.h>
#include <assert.h>

/* ---- public interface (spsc-ring.h) ---- */
#ifndef SPSC_RING_H
#define SPSC_RING_H

typedef struct ring_buf_t ring_buf_t;

typedef ring_buf_t* ring_handle_t;

/* Create a ring over a caller-owned array of `size` slots (caller keeps
 * ownership of the array and of the pointed-to elements). */
ring_handle_t ring_buf_init(uint64_t** buffer, size_t size);
void ring_buf_free(ring_handle_t rbuf);
void ring_buf_reset(ring_handle_t rbuf);
/* Insert, overwriting the oldest element when full. */
void ring_buf_put(ring_handle_t rbuf, uint64_t* data);
/* Insert only if space is available; returns 0 on success, -1 when full. */
int ring_buf_put2(ring_handle_t rbuf, uint64_t* data);
/* Remove and return the oldest element, or NULL when empty. */
uint64_t* ring_buf_get(ring_handle_t rbuf);
bool ring_buf_empty(ring_handle_t rbuf);
bool ring_buf_full(ring_handle_t rbuf);
/* Number of slots passed at init time (usable capacity is one less). */
size_t ring_buf_capacity(ring_handle_t rbuf);
/* Number of elements currently stored. */
size_t ring_buf_size(ring_handle_t rbuf);

#endif //SPSC_RING_H

/* ---- implementation (spsc-ring.c) ---- */

struct ring_buf_t {
  uint64_t** buffer;  /* caller-owned slot array */
  size_t head;        /* next slot to write */
  size_t tail;        /* next slot to read */
  size_t capacity;    /* total slots; at most capacity-1 are occupied */
};

/* Advance head one slot; if the buffer is full, also advance tail so the
 * oldest element is dropped (overwrite semantics of ring_buf_put). */
static void advance_pointer(ring_handle_t rbuf)
{
  assert(rbuf);

  if (ring_buf_full(rbuf)) {
    if (++(rbuf->tail) == rbuf->capacity) {
      rbuf->tail = 0;
    }
  }

  if (++(rbuf->head) == rbuf->capacity) {
    rbuf->head = 0;
  }
}

/* Advance tail one slot after a successful read. */
static void retreat_pointer(ring_handle_t rbuf)
{
  assert(rbuf);

  if (++(rbuf->tail) == rbuf->capacity) {
    rbuf->tail = 0;
  }
}

ring_handle_t ring_buf_init(uint64_t** buffer, size_t size)
{
  assert(buffer && size);

  ring_handle_t rbuf = malloc(sizeof(ring_buf_t));
  assert(rbuf);

  rbuf->buffer = buffer;
  rbuf->capacity = size;
  ring_buf_reset(rbuf);

  assert(ring_buf_empty(rbuf));

  return rbuf;
}

void ring_buf_free(ring_handle_t rbuf)
{
  assert(rbuf);
  free(rbuf);  /* the slot array itself remains owned by the caller */
}

void ring_buf_reset(ring_handle_t rbuf)
{
  assert(rbuf);

  rbuf->head = 0;
  rbuf->tail = 0;
}

size_t ring_buf_size(ring_handle_t rbuf)
{
  assert(rbuf);

  if (ring_buf_full(rbuf)) {
    /* BUGFIX: one slot is always kept empty in this head/tail scheme, so a
     * full buffer holds capacity-1 elements -- the old code reported
     * capacity, one more than ring_buf_get() could ever return. */
    return rbuf->capacity - 1;
  }
  if (rbuf->head >= rbuf->tail) {
    return rbuf->head - rbuf->tail;
  }
  return rbuf->capacity + rbuf->head - rbuf->tail;
}

size_t ring_buf_capacity(ring_handle_t rbuf)
{
  assert(rbuf);

  return rbuf->capacity;
}

void ring_buf_put(ring_handle_t rbuf, uint64_t* data)
{
  assert(rbuf && rbuf->buffer);

  rbuf->buffer[rbuf->head] = data;

  advance_pointer(rbuf);
}

int ring_buf_put2(ring_handle_t rbuf, uint64_t* data)
{
  assert(rbuf && rbuf->buffer);

  if (ring_buf_full(rbuf)) {
    return -1;
  }

  rbuf->buffer[rbuf->head] = data;
  advance_pointer(rbuf);
  return 0;
}

uint64_t* ring_buf_get(ring_handle_t rbuf)
{
  assert(rbuf && rbuf->buffer);

  if (ring_buf_empty(rbuf)) {
    return NULL;
  }

  uint64_t* cur_read = rbuf->buffer[rbuf->tail];
  retreat_pointer(rbuf);

  return cur_read;
}

bool ring_buf_empty(ring_handle_t rbuf)
{
  assert(rbuf);

  return (!ring_buf_full(rbuf) && (rbuf->head == rbuf->tail));
}

bool ring_buf_full(ring_buf_t* rbuf)
{
  assert(rbuf);  /* BUGFIX: every sibling validated rbuf; this one did not */

  size_t head = rbuf->head + 1;
  if (head == rbuf->capacity) {
    head = 0;
  }

  return head == rbuf->tail;
}
/* -------------------------------------------------------------------------- */
/* src/timer.c -- wall-clock / monotonic-clock timing helpers.                */
/* -------------------------------------------------------------------------- */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/time.h>
#include <time.h>

/* Pull in the public prototypes when building inside the tree; the guarded
 * fallback below keeps this translation unit buildable standalone. */
#if defined(__has_include)
# if __has_include("timer.h")
#  include "timer.h"
# endif
#else
# include "timer.h"
#endif

#ifndef tv_to_double
#define tv_to_double(t) ((t).tv_sec + ((t).tv_usec / 1000000.0))
#endif

/*
 * d = a - b on struct timevals, normalizing tv_usec into [0, 1000000).
 * Assumes a >= b.
 */
void timeDiff(struct timeval *d, struct timeval *a, struct timeval *b)
{
  d->tv_sec = a->tv_sec - b->tv_sec;
  d->tv_usec = a->tv_usec - b->tv_usec;
  if (d->tv_usec < 0) {
    /* borrow one second into the microsecond field */
    d->tv_sec -= 1;
    d->tv_usec += 1000000;
  }
}


/* Return the number of elapsed seconds between starttime and endtime. */
double elapsed(struct timeval *starttime, struct timeval *endtime)
{
  struct timeval diff;

  timeDiff(&diff, endtime, starttime);
  return tv_to_double(diff);
}

/*
 * Return the number of elapsed nanoseconds between start and end.
 * BUGFIX: long constant so the multiply is explicitly done in long
 * arithmetic (the old int literal relied on implicit promotion).
 */
long clock_time_elapsed(struct timespec start, struct timespec end)
{
  long seconds = end.tv_sec - start.tv_sec;
  long nanoseconds = end.tv_nsec - start.tv_nsec;
  return seconds * 1000000000L + nanoseconds;
}

/* -------------------------------------------------------------------------- */
/* src/timer.h                                                                */
/* -------------------------------------------------------------------------- */
#ifndef HEMEM_TIMER_H
#define HEMEM_TIMER_H

/* BUGFIX: the header used struct timeval/timespec without declaring them. */
#include <sys/time.h>
#include <time.h>

/* Returns the number of seconds encoded in T, a "struct timeval".
 * BUGFIX: the macro argument is now parenthesized, so expressions such as
 * tv_to_double(*p) expand correctly. */
#ifndef tv_to_double
#define tv_to_double(t) ((t).tv_sec + ((t).tv_usec / 1000000.0))
#endif

void timeDiff(struct timeval *d, struct timeval *a, struct timeval *b);
double elapsed(struct timeval *starttime, struct timeval *endtime);
long clock_time_elapsed(struct timespec start, struct timespec end);

#endif /* HEMEM_TIMER_H */

/* -------------------------------------------------------------------------- */
/* src/userfaultfd.h -- userspace copy of the kernel uapi header, extended    */
/* with non-upstream TLBFLUSH/CR3/page-flag/DMA ioctls used by HeMem.         */
/* -------------------------------------------------------------------------- */
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
 * include/linux/userfaultfd.h
 *
 * Copyright (C) 2007 Davide Libenzi
 * Copyright (C) 2015 Red Hat, Inc.
 *
 */

#ifndef _LINUX_USERFAULTFD_H
#define _LINUX_USERFAULTFD_H

/* BUGFIX: the include was garbled in this copy; __u8/__u16/__u32/__u64/__s64
 * come from <linux/types.h>, the _IOR/_IOWR macros from <linux/ioctl.h>. */
#include <linux/types.h>
#include <linux/ioctl.h>

/* BUGFIX: __packed is a kernel-internal shorthand with no userspace
 * definition; without this fallback the header does not compile. */
#ifndef __packed
#define __packed __attribute__((packed))
#endif

/*
 * If the UFFDIO_API is upgraded someday, the UFFDIO_UNREGISTER and
 * UFFDIO_WAKE ioctls should be defined as _IOW and not as _IOR. In
 * userfaultfd.h we assumed the kernel was reading (instead _IOC_READ
 * means the userland is reading).
 */
#define UFFD_API ((__u64)0xAA)
#define UFFD_API_FEATURES (UFFD_FEATURE_PAGEFAULT_FLAG_WP | \
                           UFFD_FEATURE_EVENT_FORK | \
                           UFFD_FEATURE_EVENT_REMAP | \
                           UFFD_FEATURE_EVENT_REMOVE | \
                           UFFD_FEATURE_EVENT_UNMAP | \
                           UFFD_FEATURE_MISSING_HUGETLBFS | \
                           UFFD_FEATURE_MISSING_SHMEM | \
                           UFFD_FEATURE_SIGBUS | \
                           UFFD_FEATURE_THREAD_ID)
#define UFFD_API_IOCTLS \
  ((__u64)1 << _UFFDIO_REGISTER | \
   (__u64)1 << _UFFDIO_UNREGISTER | \
   (__u64)1 << _UFFDIO_API)
#define UFFD_API_RANGE_IOCTLS \
  ((__u64)1 << _UFFDIO_WAKE | \
   (__u64)1 << _UFFDIO_COPY | \
   (__u64)1 << _UFFDIO_ZEROPAGE | \
   (__u64)1 << _UFFDIO_WRITEPROTECT)
/* NOTE(review): everything from _UFFDIO_TLBFLUSH down is not in the upstream
 * kernel uapi header -- it requires HeMem's patched kernel. */
#define UFFD_API_RANGE_IOCTLS_BASIC \
  ((__u64)1 << _UFFDIO_WAKE | \
   (__u64)1 << _UFFDIO_COPY | \
   (__u64)1 << _UFFDIO_TLBFLUSH | \
   (__u64)1 << _UFFDIO_CR3 | \
   (__u64)1 << _UFFDIO_GET_FLAG | \
   (__u64)1 << _UFFDIO_CLEAR_FLAG | \
   (__u64)1 << _UFFDIO_DMA_COPY | \
   (__u64)1 << _UFFDIO_DMA_REQUEST_CHANNS | \
   (__u64)1 << _UFFDIO_DMA_RELEASE_CHANNS)

/*
 * Valid ioctl command number range with this API is from 0x00 to
 * 0x3F.  UFFDIO_API is the fixed number, everything else can be
 * changed by implementing a different UFFD_API. If sticking to the
 * same UFFD_API more ioctl can be added and userland will be aware of
 * which ioctl the running kernel implements through the ioctl command
 * bitmask written by the UFFDIO_API.
 */
#define _UFFDIO_REGISTER            (0x00)
#define _UFFDIO_UNREGISTER          (0x01)
#define _UFFDIO_WAKE                (0x02)
#define _UFFDIO_COPY                (0x03)
#define _UFFDIO_ZEROPAGE            (0x04)
#define _UFFDIO_WRITEPROTECT        (0x06)
#define _UFFDIO_API                 (0x3F)
/* HeMem extensions (0x05/0x07/0x09 left unused upstream/here). */
#define _UFFDIO_TLBFLUSH            (0x08)
#define _UFFDIO_CR3                 (0x0a)
#define _UFFDIO_GET_FLAG            (0x0b)
#define _UFFDIO_CLEAR_FLAG          (0x0c)
#define _UFFDIO_DMA_COPY            (0x0d)
#define _UFFDIO_DMA_REQUEST_CHANNS  (0x0e)
#define _UFFDIO_DMA_RELEASE_CHANNS  (0x0f)


/* userfaultfd ioctl ids */
#define UFFDIO 0xAA
#define UFFDIO_API              _IOWR(UFFDIO, _UFFDIO_API, \
                                      struct uffdio_api)
#define UFFDIO_REGISTER         _IOWR(UFFDIO, _UFFDIO_REGISTER, \
                                      struct uffdio_register)
#define UFFDIO_UNREGISTER       _IOR(UFFDIO, _UFFDIO_UNREGISTER, \
                                     struct uffdio_range)
#define UFFDIO_WAKE             _IOR(UFFDIO, _UFFDIO_WAKE, \
                                     struct uffdio_range)
#define UFFDIO_COPY             _IOWR(UFFDIO, _UFFDIO_COPY, \
                                      struct uffdio_copy)
#define UFFDIO_ZEROPAGE         _IOWR(UFFDIO, _UFFDIO_ZEROPAGE, \
                                      struct uffdio_zeropage)
#define UFFDIO_WRITEPROTECT     _IOWR(UFFDIO, _UFFDIO_WRITEPROTECT, \
                                      struct uffdio_writeprotect)
#define UFFDIO_TLBFLUSH         _IOR(UFFDIO, _UFFDIO_TLBFLUSH, \
                                     struct uffdio_range)
#define UFFDIO_CR3              _IOR(UFFDIO, _UFFDIO_CR3, \
                                     struct uffdio_cr3)
#define UFFDIO_GET_FLAG         _IOWR(UFFDIO, _UFFDIO_GET_FLAG, \
                                      struct uffdio_page_flags)
#define UFFDIO_CLEAR_FLAG       _IOWR(UFFDIO, _UFFDIO_CLEAR_FLAG, \
                                      struct uffdio_page_flags)
#define UFFDIO_DMA_COPY         _IOWR(UFFDIO, _UFFDIO_DMA_COPY, \
                                      struct uffdio_dma_copy)
#define UFFDIO_DMA_REQUEST_CHANNS _IOWR(UFFDIO, _UFFDIO_DMA_REQUEST_CHANNS, \
                                        struct uffdio_dma_channs)
#define UFFDIO_DMA_RELEASE_CHANNS _IOWR(UFFDIO, _UFFDIO_DMA_RELEASE_CHANNS, \
                                        struct uffdio_dma_channs)

/* read() structure */
struct uffd_msg {
  __u8  event;

  __u8  reserved1;
  __u16 reserved2;
  __u32 reserved3;

  union {
    struct {
      __u64 flags;
      __u64 address;
      union {
        __u32 ptid;
      } feat;
    } pagefault;

    struct {
      __u32 ufd;
    } fork;

    struct {
      __u64 from;
      __u64 to;
      __u64 len;
    } remap;

    struct {
      __u64 start;
      __u64 end;
    } remove;

    struct {
      /* unused reserved fields */
      __u64 reserved1;
      __u64 reserved2;
      __u64 reserved3;
    } reserved;
  } arg;
} __packed;

/*
 * Start at 0x12 and not at 0 to be more strict against bugs.
 */
#define UFFD_EVENT_PAGEFAULT 0x12
#define UFFD_EVENT_FORK      0x13
#define UFFD_EVENT_REMAP     0x14
#define UFFD_EVENT_REMOVE    0x15
#define UFFD_EVENT_UNMAP     0x16

/* flags for UFFD_EVENT_PAGEFAULT */
#define UFFD_PAGEFAULT_FLAG_WRITE (1<<0) /* If this was a write fault */
#define UFFD_PAGEFAULT_FLAG_WP    (1<<1) /* If reason is VM_UFFD_WP */

struct uffdio_api {
  /* userland asks for an API number and the features to enable */
  __u64 api;
  /*
   * Kernel answers below with the all available features for
   * the API, this notifies userland of which events and/or
   * which flags for each event are enabled in the current
   * kernel.
   *
   * Note: UFFD_EVENT_PAGEFAULT and UFFD_PAGEFAULT_FLAG_WRITE
   * are to be considered implicitly always enabled in all kernels as
   * long as the uffdio_api.api requested matches UFFD_API.
   *
   * UFFD_FEATURE_MISSING_HUGETLBFS means an UFFDIO_REGISTER
   * with UFFDIO_REGISTER_MODE_MISSING mode will succeed on
   * hugetlbfs virtual memory ranges. Adding or not adding
   * UFFD_FEATURE_MISSING_HUGETLBFS to uffdio_api.features has
   * no real functional effect after UFFDIO_API returns, but
   * it's only useful for an initial feature set probe at
   * UFFDIO_API time. There are two ways to use it:
   *
   * 1) by adding UFFD_FEATURE_MISSING_HUGETLBFS to the
   *    uffdio_api.features before calling UFFDIO_API, an error
   *    will be returned by UFFDIO_API on a kernel without
   *    hugetlbfs missing support
   *
   * 2) the UFFD_FEATURE_MISSING_HUGETLBFS can not be added in
   *    uffdio_api.features and instead it will be set by the
   *    kernel in the uffdio_api.features if the kernel supports
   *    it, so userland can later check if the feature flag is
   *    present in uffdio_api.features after UFFDIO_API
   *    succeeded.
   *
   * UFFD_FEATURE_MISSING_SHMEM works the same as
   * UFFD_FEATURE_MISSING_HUGETLBFS, but it applies to shmem
   * (i.e. tmpfs and other shmem based APIs).
   *
   * UFFD_FEATURE_SIGBUS feature means no page-fault
   * (UFFD_EVENT_PAGEFAULT) event will be delivered, instead
   * a SIGBUS signal will be sent to the faulting process.
   *
   * UFFD_FEATURE_THREAD_ID pid of the page faulted task_struct will
   * be returned, if feature is not requested 0 will be returned.
   */
#define UFFD_FEATURE_PAGEFAULT_FLAG_WP  (1<<0)
#define UFFD_FEATURE_EVENT_FORK         (1<<1)
#define UFFD_FEATURE_EVENT_REMAP        (1<<2)
#define UFFD_FEATURE_EVENT_REMOVE       (1<<3)
#define UFFD_FEATURE_MISSING_HUGETLBFS  (1<<4)
#define UFFD_FEATURE_MISSING_SHMEM      (1<<5)
#define UFFD_FEATURE_EVENT_UNMAP        (1<<6)
#define UFFD_FEATURE_SIGBUS             (1<<7)
#define UFFD_FEATURE_THREAD_ID          (1<<8)
  __u64 features;

  __u64 ioctls;
};

struct uffdio_range {
  __u64 start;
  __u64 len;
};

struct uffdio_register {
  struct uffdio_range range;
#define UFFDIO_REGISTER_MODE_MISSING ((__u64)1<<0)
#define UFFDIO_REGISTER_MODE_WP      ((__u64)1<<1)
  __u64 mode;

  /*
   * kernel answers which ioctl commands are available for the
   * range, keep at the end as the last 8 bytes aren't read.
   */
  __u64 ioctls;
};

struct uffdio_copy {
  __u64 dst;
  __u64 src;
  __u64 len;
#define UFFDIO_COPY_MODE_DONTWAKE ((__u64)1<<0)
  /*
   * UFFDIO_COPY_MODE_WP will map the page write protected on
   * the fly. UFFDIO_COPY_MODE_WP is available only if the
   * write protected ioctl is implemented for the range
   * according to the uffdio_register.ioctls.
   */
#define UFFDIO_COPY_MODE_WP ((__u64)1<<1)
  __u64 mode;

  /*
   * "copy" is written by the ioctl and must be at the end: the
   * copy_from_user will not read the last 8 bytes.
   */
  __s64 copy;
};

struct uffdio_zeropage {
  struct uffdio_range range;
#define UFFDIO_ZEROPAGE_MODE_DONTWAKE ((__u64)1<<0)
  __u64 mode;

  /*
   * "zeropage" is written by the ioctl and must be at the end:
   * the copy_from_user will not read the last 8 bytes.
   */
  __s64 zeropage;
};

struct uffdio_writeprotect {
  struct uffdio_range range;
  /*
   * UFFDIO_WRITEPROTECT_MODE_WP: set the flag to write protect a range,
   * unset the flag to undo protection of a range which was previously
   * write protected.
   *
   * UFFDIO_WRITEPROTECT_MODE_DONTWAKE: set the flag to avoid waking up
   * any wait thread after the operation succeeds.
   *
   * NOTE: Write protecting a region (WP=1) is unrelated to page faults,
   * therefore DONTWAKE flag is meaningless with WP=1.  Removing write
   * protection (WP=0) in response to a page fault wakes the faulting
   * task unless DONTWAKE is set.
   */
#define UFFDIO_WRITEPROTECT_MODE_WP       ((__u64)1<<0)
#define UFFDIO_WRITEPROTECT_MODE_DONTWAKE ((__u64)1<<1)
  __u64 mode;
};

/* HeMem extension: report the faulting process's page-table base. */
struct uffdio_cr3 {
  //struct uffdio_range range;
  __u64 cr3; // base page table ptr
};

/* HeMem extension: get/clear individual PTE flags for a virtual address. */
struct uffdio_page_flags {
  __u64 va;    // virtual address
  __u64 flag1; // the first flag of interest
  __u64 flag2; // the second flag of interest
  __u64 res1;  // result of operation (flag1 value if get, success/fail if set)
  __u64 res2;  // result of operation (flag2 value)
};

#define DMA_BATCH     32
#define MAX_DMA_CHANS 16
//#define DEBUG_TM
/* HeMem extension: batched DMA-engine copy of up to DMA_BATCH ranges. */
struct uffdio_dma_copy {
  __u64 dst[DMA_BATCH];
  __u64 src[DMA_BATCH];
  __u64 len[DMA_BATCH];
  __u64 count;

  /*
   * There will be a wrprotection flag later that allows to map
   * pages wrprotected on the fly. And such a flag will be
   * available if the wrprotection ioctl are implemented for the
   * range according to the uffdio_register.ioctls.
   */
#define UFFDIO_COPY_MODE_DONTWAKE ((__u64)1<<0)
  __u64 mode;

  /*
   * "copy" is written by the ioctl and must be at the end: the
   * copy_from_user will not read the last 8 bytes.
   */
  __s64 copy;
};

/* HeMem extension: request/release a set of DMA channels. */
struct uffdio_dma_channs {
  __u32 num_channs;
  __u32 size_per_dma_request;
};

#endif /* _LINUX_USERFAULTFD_H */