├── .bazelrc
├── AUTHORS.md
├── BUILD
├── CONTRIBUTING.md
├── LICENSE
├── README.md
├── SECURITY.md
├── WORKSPACE
├── abi.bzl
├── abi
├── 84
│ └── kernel
│ │ └── ghost.h
├── 90
│ └── kernel
│ │ └── ghost.h
└── latest
│ └── kernel
│ └── ghost.h
├── bpf
├── bpf.bzl
└── user
│ ├── agent.c
│ ├── agent.h
│ ├── ghost_shared.h
│ ├── schedclasstop.c
│ ├── schedfair.c
│ ├── schedghostidle.c
│ ├── schedlat.c
│ ├── schedlat_shared.h
│ ├── schedrun.c
│ └── schedrun_shared.h
├── docs
└── ghostfs.md
├── experiments
├── antagonist
│ ├── cfs_orchestrator.cc
│ ├── cfs_orchestrator.h
│ ├── ghost_orchestrator.cc
│ ├── ghost_orchestrator.h
│ ├── main.cc
│ ├── options_test.cc
│ ├── orchestrator.cc
│ ├── orchestrator.h
│ ├── orchestrator_test.cc
│ ├── results.cc
│ ├── results.h
│ └── results_test.cc
├── microbenchmarks
│ ├── global_scalability.cc
│ └── ioctl_test.cc
├── rocksdb
│ ├── cfs_orchestrator.cc
│ ├── cfs_orchestrator.h
│ ├── clock.h
│ ├── database.cc
│ ├── database.h
│ ├── database_test.cc
│ ├── ghost_orchestrator.cc
│ ├── ghost_orchestrator.h
│ ├── ingress.cc
│ ├── ingress.h
│ ├── latency.cc
│ ├── latency.h
│ ├── latency_test.cc
│ ├── main.cc
│ ├── options_test.cc
│ ├── orchestrator.cc
│ ├── orchestrator.h
│ ├── orchestrator_test.cc
│ ├── request.h
│ └── synthetic_network_test.cc
├── scripts
│ ├── BUILD
│ ├── __init__.py
│ ├── centralized_queuing.py
│ ├── options.py
│ ├── run.py
│ ├── setup.py
│ ├── shenango.py
│ ├── shinjuku.py
│ └── shinjuku_shenango.py
└── shared
│ ├── prio_table_helper.cc
│ ├── prio_table_helper.h
│ ├── thread_pool.cc
│ ├── thread_pool.h
│ ├── thread_pool_test.cc
│ ├── thread_wait.cc
│ └── thread_wait.h
├── kernel
├── ghost_uapi.h
└── vmlinux_ghost_5_11.h
├── lib
├── agent.cc
├── agent.h
├── arr_structs.bpf.h
├── avl.bpf.h
├── base.cc
├── base.h
├── channel.cc
├── channel.h
├── enclave.cc
├── enclave.h
├── flux.h
├── ghost.cc
├── ghost.h
├── ghost_uapi.cc
├── ghost_uapi.h
├── logging.h
├── queue.bpf.h
├── scheduler.h
├── topology.cc
├── topology.h
├── trivial_status.cc
└── trivial_status.h
├── requirements.txt
├── schedulers
├── biff
│ ├── agent_biff.cc
│ ├── biff_scheduler.cc
│ └── biff_scheduler.h
├── cfs
│ ├── README.md
│ ├── cfs_agent.cc
│ ├── cfs_scheduler.cc
│ └── cfs_scheduler.h
├── cfs_bpf
│ ├── agent_cfs.cc
│ ├── cfs_scheduler.cc
│ └── cfs_scheduler.h
├── edf
│ ├── agent_exp.cc
│ ├── edf_scheduler.cc
│ ├── edf_scheduler.h
│ ├── orchestrator.cc
│ └── orchestrator.h
├── fifo
│ ├── centralized
│ │ ├── fifo_agent.cc
│ │ ├── fifo_scheduler.cc
│ │ └── fifo_scheduler.h
│ └── per_cpu
│ │ ├── fifo_agent.cc
│ │ ├── fifo_scheduler.cc
│ │ └── fifo_scheduler.h
├── flux
│ ├── agent_flux.cc
│ ├── flux_scheduler.cc
│ └── flux_scheduler.h
├── shinjuku
│ ├── agent_shinjuku.cc
│ ├── shinjuku_orchestrator.cc
│ ├── shinjuku_orchestrator.h
│ ├── shinjuku_scheduler.cc
│ └── shinjuku_scheduler.h
└── sol
│ ├── agent_sol.cc
│ ├── sol_scheduler.cc
│ └── sol_scheduler.h
├── shared
├── fd_server.cc
├── fd_server.h
├── prio_table.cc
├── prio_table.h
├── shmem.cc
└── shmem.h
├── tests
├── agent_test.cc
├── api_test.cc
├── base_test.cc
├── biff_test.cc
├── bpf_avl_test.cc
├── bpf_queue_test.cc
├── capabilities_test.cc
├── capabilities_test.h
├── cfs_bpf_test.cc
├── cfs_test.cc
├── channel_test.cc
├── edf_test.cc
├── enclave_test.cc
├── fd_server_test.cc
├── flux_test.cc
├── prio_table_test.cc
├── simple_cfs.cc
├── simple_edf.cc
├── simple_exp.cc
├── sol_test.cc
└── topology_test.cc
├── third_party
├── BUILD.bazel
├── bpf
│ ├── BUILD
│ ├── LICENSE
│ ├── biff.bpf.c
│ ├── biff_bpf.h
│ ├── biff_flux.bpf.c
│ ├── biff_flux_bpf.h
│ ├── cfs.bpf.c
│ ├── cfs_bpf.h
│ ├── common.bpf.h
│ ├── edf.bpf.c
│ ├── edf.h
│ ├── flux.bpf.c
│ ├── flux_api.bpf.c
│ ├── flux_bpf.h
│ ├── flux_dispatch.bpf.c
│ ├── flux_header_bpf.h
│ ├── ghost_shared_bpf.h
│ ├── idle_flux.bpf.c
│ ├── idle_flux_bpf.h
│ ├── pntring.bpf.h
│ ├── pntring_funcs.bpf.h
│ ├── prov_flux.bpf.c
│ ├── prov_flux_bpf.h
│ ├── roci_flux.bpf.c
│ ├── roci_flux_bpf.h
│ ├── schedclasstop.bpf.c
│ ├── schedfair.bpf.c
│ ├── schedfair.h
│ ├── schedghostidle.bpf.c
│ ├── schedlat.bpf.c
│ ├── schedlat.h
│ ├── schedlat_shared_bpf.h
│ ├── schedrun.bpf.c
│ ├── schedrun.h
│ ├── schedrun_shared_bpf.h
│ ├── test.bpf.c
│ ├── topology.bpf.h
│ └── vmlinux_ghost.h
├── iovisor_bcc
│ ├── LICENSE
│ ├── bits.bpf.h
│ └── trace_helpers.h
├── linux.BUILD
├── rocksdb.BUILD
└── util
│ ├── LICENSE
│ └── util.h
└── util
├── cgroup_scraper.sh
├── enclave_watcher.cc
├── fdcat.cc
├── fdsrv.cc
└── pushtosched.cc
/.bazelrc:
--------------------------------------------------------------------------------
1 | build --cxxopt='-std=c++2a'
2 |
--------------------------------------------------------------------------------
/AUTHORS.md:
--------------------------------------------------------------------------------
1 | The people below have contributed to ghOSt. The list is ordered by when each
2 | person first joined the project. If you contribute to ghOSt, feel free to add
3 | your name to the end.
4 |
5 |
6 | - Paul Turner
7 | - Neel Natu
8 | - Ashwin Chaugule
9 | - Oleg Rombakh
10 | - Jack Humphries
11 | - Christos Kozyrakis
12 | - Luigi Rizzo
13 | - Barret Rhoden
14 | - Josh Don
15 | - Ofir Weisse
16 | - Hannah Pan
17 | - Sourav Panda
18 | - Andrew Delgadillo
19 | - Dohyun Kim
20 | - Stanko Novakovic
21 |
22 |
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # How to Contribute
2 |
3 | We'd love to accept your patches and contributions to this project. There are
4 | just a few small guidelines you need to follow.
5 |
6 | ## Contributor License Agreement
7 |
8 | Contributions to this project must be accompanied by a Contributor License
9 | Agreement (CLA). You (or your employer) retain the copyright to your
10 | contribution; this simply gives us permission to use and redistribute your
11 | contributions as part of the project. Head over to
12 | <https://cla.developers.google.com/> to see your current agreements on file or
13 | to sign a new one.
14 |
15 | You generally only need to submit a CLA once, so if you've already submitted one
16 | (even if it was for a different project), you probably don't need to do it
17 | again.
18 |
19 | ## Code Reviews
20 |
21 | All submissions, including submissions by project members, require review. We
22 | use GitHub pull requests for this purpose. Consult
23 | [GitHub Help](https://help.github.com/articles/about-pull-requests/) for more
24 | information on using pull requests.
25 |
26 | ## Community Guidelines
27 |
28 | This project follows
29 | [Google's Open Source Community Guidelines](https://opensource.google/conduct/).
30 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Copyright 2022 Google LLC
2 |
3 | Redistribution and use in source and binary forms, with or without
4 | modification, are permitted provided that the following conditions are
5 | met:
6 |
7 | * Redistributions of source code must retain the above copyright
8 | notice, this list of conditions and the following disclaimer.
9 | * Redistributions in binary form must reproduce the above
10 | copyright notice, this list of conditions and the following disclaimer
11 | in the documentation and/or other materials provided with the
12 | distribution.
13 | * Neither the name of Google LLC nor the names of its
14 | contributors may be used to endorse or promote products derived from
15 | this software without specific prior written permission.
16 |
17 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 |
--------------------------------------------------------------------------------
/SECURITY.md:
--------------------------------------------------------------------------------
1 | To report a security issue, please email
2 | [kernel-ghost@google.com](mailto:kernel-ghost@google.com) with a description of
3 | the issue, the steps you took to create the issue, affected versions, and, if
4 | known, mitigations for the issue. Our vulnerability management team will respond
5 | within 3 working days of your email. If the issue is confirmed as a
6 | vulnerability, we will open a Security Advisory. This project follows a 90 day
7 | disclosure timeline.
8 |
--------------------------------------------------------------------------------
/bpf/bpf.bzl:
--------------------------------------------------------------------------------
1 | """The open source build rules for eBPF programs and skeleton headers."""
2 |
def bpf_program(name, src, hdrs, bpf_object, macros = [], **kwargs):
    """Compiles a C eBPF source file into an eBPF object file via a genrule.

    The generated command runs clang-12 with the BPF target and then runs
    `llvm-strip -g` on the resulting object.

    Args:
      name: name of the generated genrule target.
      src: eBPF program source code in C.
      hdrs: list of header files that `src` depends on.
      bpf_object: name of the eBPF object file the rule produces.
      macros: extra macros; each one is passed to clang as `-D<macro>`.
      **kwargs: additional arguments forwarded to `native.genrule`.
    """

    # Base compiler invocation for the BPF target.
    compiler = "clang-12 -g -O2 -target bpf -D__TARGET_ARCH_x86 -D__x86_64__ "

    # The `.` directory is the project root, so it is passed with `-I` to make
    # #includes work in the source files.
    #
    # `$(BINDIR)/external/linux` contains the outputs of the targets in
    # linux.BUILD, so the libbpf headers live under
    # $(BINDIR)/external/linux/libbpf/include/*.
    include_flags = (
        "-I . -I /usr/include/x86_64-linux-gnu " +
        "-I $(BINDIR)/external/linux/libbpf/include "
    )

    # `$@` is the location to write the eBPF object file.
    compile_step = "-c $(location " + src + ") -o $@ "

    # One ` -D<macro>` flag per caller-supplied macro.
    macro_flags = "".join([" -D%s" % macro for macro in macros])

    strip_step = " && llvm-strip -g $@"

    native.genrule(
        name = name,
        srcs = ["@linux//:libbpf"] + [src] + hdrs,
        outs = [bpf_object],
        cmd = compiler + include_flags + compile_step + macro_flags + strip_step,
        **kwargs
    )
36 |
37 | def bpf_skeleton(name, bpf_object, skel_hdr, **kwargs):
38 | """Generates eBPF skeleton from object file to .c source code.
39 |
40 | Args:
41 | name: target name for eBPF program.
42 | bpf_object: built eBPF program.
43 | skel_hdr: name of generated skeleton header file.
44 | **kwargs: additional arguments.
45 | """
46 | native.genrule(
47 | name = name,
48 | # bpftool does not seem to be compiled when I include it in the `tools`
49 | # attribute list instead.
50 | srcs = ["@linux//:bpftool", bpf_object],
51 | outs = [skel_hdr],
52 | cmd = (
53 | "$(BINDIR)/external/linux/bpftool/bin/bpftool gen skeleton $(location " + bpf_object + ") > $@ && " +
54 | # The libbpf headers are located in `libbpf` rather than `bpf`.
55 | "sed -i 's/#include
11 |
12 | #include "libbpf/bpf.h"
13 | #include "libbpf/libbpf.h"
14 |
15 | #include "lib/ghost_uapi.h" // for GHOST_VERSION.
16 |
17 | // See e.g. smp_store_release(). We can't check when we compile the BPF
18 | // programs, which are built with clang -target bpf, but all agents that load
19 | // bpf programs include this header.
20 | #ifndef __x86_64__
21 | #error "BPF shared memory sync only works on x86"
22 | #endif
23 |
24 | #ifdef __cplusplus
25 | extern "C" {
26 | #endif
27 |
28 | #ifndef GHOST_BPF
29 | // The definitions below are needed when the userspace code is compiled on a
30 | // machine that is *not* running the ghOSt kernel and therefore does not have
31 | // the ghOSt declarations below in the bpf.h UAPI header.
32 |
33 | // From include/uapi/linux/bpf.h for the ghost kernel.
34 |
35 | enum {
36 | BPF_PROG_TYPE_GHOST_SCHED = 1000,
37 | BPF_PROG_TYPE_GHOST_MSG,
38 | #if GHOST_VERSION >= 83
39 | BPF_PROG_TYPE_GHOST_SELECT_RQ,
40 | #endif
41 | #if GHOST_VERSION >= 84
42 | BPF_PROG_TYPE_GHOST_HALT_POLL,
43 | #endif
44 |
45 | BPF_GHOST_SCHED_PNT = 2000,
46 | BPF_GHOST_MSG_SEND,
47 | #if GHOST_VERSION >= 83
48 | BPF_GHOST_SELECT_RQ,
49 | #endif
50 | #if GHOST_VERSION >= 84
51 | BPF_GHOST_HALT_POLL,
52 | #endif
53 | __MAX_BPF_GHOST_ATTACH_TYPE
54 | };
55 |
56 | // end include/uapi/linux/bpf.h
57 |
58 | #endif
59 |
60 | // Generic BPF helpers
61 |
62 | size_t bpf_map__mmap_sz(struct bpf_map *map);
63 | void *bpf_map__mmap(struct bpf_map *map);
64 | int bpf_map__munmap(struct bpf_map *map, void *addr);
65 | void bpf_program__set_types(struct bpf_program *prog, int prog_type,
66 | int expected_attach_type);
67 |
68 | // Common BPF initialization
69 | //
70 | // Returns 0 on success, -1 with errno set on failure.
71 | int agent_bpf_init(void);
72 |
73 | // Registers `prog` to be inserted at attach point `eat` during
74 | // agent_bpf_insert_registered(). You must load the programs before calling
75 | // insert. You may call this repeatedly, and it will only insert each program
76 | // once. In particular, you may temporarily get EBUSY during an agent handoff.
77 | //
78 | // Returns 0 on success, -1 with errno set on failure.
79 | int agent_bpf_register(struct bpf_program *prog, int eat);
80 |
81 | // Inserts the programs you previously registered and loaded.
82 | //
83 | // Returns 0 on success, -1 with errno set on failure. Any programs inserted
84 | // are not removed on error; call bpf_destroy() or just exit your process.
85 | int agent_bpf_insert_registered(int ctl_fd);
86 |
87 | // Gracefully unlinks and unloads the BPF programs. When agents call this, they
88 | // explicitly close (and thus unlink/detach) BPF programs from the enclave,
89 | // which will speed up agent upgrade/handoff.
90 | void agent_bpf_destroy(void);
91 |
// Values passed as the `type` argument of the agent_bpf_trace_*() functions
// declared below.
enum {
  AGENT_BPF_TRACE_SCHEDGHOSTIDLE,
  // NOTE(review): presumably the number of trace types above (a bound, not a
  // valid trace type itself) -- confirm against agent.c.
  MAX_AGENT_BPF_TRACE,
};

// NOTE(review): the implementations are not visible in this header; the
// summaries below are inferred from names and signatures only -- confirm
// against agent.c.
//
// Presumably sets up the trace identified by `type`. The int return likely
// follows this header's "0 on success, -1 with errno set" convention -- TODO
// confirm.
int agent_bpf_trace_init(unsigned int type);
// Presumably writes the data collected for trace `type` to the stream `to`.
void agent_bpf_trace_output(FILE *to, unsigned int type);
// Presumably clears the data collected so far for trace `type`.
void agent_bpf_trace_reset(unsigned int type);
100 |
101 | #ifdef __cplusplus
102 | } /* extern "C" */
103 | #endif
104 |
105 | #endif // GHOST_BPF_USER_AGENT_H_
106 |
--------------------------------------------------------------------------------
/bpf/user/ghost_shared.h:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2021 Google LLC
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | #ifndef GHOST_LIB_BPF_GHOST_SHARED_H_
18 | #define GHOST_LIB_BPF_GHOST_SHARED_H_
19 |
20 | // Keep this file's structs in sync with bpf/ghost_shared_bpf.h.
21 | // We need different headers for BPF and C programs due to various Google3
22 | // reasons.
23 |
24 | #include
25 |
// Per-cpu data record. The aligned(64) attribute pads the struct to 64 bytes,
// so each element of an array of these starts on its own 64-byte boundary.
// NOTE(review): 64 presumably matches the cache-line size -- confirm.
struct ghost_per_cpu_data {
  // NOTE(review): presumably nonzero when a tick is wanted for this cpu;
  // verify against the BPF-side users of this field.
  uint8_t want_tick;
} __attribute__((aligned(64)));
29 |
30 | #endif // GHOST_LIB_BPF_GHOST_SHARED_H_
31 |
--------------------------------------------------------------------------------
/bpf/user/schedghostidle.c:
--------------------------------------------------------------------------------
1 | // Copyright 2021 Google LLC
2 | //
3 | // Use of this source code is governed by a BSD-style
4 | // license that can be found in the LICENSE file or at
5 | // https://developers.google.com/open-source/licenses/bsd
6 |
7 | #include
8 | #include
9 | #include
10 | #include
11 | #include
12 | #include
13 | #include
14 | #include
15 |
16 | #include "bpf/user/bpf_schedghostidle.skel.h"
17 | #include "third_party/iovisor_bcc/trace_helpers.h"
18 | #include "libbpf/bpf.h"
19 | #include "libbpf/libbpf.h"
20 |
21 | #define handle_error(msg) \
22 | do { perror(msg); exit(-1); } while (0)
23 |
24 | /* Keep this in sync with schedghostidle.bpf.c. */
25 | #define NR_SLOTS 25
26 |
27 | static uint64_t start_time_ns, print_time_ns;
28 |
29 | static void print_hist(int fd)
30 | {
31 | unsigned int nr_cpus = libbpf_num_possible_cpus();
32 | unsigned int hist[NR_SLOTS] = {0};
33 | uint64_t *count;
34 | uint64_t total = 0;
35 | float total_sec;
36 |
37 | count = calloc(nr_cpus, sizeof(*count));
38 | if (!count)
39 | handle_error("calloc");
40 |
41 | for (int i = 0; i < NR_SLOTS; i++) {
42 | if (bpf_map_lookup_elem(fd, &i, count))
43 | handle_error("lookup");
44 | hist[i] = 0;
45 | for (int c = 0; c < nr_cpus; c++) {
46 | hist[i] += count[c];
47 | total += count[c];
48 | }
49 | }
50 | free(count);
51 |
52 | printf("\n");
53 | printf("Latency of a CPU going Idle until a task is Latched:\n");
54 | printf("----------------------------------------------------\n");
55 | print_log2_hist(hist, NR_SLOTS, "usec");
56 |
57 | total_sec = 1.0 * (print_time_ns - start_time_ns) / NSEC_PER_SEC;
58 | printf("\nTotal: %lu events over %f seconds (%f / sec) on %u cpus\n\n",
59 | total, total_sec, total / total_sec, nr_cpus);
60 | }
61 |
/*
 * Set from the signal handler, polled by main(). volatile sig_atomic_t is the
 * object type the C standard guarantees can be written safely from an
 * asynchronous signal handler.
 */
static volatile sig_atomic_t exiting;

/* Signal handler: only records that the process should stop waiting. */
static void sig_hand(int signr)
{
	(void)signr;	/* the signal number is not needed */
	exiting = 1;
}
68 |
69 | static struct sigaction sigact = {.sa_handler = sig_hand};
70 |
71 | int main(int argc, char **argv)
72 | {
73 | struct bpf_schedghostidle_bpf *obj;
74 | int err;
75 |
76 | sigaction(SIGINT, &sigact, 0);
77 | err = bump_memlock_rlimit();
78 | if (err) {
79 | fprintf(stderr, "failed to increase rlimit: %d\n", err);
80 | return -1;
81 | }
82 |
83 | obj = bpf_schedghostidle_bpf__open_and_load();
84 | if (!obj) {
85 | fprintf(stderr, "failed to open BPF object\n");
86 | return -1;
87 | }
88 |
89 | err = bpf_schedghostidle_bpf__attach(obj);
90 | if (err) {
91 | fprintf(stderr, "failed to attach BPF programs\n");
92 | goto cleanup;
93 | }
94 |
95 | start_time_ns = get_ktime_ns();
96 |
97 | printf("Ctrl-c to exit\n");
98 |
99 | while (!exiting)
100 | sleep(9999999);
101 |
102 | print_time_ns = get_ktime_ns();
103 | print_hist(bpf_map__fd(obj->maps.hist));
104 |
105 | printf("Total latches: %lu, bpf_latches %lu (%f), idle_to_bpf_latches %lu (%f)\n\n",
106 | obj->bss->nr_latches,
107 | obj->bss->nr_bpf_latches,
108 | 100.0 * obj->bss->nr_bpf_latches / obj->bss->nr_latches,
109 | obj->bss->nr_idle_to_bpf_latches,
110 | 100.0 * obj->bss->nr_idle_to_bpf_latches / obj->bss->nr_latches);
111 |
112 | cleanup:
113 | bpf_schedghostidle_bpf__destroy(obj);
114 |
115 | return 0;
116 | }
117 |
--------------------------------------------------------------------------------
/bpf/user/schedlat.c:
--------------------------------------------------------------------------------
1 | // Copyright 2021 Google LLC
2 | //
3 | // Use of this source code is governed by a BSD-style
4 | // license that can be found in the LICENSE file or at
5 | // https://developers.google.com/open-source/licenses/bsd
6 |
7 | #include
8 | #include
9 | #include
10 | #include
11 | #include
12 | #include
13 | #include
14 |
15 | #include "third_party/bpf/schedlat.h"
16 | #include "bpf/user/schedlat_bpf.skel.h"
17 | #include "third_party/iovisor_bcc/trace_helpers.h"
18 | #include "libbpf/bpf.h"
19 | #include "libbpf/libbpf.h"
20 |
21 | #define handle_error(msg) \
22 | do { perror(msg); exit(-1); } while (0)
23 |
24 | static const char *titles[] = {
25 | [RUNNABLE_TO_LATCHED] = "Latency from Runnable to Latched",
26 | [LATCHED_TO_RUN] = "Latency from Latched to Run",
27 | [RUNNABLE_TO_RUN] = "Latency from Runnable to Run",
28 | };
29 |
30 | static void print_hists(int fd)
31 | {
32 | unsigned int nr_cpus = libbpf_num_possible_cpus();
33 | struct hist *hist;
34 | uint32_t total[MAX_NR_HIST_SLOTS];
35 |
36 | /*
37 | * There are NR_HISTS members of the PERCPU_ARRAY. Each one we read is
38 | * an *array[nr_cpus]* of the struct hist, one for each cpu. This
39 | * differs from a accessing an element from within a BPF program, where
40 | * we only get the percpu element.
41 | */
42 | hist = calloc(nr_cpus, sizeof(struct hist));
43 | if (!hist)
44 | handle_error("calloc");
45 |
46 | for (int i = 0; i < NR_HISTS; i++) {
47 | if (bpf_map_lookup_elem(fd, &i, hist))
48 | handle_error("lookup");
49 | memset(total, 0, sizeof(total));
50 | for (int c = 0; c < nr_cpus; c++) {
51 | for (int s = 0; s < MAX_NR_HIST_SLOTS; s++)
52 | total[s] += hist[c].slots[s];
53 | }
54 | printf("\n%s:\n----------\n", titles[i]);
55 | print_log2_hist(total, MAX_NR_HIST_SLOTS, "usec");
56 | }
57 |
58 | free(hist);
59 | }
60 |
/*
 * Set from the signal handler, polled by main(). volatile sig_atomic_t is the
 * object type the C standard guarantees can be written safely from an
 * asynchronous signal handler.
 */
static volatile sig_atomic_t exiting;

/* Signal handler: only records that the process should stop waiting. */
static void sig_hand(int signr)
{
	(void)signr;	/* the signal number is not needed */
	exiting = 1;
}
67 |
68 | static struct sigaction sigact = {.sa_handler = sig_hand};
69 |
70 | int main(int argc, char **argv)
71 | {
72 | struct schedlat_bpf *obj;
73 | int err;
74 |
75 | sigaction(SIGINT, &sigact, 0);
76 | err = bump_memlock_rlimit();
77 | if (err) {
78 | fprintf(stderr, "failed to increase rlimit: %d\n", err);
79 | return -1;
80 | }
81 |
82 | obj = schedlat_bpf__open_and_load();
83 | if (!obj) {
84 | fprintf(stderr, "failed to open BPF object\n");
85 | return -1;
86 | }
87 |
88 | err = schedlat_bpf__attach(obj);
89 | if (err) {
90 | fprintf(stderr, "failed to attach BPF programs\n");
91 | goto cleanup;
92 | }
93 |
94 | printf("Ctrl-c to exit\n");
95 | while (!exiting)
96 | sleep(9999999);
97 |
98 | print_hists(bpf_map__fd(obj->maps.hists));
99 |
100 | printf("Exiting\n");
101 |
102 | cleanup:
103 | schedlat_bpf__destroy(obj);
104 |
105 | return 0;
106 | }
107 |
--------------------------------------------------------------------------------
/bpf/user/schedlat_shared.h:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2021 Google LLC
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | #ifndef GHOST_LIB_BPF_SCHEDLAT_SHARED_H_
18 | #define GHOST_LIB_BPF_SCHEDLAT_SHARED_H_
19 |
20 | // Keep this file's structs in sync with bpf/schedlat_shared_bpf.h.
21 | // We need different headers for BPF and C programs due to various Google3
22 | // reasons.
23 |
24 | #include
25 |
26 | #define MAX_PIDS 102400
27 | #define MAX_NR_HIST_SLOTS 25
28 |
/*
 * Three 64-bit values kept per task.
 * NOTE(review): presumably timestamps of when the task became runnable, was
 * latched, and started running (matching the RUNNABLE_TO_LATCHED /
 * LATCHED_TO_RUN / RUNNABLE_TO_RUN histograms in this header); the time unit
 * is not visible here -- confirm against the BPF program.
 */
struct task_stat {
	uint64_t runnable_at;
	uint64_t latched_at;
	uint64_t ran_at;
};
34 |
/*
 * Power of 2 histogram, <=1 us, 2us, 4us, etc.  This struct must be at least
 * 8-byte aligned, since it is a value for a BPF map.
 *
 * slots[] holds MAX_NR_HIST_SLOTS (25) 32-bit counters, i.e. 100 bytes of
 * payload; the aligned(64) attribute satisfies the 8-byte requirement and
 * rounds sizeof(struct hist) up to 128.
 */
struct hist {
	uint32_t slots[MAX_NR_HIST_SLOTS];
} __attribute__((aligned(64)));
42 |
/*
 * Identifiers of the latency histograms.
 * NOTE(review): presumably used as indices into the histogram map, with
 * NR_HISTS being the number of histograms rather than a histogram itself --
 * confirm against the BPF program.
 */
enum {
	RUNNABLE_TO_LATCHED,
	LATCHED_TO_RUN,
	RUNNABLE_TO_RUN,
	NR_HISTS,
};
49 |
50 | #endif // GHOST_LIB_BPF_SCHEDLAT_SHARED_H_
51 |
--------------------------------------------------------------------------------
/bpf/user/schedrun.c:
--------------------------------------------------------------------------------
1 | // Copyright 2021 Google LLC
2 | //
3 | // Use of this source code is governed by a BSD-style
4 | // license that can be found in the LICENSE file or at
5 | // https://developers.google.com/open-source/licenses/bsd
6 |
7 | #include
8 | #include
9 | #include
10 | #include
11 | #include
12 | #include
13 | #include
14 |
15 | #include "third_party/bpf/schedrun.h"
16 | #include "bpf/user/schedrun_bpf.skel.h"
17 | #include "third_party/iovisor_bcc/trace_helpers.h"
18 | #include "libbpf/bpf.h"
19 | #include "libbpf/libbpf.h"
20 |
21 | #define error_exit(msg) do { \
22 | perror(msg); \
23 | exit(EXIT_FAILURE); \
24 | } while (0)
25 |
26 | static bool ghost_only = false;
27 | static pid_t pid = 0;
28 |
29 | static const char *titles[] = {
30 | [RUNTIMES_PREEMPTED_YIELDED] = "Runtimes of preempted/yielded tasks",
31 | [RUNTIMES_BLOCKED] = "Runtimes of tasks that blocked",
32 | [RUNTIMES_ALL] = "All task runtimes",
33 | };
34 |
35 | // TODO: refactor (copied from schedlat.c).
36 | static void print_hists(int fd)
37 | {
38 | unsigned int nr_cpus = libbpf_num_possible_cpus();
39 | struct hist *hist;
40 | uint32_t total[MAX_NR_HIST_SLOTS];
41 |
42 | /*
43 | * There are NR_HISTS members of the PERCPU_ARRAY. Each one we read is
44 | * an *array[nr_cpus]* of the struct hist, one for each cpu. This
45 | * differs from accessing an element from within a BPF program, where
46 | * we only get the percpu element.
47 | */
48 | hist = calloc(nr_cpus, sizeof(struct hist));
49 | if (!hist)
50 | error_exit("calloc");
51 |
52 | for (int i = 0; i < NR_HISTS; i++) {
53 | if (bpf_map_lookup_elem(fd, &i, hist))
54 | error_exit("bpf_map_lookup_elem");
55 | memset(total, 0, sizeof(total));
56 | for (int c = 0; c < nr_cpus; c++) {
57 | for (int s = 0; s < MAX_NR_HIST_SLOTS; s++)
58 | total[s] += hist[c].slots[s];
59 | }
60 | printf("\n%s:\n----------\n", titles[i]);
61 | print_log2_hist(total, MAX_NR_HIST_SLOTS, "usec");
62 | }
63 | }
64 |
65 | int main(int argc, char **argv)
66 | {
67 | sigset_t set;
68 | int opt, err, sig;
69 | struct schedrun_bpf *skel;
70 |
71 | if (sigemptyset(&set))
72 | error_exit("sigemptyset");
73 | if (sigaddset(&set, SIGINT))
74 | error_exit("sigaddset");
75 | if (sigprocmask(SIG_BLOCK, &set, NULL))
76 | error_exit("sigprocmask");
77 |
78 | while ((opt = getopt(argc, argv, "gp:")) != -1) {
79 | switch (opt) {
80 | case 'g':
81 | ghost_only = true;
82 | break;
83 | case 'p':
84 | errno = 0;
85 | pid = strtol(optarg, NULL, 10);
86 | if (errno)
87 | error_exit("strtol");
88 | if (pid <= 0) {
89 | fprintf(stderr, "Invalid pid: %s\n", optarg);
90 | return 1;
91 | }
92 | break;
93 | default:
94 | fprintf(stderr, "Usage: %s [-p pid | -g]\n", argv[0]);
95 | return 1;
96 | }
97 | }
98 |
99 | if (ghost_only && pid) {
100 | fprintf(stderr, "-g and -p options are mutually exclusive\n");
101 | return 1;
102 | }
103 |
104 | if (bump_memlock_rlimit())
105 | error_exit("bump_memlock_rlimit");
106 |
107 | skel = schedrun_bpf__open();
108 | if (!skel) {
109 | fprintf(stderr, "Failed to open BPF skeleton\n");
110 | return 1;
111 | }
112 |
113 | skel->rodata->ghost_only = ghost_only;
114 | skel->rodata->targ_tgid = pid;
115 |
116 | err = schedrun_bpf__load(skel);
117 | if (err) {
118 | fprintf(stderr, "Failed to load BPF skeleton\n");
119 | return 1;
120 | }
121 |
122 | err = schedrun_bpf__attach(skel);
123 | if (err) {
124 | fprintf(stderr, "Failed to attach BPF skeleton\n");
125 | goto cleanup;
126 | }
127 |
128 | printf("Ctrl-c to exit\n");
129 |
130 | if (sigwait(&set, &sig))
131 | error_exit("sigwait");
132 |
133 | print_hists(bpf_map__fd(skel->maps.hists));
134 | printf("Exiting\n");
135 |
136 | cleanup:
137 | schedrun_bpf__destroy(skel);
138 | return -err;
139 | }
140 |
--------------------------------------------------------------------------------
/bpf/user/schedrun_shared.h:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2021 Google LLC
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | #ifndef GHOST_LIB_BPF_SCHEDRUN_SHARED_H_
18 | #define GHOST_LIB_BPF_SCHEDRUN_SHARED_H_
19 |
20 | // Keep this file's structs in sync with bpf/schedrun_shared_bpf.h.
21 | // We need different headers for BPF and C programs due to various Google3
22 | // reasons.
23 |
24 | #include
25 |
26 | #define MAX_PIDS 102400
27 | #define MAX_NR_HIST_SLOTS 25
28 |
/*
 * Power of 2 histogram, <=1 us, 2us, 4us, etc.  This struct must be at least
 * 8-byte aligned, since it is a value for a BPF map.
 *
 * slots[] holds MAX_NR_HIST_SLOTS (25) 32-bit counters, i.e. 100 bytes of
 * payload; the aligned(64) attribute satisfies the 8-byte requirement and
 * rounds sizeof(struct hist) up to 128.
 */
struct hist {
	uint32_t slots[MAX_NR_HIST_SLOTS];
} __attribute__((aligned(64)));
36 |
/*
 * Identifiers of the runtime histograms.
 * NOTE(review): presumably used as indices into the histogram map, with
 * NR_HISTS being the number of histograms rather than a histogram itself --
 * confirm against the BPF program.
 */
enum {
	RUNTIMES_PREEMPTED_YIELDED,
	RUNTIMES_BLOCKED,
	RUNTIMES_ALL,
	NR_HISTS,
};
43 |
44 | #endif // GHOST_LIB_BPF_SCHEDRUN_SHARED_H_
45 |
--------------------------------------------------------------------------------
/experiments/antagonist/cfs_orchestrator.cc:
--------------------------------------------------------------------------------
1 | // Copyright 2021 Google LLC
2 | //
3 | // Use of this source code is governed by a BSD-style
4 | // license that can be found in the LICENSE file or at
5 | // https://developers.google.com/open-source/licenses/bsd
6 |
7 | #include "experiments/antagonist/cfs_orchestrator.h"
8 |
9 | #include "absl/functional/bind_front.h"
10 |
11 | namespace ghost_test {
12 |
13 | void CfsOrchestrator::InitThreadPool() {
14 | std::vector kernel_schedulers(
15 | options().num_threads, ghost::GhostThread::KernelScheduler::kCfs);
16 | std::vector> thread_work(
17 | options().num_threads, absl::bind_front(&CfsOrchestrator::Worker, this));
18 |
19 | CHECK_EQ(kernel_schedulers.size(), options().num_threads);
20 | CHECK_EQ(kernel_schedulers.size(), thread_work.size());
21 | thread_pool().Init(kernel_schedulers, thread_work);
22 | }
23 |
// Constructs the orchestrator and starts the experiment.
//
// `threads_ready_` is sized for `num_threads + 1` participants: each worker
// blocks on it once in `Worker()`, and the constructing thread blocks on it
// below. The start time is therefore recorded only after every worker has
// reached the barrier.
CfsOrchestrator::CfsOrchestrator(Orchestrator::Options opts)
    : Orchestrator(std::move(opts)), threads_ready_(options().num_threads + 1) {
  // Each worker is pinned to the CPU at its own index (see `Worker()`), so
  // the CPU list must have exactly one entry per thread.
  CHECK_EQ(options().num_threads, options().cpus.Size());

  InitThreadPool();
  // Wait for all workers to finish their one-time setup.
  threads_ready_.Block();
  set_start(absl::Now());
}
32 |
33 | void CfsOrchestrator::Worker(uint32_t sid) {
34 | if (!thread_triggers().Triggered(sid)) {
35 | thread_triggers().Trigger(sid);
36 | const ghost::Cpu cpu = options().cpus.GetNthCpu(sid);
37 | CHECK_EQ(
38 | ghost::GhostHelper()->SchedSetAffinity(
39 | ghost::Gtid::Current(), ghost::MachineTopology()->ToCpuList({cpu})),
40 | 0);
41 | printf("Worker (SID %u, TID: %ld, affined to CPU %u)\n", sid,
42 | syscall(SYS_gettid), cpu.id());
43 | threads_ready_.Block();
44 | }
45 |
46 | Soak(sid);
47 | }
48 |
49 | } // namespace ghost_test
50 |
--------------------------------------------------------------------------------
/experiments/antagonist/cfs_orchestrator.h:
--------------------------------------------------------------------------------
1 | // Copyright 2021 Google LLC
2 | //
3 | // Use of this source code is governed by a BSD-style
4 | // license that can be found in the LICENSE file or at
5 | // https://developers.google.com/open-source/licenses/bsd
6 |
7 | #ifndef GHOST_EXPERIMENTS_ANTAGONIST_CFS_ORCHESTRATOR_H_
8 | #define GHOST_EXPERIMENTS_ANTAGONIST_CFS_ORCHESTRATOR_H_
9 |
10 | #include "absl/synchronization/barrier.h"
11 | #include "experiments/antagonist/orchestrator.h"
12 |
13 | namespace ghost_test {
14 |
15 | // This is the orchestrator for the CFS (Linux Completely Fair Scheduler)
16 | // experiments. All threads are scheduled by CFS.
17 | //
18 | // Example:
19 | // Orchestrator::Options options;
20 | // ... Fill in the options.
21 | // CfsOrchestrator orchestrator(options);
22 | // (Constructs orchestrator with options.)
23 | // ...
24 | // orchestrator.Terminate();
25 | // (Tells orchestrator to stop the experiment and print the results.)
26 | class CfsOrchestrator : public Orchestrator {
27 | public:
28 | explicit CfsOrchestrator(Orchestrator::Options opts);
29 | ~CfsOrchestrator() final {}
30 |
31 | private:
32 | // Initializes the thread pool.
33 | void InitThreadPool();
34 |
35 | void Worker(uint32_t sid) final;
36 |
37 | // Used so that the main thread does not start the timer (and workers do not
38 | // start spinning) until the worker threads have initialized.
39 | absl::Barrier threads_ready_;
40 | };
41 |
42 | } // namespace ghost_test
43 |
44 | #endif // GHOST_EXPERIMENTS_ANTAGONIST_CFS_ORCHESTRATOR_H_
45 |
--------------------------------------------------------------------------------
/experiments/antagonist/ghost_orchestrator.cc:
--------------------------------------------------------------------------------
1 | // Copyright 2021 Google LLC
2 | //
3 | // Use of this source code is governed by a BSD-style
4 | // license that can be found in the LICENSE file or at
5 | // https://developers.google.com/open-source/licenses/bsd
6 |
7 | #include "experiments/antagonist/ghost_orchestrator.h"
8 |
9 | #include "absl/functional/bind_front.h"
10 |
11 | namespace ghost_test {
namespace {
// All workers share a single work class: the experiments do not need
// different classes of service (e.g., different expected runtimes or QoS
// (Quality-of-Service) classes) across workers, and every worker is a ghOSt
// one-shot.
//
// The anonymous namespace already gives this constant internal linkage, so no
// 'static' is needed.
constexpr uint32_t kWorkClassIdentifier = 0;
}  // namespace
19 |
20 | void GhostOrchestrator::InitThreadPool() {
21 | std::vector kernel_schedulers(
22 | options().num_threads, ghost::GhostThread::KernelScheduler::kGhost);
23 | std::vector> thread_work(
24 | options().num_threads,
25 | absl::bind_front(&GhostOrchestrator::Worker, this));
26 |
27 | CHECK_EQ(kernel_schedulers.size(), options().num_threads);
28 | CHECK_EQ(kernel_schedulers.size(), thread_work.size());
29 | thread_pool().Init(kernel_schedulers, thread_work);
30 | }
31 |
32 | void GhostOrchestrator::InitGhost() {
33 | const std::vector gtids = thread_pool().GetGtids();
34 | CHECK_EQ(gtids.size(), options().num_threads);
35 |
36 | ghost::work_class wc;
37 | prio_table_helper_.GetWorkClass(kWorkClassIdentifier, wc);
38 | wc.id = kWorkClassIdentifier;
39 | wc.flags = WORK_CLASS_ONESHOT;
40 | wc.qos = options().ghost_qos;
41 | // Write the max unsigned 64-bit integer as the deadline just in case we want
42 | // to run the experiment with the ghOSt EDF (Earliest-Deadline-First)
43 | // scheduler.
44 | wc.exectime = std::numeric_limits::max();
45 | // 'period' is irrelevant because all threads scheduled by ghOSt are
46 | // one-shots.
47 | wc.period = 0;
48 | prio_table_helper_.SetWorkClass(kWorkClassIdentifier, wc);
49 |
50 | for (size_t i = 0; i < gtids.size(); ++i) {
51 | ghost::sched_item si;
52 | prio_table_helper_.GetSchedItem(/*sid=*/i, si);
53 | si.sid = i;
54 | si.wcid = kWorkClassIdentifier;
55 | si.gpid = gtids[i].id();
56 | si.flags = SCHED_ITEM_RUNNABLE;
57 | si.deadline = 0;
58 | prio_table_helper_.SetSchedItem(/*sid=*/i, si);
59 | }
60 | }
61 |
62 | GhostOrchestrator::GhostOrchestrator(Orchestrator::Options opts)
63 | : Orchestrator(std::move(opts)),
64 | prio_table_helper_(/*num_sched_items=*/options().num_threads,
65 | /*num_work_classes=*/1) {
66 | CHECK(options().cpus.Empty());
67 |
68 | InitThreadPool();
69 | // This must be called after 'InitThreadPool' since it accesses the GTIDs of
70 | // the threads in the thread pool.
71 | InitGhost();
72 | set_start(absl::Now());
73 | }
74 |
75 | void GhostOrchestrator::Worker(uint32_t sid) {
76 | if (!thread_triggers().Triggered(sid)) {
77 | thread_triggers().Trigger(sid);
78 | printf("Worker (SID %u, TID: %ld, not affined to any CPU)\n", sid,
79 | syscall(SYS_gettid));
80 | }
81 |
82 | Soak(sid);
83 | }
84 |
85 | } // namespace ghost_test
86 |
--------------------------------------------------------------------------------
/experiments/antagonist/ghost_orchestrator.h:
--------------------------------------------------------------------------------
1 | // Copyright 2021 Google LLC
2 | //
3 | // Use of this source code is governed by a BSD-style
4 | // license that can be found in the LICENSE file or at
5 | // https://developers.google.com/open-source/licenses/bsd
6 |
7 | #ifndef GHOST_EXPERIMENTS_ANTAGONIST_GHOST_ORCHESTRATOR_H_
8 | #define GHOST_EXPERIMENTS_ANTAGONIST_GHOST_ORCHESTRATOR_H_
9 |
10 | #include "experiments/antagonist/orchestrator.h"
11 | #include "experiments/shared/prio_table_helper.h"
12 |
13 | namespace ghost_test {
14 |
15 | // This is the orchestrator for the ghOSt experiments. All threads are scheduled
16 | // by ghOSt.
17 | //
18 | // Example:
19 | // Orchestrator::Options options;
20 | // ... Fill in the options.
21 | // GhostOrchestrator orchestrator(options);
22 | // (Constructs orchestrator with options.)
23 | // ...
24 | // orchestrator.Terminate();
25 | // (Tells orchestrator to stop the experiment and print the results.)
26 | class GhostOrchestrator : public Orchestrator {
27 | public:
28 | explicit GhostOrchestrator(Orchestrator::Options opts);
29 | ~GhostOrchestrator() final {}
30 |
31 | private:
32 | // Initializes the thread pool.
33 | void InitThreadPool();
34 |
35 | // Initializes the ghOSt PrioTable.
36 | void InitGhost();
37 |
38 | void Worker(uint32_t sid) final;
39 |
40 | // Manages communication with ghOSt via the shared PrioTable.
41 | PrioTableHelper prio_table_helper_;
42 | };
43 |
44 | } // namespace ghost_test
45 |
46 | #endif // GHOST_EXPERIMENTS_ANTAGONIST_GHOST_ORCHESTRATOR_H_
47 |
--------------------------------------------------------------------------------
/experiments/antagonist/options_test.cc:
--------------------------------------------------------------------------------
1 | // Copyright 2021 Google LLC
2 | //
3 | // Use of this source code is governed by a BSD-style
4 | // license that can be found in the LICENSE file or at
5 | // https://developers.google.com/open-source/licenses/bsd
6 |
7 | #include "gmock/gmock.h"
8 | #include "gtest/gtest.h"
9 | #include "experiments/antagonist/orchestrator.h"
10 |
11 | // These tests check that the application prints options and parses command line
12 | // flags properly.
13 |
14 | namespace ghost_test {
15 | namespace {
16 |
17 | using ::testing::Eq;
18 |
19 | // Returns orchestrator options suitable for the tests.
20 | Orchestrator::Options GetOptions() {
21 | Orchestrator::Options options;
22 |
23 | options.print_options.pretty = true;
24 | options.work_share = 0.9;
25 | options.num_threads = 4;
26 | options.cpus =
27 | ghost::MachineTopology()->ToCpuList(std::vector{1, 2, 3, 4});
28 | options.experiment_duration = absl::Seconds(15);
29 | options.scheduler = ghost::GhostThread::KernelScheduler::kCfs;
30 | options.ghost_qos = 2;
31 |
32 | return options;
33 | }
34 |
35 | // This tests that the '<<' operator prints all options and their values in
36 | // alphabetical order by option name.
37 | TEST(OptionsTest, PrintOptions) {
38 | Orchestrator::Options options = GetOptions();
39 | std::ostringstream os;
40 |
41 | os << options;
42 | std::string expected = R"(cpus: 1 2 3 4
43 | experiment_duration: 15s
44 | ghost_qos: 2
45 | num_threads: 4
46 | print_format: pretty
47 | scheduler: cfs
48 | work_share: 0.9)";
49 | EXPECT_THAT(os.str(), Eq(expected));
50 | }
51 |
52 | } // namespace
53 | } // namespace ghost_test
54 |
--------------------------------------------------------------------------------
/experiments/antagonist/results.cc:
--------------------------------------------------------------------------------
1 | // Copyright 2021 Google LLC
2 | //
3 | // Use of this source code is governed by a BSD-style
4 | // license that can be found in the LICENSE file or at
5 | // https://developers.google.com/open-source/licenses/bsd
6 |
7 | #include "experiments/antagonist/results.h"
8 |
9 | #include
10 | #include
11 |
12 | #include "lib/base.h"
13 |
14 | namespace ghost_test {
15 |
16 | namespace {
// Column widths (in characters) for the pretty-printed results table.
constexpr size_t kWorkerLen = 8;
constexpr size_t kDurationLen = 20;
constexpr size_t kShareLen = 12;
// Add 2 to the end to account for the space between each column in the results.
constexpr size_t kNumDashes = kWorkerLen + kDurationLen + kShareLen + 2;

// Prints one row of the results in human-readable form.
//
// Each value is left-aligned, padded to its column width, and followed by a
// single space. 'T' and 'U' may be any types streamable to 'std::ostream', so
// the same helper prints both the header row (strings) and the data rows
// (numbers).
//
// If 'dashes' is true, a separator line of 'kNumDashes' dashes is printed
// after the row.
template <typename T, typename U>
void PrintLinePretty(std::ostream& os, const std::string& worker,
                     T run_duration, U work_share, bool dashes) {
  os << std::left;
  os << std::setw(kWorkerLen) << worker << " ";
  os << std::setw(kDurationLen) << run_duration << " ";
  os << std::setw(kShareLen) << work_share << " ";
  os << std::endl;
  if (dashes) {
    os << std::string(kNumDashes, '-') << std::endl;
  }
}
36 |
// Prints one row of the results in CSV form: "worker,run_duration,work_share"
// followed by a newline.
//
// 'T' and 'U' may be any types streamable to 'std::ostream'.
template <typename T, typename U>
void PrintLineCsv(std::ostream& os, const std::string& worker, T run_duration,
                  U work_share) {
  os << std::left;
  os << worker << ",";
  os << run_duration << ",";
  os << work_share;
  os << std::endl;
}
47 |
48 | // Prints the preface to the results if pretty mode is set.
49 | void PrintPrettyPreface(PrintOptions options) {
50 | CHECK(options.pretty);
51 |
52 | PrintLinePretty(*options.os, "Worker", "Run Duration (ns)", "Work Share",
53 | /*dashes=*/true);
54 | }
55 |
56 | // Adds/averages all results and prints out the summary.
57 | void PrintTotal(const std::vector& run_durations,
58 | absl::Duration runtime, PrintOptions options) {
59 | absl::Duration run_duration;
60 | for (const absl::Duration& r : run_durations) {
61 | run_duration += r;
62 | }
63 | const double work_share =
64 | absl::ToDoubleMilliseconds(run_duration) /
65 | (run_durations.size() * absl::ToDoubleMilliseconds(runtime));
66 |
67 | if (options.pretty) {
68 | PrintLinePretty(*options.os, "Total",
69 | absl::ToInt64Nanoseconds(run_duration), work_share,
70 | /*dashes=*/false);
71 | } else {
72 | PrintLineCsv(*options.os, "Total", absl::ToInt64Nanoseconds(run_duration),
73 | work_share);
74 | }
75 | }
76 | } // namespace
77 |
78 | // Prints all results.
79 | void Print(const std::vector& run_durations,
80 | absl::Duration runtime, const PrintOptions& options) {
81 | CHECK_NE(options.os, nullptr);
82 |
83 | if (options.pretty) {
84 | PrintPrettyPreface(options);
85 | }
86 |
87 | for (size_t i = 0; i < run_durations.size(); i++) {
88 | const double work_share = absl::ToDoubleMilliseconds(run_durations[i]) /
89 | absl::ToDoubleMilliseconds(runtime);
90 | const int64_t run_duration = absl::ToInt64Nanoseconds(run_durations[i]);
91 | if (options.pretty) {
92 | PrintLinePretty(*options.os, std::to_string(i), run_duration, work_share,
93 | /*dashes=*/false);
94 | } else {
95 | PrintLineCsv(*options.os, std::to_string(i), run_duration, work_share);
96 | }
97 | }
98 | PrintTotal(run_durations, runtime, options);
99 | }
100 |
101 | } // namespace ghost_test
102 |
--------------------------------------------------------------------------------
/experiments/antagonist/results.h:
--------------------------------------------------------------------------------
1 | // Copyright 2021 Google LLC
2 | //
3 | // Use of this source code is governed by a BSD-style
4 | // license that can be found in the LICENSE file or at
5 | // https://developers.google.com/open-source/licenses/bsd
6 |
7 | #ifndef GHOST_EXPERIMENTS_ANTAGONIST_RESULTS_H_
8 | #define GHOST_EXPERIMENTS_ANTAGONIST_RESULTS_H_
9 |
10 | #include "absl/time/clock.h"
11 |
12 | namespace ghost_test {
13 |
// Controls how the antagonist results are printed.
//
// Both members have default initializers so that a default-constructed
// 'PrintOptions' is in a well-defined state; in particular, an unset 'os' is
// reliably null and can be detected by a null check.
struct PrintOptions {
  // If true, prints the results in human-readable form. Otherwise, prints the
  // results in CSV form.
  bool pretty = false;
  // The output stream to send the results to. We make 'os' a pointer rather
  // than a reference since a reference cannot be reassigned.
  //
  // 'os' is owned by whoever instantiated this struct.
  std::ostream* os = nullptr;
};
24 |
25 | // Prints the results for the workers.
26 | void Print(const std::vector& run_durations,
27 | absl::Duration runtime, const PrintOptions& options);
28 |
29 | } // namespace ghost_test
30 |
31 | #endif // GHOST_EXPERIMENTS_ANTAGONIST_RESULTS_H_
32 |
--------------------------------------------------------------------------------
/experiments/microbenchmarks/ioctl_test.cc:
--------------------------------------------------------------------------------
1 | // Copyright 2021 Google LLC
2 | //
3 | // Use of this source code is governed by a BSD-style
4 | // license that can be found in the LICENSE file or at
5 | // https://developers.google.com/open-source/licenses/bsd
6 |
7 | #include
8 |
9 | #include "benchmark/benchmark.h"
10 | #include "lib/enclave.h"
11 | #include "lib/ghost.h"
12 | #include "lib/topology.h"
13 |
14 | namespace ghost {
15 |
16 | void BM_ghost_null_ioctl(benchmark::State& state) {
17 | GhostHelper()->InitCore();
18 | Topology* topology = MachineTopology();
19 | LocalEnclave enclave(AgentConfig(topology, CpuList(*topology)));
20 | int ctl = GhostHelper()->GetGlobalEnclaveCtlFd();
21 |
22 | for (auto _ : state) {
23 | CHECK_EQ(ioctl(ctl, GHOST_IOC_NULL), 0);
24 | }
25 | }
26 | BENCHMARK(BM_ghost_null_ioctl);
27 |
28 | void BM_getpid(benchmark::State& state) {
29 | for (auto _ : state) {
30 | CHECK_GT(syscall(SYS_getpid), 0);
31 | }
32 | }
33 | BENCHMARK(BM_getpid);
34 |
35 | } // namespace ghost
36 |
37 | int main(int argc, char** argv) {
38 | ::benchmark::RunSpecifiedBenchmarks();
39 | }
40 |
--------------------------------------------------------------------------------
/experiments/rocksdb/cfs_orchestrator.h:
--------------------------------------------------------------------------------
1 | // Copyright 2021 Google LLC
2 | //
3 | // Use of this source code is governed by a BSD-style
4 | // license that can be found in the LICENSE file or at
5 | // https://developers.google.com/open-source/licenses/bsd
6 |
7 | #ifndef GHOST_EXPERIMENTS_ROCKSDB_CFS_ORCHESTRATOR_H_
8 | #define GHOST_EXPERIMENTS_ROCKSDB_CFS_ORCHESTRATOR_H_
9 |
10 | #include "absl/synchronization/barrier.h"
11 | #include "experiments/rocksdb/latency.h"
12 | #include "experiments/rocksdb/orchestrator.h"
13 | #include "experiments/rocksdb/request.h"
14 | #include "experiments/shared/thread_wait.h"
15 |
16 | namespace ghost_test {
17 |
18 | // This is the orchestrator for the CFS (Linux Completely Fair Scheduler)
19 | // experiments. All threads are scheduled by CFS. The worker threads may either
20 | // (1) spin when waiting for more work to be assigned to them or (2) sleep on a
21 | // futex until more work is assigned to them.
22 | //
23 | // Example:
24 | // Options options;
25 | // ... Fill in the options.
26 | // CfsOrchestrator orchestrator_(options);
27 | // (Constructs orchestrator with options.)
28 | // ...
29 | // orchestrator_.Terminate();
30 | // (Tells orchestrator to stop the experiment and print the results.)
31 | class CfsOrchestrator final : public Orchestrator {
32 | public:
33 | explicit CfsOrchestrator(Options opts);
34 | ~CfsOrchestrator() final {}
35 |
36 | void Terminate() final;
37 |
38 | protected:
39 | // For CFS, the load generator passes requests to the dispatcher.
40 | void LoadGenerator(uint32_t sid) final;
41 |
42 | void Dispatcher(uint32_t sid) final;
43 |
44 | void Worker(uint32_t sid) final;
45 |
46 | private:
47 | // Initializes the thread pool.
48 | void InitThreadPool();
49 |
50 | // The dispatcher calls this method to receive requests sent to it by the load
51 | // generator.
52 | void HandleLoadGenerator(uint32_t sid);
53 |
54 | // The dispatcher calls this method to populate 'idle_sids_' with a list of
55 | // the SIDs of idle workers. Note that this method clears 'idle_sids_' before
56 | // filling it in.
57 | void GetIdleWorkerSIDs(uint32_t sid);
58 |
59 | // Allows runnable threads to run and keeps idle threads either spinning or
60 | // sleeping on a futex until they are marked runnable again.
61 | ThreadWait thread_wait_;
62 |
63 | // Each thread (the load generator, the dispatcher, and the workers)
64 | // decrements this once they have initialized themselves. This barrier is used
65 | // to block the load generator until all threads have been initialized so that
66 | // it does not generate load while the system is initializing. If it generated
67 | // load while the system is initializing, the experiment results would be bad
68 | // solely due to initialization costs rather than any deficiency in the
69 | // system. The initialization costs are irrelevant to the experiment.
70 | absl::Barrier threads_ready_;
71 |
72 | // The max number of requests that the load generator will send at a time to
73 | // the dispatcher.
74 | static constexpr size_t kLoadGeneratorBatchSize = 100;
75 |
76 | // The dispatchers' queues to hold waiting requests that will later be
77 | // assigned to workers.
78 | std::vector> dispatcher_queue_;
79 |
80 | // The dispatchers use this to store idle SIDs. We make this a class member
81 | // rather than a local variable in the 'Dispatcher' method to avoid repeatedly
82 | // allocating memory for the list backing in the dispatchers' common case,
83 | // which is expensive.
84 | std::vector> idle_sids_;
85 | };
86 |
87 | } // namespace ghost_test
88 |
89 | #endif // GHOST_EXPERIMENTS_ROCKSDB_CFS_ORCHESTRATOR_H_
90 |
--------------------------------------------------------------------------------
/experiments/rocksdb/clock.h:
--------------------------------------------------------------------------------
1 | // Copyright 2021 Google LLC
2 | //
3 | // Use of this source code is governed by a BSD-style
4 | // license that can be found in the LICENSE file or at
5 | // https://developers.google.com/open-source/licenses/bsd
6 |
7 | #ifndef GHOST_EXPERIMENTS_ROCKSDB_CLOCK_H_
8 | #define GHOST_EXPERIMENTS_ROCKSDB_CLOCK_H_
9 |
10 | #include "absl/time/clock.h"
11 | #include "lib/base.h"
12 |
13 | // This is a pure virtual parent class that represents a clock.
14 | class Clock {
15 | public:
16 | virtual ~Clock() = 0;
17 |
18 | // Returns the current clock time.
19 | virtual absl::Time TimeNow() const = 0;
20 | };
21 |
22 | inline Clock::~Clock() {}
23 |
24 | // This represents a real clock that returns the current time from
25 | // `ghost::MonotonicNow()`.
26 | //
27 | // Example:
28 | // RealClock clock;
29 | // absl::Time now = clock.TimeNow();
30 | class RealClock final : public Clock {
31 | public:
32 | // Returns the current time (from `ghost::MonotonicNow()`).
33 | absl::Time TimeNow() const final { return ghost::MonotonicNow(); }
34 | };
35 |
36 | // This represents a simulated clock whose time can be arbitrarily changed. This
37 | // is mainly useful for testing code that depends on time, such as the `Ingress`
38 | // class.
39 | //
40 | // Example:
41 | // SimulatedClock clock;
42 | // clock.SetTime(ghost::MonotonicNow());
43 | // clock.AdvanceTime(absl::Minutes(10));
44 | // absl::Time time = clock.TimeNow();
45 | // (`time` is equal to the time about 10 minutes from now.)
46 | class SimulatedClock final : public Clock {
47 | public:
48 | absl::Time TimeNow() const { return time_; }
49 |
50 | // Set the clock to `time`.
51 | void SetTime(absl::Time time) { time_ = time; }
52 |
53 | // Change the time on the clock by `duration`.
54 | void AdvanceTime(absl::Duration duration) { time_ += duration; }
55 |
56 | private:
57 | // The current time for this clock.
58 | absl::Time time_;
59 | };
60 |
61 | #endif // GHOST_EXPERIMENTS_ROCKSDB_CLOCK_H_
62 |
--------------------------------------------------------------------------------
/experiments/rocksdb/database.cc:
--------------------------------------------------------------------------------
1 | // Copyright 2021 Google LLC
2 | //
3 | // Use of this source code is governed by a BSD-style
4 | // license that can be found in the LICENSE file or at
5 | // https://developers.google.com/open-source/licenses/bsd
6 |
7 | #include "experiments/rocksdb/database.h"
8 |
9 | #include
10 |
11 | #include "rocksdb/table.h"
12 |
13 | namespace ghost_test {
14 |
15 | bool Database::OpenDatabase(const std::filesystem::path& path) {
16 | rocksdb::Options options;
17 | options.create_if_missing = true;
18 | options.allow_mmap_reads = true;
19 | options.allow_mmap_writes = true;
20 | options.error_if_exists = false;
21 |
22 | rocksdb::BlockBasedTableOptions table_options;
23 | // Use a ClockCache as the default LRU cache requires locking a per-shard
24 | // mutex, even on lookups. Using a ClockCache improves lookup throughput as a
25 | // mutex is only acquired on inserts.
26 | table_options.block_cache = rocksdb::NewClockCache(kCacheSize, 0);
27 | CHECK_NE(table_options.block_cache, nullptr);
28 | options.table_factory.reset(
29 | rocksdb::NewBlockBasedTableFactory(table_options));
30 |
31 | options.compression = rocksdb::kNoCompression;
32 | options.OptimizeLevelStyleCompaction();
33 | rocksdb::Status status = rocksdb::DB::Open(options, path.string(), &db_);
34 | return status.ok();
35 | }
36 |
37 | Database::Database(const std::filesystem::path& path) {
38 | if (!OpenDatabase(path)) {
39 | // The database is corrupted.
40 | CHECK(std::filesystem::exists(path));
41 | CHECK_GT(std::filesystem::remove_all(path), 0);
42 | CHECK(OpenDatabase(path));
43 | }
44 | CHECK(Fill());
45 | PrepopulateCache();
46 | }
47 |
48 | Database::~Database() { delete db_; }
49 |
50 | bool Database::Fill() {
51 | for (uint32_t i = 0; i < kNumEntries; i++) {
52 | rocksdb::Status status =
53 | db_->Put(rocksdb::WriteOptions(), Key(i), Value(i));
54 | if (!status.ok()) {
55 | return false;
56 | }
57 | }
58 | return true;
59 | }
60 |
61 | void Database::PrepopulateCache() const {
62 | std::string value;
63 | for (int i = 0; i < kNumEntries; i++) {
64 | CHECK(Get(i, value));
65 | }
66 | }
67 |
68 | bool Database::Get(uint32_t entry, std::string& value) const {
69 | rocksdb::Status status = db_->Get(rocksdb::ReadOptions(), Key(entry), &value);
70 | if (status.ok()) {
71 | CHECK_EQ(value, Value(entry));
72 | return true;
73 | }
74 | return false;
75 | }
76 |
77 | bool Database::RangeQuery(uint32_t start_entry, uint32_t range_size,
78 | std::string& value) const {
79 | std::stringstream ss;
80 | std::unique_ptr it(
81 | db_->NewIterator(rocksdb::ReadOptions()));
82 | it->Seek(Key(start_entry));
83 |
84 | for (uint32_t i = 0; i < range_size; i++) {
85 | if (!it->Valid()) {
86 | return false;
87 | }
88 | CHECK_EQ(it->value().ToString(), Value(start_entry + i));
89 | ss << it->value().ToString();
90 | if (i < range_size - 1) {
91 | ss << ",";
92 | }
93 | it->Next();
94 | }
95 | value = ss.str();
96 | return true;
97 | }
98 |
99 | } // namespace ghost_test
100 |
--------------------------------------------------------------------------------
/experiments/rocksdb/ingress.cc:
--------------------------------------------------------------------------------
1 | // Copyright 2021 Google LLC
2 | //
3 | // Use of this source code is governed by a BSD-style
4 | // license that can be found in the LICENSE file or at
5 | // https://developers.google.com/open-source/licenses/bsd
6 |
7 | #include "experiments/rocksdb/ingress.h"
8 |
9 | #include "experiments/rocksdb/database.h"
10 |
11 | namespace ghost_test {
12 |
13 | SyntheticNetwork::SyntheticNetwork(double throughput, double range_query_ratio,
14 | Clock& clock)
15 | : ingress_(throughput, clock), range_query_ratio_(range_query_ratio) {
16 | CHECK_GE(range_query_ratio, 0.0);
17 | CHECK_LE(range_query_ratio, 1.0);
18 | }
19 |
20 | void SyntheticNetwork::Start() {
21 | CHECK(!start_.HasBeenNotified());
22 |
23 | ingress_.Start();
24 | start_.Notify();
25 | }
26 |
27 | bool SyntheticNetwork::Poll(Request& request) {
28 | CHECK(start_.HasBeenNotified());
29 |
30 | const auto [arrived, arrival_time] = ingress_.HasNewArrival();
31 | if (!arrived) {
32 | return false;
33 | }
34 | // A request is in the ingress queue
35 | absl::Time received = ghost::MonotonicNow();
36 | bool get = absl::Bernoulli(gen_, 1.0 - range_query_ratio_);
37 | if (get) {
38 | // Get request
39 | request.work = Request::Get{
40 | .entry = absl::Uniform(gen_, 0, Database::kNumEntries)};
41 | } else {
42 | // Range query
43 | request.work = Request::Range{
44 | .start_entry = absl::Uniform(
45 | gen_, 0, Database::kNumEntries - kRangeQuerySize + 1),
46 | .size = kRangeQuerySize};
47 | }
48 | request.request_generated = arrival_time;
49 | request.request_received = received;
50 | return true;
51 | }
52 |
53 | } // namespace ghost_test
54 |
--------------------------------------------------------------------------------
/experiments/rocksdb/latency.h:
--------------------------------------------------------------------------------
1 | // Copyright 2021 Google LLC
2 | //
3 | // Use of this source code is governed by a BSD-style
4 | // license that can be found in the LICENSE file or at
5 | // https://developers.google.com/open-source/licenses/bsd
6 |
7 | #ifndef GHOST_EXPERIMENTS_ROCKSDB_LATENCY_H_
8 | #define GHOST_EXPERIMENTS_ROCKSDB_LATENCY_H_
9 |
10 | #include "absl/time/clock.h"
11 | #include "experiments/rocksdb/request.h"
12 |
13 | namespace ghost_test {
14 |
15 | namespace latency {
16 |
// Controls how the latency results are printed.
//
// Every member has a default initializer so that a default-constructed
// 'PrintOptions' is in a well-defined state even when a caller sets only some
// of the fields.
struct PrintOptions {
  // If true, prints the results in human-readable form. Otherwise, prints the
  // results in CSV form.
  bool pretty = false;
  // If true, only prints the end-to-end results, rather than the results for
  // each stage.
  bool print_last = false;
  // If true, prints the entire distribution.
  bool distribution = false;
  // If true, prints the latencies in units of nanoseconds. If false, prints the
  // latencies in units of microseconds.
  bool ns = false;
  // The output stream to send the results to. We make 'os' a pointer rather
  // than a reference since a reference cannot be reassigned.
  std::ostream* os = nullptr;
};
33 |
34 | void Print(const std::vector& requests, absl::Duration runtime,
35 | PrintOptions options);
36 |
// Column widths for the pretty-printed results. They are declared in this
// header rather than in latency.cc because latency_test also needs them in
// order to generate the correct number of dashes for the pretty print prefix.
constexpr size_t kStageLen = 28;
constexpr size_t kTotalRequestsLen = 18;
constexpr size_t kThroughputLen = 22;
constexpr size_t kResultLen = 12;
// Total width of the dashed separator: the stage, total-requests and
// throughput columns, six result columns, plus 8 to account for the space
// between each column in the results.
constexpr size_t kNumDashes =
    kStageLen + kTotalRequestsLen + kThroughputLen + (kResultLen * 6) + 8;
47 |
48 | } // namespace latency
49 |
50 | } // namespace ghost_test
51 |
52 | #endif // GHOST_EXPERIMENTS_ROCKSDB_LATENCY_H_
53 |
--------------------------------------------------------------------------------
/experiments/rocksdb/options_test.cc:
--------------------------------------------------------------------------------
1 | // Copyright 2021 Google LLC
2 | //
3 | // Use of this source code is governed by a BSD-style
4 | // license that can be found in the LICENSE file or at
5 | // https://developers.google.com/open-source/licenses/bsd
6 |
7 | #include "gmock/gmock.h"
8 | #include "gtest/gtest.h"
9 | #include "experiments/rocksdb/orchestrator.h"
10 |
11 | // These tests check that the application prints options and parses command line
12 | // flags properly.
13 |
14 | namespace ghost_test {
15 | namespace {
16 |
17 | using ::testing::Eq;
18 |
19 | // Returns orchestrator options suitable for the tests.
20 | Options GetOptions() {
21 | Options options;
22 |
23 | options.print_options.pretty = true;
24 | options.print_options.distribution = false;
25 | options.print_options.ns = false;
26 | options.print_options.os = &std::cout;
27 | options.print_get = true;
28 | options.print_range = false;
29 | options.rocksdb_db_path = "/tmp/orch_db";
30 | options.throughput = 20'000.0;
31 | options.range_query_ratio = 0.005;
32 | options.load_generator_cpus =
33 | ghost::MachineTopology()->ToCpuList(std::vector{1});
34 | options.cfs_dispatcher_cpus =
35 | ghost::MachineTopology()->ToCpuList(std::vector{2});
36 | options.num_workers = 2;
37 | options.cfs_wait_type = ThreadWait::WaitType::kSpin;
38 | options.worker_cpus =
39 | ghost::MachineTopology()->ToCpuList(std::vector{3, 4});
40 | options.ghost_wait_type = GhostWaitType::kFutex;
41 | options.get_duration = absl::Microseconds(10);
42 | options.range_duration = absl::Milliseconds(5);
43 | options.get_exponential_mean = absl::ZeroDuration();
44 | options.batch = 1;
45 | options.experiment_duration = absl::Seconds(15);
46 | options.discard_duration = absl::Seconds(2);
47 | options.scheduler = ghost::GhostThread::KernelScheduler::kCfs;
48 | options.ghost_qos = 2;
49 |
50 | return options;
51 | }
52 |
// Returns the text that the '<<' operator for 'Options' is expected to
// produce for the options from 'GetOptions': one "name: value" pair per line,
// sorted alphabetically by option name, with no trailing newline.
std::string GetExpectedOutput() {
  return "batch: 1\n"
         "cfs_dispatcher_cpus: 2\n"
         "cfs_wait_type: spin\n"
         "discard_duration: 2s\n"
         "experiment_duration: 15s\n"
         "get_duration: 10us\n"
         "get_exponential_mean: 0\n"
         "ghost_qos: 2\n"
         "ghost_wait_type: futex\n"
         "load_generator_cpus: 1\n"
         "num_workers: 2\n"
         "print_distribution: false\n"
         "print_format: pretty\n"
         "print_get: true\n"
         "print_ns: false\n"
         "print_range: false\n"
         "range_duration: 5ms\n"
         "range_query_ratio: 0.005000\n"
         "rocksdb_db_path: /tmp/orch_db\n"
         "scheduler: cfs\n"
         "throughput: 20000.000000\n"
         "worker_cpus: 3 4";
}
79 |
80 | // This tests that the '<<' operator prints all options and their values in
81 | // alphabetical order by option name.
82 | TEST(OptionsTest, PrintOptions) {
83 | Options options = GetOptions();
84 | std::ostringstream os;
85 |
86 | os << options;
87 | EXPECT_THAT(os.str(), Eq(GetExpectedOutput()));
88 | }
89 |
90 | } // namespace
91 | } // namespace ghost_test
92 |
--------------------------------------------------------------------------------
/experiments/rocksdb/request.h:
--------------------------------------------------------------------------------
1 | // Copyright 2021 Google LLC
2 | //
3 | // Use of this source code is governed by a BSD-style
4 | // license that can be found in the LICENSE file or at
5 | // https://developers.google.com/open-source/licenses/bsd
6 |
7 | #ifndef GHOST_EXPERIMENTS_ROCKSDB_REQUEST_H_
8 | #define GHOST_EXPERIMENTS_ROCKSDB_REQUEST_H_
9 |
10 | #include "absl/random/random.h"
11 | #include "absl/time/clock.h"
12 | #include "lib/base.h"
13 |
14 | namespace ghost_test {
15 |
16 | // A synthetic request for RocksDB generated by 'Ingress'.
17 | struct Request {
18 | struct Get {
19 | // The entry to access for the Get request.
20 | uint32_t entry;
21 | };
22 |
23 | struct Range {
24 | // The accessed range is [start_entry, start_entry + size).
25 |
26 | // The first entry in the range.
27 | uint32_t start_entry;
28 | // The range size.
29 | uint32_t size;
30 | };
31 |
32 | // Returns a sample duration from an exponential distribution with a mean
33 | // duration of 'mean'.
34 | // This is used to generate a request service time from an exponential
35 | // distribution (so the request service times follow a lightly-tailed
36 | // distribution).
37 | static absl::Duration GetExponentialHandleTime(absl::BitGen& gen,
38 | absl::Duration mean) {
39 | int64_t mean_ns = absl::ToInt64Nanoseconds(mean);
40 | // In the exponential distribution Exp('lambda'), the expected value (i.e.,
41 | // the mean) is equal to '1 / lambda'. Thus, we need to pass '1 / mean_ns'
42 | // as 'lambda' to the exponential distribution to have a mean sample value
43 | // of 'mean_ns'.
44 | double handle_ns = absl::Exponential(gen, 1.0 / mean_ns);
45 | return absl::Nanoseconds(handle_ns);
46 | }
47 |
48 | // Returns true if this is a Get request. Returns false otherwise (i.e., this
49 | // is a Range query).
50 | bool IsGet() const { return work.index() == 0; }
51 |
52 | // Returns true if this is a Range query. Returns false otherwise (i.e., this
53 | // is a Get request).
54 | bool IsRange() const { return work.index() == 1; }
55 |
56 | // Unique request identifier.
57 | uint64_t id;
58 |
59 | // When the request was generated.
60 | absl::Time request_generated;
61 | // When the request was picked up by the app.
62 | absl::Time request_received;
63 | // When the request was assigned to a worker.
64 | absl::Time request_assigned;
65 | // When the request started to be handled by a worker.
66 | absl::Time request_start;
67 | // When the worker finished handling the request.
68 | absl::Time request_finished;
69 |
70 | // The work to do. The request is either a Get request or a Range query.
71 | std::variant work;
72 | };
73 |
74 | } // namespace ghost_test
75 |
76 | #endif // GHOST_EXPERIMENTS_ROCKSDB_REQUEST_H_
77 |
--------------------------------------------------------------------------------
/experiments/scripts/BUILD:
--------------------------------------------------------------------------------
1 | # Note: If you modify this BUILD file, please contact jhumphri@ first to ensure
2 | # that you are not breaking the Copybara script.
3 |
4 | # Runs the RocksDB and Antagonist experiments on ghOSt and CFS (Linux Completely Fair Scheduler).
5 |
6 | package(default_applicable_licenses = ["//:license"])
7 |
8 | licenses(["notice"])
9 |
10 | load("@subpar//:subpar.bzl", "par_binary")
11 | load("@my_deps//:requirements.bzl", "requirement")
12 |
# This library supports all experiments. It bundles the shared Python helpers
# (`options.py`, `run.py`, `setup.py`) and lists the `//:agent_shinjuku`,
# `//:antagonist`, and `//:rocksdb` targets as data dependencies so they are
# available to every experiment binary that depends on this library.
py_library(
    name = "experiments",
    srcs = [
        "options.py",
        "run.py",
        "setup.py",
    ],
    data = [
        "//:agent_shinjuku",
        "//:antagonist",
        "//:rocksdb",
    ],
)
27 |
# Runs the centralized queuing experiments.
# Packages `centralized_queuing.py` as a Python 3 par_binary on top of the
# shared `:experiments` library and absl-py.
par_binary(
    name = "centralized_queuing",
    srcs = [
        "centralized_queuing.py",
    ],
    python_version = "PY3",
    deps = [
        ":experiments",
        requirement("absl-py"),
    ],
)
40 |
# Runs the Shinjuku experiments.
# Packages `shinjuku.py` as a Python 3 par_binary on top of the shared
# `:experiments` library and absl-py.
par_binary(
    name = "shinjuku",
    srcs = [
        "shinjuku.py",
    ],
    python_version = "PY3",
    deps = [
        ":experiments",
        requirement("absl-py"),
    ],
)
53 |
# Runs the Shenango experiments.
# Packages `shenango.py` as a Python 3 par_binary on top of the shared
# `:experiments` library and absl-py.
par_binary(
    name = "shenango",
    srcs = [
        "shenango.py",
    ],
    python_version = "PY3",
    deps = [
        ":experiments",
        requirement("absl-py"),
    ],
)
66 |
# Runs the Shinjuku+Shenango experiments.
# Packages `shinjuku_shenango.py` as a Python 3 par_binary on top of the shared
# `:experiments` library and absl-py.
par_binary(
    name = "shinjuku_shenango",
    srcs = [
        "shinjuku_shenango.py",
    ],
    python_version = "PY3",
    deps = [
        ":experiments",
        requirement("absl-py"),
    ],
)
79 |
--------------------------------------------------------------------------------
/experiments/scripts/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/ghost-userspace/9ca0a1fb6ed88f0c4b0b40a5a35502938efa567f/experiments/scripts/__init__.py
--------------------------------------------------------------------------------
/experiments/scripts/centralized_queuing.py:
--------------------------------------------------------------------------------
1 | # Copyright 2021 Google LLC
2 | #
3 | # Use of this source code is governed by a BSD-style
4 | # license that can be found in the LICENSE file or at
5 | # https://developers.google.com/open-source/licenses/bsd
6 | """Runs the RocksDB centralized-queuing experiments.
7 |
8 | This script runs the centralized-queuing RocksDB experiments on ghOSt and on
9 | CFS. In these experiments, there is a centralized queue maintained for RocksDB
10 | requests and the requests are not reordered or preempted. This script should be
11 | run on a machine with an Intel Xeon Platinum 8173M as that is what we used in
12 | the paper. If another CPU is used, the throughput ranges below should be
13 | adjusted.
14 | """
15 |
16 | from typing import Sequence
17 | from absl import app
18 | from experiments.scripts.options import CheckSchedulers
19 | from experiments.scripts.options import GetGhostOptions
20 | from experiments.scripts.options import GetRocksDBOptions
21 | from experiments.scripts.options import Scheduler
22 | from experiments.scripts.run import Experiment
23 | from experiments.scripts.run import Run
24 |
25 | _NUM_CPUS = 8
26 | _NUM_CFS_WORKERS = _NUM_CPUS - 2
27 | _NUM_GHOST_WORKERS = 11
28 |
29 |
def RunCfs():
  """Runs the centralized-queuing experiment on CFS.

  CFS is the Linux Completely Fair Scheduler. Neither an Antagonist nor ghOSt
  options are configured for this run.
  """
  experiment: Experiment = Experiment()
  # Coarse sweep: throughputs 10000, 20000, 30000, ..., 440000.
  coarse_sweep = list(range(10000, 441000, 10000))
  # Fine sweep toward the end: 450000, 451000, 452000, ..., 480000.
  fine_sweep = list(range(450000, 481000, 1000))
  experiment.throughputs = coarse_sweep + fine_sweep
  experiment.rocksdb = GetRocksDBOptions(Scheduler.CFS, _NUM_CPUS,
                                         _NUM_CFS_WORKERS)
  experiment.rocksdb.get_exponential_mean = '1us'
  experiment.antagonist = None
  experiment.ghost = None

  Run(experiment)
43 |
44 |
def RunGhost():
  """Runs the centralized-queuing experiment on ghOSt.

  No Antagonist is configured. Time-based preemption is disabled by using an
  infinite preemption time slice.
  """
  experiment: Experiment = Experiment()
  # Coarse sweep: throughputs 10000, 20000, 30000, ..., 420000.
  coarse_sweep = list(range(10000, 421000, 10000))
  # Fine sweep toward the end: 430000, 431000, 432000, ..., 460000.
  fine_sweep = list(range(430000, 461000, 1000))
  experiment.throughputs = coarse_sweep + fine_sweep
  experiment.rocksdb = GetRocksDBOptions(Scheduler.GHOST, _NUM_CPUS,
                                         _NUM_GHOST_WORKERS)
  experiment.rocksdb.get_exponential_mean = '1us'
  experiment.antagonist = None
  experiment.ghost = GetGhostOptions(_NUM_CPUS)
  # There is no time-based preemption for centralized queuing, so set the
  # preemption time slice to infinity.
  experiment.ghost.preemption_time_slice = 'inf'

  Run(experiment)
61 |
62 |
def main(argv: Sequence[str]):
  """Runs the experiments named on the command line, in the order given.

  Args:
    argv: The program name followed by one or two scheduler names (`cfs`
      and/or `ghost`).

  Raises:
    app.UsageError: No scheduler was passed, or more than two were passed.
    ValueError: A scheduler name is not valid.
  """
  if len(argv) > 3:
    raise app.UsageError('Too many command-line arguments.')
  if len(argv) == 1:
    raise app.UsageError(
        'No experiment specified. Pass `cfs` and/or `ghost` as arguments.')

  # Validate every requested scheduler up front so that nothing runs when any
  # argument is bad.
  requested = argv[1:]
  if not CheckSchedulers(requested):
    raise ValueError('Invalid scheduler specified.')

  # Run the experiments.
  for name in requested:
    scheduler = Scheduler(name)
    if scheduler == Scheduler.CFS:
      RunCfs()
    elif scheduler == Scheduler.GHOST:
      RunGhost()
    else:
      raise ValueError(f'Unknown scheduler {scheduler}.')
83 |
84 |
85 | if __name__ == '__main__':
86 | app.run(main)
87 |
--------------------------------------------------------------------------------
/experiments/scripts/shenango.py:
--------------------------------------------------------------------------------
1 | # Copyright 2021 Google LLC
2 | #
3 | # Use of this source code is governed by a BSD-style
4 | # license that can be found in the LICENSE file or at
5 | # https://developers.google.com/open-source/licenses/bsd
6 | """Runs the RocksDB Shenango experiments.
7 |
8 | This script runs the RocksDB Shenango experiments on ghOSt and on CFS. In these
9 | experiments, RocksDB is co-located with an Antagonist. Specifically, the
10 | dispatcher and worker threads are co-located with the Antagonist threads while
11 | the load generator is isolated on its own CPU (to ensure that the load we think
12 | we are generating is the load we are actually generating). For ghOSt, the
13 | Antagonist threads are preempted to allow RocksDB threads to run. For CFS, this
14 | preemption is left to CFS to figure out. Furthermore, for the CFS experiments,
15 | the worker threads sleep on a futex when they do not have work rather than spin
16 | so that CFS gives the Antagonist threads a chance to run.
17 | """
18 |
19 | from typing import Sequence
20 | from absl import app
21 | from experiments.scripts.options import CfsWaitType
22 | from experiments.scripts.options import CheckSchedulers
23 | from experiments.scripts.options import GetAntagonistOptions
24 | from experiments.scripts.options import GetGhostOptions
25 | from experiments.scripts.options import GetRocksDBOptions
26 | from experiments.scripts.options import Scheduler
27 | from experiments.scripts.run import Experiment
28 | from experiments.scripts.run import Run
29 |
30 | _NUM_CPUS = 8
31 | _NUM_CFS_WORKERS = _NUM_CPUS - 2
32 | _NUM_GHOST_WORKERS = 11
33 | # Subtract 1 for the Antagonist since the Antagonist does not run a thread on
34 | # the same CPU as the load generator.
35 | _NUM_ANTAGONIST_CPUS = _NUM_CPUS - 1
36 |
37 |
def RunCfs():
  """Runs the Shenango experiment on CFS (Linux Completely Fair Scheduler).

  RocksDB workers wait on a futex and an Antagonist is configured alongside
  RocksDB. No ghOSt options are used.
  """
  e: Experiment = Experiment()
  # Run throughputs 10000, 20000, 30000, ..., 60000. The upper bound is 61000
  # (not 600000) so that the coarse sweep stops at 60000 and does not overlap
  # or overshoot the fine sweep below.
  e.throughputs = list(range(10000, 61000, 10000))
  # Toward the end, run throughputs 70000, 71000, 72000, ..., 120000.
  e.throughputs.extend(range(70000, 121000, 1000))
  e.rocksdb = GetRocksDBOptions(Scheduler.CFS, _NUM_CPUS, _NUM_CFS_WORKERS)
  e.rocksdb.cfs_wait_type = CfsWaitType.FUTEX
  e.rocksdb.get_exponential_mean = '1us'
  e.antagonist = GetAntagonistOptions(Scheduler.CFS, _NUM_ANTAGONIST_CPUS)
  e.ghost = None

  Run(e)
52 |
53 |
def RunGhost():
  """Runs the Shenango experiment on ghOSt.

  RocksDB is given QoS 2 and the Antagonist QoS 1. Time-based preemption is
  disabled by using an infinite preemption time slice.
  """
  experiment: Experiment = Experiment()
  # Coarse sweep: throughputs 10000, 20000, 30000, ..., 380000.
  coarse_sweep = list(range(10000, 381000, 10000))
  # Fine sweep toward the end: 390000, 391000, 392000, ..., 450000.
  fine_sweep = list(range(390000, 451000, 1000))
  experiment.throughputs = coarse_sweep + fine_sweep
  experiment.rocksdb = GetRocksDBOptions(Scheduler.GHOST, _NUM_CPUS,
                                         _NUM_GHOST_WORKERS)
  experiment.rocksdb.get_exponential_mean = '1us'
  experiment.rocksdb.ghost_qos = 2
  experiment.antagonist = GetAntagonistOptions(Scheduler.GHOST,
                                               _NUM_ANTAGONIST_CPUS)
  experiment.antagonist.ghost_qos = 1
  experiment.ghost = GetGhostOptions(_NUM_CPUS)
  # There is no time-based preemption for Shenango, so set the preemption time
  # slice to infinity.
  experiment.ghost.preemption_time_slice = 'inf'

  Run(experiment)
72 |
73 |
def main(argv: Sequence[str]):
  """Runs the experiments named on the command line, in the order given.

  Args:
    argv: The program name followed by one or two scheduler names (`cfs`
      and/or `ghost`).

  Raises:
    app.UsageError: No scheduler was passed, or more than two were passed.
    ValueError: A scheduler name is not valid.
  """
  if len(argv) > 3:
    raise app.UsageError('Too many command-line arguments.')
  if len(argv) == 1:
    raise app.UsageError(
        'No experiment specified. Pass `cfs` and/or `ghost` as arguments.')

  # Validate every requested scheduler up front so that nothing runs when any
  # argument is bad.
  requested = argv[1:]
  if not CheckSchedulers(requested):
    raise ValueError('Invalid scheduler specified.')

  # Run the experiments.
  for name in requested:
    scheduler = Scheduler(name)
    if scheduler == Scheduler.CFS:
      RunCfs()
    elif scheduler == Scheduler.GHOST:
      RunGhost()
    else:
      raise ValueError(f'Unknown scheduler {scheduler}.')
94 |
95 |
96 | if __name__ == '__main__':
97 | app.run(main)
98 |
--------------------------------------------------------------------------------
/experiments/scripts/shinjuku.py:
--------------------------------------------------------------------------------
1 | # Copyright 2021 Google LLC
2 | #
3 | # Use of this source code is governed by a BSD-style
4 | # license that can be found in the LICENSE file or at
5 | # https://developers.google.com/open-source/licenses/bsd
6 | """Runs the RocksDB Shinjuku experiments.
7 |
8 | This script runs the RocksDB Shinjuku experiments on ghOSt and on CFS. In these
9 | experiments, there is a centralized FIFO queue maintained for RocksDB requests.
10 | For ghOSt, long requests that exceed their time slice are preempted so that they
11 | do not prevent short requests from running (i.e., ghOSt prevents head-of-line
12 | blocking). The preempted requests are added to the back of the FIFO. For CFS,
13 | requests are run to completion.
14 | """
15 |
16 | from typing import Sequence
17 | from absl import app
18 | from experiments.scripts.options import CheckSchedulers
19 | from experiments.scripts.options import GetGhostOptions
20 | from experiments.scripts.options import GetRocksDBOptions
21 | from experiments.scripts.options import Scheduler
22 | from experiments.scripts.run import Experiment
23 | from experiments.scripts.run import Run
24 |
25 | _NUM_CPUS = 8
26 | _NUM_CFS_WORKERS = _NUM_CPUS - 2
27 | _NUM_GHOST_WORKERS = 200
28 |
29 |
def RunCfs():
  """Runs the Shinjuku experiment on CFS (Linux Completely Fair Scheduler).

  A range-query ratio of 0.005 is used. Neither an Antagonist nor ghOSt options
  are configured.
  """
  experiment: Experiment = Experiment()
  # Coarse sweep: throughputs 10000, 20000, 30000, and 40000.
  coarse_sweep = list(range(10000, 50000, 10000))
  # Fine sweep toward the end: 50000, 51000, 52000, ..., 80000.
  fine_sweep = list(range(50000, 81000, 1000))
  experiment.throughputs = coarse_sweep + fine_sweep
  experiment.rocksdb = GetRocksDBOptions(Scheduler.CFS, _NUM_CPUS,
                                         _NUM_CFS_WORKERS)
  experiment.rocksdb.range_query_ratio = 0.005
  experiment.antagonist = None
  experiment.ghost = None

  Run(experiment)
43 |
44 |
def RunGhost():
  """Runs the Shinjuku experiment on ghOSt.

  A range-query ratio of 0.005 is used with a 30us preemption time slice. No
  Antagonist is configured.
  """
  experiment: Experiment = Experiment()
  # Coarse sweep: throughputs 10000, 20000, 30000, ..., 130000.
  coarse_sweep = list(range(10000, 140000, 10000))
  # Fine sweep toward the end: 140000, 141000, 142000, ..., 150000.
  fine_sweep = list(range(140000, 151000, 1000))
  experiment.throughputs = coarse_sweep + fine_sweep
  experiment.rocksdb = GetRocksDBOptions(Scheduler.GHOST, _NUM_CPUS,
                                         _NUM_GHOST_WORKERS)
  experiment.rocksdb.range_query_ratio = 0.005
  experiment.antagonist = None
  experiment.ghost = GetGhostOptions(_NUM_CPUS)
  experiment.ghost.preemption_time_slice = '30us'

  Run(experiment)
59 |
60 |
def main(argv: Sequence[str]):
  """Runs the experiments named on the command line, in the order given.

  Args:
    argv: The program name followed by one or two scheduler names (`cfs`
      and/or `ghost`).

  Raises:
    app.UsageError: No scheduler was passed, or more than two were passed.
    ValueError: A scheduler name is not valid.
  """
  if len(argv) > 3:
    raise app.UsageError('Too many command-line arguments.')
  if len(argv) == 1:
    raise app.UsageError(
        'No experiment specified. Pass `cfs` and/or `ghost` as arguments.')

  # Validate every requested scheduler up front so that nothing runs when any
  # argument is bad.
  requested = argv[1:]
  if not CheckSchedulers(requested):
    raise ValueError('Invalid scheduler specified.')

  # Run the experiments.
  for name in requested:
    scheduler = Scheduler(name)
    if scheduler == Scheduler.CFS:
      RunCfs()
    elif scheduler == Scheduler.GHOST:
      RunGhost()
    else:
      raise ValueError(f'Unknown scheduler {scheduler}.')
81 |
82 |
83 | if __name__ == '__main__':
84 | app.run(main)
85 |
--------------------------------------------------------------------------------
/experiments/scripts/shinjuku_shenango.py:
--------------------------------------------------------------------------------
1 | # Copyright 2021 Google LLC
2 | #
3 | # Use of this source code is governed by a BSD-style
4 | # license that can be found in the LICENSE file or at
5 | # https://developers.google.com/open-source/licenses/bsd
6 | """Runs the RocksDB Shinjuku+Shenango experiments.
7 |
8 | This script runs the RocksDB Shinjuku+Shenango experiments on ghOSt and CFS. The
9 | experiments contain a mix of short and long requests (Shinjuku) and the RocksDB
10 | threads are co-located with Antagonist threads (Shenango).
11 | """
12 |
13 | from typing import Sequence
14 | from absl import app
15 | from experiments.scripts.options import CfsWaitType
16 | from experiments.scripts.options import CheckSchedulers
17 | from experiments.scripts.options import GetAntagonistOptions
18 | from experiments.scripts.options import GetGhostOptions
19 | from experiments.scripts.options import GetRocksDBOptions
20 | from experiments.scripts.options import Scheduler
21 | from experiments.scripts.run import Experiment
22 | from experiments.scripts.run import Run
23 |
24 | _NUM_CPUS = 8
25 | _NUM_CFS_WORKERS = _NUM_CPUS - 2
26 | _NUM_GHOST_WORKERS = 200
27 | # Subtract 1 for the Antagonist since the Antagonist does not run a thread on
28 | # the same CPU as the load generator.
29 | _NUM_ANTAGONIST_CPUS = _NUM_CPUS - 1
30 |
31 |
def RunCfs():
  """Runs the Shinjuku+Shenango experiment on CFS.

  CFS is the Linux Completely Fair Scheduler. A range-query ratio of 0.005 is
  used, RocksDB workers wait on a futex, and an Antagonist is configured. No
  ghOSt options are used.
  """
  experiment: Experiment = Experiment()
  # Coarse sweep: throughputs 10000, 20000, 30000, and 40000.
  coarse_sweep = list(range(10000, 50000, 10000))
  # Fine sweep toward the end: 50000, 51000, 52000, ..., 80000.
  fine_sweep = list(range(50000, 81000, 1000))
  experiment.throughputs = coarse_sweep + fine_sweep
  experiment.rocksdb = GetRocksDBOptions(Scheduler.CFS, _NUM_CPUS,
                                         _NUM_CFS_WORKERS)
  experiment.rocksdb.range_query_ratio = 0.005
  experiment.rocksdb.cfs_wait_type = CfsWaitType.FUTEX
  experiment.antagonist = GetAntagonistOptions(Scheduler.CFS,
                                               _NUM_ANTAGONIST_CPUS)
  experiment.ghost = None

  Run(experiment)
46 |
47 |
def RunGhost():
  """Runs the Shinjuku+Shenango experiment on ghOSt.

  A range-query ratio of 0.005 is used with a 30us preemption time slice.
  RocksDB is given QoS 2 and the Antagonist QoS 1.
  """
  experiment: Experiment = Experiment()
  # Coarse sweep: throughputs 10000, 20000, 30000, ..., 130000.
  coarse_sweep = list(range(10000, 140000, 10000))
  # Fine sweep toward the end: 140000, 141000, 142000, ..., 150000.
  fine_sweep = list(range(140000, 151000, 1000))
  experiment.throughputs = coarse_sweep + fine_sweep
  experiment.rocksdb = GetRocksDBOptions(Scheduler.GHOST, _NUM_CPUS,
                                         _NUM_GHOST_WORKERS)
  experiment.rocksdb.range_query_ratio = 0.005
  experiment.rocksdb.ghost_qos = 2
  experiment.antagonist = GetAntagonistOptions(Scheduler.GHOST,
                                               _NUM_ANTAGONIST_CPUS)
  experiment.antagonist.ghost_qos = 1
  experiment.ghost = GetGhostOptions(_NUM_CPUS)
  experiment.ghost.preemption_time_slice = '30us'

  Run(experiment)
64 |
65 |
def main(argv: Sequence[str]):
  """Runs the experiments named on the command line, in the order given.

  Args:
    argv: The program name followed by one or two scheduler names (`cfs`
      and/or `ghost`).

  Raises:
    app.UsageError: No scheduler was passed, or more than two were passed.
    ValueError: A scheduler name is not valid.
  """
  if len(argv) > 3:
    raise app.UsageError('Too many command-line arguments.')
  if len(argv) == 1:
    raise app.UsageError(
        'No experiment specified. Pass `cfs` and/or `ghost` as arguments.')

  # Validate every requested scheduler up front so that nothing runs when any
  # argument is bad.
  requested = argv[1:]
  if not CheckSchedulers(requested):
    raise ValueError('Invalid scheduler specified.')

  # Run the experiments.
  for name in requested:
    scheduler = Scheduler(name)
    if scheduler == Scheduler.CFS:
      RunCfs()
    elif scheduler == Scheduler.GHOST:
      RunGhost()
    else:
      raise ValueError(f'Unknown scheduler {scheduler}.')
86 |
87 |
88 | if __name__ == '__main__':
89 | app.run(main)
90 |
--------------------------------------------------------------------------------
/experiments/shared/prio_table_helper.cc:
--------------------------------------------------------------------------------
1 | // Copyright 2021 Google LLC
2 | //
3 | // Use of this source code is governed by a BSD-style
4 | // license that can be found in the LICENSE file or at
5 | // https://developers.google.com/open-source/licenses/bsd
6 |
7 | #include "experiments/shared/prio_table_helper.h"
8 |
9 | #include "lib/base.h"
10 |
11 | namespace ghost_test {
12 |
13 | void PrioTableHelper::GetWorkClass(uint32_t wcid, ghost::work_class& wc) const {
14 | CheckWorkClassInRange(wcid);
15 |
16 | wc = *table_.work_class(wcid);
17 | }
18 |
19 | void PrioTableHelper::SetWorkClass(uint32_t wcid, const ghost::work_class& wc) {
20 | CHECK_EQ(wcid, wc.id);
21 | CheckWorkClassInRange(wcid);
22 |
23 | *table_.work_class(wcid) = wc;
24 | }
25 |
// Copies the 'sid', 'wcid', 'gpid', 'flags', and 'deadline' fields of 'src'
// into 'dst'. No other field of 'dst' (e.g., its 'seqcount', which
// 'SetSchedItem' manipulates separately) is touched.
void PrioTableHelper::CopySchedItem(ghost::sched_item& dst,
                                    const ghost::sched_item& src) const {
  dst.sid = src.sid;
  dst.wcid = src.wcid;
  dst.gpid = src.gpid;
  dst.flags = src.flags;
  dst.deadline = src.deadline;
}
34 |
35 | void PrioTableHelper::GetSchedItem(uint32_t sid, ghost::sched_item& si) const {
36 | CheckSchedItemInRange(sid);
37 |
38 | CopySchedItem(si, *table_.sched_item(sid));
39 | }
40 |
41 | void PrioTableHelper::SetSchedItem(uint32_t sid, const ghost::sched_item& si) {
42 | CHECK_EQ(sid, si.sid);
43 | CheckSchedItemInRange(si.sid);
44 | CheckWorkClassInRange(si.wcid);
45 |
46 | ghost::sched_item* curr = table_.sched_item(sid);
47 | uint32_t begin = curr->seqcount.write_begin();
48 | CopySchedItem(*curr, si);
49 | curr->seqcount.write_end(begin);
50 | MarkUpdatedTableIndex(curr->sid);
51 | }
52 |
// Constructs the helper and its underlying PrioTable with room for
// 'num_sched_items' sched items and 'num_work_classes' work classes, using the
// 'kStreamCapacity83' stream capacity.
PrioTableHelper::PrioTableHelper(uint32_t num_sched_items,
                                 uint32_t num_work_classes)
    : table_(num_sched_items, num_work_classes,
             ghost::PrioTable::StreamCapacity::kStreamCapacity83) {
  // If there is at least one sched item, there must be at least one work
  // class.
  CHECK(num_sched_items == 0 || num_work_classes >= 1);
}
59 |
60 | void PrioTableHelper::MarkRunnability(uint32_t sid, bool runnable) {
61 | CheckSchedItemInRange(sid);
62 |
63 | ghost::sched_item* si = table_.sched_item(sid);
64 | uint32_t begin = si->seqcount.write_begin();
65 | if (runnable) {
66 | si->flags |= SCHED_ITEM_RUNNABLE;
67 | } else {
68 | si->flags &= ~SCHED_ITEM_RUNNABLE;
69 | }
70 | si->seqcount.write_end(begin);
71 | MarkUpdatedTableIndex(si->sid);
72 | }
73 |
// Marks sched item 'sid' as runnable (sets its SCHED_ITEM_RUNNABLE flag via
// 'MarkRunnability').
void PrioTableHelper::MarkRunnable(uint32_t sid) {
  MarkRunnability(sid, /*runnable=*/true);
}
77 |
// Marks sched item 'sid' as idle (clears its SCHED_ITEM_RUNNABLE flag via
// 'MarkRunnability').
void PrioTableHelper::MarkIdle(uint32_t sid) {
  MarkRunnability(sid, /*runnable=*/false);
}
81 |
82 | void PrioTableHelper::WaitUntilRunnable(uint32_t sid) const {
83 | CheckSchedItemInRange(sid);
84 |
85 | ghost::sched_item* si = table_.sched_item(sid);
86 | std::atomic* flags =
87 | reinterpret_cast*>(&si->flags);
88 | while ((flags->load(std::memory_order_acquire) & SCHED_ITEM_RUNNABLE) == 0) {
89 | ghost::Pause();
90 | }
91 | }
92 |
93 | } // namespace ghost_test
94 |
--------------------------------------------------------------------------------
/experiments/shared/thread_pool.cc:
--------------------------------------------------------------------------------
1 | // Copyright 2021 Google LLC
2 | //
3 | // Use of this source code is governed by a BSD-style
4 | // license that can be found in the LICENSE file or at
5 | // https://developers.google.com/open-source/licenses/bsd
6 |
7 | #include "experiments/shared/thread_pool.h"
8 |
9 | namespace ghost_test {
10 |
11 | ExperimentThreadPool::~ExperimentThreadPool() {
12 | // Check that all threads have been joined.
13 | CHECK(absl::c_all_of(threads_,
14 | [](const std::unique_ptr& thread) {
15 | return !thread->Joinable();
16 | }));
17 | }
18 |
// Creates the pool's threads. Thread 'i' is constructed from 'ksched[i]' and
// runs 'ThreadMain' with its index 'i' and its work item 'thread_work[i]'.
// Both 'ksched' and 'thread_work' must contain exactly 'num_threads_' entries.
//
// NOTE(review): the template arguments in this signature and on
// 'std::make_unique' below appear to have been lost from this listing; restore
// them from the declaration in thread_pool.h before compiling.
void ExperimentThreadPool::Init(
    const std::vector& ksched,
    const std::vector>& thread_work) {
  CHECK_EQ(ksched.size(), num_threads_);
  CHECK_EQ(ksched.size(), thread_work.size());

  threads_.reserve(num_threads_);
  for (uint32_t i = 0; i < num_threads_; i++) {
    threads_.push_back(std::make_unique(
        ksched[i],
        std::bind(&ExperimentThreadPool::ThreadMain, this, i, thread_work[i])));
  }
}
32 |
// Fires the trigger for thread 'sid'. 'Join' checks that this trigger has
// fired for every thread before it joins them.
void ExperimentThreadPool::MarkExit(uint32_t sid) {
  thread_triggers_.Trigger(sid);
}
36 |
// Body run by pool thread 'i': calls 'thread_work(i)' repeatedly until
// 'ShouldExit(i)' returns true, then increments 'num_exited_'.
//
// NOTE(review): the template argument of 'std::function' appears to have been
// lost from this listing; restore it from the declaration in thread_pool.h
// before compiling.
void ExperimentThreadPool::ThreadMain(
    uint32_t i, std::function thread_work) {
  while (!ShouldExit(i)) {
    thread_work(i);
  }
  // Count this thread as exited (release ordering).
  num_exited_.fetch_add(1, std::memory_order_release);
}
44 |
45 | void ExperimentThreadPool::Join() {
46 | // Check that all threads have already been notified to exit. If not, the call
47 | // to `Join` below will hang on one the threads because that thread will not
48 | // exit.
49 | for (uint32_t i = 0; i < num_threads_; i++) {
50 | CHECK(thread_triggers_.Triggered(/*sid=*/i));
51 | }
52 | for (std::unique_ptr& thread : threads_) {
53 | // Check that `thread` is joinable. `thread` will not be joinable if it has
54 | // already been joined.
55 | CHECK(thread->Joinable());
56 | thread->Join();
57 | }
58 | }
59 |
60 | } // namespace ghost_test
61 |
--------------------------------------------------------------------------------
/experiments/shared/thread_wait.cc:
--------------------------------------------------------------------------------
1 | // Copyright 2021 Google LLC
2 | //
3 | // Use of this source code is governed by a BSD-style
4 | // license that can be found in the LICENSE file or at
5 | // https://developers.google.com/open-source/licenses/bsd
6 |
7 | #include "experiments/shared/thread_wait.h"
8 |
9 | #include "lib/base.h"
10 |
11 | namespace ghost_test {
12 |
13 | ThreadWait::ThreadWait(uint32_t num_threads, WaitType wait_type)
14 | : num_threads_(num_threads), wait_type_(wait_type) {
15 | runnability_.reserve(num_threads);
16 | for (uint32_t i = 0; i < num_threads_; i++) {
17 | runnability_.push_back(std::make_unique>(0));
18 | }
19 | }
20 |
21 | void ThreadWait::MarkRunnable(uint32_t sid) {
22 | CHECK_LT(sid, num_threads_);
23 |
24 | runnability_[sid]->store(1, std::memory_order_release);
25 | if (wait_type_ == WaitType::kFutex) {
26 | ghost::Futex::Wake(runnability_[sid].get(), 1);
27 | }
28 | }
29 |
30 | void ThreadWait::MarkIdle(uint32_t sid) {
31 | CHECK_LT(sid, num_threads_);
32 |
33 | runnability_[sid]->store(0, std::memory_order_release);
34 | }
35 |
36 | void ThreadWait::WaitUntilRunnable(uint32_t sid) const {
37 | CHECK_LT(sid, num_threads_);
38 |
39 | const std::unique_ptr>& r = runnability_[sid];
40 | if (wait_type_ == WaitType::kSpin) {
41 | while (r->load(std::memory_order_acquire) == 0) {
42 | ghost::Pause();
43 | }
44 | } else {
45 | CHECK_EQ(wait_type_, WaitType::kFutex);
46 |
47 | ghost::Futex::Wait(r.get(), 0);
48 | }
49 | }
50 |
51 | } // namespace ghost_test
52 |
--------------------------------------------------------------------------------
/experiments/shared/thread_wait.h:
--------------------------------------------------------------------------------
1 | // Copyright 2021 Google LLC
2 | //
3 | // Use of this source code is governed by a BSD-style
4 | // license that can be found in the LICENSE file or at
5 | // https://developers.google.com/open-source/licenses/bsd
6 |
7 | #ifndef GHOST_EXPERIMENTS_SHARED_THREAD_WAIT_H_
8 | #define GHOST_EXPERIMENTS_SHARED_THREAD_WAIT_H_
9 |
10 | #include
11 |
12 | #include "lib/base.h"
13 |
14 | namespace ghost_test {
15 |
// Support class for test apps that run experiments with threads that need to
// wait. This class allows threads to be marked as idle/runnable and lets them
// wait if they are idle until they are marked runnable again either by spinning
// or sleeping on a futex.
//
// Example:
// ThreadWait thread_wait_;
// (Initialize with the number of threads you are using and the wait type.)
// ...
// Main Thread: thread_wait_.MarkIdle(/*sid=*/2);
// ...
// Thread 2: thread_wait_.WaitUntilRunnable(/*sid=*/2);
// (Thread 2 now waits.)
// ...
// Thread 1: thread_wait_.MarkRunnable(/*sid=*/2);
// (Thread 2 now returns from 'WaitUntilRunnable()' and does other work.)
class ThreadWait {
 public:
  // When 'WaitUntilRunnable' is called, there are different ways to wait. Each
  // way affects performance differently.
  enum class WaitType {
    // Wait by spinning. Threads will return from 'WaitUntilRunnable' more
    // quickly when marked runnable but will burn up their CPU while waiting.
    kSpin,
    // Wait by sleeping on a futex. Threads will not burn up their CPU while
    // waiting but will return from 'WaitUntilRunnable' more slowly when marked
    // runnable.
    kFutex,
  };

  // Creates wait state for 'num_threads' threads that wait using 'wait_type'.
  ThreadWait(uint32_t num_threads, WaitType wait_type);

  // Marks 'sid' as runnable.
  void MarkRunnable(uint32_t sid);
  // Marks 'sid' as idle.
  void MarkIdle(uint32_t sid);
  // Waits until 'sid' is runnable.
  void WaitUntilRunnable(uint32_t sid) const;

 private:
  const uint32_t num_threads_;
  const WaitType wait_type_;
  // One runnability word per thread: 0 means idle and 1 means runnable. Each
  // word is heap-allocated because atomics cannot be moved when the vector
  // grows.
  // NOTE(review): the element type was garbled in this listing and is
  // reconstructed as 'int' from the 0/1 literals stored and waited on in
  // thread_wait.cc -- confirm against the original declaration.
  std::vector<std::unique_ptr<std::atomic<int>>> runnability_;
};
60 |
61 | inline std::ostream& operator<<(std::ostream& os,
62 | ThreadWait::WaitType wait_type) {
63 | switch (wait_type) {
64 | case ThreadWait::WaitType::kSpin:
65 | return os << "Spin";
66 | case ThreadWait::WaitType::kFutex:
67 | return os << "Futex";
68 | }
69 | }
70 |
71 | } // namespace ghost_test
72 |
73 | #endif // GHOST_EXPERIMENTS_SHARED_THREAD_WAIT_H_
74 |
--------------------------------------------------------------------------------
/lib/agent.cc:
--------------------------------------------------------------------------------
1 | // Copyright 2021 Google LLC
2 | //
3 | // Use of this source code is governed by a BSD-style
4 | // license that can be found in the LICENSE file or at
5 | // https://developers.google.com/open-source/licenses/bsd
6 |
7 | #include "agent.h"
8 |
9 | #include
10 |
11 | #include "lib/scheduler.h"
12 |
13 | namespace ghost {
14 |
// Detaches this agent from its enclave. The agent's thread must no longer be
// joinable (i.e., it must already have been joined) when the agent is
// destroyed.
Agent::~Agent() {
  enclave_->DetachAgent(this);
  CHECK(!thread_.joinable());
}
19 |
20 | void Agent::StartBegin() { thread_ = std::thread(&Agent::ThreadBody, this); }
21 |
// Blocks until the 'ready_' notification has been signaled.
void Agent::StartComplete() { ready_.WaitForNotification(); }
23 |
24 | void LocalAgent::ThreadBody() {
25 | int queue_fd;
26 | Scheduler* s = AgentScheduler();
27 | if (!s) {
28 | // Some tests don't have a scheduler. Those that don't need to set a
29 | // default channel before starting the agents, which the kernel will use.
30 | // If they did not set a default, then SchedAgentEnterGhost will fail.
31 | // TODO Once we move queues to ghostfs, we might be able to CHECK that
32 | // there is a default for the enclave.
33 | queue_fd = -1;
34 | } else {
35 | queue_fd = s->GetAgentChannel(cpu_).GetFd();
36 | }
37 |
38 | CHECK_EQ(prctl(PR_SET_NAME, absl::StrCat("ap_task_", cpu().id()).c_str()), 0);
39 |
40 | gtid_ = Gtid::Current();
41 | enclave_->WaitForOldAgent();
42 |
43 | // setsched may fail with EBUSY, which is when there is an old agent that has
44 | // not left the cpu yet. Spin until we can. The old agent has priority; the
45 | // kernel will preempt us when it is runnable, since we are still in CFS. We
46 | // know that the old agent is gone or in the act of dying, because we called
47 | // WaitForOldAgent.
48 | int ret;
49 | do {
50 | ret = GhostHelper()->SchedAgentEnterGhost(enclave_->GetCtlFd(), cpu_,
51 | queue_fd);
52 | } while (ret && errno == EBUSY);
53 | CHECK_EQ(ret, 0);
54 |
55 | status_word_ = LocalStatusWord(StatusWord::AgentSW{});
56 | CHECK(!status_word_.empty());
57 |
58 | enclave_->AttachAgent(cpu_, this);
59 |
60 | AgentThread();
61 | WaitForExitNotification();
62 | }
63 |
64 | bool Agent::Ping() {
65 | RunRequest* req = enclave()->GetRunRequest(cpu_);
66 | return req->Ping();
67 | }
68 |
// Begins agent termination: signals 'finished_', pings the agent so that it
// runs and can observe that signal, and then signals 'do_exit_'. Does not wait
// for the agent's thread; see 'TerminateComplete'.
void Agent::TerminateBegin() {
  finished_.Notify();

  // Ensure that we return control to agent to observe finished.
  Ping();

  do_exit_.Notify();
}
77 |
78 | void Agent::TerminateComplete() {
79 | thread_.join();
80 |
81 | // pthread_join() can return before the dying task has released all
82 | // of its resources (CLONE_CHILD_CLEARTID based synchronization via
83 | // do_exit()->exit_mm()->mm_release() happens much earlier than the
84 | // 'sched_class.task_dead' callback).
85 | //
86 | // Since agent state transitions don't produce task messages we use
87 | // the GHOST_SW_F_CANFREE bit to check whether the kernel has invoked
88 | // the 'task_dead' callback.
89 | while (!status_word().can_free()) {
90 | absl::SleepFor(absl::Milliseconds(1));
91 | }
92 | }
93 |
// static
// Initialized with the result of 'Ghost::CheckVersion()' during static
// initialization, so the version check runs once when the program starts.
const bool Agent::kVersionCheck = Ghost::CheckVersion();
96 |
97 | } // namespace ghost
98 |
--------------------------------------------------------------------------------
/lib/arr_structs.bpf.h:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2023 Google LLC
3 | *
4 | * Use of this source code is governed by a BSD-style
5 | * license that can be found in the LICENSE file or at
6 | * https://developers.google.com/open-source/licenses/bsd
7 | *
8 | * Helpers for building structures like linked lists where the elements are
9 | * indexes in an array instead of pointers.
10 | */
11 |
12 | #ifndef GHOST_LIB_ARR_STRUCTS_BPF_H_
13 | #define GHOST_LIB_ARR_STRUCTS_BPF_H_
14 |
#ifdef __BPF__
#include "third_party/bpf/common.bpf.h"
#else
/*
 * Fallback for non-BPF builds: plain, unchecked indexing that yields a pointer
 * to arr[idx]. arr_sz is accepted for signature compatibility but unused. The
 * expansion is fully parenthesized so that it composes with postfix operators,
 * e.g. BOUNDED_ARRAY_IDX(a, n, i)->field.
 */
#define BOUNDED_ARRAY_IDX(arr, arr_sz, idx) (&(arr)[(idx)])
#endif
20 |
/*
 * For older gcc, typeof may be undefined. When no `typeof` macro exists, map
 * it to the `__typeof__` spelling.
 */
#ifndef typeof
#define typeof(x) __typeof__(x)
#endif
25 |
/*
 * Helper to prevent the compiler from optimizing bounds check on x.
 *
 * The empty asm statement lists x as a read-write ("+r") operand, so the
 * compiler must assume x may have been modified by it. The macro evaluates to
 * x. x must be an lvalue.
 */
#ifndef BPF_MUST_CHECK
#define BPF_MUST_CHECK(x) ({ asm volatile ("" : "+r"(x)); x; })
#endif
30 |
/*
 * Translate an id into a pointer to its elem. Ids are 1-based: id 0 yields
 * NULL, any other id yields BOUNDED_ARRAY_IDX(arr, arr_sz, id - 1).
 */
#define __id_to_elem(arr, arr_sz, id) ({				\
	size_t ___id = id;						\
	___id != 0 ? BOUNDED_ARRAY_IDX(arr, arr_sz, ___id - 1) : NULL;	\
})
38 |
/*
 * Lookup the (1-based) id for an elem. elem must point into arr.
 *
 * The byte offset is computed on unsigned char pointers and converted to
 * size_t before dividing: the difference of pointers is signed, and signed
 * division is not allowed in BPF.
 *
 * elem is parenthesized everywhere so that expression arguments such as
 * `base + n` are handled correctly.
 */
#define __elem_to_id(arr, elem)						\
	(((size_t)((unsigned char*)(elem) - (unsigned char*)(arr))	\
	  / sizeof(*(elem))) + 1)
48 |
/* Fallback offsetof: byte offset of member within type, via a null base. */
#if !defined(offsetof)
#define offsetof(type, member) ((size_t)(&((type*)0)->member))
#endif
52 |
/*
 * Given ptr, a pointer to `member` inside an object of `type`, yields a
 * pointer to the enclosing object.
 *
 * ptr is parenthesized so that the char* cast applies to the whole argument
 * even when the caller passes an expression (e.g. `cond ? p : q`).
 */
#ifndef container_of
#define container_of(ptr, type, member) ({				\
	(type*)((char*)(ptr) - offsetof(type, member));			\
})
#endif
58 |
59 |
60 | #endif // GHOST_LIB_ARR_STRUCTS_BPF_H_
61 |
--------------------------------------------------------------------------------
/lib/flux.h:
--------------------------------------------------------------------------------
1 | // Copyright 2023 Google LLC
2 | //
3 | // Use of this source code is governed by a BSD-style
4 | // license that can be found in the LICENSE file or at
5 | // https://developers.google.com/open-source/licenses/bsd
6 | //
7 | // Userspace helpers for schedulers using the flux infrastructure
8 |
9 |
10 | #ifndef GHOST_LIB_FLUX_H_
11 | #define GHOST_LIB_FLUX_H_
12 |
13 | #include "bpf/user/agent.h"
14 |
// Sets the program type and attach type of the three flux BPF programs
// (flux_pnt, flux_msg_send, flux_select_rq) of the skeleton `bpf_obj`.
// `bpf_obj` is parenthesized so that any pointer expression may be passed.
#define FluxSetProgTypes(bpf_obj) ({					\
	bpf_program__set_types((bpf_obj)->progs.flux_pnt,		\
		BPF_PROG_TYPE_GHOST_SCHED, BPF_GHOST_SCHED_PNT);	\
	bpf_program__set_types((bpf_obj)->progs.flux_msg_send,		\
		BPF_PROG_TYPE_GHOST_MSG, BPF_GHOST_MSG_SEND);		\
	bpf_program__set_types((bpf_obj)->progs.flux_select_rq,		\
		BPF_PROG_TYPE_GHOST_SELECT_RQ, BPF_GHOST_SELECT_RQ);	\
})
23 |
// Registers the three flux BPF programs of the skeleton `bpf_obj` via
// agent_bpf_register(); a non-zero return from any registration trips a
// CHECK. `bpf_obj` is parenthesized so that any pointer expression may be
// passed.
#define FluxRegisterProgs(bpf_obj) ({					\
	CHECK_EQ(agent_bpf_register((bpf_obj)->progs.flux_pnt,		\
				    BPF_GHOST_SCHED_PNT), 0);		\
	CHECK_EQ(agent_bpf_register((bpf_obj)->progs.flux_msg_send,	\
				    BPF_GHOST_MSG_SEND), 0);		\
	CHECK_EQ(agent_bpf_register((bpf_obj)->progs.flux_select_rq,	\
				    BPF_GHOST_SELECT_RQ), 0);		\
})
32 |
// Fills in the flux read-only globals of the skeleton `bpf_obj`:
// enable_bpf_printd from CapHas(CAP_PERFMON) and ghost_gtid_seqnum_bits from
// ghost_tid_seqnum_bits(). `bpf_obj` is parenthesized so that any pointer
// expression may be passed.
#define FluxSetGlobals(bpf_obj) ({					\
	(bpf_obj)->rodata->enable_bpf_printd = CapHas(CAP_PERFMON);	\
	(bpf_obj)->rodata->ghost_gtid_seqnum_bits =			\
		ghost_tid_seqnum_bits();				\
})
37 |
// CHECKs that the value sizes of the cpu_data and thread_data maps equal
// FLUX_MAX_CPUS * sizeof(struct flux_cpu) and
// FLUX_MAX_GTIDS * sizeof(struct flux_thread) respectively. `bpf_obj` is
// parenthesized so that any pointer expression may be passed.
#define FluxCheckMaps(bpf_obj) ({					\
	CHECK_EQ(bpf_map__value_size((bpf_obj)->maps.cpu_data),		\
		 FLUX_MAX_CPUS * sizeof(struct flux_cpu));		\
	CHECK_EQ(bpf_map__value_size((bpf_obj)->maps.thread_data),	\
		 FLUX_MAX_GTIDS * sizeof(struct flux_thread));		\
})
44 |
45 | #endif // GHOST_LIB_FLUX_H_
46 |
--------------------------------------------------------------------------------
/lib/ghost_uapi.cc:
--------------------------------------------------------------------------------
1 | // Copyright 2023 Google LLC
2 | //
3 | // Use of this source code is governed by a BSD-style
4 | // license that can be found in the LICENSE file or at
5 | // https://developers.google.com/open-source/licenses/bsd
6 |
7 | #include "lib/ghost_uapi.h"
8 |
9 | // The global symbol in combination with alwayslink=1 ensures that only
10 | // a single instance of the ghost_uapi library is depended on by a cc_binary
11 | // target.
12 | //
13 | // Without this we could have some intermediate libraries or the binary
14 | // itself compile against one ABI while other libraries are compiled
15 | // against a different ABI.
16 | //
17 | // Now if a binary inadvertently takes a dependency on 'ghost_uapi'
18 | // _and_ 'ghost_uapi_75' then the linker will complain as follows:
19 | // ld: error: duplicate symbol: did_you_take_an_unintended_ghost_uapi_dependency
20 |
21 | int did_you_take_an_unintended_ghost_uapi_dependency = GHOST_VERSION;
22 |
--------------------------------------------------------------------------------
/lib/ghost_uapi.h:
--------------------------------------------------------------------------------
1 | // Copyright 2023 Google LLC
2 | //
3 | // Use of this source code is governed by a BSD-style
4 | // license that can be found in the LICENSE file or at
5 | // https://developers.google.com/open-source/licenses/bsd
6 |
7 | #ifndef GHOST_LIB_GHOST_UAPI_H_
8 | #define GHOST_LIB_GHOST_UAPI_H_
9 |
10 | #ifndef GHOST_SELECT_ABI
11 | #include "abi/latest/kernel/ghost.h"
12 | #elif GHOST_SELECT_ABI == 84
13 | #include "abi/84/kernel/ghost.h"
14 | #elif GHOST_SELECT_ABI == 90
15 | #include "abi/90/kernel/ghost.h"
16 | #else
17 | #error "missing an abi?"
18 | #endif
19 |
20 | #endif // GHOST_LIB_GHOST_UAPI_H_
21 |
--------------------------------------------------------------------------------
/lib/logging.h:
--------------------------------------------------------------------------------
1 | // Copyright 2021 Google LLC
2 | //
3 | // Use of this source code is governed by a BSD-style
4 | // license that can be found in the LICENSE file or at
5 | // https://developers.google.com/open-source/licenses/bsd
6 |
7 | #ifndef GHOST_LIB_LOGGING_H_
8 | #define GHOST_LIB_LOGGING_H_
9 |
10 | #include
11 |
12 | #include "absl/log/check.h"
13 | #include "absl/log/log.h"
14 | #include "absl/strings/str_format.h"
15 | #include "third_party/util/util.h"
16 |
// GHOST_DEBUG defaults to 0 when NDEBUG is defined and to 1 otherwise. A
// definition supplied before this point is left untouched.
#if !defined(GHOST_DEBUG)
#if defined(NDEBUG)
#define GHOST_DEBUG 0
#else
#define GHOST_DEBUG 1
#endif  // NDEBUG
#endif  // !GHOST_DEBUG
24 |
// Fallback VLOG, used only when no VLOG macro is already defined.
//
// With NDEBUG the statement never logs. Otherwise it logs at INFO when the
// current verbosity (verbose()) is at least `level`, the same sense that
// GHOST_DPRINT uses. The previous condition (`verbose() < level`) was
// inverted: it logged only when the verbosity was too low.
#ifndef VLOG
#ifdef NDEBUG
#define VLOG(level) LOG_IF(INFO, false)
#else
#define VLOG(level) LOG_IF(INFO, verbose() >= (level))
#endif  // !NDEBUG
#endif  // !VLOG
32 |
// Prints `fmt` (with a trailing newline appended) to `target` when the
// current verbosity, verbose(), is at least `level`; otherwise does nothing.
// `level` is parenthesized so that expression arguments compare correctly.
//
// TODO: Consider deprecating GHOST_DPRINT once we migrate to VLOG.
#define GHOST_DPRINT(level, target, fmt, ...)           \
  do {                                                  \
    if (verbose() < (level)) break;                     \
    absl::FPrintF(target, fmt "\n", ##__VA_ARGS__);     \
  } while (0)
39 |
// Logs the formatted message at FATAL severity, prefixed with the calling
// thread's id (ghost::GetTID()) in parentheses.
#define GHOST_ERROR(fmt, ...)                            \
  do {                                                   \
    LOG(FATAL) << "(" << ghost::GetTID() << ") "         \
               << absl::StrFormat(fmt, ##__VA_ARGS__);   \
  } while (0)
45 |
// Debugging breadcrumb: logs, at INFO, the process id, a description of the
// current Gtid and the name of the enclosing function.
#define GHOST_I_AM_HERE                                               \
  do {                                                                \
    LOG(INFO) << "GHOST_I_AM_HERE: PID " << getpid() << " "           \
              << ghost::Gtid::Current().describe() << " at "          \
              << __func__;                                            \
  } while (0)
51 |
52 | #endif // GHOST_LIB_LOGGING_H_
53 |
--------------------------------------------------------------------------------
/lib/trivial_status.cc:
--------------------------------------------------------------------------------
1 | #include "lib/trivial_status.h"
2 | #include "absl/strings/str_format.h"
3 |
4 | namespace ghost {
5 |
6 | namespace {
7 |
8 | template
9 | void CopyString(std::array& dest, absl::string_view s) {
10 | static_assert(ArraySize > 0);
11 | const size_t chars_to_copy = std::min(ArraySize - 1, s.size());
12 | if (chars_to_copy < s.size()) {
13 | absl::FPrintF(stderr,
14 | "Source string too large to fit in TrivialStatus: %zu, vs "
15 | "max_size %zu\n",
16 | s.size(), chars_to_copy);
17 | }
18 | std::copy_n(s.begin(), chars_to_copy, dest.begin());
19 | dest[chars_to_copy] = '\0';
20 | }
21 |
22 | } // namespace
23 |
24 | TrivialStatus::TrivialStatus(const absl::Status& s) {
25 | code_ = s.code();
26 |
27 | CopyString(error_message_, s.message());
28 | }
29 |
30 | TrivialStatusOrString::TrivialStatusOrString(
31 | const absl::StatusOr& s)
32 | : status_(TrivialStatus(s.status())) {
33 | if (s.ok()) {
34 | string_length_ = s.value().size();
35 | CopyString(str_, s.value());
36 | }
37 | }
38 |
39 | absl::StatusOr TrivialStatusOrString::ToStatusOr() const
40 | {
41 | absl::Status s = status_.ToStatus();
42 | if (s.ok()) {
43 | return std::string(str_.data(), string_length_);
44 | }
45 | return s;
46 | }
47 |
48 | } // namespace ghost
49 |
--------------------------------------------------------------------------------
/lib/trivial_status.h:
--------------------------------------------------------------------------------
1 | #ifndef GHOST_LIB_TRIVIAL_STATUS_H_
2 | #define GHOST_LIB_TRIVIAL_STATUS_H_
3 |
4 | #include "absl/log/check.h"
5 | #include "absl/status/status.h"
6 | #include "absl/status/statusor.h"
7 |
8 | namespace ghost {
9 |
10 | // This is a trivially copyable version of absl::Status. This is useful
11 | // because it can be serialized across the shared memory AgentRpcBuffer.
12 | class TrivialStatus {
13 | public:
14 | explicit TrivialStatus() : TrivialStatus(absl::OkStatus()) {}
15 | explicit TrivialStatus(const absl::Status& s);
16 |
17 | // Returns the absl::Status version of this object.
18 | absl::Status ToStatus() const {
19 | return absl::Status(code_, std::string(error_message_.data()));
20 | }
21 |
22 | bool ok() const { return code_ == absl::StatusCode::kOk; }
23 |
24 | private:
25 | static constexpr size_t kMaxErrorMessageSize = 1000;
26 |
27 | absl::StatusCode code_;
28 |
29 | // Sized large enough to handle most error messages. Must fit in
30 | // AgentRpcBuffer BufferBytes.
31 | std::array error_message_;
32 | };
33 |
34 | // This is a trivially copyable version of absl::StatusOr. This is useful
35 | // because it can be serialized across the shared memory AgentRpcBuffer.
36 | template
37 | class TrivialStatusOr {
38 | public:
39 | explicit TrivialStatusOr() : status_(TrivialStatus(absl::OkStatus())) {}
40 |
41 | // Constructs a TrivialStatusOr from an error status.
42 | explicit TrivialStatusOr(const absl::Status& s) : status_(TrivialStatus(s)) {
43 | CHECK(!s.ok());
44 | }
45 |
46 | explicit TrivialStatusOr(const T& val)
47 | : status_(TrivialStatus(absl::OkStatus())) {
48 | value_ = val;
49 | }
50 |
51 | explicit TrivialStatusOr(const absl::StatusOr& s)
52 | : status_(TrivialStatus(s.status())) {
53 | if (s.ok()) {
54 | value_ = s.value();
55 | }
56 | }
57 |
58 | // Returns the absl::StatusOr version of this object.
59 | absl::StatusOr ToStatusOr() const {
60 | absl::Status s = status_.ToStatus();
61 | if (s.ok()) {
62 | return value_;
63 | }
64 | return s;
65 | }
66 |
67 | bool ok() const { return status_.ok(); }
68 |
69 | private:
70 | TrivialStatus status_;
71 |
72 | // If the status is OK, this stores the contained value.
73 | T value_;
74 | };
75 |
76 | // This is a trivially copyable version of absl::StatusOr. This is
77 | // useful because it can be serialized across the shared memory AgentRpcBuffer.
78 | class TrivialStatusOrString {
79 | public:
80 | explicit TrivialStatusOrString()
81 | : status_(TrivialStatus(absl::OkStatus())) {}
82 |
83 | explicit TrivialStatusOrString(const absl::StatusOr& s);
84 |
85 | // Returns the absl::StatusOr version of this object.
86 | absl::StatusOr ToStatusOr() const;
87 |
88 | bool ok() const { return status_.ok(); }
89 |
90 | private:
91 | static constexpr size_t kMaxStringSize = 30000;
92 |
93 | TrivialStatus status_;
94 |
95 | // If the status is OK, this stores the contained string.
96 | // Must fit in AgentRpcBuffer BufferBytes.
97 | std::array str_;
98 |
99 | // Not all strings will use null terminators, so we must track the original
100 | // size of the std::string.
101 | size_t string_length_ = 0;
102 | };
103 |
104 | } // namespace ghost
105 |
106 | #endif // GHOST_LIB_TRIVIAL_STATUS_H_
107 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | absl-py
2 |
--------------------------------------------------------------------------------
/schedulers/biff/agent_biff.cc:
--------------------------------------------------------------------------------
1 | // Copyright 2022 Google LLC
2 | //
3 | // Use of this source code is governed by a BSD-style
4 | // license that can be found in the LICENSE file or at
5 | // https://developers.google.com/open-source/licenses/bsd
6 |
7 | #include
8 | #include
9 | #include
10 |
11 | #include "absl/debugging/symbolize.h"
12 | #include "absl/flags/parse.h"
13 | #include "lib/agent.h"
14 | #include "lib/channel.h"
15 | #include "lib/enclave.h"
16 | #include "lib/topology.h"
17 | #include "schedulers/biff/biff_scheduler.h"
18 |
19 | ABSL_FLAG(std::string, enclave, "", "Connect to preexisting enclave directory");
20 |
21 | int main(int argc, char* argv[]) {
22 | absl::InitializeSymbolizer(argv[0]);
23 | absl::ParseCommandLine(argc, argv);
24 |
25 | ghost::Topology* t = ghost::MachineTopology();
26 | ghost::AgentConfig config(t, t->all_cpus());
27 | std::string enclave = absl::GetFlag(FLAGS_enclave);
28 | if (!enclave.empty()) {
29 | int fd = open(enclave.c_str(), O_PATH);
30 | CHECK_GE(fd, 0);
31 | config.enclave_fd_ = fd;
32 | }
33 |
34 | auto uap = new ghost::AgentProcess,
35 | ghost::AgentConfig>(config);
36 |
37 | ghost::GhostHelper()->InitCore();
38 |
39 | printf("Initialization complete, ghOSt active.\n");
40 | fflush(stdout);
41 |
42 | ghost::Notification exit;
43 | static bool first = true;
44 | ghost::GhostSignals::AddHandler(SIGINT, [&exit](int) {
45 | if (first) {
46 | exit.Notify();
47 | first = false;
48 | return false;
49 | }
50 | return true;
51 | });
52 |
53 | exit.WaitForNotification();
54 |
55 | delete uap;
56 |
57 | printf("\nDone!\n");
58 | return 0;
59 | }
60 |
--------------------------------------------------------------------------------
/schedulers/biff/biff_scheduler.cc:
--------------------------------------------------------------------------------
1 | // Copyright 2022 Google LLC
2 | //
3 | // Use of this source code is governed by a BSD-style
4 | // license that can be found in the LICENSE file or at
5 | // https://developers.google.com/open-source/licenses/bsd
6 |
7 | #include "schedulers/biff/biff_scheduler.h"
8 |
9 | #include "absl/strings/str_format.h"
10 | #include "third_party/bpf/topology.bpf.h"
11 | #include "bpf/user/agent.h"
12 |
13 | namespace ghost {
14 |
15 | BiffScheduler::BiffScheduler(Enclave* enclave, CpuList cpulist,
16 | const AgentConfig& config)
17 | : Scheduler(enclave, std::move(cpulist)),
18 | unused_channel_(GHOST_MAX_QUEUE_ELEMS, /*node=*/0) {
19 |
20 | bpf_obj_ = biff_bpf__open();
21 | CHECK_NE(bpf_obj_, nullptr);
22 |
23 | bpf_program__set_types(bpf_obj_->progs.biff_pnt,
24 | BPF_PROG_TYPE_GHOST_SCHED, BPF_GHOST_SCHED_PNT);
25 | bpf_program__set_types(bpf_obj_->progs.biff_msg_send, BPF_PROG_TYPE_GHOST_MSG,
26 | BPF_GHOST_MSG_SEND);
27 | bpf_program__set_types(bpf_obj_->progs.biff_select_rq,
28 | BPF_PROG_TYPE_GHOST_SELECT_RQ, BPF_GHOST_SELECT_RQ);
29 |
30 | bpf_obj_->rodata->enable_bpf_printd = CapHas(CAP_PERFMON);
31 | SetBpfTopologyVars(bpf_obj_->rodata, MachineTopology());
32 |
33 | CHECK_EQ(biff_bpf__load(bpf_obj_), 0);
34 |
35 | CHECK_EQ(agent_bpf_register(bpf_obj_->progs.biff_pnt, BPF_GHOST_SCHED_PNT),
36 | 0);
37 | CHECK_EQ(agent_bpf_register(bpf_obj_->progs.biff_msg_send,
38 | BPF_GHOST_MSG_SEND), 0);
39 | CHECK_EQ(agent_bpf_register(bpf_obj_->progs.biff_select_rq,
40 | BPF_GHOST_SELECT_RQ), 0);
41 |
42 | bpf_cpu_data_ = static_cast(
43 | bpf_map__mmap(bpf_obj_->maps.cpu_data));
44 | CHECK_NE(bpf_cpu_data_, MAP_FAILED);
45 |
46 | bpf_sw_data_ = static_cast(
47 | bpf_map__mmap(bpf_obj_->maps.sw_data));
48 | CHECK_NE(bpf_sw_data_, MAP_FAILED);
49 | }
50 |
51 | BiffScheduler::~BiffScheduler() {
52 | bpf_map__munmap(bpf_obj_->maps.cpu_data, bpf_cpu_data_);
53 | bpf_map__munmap(bpf_obj_->maps.sw_data, bpf_sw_data_);
54 | biff_bpf__destroy(bpf_obj_);
55 | }
56 |
57 | void BiffScheduler::EnclaveReady() {
58 | enclave()->SetDeliverTicks(true);
59 | enclave()->SetDeliverCpuAvailability(true);
60 | WRITE_ONCE(bpf_obj_->bss->initialized, true);
61 | }
62 |
63 | void BiffScheduler::DiscoverTasks() {
64 | enclave()->DiscoverTasks();
65 | }
66 |
67 | void BiffAgentTask::AgentThread() {
68 | gtid().assign_name("Agent:" + std::to_string(cpu().id()));
69 |
70 | SignalReady();
71 | WaitForEnclaveReady();
72 |
73 | while (!Finished()) {
74 | RunRequest* req = enclave()->GetRunRequest(cpu());
75 | req->LocalYield(status_word().barrier(), /*flags=*/0);
76 | }
77 | }
78 |
79 | } // namespace ghost
80 |
--------------------------------------------------------------------------------
/schedulers/biff/biff_scheduler.h:
--------------------------------------------------------------------------------
1 | // Copyright 2022 Google LLC
2 | //
3 | // Use of this source code is governed by a BSD-style
4 | // license that can be found in the LICENSE file or at
5 | // https://developers.google.com/open-source/licenses/bsd
6 |
7 | #ifndef GHOST_SCHEDULERS_BIFF_BIFF_SCHEDULER_H_
8 | #define GHOST_SCHEDULERS_BIFF_BIFF_SCHEDULER_H_
9 |
10 | #include
11 | #include
12 |
13 | #include "third_party/bpf/biff_bpf.h"
14 | #include "lib/agent.h"
15 | #include "lib/scheduler.h"
16 | #include "schedulers/biff/biff_bpf.skel.h"
17 |
18 | namespace ghost {
19 |
20 | class BiffScheduler : public Scheduler {
21 | public:
22 | explicit BiffScheduler(Enclave* enclave, CpuList cpulist,
23 | const AgentConfig& config);
24 | ~BiffScheduler() final;
25 |
26 | void EnclaveReady() final;
27 | void DiscoverTasks() final;
28 | Channel& GetDefaultChannel() final { return unused_channel_; };
29 |
30 | private:
31 | LocalChannel unused_channel_;
32 | struct biff_bpf* bpf_obj_;
33 | struct biff_bpf_cpu_data* bpf_cpu_data_;
34 | struct biff_bpf_sw_data* bpf_sw_data_;
35 | };
36 |
37 | class BiffAgentTask : public LocalAgent {
38 | public:
39 | BiffAgentTask(Enclave* enclave, Cpu cpu, BiffScheduler* biff_sched)
40 | : LocalAgent(enclave, cpu), biff_sched_(biff_sched) {}
41 |
42 | void AgentThread() override;
43 | Scheduler* AgentScheduler() const override { return biff_sched_; }
44 |
45 | private:
46 | BiffScheduler* biff_sched_;
47 | };
48 |
49 | template
50 | class FullBiffAgent : public FullAgent {
51 | public:
52 | explicit FullBiffAgent(AgentConfig config)
53 | : FullAgent(config) {
54 | biff_sched_ = std::make_unique(
55 | &this->enclave_, *this->enclave_.cpus(), config);
56 | this->StartAgentTasks();
57 | this->enclave_.Ready();
58 | }
59 |
60 | ~FullBiffAgent() override {
61 | this->enclave_.SetDeliverCpuAvailability(false);
62 | this->TerminateAgentTasks();
63 | }
64 |
65 | std::unique_ptr MakeAgent(const Cpu& cpu) override {
66 | return std::make_unique(&this->enclave_, cpu,
67 | biff_sched_.get());
68 | }
69 |
70 | void RpcHandler(int64_t req, const AgentRpcArgs& args,
71 | AgentRpcResponse& response) override {
72 | switch (req) {
73 | default:
74 | response.response_code = -1;
75 | return;
76 | }
77 | }
78 |
79 | private:
80 | std::unique_ptr biff_sched_;
81 | };
82 |
83 | } // namespace ghost
84 |
85 | #endif // GHOST_SCHEDULERS_BIFF_BIFF_SCHEDULER_H_
86 |
--------------------------------------------------------------------------------
/schedulers/cfs/README.md:
--------------------------------------------------------------------------------
1 | # ghOSt CFS Agent
2 |
3 | CFS is the default scheduler in the Linux kernel. The CFS agent is a (currently
4 | incomplete) implementation of this scheduling policy as a ghost userspace
5 | agent. Currently it assigns new tasks in a round-robin fashion to CPUs. Each CPU
6 | has a runqueue; when ghost receives a message to schedule a ghost task on a cpu,
7 | it simply plucks the one with the lowest vruntime.
8 |
9 | To bring this agent to parity with CFS in the kernel, some items left to
10 | implement are:
11 |
12 | - load balancing
13 |
14 | - nice values
15 |
16 | - work stealing
17 |
18 | - group scheduling
19 |
20 | Once at feature parity, this agent can be used to deduce the "ghost" tax and
21 | be used to quickly iterate on parameter tuning.
22 |
--------------------------------------------------------------------------------
/schedulers/cfs/cfs_agent.cc:
--------------------------------------------------------------------------------
1 | // Copyright 2022 Google LLC
2 | //
3 | // Use of this source code is governed by a BSD-style
4 | // license that can be found in the LICENSE file or at
5 | // https://developers.google.com/open-source/licenses/bsd
6 |
7 | #include
8 | #include
9 | #include
10 | #include
11 |
12 | #include "absl/debugging/symbolize.h"
13 | #include "absl/flags/parse.h"
14 | #include "lib/agent.h"
15 | #include "lib/enclave.h"
16 | #include "schedulers/cfs/cfs_scheduler.h"
17 |
18 | ABSL_FLAG(std::string, ghost_cpus, "1-5", "cpulist");
19 | ABSL_FLAG(std::string, enclave, "", "Connect to preexisting enclave directory");
20 |
21 | // Scheduling tuneables
22 | ABSL_FLAG(
23 | absl::Duration, min_granularity, absl::Milliseconds(1),
24 | "The minimum time a task will run before being preempted by another task");
25 | ABSL_FLAG(absl::Duration, latency, absl::Milliseconds(10),
26 | "The target time period in which all tasks will run at least once");
27 |
28 | namespace ghost {
29 |
30 | static void ParseAgentConfig(CfsConfig* config) {
31 | CpuList ghost_cpus =
32 | MachineTopology()->ParseCpuStr(absl::GetFlag(FLAGS_ghost_cpus));
33 | CHECK(!ghost_cpus.Empty());
34 |
35 | Topology* topology = MachineTopology();
36 | config->topology_ = topology;
37 | config->cpus_ = ghost_cpus;
38 | std::string enclave = absl::GetFlag(FLAGS_enclave);
39 | if (!enclave.empty()) {
40 | int fd = open(enclave.c_str(), O_PATH);
41 | CHECK_GE(fd, 0);
42 | config->enclave_fd_ = fd;
43 | }
44 |
45 | config->min_granularity_ = absl::GetFlag(FLAGS_min_granularity);
46 | config->latency_ = absl::GetFlag(FLAGS_latency);
47 | }
48 |
49 | } // namespace ghost
50 |
51 | int main(int argc, char* argv[]) {
52 | absl::InitializeSymbolizer(argv[0]);
53 | absl::ParseCommandLine(argc, argv);
54 |
55 | ghost::CfsConfig config;
56 | ghost::ParseAgentConfig(&config);
57 |
58 | printf("Initializing...\n");
59 |
60 | // Using new so we can destruct the object before printing Done
61 | auto uap = new ghost::AgentProcess,
62 | ghost::CfsConfig>(config);
63 |
64 | ghost::GhostHelper()->InitCore();
65 | printf("Initialization complete, ghOSt active.\n");
66 | // When `stdout` is directed to a terminal, it is newline-buffered. When
67 | // `stdout` is directed to a non-interactive device (e.g, a Python subprocess
68 | // pipe), it is fully buffered. Thus, in order for the Python script to read
69 | // the initialization message as soon as it is passed to `printf`, we need to
70 | // manually flush `stdout`.
71 | fflush(stdout);
72 |
73 | ghost::Notification exit;
74 | ghost::GhostSignals::AddHandler(SIGINT, [&exit](int) {
75 | static bool first = true; // We only modify the first SIGINT.
76 |
77 | if (first) {
78 | exit.Notify();
79 | first = false;
80 | return false; // We'll exit on subsequent SIGTERMs.
81 | }
82 | return true;
83 | });
84 |
85 | // TODO: this is racy - uap could be deleted already
86 | ghost::GhostSignals::AddHandler(SIGUSR1, [uap](int) {
87 | uap->Rpc(ghost::CfsScheduler::kDebugRunqueue);
88 | return false;
89 | });
90 |
91 | exit.WaitForNotification();
92 |
93 | delete uap;
94 |
95 | printf("\nDone!\n");
96 |
97 | return 0;
98 | }
99 |
--------------------------------------------------------------------------------
/schedulers/cfs_bpf/agent_cfs.cc:
--------------------------------------------------------------------------------
1 | // Copyright 2022 Google LLC
2 | //
3 | // Use of this source code is governed by a BSD-style
4 | // license that can be found in the LICENSE file or at
5 | // https://developers.google.com/open-source/licenses/bsd
6 |
7 | #include
8 | #include
9 | #include
10 |
11 | #include "absl/debugging/symbolize.h"
12 | #include "absl/flags/parse.h"
13 | #include "lib/agent.h"
14 | #include "lib/channel.h"
15 | #include "lib/enclave.h"
16 | #include "lib/topology.h"
17 | #include "schedulers/cfs_bpf/cfs_scheduler.h"
18 |
19 | ABSL_FLAG(std::string, enclave, "", "Connect to preexisting enclave directory");
20 |
21 | int main(int argc, char* argv[]) {
22 | absl::InitializeSymbolizer(argv[0]);
23 | absl::ParseCommandLine(argc, argv);
24 |
25 | ghost::Topology* t = ghost::MachineTopology();
26 | ghost::AgentConfig config(t, t->all_cpus());
27 | std::string enclave = absl::GetFlag(FLAGS_enclave);
28 | if (!enclave.empty()) {
29 | int fd = open(enclave.c_str(), O_PATH);
30 | CHECK_GE(fd, 0);
31 | config.enclave_fd_ = fd;
32 | }
33 |
34 |
35 | auto uap = new ghost::AgentProcess,
36 | ghost::AgentConfig>(config);
37 |
38 | ghost::GhostHelper()->InitCore();
39 |
40 | printf("Initialization complete, ghOSt active.\n");
41 | fflush(stdout);
42 |
43 | ghost::Notification exit;
44 | static bool first = true;
45 | ghost::GhostSignals::AddHandler(SIGINT, [&exit](int) {
46 | if (first) {
47 | exit.Notify();
48 | first = false;
49 | return false;
50 | }
51 | return true;
52 | });
53 |
54 | exit.WaitForNotification();
55 |
56 | delete uap;
57 |
58 | printf("\nDone!\n");
59 | return 0;
60 | }
61 |
--------------------------------------------------------------------------------
/schedulers/cfs_bpf/cfs_scheduler.cc:
--------------------------------------------------------------------------------
1 | // Copyright 2022 Google LLC
2 | //
3 | // Use of this source code is governed by a BSD-style
4 | // license that can be found in the LICENSE file or at
5 | // https://developers.google.com/open-source/licenses/bsd
6 |
7 | #include "schedulers/cfs_bpf/cfs_scheduler.h"
8 |
9 | #include "absl/strings/str_format.h"
10 | #include "bpf/user/agent.h"
11 |
12 | namespace ghost {
13 |
14 | CfsScheduler::CfsScheduler(Enclave* enclave, CpuList cpulist,
15 | const AgentConfig& config)
16 | : Scheduler(enclave, std::move(cpulist)),
17 | unused_channel_(1, /*node=*/0) {
18 |
19 | bpf_obj_ = cfs_bpf__open();
20 | CHECK_NE(bpf_obj_, nullptr);
21 |
22 |
23 | bpf_program__set_types(bpf_obj_->progs.cfs_pnt,
24 | BPF_PROG_TYPE_GHOST_SCHED, BPF_GHOST_SCHED_PNT);
25 | bpf_program__set_types(bpf_obj_->progs.cfs_msg_send, BPF_PROG_TYPE_GHOST_MSG,
26 | BPF_GHOST_MSG_SEND);
27 |
28 | CHECK_EQ(cfs_bpf__load(bpf_obj_), 0);
29 |
30 | CHECK_EQ(agent_bpf_register(bpf_obj_->progs.cfs_pnt, BPF_GHOST_SCHED_PNT),
31 | 0);
32 | CHECK_EQ(agent_bpf_register(bpf_obj_->progs.cfs_msg_send,
33 | BPF_GHOST_MSG_SEND), 0);
34 |
35 | bpf_cpu_data_ = static_cast(
36 | bpf_map__mmap(bpf_obj_->maps.cpu_data));
37 | CHECK_NE(bpf_cpu_data_, MAP_FAILED);
38 |
39 | bpf_thread_data_ = static_cast(
40 | bpf_map__mmap(bpf_obj_->maps.thread_data));
41 | CHECK_NE(bpf_thread_data_, MAP_FAILED);
42 |
43 | enclave->SetDeliverCpuAvailability(false);
44 | }
45 |
46 | CfsScheduler::~CfsScheduler() {
47 | bpf_map__munmap(bpf_obj_->maps.cpu_data, bpf_cpu_data_);
48 | bpf_map__munmap(bpf_obj_->maps.thread_data, bpf_thread_data_);
49 | cfs_bpf__destroy(bpf_obj_);
50 | }
51 |
52 | void CfsScheduler::EnclaveReady() {
53 | enclave()->SetWakeOnWakerCpu(false);
54 | enclave()->SetDeliverTicks(true);
55 | WRITE_ONCE(bpf_obj_->bss->initialized, true);
56 | }
57 |
58 | void CfsScheduler::DiscoverTasks() {
59 | enclave()->DiscoverTasks();
60 | }
61 |
62 | void CfsAgentTask::AgentThread() {
63 | gtid().assign_name("Agent:" + std::to_string(cpu().id()));
64 |
65 | SignalReady();
66 | WaitForEnclaveReady();
67 |
68 | while (!Finished()) {
69 | RunRequest* req = enclave()->GetRunRequest(cpu());
70 | req->LocalYield(status_word().barrier(), /*flags=*/0);
71 | }
72 | }
73 |
74 | }
75 |
--------------------------------------------------------------------------------
/schedulers/cfs_bpf/cfs_scheduler.h:
--------------------------------------------------------------------------------
1 | // Copyright 2022 Google LLC
2 | //
3 | // Use of this source code is governed by a BSD-style
4 | // license that can be found in the LICENSE file or at
5 | // https://developers.google.com/open-source/licenses/bsd
6 |
7 | #ifndef GHOST_SCHEDULERS_CFS_BPF_BIFF_SCHEDULER_H_
8 | #define GHOST_SCHEDULERS_CFS_BPF_BIFF_SCHEDULER_H_
9 |
10 | #include
11 |
12 | #include "third_party/bpf/cfs_bpf.h"
13 | #include "lib/agent.h"
14 | #include "lib/scheduler.h"
15 | #include "schedulers/cfs_bpf/cfs_bpf.skel.h"
16 |
17 | namespace ghost {
18 |
19 | class CfsScheduler : public Scheduler {
20 | public:
21 | explicit CfsScheduler(Enclave* enclave, CpuList cpulist,
22 | const AgentConfig& config);
23 | ~CfsScheduler() final;
24 |
25 | void EnclaveReady() final;
26 | void DiscoverTasks() final;
27 | Channel& GetDefaultChannel() final { return unused_channel_; };
28 |
29 | private:
30 | LocalChannel unused_channel_;
31 | struct cfs_bpf* bpf_obj_;
32 | struct cfs_bpf_cpu_data* bpf_cpu_data_;
33 | struct cfs_bpf_thread* bpf_thread_data_;
34 | };
35 |
36 | class CfsAgentTask : public LocalAgent {
37 | public:
38 | CfsAgentTask(Enclave* enclave, Cpu cpu, CfsScheduler* cfs_sched)
39 | : LocalAgent(enclave, cpu), cfs_sched_(cfs_sched) {}
40 |
41 | void AgentThread() override;
42 | Scheduler* AgentScheduler() const override { return cfs_sched_; }
43 |
44 | private:
45 | CfsScheduler* cfs_sched_;
46 | };
47 |
48 | template
49 | class FullCfsAgent : public FullAgent {
50 | public:
51 | explicit FullCfsAgent(AgentConfig config)
52 | : FullAgent(config) {
53 | cfs_sched_ = absl::make_unique(
54 | &this->enclave_, *this->enclave_.cpus(), config);
55 | this->StartAgentTasks();
56 | this->enclave_.Ready();
57 | }
58 |
59 | ~FullCfsAgent() override {
60 | this->TerminateAgentTasks();
61 | }
62 |
63 | std::unique_ptr MakeAgent(const Cpu& cpu) override {
64 | return absl::make_unique(&this->enclave_, cpu,
65 | cfs_sched_.get());
66 | }
67 |
68 | void RpcHandler(int64_t req, const AgentRpcArgs& args,
69 | AgentRpcResponse& response) override {
70 | switch (req) {
71 | default:
72 | response.response_code = -1;
73 | return;
74 | }
75 | }
76 |
77 | private:
78 | std::unique_ptr cfs_sched_;
79 | };
80 |
81 | } // namespace ghost
82 |
83 | #endif // GHOST_SCHEDULERS_CFS_BPF_CFS_SCHEDULER_H_
84 |
--------------------------------------------------------------------------------
/schedulers/edf/agent_exp.cc:
--------------------------------------------------------------------------------
1 | // Copyright 2021 Google LLC
2 | //
3 | // Use of this source code is governed by a BSD-style
4 | // license that can be found in the LICENSE file or at
5 | // https://developers.google.com/open-source/licenses/bsd
6 |
7 | #include
8 | #include
9 |
10 | #include "absl/debugging/symbolize.h"
11 | #include "absl/flags/parse.h"
12 | #include "lib/agent.h"
13 | #include "lib/channel.h"
14 | #include "lib/enclave.h"
15 | #include "lib/topology.h"
16 | #include "schedulers/edf/edf_scheduler.h"
17 |
18 | ABSL_FLAG(std::string, ghost_cpus, "1-5", "cpulist");
19 | ABSL_FLAG(
20 | int32_t, globalcpu, -1,
21 | "Global cpu. If -1, then defaults to the lowest CPU in )");
22 | ABSL_FLAG(bool, ticks, false, "Generate cpu tick messages");
23 | ABSL_FLAG(std::string, enclave, "", "Connect to preexisting enclave directory");
24 |
25 | namespace ghost {
26 |
27 | void ParseGlobalConfig(GlobalConfig* config) {
28 | CpuList ghost_cpus =
29 | MachineTopology()->ParseCpuStr(absl::GetFlag(FLAGS_ghost_cpus));
30 | // One CPU for the spinning global agent and at least one other for running
31 | // scheduled ghOSt tasks.
32 | CHECK_GE(ghost_cpus.Size(), 2);
33 |
34 | int globalcpu = absl::GetFlag(FLAGS_globalcpu);
35 | if (globalcpu < 0) {
36 | CHECK_EQ(globalcpu, -1);
37 | globalcpu = ghost_cpus.Front().id();
38 | }
39 | CHECK(ghost_cpus.IsSet(globalcpu));
40 |
41 | Topology* topology = MachineTopology();
42 | config->topology_ = topology;
43 | config->cpus_ = ghost_cpus;
44 | config->global_cpu_ = topology->cpu(globalcpu);
45 | config->edf_ticks_ = absl::GetFlag(FLAGS_ticks) ? CpuTickConfig::kAllTicks
46 | : CpuTickConfig::kNoTicks;
47 |
48 | std::string enclave = absl::GetFlag(FLAGS_enclave);
49 | if (!enclave.empty()) {
50 | int fd = open(enclave.c_str(), O_PATH);
51 | CHECK_GE(fd, 0);
52 | config->enclave_fd_ = fd;
53 | }
54 | }
55 |
56 | } // namespace ghost
57 |
58 | int main(int argc, char* argv[]) {
59 | absl::InitializeSymbolizer(argv[0]);
60 | absl::ParseCommandLine(argc, argv);
61 |
62 | ghost::GlobalConfig config;
63 | ghost::ParseGlobalConfig(&config);
64 |
65 | printf("Core map\n");
66 |
67 | int n = 0;
68 | for (const ghost::Cpu& c : config.topology_->all_cores()) {
69 | printf("( ");
70 | for (const ghost::Cpu& s : c.siblings()) printf("%2d ", s.id());
71 | printf(")%c", ++n % 8 == 0 ? '\n' : '\t');
72 | }
73 | printf("\n");
74 |
75 | printf("Initializing...\n");
76 |
77 | // Using new so we can destruct the object before printing Done
78 | auto uap = new ghost::AgentProcess,
79 | ghost::GlobalConfig>(config);
80 |
81 | ghost::GhostHelper()->InitCore();
82 |
83 | printf("Initialization complete, ghOSt active.\n");
84 | // When `stdout` is directed to a terminal, it is newline-buffered. When
85 | // `stdout` is directed to a non-interactive device (e.g, a Python subprocess
86 | // pipe), it is fully buffered. Thus, in order for the Python script to read
87 | // the initialization message as soon as it is passed to `printf`, we need to
88 | // manually flush `stdout`.
89 | fflush(stdout);
90 |
91 | ghost::Notification exit;
92 | static bool first = true;
93 | ghost::GhostSignals::AddHandler(SIGINT, [&exit](int) {
94 | if (first) {
95 | exit.Notify();
96 | first = false;
97 | return false; // We'll exit on subsequent signals.
98 | }
99 | return true;
100 | });
101 | ghost::GhostSignals::AddHandler(SIGTERM, [&exit](int) {
102 | if (first) {
103 | exit.Notify();
104 | first = false;
105 | return false; // We'll exit on subsequent signals.
106 | }
107 | return true;
108 | });
109 |
110 | // TODO: this is racy - uap could be deleted already
111 | ghost::GhostSignals::AddHandler(SIGUSR1, [uap](int) {
112 | uap->Rpc(ghost::EdfScheduler::kDebugRunqueue);
113 | return false;
114 | });
115 |
116 | exit.WaitForNotification();
117 |
118 | delete uap;
119 |
120 | printf("\nDone!\n");
121 | return 0;
122 | }
123 |
--------------------------------------------------------------------------------
/schedulers/fifo/centralized/fifo_agent.cc:
--------------------------------------------------------------------------------
1 | // Copyright 2021 Google LLC
2 | //
3 | // Use of this source code is governed by a BSD-style
4 | // license that can be found in the LICENSE file or at
5 | // https://developers.google.com/open-source/licenses/bsd
6 |
7 | #include
8 |
9 | #include
10 | #include
11 | #include
12 |
13 | #include "absl/debugging/symbolize.h"
14 | #include "absl/flags/parse.h"
15 | #include "lib/agent.h"
16 | #include "lib/channel.h"
17 | #include "lib/enclave.h"
18 | #include "lib/topology.h"
19 | #include "schedulers/fifo/centralized/fifo_scheduler.h"
20 |
21 | ABSL_FLAG(std::string, ghost_cpus, "1-5", "cpulist");
22 | ABSL_FLAG(int32_t, globalcpu, -1,
23 | "Global cpu. If -1, then defaults to the first cpu in ");
24 | ABSL_FLAG(absl::Duration, preemption_time_slice, absl::InfiniteDuration(),
25 | "A task is preempted after running for this time slice (default = "
26 | "infinite time slice)");
27 |
28 | namespace ghost {
29 |
30 | void ParseFifoConfig(FifoConfig* config) {
31 | CpuList ghost_cpus =
32 | MachineTopology()->ParseCpuStr(absl::GetFlag(FLAGS_ghost_cpus));
33 | // One CPU for the spinning global agent and at least one other for running
34 | // scheduled ghOSt tasks.
35 | CHECK_GE(ghost_cpus.Size(), 2);
36 |
37 | int globalcpu = absl::GetFlag(FLAGS_globalcpu);
38 | if (globalcpu < 0) {
39 | CHECK_EQ(globalcpu, -1);
40 | globalcpu = ghost_cpus.Front().id();
41 | absl::SetFlag(&FLAGS_globalcpu, globalcpu);
42 | }
43 | CHECK(ghost_cpus.IsSet(globalcpu));
44 |
45 | Topology* topology = MachineTopology();
46 | config->topology_ = topology;
47 | config->cpus_ = ghost_cpus;
48 | config->global_cpu_ = topology->cpu(globalcpu);
49 | config->preemption_time_slice_ = absl::GetFlag(FLAGS_preemption_time_slice);
50 | }
51 |
52 | } // namespace ghost
53 |
54 | int main(int argc, char* argv[]) {
55 | absl::InitializeSymbolizer(argv[0]);
56 |
57 | absl::ParseCommandLine(argc, argv);
58 |
59 | ghost::FifoConfig config;
60 | ghost::ParseFifoConfig(&config);
61 |
62 | printf("Core map\n");
63 |
64 | int n = 0;
65 | for (const ghost::Cpu& c : config.topology_->all_cores()) {
66 | printf("( ");
67 | for (const ghost::Cpu& s : c.siblings()) printf("%2d ", s.id());
68 | printf(")%c", ++n % 8 == 0 ? '\n' : '\t');
69 | }
70 | printf("\n");
71 |
72 | printf("Initializing...\n");
73 |
74 | // Using new so we can destruct the object before printing Done
75 | auto uap = new ghost::AgentProcess,
76 | ghost::FifoConfig>(config);
77 |
78 | ghost::GhostHelper()->InitCore();
79 |
80 | printf("Initialization complete, ghOSt active.\n");
81 |
82 | // When `stdout` is directed to a terminal, it is newline-buffered. When
83 | // `stdout` is directed to a non-interactive device (e.g, a Python subprocess
84 | // pipe), it is fully buffered. Thus, in order for the Python script to read
85 | // the initialization message as soon as it is passed to `printf`, we need to
86 | // manually flush `stdout`.
87 | fflush(stdout);
88 |
89 | ghost::Notification exit;
90 | ghost::GhostSignals::AddHandler(SIGINT, [&exit](int) {
91 | static bool first = true; // We only modify the first SIGINT.
92 |
93 | if (first) {
94 | exit.Notify();
95 | first = false;
96 | return false; // We'll exit on subsequent SIGTERMs.
97 | }
98 | return true;
99 | });
100 |
101 | // TODO: this is racy - uap could be deleted already
102 | ghost::GhostSignals::AddHandler(SIGUSR1, [uap](int) {
103 | uap->Rpc(ghost::FifoScheduler::kDebugRunqueue);
104 | return false;
105 | });
106 |
107 | exit.WaitForNotification();
108 |
109 | delete uap;
110 |
111 | printf("Done!\n");
112 | return 0;
113 | }
114 |
--------------------------------------------------------------------------------
/schedulers/fifo/per_cpu/fifo_agent.cc:
--------------------------------------------------------------------------------
1 | // Copyright 2021 Google LLC
2 | //
3 | // Use of this source code is governed by a BSD-style
4 | // license that can be found in the LICENSE file or at
5 | // https://developers.google.com/open-source/licenses/bsd
6 |
7 | #include
8 | #include
9 | #include
10 |
11 | #include "absl/debugging/symbolize.h"
12 | #include "absl/flags/parse.h"
13 | #include "lib/agent.h"
14 | #include "lib/enclave.h"
15 | #include "schedulers/fifo/per_cpu/fifo_scheduler.h"
16 |
// Command-line flags consumed by ParseAgentConfig() below.
// --ghost_cpus: CPU list string, parsed with Topology::ParseCpuStr.
ABSL_FLAG(std::string, ghost_cpus, "1-5", "cpulist");
// --enclave: when non-empty, the path is opened and its fd stored in the
// agent config.
ABSL_FLAG(std::string, enclave, "", "Connect to preexisting enclave directory");
19 |
20 | namespace ghost {
21 |
22 | static void ParseAgentConfig(AgentConfig* config) {
23 | CpuList ghost_cpus =
24 | MachineTopology()->ParseCpuStr(absl::GetFlag(FLAGS_ghost_cpus));
25 | CHECK(!ghost_cpus.Empty());
26 |
27 | Topology* topology = MachineTopology();
28 | config->topology_ = topology;
29 | config->cpus_ = ghost_cpus;
30 | std::string enclave = absl::GetFlag(FLAGS_enclave);
31 | if (!enclave.empty()) {
32 | int fd = open(enclave.c_str(), O_PATH);
33 | CHECK_GE(fd, 0);
34 | config->enclave_fd_ = fd;
35 | }
36 | }
37 |
38 | } // namespace ghost
39 |
40 | int main(int argc, char* argv[]) {
41 | absl::InitializeSymbolizer(argv[0]);
42 | absl::ParseCommandLine(argc, argv);
43 |
44 | ghost::AgentConfig config;
45 | ghost::ParseAgentConfig(&config);
46 |
47 | printf("Initializing...\n");
48 |
49 | // Using new so we can destruct the object before printing Done
50 | auto uap = new ghost::AgentProcess,
51 | ghost::AgentConfig>(config);
52 |
53 | ghost::GhostHelper()->InitCore();
54 | printf("Initialization complete, ghOSt active.\n");
55 | // When `stdout` is directed to a terminal, it is newline-buffered. When
56 | // `stdout` is directed to a non-interactive device (e.g, a Python subprocess
57 | // pipe), it is fully buffered. Thus, in order for the Python script to read
58 | // the initialization message as soon as it is passed to `printf`, we need to
59 | // manually flush `stdout`.
60 | fflush(stdout);
61 |
62 | ghost::Notification exit;
63 | ghost::GhostSignals::AddHandler(SIGINT, [&exit](int) {
64 | static bool first = true; // We only modify the first SIGINT.
65 |
66 | if (first) {
67 | exit.Notify();
68 | first = false;
69 | return false; // We'll exit on subsequent SIGTERMs.
70 | }
71 | return true;
72 | });
73 |
74 | // TODO: this is racy - uap could be deleted already
75 | ghost::GhostSignals::AddHandler(SIGUSR1, [uap](int) {
76 | uap->Rpc(ghost::FifoScheduler::kDebugRunqueue);
77 | return false;
78 | });
79 |
80 | exit.WaitForNotification();
81 |
82 | delete uap;
83 |
84 | printf("\nDone!\n");
85 |
86 | return 0;
87 | }
88 |
--------------------------------------------------------------------------------
/schedulers/flux/agent_flux.cc:
--------------------------------------------------------------------------------
1 | // Copyright 2023 Google LLC
2 | //
3 | // Use of this source code is governed by a BSD-style
4 | // license that can be found in the LICENSE file or at
5 | // https://developers.google.com/open-source/licenses/bsd
6 |
7 | #include
8 | #include
9 | #include
10 |
11 | #include "absl/debugging/symbolize.h"
12 | #include "absl/flags/parse.h"
13 | #include "lib/agent.h"
14 | #include "lib/channel.h"
15 | #include "lib/enclave.h"
16 | #include "lib/topology.h"
17 | #include "schedulers/flux/flux_scheduler.h"
18 |
// --enclave: when non-empty, main() opens this path with O_PATH and stores
// the fd in the agent config.
ABSL_FLAG(std::string, enclave, "", "Connect to preexisting enclave directory");
20 |
21 | int main(int argc, char* argv[]) {
22 | absl::InitializeSymbolizer(argv[0]);
23 | absl::ParseCommandLine(argc, argv);
24 |
25 | ghost::Topology* t = ghost::MachineTopology();
26 | ghost::AgentConfig config(t, t->all_cpus());
27 | std::string enclave = absl::GetFlag(FLAGS_enclave);
28 | if (!enclave.empty()) {
29 | int fd = open(enclave.c_str(), O_PATH);
30 | CHECK_GE(fd, 0);
31 | config.enclave_fd_ = fd;
32 | }
33 |
34 | auto uap = new ghost::AgentProcess,
35 | ghost::AgentConfig>(config);
36 |
37 | ghost::GhostHelper()->InitCore();
38 |
39 | printf("Initialization complete, ghOSt active.\n");
40 | fflush(stdout);
41 |
42 | ghost::Notification exit;
43 | ghost::GhostSignals::AddHandler(SIGINT, [&exit](int) {
44 | static bool first = true;
45 | if (first) {
46 | exit.Notify();
47 | first = false;
48 | return false;
49 | }
50 | return true;
51 | });
52 |
53 | exit.WaitForNotification();
54 |
55 | delete uap;
56 |
57 | printf("\nDone!\n");
58 | return 0;
59 | }
60 |
--------------------------------------------------------------------------------
/schedulers/flux/flux_scheduler.cc:
--------------------------------------------------------------------------------
1 | // Copyright 2023 Google LLC
2 | //
3 | // Use of this source code is governed by a BSD-style
4 | // license that can be found in the LICENSE file or at
5 | // https://developers.google.com/open-source/licenses/bsd
6 |
7 | #include
8 |
9 | #include "schedulers/flux/flux_scheduler.h"
10 |
11 | #include "absl/strings/str_format.h"
12 | #include "lib/flux.h"
13 |
14 | namespace ghost {
15 |
16 | // We only have one scheduler of each type, so id-to-type is pretty basic.
17 | int IdToType(int id)
18 | {
19 | switch (id) {
20 | case FLUX_SCHED_NONE:
21 | return FLUX_SCHED_TYPE_NONE;
22 | case FLUX_SCHED_ROCI:
23 | return FLUX_SCHED_TYPE_ROCI;
24 | case FLUX_SCHED_BIFF:
25 | return FLUX_SCHED_TYPE_BIFF;
26 | case FLUX_SCHED_IDLE:
27 | return FLUX_SCHED_TYPE_IDLE;
28 | default:
29 | return FLUX_SCHED_TYPE_NONE;
30 | }
31 | }
32 |
33 | FluxScheduler::FluxScheduler(Enclave* enclave, CpuList cpulist,
34 | const AgentConfig& config)
35 | : Scheduler(enclave, std::move(cpulist)),
36 | unused_channel_(1, /*node=*/0) {
37 |
38 | bpf_obj_ = flux_bpf__open();
39 | CHECK_NE(bpf_obj_, nullptr);
40 |
41 | FluxCheckMaps(bpf_obj_);
42 | FluxSetProgTypes(bpf_obj_);
43 | FluxSetGlobals(bpf_obj_);
44 |
45 | CHECK_EQ(flux_bpf__load(bpf_obj_), 0);
46 |
47 | FluxRegisterProgs(bpf_obj_);
48 |
49 | cpu_data_ = static_cast(bpf_map__mmap(bpf_obj_->maps.cpu_data));
50 | CHECK_NE(cpu_data_, MAP_FAILED);
51 | for (int i = 0; i < FLUX_MAX_CPUS; ++i) {
52 | cpu_data_[i].f.id = i;
53 | }
54 | struct flux_sched s;
55 | for (int i = 0; i < FLUX_NR_SCHEDS; i++) {
56 | memset(&s, 0, sizeof(struct flux_sched));
57 | s.f.id = i;
58 | s.f.type = IdToType(i);
59 | // All idle *sched types* should have their nr_cpus_wanted set
60 | if (s.f.type == FLUX_SCHED_TYPE_IDLE) {
61 | s.f.nr_cpus_wanted = MachineTopology()->num_cpus();
62 | }
63 | if (s.f.id == FLUX_SCHED_ROCI) {
64 | s.roci.primary_id = FLUX_SCHED_BIFF;
65 | s.roci.idle_id = FLUX_SCHED_IDLE;
66 | }
67 | CHECK_EQ(bpf_map_update_elem(bpf_map__fd(bpf_obj_->maps.schedulers),
68 | &i, &s, BPF_ANY), 0);
69 | }
70 | thread_data_ = static_cast(
71 | bpf_map__mmap(bpf_obj_->maps.thread_data));
72 | CHECK_NE(thread_data_, MAP_FAILED);
73 | }
74 |
75 | FluxScheduler::~FluxScheduler() {
76 | bpf_map__munmap(bpf_obj_->maps.cpu_data, cpu_data_);
77 | bpf_map__munmap(bpf_obj_->maps.thread_data, thread_data_);
78 | flux_bpf__destroy(bpf_obj_);
79 | }
80 |
81 | void FluxScheduler::EnclaveReady() {
82 | enclave()->SetDeliverTicks(true);
83 | enclave()->SetDeliverCpuAvailability(true);
84 | // We learn about cpu availability via a message. Some cpus may currently be
85 | // available and idle, but will not generate a message until CFS runs on them.
86 | // Poke each cpu to speed up the process.
87 | //
88 | // Running a CFS task on a cpu will eventually result in an
89 | // unavailable->available edge when that cpu runs out of CFS tasks, and that
90 | // edge will generate a MSG_CPU_AVAILABLE.
91 | std::thread thread([this] {
92 | for (const Cpu& cpu : *enclave()->cpus()) {
93 | // Ignore errors. It's possible the agent is in a cgroup that doesn't
94 | // include all of the enclave cpus. The cpus we skip will eventually run
95 | // a CFS task, just not right away.
96 | (void) GhostHelper()->SchedSetAffinity(Gtid::Current(),
97 | MachineTopology()->ToCpuList({cpu}));
98 | }
99 | });
100 | thread.join();
101 |
102 | WRITE_ONCE(bpf_obj_->bss->user_initialized, true);
103 | }
104 |
105 | void FluxScheduler::DiscoverTasks() {
106 | enclave()->DiscoverTasks();
107 | }
108 |
109 | void FluxAgentTask::AgentThread() {
110 | gtid().assign_name("Agent:" + std::to_string(cpu().id()));
111 |
112 | SignalReady();
113 | WaitForEnclaveReady();
114 |
115 | while (!Finished()) {
116 | RunRequest* req = enclave()->GetRunRequest(cpu());
117 | req->LocalYield(status_word().barrier(), /*flags=*/0);
118 | }
119 | }
120 |
121 | } // namespace ghost
122 |
--------------------------------------------------------------------------------
/schedulers/flux/flux_scheduler.h:
--------------------------------------------------------------------------------
1 | // Copyright 2023 Google LLC
2 | //
3 | // Use of this source code is governed by a BSD-style
4 | // license that can be found in the LICENSE file or at
5 | // https://developers.google.com/open-source/licenses/bsd
6 |
7 | #ifndef GHOST_SCHEDULERS_FLUX_FLUX_SCHEDULER_H_
8 | #define GHOST_SCHEDULERS_FLUX_FLUX_SCHEDULER_H_
9 |
10 | #include
11 | #include
12 |
13 | #include "third_party/bpf/flux_bpf.h"
14 | #include "lib/agent.h"
15 | #include "lib/scheduler.h"
16 | #include "schedulers/flux/flux_bpf.skel.h"
17 |
18 | namespace ghost {
19 |
20 | class FluxScheduler : public Scheduler {
21 | public:
22 | explicit FluxScheduler(Enclave* enclave, CpuList cpulist,
23 | const AgentConfig& config);
24 | ~FluxScheduler() final;
25 |
26 | void EnclaveReady() final;
27 | void DiscoverTasks() final;
28 | Channel& GetDefaultChannel() final { return unused_channel_; };
29 |
30 | private:
31 | LocalChannel unused_channel_;
32 | flux_bpf* bpf_obj_;
33 | flux_cpu* cpu_data_;
34 | flux_thread* thread_data_;
35 | };
36 |
37 | class FluxAgentTask : public LocalAgent {
38 | public:
39 | FluxAgentTask(Enclave* enclave, Cpu cpu, FluxScheduler* flux_sched)
40 | : LocalAgent(enclave, cpu), flux_sched_(flux_sched) {}
41 |
42 | void AgentThread() override;
43 | Scheduler* AgentScheduler() const override { return flux_sched_; }
44 |
45 | private:
46 | FluxScheduler* flux_sched_;
47 | };
48 |
// Full agent for flux: owns the single FluxScheduler and creates one
// FluxAgentTask per cpu.
//
// NOTE(review): the template parameter list after `template`, the template
// arguments of `FullAgent`, `std::make_unique` and `std::unique_ptr` all
// appear to have been stripped from this copy of the file. The code is left
// exactly as found; confirm the missing arguments against the upstream source.
template
class FullFluxAgent : public FullAgent {
 public:
  // Builds the scheduler over the enclave's cpus, starts the agent tasks and
  // then marks the enclave ready, in that order.
  explicit FullFluxAgent(AgentConfig config)
      : FullAgent(config) {
    flux_sched_ = std::make_unique(
        &this->enclave_, *this->enclave_.cpus(), config);
    this->StartAgentTasks();
    this->enclave_.Ready();
  }

  ~FullFluxAgent() override {
    // Turn off the availability messages before fully tearing down. Once the
    // BPF program is removed, we won't filter the messages anymore, and we'll
    // get a few MSG_CPU_AVAILABLE/BUSY sent to userspace. That will overflow
    // our channel. On older kernels, that'd trigger a WARN_ON_ONCE.
    this->enclave_.SetDeliverCpuAvailability(false);
    this->TerminateAgentTasks();
  }

  // Creates the agent task for `cpu`; the task gets a non-owning pointer to
  // the shared scheduler.
  std::unique_ptr MakeAgent(const Cpu& cpu) override {
    return std::make_unique(&this->enclave_, cpu,
                            flux_sched_.get());
  }

  // No RPCs are implemented: every request gets response code -1.
  void RpcHandler(int64_t req, const AgentRpcArgs& args,
                  AgentRpcResponse& response) override {
    switch (req) {
      default:
        response.response_code = -1;
        return;
    }
  }

 private:
  // The scheduler shared by all agent tasks of this agent.
  std::unique_ptr flux_sched_;
};
86 |
87 | } // namespace ghost
88 |
89 | #endif // GHOST_SCHEDULERS_FLUX_FLUX_SCHEDULER_H_
90 |
--------------------------------------------------------------------------------
/schedulers/sol/agent_sol.cc:
--------------------------------------------------------------------------------
1 | // Copyright 2021 Google LLC
2 | //
3 | // Use of this source code is governed by a BSD-style
4 | // license that can be found in the LICENSE file or at
5 | // https://developers.google.com/open-source/licenses/bsd
6 |
7 | #include
8 |
9 | #include
10 | #include
11 | #include
12 |
13 | #include "absl/debugging/symbolize.h"
14 | #include "absl/flags/parse.h"
15 | #include "lib/agent.h"
16 | #include "lib/channel.h"
17 | #include "lib/enclave.h"
18 | #include "lib/topology.h"
19 | #include "schedulers/sol/sol_scheduler.h"
20 |
21 | ABSL_FLAG(std::string, ghost_cpus, "1-5", "cpulist");
22 | ABSL_FLAG(int32_t, globalcpu, -1,
23 | "Global cpu. If -1, then defaults to the first cpu in ");
24 | ABSL_FLAG(absl::Duration, preemption_time_slice, absl::InfiniteDuration(),
25 | "A task is preempted after running for this time slice (default = "
26 | "infinite time slice)");
27 |
28 | namespace ghost {
29 |
30 | void ParseSolConfig(SolConfig* config) {
31 | int globalcpu = absl::GetFlag(FLAGS_globalcpu);
32 | CpuList ghost_cpus =
33 | MachineTopology()->ParseCpuStr(absl::GetFlag(FLAGS_ghost_cpus));
34 |
35 | CHECK_GT(ghost_cpus.Size(), 1);
36 |
37 | if (globalcpu < 0) {
38 | CHECK_EQ(globalcpu, -1);
39 | globalcpu = ghost_cpus.Front().id();
40 | absl::SetFlag(&FLAGS_globalcpu, globalcpu);
41 | }
42 |
43 | Topology* topology = MachineTopology();
44 | config->topology_ = topology;
45 | config->cpus_ = ghost_cpus;
46 | config->global_cpu_ = topology->cpu(globalcpu);
47 | config->numa_node_ = ghost_cpus.Front().numa_node();
48 | config->preemption_time_slice_ = absl::GetFlag(FLAGS_preemption_time_slice);
49 | }
50 |
51 | } // namespace ghost
52 |
53 | int main(int argc, char* argv[]) {
54 | absl::InitializeSymbolizer(argv[0]);
55 |
56 | absl::ParseCommandLine(argc, argv);
57 |
58 | ghost::SolConfig config;
59 | ghost::ParseSolConfig(&config);
60 |
61 | printf("Core map\n");
62 |
63 | int n = 0;
64 | for (const ghost::Cpu& c : config.topology_->all_cores()) {
65 | printf("( ");
66 | for (const ghost::Cpu& s : c.siblings()) printf("%2d ", s.id());
67 | printf(")%c", ++n % 8 == 0 ? '\n' : '\t');
68 | }
69 | printf("\n");
70 |
71 | printf("Initializing...\n");
72 |
73 | // Using new so we can destruct the object before printing Done
74 | auto uap = new ghost::AgentProcess,
75 | ghost::SolConfig>(config);
76 |
77 | ghost::GhostHelper()->InitCore();
78 |
79 | printf("Initialization complete, ghOSt active.\n");
80 |
81 | // When `stdout` is directed to a terminal, it is newline-buffered. When
82 | // `stdout` is directed to a non-interactive device (e.g, a Python subprocess
83 | // pipe), it is fully buffered. Thus, in order for the Python script to read
84 | // the initialization message as soon as it is passed to `printf`, we need to
85 | // manually flush `stdout`.
86 | fflush(stdout);
87 |
88 | ghost::Notification exit;
89 | ghost::GhostSignals::AddHandler(SIGINT, [&exit](int) {
90 | static bool first = true; // We only modify the first SIGINT.
91 |
92 | if (first) {
93 | exit.Notify();
94 | first = false;
95 | return false; // We'll exit on subsequent SIGTERMs.
96 | }
97 | return true;
98 | });
99 |
100 | // TODO: this is racy - uap could be deleted already
101 | ghost::GhostSignals::AddHandler(SIGUSR1, [uap](int) {
102 | uap->Rpc(ghost::SolScheduler::kDebugRunqueue);
103 | uap->Rpc(ghost::SolScheduler::kDumpStats);
104 | return false;
105 | });
106 |
107 | exit.WaitForNotification();
108 |
109 | printf("%ld nsecs\n", uap->Rpc(ghost::SolScheduler::kGetSchedOverhead));
110 |
111 | delete uap;
112 |
113 | printf("Done!\n");
114 | return 0;
115 | }
116 |
--------------------------------------------------------------------------------
/shared/shmem.h:
--------------------------------------------------------------------------------
1 | // Copyright 2021 Google LLC
2 | //
3 | // Use of this source code is governed by a BSD-style
4 | // license that can be found in the LICENSE file or at
5 | // https://developers.google.com/open-source/licenses/bsd
6 |
7 | // Provides an abstraction for constructing shared memory mappings between two
8 | // (or more) processes. Mappings are huge-page backed, with synchronization for
9 | // versioning, and client initialization.
10 | //
// Currently, a process can host an arbitrary number of shmem regions, but
// each must have a unique name. There is no limit on how many clients may
// connect to a process's region.
14 | //
15 | // Connecting clients must have the ability to examine open file descriptors of
16 | // the remote process. Generally speaking, for the ghost use-case, this is not
17 | // a particular impingement as we expect processes to host shared memory with
18 | // their scheduling requirements and privileged agents to be the connecting
19 | // clients.
20 | #ifndef GHOST_SHARED_SHMEM_H
21 | #define GHOST_SHARED_SHMEM_H
22 |
23 | #include
24 | #include
25 | #include
26 |
27 | #include
28 |
29 | #include "lib/base.h"
30 |
31 | namespace ghost {
32 |
// A named shared memory region that one process hosts and other processes
// attach to. See the file comment for the overall model.
class GhostShmem {
 public:
  // Creates an empty object that is not backed by any region yet. All members
  // start out in a well-defined state (null pointers, zero size, no fd).
  GhostShmem() = default;
  // Constructs a new named shared memory region hosted by the current process.
  // It is guaranteed that the useful size will be at least "size".
  // REQUIRES: "name" must uniquely identify this region.
  GhostShmem(int64_t client_version, const char* name, size_t size);
  ~GhostShmem();

  // Connects to the region identified by "name", hosted by the process "pid".
  // REQUIRES: "pid" hosting "name" must exist.
  bool Attach(int64_t client_version, const char* name, pid_t pid);

  // Called by clients when they are ready for remote connections to proceed.
  // REQUIRES: Must be called.
  void MarkReady();

  // A raw byte mapping into the hosted shared memory region.
  inline char* bytes() { return static_cast<char*>(data_); }

  // This is the client usable bytes addressable via bytes(). It will be at
  // least as large as requested at time of construction.
  size_t size();

  // This includes internal overheads and roundings on the mapping.
  size_t absolute_size() const { return map_size_; }
  inline const void* absolute_start() const { return shmem_; }

  // The process that owns the shmem region.
  pid_t Owner() const;

  // Internal overhead that clients may take into account when choosing a
  // mapping size. This is useful as it represents the padding that should be
  // considered if trying to optimally pack against the huge-page backing.
  static size_t OverHeadbytes() { return kHeaderReservedBytes; }

  GhostShmem(const GhostShmem&) = delete;
  GhostShmem(GhostShmem&&) = delete;

  static GhostShmem* GetShmemBlob(size_t size);

 private:
  struct InternalHeader;

  void WaitForReady();

  // Thin wrapper around the raw memfd_create system call.
  static int memfd_create(const char* name, unsigned int flags) {
    return syscall(__NR_memfd_create, name, flags);
  }
  void CreateShmem(int64_t client_version, const char* suffix, size_t size);
  bool ConnectShmem(int64_t client_version, const char* suffix, pid_t pid);

  // These members describe the shared memory area.
  void* shmem_ = nullptr;
  size_t map_size_ = 0;
  int memfd_ = -1;
  // These members map into the shared memory area.
  InternalHeader* hdr_ = nullptr;
  void* data_ = nullptr;

  static int OpenGhostShmemFd(const char* suffix, pid_t pid);
  static constexpr int kHeaderReservedBytes = 4096;  // PAGE_SIZE
};
96 |
97 | } // namespace ghost
98 |
99 | #endif // GHOST_SHARED_SHMEM_H
100 |
--------------------------------------------------------------------------------
/tests/biff_test.cc:
--------------------------------------------------------------------------------
1 | // Copyright 2022 Google LLC
2 | //
3 | // Use of this source code is governed by a BSD-style
4 | // license that can be found in the LICENSE file or at
5 | // https://developers.google.com/open-source/licenses/bsd
6 |
7 | #include "gmock/gmock.h"
8 | #include "gtest/gtest.h"
9 | #include "schedulers/biff/biff_scheduler.h"
10 |
11 | namespace ghost {
12 | namespace {
13 |
14 | class BiffTest : public testing::Test {
15 | protected:
16 | static void SetUpTestSuite() {
17 | Topology* t = MachineTopology();
18 | AgentConfig cfg(t, t->all_cpus());
19 |
20 | uap_ = new AgentProcess, AgentConfig>(cfg);
21 | }
22 |
23 | static void TearDownTestSuite() {
24 | delete uap_;
25 | uap_ = nullptr;
26 | }
27 |
28 | static AgentProcess, AgentConfig>* uap_;
29 | };
30 |
31 | AgentProcess, AgentConfig>* BiffTest::uap_;
32 |
33 | TEST_F(BiffTest, Simple) {
34 | RemoteThreadTester(/*num_threads=*/1).Run(
35 | [] {
36 | absl::SleepFor(absl::Milliseconds(10));
37 | sched_yield();
38 | absl::SleepFor(absl::Milliseconds(10));
39 | }
40 | );
41 | }
42 |
43 | TEST_F(BiffTest, SimpleMany) {
44 | RemoteThreadTester().Run(
45 | [] {
46 | absl::SleepFor(absl::Milliseconds(10));
47 | sched_yield();
48 | absl::SleepFor(absl::Milliseconds(10));
49 | }
50 | );
51 | }
52 |
53 | TEST_F(BiffTest, BusyRunFor) {
54 | RemoteThreadTester(/*num_threads=*/100).Run(
55 | [] {
56 | SpinFor(absl::Milliseconds(10));
57 | }
58 | );
59 | }
60 |
61 | } // namespace
62 | } // namespace ghost
63 |
64 | int main(int argc, char **argv) {
65 | testing::InitGoogleMock(&argc, argv);
66 |
67 | return RUN_ALL_TESTS();
68 | }
69 |
--------------------------------------------------------------------------------
/tests/capabilities_test.h:
--------------------------------------------------------------------------------
1 | // Copyright 2021 Google LLC
2 | //
3 | // Use of this source code is governed by a BSD-style
4 | // license that can be found in the LICENSE file or at
5 | // https://developers.google.com/open-source/licenses/bsd
6 |
7 | #ifndef GHOST_TESTS_CAPABILITIES_TEST_H_
8 | #define GHOST_TESTS_CAPABILITIES_TEST_H_
9 |
10 | #include "gmock/gmock.h"
11 | #include "gtest/gtest.h"
12 | #include
13 |
14 | using ::testing::Eq;
15 | using ::testing::IsFalse;
16 | using ::testing::IsTrue;
17 | using ::testing::NotNull;
18 |
19 | // Privileged ghOSt syscalls may only be used by threads with the `CAP_SYS_NICE`
20 | // capability.
21 | constexpr cap_value_t kGhostCapability = CAP_SYS_NICE;
22 |
23 | // Sets `is_set` to true if the calling thread has the `CAP_SYS_NICE` capability
24 | // in its effective set. Sets `is_set` to false otherwise.
25 | //
26 | // Note that we pass `is_set` by reference rather than return a boolean so that
27 | // we can use `ASSERT_THAT` and `EXPECT_THAT` macros in this function. These
28 | // macros only work in functions with void return types.
29 | void NiceCapabilitySet(bool& is_set) {
30 | cap_t current = cap_get_proc();
31 | ASSERT_THAT(current, NotNull());
32 | cap_flag_value_t flag_value;
33 | ASSERT_THAT(
34 | cap_get_flag(current, kGhostCapability, CAP_EFFECTIVE, &flag_value),
35 | Eq(0));
36 | EXPECT_THAT(cap_free(current), Eq(0));
37 | is_set = (flag_value == CAP_SET);
38 | }
39 |
40 | // Asserts that the `CAP_SYS_NICE` capability is set.
41 | void AssertNiceCapabilitySet() {
42 | bool is_set = false;
43 | NiceCapabilitySet(is_set);
44 | ASSERT_THAT(is_set, IsTrue());
45 | }
46 |
47 | // Asserts that the `CAP_SYS_NICE` capability is not set.
48 | void AssertNiceCapabilityNotSet() {
49 | bool is_set = true;
50 | NiceCapabilitySet(is_set);
51 | ASSERT_THAT(is_set, IsFalse());
52 | }
53 |
54 | // Drops the `CAP_SYS_NICE` capability from the calling thread's effective set.
55 | // Note that the calling thread must already hold the `CAP_SYS_NICE` capability
56 | // when it calls this function.
57 | void DropNiceCapability() {
58 | AssertNiceCapabilitySet();
59 |
60 | cap_t current = cap_get_proc();
61 | ASSERT_THAT(current, NotNull());
62 | const cap_value_t cap_array[] = {kGhostCapability};
63 | ASSERT_THAT(
64 | cap_set_flag(current, CAP_EFFECTIVE, /*ncaps=*/1, cap_array, CAP_CLEAR),
65 | Eq(0));
66 | ASSERT_THAT(cap_set_proc(current), Eq(0));
67 | EXPECT_THAT(cap_free(current), Eq(0));
68 |
69 | AssertNiceCapabilityNotSet();
70 | }
71 |
72 | #endif // GHOST_TESTS_CAPABILITIES_TEST_H_
73 |
--------------------------------------------------------------------------------
/tests/cfs_bpf_test.cc:
--------------------------------------------------------------------------------
1 | // Copyright 2022 Google LLC
2 | //
3 | // Use of this source code is governed by a BSD-style
4 | // license that can be found in the LICENSE file or at
5 | // https://developers.google.com/open-source/licenses/bsd
6 |
7 | #include "gmock/gmock.h"
8 | #include "gtest/gtest.h"
9 | #include "schedulers/cfs_bpf/cfs_scheduler.h"
10 |
11 | namespace ghost {
12 | namespace {
13 |
14 | class CfsTest : public testing::Test {
15 | protected:
16 | static void SetUpTestSuite() {
17 | Topology* t = MachineTopology();
18 | AgentConfig cfg(t, t->all_cpus());
19 |
20 | uap_ = new AgentProcess, AgentConfig>(cfg);
21 | }
22 |
23 | static void TearDownTestSuite() {
24 | delete uap_;
25 | uap_ = nullptr;
26 | }
27 |
28 | static AgentProcess, AgentConfig>* uap_;
29 | };
30 |
31 | AgentProcess, AgentConfig>* CfsTest::uap_;
32 |
33 | TEST_F(CfsTest, Simple) {
34 | GhostThread t(GhostThread::KernelScheduler::kGhost, [] {
35 | absl::SleepFor(absl::Milliseconds(10));
36 | sched_yield();
37 | });
38 |
39 | t.Join();
40 | }
41 |
42 | TEST_F(CfsTest, SimpleMany) {
43 | constexpr int kNumThreads = 1000;
44 | std::vector> threads;
45 | threads.reserve(kNumThreads);
46 |
47 | for (int i = 0; i < kNumThreads; ++i) {
48 | threads.push_back(
49 | std::make_unique
50 | (GhostThread::KernelScheduler::kGhost, [] {
51 | absl::SleepFor(absl::Milliseconds(10));
52 | sched_yield();
53 | absl::SleepFor(absl::Milliseconds(10));
54 | }));
55 | }
56 |
57 | for (std::unique_ptr& t : threads) {
58 | t->Join();
59 | }
60 |
61 | }
62 |
63 | TEST_F(CfsTest, BusyRunFor) {
64 |
65 | constexpr int kNumThreads = 1000;
66 | const absl::Duration d = absl::Milliseconds(10);
67 |
68 | std::vector> threads;
69 | threads.reserve(kNumThreads);
70 |
71 | for (int i = 0; i < kNumThreads; ++i) {
72 | threads.push_back(
73 | std::make_unique
74 | (GhostThread::KernelScheduler::kGhost, [&] {
75 | SpinFor(d);
76 | }));
77 | }
78 |
79 | for (std::unique_ptr& t : threads) {
80 | t->Join();
81 | }
82 |
83 |
84 | }
85 |
86 | } // namespace
87 | } // namespace ghost
88 |
89 | int main(int argc, char **argv) {
90 | testing::InitGoogleMock(&argc, argv);
91 |
92 | return RUN_ALL_TESTS();
93 | }
94 |
--------------------------------------------------------------------------------
/tests/flux_test.cc:
--------------------------------------------------------------------------------
1 | // Copyright 2023 Google LLC
2 | //
3 | // Use of this source code is governed by a BSD-style
4 | // license that can be found in the LICENSE file or at
5 | // https://developers.google.com/open-source/licenses/bsd
6 |
7 | #include
8 |
9 | #include "gmock/gmock.h"
10 | #include "gtest/gtest.h"
11 | #include "schedulers/flux/flux_scheduler.h"
12 |
13 | namespace ghost {
14 | namespace {
15 |
16 | class FluxTest : public testing::Test {  // Fixture that shares one agent process across every test in the suite.
17 | protected:
18 | static void SetUpTestSuite() {  // GoogleTest calls this once, before the first test of the suite.
19 | Topology* t = MachineTopology();
20 | AgentConfig cfg(t, t->all_cpus());  // Config covers every CPU reported by the topology.
21 |
22 | uap_ = new AgentProcess, AgentConfig>(cfg);  // NOTE(review): template argument list looks truncated by extraction — restore from upstream.
23 | }
24 |
25 | static void TearDownTestSuite() {  // GoogleTest calls this once, after the last test of the suite.
26 | delete uap_;
27 | uap_ = nullptr;  // Reset so a stale pointer is never observed after teardown.
28 | }
29 |
30 | static AgentProcess, AgentConfig>* uap_;  // Suite-wide agent process; created in SetUpTestSuite, freed in TearDownTestSuite.
31 | };
32 |
33 | AgentProcess, AgentConfig>* FluxTest::uap_;  // Out-of-class definition of the static member.
34 |
35 | TEST_F(FluxTest, Simple) {  // Single-thread smoke test: sleep, yield, sleep.
36 | RemoteThreadTester(/*num_threads=*/1).Run(
37 | [] {  // body handed to RemoteThreadTester; presumably run on the ghost thread — confirm in RemoteThreadTester
38 | absl::SleepFor(absl::Milliseconds(10));
39 | sched_yield();
40 | absl::SleepFor(absl::Milliseconds(10));
41 | }
42 | );
43 | }
44 |
45 | TEST_F(FluxTest, SimpleMany) {  // Same body as Simple, using RemoteThreadTester's default thread count.
46 | RemoteThreadTester().Run(  // NOTE(review): default num_threads is defined by RemoteThreadTester, not visible here.
47 | [] {
48 | absl::SleepFor(absl::Milliseconds(10));
49 | sched_yield();
50 | absl::SleepFor(absl::Milliseconds(10));
51 | }
52 | );
53 | }
54 |
55 | TEST_F(FluxTest, BusyRunFor) {  // 100 threads each spin for 10ms without sleeping or yielding.
56 | RemoteThreadTester(/*num_threads=*/100).Run(
57 | [] {
58 | SpinFor(absl::Milliseconds(10));
59 | }
60 | );
61 | }
62 |
63 | TEST_F(FluxTest, PrioChangeSelf) {  // Each thread changes its own nice value around a sleep and a yield.
64 | RemoteThreadTester().Run(
65 | [] {
66 | EXPECT_EQ(setpriority(PRIO_PROCESS, 0, 5), 0);  // who == 0 targets the caller.
67 | absl::SleepFor(absl::Milliseconds(10));
68 | EXPECT_EQ(setpriority(PRIO_PROCESS, 0, 10), 0);
69 | sched_yield();
70 | EXPECT_EQ(setpriority(PRIO_PROCESS, 0, 5), 0);
71 | }
72 | );
73 | }
74 |
75 | TEST_F(FluxTest, PrioChangeRemote) {  // The nice value of each ghost thread is changed from outside that thread.
76 | RemoteThreadTester().Run(
77 | [] { // ghost threads: spin, yield, then sleep
78 | SpinFor(absl::Milliseconds(5));
79 | sched_yield();
80 | absl::SleepFor(absl::Milliseconds(5));
81 | },
82 | [](GhostThread* t) { // remote, per-thread work: set the target thread's priority twice, by tid
83 | EXPECT_EQ(setpriority(PRIO_PROCESS, t->tid(), 5), 0);
84 | EXPECT_EQ(setpriority(PRIO_PROCESS, t->tid(), 10), 0);
85 | }
86 | );
87 | }
88 |
89 | TEST_F(FluxTest, DepartedSelf) {  // Each ghost thread moves itself to SCHED_OTHER and verifies the switch.
90 | RemoteThreadTester().Run(
91 | [] { // ghost threads
92 | absl::SleepFor(absl::Milliseconds(10));
93 | const sched_param param{};  // Zero-initialized, so sched_priority is 0.
94 | EXPECT_EQ(sched_setscheduler(/*pid=*/0, SCHED_OTHER, &param), 0);  // pid 0 == calling thread.
95 | EXPECT_EQ(sched_getscheduler(/*pid=*/0), SCHED_OTHER);
96 | },
97 | [](GhostThread* t) { // remote, per-thread work: intentionally empty
98 | }
99 | );
100 | }
101 |
102 | TEST_F(FluxTest, DepartedRemote) {  // Each ghost thread is moved to SCHED_OTHER from outside that thread.
103 | RemoteThreadTester().Run(
104 | [] { // ghost threads: spin, yield, then sleep
105 | SpinFor(absl::Milliseconds(5));
106 | sched_yield();
107 | absl::SleepFor(absl::Milliseconds(5));
108 | },
109 | [](GhostThread* t) { // remote, per-thread work
110 | const sched_param param{};  // Zero-initialized, so sched_priority is 0.
111 | EXPECT_EQ(sched_setscheduler(t->tid(), SCHED_OTHER, &param), 0);
112 | }
113 | );
114 | }
115 |
116 | // Originally, I thought this was triggering a bug. Turns out it just takes a
117 | // long time with 1000 threads (~30 seconds on CONFIG=dbg in virtme).
118 | TEST_F(FluxTest, DepartedRemoteShortSleep) {  // Remote departure while the ghost thread does only a 1ns sleep.
119 | RemoteThreadTester(/*num_threads=*/100).Run(
120 | [] { // ghost threads
121 | absl::SleepFor(absl::Nanoseconds(1));
122 | },
123 | [](GhostThread* t) { // remote, per-thread work
124 | const sched_param param{};  // Zero-initialized, so sched_priority is 0.
125 | EXPECT_EQ(sched_setscheduler(t->tid(), SCHED_OTHER, &param), 0);
126 | }
127 | );
128 | }
129 |
130 | } // namespace
131 | } // namespace ghost
132 |
133 | int main(int argc, char **argv) {  // Test binary entry point.
134 | testing::InitGoogleMock(&argc, argv);  // Initializes GoogleMock (and GoogleTest) and consumes their command-line flags.
135 |
136 | return RUN_ALL_TESTS();  // Result of the test run becomes the process exit status.
137 | }
138 |
--------------------------------------------------------------------------------
/third_party/BUILD.bazel:
--------------------------------------------------------------------------------
1 | package(default_visibility = ["//visibility:public"])  # Every target in this package is visible to all packages.
2 |
3 | # This BUILD file is necessary so that `//third_party` is a package that the
4 | # WORKSPACE file can reference. Without this BUILD file, Bazel will be unable to
5 | # pull in the project dependencies and compile the project.
6 |
7 | exports_files([  # Lets other packages refer to these source files by label.
8 | "iovisor_bcc/bits.bpf.h",
9 | "iovisor_bcc/trace_helpers.h",
10 | "util/util.h",
11 | ])
12 |
--------------------------------------------------------------------------------
/third_party/bpf/BUILD:
--------------------------------------------------------------------------------
1 | # Note: If you modify this BUILD file, please contact jhumphri@ first to ensure
2 | # that you are not breaking the Copybara script.
3 |
4 | load("//:bpf/bpf.bzl", "bpf_program")  # bpf_program is the rule/macro used by every BPF target below.
5 |
6 | package(
7 | default_applicable_licenses = ["//:license"],
8 | default_visibility = [
9 | "//:__subpackages__",  # Visible to the root package and everything beneath it.
10 | ],
11 | )
12 |
13 | # We use the GPLv2 license for the eBPF code so that we can access kernel
14 | # functionality restricted to eBPF programs that are licensed under GPLv2. That
15 | # being said, keep in mind that all of this eBPF code is authored and owned by
16 | # Google.
17 | licenses(["restricted"])
18 |
19 | exports_files(  # Headers (and one .bpf.c source) that other packages may reference by label.
20 | [
21 | "biff_bpf.h",
22 | "cfs_bpf.h",
23 | "common.bpf.h",
24 | "edf.h",
25 | "flux_bpf.h",
26 | "pntring.bpf.h",
27 | "pntring_funcs.bpf.h",
28 | "schedfair.h",
29 | "schedlat.h",
30 | "schedrun.h",
31 | "topology.bpf.h",
32 | "schedghostidle.bpf.c",
33 | ],
34 | )
35 |
36 | filegroup(  # Flux framework sources/headers; listed in the hdrs of :flux_bpf.
37 | name = "flux_infra",
38 | srcs = [
39 | "flux_api.bpf.c",
40 | "flux_dispatch.bpf.c",
41 | "flux_header_bpf.h",
42 | ],
43 | )
44 |
45 | filegroup(  # Per-scheduler Flux sources/headers (biff, idle, prov, roci); listed in the hdrs of :flux_bpf.
46 | name = "flux_scheds",
47 | srcs = [
48 | "biff_flux.bpf.c",
49 | "biff_flux_bpf.h",
50 | "idle_flux.bpf.c",
51 | "idle_flux_bpf.h",
52 | "prov_flux.bpf.c",
53 | "prov_flux_bpf.h",
54 | "roci_flux.bpf.c",
55 | "roci_flux_bpf.h",
56 | ],
57 | )
58 |
59 | bpf_program(  # Biff BPF target: src biff.bpf.c, output object biff_bpf.o.
60 | name = "biff_bpf",
61 | src = "biff.bpf.c",
62 | hdrs = [
63 | "biff_bpf.h",
64 | "common.bpf.h",
65 | "topology.bpf.h",
66 | "//:abi/latest/kernel/ghost.h",
67 | "//:lib/ghost_uapi.h",
68 | ],
69 | bpf_object = "biff_bpf.o",
70 | )
71 |
72 | bpf_program(  # CFS BPF target: src cfs.bpf.c, output object cfs_bpf.o.
73 | name = "cfs_bpf",
74 | src = "cfs.bpf.c",
75 | hdrs = [
76 | "cfs_bpf.h",
77 | "common.bpf.h",
78 | "//:abi/latest/kernel/ghost.h",
79 | "//:arr_structs",
80 | "//:lib/ghost_uapi.h",
81 | ],
82 | bpf_object = "cfs_bpf.o",
83 | )
84 |
85 | bpf_program(  # EDF BPF target: src edf.bpf.c, output object edf_bpf.o.
86 | name = "edf_bpf",
87 | src = "edf.bpf.c",
88 | hdrs = [
89 | "common.bpf.h",
90 | "edf.h",
91 | "//:abi/latest/kernel/ghost.h",
92 | "//:lib/ghost_uapi.h",
93 | ],
94 | bpf_object = "edf_bpf.o",
95 | )
96 |
97 | bpf_program(  # Flux BPF target: src flux.bpf.c, output object flux_bpf.o.
98 | name = "flux_bpf",
99 | src = "flux.bpf.c",
100 | hdrs = [
101 | "common.bpf.h",
102 | "flux_bpf.h",
103 | ":flux_infra",  # flux.bpf.c #includes the .bpf.c files in these filegroups, so they are listed as headers.
104 | ":flux_scheds",
105 | "//:abi/latest/kernel/ghost.h",
106 | "//:arr_structs",
107 | "//:lib/ghost_uapi.h",
108 | ],
109 | bpf_object = "flux_bpf.o",
110 | )
111 |
112 | bpf_program(  # schedclasstop BPF target: src schedclasstop.bpf.c, output object schedclasstop_bpf.o.
113 | name = "schedclasstop_bpf",
114 | src = "schedclasstop.bpf.c",
115 | hdrs = [
116 | "common.bpf.h",
117 | ],
118 | bpf_object = "schedclasstop_bpf.o",
119 | )
120 |
121 | bpf_program(  # schedfair BPF target: src schedfair.bpf.c, output object schedfair_bpf.o.
122 | name = "schedfair_bpf",
123 | src = "schedfair.bpf.c",
124 | hdrs = [
125 | "common.bpf.h",
126 | "schedfair.h",
127 | "//third_party:iovisor_bcc/bits.bpf.h",  # Exported by //third_party/BUILD.bazel.
128 | ],
129 | bpf_object = "schedfair_bpf.o",
130 | )
131 |
132 | bpf_program(  # schedlat BPF target: src schedlat.bpf.c, output object schedlat_bpf.o.
133 | name = "schedlat_bpf",
134 | src = "schedlat.bpf.c",
135 | hdrs = [
136 | "common.bpf.h",
137 | "schedlat.h",
138 | "//third_party:iovisor_bcc/bits.bpf.h",  # Exported by //third_party/BUILD.bazel.
139 | ],
140 | bpf_object = "schedlat_bpf.o",
141 | )
142 |
143 | bpf_program(  # schedrun BPF target: src schedrun.bpf.c, output object schedrun_bpf.o.
144 | name = "schedrun_bpf",
145 | src = "schedrun.bpf.c",
146 | hdrs = [
147 | "common.bpf.h",
148 | "schedrun.h",
149 | "//third_party:iovisor_bcc/bits.bpf.h",  # Exported by //third_party/BUILD.bazel.
150 | ],
151 | bpf_object = "schedrun_bpf.o",
152 | )
153 |
154 | bpf_program(  # Test BPF target: src test.bpf.c, output object test_bpf.o.
155 | name = "test_bpf",
156 | src = "test.bpf.c",
157 | hdrs = [
158 | "common.bpf.h",
159 | "//:abi/latest/kernel/ghost.h",
160 | "//:lib/ghost_uapi.h",
161 | ],
162 | bpf_object = "test_bpf.o",
163 | )
164 |
--------------------------------------------------------------------------------
/third_party/bpf/biff_bpf.h:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2022 Google LLC
3 | *
4 | * This program is free software; you can redistribute it and/or
5 | * modify it under the terms of the GNU General Public License
6 | * version 2 as published by the Free Software Foundation.
7 | *
8 | * This program is distributed in the hope that it will be useful,
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 | * GNU General Public License for more details.
12 | */
13 |
14 | #ifndef GHOST_LIB_BPF_BPF_BIFF_BPF_H_
15 | #define GHOST_LIB_BPF_BPF_BIFF_BPF_H_
16 |
17 | #ifndef __BPF__
18 | #include
19 | #endif
20 |
21 | #define BIFF_MAX_CPUS 1024 /* presumably the capacity of the per-cpu map -- confirm in biff.bpf.c */
22 | #define BIFF_MAX_GTIDS 65536 /* number of slots in the single status-word region and in the matching sw_data map */
23 |
24 | /*
25 | * The array map of these, called `cpu_data`, can be mmapped by userspace.
26 | */
27 | struct biff_bpf_cpu_data { /* one element of the mmappable `cpu_data` array map */
28 | uint64_t current; /* presumably identifies the task currently on this cpu -- confirm in biff.bpf.c */
29 | uint64_t cpu_seqnum;
30 | bool available;
31 | } __attribute__((aligned(64))); /* each element is padded out to a 64-byte boundary */
32 |
33 | /*
34 | * bpf can quickly access hash maps, but userspace can't. Ghost already gives
35 | * us the "status_word region" (SWR), which is an mmappable file in ghostfs that
36 | * exports read-only data from the kernel. Every task has a status word ID
37 | * (identifier for SWR) and the index within the SWR.
38 | *
39 | * The sw_data is logically an extension of the status word. It is read-write
40 | * by userspace and bpf.
41 | *
42 | * For each SW region (and there is 1, with BIFF_MAX_GTIDS slots), there is a
43 | * corresponding bpf array map, called `sw_data`, with the same number of
44 | * "words", such that given a task's sw index, we can find its sw_data. In bpf,
45 | * the index is stored in struct task_sw_info and is maintained by bpf-msg.
46 | *
47 | * Since userspace doesn't receive messages, it will have to scan the SWR to
48 | * discover tasks and their SW {id, index} pairs. (You can start scanning from
49 | * the last-new spot, since the kernel allocates linearly, with wrapping.)
50 | *
51 | * This may seem like an extra level of indirection and pointer chasing, but bpf
52 | * autogenerates the array map access code, so even if we don't use sw_data from
53 | * userspace yet, it's not hard to have it ready.
54 | *
55 | * aligned(8) since this is a bpf map value.
56 | */
57 | struct biff_bpf_sw_data { /* per-task extension of the status word; value type of the `sw_data` array map */
58 | uint64_t ran_at; /* NOTE(review): timestamp units/clock are not visible here -- confirm in biff.bpf.c */
59 | uint64_t ran_until;
60 | uint64_t runnable_at;
61 | uint64_t parent;
62 | } __attribute__((aligned(8))); /* 8-byte alignment because this is a bpf map value */
63 |
64 |
65 | #endif // GHOST_LIB_BPF_BPF_BIFF_BPF_H_
66 |
--------------------------------------------------------------------------------
/third_party/bpf/biff_flux_bpf.h:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2023 Google LLC
3 | *
4 | * This program is free software; you can redistribute it and/or
5 | * modify it under the terms of the GNU General Public License
6 | * version 2 as published by the Free Software Foundation.
7 | *
8 | * This program is distributed in the hope that it will be useful,
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 | * GNU General Public License for more details.
12 | */
13 |
14 | #ifndef GHOST_LIB_BPF_BPF_BIFF_FLUX_BPF_H_
15 | #define GHOST_LIB_BPF_BPF_BIFF_FLUX_BPF_H_
16 |
17 | #ifndef __BPF__
18 | #include
19 | #endif
20 |
21 | #include "lib/queue.bpf.h"
22 |
23 | struct biff_flux_sched { /* biff's per-scheduler state; a member of the union in struct flux_sched */
24 | struct arr_list rq; /* list head; presumably the runqueue of biff_flux_thread entries (see their `link`) */
25 | };
26 |
27 | struct biff_flux_cpu { /* biff's per-cpu state; a member of the per-scheduler union in struct flux_cpu */
28 | uint64_t current; /* presumably identifies the thread biff is running on this cpu -- confirm in biff_flux.bpf.c */
29 | };
30 |
31 | struct biff_flux_thread { /* biff's per-thread state; a member of the union in struct flux_thread */
32 | uint64_t ran_at; /* NOTE(review): timestamp units/clock are not visible here -- confirm in biff_flux.bpf.c */
33 | uint64_t ran_until;
34 | uint64_t runnable_at;
35 | struct arr_list_entry link; /* list linkage; presumably chains the thread onto biff_flux_sched.rq */
36 | bool enqueued;
37 | bool times_up;
38 | int cpu;
39 | };
40 |
41 | #endif // GHOST_LIB_BPF_BPF_BIFF_FLUX_BPF_H_
42 |
--------------------------------------------------------------------------------
/third_party/bpf/cfs_bpf.h:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2022 Google LLC
3 | *
4 | * This program is free software; you can redistribute it and/or
5 | * modify it under the terms of the GNU General Public License
6 | * version 2 as published by the Free Software Foundation.
7 | *
8 | * This program is distributed in the hope that it will be useful,
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 | * GNU General Public License for more details.
12 | */
13 |
14 | #ifndef GHOST_BPF_BPF_CFS_BPF_H_
15 | #define GHOST_BPF_BPF_CFS_BPF_H_
16 |
17 | #ifndef __BPF__
18 | #include
19 | #endif
20 |
21 | #include "lib/queue.bpf.h"
22 |
23 | #define CFS_MAX_CPUS 1024 /* presumably the capacity of the per-cpu maps -- confirm in cfs.bpf.c */
24 | #define CFS_MAX_GTIDS 65536 /* presumably the capacity of the per-task map -- confirm in cfs.bpf.c */
25 |
26 | /*
27 | * The array map of these, called `cpu_data`, can be mmapped by userspace.
28 | */
29 | struct cfs_bpf_cpu_data { /* one element of the mmappable `cpu_data` array map */
30 | uint64_t current; /* presumably identifies the task currently on this cpu -- confirm in cfs.bpf.c */
31 | uint64_t cpu_seqnum;
32 | bool available;
33 | } __attribute__((aligned(64))); /* each element is padded out to a 64-byte boundary */
34 |
35 | /*
36 | * Per-cpu runqueue for CFS using Linked list.
37 | */
38 | struct cfs_bpf_rq { /* per-cpu CFS runqueue, kept as a linked list */
39 | uint64_t current;
40 | uint64_t weight;
41 | uint64_t nr_running;
42 | uint64_t min_vruntime;
43 | struct arr_list rq_root; /* head of the list of queued cfs_bpf_thread entries (linked through next_task) */
44 | #ifdef __BPF__
45 | struct bpf_spin_lock lock; /* BPF builds use the real spin lock type */
46 | #else
47 | uint32_t lock; /* non-BPF builds substitute a uint32_t in the same position */
48 | #endif
49 | }__attribute__((aligned(64)));
50 |
51 | /*
52 | * Thread struct to store the values required for cfs tasks. Think of this as
53 | * the same as a task struct for cfs. It brings its own memory for the runqueue
54 | * (LL).
55 | * aligned(8) since this is a bpf map value.
56 | */
57 | struct cfs_bpf_thread { /* per-task CFS bookkeeping; a bpf map value */
58 | uint64_t gtid;
59 | uint64_t task_barrier;
60 | uint64_t ran_at; /* NOTE(review): timestamp units/clock are not visible here -- confirm in cfs.bpf.c */
61 | uint64_t ran_until;
62 | uint64_t runnable_at;
63 | uint64_t weight;
64 | uint64_t real_time;
65 | uint64_t sum_exec_runtime;
66 | uint64_t prev_sum_exec_runtime;
67 | uint64_t vruntime;
68 | uint64_t on_rq;
69 | struct arr_list_entry next_task; /* embedded list linkage: the thread carries its own runqueue node */
70 | } __attribute__((aligned(8))); /* 8-byte alignment because this is a bpf map value */
71 |
72 |
73 |
74 |
75 |
76 | #endif // GHOST_BPF_BPF_CFS_BPF_H_
77 |
--------------------------------------------------------------------------------
/third_party/bpf/edf.bpf.c:
--------------------------------------------------------------------------------
1 | // Copyright 2021 Google LLC
2 | //
3 | // This program is free software; you can redistribute it and/or
4 | // modify it under the terms of the GNU General Public License
5 | // version 2 as published by the Free Software Foundation.
6 | //
7 | // This program is distributed in the hope that it will be useful,
8 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
9 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 | // GNU General Public License for more details.
11 |
12 | #include
13 |
14 | // clang-format off
15 | #include
16 | #include "libbpf/bpf_helpers.h"
17 | #include "libbpf/bpf_tracing.h"
18 | // clang-format on
19 |
20 | #include "lib/ghost_uapi.h"
21 | #include "third_party/bpf/common.bpf.h"
22 | #include "third_party/bpf/edf.h"
23 |
24 | bool skip_tick = false; /* when true, edf_msg_send returns 1 for MSG_CPU_TICK; where it gets set is not visible in this file */
25 |
26 | /* max_entries is patched at runtime to num_possible_cpus; 1024 below is only the compiled-in value. */
27 | struct {
28 | __uint(type, BPF_MAP_TYPE_ARRAY);
29 | __uint(max_entries, 1024);
30 | __type(key, u32);
31 | __type(value, struct edf_bpf_per_cpu_data);
32 | __uint(map_flags, BPF_F_MMAPABLE); /* lets userspace mmap the array */
33 | } cpu_data SEC(".maps");
34 |
35 | SEC("ghost_sched/pnt")
36 | int edf_pnt(struct bpf_ghost_sched *ctx)
37 | {
38 | return 0; /* program for the ghost_sched/pnt section: does nothing and always returns 0 */
39 | }
40 |
41 | /*
42 | * You have to play games to get the compiler to not modify the context pointer
43 | * (msg). You can load X bytes off a ctx, but if you add to ctx, then load,
44 | * you'll get the dreaded: "dereference of modified ctx ptr" error.
45 | *
46 | * You can also sprinkle asm volatile ("" ::: "memory") to help reduce compiler
47 | * optimizations on the context.
48 | */
49 | static void __attribute__((noinline)) handle_yield(struct bpf_ghost_msg *msg)
50 | {
51 | struct ghost_msg_payload_task_yield *yield = &msg->yield;
52 |
53 | yield->agent_data = 1; /* tags the yield payload with agent_data = 1 before the message is delivered */
54 | }
55 |
56 | static void __attribute__((noinline)) handle_wakeup(struct bpf_ghost_msg *msg)
57 | {
58 | struct ghost_msg_payload_task_wakeup *wakeup = &msg->wakeup;
59 |
60 | wakeup->agent_data = 1; /* tags the wakeup payload with agent_data = 1 before the message is delivered */
61 | }
62 |
63 | SEC("ghost_msg/msg_send")
64 | int edf_msg_send(struct bpf_ghost_msg *msg)
65 | {
66 | switch (msg->type) { /* returns 1 to suppress a message, 0 otherwise */
67 | case MSG_TASK_WAKEUP:
68 | handle_wakeup(msg);
69 | break;
70 | case MSG_TASK_YIELD:
71 | handle_yield(msg);
72 | break;
73 | case MSG_CPU_TICK:
74 | if (skip_tick)
75 | return 1; /* tick suppressed only while skip_tick is set */
76 | break;
77 | case MSG_CPU_AGENT_BLOCKED:
78 | case MSG_CPU_AGENT_WAKEUP:
79 | /*
80 | * Suppress these messages. Having this in BPF ensures that
81 | * our vmlinux.h knows about these message types.
82 | */
83 | return 1;
84 | }
85 |
86 | return 0; /* every other message type, and non-suppressed ticks, falls through to here */
87 | }
88 |
89 | char LICENSE[] SEC("license") = "GPL"; /* license string emitted into the "license" section of the BPF object */
90 |
--------------------------------------------------------------------------------
/third_party/bpf/edf.h:
--------------------------------------------------------------------------------
1 | /* Copyright 2021 Google LLC
2 | *
3 | * This program is free software; you can redistribute it and/or
4 | * modify it under the terms of the GNU General Public License
5 | * version 2 as published by the Free Software Foundation.
6 | *
7 | * This program is distributed in the hope that it will be useful,
8 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
9 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 | * GNU General Public License for more details.
11 | */
12 |
13 | #ifndef GHOST_LIB_BPF_BPF_EDF_H_
14 | #define GHOST_LIB_BPF_BPF_EDF_H_
15 |
16 | #ifndef __BPF__
17 | #include
18 | #endif
19 |
20 | struct edf_bpf_per_cpu_data { /* value type of the `cpu_data` array map in edf.bpf.c */
21 | uint8_t example_bool; /* placeholder field; nothing in edf.bpf.c reads or writes it */
22 | } __attribute__((aligned(64))); /* each element is padded out to a 64-byte boundary */
23 |
24 | #endif // GHOST_LIB_BPF_BPF_EDF_H_
25 |
--------------------------------------------------------------------------------
/third_party/bpf/flux.bpf.c:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2023 Google LLC
3 | *
4 | * This program is free software; you can redistribute it and/or
5 | * modify it under the terms of the GNU General Public License
6 | * version 2 or later as published by the Free Software Foundation.
7 | *
8 | * This program is distributed in the hope that it will be useful,
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 | * GNU General Public License for more details.
12 | */
13 |
14 | #include
15 |
16 | // clang-format off
17 | #include
18 | #include "libbpf/bpf_helpers.h"
19 | #include "libbpf/bpf_tracing.h"
20 | // clang-format on
21 |
22 | #include "lib/ghost_uapi.h"
23 | #include "third_party/bpf/common.bpf.h"
24 | #include "third_party/bpf/flux_bpf.h"
25 |
26 | #include
27 |
28 | struct { /* array map holding one struct flux_sched per scheduler id */
29 | __uint(type, BPF_MAP_TYPE_ARRAY);
30 | __uint(max_entries, FLUX_NR_SCHEDS);
31 | __type(key, u32); /* key is the scheduler id (FLUX_SCHED_*) */
32 | __type(value, struct flux_sched);
33 | } schedulers SEC(".maps");
34 |
35 | static inline struct flux_sched *get_sched(int id)
36 | {
37 | return bpf_map_lookup_elem(&schedulers, &id); /* looks up scheduler `id`; NULL when the lookup fails, so callers must check */
38 | }
39 |
40 | static inline int get_parent_id(struct flux_sched *s)
41 | {
42 | if (s->f.id == FLUX_SCHED_ROCI)
43 | return FLUX_SCHED_NONE; /* roci is the top of the hierarchy: no parent */
44 | return FLUX_SCHED_ROCI; /* every other scheduler is a direct child of roci */
45 | }
46 |
47 | /*
48 | * The 'tier' is where a scheduler is in the hierarchy of schedulers. Since
49 | * we're in BPF, this is hardcoded: Roci is at top, with biff and idle below.
50 | *
51 | * Schedulers can preempt their cpus, and you can have preemptions at every tier
52 | * concurrently. e.g.
53 | * - biff can preempt its own cpu to kick a thread off cpu (tier = 2)
54 | * - roci can preempt that cpu to kick biff off (tier = 1)
55 | * - the kernel can preempt the cpu completely (availability change, tier = 0)
56 | *
57 | * When preempt_to is 3 (FLUX_MAX_NR_TIERS, aka FLUX_TIER_NO_PREEMPT), there are
58 | * no preemption requests.
59 | *
60 | * Keep in mind that it's always OK for us to preempt a cpu. If there's some
61 | * corner case where we preempt a cpu unintentionally, that's fine.
62 | * The schedulers will just reallocate it.
63 | *
64 | * Quick example: roci on cpu A wants to preempt cpu B. It does its
65 | * bookkeeping, plans to preempt, then calls flux_preempt_cpu. At that point,
66 | * the kernel preempts the cpu, then reallocates it, and the cpu is roci's
67 | * again. Then cpu A writes preempt_to and sends the IPI. Next time we run
68 | * PNT, we'll preempt that cpu up to roci, which can then hand it back to
69 | * biff/idle/whoever.
70 | */
71 |
72 | #define FLUX_MAX_NR_TIERS 3 /* valid tiers are 0..2 (see sched_id_to_tier); the value 3 itself means "no preemption requested" */
73 |
74 | static inline int sched_id_to_tier(int id)
75 | {
76 | switch (id) { /* maps a scheduler id to its depth in the hardcoded hierarchy */
77 | case FLUX_SCHED_NONE:
78 | return 0;
79 | case FLUX_SCHED_ROCI:
80 | return 1;
81 | case FLUX_SCHED_BIFF:
82 | case FLUX_SCHED_IDLE:
83 | return 2;
84 | };
85 | return 0; /* unknown ids fall back to tier 0, the same as FLUX_SCHED_NONE */
86 | }
87 |
88 | static int new_thread_sched_id(struct ghost_msg_payload_task_new *new)
89 | {
90 | return FLUX_SCHED_BIFF; /* every new thread goes to biff; the `new` payload is not consulted */
91 | }
92 |
93 | static int top_tier_sched_id(void)
94 | {
95 | return FLUX_SCHED_ROCI; /* roci is the hardcoded root scheduler */
96 | }
97 |
98 | #define __gen_thread_op_cases(op_type, op, sched, ...) /* switch cases for thread ops: only biff has one */ \
99 | case FLUX_SCHED_TYPE_BIFF: \
100 | op_type(biff, op)(sched, __VA_ARGS__); \
101 | break; \
102 |
103 | #define __gen_cpu_op_cases(op_type, op, sched, ...) /* switch cases for cpu ops: roci, biff and idle */ \
104 | case FLUX_SCHED_TYPE_ROCI: \
105 | op_type(roci, op)(sched, __VA_ARGS__); \
106 | break; \
107 | case FLUX_SCHED_TYPE_BIFF: \
108 | op_type(biff, op)(sched, __VA_ARGS__); \
109 | break; \
110 | case FLUX_SCHED_TYPE_IDLE: \
111 | op_type(idle, op)(sched, __VA_ARGS__); \
112 | break; \
113 |
114 |
115 | #include "third_party/bpf/flux_dispatch.bpf.c"
116 |
117 | /********************* SCHED OPS *********************/
118 |
119 | #include "third_party/bpf/roci_flux.bpf.c"
120 | #include "third_party/bpf/biff_flux.bpf.c"
121 | #include "third_party/bpf/idle_flux.bpf.c"
122 |
123 | #include "third_party/bpf/flux_api.bpf.c"
124 |
--------------------------------------------------------------------------------
/third_party/bpf/flux_bpf.h:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2023 Google LLC
3 | *
4 | * This program is free software; you can redistribute it and/or
5 | * modify it under the terms of the GNU General Public License
6 | * version 2 as published by the Free Software Foundation.
7 | *
8 | * This program is distributed in the hope that it will be useful,
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 | * GNU General Public License for more details.
12 | */
13 |
14 | #ifndef GHOST_LIB_BPF_BPF_FLUX_BPF_H_
15 | #define GHOST_LIB_BPF_BPF_FLUX_BPF_H_
16 |
17 | #include "third_party/bpf/biff_flux_bpf.h"
18 | #include "third_party/bpf/flux_header_bpf.h"
19 | #include "third_party/bpf/idle_flux_bpf.h"
20 | #include "third_party/bpf/roci_flux_bpf.h"
21 |
22 | struct flux_sched { /* value type of the `schedulers` map: common state, a lock, then per-type state */
23 | struct __flux_sched f; /* framework-owned part, declared in flux_header_bpf.h */
24 |
25 | #ifdef __BPF__
26 | /*
27 | * bpf_spin_lock is not available in userspace.
28 | * The sizeof == 32 is UAPI and statically asserted in flux_pnt.
29 | */
30 | struct bpf_spin_lock lock;
31 | #else
32 | uint32_t lock; /* non-BPF builds substitute a uint32_t in the same position */
33 | #endif
34 | union { /* only one member is meaningful for a given scheduler */
35 | struct roci_flux_sched roci;
36 | struct biff_flux_sched biff;
37 | struct idle_flux_sched idle;
38 | };
39 | } __attribute__((aligned(8)));
40 | /* aligned(8) since this is a bpf map value. */
41 |
42 | enum { /* scheduler instance ids: keys of the `schedulers` map and indices into flux_cpu.__s[] */
43 | FLUX_SCHED_NONE, /* 0: no scheduler; returned as roci's parent */
44 | FLUX_SCHED_ROCI,
45 | FLUX_SCHED_BIFF,
46 | FLUX_SCHED_IDLE,
47 | FLUX_NR_SCHEDS, /* count of ids above; used to size arrays and maps */
48 | };
49 |
50 | enum { /* scheduler implementation types: case labels in the dispatch macros of flux.bpf.c */
51 | FLUX_SCHED_TYPE_NONE,
52 | FLUX_SCHED_TYPE_ROCI,
53 | FLUX_SCHED_TYPE_BIFF,
54 | FLUX_SCHED_TYPE_IDLE,
55 | FLUX_NR_SCHED_TYPES, /* count of types above */
56 | };
57 |
58 | struct flux_cpu { /* per-cpu state: framework part plus one private slot per scheduler id */
59 | struct __flux_cpu f; /* framework-owned part, declared in flux_header_bpf.h */
60 |
61 | /*
62 | * A cpu can be used by many schedulers concurrently, i.e. roci and biff
63 | * can both use cpu fields, since roci allocs the cpu to biff.
64 | *
65 | * Additionally, there could be multiple instances of biff. Even if you
66 | * try to ensure no scheduler has a descendent of the same type, you
67 | * still have a problem: schedulers might use the cpu struct even if
68 | * the cpu is not allocated to them.
69 | *
70 | * It seems like a simple rule: "don't use your blob in the cpu struct
71 | * if you no longer have it", however remember that in ghost, certain
72 | * messages happen after a context switch! e.g. by the time we run
73 | * flux_thread_preempted() (which resolves to biff_thread_preempted()),
74 | * the cpu was already taken away from that instance of biff, and
75 | * possibly allocated to another instance of biff, which is also using
76 | * the biff fields!
77 | *
78 | * The fix is to have an array, indexed by sched_id, which is unique for
79 | * multiple instances of a scheduler. i.e. each biff gets their own
80 | * sched_id and thus their own struct. That way, every scheduler can
81 | * touch *their part* of the cpu, even if they no longer have the cpu
82 | * allocated.
83 | *
84 | * However, the rule remains that schedulers cannot touch the __flux_cpu
85 | * unless they own the cpu.
86 | */
87 | union {
88 | struct roci_flux_cpu roci;
89 | struct biff_flux_cpu biff;
90 | struct idle_flux_cpu idle;
91 | } __s[FLUX_NR_SCHEDS]; /* one slot per scheduler id, including the unused FLUX_SCHED_NONE slot 0 */
92 | } __attribute__((aligned(64)));
93 | /* aligned(64) for per-cpu caching */
94 |
95 | struct flux_thread { /* per-thread state: framework part plus the owning scheduler's private part */
96 | struct __flux_thread f; /* framework-owned part, declared in flux_header_bpf.h */
97 |
98 | /* A thread belongs to a single scheduler at a time. */
99 | union { /* biff is the only scheduler here that keeps per-thread state */
100 | struct biff_flux_thread biff;
101 | };
102 | } __attribute__((aligned(8)));
103 | /* aligned(8) since this is a bpf map value. */
104 |
105 | #endif // GHOST_LIB_BPF_BPF_FLUX_BPF_H_
106 |
--------------------------------------------------------------------------------
/third_party/bpf/ghost_shared_bpf.h:
--------------------------------------------------------------------------------
1 | // Copyright 2021 Google LLC
2 | //
3 | // This program is free software; you can redistribute it and/or
4 | // modify it under the terms of the GNU General Public License
5 | // version 2 as published by the Free Software Foundation.
6 | //
7 | // This program is distributed in the hope that it will be useful,
8 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
9 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 | // GNU General Public License for more details.
11 |
12 | #ifndef GHOST_LIB_BPF_GHOST_SHARED_BPF_H_
13 | #define GHOST_LIB_BPF_GHOST_SHARED_BPF_H_
14 |
15 | // Keep this file's structs in sync with bpf/ghost_shared.h.
16 | // We need different headers for BPF and C programs due to various Google3
17 | // reasons.
18 |
19 | struct ghost_per_cpu_data { // Must stay in sync with the same struct in bpf/ghost_shared.h.
20 | __u8 want_tick; // presumably a boolean flag asking for tick messages on this cpu — confirm against the BPF program that reads it
21 | } __attribute__((aligned(64)));
22 |
23 | #endif // GHOST_LIB_BPF_GHOST_SHARED_BPF_H_
24 |
--------------------------------------------------------------------------------
/third_party/bpf/idle_flux.bpf.c:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2023 Google LLC
3 | *
4 | * This program is free software; you can redistribute it and/or
5 | * modify it under the terms of the GNU General Public License
6 | * version 2 or later as published by the Free Software Foundation.
7 | *
8 | * This program is distributed in the hope that it will be useful,
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 | * GNU General Public License for more details.
12 | */
13 |
14 | /* Idle scheduler implementation for Flux. */
15 |
16 | static void idle_request_for_cpus(struct flux_sched *i, int child_id,
17 | int nr_cpus, int *ret)
18 | {
19 | /* Never called, we have no children. Intentionally empty: *ret is left untouched. */
20 | }
21 |
22 | static void idle_cpu_allocated(struct flux_sched *i, struct flux_cpu *cpu)
23 | {
24 | /* Don't care. Idle keeps no state about the cpus it is given, so this is a no-op. */
25 | }
26 |
27 | static void idle_cpu_returned(struct flux_sched *i, int child_id,
28 | struct flux_cpu *cpu)
29 | {
30 | /* Never called, we have no children. Intentionally empty. */
31 | }
32 |
33 | static void idle_cpu_preempted(struct flux_sched *i, int child_id,
34 | struct flux_cpu *cpu)
35 | {
36 | /* Don't care. Intentionally a no-op. */
37 | }
38 |
39 | static void idle_cpu_preemption_completed(struct flux_sched *i, int child_id,
40 | struct flux_cpu *cpu)
41 | {
42 | /* Don't care. Intentionally a no-op. */
43 | }
44 |
45 | static void idle_cpu_ticked(struct flux_sched *i, int child_id,
46 | struct flux_cpu *cpu)
47 | {
48 | /* Don't care. Intentionally a no-op. */
49 | }
50 |
51 | static void idle_pick_next_task(struct flux_sched *i, struct flux_cpu *cpu,
52 | struct bpf_ghost_sched *ctx)
53 | {
54 | flux_run_idle(cpu, ctx); /* the only op idle implements: hand the cpu to the framework's flux_run_idle helper */
55 | }
56 |
--------------------------------------------------------------------------------
/third_party/bpf/idle_flux_bpf.h:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2023 Google LLC
3 | *
4 | * This program is free software; you can redistribute it and/or
5 | * modify it under the terms of the GNU General Public License
6 | * version 2 as published by the Free Software Foundation.
7 | *
8 | * This program is distributed in the hope that it will be useful,
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 | * GNU General Public License for more details.
12 | */
13 |
14 | #ifndef GHOST_LIB_BPF_BPF_IDLE_FLUX_BPF_H_
15 | #define GHOST_LIB_BPF_BPF_IDLE_FLUX_BPF_H_
16 |
17 | #ifndef __BPF__
18 | #include
19 | #endif
20 |
21 | struct idle_flux_sched { /* idle has no per-scheduler state; the struct exists to fill its slot in the flux_sched union */
22 | uint8_t thanks_cplusplus; /* no zero-length structs... (dummy member, never meaningful) */
23 | };
24 |
25 | struct idle_flux_cpu { /* idle has no per-cpu state; the struct exists to fill its slot in the flux_cpu union */
26 | uint8_t thanks_cplusplus; /* no zero-length structs... (dummy member, never meaningful) */
27 | };
28 |
29 | #endif // GHOST_LIB_BPF_BPF_IDLE_FLUX_BPF_H_
30 |
--------------------------------------------------------------------------------
/third_party/bpf/prov_flux_bpf.h:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2023 Google LLC
3 | *
4 | * This program is free software; you can redistribute it and/or
5 | * modify it under the terms of the GNU General Public License
6 | * version 2 as published by the Free Software Foundation.
7 | *
8 | * This program is distributed in the hope that it will be useful,
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 | * GNU General Public License for more details.
12 | */
13 |
14 | #ifndef GHOST_LIB_BPF_BPF_PROV_FLUX_BPF_H_
15 | #define GHOST_LIB_BPF_BPF_PROV_FLUX_BPF_H_
16 |
17 | #ifndef __BPF__
18 | #include
19 | #endif
20 |
21 | #include "lib/queue.bpf.h"
22 |
23 | /*
24 | * Prov: a provisioning scheduler.
25 | *
26 | * We have three children: prio, next, and last.
27 | *
28 | * The policy is to give prio up to max_nr_prio_cpus, preferring to pick cpus
29 | * flagged "priority". Else take from last, then next. Next takes from last.
30 | *
31 | * max_nr_prio_cpus and the per-cpu priority fields are configured by userspace.
32 | * You can do things like set max_nr_prio_cpus = 10, and pick your ten favorite
33 | * cpus (e.g. sharing a LLC). Prio will get those 10, assuming they are
34 | * available to us at all (kernel CFS or our parent could have them instead).
35 | * If those chosen 10 aren't available, we'll find non-priority cpus to give you
36 | * instead.
37 | *
38 | * If prio has a cpu that isn't priority and there are available priority cpus,
39 | * we'll preempt prio (on timer tick) and move it to its desired cpu. It's a
40 | * tradeoff - if you don't do that, prio will get scattered around the machine.
41 | * Note that if no cpus are marked priority, prio will just get any
42 | * max_nr_prio_cpus.
43 | *
44 | * max_nr_prio_cpus could be changed at runtime, or we can make it a function of
45 | * our cpus (future work). Don't change cpu->priority at runtime without adding
46 | * some other state tracking bools.
47 | *
48 | * In the original version of Prov, priority was an int and these were stored in
49 | * a tree. However, the AVL code is expensive in terms of instructions, and it
50 | * was really easy to blow out of our 1 million instruction budget...
51 | */
52 |
53 | struct prov_poke_tracker { /* poke pacing state; prov_flux_sched keeps one for prio and one for next */
54 | uint64_t threshold; /* how many usec between pokes */
55 | uint64_t poked_at; /* last time we poked, in usec */
56 | };
57 | /* Scheduler-wide state for prov: child ids, prio's cpu cap, cpu lists, poke trackers, debug stats. */
58 | struct prov_flux_sched {
59 | unsigned int prio_id; /* prio_id, next_id, last_id: ids of the three children */
60 | unsigned int next_id;
61 | unsigned int last_id;
62 | /* Upper bound on the cpus given to prio; configured by userspace. */
63 | unsigned int max_nr_prio_cpus;
64 |
65 | /*
66 | * The first place prio looks for a victim. These cpus are next's and
67 | * last's cpus.
68 | *
69 | * There is a window of time when a priority cpu is granted to us
70 | * (prov), but not granted to any child scheduler yet. It won't be on
71 | * this list. If prio has an outstanding nr_cpus_wanted, when we get to
72 | * PNT, we'll hand out this cpu. (Recall that cpu_grant happens in
73 | * PNT). It's possible that there is a concurrent request on another
74 | * cpu that won't see this newly-granted cpu, and we may give out a
75 | * non-priority cpu to prio when this cpu would have been better. I'm
76 | * fine with that.
77 | */
78 | struct arr_list priority_cpus;
79 | /* NOTE(review): presumably the cpus currently held by next and by last - confirm. */
80 | struct arr_list nexts_cpus;
81 | struct arr_list lasts_cpus;
82 |
83 | struct prov_poke_tracker prio_poke;
84 | struct prov_poke_tracker next_poke;
85 |
86 | /* debug stats, disabled at load time if prov_debug_stats is false */
87 | uint64_t prio_grants;
88 | uint64_t next_grants;
89 | uint64_t last_grants;
90 |
91 | uint64_t prio_self_preempts;
92 | uint64_t next_self_preempts;
93 | uint64_t last_self_preempts;
94 |
95 | uint64_t prio_ipi_preempts;
96 | uint64_t next_ipi_preempts;
97 | uint64_t last_ipi_preempts;
98 | };
99 | /* Prov's state for one cpu. */
100 | struct prov_flux_cpu {
101 | struct arr_list_entry prio_link; /* NOTE(review): presumably this cpu's entry on priority_cpus - confirm */
102 | struct arr_list_entry child_link; /* NOTE(review): presumably its entry on nexts_cpus or lasts_cpus - confirm */
103 | bool priority; /* flags a cpu that prio prefers; configured by userspace */
104 | /*
105 | * preempt_pending is an earmark/signal that we already removed the cpu
106 | * from the appropriate child list(s).
107 | */
108 | bool preempt_pending;
109 | unsigned int owning_child; /* NOTE(review): presumably the id of the child holding this cpu - confirm */
110 | };
111 |
112 | #endif // GHOST_LIB_BPF_BPF_PROV_FLUX_BPF_H_
113 |
--------------------------------------------------------------------------------
/third_party/bpf/roci_flux_bpf.h:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2023 Google LLC
3 | *
4 | * This program is free software; you can redistribute it and/or
5 | * modify it under the terms of the GNU General Public License
6 | * version 2 as published by the Free Software Foundation.
7 | *
8 | * This program is distributed in the hope that it will be useful,
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 | * GNU General Public License for more details.
12 | */
13 |
14 | #ifndef GHOST_LIB_BPF_BPF_ROCI_FLUX_BPF_H_
15 | #define GHOST_LIB_BPF_BPF_ROCI_FLUX_BPF_H_
16 |
17 | #ifndef __BPF__
18 | #include
19 | #endif
20 |
21 | #include "lib/queue.bpf.h"
22 |
23 | /*
24 | * TODO: ROCI assumes it is the top of the hierarchy and that "idle_id" is
25 | * actually the idle scheduler. There are a few assumptions baked in here:
26 | * - the idle_id (secondary child) always wants a cpu. So we never yield in
27 | * PNT.
28 | * - We never ask our parent for cpus, since we assume there is no parent to
29 | * ask.
30 | * - We're extremely aggressive about taking cpus from idle. This is fine if it
31 | * is actually idle, but can get excessive. Specifically, we look at
32 | * nr_cpus_needed, not nr_cpus (which was the request from that call). If you
33 | * have two cpus making requests at the same time, ROCI might double-up and
34 | * preempt 2x the cpus needed.
35 | */
36 | struct roci_flux_sched {
37 | struct arr_list primary_cpus; /* NOTE(review): presumably the cpus held by the primary child - confirm */
38 | struct arr_list idle_cpus; /* NOTE(review): presumably the cpus held by the idle_id child - confirm */
39 | unsigned int primary_id;
40 | unsigned int idle_id; /* the secondary child; assumed to be the idle scheduler (see the TODO above) */
41 | };
42 | /* ROCI's state for one cpu. */
43 | struct roci_flux_cpu {
44 | struct arr_list_entry link; /* NOTE(review): presumably links the cpu into primary_cpus or idle_cpus - confirm */
45 | bool preempt_pending;
46 | };
47 |
48 | #endif // GHOST_LIB_BPF_BPF_ROCI_FLUX_BPF_H_
49 |
--------------------------------------------------------------------------------
/third_party/bpf/schedclasstop.bpf.c:
--------------------------------------------------------------------------------
1 | // Copyright 2021 Google LLC
2 | //
3 | // This program is free software; you can redistribute it and/or
4 | // modify it under the terms of the GNU General Public License
5 | // version 2 as published by the Free Software Foundation.
6 | //
7 | // This program is distributed in the hope that it will be useful,
8 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
9 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 | // GNU General Public License for more details.
11 |
12 | #include
13 |
14 | // clang-format off
15 | #include
16 | #include "libbpf/bpf_core_read.h"
17 | #include "libbpf/bpf_helpers.h"
18 | #include "libbpf/bpf_tracing.h"
19 | // clang-format on
20 |
21 | #include "third_party/bpf/common.bpf.h"
22 |
23 | #define SCHED_GHOST 18
24 | #define SCHED_AGENT 19 /* Not a real sched class */
25 | #define MAX_SCHED_CLASS (SCHED_AGENT + 1)
26 |
27 | /* Using this map as a per-cpu u64 */
28 | struct {
29 | __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
30 | __uint(max_entries, 1);
31 | __type(key, u32);
32 | __type(value, u64);
33 | } start_times SEC(".maps");
34 | /* key: value returned by task_sched_policy(). value: per-cpu accumulated time, in ns */
35 | struct {
36 | __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
37 | __uint(max_entries, MAX_SCHED_CLASS);
38 | __type(key, u32);
39 | __type(value, u64);
40 | } class_times SEC(".maps");
41 | /* Picks p's class_times slot: 4 if PF_IDLE is set, SCHED_AGENT/SCHED_GHOST for ghost tasks, else p->policy. */
42 | static int task_sched_policy(struct task_struct *p)
43 | {
44 | #define PF_IDLE 0x2 /* linux/sched.h */
45 | u32 flags = BPF_CORE_READ(p, flags);
46 |
47 | /*
48 | * SCHED_IDLE isn't the idle thread, but we do want to track idle
49 | * separately. We reuse SCHED_ISO (4), which is probably the least
50 | * likely value to be used.
51 | */
52 | if (flags & PF_IDLE)
53 | return 4;
54 | if (task_has_ghost_policy(p)) {
55 | if (is_agent(p))
56 | return SCHED_AGENT;
57 | else
58 | return SCHED_GHOST;
59 |
60 | }
61 | return BPF_CORE_READ(p, policy);
62 | }
63 | /* On every context switch, charge the ns since this cpu's previous switch to prev's class. */
64 | SEC("tp_btf/sched_switch")
65 | int BPF_PROG(sched_switch, bool preempt, struct task_struct *prev,
66 | struct task_struct *next)
67 | {
68 | u64 *start_time, *class_time;
69 | u32 prev_policy;
70 | u32 zero = 0;
71 | u64 now;
72 |
73 | prev_policy = task_sched_policy(prev);
74 |
75 | start_time = bpf_map_lookup_elem(&start_times, &zero);
76 | /* This lookup always succeeds, but the verifier needs proof. */
77 | if (!start_time)
78 | return 0;
79 | /* Nothing is charged while the stored start time is still zero. */
80 | now = bpf_ktime_get_ns();
81 | if (*start_time) {
82 | class_time = bpf_map_lookup_elem(&class_times, &prev_policy);
83 | if (class_time)
84 | *class_time += now - *start_time;
85 | }
86 | *start_time = now;
87 |
88 | return 0;
89 | }
90 |
91 | char LICENSE[] SEC("license") = "GPL";
92 |
--------------------------------------------------------------------------------
/third_party/bpf/schedfair.h:
--------------------------------------------------------------------------------
1 | /* Copyright 2021 Google LLC
2 | *
3 | * This program is free software; you can redistribute it and/or
4 | * modify it under the terms of the GNU General Public License
5 | * version 2 as published by the Free Software Foundation.
6 | *
7 | * This program is distributed in the hope that it will be useful,
8 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
9 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 | * GNU General Public License for more details.
11 | */
12 |
13 | #ifndef GHOST_LIB_BPF_BPF_SCHEDFAIR_H_
14 | #define GHOST_LIB_BPF_BPF_SCHEDFAIR_H_
15 |
16 | #include
17 |
18 | #define MAX_PIDS 102400
19 | /* Accounting record for one task; the section comments below give each group of fields its role. */
20 | struct task_info {
21 | /* state tracking */
22 | uint8_t load_tracked;
23 | int user_prio;
24 |
25 | /* intermediate variables */
26 | uint64_t share_at_wake;
27 | uint64_t ran_at;
28 | uint64_t cpu_runtime_since_wake;
29 |
30 | /* output for userspace */
31 | uint64_t total_cpu_runtime;
32 | uint64_t total_cpu_share;
33 | };
34 |
35 | #endif // GHOST_LIB_BPF_BPF_SCHEDFAIR_H_
36 |
--------------------------------------------------------------------------------
/third_party/bpf/schedghostidle.bpf.c:
--------------------------------------------------------------------------------
1 | // Copyright 2021 Google LLC
2 | //
3 | // This program is free software; you can redistribute it and/or
4 | // modify it under the terms of the GNU General Public License
5 | // version 2 as published by the Free Software Foundation.
6 | //
7 | // This program is distributed in the hope that it will be useful,
8 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
9 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 | // GNU General Public License for more details.
11 |
12 | #include
13 |
14 | // clang-format off
15 | #include
16 | #include "libbpf/bpf_core_read.h"
17 | #include "libbpf/bpf_helpers.h"
18 | #include "libbpf/bpf_tracing.h"
19 | // clang-format on
20 |
21 | #include "lib/ghost_uapi.h"
22 | #include "third_party/bpf/common.bpf.h"
23 | #include "third_party/iovisor_bcc/bits.bpf.h"
24 |
25 | #define MAX_CPUS 512
26 | /* Keep this in sync with schedghostidle.c and bpf/user/agent.c */
27 | #define NR_SLOTS 25
28 | /* Counters bumped by sched_ghost_latched: all latches, SEND_TASK_ON_CPU latches, latches onto an idle cpu. */
29 | uint64_t nr_latches = 0;
30 | uint64_t nr_bpf_latches = 0;
31 | uint64_t nr_idle_to_bpf_latches = 0;
32 |
33 | /*
34 | * This array map is racy, but it's fine. Both the latcher and sched_switch
35 | * tracepoints hold the RQ lock. We want to access a cpu's data from another
36 | * cpu, since the latcher may not be on a particular cpu.
37 | */
38 | struct cpu_info {
39 | bool is_idle; /* set on a switch to the idle task; cleared on a switch to anything else or on a latch */
40 | u64 idle_start; /* bpf_ktime_get_ns() taken when the idle task was switched in */
41 | };
42 | /* key: cpu id. value: that cpu's struct cpu_info */
43 | struct {
44 | __uint(type, BPF_MAP_TYPE_ARRAY);
45 | __uint(max_entries, MAX_CPUS);
46 | __type(key, u32);
47 | __type(value, struct cpu_info);
48 | } cpu_info SEC(".maps");
49 |
50 | /* key: hist slot idx. value: count */
51 | struct {
52 | __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
53 | __uint(max_entries, NR_SLOTS);
54 | __type(key, u32);
55 | __type(value, u64);
56 | } hist SEC(".maps");
57 | /* Returns true if p has PF_IDLE set in its flags. */
58 | static bool task_is_idle(struct task_struct *p)
59 | {
60 | #define PF_IDLE 0x2 /* linux/sched.h */
61 | u32 flags = BPF_CORE_READ(p, flags);
62 |
63 | return flags & PF_IDLE;
64 | }
65 | /* Records, for the current cpu, whether the idle task is being switched in and, if so, when. */
66 | SEC("tp_btf/sched_switch")
67 | int BPF_PROG(sched_switch, bool preempt, struct task_struct *prev,
68 | struct task_struct *next)
69 | {
70 | u32 cpu = bpf_get_smp_processor_id();
71 | struct cpu_info *ci = bpf_map_lookup_elem(&cpu_info, &cpu);
72 |
73 | if (!ci)
74 | return 0;
75 |
76 | if (task_is_idle(next)) {
77 | ci->is_idle = true;
78 | ci->idle_start = bpf_ktime_get_ns();
79 | } else {
80 | ci->is_idle = false;
81 | }
82 |
83 | return 0;
84 | }
85 | /* Returns p->cpu, read with BPF_CORE_READ. */
86 | static int task_cpu(struct task_struct *p)
87 | {
88 | return BPF_CORE_READ(p, cpu);
89 | }
90 | /* Increments hist slot log2l(nsec / 1000), clamped to the last slot. */
91 | static void update_hist(u64 nsec)
92 | {
93 | u64 slot, *count;
94 | /* NOTE(review): hist's key type is u32 but &slot points at a u64 - confirm this is intended. */
95 | slot = log2l(nsec / 1000);
96 | if (slot >= NR_SLOTS)
97 | slot = NR_SLOTS - 1;
98 | count = bpf_map_lookup_elem(&hist, &slot);
99 | if (!count)
100 | return;
101 | *count += 1;
102 | }
103 | /* Counts each latch and, when new's cpu was marked idle, histograms how long it had been idle. */
104 | SEC("tp_btf/sched_ghost_latched")
105 | int BPF_PROG(sched_ghost_latched, struct task_struct *old,
106 | struct task_struct *new, int run_flags)
107 | {
108 | u32 cpu = task_cpu(new);
109 | struct cpu_info *ci = bpf_map_lookup_elem(&cpu_info, &cpu);
110 |
111 | __sync_fetch_and_add(&nr_latches, 1);
112 | /* BPF-PNT is the only one who uses SEND_TASK_ON_CPU. */
113 | if (run_flags & SEND_TASK_ON_CPU)
114 | __sync_fetch_and_add(&nr_bpf_latches, 1);
115 |
116 | if (!ci || !ci->is_idle) {
117 | /*
118 | * When BPF-PNT latches a task, the cpu might not go idle.
119 | * However, we'd like to measure those events.
120 | */
121 | if (run_flags & SEND_TASK_ON_CPU)
122 | update_hist(0);
123 | return 0;
124 | }
125 | __sync_fetch_and_add(&nr_idle_to_bpf_latches, 1);
126 |
127 | update_hist(bpf_ktime_get_ns() - ci->idle_start);
128 | /*
129 | * Technically, the cpu is still idle, and our latch may get aborted or
130 | * otherwise fail. But the agent has noticed the previous idling (as
131 | * shown by it trying to latch), so we do not want to count as idle for
132 | * any other latchings that happen before the next sched_switch.
133 | */
134 | ci->is_idle = false;
135 |
136 | return 0;
137 | }
138 |
139 | char LICENSE[] SEC("license") = "GPL";
140 |
--------------------------------------------------------------------------------
/third_party/bpf/schedlat.h:
--------------------------------------------------------------------------------
1 | /* Copyright 2021 Google LLC
2 | *
3 | * This program is free software; you can redistribute it and/or
4 | * modify it under the terms of the GNU General Public License
5 | * version 2 as published by the Free Software Foundation.
6 | *
7 | * This program is distributed in the hope that it will be useful,
8 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
9 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 | * GNU General Public License for more details.
11 | */
12 |
13 | #ifndef GHOST_LIB_BPF_BPF_SCHEDLAT_H_
14 | #define GHOST_LIB_BPF_BPF_SCHEDLAT_H_
15 |
16 | #include
17 |
18 | #define MAX_PIDS 102400
19 | #define MAX_NR_HIST_SLOTS 25
20 | /* Three timestamps kept for a task. NOTE(review): units and writers are not visible in this header. */
21 | struct task_stat {
22 | uint64_t runnable_at;
23 | uint64_t latched_at;
24 | uint64_t ran_at;
25 | };
26 |
27 | /*
28 | * Power of 2 histogram, <=1 us, 2us, 4us, etc. This struct must be at least
29 | * 8-byte aligned, since it is a value for a BPF map. The kernel will round up
30 | * the size of any map value to 8 bytes internally. If we have an array of
31 | * these objects, the kernel will think each object is 8-byte aligned each.
32 | * When we read the per-cpu map in schedlat.c, we get an array of struct hist.
33 | * The compiler needs to agree with the kernel on the size of the objects, or
34 | * you'll corrupt your stats.
35 | */
36 | struct hist {
37 | uint32_t slots[MAX_NR_HIST_SLOTS];
38 | } __attribute__((aligned(8)));
39 | /* Ids of the histograms; NR_HISTS is the number of ids. */
40 | enum {
41 | RUNNABLE_TO_LATCHED,
42 | LATCHED_TO_RUN,
43 | RUNNABLE_TO_RUN,
44 | NR_HISTS,
45 | };
46 |
47 | #endif // GHOST_LIB_BPF_BPF_SCHEDLAT_H_
48 |
--------------------------------------------------------------------------------
/third_party/bpf/schedlat_shared_bpf.h:
--------------------------------------------------------------------------------
1 | // Copyright 2021 Google LLC
2 | //
3 | // This program is free software; you can redistribute it and/or
4 | // modify it under the terms of the GNU General Public License
5 | // version 2 as published by the Free Software Foundation.
6 | //
7 | // This program is distributed in the hope that it will be useful,
8 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
9 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 | // GNU General Public License for more details.
11 |
12 | #ifndef GHOST_LIB_BPF_SCHEDLAT_SHARED_BPF_H_
13 | #define GHOST_LIB_BPF_SCHEDLAT_SHARED_BPF_H_
14 |
15 | // Keep this file's structs in sync with bpf/schedlat_shared.h.
16 | // We need different headers for BPF and C programs due to various Google3
17 | // reasons.
18 |
19 | #define MAX_PIDS 102400
20 | #define MAX_NR_HIST_SLOTS 25
21 |
22 | struct task_stat {
23 | __u64 runnable_at;
24 | __u64 latched_at;
25 | __u64 ran_at;
26 | };
27 |
28 | /*
29 | * Power of 2 histogram, <=1 us, 2us, 4us, etc. This struct must be at least
30 | * 8-byte aligned, since it is a value for a BPF map.
31 | * NOTE(review): schedlat.h declares it aligned(8), not aligned(64) - confirm. */
32 | struct hist {
33 | u32 slots[MAX_NR_HIST_SLOTS];
34 | } __attribute__((aligned(64)));
35 |
36 | enum {
37 | RUNNABLE_TO_LATCHED,
38 | LATCHED_TO_RUN,
39 | RUNNABLE_TO_RUN,
40 | NR_HISTS,
41 | };
42 |
43 | #endif // GHOST_LIB_BPF_SCHEDLAT_SHARED_BPF_H_
44 |
--------------------------------------------------------------------------------
/third_party/bpf/schedrun.bpf.c:
--------------------------------------------------------------------------------
1 | // Copyright 2021 Google LLC
2 | //
3 | // This program is free software; you can redistribute it and/or
4 | // modify it under the terms of the GNU General Public License
5 | // version 2 as published by the Free Software Foundation.
6 | //
7 | // This program is distributed in the hope that it will be useful,
8 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
9 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 | // GNU General Public License for more details.
11 |
12 | #include
13 |
14 | // clang-format off
15 | #include
16 | #include "libbpf/bpf_core_read.h"
17 | #include "libbpf/bpf_helpers.h"
18 | #include "libbpf/bpf_tracing.h"
19 |
20 | // common.bpf.h comes before bits.bpf.h for u32/s32/u64/s64 in OSS.
21 | #include "third_party/bpf/common.bpf.h"
22 | #include "third_party/iovisor_bcc/bits.bpf.h"
23 | #include "third_party/bpf/schedrun.h"
24 | // clang-format on
25 | /* Filters consulted by is_traced(); a nonzero targ_tgid takes precedence over ghost_only. */
26 | const volatile pid_t targ_tgid = 0;
27 | const volatile bool ghost_only = false;
28 |
29 | // Map each task's pid to the timestamp it started running.
30 | struct {
31 | __uint(type, BPF_MAP_TYPE_HASH);
32 | __uint(max_entries, MAX_PIDS);
33 | __type(key, u32);
34 | __type(value, u64);
35 | } task_start_times SEC(".maps");
36 | /* key: RUNTIMES_* id. value: per-cpu histogram fed by update_hist() */
37 | struct {
38 | __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
39 | __uint(max_entries, NR_HISTS);
40 | __type(key, u32);
41 | __type(value, struct hist);
42 | } hists SEC(".maps");
43 | /* Increments slot log2l(value) of histogram hist_id, clamped to the last slot. */
44 | // TODO: refactor (copied from schedlat.bpf.c).
45 | static void update_hist(u32 hist_id, u64 value)
46 | {
47 | u64 slot; /* Gotta love BPF. slot needs to be a u64, not a u32. */
48 | struct hist *hist;
49 |
50 | hist = bpf_map_lookup_elem(&hists, &hist_id);
51 | if (!hist)
52 | return;
53 | slot = log2l(value);
54 | if (slot >= MAX_NR_HIST_SLOTS)
55 | slot = MAX_NR_HIST_SLOTS - 1;
56 | hist->slots[slot]++;
57 | }
58 | /* p is being switched out: histogram how long it ran, split by whether it is still TASK_RUNNING. */
59 | static void task_stop(struct task_struct *p)
60 | {
61 | u32 pid = BPF_CORE_READ(p, pid);
62 | u64 stop = bpf_ktime_get_us();
63 | u64 *start = bpf_map_lookup_elem(&task_start_times, &pid);
64 |
65 | if (start) {
66 | u64 diff = stop - *start;
67 | update_hist(RUNTIMES_ALL, diff);
68 |
69 | long state = BPF_CORE_READ(p, state);
70 | if (state == TASK_RUNNING) // prev yielded or was preempted.
71 | update_hist(RUNTIMES_PREEMPTED_YIELDED, diff);
72 | else // prev blocked.
73 | update_hist(RUNTIMES_BLOCKED, diff);
74 |
75 | if (state == TASK_DEAD)
76 | bpf_map_delete_elem(&task_start_times, &pid);
77 | }
78 | }
79 | /* p is being switched in: store the current time in task_start_times, keyed by p's pid. */
80 | static void task_run(struct task_struct *p)
81 | {
82 | u32 pid = BPF_CORE_READ(p, pid);
83 | u64 start = bpf_ktime_get_us();
84 |
85 | bpf_map_update_elem(&task_start_times, &pid, &start, BPF_ANY);
86 | }
87 | /* Decides whether p is measured: by tgid if targ_tgid is set, else by is_traced_ghost() if ghost_only, else always. */
88 | static bool is_traced(struct task_struct *p)
89 | {
90 | if (targ_tgid)
91 | return BPF_CORE_READ(p, tgid) == targ_tgid;
92 |
93 | if (ghost_only)
94 | return is_traced_ghost(p);
95 |
96 | return true;
97 | }
98 | /* Closes out prev's run and starts timing next's, for whichever of the two is traced. */
99 | SEC("tp_btf/sched_switch")
100 | int BPF_PROG(sched_switch, bool preempt, struct task_struct *prev,
101 | struct task_struct *next)
102 | {
103 | if (is_traced(prev))
104 | task_stop(prev);
105 |
106 | if (is_traced(next))
107 | task_run(next);
108 |
109 | return 0;
110 | }
111 |
112 | char LICENSE[] SEC("license") = "GPL";
113 |
--------------------------------------------------------------------------------
/third_party/bpf/schedrun.h:
--------------------------------------------------------------------------------
1 | /* Copyright 2021 Google LLC
2 | *
3 | * This program is free software; you can redistribute it and/or
4 | * modify it under the terms of the GNU General Public License
5 | * version 2 as published by the Free Software Foundation.
6 | *
7 | * This program is distributed in the hope that it will be useful,
8 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
9 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 | * GNU General Public License for more details.
11 | */
12 |
13 | #ifndef GHOST_LIB_BPF_BPF_SCHEDRUN_H_
14 | #define GHOST_LIB_BPF_BPF_SCHEDRUN_H_
15 |
16 | #include
17 |
18 | #define MAX_PIDS 102400
19 | #define MAX_NR_HIST_SLOTS 25
20 |
21 | /*
22 | * Power of 2 histogram, <=1 us, 2us, 4us, etc. This struct must be at least
23 | * 8-byte aligned, since it is a value for a BPF map.
24 | */
25 | struct hist {
26 | uint32_t slots[MAX_NR_HIST_SLOTS];
27 | } __attribute__((aligned(64)));
28 | /* Ids of the run-time histograms; NR_HISTS is the number of ids. */
29 | enum {
30 | RUNTIMES_PREEMPTED_YIELDED, /* runs that ended with the task still TASK_RUNNING */
31 | RUNTIMES_BLOCKED, /* runs that ended in any other state */
32 | RUNTIMES_ALL, /* every run */
33 | NR_HISTS,
34 | };
35 |
36 | #endif // GHOST_LIB_BPF_BPF_SCHEDRUN_H_
37 |
--------------------------------------------------------------------------------
/third_party/bpf/schedrun_shared_bpf.h:
--------------------------------------------------------------------------------
1 | // Copyright 2021 Google LLC
2 | //
3 | // This program is free software; you can redistribute it and/or
4 | // modify it under the terms of the GNU General Public License
5 | // version 2 as published by the Free Software Foundation.
6 | //
7 | // This program is distributed in the hope that it will be useful,
8 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
9 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 | // GNU General Public License for more details.
11 |
12 | #ifndef GHOST_LIB_BPF_SCHEDRUN_SHARED_BPF_H_
13 | #define GHOST_LIB_BPF_SCHEDRUN_SHARED_BPF_H_
14 |
15 | // Keep this file's structs in sync with bpf/schedrun_shared.h.
16 | // We need different headers for BPF and C programs due to various Google3
17 | // reasons.
18 |
19 | #define MAX_PIDS 102400
20 | #define MAX_NR_HIST_SLOTS 25
21 |
22 | /*
23 | * Power of 2 histogram, <=1 us, 2us, 4us, etc. This struct must be at least
24 | * 8-byte aligned, since it is a value for a BPF map.
25 | */
26 | struct hist {
27 | u32 slots[MAX_NR_HIST_SLOTS];
28 | } __attribute__((aligned(64)));
29 |
30 | enum {
31 | RUNTIMES_PREEMPTED_YIELDED,
32 | RUNTIMES_BLOCKED,
33 | RUNTIMES_ALL,
34 | NR_HISTS,
35 | };
36 |
37 | #endif // GHOST_LIB_BPF_SCHEDRUN_SHARED_BPF_H_
38 |
--------------------------------------------------------------------------------
/third_party/bpf/test.bpf.c:
--------------------------------------------------------------------------------
1 | // Copyright 2022 Google LLC
2 | //
3 | // This program is free software; you can redistribute it and/or
4 | // modify it under the terms of the GNU General Public License
5 | // version 2 as published by the Free Software Foundation.
6 | //
7 | // This program is distributed in the hope that it will be useful,
8 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
9 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 | // GNU General Public License for more details.
11 |
12 | #include
13 |
14 | // clang-format off
15 | #include "libbpf/bpf_helpers.h"
16 | #include "libbpf/bpf_tracing.h"
17 | // clang-format on
18 |
19 | #include "lib/ghost_uapi.h"
20 | #include "third_party/bpf/common.bpf.h"
21 | /* Minimal ghost_sched/pnt program: ignores ctx and returns 0. */
22 | SEC("ghost_sched/pnt")
23 | int test_pnt(struct bpf_ghost_sched *ctx)
24 | {
25 | return 0;
26 | }
27 |
28 | char LICENSE[] SEC("license") = "GPL";
29 |
--------------------------------------------------------------------------------
/third_party/iovisor_bcc/LICENSE:
--------------------------------------------------------------------------------
1 | Copyright (c) 2020 Wenbo Zhang
2 | Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
3 | 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
4 | 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
5 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
6 |
--------------------------------------------------------------------------------
/third_party/iovisor_bcc/bits.bpf.h:
--------------------------------------------------------------------------------
1 | /* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
2 | /*
3 | * From iovisor's bcc/libbpf-tools/bits.bpf.h.
4 | *
5 | * These are very small, and it's not worth getting a dependency on
6 | * third_party/bcc/. The tools including this header are similar in style to
7 | * libbpf-tools, which are intended to be built from within the
8 | * bcc/libbpf-tools/ directory.
9 | */
10 |
11 | #ifndef GHOST_LIB_BPF_BITS_BPF_H_
12 | #define GHOST_LIB_BPF_BITS_BPF_H_
13 | /* Returns floor(log2(v)) for v > 0, and 0 for v == 0. */
14 | static __always_inline u64 log2(u32 v)
15 | {
16 | u32 shift, r;
17 |
18 | r = (v > 0xFFFF) << 4; v >>= r;
19 | shift = (v > 0xFF) << 3; v >>= shift; r |= shift;
20 | shift = (v > 0xF) << 2; v >>= shift; r |= shift;
21 | shift = (v > 0x3) << 1; v >>= shift; r |= shift;
22 | r |= (v >> 1);
23 |
24 | return r;
25 | }
26 | /* 64-bit variant: log2() of the high 32 bits plus 32 if they are nonzero, else log2() of the low 32 bits. */
27 | static __always_inline u64 log2l(u64 v)
28 | {
29 | u32 hi = v >> 32;
30 |
31 | if (hi)
32 | return log2(hi) + 32;
33 | else
34 | return log2(v);
35 | }
36 |
37 | #endif // GHOST_LIB_BPF_BITS_BPF_H_
38 |
--------------------------------------------------------------------------------
/third_party/linux.BUILD:
--------------------------------------------------------------------------------
1 | load("@rules_foreign_cc//foreign_cc:defs.bzl", "make")
2 |
3 | # The libbpf source code. This is encompassed by the `:source` filegroup, but
4 | # the `make` rule below wants just the library source passed via the
5 | # `lib_source` parameter.
6 | filegroup(
7 | name = "libbpf_source",
8 | srcs = glob(["tools/lib/bpf/**"]),
9 | visibility = ["//visibility:private"],
10 | )
11 |
12 | # The bpftool source code. This is encompassed by the `:source` filegroup, but
13 | # the `make` rule below wants just the library source passed via the
14 | # `lib_source` parameter.
15 | filegroup(
16 | name = "bpftool_source",
17 | srcs = glob(["tools/bpf/bpftool/**"]),
18 | visibility = ["//visibility:private"],
19 | )
20 |
21 | # The Linux source code.
22 | filegroup(
23 | name = "source",
24 | srcs = glob(["**"]),
25 | visibility = ["//visibility:private"],
26 | )
27 |
28 | # Compiles the libbpf static library.
29 | make(
30 | name = "libbpf",
31 | # This is the library source. This filegroup includes the Makefile.
32 | lib_source = ":libbpf_source",
33 | # The Makefile uses other files in the Linux kernel tree outside of its
34 | # directory during the build process (e.g.,
35 | # `tools/scripts/Makefile.include`).
36 | build_data = [":source"],
37 | # This is the target passed to `make` (i.e., `make libbpf.a`).
38 | targets = ["libbpf.a"],
39 | # This copy should be done automatically by the rules_foreign_cc tool, yet
40 | # it is not. This may happen because the libbpf library is not at the root
41 | # of the Linux kernel tree. Perhaps the rules_foreign_cc tool makes an
42 | # assumption that the library source is at the root of the kernel tree,
43 | # which causes its copy of libbpf.a to fail since it cannot find the static
44 | # library at the root of the kernel tree.
45 | #
46 | # Note: The values of the environment variables below are written to
47 | # GNUMake.log, so look at that file to inspect them. You can also look at
48 | # that log to see which other environment variables exist.
49 | postfix_script = "cp $EXT_BUILD_ROOT/external/linux/tools/lib/bpf/libbpf.a $INSTALLDIR/lib/libbpf.a; " +
50 | # By making the `libbpf` directory and copying the libbpf header files into
51 | # it, we can have the #include paths in the project prefixed by `libbpf`. In
52 | # other words, we can do `#include "libbpf/header.h"` instead of
53 | # `#include "header.h"`. With the latter, it is more confusing to figure out
54 | # where the header file is and could cause conflicts if a header file in the
55 | # project has the same name as a header file in libbpf.
56 | "mkdir $INSTALLDIR/include/libbpf; " +
57 | "cp $EXT_BUILD_ROOT/external/linux/tools/lib/bpf/*.h $INSTALLDIR/include/libbpf",
58 | visibility = ["//visibility:public"],
59 | )
60 |
61 | # Compiles the bpftool binary.
62 | make(
63 | name = "bpftool",
64 | lib_source = ":bpftool_source",
65 | # This attribute specifies that the output is a binary. Otherwise, the
66 | # rules_foreign_cc tool expects to find a static library (i.e., `bpftool.a`)
67 | # and fails when the static library is not produced.
68 | out_binaries = ["bpftool"],
69 | build_data = [":source"],
70 | # The default targets are `` and `install`, but we do not want the `install`
71 | # target. Thus, specify that the only target is `` (i.e., just `make`).
72 | targets = [""],
73 | # See the comment in the `:libbpf` target for an explanation of why this
74 | # copy is necessary.
75 | postfix_script = "cp $EXT_BUILD_ROOT/external/linux/tools/bpf/bpftool/bpftool $INSTALLDIR/bin/bpftool",
76 | visibility = ["//visibility:public"],
77 | )
78 |
--------------------------------------------------------------------------------
/third_party/util/LICENSE:
--------------------------------------------------------------------------------
1 | MIT license
2 |
3 | Copyright 2022 Google LLC
4 |
5 | Permission is hereby granted, free of charge, to any
6 | person obtaining a copy of this software and associated
7 | documentation files (the "Software"), to deal in the
8 | Software without restriction, including without
9 | limitation the rights to use, copy, modify, merge,
10 | publish, distribute, sublicense, and/or sell copies of
11 | the Software, and to permit persons to whom the Software
12 | is furnished to do so, subject to the following
13 | conditions:
14 |
15 | The above copyright notice and this permission notice
16 | shall be included in all copies or substantial portions
17 | of the Software.
18 |
19 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF
20 | ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
21 | TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
22 | PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
23 | SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
24 | CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
25 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
26 | IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
27 | DEALINGS IN THE SOFTWARE.
28 |
--------------------------------------------------------------------------------
/third_party/util/util.h:
--------------------------------------------------------------------------------
1 | // Copyright 2022 Google LLC
2 | //
3 | // Use of this source code is governed by an MIT-style
4 | // license that can be found in the LICENSE file or at
5 | // https://opensource.org/licenses/MIT.
6 |
7 | #ifndef GHOST_THIRD_PARTY_UTIL_UTIL_H_
8 | #define GHOST_THIRD_PARTY_UTIL_UTIL_H_
9 |
10 | #include <type_traits>
11 | #include <utility>
12 | // The code below is derived from
13 | // https://stackoverflow.com/questions/34672441/stdis-base-of-for-template-classes.
14 | template <template <typename...> class Base, typename Derived>
15 | struct is_base_of_template_impl {
16 | template <typename... Ts>
17 | static constexpr std::true_type Test(const Base<Ts...>*);
18 | static constexpr std::false_type Test(...);
19 | using type = decltype(Test(std::declval<Derived*>()));
20 | };
21 | // True iff Derived* converts to Base<Ts...>* for some Ts (Derived is, or publicly derives from, a Base<...>).
22 | template <template <typename...> class Base, typename Derived>
23 | inline constexpr bool is_base_of_template_v =
24 | is_base_of_template_impl<Base, Derived>::type::value;
25 |
26 | #endif // GHOST_THIRD_PARTY_UTIL_UTIL_H_
27 |
--------------------------------------------------------------------------------
/util/cgroup_scraper.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #
3 | # Copyright 2022 Google LLC
4 | #
5 | # Use of this source code is governed by a BSD-style
6 | # license that can be found in the LICENSE file or at
7 | # https://developers.google.com/open-source/licenses/bsd
8 | #
9 | # Moves tasks with the matching cgroup/cpu.ghost_enabled value to an enclave
10 |
if [ $# -lt 1 ]; then
  echo "Usage: ./$(basename "$0") ENCLAVE_DIR [GHOST_ENABLED_VAL]"
  # 255 is the status bash reported for the previous out-of-range `exit -1`.
  exit 255
fi
ENCLAVE_DIR=$1

# Current kernels only support '1'. In the future, we'll probably allow a u32
# that can be an enclave ID. ${2-1} falls back to 1 only when no second
# argument was passed, so an explicitly empty value is kept as given.
ENABLED_VAL=${2-1}

# TIDs that are already in the enclave; they are not written again below.
declare -A enclave_tasks
for T in $(cat "$ENCLAVE_DIR/tasks"); do
  enclave_tasks[$T]="1"
done

# Visit every cpu.ghost_enabled file under /dev/cgroup/cpu. find's output is
# NUL-delimited so that paths containing whitespace stay in one piece.
while IFS= read -r -d '' CGE; do
  # Skip cgroups whose setting differs from the requested value.
  [[ "$ENABLED_VAL" != "$(cat "$CGE")" ]] && continue

  DIRNAME=$(dirname "$CGE")
  for T in $(cat "$DIRNAME/tasks"); do
    [[ ${enclave_tasks[$T]} ]] && continue

    # Each TID is written to the enclave's tasks file on its own.
    echo "$T" > "$ENCLAVE_DIR/tasks"
  done
done < <(find /dev/cgroup/cpu -name cpu.ghost_enabled -print0)
41 |
--------------------------------------------------------------------------------
/util/enclave_watcher.cc:
--------------------------------------------------------------------------------
1 | // Copyright 2022 Google LLC
2 | //
3 | // Use of this source code is governed by a BSD-style
4 | // license that can be found in the LICENSE file or at
5 | // https://developers.google.com/open-source/licenses/bsd
6 |
7 | #include
8 | #include
9 | #include
10 | #include
11 | #include
12 |
13 | #include
14 | #include
15 | #include
16 |
17 | #include "absl/flags/parse.h"
18 | #include "lib/enclave.h"
19 | #include "lib/ghost.h"
20 |
21 | ABSL_FLAG(std::string, enclave, "", "path to enclave directory");
22 | ABSL_FLAG(int32_t, agent_pid, -1,
23 | "Optional PID of agent to kill (default is none)");
24 | ABSL_FLAG(bool, sigkill, false, "send agent_pid a SIGKILL instead of SIGINT");
25 |
26 | int main(int argc, char *argv[]) {
27 | absl::ParseCommandLine(argc, argv);
28 |
29 | std::string enclave = absl::GetFlag(FLAGS_enclave);
30 | pid_t agent = absl::GetFlag(FLAGS_agent_pid);
31 | bool sigkill = absl::GetFlag(FLAGS_sigkill);
32 |
33 | if (enclave.empty()) {
34 | fprintf(stderr,
35 | "need an enclave path, e.g. --enclave /sys/fs/ghost/enclave_1/\n");
36 | return 1;
37 | }
38 | int dfd = open(enclave.c_str(), O_PATH);
39 | CHECK_GE(dfd, 0);
40 |
41 | ghost::LocalEnclave::WaitForAgentOnlineValue(dfd, 1);
42 |
43 | absl::Time killed = absl::Now();
44 | if (agent != -1) {
45 | // Most agents gracefully shutdown on SIGINT
46 | kill(agent, sigkill ? SIGKILL : SIGINT);
47 | }
48 |
49 | // This captures when an enclave goes offline (agent crash/exit)
50 | ghost::LocalEnclave::WaitForAgentOnlineValue(dfd, 0);
51 | absl::Time offline = absl::Now();
52 |
53 | // This captures when a new agent takes over the enclave
54 | ghost::LocalEnclave::WaitForAgentOnlineValue(dfd, 1);
55 | absl::Time online = absl::Now();
56 |
57 | int nr_tasks = ghost::LocalEnclave::GetNrTasks(dfd);
58 | int64_t blackout = absl::ToInt64Nanoseconds(online - offline);
59 |
60 | std::cout << "Watcher measured blackout of : " << blackout / 1000000
61 | << " msec, ~" << blackout / (nr_tasks ?: 1) / 1000
62 | << " usec per task (" << nr_tasks << " tasks)\n";
63 |
64 | if (agent != -1) {
65 | std::cout << "Watcher kill-to-agent_offline : "
66 | << absl::ToInt64Milliseconds(offline - killed) << " msec\n";
67 | }
68 |
69 | close(dfd);
70 | return 0;
71 | }
72 |
--------------------------------------------------------------------------------
/util/fdcat.cc:
--------------------------------------------------------------------------------
1 | // Copyright 2022 Google LLC
2 | //
3 | // Use of this source code is governed by a BSD-style
4 | // license that can be found in the LICENSE file or at
5 | // https://developers.google.com/open-source/licenses/bsd
6 |
7 | // Cats the contents of some file shared by fdsrv
8 | //
9 | // Example:
10 | // $ echo foo | fdsrv NONCE
11 | // @074e0 NONCE
12 | // $ fdcat @074e0 NONCE
13 | // foo
14 |
15 | #include "shared/fd_server.h"
16 |
17 | int main(int argc, char* argv[]) {
18 | if (argc < 3) {
19 | std::cerr << "Usage: fdcat PATH NONCE" << std::endl;
20 | exit(1);
21 | }
22 | auto fd = ghost::FdServer::GetSharedFd(argv[1], argv[2]);
23 | if (!fd.ok()) {
24 | std::cerr << "Failed: " << fd.status() << std::endl;
25 | return 1;
26 | }
27 | char buf[4096];
28 | ssize_t ret;
29 | while ((ret = read(*fd, buf, sizeof(buf))) > 0) {
30 | std::string s(buf, ret);
31 | std::cout << s;
32 | }
33 | return 0;
34 | }
35 |
36 |
--------------------------------------------------------------------------------
/util/fdsrv.cc:
--------------------------------------------------------------------------------
1 | // Copyright 2022 Google LLC
2 | //
3 | // Use of this source code is governed by a BSD-style
4 | // license that can be found in the LICENSE file or at
5 | // https://developers.google.com/open-source/licenses/bsd
6 |
7 | // Shares stdin with FdServer. Outputs the path and the nonce.
8 | //
9 | // Examples:
10 | // $ echo foo | fdsrv NONCE
11 | // @074e0 NONCE
12 | // $ fdcat @074e0 NONCE
13 | // foo
14 | //
15 | // $ fdsrv NONCE < some_file
16 | // @36911 NONCE
17 | // $ fdcat @36911 NONCE
18 | // contents_of_some_file
19 |
20 | #include "shared/fd_server.h"
21 |
22 | int main(int argc, char* argv[]) {
23 | if (argc < 2) {
24 | std::cerr << "Usage: fdsrv NONCE" << std::endl;
25 | exit(1);
26 | }
27 | ghost::FdServer foo(/*fd=*/0, /*nonce=*/argv[1], absl::InfiniteDuration());
28 | absl::StatusOr path = foo.Init();
29 | if (!path.ok()) {
30 | std::cerr << "Failed: " << path.status() << std::endl;
31 | return 1;
32 | }
33 | std::cout << *path << " " << argv[1] << std::endl;
34 | absl::Status status = foo.Serve();
35 | if (!status.ok()) {
36 | std::cerr << "Failed: " << status << std::endl;
37 | return 1;
38 | }
39 | return 0;
40 | }
41 |
--------------------------------------------------------------------------------
/util/pushtosched.cc:
--------------------------------------------------------------------------------
1 | // Copyright 2022 Google LLC
2 | //
3 | // Use of this source code is governed by a BSD-style
4 | // license that can be found in the LICENSE file or at
5 | // https://developers.google.com/open-source/licenses/bsd
6 |
7 | // This is a helper program that moves threads into the SCHED_OTHER (CFS) sched
8 | // class. The thread TIDs are passed to the process via stdin. See
9 | // `PrintUsage()` for more details about using this program.
10 |
11 | #include
12 | #include
13 | #include
14 | #include
15 |
16 | #include "absl/strings/str_format.h"
17 | #include "absl/strings/string_view.h"
18 |
19 | namespace {
20 |
21 | void PrintUsage(absl::string_view program_name) {
22 | absl::FPrintF(stderr, R"(Usage:
23 | To push tasks in a cgroup into CFS:
24 | $ cat /dev/cgroup/cpu/your/tasks | %s
25 | To push ghOSt tasks into CFS:
26 | $ cat /sys/fs/ghost/enclave_X/tasks | %s
27 | To push CFS tasks into ghOSt, please write pids directly to enclave's task
28 | file. For example,
29 | $ cat /dev/cgroup/cpu/your/tasks > /sys/fs/ghost/enclave_X/tasks
30 | )",
31 | program_name, program_name);
32 | }
33 |
// Moves the thread identified by `pid` into the SCHED_OTHER (CFS) scheduling
// class, passing a zeroed `sched_param` (priority 0).
// Returns the result of sched_setscheduler(): 0 on success, -1 with `errno`
// set on failure.
int SchedEnterOther(pid_t pid) {
  sched_param param = {0};
  return sched_setscheduler(pid, SCHED_OTHER, &param);
}
39 |
40 | } // namespace
41 |
42 | int main(int argc, char* argv[]) {
43 | if (argc != 1) {
44 | PrintUsage(argv[0]);
45 | return 1;
46 | }
47 |
48 | absl::FPrintF(stderr, "Moving processes to SCHED_OTHER (CFS).\n");
49 | pid_t pid;
50 | while (fscanf(stdin, "%d\n", &pid) != EOF) {
51 | absl::FPrintF(stderr, "pid: %d\n", pid);
52 | if (sched_getscheduler(pid) == SCHED_OTHER) {
53 | absl::FPrintF(
54 | stderr, "Already in sched class SCHED_OTHER, skipping pid %d\n", pid);
55 | continue;
56 | }
57 |
58 | if (SchedEnterOther(pid)) {
59 | absl::FPrintF(stderr, "sched_setscheduler failed (pid: %d): %s\n", pid,
60 | strerror(errno));
61 | }
62 |
63 | int actual = sched_getscheduler(pid);
64 | if (actual < 0) {
65 | absl::FPrintF(stderr, "sched_getscheduler for pid %d failed: %s\n", pid,
66 | strerror(errno));
67 | return 1;
68 | } else if (actual != SCHED_OTHER) {
69 | absl::FPrintF(
70 | stderr,
71 | "Failed to set sched policy of pid %d: want SCHED_OTHER(%d), "
72 | "got %d\n",
73 | pid, SCHED_OTHER, actual);
74 | }
75 | }
76 |
77 | return 0;
78 | }
79 |
--------------------------------------------------------------------------------