├── .cargo └── config.toml ├── .github └── workflows │ └── hugo.yaml ├── .gitignore ├── .gitmodules ├── Cargo.lock ├── Cargo.toml ├── Docs ├── .gitignore ├── README.md ├── archetypes │ └── default.md ├── content │ └── en │ │ ├── Architecture │ │ └── _index.md │ │ ├── Dpdk │ │ └── _index.md │ │ ├── Lifetimes │ │ └── _index.md │ │ ├── Modules │ │ └── _index.md │ │ ├── R2Configs │ │ └── _index.md │ │ ├── Tryme │ │ └── _index.md │ │ ├── Unsafe │ │ └── _index.md │ │ └── _index.md ├── go.mod ├── go.sum └── hugo.toml ├── LICENCE ├── README.md ├── api ├── Cargo.toml ├── api.md └── src │ └── lib.rs ├── apis ├── interface │ ├── Cargo.toml │ └── src │ │ ├── apis.thrift │ │ └── lib.rs ├── log │ ├── Cargo.toml │ └── src │ │ ├── apis.thrift │ │ └── lib.rs └── route │ ├── Cargo.toml │ └── src │ ├── apis.thrift │ └── lib.rs ├── common ├── Cargo.toml └── src │ └── lib.rs ├── counters ├── Cargo.toml ├── counters.md └── src │ ├── bin.rs │ ├── flavors.rs │ ├── lib.rs │ └── test.rs ├── dpdk ├── Cargo.toml ├── Dpdk_Support.md ├── dpdk.md └── src │ ├── dpdk.rs │ ├── lib.rs │ ├── stubs.rs │ └── test.rs ├── ffis └── dpdk │ ├── Cargo.toml │ ├── build.rs │ └── src │ ├── bindgen.rs │ ├── bindgen │ ├── allow-function.regex │ ├── allow-type.regex │ ├── allow-var.regex │ ├── bindgen.md │ ├── bindgen.sh │ ├── headers.h │ └── include │ │ └── lib.rs │ └── lib.rs ├── fwd ├── Cargo.toml ├── fwd.md └── src │ ├── adj.rs │ ├── intf.rs │ ├── ipv4.rs │ └── lib.rs ├── gnodes ├── interface │ ├── Cargo.toml │ └── src │ │ └── lib.rs ├── layer2 │ └── eth │ │ ├── decap │ │ ├── Cargo.toml │ │ └── src │ │ │ └── lib.rs │ │ └── encap │ │ ├── Cargo.toml │ │ └── src │ │ └── lib.rs └── layer3 │ └── ipv4 │ ├── fwd │ ├── Cargo.toml │ └── src │ │ └── lib.rs │ └── parse │ ├── Cargo.toml │ └── src │ └── lib.rs ├── graph ├── Cargo.toml ├── graph.md └── src │ ├── lib.rs │ └── test.rs ├── log ├── Cargo.toml ├── log.md └── src │ ├── lib.rs │ └── test.rs ├── main ├── Cargo.toml ├── main.md └── src │ ├── ifd.rs │ ├── ipv4.rs │ 
├── logs.rs │ ├── main.rs │ ├── msgs.rs │ └── test.rs ├── msg ├── Cargo.toml ├── msg.md └── src │ └── lib.rs ├── names ├── Cargo.toml ├── names.md └── src │ └── lib.rs ├── packet ├── Cargo.toml ├── packet.md └── src │ ├── lib.rs │ └── test.rs ├── perf ├── Cargo.toml └── src │ ├── lib.rs │ ├── stubs.rs │ └── x64.rs ├── sched ├── Cargo.toml ├── sched.md └── src │ ├── hfsc.rs │ ├── hfsc │ └── test.rs │ └── lib.rs ├── tryme.sh ├── unix ├── efd │ ├── Cargo.toml │ └── src │ │ └── lib.rs ├── epoll │ ├── Cargo.toml │ └── src │ │ ├── lib.rs │ │ └── test.rs ├── shm │ ├── Cargo.toml │ └── src │ │ └── lib.rs └── socket │ ├── Cargo.toml │ └── src │ ├── lib.rs │ └── test.rs └── utils ├── clis.md ├── r2cnt ├── Cargo.toml ├── r2cnt.md └── src │ └── main.rs ├── r2intf ├── Cargo.toml ├── r2intf.md └── src │ ├── main.rs │ └── r2intf.yml ├── r2log ├── Cargo.toml ├── r2log.md └── src │ ├── main.rs │ └── r2log.yml └── r2rt ├── Cargo.toml ├── r2rt.md └── src ├── main.rs └── r2rt.yml /.cargo/config.toml: -------------------------------------------------------------------------------- 1 | [target.x86_64-unknown-linux-gnu] 2 | runner = "sudo -E" 3 | 4 | [target.aarch64-unknown-linux-gnu] 5 | runner = "sudo -E" 6 | -------------------------------------------------------------------------------- /.github/workflows/hugo.yaml: -------------------------------------------------------------------------------- 1 | name: Deploy Hugo Documentation 2 | 3 | on: 4 | push: 5 | branches: 6 | - master 7 | paths: 8 | - 'Docs/**' # Only trigger when Docs directory changes 9 | 10 | # Allow manual trigger 11 | workflow_dispatch: 12 | 13 | # Sets permissions of the GITHUB_TOKEN to allow deployment to GitHub Pages 14 | permissions: 15 | contents: read 16 | pages: write 17 | id-token: write 18 | 19 | # Allow only one concurrent deployment 20 | concurrency: 21 | group: "pages" 22 | cancel-in-progress: true 23 | 24 | jobs: 25 | build: 26 | runs-on: ubuntu-latest 27 | steps: 28 | - name: Checkout 29 | uses: 
actions/checkout@v4 30 | with: 31 | submodules: recursive 32 | 33 | - name: Setup Hugo 34 | uses: peaceiris/actions-hugo@v2 35 | with: 36 | hugo-version: 'latest' 37 | extended: true 38 | 39 | - name: Setup Node 40 | uses: actions/setup-node@v3 41 | with: 42 | node-version: '18' 43 | 44 | - name: Install PostCSS dependencies 45 | run: | 46 | cd Docs 47 | npm init -y 48 | npm install postcss postcss-cli autoprefixer 49 | 50 | - name: Install Docsy Dependencies 51 | run: npm install 52 | working-directory: Docs/themes/docsy 53 | 54 | - name: Build with Hugo 55 | env: 56 | HUGO_ENVIRONMENT: production 57 | run: | 58 | cd Docs 59 | hugo --minify --baseURL="https://gopakumarce.github.io/R2/" 60 | 61 | - name: Setup Pages 62 | uses: actions/configure-pages@v3 63 | 64 | - name: Upload artifact 65 | uses: actions/upload-pages-artifact@v3 66 | with: 67 | path: './Docs/public' 68 | 69 | deploy: 70 | environment: 71 | name: github-pages 72 | url: ${{ steps.deployment.outputs.page_url }} 73 | runs-on: ubuntu-latest 74 | needs: build 75 | steps: 76 | - name: Deploy to GitHub Pages 77 | id: deployment 78 | uses: actions/deploy-pages@v4 79 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | cscope.files 2 | cscope.in.out 3 | cscope.out 4 | cscope.po.out 5 | *.swp 6 | /target 7 | **/target 8 | **/*.rs.bk 9 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "Docs/themes/docsy"] 2 | path = Docs/themes/docsy 3 | url = https://github.com/google/docsy.git 4 | [submodule "Docs/themes/font-awesome"] 5 | path = Docs/themes/font-awesome 6 | url = https://github.com/FortAwesome/Font-Awesome.git 7 | -------------------------------------------------------------------------------- /Cargo.toml: 
-------------------------------------------------------------------------------- 1 | [workspace] 2 | members = [ 3 | "names", 4 | "graph", 5 | "packet", 6 | "sched", 7 | "fwd", 8 | "log", 9 | "counters", 10 | "perf", 11 | "api", 12 | "msg", 13 | "dpdk", 14 | "main", 15 | "ffis/dpdk", 16 | "unix/socket", 17 | "unix/epoll", 18 | "unix/efd", 19 | "unix/shm", 20 | "gnodes/layer2/eth/decap", 21 | "gnodes/layer2/eth/encap", 22 | "gnodes/layer3/ipv4/parse", 23 | "gnodes/layer3/ipv4/fwd", 24 | "gnodes/interface", 25 | "apis/log", 26 | "apis/interface", 27 | "apis/route", 28 | "utils/r2cnt", 29 | "utils/r2intf", 30 | "utils/r2log", 31 | "utils/r2rt", 32 | ] 33 | 34 | # The development profile, used for `cargo build` 35 | [profile.dev] 36 | opt-level = 0 # Controls the --opt-level the compiler builds with 37 | debug = true # Controls whether the compiler passes `-g` 38 | -------------------------------------------------------------------------------- /Docs/.gitignore: -------------------------------------------------------------------------------- 1 | public/ 2 | resources/ 3 | .hugo_build.lock 4 | themes/docsy 5 | themes/font-awesome 6 | -------------------------------------------------------------------------------- /Docs/README.md: -------------------------------------------------------------------------------- 1 | # Hugo 2 | 3 | Once you make doc changes, do "hugo server -D" and go to 4 | localhost:1313 to see the doc pages and test it out 5 | -------------------------------------------------------------------------------- /Docs/archetypes/default.md: -------------------------------------------------------------------------------- 1 | +++ 2 | date = '{{ .Date }}' 3 | draft = true 4 | title = '{{ replace .File.ContentBaseName "-" " " | title }}' 5 | +++ 6 | -------------------------------------------------------------------------------- /Docs/content/en/Dpdk/_index.md: -------------------------------------------------------------------------------- 1 | --- 2 | weight: 1 3 
| type: docs 4 | description: > 5 | 6 | --- 7 | 8 | # Compiling with dpdk 9 | 10 | Install dpdk as below (snippet below gives the tested version) 11 | 12 | ``` 13 | sudo apt-get install python3 ninja-build meson 14 | curl https://fast.dpdk.org/rel/dpdk-19.11.3.tar.xz -o dpdk.tar.xz 15 | tar xf dpdk.tar.xz 16 | cd dpdk-stable-19.11.3 17 | meson build 18 | cd build 19 | ninja 20 | sudo ninja install 21 | ``` 22 | 23 | And then do "cargo build --features dpdk" 24 | 25 | NOTE: To update to a different dpdk version, go to ffis/dpdk and run bindgen.sh 26 | there. Note that the produced rust bindings might need minor tweaks to get it 27 | compiled, for example dpdk C code has an aligned structure placed inside a packed 28 | structure (see comments about rte_ether_addr in bindgen/include/lib.rs) which 29 | rust cannot support, so that needed a manual tweak 30 | 31 | What we support is a simple AF_PACKET dpdk driver, just to ensure that the general 32 | process of initializing dpdk and working with its apis are all in place. To switch 33 | to a regular PCI driver "hopefully" does not involve code changes and is transparent 34 | to the users of the dpdk library. It of course will involve configuring hugepages and 35 | such which are general dpdk setup procedures outside the purview of this code 36 | 37 | # DPDK interacting with R2 38 | 39 | What R2 wants to leverage from DPDK is the wealth of device drivers it supports. It gives us a quick start and as and when R2 gets mature and has drivers of its own, 40 | the DPDK support can be phased out. We do not intend to use any other packet forwarding functionalities in DPDK. And including DPDK violates the "safety" aspect of 41 | Rust big time - all FFI is unsafe code! So use of DPDK should be a transit path to get us somewhere and then we replace the dependency on it (which is drivers) 42 | 43 | ## Configuration 44 | 45 | In the r2 config file add a section called dpdk as below. 
The on=true means R2 is running with dpdk enabled, it can be set to false and then rest of the dpdk configs don't matter because dpdk is turned off. The mem=128 says dpdk uses 128Mb for mbuf pool. The ncores=3 says that core0 is used as the main core (non data plane) and core1 and core2 are the data plane cores. core0 is used as the main core always as of today 46 | 47 | ``` 48 | [dpdk] 49 | on=true 50 | mem=128 51 | ncores=3 52 | ``` 53 | 54 | ## Packet pools 55 | 56 | R2 has the PacketPool trait in packet cargo, which is implemented for DPDK also. DPDK has the concept of mbufs with its own mbuf header with its own l3/l2 fields etc., 57 | we don't plan to use anything in the mbuf header other than being able to specify the mbuf packet/data length in the mbuf pkt_len and data_len fields. Also we support 58 | only a single mbuf packet (as of today), even though R2 itself supports chained particles. So the mapping is as follows 59 | 60 | pkt - comes from heap 61 | particle - comes from heap 62 | particle.raw - this is the mbuf 63 | 64 | The dpdk mbuf structure is like this - [[struct rte_mbuf][headroom][data area]]. The headroom + data-area is the size we specify to dpdk when we create an mbuf pool. 65 | 66 | When a packet is freed, it just goes back to the pool's packet free queue. For a particle, we don't maintain a free queue, instead we let the freed particle go back 67 | into the dpdk mbuf pool (we have to give it back to the mbuf pool or else dpdk driver won't find an mbuf to give us a packet). And when we need a particle, we allocate 68 | an mbuf from dpdk mbuf pool, but then how do we get the heap-particle from the mbuf ? We do that by stealing two pointers from the headroom. So the actual layout 69 | of the mbuf that we use is as below 70 | 71 | [[struct rte_mbuf][mbuf-ptr heap-ptr remaining-headroom][data area]] 72 | 73 | So the mbuf buf_addr starts right after the rte_mbuf structure, in our case pointing to the area we use to store the mbuf pointer itself. 
And the next word we use 73 | to store the heap-particle pointer. Each mbuf is allocated its own heap-particle when mbuf pool is initialized. So when mbuf is allocated, we can get the BoxPart 74 | structure also using the heap-ptr address. So we eat into the available headroom a bit. So this allows us to get from mbuf to BoxPart 75 | 76 | The mbuf pointer itself is stored to get from BoxPart to mbuf .. So if BoxPart is freed, we know what mbuf needs to be freed to the dpdk mbuf pool. Obviously, all 77 | this is hugely unsafe pointer math. 78 | 79 | ## Driver Rx/Tx 80 | 81 | DPDK initializes each port and assigns it a port number. Each dpdk port is a structure that implements the Driver trait in graph cargo. The Driver trait expects 82 | a send and receive function, which is implemented using DPDK's rx-burst and tx-burst APIs (with burst size 1 as of today). And the drivers/ports are themselves 83 | just a part of the IfNode graph node. 84 | 85 | ## DPDK EAL Thread 86 | 87 | DPDK does the actual work of reading packets, processing them and sending them out in "EAL Threads". And for R2, an EAL thread is nothing but a thread that processes 88 | the graph. Unfortunately the EAL thread runs inside the dpdk FFI code, so we have to dress up the Rust graph processing routine with unsafes to make it palatable 89 | to FFI. 90 | 91 | Other than the above mentioned items, the rest of the architecture continues to be the same - DPDK or not, we have features in graph nodes, we have a graph 92 | processor thread, we have driver nodes. And dpdk sits in the driver nodes, and the graph processor is run as a DPDK EAL thread, that is about it. 
94 | -------------------------------------------------------------------------------- /Docs/content/en/Lifetimes/_index.md: -------------------------------------------------------------------------------- 1 | --- 2 | weight: 1 3 | type: docs 4 | description: > 5 | 6 | --- 7 | 8 | # Lifetimes 9 | 10 | As you can see, the use of Rust's lifetime markers has not proliferated throughout the code base. This is because really there is only one structure that has a 'reference', and that is used in almost every single place in the system - that one structure is the Packet. The Packet has one or more Particle structures, and the Particle as a mutable 'raw' &[u8] slice that holds the real data in the packet. 11 | 12 | The packets, particles and raw data in the system are all allocated one time during initialization of R2, and they are never deallocated. That is, the packets, particles and raw data are permanent - ie in Rust, they have 'static' lifetimes. So as we can see, the Particle has &'static mut raw[u8] - since it has a static lifetime, even though Packet/Particle is used throughout the system, theres no lifetime proliferation. So needless to say, if you add a non-static (ie temporary) reference inside the Packet/Particle structure, all hell will break loose and you will need a lot of lifetimes everywhere. 13 | 14 | The other structure which is pervasively used everywhere is a graph node client - ie any structure that implements the Gclient structure. The graph nodes are created by control threads and 'sent' to forwarding threads, so any non-static reference inside the structure that implements Gclient trait also will mean a blow up of lifetime usages everywhere. 
Also the graph node is 'sent' after creation from control to forwarding threads embedded the R2Msg message structure - so the node having non-static references will mean that the R2Msg will need lifetimes too, and R2Msg is also widely used in the system, so that will be a snow ball effect of having more lifetimes. 15 | 16 | In summary, be very very careful and wise and thoughtful while adding references inside data structures - Rust will force you to be thoughtful by complaining about lifetimes errors all over the place. 17 | -------------------------------------------------------------------------------- /Docs/content/en/Modules/_index.md: -------------------------------------------------------------------------------- 1 | --- 2 | weight: 1 3 | type: docs 4 | description: > 5 | 6 | --- 7 | 8 | # Modules 9 | 10 | R2 project is organized as a workspace which is a collection of independent Cargo-es (libraries). What typically happens in large software projects is that its hard to define the module dependencies, to know what module depends on what (often ending up with circular dependencies). We strive to define the dependency model here upfront. 11 | 12 | ## The major modules 13 | 14 | 1. names: this module defines the names of all the graph nodes, so modules can refer to the names of other modules in a consistent way 15 | 16 | 2. fwd: Like we explained in the architecture section, a packet can traverse multiple forwarding objects. This module defines ALL the forwarding objects used in the system (refer to section "Forwarding objects" in architecture details) 17 | 18 | 3. counters: shared memory counters used by a lot of other modules 19 | 20 | 4. log: forwarding path fast logging library 21 | 22 | 5. common: miscellaneous common utilities 23 | 24 | 6. packet: The basic packet defenition and packet manipulation libraries, foundational library used by all forwarding nodes. 25 | 26 | 7. 
graph: The library that deals with creating the forwarding graph and adding nodes to it etc.. 27 | 28 | 8. gnodes: All the features that plug in to the graph as nodes 29 | 30 | 9. api: the library to let external utilities (in Rust) to make API calls to R2 31 | 32 | 10. apis: The thrift api defenitions of various modules. 33 | 34 | 11. msg: The control to data plane (and vice versa) message definitions 35 | 36 | ### Dependency 37 | 38 | We are not trying to list here the dependency graph - that obviously can be derived from the Cargo.toml of the various modules. The goal here is to provide the dependency expectations between the major modules listed above. 39 | 40 | #### The first tier 41 | 42 | names, apis, common, counters, log and fwd are the "top tier" modules - almost anyone and everyone will depend on them directly or indirectly. names, api, common etc.. is obvious. counters and logging is very fundamental infrastructure, so its not surprising everyone depends on it. 43 | 44 | fwd needs some explanation. fwd defines the forwarading path objects, and the Packet structure might contain references to some of those objects. When the packet traverses from node to node, it might capture the forwarding object information in the packet itself. So packet has to depend on fwd. And packet is itself a foundational/fundamental module in the system, hence fwd becomes even more basic/fundamental. So every object in fwd should be composed of Rust standard lib objects or other objects in fwd itself. 45 | 46 | #### The second tier 47 | 48 | packet library depends on fwd. Packet is used by almost all of R2 49 | 50 | #### The third tier 51 | 52 | graph, msg are the next tier of modules - they depend on the modules above 53 | 54 | #### The last tier 55 | 56 | All the graph nodes (modules in gnodes/) depend on everything above. 
57 | -------------------------------------------------------------------------------- /Docs/content/en/R2Configs/_index.md: -------------------------------------------------------------------------------- 1 | --- 2 | weight: 1 3 | type: docs 4 | description: > 5 | 6 | --- 7 | 8 | # Configuration file 9 | 10 | The configuration file is by default assumed to be /etc/r2.cfg if it exists, or else it can be suppled as a command line parameter 'r2 -c ' .. The config file is in the INI format, same as whats used by the cargo.toml files. It has different sections explained below 11 | 12 | # general 13 | 14 | ``` 15 | [general] 16 | pkts=4096 17 | particles=8192 18 | particle_sz=2048 19 | threads=4 20 | ``` 21 | 22 | This means that r2 should run with with a pool of 4096 packets and 8192 particles of size 2048, and total four data forwarding threads. Each of these has its defaults, so any of them can be safely omitted. 23 | 24 | # dpdk 25 | 26 | See [dpdk](../dpdk) to see the dpdk configuration options 27 | -------------------------------------------------------------------------------- /Docs/content/en/Tryme/_index.md: -------------------------------------------------------------------------------- 1 | --- 2 | weight: 1 3 | type: docs 4 | description: > 5 | 6 | --- 7 | 8 | # Trying R2 9 | 10 | NOTE: R2 compiles and works only on linux as of now, tested on Ubuntu 20.04, although any later version 11 | of Ubuntu should work too 12 | 13 | The goal here is to provide a test setup as below, where R2_client1 and R2_client2 are two docker containers from which we can originate packets like ping and R2 in the middle, is the router. 14 | 15 | ```c 16 | R2_client1 [veth_c2_1]----[veth_r2_1] R2 [veth_r2_2]----[veth_c2_2] R2_client2 17 | 1.1.1.1 1.1.1.2 2.1.1.2 2.1.1.1 18 | ``` 19 | 20 | The steps below have been tested on brand new Ubuntu installations 18.04 and 16.04 server AND desktop. 
So for other versions of ubuntu or other distributions of linux, or if you have an already running ubuntu you have mucked around with, there might have to be some modifications to the steps below. 14.04 ubuntu has a different set of steps to install docker, so I did not list that here, but if you have 14.04, just get docker and docker CLIs installed (step 2) and rest of the steps are the same. Also you might have some packages already like git, gcc etc.. in which case those apt-gets are just ignored 21 | 22 | ## Steps 23 | 24 | 1. Install rust as mentioned here - 25 | 26 | ```c 27 | sudo apt update 28 | sudo apt install curl 29 | sudo apt install gcc 30 | curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh 31 | ``` 32 | 33 | For the rust install, just select "proceed with the standard install" 34 | 35 | 2. Install docker as below. Docker installation steps below should go through fine. But in case you face issues, more docker information is here . 36 | 37 | ```c 38 | sudo apt-get install -y apt-transport-https ca-certificates curl gnupg-agent software-properties-common 39 | curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo apt-key add - 40 | sudo add-apt-repository "deb [arch=amd64] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable" 41 | sudo apt-get update 42 | sudo apt-get install -y docker-ce docker-ce-cli containerd.io 43 | sudo docker pull busybox 44 | ``` 45 | 46 | 3. Download the source code from here - 47 | 48 | ```c 49 | sudo apt install git 50 | git clone https://github.com/gopakumarce/R2.git 51 | ``` 52 | 53 | 4. In the R2 source code root directory, type the below command. The first two commands 'sudo usermod' and 'newgrp docker' add your user to the docker group so you can create docker containers etc.. with your userid. The first time compilation can take a few seconds, the R2 build system 'cargo' downloads source code for all dependencies and compiles them the first time. 
NOTE: Various things inside the script like creating new interfaces etc.., are also run with 'sudo' permissions 54 | 55 | ```c 56 | sudo usermod -aG docker $USER 57 | newgrp docker 58 | cd R2 59 | ./tryme.sh 60 | ``` 61 | 62 | ## Play around, have fun 63 | 64 | Once step4 is complete, attach to the containers, type route -n, ifconfig etc.. to see the interfaces and ip addresses, and ping from one container to the other. The ping gets routed via R2. Use commands below to attach to either container, ctrl-d to exit. Commands to attach to each container and ping the ip address in the other container, is below. R2 itself does not respond to ping today, so if you ping R2 itself, that will fail. 65 | 66 | ```c 67 | docker exec -it R2_client1 sh 68 | ping 2.1.1.1 69 | docker exec -it R2_client2 sh 70 | ping 1.1.1.1 71 | ``` 72 | 73 | You can play around further for example by adding more loopback interfaces inside the containers, assign it ip addresses like 3.1.1.1, 4.1.1.1 etc.. and add routes in R2 to point the route to the right container interface NOTE: So why dint we have to add routes for the simple setup above ? Its because we were just pinging the connected subnets of each interface. And R2 by default inserts a connected/network route for its interfaces 74 | 75 | ```c 76 | sudo ./target/debug/r2rt route 3.1.1.1/32 2.1.1.1 veth_r2_2 77 | sudo ./target/debug/r2rt route 4.1.1.1/32 1.1.1.1 veth_r2_1 78 | ``` 79 | 80 | -------------------------------------------------------------------------------- /Docs/content/en/Unsafe/_index.md: -------------------------------------------------------------------------------- 1 | --- 2 | weight: 1 3 | type: docs 4 | description: > 5 | 6 | --- 7 | 8 | # Unsafe-Rust in R2 9 | 10 | Anywhere we have to deal with raw pointers, we basically end up having to use unsafe Rust. Obviously the goal is to limit that to a small set of libraries which have no option but to do pointer manipulation. That current list is below. 
From what I know about packet forwarding systems, this would be all about it and I dont expect any more. So this means that we have to be extremely careful what we do in these libraries and we have to vet every pointer manipulation there and ensure they are bug free if we want to rely on Rust's memory safety for rest of the code. 11 | 12 | The code in the unix/ directory deals with low level posix interactions with the system, and hence they are generally expected to be unsafe. 13 | 14 | 1. unix/shm - the shared memory library, here we deal with mmap() and getting virtual addresses etc.., so cant do without unsafe 15 | 16 | 2. unix/socket - the raw socket libray, to send and receive packets. And to send and receive packets we need write and read to/from the particle raw data, hence this also needs unsafe 17 | 18 | 3. unix/epoll - this calls some system calls via libc, like fcntl. This doesnt have to be unsafe. Its a TODO to replace this with a Rust library (does one exist ?) for epoll ? 19 | 20 | 4. counters: counters deal with taking shared memory addresses and converting it to Rust counter structure, so they end up being unsafe 21 | 22 | 5. log: logging is done by writing data to a log buffer, again ends up being unsafe 23 | 24 | 6. packet: The packet library deals with manipulating packet data in raw byte buffers, again ends up being unsafe. The default packet pool provided by the library just deals with buffers from the heap, but at some point we anticipate R2 to come up with say Intel dpdk based packet pools as an example - at which point the place where that packet pool is implemented in R2 will also have some unsafe semantics 25 | 26 | 7. dpdk: The entire dpdk library is unsafe since it deals with dpdk FFI. 
DPDK is a transitional component which gets us going with the help of its rich set of drivers, 27 | till we can afford the time to write those ourselves 28 | 29 | 30 | I cannot re-emphasis the need to keep the amount of unsafe code to the absolute minimum. And like I mentioned before, having seen many packet forwarding systems, I anticipate that the above list is all there ever will be of unsafe code, and if we keep the above pieces of code small and simple and bug free, we can be assured that Rust will take care of the memory sanctity of R2 31 | -------------------------------------------------------------------------------- /Docs/content/en/_index.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Table of Contents" 3 | linkTitle: "Table of Contents" 4 | weight: 20 5 | menu: 6 | main: 7 | weight: 20 8 | type: docs 9 | --- 10 | 11 | The code for this project is at https://github.com/gopakumarce/R2/ - a good sequence 12 | will be to go through the architecture and general overview provided here and [try it 13 | out](tryme/) and then start reading the code accompanied by more documentation of the 14 | code in each code module 15 | 16 | ## Sections 17 | 18 | * [Architecture](architecture/) 19 | * [Lifetimes](lifetimes/) 20 | * [Modules](modules/) 21 | * [R2Configs](r2configs/) 22 | * [Tryme](tryme/) 23 | * [Unsafe](unsafe/) 24 | * [DPDK Support](dpdk/) 25 | 26 | 27 | -------------------------------------------------------------------------------- /Docs/go.mod: -------------------------------------------------------------------------------- 1 | module hugo 2 | 3 | go 1.22.6 4 | 5 | require ( 6 | github.com/FortAwesome/Font-Awesome v4.7.0+incompatible // indirect 7 | github.com/twbs/bootstrap v5.3.3+incompatible // indirect 8 | ) 9 | -------------------------------------------------------------------------------- /Docs/go.sum: -------------------------------------------------------------------------------- 1 | 
github.com/FortAwesome/Font-Awesome v4.7.0+incompatible h1:3trjm7NtX5NXlju1AxSWSzedDMq2hsfH78Qtqrc8EgY= 2 | github.com/FortAwesome/Font-Awesome v4.7.0+incompatible/go.mod h1:IUgezN/MFpCDIlFezw3L8j83oeiIuYoj28Miwr/KUYo= 3 | github.com/twbs/bootstrap v5.3.3+incompatible h1:goFoqinzdHfkeegpFP7pvhbd0g+A3O2hbU3XCjuNrEQ= 4 | github.com/twbs/bootstrap v5.3.3+incompatible/go.mod h1:fZTSrkpSf0/HkL0IIJzvVspTt1r9zuf7XlZau8kpcY0= 5 | -------------------------------------------------------------------------------- /Docs/hugo.toml: -------------------------------------------------------------------------------- 1 | languageCode = 'en-us' 2 | title = 'R2 - Router in Rust' 3 | baseURL = "https://gopakumarce.github.io/R2/" 4 | theme = "docsy" 5 | showSectionPages = true 6 | 7 | [languages] 8 | [languages.en] 9 | contentDir = "content/en" 10 | -------------------------------------------------------------------------------- /LICENCE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Gopakumar C E 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # What is R2 2 | 3 | R2 is a L3 packet router written in rust inspired by the design of VPP . It potentially 4 | will evolve into an L4/L7 proxy (yet another!) 5 | 6 | NOTE: R2 compiles and works only on linux as of now, tested with Ubuntu 20.04, although it should work 7 | with any later ubuntu also 8 | 9 | ## Getting familiar with R2 10 | 11 | The recommended method is to first go through the general overview page , which has links with a high level overview of R2. And once that is done, go through the TryMe page and just get familiar with downloading the code, compiling it and getting R2 running with a simple two container setup. 
Once that is done you can refer to the code itself accompanied by the documentation in various modules of the code 12 | 13 | NOTE: The contents in the links mentioned above are also present in the Docs/content/en directory here 14 | -------------------------------------------------------------------------------- /api/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "api" 3 | version = "0.1.0" 4 | authors = ["Gopa Kumar "] 5 | edition = "2018" 6 | 7 | [dependencies] 8 | thrift = "0.13.0" 9 | ordered-float = "0.3.0" 10 | try_from = "0.2.0" 11 | -------------------------------------------------------------------------------- /api/api.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: "API" 3 | weight: 1 4 | type: docs 5 | description: > 6 | 7 | --- 8 | 9 | # How to write an API 10 | 11 | So here I will not talk about how to write thrift APIs or its syntax etc.. There are plenty of documentation on that in google. Once the thrift file is written, to generate rust code we have to run "thrift -out [directory] --gen rs -r [thrift API file]". This step and how to build the thrift compiler etc.. are all available documentation in google. 12 | 13 | To write a new API for your module foo, here are the steps. I would use the code in apis/log/ and utils/r2log as simple cookie cutter templates. Outlining the broad steps here 14 | 15 | 1. We are putting all the thrift API files in the apis/ directory. So create an apis/foo/ with its Cargo.toml and all that. 16 | 17 | 2. Define the thrift API file apis/foo/src/apis.thrift and put APIs in it. Say you define "service Foo" in this file where the Foo {} will have your APIs like whatever_api_i_defined() 18 | 19 | 3. Compile the apis.thrift to a rust file and put it in apis/foo/src/lib.rs. At some point we will convert this step to be automatically done when we do a cargo build 20 | 21 | 4. 
In common define a name common::FOO_APIS for your API service (like common::LOG_APIS for the logger) 22 | 23 | 5. Inside R2, define an object for your module that has implementations for all the APIs that your module exposes, and whatever other data your module wants to put in there (similar to main/src/log.rs::LogApis). Let's call it FooCtx. And similar to the log example, implement the required trait "impl FooSyncHandler for FooCtx" - which will have all the server side callbacks for the services/APIs you defined in step 2. 24 | 25 | 6. In register_apis() inside R2, register the FooCtx just like the logger does it - svr.register(common::FOO_APIS, Box::new(FooSyncProcessor::new(foo_ctx))) 26 | 27 | 7. In the external Rust utility where you want to invoke the R2 API, call api::api_client() to open a channel to talk to R2. 28 | 29 | 8. Using the channel above, create a client context for your service - FooSyncClient::new(i_prot, o_prot) - let's call it foo_ctx_client. And then using this context you can call all the APIs you defined in the thrift file in step 2 - you can call foo_ctx_client.whatever_api_i_defined() 30 | 31 | Once you have your basic API defined along the lines of the log/ example and you get it compiled and working, it's easier to then do more fancy stuff by consulting the other apis in the code like apis/interface or apis/route which have more flavours like error handling and returning error codes etc..
32 | -------------------------------------------------------------------------------- /api/src/lib.rs: -------------------------------------------------------------------------------- 1 | use thrift::protocol::{TCompactInputProtocol, TCompactOutputProtocol, TMultiplexedOutputProtocol}; 2 | use thrift::protocol::{TCompactInputProtocolFactory, TCompactOutputProtocolFactory}; 3 | use thrift::server::{TMultiplexedProcessor, TProcessor, TServer}; 4 | use thrift::transport::{ReadHalf, WriteHalf}; 5 | use thrift::transport::{TFramedReadTransport, TFramedWriteTransport, TIoChannel, TTcpChannel}; 6 | use thrift::transport::{TFramedReadTransportFactory, TFramedWriteTransportFactory}; 7 | 8 | type ThreadSafeProcessor = Box; 9 | pub type ClientInputProtocol = TCompactInputProtocol>>; 10 | pub type ClientOutputProtocol = TMultiplexedOutputProtocol< 11 | TCompactOutputProtocol>>, 12 | >; 13 | 14 | /// We implement a "Multiplexed" API server, which means that theres one transport connection 15 | /// at 'addr' that all the clients share, and the clients identify themselves with unique 16 | /// names that help in demultiplexing to the right client. And each client of course will have 17 | /// multiple API calls of their own 18 | pub struct ApiSvr { 19 | addr: String, 20 | clients: Vec<(String, ThreadSafeProcessor)>, 21 | } 22 | 23 | impl ApiSvr { 24 | /// A new server listening on address 'addr' 25 | pub fn new(addr: String) -> ApiSvr { 26 | ApiSvr { 27 | addr, 28 | clients: Vec::new(), 29 | } 30 | } 31 | 32 | /// Register a client with 'name' and callback 'processor'. 
The callback has multiple 33 | /// APIs bundled inside it, defined by the client's thrift API defenition 34 | pub fn register(&mut self, name: &str, processor: ThreadSafeProcessor) { 35 | self.clients.push((name.to_string(), processor)); 36 | } 37 | 38 | /// Run the server, listening on the address waiting for clients to make calls 39 | pub fn run(&mut self) -> thrift::Result<()> { 40 | let i_tran = TFramedReadTransportFactory::new(); 41 | let i_prot = TCompactInputProtocolFactory::new(); 42 | let o_tran = TFramedWriteTransportFactory::new(); 43 | let o_prot = TCompactOutputProtocolFactory::new(); 44 | 45 | let mut mux = TMultiplexedProcessor::new(); 46 | while let Some(c) = self.clients.pop() { 47 | mux.register(c.0, c.1, false).unwrap(); 48 | } 49 | 50 | let mut server = TServer::new(i_tran, i_prot, o_tran, o_prot, mux, 1); 51 | server.listen(&self.addr) 52 | } 53 | } 54 | 55 | /// Used by programs outside R2 to establish a session/connection to the API server in R2 56 | pub fn api_client( 57 | host_port: &str, 58 | service: &str, 59 | ) -> thrift::Result<(ClientInputProtocol, ClientOutputProtocol)> { 60 | let mut c = TTcpChannel::new(); 61 | c.open(host_port)?; 62 | let (i_chan, o_chan) = c.split()?; 63 | let i_tran = TFramedReadTransport::new(i_chan); 64 | let o_tran = TFramedWriteTransport::new(o_chan); 65 | let i_prot = TCompactInputProtocol::new(i_tran); 66 | let o_prot = TCompactOutputProtocol::new(o_tran); 67 | let o_prot = TMultiplexedOutputProtocol::new(service, o_prot); 68 | Ok((i_prot, o_prot)) 69 | } 70 | -------------------------------------------------------------------------------- /apis/interface/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "apis_interface" 3 | version = "0.1.0" 4 | authors = ["Gopa Kumar "] 5 | edition = "2018" 6 | 7 | [dependencies] 8 | thrift = "0.13.0" -------------------------------------------------------------------------------- 
/apis/interface/src/apis.thrift: -------------------------------------------------------------------------------- 1 | struct ScApi { 2 | 1: i32 m1, 3 | 2: i32 d, 4 | 3: i32 m2, 5 | } 6 | 7 | struct CurvesApi { 8 | 1: optional ScApi r_sc, 9 | 2: optional ScApi u_sc, 10 | 3: ScApi f_sc, 11 | } 12 | 13 | exception InterfaceErr { 14 | 1: string why 15 | } 16 | 17 | service Interface { 18 | void add_if(1:string ifname, 2:i32 ifindex, 3:string mac) throws (1:InterfaceErr ouch), 19 | void add_ip(1:string ifname, 2:string ip_and_mask) throws (1:InterfaceErr ouch), 20 | void add_class(1:string ifname, 2:string name, 3:string parent, 4:i32 qlimit, 5:bool is_leaf, 6:CurvesApi curves) throws (1:InterfaceErr ouch) 21 | } -------------------------------------------------------------------------------- /apis/log/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "apis_log" 3 | version = "0.1.0" 4 | authors = ["Gopa Kumar "] 5 | edition = "2018" 6 | 7 | [dependencies] 8 | thrift = "0.13.0" 9 | -------------------------------------------------------------------------------- /apis/log/src/apis.thrift: -------------------------------------------------------------------------------- 1 | exception LogErr { 2 | 1: string why 3 | } 4 | 5 | service Log { 6 | void show(1:string filename) throws (1:LogErr ouch), 7 | } -------------------------------------------------------------------------------- /apis/route/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "apis_route" 3 | version = "0.1.0" 4 | authors = ["Gopa Kumar "] 5 | edition = "2018" 6 | 7 | [dependencies] 8 | thrift = "0.13.0" 9 | -------------------------------------------------------------------------------- /apis/route/src/apis.thrift: -------------------------------------------------------------------------------- 1 | 2 | exception RouteErr { 3 | 1: string why 4 | } 5 | 6 | service Route { 7 | 
void add_route(1:string ip_and_mask, 2:string nhop, 3:string ifname) throws (1:RouteErr ouch), 8 | void del_route(1:string ip_and_mask, 2:string nhop, 3:string ifname) throws (1:RouteErr ouch), 9 | string show(1:string prefix, 2:string filename) throws (1:RouteErr ouch), 10 | } -------------------------------------------------------------------------------- /common/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "common" 3 | version = "0.1.0" 4 | authors = ["Gopa Kumar "] 5 | edition = "2018" 6 | 7 | [dependencies] 8 | 9 | -------------------------------------------------------------------------------- /common/src/lib.rs: -------------------------------------------------------------------------------- 1 | use std::time::{SystemTime, UNIX_EPOCH}; 2 | 3 | pub const API_SVR: &str = "127.0.0.1:5555"; 4 | pub const LOG_APIS: &str = "log"; 5 | pub const INTF_APIS: &str = "interface"; 6 | pub const ROUTE_APIS: &str = "route"; 7 | pub const R2CNT_SHM: &str = "r2cnt"; 8 | pub const R2LOG_SHM: &str = "r2log"; 9 | 10 | #[macro_export] 11 | macro_rules! KB { 12 | ($name:expr) => { 13 | $name * 1024 14 | }; 15 | } 16 | 17 | #[macro_export] 18 | macro_rules! 
MB { 19 | ($name:expr) => { 20 | $name * 1024 * 1024 21 | }; 22 | } 23 | 24 | pub fn pow2_u32(val: u32) -> u32 { 25 | let mut v = val - 1; 26 | v |= v >> 1; 27 | v |= v >> 2; 28 | v |= v >> 4; 29 | v |= v >> 8; 30 | v |= v >> 16; 31 | v += 1; 32 | v 33 | } 34 | 35 | pub fn time_nsecs() -> u64 { 36 | SystemTime::now() 37 | .duration_since(UNIX_EPOCH) 38 | .unwrap() 39 | .as_nanos() as u64 40 | } 41 | 42 | pub fn time_usecs() -> u64 { 43 | SystemTime::now() 44 | .duration_since(UNIX_EPOCH) 45 | .unwrap() 46 | .as_micros() as u64 47 | } 48 | 49 | pub fn time_msecs() -> u64 { 50 | SystemTime::now() 51 | .duration_since(UNIX_EPOCH) 52 | .unwrap() 53 | .as_millis() as u64 54 | } 55 | -------------------------------------------------------------------------------- /counters/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "counters" 3 | version = "0.1.0" 4 | authors = ["Gopa Kumar "] 5 | edition = "2018" 6 | 7 | [dependencies] 8 | common = { path = "../common" } 9 | shm = { path = "../unix/shm" } -------------------------------------------------------------------------------- /counters/counters.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Counters" 3 | weight: 1 4 | type: docs 5 | description: > 6 | 7 | --- 8 | 9 | # Shared memory counters 10 | 11 | Counters are allocated in shared memory, so that external utilities can display the counters without disturbing the R2 process. The shared memory itself is managed in unix/shm, and theres not a lot to describe there - its just standard unix shared mem. 12 | 13 | Also note that there is typically only one counter shared memory area for the entire R2 (all forwarding threads). This is because the counters are expected to be allocated and deallocated by the control threads - we never expect the forwarding thread to be dynamically allocating or deallocating counters. 
So the counter allocation/deallocation in the control thread can be lock protected, and the control thread simply passes on the counter to the forwarding thread(s). And of course the counter itself is just a pointer to an 8 byte memory area, so modifying that doesn't need any atomics or locks - which automatically means that the counter cannot be shared across forwarding threads or else the contents will get garbled (we don't do any atomic ops on the counter). So typically every forwarding thread node will have its own copy of the counters - this is done when the control plane thread clones the node, a new copy of the counter is created inside clone() of each node. 14 | 15 | The shared memory for counters is divided into three areas 16 | 17 | ## Shared memory areas 18 | 19 | ### Directory 20 | 21 | The directory is just an array of entries that will point to the name of a counter, length of the name, the address of the actual counter, and number of counters. Note that we might contiguously allocate more than one counter. So the address of the counter points to an array of one or more counters, and hence the length in there. The directory is mostly useful for external utilities to walk through all the counters, dump the counter name and the associated value(s). 22 | 23 | ### Name 24 | 25 | As described before, this holds the name of each counter, there is a maximum length to each name. 26 | 27 | ### Counters 28 | 29 | This holds the actual counter values. Modules that use counters will have addresses of these locations that hold values, and will write into these locations. External utilities that dump counters will read from these locations. 30 | 31 | ## Managing shm areas using 'Bins' 32 | 33 | So the above three areas we mentioned (directory, names, counters) have pre-defined start and end addresses (i.e. they have a fixed size). And then we are allocating objects which are multiples of a fixed size from these areas.
34 | 35 | For example, the directory entry is fixed in size - and we always allocate them one at a time. The name is either 32 or 64 bytes, i.e. it's a multiple of 32. The counters themselves are either one 64 bit value or an array of up to 32 of such 64 bit values. So the general modus operandi here is that each of these areas has items that are of a "bin size" (sizeof directory, 32 bytes for name, 8 bytes for counter) and a "bin max" which is a multiple of bin size that's the max one object can ever be (1 for directory, 2 for names and 32 for counters). 36 | 37 | So the bin module provides a rather simple way to allocate and free fixed size objects from an area, with the possibility of more than one of these objects being contiguous. For example, referring to the area for counters, there are bins of size 1 through 32. The bin size 1 holds a list of counters of just 8 bytes (64 bits). Bin size 2 holds a list of counters of size 16 bytes (2 contiguous 8 bytes) and so on up till bin 32 that holds a list of counters of size contiguous 32*8 bytes. And if someone wants one counter of 8 bytes, they pop one from the bin 1 list. If someone wants a 32x8 byte counter, they pop one from bin32. 38 | 39 | All bins start off as empty. And we keep track of an offset in the memory area from which we are free to allocate. So when someone wants to allocate from a bin that's empty, we allocate the object fresh from the memory area's free offset. There's one catch though, which is that we allocate in multiples of a "page size" - so if someone wants a 32*8 byte object, we allocate page-size / 32x8 number of those objects and put them all in the list in bin32. When an object is freed, it just goes back to its bin - which means that there is no "compaction" and such complicated memory management done. Which means that if someone allocates a ton of 8 byte counters and frees them all and then tries to allocate 32x8 byte counters, the latter allocation might fail since all the free ones are in bin1.
So obviously the bin stuff is used just for counter allocations - counters are usually allocated in fixed patterns and we dont anticipate random malloc style counter allocations, so the simple bin mechanism should suffice 40 | 41 | ## Counter flavors 42 | 43 | There are three types of counters. First being the simple 8 byte counter, which can be used for anything like counting number of packets or bytes or some kind of error etc.. The next is a packet & byte counter - for situations where we want to increment a packet count and also increment the byte count, so rather than have two seperate counters in seperate parts of memory, we just allow an allocation of contiguous 16 bytes of memory. The third is an array of 8byte counters. with max 32 elements in the array. 44 | 45 | -------------------------------------------------------------------------------- /counters/src/bin.rs: -------------------------------------------------------------------------------- 1 | use common::pow2_u32; 2 | use std::cmp; 3 | use std::collections::VecDeque; 4 | 5 | /// The Bin structure divides a range of memory into bins, each bin holds multiple objects of 6 | /// the same size. And different bins have different object sizes, all powers of two. Once an 7 | /// object is put in a bin, it cant be 'moved' to any other bin. The memory range is assumed 8 | /// to be from 0 to a max size, and the objects are returned with 0 based offsets in that range, 9 | /// the caller can adjust the offsets to real memory addresses 10 | #[derive(Default)] 11 | pub struct Bin { 12 | binsz: u32, 13 | max: u64, 14 | zeroes: u32, 15 | pagesz: u32, 16 | offset: u64, 17 | bins: Vec>, 18 | } 19 | 20 | impl Bin { 21 | /// binsz: The object size in each bin are multiples of binsz 22 | /// max: The total size of the range of memory. 
range is 0 to max 23 | /// pagesz: If we need more objects in a bin, we allocate a minimum of pagesz/object-size 24 | pub fn new(mut binsz: u32, max: u64, pagesz: u32) -> Bin { 25 | binsz = pow2_u32(binsz); 26 | Bin { 27 | binsz, 28 | max, 29 | zeroes: binsz.leading_zeros(), 30 | pagesz: pow2_u32(pagesz), 31 | offset: 0, 32 | bins: Vec::new(), 33 | } 34 | } 35 | 36 | // The Bin::bins[index] into which an object of 'size' will fit into 37 | fn index(&self, size: u32) -> (u32, usize) { 38 | let size = pow2_u32(size); 39 | let size = cmp::max(size, self.binsz); 40 | let index = self.zeroes - size.leading_zeros(); 41 | (size, index as usize) 42 | } 43 | 44 | // Add more objects to a bin 45 | fn resize(&mut self, size: u32, index: usize) { 46 | let alloc = if size > self.pagesz { 47 | if size % self.pagesz != 0 { 48 | self.pagesz * size / self.pagesz + 1 49 | } else { 50 | self.pagesz * size / self.pagesz 51 | } 52 | } else { 53 | self.pagesz 54 | }; 55 | if alloc as u64 + self.offset <= self.max { 56 | let mut i = 0; 57 | while i < alloc { 58 | self.bins[index].push_front(self.offset); 59 | i += size; 60 | self.offset += size as u64; 61 | } 62 | } 63 | } 64 | 65 | // Get a 0 based offset into the range, for an object of size 'size' 66 | pub fn get(&mut self, size: u32) -> Option { 67 | if size == 0 { 68 | return None; 69 | } 70 | let (size, index) = self.index(size); 71 | if index >= self.bins.len() { 72 | self.bins.resize(index + 1, VecDeque::new()) 73 | } 74 | if let Some(val) = self.bins[index].pop_front() { 75 | Some(val) 76 | } else { 77 | self.resize(size, index); 78 | self.bins[index].pop_back() 79 | } 80 | } 81 | 82 | // Free object of size 'size' at offset 'base' into the proper bin 83 | pub fn free(&mut self, base: u64, size: u32) { 84 | if size == 0 { 85 | panic!("Bad bin free, base {}, size {}", base, size); 86 | } 87 | let (size, index) = self.index(size); 88 | if index >= self.bins.len() { 89 | panic!( 90 | "Bad bin free, base {}, size {}, index {}", 91 | 
base, size, index 92 | ); 93 | } 94 | self.bins[index].push_front(base); 95 | } 96 | 97 | pub fn offset(&self) -> u64 { 98 | self.offset 99 | } 100 | } 101 | 102 | #[cfg(test)] 103 | mod test { 104 | use super::*; 105 | 106 | #[test] 107 | fn test_bin() { 108 | let mut bin = Bin::new(4, 32, 8); 109 | assert_eq!(bin.zeroes, 29); 110 | // Get a larger than possible bin 111 | let cntr = bin.get(33); 112 | assert!(cntr.is_none()); 113 | // Get one 4 byte bin, it allocates two 4 byte bins (pagesize 8) 114 | let cntr = bin.get(4).unwrap(); 115 | assert_eq!(cntr, 0); 116 | assert_eq!(bin.offset(), 8); 117 | assert_eq!(bin.bins[0].len(), 1); 118 | bin.free(cntr, 4); 119 | // Free the 4 byte bin 120 | assert_eq!(bin.bins[0].len(), 2); 121 | // Get two 4 byte bins, its already allocated 122 | let cntr1 = bin.get(4).unwrap(); 123 | let cntr2 = bin.get(4).unwrap(); 124 | assert_eq!(cntr1, 0); 125 | assert_eq!(cntr2, 4); 126 | assert_eq!(bin.offset(), 8); 127 | assert_eq!(bin.bins[0].len(), 0); 128 | // Get one 16 byte bin (power of two 13) 129 | let cntr = bin.get(13).unwrap(); 130 | assert_eq!(cntr, 8); 131 | assert_eq!(bin.offset(), 24); 132 | assert_eq!(bin.bins[2].len(), 0); 133 | bin.free(cntr, 13); 134 | assert_eq!(bin.bins[2].len(), 1); 135 | // Get one 8 byte bin (power of two 5) 136 | let cntr = bin.get(5).unwrap(); 137 | assert_eq!(cntr, 24); 138 | assert_eq!(bin.offset(), 32); 139 | assert_eq!(bin.bins[1].len(), 0); 140 | bin.free(cntr, 5); 141 | assert_eq!(bin.bins[1].len(), 1); 142 | // get one more 4 byte value (pow of 2 of 3) 143 | let cntr = bin.get(3); 144 | assert!(cntr.is_none()); 145 | } 146 | } 147 | -------------------------------------------------------------------------------- /counters/src/flavors.rs: -------------------------------------------------------------------------------- 1 | use super::*; 2 | use std::cmp; 3 | 4 | // Three flavors of counters: A single u64, two u64s and a vector of u64s 5 | // We use the code lib.rs which allocates a set of 
contiguous counters. The 6 | // raw counters are not exposed by the library, instead we expose the flavours 7 | // of counters as mentioned above. The raw counters deal with raw addresses, 8 | // the flavours here hide the addresses and provide incr() / decr() APIs. 9 | 10 | pub enum CounterType { 11 | Error, 12 | Info, 13 | Pkts, 14 | } 15 | 16 | pub struct Counter { 17 | dir: u64, 18 | count: u64, 19 | } 20 | 21 | fn counter_name(node: &str, ctype: CounterType, name: &str) -> String { 22 | let mut cntr = node.to_string(); 23 | match ctype { 24 | flavors::CounterType::Error => cntr.push_str("/error/"), 25 | flavors::CounterType::Info => cntr.push_str("/info/"), 26 | flavors::CounterType::Pkts => cntr.push_str("/pkts/"), 27 | } 28 | cntr.push_str(name); 29 | let max_namelen = (NAME.binsz * NAME.binmax) as usize; 30 | if cntr.len() > max_namelen { 31 | cntr.truncate(max_namelen); 32 | } 33 | cntr 34 | } 35 | 36 | impl Counter { 37 | pub fn incr(&mut self) -> u64 { 38 | self.add(1) 39 | } 40 | 41 | pub fn decr(&mut self) -> u64 { 42 | self.sub(1) 43 | } 44 | 45 | pub fn add(&mut self, val: u64) -> u64 { 46 | unsafe { 47 | let count = self.count as *mut u64; 48 | *count += val; 49 | *count 50 | } 51 | } 52 | 53 | pub fn sub(&mut self, val: u64) -> u64 { 54 | unsafe { 55 | let count = self.count as *mut u64; 56 | *count -= val; 57 | *count 58 | } 59 | } 60 | 61 | pub fn new(counters: &mut Counters, node: &str, ctype: CounterType, name: &str) -> Counter { 62 | let (mut dir, mut base) = counters.get(&counter_name(node, ctype, name), 1); 63 | if dir == 0 { 64 | dir = counters.dummies.counter.dir; 65 | base = counters.dummies.counter.base; 66 | } 67 | Counter { dir, count: base } 68 | } 69 | 70 | #[allow(dead_code)] 71 | pub fn free(&self, counters: &mut Counters) { 72 | if self.dir != counters.dummies.counter.dir { 73 | counters.free(self.dir); 74 | } 75 | } 76 | } 77 | 78 | pub struct PktsBytes { 79 | dir: u64, 80 | pkts: u64, 81 | bytes: u64, 82 | } 83 | 84 | impl 
PktsBytes { 85 | pub fn incr(&mut self, val: u64) -> u64 { 86 | self.add(1, val) 87 | } 88 | 89 | pub fn decr(&mut self, val: u64) -> u64 { 90 | self.sub(1, val) 91 | } 92 | 93 | pub fn add(&mut self, pkts: u64, bytes: u64) -> u64 { 94 | unsafe { 95 | let count = self.bytes as *mut u64; 96 | *count += bytes; 97 | let count = self.pkts as *mut u64; 98 | *count += pkts; 99 | *count 100 | } 101 | } 102 | 103 | pub fn sub(&mut self, pkts: u64, bytes: u64) -> u64 { 104 | unsafe { 105 | let count = self.bytes as *mut u64; 106 | *count -= bytes; 107 | let count = self.pkts as *mut u64; 108 | *count -= pkts; 109 | *count 110 | } 111 | } 112 | 113 | pub fn new(counters: &mut Counters, node: &str, ctype: CounterType, name: &str) -> PktsBytes { 114 | let (mut dir, mut base) = counters.get(&counter_name(node, ctype, name), 2); 115 | if dir == 0 { 116 | dir = counters.dummies.pktsbytes.dir; 117 | base = counters.dummies.pktsbytes.base; 118 | } 119 | PktsBytes { 120 | dir, 121 | pkts: base, 122 | bytes: base + VEC.binsz as u64, 123 | } 124 | } 125 | 126 | #[allow(dead_code)] 127 | pub fn free(&self, counters: &mut Counters) { 128 | if self.dir != counters.dummies.pktsbytes.dir { 129 | counters.free(self.dir); 130 | } 131 | } 132 | } 133 | 134 | pub struct CounterArray { 135 | dir: u64, 136 | array: Vec, 137 | } 138 | 139 | impl CounterArray { 140 | pub fn get(&self, index: usize) -> u64 { 141 | unsafe { 142 | let count = self.array[index] as *mut u64; 143 | *count 144 | } 145 | } 146 | 147 | pub fn set(&mut self, index: usize, val: u64) -> u64 { 148 | unsafe { 149 | let count = self.array[index] as *mut u64; 150 | *count = val; 151 | *count 152 | } 153 | } 154 | 155 | pub fn incr(&mut self, index: usize) -> u64 { 156 | self.add(index, 1) 157 | } 158 | 159 | pub fn decr(&mut self, index: usize) -> u64 { 160 | self.sub(index, 1) 161 | } 162 | 163 | pub fn add(&mut self, index: usize, val: u64) -> u64 { 164 | unsafe { 165 | let count = self.array[index] as *mut u64; 166 | *count += 
val; 167 | *count 168 | } 169 | } 170 | 171 | pub fn sub(&mut self, index: usize, val: u64) -> u64 { 172 | unsafe { 173 | let count = self.array[index] as *mut u64; 174 | *count -= val; 175 | *count 176 | } 177 | } 178 | 179 | pub fn new( 180 | counters: &mut Counters, 181 | node: &str, 182 | ctype: CounterType, 183 | name: &str, 184 | size: usize, 185 | ) -> CounterArray { 186 | let veclen = cmp::min(size, VEC.binmax as usize); 187 | let (mut dir, mut base) = counters.get(&counter_name(node, ctype, name), veclen); 188 | if dir == 0 { 189 | dir = counters.dummies.array.dir; 190 | base = counters.dummies.array.base; 191 | } 192 | let mut vec = Vec::new(); 193 | for i in 0..veclen { 194 | vec.push(base + i as u64 * VEC.binsz as u64); 195 | } 196 | CounterArray { dir, array: vec } 197 | } 198 | 199 | #[allow(dead_code)] 200 | pub fn free(&self, counters: &mut Counters) { 201 | if self.dir != counters.dummies.array.dir { 202 | counters.free(self.dir); 203 | } 204 | } 205 | } 206 | 207 | #[derive(Default, Clone)] 208 | pub struct CounterRO { 209 | val: Vec, 210 | } 211 | 212 | impl CounterRO { 213 | pub fn new(base: u64, len: u32) -> CounterRO { 214 | assert!(len <= VEC.binsz * VEC.binmax); 215 | let mut val = vec![]; 216 | for i in 0..len / VEC.binsz { 217 | val.push(base + i as u64 * VEC.binsz as u64); 218 | } 219 | CounterRO { val } 220 | } 221 | 222 | pub fn search( 223 | parent: &CountersRO, 224 | node: &str, 225 | ctype: CounterType, 226 | name: &str, 227 | ) -> Option { 228 | parent.hash.get(&counter_name(node, ctype, name)).cloned() 229 | } 230 | 231 | pub fn num_cntrs(&self) -> usize { 232 | self.val.len() 233 | } 234 | 235 | pub fn read(&self, index: usize) -> u64 { 236 | unsafe { *(self.val[index] as *const u64) } 237 | } 238 | } 239 | -------------------------------------------------------------------------------- /counters/src/lib.rs: -------------------------------------------------------------------------------- 1 | use self::flavors::CounterRO; 2 | use 
bin::Bin; 3 | use shm::{shm_close, shm_open_ro, shm_open_rw, shm_unlink}; 4 | use std::collections::HashMap; 5 | use std::mem::size_of; 6 | use std::str; 7 | 8 | // This module provides counters in shared memory 9 | // The shared memory is divided into four areas: 10 | // 0. struct Hdr 11 | // 1. A Directory area which leads us to counter names and values 12 | // 2. Area for the counter values themselves 13 | // 3. Area for counter names 14 | // 15 | // The directory has the first 4Mb of shm space, values gets 16Mb and the names 16 | // come after that. The values and names are allocated in power of twos and the 17 | // free ones go back to a simple power of two bin from which we reallocate them 18 | 19 | const MMAP_SIZE: usize = common::MB!(64); 20 | 21 | // Holds the directory entries 22 | const DIR: BinInfo = BinInfo { 23 | binsz: size_of::() as u32, 24 | binmax: 1, 25 | pagesz: common::KB!(4), 26 | start: 64, 27 | totsz: common::MB!(4), 28 | }; 29 | 30 | // Holds the actual counters 31 | const VEC: BinInfo = BinInfo { 32 | binsz: size_of::() as u32, 33 | binmax: 32, 34 | pagesz: common::KB!(4), 35 | start: DIR.start + DIR.totsz, 36 | totsz: common::MB!(16), 37 | }; 38 | 39 | // Holds the names of the counters 40 | const NAME: BinInfo = BinInfo { 41 | binsz: 32, 42 | binmax: 2, 43 | pagesz: common::KB!(4), 44 | start: VEC.start + VEC.totsz, 45 | totsz: common::MB!(32), 46 | }; 47 | 48 | struct Hdr { 49 | num_counters: u32, 50 | } 51 | 52 | struct BinInfo { 53 | // The objects in each bin are a multiple of binsz, max multiple being binmax 54 | binsz: u32, 55 | // The largest object we can ask for is binsz * binmax 56 | binmax: u32, 57 | // We want to allocate objects of at least pagesz/object-size in count 58 | pagesz: u32, 59 | // Assuming we carve up a large memory range into multiple ranges for different 60 | // purposes, the first one starts at offset 0, and this one starts at 'start' 61 | start: u64, 62 | // max size of this memory range 63 | totsz: u64, 64 
| } 65 | 66 | // One particular directory entry. It holds an offset into the name area for the name 67 | // of this counter, a length of the name string, offset into the counter area for the 68 | // actual counter/counters, and length of the counters 69 | #[derive(Default, Copy, Clone)] 70 | struct Dir { 71 | name_off: u32, 72 | name_len: u32, 73 | vec_off: u32, 74 | vec_len: u32, 75 | } 76 | 77 | #[derive(Default, Copy, Clone)] 78 | struct Dummy { 79 | dir: u64, 80 | base: u64, 81 | } 82 | 83 | struct Dummies { 84 | counter: Dummy, 85 | pktsbytes: Dummy, 86 | array: Dummy, 87 | } 88 | 89 | pub struct Counters { 90 | shname: String, 91 | fd: i32, 92 | base: u64, 93 | dir: Bin, 94 | vec: Bin, 95 | names: Bin, 96 | dummies: Dummies, 97 | } 98 | 99 | impl Counters { 100 | // If we run out of counters, we dont want to panic or make the caller to bail out and 101 | // take special actions etc.. It is very very possible to run out of counters, and usually 102 | // its fine to let the program just continue in that case. So we provide a dummy counter, 103 | // the same dummy counter that will be given to everyone who asks for counters and runs out. 
104 | fn dummies(&mut self) { 105 | let (dir, base) = self.get("dummy1", 1); 106 | self.dummies.counter.dir = dir; 107 | self.dummies.counter.base = base; 108 | let (dir, base) = self.get("dummy2", 2); 109 | self.dummies.pktsbytes.dir = dir; 110 | self.dummies.pktsbytes.base = base; 111 | let (dir, base) = self.get("dummyN", VEC.binmax as usize); 112 | self.dummies.array.dir = dir; 113 | self.dummies.array.base = base; 114 | } 115 | 116 | /// Allocate a new counter pool, open a named shared memory of 'name' 117 | pub fn new(name: &str) -> Result { 118 | assert!(DIR.start >= size_of::() as u64); 119 | let (fd, base) = shm_open_rw(name, MMAP_SIZE); 120 | if base == 0 { 121 | return Err(fd); 122 | } 123 | 124 | let dummy = Dummy { dir: 0, base: 0 }; 125 | let mut counters = Counters { 126 | shname: name.to_string(), 127 | fd, 128 | base, 129 | dir: Bin::new(DIR.binsz, DIR.totsz, DIR.pagesz), 130 | vec: Bin::new(VEC.binsz, VEC.totsz, VEC.pagesz), 131 | names: Bin::new(NAME.binsz, NAME.totsz, NAME.pagesz), 132 | dummies: Dummies { 133 | counter: dummy, 134 | pktsbytes: dummy, 135 | array: dummy, 136 | }, 137 | }; 138 | unsafe { 139 | let hdr = base as *mut Hdr; 140 | (*hdr).num_counters = 0; 141 | } 142 | counters.dummies(); 143 | Ok(counters) 144 | } 145 | 146 | // Allocate a counter with 'name', contiguous 'nvecs' 64 bit counters 147 | fn get(&mut self, name: &str, nvecs: usize) -> (u64, u64) { 148 | let mut ret = (0, 0); 149 | let veclen = nvecs as u32 * VEC.binsz; 150 | let daddr = self.dir.get(DIR.binsz); 151 | let vaddr = self.vec.get(veclen); 152 | let naddr = self.names.get(name.len() as u32); 153 | if let Some(daddr) = daddr { 154 | if let Some(vaddr) = vaddr { 155 | if let Some(naddr) = naddr { 156 | let daddr = daddr + self.base + DIR.start; 157 | let vaddr = vaddr + self.base + VEC.start; 158 | let naddr = naddr + self.base + NAME.start; 159 | unsafe { 160 | let bytes = name.as_bytes(); 161 | for (n, byte) in bytes.iter().enumerate() { 162 | let n8 = (naddr 
+ n as u64) as *mut u8; 163 | *n8 = *byte; 164 | } 165 | let d = daddr as *mut Dir; 166 | (*d).name_off = (naddr - self.base) as u32; 167 | (*d).name_len = name.len() as u32; 168 | (*d).vec_off = (vaddr - self.base) as u32; 169 | (*d).vec_len = veclen; 170 | // This is the total number of counters allocated, might not be in-use 171 | let hdr = self.base as *mut Hdr; 172 | (*hdr).num_counters = self.dir.offset() as u32 / DIR.binsz; 173 | } 174 | ret = (daddr, vaddr) 175 | } else { 176 | self.dir.free(daddr, veclen); 177 | self.vec.free(vaddr, veclen); 178 | } 179 | } else { 180 | self.dir.free(daddr, veclen); 181 | } 182 | } 183 | ret 184 | } 185 | 186 | // Free a counter indicated by a 'dir' directory entry, return the name address 187 | // to the name bin, return the counter address (potentially more than one 188 | // contiguous counter) to its bin, then return the dir address itself to the dir bin 189 | #[allow(dead_code)] 190 | fn free(&mut self, dir: u64) { 191 | unsafe { 192 | let d = dir as *mut Dir; 193 | let noff = (*d).name_off as u64 - NAME.start; 194 | self.names.free(noff, (*d).name_len); 195 | let voff = (*d).vec_off as u64 - VEC.start; 196 | self.vec.free(voff, (*d).vec_len); 197 | (*d).name_off = 0; 198 | (*d).name_len = 0; 199 | (*d).vec_off = 0; 200 | (*d).vec_len = 0; 201 | let dir = dir - (self.base + DIR.start); 202 | self.dir.free(dir, DIR.binsz); 203 | } 204 | } 205 | } 206 | 207 | impl Drop for Counters { 208 | fn drop(&mut self) { 209 | shm_close(self.fd); 210 | shm_unlink(&self.shname[0..]); 211 | } 212 | } 213 | 214 | pub struct CountersRO { 215 | fd: i32, 216 | base: u64, 217 | pub hash: HashMap, 218 | } 219 | 220 | impl CountersRO { 221 | /// A readonly version of the current set of counters (at the time of reading 222 | /// shared memory). 
Walk through the directory entries and create a hashmap of 223 | /// the names of each counter and the address of the counter 224 | pub fn new(name: &str) -> Result { 225 | let (fd, base) = shm_open_ro(name, MMAP_SIZE); 226 | if base == 0 { 227 | return Err(fd); 228 | } 229 | let mut counters = CountersRO { 230 | fd, 231 | base, 232 | hash: HashMap::new(), 233 | }; 234 | unsafe { 235 | let hdr = counters.base as *mut Hdr; 236 | for i in 0..(*hdr).num_counters { 237 | let d = (counters.base + DIR.start + (i * DIR.binsz) as u64) as *mut Dir; 238 | let dir: Dir = *d; 239 | if dir.name_len == 0 240 | || dir.vec_len == 0 241 | || dir.name_len > NAME.binmax * NAME.binsz 242 | || dir.vec_len > VEC.binmax * VEC.binsz 243 | { 244 | continue; 245 | } 246 | let mut vec_names = vec![]; 247 | for i in 0..dir.name_len { 248 | let names = (counters.base + (dir.name_off + i) as u64) as *const u8; 249 | vec_names.push(*names); 250 | } 251 | let name = str::from_utf8(&vec_names[0..]).unwrap_or("UNKNOWN"); 252 | let cntr = CounterRO::new(counters.base + dir.vec_off as u64, dir.vec_len); 253 | counters.hash.insert(name.to_string(), cntr); 254 | } 255 | } 256 | Ok(counters) 257 | } 258 | } 259 | 260 | impl Drop for CountersRO { 261 | fn drop(&mut self) { 262 | shm_close(self.fd); 263 | } 264 | } 265 | 266 | mod bin; 267 | pub mod flavors; 268 | 269 | #[cfg(test)] 270 | mod test; 271 | -------------------------------------------------------------------------------- /dpdk/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "dpdk" 3 | version = "0.1.0" 4 | authors = ["Gopa Kumar "] 5 | edition = "2018" 6 | 7 | [dependencies] 8 | libc = "0.2.0" 9 | crossbeam-queue = "0.2.1" 10 | dpdk-ffi = { path = "../ffis/dpdk", optional = true } 11 | graph = { path = "../graph" } 12 | packet = { path = "../packet" } 13 | counters = { path = "../counters" } 14 | socket = { path = "../unix/socket" } 15 | 16 | [features] 17 | "dpdk" = 
["dep:dpdk-ffi"] 18 | -------------------------------------------------------------------------------- /dpdk/Dpdk_Support.md: -------------------------------------------------------------------------------- 1 | # R2 supports DPDK (Interfacing Rust & C) 2 | 3 | ## Trying R2 with dpdk 4 | 5 | Follow the same instructions as in ["tryme page"](https://r2.rs/tryme/), but before doing any of those steps, create a file named /etc/r2.cfg and put the below contents in it. This will ensure that the same example runs in dpdk mode, if you do a "top -H" after the tryme.sh completes, you will see two lcore threads (1 and 2) spinning at 100% - those are the dpdk polling EAL threads. Without this config file, the tryme.sh example will make R2 run in socket mode. 6 | 7 | ``` 8 | [dpdk] 9 | on=true 10 | mem=128 11 | ncores=3 12 | ``` 13 | 14 | 15 | ## FFI: Interfacing with C code 16 | 17 | Obviously, calling DPDK APIs and using various dpdk structures etc. is possible only if there is a Rust defenition of those APIs and structures - and then we can mark them as "extern C" to indicate they are C compiled APIs - a one line summary of Rust's Foreign Function Interface aka FFI. But there is a gazillion number of dpdk apis and structures - there is too many, even if we use only a handful of them. I would have given up and not done this work at all if I had to sit down and hand code these APIs and structures. 18 | 19 | Thankfully that was not needed. There is a utility called bindgen that uses help from the llvm compiler to take a piece of C code and generate rust equivalent apis and structures "automatically". See the [section on bindgen](https://r2.rs/bindgen/) to see the few basic steps I had to do to generate automatic bindings. But it is not without its own share of problems, as the [section on bindgen](https://r2.rs/bindgen/) shows, there are some manual steps to get rid of some nested __align__ attribute which bindgen doesnt recognize etc. 
And also bindgen obviously does not generate bindings for inline functions - and there is a truck load of inline utility APIs in dpdk. So those had to be hand coded, and that was not a lot of fun. But I cant complain because if not for bindgen this whole work would not even have been possible. I hear there is something better than bindgen that works on c++ code, have not checked that out. But I dont anticipate needing a lot of bindings to dpdk code because like I mentioned in the beginning, my need is just to use the dpdk pools and Rx/Tx APIs, I dont plan to use any other dpdk features. 20 | 21 | ## DPDK plugging into the R2 architecture 22 | 23 | There was no architecture level changes needed to just get dpdk to Rx/Tx packets. Obviously there was a lot of pointer manipulation for making dpdk mbufs usable with R2, but otherwise dpdk drivers just plugin as an IFNode graph node in the R2 graph. And finally the entire R2 graph is run as a dpdk EAL thread. And there can be as many threads as required with the ports split across the threads - the simple ["tryme" example](https://r2.rs/tryme/) can be run with dpdk enabled and two threads, one handling one port each. There needs to be a config file that turns on dpdk also [documented in](https://r2.rs/dpdk/) 24 | 25 | ## More work with dpdk 26 | 27 | Some more items to handle over time 28 | 29 | 1. DPDK in this this first commit just uses AF_PACKET driver (running with --no-pci), but nothing prevents it from any driver - although a PCI driver needs more work in terms of unbinding linux drivers etc.., but thats all script work outside R2 itself. 30 | 2. Also we havent really used hugepages (runnig with --no-huge), that again is a script work outside of R2. 31 | 3. The DPDK threads run 100% by default, as seen when using the ["tryme" example](https://r2.rs/tryme/) with dpdk on. 
Some way to modulate them to not use 100% would be useful 32 | -------------------------------------------------------------------------------- /dpdk/dpdk.md: -------------------------------------------------------------------------------- 1 | --- 2 | weight: 1 3 | type: docs 4 | description: > 5 | 6 | --- 7 | 8 | # Compiling with dpdk 9 | 10 | Install dpdk as below (snippet below gives the tested version) 11 | 12 | ``` 13 | sudo apt-get install python3 ninja-build meson 14 | curl https://fast.dpdk.org/rel/dpdk-19.11.3.tar.xz -o dpdk.tar.xz 15 | tar xf dpdk.tar.xz 16 | cd dpdk-stable-19.11.3 17 | meson build 18 | cd build 19 | ninja 20 | sudo ninja install 21 | ``` 22 | 23 | And then do "cargo build --features dpdk" 24 | 25 | NOTE: To update to a different dpdk version, go to ffis/dpdk and run bindgen.sh 26 | there. Note that the produced rust bindings might need minor tweaks to get it 27 | compiled, for example dpdk C code has an aligned structure placed inside a packed 28 | structure (see comments about rte_ether_addr in bindgen/include/lib.rs) which 29 | rust cannot support, so that needed a manual tweak 30 | 31 | What we support is a simple AF_PACKET dpdk driver, just to ensure that the general 32 | process of initializing dpdk and working with its apis are all in place. To switch 33 | to a regular PCI driver "hopefully" does not involve code changes and is transparent 34 | to the users of the dpdk library. It of course will involve configuring hugepages and 35 | such which are general dpdk setup procedures outside the purview of this code 36 | 37 | # DPDK interacting with R2 38 | 39 | What R2 wants to leverage from DPDK is the wealth of device drivers it supports. It gives us a quick start and as and when R2 gets mature and has drivers of its own, 40 | the DPDK support can be phased out. We do not intend to use any other packet forwarding functionalities in DPDK. And including DPDK violates the "safety" aspect of 41 | Rust big time - all FFI is unsafe code! 
So use of DPDK should be a transit path to get us somewhere and then we replace the dependency on it (which is drivers) 42 | 43 | ## Configuration 44 | 45 | In the r2 config file add a section called dpdk as below. The on=true means R2 is running with dpdk enabled, it can be set to false and then rest of the dpdk configs dont matter because dpdk is turned off. The mem=128 says dpdk uses 128Mb for mbuf pool. The ncores=3 says that core0 is used as the main core (non data plane) and core1 and core2 are the data plane cores. core0 is used as the main core always as of today 46 | 47 | ``` 48 | [dpdk] 49 | on=true 50 | mem=128 51 | ncores=3 52 | ``` 53 | 54 | ## Packet pools 55 | 56 | R2 has the PacketPool trait in packet cargo, which is implemented for DPDK also. DPDK has the concept of mbufs with its own mbuf header with its own l3/l2 fields etc., 57 | we dont plan to use anything in the mbuf header other than being able to specify the mbuf packet/data length in the mbuf pkt_len and data_len fields. Also we support 58 | only a single mbuf packet (as of today), even though R2 itself supports chained particles. So the mapping is as follows 59 | 60 | pkt - comes from heap 61 | particle - comes from heap 62 | particle.raw - this is the mbuf 63 | 64 | The dpdk mbuf structure is like this - [[struct rte_mbuf][headroom][data area]]. The headroom + data-area is the size we specify to dpdk when we create an mbuf pool. 65 | 66 | When a packet is freed, it just goes back to the pool's packet free queue. For a particle, we dont maintain a free queue, instead we let the freed particle go back 67 | into the dpdk mbuf pool (we have to give it back to the mbuf pool or else dpdk driver wont find an mbuf to give us a packet). And when we need a particle, we allocate 68 | an mbuf from dpdk mbuf pool, but then how do we get the heap-particle from the mbuf ? We do that by stealing two pointers from the headroom. 
So the actual layout 69 | of the mbuf that we use is as below 70 | 71 | [[struct rte_mbuf][mbuf-ptr heap-ptr remaining-headroom][data area]] 72 | 73 | So the mbuf buf_addr starts right after the rte_mbuf structure, in our case poiting to the area we use to store the mbuf pointer itself. And the next word we use 74 | to store the heap-particle pointer. Each mbuf is allocated its own heap-particle when mbuf pool is initialized. So when mbuf is allocated, we can get the BoxPart 75 | structure also using the heap-ptr address. So we eat into the available headroom a bit. So this allows us to get from mbuf to BoxPart 76 | 77 | The mbuf pointer itself is stored to get from BoxPart to mbuf .. So if BoxPart is freed, we know what mbuf needs to be freed to the dpdk mbuf pool. Obviously, all 78 | this is hugely unsafe pointer math. 79 | 80 | ## Driver Rx/Tx 81 | 82 | DPDK initializes each port and assigns it a port number. Each dpdk port is a structure that implements the Driver trait in graph cargo. The Driver trait expects 83 | and send and receive function, which is implemented using DPDK's rx-burst and tx-burst APIs (with burst size 1 as of today). And the drivers/ports are themselves 84 | just a part of the IfNode graph node. 85 | 86 | ## DPDK EAL Thread 87 | 88 | DPDK does the actual work of reading packets, processing them and sending them out in "EAL Threads". And for R2, an EAL thread is nothing but a thread that processes 89 | the graph. Unfortunately the EAL thread runs inside the dpdk FFI code, so we have to dress up the Rust graph processing routing with unsafes to make it palatable 90 | to FFI. 91 | 92 | Other than the above mentioned items, the rest of the architecture continues to be the same - DPDK or not, we have features in graph nodes, we have a graph 93 | processor thread, we have driver nodes. And dpdk sits in the driver nodes, and the grap processor is run as a DPDK EAL thread, that is about it. 
94 | -------------------------------------------------------------------------------- /dpdk/src/lib.rs: -------------------------------------------------------------------------------- 1 | #[cfg(feature = "dpdk")] 2 | mod dpdk; 3 | 4 | #[cfg(not(feature = "dpdk"))] 5 | mod stubs; 6 | 7 | // Re-export the APIs so main and foobar don't need to care about stubs.rs 8 | #[cfg(feature = "dpdk")] 9 | pub use dpdk::*; 10 | 11 | #[cfg(not(feature = "dpdk"))] 12 | pub use stubs::*; 13 | 14 | pub enum DpdkHw { 15 | AfPacket, 16 | PCI, 17 | } 18 | 19 | pub struct Params<'a> { 20 | pub name: &'a str, 21 | pub hw: DpdkHw, 22 | } 23 | 24 | #[derive(Debug)] 25 | pub enum PortInitErr { 26 | ProbeFail, 27 | ConfigFail, 28 | QueueFail, 29 | StartFail, 30 | UnknownHw, 31 | } 32 | -------------------------------------------------------------------------------- /dpdk/src/stubs.rs: -------------------------------------------------------------------------------- 1 | use super::*; 2 | use core::panic; 3 | use counters::Counters; 4 | use crossbeam_queue::ArrayQueue; 5 | use graph::Driver; 6 | use packet::{BoxPart, BoxPkt, PacketPool}; 7 | use std::sync::Arc; 8 | 9 | pub struct Dpdk {} 10 | 11 | #[derive(Default)] 12 | pub struct DpdkGlobal {} 13 | 14 | pub struct PktsDpdk {} 15 | 16 | unsafe impl Send for PktsDpdk {} 17 | 18 | impl PktsDpdk { 19 | pub fn new( 20 | _: &str, 21 | _: Arc>, 22 | _: &mut Counters, 23 | _: usize, 24 | _: usize, 25 | _: usize, 26 | ) -> Self { 27 | panic!("DPDK feature not compiled in"); 28 | } 29 | } 30 | 31 | impl PacketPool for PktsDpdk { 32 | fn pkt(&mut self, _: usize) -> Option { 33 | panic!("DPDK feature not compiled in"); 34 | } 35 | 36 | fn particle(&mut self, _: usize) -> Option { 37 | panic!("DPDK feature not compiled in"); 38 | } 39 | 40 | fn free_pkt(&mut self, _: BoxPkt) { 41 | panic!("DPDK feature not compiled in"); 42 | } 43 | 44 | fn free_part(&mut self, _: BoxPart) { 45 | panic!("DPDK feature not compiled in"); 46 | } 47 | 48 | fn 
particle_sz(&self) -> usize { 49 | panic!("DPDK feature not compiled in"); 50 | } 51 | 52 | fn pkt_with_particles(&mut self, _: BoxPart) -> Option { 53 | panic!("DPDK feature not compiled in"); 54 | } 55 | 56 | fn opaque(&self) -> u64 { 57 | panic!("DPDK feature not compiled in"); 58 | } 59 | } 60 | 61 | impl DpdkGlobal { 62 | pub fn new(_: usize, _: usize) -> Self { 63 | panic!("DPDK feature not compiled in"); 64 | } 65 | 66 | pub fn add(&mut self, _: &mut Counters, _: Params) -> Result { 67 | Err(PortInitErr::UnknownHw) 68 | } 69 | } 70 | 71 | impl Driver for Dpdk { 72 | fn fd(&self) -> Option { 73 | panic!("DPDK feature not compiled in"); 74 | } 75 | 76 | fn recvmsg(&mut self, _: &mut dyn PacketPool, _: usize) -> Option { 77 | panic!("DPDK feature not compiled in"); 78 | } 79 | 80 | fn sendmsg(&mut self, _: &mut dyn PacketPool, _: BoxPkt) -> usize { 81 | panic!("DPDK feature not compiled in"); 82 | } 83 | } 84 | 85 | pub fn dpdk_init(_: usize, _: usize) -> Result<(), i32> { 86 | panic!("DPDK feature not compiled in"); 87 | } 88 | 89 | pub type LcoreFunctionT = ::std::option::Option< 90 | unsafe extern "C" fn(arg1: *mut ::std::os::raw::c_void) -> ::std::os::raw::c_int, 91 | >; 92 | 93 | pub fn dpdk_launch(_: usize, _: LcoreFunctionT, _: *mut core::ffi::c_void) { 94 | panic!("DPDK feature not compiled in"); 95 | } 96 | -------------------------------------------------------------------------------- /dpdk/src/test.rs: -------------------------------------------------------------------------------- 1 | use super::*; 2 | 3 | use counters::Counters; 4 | use crossbeam_queue::ArrayQueue; 5 | use packet::PacketPool; 6 | use std::process::Command; 7 | use std::sync::atomic::{AtomicUsize, Ordering}; 8 | use std::sync::Arc; 9 | use std::thread; 10 | use std::time; 11 | 12 | const NUM_PKTS: usize = 10; 13 | const NUM_PART: usize = 20; 14 | const PART_SZ: usize = 3072; 15 | const MAX_PACKET: usize = 1500; 16 | 17 | fn packet_free(q: Arc>, pool: &mut dyn PacketPool) { 18 | 
while let Ok(p) = q.pop() { 19 | pool.free(p); 20 | } 21 | } 22 | 23 | fn packet_pool(test: &str, counters: &mut Counters, q: Arc>) -> Box { 24 | Box::new(PktsDpdk::new( 25 | test, q, counters, NUM_PKTS, NUM_PART, PART_SZ, 26 | )) 27 | } 28 | 29 | fn delete_veth() { 30 | let args = [ 31 | "link", "del", "r2_eth1", "type", "veth", "peer", "name", "r2_eth2", 32 | ]; 33 | Command::new("ip") 34 | .args(&args) 35 | .spawn() 36 | .expect("veth failed") 37 | .wait() 38 | .unwrap(); 39 | } 40 | 41 | // We get random packets if ipv6 is enabled, we want only our own packets 42 | fn disable_ipv6(eth: &str) -> String { 43 | let mut name = "net.ipv6.conf.".to_string(); 44 | name.push_str(eth); 45 | name.push_str(".disable_ipv6=1"); 46 | name 47 | } 48 | 49 | fn create_veth() { 50 | let args = [ 51 | "link", "add", "r2_eth1", "type", "veth", "peer", "name", "r2_eth2", 52 | ]; 53 | Command::new("ip") 54 | .args(&args) 55 | .spawn() 56 | .expect("veth failed") 57 | .wait() 58 | .unwrap(); 59 | 60 | let args = ["r2_eth1", "up"]; 61 | Command::new("ifconfig") 62 | .args(&args) 63 | .spawn() 64 | .expect("ifconfig eth1 fail") 65 | .wait() 66 | .unwrap(); 67 | let args = ["-w", &disable_ipv6("r2_eth1")]; 68 | Command::new("sysctl") 69 | .args(&args) 70 | .spawn() 71 | .expect("ipv6 disable fail") 72 | .wait() 73 | .unwrap(); 74 | 75 | let args = ["-w", &disable_ipv6("r2_eth2")]; 76 | Command::new("sysctl") 77 | .args(&args) 78 | .spawn() 79 | .expect("ipv6 disable fail") 80 | .wait() 81 | .unwrap(); 82 | let args = ["r2_eth2", "up"]; 83 | Command::new("ifconfig") 84 | .args(&args) 85 | .spawn() 86 | .expect("ifconfig eth2 fail") 87 | .wait() 88 | .unwrap(); 89 | } 90 | 91 | struct DpdkThread { 92 | pool_rx: Box, 93 | pool_tx: Box, 94 | q_rx: Arc>, 95 | q_tx: Arc>, 96 | dpdk_rx: Dpdk, 97 | dpdk_tx: Dpdk, 98 | done: Arc, 99 | } 100 | 101 | extern "C" fn dpdk_eal_thread(arg: *mut core::ffi::c_void) -> i32 { 102 | unsafe { 103 | let params: Box = Box::from_raw(arg as *mut DpdkThread); 104 
| dpdk_thread(params); 105 | 0 106 | } 107 | } 108 | 109 | fn dpdk_thread(mut params: Box) { 110 | let data: Vec = (0..MAX_PACKET).map(|x| (x % 256) as u8).collect(); 111 | loop { 112 | packet_free(params.q_rx.clone(), &mut *params.pool_rx); 113 | packet_free(params.q_tx.clone(), &mut *params.pool_tx); 114 | let pkt = params.pool_tx.pkt(0); 115 | if pkt.is_none() { 116 | continue; 117 | } 118 | let mut pkt = pkt.unwrap(); 119 | assert!(pkt.append(&mut *params.pool_tx, &data[0..])); 120 | assert_eq!( 121 | params.dpdk_tx.sendmsg(&mut *params.pool_tx, pkt), 122 | MAX_PACKET 123 | ); 124 | 125 | let pkt = params.dpdk_rx.recvmsg(&mut *params.pool_rx, 0); 126 | if pkt.is_none() { 127 | continue; 128 | } 129 | let pkt = pkt.unwrap(); 130 | let pktlen = pkt.len(); 131 | assert_eq!(MAX_PACKET, pktlen); 132 | let (buf, len) = match pkt.data(0) { 133 | Some((d, s)) => (d, s), 134 | None => panic!("Cant get offset 0"), 135 | }; 136 | assert_eq!(len, pktlen); 137 | for i in 0..MAX_PACKET { 138 | assert_eq!(buf[i], i as u8); 139 | } 140 | params.done.fetch_add(1, Ordering::Relaxed); 141 | } 142 | } 143 | 144 | #[test] 145 | fn read_write() { 146 | delete_veth(); 147 | create_veth(); 148 | 149 | // Two cores, core0 master lcore, and core1 for packet forwarding 150 | let mut glob = DpdkGlobal::new(2, 2); 151 | 152 | let q_tx = Arc::new(ArrayQueue::new(NUM_PKTS)); 153 | let mut counters = Counters::new("dpdk_test").unwrap(); 154 | let pool_tx = packet_pool("dpdk_read_write_tx", &mut counters, q_tx.clone()); 155 | let params = Params { 156 | name: "r2_eth1", 157 | hw: DpdkHw::AfPacket, 158 | }; 159 | let dpdk_tx = match glob.add(&mut counters, params) { 160 | Ok(dpdk) => dpdk, 161 | Err(err) => panic!("Error {:?} creating dpdk port", err), 162 | }; 163 | 164 | let q_rx = Arc::new(ArrayQueue::new(NUM_PKTS)); 165 | let pool_rx = packet_pool("dpdk_read_write_rx", &mut counters, q_rx.clone()); 166 | let params = Params { 167 | name: "r2_eth2", 168 | hw: DpdkHw::AfPacket, 169 | }; 170 | 
let dpdk_rx = match glob.add(&mut counters, params) { 171 | Ok(dpdk) => dpdk, 172 | Err(err) => panic!("Error {:?} creating dpdk port", err), 173 | }; 174 | 175 | let wait = Arc::new(AtomicUsize::new(0)); 176 | let done = wait.clone(); 177 | 178 | let params = Box::new(DpdkThread { 179 | pool_rx, 180 | pool_tx, 181 | q_rx, 182 | q_tx, 183 | dpdk_rx, 184 | dpdk_tx, 185 | done, 186 | }); 187 | 188 | dpdk_launch( 189 | 1, 190 | Some(dpdk_eal_thread), 191 | Box::into_raw(params) as *mut core::ffi::c_void, 192 | ); 193 | 194 | while wait.load(Ordering::Relaxed) == 0 { 195 | let wait = time::Duration::from_millis(1); 196 | thread::sleep(wait) 197 | } 198 | 199 | delete_veth(); 200 | } 201 | -------------------------------------------------------------------------------- /ffis/dpdk/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "dpdk-ffi" 3 | version = "0.1.0" 4 | authors = ["Gopa Kumar "] 5 | edition = "2018" 6 | build = "build.rs" 7 | 8 | [lib] 9 | doctest = false 10 | 11 | [dependencies] 12 | libc = "0.2.0" 13 | -------------------------------------------------------------------------------- /ffis/dpdk/build.rs: -------------------------------------------------------------------------------- 1 | use std::env; 2 | 3 | fn main() { 4 | let out = env::var("OUT_DIR").unwrap(); 5 | println!("DPDK Output directory is {}", &out); 6 | 7 | println!("cargo:rustc-link-lib=dylib=rte_eal"); 8 | println!("cargo:rustc-link-lib=dylib=rte_kvargs"); 9 | println!("cargo:rustc-link-lib=dylib=rte_mempool"); 10 | println!("cargo:rustc-link-lib=dylib=rte_mbuf"); 11 | println!("cargo:rustc-link-lib=dylib=rte_ethdev"); 12 | println!("cargo:rerun-if-changed=build.rs"); 13 | } 14 | -------------------------------------------------------------------------------- /ffis/dpdk/src/bindgen.rs: -------------------------------------------------------------------------------- 1 | bindgen/include/lib.rs 
-------------------------------------------------------------------------------- /ffis/dpdk/src/bindgen/allow-function.regex: -------------------------------------------------------------------------------- 1 | rte_.* 2 | pci_.* 3 | eth_dev_.* 4 | rust_.* 5 | vlan_.* 6 | rss_.* 7 | virtio_.* 8 | reta_.* 9 | -------------------------------------------------------------------------------- /ffis/dpdk/src/bindgen/allow-type.regex: -------------------------------------------------------------------------------- 1 | arp_.* 2 | arg_handler_t 3 | buffer_.* 4 | cryptodev_.* 5 | dequeue_pkt_burst_t 6 | efd_.* 7 | enqueue_pkt_burst_t 8 | esp_hdr 9 | ethdev_.* 10 | ether_.* 11 | eth_.* 12 | eventdev_.* 13 | event_.* 14 | flow_ctrl_.* 15 | hash_sig_t 16 | icmp_hdr 17 | ipv4_hdr 18 | ipv6_extension_fragment 19 | ipv6_hdr 20 | ip_frag 21 | ip_frag_key 22 | ip_frag_pkt 23 | ip_frag_tbl_stat 24 | ip_pkt_list 25 | lcore_.* 26 | malloc_elem 27 | malloc_heap 28 | MARKER* 29 | member_set_t 30 | mtu_set_t 31 | pci_probe_t 32 | pci_remove_t 33 | phys_addr_t 34 | port_params 35 | power_management_env 36 | priority_flow_ctrl_.* 37 | rawdev_.* 38 | reta_.* 39 | rss_.* 40 | rte_.* 41 | sctp_hdr 42 | security_.* 43 | tcp_hdr 44 | timer_source 45 | udp_hdr 46 | vfio_.* 47 | vhost_.* 48 | vlan_.* 49 | vring_.* 50 | vxlan_.* 51 | xmm_t 52 | __virtio16 53 | __virtio32 54 | __virtio64 55 | -------------------------------------------------------------------------------- /ffis/dpdk/src/bindgen/allow-var.regex: -------------------------------------------------------------------------------- 1 | BALANCE_XMIT_.* 2 | BONDING_MODE_.* 3 | CTRL_MBUF_.* 4 | DEV_.* 5 | eal_.* 6 | EFD_.* 7 | ETHER_TYPE_.* 8 | ETH_.* 9 | IND_.* 10 | IPV4_.* 11 | IPV6_.* 12 | lcore_.* 13 | MARKER_TLV_TYPE_.* 14 | MS_PER_S 15 | NSEC_PER_SEC 16 | NS_PER_S 17 | per_lcore_.* 18 | PKT_.* 19 | PORTLIST_.* 20 | reta_.* 21 | rss_.* 22 | RTE_.* 23 | rte_.* 24 | SLOW_SUBTYPE_.* 25 | SOCKET_ID_ANY 26 | SOFTNIC_.* 27 | STATE_.* 28 | STR_.* 
29 | TLV_TYPE_.* 30 | TOKEN_STRING_MULTI 31 | US_PER_S 32 | VFIO_.* 33 | VHOST_.* 34 | VIRTIO_.* 35 | vlan_.* 36 | VMDQ_.* 37 | VRING_.* 38 | -------------------------------------------------------------------------------- /ffis/dpdk/src/bindgen/bindgen.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: "DPDK bindgen" 3 | weight: 1 4 | type: docs 5 | description: > 6 | 7 | --- 8 | 9 | # How to generate dpdk ffi binding APIs 10 | 11 | * install dpdk - using meson build and ninja install etc.. 12 | 13 | * temporarily overwrite rte_memcpy.h as below, restore it when these steps are complete 14 | sudo cp ./lib/librte_eal/common/include/generic/rte_memcpy.h /usr/local/include/rte_memcpy.h 15 | 16 | * Open /usr/local/include/rte_ether.h and in struct rte_ether_addr remove attribute aligned, 17 | that causes issues with bindgen and transitive repr(aligned) inclusions 18 | 19 | * Add the headers you want to ./headers.h 20 | 21 | * now run ./bindgen.sh 22 | 23 | -------------------------------------------------------------------------------- /ffis/dpdk/src/bindgen/bindgen.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | allowf=foobar 4 | for i in `cat allow-function.regex` 5 | do 6 | allowf="${allowf}|${i}" 7 | done 8 | 9 | allowt=foobar 10 | for i in `cat allow-type.regex` 11 | do 12 | allowt="${allowt}|${i}" 13 | done 14 | 15 | allowv=foobar 16 | for i in `cat allow-var.regex` 17 | do 18 | allowv="${allowv}|${i}" 19 | done 20 | 21 | bindgen headers.h --raw-line "#![allow(clippy::all)]" --raw-line "#![allow(dead_code)]" --allowlist-function $allowf --allowlist-type $allowt --allowlist-var $allowv -o include/lib.rs 22 | -------------------------------------------------------------------------------- /ffis/dpdk/src/bindgen/headers.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 
"/usr/local/include/rte_eal.h" 3 | #include "/usr/local/include/rte_mempool.h" 4 | #include "/usr/local/include/rte_mbuf.h" 5 | #include "/usr/local/include/rte_ethdev.h" 6 | #include "/usr/local/include/rte_dev.h" 7 | #include "/usr/local/include/rte_launch.h" 8 | -------------------------------------------------------------------------------- /ffis/dpdk/src/lib.rs: -------------------------------------------------------------------------------- 1 | #![allow(non_upper_case_globals)] 2 | #![allow(non_camel_case_types)] 3 | #![allow(non_snake_case)] 4 | 5 | pub mod bindgen; 6 | use bindgen::{rte_eth_devices, rte_mbuf, rte_mempool, rte_mempool_ops, rte_mempool_ops_table}; 7 | 8 | // Included below are APIs which we would have 'liked' bindgen to create a 9 | // binding for but it did not, for whatever reason. One common reason being 10 | // that the API was declared as inline. Obviously this has the issue that if 11 | // later dpdk versions change how this is done, then we have to come back and 12 | // update this 13 | 14 | // This is rte_eth_rx_burst() which is declared as inline and hence bindgen 15 | // does not generate the bindings. 16 | #[allow(clippy::not_unsafe_ptr_arg_deref)] 17 | pub fn dpdk_rx_one(port_id: u16, queue_id: usize, mbuf: *mut *mut rte_mbuf) -> u16 { 18 | unsafe { 19 | #[allow(static_mut_refs)] 20 | let devices = rte_eth_devices.as_ptr(); 21 | let dev = devices.add(port_id as usize); 22 | let cb = (*dev).rx_pkt_burst.unwrap(); 23 | let ptr = (*(*dev).data).rx_queues.add(queue_id); 24 | cb(*ptr, mbuf, 1) 25 | } 26 | } 27 | 28 | // This is rte_eth_tx_burst() which is declared as inline and hence bindgen 29 | // does not generate the bindings. 
30 | #[allow(clippy::not_unsafe_ptr_arg_deref)] 31 | pub fn dpdk_tx_one(port_id: u16, queue_id: usize, mbuf: *mut *mut rte_mbuf) -> u16 { 32 | unsafe { 33 | #[allow(static_mut_refs)] 34 | let devices = rte_eth_devices.as_ptr(); 35 | let dev = devices.add(port_id as usize); 36 | let cb = (*dev).tx_pkt_burst.unwrap(); 37 | let ptr = (*(*dev).data).tx_queues.add(queue_id); 38 | cb(*ptr, mbuf, 1) 39 | } 40 | } 41 | 42 | // This is rte_pktmbuf_alloc() which is declared as inline and hence bindgen 43 | // does not generate the bindings. The original rte_pktmbuf_alloc() has cache 44 | // allocation etc. which is ignored below, it directly goes to the pool 45 | #[allow(clippy::not_unsafe_ptr_arg_deref)] 46 | pub fn dpdk_mbuf_free(m: *mut rte_mbuf) { 47 | unsafe { 48 | let mbuf: *mut core::ffi::c_void = m as *mut core::ffi::c_void; 49 | let mp: *mut rte_mempool = (*m).pool; 50 | let ops: *mut rte_mempool_ops = &mut rte_mempool_ops_table.ops[(*mp).ops_index as usize]; 51 | let cb = (*ops).enqueue.unwrap(); 52 | cb(mp, &mbuf, 1); 53 | } 54 | } 55 | 56 | // This is rte_pktmbuf_free() which is declared as inline and hence bindgen 57 | // does not generate the bindings. The original rte_pktmbuf_free() has cache 58 | // allocation etc. 
which is ignored below, it directly goes to the pool 59 | #[allow(clippy::not_unsafe_ptr_arg_deref)] 60 | pub fn dpdk_mbuf_alloc(mp: *mut rte_mempool) -> Option<*mut rte_mbuf> { 61 | unsafe { 62 | let mut m: *mut libc::c_void = std::ptr::null_mut::(); 63 | let ops: *mut rte_mempool_ops = &mut rte_mempool_ops_table.ops[(*mp).ops_index as usize]; 64 | let cb = (*ops).dequeue.unwrap(); 65 | cb(mp, &mut m, 1); 66 | if !m.is_null() { 67 | Some(m as *mut rte_mbuf) 68 | } else { 69 | None 70 | } 71 | } 72 | } 73 | 74 | // This is rte_pktmbuf_mtod_offset(), bindgen not generated because of inline 75 | #[allow(clippy::not_unsafe_ptr_arg_deref)] 76 | pub fn dpdk_mtod(m: *mut rte_mbuf) -> *mut u8 { 77 | unsafe { 78 | let addr: *mut u8 = (*m).buf_addr as *mut u8; 79 | addr.add((*m).data_off as usize) 80 | } 81 | } 82 | -------------------------------------------------------------------------------- /fwd/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "fwd" 3 | version = "0.1.0" 4 | authors = ["Gopa Kumar "] 5 | edition = "2018" 6 | 7 | [dependencies] 8 | treebitmap = "0.4.0" 9 | common = { path = "../common" } 10 | counters = { path = "../counters" } 11 | log = { path = "../log" } -------------------------------------------------------------------------------- /fwd/fwd.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Forwarding" 3 | weight: 1 4 | type: docs 5 | description: > 6 | 7 | --- 8 | 9 | # Forwarding objects 10 | 11 | We discussed in brief in the architecture details section about forwarding objects, consult that document for more details. Any packet goes through a chain of objects. 
For example a packet starts from an Interface object, it then gets rid of the layer2 encap and assuming its an IPv4 packet, it will try to find an ipv4 table object where it looks up to find an ipv4 leaf object which will provide an adjacency object that tells the packet which Interface object the packet has to go out of and what layer2 encaps to apply on the packet before it goes out of the interface. 12 | 13 | So this module captures all these object parameters - what is capture here is really highly "independent" data - other than the basic types and the objects in this module, this module is not supposed to depend on anything like device drivers for example (that will be utter blasphemy!). This module is the top/first tier in the module heirarchy (refer Docs/modules.md). 14 | -------------------------------------------------------------------------------- /fwd/src/adj.rs: -------------------------------------------------------------------------------- 1 | use super::*; 2 | 3 | pub struct Adjacency { 4 | pub nhop: Ipv4Addr, 5 | pub ifindex: usize, 6 | } 7 | 8 | impl Adjacency { 9 | pub fn new(nhop: Ipv4Addr, ifindex: usize) -> Adjacency { 10 | Adjacency { nhop, ifindex } 11 | } 12 | } 13 | -------------------------------------------------------------------------------- /fwd/src/intf.rs: -------------------------------------------------------------------------------- 1 | use super::*; 2 | use std::net::Ipv4Addr; 3 | 4 | pub const MAX_INTERFACES: usize = 4 * 1024; 5 | 6 | pub struct Interface { 7 | pub ifname: String, 8 | pub ifindex: usize, 9 | pub bandwidth: usize, 10 | pub mtu: usize, 11 | pub ipv4_addr: Ipv4Addr, 12 | pub mask_len: u32, 13 | pub l2_addr: Vec, 14 | pub headroom: usize, 15 | } 16 | 17 | impl Interface { 18 | pub fn new(ifname: &str, ifindex: usize, l2_addr: Vec, headroom: usize) -> Interface { 19 | Interface { 20 | ifname: ifname.to_string(), 21 | ifindex, 22 | bandwidth: common::MB!(10 * 1024), 23 | mtu: ETHER_MTU, 24 | ipv4_addr: 
Ipv4Addr::new(0, 0, 0, 0), 25 | mask_len: 0, 26 | l2_addr, 27 | headroom, 28 | } 29 | } 30 | 31 | pub fn get_v4addr(&self) -> (Ipv4Addr, u32) { 32 | (self.ipv4_addr, self.mask_len) 33 | } 34 | 35 | pub fn set_v4addr(&mut self, addr: Ipv4Addr, mask_len: u32) { 36 | self.ipv4_addr = addr; 37 | self.mask_len = mask_len; 38 | } 39 | } 40 | 41 | impl Clone for Interface { 42 | fn clone(&self) -> Interface { 43 | Interface { 44 | ifname: self.ifname.clone(), 45 | ifindex: self.ifindex, 46 | bandwidth: self.bandwidth, 47 | mtu: self.mtu, 48 | ipv4_addr: self.ipv4_addr, 49 | mask_len: self.mask_len, 50 | l2_addr: self.l2_addr.clone(), 51 | headroom: self.headroom, 52 | } 53 | } 54 | } 55 | 56 | pub struct ModifyInterfaceMsg { 57 | pub intf: Arc, 58 | } 59 | 60 | impl Clone for ModifyInterfaceMsg { 61 | fn clone(&self) -> ModifyInterfaceMsg { 62 | ModifyInterfaceMsg { 63 | intf: self.intf.clone(), 64 | } 65 | } 66 | } 67 | -------------------------------------------------------------------------------- /fwd/src/ipv4.rs: -------------------------------------------------------------------------------- 1 | use super::{Arc, Fwd, IpLookupTable, Ipv4Addr}; 2 | 3 | pub struct IPv4TableMsg { 4 | pub table: Arc, 5 | } 6 | 7 | impl IPv4TableMsg { 8 | pub fn new(table: Arc) -> IPv4TableMsg { 9 | IPv4TableMsg { table } 10 | } 11 | } 12 | 13 | impl Clone for IPv4TableMsg { 14 | fn clone(&self) -> IPv4TableMsg { 15 | IPv4TableMsg { 16 | table: self.table.clone(), 17 | } 18 | } 19 | } 20 | 21 | pub struct IPv4Leaf { 22 | pub next: Fwd, 23 | } 24 | 25 | impl IPv4Leaf { 26 | pub fn new(fwd: Fwd) -> IPv4Leaf { 27 | IPv4Leaf { next: fwd } 28 | } 29 | } 30 | 31 | #[derive(Default)] 32 | pub struct IPv4Table { 33 | pub root: IpLookupTable>, 34 | } 35 | 36 | impl IPv4Table { 37 | pub fn new() -> IPv4Table { 38 | IPv4Table { 39 | root: IpLookupTable::new(), 40 | } 41 | } 42 | 43 | pub fn add(&mut self, ip: Ipv4Addr, masklen: u32, value: Arc) -> bool { 44 | let dup = self.root.insert(ip, masklen, 
value); 45 | dup.is_none() 46 | } 47 | 48 | pub fn del(&mut self, ip: Ipv4Addr, masklen: u32) -> bool { 49 | let ret = self.root.remove(ip, masklen); 50 | ret.is_some() 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /fwd/src/lib.rs: -------------------------------------------------------------------------------- 1 | use std::net::Ipv4Addr; 2 | use std::sync::Arc; 3 | use treebitmap::IpLookupTable; 4 | pub mod ipv4; 5 | use ipv4::IPv4Leaf; 6 | pub mod adj; 7 | use adj::Adjacency; 8 | pub mod intf; 9 | use intf::Interface; 10 | use std::str::FromStr; 11 | 12 | pub const ETH_TYPE_ARP: u16 = 0x0806; 13 | pub const ETH_TYPE_IPV4: u16 = 0x0800; 14 | pub const ARP_HWTYPE_ETH: u16 = 0x0001; 15 | pub const ARP_OPCODE_REQ: u16 = 0x0001; 16 | pub const ARP_OPCODE_REPLY: u16 = 0x0002; 17 | pub const ETH_ALEN: usize = 6; 18 | pub const ETHER_HDR_LEN: usize = 14; 19 | pub const ETHER_MTU: usize = 1500; 20 | pub const BCAST_MAC: &[u8; ETH_ALEN] = &[0xff, 0xff, 0xff, 0xff, 0xff, 0xff]; 21 | pub const ZERO_MAC: &[u8; ETH_ALEN] = &[0, 0, 0, 0, 0, 0]; 22 | pub const ZERO_IP: Ipv4Addr = Ipv4Addr::new(0, 0, 0, 0); 23 | pub const IPHDR_MIN_LEN: usize = 20; 24 | pub const IPHDR_DADDR_OFF: usize = 16; 25 | 26 | pub enum EthOffsets { 27 | EthDaddrOff = 0, 28 | EthSaddrOff = 6, 29 | EthTypeOff = 12, 30 | EthHwtypeOff = 14, 31 | EthProtoOff = 16, 32 | EthHwSzOff = 18, 33 | EthProtoSzOff = 19, 34 | EthOpcodeOff = 20, 35 | EthSenderMacOff = 22, 36 | EthSenderIpOff = 28, 37 | EthTargetMacOff = 32, 38 | EthTargetIpOff = 38, 39 | } 40 | 41 | #[allow(dead_code)] 42 | pub struct EthHdr { 43 | dhost: [u8; ETH_ALEN], 44 | shost: [u8; ETH_ALEN], 45 | eth_type: u16, 46 | } 47 | 48 | pub struct EthMacRaw { 49 | pub bytes: Arc>, 50 | } 51 | 52 | pub struct EthMacAddMsg { 53 | pub ifindex: usize, 54 | pub ip: Ipv4Addr, 55 | pub mac: EthMacRaw, 56 | } 57 | 58 | impl Clone for EthMacAddMsg { 59 | fn clone(&self) -> EthMacAddMsg { 60 | EthMacAddMsg { 61 | 
ifindex: self.ifindex, 62 | ip: self.ip, 63 | mac: EthMacRaw { 64 | bytes: self.mac.bytes.clone(), 65 | }, 66 | } 67 | } 68 | } 69 | 70 | #[allow(dead_code)] 71 | pub struct IpHdr { 72 | ihl: u8, 73 | version: u8, 74 | tos: u8, 75 | tot_len: u16, 76 | id: u16, 77 | frag_off: u16, 78 | ttl: u8, 79 | protocol: u8, 80 | check: u16, 81 | saddr: u32, 82 | daddr: u32, 83 | /*The options start here. */ 84 | } 85 | 86 | #[derive(Clone)] 87 | pub enum Fwd { 88 | IPv4Leaf(Arc), 89 | Adjacency(Arc), 90 | Interface(Arc), 91 | } 92 | 93 | pub fn str_to_mac(mac: &str) -> Option> { 94 | let mac = mac.split(':'); 95 | let mut bytes = Vec::new(); 96 | for m in mac { 97 | if let Ok(byte) = u8::from_str_radix(m, 16) { 98 | bytes.push(byte); 99 | } else { 100 | return None; 101 | } 102 | } 103 | if bytes.len() == ETH_ALEN { 104 | Some(bytes) 105 | } else { 106 | None 107 | } 108 | } 109 | 110 | pub fn ip_mask_decode(ip_and_mask: &str) -> Option<(Ipv4Addr, u32)> { 111 | let im = ip_and_mask.split('/'); 112 | let im: Vec<&str> = im.collect(); 113 | if im.len() != 2 { 114 | return None; 115 | } 116 | if let Ok(ipv4) = Ipv4Addr::from_str(im[0]) { 117 | if let Ok(masklen) = im[1].parse::() { 118 | Some((ipv4, masklen)) 119 | } else { 120 | None 121 | } 122 | } else { 123 | None 124 | } 125 | } 126 | -------------------------------------------------------------------------------- /gnodes/interface/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "interface" 3 | version = "0.1.0" 4 | authors = ["Gopa Kumar "] 5 | edition = "2018" 6 | 7 | [dependencies] 8 | crossbeam-queue = "0.2.1" 9 | names = { path = "../../names" } 10 | common = { path = "../../common" } 11 | graph = { path = "../../graph" } 12 | packet = { path = "../../packet" } 13 | sched = { path = "../../sched" } 14 | fwd = { path = "../../fwd" } 15 | msg = { path = "../../msg" } 16 | counters = { path = "../../counters" } 17 | log = { path = "../../log" } 18 | efd 
= { path = "../../unix/efd" } 19 | socket = { path = "../../unix/socket" } 20 | dpdk = { path = "../../dpdk" } -------------------------------------------------------------------------------- /gnodes/interface/src/lib.rs: -------------------------------------------------------------------------------- 1 | use counters::flavors::{Counter, CounterType}; 2 | use counters::Counters; 3 | use crossbeam_queue::ArrayQueue; 4 | use efd::Efd; 5 | use fwd::intf::Interface; 6 | use graph::Driver; 7 | use graph::{Dispatch, Gclient, VEC_SIZE}; 8 | use log::Logger; 9 | use msg::R2Msg; 10 | use names::l2_eth_decap; 11 | use packet::BoxPkt; 12 | use sched::hfsc::Hfsc; 13 | use std::sync::Arc; 14 | 15 | #[derive(Copy, Clone)] 16 | enum Next { 17 | Drop = 0, 18 | L2EthDecap, 19 | } 20 | 21 | const NEXT_NAMES: &[Next] = &[Next::Drop, Next::L2EthDecap]; 22 | 23 | fn next_name(ifindex: usize, next: Next) -> String { 24 | match next { 25 | Next::Drop => names::DROP.to_string(), 26 | Next::L2EthDecap => l2_eth_decap(ifindex), 27 | } 28 | } 29 | 30 | // The interface node (Ifnode) in the graph is responsible for reading packets from 31 | // an interface and sending packets ouf of an interface - the IfNode has a 'driver' 32 | // that handles the I/O part. Today the driver is just raw socket, it will eventually 33 | // get extended to have more options like DPDK etc.. The IfNode for an interface is 34 | // present in all forwarding threads, although only one thread is the 'owner' of the 35 | // interface. 
All other threads handoff packets to the 'owner' vis MPSC 'thread_q' 36 | pub struct IfNode { 37 | name: String, 38 | affinity: Option, 39 | intf: Arc, 40 | sched: Hfsc, 41 | driver: Option>, 42 | sched_fail: Counter, 43 | threadq_fail: Counter, 44 | thread_q: Arc>, 45 | thread_wakeup: Arc, 46 | } 47 | 48 | impl IfNode { 49 | // affinity: specifies which thread owns the IfNode 50 | // efd: event fd (efd) used to wakeup the owner thread when handing off packets on thread_q 51 | // intf: The common driver-agnostic parameters of an interface like ip address/mtu etc.. 52 | pub fn new( 53 | counters: &mut Counters, 54 | affinity: Option, 55 | efd: Arc, 56 | intf: Arc, 57 | driver: Box, 58 | ) -> Result { 59 | let name = names::rx_tx(intf.ifindex); 60 | 61 | // By default the scheduler is HFSC today, eventually there will be other options 62 | let sched = sched::hfsc::Hfsc::new(common::MB!(10 * 1024)); 63 | let sched_fail = Counter::new(counters, &name, CounterType::Error, "sched_fail"); 64 | let threadq_fail = Counter::new(counters, &name, CounterType::Error, "threadq_fail"); 65 | Ok(IfNode { 66 | name, 67 | affinity, 68 | intf, 69 | sched, 70 | driver: Some(driver), 71 | sched_fail, 72 | threadq_fail, 73 | thread_q: Arc::new(ArrayQueue::new(VEC_SIZE)), 74 | thread_wakeup: efd, 75 | }) 76 | } 77 | 78 | pub fn name(&self) -> String { 79 | self.name.clone() 80 | } 81 | 82 | pub fn next_names(&self) -> Vec { 83 | let mut v = Vec::new(); 84 | for n in NEXT_NAMES { 85 | assert_eq!(*n as usize, v.len()); 86 | v.push(next_name(self.intf.ifindex, *n)); 87 | } 88 | v 89 | } 90 | 91 | pub fn fd(&self) -> Option { 92 | if let Some(ref driver) = self.driver { 93 | driver.fd() 94 | } else { 95 | None 96 | } 97 | } 98 | } 99 | 100 | impl Gclient for IfNode { 101 | fn clone(&self, counters: &mut Counters, _log: Arc) -> Box> { 102 | // Only the 'owner' IfNode really needs/uses a scheduler, so in all other nodes, the 103 | // sched doesnt really do anything, they handoff packets to the 
owner IfNode. 104 | let sched = sched::hfsc::Hfsc::new(common::MB!(10 * 1024)); 105 | let sched_fail = Counter::new(counters, &self.name, CounterType::Error, "sched_fail"); 106 | let threadq_fail = Counter::new(counters, &self.name, CounterType::Error, "threadq_fail"); 107 | Box::new(IfNode { 108 | name: self.name.clone(), 109 | affinity: self.affinity, 110 | intf: self.intf.clone(), 111 | sched, 112 | driver: None, 113 | sched_fail, 114 | threadq_fail, 115 | thread_q: self.thread_q.clone(), 116 | thread_wakeup: self.thread_wakeup.clone(), 117 | }) 118 | } 119 | 120 | fn dispatch(&mut self, thread: usize, vectors: &mut Dispatch) { 121 | let owner_thread = self.affinity.is_none() || (self.affinity == Some(thread)); 122 | // Do packet Tx if we are the owner thread (thread the driver/device is pinnned to). 123 | // If so send the packet out on the driver, otherwise enqueue the packet to the MPSC 124 | // queue to the owner thread 125 | while let Some(p) = vectors.pop() { 126 | if owner_thread { 127 | // TODO: We have the scheduler, but we havent figured out the packet queueing 128 | // model. 
Till then we cant really put the scheduler to use 129 | if !self.sched.has_classes() { 130 | self.driver.as_mut().unwrap().sendmsg(vectors.pool, p); 131 | } 132 | } else if self.thread_q.push(p).is_err() { 133 | self.threadq_fail.incr(); 134 | } else { 135 | self.thread_wakeup.write(1); 136 | } 137 | } 138 | if owner_thread { 139 | while let Ok(p) = self.thread_q.pop() { 140 | if !self.sched.has_classes() { 141 | self.driver.as_mut().unwrap().sendmsg(vectors.pool, p); 142 | } 143 | } 144 | } 145 | if self.sched.pkts_queued() != 0 { 146 | // Well, we are not caring to return the exact scheduler time at the moment, but 147 | // its a TODO to return here the smallest scheduler interval rather than 0 148 | vectors.wakeup(0); 149 | } 150 | // Do packet Rx, only on the thread this driver is pinned to 151 | if owner_thread { 152 | for _ in 0..VEC_SIZE { 153 | let pkt = self 154 | .driver 155 | .as_mut() 156 | .unwrap() 157 | .recvmsg(vectors.pool, self.intf.headroom); 158 | if pkt.is_none() { 159 | break; 160 | } 161 | let mut pkt = pkt.unwrap(); 162 | if pkt.len() == 0 { 163 | break; 164 | } 165 | pkt.in_ifindex = self.intf.ifindex; 166 | vectors.push(Next::L2EthDecap as usize, pkt); 167 | } 168 | } 169 | } 170 | fn control_msg(&mut self, thread: usize, message: R2Msg) { 171 | match message { 172 | R2Msg::ModifyInterface(mod_intf) => { 173 | self.intf = mod_intf.intf; 174 | } 175 | R2Msg::ClassAdd(class) => { 176 | if (self.affinity.is_none() || (self.affinity == Some(thread))) 177 | && self 178 | .sched 179 | .create_class( 180 | class.name, 181 | class.parent, 182 | class.qlimit, 183 | class.is_leaf, 184 | class.curves, 185 | ) 186 | .is_err() 187 | { 188 | self.sched_fail.incr(); 189 | } 190 | } 191 | _ => panic!("Unknown type"), 192 | } 193 | } 194 | } 195 | -------------------------------------------------------------------------------- /gnodes/layer2/eth/decap/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 
2 | name = "l2_eth_decap" 3 | version = "0.1.0" 4 | authors = ["Gopa Kumar "] 5 | edition = "2018" 6 | 7 | [dependencies] 8 | common = { path = "../../../../common" } 9 | names = { path = "../../../../names" } 10 | graph = { path = "../../../../graph" } 11 | packet = { path = "../../../../packet" } 12 | fwd = { path = "../../../../fwd" } 13 | counters = { path = "../../../../counters" } 14 | log = { path = "../../../../log" } 15 | msg = { path = "../../../../msg" } 16 | -------------------------------------------------------------------------------- /gnodes/layer2/eth/encap/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "l2_eth_encap" 3 | version = "0.1.0" 4 | authors = ["Gopa Kumar "] 5 | edition = "2018" 6 | 7 | [dependencies] 8 | common = { path = "../../../../common" } 9 | names = { path = "../../../../names" } 10 | graph = { path = "../../../../graph" } 11 | fwd = { path = "../../../../fwd" } 12 | packet = { path = "../../../../packet" } 13 | counters = { path = "../../../../counters" } 14 | log = { path = "../../../../log" } 15 | msg = { path = "../../../../msg" } 16 | -------------------------------------------------------------------------------- /gnodes/layer2/eth/encap/src/lib.rs: -------------------------------------------------------------------------------- 1 | use counters::flavors::{Counter, CounterType}; 2 | use counters::Counters; 3 | use fwd::intf::MAX_INTERFACES; 4 | use fwd::IPHDR_DADDR_OFF; 5 | use fwd::IPHDR_MIN_LEN; 6 | use fwd::{ 7 | intf::Interface, EthMacRaw, EthOffsets, ARP_HWTYPE_ETH, ARP_OPCODE_REQ, BCAST_MAC, ETH_ALEN, 8 | ETH_TYPE_ARP, ETH_TYPE_IPV4, ZERO_IP, ZERO_MAC, 9 | }; 10 | use graph::Dispatch; 11 | use graph::Gclient; 12 | use log::Logger; 13 | use msg::R2Msg; 14 | use names::l2_eth_encap; 15 | use packet::BoxPkt; 16 | use packet::PacketPool; 17 | use std::collections::HashMap; 18 | use std::net::Ipv4Addr; 19 | use std::sync::Arc; 20 | 21 | // If the system 
has say 4000 interfaces, EncapMux prevents having every single node that needs 22 | // to send a packet out to have 4000 nodes as their next-node. Instead those nodes have EncapMux 23 | // as their next node and EncapMux will have 4000 next nodes. So all that EncapMux does is to 24 | // take the input packet and enqueu it to the right EthEncap node. This convenience of course 25 | // comes with the hit that all output packets incur one unnecessary dequeue/enqueue 26 | #[derive(Default)] 27 | pub struct EncapMux { 28 | next_names: Vec, 29 | } 30 | 31 | impl EncapMux { 32 | pub fn new() -> EncapMux { 33 | EncapMux { 34 | next_names: (0..MAX_INTERFACES).map(names::l2_eth_encap).collect(), 35 | } 36 | } 37 | 38 | pub fn name(&self) -> String { 39 | names::ENCAPMUX.to_string() 40 | } 41 | 42 | pub fn next_names(&self) -> Vec { 43 | self.next_names.clone() 44 | } 45 | } 46 | 47 | impl Gclient for EncapMux { 48 | fn clone(&self, _counters: &mut Counters, _log: Arc) -> Box> { 49 | Box::new(EncapMux { 50 | next_names: self.next_names.clone(), 51 | }) 52 | } 53 | 54 | fn dispatch(&mut self, _thread: usize, vectors: &mut Dispatch) { 55 | while let Some(p) = vectors.pop() { 56 | vectors.push(p.out_ifindex, p); 57 | } 58 | } 59 | } 60 | 61 | #[derive(Copy, Clone)] 62 | enum Next { 63 | Drop = 0, 64 | TX, 65 | } 66 | 67 | const NEXT_NAMES: &[Next] = &[Next::Drop, Next::TX]; 68 | 69 | fn next_name(ifindex: usize, next: Next) -> String { 70 | match next { 71 | Next::Drop => names::DROP.to_string(), 72 | Next::TX => names::rx_tx(ifindex), 73 | } 74 | } 75 | 76 | struct Cnt { 77 | bad_mac: Counter, 78 | } 79 | 80 | // Encapsulate an ethernet packet and send it to the interface. If the mac address table 81 | // does not have the mac address, generate an ARP request. The ARP response will be received 82 | // on the Decap node and it will broadcast the learned mac which the Encap node will also 83 | // receive. 
This mechanism needs rethinking (see github issue #4) 84 | pub struct EthEncap { 85 | intf: Arc, 86 | mac: HashMap, 87 | cnt: Cnt, 88 | } 89 | 90 | impl EthEncap { 91 | pub fn new(intf: Arc, counters: &mut Counters) -> Self { 92 | let bad_mac = Counter::new( 93 | counters, 94 | &l2_eth_encap(intf.ifindex), 95 | CounterType::Error, 96 | "bad_mac", 97 | ); 98 | EthEncap { 99 | intf, 100 | mac: HashMap::new(), 101 | cnt: Cnt { bad_mac }, 102 | } 103 | } 104 | 105 | pub fn name(&self) -> String { 106 | l2_eth_encap(self.intf.ifindex) 107 | } 108 | 109 | pub fn next_names(&self) -> Vec { 110 | let mut v = Vec::new(); 111 | for n in NEXT_NAMES { 112 | assert_eq!(*n as usize, v.len()); 113 | v.push(next_name(self.intf.ifindex, *n)); 114 | } 115 | v 116 | } 117 | 118 | fn do_arp_request(&self, pool: &mut dyn PacketPool, in_pkt: &BoxPkt) -> Option { 119 | let pkt = pool.pkt(0 /* no headroom */); 120 | pkt.as_ref()?; 121 | let mut pkt = pkt.unwrap(); 122 | let raw = pkt.head_mut(); 123 | 124 | // Dest mac all ones 125 | let off = EthOffsets::EthDaddrOff as usize; 126 | raw[off..off + ETH_ALEN].copy_from_slice(BCAST_MAC); 127 | // Src mac 128 | let off = EthOffsets::EthSaddrOff as usize; 129 | raw[off..off + ETH_ALEN].copy_from_slice(&self.intf.l2_addr[0..ETH_ALEN]); 130 | // 0x0806 ARP 131 | let off = EthOffsets::EthTypeOff as usize; 132 | raw[off..off + 2].copy_from_slice(Ð_TYPE_ARP.to_be_bytes()); 133 | // Hardware type ethernet 0x0001 134 | let off = EthOffsets::EthHwtypeOff as usize; 135 | raw[off..off + 2].copy_from_slice(&ARP_HWTYPE_ETH.to_be_bytes()); 136 | // Ether type ipv4 0x0800 137 | let off = EthOffsets::EthProtoOff as usize; 138 | raw[off..off + 2].copy_from_slice(Ð_TYPE_IPV4.to_be_bytes()); 139 | // Hw addr length 6 140 | let off = EthOffsets::EthHwSzOff as usize; 141 | raw[off] = 6; 142 | // Procol addr length 4 143 | let off = EthOffsets::EthProtoSzOff as usize; 144 | raw[off] = 4; 145 | // Arp opcode request 146 | let off = EthOffsets::EthOpcodeOff as 
usize; 147 | raw[off..off + 2].copy_from_slice(&ARP_OPCODE_REQ.to_be_bytes()); 148 | // src mac 149 | let off = EthOffsets::EthSenderMacOff as usize; 150 | raw[off..off + ETH_ALEN].copy_from_slice(&self.intf.l2_addr[0..ETH_ALEN]); 151 | // src ipv4 addr 152 | let off = EthOffsets::EthSenderIpOff as usize; 153 | raw[off..off + 4].copy_from_slice(&self.intf.ipv4_addr.octets()); 154 | // dst mac 155 | let off = EthOffsets::EthTargetMacOff as usize; 156 | raw[off..off + ETH_ALEN].copy_from_slice(ZERO_MAC); 157 | // dst ipv4 addr 158 | let off = EthOffsets::EthTargetIpOff as usize; 159 | if in_pkt.out_l3addr == ZERO_IP { 160 | // If adjacency has zero nexthop, its a connected adj, use destination IP 161 | // to arp 162 | let (l3, l3len) = in_pkt.get_l3(); 163 | assert!(l3len >= IPHDR_MIN_LEN); 164 | raw[off..off + 4].copy_from_slice(&l3[IPHDR_DADDR_OFF..IPHDR_DADDR_OFF + 4]); 165 | } else { 166 | raw[off..off + 4].copy_from_slice(&in_pkt.out_l3addr.octets()); 167 | } 168 | 169 | let bytes = 2 * ETH_ALEN + 2 + 2 + 2 + 1 + 1 + 2 + ETH_ALEN + 4 + ETH_ALEN + 4; 170 | pkt.move_tail(bytes as isize); 171 | pkt.out_ifindex = self.intf.ifindex; 172 | Some(pkt) 173 | } 174 | 175 | pub fn mac_add(&mut self, ip: Ipv4Addr, mac: EthMacRaw) { 176 | if mac.bytes.len() < ETH_ALEN { 177 | self.cnt.bad_mac.incr(); 178 | return; 179 | } 180 | self.mac.entry(ip).or_insert(mac); 181 | } 182 | 183 | fn add_eth_hdr(&self, pool: &mut dyn PacketPool, pkt: &mut BoxPkt, mac: &EthMacRaw) -> bool { 184 | if !pkt.prepend(pool, Ð_TYPE_IPV4.to_be_bytes()) { 185 | return false; 186 | } 187 | if !pkt.prepend(pool, &self.intf.l2_addr[0..ETH_ALEN]) { 188 | return false; 189 | } 190 | if !pkt.prepend(pool, &mac.bytes) { 191 | return false; 192 | } 193 | pkt.set_l2(ETH_ALEN); 194 | true 195 | } 196 | } 197 | 198 | impl Gclient for EthEncap { 199 | fn clone(&self, counters: &mut Counters, _log: Arc) -> Box> { 200 | let bad_mac = Counter::new(counters, &self.name(), CounterType::Error, "bad_mac"); 201 | 
Box::new(EthEncap { 202 | intf: self.intf.clone(), 203 | mac: HashMap::new(), 204 | cnt: Cnt { bad_mac }, 205 | }) 206 | } 207 | 208 | fn dispatch(&mut self, _thread: usize, vectors: &mut Dispatch) { 209 | while let Some(mut p) = vectors.pop() { 210 | let mac = self.mac.get(&p.out_l3addr); 211 | if let Some(mac) = mac { 212 | if self.add_eth_hdr(vectors.pool, &mut p, mac) { 213 | vectors.push(Next::TX as usize, p); 214 | } 215 | } else { 216 | let arp = self.do_arp_request(vectors.pool, &p); 217 | if let Some(arp) = arp { 218 | vectors.push(Next::TX as usize, arp); 219 | } 220 | } 221 | } 222 | } 223 | 224 | fn control_msg(&mut self, _thread: usize, message: R2Msg) { 225 | match message { 226 | R2Msg::ModifyInterface(mod_intf) => { 227 | self.intf = mod_intf.intf; 228 | } 229 | R2Msg::EthMacAdd(mac_add) => { 230 | self.mac_add(mac_add.ip, mac_add.mac); 231 | } 232 | _ => panic!("Unknown type"), 233 | } 234 | } 235 | } 236 | -------------------------------------------------------------------------------- /gnodes/layer3/ipv4/fwd/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "l3_ipv4_fwd" 3 | version = "0.1.0" 4 | authors = ["Gopa Kumar "] 5 | edition = "2018" 6 | 7 | [dependencies] 8 | names = { path = "../../../../names" } 9 | graph = { path = "../../../../graph" } 10 | msg = { path = "../../../../msg" } 11 | packet = { path = "../../../../packet" } 12 | counters = { path = "../../../../counters" } 13 | fwd = { path = "../../../../fwd" } 14 | log = { path = "../../../../log" } 15 | -------------------------------------------------------------------------------- /gnodes/layer3/ipv4/fwd/src/lib.rs: -------------------------------------------------------------------------------- 1 | use counters::{flavors::Counter, flavors::CounterType, Counters}; 2 | use fwd::IPHDR_DADDR_OFF; 3 | use fwd::IPHDR_MIN_LEN; 4 | use fwd::{ipv4::IPv4Table, Fwd}; 5 | use graph::Dispatch; 6 | use graph::Gclient; 7 | use 
log::Logger; 8 | use msg::R2Msg; 9 | use std::net::Ipv4Addr; 10 | use std::sync::Arc; 11 | 12 | #[derive(Copy, Clone)] 13 | enum Next { 14 | Drop = 0, 15 | EncapMux, 16 | } 17 | 18 | const NEXT_NAMES: &[Next] = &[Next::Drop, Next::EncapMux]; 19 | 20 | fn next_name(next: Next) -> String { 21 | match next { 22 | Next::Drop => names::DROP.to_string(), 23 | Next::EncapMux => names::ENCAPMUX.to_string(), 24 | } 25 | } 26 | 27 | struct IPv4Cnt { 28 | no_route: Counter, 29 | invalid_l3: Counter, 30 | } 31 | 32 | // The IPv4 Forwarding node: all it does is a route lookup the destinaton address in a 33 | // tree-bitmap data structure, find the 'adjacency' information that says where the 34 | // packet has to go out and send it to the Encap node for that output interface. 35 | pub struct IPv4Fwd { 36 | table: Arc, 37 | cnt: IPv4Cnt, 38 | } 39 | 40 | impl IPv4Fwd { 41 | pub fn new(table: Arc, counters: &mut Counters) -> IPv4Fwd { 42 | let invalid_l3 = Counter::new( 43 | counters, 44 | names::L3_IPV4_FWD, 45 | CounterType::Error, 46 | "invalid_l3", 47 | ); 48 | let no_route = Counter::new(counters, names::L3_IPV4_FWD, CounterType::Pkts, "no_route"); 49 | IPv4Fwd { 50 | table, 51 | cnt: IPv4Cnt { 52 | no_route, 53 | invalid_l3, 54 | }, 55 | } 56 | } 57 | 58 | pub fn name(&self) -> String { 59 | names::L3_IPV4_FWD.to_string() 60 | } 61 | 62 | pub fn next_names(&self) -> Vec { 63 | let mut v = Vec::new(); 64 | for n in NEXT_NAMES { 65 | assert_eq!(*n as usize, v.len()); 66 | v.push(next_name(*n)); 67 | } 68 | v 69 | } 70 | } 71 | 72 | impl Gclient for IPv4Fwd { 73 | fn clone(&self, counters: &mut Counters, _log: Arc) -> Box> { 74 | let no_route = Counter::new(counters, &self.name(), CounterType::Pkts, "no_route"); 75 | let invalid_l3 = Counter::new(counters, &self.name(), CounterType::Error, "invalid_l3"); 76 | Box::new(IPv4Fwd { 77 | table: self.table.clone(), 78 | cnt: IPv4Cnt { 79 | no_route, 80 | invalid_l3, 81 | }, 82 | }) 83 | } 84 | 85 | fn dispatch(&mut self, _thread: 
usize, vectors: &mut Dispatch) { 86 | while let Some(mut p) = vectors.pop() { 87 | let (iphdr, hdrlen) = p.get_l3(); 88 | if hdrlen < IPHDR_MIN_LEN { 89 | self.cnt.invalid_l3.incr(); 90 | continue; 91 | } 92 | let daddr = Ipv4Addr::new( 93 | iphdr[IPHDR_DADDR_OFF], 94 | iphdr[IPHDR_DADDR_OFF + 1], 95 | iphdr[IPHDR_DADDR_OFF + 2], 96 | iphdr[IPHDR_DADDR_OFF + 3], 97 | ); 98 | if let Some((_prefix, _mask, leaf)) = self.table.root.longest_match(daddr) { 99 | match &leaf.next { 100 | Fwd::Adjacency(adj) => { 101 | p.out_ifindex = adj.ifindex; 102 | p.out_l3addr = adj.nhop; 103 | if p.out_l3addr == fwd::ZERO_IP { 104 | // destination is in connected subnet 105 | p.out_l3addr = daddr; 106 | } 107 | vectors.push(Next::EncapMux as usize, p); 108 | } 109 | _ => { 110 | let _ = self.cnt.no_route.incr(); 111 | } 112 | } 113 | } else { 114 | self.cnt.no_route.incr(); 115 | } 116 | } 117 | } 118 | 119 | fn control_msg(&mut self, _thread: usize, message: R2Msg) { 120 | match message { 121 | R2Msg::IPv4TableAdd(table) => { 122 | self.table = table.table; 123 | } 124 | _ => panic!("Unknown type"), 125 | } 126 | } 127 | } 128 | -------------------------------------------------------------------------------- /gnodes/layer3/ipv4/parse/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "l3_ipv4_parse" 3 | version = "0.1.0" 4 | authors = ["Gopa Kumar "] 5 | edition = "2018" 6 | 7 | [dependencies] 8 | common = { path = "../../../../common" } 9 | names = { path = "../../../../names" } 10 | graph = { path = "../../../../graph" } 11 | packet = { path = "../../../../packet" } 12 | counters = { path = "../../../../counters" } 13 | fwd = { path = "../../../../fwd" } 14 | log = { path = "../../../../log" } 15 | -------------------------------------------------------------------------------- /gnodes/layer3/ipv4/parse/src/lib.rs: -------------------------------------------------------------------------------- 1 | use 
counters::flavors::{Counter, CounterType}; 2 | use counters::Counters; 3 | use fwd::IPHDR_MIN_LEN; 4 | use graph::Dispatch; 5 | use graph::Gclient; 6 | use log::Logger; 7 | use std::sync::Arc; 8 | 9 | #[derive(Copy, Clone)] 10 | enum Next { 11 | Drop = 0, 12 | L3Ipv4Fwd, 13 | } 14 | 15 | const NEXT_NAMES: &[Next] = &[Next::Drop, Next::L3Ipv4Fwd]; 16 | 17 | fn next_name(next: Next) -> String { 18 | match next { 19 | Next::Drop => names::DROP.to_string(), 20 | Next::L3Ipv4Fwd => names::L3_IPV4_FWD.to_string(), 21 | } 22 | } 23 | 24 | // The parse node is assumed to get a layer3 packet as input, and its role is to redirect 25 | // the packet to the appropriate layer3 feature node (like v4, v6 or gre or mpls etc..). 26 | // All it handles today is ipv4 27 | pub struct IPv4Parse { 28 | bad_pkt: Counter, 29 | } 30 | 31 | impl IPv4Parse { 32 | pub fn new(counters: &mut Counters) -> IPv4Parse { 33 | let bad_pkt = Counter::new( 34 | counters, 35 | names::L3_IPV4_PARSE, 36 | CounterType::Error, 37 | "bad_pkt", 38 | ); 39 | IPv4Parse { bad_pkt } 40 | } 41 | 42 | pub fn name(&self) -> String { 43 | names::L3_IPV4_PARSE.to_string() 44 | } 45 | 46 | pub fn next_names(&self) -> Vec { 47 | let mut v = Vec::new(); 48 | for n in NEXT_NAMES { 49 | assert_eq!(*n as usize, v.len()); 50 | v.push(next_name(*n)); 51 | } 52 | v 53 | } 54 | } 55 | 56 | impl Gclient for IPv4Parse { 57 | fn clone(&self, counters: &mut Counters, _log: Arc) -> Box> { 58 | let bad_pkt = Counter::new(counters, &self.name(), CounterType::Error, "bad_pkt"); 59 | Box::new(IPv4Parse { bad_pkt }) 60 | } 61 | 62 | fn dispatch(&mut self, _thread: usize, vectors: &mut Dispatch) { 63 | while let Some(mut p) = vectors.pop() { 64 | if p.set_l3(IPHDR_MIN_LEN) { 65 | vectors.push(Next::L3Ipv4Fwd as usize, p); 66 | } else { 67 | self.bad_pkt.incr(); 68 | } 69 | } 70 | } 71 | } 72 | -------------------------------------------------------------------------------- /graph/Cargo.toml: 
-------------------------------------------------------------------------------- 1 | [package] 2 | name = "graph" 3 | version = "0.1.0" 4 | authors = ["Gopa Kumar "] 5 | edition = "2018" 6 | 7 | [dependencies] 8 | crossbeam-queue = "0.2.1" 9 | names = { path = "../names" } 10 | packet = { path = "../packet" } 11 | counters = { path = "../counters" } 12 | log = { path = "../log" } 13 | perf = { path = "../perf" } 14 | 15 | -------------------------------------------------------------------------------- /graph/graph.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Graph" 3 | weight: 1 4 | type: docs 5 | description: > 6 | 7 | --- 8 | 9 | # Graph 10 | 11 | This is the central piece of R2. The Graph structure consists of the following main items 12 | 13 | 1. Vector of Nodes 14 | 2. Vector of packets queues - queues waiting to be processed by each node (dispatch vectors) 15 | 3. A dictionary of indices for each node - an index into the vector in 1) above 16 | 17 | The graph is basically a collection of nodes and each node specifies its list of next-nodes. As of today, there is only nodes that can get added to the graph, nothing gets deleted from it (we can extend it if need be, maybe just mark a node as deleted etc..). And since nodes are added to a vector in 1) above, the index corresponding to the node is simply an offset into that vector. And the list of next-nodes of each node is thus simply a list of indices. 18 | 19 | ## Gnode 20 | 21 | The node structure has a Client object with a set of APIs corresponding which each client object should provide. One of the APIs is dispatch(), which is the API that gives clients the packets waiting to be processed. The other API is clone(), which is used to make copies of the graph (which involves copying each node). And the last API is control_msg() - if the control plane wants to send a message to the nodes (like add a route), then the client gets this callback. 
22 | 23 | When the client asks for a node to be created and inserted into the graph, it provides the client object as a parameter. It also provides a list of names of the next-nodes. Once all the nodes are inserted into the graph, the graph creator calls the finalize() API on the graph which will basically update each node with a next-node-name to next-node-index translation. 24 | 25 | The graph run() API walks through every single node, and calls the dispatch() API on the client. The dispatch is called with a Dispatch structure, which basically contains the dispatch vector of every node in the graph - the client will take packets as inputs from its own dispatch vector and queue them to dispatch vectors of the other nodes. The Dispatch structure also provides the list of node-ids of the next-nodes of that particular node - remember we had mentioned earlier how each node gives the names of its next-nodes as a list and how we convert it to node-ids in finalize(), after the graph is all ready. 26 | 27 | The node client is any structure that provides the APIs mentioned earlier. And the general structure of a node is as below 28 | 29 | 1. Client will have its own name - a "well known" name listed in the names module 30 | 2. Client will provide a list of names of next-nodes. Usually it's done by having an enum Next which gives names to next-node array indices and then a NEXT_NAMES array which gives names of the next-nodes. There is no requirement to do it that way, although it's nice to have a uniform way of doing things 31 | 3. Client will use the pop() method of the Dispatch object passed in to get its own input packets one by one, process the packet and send them to another node using the push() method of the Dispatch object 32 | 4.
Client does not know about the "actual" node indices etc.., the client always refers to its own local next-node array to refer to the node it wants to push the packet to, the Dispatch object will figure out how to convert that to the actual graph index 33 | -------------------------------------------------------------------------------- /log/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "log" 3 | version = "0.1.0" 4 | authors = ["Gopa Kumar "] 5 | edition = "2018" 6 | 7 | [dependencies] 8 | thrift = "0.13.0" 9 | libc = { version = "0.2.171", features = [ "extra_traits" ] } 10 | common = { path = "../common" } 11 | shm = { path = "../unix/shm" } 12 | 13 | 14 | -------------------------------------------------------------------------------- /log/log.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: "LOG" 3 | weight: 1 4 | type: docs 5 | description: > 6 | 7 | --- 8 | 9 | # Forwarding plane logger 10 | 11 | So the first thing to be clarified upfront is that this is NOT a general purpose syslog kind of logger. This is purely meant for forwarding path to log per-packet information, and not expected to be on by default, usually used for a limited time to collect data to debug some issue. So obviously the logger has to be a simple fast module. These are the properties of the logger 12 | 13 | 1. The logger is per thread - there is no locks or atomic operatons etc.. when a thread logs to its logger. Its just a straightforward write to memory. So obviously an external utility that displays the logs will have to "merge" the logs from each thread according to timestamps before its displayed to the user 14 | 15 | 2. The logger memory is in shared memory. So an external utility can dump the logger without disturbing the R2 process, although thats not how its done today. 
Today the external utility makes an API call to R2 to dump the logs, the API callback sets a flag asking forwarding paths to stop using their loggers, and waits till it knows the forwarding paths have stopped using the logger, and then the API callback handler dumps the logs as serialized json - note that it doesnt merge the logs, thats upto the external utility. We can (and I think we should) offload R2 from serializing logs, R2 should just make the forwarding paths stop using the logger and then the external utility can/should do everything else. But this will need the data in Logger::hash to also be available to the external utility, to be able to interpret the log entries. 16 | 17 | 3. The shared memory area for logs is divided into fixed size chunks of memory - so its basically a circular list of fixed size objects. And hence obviously each log entry has a fixed size and hence theres a max limit to what can be dumped in each log entry - all these choices are to keep the logger simple and fast. 18 | 19 | ## The logger macro 20 | 21 | Modules dump a log entry using a log! macro. The macro takes variable number of parameters and uses a log_helper! macro to recursively walk through each parameter and copy it to the log entry. The macro just determines the size of each parameter, treats it as an array of bytes and copies those bytes into a log entry. So logging an entry is a bunch of mem copies. 22 | 23 | As described in the architecture section, The logger is shared between the API handler (control) thread and the forwarding plane thread, so that the API handler can dump the logs. Because of this sharing, the logger automatically becomes read-only in Rust. And obviously we want to modify the logger in the forwarding plane - for example to get a new log entry, ie advance the log entry head/tail etc.. So for that purpose we use the "interior mutability" concept in Rust - we basically use the indices as Atomic numbers. 
But that does not really introduce an atomic operation because we use the Relaxed memory mode - remember this atomic is just to get around Rust making the shared logger read-only. And the relaxed mode in Intel CPUs just translates to a regular memory read/write, no atomics. 24 | 25 | ## Serialization 26 | 27 | To dump logger entries as json, I initially considered the serdes module in Rust. But as described in the architecture, we want to do everything possible to keep R2 small and its dependencies minimal. The serdes module is a rather large module and R2 depending on it to just dump a simple json structure did not make sense - so the logger module just dumps the entry as hand-coded json. Its ugly, but it does avoid huge unnecessary dependency, and it should be fine as long as the data being dumped remains flat and straight forward (which will be the case for a log entry) 28 | 29 | -------------------------------------------------------------------------------- /log/src/test.rs: -------------------------------------------------------------------------------- 1 | use super::*; 2 | 3 | #[test] 4 | fn log_basic() { 5 | let logger = Logger::new("r2_log_test_basic", 32, 4).unwrap(); 6 | log!(logger, "Foo bar", 1 as u8, 2 as u16, 3 as u32, 4 as u64, 5 as u8, 6 as u32, 7 as u8); 7 | 8 | let mut base = logger.base; 9 | unsafe { 10 | assert_eq!(*(base as *const u32), 1); // index 11 | base += 4; 12 | assert!(*(base as *const u64) != 0); // rdtsc 13 | base += 8; 14 | assert_eq!(*(base as *const u8), 1); 15 | base += 1; 16 | assert_eq!(*(base as *const u16), 2); 17 | base += 2; 18 | assert_eq!(*(base as *const u32), 3); 19 | base += 4; 20 | assert_eq!(*(base as *const u64), 4); 21 | base += 8; 22 | assert_eq!(*(base as *const u8), 5); 23 | base += 1; 24 | assert_eq!(*(base as *const u32), 6); 25 | base += 4; 26 | assert_eq!(*(base as *const u8), 0); // 7 did not get written 27 | } 28 | 29 | log!( 30 | logger, 31 | "Foo bar again", 32 | 1 as u8, 33 | 2 as u16, 34 | 3 as u32, 
35 | 4 as u64, 36 | 5 as u8, 37 | 6 as u32, 38 | 7 as u8 39 | ); 40 | unsafe { 41 | assert_eq!(*(base as *const u32), 2); // index 42 | base += 4; 43 | assert!(*(base as *const u64) != 0); // rdtsc 44 | base += 8; 45 | assert_eq!(*(base as *const u8), 1); 46 | base += 1; 47 | assert_eq!(*(base as *const u16), 2); 48 | base += 2; 49 | assert_eq!(*(base as *const u32), 3); 50 | base += 4; 51 | assert_eq!(*(base as *const u64), 4); 52 | base += 8; 53 | assert_eq!(*(base as *const u8), 5); 54 | base += 1; 55 | assert_eq!(*(base as *const u32), 6); 56 | base += 4; 57 | assert_eq!(*(base as *const u8), 0); // 7 did not get written 58 | } 59 | log!( 60 | logger, 61 | "Foo bar yet again", 62 | 1 as u8, 63 | 2 as u16, 64 | 3 as u32, 65 | 4 as u64, 66 | 5 as u8, 67 | 6 as u32, 68 | 7 as u8 69 | ); 70 | log!( 71 | logger, 72 | "Foo bar, cant tolerate it %d %f !!", 73 | 1 as u8, 74 | 2 as u16, 75 | 3 as u32, 76 | 4 as u64, 77 | 5 as u8, 78 | 6 as u32, 79 | 7 as u8 80 | ); 81 | 82 | log!( 83 | logger, 84 | "Test wrap around", 85 | 7 as u8, 86 | 6 as u16, 87 | 5 as u32, 88 | 4 as u64, 89 | 3 as u8, 90 | 2 as u32, 91 | 1 as u8 92 | ); 93 | // wrap around to the first entry 94 | let mut base = logger.base; 95 | unsafe { 96 | assert_eq!(*(base as *const u32), 5); // index 97 | base += 4; 98 | assert!(*(base as *const u64) != 0); // rdtsc 99 | base += 8; 100 | assert_eq!(*(base as *const u8), 7); 101 | base += 1; 102 | assert_eq!(*(base as *const u16), 6); 103 | base += 2; 104 | assert_eq!(*(base as *const u32), 5); 105 | base += 4; 106 | assert_eq!(*(base as *const u64), 4); 107 | base += 8; 108 | assert_eq!(*(base as *const u8), 3); 109 | base += 1; 110 | assert_eq!(*(base as *const u32), 2); 111 | base += 4; 112 | assert_eq!(*(base as *const u32), 2); // This is the index number 2 (second entry) 113 | } 114 | 115 | let file = File::create("/tmp/r2_logs.json").unwrap(); 116 | logger.serialize(file).unwrap(); 117 | } 118 | 
-------------------------------------------------------------------------------- /main/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "r2" 3 | version = "0.1.0" 4 | authors = ["Gopa Kumar "] 5 | edition = "2018" 6 | 7 | [dependencies] 8 | thrift = "0.13.0" 9 | crossbeam-queue = "0.2.1" 10 | common = { path = "../common" } 11 | packet = { path = "../packet" } 12 | graph = { path = "../graph" } 13 | names = { path = "../names" } 14 | api = { path = "../api" } 15 | sched = { path = "../sched" } 16 | socket = { path = "../unix/socket" } 17 | dpdk = { path = "../dpdk" } 18 | epoll = { path = "../unix/epoll" } 19 | efd = { path = "../unix/efd" } 20 | counters = { path = "../counters" } 21 | fwd = { path = "../fwd" } 22 | log = { path = "../log" } 23 | perf = { path = "../perf" } 24 | msg = { path = "../msg" } 25 | l2_eth_encap = { path = "../gnodes/layer2/eth/encap" } 26 | l2_eth_decap = { path = "../gnodes/layer2/eth/decap" } 27 | l3_ipv4_parse = { path = "../gnodes/layer3/ipv4/parse" } 28 | l3_ipv4_fwd = { path = "../gnodes/layer3/ipv4/fwd" } 29 | interface = { path = "../gnodes/interface" } 30 | apis_interface = { path = "../apis/interface" } 31 | apis_log = { path = "../apis/log" } 32 | apis_route = { path = "../apis/route" } 33 | rust-ini = "0.15.3" 34 | clap = { version = "2.33.0"} 35 | -------------------------------------------------------------------------------- /main/main.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Main" 3 | weight: 1 4 | type: docs 5 | description: > 6 | 7 | --- 8 | 9 | # Main 10 | 11 | The code in main/ is logically organized into files based on their functionality 12 | 13 | main.rs: deals with creating and initializing fundamental structures, like packet pools, counters, the graph itself. 14 | 15 | ifd.rs: deals with interface management - adding new interfaces, adding/modifying ip addresses etc.. 
16 | 17 | ipv4.rs: Deals with ipv4 routing, adding/deleting routes etc.. 18 | 19 | log.rs: Dealing with log display etc.. 20 | 21 | msgs.rs: Deals with forwarding<-->control plane messaging 22 | 23 | pkts.rs: Deals with packet pools, today this is empty, we just use the default in-heap pool provided by packet/ library. 24 | 25 | ## main.rs 26 | 27 | The struct R2 holds the context of the entire program - it has data in it that used by the control threads, and some data thats shared across control and forwarding threads. It also has data that is unique to individual forwarding threads (logger being an example). main creates all these contexts like counters, packet pools and message channel to exchange messages between control and forwarding planes. The broadcast() method in R2 broadcasts the message to all the forwarding threads. broadcast() expects each message to implement a clone() - it works by sending a copy of the message to each thread. 28 | 29 | Main proceeds to create all the graph nodes other than the interface related ones (EtherEncap, EtherDecap etc..) in create_nodes(). The interface related nodes as explained in the architecture, are "pluggable" - an external entity has to message R2 to add an interface. Main also creates an epoller - whenever the interface nodes are created, if the interfaces work using sockets, we need to wait till packets arrive on the socket. We register the socket file descriptor with the epoller. 30 | 31 | The register_apis() ends up registering the callbacks of all the modules that have APIs that can be invoked from an external entity. And finally create_thread() launches as many forwarding threads as required, cloning the graph for each thread. The forwarding threads are waiting on the epoller for an event, and as soon as there is event on any of the fds, it goes ahead and runs through the entire graph by calling graph.run(). 
Note that the epoller model will not be needed / it will need changes when we introduce polling based forwarding models like dpdk. 32 | 33 | Main also launches an API handler thread which will listen to external API requests. The API handler threads all share a reference counted struct R2. The API handler threads get invoked when there is an external API call, in which case it takes actions like adding a route and doing a broadcast() of a message to all forwarding threads. And finally the main thread itself gets into a "wait for messages from forwarding thread" mode - like we explaiend earlier, the forwarding thread might want to send messages to control thread or to other forwarding threads. So this wait loop handles those messages. 34 | 35 | ## ifd.rs 36 | 37 | The interface handling code in this file also includes API callbacks for adding interfaces, modifying interface parameters like ip addresses and interface QoS queues. Each interface in the system has an index called "ifindex" - pretty standard concept in any linux/unix. Note that the interfaces and ifindices etc.. have nothing whatsoever to do with linux interfaces and ifindices. R2 does not really care about linux interfaces or linux forwarding - R2 has its own internal ifindices different from linux and its own forwarding seperate from linux. 38 | 39 | When an external entity calls the API to add an interface, we end up calling create_interface_node() which basically creates a graph node and sends the graph node as a broadcast() message to all the forwarding threads. As we mentioned earlier, the broadcast() will clone() the message - and the graph nodes are designed to have clone() APIs, so it works well. And each forwarding thread on receiving the message adds the interface node to the graph and calls graph.finalize() to update the other nodes with indices of the newly added node. 
40 | 41 | Similarly the handle_add_ip() handles the changes in interface parameters like ip address (and later other parameters like mtu or bandwidth etc.. can be added on). The parameters of the interface are used by the forwarding threads. Like we discussed in the architecture section, the goal here is to copy the parameters to a new interface structure and send the new structure as a message to the forwarding threads - and the forwarding threads will swap out their interface with the new one, in one simple light weight step. So the existing interface is cloned(), and the new parameters are set and we call broadcast() to send a message to all forwarding threads. Similar stuff happens when we call handle_add_class() to modify the QoS parameters of the interface. 42 | 43 | ## ipv4.rs 44 | 45 | The API callback in this file gets invoked when there is a route add/del triggered externally. Like we had explained in the architecture section, ipv4 route table is organized as an active/backup copy. Again, the routes themselves are shared, only the table (the tree, tree nodes etc..) are duplicated. The add_or_del_route() API first modifies the current backup table (WhichTable defines which one is primary and backup), then does a broadcast() message to all forwarding threads to switch to the backup table, and then it waits till all the reference counts on the active table drops and the reference count becomes 1 - the Arc::get_mut() will succeed only if the reference count drops to 1. At that point the old active (right now backup) is also modified and that completes the sequence. 46 | 47 | Note that the control thread waiting for forwarding threads to drop reference should not take long - the forwarding threads are in the business of packet forwarding, so they are not going to have a sleep-for-ten-minutes kind of things you expect from control threads. Even so, adding a control thread yield while spin looping for ref count drop to 1 might be a good TODO item. 
48 | 49 | ## log.rs 50 | 51 | Here we handle API callbacks to dump the log from each forwarding thread. Details are in logger.md. Also note that we dump the log files, but dont merge them, we expect an external utility to do that. Also as explained in logger.md, it might be a good idea to just stop the loggers in this API handler and let the external utility do the dumping also. 52 | -------------------------------------------------------------------------------- /main/src/logs.rs: -------------------------------------------------------------------------------- 1 | use super::*; 2 | use apis_log::{LogErr, LogSyncHandler}; 3 | use std::fs::File; 4 | 5 | pub struct LogApis { 6 | r2: Arc>, 7 | } 8 | 9 | impl LogApis { 10 | pub fn new(r2: Arc>) -> LogApis { 11 | LogApis { r2 } 12 | } 13 | } 14 | 15 | impl LogSyncHandler for LogApis { 16 | fn handle_show(&self, filename: String) -> thrift::Result<()> { 17 | let r2 = self.r2.lock().unwrap(); 18 | for t in r2.threads.iter() { 19 | let name = format!("{}:{}", filename, t.thread); 20 | let file = match File::create(&name) { 21 | Err(why) => { 22 | return Err(From::from(LogErr::new(format!( 23 | "couldn't create {}: {}", 24 | filename, 25 | why 26 | )))); 27 | } 28 | Ok(file) => file, 29 | }; 30 | if let Err(why) = t.logger.serialize(file) { 31 | return Err(From::from(LogErr::new(format!( 32 | "couldn't write log {}: {}", 33 | name, 34 | why 35 | )))); 36 | } 37 | } 38 | Ok(()) 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /main/src/msgs.rs: -------------------------------------------------------------------------------- 1 | use super::*; 2 | use names::l2_eth_decap; 3 | use names::l2_eth_encap; 4 | 5 | pub fn ctrl2fwd_messages( 6 | thread: usize, 7 | epoll: &mut Epoll, 8 | receiver: &Receiver, 9 | g: &mut Graph, 10 | ) { 11 | while let Ok(msg) = receiver.try_recv() { 12 | match msg { 13 | R2Msg::GnodeAdd(gnode_add) => { 14 | g.add(gnode_add.node, gnode_add.init); 15 | 
g.finalize(); 16 | } 17 | R2Msg::EpollAdd(epoll_add) => { 18 | if epoll_add.thread == thread { 19 | if let Some(fd) = epoll_add.fd { 20 | epoll.add(fd, EPOLLIN); 21 | } 22 | } 23 | } 24 | R2Msg::IPv4TableAdd(_) => { 25 | g.control_msg(names::L3_IPV4_FWD, msg); 26 | } 27 | R2Msg::ModifyInterface(mod_intf) => { 28 | g.control_msg( 29 | &l2_eth_decap(mod_intf.intf.ifindex), 30 | R2Msg::ModifyInterface(mod_intf.clone()), 31 | ); 32 | g.control_msg( 33 | &l2_eth_encap(mod_intf.intf.ifindex), 34 | R2Msg::ModifyInterface(mod_intf.clone()), 35 | ); 36 | g.control_msg( 37 | &rx_tx(mod_intf.intf.ifindex), 38 | R2Msg::ModifyInterface(mod_intf), 39 | ); 40 | } 41 | R2Msg::EthMacAdd(mac_add) => { 42 | g.control_msg( 43 | &l2_eth_decap(mac_add.ifindex), 44 | R2Msg::EthMacAdd(mac_add.clone()), 45 | ); 46 | g.control_msg(&l2_eth_encap(mac_add.ifindex), R2Msg::EthMacAdd(mac_add)); 47 | } 48 | R2Msg::ClassAdd(class) => { 49 | g.control_msg(&rx_tx(class.ifindex), R2Msg::ClassAdd(class)); 50 | } 51 | } 52 | } 53 | } 54 | 55 | pub fn fwd2ctrl_messages(r2: Arc>, receiver: Receiver) { 56 | while let Ok(msg) = receiver.recv() { 57 | match msg { 58 | R2Msg::EthMacAdd(mac_add) => { 59 | let mut r2 = r2.lock().unwrap(); 60 | r2.broadcast(R2Msg::EthMacAdd(mac_add)); 61 | } 62 | _ => panic!("Unexpected message"), 63 | } 64 | } 65 | } 66 | -------------------------------------------------------------------------------- /msg/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "msg" 3 | version = "0.1.0" 4 | authors = ["Gopa Kumar "] 5 | edition = "2018" 6 | 7 | [dependencies] 8 | treebitmap = "0.4.0" 9 | common = { path = "../common" } 10 | fwd = { path = "../fwd" } 11 | graph = { path = "../graph" } 12 | counters = { path = "../counters" } 13 | log = { path = "../log" } -------------------------------------------------------------------------------- /msg/msg.md: 
-------------------------------------------------------------------------------- 1 | --- 2 | title: "Messages" 3 | weight: 1 4 | type: docs 5 | description: > 6 | 7 | --- 8 | 9 | # Control <--> Forwarding plane messaging 10 | 11 | This module declares all the data types used to exchange messages between control<-->Forwarding plane 12 | -------------------------------------------------------------------------------- /msg/src/lib.rs: -------------------------------------------------------------------------------- 1 | use counters::Counters; 2 | use fwd::intf::ModifyInterfaceMsg; 3 | use fwd::ipv4::IPv4TableMsg; 4 | use fwd::EthMacAddMsg; 5 | use graph::{Gclient, GnodeInit}; 6 | use log::Logger; 7 | use std::sync::Arc; 8 | 9 | pub enum R2Msg { 10 | GnodeAdd(GnodeAddMsg), 11 | EpollAdd(EpollAddMsg), 12 | IPv4TableAdd(IPv4TableMsg), 13 | ModifyInterface(ModifyInterfaceMsg), 14 | EthMacAdd(EthMacAddMsg), 15 | ClassAdd(ClassAddMsg), 16 | } 17 | 18 | impl R2Msg { 19 | pub fn clone(&self, counters: &mut Counters, logger: Arc) -> Self { 20 | match self { 21 | R2Msg::GnodeAdd(gnode_add) => R2Msg::GnodeAdd(gnode_add.clone(counters, logger)), 22 | R2Msg::EpollAdd(epoll_add) => R2Msg::EpollAdd(epoll_add.clone()), 23 | R2Msg::IPv4TableAdd(table_add) => R2Msg::IPv4TableAdd(table_add.clone()), 24 | R2Msg::ModifyInterface(mod_intf) => R2Msg::ModifyInterface(mod_intf.clone()), 25 | R2Msg::EthMacAdd(mac_add) => R2Msg::EthMacAdd(mac_add.clone()), 26 | R2Msg::ClassAdd(class) => R2Msg::ClassAdd(class.clone()), 27 | } 28 | } 29 | } 30 | 31 | pub struct GnodeAddMsg { 32 | pub node: Box>, 33 | pub init: GnodeInit, 34 | } 35 | 36 | impl GnodeAddMsg { 37 | pub fn clone(&self, counters: &mut Counters, logger: Arc) -> Self { 38 | GnodeAddMsg { 39 | node: self.node.clone(counters, logger), 40 | init: self.init.clone(counters), 41 | } 42 | } 43 | } 44 | 45 | pub struct EpollAddMsg { 46 | pub fd: Option, 47 | pub thread: usize, 48 | } 49 | 50 | impl Clone for EpollAddMsg { 51 | fn clone(&self) -> 
EpollAddMsg { 52 | EpollAddMsg { 53 | fd: self.fd, 54 | thread: self.thread, 55 | } 56 | } 57 | } 58 | 59 | #[derive(Copy, Clone, Default)] 60 | pub struct Sc { 61 | pub m1: u64, 62 | pub d: usize, 63 | pub m2: u64, 64 | } 65 | 66 | #[derive(Copy, Clone, Default)] 67 | pub struct Curves { 68 | pub r_sc: Option, 69 | pub u_sc: Option, 70 | pub f_sc: Sc, 71 | } 72 | 73 | pub struct ClassAddMsg { 74 | pub ifindex: usize, 75 | pub name: String, 76 | pub parent: String, 77 | pub qlimit: usize, 78 | pub is_leaf: bool, 79 | pub curves: Curves, 80 | } 81 | 82 | impl Clone for ClassAddMsg { 83 | fn clone(&self) -> ClassAddMsg { 84 | ClassAddMsg { 85 | ifindex: self.ifindex, 86 | name: self.name.clone(), 87 | parent: self.parent.clone(), 88 | qlimit: self.qlimit, 89 | is_leaf: self.is_leaf, 90 | curves: self.curves, 91 | } 92 | } 93 | } 94 | -------------------------------------------------------------------------------- /names/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "names" 3 | version = "0.1.0" 4 | authors = ["Gopa Kumar "] 5 | edition = "2018" 6 | 7 | [dependencies] 8 | 9 | -------------------------------------------------------------------------------- /names/names.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Node names" 3 | weight: 1 4 | type: docs 5 | description: > 6 | 7 | --- 8 | 9 | # Node names 10 | 11 | This module defines the well known names of all the nodes in the graph, and also provides APIs to create the node names if it involves numbers like ifindex etc.. DO NOT construct names yourself, use the APIs here and add APIs here for new node names. 
12 | -------------------------------------------------------------------------------- /names/src/lib.rs: -------------------------------------------------------------------------------- 1 | pub const DROP: &str = "drop"; 2 | pub const IFMUX: &str = "ifmux"; 3 | pub const ENCAPMUX: &str = "encapmux"; 4 | const RX_TX: &str = "rx_tx:"; 5 | pub const L2_ETH_DECAP: &str = "l2_eth_decap:"; 6 | pub const L2_ETH_ENCAP: &str = "l2_eth_encap:"; 7 | pub const L3_IPV4_PARSE: &str = "l3_ipv4_parse"; 8 | pub const L3_IPV4_FWD: &str = "l3_ipv4_fwd"; 9 | 10 | pub fn rx_tx(ifindex: usize) -> String { 11 | let mut name = RX_TX.to_string(); 12 | name.push_str(&ifindex.to_string()); 13 | name 14 | } 15 | 16 | pub fn l2_eth_decap(ifindex: usize) -> String { 17 | let mut name = L2_ETH_DECAP.to_string(); 18 | name.push_str(&ifindex.to_string()); 19 | name 20 | } 21 | 22 | pub fn l2_eth_encap(ifindex: usize) -> String { 23 | let mut name = L2_ETH_ENCAP.to_string(); 24 | name.push_str(&ifindex.to_string()); 25 | name 26 | } 27 | -------------------------------------------------------------------------------- /packet/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "packet" 3 | version = "0.1.0" 4 | authors = ["Gopa Kumar "] 5 | edition = "2018" 6 | 7 | [dependencies] 8 | fwd = { path = "../fwd" } 9 | counters = { path = "../counters" } 10 | crossbeam-queue = "0.2.1" 11 | -------------------------------------------------------------------------------- /packet/packet.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: "packet" 3 | weight: 1 4 | type: docs 5 | description: > 6 | 7 | --- 8 | 9 | # Packet and Particle 10 | 11 | The packet object consists of a struct Packet and a chain of struct Particle objects. There Particle objects hold the actual data - it corresponds to an mbuf / skb etc.. in the bsd / linux worlds. 
The packet holds just meta data like total length of the packet, the l2 and l3 header start in the data etc.. The particle object also holds minimal data like the head and tail of the actual data buffer etc.. The question about what is a good particle size etc.. are offloaded to the user of the library. Typically these days the popular particle size of choice is 2048. Which means that the standard ethernet mtu frames (1500) will fit in one particle, and a jumbo ethernet (9000) frame will need four particles chained together. As we can see, the Particle structure chains a set of particles together - and also all the Packet get_data() kind of APIs lets users get into various offsets into the packet with chaining hidden from them. Rest of the packet library is standard networking operations on a packet - push and pull data to/from the front of the packet, append/remove data to/from the tail of the packet, get offsets to data inside the packet, store the layer2 and layer3 data offsets etc. 12 | 13 | ## BoxPkt, BoxPart and raw data 14 | 15 | The memory address that the actual packet data resides very often comes from 'special' memory areas like the HighMem in case of a DPDK packet pool. The Packet and Particle structures itself usually can come from anywhere - stack / heap wherever. Although coming from stack wont usually work if the Packet/Particle has to cross thread boundaries. Its not as often as having the raw data come from special memory, but many times the system designer would want the Packet/Particle structure itself to come from memory areas of their choice - for example if the packet has to cross process boundaries. So the BoxPkt and BoxPart just gives the system designer an option to have the Packet and Particle come from memory of their choice. So a BoxPkt and BoxPart is similar to a Box and Box - except that the Box version always comes from the heap whereas BoxPkt and BoxPart can come from any memory area of choice. 
So all the clients / applications / graph nodes will deal with the BoxPkt and not the Packet directly. The deref and derefmut traits allow the BoxPkt to be accessed as its just a Packet. 16 | 17 | ## Pools 18 | 19 | The packet, particle and particle data (particle.raw) are each assumed to come from some pre-allocated pool, and hence the BoxPkt and BoxPart structures which are basically storing an address to Packet and Particle respectively. The pool itself can be implemented by the user of this library in whichever way the user wants - the packet, particle and the raw data can come from the memory of choice of the pool designer, the PacketPool trait simply defines APIs to get and free packets/particles. How exactly its done is upto the designer. Also, the pools are expected to be per thread (which usually maps to per core).Pools adhere to the philosophy we follow in all places in R2 - that we 'create stuff' in one place (main / a control thread) and 'send' it to the forwarding threads. So the pool trait itself needs is marked as Send capable. Since pools are per thread, that means the packet buffers are also per thread - so the pool itself does not need to be lock free etc.. But as explained in the architecture section, an interface/driver pinned to one thread can of course send packets out of an interface pinned in another thread. So then how does the packet get returned back to the pool in the original thread ? Each thread has a queue to which other threads can return packets to. The Packet structure itself has information about the queue stored in it, when the packet goes out of scope, the drop() imlpementation for the packet enqueues the packet back to the queue. And each graph run() will take packets out of the queue and give it back to the pool (its an area to investigate where before trying to allocate a packet from the pool, we can try picking one out of the queue first). 
Today the queue is a bounded MPSC crossbeam ArrayQueue - there are suggestions from people to try and switch it out to an implementation of something like an LMAX disruptor queue. 20 | 21 | ### Default Heap Pool 22 | 23 | And a very usable simple example of pool is also provided in the PktsHeap structure where packets, particles and raw data all comes from the heap. And they are all stored in a simple VecDequeue. 24 | -------------------------------------------------------------------------------- /packet/src/test.rs: -------------------------------------------------------------------------------- 1 | use super::*; 2 | 3 | const NUM_PKTS: usize = 10; 4 | const NUM_PART: usize = 20; 5 | const PARTICLE_SZ: usize = 512; 6 | 7 | fn packet_pool(test: &str) -> Box { 8 | let q = Arc::new(ArrayQueue::new(NUM_PKTS)); 9 | let mut counters = Counters::new(test).unwrap(); 10 | Box::new(PktsHeap::new( 11 | "PKTS_HEAP", 12 | q, 13 | &mut counters, 14 | NUM_PKTS, 15 | NUM_PART, 16 | PARTICLE_SZ, 17 | )) 18 | } 19 | 20 | fn nparticles(pkt: &BoxPkt) -> usize { 21 | let mut cnt = 1; 22 | let mut p = pkt.particle.as_ref().unwrap(); 23 | while let Some(next) = p.next.as_ref() { 24 | cnt += 1; 25 | p = next; 26 | } 27 | cnt 28 | } 29 | 30 | fn verify_pkt(pkt: &mut BoxPkt) { 31 | for i in 0..pkt.len() { 32 | let (d, _) = match pkt.data(i) { 33 | Some((d, s)) => (d, s), 34 | None => panic!("Cannot find offset"), 35 | }; 36 | assert_eq!(d[0], (i % 256) as u8) 37 | } 38 | } 39 | 40 | fn push_pkt(pool: &mut dyn PacketPool, headroom: usize, v: &Vec, sz: usize, npart: usize) { 41 | let mut pkt = pool.pkt(headroom).unwrap(); 42 | let mut i = 0; 43 | // Push one byte at a time 44 | while i < v.len() { 45 | if (i + sz) >= v.len() { 46 | assert!(pkt.append(pool, &v[i..])); 47 | break; 48 | } else { 49 | assert!(pkt.append(pool, &v[i..i + sz])); 50 | } 51 | i += sz; 52 | } 53 | assert_eq!(pkt.len(), v.len()); 54 | assert_eq!(nparticles(&pkt), npart); 55 | verify_pkt(&mut pkt); 56 | } 57 | 58 | fn 
append_w_headroom(pool: &mut dyn PacketPool, headroom: usize, npart: usize) { 59 | let need = npart * PARTICLE_SZ - headroom; 60 | let v: Vec = (0..need).map(|x| (x % 256) as u8).collect(); 61 | 62 | // push one byte at a time 63 | push_pkt(pool, headroom, &v, 1, npart); 64 | 65 | // push two bytes at a time 66 | push_pkt(pool, headroom, &v, 2, npart); 67 | 68 | // push three bytes at a time 69 | push_pkt(pool, headroom, &v, 3, npart); 70 | 71 | // push hundred bytes at a time 72 | push_pkt(pool, headroom, &v, 100, npart); 73 | 74 | // push PARTICLE_SZ+1 bytes at a time 75 | push_pkt(pool, headroom, &v, PARTICLE_SZ + 1, npart); 76 | } 77 | 78 | #[test] 79 | fn append_test() { 80 | let mut pool = packet_pool("append_test"); 81 | append_w_headroom(&mut *pool, 100, 3); 82 | } 83 | 84 | #[test] 85 | fn prepend_test() { 86 | let mut pool = packet_pool("prepend_test"); 87 | let mut pkt = pool.pkt(100).unwrap(); 88 | assert_eq!(pkt.headroom(), 100); 89 | let v: Vec = (0..100).map(|x| (x % 256) as u8).collect(); 90 | assert!(pkt.prepend(&mut *pool, &v[0..])); 91 | assert_eq!(pkt.len(), 100); 92 | assert_eq!(pkt.headroom(), 0); 93 | assert_eq!(nparticles(&pkt), 1); 94 | verify_pkt(&mut pkt); 95 | 96 | let mut pkt = pool.pkt(100).unwrap(); 97 | assert_eq!(pkt.headroom(), 100); 98 | let v: Vec = (0..200).map(|x| (x % 256) as u8).collect(); 99 | assert!(pkt.prepend(&mut *pool, &v[0..])); 100 | assert_eq!(pkt.len(), 200); 101 | assert_eq!(pkt.headroom(), PARTICLE_SZ - 100); // 100 in the first particle, 100 in next 102 | assert_eq!(nparticles(&pkt), 2); 103 | verify_pkt(&mut pkt); 104 | } 105 | 106 | fn check_last_part(pkt: &mut BoxPkt, tail: usize) { 107 | let p = pkt.particle.as_mut().unwrap().last_particle(); 108 | assert_eq!(p.tail, tail); 109 | } 110 | 111 | #[test] 112 | fn move_tail_test() { 113 | let mut pool = packet_pool("move_tail_test"); 114 | // One particle test 115 | let headroom = 100; 116 | let available = PARTICLE_SZ - headroom; 117 | let mut pkt = 
pool.pkt(headroom).unwrap(); 118 | let bytes = vec![0 as u8; available - 10]; 119 | assert!(pkt.append(&mut *pool, &bytes[0..])); 120 | assert_eq!(pkt.len(), available - 10); 121 | check_last_part(&mut pkt, headroom + available - 10); 122 | assert_eq!(pkt.move_tail(10), 10); 123 | assert_eq!(pkt.len(), available); 124 | check_last_part(&mut pkt, headroom + available); 125 | // Cant go forward any further 126 | assert_eq!(pkt.move_tail(1), 0); 127 | assert_eq!(pkt.len(), available); 128 | check_last_part(&mut pkt, headroom + available); 129 | // Now go back 130 | let back = 0 - available as isize; 131 | assert_eq!(pkt.move_tail(back), back); 132 | assert_eq!(pkt.len(), 0); 133 | check_last_part(&mut pkt, headroom); 134 | // Cant go back any further 135 | assert_eq!(pkt.move_tail(-1), 0); 136 | assert_eq!(pkt.len(), 0); 137 | check_last_part(&mut pkt, headroom); 138 | 139 | // Two particle test 140 | let headroom = 100; 141 | let available = 2 * PARTICLE_SZ - headroom; 142 | let mut pkt = pool.pkt(headroom).unwrap(); 143 | let bytes = vec![0 as u8; available - 10]; 144 | assert!(pkt.append(&mut *pool, &bytes[0..])); 145 | assert_eq!(pkt.len(), available - 10); 146 | check_last_part(&mut pkt, PARTICLE_SZ - 10); 147 | assert_eq!(pkt.move_tail(10), 10); 148 | assert_eq!(pkt.len(), available); 149 | check_last_part(&mut pkt, PARTICLE_SZ); 150 | // Cant go forward any further 151 | assert_eq!(pkt.move_tail(1), 0); 152 | assert_eq!(pkt.len(), available); 153 | check_last_part(&mut pkt, PARTICLE_SZ); 154 | // Now go back 155 | let back = 0 - PARTICLE_SZ as isize; 156 | assert_eq!(pkt.move_tail(back), back); 157 | assert_eq!(pkt.len(), available - PARTICLE_SZ); 158 | check_last_part(&mut pkt, 0); 159 | // Cant go back any further 160 | assert_eq!(pkt.move_tail(-1), 0); 161 | assert_eq!(pkt.len(), available - PARTICLE_SZ); 162 | check_last_part(&mut pkt, 0); 163 | } 164 | 165 | fn check_first_part(pkt: &BoxPkt, head: usize) { 166 | let p = &pkt.particle; 167 | 
assert_eq!(p.as_ref().unwrap().head, head); 168 | } 169 | 170 | #[test] 171 | fn slice_test() { 172 | let mut pool = packet_pool("slice_test"); 173 | let headroom = 100; 174 | let mut pkt = pool.pkt(headroom).unwrap(); 175 | let bytes = vec![0 as u8; 2 * PARTICLE_SZ]; 176 | assert!(pkt.append(&mut *pool, &bytes[0..])); 177 | let slices = pkt.slices(); 178 | assert_eq!(slices.len(), 3); 179 | let (s, l) = slices[0]; 180 | let p = pkt.particle.as_ref().unwrap(); 181 | assert_eq!(s, &p.raw.as_ref().unwrap()[headroom..p.tail]); 182 | assert_eq!(l, PARTICLE_SZ - headroom); 183 | let (s, l) = slices[1]; 184 | let p = p.next.as_ref().unwrap(); 185 | assert_eq!(s, &p.raw.as_ref().unwrap()[0..p.tail]); 186 | assert_eq!(l, PARTICLE_SZ); 187 | let (s, l) = slices[2]; 188 | let p = p.next.as_ref().unwrap(); 189 | assert_eq!(s, &p.raw.as_ref().unwrap()[0..p.tail]); 190 | assert_eq!(l, headroom); 191 | } 192 | 193 | #[test] 194 | fn move_head_test() { 195 | let mut pool = packet_pool("move_head_test"); 196 | // One particle test 197 | let headroom = 100; 198 | let available = PARTICLE_SZ - headroom; 199 | let mut pkt = pool.pkt(headroom).unwrap(); 200 | let bytes = vec![0 as u8; available]; 201 | assert!(pkt.append(&mut *pool, &bytes[0..])); 202 | assert_eq!(pkt.len(), available); 203 | // Go front 10 204 | assert_eq!(pkt.move_head(10), 10); 205 | assert_eq!(pkt.len(), available - 10); 206 | check_first_part(&pkt, headroom + 10); 207 | // Come back 10 208 | assert_eq!(pkt.move_head(-10), -10); 209 | assert_eq!(pkt.len(), available); 210 | check_first_part(&pkt, headroom); 211 | // Try to go front available+1, it shud fail 212 | assert_eq!(pkt.move_head((available + 1) as isize), 0); 213 | assert_eq!(pkt.len(), available); 214 | // Try to go front available 215 | assert_eq!(pkt.move_head(available as isize), available as isize); 216 | check_first_part(&pkt, PARTICLE_SZ); 217 | assert_eq!(pkt.len(), 0); 218 | // Try to go back to the beginning of buffer - 1, it shud fail 219 | let 
l = -1 - PARTICLE_SZ as isize; 220 | assert_eq!(pkt.move_head(l), 0); 221 | check_first_part(&pkt, PARTICLE_SZ); 222 | assert_eq!(pkt.len(), 0); 223 | // Try to go back to the beginning of buffer 224 | let l = 0 - PARTICLE_SZ as isize; 225 | assert_eq!(pkt.move_head(l), l); 226 | check_first_part(&pkt, 0); 227 | assert_eq!(pkt.len(), PARTICLE_SZ); 228 | } 229 | 230 | #[test] 231 | fn l2_test() { 232 | let mut pool = packet_pool("l2_test"); 233 | let mac: Vec = vec![1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6]; 234 | // One particle test 235 | let headroom = 100; 236 | let mut pkt = pool.pkt(headroom).unwrap(); 237 | assert!(pkt.append(&mut *pool, &mac[0..])); 238 | assert_eq!(pkt.pull_l2(mac.len()), mac.len()); 239 | let (l2, l2len) = pkt.get_l2(); 240 | assert_eq!(l2len, mac.len()); 241 | assert_eq!(mac.iter().zip(l2).all(|(a, b)| a == b), true); 242 | assert_eq!(pkt.len(), 0); 243 | assert!(pkt.push_l2(&mut *pool, &mac)); 244 | let (l2, l2len) = pkt.get_l2(); 245 | assert_eq!(l2len, mac.len()); 246 | assert_eq!(mac.iter().zip(l2).all(|(a, b)| a == b), true); 247 | assert_eq!(pkt.len(), mac.len()); 248 | 249 | let mut pkt = pool.pkt(headroom).unwrap(); 250 | assert!(pkt.append(&mut *pool, &mac[0..])); 251 | assert!(pkt.set_l2(mac.len())); 252 | let (l2, l2len) = pkt.get_l2(); 253 | assert_eq!(l2len, mac.len()); 254 | assert_eq!(mac.iter().zip(l2).all(|(a, b)| a == b), true); 255 | assert_eq!(pkt.len(), mac.len()); 256 | } 257 | 258 | #[test] 259 | fn l3_test() { 260 | let mut pool = packet_pool("l3_test"); 261 | let ip: Vec = vec![1, 2, 3, 4, 1, 2, 3, 4]; 262 | // One particle test 263 | let headroom = 100; 264 | let mut pkt = pool.pkt(headroom).unwrap(); 265 | assert!(pkt.append(&mut *pool, &ip[0..])); 266 | assert_eq!(pkt.pull_l3(ip.len()), ip.len()); 267 | let (l3, l3len) = pkt.get_l3(); 268 | assert_eq!(l3len, ip.len()); 269 | assert_eq!(ip.iter().zip(l3).all(|(a, b)| a == b), true); 270 | assert_eq!(pkt.len(), 0); 271 | assert!(pkt.push_l3(&mut *pool, &ip)); 272 | let 
(l3, l3len) = pkt.get_l3(); 273 | assert_eq!(l3len, ip.len()); 274 | assert_eq!(ip.iter().zip(l3).all(|(a, b)| a == b), true); 275 | assert_eq!(pkt.len(), ip.len()); 276 | assert!(pkt.set_l3(ip.len())); 277 | let (l3, l3len) = pkt.get_l3(); 278 | assert_eq!(l3len, ip.len()); 279 | assert_eq!(ip.iter().zip(l3).all(|(a, b)| a == b), true); 280 | assert_eq!(pkt.len(), ip.len()); 281 | assert!(!pkt.set_l3(ip.len() + 1)); 282 | } 283 | -------------------------------------------------------------------------------- /perf/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "perf" 3 | version = "0.1.0" 4 | authors = ["Gopa Kumar "] 5 | edition = "2018" 6 | 7 | [dependencies] 8 | counters = { path = "../counters" } 9 | -------------------------------------------------------------------------------- /perf/src/lib.rs: -------------------------------------------------------------------------------- 1 | #[cfg(target_arch = "x86_64")] 2 | mod x64; 3 | 4 | #[cfg(not(target_arch = "x86_64"))] 5 | mod stubs; 6 | 7 | #[cfg(target_arch = "x86_64")] 8 | pub use x64::*; 9 | 10 | #[cfg(not(target_arch = "x86_64"))] 11 | pub use stubs::*; 12 | -------------------------------------------------------------------------------- /perf/src/stubs.rs: -------------------------------------------------------------------------------- 1 | use counters::Counters; 2 | 3 | pub struct Perf {} 4 | 5 | impl Perf { 6 | pub fn new(_: &str, _: &mut Counters) -> Self { 7 | Perf {} 8 | } 9 | 10 | pub fn start(&mut self) {} 11 | 12 | pub fn stop(&mut self) {} 13 | 14 | pub fn get_count(&self) -> u64 { 15 | 0 16 | } 17 | 18 | pub fn get_avg(&self) -> u64 { 19 | 0 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /perf/src/x64.rs: -------------------------------------------------------------------------------- 1 | use counters::{ 2 | flavors::{CounterArray, CounterType}, 3 | Counters, 4 | }; 5 | use 
std::arch::x86_64::_rdtsc; 6 | 7 | pub struct Perf { 8 | // Three counters: 9 | // index 0 is the timestamp rdtsc() 10 | // inex 1 is the hit count 11 | // index 2 is the sum of rdtsc() delta, the average of which is what usually is of interest 12 | cntrs: CounterArray, 13 | } 14 | 15 | impl Perf { 16 | pub fn new(name: &str, counters: &mut Counters) -> Self { 17 | let mut cntrs = CounterArray::new(counters, "perf", CounterType::Info, name, 3); 18 | cntrs.set(0, 0); 19 | cntrs.set(1, 0); 20 | cntrs.set(2, 0); 21 | Perf { cntrs } 22 | } 23 | 24 | pub fn start(&mut self) { 25 | unsafe { 26 | self.cntrs.set(0, _rdtsc()); 27 | } 28 | } 29 | 30 | pub fn stop(&mut self) { 31 | unsafe { 32 | let elapsed = _rdtsc() - self.cntrs.get(0); 33 | self.cntrs.add(1, 1); 34 | self.cntrs.add(2, elapsed); 35 | } 36 | } 37 | 38 | pub fn get_count(&self) -> u64 { 39 | self.cntrs.get(1) 40 | } 41 | 42 | pub fn get_avg(&self) -> u64 { 43 | if self.cntrs.get(1) != 0 { 44 | self.cntrs.get(2) / self.cntrs.get(1) 45 | } else { 46 | 0 47 | } 48 | } 49 | } 50 | 51 | #[cfg(test)] 52 | mod test { 53 | use super::*; 54 | 55 | #[test] 56 | fn test_perf() { 57 | let mut counters = Counters::new("perf_test").unwrap(); 58 | let mut p = Perf::new("perf", &mut counters); 59 | p.start(); 60 | let mut _i = 0; 61 | for _ in 0..100 { 62 | _i += 1; 63 | } 64 | p.stop(); 65 | assert_eq!(p.get_count(), 1); 66 | let mut total = p.cntrs.get(2); 67 | assert!(p.get_avg() > 50); 68 | assert!(p.get_avg() == total); 69 | p.start(); 70 | let mut _i = 0; 71 | for _ in 0..100 { 72 | _i += 1; 73 | } 74 | p.stop(); 75 | assert_eq!(p.get_count(), 2); 76 | total = p.cntrs.get(2); 77 | assert!(total > p.get_avg()); 78 | assert!(p.get_avg() > 50); 79 | } 80 | } 81 | -------------------------------------------------------------------------------- /sched/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "sched" 3 | version = "0.1.0" 4 | authors = ["Gopa Kumar 
"] 5 | edition = "2018" 6 | 7 | [dependencies] 8 | crossbeam-queue = "0.2.1" 9 | common = { path = "../common" } 10 | packet = { path = "../packet" } 11 | counters = { path = "../counters" } 12 | msg = { path = "../msg" } 13 | -------------------------------------------------------------------------------- /sched/sched.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: "scheduler" 3 | weight: 1 4 | type: docs 5 | description: > 6 | 7 | --- 8 | 9 | # Scheduler 10 | 11 | The only scheduler we support today is HFSC, and the code is completely modelled after the BSD version of HFSC (the linux version is quite similar to the BSD version too). Really the only authoritative "documentation" on HFSC is - thats a terse reading, so good luck with that :P 12 | 13 | There needs to be a bit more layers of abstraction here as and when more schedulers are added, right now the IfNode is hardcoded to assume its an HFSC scheduler. And the utils/r2intf also configures various HFSC curves, the configuration will be different if its a different scheduler. 14 | 15 | As of today in our hfsc implementation, we do not support upper limit curves - only fair share and realtime are supported. 16 | -------------------------------------------------------------------------------- /sched/src/lib.rs: -------------------------------------------------------------------------------- 1 | pub mod hfsc; 2 | -------------------------------------------------------------------------------- /tryme.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Exit on error 4 | set -e 5 | 6 | # Remove containers 7 | rm_containers () { 8 | # if containers already exist, get their container ID 9 | C1=$(docker ps -a | grep R2_client1 | awk '{print $1}') 10 | C2=$(docker ps -a | grep R2_client2 | awk '{print $1}') 11 | 12 | if [[ ! 
-z $C1 ]]; then 13 | docker stop $C1 || true 14 | docker rm $C1 || true 15 | fi 16 | if [[ ! -z $C2 ]]; then 17 | docker stop $C2 || true 18 | docker rm $C2 || true 19 | fi 20 | } 21 | 22 | rm_containers 23 | 24 | # Do not run this script as root, we use sudo for priv cmds 25 | if [[ $EUID == 0 ]]; then 26 | echo >&2 "error: Do not run this script as root" 27 | exit 1 28 | fi 29 | 30 | # If R2 is already running, kill it, this script will again launch R2 31 | sudo pkill r2 || true 32 | 33 | # Create two docker containers using the tiny busybox image 34 | docker create -t --name R2_client1 busybox sh 35 | docker create -t --name R2_client2 busybox sh 36 | docker start R2_client1 37 | docker start R2_client2 38 | 39 | # Get the container IDs for cleanup 40 | C1=`docker ps | grep R2_client1 | awk '{print $1}'` 41 | C2=`docker ps | grep R2_client2 | awk '{print $1}'` 42 | 43 | # Create veth interface pairs 44 | sudo ip link add veth_r2_1 type veth peer name veth_c2_1 45 | sudo ip link add veth_r2_2 type veth peer name veth_c2_2 46 | 47 | # Get pids of the docker namespace 48 | c1_pid=`docker inspect --format '{{ .State.Pid }}' R2_client1` 49 | c2_pid=`docker inspect --format '{{ .State.Pid }}' R2_client2` 50 | # Move the c2 end of veths to the dockers namespace 51 | sudo ip link set netns $c1_pid dev veth_c2_1 52 | sudo ip link set netns $c2_pid dev veth_c2_2 53 | # Set the links to up state 54 | sudo nsenter -t $c1_pid -n ip link set veth_c2_1 up 55 | sudo nsenter -t $c2_pid -n ip link set veth_c2_2 up 56 | # Configure ip addresses on the docker end 57 | sudo nsenter -t $c1_pid -n ip addr add 1.1.1.1/24 dev veth_c2_1 58 | sudo nsenter -t $c2_pid -n ip addr add 2.1.1.1/24 dev veth_c2_2 59 | # Delete default routes on both containers 60 | sudo nsenter -t $c1_pid -n ip route del default 61 | sudo nsenter -t $c2_pid -n ip route del default 62 | # Point default route to our new interfaces 63 | sudo nsenter -t $c1_pid -n ip route add default via 1.1.1.2 dev veth_c2_1 64 | 
sudo nsenter -t $c2_pid -n ip route add default via 2.1.1.2 dev veth_c2_2 65 | 66 | # compile R2 67 | ~/.cargo/bin/cargo build 68 | 69 | # Run R2 70 | sudo ./target/debug/r2 & 71 | 72 | # Sometimes the interfaces take a while to come up, so wait for couple 73 | # of seconds and bring the interfaces up 74 | sleep 2 75 | sudo ip link set veth_r2_1 up 76 | sudo ip link set veth_r2_2 up 77 | 78 | # Add one end of the veth pairs to R2, with some random mac address 79 | ./target/debug/r2intf veth_r2_1 add 0 8a:61:da:68:46:76 80 | ./target/debug/r2intf veth_r2_2 add 1 0e:67:57:1b:68:9c 81 | 82 | # Add ip addresses in the corresponding subnets that we added to the docker 83 | ./target/debug/r2intf veth_r2_1 ip 1.1.1.2/24 84 | ./target/debug/r2intf veth_r2_2 ip 2.1.1.2/24 85 | 86 | -------------------------------------------------------------------------------- /unix/efd/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "efd" 3 | version = "0.1.0" 4 | authors = ["Gopa Kumar "] 5 | edition = "2018" 6 | 7 | [dependencies] 8 | libc = { version = "0.2.171", features = [ "extra_traits" ] } 9 | -------------------------------------------------------------------------------- /unix/efd/src/lib.rs: -------------------------------------------------------------------------------- 1 | /* Thin wrapper around a Linux eventfd file descriptor. */ pub struct Efd { 2 | pub fd: i32, 3 | } 4 | 5 | impl Efd { 6 | /* Create an eventfd with an initial counter of 0 and the given flags; on failure the errno is returned as the error value. */ pub fn new(flags: i32) -> Result { 7 | unsafe { 8 | let fd = libc::eventfd(0, flags); 9 | /* BUGFIX: eventfd() returns -1 on error; fd 0 is a valid (if unusual) descriptor, so only negative values are failures. The old `fd <= 0` check mis-reported fd 0 as an error with a garbage errno. */ if fd < 0 { 10 | return Err(*(libc::__errno_location())); 11 | } 12 | Ok(Efd { fd }) 13 | } 14 | } 15 | 16 | /* Add `val` to the eventfd counter; any error from write(2) is deliberately ignored. */ pub fn write(&self, val: u64) { 17 | unsafe { 18 | let data = [val; 1]; 19 | libc::write(self.fd, data.as_ptr() as *const libc::c_void, 8); 20 | } 21 | } 22 | 23 | /* Read (and thereby reset) the eventfd counter; if read(2) fails the buffer stays zeroed and 0 is returned. */ pub fn read(&self) -> u64 { 24 | unsafe { 25 | /* BUGFIX: read(2) writes into this buffer, so it must be declared `mut` and handed out via as_mut_ptr(); writing through a pointer obtained from as_ptr() (a *const) and cast to *mut is undefined behavior in Rust. */ let mut data: [u64; 1] = [0; 1]; 26 | libc::read(self.fd, data.as_mut_ptr() as *mut libc::c_void, 8); 27 | data[0] 28 | } 29 | } 30 | } 31 |
-------------------------------------------------------------------------------- /unix/epoll/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "epoll" 3 | version = "0.1.0" 4 | authors = ["Gopa Kumar "] 5 | edition = "2018" 6 | 7 | [dependencies] 8 | libc = { version = "0.2.171", features = [ "extra_traits" ] } 9 | efd = { path = "../../unix/efd" } 10 | -------------------------------------------------------------------------------- /unix/epoll/src/lib.rs: -------------------------------------------------------------------------------- 1 | use efd::Efd; 2 | use std::sync::Arc; 3 | 4 | pub const EPOLLIN: u32 = libc::EPOLLIN as u32; 5 | pub const EPOLLOUT: u32 = libc::EPOLLOUT as u32; 6 | pub const EPOLLHUP: u32 = libc::EPOLLHUP as u32; 7 | pub const EPOLLERR: u32 = libc::EPOLLERR as u32; 8 | 9 | pub trait EpollClient: Send { 10 | fn event(&mut self, fd: i32, event: u32); 11 | } 12 | 13 | pub struct Epoll { 14 | epoll: i32, 15 | nfds: i32, 16 | timeout: i32, 17 | wakeup: Arc, 18 | client: Box, 19 | events: Vec, 20 | } 21 | 22 | impl Epoll { 23 | pub fn new( 24 | efd: Arc, 25 | nfds: i32, 26 | timeout: i32, 27 | client: Box, 28 | ) -> Result { 29 | let epoll: i32 = unsafe { 30 | let epoll = libc::epoll_create(nfds); 31 | if epoll < 0 { 32 | return Err(*(libc::__errno_location())); 33 | } 34 | epoll 35 | }; 36 | let event = libc::epoll_event { events: 0, u64: 0 }; 37 | let events: Vec = vec![event; nfds as usize]; 38 | let epoll = Epoll { 39 | epoll, 40 | nfds, 41 | timeout, 42 | client, 43 | wakeup: efd, 44 | events, 45 | }; 46 | epoll.add(epoll.wakeup.fd, EPOLLIN); 47 | Ok(epoll) 48 | } 49 | 50 | pub fn add(&self, fd: i32, flags: u32) -> i32 { 51 | unsafe { 52 | let mut f = libc::fcntl(fd, libc::F_GETFL); 53 | if f == -1 { 54 | let errno = *(libc::__errno_location()); 55 | return -errno; 56 | } 57 | f |= libc::O_NONBLOCK; 58 | if libc::fcntl(fd, libc::F_SETFL, f) < 0 { 59 | let errno = 
*(libc::__errno_location()); 60 | return -errno; 61 | } 62 | let mut event = libc::epoll_event { 63 | events: flags, 64 | u64: fd as u64, 65 | }; 66 | let ret = libc::epoll_ctl(self.epoll, libc::EPOLL_CTL_ADD, fd, &mut event); 67 | if ret < 0 { 68 | let errno = *(libc::__errno_location()); 69 | return -errno; 70 | } 71 | } 72 | 0 73 | } 74 | 75 | pub fn del(&self, fd: i32) { 76 | unsafe { 77 | let mut event = libc::epoll_event { events: 0, u64: 0 }; 78 | libc::epoll_ctl(self.epoll, libc::EPOLL_CTL_DEL, fd, &mut event); 79 | } 80 | } 81 | 82 | pub fn wait(&mut self) -> i32 { 83 | let ret = unsafe { 84 | let ret = libc::epoll_wait( 85 | self.epoll, 86 | self.events.as_mut_ptr(), 87 | self.nfds, 88 | self.timeout, 89 | ); 90 | if ret == -1 { 91 | let errno = *(libc::__errno_location()); 92 | if errno == libc::EINTR { 93 | return 0; 94 | } 95 | return -errno; 96 | } 97 | ret 98 | }; 99 | for e in self.events.iter().take(ret as usize) { 100 | let fd = e.u64 as i32; 101 | if fd == self.wakeup.fd { 102 | self.wakeup.read(); 103 | } 104 | self.client.event(fd, e.events); 105 | } 106 | ret 107 | } 108 | } 109 | 110 | #[cfg(test)] 111 | mod test; 112 | -------------------------------------------------------------------------------- /unix/epoll/src/test.rs: -------------------------------------------------------------------------------- 1 | use super::*; 2 | use std::str; 3 | use std::sync::atomic::{AtomicUsize, Ordering}; 4 | use std::sync::Arc; 5 | use std::thread; 6 | 7 | const HELLO_WORLD: &str = "Hello World"; 8 | 9 | fn pipe_read(fd: i32, buf: &mut [u8]) -> isize { 10 | unsafe { 11 | return libc::read(fd, buf.as_mut_ptr() as *mut libc::c_void, buf.len()); 12 | } 13 | } 14 | 15 | fn pipe_write(fd: i32, s: &str) { 16 | unsafe { 17 | libc::write(fd, s.as_ptr() as *const libc::c_void, s.len()); 18 | } 19 | } 20 | 21 | struct EpollTest { 22 | fd: i32, 23 | nevents: Arc, 24 | } 25 | 26 | impl EpollClient for EpollTest { 27 | fn event(&mut self, fd: i32, _event: u32) { 28 | 
assert_eq!(fd, self.fd); 29 | self.nevents.fetch_add(1, Ordering::Relaxed); 30 | let mut buf: Vec = vec![0; HELLO_WORLD.len()]; 31 | let sz = pipe_read(fd, &mut buf[0..]); 32 | assert_eq!(sz as usize, buf.len()); 33 | let str = str::from_utf8(&buf[0..]).unwrap(); 34 | assert_eq!(HELLO_WORLD, str); 35 | } 36 | } 37 | 38 | #[test] 39 | fn epoll_test() { 40 | let mut pipefd = [-1, -1]; 41 | unsafe { 42 | libc::pipe2(pipefd.as_mut_ptr(), libc::O_NONBLOCK); 43 | assert!(pipefd[0] > 0); 44 | assert!(pipefd[1] > 0); 45 | } 46 | let edata = Box::new(EpollTest { 47 | fd: pipefd[0], 48 | nevents: Arc::new(AtomicUsize::new(0)), 49 | }); 50 | let nevents = edata.nevents.clone(); 51 | let efd = Arc::new(Efd::new(0).unwrap()); 52 | let mut epoll = match Epoll::new(efd, 4, -1, edata) { 53 | Ok(e) => e, 54 | Err(errno) => panic!("epoll create failed, errno {}", errno), 55 | }; 56 | let ret = epoll.add(pipefd[0], EPOLLIN); 57 | assert_eq!(ret, 0); 58 | 59 | let wait = Arc::new(AtomicUsize::new(0)); 60 | let done = wait.clone(); 61 | let tname = "epoll".to_string(); 62 | let handler = thread::Builder::new().name(tname).spawn(move || loop { 63 | epoll.wait(); 64 | if nevents.load(Ordering::Relaxed) == 4 { 65 | epoll.del(pipefd[0]); 66 | done.fetch_add(1, Ordering::Relaxed); 67 | break; 68 | } 69 | }); 70 | 71 | while wait.load(Ordering::Relaxed) == 0 { 72 | pipe_write(pipefd[1], HELLO_WORLD); 73 | } 74 | handler.unwrap().join().unwrap(); 75 | } 76 | -------------------------------------------------------------------------------- /unix/shm/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "shm" 3 | version = "0.1.0" 4 | authors = ["Gopa Kumar "] 5 | edition = "2018" 6 | 7 | [dependencies] 8 | libc = { version = "0.2.171", features = [ "extra_traits" ] } 9 | -------------------------------------------------------------------------------- /unix/shm/src/lib.rs: 
-------------------------------------------------------------------------------- 1 | use std::ffi::CString; 2 | use std::ptr; 3 | 4 | /* Create (or truncate) a named POSIX shared-memory object of `size` bytes and map it read-write. Returns (fd, base) on success; on failure returns (errno, 0). */ pub fn shm_open_rw(name: &str, size: usize) -> (i32, u64) { 5 | unsafe { 6 | let c_name = CString::new(name).unwrap(); 7 | let c_name = c_name.as_ptr(); 8 | let flags = libc::O_CREAT | libc::O_TRUNC | libc::O_RDWR; 9 | let fd = libc::shm_open(c_name, flags, libc::S_IRUSR | libc::S_IWUSR); 10 | if fd == -1 { 11 | return (*(libc::__errno_location()), 0); 12 | } 13 | if libc::ftruncate(fd, size as i64) == -1 { /* BUGFIX: close the descriptor so it is not leaked on this error path */ libc::close(fd); 14 | return (*(libc::__errno_location()), 0); 15 | } 16 | let flags = libc::PROT_READ | libc::PROT_WRITE; 17 | let base = libc::mmap(ptr::null_mut(), size, flags, libc::MAP_SHARED, fd, 0); 18 | /* BUGFIX: mmap() reports failure by returning MAP_FAILED ((void *)-1), never NULL - the old is_null() check missed every real error */ if base == libc::MAP_FAILED { 19 | libc::close(fd); 20 | return (*(libc::__errno_location()), 0); 21 | } 22 | 23 | (fd, base as u64) 24 | } 25 | } 26 | 27 | /* Open an existing named shared-memory object read-only and map `size` bytes of it. Returns (fd, base) on success; on failure returns (errno, 0). */ pub fn shm_open_ro(name: &str, size: usize) -> (i32, u64) { 28 | unsafe { 29 | let c_name = CString::new(name).unwrap(); 30 | let c_name = c_name.as_ptr(); 31 | let flags = libc::O_RDONLY; 32 | let fd = libc::shm_open(c_name, flags, libc::S_IRUSR | libc::S_IWUSR); 33 | if fd == -1 { 34 | return (*(libc::__errno_location()), 0); 35 | } 36 | let flags = libc::PROT_READ; 37 | let base = libc::mmap(ptr::null_mut(), size, flags, libc::MAP_SHARED, fd, 0); 38 | /* BUGFIX: mmap() failure is MAP_FAILED, not NULL */ if base == libc::MAP_FAILED { 39 | libc::close(fd); 40 | return (*(libc::__errno_location()), 0); 41 | } 42 | 43 | (fd, base as u64) 44 | } 45 | } 46 | 47 | /* Close a descriptor obtained from shm_open_rw / shm_open_ro. */ pub fn shm_close(fd: i32) { 48 | unsafe { 49 | libc::close(fd); 50 | } 51 | } 52 | 53 | /* Remove the named shared-memory object from the system. */ pub fn shm_unlink(name: &str) { 54 | let c_name = CString::new(name).unwrap(); 55 | let c_name = c_name.as_ptr(); 56 | unsafe { 57 | libc::shm_unlink(c_name); 58 | } 59 | } 60 | -------------------------------------------------------------------------------- /unix/socket/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "socket" 3 | version =
"0.1.0" 4 | authors = ["Gopa Kumar "] 5 | edition = "2018" 6 | 7 | [dependencies] 8 | libc = { version = "0.2.171", features = [ "extra_traits" ] } 9 | crossbeam-queue = "0.2.1" 10 | common = { path = "../../common" } 11 | packet = { path = "../../packet" } 12 | counters = { path = "../../counters" } 13 | graph = { path = "../../graph" } 14 | -------------------------------------------------------------------------------- /unix/socket/src/lib.rs: -------------------------------------------------------------------------------- 1 | use graph::Driver; 2 | use packet::{BoxPkt, PacketPool}; 3 | use std::ffi::CString; 4 | use std::mem; 5 | use std::ptr; 6 | 7 | pub struct RawSock { 8 | fd: i32, 9 | } 10 | 11 | impl Driver for RawSock { 12 | fn fd(&self) -> Option { 13 | Some(self.fd) 14 | } 15 | 16 | fn recvmsg(&mut self, pool: &mut dyn PacketPool, headroom: usize) -> Option { 17 | let pkt = (*pool).pkt(headroom); 18 | pkt.as_ref()?; 19 | let mut pkt = pkt.unwrap(); 20 | unsafe { 21 | let buf = pkt.head(); 22 | let mut iov: libc::iovec = mem::MaybeUninit::zeroed().assume_init(); 23 | let head = buf.as_ptr() as u64 + pkt.headroom() as u64; 24 | iov.iov_base = head as *mut libc::c_void; 25 | iov.iov_len = buf.len() - pkt.headroom(); 26 | let mut cmsg: [u8; 32] = mem::MaybeUninit::zeroed().assume_init(); 27 | let mut mhdr: libc::msghdr = mem::MaybeUninit::zeroed().assume_init(); 28 | mhdr.msg_name = ptr::null_mut(); 29 | mhdr.msg_namelen = 0 as libc::socklen_t; 30 | mhdr.msg_iov = &mut iov; 31 | mhdr.msg_iovlen = 1; 32 | mhdr.msg_control = cmsg.as_mut_ptr() as *mut libc::c_void; 33 | mhdr.msg_controllen = cmsg.len(); 34 | mhdr.msg_flags = 0; 35 | let rv = libc::recvmsg(self.fd, &mut mhdr, libc::MSG_TRUNC); 36 | if rv > 0 { 37 | assert_eq!(pkt.move_tail(rv), rv); 38 | } 39 | Some(pkt) 40 | } 41 | } 42 | 43 | fn sendmsg(&mut self, _pool: &mut dyn PacketPool, pkt: BoxPkt) -> usize { 44 | unsafe { 45 | let slices = pkt.slices(); 46 | let iov: libc::iovec = 
mem::MaybeUninit::zeroed().assume_init(); 47 | let mut iovec: Vec = vec![iov; slices.len()]; 48 | for i in 0..slices.len() { 49 | iovec[i].iov_base = slices[i].0.as_ptr() as *mut libc::c_void; 50 | iovec[i].iov_len = slices[i].1; 51 | } 52 | let mut mhdr: libc::msghdr = mem::MaybeUninit::zeroed().assume_init(); 53 | mhdr.msg_name = ptr::null_mut(); 54 | mhdr.msg_namelen = 0 as libc::socklen_t; 55 | mhdr.msg_iov = iovec.as_mut_ptr(); 56 | mhdr.msg_iovlen = iovec.len(); 57 | mhdr.msg_control = ptr::null_mut(); 58 | mhdr.msg_controllen = 0; 59 | mhdr.msg_flags = 0; 60 | let rv = libc::sendmsg(self.fd, &mhdr, 0); 61 | if rv < 0 { 62 | return 0; 63 | } 64 | rv as usize 65 | } 66 | } 67 | } 68 | 69 | impl RawSock { 70 | const PACKET_AUXDATA: i32 = 8; 71 | const ETH_P_ALL_BE: u16 = 0x0300; // htons(libc::ETH_P_ALL); 72 | 73 | fn sockaddr_ll_new(index: u32) -> libc::sockaddr_ll { 74 | libc::sockaddr_ll { 75 | sll_family: libc::AF_PACKET as libc::c_ushort, 76 | sll_ifindex: index as libc::c_int, 77 | sll_protocol: RawSock::ETH_P_ALL_BE, 78 | sll_hatype: 0, 79 | sll_pkttype: 0, 80 | sll_halen: 0, 81 | sll_addr: [0; 8], 82 | } 83 | } 84 | 85 | pub fn fd(&self) -> i32 { 86 | self.fd 87 | } 88 | 89 | pub fn new(interface: &str, non_blocking: bool) -> Result { 90 | unsafe { 91 | let fd = libc::socket( 92 | libc::AF_PACKET, 93 | libc::SOCK_RAW, 94 | RawSock::ETH_P_ALL_BE as i32, 95 | ); 96 | if fd < 0 { 97 | return Err(*(libc::__errno_location())); 98 | } 99 | let c_str = CString::new(interface).unwrap(); 100 | let ifname = c_str.as_ptr(); 101 | let index = libc::if_nametoindex(ifname); 102 | if index == 0 { 103 | return Err(*(libc::__errno_location())); 104 | } 105 | let mut sa = RawSock::sockaddr_ll_new(index); 106 | let ptr = &mut sa as *mut libc::sockaddr_ll as *mut libc::sockaddr; 107 | let sz = mem::size_of::() as libc::socklen_t; 108 | let ret = libc::bind(fd, ptr, sz); 109 | if ret < 0 { 110 | return Err(*(libc::__errno_location())); 111 | } 112 | let val: Vec = vec![1]; 
113 | let ret = libc::setsockopt( 114 | fd, 115 | libc::SOL_PACKET, 116 | RawSock::PACKET_AUXDATA, 117 | val.as_ptr() as *const libc::c_void, 118 | mem::size_of::() as u32, 119 | ); 120 | if ret < 0 { 121 | return Err(*(libc::__errno_location())); 122 | } 123 | if non_blocking { 124 | let mut f = libc::fcntl(fd, libc::F_GETFL); 125 | if f == -1 { 126 | return Err(*(libc::__errno_location())); 127 | } 128 | f |= libc::O_NONBLOCK; 129 | if libc::fcntl(fd, libc::F_SETFL, f) < 0 { 130 | return Err(*(libc::__errno_location())); 131 | } 132 | } 133 | Ok(RawSock { fd }) 134 | } 135 | } 136 | } 137 | 138 | #[cfg(test)] 139 | mod test; 140 | -------------------------------------------------------------------------------- /unix/socket/src/test.rs: -------------------------------------------------------------------------------- 1 | use super::*; 2 | use counters::Counters; 3 | use crossbeam_queue::ArrayQueue; 4 | use packet::{PacketPool, PktsHeap}; 5 | use std::process::Command; 6 | use std::sync::atomic::{AtomicUsize, Ordering}; 7 | use std::sync::Arc; 8 | use std::thread; 9 | 10 | const NUM_PKTS: usize = 10; 11 | const NUM_PART: usize = 20; 12 | const MAX_PACKET: usize = 1500; 13 | const PARTICLE_SZ: usize = 512; 14 | 15 | fn packet_free(q: Arc>, pool: &mut dyn PacketPool) { 16 | while let Ok(p) = q.pop() { 17 | pool.free(p); 18 | } 19 | } 20 | 21 | fn packet_pool(test: &str, part_sz: usize, q: Arc>) -> Box { 22 | let mut counters = Counters::new(test).unwrap(); 23 | Box::new(PktsHeap::new( 24 | "PKTS_HEAP", 25 | q, 26 | &mut counters, 27 | NUM_PKTS, 28 | NUM_PART, 29 | part_sz, 30 | )) 31 | } 32 | 33 | fn delete_veth() { 34 | let args = [ 35 | "link", "del", "r2_eth1", "type", "veth", "peer", "name", "r2_eth2", 36 | ]; 37 | Command::new("ip") 38 | .args(&args) 39 | .spawn() 40 | .expect("veth failed") 41 | .wait() 42 | .unwrap(); 43 | } 44 | 45 | // We get random packets if ipv6 is enabled, we want only our own packets 46 | fn disable_ipv6(eth: &str) -> String { 47 | let 
mut name = "net.ipv6.conf.".to_string(); 48 | name.push_str(eth); 49 | name.push_str(".disable_ipv6=1"); 50 | name 51 | } 52 | 53 | fn create_veth() { 54 | let args = [ 55 | "link", "add", "r2_eth1", "type", "veth", "peer", "name", "r2_eth2", 56 | ]; 57 | Command::new("ip") 58 | .args(&args) 59 | .spawn() 60 | .expect("veth failed") 61 | .wait() 62 | .unwrap(); 63 | 64 | let args = ["r2_eth1", "up"]; 65 | Command::new("ifconfig") 66 | .args(&args) 67 | .spawn() 68 | .expect("ifconfig eth1 fail") 69 | .wait() 70 | .unwrap(); 71 | let args = ["-w", &disable_ipv6("r2_eth1")]; 72 | Command::new("sysctl") 73 | .args(&args) 74 | .spawn() 75 | .expect("ipv6 disable fail") 76 | .wait() 77 | .unwrap(); 78 | 79 | let args = ["-w", &disable_ipv6("r2_eth2")]; 80 | Command::new("sysctl") 81 | .args(&args) 82 | .spawn() 83 | .expect("ipv6 disable fail") 84 | .wait() 85 | .unwrap(); 86 | let args = ["r2_eth2", "up"]; 87 | Command::new("ifconfig") 88 | .args(&args) 89 | .spawn() 90 | .expect("ifconfig eth2 fail") 91 | .wait() 92 | .unwrap(); 93 | } 94 | 95 | #[test] 96 | fn read_write() { 97 | delete_veth(); 98 | create_veth(); 99 | 100 | let wait = Arc::new(AtomicUsize::new(0)); 101 | let done = wait.clone(); 102 | let tname = "rx".to_string(); 103 | let rx_q = Arc::new(ArrayQueue::new(NUM_PKTS)); 104 | let mut pool = packet_pool("sock_read_write_rx", MAX_PACKET, rx_q.clone()); 105 | let handler = thread::Builder::new().name(tname).spawn(move || { 106 | let mut raw = match RawSock::new("r2_eth2", false) { 107 | Ok(raw) => raw, 108 | Err(errno) => panic!("Errno {} opening socket", errno), 109 | }; 110 | assert!(raw.fd > 0); 111 | 112 | let pkt = raw.recvmsg(&mut *pool, 0).unwrap(); 113 | let pktlen = pkt.len(); 114 | assert_eq!(MAX_PACKET, pktlen); 115 | let (buf, len) = match pkt.data(0) { 116 | Some((d, s)) => (d, s), 117 | None => panic!("Cant get offset 0"), 118 | }; 119 | assert_eq!(len, pktlen); 120 | for i in 0..MAX_PACKET { 121 | assert_eq!(buf[i], i as u8); 122 | } 123 | 
packet_free(rx_q.clone(), &mut *pool); 124 | done.fetch_add(1, Ordering::Relaxed); 125 | }); 126 | 127 | let mut raw = match RawSock::new("r2_eth1", false) { 128 | Ok(raw) => raw, 129 | Err(errno) => panic!("Errno {} opening socket", errno), 130 | }; 131 | assert!(raw.fd > 0); 132 | let data: Vec = (0..MAX_PACKET).map(|x| (x % 256) as u8).collect(); 133 | // Send data as multi particle pkt 134 | let tx_q = Arc::new(ArrayQueue::new(NUM_PKTS)); 135 | let mut pool = packet_pool("sock_read_write_tx", PARTICLE_SZ, tx_q.clone()); 136 | while wait.load(Ordering::Relaxed) == 0 { 137 | let mut pkt = pool.pkt(0).unwrap(); 138 | assert!(pkt.append(&mut *pool, &data[0..])); 139 | assert_eq!(raw.sendmsg(&mut *pool, pkt), MAX_PACKET); 140 | packet_free(tx_q.clone(), &mut *pool); 141 | } 142 | 143 | handler.unwrap().join().unwrap(); 144 | delete_veth(); 145 | } 146 | -------------------------------------------------------------------------------- /utils/clis.md: -------------------------------------------------------------------------------- 1 | --- 2 | weight: 1 3 | type: docs 4 | description: > 5 | 6 | --- 7 | 8 | # CLIs using Rust Clap library 9 | 10 | Command line argument parsing is a messy and complex affair, and we by now means want to spend time writing code for that. We use the Rust Clap library, which is quite popular, quite feature rich and quite easy to use, and fairly well documented - cant ask for more! And needless to say I did not have much troubles with clap. Clap has a few different styles of programming a CLI, again refer to Clap documents for that. R2 does not mandate any particular way of using clap - feel free to use what fits your need, but my preferred style of using clap is by defining CLIs using a YAML file. 11 | 12 | YAML files can get notoriously complicated, but the goal we have is to define small independent stand-alone CLIs which define small YAML files - let a utility command do one thing, have multiple utils binaries to do different things. 
I don't have an alternate suggestion - unless the CLI is DAMN SIMPLE, the other Clap ways of defining the CLI using a chain of Rust objects etc. seem far more complex to me than a YAML file, so YAML is the best option IMO, like it or not. And there are not any better/simpler Rust parsers other than Clap, and to be fair Clap is pretty good and I think it's not too bad to have to live with this YAML option.

Again, the best way of adding a new CLI is by mimicking an existing one - and I will again quote the utils/r2log cli as an example because it's really tiny. And for more complex examples, consult r2intf as an example, and play around with it by typing just "r2intf" and see the help strings that pop out, and try the subcommands and different combinations to get an idea of how things work.

The basic concepts in the Clap yaml file are below, and always remember that yaml is very particular about indentation - so just use a good editor with a yaml extension so that the indentation etc. gets adjusted for you automatically.

There is an 'args' keyword which basically means "this is an argument"; what follows is the name of the argument, and whether the argument is mandatory (required: true) or not (required: false). Inside the code, the value of that argument can be obtained by matches.value_of([argument name]). Also, if the arg is mandatory and in a specific position, then you can choose to have the arg as a keyword-followed-by-value or just a value directly. For example "r2intf IFNAME" as seen in utils/r2intf/r2intf.yml is a mandatory argument and there's no keyword for it. You just type "r2intf eth0 [whatever else follows]". And in this case in r2intf/src/main.rs, a call to matches.value_of("IFNAME") directly gives the supplied interface name - eth0 in this example. If we want the keyword-value style of configuring, then the args has a 'long' / 'short' version of specifying the keyword.
For example the args qlimit in r2intf.yml will be configured as 'r2intf eth0 [blah blah] --qlimit 100' - and in the code the qlimit will be retrieved as below

    if matches.is_present("qlimit") {
        qlimit = value_t!(matches, "qlimit", i32).unwrap_or_else(|e| e.exit());
    }

The value_t! macro is a convenient way to convert the text to a value of a particular type in Rust. The other concept in the yaml file is a 'subcommand' - a subcommand is a choice between whether you want to execute A or B. For example in r2intf.yml, the options are 'add' and 'class' - so you can either add a new interface OR configure a qos class on an existing interface. Clap does not support multiple subcommands on the same line - at a time you can do only ONE of all the possible subcommands.

A few other useful constructs are - 'takes_value', which says whether the keyword needs any value. Just the presence of a keyword like 'delete' is often sufficient to say what needs to be done without any value, so takes_value can be false in that case. The other useful construct is 'requires' - you can say that if option A is configured, then options B and C also have to be configured, ie option A 'requires' B and C.

And to emphasise what we mentioned initially again - keep the CLIs small and simple. If the yaml file grows too big, it will reach a stage where no one can figure out what's doing what and how to modify anything.
29 | -------------------------------------------------------------------------------- /utils/r2cnt/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "r2cnt" 3 | version = "0.1.0" 4 | authors = ["Gopa Kumar "] 5 | edition = "2018" 6 | 7 | [dependencies] 8 | 9 | counters = { path = "../../counters" } 10 | common = { path = "../../common" } 11 | -------------------------------------------------------------------------------- /utils/r2cnt/r2cnt.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: "r2cnt" 3 | weight: 1 4 | type: docs 5 | description: > 6 | 7 | --- 8 | 9 | # Usage 10 | 11 | The usage is ./target/debug/r2cnt .. Today it just dumps all the counters, there needs to be more options added to filter out zero counters, filter counters bby name etc.. 12 | -------------------------------------------------------------------------------- /utils/r2cnt/src/main.rs: -------------------------------------------------------------------------------- 1 | use counters::CountersRO; 2 | use std::process; 3 | 4 | fn main() { 5 | if let Ok(counters) = CountersRO::new(common::R2CNT_SHM) { 6 | for (name, cntr) in counters.hash.iter() { 7 | print!("{}: ", name); 8 | for i in 0..cntr.num_cntrs() { 9 | print!("{} ", cntr.read(i)); 10 | } 11 | println!(); 12 | } 13 | } else { 14 | println!("No shared memory r2 found"); 15 | process::exit(1); 16 | } 17 | } 18 | -------------------------------------------------------------------------------- /utils/r2intf/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "r2intf" 3 | version = "0.1.0" 4 | authors = ["Gopa Kumar "] 5 | edition = "2018" 6 | 7 | [dependencies] 8 | thrift = "0.13.0" 9 | clap = { version = "2.33.0", features = ["yaml"]} 10 | api = { path = "../../api" } 11 | common = { path = "../../common" } 12 | fwd = { path = "../../fwd" } 13 | 
apis_interface = { path = "../../apis/interface" } 14 | -------------------------------------------------------------------------------- /utils/r2intf/r2intf.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: "r2intf" 3 | weight: 1 4 | type: docs 5 | description: > 6 | 7 | --- 8 | 9 | # Usage 10 | 11 | This utility is used for adding a new interface and configuring the interface parameters like ip address and QoS queues. Example usages of r2intf are below 12 | 13 | ## Add an interface 14 | 15 | Parameters are interface name, ifindex and mac address 16 | 17 | ./target/debug/r2intf eth0 add 0 8a:61:da:68:46:76 18 | 19 | ## Add an IP address 20 | 21 | Format is ipaddress/mask 22 | 23 | ./target/debug/r2intf eth0 ip 1.1.1.2/24 24 | 25 | ## Adding QoS classes 26 | 27 | Right now the scheduler supported is HFSC. You will have to get familiar with HFSC concepts of realtime (r), fair share (f) and upper limit (u) - and each of those varieties has a curve with parameters m1, m2, and d. So we configure a QoS class on the interface specifying a class name and a parent name and the parameters of interest above. 
The interface by default has a class called with name 'root', so the first class added will have a parent of name 'root'
--------------------------------------------------------------------------------
/utils/r2intf/src/main.rs:
--------------------------------------------------------------------------------
use api::api_client;
use apis_interface::{CurvesApi, InterfaceSyncClient, ScApi, TInterfaceSyncClient};
use clap::ArgMatches;
#[macro_use]
extern crate clap;
use clap::App;

/// Create a new interface in r2 via the thrift interface API.
///
/// * `ifname`  - interface name, eg "eth0"
/// * `ifindex` - interface index
/// * `mac`     - mac address in aa:bb:cc:dd:ee:ff format (validated by caller)
fn interface_add(ifname: String, ifindex: i32, mac: String) {
    // Each command opens its own short-lived connection to the API server.
    let (i_prot, o_prot) = match api_client(common::API_SVR, common::INTF_APIS) {
        Ok((i, o)) => (i, o),
        Err(why) => {
            println!("Client connection failed: {}", why);
            return;
        }
    };
    let mut client = InterfaceSyncClient::new(i_prot, o_prot);

    if let Err(e) = client.add_if(ifname, ifindex, mac) {
        println!("Add failed: {}", e);
    }
}

/// Configure an IP address on an existing interface.
///
/// * `ifname`      - interface name
/// * `ip_and_mask` - "a.b.c.d/len" string (validated by caller)
fn add_ip(ifname: String, ip_and_mask: String) {
    let (i_prot, o_prot) = match api_client(common::API_SVR, common::INTF_APIS) {
        Ok((i, o)) => (i, o),
        Err(why) => {
            println!("Client connection failed: {}", why);
            return;
        }
    };
    let mut client = InterfaceSyncClient::new(i_prot, o_prot);

    if let Err(e) = client.add_ip(ifname, ip_and_mask) {
        println!("Add failed: {}", e);
    }
}

/// Add a QoS (HFSC) class on an interface.
///
/// NOTE(review): when `del` is true this function silently does nothing -
/// the delete flag is parsed by the CLI but no delete API is invoked here;
/// confirm whether class deletion is pending implementation or the flag
/// should be rejected with a message.
fn class_add_del(
    del: bool,
    ifname: &str,
    class: &str,
    parent: &str,
    qlimit: i32,
    leaf: bool,
    curves: CurvesApi,
) {
    let (i_prot, o_prot) = match api_client(common::API_SVR, common::INTF_APIS) {
        Ok((i, o)) => (i, o),
        Err(why) => {
            println!("Client connection failed: {}", why);
            return;
        }
    };
    let mut client = InterfaceSyncClient::new(i_prot, o_prot);

    if !del {
        if let Err(e) = client.add_class(
            ifname.to_string(),
            class.to_string(),
            parent.to_string(),
            qlimit,
            leaf,
            curves,
        ) {
            println!("Add failed: {}", e);
        }
    }
}

/// Fill in the fairshare (f) service curve from --fm1/--fd/--fm2.
/// The yml marks all three mandatory, so checking fm1 covers the trio.
fn class_parse_fsc(curves: &mut CurvesApi, matches: &ArgMatches) {
    if matches.is_present("fm1") {
        let m1 = value_t!(matches, "fm1", i32).unwrap_or_else(|e| e.exit());
        let d = value_t!(matches, "fd", i32).unwrap_or_else(|e| e.exit());
        let m2 = value_t!(matches, "fm2", i32).unwrap_or_else(|e| e.exit());
        curves.f_sc = Some(ScApi {
            m1: Some(m1),
            d: Some(d),
            m2: Some(m2),
        });
    }
}

/// Fill in the optional realtime (r) service curve from --rm1/--rd/--rm2;
/// the yml's 'requires' clauses guarantee all three appear together.
fn class_parse_rsc(curves: &mut CurvesApi, matches: &ArgMatches) {
    if matches.is_present("rm1") {
        let m1 = value_t!(matches, "rm1", i32).unwrap_or_else(|e| e.exit());
        let d = value_t!(matches, "rd", i32).unwrap_or_else(|e| e.exit());
        let m2 = value_t!(matches, "rm2", i32).unwrap_or_else(|e| e.exit());
        curves.r_sc = Some(ScApi {
            m1: Some(m1),
            d: Some(d),
            m2: Some(m2),
        });
    }
}

/// Fill in the optional upper-limit (u) service curve from --um1/--ud/--um2;
/// the yml's 'requires' clauses guarantee all three appear together.
fn class_parse_usc(curves: &mut CurvesApi, matches: &ArgMatches) {
    if matches.is_present("um1") {
        let m1 = value_t!(matches, "um1", i32).unwrap_or_else(|e| e.exit());
        let d = value_t!(matches, "ud", i32).unwrap_or_else(|e| e.exit());
        let m2 = value_t!(matches, "um2", i32).unwrap_or_else(|e| e.exit());
        curves.u_sc = Some(ScApi {
            m1: Some(m1),
            d: Some(d),
            m2: Some(m2),
        });
    }
}

/// Parse the 'class' subcommand: gather flags and service curves, then
/// hand everything to class_add_del().
fn class_subcmd(ifname: &str, matches: &ArgMatches) {
    let mut curves = CurvesApi {
        r_sc: None,
        u_sc: None,
        f_sc: None,
    };
    let mut del = false;
    let mut leaf = false;
    // NOTE(review): qlimit 0 is sent when --qlimit is absent; presumably
    // the server treats 0 as "use default" - confirm.
    let mut qlimit = 0;
    let class = matches.value_of("CLASS").unwrap();
    let parent = matches.value_of("PARENT").unwrap();
    if matches.is_present("delete") {
        del = true;
    }
    if matches.is_present("leaf") {
        leaf = true;
    }
    if matches.is_present("qlimit") {
        qlimit = value_t!(matches, "qlimit", i32).unwrap_or_else(|e| e.exit());
    }
    class_parse_fsc(&mut curves, matches);
    class_parse_rsc(&mut curves, matches);
    class_parse_usc(&mut curves, matches);
    class_add_del(del, ifname, class, parent, qlimit, leaf, curves);
}

/// Parse the 'add' subcommand: validate the mac locally, then call the API.
fn add_subcmd(ifname: &str, matches: &ArgMatches) {
    let ifindex = value_t!(matches, "IFINDEX", i32).unwrap_or_else(|e| e.exit());
    let mac = value_t!(matches, "MAC", String).unwrap_or_else(|e| e.exit());
    if fwd::str_to_mac(&mac).is_none() {
        println!("Bad Mac address {}", &mac);
        return;
    }
    interface_add(ifname.to_string(), ifindex, mac);
}

/// Parse the 'ip' subcommand: validate ip/mask locally, then call the API.
fn ip_subcmd(ifname: &str, matches: &ArgMatches) {
    let ip_and_mask = value_t!(matches, "IPMASK", String).unwrap_or_else(|e| e.exit());
    if fwd::ip_mask_decode(&ip_and_mask).is_none() {
        println!("Bad IP/MASK {}", &ip_and_mask);
        return;
    }
    add_ip(ifname.to_string(), ip_and_mask);
}

/// r2intf entry point: CLI is defined in r2intf.yml; dispatch on the
/// 'add' / 'class' / 'ip' subcommand for the mandatory IFNAME argument.
fn main() {
    let yaml = load_yaml!("./r2intf.yml");
    let matches = App::from(yaml).get_matches();

    let ifname = matches.value_of("IFNAME").unwrap();

    if let Some(matches) = matches.subcommand_matches("add") {
        add_subcmd(ifname, matches);
    } else if let Some(matches) = matches.subcommand_matches("class") {
        class_subcmd(ifname, matches);
    } else if let Some(matches) = matches.subcommand_matches("ip") {
        ip_subcmd(ifname, matches);
    }
}
--------------------------------------------------------------------------------
/utils/r2intf/src/r2intf.yml:
--------------------------------------------------------------------------------
name: r2intf
version: "1.0"
author: Gopa Kumar
about: Configure interface related parameters
args:
  - IFNAME:
      help: Interface Name
      required: true
subcommands:
  - add:
      about: add interface
      args:
        - IFINDEX:
            help: Interface Index
required: true 16 | - MAC: 17 | help: Mac address in aa:bb:cc:dd:ee:ff format 18 | required: true 19 | - ip: 20 | about: configure ip address 21 | args: 22 | - IPMASK: 23 | help: IP address and mask in format 1.1.1.1/24 for example 24 | required: true 25 | - class: 26 | about: add or delete qos classes 27 | args: 28 | - delete: 29 | long: del 30 | help: delete class ? default is add 31 | takes_value: false 32 | - CLASS: 33 | help: Class Name 34 | required: true 35 | - PARENT: 36 | help: Parent Class Name 37 | required: true 38 | - qlimit: 39 | long: qlimit 40 | required: false 41 | takes_value: true 42 | value_name: qlimit 43 | help: Queue limit 44 | - leaf: 45 | long: leaf 46 | required: false 47 | takes_value: false 48 | help: Is this class a leaf (ie packet queue) ? 49 | - fm1: 50 | long: fm1 51 | value_name: fm1 52 | required: true 53 | takes_value: true 54 | help: Mandatory fairshare m1 55 | - fd: 56 | long: fd 57 | value_name: fd 58 | required: true 59 | takes_value: true 60 | help: Mandatory fairshare d 61 | - fm2: 62 | long: fm2 63 | value_name: fm2 64 | required: true 65 | takes_value: true 66 | help: Mandatory fairshare m2 67 | - rm1: 68 | long: rm1 69 | value_name: rm1 70 | requires: 71 | - rd 72 | - rm2 73 | required: false 74 | takes_value: true 75 | help: Optional realtime m1 76 | - rd: 77 | long: rd 78 | value_name: rd 79 | requires: 80 | - rm1 81 | - rm2 82 | required: false 83 | takes_value: true 84 | help: Optional realtime d 85 | - rm2: 86 | long: rm2 87 | value_name: rm2 88 | requires: 89 | - rm1 90 | - rd 91 | required: false 92 | takes_value: true 93 | help: Optional realtime m2 94 | - um1: 95 | long: um1 96 | value_name: um1 97 | requires: 98 | - ud 99 | - um2 100 | required: false 101 | takes_value: true 102 | help: Optional upperlimit m1 103 | - ud: 104 | long: ud 105 | value_name: ud 106 | requires: 107 | - um1 108 | - um2 109 | required: false 110 | takes_value: true 111 | help: Optional upperlimit d 112 | - um2: 113 | long: um2 114 | 
value_name: um2 115 | requires: 116 | - um1 117 | - ud 118 | required: false 119 | takes_value: true 120 | help: Optional upperlimit m2 121 | -------------------------------------------------------------------------------- /utils/r2log/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "r2log" 3 | version = "0.1.0" 4 | authors = ["Gopa Kumar "] 5 | edition = "2018" 6 | 7 | [dependencies] 8 | thrift = "0.13.0" 9 | clap = "2.33.0" 10 | api = { path = "../../api" } 11 | common = { path = "../../common" } 12 | apis_log = { path = "../../apis/log" } 13 | -------------------------------------------------------------------------------- /utils/r2log/r2log.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: "r2log" 3 | weight: 1 4 | type: docs 5 | description: > 6 | 7 | --- 8 | 9 | # Usage 10 | 11 | This just dumps the logs .. Usage is ./target/debug/r2log [optional filename] .. If no file name is provided, the logs are dumped to /tmp/r2_logs.json - note that as of today the individual thread's logs are dumped as /tmp/r2_logs.json:0 /tmp/r2_logs.json:1 etc.. 
Its a TBD to merge the logs into one file 12 | -------------------------------------------------------------------------------- /utils/r2log/src/main.rs: -------------------------------------------------------------------------------- 1 | use api::api_client; 2 | use apis_log::{LogSyncClient, TLogSyncClient}; 3 | #[macro_use] 4 | extern crate clap; 5 | use clap::App; 6 | 7 | fn show_logging(filename: &str) { 8 | let (i_prot, o_prot) = match api_client(common::API_SVR, common::LOG_APIS) { 9 | Ok((i, o)) => (i, o), 10 | Err(why) => { 11 | println!("Client connection failed: {}", why); 12 | return; 13 | } 14 | }; 15 | let mut client = LogSyncClient::new(i_prot, o_prot); 16 | 17 | match client.show(filename.to_string()) { 18 | Ok(result) => result, 19 | Err(why) => println!("Command failed: {}", why), 20 | } 21 | } 22 | 23 | fn main() { 24 | let yaml = load_yaml!("./r2log.yml"); 25 | let matches = App::from(yaml).get_matches(); 26 | 27 | if let Some(name) = matches.value_of("FILENAME") { 28 | show_logging(name); 29 | } else { 30 | println!("Writing logs to /tmp/r2_logs.json"); 31 | show_logging("/tmp/r2_logs.json"); 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /utils/r2log/src/r2log.yml: -------------------------------------------------------------------------------- 1 | name: r2log 2 | version: "1.0" 3 | author: Gopa Kumar 4 | about: write logs to file 5 | args: 6 | - FILENAME: 7 | help: file name 8 | required: false 9 | -------------------------------------------------------------------------------- /utils/r2rt/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "r2rt" 3 | version = "0.1.0" 4 | authors = ["Gopa Kumar "] 5 | edition = "2018" 6 | 7 | [dependencies] 8 | thrift = "0.13.0" 9 | clap = { version = "2.33.0", features = ["yaml"]} 10 | api = { path = "../../api" } 11 | common = { path = "../../common" } 12 | fwd = { path = "../../fwd" } 13 | 
apis_route = { path = "../../apis/route" }
--------------------------------------------------------------------------------
/utils/r2rt/r2rt.md:
--------------------------------------------------------------------------------
---
title: "r2rt"
weight: 1
type: docs
description: >

---

# Usage

This utility is used for adding/deleting routes. Example usage is below - parameters are network, nexthop and interface

./target/debug/r2rt route 4.1.1.1/32 1.1.1.1 eth0
--------------------------------------------------------------------------------
/utils/r2rt/src/main.rs:
--------------------------------------------------------------------------------
use api::api_client;
use apis_route::{RouteSyncClient, TRouteSyncClient};
#[macro_use]
extern crate clap;
use clap::App;
use clap::ArgMatches;
use fwd::ip_mask_decode;
use std::net::Ipv4Addr;
use std::str::FromStr;

/// Add or delete a route via the thrift route API.
///
/// * `ip_and_mask` - destination prefix, eg "4.1.1.1/32"
/// * `nhop`        - next hop IPv4 address
/// * `ifname`      - outgoing interface name
/// * `del`         - true to delete the route, false to add it
fn add_del_ip(ip_and_mask: &str, nhop: &str, ifname: &str, del: bool) {
    let (i_prot, o_prot) = match api_client(common::API_SVR, common::ROUTE_APIS) {
        Ok((i, o)) => (i, o),
        Err(why) => {
            println!("Client connection failed: {}", why);
            return;
        }
    };
    let mut client = RouteSyncClient::new(i_prot, o_prot);

    let ret = if del {
        client.del_route(
            ip_and_mask.to_string(),
            nhop.to_string(),
            ifname.to_string(),
        )
    } else {
        client.add_route(
            ip_and_mask.to_string(),
            nhop.to_string(),
            ifname.to_string(),
        )
    };
    if let Err(e) = ret {
        // Report the operation that actually ran; the original printed
        // "Add failed" even when deleting.
        println!("{} failed: {}", if del { "Del" } else { "Add" }, e);
    }
}

/// Ask r2 to write routes matching `prefix` ("all" for every route) to
/// `filename`; returns the API's reply string, or an error description.
fn show(prefix: &str, filename: &str) -> String {
    let (i_prot, o_prot) = match api_client(common::API_SVR, common::ROUTE_APIS) {
        Ok((i, o)) => (i, o),
        Err(why) => panic!("Client connection failed: {}", why),
    };
    let mut client = RouteSyncClient::new(i_prot, o_prot);
    // A single match handles both arms; the original moved the Result into
    // `if let Err(e) = ret` and then called ret.unwrap() in the else branch,
    // a use-after-move.
    match client.show(prefix.to_string(), filename.to_string()) {
        Ok(out) => out,
        Err(e) => format!("Show failed: {}", e),
    }
}

/// Parse the 'route' subcommand: validate the prefix and nexthop locally
/// before bothering the API server, then add or delete the route.
fn add_del_subcmd(matches: &ArgMatches) {
    let ip_mask = matches.value_of("IPMASK").unwrap();
    let nhop = matches.value_of("NHOP").unwrap();
    let ifname = matches.value_of("IFNAME").unwrap();
    let del = matches.is_present("delete");

    if ip_mask_decode(ip_mask).is_none() {
        println!("IP/Mask invalid");
        return;
    }
    if Ipv4Addr::from_str(nhop).is_err() {
        println!("Nhop invalid");
        return;
    }
    add_del_ip(ip_mask, nhop, ifname, del);
}

/// Parse the 'show' subcommand and return the text main() should print.
/// PREFIX is either a plain IPv4 address or the keyword "all".
fn show_subcmd(matches: &ArgMatches) -> String {
    let prefix = matches.value_of("PREFIX").unwrap();
    if prefix != "all" && Ipv4Addr::from_str(prefix).is_err() {
        return "Prefix should be a valid ip address or keyword 'all'".to_string();
    }
    if let Some(name) = matches.value_of("FILENAME") {
        // NOTE(review): show()'s return (including any "Show failed: ..")
        // is deliberately left unprinted when an explicit file is given -
        // confirm errors should not be surfaced here.
        show(prefix, name);
        String::new()
    } else {
        if prefix == "all" {
            println!("Writing routes to file /tmp/r2_routes.json");
        }
        show(prefix, "/tmp/r2_routes.json")
    }
}

/// r2rt entry point: dispatch to the 'route' (add/del) or 'show' subcommand
/// defined in r2rt.yml.
fn main() {
    let yaml = load_yaml!("./r2rt.yml");
    let matches = App::from(yaml).get_matches();

    if let Some(matches) = matches.subcommand_matches("route") {
        add_del_subcmd(matches);
    } else if let Some(matches) = matches.subcommand_matches("show") {
        let show = show_subcmd(matches);
        println!("{}", show);
    }
}
--------------------------------------------------------------------------------
/utils/r2rt/src/r2rt.yml:
--------------------------------------------------------------------------------
name: r2rt
version: "1.0"
author: Gopa Kumar
about: Configure routing
subcommands:
  - route:
      about: Add or delete routes
      args:
        - IPMASK:
            help: IP Address and Mask
            required:
true 12 | - NHOP: 13 | help: Next Hop IP Address 14 | required: true 15 | - IFNAME: 16 | help: Interface Name 17 | required: true 18 | - delete: 19 | long: del 20 | help: delete route ? default is add 21 | takes_value: false 22 | - show: 23 | about: Show routes 24 | args: 25 | - PREFIX: 26 | help: Prefix to display, or "all" to show all routes 27 | required: true 28 | - FILENAME: 29 | help: file name to write routes to 30 | required: false 31 | --------------------------------------------------------------------------------