├── .gitignore ├── Makefile ├── Pipfile ├── Pipfile.lock ├── README.md ├── agent ├── Makefile ├── agent.c └── agent.h ├── bpfmap ├── Makefile ├── arraymap.c ├── arraymap.h ├── bpfmap.c ├── bpfmap.h ├── hashtab.c ├── hashtab.h ├── libghthash │ ├── ght_hash_table.h │ ├── hash_functions.c │ └── hash_table.c ├── lpm_trie.c ├── lpm_trie.h ├── test_arraymap.c ├── test_bpfmap.c └── test_hashmap.c ├── controller ├── cli.py ├── core │ ├── __init__.py │ ├── application.py │ ├── events.py │ ├── packets │ │ └── __init__.py │ └── protocol.py ├── ewma.py ├── flowarrival.py ├── interarrival.py ├── latency.py ├── learningswitch.py └── simpleswitch.py ├── dpdkswitch ├── Makefile ├── main.c └── meson.build ├── examples ├── Makefile ├── ewma.c ├── flowarrival.c ├── idps.c ├── ids.c ├── interarrival.c ├── latency.c ├── learningswitch.c ├── learningswitch_centralized.c ├── lpm.c ├── mirror.c ├── trafficcount.c ├── traffichist.c └── wire.c ├── includes ├── ebpf_consts.h ├── ebpf_functions.h ├── ebpf_packet.h └── ebpf_switch.h ├── mininet ├── 1sw_topo.py ├── 3sw_topo.py ├── eBPFSwitch.py └── idps.py ├── protocol ├── Function.proto ├── Header.proto ├── Hello.proto ├── Makefile ├── Notify.proto ├── Packet.proto └── Table.proto ├── softswitch ├── Makefile └── main.c ├── tools ├── disassembler.py ├── test.dot └── test.png └── ubpf ├── .gitignore ├── Makefile ├── ebpf.h ├── inc └── ubpf.h ├── test.c ├── ubpf_int.h ├── ubpf_jit_x86_64.c ├── ubpf_jit_x86_64.h ├── ubpf_loader.c └── ubpf_vm.c /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | *.o 3 | *.o.d 4 | *.o.cmd 5 | *.a 6 | 7 | softswitch/softswitch 8 | 9 | protocol/src/c 10 | protocol/src/python 11 | 12 | dpdkswitch/build 13 | 14 | examples/*.bin 15 | examples/*.asm 16 | 17 | bpfmap/test_arraymap 18 | bpfmap/test_bpfmap 19 | bpfmap/test_hashmap 20 | 21 | controller/latency.dat 22 | 23 | 24 | # OpenWRT package files 25 | ipkg-* 26 | .built 27 | .configured_yyyyyyyy 28 | .prepared 29 | 
.quilt_checked 30 | .source_dir 31 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | TARGETS:=switch dpdkswitch-src examples-src 2 | 3 | all: $(TARGETS) 4 | 5 | bpfmap-src: 6 | cd bpfmap && $(MAKE) 7 | 8 | protocol-src: 9 | cd protocol && $(MAKE) 10 | 11 | ubpf-src: bpfmap-src 12 | cd ubpf && $(MAKE) 13 | 14 | agent-src: protocol-src bpfmap-src ubpf-src 15 | cd agent && $(MAKE) 16 | 17 | switch: agent-src 18 | cd softswitch && $(MAKE) 19 | 20 | dpdkswitch-src: agent-src 21 | cd dpdkswitch && $(MAKE) 22 | 23 | examples-src: 24 | cd examples && $(MAKE) 25 | 26 | clean: 27 | cd bpfmap && $(MAKE) clean 28 | cd ubpf && $(MAKE) clean 29 | cd agent && $(MAKE) clean 30 | cd protocol && $(MAKE) clean 31 | cd softswitch && $(MAKE) clean 32 | cd examples && $(MAKE) clean 33 | cd dpdkswitch && $(MAKE) clean 34 | -------------------------------------------------------------------------------- /Pipfile: -------------------------------------------------------------------------------- 1 | [[source]] 2 | url = "https://pypi.org/simple" 3 | verify_ssl = true 4 | name = "pypi" 5 | 6 | [packages] 7 | protobuf = "*" 8 | twisted = "*" 9 | 10 | [dev-packages] 11 | 12 | [requires] 13 | python_version = "3.10" 14 | -------------------------------------------------------------------------------- /Pipfile.lock: -------------------------------------------------------------------------------- 1 | { 2 | "_meta": { 3 | "hash": { 4 | "sha256": "11e6f3bca0111a498bdc5061ed0ae8fea0a1a204a02d384e3e4254f88ccfc5b1" 5 | }, 6 | "pipfile-spec": 6, 7 | "requires": { 8 | "python_version": "3.10" 9 | }, 10 | "sources": [ 11 | { 12 | "name": "pypi", 13 | "url": "https://pypi.org/simple", 14 | "verify_ssl": true 15 | } 16 | ] 17 | }, 18 | "default": { 19 | "attrs": { 20 | "hashes": [ 21 | "sha256:935dc3b529c262f6cf76e50877d35a4bd3c1de194fd41f47a2b7ae8f19971f30", 22 | 
"sha256:99b87a485a5820b23b879f04c2305b44b951b502fd64be915879d77a7e8fc6f1" 23 | ], 24 | "markers": "python_version >= '3.7'", 25 | "version": "==23.2.0" 26 | }, 27 | "automat": { 28 | "hashes": [ 29 | "sha256:c3164f8742b9dc440f3682482d32aaff7bb53f71740dd018533f9de286b64180", 30 | "sha256:e56beb84edad19dcc11d30e8d9b895f75deeb5ef5e96b84a467066b3b84bb04e" 31 | ], 32 | "version": "==22.10.0" 33 | }, 34 | "constantly": { 35 | "hashes": [ 36 | "sha256:3fd9b4d1c3dc1ec9757f3c52aef7e53ad9323dbe39f51dfd4c43853b68dfa3f9", 37 | "sha256:aa92b70a33e2ac0bb33cd745eb61776594dc48764b06c35e0efd050b7f1c7cbd" 38 | ], 39 | "markers": "python_version >= '3.8'", 40 | "version": "==23.10.4" 41 | }, 42 | "hyperlink": { 43 | "hashes": [ 44 | "sha256:427af957daa58bc909471c6c40f74c5450fa123dd093fc53efd2e91d2705a56b", 45 | "sha256:e6b14c37ecb73e89c77d78cdb4c2cc8f3fb59a885c5b3f819ff4ed80f25af1b4" 46 | ], 47 | "version": "==21.0.0" 48 | }, 49 | "idna": { 50 | "hashes": [ 51 | "sha256:9ecdbbd083b06798ae1e86adcbfe8ab1479cf864e4ee30fe4e46a003d12491ca", 52 | "sha256:c05567e9c24a6b9faaa835c4821bad0590fbb9d5779e7caa6e1cc4978e7eb24f" 53 | ], 54 | "markers": "python_version >= '3.5'", 55 | "version": "==3.6" 56 | }, 57 | "incremental": { 58 | "hashes": [ 59 | "sha256:912feeb5e0f7e0188e6f42241d2f450002e11bbc0937c65865045854c24c0bd0", 60 | "sha256:b864a1f30885ee72c5ac2835a761b8fe8aa9c28b9395cacf27286602688d3e51" 61 | ], 62 | "version": "==22.10.0" 63 | }, 64 | "mininet": { 65 | "hashes": [ 66 | "sha256:2175d04efa37dd6df86d1ff80e1f55d0d388c0edd0c8f0fc467c52a72799a85b", 67 | "sha256:4d93983ab2709bf39764006d85b60b6ce1ee8897f8ebf1c3f848b7840f0632b8" 68 | ], 69 | "index": "pypi", 70 | "version": "==2.3.0.dev6" 71 | }, 72 | "protobuf": { 73 | "hashes": [ 74 | "sha256:10894a2885b7175d3984f2be8d9850712c57d5e7587a2410720af8be56cdaf62", 75 | "sha256:2db9f8fa64fbdcdc93767d3cf81e0f2aef176284071507e3ede160811502fd3d", 76 | "sha256:33a1aeef4b1927431d1be780e87b641e322b88d654203a9e9d93f218ee359e61", 77 | 
"sha256:47f3de503fe7c1245f6f03bea7e8d3ec11c6c4a2ea9ef910e3221c8a15516d62", 78 | "sha256:5e5c933b4c30a988b52e0b7c02641760a5ba046edc5e43d3b94a74c9fc57c1b3", 79 | "sha256:8f62574857ee1de9f770baf04dde4165e30b15ad97ba03ceac65f760ff018ac9", 80 | "sha256:a8b7a98d4ce823303145bf3c1a8bdb0f2f4642a414b196f04ad9853ed0c8f830", 81 | "sha256:b50c949608682b12efb0b2717f53256f03636af5f60ac0c1d900df6213910fd6", 82 | "sha256:d66a769b8d687df9024f2985d5137a337f957a0916cf5464d1513eee96a63ff0", 83 | "sha256:fc381d1dd0516343f1440019cedf08a7405f791cd49eef4ae1ea06520bc1c020", 84 | "sha256:fe599e175cb347efc8ee524bcd4b902d11f7262c0e569ececcb89995c15f0a5e" 85 | ], 86 | "index": "pypi", 87 | "markers": "python_version >= '3.8'", 88 | "version": "==4.25.2" 89 | }, 90 | "setuptools": { 91 | "hashes": [ 92 | "sha256:385eb4edd9c9d5c17540511303e39a147ce2fc04bc55289c322b9e5904fe2c05", 93 | "sha256:be1af57fc409f93647f2e8e4573a142ed38724b8cdd389706a867bb4efcf1e78" 94 | ], 95 | "markers": "python_version >= '3.8'", 96 | "version": "==69.0.3" 97 | }, 98 | "six": { 99 | "hashes": [ 100 | "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926", 101 | "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254" 102 | ], 103 | "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", 104 | "version": "==1.16.0" 105 | }, 106 | "twisted": { 107 | "hashes": [ 108 | "sha256:4ae8bce12999a35f7fe6443e7f1893e6fe09588c8d2bed9c35cdce8ff2d5b444", 109 | "sha256:987847a0790a2c597197613686e2784fd54167df3a55d0fb17c8412305d76ce5" 110 | ], 111 | "index": "pypi", 112 | "markers": "python_full_version >= '3.8.0'", 113 | "version": "==23.10.0" 114 | }, 115 | "typing-extensions": { 116 | "hashes": [ 117 | "sha256:23478f88c37f27d76ac8aee6c905017a143b0b1b886c3c9f66bc2fd94f9f5783", 118 | "sha256:af72aea155e91adfc61c3ae9e0e342dbc0cba726d6cba4b6c72c1f34e47291cd" 119 | ], 120 | "markers": "python_version >= '3.8'", 121 | "version": "==4.9.0" 122 | }, 123 | "zope-interface": { 124 
| "hashes": [ 125 | "sha256:0c8cf55261e15590065039696607f6c9c1aeda700ceee40c70478552d323b3ff", 126 | "sha256:13b7d0f2a67eb83c385880489dbb80145e9d344427b4262c49fbf2581677c11c", 127 | "sha256:1f294a15f7723fc0d3b40701ca9b446133ec713eafc1cc6afa7b3d98666ee1ac", 128 | "sha256:239a4a08525c080ff833560171d23b249f7f4d17fcbf9316ef4159f44997616f", 129 | "sha256:2f8d89721834524a813f37fa174bac074ec3d179858e4ad1b7efd4401f8ac45d", 130 | "sha256:2fdc7ccbd6eb6b7df5353012fbed6c3c5d04ceaca0038f75e601060e95345309", 131 | "sha256:34c15ca9248f2e095ef2e93af2d633358c5f048c49fbfddf5fdfc47d5e263736", 132 | "sha256:387545206c56b0315fbadb0431d5129c797f92dc59e276b3ce82db07ac1c6179", 133 | "sha256:43b576c34ef0c1f5a4981163b551a8781896f2a37f71b8655fd20b5af0386abb", 134 | "sha256:57d0a8ce40ce440f96a2c77824ee94bf0d0925e6089df7366c2272ccefcb7941", 135 | "sha256:5a804abc126b33824a44a7aa94f06cd211a18bbf31898ba04bd0924fbe9d282d", 136 | "sha256:67be3ca75012c6e9b109860820a8b6c9a84bfb036fbd1076246b98e56951ca92", 137 | "sha256:6af47f10cfc54c2ba2d825220f180cc1e2d4914d783d6fc0cd93d43d7bc1c78b", 138 | "sha256:6dc998f6de015723196a904045e5a2217f3590b62ea31990672e31fbc5370b41", 139 | "sha256:70d2cef1bf529bff41559be2de9d44d47b002f65e17f43c73ddefc92f32bf00f", 140 | "sha256:7ebc4d34e7620c4f0da7bf162c81978fce0ea820e4fa1e8fc40ee763839805f3", 141 | "sha256:964a7af27379ff4357dad1256d9f215047e70e93009e532d36dcb8909036033d", 142 | "sha256:97806e9ca3651588c1baaebb8d0c5ee3db95430b612db354c199b57378312ee8", 143 | "sha256:9b9bc671626281f6045ad61d93a60f52fd5e8209b1610972cf0ef1bbe6d808e3", 144 | "sha256:9ffdaa5290422ac0f1688cb8adb1b94ca56cee3ad11f29f2ae301df8aecba7d1", 145 | "sha256:a0da79117952a9a41253696ed3e8b560a425197d4e41634a23b1507efe3273f1", 146 | "sha256:a41f87bb93b8048fe866fa9e3d0c51e27fe55149035dcf5f43da4b56732c0a40", 147 | "sha256:aa6fd016e9644406d0a61313e50348c706e911dca29736a3266fc9e28ec4ca6d", 148 | "sha256:ad54ed57bdfa3254d23ae04a4b1ce405954969c1b0550cc2d1d2990e8b439de1", 149 | 
"sha256:b012d023b4fb59183909b45d7f97fb493ef7a46d2838a5e716e3155081894605", 150 | "sha256:b51b64432eed4c0744241e9ce5c70dcfecac866dff720e746d0a9c82f371dfa7", 151 | "sha256:bbe81def9cf3e46f16ce01d9bfd8bea595e06505e51b7baf45115c77352675fd", 152 | "sha256:c9559138690e1bd4ea6cd0954d22d1e9251e8025ce9ede5d0af0ceae4a401e43", 153 | "sha256:e30506bcb03de8983f78884807e4fd95d8db6e65b69257eea05d13d519b83ac0", 154 | "sha256:e33e86fd65f369f10608b08729c8f1c92ec7e0e485964670b4d2633a4812d36b", 155 | "sha256:e441e8b7d587af0414d25e8d05e27040d78581388eed4c54c30c0c91aad3a379", 156 | "sha256:e8bb9c990ca9027b4214fa543fd4025818dc95f8b7abce79d61dc8a2112b561a", 157 | "sha256:ef43ee91c193f827e49599e824385ec7c7f3cd152d74cb1dfe02cb135f264d83", 158 | "sha256:ef467d86d3cfde8b39ea1b35090208b0447caaabd38405420830f7fd85fbdd56", 159 | "sha256:f89b28772fc2562ed9ad871c865f5320ef761a7fcc188a935e21fe8b31a38ca9", 160 | "sha256:fddbab55a2473f1d3b8833ec6b7ac31e8211b0aa608df5ab09ce07f3727326de" 161 | ], 162 | "markers": "python_version >= '3.7'", 163 | "version": "==6.1" 164 | } 165 | }, 166 | "develop": {} 167 | } 168 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # BPFabric - Netlab 2 | 3 | ## Description 4 | 5 | A programmable dataplane using the eBPF instruction set. 6 | 7 | ## Dependencies 8 | 9 | Tested and working with 10 | 11 | - Ubuntu 22.04.3 LTS - kernel 5.15.0-92-generic 12 | - clang 14.0.0 13 | 14 | ## Examples 15 | 16 | ### Learning Switch 17 | 18 | #### Description 19 | 20 | Very simple (legacy) learning switch that learn the source mac address and 21 | associated input port. If the port is unknown the packet is flooded. 22 | 23 | #### How-To 24 | 25 | Run a mininet topology and install the learningswitch.o eBPF elf to the 26 | switches. 
27 | 28 | ```zsh 29 | # Run the mininet topology with three switches and 4 hosts 30 | cd mininet 31 | sudo ./3sw_topo.py 32 | ``` 33 | 34 | ```zsh 35 | # Run the interactive controller 36 | cd controller 37 | ./cli.py 38 | -------------------------------------------------------------------------------- 39 | eBPF Switch Controller Command Line Interface - Netlab 2016 40 | Simon Jouet - University of Glasgow 41 | -------------------------------------------------------------------------------- 42 | 43 | 44 | Documented commands (type help ): 45 | ======================================== 46 | help 47 | 48 | Undocumented commands: 49 | ====================== 50 | EOF connections 51 | 52 | (Cmd) Connection from switch 00000001, version 1 53 | Connection from switch 00000002, version 1 54 | Connection from switch 00000003, version 1 55 | 56 | (Cmd) connections 57 | 58 | dpid version connected at 59 | ========== ========= =============== 60 | 00000001 1 1467377575.33 61 | 00000002 1 1467377575.37 62 | 00000003 1 1467377575.4 63 | ========== ========= =============== 64 | 65 | (Cmd) 1 install ../examples/learningswitch.o 66 | (Cmd) 2 install ../examples/learningswitch.o 67 | (Cmd) 3 install ../examples/learningswitch.o 68 | ``` 69 | 70 | Try the connectivity in Mininet between the hosts 71 | 72 | ```zsh 73 | mininet> pingall 74 | *** Ping: testing ping reachability 75 | h1 -> h2 h3 h4 76 | h2 -> h1 h3 h4 77 | h3 -> h1 h2 h4 78 | h4 -> h1 h2 h3 79 | *** Results: 0% dropped (12/12 received) 80 | ``` 81 | 82 | Analyze the tables of the switches to see the ethernet address to port mapping 83 | 84 | ```zsh 85 | # List the BPF tables available on switch 1 86 | (Cmd) 1 tables 87 | (Cmd) 88 | name type key size value size max entries 89 | ========= ====== ========== ============ ============= 90 | inports HASH 6 4 256 91 | ========= ====== ========== ============ ============= 92 | 93 | # List the entries in the inports table 94 | (Cmd) 1 table inports list 95 | (Cmd) 96 | Key Value 
97 | ============== ========== 98 | 000400000000 00000000 99 | 000400000003 01000000 100 | 000400000002 01000000 101 | 000400000001 00000000 102 | ============== ========== 103 | ``` 104 | 105 | ### Centralized Learning Switch 106 | 107 | #### Description 108 | 109 | A simple centralized switch that will delegate the mac address to port mapping 110 | to the controller if the destination is unknown. 111 | 112 | #### How-To 113 | 114 | Run a mininet topology and run the controller responsible to reply to the 115 | PacketIn events from the switches. 116 | 117 | ```zsh 118 | # Start the mininet topology 119 | cd mininet 120 | sudo ./3sw_topo.py 121 | ``` 122 | 123 | ```zsh 124 | # Start the controller 125 | cd controller 126 | ./simpleswitch.py 127 | Connection from switch 00000001, version 1 128 | Installing the eBPF ELF 129 | Connection from switch 00000003, version 1 130 | Installing the eBPF ELF 131 | Connection from switch 00000002, version 1 132 | Installing the eBPF ELF 133 | 2 333300000002 ba776cdc9a6b 0x86dd 134 | 1 333300000002 ba776cdc9a6b 0x86dd 135 | 0 333300000002 ba776cdc9a6b 0x86dd 136 | 1 333300000002 000400000000 0x86dd 137 | Inserting entry in switch 2 000400000000 1 138 | [...] 139 | ``` 140 | 141 | ## Building BPFabric for OpenWRT routers 142 | 143 | More on the wiki at https://github.com/UofG-netlab/BPFabric/wiki/OpenWRT 144 | 145 | ## Issues 146 | 147 | - Error while compiling the examples `/usr/include/linux/types.h:4:10: fatal error: 'asm/types.h' file not found` 148 | - `sudo ln -s /usr/include/x86_64-linux-gnu/asm/ /usr/include/asm` 149 | - `sudo apt-get install g++-multilib` 150 | 151 | ## Debugging 152 | 153 | ### Disassembling eBPF 154 | 155 | ```zsh 156 | objcopy -I elf64-little -O binary --only-section=.text program.o program.bin 157 | ubpf/bin/ubpf-disassembler program.bin program.asm 158 | cat program.asm 159 | ``` 160 | 161 | ## Known Issues 162 | 163 | - String relocation is not working properly. 
The symbol in the ELF file does not contain the offset into the string table, which prevents the relocation of the string. 164 | 165 | ## Authors 166 | 167 | - Simon Jouet (simon.jouet@glasgow.ac.uk) 168 | - Dimitrios Pezaros (dimitrios.pezaros@glasgow.ac.uk) 169 | 170 | ## Acknowledgements 171 | 172 | - EPSRC EP/L026015/1: A Situation-Aware Information Infrastructure 173 | -------------------------------------------------------------------------------- /agent/Makefile: -------------------------------------------------------------------------------- 1 | CFLAGS := -g -I../ubpf/inc -I../protocol/src/c -I../bpfmap -I../includes 2 | 3 | all: agent.o 4 | 5 | agent.o: agent.c 6 | $(CC) $(CFLAGS) -c $< 7 | $(AR) cr agent.a agent.o 8 | 9 | clean: 10 | $(RM) -f *.o *.a 11 | -------------------------------------------------------------------------------- /agent/agent.h: -------------------------------------------------------------------------------- 1 | #ifndef AGENT_H 2 | #define AGENT_H 3 | 4 | typedef void (*tx_packet_fn)(void *buf, int len, uint64_t out_port, int flags); 5 | 6 | struct agent_options 7 | { 8 | uint64_t dpid; 9 | char *controller; 10 | }; 11 | 12 | int agent_start(tx_packet_fn tx_fn, struct agent_options *opts); 13 | int agent_packetin(void *pkt, size_t len); 14 | int agent_stop(void); 15 | 16 | uint64_t pipeline_exec(void *pkt, size_t len); 17 | 18 | #endif 19 | -------------------------------------------------------------------------------- /bpfmap/Makefile: -------------------------------------------------------------------------------- 1 | all:: 2 | 3 | INSTALL=install 4 | PREFIX=/usr/local 5 | 6 | -include bpfmap-env.mk 7 | 8 | all:: libbpfmap.a 9 | 10 | bpfmap_mods += bpfmap 11 | bpfmap_mods += arraymap 12 | bpfmap_mods += hashtab 13 | bpfmap_mods += lpm_trie 14 | bpfmap_mods += libghthash/hash_functions 15 | bpfmap_mods += libghthash/hash_table 16 | 17 | libbpfmap.a: $(bpfmap_mods:%=%.o) 18 | $(AR) r '$@' $(bpfmap_mods:%=%.o) 19 | 20 | clean:: 21 | $(RM) 
*.o */*.o *.a 22 | 23 | install:: install-headers install-libraries 24 | 25 | install-headers:: 26 | $(INSTALL) -m 755 -d $(PREFIX)/include 27 | $(INSTALL) -m 644 bpfmap.h $(PREFIX)/include 28 | 29 | install-libraries:: 30 | $(INSTALL) -m 755 -d $(PREFIX)/lib 31 | $(INSTALL) -m 644 libbpfmap.a $(PREFIX)/lib 32 | -------------------------------------------------------------------------------- /bpfmap/arraymap.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #include "arraymap.h" 8 | 9 | struct bpf_map *array_map_alloc(union bpf_attr *attr) 10 | { 11 | struct bpf_array *array; 12 | uint64_t array_size; 13 | uint32_t elem_size; 14 | 15 | /* check sanity of attributes */ 16 | if (attr->max_entries == 0 || attr->key_size != 4 || 17 | attr->value_size == 0 || attr->map_flags) 18 | { 19 | errno = EINVAL; 20 | return NULL; 21 | } 22 | 23 | elem_size = round_up(attr->value_size, 8); 24 | 25 | /* allocate all map elements and zero-initialize them */ 26 | array = calloc(attr->max_entries * elem_size, sizeof(*array)); 27 | if (!array) 28 | { 29 | errno = ENOMEM; 30 | return NULL; 31 | } 32 | 33 | /* copy mandatory map attributes */ 34 | array->map.map_type = attr->map_type; 35 | array->map.key_size = attr->key_size; 36 | array->map.value_size = attr->value_size; 37 | array->map.max_entries = attr->max_entries; 38 | array->elem_size = elem_size; 39 | 40 | return &array->map; 41 | } 42 | 43 | void array_map_free(struct bpf_map *map) 44 | { 45 | struct bpf_array *array = container_of(map, struct bpf_array, map); 46 | 47 | free(array); 48 | } 49 | 50 | void *array_map_lookup_elem(struct bpf_map *map, void *key) 51 | { 52 | struct bpf_array *array = container_of(map, struct bpf_array, map); 53 | uint32_t index = *(uint32_t *)key; 54 | 55 | if (index >= array->map.max_entries) 56 | return NULL; 57 | 58 | return array->value + array->elem_size * index; 59 | } 60 | 61 | int 
array_map_get_next_key(struct bpf_map *map, void *key, void *next_key) 62 | { 63 | struct bpf_array *array = container_of(map, struct bpf_array, map); 64 | uint32_t index = *(uint32_t *)key; 65 | uint32_t *next = (uint32_t *)next_key; 66 | 67 | if (index >= array->map.max_entries) 68 | { 69 | *next = 0; 70 | return 0; 71 | } 72 | 73 | if (index == array->map.max_entries - 1) 74 | { 75 | errno = ENOENT; 76 | return -1; 77 | } 78 | 79 | *next = index + 1; 80 | return 0; 81 | } 82 | 83 | int array_map_update_elem(struct bpf_map *map, void *key, void *value, 84 | uint64_t map_flags) 85 | { 86 | struct bpf_array *array = container_of(map, struct bpf_array, map); 87 | uint32_t index = *(uint32_t *)key; 88 | 89 | if (map_flags > BPF_EXIST) 90 | { 91 | /* unknown flags */ 92 | errno = EINVAL; 93 | return -1; 94 | } 95 | 96 | if (index >= array->map.max_entries) 97 | { 98 | /* all elements were pre-allocated, cannot insert a new one */ 99 | errno = E2BIG; 100 | return -1; 101 | } 102 | 103 | if (map_flags == BPF_NOEXIST) 104 | { 105 | /* all elements already exist */ 106 | errno = EEXIST; 107 | return -1; 108 | } 109 | 110 | memcpy(array->value + array->elem_size * index, 111 | value, map->value_size); 112 | 113 | return 0; 114 | } 115 | 116 | int array_map_delete_elem(struct bpf_map *map, void *key) 117 | { 118 | errno = EINVAL; 119 | return -1; 120 | } 121 | -------------------------------------------------------------------------------- /bpfmap/arraymap.h: -------------------------------------------------------------------------------- 1 | #ifndef __EBPF_ARRAYMAP_H 2 | #define __EBPF_ARRAYMAP_H 3 | 4 | #include "bpfmap.h" 5 | 6 | struct bpf_map *array_map_alloc(union bpf_attr *attr); 7 | void array_map_free(struct bpf_map *map); 8 | void *array_map_lookup_elem(struct bpf_map *map, void *key); 9 | int array_map_get_next_key(struct bpf_map *map, void *key, void *next_key); 10 | int array_map_update_elem(struct bpf_map *map, void *key, void *value, uint64_t map_flags); 11 | 
int array_map_delete_elem(struct bpf_map *map, void *key); 12 | 13 | #endif 14 | -------------------------------------------------------------------------------- /bpfmap/bpfmap.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "bpfmap.h" 5 | #include "arraymap.h" 6 | #include "hashtab.h" 7 | #include "lpm_trie.h" 8 | 9 | #define MAX_MAPS 128 10 | 11 | struct bpf_map *bpf_maps[MAX_MAPS] = {0}; 12 | 13 | const struct bpf_map_ops bpf_map_types[] = { 14 | [BPF_MAP_TYPE_HASH] = { 15 | .map_alloc = htab_map_alloc, 16 | .map_free = htab_map_free, 17 | .map_get_next_key = htab_map_get_next_key, 18 | .map_lookup_elem = htab_map_lookup_elem, 19 | .map_update_elem = htab_map_update_elem, 20 | .map_delete_elem = htab_map_delete_elem, 21 | }, 22 | [BPF_MAP_TYPE_ARRAY] = { 23 | .map_alloc = array_map_alloc, 24 | .map_free = array_map_free, 25 | .map_get_next_key = array_map_get_next_key, 26 | .map_lookup_elem = array_map_lookup_elem, 27 | .map_update_elem = array_map_update_elem, 28 | .map_delete_elem = array_map_delete_elem, 29 | }, 30 | [BPF_MAP_TYPE_LPM_TRIE] = { 31 | .map_alloc = trie_alloc, 32 | .map_free = trie_free, 33 | .map_get_next_key = trie_get_next_key, 34 | .map_lookup_elem = trie_lookup_elem, 35 | .map_update_elem = trie_update_elem, 36 | .map_delete_elem = trie_delete_elem, 37 | }}; 38 | 39 | int bpf_create_map(enum bpf_map_type map_type, uint32_t key_size, uint32_t value_size, uint32_t max_entries, uint64_t flags) 40 | { 41 | union bpf_attr attr; 42 | 43 | memset(&attr, 0, sizeof(attr)); 44 | 45 | attr.map_type = map_type; 46 | attr.key_size = key_size; 47 | attr.value_size = value_size; 48 | attr.max_entries = max_entries; 49 | attr.map_flags = flags; 50 | 51 | // 52 | const struct bpf_map_ops *map_type_ops = &bpf_map_types[map_type]; 53 | struct bpf_map *map; 54 | 55 | map = map_type_ops->map_alloc(&attr); 56 | if (map == NULL) 57 | { 58 | return -1; 59 | } 60 | 61 | map->ops = 
map_type_ops; 62 | 63 | // find a free idx for this map 64 | int map_idx = -1; 65 | for (int i = 0; i < MAX_MAPS; i++) 66 | { 67 | if (bpf_maps[i] == NULL) 68 | { 69 | map_idx = i; 70 | bpf_maps[map_idx] = map; 71 | break; 72 | } 73 | } 74 | 75 | return map_idx; 76 | } 77 | 78 | void bpf_free_map(int map) 79 | { 80 | struct bpf_map *m = bpf_maps[map]; 81 | m->ops->map_free(m); 82 | bpf_maps[map] = NULL; 83 | } 84 | 85 | int bpf_update_elem(int map, void *key, void *value, unsigned long long flags) 86 | { 87 | struct bpf_map *m = bpf_maps[map]; 88 | return m->ops->map_update_elem(m, key, value, flags); 89 | } 90 | 91 | int bpf_lookup_elem(int map, void *key, void *value) 92 | { 93 | void **v = value; 94 | *v = NULL; 95 | 96 | struct bpf_map *m = bpf_maps[map]; 97 | *v = m->ops->map_lookup_elem(m, key); 98 | if (*v == NULL) 99 | { 100 | return -1; 101 | } 102 | 103 | return 0; 104 | } 105 | 106 | int bpf_delete_elem(int map, void *key) 107 | { 108 | struct bpf_map *m = bpf_maps[map]; 109 | return m->ops->map_delete_elem(m, key); 110 | } 111 | 112 | int bpf_get_next_key(int map, void *key, void *next_key) 113 | { 114 | struct bpf_map *m = bpf_maps[map]; 115 | return m->ops->map_get_next_key(m, key, next_key); 116 | } 117 | -------------------------------------------------------------------------------- /bpfmap/bpfmap.h: -------------------------------------------------------------------------------- 1 | #ifndef __EBPF_BPFMAP_H 2 | #define __EBPF_BPFMAP_H 3 | 4 | #include 5 | 6 | /* flags for BPF_MAP_UPDATE_ELEM command */ 7 | #define BPF_ANY 0 /* create new element or update existing */ 8 | #define BPF_NOEXIST 1 /* create new element if it didn't exist */ 9 | #define BPF_EXIST 2 /* update existing element */ 10 | 11 | #define BPF_F_NO_PREALLOC (1U << 0) 12 | 13 | #define __round_mask(x, y) ((__typeof__(x))((y)-1)) 14 | #define round_up(x, y) ((((x)-1) | __round_mask(x, y)) + 1) 15 | #define round_down(x, y) ((x) & ~__round_mask(x, y)) 16 | 17 | /** 18 | * container_of 
/**
 * container_of - cast a member of a structure out to the containing structure
 * @ptr:    the pointer to the member.
 * @type:   the type of the container struct this is embedded in.
 * @member: the name of the member within the struct.
 *
 * Spelled with __typeof__ (as __round_mask is) so the macro also compiles in
 * strict ISO modes such as -std=c11, where bare "typeof" is not a keyword.
 * offsetof() from <stddef.h> must be in scope where the macro is expanded.
 */
#define container_of(ptr, type, member) ({                     \
    const __typeof__(((type *)0)->member) *__mptr = (ptr);     \
    (type *)((char *)__mptr - offsetof(type, member)); })

/* Map implementations selectable through bpf_create_map(). */
enum bpf_map_type
{
    BPF_MAP_TYPE_UNSPEC = 0,
    BPF_MAP_TYPE_HASH = 1,
    BPF_MAP_TYPE_ARRAY = 2,
    BPF_MAP_TYPE_LPM_TRIE = 11,
};

/* Argument block handed to the map callbacks. */
union bpf_attr
{
    struct
    {                         /* anonymous struct used by BPF_MAP_CREATE command */
        uint32_t map_type;    /* one of enum bpf_map_type */
        uint32_t key_size;    /* size of key in bytes */
        uint32_t value_size;  /* size of value in bytes */
        uint32_t max_entries; /* max number of entries in a map */
        uint32_t map_flags;   /* prealloc or not */
    };

    struct
    { /* anonymous struct used by BPF_MAP_*_ELEM commands */
        uint32_t map_fd;
        uint64_t key;
        union
        {
            uint64_t value;
            uint64_t next_key;
        };
        uint64_t flags;
    };
} __attribute__((aligned(8)));

/* struct bpf_map is only used through pointers in the callback table below */
struct bpf_map;

/* Callbacks implemented by each map type. */
struct bpf_map_ops
{
    struct bpf_map *(*map_alloc)(union bpf_attr *attr);
    void (*map_release)(struct bpf_map *map);
    void (*map_free)(struct bpf_map *map);
    int (*map_get_next_key)(struct bpf_map *map, void *key, void *next_key);

    void *(*map_lookup_elem)(struct bpf_map *map, void *key);
    int (*map_update_elem)(struct bpf_map *map, void *key, void *value, uint64_t flags);
    int (*map_delete_elem)(struct bpf_map *map, void *key);
};
| // struct user_struct *user; 84 | const struct bpf_map_ops *ops; 85 | // struct work_struct work; 86 | // atomic_t usercnt; 87 | }; 88 | 89 | struct bpf_array 90 | { 91 | struct bpf_map map; 92 | uint32_t elem_size; 93 | 94 | union 95 | { 96 | char value[0] __attribute__((aligned(8))); 97 | void *ptrs[0] __attribute__((aligned(8))); 98 | }; 99 | }; 100 | 101 | int bpf_create_map(enum bpf_map_type map_type, uint32_t key_size, uint32_t value_size, uint32_t max_entries, uint64_t flags); 102 | void bpf_free_map(int map); 103 | 104 | int bpf_update_elem(int map, void *key, void *value, unsigned long long flags); 105 | int bpf_lookup_elem(int map, void *key, void *value); 106 | int bpf_delete_elem(int map, void *key); 107 | int bpf_get_next_key(int map, void *key, void *next_key); 108 | 109 | #endif 110 | -------------------------------------------------------------------------------- /bpfmap/hashtab.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include "libghthash/ght_hash_table.h" 8 | #include "bpfmap.h" 9 | 10 | /* each htab element is struct htab_elem + key + value */ 11 | struct htab_elem 12 | { 13 | char key[0] __attribute__((aligned(8))); 14 | }; 15 | 16 | struct bpf_htab 17 | { 18 | struct bpf_map map; 19 | // struct bucket *buckets; 20 | void *elems; 21 | 22 | ght_hash_table_t *htab; 23 | 24 | ght_iterator_t iterator; 25 | struct htab_elem *current; 26 | 27 | // atomic_t count; /* number of elements in this hashtable */ 28 | uint32_t n_buckets; /* number of hash buckets */ 29 | uint32_t elem_size; /* size of each element in bytes */ 30 | }; 31 | 32 | struct bpf_map *htab_map_alloc(union bpf_attr *attr) 33 | { 34 | struct bpf_htab *htab; 35 | int err, i; 36 | uint64_t cost; 37 | 38 | if (attr->map_flags & ~BPF_F_NO_PREALLOC) 39 | { 40 | /* reserved bits should not be used */ 41 | errno = EINVAL; 42 | return NULL; 43 | } 44 | 45 | htab = 
calloc(1, sizeof(*htab)); 46 | if (!htab) 47 | { 48 | errno = ENOMEM; 49 | return NULL; 50 | } 51 | 52 | /* mandatory map attributes */ 53 | htab->map.map_type = attr->map_type; 54 | htab->map.key_size = attr->key_size; 55 | htab->map.value_size = attr->value_size; 56 | htab->map.max_entries = attr->max_entries; 57 | htab->map.map_flags = attr->map_flags; 58 | 59 | /* check sanity of attributes. 60 | * value_size == 0 may be allowed in the future to use map as a set 61 | */ 62 | if (htab->map.max_entries == 0 || htab->map.key_size == 0 || 63 | htab->map.value_size == 0) 64 | goto free_htab; 65 | 66 | htab->htab = ght_create(htab->map.max_entries); 67 | if (htab->htab == NULL) 68 | goto free_htab; 69 | 70 | htab->elem_size = sizeof(struct htab_elem) + 71 | round_up(htab->map.key_size, 8) + 72 | round_up(htab->map.value_size, 8); 73 | 74 | return &htab->map; 75 | 76 | free_htab: 77 | free(htab); 78 | errno = EINVAL; 79 | return NULL; 80 | } 81 | 82 | void *htab_map_lookup_elem(struct bpf_map *map, void *key) 83 | { 84 | struct bpf_htab *htab = container_of(map, struct bpf_htab, map); 85 | struct htab_elem *l = ght_get(htab->htab, map->key_size, key); 86 | 87 | if (l == NULL) 88 | { 89 | errno = ENOENT; 90 | return NULL; 91 | } 92 | 93 | return l->key + round_up(map->key_size, 8); 94 | } 95 | 96 | int htab_map_update_elem(struct bpf_map *map, void *key, void *value, 97 | uint64_t map_flags) 98 | { 99 | struct bpf_htab *htab = container_of(map, struct bpf_htab, map); 100 | struct htab_elem *l_old; 101 | struct htab_elem *l_new; 102 | 103 | l_old = ght_get(htab->htab, map->key_size, key); 104 | if (l_old != NULL) 105 | { 106 | // Already have an entry in the map just update it 107 | memcpy(l_old->key + round_up(map->key_size, 8), value, map->value_size); 108 | 109 | return 0; 110 | } 111 | 112 | // Allocate the new element 113 | l_new = calloc(1, htab->elem_size); 114 | if (l_new == NULL) 115 | { 116 | errno = ENOMEM; 117 | return -1; 118 | } 119 | 120 | 
memcpy(l_new->key, key, map->key_size); 121 | memcpy(l_new->key + round_up(map->key_size, 8), value, map->value_size); 122 | 123 | return ght_insert(htab->htab, l_new->key, map->key_size, l_new); 124 | } 125 | 126 | int htab_map_delete_elem(struct bpf_map *map, void *key) 127 | { 128 | struct bpf_htab *htab = container_of(map, struct bpf_htab, map); 129 | struct htab_elem *l; 130 | 131 | l = ght_remove(htab->htab, map->key_size, key); 132 | if (l) 133 | { 134 | free(l); 135 | return 0; 136 | } 137 | 138 | errno = ENOENT; 139 | return -1; 140 | } 141 | 142 | void htab_map_free(struct bpf_map *map) 143 | { 144 | struct bpf_htab *htab = container_of(map, struct bpf_htab, map); 145 | 146 | ght_iterator_t iterator; 147 | const void *p_key; 148 | void *p_e; 149 | for (p_e = ght_first(htab->htab, &iterator, &p_key); p_e; p_e = ght_next(htab->htab, &iterator, &p_key)) 150 | { 151 | free(p_e); 152 | } 153 | 154 | ght_finalize(htab->htab); 155 | 156 | free(htab); 157 | } 158 | 159 | int htab_map_get_next_key(struct bpf_map *map, void *key, void *next_key) 160 | { 161 | struct bpf_htab *htab = container_of(map, struct bpf_htab, map); 162 | struct htab_elem *l; 163 | const void *p_key; 164 | 165 | // If current is equal to key then continue iterating 166 | // Otherwise, initialize iterator, get(key), if exist, iterate iterator until key 167 | 168 | if (htab->current != NULL && memcmp(htab->current->key, key, map->key_size) == 0) 169 | { 170 | htab->current = ght_next(htab->htab, &htab->iterator, &p_key); 171 | } 172 | else 173 | { 174 | htab->current = ght_first(htab->htab, &htab->iterator, &p_key); 175 | 176 | void *l = ght_get(htab->htab, map->key_size, key); 177 | if (l != NULL) 178 | { 179 | while (memcmp(p_key, key, map->key_size) != 0) 180 | { 181 | // found the item we were looking for 182 | htab->current = ght_next(htab->htab, &htab->iterator, &p_key); 183 | } 184 | 185 | // get the next item 186 | htab->current = ght_next(htab->htab, &htab->iterator, &p_key); 187 | } 
188 | } 189 | 190 | if (htab->current == NULL) 191 | { 192 | errno = ENOENT; 193 | return -1; 194 | } 195 | 196 | memcpy(next_key, p_key, map->key_size); 197 | return 0; 198 | } 199 | -------------------------------------------------------------------------------- /bpfmap/hashtab.h: -------------------------------------------------------------------------------- 1 | #ifndef __EBPF_HASHTAB_H 2 | #define __EBPF_HASHTAB_H 3 | 4 | #include "bpfmap.h" 5 | 6 | struct bpf_map *htab_map_alloc(union bpf_attr *attr); 7 | void *htab_map_lookup_elem(struct bpf_map *map, void *key); 8 | int htab_map_update_elem(struct bpf_map *map, void *key, void *value, uint64_t map_flags); 9 | int htab_map_delete_elem(struct bpf_map *map, void *key); 10 | void htab_map_free(struct bpf_map *map); 11 | int htab_map_get_next_key(struct bpf_map *map, void *key, void *next_key); 12 | 13 | #endif 14 | -------------------------------------------------------------------------------- /bpfmap/libghthash/hash_functions.c: -------------------------------------------------------------------------------- 1 | /********************************************************************* 2 | * 3 | * Copyright (C) 2001-2002, Simon Kagstrom 4 | * 5 | * Filename: hash_functions.c 6 | * Description: Hash functions 7 | * 8 | * This program is free software; you can redistribute it and/or 9 | * modify it under the terms of the GNU Library General Public License 10 | * as published by the Free Software Foundation; either version 2 11 | * of the License, or (at your option) any later version. 12 | * 13 | * This program is distributed in the hope that it will be useful, 14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | * GNU General Public License for more details. 
17 | * 18 | * You should have received a copy of the GNU Library General Public 19 | * License along with this program; if not, write to the Free Software 20 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 21 | * 02111-1307, USA. 22 | * 23 | * $Id: hash_functions.c 2174 2005-03-18 07:00:30Z ska $ 24 | * 25 | ********************************************************************/ 26 | #include 27 | 28 | #include "ght_hash_table.h" 29 | 30 | static ght_uint32_t crc32_table[256] = 31 | { 32 | 0x00000000,0x04c11db7,0x09823b6e,0x0d4326d9,0x130476dc,0x17c56b6b,0x1a864db2,0x1e475005, 33 | 0x2608edb8,0x22c9f00f,0x2f8ad6d6,0x2b4bcb61,0x350c9b64,0x31cd86d3,0x3c8ea00a,0x384fbdbd, 34 | 0x4c11db70,0x48d0c6c7,0x4593e01e,0x4152fda9,0x5f15adac,0x5bd4b01b,0x569796c2,0x52568b75, 35 | 0x6a1936c8,0x6ed82b7f,0x639b0da6,0x675a1011,0x791d4014,0x7ddc5da3,0x709f7b7a,0x745e66cd, 36 | 0x9823b6e0,0x9ce2ab57,0x91a18d8e,0x95609039,0x8b27c03c,0x8fe6dd8b,0x82a5fb52,0x8664e6e5, 37 | 0xbe2b5b58,0xbaea46ef,0xb7a96036,0xb3687d81,0xad2f2d84,0xa9ee3033,0xa4ad16ea,0xa06c0b5d, 38 | 0xd4326d90,0xd0f37027,0xddb056fe,0xd9714b49,0xc7361b4c,0xc3f706fb,0xceb42022,0xca753d95, 39 | 0xf23a8028,0xf6fb9d9f,0xfbb8bb46,0xff79a6f1,0xe13ef6f4,0xe5ffeb43,0xe8bccd9a,0xec7dd02d, 40 | 0x34867077,0x30476dc0,0x3d044b19,0x39c556ae,0x278206ab,0x23431b1c,0x2e003dc5,0x2ac12072, 41 | 0x128e9dcf,0x164f8078,0x1b0ca6a1,0x1fcdbb16,0x018aeb13,0x054bf6a4,0x0808d07d,0x0cc9cdca, 42 | 0x7897ab07,0x7c56b6b0,0x71159069,0x75d48dde,0x6b93dddb,0x6f52c06c,0x6211e6b5,0x66d0fb02, 43 | 0x5e9f46bf,0x5a5e5b08,0x571d7dd1,0x53dc6066,0x4d9b3063,0x495a2dd4,0x44190b0d,0x40d816ba, 44 | 0xaca5c697,0xa864db20,0xa527fdf9,0xa1e6e04e,0xbfa1b04b,0xbb60adfc,0xb6238b25,0xb2e29692, 45 | 0x8aad2b2f,0x8e6c3698,0x832f1041,0x87ee0df6,0x99a95df3,0x9d684044,0x902b669d,0x94ea7b2a, 46 | 0xe0b41de7,0xe4750050,0xe9362689,0xedf73b3e,0xf3b06b3b,0xf771768c,0xfa325055,0xfef34de2, 47 | 
0xc6bcf05f,0xc27dede8,0xcf3ecb31,0xcbffd686,0xd5b88683,0xd1799b34,0xdc3abded,0xd8fba05a, 48 | 0x690ce0ee,0x6dcdfd59,0x608edb80,0x644fc637,0x7a089632,0x7ec98b85,0x738aad5c,0x774bb0eb, 49 | 0x4f040d56,0x4bc510e1,0x46863638,0x42472b8f,0x5c007b8a,0x58c1663d,0x558240e4,0x51435d53, 50 | 0x251d3b9e,0x21dc2629,0x2c9f00f0,0x285e1d47,0x36194d42,0x32d850f5,0x3f9b762c,0x3b5a6b9b, 51 | 0x0315d626,0x07d4cb91,0x0a97ed48,0x0e56f0ff,0x1011a0fa,0x14d0bd4d,0x19939b94,0x1d528623, 52 | 0xf12f560e,0xf5ee4bb9,0xf8ad6d60,0xfc6c70d7,0xe22b20d2,0xe6ea3d65,0xeba91bbc,0xef68060b, 53 | 0xd727bbb6,0xd3e6a601,0xdea580d8,0xda649d6f,0xc423cd6a,0xc0e2d0dd,0xcda1f604,0xc960ebb3, 54 | 0xbd3e8d7e,0xb9ff90c9,0xb4bcb610,0xb07daba7,0xae3afba2,0xaafbe615,0xa7b8c0cc,0xa379dd7b, 55 | 0x9b3660c6,0x9ff77d71,0x92b45ba8,0x9675461f,0x8832161a,0x8cf30bad,0x81b02d74,0x857130c3, 56 | 0x5d8a9099,0x594b8d2e,0x5408abf7,0x50c9b640,0x4e8ee645,0x4a4ffbf2,0x470cdd2b,0x43cdc09c, 57 | 0x7b827d21,0x7f436096,0x7200464f,0x76c15bf8,0x68860bfd,0x6c47164a,0x61043093,0x65c52d24, 58 | 0x119b4be9,0x155a565e,0x18197087,0x1cd86d30,0x029f3d35,0x065e2082,0x0b1d065b,0x0fdc1bec, 59 | 0x3793a651,0x3352bbe6,0x3e119d3f,0x3ad08088,0x2497d08d,0x2056cd3a,0x2d15ebe3,0x29d4f654, 60 | 0xc5a92679,0xc1683bce,0xcc2b1d17,0xc8ea00a0,0xd6ad50a5,0xd26c4d12,0xdf2f6bcb,0xdbee767c, 61 | 0xe3a1cbc1,0xe760d676,0xea23f0af,0xeee2ed18,0xf0a5bd1d,0xf464a0aa,0xf9278673,0xfde69bc4, 62 | 0x89b8fd09,0x8d79e0be,0x803ac667,0x84fbdbd0,0x9abc8bd5,0x9e7d9662,0x933eb0bb,0x97ffad0c, 63 | 0xafb010b1,0xab710d06,0xa6322bdf,0xa2f33668,0xbcb4666d,0xb8757bda,0xb5365d03,0xb1f740b4 64 | }; 65 | 66 | /* One-at-a-time hash (found in a web article from ddj), this is the 67 | * standard hash function. 68 | * 69 | * See http://burtleburtle.net/bob/hash/doobs.html 70 | * for the hash functions used here. 
71 | */ 72 | ght_uint32_t ght_one_at_a_time_hash(ght_hash_key_t *p_key) 73 | { 74 | ght_uint32_t i_hash=0; 75 | int i; 76 | 77 | assert(p_key); 78 | 79 | for (i=0; ii_size; ++i) 80 | { 81 | i_hash += ((unsigned char*)p_key->p_key)[i]; 82 | i_hash += (i_hash << 10); 83 | i_hash ^= (i_hash >> 6); 84 | } 85 | i_hash += (i_hash << 3); 86 | i_hash ^= (i_hash >> 11); 87 | i_hash += (i_hash << 15); 88 | 89 | return i_hash; 90 | } 91 | 92 | 93 | /* CRC32 hash based on code from comp.compression FAQ. 94 | * Added by Dru Lemley 95 | */ 96 | ght_uint32_t ght_crc_hash(ght_hash_key_t *p_key) 97 | { 98 | unsigned char *p, *p_end; 99 | ght_uint32_t crc; 100 | 101 | assert(p_key); 102 | 103 | crc = 0xffffffff; /* preload shift register, per CRC-32 spec */ 104 | p = (unsigned char *)p_key->p_key; 105 | p_end = p + p_key->i_size; 106 | while (p < p_end) 107 | crc = (crc << 8) ^ crc32_table[(crc >> 24) ^ *(p++)]; 108 | return ~crc; /* transmit complement, per CRC-32 spec */ 109 | } 110 | 111 | /* Rotating hash function. 
*/ 112 | ght_uint32_t ght_rotating_hash(ght_hash_key_t *p_key) 113 | { 114 | ght_uint32_t i_hash=0; 115 | int i; 116 | 117 | assert(p_key); 118 | 119 | for (i=0; ii_size; ++i) 120 | { 121 | i_hash = (i_hash<<4)^(i_hash>>28)^((unsigned char*)p_key->p_key)[i]; 122 | } 123 | 124 | return i_hash; 125 | } 126 | -------------------------------------------------------------------------------- /bpfmap/lpm_trie.h: -------------------------------------------------------------------------------- 1 | #ifndef __EBPF_LPM_TRIE_H 2 | #define __EBPF_LPM_TRIE_H 3 | 4 | #include "bpfmap.h" 5 | #include 6 | 7 | struct bpf_map *trie_alloc(union bpf_attr *attr); 8 | void *trie_lookup_elem(struct bpf_map *map, void *key); 9 | int trie_update_elem(struct bpf_map *map, void *key, void *value, uint64_t map_flags); 10 | int trie_delete_elem(struct bpf_map *map, void *key); 11 | void trie_free(struct bpf_map *map); 12 | int trie_get_next_key(struct bpf_map *map, void *key, void *next_key); 13 | 14 | struct bpf_lpm_trie_key 15 | { 16 | uint32_t prefixlen; /* up to 32 for AF_INET, 128 for AF_INET6 */ 17 | uint8_t data[0]; /* Arbitrary size */ 18 | }; 19 | 20 | #endif 21 | -------------------------------------------------------------------------------- /bpfmap/test_arraymap.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "arraymap.h" 4 | 5 | struct ewma_stats { 6 | uint64_t volume; 7 | uint64_t packets; 8 | uint64_t prediction; 9 | uint32_t lasttime; 10 | uint32_t count; 11 | }; 12 | 13 | int main() { 14 | union bpf_attr attr = { 15 | .map_type = BPF_MAP_TYPE_ARRAY, 16 | .key_size = sizeof(uint32_t), 17 | .value_size = sizeof(struct ewma_stats), 18 | .max_entries = 20, 19 | .map_flags = 0, 20 | }; 21 | 22 | // static struct bpf_map *array_map_alloc(union bpf_attr *attr); 23 | struct bpf_map *array_map; 24 | array_map = array_map_alloc(&attr); 25 | 26 | if (array_map == NULL) { 27 | printf("Error creating the array 
map\n"); 28 | return EXIT_FAILURE; 29 | } 30 | 31 | uint32_t key1 = 0; 32 | struct ewma_stats *stats; 33 | 34 | stats = array_map_lookup_elem(array_map, &key1); 35 | printf("%lu\n", stats->packets); 36 | 37 | stats->packets++; 38 | 39 | stats = array_map_lookup_elem(array_map, &key1); 40 | printf("%lu\n", stats->packets); 41 | 42 | uint32_t key2 = 1; 43 | stats = array_map_lookup_elem(array_map, &key2); 44 | printf("%lu\n", stats->packets); 45 | 46 | return EXIT_SUCCESS; 47 | } 48 | -------------------------------------------------------------------------------- /bpfmap/test_bpfmap.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "bpfmap.h" 5 | 6 | int main(int argc, char *argv[]) 7 | { 8 | // bpf_create_map(enum bpf_map_type map_type, int key_size, int value_size, int max_entries) 9 | 10 | int map = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(uint32_t), sizeof(uint32_t), 20, 0); 11 | 12 | // int bpf_update_elem(int map, void *key, void *value, unsigned long long flags); 13 | // int bpf_lookup_elem(int map, void *key, void *value); 14 | // int bpf_delete_elem(int map, void *key); 15 | // int bpf_get_next_key(int map, void *key, void *next_key); 16 | 17 | uint32_t key = 0xdeadbeef; 18 | uint32_t value = 0x11223344; 19 | uint32_t *item; 20 | 21 | if (bpf_update_elem(map, &key, &value, 0) != 0) 22 | { 23 | printf("error inserting element\n"); 24 | } 25 | 26 | if (bpf_lookup_elem(map, &key, &item) != 0) 27 | { 28 | printf("error lookup up element\n"); 29 | } 30 | 31 | value = 0x22334455; 32 | if (bpf_update_elem(map, &key, &value, 0) != 0) 33 | { 34 | printf("error updating table\n"); 35 | } 36 | 37 | if (bpf_lookup_elem(map, &key, &item) != 0) 38 | { 39 | printf("error looking up updated element\n"); 40 | } 41 | 42 | if (bpf_delete_elem(map, &key) != 0) 43 | { 44 | printf("error deleting element\n"); 45 | } 46 | 47 | if (bpf_lookup_elem(map, &key, &item) == 0) 48 | { 49 | 
printf("should return an error the element was deleted\n"); 50 | } 51 | 52 | if (item != NULL) 53 | { 54 | printf("non existing element should return null\n"); 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /bpfmap/test_hashmap.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "hashtab.h" 4 | #include 5 | 6 | struct objvalue { 7 | int32_t count; 8 | int32_t size; 9 | int16_t test; 10 | }; 11 | 12 | int main(int argc, char *argv[]) { 13 | printf("testing hashmap\n"); 14 | 15 | union bpf_attr attr = { 16 | .map_type = BPF_MAP_TYPE_HASH, 17 | .key_size = sizeof(uint32_t), 18 | .value_size = sizeof(uint32_t), 19 | .max_entries = 20, 20 | .map_flags = 0, 21 | }; 22 | 23 | struct bpf_map *map = htab_map_alloc(&attr); 24 | 25 | if (map == NULL) { 26 | printf("Invalid parameters for creating the map\n"); 27 | return EXIT_FAILURE; 28 | } 29 | 30 | printf("map created successfully\n"); 31 | 32 | uint32_t key = 0xaabbccdd; 33 | uint32_t value = 0xdeadbeef; 34 | void *elem = NULL; 35 | 36 | /* Lookup for a non existing element */ 37 | elem = htab_map_lookup_elem(map, &key); 38 | if (elem != NULL) { 39 | printf("Error: found element that shouldn't exist\n"); 40 | } 41 | 42 | /* Insert a new element */ 43 | if (htab_map_update_elem(map, &key, &value, 0) == -1) { 44 | printf("Error: unable to insert entry in the hastable\n"); 45 | } 46 | 47 | /* Lookup for existing element */ 48 | elem = htab_map_lookup_elem(map, &key); 49 | if (elem == NULL) { 50 | printf("Error: unable to get element previously inserted\n"); 51 | } 52 | 53 | if (*(uint32_t *)elem != value) { 54 | printf("Error: lookup value is not the same as inserted\n"); 55 | printf("Got %x expected %x\n", *(uint32_t *)elem, value); 56 | } 57 | 58 | /* Update the element */ 59 | value = 0x11223344; 60 | if (htab_map_update_elem(map, &key, &value, 0) == -1) { 61 | printf("Error: unable to update entry 
in the hastable\n"); 62 | } 63 | 64 | if (*(uint32_t *)elem != value) { 65 | printf("Error: lookup value is not the same as update\n"); 66 | printf("Got %x expected %x\n", *(uint32_t *)elem, value); 67 | } 68 | 69 | 70 | /* Insert a second item */ 71 | uint32_t key2 = 0x12345678; 72 | uint32_t value2 = 0xbeefbeef; 73 | if (htab_map_update_elem(map, &key2, &value2, 0) == -1) { 74 | printf("Error: unable to insert entry in the hastable\n"); 75 | } 76 | 77 | /* Lookup second item was inserted */ 78 | elem = htab_map_lookup_elem(map, &key2); 79 | if (elem == NULL) { 80 | printf("Error: unable to get element previously inserted\n"); 81 | } 82 | 83 | if (*(uint32_t *)elem != value2) { 84 | printf("Error: lookup value is not the same as inserted\n"); 85 | printf("Got %x expected %x\n", *(uint32_t *)elem, value2); 86 | } 87 | 88 | 89 | /* Iterate over the table */ 90 | uint32_t next_key = 0; 91 | int count = 0; 92 | for (int ret = htab_map_get_next_key(map, &next_key, &next_key); ret != -1; ret = htab_map_get_next_key(map, &next_key, &next_key)) { 93 | printf("next key is %x\n", next_key); 94 | count++; 95 | } 96 | 97 | if (count != 2) { 98 | printf("Error: expected 2 items in the hashtable got %d\n", count); 99 | } 100 | 101 | // check we can start iterating from a known key 102 | printf("current key is %x\n", key); 103 | htab_map_get_next_key(map, &key, &next_key); 104 | if (next_key != key2) { 105 | printf("Error: expected next key to be %x got %x\n", key2, next_key); 106 | } 107 | 108 | if (htab_map_get_next_key(map, &next_key, &next_key) != -1) { 109 | printf("Error: expected end of iteration\n"); 110 | } 111 | 112 | 113 | /* Remove the entry */ 114 | if (htab_map_delete_elem(map, &key) != 0) { 115 | printf("Error: unable to remove the entry\n"); 116 | } 117 | 118 | /* Lookup to see if the element was properly removed */ 119 | elem = htab_map_lookup_elem(map, &key); 120 | if (elem != NULL) { 121 | printf("Error: found element that shouldn't exist\n"); 122 | } 123 | 
124 | /* Remove an already removed entry */ 125 | if (htab_map_delete_elem(map, &key) != -1) { 126 | printf("Error: managed to removed an already removed entry\n"); 127 | } 128 | 129 | 130 | /* test with a map storing objects */ 131 | union bpf_attr objattr = { 132 | .map_type = BPF_MAP_TYPE_HASH, 133 | .key_size = sizeof(uint32_t), 134 | .value_size = sizeof(struct objvalue), 135 | .max_entries = 20, 136 | .map_flags = 0, 137 | }; 138 | 139 | struct bpf_map *objmap = htab_map_alloc(&objattr); 140 | 141 | if (map == NULL) { 142 | printf("Invalid parameters for creating the map\n"); 143 | return EXIT_FAILURE; 144 | } 145 | 146 | uint32_t objkey = 0x11223344; 147 | struct objvalue objval = { 148 | .count = 0, 149 | .size = 0, 150 | .test = 0, 151 | }; 152 | 153 | if (htab_map_update_elem(objmap, &objkey, &objval, 0) == -1) { 154 | printf("Error: unable to insert entry in the hastable\n"); 155 | } 156 | 157 | elem = htab_map_lookup_elem(objmap, &objkey); 158 | if (elem == NULL) { 159 | printf("Error: unable to find object in map\n"); 160 | } 161 | 162 | if (((struct objvalue *)elem)->count != 0) { 163 | printf("Error: expected count to be 0\n"); 164 | } 165 | 166 | ((struct objvalue *)elem)->count++; 167 | 168 | elem = htab_map_lookup_elem(objmap, &objkey); 169 | 170 | if (((struct objvalue *)elem)->count != 1) { 171 | printf("Error: expected count to be 0, got %d\n", ((struct objvalue *)elem)->count); 172 | } 173 | 174 | return EXIT_SUCCESS; 175 | } 176 | -------------------------------------------------------------------------------- /controller/cli.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import cmd 3 | import os 4 | import struct 5 | 6 | from threading import Thread 7 | from twisted.internet import reactor 8 | 9 | from core import eBPFCoreApplication, set_event_handler, FLOOD 10 | from core.packets import * 11 | 12 | # The intro message to show at the top when running the program 13 | banner 
def tabulate(rows, headers=None):
    """
    Print `rows` as a right-aligned text table on stdout.

    rows    -- sequence of equally long sequences; every cell is formatted
               with str.format. An empty or None value prints a single
               empty line and nothing else.
    headers -- optional sequence of column titles printed above the table.
    """
    if not rows:
        print('')
        return

    # A column is as wide as its longest cell ...
    widths = [max(len(str(row[col])) for row in rows) for col in range(len(rows[0]))]

    # ... or as wide as its header when that is longer
    if headers:
        widths = [max(widths[col], len(header)) for col, header in enumerate(headers)]

    # Two extra characters of padding per column for readability
    widths = [width + 2 for width in widths]

    row_format = ' '.join('{{:>{}}}'.format(width) for width in widths)
    rule = ['=' * width for width in widths]

    print('')
    if headers:
        print(row_format.format(*headers))

    print(row_format.format(*rule))
    for row in rows:
        print(row_format.format(*row))
    print(row_format.format(*rule))

class SwitchTableCli(cmd.Cmd):
    """
    Commands operating on one table of one installed function.
    Keys and values are given on the command line as hex strings.
    """
    def __init__(self, connection, function_id, table_name):
        cmd.Cmd.__init__(self)
        self.connection = connection
        self.function_id = function_id
        self.table_name = table_name

    def do_list(self, line):
        self.connection.send(TableListRequest(index=self.function_id, table_name=self.table_name))

    def do_get(self, line):
        # bytes.fromhex raises ValueError on malformed input
        self.connection.send(TableEntryGetRequest(index=self.function_id, table_name=self.table_name, key=bytes.fromhex(line)))

    def do_update(self, line):
        args = line.split()
        if len(args) != 2:
            print("update <key> <value>")
            return

        key, value = args
        self.connection.send(TableEntryInsertRequest(index=self.function_id, table_name=self.table_name, key=bytes.fromhex(key), value=bytes.fromhex(value)))

    def do_delete(self, line):
        self.connection.send(TableEntryDeleteRequest(index=self.function_id, table_name=self.table_name, key=bytes.fromhex(line)))

    def emptyline(self):
        self.do_help(None)

class SwitchTablesCli(cmd.Cmd):
    """
    Commands operating on the tables of one installed function. Any command
    other than `list` is taken as a table name and the remainder of the line
    is dispatched to SwitchTableCli.
    """
    def __init__(self, connection, function_id: int):
        cmd.Cmd.__init__(self)
        self.connection = connection
        self.function_id = function_id

    def do_list(self, line):
        self.connection.send(TablesListRequest(index=self.function_id))

    def default(self, line: str) -> None:
        args = line.split(maxsplit=1)

        if not args:
            cmd.Cmd.default(self, line)
            return

        table_name = args[0]
        remainder = args[1] if len(args) > 1 else ''

        try:
            SwitchTableCli(self.connection, self.function_id, table_name).onecmd(remainder)
        except ValueError:
            # e.g. a key or value that is not valid hex
            cmd.Cmd.default(self, line)

    def emptyline(self):
        self.do_help(None)

class SwitchCLI(cmd.Cmd):
    """
    Commands addressed to one connected switch: list, add and remove
    functions, and access the tables of a function.
    """
    def __init__(self, connection):
        cmd.Cmd.__init__(self)
        self.connection = connection

    def do_list(self, line: str):
        self.connection.send(FunctionListRequest())

    def do_add(self, line: str) -> None:
        args = line.split()

        # 1 add 0 test ../examples/learningswitch.o
        if len(args) != 3:
            print("invalid")
            return

        index, name, path = args

        if not os.path.isfile(path):
            print('Invalid file path')
            return

        with open(path, 'rb') as f:
            elf = f.read()
            self.connection.send(FunctionAddRequest(name=name, index=int(index), elf=elf))

    def do_remove(self, line: str) -> None:
        self.connection.send(FunctionRemoveRequest(index=int(line)))

    def do_table(self, line: str) -> None:
        args = line.split(maxsplit=1)

        if not args:
            cmd.Cmd.default(self, line)
            return

        try:
            # The function index is parsed as hexadecimal
            function_id = int(args[0], 16)

            SwitchTablesCli(self.connection, function_id).onecmd(args[1] if len(args) > 1 else '')
        except ValueError:
            cmd.Cmd.default(self, line)

    def emptyline(self):
        self.do_help(None)

class MainCLI(cmd.Cmd):
    """
    Top level prompt. A line starting with a hexadecimal dpid is forwarded to
    the SwitchCLI of that switch; `connections` lists the connected switches.
    """
    def __init__(self, application):
        cmd.Cmd.__init__(self)
        self.application = application

    def preloop(self):
        print(banner)
        self.do_help(None)

    def default(self, line):
        args = line.split()

        if not args:
            cmd.Cmd.default(self, line)
            return

        try:
            dpid = int(args[0], 16)

            connection = self.application.connections.get(dpid)
            if connection is not None:
                SwitchCLI(connection).onecmd(' '.join(args[1:]))
            else:
                # Same hex formatting as the `connections` listing
                print(f'Switch with dpid {dpid:08X} is not connected.')
        except ValueError:
            cmd.Cmd.default(self, line)

    def do_connections(self, line):
        tabulate([ ('{:08X}'.format(k), c.version, c.connected_at) for k,c in self.application.connections.items() ], headers=['dpid', 'version', 'connected at'])

    def emptyline(self):
        pass

    # def do_EOF(self, line):
    #     return True
Exiting...") 188 | finally: 189 | reactor.callFromThread(reactor.stop) 190 | 191 | @set_event_handler(Header.TABLES_LIST_REPLY) 192 | def tables_list_reply(self, connection, pkt): 193 | tabulate([ (e.table_name, TableDefinition.TableType.Name(e.table_type), e.key_size, e.value_size, e.max_entries) for e in pkt.entries ], headers=['name', 'type', 'key size', 'value size', 'max entries']) 194 | 195 | @set_event_handler(Header.TABLE_LIST_REPLY) 196 | def table_list_reply(self, connection, pkt): 197 | entries = [] 198 | 199 | if pkt.entry.table_type in [TableDefinition.HASH, TableDefinition.LPM_TRIE]: 200 | item_size = pkt.entry.key_size + pkt.entry.value_size 201 | fmt = "{}s{}s".format(pkt.entry.key_size, pkt.entry.value_size) 202 | 203 | for i in range(pkt.n_items): 204 | key, value = struct.unpack_from(fmt, pkt.items, i * item_size) 205 | entries.append((key.hex(), value.hex())) 206 | 207 | elif pkt.entry.table_type == TableDefinition.ARRAY: 208 | item_size = pkt.entry.value_size 209 | fmt = "{}s".format(pkt.entry.value_size) 210 | 211 | for i in range(pkt.n_items): 212 | value = struct.unpack_from(fmt, pkt.items, i * item_size)[0] 213 | entries.append((i, value.hex())) 214 | 215 | tabulate(entries, headers=["Key", "Value"]) 216 | 217 | @set_event_handler(Header.TABLE_ENTRY_GET_REPLY) 218 | def table_entry_get_reply(self, connection, pkt): 219 | tabulate([(pkt.key.hex(), pkt.value.hex())], headers=["Key", "Value"]) 220 | 221 | @set_event_handler(Header.NOTIFY) 222 | def notify_event(self, connection, pkt): 223 | print(f'\n[{connection.dpid}] Received notify event {pkt.id}, data length {pkt.data}') 224 | print(pkt.data.hex()) 225 | 226 | @set_event_handler(Header.PACKET_IN) 227 | def packet_in(self, connection, pkt): 228 | print(f"\n[{connection.dpid}] Received packet in {pkt.data.hex()}") 229 | 230 | @set_event_handler(Header.FUNCTION_LIST_REPLY) 231 | def function_list_reply(self, connection, pkt): 232 | tabulate([ (e.index or 0, e.name, e.counter or 0) for e in 
class eBPFCoreApplication(object):
    """
    Base controller application. Listens for switches on TCP port 9000 and
    keeps the connections that completed the HELLO exchange in
    `self.connections`, keyed by datapath id.
    """
    def __init__(self):
        self.connections = {}
        reactor.listenTCP(9000, eBPFFactory(self))

    @set_event_handler('disconnect')
    def connection_closed(self, connection, reason):
        # `dpid` is only assigned in hello_request, so a connection that
        # drops before sending HELLO has no such attribute and was never
        # registered.
        dpid = getattr(connection, 'dpid', None)

        # Only forget the entry if it still refers to this connection, so a
        # newer connection registered under the same dpid is left alone.
        if self.connections.get(dpid) is connection:
            del self.connections[dpid]

    @set_event_handler(Header.HELLO)
    def hello_request(self, connection, pkt):
        print('Connection from switch {:08X}, version {}'.format(pkt.dpid, pkt.version))

        # Record the switch identity on the connection and register it
        connection.dpid = pkt.dpid
        connection.version = pkt.version
        connection.connected_at = time.time()
        self.connections[connection.dpid] = connection

        # Send HELLO back
        connection.send(Hello(version=1, dpid=0))

    def run(self):
        """
        Run the twisted reactor.
        """
        reactor.run()
0x00 13 | FLOOD = 0x01 << 32 14 | CONTROLLER = 0x02 << 32 15 | DROP = 0x03 << 32 16 | NEXT = 0x04 << 32 17 | 18 | class eBPFFactory(protocol.Factory): 19 | def __init__(self, application): 20 | self.application = application 21 | 22 | def buildProtocol(self, addr): 23 | return eBPFProtocol(self, self.application) 24 | 25 | PacketHeader = namedtuple('PacketHeader', ['type', 'length']) 26 | 27 | class eBPFProtocol(protocol.Protocol): 28 | _message_type_to_object = { 29 | Header.HELLO: Hello, 30 | 31 | Header.FUNCTION_ADD_REQUEST: FunctionAddRequest, 32 | Header.FUNCTION_ADD_REPLY: FunctionAddReply, 33 | Header.FUNCTION_REMOVE_REQUEST: FunctionRemoveRequest, 34 | Header.FUNCTION_REMOVE_REPLY: FunctionRemoveReply, 35 | Header.FUNCTION_LIST_REQUEST: FunctionListRequest, 36 | Header.FUNCTION_LIST_REPLY: FunctionListReply, 37 | 38 | Header.TABLES_LIST_REQUEST: TablesListRequest, 39 | Header.TABLES_LIST_REPLY: TablesListReply, 40 | Header.TABLE_LIST_REQUEST: TableListRequest, 41 | Header.TABLE_LIST_REPLY: TableListReply, 42 | Header.TABLE_ENTRY_GET_REQUEST: TableEntryGetRequest, 43 | Header.TABLE_ENTRY_GET_REPLY: TableEntryGetReply, 44 | Header.TABLE_ENTRY_INSERT_REQUEST: TableEntryInsertRequest, 45 | Header.TABLE_ENTRY_INSERT_REPLY: TableEntryInsertReply, 46 | Header.TABLE_ENTRY_DELETE_REQUEST: TableEntryDeleteRequest, 47 | Header.TABLE_ENTRY_DELETE_REPLY: TableEntryDeleteReply, 48 | Header.PACKET_IN: PacketIn, 49 | Header.PACKET_OUT: PacketOut, 50 | Header.NOTIFY: Notify, 51 | } 52 | 53 | _message_object_to_type = { v: k for k,v in _message_type_to_object.items() } 54 | 55 | HEADER_FMT = '>HH' 56 | HEADER_LENGTH = struct.calcsize(HEADER_FMT) 57 | 58 | def __init__(self, factory, application): 59 | self.factory = factory 60 | self.application = application 61 | self.buffer = bytearray() 62 | self.header = None 63 | 64 | def _read_packets(self): 65 | """ 66 | Generator to read the incoming packets, yield a tuple with the 67 | header as the first element and the object 
representing the packet 68 | as second element if the message type is known or the raw payload 69 | otherwise. The generator is stopped if a full packet 70 | (header and payload) is not available. 71 | """ 72 | 73 | while (not self.header and len(self.buffer) >= eBPFProtocol.HEADER_LENGTH) or (self.header and len(self.buffer) >= self.header.length): 74 | if not self.header and len(self.buffer) >= eBPFProtocol.HEADER_LENGTH: 75 | self.header = PacketHeader(*struct.unpack(eBPFProtocol.HEADER_FMT, self.buffer[:eBPFProtocol.HEADER_LENGTH])) 76 | self.buffer = self.buffer[eBPFProtocol.HEADER_LENGTH:] 77 | 78 | if self.header and len(self.buffer) >= self.header.length: 79 | # read the payload of the packet 80 | payload = bytes(self.buffer[:self.header.length]) 81 | self.buffer = self.buffer[self.header.length:] 82 | 83 | # Deserialize the packet to its associated object 84 | cls = eBPFProtocol._message_type_to_object.get(self.header.type) 85 | if cls: 86 | inst = cls() 87 | inst.ParseFromString(payload) 88 | yield (self.header, inst) 89 | else: 90 | # No handler for 91 | yield (self.header, payload) 92 | 93 | # Clear the header for the next packet 94 | self.header = None 95 | 96 | def _run_handlers(self, event, *args): 97 | """ 98 | Execute all the handlers (if any) for the event type provided. 99 | """ 100 | for handler in _handlers.get(event, []): 101 | handler(self.application, self, *args) 102 | 103 | def dataReceived(self, data): 104 | # append the newly received data to the buffer 105 | self.buffer.extend(data) 106 | 107 | # Iterate over the packets received, call the associated handlers 108 | for header, packet in self._read_packets(): 109 | self._run_handlers(header.type, packet) 110 | 111 | def connectionLost(self, reason): 112 | self._run_handlers('disconnect', reason) 113 | 114 | def send(self, pkt): 115 | """ 116 | Serialize and send a message to a switch. 
#!/usr/bin/env python
import struct

from core import eBPFCoreApplication, set_event_handler
from core.packets import *

import time

from matplotlib import pyplot as plt

mng = plt.get_current_fig_manager()
mng.window.attributes('-fullscreen', True)
plt.ion()

PLOT_SAMPLES = 100
PORT_COUNT = 8

class PortData(object):
    """
    Sliding window of the last PLOT_SAMPLES (time, volume, prediction)
    samples of one port, drawn as two lines in the port's own subplot.
    """
    def __init__(self, idx):
        self.idx = idx
        self.volume = [0]*PLOT_SAMPLES
        self.prediction = [0]*PLOT_SAMPLES
        self.time = [0]*PLOT_SAMPLES

        # Integer division: the subplot grid dimensions must be integers
        plt.subplot(PORT_COUNT // 2, 2, self.idx+1) # need to add 1 as 0 in matplotlib is reserved
        self.volume_line, = plt.plot(self.time, self.volume)
        self.prediction_line, = plt.plot(self.time, self.prediction)

    def add_points(self, time, volume, prediction):
        """
        Drop the oldest sample, append the new one and rescale the subplot.
        """
        self.time.pop(0)
        self.volume.pop(0)
        self.prediction.pop(0)

        self.time.append(time)
        self.volume.append(volume)
        self.prediction.append(prediction)

        ymax = max(max(self.volume), max(self.prediction))
        xmin = min(self.time)
        xmax = max(self.time)

        self.volume_line.set_xdata(self.time)
        self.volume_line.set_ydata(self.volume)

        self.prediction_line.set_xdata(self.time)
        self.prediction_line.set_ydata(self.prediction)

        plt.subplot(PORT_COUNT // 2, 2, self.idx+1)
        plt.ylim([0, ymax])
        plt.xlim([xmin, xmax])


ports_data = [ PortData(i) for i in range(PORT_COUNT) ]
plt.tight_layout()

plt.title('EWMA')
plt.xlabel('time (s)')
plt.draw()
plt.show()

class EWMAApplication(eBPFCoreApplication):
    """
    Installs the EWMA example on every switch that connects and plots the
    per-port statistics carried by its notify events.
    """
    # volume, packets, prediction (uint64) followed by lasttime, count (uint32)
    ewmaStruct = struct.Struct('QQQII')

    @set_event_handler(Header.HELLO)
    def hello(self, connection, pkt):
        with open('../examples/ewma.o', 'rb') as f:
            print("Installing the eBPF ELF")
            # core.packets exports no InstallRequest; functions are installed
            # with FunctionAddRequest(name, index, elf).
            # NOTE(review): index 0 and the name 'ewma' are assumptions -- confirm.
            connection.send(FunctionAddRequest(name='ewma', index=0, elf=f.read()))
            self.start_time = time.time()

    @set_event_handler(Header.NOTIFY)
    def notify_event(self, connection, pkt):
        print(pkt.data.hex())
        volume, packets, prediction, lasttime, count = EWMAApplication.ewmaStruct.unpack(pkt.data)
        print(f'[{connection.dpid}] [{pkt.id}] volume: {volume} prediction: {prediction} packets: {packets}')

        # pkt.id selects the port plot; ignore ids outside the plotted range
        # rather than raising (or wrapping around on a negative id).
        if not 0 <= pkt.id < len(ports_data):
            print(f'[{connection.dpid}] ignoring notify for unknown port {pkt.id}')
            return

        port_data = ports_data[pkt.id]
        port_data.add_points(time.time() - self.start_time, volume, prediction)

        plt.draw()
        plt.pause(0.01)

if __name__ == '__main__':
    EWMAApplication().run()
allow experimental API as l2fwd uses rte_ethdev_set_ptype API 29 | CFLAGS += -DALLOW_EXPERIMENTAL_API 30 | LDFLAGS_SHARED = $(shell $(PKGCONF) --libs libdpdk) 31 | LDFLAGS_STATIC = $(shell $(PKGCONF) --static --libs libdpdk) 32 | 33 | ifeq ($(MAKECMDGOALS),static) 34 | # check for broken pkg-config 35 | ifeq ($(shell echo $(LDFLAGS_STATIC) | grep 'whole-archive.*l:lib.*no-whole-archive'),) 36 | $(warning "pkg-config output list does not contain drivers between 'whole-archive'/'no-whole-archive' flags.") 37 | $(error "Cannot generate statically-linked binaries with this version of pkg-config") 38 | endif 39 | endif 40 | 41 | build/$(APP)-shared: $(SRCS-y) Makefile $(PC_FILE) | build 42 | $(CC) $(CFLAGS) $(SRCS-y) -o $@ $(LDFLAGS) $(LDFLAGS_SHARED) 43 | 44 | build/$(APP)-static: $(SRCS-y) Makefile $(PC_FILE) | build 45 | $(CC) $(CFLAGS) $(SRCS-y) -o $@ $(LDFLAGS) $(LDFLAGS_STATIC) 46 | 47 | build: 48 | @mkdir -p $@ 49 | 50 | .PHONY: clean 51 | clean: 52 | rm -f build/$(APP) build/$(APP)-static build/$(APP)-shared 53 | test -d build && rmdir -p build || true 54 | -------------------------------------------------------------------------------- /dpdkswitch/meson.build: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: BSD-3-Clause 2 | # Copyright(c) 2017 Intel Corporation 3 | 4 | # meson file, for building this example as part of a main DPDK build. 
5 | # 6 | # To build this example as a standalone application with an already-installed 7 | # DPDK instance, use 'make' 8 | 9 | # Enable experimental API flag as l2fwd uses rte_ethdev_set_ptype API 10 | allow_experimental_apis = true 11 | sources = files( 12 | 'main.c', 13 | ) 14 | -------------------------------------------------------------------------------- /examples/Makefile: -------------------------------------------------------------------------------- 1 | SRC = $(wildcard *.c) 2 | TAR = $(SRC:.c=.o) 3 | 4 | .PHONE: all clean 5 | 6 | all: $(TAR) 7 | 8 | %.o: %.c 9 | clang -O2 -target bpf -I ../includes -c $< -o $@ 10 | 11 | clean: 12 | rm -f $(TAR) 13 | -------------------------------------------------------------------------------- /examples/ewma.c: -------------------------------------------------------------------------------- 1 | #include "ebpf_switch.h" 2 | 3 | #define EWMA_DELTA 5 // in seconds 4 | 5 | struct ewma_stats 6 | { 7 | uint64_t volume; 8 | uint64_t packets; 9 | uint64_t prediction; 10 | uint32_t lasttime; 11 | uint32_t count; 12 | }; 13 | 14 | struct bpf_map_def SEC("maps") ewma = { 15 | .type = BPF_MAP_TYPE_ARRAY, 16 | .key_size = sizeof(unsigned int), 17 | .value_size = sizeof(struct ewma_stats), 18 | .max_entries = 24}; 19 | 20 | uint64_t prog(struct packet *pkt) 21 | { 22 | struct ewma_stats *ewma_stat; 23 | 24 | bpf_map_lookup_elem(&ewma, &pkt->metadata.in_port, &ewma_stat); 25 | 26 | ewma_stat->volume += pkt->metadata.length; 27 | ewma_stat->packets++; 28 | 29 | if (pkt->metadata.sec - ewma_stat->lasttime > EWMA_DELTA) 30 | { 31 | // could use nsec to be more accurate 32 | // compute the new prediction, prediction = alpha * volume + (1.0-alpha)*prediction 33 | ewma_stat->prediction = (ewma_stat->volume + (ewma_stat->prediction << 3) - ewma_stat->prediction) >> 3; 34 | 35 | bpf_notify(pkt->metadata.in_port, ewma_stat, sizeof(struct ewma_stats)); 36 | 37 | // 38 | ewma_stat->lasttime = pkt->metadata.sec; 39 | ewma_stat->packets = 0; 40 
| ewma_stat->volume = 0; 41 | ewma_stat->count++; 42 | } 43 | 44 | return NEXT; 45 | } 46 | char _license[] SEC("license") = "GPL"; 47 | -------------------------------------------------------------------------------- /examples/flowarrival.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "ebpf_switch.h" 4 | 5 | struct arrival_stats 6 | { 7 | uint32_t lasttime; 8 | uint32_t arrival; 9 | uint32_t departure; 10 | }; 11 | 12 | struct bpf_map_def SEC("maps") flowarrival = { 13 | .type = BPF_MAP_TYPE_ARRAY, 14 | .key_size = sizeof(unsigned int), 15 | .value_size = sizeof(struct arrival_stats), 16 | .max_entries = 1, 17 | }; 18 | 19 | uint64_t prog(struct packet *pkt) 20 | { 21 | // Check if the ethernet frame contains an ipv4 payload 22 | if (pkt->eth.h_proto == 0x0008) 23 | { 24 | struct ip *ipv4 = (struct ip *)(((uint8_t *)&pkt->eth) + ETH_HLEN); 25 | 26 | // Check if the ip packet contains a TCP payload 27 | if (ipv4->ip_p == 6) 28 | { 29 | struct tcphdr *tcp = (struct tcphdr *)(((uint32_t *)ipv4) + ipv4->ip_hl); 30 | 31 | if (tcp->th_flags & (TH_SYN | TH_FIN)) 32 | { 33 | struct arrival_stats *astats; 34 | unsigned int key = 0; 35 | bpf_map_lookup_elem(&flowarrival, &key, &astats); 36 | 37 | // 38 | if (tcp->th_flags & TH_SYN) 39 | { 40 | astats->arrival += 1; 41 | } 42 | 43 | else if (tcp->th_flags & TH_FIN) 44 | { 45 | astats->departure += 1; 46 | } 47 | 48 | else if (tcp->th_flags & TH_RST) 49 | { 50 | astats->departure += 1; 51 | } 52 | 53 | if (pkt->metadata.sec - astats->lasttime > 5) 54 | { 55 | bpf_notify(0, astats, sizeof(struct arrival_stats)); 56 | astats->lasttime = pkt->metadata.sec; 57 | astats->arrival = 0; 58 | astats->departure = 0; 59 | } 60 | } 61 | } 62 | } 63 | 64 | return NEXT; 65 | } 66 | char _license[] SEC("license") = "GPL"; 67 | -------------------------------------------------------------------------------- /examples/idps.c: 
-------------------------------------------------------------------------------- 1 | #include 2 | #include "ebpf_switch.h" 3 | 4 | struct bpf_map_def SEC("maps") inports = { 5 | .type = BPF_MAP_TYPE_HASH, 6 | .key_size = 6, // MAC address is the key 7 | .value_size = sizeof(uint32_t), 8 | .max_entries = 256, 9 | }; 10 | 11 | uint64_t prog(struct packet *pkt, unsigned len) 12 | { 13 | uint32_t *original_port; 14 | 15 | // If the packet is from the IDPS ingress (port 0) drop it. There shouldn't be traffic coming from this port 16 | if (pkt->metadata.in_port == 0) 17 | { 18 | return DROP; 19 | } 20 | 21 | // If the packet is from the IDPS egress, rewrite the in_port and forward to the next stage 22 | if (pkt->metadata.in_port == 1 && bpf_map_lookup_elem(&inports, pkt->eth.h_source, &original_port) != -1) 23 | { 24 | pkt->metadata.in_port = *original_port; 25 | return NEXT; 26 | } 27 | 28 | // Otherwise learn the original port for this MAC address 29 | bpf_map_update_elem(&inports, pkt->eth.h_source, &pkt->metadata.in_port, 0); 30 | 31 | // Send all the traffic to the IDPS 32 | return PORT + 0; 33 | } 34 | char _license[] SEC("license") = "GPL"; 35 | -------------------------------------------------------------------------------- /examples/ids.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include "ebpf_switch.h" 3 | 4 | uint64_t prog(struct packet *pkt, unsigned len) 5 | { 6 | // If the packet is from the IDS (port 0) drop it 7 | if (pkt->metadata.in_port == 0) 8 | { 9 | return DROP; 10 | } 11 | 12 | // Otherwise mirror all the traffic to the IDS 13 | bpf_mirror(0, pkt, len); 14 | 15 | return NEXT; 16 | } 17 | char _license[] SEC("license") = "GPL"; 18 | -------------------------------------------------------------------------------- /examples/interarrival.c: -------------------------------------------------------------------------------- 1 | #include "ebpf_switch.h" 2 | 3 | #define NBUCKETS 64 4 | 5 | struct 
statekeeper 6 | { 7 | uint64_t lasttime; 8 | uint64_t counter; 9 | uint64_t overflow; 10 | }; 11 | 12 | // Use to keep the current state 13 | struct bpf_map_def SEC("maps") state = { 14 | .type = BPF_MAP_TYPE_ARRAY, 15 | .key_size = sizeof(uint32_t), 16 | .value_size = sizeof(struct statekeeper), 17 | .max_entries = 1, 18 | }; 19 | 20 | struct bpf_map_def SEC("maps") interarrival = { 21 | .type = BPF_MAP_TYPE_ARRAY, 22 | .key_size = sizeof(uint32_t), 23 | .value_size = sizeof(uint64_t), 24 | .max_entries = NBUCKETS, 25 | }; 26 | 27 | uint64_t prog(struct packet *pkt) 28 | { 29 | uint32_t zero = 0; 30 | 31 | // Retrieve the current state 32 | struct statekeeper *st; 33 | bpf_map_lookup_elem(&state, &zero, &st); 34 | 35 | // 36 | uint64_t currenttime = ((uint64_t)pkt->metadata.sec << 32) | pkt->metadata.nsec; 37 | uint64_t delta = currenttime - st->lasttime; 38 | 39 | uint32_t idx = delta >> 24; // 24 ~ 16msec | 20 ~1msec | 18 0.250 msec 40 | if (idx < NBUCKETS) 41 | { 42 | uint64_t *counter; 43 | bpf_map_lookup_elem(&interarrival, &idx, &counter); 44 | (*counter)++; 45 | } 46 | else 47 | { 48 | st->overflow++; 49 | } 50 | 51 | st->lasttime = currenttime; 52 | st->counter++; 53 | 54 | if (st->counter % 64 == 0) 55 | { 56 | uint64_t *first; 57 | bpf_map_lookup_elem(&interarrival, &zero, &first); 58 | bpf_notify(0, first, NBUCKETS * sizeof(uint64_t)); 59 | } 60 | 61 | return NEXT; 62 | } 63 | char _license[] SEC("license") = "GPL"; 64 | -------------------------------------------------------------------------------- /examples/latency.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "ebpf_switch.h" 4 | 5 | struct tcpflowtuple 6 | { 7 | uint32_t src; 8 | uint32_t dst; 9 | uint16_t srcport; 10 | uint16_t dstport; 11 | }; 12 | 13 | struct tstamp 14 | { 15 | uint32_t sec; 16 | uint32_t nsec; 17 | }; 18 | 19 | struct tcplatency 20 | { 21 | struct tstamp syn; 22 | struct tstamp synack; 23 | struct 
tstamp ack; 24 | }; 25 | 26 | struct bpf_map_def SEC("maps") latency = { 27 | .type = BPF_MAP_TYPE_HASH, 28 | .key_size = sizeof(struct tcpflowtuple), // key is SRC:DST:SRCPORT:DSTPORT tuple 29 | .value_size = sizeof(struct tcplatency), // key is sec:nsec 30 | .max_entries = 256, 31 | }; 32 | 33 | uint64_t prog(struct packet *pkt) 34 | { 35 | // Check if the ethernet frame contains an ipv4 payload 36 | if (pkt->eth.h_proto == 0x0008) 37 | { 38 | struct ip *ipv4 = (struct ip *)(((uint8_t *)&pkt->eth) + ETH_HLEN); 39 | 40 | // Check if the ip packet contains a TCP payload 41 | if (ipv4->ip_p == 6) 42 | { 43 | struct tcphdr *tcp = (struct tcphdr *)(((uint32_t *)ipv4) + ipv4->ip_hl); 44 | 45 | // 46 | if ((tcp->th_flags & (TH_ACK | TH_SYN)) == TH_SYN) 47 | { 48 | // TCP SYN 49 | struct tcpflowtuple tuple = { 50 | .src = ipv4->ip_src.s_addr, 51 | .dst = ipv4->ip_dst.s_addr, 52 | .srcport = tcp->th_sport, 53 | .dstport = tcp->th_dport}; 54 | 55 | struct tcplatency lat = { 56 | .syn = {.sec = pkt->metadata.sec, .nsec = pkt->metadata.nsec}, 57 | .synack = 0, 58 | .ack = 0}; 59 | 60 | bpf_map_update_elem(&latency, &tuple, &lat, 0); 61 | } 62 | else if ((tcp->th_flags & (TH_ACK | TH_SYN)) == (TH_SYN | TH_ACK)) 63 | { 64 | // TCP SYN|ACK 65 | struct tcpflowtuple tuple = { 66 | .dst = ipv4->ip_src.s_addr, 67 | .src = ipv4->ip_dst.s_addr, 68 | .dstport = tcp->th_sport, 69 | .srcport = tcp->th_dport}; 70 | struct tcplatency *lat; 71 | 72 | if (bpf_map_lookup_elem(&latency, &tuple, &lat) != -1) 73 | { 74 | lat->synack.sec = pkt->metadata.sec; 75 | lat->synack.nsec = pkt->metadata.nsec; 76 | } 77 | } 78 | else if ((tcp->th_flags & TH_ACK) == TH_ACK) 79 | { 80 | // TCP ACK 81 | struct tcpflowtuple tuple = { 82 | .src = ipv4->ip_src.s_addr, 83 | .dst = ipv4->ip_dst.s_addr, 84 | .srcport = tcp->th_sport, 85 | .dstport = tcp->th_dport}; 86 | struct tcplatency *lat; 87 | 88 | if (bpf_map_lookup_elem(&latency, &tuple, &lat) != -1) 89 | { 90 | lat->ack.sec = pkt->metadata.sec; 91 | 
lat->ack.nsec = pkt->metadata.nsec; 92 | 93 | bpf_notify(1, ((uint8_t *)lat) - sizeof(struct tcpflowtuple) - 4, sizeof(struct tcplatency) + sizeof(struct tcpflowtuple) + 4); 94 | bpf_map_delete_elem(&latency, &tuple); 95 | } 96 | } 97 | } 98 | } 99 | 100 | return NEXT; 101 | } 102 | char _license[] SEC("license") = "GPL"; 103 | -------------------------------------------------------------------------------- /examples/learningswitch.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include "ebpf_switch.h" 3 | 4 | struct bpf_map_def SEC("maps") inports = { 5 | .type = BPF_MAP_TYPE_HASH, 6 | .key_size = 6, // MAC address is the key 7 | .value_size = sizeof(uint32_t), 8 | .max_entries = 256, 9 | }; 10 | 11 | uint64_t prog(struct packet *pkt) 12 | { 13 | uint32_t *out_port; 14 | 15 | // if the source is not a broadcast or multicast 16 | if ((pkt->eth.h_source[0] & 1) == 0) 17 | { 18 | // Update the port associated with the packet 19 | bpf_map_update_elem(&inports, pkt->eth.h_source, &pkt->metadata.in_port, 0); 20 | } 21 | 22 | // Flood if the destination is broadcast or multicast 23 | if (pkt->eth.h_dest[0] & 1) 24 | { 25 | return FLOOD; 26 | } 27 | 28 | // Lookup the output port 29 | if (bpf_map_lookup_elem(&inports, pkt->eth.h_dest, &out_port) == -1) 30 | { 31 | // If no entry was found flood 32 | return FLOOD; 33 | } 34 | 35 | return *out_port; 36 | } 37 | char _license[] SEC("license") = "GPL"; 38 | -------------------------------------------------------------------------------- /examples/learningswitch_centralized.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include "ebpf_switch.h" 3 | 4 | struct bpf_map_def SEC("maps") inports = { 5 | .type = BPF_MAP_TYPE_HASH, 6 | .key_size = 6, 7 | .value_size = sizeof(uint32_t), 8 | .max_entries = 256, 9 | }; 10 | 11 | uint64_t prog(struct packet *pkt) 12 | { 13 | uint32_t *port; 14 | 15 | // If the packet src mac is 
unknown, tell the controller 16 | if (bpf_map_lookup_elem(&inports, pkt->eth.h_source, &port) == -1) { 17 | return CONTROLLER; 18 | } 19 | 20 | // Lookup the output port 21 | if (bpf_map_lookup_elem(&inports, pkt->eth.h_dest, &port) == -1) { 22 | // If no entry was found send to the controller 23 | return CONTROLLER; 24 | } 25 | 26 | return *port; 27 | } 28 | char _license[] SEC("license") = "GPL"; 29 | -------------------------------------------------------------------------------- /examples/lpm.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include "ebpf_switch.h" 3 | 4 | struct ipv4_lpm_key 5 | { 6 | uint32_t prefixlen; 7 | uint32_t data; 8 | }; 9 | 10 | struct bpf_map_def SEC("maps") lpm = { 11 | .type = BPF_MAP_TYPE_LPM_TRIE, 12 | .key_size = sizeof(struct ipv4_lpm_key), 13 | .value_size = sizeof(uint32_t), 14 | .max_entries = 256, 15 | .map_flags = BPF_F_NO_PREALLOC, 16 | }; 17 | 18 | uint64_t prog(struct packet *pkt) 19 | { 20 | struct ipv4_lpm_key key = {.prefixlen = 32, .data = 3232235777}; 21 | 22 | if (pkt->metadata.in_port == 0) 23 | { 24 | uint32_t value = 5; 25 | bpf_map_update_elem(&lpm, &key, &value, 0); 26 | } 27 | else 28 | { 29 | uint32_t *value; 30 | 31 | if (bpf_map_lookup_elem(&lpm, &key, &value) == 0) 32 | { 33 | bpf_debug(*value); 34 | } 35 | } 36 | 37 | return NEXT; 38 | } 39 | char _license[] SEC("license") = "GPL"; 40 | -------------------------------------------------------------------------------- /examples/mirror.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include "ebpf_switch.h" 3 | 4 | uint64_t prog(struct packet *pkt) 5 | { 6 | if (pkt->metadata.in_port == 1) 7 | { 8 | bpf_mirror(2, pkt, 100); 9 | } 10 | 11 | return NEXT; 12 | } 13 | char _license[] SEC("license") = "GPL"; 14 | -------------------------------------------------------------------------------- /examples/trafficcount.c: 
-------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "ebpf_switch.h" 5 | 6 | struct countentry 7 | { 8 | int bytes; 9 | int packets; 10 | }; 11 | 12 | struct bpf_map_def SEC("maps") trafficcount = { 13 | .type = BPF_MAP_TYPE_HASH, 14 | .key_size = 6, // MAC address is the key 15 | .value_size = sizeof(struct countentry), 16 | .max_entries = 256, 17 | }; 18 | 19 | uint64_t prog(struct packet *pkt) 20 | { 21 | struct countentry *item; 22 | 23 | if (bpf_map_lookup_elem(&trafficcount, pkt->eth.h_source, &item) == -1) 24 | { 25 | struct countentry newitem = { 26 | .bytes = 0, 27 | .packets = 0, 28 | }; 29 | 30 | bpf_map_update_elem(&trafficcount, pkt->eth.h_source, &newitem, 0); 31 | item = &newitem; 32 | } 33 | 34 | item->packets++; 35 | item->bytes += pkt->metadata.length; 36 | 37 | return NEXT; 38 | } 39 | char _license[] SEC("license") = "GPL"; 40 | -------------------------------------------------------------------------------- /examples/traffichist.c: -------------------------------------------------------------------------------- 1 | // Example based on Brendan Gregg's code available at http://www.brendangregg.com/blog/2015-05-15/ebpf-one-small-step.html 2 | 3 | #include "ebpf_switch.h" 4 | 5 | struct bpf_map_def SEC("maps") traffichist = { 6 | .type = BPF_MAP_TYPE_ARRAY, 7 | .key_size = sizeof(uint32_t), 8 | .value_size = sizeof(uint64_t), 9 | .max_entries = 24, 10 | }; 11 | 12 | uint64_t prog(struct packet *pkt) 13 | { 14 | // Packet distribution 15 | uint32_t index = pkt->metadata.length / 64; 16 | uint64_t *value; 17 | 18 | bpf_map_lookup_elem(&traffichist, &index, &value); 19 | (*value)++; 20 | 21 | return NEXT; 22 | } 23 | char _license[] SEC("license") = "GPL"; 24 | -------------------------------------------------------------------------------- /examples/wire.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 
"ebpf_switch.h" 3 | 4 | uint64_t prog(struct packet *pkt) 5 | { 6 | if (pkt->metadata.in_port == 0) 7 | { 8 | return 1; 9 | } 10 | 11 | return 0; 12 | } 13 | char _license[] SEC("license") = "GPL"; 14 | -------------------------------------------------------------------------------- /includes/ebpf_consts.h: -------------------------------------------------------------------------------- 1 | #ifndef __EBPF_SWITCH_CONSTS_H 2 | #define __EBPF_SWITCH_CONSTS_H 3 | 4 | /** Send the packet to a specific port */ 5 | #define PORT 0x00ULL 6 | 7 | /** Flood the packet to all other ports */ 8 | #define FLOOD (0x01ULL << 32) 9 | 10 | /** Send the packet to the controller */ 11 | #define CONTROLLER (0x02ULL << 32) 12 | 13 | /** Drop the packet */ 14 | #define DROP (0x03ULL << 32) 15 | 16 | /** Send the packet to the next pipeline stage */ 17 | #define NEXT (0x04ULL << 32) 18 | 19 | #define OPCODE_MASK (0xffffffffULL << 32) 20 | 21 | #define VALUE_MASK 0xffffffff 22 | 23 | #endif 24 | -------------------------------------------------------------------------------- /includes/ebpf_functions.h: -------------------------------------------------------------------------------- 1 | #ifndef __EBPF_SWITCH_FUNCTIONS_H 2 | #define __EBPF_SWITCH_FUNCTIONS_H 3 | 4 | static int (*bpf_map_lookup_elem)(void *map, void *key, void *value) = (void *)1; 5 | static int (*bpf_map_update_elem)(void *map, void *key, void *value, unsigned long long flags) = (void *)2; 6 | static int (*bpf_map_delete_elem)(void *map, void *key) = (void *)3; 7 | static int (*bpf_mirror)(unsigned long long out_port, void *buf, int len) = (void *)30; 8 | static int (*bpf_notify)(int id, void *data, int len) = (void *)31; 9 | static int (*bpf_debug)(unsigned long long arg) = (void *)32; 10 | 11 | #endif 12 | -------------------------------------------------------------------------------- /includes/ebpf_packet.h: -------------------------------------------------------------------------------- 1 | #ifndef __EBPF_SWITCH_PACKET_H 
2 | #define __EBPF_SWITCH_PACKET_H 3 | 4 | #include 5 | #include 6 | 7 | struct metadatahdr 8 | { // limited to the size available between the TPACKET_V2 header and the tp_mac payload 9 | uint32_t in_port; 10 | uint32_t sec; 11 | uint32_t nsec; 12 | uint16_t length; 13 | } __attribute__((packed)); 14 | 15 | struct packet 16 | { 17 | struct metadatahdr metadata; 18 | struct ethhdr eth; 19 | }; 20 | 21 | #endif 22 | -------------------------------------------------------------------------------- /includes/ebpf_switch.h: -------------------------------------------------------------------------------- 1 | #ifndef __EBPF_SWITCH_H 2 | #define __EBPF_SWITCH_H 3 | 4 | #include "ebpf_consts.h" 5 | #include "ebpf_functions.h" 6 | #include "ebpf_packet.h" 7 | #include 8 | 9 | #define SEC(NAME) __attribute__((section(NAME), used)) 10 | 11 | struct bpf_map_def 12 | { 13 | unsigned int type; 14 | unsigned int key_size; 15 | unsigned int value_size; 16 | unsigned int max_entries; 17 | unsigned int map_flags; 18 | }; 19 | 20 | #endif 21 | -------------------------------------------------------------------------------- /mininet/1sw_topo.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from mininet.net import Mininet 4 | from mininet.topo import Topo 5 | from mininet.cli import CLI 6 | 7 | from eBPFSwitch import eBPFSwitch, eBPFHost 8 | 9 | class SingleSwitchTopo(Topo): 10 | def __init__(self, **opts): 11 | # Initialize topology and default options 12 | Topo.__init__(self, **opts) 13 | 14 | switch = self.addSwitch('s1', switch_path="../softswitch/softswitch") 15 | 16 | for h in range(1, 3): 17 | host = self.addHost(f'h{h}', 18 | ip = f"10.0.0.{h}/8", 19 | mac = '00:04:00:00:00:%02x'.format(h)) 20 | 21 | self.addLink(host, switch) 22 | 23 | def main(): 24 | topo = SingleSwitchTopo() 25 | net = Mininet(topo = topo, host = eBPFHost, switch = eBPFSwitch, controller = None) 26 | 27 | net.start() 28 | CLI(net) 29 | 
net.stop() 30 | 31 | if __name__ == '__main__': 32 | main() 33 | -------------------------------------------------------------------------------- /mininet/3sw_topo.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from mininet.net import Mininet 4 | from mininet.topo import Topo 5 | from mininet.cli import CLI 6 | 7 | from eBPFSwitch import eBPFSwitch, eBPFHost 8 | 9 | class ThreeSwitchTopo(Topo): 10 | def __init__(self, **opts): 11 | # Initialize topology and default options 12 | Topo.__init__(self, **opts) 13 | 14 | coreSwitch = self.addSwitch('s1', switch_path="../softswitch/softswitch") 15 | aggSwitch1 = self.addSwitch('s2', switch_path="../softswitch/softswitch") 16 | aggSwitch2 = self.addSwitch('s3', switch_path="../softswitch/softswitch") 17 | 18 | self.addLink(aggSwitch1, coreSwitch) 19 | self.addLink(aggSwitch2, coreSwitch) 20 | 21 | for i, sw in enumerate([aggSwitch1, aggSwitch2]): 22 | # Add 2 hosts per switch 23 | for h in range(1, 3): 24 | host = self.addHost(f'h_{i}_{h}', 25 | ip = f'10.0.{i}.{h}/8', 26 | mac = '00:04:00:00:00:%02x'.format(h)) 27 | 28 | self.addLink(host, sw) 29 | 30 | def main(): 31 | topo = ThreeSwitchTopo() 32 | net = Mininet(topo = topo, host = eBPFHost, switch = eBPFSwitch, controller = None) 33 | 34 | net.start() 35 | CLI(net) 36 | net.stop() 37 | 38 | if __name__ == '__main__': 39 | main() 40 | -------------------------------------------------------------------------------- /mininet/eBPFSwitch.py: -------------------------------------------------------------------------------- 1 | from mininet.node import Switch, Host 2 | import subprocess 3 | 4 | 5 | class eBPFHost(Host): 6 | def config(self, **params): 7 | r = super(Host, self).config(**params) 8 | 9 | # Disable offloading 10 | for off in ["rx", "tx", "sg"]: 11 | cmd = "/sbin/ethtool --offload {} {} off".format(self.defaultIntf(), off) 12 | self.cmd(cmd) 13 | 14 | return r 15 | 16 | class eBPFSwitch(Switch): 
17 | dpid = 1 18 | 19 | def __init__(self, name, switch_path='softswitch', dpid=None, **kwargs): 20 | Switch.__init__(self, name, **kwargs) 21 | 22 | self.switch_path = switch_path 23 | 24 | if dpid: 25 | self.dpid = dpid 26 | eBPFSwitch.dpid = max(eBPFSwitch.dpid, dpid) 27 | else: 28 | self.dpid = eBPFSwitch.dpid 29 | eBPFSwitch.dpid += 1 30 | 31 | @classmethod 32 | def setup(cls): 33 | pass 34 | 35 | def start(self, controllers): 36 | print("Starting eBPF switch", self.name) 37 | 38 | args = [self.switch_path] 39 | 40 | args.extend(['-p', '-i', '--dpid', str(self.dpid)]) 41 | 42 | for port, intf in self.intfs.items(): 43 | if not intf.IP(): 44 | args.append(intf.name) 45 | 46 | self.proc = subprocess.Popen(args) 47 | 48 | def stop(self): 49 | print('stopping') 50 | self.proc.kill() 51 | -------------------------------------------------------------------------------- /mininet/idps.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # 4 | # sudo snort -c /usr/local/snort/etc/snort/snort.lua -R ./local.rules -Q -i mid-ingress:mid-egress -A alert_fast --daq afpacket --daq-batch-size 1 5 | # 6 | 7 | from mininet.net import Mininet 8 | from mininet.topo import Topo 9 | from mininet.cli import CLI 10 | from mininet.util import quietRun 11 | import subprocess 12 | 13 | from eBPFSwitch import eBPFSwitch, eBPFHost 14 | 15 | # an eBPFSwitch that has two veth pairs automatically allocated on port 0 and 1 16 | class MiddleboxSwitch(eBPFSwitch): 17 | def start(self, controllers): 18 | print("Starting eBPF switch", self.name) 19 | 20 | self.pairs = [('switch-egress', 'mid-ingress'), ('switch-ingress', 'mid-egress')] 21 | 22 | for (port1, port2) in self.pairs: 23 | quietRun(f'ip link add name {port1} type veth peer name {port2}') 24 | quietRun(f'ip link set dev {port1} up') 25 | quietRun(f'ip link set dev {port2} up') 26 | 27 | args = [self.switch_path] 28 | 29 | args.extend(['-p', '-i', '--dpid', str(self.dpid), 
'switch-egress', 'switch-ingress']) 30 | 31 | for port, intf in self.intfs.items(): 32 | if not intf.IP(): 33 | args.append(intf.name) 34 | 35 | self.proc = subprocess.Popen(args) 36 | 37 | def stop(self): 38 | for (port1, _) in self.pairs: 39 | quietRun(f'ip link del {port1}') 40 | 41 | print('stopping') 42 | self.proc.kill() 43 | 44 | 45 | class IDPSSingleSwitchTopo(Topo): 46 | def __init__(self, **opts): 47 | # Initialize topology and default options 48 | Topo.__init__(self, **opts) 49 | 50 | switch = self.addSwitch('s1', switch_path="../softswitch/softswitch") 51 | 52 | for h in range(1, 3): 53 | host = self.addHost(f'h{h}', 54 | ip = f"10.0.0.{h}/8", 55 | mac = '00:04:00:00:00:%02x'.format(h)) 56 | 57 | self.addLink(host, switch) 58 | 59 | def main(): 60 | topo = IDPSSingleSwitchTopo() 61 | net = Mininet(topo = topo, host = eBPFHost, switch = MiddleboxSwitch, controller = None) 62 | 63 | net.start() 64 | CLI(net) 65 | net.stop() 66 | 67 | if __name__ == '__main__': 68 | main() 69 | -------------------------------------------------------------------------------- /protocol/Function.proto: -------------------------------------------------------------------------------- 1 | syntax = "proto3"; 2 | 3 | message FunctionAddRequest { 4 | string name = 1; 5 | uint32 index = 2; 6 | bytes elf = 3; 7 | } 8 | 9 | message FunctionAddReply { 10 | enum FunctionAddStatus { 11 | OK = 0; 12 | INVALID_STAGE = 1; 13 | INVALID_FUNCTION = 2; 14 | } 15 | 16 | FunctionAddStatus status = 1; 17 | } 18 | 19 | message FunctionRemoveRequest { 20 | uint32 index = 1; 21 | } 22 | 23 | message FunctionRemoveReply { 24 | enum FunctionRemoveStatus { 25 | OK = 0; 26 | INVALID_STAGE = 1; 27 | } 28 | 29 | FunctionRemoveStatus status = 1; 30 | } 31 | 32 | message FunctionListEntry { 33 | string name = 1; 34 | uint32 index = 2; 35 | uint64 counter = 3; 36 | } 37 | 38 | message FunctionListRequest { 39 | } 40 | 41 | message FunctionListReply { 42 | repeated FunctionListEntry entries = 1; 43 | } 44 | 
-------------------------------------------------------------------------------- /protocol/Header.proto: -------------------------------------------------------------------------------- 1 | syntax = "proto3"; 2 | 3 | message Header { 4 | enum Type { 5 | HELLO = 0; 6 | 7 | FUNCTION_ADD_REQUEST = 1; 8 | FUNCTION_ADD_REPLY = 2; 9 | FUNCTION_REMOVE_REQUEST = 3; 10 | FUNCTION_REMOVE_REPLY = 4; 11 | FUNCTION_LIST_REQUEST = 5; 12 | FUNCTION_LIST_REPLY = 6; 13 | 14 | TABLES_LIST_REQUEST = 7; 15 | TABLES_LIST_REPLY = 8; 16 | TABLE_LIST_REQUEST = 9; 17 | TABLE_LIST_REPLY = 10; 18 | 19 | TABLE_ENTRY_GET_REQUEST = 11; 20 | TABLE_ENTRY_GET_REPLY = 12; 21 | TABLE_ENTRY_INSERT_REQUEST = 13; 22 | TABLE_ENTRY_INSERT_REPLY = 14; 23 | TABLE_ENTRY_DELETE_REQUEST = 15; 24 | TABLE_ENTRY_DELETE_REPLY = 16; 25 | 26 | PACKET_IN = 17; 27 | PACKET_OUT = 18; 28 | 29 | NOTIFY = 19; 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /protocol/Hello.proto: -------------------------------------------------------------------------------- 1 | syntax = "proto3"; 2 | 3 | message Hello { 4 | uint32 version = 1; 5 | uint64 dpid = 2; 6 | } 7 | -------------------------------------------------------------------------------- /protocol/Makefile: -------------------------------------------------------------------------------- 1 | all: build-proto-c build-proto-python 2 | 3 | build-proto-c: *.proto 4 | mkdir -p src/c/ 5 | protoc-c --c_out=src/c/ *.proto 6 | 7 | cd src/c/ && gcc -c *.c && ar cr protocol.a *.o 8 | 9 | build-proto-python: *.proto 10 | mkdir -p src/python/ 11 | protoc --python_out=src/python/ *.proto 12 | 13 | clean: 14 | rm -rf src 15 | -------------------------------------------------------------------------------- /protocol/Notify.proto: -------------------------------------------------------------------------------- 1 | syntax = "proto3"; 2 | 3 | message Notify { 4 | uint32 id = 1; 5 | bytes data = 2; 6 | } 7 | 
-------------------------------------------------------------------------------- /protocol/Packet.proto: -------------------------------------------------------------------------------- 1 | syntax = "proto3"; 2 | 3 | message PacketIn { 4 | bytes data = 1; 5 | } 6 | 7 | message PacketOut { 8 | bytes data = 1; 9 | uint64 out_port = 2; 10 | } 11 | -------------------------------------------------------------------------------- /protocol/Table.proto: -------------------------------------------------------------------------------- 1 | syntax = "proto3"; 2 | 3 | message TableDefinition { 4 | enum TableType { 5 | UNSPEC = 0; 6 | HASH = 1; 7 | ARRAY = 2; 8 | LPM_TRIE = 11; 9 | } 10 | 11 | string table_name = 1; 12 | TableType table_type = 2; 13 | uint32 key_size = 3; 14 | uint32 value_size = 4; 15 | uint32 max_entries = 5; 16 | } 17 | 18 | enum TableStatus { 19 | SUCCESS = 0; 20 | STAGE_NOT_FOUND = 1; 21 | TABLE_NOT_FOUND = 2; 22 | ENTRY_NOT_FOUND = 3; 23 | } 24 | 25 | message TablesListRequest { 26 | uint32 index = 1; 27 | } 28 | 29 | message TablesListReply { 30 | TableStatus status = 1; 31 | repeated TableDefinition entries = 2; 32 | } 33 | 34 | message TableListRequest { 35 | uint32 index = 1; 36 | string table_name = 2; 37 | } 38 | 39 | message TableListReply { 40 | TableStatus status = 1; 41 | TableDefinition entry = 2; 42 | uint32 n_items = 3; 43 | bytes items = 4; 44 | } 45 | 46 | message TableEntryGetRequest { 47 | uint32 index = 1; 48 | string table_name = 2; 49 | bytes key = 3; 50 | } 51 | 52 | message TableEntryGetReply { 53 | TableStatus status = 1; 54 | bytes key = 2; 55 | bytes value = 3; 56 | } 57 | 58 | message TableEntryInsertRequest { 59 | uint32 index = 1; 60 | string table_name = 2; 61 | bytes key = 3; 62 | bytes value = 4; 63 | } 64 | 65 | message TableEntryInsertReply { 66 | TableStatus status = 1; 67 | } 68 | 69 | message TableEntryDeleteRequest { 70 | uint32 index = 1; 71 | string table_name = 2; 72 | bytes key = 3; 73 | } 74 | 75 | message 
TableEntryDeleteReply { 76 | TableStatus status = 1; 77 | } 78 | -------------------------------------------------------------------------------- /softswitch/Makefile: -------------------------------------------------------------------------------- 1 | CFLAGS += -g -I../ubpf/inc -I../agent -I../includes 2 | LDFLAGS += -L../ubpf -L../bpfmap 3 | LDLIBS += -lpthread -lprotobuf-c -lubpf -lbpfmap 4 | 5 | all: softswitch 6 | 7 | softswitch: main.c ../agent/agent.o ../protocol/src/c/*.pb-c.c 8 | $(CC) $(CFLAGS) -o $@ $^ $(LDFLAGS) $(LDLIBS) 9 | 10 | clean: 11 | rm -f *.o softswitch 12 | -------------------------------------------------------------------------------- /softswitch/main.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | 27 | #include 28 | 29 | #include "ubpf.h" 30 | #include "agent.h" 31 | #include "ebpf_consts.h" 32 | #include "ebpf_packet.h" 33 | 34 | #ifndef likely 35 | #define likely(x) __builtin_expect(!!(x), 1) 36 | #endif 37 | 38 | #ifndef unlikely 39 | #define unlikely(x) __builtin_expect(!!(x), 0) 40 | #endif 41 | 42 | #ifndef ARRAY_SIZE 43 | #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) 44 | #endif 45 | 46 | #ifndef __aligned_tpacket 47 | #define __aligned_tpacket __attribute__((aligned(TPACKET_ALIGNMENT))) 48 | #endif 49 | 50 | #ifndef __align_tpacket 51 | #define __align_tpacket(x) __attribute__((aligned(TPACKET_ALIGN(x)))) 52 | #endif 53 | 54 | struct ring 55 | { 56 | struct iovec *rd; 57 | uint8_t *map; 58 | struct tpacket_req req; 59 | int size; 60 | int frame_num; 61 | }; 62 | 63 | struct port 64 | { 65 | int fd; 66 | struct ring rx_ring; 67 | struct ring tx_ring; 68 
/*
 * Shutdown flag: set by sighandler() and polled by the main forwarding loop.
 * It is written from an asynchronous signal handler, so it must be
 * `volatile sig_atomic_t`; without `volatile` the compiler is allowed to
 * cache the value and the loop might never observe the update.
 */
static volatile sig_atomic_t sigint = 0;

/**
 * @brief Signal handler requesting termination of the forwarding loop.
 *
 * @param num signal number (unused)
 */
static void sighandler(int num)
{
    (void)num;
    sigint = 1;
}

/**
 * @brief Signal handler that deliberately does nothing.
 *
 * @param num signal number (unused)
 */
static void voidhandler(int num)
{
    (void)num;
}
interface to the variable "dev", so the caller can know 142 | * it. Note that the caller MUST reserve space in *dev (see calling 143 | * code below) */ 144 | strcpy(dev, ifr.ifr_name); 145 | 146 | /* this is the special file descriptor that the caller will use to talk 147 | * with the virtual interface */ 148 | return fd; 149 | } 150 | 151 | static int setup_ring(int fd, struct ring *ring, int ring_type) 152 | { 153 | int err; 154 | unsigned int blocknum = 256; 155 | 156 | memset(&ring->req, 0, sizeof(ring->req)); 157 | 158 | ring->req.tp_block_size = getpagesize() << 2; 159 | ring->req.tp_frame_size = TPACKET_ALIGNMENT << 7; 160 | ring->req.tp_block_nr = blocknum; 161 | ring->req.tp_frame_nr = ring->req.tp_block_size / 162 | ring->req.tp_frame_size * 163 | ring->req.tp_block_nr; 164 | 165 | ring->size = ring->req.tp_block_size * ring->req.tp_block_nr; 166 | 167 | err = setsockopt(fd, SOL_PACKET, ring_type, &ring->req, sizeof(ring->req)); 168 | if (err < 0) 169 | { 170 | perror("setsockopt"); 171 | exit(1); 172 | } 173 | 174 | return 0; 175 | } 176 | 177 | static int setup_socket(struct port *port, char *netdev, int promiscuous) 178 | { 179 | int err, i, fd, ifindex, v = TPACKET_V2; 180 | struct sockaddr_ll ll; 181 | 182 | ifindex = if_nametoindex(netdev); 183 | if (ifindex == 0) 184 | { 185 | perror("interface"); 186 | exit(1); 187 | } 188 | 189 | // Opens a raw socket for this port 190 | fd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL)); 191 | if (fd < 0) 192 | { 193 | perror("socket"); 194 | exit(1); 195 | } 196 | 197 | port->fd = fd; 198 | 199 | err = setsockopt(fd, SOL_PACKET, PACKET_VERSION, &v, sizeof(v)); 200 | if (err < 0) 201 | { 202 | perror("setsockopt"); 203 | exit(1); 204 | } 205 | 206 | // Set the device in promiscuous mode 207 | if (promiscuous) 208 | { 209 | struct packet_mreq mreq = {.mr_ifindex = ifindex, .mr_type = PACKET_MR_PROMISC}; 210 | if (setsockopt(fd, SOL_PACKET, PACKET_ADD_MEMBERSHIP, &mreq, sizeof(mreq)) != 0) 211 | { 212 | 
perror("setsockopt"); 213 | exit(1); 214 | } 215 | } 216 | 217 | // Disable offloading 218 | struct ethtool_value ethv; 219 | struct ifreq ifr; 220 | 221 | strncpy(ifr.ifr_name, netdev, sizeof(ifr.ifr_name)); 222 | ifr.ifr_data = (void *)ðv; 223 | 224 | for (i = 0; i < ARRAY_SIZE(offload_fns); i++) 225 | { 226 | ethv.cmd = offload_fns[i]; 227 | ethv.data = 0; 228 | 229 | if (ioctl(fd, SIOCETHTOOL, &ifr) < 0) 230 | { 231 | printf("%s failed to set SIOCETHTOOL ioctl: %s\n", netdev, strerror(errno)); 232 | } 233 | } 234 | 235 | // Bring the interface up 236 | // memset(&ifr, 0, sizeof(ifr)); 237 | ifr.ifr_data = NULL; 238 | ifr.ifr_flags = IFF_UP; 239 | 240 | if ((ioctl(fd, SIOCSIFFLAGS, (void *)&ifr)) < 0) 241 | { 242 | perror("error"); 243 | exit(1); 244 | } 245 | 246 | // NOTE: disable qdisc, trivial performance improvement 247 | // int one = 1; 248 | // setsockopt(fd, SOL_PACKET, PACKET_QDISC_BYPASS, &one, sizeof(one)); 249 | 250 | setup_ring(fd, &port->rx_ring, PACKET_RX_RING); 251 | setup_ring(fd, &port->tx_ring, PACKET_TX_RING); 252 | 253 | port->rx_ring.map = mmap(NULL, port->rx_ring.size + port->tx_ring.size, 254 | PROT_READ | PROT_WRITE, MAP_SHARED | MAP_LOCKED, fd, 0); 255 | 256 | if (port->rx_ring.map == MAP_FAILED) 257 | { 258 | perror("mmap"); 259 | exit(1); 260 | } 261 | 262 | port->tx_ring.map = port->rx_ring.map + port->rx_ring.size; 263 | 264 | // rd_num * sizeof(*ring->rd) 265 | int rx_len_iovec = port->rx_ring.req.tp_frame_nr * sizeof(*port->rx_ring.rd); 266 | int tx_len_iovec = port->tx_ring.req.tp_frame_nr * sizeof(*port->tx_ring.rd); 267 | 268 | port->rx_ring.rd = malloc(rx_len_iovec); // allocate iovec for each block 269 | port->tx_ring.rd = malloc(tx_len_iovec); 270 | 271 | // why not use calloc? 
272 | memset(port->rx_ring.rd, 0, rx_len_iovec); 273 | memset(port->tx_ring.rd, 0, tx_len_iovec); 274 | 275 | // TODO check if ring->rd is allocated properly 276 | // printf("number of frames: %d\n", port->rx_ring.req.tp_frame_nr); 277 | for (i = 0; i < port->rx_ring.req.tp_frame_nr; ++i) 278 | { 279 | port->rx_ring.rd[i].iov_base = port->rx_ring.map + (i * port->rx_ring.req.tp_frame_size); 280 | port->rx_ring.rd[i].iov_len = port->rx_ring.req.tp_frame_size; 281 | } 282 | 283 | for (i = 0; i < port->tx_ring.req.tp_frame_nr; ++i) 284 | { 285 | port->tx_ring.rd[i].iov_base = port->tx_ring.map + (i * port->tx_ring.req.tp_frame_size); 286 | port->tx_ring.rd[i].iov_len = port->tx_ring.req.tp_frame_size; 287 | } 288 | 289 | // 290 | memset(&ll, 0, sizeof(ll)); 291 | ll.sll_family = PF_PACKET; 292 | ll.sll_protocol = htons(ETH_P_ALL); 293 | ll.sll_ifindex = ifindex; 294 | ll.sll_hatype = 0; 295 | ll.sll_pkttype = 0; 296 | ll.sll_halen = 0; 297 | 298 | err = bind(fd, (struct sockaddr *)&ll, sizeof(ll)); 299 | if (err < 0) 300 | { 301 | perror("bind"); 302 | exit(1); 303 | } 304 | 305 | return fd; 306 | } 307 | 308 | static void teardown_socket(struct port *port) 309 | { 310 | munmap(port->tx_ring.map, port->tx_ring.size); 311 | munmap(port->rx_ring.map, port->rx_ring.size); 312 | 313 | free(port->tx_ring.rd); 314 | free(port->rx_ring.rd); 315 | 316 | close(port->fd); 317 | } 318 | 319 | static inline int v2_rx_kernel_ready(struct tpacket2_hdr *hdr) 320 | { 321 | return ((hdr->tp_status & TP_STATUS_USER) == TP_STATUS_USER); 322 | } 323 | 324 | static inline void v2_rx_user_ready(struct tpacket2_hdr *hdr) 325 | { 326 | hdr->tp_status = TP_STATUS_KERNEL; 327 | __sync_synchronize(); 328 | } 329 | 330 | static inline int v2_tx_kernel_ready(struct tpacket2_hdr *hdr) 331 | { 332 | return !(hdr->tp_status & (TP_STATUS_SEND_REQUEST | TP_STATUS_SENDING)); 333 | } 334 | 335 | static inline void v2_tx_user_ready(struct tpacket2_hdr *hdr) 336 | { 337 | hdr->tp_status = 
TP_STATUS_SEND_REQUEST; 338 | __sync_synchronize(); 339 | } 340 | 341 | int tx_frame(struct port *port, void *data, int len) 342 | { 343 | // add the packet to the port tx queue 344 | struct ring *tx_ring = &port->tx_ring; 345 | 346 | // TODO: Drop if tx queue is full? (drop-tail) 347 | if (v2_tx_kernel_ready(tx_ring->rd[tx_ring->frame_num].iov_base)) 348 | { 349 | union frame_map ppd_out; 350 | ppd_out.raw = tx_ring->rd[tx_ring->frame_num].iov_base; 351 | 352 | // copy the packet from ppd to ppd_out 353 | // ppd_out.v2->tp_h.tp_snaplen = ppd.v2->tp_h.tp_snaplen; 354 | // ppd_out.v2->tp_h.tp_len = ppd.v2->tp_h.tp_len; 355 | ppd_out.v2->tp_h.tp_snaplen = len; 356 | ppd_out.v2->tp_h.tp_len = len; 357 | 358 | // printf("start pointer: %p tp_mac offset: %d hdrlen: %d sockadd_ll: %d\n", ppd.raw, ppd.v2->tp_h.tp_mac, TPACKET2_HDRLEN, sizeof(struct sockaddr_ll)); 359 | 360 | // Can this be zerocopy too? I guess not with the fixed allocation of rings 361 | // assert(ppd.v2->tp_h.tp_len == ppd.v2->tp_h.tp_snaplen); 362 | // printf("ppd_out.tp_mac %d\n", ppd_out.v2->tp_h.tp_mac); 363 | 364 | memcpy((uint8_t *)ppd_out.raw + TPACKET2_HDRLEN - sizeof(struct sockaddr_ll), 365 | (uint8_t *)data, 366 | len); 367 | 368 | ppd_out.v2->tp_h.tp_status = TP_STATUS_SEND_REQUEST; 369 | 370 | // 371 | tx_ring->frame_num = (tx_ring->frame_num + 1) % tx_ring->req.tp_frame_nr; 372 | 373 | return 0; 374 | } 375 | 376 | return -1; // Kernel not ready, dropping the packet 377 | } 378 | 379 | const char *argp_program_version = "ebpf-switch 0.2"; 380 | static char doc[] = "eBPF-switch -- eBPF user space switch"; 381 | static char args_doc[] = "interface1 interface2 [interface3 ...]"; 382 | 383 | static struct argp_option options[] = { 384 | {"verbose", 'v', NULL, 0, "Produce verbose output"}, 385 | {"dpid", 'd', "dpid", 0, "Datapath id of the switch"}, 386 | {"controller", 'c', "address", 0, "Controller address default to 127.0.0.1:9000"}, 387 | {"promiscuous", 'p', NULL, OPTION_ARG_OPTIONAL, 
"Enable promiscuous mode"}, 388 | {"sigint", 'i', NULL, OPTION_ARG_OPTIONAL, "Disable sigint handler"}, 389 | {"tap", 't', "tap", OPTION_ARG_OPTIONAL, "Add additional TAP interfaces"}, 390 | {0}}; 391 | 392 | #define MAX_INTERFACES 255 393 | 394 | struct arguments 395 | { 396 | char *interfaces[MAX_INTERFACES]; 397 | int interface_count; 398 | unsigned long long dpid; 399 | char *controller; 400 | 401 | int verbose; 402 | int promiscuous; 403 | int sigint; 404 | }; 405 | 406 | static error_t 407 | parse_opt(int key, char *arg, struct argp_state *state) 408 | { 409 | struct arguments *arguments = state->input; 410 | 411 | switch (key) 412 | { 413 | case 'v': 414 | arguments->verbose = 1; 415 | break; 416 | 417 | case 'p': 418 | arguments->promiscuous = 1; 419 | break; 420 | 421 | case 'd': 422 | arguments->dpid = strtoull(arg, NULL, 10); 423 | break; 424 | 425 | case 'i': 426 | arguments->sigint = 0; 427 | break; 428 | 429 | case 'c': 430 | arguments->controller = arg; 431 | break; 432 | 433 | case 't': 434 | printf("adding tap interface %s\n", arg); 435 | if (tun_alloc(arg, IFF_TAP | IFF_NO_PI) < 0) 436 | { 437 | perror("error allocating tap interface"); 438 | return errno; 439 | } 440 | arguments->interfaces[arguments->interface_count++] = arg; 441 | break; 442 | 443 | case ARGP_KEY_ARG: 444 | arguments->interfaces[arguments->interface_count++] = arg; 445 | break; 446 | 447 | case ARGP_KEY_END: 448 | if (state->arg_num < 1) /* Not enough arguments. 
*/ 449 | argp_usage(state); 450 | break; 451 | 452 | default: 453 | return ARGP_ERR_UNKNOWN; 454 | } 455 | 456 | return 0; 457 | } 458 | 459 | static struct argp argp = {options, parse_opt, args_doc, doc}; 460 | 461 | unsigned long long random_dpid() 462 | { 463 | srand(time(NULL)); 464 | unsigned long long dpid = 0; 465 | 466 | for (int i = 0; i < 5; i++) 467 | { 468 | dpid = (dpid << 15) | (rand() & 0x7FFF); 469 | } 470 | 471 | return dpid & 0xFFFFFFFFFFFFFFFFULL; 472 | } 473 | 474 | // flags is the hack to force transmission 475 | void transmit(struct metadatahdr *buf, int len, uint64_t target, int flags) 476 | { 477 | int i; 478 | void *eth_frame = (uint8_t *)buf + sizeof(struct metadatahdr); 479 | int eth_len = len - sizeof(struct metadatahdr); 480 | 481 | switch (target & OPCODE_MASK) 482 | { 483 | case FLOOD: 484 | for (i = 0; i < dataplane.port_count; i++) 485 | { 486 | if (i != buf->in_port) 487 | { 488 | // printf("sending frame from port %d to port %d on switch %llu\n", buf->in_port, i, dataplane.dpid); 489 | tx_frame(&dataplane.ports[i], eth_frame, eth_len); 490 | } 491 | } 492 | 493 | // HACK, the packets are only sent after poll() however this 494 | // can be called asynchronously on packet from the controller and 495 | // therefore delay the packet transmission until the next packet is received 496 | if (flags == 1) 497 | { 498 | for (i = 0; i < dataplane.port_count; i++) 499 | { 500 | send(dataplane.ports[i].fd, NULL, 0, MSG_DONTWAIT); 501 | } 502 | } 503 | 504 | break; 505 | // 506 | case CONTROLLER: 507 | agent_packetin(buf, len); 508 | break; 509 | // 510 | case PORT: 511 | tx_frame(&dataplane.ports[target & VALUE_MASK], eth_frame, eth_len); 512 | break; 513 | // 514 | case NEXT: 515 | case DROP: 516 | default: 517 | // printf("Dropping the packet\n"); 518 | break; 519 | } 520 | } 521 | 522 | int main(int argc, char **argv) 523 | { 524 | int i; 525 | 526 | /* Argument Parsing */ 527 | struct arguments arguments; 528 | arguments.interface_count = 
0; 529 | arguments.dpid = random_dpid(); 530 | arguments.controller = "127.0.0.1:9000"; 531 | arguments.sigint = 1; 532 | argp_parse(&argp, argc, argv, 0, 0, &arguments); 533 | 534 | /* */ 535 | dataplane.dpid = arguments.dpid; 536 | dataplane.port_count = arguments.interface_count; 537 | dataplane.ports = calloc(dataplane.port_count, sizeof(struct port)); 538 | 539 | /* */ 540 | struct pollfd pfds[dataplane.port_count]; 541 | 542 | signal(SIGINT, arguments.sigint ? sighandler : voidhandler); 543 | signal(SIGKILL, sighandler); 544 | 545 | /* setup all the interfaces */ 546 | printf("Setting up %d interfaces\n", dataplane.port_count); 547 | for (i = 0; i < dataplane.port_count; i++) 548 | { 549 | // Create the socket, allocate the tx and rx rings and create the frame io vectors 550 | setup_socket(&dataplane.ports[i], arguments.interfaces[i], arguments.promiscuous); 551 | 552 | // Create the array of pollfd for poll() 553 | pfds[i].fd = dataplane.ports[i].fd; 554 | pfds[i].events = POLLIN | POLLERR; 555 | pfds[i].revents = 0; 556 | 557 | // 558 | printf("Interface %s, index %d, fd %d\n", arguments.interfaces[i], i, dataplane.ports[i].fd); 559 | } 560 | printf("\n"); 561 | 562 | /* */ 563 | struct agent_options options = {.dpid = dataplane.dpid, .controller = arguments.controller}; 564 | 565 | agent_start((tx_packet_fn)transmit, &options); 566 | 567 | // 568 | union frame_map ppd; 569 | 570 | while (likely(!sigint)) 571 | { 572 | // 573 | for (i = 0; i < dataplane.port_count; i++) 574 | { 575 | // 576 | struct ring *rx_ring = &dataplane.ports[i].rx_ring; 577 | 578 | // process all the packets received in the rx_ring 579 | while (v2_rx_kernel_ready(rx_ring->rd[rx_ring->frame_num].iov_base)) 580 | { 581 | ppd.raw = rx_ring->rd[rx_ring->frame_num].iov_base; 582 | 583 | // printf("metadatahdr len %lu\n", sizeof(struct metadatahdr)); // Should be ppd.v2->tp_h.tp_mac - TPACKET2_HDRLEN 584 | 585 | /**/ 586 | struct metadatahdr *metadatahdr = (struct metadatahdr *)((uint8_t 
*)ppd.raw + TPACKET2_HDRLEN); 587 | metadatahdr->in_port = i; 588 | metadatahdr->sec = ppd.v2->tp_h.tp_sec; 589 | metadatahdr->nsec = ppd.v2->tp_h.tp_nsec; 590 | metadatahdr->length = (uint16_t)ppd.v2->tp_h.tp_len; 591 | 592 | /* Here we have the packet and we can do whatever we want with it */ 593 | uint64_t ret = pipeline_exec(metadatahdr, ppd.v2->tp_h.tp_len + sizeof(struct metadatahdr)); 594 | transmit(metadatahdr, ppd.v2->tp_h.tp_len + sizeof(struct metadatahdr), ret, 0); 595 | 596 | // Frame has been used, release the buffer space 597 | v2_rx_user_ready(ppd.raw); 598 | rx_ring->frame_num = (rx_ring->frame_num + 1) % rx_ring->req.tp_frame_nr; 599 | } 600 | } 601 | 602 | // Send all the pendings packets for each interface 603 | for (i = 0; i < dataplane.port_count; i++) 604 | { 605 | send(dataplane.ports[i].fd, NULL, 0, MSG_DONTWAIT); // Should we use POLLOUT and just queue the messages to transmit then call send() once 606 | } 607 | 608 | // Poll for the next socket POLLIN or POLLERR 609 | poll(pfds, dataplane.port_count, -1); 610 | } 611 | 612 | /* House keeping */ 613 | agent_stop(); 614 | printf("Terminating ...\n"); 615 | for (i = 0; i < dataplane.port_count; i++) 616 | { 617 | teardown_socket(&dataplane.ports[i]); 618 | } 619 | 620 | return 0; 621 | } 622 | -------------------------------------------------------------------------------- /tools/disassembler.py: -------------------------------------------------------------------------------- 1 | import struct 2 | from io import StringIO 3 | import sys 4 | import networkx as nx 5 | from networkx.drawing.nx_agraph import graphviz_layout 6 | from matplotlib import pyplot as plt 7 | from elftools.elf.elffile import ELFFile 8 | 9 | Inst = struct.Struct("BBHI") 10 | 11 | CLASSES = { 12 | 0: "ld", 13 | 1: "ldx", 14 | 2: "st", 15 | 3: "stx", 16 | 4: "alu", 17 | 5: "jmp", 18 | 7: "alu64", 19 | } 20 | 21 | ALU_OPCODES = { 22 | 0: 'add', 23 | 1: 'sub', 24 | 2: 'mul', 25 | 3: 'div', 26 | 4: 'or', 27 | 5: 'and', 28 | 6: 
# Jump opcodes missing from the module-level JMP_OPCODES table
# (EBPF_OP_JLT / JLE / JSLT / JSLE, i.e. 0xa0..0xd0 in ubpf/ebpf.h).
_EXTRA_JMP_OPCODES = {
    10: 'jlt',
    11: 'jle',
    12: 'jslt',
    13: 'jsle',
}


def disassemble_one(data, offset):
    """Disassemble the single eBPF instruction found at `offset` in `data`.

    Returns a tuple whose first two items are `(text, class_name)`:

    * `text` is the assembly text, or `None` for the second 8-byte slot of
      an `lddw` instruction (its immediate is printed with the first slot).
    * `class_name` is the instruction class name from `CLASSES`, or `None`
      when the instruction could not be decoded.

    Jump instructions that carry a branch offset (everything in the jmp
    class except `exit` and `call`) return a third item: the offset
    formatted by `O()`.
    """
    code, regs, off, imm = Inst.unpack_from(data, offset)
    dst_reg = regs & 0xf
    src_reg = (regs >> 4) & 0xf
    cls = code & 7

    class_name = CLASSES.get(cls)

    if cls == BPF_CLASS_ALU or cls == BPF_CLASS_ALU64:
        source = (code >> 3) & 1
        opcode = (code >> 4) & 0xf
        opcode_name = ALU_OPCODES.get(opcode)
        if opcode_name is None:
            # Opcodes 14 and 15 are not in the table; report them instead
            # of failing on the string concatenation below.
            return ("unknown alu instruction %#x" % code, None)
        if cls == BPF_CLASS_ALU:
            opcode_name += "32"

        if opcode == BPF_ALU_END:
            # For byte swaps the source bit selects the endianness and the
            # immediate holds the width.
            opcode_name = "be" if source == 1 else "le"
            return ("%s%d %s" % (opcode_name, imm, R(dst_reg)), class_name)
        elif opcode == BPF_ALU_NEG:
            return ("%s %s" % (opcode_name, R(dst_reg)), class_name)
        elif source == 0:
            return ("%s %s, %s" % (opcode_name, R(dst_reg), I(imm)), class_name)
        else:
            return ("%s %s, %s" % (opcode_name, R(dst_reg), R(src_reg)), class_name)
    elif cls == BPF_CLASS_JMP:
        source = (code >> 3) & 1
        opcode = (code >> 4) & 0xf
        opcode_name = JMP_OPCODES.get(opcode) or _EXTRA_JMP_OPCODES.get(opcode)
        if opcode_name is None:
            return ("unknown jmp instruction %#x" % code, None)

        if opcode_name == "exit":
            return (opcode_name, class_name)
        elif opcode_name == "call":
            return ("%s %s" % (opcode_name, I(imm)), class_name)
        elif opcode_name == "ja":
            return ("%s %s" % (opcode_name, O(off)), class_name, O(off))
        elif source == 0:
            return ("%s %s, %s, %s" % (opcode_name, R(dst_reg), I(imm), O(off)), class_name, O(off))
        else:
            return ("%s %s, %s, %s" % (opcode_name, R(dst_reg), R(src_reg), O(off)), class_name, O(off))
    elif cls == BPF_CLASS_LD or cls == BPF_CLASS_LDX or cls == BPF_CLASS_ST or cls == BPF_CLASS_STX:
        size = (code >> 3) & 3
        mode = (code >> 5) & 7
        mode_name = MODES.get(mode, str(mode))
        # TODO use different syntax for non-MEM instructions
        size_name = SIZES.get(size, str(size))
        if code == 0x18:  # lddw
            # The upper 32 bits of the immediate live in the next slot
            if offset + 2 * Inst.size > len(data):
                return ("truncated lddw instruction %#x" % code, None)
            _, _, _, imm2 = Inst.unpack_from(data, offset + Inst.size)
            imm = (imm2 << 32) | imm
            return ("%s %s, %s" % (class_name + size_name, R(dst_reg), I(imm)), class_name)
        elif code == 0x00:
            # Second instruction of lddw
            return (None, class_name)
        elif cls == BPF_CLASS_LDX:
            return ("%s %s, %s" % (class_name + size_name, R(dst_reg), M(R(src_reg), off)), class_name)
        elif cls == BPF_CLASS_ST:
            return ("%s %s, %s" % (class_name + size_name, M(R(dst_reg), off), I(imm)), class_name)
        elif cls == BPF_CLASS_STX:
            return ("%s %s, %s" % (class_name + size_name, M(R(dst_reg), off), R(src_reg)), class_name)
        else:
            return ("unknown mem instruction %#x" % code, None)
    else:
        return ("unknown instruction %#x" % code, None)
"\n") 170 | 171 | # 172 | if offset != current_node and G.has_node(offset): 173 | # print(current_node, offset) 174 | G.add_edge(current_node, offset) 175 | current_node = offset 176 | 177 | G.nodes[current_node]['code'].append(s[0]) 178 | if s[1] == 'jmp' and s[0].split()[0] not in ['exit', 'call']: 179 | # print('jump', s) 180 | 181 | G.add_node(offset+8, code=[]) 182 | G.add_edge(current_node, offset+8) 183 | 184 | branch_offset = offset + 8 + int(s[2])*8 185 | G.add_node(branch_offset, code=[]) 186 | G.add_edge(current_node, branch_offset) 187 | 188 | current_node=offset+8 189 | 190 | offset += 8 191 | 192 | # print(sum([ len(d['code']) for n,d in G.nodes(data=True) ])) 193 | # for path in nx.all_simple_paths(G, 0, current_node): 194 | # print(path, sum([ len(G.nodes[n]['code']) for n in path ])) 195 | 196 | # nx.draw_networkx(G, labels={ k: '\n'.join(n.get('code', '')) for k,n in G.nodes(data=True) }) 197 | # plt.show() 198 | 199 | # A = nx.nx_agraph.to_agraph(G) 200 | # A.layout('dot', args='-Nfontsize=10 -Nwidth=".2" -Nheight=".2" -Nmargin=0 -Gfontsize=8') 201 | # A.draw('test.png') 202 | 203 | return output.getvalue() 204 | 205 | if __name__ == '__main__': 206 | with open(sys.argv[1], 'rb') as f: 207 | elffile = ELFFile(f) 208 | 209 | for section in elffile.iter_sections(): 210 | if section.name == '.text': 211 | print(disassemble(section.data())) 212 | -------------------------------------------------------------------------------- /tools/test.dot: -------------------------------------------------------------------------------- 1 | strict digraph { 2 | 0 [code="['mov r6, r1', 'mov r3, r10', 'add r3, 0xfffffff8', 'lddw r1, 0x0', 'mov r2, r6', 'call 0x1', 'ldxb r3, [r6+13]', 'lsh r3, 0x8', '\ 3 | ldxb r1, [r6+12]', 'or r3, r1', 'ldxdw r2, [r10-8]', 'ldxdw r1, [r2]', 'add r1, r3', 'stxdw [r2], r1', 'ldxdw r3, [r2+8]', 'add \ 4 | r3, 0x1', 'stxdw [r2+8], r3', 'ldxb r3, [r6+5]', 'lsh r3, 0x8', 'ldxb r4, [r6+4]', 'or r3, r4', 'ldxb r4, [r6+6]', 'ldxb r5, [r6+\ 5 
| 7]', 'lsh r5, 0x8', 'or r5, r4', 'lsh r5, 0x10', 'or r5, r3', 'ldxw r3, [r2+24]', 'sub r5, r3', 'lsh r5, 0x20', 'rsh r5, 0x20', '\ 6 | mov r3, 0x6', 'jgt r3, r5, +36']"]; 7 | 272 [code="['ldxdw r3, [r2+16]', 'sub r1, r3', 'lsh r3, 0x3', 'add r1, r3', 'rsh r1, 0x3', 'stxdw [r2+16], r1', 'ldxb r3, [r6+1]', 'lsh r3, \ 8 | 0x8', 'ldxb r1, [r6]', 'or r3, r1', 'ldxb r1, [r6+3]', 'lsh r1, 0x8', 'ldxb r4, [r6+2]', 'or r1, r4', 'lsh r1, 0x10', 'or r1, r3', '\ 9 | mov r3, 0x20', 'call 0x1f', 'ldxb r1, [r6+5]', 'lsh r1, 0x8', 'ldxb r2, [r6+4]', 'or r1, r2', 'ldxb r2, [r6+6]', 'ldxb r3, [r6+7]', '\ 10 | lsh r3, 0x8', 'or r3, r2', 'lsh r3, 0x10', 'or r3, r1', 'ldxdw r1, [r10-8]', 'stxw [r1+24], r3', 'mov r2, 0x0', 'stxdw [r1+8], r2', '\ 11 | stxdw [r1], r2', 'ldxw r2, [r1+28]', 'add r2, 0x1']"]; 12 | 0 -> 272; 13 | 552 [code="['stxw [r1+28], r2', 'ldxb r1, [r6+20]', 'lsh r1, 0x38', 'arsh r1, 0x38', 'mov r8, 0x0', 'jsgt r8, r1, +7']"]; 14 | 0 -> 552; 15 | 704 [code="['add r6, 0xe', 'mov r3, r10', 'add r3, 0xfffffff0', 'lddw r1, 0x0', 'mov r2, r6', 'call 0x1', 'lsh r0, 0x20', 'rsh r0, 0x20', '\ 16 | lddw r1, 0xffffffff', 'jeq r0, r1, +2']"]; 17 | 808 [code="['ldxw r7, [r1]', 'mov r0, r7', 'exit']"]; 18 | 704 -> 808; 19 | 800 [code="['ldxdw r1, [r10-16]']"]; 20 | 704 -> 800; 21 | 648 [code="['call 0x2', 'lddw r7, 0xfffffffd', 'ldxb r1, [r6+14]', 'lsh r1, 0x38', 'arsh r1, 0x38', 'jsgt r8, r1, +14']"]; 22 | 648 -> 704; 23 | 648 -> 808; 24 | 552 -> 648; 25 | 600 [code="['mov r2, r6', 'add r2, 0x14', 'lddw r1, 0x0', 'mov r3, r6', 'mov r4, 0x0']"]; 26 | 552 -> 600; 27 | } 28 | -------------------------------------------------------------------------------- /tools/test.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UofG-netlab/BPFabric/8c90fb614b8549124b4071586077029972574fe5/tools/test.png -------------------------------------------------------------------------------- /ubpf/.gitignore: 
-------------------------------------------------------------------------------- 1 | libubpf.a 2 | test 3 | *.o 4 | *.gcov 5 | *.gcda 6 | *.gcno 7 | -------------------------------------------------------------------------------- /ubpf/Makefile: -------------------------------------------------------------------------------- 1 | # Copyright 2015 Big Switch Networks, Inc 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | CFLAGS := -Wall -Iinc -I../bpfmap -O2 -g 16 | LDLIBS := -lm 17 | 18 | ifeq ($(COVERAGE),1) 19 | CFLAGS += -fprofile-arcs -ftest-coverage 20 | LDFLAGS += -fprofile-arcs 21 | endif 22 | 23 | ifeq ($(ASAN),1) 24 | CFLAGS += -fsanitize=address 25 | LDFLAGS += -fsanitize=address 26 | endif 27 | 28 | all: libubpf.a 29 | 30 | ubpf_jit_x86_64.o: ubpf_jit_x86_64.c ubpf_jit_x86_64.h 31 | 32 | libubpf.a: ubpf_vm.o ubpf_jit_x86_64.o ubpf_loader.o 33 | ar rc $@ $^ 34 | 35 | test: test.o libubpf.a ../bpfmap/libbpfmap.a 36 | 37 | clean: 38 | rm -f test libubpf.a *.o 39 | -------------------------------------------------------------------------------- /ubpf/ebpf.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2015 Big Switch Networks, Inc 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #ifndef EBPF_H 18 | #define EBPF_H 19 | 20 | #include 21 | 22 | /* eBPF definitions */ 23 | 24 | struct ebpf_inst { 25 | uint8_t opcode; 26 | uint8_t dst : 4; 27 | uint8_t src : 4; 28 | int16_t offset; 29 | int32_t imm; 30 | }; 31 | 32 | #define EBPF_CLS_MASK 0x07 33 | #define EBPF_ALU_OP_MASK 0xf0 34 | 35 | #define EBPF_CLS_LD 0x00 36 | #define EBPF_CLS_LDX 0x01 37 | #define EBPF_CLS_ST 0x02 38 | #define EBPF_CLS_STX 0x03 39 | #define EBPF_CLS_ALU 0x04 40 | #define EBPF_CLS_JMP 0x05 41 | #define EBPF_CLS_ALU64 0x07 42 | 43 | #define EBPF_SRC_IMM 0x00 44 | #define EBPF_SRC_REG 0x08 45 | 46 | #define EBPF_SIZE_W 0x00 47 | #define EBPF_SIZE_H 0x08 48 | #define EBPF_SIZE_B 0x10 49 | #define EBPF_SIZE_DW 0x18 50 | 51 | /* Other memory modes are not yet supported */ 52 | #define EBPF_MODE_IMM 0x00 53 | #define EBPF_MODE_MEM 0x60 54 | 55 | #define EBPF_OP_ADD_IMM (EBPF_CLS_ALU|EBPF_SRC_IMM|0x00) 56 | #define EBPF_OP_ADD_REG (EBPF_CLS_ALU|EBPF_SRC_REG|0x00) 57 | #define EBPF_OP_SUB_IMM (EBPF_CLS_ALU|EBPF_SRC_IMM|0x10) 58 | #define EBPF_OP_SUB_REG (EBPF_CLS_ALU|EBPF_SRC_REG|0x10) 59 | #define EBPF_OP_MUL_IMM (EBPF_CLS_ALU|EBPF_SRC_IMM|0x20) 60 | #define EBPF_OP_MUL_REG (EBPF_CLS_ALU|EBPF_SRC_REG|0x20) 61 | #define EBPF_OP_DIV_IMM (EBPF_CLS_ALU|EBPF_SRC_IMM|0x30) 62 | #define EBPF_OP_DIV_REG (EBPF_CLS_ALU|EBPF_SRC_REG|0x30) 63 | #define EBPF_OP_OR_IMM (EBPF_CLS_ALU|EBPF_SRC_IMM|0x40) 64 | #define EBPF_OP_OR_REG (EBPF_CLS_ALU|EBPF_SRC_REG|0x40) 65 | #define EBPF_OP_AND_IMM (EBPF_CLS_ALU|EBPF_SRC_IMM|0x50) 66 | #define 
EBPF_OP_AND_REG (EBPF_CLS_ALU|EBPF_SRC_REG|0x50) 67 | #define EBPF_OP_LSH_IMM (EBPF_CLS_ALU|EBPF_SRC_IMM|0x60) 68 | #define EBPF_OP_LSH_REG (EBPF_CLS_ALU|EBPF_SRC_REG|0x60) 69 | #define EBPF_OP_RSH_IMM (EBPF_CLS_ALU|EBPF_SRC_IMM|0x70) 70 | #define EBPF_OP_RSH_REG (EBPF_CLS_ALU|EBPF_SRC_REG|0x70) 71 | #define EBPF_OP_NEG (EBPF_CLS_ALU|0x80) 72 | #define EBPF_OP_MOD_IMM (EBPF_CLS_ALU|EBPF_SRC_IMM|0x90) 73 | #define EBPF_OP_MOD_REG (EBPF_CLS_ALU|EBPF_SRC_REG|0x90) 74 | #define EBPF_OP_XOR_IMM (EBPF_CLS_ALU|EBPF_SRC_IMM|0xa0) 75 | #define EBPF_OP_XOR_REG (EBPF_CLS_ALU|EBPF_SRC_REG|0xa0) 76 | #define EBPF_OP_MOV_IMM (EBPF_CLS_ALU|EBPF_SRC_IMM|0xb0) 77 | #define EBPF_OP_MOV_REG (EBPF_CLS_ALU|EBPF_SRC_REG|0xb0) 78 | #define EBPF_OP_ARSH_IMM (EBPF_CLS_ALU|EBPF_SRC_IMM|0xc0) 79 | #define EBPF_OP_ARSH_REG (EBPF_CLS_ALU|EBPF_SRC_REG|0xc0) 80 | #define EBPF_OP_LE (EBPF_CLS_ALU|EBPF_SRC_IMM|0xd0) 81 | #define EBPF_OP_BE (EBPF_CLS_ALU|EBPF_SRC_REG|0xd0) 82 | 83 | #define EBPF_OP_ADD64_IMM (EBPF_CLS_ALU64|EBPF_SRC_IMM|0x00) 84 | #define EBPF_OP_ADD64_REG (EBPF_CLS_ALU64|EBPF_SRC_REG|0x00) 85 | #define EBPF_OP_SUB64_IMM (EBPF_CLS_ALU64|EBPF_SRC_IMM|0x10) 86 | #define EBPF_OP_SUB64_REG (EBPF_CLS_ALU64|EBPF_SRC_REG|0x10) 87 | #define EBPF_OP_MUL64_IMM (EBPF_CLS_ALU64|EBPF_SRC_IMM|0x20) 88 | #define EBPF_OP_MUL64_REG (EBPF_CLS_ALU64|EBPF_SRC_REG|0x20) 89 | #define EBPF_OP_DIV64_IMM (EBPF_CLS_ALU64|EBPF_SRC_IMM|0x30) 90 | #define EBPF_OP_DIV64_REG (EBPF_CLS_ALU64|EBPF_SRC_REG|0x30) 91 | #define EBPF_OP_OR64_IMM (EBPF_CLS_ALU64|EBPF_SRC_IMM|0x40) 92 | #define EBPF_OP_OR64_REG (EBPF_CLS_ALU64|EBPF_SRC_REG|0x40) 93 | #define EBPF_OP_AND64_IMM (EBPF_CLS_ALU64|EBPF_SRC_IMM|0x50) 94 | #define EBPF_OP_AND64_REG (EBPF_CLS_ALU64|EBPF_SRC_REG|0x50) 95 | #define EBPF_OP_LSH64_IMM (EBPF_CLS_ALU64|EBPF_SRC_IMM|0x60) 96 | #define EBPF_OP_LSH64_REG (EBPF_CLS_ALU64|EBPF_SRC_REG|0x60) 97 | #define EBPF_OP_RSH64_IMM (EBPF_CLS_ALU64|EBPF_SRC_IMM|0x70) 98 | #define EBPF_OP_RSH64_REG 
(EBPF_CLS_ALU64|EBPF_SRC_REG|0x70) 99 | #define EBPF_OP_NEG64 (EBPF_CLS_ALU64|0x80) 100 | #define EBPF_OP_MOD64_IMM (EBPF_CLS_ALU64|EBPF_SRC_IMM|0x90) 101 | #define EBPF_OP_MOD64_REG (EBPF_CLS_ALU64|EBPF_SRC_REG|0x90) 102 | #define EBPF_OP_XOR64_IMM (EBPF_CLS_ALU64|EBPF_SRC_IMM|0xa0) 103 | #define EBPF_OP_XOR64_REG (EBPF_CLS_ALU64|EBPF_SRC_REG|0xa0) 104 | #define EBPF_OP_MOV64_IMM (EBPF_CLS_ALU64|EBPF_SRC_IMM|0xb0) 105 | #define EBPF_OP_MOV64_REG (EBPF_CLS_ALU64|EBPF_SRC_REG|0xb0) 106 | #define EBPF_OP_ARSH64_IMM (EBPF_CLS_ALU64|EBPF_SRC_IMM|0xc0) 107 | #define EBPF_OP_ARSH64_REG (EBPF_CLS_ALU64|EBPF_SRC_REG|0xc0) 108 | 109 | #define EBPF_OP_LDXW (EBPF_CLS_LDX|EBPF_MODE_MEM|EBPF_SIZE_W) 110 | #define EBPF_OP_LDXH (EBPF_CLS_LDX|EBPF_MODE_MEM|EBPF_SIZE_H) 111 | #define EBPF_OP_LDXB (EBPF_CLS_LDX|EBPF_MODE_MEM|EBPF_SIZE_B) 112 | #define EBPF_OP_LDXDW (EBPF_CLS_LDX|EBPF_MODE_MEM|EBPF_SIZE_DW) 113 | #define EBPF_OP_STW (EBPF_CLS_ST|EBPF_MODE_MEM|EBPF_SIZE_W) 114 | #define EBPF_OP_STH (EBPF_CLS_ST|EBPF_MODE_MEM|EBPF_SIZE_H) 115 | #define EBPF_OP_STB (EBPF_CLS_ST|EBPF_MODE_MEM|EBPF_SIZE_B) 116 | #define EBPF_OP_STDW (EBPF_CLS_ST|EBPF_MODE_MEM|EBPF_SIZE_DW) 117 | #define EBPF_OP_STXW (EBPF_CLS_STX|EBPF_MODE_MEM|EBPF_SIZE_W) 118 | #define EBPF_OP_STXH (EBPF_CLS_STX|EBPF_MODE_MEM|EBPF_SIZE_H) 119 | #define EBPF_OP_STXB (EBPF_CLS_STX|EBPF_MODE_MEM|EBPF_SIZE_B) 120 | #define EBPF_OP_STXDW (EBPF_CLS_STX|EBPF_MODE_MEM|EBPF_SIZE_DW) 121 | #define EBPF_OP_LDDW (EBPF_CLS_LD|EBPF_MODE_IMM|EBPF_SIZE_DW) 122 | 123 | #define EBPF_OP_JA (EBPF_CLS_JMP|0x00) 124 | #define EBPF_OP_JEQ_IMM (EBPF_CLS_JMP|EBPF_SRC_IMM|0x10) 125 | #define EBPF_OP_JEQ_REG (EBPF_CLS_JMP|EBPF_SRC_REG|0x10) 126 | #define EBPF_OP_JGT_IMM (EBPF_CLS_JMP|EBPF_SRC_IMM|0x20) 127 | #define EBPF_OP_JGT_REG (EBPF_CLS_JMP|EBPF_SRC_REG|0x20) 128 | #define EBPF_OP_JGE_IMM (EBPF_CLS_JMP|EBPF_SRC_IMM|0x30) 129 | #define EBPF_OP_JGE_REG (EBPF_CLS_JMP|EBPF_SRC_REG|0x30) 130 | #define EBPF_OP_JSET_REG 
(EBPF_CLS_JMP|EBPF_SRC_REG|0x40) 131 | #define EBPF_OP_JSET_IMM (EBPF_CLS_JMP|EBPF_SRC_IMM|0x40) 132 | #define EBPF_OP_JNE_IMM (EBPF_CLS_JMP|EBPF_SRC_IMM|0x50) 133 | #define EBPF_OP_JNE_REG (EBPF_CLS_JMP|EBPF_SRC_REG|0x50) 134 | #define EBPF_OP_JSGT_IMM (EBPF_CLS_JMP|EBPF_SRC_IMM|0x60) 135 | #define EBPF_OP_JSGT_REG (EBPF_CLS_JMP|EBPF_SRC_REG|0x60) 136 | #define EBPF_OP_JSGE_IMM (EBPF_CLS_JMP|EBPF_SRC_IMM|0x70) 137 | #define EBPF_OP_JSGE_REG (EBPF_CLS_JMP|EBPF_SRC_REG|0x70) 138 | #define EBPF_OP_CALL (EBPF_CLS_JMP|0x80) 139 | #define EBPF_OP_EXIT (EBPF_CLS_JMP|0x90) 140 | #define EBPF_OP_JLT_IMM (EBPF_CLS_JMP|EBPF_SRC_IMM|0xa0) 141 | #define EBPF_OP_JLT_REG (EBPF_CLS_JMP|EBPF_SRC_REG|0xa0) 142 | #define EBPF_OP_JLE_IMM (EBPF_CLS_JMP|EBPF_SRC_IMM|0xb0) 143 | #define EBPF_OP_JLE_REG (EBPF_CLS_JMP|EBPF_SRC_REG|0xb0) 144 | #define EBPF_OP_JSLT_IMM (EBPF_CLS_JMP|EBPF_SRC_IMM|0xc0) 145 | #define EBPF_OP_JSLT_REG (EBPF_CLS_JMP|EBPF_SRC_REG|0xc0) 146 | #define EBPF_OP_JSLE_IMM (EBPF_CLS_JMP|EBPF_SRC_IMM|0xd0) 147 | #define EBPF_OP_JSLE_REG (EBPF_CLS_JMP|EBPF_SRC_REG|0xd0) 148 | 149 | #endif 150 | -------------------------------------------------------------------------------- /ubpf/inc/ubpf.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2015 Big Switch Networks, Inc 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | #ifndef UBPF_H 18 | #define UBPF_H 19 | 20 | #include 21 | #include 22 | #include 23 | 24 | // Default values for maximum instruction count and stack size. 25 | #if !defined(UBPF_MAX_INSTS) 26 | #define UBPF_MAX_INSTS 65536 27 | #endif 28 | 29 | #if !defined(UBPF_STACK_SIZE) 30 | #define UBPF_STACK_SIZE 128 31 | #endif 32 | 33 | struct ubpf_vm; 34 | typedef uint64_t (*ubpf_jit_fn)(void *mem, size_t mem_len); 35 | 36 | struct ubpf_vm *ubpf_create(void); 37 | void ubpf_destroy(struct ubpf_vm *vm); 38 | 39 | /* 40 | * Enable / disable bounds_check 41 | * 42 | * Bounds check is enabled by default, but it may be too restrictive 43 | * Pass true to enable, false to disable 44 | * Returns previous state 45 | */ 46 | bool ubpf_toggle_bounds_check(struct ubpf_vm *vm, bool enable); 47 | 48 | 49 | /* 50 | * Set the function to be invoked if the jitted program hits divide by zero. 51 | * 52 | * fprintf is the default function to be invoked on division by zero. 53 | */ 54 | void ubpf_set_error_print(struct ubpf_vm *vm, int (*error_printf)(FILE* stream, const char* format, ...)); 55 | 56 | /* 57 | * Register an external function 58 | * 59 | * The immediate field of a CALL instruction is an index into an array of 60 | * functions registered by the user. This API associates a function with 61 | * an index. 62 | * 63 | * 'name' should be a string with a lifetime longer than the VM. 64 | * 65 | * Returns 0 on success, -1 on error. 66 | */ 67 | int ubpf_register(struct ubpf_vm *vm, unsigned int idx, const char *name, void *fn); 68 | 69 | /* 70 | * Load code into a VM 71 | * 72 | * This must be done before calling ubpf_exec or ubpf_compile and after 73 | * registering all functions. 74 | * 75 | * 'code' should point to eBPF bytecodes and 'code_len' should be the size in 76 | * bytes of that buffer. 77 | * 78 | * Returns 0 on success, -1 on error. In case of error a pointer to the error 79 | * message will be stored in 'errmsg' and should be freed by the caller. 
80 | */ 81 | int ubpf_load(struct ubpf_vm *vm, const void *code, uint32_t code_len, char **errmsg); 82 | 83 | /* 84 | * Load code from an ELF file 85 | * 86 | * This must be done before calling ubpf_exec or ubpf_compile and after 87 | * registering all functions. 88 | * 89 | * 'elf' should point to a copy of an ELF file in memory and 'elf_len' should 90 | * be the size in bytes of that buffer. 91 | * 92 | * The ELF file must be 64-bit little-endian with a single text section 93 | * containing the eBPF bytecodes. This is compatible with the output of 94 | * Clang. 95 | * 96 | * Returns 0 on success, -1 on error. In case of error a pointer to the error 97 | * message will be stored in 'errmsg' and should be freed by the caller. 98 | */ 99 | int ubpf_load_elf(struct ubpf_vm *vm, const void *elf, size_t elf_len, char **errmsg); 100 | 101 | uint64_t ubpf_exec(const struct ubpf_vm *vm, void *mem, size_t mem_len); 102 | 103 | ubpf_jit_fn ubpf_compile(struct ubpf_vm *vm, char **errmsg); 104 | 105 | /* 106 | * Translate the eBPF byte code to x64 machine code, store in buffer, and 107 | * write the resulting count of bytes to size. 108 | * 109 | * This must be called after registering all functions. 110 | * 111 | * Returns 0 on success, -1 on error. In case of error a pointer to the error 112 | * message will be stored in 'errmsg' and should be freed by the caller. 
113 | */ 114 | int ubpf_translate(struct ubpf_vm *vm, uint8_t *buffer, size_t *size, char **errmsg); 115 | 116 | #define TABLE_NAME_MAX_LENGTH 32 117 | #define TABLE_MAX_ENTRIES 64 118 | struct table_entry { 119 | int fd; 120 | int type; 121 | int key_size; 122 | int value_size; 123 | int max_entries; 124 | }; 125 | 126 | int ubpf_get_tables(const struct ubpf_vm *vm); 127 | 128 | #endif 129 | -------------------------------------------------------------------------------- /ubpf/test.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2015 Big Switch Networks, Inc 3 | * Copyright 2017 Google Inc. 4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 7 | * You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | 18 | #define _GNU_SOURCE 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include "ubpf.h" 30 | 31 | void ubpf_set_register_offset(int x); 32 | static void *readfile(const char *path, size_t maxlen, size_t *len); 33 | static void register_functions(struct ubpf_vm *vm); 34 | 35 | static void usage(const char *name) 36 | { 37 | fprintf(stderr, "usage: %s [-h] [-j|--jit] [-m|--mem PATH] BINARY\n", name); 38 | fprintf(stderr, "\nExecutes the eBPF code in BINARY and prints the result to stdout.\n"); 39 | fprintf(stderr, "If --mem is given then the specified file will be read and a pointer\nto its data passed in r1.\n"); 40 | fprintf(stderr, "If --jit is given then the JIT compiler will be used.\n"); 41 | fprintf(stderr, "\nOther options:\n"); 42 | fprintf(stderr, " -r, --register-offset NUM: Change the mapping from eBPF to x86 registers\n"); 43 | } 44 | 45 | int main(int argc, char **argv) 46 | { 47 | struct option longopts[] = { 48 | { .name = "help", .val = 'h', }, 49 | { .name = "mem", .val = 'm', .has_arg=1 }, 50 | { .name = "jit", .val = 'j' }, 51 | { .name = "register-offset", .val = 'r', .has_arg=1 }, 52 | { } 53 | }; 54 | 55 | const char *mem_filename = NULL; 56 | bool jit = false; 57 | 58 | int opt; 59 | while ((opt = getopt_long(argc, argv, "hm:jr:", longopts, NULL)) != -1) { 60 | switch (opt) { 61 | case 'm': 62 | mem_filename = optarg; 63 | break; 64 | case 'j': 65 | jit = true; 66 | break; 67 | case 'r': 68 | ubpf_set_register_offset(atoi(optarg)); 69 | break; 70 | case 'h': 71 | usage(argv[0]); 72 | return 0; 73 | default: 74 | usage(argv[0]); 75 | return 1; 76 | } 77 | } 78 | 79 | if (argc != optind + 1) { 80 | usage(argv[0]); 81 | return 1; 82 | } 83 | 84 | const char *code_filename = argv[optind]; 85 | size_t code_len; 86 | void *code = readfile(code_filename, 1024*1024, &code_len); 87 | if (code == NULL) { 88 | return 1; 89 | } 90 | 91 | size_t 
mem_len = 0; 92 | void *mem = NULL; 93 | if (mem_filename != NULL) { 94 | mem = readfile(mem_filename, 1024*1024, &mem_len); 95 | if (mem == NULL) { 96 | return 1; 97 | } 98 | } 99 | 100 | struct ubpf_vm *vm = ubpf_create(); 101 | if (!vm) { 102 | fprintf(stderr, "Failed to create VM\n"); 103 | return 1; 104 | } 105 | 106 | register_functions(vm); 107 | 108 | /* 109 | * The ELF magic corresponds to an RSH instruction with an offset, 110 | * which is invalid. 111 | */ 112 | bool elf = code_len >= SELFMAG && !memcmp(code, ELFMAG, SELFMAG); 113 | 114 | char *errmsg; 115 | int rv; 116 | if (elf) { 117 | rv = ubpf_load_elf(vm, code, code_len, &errmsg); 118 | } else { 119 | rv = ubpf_load(vm, code, code_len, &errmsg); 120 | } 121 | 122 | free(code); 123 | 124 | if (rv < 0) { 125 | fprintf(stderr, "Failed to load code: %s\n", errmsg); 126 | free(errmsg); 127 | ubpf_destroy(vm); 128 | return 1; 129 | } 130 | 131 | uint64_t ret; 132 | 133 | if (jit) { 134 | ubpf_jit_fn fn = ubpf_compile(vm, &errmsg); 135 | if (fn == NULL) { 136 | fprintf(stderr, "Failed to compile: %s\n", errmsg); 137 | free(errmsg); 138 | return 1; 139 | } 140 | ret = fn(mem, mem_len); 141 | } else { 142 | ret = ubpf_exec(vm, mem, mem_len); 143 | } 144 | 145 | printf("0x%"PRIx64"\n", ret); 146 | 147 | ubpf_destroy(vm); 148 | 149 | return 0; 150 | } 151 | 152 | static void *readfile(const char *path, size_t maxlen, size_t *len) 153 | { 154 | FILE *file; 155 | if (!strcmp(path, "-")) { 156 | file = fdopen(STDIN_FILENO, "r"); 157 | } else { 158 | file = fopen(path, "r"); 159 | } 160 | 161 | if (file == NULL) { 162 | fprintf(stderr, "Failed to open %s: %s\n", path, strerror(errno)); 163 | return NULL; 164 | } 165 | 166 | void *data = calloc(maxlen, 1); 167 | size_t offset = 0; 168 | size_t rv; 169 | while ((rv = fread(data+offset, 1, maxlen-offset, file)) > 0) { 170 | offset += rv; 171 | } 172 | 173 | if (ferror(file)) { 174 | fprintf(stderr, "Failed to read %s: %s\n", path, strerror(errno)); 175 | 
fclose(file); 176 | free(data); 177 | return NULL; 178 | } 179 | 180 | if (!feof(file)) { 181 | fprintf(stderr, "Failed to read %s because it is too large (max %u bytes)\n", 182 | path, (unsigned)maxlen); 183 | fclose(file); 184 | free(data); 185 | return NULL; 186 | } 187 | 188 | fclose(file); 189 | if (len) { 190 | *len = offset; 191 | } 192 | return data; 193 | } 194 | 195 | static uint64_t 196 | gather_bytes(uint8_t a, uint8_t b, uint8_t c, uint8_t d, uint8_t e) 197 | { 198 | return ((uint64_t)a << 32) | 199 | ((uint32_t)b << 24) | 200 | ((uint32_t)c << 16) | 201 | ((uint16_t)d << 8) | 202 | e; 203 | } 204 | 205 | static void 206 | trash_registers(void) 207 | { 208 | /* Overwrite all caller-save registers */ 209 | asm( 210 | "mov $0xf0, %rax;" 211 | "mov $0xf1, %rcx;" 212 | "mov $0xf2, %rdx;" 213 | "mov $0xf3, %rsi;" 214 | "mov $0xf4, %rdi;" 215 | "mov $0xf5, %r8;" 216 | "mov $0xf6, %r9;" 217 | "mov $0xf7, %r10;" 218 | "mov $0xf8, %r11;" 219 | ); 220 | } 221 | 222 | static uint32_t 223 | sqrti(uint32_t x) 224 | { 225 | return sqrt(x); 226 | } 227 | 228 | static void 229 | register_functions(struct ubpf_vm *vm) 230 | { 231 | ubpf_register(vm, 0, "gather_bytes", gather_bytes); 232 | ubpf_register(vm, 1, "memfrob", memfrob); 233 | ubpf_register(vm, 2, "trash_registers", trash_registers); 234 | ubpf_register(vm, 3, "sqrti", sqrti); 235 | ubpf_register(vm, 4, "strcmp_ext", strcmp); 236 | } 237 | -------------------------------------------------------------------------------- /ubpf/ubpf_int.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2015 Big Switch Networks, Inc 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #ifndef UBPF_INT_H 18 | #define UBPF_INT_H 19 | 20 | #include 21 | #include "ebpf.h" 22 | 23 | struct ebpf_inst; 24 | typedef uint64_t (*ext_func)(uint64_t arg0, uint64_t arg1, uint64_t arg2, uint64_t arg3, uint64_t arg4); 25 | 26 | struct ubpf_vm { 27 | struct ebpf_inst *insts; 28 | uint16_t num_insts; 29 | ubpf_jit_fn jitted; 30 | size_t jitted_size; 31 | ext_func *ext_funcs; 32 | const char **ext_func_names; 33 | bool bounds_check_enabled; 34 | int (*error_printf)(FILE* stream, const char* format, ...); 35 | int tables; // bpf map containing the list of tables 36 | }; 37 | 38 | char *ubpf_error(const char *fmt, ...); 39 | unsigned int ubpf_lookup_registered_function(struct ubpf_vm *vm, const char *name); 40 | 41 | #endif 42 | -------------------------------------------------------------------------------- /ubpf/ubpf_jit_x86_64.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2015 Big Switch Networks, Inc 3 | * Copyright 2017 Google Inc. 4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 7 | * You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | #define _GNU_SOURCE 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include "ubpf_int.h" 28 | #include "ubpf_jit_x86_64.h" 29 | 30 | #if !defined(_countof) 31 | #define _countof(array) (sizeof(array) / sizeof(array[0])) 32 | #endif 33 | 34 | /* Special values for target_pc in struct jump */ 35 | #define TARGET_PC_EXIT -1 36 | #define TARGET_PC_DIV_BY_ZERO -2 37 | 38 | static void muldivmod(struct jit_state *state, uint16_t pc, uint8_t opcode, int src, int dst, int32_t imm); 39 | 40 | #define REGISTER_MAP_SIZE 11 41 | 42 | /* 43 | * There are two common x86-64 calling conventions, as discussed at 44 | * https://en.wikipedia.org/wiki/X86_calling_conventions#x86-64_calling_conventions 45 | */ 46 | 47 | #if defined(_WIN32) 48 | static int platform_nonvolatile_registers[] = { 49 | RBP, RBX, RDI, RSI, R12, R13, R14, R15}; 50 | static int platform_parameter_registers[] = { 51 | RCX, RDX, R8, R9}; 52 | #define RCX_ALT R15 53 | static int register_map[REGISTER_MAP_SIZE] = { 54 | RAX, 55 | R15, 56 | RDX, 57 | R8, 58 | R9, 59 | R10, 60 | R11, 61 | R12, 62 | R13, 63 | R14, 64 | RBP, 65 | }; 66 | #else 67 | #define RCX_ALT R9 68 | static int platform_nonvolatile_registers[] = { 69 | RBP, RBX, R13, R14, R15}; 70 | static int platform_parameter_registers[] = { 71 | RDI, RSI, RDX, RCX, R8, R9}; 72 | static int register_map[REGISTER_MAP_SIZE] = { 73 | RAX, 74 | RDI, 75 | RSI, 76 | RDX, 77 | R9, 78 | R8, 79 | RBX, 80 | R13, 81 | R14, 82 | R15, 83 | RBP, 84 | }; 85 | #endif 86 | 87 | /* Return the x86 register for the given eBPF register */ 88 | static int 89 | map_register(int r) 90 | { 91 | assert(r < REGISTER_MAP_SIZE); 92 | return register_map[r % REGISTER_MAP_SIZE]; 93 | } 94 | 95 | /* For testing, this changes the mapping between x86 and eBPF registers */ 96 | void 
ubpf_set_register_offset(int x) 97 | { 98 | int i; 99 | if (x < REGISTER_MAP_SIZE) 100 | { 101 | int tmp[REGISTER_MAP_SIZE]; 102 | memcpy(tmp, register_map, sizeof(register_map)); 103 | for (i = 0; i < REGISTER_MAP_SIZE; i++) 104 | { 105 | register_map[i] = tmp[(i + x) % REGISTER_MAP_SIZE]; 106 | } 107 | } 108 | else 109 | { 110 | /* Shuffle array */ 111 | unsigned int seed = x; 112 | for (i = 0; i < REGISTER_MAP_SIZE - 1; i++) 113 | { 114 | int j = i + (rand_r(&seed) % (REGISTER_MAP_SIZE - i)); 115 | int tmp = register_map[j]; 116 | register_map[j] = register_map[i]; 117 | register_map[i] = tmp; 118 | } 119 | } 120 | } 121 | 122 | static int 123 | translate(struct ubpf_vm *vm, struct jit_state *state, char **errmsg) 124 | { 125 | int i; 126 | 127 | /* Save platform non-volatile registers */ 128 | for (i = 0; i < _countof(platform_nonvolatile_registers); i++) 129 | { 130 | emit_push(state, platform_nonvolatile_registers[i]); 131 | } 132 | 133 | /* Move first platform parameter register into register 1 */ 134 | if (map_register(1) != platform_parameter_registers[0]) 135 | { 136 | emit_mov(state, platform_parameter_registers[0], map_register(1)); 137 | } 138 | 139 | /* Copy stack pointer to R10 */ 140 | emit_mov(state, RSP, map_register(10)); 141 | 142 | /* Allocate stack space */ 143 | emit_alu64_imm32(state, 0x81, 5, RSP, UBPF_STACK_SIZE); 144 | 145 | for (i = 0; i < vm->num_insts; i++) 146 | { 147 | struct ebpf_inst inst = vm->insts[i]; 148 | state->pc_locs[i] = state->offset; 149 | 150 | int dst = map_register(inst.dst); 151 | int src = map_register(inst.src); 152 | uint32_t target_pc = i + inst.offset + 1; 153 | 154 | switch (inst.opcode) 155 | { 156 | case EBPF_OP_ADD_IMM: 157 | emit_alu32_imm32(state, 0x81, 0, dst, inst.imm); 158 | break; 159 | case EBPF_OP_ADD_REG: 160 | emit_alu32(state, 0x01, src, dst); 161 | break; 162 | case EBPF_OP_SUB_IMM: 163 | emit_alu32_imm32(state, 0x81, 5, dst, inst.imm); 164 | break; 165 | case EBPF_OP_SUB_REG: 166 | 
emit_alu32(state, 0x29, src, dst); 167 | break; 168 | case EBPF_OP_MUL_IMM: 169 | case EBPF_OP_MUL_REG: 170 | case EBPF_OP_DIV_IMM: 171 | case EBPF_OP_DIV_REG: 172 | case EBPF_OP_MOD_IMM: 173 | case EBPF_OP_MOD_REG: 174 | muldivmod(state, i, inst.opcode, src, dst, inst.imm); 175 | break; 176 | case EBPF_OP_OR_IMM: 177 | emit_alu32_imm32(state, 0x81, 1, dst, inst.imm); 178 | break; 179 | case EBPF_OP_OR_REG: 180 | emit_alu32(state, 0x09, src, dst); 181 | break; 182 | case EBPF_OP_AND_IMM: 183 | emit_alu32_imm32(state, 0x81, 4, dst, inst.imm); 184 | break; 185 | case EBPF_OP_AND_REG: 186 | emit_alu32(state, 0x21, src, dst); 187 | break; 188 | case EBPF_OP_LSH_IMM: 189 | emit_alu32_imm8(state, 0xc1, 4, dst, inst.imm); 190 | break; 191 | case EBPF_OP_LSH_REG: 192 | emit_mov(state, src, RCX); 193 | emit_alu32(state, 0xd3, 4, dst); 194 | break; 195 | case EBPF_OP_RSH_IMM: 196 | emit_alu32_imm8(state, 0xc1, 5, dst, inst.imm); 197 | break; 198 | case EBPF_OP_RSH_REG: 199 | emit_mov(state, src, RCX); 200 | emit_alu32(state, 0xd3, 5, dst); 201 | break; 202 | case EBPF_OP_NEG: 203 | emit_alu32(state, 0xf7, 3, dst); 204 | break; 205 | case EBPF_OP_XOR_IMM: 206 | emit_alu32_imm32(state, 0x81, 6, dst, inst.imm); 207 | break; 208 | case EBPF_OP_XOR_REG: 209 | emit_alu32(state, 0x31, src, dst); 210 | break; 211 | case EBPF_OP_MOV_IMM: 212 | emit_alu32_imm32(state, 0xc7, 0, dst, inst.imm); 213 | break; 214 | case EBPF_OP_MOV_REG: 215 | emit_mov(state, src, dst); 216 | break; 217 | case EBPF_OP_ARSH_IMM: 218 | emit_alu32_imm8(state, 0xc1, 7, dst, inst.imm); 219 | break; 220 | case EBPF_OP_ARSH_REG: 221 | emit_mov(state, src, RCX); 222 | emit_alu32(state, 0xd3, 7, dst); 223 | break; 224 | 225 | case EBPF_OP_LE: 226 | /* No-op */ 227 | break; 228 | case EBPF_OP_BE: 229 | if (inst.imm == 16) 230 | { 231 | /* rol */ 232 | emit1(state, 0x66); /* 16-bit override */ 233 | emit_alu32_imm8(state, 0xc1, 0, dst, 8); 234 | /* and */ 235 | emit_alu32_imm32(state, 0x81, 4, dst, 0xffff); 236 | } 
237 | else if (inst.imm == 32 || inst.imm == 64) 238 | { 239 | /* bswap */ 240 | emit_basic_rex(state, inst.imm == 64, 0, dst); 241 | emit1(state, 0x0f); 242 | emit1(state, 0xc8 | (dst & 7)); 243 | } 244 | break; 245 | 246 | case EBPF_OP_ADD64_IMM: 247 | emit_alu64_imm32(state, 0x81, 0, dst, inst.imm); 248 | break; 249 | case EBPF_OP_ADD64_REG: 250 | emit_alu64(state, 0x01, src, dst); 251 | break; 252 | case EBPF_OP_SUB64_IMM: 253 | emit_alu64_imm32(state, 0x81, 5, dst, inst.imm); 254 | break; 255 | case EBPF_OP_SUB64_REG: 256 | emit_alu64(state, 0x29, src, dst); 257 | break; 258 | case EBPF_OP_MUL64_IMM: 259 | case EBPF_OP_MUL64_REG: 260 | case EBPF_OP_DIV64_IMM: 261 | case EBPF_OP_DIV64_REG: 262 | case EBPF_OP_MOD64_IMM: 263 | case EBPF_OP_MOD64_REG: 264 | muldivmod(state, i, inst.opcode, src, dst, inst.imm); 265 | break; 266 | case EBPF_OP_OR64_IMM: 267 | emit_alu64_imm32(state, 0x81, 1, dst, inst.imm); 268 | break; 269 | case EBPF_OP_OR64_REG: 270 | emit_alu64(state, 0x09, src, dst); 271 | break; 272 | case EBPF_OP_AND64_IMM: 273 | emit_alu64_imm32(state, 0x81, 4, dst, inst.imm); 274 | break; 275 | case EBPF_OP_AND64_REG: 276 | emit_alu64(state, 0x21, src, dst); 277 | break; 278 | case EBPF_OP_LSH64_IMM: 279 | emit_alu64_imm8(state, 0xc1, 4, dst, inst.imm); 280 | break; 281 | case EBPF_OP_LSH64_REG: 282 | emit_mov(state, src, RCX); 283 | emit_alu64(state, 0xd3, 4, dst); 284 | break; 285 | case EBPF_OP_RSH64_IMM: 286 | emit_alu64_imm8(state, 0xc1, 5, dst, inst.imm); 287 | break; 288 | case EBPF_OP_RSH64_REG: 289 | emit_mov(state, src, RCX); 290 | emit_alu64(state, 0xd3, 5, dst); 291 | break; 292 | case EBPF_OP_NEG64: 293 | emit_alu64(state, 0xf7, 3, dst); 294 | break; 295 | case EBPF_OP_XOR64_IMM: 296 | emit_alu64_imm32(state, 0x81, 6, dst, inst.imm); 297 | break; 298 | case EBPF_OP_XOR64_REG: 299 | emit_alu64(state, 0x31, src, dst); 300 | break; 301 | case EBPF_OP_MOV64_IMM: 302 | emit_load_imm(state, dst, inst.imm); 303 | break; 304 | case EBPF_OP_MOV64_REG: 
305 | emit_mov(state, src, dst); 306 | break; 307 | case EBPF_OP_ARSH64_IMM: 308 | emit_alu64_imm8(state, 0xc1, 7, dst, inst.imm); 309 | break; 310 | case EBPF_OP_ARSH64_REG: 311 | emit_mov(state, src, RCX); 312 | emit_alu64(state, 0xd3, 7, dst); 313 | break; 314 | 315 | /* TODO use 8 bit immediate when possible */ 316 | case EBPF_OP_JA: 317 | emit_jmp(state, target_pc); 318 | break; 319 | case EBPF_OP_JEQ_IMM: 320 | emit_cmp_imm32(state, dst, inst.imm); 321 | emit_jcc(state, 0x84, target_pc); 322 | break; 323 | case EBPF_OP_JEQ_REG: 324 | emit_cmp(state, src, dst); 325 | emit_jcc(state, 0x84, target_pc); 326 | break; 327 | case EBPF_OP_JGT_IMM: 328 | emit_cmp_imm32(state, dst, inst.imm); 329 | emit_jcc(state, 0x87, target_pc); 330 | break; 331 | case EBPF_OP_JGT_REG: 332 | emit_cmp(state, src, dst); 333 | emit_jcc(state, 0x87, target_pc); 334 | break; 335 | case EBPF_OP_JGE_IMM: 336 | emit_cmp_imm32(state, dst, inst.imm); 337 | emit_jcc(state, 0x83, target_pc); 338 | break; 339 | case EBPF_OP_JGE_REG: 340 | emit_cmp(state, src, dst); 341 | emit_jcc(state, 0x83, target_pc); 342 | break; 343 | case EBPF_OP_JLT_IMM: 344 | emit_cmp_imm32(state, dst, inst.imm); 345 | emit_jcc(state, 0x82, target_pc); 346 | break; 347 | case EBPF_OP_JLT_REG: 348 | emit_cmp(state, src, dst); 349 | emit_jcc(state, 0x82, target_pc); 350 | break; 351 | case EBPF_OP_JLE_IMM: 352 | emit_cmp_imm32(state, dst, inst.imm); 353 | emit_jcc(state, 0x86, target_pc); 354 | break; 355 | case EBPF_OP_JLE_REG: 356 | emit_cmp(state, src, dst); 357 | emit_jcc(state, 0x86, target_pc); 358 | break; 359 | case EBPF_OP_JSET_IMM: 360 | emit_alu64_imm32(state, 0xf7, 0, dst, inst.imm); 361 | emit_jcc(state, 0x85, target_pc); 362 | break; 363 | case EBPF_OP_JSET_REG: 364 | emit_alu64(state, 0x85, src, dst); 365 | emit_jcc(state, 0x85, target_pc); 366 | break; 367 | case EBPF_OP_JNE_IMM: 368 | emit_cmp_imm32(state, dst, inst.imm); 369 | emit_jcc(state, 0x85, target_pc); 370 | break; 371 | case EBPF_OP_JNE_REG: 372 
| emit_cmp(state, src, dst); 373 | emit_jcc(state, 0x85, target_pc); 374 | break; 375 | case EBPF_OP_JSGT_IMM: 376 | emit_cmp_imm32(state, dst, inst.imm); 377 | emit_jcc(state, 0x8f, target_pc); 378 | break; 379 | case EBPF_OP_JSGT_REG: 380 | emit_cmp(state, src, dst); 381 | emit_jcc(state, 0x8f, target_pc); 382 | break; 383 | case EBPF_OP_JSGE_IMM: 384 | emit_cmp_imm32(state, dst, inst.imm); 385 | emit_jcc(state, 0x8d, target_pc); 386 | break; 387 | case EBPF_OP_JSGE_REG: 388 | emit_cmp(state, src, dst); 389 | emit_jcc(state, 0x8d, target_pc); 390 | break; 391 | case EBPF_OP_JSLT_IMM: 392 | emit_cmp_imm32(state, dst, inst.imm); 393 | emit_jcc(state, 0x8c, target_pc); 394 | break; 395 | case EBPF_OP_JSLT_REG: 396 | emit_cmp(state, src, dst); 397 | emit_jcc(state, 0x8c, target_pc); 398 | break; 399 | case EBPF_OP_JSLE_IMM: 400 | emit_cmp_imm32(state, dst, inst.imm); 401 | emit_jcc(state, 0x8e, target_pc); 402 | break; 403 | case EBPF_OP_JSLE_REG: 404 | emit_cmp(state, src, dst); 405 | emit_jcc(state, 0x8e, target_pc); 406 | break; 407 | case EBPF_OP_CALL: 408 | /* We reserve RCX for shifts */ 409 | emit_mov(state, RCX_ALT, RCX); 410 | emit_call(state, vm->ext_funcs[inst.imm]); 411 | break; 412 | case EBPF_OP_EXIT: 413 | if (i != vm->num_insts - 1) 414 | { 415 | emit_jmp(state, TARGET_PC_EXIT); 416 | } 417 | break; 418 | 419 | case EBPF_OP_LDXW: 420 | emit_load(state, S32, src, dst, inst.offset); 421 | break; 422 | case EBPF_OP_LDXH: 423 | emit_load(state, S16, src, dst, inst.offset); 424 | break; 425 | case EBPF_OP_LDXB: 426 | emit_load(state, S8, src, dst, inst.offset); 427 | break; 428 | case EBPF_OP_LDXDW: 429 | emit_load(state, S64, src, dst, inst.offset); 430 | break; 431 | 432 | case EBPF_OP_STW: 433 | emit_store_imm32(state, S32, dst, inst.offset, inst.imm); 434 | break; 435 | case EBPF_OP_STH: 436 | emit_store_imm32(state, S16, dst, inst.offset, inst.imm); 437 | break; 438 | case EBPF_OP_STB: 439 | emit_store_imm32(state, S8, dst, inst.offset, inst.imm); 440 
| break; 441 | case EBPF_OP_STDW: 442 | emit_store_imm32(state, S64, dst, inst.offset, inst.imm); 443 | break; 444 | 445 | case EBPF_OP_STXW: 446 | emit_store(state, S32, src, dst, inst.offset); 447 | break; 448 | case EBPF_OP_STXH: 449 | emit_store(state, S16, src, dst, inst.offset); 450 | break; 451 | case EBPF_OP_STXB: 452 | emit_store(state, S8, src, dst, inst.offset); 453 | break; 454 | case EBPF_OP_STXDW: 455 | emit_store(state, S64, src, dst, inst.offset); 456 | break; 457 | 458 | case EBPF_OP_LDDW: 459 | { 460 | struct ebpf_inst inst2 = vm->insts[++i]; 461 | uint64_t imm = (uint32_t)inst.imm | ((uint64_t)inst2.imm << 32); 462 | emit_load_imm(state, dst, imm); 463 | break; 464 | } 465 | 466 | default: 467 | *errmsg = ubpf_error("Unknown instruction at PC %d: opcode %02x", i, inst.opcode); 468 | return -1; 469 | } 470 | } 471 | 472 | /* Epilogue */ 473 | state->exit_loc = state->offset; 474 | 475 | /* Move register 0 into rax */ 476 | if (map_register(0) != RAX) 477 | { 478 | emit_mov(state, map_register(0), RAX); 479 | } 480 | 481 | /* Deallocate stack space */ 482 | emit_alu64_imm32(state, 0x81, 0, RSP, UBPF_STACK_SIZE); 483 | 484 | /* Restore platform non-volatile registers */ 485 | for (i = 0; i < _countof(platform_nonvolatile_registers); i++) 486 | { 487 | emit_pop(state, platform_nonvolatile_registers[_countof(platform_nonvolatile_registers) - i - 1]); 488 | } 489 | 490 | emit1(state, 0xc3); /* ret */ 491 | 492 | /* Division by zero handler */ 493 | state->div_by_zero_loc = state->offset; 494 | const char *div_by_zero_fmt = "uBPF error: division by zero at PC %u\n"; 495 | // RCX is the first parameter register for Windows, so first save the value. 
496 | emit_mov(state, RCX, platform_parameter_registers[2]); /* muldivmod stored pc in RCX */ 497 | emit_load_imm(state, platform_parameter_registers[0], (uintptr_t)stderr); 498 | emit_load_imm(state, platform_parameter_registers[1], (uintptr_t)div_by_zero_fmt); 499 | emit_call(state, vm->error_printf); 500 | 501 | emit_load_imm(state, map_register(0), -1); 502 | emit_jmp(state, TARGET_PC_EXIT); 503 | 504 | return 0; 505 | } 506 | 507 | static void 508 | muldivmod(struct jit_state *state, uint16_t pc, uint8_t opcode, int src, int dst, int32_t imm) 509 | { 510 | bool mul = (opcode & EBPF_ALU_OP_MASK) == (EBPF_OP_MUL_IMM & EBPF_ALU_OP_MASK); 511 | bool div = (opcode & EBPF_ALU_OP_MASK) == (EBPF_OP_DIV_IMM & EBPF_ALU_OP_MASK); 512 | bool mod = (opcode & EBPF_ALU_OP_MASK) == (EBPF_OP_MOD_IMM & EBPF_ALU_OP_MASK); 513 | bool is64 = (opcode & EBPF_CLS_MASK) == EBPF_CLS_ALU64; 514 | 515 | if (div || mod) 516 | { 517 | emit_load_imm(state, RCX, pc); 518 | 519 | /* test src,src */ 520 | if (is64) 521 | { 522 | emit_alu64(state, 0x85, src, src); 523 | } 524 | else 525 | { 526 | emit_alu32(state, 0x85, src, src); 527 | } 528 | 529 | /* jz div_by_zero */ 530 | emit_jcc(state, 0x84, TARGET_PC_DIV_BY_ZERO); 531 | } 532 | 533 | if (dst != RAX) 534 | { 535 | emit_push(state, RAX); 536 | } 537 | if (dst != RDX) 538 | { 539 | emit_push(state, RDX); 540 | } 541 | if (imm) 542 | { 543 | emit_load_imm(state, RCX, imm); 544 | } 545 | else 546 | { 547 | emit_mov(state, src, RCX); 548 | } 549 | 550 | emit_mov(state, dst, RAX); 551 | 552 | if (div || mod) 553 | { 554 | /* xor %edx,%edx */ 555 | emit_alu32(state, 0x31, RDX, RDX); 556 | } 557 | 558 | if (is64) 559 | { 560 | emit_rex(state, 1, 0, 0, 0); 561 | } 562 | 563 | /* mul %ecx or div %ecx */ 564 | emit_alu32(state, 0xf7, mul ? 
4 : 6, RCX); 565 | 566 | if (dst != RDX) 567 | { 568 | if (mod) 569 | { 570 | emit_mov(state, RDX, dst); 571 | } 572 | emit_pop(state, RDX); 573 | } 574 | if (dst != RAX) 575 | { 576 | if (div || mul) 577 | { 578 | emit_mov(state, RAX, dst); 579 | } 580 | emit_pop(state, RAX); 581 | } 582 | } 583 | 584 | static void 585 | resolve_jumps(struct jit_state *state) 586 | { 587 | int i; 588 | for (i = 0; i < state->num_jumps; i++) 589 | { 590 | struct jump jump = state->jumps[i]; 591 | 592 | int target_loc; 593 | if (jump.target_pc == TARGET_PC_EXIT) 594 | { 595 | target_loc = state->exit_loc; 596 | } 597 | else if (jump.target_pc == TARGET_PC_DIV_BY_ZERO) 598 | { 599 | target_loc = state->div_by_zero_loc; 600 | } 601 | else 602 | { 603 | target_loc = state->pc_locs[jump.target_pc]; 604 | } 605 | 606 | /* Assumes jump offset is at end of instruction */ 607 | uint32_t rel = target_loc - (jump.offset_loc + sizeof(uint32_t)); 608 | 609 | uint8_t *offset_ptr = &state->buf[jump.offset_loc]; 610 | memcpy(offset_ptr, &rel, sizeof(uint32_t)); 611 | } 612 | } 613 | 614 | int ubpf_translate(struct ubpf_vm *vm, uint8_t *buffer, size_t *size, char **errmsg) 615 | { 616 | struct jit_state state; 617 | int result = -1; 618 | 619 | state.offset = 0; 620 | state.size = *size; 621 | state.buf = buffer; 622 | state.pc_locs = calloc(UBPF_MAX_INSTS + 1, sizeof(state.pc_locs[0])); 623 | state.jumps = calloc(UBPF_MAX_INSTS, sizeof(state.jumps[0])); 624 | state.num_jumps = 0; 625 | 626 | if (translate(vm, &state, errmsg) < 0) 627 | { 628 | goto out; 629 | } 630 | 631 | resolve_jumps(&state); 632 | result = 0; 633 | 634 | *size = state.offset; 635 | 636 | out: 637 | free(state.pc_locs); 638 | free(state.jumps); 639 | return result; 640 | } 641 | 642 | ubpf_jit_fn 643 | ubpf_compile(struct ubpf_vm *vm, char **errmsg) 644 | { 645 | void *jitted = NULL; 646 | uint8_t *buffer = NULL; 647 | size_t jitted_size; 648 | 649 | if (vm->jitted) 650 | { 651 | return vm->jitted; 652 | } 653 | 654 | *errmsg = 
NULL; 655 | 656 | if (!vm->insts) 657 | { 658 | *errmsg = ubpf_error("code has not been loaded into this VM"); 659 | return NULL; 660 | } 661 | 662 | jitted_size = 65536; 663 | buffer = calloc(jitted_size, 1); 664 | 665 | if (ubpf_translate(vm, buffer, &jitted_size, errmsg) < 0) 666 | { 667 | goto out; 668 | } 669 | 670 | jitted = mmap(0, jitted_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); 671 | if (jitted == MAP_FAILED) 672 | { 673 | *errmsg = ubpf_error("internal uBPF error: mmap failed: %s\n", strerror(errno)); 674 | goto out; 675 | } 676 | 677 | memcpy(jitted, buffer, jitted_size); 678 | 679 | if (mprotect(jitted, jitted_size, PROT_READ | PROT_EXEC) < 0) 680 | { 681 | *errmsg = ubpf_error("internal uBPF error: mprotect failed: %s\n", strerror(errno)); 682 | goto out; 683 | } 684 | 685 | vm->jitted = jitted; 686 | vm->jitted_size = jitted_size; 687 | 688 | out: 689 | free(buffer); 690 | if (jitted && vm->jitted == NULL) 691 | { 692 | munmap(jitted, jitted_size); 693 | } 694 | return vm->jitted; 695 | } 696 | -------------------------------------------------------------------------------- /ubpf/ubpf_jit_x86_64.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2015 Big Switch Networks, Inc 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
/*
 * Copyright 2015 Big Switch Networks, Inc
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/*
 * Generic x86-64 code generation functions
 */

#ifndef UBPF_JIT_X86_64_H
#define UBPF_JIT_X86_64_H

#include <assert.h>
#include <stdint.h>
#include <string.h>

/* Register numbers; bit 3 selects the REX extension, bits 0-2 go in ModRM */
#define RAX 0
#define RCX 1
#define RDX 2
#define RBX 3
#define RSP 4
#define RBP 5
#define RSI 6
#define RDI 7
#define R8 8
#define R9 9
#define R10 10
#define R11 11
#define R12 12
#define R13 13
#define R14 14
#define R15 15

/* Width of a memory operand */
enum operand_size {
    S8,
    S16,
    S32,
    S64,
};

/* A jump whose 32-bit displacement still has to be filled in */
struct jump {
    uint32_t offset_loc; /* position of the displacement inside the buffer */
    uint32_t target_pc;  /* jump target, recorded by emit_jump_offset() */
};

/* Code generation state shared by all emit helpers */
struct jit_state {
    uint8_t *buf;             /* output buffer */
    uint32_t offset;          /* number of bytes emitted so far */
    uint32_t size;            /* capacity of buf in bytes */
    uint32_t *pc_locs;        /* per-target buffer locations, indexed by target pc */
    uint32_t exit_loc;        /* buffer location used for exit jumps */
    uint32_t div_by_zero_loc; /* buffer location used for division-by-zero jumps */
    struct jump *jumps;       /* jumps recorded for later patching */
    int num_jumps;            /* number of used entries in jumps */
};

/*
 * Append 'len' bytes from 'data' to the output buffer.
 * Asserts that the bytes fit into the remaining space.
 */
static inline void
emit_bytes(struct jit_state *state, void *data, uint32_t len)
{
    /* Checking len first keeps the unsigned subtraction from wrapping */
    assert(len <= state->size && state->offset <= state->size - len);
    memcpy(state->buf + state->offset, data, len);
    state->offset += len;
}

/* Append a single byte */
static inline void
emit1(struct jit_state *state, uint8_t x)
{
    emit_bytes(state, &x, sizeof(x));
}

/* Append a 16-bit value in host byte order */
static inline void
emit2(struct jit_state *state, uint16_t x)
{
    emit_bytes(state, &x, sizeof(x));
}

/* Append a 32-bit value in host byte order */
static inline void
emit4(struct jit_state *state, uint32_t x)
{
    emit_bytes(state, &x, sizeof(x));
}

/* Append a 64-bit value in host byte order */
static inline void
emit8(struct jit_state *state, uint64_t x)
{
    emit_bytes(state, &x, sizeof(x));
}

/*
 * Record a jump to 'target_pc' at the current position and reserve four
 * zero bytes for its displacement.
 * NOTE(review): no bound is checked on num_jumps here; the caller must size
 * state->jumps accordingly.
 */
static inline void
emit_jump_offset(struct jit_state *state, int32_t target_pc)
{
    struct jump *jump = &state->jumps[state->num_jumps++];
    jump->offset_loc = state->offset;
    jump->target_pc = target_pc;
    emit4(state, 0);
}

/* Emit a ModRM byte; 'mod' must already be positioned in bits 6-7 */
static inline void
emit_modrm(struct jit_state *state, int mod, int r, int m)
{
    assert(!(mod & ~0xc0));
    emit1(state, (mod & 0xc0) | ((r & 7) << 3) | (m & 7));
}

/* Emit a ModRM byte for a register-to-register operation */
static inline void
emit_modrm_reg2reg(struct jit_state *state, int r, int m)
{
    emit_modrm(state, 0xc0, r, m);
}

/*
 * Emit a ModRM byte for [m + d] using the shortest displacement form:
 * none, 8-bit or 32-bit.
 */
static inline void
emit_modrm_and_displacement(struct jit_state *state, int r, int m, int32_t d)
{
    /* A base whose low bits equal RBP always gets an explicit displacement */
    if (d == 0 && (m & 7) != RBP) {
        emit_modrm(state, 0x00, r, m);
    } else if (d >= -128 && d <= 127) {
        emit_modrm(state, 0x40, r, m);
        emit1(state, d);
    } else {
        emit_modrm(state, 0x80, r, m);
        emit4(state, d);
    }
}

/* Emit a REX prefix; every argument must be 0 or 1 */
static inline void
emit_rex(struct jit_state *state, int w, int r, int x, int b)
{
    assert(!(w & ~1));
    assert(!(r & ~1));
    assert(!(x & ~1));
    assert(!(b & ~1));
    emit1(state, 0x40 | (w << 3) | (r << 2) | (x << 1) | b);
}

/*
 * Emits a REX prefix with the top bit of src and dst.
 * Skipped if no bits would be set.
 */
static inline void
emit_basic_rex(struct jit_state *state, int w, int src, int dst)
{
    if (w || (src & 8) || (dst & 8)) {
        emit_rex(state, w, !!(src & 8), 0, !!(dst & 8));
    }
}

/* Push register 'r' */
static inline void
emit_push(struct jit_state *state, int r)
{
    emit_basic_rex(state, 0, 0, r);
    emit1(state, 0x50 | (r & 7));
}

/* Pop into register 'r' */
static inline void
emit_pop(struct jit_state *state, int r)
{
    emit_basic_rex(state, 0, 0, r);
    emit1(state, 0x58 | (r & 7));
}

/* REX prefix and ModRM byte */
/* We use the MR encoding when there is a choice */
/* 'src' is often used as an opcode extension */
static inline void
emit_alu32(struct jit_state *state, int op, int src, int dst)
{
    emit_basic_rex(state, 0, src, dst);
    emit1(state, op);
    emit_modrm_reg2reg(state, src, dst);
}

/* REX prefix, ModRM byte, and 32-bit immediate */
static inline void
emit_alu32_imm32(struct jit_state *state, int op, int src, int dst, int32_t imm)
{
    emit_alu32(state, op, src, dst);
    emit4(state, imm);
}

/* REX prefix, ModRM byte, and 8-bit immediate */
static inline void
emit_alu32_imm8(struct jit_state *state, int op, int src, int dst, int8_t imm)
{
    emit_alu32(state, op, src, dst);
    emit1(state, imm);
}

/* REX.W prefix and ModRM byte */
/* We use the MR encoding when there is a choice */
/* 'src' is often used as an opcode extension */
static inline void
emit_alu64(struct jit_state *state, int op, int src, int dst)
{
    emit_basic_rex(state, 1, src, dst);
    emit1(state, op);
    emit_modrm_reg2reg(state, src, dst);
}

/* REX.W prefix, ModRM byte, and 32-bit immediate */
static inline void
emit_alu64_imm32(struct jit_state *state, int op, int src, int dst, int32_t imm)
{
    emit_alu64(state, op, src, dst);
    emit4(state, imm);
}

/* REX.W prefix, ModRM byte, and 8-bit immediate */
static inline void
emit_alu64_imm8(struct jit_state *state, int op, int src, int dst, int8_t imm)
{
    emit_alu64(state, op, src, dst);
    emit1(state, imm);
}

/* Register to register mov */
static inline void
emit_mov(struct jit_state *state, int src, int dst)
{
    emit_alu64(state, 0x89, src, dst);
}

/* Compare register 'dst' with a 32-bit immediate (opcode 0x81, extension 7) */
static inline void
emit_cmp_imm32(struct jit_state *state, int dst, int32_t imm)
{
    emit_alu64_imm32(state, 0x81, 7, dst, imm);
}

/* Compare register 'dst' with register 'src' */
static inline void
emit_cmp(struct jit_state *state, int src, int dst)
{
    emit_alu64(state, 0x39, src, dst);
}

/* Two-byte conditional jump (0x0f, code) with a displacement patched later */
static inline void
emit_jcc(struct jit_state *state, int code, int32_t target_pc)
{
    emit1(state, 0x0f);
    emit1(state, code);
    emit_jump_offset(state, target_pc);
}

/* Load [src + offset] into dst */
static inline void
emit_load(struct jit_state *state, enum operand_size size, int src, int dst, int32_t offset)
{
    emit_basic_rex(state, size == S64, dst, src);

    if (size == S8 || size == S16) {
        /* movzx */
        emit1(state, 0x0f);
        emit1(state, size == S8 ? 0xb6 : 0xb7);
    } else if (size == S32 || size == S64) {
        /* mov */
        emit1(state, 0x8b);
    }

    emit_modrm_and_displacement(state, dst, src, offset);
}

/* Load sign-extended immediate into register */
static inline void
emit_load_imm(struct jit_state *state, int dst, int64_t imm)
{
    if (imm >= INT32_MIN && imm <= INT32_MAX) {
        emit_alu64_imm32(state, 0xc7, 0, dst, imm);
    } else {
        /* movabs $imm,dst */
        emit_basic_rex(state, 1, 0, dst);
        emit1(state, 0xb8 | (dst & 7));
        emit8(state, imm);
    }
}

/* Store register src to [dst + offset] */
static inline void
emit_store(struct jit_state *state, enum operand_size size, int src, int dst, int32_t offset)
{
    if (size == S16) {
        emit1(state, 0x66); /* 16-bit override */
    }
    int rexw = size == S64;
    /* Byte stores always get a REX prefix, even an otherwise empty one */
    if (rexw || src & 8 || dst & 8 || size == S8) {
        emit_rex(state, rexw, !!(src & 8), 0, !!(dst & 8));
    }
    emit1(state, size == S8 ? 0x88 : 0x89);
    emit_modrm_and_displacement(state, src, dst, offset);
}

/* Store immediate to [dst + offset] */
static inline void
emit_store_imm32(struct jit_state *state, enum operand_size size, int dst, int32_t offset, int32_t imm)
{
    if (size == S16) {
        emit1(state, 0x66); /* 16-bit override */
    }
    emit_basic_rex(state, size == S64, 0, dst);
    emit1(state, size == S8 ? 0xc6 : 0xc7);
    emit_modrm_and_displacement(state, 0, dst, offset);
    /* The immediate is as wide as the operand, capped at 32 bits */
    if (size == S32 || size == S64) {
        emit4(state, imm);
    } else if (size == S16) {
        emit2(state, imm);
    } else if (size == S8) {
        emit1(state, imm);
    }
}

/* Call 'target' indirectly through RAX */
static inline void
emit_call(struct jit_state *state, void *target)
{
    /* TODO use direct call when possible */
    emit_load_imm(state, RAX, (uintptr_t)target);
    /* callq *%rax */
    emit1(state, 0xff);
    emit1(state, 0xd0);
}

/* Unconditional near jump (0xe9) with a displacement patched later */
static inline void
emit_jmp(struct jit_state *state, uint32_t target_pc)
{
    emit1(state, 0xe9);
    emit_jump_offset(state, target_pc);
}

#endif
15 | */ 16 | 17 | #define _GNU_SOURCE 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include "ubpf_int.h" 25 | #include 26 | 27 | #include 28 | #include "bpfmap.h" 29 | 30 | #define MAX_SECTIONS 32 31 | 32 | #ifndef EM_BPF 33 | #define EM_BPF 247 34 | #endif 35 | 36 | #ifndef BPF_PSEUDO_MAP_FD 37 | #define BPF_PSEUDO_MAP_FD 1 38 | #endif 39 | 40 | struct bounds 41 | { 42 | const void *base; 43 | uint64_t size; 44 | }; 45 | 46 | struct section 47 | { 48 | const Elf64_Shdr *shdr; 49 | const void *data; 50 | uint64_t size; 51 | }; 52 | 53 | // Should we replace this with the definition in linux/bpf.h? 54 | struct bpf_map_def 55 | { 56 | unsigned int type; 57 | unsigned int key_size; 58 | unsigned int value_size; 59 | unsigned int max_entries; 60 | unsigned int map_flags; 61 | }; 62 | 63 | static const void * 64 | bounds_check(struct bounds *bounds, uint64_t offset, uint64_t size) 65 | { 66 | if (offset + size > bounds->size || offset + size < offset) 67 | { 68 | return NULL; 69 | } 70 | return bounds->base + offset; 71 | } 72 | 73 | int ubpf_load_elf(struct ubpf_vm *vm, const void *elf, size_t elf_size, char **errmsg) 74 | { 75 | struct bounds b = {.base = elf, .size = elf_size}; 76 | void *text_copy = NULL; 77 | int i; 78 | 79 | const Elf64_Ehdr *ehdr = bounds_check(&b, 0, sizeof(*ehdr)); 80 | if (!ehdr) 81 | { 82 | *errmsg = ubpf_error("not enough data for ELF header"); 83 | goto error; 84 | } 85 | 86 | if (memcmp(ehdr->e_ident, ELFMAG, SELFMAG)) 87 | { 88 | *errmsg = ubpf_error("wrong magic"); 89 | goto error; 90 | } 91 | 92 | if (ehdr->e_ident[EI_CLASS] != ELFCLASS64) 93 | { 94 | *errmsg = ubpf_error("wrong class"); 95 | goto error; 96 | } 97 | 98 | // TODO CHECK: This check assumes the host platform and the eBPF endianess architecture match 99 | // if (ehdr->e_ident[EI_DATA] != ELFDATA2LSB) { 100 | // *errmsg = ubpf_error("wrong byte order: got %d expected %d", ehdr->e_ident[EI_DATA], ELFDATA2LSB); 101 | // goto error; 
102 | // } 103 | 104 | if (ehdr->e_ident[EI_VERSION] != 1) 105 | { 106 | *errmsg = ubpf_error("wrong version"); 107 | goto error; 108 | } 109 | 110 | if (ehdr->e_ident[EI_OSABI] != ELFOSABI_NONE) 111 | { 112 | *errmsg = ubpf_error("wrong OS ABI"); 113 | goto error; 114 | } 115 | 116 | if (ehdr->e_type != ET_REL) 117 | { 118 | *errmsg = ubpf_error("wrong type, expected relocatable"); 119 | goto error; 120 | } 121 | 122 | if (ehdr->e_machine != EM_NONE && ehdr->e_machine != EM_BPF) 123 | { 124 | *errmsg = ubpf_error("wrong machine, expected none or BPF, got %d", 125 | ehdr->e_machine); 126 | goto error; 127 | } 128 | 129 | if (ehdr->e_shnum > MAX_SECTIONS) 130 | { 131 | *errmsg = ubpf_error("too many sections"); 132 | goto error; 133 | } 134 | 135 | // ref to string table, TODO: probably a better way to reference to the strings_table 136 | const char *strings_table = NULL; 137 | 138 | /* Parse section headers into an array */ 139 | struct section sections[MAX_SECTIONS]; 140 | for (i = 0; i < ehdr->e_shnum; i++) 141 | { 142 | const Elf64_Shdr *shdr = bounds_check(&b, ehdr->e_shoff + i * ehdr->e_shentsize, sizeof(*shdr)); 143 | if (!shdr) 144 | { 145 | *errmsg = ubpf_error("bad section header offset or size"); 146 | goto error; 147 | } 148 | 149 | const void *data = bounds_check(&b, shdr->sh_offset, shdr->sh_size); 150 | if (!data) 151 | { 152 | *errmsg = ubpf_error("bad section offset or size"); 153 | goto error; 154 | } 155 | 156 | sections[i].shdr = shdr; 157 | sections[i].data = data; 158 | sections[i].size = shdr->sh_size; 159 | 160 | // Store the reference to the strings table 161 | if (shdr->sh_type == SHT_STRTAB) 162 | { 163 | strings_table = data; 164 | } 165 | } 166 | 167 | // Find the reference to the symtab and maps sections, NOTE: quite hacky way of doing things ... 
168 | int symtab_idx = 0; 169 | int maps_idx = 0; 170 | 171 | for (i = 0; i < ehdr->e_shnum; i++) 172 | { 173 | struct section *sec = §ions[i]; 174 | 175 | if (sec->shdr->sh_type == SHT_SYMTAB) 176 | { 177 | symtab_idx = i; 178 | } 179 | 180 | else if (strcmp("maps", strings_table + sec->shdr->sh_name) == 0) 181 | { 182 | maps_idx = i; 183 | } 184 | } 185 | 186 | if (symtab_idx != 0 && maps_idx != 0) 187 | { 188 | // Iterate over symbol definition to find the maps 189 | struct section *symtab = §ions[symtab_idx]; 190 | const Elf64_Sym *syms = symtab->data; 191 | uint32_t num_syms = symtab->size / sizeof(Elf64_Sym); 192 | for (i = 0; i < num_syms; i++) 193 | { 194 | // Get the related section using st_shndx entry 195 | const Elf64_Sym *sym = &syms[i]; 196 | struct section *rel = §ions[sym->st_shndx]; 197 | 198 | // If the related section is the maps definition, then we have a table definition symbol 199 | if (sym->st_shndx == maps_idx) 200 | { 201 | int bpf_map_def_idx = sym->st_value / sizeof(struct bpf_map_def); 202 | const struct bpf_map_def *maps_defs = rel->data; 203 | const struct bpf_map_def map_def = maps_defs[bpf_map_def_idx]; 204 | 205 | // TODO do we have to copy the name as it will be copied again ... 
206 | char map_name[TABLE_NAME_MAX_LENGTH] = {0}; 207 | strncpy(map_name, strings_table + sym->st_name, TABLE_NAME_MAX_LENGTH - 1); 208 | 209 | // 210 | int ret; 211 | struct table_entry *tab_entry; 212 | ret = bpf_lookup_elem(vm->tables, map_name, &tab_entry); 213 | 214 | // If the map doesn't exist create it 215 | if (ret == -1) 216 | { 217 | tab_entry = calloc(1, sizeof(struct table_entry)); 218 | tab_entry->fd = bpf_create_map(map_def.type, map_def.key_size, map_def.value_size, map_def.max_entries, map_def.map_flags); 219 | 220 | if (tab_entry->fd == -1) 221 | { 222 | *errmsg = ubpf_error("unable to allocate BPF table"); 223 | goto error; 224 | } 225 | 226 | tab_entry->type = map_def.type; 227 | tab_entry->key_size = map_def.key_size; 228 | tab_entry->value_size = map_def.value_size; 229 | tab_entry->max_entries = map_def.max_entries; 230 | 231 | ret = bpf_update_elem(vm->tables, map_name, tab_entry, 0); 232 | free(tab_entry); 233 | } 234 | } 235 | } 236 | } 237 | 238 | /* Find first text section */ 239 | int text_shndx = 0; 240 | for (i = 0; i < ehdr->e_shnum; i++) 241 | { 242 | const Elf64_Shdr *shdr = sections[i].shdr; 243 | if (shdr->sh_type == SHT_PROGBITS && 244 | shdr->sh_flags == (SHF_ALLOC | SHF_EXECINSTR)) 245 | { 246 | text_shndx = i; 247 | break; 248 | } 249 | } 250 | 251 | if (!text_shndx) 252 | { 253 | *errmsg = ubpf_error("text section not found"); 254 | goto error; 255 | } 256 | 257 | struct section *text = §ions[text_shndx]; 258 | 259 | /* May need to modify text for relocations, so make a copy */ 260 | text_copy = malloc(text->size); 261 | if (!text_copy) 262 | { 263 | *errmsg = ubpf_error("failed to allocate memory"); 264 | goto error; 265 | } 266 | memcpy(text_copy, text->data, text->size); 267 | 268 | /* Process each relocation section */ 269 | for (i = 0; i < ehdr->e_shnum; i++) 270 | { 271 | struct section *rel = §ions[i]; 272 | 273 | if (rel->shdr->sh_type != SHT_REL) 274 | { 275 | continue; 276 | } 277 | else if (rel->shdr->sh_info != 
text_shndx) 278 | { 279 | continue; 280 | } 281 | 282 | const Elf64_Rel *rs = rel->data; 283 | 284 | if (rel->shdr->sh_link >= ehdr->e_shnum) 285 | { 286 | *errmsg = ubpf_error("bad symbol table section index"); 287 | goto error; 288 | } 289 | 290 | struct section *symtab = §ions[rel->shdr->sh_link]; 291 | const Elf64_Sym *syms = symtab->data; 292 | uint32_t num_syms = symtab->size / sizeof(syms[0]); 293 | 294 | if (symtab->shdr->sh_link >= ehdr->e_shnum) 295 | { 296 | *errmsg = ubpf_error("bad string table section index"); 297 | goto error; 298 | } 299 | 300 | struct section *strtab = §ions[symtab->shdr->sh_link]; 301 | const char *strings = strtab->data; 302 | 303 | int j; 304 | for (j = 0; j < rel->size / sizeof(Elf64_Rel); j++) 305 | { 306 | const Elf64_Rel *r = &rs[j]; 307 | 308 | // if (ELF64_R_TYPE(r->r_info) != 2) { 309 | // *errmsg = ubpf_error("bad relocation type %u", ELF64_R_TYPE(r->r_info)); 310 | // goto error; 311 | // } 312 | 313 | uint32_t sym_idx = ELF64_R_SYM(r->r_info); 314 | if (sym_idx >= num_syms) 315 | { 316 | *errmsg = ubpf_error("bad symbol index"); 317 | goto error; 318 | } 319 | 320 | const Elf64_Sym *sym = &syms[sym_idx]; 321 | 322 | // printf("st_name %d st_info %d st_other %d st_shndx %d st_value %lu st_size %lu\n", sym->st_name, sym->st_info, sym->st_other, sym->st_shndx, sym->st_value, sym->st_size); 323 | 324 | if (sym->st_name >= strtab->size) 325 | { 326 | *errmsg = ubpf_error("bad symbol name"); 327 | goto error; 328 | } 329 | 330 | const char *sym_name = strings + sym->st_name; 331 | 332 | if (r->r_offset + 8 > text->size) 333 | { 334 | *errmsg = ubpf_error("bad relocation offset"); 335 | goto error; 336 | } 337 | 338 | // Custom map relocation 339 | if (ELF64_R_TYPE(r->r_info) == 1 && sym->st_shndx == maps_idx) 340 | { 341 | struct ebpf_inst *insns = text_copy; 342 | unsigned int insn_idx; 343 | 344 | insn_idx = r->r_offset / sizeof(struct ebpf_inst); 345 | 346 | if (insns[insn_idx].opcode != (EBPF_CLS_LD | EBPF_SRC_IMM | 
EBPF_SIZE_DW)) 347 | { 348 | *errmsg = ubpf_error("bad relocation for instruction 0x%x at index %d\n", insns[insn_idx].opcode, insn_idx); 349 | goto error; 350 | } 351 | 352 | char map_name[32] = {0}; 353 | struct table_entry *tab_entry; 354 | strncpy(map_name, sym_name, 31); 355 | 356 | if (bpf_lookup_elem(vm->tables, map_name, &tab_entry) != 0) 357 | { 358 | *errmsg = ubpf_error("cannot find map"); 359 | goto error; 360 | } 361 | 362 | insns[insn_idx].src = BPF_PSEUDO_MAP_FD; // do we need this? 363 | insns[insn_idx].imm = tab_entry->fd; 364 | } 365 | 366 | // Perform string relocation 367 | else if (ELF64_R_TYPE(r->r_info) == 1) 368 | { 369 | struct section *rodata = §ions[sym->st_shndx]; 370 | 371 | struct ebpf_inst *insns = text_copy; 372 | unsigned int insn_idx; 373 | 374 | insn_idx = r->r_offset / sizeof(struct ebpf_inst); 375 | 376 | if (insns[insn_idx].opcode != (EBPF_CLS_LD | EBPF_SRC_IMM | EBPF_SIZE_DW)) // LDDW instruction 377 | { 378 | *errmsg = ubpf_error("bad relocation for instruction 0x%x at index %d\n", insns[insn_idx].opcode, insn_idx); 379 | goto error; 380 | } 381 | 382 | uint64_t address = (uintptr_t)rodata->data + sym->st_value; 383 | insns[insn_idx].imm = address; 384 | insns[insn_idx + 1].imm = address >> 32; 385 | } 386 | 387 | else if (ELF64_R_TYPE(r->r_info) == 2) 388 | { 389 | unsigned int imm = ubpf_lookup_registered_function(vm, sym_name); 390 | if (imm == -1) 391 | { 392 | *errmsg = ubpf_error("function '%s' not found", sym_name); 393 | goto error; 394 | } 395 | 396 | *(uint32_t *)(text_copy + r->r_offset + 4) = imm; 397 | } 398 | 399 | else 400 | { 401 | *errmsg = ubpf_error("bad relocation type %u", ELF64_R_TYPE(r->r_info)); 402 | goto error; 403 | } 404 | } 405 | } 406 | 407 | int rv = ubpf_load(vm, text_copy, sections[text_shndx].size, errmsg); 408 | free(text_copy); 409 | return rv; 410 | 411 | error: 412 | free(text_copy); 413 | return -1; 414 | } 415 | 
--------------------------------------------------------------------------------