├── .github └── ISSUE_TEMPLATE.md ├── .gitignore ├── .travis.yml ├── Documentation ├── development.md └── getting-started.md ├── LICENSE.md ├── Makefile ├── README.md ├── apps ├── curl-poll │ ├── Makefile │ └── curl-poll.c └── curl │ ├── Makefile │ └── curl.c ├── build └── BUILD ├── include ├── arp.h ├── basic.h ├── cli.h ├── dst.h ├── ethernet.h ├── icmpv4.h ├── inet.h ├── ip.h ├── ipc.h ├── list.h ├── netdev.h ├── route.h ├── skbuff.h ├── sock.h ├── socket.h ├── syshead.h ├── tcp.h ├── tcp_data.h ├── timer.h ├── tuntap_if.h ├── utils.h └── wait.h ├── src ├── arp.c ├── cli.c ├── dst.c ├── icmpv4.c ├── inet.c ├── ip_input.c ├── ip_output.c ├── ipc.c ├── main.c ├── netdev.c ├── route.c ├── skbuff.c ├── sock.c ├── socket.c ├── tcp.c ├── tcp_data.c ├── tcp_input.c ├── tcp_output.c ├── timer.c ├── tuntap_if.c └── utils.c ├── tests ├── README.md ├── suites │ ├── arp │ │ └── suite-arp │ ├── icmp │ │ └── suite-icmp │ └── tcp │ │ ├── curl-fixture.txt │ │ ├── env-delayed │ │ ├── env-duplication │ │ ├── env-lossy │ │ ├── env-normal │ │ ├── suite-curl │ │ ├── suite-packet-delay │ │ ├── suite-packet-duplication │ │ ├── suite-packet-loss │ │ └── tests ├── test-run-all └── utils │ └── common └── tools ├── Makefile ├── level-ip ├── liblevelip.c └── liblevelip.h /.github/ISSUE_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | # Describe your issue 2 | 3 | - Is this a bug report or a feature request? 4 | - Describe the issue 5 | - What is the expected behaviour? 6 | - How to reproduce the problem? 7 | 8 | # Describe your running environment 9 | 10 | This is important for troubleshooting. Please attach at least the following info: 11 | 12 | - **OS** (e.g. `cat /etc/os-release`): 13 | - **Kernel** (e.g. 
`uname -a`): 14 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | build/*.o 3 | a.out 4 | *.o 5 | *cscope* 6 | lvl-ip 7 | tests/venv 8 | vgcore* 9 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | sudo: required 2 | 3 | language: c 4 | 5 | compiler: 6 | - clang 7 | 8 | before_install: 9 | - sudo apt-get -qq update 10 | - sudo apt-get install -qqy iputils-arping libcap-dev libcap2-bin 11 | 12 | script: 13 | - sudo mknod /dev/net/tap c 10 200 14 | - sudo chmod 0666 /dev/net/tap 15 | - sudo setcap cap_net_admin=ep /bin/ip 16 | - make test 17 | -------------------------------------------------------------------------------- /Documentation/development.md: -------------------------------------------------------------------------------- 1 | # Development 2 | 3 | Level-IP is at a very alpha stage, has many hardcoded values and is not really intuitive to develop on. 4 | 5 | This document aims to provide information on the current features, roadmap and overall development routine. 6 | 7 | # Debugging 8 | 9 | Build Level-IP with `make debug`. It adds debug symbols and by default, enables Google's Address Sanitizer. 10 | 11 | ## Debug Output 12 | 13 | When built with `make debug`, `lvl-ip` becomes chatty and outputs debug statements. You can enable/disable different component debug output with macros defined in headers. 
14 | 15 | For example, enabling socket-specific output: 16 | 17 | make clean 18 | CFLAGS+=-DDEBUG_SOCKET make debug 19 | 20 | ## Debugging Networking 21 | 22 | Use `tcpdump` with the IP address you're using, e.g.: 23 | 24 | $ tcpdump -i any host 10.0.0.4 -n 25 | IP 10.0.0.4.12000 > 10.0.0.5.8000: Flags [S], seq 1525252, win 512, length 0 26 | IP 10.0.0.5.8000 > 10.0.0.4.12000: Flags [S.], seq 1332068674, ack 1525253, win 29200, options [mss 1460], length 0 27 | IP 10.0.0.4.12000 > 10.0.0.5.8000: Flags [.], ack 1, win 512, length 0 28 | 29 | Together with the verbose `lvl-ip` output, you can troubleshoot behaviour and spot patterns. 30 | 31 | ## Tracing Program Code 32 | 33 | Simply run `gdb` with your favorite interface. 34 | 35 | Refer to https://sourceware.org/gdb/current/onlinedocs/gdb/Threads.html for debugging with threads. 36 | 37 | ## Debugging Memory Allocation and Use 38 | 39 | One of the useful debugging aids so far has been Address Sanitizer by Google. It is built in to newer GCC versions, and it is activated with `-fsanitize=address`. Sure enough, `make debug` enables this by default. 40 | 41 | https://github.com/google/sanitizers/wiki/AddressSanitizer 42 | 43 | ## Debugging Concurrency 44 | 45 | Level-IP uses multiple threads with shared data structures, therefore programming errors like race conditions are easy to introduce. 46 | 47 | Thread Sanitizer by Google is also built in to newer GCCs, which helps pinpointing concurrent access to variables without proper guards. 48 | 49 | https://github.com/google/sanitizers/wiki/ThreadSanitizerCppManual 50 | 51 | # Coding Style 52 | 53 | The foremost aim of Level-IP is to be an educational project on networking. Hence, source code readability should be focused on when developing Level-IP. 54 | 55 | TODO: Actual style guidelines, so far I have been just winging it. 56 | 57 | # Features 58 | 59 | First and foremost, Level-IP aims to be just an introduction to TCP/IP stacks. 
Hence, convenient features are prioritized over e.g. raw performance improvements. 60 | 61 | ## Current Features 62 | 63 | * One hardcoded interface/netdev (IP 10.0.0.4) 64 | * One hardcoded socket 65 | * Ethernet II frame handling 66 | * ARP request/reply, simple caching 67 | * ICMP pings and replies 68 | * IPv4 packet handling, checksum 69 | * One hardcoded route table with default netdevice 70 | * TCPv4 Handshake 71 | * TCP data transmission 72 | * TCP RFC793 "Segment Arrives" 73 | * TCP RFC6298 Retransmission calculation 74 | * TCP RFC793 User Timeout 75 | 76 | ## Upcoming features 77 | 78 | * IP Fragmentation 79 | * IP/ICMP Diagnostics 80 | * TCP Window Management 81 | * TCP Silly Window Syndrome Avoidance 82 | * TCP Zero-Window Probes 83 | * TCP Congestion Control 84 | * TCP Selective Acknowledgments (SACK) 85 | * Server socket API calls (bind, accept...) 86 | * Raw Socket (for arping, ping..) 87 | * 'select' socket API call 88 | * ... 89 | -------------------------------------------------------------------------------- /Documentation/getting-started.md: -------------------------------------------------------------------------------- 1 | # Getting Started 2 | 3 | Level-IP is a TCP/IP stack that is run as a single daemon process on your Linux host. Networking is achieved by configuring your Linux host to forward packets to/from Level-IP. 4 | 5 | To interface applications against Level-IP, a wrapper library for standard libc calls is provided. This wrapper can then be used with existing binaries such as `curl`, `surf` and `firefox` to redirect communications to Level-IP. 6 | 7 | DISCLAIMER: Level-IP is not a production-ready networking stack, and does not intend to be one. The nature of lower-level networking imposes a great responsibility on the software, and any security vulnerabilities can be disastrous. Hence, do not run Level-IP for extended periods of time, purely because it has bugs (and, like all software, will continue to have them).
8 | 9 | # Building 10 | 11 | Standard `make` stuff. 12 | 13 | $ make all 14 | 15 | This builds `lvl-ip` itself, but also the libc wrapper and provided example applications. 16 | 17 | When building, `sudo setcap ...` will probably ask you for superuser permissions. This is because `lvl-ip` needs the `CAP_NET_ADMIN` capability to set itself up. After the setup, it drops that capability. 18 | 19 | Currently, `lvl-ip` also configures the tap interface through the `ip` tool. Hence, give it permissions too: 20 | 21 | $ which ip 22 | /usr/bin/ip 23 | $ sudo setcap cap_net_admin=ep /usr/bin/ip 24 | 25 | # Setup 26 | 27 | Level-IP uses a Linux TAP device to communicate with the outside world. In short, the tap device is initialized in the host Linux networking stack, and `lvl-ip` can then read the L2 frames: 28 | 29 | $ sudo mknod /dev/net/tap c 10 200 30 | $ sudo chmod 0666 /dev/net/tap 31 | 32 | In essence, `lvl-ip` operates as a host inside the tap device's subnet. Therefore, in order to communicate with other hosts, the tap device needs to be set in a forwarding mode: 33 | 34 | An example from my (Arch) Linux machine, where `wlp2s0` is my outgoing interface, and `tap0` is the tap device for `lvl-ip`: 35 | 36 | $ sysctl -w net.ipv4.ip_forward=1 37 | $ iptables -I INPUT --source 10.0.0.0/24 -j ACCEPT 38 | $ iptables -t nat -I POSTROUTING --out-interface wlp2s0 -j MASQUERADE 39 | $ iptables -I FORWARD --in-interface wlp2s0 --out-interface tap0 -j ACCEPT 40 | $ iptables -I FORWARD --in-interface tap0 --out-interface wlp2s0 -j ACCEPT 41 | 42 | Now, packets coming from `lvl-ip` (10.0.0.4/24 in this case) should be NATed by the host Linux interfaces and traverse the FORWARD chain correctly to the host's outgoing gateway. 43 | 44 | See http://www.netfilter.org/documentation/HOWTO/packet-filtering-HOWTO-9.html for more info.
45 | 46 | # Usage 47 | 48 | When you've built lvl-ip and set up your host stack to forward packets, you can try communicating with the Internet: 49 | 50 | $ ./lvl-ip 51 | 52 | The userspace TCP/IP stack should start. Now, first test communications with the provided applications: 53 | 54 | $ cd tools 55 | $ ./level-ip ../apps/curl/curl google.com 80 56 | 57 | `./level-ip` is just a bash script that allows `liblevelip.so` to take precedence over the libc socket API calls. 58 | 59 | The important point is that `./level-ip` aims to be usable against any existing dynamically-linked application. Let's try the _real_ `curl`: 60 | 61 | [saminiir@localhost tools]$ curl --version 62 | curl 7.50.0 (x86_64-pc-linux-gnu) libcurl/7.50.0 OpenSSL/1.0.2h zlib/1.2.8 libidn/1.33 libssh2/1.7.0 63 | Protocols: dict file ftp ftps gopher http https imap imaps pop3 pop3s rtsp scp sftp smb smbs smtp smtps telnet tftp 64 | Features: AsynchDNS IDN IPv6 Largefile GSS-API Kerberos SPNEGO NTLM NTLM_WB SSL libz TLS-SRP UnixSockets 65 | [saminiir@localhost tools]$ curl google.com 66 | 67 | 302 Moved 68 |

302 Moved

69 | The document has moved 70 | here. 71 | 72 | 73 | And instead of using Linux's TCP/IP stack, let's try it with `lvl-ip`: 74 | 75 | [saminiir@localhost tools]$ ./level-ip curl google.com 76 | 77 | 302 Moved 78 |

302 Moved

79 | The document has moved 80 | here. 81 | 82 | 83 | The result is exactly the same. Under the hood, however, `curl` calls the libc socket API but these calls are redirected to `lvl-ip` instead. 84 | 85 | Try browsing the Web, with Level-IP doing the packet transfer: 86 | 87 | [saminiir@localhost tools]$ firefox --version 88 | Mozilla Firefox 47.0.1 89 | [saminiir@localhost tools]$ ./level-ip firefox google.com 90 | 91 | That's it! 92 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2016 Sami Niiranen 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | CPPFLAGS = -I include -Wall -Werror -pthread 2 | 3 | src = $(wildcard src/*.c) 4 | obj = $(patsubst src/%.c, build/%.o, $(src)) 5 | headers = $(wildcard include/*.h) 6 | apps = apps/curl/curl 7 | 8 | lvl-ip: $(obj) 9 | $(CC) $(CFLAGS) $(CPPFLAGS) $(obj) -o lvl-ip 10 | @echo 11 | @echo "lvl-ip needs CAP_NET_ADMIN:" 12 | sudo setcap cap_setpcap,cap_net_admin=ep lvl-ip 13 | 14 | build/%.o: src/%.c ${headers} 15 | $(CC) $(CFLAGS) $(CPPFLAGS) -c $< -o $@ 16 | 17 | debug: CFLAGS+= -DDEBUG_SOCKET -DDEBUG_TCP -g -fsanitize=thread 18 | debug: lvl-ip 19 | 20 | apps: $(apps) 21 | $(MAKE) -C tools 22 | $(MAKE) -C apps/curl 23 | $(MAKE) -C apps/curl-poll 24 | 25 | all: lvl-ip apps 26 | 27 | test: debug apps 28 | @echo 29 | @echo "Networking capabilites are required for test dependencies:" 30 | which arping | sudo xargs setcap cap_net_raw=ep 31 | which tc | sudo xargs setcap cap_net_admin=ep 32 | @echo 33 | cd tests && ./test-run-all 34 | 35 | clean: 36 | rm build/*.o lvl-ip 37 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Level-IP [![Build Status](https://travis-ci.org/saminiir/level-ip.svg?branch=master)](https://travis-ci.org/saminiir/level-ip) 2 | 3 | Level-IP is a Linux userspace TCP/IP stack, implemented with TUN/TAP devices. 
4 | 5 | The main goals are to: 6 | * Learn TCP/IP 7 | * Learn Linux systems/network programming 8 | * Learn Linux Socket API 9 | 10 | The results of the learning experience will be accompanied by explanatory blog posts: 11 | 12 | - Part 1, Ethernet & ARP: http://www.saminiir.com/lets-code-tcp-ip-stack-1-ethernet-arp 13 | - Part 2, IPv4 & ICMPv4: http://www.saminiir.com/lets-code-tcp-ip-stack-2-ipv4-icmpv4 14 | - Part 3, TCP Basics & Handshake: http://www.saminiir.com/lets-code-tcp-ip-stack-3-tcp-handshake/ 15 | - Part 4, TCP Data Flow & Socket API: http://www.saminiir.com/lets-code-tcp-ip-stack-4-tcp-data-flow-socket-api/ 16 | - Part 5, TCP Retransmission: http://www.saminiir.com/lets-code-tcp-ip-stack-5-tcp-retransmission/ 17 | 18 | See [Getting Started](Documentation/getting-started.md). 19 | 20 | For development documentation, start with [Development](Documentation/development.md). 21 | 22 | # Reference works 23 | 24 | * Linux kernel TCP/IP stack, [source code](https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/tree/net/ipv4) 25 | * picoTCP, [source code](https://github.com/tass-belgium/picotcp) 26 | * Xiaochen Wang's TCP/IP stack, [source code](https://github.com/chobits/tapip) 27 | 28 | # License 29 | 30 | See [LICENSE.md](LICENSE.md) (MIT) 31 | -------------------------------------------------------------------------------- /apps/curl-poll/Makefile: -------------------------------------------------------------------------------- 1 | curl: curl-poll.c 2 | $(CC) curl-poll.c -o curl-poll 3 | -------------------------------------------------------------------------------- /apps/curl-poll/curl-poll.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | #define MAX_HOSTNAME 50 13 | #define RLEN 4096 14 | 15 | int get_address(char *host, char *port, struct sockaddr *addr) 16 | { 17 | 
struct addrinfo hints; 18 | struct addrinfo *result, *rp; 19 | int s; 20 | 21 | memset(&hints, 0, sizeof(struct addrinfo)); 22 | hints.ai_family = AF_INET; 23 | hints.ai_socktype = SOCK_STREAM; 24 | 25 | s = getaddrinfo(host, port, &hints, &result); 26 | 27 | if (s != 0) { 28 | fprintf(stderr, "getaddrinfo: %s\n", gai_strerror(s)); 29 | exit(EXIT_FAILURE); 30 | } 31 | 32 | for (rp = result; rp != NULL; rp = rp->ai_next) { 33 | *addr = *rp->ai_addr; 34 | freeaddrinfo(result); 35 | return 0; 36 | } 37 | 38 | return 1; 39 | } 40 | 41 | int main(int argc, char **argv) 42 | { 43 | if (argc != 3 || strnlen(argv[1], MAX_HOSTNAME) == MAX_HOSTNAME) { 44 | fprintf(stderr, "Curl called but HOST or PORT not given or invalid\n"); 45 | return 1; 46 | } 47 | 48 | struct sockaddr addr; 49 | int sock; 50 | 51 | if (strnlen(argv[2], 6) == 6) { 52 | fprintf(stderr, "Curl called but PORT malformed\n"); 53 | return 1; 54 | } 55 | 56 | if (get_address(argv[1], argv[2], &addr) != 0) { 57 | fprintf(stderr, "Curl could not resolve hostname\n"); 58 | return 1; 59 | } 60 | 61 | sock = socket(AF_INET, SOCK_STREAM, 0); 62 | 63 | if (fcntl(sock, F_SETFL, O_NONBLOCK) == -1) { 64 | perror("Setting socket nonblocking"); 65 | return 1; 66 | } 67 | 68 | if (connect(sock, &addr, 16) == -1) { 69 | if (errno != EINPROGRESS) { 70 | perror("Curl could not establish connection"); 71 | return 1; 72 | } 73 | } 74 | 75 | struct pollfd fds[1]; 76 | fds[0].fd = sock; 77 | fds[0].events = POLLOUT; 78 | 79 | int ret = poll(fds, 1, -1); 80 | 81 | if (ret < 1) { 82 | perror("Poll failed"); 83 | return 1; 84 | } 85 | 86 | assert(fds[0].revents & POLLOUT); 87 | 88 | char str[512]; 89 | 90 | snprintf(str, 512, "GET / HTTP/1.1\r\nHost: %s:%s\r\nConnection: close\r\n\r\n", argv[1], argv[2]); 91 | int len = strlen(str); 92 | 93 | if (write(sock, str, len) != len) { 94 | perror("Write error"); 95 | return 1; 96 | } 97 | 98 | int rlen = 0; 99 | while (1) { 100 | fds[0].events = POLLIN; 101 | 102 | ret = poll(fds, 1, -1); 
103 | 104 | if (ret < 0) { 105 | perror("Poll failed"); 106 | return 1; 107 | } 108 | 109 | if (fds[0].revents & POLLIN) { 110 | char buf[RLEN] = { 0 }; 111 | 112 | if ((rlen = read(sock, buf, RLEN)) == -1) { 113 | perror("Read error"); 114 | return 1; 115 | } 116 | 117 | if (rlen == 0) { 118 | /* We're done */ 119 | break; 120 | } 121 | 122 | printf("%s", buf); 123 | } 124 | 125 | if (fds[0].revents & (POLLHUP | POLLERR)) { 126 | fprintf(stderr, "POLLHUP/ERR received %d\n", fds[0].revents); 127 | break; 128 | } 129 | } 130 | 131 | close(sock); 132 | } 133 | -------------------------------------------------------------------------------- /apps/curl/Makefile: -------------------------------------------------------------------------------- 1 | curl: curl.c 2 | $(CC) curl.c -o curl 3 | -------------------------------------------------------------------------------- /apps/curl/curl.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #define MAX_HOSTNAME 50 9 | #define RLEN 4096 10 | 11 | int get_address(char *host, char *port, struct sockaddr *addr) 12 | { 13 | struct addrinfo hints; 14 | struct addrinfo *result, *rp; 15 | int s; 16 | 17 | memset(&hints, 0, sizeof(struct addrinfo)); 18 | hints.ai_family = AF_INET; 19 | hints.ai_socktype = SOCK_STREAM; 20 | 21 | s = getaddrinfo(host, port, &hints, &result); 22 | 23 | if (s != 0) { 24 | printf("getaddrinfo: %s\n", gai_strerror(s)); 25 | exit(EXIT_FAILURE); 26 | } 27 | 28 | for (rp = result; rp != NULL; rp = rp->ai_next) { 29 | *addr = *rp->ai_addr; 30 | freeaddrinfo(result); 31 | return 0; 32 | } 33 | 34 | return 1; 35 | } 36 | 37 | int main(int argc, char **argv) 38 | { 39 | if (argc != 3 || strnlen(argv[1], MAX_HOSTNAME) == MAX_HOSTNAME) { 40 | printf("Curl called but HOST or PORT not given or invalid\n"); 41 | return 1; 42 | } 43 | 44 | struct sockaddr addr; 45 | int sock; 46 | 47 | if 
(strnlen(argv[2], 6) == 6) { 48 | printf("Curl called but PORT malformed\n"); 49 | return 1; 50 | } 51 | 52 | if (get_address(argv[1], argv[2], &addr) != 0) { 53 | printf("Curl could not resolve hostname\n"); 54 | return 1; 55 | } 56 | 57 | sock = socket(AF_INET, SOCK_STREAM, 0); 58 | 59 | if (connect(sock, &addr, 16) == -1) { 60 | perror("Curl could not establish connection"); 61 | return 1; 62 | } 63 | 64 | char str[512]; 65 | 66 | snprintf(str, 512, "GET / HTTP/1.1\r\nHost: %s:%s\r\nConnection: close\r\n\r\n", argv[1], argv[2]); 67 | int len = strlen(str); 68 | 69 | if (write(sock, str, len) != len) { 70 | printf("Write error\n"); 71 | return 1; 72 | } 73 | 74 | char buf[RLEN] = { 0 }; 75 | int rlen = 0; 76 | 77 | while ((rlen = read(sock, buf, RLEN)) > 0) { 78 | printf("%s", buf); 79 | } 80 | 81 | if (rlen == -1) { 82 | perror("Curl read error"); 83 | return 1; 84 | } 85 | 86 | close(sock); 87 | } 88 | -------------------------------------------------------------------------------- /build/BUILD: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/saminiir/level-ip/c1950ea0e0f9feceb5602432f1751b8ce71c4952/build/BUILD -------------------------------------------------------------------------------- /include/arp.h: -------------------------------------------------------------------------------- 1 | #ifndef ARP_H 2 | #define ARP_H 3 | #include "syshead.h" 4 | #include "ethernet.h" 5 | #include "netdev.h" 6 | #include "skbuff.h" 7 | #include "list.h" 8 | #include "utils.h" 9 | 10 | #define ARP_ETHERNET 0x0001 11 | #define ARP_IPV4 0x0800 12 | #define ARP_REQUEST 0x0001 13 | #define ARP_REPLY 0x0002 14 | 15 | #define ARP_HDR_LEN sizeof(struct arp_hdr) 16 | #define ARP_DATA_LEN sizeof(struct arp_ipv4) 17 | 18 | #define ARP_CACHE_LEN 32 19 | #define ARP_FREE 0 20 | #define ARP_WAITING 1 21 | #define ARP_RESOLVED 2 22 | 23 | #ifdef DEBUG_ARP 24 | #define arp_dbg(str, hdr) \ 25 | do { \ 26 | print_debug("arp 
"str" (hwtype: %hu, protype: %.4hx, " \ 27 | "hwsize: %d, prosize: %d, opcode: %.4hx)", \ 28 | hdr->hwtype, hdr->protype, hdr->hwsize, \ 29 | hdr->prosize, hdr->opcode); \ 30 | } while (0) 31 | 32 | #define arpdata_dbg(str, data) \ 33 | do { \ 34 | print_debug("arp data "str" (smac: %.2hhx:%.2hhx:%.2hhx:%.2hhx" \ 35 | ":%.2hhx:%.2hhx, sip: %hhu.%hhu.%hhu.%hhu, dmac: %.2hhx:%.2hhx" \ 36 | ":%.2hhx:%.2hhx:%.2hhx:%.2hhx, dip: %hhu.%hhu.%hhu.%hhu)", \ 37 | data->smac[0], data->smac[1], data->smac[2], data->smac[3], \ 38 | data->smac[4], data->smac[5], data->sip >> 24, data->sip >> 16, \ 39 | data->sip >> 8, data->sip >> 0, data->dmac[0], data->dmac[1], \ 40 | data->dmac[2], data->dmac[3], data->dmac[4], data->dmac[5], \ 41 | data->dip >> 24, data->dip >> 16, data->dip >> 8, data->dip >> 0); \ 42 | } while (0) 43 | 44 | #define arpcache_dbg(str, entry) \ 45 | do { \ 46 | print_debug("arp cache "str" (hwtype: %hu, sip: %hhu.%hhu.%hhu.%hhu, " \ 47 | "smac: %.2hhx:%.2hhx:%.2hhx:%.2hhx:%.2hhx:%.2hhx, state: %d)", entry->hwtype, \ 48 | entry->sip >> 24, entry->sip >> 16, entry->sip >> 8, entry->sip >> 0, \ 49 | entry->smac[0], entry->smac[1], entry->smac[2], entry->smac[3], entry->smac[4], \ 50 | entry->smac[5], entry->state); \ 51 | } while (0) 52 | #else 53 | #define arp_dbg(str, hdr) 54 | #define arpdata_dbg(str, data) 55 | #define arpcache_dbg(str, entry) 56 | #endif 57 | 58 | struct arp_hdr 59 | { 60 | uint16_t hwtype; 61 | uint16_t protype; 62 | uint8_t hwsize; 63 | uint8_t prosize; 64 | uint16_t opcode; 65 | unsigned char data[]; 66 | } __attribute__((packed)); 67 | 68 | struct arp_ipv4 69 | { 70 | unsigned char smac[6]; 71 | uint32_t sip; 72 | unsigned char dmac[6]; 73 | uint32_t dip; 74 | } __attribute__((packed)); 75 | 76 | struct arp_cache_entry 77 | { 78 | struct list_head list; 79 | uint16_t hwtype; 80 | uint32_t sip; 81 | unsigned char smac[6]; 82 | unsigned int state; 83 | }; 84 | 85 | void arp_init(); 86 | void free_arp(); 87 | void arp_rcv(struct sk_buff 
*skb); 88 | void arp_reply(struct sk_buff *skb, struct netdev *netdev); 89 | int arp_request(uint32_t sip, uint32_t dip, struct netdev *netdev); 90 | unsigned char* arp_get_hwaddr(uint32_t sip); 91 | 92 | static inline struct arp_hdr *arp_hdr(struct sk_buff *skb) 93 | { 94 | return (struct arp_hdr *)(skb->head + ETH_HDR_LEN); 95 | } 96 | 97 | #endif 98 | -------------------------------------------------------------------------------- /include/basic.h: -------------------------------------------------------------------------------- 1 | #ifndef BASIC_H 2 | #define BASIC_H 3 | 4 | #define CLEAR(x) memset(&(x), 0, sizeof(x)) 5 | 6 | #endif 7 | -------------------------------------------------------------------------------- /include/cli.h: -------------------------------------------------------------------------------- 1 | #ifndef CLI_H_ 2 | #define CLI_H_ 3 | 4 | void parse_cli(int argc, char** argv); 5 | 6 | #endif 7 | -------------------------------------------------------------------------------- /include/dst.h: -------------------------------------------------------------------------------- 1 | #ifndef DST_H_ 2 | #define DST_H_ 3 | 4 | #include "skbuff.h" 5 | 6 | struct sk_buff; 7 | 8 | int dst_neigh_output(struct sk_buff *skb); 9 | 10 | #endif 11 | -------------------------------------------------------------------------------- /include/ethernet.h: -------------------------------------------------------------------------------- 1 | #ifndef ETHERNET_H_ 2 | #define ETHERNET_H_ 3 | 4 | #include 5 | #include "netdev.h" 6 | #include "skbuff.h" 7 | #include "syshead.h" 8 | #include "utils.h" 9 | 10 | #define ETH_HDR_LEN sizeof(struct eth_hdr) 11 | 12 | #ifdef DEBUG_ETH 13 | #define eth_dbg(msg, hdr) \ 14 | do { \ 15 | print_debug("eth "msg" (" \ 16 | "dmac: %.2hhx:%.2hhx:%.2hhx:%.2hhx:%.2hhx:%.2hhx, " \ 17 | "smac: %.2hhx:%.2hhx:%.2hhx:%.2hhx:%.2hhx:%.2hhx, " \ 18 | "ethertype: %.4hx)", \ 19 | hdr->dmac[0], hdr->dmac[1], hdr->dmac[2], hdr->dmac[3], \ 20 | hdr->dmac[4], 
hdr->dmac[5], hdr->smac[0], hdr->smac[1], \ 21 | hdr->smac[2], hdr->smac[3], hdr->smac[4], hdr->smac[5], hdr->ethertype); \ 22 | } while (0) 23 | #else 24 | #define eth_dbg(msg, hdr) 25 | #endif 26 | 27 | struct sk_buff; 28 | struct netdev; 29 | 30 | uint8_t *skb_head(struct sk_buff *skb); 31 | 32 | struct eth_hdr 33 | { 34 | uint8_t dmac[6]; 35 | uint8_t smac[6]; 36 | uint16_t ethertype; 37 | uint8_t payload[]; 38 | } __attribute__((packed)); 39 | 40 | static inline struct eth_hdr *eth_hdr(struct sk_buff *skb) 41 | { 42 | struct eth_hdr *hdr = (struct eth_hdr *)skb_head(skb); 43 | 44 | hdr->ethertype = ntohs(hdr->ethertype); 45 | 46 | return hdr; 47 | } 48 | 49 | #endif 50 | -------------------------------------------------------------------------------- /include/icmpv4.h: -------------------------------------------------------------------------------- 1 | #ifndef ICMPV4_H 2 | #define ICMPV4_H 3 | 4 | #include "syshead.h" 5 | #include "skbuff.h" 6 | 7 | #define ICMP_V4_REPLY 0x00 8 | #define ICMP_V4_DST_UNREACHABLE 0x03 9 | #define ICMP_V4_SRC_QUENCH 0x04 10 | #define ICMP_V4_REDIRECT 0x05 11 | #define ICMP_V4_ECHO 0x08 12 | #define ICMP_V4_ROUTER_ADV 0x09 13 | #define ICMP_V4_ROUTER_SOL 0x0a 14 | #define ICMP_V4_TIMEOUT 0x0b 15 | #define ICMP_V4_MALFORMED 0x0c 16 | 17 | struct icmp_v4 { 18 | uint8_t type; 19 | uint8_t code; 20 | uint16_t csum; 21 | uint8_t data[]; 22 | } __attribute__((packed)); 23 | 24 | struct icmp_v4_echo { 25 | uint16_t id; 26 | uint16_t seq; 27 | uint8_t data[]; 28 | } __attribute__((packed)); 29 | 30 | struct icmp_v4_dst_unreachable { 31 | uint8_t unused; 32 | uint8_t len; 33 | uint16_t var; 34 | uint8_t data[]; 35 | } __attribute__((packed)); 36 | 37 | 38 | void icmpv4_incoming(struct sk_buff *skb); 39 | void icmpv4_reply(struct sk_buff *skb); 40 | 41 | #endif 42 | -------------------------------------------------------------------------------- /include/inet.h: 
-------------------------------------------------------------------------------- 1 | #ifndef _INET_H 2 | #define _INET_H 3 | 4 | #include "syshead.h" 5 | #include "socket.h" 6 | #include "skbuff.h" 7 | 8 | #ifdef DEBUG_SOCKET 9 | #define inet_dbg(sock, msg, ...) \ 10 | do { \ 11 | socket_dbg(sock, "INET "msg, ##__VA_ARGS__); \ 12 | } while (0) 13 | #else 14 | #define inet_dbg(msg, th, ...) 15 | #endif 16 | 17 | int inet_create(struct socket *sock, int protocol); 18 | int inet_socket(struct socket *sock, int protocol); 19 | int inet_connect(struct socket *sock, struct sockaddr *addr, int addr_len, int flags); 20 | int inet_write(struct socket *sock, const void *buf, int len); 21 | int inet_read(struct socket *sock, void *buf, int len); 22 | int inet_close(struct socket *sock); 23 | int inet_free(struct socket *sock); 24 | int inet_abort(struct socket *sock); 25 | int inet_getpeername(struct socket *sock, struct sockaddr *restrict address, 26 | socklen_t *restrict address_len); 27 | int inet_getsockname(struct socket *sock, struct sockaddr *restrict address, 28 | socklen_t *restrict address_len); 29 | 30 | struct sock *inet_lookup(struct sk_buff *skb, uint16_t sport, uint16_t dport); 31 | #endif 32 | -------------------------------------------------------------------------------- /include/ip.h: -------------------------------------------------------------------------------- 1 | #ifndef IPV4_H 2 | #define IPV4_H 3 | #include "syshead.h" 4 | #include "ethernet.h" 5 | #include "skbuff.h" 6 | #include "sock.h" 7 | 8 | #define IPV4 0x04 9 | #define IP_TCP 0x06 10 | #define ICMPV4 0x01 11 | 12 | #define IP_HDR_LEN sizeof(struct iphdr) 13 | #define ip_len(ip) (ip->len - (ip->ihl * 4)) 14 | 15 | #ifdef DEBUG_IP 16 | #define ip_dbg(msg, hdr) \ 17 | do { \ 18 | print_debug("ip "msg" (ihl: %hhu version: %hhu tos: %hhu " \ 19 | "len %hu id: %hu frag_offset: %hu ttl: %hhu " \ 20 | "proto: %hhu csum: %hx " \ 21 | "saddr: %hhu.%hhu.%hhu.%hhu daddr: %hhu.%hhu.%hhu.%hhu)", \ 22 | 
hdr->ihl, \ 23 | hdr->version, hdr->tos, hdr->len, hdr->id, \ 24 | hdr->frag_offset, hdr->ttl, hdr->proto, hdr->csum, \ 25 | hdr->saddr >> 24, hdr->saddr >> 16, hdr->saddr >> 8, hdr->saddr >> 0, \ 26 | hdr->daddr >> 24, hdr->daddr >> 16, hdr->daddr >> 8, hdr->daddr >> 0); \ 27 | } while (0) 28 | #else 29 | #define ip_dbg(msg, hdr) 30 | #endif 31 | 32 | struct iphdr { 33 | uint8_t ihl : 4; /* TODO: Support Big Endian hosts */ 34 | uint8_t version : 4; 35 | uint8_t tos; 36 | uint16_t len; 37 | uint16_t id; 38 | uint16_t frag_offset; 39 | uint8_t ttl; 40 | uint8_t proto; 41 | uint16_t csum; 42 | uint32_t saddr; 43 | uint32_t daddr; 44 | uint8_t data[]; 45 | } __attribute__((packed)); 46 | 47 | static inline struct iphdr *ip_hdr(const struct sk_buff *skb) 48 | { 49 | return (struct iphdr *)(skb->head + ETH_HDR_LEN); 50 | } 51 | 52 | static inline uint32_t ip_parse(char *addr) 53 | { 54 | uint32_t dst = 0; 55 | 56 | if (inet_pton(AF_INET, addr, &dst) != 1) { 57 | perror("ERR: Parsing inet address failed"); 58 | exit(1); 59 | } 60 | 61 | return ntohl(dst); 62 | } 63 | 64 | int ip_rcv(struct sk_buff *skb); 65 | int ip_output(struct sock *sk, struct sk_buff *skb); 66 | 67 | #endif 68 | -------------------------------------------------------------------------------- /include/ipc.h: -------------------------------------------------------------------------------- 1 | #ifndef IPC_H_ 2 | #define IPC_H_ 3 | 4 | #include "list.h" 5 | 6 | #ifdef DEBUG_IPC 7 | #define ipc_dbg(msg, th) \ 8 | do { \ 9 | print_debug("IPC sockets count %d, current sock %d, tid %lu: %s", \ 10 | socket_count, th->sock, th->id, msg); \ 11 | } while (0) 12 | #else 13 | #define ipc_dbg(msg, th) 14 | #endif 15 | 16 | void *start_ipc_listener(); 17 | 18 | #define IPC_SOCKET 0x0001 19 | #define IPC_CONNECT 0x0002 20 | #define IPC_WRITE 0x0003 21 | #define IPC_READ 0x0004 22 | #define IPC_CLOSE 0x0005 23 | #define IPC_POLL 0x0006 24 | #define IPC_FCNTL 0x0007 25 | #define IPC_GETSOCKOPT 0x0008 26 | #define 
IPC_SETSOCKOPT 0x0009 27 | #define IPC_GETPEERNAME 0x000A 28 | #define IPC_GETSOCKNAME 0x000B 29 | 30 | struct ipc_thread { 31 | struct list_head list; 32 | int sock; 33 | pthread_t id; 34 | }; 35 | 36 | struct ipc_msg { 37 | uint16_t type; 38 | pid_t pid; 39 | uint8_t data[]; 40 | } __attribute__((packed)); 41 | 42 | struct ipc_err { 43 | int rc; 44 | int err; 45 | uint8_t data[]; 46 | } __attribute__((packed)); 47 | 48 | struct ipc_socket { 49 | int domain; 50 | int type; 51 | int protocol; 52 | } __attribute__((packed)); 53 | 54 | struct ipc_connect { 55 | int sockfd; 56 | struct sockaddr addr; 57 | socklen_t addrlen; 58 | } __attribute__((packed)); 59 | 60 | struct ipc_write { 61 | int sockfd; 62 | size_t len; 63 | uint8_t buf[]; 64 | } __attribute__((packed)); 65 | 66 | struct ipc_read { 67 | int sockfd; 68 | size_t len; 69 | uint8_t buf[]; 70 | } __attribute__((packed)); 71 | 72 | struct ipc_close { 73 | int sockfd; 74 | } __attribute__((packed)); 75 | 76 | struct ipc_pollfd { 77 | int fd; 78 | short int events; 79 | short int revents; 80 | } __attribute__((packed)); 81 | 82 | struct ipc_poll { 83 | nfds_t nfds; 84 | int timeout; 85 | struct ipc_pollfd fds[]; 86 | } __attribute__((packed)); 87 | 88 | struct ipc_fcntl { 89 | int sockfd; 90 | int cmd; 91 | uint8_t data[]; 92 | } __attribute__((packed)); 93 | 94 | struct ipc_sockopt { 95 | int fd; 96 | int level; 97 | int optname; 98 | socklen_t optlen; 99 | uint8_t optval[]; 100 | } __attribute__((packed)); 101 | 102 | struct ipc_sockname { 103 | int socket; 104 | socklen_t address_len; 105 | uint8_t sa_data[128]; 106 | }; 107 | 108 | #endif 109 | -------------------------------------------------------------------------------- /include/list.h: -------------------------------------------------------------------------------- 1 | #ifndef _LIST_H 2 | #define _LIST_H 3 | 4 | #include 5 | 6 | struct list_head { 7 | struct list_head *next; 8 | struct list_head *prev; 9 | }; 10 | 11 | #define LIST_HEAD(name) \ 12 | 
struct list_head name = { &(name), &(name) } 13 | 14 | static inline void list_init(struct list_head *head) 15 | { 16 | head->prev = head->next = head; 17 | } 18 | 19 | static inline void list_add(struct list_head *new, struct list_head *head) 20 | { 21 | head->next->prev = new; 22 | new->next = head->next; 23 | new->prev = head; 24 | head->next = new; 25 | } 26 | 27 | static inline void list_add_tail(struct list_head *new, struct list_head *head) 28 | { 29 | head->prev->next = new; 30 | new->prev = head->prev; 31 | new->next = head; 32 | head->prev = new; 33 | } 34 | 35 | static inline void list_del(struct list_head *elem) 36 | { 37 | struct list_head *prev = elem->prev; 38 | struct list_head *next = elem->next; 39 | 40 | prev->next = next; 41 | next->prev = prev; 42 | } 43 | 44 | #define list_entry(ptr, type, member) \ 45 | ((type *) ((char *) (ptr) - offsetof(type, member))) 46 | 47 | #define list_first_entry(ptr, type, member) \ 48 | list_entry((ptr)->next, type, member) 49 | 50 | #define list_for_each(pos, head) \ 51 | for (pos = (head)->next; pos != (head); pos = pos->next) 52 | 53 | #define list_for_each_safe(pos, p, head) \ 54 | for (pos = (head)->next, p = pos->next; \ 55 | pos != (head); \ 56 | pos = p, p = pos->next) 57 | 58 | static inline int list_empty(struct list_head *head) 59 | { 60 | return head->next == head; 61 | } 62 | 63 | #endif 64 | -------------------------------------------------------------------------------- /include/netdev.h: -------------------------------------------------------------------------------- 1 | #ifndef NETDEV_H 2 | #define NETDEV_H 3 | #include "syshead.h" 4 | #include "ethernet.h" 5 | #include "skbuff.h" 6 | #include "utils.h" 7 | 8 | #define BUFLEN 1600 9 | #define MAX_ADDR_LEN 32 10 | 11 | #define netdev_dbg(fmt, args...) 
\ 12 | do { \ 13 | print_debug("NETDEV: "fmt, ##args); \ 14 | } while (0) 15 | 16 | struct eth_hdr; 17 | 18 | struct netdev { 19 | uint32_t addr; 20 | uint8_t addr_len; 21 | uint8_t hwaddr[6]; 22 | uint32_t mtu; 23 | }; 24 | 25 | void netdev_init(); 26 | int netdev_transmit(struct sk_buff *skb, uint8_t *dst, uint16_t ethertype); 27 | void *netdev_rx_loop(); 28 | void free_netdev(); 29 | struct netdev *netdev_get(uint32_t sip); 30 | #endif 31 | -------------------------------------------------------------------------------- /include/route.h: -------------------------------------------------------------------------------- 1 | #ifndef _ROUTE_H 2 | #define _ROUTE_H 3 | 4 | #include "list.h" 5 | 6 | #define RT_LOOPBACK 0x01 7 | #define RT_GATEWAY 0x02 8 | #define RT_HOST 0x04 9 | #define RT_REJECT 0x08 10 | #define RT_UP 0x10 11 | 12 | struct rtentry { 13 | struct list_head list; 14 | uint32_t dst; 15 | uint32_t gateway; 16 | uint32_t netmask; 17 | uint8_t flags; 18 | uint32_t metric; 19 | struct netdev *dev; 20 | }; 21 | 22 | void route_init(); 23 | struct rtentry *route_lookup(uint32_t daddr); 24 | void free_routes(); 25 | 26 | #endif 27 | -------------------------------------------------------------------------------- /include/skbuff.h: -------------------------------------------------------------------------------- 1 | #ifndef SKBUFF_H_ 2 | #define SKBUFF_H_ 3 | 4 | #include "netdev.h" 5 | #include "route.h" 6 | #include "list.h" 7 | #include 8 | 9 | struct sk_buff { 10 | struct list_head list; 11 | struct rtentry *rt; 12 | struct netdev *dev; 13 | int refcnt; 14 | uint16_t protocol; 15 | uint32_t len; 16 | uint32_t dlen; 17 | uint32_t seq; 18 | uint32_t end_seq; 19 | uint8_t *end; 20 | uint8_t *head; 21 | uint8_t *data; 22 | uint8_t *payload; 23 | }; 24 | 25 | struct sk_buff_head { 26 | struct list_head head; 27 | 28 | uint32_t qlen; 29 | }; 30 | 31 | struct sk_buff *alloc_skb(unsigned int size); 32 | void free_skb(struct sk_buff *skb); 33 | uint8_t 
*skb_push(struct sk_buff *skb, unsigned int len); 34 | uint8_t *skb_head(struct sk_buff *skb); 35 | void *skb_reserve(struct sk_buff *skb, unsigned int len); 36 | void skb_reset_header(struct sk_buff *skb); 37 | 38 | static inline uint32_t skb_queue_len(const struct sk_buff_head *list) 39 | { 40 | return list->qlen; 41 | } 42 | 43 | static inline void skb_queue_init(struct sk_buff_head *list) 44 | { 45 | list_init(&list->head); 46 | list->qlen = 0; 47 | } 48 | 49 | static inline void skb_queue_add(struct sk_buff_head *list, struct sk_buff *new, struct sk_buff *next) 50 | { 51 | list_add_tail(&new->list, &next->list); 52 | list->qlen += 1; 53 | } 54 | 55 | static inline void skb_queue_tail(struct sk_buff_head *list, struct sk_buff *new) 56 | { 57 | list_add_tail(&new->list, &list->head); 58 | list->qlen += 1; 59 | } 60 | 61 | static inline struct sk_buff *skb_dequeue(struct sk_buff_head *list) 62 | { 63 | struct sk_buff *skb = list_first_entry(&list->head, struct sk_buff, list); 64 | list_del(&skb->list); 65 | list->qlen -= 1; 66 | 67 | return skb; 68 | } 69 | 70 | static inline int skb_queue_empty(const struct sk_buff_head *list) 71 | { 72 | return skb_queue_len(list) < 1; 73 | } 74 | 75 | static inline struct sk_buff *skb_peek(struct sk_buff_head *list) 76 | { 77 | if (skb_queue_empty(list)) return NULL; 78 | 79 | return list_first_entry(&list->head, struct sk_buff, list); 80 | } 81 | 82 | static inline void skb_queue_free(struct sk_buff_head *list) 83 | { 84 | struct sk_buff *skb = NULL; 85 | 86 | while ((skb = skb_peek(list)) != NULL) { 87 | skb_dequeue(list); 88 | skb->refcnt--; 89 | free_skb(skb); 90 | } 91 | } 92 | 93 | #endif 94 | -------------------------------------------------------------------------------- /include/sock.h: -------------------------------------------------------------------------------- 1 | #ifndef _SOCK_H 2 | #define _SOCK_H 3 | 4 | #include "socket.h" 5 | #include "wait.h" 6 | #include "skbuff.h" 7 | 8 | struct sock; 9 | 10 | struct 
net_ops { 11 | struct sock* (*alloc_sock) (int protocol); 12 | int (*init) (struct sock *sk); 13 | int (*connect) (struct sock *sk, const struct sockaddr *addr, int addr_len, int flags); 14 | int (*disconnect) (struct sock *sk, int flags); 15 | int (*write) (struct sock *sk, const void *buf, int len); 16 | int (*read) (struct sock *sk, void *buf, int len); 17 | int (*recv_notify) (struct sock *sk); 18 | int (*close) (struct sock *sk); 19 | int (*abort) (struct sock *sk); 20 | }; 21 | 22 | struct sock { 23 | struct socket *sock; 24 | struct net_ops *ops; 25 | struct wait_lock recv_wait; 26 | struct sk_buff_head receive_queue; 27 | struct sk_buff_head write_queue; 28 | int protocol; 29 | int state; 30 | int err; 31 | short int poll_events; 32 | uint16_t sport; 33 | uint16_t dport; 34 | uint32_t saddr; 35 | uint32_t daddr; 36 | }; 37 | 38 | static inline struct sk_buff *write_queue_head(struct sock *sk) 39 | { 40 | return skb_peek(&sk->write_queue); 41 | } 42 | 43 | struct sock *sk_alloc(struct net_ops *ops, int protocol); 44 | void sock_free(struct sock *sk); 45 | void sock_init_data(struct socket *sock, struct sock *sk); 46 | void sock_connected(struct sock *sk); 47 | 48 | #endif 49 | -------------------------------------------------------------------------------- /include/socket.h: -------------------------------------------------------------------------------- 1 | #ifndef SOCKET_H_ 2 | #define SOCKET_H_ 3 | 4 | #include "sock.h" 5 | #include "wait.h" 6 | #include "list.h" 7 | 8 | #ifdef DEBUG_SOCKET 9 | #define socket_dbg(sock, msg, ...) 
\ 10 | do { \ 11 | print_debug("Socket fd %d pid %d state %d sk_state %d flags %d poll %d sport %d dport %d " \ 12 | "recv-q %d send-q %d: "msg, \ 13 | sock->fd, sock->pid, sock->state, sock->sk->state, sock->flags, \ 14 | sock->sk->poll_events, \ 15 | sock->sk->sport, sock->sk->dport, \ 16 | sock->sk->receive_queue.qlen, \ 17 | sock->sk->write_queue.qlen, ##__VA_ARGS__); \ 18 | } while (0) 19 | #else 20 | #define socket_dbg(sock, msg, ...) 21 | #endif 22 | 23 | struct socket; 24 | 25 | enum socket_state { 26 | SS_FREE = 0, /* not allocated */ 27 | SS_UNCONNECTED, /* unconnected to any socket */ 28 | SS_CONNECTING, /* in process of connecting */ 29 | SS_CONNECTED, /* connected to socket */ 30 | SS_DISCONNECTING /* in process of disconnecting */ 31 | }; 32 | 33 | struct sock_type { 34 | struct sock_ops *sock_ops; 35 | struct net_ops *net_ops; 36 | int type; 37 | int protocol; 38 | }; 39 | 40 | struct sock_ops { 41 | int (*connect) (struct socket *sock, const struct sockaddr *addr, 42 | int addr_len, int flags); 43 | int (*write) (struct socket *sock, const void *buf, int len); 44 | int (*read) (struct socket *sock, void *buf, int len); 45 | int (*close) (struct socket *sock); 46 | int (*free) (struct socket *sock); 47 | int (*abort) (struct socket *sock); 48 | int (*poll) (struct socket *sock); 49 | int (*getpeername) (struct socket *sock, struct sockaddr *restrict addr, 50 | socklen_t *restrict address_len); 51 | int (*getsockname) (struct socket *sock, struct sockaddr *restrict addr, 52 | socklen_t *restrict address_len); 53 | }; 54 | 55 | struct net_family { 56 | int (*create) (struct socket *sock, int protocol); 57 | }; 58 | 59 | struct socket { 60 | struct list_head list; 61 | int fd; 62 | pid_t pid; 63 | int refcnt; 64 | enum socket_state state; 65 | short type; 66 | int flags; 67 | struct sock *sk; 68 | struct sock_ops *ops; 69 | struct wait_lock sleep; 70 | pthread_rwlock_t lock; 71 | }; 72 | 73 | void *socket_ipc_open(void *args); 74 | int _socket(pid_t 
pid, int domain, int type, int protocol); 75 | int _connect(pid_t pid, int sockfd, const struct sockaddr *addr, socklen_t addrlen); 76 | int _write(pid_t pid, int sockfd, const void *buf, const unsigned int count); 77 | int _read(pid_t pid, int sockfd, void *buf, const unsigned int count); 78 | int _close(pid_t pid, int sockfd); 79 | int _poll(pid_t pid, struct pollfd fds[], nfds_t nfds, int timeout); 80 | int _fcntl(pid_t pid, int fildes, int cmd, ...); 81 | int _getsockopt(pid_t pid, int fd, int level, int optname, void *optval, socklen_t *optlen); 82 | int _getpeername(pid_t pid, int socket, struct sockaddr *restrict address, 83 | socklen_t *restrict address_len); 84 | int _getsockname(pid_t pid, int socket, struct sockaddr *restrict address, 85 | socklen_t *restrict address_len); 86 | 87 | struct socket *socket_lookup(uint16_t sport, uint16_t dport); 88 | struct socket *socket_find(struct socket *sock); 89 | int socket_rd_acquire(struct socket *sock); 90 | int socket_wr_acquire(struct socket *sock); 91 | int socket_release(struct socket *sock); 92 | int socket_free(struct socket *sock); 93 | int socket_delete(struct socket *sock); 94 | void abort_sockets(); 95 | void socket_debug(); 96 | 97 | #endif 98 | -------------------------------------------------------------------------------- /include/syshead.h: -------------------------------------------------------------------------------- 1 | #ifndef SYSHEAD_H 2 | #define SYSHEAD_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include 28 | 29 | #endif 30 | -------------------------------------------------------------------------------- /include/tcp.h: -------------------------------------------------------------------------------- 1 | #ifndef 
TCP_H_ 2 | #define TCP_H_ 3 | #include "syshead.h" 4 | #include "ip.h" 5 | #include "timer.h" 6 | #include "utils.h" 7 | /* Size of the fixed TCP header in bytes, and the same size in 32-bit words. Both expansions are parenthesized so they behave as a single operand inside larger expressions. */ 8 | #define TCP_HDR_LEN (sizeof(struct tcphdr)) 9 | #define TCP_DOFFSET (sizeof(struct tcphdr) / 4) 10 | 11 | #define TCP_FIN 0x01 12 | #define TCP_SYN 0x02 13 | #define TCP_RST 0x04 14 | #define TCP_PSH 0x08 15 | #define TCP_ACK 0x10 16 | 17 | #define TCP_URG 0x20 18 | #define TCP_ECN 0x40 19 | #define TCP_WIN 0x80 20 | 21 | #define TCP_SYN_BACKOFF 500 22 | #define TCP_CONN_RETRIES 3 23 | 24 | #define TCP_OPT_NOOP 1 25 | #define TCP_OPTLEN_MSS 4 26 | #define TCP_OPT_MSS 2 27 | #define TCP_OPT_SACK_OK 4 28 | #define TCP_OPT_SACK 5 29 | #define TCP_OPTLEN_SACK 2 30 | #define TCP_OPT_TS 8 31 | 32 | #define TCP_2MSL 60000 33 | #define TCP_USER_TIMEOUT 180000 34 | /* tcp_sk() casts its argument to struct tcp_sock *; tcp_hlen() shifts the header's hl field left by two. Arguments are parenthesized so any pointer expression can be passed. */ 35 | #define tcp_sk(sk) ((struct tcp_sock *)(sk)) 36 | #define tcp_hlen(tcp) ((tcp)->hl << 2) 37 | 38 | #ifdef DEBUG_TCP 39 | extern const char *tcp_dbg_states[]; 40 | #define tcp_in_dbg(hdr, sk, skb) \ 41 | do { \ 42 | print_debug("TCP %u.%u.%u.%u.%u > %u.%u.%u.%u.%u: " \ 43 | "Flags [S%uA%uP%uF%uR%u], seq %u:%u, ack %u, win %u rto %d boff %d", \ 44 | (uint8_t)(sk->daddr >> 24), (uint8_t)(sk->daddr >> 16), (uint8_t)(sk->daddr >> 8), (uint8_t)(sk->daddr >> 0), sk->dport, \ 45 | (uint8_t)(sk->saddr >> 24), (uint8_t)(sk->saddr >> 16), (uint8_t)(sk->saddr >> 8), (uint8_t)(sk->saddr >> 0), sk->sport, \ 46 | hdr->syn, hdr->ack, hdr->psh, hdr->fin, hdr->rst, hdr->seq - tcp_sk(sk)->tcb.irs, \ 47 | hdr->seq + skb->dlen - tcp_sk(sk)->tcb.irs, \ 48 | hdr->ack_seq - tcp_sk(sk)->tcb.iss, hdr->win, tcp_sk(sk)->rto, tcp_sk(sk)->backoff); \ 49 | } while (0) 50 | 51 | #define tcp_out_dbg(hdr, sk, skb) \ 52 | do { \ 53 | print_debug("TCP %u.%u.%u.%u.%u > %u.%u.%u.%u.%u: " \ 54 | "Flags [S%uA%uP%uF%uR%u], seq %u:%u, ack %u, win %u rto %d boff %d", \ 55 | (uint8_t)(sk->saddr >> 24), (uint8_t)(sk->saddr >> 16), (uint8_t)(sk->saddr >> 8), (uint8_t)(sk->saddr >> 0), sk->sport, \ 56 | (uint8_t)(sk->daddr >> 24),
(uint8_t)(sk->daddr >> 16), (uint8_t)(sk->daddr >> 8), (uint8_t)(sk->daddr >> 0), sk->dport, \ 57 | hdr->syn, hdr->ack, hdr->psh, hdr->fin, hdr->rst, hdr->seq - tcp_sk(sk)->tcb.iss, \ 58 | hdr->seq + skb->dlen - tcp_sk(sk)->tcb.iss, \ 59 | hdr->ack_seq - tcp_sk(sk)->tcb.irs, hdr->win, tcp_sk(sk)->rto, tcp_sk(sk)->backoff); \ 60 | } while (0) 61 | 62 | #define tcpsock_dbg(msg, sk) \ 63 | do { \ 64 | print_debug("TCP x:%u > %u.%u.%u.%u.%u (snd_una %u, snd_nxt %u, snd_wnd %u, " \ 65 | "snd_wl1 %u, snd_wl2 %u, rcv_nxt %u, rcv_wnd %u recv-q %d send-q %d " \ 66 | "rto %d boff %d) state %s: "msg, \ 67 | sk->sport, (uint8_t)(sk->daddr >> 24), (uint8_t)(sk->daddr >> 16), (uint8_t)(sk->daddr >> 8), (uint8_t)(sk->daddr >> 0), \ 68 | sk->dport, tcp_sk(sk)->tcb.snd_una - tcp_sk(sk)->tcb.iss, \ 69 | tcp_sk(sk)->tcb.snd_nxt - tcp_sk(sk)->tcb.iss, tcp_sk(sk)->tcb.snd_wnd, \ 70 | tcp_sk(sk)->tcb.snd_wl1, tcp_sk(sk)->tcb.snd_wl2, \ 71 | tcp_sk(sk)->tcb.rcv_nxt - tcp_sk(sk)->tcb.irs, tcp_sk(sk)->tcb.rcv_wnd, \ 72 | sk->receive_queue.qlen, sk->write_queue.qlen, tcp_sk(sk)->rto, tcp_sk(sk)->backoff, \ 73 | tcp_dbg_states[sk->state]); \ 74 | } while (0) 75 | 76 | #define tcp_set_state(sk, state) \ 77 | do { \ 78 | tcpsock_dbg("state is now "#state, sk); \ 79 | __tcp_set_state(sk, state); \ 80 | } while (0) 81 | 82 | #define return_tcp_drop(sk, skb) \ 83 | do { \ 84 | tcpsock_dbg("dropping packet", sk); \ 85 | return __tcp_drop(sk, skb); \ 86 | } while (0) 87 | 88 | #define tcp_drop(tsk, skb) \ 89 | do { \ 90 | tcpsock_dbg("dropping packet", sk); \ 91 | __tcp_drop(tsk, skb); \ 92 | } while (0) 93 | 94 | #else 95 | #define tcp_in_dbg(hdr, sk, skb) 96 | #define tcp_out_dbg(hdr, sk, skb) 97 | #define tcpsock_dbg(msg, sk) 98 | #define tcp_set_state(sk, state) __tcp_set_state(sk, state) 99 | #define return_tcp_drop(tsk, skb) return __tcp_drop(tsk, skb) 100 | #define tcp_drop(tsk, skb) __tcp_drop(tsk, skb) 101 | #endif 102 | 103 | struct tcphdr { 104 | uint16_t sport; 105 | uint16_t dport; 106 
| uint32_t seq; 107 | uint32_t ack_seq; 108 | uint8_t rsvd : 4; 109 | uint8_t hl : 4; 110 | uint8_t fin : 1, 111 | syn : 1, 112 | rst : 1, 113 | psh : 1, 114 | ack : 1, 115 | urg : 1, 116 | ece : 1, 117 | cwr : 1; 118 | uint16_t win; 119 | uint16_t csum; 120 | uint16_t urp; 121 | uint8_t data[]; 122 | } __attribute__((packed)); 123 | 124 | struct tcp_options { 125 | uint16_t options; 126 | uint16_t mss; 127 | uint8_t sack; 128 | }; 129 | 130 | struct tcp_opt_mss { 131 | uint8_t kind; 132 | uint8_t len; 133 | uint16_t mss; 134 | } __attribute__((packed)); 135 | 136 | struct tcpiphdr { 137 | uint32_t saddr; 138 | uint32_t daddr; 139 | uint8_t zero; 140 | uint8_t proto; 141 | uint16_t tlen; 142 | } __attribute__((packed)); 143 | 144 | enum tcp_states { 145 | TCP_LISTEN, /* represents waiting for a connection request from any remote 146 | TCP and port. */ 147 | TCP_SYN_SENT, /* represents waiting for a matching connection request 148 | after having sent a connection request. */ 149 | TCP_SYN_RECEIVED, /* represents waiting for a confirming connection 150 | request acknowledgment after having both received and sent a 151 | connection request. */ 152 | TCP_ESTABLISHED, /* represents an open connection, data received can be 153 | delivered to the user. The normal state for the data transfer phase 154 | of the connection. */ 155 | TCP_FIN_WAIT_1, /* represents waiting for a connection termination request 156 | from the remote TCP, or an acknowledgment of the connection 157 | termination request previously sent. */ 158 | TCP_FIN_WAIT_2, /* represents waiting for a connection termination request 159 | from the remote TCP. */ 160 | TCP_CLOSE, /* represents no connection state at all. */ 161 | TCP_CLOSE_WAIT, /* represents waiting for a connection termination request 162 | from the local user. */ 163 | TCP_CLOSING, /* represents waiting for a connection termination request 164 | acknowledgment from the remote TCP. 
*/ 165 | TCP_LAST_ACK, /* represents waiting for an acknowledgment of the 166 | connection termination request previously sent to the remote TCP 167 | (which includes an acknowledgment of its connection termination 168 | request). */ 169 | TCP_TIME_WAIT, /* represents waiting for enough time to pass to be sure 170 | the remote TCP received the acknowledgment of its connection 171 | termination request. */ 172 | }; 173 | 174 | struct tcb { 175 | uint32_t snd_una; /* oldest unacknowledged sequence number */ 176 | uint32_t snd_nxt; /* next sequence number to be sent */ 177 | uint32_t snd_wnd; 178 | uint32_t snd_up; 179 | uint32_t snd_wl1; 180 | uint32_t snd_wl2; 181 | uint32_t iss; 182 | uint32_t rcv_nxt; /* next sequence number expected on an incoming segments, and 183 | is the left or lower edge of the receive window */ 184 | uint32_t rcv_wnd; 185 | uint32_t rcv_up; 186 | uint32_t irs; 187 | }; 188 | 189 | struct tcp_sack_block { 190 | uint32_t left; 191 | uint32_t right; 192 | } __attribute__((packed)); 193 | 194 | struct tcp_sock { 195 | struct sock sk; 196 | int fd; 197 | uint16_t tcp_header_len; 198 | struct tcb tcb; 199 | uint8_t flags; 200 | uint8_t backoff; 201 | int32_t srtt; 202 | int32_t rttvar; 203 | uint32_t rto; 204 | struct timer *retransmit; 205 | struct timer *delack; 206 | struct timer *keepalive; 207 | struct timer *linger; 208 | uint8_t delacks; 209 | uint16_t rmss; 210 | uint16_t smss; 211 | uint16_t cwnd; 212 | uint32_t inflight; 213 | 214 | uint8_t sackok; 215 | uint8_t sacks_allowed; 216 | uint8_t sacklen; 217 | struct tcp_sack_block sacks[4]; 218 | 219 | uint8_t tsopt; 220 | 221 | struct sk_buff_head ofo_queue; /* Out-of-order queue */ 222 | }; 223 | 224 | static inline struct tcphdr *tcp_hdr(const struct sk_buff *skb) 225 | { 226 | return (struct tcphdr *)(skb->head + ETH_HDR_LEN + IP_HDR_LEN); 227 | } 228 | 229 | void tcp_init(); 230 | void tcp_in(struct sk_buff *skb); 231 | int tcp_checksum(struct tcp_sock *sock, struct tcphdr *thdr); 232 
| void tcp_select_initial_window(uint32_t *rcv_wnd); 233 | 234 | int generate_iss(); 235 | struct sock *tcp_alloc_sock(); 236 | int tcp_v4_init_sock(struct sock *sk); 237 | int tcp_init_sock(struct sock *sk); 238 | void __tcp_set_state(struct sock *sk, uint32_t state); 239 | int tcp_v4_checksum(struct sk_buff *skb, uint32_t saddr, uint32_t daddr); 240 | int tcp_v4_connect(struct sock *sk, const struct sockaddr *addr, int addrlen, int flags); 241 | int tcp_connect(struct sock *sk); 242 | int tcp_disconnect(struct sock *sk, int flags); 243 | int tcp_write(struct sock *sk, const void *buf, int len); 244 | int tcp_read(struct sock *sk, void *buf, int len); 245 | int tcp_receive(struct tcp_sock *tsk, void *buf, int len); 246 | int tcp_input_state(struct sock *sk, struct tcphdr *th, struct sk_buff *skb); 247 | int tcp_send_synack(struct sock *sk); 248 | int tcp_send_next(struct sock *sk, int amount); 249 | int tcp_send_ack(struct sock *sk); 250 | void *tcp_send_delack(void *arg); 251 | int tcp_queue_fin(struct sock *sk); 252 | int tcp_send_fin(struct sock *sk); 253 | int tcp_send(struct tcp_sock *tsk, const void *buf, int len); 254 | int tcp_send_reset(struct tcp_sock *tsk); 255 | int tcp_send_challenge_ack(struct sock *sk, struct sk_buff *skb); 256 | int tcp_recv_notify(struct sock *sk); 257 | int tcp_close(struct sock *sk); 258 | int tcp_abort(struct sock *sk); 259 | int tcp_done(struct sock *sk); 260 | void tcp_rtt(struct tcp_sock *tsk); 261 | void tcp_handle_fin_state(struct sock *sk); 262 | void tcp_enter_time_wait(struct sock *sk); 263 | void tcp_clear_timers(struct sock *sk); 264 | void tcp_rearm_rto_timer(struct tcp_sock *tsk); 265 | void tcp_stop_rto_timer(struct tcp_sock *tsk); 266 | void tcp_release_rto_timer(struct tcp_sock *tsk); 267 | void tcp_stop_delack_timer(struct tcp_sock *tsk); 268 | void tcp_release_delack_timer(struct tcp_sock *tsk); 269 | void tcp_rearm_user_timeout(struct sock *sk); 270 | int tcp_calculate_sacks(struct tcp_sock *tsk); 271 | 272 | 
#endif 273 | -------------------------------------------------------------------------------- /include/tcp_data.h: -------------------------------------------------------------------------------- 1 | #ifndef _TCP_DATA_H 2 | #define _TCP_DATA_H 3 | 4 | #include "tcp.h" 5 | 6 | int tcp_data_dequeue(struct tcp_sock *tsk, void *user_buf, int len); 7 | int tcp_data_queue(struct tcp_sock *tsk, struct tcphdr *th, struct sk_buff *skb); 8 | int tcp_data_close(struct tcp_sock *tsk, struct tcphdr *th, struct sk_buff *skb); 9 | #endif 10 | -------------------------------------------------------------------------------- /include/timer.h: -------------------------------------------------------------------------------- 1 | #ifndef TIMER_H_ 2 | #define TIMER_H_ 3 | 4 | #include "syshead.h" 5 | #include "utils.h" 6 | #include "list.h" 7 | 8 | #define timer_dbg(msg, t) \ 9 | do { \ 10 | print_debug("Timer at %d: "msg": expires %d", tick, t->expires); \ 11 | } while (0) 12 | 13 | struct timer { 14 | struct list_head list; 15 | int refcnt; 16 | uint32_t expires; 17 | int cancelled; 18 | void *(*handler)(void *); 19 | void *arg; 20 | pthread_mutex_t lock; 21 | }; 22 | 23 | struct timer *timer_add(uint32_t expire, void *(*handler)(void *), void *arg); 24 | void timer_oneshot(uint32_t expire, void *(*handler)(void *), void *arg); 25 | void timer_release(struct timer *t); 26 | void timer_cancel(struct timer *t); 27 | void *timers_start(); 28 | int timer_get_tick(); 29 | 30 | #endif 31 | -------------------------------------------------------------------------------- /include/tuntap_if.h: -------------------------------------------------------------------------------- 1 | #ifndef TUNTAP_IF_H 2 | #define TUNTAP_IF_H 3 | void tun_init(); 4 | int tun_read(char *buf, int len); 5 | int tun_write(char *buf, int len); 6 | void free_tun(); 7 | #endif 8 | -------------------------------------------------------------------------------- /include/utils.h: 
-------------------------------------------------------------------------------- 1 | #ifndef UTILS_H 2 | #define UTILS_H 3 | 4 | #define CMDBUFLEN 100 5 | /* Logging helpers. Each expands to one do { } while (0) statement, so the caller supplies the terminating ';' and the macros are safe in unbraced if/else bodies. print_debug appends the source file and line to the message. */ 6 | #define print_debug(str, ...) \ 7 | do { printf(str" - %s:%u\n", ##__VA_ARGS__, __FILE__, __LINE__); } while (0) 8 | 9 | #define print_err(str, ...) \ 10 | do { fprintf(stderr, str, ##__VA_ARGS__); } while (0) 11 | 12 | int run_cmd(char *cmd, ...); 13 | uint32_t sum_every_16bits(void *addr, int count); 14 | uint16_t checksum(void *addr, int count, int start_sum); 15 | int get_address(char *host, char *port, struct sockaddr *addr); 16 | uint32_t parse_ipv4_string(char *addr); 17 | uint32_t min(uint32_t x, uint32_t y); 18 | 19 | #endif 20 | -------------------------------------------------------------------------------- /include/wait.h: -------------------------------------------------------------------------------- 1 | #ifndef _WAIT_H 2 | #define _WAIT_H 3 | 4 | #include "syshead.h" 5 | /* Condition variable, its mutex, and a flag that wait_sleep() sets and wait_wakeup() clears. */ 6 | struct wait_lock { 7 | pthread_cond_t ready; 8 | pthread_mutex_t lock; 9 | uint8_t sleeping; 10 | }; 11 | /* Initializes the condition variable and mutex with default attributes and clears the flag. Always returns 0. */ 12 | static inline int wait_init(struct wait_lock *w) { 13 | pthread_cond_init(&w->ready, NULL); 14 | pthread_mutex_init(&w->lock, NULL); 15 | w->sleeping = 0; 16 | 17 | return 0; 18 | } 19 | /* Takes w->lock, signals the condition variable, clears the flag and releases the lock. Always returns 0. */ 20 | static inline int wait_wakeup(struct wait_lock *w) { 21 | pthread_mutex_lock(&w->lock); 22 | 23 | pthread_cond_signal(&w->ready); 24 | w->sleeping = 0; 25 | 26 | pthread_mutex_unlock(&w->lock); 27 | return 0; 28 | } 29 | /* Sets the flag and blocks on the condition variable. This function does not lock w->lock itself, and pthread_cond_wait() requires the mutex to be held by the calling thread, so the caller must lock w->lock before the call and unlock it afterwards. Always returns 0. */ 30 | static inline int wait_sleep(struct wait_lock *w) { 31 | w->sleeping = 1; 32 | pthread_cond_wait(&w->ready, &w->lock); 33 | 34 | return 0; 35 | } 36 | /* Signals the condition variable via wait_wakeup(), then destroys the mutex and the condition variable. */ 37 | static inline void wait_free(struct wait_lock *w) { 38 | wait_wakeup(w); 39 | 40 | pthread_mutex_destroy(&w->lock); 41 | pthread_cond_destroy(&w->ready); 42 | } 43 | 44 | #endif 45 | -------------------------------------------------------------------------------- /src/arp.c: -------------------------------------------------------------------------------- 1 | #include "arp.h" 2 |
#include "netdev.h" 3 | #include "skbuff.h" 4 | #include "list.h" 5 | 6 | /* 7 | * https://tools.ietf.org/html/rfc826 8 | */ 9 | 10 | static uint8_t broadcast_hw[] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; 11 | static LIST_HEAD(arp_cache); 12 | static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER; 13 | /* Allocates an skb sized for Ethernet header + ARP header + ARP payload, reserves that space and tags it as ARP. Returns NULL when alloc_skb() yields NULL; arp_request() already tests for that. */ 14 | static struct sk_buff *arp_alloc_skb() 15 | { 16 | struct sk_buff *skb = alloc_skb(ETH_HDR_LEN + ARP_HDR_LEN + ARP_DATA_LEN); 17 | if (!skb) return NULL; skb_reserve(skb, ETH_HDR_LEN + ARP_HDR_LEN + ARP_DATA_LEN); 18 | skb->protocol = htons(ETH_P_ARP); 19 | 20 | return skb; 21 | } 22 | /* Builds a resolved cache entry from the sender fields of a received ARP packet. Returns NULL when malloc() fails. */ 23 | static struct arp_cache_entry *arp_entry_alloc(struct arp_hdr *hdr, struct arp_ipv4 *data) 24 | { 25 | struct arp_cache_entry *entry = malloc(sizeof(struct arp_cache_entry)); 26 | if (!entry) return NULL; list_init(&entry->list); 27 | 28 | entry->state = ARP_RESOLVED; 29 | entry->hwtype = hdr->hwtype; 30 | entry->sip = data->sip; 31 | memcpy(entry->smac, data->smac, sizeof(entry->smac)); 32 | 33 | return entry; 34 | } 35 | /* Appends a new entry for the packet's sender to the ARP cache under the cache mutex. Returns 0 on success, -1 when the entry could not be allocated. */ 36 | static int insert_arp_translation_table(struct arp_hdr *hdr, struct arp_ipv4 *data) 37 | { 38 | struct arp_cache_entry *entry = arp_entry_alloc(hdr, data); 39 | if (!entry) return -1; 40 | pthread_mutex_lock(&lock); 41 | list_add_tail(&entry->list, &arp_cache); 42 | pthread_mutex_unlock(&lock); 43 | 44 | return 0; 45 | } 46 | /* Refreshes the MAC of an existing cache entry matching the sender's hardware type and IP. Returns 1 when an entry was updated, 0 when none matched. */ 47 | static int update_arp_translation_table(struct arp_hdr *hdr, struct arp_ipv4 *data) 48 | { 49 | struct list_head *item; 50 | struct arp_cache_entry *entry; 51 | 52 | pthread_mutex_lock(&lock); 53 | list_for_each(item, &arp_cache) { 54 | entry = list_entry(item, struct arp_cache_entry, list); 55 | 56 | if (entry->hwtype == hdr->hwtype && entry->sip == data->sip) { 57 | memcpy(entry->smac, data->smac, 6); 58 | pthread_mutex_unlock(&lock); 59 | 60 | return 1; 61 | } 62 | } 63 | 64 | pthread_mutex_unlock(&lock); 65 | 66 | return 0; 67 | } 68 | 69 | void arp_init() 70 | { 71 | 72 | } 73 | 74 | void arp_rcv(struct sk_buff *skb) 75 | { 76 | struct arp_hdr *arphdr; 77 | struct arp_ipv4 *arpdata; 78 |
struct netdev *netdev; 79 | int merge = 0; 80 | 81 | arphdr = arp_hdr(skb); 82 | 83 | arphdr->hwtype = ntohs(arphdr->hwtype); 84 | arphdr->protype = ntohs(arphdr->protype); 85 | arphdr->opcode = ntohs(arphdr->opcode); 86 | arp_dbg("in", arphdr); 87 | 88 | if (arphdr->hwtype != ARP_ETHERNET) { 89 | printf("ARP: Unsupported HW type\n"); 90 | goto drop_pkt; 91 | } 92 | 93 | if (arphdr->protype != ARP_IPV4) { 94 | printf("ARP: Unsupported protocol\n"); 95 | goto drop_pkt; 96 | } 97 | 98 | arpdata = (struct arp_ipv4 *) arphdr->data; 99 | 100 | arpdata->sip = ntohl(arpdata->sip); 101 | arpdata->dip = ntohl(arpdata->dip); 102 | arpdata_dbg("receive", arpdata); 103 | 104 | merge = update_arp_translation_table(arphdr, arpdata); 105 | 106 | if (!(netdev = netdev_get(arpdata->dip))) { 107 | printf("ARP was not for us\n"); 108 | goto drop_pkt; 109 | } 110 | 111 | if (!merge && insert_arp_translation_table(arphdr, arpdata) != 0) { 112 | print_err("ERR: No free space in ARP translation table\n"); 113 | goto drop_pkt; 114 | } 115 | 116 | switch (arphdr->opcode) { 117 | case ARP_REQUEST: 118 | arp_reply(skb, netdev); 119 | return; 120 | default: 121 | printf("ARP: Opcode not supported\n"); 122 | goto drop_pkt; 123 | } 124 | 125 | drop_pkt: 126 | free_skb(skb); 127 | return; 128 | } 129 | 130 | int arp_request(uint32_t sip, uint32_t dip, struct netdev *netdev) 131 | { 132 | struct sk_buff *skb; 133 | struct arp_hdr *arp; 134 | struct arp_ipv4 *payload; 135 | int rc = 0; 136 | 137 | skb = arp_alloc_skb(); 138 | 139 | if (!skb) return -1; 140 | 141 | skb->dev = netdev; 142 | 143 | payload = (struct arp_ipv4 *) skb_push(skb, ARP_DATA_LEN); 144 | 145 | memcpy(payload->smac, netdev->hwaddr, netdev->addr_len); 146 | payload->sip = sip; 147 | 148 | memcpy(payload->dmac, broadcast_hw, netdev->addr_len); 149 | payload->dip = dip; 150 | 151 | arp = (struct arp_hdr *) skb_push(skb, ARP_HDR_LEN); 152 | 153 | arp_dbg("req", arp); 154 | arp->opcode = htons(ARP_REQUEST); 155 | arp->hwtype = 
htons(ARP_ETHERNET); 156 | arp->protype = htons(ETH_P_IP); 157 | arp->hwsize = netdev->addr_len; 158 | arp->prosize = 4; 159 | 160 | arpdata_dbg("req", payload); 161 | payload->sip = htonl(payload->sip); 162 | payload->dip = htonl(payload->dip); 163 | 164 | rc = netdev_transmit(skb, broadcast_hw, ETH_P_ARP); 165 | free_skb(skb); 166 | return rc; 167 | } 168 | 169 | void arp_reply(struct sk_buff *skb, struct netdev *netdev) 170 | { 171 | struct arp_hdr *arphdr; 172 | struct arp_ipv4 *arpdata; 173 | 174 | arphdr = arp_hdr(skb); 175 | 176 | skb_reserve(skb, ETH_HDR_LEN + ARP_HDR_LEN + ARP_DATA_LEN); 177 | skb_push(skb, ARP_HDR_LEN + ARP_DATA_LEN); 178 | 179 | arpdata = (struct arp_ipv4 *) arphdr->data; 180 | 181 | memcpy(arpdata->dmac, arpdata->smac, 6); 182 | arpdata->dip = arpdata->sip; 183 | 184 | memcpy(arpdata->smac, netdev->hwaddr, 6); 185 | arpdata->sip = netdev->addr; 186 | 187 | arphdr->opcode = ARP_REPLY; 188 | 189 | arp_dbg("reply", arphdr); 190 | arphdr->opcode = htons(arphdr->opcode); 191 | arphdr->hwtype = htons(arphdr->hwtype); 192 | arphdr->protype = htons(arphdr->protype); 193 | 194 | arpdata_dbg("reply", arpdata); 195 | arpdata->sip = htonl(arpdata->sip); 196 | arpdata->dip = htonl(arpdata->dip); 197 | 198 | skb->dev = netdev; 199 | 200 | netdev_transmit(skb, arpdata->dmac, ETH_P_ARP); 201 | free_skb(skb); 202 | } 203 | 204 | /* 205 | * Returns the HW address of the given source IP address 206 | * NULL if not found 207 | */ 208 | unsigned char* arp_get_hwaddr(uint32_t sip) 209 | { 210 | struct list_head *item; 211 | struct arp_cache_entry *entry; 212 | 213 | pthread_mutex_lock(&lock); 214 | list_for_each(item, &arp_cache) { 215 | entry = list_entry(item, struct arp_cache_entry, list); 216 | 217 | if (entry->state == ARP_RESOLVED && 218 | entry->sip == sip) { 219 | arpcache_dbg("entry", entry); 220 | 221 | uint8_t *copy = entry->smac; 222 | pthread_mutex_unlock(&lock); 223 | 224 | return copy; 225 | } 226 | } 227 | 228 | pthread_mutex_unlock(&lock); 
229 | 230 | return NULL; 231 | } 232 | 233 | void free_arp() 234 | { 235 | struct list_head *item, *tmp; 236 | struct arp_cache_entry *entry; 237 | 238 | list_for_each_safe(item, tmp, &arp_cache) { 239 | entry = list_entry(item, struct arp_cache_entry, list); 240 | list_del(item); 241 | 242 | free(entry); 243 | } 244 | } 245 | -------------------------------------------------------------------------------- /src/cli.c: -------------------------------------------------------------------------------- 1 | #include "syshead.h" 2 | #include "utils.h" 3 | #include "cli.h" 4 | 5 | int debug = 0; 6 | 7 | static void usage(char *app) 8 | { 9 | print_err("Usage: %s\n", app); 10 | print_err("\n"); 11 | print_err("Linux TCP/IP stack implemented with TUN/TAP devices.\n"); 12 | print_err("Requires the CAP_NET_ADMIN capability. See capabilities(7).\n"); 13 | print_err("See https://www.kernel.org/doc/Documentation/networking/tuntap.txt\n"); 14 | print_err("\n"); 15 | print_err("Options:\n"); 16 | print_err(" -d Debug logging and tracing\n"); 17 | print_err(" -h Print usage\n"); 18 | print_err("\n"); 19 | exit(1); 20 | } 21 | 22 | extern int optind; 23 | 24 | static int parse_opts(int *argc, char*** argv) 25 | { 26 | int opt; 27 | 28 | while ((opt = getopt(*argc, *argv, "hd")) != -1) { 29 | switch (opt) { 30 | case 'd': 31 | debug = 1; 32 | break; 33 | case 'h': 34 | default: 35 | usage(*argv[0]); 36 | } 37 | } 38 | 39 | *argc -= optind; 40 | *argv += optind; 41 | 42 | return optind; 43 | } 44 | 45 | void parse_cli(int argc, char **argv) 46 | { 47 | parse_opts(&argc, &argv); 48 | } 49 | -------------------------------------------------------------------------------- /src/dst.c: -------------------------------------------------------------------------------- 1 | #include "syshead.h" 2 | #include "dst.h" 3 | #include "ip.h" 4 | #include "arp.h" 5 | 6 | int dst_neigh_output(struct sk_buff *skb) 7 | { 8 | struct iphdr *iphdr = ip_hdr(skb); 9 | struct netdev *netdev = skb->dev; 10 | 
struct rtentry *rt = skb->rt; 11 | uint32_t daddr = ntohl(iphdr->daddr); 12 | uint32_t saddr = ntohl(iphdr->saddr); 13 | 14 | uint8_t *dmac; 15 | 16 | if (rt->flags & RT_GATEWAY) { 17 | daddr = rt->gateway; 18 | } 19 | 20 | dmac = arp_get_hwaddr(daddr); 21 | 22 | if (dmac) { 23 | return netdev_transmit(skb, dmac, ETH_P_IP); 24 | } else { 25 | arp_request(saddr, daddr, netdev); 26 | 27 | /* Inform upper layer that traffic was not sent, retry later */ 28 | return -1; 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /src/icmpv4.c: -------------------------------------------------------------------------------- 1 | #include "ethernet.h" 2 | #include "icmpv4.h" 3 | #include "ip.h" 4 | #include "utils.h" 5 | 6 | void icmpv4_incoming(struct sk_buff *skb) 7 | { 8 | struct iphdr *iphdr = ip_hdr(skb); 9 | struct icmp_v4 *icmp = (struct icmp_v4 *) iphdr->data; 10 | 11 | //TODO: Check csum 12 | 13 | switch (icmp->type) { 14 | case ICMP_V4_ECHO: 15 | icmpv4_reply(skb); 16 | return; 17 | case ICMP_V4_DST_UNREACHABLE: 18 | print_err("ICMPv4 received 'dst unreachable' code %d, " 19 | "check your routes and firewall rules\n", icmp->code); 20 | goto drop_pkt; 21 | default: 22 | print_err("ICMPv4 did not match supported types\n"); 23 | goto drop_pkt; 24 | } 25 | 26 | drop_pkt: 27 | free_skb(skb); 28 | return; 29 | } 30 | 31 | void icmpv4_reply(struct sk_buff *skb) 32 | { 33 | struct iphdr *iphdr = ip_hdr(skb); 34 | struct icmp_v4 *icmp; 35 | struct sock sk; 36 | memset(&sk, 0, sizeof(struct sock)); 37 | 38 | uint16_t icmp_len = iphdr->len - (iphdr->ihl * 4); 39 | 40 | skb_reserve(skb, ETH_HDR_LEN + IP_HDR_LEN + icmp_len); 41 | skb_push(skb, icmp_len); 42 | 43 | icmp = (struct icmp_v4 *)skb->data; 44 | 45 | icmp->type = ICMP_V4_REPLY; 46 | icmp->csum = 0; 47 | icmp->csum = checksum(icmp, icmp_len, 0); 48 | 49 | skb->protocol = ICMPV4; 50 | sk.daddr = iphdr->saddr; 51 | 52 | ip_output(&sk, skb); 53 | free_skb(skb); 54 | } 55 | 
-------------------------------------------------------------------------------- /src/inet.c: -------------------------------------------------------------------------------- 1 | #include "syshead.h" 2 | #include "inet.h" 3 | #include "socket.h" 4 | #include "sock.h" 5 | #include "tcp.h" 6 | #include "wait.h" 7 | 8 | extern struct net_ops tcp_ops; 9 | 10 | static int inet_stream_connect(struct socket *sock, const struct sockaddr *addr, 11 | int addr_len, int flags); 12 | 13 | static int INET_OPS = 1; 14 | 15 | struct net_family inet = { 16 | .create = inet_create, 17 | }; 18 | 19 | static struct sock_ops inet_stream_ops = { 20 | .connect = &inet_stream_connect, 21 | .write = &inet_write, 22 | .read = &inet_read, 23 | .close = &inet_close, 24 | .free = &inet_free, 25 | .abort = &inet_abort, 26 | .getpeername = &inet_getpeername, 27 | .getsockname = &inet_getsockname, 28 | }; 29 | 30 | static struct sock_type inet_ops[] = { 31 | { 32 | .sock_ops = &inet_stream_ops, 33 | .net_ops = &tcp_ops, 34 | .type = SOCK_STREAM, 35 | .protocol = IPPROTO_TCP, 36 | } 37 | }; 38 | 39 | int inet_create(struct socket *sock, int protocol) 40 | { 41 | struct sock *sk; 42 | struct sock_type *skt = NULL; 43 | 44 | for (int i = 0; i < INET_OPS; i++) { 45 | if (inet_ops[i].type & sock->type) { 46 | skt = &inet_ops[i]; 47 | break; 48 | } 49 | } 50 | 51 | if (!skt) { 52 | print_err("Could not find socktype for socket\n"); 53 | return 1; 54 | } 55 | 56 | sock->ops = skt->sock_ops; 57 | 58 | sk = sk_alloc(skt->net_ops, protocol); 59 | sk->protocol = protocol; 60 | 61 | sock_init_data(sock, sk); 62 | 63 | return 0; 64 | } 65 | 66 | int inet_socket(struct socket *sock, int protocol) 67 | { 68 | return 0; 69 | } 70 | 71 | int inet_connect(struct socket *sock, struct sockaddr *addr, 72 | int addr_len, int flags) 73 | { 74 | return 0; 75 | } 76 | 77 | static int inet_stream_connect(struct socket *sock, const struct sockaddr *addr, 78 | int addr_len, int flags) 79 | { 80 | struct sock *sk = sock->sk; 
81 | int rc = 0; 82 | 83 | if (addr_len < sizeof(addr->sa_family)) { 84 | return -EINVAL; 85 | } 86 | 87 | if (addr->sa_family == AF_UNSPEC) { 88 | sk->ops->disconnect(sk, flags); 89 | return -EAFNOSUPPORT; 90 | } 91 | 92 | switch (sock->state) { 93 | default: 94 | sk->err = -EINVAL; 95 | goto out; 96 | case SS_CONNECTED: 97 | sk->err = -EISCONN; 98 | goto out; 99 | case SS_CONNECTING: 100 | sk->err = -EALREADY; 101 | goto out; 102 | case SS_UNCONNECTED: 103 | sk->err = -EISCONN; 104 | if (sk->state != TCP_CLOSE) { 105 | goto out; 106 | } 107 | 108 | sk->ops->connect(sk, addr, addr_len, flags); 109 | sock->state = SS_CONNECTING; 110 | sk->err = -EINPROGRESS; 111 | 112 | if (sock->flags & O_NONBLOCK) { 113 | goto out; 114 | } 115 | 116 | pthread_mutex_lock(&sock->sleep.lock); 117 | while (sock->state == SS_CONNECTING && sk->err == -EINPROGRESS) { 118 | socket_release(sock); 119 | wait_sleep(&sock->sleep); 120 | socket_wr_acquire(sock); 121 | } 122 | pthread_mutex_unlock(&sock->sleep.lock); 123 | socket_wr_acquire(sock); 124 | 125 | switch (sk->err) { 126 | case -ETIMEDOUT: 127 | case -ECONNREFUSED: 128 | goto sock_error; 129 | } 130 | 131 | if (sk->err != 0) { 132 | goto out; 133 | } 134 | 135 | sock->state = SS_CONNECTED; 136 | break; 137 | } 138 | 139 | out: 140 | return sk->err; 141 | sock_error: 142 | rc = sk->err; 143 | return rc; 144 | } 145 | 146 | int inet_write(struct socket *sock, const void *buf, int len) 147 | { 148 | struct sock *sk = sock->sk; 149 | 150 | return sk->ops->write(sk, buf, len); 151 | } 152 | 153 | int inet_read(struct socket *sock, void *buf, int len) 154 | { 155 | struct sock *sk = sock->sk; 156 | 157 | return sk->ops->read(sk, buf, len); 158 | } 159 | 160 | struct sock *inet_lookup(struct sk_buff *skb, uint16_t sport, uint16_t dport) 161 | { 162 | struct socket *sock = socket_lookup(sport, dport); 163 | if (sock == NULL) return NULL; 164 | 165 | return sock->sk; 166 | } 167 | 168 | int inet_close(struct socket *sock) 169 | { 170 | if 
(!sock) { 171 | return 0; 172 | } 173 | 174 | struct sock *sk = sock->sk; 175 | 176 | return sock->sk->ops->close(sk); 177 | } 178 | 179 | int inet_free(struct socket *sock) 180 | { 181 | struct sock *sk = sock->sk; 182 | sock_free(sk); 183 | free(sock->sk); 184 | 185 | return 0; 186 | } 187 | 188 | int inet_abort(struct socket *sock) 189 | { 190 | struct sock *sk = sock->sk; 191 | 192 | if (sk) { 193 | sk->ops->abort(sk); 194 | } 195 | 196 | return 0; 197 | } 198 | 199 | int inet_getpeername(struct socket *sock, struct sockaddr *restrict address, 200 | socklen_t *address_len) 201 | { 202 | struct sock *sk = sock->sk; 203 | 204 | if (sk == NULL) { 205 | return -1; 206 | } 207 | 208 | struct sockaddr_in *res = (struct sockaddr_in *) address; 209 | res->sin_family = AF_INET; 210 | res->sin_port = htons(sk->dport); 211 | res->sin_addr.s_addr = htonl(sk->daddr); 212 | *address_len = sizeof(struct sockaddr_in); 213 | 214 | inet_dbg(sock, "geetpeername sin_family %d sin_port %d sin_addr %d addrlen %d", 215 | res->sin_family, ntohs(res->sin_port), ntohl(res->sin_addr.s_addr), *address_len); 216 | 217 | return 0; 218 | } 219 | int inet_getsockname(struct socket *sock, struct sockaddr *restrict address, 220 | socklen_t *address_len) 221 | { 222 | struct sock *sk = sock->sk; 223 | 224 | if (sk == NULL) { 225 | return -1; 226 | } 227 | 228 | struct sockaddr_in *res = (struct sockaddr_in *) address; 229 | res->sin_family = AF_INET; 230 | res->sin_port = htons(sk->sport); 231 | res->sin_addr.s_addr = htonl(sk->saddr); 232 | *address_len = sizeof(struct sockaddr_in); 233 | 234 | inet_dbg(sock, "getsockname sin_family %d sin_port %d sin_addr %d addrlen %d", 235 | res->sin_family, ntohs(res->sin_port), ntohl(res->sin_addr.s_addr), *address_len); 236 | 237 | return 0; 238 | } 239 | -------------------------------------------------------------------------------- /src/ip_input.c: -------------------------------------------------------------------------------- 1 | #include "syshead.h" 
2 | #include "skbuff.h"
3 | #include "arp.h"
4 | #include "ip.h"
5 | #include "icmpv4.h"
6 | #include "tcp.h"
7 | #include "utils.h"
8 |
9 | /* Converts the multi-byte IPv4 header fields used later to host byte order */
10 | static void ip_init_pkt(struct iphdr *ih)
11 | {
12 |     ih->saddr = ntohl(ih->saddr);
13 |     ih->daddr = ntohl(ih->daddr);
14 |     ih->len = ntohs(ih->len);
15 |     ih->id = ntohs(ih->id);
16 | }
17 |
18 | /*
19 |  * Validates an incoming IPv4 datagram and hands it to the ICMPv4 or TCP
20 |  * handler. Datagrams that fail validation or carry another protocol are
21 |  * freed here; accepted ones are passed on together with their skb.
22 |  *
23 |  * Always returns 0.
24 |  */
25 | int ip_rcv(struct sk_buff *skb)
26 | {
27 |     struct iphdr *ih = ip_hdr(skb);
28 |     uint16_t csum = -1;
29 |
30 |     if (ih->version != IPV4) {
31 |         print_err("Datagram version was not IPv4\n");
32 |         goto drop_pkt;
33 |     }
34 |
35 |     if (ih->ihl < 5) {
36 |         print_err("IPv4 header length must be at least 5\n");
37 |         goto drop_pkt;
38 |     }
39 |
40 |     if (ih->ttl == 0) {
41 |         //TODO: Send ICMP error
42 |         print_err("Time to live of datagram reached 0\n");
43 |         goto drop_pkt;
44 |     }
45 |
46 |     /* Checksum is verified over the header while still in network order */
47 |     csum = checksum(ih, ih->ihl * 4, 0);
48 |
49 |     if (csum != 0) {
50 |         // Invalid checksum, drop packet handling
51 |         goto drop_pkt;
52 |     }
53 |
54 |     // TODO: Check fragmentation, possibly reassemble
55 |
56 |     ip_init_pkt(ih);
57 |
58 |     /*
59 |      * Upper layers compute the payload size as len - ihl * 4; a total length
60 |      * shorter than the header would make that subtraction wrap around.
61 |      */
62 |     if (ih->len < ih->ihl * 4) {
63 |         print_err("IPv4 total length is shorter than the header\n");
64 |         goto drop_pkt;
65 |     }
66 |
67 |     ip_dbg("in", ih);
68 |
69 |     switch (ih->proto) {
70 |     case ICMPV4:
71 |         icmpv4_incoming(skb);
72 |         return 0;
73 |     case IP_TCP:
74 |         tcp_in(skb);
75 |         return 0;
76 |     default:
77 |         print_err("Unknown IP header proto\n");
78 |         goto drop_pkt;
79 |     }
80 |
81 | drop_pkt:
82 |     free_skb(skb);
83 |     return 0;
84 | }
85 |
--------------------------------------------------------------------------------
/src/ip_output.c:
--------------------------------------------------------------------------------
1 | #include "syshead.h"
2 | #include "skbuff.h"
3 | #include "utils.h"
4 | #include "ip.h"
5 | #include "dst.h"
6 | #include "route.h"
7 |
8 | /*
9 |  * Computes the checksum over the IPv4 header (ihl * 4 bytes) and stores it
10 |  * in the header. The csum field must be zero when this is called.
11 |  */
12 | void ip_send_check(struct iphdr *ihdr)
13 | {
14 |     uint32_t csum = checksum(ihdr, ihdr->ihl * 4, 0);
15 |     ihdr->csum = csum;
16 | }
17 |
18 | /*
19 |  * Prepends an IPv4 header to `skb` for destination sk->daddr and passes the
20 |  * packet to the neighbour output path of the selected route.
21 |  *
22 |  * The header's id field is left as found in the buffer.
23 |  *
24 |  * Returns the result of dst_neigh_output(), or -1 when no route is found.
25 |  */
26 | int ip_output(struct sock *sk, struct sk_buff *skb)
27 | {
28 |     struct rtentry *rt;
29 |     struct iphdr *ihdr = ip_hdr(skb);
30 |
31 |     rt = route_lookup(sk->daddr);
32 |
33 |     if (!rt) {
34 |         // TODO: dest_unreachable
35 |         print_err("IP output route lookup fail\n");
36 |         return -1;
37 |     }
38 |
39 |     skb->dev = rt->dev;
40 |     skb->rt = rt;
41 |
42 |     skb_push(skb, IP_HDR_LEN);
43 |
44 |     ihdr->version = IPV4;
45 |     ihdr->ihl = 0x05;
46 |     ihdr->tos = 0;
47 |     ihdr->len = skb->len;
48 |     /* 0x4000 sets the Don't Fragment flag with a zero fragment offset */
49 |     ihdr->frag_offset = 0x4000;
50 |     ihdr->ttl = 64;
51 |     ihdr->proto = skb->protocol;
52 |     ihdr->saddr = skb->dev->addr;
53 |     ihdr->daddr = sk->daddr;
54 |     ihdr->csum = 0;
55 |
56 |     ip_dbg("out", ihdr);
57 |
58 |     /* Switch to network byte order only after the debug print above */
59 |     ihdr->len = htons(ihdr->len);
60 |     ihdr->id = htons(ihdr->id);
61 |     ihdr->daddr = htonl(ihdr->daddr);
62 |     ihdr->saddr = htonl(ihdr->saddr);
63 |     ihdr->csum = htons(ihdr->csum);
64 |     ihdr->frag_offset = htons(ihdr->frag_offset);
65 |
66 |     ip_send_check(ihdr);
67 |
68 |     return dst_neigh_output(skb);
69 | }
70 |
--------------------------------------------------------------------------------
/src/ipc.c:
--------------------------------------------------------------------------------
1 | #include "syshead.h"
2 | #include "utils.h"
3 | #include "ipc.h"
4 | #include "socket.h"
5 |
6 | #define IPC_BUFLEN 8192
7 |
8 | static LIST_HEAD(sockets);
9 | static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
10 | static int socket_count = 0;
11 |
12 | /*
13 |  * Allocates the bookkeeping entry for a newly accepted IPC connection and
14 |  * appends it to the `sockets` list under `lock`.
15 |  *
16 |  * Returns the new entry, or NULL when memory is exhausted.
17 |  */
18 | static struct ipc_thread *ipc_alloc_thread(int sock)
19 | {
20 |     struct ipc_thread *th = calloc(1, sizeof(struct ipc_thread));
21 |
22 |     if (th == NULL) {
23 |         print_err("Could not allocate memory for IPC thread\n");
24 |         return NULL;
25 |     }
26 |
27 |     list_init(&th->list);
28 |     th->sock = sock;
29 |
30 |     pthread_mutex_lock(&lock);
31 |     list_add_tail(&th->list, &sockets);
32 |     socket_count++;
33 |     pthread_mutex_unlock(&lock);
34 |
35 |     ipc_dbg("New IPC socket allocated", th);
36 |
37 |     return th;
38 | }
39 |
40 | /*
41 |  * Removes the entry for descriptor `sock` from the `sockets` list, closes
42 |  * the descriptor and frees the entry. Does nothing when no entry matches.
43 |  */
44 | static void ipc_free_thread(int sock)
45 | {
46 |     struct list_head *item, *tmp = NULL;
47 |     struct ipc_thread *th = NULL;
48 |
49 |     pthread_mutex_lock(&lock);
50 |
51 |     list_for_each_safe(item, tmp, &sockets) {
52 |         th = list_entry(item, struct ipc_thread, list);
53 |
54 |         if (th->sock == sock) {
55 |             list_del(&th->list);
56 |             ipc_dbg("IPC socket deleted", th);
57 |
58 |             close(th->sock);
59 |             free(th);
60 |             socket_count--;
61 |             break;
62 |         }
63 |
64 |     }
65 |
66 |     pthread_mutex_unlock(&lock);
67 | }
68 |
69 | /* Sends `len` bytes on the IPC socket; MSG_NOSIGNAL suppresses SIGPIPE */
70 | static int ipc_try_send(int sockfd, const void *buf, size_t len)
71 | {
72 |     return send(sockfd, buf, len, MSG_NOSIGNAL);
73 | }
74 |
75 | /*
76 |  * Sends the generic response for a call of kind `type`: an ipc_msg header
77 |  * followed by one ipc_err. A negative `rc` is reported as rc = -1 with
78 |  * err = -rc, anything else as err = 0 with the value of `rc`.
79 |  *
80 |  * Always returns 0; a failed send is only logged.
81 |  */
82 | static int ipc_write_rc(int sockfd, pid_t pid, uint16_t type, int rc)
83 | {
84 |     int resplen = sizeof(struct ipc_msg) + sizeof(struct ipc_err);
85 |     /* Small fixed-size buffer; alloca() does not report failure via NULL */
86 |     struct ipc_msg *response = alloca(resplen);
87 |     struct ipc_err err;
88 |
89 |     /* Zero everything so no uninitialized stack bytes reach the client */
90 |     memset(response, 0, resplen);
91 |     memset(&err, 0, sizeof(struct ipc_err));
92 |
93 |     response->type = type;
94 |     response->pid = pid;
95 |
96 |     if (rc < 0) {
97 |         err.err = -rc;
98 |         err.rc = -1;
99 |     } else {
100 |         err.err = 0;
101 |         err.rc = rc;
102 |     }
103 |
104 |     memcpy(response->data, &err, sizeof(struct ipc_err));
105 |
106 |     if (ipc_try_send(sockfd, (char *)response, resplen) == -1) {
107 |         perror("Error on writing IPC write response");
108 |     }
109 |
110 |     return 0;
111 | }
112 |
113 | /*
114 |  * Handles IPC_READ: reads up to the requested number of bytes from the
115 |  * stack socket and answers with ipc_msg + ipc_err + ipc_read + data.
116 |  *
117 |  * Returns 0 once a response was attempted, -1 when the response buffer
118 |  * cannot be allocated.
119 |  */
120 | static int ipc_read(int sockfd, struct ipc_msg *msg)
121 | {
122 |     struct ipc_read *requested = (struct ipc_read *) msg->data;
123 |     pid_t pid = msg->pid;
124 |     size_t want = requested->len;
125 |     int rlen = -1;
126 |
127 |     /*
128 |      * The length is chosen by the client, so the buffers live on the heap:
129 |      * a large value must not overflow the stack. One byte is allocated even
130 |      * for an empty request so a valid pointer is always passed on.
131 |      */
132 |     char *rbuf = calloc(1, want > 0 ? want : 1);
133 |
134 |     if (rbuf == NULL) {
135 |         rlen = -ENOMEM;
136 |     } else {
137 |         rlen = _read(pid, requested->sockfd, rbuf, requested->len);
138 |     }
139 |
140 |     size_t datalen = rlen > 0 ? (size_t) rlen : 0;
141 |     size_t resplen = sizeof(struct ipc_msg) + sizeof(struct ipc_err) +
142 |         sizeof(struct ipc_read) + datalen;
143 |     struct ipc_msg *response = calloc(1, resplen);
144 |
145 |     if (response == NULL) {
146 |         print_err("Could not allocate memory for IPC read response\n");
147 |         free(rbuf);
148 |         return -1;
149 |     }
150 |
151 |     struct ipc_err *error = (struct ipc_err *) response->data;
152 |     struct ipc_read *actual = (struct ipc_read *) error->data;
153 |
154 |     response->type = IPC_READ;
155 |     response->pid = pid;
156 |
157 |     error->rc = rlen < 0 ? -1 : rlen;
158 |     error->err = rlen < 0 ? -rlen : 0;
159 |
160 |     actual->sockfd = requested->sockfd;
161 |     actual->len = rlen;
162 |
163 |     if (datalen > 0) {
164 |         memcpy(actual->buf, rbuf, datalen);
165 |     }
166 |
167 |     if (ipc_try_send(sockfd, (char *)response, resplen) == -1) {
168 |         perror("Error on writing IPC read response");
169 |     }
170 |
171 |     free(response);
172 |     free(rbuf);
173 |
174 |     return 0;
175 | }
176 |
177 | /*
178 |  * Handles IPC_WRITE: gathers the payload and writes it to the stack socket.
179 |  * The part of the payload that did not fit into the first IPC_BUFLEN-sized
180 |  * read is fetched from the IPC socket before the write is issued.
181 |  *
182 |  * Returns 0 once a response was sent, -1 when the payload cannot be
183 |  * gathered (allocation failure, read error or connection closed early).
184 |  */
185 | static int ipc_write(int sockfd, struct ipc_msg *msg)
186 | {
187 |     struct ipc_write *payload = (struct ipc_write *) msg->data;
188 |     pid_t pid = msg->pid;
189 |     int rc = -1;
190 |     /* Payload bytes that fit into the first IPC_BUFLEN-sized message */
191 |     size_t head = IPC_BUFLEN - sizeof(struct ipc_write) - sizeof(struct ipc_msg);
192 |     size_t len = payload->len;
193 |     size_t got = len > head ? head : len;
194 |
195 |     /* Client-chosen length: keep the buffer off the stack, never size 0 */
196 |     char *buf = calloc(1, len > 0 ? len : 1);
197 |
198 |     if (buf == NULL) {
199 |         print_err("Could not allocate memory for IPC write payload\n");
200 |         ipc_write_rc(sockfd, pid, IPC_WRITE, -ENOMEM);
201 |         /* An unread payload tail would desynchronize the stream: give up */
202 |         return -1;
203 |     }
204 |
205 |     memcpy(buf, payload->buf, got);
206 |
207 |     // Guard for payload that is longer than initial IPC_BUFLEN
208 |     while (got < len) {
209 |         /* A stream socket may deliver the tail in several pieces */
210 |         ssize_t res = read(sockfd, buf + got, len - got);
211 |
212 |         if (res == -1) {
213 |             if (errno == EINTR) {
214 |                 continue;
215 |             }
216 |
217 |             perror("Read on IPC payload guard");
218 |             free(buf);
219 |             return -1;
220 |         }
221 |
222 |         if (res == 0) {
223 |             /* Peer closed before sending everything: do not write zeros */
224 |             print_err("Hmm, we did not read exact payload amount in IPC write\n");
225 |             free(buf);
226 |             return -1;
227 |         }
228 |
229 |         got += (size_t) res;
230 |     }
231 |
232 |     rc = _write(pid, payload->sockfd, buf, payload->len);
233 |
234 |     free(buf);
235 |
236 |     return ipc_write_rc(sockfd, pid, IPC_WRITE, rc);
237 | }
238 |
239 | /* Handles IPC_CONNECT: forwards to _connect() and reports its result */
240 | static int ipc_connect(int sockfd, struct ipc_msg *msg)
241 | {
242 |     struct ipc_connect *payload = (struct ipc_connect *)msg->data;
243 |     pid_t pid = msg->pid;
244 |     int rc = -1;
245 |
246 |     rc = _connect(pid, payload->sockfd, &payload->addr, payload->addrlen);
247 |
248 |     return ipc_write_rc(sockfd, pid, IPC_CONNECT, rc);
249 | }
250 |
251 | /* Handles IPC_SOCKET: forwards to _socket() and reports its result */
252 | static int ipc_socket(int sockfd, struct ipc_msg *msg)
253 | {
254 |     struct ipc_socket *sock = (struct ipc_socket *)msg->data;
255 |     pid_t pid = msg->pid;
256 |     int rc = -1;
257 |
258 |     rc = _socket(pid, sock->domain, sock->type, sock->protocol);
259 |
260 |     return ipc_write_rc(sockfd, pid, IPC_SOCKET, rc);
261 | }
262 |
263 | static
int ipc_close(int sockfd, struct ipc_msg *msg) 181 | { 182 | struct ipc_close *payload = (struct ipc_close *)msg->data; 183 | pid_t pid = msg->pid; 184 | int rc = -1; 185 | 186 | rc = _close(pid, payload->sockfd); 187 | 188 | rc = ipc_write_rc(sockfd, pid, IPC_CLOSE, rc); 189 | 190 | return rc; 191 | } 192 | 193 | static int ipc_poll(int sockfd, struct ipc_msg *msg) 194 | { 195 | struct ipc_poll *data = (struct ipc_poll *)msg->data; 196 | pid_t pid = msg->pid; 197 | int rc = -1; 198 | 199 | struct pollfd fds[data->nfds]; 200 | 201 | for (int i = 0; i < data->nfds; i++) { 202 | fds[i].fd = data->fds[i].fd; 203 | fds[i].events = data->fds[i].events; 204 | fds[i].revents = data->fds[i].revents; 205 | } 206 | 207 | rc = _poll(pid, fds, data->nfds, data->timeout); 208 | 209 | int resplen = sizeof(struct ipc_msg) + sizeof(struct ipc_err) + sizeof(struct ipc_pollfd) * data->nfds; 210 | struct ipc_msg *response = alloca(resplen); 211 | 212 | if (response == NULL) { 213 | print_err("Could not allocate memory for IPC write response\n"); 214 | return -1; 215 | } 216 | 217 | response->type = IPC_POLL; 218 | response->pid = pid; 219 | 220 | struct ipc_err err; 221 | 222 | if (rc < 0) { 223 | err.err = -rc; 224 | err.rc = -1; 225 | } else { 226 | err.err = 0; 227 | err.rc = rc; 228 | } 229 | 230 | memcpy(response->data, &err, sizeof(struct ipc_err)); 231 | 232 | struct ipc_pollfd *polled = (struct ipc_pollfd *) ((struct ipc_err *)response->data)->data; 233 | 234 | for (int i = 0; i < data->nfds; i++) { 235 | polled[i].fd = fds[i].fd; 236 | polled[i].events = fds[i].events; 237 | polled[i].revents = fds[i].revents; 238 | } 239 | 240 | if (ipc_try_send(sockfd, (char *)response, resplen) == -1) { 241 | perror("Error on writing IPC poll response"); 242 | } 243 | 244 | return 0; 245 | } 246 | 247 | static int ipc_fcntl(int sockfd, struct ipc_msg *msg) 248 | { 249 | struct ipc_fcntl *fc = (struct ipc_fcntl *)msg->data; 250 | pid_t pid = msg->pid; 251 | int rc = -1; 252 | 253 | switch 
(fc->cmd) { 254 | case F_GETFL: 255 | rc = _fcntl(pid, fc->sockfd, fc->cmd); 256 | break; 257 | case F_SETFL: 258 | rc = _fcntl(pid, fc->sockfd, fc->cmd, *(int *)fc->data); 259 | break; 260 | default: 261 | print_err("IPC Fcntl cmd not supported %d\n", fc->cmd); 262 | rc = -EINVAL; 263 | } 264 | 265 | return ipc_write_rc(sockfd, pid, IPC_FCNTL, rc); 266 | } 267 | 268 | static int ipc_getsockopt(int sockfd, struct ipc_msg *msg) 269 | { 270 | struct ipc_sockopt *opts = (struct ipc_sockopt *)msg->data; 271 | 272 | pid_t pid = msg->pid; 273 | int rc = -1; 274 | 275 | rc = _getsockopt(pid, opts->fd, opts->level, opts->optname, opts->optval, &opts->optlen); 276 | 277 | int resplen = sizeof(struct ipc_msg) + sizeof(struct ipc_err) + sizeof(struct ipc_sockopt) + opts->optlen; 278 | struct ipc_msg *response = alloca(resplen); 279 | 280 | if (response == NULL) { 281 | print_err("Could not allocate memory for IPC getsockopt response\n"); 282 | return -1; 283 | } 284 | 285 | response->type = IPC_GETSOCKOPT; 286 | response->pid = pid; 287 | 288 | struct ipc_err err; 289 | 290 | if (rc < 0) { 291 | err.err = -rc; 292 | err.rc = -1; 293 | } else { 294 | err.err = 0; 295 | err.rc = rc; 296 | } 297 | 298 | memcpy(response->data, &err, sizeof(struct ipc_err)); 299 | 300 | struct ipc_sockopt *optres = (struct ipc_sockopt *) ((struct ipc_err *)response->data)->data; 301 | 302 | optres->fd = opts->fd; 303 | optres->level = opts->level; 304 | optres->optname = opts->optname; 305 | optres->optlen = opts->optlen; 306 | memcpy(&optres->optval, opts->optval, opts->optlen); 307 | 308 | if (ipc_try_send(sockfd, (char *)response, resplen) == -1) { 309 | perror("Error on writing IPC getsockopt response"); 310 | } 311 | 312 | return rc; 313 | } 314 | 315 | static int ipc_getpeername(int sockfd, struct ipc_msg *msg) 316 | { 317 | struct ipc_sockname *name = (struct ipc_sockname *)msg->data; 318 | 319 | pid_t pid = msg->pid; 320 | int rc = -1; 321 | 322 | int resplen = sizeof(struct ipc_msg) + 
sizeof(struct ipc_err) + sizeof(struct ipc_sockname); 323 | struct ipc_msg *response = alloca(resplen); 324 | 325 | if (response == NULL) { 326 | print_err("Could not allocate memory for IPC getpeername response\n"); 327 | return -1; 328 | } 329 | 330 | response->type = IPC_GETPEERNAME; 331 | response->pid = pid; 332 | 333 | struct ipc_sockname *nameres = (struct ipc_sockname *) ((struct ipc_err *)response->data)->data; 334 | rc = _getpeername(pid, name->socket, (struct sockaddr *)nameres->sa_data, &nameres->address_len); 335 | 336 | struct ipc_err err; 337 | 338 | if (rc < 0) { 339 | err.err = -rc; 340 | err.rc = -1; 341 | } else { 342 | err.err = 0; 343 | err.rc = rc; 344 | } 345 | 346 | memcpy(response->data, &err, sizeof(struct ipc_err)); 347 | 348 | nameres->socket = name->socket; 349 | 350 | if (ipc_try_send(sockfd, (char *)response, resplen) == -1) { 351 | perror("Error on writing IPC getpeername response"); 352 | } 353 | 354 | return rc; 355 | } 356 | 357 | static int ipc_getsockname(int sockfd, struct ipc_msg *msg) 358 | { 359 | struct ipc_sockname *name = (struct ipc_sockname *)msg->data; 360 | 361 | pid_t pid = msg->pid; 362 | int rc = -1; 363 | 364 | int resplen = sizeof(struct ipc_msg) + sizeof(struct ipc_err) + sizeof(struct ipc_sockname); 365 | struct ipc_msg *response = alloca(resplen); 366 | 367 | if (response == NULL) { 368 | print_err("Could not allocate memory for IPC getsockname response\n"); 369 | return -1; 370 | } 371 | 372 | response->type = IPC_GETSOCKNAME; 373 | response->pid = pid; 374 | 375 | struct ipc_sockname *nameres = (struct ipc_sockname *) ((struct ipc_err *)response->data)->data; 376 | rc = _getsockname(pid, name->socket, (struct sockaddr *)nameres->sa_data, &nameres->address_len); 377 | 378 | struct ipc_err err; 379 | 380 | if (rc < 0) { 381 | err.err = -rc; 382 | err.rc = -1; 383 | } else { 384 | err.err = 0; 385 | err.rc = rc; 386 | } 387 | 388 | memcpy(response->data, &err, sizeof(struct ipc_err)); 389 | 390 | 
nameres->socket = name->socket; 391 | 392 | if (ipc_try_send(sockfd, (char *)response, resplen) == -1) { 393 | perror("Error on writing IPC getsockname response"); 394 | } 395 | 396 | return rc; 397 | } 398 | 399 | static int demux_ipc_socket_call(int sockfd, char *cmdbuf, int blen) 400 | { 401 | struct ipc_msg *msg = (struct ipc_msg *)cmdbuf; 402 | 403 | switch (msg->type) { 404 | case IPC_SOCKET: 405 | return ipc_socket(sockfd, msg); 406 | break; 407 | case IPC_CONNECT: 408 | return ipc_connect(sockfd, msg); 409 | break; 410 | case IPC_WRITE: 411 | return ipc_write(sockfd, msg); 412 | break; 413 | case IPC_READ: 414 | return ipc_read(sockfd, msg); 415 | break; 416 | case IPC_CLOSE: 417 | return ipc_close(sockfd, msg); 418 | break; 419 | case IPC_POLL: 420 | return ipc_poll(sockfd, msg); 421 | break; 422 | case IPC_FCNTL: 423 | return ipc_fcntl(sockfd, msg); 424 | break; 425 | case IPC_GETSOCKOPT: 426 | return ipc_getsockopt(sockfd, msg); 427 | case IPC_GETPEERNAME: 428 | return ipc_getpeername(sockfd, msg); 429 | case IPC_GETSOCKNAME: 430 | return ipc_getsockname(sockfd, msg); 431 | default: 432 | print_err("No such IPC type %d\n", msg->type); 433 | break; 434 | }; 435 | 436 | return 0; 437 | } 438 | 439 | void *socket_ipc_open(void *args) { 440 | int blen = IPC_BUFLEN; 441 | char buf[blen]; 442 | int sockfd = *(int *)args; 443 | int rc = -1; 444 | 445 | while ((rc = read(sockfd, buf, blen)) > 0) { 446 | rc = demux_ipc_socket_call(sockfd, buf, blen); 447 | 448 | if (rc == -1) { 449 | print_err("Error on demuxing IPC socket call\n"); 450 | close(sockfd); 451 | return NULL; 452 | }; 453 | } 454 | 455 | ipc_free_thread(sockfd); 456 | 457 | if (rc == -1) { 458 | perror("socket ipc read"); 459 | } 460 | 461 | return NULL; 462 | } 463 | 464 | void *start_ipc_listener() 465 | { 466 | int fd, rc, datasock; 467 | struct sockaddr_un un; 468 | char *sockname = "/tmp/lvlip.socket"; 469 | 470 | unlink(sockname); 471 | 472 | if (strnlen(sockname, sizeof(un.sun_path)) == 
sizeof(un.sun_path)) { 473 | // Path is too long 474 | print_err("Path for UNIX socket is too long\n"); 475 | exit(-1); 476 | } 477 | 478 | if ((fd = socket(AF_UNIX, SOCK_STREAM, 0)) < 0) { 479 | perror("IPC listener UNIX socket"); 480 | exit(EXIT_FAILURE); 481 | } 482 | 483 | memset(&un, 0, sizeof(struct sockaddr_un)); 484 | un.sun_family = AF_UNIX; 485 | strncpy(un.sun_path, sockname, sizeof(un.sun_path) - 1); 486 | 487 | rc = bind(fd, (const struct sockaddr *) &un, sizeof(struct sockaddr_un)); 488 | 489 | if (rc == -1) { 490 | perror("IPC bind"); 491 | exit(EXIT_FAILURE); 492 | } 493 | 494 | rc = listen(fd, 20); 495 | 496 | if (rc == -1) { 497 | perror("IPC listen"); 498 | exit(EXIT_FAILURE); 499 | } 500 | 501 | if (chmod(sockname, S_IRUSR | S_IWUSR | S_IXUSR | 502 | S_IRGRP | S_IWGRP | S_IXGRP | 503 | S_IROTH | S_IWOTH | S_IXOTH) == -1) { 504 | perror("Chmod on lvl-ip IPC UNIX socket failed"); 505 | exit(EXIT_FAILURE); 506 | } 507 | 508 | for (;;) { 509 | datasock = accept(fd, NULL, NULL); 510 | if (datasock == -1) { 511 | perror("IPC accept"); 512 | exit(EXIT_FAILURE); 513 | } 514 | 515 | struct ipc_thread *th = ipc_alloc_thread(datasock); 516 | 517 | if (pthread_create(&th->id, NULL, &socket_ipc_open, &th->sock) != 0) { 518 | print_err("Error on socket thread creation\n"); 519 | exit(1); 520 | }; 521 | } 522 | 523 | close(fd); 524 | 525 | unlink(sockname); 526 | 527 | return NULL; 528 | } 529 | -------------------------------------------------------------------------------- /src/main.c: -------------------------------------------------------------------------------- 1 | #include "syshead.h" 2 | #include "basic.h" 3 | #include "cli.h" 4 | #include "tuntap_if.h" 5 | #include "utils.h" 6 | #include "ipc.h" 7 | #include "timer.h" 8 | #include "route.h" 9 | #include "ethernet.h" 10 | #include "arp.h" 11 | #include "tcp.h" 12 | #include "netdev.h" 13 | #include "ip.h" 14 | 15 | #define MAX_CMD_LENGTH 6 16 | 17 | typedef void (*sighandler_t)(int); 18 | 19 | #define 
THREAD_CORE 0 20 | #define THREAD_TIMERS 1 21 | #define THREAD_IPC 2 22 | #define THREAD_SIGNAL 3 23 | static pthread_t threads[4]; 24 | 25 | int running = 1; 26 | sigset_t mask; 27 | 28 | static void create_thread(pthread_t id, void *(*func) (void *)) 29 | { 30 | if (pthread_create(&threads[id], NULL, 31 | func, NULL) != 0) { 32 | print_err("Could not create core thread\n"); 33 | } 34 | } 35 | 36 | static void *stop_stack_handler(void *arg) 37 | { 38 | int err, signo; 39 | 40 | for (;;) { 41 | err = sigwait(&mask, &signo); 42 | if (err != 0) { 43 | print_err("Sigwait failed: %d\n", err); 44 | } 45 | 46 | switch (signo) { 47 | case SIGINT: 48 | case SIGQUIT: 49 | running = 0; 50 | pthread_cancel(threads[THREAD_IPC]); 51 | pthread_cancel(threads[THREAD_CORE]); 52 | pthread_cancel(threads[THREAD_TIMERS]); 53 | return 0; 54 | default: 55 | printf("Unexpected signal %d\n", signo); 56 | } 57 | } 58 | } 59 | 60 | static void init_signals() 61 | { 62 | int err; 63 | 64 | sigemptyset(&mask); 65 | sigaddset(&mask, SIGINT); 66 | sigaddset(&mask, SIGQUIT); 67 | 68 | if ((err = pthread_sigmask(SIG_BLOCK, &mask, NULL)) != 0) { 69 | print_err("SIG_BLOCK error\n"); 70 | exit(1); 71 | } 72 | } 73 | 74 | static void init_stack() 75 | { 76 | tun_init(); 77 | netdev_init(); 78 | route_init(); 79 | arp_init(); 80 | tcp_init(); 81 | } 82 | 83 | static void run_threads() 84 | { 85 | create_thread(THREAD_CORE, netdev_rx_loop); 86 | create_thread(THREAD_TIMERS, timers_start); 87 | create_thread(THREAD_IPC, start_ipc_listener); 88 | create_thread(THREAD_SIGNAL, stop_stack_handler); 89 | } 90 | 91 | static void wait_for_threads() 92 | { 93 | for (int i = 0; i < 3; i++) { 94 | if (pthread_join(threads[i], NULL) != 0) { 95 | print_err("Error when joining threads\n"); 96 | exit(1); 97 | } 98 | } 99 | } 100 | 101 | void free_stack() 102 | { 103 | abort_sockets(); 104 | free_arp(); 105 | free_routes(); 106 | free_netdev(); 107 | free_tun(); 108 | } 109 | 110 | void init_security() 111 | { 112 | 
if (prctl(PR_CAPBSET_DROP, CAP_NET_ADMIN) == -1) { 113 | perror("Error on network admin capability drop"); 114 | exit(1); 115 | } 116 | 117 | if (prctl(PR_CAPBSET_DROP, CAP_SETPCAP) == -1) { 118 | perror("Error on capability set drop"); 119 | exit(1); 120 | } 121 | } 122 | 123 | int main(int argc, char** argv) 124 | { 125 | parse_cli(argc, argv); 126 | 127 | init_signals(); 128 | init_stack(); 129 | init_security(); 130 | 131 | run_threads(); 132 | wait_for_threads(); 133 | 134 | free_stack(); 135 | } 136 | -------------------------------------------------------------------------------- /src/netdev.c: -------------------------------------------------------------------------------- 1 | #include "syshead.h" 2 | #include "utils.h" 3 | #include "skbuff.h" 4 | #include "netdev.h" 5 | #include "ethernet.h" 6 | #include "arp.h" 7 | #include "ip.h" 8 | #include "tuntap_if.h" 9 | #include "basic.h" 10 | 11 | struct netdev *loop; 12 | struct netdev *netdev; 13 | extern int running; 14 | 15 | static struct netdev *netdev_alloc(char *addr, char *hwaddr, uint32_t mtu) 16 | { 17 | struct netdev *dev = malloc(sizeof(struct netdev)); 18 | 19 | dev->addr = ip_parse(addr); 20 | 21 | sscanf(hwaddr, "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx", &dev->hwaddr[0], 22 | &dev->hwaddr[1], 23 | &dev->hwaddr[2], 24 | &dev->hwaddr[3], 25 | &dev->hwaddr[4], 26 | &dev->hwaddr[5]); 27 | 28 | dev->addr_len = 6; 29 | dev->mtu = mtu; 30 | 31 | return dev; 32 | } 33 | 34 | void netdev_init(char *addr, char *hwaddr) 35 | { 36 | loop = netdev_alloc("127.0.0.1", "00:00:00:00:00:00", 1500); 37 | netdev = netdev_alloc("10.0.0.4", "00:0c:29:6d:50:25", 1500); 38 | } 39 | 40 | int netdev_transmit(struct sk_buff *skb, uint8_t *dst_hw, uint16_t ethertype) 41 | { 42 | struct netdev *dev; 43 | struct eth_hdr *hdr; 44 | int ret = 0; 45 | 46 | dev = skb->dev; 47 | 48 | skb_push(skb, ETH_HDR_LEN); 49 | 50 | hdr = (struct eth_hdr *)skb->data; 51 | 52 | memcpy(hdr->dmac, dst_hw, dev->addr_len); 53 | memcpy(hdr->smac, dev->hwaddr, 
dev->addr_len); 54 | 55 | hdr->ethertype = htons(ethertype); 56 | eth_dbg("out", hdr); 57 | 58 | ret = tun_write((char *)skb->data, skb->len); 59 | 60 | return ret; 61 | } 62 | 63 | static int netdev_receive(struct sk_buff *skb) 64 | { 65 | struct eth_hdr *hdr = eth_hdr(skb); 66 | 67 | eth_dbg("in", hdr); 68 | 69 | switch (hdr->ethertype) { 70 | case ETH_P_ARP: 71 | arp_rcv(skb); 72 | break; 73 | case ETH_P_IP: 74 | ip_rcv(skb); 75 | break; 76 | case ETH_P_IPV6: 77 | default: 78 | printf("Unsupported ethertype %x\n", hdr->ethertype); 79 | free_skb(skb); 80 | break; 81 | } 82 | 83 | return 0; 84 | } 85 | 86 | void *netdev_rx_loop() 87 | { 88 | while (running) { 89 | struct sk_buff *skb = alloc_skb(BUFLEN); 90 | 91 | if (tun_read((char *)skb->data, BUFLEN) < 0) { 92 | perror("ERR: Read from tun_fd"); 93 | free_skb(skb); 94 | return NULL; 95 | } 96 | 97 | netdev_receive(skb); 98 | } 99 | 100 | return NULL; 101 | } 102 | 103 | struct netdev* netdev_get(uint32_t sip) 104 | { 105 | if (netdev->addr == sip) { 106 | return netdev; 107 | } else { 108 | return NULL; 109 | } 110 | } 111 | 112 | void free_netdev() 113 | { 114 | free(loop); 115 | free(netdev); 116 | } 117 | -------------------------------------------------------------------------------- /src/route.c: -------------------------------------------------------------------------------- 1 | #include "syshead.h" 2 | #include "route.h" 3 | #include "dst.h" 4 | #include "netdev.h" 5 | #include "list.h" 6 | #include "ip.h" 7 | 8 | static LIST_HEAD(routes); 9 | 10 | extern struct netdev *netdev; 11 | extern struct netdev *loop; 12 | 13 | extern char *tapaddr; 14 | extern char *taproute; 15 | 16 | static struct rtentry *route_alloc(uint32_t dst, uint32_t gateway, uint32_t netmask, 17 | uint8_t flags, uint32_t metric, struct netdev *dev) 18 | { 19 | struct rtentry *rt = malloc(sizeof(struct rtentry)); 20 | list_init(&rt->list); 21 | 22 | rt->dst = dst; 23 | rt->gateway = gateway; 24 | rt->netmask = netmask; 25 | rt->flags = 
flags; 26 | rt->metric = metric; 27 | rt->dev = dev; 28 | return rt; 29 | } 30 | 31 | void route_add(uint32_t dst, uint32_t gateway, uint32_t netmask, uint8_t flags, 32 | uint32_t metric, struct netdev *dev) 33 | { 34 | struct rtentry *rt = route_alloc(dst, gateway, netmask, flags, metric, dev); 35 | 36 | list_add_tail(&rt->list, &routes); 37 | } 38 | 39 | void route_init() 40 | { 41 | route_add(loop->addr, 0, 0xff000000, RT_LOOPBACK, 0, loop); 42 | route_add(netdev->addr, 0, 0xffffff00, RT_HOST, 0, netdev); 43 | route_add(0, ip_parse(tapaddr), 0, RT_GATEWAY, 0, netdev); 44 | } 45 | 46 | struct rtentry *route_lookup(uint32_t daddr) 47 | { 48 | struct list_head *item; 49 | struct rtentry *rt = NULL; 50 | 51 | list_for_each(item, &routes) { 52 | rt = list_entry(item, struct rtentry, list); 53 | if ((daddr & rt->netmask) == (rt->dst & rt->netmask)) break; 54 | // If no matches, we default to to default gw (last item) 55 | } 56 | 57 | return rt; 58 | } 59 | 60 | void free_routes() 61 | { 62 | struct list_head *item, *tmp; 63 | struct rtentry *rt; 64 | 65 | list_for_each_safe(item, tmp, &routes) { 66 | rt = list_entry(item, struct rtentry, list); 67 | list_del(item); 68 | 69 | free(rt); 70 | } 71 | } 72 | -------------------------------------------------------------------------------- /src/skbuff.c: -------------------------------------------------------------------------------- 1 | #include "syshead.h" 2 | #include "skbuff.h" 3 | #include "list.h" 4 | 5 | struct sk_buff *alloc_skb(unsigned int size) 6 | { 7 | struct sk_buff *skb = malloc(sizeof(struct sk_buff)); 8 | 9 | memset(skb, 0, sizeof(struct sk_buff)); 10 | skb->data = malloc(size); 11 | memset(skb->data, 0, size); 12 | 13 | skb->refcnt = 0; 14 | skb->head = skb->data; 15 | skb->end = skb->data + size; 16 | 17 | list_init(&skb->list); 18 | 19 | return skb; 20 | } 21 | 22 | void free_skb(struct sk_buff *skb) 23 | { 24 | if (skb->refcnt < 1) { 25 | free(skb->head); 26 | free(skb); 27 | } 28 | } 29 | 30 | void 
*skb_reserve(struct sk_buff *skb, unsigned int len) 31 | { 32 | skb->data += len; 33 | 34 | return skb->data; 35 | } 36 | 37 | uint8_t *skb_push(struct sk_buff *skb, unsigned int len) 38 | { 39 | skb->data -= len; 40 | skb->len += len; 41 | 42 | return skb->data; 43 | } 44 | 45 | uint8_t *skb_head(struct sk_buff *skb) 46 | { 47 | return skb->head; 48 | } 49 | 50 | void skb_reset_header(struct sk_buff *skb) 51 | { 52 | skb->data = skb->end - skb->dlen; 53 | skb->len = skb->dlen; 54 | } 55 | -------------------------------------------------------------------------------- /src/sock.c: -------------------------------------------------------------------------------- 1 | #include "syshead.h" 2 | #include "sock.h" 3 | #include "socket.h" 4 | 5 | struct sock *sk_alloc(struct net_ops *ops, int protocol) 6 | { 7 | struct sock *sk; 8 | 9 | sk = ops->alloc_sock(protocol); 10 | 11 | sk->ops = ops; 12 | 13 | return sk; 14 | } 15 | 16 | void sock_init_data(struct socket *sock, struct sock *sk) 17 | { 18 | sock->sk = sk; 19 | sk->sock = sock; 20 | 21 | wait_init(&sk->recv_wait); 22 | skb_queue_init(&sk->receive_queue); 23 | skb_queue_init(&sk->write_queue); 24 | 25 | sk->poll_events = 0; 26 | 27 | sk->ops->init(sk); 28 | } 29 | 30 | void sock_free(struct sock *sk) 31 | { 32 | skb_queue_free(&sk->receive_queue); 33 | skb_queue_free(&sk->write_queue); 34 | } 35 | 36 | void sock_connected(struct sock *sk) 37 | { 38 | struct socket *sock = sk->sock; 39 | 40 | sock->state = SS_CONNECTED; 41 | sk->err = 0; 42 | sk->poll_events = (POLLOUT | POLLWRNORM | POLLWRBAND); 43 | 44 | wait_wakeup(&sock->sleep); 45 | } 46 | -------------------------------------------------------------------------------- /src/socket.c: -------------------------------------------------------------------------------- 1 | #include "syshead.h" 2 | #include "utils.h" 3 | #include "socket.h" 4 | #include "inet.h" 5 | #include "wait.h" 6 | #include "timer.h" 7 | 8 | static int sock_amount = 0; 9 | static 
LIST_HEAD(sockets); 10 | static pthread_rwlock_t slock = PTHREAD_RWLOCK_INITIALIZER; 11 | 12 | extern struct net_family inet; 13 | 14 | static struct net_family *families[128] = { 15 | [AF_INET] = &inet, 16 | }; 17 | 18 | static struct socket *alloc_socket(pid_t pid) 19 | { 20 | // TODO: Figure out a way to not shadow kernel file descriptors. 21 | // Now, we'll just expect the fds for a process to never exceed this. 22 | static int fd = 4097; 23 | struct socket *sock = malloc(sizeof (struct socket)); 24 | list_init(&sock->list); 25 | 26 | sock->pid = pid; 27 | sock->refcnt = 1; 28 | 29 | pthread_rwlock_wrlock(&slock); 30 | sock->fd = fd++; 31 | pthread_rwlock_unlock(&slock); 32 | 33 | sock->state = SS_UNCONNECTED; 34 | sock->ops = NULL; 35 | sock->flags = O_RDWR; 36 | wait_init(&sock->sleep); 37 | pthread_rwlock_init(&sock->lock, NULL); 38 | 39 | return sock; 40 | } 41 | 42 | int socket_rd_acquire(struct socket *sock) 43 | { 44 | int rc = pthread_rwlock_wrlock(&sock->lock); 45 | sock->refcnt++; 46 | return rc; 47 | } 48 | 49 | int socket_wr_acquire(struct socket *sock) 50 | { 51 | int rc = pthread_rwlock_wrlock(&sock->lock); 52 | sock->refcnt++; 53 | return rc; 54 | } 55 | 56 | int socket_release(struct socket *sock) 57 | { 58 | int rc = 0; 59 | sock->refcnt--; 60 | 61 | if (sock->refcnt == 0) { 62 | rc = pthread_rwlock_unlock(&sock->lock); 63 | free(sock); 64 | } else { 65 | rc = pthread_rwlock_unlock(&sock->lock); 66 | } 67 | 68 | return rc; 69 | } 70 | 71 | int socket_free(struct socket *sock) 72 | { 73 | pthread_rwlock_wrlock(&slock); 74 | socket_wr_acquire(sock); 75 | list_del(&sock->list); 76 | sock_amount--; 77 | pthread_rwlock_unlock(&slock); 78 | 79 | if (sock->ops) { 80 | sock->ops->free(sock); 81 | } 82 | 83 | wait_free(&sock->sleep); 84 | socket_release(sock); 85 | 86 | return 0; 87 | } 88 | 89 | static void *socket_garbage_collect(void *arg) 90 | { 91 | struct socket *sock = socket_find((struct socket *)arg); 92 | 93 | if (sock == NULL) return NULL; 94 
| 95 | socket_free(sock); 96 | 97 | return NULL; 98 | } 99 | 100 | int socket_delete(struct socket *sock) 101 | { 102 | int rc = 0; 103 | 104 | if (sock->state == SS_DISCONNECTING) goto out; 105 | 106 | sock->state = SS_DISCONNECTING; 107 | timer_oneshot(10000, &socket_garbage_collect, sock); 108 | 109 | out: 110 | return rc; 111 | } 112 | 113 | void abort_sockets() { 114 | struct list_head *item, *tmp; 115 | struct socket *sock; 116 | 117 | list_for_each_safe(item, tmp, &sockets) { 118 | sock = list_entry(item, struct socket, list); 119 | sock->ops->abort(sock); 120 | } 121 | } 122 | 123 | static struct socket *get_socket(pid_t pid, uint32_t fd) 124 | { 125 | struct list_head *item; 126 | struct socket *sock = NULL; 127 | 128 | pthread_rwlock_rdlock(&slock); 129 | list_for_each(item, &sockets) { 130 | sock = list_entry(item, struct socket, list); 131 | if (sock->pid == pid && sock->fd == fd) goto out; 132 | } 133 | 134 | sock = NULL; 135 | 136 | out: 137 | pthread_rwlock_unlock(&slock); 138 | return sock; 139 | } 140 | 141 | struct socket *socket_lookup(uint16_t remoteport, uint16_t localport) 142 | { 143 | struct list_head *item; 144 | struct socket *sock = NULL; 145 | struct sock *sk = NULL; 146 | 147 | pthread_rwlock_rdlock(&slock); 148 | 149 | list_for_each(item, &sockets) { 150 | sock = list_entry(item, struct socket, list); 151 | 152 | if (sock == NULL || sock->sk == NULL) continue; 153 | sk = sock->sk; 154 | 155 | if (sk->sport == localport && sk->dport == remoteport) { 156 | goto found; 157 | } 158 | } 159 | 160 | sock = NULL; 161 | found: 162 | pthread_rwlock_unlock(&slock); 163 | return sock; 164 | } 165 | 166 | struct socket *socket_find(struct socket *find) 167 | { 168 | struct list_head *item; 169 | struct socket *sock = NULL; 170 | 171 | pthread_rwlock_rdlock(&slock); 172 | list_for_each(item, &sockets) { 173 | sock = list_entry(item, struct socket, list); 174 | if (sock == find) goto out; 175 | } 176 | 177 | sock = NULL; 178 | 179 | out: 180 | 
pthread_rwlock_unlock(&slock); 181 | return sock; 182 | } 183 | 184 | #ifdef DEBUG_SOCKET 185 | void socket_debug() 186 | { 187 | struct list_head *item; 188 | struct socket *sock = NULL; 189 | 190 | pthread_rwlock_rdlock(&slock); 191 | 192 | list_for_each(item, &sockets) { 193 | sock = list_entry(item, struct socket, list); 194 | socket_rd_acquire(sock); 195 | socket_dbg(sock, ""); 196 | socket_release(sock); 197 | } 198 | 199 | pthread_rwlock_unlock(&slock); 200 | } 201 | #else 202 | void socket_debug() 203 | { 204 | return; 205 | } 206 | #endif 207 | 208 | int _socket(pid_t pid, int domain, int type, int protocol) 209 | { 210 | struct socket *sock; 211 | struct net_family *family; 212 | 213 | if ((sock = alloc_socket(pid)) == NULL) { 214 | print_err("Could not alloc socket\n"); 215 | return -1; 216 | } 217 | 218 | sock->type = type; 219 | 220 | family = families[domain]; 221 | 222 | if (!family) { 223 | print_err("Domain not supported: %d\n", domain); 224 | goto abort_socket; 225 | } 226 | 227 | if (family->create(sock, protocol) != 0) { 228 | print_err("Creating domain failed\n"); 229 | goto abort_socket; 230 | } 231 | 232 | pthread_rwlock_wrlock(&slock); 233 | 234 | list_add_tail(&sock->list, &sockets); 235 | sock_amount++; 236 | 237 | socket_rd_acquire(sock); 238 | pthread_rwlock_unlock(&slock); 239 | int rc = sock->fd; 240 | socket_release(sock); 241 | 242 | return rc; 243 | 244 | abort_socket: 245 | socket_free(sock); 246 | return -1; 247 | } 248 | 249 | int _connect(pid_t pid, int sockfd, const struct sockaddr *addr, socklen_t addrlen) 250 | { 251 | struct socket *sock; 252 | 253 | if ((sock = get_socket(pid, sockfd)) == NULL) { 254 | print_err("Connect: could not find socket (fd %u) for connection (pid %d)\n", sockfd, pid); 255 | return -EBADF; 256 | } 257 | 258 | socket_wr_acquire(sock); 259 | 260 | int rc = sock->ops->connect(sock, addr, addrlen, 0); 261 | switch (rc) { 262 | case -EINVAL: 263 | case -EAFNOSUPPORT: 264 | case -ECONNREFUSED: 265 | case 
-ETIMEDOUT: 266 | socket_release(sock); 267 | socket_free(sock); 268 | break; 269 | default: 270 | socket_release(sock); 271 | } 272 | 273 | return rc; 274 | } 275 | 276 | int _write(pid_t pid, int sockfd, const void *buf, const unsigned int count) 277 | { 278 | struct socket *sock; 279 | 280 | if ((sock = get_socket(pid, sockfd)) == NULL) { 281 | print_err("Write: could not find socket (fd %u) for connection (pid %d)\n", sockfd, pid); 282 | return -EBADF; 283 | } 284 | 285 | socket_wr_acquire(sock); 286 | int rc = sock->ops->write(sock, buf, count); 287 | socket_release(sock); 288 | 289 | return rc; 290 | } 291 | 292 | int _read(pid_t pid, int sockfd, void *buf, const unsigned int count) 293 | { 294 | struct socket *sock; 295 | 296 | if ((sock = get_socket(pid, sockfd)) == NULL) { 297 | print_err("Read: could not find socket (fd %u) for connection (pid %d)\n", sockfd, pid); 298 | return -EBADF; 299 | } 300 | 301 | socket_wr_acquire(sock); 302 | int rc = sock->ops->read(sock, buf, count); 303 | socket_release(sock); 304 | 305 | return rc; 306 | } 307 | 308 | int _close(pid_t pid, int sockfd) 309 | { 310 | struct socket *sock; 311 | 312 | if ((sock = get_socket(pid, sockfd)) == NULL) { 313 | print_err("Close: could not find socket (fd %u) for connection (pid %d)\n", sockfd, pid); 314 | return -EBADF; 315 | } 316 | 317 | 318 | socket_wr_acquire(sock); 319 | int rc = sock->ops->close(sock); 320 | socket_release(sock); 321 | 322 | return rc; 323 | } 324 | 325 | int _poll(pid_t pid, struct pollfd fds[], nfds_t nfds, int timeout) 326 | { 327 | for (;;) { 328 | int polled = 0; 329 | 330 | for (int i = 0; i < nfds; i++) { 331 | struct socket *sock; 332 | struct pollfd *poll = &fds[i]; 333 | if ((sock = get_socket(pid, poll->fd)) == NULL) { 334 | print_err("Poll: could not find socket (fd %u) for connection (pid %d)\n", poll->fd, pid); 335 | return -EBADF; 336 | } 337 | 338 | socket_rd_acquire(sock); 339 | poll->revents = sock->sk->poll_events & (poll->events | POLLHUP | 
POLLERR | POLLNVAL); 340 | if (poll->revents > 0) { 341 | polled++; 342 | } 343 | socket_release(sock); 344 | } 345 | 346 | if (polled > 0 || timeout == 0) { 347 | return polled; 348 | } else { 349 | if (timeout > 0) { 350 | if (timeout > 10) { 351 | timeout -= 10; 352 | } else { 353 | timeout = 0; 354 | } 355 | } 356 | usleep(1000 * 10); 357 | } 358 | } 359 | 360 | return -EAGAIN; 361 | } 362 | 363 | int _fcntl(pid_t pid, int fildes, int cmd, ...) 364 | { 365 | struct socket *sock; 366 | 367 | if ((sock = get_socket(pid, fildes)) == NULL) { 368 | print_err("Fcntl: could not find socket (fd %u) for connection (pid %d)\n", fildes, pid); 369 | return -EBADF; 370 | } 371 | 372 | socket_wr_acquire(sock); 373 | va_list ap; 374 | int rc = 0; 375 | 376 | switch (cmd) { 377 | case F_GETFL: 378 | rc = sock->flags; 379 | goto out; 380 | case F_SETFL: 381 | va_start(ap, cmd); 382 | sock->flags = va_arg(ap, int); 383 | va_end(ap); 384 | rc = 0; 385 | goto out; 386 | default: 387 | rc = -1; 388 | goto out; 389 | } 390 | 391 | rc = -1; 392 | 393 | out: 394 | socket_release(sock); 395 | return rc; 396 | } 397 | 398 | int _getsockopt(pid_t pid, int fd, int level, int optname, void *optval, socklen_t *optlen) 399 | { 400 | struct socket *sock; 401 | 402 | if ((sock = get_socket(pid, fd)) == NULL) { 403 | print_err("Getsockopt: could not find socket (fd %u) for connection (pid %d)\n", fd, pid); 404 | return -EBADF; 405 | } 406 | 407 | int rc = 0; 408 | 409 | socket_rd_acquire(sock); 410 | switch (level) { 411 | case SOL_SOCKET: 412 | switch (optname) { 413 | case SO_ERROR: 414 | *optlen = 4; 415 | *(int *)optval = sock->sk->err; 416 | rc = 0; 417 | break; 418 | default: 419 | print_err("Getsockopt unsupported optname %d\n", optname); 420 | rc = -ENOPROTOOPT; 421 | break; 422 | } 423 | 424 | break; 425 | default: 426 | print_err("Getsockopt: Unsupported level %d\n", level); 427 | rc = -EINVAL; 428 | break; 429 | } 430 | 431 | socket_release(sock); 432 | 433 | return rc; 434 | } 435 | 
436 | int _getpeername(pid_t pid, int socket, struct sockaddr *restrict address, 437 | socklen_t *restrict address_len) 438 | { 439 | struct socket *sock; 440 | 441 | if ((sock = get_socket(pid, socket)) == NULL) { 442 | print_err("Getpeername: could not find socket (fd %u) for connection (pid %d)\n", socket, pid); 443 | return -EBADF; 444 | } 445 | 446 | socket_rd_acquire(sock); 447 | int rc = sock->ops->getpeername(sock, address, address_len); 448 | socket_release(sock); 449 | 450 | return rc; 451 | } 452 | 453 | int _getsockname(pid_t pid, int socket, struct sockaddr *restrict address, 454 | socklen_t *restrict address_len) 455 | { 456 | struct socket *sock; 457 | 458 | if ((sock = get_socket(pid, socket)) == NULL) { 459 | print_err("Getsockname: could not find socket (fd %u) for connection (pid %d)\n", socket, pid); 460 | return -EBADF; 461 | } 462 | 463 | socket_rd_acquire(sock); 464 | int rc = sock->ops->getsockname(sock, address, address_len); 465 | socket_release(sock); 466 | 467 | return rc; 468 | } 469 | -------------------------------------------------------------------------------- /src/tcp.c: -------------------------------------------------------------------------------- 1 | #include "syshead.h" 2 | #include "inet.h" 3 | #include "tcp.h" 4 | #include "ip.h" 5 | #include "sock.h" 6 | #include "utils.h" 7 | #include "timer.h" 8 | #include "wait.h" 9 | 10 | #ifdef DEBUG_TCP 11 | const char *tcp_dbg_states[] = { 12 | "TCP_LISTEN", "TCP_SYNSENT", "TCP_SYN_RECEIVED", "TCP_ESTABLISHED", "TCP_FIN_WAIT_1", 13 | "TCP_FIN_WAIT_2", "TCP_CLOSE", "TCP_CLOSE_WAIT", "TCP_CLOSING", "TCP_LAST_ACK", "TCP_TIME_WAIT", 14 | }; 15 | #endif 16 | 17 | static pthread_rwlock_t tcplock = PTHREAD_RWLOCK_INITIALIZER; 18 | 19 | struct net_ops tcp_ops = { 20 | .alloc_sock = &tcp_alloc_sock, 21 | .init = &tcp_v4_init_sock, 22 | .connect = &tcp_v4_connect, 23 | .disconnect = &tcp_disconnect, 24 | .write = &tcp_write, 25 | .read = &tcp_read, 26 | .recv_notify = &tcp_recv_notify, 27 | 
/*
 * Checksums `len` bytes at `data`, seeded with the pseudo-header fields:
 * source address, destination address, protocol number and length (the
 * latter two passed through htons()).
 *
 * Returns the value produced by checksum() for that data and seed.
 */
int tcp_udp_checksum(uint32_t saddr, uint32_t daddr, uint8_t proto,
                     uint8_t *data, uint16_t len)
{
    /* 32-bit unsigned accumulation, same wrap-around as adding one by one */
    uint32_t pseudo = saddr + daddr + htons(proto) + htons(len);

    return checksum(data, len, pseudo);
}
/*
 * Produces an initial send sequence number from the current time and
 * rand().
 *
 * The product is computed in unsigned 32-bit arithmetic, which wraps by
 * definition; the previous int * int multiplication overflowed, which is
 * undefined behaviour for signed types.
 */
int generate_iss()
{
    /* TODO: Generate a proper ISS */
    uint32_t now = (uint32_t)time(NULL);
    uint32_t noise = (uint32_t)rand();

    return (int)(now * noise);
}
195 | } 196 | 197 | int tcp_read(struct sock *sk, void *buf, int len) 198 | { 199 | struct tcp_sock *tsk = tcp_sk(sk); 200 | int ret = -1; 201 | 202 | switch (sk->state) { 203 | case TCP_CLOSE: 204 | ret = -EBADF; 205 | goto out; 206 | case TCP_LISTEN: 207 | case TCP_SYN_SENT: 208 | case TCP_SYN_RECEIVED: 209 | /* Queue for processing after entering ESTABLISHED state. If there 210 | is no room to queue this request, respond with "error: 211 | insufficient resources". */ 212 | case TCP_ESTABLISHED: 213 | case TCP_FIN_WAIT_1: 214 | case TCP_FIN_WAIT_2: 215 | /* If insufficient incoming segments are queued to satisfy the 216 | request, queue the request. */ 217 | 218 | break; 219 | case TCP_CLOSE_WAIT: 220 | /* If no text is awaiting delivery, the RECEIVE will get a 221 | "error: connection closing" response. Otherwise, any remaining 222 | text can be used to satisfy the RECEIVE. */ 223 | if (!skb_queue_empty(&tsk->sk.receive_queue)) break; 224 | if (tsk->flags & TCP_FIN) { 225 | tsk->flags &= ~TCP_FIN; 226 | return 0; 227 | } 228 | 229 | break; 230 | case TCP_CLOSING: 231 | case TCP_LAST_ACK: 232 | case TCP_TIME_WAIT: 233 | ret = sk->err; 234 | goto out; 235 | default: 236 | goto out; 237 | } 238 | 239 | return tcp_receive(tsk, buf, len); 240 | 241 | out: 242 | return ret; 243 | } 244 | 245 | int tcp_recv_notify(struct sock *sk) 246 | { 247 | if (&(sk->recv_wait)) { 248 | return wait_wakeup(&sk->recv_wait); 249 | } 250 | 251 | // No recv wait lock 252 | return -1; 253 | } 254 | 255 | int tcp_close(struct sock *sk) 256 | { 257 | switch (sk->state) { 258 | case TCP_CLOSE: 259 | case TCP_CLOSING: 260 | case TCP_LAST_ACK: 261 | case TCP_TIME_WAIT: 262 | case TCP_FIN_WAIT_1: 263 | case TCP_FIN_WAIT_2: 264 | /* Respond with "error: connection closing". 
*/ 265 | sk->err = -EBADF; 266 | return -1; 267 | case TCP_LISTEN: 268 | case TCP_SYN_SENT: 269 | case TCP_SYN_RECEIVED: 270 | return tcp_done(sk); 271 | case TCP_ESTABLISHED: 272 | /* Queue this until all preceding SENDs have been segmentized, then 273 | form a FIN segment and send it. In any case, enter FIN-WAIT-1 274 | state. */ 275 | tcp_set_state(sk, TCP_FIN_WAIT_1); 276 | tcp_queue_fin(sk); 277 | break; 278 | case TCP_CLOSE_WAIT: 279 | /* Queue this request until all preceding SENDs have been 280 | segmentized; then send a FIN segment, enter LAST_ACK state. */ 281 | tcp_queue_fin(sk); 282 | break; 283 | default: 284 | print_err("Unknown TCP state for close\n"); 285 | return -1; 286 | } 287 | 288 | return 0; 289 | } 290 | 291 | int tcp_abort(struct sock *sk) 292 | { 293 | struct tcp_sock *tsk = tcp_sk(sk); 294 | tcp_send_reset(tsk); 295 | return tcp_done(sk); 296 | } 297 | 298 | static int tcp_free(struct sock *sk) 299 | { 300 | struct tcp_sock *tsk = tcp_sk(sk); 301 | 302 | tcp_clear_timers(sk); 303 | tcp_clear_queues(tsk); 304 | 305 | wait_wakeup(&sk->sock->sleep); 306 | 307 | return 0; 308 | } 309 | 310 | int tcp_done(struct sock *sk) 311 | { 312 | tcp_set_state(sk, TCP_CLOSING); 313 | tcp_free(sk); 314 | return socket_delete(sk->sock); 315 | } 316 | 317 | void tcp_clear_timers(struct sock *sk) 318 | { 319 | struct tcp_sock *tsk = tcp_sk(sk); 320 | tcp_stop_rto_timer(tsk); 321 | tcp_stop_delack_timer(tsk); 322 | 323 | timer_cancel(tsk->keepalive); 324 | tsk->keepalive = NULL; 325 | timer_cancel(tsk->linger); 326 | tsk->linger = NULL; 327 | } 328 | 329 | void tcp_stop_rto_timer(struct tcp_sock *tsk) 330 | { 331 | if (tsk) { 332 | timer_cancel(tsk->retransmit); 333 | tsk->retransmit = NULL; 334 | tsk->backoff = 0; 335 | } 336 | } 337 | 338 | void tcp_release_rto_timer(struct tcp_sock *tsk) 339 | { 340 | if (tsk) { 341 | timer_release(tsk->retransmit); 342 | tsk->retransmit = NULL; 343 | } 344 | } 345 | 346 | void tcp_stop_delack_timer(struct tcp_sock *tsk) 
347 | { 348 | timer_cancel(tsk->delack); 349 | tsk->delack = NULL; 350 | } 351 | 352 | void tcp_release_delack_timer(struct tcp_sock *tsk) 353 | { 354 | timer_release(tsk->delack); 355 | tsk->delack = NULL; 356 | } 357 | 358 | void tcp_handle_fin_state(struct sock *sk) 359 | { 360 | switch (sk->state) { 361 | case TCP_CLOSE_WAIT: 362 | tcp_set_state(sk, TCP_LAST_ACK); 363 | break; 364 | case TCP_ESTABLISHED: 365 | tcp_set_state(sk, TCP_FIN_WAIT_1); 366 | break; 367 | } 368 | } 369 | 370 | static void *tcp_linger(void *arg) 371 | { 372 | struct sock *sk = (struct sock *) arg; 373 | socket_wr_acquire(sk->sock); 374 | struct tcp_sock *tsk = tcp_sk(sk); 375 | tcpsock_dbg("TCP time-wait timeout, freeing TCB", sk); 376 | 377 | timer_cancel(tsk->linger); 378 | tsk->linger = NULL; 379 | 380 | tcp_done(sk); 381 | socket_release(sk->sock); 382 | 383 | return NULL; 384 | } 385 | 386 | static void *tcp_user_timeout(void *arg) 387 | { 388 | struct sock *sk = (struct sock *) arg; 389 | socket_wr_acquire(sk->sock); 390 | struct tcp_sock *tsk = tcp_sk(sk); 391 | tcpsock_dbg("TCP user timeout, freeing TCB and aborting conn", sk); 392 | 393 | timer_cancel(tsk->linger); 394 | tsk->linger = NULL; 395 | 396 | tcp_abort(sk); 397 | socket_release(sk->sock); 398 | 399 | return NULL; 400 | } 401 | 402 | void tcp_enter_time_wait(struct sock *sk) 403 | { 404 | struct tcp_sock *tsk = tcp_sk(sk); 405 | 406 | tcp_set_state(sk, TCP_TIME_WAIT); 407 | 408 | tcp_clear_timers(sk); 409 | /* RFC793 arbitrarily defines MSL to be 2 minutes */ 410 | tsk->linger = timer_add(TCP_2MSL, &tcp_linger, sk); 411 | } 412 | 413 | void tcp_rearm_user_timeout(struct sock *sk) 414 | { 415 | struct tcp_sock *tsk = tcp_sk(sk); 416 | 417 | if (sk->state == TCP_TIME_WAIT) return; 418 | 419 | timer_cancel(tsk->linger); 420 | /* RFC793 set user timeout */ 421 | tsk->linger = timer_add(TCP_USER_TIMEOUT, &tcp_user_timeout, sk); 422 | } 423 | 424 | void tcp_rtt(struct tcp_sock *tsk) 425 | { 426 | if (tsk->backoff > 0 || 
!tsk->retransmit) { 427 | // Karn's Algorithm: Don't measure retransmissions 428 | return; 429 | } 430 | 431 | int r = timer_get_tick() - (tsk->retransmit->expires - tsk->rto); 432 | if (r < 0) return; 433 | 434 | if (!tsk->srtt) { 435 | /* RFC6298 2.2 first measurement is made */ 436 | tsk->srtt = r; 437 | tsk->rttvar = r / 2; 438 | } else { 439 | /* RFC6298 2.3 a subsequent measurement is made */ 440 | double beta = 0.25; 441 | double alpha = 0.125; 442 | tsk->rttvar = (1 - beta) * tsk->rttvar + beta * abs(tsk->srtt - r); 443 | tsk->srtt = (1 - alpha) * tsk->srtt + alpha * r; 444 | } 445 | 446 | int k = 4 * tsk->rttvar; 447 | 448 | /* RFC6298 says RTO should be at least 1 second. Linux uses 200ms */ 449 | if (k < 200) k = 200; 450 | 451 | tsk->rto = tsk->srtt + k; 452 | } 453 | 454 | int tcp_calculate_sacks(struct tcp_sock *tsk) 455 | { 456 | struct tcp_sack_block *sb = &tsk->sacks[tsk->sacklen]; 457 | 458 | sb->left = 0; 459 | sb->right = 0; 460 | 461 | struct sk_buff *next; 462 | struct list_head *item, *tmp; 463 | 464 | list_for_each_safe(item, tmp, &tsk->ofo_queue.head) { 465 | next = list_entry(item, struct sk_buff, list); 466 | 467 | if (sb->left == 0) { 468 | sb->left = next->seq; 469 | tsk->sacklen++; 470 | } 471 | 472 | if (sb->right == 0) sb->right = next->end_seq; 473 | else if (sb->right == next->seq) sb->right = next->end_seq; 474 | else { 475 | if (tsk->sacklen >= tsk->sacks_allowed) break; 476 | 477 | sb = &tsk->sacks[tsk->sacklen]; 478 | sb->left = next->seq; 479 | sb->right = next->end_seq; 480 | tsk->sacklen++; 481 | } 482 | } 483 | 484 | return 0; 485 | } 486 | -------------------------------------------------------------------------------- /src/tcp_data.c: -------------------------------------------------------------------------------- 1 | #include "syshead.h" 2 | #include "tcp.h" 3 | #include "list.h" 4 | 5 | /* Routine for inserting skbs ordered by seq into queue */ 6 | static void tcp_data_insert_ordered(struct sk_buff_head *queue, struct 
sk_buff *skb) 7 | { 8 | struct sk_buff *next; 9 | struct list_head *item, *tmp; 10 | 11 | list_for_each_safe(item, tmp, &queue->head) { 12 | next = list_entry(item, struct sk_buff, list); 13 | 14 | if (skb->seq < next->seq) { 15 | if (skb->end_seq > next->seq) { 16 | /* TODO: We need to join skbs */ 17 | print_err("Could not join skbs\n"); 18 | } else { 19 | skb->refcnt++; 20 | skb_queue_add(queue, skb, next); 21 | return; 22 | } 23 | } else if (skb->seq == next->seq) { 24 | /* We already have this segment! */ 25 | return; 26 | } 27 | } 28 | 29 | skb->refcnt++; 30 | skb_queue_tail(queue, skb); 31 | } 32 | 33 | /* Routine for transforming out-of-order segments into order */ 34 | static void tcp_consume_ofo_queue(struct tcp_sock *tsk) 35 | { 36 | struct sock *sk = &tsk->sk; 37 | struct tcb *tcb = &tsk->tcb; 38 | struct sk_buff *skb = NULL; 39 | 40 | while ((skb = skb_peek(&tsk->ofo_queue)) != NULL 41 | && tcb->rcv_nxt == skb->seq) { 42 | /* skb is in-order, put it in receive queue */ 43 | tcb->rcv_nxt += skb->dlen; 44 | skb_dequeue(&tsk->ofo_queue); 45 | skb_queue_tail(&sk->receive_queue, skb); 46 | } 47 | } 48 | 49 | int tcp_data_dequeue(struct tcp_sock *tsk, void *user_buf, int userlen) 50 | { 51 | struct sock *sk = &tsk->sk; 52 | struct tcphdr *th; 53 | int rlen = 0; 54 | 55 | while (!skb_queue_empty(&sk->receive_queue) && rlen < userlen) { 56 | struct sk_buff *skb = skb_peek(&sk->receive_queue); 57 | if (skb == NULL) break; 58 | 59 | th = tcp_hdr(skb); 60 | 61 | /* Guard datalen to not overflow userbuf */ 62 | int dlen = (rlen + skb->dlen) > userlen ? 
(userlen - rlen) : skb->dlen; 63 | memcpy(user_buf, skb->payload, dlen); 64 | 65 | /* Accommodate next round of data dequeue */ 66 | skb->dlen -= dlen; 67 | skb->payload += dlen; 68 | rlen += dlen; 69 | user_buf += dlen; 70 | 71 | /* skb is fully eaten, process flags and drop it */ 72 | if (skb->dlen == 0) { 73 | if (th->psh) tsk->flags |= TCP_PSH; 74 | skb_dequeue(&sk->receive_queue); 75 | skb->refcnt--; 76 | free_skb(skb); 77 | } 78 | } 79 | 80 | if (skb_queue_empty(&sk->receive_queue) && !(tsk->flags & TCP_FIN)) { 81 | sk->poll_events &= ~POLLIN; 82 | } 83 | 84 | return rlen; 85 | } 86 | 87 | int tcp_data_queue(struct tcp_sock *tsk, struct tcphdr *th, struct sk_buff *skb) 88 | { 89 | struct sock *sk = &tsk->sk; 90 | struct tcb *tcb = &tsk->tcb; 91 | int rc = 0; 92 | 93 | if (!tcb->rcv_wnd) { 94 | free_skb(skb); 95 | return -1; 96 | } 97 | 98 | int expected = skb->seq == tcb->rcv_nxt; 99 | if (expected) { 100 | tcb->rcv_nxt += skb->dlen; 101 | 102 | skb->refcnt++; 103 | skb_queue_tail(&sk->receive_queue, skb); 104 | 105 | tcp_consume_ofo_queue(tsk); 106 | 107 | // There is new data for user to read 108 | sk->poll_events |= (POLLIN | POLLPRI | POLLRDNORM | POLLRDBAND); 109 | tsk->sk.ops->recv_notify(&tsk->sk); 110 | } else { 111 | /* Segment passed validation, hence it is in-window 112 | but not the left-most sequence. Put into out-of-order queue 113 | for later processing */ 114 | tcp_data_insert_ordered(&tsk->ofo_queue, skb); 115 | 116 | if (tsk->sackok) { 117 | tcp_calculate_sacks(tsk); 118 | } 119 | 120 | /* RFC5581: A TCP receiver SHOULD send an immediate duplicate ACK when an out- 121 | * of-order segment arrives. The purpose of this ACK is to inform the 122 | * sender that a segment was received out-of-order and which sequence 123 | * number is expected. 
*/ 124 | tcp_send_ack(sk); 125 | } 126 | 127 | return rc; 128 | } 129 | -------------------------------------------------------------------------------- /src/tcp_input.c: -------------------------------------------------------------------------------- 1 | #include "syshead.h" 2 | #include "tcp.h" 3 | #include "tcp_data.h" 4 | #include "skbuff.h" 5 | #include "sock.h" 6 | 7 | static int tcp_parse_opts(struct tcp_sock *tsk, struct tcphdr *th) 8 | { 9 | uint8_t *ptr = th->data; 10 | uint8_t optlen = tcp_hlen(th) - 20; 11 | struct tcp_opt_mss *opt_mss = NULL; 12 | uint8_t sack_seen = 0; 13 | uint8_t tsopt_seen = 0; 14 | 15 | while (optlen > 0 && optlen < 20) { 16 | switch (*ptr) { 17 | case TCP_OPT_MSS: 18 | opt_mss = (struct tcp_opt_mss *)ptr; 19 | uint16_t mss = ntohs(opt_mss->mss); 20 | 21 | if (mss > 536 && mss <= 1460) { 22 | tsk->smss = mss; 23 | } 24 | 25 | ptr += sizeof(struct tcp_opt_mss); 26 | optlen -= 4; 27 | break; 28 | case TCP_OPT_NOOP: 29 | ptr += 1; 30 | optlen--; 31 | break; 32 | case TCP_OPT_SACK_OK: 33 | sack_seen = 1; 34 | optlen--; 35 | break; 36 | case TCP_OPT_TS: 37 | tsopt_seen = 1; 38 | optlen--; 39 | break; 40 | default: 41 | print_err("Unrecognized TCPOPT\n"); 42 | optlen--; 43 | break; 44 | } 45 | } 46 | 47 | if (!tsopt_seen) { 48 | tsk->tsopt = 0; 49 | } 50 | 51 | if (sack_seen && tsk->sackok) { 52 | // There's room for 4 sack blocks without TS OPT 53 | if (tsk->tsopt) tsk->sacks_allowed = 3; 54 | else tsk->sacks_allowed = 4; 55 | } else { 56 | tsk->sackok = 0; 57 | } 58 | 59 | return 0; 60 | } 61 | 62 | /* 63 | * Acks all segments from retransmissionn queue that are "older" 64 | * than current unacknowledged sequence 65 | */ 66 | static int tcp_clean_rto_queue(struct sock *sk, uint32_t una) 67 | { 68 | struct tcp_sock *tsk = tcp_sk(sk); 69 | struct sk_buff *skb; 70 | int rc = 0; 71 | 72 | while ((skb = skb_peek(&sk->write_queue)) != NULL) { 73 | if (skb->seq > 0 && skb->end_seq <= una) { 74 | /* skb fully acknowledged */ 75 | 
skb_dequeue(&sk->write_queue);
            skb->refcnt--;
            free_skb(skb);
            if (tsk->inflight > 0) {
                tsk->inflight--;
            }
        } else {
            break;
        }
    };

    if (skb == NULL || tsk->inflight == 0) {
        /* No unacknowledged skbs, stop rto timer */
        tcp_stop_rto_timer(tsk);
    }

    return rc;
}

/* Frees the buffer and returns 0; `sk` is not used. */
static inline int __tcp_drop(struct sock *sk, struct sk_buff *skb)
{
    free_skb(skb);
    return 0;
}

/*
 * Sequence-number acceptability test for an incoming segment.
 * Returns 1 when acceptable, 0 otherwise.
 *
 * NOTE(review): the comparisons are plain integer comparisons, so they do
 * not account for sequence-number wrap-around -- confirm whether intended.
 */
static int tcp_verify_segment(struct tcp_sock *tsk, struct tcphdr *th, struct sk_buff *skb)
{
    struct tcb *tcb = &tsk->tcb;

    /* Data cannot be accepted while the receive window is closed */
    if (skb->dlen > 0 && tcb->rcv_wnd == 0) return 0;

    if (th->seq < tcb->rcv_nxt ||
        th->seq > (tcb->rcv_nxt + tcb->rcv_wnd)) {
        tcpsock_dbg("Received invalid segment", (&tsk->sk));
        return 0;
    }

    return 1;
}

/* TCP RST received: record the error matching the current state, then
 * finish the socket via tcp_done(). A socket already in CLOSE is left alone
 * apart from the poll events set on entry. */
static void tcp_reset(struct sock *sk)
{
    sk->poll_events = (POLLOUT | POLLWRNORM | POLLERR | POLLHUP);
    switch (sk->state) {
    case TCP_SYN_SENT:
        sk->err = -ECONNREFUSED;
        break;
    case TCP_CLOSE_WAIT:
        sk->err = -EPIPE;
        break;
    case TCP_CLOSE:
        return;
    default:
        sk->err = -ECONNRESET;
        break;
    }

    tcp_done(sk);
}

/* Frees the buffer and returns 0; `tsk` and `th` are not used. */
static inline int tcp_discard(struct tcp_sock *tsk, struct sk_buff *skb, struct tcphdr *th)
{
    free_skb(skb);
    return 0;
}

/* LISTEN state handler: the segment is simply freed (no passive open here). */
static int tcp_listen(struct tcp_sock *tsk, struct sk_buff *skb, struct tcphdr *th)
{
    free_skb(skb);
    return 0;
}

/*
 * SYN-SENT state handler. Validates the ACK (if any), handles RST, requires
 * SYN, then either completes the handshake (ESTABLISHED) or moves to
 * SYN_RECEIVED. The segment is always dropped before returning 0.
 */
static int tcp_synsent(struct tcp_sock *tsk, struct sk_buff *skb, struct tcphdr *th)
{
    struct tcb *tcb = &tsk->tcb;
    struct sock *sk = &tsk->sk;

    tcpsock_dbg("state is synsent", sk);

    if (th->ack) {
        if (th->ack_seq <= tcb->iss || th->ack_seq > tcb->snd_nxt) {
            tcpsock_dbg("ACK is unacceptable", sk);

            if (th->rst) goto discard;
            goto reset_and_discard;
        }

        if (th->ack_seq < tcb->snd_una || th->ack_seq > tcb->snd_nxt) {
            tcpsock_dbg("ACK is unacceptable", sk);
            goto reset_and_discard;
        }
    }

    /* ACK is acceptable */

    if (th->rst) {
        tcp_reset(&tsk->sk);
        goto discard;
    }

    /* third check the security and precedence -> ignored */

    /* fourth check the SYN bit */
    if (!th->syn) {
        goto discard;
    }

    tcb->rcv_nxt = th->seq + 1;
    tcb->irs = th->seq;
    if (th->ack) {
        tcb->snd_una = th->ack_seq;
        /* Any packets in RTO queue that are acknowledged here should be removed */
        tcp_clean_rto_queue(sk, tcb->snd_una);
    }

    if (tcb->snd_una > tcb->iss) {
        /* Our SYN was acknowledged: handshake complete */
        tcp_set_state(sk, TCP_ESTABLISHED);
        tcb->snd_una = tcb->snd_nxt;
        tsk->backoff = 0;
        /* RFC 6298: Sender SHOULD set RTO <- 1 second */
        tsk->rto = 1000;
        tcp_send_ack(&tsk->sk);
        tcp_rearm_user_timeout(&tsk->sk);
        tcp_parse_opts(tsk, th);
        sock_connected(sk);
    } else {
        /* SYN without an ACK of our SYN */
        tcp_set_state(sk, TCP_SYN_RECEIVED);
        tcb->snd_una = tcb->iss;
        tcp_send_synack(&tsk->sk);
    }

discard:
    tcp_drop(sk, skb);
    return 0;
reset_and_discard:
    //TODO reset
    tcp_drop(sk, skb);
    return 0;
}

/*
 * CLOSED state handler. A segment carrying RST is discarded (returns 0);
 * anything else is answered with a reset and the result of tcp_send_reset()
 * is returned.
 */
static int tcp_closed(struct tcp_sock *tsk, struct sk_buff *skb, struct tcphdr *th)
{
    /*
      All data in the incoming segment is discarded.  An incoming
      segment containing a RST is discarded.  An incoming segment not
      containing a RST causes a RST to be sent in response.  The
      acknowledgment and sequence field values are selected to make the
      reset sequence acceptable to the TCP that sent the offending
      segment.

      If the ACK bit is off, sequence number zero is used,



      If the ACK bit is on,



      Return.
    */

    int rc = -1;

    tcpsock_dbg("state is closed", (&tsk->sk));

    if (th->rst) {
        tcp_discard(tsk, skb, th);
        rc = 0;
        goto out;
    }

    if (th->ack) {

    } else {


    }

    rc = tcp_send_reset(tsk);
    free_skb(skb);

out:
    return rc;
}

/*
 * Follows RFC793 "Segment Arrives" section closely
 *
 * Takes the socket, the parsed TCP header and the buffer holding the
 * segment. Every visible path returns 0 except the CLOSE/LISTEN/SYN_SENT
 * dispatch and the LAST_ACK path, which return the handler's / tcp_done()'s
 * value. The buffer is released on every path shown here.
 */
int tcp_input_state(struct sock *sk, struct tcphdr *th, struct sk_buff *skb)
{
    struct tcp_sock *tsk = tcp_sk(sk);
    struct tcb *tcb = &tsk->tcb;

    tcpsock_dbg("input state", sk);

    switch (sk->state) {
    case TCP_CLOSE:
        return tcp_closed(tsk, skb, th);
    case TCP_LISTEN:
        return tcp_listen(tsk, skb, th);
    case TCP_SYN_SENT:
        return tcp_synsent(tsk, skb, th);
    default:
        break;
    }

    /* "Otherwise" section in RFC793 */

    /* first check sequence number */
    if (!tcp_verify_segment(tsk, th, skb)) {
        /* RFC793: If an incoming segment is not acceptable, an acknowledgment
         * should be sent in reply (unless the RST bit is set, if so drop
         * the segment and return): */
        if (!th->rst) {
            tcp_send_ack(sk);
        }
        return_tcp_drop(sk, skb);
    }

    /* second check the RST bit */
    if (th->rst) {
        free_skb(skb);
        tcp_enter_time_wait(sk);
        tsk->sk.ops->recv_notify(&tsk->sk);
        return 0;
    }

    /* third check security and precedence */
    // Not implemented

    /* fourth check the SYN bit */
    if (th->syn) {
        /* RFC 5961 Section 4.2 */
        tcp_send_challenge_ack(sk, skb);
        return_tcp_drop(sk, skb);
    }

    /* fifth check the ACK field */
    if (!th->ack) {
        return_tcp_drop(sk, skb);
    }

    // ACK bit is on
    switch (sk->state) {
    case TCP_SYN_RECEIVED:
        if (tcb->snd_una <= th->ack_seq && th->ack_seq < tcb->snd_nxt) {
            tcp_set_state(sk, TCP_ESTABLISHED);
        } else {
            return_tcp_drop(sk, skb);
        }
        /* fallthrough: continue with the ESTABLISHED-state ACK processing */
    case TCP_ESTABLISHED:
    case TCP_FIN_WAIT_1:
    case TCP_FIN_WAIT_2:
    case TCP_CLOSE_WAIT:
    case TCP_CLOSING:
    case TCP_LAST_ACK:
        if (tcb->snd_una < th->ack_seq && th->ack_seq <= tcb->snd_nxt) {
            tcb->snd_una = th->ack_seq;
            /* Any segments on the retransmission queue which are thereby
               entirely acknowledged are removed. */
            tcp_rtt(tsk);
            tcp_clean_rto_queue(sk, tcb->snd_una);
        }

        if (th->ack_seq < tcb->snd_una) {
            // If the ACK is a duplicate, it can be ignored
            return_tcp_drop(sk, skb);
        }

        if (th->ack_seq > tcb->snd_nxt) {
            // If the ACK acks something not yet sent, then send an ACK, drop segment
            // and return
            // TODO: Dropping the seg here, why would I respond with an ACK? Linux
            // does not respond either
            //tcp_send_ack(&tsk->sk);
            return_tcp_drop(sk, skb);
        }

        if (tcb->snd_una < th->ack_seq && th->ack_seq <= tcb->snd_nxt) {
            // TODO: Send window should be updated
        }

        break;
    }

    /* If the write queue is empty, it means our FIN was acked */
    if (skb_queue_empty(&sk->write_queue)) {
        switch (sk->state) {
        case TCP_FIN_WAIT_1:
            tcp_set_state(sk, TCP_FIN_WAIT_2);
            /* fallthrough */
        case TCP_FIN_WAIT_2:
            break;
        case TCP_CLOSING:
            /* In addition to the processing for the ESTABLISHED state, if
             * the ACK acknowledges our FIN then enter the TIME-WAIT state,
               otherwise ignore the segment. */
            tcp_set_state(sk, TCP_TIME_WAIT);
            break;
        case TCP_LAST_ACK:
            /* The only thing that can arrive in this state is an acknowledgment of our FIN.
             * If our FIN is now acknowledged, delete the TCB, enter the CLOSED state, and return. */
            free_skb(skb);
            return tcp_done(sk);
        case TCP_TIME_WAIT:
            /* TODO: The only thing that can arrive in this state is a
               retransmission of the remote FIN.  Acknowledge it, and restart
               the 2 MSL timeout. */
            if (tcb->rcv_nxt == th->seq) {
                tcpsock_dbg("Remote FIN retransmitted?", sk);
                // tcb->rcv_nxt += 1;
                tsk->flags |= TCP_FIN;
                tcp_send_ack(sk);
            }
            break;
        }
    }

    /* sixth, check the URG bit */
    if (th->urg) {

    }

    /* Captured before data processing below */
    int expected = skb->seq == tcb->rcv_nxt;

    /* seventh, process the segment txt */
    switch (sk->state) {
    case TCP_ESTABLISHED:
    case TCP_FIN_WAIT_1:
    case TCP_FIN_WAIT_2:
        if (th->psh || skb->dlen > 0) {
            tcp_data_queue(tsk, th, skb);
        }

        break;
    case TCP_CLOSE_WAIT:
    case TCP_CLOSING:
    case TCP_LAST_ACK:
    case TCP_TIME_WAIT:
        /* This should not occur, since a FIN has been received from the
           remote side.  Ignore the segment text. */
        break;
    }

    /* eighth, check the FIN bit */
    if (th->fin && expected) {
        tcpsock_dbg("Received in-sequence FIN", sk);

        switch (sk->state) {
        case TCP_CLOSE:
        case TCP_LISTEN:
        case TCP_SYN_SENT:
            // Do not process, since SEG.SEQ cannot be validated
            goto drop_and_unlock;
        }

        tcb->rcv_nxt += 1;
        tsk->flags |= TCP_FIN;
        sk->poll_events |= (POLLIN | POLLPRI | POLLRDNORM | POLLRDBAND);

        tcp_send_ack(sk);
        tsk->sk.ops->recv_notify(&tsk->sk);

        switch (sk->state) {
        case TCP_SYN_RECEIVED:
        case TCP_ESTABLISHED:
            tcp_set_state(sk, TCP_CLOSE_WAIT);
            break;
        case TCP_FIN_WAIT_1:
            /* If our FIN has been ACKed (perhaps in this segment), then
               enter TIME-WAIT, start the time-wait timer, turn off the other
               timers; otherwise enter the CLOSING state. */
            if (skb_queue_empty(&sk->write_queue)) {
                tcp_enter_time_wait(sk);
            } else {
                tcp_set_state(sk, TCP_CLOSING);
            }

            break;
        case TCP_FIN_WAIT_2:
            /* Enter the TIME-WAIT state.  Start the time-wait timer, turn
               off the other timers. */
            tcp_enter_time_wait(sk);
            break;
        case TCP_CLOSE_WAIT:
        case TCP_CLOSING:
        case TCP_LAST_ACK:
            /* Remain in the state */
            break;
        case TCP_TIME_WAIT:
            /* TODO: Remain in the TIME-WAIT state.  Restart the 2 MSL time-wait
               timeout. */
            break;
        }
    }

    /* Congestion control and delacks */
    switch (sk->state) {
    case TCP_ESTABLISHED:
    case TCP_FIN_WAIT_1:
    case TCP_FIN_WAIT_2:
        if (expected) {
            tcp_stop_delack_timer(tsk);

            int pending = min(skb_queue_len(&sk->write_queue), 3);
            /* RFC1122:  A TCP SHOULD implement a delayed ACK, but an ACK should not
             * be excessively delayed; in particular, the delay MUST be less than
             * 0.5 seconds, and in a stream of full-sized segments there SHOULD
             * be an ACK for at least every second segment. */
            if (tsk->inflight == 0 && pending > 0) {
                tcp_send_next(sk, pending);
                tsk->inflight += pending;
                tcp_rearm_rto_timer(tsk);
            } else if (th->psh || (skb->dlen > 1000 && ++tsk->delacks > 1)) {
                tsk->delacks = 0;
                tcp_send_ack(sk);
            } else if (skb->dlen > 0) {
                tsk->delack = timer_add(200, &tcp_send_delack, &tsk->sk);
            }
        }
    }

    free_skb(skb);

unlock:
    return 0;
drop_and_unlock:
    tcp_drop(sk, skb);
    goto unlock;
}

/*
 * Copies up to `len` received bytes into `buf`.
 *
 * Stops early when a PSH or FIN flag is set on the socket. On a
 * non-blocking socket it returns what is available, or -EAGAIN when nothing
 * was read; otherwise it sleeps on recv_wait (releasing the socket while
 * asleep) and retries. Returns the byte count or a negative error.
 */
int tcp_receive(struct tcp_sock *tsk, void *buf, int len)
{
    int rlen = 0;
    int curlen = 0;
    struct sock *sk = &tsk->sk;
    struct socket *sock = sk->sock;

    memset(buf, 0, len);

    while (rlen < len) {
        /* Byte-wise offset through a uint8_t cast: arithmetic on void * is
         * a GNU extension, not ISO C */
        curlen = tcp_data_dequeue(tsk, (uint8_t *)buf + rlen, len - rlen);

        rlen += curlen;

        if (tsk->flags & TCP_PSH) {

            tsk->flags &= ~TCP_PSH;
            break;
        }

        if (tsk->flags & TCP_FIN || rlen == len) break;

        if (sock->flags & O_NONBLOCK) {
            if (rlen == 0) {
                rlen = -EAGAIN;
            }

            break;
        } else {
            pthread_mutex_lock(&tsk->sk.recv_wait.lock);
            socket_release(sock);
            wait_sleep(&tsk->sk.recv_wait);
            pthread_mutex_unlock(&tsk->sk.recv_wait.lock);
            socket_wr_acquire(sock);
        }
    }

    if (rlen >= 0) tcp_rearm_user_timeout(sk);

    return rlen;
}

--------------------------------------------------------------------------------
/src/tcp_output.c:
--------------------------------------------------------------------------------
#include "syshead.h"
#include "utils.h"
#include "tcp.h"
#include "ip.h"
#include "skbuff.h"
#include "timer.h"

static void *tcp_retransmission_timeout(void *arg);

/*
 * Allocates a buffer with room for Ethernet, IP and TCP headers plus
 * `optlen` option bytes and `size` payload bytes; the whole area is
 * reserved so that headers and payload can be pushed in front.
 *
 * NOTE(review): the alloc_skb() result is used unchecked -- confirm how
 * alloc_skb() behaves on allocation failure.
 */
static struct sk_buff *tcp_alloc_skb(int optlen, int size)
{
    int reserved = ETH_HDR_LEN + IP_HDR_LEN + TCP_HDR_LEN + optlen + size;
    struct sk_buff *skb = alloc_skb(reserved);

    skb_reserve(skb, reserved);
    skb->protocol = IP_TCP;
    skb->dlen = size;
    skb->seq = 0;

    return skb;
}

/* Writes the SYN options (MSS, and NOOP NOOP SACK-permitted when
 * opts->sack is set) after the header and sets the header length. */
static int tcp_write_syn_options(struct tcphdr *th, struct tcp_options *opts, int optlen)
{
    struct tcp_opt_mss *opt_mss = (struct tcp_opt_mss *) th->data;
    uint32_t i = 0;

    opt_mss->kind = TCP_OPT_MSS;
    opt_mss->len = TCP_OPTLEN_MSS;
    opt_mss->mss = htons(opts->mss);

    i += sizeof(struct tcp_opt_mss);

    if (opts->sack) {
        th->data[i++] = TCP_OPT_NOOP;
        th->data[i++] = TCP_OPT_NOOP;
        th->data[i++] = TCP_OPT_SACK_OK;
        th->data[i++] = TCP_OPTLEN_SACK;
    }

    th->hl = TCP_DOFFSET + (optlen / 4);

    return 0;
}

/* Fills `opts` for an outgoing SYN and returns the option bytes needed. */
static int tcp_syn_options(struct sock *sk, struct tcp_options *opts)
{
    struct tcp_sock *tsk = tcp_sk(sk);
    int optlen = 0;

    opts->mss = tsk->rmss;
    optlen += TCP_OPTLEN_MSS;

    if (tsk->sackok) {
        opts->sack = 1;
        /* Two one-byte NOOP pads, as written by tcp_write_syn_options().
         * This is a byte count, so the option *kind* constant TCP_OPT_NOOP
         * must not be used for it. */
        optlen += 2;
        optlen += TCP_OPTLEN_SACK;
    } else {
        opts->sack = 0;
    }

    return optlen;
}

/* Writes pending SACK blocks (newest index first) as NOOP NOOP SACK <len>
 * followed by the blocks, then clears the socket's SACK list. Does nothing
 * when SACK is off or the first block is empty. */
static int tcp_write_options(struct tcp_sock *tsk, struct tcphdr *th)
{
    uint8_t *ptr = th->data;

    if (!tsk->sackok || tsk->sacks[0].left == 0) return 0;

    *ptr++ = TCP_OPT_NOOP;
    *ptr++ = TCP_OPT_NOOP;
    *ptr++ = TCP_OPT_SACK;
    *ptr++ = 2 + tsk->sacklen * 8;

    struct tcp_sack_block *sb = (struct tcp_sack_block *)ptr;

    for (int i = tsk->sacklen - 1; i >= 0; i--) {
        sb->left = htonl(tsk->sacks[i].left);
        sb->right = htonl(tsk->sacks[i].right);
        tsk->sacks[i].left = 0;
        tsk->sacks[i].right = 0;

        sb += 1;
        ptr += sizeof(struct tcp_sack_block);
    }

    tsk->sacklen = 0;

    return 0;
}

/* Completes the TCP header (ports, seq/ack, window, options, checksum),
 * converts it to network byte order and hands the buffer to ip_output(). */
static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, uint32_t seq)
{
    struct tcp_sock *tsk = tcp_sk(sk);
    struct tcb *tcb = &tsk->tcb;
    struct tcphdr *thdr = tcp_hdr(skb);

    /* No options were previously set */
    if (thdr->hl == 0) thdr->hl = TCP_DOFFSET;

    skb_push(skb, thdr->hl * 4);

    thdr->sport = sk->sport;
    thdr->dport = sk->dport;
    thdr->seq = seq;
    thdr->ack_seq = tcb->rcv_nxt;
    thdr->rsvd = 0;
    thdr->win = tcb->rcv_wnd;
    thdr->csum = 0;
    thdr->urp = 0;

    if (thdr->hl > 5) {
        tcp_write_options(tsk, thdr);
    }

    tcp_out_dbg(thdr, sk, skb);

    thdr->sport = htons(thdr->sport);
    thdr->dport = htons(thdr->dport);
    thdr->seq = htonl(thdr->seq);
    thdr->ack_seq = htonl(thdr->ack_seq);
    thdr->win = htons(thdr->win);
    thdr->csum = htons(thdr->csum);
    thdr->urp = htons(thdr->urp);
    thdr->csum = tcp_v4_checksum(skb, htonl(sk->saddr), htonl(sk->daddr));

    return ip_output(sk, skb);
}

/* Appends the buffer to the write queue. It is transmitted immediately only
 * when nothing is in flight; otherwise it waits for tcp_send_next(). */
static int tcp_queue_transmit_skb(struct sock *sk, struct sk_buff *skb)
{
    struct tcp_sock *tsk = tcp_sk(sk);
    struct tcb *tcb = &tsk->tcb;
    struct tcphdr * th = tcp_hdr(skb);
    int rc = 0;

    if (skb_queue_empty(&sk->write_queue)) {
        tcp_rearm_rto_timer(tsk);
    }

    if (tsk->inflight == 0) {
        /* Store sequence information into the socket buffer */
        rc = tcp_transmit_skb(sk, skb, tcb->snd_nxt);
        tsk->inflight++;
        skb->seq = tcb->snd_nxt;
        tcb->snd_nxt += skb->dlen;
        skb->end_seq = tcb->snd_nxt;

        /* A FIN occupies one sequence number */
        if (th->fin) tcb->snd_nxt++;
    }

    skb_queue_tail(&sk->write_queue, skb);

    return rc;
}

/*
 * Sends a SYN+ACK; refuses (returns 1) unless the socket is in SYN_SENT.
 *
 * NOTE(review): tcp_synsent() calls this right after
 * tcp_set_state(sk, TCP_SYN_RECEIVED); if tcp_set_state() updates sk->state
 * this guard always rejects that call -- confirm the intended state check.
 */
int tcp_send_synack(struct sock *sk)
{
    if (sk->state != TCP_SYN_SENT) {
        print_err("TCP synack: Socket was not in correct state (SYN_SENT)\n");
        return 1;
    }

    struct sk_buff *skb;
    struct tcphdr *th;
    struct tcb * tcb = &tcp_sk(sk)->tcb;
    int rc = 0;

    skb = tcp_alloc_skb(0, 0);
    th = tcp_hdr(skb);

    th->syn = 1;
    th->ack = 1;

    rc = tcp_transmit_skb(sk, skb, tcb->snd_nxt);
    free_skb(skb);

    return rc;
}

/* Routine for timer-invoked delayed acknowledgment */
void *tcp_send_delack(void *arg)
{
    struct sock *sk = (struct sock *) arg;
    socket_wr_acquire(sk->sock);

    struct tcp_sock *tsk = tcp_sk(sk);
    tsk->delacks = 0;
    tcp_release_delack_timer(tsk);
    tcp_send_ack(sk);

    socket_release(sk->sock);

    return NULL;
}

/* Transmits up to `amount` buffers from the head of the write queue,
 * assigning sequence numbers from snd_nxt as it goes. */
int tcp_send_next(struct sock *sk, int amount)
{
    struct tcp_sock *tsk = tcp_sk(sk);
    struct tcb *tcb = &tsk->tcb;
    struct tcphdr *th;
    struct sk_buff *next;
    struct list_head *item, *tmp;
    int i = 0;

    list_for_each_safe(item, tmp, &sk->write_queue.head) {
        if (++i > amount) break;
        next = list_entry(item, struct sk_buff, list);

        if (next == NULL) return -1;

        skb_reset_header(next);
        tcp_transmit_skb(sk, next, tcb->snd_nxt);

        next->seq = tcb->snd_nxt;
        tcb->snd_nxt += next->dlen;
        next->end_seq = tcb->snd_nxt;

        th = tcp_hdr(next);
        /* A FIN occupies one sequence number */
        if (th->fin) tcb->snd_nxt++;
    }

    return 0;
}

static int tcp_options_len(struct sock *sk) 228 | { 229 | struct tcp_sock *tsk = tcp_sk(sk); 230 | uint8_t optlen = 0; 231 | 232 | if (tsk->sackok && tsk->sacklen > 0) { 233 | for (int i = 0; i < tsk->sacklen; i++) { 234 | if (tsk->sacks[i].left != 0) { 235 | optlen += 8; 236 | } 237 | } 238 | 239 | optlen += 2; 240 | } 241 | 242 | while (optlen % 4 > 0) optlen++; 243 | 244 | return optlen; 245 | } 246 | 247 | int tcp_send_ack(struct sock *sk) 248 | { 249 | if (sk->state == TCP_CLOSE) return 0; 250 | 251 | struct sk_buff *skb; 252 | struct tcphdr *th; 253 | struct tcb *tcb = &tcp_sk(sk)->tcb; 254 | int rc = 0; 255 | int optlen = tcp_options_len(sk); 256 | 257 | skb = tcp_alloc_skb(optlen, 0); 258 | 259 | th = tcp_hdr(skb); 260 | th->ack = 1; 261 | th->hl = TCP_DOFFSET + (optlen / 4); 262 | 263 | rc = tcp_transmit_skb(sk, skb, tcb->snd_nxt); 264 | free_skb(skb); 265 | 266 | return rc; 267 | } 268 | 269 | static int tcp_send_syn(struct sock *sk) 270 | { 271 | if (sk->state != TCP_SYN_SENT && sk->state != TCP_CLOSE && sk->state != TCP_LISTEN) { 272 | print_err("Socket was not in correct state (closed or listen)\n"); 273 | return 1; 274 | } 275 | 276 | struct sk_buff *skb; 277 | struct tcphdr *th; 278 | struct tcp_options opts = { 0 }; 279 | int tcp_options_len = 0; 280 | 281 | tcp_options_len = tcp_syn_options(sk, &opts); 282 | skb = tcp_alloc_skb(tcp_options_len, 0); 283 | th = tcp_hdr(skb); 284 | 285 | tcp_write_syn_options(th, &opts, tcp_options_len); 286 | sk->state = TCP_SYN_SENT; 287 | th->syn = 1; 288 | 289 | return tcp_queue_transmit_skb(sk, skb); 290 | } 291 | 292 | int tcp_send_fin(struct sock *sk) 293 | { 294 | if (sk->state == TCP_CLOSE) return 0; 295 | 296 | struct sk_buff *skb; 297 | struct tcphdr *th; 298 | int rc = 0; 299 | 300 | skb = tcp_alloc_skb(0, 0); 301 | 302 | th = tcp_hdr(skb); 303 | th->fin = 1; 304 | th->ack = 1; 305 | 306 | rc = tcp_queue_transmit_skb(sk, skb); 307 | 308 | return rc; 309 | } 310 | 311 | void 
tcp_select_initial_window(uint32_t *rcv_wnd) 312 | { 313 | *rcv_wnd = 44477; 314 | } 315 | 316 | static void tcp_notify_user(struct sock *sk) 317 | { 318 | switch (sk->state) { 319 | case TCP_CLOSE_WAIT: 320 | wait_wakeup(&sk->sock->sleep); 321 | break; 322 | } 323 | } 324 | 325 | static void *tcp_connect_rto(void *arg) 326 | { 327 | struct tcp_sock *tsk = (struct tcp_sock *) arg; 328 | struct tcb *tcb = &tsk->tcb; 329 | struct sock *sk = &tsk->sk; 330 | 331 | socket_wr_acquire(sk->sock); 332 | tcp_release_rto_timer(tsk); 333 | 334 | if (sk->state == TCP_SYN_SENT) { 335 | if (tsk->backoff > TCP_CONN_RETRIES) { 336 | tsk->sk.err = -ETIMEDOUT; 337 | sk->poll_events |= (POLLOUT | POLLERR | POLLHUP); 338 | tcp_done(sk); 339 | } else { 340 | struct sk_buff *skb = write_queue_head(sk); 341 | 342 | if (skb) { 343 | skb_reset_header(skb); 344 | tcp_transmit_skb(sk, skb, tcb->snd_una); 345 | 346 | tsk->backoff++; 347 | tcp_rearm_rto_timer(tsk); 348 | } 349 | } 350 | } else { 351 | print_err("TCP connect RTO triggered even when not in SYNSENT\n"); 352 | } 353 | 354 | socket_release(sk->sock); 355 | 356 | return NULL; 357 | } 358 | 359 | static void *tcp_retransmission_timeout(void *arg) 360 | { 361 | struct tcp_sock *tsk = (struct tcp_sock *) arg; 362 | struct tcb *tcb = &tsk->tcb; 363 | struct sock *sk = &tsk->sk; 364 | 365 | socket_wr_acquire(sk->sock); 366 | 367 | tcp_release_rto_timer(tsk); 368 | 369 | struct sk_buff *skb = write_queue_head(sk); 370 | 371 | if (!skb) { 372 | tsk->backoff = 0; 373 | tcpsock_dbg("TCP RTO queue empty, notifying user", sk); 374 | tcp_notify_user(sk); 375 | goto unlock; 376 | } 377 | 378 | struct tcphdr *th = tcp_hdr(skb); 379 | skb_reset_header(skb); 380 | 381 | tcp_transmit_skb(sk, skb, tcb->snd_una); 382 | /* RFC 6298: 2.5 Maximum value MAY be placed on RTO, provided it is at least 383 | 60 seconds */ 384 | if (tsk->rto > 60000) { 385 | tcp_done(sk); 386 | 387 | tsk->sk.err = -ETIMEDOUT; 388 | sk->poll_events |= (POLLOUT | POLLERR | 
POLLHUP); 389 | 390 | socket_release(sk->sock); 391 | return NULL; 392 | } else { 393 | /* RFC 6298: Section 5.5 double RTO time */ 394 | tsk->rto *= 2; 395 | tsk->backoff++; 396 | tsk->retransmit = timer_add(tsk->rto, &tcp_retransmission_timeout, tsk); 397 | 398 | if (th->fin) { 399 | tcp_handle_fin_state(sk); 400 | } 401 | } 402 | 403 | unlock: 404 | socket_release(sk->sock); 405 | 406 | return NULL; 407 | } 408 | 409 | void tcp_rearm_rto_timer(struct tcp_sock *tsk) 410 | { 411 | struct sock *sk = &tsk->sk; 412 | tcp_release_rto_timer(tsk); 413 | 414 | if (sk->state == TCP_SYN_SENT) { 415 | tsk->retransmit = timer_add(TCP_SYN_BACKOFF << tsk->backoff, &tcp_connect_rto, tsk); 416 | } else { 417 | tsk->retransmit = timer_add(tsk->rto, &tcp_retransmission_timeout, tsk); 418 | } 419 | } 420 | 421 | int tcp_connect(struct sock *sk) 422 | { 423 | struct tcp_sock *tsk = tcp_sk(sk); 424 | struct tcb *tcb = &tsk->tcb; 425 | int rc = 0; 426 | 427 | tsk->tcp_header_len = sizeof(struct tcphdr); 428 | tcb->iss = generate_iss(); 429 | tcb->snd_wnd = 0; 430 | tcb->snd_wl1 = 0; 431 | 432 | tcb->snd_una = tcb->iss; 433 | tcb->snd_up = tcb->iss; 434 | tcb->snd_nxt = tcb->iss; 435 | tcb->rcv_nxt = 0; 436 | 437 | tcp_select_initial_window(&tsk->tcb.rcv_wnd); 438 | 439 | rc = tcp_send_syn(sk); 440 | tcb->snd_nxt++; 441 | 442 | return rc; 443 | } 444 | 445 | int tcp_send(struct tcp_sock *tsk, const void *buf, int len) 446 | { 447 | struct sk_buff *skb; 448 | struct tcphdr *th; 449 | int slen = len; 450 | int mss = tsk->smss; 451 | int dlen = 0; 452 | 453 | while (slen > 0) { 454 | dlen = slen > mss ? 
mss : slen; 455 | slen -= dlen; 456 | 457 | skb = tcp_alloc_skb(0, dlen); 458 | skb_push(skb, dlen); 459 | memcpy(skb->data, buf, dlen); 460 | 461 | buf += dlen; 462 | 463 | th = tcp_hdr(skb); 464 | th->ack = 1; 465 | 466 | if (slen == 0) { 467 | th->psh = 1; 468 | } 469 | 470 | if (tcp_queue_transmit_skb(&tsk->sk, skb) == -1) { 471 | perror("Error on TCP skb queueing"); 472 | } 473 | } 474 | 475 | tcp_rearm_user_timeout(&tsk->sk); 476 | 477 | return len; 478 | } 479 | 480 | int tcp_send_reset(struct tcp_sock *tsk) 481 | { 482 | struct sk_buff *skb; 483 | struct tcphdr *th; 484 | struct tcb *tcb; 485 | int rc = 0; 486 | 487 | skb = tcp_alloc_skb(0, 0); 488 | th = tcp_hdr(skb); 489 | tcb = &tsk->tcb; 490 | 491 | th->rst = 1; 492 | tcb->snd_una = tcb->snd_nxt; 493 | 494 | rc = tcp_transmit_skb(&tsk->sk, skb, tcb->snd_nxt); 495 | free_skb(skb); 496 | 497 | return rc; 498 | } 499 | 500 | int tcp_send_challenge_ack(struct sock *sk, struct sk_buff *skb) 501 | { 502 | // TODO: implement me 503 | return 0; 504 | } 505 | 506 | int tcp_queue_fin(struct sock *sk) 507 | { 508 | struct sk_buff *skb; 509 | struct tcphdr *th; 510 | int rc = 0; 511 | 512 | skb = tcp_alloc_skb(0, 0); 513 | th = tcp_hdr(skb); 514 | 515 | th->fin = 1; 516 | th->ack = 1; 517 | 518 | tcpsock_dbg("Queueing fin", sk); 519 | 520 | rc = tcp_queue_transmit_skb(sk, skb); 521 | 522 | return rc; 523 | } 524 | -------------------------------------------------------------------------------- /src/timer.c: -------------------------------------------------------------------------------- 1 | #include "syshead.h" 2 | #include "timer.h" 3 | #include "socket.h" 4 | 5 | static LIST_HEAD(timers); 6 | static int tick = 0; 7 | static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER; 8 | static pthread_rwlock_t rwlock = PTHREAD_RWLOCK_INITIALIZER; 9 | 10 | #ifdef DEBUG_TIMER 11 | static void timer_debug() 12 | { 13 | struct list_head *item; 14 | int cnt = 0; 15 | 16 | pthread_mutex_lock(&lock); 17 | 18 | list_for_each(item, 
&timers) { 19 | cnt++; 20 | } 21 | 22 | pthread_mutex_unlock(&lock); 23 | 24 | print_debug("TIMERS: Total amount currently %d", cnt); 25 | } 26 | #else 27 | static void timer_debug() 28 | { 29 | return; 30 | } 31 | #endif 32 | 33 | static void timer_free(struct timer *t) 34 | { 35 | pthread_mutex_destroy(&t->lock); 36 | free(t); 37 | } 38 | 39 | static struct timer *timer_alloc() 40 | { 41 | struct timer *t = calloc(sizeof(struct timer), 1); 42 | pthread_mutex_init(&t->lock, NULL); 43 | 44 | return t; 45 | } 46 | 47 | static void timers_tick() 48 | { 49 | struct list_head *item, *tmp = NULL; 50 | struct timer *t = NULL; 51 | int rc = 0; 52 | 53 | if ((rc = pthread_mutex_lock(&lock)) != 0) { 54 | print_err("Timer tick lock not acquired: %s\n", strerror(rc)); 55 | return; 56 | }; 57 | 58 | list_for_each_safe(item, tmp, &timers) { 59 | if (!item) continue; 60 | 61 | t = list_entry(item, struct timer, list); 62 | 63 | if ((rc = pthread_mutex_trylock(&t->lock)) != 0) { 64 | if (rc != EBUSY) { 65 | print_err("Timer free mutex lock: %s\n", strerror(rc)); 66 | } 67 | 68 | continue; 69 | } 70 | 71 | if (!t->cancelled && t->expires < tick) { 72 | t->cancelled = 1; 73 | pthread_t th; 74 | pthread_create(&th, NULL, t->handler, t->arg); 75 | } 76 | 77 | if (t->cancelled && t->refcnt == 0) { 78 | list_del(&t->list); 79 | pthread_mutex_unlock(&t->lock); 80 | 81 | timer_free(t); 82 | } else { 83 | pthread_mutex_unlock(&t->lock); 84 | } 85 | } 86 | 87 | pthread_mutex_unlock(&lock); 88 | } 89 | 90 | void timer_oneshot(uint32_t expire, void *(*handler)(void *), void *arg) 91 | { 92 | struct timer *t = timer_alloc(); 93 | 94 | int tick = timer_get_tick(); 95 | 96 | t->refcnt = 0; 97 | t->expires = tick + expire; 98 | t->cancelled = 0; 99 | 100 | if (t->expires < tick) { 101 | print_err("ERR: Timer expiry integer wrap around\n"); 102 | } 103 | 104 | t->handler = handler; 105 | t->arg = arg; 106 | 107 | pthread_mutex_lock(&lock); 108 | list_add_tail(&t->list, &timers); 109 | 
pthread_mutex_unlock(&lock); 110 | } 111 | 112 | struct timer *timer_add(uint32_t expire, void *(*handler)(void *), void *arg) 113 | { 114 | struct timer *t = timer_alloc(); 115 | 116 | int tick = timer_get_tick(); 117 | 118 | t->refcnt = 1; 119 | t->expires = tick + expire; 120 | t->cancelled = 0; 121 | 122 | if (t->expires < tick) { 123 | print_err("ERR: Timer expiry integer wrap around\n"); 124 | } 125 | 126 | t->handler = handler; 127 | t->arg = arg; 128 | 129 | pthread_mutex_lock(&lock); 130 | list_add_tail(&t->list, &timers); 131 | pthread_mutex_unlock(&lock); 132 | 133 | return t; 134 | } 135 | 136 | void timer_release(struct timer *t) 137 | { 138 | int rc = 0; 139 | 140 | if (!t) return; 141 | 142 | if ((rc = pthread_mutex_lock(&t->lock)) != 0) { 143 | print_err("Timer release lock: %s\n", strerror(rc)); 144 | return; 145 | }; 146 | 147 | t->refcnt--; 148 | 149 | pthread_mutex_unlock(&t->lock); 150 | } 151 | 152 | void timer_cancel(struct timer *t) 153 | { 154 | int rc = 0; 155 | 156 | if (!t) return; 157 | 158 | if ((rc = pthread_mutex_lock(&t->lock)) != 0) { 159 | print_err("Timer cancel lock: %s\n", strerror(rc)); 160 | return; 161 | }; 162 | 163 | t->refcnt--; 164 | t->cancelled = 1; 165 | 166 | pthread_mutex_unlock(&t->lock); 167 | } 168 | 169 | void *timers_start() 170 | { 171 | while (1) { 172 | if (usleep(10000) != 0) { 173 | perror("Timer usleep"); 174 | } 175 | 176 | pthread_rwlock_wrlock(&rwlock); 177 | tick += 10; 178 | pthread_rwlock_unlock(&rwlock); 179 | timers_tick(); 180 | 181 | if (tick % 5000 == 0) { 182 | socket_debug(); 183 | timer_debug(); 184 | } 185 | } 186 | } 187 | 188 | int timer_get_tick() 189 | { 190 | int copy = 0; 191 | pthread_rwlock_rdlock(&rwlock); 192 | copy = tick; 193 | pthread_rwlock_unlock(&rwlock); 194 | return copy; 195 | } 196 | -------------------------------------------------------------------------------- /src/tuntap_if.c: -------------------------------------------------------------------------------- 1 | 
#include "syshead.h"
#include "utils.h"
#include "basic.h"

/* File descriptor of the TAP device opened by tun_init(). */
static int tun_fd;
/* Heap buffer (IFNAMSIZ bytes) holding the interface name. */
static char* dev;

char *tapaddr = "10.0.0.5";
char *taproute = "10.0.0.0/24";

/* Add a route for `cidr` through interface `dev` using the `ip` tool. */
static int set_if_route(char *dev, char *cidr)
{
    return run_cmd("ip route add dev %s %s", dev, cidr);
}

/* Assign the local address `cidr` to interface `dev` using the `ip` tool. */
static int set_if_address(char *dev, char *cidr)
{
    return run_cmd("ip address add dev %s local %s", dev, cidr);
}

/* Bring interface `dev` up using the `ip` tool. */
static int set_if_up(char *dev)
{
    return run_cmd("ip link set dev %s up", dev);
}

/*
 * Taken from Kernel Documentation/networking/tuntap.txt
 *
 * Opens /dev/net/tap and attaches a TAP interface to it.
 *
 * dev: in/out buffer of at least IFNAMSIZ bytes. A non-empty string requests
 *      that interface name; on success the name actually assigned is written
 *      back into it.
 *
 * Returns the open descriptor, or the negative ioctl() result on failure.
 * Exits the process if /dev/net/tap cannot be opened.
 */
static int tun_alloc(char *dev)
{
    struct ifreq ifr;
    int fd, err;

    if( (fd = open("/dev/net/tap", O_RDWR)) < 0 ) {
        perror("Cannot open TUN/TAP dev\n"
               "Make sure one exists with "
               "'$ mknod /dev/net/tap c 10 200'");
        exit(1);
    }

    CLEAR(ifr);

    /* Flags: IFF_TUN   - TUN device (no Ethernet headers)
     *        IFF_TAP   - TAP device
     *
     *        IFF_NO_PI - Do not provide packet information
     */
    ifr.ifr_flags = IFF_TAP | IFF_NO_PI;
    if( *dev ) {
        /* snprintf always NUL-terminates, unlike strncpy(..., IFNAMSIZ). */
        snprintf(ifr.ifr_name, IFNAMSIZ, "%s", dev);
    }

    if( (err = ioctl(fd, TUNSETIFF, (void *) &ifr)) < 0 ){
        perror("ERR: Could not ioctl tun");
        close(fd);
        return err;
    }

    /* Bounded copy: the caller's buffer is IFNAMSIZ bytes. */
    snprintf(dev, IFNAMSIZ, "%s", ifr.ifr_name);
    return fd;
}

/* Read up to `len` bytes from the TAP device; returns read()'s result. */
int tun_read(char *buf, int len)
{
    return read(tun_fd, buf, len);
}

/* Write `len` bytes to the TAP device; returns write()'s result. */
int tun_write(char *buf, int len)
{
    return write(tun_fd, buf, len);
}

/*
 * Allocate the TAP device, then bring it up and configure its route and
 * address. Configuration failures are reported but not fatal; failing to
 * obtain the device at all terminates the process.
 */
void tun_init()
{
    /* The kernel may hand back a name of up to IFNAMSIZ - 1 characters, so
     * the buffer must be IFNAMSIZ bytes (it used to be 10). */
    dev = calloc(IFNAMSIZ, 1);
    if (dev == NULL) {
        perror("ERR: Could not allocate tun device name");
        exit(1);
    }

    tun_fd = tun_alloc(dev);
    if (tun_fd < 0) {
        print_err("ERR: Could not allocate tun device\n");
        exit(1);
    }

    if (set_if_up(dev) != 0) {
        print_err("ERROR when setting up if\n");
    }

    if (set_if_route(dev, taproute) != 0) {
        print_err("ERROR when setting route for if\n");
    }

    if (set_if_address(dev, tapaddr) != 0) {
        print_err("ERROR when setting addr for if\n");
    }
}

/* Release the interface-name buffer allocated by tun_init(). */
void free_tun()
{
    free(dev);
    dev = NULL;
}

--------------------------------------------------------------------------------
/src/utils.c:
--------------------------------------------------------------------------------
#include "syshead.h"
#include "utils.h"

extern int debug;

/*
 * Format a shell command printf-style and run it with system().
 *
 * Returns system()'s result, or -1 if the formatted command does not fit in
 * CMDBUFLEN bytes (a truncated command is never executed).
 */
int run_cmd(char *cmd, ...)
{
    va_list ap;
    char buf[CMDBUFLEN];
    int n;

    va_start(ap, cmd);
    n = vsnprintf(buf, CMDBUFLEN, cmd, ap);
    va_end(ap);

    if (n < 0 || n >= CMDBUFLEN) {
        print_err("ERR: command too long or malformed, not executing\n");
        return -1;
    }

    if (debug) {
        printf("EXEC: %s\n", buf);
    }

    return system(buf);
}

/*
 * Sum `count` bytes starting at `addr` as native-endian 16-bit words; an odd
 * trailing byte is added as-is. The result is the unfolded 32-bit sum.
 */
uint32_t sum_every_16bits(void *addr, int count)
{
    uint32_t sum = 0;
    const uint8_t *ptr = addr;

    while( count > 1 ) {
        /* This is the inner loop. memcpy keeps the read well-defined even
         * when addr is not 2-byte aligned. */
        uint16_t word;
        memcpy(&word, ptr, sizeof(word));
        sum += word;
        ptr += sizeof(word);
        count -= 2;
    }

    /* Add left-over byte, if any */
    if( count > 0 )
        sum += *ptr;

    return sum;
}

uint16_t checksum(void *addr, int count, int start_sum)
{
    /* Compute Internet Checksum for "count" bytes
     * beginning at location "addr", seeded with "start_sum".
     * Taken from https://tools.ietf.org/html/rfc1071
     */
    uint32_t sum = start_sum;

    sum += sum_every_16bits(addr, count);

    /* Fold 32-bit sum to 16 bits */
    while (sum>>16)
        sum = (sum & 0xffff) + (sum >> 16);

    return ~sum;
}

/*
 * Resolve host/port to an IPv4 stream address and copy the first result into
 * *addr. Returns 0 on success and 1 if no address was produced; exits the
 * process if getaddrinfo() itself fails.
 */
int get_address(char *host, char *port, struct sockaddr *addr)
{
    struct addrinfo hints;
    struct addrinfo *result = NULL;
    int s;

    memset(&hints, 0, sizeof(struct addrinfo));
    hints.ai_family = AF_INET;
    hints.ai_socktype = SOCK_STREAM;

    s = getaddrinfo(host, port, &hints, &result);

    if (s != 0) {
        print_err("getaddrinfo: %s\n", gai_strerror(s));
        exit(EXIT_FAILURE);
    }

    if (result == NULL) {
        return 1;
    }

    /* Only the first entry is ever used. */
    *addr = *result->ai_addr;
    freeaddrinfo(result);
    return 0;
}

/*
 * Parse a dotted-quad string "a.b.c.d" into a 32-bit value with `a` in the
 * most significant byte. Returns 0 when the string does not contain four
 * octets.
 */
uint32_t parse_ipv4_string(char* addr) {
    uint8_t addr_bytes[4] = {0};

    if (sscanf(addr, "%hhu.%hhu.%hhu.%hhu",
               &addr_bytes[3], &addr_bytes[2], &addr_bytes[1], &addr_bytes[0]) != 4) {
        return 0;
    }

    /* Widen before shifting: a promoted int shifted by 24 overflows for
     * octets >= 128. */
    return (uint32_t)addr_bytes[0]
         | (uint32_t)addr_bytes[1] << 8
         | (uint32_t)addr_bytes[2] << 16
         | (uint32_t)addr_bytes[3] << 24;
}

/* Return the smaller of two unsigned 32-bit values. */
uint32_t min(uint32_t x, uint32_t y) {
    return x > y ? y : x;
}

--------------------------------------------------------------------------------
/tests/README.md:
--------------------------------------------------------------------------------
# Tests

Level-IP test suites currently consist of end-to-end tests, where the Linux host's applications are used to test traffic flow.

In the future, a separate unit/packet flow test framework could be integrated into the stack.

# Usage

In the project's root folder, run

    make test

Or a specific test-suite

    ./suites/arp/suite-arp

Root privileges are required.
18 | -------------------------------------------------------------------------------- /tests/suites/arp/suite-arp: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -eu 4 | 5 | source "$(dirname $0)/../../utils/common" 6 | 7 | function test_arp { 8 | arping -c3 -I tap0 10.0.0.4 >/dev/null 9 | } 10 | 11 | test_run "test_arp" "$0" 12 | -------------------------------------------------------------------------------- /tests/suites/icmp/suite-icmp: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -eu 4 | 5 | source "$(dirname $0)/../../utils/common" 6 | 7 | function test_ping { 8 | ping -c3 -I tap0 10.0.0.4 > /dev/null 9 | } 10 | 11 | test_run "test_ping" "$0" 12 | -------------------------------------------------------------------------------- /tests/suites/tcp/curl-fixture.txt: -------------------------------------------------------------------------------- 1 | HTTP/1.0 200 OK 2 | Server: 3 | Date: 4 | Content-type: text/html; charset=UTF-8 5 | Content-Length: 326 6 | 7 | 8 | Directory listing for / 9 | 10 |

Directory listing for /

11 |
12 | 18 |
19 | 20 | 21 | -------------------------------------------------------------------------------- /tests/suites/tcp/env-delayed: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -eu 4 | 5 | source "$(dirname $0)/../../utils/common" 6 | # Simple end-to-end test for an application's curl 7 | 8 | function strip_http_header { 9 | sed 's/^Date:.*/Date:/' | sed 's/^Server:.*/Server:/' 10 | } 11 | 12 | function setup { 13 | /usr/bin/env python2.7 -m SimpleHTTPServer 8002 >/dev/null 2>&1 & 14 | httpserver="$!" 15 | 16 | tc class add dev tap0 parent 1: classid 1:1 htb rate 100mbit 17 | tc filter add dev tap0 parent 1: protocol ip prio 1 u32 flowid 1:1 match ip sport 8002 0xffff 18 | tc filter add dev tap0 parent 1: protocol ip prio 1 u32 flowid 1:1 match ip dport 8002 0xffff 19 | tc qdisc add dev tap0 parent 1:1 netem delay 2000ms 20 | 21 | sleep 5 22 | } 23 | 24 | function teardown_suite { 25 | kill "$httpserver" 26 | } 27 | 28 | trap teardown_suite EXIT ERR 29 | setup 30 | -------------------------------------------------------------------------------- /tests/suites/tcp/env-duplication: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -eu 4 | 5 | source "$(dirname $0)/../../utils/common" 6 | # Simple end-to-end test for an application's curl 7 | 8 | function strip_http_header { 9 | sed 's/^Date:.*/Date:/' | sed 's/^Server:.*/Server:/' 10 | } 11 | 12 | function setup { 13 | /usr/bin/env python2.7 -m SimpleHTTPServer 8003 >/dev/null 2>&1 & 14 | httpserver="$!" 
15 | 16 | tc class add dev tap0 parent 1: classid 1:2 htb rate 100mbit 17 | tc filter add dev tap0 parent 1: protocol ip prio 1 u32 flowid 1:2 match ip sport 8003 0xffff 18 | tc filter add dev tap0 parent 1: protocol ip prio 1 u32 flowid 1:2 match ip dport 8003 0xffff 19 | tc qdisc add dev tap0 parent 1:2 netem duplicate 50% 20 | 21 | sleep 5 22 | } 23 | 24 | function teardown_suite { 25 | kill "$httpserver" 26 | } 27 | 28 | trap teardown_suite EXIT ERR 29 | setup 30 | -------------------------------------------------------------------------------- /tests/suites/tcp/env-lossy: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -eu 4 | 5 | source "$(dirname $0)/../../utils/common" 6 | # Simple end-to-end test for an application's curl 7 | 8 | function strip_http_header { 9 | sed 's/^Date:.*/Date:/' | sed 's/^Server:.*/Server:/' 10 | } 11 | 12 | function setup { 13 | /usr/bin/env python2.7 -m SimpleHTTPServer 8004 >/dev/null 2>&1 & 14 | httpserver="$!" 
15 | 16 | tc class add dev tap0 parent 1: classid 1:3 htb rate 100mbit 17 | tc filter add dev tap0 parent 1: protocol ip prio 1 u32 flowid 1:3 match ip sport 8004 0xffff 18 | tc filter add dev tap0 parent 1: protocol ip prio 1 u32 flowid 1:3 match ip dport 8004 0xffff 19 | tc qdisc add dev tap0 parent 1:3 netem loss 25% 20 | 21 | sleep 5 22 | } 23 | 24 | function teardown_suite { 25 | kill "$httpserver" 26 | } 27 | 28 | trap teardown_suite EXIT ERR 29 | setup 30 | -------------------------------------------------------------------------------- /tests/suites/tcp/env-normal: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -eu 4 | 5 | source "$(dirname $0)/../../utils/common" 6 | # Simple end-to-end test for an application's curl 7 | 8 | function strip_http_header { 9 | sed 's/^Date:.*/Date:/' | sed 's/^Server:.*/Server:/' 10 | } 11 | 12 | function setup { 13 | /usr/bin/env python2.7 -m SimpleHTTPServer 8001 >/dev/null 2>&1 & 14 | httpserver="$!" 
15 | 16 | sleep 5 17 | } 18 | 19 | function teardown_suite { 20 | kill "$httpserver" 21 | } 22 | 23 | setup 24 | trap teardown_suite EXIT ERR 25 | -------------------------------------------------------------------------------- /tests/suites/tcp/suite-curl: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -eu 4 | 5 | source "$(dirname $0)/env-normal" 6 | source "$(dirname $0)/tests" 8001 "$0" 7 | -------------------------------------------------------------------------------- /tests/suites/tcp/suite-packet-delay: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -eu 4 | 5 | source "$(dirname $0)/env-delayed" 6 | source "$(dirname $0)/tests" 8002 "$0" 7 | -------------------------------------------------------------------------------- /tests/suites/tcp/suite-packet-duplication: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -eu 4 | 5 | source "$(dirname $0)/env-duplication" 6 | source "$(dirname $0)/tests" 8003 "$0" 7 | -------------------------------------------------------------------------------- /tests/suites/tcp/suite-packet-loss: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -eu 4 | 5 | source "$(dirname $0)/env-lossy" 6 | source "$(dirname $0)/tests" 8004 "$0" 7 | -------------------------------------------------------------------------------- /tests/suites/tcp/tests: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -eu 4 | 5 | port="$1" 6 | suite="$2" 7 | 8 | function test_synchronous_http_get { 9 | response="$("$repo/tools/level-ip" "$repo/apps/curl/curl" 10.0.0.5 $port | strip_http_header)" 10 | 11 | diff "$folder/curl-fixture.txt" <(echo "$response") 12 | } 13 | 14 | function test_poll_http_get { 15 | response="$("$repo/tools/level-ip" 
"$repo/apps/curl-poll/curl-poll" 10.0.0.5 $port | strip_http_header)" 16 | 17 | diff "$folder/curl-fixture.txt" <(echo "$response") 18 | } 19 | 20 | function test_tcp_connection_refused { 21 | "$repo/tools/level-ip" "$repo/apps/curl/curl" 10.0.0.5 9999 2>&1 | grep -q "Connection refused" 22 | } 23 | 24 | test_run "test_synchronous_http_get" "$suite" 25 | test_run "test_poll_http_get" "$suite" 26 | test_run "test_tcp_connection_refused" "$suite" 27 | -------------------------------------------------------------------------------- /tests/test-run-all: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -eu 4 | 5 | source "utils/common" 6 | 7 | function teardown { 8 | kill "$stackip" 9 | } 10 | 11 | trap teardown EXIT ERR 12 | 13 | start_stack 14 | echo "Started lvl-ip with pid $stackip" 15 | sleep 5 # wait for stack to establish itself 16 | 17 | pids="" 18 | 19 | tc qdisc add dev tap0 root handle 1: htb 20 | 21 | ./suites/arp/suite-arp & 22 | pids="$pids $!" 23 | ./suites/icmp/suite-icmp & 24 | pids="$pids $!" 25 | ./suites/tcp/suite-curl & 26 | pids="$pids $!" 27 | ./suites/tcp/suite-packet-delay & 28 | pids="$pids $!" 29 | ./suites/tcp/suite-packet-duplication & 30 | pids="$pids $!" 31 | ./suites/tcp/suite-packet-loss & 32 | pids="$pids $!" 33 | 34 | rc=0 35 | for i in $pids; do 36 | wait $i 37 | pid_rc="$?" 38 | [ "$pid_rc" -ne 0 ] && rc="$pid_rc" 39 | done 40 | 41 | grep -i -B50 "SUMMARY: ThreadSanitizer:" ../lvl-ip-test.log && echo "Possible threading errors found." 42 | 43 | echo 44 | [ "$rc" -eq 0 ] && echo "Tests pass." 
exit "$rc"

--------------------------------------------------------------------------------
/tests/utils/common:
--------------------------------------------------------------------------------
#!/bin/bash

set -eu

# Shared helpers sourced by every test suite.
repo="$(git rev-parse --show-toplevel)"
folder="$(dirname "$0")"

# Start lvl-ip in the background (its pid is stored in $stackip) and wait
# until 10.0.0.5 answers a ping. Returns 1 after three failed attempts.
function start_stack {
    "$repo/lvl-ip" > ../lvl-ip-test.log 2>&1 &
    stackip="$!"

    for i in {1..3}; do
        ping -c1 -w1 10.0.0.5 >/dev/null || continue

        return 0
    done

    echo "Stack did not start up correctly" >&2
    cat ../lvl-ip-test.log
    return 1
}

# $1 = test name, $2 = suite name
function test_pass {
    echo -e "\t$2 Test pass: $1"
}

# $1 = test name, $2 = suite name. Reports on stderr and aborts the suite.
function test_fail {
    echo -e "\t$2 Test fail: $1" >&2
    exit 1
}

# Run the test function named by $1 and report the outcome for suite $2.
function test_run {
    eval "$1" || test_fail "$1" "$2"

    test_pass "$1" "$2"
}

--------------------------------------------------------------------------------
/tools/Makefile:
--------------------------------------------------------------------------------
CPPFLAGS = -I ../include -Wall -Werror

all: liblevelip

debug: CFLAGS+= -DDEBUG_API
debug: liblevelip

liblevelip: liblevelip.c
	$(CC) $(CFLAGS) $(CPPFLAGS) -fPIC -shared -o liblevelip.so liblevelip.c -ldl

.PHONY: all debug clean
clean:
	rm -f liblevelip.so*

--------------------------------------------------------------------------------
/tools/level-ip:
--------------------------------------------------------------------------------
#!/bin/sh

set -eu

# Run the given program with liblevelip.so preloaded.
prog="$1"
shift

LD_PRELOAD="$(dirname "$0")/liblevelip.so" "$prog" "$@"

--------------------------------------------------------------------------------
/tools/liblevelip.c:
--------------------------------------------------------------------------------
#define _GNU_SOURCE
#include "syshead.h"
#include "liblevelip.h"
#include "ipc.h"
#include "list.h"
#include "utils.h"

/* Size in bytes of the buffer used to receive short IPC replies. */
#define RCBUF_LEN 512

static int (*__start_main)(int (*main) (int, char * *, char * *), int argc,
                           char * * ubp_av, void (*init) (void), void (*fini) (void),
                           void (*rtld_fini) (void), void (* stack_end));

/*
 * Function pointers used by the wrappers below to reach the underlying
 * implementations of the calls this library overrides.
 * NOTE(review): they are presumably resolved elsewhere in this file (e.g.
 * with dlsym) before first use -- confirm.
 *
 * Each prototype mirrors the corresponding libc declaration, so that calling
 * through the pointer is well-defined.
 */
static int (*_fcntl)(int fildes, int cmd, ...) = NULL;
static int (*_setsockopt)(int fd, int level, int optname,
                          const void *optval, socklen_t optlen) = NULL;
/* getsockopt() writes through optval, so it must not be const here. */
static int (*_getsockopt)(int fd, int level, int optname,
                          void *optval, socklen_t *optlen) = NULL;
/* read()/write() return ssize_t, not int. */
static ssize_t (*_read)(int sockfd, void *buf, size_t len) = NULL;
static ssize_t (*_write)(int sockfd, const void *buf, size_t len) = NULL;
static int (*_connect)(int sockfd, const struct sockaddr *addr, socklen_t addrlen) = NULL;
static int (*_socket)(int domain, int type, int protocol) = NULL;
static int (*_close)(int fildes) = NULL;
static int (*_poll)(struct pollfd fds[], nfds_t nfds, int timeout) = NULL;
static int (*_pollchk)(struct pollfd *__fds, nfds_t __nfds, int __timeout,
                       __SIZE_TYPE__ __fdslen) = NULL;

static int (*_ppoll)(struct pollfd *fds, nfds_t nfds,
                     const struct timespec *tmo_p, const sigset_t *sigmask) = NULL;
static int (*_select)(int nfds, fd_set *restrict readfds,
                      fd_set *restrict writefds, fd_set *restrict errorfds,
                      struct timeval *restrict timeout) = NULL;
static ssize_t (*_sendto)(int sockfd, const void *message, size_t length,
                          int flags, const struct sockaddr *dest_addr,
                          socklen_t dest_len) = NULL;
static ssize_t (*_recvfrom)(int sockfd, void *buf, size_t len,
                            int flags, struct sockaddr *restrict address,
                            socklen_t *restrict addrlen) = NULL;
static int (*_getpeername)(int socket, struct sockaddr *restrict address,
                           socklen_t *restrict address_len) = NULL;
static int (*_getsockname)(int socket, struct sockaddr *restrict address,
                           socklen_t *restrict address_len) = NULL;

/* Number of sockets currently tracked in the lvlip_socks list. */
static int lvlip_socks_count = 0;
static LIST_HEAD(lvlip_socks); 46 | 47 | static inline struct lvlip_sock *lvlip_get_sock(int fd) { 48 | struct list_head *item; 49 | struct lvlip_sock *sock; 50 | 51 | list_for_each(item, &lvlip_socks) { 52 | sock = list_entry(item, struct lvlip_sock, list); 53 | 54 | if (sock->fd == fd) return sock; 55 | }; 56 | 57 | return NULL; 58 | }; 59 | 60 | static int is_socket_supported(int domain, int type, int protocol) 61 | { 62 | if (domain != AF_INET) return 0; 63 | 64 | if (!(type & SOCK_STREAM)) return 0; 65 | 66 | if (protocol != 0 && protocol != IPPROTO_TCP) return 0; 67 | 68 | return 1; 69 | } 70 | 71 | static int init_socket(char *sockname) 72 | { 73 | struct sockaddr_un addr; 74 | int ret; 75 | int data_socket; 76 | 77 | /* Create local socket. */ 78 | 79 | data_socket = _socket(AF_UNIX, SOCK_STREAM, 0); 80 | if (data_socket == -1) { 81 | perror("socket"); 82 | exit(EXIT_FAILURE); 83 | } 84 | 85 | /* 86 | * For portability clear the whole structure, since some 87 | * implementations have additional (nonstandard) fields in 88 | * the structure. 89 | */ 90 | 91 | memset(&addr, 0, sizeof(struct sockaddr_un)); 92 | 93 | /* Connect socket to socket address */ 94 | 95 | addr.sun_family = AF_UNIX; 96 | strncpy(addr.sun_path, sockname, sizeof(addr.sun_path) - 1); 97 | 98 | ret = _connect(data_socket, (const struct sockaddr *) &addr, 99 | sizeof(struct sockaddr_un)); 100 | if (ret == -1) { 101 | print_err("Error connecting to level-ip. 
Is it up?\n"); 102 | exit(EXIT_FAILURE); 103 | } 104 | 105 | return data_socket; 106 | } 107 | 108 | static int free_socket(int lvlfd) 109 | { 110 | return _close(lvlfd); 111 | } 112 | 113 | static int transmit_lvlip(int lvlfd, struct ipc_msg *msg, int msglen) 114 | { 115 | char *buf[RCBUF_LEN]; 116 | 117 | // Send mocked syscall to lvl-ip 118 | if (_write(lvlfd, (char *)msg, msglen) == -1) { 119 | perror("Error on writing IPC"); 120 | } 121 | 122 | // Read return value from lvl-ip 123 | if (_read(lvlfd, buf, RCBUF_LEN) == -1) { 124 | perror("Could not read IPC response"); 125 | } 126 | 127 | struct ipc_msg *response = (struct ipc_msg *) buf; 128 | 129 | if (response->type != msg->type || response->pid != msg->pid) { 130 | print_err("ERR: IPC msg response expected type %d, pid %d\n" 131 | " actual type %d, pid %d\n", 132 | msg->type, msg->pid, response->type, response->pid); 133 | return -1; 134 | } 135 | 136 | struct ipc_err *err = (struct ipc_err *) response->data; 137 | 138 | if (err->rc == -1) errno = err->err; 139 | 140 | return err->rc; 141 | } 142 | 143 | int socket(int domain, int type, int protocol) 144 | { 145 | if (!is_socket_supported(domain, type, protocol)) { 146 | return _socket(domain, type, protocol); 147 | } 148 | 149 | struct lvlip_sock *sock; 150 | 151 | int lvlfd = init_socket("/tmp/lvlip.socket"); 152 | 153 | sock = lvlip_alloc(); 154 | sock->lvlfd = lvlfd; 155 | list_add_tail(&sock->list, &lvlip_socks); 156 | lvlip_socks_count++; 157 | 158 | int pid = getpid(); 159 | int msglen = sizeof(struct ipc_msg) + sizeof(struct ipc_socket); 160 | 161 | struct ipc_msg *msg = alloca(msglen); 162 | msg->type = IPC_SOCKET; 163 | msg->pid = pid; 164 | 165 | struct ipc_socket usersock = { 166 | .domain = domain, 167 | .type = type, 168 | .protocol = protocol 169 | }; 170 | 171 | memcpy(msg->data, &usersock, sizeof(struct ipc_socket)); 172 | 173 | int sockfd = transmit_lvlip(sock->lvlfd, msg, msglen); 174 | 175 | if (sockfd == -1) { 176 | /* Socket alloc 
failed */ 177 | lvlip_free(sock); 178 | return -1; 179 | } 180 | 181 | sock->fd = sockfd; 182 | 183 | lvl_sock_dbg("Socket called", sock); 184 | 185 | return sockfd; 186 | } 187 | 188 | int close(int fd) 189 | { 190 | struct lvlip_sock *sock = lvlip_get_sock(fd); 191 | 192 | if (sock == NULL) { 193 | /* No lvl-ip IPC socket associated */ 194 | return _close(fd); 195 | } 196 | 197 | lvl_sock_dbg("Close called", sock); 198 | 199 | int pid = getpid(); 200 | int msglen = sizeof(struct ipc_msg) + sizeof(struct ipc_close); 201 | int rc = 0; 202 | 203 | struct ipc_msg *msg = alloca(msglen); 204 | msg->type = IPC_CLOSE; 205 | msg->pid = pid; 206 | 207 | struct ipc_close *payload = (struct ipc_close *)msg->data; 208 | payload->sockfd = fd; 209 | 210 | rc = transmit_lvlip(sock->lvlfd, msg, msglen); 211 | free_socket(sock->lvlfd); 212 | 213 | return rc; 214 | } 215 | 216 | int connect(int sockfd, const struct sockaddr *addr, socklen_t addrlen) 217 | { 218 | struct lvlip_sock *sock = lvlip_get_sock(sockfd); 219 | 220 | if (sock == NULL) { 221 | /* No lvl-ip IPC socket associated */ 222 | return _connect(sockfd, addr, addrlen); 223 | } 224 | 225 | lvl_sock_dbg("Connect called", sock); 226 | 227 | int msglen = sizeof(struct ipc_msg) + sizeof(struct ipc_connect); 228 | int pid = getpid(); 229 | 230 | struct ipc_msg *msg = alloca(msglen); 231 | msg->type = IPC_CONNECT; 232 | msg->pid = pid; 233 | 234 | struct ipc_connect payload = { 235 | .sockfd = sockfd, 236 | .addr = *addr, 237 | .addrlen = addrlen 238 | }; 239 | 240 | memcpy(msg->data, &payload, sizeof(struct ipc_connect)); 241 | 242 | return transmit_lvlip(sock->lvlfd, msg, msglen); 243 | } 244 | 245 | ssize_t write(int sockfd, const void *buf, size_t len) 246 | { 247 | struct lvlip_sock *sock = lvlip_get_sock(sockfd); 248 | 249 | if (sock == NULL) { 250 | /* No lvl-ip IPC socket associated */ 251 | return _write(sockfd, buf, len); 252 | } 253 | 254 | lvl_sock_dbg("Write called", sock); 255 | int msglen = sizeof(struct 
ipc_msg) + sizeof(struct ipc_write) + len; 256 | int pid = getpid(); 257 | 258 | struct ipc_msg *msg = alloca(msglen); 259 | msg->type = IPC_WRITE; 260 | msg->pid = pid; 261 | 262 | struct ipc_write payload = { 263 | .sockfd = sockfd, 264 | .len = len 265 | }; 266 | 267 | memcpy(msg->data, &payload, sizeof(struct ipc_write)); 268 | memcpy(((struct ipc_write *)msg->data)->buf, buf, len); 269 | 270 | return transmit_lvlip(sock->lvlfd, msg, msglen); 271 | } 272 | 273 | ssize_t read(int sockfd, void *buf, size_t len) 274 | { 275 | struct lvlip_sock *sock = lvlip_get_sock(sockfd); 276 | 277 | if (sock == NULL) { 278 | /* No lvl-ip IPC socket associated */ 279 | return _read(sockfd, buf, len); 280 | } 281 | 282 | lvl_sock_dbg("Read called", sock); 283 | 284 | int pid = getpid(); 285 | int msglen = sizeof(struct ipc_msg) + sizeof(struct ipc_read); 286 | 287 | struct ipc_msg *msg = alloca(msglen); 288 | msg->type = IPC_READ; 289 | msg->pid = pid; 290 | 291 | struct ipc_read payload = { 292 | .sockfd = sockfd, 293 | .len = len 294 | }; 295 | 296 | memcpy(msg->data, &payload, sizeof(struct ipc_read)); 297 | 298 | // Send mocked syscall to lvl-ip 299 | if (_write(sock->lvlfd, (char *)msg, msglen) == -1) { 300 | perror("Error on writing IPC read"); 301 | } 302 | 303 | int rlen = sizeof(struct ipc_msg) + sizeof(struct ipc_err) + sizeof(struct ipc_read) + len; 304 | char rbuf[rlen]; 305 | memset(rbuf, 0, rlen); 306 | 307 | // Read return value from lvl-ip 308 | if (_read(sock->lvlfd, rbuf, rlen) == -1) { 309 | perror("Could not read IPC read response"); 310 | } 311 | 312 | struct ipc_msg *response = (struct ipc_msg *) rbuf; 313 | 314 | if (response->type != IPC_READ || response->pid != pid) { 315 | print_err("ERR: IPC read response expected: type %d, pid %d\n" 316 | " actual: type %d, pid %d\n", 317 | IPC_READ, pid, response->type, response->pid); 318 | return -1; 319 | } 320 | 321 | struct ipc_err *error = (struct ipc_err *) response->data; 322 | if (error->rc < 0) { 323 | 
errno = error->err; 324 | return error->rc; 325 | } 326 | 327 | struct ipc_read *data = (struct ipc_read *) error->data; 328 | if (len < data->len) { 329 | print_err("IPC read received len error: %lu\n", data->len); 330 | return -1; 331 | } 332 | 333 | memset(buf, 0, len); 334 | memcpy(buf, data->buf, data->len); 335 | 336 | return data->len; 337 | } 338 | 339 | ssize_t send(int fd, const void *buf, size_t len, int flags) 340 | { 341 | return sendto(fd, buf, len, flags, NULL, 0); 342 | } 343 | 344 | ssize_t sendto(int fd, const void *buf, size_t len, 345 | int flags, const struct sockaddr *dest_addr, 346 | socklen_t dest_len) 347 | { 348 | if (!lvlip_get_sock(fd)) return _sendto(fd, buf, len, 349 | flags, dest_addr, dest_len); 350 | 351 | return write(fd, buf, len); 352 | } 353 | 354 | ssize_t recv(int fd, void *buf, size_t len, int flags) 355 | { 356 | return recvfrom(fd, buf, len, flags, NULL, 0); 357 | } 358 | 359 | ssize_t recvfrom(int fd, void *restrict buf, size_t len, 360 | int flags, struct sockaddr *restrict address, 361 | socklen_t *restrict addrlen) 362 | { 363 | if (!lvlip_get_sock(fd)) return _recvfrom(fd, buf, len, 364 | flags, address, addrlen); 365 | 366 | return read(fd, buf, len); 367 | } 368 | 369 | int poll(struct pollfd *fds, nfds_t nfds, int timeout) 370 | { 371 | struct pollfd *kernel_fds[nfds]; 372 | struct pollfd *lvlip_fds[nfds]; 373 | int lvlip_nfds = 0; 374 | int kernel_nfds = 0; 375 | int lvlip_sock = 0; 376 | 377 | struct lvlip_sock *sock = NULL; 378 | 379 | for (int i = 0; i < nfds; i++) { 380 | struct pollfd *pfd = &fds[i]; 381 | if ((sock = lvlip_get_sock(pfd->fd)) != NULL) { 382 | lvlip_fds[lvlip_nfds++] = pfd; 383 | lvlip_sock = sock->lvlfd; 384 | } else { 385 | kernel_fds[kernel_nfds++] = pfd; 386 | } 387 | } 388 | 389 | int blocking = 0; 390 | if (kernel_nfds > 0 && lvlip_nfds > 0 && timeout == -1) { 391 | /* Cannot sleep indefinitely when we demux poll 392 | with both kernel and lvlip fds */ 393 | timeout = 100; 394 | blocking 
= 1; 395 | } 396 | 397 | lvl_dbg("Poll called with kernel_nfds %d lvlip_nfds %d timeout %d", kernel_nfds, lvlip_nfds, timeout); 398 | 399 | for (;;) { 400 | int events = 0; 401 | if (kernel_nfds > 0) { 402 | for (int i = 0; i < kernel_nfds; i++) { 403 | lvl_dbg("Kernel nfd %d events %d timeout %d", kernel_fds[i]->fd, kernel_fds[i]->events, timeout); 404 | } 405 | 406 | events = _poll(*kernel_fds, kernel_nfds, timeout); 407 | 408 | if (events == -1) { 409 | perror("Poll kernel error"); 410 | errno = EAGAIN; 411 | return -1; 412 | } 413 | } 414 | 415 | if (lvlip_nfds < 1) { 416 | return events; 417 | } 418 | 419 | int pid = getpid(); 420 | int pollfd_size = sizeof(struct ipc_pollfd); 421 | int msglen = sizeof(struct ipc_msg) + sizeof(struct ipc_poll) + pollfd_size * lvlip_nfds; 422 | struct ipc_msg *msg = alloca(msglen); 423 | 424 | msg->type = IPC_POLL; 425 | msg->pid = pid; 426 | 427 | struct ipc_poll *data = (struct ipc_poll *)msg->data; 428 | data->nfds = lvlip_nfds; 429 | data->timeout = timeout; 430 | 431 | struct ipc_pollfd *pfd = NULL; 432 | for (int i = 0; i < lvlip_nfds; i++) { 433 | pfd = &data->fds[i]; 434 | pfd->fd = lvlip_fds[i]->fd; 435 | pfd->events = lvlip_fds[i]->events; 436 | pfd->revents = lvlip_fds[i]->revents; 437 | } 438 | 439 | if (_write(lvlip_sock, (char *)msg, msglen) == -1) { 440 | perror("Error on writing IPC poll"); 441 | errno = EAGAIN; 442 | return -1; 443 | } 444 | 445 | int rlen = sizeof(struct ipc_msg) + sizeof(struct ipc_err) + pollfd_size * lvlip_nfds; 446 | char rbuf[rlen]; 447 | memset(rbuf, 0, rlen); 448 | 449 | // Read return value from lvl-ip 450 | if (_read(lvlip_sock, rbuf, rlen) == -1) { 451 | perror("Could not read IPC poll response"); 452 | errno = EAGAIN; 453 | return -1; 454 | } 455 | 456 | struct ipc_msg *response = (struct ipc_msg *) rbuf; 457 | 458 | if (response->type != IPC_POLL || response->pid != pid) { 459 | print_err("ERR: IPC poll response expected: type %d, pid %d\n" 460 | " actual: type %d, pid %d\n", 461 | 
IPC_POLL, pid, response->type, response->pid); 462 | errno = EAGAIN; 463 | return -1; 464 | } 465 | 466 | struct ipc_err *error = (struct ipc_err *) response->data; 467 | if (error->rc < 0) { 468 | errno = error->err; 469 | print_err("Error on poll %d %s\n", error->rc, strerror(errno)); 470 | return error->rc; 471 | } 472 | 473 | struct ipc_pollfd *returned = (struct ipc_pollfd *) error->data; 474 | 475 | for (int i = 0; i < lvlip_nfds; i++) { 476 | lvlip_fds[i]->events = returned[i].events; 477 | lvlip_fds[i]->revents = returned[i].revents; 478 | } 479 | 480 | int result = events + error->rc; 481 | 482 | if (result > 0 || !blocking) { 483 | for (int i = 0; i < nfds; i++) { 484 | lvl_dbg("Returning counts %d nfd %d with revents %d events %d timeout %d", result, i, fds[i].revents, fds[i].events, timeout); 485 | } 486 | 487 | return result; 488 | } 489 | } 490 | 491 | print_err("Poll returning with -1\n"); 492 | return -1; 493 | } 494 | 495 | int __poll_chk (struct pollfd *__fds, nfds_t __nfds, int __timeout, 496 | __SIZE_TYPE__ __fdslen) 497 | { 498 | return poll(__fds, __nfds, __timeout); 499 | } 500 | 501 | int ppoll(struct pollfd *fds, nfds_t nfds, 502 | const struct timespec *tmo_p, const sigset_t *sigmask) 503 | { 504 | print_err("Ppoll called but not supported\n"); 505 | return -1; 506 | } 507 | 508 | int select(int nfds, fd_set *restrict readfds, 509 | fd_set *restrict writefds, fd_set *restrict errorfds, 510 | struct timeval *restrict timeout) 511 | { 512 | print_err("Select not implemented yet\n"); 513 | return _select(nfds, readfds, writefds, errorfds, timeout); 514 | } 515 | 516 | 517 | int setsockopt(int fd, int level, int optname, 518 | const void *optval, socklen_t optlen) 519 | { 520 | struct lvlip_sock *sock = lvlip_get_sock(fd); 521 | if (sock == NULL) return _setsockopt(fd, level, optname, optval, optlen); 522 | 523 | lvl_sock_dbg("Setsockopt called", sock); 524 | 525 | /* WARN: Setsockopt not supported yet */ 526 | 527 | return 0; 528 | } 529 | 
530 | int getsockopt(int fd, int level, int optname, 531 | void *optval, socklen_t *optlen) 532 | { 533 | struct lvlip_sock *sock = lvlip_get_sock(fd); 534 | if (sock == NULL) return _getsockopt(fd, level, optname, optval, optlen); 535 | 536 | lvl_sock_dbg("Getsockopt called: level %d optname %d optval %d socklen %d", 537 | sock, level, optname, *(int *)optval, *(int *)optlen); 538 | 539 | int pid = getpid(); 540 | int msglen = sizeof(struct ipc_msg) + sizeof(struct ipc_sockopt) + *optlen; 541 | 542 | struct ipc_msg *msg = alloca(msglen); 543 | msg->type = IPC_GETSOCKOPT; 544 | msg->pid = pid; 545 | 546 | struct ipc_sockopt opts = { 547 | .fd = fd, 548 | .level = level, 549 | .optname = optname, 550 | .optlen = *optlen, 551 | }; 552 | 553 | memcpy(&opts.optval, optval, *optlen); 554 | memcpy(msg->data, &opts, sizeof(struct ipc_sockopt) + *optlen); 555 | 556 | // Send mocked syscall to lvl-ip 557 | if (_write(sock->lvlfd, (char *)msg, msglen) == -1) { 558 | perror("Error on writing IPC getsockopt"); 559 | } 560 | 561 | int rlen = sizeof(struct ipc_msg) + sizeof(struct ipc_err) + sizeof(struct ipc_sockopt) + *optlen; 562 | char rbuf[rlen]; 563 | memset(rbuf, 0, rlen); 564 | 565 | // Read return value from lvl-ip 566 | if (_read(sock->lvlfd, rbuf, rlen) == -1) { 567 | perror("Could not read IPC getsockopt response"); 568 | } 569 | 570 | struct ipc_msg *response = (struct ipc_msg *) rbuf; 571 | 572 | if (response->type != IPC_GETSOCKOPT || response->pid != pid) { 573 | print_err("ERR: IPC getsockopt response expected: type %d, pid %d\n" 574 | " actual: type %d, pid %d\n", 575 | IPC_GETSOCKOPT, pid, response->type, response->pid); 576 | return -1; 577 | } 578 | 579 | struct ipc_err *error = (struct ipc_err *) response->data; 580 | if (error->rc != 0) { 581 | errno = error->err; 582 | return error->rc; 583 | } 584 | 585 | struct ipc_sockopt *optres = (struct ipc_sockopt *) error->data; 586 | 587 | lvl_sock_dbg("Got getsockopt level %d optname %d optval %d socklen %d", 
588 | sock, optres->level, optres->optname, *(int *)optres->optval, optres->optlen); 589 | 590 | int val = *(int *)optres->optval; 591 | 592 | /* lvl-ip probably encoded the error value as negative */ 593 | val *= -1; 594 | 595 | *(int *)optval = val; 596 | *optlen = optres->optlen; 597 | 598 | return 0; 599 | } 600 | 601 | int getpeername(int socket, struct sockaddr *restrict address, 602 | socklen_t *restrict address_len) 603 | { 604 | struct lvlip_sock *sock = lvlip_get_sock(socket); 605 | if (sock == NULL) return _getpeername(socket, address, address_len); 606 | 607 | lvl_sock_dbg("Getpeername called", sock); 608 | 609 | int pid = getpid(); 610 | int msglen = sizeof(struct ipc_msg) + sizeof(struct ipc_sockname); 611 | 612 | struct ipc_msg *msg = alloca(msglen); 613 | msg->type = IPC_GETPEERNAME; 614 | msg->pid = pid; 615 | 616 | struct ipc_sockname *name = (struct ipc_sockname *)msg->data; 617 | name->socket = socket; 618 | 619 | // Send mocked syscall to lvl-ip 620 | if (_write(sock->lvlfd, (char *)msg, msglen) == -1) { 621 | perror("Error on writing IPC getpeername"); 622 | } 623 | 624 | int rlen = sizeof(struct ipc_msg) + sizeof(struct ipc_err) + sizeof(struct ipc_sockname); 625 | char rbuf[rlen]; 626 | memset(rbuf, 0, rlen); 627 | 628 | // Read return value from lvl-ip 629 | if (_read(sock->lvlfd, rbuf, rlen) == -1) { 630 | perror("Could not read IPC getpeername response"); 631 | } 632 | 633 | struct ipc_msg *response = (struct ipc_msg *) rbuf; 634 | 635 | if (response->type != IPC_GETPEERNAME || response->pid != pid) { 636 | print_err("ERR: IPC getpeername response expected: type %d, pid %d\n" 637 | " actual: type %d, pid %d\n", 638 | IPC_GETPEERNAME, pid, response->type, response->pid); 639 | return -1; 640 | } 641 | 642 | struct ipc_err *error = (struct ipc_err *) response->data; 643 | if (error->rc != 0) { 644 | errno = error->err; 645 | return error->rc; 646 | } 647 | 648 | struct ipc_sockname *nameres = (struct ipc_sockname *) error->data; 649 | 650 | 
lvl_sock_dbg("Got getpeername fd %d addrlen %d sa_data %p", 651 | sock, nameres->socket, nameres->address_len, nameres->sa_data); 652 | 653 | if (nameres->socket != socket) { 654 | print_err("Got socket %d but requested %d\n", nameres->socket, socket); 655 | } 656 | 657 | *address_len = nameres->address_len; 658 | memcpy(address, nameres->sa_data, nameres->address_len); 659 | 660 | return 0; 661 | } 662 | 663 | int getsockname(int socket, struct sockaddr *restrict address, 664 | socklen_t *restrict address_len) 665 | { 666 | struct lvlip_sock *sock = lvlip_get_sock(socket); 667 | if (sock == NULL) return _getsockname(socket, address, address_len); 668 | 669 | lvl_sock_dbg("Getsockname called", sock); 670 | 671 | int pid = getpid(); 672 | int msglen = sizeof(struct ipc_msg) + sizeof(struct ipc_sockname); 673 | 674 | struct ipc_msg *msg = alloca(msglen); 675 | msg->type = IPC_GETSOCKNAME; 676 | msg->pid = pid; 677 | 678 | struct ipc_sockname *name = (struct ipc_sockname *)msg->data; 679 | name->socket = socket; 680 | 681 | // Send mocked syscall to lvl-ip 682 | if (_write(sock->lvlfd, (char *)msg, msglen) == -1) { 683 | perror("Error on writing IPC getsockname"); 684 | } 685 | 686 | int rlen = sizeof(struct ipc_msg) + sizeof(struct ipc_err) + sizeof(struct ipc_sockname); 687 | char rbuf[rlen]; 688 | memset(rbuf, 0, rlen); 689 | 690 | // Read return value from lvl-ip 691 | if (_read(sock->lvlfd, rbuf, rlen) == -1) { 692 | perror("Could not read IPC getsockname response"); 693 | } 694 | 695 | struct ipc_msg *response = (struct ipc_msg *) rbuf; 696 | 697 | if (response->type != IPC_GETSOCKNAME || response->pid != pid) { 698 | print_err("ERR: IPC getsockname response expected: type %d, pid %d\n" 699 | " actual: type %d, pid %d\n", 700 | IPC_GETSOCKNAME, pid, response->type, response->pid); 701 | return -1; 702 | } 703 | 704 | struct ipc_err *error = (struct ipc_err *) response->data; 705 | if (error->rc != 0) { 706 | errno = error->err; 707 | return error->rc; 708 | } 
709 | 710 | struct ipc_sockname *nameres = (struct ipc_sockname *) error->data; 711 | 712 | lvl_sock_dbg("Got getsockname fd %d addrlen %d sa_data %p", 713 | sock, nameres->socket, nameres->address_len, nameres->sa_data); 714 | 715 | if (nameres->socket != socket) { 716 | print_err("Got socket %d but requested %d\n", nameres->socket, socket); 717 | } 718 | 719 | *address_len = nameres->address_len; 720 | memcpy(address, nameres->sa_data, nameres->address_len); 721 | 722 | return 0; 723 | } 724 | 725 | int fcntl(int fildes, int cmd, ...) 726 | { 727 | int rc = -1; 728 | va_list ap; 729 | void *arg; 730 | 731 | struct lvlip_sock *sock = lvlip_get_sock(fildes); 732 | 733 | if (!sock) { 734 | va_start(ap, cmd); 735 | arg = va_arg(ap, void *); 736 | va_end(ap); 737 | 738 | return _fcntl(fildes, cmd, arg); 739 | } 740 | 741 | lvl_sock_dbg("Fcntl called", sock); 742 | 743 | int pid = getpid(); 744 | int msglen = sizeof(struct ipc_msg) + sizeof(struct ipc_fcntl) + sizeof(struct flock) + sizeof(int); 745 | struct ipc_msg *msg = alloca(msglen); 746 | 747 | msg->type = IPC_FCNTL; 748 | msg->pid = pid; 749 | 750 | struct ipc_fcntl *fc = (struct ipc_fcntl *)msg->data; 751 | fc->sockfd = fildes; 752 | fc->cmd = cmd; 753 | 754 | switch (cmd) { 755 | case F_GETFL: 756 | lvl_sock_dbg("Fcntl GETFL", sock); 757 | 758 | rc = transmit_lvlip(sock->lvlfd, msg, msglen); 759 | break; 760 | case F_SETFL: 761 | lvl_sock_dbg("Fcntl SETFL", sock); 762 | 763 | va_start(ap, cmd); 764 | 765 | int flags = va_arg(ap, int); 766 | memcpy(fc->data, &flags, sizeof(int)); 767 | 768 | va_end(ap); 769 | 770 | rc = transmit_lvlip(sock->lvlfd, msg, msglen); 771 | break; 772 | default: 773 | rc = -1; 774 | errno = EINVAL; 775 | break; 776 | } 777 | 778 | return rc; 779 | } 780 | 781 | int __libc_start_main(int (*main) (int, char * *, char * *), int argc, 782 | char * * ubp_av, void (*init) (void), void (*fini) (void), 783 | void (*rtld_fini) (void), void (* stack_end)) 784 | { 785 | __start_main = 
dlsym(RTLD_NEXT, "__libc_start_main"); 786 | 787 | _sendto = dlsym(RTLD_NEXT, "sendto"); 788 | _recvfrom = dlsym(RTLD_NEXT, "recvfrom"); 789 | _poll = dlsym(RTLD_NEXT, "poll"); 790 | _ppoll = dlsym(RTLD_NEXT, "ppoll"); 791 | _pollchk = dlsym(RTLD_NEXT, "__poll_chk"); 792 | _select = dlsym(RTLD_NEXT, "select"); 793 | _fcntl = dlsym(RTLD_NEXT, "fcntl"); 794 | _setsockopt = dlsym(RTLD_NEXT, "setsockopt"); 795 | _getsockopt = dlsym(RTLD_NEXT, "getsockopt"); 796 | _read = dlsym(RTLD_NEXT, "read"); 797 | _write = dlsym(RTLD_NEXT, "write"); 798 | _connect = dlsym(RTLD_NEXT, "connect"); 799 | _socket = dlsym(RTLD_NEXT, "socket"); 800 | _close = dlsym(RTLD_NEXT, "close"); 801 | _getpeername = dlsym(RTLD_NEXT, "getpeername"); 802 | _getsockname = dlsym(RTLD_NEXT, "getsockname"); 803 | 804 | list_init(&lvlip_socks); 805 | 806 | return __start_main(main, argc, ubp_av, init, fini, rtld_fini, stack_end); 807 | } 808 | -------------------------------------------------------------------------------- /tools/liblevelip.h: -------------------------------------------------------------------------------- 1 | #ifndef LIBLEVELIP_H_ 2 | #define LIBLEVELIP_H_ 3 | 4 | #include 5 | #include 6 | #include "list.h" 7 | #include "utils.h" 8 | 9 | #ifdef DEBUG_API 10 | #define lvl_dbg(msg, ...) \ 11 | do { \ 12 | print_debug("lvlip ttid %lu "msg, pthread_self(), ##__VA_ARGS__); \ 13 | } while (0) 14 | #define lvl_sock_dbg(msg, sock, ...) \ 15 | do { \ 16 | lvl_dbg("lvlfd %d fd %d: "msg, sock->lvlfd, sock->fd, ##__VA_ARGS__); \ 17 | } while (0) 18 | #else 19 | #define lvl_sock_dbg(msg, sock, ...) 20 | #define lvl_dbg(msg, ...) 
21 | #endif 22 | 23 | struct lvlip_sock { 24 | struct list_head list; 25 | int lvlfd; /* For Level-IP IPC */ 26 | int fd; 27 | }; 28 | 29 | static inline struct lvlip_sock *lvlip_alloc() { 30 | struct lvlip_sock *sock = malloc(sizeof(struct lvlip_sock)); 31 | memset(sock, 0, sizeof(struct lvlip_sock)); 32 | 33 | return sock; 34 | }; 35 | 36 | static inline void lvlip_free(struct lvlip_sock *sock) { 37 | free(sock); 38 | } 39 | 40 | #endif 41 | --------------------------------------------------------------------------------