├── .github
└── ISSUE_TEMPLATE.md
├── .gitignore
├── .travis.yml
├── Documentation
├── development.md
└── getting-started.md
├── LICENSE.md
├── Makefile
├── README.md
├── apps
├── curl-poll
│ ├── Makefile
│ └── curl-poll.c
└── curl
│ ├── Makefile
│ └── curl.c
├── build
└── BUILD
├── include
├── arp.h
├── basic.h
├── cli.h
├── dst.h
├── ethernet.h
├── icmpv4.h
├── inet.h
├── ip.h
├── ipc.h
├── list.h
├── netdev.h
├── route.h
├── skbuff.h
├── sock.h
├── socket.h
├── syshead.h
├── tcp.h
├── tcp_data.h
├── timer.h
├── tuntap_if.h
├── utils.h
└── wait.h
├── src
├── arp.c
├── cli.c
├── dst.c
├── icmpv4.c
├── inet.c
├── ip_input.c
├── ip_output.c
├── ipc.c
├── main.c
├── netdev.c
├── route.c
├── skbuff.c
├── sock.c
├── socket.c
├── tcp.c
├── tcp_data.c
├── tcp_input.c
├── tcp_output.c
├── timer.c
├── tuntap_if.c
└── utils.c
├── tests
├── README.md
├── suites
│ ├── arp
│ │ └── suite-arp
│ ├── icmp
│ │ └── suite-icmp
│ └── tcp
│ │ ├── curl-fixture.txt
│ │ ├── env-delayed
│ │ ├── env-duplication
│ │ ├── env-lossy
│ │ ├── env-normal
│ │ ├── suite-curl
│ │ ├── suite-packet-delay
│ │ ├── suite-packet-duplication
│ │ ├── suite-packet-loss
│ │ └── tests
├── test-run-all
└── utils
│ └── common
└── tools
├── Makefile
├── level-ip
├── liblevelip.c
└── liblevelip.h
/.github/ISSUE_TEMPLATE.md:
--------------------------------------------------------------------------------
1 | # Describe your issue
2 |
3 | - Is this a bug report or a feature request?
4 | - Describe the issue
5 | - What is the expected behaviour?
6 | - How to reproduce the problem?
7 |
8 | # Describe your running environment
9 |
10 | This is important for troubleshooting. Please attach at least the following info:
11 |
12 | - **OS** (e.g. `cat /etc/os-release`):
13 | - **Kernel** (e.g. `uname -a`):
14 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *.pyc
2 | build/*.o
3 | a.out
4 | *.o
5 | *cscope*
6 | lvl-ip
7 | tests/venv
8 | vgcore*
9 |
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | sudo: required
2 |
3 | language: c
4 |
5 | compiler:
6 | - clang
7 |
8 | before_install:
9 | - sudo apt-get -qq update
10 | - sudo apt-get install -qqy iputils-arping libcap-dev libcap2-bin
11 |
12 | script:
13 | - sudo mknod /dev/net/tap c 10 200
14 | - sudo chmod 0666 /dev/net/tap
15 | - sudo setcap cap_net_admin=ep /bin/ip
16 | - make test
17 |
--------------------------------------------------------------------------------
/Documentation/development.md:
--------------------------------------------------------------------------------
1 | # Development
2 |
3 | Level-IP is at a very alpha stage, has many hardcoded values and is not really intuitive to develop on.
4 |
5 | This document aims to provide information on the current features, roadmap and overall development routine.
6 |
7 | # Debugging
8 |
9 | Build Level-IP with `make debug`. It adds debug symbols and, by default, enables one of Google's sanitizers (the Makefile currently passes `-fsanitize=thread`, i.e. Thread Sanitizer).
10 |
11 | ## Debug Output
12 |
13 | When built with `make debug`, `lvl-ip` becomes chatty and outputs debug statements. You can enable/disable different component debug output with macros defined in headers.
14 |
15 | For example, enabling socket-specific output:
16 |
17 | make clean
18 | CFLAGS+=-DDEBUG_SOCKET make debug
19 |
20 | ## Debugging Networking
21 |
22 | Use `tcpdump` with the IP address you're using, e.g.:
23 |
24 | $ tcpdump -i any host 10.0.0.4 -n
25 | IP 10.0.0.4.12000 > 10.0.0.5.8000: Flags [S], seq 1525252, win 512, length 0
26 | IP 10.0.0.5.8000 > 10.0.0.4.12000: Flags [S.], seq 1332068674, ack 1525253, win 29200, options [mss 1460], length 0
27 | IP 10.0.0.4.12000 > 10.0.0.5.8000: Flags [.], ack 1, win 512, length 0
28 |
29 | Together with the verbose `lvl-ip` output, you can troubleshoot behaviour and spot patterns.
30 |
31 | ## Tracing Program Code
32 |
33 | Simply run `gdb` with your favorite interface.
34 |
35 | Refer to https://sourceware.org/gdb/current/onlinedocs/gdb/Threads.html for debugging with threads.
36 |
37 | ## Debugging Memory Allocation and Use
38 |
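39 | One of the most useful debugging aids so far has been Address Sanitizer by Google. It is built into newer GCC versions and is activated with `-fsanitize=address`. Note that `make debug` currently enables Thread Sanitizer (`-fsanitize=thread`) rather than Address Sanitizer; replace that flag in the Makefile's `debug` target to build with Address Sanitizer instead.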
40 |
41 | https://github.com/google/sanitizers/wiki/AddressSanitizer
42 |
43 | ## Debugging Concurrency
44 |
45 | Level-IP uses multiple threads with shared data structures, therefore programming errors like race conditions are easy to introduce.
46 |
47 | Thread Sanitizer by Google is also built into newer GCC versions; it helps pinpoint concurrent accesses to variables that lack proper guards.
48 |
49 | https://github.com/google/sanitizers/wiki/ThreadSanitizerCppManual
50 |
51 | # Coding Style
52 |
53 | The foremost aim of Level-IP is to be an educational project on networking. Hence, source code readability should be focused on when developing Level-IP.
54 |
55 | TODO: Actual style guidelines, so far I have been just winging it.
56 |
57 | # Features
58 |
59 | First and foremost, Level-IP aims to be just an introduction to TCP/IP stacks. Hence, convenient features are prioritized over e.g. raw performance improvements.
60 |
61 | ## Current Features
62 |
63 | * One hardcoded interface/netdev (IP 10.0.0.4)
64 | * One hardcoded socket
65 | * Ethernet II frame handling
66 | * ARP request/reply, simple caching
67 | * ICMP pings and replies
68 | * IPv4 packet handling, checksum
69 | * One hardcoded route table with default netdevice
70 | * TCPv4 Handshake
71 | * TCP data transmission
72 | * TCP RFC793 "Segment Arrives"
73 | * TCP RFC6298 Retransmission calculation
74 | * TCP RFC793 User Timeout
75 |
76 | ## Upcoming features
77 |
78 | * IP Fragmentation
79 | * IP/ICMP Diagnostics
80 | * TCP Window Management
81 | * TCP Silly Window Syndrome Avoidance
82 | * TCP Zero-Window Probes
83 | * TCP Congestion Control
84 | * TCP Selective Acknowledgments (SACK)
85 | * Server socket API calls (bind, accept...)
86 | * Raw Socket (for arping, ping..)
87 | * 'select' socket API call
88 | * ...
89 |
--------------------------------------------------------------------------------
/Documentation/getting-started.md:
--------------------------------------------------------------------------------
1 | # Getting Started
2 |
3 | Level-IP is a TCP/IP stack that is run as a single daemon process on your Linux host. Networking is achieved by configuring your Linux host to forward packets to/from Level-IP.
4 |
5 | To interface applications against Level-IP, a wrapper library for standard libc calls is provided. This wrapper can then be used with existing binaries such as `curl`, `surf` and `firefox` to redirect communications to Level-IP.
6 |
7 | DISCLAIMER: Level-IP is not a production-ready networking stack, and does not intend to be one. The nature of lower-level networking places a great responsibility on the software, and any security vulnerabilities can be disastrous. Hence, do not run Level-IP for extended periods of time, purely because it has bugs (and, like all software, will continue to have them).
8 |
9 | # Building
10 |
11 | Standard `make` stuff.
12 |
13 | $ make all
14 |
15 | This builds `lvl-ip` itself, but also the libc wrapper and provided example applications.
16 |
17 | When building, `sudo setcap ...` will probably ask you for superuser permissions. This is because `lvl-ip` needs the `CAP_NET_ADMIN` capability to set itself up. After the setup, it drops that capability.
18 |
19 | Currently, `lvl-ip` also configures the tap interface through the `ip` tool. Hence, give it permissions too:
20 |
21 | $ which ip
22 | /usr/bin/ip
23 | $ sudo setcap cap_net_admin=ep /usr/bin/ip
24 |
25 | # Setup
26 |
27 | Level-IP uses a Linux TAP device to communicate with the outside world. In short, the tap device is initialized in the host Linux's networking stack, and `lvl-ip` can then read the L2 frames:
28 |
29 | $ sudo mknod /dev/net/tap c 10 200
30 | $ sudo chmod 0666 /dev/net/tap
31 |
32 | In essence, `lvl-ip` operates as a host inside the tap device's subnet. Therefore, in order to communicate with other hosts, the tap device needs to be set in a forwarding mode:
33 |
34 | An example from my (Arch) Linux machine, where `wlp2s0` is my outgoing interface, and `tap0` is the tap device for `lvl-ip`:
35 |
36 | $ sysctl -w net.ipv4.ip_forward=1
37 | $ iptables -I INPUT --source 10.0.0.0/24 -j ACCEPT
38 | $ iptables -t nat -I POSTROUTING --out-interface wlp2s0 -j MASQUERADE
39 | $ iptables -I FORWARD --in-interface wlp2s0 --out-interface tap0 -j ACCEPT
40 | $ iptables -I FORWARD --in-interface tap0 --out-interface wlp2s0 -j ACCEPT
41 |
42 | Now, packets coming from `lvl-ip` (10.0.0.4/24 in this case) should be NATed by the host Linux interfaces and traverse the FORWARD chain correctly to the host's outgoing gateway.
43 |
44 | See http://www.netfilter.org/documentation/HOWTO/packet-filtering-HOWTO-9.html for more info.
45 |
46 | # Usage
47 |
48 | When you've built lvl-ip and set up your host stack to forward packets, you can try communicating with the Internet:
49 |
50 | $ ./lvl-ip
51 |
52 | The userspace TCP/IP stack should start. Now, first test communications with the provided applications:
53 |
54 | $ cd tools
55 | $ ./level-ip ../apps/curl/curl google.com 80
56 |
57 | `./level-ip` is just a bash-script that allows `liblevelip.so` to take precedence over the libc socket API calls.
58 |
59 | The important point is that `./level-ip` aims to be usable against any existing dynamically-linked application. Let's try the _real_ `curl`:
60 |
61 | [saminiir@localhost tools]$ curl --version
62 | curl 7.50.0 (x86_64-pc-linux-gnu) libcurl/7.50.0 OpenSSL/1.0.2h zlib/1.2.8 libidn/1.33 libssh2/1.7.0
63 | Protocols: dict file ftp ftps gopher http https imap imaps pop3 pop3s rtsp scp sftp smb smbs smtp smtps telnet tftp
64 | Features: AsynchDNS IDN IPv6 Largefile GSS-API Kerberos SPNEGO NTLM NTLM_WB SSL libz TLS-SRP UnixSockets
65 | [saminiir@localhost tools]$ curl google.com
66 |
67 | 302 Moved
68 | 302 Moved
69 | The document has moved
70 | here.
71 |
72 |
73 | And instead of using Linux's TCP/IP stack, let's try it with `lvl-ip`:
74 |
75 | [saminiir@localhost tools]$ ./level-ip curl google.com
76 |
77 | 302 Moved
78 | 302 Moved
79 | The document has moved
80 | here.
81 |
82 |
83 | The result is exactly the same. Under the hood, however, `curl` calls the libc socket API but these calls are redirected to `lvl-ip` instead.
84 |
85 | Try browsing the Web, with Level-IP doing the packet transfer:
86 |
87 | [saminiir@localhost tools]$ firefox --version
88 | Mozilla Firefox 47.0.1
89 | [saminiir@localhost tools]$ ./level-ip firefox google.com
90 |
91 | That's it!
92 |
--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
1 | The MIT License (MIT)
2 |
3 | Copyright (c) 2016 Sami Niiranen
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
# Top-level build for the lvl-ip daemon, the libc wrapper and example apps.
#
# The default goal is `lvl-ip` (first rule below).

CPPFLAGS = -I include -Wall -Werror -pthread

src = $(wildcard src/*.c)
obj = $(patsubst src/%.c, build/%.o, $(src))
headers = $(wildcard include/*.h)
apps = apps/curl/curl

# None of these targets produce a file of the same name. Declaring them phony
# matters in particular for `apps`, which would otherwise be mistaken for the
# existing apps/ directory and skipped whenever that directory looks newer
# than its prerequisites.
.PHONY: debug apps all test clean

# Link the daemon and grant it the capabilities it needs (requires sudo).
lvl-ip: $(obj)
	$(CC) $(CFLAGS) $(CPPFLAGS) $(obj) -o lvl-ip
	@echo
	@echo "lvl-ip needs CAP_NET_ADMIN:"
	sudo setcap cap_setpcap,cap_net_admin=ep lvl-ip

# Every object depends on every header, so any header change rebuilds all.
build/%.o: src/%.c ${headers}
	$(CC) $(CFLAGS) $(CPPFLAGS) -c $< -o $@

# Debug build: socket/TCP debug output, symbols and Thread Sanitizer.
debug: CFLAGS+= -DDEBUG_SOCKET -DDEBUG_TCP -g -fsanitize=thread
debug: lvl-ip

# Build the wrapper library and the example applications.
apps: $(apps)
	$(MAKE) -C tools
	$(MAKE) -C apps/curl
	$(MAKE) -C apps/curl-poll

all: lvl-ip apps

test: debug apps
	@echo
	@echo "Networking capabilites are required for test dependencies:"
	which arping | sudo xargs setcap cap_net_raw=ep
	which tc | sudo xargs setcap cap_net_admin=ep
	@echo
	cd tests && ./test-run-all

# -f: do not fail when there is nothing to remove (e.g. on a fresh checkout).
clean:
	rm -f build/*.o lvl-ip
37 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Level-IP [![Build Status](https://travis-ci.org/saminiir/level-ip.svg?branch=master)](https://travis-ci.org/saminiir/level-ip)
2 |
3 | Level-IP is a Linux userspace TCP/IP stack, implemented with TUN/TAP devices.
4 |
5 | The main goals are to:
6 | * Learn TCP/IP
7 | * Learn Linux systems/network programming
8 | * Learn Linux Socket API
9 |
10 | The results of the learning experience will be accompanied by explanatory blog posts:
11 |
12 | - Part 1, Ethernet & ARP: http://www.saminiir.com/lets-code-tcp-ip-stack-1-ethernet-arp
13 | - Part 2, IPv4 & ICMPv4: http://www.saminiir.com/lets-code-tcp-ip-stack-2-ipv4-icmpv4
14 | - Part 3, TCP Basics & Handshake: http://www.saminiir.com/lets-code-tcp-ip-stack-3-tcp-handshake/
15 | - Part 4, TCP Data Flow & Socket API: http://www.saminiir.com/lets-code-tcp-ip-stack-4-tcp-data-flow-socket-api/
16 | - Part 5, TCP Retransmission: http://www.saminiir.com/lets-code-tcp-ip-stack-5-tcp-retransmission/
17 |
18 | See [Getting Started](Documentation/getting-started.md).
19 |
20 | For development documentation, start with [Development](Documentation/development.md).
21 |
22 | # Reference works
23 |
24 | * Linux kernel TCP/IP stack, [source code](https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/tree/net/ipv4)
25 | * picoTCP, [source code](https://github.com/tass-belgium/picotcp)
26 | * Xiaochen Wang's TCP/IP stack, [source code](https://github.com/chobits/tapip)
27 |
28 | # License
29 |
30 | See [LICENSE.md](LICENSE.md) (MIT)
31 |
--------------------------------------------------------------------------------
/apps/curl-poll/Makefile:
--------------------------------------------------------------------------------
# Build the poll()-based example client. The target is named after the file
# the recipe actually produces, so make can tell when it is up to date.
curl-poll: curl-poll.c
	$(CC) curl-poll.c -o curl-poll

# Backward-compatible alias for the old target name.
.PHONY: curl
curl: curl-poll
3 |
--------------------------------------------------------------------------------
/apps/curl-poll/curl-poll.c:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #include
4 | #include
5 | #include
6 | #include
7 | #include
8 | #include
9 | #include
10 | #include
11 |
12 | #define MAX_HOSTNAME 50
13 | #define RLEN 4096
14 |
/*
 * Resolve host/port to an IPv4 stream address.
 *
 * On success the first address returned by getaddrinfo() is copied into
 * *addr and 0 is returned. Returns 1 if the result list is empty. If the
 * lookup itself fails, the reason is printed to stderr and the process
 * exits with EXIT_FAILURE.
 */
int get_address(char *host, char *port, struct sockaddr *addr)
{
    struct addrinfo hints;
    struct addrinfo *result;
    int rc;

    memset(&hints, 0, sizeof(hints));
    hints.ai_family = AF_INET;
    hints.ai_socktype = SOCK_STREAM;

    rc = getaddrinfo(host, port, &hints, &result);
    if (rc != 0) {
        fprintf(stderr, "getaddrinfo: %s\n", gai_strerror(rc));
        exit(EXIT_FAILURE);
    }

    /* Nothing to copy (and nothing to free) when the list is empty. */
    if (result == NULL) {
        return 1;
    }

    /* Only the first candidate is ever used. */
    *addr = *result->ai_addr;
    freeaddrinfo(result);

    return 0;
}
40 |
41 | int main(int argc, char **argv)
42 | {
43 | if (argc != 3 || strnlen(argv[1], MAX_HOSTNAME) == MAX_HOSTNAME) {
44 | fprintf(stderr, "Curl called but HOST or PORT not given or invalid\n");
45 | return 1;
46 | }
47 |
48 | struct sockaddr addr;
49 | int sock;
50 |
51 | if (strnlen(argv[2], 6) == 6) {
52 | fprintf(stderr, "Curl called but PORT malformed\n");
53 | return 1;
54 | }
55 |
56 | if (get_address(argv[1], argv[2], &addr) != 0) {
57 | fprintf(stderr, "Curl could not resolve hostname\n");
58 | return 1;
59 | }
60 |
61 | sock = socket(AF_INET, SOCK_STREAM, 0);
62 |
63 | if (fcntl(sock, F_SETFL, O_NONBLOCK) == -1) {
64 | perror("Setting socket nonblocking");
65 | return 1;
66 | }
67 |
68 | if (connect(sock, &addr, 16) == -1) {
69 | if (errno != EINPROGRESS) {
70 | perror("Curl could not establish connection");
71 | return 1;
72 | }
73 | }
74 |
75 | struct pollfd fds[1];
76 | fds[0].fd = sock;
77 | fds[0].events = POLLOUT;
78 |
79 | int ret = poll(fds, 1, -1);
80 |
81 | if (ret < 1) {
82 | perror("Poll failed");
83 | return 1;
84 | }
85 |
86 | assert(fds[0].revents & POLLOUT);
87 |
88 | char str[512];
89 |
90 | snprintf(str, 512, "GET / HTTP/1.1\r\nHost: %s:%s\r\nConnection: close\r\n\r\n", argv[1], argv[2]);
91 | int len = strlen(str);
92 |
93 | if (write(sock, str, len) != len) {
94 | perror("Write error");
95 | return 1;
96 | }
97 |
98 | int rlen = 0;
99 | while (1) {
100 | fds[0].events = POLLIN;
101 |
102 | ret = poll(fds, 1, -1);
103 |
104 | if (ret < 0) {
105 | perror("Poll failed");
106 | return 1;
107 | }
108 |
109 | if (fds[0].revents & POLLIN) {
110 | char buf[RLEN] = { 0 };
111 |
112 | if ((rlen = read(sock, buf, RLEN)) == -1) {
113 | perror("Read error");
114 | return 1;
115 | }
116 |
117 | if (rlen == 0) {
118 | /* We're done */
119 | break;
120 | }
121 |
122 | printf("%s", buf);
123 | }
124 |
125 | if (fds[0].revents & (POLLHUP | POLLERR)) {
126 | fprintf(stderr, "POLLHUP/ERR received %d\n", fds[0].revents);
127 | break;
128 | }
129 | }
130 |
131 | close(sock);
132 | }
133 |
--------------------------------------------------------------------------------
/apps/curl/Makefile:
--------------------------------------------------------------------------------
# Build the blocking example client from its single source file.
# $< is the prerequisite (curl.c) and $@ is the target (curl).
curl: curl.c
	$(CC) $< -o $@
3 |
--------------------------------------------------------------------------------
/apps/curl/curl.c:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #include
4 | #include
5 | #include
6 | #include
7 |
8 | #define MAX_HOSTNAME 50
9 | #define RLEN 4096
10 |
/*
 * Look up host/port as an IPv4 TCP endpoint.
 *
 * Copies the first getaddrinfo() result into *addr and returns 0, or
 * returns 1 when no result is available. A failed lookup prints the
 * resolver error to stdout and terminates the process with EXIT_FAILURE.
 */
int get_address(char *host, char *port, struct sockaddr *addr)
{
    struct addrinfo *result;
    struct addrinfo hints;
    int gai_rc;

    memset(&hints, 0, sizeof(hints));
    hints.ai_socktype = SOCK_STREAM;
    hints.ai_family = AF_INET;

    gai_rc = getaddrinfo(host, port, &hints, &result);
    if (gai_rc != 0) {
        printf("getaddrinfo: %s\n", gai_strerror(gai_rc));
        exit(EXIT_FAILURE);
    }

    /* Only the head of the result list is ever consulted. */
    if (result != NULL) {
        *addr = *result->ai_addr;
        freeaddrinfo(result);
        return 0;
    }

    return 1;
}
36 |
37 | int main(int argc, char **argv)
38 | {
39 | if (argc != 3 || strnlen(argv[1], MAX_HOSTNAME) == MAX_HOSTNAME) {
40 | printf("Curl called but HOST or PORT not given or invalid\n");
41 | return 1;
42 | }
43 |
44 | struct sockaddr addr;
45 | int sock;
46 |
47 | if (strnlen(argv[2], 6) == 6) {
48 | printf("Curl called but PORT malformed\n");
49 | return 1;
50 | }
51 |
52 | if (get_address(argv[1], argv[2], &addr) != 0) {
53 | printf("Curl could not resolve hostname\n");
54 | return 1;
55 | }
56 |
57 | sock = socket(AF_INET, SOCK_STREAM, 0);
58 |
59 | if (connect(sock, &addr, 16) == -1) {
60 | perror("Curl could not establish connection");
61 | return 1;
62 | }
63 |
64 | char str[512];
65 |
66 | snprintf(str, 512, "GET / HTTP/1.1\r\nHost: %s:%s\r\nConnection: close\r\n\r\n", argv[1], argv[2]);
67 | int len = strlen(str);
68 |
69 | if (write(sock, str, len) != len) {
70 | printf("Write error\n");
71 | return 1;
72 | }
73 |
74 | char buf[RLEN] = { 0 };
75 | int rlen = 0;
76 |
77 | while ((rlen = read(sock, buf, RLEN)) > 0) {
78 | printf("%s", buf);
79 | }
80 |
81 | if (rlen == -1) {
82 | perror("Curl read error");
83 | return 1;
84 | }
85 |
86 | close(sock);
87 | }
88 |
--------------------------------------------------------------------------------
/build/BUILD:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/saminiir/level-ip/c1950ea0e0f9feceb5602432f1751b8ce71c4952/build/BUILD
--------------------------------------------------------------------------------
/include/arp.h:
--------------------------------------------------------------------------------
1 | #ifndef ARP_H
2 | #define ARP_H
3 | #include "syshead.h"
4 | #include "ethernet.h"
5 | #include "netdev.h"
6 | #include "skbuff.h"
7 | #include "list.h"
8 | #include "utils.h"
9 |
10 | #define ARP_ETHERNET 0x0001
11 | #define ARP_IPV4 0x0800
12 | #define ARP_REQUEST 0x0001
13 | #define ARP_REPLY 0x0002
14 |
15 | #define ARP_HDR_LEN sizeof(struct arp_hdr)
16 | #define ARP_DATA_LEN sizeof(struct arp_ipv4)
17 |
18 | #define ARP_CACHE_LEN 32
19 | #define ARP_FREE 0
20 | #define ARP_WAITING 1
21 | #define ARP_RESOLVED 2
22 |
/*
 * ARP debug helpers. With DEBUG_ARP defined they format their argument via
 * print_debug(); otherwise they expand to nothing and their arguments are
 * not evaluated.
 *
 * `str` must be a string literal (it is pasted into the format string).
 * The pointer argument is parenthesized so that any pointer expression,
 * not just a plain identifier, can be passed. It is evaluated several
 * times, so it must not have side effects.
 */
#ifdef DEBUG_ARP
#define arp_dbg(str, hdr)                                               \
    do {                                                                \
        print_debug("arp "str" (hwtype: %hu, protype: %.4hx, "          \
                    "hwsize: %d, prosize: %d, opcode: %.4hx)",          \
                    (hdr)->hwtype, (hdr)->protype, (hdr)->hwsize,       \
                    (hdr)->prosize, (hdr)->opcode);                     \
    } while (0)

#define arpdata_dbg(str, data)                                          \
    do {                                                                \
        print_debug("arp data "str" (smac: %.2hhx:%.2hhx:%.2hhx:%.2hhx" \
                    ":%.2hhx:%.2hhx, sip: %hhu.%hhu.%hhu.%hhu, dmac: %.2hhx:%.2hhx" \
                    ":%.2hhx:%.2hhx:%.2hhx:%.2hhx, dip: %hhu.%hhu.%hhu.%hhu)", \
                    (data)->smac[0], (data)->smac[1], (data)->smac[2], (data)->smac[3], \
                    (data)->smac[4], (data)->smac[5], (data)->sip >> 24, (data)->sip >> 16, \
                    (data)->sip >> 8, (data)->sip >> 0, (data)->dmac[0], (data)->dmac[1], \
                    (data)->dmac[2], (data)->dmac[3], (data)->dmac[4], (data)->dmac[5], \
                    (data)->dip >> 24, (data)->dip >> 16, (data)->dip >> 8, (data)->dip >> 0); \
    } while (0)

#define arpcache_dbg(str, entry)                                        \
    do {                                                                \
        print_debug("arp cache "str" (hwtype: %hu, sip: %hhu.%hhu.%hhu.%hhu, " \
                    "smac: %.2hhx:%.2hhx:%.2hhx:%.2hhx:%.2hhx:%.2hhx, state: %d)", (entry)->hwtype, \
                    (entry)->sip >> 24, (entry)->sip >> 16, (entry)->sip >> 8, (entry)->sip >> 0, \
                    (entry)->smac[0], (entry)->smac[1], (entry)->smac[2], (entry)->smac[3], (entry)->smac[4], \
                    (entry)->smac[5], (entry)->state);                  \
    } while (0)
#else
#define arp_dbg(str, hdr)
#define arpdata_dbg(str, data)
#define arpcache_dbg(str, entry)
#endif
57 |
/*
 * Fixed part of an ARP packet. The struct is packed, so its in-memory
 * layout contains no padding; the protocol-specific payload follows in the
 * flexible array member.
 */
struct arp_hdr
{
    uint16_t hwtype;        /* hardware (link-layer) type, e.g. ARP_ETHERNET */
    uint16_t protype;       /* protocol type, e.g. ARP_IPV4 */
    uint8_t hwsize;         /* hardware address length */
    uint8_t prosize;        /* protocol address length */
    uint16_t opcode;        /* operation, e.g. ARP_REQUEST / ARP_REPLY */
    unsigned char data[];   /* payload bytes following the header */
} __attribute__((packed));
67 |
/*
 * ARP payload for IPv4 over a 6-byte hardware address. Packed: 20 bytes
 * with no padding between the address fields.
 */
struct arp_ipv4
{
    unsigned char smac[6];  /* sender hardware address */
    uint32_t sip;           /* sender IPv4 address */
    unsigned char dmac[6];  /* destination hardware address */
    uint32_t dip;           /* destination IPv4 address */
} __attribute__((packed));
75 |
76 | struct arp_cache_entry
77 | {
78 | struct list_head list;
79 | uint16_t hwtype;
80 | uint32_t sip;
81 | unsigned char smac[6];
82 | unsigned int state;
83 | };
84 |
85 | void arp_init();
86 | void free_arp();
87 | void arp_rcv(struct sk_buff *skb);
88 | void arp_reply(struct sk_buff *skb, struct netdev *netdev);
89 | int arp_request(uint32_t sip, uint32_t dip, struct netdev *netdev);
90 | unsigned char* arp_get_hwaddr(uint32_t sip);
91 |
92 | static inline struct arp_hdr *arp_hdr(struct sk_buff *skb)
93 | {
94 | return (struct arp_hdr *)(skb->head + ETH_HDR_LEN);
95 | }
96 |
97 | #endif
98 |
--------------------------------------------------------------------------------
/include/basic.h:
--------------------------------------------------------------------------------
1 | #ifndef BASIC_H
2 | #define BASIC_H
3 |
/*
 * Zero every byte of the object x. x must be an lvalue naming the object
 * itself (a struct, scalar or array), not a pointer to it.
 */
#define CLEAR(x) \
    memset(&(x), 0, sizeof (x))
5 |
6 | #endif
7 |
--------------------------------------------------------------------------------
/include/cli.h:
--------------------------------------------------------------------------------
1 | #ifndef CLI_H_
2 | #define CLI_H_
3 |
4 | void parse_cli(int argc, char** argv);
5 |
6 | #endif
7 |
--------------------------------------------------------------------------------
/include/dst.h:
--------------------------------------------------------------------------------
1 | #ifndef DST_H_
2 | #define DST_H_
3 |
4 | #include "skbuff.h"
5 |
6 | struct sk_buff;
7 |
8 | int dst_neigh_output(struct sk_buff *skb);
9 |
10 | #endif
11 |
--------------------------------------------------------------------------------
/include/ethernet.h:
--------------------------------------------------------------------------------
1 | #ifndef ETHERNET_H_
2 | #define ETHERNET_H_
3 |
4 | #include
5 | #include "netdev.h"
6 | #include "skbuff.h"
7 | #include "syshead.h"
8 | #include "utils.h"
9 |
10 | #define ETH_HDR_LEN sizeof(struct eth_hdr)
11 |
/*
 * Ethernet debug helper. With DEBUG_ETH defined it prints the MAC
 * addresses and ethertype of hdr via print_debug(); otherwise it expands to
 * nothing and its arguments are not evaluated.
 *
 * `msg` must be a string literal (it is pasted into the format string).
 * `hdr` is parenthesized so any pointer expression can be passed. It is
 * evaluated several times, so it must not have side effects.
 */
#ifdef DEBUG_ETH
#define eth_dbg(msg, hdr)                                               \
    do {                                                                \
        print_debug("eth "msg" ("                                       \
                    "dmac: %.2hhx:%.2hhx:%.2hhx:%.2hhx:%.2hhx:%.2hhx, " \
                    "smac: %.2hhx:%.2hhx:%.2hhx:%.2hhx:%.2hhx:%.2hhx, " \
                    "ethertype: %.4hx)",                                \
                    (hdr)->dmac[0], (hdr)->dmac[1], (hdr)->dmac[2], (hdr)->dmac[3], \
                    (hdr)->dmac[4], (hdr)->dmac[5], (hdr)->smac[0], (hdr)->smac[1], \
                    (hdr)->smac[2], (hdr)->smac[3], (hdr)->smac[4], (hdr)->smac[5], (hdr)->ethertype); \
    } while (0)
#else
#define eth_dbg(msg, hdr)
#endif
26 |
27 | struct sk_buff;
28 | struct netdev;
29 |
30 | uint8_t *skb_head(struct sk_buff *skb);
31 |
/*
 * Ethernet II frame header. Packed: 14 bytes, immediately followed by the
 * frame payload in the flexible array member.
 */
struct eth_hdr
{
    uint8_t dmac[6];        /* destination MAC address */
    uint8_t smac[6];        /* source MAC address */
    uint16_t ethertype;     /* type of the encapsulated payload */
    uint8_t payload[];      /* bytes following the header */
} __attribute__((packed));
39 |
40 | static inline struct eth_hdr *eth_hdr(struct sk_buff *skb)
41 | {
42 | struct eth_hdr *hdr = (struct eth_hdr *)skb_head(skb);
43 |
44 | hdr->ethertype = ntohs(hdr->ethertype);
45 |
46 | return hdr;
47 | }
48 |
49 | #endif
50 |
--------------------------------------------------------------------------------
/include/icmpv4.h:
--------------------------------------------------------------------------------
1 | #ifndef ICMPV4_H
2 | #define ICMPV4_H
3 |
4 | #include "syshead.h"
5 | #include "skbuff.h"
6 |
7 | #define ICMP_V4_REPLY 0x00
8 | #define ICMP_V4_DST_UNREACHABLE 0x03
9 | #define ICMP_V4_SRC_QUENCH 0x04
10 | #define ICMP_V4_REDIRECT 0x05
11 | #define ICMP_V4_ECHO 0x08
12 | #define ICMP_V4_ROUTER_ADV 0x09
13 | #define ICMP_V4_ROUTER_SOL 0x0a
14 | #define ICMP_V4_TIMEOUT 0x0b
15 | #define ICMP_V4_MALFORMED 0x0c
16 |
/*
 * Common ICMPv4 header. Packed: 4 bytes, followed by the type-specific
 * body in the flexible array member.
 */
struct icmp_v4 {
    uint8_t type;       /* message type, e.g. ICMP_V4_ECHO */
    uint8_t code;       /* sub-code within the type */
    uint16_t csum;      /* checksum field */
    uint8_t data[];     /* type-specific message body */
} __attribute__((packed));
23 |
/*
 * Body of an ICMPv4 echo message. Packed: 4 bytes of identifier and
 * sequence number, followed by the echo payload.
 */
struct icmp_v4_echo {
    uint16_t id;        /* identifier */
    uint16_t seq;       /* sequence number */
    uint8_t data[];     /* echo payload */
} __attribute__((packed));
29 |
/*
 * Body of an ICMPv4 destination-unreachable message. Packed: 4 bytes of
 * fixed fields followed by variable data.
 */
struct icmp_v4_dst_unreachable {
    uint8_t unused;     /* unused byte */
    uint8_t len;        /* length field */
    uint16_t var;       /* code-dependent value */
    uint8_t data[];     /* trailing data */
} __attribute__((packed));
36 |
37 |
38 | void icmpv4_incoming(struct sk_buff *skb);
39 | void icmpv4_reply(struct sk_buff *skb);
40 |
41 | #endif
42 |
--------------------------------------------------------------------------------
/include/inet.h:
--------------------------------------------------------------------------------
1 | #ifndef _INET_H
2 | #define _INET_H
3 |
4 | #include "syshead.h"
5 | #include "socket.h"
6 | #include "skbuff.h"
7 |
/*
 * INET-layer debug helper. With DEBUG_SOCKET defined it forwards to
 * socket_dbg() with "INET " prepended to msg (msg must therefore be a
 * string literal); otherwise it expands to nothing and its arguments are
 * not evaluated.
 *
 * Both variants use the same parameter list (sock, msg, ...) so that the
 * disabled form documents the real call signature.
 */
#ifdef DEBUG_SOCKET
#define inet_dbg(sock, msg, ...)                                            \
    do {                                                                    \
        socket_dbg(sock, "INET "msg, ##__VA_ARGS__);                        \
    } while (0)
#else
#define inet_dbg(sock, msg, ...)
#endif
16 |
17 | int inet_create(struct socket *sock, int protocol);
18 | int inet_socket(struct socket *sock, int protocol);
19 | int inet_connect(struct socket *sock, struct sockaddr *addr, int addr_len, int flags);
20 | int inet_write(struct socket *sock, const void *buf, int len);
21 | int inet_read(struct socket *sock, void *buf, int len);
22 | int inet_close(struct socket *sock);
23 | int inet_free(struct socket *sock);
24 | int inet_abort(struct socket *sock);
25 | int inet_getpeername(struct socket *sock, struct sockaddr *restrict address,
26 | socklen_t *restrict address_len);
27 | int inet_getsockname(struct socket *sock, struct sockaddr *restrict address,
28 | socklen_t *restrict address_len);
29 |
30 | struct sock *inet_lookup(struct sk_buff *skb, uint16_t sport, uint16_t dport);
31 | #endif
32 |
--------------------------------------------------------------------------------
/include/ip.h:
--------------------------------------------------------------------------------
1 | #ifndef IPV4_H
2 | #define IPV4_H
3 | #include "syshead.h"
4 | #include "ethernet.h"
5 | #include "skbuff.h"
6 | #include "sock.h"
7 |
8 | #define IPV4 0x04
9 | #define IP_TCP 0x06
10 | #define ICMPV4 0x01
11 |
12 | #define IP_HDR_LEN sizeof(struct iphdr)
/*
 * Payload length of an IP packet: the header's total `len` minus the header
 * length (`ihl` counts 32-bit words, hence the factor of 4). The argument
 * is parenthesized so any pointer expression can be passed; it is evaluated
 * twice, so it must not have side effects.
 */
#define ip_len(ip) ((ip)->len - ((ip)->ihl * 4))
14 |
/*
 * IP debug helper. With DEBUG_IP defined it prints the fields of hdr via
 * print_debug(); otherwise it expands to nothing and its arguments are not
 * evaluated.
 *
 * `msg` must be a string literal (it is pasted into the format string).
 * `hdr` is parenthesized so any pointer expression can be passed. It is
 * evaluated several times, so it must not have side effects.
 */
#ifdef DEBUG_IP
#define ip_dbg(msg, hdr)                                                \
    do {                                                                \
        print_debug("ip "msg" (ihl: %hhu version: %hhu tos: %hhu "      \
                    "len %hu id: %hu frag_offset: %hu ttl: %hhu "       \
                    "proto: %hhu csum: %hx "                            \
                    "saddr: %hhu.%hhu.%hhu.%hhu daddr: %hhu.%hhu.%hhu.%hhu)", \
                    (hdr)->ihl,                                         \
                    (hdr)->version, (hdr)->tos, (hdr)->len, (hdr)->id,  \
                    (hdr)->frag_offset, (hdr)->ttl, (hdr)->proto, (hdr)->csum, \
                    (hdr)->saddr >> 24, (hdr)->saddr >> 16, (hdr)->saddr >> 8, (hdr)->saddr >> 0, \
                    (hdr)->daddr >> 24, (hdr)->daddr >> 16, (hdr)->daddr >> 8, (hdr)->daddr >> 0); \
    } while (0)
#else
#define ip_dbg(msg, hdr)
#endif
31 |
/*
 * IPv4 header without options. Packed: 20 bytes, followed by the packet
 * payload in the flexible array member.
 *
 * The two 4-bit fields share the first byte; `ihl` is declared before
 * `version`, which only matches the wire layout on hosts that allocate
 * bit-fields from the low-order bits first (see the TODO below).
 */
struct iphdr {
    uint8_t ihl : 4; /* TODO: Support Big Endian hosts */
    uint8_t version : 4;    /* IP version */
    uint8_t tos;            /* type of service */
    uint16_t len;           /* total length (header + payload) */
    uint16_t id;            /* identification */
    uint16_t frag_offset;   /* flags and fragment offset */
    uint8_t ttl;            /* time to live */
    uint8_t proto;          /* payload protocol, e.g. IP_TCP / ICMPV4 */
    uint16_t csum;          /* header checksum */
    uint32_t saddr;         /* source address */
    uint32_t daddr;         /* destination address */
    uint8_t data[];         /* payload following the header */
} __attribute__((packed));
46 |
47 | static inline struct iphdr *ip_hdr(const struct sk_buff *skb)
48 | {
49 | return (struct iphdr *)(skb->head + ETH_HDR_LEN);
50 | }
51 |
/*
 * Parse a dotted-quad IPv4 string into a host-byte-order 32-bit address.
 *
 * addr: NUL-terminated text such as "10.0.0.4"; it is not modified.
 *
 * Returns the address in host byte order. On any parse failure an error is
 * printed to stderr and the process exits with status 1.
 */
static inline uint32_t ip_parse(const char *addr)
{
    uint32_t dst = 0;
    int rc = inet_pton(AF_INET, addr, &dst);

    if (rc == 0) {
        /*
         * A return of 0 means "not a valid address string"; errno is not
         * set in that case, so perror() would print a misleading reason.
         */
        fprintf(stderr, "ERR: Parsing inet address failed: invalid IPv4 address\n");
        exit(1);
    }

    if (rc != 1) {
        perror("ERR: Parsing inet address failed");
        exit(1);
    }

    return ntohl(dst);
}
63 |
64 | int ip_rcv(struct sk_buff *skb);
65 | int ip_output(struct sock *sk, struct sk_buff *skb);
66 |
67 | #endif
68 |
--------------------------------------------------------------------------------
/include/ipc.h:
--------------------------------------------------------------------------------
1 | #ifndef IPC_H_
2 | #define IPC_H_
3 |
4 | #include "list.h"
5 |
/*
 * IPC debug helper. With DEBUG_IPC defined it prints the `socket_count`
 * visible at the call site, the thread's socket and id, and msg via
 * print_debug(); otherwise it expands to nothing and its arguments are not
 * evaluated.
 *
 * Arguments are parenthesized so arbitrary expressions can be passed; `th`
 * is evaluated twice, so it must not have side effects.
 */
#ifdef DEBUG_IPC
#define ipc_dbg(msg, th)                                                \
    do {                                                                \
        print_debug("IPC sockets count %d, current sock %d, tid %lu: %s", \
                    socket_count, (th)->sock, (th)->id, (msg));         \
    } while (0)
#else
#define ipc_dbg(msg, th)
#endif
15 |
16 | void *start_ipc_listener();
17 |
18 | #define IPC_SOCKET 0x0001
19 | #define IPC_CONNECT 0x0002
20 | #define IPC_WRITE 0x0003
21 | #define IPC_READ 0x0004
22 | #define IPC_CLOSE 0x0005
23 | #define IPC_POLL 0x0006
24 | #define IPC_FCNTL 0x0007
25 | #define IPC_GETSOCKOPT 0x0008
26 | #define IPC_SETSOCKOPT 0x0009
27 | #define IPC_GETPEERNAME 0x000A
28 | #define IPC_GETSOCKNAME 0x000B
29 |
30 | struct ipc_thread {
31 | struct list_head list;
32 | int sock;
33 | pthread_t id;
34 | };
35 |
/*
 * Packed envelope with a variable-length trailer in `data`.
 * NOTE(review): `type` presumably holds one of the IPC_* codes and `data`
 * the matching ipc_* payload struct - confirm against the dispatcher.
 */
36 | struct ipc_msg {
37 | uint16_t type;
38 | pid_t pid;
39 | uint8_t data[]; /* flexible array member: payload follows the header */
40 | } __attribute__((packed));
41 |
/*
 * Packed result record: a return code, an error number and an optional
 * variable-length trailer.
 * NOTE(review): `err` presumably carries an errno value - confirm.
 */
42 | struct ipc_err {
43 | int rc;
44 | int err;
45 | uint8_t data[]; /* flexible array member */
46 | } __attribute__((packed));
47 |
/*
 * Packed payload structs for individual calls. Field names mirror the
 * arguments of the corresponding socket API call; all structs are packed,
 * so members are laid out without padding.
 */

/* Arguments in the shape of socket(domain, type, protocol). */
48 | struct ipc_socket {
49 | int domain;
50 | int type;
51 | int protocol;
52 | } __attribute__((packed));
53 |
/* Arguments in the shape of connect(sockfd, addr, addrlen); the address is
 * embedded by value as a generic struct sockaddr. */
54 | struct ipc_connect {
55 | int sockfd;
56 | struct sockaddr addr;
57 | socklen_t addrlen;
58 | } __attribute__((packed));
59 |
/* Descriptor, byte count and a variable-length buffer for a write. */
60 | struct ipc_write {
61 | int sockfd;
62 | size_t len;
63 | uint8_t buf[]; /* flexible array member */
64 | } __attribute__((packed));
65 |
/* Descriptor, byte count and a variable-length buffer for a read. */
66 | struct ipc_read {
67 | int sockfd;
68 | size_t len;
69 | uint8_t buf[]; /* flexible array member */
70 | } __attribute__((packed));
71 |
/* Descriptor to close. */
72 | struct ipc_close {
73 | int sockfd;
74 | } __attribute__((packed));
75 |
/* Packed counterpart of struct pollfd: same three members (fd, events,
 * revents) laid out without padding. */
76 | struct ipc_pollfd {
77 | int fd;
78 | short int events;
79 | short int revents;
80 | } __attribute__((packed));
81 |
/*
 * Poll payload: entry count, timeout and a variable-length array of
 * ipc_pollfd records.
 * NOTE(review): `fds` presumably holds `nfds` entries - confirm in ipc.c.
 */
82 | struct ipc_poll {
83 | nfds_t nfds;
84 | int timeout;
85 | struct ipc_pollfd fds[]; /* flexible array member */
86 | } __attribute__((packed));
87 |
/* fcntl-style payload: descriptor, command and command-specific trailing
 * bytes. */
88 | struct ipc_fcntl {
89 | int sockfd;
90 | int cmd;
91 | uint8_t data[]; /* flexible array member */
92 | } __attribute__((packed));
93 |
/* get/setsockopt-style payload: descriptor, level, option name, option
 * length and the option bytes. */
94 | struct ipc_sockopt {
95 | int fd;
96 | int level;
97 | int optname;
98 | socklen_t optlen;
99 | uint8_t optval[]; /* flexible array member */
100 | } __attribute__((packed));
101 |
/*
 * getpeername/getsockname-style payload with a fixed 128-byte address area.
 * Unlike the other ipc_* structs in this header it is NOT declared packed,
 * so the compiler's natural alignment applies.
 */
102 | struct ipc_sockname {
103 | int socket;
104 | socklen_t address_len;
105 | uint8_t sa_data[128];
106 | };
107 |
108 | #endif
109 |
--------------------------------------------------------------------------------
/include/list.h:
--------------------------------------------------------------------------------
1 | #ifndef _LIST_H
2 | #define _LIST_H
3 |
4 | #include
5 |
/*
 * Intrusive, circular, doubly linked list.
 *
 * A struct list_head is embedded in the object to be linked. A list is
 * represented by a stand-alone head node; an empty list is a head whose
 * next and prev both point back at itself.
 */
struct list_head {
    struct list_head *next;
    struct list_head *prev;
};

/* Define the variable `name` as an empty list head (self-linked). */
#define LIST_HEAD(name) \
    struct list_head name = { &(name), &(name) }

/* Reset `head` to the empty (self-linked) state. */
static inline void list_init(struct list_head *head)
{
    head->next = head;
    head->prev = head;
}

/* Link `new` directly after `head`, i.e. at the front of the list. */
static inline void list_add(struct list_head *new, struct list_head *head)
{
    struct list_head *first = head->next;

    first->prev = new;
    new->next = first;
    new->prev = head;
    head->next = new;
}

/* Link `new` directly before `head`, i.e. at the back of the list. */
static inline void list_add_tail(struct list_head *new, struct list_head *head)
{
    struct list_head *last = head->prev;

    last->next = new;
    new->prev = last;
    new->next = head;
    head->prev = new;
}

/*
 * Unlink `elem` from whatever list it is on. The neighbours are joined to
 * each other; elem's own next/prev pointers are left unchanged.
 */
static inline void list_del(struct list_head *elem)
{
    elem->prev->next = elem->next;
    elem->next->prev = elem->prev;
}

/* Recover the enclosing object of type `type` from a pointer to its
 * embedded list node `member`. */
#define list_entry(ptr, type, member) \
    ((type *) ((char *) (ptr) - offsetof(type, member)))

/* Enclosing object of the first node after head `ptr`. The list must not
 * be empty, otherwise the result is computed from the head itself. */
#define list_first_entry(ptr, type, member) \
    list_entry((ptr)->next, type, member)

/* Visit every node after `head`; the current node must not be unlinked
 * inside the loop body. */
#define list_for_each(pos, head) \
    for (pos = (head)->next; pos != (head); pos = pos->next)

/* Like list_for_each, but the successor is cached in `p` beforehand, so the
 * current node may be unlinked or freed inside the loop body. */
#define list_for_each_safe(pos, p, head) \
    for (pos = (head)->next, p = pos->next; \
         pos != (head); \
         pos = p, p = pos->next)

/* Non-zero when the list headed by `head` has no elements. */
static inline int list_empty(struct list_head *head)
{
    return head == head->next;
}
62 |
63 | #endif
64 |
--------------------------------------------------------------------------------
/include/netdev.h:
--------------------------------------------------------------------------------
1 | #ifndef NETDEV_H
2 | #define NETDEV_H
3 | #include "syshead.h"
4 | #include "ethernet.h"
5 | #include "skbuff.h"
6 | #include "utils.h"
7 |
/* NOTE(review): BUFLEN looks like a frame buffer size in bytes and
 * MAX_ADDR_LEN an upper bound for hardware address length - neither is
 * used in this header, confirm at the use sites. */
8 | #define BUFLEN 1600
9 | #define MAX_ADDR_LEN 32
10 |
/* printf-style trace helper: forwards to print_debug with the format
 * prefixed by "NETDEV: ". `fmt` must be a string literal because it is
 * pasted next to the prefix literal. */
11 | #define netdev_dbg(fmt, args...) \
12 | do { \
13 | print_debug("NETDEV: "fmt, ##args); \
14 | } while (0)
15 |
16 | struct eth_hdr;
17 |
/* Description of one network interface of the stack. */
18 | struct netdev {
19 | uint32_t addr; /* IPv4 address; arp_reply() applies htonl() to it before sending, so it is kept in host byte order */
20 | uint8_t addr_len; /* number of hwaddr bytes copied by arp_request() */
21 | uint8_t hwaddr[6]; /* hardware (MAC) address */
22 | uint32_t mtu;
23 | };
24 |
25 | void netdev_init();
26 | int netdev_transmit(struct sk_buff *skb, uint8_t *dst, uint16_t ethertype);
27 | void *netdev_rx_loop();
28 | void free_netdev();
29 | struct netdev *netdev_get(uint32_t sip);
30 | #endif
31 |
--------------------------------------------------------------------------------
/include/route.h:
--------------------------------------------------------------------------------
1 | #ifndef _ROUTE_H
2 | #define _ROUTE_H
3 |
4 | #include "list.h"
5 |
/* Single-bit flag values; each constant occupies a distinct bit, so they
 * can be OR-ed together (rtentry.flags is a uint8_t). */
6 | #define RT_LOOPBACK 0x01
7 | #define RT_GATEWAY 0x02
8 | #define RT_HOST 0x04
9 | #define RT_REJECT 0x08
10 | #define RT_UP 0x10
11 |
/* One routing table entry, linkable into a list through `list`. */
12 | struct rtentry {
13 | struct list_head list; /* list linkage */
14 | uint32_t dst;
15 | uint32_t gateway;
16 | uint32_t netmask;
17 | uint8_t flags; /* presumably a combination of RT_* bits - confirm in route.c */
18 | uint32_t metric;
19 | struct netdev *dev; /* interface associated with this route */
20 | };
21 |
22 | void route_init();
23 | struct rtentry *route_lookup(uint32_t daddr);
24 | void free_routes();
25 |
26 | #endif
27 |
--------------------------------------------------------------------------------
/include/skbuff.h:
--------------------------------------------------------------------------------
1 | #ifndef SKBUFF_H_
2 | #define SKBUFF_H_
3 |
4 | #include "netdev.h"
5 | #include "route.h"
6 | #include "list.h"
7 | #include
8 |
/*
 * Packet buffer descriptor. `list` allows the buffer to sit on an
 * sk_buff_head queue. The header accessors ip_hdr() and tcp_hdr() compute
 * their result from `head` plus fixed offsets.
 */
9 | struct sk_buff {
10 | struct list_head list; /* queue linkage */
11 | struct rtentry *rt;
12 | struct netdev *dev; /* set by the ARP code before netdev_transmit() */
13 | int refcnt; /* decremented by skb_queue_free() before free_skb() */
14 | uint16_t protocol;
15 | uint32_t len;
16 | uint32_t dlen; /* added to seq in the TCP trace macros; presumably payload length - confirm */
17 | uint32_t seq;
18 | uint32_t end_seq;
19 | uint8_t *end;
20 | uint8_t *head; /* base pointer used by the header accessors */
21 | uint8_t *data;
22 | uint8_t *payload;
23 | };
24 |
/* Queue of sk_buffs: a list head plus an element counter that the
 * skb_queue_* helpers keep in step with insertions and removals. */
25 | struct sk_buff_head {
26 | struct list_head head; /* list of queued buffers (linked via sk_buff.list) */
27 |
28 | uint32_t qlen; /* number of queued buffers */
29 | };
30 |
31 | struct sk_buff *alloc_skb(unsigned int size);
32 | void free_skb(struct sk_buff *skb);
33 | uint8_t *skb_push(struct sk_buff *skb, unsigned int len);
34 | uint8_t *skb_head(struct sk_buff *skb);
35 | void *skb_reserve(struct sk_buff *skb, unsigned int len);
36 | void skb_reset_header(struct sk_buff *skb);
37 |
38 | static inline uint32_t skb_queue_len(const struct sk_buff_head *list)
39 | {
40 | return list->qlen;
41 | }
42 |
43 | static inline void skb_queue_init(struct sk_buff_head *list)
44 | {
45 | list_init(&list->head);
46 | list->qlen = 0;
47 | }
48 |
49 | static inline void skb_queue_add(struct sk_buff_head *list, struct sk_buff *new, struct sk_buff *next)
50 | {
51 | list_add_tail(&new->list, &next->list);
52 | list->qlen += 1;
53 | }
54 |
55 | static inline void skb_queue_tail(struct sk_buff_head *list, struct sk_buff *new)
56 | {
57 | list_add_tail(&new->list, &list->head);
58 | list->qlen += 1;
59 | }
60 |
61 | static inline struct sk_buff *skb_dequeue(struct sk_buff_head *list)
62 | {
63 | struct sk_buff *skb = list_first_entry(&list->head, struct sk_buff, list);
64 | list_del(&skb->list);
65 | list->qlen -= 1;
66 |
67 | return skb;
68 | }
69 |
/* Non-zero when the queue's element counter is zero. */
static inline int skb_queue_empty(const struct sk_buff_head *list)
{
    return skb_queue_len(list) == 0;
}
74 |
75 | static inline struct sk_buff *skb_peek(struct sk_buff_head *list)
76 | {
77 | if (skb_queue_empty(list)) return NULL;
78 |
79 | return list_first_entry(&list->head, struct sk_buff, list);
80 | }
81 |
82 | static inline void skb_queue_free(struct sk_buff_head *list)
83 | {
84 | struct sk_buff *skb = NULL;
85 |
86 | while ((skb = skb_peek(list)) != NULL) {
87 | skb_dequeue(list);
88 | skb->refcnt--;
89 | free_skb(skb);
90 | }
91 | }
92 |
93 | #endif
94 |
--------------------------------------------------------------------------------
/include/sock.h:
--------------------------------------------------------------------------------
1 | #ifndef _SOCK_H
2 | #define _SOCK_H
3 |
4 | #include "socket.h"
5 | #include "wait.h"
6 | #include "skbuff.h"
7 |
8 | struct sock;
9 |
/*
 * Table of protocol-level operations acting on a struct sock. struct sock
 * stores a pointer to such a table in its `ops` member.
 */
10 | struct net_ops {
11 | struct sock* (*alloc_sock) (int protocol);
12 | int (*init) (struct sock *sk);
13 | int (*connect) (struct sock *sk, const struct sockaddr *addr, int addr_len, int flags);
14 | int (*disconnect) (struct sock *sk, int flags);
15 | int (*write) (struct sock *sk, const void *buf, int len);
16 | int (*read) (struct sock *sk, void *buf, int len);
17 | int (*recv_notify) (struct sock *sk);
18 | int (*close) (struct sock *sk);
19 | int (*abort) (struct sock *sk);
20 | };
21 |
/*
 * Protocol-level socket state. struct tcp_sock embeds this struct as its
 * first member, which is what allows the tcp_sk() cast.
 */
22 | struct sock {
23 | struct socket *sock; /* pointer to the owning struct socket */
24 | struct net_ops *ops; /* protocol operation table */
25 | struct wait_lock recv_wait;
26 | struct sk_buff_head receive_queue;
27 | struct sk_buff_head write_queue; /* front element is returned by write_queue_head() */
28 | int protocol;
29 | int state; /* the TCP trace macro indexes tcp_dbg_states[] with it */
30 | int err;
31 | short int poll_events;
32 | uint16_t sport;
33 | uint16_t dport;
34 | uint32_t saddr; /* the TCP trace macros print it most-significant byte first */
35 | uint32_t daddr;
36 | };
37 |
38 | static inline struct sk_buff *write_queue_head(struct sock *sk)
39 | {
40 | return skb_peek(&sk->write_queue);
41 | }
42 |
43 | struct sock *sk_alloc(struct net_ops *ops, int protocol);
44 | void sock_free(struct sock *sk);
45 | void sock_init_data(struct socket *sock, struct sock *sk);
46 | void sock_connected(struct sock *sk);
47 |
48 | #endif
49 |
--------------------------------------------------------------------------------
/include/socket.h:
--------------------------------------------------------------------------------
1 | #ifndef SOCKET_H_
2 | #define SOCKET_H_
3 |
4 | #include "sock.h"
5 | #include "wait.h"
6 | #include "list.h"
7 |
/*
 * Debug trace for a struct socket.
 *
 * With DEBUG_SOCKET defined, prints fd, pid, state, flags and several
 * members of sock->sk (state, poll_events, ports, queue lengths), followed
 * by the caller's printf-style `msg` and arguments. `sock->sk` is
 * dereferenced unconditionally, so it must be non-NULL. `msg` must be a
 * string literal because it is pasted onto the prefix literal. Without
 * DEBUG_SOCKET the macro expands to nothing.
 */
8 | #ifdef DEBUG_SOCKET
9 | #define socket_dbg(sock, msg, ...) \
10 | do { \
11 | print_debug("Socket fd %d pid %d state %d sk_state %d flags %d poll %d sport %d dport %d " \
12 | "recv-q %d send-q %d: "msg, \
13 | sock->fd, sock->pid, sock->state, sock->sk->state, sock->flags, \
14 | sock->sk->poll_events, \
15 | sock->sk->sport, sock->sk->dport, \
16 | sock->sk->receive_queue.qlen, \
17 | sock->sk->write_queue.qlen, ##__VA_ARGS__); \
18 | } while (0)
19 | #else
20 | #define socket_dbg(sock, msg, ...)
21 | #endif
22 |
23 | struct socket;
24 |
/* Connection state of a struct socket (stored in socket.state). SS_FREE is
 * explicitly 0, so a zero-initialised socket reads as "not allocated". */
25 | enum socket_state {
26 | SS_FREE = 0, /* not allocated */
27 | SS_UNCONNECTED, /* unconnected to any socket */
28 | SS_CONNECTING, /* in process of connecting */
29 | SS_CONNECTED, /* connected to socket */
30 | SS_DISCONNECTING /* in process of disconnecting */
31 | };
32 |
/*
 * Pairs a socket-level operation table and a protocol-level operation
 * table with a (type, protocol) tuple.
 * NOTE(review): presumably used to pick the implementation when a socket
 * is created - confirm in inet.c.
 */
33 | struct sock_type {
34 | struct sock_ops *sock_ops;
35 | struct net_ops *net_ops;
36 | int type;
37 | int protocol;
38 | };
39 |
/*
 * Table of socket-level operations acting on a struct socket. struct
 * socket stores a pointer to such a table in its `ops` member.
 */
40 | struct sock_ops {
41 | int (*connect) (struct socket *sock, const struct sockaddr *addr,
42 | int addr_len, int flags);
43 | int (*write) (struct socket *sock, const void *buf, int len);
44 | int (*read) (struct socket *sock, void *buf, int len);
45 | int (*close) (struct socket *sock);
46 | int (*free) (struct socket *sock);
47 | int (*abort) (struct socket *sock);
48 | int (*poll) (struct socket *sock);
49 | int (*getpeername) (struct socket *sock, struct sockaddr *restrict addr,
50 | socklen_t *restrict address_len);
51 | int (*getsockname) (struct socket *sock, struct sockaddr *restrict addr,
52 | socklen_t *restrict address_len);
53 | };
54 |
/* Per-family hook holding a single callback that sets up `sock` for the
 * given protocol number. */
55 | struct net_family {
56 | int (*create) (struct socket *sock, int protocol);
57 | };
58 |
/* Socket object, linkable into a list through `list`; the protocol state
 * hangs off `sk`. */
59 | struct socket {
60 | struct list_head list; /* list linkage */
61 | int fd;
62 | pid_t pid;
63 | int refcnt;
64 | enum socket_state state;
65 | short type;
66 | int flags;
67 | struct sock *sk; /* protocol-level state; dereferenced by socket_dbg() */
68 | struct sock_ops *ops; /* socket-level operation table */
69 | struct wait_lock sleep;
70 | pthread_rwlock_t lock;
71 | };
72 |
73 | void *socket_ipc_open(void *args);
74 | int _socket(pid_t pid, int domain, int type, int protocol);
75 | int _connect(pid_t pid, int sockfd, const struct sockaddr *addr, socklen_t addrlen);
76 | int _write(pid_t pid, int sockfd, const void *buf, const unsigned int count);
77 | int _read(pid_t pid, int sockfd, void *buf, const unsigned int count);
78 | int _close(pid_t pid, int sockfd);
79 | int _poll(pid_t pid, struct pollfd fds[], nfds_t nfds, int timeout);
80 | int _fcntl(pid_t pid, int fildes, int cmd, ...);
81 | int _getsockopt(pid_t pid, int fd, int level, int optname, void *optval, socklen_t *optlen);
82 | int _getpeername(pid_t pid, int socket, struct sockaddr *restrict address,
83 | socklen_t *restrict address_len);
84 | int _getsockname(pid_t pid, int socket, struct sockaddr *restrict address,
85 | socklen_t *restrict address_len);
86 |
87 | struct socket *socket_lookup(uint16_t sport, uint16_t dport);
88 | struct socket *socket_find(struct socket *sock);
89 | int socket_rd_acquire(struct socket *sock);
90 | int socket_wr_acquire(struct socket *sock);
91 | int socket_release(struct socket *sock);
92 | int socket_free(struct socket *sock);
93 | int socket_delete(struct socket *sock);
94 | void abort_sockets();
95 | void socket_debug();
96 |
97 | #endif
98 |
--------------------------------------------------------------------------------
/include/syshead.h:
--------------------------------------------------------------------------------
1 | #ifndef SYSHEAD_H
2 | #define SYSHEAD_H
3 |
4 | #include
5 | #include
6 | #include
7 | #include
8 | #include
9 | #include
10 | #include
11 | #include
12 | #include
13 | #include
14 | #include
15 | #include
16 | #include
17 | #include
18 | #include
19 | #include
20 | #include
21 | #include
22 | #include
23 | #include
24 | #include
25 | #include
26 | #include
27 | #include
28 |
29 | #endif
30 |
--------------------------------------------------------------------------------
/include/tcp.h:
--------------------------------------------------------------------------------
1 | #ifndef TCP_H_
2 | #define TCP_H_
3 | #include "syshead.h"
4 | #include "ip.h"
5 | #include "timer.h"
6 | #include "utils.h"
7 |
/*
 * Size of the fixed TCP header (struct tcphdr) in bytes and in 32-bit
 * words. Both expansions are fully parenthesized so they can be used
 * inside larger expressions (e.g. `x / TCP_DOFFSET`) without precedence
 * surprises.
 */
#define TCP_HDR_LEN (sizeof(struct tcphdr))
#define TCP_DOFFSET (sizeof(struct tcphdr) / 4)

/* TCP flag bit masks. */
#define TCP_FIN 0x01
#define TCP_SYN 0x02
#define TCP_RST 0x04
#define TCP_PSH 0x08
#define TCP_ACK 0x10

#define TCP_URG 0x20
#define TCP_ECN 0x40
#define TCP_WIN 0x80

/* NOTE(review): backoff presumably in milliseconds - confirm in tcp.c. */
#define TCP_SYN_BACKOFF 500
#define TCP_CONN_RETRIES 3

/* TCP option kinds and option lengths. */
#define TCP_OPT_NOOP 1
#define TCP_OPTLEN_MSS 4
#define TCP_OPT_MSS 2
#define TCP_OPT_SACK_OK 4
#define TCP_OPT_SACK 5
#define TCP_OPTLEN_SACK 2
#define TCP_OPT_TS 8

/* NOTE(review): timer values presumably in milliseconds - confirm against
 * the timer tick used by timer.c. */
#define TCP_2MSL 60000
#define TCP_USER_TIMEOUT 180000

/* View a struct sock pointer as the struct tcp_sock that embeds it as its
 * first member. */
#define tcp_sk(sk) ((struct tcp_sock *)(sk))
/* Header length in bytes from the `hl` field, which counts 32-bit words. */
#define tcp_hlen(tcp) ((tcp)->hl << 2)
37 |
/*
 * TCP tracing and drop helpers. With DEBUG_TCP defined the *_dbg macros
 * print through print_debug and the state/drop macros log before calling
 * the underlying __tcp_* function; otherwise the *_dbg macros expand to
 * nothing and the others call the __tcp_* function directly.
 *
 * - tcp_in_dbg / tcp_out_dbg: one line per segment, with addresses printed
 *   most-significant byte first and sequence numbers shown relative to the
 *   connection's irs/iss values.
 * - tcpsock_dbg: dumps the tcb counters, queue lengths, rto/backoff and the
 *   state name looked up in tcp_dbg_states[]. `msg` must be a string
 *   literal; `sk` is accessed as a struct sock pointer.
 * - tcp_set_state: logs the textual name of `state`, then calls
 *   __tcp_set_state().
 * - return_tcp_drop: logs, then executes `return __tcp_drop(...)` in the
 *   calling function.
 * - tcp_drop: logs, then calls __tcp_drop().
 *
 * NOTE(review): the DEBUG_TCP variant of tcp_drop passes the identifier
 * `sk` (not its `tsk` parameter) to tcpsock_dbg, so a variable named `sk`
 * must be in scope wherever tcp_drop is used in a debug build.
 * NOTE(review): macro arguments are expanded without parentheses; pass
 * plain identifiers.
 */
38 | #ifdef DEBUG_TCP
39 | extern const char *tcp_dbg_states[];
40 | #define tcp_in_dbg(hdr, sk, skb) \
41 | do { \
42 | print_debug("TCP %u.%u.%u.%u.%u > %u.%u.%u.%u.%u: " \
43 | "Flags [S%uA%uP%uF%uR%u], seq %u:%u, ack %u, win %u rto %d boff %d", \
44 | (uint8_t)(sk->daddr >> 24), (uint8_t)(sk->daddr >> 16), (uint8_t)(sk->daddr >> 8), (uint8_t)(sk->daddr >> 0), sk->dport, \
45 | (uint8_t)(sk->saddr >> 24), (uint8_t)(sk->saddr >> 16), (uint8_t)(sk->saddr >> 8), (uint8_t)(sk->saddr >> 0), sk->sport, \
46 | hdr->syn, hdr->ack, hdr->psh, hdr->fin, hdr->rst, hdr->seq - tcp_sk(sk)->tcb.irs, \
47 | hdr->seq + skb->dlen - tcp_sk(sk)->tcb.irs, \
48 | hdr->ack_seq - tcp_sk(sk)->tcb.iss, hdr->win, tcp_sk(sk)->rto, tcp_sk(sk)->backoff); \
49 | } while (0)
50 |
51 | #define tcp_out_dbg(hdr, sk, skb) \
52 | do { \
53 | print_debug("TCP %u.%u.%u.%u.%u > %u.%u.%u.%u.%u: " \
54 | "Flags [S%uA%uP%uF%uR%u], seq %u:%u, ack %u, win %u rto %d boff %d", \
55 | (uint8_t)(sk->saddr >> 24), (uint8_t)(sk->saddr >> 16), (uint8_t)(sk->saddr >> 8), (uint8_t)(sk->saddr >> 0), sk->sport, \
56 | (uint8_t)(sk->daddr >> 24), (uint8_t)(sk->daddr >> 16), (uint8_t)(sk->daddr >> 8), (uint8_t)(sk->daddr >> 0), sk->dport, \
57 | hdr->syn, hdr->ack, hdr->psh, hdr->fin, hdr->rst, hdr->seq - tcp_sk(sk)->tcb.iss, \
58 | hdr->seq + skb->dlen - tcp_sk(sk)->tcb.iss, \
59 | hdr->ack_seq - tcp_sk(sk)->tcb.irs, hdr->win, tcp_sk(sk)->rto, tcp_sk(sk)->backoff); \
60 | } while (0)
61 |
62 | #define tcpsock_dbg(msg, sk) \
63 | do { \
64 | print_debug("TCP x:%u > %u.%u.%u.%u.%u (snd_una %u, snd_nxt %u, snd_wnd %u, " \
65 | "snd_wl1 %u, snd_wl2 %u, rcv_nxt %u, rcv_wnd %u recv-q %d send-q %d " \
66 | "rto %d boff %d) state %s: "msg, \
67 | sk->sport, (uint8_t)(sk->daddr >> 24), (uint8_t)(sk->daddr >> 16), (uint8_t)(sk->daddr >> 8), (uint8_t)(sk->daddr >> 0), \
68 | sk->dport, tcp_sk(sk)->tcb.snd_una - tcp_sk(sk)->tcb.iss, \
69 | tcp_sk(sk)->tcb.snd_nxt - tcp_sk(sk)->tcb.iss, tcp_sk(sk)->tcb.snd_wnd, \
70 | tcp_sk(sk)->tcb.snd_wl1, tcp_sk(sk)->tcb.snd_wl2, \
71 | tcp_sk(sk)->tcb.rcv_nxt - tcp_sk(sk)->tcb.irs, tcp_sk(sk)->tcb.rcv_wnd, \
72 | sk->receive_queue.qlen, sk->write_queue.qlen, tcp_sk(sk)->rto, tcp_sk(sk)->backoff, \
73 | tcp_dbg_states[sk->state]); \
74 | } while (0)
75 |
76 | #define tcp_set_state(sk, state) \
77 | do { \
78 | tcpsock_dbg("state is now "#state, sk); \
79 | __tcp_set_state(sk, state); \
80 | } while (0)
81 |
82 | #define return_tcp_drop(sk, skb) \
83 | do { \
84 | tcpsock_dbg("dropping packet", sk); \
85 | return __tcp_drop(sk, skb); \
86 | } while (0)
87 |
88 | #define tcp_drop(tsk, skb) \
89 | do { \
90 | tcpsock_dbg("dropping packet", sk); \
91 | __tcp_drop(tsk, skb); \
92 | } while (0)
93 |
94 | #else
95 | #define tcp_in_dbg(hdr, sk, skb)
96 | #define tcp_out_dbg(hdr, sk, skb)
97 | #define tcpsock_dbg(msg, sk)
98 | #define tcp_set_state(sk, state) __tcp_set_state(sk, state)
99 | #define return_tcp_drop(tsk, skb) return __tcp_drop(tsk, skb)
100 | #define tcp_drop(tsk, skb) __tcp_drop(tsk, skb)
101 | #endif
102 |
/*
 * Packed TCP header followed by a variable-length trailer in `data`.
 *
 * NOTE(review): the bit-field order (rsvd before hl, fin first in the flag
 * byte) appears to assume a compiler that allocates bit-fields starting
 * from the least significant bit, as on little-endian Linux ABIs; struct
 * iphdr carries a matching "Support Big Endian hosts" TODO.
 */
103 | struct tcphdr {
104 | uint16_t sport;
105 | uint16_t dport;
106 | uint32_t seq;
107 | uint32_t ack_seq;
108 | uint8_t rsvd : 4;
109 | uint8_t hl : 4; /* header length in 32-bit words; see tcp_hlen() */
110 | uint8_t fin : 1,
111 | syn : 1,
112 | rst : 1,
113 | psh : 1,
114 | ack : 1,
115 | urg : 1,
116 | ece : 1,
117 | cwr : 1;
118 | uint16_t win;
119 | uint16_t csum;
120 | uint16_t urp;
121 | uint8_t data[]; /* flexible array member: bytes following the fixed header */
122 | } __attribute__((packed));
123 |
/* Option values in struct form.
 * NOTE(review): presumably filled while parsing a segment's options -
 * confirm in tcp_input.c. */
124 | struct tcp_options {
125 | uint16_t options;
126 | uint16_t mss;
127 | uint8_t sack;
128 | };
129 |
/* Packed 4-byte MSS option: kind, length, value (sizes match
 * TCP_OPTLEN_MSS). */
130 | struct tcp_opt_mss {
131 | uint8_t kind;
132 | uint8_t len;
133 | uint16_t mss;
134 | } __attribute__((packed));
135 |
/* Packed 12-byte record: source and destination address, a zero byte, the
 * protocol number and a length.
 * NOTE(review): looks like the TCP checksum pseudo-header - confirm. */
136 | struct tcpiphdr {
137 | uint32_t saddr;
138 | uint32_t daddr;
139 | uint8_t zero;
140 | uint8_t proto;
141 | uint16_t tlen;
142 | } __attribute__((packed));
143 |
/*
 * TCP connection states. Enumerators take consecutive values from 0, and
 * the debug macro tcpsock_dbg indexes tcp_dbg_states[] with the socket
 * state, so that name table has to list the states in this same order.
 */
144 | enum tcp_states {
145 | TCP_LISTEN, /* represents waiting for a connection request from any remote
146 | TCP and port. */
147 | TCP_SYN_SENT, /* represents waiting for a matching connection request
148 | after having sent a connection request. */
149 | TCP_SYN_RECEIVED, /* represents waiting for a confirming connection
150 | request acknowledgment after having both received and sent a
151 | connection request. */
152 | TCP_ESTABLISHED, /* represents an open connection, data received can be
153 | delivered to the user. The normal state for the data transfer phase
154 | of the connection. */
155 | TCP_FIN_WAIT_1, /* represents waiting for a connection termination request
156 | from the remote TCP, or an acknowledgment of the connection
157 | termination request previously sent. */
158 | TCP_FIN_WAIT_2, /* represents waiting for a connection termination request
159 | from the remote TCP. */
160 | TCP_CLOSE, /* represents no connection state at all. */
161 | TCP_CLOSE_WAIT, /* represents waiting for a connection termination request
162 | from the local user. */
163 | TCP_CLOSING, /* represents waiting for a connection termination request
164 | acknowledgment from the remote TCP. */
165 | TCP_LAST_ACK, /* represents waiting for an acknowledgment of the
166 | connection termination request previously sent to the remote TCP
167 | (which includes an acknowledgment of its connection termination
168 | request). */
169 | TCP_TIME_WAIT, /* represents waiting for enough time to pass to be sure
170 | the remote TCP received the acknowledgment of its connection
171 | termination request. */
172 | };
173 |
/* Transmission control block: send-side (snd_*, iss) and receive-side
 * (rcv_*, irs) sequence-space variables of one connection. */
174 | struct tcb {
175 | uint32_t snd_una; /* oldest unacknowledged sequence number */
176 | uint32_t snd_nxt; /* next sequence number to be sent */
177 | uint32_t snd_wnd;
178 | uint32_t snd_up;
179 | uint32_t snd_wl1;
180 | uint32_t snd_wl2;
181 | uint32_t iss; /* subtracted from send-side sequence numbers in the trace macros */
182 | uint32_t rcv_nxt; /* next sequence number expected on an incoming segments, and
183 | is the left or lower edge of the receive window */
184 | uint32_t rcv_wnd;
185 | uint32_t rcv_up;
186 | uint32_t irs; /* subtracted from receive-side sequence numbers in the trace macros */
187 | };
188 |
/* Packed pair of 32-bit edges describing one SACK block; struct tcp_sock
 * keeps an array of four of these. */
189 | struct tcp_sack_block {
190 | uint32_t left;
191 | uint32_t right;
192 | } __attribute__((packed));
193 |
/*
 * TCP-specific socket state. `sk` must remain the first member: tcp_sk()
 * converts a struct sock pointer to a struct tcp_sock pointer with a plain
 * cast.
 */
194 | struct tcp_sock {
195 | struct sock sk; /* generic socket state; keep first (see tcp_sk) */
196 | int fd;
197 | uint16_t tcp_header_len;
198 | struct tcb tcb; /* sequence-space variables */
199 | uint8_t flags;
200 | uint8_t backoff; /* printed as "boff" by the trace macros */
201 | int32_t srtt;
202 | int32_t rttvar;
203 | uint32_t rto; /* printed as "rto" by the trace macros */
204 | struct timer *retransmit;
205 | struct timer *delack;
206 | struct timer *keepalive;
207 | struct timer *linger;
208 | uint8_t delacks;
209 | uint16_t rmss;
210 | uint16_t smss;
211 | uint16_t cwnd;
212 | uint32_t inflight;
213 |
214 | uint8_t sackok;
215 | uint8_t sacks_allowed;
216 | uint8_t sacklen;
217 | struct tcp_sack_block sacks[4];
218 |
219 | uint8_t tsopt;
220 |
221 | struct sk_buff_head ofo_queue; /* Out-of-order queue */
222 | };
223 |
224 | static inline struct tcphdr *tcp_hdr(const struct sk_buff *skb)
225 | {
226 | return (struct tcphdr *)(skb->head + ETH_HDR_LEN + IP_HDR_LEN);
227 | }
228 |
229 | void tcp_init();
230 | void tcp_in(struct sk_buff *skb);
231 | int tcp_checksum(struct tcp_sock *sock, struct tcphdr *thdr);
232 | void tcp_select_initial_window(uint32_t *rcv_wnd);
233 |
234 | int generate_iss();
235 | struct sock *tcp_alloc_sock();
236 | int tcp_v4_init_sock(struct sock *sk);
237 | int tcp_init_sock(struct sock *sk);
238 | void __tcp_set_state(struct sock *sk, uint32_t state);
239 | int tcp_v4_checksum(struct sk_buff *skb, uint32_t saddr, uint32_t daddr);
240 | int tcp_v4_connect(struct sock *sk, const struct sockaddr *addr, int addrlen, int flags);
241 | int tcp_connect(struct sock *sk);
242 | int tcp_disconnect(struct sock *sk, int flags);
243 | int tcp_write(struct sock *sk, const void *buf, int len);
244 | int tcp_read(struct sock *sk, void *buf, int len);
245 | int tcp_receive(struct tcp_sock *tsk, void *buf, int len);
246 | int tcp_input_state(struct sock *sk, struct tcphdr *th, struct sk_buff *skb);
247 | int tcp_send_synack(struct sock *sk);
248 | int tcp_send_next(struct sock *sk, int amount);
249 | int tcp_send_ack(struct sock *sk);
250 | void *tcp_send_delack(void *arg);
251 | int tcp_queue_fin(struct sock *sk);
252 | int tcp_send_fin(struct sock *sk);
253 | int tcp_send(struct tcp_sock *tsk, const void *buf, int len);
254 | int tcp_send_reset(struct tcp_sock *tsk);
255 | int tcp_send_challenge_ack(struct sock *sk, struct sk_buff *skb);
256 | int tcp_recv_notify(struct sock *sk);
257 | int tcp_close(struct sock *sk);
258 | int tcp_abort(struct sock *sk);
259 | int tcp_done(struct sock *sk);
260 | void tcp_rtt(struct tcp_sock *tsk);
261 | void tcp_handle_fin_state(struct sock *sk);
262 | void tcp_enter_time_wait(struct sock *sk);
263 | void tcp_clear_timers(struct sock *sk);
264 | void tcp_rearm_rto_timer(struct tcp_sock *tsk);
265 | void tcp_stop_rto_timer(struct tcp_sock *tsk);
266 | void tcp_release_rto_timer(struct tcp_sock *tsk);
267 | void tcp_stop_delack_timer(struct tcp_sock *tsk);
268 | void tcp_release_delack_timer(struct tcp_sock *tsk);
269 | void tcp_rearm_user_timeout(struct sock *sk);
270 | int tcp_calculate_sacks(struct tcp_sock *tsk);
271 |
272 | #endif
273 |
--------------------------------------------------------------------------------
/include/tcp_data.h:
--------------------------------------------------------------------------------
1 | #ifndef _TCP_DATA_H
2 | #define _TCP_DATA_H
3 |
4 | #include "tcp.h"
5 |
6 | int tcp_data_dequeue(struct tcp_sock *tsk, void *user_buf, int len);
7 | int tcp_data_queue(struct tcp_sock *tsk, struct tcphdr *th, struct sk_buff *skb);
8 | int tcp_data_close(struct tcp_sock *tsk, struct tcphdr *th, struct sk_buff *skb);
9 | #endif
10 |
--------------------------------------------------------------------------------
/include/timer.h:
--------------------------------------------------------------------------------
1 | #ifndef TIMER_H_
2 | #define TIMER_H_
3 |
4 | #include "syshead.h"
5 | #include "utils.h"
6 | #include "list.h"
7 |
/*
 * Trace helper for timers: prints the identifier `tick` (which must be
 * visible where the macro is expanded), the literal `msg` and t->expires.
 * Unlike the other *_dbg macros in this project it is not wrapped in an
 * #ifdef, so it is always active. `msg` must be a string literal.
 */
8 | #define timer_dbg(msg, t) \
9 | do { \
10 | print_debug("Timer at %d: "msg": expires %d", tick, t->expires); \
11 | } while (0)
12 |
/* One scheduled timer, linkable into a list through `list`. */
13 | struct timer {
14 | struct list_head list; /* list linkage */
15 | int refcnt;
16 | uint32_t expires; /* printed by timer_dbg next to the current tick */
17 | int cancelled;
18 | void *(*handler)(void *); /* callback; presumably invoked with `arg` on expiry - confirm in timer.c */
19 | void *arg;
20 | pthread_mutex_t lock;
21 | };
22 |
23 | struct timer *timer_add(uint32_t expire, void *(*handler)(void *), void *arg);
24 | void timer_oneshot(uint32_t expire, void *(*handler)(void *), void *arg);
25 | void timer_release(struct timer *t);
26 | void timer_cancel(struct timer *t);
27 | void *timers_start();
28 | int timer_get_tick();
29 |
30 | #endif
31 |
--------------------------------------------------------------------------------
/include/tuntap_if.h:
--------------------------------------------------------------------------------
1 | #ifndef TUNTAP_IF_H
2 | #define TUNTAP_IF_H
3 | void tun_init();
4 | int tun_read(char *buf, int len);
5 | int tun_write(char *buf, int len);
6 | void free_tun();
7 | #endif
8 |
--------------------------------------------------------------------------------
/include/utils.h:
--------------------------------------------------------------------------------
1 | #ifndef UTILS_H
2 | #define UTILS_H
3 |
/* NOTE(review): presumably the buffer size used when building shell
 * commands for run_cmd() - confirm in utils.c. */
4 | #define CMDBUFLEN 100
5 |
/*
 * printf-style debug line on stdout; " - <file>:<line>" and a newline are
 * appended automatically. `str` must be a string literal.
 * NOTE(review): the expansion already ends in ';', so `print_debug(...);`
 * yields an extra empty statement and cannot be used as the sole body of an
 * if that is followed by else.
 */
6 | #define print_debug(str, ...) \
7 | printf(str" - %s:%u\n", ##__VA_ARGS__, __FILE__, __LINE__);
8 |
/* printf-style message on stderr; nothing is appended. The expansion also
 * ends in ';' (same caveat as print_debug). */
9 | #define print_err(str, ...) \
10 | fprintf(stderr, str, ##__VA_ARGS__);
11 |
12 | int run_cmd(char *cmd, ...);
13 | uint32_t sum_every_16bits(void *addr, int count);
14 | uint16_t checksum(void *addr, int count, int start_sum);
15 | int get_address(char *host, char *port, struct sockaddr *addr);
16 | uint32_t parse_ipv4_string(char *addr);
17 | uint32_t min(uint32_t x, uint32_t y);
18 |
19 | #endif
20 |
--------------------------------------------------------------------------------
/include/wait.h:
--------------------------------------------------------------------------------
1 | #ifndef _WAIT_H
2 | #define _WAIT_H
3 |
4 | #include "syshead.h"
5 |
/*
 * Minimal sleep/wake primitive: a condition variable, the mutex that goes
 * with it, and a flag recording whether a sleeper has been parked.
 */
struct wait_lock {
    pthread_cond_t ready;
    pthread_mutex_t lock;
    uint8_t sleeping;
};

/* Initialise the condition variable and mutex with default attributes and
 * clear the sleeping flag. Always returns 0. */
static inline int wait_init(struct wait_lock *w)
{
    pthread_cond_init(&w->ready, NULL);
    pthread_mutex_init(&w->lock, NULL);
    w->sleeping = 0;

    return 0;
}

/* Signal one waiter and clear the sleeping flag, both while holding the
 * mutex. Always returns 0. */
static inline int wait_wakeup(struct wait_lock *w)
{
    pthread_mutex_lock(&w->lock);

    pthread_cond_signal(&w->ready);
    w->sleeping = 0;

    pthread_mutex_unlock(&w->lock);

    return 0;
}

/*
 * Mark the waiter as sleeping and block on the condition variable.
 *
 * The mutex is not taken here; pthread_cond_wait() requires it to be
 * locked by the calling thread, so the caller must hold w->lock on entry
 * (and still holds it on return). The wait is not wrapped in a predicate
 * loop. Always returns 0.
 */
static inline int wait_sleep(struct wait_lock *w)
{
    w->sleeping = 1;
    pthread_cond_wait(&w->ready, &w->lock);

    return 0;
}

/* Wake a possible sleeper, then destroy the mutex and condition variable. */
static inline void wait_free(struct wait_lock *w)
{
    wait_wakeup(w);

    pthread_mutex_destroy(&w->lock);
    pthread_cond_destroy(&w->ready);
}
43 |
44 | #endif
45 |
--------------------------------------------------------------------------------
/src/arp.c:
--------------------------------------------------------------------------------
1 | #include "arp.h"
2 | #include "netdev.h"
3 | #include "skbuff.h"
4 | #include "list.h"
5 |
6 | /*
7 | * https://tools.ietf.org/html/rfc826
8 | */
9 |
/* All-ones hardware address used as destination for ARP requests. */
10 | static uint8_t broadcast_hw[] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
/* List of struct arp_cache_entry (linked through their `list` member). */
11 | static LIST_HEAD(arp_cache);
/* Guards arp_cache in the insert, update and lookup paths; free_arp() walks
 * the list without taking it. */
12 | static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
13 |
14 | static struct sk_buff *arp_alloc_skb()
15 | {
16 | struct sk_buff *skb = alloc_skb(ETH_HDR_LEN + ARP_HDR_LEN + ARP_DATA_LEN);
17 | skb_reserve(skb, ETH_HDR_LEN + ARP_HDR_LEN + ARP_DATA_LEN);
18 | skb->protocol = htons(ETH_P_ARP);
19 |
20 | return skb;
21 | }
22 |
23 | static struct arp_cache_entry *arp_entry_alloc(struct arp_hdr *hdr, struct arp_ipv4 *data)
24 | {
25 | struct arp_cache_entry *entry = malloc(sizeof(struct arp_cache_entry));
26 | list_init(&entry->list);
27 |
28 | entry->state = ARP_RESOLVED;
29 | entry->hwtype = hdr->hwtype;
30 | entry->sip = data->sip;
31 | memcpy(entry->smac, data->smac, sizeof(entry->smac));
32 |
33 | return entry;
34 | }
35 |
36 | static int insert_arp_translation_table(struct arp_hdr *hdr, struct arp_ipv4 *data)
37 | {
38 | struct arp_cache_entry *entry = arp_entry_alloc(hdr, data);
39 |
40 | pthread_mutex_lock(&lock);
41 | list_add_tail(&entry->list, &arp_cache);
42 | pthread_mutex_unlock(&lock);
43 |
44 | return 0;
45 | }
46 |
47 | static int update_arp_translation_table(struct arp_hdr *hdr, struct arp_ipv4 *data)
48 | {
49 | struct list_head *item;
50 | struct arp_cache_entry *entry;
51 |
52 | pthread_mutex_lock(&lock);
53 | list_for_each(item, &arp_cache) {
54 | entry = list_entry(item, struct arp_cache_entry, list);
55 |
56 | if (entry->hwtype == hdr->hwtype && entry->sip == data->sip) {
57 | memcpy(entry->smac, data->smac, 6);
58 | pthread_mutex_unlock(&lock);
59 |
60 | return 1;
61 | }
62 | }
63 |
64 | pthread_mutex_unlock(&lock);
65 |
66 | return 0;
67 | }
68 |
/* Initialisation hook of the ARP module; it currently has nothing to set
 * up because the cache list and its mutex are statically initialised. */
void arp_init()
{
    /* intentionally empty */
}
73 |
74 | void arp_rcv(struct sk_buff *skb)
75 | {
76 | struct arp_hdr *arphdr;
77 | struct arp_ipv4 *arpdata;
78 | struct netdev *netdev;
79 | int merge = 0;
80 |
81 | arphdr = arp_hdr(skb);
82 |
83 | arphdr->hwtype = ntohs(arphdr->hwtype);
84 | arphdr->protype = ntohs(arphdr->protype);
85 | arphdr->opcode = ntohs(arphdr->opcode);
86 | arp_dbg("in", arphdr);
87 |
88 | if (arphdr->hwtype != ARP_ETHERNET) {
89 | printf("ARP: Unsupported HW type\n");
90 | goto drop_pkt;
91 | }
92 |
93 | if (arphdr->protype != ARP_IPV4) {
94 | printf("ARP: Unsupported protocol\n");
95 | goto drop_pkt;
96 | }
97 |
98 | arpdata = (struct arp_ipv4 *) arphdr->data;
99 |
100 | arpdata->sip = ntohl(arpdata->sip);
101 | arpdata->dip = ntohl(arpdata->dip);
102 | arpdata_dbg("receive", arpdata);
103 |
104 | merge = update_arp_translation_table(arphdr, arpdata);
105 |
106 | if (!(netdev = netdev_get(arpdata->dip))) {
107 | printf("ARP was not for us\n");
108 | goto drop_pkt;
109 | }
110 |
111 | if (!merge && insert_arp_translation_table(arphdr, arpdata) != 0) {
112 | print_err("ERR: No free space in ARP translation table\n");
113 | goto drop_pkt;
114 | }
115 |
116 | switch (arphdr->opcode) {
117 | case ARP_REQUEST:
118 | arp_reply(skb, netdev);
119 | return;
120 | default:
121 | printf("ARP: Opcode not supported\n");
122 | goto drop_pkt;
123 | }
124 |
125 | drop_pkt:
126 | free_skb(skb);
127 | return;
128 | }
129 |
130 | int arp_request(uint32_t sip, uint32_t dip, struct netdev *netdev)
131 | {
132 | struct sk_buff *skb;
133 | struct arp_hdr *arp;
134 | struct arp_ipv4 *payload;
135 | int rc = 0;
136 |
137 | skb = arp_alloc_skb();
138 |
139 | if (!skb) return -1;
140 |
141 | skb->dev = netdev;
142 |
143 | payload = (struct arp_ipv4 *) skb_push(skb, ARP_DATA_LEN);
144 |
145 | memcpy(payload->smac, netdev->hwaddr, netdev->addr_len);
146 | payload->sip = sip;
147 |
148 | memcpy(payload->dmac, broadcast_hw, netdev->addr_len);
149 | payload->dip = dip;
150 |
151 | arp = (struct arp_hdr *) skb_push(skb, ARP_HDR_LEN);
152 |
153 | arp_dbg("req", arp);
154 | arp->opcode = htons(ARP_REQUEST);
155 | arp->hwtype = htons(ARP_ETHERNET);
156 | arp->protype = htons(ETH_P_IP);
157 | arp->hwsize = netdev->addr_len;
158 | arp->prosize = 4;
159 |
160 | arpdata_dbg("req", payload);
161 | payload->sip = htonl(payload->sip);
162 | payload->dip = htonl(payload->dip);
163 |
164 | rc = netdev_transmit(skb, broadcast_hw, ETH_P_ARP);
165 | free_skb(skb);
166 | return rc;
167 | }
168 |
169 | void arp_reply(struct sk_buff *skb, struct netdev *netdev)
170 | {
171 | struct arp_hdr *arphdr;
172 | struct arp_ipv4 *arpdata;
173 |
174 | arphdr = arp_hdr(skb);
175 |
176 | skb_reserve(skb, ETH_HDR_LEN + ARP_HDR_LEN + ARP_DATA_LEN);
177 | skb_push(skb, ARP_HDR_LEN + ARP_DATA_LEN);
178 |
179 | arpdata = (struct arp_ipv4 *) arphdr->data;
180 |
181 | memcpy(arpdata->dmac, arpdata->smac, 6);
182 | arpdata->dip = arpdata->sip;
183 |
184 | memcpy(arpdata->smac, netdev->hwaddr, 6);
185 | arpdata->sip = netdev->addr;
186 |
187 | arphdr->opcode = ARP_REPLY;
188 |
189 | arp_dbg("reply", arphdr);
190 | arphdr->opcode = htons(arphdr->opcode);
191 | arphdr->hwtype = htons(arphdr->hwtype);
192 | arphdr->protype = htons(arphdr->protype);
193 |
194 | arpdata_dbg("reply", arpdata);
195 | arpdata->sip = htonl(arpdata->sip);
196 | arpdata->dip = htonl(arpdata->dip);
197 |
198 | skb->dev = netdev;
199 |
200 | netdev_transmit(skb, arpdata->dmac, ETH_P_ARP);
201 | free_skb(skb);
202 | }
203 |
204 | /*
205 | * Returns the HW address of the given source IP address
206 | * NULL if not found
207 | */
208 | unsigned char* arp_get_hwaddr(uint32_t sip)
209 | {
210 | struct list_head *item;
211 | struct arp_cache_entry *entry;
212 |
213 | pthread_mutex_lock(&lock);
214 | list_for_each(item, &arp_cache) {
215 | entry = list_entry(item, struct arp_cache_entry, list);
216 |
217 | if (entry->state == ARP_RESOLVED &&
218 | entry->sip == sip) {
219 | arpcache_dbg("entry", entry);
220 |
221 | uint8_t *copy = entry->smac;
222 | pthread_mutex_unlock(&lock);
223 |
224 | return copy;
225 | }
226 | }
227 |
228 | pthread_mutex_unlock(&lock);
229 |
230 | return NULL;
231 | }
232 |
233 | void free_arp()
234 | {
235 | struct list_head *item, *tmp;
236 | struct arp_cache_entry *entry;
237 |
238 | list_for_each_safe(item, tmp, &arp_cache) {
239 | entry = list_entry(item, struct arp_cache_entry, list);
240 | list_del(item);
241 |
242 | free(entry);
243 | }
244 | }
245 |
--------------------------------------------------------------------------------
/src/cli.c:
--------------------------------------------------------------------------------
1 | #include "syshead.h"
2 | #include "utils.h"
3 | #include "cli.h"
4 |
/* Set to 1 by the -d command line option (see parse_opts). */
int debug = 0;
6 |
/*
 * Prints the command line help text through print_err() and terminates
 * the process with exit status 1. Never returns.
 *
 * app: program name shown on the "Usage:" line.
 */
static void usage(char *app)
{
    print_err("Usage: %s\n", app);
    print_err("\n");
    print_err("Linux TCP/IP stack implemented with TUN/TAP devices.\n");
    print_err("Requires the CAP_NET_ADMIN capability. See capabilities(7).\n");
    print_err("See https://www.kernel.org/doc/Documentation/networking/tuntap.txt\n");
    print_err("\n");
    print_err("Options:\n");
    print_err("  -d Debug logging and tracing\n");
    print_err("  -h Print usage\n");
    print_err("\n");
    exit(1);
}
21 |
22 | extern int optind;
23 |
24 | static int parse_opts(int *argc, char*** argv)
25 | {
26 | int opt;
27 |
28 | while ((opt = getopt(*argc, *argv, "hd")) != -1) {
29 | switch (opt) {
30 | case 'd':
31 | debug = 1;
32 | break;
33 | case 'h':
34 | default:
35 | usage(*argv[0]);
36 | }
37 | }
38 |
39 | *argc -= optind;
40 | *argv += optind;
41 |
42 | return optind;
43 | }
44 |
/*
 * Entry point for command line handling; delegates to parse_opts().
 * argc/argv are local copies, so the caller's values are not modified.
 */
void parse_cli(int argc, char **argv)
{
    (void) parse_opts(&argc, &argv);
}
49 |
--------------------------------------------------------------------------------
/src/dst.c:
--------------------------------------------------------------------------------
1 | #include "syshead.h"
2 | #include "dst.h"
3 | #include "ip.h"
4 | #include "arp.h"
5 |
6 | int dst_neigh_output(struct sk_buff *skb)
7 | {
8 | struct iphdr *iphdr = ip_hdr(skb);
9 | struct netdev *netdev = skb->dev;
10 | struct rtentry *rt = skb->rt;
11 | uint32_t daddr = ntohl(iphdr->daddr);
12 | uint32_t saddr = ntohl(iphdr->saddr);
13 |
14 | uint8_t *dmac;
15 |
16 | if (rt->flags & RT_GATEWAY) {
17 | daddr = rt->gateway;
18 | }
19 |
20 | dmac = arp_get_hwaddr(daddr);
21 |
22 | if (dmac) {
23 | return netdev_transmit(skb, dmac, ETH_P_IP);
24 | } else {
25 | arp_request(saddr, daddr, netdev);
26 |
27 | /* Inform upper layer that traffic was not sent, retry later */
28 | return -1;
29 | }
30 | }
31 |
--------------------------------------------------------------------------------
/src/icmpv4.c:
--------------------------------------------------------------------------------
1 | #include "ethernet.h"
2 | #include "icmpv4.h"
3 | #include "ip.h"
4 | #include "utils.h"
5 |
6 | void icmpv4_incoming(struct sk_buff *skb)
7 | {
8 | struct iphdr *iphdr = ip_hdr(skb);
9 | struct icmp_v4 *icmp = (struct icmp_v4 *) iphdr->data;
10 |
11 | //TODO: Check csum
12 |
13 | switch (icmp->type) {
14 | case ICMP_V4_ECHO:
15 | icmpv4_reply(skb);
16 | return;
17 | case ICMP_V4_DST_UNREACHABLE:
18 | print_err("ICMPv4 received 'dst unreachable' code %d, "
19 | "check your routes and firewall rules\n", icmp->code);
20 | goto drop_pkt;
21 | default:
22 | print_err("ICMPv4 did not match supported types\n");
23 | goto drop_pkt;
24 | }
25 |
26 | drop_pkt:
27 | free_skb(skb);
28 | return;
29 | }
30 |
31 | void icmpv4_reply(struct sk_buff *skb)
32 | {
33 | struct iphdr *iphdr = ip_hdr(skb);
34 | struct icmp_v4 *icmp;
35 | struct sock sk;
36 | memset(&sk, 0, sizeof(struct sock));
37 |
38 | uint16_t icmp_len = iphdr->len - (iphdr->ihl * 4);
39 |
40 | skb_reserve(skb, ETH_HDR_LEN + IP_HDR_LEN + icmp_len);
41 | skb_push(skb, icmp_len);
42 |
43 | icmp = (struct icmp_v4 *)skb->data;
44 |
45 | icmp->type = ICMP_V4_REPLY;
46 | icmp->csum = 0;
47 | icmp->csum = checksum(icmp, icmp_len, 0);
48 |
49 | skb->protocol = ICMPV4;
50 | sk.daddr = iphdr->saddr;
51 |
52 | ip_output(&sk, skb);
53 | free_skb(skb);
54 | }
55 |
--------------------------------------------------------------------------------
/src/inet.c:
--------------------------------------------------------------------------------
1 | #include "syshead.h"
2 | #include "inet.h"
3 | #include "socket.h"
4 | #include "sock.h"
5 | #include "tcp.h"
6 | #include "wait.h"
7 |
8 | extern struct net_ops tcp_ops;
9 |
10 | static int inet_stream_connect(struct socket *sock, const struct sockaddr *addr,
11 | int addr_len, int flags);
12 |
/* Number of entries of inet_ops[] that inet_create() scans. */
static int INET_OPS = 1;

/* Address family descriptor; sockets of this family are set up by inet_create(). */
struct net_family inet = {
    .create = inet_create,
};

/* Socket-level operation table installed on sockets matched to the stream type. */
static struct sock_ops inet_stream_ops = {
    .connect = &inet_stream_connect,
    .write = &inet_write,
    .read = &inet_read,
    .close = &inet_close,
    .free = &inet_free,
    .abort = &inet_abort,
    .getpeername = &inet_getpeername,
    .getsockname = &inet_getsockname,
};

/* Supported socket types: each entry pairs socket ops with protocol ops. */
static struct sock_type inet_ops[] = {
    {
        .sock_ops = &inet_stream_ops,
        .net_ops = &tcp_ops,
        .type = SOCK_STREAM,
        .protocol = IPPROTO_TCP,
    }
};
38 |
39 | int inet_create(struct socket *sock, int protocol)
40 | {
41 | struct sock *sk;
42 | struct sock_type *skt = NULL;
43 |
44 | for (int i = 0; i < INET_OPS; i++) {
45 | if (inet_ops[i].type & sock->type) {
46 | skt = &inet_ops[i];
47 | break;
48 | }
49 | }
50 |
51 | if (!skt) {
52 | print_err("Could not find socktype for socket\n");
53 | return 1;
54 | }
55 |
56 | sock->ops = skt->sock_ops;
57 |
58 | sk = sk_alloc(skt->net_ops, protocol);
59 | sk->protocol = protocol;
60 |
61 | sock_init_data(sock, sk);
62 |
63 | return 0;
64 | }
65 |
/* Placeholder: performs no work and always reports success (0). */
int inet_socket(struct socket *sock, int protocol)
{
    (void) sock;
    (void) protocol;

    return 0;
}
70 |
/* Placeholder: ignores all arguments and always reports success (0). */
int inet_connect(struct socket *sock, struct sockaddr *addr,
                 int addr_len, int flags)
{
    (void) sock;
    (void) addr;
    (void) addr_len;
    (void) flags;

    return 0;
}
76 |
77 | static int inet_stream_connect(struct socket *sock, const struct sockaddr *addr,
78 | int addr_len, int flags)
79 | {
80 | struct sock *sk = sock->sk;
81 | int rc = 0;
82 |
83 | if (addr_len < sizeof(addr->sa_family)) {
84 | return -EINVAL;
85 | }
86 |
87 | if (addr->sa_family == AF_UNSPEC) {
88 | sk->ops->disconnect(sk, flags);
89 | return -EAFNOSUPPORT;
90 | }
91 |
92 | switch (sock->state) {
93 | default:
94 | sk->err = -EINVAL;
95 | goto out;
96 | case SS_CONNECTED:
97 | sk->err = -EISCONN;
98 | goto out;
99 | case SS_CONNECTING:
100 | sk->err = -EALREADY;
101 | goto out;
102 | case SS_UNCONNECTED:
103 | sk->err = -EISCONN;
104 | if (sk->state != TCP_CLOSE) {
105 | goto out;
106 | }
107 |
108 | sk->ops->connect(sk, addr, addr_len, flags);
109 | sock->state = SS_CONNECTING;
110 | sk->err = -EINPROGRESS;
111 |
112 | if (sock->flags & O_NONBLOCK) {
113 | goto out;
114 | }
115 |
116 | pthread_mutex_lock(&sock->sleep.lock);
117 | while (sock->state == SS_CONNECTING && sk->err == -EINPROGRESS) {
118 | socket_release(sock);
119 | wait_sleep(&sock->sleep);
120 | socket_wr_acquire(sock);
121 | }
122 | pthread_mutex_unlock(&sock->sleep.lock);
123 | socket_wr_acquire(sock);
124 |
125 | switch (sk->err) {
126 | case -ETIMEDOUT:
127 | case -ECONNREFUSED:
128 | goto sock_error;
129 | }
130 |
131 | if (sk->err != 0) {
132 | goto out;
133 | }
134 |
135 | sock->state = SS_CONNECTED;
136 | break;
137 | }
138 |
139 | out:
140 | return sk->err;
141 | sock_error:
142 | rc = sk->err;
143 | return rc;
144 | }
145 |
146 | int inet_write(struct socket *sock, const void *buf, int len)
147 | {
148 | struct sock *sk = sock->sk;
149 |
150 | return sk->ops->write(sk, buf, len);
151 | }
152 |
153 | int inet_read(struct socket *sock, void *buf, int len)
154 | {
155 | struct sock *sk = sock->sk;
156 |
157 | return sk->ops->read(sk, buf, len);
158 | }
159 |
160 | struct sock *inet_lookup(struct sk_buff *skb, uint16_t sport, uint16_t dport)
161 | {
162 | struct socket *sock = socket_lookup(sport, dport);
163 | if (sock == NULL) return NULL;
164 |
165 | return sock->sk;
166 | }
167 |
168 | int inet_close(struct socket *sock)
169 | {
170 | if (!sock) {
171 | return 0;
172 | }
173 |
174 | struct sock *sk = sock->sk;
175 |
176 | return sock->sk->ops->close(sk);
177 | }
178 |
179 | int inet_free(struct socket *sock)
180 | {
181 | struct sock *sk = sock->sk;
182 | sock_free(sk);
183 | free(sock->sk);
184 |
185 | return 0;
186 | }
187 |
188 | int inet_abort(struct socket *sock)
189 | {
190 | struct sock *sk = sock->sk;
191 |
192 | if (sk) {
193 | sk->ops->abort(sk);
194 | }
195 |
196 | return 0;
197 | }
198 |
199 | int inet_getpeername(struct socket *sock, struct sockaddr *restrict address,
200 | socklen_t *address_len)
201 | {
202 | struct sock *sk = sock->sk;
203 |
204 | if (sk == NULL) {
205 | return -1;
206 | }
207 |
208 | struct sockaddr_in *res = (struct sockaddr_in *) address;
209 | res->sin_family = AF_INET;
210 | res->sin_port = htons(sk->dport);
211 | res->sin_addr.s_addr = htonl(sk->daddr);
212 | *address_len = sizeof(struct sockaddr_in);
213 |
214 | inet_dbg(sock, "geetpeername sin_family %d sin_port %d sin_addr %d addrlen %d",
215 | res->sin_family, ntohs(res->sin_port), ntohl(res->sin_addr.s_addr), *address_len);
216 |
217 | return 0;
218 | }
219 | int inet_getsockname(struct socket *sock, struct sockaddr *restrict address,
220 | socklen_t *address_len)
221 | {
222 | struct sock *sk = sock->sk;
223 |
224 | if (sk == NULL) {
225 | return -1;
226 | }
227 |
228 | struct sockaddr_in *res = (struct sockaddr_in *) address;
229 | res->sin_family = AF_INET;
230 | res->sin_port = htons(sk->sport);
231 | res->sin_addr.s_addr = htonl(sk->saddr);
232 | *address_len = sizeof(struct sockaddr_in);
233 |
234 | inet_dbg(sock, "getsockname sin_family %d sin_port %d sin_addr %d addrlen %d",
235 | res->sin_family, ntohs(res->sin_port), ntohl(res->sin_addr.s_addr), *address_len);
236 |
237 | return 0;
238 | }
239 |
--------------------------------------------------------------------------------
/src/ip_input.c:
--------------------------------------------------------------------------------
1 | #include "syshead.h"
2 | #include "skbuff.h"
3 | #include "arp.h"
4 | #include "ip.h"
5 | #include "icmpv4.h"
6 | #include "tcp.h"
7 | #include "utils.h"
8 |
9 | static void ip_init_pkt(struct iphdr *ih)
10 | {
11 | ih->saddr = ntohl(ih->saddr);
12 | ih->daddr = ntohl(ih->daddr);
13 | ih->len = ntohs(ih->len);
14 | ih->id = ntohs(ih->id);
15 | }
16 |
17 | int ip_rcv(struct sk_buff *skb)
18 | {
19 | struct iphdr *ih = ip_hdr(skb);
20 | uint16_t csum = -1;
21 |
22 | if (ih->version != IPV4) {
23 | print_err("Datagram version was not IPv4\n");
24 | goto drop_pkt;
25 | }
26 |
27 | if (ih->ihl < 5) {
28 | print_err("IPv4 header length must be at least 5\n");
29 | goto drop_pkt;
30 | }
31 |
32 | if (ih->ttl == 0) {
33 | //TODO: Send ICMP error
34 | print_err("Time to live of datagram reached 0\n");
35 | goto drop_pkt;
36 | }
37 |
38 | csum = checksum(ih, ih->ihl * 4, 0);
39 |
40 | if (csum != 0) {
41 | // Invalid checksum, drop packet handling
42 | goto drop_pkt;
43 | }
44 |
45 | // TODO: Check fragmentation, possibly reassemble
46 |
47 | ip_init_pkt(ih);
48 |
49 | ip_dbg("in", ih);
50 |
51 | switch (ih->proto) {
52 | case ICMPV4:
53 | icmpv4_incoming(skb);
54 | return 0;
55 | case IP_TCP:
56 | tcp_in(skb);
57 | return 0;
58 | default:
59 | print_err("Unknown IP header proto\n");
60 | goto drop_pkt;
61 | }
62 |
63 | drop_pkt:
64 | free_skb(skb);
65 | return 0;
66 | }
67 |
--------------------------------------------------------------------------------
/src/ip_output.c:
--------------------------------------------------------------------------------
1 | #include "syshead.h"
2 | #include "skbuff.h"
3 | #include "utils.h"
4 | #include "ip.h"
5 | #include "dst.h"
6 | #include "route.h"
7 |
8 | void ip_send_check(struct iphdr *ihdr)
9 | {
10 | uint32_t csum = checksum(ihdr, ihdr->ihl * 4, 0);
11 | ihdr->csum = csum;
12 | }
13 |
14 | int ip_output(struct sock *sk, struct sk_buff *skb)
15 | {
16 | struct rtentry *rt;
17 | struct iphdr *ihdr = ip_hdr(skb);
18 |
19 | rt = route_lookup(sk->daddr);
20 |
21 | if (!rt) {
22 | // TODO: dest_unreachable
23 | print_err("IP output route lookup fail\n");
24 | return -1;
25 | }
26 |
27 | skb->dev = rt->dev;
28 | skb->rt = rt;
29 |
30 | skb_push(skb, IP_HDR_LEN);
31 |
32 | ihdr->version = IPV4;
33 | ihdr->ihl = 0x05;
34 | ihdr->tos = 0;
35 | ihdr->len = skb->len;
36 | ihdr->id = ihdr->id;
37 | ihdr->frag_offset = 0x4000;
38 | ihdr->ttl = 64;
39 | ihdr->proto = skb->protocol;
40 | ihdr->saddr = skb->dev->addr;
41 | ihdr->daddr = sk->daddr;
42 | ihdr->csum = 0;
43 |
44 | ip_dbg("out", ihdr);
45 |
46 | ihdr->len = htons(ihdr->len);
47 | ihdr->id = htons(ihdr->id);
48 | ihdr->daddr = htonl(ihdr->daddr);
49 | ihdr->saddr = htonl(ihdr->saddr);
50 | ihdr->csum = htons(ihdr->csum);
51 | ihdr->frag_offset = htons(ihdr->frag_offset);
52 |
53 | ip_send_check(ihdr);
54 |
55 | return dst_neigh_output(skb);
56 | }
57 |
--------------------------------------------------------------------------------
/src/ipc.c:
--------------------------------------------------------------------------------
1 | #include "syshead.h"
2 | #include "utils.h"
3 | #include "ipc.h"
4 | #include "socket.h"
5 |
/* Size in bytes of the buffer each IPC connection reads requests into. */
#define IPC_BUFLEN 8192

/* List of ipc_thread records, one per accepted IPC connection. */
static LIST_HEAD(sockets);
/* Guards `sockets` and `socket_count`. */
static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
/* Number of records currently on `sockets`. */
static int socket_count = 0;
11 |
12 | static struct ipc_thread *ipc_alloc_thread(int sock)
13 | {
14 | struct ipc_thread *th = calloc(sizeof(struct ipc_thread), 1);
15 | list_init(&th->list);
16 | th->sock = sock;
17 |
18 | pthread_mutex_lock(&lock);
19 | list_add_tail(&th->list, &sockets);
20 | socket_count++;
21 | pthread_mutex_unlock(&lock);
22 |
23 | ipc_dbg("New IPC socket allocated", th);
24 |
25 | return th;
26 | }
27 |
28 | static void ipc_free_thread(int sock)
29 | {
30 | struct list_head *item, *tmp = NULL;
31 | struct ipc_thread *th = NULL;
32 |
33 | pthread_mutex_lock(&lock);
34 |
35 | list_for_each_safe(item, tmp, &sockets) {
36 | th = list_entry(item, struct ipc_thread, list);
37 |
38 | if (th->sock == sock) {
39 | list_del(&th->list);
40 | ipc_dbg("IPC socket deleted", th);
41 |
42 | close(th->sock);
43 | free(th);
44 | socket_count--;
45 | break;
46 | }
47 |
48 | }
49 |
50 | pthread_mutex_unlock(&lock);
51 | }
52 |
/*
 * Sends `len` bytes from `buf` on `sockfd` with MSG_NOSIGNAL set.
 * Returns the result of send().
 */
static int ipc_try_send(int sockfd, const void *buf, size_t len)
{
    const int send_flags = MSG_NOSIGNAL;

    return send(sockfd, buf, len, send_flags);
}
57 |
58 | static int ipc_write_rc(int sockfd, pid_t pid, uint16_t type, int rc)
59 | {
60 | int resplen = sizeof(struct ipc_msg) + sizeof(struct ipc_err);
61 | struct ipc_msg *response = alloca(resplen);
62 |
63 | if (response == NULL) {
64 | print_err("Could not allocate memory for IPC write response\n");
65 | return -1;
66 | }
67 |
68 | response->type = type;
69 | response->pid = pid;
70 |
71 | struct ipc_err err;
72 |
73 | if (rc < 0) {
74 | err.err = -rc;
75 | err.rc = -1;
76 | } else {
77 | err.err = 0;
78 | err.rc = rc;
79 | }
80 |
81 | memcpy(response->data, &err, sizeof(struct ipc_err));
82 |
83 | if (ipc_try_send(sockfd, (char *)response, resplen) == -1) {
84 | perror("Error on writing IPC write response");
85 | }
86 |
87 | return 0;
88 | }
89 |
90 | static int ipc_read(int sockfd, struct ipc_msg *msg)
91 | {
92 | struct ipc_read *requested = (struct ipc_read *) msg->data;
93 | pid_t pid = msg->pid;
94 | int rlen = -1;
95 | char rbuf[requested->len];
96 | memset(rbuf, 0, requested->len);
97 |
98 | rlen = _read(pid, requested->sockfd, rbuf, requested->len);
99 |
100 | int resplen = sizeof(struct ipc_msg) + sizeof(struct ipc_err) +
101 | sizeof(struct ipc_read) + (rlen > 0 ? rlen : 0);
102 | struct ipc_msg *response = alloca(resplen);
103 | struct ipc_err *error = (struct ipc_err *) response->data;
104 | struct ipc_read *actual = (struct ipc_read *) error->data;
105 |
106 | if (response == NULL) {
107 | print_err("Could not allocate memory for IPC read response\n");
108 | return -1;
109 | }
110 |
111 | response->type = IPC_READ;
112 | response->pid = pid;
113 |
114 | error->rc = rlen < 0 ? -1 : rlen;
115 | error->err = rlen < 0 ? -rlen : 0;
116 |
117 | actual->sockfd = requested->sockfd;
118 | actual->len = rlen;
119 | memcpy(actual->buf, rbuf, rlen > 0 ? rlen : 0);
120 |
121 | if (ipc_try_send(sockfd, (char *)response, resplen) == -1) {
122 | perror("Error on writing IPC read response");
123 | }
124 |
125 | return 0;
126 | }
127 |
128 | static int ipc_write(int sockfd, struct ipc_msg *msg)
129 | {
130 | struct ipc_write *payload = (struct ipc_write *) msg->data;
131 | pid_t pid = msg->pid;
132 | int rc = -1;
133 | int head = IPC_BUFLEN - sizeof(struct ipc_write) - sizeof(struct ipc_msg);
134 |
135 | char buf[payload->len];
136 |
137 | memset(buf, 0, payload->len);
138 | memcpy(buf, payload->buf, payload->len > head ? head : payload->len);
139 |
140 | // Guard for payload that is longer than initial IPC_BUFLEN
141 | if (payload->len > head) {
142 | int tail = payload->len - head;
143 | int res = read(sockfd, &buf[head], tail);
144 |
145 | if (res == -1) {
146 | perror("Read on IPC payload guard");
147 | return -1;
148 | } else if (res != tail) {
149 | print_err("Hmm, we did not read exact payload amount in IPC write\n");
150 | }
151 | }
152 |
153 | rc = _write(pid, payload->sockfd, buf, payload->len);
154 |
155 | return ipc_write_rc(sockfd, pid, IPC_WRITE, rc);
156 | }
157 |
158 | static int ipc_connect(int sockfd, struct ipc_msg *msg)
159 | {
160 | struct ipc_connect *payload = (struct ipc_connect *)msg->data;
161 | pid_t pid = msg->pid;
162 | int rc = -1;
163 |
164 | rc = _connect(pid, payload->sockfd, &payload->addr, payload->addrlen);
165 |
166 | return ipc_write_rc(sockfd, pid, IPC_CONNECT, rc);
167 | }
168 |
169 | static int ipc_socket(int sockfd, struct ipc_msg *msg)
170 | {
171 | struct ipc_socket *sock = (struct ipc_socket *)msg->data;
172 | pid_t pid = msg->pid;
173 | int rc = -1;
174 |
175 | rc = _socket(pid, sock->domain, sock->type, sock->protocol);
176 |
177 | return ipc_write_rc(sockfd, pid, IPC_SOCKET, rc);
178 | }
179 |
180 | static int ipc_close(int sockfd, struct ipc_msg *msg)
181 | {
182 | struct ipc_close *payload = (struct ipc_close *)msg->data;
183 | pid_t pid = msg->pid;
184 | int rc = -1;
185 |
186 | rc = _close(pid, payload->sockfd);
187 |
188 | rc = ipc_write_rc(sockfd, pid, IPC_CLOSE, rc);
189 |
190 | return rc;
191 | }
192 |
193 | static int ipc_poll(int sockfd, struct ipc_msg *msg)
194 | {
195 | struct ipc_poll *data = (struct ipc_poll *)msg->data;
196 | pid_t pid = msg->pid;
197 | int rc = -1;
198 |
199 | struct pollfd fds[data->nfds];
200 |
201 | for (int i = 0; i < data->nfds; i++) {
202 | fds[i].fd = data->fds[i].fd;
203 | fds[i].events = data->fds[i].events;
204 | fds[i].revents = data->fds[i].revents;
205 | }
206 |
207 | rc = _poll(pid, fds, data->nfds, data->timeout);
208 |
209 | int resplen = sizeof(struct ipc_msg) + sizeof(struct ipc_err) + sizeof(struct ipc_pollfd) * data->nfds;
210 | struct ipc_msg *response = alloca(resplen);
211 |
212 | if (response == NULL) {
213 | print_err("Could not allocate memory for IPC write response\n");
214 | return -1;
215 | }
216 |
217 | response->type = IPC_POLL;
218 | response->pid = pid;
219 |
220 | struct ipc_err err;
221 |
222 | if (rc < 0) {
223 | err.err = -rc;
224 | err.rc = -1;
225 | } else {
226 | err.err = 0;
227 | err.rc = rc;
228 | }
229 |
230 | memcpy(response->data, &err, sizeof(struct ipc_err));
231 |
232 | struct ipc_pollfd *polled = (struct ipc_pollfd *) ((struct ipc_err *)response->data)->data;
233 |
234 | for (int i = 0; i < data->nfds; i++) {
235 | polled[i].fd = fds[i].fd;
236 | polled[i].events = fds[i].events;
237 | polled[i].revents = fds[i].revents;
238 | }
239 |
240 | if (ipc_try_send(sockfd, (char *)response, resplen) == -1) {
241 | perror("Error on writing IPC poll response");
242 | }
243 |
244 | return 0;
245 | }
246 |
247 | static int ipc_fcntl(int sockfd, struct ipc_msg *msg)
248 | {
249 | struct ipc_fcntl *fc = (struct ipc_fcntl *)msg->data;
250 | pid_t pid = msg->pid;
251 | int rc = -1;
252 |
253 | switch (fc->cmd) {
254 | case F_GETFL:
255 | rc = _fcntl(pid, fc->sockfd, fc->cmd);
256 | break;
257 | case F_SETFL:
258 | rc = _fcntl(pid, fc->sockfd, fc->cmd, *(int *)fc->data);
259 | break;
260 | default:
261 | print_err("IPC Fcntl cmd not supported %d\n", fc->cmd);
262 | rc = -EINVAL;
263 | }
264 |
265 | return ipc_write_rc(sockfd, pid, IPC_FCNTL, rc);
266 | }
267 |
268 | static int ipc_getsockopt(int sockfd, struct ipc_msg *msg)
269 | {
270 | struct ipc_sockopt *opts = (struct ipc_sockopt *)msg->data;
271 |
272 | pid_t pid = msg->pid;
273 | int rc = -1;
274 |
275 | rc = _getsockopt(pid, opts->fd, opts->level, opts->optname, opts->optval, &opts->optlen);
276 |
277 | int resplen = sizeof(struct ipc_msg) + sizeof(struct ipc_err) + sizeof(struct ipc_sockopt) + opts->optlen;
278 | struct ipc_msg *response = alloca(resplen);
279 |
280 | if (response == NULL) {
281 | print_err("Could not allocate memory for IPC getsockopt response\n");
282 | return -1;
283 | }
284 |
285 | response->type = IPC_GETSOCKOPT;
286 | response->pid = pid;
287 |
288 | struct ipc_err err;
289 |
290 | if (rc < 0) {
291 | err.err = -rc;
292 | err.rc = -1;
293 | } else {
294 | err.err = 0;
295 | err.rc = rc;
296 | }
297 |
298 | memcpy(response->data, &err, sizeof(struct ipc_err));
299 |
300 | struct ipc_sockopt *optres = (struct ipc_sockopt *) ((struct ipc_err *)response->data)->data;
301 |
302 | optres->fd = opts->fd;
303 | optres->level = opts->level;
304 | optres->optname = opts->optname;
305 | optres->optlen = opts->optlen;
306 | memcpy(&optres->optval, opts->optval, opts->optlen);
307 |
308 | if (ipc_try_send(sockfd, (char *)response, resplen) == -1) {
309 | perror("Error on writing IPC getsockopt response");
310 | }
311 |
312 | return rc;
313 | }
314 |
315 | static int ipc_getpeername(int sockfd, struct ipc_msg *msg)
316 | {
317 | struct ipc_sockname *name = (struct ipc_sockname *)msg->data;
318 |
319 | pid_t pid = msg->pid;
320 | int rc = -1;
321 |
322 | int resplen = sizeof(struct ipc_msg) + sizeof(struct ipc_err) + sizeof(struct ipc_sockname);
323 | struct ipc_msg *response = alloca(resplen);
324 |
325 | if (response == NULL) {
326 | print_err("Could not allocate memory for IPC getpeername response\n");
327 | return -1;
328 | }
329 |
330 | response->type = IPC_GETPEERNAME;
331 | response->pid = pid;
332 |
333 | struct ipc_sockname *nameres = (struct ipc_sockname *) ((struct ipc_err *)response->data)->data;
334 | rc = _getpeername(pid, name->socket, (struct sockaddr *)nameres->sa_data, &nameres->address_len);
335 |
336 | struct ipc_err err;
337 |
338 | if (rc < 0) {
339 | err.err = -rc;
340 | err.rc = -1;
341 | } else {
342 | err.err = 0;
343 | err.rc = rc;
344 | }
345 |
346 | memcpy(response->data, &err, sizeof(struct ipc_err));
347 |
348 | nameres->socket = name->socket;
349 |
350 | if (ipc_try_send(sockfd, (char *)response, resplen) == -1) {
351 | perror("Error on writing IPC getpeername response");
352 | }
353 |
354 | return rc;
355 | }
356 |
357 | static int ipc_getsockname(int sockfd, struct ipc_msg *msg)
358 | {
359 | struct ipc_sockname *name = (struct ipc_sockname *)msg->data;
360 |
361 | pid_t pid = msg->pid;
362 | int rc = -1;
363 |
364 | int resplen = sizeof(struct ipc_msg) + sizeof(struct ipc_err) + sizeof(struct ipc_sockname);
365 | struct ipc_msg *response = alloca(resplen);
366 |
367 | if (response == NULL) {
368 | print_err("Could not allocate memory for IPC getsockname response\n");
369 | return -1;
370 | }
371 |
372 | response->type = IPC_GETSOCKNAME;
373 | response->pid = pid;
374 |
375 | struct ipc_sockname *nameres = (struct ipc_sockname *) ((struct ipc_err *)response->data)->data;
376 | rc = _getsockname(pid, name->socket, (struct sockaddr *)nameres->sa_data, &nameres->address_len);
377 |
378 | struct ipc_err err;
379 |
380 | if (rc < 0) {
381 | err.err = -rc;
382 | err.rc = -1;
383 | } else {
384 | err.err = 0;
385 | err.rc = rc;
386 | }
387 |
388 | memcpy(response->data, &err, sizeof(struct ipc_err));
389 |
390 | nameres->socket = name->socket;
391 |
392 | if (ipc_try_send(sockfd, (char *)response, resplen) == -1) {
393 | perror("Error on writing IPC getsockname response");
394 | }
395 |
396 | return rc;
397 | }
398 |
399 | static int demux_ipc_socket_call(int sockfd, char *cmdbuf, int blen)
400 | {
401 | struct ipc_msg *msg = (struct ipc_msg *)cmdbuf;
402 |
403 | switch (msg->type) {
404 | case IPC_SOCKET:
405 | return ipc_socket(sockfd, msg);
406 | break;
407 | case IPC_CONNECT:
408 | return ipc_connect(sockfd, msg);
409 | break;
410 | case IPC_WRITE:
411 | return ipc_write(sockfd, msg);
412 | break;
413 | case IPC_READ:
414 | return ipc_read(sockfd, msg);
415 | break;
416 | case IPC_CLOSE:
417 | return ipc_close(sockfd, msg);
418 | break;
419 | case IPC_POLL:
420 | return ipc_poll(sockfd, msg);
421 | break;
422 | case IPC_FCNTL:
423 | return ipc_fcntl(sockfd, msg);
424 | break;
425 | case IPC_GETSOCKOPT:
426 | return ipc_getsockopt(sockfd, msg);
427 | case IPC_GETPEERNAME:
428 | return ipc_getpeername(sockfd, msg);
429 | case IPC_GETSOCKNAME:
430 | return ipc_getsockname(sockfd, msg);
431 | default:
432 | print_err("No such IPC type %d\n", msg->type);
433 | break;
434 | };
435 |
436 | return 0;
437 | }
438 |
439 | void *socket_ipc_open(void *args) {
440 | int blen = IPC_BUFLEN;
441 | char buf[blen];
442 | int sockfd = *(int *)args;
443 | int rc = -1;
444 |
445 | while ((rc = read(sockfd, buf, blen)) > 0) {
446 | rc = demux_ipc_socket_call(sockfd, buf, blen);
447 |
448 | if (rc == -1) {
449 | print_err("Error on demuxing IPC socket call\n");
450 | close(sockfd);
451 | return NULL;
452 | };
453 | }
454 |
455 | ipc_free_thread(sockfd);
456 |
457 | if (rc == -1) {
458 | perror("socket ipc read");
459 | }
460 |
461 | return NULL;
462 | }
463 |
464 | void *start_ipc_listener()
465 | {
466 | int fd, rc, datasock;
467 | struct sockaddr_un un;
468 | char *sockname = "/tmp/lvlip.socket";
469 |
470 | unlink(sockname);
471 |
472 | if (strnlen(sockname, sizeof(un.sun_path)) == sizeof(un.sun_path)) {
473 | // Path is too long
474 | print_err("Path for UNIX socket is too long\n");
475 | exit(-1);
476 | }
477 |
478 | if ((fd = socket(AF_UNIX, SOCK_STREAM, 0)) < 0) {
479 | perror("IPC listener UNIX socket");
480 | exit(EXIT_FAILURE);
481 | }
482 |
483 | memset(&un, 0, sizeof(struct sockaddr_un));
484 | un.sun_family = AF_UNIX;
485 | strncpy(un.sun_path, sockname, sizeof(un.sun_path) - 1);
486 |
487 | rc = bind(fd, (const struct sockaddr *) &un, sizeof(struct sockaddr_un));
488 |
489 | if (rc == -1) {
490 | perror("IPC bind");
491 | exit(EXIT_FAILURE);
492 | }
493 |
494 | rc = listen(fd, 20);
495 |
496 | if (rc == -1) {
497 | perror("IPC listen");
498 | exit(EXIT_FAILURE);
499 | }
500 |
501 | if (chmod(sockname, S_IRUSR | S_IWUSR | S_IXUSR |
502 | S_IRGRP | S_IWGRP | S_IXGRP |
503 | S_IROTH | S_IWOTH | S_IXOTH) == -1) {
504 | perror("Chmod on lvl-ip IPC UNIX socket failed");
505 | exit(EXIT_FAILURE);
506 | }
507 |
508 | for (;;) {
509 | datasock = accept(fd, NULL, NULL);
510 | if (datasock == -1) {
511 | perror("IPC accept");
512 | exit(EXIT_FAILURE);
513 | }
514 |
515 | struct ipc_thread *th = ipc_alloc_thread(datasock);
516 |
517 | if (pthread_create(&th->id, NULL, &socket_ipc_open, &th->sock) != 0) {
518 | print_err("Error on socket thread creation\n");
519 | exit(1);
520 | };
521 | }
522 |
523 | close(fd);
524 |
525 | unlink(sockname);
526 |
527 | return NULL;
528 | }
529 |
--------------------------------------------------------------------------------
/src/main.c:
--------------------------------------------------------------------------------
1 | #include "syshead.h"
2 | #include "basic.h"
3 | #include "cli.h"
4 | #include "tuntap_if.h"
5 | #include "utils.h"
6 | #include "ipc.h"
7 | #include "timer.h"
8 | #include "route.h"
9 | #include "ethernet.h"
10 | #include "arp.h"
11 | #include "tcp.h"
12 | #include "netdev.h"
13 | #include "ip.h"
14 |
#define MAX_CMD_LENGTH 6

typedef void (*sighandler_t)(int);

/* Indices into threads[] for the stack's worker threads. */
#define THREAD_CORE 0
#define THREAD_TIMERS 1
#define THREAD_IPC 2
#define THREAD_SIGNAL 3
static pthread_t threads[4];

/* Cleared by the signal handling thread on SIGINT/SIGQUIT. */
int running = 1;
/* Signals blocked in init_signals() and waited for in stop_stack_handler(). */
sigset_t mask;
27 |
28 | static void create_thread(pthread_t id, void *(*func) (void *))
29 | {
30 | if (pthread_create(&threads[id], NULL,
31 | func, NULL) != 0) {
32 | print_err("Could not create core thread\n");
33 | }
34 | }
35 |
36 | static void *stop_stack_handler(void *arg)
37 | {
38 | int err, signo;
39 |
40 | for (;;) {
41 | err = sigwait(&mask, &signo);
42 | if (err != 0) {
43 | print_err("Sigwait failed: %d\n", err);
44 | }
45 |
46 | switch (signo) {
47 | case SIGINT:
48 | case SIGQUIT:
49 | running = 0;
50 | pthread_cancel(threads[THREAD_IPC]);
51 | pthread_cancel(threads[THREAD_CORE]);
52 | pthread_cancel(threads[THREAD_TIMERS]);
53 | return 0;
54 | default:
55 | printf("Unexpected signal %d\n", signo);
56 | }
57 | }
58 | }
59 |
60 | static void init_signals()
61 | {
62 | int err;
63 |
64 | sigemptyset(&mask);
65 | sigaddset(&mask, SIGINT);
66 | sigaddset(&mask, SIGQUIT);
67 |
68 | if ((err = pthread_sigmask(SIG_BLOCK, &mask, NULL)) != 0) {
69 | print_err("SIG_BLOCK error\n");
70 | exit(1);
71 | }
72 | }
73 |
/* Runs the init routine of each stack component, in this order. */
static void init_stack()
{
    tun_init();
    netdev_init();
    route_init();
    arp_init();
    tcp_init();
}
82 |
/* Starts the receive loop, timer, IPC listener and signal handling threads. */
static void run_threads()
{
    create_thread(THREAD_CORE, netdev_rx_loop);
    create_thread(THREAD_TIMERS, timers_start);
    create_thread(THREAD_IPC, start_ipc_listener);
    create_thread(THREAD_SIGNAL, stop_stack_handler);
}
90 |
91 | static void wait_for_threads()
92 | {
93 | for (int i = 0; i < 3; i++) {
94 | if (pthread_join(threads[i], NULL) != 0) {
95 | print_err("Error when joining threads\n");
96 | exit(1);
97 | }
98 | }
99 | }
100 |
/* Aborts open sockets, then runs each component's release routine in this order. */
void free_stack()
{
    abort_sockets();
    free_arp();
    free_routes();
    free_netdev();
    free_tun();
}
109 |
/*
 * Drops CAP_NET_ADMIN and then CAP_SETPCAP from the capability bounding
 * set. Exits the process when either drop fails.
 */
void init_security()
{
    static const struct {
        int cap;
        const char *failure;
    } drops[] = {
        { CAP_NET_ADMIN, "Error on network admin capability drop" },
        { CAP_SETPCAP, "Error on capability set drop" },
    };

    for (unsigned int i = 0; i < sizeof(drops) / sizeof(drops[0]); i++) {
        if (prctl(PR_CAPBSET_DROP, drops[i].cap) == -1) {
            perror(drops[i].failure);
            exit(1);
        }
    }
}
122 |
/*
 * Program entry: parses the command line, prepares signals, the stack
 * and the capability set, runs the worker threads until they finish,
 * then releases the stack.
 */
int main(int argc, char** argv)
{
    parse_cli(argc, argv);

    /* Setup */
    init_signals();
    init_stack();
    init_security();

    /* Run until the worker threads end */
    run_threads();
    wait_for_threads();

    /* Teardown */
    free_stack();

    return 0;
}
136 |
--------------------------------------------------------------------------------
/src/netdev.c:
--------------------------------------------------------------------------------
1 | #include "syshead.h"
2 | #include "utils.h"
3 | #include "skbuff.h"
4 | #include "netdev.h"
5 | #include "ethernet.h"
6 | #include "arp.h"
7 | #include "ip.h"
8 | #include "tuntap_if.h"
9 | #include "basic.h"
10 |
/* The two devices created by netdev_init(). */
struct netdev *loop;
struct netdev *netdev;
/* Defined in main.c; the receive loop runs while it is non-zero. */
extern int running;
14 |
15 | static struct netdev *netdev_alloc(char *addr, char *hwaddr, uint32_t mtu)
16 | {
17 | struct netdev *dev = malloc(sizeof(struct netdev));
18 |
19 | dev->addr = ip_parse(addr);
20 |
21 | sscanf(hwaddr, "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx", &dev->hwaddr[0],
22 | &dev->hwaddr[1],
23 | &dev->hwaddr[2],
24 | &dev->hwaddr[3],
25 | &dev->hwaddr[4],
26 | &dev->hwaddr[5]);
27 |
28 | dev->addr_len = 6;
29 | dev->mtu = mtu;
30 |
31 | return dev;
32 | }
33 |
34 | void netdev_init(char *addr, char *hwaddr)
35 | {
36 | loop = netdev_alloc("127.0.0.1", "00:00:00:00:00:00", 1500);
37 | netdev = netdev_alloc("10.0.0.4", "00:0c:29:6d:50:25", 1500);
38 | }
39 |
40 | int netdev_transmit(struct sk_buff *skb, uint8_t *dst_hw, uint16_t ethertype)
41 | {
42 | struct netdev *dev;
43 | struct eth_hdr *hdr;
44 | int ret = 0;
45 |
46 | dev = skb->dev;
47 |
48 | skb_push(skb, ETH_HDR_LEN);
49 |
50 | hdr = (struct eth_hdr *)skb->data;
51 |
52 | memcpy(hdr->dmac, dst_hw, dev->addr_len);
53 | memcpy(hdr->smac, dev->hwaddr, dev->addr_len);
54 |
55 | hdr->ethertype = htons(ethertype);
56 | eth_dbg("out", hdr);
57 |
58 | ret = tun_write((char *)skb->data, skb->len);
59 |
60 | return ret;
61 | }
62 |
63 | static int netdev_receive(struct sk_buff *skb)
64 | {
65 | struct eth_hdr *hdr = eth_hdr(skb);
66 |
67 | eth_dbg("in", hdr);
68 |
69 | switch (hdr->ethertype) {
70 | case ETH_P_ARP:
71 | arp_rcv(skb);
72 | break;
73 | case ETH_P_IP:
74 | ip_rcv(skb);
75 | break;
76 | case ETH_P_IPV6:
77 | default:
78 | printf("Unsupported ethertype %x\n", hdr->ethertype);
79 | free_skb(skb);
80 | break;
81 | }
82 |
83 | return 0;
84 | }
85 |
86 | void *netdev_rx_loop()
87 | {
88 | while (running) {
89 | struct sk_buff *skb = alloc_skb(BUFLEN);
90 |
91 | if (tun_read((char *)skb->data, BUFLEN) < 0) {
92 | perror("ERR: Read from tun_fd");
93 | free_skb(skb);
94 | return NULL;
95 | }
96 |
97 | netdev_receive(skb);
98 | }
99 |
100 | return NULL;
101 | }
102 |
103 | struct netdev* netdev_get(uint32_t sip)
104 | {
105 | if (netdev->addr == sip) {
106 | return netdev;
107 | } else {
108 | return NULL;
109 | }
110 | }
111 |
112 | void free_netdev()
113 | {
114 | free(loop);
115 | free(netdev);
116 | }
117 |
--------------------------------------------------------------------------------
/src/route.c:
--------------------------------------------------------------------------------
1 | #include "syshead.h"
2 | #include "route.h"
3 | #include "dst.h"
4 | #include "netdev.h"
5 | #include "list.h"
6 | #include "ip.h"
7 |
8 | static LIST_HEAD(routes);
9 |
10 | extern struct netdev *netdev;
11 | extern struct netdev *loop;
12 |
13 | extern char *tapaddr;
14 | extern char *taproute;
15 |
16 | static struct rtentry *route_alloc(uint32_t dst, uint32_t gateway, uint32_t netmask,
17 | uint8_t flags, uint32_t metric, struct netdev *dev)
18 | {
19 | struct rtentry *rt = malloc(sizeof(struct rtentry));
20 | list_init(&rt->list);
21 |
22 | rt->dst = dst;
23 | rt->gateway = gateway;
24 | rt->netmask = netmask;
25 | rt->flags = flags;
26 | rt->metric = metric;
27 | rt->dev = dev;
28 | return rt;
29 | }
30 |
31 | void route_add(uint32_t dst, uint32_t gateway, uint32_t netmask, uint8_t flags,
32 | uint32_t metric, struct netdev *dev)
33 | {
34 | struct rtentry *rt = route_alloc(dst, gateway, netmask, flags, metric, dev);
35 |
36 | list_add_tail(&rt->list, &routes);
37 | }
38 |
39 | void route_init()
40 | {
41 | route_add(loop->addr, 0, 0xff000000, RT_LOOPBACK, 0, loop);
42 | route_add(netdev->addr, 0, 0xffffff00, RT_HOST, 0, netdev);
43 | route_add(0, ip_parse(tapaddr), 0, RT_GATEWAY, 0, netdev);
44 | }
45 |
46 | struct rtentry *route_lookup(uint32_t daddr)
47 | {
48 | struct list_head *item;
49 | struct rtentry *rt = NULL;
50 |
51 | list_for_each(item, &routes) {
52 | rt = list_entry(item, struct rtentry, list);
53 | if ((daddr & rt->netmask) == (rt->dst & rt->netmask)) break;
54 | // If no matches, we default to to default gw (last item)
55 | }
56 |
57 | return rt;
58 | }
59 |
60 | void free_routes()
61 | {
62 | struct list_head *item, *tmp;
63 | struct rtentry *rt;
64 |
65 | list_for_each_safe(item, tmp, &routes) {
66 | rt = list_entry(item, struct rtentry, list);
67 | list_del(item);
68 |
69 | free(rt);
70 | }
71 | }
72 |
--------------------------------------------------------------------------------
/src/skbuff.c:
--------------------------------------------------------------------------------
1 | #include "syshead.h"
2 | #include "skbuff.h"
3 | #include "list.h"
4 |
5 | struct sk_buff *alloc_skb(unsigned int size)
6 | {
7 | struct sk_buff *skb = malloc(sizeof(struct sk_buff));
8 |
9 | memset(skb, 0, sizeof(struct sk_buff));
10 | skb->data = malloc(size);
11 | memset(skb->data, 0, size);
12 |
13 | skb->refcnt = 0;
14 | skb->head = skb->data;
15 | skb->end = skb->data + size;
16 |
17 | list_init(&skb->list);
18 |
19 | return skb;
20 | }
21 |
22 | void free_skb(struct sk_buff *skb)
23 | {
24 | if (skb->refcnt < 1) {
25 | free(skb->head);
26 | free(skb);
27 | }
28 | }
29 |
30 | void *skb_reserve(struct sk_buff *skb, unsigned int len)
31 | {
32 | skb->data += len;
33 |
34 | return skb->data;
35 | }
36 |
37 | uint8_t *skb_push(struct sk_buff *skb, unsigned int len)
38 | {
39 | skb->data -= len;
40 | skb->len += len;
41 |
42 | return skb->data;
43 | }
44 |
45 | uint8_t *skb_head(struct sk_buff *skb)
46 | {
47 | return skb->head;
48 | }
49 |
50 | void skb_reset_header(struct sk_buff *skb)
51 | {
52 | skb->data = skb->end - skb->dlen;
53 | skb->len = skb->dlen;
54 | }
55 |
--------------------------------------------------------------------------------
/src/sock.c:
--------------------------------------------------------------------------------
1 | #include "syshead.h"
2 | #include "sock.h"
3 | #include "socket.h"
4 |
5 | struct sock *sk_alloc(struct net_ops *ops, int protocol)
6 | {
7 | struct sock *sk;
8 |
9 | sk = ops->alloc_sock(protocol);
10 |
11 | sk->ops = ops;
12 |
13 | return sk;
14 | }
15 |
16 | void sock_init_data(struct socket *sock, struct sock *sk)
17 | {
18 | sock->sk = sk;
19 | sk->sock = sock;
20 |
21 | wait_init(&sk->recv_wait);
22 | skb_queue_init(&sk->receive_queue);
23 | skb_queue_init(&sk->write_queue);
24 |
25 | sk->poll_events = 0;
26 |
27 | sk->ops->init(sk);
28 | }
29 |
30 | void sock_free(struct sock *sk)
31 | {
32 | skb_queue_free(&sk->receive_queue);
33 | skb_queue_free(&sk->write_queue);
34 | }
35 |
36 | void sock_connected(struct sock *sk)
37 | {
38 | struct socket *sock = sk->sock;
39 |
40 | sock->state = SS_CONNECTED;
41 | sk->err = 0;
42 | sk->poll_events = (POLLOUT | POLLWRNORM | POLLWRBAND);
43 |
44 | wait_wakeup(&sock->sleep);
45 | }
46 |
--------------------------------------------------------------------------------
/src/socket.c:
--------------------------------------------------------------------------------
1 | #include "syshead.h"
2 | #include "utils.h"
3 | #include "socket.h"
4 | #include "inet.h"
5 | #include "wait.h"
6 | #include "timer.h"
7 |
8 | static int sock_amount = 0;
9 | static LIST_HEAD(sockets);
10 | static pthread_rwlock_t slock = PTHREAD_RWLOCK_INITIALIZER;
11 |
12 | extern struct net_family inet;
13 |
14 | static struct net_family *families[128] = {
15 | [AF_INET] = &inet,
16 | };
17 |
18 | static struct socket *alloc_socket(pid_t pid)
19 | {
20 | // TODO: Figure out a way to not shadow kernel file descriptors.
21 | // Now, we'll just expect the fds for a process to never exceed this.
22 | static int fd = 4097;
23 | struct socket *sock = malloc(sizeof (struct socket));
24 | list_init(&sock->list);
25 |
26 | sock->pid = pid;
27 | sock->refcnt = 1;
28 |
29 | pthread_rwlock_wrlock(&slock);
30 | sock->fd = fd++;
31 | pthread_rwlock_unlock(&slock);
32 |
33 | sock->state = SS_UNCONNECTED;
34 | sock->ops = NULL;
35 | sock->flags = O_RDWR;
36 | wait_init(&sock->sleep);
37 | pthread_rwlock_init(&sock->lock, NULL);
38 |
39 | return sock;
40 | }
41 |
42 | int socket_rd_acquire(struct socket *sock)
43 | {
44 | int rc = pthread_rwlock_wrlock(&sock->lock);
45 | sock->refcnt++;
46 | return rc;
47 | }
48 |
49 | int socket_wr_acquire(struct socket *sock)
50 | {
51 | int rc = pthread_rwlock_wrlock(&sock->lock);
52 | sock->refcnt++;
53 | return rc;
54 | }
55 |
56 | int socket_release(struct socket *sock)
57 | {
58 | int rc = 0;
59 | sock->refcnt--;
60 |
61 | if (sock->refcnt == 0) {
62 | rc = pthread_rwlock_unlock(&sock->lock);
63 | free(sock);
64 | } else {
65 | rc = pthread_rwlock_unlock(&sock->lock);
66 | }
67 |
68 | return rc;
69 | }
70 |
71 | int socket_free(struct socket *sock)
72 | {
73 | pthread_rwlock_wrlock(&slock);
74 | socket_wr_acquire(sock);
75 | list_del(&sock->list);
76 | sock_amount--;
77 | pthread_rwlock_unlock(&slock);
78 |
79 | if (sock->ops) {
80 | sock->ops->free(sock);
81 | }
82 |
83 | wait_free(&sock->sleep);
84 | socket_release(sock);
85 |
86 | return 0;
87 | }
88 |
/*
 * Timer callback: free the socket passed as `arg`, but only if it is still
 * present in the socket list. Always returns NULL.
 */
static void *socket_garbage_collect(void *arg)
{
    struct socket *victim = socket_find((struct socket *)arg);

    if (victim != NULL) {
        socket_free(victim);
    }

    return NULL;
}
99 |
100 | int socket_delete(struct socket *sock)
101 | {
102 | int rc = 0;
103 |
104 | if (sock->state == SS_DISCONNECTING) goto out;
105 |
106 | sock->state = SS_DISCONNECTING;
107 | timer_oneshot(10000, &socket_garbage_collect, sock);
108 |
109 | out:
110 | return rc;
111 | }
112 |
113 | void abort_sockets() {
114 | struct list_head *item, *tmp;
115 | struct socket *sock;
116 |
117 | list_for_each_safe(item, tmp, &sockets) {
118 | sock = list_entry(item, struct socket, list);
119 | sock->ops->abort(sock);
120 | }
121 | }
122 |
123 | static struct socket *get_socket(pid_t pid, uint32_t fd)
124 | {
125 | struct list_head *item;
126 | struct socket *sock = NULL;
127 |
128 | pthread_rwlock_rdlock(&slock);
129 | list_for_each(item, &sockets) {
130 | sock = list_entry(item, struct socket, list);
131 | if (sock->pid == pid && sock->fd == fd) goto out;
132 | }
133 |
134 | sock = NULL;
135 |
136 | out:
137 | pthread_rwlock_unlock(&slock);
138 | return sock;
139 | }
140 |
141 | struct socket *socket_lookup(uint16_t remoteport, uint16_t localport)
142 | {
143 | struct list_head *item;
144 | struct socket *sock = NULL;
145 | struct sock *sk = NULL;
146 |
147 | pthread_rwlock_rdlock(&slock);
148 |
149 | list_for_each(item, &sockets) {
150 | sock = list_entry(item, struct socket, list);
151 |
152 | if (sock == NULL || sock->sk == NULL) continue;
153 | sk = sock->sk;
154 |
155 | if (sk->sport == localport && sk->dport == remoteport) {
156 | goto found;
157 | }
158 | }
159 |
160 | sock = NULL;
161 | found:
162 | pthread_rwlock_unlock(&slock);
163 | return sock;
164 | }
165 |
166 | struct socket *socket_find(struct socket *find)
167 | {
168 | struct list_head *item;
169 | struct socket *sock = NULL;
170 |
171 | pthread_rwlock_rdlock(&slock);
172 | list_for_each(item, &sockets) {
173 | sock = list_entry(item, struct socket, list);
174 | if (sock == find) goto out;
175 | }
176 |
177 | sock = NULL;
178 |
179 | out:
180 | pthread_rwlock_unlock(&slock);
181 | return sock;
182 | }
183 |
#ifdef DEBUG_SOCKET
/*
 * Emit a debug line for every socket in the list. Each socket is acquired
 * for the duration of its debug output; the whole walk runs under a read
 * lock on `slock`.
 */
void socket_debug()
{
    struct list_head *pos;

    pthread_rwlock_rdlock(&slock);

    list_for_each(pos, &sockets) {
        struct socket *sock = list_entry(pos, struct socket, list);

        socket_rd_acquire(sock);
        socket_dbg(sock, "");
        socket_release(sock);
    }

    pthread_rwlock_unlock(&slock);
}
#else
/* Without DEBUG_SOCKET this function does nothing. */
void socket_debug()
{
}
#endif
207 |
208 | int _socket(pid_t pid, int domain, int type, int protocol)
209 | {
210 | struct socket *sock;
211 | struct net_family *family;
212 |
213 | if ((sock = alloc_socket(pid)) == NULL) {
214 | print_err("Could not alloc socket\n");
215 | return -1;
216 | }
217 |
218 | sock->type = type;
219 |
220 | family = families[domain];
221 |
222 | if (!family) {
223 | print_err("Domain not supported: %d\n", domain);
224 | goto abort_socket;
225 | }
226 |
227 | if (family->create(sock, protocol) != 0) {
228 | print_err("Creating domain failed\n");
229 | goto abort_socket;
230 | }
231 |
232 | pthread_rwlock_wrlock(&slock);
233 |
234 | list_add_tail(&sock->list, &sockets);
235 | sock_amount++;
236 |
237 | socket_rd_acquire(sock);
238 | pthread_rwlock_unlock(&slock);
239 | int rc = sock->fd;
240 | socket_release(sock);
241 |
242 | return rc;
243 |
244 | abort_socket:
245 | socket_free(sock);
246 | return -1;
247 | }
248 |
249 | int _connect(pid_t pid, int sockfd, const struct sockaddr *addr, socklen_t addrlen)
250 | {
251 | struct socket *sock;
252 |
253 | if ((sock = get_socket(pid, sockfd)) == NULL) {
254 | print_err("Connect: could not find socket (fd %u) for connection (pid %d)\n", sockfd, pid);
255 | return -EBADF;
256 | }
257 |
258 | socket_wr_acquire(sock);
259 |
260 | int rc = sock->ops->connect(sock, addr, addrlen, 0);
261 | switch (rc) {
262 | case -EINVAL:
263 | case -EAFNOSUPPORT:
264 | case -ECONNREFUSED:
265 | case -ETIMEDOUT:
266 | socket_release(sock);
267 | socket_free(sock);
268 | break;
269 | default:
270 | socket_release(sock);
271 | }
272 |
273 | return rc;
274 | }
275 |
276 | int _write(pid_t pid, int sockfd, const void *buf, const unsigned int count)
277 | {
278 | struct socket *sock;
279 |
280 | if ((sock = get_socket(pid, sockfd)) == NULL) {
281 | print_err("Write: could not find socket (fd %u) for connection (pid %d)\n", sockfd, pid);
282 | return -EBADF;
283 | }
284 |
285 | socket_wr_acquire(sock);
286 | int rc = sock->ops->write(sock, buf, count);
287 | socket_release(sock);
288 |
289 | return rc;
290 | }
291 |
292 | int _read(pid_t pid, int sockfd, void *buf, const unsigned int count)
293 | {
294 | struct socket *sock;
295 |
296 | if ((sock = get_socket(pid, sockfd)) == NULL) {
297 | print_err("Read: could not find socket (fd %u) for connection (pid %d)\n", sockfd, pid);
298 | return -EBADF;
299 | }
300 |
301 | socket_wr_acquire(sock);
302 | int rc = sock->ops->read(sock, buf, count);
303 | socket_release(sock);
304 |
305 | return rc;
306 | }
307 |
308 | int _close(pid_t pid, int sockfd)
309 | {
310 | struct socket *sock;
311 |
312 | if ((sock = get_socket(pid, sockfd)) == NULL) {
313 | print_err("Close: could not find socket (fd %u) for connection (pid %d)\n", sockfd, pid);
314 | return -EBADF;
315 | }
316 |
317 |
318 | socket_wr_acquire(sock);
319 | int rc = sock->ops->close(sock);
320 | socket_release(sock);
321 |
322 | return rc;
323 | }
324 |
325 | int _poll(pid_t pid, struct pollfd fds[], nfds_t nfds, int timeout)
326 | {
327 | for (;;) {
328 | int polled = 0;
329 |
330 | for (int i = 0; i < nfds; i++) {
331 | struct socket *sock;
332 | struct pollfd *poll = &fds[i];
333 | if ((sock = get_socket(pid, poll->fd)) == NULL) {
334 | print_err("Poll: could not find socket (fd %u) for connection (pid %d)\n", poll->fd, pid);
335 | return -EBADF;
336 | }
337 |
338 | socket_rd_acquire(sock);
339 | poll->revents = sock->sk->poll_events & (poll->events | POLLHUP | POLLERR | POLLNVAL);
340 | if (poll->revents > 0) {
341 | polled++;
342 | }
343 | socket_release(sock);
344 | }
345 |
346 | if (polled > 0 || timeout == 0) {
347 | return polled;
348 | } else {
349 | if (timeout > 0) {
350 | if (timeout > 10) {
351 | timeout -= 10;
352 | } else {
353 | timeout = 0;
354 | }
355 | }
356 | usleep(1000 * 10);
357 | }
358 | }
359 |
360 | return -EAGAIN;
361 | }
362 |
363 | int _fcntl(pid_t pid, int fildes, int cmd, ...)
364 | {
365 | struct socket *sock;
366 |
367 | if ((sock = get_socket(pid, fildes)) == NULL) {
368 | print_err("Fcntl: could not find socket (fd %u) for connection (pid %d)\n", fildes, pid);
369 | return -EBADF;
370 | }
371 |
372 | socket_wr_acquire(sock);
373 | va_list ap;
374 | int rc = 0;
375 |
376 | switch (cmd) {
377 | case F_GETFL:
378 | rc = sock->flags;
379 | goto out;
380 | case F_SETFL:
381 | va_start(ap, cmd);
382 | sock->flags = va_arg(ap, int);
383 | va_end(ap);
384 | rc = 0;
385 | goto out;
386 | default:
387 | rc = -1;
388 | goto out;
389 | }
390 |
391 | rc = -1;
392 |
393 | out:
394 | socket_release(sock);
395 | return rc;
396 | }
397 |
398 | int _getsockopt(pid_t pid, int fd, int level, int optname, void *optval, socklen_t *optlen)
399 | {
400 | struct socket *sock;
401 |
402 | if ((sock = get_socket(pid, fd)) == NULL) {
403 | print_err("Getsockopt: could not find socket (fd %u) for connection (pid %d)\n", fd, pid);
404 | return -EBADF;
405 | }
406 |
407 | int rc = 0;
408 |
409 | socket_rd_acquire(sock);
410 | switch (level) {
411 | case SOL_SOCKET:
412 | switch (optname) {
413 | case SO_ERROR:
414 | *optlen = 4;
415 | *(int *)optval = sock->sk->err;
416 | rc = 0;
417 | break;
418 | default:
419 | print_err("Getsockopt unsupported optname %d\n", optname);
420 | rc = -ENOPROTOOPT;
421 | break;
422 | }
423 |
424 | break;
425 | default:
426 | print_err("Getsockopt: Unsupported level %d\n", level);
427 | rc = -EINVAL;
428 | break;
429 | }
430 |
431 | socket_release(sock);
432 |
433 | return rc;
434 | }
435 |
436 | int _getpeername(pid_t pid, int socket, struct sockaddr *restrict address,
437 | socklen_t *restrict address_len)
438 | {
439 | struct socket *sock;
440 |
441 | if ((sock = get_socket(pid, socket)) == NULL) {
442 | print_err("Getpeername: could not find socket (fd %u) for connection (pid %d)\n", socket, pid);
443 | return -EBADF;
444 | }
445 |
446 | socket_rd_acquire(sock);
447 | int rc = sock->ops->getpeername(sock, address, address_len);
448 | socket_release(sock);
449 |
450 | return rc;
451 | }
452 |
453 | int _getsockname(pid_t pid, int socket, struct sockaddr *restrict address,
454 | socklen_t *restrict address_len)
455 | {
456 | struct socket *sock;
457 |
458 | if ((sock = get_socket(pid, socket)) == NULL) {
459 | print_err("Getsockname: could not find socket (fd %u) for connection (pid %d)\n", socket, pid);
460 | return -EBADF;
461 | }
462 |
463 | socket_rd_acquire(sock);
464 | int rc = sock->ops->getsockname(sock, address, address_len);
465 | socket_release(sock);
466 |
467 | return rc;
468 | }
469 |
--------------------------------------------------------------------------------
/src/tcp.c:
--------------------------------------------------------------------------------
1 | #include "syshead.h"
2 | #include "inet.h"
3 | #include "tcp.h"
4 | #include "ip.h"
5 | #include "sock.h"
6 | #include "utils.h"
7 | #include "timer.h"
8 | #include "wait.h"
9 |
#ifdef DEBUG_TCP
/* State names for debug output; presumably indexed by TCP state value. */
const char *tcp_dbg_states[] = {
    "TCP_LISTEN",
    "TCP_SYNSENT",
    "TCP_SYN_RECEIVED",
    "TCP_ESTABLISHED",
    "TCP_FIN_WAIT_1",
    "TCP_FIN_WAIT_2",
    "TCP_CLOSE",
    "TCP_CLOSE_WAIT",
    "TCP_CLOSING",
    "TCP_LAST_ACK",
    "TCP_TIME_WAIT",
};
#endif
16 |
17 | static pthread_rwlock_t tcplock = PTHREAD_RWLOCK_INITIALIZER;
18 |
19 | struct net_ops tcp_ops = {
20 | .alloc_sock = &tcp_alloc_sock,
21 | .init = &tcp_v4_init_sock,
22 | .connect = &tcp_v4_connect,
23 | .disconnect = &tcp_disconnect,
24 | .write = &tcp_write,
25 | .read = &tcp_read,
26 | .recv_notify = &tcp_recv_notify,
27 | .close = &tcp_close,
28 | .abort = &tcp_abort,
29 | };
30 |
/* TCP module initialisation hook; currently there is nothing to set up. */
void tcp_init()
{
}
35 |
36 | static void tcp_init_segment(struct tcphdr *th, struct iphdr *ih, struct sk_buff *skb)
37 | {
38 | th->sport = ntohs(th->sport);
39 | th->dport = ntohs(th->dport);
40 | th->seq = ntohl(th->seq);
41 | th->ack_seq = ntohl(th->ack_seq);
42 | th->win = ntohs(th->win);
43 | th->csum = ntohs(th->csum);
44 | th->urp = ntohs(th->urp);
45 |
46 | skb->seq = th->seq;
47 | skb->dlen = ip_len(ih) - tcp_hlen(th);
48 | skb->len = skb->dlen + th->syn + th->fin;
49 | skb->end_seq = skb->seq + skb->dlen;
50 | skb->payload = th->data;
51 | }
52 |
53 | static void tcp_clear_queues(struct tcp_sock *tsk) {
54 | skb_queue_free(&tsk->ofo_queue);
55 | }
56 |
57 | void tcp_in(struct sk_buff *skb)
58 | {
59 | struct sock *sk;
60 | struct iphdr *iph;
61 | struct tcphdr *th;
62 |
63 | iph = ip_hdr(skb);
64 | th = (struct tcphdr*) iph->data;
65 |
66 | tcp_init_segment(th, iph, skb);
67 |
68 | sk = inet_lookup(skb, th->sport, th->dport);
69 |
70 | if (sk == NULL) {
71 | print_err("No TCP socket for sport %d dport %d\n",
72 | th->sport, th->dport);
73 | free_skb(skb);
74 | return;
75 | }
76 | socket_wr_acquire(sk->sock);
77 |
78 | tcp_in_dbg(th, sk, skb);
79 | /* if (tcp_checksum(iph, th) != 0) { */
80 | /* goto discard; */
81 | /* } */
82 | tcp_input_state(sk, th, skb);
83 |
84 | socket_release(sk->sock);
85 | }
86 |
/*
 * Checksum over `len` bytes of `data`, seeded with the sum of the source
 * address, the destination address and the protocol and length converted
 * with htons(). Returns the result of checksum().
 */
int tcp_udp_checksum(uint32_t saddr, uint32_t daddr, uint8_t proto,
                     uint8_t *data, uint16_t len)
{
    uint32_t pseudo = saddr;

    pseudo += daddr;
    pseudo += htons(proto);
    pseudo += htons(len);

    return checksum(data, len, pseudo);
}
99 |
100 | int tcp_v4_checksum(struct sk_buff *skb, uint32_t saddr, uint32_t daddr)
101 | {
102 | return tcp_udp_checksum(saddr, daddr, IP_TCP, skb->data, skb->len);
103 | }
104 |
105 | struct sock *tcp_alloc_sock()
106 | {
107 | struct tcp_sock *tsk = malloc(sizeof(struct tcp_sock));
108 |
109 | memset(tsk, 0, sizeof(struct tcp_sock));
110 | tsk->sk.state = TCP_CLOSE;
111 | tsk->sackok = 1;
112 |
113 | tsk->rmss = 1460;
114 | // Default to 536 as per spec
115 | tsk->smss = 536;
116 |
117 | skb_queue_init(&tsk->ofo_queue);
118 |
119 | return (struct sock *)tsk;
120 | }
121 |
/*
 * IPv4 init hook for TCP socks: runs tcp_init_sock() and returns 0
 * regardless of its result.
 */
int tcp_v4_init_sock(struct sock *sk)
{
    (void)tcp_init_sock(sk);

    return 0;
}
127 |
/* Generic TCP sock initialisation; nothing to do at present. Returns 0. */
int tcp_init_sock(struct sock *sk)
{
    const int ok = 0;

    return ok;
}
132 |
133 | void __tcp_set_state(struct sock *sk, uint32_t state)
134 | {
135 | sk->state = state;
136 | }
137 |
138 | static uint16_t generate_port()
139 | {
140 | /* TODO: Generate a proper port */
141 | static int port = 40000;
142 |
143 | pthread_rwlock_wrlock(&tcplock);
144 | int copy = ++port + (timer_get_tick() % 10000);
145 | pthread_rwlock_unlock(&tcplock);
146 |
147 | return copy;
148 | }
149 |
/*
 * Produce an initial send sequence number from the current time multiplied
 * by a rand() value.
 *
 * The product is computed in unsigned 32-bit arithmetic, where wrap-around
 * is well defined, instead of multiplying two ints, which overflows with
 * undefined behaviour for almost every input.
 */
int generate_iss()
{
    /* TODO: Generate a proper ISS */
    uint32_t now = (uint32_t)time(NULL);
    uint32_t noise = (uint32_t)rand();

    return (int)(now * noise);
}
155 |
156 | int tcp_v4_connect(struct sock *sk, const struct sockaddr *addr, int addrlen, int flags)
157 | {
158 | uint16_t dport = ((struct sockaddr_in *)addr)->sin_port;
159 | uint32_t daddr = ((struct sockaddr_in *)addr)->sin_addr.s_addr;
160 |
161 | sk->dport = ntohs(dport);
162 | sk->sport = generate_port();
163 | sk->daddr = ntohl(daddr);
164 | /* TODO: Do not hardcode lvl-ip local interface */
165 | sk->saddr = parse_ipv4_string("10.0.0.4");
166 |
167 | return tcp_connect(sk);
168 | }
169 |
/* Disconnect hook; does nothing and returns 0. */
int tcp_disconnect(struct sock *sk, int flags)
{
    const int ok = 0;

    return ok;
}
174 |
175 | int tcp_write(struct sock *sk, const void *buf, int len)
176 | {
177 | struct tcp_sock *tsk = tcp_sk(sk);
178 | int ret = sk->err;
179 |
180 | if (ret != 0) goto out;
181 |
182 | switch (sk->state) {
183 | case TCP_ESTABLISHED:
184 | case TCP_CLOSE_WAIT:
185 | break;
186 | default:
187 | ret = -EBADF;
188 | goto out;
189 | }
190 |
191 | return tcp_send(tsk, buf, len);
192 |
193 | out:
194 | return ret;
195 | }
196 |
197 | int tcp_read(struct sock *sk, void *buf, int len)
198 | {
199 | struct tcp_sock *tsk = tcp_sk(sk);
200 | int ret = -1;
201 |
202 | switch (sk->state) {
203 | case TCP_CLOSE:
204 | ret = -EBADF;
205 | goto out;
206 | case TCP_LISTEN:
207 | case TCP_SYN_SENT:
208 | case TCP_SYN_RECEIVED:
209 | /* Queue for processing after entering ESTABLISHED state. If there
210 | is no room to queue this request, respond with "error:
211 | insufficient resources". */
212 | case TCP_ESTABLISHED:
213 | case TCP_FIN_WAIT_1:
214 | case TCP_FIN_WAIT_2:
215 | /* If insufficient incoming segments are queued to satisfy the
216 | request, queue the request. */
217 |
218 | break;
219 | case TCP_CLOSE_WAIT:
220 | /* If no text is awaiting delivery, the RECEIVE will get a
221 | "error: connection closing" response. Otherwise, any remaining
222 | text can be used to satisfy the RECEIVE. */
223 | if (!skb_queue_empty(&tsk->sk.receive_queue)) break;
224 | if (tsk->flags & TCP_FIN) {
225 | tsk->flags &= ~TCP_FIN;
226 | return 0;
227 | }
228 |
229 | break;
230 | case TCP_CLOSING:
231 | case TCP_LAST_ACK:
232 | case TCP_TIME_WAIT:
233 | ret = sk->err;
234 | goto out;
235 | default:
236 | goto out;
237 | }
238 |
239 | return tcp_receive(tsk, buf, len);
240 |
241 | out:
242 | return ret;
243 | }
244 |
245 | int tcp_recv_notify(struct sock *sk)
246 | {
247 | if (&(sk->recv_wait)) {
248 | return wait_wakeup(&sk->recv_wait);
249 | }
250 |
251 | // No recv wait lock
252 | return -1;
253 | }
254 |
255 | int tcp_close(struct sock *sk)
256 | {
257 | switch (sk->state) {
258 | case TCP_CLOSE:
259 | case TCP_CLOSING:
260 | case TCP_LAST_ACK:
261 | case TCP_TIME_WAIT:
262 | case TCP_FIN_WAIT_1:
263 | case TCP_FIN_WAIT_2:
264 | /* Respond with "error: connection closing". */
265 | sk->err = -EBADF;
266 | return -1;
267 | case TCP_LISTEN:
268 | case TCP_SYN_SENT:
269 | case TCP_SYN_RECEIVED:
270 | return tcp_done(sk);
271 | case TCP_ESTABLISHED:
272 | /* Queue this until all preceding SENDs have been segmentized, then
273 | form a FIN segment and send it. In any case, enter FIN-WAIT-1
274 | state. */
275 | tcp_set_state(sk, TCP_FIN_WAIT_1);
276 | tcp_queue_fin(sk);
277 | break;
278 | case TCP_CLOSE_WAIT:
279 | /* Queue this request until all preceding SENDs have been
280 | segmentized; then send a FIN segment, enter LAST_ACK state. */
281 | tcp_queue_fin(sk);
282 | break;
283 | default:
284 | print_err("Unknown TCP state for close\n");
285 | return -1;
286 | }
287 |
288 | return 0;
289 | }
290 |
/*
 * Abort the connection: send a reset, then finish the sock with tcp_done()
 * and return its result.
 */
int tcp_abort(struct sock *sk)
{
    tcp_send_reset(tcp_sk(sk));

    return tcp_done(sk);
}
297 |
298 | static int tcp_free(struct sock *sk)
299 | {
300 | struct tcp_sock *tsk = tcp_sk(sk);
301 |
302 | tcp_clear_timers(sk);
303 | tcp_clear_queues(tsk);
304 |
305 | wait_wakeup(&sk->sock->sleep);
306 |
307 | return 0;
308 | }
309 |
310 | int tcp_done(struct sock *sk)
311 | {
312 | tcp_set_state(sk, TCP_CLOSING);
313 | tcp_free(sk);
314 | return socket_delete(sk->sock);
315 | }
316 |
317 | void tcp_clear_timers(struct sock *sk)
318 | {
319 | struct tcp_sock *tsk = tcp_sk(sk);
320 | tcp_stop_rto_timer(tsk);
321 | tcp_stop_delack_timer(tsk);
322 |
323 | timer_cancel(tsk->keepalive);
324 | tsk->keepalive = NULL;
325 | timer_cancel(tsk->linger);
326 | tsk->linger = NULL;
327 | }
328 |
329 | void tcp_stop_rto_timer(struct tcp_sock *tsk)
330 | {
331 | if (tsk) {
332 | timer_cancel(tsk->retransmit);
333 | tsk->retransmit = NULL;
334 | tsk->backoff = 0;
335 | }
336 | }
337 |
338 | void tcp_release_rto_timer(struct tcp_sock *tsk)
339 | {
340 | if (tsk) {
341 | timer_release(tsk->retransmit);
342 | tsk->retransmit = NULL;
343 | }
344 | }
345 |
346 | void tcp_stop_delack_timer(struct tcp_sock *tsk)
347 | {
348 | timer_cancel(tsk->delack);
349 | tsk->delack = NULL;
350 | }
351 |
352 | void tcp_release_delack_timer(struct tcp_sock *tsk)
353 | {
354 | timer_release(tsk->delack);
355 | tsk->delack = NULL;
356 | }
357 |
358 | void tcp_handle_fin_state(struct sock *sk)
359 | {
360 | switch (sk->state) {
361 | case TCP_CLOSE_WAIT:
362 | tcp_set_state(sk, TCP_LAST_ACK);
363 | break;
364 | case TCP_ESTABLISHED:
365 | tcp_set_state(sk, TCP_FIN_WAIT_1);
366 | break;
367 | }
368 | }
369 |
370 | static void *tcp_linger(void *arg)
371 | {
372 | struct sock *sk = (struct sock *) arg;
373 | socket_wr_acquire(sk->sock);
374 | struct tcp_sock *tsk = tcp_sk(sk);
375 | tcpsock_dbg("TCP time-wait timeout, freeing TCB", sk);
376 |
377 | timer_cancel(tsk->linger);
378 | tsk->linger = NULL;
379 |
380 | tcp_done(sk);
381 | socket_release(sk->sock);
382 |
383 | return NULL;
384 | }
385 |
386 | static void *tcp_user_timeout(void *arg)
387 | {
388 | struct sock *sk = (struct sock *) arg;
389 | socket_wr_acquire(sk->sock);
390 | struct tcp_sock *tsk = tcp_sk(sk);
391 | tcpsock_dbg("TCP user timeout, freeing TCB and aborting conn", sk);
392 |
393 | timer_cancel(tsk->linger);
394 | tsk->linger = NULL;
395 |
396 | tcp_abort(sk);
397 | socket_release(sk->sock);
398 |
399 | return NULL;
400 | }
401 |
402 | void tcp_enter_time_wait(struct sock *sk)
403 | {
404 | struct tcp_sock *tsk = tcp_sk(sk);
405 |
406 | tcp_set_state(sk, TCP_TIME_WAIT);
407 |
408 | tcp_clear_timers(sk);
409 | /* RFC793 arbitrarily defines MSL to be 2 minutes */
410 | tsk->linger = timer_add(TCP_2MSL, &tcp_linger, sk);
411 | }
412 |
413 | void tcp_rearm_user_timeout(struct sock *sk)
414 | {
415 | struct tcp_sock *tsk = tcp_sk(sk);
416 |
417 | if (sk->state == TCP_TIME_WAIT) return;
418 |
419 | timer_cancel(tsk->linger);
420 | /* RFC793 set user timeout */
421 | tsk->linger = timer_add(TCP_USER_TIMEOUT, &tcp_user_timeout, sk);
422 | }
423 |
424 | void tcp_rtt(struct tcp_sock *tsk)
425 | {
426 | if (tsk->backoff > 0 || !tsk->retransmit) {
427 | // Karn's Algorithm: Don't measure retransmissions
428 | return;
429 | }
430 |
431 | int r = timer_get_tick() - (tsk->retransmit->expires - tsk->rto);
432 | if (r < 0) return;
433 |
434 | if (!tsk->srtt) {
435 | /* RFC6298 2.2 first measurement is made */
436 | tsk->srtt = r;
437 | tsk->rttvar = r / 2;
438 | } else {
439 | /* RFC6298 2.3 a subsequent measurement is made */
440 | double beta = 0.25;
441 | double alpha = 0.125;
442 | tsk->rttvar = (1 - beta) * tsk->rttvar + beta * abs(tsk->srtt - r);
443 | tsk->srtt = (1 - alpha) * tsk->srtt + alpha * r;
444 | }
445 |
446 | int k = 4 * tsk->rttvar;
447 |
448 | /* RFC6298 says RTO should be at least 1 second. Linux uses 200ms */
449 | if (k < 200) k = 200;
450 |
451 | tsk->rto = tsk->srtt + k;
452 | }
453 |
454 | int tcp_calculate_sacks(struct tcp_sock *tsk)
455 | {
456 | struct tcp_sack_block *sb = &tsk->sacks[tsk->sacklen];
457 |
458 | sb->left = 0;
459 | sb->right = 0;
460 |
461 | struct sk_buff *next;
462 | struct list_head *item, *tmp;
463 |
464 | list_for_each_safe(item, tmp, &tsk->ofo_queue.head) {
465 | next = list_entry(item, struct sk_buff, list);
466 |
467 | if (sb->left == 0) {
468 | sb->left = next->seq;
469 | tsk->sacklen++;
470 | }
471 |
472 | if (sb->right == 0) sb->right = next->end_seq;
473 | else if (sb->right == next->seq) sb->right = next->end_seq;
474 | else {
475 | if (tsk->sacklen >= tsk->sacks_allowed) break;
476 |
477 | sb = &tsk->sacks[tsk->sacklen];
478 | sb->left = next->seq;
479 | sb->right = next->end_seq;
480 | tsk->sacklen++;
481 | }
482 | }
483 |
484 | return 0;
485 | }
486 |
--------------------------------------------------------------------------------
/src/tcp_data.c:
--------------------------------------------------------------------------------
1 | #include "syshead.h"
2 | #include "tcp.h"
3 | #include "list.h"
4 |
5 | /* Routine for inserting skbs ordered by seq into queue */
6 | static void tcp_data_insert_ordered(struct sk_buff_head *queue, struct sk_buff *skb)
7 | {
8 | struct sk_buff *next;
9 | struct list_head *item, *tmp;
10 |
11 | list_for_each_safe(item, tmp, &queue->head) {
12 | next = list_entry(item, struct sk_buff, list);
13 |
14 | if (skb->seq < next->seq) {
15 | if (skb->end_seq > next->seq) {
16 | /* TODO: We need to join skbs */
17 | print_err("Could not join skbs\n");
18 | } else {
19 | skb->refcnt++;
20 | skb_queue_add(queue, skb, next);
21 | return;
22 | }
23 | } else if (skb->seq == next->seq) {
24 | /* We already have this segment! */
25 | return;
26 | }
27 | }
28 |
29 | skb->refcnt++;
30 | skb_queue_tail(queue, skb);
31 | }
32 |
33 | /* Routine for transforming out-of-order segments into order */
34 | static void tcp_consume_ofo_queue(struct tcp_sock *tsk)
35 | {
36 | struct sock *sk = &tsk->sk;
37 | struct tcb *tcb = &tsk->tcb;
38 | struct sk_buff *skb = NULL;
39 |
40 | while ((skb = skb_peek(&tsk->ofo_queue)) != NULL
41 | && tcb->rcv_nxt == skb->seq) {
42 | /* skb is in-order, put it in receive queue */
43 | tcb->rcv_nxt += skb->dlen;
44 | skb_dequeue(&tsk->ofo_queue);
45 | skb_queue_tail(&sk->receive_queue, skb);
46 | }
47 | }
48 |
49 | /*
50 |  * Copies up to userlen bytes of queued in-order data into user_buf.
51 |  *
52 |  * Segments are taken from the head of the receive queue. A partially read
53 |  * skb stays queued with its payload pointer and dlen advanced; a fully read
54 |  * skb is dequeued and released, and TCP_PSH is latched into tsk->flags when
55 |  * its header had PSH set. POLLIN is cleared once the queue is drained,
56 |  * unless TCP_FIN is set in tsk->flags.
57 |  *
58 |  * Returns the number of bytes copied.
59 |  */
60 | int tcp_data_dequeue(struct tcp_sock *tsk, void *user_buf, int userlen)
61 | {
62 |     struct sock *sk = &tsk->sk;
63 |     char *out = user_buf;
64 |     int copied = 0;
65 |
66 |     while (copied < userlen && !skb_queue_empty(&sk->receive_queue)) {
67 |         struct sk_buff *skb = skb_peek(&sk->receive_queue);
68 |         if (skb == NULL) break;
69 |
70 |         struct tcphdr *th = tcp_hdr(skb);
71 |
72 |         /* Never copy more than what still fits into the user buffer */
73 |         int chunk = skb->dlen;
74 |         if ((copied + skb->dlen) > userlen) {
75 |             chunk = userlen - copied;
76 |         }
77 |
78 |         memcpy(out, skb->payload, chunk);
79 |
80 |         /* Advance both sides so a later call resumes where this one stopped */
81 |         out += chunk;
82 |         copied += chunk;
83 |         skb->payload += chunk;
84 |         skb->dlen -= chunk;
85 |
86 |         if (skb->dlen != 0) continue;
87 |
88 |         /* skb is fully eaten, process flags and drop it */
89 |         if (th->psh) tsk->flags |= TCP_PSH;
90 |         skb_dequeue(&sk->receive_queue);
91 |         skb->refcnt--;
92 |         free_skb(skb);
93 |     }
94 |
95 |     int drained = skb_queue_empty(&sk->receive_queue);
96 |     if (drained && !(tsk->flags & TCP_FIN)) {
97 |         sk->poll_events &= ~POLLIN;
98 |     }
99 |
100 |     return copied;
101 | }
86 |
87 | /*
88 |  * Queues an incoming segment that passed validation.
89 |  *
90 |  * An in-order segment (skb->seq == rcv_nxt) is appended to the receive
91 |  * queue, rcv_nxt is advanced, any now-contiguous out-of-order segments are
92 |  * pulled in and the reader is notified. Any other segment goes to the
93 |  * out-of-order queue and an immediate ACK is sent.
94 |  *
95 |  * This function never releases skb. When it keeps the skb it takes its own
96 |  * reference (refcnt++); the caller remains responsible for its free_skb().
97 |  * tcp_input_state() keeps reading the skb after this call and frees it at
98 |  * the end, so freeing it here as well would be a use-after-free.
99 |  *
100 |  * Returns -1 if the receive window is zero (nothing is queued), 0 otherwise.
101 |  * th is currently unused.
102 |  */
103 | int tcp_data_queue(struct tcp_sock *tsk, struct tcphdr *th, struct sk_buff *skb)
104 | {
105 |     struct sock *sk = &tsk->sk;
106 |     struct tcb *tcb = &tsk->tcb;
107 |     int rc = 0;
108 |
109 |     if (!tcb->rcv_wnd) {
110 |         /* Nothing queued; skb stays owned by the caller */
111 |         return -1;
112 |     }
113 |
114 |     int expected = skb->seq == tcb->rcv_nxt;
115 |     if (expected) {
116 |         tcb->rcv_nxt += skb->dlen;
117 |
118 |         skb->refcnt++;
119 |         skb_queue_tail(&sk->receive_queue, skb);
120 |
121 |         tcp_consume_ofo_queue(tsk);
122 |
123 |         // There is new data for user to read
124 |         sk->poll_events |= (POLLIN | POLLPRI | POLLRDNORM | POLLRDBAND);
125 |         tsk->sk.ops->recv_notify(&tsk->sk);
126 |     } else {
127 |         /* Segment passed validation, hence it is in-window
128 |            but not the left-most sequence. Put into out-of-order queue
129 |            for later processing */
130 |         tcp_data_insert_ordered(&tsk->ofo_queue, skb);
131 |
132 |         if (tsk->sackok) {
133 |             tcp_calculate_sacks(tsk);
134 |         }
135 |
136 |         /* RFC5681: A TCP receiver SHOULD send an immediate duplicate ACK when an out-
137 |          * of-order segment arrives. The purpose of this ACK is to inform the
138 |          * sender that a segment was received out-of-order and which sequence
139 |          * number is expected. */
140 |         tcp_send_ack(sk);
141 |     }
142 |
143 |     return rc;
144 | }
129 |
--------------------------------------------------------------------------------
/src/tcp_input.c:
--------------------------------------------------------------------------------
1 | #include "syshead.h"
2 | #include "tcp.h"
3 | #include "tcp_data.h"
4 | #include "skbuff.h"
5 | #include "sock.h"
6 |
7 | /*
8 |  * Parses the TCP options that follow the fixed 20-byte header of th and
9 |  * updates the socket accordingly:
10 |  *  - MSS: stored in tsk->smss when 536 < mss <= 1460
11 |  *  - SACK permitted / timestamps: only recorded as "seen"
12 |  *
13 |  * Options are walked as kind/length/value records. NOOP is a single byte,
14 |  * kind 0 (End of Option List) stops parsing, and every other option is
15 |  * skipped by its own length byte. A malformed length (below 2 or past the
16 |  * end of the option area) stops parsing instead of reading out of bounds.
17 |  *
18 |  * After parsing, tsk->tsopt is cleared if no timestamp option was seen, and
19 |  * SACK stays enabled only if both sides agreed on it.
20 |  *
21 |  * Always returns 0.
22 |  */
23 | static int tcp_parse_opts(struct tcp_sock *tsk, struct tcphdr *th)
24 | {
25 |     uint8_t *ptr = th->data;
26 |     int optlen = tcp_hlen(th) - 20;
27 |     uint8_t sack_seen = 0;
28 |     uint8_t tsopt_seen = 0;
29 |
30 |     while (optlen > 0) {
31 |         uint8_t kind = ptr[0];
32 |
33 |         /* Kind 0 is End of Option List: only padding may follow */
34 |         if (kind == 0) break;
35 |
36 |         if (kind == TCP_OPT_NOOP) {
37 |             ptr += 1;
38 |             optlen -= 1;
39 |             continue;
40 |         }
41 |
42 |         /* Every remaining option carries a length byte covering kind+len+value */
43 |         if (optlen < 2) break;
44 |
45 |         uint8_t len = ptr[1];
46 |         if (len < 2 || len > optlen) break;
47 |
48 |         switch (kind) {
49 |         case TCP_OPT_MSS:
50 |             if (len == sizeof(struct tcp_opt_mss)) {
51 |                 struct tcp_opt_mss *opt_mss = (struct tcp_opt_mss *)ptr;
52 |                 uint16_t mss = ntohs(opt_mss->mss);
53 |
54 |                 if (mss > 536 && mss <= 1460) {
55 |                     tsk->smss = mss;
56 |                 }
57 |             }
58 |             break;
59 |         case TCP_OPT_SACK_OK:
60 |             sack_seen = 1;
61 |             break;
62 |         case TCP_OPT_TS:
63 |             tsopt_seen = 1;
64 |             break;
65 |         default:
66 |             print_err("Unrecognized TCPOPT\n");
67 |             break;
68 |         }
69 |
70 |         ptr += len;
71 |         optlen -= len;
72 |     }
73 |
74 |     if (!tsopt_seen) {
75 |         tsk->tsopt = 0;
76 |     }
77 |
78 |     if (sack_seen && tsk->sackok) {
79 |         // There's room for 4 sack blocks without TS OPT
80 |         if (tsk->tsopt) tsk->sacks_allowed = 3;
81 |         else tsk->sacks_allowed = 4;
82 |     } else {
83 |         tsk->sackok = 0;
84 |     }
85 |
86 |     return 0;
87 | }
61 |
62 | /*
63 |  * Acks all segments from the retransmission queue that are "older"
64 |  * than the current unacknowledged sequence una.
65 |  *
66 |  * Segments are released from the head of the write queue while they have a
67 |  * non-zero seq and end at or before una; inflight is decremented for each
68 |  * one (never below zero). The RTO timer is stopped when the queue ran empty
69 |  * or nothing is in flight any more.
70 |  *
71 |  * Always returns 0.
72 |  */
73 | static int tcp_clean_rto_queue(struct sock *sk, uint32_t una)
74 | {
75 |     struct tcp_sock *tsk = tcp_sk(sk);
76 |     struct sk_buff *head;
77 |
78 |     for (;;) {
79 |         head = skb_peek(&sk->write_queue);
80 |         if (head == NULL) break;
81 |
82 |         /* Stop at the first skb that is not fully acknowledged */
83 |         if (!(head->seq > 0 && head->end_seq <= una)) break;
84 |
85 |         skb_dequeue(&sk->write_queue);
86 |         head->refcnt--;
87 |         free_skb(head);
88 |
89 |         if (tsk->inflight > 0) {
90 |             tsk->inflight--;
91 |         }
92 |     }
93 |
94 |     /* No unacknowledged skbs, stop rto timer */
95 |     if (head == NULL || tsk->inflight == 0) {
96 |         tcp_stop_rto_timer(tsk);
97 |     }
98 |
99 |     return 0;
100 | }
93 |
94 | /* Releases skb through free_skb(); sk is not used. Always returns 0. */
95 | static inline int __tcp_drop(struct sock *sk, struct sk_buff *skb)
96 | {
97 |     (void) sk;
98 |
99 |     free_skb(skb);
100 |
101 |     return 0;
102 | }
99 |
100 | /*
101 |  * Checks whether an incoming segment is acceptable.
102 |  *
103 |  * Returns 0 when the segment carries data while the receive window is zero,
104 |  * or when th->seq lies outside [rcv_nxt, rcv_nxt + rcv_wnd]; returns 1
105 |  * otherwise.
106 |  */
107 | static int tcp_verify_segment(struct tcp_sock *tsk, struct tcphdr *th, struct sk_buff *skb)
108 | {
109 |     struct tcb *tcb = &tsk->tcb;
110 |
111 |     /* Data cannot be taken while the window is closed */
112 |     if (tcb->rcv_wnd == 0 && skb->dlen > 0) return 0;
113 |
114 |     int in_window = th->seq >= tcb->rcv_nxt &&
115 |                     th->seq <= (tcb->rcv_nxt + tcb->rcv_wnd);
116 |
117 |     if (in_window) return 1;
118 |
119 |     tcpsock_dbg("Received invalid segment", (&tsk->sk));
120 |     return 0;
121 | }
114 |
115 | /*
116 |  * TCP RST received.
117 |  *
118 |  * Replaces the poll events with the writable/error/hang-up set, stores an
119 |  * errno in sk->err that depends on the current state and finishes the
120 |  * connection through tcp_done(). A socket already in TCP_CLOSE only gets the
121 |  * poll events updated.
122 |  */
123 | static void tcp_reset(struct sock *sk)
124 | {
125 |     sk->poll_events = (POLLOUT | POLLWRNORM | POLLERR | POLLHUP);
126 |
127 |     if (sk->state == TCP_CLOSE) return;
128 |
129 |     if (sk->state == TCP_SYN_SENT) {
130 |         sk->err = -ECONNREFUSED;
131 |     } else if (sk->state == TCP_CLOSE_WAIT) {
132 |         sk->err = -EPIPE;
133 |     } else {
134 |         sk->err = -ECONNRESET;
135 |     }
136 |
137 |     tcp_done(sk);
138 | }
135 |
136 | /* Releases skb through free_skb(); tsk and th are not used. Always returns 0. */
137 | static inline int tcp_discard(struct tcp_sock *tsk, struct sk_buff *skb, struct tcphdr *th)
138 | {
139 |     (void) tsk;
140 |     (void) th;
141 |
142 |     free_skb(skb);
143 |
144 |     return 0;
145 | }
141 |
142 | /*
143 |  * Segment handler for the LISTEN state. The segment is only released;
144 |  * nothing else is done with it. Always returns 0.
145 |  */
146 | static int tcp_listen(struct tcp_sock *tsk, struct sk_buff *skb, struct tcphdr *th)
147 | {
148 |     (void) tsk;
149 |     (void) th;
150 |
151 |     free_skb(skb);
152 |
153 |     return 0;
154 | }
147 |
148 | /*
149 |  * Segment handler for the SYN_SENT state.
150 |  *
151 |  * Validates the ACK (if any), handles RST, and on a SYN either completes
152 |  * the handshake (our SYN was acknowledged: ESTABLISHED, ACK sent, options
153 |  * parsed, user notified) or moves to SYN_RECEIVED and answers with a
154 |  * SYN-ACK. The segment is dropped on every path. Always returns 0.
155 |  */
156 | static int tcp_synsent(struct tcp_sock *tsk, struct sk_buff *skb, struct tcphdr *th)
157 | {
158 |     struct sock *sk = &tsk->sk;
159 |     struct tcb *tcb = &tsk->tcb;
160 |     int ack_bad;
161 |
162 |     tcpsock_dbg("state is synsent", sk);
163 |
164 |     /* first check the ACK bit */
165 |     if (th->ack) {
166 |         ack_bad = th->ack_seq <= tcb->iss || th->ack_seq > tcb->snd_nxt;
167 |         if (ack_bad) {
168 |             tcpsock_dbg("ACK is unacceptable", sk);
169 |
170 |             if (th->rst) goto discard;
171 |             goto reset_and_discard;
172 |         }
173 |
174 |         ack_bad = th->ack_seq < tcb->snd_una || th->ack_seq > tcb->snd_nxt;
175 |         if (ack_bad) {
176 |             tcpsock_dbg("ACK is unacceptable", sk);
177 |             goto reset_and_discard;
178 |         }
179 |     }
180 |
181 |     /* ACK is acceptable; second check the RST bit */
182 |     if (th->rst) {
183 |         tcp_reset(sk);
184 |         goto discard;
185 |     }
186 |
187 |     /* third check the security and precedence -> ignored */
188 |
189 |     /* fourth check the SYN bit */
190 |     if (!th->syn) goto discard;
191 |
192 |     tcb->irs = th->seq;
193 |     tcb->rcv_nxt = th->seq + 1;
194 |
195 |     if (th->ack) {
196 |         tcb->snd_una = th->ack_seq;
197 |         /* Any packets in RTO queue that are acknowledged here should be removed */
198 |         tcp_clean_rto_queue(sk, tcb->snd_una);
199 |     }
200 |
201 |     if (tcb->snd_una <= tcb->iss) {
202 |         /* Our SYN is not acknowledged yet */
203 |         tcp_set_state(sk, TCP_SYN_RECEIVED);
204 |         tcb->snd_una = tcb->iss;
205 |         tcp_send_synack(sk);
206 |         goto discard;
207 |     }
208 |
209 |     /* Our SYN was acknowledged: the connection is up */
210 |     tcp_set_state(sk, TCP_ESTABLISHED);
211 |     tcb->snd_una = tcb->snd_nxt;
212 |     tsk->backoff = 0;
213 |     /* RFC 6298: Sender SHOULD set RTO <- 1 second */
214 |     tsk->rto = 1000;
215 |     tcp_send_ack(sk);
216 |     tcp_rearm_user_timeout(sk);
217 |     tcp_parse_opts(tsk, th);
218 |     sock_connected(sk);
219 |
220 | discard:
221 |     tcp_drop(sk, skb);
222 |     return 0;
223 | reset_and_discard:
224 |     //TODO reset
225 |     tcp_drop(sk, skb);
226 |     return 0;
227 | }
215 |
216 | /*
217 |  * Segment handler for the CLOSED state.
218 |  *
219 |  * RFC793: all data in the incoming segment is discarded. An incoming
220 |  * segment containing a RST is discarded. An incoming segment not
221 |  * containing a RST causes a RST to be sent in response.
222 |  *
223 |  * TODO: the RFC selects the sequence/acknowledgment values of that RST
224 |  * depending on whether the incoming segment had the ACK bit set; both
225 |  * cases are currently treated the same way.
226 |  *
227 |  * Returns 0 for a discarded RST, otherwise the result of tcp_send_reset().
228 |  */
229 | static int tcp_closed(struct tcp_sock *tsk, struct sk_buff *skb, struct tcphdr *th)
230 | {
231 |     tcpsock_dbg("state is closed", (&tsk->sk));
232 |
233 |     if (th->rst) {
234 |         tcp_discard(tsk, skb, th);
235 |         return 0;
236 |     }
237 |
238 |     int rc = tcp_send_reset(tsk);
239 |     free_skb(skb);
240 |
241 |     return rc;
242 | }
260 |
261 | /*
262 |  * Handles an incoming segment for sk according to the connection state.
263 |  * Follows RFC793 "Segment Arrives" section closely.
264 |  *
265 |  * CLOSE, LISTEN and SYN_SENT are delegated to tcp_closed(), tcp_listen()
266 |  * and tcp_synsent(). For every other state the checks run in RFC order:
267 |  * sequence number, RST, SYN, ACK, URG, segment text, FIN, and finally the
268 |  * delayed-ACK / pending-send logic.
269 |  *
270 |  * Returns the delegated handler's result for the three states above, the
271 |  * tcp_done() result when LAST_ACK completes, and 0 otherwise.
272 |  */
273 | int tcp_input_state(struct sock *sk, struct tcphdr *th, struct sk_buff *skb)
274 | {
275 |     struct tcp_sock *tsk = tcp_sk(sk);
276 |     struct tcb *tcb = &tsk->tcb;
277 |
278 |     tcpsock_dbg("input state", sk);
279 |
280 |     switch (sk->state) {
281 |     case TCP_CLOSE:
282 |         return tcp_closed(tsk, skb, th);
283 |     case TCP_LISTEN:
284 |         return tcp_listen(tsk, skb, th);
285 |     case TCP_SYN_SENT:
286 |         return tcp_synsent(tsk, skb, th);
287 |     }
288 |
289 |     /* "Otherwise" section in RFC793 */
290 |
291 |     /* first check sequence number */
292 |     if (!tcp_verify_segment(tsk, th, skb)) {
293 |         /* RFC793: If an incoming segment is not acceptable, an acknowledgment
294 |          * should be sent in reply (unless the RST bit is set, if so drop
295 |          * the segment and return): */
296 |         if (!th->rst) {
297 |             tcp_send_ack(sk);
298 |         }
299 |         return_tcp_drop(sk, skb);
300 |     }
301 |
302 |     /* second check the RST bit */
303 |     if (th->rst) {
304 |         free_skb(skb);
305 |         tcp_enter_time_wait(sk);
306 |         tsk->sk.ops->recv_notify(&tsk->sk);
307 |         return 0;
308 |     }
309 |
310 |     /* third check security and precedence */
311 |     // Not implemented
312 |
313 |     /* fourth check the SYN bit */
314 |     if (th->syn) {
315 |         /* RFC 5961 Section 4.2 */
316 |         tcp_send_challenge_ack(sk, skb);
317 |         return_tcp_drop(sk, skb);
318 |     }
319 |
320 |     /* fifth check the ACK field */
321 |     if (!th->ack) {
322 |         return_tcp_drop(sk, skb);
323 |     }
324 |
325 |     // ACK bit is on
326 |     switch (sk->state) {
327 |     case TCP_SYN_RECEIVED:
328 |         /* RFC793: SND.UNA =< SEG.ACK =< SND.NXT. The upper bound has to be
329 |          * inclusive, because the ACK of our SYN carries SEG.ACK == SND.NXT */
330 |         if (tcb->snd_una <= th->ack_seq && th->ack_seq <= tcb->snd_nxt) {
331 |             tcp_set_state(sk, TCP_ESTABLISHED);
332 |         } else {
333 |             return_tcp_drop(sk, skb);
334 |         }
335 |         /* fallthrough: continue with the ESTABLISHED processing */
336 |     case TCP_ESTABLISHED:
337 |     case TCP_FIN_WAIT_1:
338 |     case TCP_FIN_WAIT_2:
339 |     case TCP_CLOSE_WAIT:
340 |     case TCP_CLOSING:
341 |     case TCP_LAST_ACK:
342 |         if (tcb->snd_una < th->ack_seq && th->ack_seq <= tcb->snd_nxt) {
343 |             tcb->snd_una = th->ack_seq;
344 |             /* Any segments on the retransmission queue which are thereby
345 |                entirely acknowledged are removed. */
346 |             tcp_rtt(tsk);
347 |             tcp_clean_rto_queue(sk, tcb->snd_una);
348 |         }
349 |
350 |         if (th->ack_seq < tcb->snd_una) {
351 |             // If the ACK is a duplicate, it can be ignored
352 |             return_tcp_drop(sk, skb);
353 |         }
354 |
355 |         if (th->ack_seq > tcb->snd_nxt) {
356 |             // If the ACK acks something not yet sent, then send an ACK, drop segment
357 |             // and return
358 |             // TODO: Dropping the seg here, why would I respond with an ACK? Linux
359 |             // does not respond either
360 |             //tcp_send_ack(&tsk->sk);
361 |             return_tcp_drop(sk, skb);
362 |         }
363 |
364 |         if (tcb->snd_una < th->ack_seq && th->ack_seq <= tcb->snd_nxt) {
365 |             // TODO: Send window should be updated
366 |         }
367 |
368 |         break;
369 |     }
370 |
371 |     /* If the write queue is empty, it means our FIN was acked */
372 |     if (skb_queue_empty(&sk->write_queue)) {
373 |         switch (sk->state) {
374 |         case TCP_FIN_WAIT_1:
375 |             tcp_set_state(sk, TCP_FIN_WAIT_2);
376 |             /* fallthrough */
377 |         case TCP_FIN_WAIT_2:
378 |             break;
379 |         case TCP_CLOSING:
380 |             /* In addition to the processing for the ESTABLISHED state, if
381 |              * the ACK acknowledges our FIN then enter the TIME-WAIT state,
382 |                otherwise ignore the segment. */
383 |             tcp_set_state(sk, TCP_TIME_WAIT);
384 |             break;
385 |         case TCP_LAST_ACK:
386 |             /* The only thing that can arrive in this state is an acknowledgment of our FIN.
387 |              * If our FIN is now acknowledged, delete the TCB, enter the CLOSED state, and return. */
388 |             free_skb(skb);
389 |             return tcp_done(sk);
390 |         case TCP_TIME_WAIT:
391 |             /* TODO: The only thing that can arrive in this state is a
392 |                retransmission of the remote FIN.  Acknowledge it, and restart
393 |                the 2 MSL timeout. */
394 |             if (tcb->rcv_nxt == th->seq) {
395 |                 tcpsock_dbg("Remote FIN retransmitted?", sk);
396 |                 // tcb->rcv_nxt += 1;
397 |                 tsk->flags |= TCP_FIN;
398 |                 tcp_send_ack(sk);
399 |             }
400 |             break;
401 |         }
402 |     }
403 |
404 |     /* sixth, check the URG bit */
405 |     if (th->urg) {
406 |
407 |     }
408 |
409 |     /* Sampled before the segment text is queued, since queueing advances rcv_nxt */
410 |     int expected = skb->seq == tcb->rcv_nxt;
411 |
412 |     /* seventh, process the segment txt */
413 |     switch (sk->state) {
414 |     case TCP_ESTABLISHED:
415 |     case TCP_FIN_WAIT_1:
416 |     case TCP_FIN_WAIT_2:
417 |         if (th->psh || skb->dlen > 0) {
418 |             tcp_data_queue(tsk, th, skb);
419 |         }
420 |
421 |         break;
422 |     case TCP_CLOSE_WAIT:
423 |     case TCP_CLOSING:
424 |     case TCP_LAST_ACK:
425 |     case TCP_TIME_WAIT:
426 |         /* This should not occur, since a FIN has been received from the
427 |            remote side.  Ignore the segment text. */
428 |         break;
429 |     }
430 |
431 |     /* eighth, check the FIN bit */
432 |     if (th->fin && expected) {
433 |         tcpsock_dbg("Received in-sequence FIN", sk);
434 |
435 |         switch (sk->state) {
436 |         case TCP_CLOSE:
437 |         case TCP_LISTEN:
438 |         case TCP_SYN_SENT:
439 |             // Do not process, since SEG.SEQ cannot be validated
440 |             goto drop_and_unlock;
441 |         }
442 |
443 |         tcb->rcv_nxt += 1;
444 |         tsk->flags |= TCP_FIN;
445 |         sk->poll_events |= (POLLIN | POLLPRI | POLLRDNORM | POLLRDBAND);
446 |
447 |         tcp_send_ack(sk);
448 |         tsk->sk.ops->recv_notify(&tsk->sk);
449 |
450 |         switch (sk->state) {
451 |         case TCP_SYN_RECEIVED:
452 |         case TCP_ESTABLISHED:
453 |             tcp_set_state(sk, TCP_CLOSE_WAIT);
454 |             break;
455 |         case TCP_FIN_WAIT_1:
456 |             /* If our FIN has been ACKed (perhaps in this segment), then
457 |                enter TIME-WAIT, start the time-wait timer, turn off the other
458 |                timers; otherwise enter the CLOSING state. */
459 |             if (skb_queue_empty(&sk->write_queue)) {
460 |                 tcp_enter_time_wait(sk);
461 |             } else {
462 |                 tcp_set_state(sk, TCP_CLOSING);
463 |             }
464 |
465 |             break;
466 |         case TCP_FIN_WAIT_2:
467 |             /* Enter the TIME-WAIT state.  Start the time-wait timer, turn
468 |                off the other timers. */
469 |             tcp_enter_time_wait(sk);
470 |             break;
471 |         case TCP_CLOSE_WAIT:
472 |         case TCP_CLOSING:
473 |         case TCP_LAST_ACK:
474 |             /* Remain in the state */
475 |             break;
476 |         case TCP_TIME_WAIT:
477 |             /* TODO: Remain in the TIME-WAIT state.  Restart the 2 MSL time-wait
478 |                timeout. */
479 |             break;
480 |         }
481 |     }
482 |
483 |     /* Congestion control and delacks */
484 |     switch (sk->state) {
485 |     case TCP_ESTABLISHED:
486 |     case TCP_FIN_WAIT_1:
487 |     case TCP_FIN_WAIT_2:
488 |         if (expected) {
489 |             tcp_stop_delack_timer(tsk);
490 |
491 |             int pending = min(skb_queue_len(&sk->write_queue), 3);
492 |             /* RFC1122:  A TCP SHOULD implement a delayed ACK, but an ACK should not
493 |              * be excessively delayed; in particular, the delay MUST be less than
494 |              * 0.5 seconds, and in a stream of full-sized segments there SHOULD
495 |              * be an ACK for at least every second segment. */
496 |             if (tsk->inflight == 0 && pending > 0) {
497 |                 tcp_send_next(sk, pending);
498 |                 tsk->inflight += pending;
499 |                 tcp_rearm_rto_timer(tsk);
500 |             } else if (th->psh || (skb->dlen > 1000 && ++tsk->delacks > 1)) {
501 |                 tsk->delacks = 0;
502 |                 tcp_send_ack(sk);
503 |             } else if (skb->dlen > 0) {
504 |                 tsk->delack = timer_add(200, &tcp_send_delack, &tsk->sk);
505 |             }
506 |         }
507 |     }
508 |
509 |     free_skb(skb);
510 |
511 | unlock:
512 |     return 0;
513 | drop_and_unlock:
514 |     tcp_drop(sk, skb);
515 |     goto unlock;
516 | }
503 |
504 | /*
505 |  * Reads up to len bytes of received data into buf.
506 |  *
507 |  * buf is zeroed first. Data is pulled with tcp_data_dequeue() until the
508 |  * buffer is full, a pushed segment was consumed (TCP_PSH, cleared here) or
509 |  * TCP_FIN is set. When more data is needed, a non-blocking socket returns
510 |  * what it has (or -EAGAIN if nothing was read), while a blocking socket
511 |  * releases the socket and sleeps on recv_wait before retrying.
512 |  *
513 |  * Returns the number of bytes read, or -EAGAIN. The user timeout is
514 |  * re-armed unless an error is returned.
515 |  */
516 | int tcp_receive(struct tcp_sock *tsk, void *buf, int len)
517 | {
518 |     struct sock *sk = &tsk->sk;
519 |     struct socket *sock = sk->sock;
520 |     char *dst = buf;
521 |     int total = 0;
522 |
523 |     memset(buf, 0, len);
524 |
525 |     while (total < len) {
526 |         int got = tcp_data_dequeue(tsk, dst + total, len - total);
527 |
528 |         total += got;
529 |
530 |         if (tsk->flags & TCP_PSH) {
531 |             tsk->flags &= ~TCP_PSH;
532 |             break;
533 |         }
534 |
535 |         if ((tsk->flags & TCP_FIN) || total == len) break;
536 |
537 |         if (sock->flags & O_NONBLOCK) {
538 |             if (total == 0) {
539 |                 total = -EAGAIN;
540 |             }
541 |
542 |             break;
543 |         }
544 |
545 |         /* Blocking socket: give up the socket while waiting for more data */
546 |         pthread_mutex_lock(&sk->recv_wait.lock);
547 |         socket_release(sock);
548 |         wait_sleep(&sk->recv_wait);
549 |         pthread_mutex_unlock(&sk->recv_wait.lock);
550 |         socket_wr_acquire(sock);
551 |     }
552 |
553 |     if (total >= 0) tcp_rearm_user_timeout(sk);
554 |
555 |     return total;
556 | }
545 |
--------------------------------------------------------------------------------
/src/tcp_output.c:
--------------------------------------------------------------------------------
1 | #include "syshead.h"
2 | #include "utils.h"
3 | #include "tcp.h"
4 | #include "ip.h"
5 | #include "skbuff.h"
6 | #include "timer.h"
7 |
8 | static void *tcp_retransmission_timeout(void *arg);
9 |
10 | /*
11 |  * Allocates an skb sized for the Ethernet, IP and TCP headers plus optlen
12 |  * bytes of options and size bytes of payload, and reserves that whole
13 |  * amount. The skb is marked as IP_TCP with dlen = size and seq = 0.
14 |  */
15 | static struct sk_buff *tcp_alloc_skb(int optlen, int size)
16 | {
17 |     int total = ETH_HDR_LEN + IP_HDR_LEN + TCP_HDR_LEN + optlen + size;
18 |     struct sk_buff *skb = alloc_skb(total);
19 |
20 |     skb_reserve(skb, total);
21 |
22 |     skb->seq = 0;
23 |     skb->dlen = size;
24 |     skb->protocol = IP_TCP;
25 |
26 |     return skb;
27 | }
22 |
23 | /*
24 |  * Writes the SYN options into th->data: always the MSS option, followed by
25 |  * NOOP, NOOP, SACK-permitted when opts->sack is set. The header length
26 |  * field is set to cover optlen bytes of options. Always returns 0.
27 |  */
28 | static int tcp_write_syn_options(struct tcphdr *th, struct tcp_options *opts, int optlen)
29 | {
30 |     struct tcp_opt_mss *mss_opt = (struct tcp_opt_mss *) th->data;
31 |     uint32_t off = sizeof(struct tcp_opt_mss);
32 |
33 |     mss_opt->kind = TCP_OPT_MSS;
34 |     mss_opt->len = TCP_OPTLEN_MSS;
35 |     mss_opt->mss = htons(opts->mss);
36 |
37 |     if (opts->sack) {
38 |         /* Two NOOPs pad the 2-byte SACK-permitted option to 4 bytes */
39 |         th->data[off] = TCP_OPT_NOOP;
40 |         th->data[off + 1] = TCP_OPT_NOOP;
41 |         th->data[off + 2] = TCP_OPT_SACK_OK;
42 |         th->data[off + 3] = TCP_OPTLEN_SACK;
43 |     }
44 |
45 |     th->hl = TCP_DOFFSET + (optlen / 4);
46 |
47 |     return 0;
48 | }
45 |
46 | /*
47 |  * Fills opts with the options to be sent in a SYN (MSS from tsk->rmss, and
48 |  * SACK-permitted if tsk->sackok is set) and returns their total length.
49 |  */
50 | static int tcp_syn_options(struct sock *sk, struct tcp_options *opts)
51 | {
52 |     struct tcp_sock *tsk = tcp_sk(sk);
53 |     int total = TCP_OPTLEN_MSS;
54 |
55 |     opts->mss = tsk->rmss;
56 |
57 |     if (!tsk->sackok) {
58 |         opts->sack = 0;
59 |         return total;
60 |     }
61 |
62 |     opts->sack = 1;
63 |     /* Room for the two NOOPs written before the SACK-permitted option */
64 |     total += TCP_OPT_NOOP * 2;
65 |     total += TCP_OPTLEN_SACK;
66 |
67 |     return total;
68 | }
64 |
65 | /*
66 |  * Writes a SACK option (NOOP, NOOP, kind, length, blocks) into th->data.
67 |  *
68 |  * Does nothing unless SACK is enabled and the first stored block is set.
69 |  * Blocks are emitted from the last stored one down to the first, each
70 |  * stored block is zeroed afterwards and sacklen is reset. Always returns 0.
71 |  */
72 | static int tcp_write_options(struct tcp_sock *tsk, struct tcphdr *th)
73 | {
74 |     if (!tsk->sackok || tsk->sacks[0].left == 0) return 0;
75 |
76 |     uint8_t *opt = th->data;
77 |
78 |     opt[0] = TCP_OPT_NOOP;
79 |     opt[1] = TCP_OPT_NOOP;
80 |     opt[2] = TCP_OPT_SACK;
81 |     opt[3] = 2 + tsk->sacklen * 8;
82 |
83 |     struct tcp_sack_block *out = (struct tcp_sack_block *)(opt + 4);
84 |     int i = tsk->sacklen;
85 |
86 |     while (i-- > 0) {
87 |         out->left = htonl(tsk->sacks[i].left);
88 |         out->right = htonl(tsk->sacks[i].right);
89 |         out++;
90 |
91 |         tsk->sacks[i].left = 0;
92 |         tsk->sacks[i].right = 0;
93 |     }
94 |
95 |     tsk->sacklen = 0;
96 |
97 |     return 0;
98 | }
92 |
93 | /*
94 |  * Fills in the TCP header of skb and hands the segment to ip_output().
95 |  *
96 |  * seq is the sequence number to send; the acknowledgment number and window
97 |  * come from the TCB (rcv_nxt, rcv_wnd). Options are written when the header
98 |  * length exceeds 5 words. Fields are converted to network byte order and
99 |  * the checksum is computed last. Returns the result of ip_output().
100 |  */
101 | static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, uint32_t seq)
102 | {
103 |     struct tcp_sock *tsk = tcp_sk(sk);
104 |     struct tcphdr *hdr = tcp_hdr(skb);
105 |
106 |     /* No options were previously set */
107 |     if (hdr->hl == 0) {
108 |         hdr->hl = TCP_DOFFSET;
109 |     }
110 |
111 |     skb_push(skb, hdr->hl * 4);
112 |
113 |     /* Host byte order first, so the debug output below is readable */
114 |     hdr->sport = sk->sport;
115 |     hdr->dport = sk->dport;
116 |     hdr->seq = seq;
117 |     hdr->ack_seq = tsk->tcb.rcv_nxt;
118 |     hdr->rsvd = 0;
119 |     hdr->win = tsk->tcb.rcv_wnd;
120 |     hdr->csum = 0;
121 |     hdr->urp = 0;
122 |
123 |     if (hdr->hl > 5) {
124 |         tcp_write_options(tsk, hdr);
125 |     }
126 |
127 |     tcp_out_dbg(hdr, sk, skb);
128 |
129 |     hdr->sport = htons(hdr->sport);
130 |     hdr->dport = htons(hdr->dport);
131 |     hdr->seq = htonl(hdr->seq);
132 |     hdr->ack_seq = htonl(hdr->ack_seq);
133 |     hdr->win = htons(hdr->win);
134 |     hdr->csum = htons(hdr->csum);
135 |     hdr->urp = htons(hdr->urp);
136 |
137 |     /* Checksum goes in last, over the finished header */
138 |     hdr->csum = tcp_v4_checksum(skb, htonl(sk->saddr), htonl(sk->daddr));
139 |
140 |     return ip_output(sk, skb);
141 | }
130 |
131 | /*
132 |  * Appends skb to the write queue, transmitting it right away when nothing
133 |  * is in flight.
134 |  *
135 |  * The RTO timer is re-armed if the write queue was empty. On transmission
136 |  * the skb's seq/end_seq are recorded and snd_nxt is advanced by the data
137 |  * length, plus one for a FIN. Returns the transmit result, or 0 if the skb
138 |  * was only queued.
139 |  */
140 | static int tcp_queue_transmit_skb(struct sock *sk, struct sk_buff *skb)
141 | {
142 |     struct tcp_sock *tsk = tcp_sk(sk);
143 |     struct tcb *tcb = &tsk->tcb;
144 |     struct tcphdr *hdr = tcp_hdr(skb);
145 |     int result = 0;
146 |
147 |     if (skb_queue_empty(&sk->write_queue)) {
148 |         tcp_rearm_rto_timer(tsk);
149 |     }
150 |
151 |     if (tsk->inflight == 0) {
152 |         result = tcp_transmit_skb(sk, skb, tcb->snd_nxt);
153 |         tsk->inflight++;
154 |
155 |         /* Store sequence information into the socket buffer */
156 |         skb->seq = tcb->snd_nxt;
157 |         tcb->snd_nxt += skb->dlen;
158 |         skb->end_seq = tcb->snd_nxt;
159 |
160 |         if (hdr->fin) {
161 |             tcb->snd_nxt++;
162 |         }
163 |     }
164 |
165 |     skb_queue_tail(&sk->write_queue, skb);
166 |
167 |     return result;
168 | }
157 |
158 | /*
159 |  * Sends a SYN-ACK segment with sequence number snd_nxt. The segment is not
160 |  * put on the write queue; it is released right after transmission.
161 |  *
162 |  * Returns 1 (after logging an error) if the socket is not in TCP_SYN_SENT,
163 |  * otherwise the result of the transmission.
164 |  */
165 | int tcp_send_synack(struct sock *sk)
166 | {
167 |     if (sk->state != TCP_SYN_SENT) {
168 |         print_err("TCP synack: Socket was not in correct state (SYN_SENT)\n");
169 |         return 1;
170 |     }
171 |
172 |     struct sk_buff *skb = tcp_alloc_skb(0, 0);
173 |     struct tcphdr *hdr = tcp_hdr(skb);
174 |
175 |     hdr->syn = 1;
176 |     hdr->ack = 1;
177 |
178 |     int result = tcp_transmit_skb(sk, skb, tcp_sk(sk)->tcb.snd_nxt);
179 |
180 |     free_skb(skb);
181 |
182 |     return result;
183 | }
181 |
182 | /*
183 |  * Timer handler for the delayed acknowledgment: with the socket acquired,
184 |  * resets the delayed-ACK counter, releases the delack timer and sends an
185 |  * ACK. arg is the struct sock. Always returns NULL.
186 |  */
187 | void *tcp_send_delack(void *arg)
188 | {
189 |     struct sock *sk = (struct sock *) arg;
190 |     struct tcp_sock *tsk = tcp_sk(sk);
191 |
192 |     socket_wr_acquire(sk->sock);
193 |
194 |     tsk->delacks = 0;
195 |     tcp_release_delack_timer(tsk);
196 |     tcp_send_ack(sk);
197 |
198 |     socket_release(sk->sock);
199 |
200 |     return NULL;
201 | }
197 |
198 | /*
199 |  * Transmits up to amount segments from the front of the write queue.
200 |  *
201 |  * Each segment has its header reset, is sent at snd_nxt and gets its
202 |  * seq/end_seq recorded; snd_nxt advances by the data length, plus one for a
203 |  * FIN. Returns 0, or -1 if a queue entry resolves to NULL.
204 |  */
205 | int tcp_send_next(struct sock *sk, int amount)
206 | {
207 |     struct tcb *tcb = &tcp_sk(sk)->tcb;
208 |     struct list_head *pos, *nxt;
209 |     struct sk_buff *skb;
210 |     int sent = 0;
211 |
212 |     list_for_each_safe(pos, nxt, &sk->write_queue.head) {
213 |         if (sent >= amount) break;
214 |         sent++;
215 |
216 |         skb = list_entry(pos, struct sk_buff, list);
217 |         if (skb == NULL) return -1;
218 |
219 |         skb_reset_header(skb);
220 |         tcp_transmit_skb(sk, skb, tcb->snd_nxt);
221 |
222 |         skb->seq = tcb->snd_nxt;
223 |         tcb->snd_nxt += skb->dlen;
224 |         skb->end_seq = tcb->snd_nxt;
225 |
226 |         /* A FIN occupies one sequence number of its own */
227 |         if (tcp_hdr(skb)->fin) {
228 |             tcb->snd_nxt++;
229 |         }
230 |     }
231 |
232 |     return 0;
233 | }
226 |
227 | /*
228 |  * Returns the number of option bytes an outgoing ACK needs: when SACK is
229 |  * enabled and blocks are stored, 2 bytes plus 8 per block whose left edge
230 |  * is non-zero, rounded up to a multiple of 4. Returns 0 otherwise.
231 |  */
232 | static int tcp_options_len(struct sock *sk)
233 | {
234 |     struct tcp_sock *tsk = tcp_sk(sk);
235 |     uint8_t optlen = 0;
236 |
237 |     if (tsk->sackok && tsk->sacklen > 0) {
238 |         optlen = 2;
239 |
240 |         for (int i = 0; i < tsk->sacklen; i++) {
241 |             if (tsk->sacks[i].left == 0) continue;
242 |             optlen += 8;
243 |         }
244 |     }
245 |
246 |     /* Pad to a 32-bit boundary */
247 |     for (; optlen % 4 != 0; optlen++)
248 |         ;
249 |
250 |     return optlen;
251 | }
246 |
247 | /*
248 |  * Sends a bare ACK at snd_nxt, with room for the options reported by
249 |  * tcp_options_len(). The segment is released right after transmission.
250 |  *
251 |  * Returns 0 without sending when the socket is in TCP_CLOSE, otherwise the
252 |  * result of the transmission.
253 |  */
254 | int tcp_send_ack(struct sock *sk)
255 | {
256 |     if (sk->state == TCP_CLOSE) return 0;
257 |
258 |     int optlen = tcp_options_len(sk);
259 |     struct sk_buff *skb = tcp_alloc_skb(optlen, 0);
260 |     struct tcphdr *hdr = tcp_hdr(skb);
261 |
262 |     hdr->ack = 1;
263 |     hdr->hl = TCP_DOFFSET + (optlen / 4);
264 |
265 |     int result = tcp_transmit_skb(sk, skb, tcp_sk(sk)->tcb.snd_nxt);
266 |
267 |     free_skb(skb);
268 |
269 |     return result;
270 | }
268 |
269 | /*
270 |  * Builds a SYN with the SYN options, moves the socket to TCP_SYN_SENT and
271 |  * queues/transmits the segment.
272 |  *
273 |  * Returns 1 (after logging an error) unless the socket is in SYN_SENT,
274 |  * CLOSE or LISTEN; otherwise the result of tcp_queue_transmit_skb().
275 |  */
276 | static int tcp_send_syn(struct sock *sk)
277 | {
278 |     switch (sk->state) {
279 |     case TCP_SYN_SENT:
280 |     case TCP_CLOSE:
281 |     case TCP_LISTEN:
282 |         break;
283 |     default:
284 |         print_err("Socket was not in correct state (closed or listen)\n");
285 |         return 1;
286 |     }
287 |
288 |     struct tcp_options opts = { 0 };
289 |     int optlen = tcp_syn_options(sk, &opts);
290 |     struct sk_buff *skb = tcp_alloc_skb(optlen, 0);
291 |     struct tcphdr *hdr = tcp_hdr(skb);
292 |
293 |     tcp_write_syn_options(hdr, &opts, optlen);
294 |     sk->state = TCP_SYN_SENT;
295 |     hdr->syn = 1;
296 |
297 |     return tcp_queue_transmit_skb(sk, skb);
298 | }
291 |
292 | /*
293 |  * Builds a FIN+ACK segment and passes it to tcp_queue_transmit_skb().
294 |  *
295 |  * Returns 0 without doing anything when the socket is in TCP_CLOSE,
296 |  * otherwise the result of tcp_queue_transmit_skb().
297 |  */
298 | int tcp_send_fin(struct sock *sk)
299 | {
300 |     if (sk->state == TCP_CLOSE) return 0;
301 |
302 |     struct sk_buff *skb = tcp_alloc_skb(0, 0);
303 |     struct tcphdr *hdr = tcp_hdr(skb);
304 |
305 |     hdr->fin = 1;
306 |     hdr->ack = 1;
307 |
308 |     return tcp_queue_transmit_skb(sk, skb);
309 | }
310 |
311 | /* Stores the fixed initial receive window (44477) into *rcv_wnd. */
312 | void tcp_select_initial_window(uint32_t *rcv_wnd)
313 | {
314 |     const uint32_t initial_window = 44477;
315 |
316 |     *rcv_wnd = initial_window;
317 | }
315 |
316 | /* Wakes up sleepers on the socket's wait object, but only in TCP_CLOSE_WAIT. */
317 | static void tcp_notify_user(struct sock *sk)
318 | {
319 |     if (sk->state != TCP_CLOSE_WAIT) return;
320 |
321 |     wait_wakeup(&sk->sock->sleep);
322 | }
324 |
325 | /*
326 |  * Timer handler for SYN retransmission; arg is the struct tcp_sock.
327 |  *
328 |  * While in TCP_SYN_SENT, the head of the write queue is re-sent at snd_una
329 |  * with an increased backoff and a re-armed timer. Once backoff exceeds
330 |  * TCP_CONN_RETRIES the connection attempt fails with -ETIMEDOUT. In any
331 |  * other state only an error is logged. Always returns NULL.
332 |  */
333 | static void *tcp_connect_rto(void *arg)
334 | {
335 |     struct tcp_sock *tsk = (struct tcp_sock *) arg;
336 |     struct sock *sk = &tsk->sk;
337 |     struct sk_buff *syn;
338 |
339 |     socket_wr_acquire(sk->sock);
340 |     tcp_release_rto_timer(tsk);
341 |
342 |     if (sk->state != TCP_SYN_SENT) {
343 |         print_err("TCP connect RTO triggered even when not in SYNSENT\n");
344 |         goto out;
345 |     }
346 |
347 |     if (tsk->backoff > TCP_CONN_RETRIES) {
348 |         /* Give up: report the timeout and finish the connection */
349 |         sk->err = -ETIMEDOUT;
350 |         sk->poll_events |= (POLLOUT | POLLERR | POLLHUP);
351 |         tcp_done(sk);
352 |         goto out;
353 |     }
354 |
355 |     syn = write_queue_head(sk);
356 |     if (syn) {
357 |         skb_reset_header(syn);
358 |         tcp_transmit_skb(sk, syn, tsk->tcb.snd_una);
359 |
360 |         tsk->backoff++;
361 |         tcp_rearm_rto_timer(tsk);
362 |     }
363 |
364 | out:
365 |     socket_release(sk->sock);
366 |
367 |     return NULL;
368 | }
358 |
359 | /*
360 |  * Timer handler for the retransmission timeout; arg is the struct tcp_sock.
361 |  *
362 |  * With the socket acquired, the head of the write queue is re-sent at
363 |  * snd_una. If the queue is empty, backoff is reset and the user is
364 |  * notified. If rto already exceeds 60 seconds the connection is given up
365 |  * with -ETIMEDOUT; otherwise rto is doubled, backoff is increased and the
366 |  * timer is started again. Always returns NULL.
367 |  */
368 | static void *tcp_retransmission_timeout(void *arg)
369 | {
370 |     struct tcp_sock *tsk = (struct tcp_sock *) arg;
371 |     struct tcb *tcb = &tsk->tcb;
372 |     struct sock *sk = &tsk->sk;
373 |
374 |     socket_wr_acquire(sk->sock);
375 |
376 |     tcp_release_rto_timer(tsk);
377 |
378 |     struct sk_buff *skb = write_queue_head(sk);
379 |
380 |     if (!skb) {
381 |         tsk->backoff = 0;
382 |         tcpsock_dbg("TCP RTO queue empty, notifying user", sk);
383 |         tcp_notify_user(sk);
384 |         goto unlock;
385 |     }
386 |
387 |     struct tcphdr *th = tcp_hdr(skb);
388 |     skb_reset_header(skb);
389 |
390 |     tcp_transmit_skb(sk, skb, tcb->snd_una);
391 |     /* RFC 6298: 2.5 Maximum value MAY be placed on RTO, provided it is at least
392 |        60 seconds */
393 |     if (tsk->rto > 60000) {
394 |         /* Record the error before tcp_done(), the same order used by
395 |          * tcp_connect_rto(), so the socket is not written to after it has
396 |          * been finished */
397 |         tsk->sk.err = -ETIMEDOUT;
398 |         sk->poll_events |= (POLLOUT | POLLERR | POLLHUP);
399 |
400 |         tcp_done(sk);
401 |         goto unlock;
402 |     }
403 |
404 |     /* RFC 6298: Section 5.5 double RTO time */
405 |     tsk->rto *= 2;
406 |     tsk->backoff++;
407 |     tsk->retransmit = timer_add(tsk->rto, &tcp_retransmission_timeout, tsk);
408 |
409 |     if (th->fin) {
410 |         tcp_handle_fin_state(sk);
411 |     }
412 |
413 | unlock:
414 |     socket_release(sk->sock);
415 |
416 |     return NULL;
417 | }
408 |
409 | /*
410 |  * Releases the current retransmission timer and starts a new one: in
411 |  * TCP_SYN_SENT a connect timer of TCP_SYN_BACKOFF << backoff, in any other
412 |  * state a retransmission timer of tsk->rto.
413 |  */
414 | void tcp_rearm_rto_timer(struct tcp_sock *tsk)
415 | {
416 |     tcp_release_rto_timer(tsk);
417 |
418 |     if (tsk->sk.state != TCP_SYN_SENT) {
419 |         tsk->retransmit = timer_add(tsk->rto, &tcp_retransmission_timeout, tsk);
420 |         return;
421 |     }
422 |
423 |     tsk->retransmit = timer_add(TCP_SYN_BACKOFF << tsk->backoff, &tcp_connect_rto, tsk);
424 | }
420 |
421 | /*
422 |  * Starts an active open: initialises the send side of the TCB from a fresh
423 |  * ISS, selects the initial receive window and sends the SYN. snd_nxt is
424 |  * advanced by one afterwards. Returns the result of tcp_send_syn().
425 |  */
426 | int tcp_connect(struct sock *sk)
427 | {
428 |     struct tcp_sock *tsk = tcp_sk(sk);
429 |     struct tcb *tcb = &tsk->tcb;
430 |
431 |     tsk->tcp_header_len = sizeof(struct tcphdr);
432 |
433 |     tcb->iss = generate_iss();
434 |     tcb->snd_una = tcb->iss;
435 |     tcb->snd_nxt = tcb->iss;
436 |     tcb->snd_up = tcb->iss;
437 |     tcb->snd_wnd = 0;
438 |     tcb->snd_wl1 = 0;
439 |     tcb->rcv_nxt = 0;
440 |
441 |     tcp_select_initial_window(&tcb->rcv_wnd);
442 |
443 |     int result = tcp_send_syn(sk);
444 |
445 |     tcb->snd_nxt++;
446 |
447 |     return result;
448 | }
444 |
445 | /*
446 |  * Splits len bytes from buf into segments of at most tsk->smss bytes and
447 |  * queues each one for transmission with ACK set; the last segment also
448 |  * carries PSH. A queueing result of -1 is only reported through perror().
449 |  * The user timeout is re-armed afterwards. Always returns len.
450 |  */
451 | int tcp_send(struct tcp_sock *tsk, const void *buf, int len)
452 | {
453 |     const char *src = buf;
454 |     int mss = tsk->smss;
455 |     int remaining = len;
456 |
457 |     while (remaining > 0) {
458 |         int chunk = remaining;
459 |         if (chunk > mss) {
460 |             chunk = mss;
461 |         }
462 |         remaining -= chunk;
463 |
464 |         struct sk_buff *skb = tcp_alloc_skb(0, chunk);
465 |         skb_push(skb, chunk);
466 |         memcpy(skb->data, src, chunk);
467 |         src += chunk;
468 |
469 |         struct tcphdr *hdr = tcp_hdr(skb);
470 |         hdr->ack = 1;
471 |
472 |         /* Last segment of this write */
473 |         if (remaining == 0) {
474 |             hdr->psh = 1;
475 |         }
476 |
477 |         if (tcp_queue_transmit_skb(&tsk->sk, skb) == -1) {
478 |             perror("Error on TCP skb queueing");
479 |         }
480 |     }
481 |
482 |     tcp_rearm_user_timeout(&tsk->sk);
483 |
484 |     return len;
485 | }
479 |
480 | /*
481 |  * Sends a RST at snd_nxt after setting snd_una to snd_nxt. The segment is
482 |  * released right after transmission. Returns the transmission result.
483 |  */
484 | int tcp_send_reset(struct tcp_sock *tsk)
485 | {
486 |     struct tcb *tcb = &tsk->tcb;
487 |     struct sk_buff *skb = tcp_alloc_skb(0, 0);
488 |
489 |     tcp_hdr(skb)->rst = 1;
490 |     tcb->snd_una = tcb->snd_nxt;
491 |
492 |     int result = tcp_transmit_skb(&tsk->sk, skb, tcb->snd_nxt);
493 |
494 |     free_skb(skb);
495 |
496 |     return result;
497 | }
499 |
500 | /* Placeholder: no challenge ACK is sent yet. Always returns 0. */
501 | int tcp_send_challenge_ack(struct sock *sk, struct sk_buff *skb)
502 | {
503 |     // TODO: implement me
504 |     (void) sk;
505 |     (void) skb;
506 |
507 |     return 0;
508 | }
505 |
506 | /*
507 |  * Builds a FIN+ACK segment, logs it and passes it to
508 |  * tcp_queue_transmit_skb(), whose result is returned.
509 |  */
510 | int tcp_queue_fin(struct sock *sk)
511 | {
512 |     struct sk_buff *skb = tcp_alloc_skb(0, 0);
513 |     struct tcphdr *hdr = tcp_hdr(skb);
514 |
515 |     hdr->fin = 1;
516 |     hdr->ack = 1;
517 |
518 |     tcpsock_dbg("Queueing fin", sk);
519 |
520 |     return tcp_queue_transmit_skb(sk, skb);
521 | }
524 |
--------------------------------------------------------------------------------
/src/timer.c:
--------------------------------------------------------------------------------
1 | #include "syshead.h"
2 | #include "timer.h"
3 | #include "socket.h"
4 |
5 | static LIST_HEAD(timers); /* List of all timers; walked by timers_tick() */
6 | static int tick = 0; /* Tick counter; timers_start() adds 10 after every 10000us sleep */
7 | static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER; /* Held while the timers list is walked or modified */
8 | static pthread_rwlock_t rwlock = PTHREAD_RWLOCK_INITIALIZER; /* Taken for updates of tick and by timer_get_tick() */
9 |
10 | /*
11 |  * Prints the number of timers currently in the list when DEBUG_TIMER is
12 |  * defined; does nothing otherwise.
13 |  */
14 | static void timer_debug()
15 | {
16 | #ifdef DEBUG_TIMER
17 |     struct list_head *pos;
18 |     int total = 0;
19 |
20 |     pthread_mutex_lock(&lock);
21 |
22 |     list_for_each(pos, &timers) {
23 |         total++;
24 |     }
25 |
26 |     pthread_mutex_unlock(&lock);
27 |
28 |     print_debug("TIMERS: Total amount currently %d", total);
29 | #endif
30 | }
32 |
33 | /* Destroys the timer's own mutex and frees the timer. */
34 | static void timer_free(struct timer *t)
35 | {
36 |     pthread_mutex_t *own_lock = &t->lock;
37 |
38 |     pthread_mutex_destroy(own_lock);
39 |     free(t);
40 | }
38 |
39 | /*
40 |  * Allocates a zero-initialised timer and initialises its mutex.
41 |  * NOTE(review): the allocation result is not checked before use.
42 |  */
43 | static struct timer *timer_alloc()
44 | {
45 |     struct timer *fresh = calloc(1, sizeof(struct timer));
46 |
47 |     pthread_mutex_init(&fresh->lock, NULL);
48 |
49 |     return fresh;
50 | }
46 |
47 | /*
48 |  * Walks the timer list once, holding the list lock:
49 |  *  - a timer that is not cancelled and whose expiry is before the current
50 |  *    tick is marked cancelled and its handler is started in a new thread
51 |  *  - a cancelled timer without references is unlinked and freed
52 |  *
53 |  * Timers whose own lock is busy are skipped until the next tick.
54 |  *
55 |  * Handler threads are detached right after creation: nothing ever joins
56 |  * them, so leaving them joinable would leak their resources on every
57 |  * expiry. NOTE(review): this assumes no handler detaches itself.
58 |  */
59 | static void timers_tick()
60 | {
61 |     struct list_head *item, *tmp = NULL;
62 |     struct timer *t = NULL;
63 |     int rc = 0;
64 |
65 |     if ((rc = pthread_mutex_lock(&lock)) != 0) {
66 |         print_err("Timer tick lock not acquired: %s\n", strerror(rc));
67 |         return;
68 |     };
69 |
70 |     list_for_each_safe(item, tmp, &timers) {
71 |         if (!item) continue;
72 |
73 |         t = list_entry(item, struct timer, list);
74 |
75 |         if ((rc = pthread_mutex_trylock(&t->lock)) != 0) {
76 |             if (rc != EBUSY) {
77 |                 print_err("Timer free mutex lock: %s\n", strerror(rc));
78 |             }
79 |
80 |             continue;
81 |         }
82 |
83 |         if (!t->cancelled && t->expires < tick) {
84 |             pthread_t th;
85 |
86 |             /* Marked first, so the handler is started at most once */
87 |             t->cancelled = 1;
88 |
89 |             rc = pthread_create(&th, NULL, t->handler, t->arg);
90 |             if (rc != 0) {
91 |                 print_err("Timer handler thread not created: %s\n", strerror(rc));
92 |             } else {
93 |                 pthread_detach(th);
94 |             }
95 |         }
96 |
97 |         if (t->cancelled && t->refcnt == 0) {
98 |             list_del(&t->list);
99 |             pthread_mutex_unlock(&t->lock);
100 |
101 |             timer_free(t);
102 |         } else {
103 |             pthread_mutex_unlock(&t->lock);
104 |         }
105 |     }
106 |
107 |     pthread_mutex_unlock(&lock);
108 | }
89 |
90 | /*
91 |  * Registers a timer that runs handler(arg) once it has expired, expire
92 |  * ticks after the current tick. The timer is created with refcnt 0 and no
93 |  * handle is returned to the caller.
94 |  */
95 | void timer_oneshot(uint32_t expire, void *(*handler)(void *), void *arg)
96 | {
97 |     struct timer *t = timer_alloc();
98 |     int now = timer_get_tick();
99 |
100 |     t->handler = handler;
101 |     t->arg = arg;
102 |     t->cancelled = 0;
103 |     t->refcnt = 0;
104 |     t->expires = now + expire;
105 |
106 |     if (t->expires < now) {
107 |         print_err("ERR: Timer expiry integer wrap around\n");
108 |     }
109 |
110 |     pthread_mutex_lock(&lock);
111 |     list_add_tail(&t->list, &timers);
112 |     pthread_mutex_unlock(&lock);
113 | }
111 |
112 | /*
113 |  * Registers a timer that runs handler(arg) once it has expired, expire
114 |  * ticks after the current tick, and returns it. The timer starts with
115 |  * refcnt 1, which the caller drops through timer_release() or
116 |  * timer_cancel().
117 |  */
118 | struct timer *timer_add(uint32_t expire, void *(*handler)(void *), void *arg)
119 | {
120 |     struct timer *t = timer_alloc();
121 |     int now = timer_get_tick();
122 |
123 |     t->handler = handler;
124 |     t->arg = arg;
125 |     t->cancelled = 0;
126 |     t->refcnt = 1;
127 |     t->expires = now + expire;
128 |
129 |     if (t->expires < now) {
130 |         print_err("ERR: Timer expiry integer wrap around\n");
131 |     }
132 |
133 |     pthread_mutex_lock(&lock);
134 |     list_add_tail(&t->list, &timers);
135 |     pthread_mutex_unlock(&lock);
136 |
137 |     return t;
138 | }
135 |
136 | void timer_release(struct timer *t)
137 | {
138 | int rc = 0;
139 |
140 | if (!t) return;
141 |
142 | if ((rc = pthread_mutex_lock(&t->lock)) != 0) {
143 | print_err("Timer release lock: %s\n", strerror(rc));
144 | return;
145 | };
146 |
147 | t->refcnt--;
148 |
149 | pthread_mutex_unlock(&t->lock);
150 | }
151 |
152 | void timer_cancel(struct timer *t)
153 | {
154 | int rc = 0;
155 |
156 | if (!t) return;
157 |
158 | if ((rc = pthread_mutex_lock(&t->lock)) != 0) {
159 | print_err("Timer cancel lock: %s\n", strerror(rc));
160 | return;
161 | };
162 |
163 | t->refcnt--;
164 | t->cancelled = 1;
165 |
166 | pthread_mutex_unlock(&t->lock);
167 | }
168 |
169 | void *timers_start()
170 | {
171 | while (1) {
172 | if (usleep(10000) != 0) {
173 | perror("Timer usleep");
174 | }
175 |
176 | pthread_rwlock_wrlock(&rwlock);
177 | tick += 10;
178 | pthread_rwlock_unlock(&rwlock);
179 | timers_tick();
180 |
181 | if (tick % 5000 == 0) {
182 | socket_debug();
183 | timer_debug();
184 | }
185 | }
186 | }
187 |
188 | int timer_get_tick()
189 | {
190 | int copy = 0;
191 | pthread_rwlock_rdlock(&rwlock);
192 | copy = tick;
193 | pthread_rwlock_unlock(&rwlock);
194 | return copy;
195 | }
196 |
--------------------------------------------------------------------------------
/src/tuntap_if.c:
--------------------------------------------------------------------------------
1 | #include "syshead.h"
2 | #include "utils.h"
3 | #include "basic.h"
4 |
/* Descriptor returned by tun_alloc(); tun_read() and tun_write() use it. */
static int tun_fd;

/* Interface name buffer: allocated in tun_init(), released in free_tun(). */
static char *dev;

/* Address and route that tun_init() applies to the interface. */
char *tapaddr = "10.0.0.5";
char *taproute = "10.0.0.0/24";
10 |
/* Runs `ip route add` for `cidr` on interface `dev`; returns run_cmd()'s result. */
static int set_if_route(char *dev, char *cidr)
{
    int status = run_cmd("ip route add dev %s %s", dev, cidr);

    return status;
}
15 |
/* Runs `ip address add` to set `cidr` as local address of `dev`; returns run_cmd()'s result. */
static int set_if_address(char *dev, char *cidr)
{
    int status = run_cmd("ip address add dev %s local %s", dev, cidr);

    return status;
}
20 |
/* Runs `ip link set ... up` for interface `dev`; returns run_cmd()'s result. */
static int set_if_up(char *dev)
{
    int status = run_cmd("ip link set dev %s up", dev);

    return status;
}
25 |
26 | /*
27 | * Taken from Kernel Documentation/networking/tuntap.txt
28 | */
29 | static int tun_alloc(char *dev)
30 | {
31 | struct ifreq ifr;
32 | int fd, err;
33 |
34 | if( (fd = open("/dev/net/tap", O_RDWR)) < 0 ) {
35 | perror("Cannot open TUN/TAP dev\n"
36 | "Make sure one exists with "
37 | "'$ mknod /dev/net/tap c 10 200'");
38 | exit(1);
39 | }
40 |
41 | CLEAR(ifr);
42 |
43 | /* Flags: IFF_TUN - TUN device (no Ethernet headers)
44 | * IFF_TAP - TAP device
45 | *
46 | * IFF_NO_PI - Do not provide packet information
47 | */
48 | ifr.ifr_flags = IFF_TAP | IFF_NO_PI;
49 | if( *dev ) {
50 | strncpy(ifr.ifr_name, dev, IFNAMSIZ);
51 | }
52 |
53 | if( (err = ioctl(fd, TUNSETIFF, (void *) &ifr)) < 0 ){
54 | perror("ERR: Could not ioctl tun");
55 | close(fd);
56 | return err;
57 | }
58 |
59 | strcpy(dev, ifr.ifr_name);
60 | return fd;
61 | }
62 |
63 | int tun_read(char *buf, int len)
64 | {
65 | return read(tun_fd, buf, len);
66 | }
67 |
68 | int tun_write(char *buf, int len)
69 | {
70 | return write(tun_fd, buf, len);
71 | }
72 |
73 | void tun_init()
74 | {
75 | dev = calloc(10, 1);
76 | tun_fd = tun_alloc(dev);
77 |
78 | if (set_if_up(dev) != 0) {
79 | print_err("ERROR when setting up if\n");
80 | }
81 |
82 | if (set_if_route(dev, taproute) != 0) {
83 | print_err("ERROR when setting route for if\n");
84 | }
85 |
86 | if (set_if_address(dev, tapaddr) != 0) {
87 | print_err("ERROR when setting addr for if\n");
88 | }
89 | }
90 |
91 | void free_tun()
92 | {
93 | free(dev);
94 | }
95 |
--------------------------------------------------------------------------------
/src/utils.c:
--------------------------------------------------------------------------------
1 | #include "syshead.h"
2 | #include "utils.h"
3 |
4 | extern int debug;
5 |
6 | int run_cmd(char *cmd, ...)
7 | {
8 | va_list ap;
9 | char buf[CMDBUFLEN];
10 | va_start(ap, cmd);
11 | vsnprintf(buf, CMDBUFLEN, cmd, ap);
12 |
13 | va_end(ap);
14 |
15 | if (debug) {
16 | printf("EXEC: %s\n", buf);
17 | }
18 |
19 | return system(buf);
20 | }
21 |
/*
 * Adds up `count` bytes starting at `addr` as host-order 16-bit words.
 * A trailing odd byte is added as its plain 8-bit value.
 *
 * Words are fetched with memcpy() instead of through a uint16_t pointer, so
 * `addr` may have any alignment and any underlying type.
 *
 * Returns the unfolded 32-bit sum.
 */
uint32_t sum_every_16bits(void *addr, int count)
{
    const uint8_t *bytes = addr;
    uint32_t sum = 0;

    while (count > 1) {
        uint16_t word;

        memcpy(&word, bytes, sizeof(word));
        sum += word;
        bytes += sizeof(word);
        count -= 2;
    }

    /* Add left-over byte, if any */
    if (count > 0) {
        sum += *bytes;
    }

    return sum;
}
39 |
/*
 * Internet checksum over `count` bytes at `addr`, seeded with `start_sum`.
 * Algorithm taken from https://tools.ietf.org/html/rfc1071
 *
 * Returns the one's complement of the folded 16-bit sum.
 */
uint16_t checksum(void *addr, int count, int start_sum)
{
    uint32_t acc = start_sum;

    acc += sum_every_16bits(addr, count);

    /* Fold the carries above bit 15 back in until none are left. */
    for (uint32_t carry = acc >> 16; carry != 0; carry = acc >> 16) {
        acc = (acc & 0xffff) + carry;
    }

    return ~acc;
}
56 |
/*
 * Resolves `host`/`port` to an IPv4 stream address and stores the first
 * result in `addr`.
 *
 * Returns 0 on success and 1 when the lookup yields no entry. Exits the
 * process when getaddrinfo() reports an error.
 */
int get_address(char *host, char *port, struct sockaddr *addr)
{
    struct addrinfo hints;
    struct addrinfo *result;

    memset(&hints, 0, sizeof(hints));
    hints.ai_socktype = SOCK_STREAM;
    hints.ai_family = AF_INET;

    int gai = getaddrinfo(host, port, &hints, &result);

    if (gai != 0) {
        print_err("getaddrinfo: %s\n", gai_strerror(gai));
        exit(EXIT_FAILURE);
    }

    if (result == NULL) {
        return 1;
    }

    /* Only the head of the result list is used. */
    *addr = *result->ai_addr;
    freeaddrinfo(result);

    return 0;
}
82 |
/*
 * Parses a dotted-quad IPv4 string into a host-order 32-bit value, with the
 * first octet in the most significant byte ("10.0.0.5" -> 0x0a000005).
 *
 * Returns 0 when `addr` is NULL or does not start with four dot-separated
 * numbers. The sscanf() result used to go unchecked, so a malformed string
 * produced a value built from uninitialised bytes.
 */
uint32_t parse_ipv4_string(char* addr) {
    uint8_t octet[4] = { 0 };

    if (addr == NULL) {
        return 0;
    }

    if (sscanf(addr, "%hhu.%hhu.%hhu.%hhu",
               &octet[0], &octet[1], &octet[2], &octet[3]) != 4) {
        return 0;
    }

    /* Widen before shifting: a uint8_t promotes to int, and shifting a value
     * of 128 or more left by 24 overflows a signed int. */
    return (uint32_t)octet[0] << 24 |
           (uint32_t)octet[1] << 16 |
           (uint32_t)octet[2] << 8 |
           (uint32_t)octet[3];
}
88 |
/* Returns the smaller of two unsigned 32-bit values. */
uint32_t min(uint32_t x, uint32_t y) {
    if (y < x) {
        return y;
    }

    return x;
}
92 |
--------------------------------------------------------------------------------
/tests/README.md:
--------------------------------------------------------------------------------
1 | # Tests
2 |
3 | Level-IP's test suites currently consist of end-to-end tests, in which applications on the Linux host are used to test traffic flow.
4 |
5 | In the future, a separate unit/packet flow test framework could be integrated into the stack.
6 |
7 | # Usage
8 |
9 | In the project's root folder, run
10 |
11 | make test
12 |
13 | Or a specific test-suite
14 |
15 | ./suites/arp/suite-arp
16 |
17 | Root privileges are required.
18 |
--------------------------------------------------------------------------------
/tests/suites/arp/suite-arp:
--------------------------------------------------------------------------------
#!/bin/bash
# ARP suite: checks that 10.0.0.4 answers ARP requests sent through tap0.

set -eu

# "$0" is quoted inside the substitution as well, so a checkout path that
# contains spaces still resolves.
source "$(dirname "$0")/../../utils/common"

# Sends three ARP requests for 10.0.0.4 via tap0; fails when arping fails.
function test_arp {
    arping -c3 -I tap0 10.0.0.4 >/dev/null
}

test_run "test_arp" "$0"
12 |
--------------------------------------------------------------------------------
/tests/suites/icmp/suite-icmp:
--------------------------------------------------------------------------------
#!/bin/bash
# ICMP suite: checks that 10.0.0.4 answers echo requests sent through tap0.

set -eu

# "$0" is quoted inside the substitution as well, so a checkout path that
# contains spaces still resolves.
source "$(dirname "$0")/../../utils/common"

# Sends three echo requests to 10.0.0.4 via tap0; fails when ping fails.
function test_ping {
    ping -c3 -I tap0 10.0.0.4 > /dev/null
}

test_run "test_ping" "$0"
12 |
--------------------------------------------------------------------------------
/tests/suites/tcp/curl-fixture.txt:
--------------------------------------------------------------------------------
1 | HTTP/1.0 200 OK
2 | Server:
3 | Date:
4 | Content-type: text/html; charset=UTF-8
5 | Content-Length: 326
6 |
7 |
8 | Directory listing for /
9 |
10 | Directory listing for /
11 |
12 |
18 |
19 |
20 |
21 |
--------------------------------------------------------------------------------
/tests/suites/tcp/env-delayed:
--------------------------------------------------------------------------------
#!/bin/bash
# Environment for the delayed-packet curl suite. Sourced by the suite script,
# so "$0" below is the path of that suite script.

set -eu

source "$(dirname "$0")/../../utils/common"

# Blanks the values of the Date: and Server: headers so that a response can be
# compared against a fixture.
function strip_http_header {
    sed 's/^Date:.*/Date:/' | sed 's/^Server:.*/Server:/'
}

# Starts an HTTP server on port 8002 and delays traffic on tap0 whose source
# or destination port is 8002 by 2000ms.
function setup {
    /usr/bin/env python2.7 -m SimpleHTTPServer 8002 >/dev/null 2>&1 &
    httpserver="$!"

    tc class add dev tap0 parent 1: classid 1:1 htb rate 100mbit
    tc filter add dev tap0 parent 1: protocol ip prio 1 u32 flowid 1:1 match ip sport 8002 0xffff
    tc filter add dev tap0 parent 1: protocol ip prio 1 u32 flowid 1:1 match ip dport 8002 0xffff
    tc qdisc add dev tap0 parent 1:1 netem delay 2000ms

    sleep 5
}

# Stops the HTTP server. The trap below can fire for ERR and again for EXIT,
# and possibly before the server was started, so this must tolerate an unset
# pid and a process that is already gone.
function teardown_suite {
    if [ -n "${httpserver:-}" ]; then
        kill "$httpserver" 2>/dev/null || true
        httpserver=""
    fi
}

trap teardown_suite EXIT ERR
setup
30 |
--------------------------------------------------------------------------------
/tests/suites/tcp/env-duplication:
--------------------------------------------------------------------------------
#!/bin/bash
# Environment for the packet-duplication curl suite. Sourced by the suite
# script, so "$0" below is the path of that suite script.

set -eu

source "$(dirname "$0")/../../utils/common"

# Blanks the values of the Date: and Server: headers so that a response can be
# compared against a fixture.
function strip_http_header {
    sed 's/^Date:.*/Date:/' | sed 's/^Server:.*/Server:/'
}

# Starts an HTTP server on port 8003 and duplicates 50% of the traffic on tap0
# whose source or destination port is 8003.
function setup {
    /usr/bin/env python2.7 -m SimpleHTTPServer 8003 >/dev/null 2>&1 &
    httpserver="$!"

    tc class add dev tap0 parent 1: classid 1:2 htb rate 100mbit
    tc filter add dev tap0 parent 1: protocol ip prio 1 u32 flowid 1:2 match ip sport 8003 0xffff
    tc filter add dev tap0 parent 1: protocol ip prio 1 u32 flowid 1:2 match ip dport 8003 0xffff
    tc qdisc add dev tap0 parent 1:2 netem duplicate 50%

    sleep 5
}

# Stops the HTTP server. The trap below can fire for ERR and again for EXIT,
# and possibly before the server was started, so this must tolerate an unset
# pid and a process that is already gone.
function teardown_suite {
    if [ -n "${httpserver:-}" ]; then
        kill "$httpserver" 2>/dev/null || true
        httpserver=""
    fi
}

trap teardown_suite EXIT ERR
setup
30 |
--------------------------------------------------------------------------------
/tests/suites/tcp/env-lossy:
--------------------------------------------------------------------------------
#!/bin/bash
# Environment for the packet-loss curl suite. Sourced by the suite script, so
# "$0" below is the path of that suite script.

set -eu

source "$(dirname "$0")/../../utils/common"

# Blanks the values of the Date: and Server: headers so that a response can be
# compared against a fixture.
function strip_http_header {
    sed 's/^Date:.*/Date:/' | sed 's/^Server:.*/Server:/'
}

# Starts an HTTP server on port 8004 and drops 25% of the traffic on tap0
# whose source or destination port is 8004.
function setup {
    /usr/bin/env python2.7 -m SimpleHTTPServer 8004 >/dev/null 2>&1 &
    httpserver="$!"

    tc class add dev tap0 parent 1: classid 1:3 htb rate 100mbit
    tc filter add dev tap0 parent 1: protocol ip prio 1 u32 flowid 1:3 match ip sport 8004 0xffff
    tc filter add dev tap0 parent 1: protocol ip prio 1 u32 flowid 1:3 match ip dport 8004 0xffff
    tc qdisc add dev tap0 parent 1:3 netem loss 25%

    sleep 5
}

# Stops the HTTP server. The trap below can fire for ERR and again for EXIT,
# and possibly before the server was started, so this must tolerate an unset
# pid and a process that is already gone.
function teardown_suite {
    if [ -n "${httpserver:-}" ]; then
        kill "$httpserver" 2>/dev/null || true
        httpserver=""
    fi
}

trap teardown_suite EXIT ERR
setup
30 |
--------------------------------------------------------------------------------
/tests/suites/tcp/env-normal:
--------------------------------------------------------------------------------
#!/bin/bash
# Environment for the plain curl suite (no traffic shaping). Sourced by the
# suite script, so "$0" below is the path of that suite script.

set -eu

source "$(dirname "$0")/../../utils/common"

# Blanks the values of the Date: and Server: headers so that a response can be
# compared against a fixture.
function strip_http_header {
    sed 's/^Date:.*/Date:/' | sed 's/^Server:.*/Server:/'
}

# Starts an HTTP server on port 8001 and gives it time to come up.
function setup {
    /usr/bin/env python2.7 -m SimpleHTTPServer 8001 >/dev/null 2>&1 &
    httpserver="$!"

    sleep 5
}

# Stops the HTTP server. The trap below can fire for ERR and again for EXIT,
# and possibly before the server was started, so this must tolerate an unset
# pid and a process that is already gone.
function teardown_suite {
    if [ -n "${httpserver:-}" ]; then
        kill "$httpserver" 2>/dev/null || true
        httpserver=""
    fi
}

# Install the trap before setup, so the server is also stopped when the
# script ends while setup is still running.
trap teardown_suite EXIT ERR
setup
25 |
--------------------------------------------------------------------------------
/tests/suites/tcp/suite-curl:
--------------------------------------------------------------------------------
#!/bin/bash
# Curl suite on an unshaped link: loads the env-normal environment, then runs
# the shared TCP tests against port 8001.

set -eu

# "$0" is quoted inside the substitutions as well, so a checkout path that
# contains spaces still resolves.
source "$(dirname "$0")/env-normal"
source "$(dirname "$0")/tests" 8001 "$0"
7 |
--------------------------------------------------------------------------------
/tests/suites/tcp/suite-packet-delay:
--------------------------------------------------------------------------------
#!/bin/bash
# Curl suite with delayed packets: loads the env-delayed environment, then
# runs the shared TCP tests against port 8002.

set -eu

# "$0" is quoted inside the substitutions as well, so a checkout path that
# contains spaces still resolves.
source "$(dirname "$0")/env-delayed"
source "$(dirname "$0")/tests" 8002 "$0"
7 |
--------------------------------------------------------------------------------
/tests/suites/tcp/suite-packet-duplication:
--------------------------------------------------------------------------------
#!/bin/bash
# Curl suite with duplicated packets: loads the env-duplication environment,
# then runs the shared TCP tests against port 8003.

set -eu

# "$0" is quoted inside the substitutions as well, so a checkout path that
# contains spaces still resolves.
source "$(dirname "$0")/env-duplication"
source "$(dirname "$0")/tests" 8003 "$0"
7 |
--------------------------------------------------------------------------------
/tests/suites/tcp/suite-packet-loss:
--------------------------------------------------------------------------------
#!/bin/bash
# Curl suite with packet loss: loads the env-lossy environment, then runs the
# shared TCP tests against port 8004.

set -eu

# "$0" is quoted inside the substitutions as well, so a checkout path that
# contains spaces still resolves.
source "$(dirname "$0")/env-lossy"
source "$(dirname "$0")/tests" 8004 "$0"
7 |
--------------------------------------------------------------------------------
/tests/suites/tcp/tests:
--------------------------------------------------------------------------------
#!/bin/bash
# Shared TCP test cases. Sourced by the suite scripts with two arguments:
#   $1  port of the HTTP server started by the environment script
#   $2  suite name used in the pass/fail output
# Relies on $repo, $folder, test_run and strip_http_header, which the
# previously sourced scripts provide.

set -eu

port="$1"
suite="$2"

# Fetches / with the blocking curl example app and compares the response,
# with Date/Server values blanked, against the fixture.
function test_synchronous_http_get {
    response="$("$repo/tools/level-ip" "$repo/apps/curl/curl" 10.0.0.5 "$port" | strip_http_header)"

    diff "$folder/curl-fixture.txt" <(echo "$response")
}

# Same check using the poll()-based curl example app.
function test_poll_http_get {
    response="$("$repo/tools/level-ip" "$repo/apps/curl-poll/curl-poll" 10.0.0.5 "$port" | strip_http_header)"

    diff "$folder/curl-fixture.txt" <(echo "$response")
}

# Connecting to port 9999 must be reported as "Connection refused".
function test_tcp_connection_refused {
    "$repo/tools/level-ip" "$repo/apps/curl/curl" 10.0.0.5 9999 2>&1 | grep -q "Connection refused"
}

test_run "test_synchronous_http_get" "$suite"
test_run "test_poll_http_get" "$suite"
test_run "test_tcp_connection_refused" "$suite"
27 |
--------------------------------------------------------------------------------
/tests/test-run-all:
--------------------------------------------------------------------------------
#!/bin/bash
# Starts lvl-ip, runs every test suite in parallel and exits non-zero when any
# suite failed. The relative paths below require this to be run from the
# directory that contains this script.

set -eu

source "utils/common"

# Stops the lvl-ip process started by start_stack.
function teardown {
    kill "$stackip"
}

trap teardown EXIT ERR

start_stack
echo "Started lvl-ip with pid $stackip"
sleep 5 # wait for stack to establish itself

pids=""

tc qdisc add dev tap0 root handle 1: htb

./suites/arp/suite-arp &
pids="$pids $!"
./suites/icmp/suite-icmp &
pids="$pids $!"
./suites/tcp/suite-curl &
pids="$pids $!"
./suites/tcp/suite-packet-delay &
pids="$pids $!"
./suites/tcp/suite-packet-duplication &
pids="$pids $!"
./suites/tcp/suite-packet-loss &
pids="$pids $!"

rc=0
for i in $pids; do
    # A bare failing `wait` would end the script right here because of
    # `set -e`, skipping the remaining suites and the summary below. Putting
    # it on the left of `||` keeps the script alive and records the status.
    wait "$i" || rc="$?"
done

grep -i -B50 "SUMMARY: ThreadSanitizer:" ../lvl-ip-test.log && echo "Possible threading errors found."

echo
[ "$rc" -eq 0 ] && echo "Tests pass."
exit "$rc"
46 |
--------------------------------------------------------------------------------
/tests/utils/common:
--------------------------------------------------------------------------------
#!/bin/bash
# Helpers shared by the test scripts; meant to be sourced.

set -eu

repo="$(git rev-parse --show-toplevel)"
# Directory of the script that sourced this file. "$0" is quoted inside the
# substitution as well, so paths containing spaces survive.
folder="$(dirname "$0")"

# Starts lvl-ip in the background (output in ../lvl-ip-test.log, pid in
# $stackip) and pings 10.0.0.5 up to three times. Returns 0 on the first
# reply; otherwise prints the log and returns 1.
function start_stack {
    "$repo/lvl-ip" > ../lvl-ip-test.log 2>&1 &
    stackip="$!"

    for i in {1..3}; do
        ping -c1 -w1 10.0.0.5 >/dev/null || continue

        return 0
    done

    echo "Stack did not start up correctly" >&2
    cat ../lvl-ip-test.log
    return 1
}

# Prints a pass line. $1: test name, $2: suite name.
function test_pass {
    echo -e "\t$2 Test pass: $1"
}

# Prints a fail line on stderr and exits with status 1.
# $1: test name, $2: suite name.
function test_fail {
    # `>&2` sends the message to stderr; the former `2>&1` had no effect on
    # what echo writes.
    echo -e "\t$2 Test fail: $1" >&2
    exit 1
}

# Runs the command in $1 and reports the outcome under suite name $2.
function test_run {
    eval "$1" || test_fail "$1" "$2"

    test_pass "$1" "$2"
}
37 |
--------------------------------------------------------------------------------
/tools/Makefile:
--------------------------------------------------------------------------------
CPPFLAGS = -I ../include -Wall -Werror

all: liblevelip

debug: CFLAGS+= -DDEBUG_API
debug: liblevelip

# The library is written to liblevelip.so, so no file called `liblevelip` is
# ever created and this rule runs on every invocation.
liblevelip: liblevelip.c
	$(CC) $(CFLAGS) $(CPPFLAGS) -fPIC -shared -o liblevelip.so liblevelip.c -ldl

# None of these targets names a file produced by its rule.
.PHONY: all debug liblevelip clean
clean:
	rm -f liblevelip.so*
14 |
--------------------------------------------------------------------------------
/tools/level-ip:
--------------------------------------------------------------------------------
#!/bin/sh
# Runs a program with liblevelip.so preloaded.
# Usage: level-ip <program> [arguments...]

set -eu

prog="$1"
shift

# The library is looked up next to this script. "$0" is quoted inside the
# substitution as well, so an install path containing spaces still works.
LD_PRELOAD="$(dirname "$0")/liblevelip.so" "$prog" "$@"
9 |
--------------------------------------------------------------------------------
/tools/liblevelip.c:
--------------------------------------------------------------------------------
1 | #define _GNU_SOURCE
2 | #include "syshead.h"
3 | #include "liblevelip.h"
4 | #include "ipc.h"
5 | #include "list.h"
6 | #include "utils.h"
7 |
8 | #define RCBUF_LEN 512
9 |
/*
 * Pointer with the shape of a C runtime start-up entry point.
 * NOTE(review): it is neither assigned nor called in this part of the file;
 * presumably resolved and used further down — confirm.
 */
static int (*__start_main)(int (*main) (int, char * *, char * *), int argc, \
                           char * * ubp_av, void (*init) (void), void (*fini) (void), \
                           void (*rtld_fini) (void), void (* stack_end));

/*
 * Pointers to the underlying implementations of the calls this library
 * overrides. The wrappers below forward to them whenever a descriptor does
 * not belong to an lvl-ip socket. All of them start out NULL.
 * NOTE(review): the code that fills them in is outside this part of the
 * file — confirm where they are resolved.
 */
static int (*_fcntl)(int fildes, int cmd, ...) = NULL;
static int (*_setsockopt)(int fd, int level, int optname,
                         const void *optval, socklen_t optlen) = NULL;
/* Takes a const optval here, while the getsockopt() wrapper takes `void *`. */
static int (*_getsockopt)(int fd, int level, int optname,
                         const void *optval, socklen_t *optlen) = NULL;
static int (*_read)(int sockfd, void *buf, size_t len) = NULL;
static int (*_write)(int sockfd, const void *buf, size_t len) = NULL;
static int (*_connect)(int sockfd, const struct sockaddr *addr, socklen_t addrlen) = NULL;
static int (*_socket)(int domain, int type, int protocol) = NULL;
static int (*_close)(int fildes) = NULL;
static int (*_poll)(struct pollfd fds[], nfds_t nfds, int timeout) = NULL;
static int (*_pollchk)(struct pollfd *__fds, nfds_t __nfds, int __timeout,
                       __SIZE_TYPE__ __fdslen) = NULL;

static int (*_ppoll)(struct pollfd *fds, nfds_t nfds,
                     const struct timespec *tmo_p, const sigset_t *sigmask) = NULL;
/* No explicit initialiser; as a static object it is still zero-initialised. */
static int (*_select)(int nfds, fd_set *restrict readfds,
                      fd_set *restrict writefds, fd_set *restrict errorfds,
                      struct timeval *restrict timeout);
static ssize_t (*_sendto)(int sockfd, const void *message, size_t length,
                          int flags, const struct sockaddr *dest_addr,
                          socklen_t dest_len) = NULL;
static ssize_t (*_recvfrom)(int sockfd, void *buf, size_t len,
                            int flags, struct sockaddr *restrict address,
                            socklen_t *restrict addrlen) = NULL;
static int (*_getpeername)(int socket, struct sockaddr *restrict address,
                           socklen_t *restrict address_len) = NULL;
static int (*_getsockname)(int socket, struct sockaddr *restrict address,
                           socklen_t *restrict address_len) = NULL;

/*
 * Sockets created through the socket() wrapper: socket() appends to the list
 * and bumps the counter, lvlip_get_sock() searches the list by descriptor.
 */
static int lvlip_socks_count = 0;
static LIST_HEAD(lvlip_socks);
46 |
47 | static inline struct lvlip_sock *lvlip_get_sock(int fd) {
48 | struct list_head *item;
49 | struct lvlip_sock *sock;
50 |
51 | list_for_each(item, &lvlip_socks) {
52 | sock = list_entry(item, struct lvlip_sock, list);
53 |
54 | if (sock->fd == fd) return sock;
55 | };
56 |
57 | return NULL;
58 | };
59 |
/*
 * Decides whether a socket() request is served by lvl-ip.
 *
 * Only AF_INET stream sockets with protocol 0 or IPPROTO_TCP qualify.
 * `type` may carry the SOCK_NONBLOCK / SOCK_CLOEXEC flag bits; they are
 * masked out and the remaining base type is compared for equality. A plain
 * `type & SOCK_STREAM` test also lets through every type whose value has
 * bit 0 set, such as SOCK_RAW.
 *
 * Returns 1 when supported, 0 otherwise.
 */
static int is_socket_supported(int domain, int type, int protocol)
{
    int base_type = type;

#ifdef SOCK_NONBLOCK
    base_type &= ~SOCK_NONBLOCK;
#endif
#ifdef SOCK_CLOEXEC
    base_type &= ~SOCK_CLOEXEC;
#endif

    if (domain != AF_INET) return 0;

    if (base_type != SOCK_STREAM) return 0;

    if (protocol != 0 && protocol != IPPROTO_TCP) return 0;

    return 1;
}
70 |
/*
 * Opens a UNIX stream socket and connects it to the path `sockname`.
 *
 * Returns the connected descriptor. Exits the process when the socket cannot
 * be created or the connection fails.
 */
static int init_socket(char *sockname)
{
    /* Create local socket. */
    int fd = _socket(AF_UNIX, SOCK_STREAM, 0);

    if (fd == -1) {
        perror("socket");
        exit(EXIT_FAILURE);
    }

    /*
     * Zero the complete address first: some implementations carry extra,
     * nonstandard fields in the structure.
     */
    struct sockaddr_un addr;

    memset(&addr, 0, sizeof(addr));
    addr.sun_family = AF_UNIX;
    strncpy(addr.sun_path, sockname, sizeof(addr.sun_path) - 1);

    /* Connect socket to socket address */
    if (_connect(fd, (const struct sockaddr *) &addr, sizeof(addr)) == -1) {
        print_err("Error connecting to level-ip. Is it up?\n");
        exit(EXIT_FAILURE);
    }

    return fd;
}
107 |
/* Closes the IPC descriptor `lvlfd` with the underlying close(); returns its result. */
static int free_socket(int lvlfd)
{
    int rc = _close(lvlfd);

    return rc;
}
112 |
113 | static int transmit_lvlip(int lvlfd, struct ipc_msg *msg, int msglen)
114 | {
115 | char *buf[RCBUF_LEN];
116 |
117 | // Send mocked syscall to lvl-ip
118 | if (_write(lvlfd, (char *)msg, msglen) == -1) {
119 | perror("Error on writing IPC");
120 | }
121 |
122 | // Read return value from lvl-ip
123 | if (_read(lvlfd, buf, RCBUF_LEN) == -1) {
124 | perror("Could not read IPC response");
125 | }
126 |
127 | struct ipc_msg *response = (struct ipc_msg *) buf;
128 |
129 | if (response->type != msg->type || response->pid != msg->pid) {
130 | print_err("ERR: IPC msg response expected type %d, pid %d\n"
131 | " actual type %d, pid %d\n",
132 | msg->type, msg->pid, response->type, response->pid);
133 | return -1;
134 | }
135 |
136 | struct ipc_err *err = (struct ipc_err *) response->data;
137 |
138 | if (err->rc == -1) errno = err->err;
139 |
140 | return err->rc;
141 | }
142 |
143 | int socket(int domain, int type, int protocol)
144 | {
145 | if (!is_socket_supported(domain, type, protocol)) {
146 | return _socket(domain, type, protocol);
147 | }
148 |
149 | struct lvlip_sock *sock;
150 |
151 | int lvlfd = init_socket("/tmp/lvlip.socket");
152 |
153 | sock = lvlip_alloc();
154 | sock->lvlfd = lvlfd;
155 | list_add_tail(&sock->list, &lvlip_socks);
156 | lvlip_socks_count++;
157 |
158 | int pid = getpid();
159 | int msglen = sizeof(struct ipc_msg) + sizeof(struct ipc_socket);
160 |
161 | struct ipc_msg *msg = alloca(msglen);
162 | msg->type = IPC_SOCKET;
163 | msg->pid = pid;
164 |
165 | struct ipc_socket usersock = {
166 | .domain = domain,
167 | .type = type,
168 | .protocol = protocol
169 | };
170 |
171 | memcpy(msg->data, &usersock, sizeof(struct ipc_socket));
172 |
173 | int sockfd = transmit_lvlip(sock->lvlfd, msg, msglen);
174 |
175 | if (sockfd == -1) {
176 | /* Socket alloc failed */
177 | lvlip_free(sock);
178 | return -1;
179 | }
180 |
181 | sock->fd = sockfd;
182 |
183 | lvl_sock_dbg("Socket called", sock);
184 |
185 | return sockfd;
186 | }
187 |
188 | int close(int fd)
189 | {
190 | struct lvlip_sock *sock = lvlip_get_sock(fd);
191 |
192 | if (sock == NULL) {
193 | /* No lvl-ip IPC socket associated */
194 | return _close(fd);
195 | }
196 |
197 | lvl_sock_dbg("Close called", sock);
198 |
199 | int pid = getpid();
200 | int msglen = sizeof(struct ipc_msg) + sizeof(struct ipc_close);
201 | int rc = 0;
202 |
203 | struct ipc_msg *msg = alloca(msglen);
204 | msg->type = IPC_CLOSE;
205 | msg->pid = pid;
206 |
207 | struct ipc_close *payload = (struct ipc_close *)msg->data;
208 | payload->sockfd = fd;
209 |
210 | rc = transmit_lvlip(sock->lvlfd, msg, msglen);
211 | free_socket(sock->lvlfd);
212 |
213 | return rc;
214 | }
215 |
216 | int connect(int sockfd, const struct sockaddr *addr, socklen_t addrlen)
217 | {
218 | struct lvlip_sock *sock = lvlip_get_sock(sockfd);
219 |
220 | if (sock == NULL) {
221 | /* No lvl-ip IPC socket associated */
222 | return _connect(sockfd, addr, addrlen);
223 | }
224 |
225 | lvl_sock_dbg("Connect called", sock);
226 |
227 | int msglen = sizeof(struct ipc_msg) + sizeof(struct ipc_connect);
228 | int pid = getpid();
229 |
230 | struct ipc_msg *msg = alloca(msglen);
231 | msg->type = IPC_CONNECT;
232 | msg->pid = pid;
233 |
234 | struct ipc_connect payload = {
235 | .sockfd = sockfd,
236 | .addr = *addr,
237 | .addrlen = addrlen
238 | };
239 |
240 | memcpy(msg->data, &payload, sizeof(struct ipc_connect));
241 |
242 | return transmit_lvlip(sock->lvlfd, msg, msglen);
243 | }
244 |
245 | ssize_t write(int sockfd, const void *buf, size_t len)
246 | {
247 | struct lvlip_sock *sock = lvlip_get_sock(sockfd);
248 |
249 | if (sock == NULL) {
250 | /* No lvl-ip IPC socket associated */
251 | return _write(sockfd, buf, len);
252 | }
253 |
254 | lvl_sock_dbg("Write called", sock);
255 | int msglen = sizeof(struct ipc_msg) + sizeof(struct ipc_write) + len;
256 | int pid = getpid();
257 |
258 | struct ipc_msg *msg = alloca(msglen);
259 | msg->type = IPC_WRITE;
260 | msg->pid = pid;
261 |
262 | struct ipc_write payload = {
263 | .sockfd = sockfd,
264 | .len = len
265 | };
266 |
267 | memcpy(msg->data, &payload, sizeof(struct ipc_write));
268 | memcpy(((struct ipc_write *)msg->data)->buf, buf, len);
269 |
270 | return transmit_lvlip(sock->lvlfd, msg, msglen);
271 | }
272 |
273 | ssize_t read(int sockfd, void *buf, size_t len)
274 | {
275 | struct lvlip_sock *sock = lvlip_get_sock(sockfd);
276 |
277 | if (sock == NULL) {
278 | /* No lvl-ip IPC socket associated */
279 | return _read(sockfd, buf, len);
280 | }
281 |
282 | lvl_sock_dbg("Read called", sock);
283 |
284 | int pid = getpid();
285 | int msglen = sizeof(struct ipc_msg) + sizeof(struct ipc_read);
286 |
287 | struct ipc_msg *msg = alloca(msglen);
288 | msg->type = IPC_READ;
289 | msg->pid = pid;
290 |
291 | struct ipc_read payload = {
292 | .sockfd = sockfd,
293 | .len = len
294 | };
295 |
296 | memcpy(msg->data, &payload, sizeof(struct ipc_read));
297 |
298 | // Send mocked syscall to lvl-ip
299 | if (_write(sock->lvlfd, (char *)msg, msglen) == -1) {
300 | perror("Error on writing IPC read");
301 | }
302 |
303 | int rlen = sizeof(struct ipc_msg) + sizeof(struct ipc_err) + sizeof(struct ipc_read) + len;
304 | char rbuf[rlen];
305 | memset(rbuf, 0, rlen);
306 |
307 | // Read return value from lvl-ip
308 | if (_read(sock->lvlfd, rbuf, rlen) == -1) {
309 | perror("Could not read IPC read response");
310 | }
311 |
312 | struct ipc_msg *response = (struct ipc_msg *) rbuf;
313 |
314 | if (response->type != IPC_READ || response->pid != pid) {
315 | print_err("ERR: IPC read response expected: type %d, pid %d\n"
316 | " actual: type %d, pid %d\n",
317 | IPC_READ, pid, response->type, response->pid);
318 | return -1;
319 | }
320 |
321 | struct ipc_err *error = (struct ipc_err *) response->data;
322 | if (error->rc < 0) {
323 | errno = error->err;
324 | return error->rc;
325 | }
326 |
327 | struct ipc_read *data = (struct ipc_read *) error->data;
328 | if (len < data->len) {
329 | print_err("IPC read received len error: %lu\n", data->len);
330 | return -1;
331 | }
332 |
333 | memset(buf, 0, len);
334 | memcpy(buf, data->buf, data->len);
335 |
336 | return data->len;
337 | }
338 |
339 | ssize_t send(int fd, const void *buf, size_t len, int flags)
340 | {
341 | return sendto(fd, buf, len, flags, NULL, 0);
342 | }
343 |
/*
 * sendto() override.
 *
 * lvl-ip sockets are served by the write() override; flags and destination
 * are not passed on in that case. Every other descriptor goes to the
 * underlying sendto().
 */
ssize_t sendto(int fd, const void *buf, size_t len,
               int flags, const struct sockaddr *dest_addr,
               socklen_t dest_len)
{
    if (lvlip_get_sock(fd) != NULL) {
        return write(fd, buf, len);
    }

    return _sendto(fd, buf, len, flags, dest_addr, dest_len);
}
353 |
354 | ssize_t recv(int fd, void *buf, size_t len, int flags)
355 | {
356 | return recvfrom(fd, buf, len, flags, NULL, 0);
357 | }
358 |
/*
 * recvfrom() override.
 *
 * lvl-ip sockets are served by the read() override; flags and the address
 * out-parameters are not used in that case. Every other descriptor goes to
 * the underlying recvfrom().
 */
ssize_t recvfrom(int fd, void *restrict buf, size_t len,
                 int flags, struct sockaddr *restrict address,
                 socklen_t *restrict addrlen)
{
    if (lvlip_get_sock(fd) != NULL) {
        return read(fd, buf, len);
    }

    return _recvfrom(fd, buf, len, flags, address, addrlen);
}
368 |
369 | int poll(struct pollfd *fds, nfds_t nfds, int timeout)
370 | {
371 | struct pollfd *kernel_fds[nfds];
372 | struct pollfd *lvlip_fds[nfds];
373 | int lvlip_nfds = 0;
374 | int kernel_nfds = 0;
375 | int lvlip_sock = 0;
376 |
377 | struct lvlip_sock *sock = NULL;
378 |
379 | for (int i = 0; i < nfds; i++) {
380 | struct pollfd *pfd = &fds[i];
381 | if ((sock = lvlip_get_sock(pfd->fd)) != NULL) {
382 | lvlip_fds[lvlip_nfds++] = pfd;
383 | lvlip_sock = sock->lvlfd;
384 | } else {
385 | kernel_fds[kernel_nfds++] = pfd;
386 | }
387 | }
388 |
389 | int blocking = 0;
390 | if (kernel_nfds > 0 && lvlip_nfds > 0 && timeout == -1) {
391 | /* Cannot sleep indefinitely when we demux poll
392 | with both kernel and lvlip fds */
393 | timeout = 100;
394 | blocking = 1;
395 | }
396 |
397 | lvl_dbg("Poll called with kernel_nfds %d lvlip_nfds %d timeout %d", kernel_nfds, lvlip_nfds, timeout);
398 |
399 | for (;;) {
400 | int events = 0;
401 | if (kernel_nfds > 0) {
402 | for (int i = 0; i < kernel_nfds; i++) {
403 | lvl_dbg("Kernel nfd %d events %d timeout %d", kernel_fds[i]->fd, kernel_fds[i]->events, timeout);
404 | }
405 |
406 | events = _poll(*kernel_fds, kernel_nfds, timeout);
407 |
408 | if (events == -1) {
409 | perror("Poll kernel error");
410 | errno = EAGAIN;
411 | return -1;
412 | }
413 | }
414 |
415 | if (lvlip_nfds < 1) {
416 | return events;
417 | }
418 |
419 | int pid = getpid();
420 | int pollfd_size = sizeof(struct ipc_pollfd);
421 | int msglen = sizeof(struct ipc_msg) + sizeof(struct ipc_poll) + pollfd_size * lvlip_nfds;
422 | struct ipc_msg *msg = alloca(msglen);
423 |
424 | msg->type = IPC_POLL;
425 | msg->pid = pid;
426 |
427 | struct ipc_poll *data = (struct ipc_poll *)msg->data;
428 | data->nfds = lvlip_nfds;
429 | data->timeout = timeout;
430 |
431 | struct ipc_pollfd *pfd = NULL;
432 | for (int i = 0; i < lvlip_nfds; i++) {
433 | pfd = &data->fds[i];
434 | pfd->fd = lvlip_fds[i]->fd;
435 | pfd->events = lvlip_fds[i]->events;
436 | pfd->revents = lvlip_fds[i]->revents;
437 | }
438 |
439 | if (_write(lvlip_sock, (char *)msg, msglen) == -1) {
440 | perror("Error on writing IPC poll");
441 | errno = EAGAIN;
442 | return -1;
443 | }
444 |
445 | int rlen = sizeof(struct ipc_msg) + sizeof(struct ipc_err) + pollfd_size * lvlip_nfds;
446 | char rbuf[rlen];
447 | memset(rbuf, 0, rlen);
448 |
449 | // Read return value from lvl-ip
450 | if (_read(lvlip_sock, rbuf, rlen) == -1) {
451 | perror("Could not read IPC poll response");
452 | errno = EAGAIN;
453 | return -1;
454 | }
455 |
456 | struct ipc_msg *response = (struct ipc_msg *) rbuf;
457 |
458 | if (response->type != IPC_POLL || response->pid != pid) {
459 | print_err("ERR: IPC poll response expected: type %d, pid %d\n"
460 | " actual: type %d, pid %d\n",
461 | IPC_POLL, pid, response->type, response->pid);
462 | errno = EAGAIN;
463 | return -1;
464 | }
465 |
466 | struct ipc_err *error = (struct ipc_err *) response->data;
467 | if (error->rc < 0) {
468 | errno = error->err;
469 | print_err("Error on poll %d %s\n", error->rc, strerror(errno));
470 | return error->rc;
471 | }
472 |
473 | struct ipc_pollfd *returned = (struct ipc_pollfd *) error->data;
474 |
475 | for (int i = 0; i < lvlip_nfds; i++) {
476 | lvlip_fds[i]->events = returned[i].events;
477 | lvlip_fds[i]->revents = returned[i].revents;
478 | }
479 |
480 | int result = events + error->rc;
481 |
482 | if (result > 0 || !blocking) {
483 | for (int i = 0; i < nfds; i++) {
484 | lvl_dbg("Returning counts %d nfd %d with revents %d events %d timeout %d", result, i, fds[i].revents, fds[i].events, timeout);
485 | }
486 |
487 | return result;
488 | }
489 | }
490 |
491 | print_err("Poll returning with -1\n");
492 | return -1;
493 | }
494 |
/*
 * Override of the checked poll variant; forwards to the poll() override.
 * The __fdslen argument is not used.
 */
int __poll_chk (struct pollfd *__fds, nfds_t __nfds, int __timeout,
                __SIZE_TYPE__ __fdslen)
{
    int ready = poll(__fds, __nfds, __timeout);

    return ready;
}
500 |
501 | int ppoll(struct pollfd *fds, nfds_t nfds,
502 | const struct timespec *tmo_p, const sigset_t *sigmask)
503 | {
504 | print_err("Ppoll called but not supported\n");
505 | return -1;
506 | }
507 |
/*
 * select() override: lvl-ip sockets are not handled. Prints a notice and
 * passes the call through to the underlying select() unchanged.
 */
int select(int nfds, fd_set *restrict readfds,
           fd_set *restrict writefds, fd_set *restrict errorfds,
           struct timeval *restrict timeout)
{
    print_err("Select not implemented yet\n");

    int ready = _select(nfds, readfds, writefds, errorfds, timeout);

    return ready;
}
515 |
516 |
/*
 * setsockopt() interposer.
 *
 * When lvlip_get_sock() finds an entry for fd, the option is ignored and 0
 * is returned. Any other descriptor is passed through to _setsockopt.
 */
int setsockopt(int fd, int level, int optname,
               const void *optval, socklen_t optlen)
{
    struct lvlip_sock *sock = lvlip_get_sock(fd);

    if (sock != NULL) {
        lvl_sock_dbg("Setsockopt called", sock);

        /* WARN: Setsockopt not supported yet */
        return 0;
    }

    return _setsockopt(fd, level, optname, optval, optlen);
}
529 |
530 | int getsockopt(int fd, int level, int optname,
531 | void *optval, socklen_t *optlen)
532 | {
533 | struct lvlip_sock *sock = lvlip_get_sock(fd);
534 | if (sock == NULL) return _getsockopt(fd, level, optname, optval, optlen);
535 |
536 | lvl_sock_dbg("Getsockopt called: level %d optname %d optval %d socklen %d",
537 | sock, level, optname, *(int *)optval, *(int *)optlen);
538 |
539 | int pid = getpid();
540 | int msglen = sizeof(struct ipc_msg) + sizeof(struct ipc_sockopt) + *optlen;
541 |
542 | struct ipc_msg *msg = alloca(msglen);
543 | msg->type = IPC_GETSOCKOPT;
544 | msg->pid = pid;
545 |
546 | struct ipc_sockopt opts = {
547 | .fd = fd,
548 | .level = level,
549 | .optname = optname,
550 | .optlen = *optlen,
551 | };
552 |
553 | memcpy(&opts.optval, optval, *optlen);
554 | memcpy(msg->data, &opts, sizeof(struct ipc_sockopt) + *optlen);
555 |
556 | // Send mocked syscall to lvl-ip
557 | if (_write(sock->lvlfd, (char *)msg, msglen) == -1) {
558 | perror("Error on writing IPC getsockopt");
559 | }
560 |
561 | int rlen = sizeof(struct ipc_msg) + sizeof(struct ipc_err) + sizeof(struct ipc_sockopt) + *optlen;
562 | char rbuf[rlen];
563 | memset(rbuf, 0, rlen);
564 |
565 | // Read return value from lvl-ip
566 | if (_read(sock->lvlfd, rbuf, rlen) == -1) {
567 | perror("Could not read IPC getsockopt response");
568 | }
569 |
570 | struct ipc_msg *response = (struct ipc_msg *) rbuf;
571 |
572 | if (response->type != IPC_GETSOCKOPT || response->pid != pid) {
573 | print_err("ERR: IPC getsockopt response expected: type %d, pid %d\n"
574 | " actual: type %d, pid %d\n",
575 | IPC_GETSOCKOPT, pid, response->type, response->pid);
576 | return -1;
577 | }
578 |
579 | struct ipc_err *error = (struct ipc_err *) response->data;
580 | if (error->rc != 0) {
581 | errno = error->err;
582 | return error->rc;
583 | }
584 |
585 | struct ipc_sockopt *optres = (struct ipc_sockopt *) error->data;
586 |
587 | lvl_sock_dbg("Got getsockopt level %d optname %d optval %d socklen %d",
588 | sock, optres->level, optres->optname, *(int *)optres->optval, optres->optlen);
589 |
590 | int val = *(int *)optres->optval;
591 |
592 | /* lvl-ip probably encoded the error value as negative */
593 | val *= -1;
594 |
595 | *(int *)optval = val;
596 | *optlen = optres->optlen;
597 |
598 | return 0;
599 | }
600 |
601 | int getpeername(int socket, struct sockaddr *restrict address,
602 | socklen_t *restrict address_len)
603 | {
604 | struct lvlip_sock *sock = lvlip_get_sock(socket);
605 | if (sock == NULL) return _getpeername(socket, address, address_len);
606 |
607 | lvl_sock_dbg("Getpeername called", sock);
608 |
609 | int pid = getpid();
610 | int msglen = sizeof(struct ipc_msg) + sizeof(struct ipc_sockname);
611 |
612 | struct ipc_msg *msg = alloca(msglen);
613 | msg->type = IPC_GETPEERNAME;
614 | msg->pid = pid;
615 |
616 | struct ipc_sockname *name = (struct ipc_sockname *)msg->data;
617 | name->socket = socket;
618 |
619 | // Send mocked syscall to lvl-ip
620 | if (_write(sock->lvlfd, (char *)msg, msglen) == -1) {
621 | perror("Error on writing IPC getpeername");
622 | }
623 |
624 | int rlen = sizeof(struct ipc_msg) + sizeof(struct ipc_err) + sizeof(struct ipc_sockname);
625 | char rbuf[rlen];
626 | memset(rbuf, 0, rlen);
627 |
628 | // Read return value from lvl-ip
629 | if (_read(sock->lvlfd, rbuf, rlen) == -1) {
630 | perror("Could not read IPC getpeername response");
631 | }
632 |
633 | struct ipc_msg *response = (struct ipc_msg *) rbuf;
634 |
635 | if (response->type != IPC_GETPEERNAME || response->pid != pid) {
636 | print_err("ERR: IPC getpeername response expected: type %d, pid %d\n"
637 | " actual: type %d, pid %d\n",
638 | IPC_GETPEERNAME, pid, response->type, response->pid);
639 | return -1;
640 | }
641 |
642 | struct ipc_err *error = (struct ipc_err *) response->data;
643 | if (error->rc != 0) {
644 | errno = error->err;
645 | return error->rc;
646 | }
647 |
648 | struct ipc_sockname *nameres = (struct ipc_sockname *) error->data;
649 |
650 | lvl_sock_dbg("Got getpeername fd %d addrlen %d sa_data %p",
651 | sock, nameres->socket, nameres->address_len, nameres->sa_data);
652 |
653 | if (nameres->socket != socket) {
654 | print_err("Got socket %d but requested %d\n", nameres->socket, socket);
655 | }
656 |
657 | *address_len = nameres->address_len;
658 | memcpy(address, nameres->sa_data, nameres->address_len);
659 |
660 | return 0;
661 | }
662 |
663 | int getsockname(int socket, struct sockaddr *restrict address,
664 | socklen_t *restrict address_len)
665 | {
666 | struct lvlip_sock *sock = lvlip_get_sock(socket);
667 | if (sock == NULL) return _getsockname(socket, address, address_len);
668 |
669 | lvl_sock_dbg("Getsockname called", sock);
670 |
671 | int pid = getpid();
672 | int msglen = sizeof(struct ipc_msg) + sizeof(struct ipc_sockname);
673 |
674 | struct ipc_msg *msg = alloca(msglen);
675 | msg->type = IPC_GETSOCKNAME;
676 | msg->pid = pid;
677 |
678 | struct ipc_sockname *name = (struct ipc_sockname *)msg->data;
679 | name->socket = socket;
680 |
681 | // Send mocked syscall to lvl-ip
682 | if (_write(sock->lvlfd, (char *)msg, msglen) == -1) {
683 | perror("Error on writing IPC getsockname");
684 | }
685 |
686 | int rlen = sizeof(struct ipc_msg) + sizeof(struct ipc_err) + sizeof(struct ipc_sockname);
687 | char rbuf[rlen];
688 | memset(rbuf, 0, rlen);
689 |
690 | // Read return value from lvl-ip
691 | if (_read(sock->lvlfd, rbuf, rlen) == -1) {
692 | perror("Could not read IPC getsockname response");
693 | }
694 |
695 | struct ipc_msg *response = (struct ipc_msg *) rbuf;
696 |
697 | if (response->type != IPC_GETSOCKNAME || response->pid != pid) {
698 | print_err("ERR: IPC getsockname response expected: type %d, pid %d\n"
699 | " actual: type %d, pid %d\n",
700 | IPC_GETSOCKNAME, pid, response->type, response->pid);
701 | return -1;
702 | }
703 |
704 | struct ipc_err *error = (struct ipc_err *) response->data;
705 | if (error->rc != 0) {
706 | errno = error->err;
707 | return error->rc;
708 | }
709 |
710 | struct ipc_sockname *nameres = (struct ipc_sockname *) error->data;
711 |
712 | lvl_sock_dbg("Got getsockname fd %d addrlen %d sa_data %p",
713 | sock, nameres->socket, nameres->address_len, nameres->sa_data);
714 |
715 | if (nameres->socket != socket) {
716 | print_err("Got socket %d but requested %d\n", nameres->socket, socket);
717 | }
718 |
719 | *address_len = nameres->address_len;
720 | memcpy(address, nameres->sa_data, nameres->address_len);
721 |
722 | return 0;
723 | }
724 |
725 | int fcntl(int fildes, int cmd, ...)
726 | {
727 | int rc = -1;
728 | va_list ap;
729 | void *arg;
730 |
731 | struct lvlip_sock *sock = lvlip_get_sock(fildes);
732 |
733 | if (!sock) {
734 | va_start(ap, cmd);
735 | arg = va_arg(ap, void *);
736 | va_end(ap);
737 |
738 | return _fcntl(fildes, cmd, arg);
739 | }
740 |
741 | lvl_sock_dbg("Fcntl called", sock);
742 |
743 | int pid = getpid();
744 | int msglen = sizeof(struct ipc_msg) + sizeof(struct ipc_fcntl) + sizeof(struct flock) + sizeof(int);
745 | struct ipc_msg *msg = alloca(msglen);
746 |
747 | msg->type = IPC_FCNTL;
748 | msg->pid = pid;
749 |
750 | struct ipc_fcntl *fc = (struct ipc_fcntl *)msg->data;
751 | fc->sockfd = fildes;
752 | fc->cmd = cmd;
753 |
754 | switch (cmd) {
755 | case F_GETFL:
756 | lvl_sock_dbg("Fcntl GETFL", sock);
757 |
758 | rc = transmit_lvlip(sock->lvlfd, msg, msglen);
759 | break;
760 | case F_SETFL:
761 | lvl_sock_dbg("Fcntl SETFL", sock);
762 |
763 | va_start(ap, cmd);
764 |
765 | int flags = va_arg(ap, int);
766 | memcpy(fc->data, &flags, sizeof(int));
767 |
768 | va_end(ap);
769 |
770 | rc = transmit_lvlip(sock->lvlfd, msg, msglen);
771 | break;
772 | default:
773 | rc = -1;
774 | errno = EINVAL;
775 | break;
776 | }
777 |
778 | return rc;
779 | }
780 |
781 | int __libc_start_main(int (*main) (int, char * *, char * *), int argc,
782 | char * * ubp_av, void (*init) (void), void (*fini) (void),
783 | void (*rtld_fini) (void), void (* stack_end))
784 | {
785 | __start_main = dlsym(RTLD_NEXT, "__libc_start_main");
786 |
787 | _sendto = dlsym(RTLD_NEXT, "sendto");
788 | _recvfrom = dlsym(RTLD_NEXT, "recvfrom");
789 | _poll = dlsym(RTLD_NEXT, "poll");
790 | _ppoll = dlsym(RTLD_NEXT, "ppoll");
791 | _pollchk = dlsym(RTLD_NEXT, "__poll_chk");
792 | _select = dlsym(RTLD_NEXT, "select");
793 | _fcntl = dlsym(RTLD_NEXT, "fcntl");
794 | _setsockopt = dlsym(RTLD_NEXT, "setsockopt");
795 | _getsockopt = dlsym(RTLD_NEXT, "getsockopt");
796 | _read = dlsym(RTLD_NEXT, "read");
797 | _write = dlsym(RTLD_NEXT, "write");
798 | _connect = dlsym(RTLD_NEXT, "connect");
799 | _socket = dlsym(RTLD_NEXT, "socket");
800 | _close = dlsym(RTLD_NEXT, "close");
801 | _getpeername = dlsym(RTLD_NEXT, "getpeername");
802 | _getsockname = dlsym(RTLD_NEXT, "getsockname");
803 |
804 | list_init(&lvlip_socks);
805 |
806 | return __start_main(main, argc, ubp_av, init, fini, rtld_fini, stack_end);
807 | }
808 |
--------------------------------------------------------------------------------
/tools/liblevelip.h:
--------------------------------------------------------------------------------
1 | #ifndef LIBLEVELIP_H_
2 | #define LIBLEVELIP_H_
3 |
4 | #include
5 | #include
6 | #include "list.h"
7 | #include "utils.h"
8 |
/*
 * Debug logging helpers.
 *
 * With DEBUG_API defined:
 *   lvl_dbg(msg, ...)            forwards to print_debug(), prefixing the
 *                                message with the calling thread's id.
 *   lvl_sock_dbg(msg, sock, ...) additionally prefixes sock's lvlfd and fd.
 *
 * Without DEBUG_API both expand to nothing, so their arguments are never
 * evaluated; do not pass expressions with side effects.
 *
 * `msg` must be a string literal (it is pasted onto the prefix). `sock` is
 * parenthesised in the expansion so any pointer expression can be passed.
 */
#ifdef DEBUG_API
#define lvl_dbg(msg, ...)                                                   \
    do {                                                                    \
        print_debug("lvlip ttid %lu "msg, pthread_self(), ##__VA_ARGS__);   \
    } while (0)
#define lvl_sock_dbg(msg, sock, ...)                                        \
    do {                                                                    \
        lvl_dbg("lvlfd %d fd %d: "msg, (sock)->lvlfd, (sock)->fd, ##__VA_ARGS__); \
    } while (0)
#else
#define lvl_sock_dbg(msg, sock, ...)
#define lvl_dbg(msg, ...)
#endif
22 |
/*
 * Bookkeeping record for one socket handled by this library. The wrappers
 * obtain it from lvlip_get_sock() and treat a NULL result as "not ours".
 */
struct lvlip_sock {
    /* List linkage; presumably chains the record into lvlip_socks -- TODO confirm. */
    struct list_head list;
    int lvlfd; /* For Level-IP IPC */
    /* Printed as "fd" by lvl_sock_dbg; presumably the descriptor number the
     * application uses for this socket -- TODO confirm against the allocator's caller. */
    int fd;
};
28 |
29 | static inline struct lvlip_sock *lvlip_alloc() {
30 | struct lvlip_sock *sock = malloc(sizeof(struct lvlip_sock));
31 | memset(sock, 0, sizeof(struct lvlip_sock));
32 |
33 | return sock;
34 | };
35 |
/*
 * Release a socket record by passing it to free(). A NULL pointer is
 * accepted, since free(NULL) does nothing.
 */
static inline void lvlip_free(struct lvlip_sock *sock)
{
    free(sock);
}
39 |
40 | #endif
41 |
--------------------------------------------------------------------------------