├── .gitsparse ├── .gitmodules ├── README.md ├── nuse.sh ├── dpdk-sdk-build.sh ├── .gitignore ├── nuse.conf.sample ├── .travis.yml ├── nuse-libc.h ├── nuse-vif.h ├── Makefile.dpdk ├── nuse-config.h ├── nuse.h ├── nuse-vif.c ├── nuse-hostcalls.h ├── nuse-vif-pipe.c ├── Makefile.include ├── nuse-vif-rawsock.c ├── Makefile ├── sim.c ├── nuse-vif-tap.c ├── Makefile.rump ├── nuse-hostcalls.c ├── nuse-fiber.c ├── nuse-vif-netmap.c ├── nuse-config.c ├── nuse-vif-dpdk.c ├── nuse.c ├── LICENSE ├── nuse-syscalls.c ├── list.h └── nuse-glue.c /.gitsparse: -------------------------------------------------------------------------------- 1 | rump:sys/rump/librump/rumpkern 2 | rump:lib/librumpclient 3 | rump:lib/librumphijack 4 | rump:lib/librumpuser 5 | rump:sys/rump/include/rump 6 | rump:sys/sys/syscall.h 7 | rump:sys/sys/syscallargs.h 8 | rump:sys/srcsys 9 | rump:.gitignore 10 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "rump"] 2 | path = rump 3 | url = https://github.com/libos-nuse/src-netbsd.git 4 | [submodule "netmap"] 5 | path = netmap 6 | url = https://code.google.com/p/netmap/ 7 | ignore = dirty 8 | [submodule "dpdk"] 9 | path = dpdk 10 | url = git://dpdk.org/dpdk 11 | ignore = dirty 12 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # linux-libos-tools 2 | userspace tools for linux libos 3 | 4 | [![Build Status](https://travis-ci.org/libos-nuse/linux-libos-tools.png)](https://travis-ci.org/libos-nuse/linux-libos-tools) 5 | [![Circle CI](https://circleci.com/gh/libos-nuse/linux-libos-tools.svg?style=svg)](https://circleci.com/gh/libos-nuse/linux-libos-tools) 6 | -------------------------------------------------------------------------------- /nuse.sh: 
-------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | if [ -e /usr/lib64 ]; then 3 | sudo ln -f -s `pwd`/libnuse-linux.so /usr/lib64/libnuse-linux.so 4 | fi 5 | sudo ln -f -s `pwd`/libnuse-linux.so /usr/lib/libnuse-linux.so 6 | sudo chown root libnuse-linux.so 7 | sudo chmod 4755 libnuse-linux.so 8 | 9 | LD_LIBRARY_PATH=.:../../../ LD_PRELOAD=liblinux.so:libnuse-linux.so $* 10 | -------------------------------------------------------------------------------- /dpdk-sdk-build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | git submodule init 4 | git submodule update dpdk 5 | 6 | : ${RTE_SDK:=$(pwd)/dpdk} 7 | : ${RTE_TARGET:=build} 8 | export RTE_SDK 9 | export RTE_TARGET 10 | 11 | set -e 12 | cd dpdk 13 | make -j1 CONFIG_RTE_LIBRTE_ETHDEV_DEBUG=y T=$(uname -m)-native-linuxapp-gcc config 14 | make SRCARCH=x86 CONFIG_RTE_BUILD_COMBINE_LIBS=y EXTRA_CFLAGS="-fPIC -g" \ 15 | || (echo "dpdk build failed" && exit 1) 16 | 17 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # 2 | # NOTE! Don't add files that are generated in specific 3 | # subdirectories here. Add them in the ".gitignore" file 4 | # in that subdirectory instead. 5 | # 6 | # NOTE! Please use 'git ls-files -i --exclude-standard' 7 | # command after changing this file, to see if there are 8 | # any tracked files which get ignored after the change. 9 | # 10 | # Normal rules 11 | # 12 | *.o 13 | *.o.* 14 | git-sparse 15 | *.cmd 16 | *.so 17 | nuse 18 | *~ 19 | -------------------------------------------------------------------------------- /nuse.conf.sample: -------------------------------------------------------------------------------- 1 | 2 | 3 | interface eth0 4 | address 192.168.0.10 5 | netmask 255.255.255.0 6 | # if macaddr is not specified, random mac addr is used. 
7 | viftype TAP 8 | 9 | interface p1p1 10 | address 172.16.0.1 11 | netmask 255.255.255.0 12 | macaddr 00:01:01:01:01:01 13 | # if viftype is not specified, default raw socket is used. 14 | 15 | route 16 | network 172.16.1.0 17 | netmask 255.255.255.0 18 | gateway 172.16.0.2 19 | 20 | route 21 | network 0.0.0.0 22 | netmask 0.0.0.0 23 | gateway 192.168.0.1 24 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: c 2 | install: 3 | - sudo apt-get update 4 | - sudo apt-get install bc 5 | - sudo apt-get -y install linux-headers-generic 6 | compiler: 7 | - gcc 8 | script: 9 | - git clone git://github.com/libos-nuse/net-next-nuse.git ../net-next-nuse 10 | - git clone . ../net-next-nuse/arch/lib/tools 11 | - cd ../net-next-nuse && make defconfig ARCH=lib V=1 12 | # disable DPDK build for a while 13 | - make clean ARCH=lib V=1 && make library ARCH=lib V=1 14 | - ./tools/testing/libos/nuse-test.sh 15 | - make clean ARCH=lib V=1 && make library ARCH=lib OPT=no V=1 16 | - ./tools/testing/libos/nuse-test.sh 17 | -------------------------------------------------------------------------------- /nuse-libc.h: -------------------------------------------------------------------------------- 1 | /* 2 | * glibc prototypes for NUSE 3 | * Copyright (c) 2015 Hajime Tazaki 4 | * 5 | * Author: Hajime Tazaki 6 | */ 7 | 8 | #ifndef NUSE_LIBC_H 9 | #define NUSE_LIBC_H 10 | 11 | /* stdlib.h */ 12 | void *malloc(size_t size); 13 | void free(void *ptr); 14 | long int random(void); 15 | 16 | pid_t getpid(void); 17 | int nanosleep(const struct timespec *req, struct timespec *rem); 18 | int clock_gettime(clockid_t clk_id, struct timespec *tp); 19 | char *getenv(const char *name); 20 | 21 | /* ipaddress config */ 22 | typedef uint32_t in_addr_t; 23 | extern in_addr_t inet_addr(const char *cp); 24 | 25 | unsigned int if_nametoindex(const char *ifname); 26 | 27 | 
#endif /* NUSE_LIBC_H */ 28 | -------------------------------------------------------------------------------- /nuse-vif.h: -------------------------------------------------------------------------------- 1 | #ifndef NUSE_VIF_H 2 | #define NUSE_VIF_H 3 | 4 | struct SimDevice; 5 | 6 | enum viftype { 7 | NUSE_VIF_RAWSOCK = 0, 8 | NUSE_VIF_NETMAP, /* not yet */ 9 | NUSE_VIF_DPDK, /* not yet */ 10 | NUSE_VIF_TAP, /* not yet */ 11 | NUSE_VIF_PIPE, /* not yet */ 12 | NUSE_VIF_MAX 13 | }; 14 | 15 | struct nuse_vif { 16 | int sock; 17 | enum viftype type; 18 | void *private; 19 | }; 20 | 21 | struct nuse_vif_impl { 22 | void (*read)(struct nuse_vif *, struct SimDevice *); 23 | void (*write)(struct nuse_vif *, struct SimDevice *, 24 | unsigned char *, int); 25 | void *(*create)(const char *); 26 | void (*delete)(struct nuse_vif *); 27 | }; 28 | 29 | #ifndef __define_initcall 30 | typedef int (*initcall_t)(void); 31 | #define __define_initcall(fn, id) \ 32 | static initcall_t __initcall_ ## fn ## id \ 33 | __attribute__((__used__)) \ 34 | __attribute__((__section__(".initcall" #id ".init"))) = fn; 35 | #endif 36 | 37 | void *nuse_vif_create(enum viftype type, const char *ifname); 38 | void nuse_vif_read(struct nuse_vif *vif, struct SimDevice *dev); 39 | void nuse_vif_write(struct nuse_vif *vif, struct SimDevice *dev, 40 | unsigned char *data, int len); 41 | 42 | #endif /* NUSE_VIF_H */ 43 | -------------------------------------------------------------------------------- /Makefile.dpdk: -------------------------------------------------------------------------------- 1 | include Makefile.include 2 | 3 | DPDK_SRC=nuse-vif-dpdk.c 4 | DPDK_OBJ=$(addsuffix .o,$(basename $(DPDK_SRC))) 5 | RTE_SDK=$(shell expr `pwd`)/dpdk 6 | RTE_TARGET=build 7 | 8 | DPDK_CFLAGS += -include $(RTE_SDK)/$(RTE_TARGET)/include/rte_config.h \ 9 | -I$(RTE_SDK)/$(RTE_TARGET)/include -I$(ARCH_DIR)/include \ 10 | -DRTE_MACHINE_CPUFLAG_SSE -DRTE_MACHINE_CPUFLAG_SSE2 -DRTE_MACHINE_CPUFLAG_SSE3 \ 11 | 
-DRTE_MACHINE_CPUFLAG_SSSE3 -DRTE_MACHINE_CPUFLAG_SSE4_1 -DRTE_MACHINE_CPUFLAG_SSE4_2 \ 12 | -DRTE_MACHINE_CPUFLAG_AES -DRTE_MACHINE_CPUFLAG_PCLMULQDQ -DRTE_MACHINE_CPUFLAG_AVX \ 13 | -DRTE_MACHINE_CPUFLAG_RDRAND -DRTE_MACHINE_CPUFLAG_FSGSBASE -DRTE_MACHINE_CPUFLAG_F16C \ 14 | -DRTE_COMPILE_TIME_CPUFLAGS=RTE_CPUFLAG_SSE,RTE_CPUFLAG_SSE2,RTE_CPUFLAG_SSE3,RTE_CPUFLAG_SSSE3,RTE_CPUFLAG_SSE4_1,RTE_CPUFLAG_SSE4_2,RTE_CPUFLAG_AES,RTE_CPUFLAG_PCLMULQDQ,RTE_CPUFLAG_AVX,RTE_CPUFLAG_RDRAND,RTE_CPUFLAG_FSGSBASE,RTE_CPUFLAG_F16C 15 | 16 | DPDK_LDLIBS += -lrte_pmd_virtio_uio -lrte_pmd_vmxnet3_uio -lrte_pmd_ixgbe \ 17 | -lrte_pmd_e1000 -lrte_timer -lrte_hash \ 18 | -lrte_mbuf -lethdev \ 19 | -lrte_eal -lrte_mempool -lrte_ring -lrte_pmd_ring -lrte_malloc 20 | 21 | dpdkl_yes=$(DPDK_OBJ) $(DPDK_LDLIBS) 22 | dpdkl_no= 23 | 24 | nuse-vif-dpdk.o: nuse-vif-dpdk.c Makefile.dpdk 25 | ifeq ($(DPDK), yes) 26 | $(QUIET_CC) $(CC) $(CFLAGS) -m64 -pthread -march=native $(DPDK_CFLAGS) -c $< -o $@ 27 | endif 28 | -------------------------------------------------------------------------------- /nuse-config.h: -------------------------------------------------------------------------------- 1 | /* 2 | * config file interface for NUSE 3 | * Copyright (c) 2015 Ryo Nakamura 4 | * 5 | * Author: Ryo Nakamura 6 | * Hajime Tazaki 7 | */ 8 | 9 | #ifndef _NUSE_CONFIG_H_ 10 | #define _NUSE_CONFIG_H_ 11 | 12 | #include /* PATH_MAX */ 13 | #include 14 | #include 15 | 16 | 17 | #define NUSE_VIF_MAX 16 18 | #define NUSE_ROUTE_MAX 16 19 | #define NUSE_ADDR_STRLEN 16 20 | #define NUSE_MACADDR_STRLEN 20 21 | 22 | 23 | struct nuse_vif_config { 24 | char ifname[IFNAMSIZ]; 25 | char address[NUSE_ADDR_STRLEN]; 26 | char netmask[NUSE_ADDR_STRLEN]; 27 | char macaddr[NUSE_MACADDR_STRLEN]; 28 | 29 | enum viftype type; 30 | 31 | u_char mac[ETH_ALEN]; 32 | 33 | struct ifreq ifr_vif_addr; 34 | struct ifreq ifr_vif_mask; 35 | 36 | /* for vif-pipe */ 37 | char pipepath[PATH_MAX]; 38 | }; 39 | 40 | struct nuse_route_config { 41 
| char network[NUSE_ADDR_STRLEN]; 42 | char netmask[NUSE_ADDR_STRLEN]; 43 | char gateway[NUSE_ADDR_STRLEN]; 44 | 45 | struct rtentry route; 46 | }; 47 | 48 | struct nuse_config { 49 | int vif_cnt, route_cnt; 50 | struct nuse_vif_config *vifs[NUSE_VIF_MAX]; 51 | struct nuse_route_config *routes[NUSE_ROUTE_MAX]; 52 | }; 53 | 54 | 55 | /* open cfname and return struct nuse_config */ 56 | int nuse_config_parse(struct nuse_config *cf, char *cfname); 57 | 58 | /* free cf->nuse_vif_config and cf->nuse_route_config */ 59 | void nuse_config_free(struct nuse_config *cf); 60 | 61 | 62 | 63 | #endif /* _NUSE_CONFIG_H_ */ 64 | -------------------------------------------------------------------------------- /nuse.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Network stack in userspace (NUSE) for POSIX userspace backend 3 | * Copyright (c) 2015 Hajime Tazaki 4 | * 5 | * Author: Hajime Tazaki 6 | */ 7 | 8 | #ifndef NUSE_H 9 | #define NUSE_H 10 | 11 | struct pollfd; 12 | struct SimDevice; 13 | struct SimSocket; 14 | 15 | /* nuse.c */ 16 | struct nuse_socket { 17 | struct SimSocket *kern_sock; 18 | int refcnt; 19 | int flags; 20 | }; 21 | 22 | struct nuse_fd { 23 | int real_fd; 24 | struct epoll_fd *epoll_fd; 25 | struct nuse_socket *nuse_sock; 26 | }; 27 | extern struct nuse_fd nuse_fd_table[1024]; 28 | void nuse_dev_rx(struct SimDevice *dev, char *buf, int size); 29 | 30 | /* nuse-fiber.c */ 31 | void *nuse_fiber_new(void * (*callback)(void *), 32 | void *context, uint32_t stackSize, const char *name); 33 | void nuse_fiber_start(void *handler); 34 | int nuse_fiber_isself(void *handler); 35 | void nuse_fiber_wait(void *handler); 36 | int nuse_fiber_wakeup(void *handler); 37 | void nuse_fiber_free(void *handler); 38 | void *nuse_fiber_new_from_caller(uint32_t stackSize, const char *name); 39 | int nuse_fiber_is_stopped(void *handler); 40 | void nuse_fiber_stop(void *handler); 41 | void nuse_add_timer(unsigned long ns, void *(*func) 
(void *arg), 42 | void *arg, void *handler); 43 | void nuse_task_add(void *fiber); 44 | void nuse_set_affinity(void); 45 | 46 | /* nuse-poll.c */ 47 | int nuse_poll(struct pollfd *fds, unsigned int nfds, 48 | struct timespec *end_time); 49 | 50 | /* nuse-syscalls.c */ 51 | void nuse_syscall_proxy_init(void); 52 | void nuse_syscall_proxy_exit(void); 53 | 54 | #endif /* NUSE_H */ 55 | -------------------------------------------------------------------------------- /nuse-vif.c: -------------------------------------------------------------------------------- 1 | /* 2 | * virtual network interface feature for NUSE 3 | * Copyright (c) 2015 Hajime Tazaki 4 | * 5 | * Author: Hajime Tazaki 6 | */ 7 | 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include "nuse-hostcalls.h" 14 | #include "nuse-vif.h" 15 | 16 | struct nuse_vif_impl *nuse_vif[NUSE_VIF_MAX]; 17 | 18 | void 19 | nuse_vif_read(struct nuse_vif *vif, struct SimDevice *dev) 20 | { 21 | struct nuse_vif_impl *impl = nuse_vif[vif->type]; 22 | 23 | return impl->read(vif, dev); 24 | } 25 | 26 | void 27 | nuse_vif_write(struct nuse_vif *vif, struct SimDevice *dev, 28 | unsigned char *data, int len) 29 | { 30 | struct nuse_vif_impl *impl = nuse_vif[vif->type]; 31 | 32 | return impl->write(vif, dev, data, len); 33 | } 34 | 35 | static int 36 | nuse_set_if_promisc(const char *ifname) 37 | { 38 | int fd; 39 | struct ifreq ifr; 40 | 41 | fd = host_socket(AF_INET, SOCK_DGRAM, 0); 42 | memset(&ifr, 0, sizeof(ifr)); 43 | strncpy(ifr.ifr_name, ifname, IFNAMSIZ - 1); 44 | 45 | if (host_ioctl(fd, SIOCGIFFLAGS, &ifr) != 0) { 46 | printf("failed to get interface status\n"); 47 | return -1; 48 | } 49 | 50 | ifr.ifr_flags |= IFF_UP | IFF_PROMISC; 51 | 52 | if (host_ioctl(fd, SIOCSIFFLAGS, &ifr) != 0) { 53 | printf("failed to set interface to promisc\n"); 54 | return -1; 55 | } 56 | 57 | return 0; 58 | } 59 | 60 | void * 61 | nuse_vif_create(enum viftype type, const char *ifname) 62 | { 63 | struct 
nuse_vif_impl *impl = nuse_vif[type]; 64 | 65 | /* configure promiscus */ 66 | if (type != NUSE_VIF_TAP) 67 | nuse_set_if_promisc(ifname); 68 | 69 | return impl->create(ifname); 70 | } 71 | 72 | void 73 | nuse_vif_delete(struct nuse_vif *vif) 74 | { 75 | struct nuse_vif_impl *impl = nuse_vif[vif->type]; 76 | 77 | return impl->delete(vif); 78 | } 79 | 80 | -------------------------------------------------------------------------------- /nuse-hostcalls.h: -------------------------------------------------------------------------------- 1 | /* 2 | * host system/library calls for NUSE 3 | * Copyright (c) 2015 Hajime Tazaki 4 | * 5 | * Author: Hajime Tazaki 6 | */ 7 | 8 | #ifndef NUSE_HOSTCALLS_H 9 | #define NUSE_HOSTCALLS_H 10 | 11 | struct pollfd; 12 | struct pthread; 13 | struct pthread_attr; 14 | typedef unsigned long int pthread_t; 15 | 16 | #ifndef errno 17 | //#include 18 | extern int *__errno_location (void); 19 | #define errno (*__errno_location()) 20 | #endif /* errno */ 21 | 22 | /* nuse-hostcalls.c */ 23 | void nuse_hostcall_init(void); 24 | 25 | extern int (*host_socket)(int fd, int type, int proto); 26 | extern int (*host_close)(int fd); 27 | extern int (*host_bind)(int, const struct sockaddr *, int); 28 | extern ssize_t (*host_read)(int fd, void *buf, size_t count); 29 | extern ssize_t (*host_send)(int sockfd, const void *buf, size_t len, int flags); 30 | extern ssize_t (*host_sendmsg)(int sockfd, const struct msghdr *msg, int flags); 31 | extern ssize_t (*host_write)(int fd, const void *buf, size_t count); 32 | extern ssize_t (*host_writev)(int fd, const struct iovec *iovec, size_t count); 33 | extern int (*host_open)(const char *pathname, int flags,...); 34 | extern int (*host_open64)(const char *pathname, int flags,...); 35 | extern int (*host_ioctl)(int d, int request, ...); 36 | extern int (*host_pipe)(int pipefd[2]); 37 | extern int (*host_poll)(struct pollfd *, int, int); 38 | extern int (*host_pthread_create)(pthread_t *, const struct pthread_attr 
*, 39 | void *(*)(void *), void *); 40 | extern int (*host_pthread_join)(pthread_t thread, void **retval); 41 | extern char *(*host_getenv)(const char *name); 42 | extern FILE *(*host_fdopen)(int fd, const char *mode); 43 | extern int (*host_fcntl)(int fd, int cmd, ... /* arg */ ); 44 | extern int (*host_fclose)(FILE *fp); 45 | extern size_t (*host_fwrite)(const void *ptr, size_t size, size_t nmemb, 46 | FILE *stream); 47 | extern int (*host_access)(const char *pathname, int mode); 48 | extern int (*host_listen)(int sockfd, int backlog); 49 | extern int (*host_accept)(int sockfd, struct sockaddr *addr, 50 | socklen_t *addrlen); 51 | extern int (*host_getsockopt)(int sockfd, int level, int optname, 52 | void *optval, int *optlen); 53 | extern int (*host_setsockopt)(int sockfd, int level, int optname, 54 | const void *optval, int optlen); 55 | extern pid_t (*host_getpid)(void); 56 | 57 | #endif /* NUSE_HOSTCALLS_H */ 58 | -------------------------------------------------------------------------------- /nuse-vif-pipe.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | #include "nuse-hostcalls.h" 11 | #include "nuse-vif.h" 12 | #include "nuse.h" 13 | 14 | #define NUSE_DEFAULT_PIPE_PRIV 0666 15 | 16 | static int 17 | named_pipe_alloc(const char *path) 18 | { 19 | int fd; 20 | 21 | if (mkfifo(path, NUSE_DEFAULT_PIPE_PRIV) < 0) { 22 | perror ("mkfifo"); 23 | return -1; 24 | } 25 | 26 | if ((fd = host_open(path, O_RDWR)) < 0) { 27 | perror ("open"); 28 | return -1; 29 | } 30 | 31 | return fd; 32 | } 33 | 34 | void 35 | nuse_vif_pipe_read(struct nuse_vif *vif, struct SimDevice *dev) 36 | { 37 | int sock = vif->sock; 38 | char buf[8192]; 39 | ssize_t size; 40 | 41 | while (1) { 42 | size = host_read(sock, buf, sizeof(buf)); 43 | if (size < 0) { 44 | perror("read"); 45 | host_close(sock); 46 | return; 47 | } else if (size == 0) { 48 
| host_close(sock); 49 | return; 50 | } 51 | 52 | nuse_dev_rx(dev, buf, size); 53 | } 54 | } 55 | 56 | void 57 | nuse_vif_pipe_write(struct nuse_vif *vif, struct SimDevice *dev, 58 | unsigned char *data, int len) 59 | { 60 | int sock = vif->sock; 61 | int ret = host_write(sock, data, len); 62 | 63 | if (ret == -1) 64 | perror ("write"); 65 | } 66 | 67 | void * 68 | nuse_vif_pipe_create(const char *pipepath) 69 | { 70 | int sock; 71 | struct nuse_vif *vif; 72 | 73 | sock = named_pipe_alloc(pipepath); 74 | if (sock < 0) { 75 | printf ("failed to create named pipe \"%s\"\n", pipepath); 76 | return NULL; 77 | } 78 | 79 | vif = malloc (sizeof(struct nuse_vif)); 80 | vif->sock = sock; 81 | vif->type = NUSE_VIF_PIPE; 82 | 83 | return vif; 84 | } 85 | 86 | void 87 | nuse_vif_pipe_delete(struct nuse_vif *vif) 88 | { 89 | int sock = vif->sock; 90 | free(vif); 91 | host_close(sock); 92 | } 93 | 94 | static struct nuse_vif_impl nuse_vif_pipe = { 95 | .read = nuse_vif_pipe_read, 96 | .write = nuse_vif_pipe_write, 97 | .create = nuse_vif_pipe_create, 98 | .delete = nuse_vif_pipe_delete, 99 | }; 100 | 101 | extern struct nuse_vif_impl *nuse_vif[NUSE_VIF_MAX]; 102 | 103 | int __attribute__((constructor)) 104 | nuse_vif_pipe_init(void) 105 | { 106 | nuse_vif[NUSE_VIF_PIPE] = &nuse_vif_pipe; 107 | return 0; 108 | } 109 | -------------------------------------------------------------------------------- /Makefile.include: -------------------------------------------------------------------------------- 1 | ifneq ($(O),) 2 | ifeq ($(origin O), command line) 3 | dummy := $(if $(shell test -d $(O) || echo $(O)),$(error O=$(O) does not exist),) 4 | ABSOLUTE_O := $(shell cd $(O) ; pwd) 5 | OUTPUT := $(ABSOLUTE_O)/$(if $(subdir),$(subdir)/) 6 | COMMAND_O := O=$(ABSOLUTE_O) 7 | ifeq ($(objtree),) 8 | objtree := $(O) 9 | endif 10 | endif 11 | endif 12 | 13 | # check that the output directory actually exists 14 | ifneq ($(OUTPUT),) 15 | OUTDIR := $(shell cd $(OUTPUT) && /bin/pwd) 16 | $(if 
$(OUTDIR),, $(error output directory "$(OUTPUT)" does not exist)) 17 | endif 18 | 19 | # 20 | # Include saner warnings here, which can catch bugs: 21 | # 22 | EXTRA_WARNINGS := -Wbad-function-cast 23 | EXTRA_WARNINGS += -Wdeclaration-after-statement 24 | EXTRA_WARNINGS += -Wformat-security 25 | EXTRA_WARNINGS += -Wformat-y2k 26 | EXTRA_WARNINGS += -Winit-self 27 | EXTRA_WARNINGS += -Wmissing-declarations 28 | EXTRA_WARNINGS += -Wmissing-prototypes 29 | EXTRA_WARNINGS += -Wnested-externs 30 | EXTRA_WARNINGS += -Wno-system-headers 31 | EXTRA_WARNINGS += -Wold-style-definition 32 | EXTRA_WARNINGS += -Wpacked 33 | EXTRA_WARNINGS += -Wredundant-decls 34 | EXTRA_WARNINGS += -Wshadow 35 | EXTRA_WARNINGS += -Wstrict-aliasing=3 36 | EXTRA_WARNINGS += -Wstrict-prototypes 37 | EXTRA_WARNINGS += -Wswitch-default 38 | EXTRA_WARNINGS += -Wswitch-enum 39 | EXTRA_WARNINGS += -Wundef 40 | EXTRA_WARNINGS += -Wwrite-strings 41 | EXTRA_WARNINGS += -Wformat 42 | 43 | ifneq ($(findstring $(MAKEFLAGS), w),w) 44 | PRINT_DIR = --no-print-directory 45 | else 46 | NO_SUBDIR = : 47 | endif 48 | 49 | # 50 | # Define a callable command for descending to a new directory 51 | # 52 | # Call by doing: $(call descend,directory[,target]) 53 | # 54 | descend = \ 55 | +mkdir -p $(OUTPUT)$(1) && \ 56 | $(MAKE) $(COMMAND_O) subdir=$(if $(subdir),$(subdir)/$(1),$(1)) $(PRINT_DIR) -C $(1) $(2) 57 | 58 | QUIET_SUBDIR0 = +$(MAKE) $(COMMAND_O) -C # space to separate -C and subdir 59 | QUIET_SUBDIR1 = 60 | 61 | ifneq ($(findstring $(MAKEFLAGS),s),s) 62 | ifneq ($(V),1) 63 | QUIET_CC = @echo ' CC '$@; 64 | QUIET_CC_FPIC = @echo ' CC FPIC '$@; 65 | QUIET_AR = @echo ' AR '$@; 66 | QUIET_LINK = @echo ' LINK '$@; 67 | QUIET_MKDIR = @echo ' MKDIR '$@; 68 | QUIET_GEN = @echo ' GEN '$@; 69 | QUIET_SUBDIR0 = +@subdir= 70 | QUIET_SUBDIR1 = ;$(NO_SUBDIR) \ 71 | echo ' SUBDIR '$$subdir; \ 72 | $(MAKE) $(PRINT_DIR) -C $$subdir 73 | QUIET_FLEX = @echo ' FLEX '$@; 74 | QUIET_BISON = @echo ' BISON '$@; 75 | 76 | descend = \ 
77 | +@echo ' DESCEND '$(1); \ 78 | mkdir -p $(OUTPUT)$(1) && \ 79 | $(MAKE) $(COMMAND_O) subdir=$(if $(subdir),$(subdir)/$(1),$(1)) $(PRINT_DIR) -C $(1) $(2) 80 | 81 | QUIET_CLEAN = @printf ' CLEAN %s\n' $1; 82 | QUIET_INSTALL = @printf ' INSTALL %s\n' $1; 83 | endif 84 | endif 85 | -------------------------------------------------------------------------------- /nuse-vif-rawsock.c: -------------------------------------------------------------------------------- 1 | /* 2 | * raw socket based virtual network interface feature for NUSE 3 | * Copyright (c) 2015 Hajime Tazaki 4 | * 5 | * Author: Hajime Tazaki 6 | */ 7 | 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #define _GNU_SOURCE /* Get RTLD_NEXT */ 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include "nuse-hostcalls.h" 21 | #include "nuse-vif.h" 22 | #include "nuse.h" 23 | #include "nuse-libc.h" 24 | 25 | void 26 | nuse_vif_raw_read(struct nuse_vif *vif, struct SimDevice *dev) 27 | { 28 | int sock = vif->sock; 29 | char buf[8192]; 30 | ssize_t size; 31 | 32 | while (1) { 33 | size = host_read(sock, buf, sizeof(buf)); 34 | if (size < 0) { 35 | printf("read error errno=%d\n", errno); 36 | host_close(sock); 37 | return; 38 | } else if (size == 0) { 39 | printf("read error: closed. 
errno=%d\n", errno); 40 | host_close(sock); 41 | return; 42 | } 43 | 44 | nuse_dev_rx(dev, buf, size); 45 | } 46 | 47 | printf("%s: should not reach", __func__); 48 | } 49 | 50 | void 51 | nuse_vif_raw_write(struct nuse_vif *vif, struct SimDevice *dev, 52 | unsigned char *data, int len) 53 | { 54 | int sock = vif->sock; 55 | int ret = host_write(sock, data, len); 56 | 57 | if (ret == -1) 58 | perror("write"); 59 | } 60 | 61 | void * 62 | nuse_vif_raw_create(const char *ifname) 63 | { 64 | int err; 65 | int sock = host_socket(PF_PACKET, SOCK_RAW, htons(ETH_P_ALL)); 66 | struct sockaddr_ll ll; 67 | struct nuse_vif *vif; 68 | 69 | if (sock < 0) 70 | perror("socket"); 71 | 72 | memset(&ll, 0, sizeof(ll)); 73 | 74 | ll.sll_family = AF_PACKET; 75 | ll.sll_ifindex = if_nametoindex(ifname); 76 | ll.sll_protocol = htons(ETH_P_ALL); 77 | err = host_bind(sock, (struct sockaddr *)&ll, sizeof(ll)); 78 | if (err) 79 | perror("bind"); 80 | 81 | vif = malloc(sizeof(struct nuse_vif)); 82 | vif->sock = sock; 83 | vif->type = NUSE_VIF_RAWSOCK; /* FIXME */ 84 | return vif; 85 | } 86 | 87 | void 88 | nuse_vif_raw_delete(struct nuse_vif *vif) 89 | { 90 | int sock = vif->sock; 91 | free(vif); 92 | host_close(sock); 93 | } 94 | 95 | static struct nuse_vif_impl nuse_vif_rawsock = { 96 | .read = nuse_vif_raw_read, 97 | .write = nuse_vif_raw_write, 98 | .create = nuse_vif_raw_create, 99 | .delete = nuse_vif_raw_delete, 100 | }; 101 | 102 | extern struct nuse_vif_impl *nuse_vif[NUSE_VIF_MAX]; 103 | 104 | int __attribute__((constructor)) 105 | nuse_vif_rawsock_init(void) 106 | { 107 | nuse_vif[NUSE_VIF_RAWSOCK] = &nuse_vif_rawsock; 108 | return 0; 109 | } 110 | __define_initcall(nuse_vif_rawsock_init, 1); 111 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | include Makefile.include 2 | 3 | NUSE_LIB=libnuse-linux-$(KERNELVERSION).so 4 | 
SIM_LIB=libsim-linux-$(KERNELVERSION).so 5 | KERNEL_LIB=liblinux-$(KERNELVERSION).so 6 | RUMP_HIJACK_LIB=libnuse-hijack.so 7 | RUMP_CLIENT_LIB=librumpclient.so 8 | RUMP_SERVER_LIB=librumpserver.so 9 | LIBOS_DIR=.. 10 | srctree=$(LIBOS_DIR)/../../ 11 | 12 | CC=gcc 13 | 14 | all: $(NUSE_LIB) $(SIM_LIB) $(RUMP_HIJACK_LIB) $(RUMP_CLIENT_LIB) 15 | 16 | clean: 17 | $(call QUIET_CLEAN, nuse) rm -f *.o lib*.so 18 | $(call QUIET_CLEAN, rump) $(MAKE) clean -s -f Makefile.rump 19 | 20 | # vif extensions 21 | NUSE_SRC="" 22 | 23 | NETMAP?=no 24 | DPDK?=no 25 | 26 | dpdk/build/lib/libintel_dpdk.a: 27 | ifeq ($(DPDK), yes) 28 | $(QUIET_GEN) git submodule init && git submodule update dpdk 29 | $(error "Execute the DPDK build script 'dpdk-sdk-build.sh' at the arch/lib/tools directory") 30 | endif 31 | 32 | netmap: 33 | ifeq ($(NETMAP), yes) 34 | $(QUIET_GEN) git submodule init && git submodule update netmap 35 | endif 36 | 37 | # sources and objects 38 | NUSE_SRC= 39 | ifeq "$(DPDK)" "yes" 40 | include Makefile.dpdk 41 | DPDK_LDFLAGS=-L$(RTE_SDK)/$(RTE_TARGET)/lib 42 | endif 43 | 44 | ifeq "$(NETMAP)" "yes" 45 | NUSE_SRC+=nuse-vif-netmap.c 46 | CFLAGS+= -Inetmap/sys 47 | endif 48 | 49 | NUSE_SRC+=\ 50 | nuse-fiber.c nuse-vif.c nuse-hostcalls.c nuse-config.c \ 51 | nuse-vif-rawsock.c nuse-vif-tap.c nuse-vif-pipe.c nuse-glue.c nuse.c 52 | 53 | 54 | SIM_SRC=sim.c 55 | 56 | SIM_OBJ=$(addsuffix .o,$(basename $(SIM_SRC))) 57 | NUSE_OBJ=$(addsuffix .o,$(basename $(NUSE_SRC))) 58 | KERNEL_OBJS_SIM=$(addprefix $(srctree)/, $(OBJS)) 59 | ALL_OBJS+=$(SIM_OBJ) $(NUSE_OBJ) 60 | 61 | # build flags 62 | LDFLAGS_NUSE = -shared -nodefaultlibs -L. -lrumpserver -ldl -lrt $(DPDK_LDFLAGS) -Wl,-z,lazy 63 | LDFLAGS_SIM = -shared -nodefaultlibs -g3 -Wl,-O1 -Wl,-T$(LIBOS_DIR)/linker.lds $(covl_$(COV)) 64 | CFLAGS+= -Wall -fno-stack-protector -U_FORTIFY_SOURCE -fPIC -g3 -I. 
-I$(LIBOS_DIR)/include 65 | export CFLAGS srctree LIBOS_DIR 66 | 67 | # build target 68 | %.o : %.c Makefile 69 | $(QUIET_CC) $(CC) $(CFLAGS) -c $< 70 | 71 | # order of $(dpdkl_$(DPDK)) matters... 72 | $(NUSE_LIB): $(DPDK_OBJ) $(NUSE_OBJ) $(RUMP_SERVER_LIB) $(srctree)/$(KERNEL_LIB) Makefile 73 | $(QUIET_LINK) $(CC) -Wl,--whole-archive $(dpdkl_$(DPDK)) $(NUSE_OBJ) $(LDFLAGS_NUSE) -o $@ ;\ 74 | ln -s -f $(NUSE_LIB) libnuse-linux.so ;\ 75 | ln -s -f ./nuse.sh ./nuse 76 | 77 | $(SIM_LIB): $(SIM_OBJ) $(srctree)/$(KERNEL_LIB) Makefile 78 | $(QUIET_LINK) $(CC) -Wl,--whole-archive $(SIM_OBJ) $(KERNEL_OBJS_SIM) $(LDFLAGS_SIM) -o $@; \ 79 | ln -s -f $(SIM_LIB) libsim-linux.so 80 | 81 | $(RUMP_CLIENT_LIB): Makefile.rump Makefile FORCE 82 | $(Q) $(MAKE) $(PRINT_DIR) -f Makefile.rump $@ 83 | 84 | $(RUMP_HIJACK_LIB): $(RUMP_CLIENT_LIB) Makefile.rump Makefile FORCE 85 | $(Q) $(MAKE) $(PRINT_DIR) -f Makefile.rump $@ 86 | 87 | $(RUMP_SERVER_LIB): Makefile.rump Makefile FORCE 88 | $(Q) $(MAKE) $(PRINT_DIR) -f Makefile.rump $@ 89 | 90 | FORCH: 91 | .PHONY: clean FORCE 92 | .NOTPARALLEL : $(RUMP_SERVER_LIB) $(RUMP_CLIENT_LIB) $(RUMP_HIJACK_LIB) 93 | -------------------------------------------------------------------------------- /sim.c: -------------------------------------------------------------------------------- 1 | /* 2 | * network simulator backend for library version of Linux kernel 3 | * Copyright (c) 2015 INRIA, Hajime Tazaki 4 | * 5 | * Author: Mathieu Lacage 6 | * Hajime Tazaki 7 | */ 8 | 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | 17 | #include "sim-init.h" 18 | #include "sim.h" 19 | 20 | FILE *stderr = NULL; 21 | 22 | extern struct SimImported g_imported; 23 | extern struct SimKernel *g_kernel; 24 | 25 | static int num_handler = 0; 26 | void *atexit_list[1024]; 27 | 28 | extern void lib_init(struct SimExported *exported, 29 | const struct SimImported *imported, 30 | struct SimKernel *kernel); 31 | 32 | void 
sim_init(struct SimExported *exported, const struct SimImported *imported, 33 | struct SimKernel *kernel) 34 | { 35 | int i; 36 | 37 | lib_init(exported, imported, kernel); 38 | /* XXX handle atexit registration for gcov */ 39 | for (i = 0; i < 1024; i++) { 40 | if (atexit_list[i]) { 41 | g_imported.atexit(g_kernel, 42 | (void (*)(void))atexit_list[i]); 43 | } 44 | } 45 | 46 | } 47 | 48 | int fclose(FILE *fp) 49 | { 50 | return g_imported.fclose(g_kernel, fp); 51 | } 52 | char *getenv(const char *name) 53 | { 54 | return g_imported.getenv(g_kernel, name); 55 | } 56 | int access(const char *pathname, int mode) 57 | { 58 | return g_imported.access(g_kernel, pathname, mode); 59 | } 60 | int atexit(void (*function)(void)) 61 | { 62 | if (g_imported.atexit == 0) { 63 | atexit_list[num_handler++] = function; 64 | return 0; 65 | } else { 66 | return g_imported.atexit(g_kernel, function); 67 | } 68 | } 69 | pid_t getpid(void) 70 | { 71 | return (pid_t)0; 72 | } 73 | int mkdir(const char *pathname, mode_t mode) 74 | { 75 | return g_imported.mkdir(g_kernel, pathname, mode); 76 | } 77 | int open(const char *pathname, int flags) 78 | { 79 | return g_imported.open(g_kernel, pathname, flags); 80 | } 81 | int fcntl(int fd, int cmd, ... 
/* arg */) 82 | { 83 | return 0; 84 | } 85 | int __fxstat(int ver, int fd, void *buf) 86 | { 87 | return g_imported.__fxstat(g_kernel, ver, fd, buf); 88 | } 89 | int fseek(FILE *stream, long offset, int whence) 90 | { 91 | return g_imported.fseek(g_kernel, stream, offset, whence); 92 | } 93 | long ftell(FILE *stream) 94 | { 95 | return g_imported.ftell(g_kernel, stream); 96 | } 97 | void setbuf(FILE *stream, char *buf) 98 | { 99 | return g_imported.setbuf(g_kernel, stream, buf); 100 | } 101 | FILE *fdopen(int fd, const char *mode) 102 | { 103 | return g_imported.fdopen(g_kernel, fd, mode); 104 | } 105 | size_t fread(void *ptr, size_t size, size_t nmemb, FILE *stream) 106 | { 107 | return g_imported.fread(g_kernel, ptr, size, nmemb, stream); 108 | } 109 | size_t fwrite(const void *ptr, size_t size, size_t nmemb, FILE *stream) 110 | { 111 | return g_imported.fwrite(g_kernel, ptr, size, nmemb, stream); 112 | } 113 | -------------------------------------------------------------------------------- /nuse-vif-tap.c: -------------------------------------------------------------------------------- 1 | /* 2 | * tun/tap based virtual network interface feature for NUSE 3 | * Copyright (c) 2015 Ryo Nakamura 4 | * 5 | * Author: Ryo Nakamura 6 | * Hajime Tazaki 7 | */ 8 | 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | 20 | #include "nuse-hostcalls.h" 21 | #include "nuse-vif.h" 22 | #include "nuse.h" 23 | 24 | static int 25 | tap_alloc(const char *dev) 26 | { 27 | /* create tunnel interface */ 28 | int fd; 29 | struct ifreq ifr; 30 | 31 | fd = host_open("/dev/net/tun", O_RDWR, 0); 32 | if (fd < 0) { 33 | printf("failed to open /dev/tun\n"); 34 | return -1; 35 | } 36 | 37 | memset(&ifr, 0, sizeof(ifr)); 38 | ifr.ifr_flags = IFF_TAP | IFF_NO_PI; 39 | snprintf(ifr.ifr_name, IFNAMSIZ, "%s", dev); 40 | 41 | if (host_ioctl(fd, TUNSETIFF, (void *)&ifr) < 0) { 42 | printf("failed to create tap 
interface \"%s\"\n", 43 | ifr.ifr_name); 44 | return -1; 45 | } 46 | 47 | printf("%s: alloc tap interface %s\n", __func__, dev); 48 | 49 | return fd; 50 | } 51 | 52 | 53 | int 54 | tap_up(char *dev) 55 | { 56 | int udp_fd; 57 | struct ifreq ifr; 58 | 59 | /* Make Tunnel interface up state */ 60 | udp_fd = host_socket(AF_INET, SOCK_DGRAM, 0); 61 | if (udp_fd < 0) { 62 | printf("failt to create control socket of tap interface.\n"); 63 | return -1; 64 | } 65 | 66 | memset(&ifr, 0, sizeof(ifr)); 67 | ifr.ifr_flags = IFF_UP; 68 | strncpy(ifr.ifr_name, dev, IFNAMSIZ); 69 | 70 | if (host_ioctl(udp_fd, SIOCSIFFLAGS, (void *)&ifr) < 0) { 71 | printf("failed to make %s up.\n", dev); 72 | host_close(udp_fd); 73 | return -1; 74 | } 75 | 76 | host_close(udp_fd); 77 | 78 | return 1; 79 | } 80 | 81 | void 82 | nuse_vif_tap_read(struct nuse_vif *vif, struct SimDevice *dev) 83 | { 84 | int sock = vif->sock; 85 | char buf[8192]; 86 | ssize_t size; 87 | 88 | while (1) { 89 | size = host_read(sock, buf, sizeof(buf)); 90 | if (size < 0) { 91 | perror("read"); 92 | host_close(sock); 93 | return; 94 | } else if (size == 0) { 95 | host_close(sock); 96 | return; 97 | } 98 | 99 | nuse_dev_rx(dev, buf, size); 100 | } 101 | } 102 | 103 | void 104 | nuse_vif_tap_write(struct nuse_vif *vif, struct SimDevice *dev, 105 | unsigned char *data, int len) 106 | { 107 | int sock = vif->sock; 108 | int ret = host_write(sock, data, len); 109 | 110 | if (ret == -1) 111 | perror("write"); 112 | } 113 | 114 | 115 | void * 116 | nuse_vif_tap_create(const char *ifname) 117 | { 118 | int sock; 119 | char dev[IFNAMSIZ]; 120 | struct nuse_vif *vif; 121 | 122 | snprintf(dev, sizeof(dev), "nuse-%s", ifname); 123 | 124 | sock = tap_alloc(dev); 125 | if (sock < 0) { 126 | perror("tap_alloc"); 127 | return NULL; 128 | } 129 | 130 | if (tap_up(dev) < 0) { 131 | perror("tap_up"); 132 | return NULL; 133 | } 134 | 135 | vif = malloc(sizeof(struct nuse_vif)); 136 | vif->sock = sock; 137 | vif->type = NUSE_VIF_TAP; 138 | 
139 | return vif; 140 | } 141 | 142 | void 143 | nuse_vif_tap_delete(struct nuse_vif *vif) 144 | { 145 | int sock = vif->sock; 146 | free(vif); 147 | host_close(sock); 148 | } 149 | 150 | static struct nuse_vif_impl nuse_vif_tap = { 151 | .read = nuse_vif_tap_read, 152 | .write = nuse_vif_tap_write, 153 | .create = nuse_vif_tap_create, 154 | .delete = nuse_vif_tap_delete, 155 | }; 156 | 157 | 158 | extern struct nuse_vif_impl *nuse_vif[NUSE_VIF_MAX]; 159 | 160 | int __attribute__((constructor)) 161 | nuse_vif_tap_init(void) 162 | { 163 | nuse_vif[NUSE_VIF_TAP] = &nuse_vif_tap; 164 | return 0; 165 | } 166 | -------------------------------------------------------------------------------- /Makefile.rump: -------------------------------------------------------------------------------- 1 | include Makefile.include 2 | 3 | ###### source 4 | RUMP_TOP=rump 5 | # RUMP_DEBUG=-DDEBUG -DDEBUGJACK 6 | RUMPUSER_CONF_H=$(RUMP_TOP)/lib/librumpuser/rumpuser_config.h 7 | 8 | RUMP_NUSE_SRC=nuse-syscalls.c 9 | 10 | RUMP_SRC= \ 11 | $(RUMP_TOP)/lib/librumpuser/rumpuser_sp.c \ 12 | $(RUMP_TOP)/lib/librumpuser/rumpuser.c \ 13 | $(RUMP_TOP)/lib/librumpuser/rumpuser_errtrans.c \ 14 | $(RUMP_TOP)/lib/librumpuser/rumpuser_random.c \ 15 | $(RUMP_TOP)/lib/librumpuser/rumpuser_pth_dummy.c 16 | 17 | RUMP_HIJACK_SRC= \ 18 | $(RUMP_TOP)/lib/librumphijack/hijack.c 19 | 20 | RUMP_CLIENT_SRC= \ 21 | $(RUMP_TOP)/lib/librumpclient/rumpclient.c \ 22 | $(RUMP_TOP)/sys/rump/librump/rumpkern/rump_syscalls.c 23 | 24 | ###### build flags 25 | RUMP_CFLAGS=-I./$(RUMP_TOP)/lib/librumpuser \ 26 | -I./$(RUMP_TOP)/sys/rump/include \ 27 | -I./$(RUMP_TOP)/sys \ 28 | -I./$(RUMP_TOP)/lib/librumpuser \ 29 | -I./$(RUMP_TOP)/lib/librumpclient \ 30 | $(CFLAGS) -DRUMPUSER_CONFIG -DLIBRUMPUSER \ 31 | -I. -DRUMP_CLIENT \ 32 | $(RUMP_DEBUG) 33 | 34 | RUMP_LDFLAGS=-lpthread -ldl -Wl,-rpath=. -L. 
-lrumpclient $(covl_$(COV)) -Wl,-z,lazy 35 | 36 | RUMP_NUSE_OBJ=$(addsuffix .o,$(basename $(RUMP_NUSE_SRC))) 37 | RUMP_OBJ=$(addsuffix .o,$(basename $(RUMP_SRC))) 38 | RUMP_CLIENT_OBJ=$(addsuffix .o,$(basename $(RUMP_CLIENT_SRC))) 39 | RUMP_HIJACK_OBJ=$(addsuffix .o,$(basename $(RUMP_HIJACK_SRC))) 40 | RUMPS_OBJ=$(RUMP_OBJ) $(RUMP_NUSE_OBJ) 41 | 42 | 43 | ### required to build rump-netbsd-src 44 | quiet_cmd_gitrump = GIT $@ 45 | cmd_gitrump = if [ ! -d "rump" ] ; then \ 46 | git submodule init; \ 47 | git submodule update rump ;\ 48 | fi ;\ 49 | rm -f git-sparse ;\ 50 | wget --no-check-certificate -q \ 51 | https://raw.githubusercontent.com/icoxfog417/git-sparse/master/git-sparse \ 52 | -O git-sparse ;\ 53 | sed -i "s/\/bin\/sh/\/bin\/bash/" git-sparse ;\ 54 | chmod +x git-sparse ;\ 55 | PATH=${PATH}:. git sparse 56 | 57 | git-sparse: 58 | $(QUIET_GEN) wget --no-check-certificate -q \ 59 | https://raw.githubusercontent.com/icoxfog417/git-sparse/master/git-sparse \ 60 | -O git-sparse ;\ 61 | sed -i "s/\/bin\/sh/\/bin\/bash/" git-sparse ;\ 62 | chmod +x git-sparse 63 | 64 | $(RUMP_TOP): git-sparse 65 | $(QUIET_GEN) git submodule init ;\ 66 | git submodule update rump ;\ 67 | PATH=${PATH}:. 
git sparse 68 | 69 | 70 | $(RUMPUSER_CONF_H): $(RUMP_TOP) 71 | @$(QUIET_GEN) cd $(RUMP_TOP)/lib/librumpuser/ && LDFLAGS= CFLAGS= ./configure 72 | 73 | 74 | #### rumpserver object compile: embed to liblinux-nuse.so 75 | $(RUMP_NUSE_OBJ): %.o : %.c $(RUMPUSER_CONF_H) Makefile.rump 76 | $(QUIET_CC) $(CC) $(CFLAGS) $(RUMP_CFLAGS) \ 77 | -I$(srctree)/include -DHOSTOPS -c $< -o $@ 78 | $(RUMP_OBJ): %.o : %.c $(RUMPUSER_CONF_H) Makefile.rump 79 | $(QUIET_CC) $(CC) $(CFLAGS) $(RUMP_CFLAGS) -DHOSTOPS -c $< -o $@ 80 | librumpserver.so: $(RUMP_TOP) $(RUMP_OBJ) $(RUMP_NUSE_OBJ) Makefile.rump Makefile 81 | $(QUIET_LINK) $(CC) -shared -Wl,-z,lazy $(RUMPS_OBJ) -o $@ 82 | 83 | 84 | #### rump {client,hijack} compile 85 | $(RUMP_CLIENT_OBJ): %.o : %.c $(RUMPUSER_CONF_H) Makefile.rump Makefile 86 | $(QUIET_CC) $(CC) $(RUMP_CFLAGS) -U_FORTIFY_SOURCE -c $< -o $@ 87 | $(RUMP_HIJACK_OBJ): %.o : %.c Makefile.rump Makefile 88 | $(QUIET_CC) $(CC) $(RUMP_CFLAGS) -U_FORTIFY_SOURCE -c $< -o $@ 89 | 90 | #### librumpclient.so 91 | librumpclient.so: $(RUMP_CLIENT_OBJ) Makefile.rump 92 | $(QUIET_LINK) $(CC) -shared $(RUMP_CLIENT_OBJ) $(covl_$(COV)) -o $@ 93 | 94 | #### libnuse-hijack.so 95 | libnuse-hijack.so: $(RUMP_HIJACK_OBJ) $(RUMP_CLIENT_LIB) Makefile.rump 96 | $(QUIET_LINK) $(CC) -shared $(RUMP_HIJACK_OBJ) $(RUMP_LDFLAGS) -o $@ 97 | 98 | clean: 99 | $(call QUIET_CLEAN, RUMP) rm -f $(RUMP_CLIENT_OBJ) $(RUMP_NUSE_OBJ) \ 100 | $(RUMP_HIJACK_OBJ) $(RUMP_OBJ) git-sparse $(RUMPUSER_CONF_H) 101 | 102 | .PHONY: clean 103 | .NOTPARALLEL : 104 | -------------------------------------------------------------------------------- /nuse-hostcalls.c: -------------------------------------------------------------------------------- 1 | /* 2 | * host system/library calls for NUSE 3 | * Copyright (c) 2015 Hajime Tazaki 4 | * 5 | * Author: Hajime Tazaki 6 | */ 7 | 8 | #define _GNU_SOURCE /* Get RTLD_NEXT */ 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | 
#include 17 | #include "nuse-hostcalls.h" 18 | 19 | /* nuse-hostcalls.c */ 20 | int (*host_pthread_create)(pthread_t *, const struct pthread_attr *, 21 | void *(*)(void *), void *) = NULL; 22 | int (*host_pthread_join)(pthread_t thread, void **retval) = NULL; 23 | int (*host_poll)(struct pollfd *, int, int) = NULL; 24 | int (*host_socket)(int fd, int type, int proto) = NULL; 25 | int (*host_close)(int fd) = NULL; 26 | int (*host_bind)(int, const struct sockaddr *, int) = NULL; 27 | ssize_t (*host_write)(int fd, const void *buf, size_t count) = NULL; 28 | ssize_t (*host_send)(int sockfd, const void *buf, size_t len, int flags) = NULL; 29 | ssize_t (*host_sendmsg)(int sockfd, const struct msghdr *msg, int flags) = NULL; 30 | ssize_t (*host_read)(int fd, void *buf, size_t count) = NULL; 31 | ssize_t (*host_writev)(int fd, const struct iovec *iovec, size_t count) = NULL; 32 | int (*host_open)(const char *pathname, int flags,...) = NULL; 33 | int (*host_open64)(const char *pathname, int flags,...) = NULL; 34 | int (*host_ioctl)(int d, int request, ...) = NULL; 35 | char *(*host_getenv)(const char *name) = NULL; 36 | int (*host_fclose)(FILE *fp) = NULL; 37 | FILE *(*host_fdopen)(int fd, const char *mode) = NULL; 38 | int (*host_fcntl)(int fd, int cmd, ... 
/* arg */ ) = NULL; 39 | size_t (*host_fwrite)(const void *ptr, size_t size, size_t nmemb, 40 | FILE *stream) = NULL; 41 | int (*host_access)(const char *pathname, int mode) = NULL; 42 | int (*host_pipe)(int pipefd[2]) = NULL; 43 | int (*host_listen)(int sockfd, int backlog) = NULL; 44 | int (*host_accept)(int sockfd, struct sockaddr *addr, 45 | socklen_t *addrlen) = NULL; 46 | int (*host_getsockopt)(int sockfd, int level, int optname, 47 | void *optval, int *optlen) = NULL; 48 | int (*host_setsockopt)(int sockfd, int level, int optname, 49 | const void *optval, int optlen) = NULL; 50 | pid_t (*host_getpid)(void) = NULL; 51 | 52 | static void * 53 | nuse_hostcall_resolve_sym(const char *sym) 54 | { 55 | void *resolv; 56 | 57 | resolv = dlsym(RTLD_NEXT, sym); 58 | if (!resolv) { 59 | printf("dlsym fail %s (%s)\n", sym, dlerror()); 60 | assert(0); 61 | } 62 | return resolv; 63 | } 64 | 65 | void nuse_hostcall_init(void) 66 | { 67 | /* host functions */ 68 | host_socket = nuse_hostcall_resolve_sym("socket"); 69 | host_send = nuse_hostcall_resolve_sym("send"); 70 | host_sendmsg = nuse_hostcall_resolve_sym("sendmsg"); 71 | host_write = nuse_hostcall_resolve_sym("write"); 72 | host_writev = nuse_hostcall_resolve_sym("writev"); 73 | host_read = nuse_hostcall_resolve_sym("read"); 74 | host_listen = nuse_hostcall_resolve_sym("listen"); 75 | host_accept = nuse_hostcall_resolve_sym("accept"); 76 | host_close = nuse_hostcall_resolve_sym("close"); 77 | host_bind = nuse_hostcall_resolve_sym("bind"); 78 | host_pthread_create = nuse_hostcall_resolve_sym("pthread_create"); 79 | host_pthread_join = nuse_hostcall_resolve_sym("pthread_join"); 80 | host_poll = nuse_hostcall_resolve_sym("poll"); 81 | host_open = nuse_hostcall_resolve_sym("open"); 82 | host_open64 = nuse_hostcall_resolve_sym("open64"); 83 | host_ioctl = nuse_hostcall_resolve_sym("ioctl"); 84 | host_pipe = nuse_hostcall_resolve_sym("pipe"); 85 | host_getenv = nuse_hostcall_resolve_sym("getenv"); 86 | host_fdopen = 
/*
 * Bookkeeping record for one thread managed through the nuse_fiber_*
 * functions of this file.
 */
struct NuseFiber {
	/* set to pthread_self() on allocation; overwritten by
	 * host_pthread_create() in nuse_fiber_start(); compared against
	 * pthread_self() in nuse_fiber_isself() */
	pthread_t pthread;
	/* held around the condvar wait/signal; initialised in
	 * nuse_fiber_start() and in the hijacked pthread_create() */
	pthread_mutex_t mutex;
	/* waited on by nuse_fiber_wait(), signalled by nuse_fiber_wakeup() */
	pthread_cond_t condvar;
	/* thread entry point handed to host_pthread_create(); NULL for
	 * records that wrap an already running thread */
	void * (*func)(void *);
	/* argument passed to func */
	void *context;
	/* passed to prctl(PR_SET_NAME); the pointer is stored, not copied */
	const char *name;
	/* value reported by nuse_fiber_is_stopped(); the code in this file
	 * only ever assigns 0 to it */
	int canceled;
	/* timer most recently created by nuse_add_timer(); deleted and
	 * cleared in nuse_fiber_stop() */
	timer_t timerid;
};
*/ 42 | void 43 | nuse_set_affinity(void) 44 | { 45 | cpu_set_t cpuset; 46 | 47 | CPU_ZERO(&cpuset); 48 | CPU_SET(0, &cpuset); 49 | sched_setaffinity(getpid(), sizeof(cpu_set_t), &cpuset); 50 | } 51 | 52 | void *nuse_fiber_new_from_caller(uint32_t stackSize, const char *name) 53 | { 54 | struct NuseFiber *fiber = malloc(sizeof(struct NuseFiber)); 55 | 56 | fiber->func = NULL; 57 | fiber->context = NULL; 58 | fiber->pthread = pthread_self(); 59 | fiber->name = name; 60 | fiber->canceled = 0; 61 | fiber->timerid = NULL; 62 | return fiber; 63 | } 64 | 65 | void *nuse_fiber_new(void * (*callback)(void *), 66 | void *context, 67 | uint32_t stackSize, 68 | const char *name) 69 | { 70 | struct NuseFiber *fiber = nuse_fiber_new_from_caller(stackSize, name); 71 | 72 | fiber->func = callback; 73 | fiber->context = context; 74 | return fiber; 75 | } 76 | 77 | void nuse_fiber_start(void *handler) 78 | { 79 | struct NuseFiber *fiber = handler; 80 | int error; 81 | 82 | error = pthread_mutex_init(&fiber->mutex, NULL); 83 | assert(error == 0); 84 | error = pthread_cond_init(&fiber->condvar, NULL); 85 | assert(error == 0); 86 | 87 | error = host_pthread_create(&fiber->pthread, NULL, 88 | fiber->func, fiber->context); 89 | assert(error == 0); 90 | prctl(PR_SET_NAME, fiber->name, 0, 0, 0); 91 | } 92 | 93 | int 94 | nuse_fiber_isself(void *handler) 95 | { 96 | struct NuseFiber *fiber = handler; 97 | 98 | return fiber->pthread == pthread_self(); 99 | } 100 | 101 | void 102 | nuse_fiber_wait(void *handler) 103 | { 104 | struct NuseFiber *fiber = handler; 105 | 106 | pthread_mutex_lock(&fiber->mutex); 107 | pthread_cond_wait(&fiber->condvar, &fiber->mutex); 108 | pthread_mutex_unlock(&fiber->mutex); 109 | } 110 | 111 | int 112 | nuse_fiber_wakeup(void *handler) 113 | { 114 | struct NuseFiber *fiber = handler; 115 | int ret; 116 | pthread_mutex_lock(&fiber->mutex); 117 | ret = pthread_cond_signal(&fiber->condvar); 118 | pthread_mutex_unlock(&fiber->mutex); 119 | 120 | return ret == 0 ? 
1 : 0; 121 | } 122 | 123 | void nuse_fiber_stop(void *handler) 124 | { 125 | struct NuseFiber *fiber = handler; 126 | 127 | fiber->canceled = 0; 128 | if (fiber->timerid) 129 | timer_delete(fiber->timerid); 130 | fiber->timerid = NULL; 131 | } 132 | 133 | int nuse_fiber_is_stopped(void *handler) 134 | { 135 | struct NuseFiber *fiber = handler; 136 | 137 | return fiber->canceled; 138 | } 139 | 140 | void nuse_fiber_free(void *handler) 141 | { 142 | struct NuseFiber *fiber = handler; 143 | 144 | pthread_mutex_destroy(&fiber->mutex); 145 | pthread_cond_destroy(&fiber->condvar); 146 | /* pthread_join (fiber->pthread, 0); */ 147 | free(fiber); 148 | } 149 | 150 | 151 | /* hijacked functions */ 152 | int pthread_create(pthread_t *thread, const pthread_attr_t *attr, 153 | void *(*start_routine)(void *), void *arg) 154 | { 155 | int ret = host_pthread_create(thread, attr, start_routine, arg); 156 | struct NuseFiber *fiber; 157 | int error; 158 | 159 | if (ret) 160 | return ret; 161 | 162 | fiber = malloc(sizeof(struct NuseFiber)); 163 | fiber->func = NULL; 164 | fiber->context = NULL; 165 | fiber->pthread = *thread; 166 | fiber->name = "app_thread"; 167 | fiber->canceled = 0; 168 | prctl(PR_SET_NAME, fiber->name, 0, 0, 0); 169 | 170 | 171 | error = pthread_mutex_init(&fiber->mutex, NULL); 172 | assert(error == 0); 173 | error = pthread_cond_init(&fiber->condvar, NULL); 174 | assert(error == 0); 175 | 176 | nuse_task_add(fiber); 177 | return ret; 178 | } 179 | 180 | 181 | struct NuseTimerTrampolineContext { 182 | void *(*callback)(void *arg); 183 | void *context; 184 | timer_t timerid; 185 | }; 186 | 187 | static void 188 | nuse_timer_trampoline(sigval_t context) 189 | { 190 | struct NuseTimerTrampolineContext *ctx = context.sival_ptr; 191 | 192 | ctx->callback(ctx->context); 193 | if (ctx->timerid) 194 | timer_delete(ctx->timerid); 195 | ctx->timerid = NULL; 196 | free(ctx); 197 | } 198 | 199 | void 200 | nuse_add_timer(unsigned long ns, 201 | void *(*func)(void *arg), 202 
| void *arg, 203 | void *handler) 204 | { 205 | struct sigevent se; 206 | struct itimerspec new_value; 207 | struct NuseFiber *fiber = handler; 208 | int ret; 209 | 210 | new_value.it_value.tv_sec = ns / (1000 * 1000 * 1000); 211 | new_value.it_value.tv_nsec = ns % (1000 * 1000 * 1000); 212 | new_value.it_interval.tv_sec = 0; 213 | new_value.it_interval.tv_nsec = 0; 214 | 215 | struct NuseTimerTrampolineContext *ctx = 216 | malloc(sizeof(struct NuseTimerTrampolineContext)); 217 | if (!ctx) 218 | return; 219 | ctx->callback = func; 220 | ctx->context = arg; 221 | 222 | memset(&se, 0, sizeof(se)); 223 | se.sigev_value.sival_ptr = ctx; 224 | se.sigev_notify = SIGEV_THREAD; 225 | se.sigev_notify_function = nuse_timer_trampoline; 226 | se.sigev_notify_attributes = NULL; 227 | ret = timer_create(CLOCK_REALTIME, &se, &ctx->timerid); 228 | if (ret) 229 | perror("timer_create"); 230 | ret = timer_settime(ctx->timerid, 0, &new_value, NULL); 231 | if (ret) 232 | perror("timer_settime"); 233 | 234 | fiber->timerid = ctx->timerid; 235 | } 236 | -------------------------------------------------------------------------------- /nuse-vif-netmap.c: -------------------------------------------------------------------------------- 1 | /* 2 | * netmapp based virtual network interface feature for NUSE 3 | * Copyright (c) 2015 Hajime Tazaki 4 | * 5 | * Author: Hajime Tazaki 6 | * Ryo Nakamura 7 | */ 8 | 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | 24 | #include 25 | #include 26 | #define NETMAP_WITH_LIBS 27 | #include 28 | 29 | #include "sim-types.h" 30 | #include "sim-assert.h" 31 | #include "nuse-vif.h" 32 | #include "nuse-hostcalls.h" 33 | 34 | 35 | static int vale_rings = 0; 36 | 37 | #define MIN(X, Y) ((X) < (Y) ? 
(X) : (Y)) 38 | 39 | struct nuse_vif_netmap { 40 | int fd; 41 | struct netmap_if *nifp; 42 | }; 43 | 44 | extern struct SimDevicePacket lib_dev_create_packet(struct SimDevice *dev, 45 | int size); 46 | extern void lib_dev_rx(struct SimDevice *device, struct SimDevicePacket packet); 47 | extern void *lib_dev_get_private(struct SimDevice *); 48 | extern void lib_softirq_wakeup(void); 49 | 50 | #define BURST_MAX 1024 51 | 52 | 53 | 54 | static int 55 | netmap_get_nifp(const char *ifname, struct netmap_if **_nifp) 56 | { 57 | int fd; 58 | char *mem; 59 | struct nmreq nmr; 60 | struct netmap_if *nifp; 61 | 62 | /* open netmap for ring */ 63 | 64 | fd = open("/dev/netmap", O_RDWR); 65 | if (fd < 0) { 66 | printf("unable to open /dev/netmap\n"); 67 | return 0; 68 | } 69 | 70 | memset(&nmr, 0, sizeof(nmr)); 71 | strcpy(nmr.nr_name, ifname); 72 | nmr.nr_version = NETMAP_API; 73 | nmr.nr_ringid = 0 | (NETMAP_NO_TX_POLL | NETMAP_DO_RX_POLL); 74 | nmr.nr_flags |= NR_REG_ALL_NIC; 75 | 76 | if (ioctl(fd, NIOCREGIF, &nmr) < 0) { 77 | printf("unable to register interface %s\n", ifname); 78 | return 0; 79 | } 80 | 81 | if (vale_rings && strncmp(ifname, "vale", 4) == 0) { 82 | nmr.nr_rx_rings = vale_rings; 83 | nmr.nr_tx_rings = vale_rings; 84 | } 85 | 86 | mem = mmap(NULL, nmr.nr_memsize, 87 | PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); 88 | if (mem == MAP_FAILED) { 89 | printf("unable to mmap\n"); 90 | return 0; 91 | } 92 | 93 | nifp = NETMAP_IF(mem, nmr.nr_offset); 94 | *_nifp = nifp; 95 | 96 | return fd; 97 | } 98 | 99 | void 100 | nuse_vif_netmap_read(struct nuse_vif *vif, struct SimDevice *dev) 101 | { 102 | int n; 103 | uint32_t m, rx, cur, size; 104 | struct pollfd x[1]; 105 | struct netmap_slot *rs; 106 | struct netmap_ring *ring; 107 | struct nuse_vif_netmap *netmap = vif->private; 108 | struct netmap_if *nifp = netmap->nifp; 109 | 110 | x[0].fd = netmap->fd; 111 | x[0].events = POLLIN; 112 | 113 | while (1) { 114 | 115 | host_poll(x, 1, -1); 116 | 117 | for (n = 0; n < 
nifp->ni_rx_rings; n++) { 118 | ring = NETMAP_RXRING(nifp, n); 119 | 120 | if (nm_ring_empty(ring)) 121 | continue; 122 | 123 | m = nm_ring_space(ring); 124 | m = MIN(m, BURST_MAX); 125 | 126 | cur = ring->cur; 127 | 128 | for (rx = 0; rx < m; rx++) { 129 | rs = &ring->slot[cur]; 130 | char *p = NETMAP_BUF(ring, rs->buf_idx); 131 | 132 | size = rs->len; 133 | 134 | struct ethhdr { 135 | unsigned char h_dest[6]; 136 | unsigned char h_source[6]; 137 | uint16_t h_proto; 138 | } *hdr = (struct ethhdr *)p; 139 | 140 | #if DEBUG 141 | printf("proto = 0x%x, dst= %X:%X:%X:%X:%X:%X\n", 142 | ntohs( 143 | hdr->h_proto), 144 | hdr->h_dest[0], hdr->h_dest[1], 145 | hdr->h_dest[2], 146 | hdr->h_dest[3], hdr->h_dest[4], 147 | hdr->h_dest[5]); 148 | #endif 149 | 150 | struct SimDevicePacket packet = 151 | lib_dev_create_packet(dev, size); 152 | 153 | /* XXX: FIXME should not copy */ 154 | memcpy(packet.buffer, hdr, size); 155 | lib_dev_rx(dev, packet); 156 | 157 | cur = nm_ring_next(ring, cur); 158 | } 159 | lib_softirq_wakeup(); 160 | 161 | ring->head = ring->cur = cur; 162 | } 163 | } 164 | } 165 | 166 | void 167 | nuse_vif_netmap_write(struct nuse_vif *vif, struct SimDevice *dev, 168 | unsigned char *data, int len) 169 | { 170 | uint32_t cur; 171 | struct netmap_slot *ts; 172 | struct netmap_ring *ring; 173 | struct nuse_vif_netmap *netmap = vif->private; 174 | struct netmap_if *nifp = netmap->nifp; 175 | 176 | /* XXX make it be parallel */ 177 | ring = NETMAP_TXRING(nifp, 0); 178 | 179 | if (nm_ring_empty(ring)) 180 | return; 181 | 182 | cur = ring->cur; 183 | ts = &ring->slot[cur]; 184 | memcpy(NETMAP_BUF(ring, ts->buf_idx), data, len); 185 | ts->len = len; 186 | 187 | cur = nm_ring_next(ring, cur); 188 | ring->head = ring->cur = cur; 189 | 190 | ioctl(netmap->fd, NIOCTXSYNC, NULL); 191 | } 192 | 193 | void * 194 | nuse_vif_netmap_create(const char *ifname) 195 | { 196 | 197 | struct nuse_vif *vif; 198 | struct nuse_vif_netmap *netmap; 199 | 200 | netmap = (struct 
/*
 * strsplit - split a line into whitespace-separated tokens, in place.
 * @str:  NUL-terminated, writable line; separators are overwritten
 *        with '\0'
 * @args: out: receives pointers into @str, one per token
 * @max:  number of elements available in @args (an element count,
 *        not a byte size)
 *
 * Every byte with a value <= ' ' (space, tab, CR, LF, other control
 * characters) is a separator.  Bytes are compared as unsigned char so
 * that bytes >= 0x80 belong to a token regardless of whether plain char
 * is signed.  Leading separators of any kind are skipped, so a line such
 * as "\r\n" yields zero tokens instead of one empty token.
 *
 * Returns the number of tokens stored in @args (at most @max); text
 * following the last stored token is left unsplit.  Returns 0 for a NULL
 * @str or @args.
 */
static int
strsplit(char *str, char **args, int max)
{
	int argc = 0;
	unsigned char *c = (unsigned char *)str;

	if (!str || !args)
		return 0;

	/* skip every kind of leading separator */
	while (*c && *c <= ' ')
		c++;

	while (*c && argc < max) {
		args[argc++] = (char *)c;
		/* walk over the token ('\0' is not > ' ', so this stops) */
		while (*c > ' ')
			c++;
		/* terminate the token and swallow the separator run */
		while (*c && *c <= ' ')
			*c++ = '\0';
	}

	return argc;
}
sin->sin_family = AF_INET; 101 | sin->sin_addr.s_addr = inet_addr(vifcf->address); 102 | 103 | sin = (struct sockaddr_in *)&vifcf->ifr_vif_mask.ifr_netmask; 104 | sin->sin_family = AF_INET; 105 | sin->sin_addr.s_addr = inet_addr(vifcf->netmask); 106 | 107 | /* XXX: ifname attached to nuse process is same as host stck */ 108 | strncpy(vifcf->ifr_vif_addr.ifr_name, vifcf->ifname, IFNAMSIZ); 109 | strncpy(vifcf->ifr_vif_mask.ifr_name, vifcf->ifname, IFNAMSIZ); 110 | 111 | /* reassemble mac address */ 112 | if (sscanf(vifcf->macaddr, "%u:%u:%u:%u:%u:%u", 113 | (unsigned int *)&vifcf->mac[0], 114 | (unsigned int *)&vifcf->mac[1], 115 | (unsigned int *)&vifcf->mac[2], 116 | (unsigned int *)&vifcf->mac[3], 117 | (unsigned int *)&vifcf->mac[4], 118 | (unsigned int *)&vifcf->mac[5]) < 1) { 119 | printf("failed to parse mac address %s\n", vifcf->macaddr); 120 | free(vifcf); 121 | return 0; 122 | } 123 | 124 | cf->vifs[cf->vif_cnt++] = vifcf; 125 | 126 | return 1; 127 | } 128 | 129 | static int 130 | nuse_config_parse_route(char *line, FILE *fp, struct nuse_config *cf) 131 | { 132 | int ret, net, mask, gate; 133 | char buf[1024], *p, *args[2]; 134 | struct nuse_route_config *rtcf; 135 | struct sockaddr_in *sin; 136 | 137 | net = 0; 138 | mask = 0; 139 | gate = 0; 140 | 141 | memset(buf, 0, sizeof(buf)); 142 | 143 | rtcf = (struct nuse_route_config *) 144 | malloc(sizeof(struct nuse_route_config)); 145 | if (!rtcf) 146 | assert(0); 147 | 148 | memset(rtcf, 0, sizeof(struct nuse_route_config)); 149 | 150 | while ((p = fgets(buf, sizeof(buf), fp)) != NULL) { 151 | 152 | ret = strsplit(buf, args, sizeof(args)); 153 | 154 | if (ret == 0) 155 | /* no item in the line */ 156 | break; 157 | 158 | if (strncmp(args[0], "network", 7) == 0) { 159 | strncpy(rtcf->network, args[1], NUSE_ADDR_STRLEN); 160 | net = 1; 161 | } 162 | if (strncmp(args[0], "netmask", 7) == 0) { 163 | strncpy(rtcf->netmask, args[1], NUSE_ADDR_STRLEN); 164 | mask = 1; 165 | } 166 | if (strncmp(args[0], "gateway", 
7) == 0) { 167 | strncpy(rtcf->gateway, args[1], NUSE_ADDR_STRLEN); 168 | gate = 1; 169 | } 170 | 171 | if (net && mask && gate) 172 | break; 173 | } 174 | 175 | if (!net) 176 | printf("network is not configured !\n"); 177 | if (!mask) 178 | printf("netmask is not configured !\n"); 179 | if (!gate) 180 | printf("netmask is not configured !\n"); 181 | 182 | if (!net || !mask || !gate) { 183 | free (rtcf); 184 | return 0; 185 | } 186 | 187 | /* setup rtentry */ 188 | sin = (struct sockaddr_in *)&rtcf->route.rt_dst; 189 | sin->sin_family = AF_INET; 190 | sin->sin_addr.s_addr = inet_addr(rtcf->network); 191 | 192 | sin = (struct sockaddr_in *)&rtcf->route.rt_genmask; 193 | sin->sin_family = AF_INET; 194 | sin->sin_addr.s_addr = inet_addr(rtcf->netmask); 195 | 196 | sin = (struct sockaddr_in *)&rtcf->route.rt_gateway; 197 | sin->sin_family = AF_INET; 198 | sin->sin_addr.s_addr = inet_addr(rtcf->gateway); 199 | 200 | rtcf->route.rt_flags = RTF_UP | RTF_GATEWAY; 201 | rtcf->route.rt_metric = 0; 202 | 203 | cf->routes[cf->route_cnt++] = rtcf; 204 | 205 | return 1; 206 | } 207 | 208 | 209 | int 210 | nuse_config_parse(struct nuse_config *cf, char *cfname) 211 | { 212 | FILE *fp; 213 | int ret = 0; 214 | char buf[1024]; 215 | 216 | memset(cf, 0, sizeof(struct nuse_config)); 217 | fp = fopen(cfname, "r"); 218 | if (fp == NULL) { 219 | perror("fopen"); 220 | return 0; 221 | } 222 | 223 | while (fgets(buf, sizeof(buf), fp) != NULL) { 224 | if (strncmp(buf, "interface ", 10) == 0) 225 | ret = nuse_config_parse_interface(buf, fp, cf); 226 | else if (strncmp(buf, "route", 5) == 0) 227 | ret = nuse_config_parse_route(buf, fp, cf); 228 | else 229 | continue; 230 | if (!ret) 231 | break; 232 | } 233 | 234 | fclose(fp); 235 | 236 | return ret; 237 | } 238 | 239 | void 240 | nuse_config_free(struct nuse_config *cf) 241 | { 242 | int n; 243 | 244 | for (n = 0; n < cf->vif_cnt; n++) 245 | free(cf->vifs[n]); 246 | 247 | for (n = 0; n < cf->route_cnt; n++) 248 | free(cf->routes[n]); 249 | 
/*
 * PE - print "<function>:<line>: <message>" and jump to the `exit`
 * label of the enclosing function.  The enclosing function must
 * provide that label.
 */
#define PE(_fmt, ...) do {					\
		printf("%s:%d: " _fmt "\n",			\
		       __func__, __LINE__, ## __VA_ARGS__);	\
		goto exit;					\
	} while (0)

/*
 * PO - print "<function>:<line>: <message>" once and carry on with the
 * statement that follows.  Written as do { } while (0) so that it
 * behaves like a single statement and, unlike an unconditional loop,
 * returns control to the caller after printing.
 */
#define PO(_fmt, ...) do {					\
		printf("%s:%d: " _fmt "\n",			\
		       __func__, __LINE__, ## __VA_ARGS__);	\
	} while (0)
256 88 | #define NUMQUEUE 1 89 | #define PORTID (dpdk->portid) 90 | 91 | 92 | struct nuse_vif_dpdk { 93 | 94 | int portid; 95 | struct rte_mempool *rxpool, *txpool; /* rin buffer pool */ 96 | 97 | char txpoolname[16], rxpoolname[16]; 98 | 99 | /* burst receive context by rump dpdk code */ 100 | struct rte_mbuf *rms[MAX_PKT_BURST]; 101 | int npkts; 102 | int bufidx; 103 | }; 104 | 105 | 106 | static inline void 107 | deliverframes(struct nuse_vif_dpdk *dpdk, struct SimDevice *dev) 108 | { 109 | void *data; 110 | uint32_t size; 111 | struct rte_mbuf *rm, *rm0; 112 | 113 | rm0 = dpdk->rms[dpdk->bufidx]; 114 | dpdk->npkts--; 115 | dpdk->bufidx++; 116 | 117 | for (rm = rm0; rm; rm = rm->pkt.next) { 118 | struct SimDevicePacket packet; 119 | 120 | data = rte_pktmbuf_mtod(rm, void *); 121 | size = rte_pktmbuf_data_len(rm); 122 | 123 | packet = lib_dev_create_packet(dev, size); 124 | memcpy(packet.buffer, data, size); 125 | lib_dev_rx(dev, packet); 126 | } 127 | 128 | lib_softirq_wakeup(); 129 | } 130 | 131 | void 132 | nuse_vif_dpdk_read(struct nuse_vif *vif, struct SimDevice *dev) 133 | { 134 | struct nuse_vif_dpdk *dpdk = vif->private; 135 | 136 | while (1) { 137 | /* this burst receive is inspired by RUMP dpdk */ 138 | 139 | /* there are packets which are not delivered to SimDevice */ 140 | if (dpdk->npkts > 0) 141 | while (dpdk->npkts > 0) 142 | deliverframes(dpdk, dev); 143 | 144 | /* there is no undelivered packet. Check new packets. */ 145 | if (dpdk->npkts == 0) { 146 | dpdk->npkts = rte_eth_rx_burst(PORTID, 0, dpdk->rms, 147 | MAX_PKT_BURST); 148 | dpdk->bufidx = 0; 149 | } 150 | 151 | /* XXX: busy wait ??? 
*/ 152 | } 153 | } 154 | 155 | void 156 | nuse_vif_dpdk_write(struct nuse_vif *vif, struct SimDevice *dev, 157 | unsigned char *data, int len) 158 | { 159 | void *pkt; 160 | struct rte_mbuf *rm; 161 | struct nuse_vif_dpdk *dpdk = vif->private; 162 | 163 | rm = rte_pktmbuf_alloc(dpdk->txpool); 164 | pkt = rte_pktmbuf_append(rm, len); 165 | memcpy(pkt, data, len); 166 | 167 | rte_eth_tx_burst(PORTID, 0, &rm, 1); 168 | /* XXX: should be bursted !! */ 169 | } 170 | 171 | 172 | static int 173 | dpdk_if_init(struct nuse_vif_dpdk *dpdk) 174 | { 175 | int ret = 0; 176 | struct rte_eth_conf portconf; 177 | struct rte_eth_link link; 178 | 179 | if (!dpdk_init) { 180 | ret = rte_eal_init(sizeof(ealargs) / sizeof(ealargs[0]), 181 | (void *)(uintptr_t)ealargs); 182 | if (ret < 0) 183 | PE("failed to initialize eal"); 184 | 185 | ret = -EINVAL; 186 | 187 | ret = rte_eal_pci_probe(); 188 | if (ret < 0) 189 | PE("eal pci probe failed"); 190 | 191 | dpdk_init = 1; 192 | } 193 | 194 | dpdk->txpool = 195 | rte_mempool_create(dpdk->txpoolname, 196 | MBUF_NUM, MBUF_SIZ, MEMPOOL_CACHE_SZ, 197 | sizeof(struct rte_pktmbuf_pool_private), 198 | rte_pktmbuf_pool_init, NULL, 199 | rte_pktmbuf_init, NULL, 0, 0); 200 | 201 | if (dpdk->txpool == NULL) 202 | PE("failed to allocate tx pool"); 203 | 204 | 205 | dpdk->rxpool = 206 | rte_mempool_create(dpdk->rxpoolname, MBUF_NUM, MBUF_SIZ, 0, 207 | sizeof(struct rte_pktmbuf_pool_private), 208 | rte_pktmbuf_pool_init, NULL, 209 | rte_pktmbuf_init, NULL, 0, 0); 210 | 211 | if (dpdk->rxpool == NULL) 212 | PE("failed to allocate rx pool"); 213 | 214 | 215 | memset(&portconf, 0, sizeof(portconf)); 216 | ret = rte_eth_dev_configure(PORTID, NUMQUEUE, NUMQUEUE, &portconf); 217 | if (ret < 0) 218 | PE("failed to configure port"); 219 | 220 | 221 | ret = rte_eth_rx_queue_setup(PORTID, 0, NUMDESC, 0, &rxconf, 222 | dpdk->rxpool); 223 | 224 | if (ret < 0) 225 | PE("failed to setup rx queue"); 226 | 227 | ret = rte_eth_tx_queue_setup(PORTID, 0, NUMDESC, 0, 
&txconf); 228 | if (ret < 0) 229 | PE("failed to setup tx queue"); 230 | 231 | ret = rte_eth_dev_start(PORTID); 232 | if (ret < 0) 233 | PE("failed to start device"); 234 | 235 | rte_eth_link_get(PORTID, &link); 236 | if (!link.link_status) 237 | PO("interface state is down"); 238 | 239 | /* should be promisc ? */ 240 | rte_eth_promiscuous_enable(PORTID); 241 | 242 | exit: 243 | return ret; 244 | } 245 | 246 | void * 247 | nuse_vif_dpdk_create(const char *ifname) 248 | { 249 | struct nuse_vif *vif; 250 | struct nuse_vif_dpdk *dpdk; 251 | 252 | dpdk = malloc(sizeof(struct nuse_vif_dpdk)); 253 | if (sscanf(ifname, "dpdk%d", &dpdk->portid) <= 0) { 254 | free(dpdk); 255 | PO("ifname failure %s", ifname); 256 | return NULL; 257 | } 258 | snprintf(dpdk->txpoolname, 16, "%s%s", "tx", ifname); 259 | snprintf(dpdk->rxpoolname, 16, "%s%s", "rx", ifname); 260 | 261 | if (dpdk_if_init(dpdk) < 0) { 262 | free(dpdk); 263 | PO("failed to init dpdk interface"); 264 | return NULL; 265 | } 266 | 267 | 268 | vif = malloc(sizeof(struct nuse_vif)); 269 | vif->type = NUSE_VIF_DPDK; 270 | vif->private = dpdk; 271 | 272 | return (void *)vif; 273 | } 274 | 275 | void 276 | nuse_vif_dpdk_delete(struct nuse_vif *vif) 277 | { 278 | struct nuse_vif_dpdk *dpdk = vif->private; 279 | 280 | /* XXX: how to close dpdk...? 
*/ 281 | 282 | free(dpdk); 283 | free(vif); 284 | } 285 | 286 | static struct nuse_vif_impl nuse_vif_dpdk = { 287 | .read = nuse_vif_dpdk_read, 288 | .write = nuse_vif_dpdk_write, 289 | .create = nuse_vif_dpdk_create, 290 | .delete = nuse_vif_dpdk_delete, 291 | }; 292 | 293 | 294 | extern struct nuse_vif_impl *nuse_vif[NUSE_VIF_MAX]; 295 | 296 | int __attribute__((constructor)) 297 | nuse_vif_dpdk_init(void) 298 | { 299 | nuse_vif[NUSE_VIF_DPDK] = &nuse_vif_dpdk; 300 | return 0; 301 | } 302 | -------------------------------------------------------------------------------- /nuse.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Network stack in userspace (NUSE) for POSIX userspace backend 3 | * Copyright (c) 2015 Hajime Tazaki 4 | * 5 | * Author: Hajime Tazaki 6 | * Ryo Nakamura 7 | */ 8 | 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include "list.h" /* linked-list */ 20 | 21 | #include "sim-init.h" 22 | #include "sim-assert.h" 23 | #include "sim.h" 24 | #include "nuse.h" 25 | #include "nuse-hostcalls.h" 26 | #include "nuse-vif.h" 27 | #include "nuse-config.h" 28 | #include "nuse-libc.h" 29 | 30 | struct SimTask; 31 | struct SimExported *g_exported = NULL; 32 | 33 | struct NuseTask { 34 | struct list_head head; 35 | struct SimTask *s_task; 36 | }; 37 | 38 | int nuse_socket(int domain, int type, int protocol); 39 | int nuse_ioctl(int fd, int request, ...); 40 | int nuse_close(int fd); 41 | 42 | int nuse_vprintf(struct SimKernel *kernel, const char *str, va_list args) 43 | { 44 | return vprintf(str, args); 45 | } 46 | void *nuse_malloc(struct SimKernel *kernel, unsigned long size) 47 | { 48 | return malloc(size); 49 | } 50 | void nuse_free(struct SimKernel *kernel, void *buffer) 51 | { 52 | return free(buffer); 53 | } 54 | 55 | void *nuse_memcpy(struct SimKernel *kernel, void *dst, const void *src, 56 | unsigned long size) 
57 | { 58 | return memcpy(dst, src, size); 59 | } 60 | void *nuse_memset(struct SimKernel *kernel, void *dst, char value, 61 | unsigned long size) 62 | { 63 | return memset(dst, value, size); 64 | } 65 | __u64 nuse_current_ns(struct SimKernel *kernel) 66 | { 67 | struct timespec tp; 68 | 69 | clock_gettime(CLOCK_MONOTONIC, &tp); 70 | return tp.tv_sec * 1000000000 + tp.tv_nsec; 71 | } 72 | unsigned long nuse_random(struct SimKernel *kernel) 73 | { 74 | return random(); 75 | } 76 | char *nuse_getenv(struct SimKernel *kernel, const char *name) 77 | { 78 | return host_getenv(name); 79 | } 80 | int nuse_fclose(struct SimKernel *kernel, FILE *fp) 81 | { 82 | return host_fclose(fp); 83 | } 84 | size_t nuse_fwrite(struct SimKernel *kernel, const void *ptr, 85 | size_t size, size_t nmemb, FILE *stream) 86 | { 87 | return host_fwrite(ptr, size, nmemb, stream); 88 | } 89 | int nuse_access(struct SimKernel *kernel, const char *pathname, int mode) 90 | { 91 | return host_access(pathname, mode); 92 | } 93 | int atexit(void (*function)(void)) 94 | { 95 | /* XXX: need to handle host_atexit, but can't dynamically resolv 96 | the symbol so, ignore it for the time being */ 97 | return 0; 98 | } 99 | 100 | static struct NuseTask *g_nuse_main_ctx = NULL; 101 | struct list_head g_task_lists = LIST_HEAD_INIT(g_task_lists); 102 | 103 | struct SimTask *nuse_task_current(struct SimKernel *kernel) 104 | { 105 | struct NuseTask *task; 106 | void *fiber; 107 | 108 | list_for_each_entry(task, &g_task_lists, head) { 109 | void *fiber = g_exported->task_get_private(task->s_task); 110 | if (fiber && nuse_fiber_isself(fiber)) { 111 | return task->s_task; 112 | } 113 | } 114 | 115 | if (!g_nuse_main_ctx) { 116 | fiber = nuse_fiber_new_from_caller(1 << 16, "init"); 117 | g_nuse_main_ctx = malloc(sizeof(struct NuseTask)); 118 | g_nuse_main_ctx->s_task = g_exported->task_create(fiber, getpid()); 119 | list_add_tail(&g_nuse_main_ctx->head, &g_task_lists); 120 | } 121 | return g_nuse_main_ctx->s_task; 
122 | } 123 | 124 | struct NuseTaskTrampolineContext { 125 | void (*callback)(void *); 126 | void *context; 127 | struct NuseTask *task; 128 | }; 129 | 130 | void 131 | nuse_task_add(void *fiber) 132 | { 133 | struct NuseTask *task = malloc(sizeof(struct NuseTask)); 134 | task->s_task = g_exported->task_create(fiber, getpid()); 135 | 136 | list_add_tail(&task->head, &g_task_lists); 137 | } 138 | 139 | static void *nuse_task_start_trampoline(void *context) 140 | { 141 | /* we use this trampoline solely for the purpose of executing 142 | lib_update_jiffies. prior to calling the callback. */ 143 | struct NuseTaskTrampolineContext *ctx = context; 144 | int found = 0; 145 | struct NuseTask *task; 146 | 147 | void (*callback)(void *); 148 | void *callback_context; 149 | 150 | list_for_each_entry(task, &g_task_lists, head) { 151 | if (g_exported->task_get_private(task->s_task) == 152 | g_exported->task_get_private(ctx->task->s_task)) { 153 | found = 1; 154 | break; 155 | } 156 | } 157 | if (!found) { 158 | printf("task not found\n"); 159 | return NULL; 160 | } 161 | 162 | if (nuse_fiber_is_stopped(g_exported->task_get_private(ctx->task->s_task))) { 163 | lib_free(ctx); 164 | lib_update_jiffies(); 165 | lib_printf("canceled\n"); 166 | return NULL; 167 | } 168 | 169 | callback = ctx->callback; 170 | callback_context = ctx->context; 171 | task = ctx->task; 172 | lib_free(ctx); 173 | lib_update_jiffies(); 174 | 175 | callback(callback_context); 176 | 177 | /* nuse_fiber_free (task->private); */ 178 | if (task->head.prev != LIST_POISON2) 179 | list_del(&task->head); 180 | free(task); 181 | 182 | return ctx; 183 | } 184 | 185 | struct SimTask *nuse_task_start(struct SimKernel *kernel, 186 | void (*callback) (void *), void *context) 187 | { 188 | struct NuseTask *task = NULL; 189 | struct NuseTaskTrampolineContext *ctx = 190 | lib_malloc(sizeof(struct NuseTaskTrampolineContext)); 191 | 192 | if (!ctx) 193 | return NULL; 194 | ctx->callback = callback; 195 | ctx->context = 
context; 196 | 197 | void *fiber = nuse_fiber_new(&nuse_task_start_trampoline, ctx, 1 << 16, 198 | "task"); 199 | task = malloc(sizeof(struct NuseTask)); 200 | task->s_task = g_exported->task_create(fiber, getpid()); 201 | ctx->task = task; 202 | 203 | if (!nuse_fiber_is_stopped(g_exported->task_get_private(task->s_task))) 204 | list_add_tail(&task->head, &g_task_lists); 205 | 206 | nuse_fiber_start(fiber); 207 | return task->s_task; 208 | } 209 | 210 | void *nuse_event_schedule_ns(struct SimKernel *kernel, 211 | __u64 ns, void (*fn) (void *context), void *context, 212 | void (*dummy_fn)(void)) 213 | { 214 | struct NuseTask *task = NULL; 215 | struct NuseTaskTrampolineContext *ctx = 216 | lib_malloc(sizeof(struct NuseTaskTrampolineContext)); 217 | void *fiber; 218 | 219 | if (!ctx) 220 | return NULL; 221 | ctx->callback = fn; 222 | ctx->context = context; 223 | 224 | /* without fiber_start (pthread) */ 225 | fiber = nuse_fiber_new_from_caller(1 << 16, "task_sched"); 226 | task = malloc(sizeof(struct NuseTask)); 227 | task->s_task = g_exported->task_create(fiber, getpid()); 228 | ctx->task = task; 229 | 230 | list_add_tail(&task->head, &g_task_lists); 231 | 232 | nuse_add_timer(ns, nuse_task_start_trampoline, ctx, fiber); 233 | 234 | return task; 235 | } 236 | 237 | void nuse_event_cancel(struct SimKernel *kernel, void *event) 238 | { 239 | struct NuseTask *task = event; 240 | 241 | nuse_fiber_stop(g_exported->task_get_private(task->s_task)); 242 | /* nuse_fiber_free (task->private); */ 243 | if (task->head.prev != LIST_POISON2) 244 | list_del(&task->head); 245 | } 246 | 247 | void nuse_task_wait(struct SimKernel *kernel) 248 | { 249 | struct SimTask *task; 250 | 251 | task = nuse_task_current(NULL); 252 | lib_assert(task != NULL); 253 | nuse_fiber_wait(g_exported->task_get_private(task)); 254 | lib_update_jiffies(); 255 | } 256 | 257 | int nuse_task_wakeup(struct SimKernel *kernel, struct SimTask *task) 258 | { 259 | return 
nuse_fiber_wakeup(g_exported->task_get_private(task)); 260 | } 261 | 262 | void * 263 | nuse_netdev_rx_trampoline(void *context) 264 | { 265 | struct SimDevice *dev = context; 266 | struct nuse_vif *vif = g_exported->dev_get_private(dev); 267 | 268 | nuse_vif_read(vif, dev); 269 | printf("should not reach here %s\n", __func__); 270 | /* should not reach */ 271 | return dev; 272 | } 273 | 274 | void 275 | nuse_dev_rx(struct SimDevice *dev, char *buf, int size) 276 | { 277 | #ifdef DEBUG 278 | struct ethhdr { 279 | unsigned char h_dest[6]; 280 | unsigned char h_source[6]; 281 | uint16_t h_proto; 282 | } *hdr = (struct ethhdr *)buf; 283 | #endif 284 | 285 | struct SimDevicePacket packet = g_exported->dev_create_packet(dev, size); 286 | /* XXX: FIXME should not copy */ 287 | memcpy(packet.buffer, buf, size); 288 | g_exported->dev_rx(dev, packet); 289 | lib_softirq_wakeup(); 290 | } 291 | 292 | void 293 | nuse_dev_xmit(struct SimKernel *kernel, struct SimDevice *dev, 294 | unsigned char *data, int len) 295 | { 296 | struct nuse_vif *vif = g_exported->dev_get_private(dev); 297 | 298 | nuse_vif_write(vif, dev, data, len); 299 | lib_softirq_wakeup(); 300 | } 301 | 302 | void nuse_signal_raised(struct SimKernel *kernel, struct SimTask *task, int sig) 303 | { 304 | static int logged = 0; 305 | 306 | if (!logged) { 307 | lib_printf("%s: Not implemented yet\n", __func__); 308 | logged = 1; 309 | } 310 | } 311 | 312 | void 313 | nuse_poll_event(int flag, void *context) 314 | { 315 | pthread_cond_t *condvar; 316 | int ret; 317 | 318 | condvar = (pthread_cond_t *)context; 319 | ret = pthread_cond_signal(condvar); 320 | if (ret != 0) 321 | perror("pthread_cond_signal"); 322 | } 323 | 324 | void 325 | nuse_netdev_lo_up(void) 326 | { 327 | int err; 328 | static int init_loopback = 0; 329 | struct ifreq ifr; 330 | 331 | /* loopback IFF_UP */ 332 | if (!init_loopback) { 333 | memset(&ifr, 0, sizeof(struct ifreq)); 334 | ifr.ifr_flags = IFF_UP; 335 | sprintf(ifr.ifr_name, "lo"); 336 | 
int sock = nuse_socket(PF_INET, SOCK_DGRAM, 0); 337 | err = nuse_ioctl(sock, SIOCSIFFLAGS, &ifr); 338 | if (err) 339 | printf("err devinet_ioctl %d\n", err); 340 | init_loopback = 1; 341 | nuse_close(sock); 342 | } 343 | } 344 | 345 | void 346 | nuse_netdev_create(struct nuse_vif_config *vifcf) 347 | { 348 | /* create net_device for nuse process from nuse_vif_config */ 349 | int err; 350 | struct nuse_vif *vif; 351 | struct ifreq ifr; 352 | struct NuseTask *task = NULL; 353 | void *fiber; 354 | int sock; 355 | struct SimDevice *dev; 356 | 357 | printf("create vif %s\n", vifcf->ifname); 358 | printf(" address = %s\n", vifcf->address); 359 | printf(" netmask = %s\n", vifcf->netmask); 360 | printf(" macaddr = %s\n", vifcf->macaddr); 361 | printf(" type = %d\n", vifcf->type); 362 | 363 | if (vifcf->type == NUSE_VIF_PIPE) { 364 | printf(" path = %s\n", vifcf->pipepath); 365 | vif = nuse_vif_create(vifcf->type, vifcf->pipepath); 366 | } else { 367 | vif = nuse_vif_create(vifcf->type, vifcf->ifname); 368 | } 369 | 370 | if (!vif) { 371 | printf("vif create error\n"); 372 | lib_assert(0); 373 | } 374 | 375 | /* create new new_device */ 376 | dev = g_exported->dev_create(vifcf->ifname, vif, 0); 377 | 378 | /* assign new hw address */ 379 | if (vifcf->mac[0] == 0 && vifcf->mac[1] == 0 && vifcf->mac[2] == 0 && 380 | vifcf->mac[3] == 0 && vifcf->mac[4] == 0 && vifcf->mac[5] == 0) { 381 | /* eth_random_addr like */ 382 | long int mac = random(); 383 | memcpy(&vifcf->mac[2], &mac, sizeof(long int)); 384 | vifcf->mac[0] &= 0xfe; /* clear multicast bit */ 385 | vifcf->mac[0] |= 0x02; /* set local assignment bit (IEEE802) */ 386 | 387 | printf(" mac address for %s is randomized ", vifcf->ifname); 388 | printf("%02x:%02x:%02x:%02x:%02x:%02x\n", 389 | vifcf->mac[0], vifcf->mac[1], vifcf->mac[2], 390 | vifcf->mac[3], vifcf->mac[4], vifcf->mac[5]); 391 | } 392 | g_exported->dev_set_address(dev, vifcf->mac); 393 | 394 | /* assign IPv4 address */ 395 | /* XXX: ifr_name is already filed 
by nuse_config_parse_interface, 396 | I don't know why, but vifcf->ifr_vif_addr.ifr_name is NULL here. */ 397 | strcpy(vifcf->ifr_vif_addr.ifr_name, vifcf->ifname); 398 | 399 | sock = nuse_socket(PF_INET, SOCK_DGRAM, 0); 400 | err = nuse_ioctl(sock, SIOCSIFADDR, &vifcf->ifr_vif_addr); 401 | if (err) { 402 | perror("ioctl"); 403 | printf("err ioctl for assign address %s for %s %d\n", 404 | vifcf->address, vifcf->ifname, err); 405 | } 406 | 407 | /* set netmask */ 408 | err = nuse_ioctl(sock, SIOCSIFNETMASK, &vifcf->ifr_vif_mask); 409 | if (err) { 410 | perror("ioctl"); 411 | printf("err ioctl for assign netmask %s for %s %d\n", 412 | vifcf->netmask, vifcf->ifname, err); 413 | } 414 | 415 | /* IFF_UP */ 416 | memset(&ifr, 0, sizeof(ifr)); 417 | ifr.ifr_flags = IFF_UP; 418 | strncpy(ifr.ifr_name, vifcf->ifname, IFNAMSIZ); 419 | 420 | err = nuse_ioctl(sock, SIOCSIFFLAGS, &ifr); 421 | if (err) { 422 | perror("devinet_ioctl"); 423 | printf("err devinet_ioctl to set ifup dev %s %d\n", 424 | vifcf->ifname, err); 425 | } 426 | 427 | /* wait for packets */ 428 | fiber = nuse_fiber_new(&nuse_netdev_rx_trampoline, dev, 429 | 1 << 16, "NET_RX"); 430 | task = malloc(sizeof(struct NuseTask)); 431 | task->s_task = g_exported->task_create(fiber, getpid()); 432 | list_add_tail(&task->head, &g_task_lists); 433 | nuse_fiber_start(fiber); 434 | } 435 | 436 | void 437 | nuse_route_install(struct nuse_route_config *rtcf) 438 | { 439 | int err, sock; 440 | 441 | sock = nuse_socket(PF_INET, SOCK_DGRAM, 0); 442 | err = nuse_ioctl(sock, SIOCADDRT, &rtcf->route); 443 | if (err) 444 | printf("err ip_rt_ioctl to add route to %s via %s %d\n", 445 | rtcf->network, rtcf->gateway, err); 446 | nuse_close(sock); 447 | 448 | } 449 | 450 | extern void lib_init(struct SimExported *exported, 451 | const struct SimImported *imported, 452 | struct SimKernel *kernel); 453 | 454 | void __attribute__((constructor)) 455 | nuse_init(void) 456 | { 457 | int n; 458 | char *config; 459 | struct nuse_config cf; 460 
| 461 | nuse_hostcall_init(); 462 | nuse_set_affinity(); 463 | 464 | /* create descriptor table */ 465 | memset(nuse_fd_table, 0, sizeof(nuse_fd_table)); 466 | nuse_fd_table[1].real_fd = 1; 467 | nuse_fd_table[2].real_fd = 2; 468 | 469 | /* are those rump hypercalls? */ 470 | struct SimImported *imported = malloc(sizeof(struct SimImported)); 471 | memset(imported, 0, sizeof(struct SimImported)); 472 | imported->vprintf = nuse_vprintf; 473 | imported->malloc = nuse_malloc; 474 | imported->free = nuse_free; 475 | imported->memcpy = nuse_memcpy; 476 | imported->memset = nuse_memset; 477 | imported->atexit = NULL; /* not implemented */ 478 | imported->access = nuse_access; 479 | imported->getenv = nuse_getenv; 480 | imported->mkdir = NULL; /* not implemented */ 481 | /* it's not hypercall, but just a POSIX glue ? */ 482 | imported->open = NULL; /* not used */ 483 | imported->__fxstat = NULL; /* not implemented */ 484 | imported->fseek = NULL; /* not implemented */ 485 | imported->setbuf = NULL; /* not implemented */ 486 | imported->ftell = NULL; /* not implemented */ 487 | imported->fdopen = NULL; /* not implemented */ 488 | imported->fread = NULL; /* not implemented */ 489 | imported->fwrite = nuse_fwrite; 490 | imported->fclose = nuse_fclose; 491 | imported->random = nuse_random; 492 | imported->event_schedule_ns = nuse_event_schedule_ns; 493 | imported->event_cancel = nuse_event_cancel; 494 | imported->current_ns = nuse_current_ns; 495 | imported->task_start = nuse_task_start; 496 | imported->task_wait = nuse_task_wait; 497 | imported->task_current = nuse_task_current; 498 | imported->task_wakeup = nuse_task_wakeup; 499 | imported->task_yield = NULL; /* not implemented */ 500 | imported->dev_xmit = nuse_dev_xmit; 501 | imported->signal_raised = nuse_signal_raised; 502 | imported->poll_event = nuse_poll_event; 503 | 504 | g_exported = malloc(sizeof(struct SimExported)); 505 | lib_init (g_exported, imported, NULL); 506 | 507 | /* loopback IFF_UP * / */ 508 | 
nuse_netdev_lo_up(); 509 | 510 | srand(time(NULL)); /* for mac address randomization */ 511 | 512 | /* read and parse a config file */ 513 | config = host_getenv("NUSECONF"); 514 | if (config == NULL) 515 | printf("config file is not specified\n"); 516 | else { 517 | if (!nuse_config_parse(&cf, config)) { 518 | printf("parse config file failed\n"); 519 | lib_assert(0); 520 | } 521 | 522 | /* create netdevs specified by config file */ 523 | for (n = 0; n < cf.vif_cnt; n++) 524 | nuse_netdev_create(cf.vifs[n]); 525 | 526 | /* setup route entries */ 527 | for (n = 0; n < cf.route_cnt; n++) 528 | nuse_route_install(cf.routes[n]); 529 | } 530 | 531 | /* now it's ready to accept IPC */ 532 | nuse_syscall_proxy_init(); 533 | } 534 | 535 | void __attribute__((destructor)) 536 | nuse_exit(void) 537 | { 538 | printf("finishing NUSE\n"); 539 | nuse_syscall_proxy_exit(); 540 | } 541 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | GNU GENERAL PUBLIC LICENSE 2 | Version 2, June 1991 3 | 4 | Copyright (C) 1989, 1991 Free Software Foundation, Inc., 5 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 6 | Everyone is permitted to copy and distribute verbatim copies 7 | of this license document, but changing it is not allowed. 8 | 9 | Preamble 10 | 11 | The licenses for most software are designed to take away your 12 | freedom to share and change it. By contrast, the GNU General Public 13 | License is intended to guarantee your freedom to share and change free 14 | software--to make sure the software is free for all its users. This 15 | General Public License applies to most of the Free Software 16 | Foundation's software and to any other program whose authors commit to 17 | using it. (Some other Free Software Foundation software is covered by 18 | the GNU Lesser General Public License instead.) You can apply it to 19 | your programs, too. 
20 | 21 | When we speak of free software, we are referring to freedom, not 22 | price. Our General Public Licenses are designed to make sure that you 23 | have the freedom to distribute copies of free software (and charge for 24 | this service if you wish), that you receive source code or can get it 25 | if you want it, that you can change the software or use pieces of it 26 | in new free programs; and that you know you can do these things. 27 | 28 | To protect your rights, we need to make restrictions that forbid 29 | anyone to deny you these rights or to ask you to surrender the rights. 30 | These restrictions translate to certain responsibilities for you if you 31 | distribute copies of the software, or if you modify it. 32 | 33 | For example, if you distribute copies of such a program, whether 34 | gratis or for a fee, you must give the recipients all the rights that 35 | you have. You must make sure that they, too, receive or can get the 36 | source code. And you must show them these terms so they know their 37 | rights. 38 | 39 | We protect your rights with two steps: (1) copyright the software, and 40 | (2) offer you this license which gives you legal permission to copy, 41 | distribute and/or modify the software. 42 | 43 | Also, for each author's protection and ours, we want to make certain 44 | that everyone understands that there is no warranty for this free 45 | software. If the software is modified by someone else and passed on, we 46 | want its recipients to know that what they have is not the original, so 47 | that any problems introduced by others will not reflect on the original 48 | authors' reputations. 49 | 50 | Finally, any free program is threatened constantly by software 51 | patents. We wish to avoid the danger that redistributors of a free 52 | program will individually obtain patent licenses, in effect making the 53 | program proprietary. 
To prevent this, we have made it clear that any 54 | patent must be licensed for everyone's free use or not licensed at all. 55 | 56 | The precise terms and conditions for copying, distribution and 57 | modification follow. 58 | 59 | GNU GENERAL PUBLIC LICENSE 60 | TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 61 | 62 | 0. This License applies to any program or other work which contains 63 | a notice placed by the copyright holder saying it may be distributed 64 | under the terms of this General Public License. The "Program", below, 65 | refers to any such program or work, and a "work based on the Program" 66 | means either the Program or any derivative work under copyright law: 67 | that is to say, a work containing the Program or a portion of it, 68 | either verbatim or with modifications and/or translated into another 69 | language. (Hereinafter, translation is included without limitation in 70 | the term "modification".) Each licensee is addressed as "you". 71 | 72 | Activities other than copying, distribution and modification are not 73 | covered by this License; they are outside its scope. The act of 74 | running the Program is not restricted, and the output from the Program 75 | is covered only if its contents constitute a work based on the 76 | Program (independent of having been made by running the Program). 77 | Whether that is true depends on what the Program does. 78 | 79 | 1. You may copy and distribute verbatim copies of the Program's 80 | source code as you receive it, in any medium, provided that you 81 | conspicuously and appropriately publish on each copy an appropriate 82 | copyright notice and disclaimer of warranty; keep intact all the 83 | notices that refer to this License and to the absence of any warranty; 84 | and give any other recipients of the Program a copy of this License 85 | along with the Program. 
86 | 87 | You may charge a fee for the physical act of transferring a copy, and 88 | you may at your option offer warranty protection in exchange for a fee. 89 | 90 | 2. You may modify your copy or copies of the Program or any portion 91 | of it, thus forming a work based on the Program, and copy and 92 | distribute such modifications or work under the terms of Section 1 93 | above, provided that you also meet all of these conditions: 94 | 95 | a) You must cause the modified files to carry prominent notices 96 | stating that you changed the files and the date of any change. 97 | 98 | b) You must cause any work that you distribute or publish, that in 99 | whole or in part contains or is derived from the Program or any 100 | part thereof, to be licensed as a whole at no charge to all third 101 | parties under the terms of this License. 102 | 103 | c) If the modified program normally reads commands interactively 104 | when run, you must cause it, when started running for such 105 | interactive use in the most ordinary way, to print or display an 106 | announcement including an appropriate copyright notice and a 107 | notice that there is no warranty (or else, saying that you provide 108 | a warranty) and that users may redistribute the program under 109 | these conditions, and telling the user how to view a copy of this 110 | License. (Exception: if the Program itself is interactive but 111 | does not normally print such an announcement, your work based on 112 | the Program is not required to print an announcement.) 113 | 114 | These requirements apply to the modified work as a whole. If 115 | identifiable sections of that work are not derived from the Program, 116 | and can be reasonably considered independent and separate works in 117 | themselves, then this License, and its terms, do not apply to those 118 | sections when you distribute them as separate works. 
But when you 119 | distribute the same sections as part of a whole which is a work based 120 | on the Program, the distribution of the whole must be on the terms of 121 | this License, whose permissions for other licensees extend to the 122 | entire whole, and thus to each and every part regardless of who wrote it. 123 | 124 | Thus, it is not the intent of this section to claim rights or contest 125 | your rights to work written entirely by you; rather, the intent is to 126 | exercise the right to control the distribution of derivative or 127 | collective works based on the Program. 128 | 129 | In addition, mere aggregation of another work not based on the Program 130 | with the Program (or with a work based on the Program) on a volume of 131 | a storage or distribution medium does not bring the other work under 132 | the scope of this License. 133 | 134 | 3. You may copy and distribute the Program (or a work based on it, 135 | under Section 2) in object code or executable form under the terms of 136 | Sections 1 and 2 above provided that you also do one of the following: 137 | 138 | a) Accompany it with the complete corresponding machine-readable 139 | source code, which must be distributed under the terms of Sections 140 | 1 and 2 above on a medium customarily used for software interchange; or, 141 | 142 | b) Accompany it with a written offer, valid for at least three 143 | years, to give any third party, for a charge no more than your 144 | cost of physically performing source distribution, a complete 145 | machine-readable copy of the corresponding source code, to be 146 | distributed under the terms of Sections 1 and 2 above on a medium 147 | customarily used for software interchange; or, 148 | 149 | c) Accompany it with the information you received as to the offer 150 | to distribute corresponding source code. 
(This alternative is 151 | allowed only for noncommercial distribution and only if you 152 | received the program in object code or executable form with such 153 | an offer, in accord with Subsection b above.) 154 | 155 | The source code for a work means the preferred form of the work for 156 | making modifications to it. For an executable work, complete source 157 | code means all the source code for all modules it contains, plus any 158 | associated interface definition files, plus the scripts used to 159 | control compilation and installation of the executable. However, as a 160 | special exception, the source code distributed need not include 161 | anything that is normally distributed (in either source or binary 162 | form) with the major components (compiler, kernel, and so on) of the 163 | operating system on which the executable runs, unless that component 164 | itself accompanies the executable. 165 | 166 | If distribution of executable or object code is made by offering 167 | access to copy from a designated place, then offering equivalent 168 | access to copy the source code from the same place counts as 169 | distribution of the source code, even though third parties are not 170 | compelled to copy the source along with the object code. 171 | 172 | 4. You may not copy, modify, sublicense, or distribute the Program 173 | except as expressly provided under this License. Any attempt 174 | otherwise to copy, modify, sublicense or distribute the Program is 175 | void, and will automatically terminate your rights under this License. 176 | However, parties who have received copies, or rights, from you under 177 | this License will not have their licenses terminated so long as such 178 | parties remain in full compliance. 179 | 180 | 5. You are not required to accept this License, since you have not 181 | signed it. However, nothing else grants you permission to modify or 182 | distribute the Program or its derivative works. 
These actions are 183 | prohibited by law if you do not accept this License. Therefore, by 184 | modifying or distributing the Program (or any work based on the 185 | Program), you indicate your acceptance of this License to do so, and 186 | all its terms and conditions for copying, distributing or modifying 187 | the Program or works based on it. 188 | 189 | 6. Each time you redistribute the Program (or any work based on the 190 | Program), the recipient automatically receives a license from the 191 | original licensor to copy, distribute or modify the Program subject to 192 | these terms and conditions. You may not impose any further 193 | restrictions on the recipients' exercise of the rights granted herein. 194 | You are not responsible for enforcing compliance by third parties to 195 | this License. 196 | 197 | 7. If, as a consequence of a court judgment or allegation of patent 198 | infringement or for any other reason (not limited to patent issues), 199 | conditions are imposed on you (whether by court order, agreement or 200 | otherwise) that contradict the conditions of this License, they do not 201 | excuse you from the conditions of this License. If you cannot 202 | distribute so as to satisfy simultaneously your obligations under this 203 | License and any other pertinent obligations, then as a consequence you 204 | may not distribute the Program at all. For example, if a patent 205 | license would not permit royalty-free redistribution of the Program by 206 | all those who receive copies directly or indirectly through you, then 207 | the only way you could satisfy both it and this License would be to 208 | refrain entirely from distribution of the Program. 209 | 210 | If any portion of this section is held invalid or unenforceable under 211 | any particular circumstance, the balance of the section is intended to 212 | apply and the section as a whole is intended to apply in other 213 | circumstances. 
214 | 215 | It is not the purpose of this section to induce you to infringe any 216 | patents or other property right claims or to contest validity of any 217 | such claims; this section has the sole purpose of protecting the 218 | integrity of the free software distribution system, which is 219 | implemented by public license practices. Many people have made 220 | generous contributions to the wide range of software distributed 221 | through that system in reliance on consistent application of that 222 | system; it is up to the author/donor to decide if he or she is willing 223 | to distribute software through any other system and a licensee cannot 224 | impose that choice. 225 | 226 | This section is intended to make thoroughly clear what is believed to 227 | be a consequence of the rest of this License. 228 | 229 | 8. If the distribution and/or use of the Program is restricted in 230 | certain countries either by patents or by copyrighted interfaces, the 231 | original copyright holder who places the Program under this License 232 | may add an explicit geographical distribution limitation excluding 233 | those countries, so that distribution is permitted only in or among 234 | countries not thus excluded. In such case, this License incorporates 235 | the limitation as if written in the body of this License. 236 | 237 | 9. The Free Software Foundation may publish revised and/or new versions 238 | of the General Public License from time to time. Such new versions will 239 | be similar in spirit to the present version, but may differ in detail to 240 | address new problems or concerns. 241 | 242 | Each version is given a distinguishing version number. If the Program 243 | specifies a version number of this License which applies to it and "any 244 | later version", you have the option of following the terms and conditions 245 | either of that version or of any later version published by the Free 246 | Software Foundation. 
If the Program does not specify a version number of 247 | this License, you may choose any version ever published by the Free Software 248 | Foundation. 249 | 250 | 10. If you wish to incorporate parts of the Program into other free 251 | programs whose distribution conditions are different, write to the author 252 | to ask for permission. For software which is copyrighted by the Free 253 | Software Foundation, write to the Free Software Foundation; we sometimes 254 | make exceptions for this. Our decision will be guided by the two goals 255 | of preserving the free status of all derivatives of our free software and 256 | of promoting the sharing and reuse of software generally. 257 | 258 | NO WARRANTY 259 | 260 | 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY 261 | FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN 262 | OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES 263 | PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED 264 | OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 265 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS 266 | TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE 267 | PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, 268 | REPAIR OR CORRECTION. 269 | 270 | 12. 
IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 271 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR 272 | REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, 273 | INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING 274 | OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED 275 | TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY 276 | YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER 277 | PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE 278 | POSSIBILITY OF SUCH DAMAGES. 279 | 280 | END OF TERMS AND CONDITIONS 281 | 282 | How to Apply These Terms to Your New Programs 283 | 284 | If you develop a new program, and you want it to be of the greatest 285 | possible use to the public, the best way to achieve this is to make it 286 | free software which everyone can redistribute and change under these terms. 287 | 288 | To do so, attach the following notices to the program. It is safest 289 | to attach them to the start of each source file to most effectively 290 | convey the exclusion of warranty; and each file should have at least 291 | the "copyright" line and a pointer to where the full notice is found. 292 | 293 | 294 | Copyright (C) 295 | 296 | This program is free software; you can redistribute it and/or modify 297 | it under the terms of the GNU General Public License as published by 298 | the Free Software Foundation; either version 2 of the License, or 299 | (at your option) any later version. 300 | 301 | This program is distributed in the hope that it will be useful, 302 | but WITHOUT ANY WARRANTY; without even the implied warranty of 303 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 304 | GNU General Public License for more details. 
305 | 306 | You should have received a copy of the GNU General Public License along 307 | with this program; if not, write to the Free Software Foundation, Inc., 308 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 309 | 310 | Also add information on how to contact you by electronic and paper mail. 311 | 312 | If the program is interactive, make it output a short notice like this 313 | when it starts in an interactive mode: 314 | 315 | Gnomovision version 69, Copyright (C) year name of author 316 | Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. 317 | This is free software, and you are welcome to redistribute it 318 | under certain conditions; type `show c' for details. 319 | 320 | The hypothetical commands `show w' and `show c' should show the appropriate 321 | parts of the General Public License. Of course, the commands you use may 322 | be called something other than `show w' and `show c'; they could even be 323 | mouse-clicks or menu items--whatever suits your program. 324 | 325 | You should also get your employer (if you work as a programmer) or your 326 | school, if any, to sign a "copyright disclaimer" for the program, if 327 | necessary. Here is a sample; alter the names: 328 | 329 | Yoyodyne, Inc., hereby disclaims all copyright interest in the program 330 | `Gnomovision' (which makes passes at compilers) written by James Hacker. 331 | 332 | , 1 April 1989 333 | Ty Coon, President of Vice 334 | 335 | This General Public License does not permit incorporating your program into 336 | proprietary programs. If your program is a subroutine library, you may 337 | consider it more useful to permit linking proprietary applications with the 338 | library. If this is what you want to do, use the GNU Lesser General 339 | Public License instead of this License. 
340 | -------------------------------------------------------------------------------- /nuse-syscalls.c: -------------------------------------------------------------------------------- 1 | /* 2 | * System call table for NUSE/x86-64, copied from arch/x86/um/syscall_*.c 3 | */ 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | #include 12 | #include /* XXX: for rfork flags */ 13 | #include 14 | #include 15 | #include "generated/utsrelease.h" 16 | #include "generated/compile.h" 17 | 18 | 19 | #define RUMPSERVER_DEFAULT "/tmp/rump-server-nuse" 20 | 21 | 22 | /* XXX: should be auto-generated by make KBUILD_SRC= ARCH=x86 archheaders */ 23 | #if 0 24 | /* #include */ 25 | #undef __SYSCALL_64 26 | #define __SYSCALL_64(nr, sym, compat) [nr] = sym, 27 | #define __SYSCALL_COMMON(nr, sym, compat) __SYSCALL_64(nr, sym, compat) 28 | 29 | typedef void (*sys_call_ptr_t)(void); 30 | 31 | struct nuse_sys_call_t { 32 | int syscall_num; 33 | void *syscall_handler; 34 | }; 35 | 36 | const struct nuse_sys_call_t sys_call_table[] = { 37 | {0, read}, 38 | {1, write}, 39 | {2, open}, 40 | {3, close}, 41 | {7, poll}, 42 | {16, ioctl}, 43 | {23, select}, 44 | {41, socket}, 45 | {42, connect}, 46 | {43, accept}, 47 | {44, sendto}, 48 | {45, recvfrom}, 49 | {46, sendmsg}, 50 | {47, recvmsg}, 51 | {49, bind}, 52 | {50, listen}, 53 | {51, getsockname}, 54 | {52, getpeername}, 55 | {54, setsockopt}, 56 | {55, getsockopt}, 57 | {213, epoll_create}, 58 | {232, epoll_wait}, 59 | {233, epoll_ctl}, 60 | }; 61 | #endif 62 | 63 | 64 | /* XXX */ 65 | //#include 66 | struct user_msghdr { 67 | void *msg_name; /* ptr to socket address structure */ 68 | int msg_namelen; /* size of socket address structure */ 69 | struct iovec *msg_iov; /* scatter/gather array */ 70 | size_t msg_iovlen; /* # elements in msg_iov */ 71 | void *msg_control; /* ancillary data */ 72 | size_t msg_controllen; /* ancillary data buffer length */ 73 | unsigned int msg_flags; /* flags on received message */ 
74 | }; 75 | 76 | /* XXX (see rumpuser_sp.c) */ 77 | extern __thread struct spclient *th_spc; 78 | struct lwp; /* dummy */ 79 | 80 | struct user_msghdr; 81 | struct epoll_event; 82 | typedef unsigned int nfds_t; 83 | 84 | int nuse_socket(int domain, int type, int protocol); 85 | int nuse_close(int fd); 86 | ssize_t nuse_recvmsg(int fd, const struct user_msghdr *msghdr, int flags); 87 | ssize_t nuse_sendmsg(int fd, const struct user_msghdr *msghdr, int flags); 88 | int nuse_sendmmsg(int fd, struct mmsghdr *msgvec, unsigned int vlen, 89 | unsigned int flags); 90 | int nuse_recvmmsg(int fd, struct mmsghdr *msgvec, unsigned int vlen, 91 | unsigned int flags, struct timespec *timeout); 92 | int nuse_getsockname(int fd, struct sockaddr *name, socklen_t *namelen); 93 | int nuse_getpeername(int fd, struct sockaddr *name, socklen_t *namelen); 94 | int nuse_bind(int fd, struct sockaddr *name, int namelen); 95 | int nuse_connect(int fd, const struct sockaddr *v1, int v2); 96 | int nuse_listen(int fd, int v1); 97 | int nuse_accept4(int fd, struct sockaddr *addr, int *addrlen, int flags); 98 | int nuse_accept(int fd, struct sockaddr *addr, socklen_t *addrlen); 99 | ssize_t nuse_write(int fd, const void *buf, size_t count); 100 | ssize_t nuse_writev(int fd, const struct iovec *iov, size_t count); 101 | ssize_t nuse_sendto(int fd, const void *buf, size_t len, int flags, 102 | const struct sockaddr *dest_addr, socklen_t addrlen); 103 | ssize_t nuse_send(int fd, const void *buf, size_t len, int flags); 104 | ssize_t nuse_read(int fd, void *buf, size_t count); 105 | ssize_t nuse_recvfrom(int fd, void *buf, size_t len, int flags, 106 | struct sockaddr *from, socklen_t *fromlen); 107 | int nuse_recv(int fd, void *buf, size_t count, int flags); 108 | int nuse_setsockopt(int fd, int level, int optname, 109 | const void *optval, int optlen); 110 | int nuse_getsockopt(int fd, int level, int optname, 111 | void *optval, unsigned int *optlen); 112 | int open(const char *pathname, int flags, 
mode_t mode); 113 | int open64(const char *pathname, int flags, mode_t mode); 114 | int nuse_ioctl(int fd, int request, ...); 115 | int poll(struct pollfd *fds, nfds_t nfds, int timeout); 116 | int select(int nfds, fd_set *readfds, fd_set *writefds, 117 | fd_set *exceptfds, struct timeval *timeout); 118 | int epoll_create(int size); 119 | int epoll_ctl(int epollfd, int op, int fd, struct epoll_event *event); 120 | int epoll_wait(int epollfd, struct epoll_event *events, 121 | int maxevents, int timeout); 122 | /* XXX */ 123 | int nuse_socketpair(int, int, int, int *); 124 | 125 | 126 | #if BYTE_ORDER == BIG_ENDIAN 127 | #define SCARG(p,k) ((p)->k.be.datum) /* get arg from args pointer */ 128 | #elif BYTE_ORDER == LITTLE_ENDIAN 129 | #define SCARG(p,k) ((p)->k.le.datum) /* get arg from args pointer */ 130 | #else 131 | #error "what byte order is this machine?" 132 | #endif 133 | 134 | 135 | static int nuse_copyin(const void *uaddr, void *kaddr, size_t len) 136 | { 137 | int error = 0; 138 | 139 | if (__predict_false(uaddr == NULL && len)) { 140 | return EFAULT; 141 | } 142 | 143 | if (!th_spc) { 144 | memcpy(kaddr, uaddr, len); 145 | } else if (len) { 146 | error = rumpuser_sp_copyin(th_spc, uaddr, kaddr, len); 147 | } 148 | 149 | return error; 150 | } 151 | 152 | static int nuse_copyout(const void *kaddr, void *uaddr, size_t len) 153 | { 154 | int error = 0; 155 | 156 | if (__predict_false(uaddr == NULL && len)) { 157 | return EFAULT; 158 | } 159 | 160 | if (!th_spc) { 161 | memcpy(uaddr, kaddr, len); 162 | } 163 | else if (len) { 164 | error = rumpuser_sp_copyout(th_spc, kaddr, uaddr, len); 165 | } 166 | 167 | return error; 168 | } 169 | 170 | static int 171 | sys___socket30(struct lwp *l, const struct sys___socket30_args *uap, 172 | register_t *retval) 173 | { 174 | /* { 175 | syscallarg(int) domain; 176 | syscallarg(int) type; 177 | syscallarg(int) protocol; 178 | } */ 179 | int error; 180 | 181 | error = nuse_socket(SCARG(uap, domain), SCARG(uap, type), 182 | 
SCARG(uap, protocol)); 183 | if (error != -1) { 184 | *retval = error; 185 | error = 0; 186 | } 187 | return error; 188 | } 189 | 190 | static int sys_bind(struct lwp *l, const struct sys_bind_args *uap, 191 | register_t *retval) 192 | { 193 | /* { 194 | syscallarg(int) s; 195 | syscallarg(const struct sockaddr *) name; 196 | syscallarg(unsigned int) namelen; 197 | } */ 198 | int error, len; 199 | void *name; 200 | 201 | len = SCARG(uap, namelen); 202 | if (len > 0 && SCARG(uap, name) == NULL) 203 | return EINVAL; 204 | name = malloc(len); 205 | 206 | error = nuse_copyin(SCARG(uap, name), name, len); 207 | if (error) 208 | goto end; 209 | 210 | error = nuse_bind(SCARG(uap, s), name, len); 211 | if (error != -1) { 212 | *retval = error; 213 | error = 0; 214 | } 215 | 216 | end: 217 | free(name); 218 | return error; 219 | } 220 | 221 | static int sys_listen(struct lwp *l, const struct sys_listen_args *uap, 222 | register_t *retval) 223 | { 224 | /* { 225 | syscallarg(int) s; 226 | syscallarg(int) backlog; 227 | } */ 228 | int error; 229 | 230 | error = nuse_listen(SCARG(uap, s), SCARG(uap, backlog)); 231 | *retval = error; 232 | return error; 233 | } 234 | 235 | static int sys_accept(struct lwp *l, const struct sys_accept_args *uap, 236 | register_t *retval) 237 | { 238 | /* { 239 | syscallarg(int) s; 240 | syscallarg(struct sockaddr *) name; 241 | syscallarg(unsigned int *) anamelen; 242 | } */ 243 | int error; 244 | 245 | error = nuse_accept(SCARG(uap, s), SCARG(uap, name), 246 | SCARG(uap, anamelen)); 247 | *retval = error; 248 | return error; 249 | } 250 | 251 | static int sys_connect(struct lwp *l, const struct sys_connect_args *uap, 252 | register_t *retval) 253 | { 254 | /* { 255 | syscallarg(int) s; 256 | syscallarg(const struct sockaddr *) name; 257 | syscallarg(unsigned int) namelen; 258 | } */ 259 | int error; 260 | 261 | error = nuse_connect(SCARG(uap, s), SCARG(uap, name), 262 | SCARG(uap, namelen)); 263 | *retval = error; 264 | return error; 265 | } 266 
| 267 | static int sys_socketpair(struct lwp *l, const struct sys_socketpair_args *uap, 268 | register_t *retval) 269 | { 270 | /* { 271 | syscallarg(int) domain; 272 | syscallarg(int) type; 273 | syscallarg(int) protocol; 274 | syscallarg(int *) rsv; 275 | } */ 276 | 277 | int error; 278 | error = nuse_socketpair(SCARG(uap, domain), 279 | SCARG(uap, type), 280 | SCARG(uap, protocol), 281 | SCARG(uap, rsv)); 282 | *retval = error; 283 | return error; 284 | } 285 | 286 | static int sys_sendto(struct lwp *l, const struct sys_sendto_args *uap, 287 | register_t *retval) 288 | { 289 | /* { 290 | syscallarg(int) s; 291 | syscallarg(const void *) buf; 292 | syscallarg(size_t) len; 293 | syscallarg(int) flags; 294 | syscallarg(const struct sockaddr *) to; 295 | syscallarg(unsigned int) tolen; 296 | } */ 297 | int error, len; 298 | void *buf; 299 | struct sockaddr *to; 300 | 301 | len = SCARG(uap, len); 302 | if (len > 0 && SCARG(uap, buf) == NULL) 303 | return EINVAL; 304 | buf = malloc(len); 305 | error = nuse_copyin(SCARG(uap, buf), buf, len); 306 | if (error) 307 | goto end; 308 | 309 | len = SCARG(uap, tolen); 310 | if (len > 0 && SCARG(uap, to) == NULL) 311 | return EINVAL; 312 | to = malloc(len); 313 | error = nuse_copyin(SCARG(uap, to), to, len); 314 | if (error) 315 | goto end; 316 | 317 | error = nuse_sendto(SCARG(uap, s), 318 | buf, 319 | SCARG(uap, len), 320 | SCARG(uap, flags), 321 | to, 322 | SCARG(uap, tolen)); 323 | if (error != -1) { 324 | *retval = error; 325 | error = 0; 326 | } 327 | end: 328 | return error; 329 | } 330 | 331 | static int sys_sendmsg(struct lwp *l, const struct sys_sendmsg_args *uap, 332 | register_t *retval) 333 | { 334 | /* { 335 | syscallarg(int) s; 336 | syscallarg(const struct msghdr *) msg; 337 | syscallarg(int) flags; 338 | } */ 339 | int error, iovsz, i; 340 | struct user_msghdr msg; 341 | struct iovec *iov; 342 | void *rmsg_name; 343 | 344 | error = nuse_copyin(SCARG(uap, msg), &msg, sizeof(msg)); 345 | if (error) 346 | return 
error; 347 | iovsz = msg.msg_iovlen * sizeof(struct iovec); 348 | iov = malloc(iovsz); 349 | error = nuse_copyin(msg.msg_iov, iov, iovsz); 350 | if (error) 351 | return error; 352 | 353 | for (i = 0; i < msg.msg_iovlen; i++) { 354 | void *riov = iov[i].iov_base; 355 | iov[i].iov_base = malloc(iov[i].iov_len); 356 | error = nuse_copyin(riov, iov[i].iov_base, 357 | iov[i].iov_len); 358 | if (error) 359 | return error; 360 | } 361 | msg.msg_iov = iov; 362 | 363 | rmsg_name = msg.msg_name; 364 | msg.msg_name = malloc(msg.msg_namelen); 365 | error = nuse_copyin(rmsg_name, msg.msg_name, msg.msg_namelen); 366 | if (error) 367 | return error; 368 | 369 | error = nuse_sendmsg(SCARG(uap, s), 370 | &msg, 371 | SCARG(uap, flags)); 372 | 373 | if (error != -1) { 374 | *retval = error; 375 | error = 0; 376 | } 377 | free(iov); 378 | return error; 379 | } 380 | 381 | static int sys_recvfrom(struct lwp *l, const struct sys_recvfrom_args *uap, 382 | register_t *retval) 383 | { 384 | /* { 385 | syscallarg(int) s; 386 | syscallarg(void *) buf; 387 | syscallarg(size_t) len; 388 | syscallarg(int) flags; 389 | syscallarg(struct sockaddr *) from; 390 | syscallarg(unsigned int *) fromlenaddr; 391 | } */ 392 | int error; 393 | error = nuse_recvfrom(SCARG(uap, s), 394 | SCARG(uap, buf), 395 | SCARG(uap, len), 396 | SCARG(uap, flags), 397 | SCARG(uap, from), 398 | SCARG(uap, fromlenaddr)); 399 | *retval = error; 400 | return error; 401 | } 402 | 403 | /* XXX */ 404 | struct iov_iter { 405 | int type; 406 | size_t iov_offset; 407 | size_t count; 408 | union { 409 | const struct iovec *iov; 410 | const struct kvec *kvec; 411 | const struct bio_vec *bvec; 412 | }; 413 | unsigned long nr_segs; 414 | }; 415 | 416 | static int sys_recvmsg(struct lwp *l, const struct sys_recvmsg_args *uap, 417 | register_t *retval) 418 | { 419 | /* { 420 | syscallarg(int) s; 421 | syscallarg(struct msghdr *) msg; 422 | syscallarg(int) flags; 423 | } */ 424 | int error, iovsz, i; 425 | struct user_msghdr msg; 426 | 
struct iovec *liov, *riov; 427 | 428 | error = nuse_copyin(SCARG(uap, msg), &msg, sizeof(msg)); 429 | if (error) 430 | return error; 431 | iovsz = msg.msg_iovlen * sizeof(struct iovec); 432 | liov = malloc(iovsz); 433 | riov = malloc(iovsz); 434 | msg.msg_name = malloc(msg.msg_namelen); 435 | 436 | error = nuse_copyin(msg.msg_iov, riov, iovsz); 437 | if (error) 438 | return error; 439 | 440 | for (i = 0; i < msg.msg_iovlen; i++) { 441 | liov[i].iov_base = malloc(riov[i].iov_len); 442 | liov[i].iov_len = riov[i].iov_len; 443 | } 444 | 445 | msg.msg_iov = liov; 446 | error = nuse_recvmsg(SCARG(uap, s), 447 | &msg, 448 | SCARG(uap, flags)); 449 | if (error != -1) { 450 | *retval = error; 451 | error = 0; 452 | } 453 | 454 | if (error == 0) { 455 | for (i = 0; i < msg.msg_iovlen; i++) { 456 | error = nuse_copyout(liov[i].iov_base, 457 | riov[i].iov_base, 458 | liov[i].iov_len); 459 | } 460 | } 461 | 462 | if (msg.msg_control != NULL) 463 | error = nuse_copyout(&msg.msg_control, 464 | (SCARG(uap, msg)) 465 | + sizeof(void *) + sizeof(int) 466 | + sizeof(struct iov_iter) + sizeof(void *), 467 | msg.msg_controllen); 468 | 469 | /* XXX: no idea how msg_name should copyout to client */ 470 | #if 0 471 | if (error == 0) 472 | error = rumpuser_sp_copyout(th_spc, &msg.msg_name, 473 | rmsg_name, msg.msg_namelen); 474 | #endif 475 | 476 | free(riov); 477 | free(liov); 478 | free(msg.msg_name); 479 | return error; 480 | } 481 | 482 | static int sys_sendmmsg(struct lwp *l, const struct sys_sendmmsg_args *uap, 483 | register_t *retval) 484 | { 485 | /* { 486 | syscallarg(int) s; 487 | syscallarg(struct mmsghdr *) mmsg; 488 | syscallarg(unsigned int) vlen; 489 | syscallarg(unsigned int) flags; 490 | } */ 491 | int error; 492 | error = nuse_sendmmsg(SCARG(uap, s), 493 | SCARG(uap, mmsg), 494 | SCARG(uap, vlen), 495 | SCARG(uap, flags)); 496 | *retval = error; 497 | return error; 498 | } 499 | 500 | static int sys_recvmmsg(struct lwp *l, const struct sys_recvmmsg_args *uap, 501 | 
register_t *retval) 502 | { 503 | /* { 504 | syscallarg(int) s; 505 | syscallarg(struct mmsghdr *) mmsg; 506 | syscallarg(unsigned int) vlen; 507 | syscallarg(unsigned int) flags; 508 | syscallarg(struct timespec *) timeout; 509 | } */ 510 | int error; 511 | error = nuse_recvmmsg(SCARG(uap, s), 512 | SCARG(uap, mmsg), 513 | SCARG(uap, vlen), 514 | SCARG(uap, flags), 515 | SCARG(uap, timeout)); 516 | *retval = error; 517 | return error; 518 | } 519 | 520 | #if 0 521 | int 522 | sys_shutdown(struct lwp *l, const struct sys_shutdown_args *uap, 523 | register_t *retval) 524 | { 525 | /* { 526 | syscallarg(int) s; 527 | syscallarg(int) how; 528 | } */ 529 | struct socket *so; 530 | int error; 531 | 532 | error = fd_getsock(SCARG(uap, s), &so); 533 | if (error != 0) 534 | return error; 535 | solock(so); 536 | error = soshutdown(so, SCARG(uap, how)); 537 | sounlock(so); 538 | fd_putfile(SCARG(uap, s)); 539 | return error; 540 | } 541 | #endif 542 | 543 | static int sys_setsockopt(struct lwp *l, const struct sys_setsockopt_args *uap, 544 | register_t *retval) 545 | { 546 | /* { 547 | syscallarg(int) s; 548 | syscallarg(int) level; 549 | syscallarg(int) name; 550 | syscallarg(const void *) val; 551 | syscallarg(unsigned int) valsize; 552 | } */ 553 | int error, len; 554 | void *val; 555 | 556 | len = SCARG(uap, valsize); 557 | if (len > 0 && SCARG(uap, val) == NULL) 558 | return EINVAL; 559 | val = malloc(len); 560 | 561 | error = nuse_copyin(SCARG(uap, val), val, len); 562 | if (error) 563 | goto end; 564 | 565 | error = nuse_setsockopt(SCARG(uap, s), 566 | SCARG(uap, level), 567 | SCARG(uap, name), 568 | val, 569 | SCARG(uap, valsize)); 570 | end: 571 | *retval = error; 572 | free(val); 573 | return error; 574 | } 575 | 576 | static int sys_getsockopt(struct lwp *l, const struct sys_getsockopt_args *uap, 577 | register_t *retval) 578 | { 579 | /* { 580 | syscallarg(int) s; 581 | syscallarg(int) level; 582 | syscallarg(int) name; 583 | syscallarg(void *) val; 584 | 
syscallarg(unsigned int *) avalsize; 585 | } */ 586 | int error; 587 | unsigned int valsize; 588 | void *data = NULL; 589 | 590 | if (SCARG(uap, val) != NULL) { 591 | error = nuse_copyin(SCARG(uap, avalsize), &valsize, sizeof(valsize)); 592 | if (error) 593 | return error; 594 | data = malloc(valsize); 595 | } else 596 | valsize = 0; 597 | 598 | error = nuse_getsockopt(SCARG(uap, s), 599 | SCARG(uap, level), 600 | SCARG(uap, name), 601 | data, 602 | &valsize); 603 | 604 | if (valsize > 0) { 605 | error = nuse_copyout(data, SCARG(uap, val), valsize); 606 | if (error) 607 | goto out; 608 | 609 | error = nuse_copyout(&valsize, SCARG(uap, avalsize), sizeof(valsize)); 610 | if (error) 611 | goto out; 612 | } 613 | 614 | out: 615 | free(data); 616 | *retval = error; 617 | return error; 618 | } 619 | 620 | static int sys_getsockname(struct lwp *l, const struct sys_getsockname_args *uap, 621 | register_t *retval) 622 | { 623 | /* { 624 | syscallarg(int) fdes; 625 | syscallarg(struct sockaddr *) asa; 626 | syscallarg(unsigned int *) alen; 627 | } */ 628 | int error; 629 | socklen_t valsize; 630 | struct sockaddr *data; 631 | 632 | if (SCARG(uap, asa) != NULL) { 633 | error = nuse_copyin(SCARG(uap, alen), &valsize, sizeof(valsize)); 634 | if (error) 635 | return error; 636 | data = malloc(valsize); 637 | } else 638 | valsize = 0; 639 | 640 | error = nuse_getsockname(SCARG(uap, fdes), 641 | data, 642 | &valsize); 643 | 644 | if (valsize > 0) { 645 | error = nuse_copyout(data, SCARG(uap, asa), valsize); 646 | if (error) 647 | goto out; 648 | 649 | error = nuse_copyout(&valsize, SCARG(uap, alen), sizeof(valsize)); 650 | if (error) 651 | goto out; 652 | } 653 | 654 | out: 655 | free(data); 656 | *retval = error; 657 | return error; 658 | } 659 | 660 | /* 661 | * Get name of peer for connected socket. 
662 | */ 663 | static int sys_getpeername(struct lwp *l, const struct sys_getpeername_args *uap, 664 | register_t *retval) 665 | { 666 | /* { 667 | syscallarg(int) fdes; 668 | syscallarg(struct sockaddr *) asa; 669 | syscallarg(unsigned int *) alen; 670 | } */ 671 | int error; 672 | error = nuse_getpeername(SCARG(uap, fdes), 673 | SCARG(uap, asa), 674 | SCARG(uap, alen)); 675 | *retval = error; 676 | return error; 677 | } 678 | 679 | /* XXX:XXX */ 680 | #define IFREQ_SIZE 40 681 | static int sys_ioctl(struct lwp *l, const struct sys_ioctl_args *uap, 682 | register_t *retval) 683 | { 684 | /* { 685 | syscallarg(int) fd; 686 | syscallarg(u_long) com; 687 | syscallarg(void *) data; 688 | } */ 689 | int error; 690 | void *data = NULL; 691 | 692 | /* if-level ioctl */ 693 | if (SCARG(uap, com) >= SIOCGIFNAME) { 694 | data = malloc(IFREQ_SIZE); 695 | error = nuse_copyin(SCARG(uap, data), data, IFREQ_SIZE); 696 | if (error) 697 | return error; 698 | 699 | error = nuse_ioctl(SCARG(uap, fd), 700 | SCARG(uap, com), 701 | data); 702 | if (error != -1) { 703 | *retval = error; 704 | error = 0; 705 | } 706 | 707 | error = nuse_copyout(data, SCARG(uap, data), IFREQ_SIZE); 708 | } else { 709 | error = nuse_ioctl(SCARG(uap, fd), 710 | SCARG(uap, com), 711 | SCARG(uap, data)); 712 | if (error != -1) { 713 | *retval = error; 714 | error = 0; 715 | } 716 | 717 | } 718 | 719 | return error; 720 | } 721 | 722 | typedef int sy_call_t(struct lwp *, const void *, register_t *); 723 | struct rump_onesyscall { 724 | int ros_num; 725 | sy_call_t *ros_handler; 726 | }; 727 | static const struct rump_onesyscall mysys[] = { 728 | /* { 6, (sy_call_t *)sys_close }, */ 729 | { 27, (sy_call_t *)sys_recvmsg }, 730 | { 28, (sy_call_t *)sys_sendmsg }, 731 | { 29, (sy_call_t *)sys_recvfrom }, 732 | { 30, (sy_call_t *)sys_accept }, 733 | { 31, (sy_call_t *)sys_getpeername }, 734 | { 32, (sy_call_t *)sys_getsockname }, 735 | { 54, (sy_call_t *)sys_ioctl }, 736 | { 98, (sy_call_t *)sys_connect }, 737 | { 
104, (sy_call_t *)sys_bind }, 738 | { 105, (sy_call_t *)sys_setsockopt }, 739 | { 106, (sy_call_t *)sys_listen }, 740 | { 118, (sy_call_t *)sys_getsockopt }, 741 | { 133, (sy_call_t *)sys_sendto }, 742 | /* { 134, (sy_call_t *)sys_shutdown }, */ 743 | { 135, (sy_call_t *)sys_socketpair }, 744 | { 394, (sy_call_t *)sys___socket30 }, 745 | /* { 456, (sy_call_t *)sys_paccept }, */ 746 | { 475, (sy_call_t *)sys_recvmmsg }, 747 | { 476, (sy_call_t *)sys_sendmmsg }, 748 | }; 749 | 750 | static int nuse_hyp_syscall(int num, void *arg, long *retval) 751 | { 752 | int rv, i; 753 | register_t regrv[2] = {0, 0}; 754 | sy_call_t *syscall = NULL; 755 | struct lwp *l = NULL; 756 | 757 | /* XXX */ 758 | for (i = 0; i < sizeof(mysys) / sizeof(mysys[0]) ; i++) { 759 | if (mysys[i].ros_num == num) { 760 | syscall = mysys[i].ros_handler; 761 | break; 762 | } 763 | } 764 | if (!syscall) { 765 | retval[0] = -1; 766 | return -1; 767 | } 768 | 769 | rv = syscall(l, (void *)arg, regrv); 770 | retval[0] = regrv[0]; 771 | retval[1] = regrv[1]; 772 | return rv; 773 | } 774 | 775 | int rumpclient_syscall(int num, void *data, size_t dlen, register_t *retval) 776 | { 777 | return nuse_hyp_syscall(num, data, retval); 778 | } 779 | 780 | static void nuse_schedule(void){} 781 | static void nuse_unschedule(void){} 782 | static void nuse_user_unschedule(int nlocks, int *countp, void *interlock){} 783 | static void nuse_user_schedule(int nlocks, void *interlock){} 784 | static int nuse_lwproc_rfork(void *priv, int flags, const char *comm){return 0;} 785 | static void nuse_lwproc_switch(struct lwp *newlwp){} 786 | static void nuse_lwproc_release(void){} 787 | static int nuse_lwproc_newlwp(pid_t pid){return 0;} 788 | static struct lwp *nuse_lwproc_curlwp(void){return NULL;} 789 | static void nuse_hyp_lwpexit(void){} 790 | static void nuse_hyp_execnotify(const char *comm){} 791 | 792 | static const struct rumpuser_hyperup nuse_hyp = { 793 | .hyp_schedule = nuse_schedule, 794 | .hyp_unschedule = 
nuse_unschedule, 795 | .hyp_backend_unschedule = nuse_user_unschedule, 796 | .hyp_backend_schedule = nuse_user_schedule, 797 | .hyp_lwproc_switch = nuse_lwproc_switch, 798 | .hyp_lwproc_release = nuse_lwproc_release, 799 | .hyp_lwproc_rfork = nuse_lwproc_rfork, 800 | .hyp_lwproc_newlwp = nuse_lwproc_newlwp, 801 | .hyp_lwproc_curlwp = nuse_lwproc_curlwp, 802 | .hyp_lwpexit = nuse_hyp_lwpexit, 803 | .hyp_syscall = nuse_hyp_syscall, 804 | .hyp_execnotify = nuse_hyp_execnotify, 805 | .hyp_getpid = getpid, 806 | }; 807 | 808 | void 809 | nuse_syscall_proxy_init(void) 810 | { 811 | char *url; 812 | char buf[64]; 813 | 814 | url = getenv("RUMP_SERVER"); 815 | if (!url) { 816 | sprintf(buf, "unix://%s.%d", RUMPSERVER_DEFAULT, getpid()); 817 | url = strdup(buf); 818 | } 819 | 820 | if (rumpuser_init(RUMPUSER_VERSION, &nuse_hyp) != 0) { 821 | rumpuser_dprintf("rumpuser init failed\n"); 822 | return /* EINVAL */; 823 | } 824 | umask (0007); 825 | rumpuser_sp_init(url, "Linux", UTS_RELEASE, UTS_MACHINE); 826 | rumpuser_dprintf("nuse syscall proxy start at %s\n", url); 827 | } 828 | 829 | void 830 | nuse_syscall_proxy_exit(void) 831 | { 832 | rumpuser_dprintf("rump_server finishing.\n"); 833 | rumpuser_sp_fini(NULL); 834 | } 835 | -------------------------------------------------------------------------------- /list.h: -------------------------------------------------------------------------------- 1 | #ifndef __LIB_LINUX_LIST_H 2 | #define __LIB_LINUX_LIST_H 3 | 4 | #include 5 | struct list_head { 6 | struct list_head *next, *prev; 7 | }; 8 | # define POISON_POINTER_DELTA 0 9 | #define LIST_POISON1 ((void *) 0x00100100 + POISON_POINTER_DELTA) 10 | #define LIST_POISON2 ((void *) 0x00200200 + POISON_POINTER_DELTA) 11 | 12 | #define offsetof(TYPE, MEMBER) ((size_t) &((TYPE *)0)->MEMBER) 13 | 14 | #define container_of(ptr, type, member) ({ \ 15 | const typeof( ((type *)0)->member ) *__mptr = (ptr); \ 16 | (type *)( (char *)__mptr - offsetof(type,member) );}) 17 | 18 | /* #include 
*/ 19 | /* #include */ 20 | /* #include */ 21 | /* #include */ 22 | 23 | /* 24 | * Simple doubly linked list implementation. 25 | * 26 | * Some of the internal functions ("__xxx") are useful when 27 | * manipulating whole lists rather than single entries, as 28 | * sometimes we already know the next/prev entries and we can 29 | * generate better code by using them directly rather than 30 | * using the generic single-entry routines. 31 | */ 32 | 33 | #define LIST_HEAD_INIT(name) { &(name), &(name) } 34 | 35 | #define LIST_HEAD(name) \ 36 | struct list_head name = LIST_HEAD_INIT(name) 37 | 38 | static inline void INIT_LIST_HEAD(struct list_head *list) 39 | { 40 | list->next = list; 41 | list->prev = list; 42 | } 43 | 44 | /* 45 | * Insert a new entry between two known consecutive entries. 46 | * 47 | * This is only for internal list manipulation where we know 48 | * the prev/next entries already! 49 | */ 50 | #ifndef CONFIG_DEBUG_LIST 51 | static inline void __list_add(struct list_head *new, 52 | struct list_head *prev, 53 | struct list_head *next) 54 | { 55 | next->prev = new; 56 | new->next = next; 57 | new->prev = prev; 58 | prev->next = new; 59 | } 60 | #else 61 | extern void __list_add(struct list_head *new, 62 | struct list_head *prev, 63 | struct list_head *next); 64 | #endif 65 | 66 | /** 67 | * list_add - add a new entry 68 | * @new: new entry to be added 69 | * @head: list head to add it after 70 | * 71 | * Insert a new entry after the specified head. 72 | * This is good for implementing stacks. 73 | */ 74 | static inline void list_add(struct list_head *new, struct list_head *head) 75 | { 76 | __list_add(new, head, head->next); 77 | } 78 | 79 | 80 | /** 81 | * list_add_tail - add a new entry 82 | * @new: new entry to be added 83 | * @head: list head to add it before 84 | * 85 | * Insert a new entry before the specified head. 86 | * This is useful for implementing queues. 
 */
static inline void list_add_tail(struct list_head *new, struct list_head *head)
{
	__list_add(new, head->prev, head);
}

/*
 * Delete a list entry by making the prev/next entries
 * point to each other.
 *
 * This is only for internal list manipulation where we know
 * the prev/next entries already!
 */
static inline void __list_del(struct list_head * prev, struct list_head * next)
{
	next->prev = prev;
	prev->next = next;
}

/**
 * list_del - deletes entry from list.
 * @entry: the element to delete from the list.
 * Note: list_empty() on entry does not return true after this, the entry is
 * in an undefined state.
 */
#ifndef CONFIG_DEBUG_LIST
/*
 * Unlink @entry from its neighbours; @entry's own next/prev pointers are
 * left unchanged.
 */
static inline void __list_del_entry(struct list_head *entry)
{
	__list_del(entry->prev, entry->next);
}

static inline void list_del(struct list_head *entry)
{
	__list_del(entry->prev, entry->next);
	/* poison the removed entry's pointers */
	entry->next = LIST_POISON1;
	entry->prev = LIST_POISON2;
}
#else
/* With CONFIG_DEBUG_LIST the definitions are expected from another file. */
extern void __list_del_entry(struct list_head *entry);
extern void list_del(struct list_head *entry);
#endif

/**
 * list_replace - replace old entry by new one
 * @old : the element to be replaced
 * @new : the new element to insert
 *
 * If @old was empty, it will be overwritten.
 */
static inline void list_replace(struct list_head *old,
				struct list_head *new)
{
	new->next = old->next;
	new->next->prev = new;
	new->prev = old->prev;
	new->prev->next = new;
}

/*
 * list_replace_init - replace @old by @new, then reinitialise @old as an
 * empty list head.
 */
static inline void list_replace_init(struct list_head *old,
					struct list_head *new)
{
	list_replace(old, new);
	INIT_LIST_HEAD(old);
}

/**
 * list_del_init - deletes entry from list and reinitialize it.
 * @entry: the element to delete from the list.
 */
static inline void list_del_init(struct list_head *entry)
{
	__list_del_entry(entry);
	INIT_LIST_HEAD(entry);
}

/**
 * list_move - delete from one list and add as another's head
 * @list: the entry to move
 * @head: the head that will precede our entry
 */
static inline void list_move(struct list_head *list, struct list_head *head)
{
	__list_del_entry(list);
	list_add(list, head);
}

/**
 * list_move_tail - delete from one list and add as another's tail
 * @list: the entry to move
 * @head: the head that will follow our entry
 */
static inline void list_move_tail(struct list_head *list,
				  struct list_head *head)
{
	__list_del_entry(list);
	list_add_tail(list, head);
}

/**
 * list_is_last - tests whether @list is the last entry in list @head
 * @list: the entry to test
 * @head: the head of the list
 *
 * Returns non-zero when @list->next is @head.
 */
static inline int list_is_last(const struct list_head *list,
				const struct list_head *head)
{
	return list->next == head;
}

/**
 * list_empty - tests whether a list is empty
 * @head: the list to test.
 *
 * Returns non-zero when @head->next points back at @head.
 */
static inline int list_empty(const struct list_head *head)
{
	return head->next == head;
}

/**
 * list_empty_careful - tests whether a list is empty and not being modified
 * @head: the list to test
 *
 * Description:
 * tests whether a list is empty _and_ checks that no other CPU might be
 * in the process of modifying either member (next or prev)
 *
 * NOTE: using list_empty_careful() without synchronization
 * can only be safe if the only activity that can happen
 * to the list entry is list_del_init(). Eg. it cannot be used
 * if another CPU could re-list_add() it.
 */
static inline int list_empty_careful(const struct list_head *head)
{
	struct list_head *next = head->next;
	return (next == head) && (next == head->prev);
}

/**
 * list_rotate_left - rotate the list to the left
 * @head: the head of the list
 *
 * Moves the first entry to the tail; an empty list is left untouched.
 */
static inline void list_rotate_left(struct list_head *head)
{
	struct list_head *first;

	if (!list_empty(head)) {
		first = head->next;
		list_move_tail(first, head);
	}
}

/**
 * list_is_singular - tests whether a list has just one entry.
 * @head: the list to test.
 */
static inline int list_is_singular(const struct list_head *head)
{
	return !list_empty(head) && (head->next == head->prev);
}

/*
 * Move the entries from head->next up to and including @entry onto @list.
 * @head keeps whatever followed @entry.  No checks are made here.
 */
static inline void __list_cut_position(struct list_head *list,
		struct list_head *head, struct list_head *entry)
{
	struct list_head *new_first = entry->next;
	list->next = head->next;
	list->next->prev = list;
	list->prev = entry;
	entry->next = list;
	head->next = new_first;
	new_first->prev = head;
}

/**
 * list_cut_position - cut a list into two
 * @list: a new list to add all removed entries
 * @head: a list with entries
 * @entry: an entry within head, could be the head itself
 *	and if so we won't cut the list
 *
 * This helper moves the initial part of @head, up to and
 * including @entry, from @head to @list. You should
 * pass on @entry an element you know is on @head. @list
 * should be an empty list or a list you do not care about
 * losing its data.
 *
 */
static inline void list_cut_position(struct list_head *list,
		struct list_head *head, struct list_head *entry)
{
	if (list_empty(head))
		return;
	/* single-entry list: @entry must be that entry or the head itself */
	if (list_is_singular(head) &&
		(head->next != entry && head != entry))
		return;
	if (entry == head)
		INIT_LIST_HEAD(list);
	else
		__list_cut_position(list, head, entry);
}

/*
 * Link the entries of @list (first through last) between @prev and @next.
 * @list itself is not written to, so it still points at the moved entries
 * afterwards.
 */
static inline void __list_splice(const struct list_head *list,
				 struct list_head *prev,
				 struct list_head *next)
{
	struct list_head *first = list->next;
	struct list_head *last = list->prev;

	first->prev = prev;
	prev->next = first;

	last->next = next;
	next->prev = last;
}

/**
 * list_splice - join two lists, this is designed for stacks
 * @list: the new list to add.
 * @head: the place to add it in the first list.
 */
static inline void list_splice(const struct list_head *list,
				struct list_head *head)
{
	if (!list_empty(list))
		__list_splice(list, head, head->next);
}

/**
 * list_splice_tail - join two lists, each list being a queue
 * @list: the new list to add.
 * @head: the place to add it in the first list.
 */
static inline void list_splice_tail(struct list_head *list,
				struct list_head *head)
{
	if (!list_empty(list))
		__list_splice(list, head->prev, head);
}

/**
 * list_splice_init - join two lists and reinitialise the emptied list.
 * @list: the new list to add.
 * @head: the place to add it in the first list.
 *
 * The list at @list is reinitialised
 */
static inline void list_splice_init(struct list_head *list,
				    struct list_head *head)
{
	if (!list_empty(list)) {
		__list_splice(list, head, head->next);
		/* the entries now hang off @head; make @list empty again */
		INIT_LIST_HEAD(list);
	}
}

/**
 * list_splice_tail_init - join two lists and reinitialise the emptied list
 * @list: the new list to add.
 * @head: the place to add it in the first list.
 *
 * Each of the lists is a queue.
 * The list at @list is reinitialised
 */
static inline void list_splice_tail_init(struct list_head *list,
					 struct list_head *head)
{
	if (!list_empty(list)) {
		__list_splice(list, head->prev, head);
		/* the entries now hang off @head; make @list empty again */
		INIT_LIST_HEAD(list);
	}
}

/**
 * list_entry - get the struct for this entry
 * @ptr:	the &struct list_head pointer.
 * @type:	the type of the struct this is embedded in.
 * @member:	the name of the list_head within the struct.
 */
#define list_entry(ptr, type, member) \
	container_of(ptr, type, member)

/**
 * list_first_entry - get the first element from a list
 * @ptr:	the list head to take the element from.
 * @type:	the type of the struct this is embedded in.
 * @member:	the name of the list_head within the struct.
 *
 * Note, that list is expected to be not empty.
 */
#define list_first_entry(ptr, type, member) \
	list_entry((ptr)->next, type, member)

/**
 * list_last_entry - get the last element from a list
 * @ptr:	the list head to take the element from.
 * @type:	the type of the struct this is embedded in.
 * @member:	the name of the list_head within the struct.
 *
 * Note, that list is expected to be not empty.
 */
#define list_last_entry(ptr, type, member) \
	list_entry((ptr)->prev, type, member)

/**
 * list_first_entry_or_null - get the first element from a list
 * @ptr:	the list head to take the element from.
 * @type:	the type of the struct this is embedded in.
 * @member:	the name of the list_head within the struct.
 *
 * Note that if the list is empty, it returns NULL.
 */
#define list_first_entry_or_null(ptr, type, member) \
	(!list_empty(ptr) ? list_first_entry(ptr, type, member) : NULL)

/**
 * list_next_entry - get the next element in list
 * @pos:	the type * to cursor
 * @member:	the name of the list_head within the struct.
 *
 * No check is made for the list head: when @pos is the last entry the
 * result is computed from the head's list_head.
 */
#define list_next_entry(pos, member) \
	list_entry((pos)->member.next, typeof(*(pos)), member)

/**
 * list_prev_entry - get the prev element in list
 * @pos:	the type * to cursor
 * @member:	the name of the list_head within the struct.
 *
 * No check is made for the list head: when @pos is the first entry the
 * result is computed from the head's list_head.
 */
#define list_prev_entry(pos, member) \
	list_entry((pos)->member.prev, typeof(*(pos)), member)

/**
 * list_for_each	-	iterate over a list
 * @pos:	the &struct list_head to use as a loop cursor.
 * @head:	the head for your list.
 *
 * pos->next is read after the loop body has run, so the body must not
 * unlink @pos.
 */
#define list_for_each(pos, head) \
	for (pos = (head)->next; pos != (head); pos = pos->next)

/**
 * list_for_each_prev	-	iterate over a list backwards
 * @pos:	the &struct list_head to use as a loop cursor.
 * @head:	the head for your list.
 *
 * pos->prev is read after the loop body has run, so the body must not
 * unlink @pos.
 */
#define list_for_each_prev(pos, head) \
	for (pos = (head)->prev; pos != (head); pos = pos->prev)

/**
 * list_for_each_safe - iterate over a list safe against removal of list entry
 * @pos:	the &struct list_head to use as a loop cursor.
 * @n:		another &struct list_head to use as temporary storage
 * @head:	the head for your list.
 */
#define list_for_each_safe(pos, n, head) \
	for (pos = (head)->next, n = pos->next; pos != (head); \
		pos = n, n = pos->next)

/**
 * list_for_each_prev_safe - iterate over a list backwards safe against removal of list entry
 * @pos:	the &struct list_head to use as a loop cursor.
 * @n:		another &struct list_head to use as temporary storage
 * @head:	the head for your list.
 */
#define list_for_each_prev_safe(pos, n, head) \
	for (pos = (head)->prev, n = pos->prev; \
	     pos != (head); \
	     pos = n, n = pos->prev)

/**
 * list_for_each_entry	-	iterate over list of given type
 * @pos:	the type * to use as a loop cursor.
 * @head:	the head for your list.
 * @member:	the name of the list_head within the struct.
 *
 * The loop ends when the list_head embedded in @pos is @head itself; at
 * that point @pos does not refer to a real entry.
 */
#define list_for_each_entry(pos, head, member)				\
	for (pos = list_first_entry(head, typeof(*pos), member);	\
	     &pos->member != (head);					\
	     pos = list_next_entry(pos, member))

/**
 * list_for_each_entry_reverse - iterate backwards over list of given type.
 * @pos:	the type * to use as a loop cursor.
 * @head:	the head for your list.
 * @member:	the name of the list_head within the struct.
 */
#define list_for_each_entry_reverse(pos, head, member)			\
	for (pos = list_last_entry(head, typeof(*pos), member);		\
	     &pos->member != (head);					\
	     pos = list_prev_entry(pos, member))

/**
 * list_prepare_entry - prepare a pos entry for use in list_for_each_entry_continue()
 * @pos:	the type * to use as a start point
 * @head:	the head of the list
 * @member:	the name of the list_head within the struct.
 *
 * Prepares a pos entry for use as a start point in list_for_each_entry_continue().
 * Uses the GNU "a ? : b" conditional with omitted middle operand: a
 * non-NULL @pos is returned as is.
 */
#define list_prepare_entry(pos, head, member) \
	((pos) ? : list_entry(head, typeof(*pos), member))

/**
 * list_for_each_entry_continue - continue iteration over list of given type
 * @pos:	the type * to use as a loop cursor.
 * @head:	the head for your list.
 * @member:	the name of the list_head within the struct.
 *
 * Continue to iterate over list of given type, continuing after
 * the current position.
 */
#define list_for_each_entry_continue(pos, head, member) 		\
	for (pos = list_next_entry(pos, member);			\
	     &pos->member != (head);					\
	     pos = list_next_entry(pos, member))

/**
 * list_for_each_entry_continue_reverse - iterate backwards from the given point
 * @pos:	the type * to use as a loop cursor.
 * @head:	the head for your list.
 * @member:	the name of the list_head within the struct.
 *
 * Start to iterate over list of given type backwards, continuing after
 * the current position.
 */
#define list_for_each_entry_continue_reverse(pos, head, member)		\
	for (pos = list_prev_entry(pos, member);			\
	     &pos->member != (head);					\
	     pos = list_prev_entry(pos, member))

/**
 * list_for_each_entry_from - iterate over list of given type from the current point
 * @pos:	the type * to use as a loop cursor.
 * @head:	the head for your list.
 * @member:	the name of the list_head within the struct.
 *
 * Iterate over list of given type, continuing from current position.
 */
#define list_for_each_entry_from(pos, head, member) 			\
	for (; &pos->member != (head);					\
	     pos = list_next_entry(pos, member))

/**
 * list_for_each_entry_safe - iterate over list of given type safe against removal of list entry
 * @pos:	the type * to use as a loop cursor.
 * @n:		another type * to use as temporary storage
 * @head:	the head for your list.
 * @member:	the name of the list_head within the struct.
 *
 * @n is loaded with the following entry before the loop body runs, so the
 * body may unlink @pos.
 */
#define list_for_each_entry_safe(pos, n, head, member)			\
	for (pos = list_first_entry(head, typeof(*pos), member),	\
		n = list_next_entry(pos, member);			\
	     &pos->member != (head); 					\
	     pos = n, n = list_next_entry(n, member))

/**
 * list_for_each_entry_safe_continue - continue list iteration safe against removal
 * @pos:	the type * to use as a loop cursor.
 * @n:		another type * to use as temporary storage
 * @head:	the head for your list.
 * @member:	the name of the list_head within the struct.
 *
 * Iterate over list of given type, continuing after current point,
 * safe against removal of list entry.
 */
#define list_for_each_entry_safe_continue(pos, n, head, member) 		\
	for (pos = list_next_entry(pos, member), 				\
		n = list_next_entry(pos, member);				\
	     &pos->member != (head);						\
	     pos = n, n = list_next_entry(n, member))

/**
 * list_for_each_entry_safe_from - iterate over list from current point safe against removal
 * @pos:	the type * to use as a loop cursor.
 * @n:		another type * to use as temporary storage
 * @head:	the head for your list.
 * @member:	the name of the list_head within the struct.
 *
 * Iterate over list of given type from current point, safe against
 * removal of list entry.
 */
#define list_for_each_entry_safe_from(pos, n, head, member) 			\
	for (n = list_next_entry(pos, member);					\
	     &pos->member != (head);						\
	     pos = n, n = list_next_entry(n, member))

/**
 * list_for_each_entry_safe_reverse - iterate backwards over list safe against removal
 * @pos:	the type * to use as a loop cursor.
 * @n:		another type * to use as temporary storage
 * @head:	the head for your list.
 * @member:	the name of the list_head within the struct.
 *
 * Iterate backwards over list of given type, safe against removal
 * of list entry.
 */
#define list_for_each_entry_safe_reverse(pos, n, head, member)		\
	for (pos = list_last_entry(head, typeof(*pos), member),		\
		n = list_prev_entry(pos, member);			\
	     &pos->member != (head); 					\
	     pos = n, n = list_prev_entry(n, member))

/**
 * list_safe_reset_next - reset a stale list_for_each_entry_safe loop
 * @pos:	the loop cursor used in the list_for_each_entry_safe loop
 * @n:		temporary storage used in list_for_each_entry_safe
 * @member:	the name of the list_head within the struct.
 *
 * list_safe_reset_next is not safe to use in general if the list may be
 * modified concurrently (eg. the lock is dropped in the loop body). An
 * exception to this is if the cursor element (pos) is pinned in the list,
 * and list_safe_reset_next is called after re-taking the lock and before
 * completing the current iteration of the loop body.
 */
#define list_safe_reset_next(pos, n, member)				\
	n = list_next_entry(pos, member)

/*
 * Everything from here to the matching #endif is compiled out; struct
 * hlist_head and struct hlist_node are not declared anywhere above in
 * this header.
 */
#if 0
/*
 * Double linked lists with a single pointer list head.
 * Mostly useful for hash tables where the two pointer list head is
 * too wasteful.
 * You lose the ability to access the tail in O(1).
 */

#define HLIST_HEAD_INIT { .first = NULL }
#define HLIST_HEAD(name) struct hlist_head name = {  .first = NULL }
#define INIT_HLIST_HEAD(ptr) ((ptr)->first = NULL)
/* Mark node @h as not being on any list (next and pprev both NULL). */
static inline void INIT_HLIST_NODE(struct hlist_node *h)
{
	h->next = NULL;
	h->pprev = NULL;
}

/* Non-zero when @h has no pprev, i.e. it is not linked into a list. */
static inline int hlist_unhashed(const struct hlist_node *h)
{
	return !h->pprev;
}

/* Non-zero when list head @h has no first node. */
static inline int hlist_empty(const struct hlist_head *h)
{
	return !h->first;
}

/* Unlink @n; @n's own next/pprev are left unchanged. */
static inline void __hlist_del(struct hlist_node *n)
{
	struct hlist_node *next = n->next;
	struct hlist_node **pprev = n->pprev;
	*pprev = next;
	if (next)
		next->pprev = pprev;
}

/* Unlink @n and poison its pointers. */
static inline void hlist_del(struct hlist_node *n)
{
	__hlist_del(n);
	n->next = LIST_POISON1;
	n->pprev = LIST_POISON2;
}

/* Unlink @n if it is linked, then reset it to the unlinked state. */
static inline void hlist_del_init(struct hlist_node *n)
{
	if (!hlist_unhashed(n)) {
		__hlist_del(n);
		INIT_HLIST_NODE(n);
	}
}

/* Insert @n as the new first node of @h. */
static inline void hlist_add_head(struct hlist_node *n, struct hlist_head *h)
{
	struct hlist_node *first = h->first;
	n->next = first;
	if (first)
		first->pprev = &n->next;
	h->first = n;
	n->pprev = &h->first;
}

/* next must be != NULL */
static inline void hlist_add_before(struct hlist_node *n,
					struct hlist_node *next)
{
	n->pprev = next->pprev;
	n->next = next;
	next->pprev = &n->next;
	*(n->pprev) = n;
}

/* Insert @n directly after @prev. */
static inline void hlist_add_behind(struct hlist_node *n,
				    struct hlist_node *prev)
{
	n->next = prev->next;
	prev->next = n;
	n->pprev = &prev->next;

	if (n->next)
		n->next->pprev  = &n->next;
}

/* after that we'll appear to be on some hlist and hlist_del will work */
static inline void hlist_add_fake(struct hlist_node *n)
{
	n->pprev = &n->next;
}

/*
 * Move a list from one list head to another. Fixup the pprev
 * reference of the first entry if it exists.
 */
static inline void hlist_move_list(struct hlist_head *old,
				   struct hlist_head *new)
{
	new->first = old->first;
	if (new->first)
		new->first->pprev = &new->first;
	old->first = NULL;
}

#define hlist_entry(ptr, type, member) container_of(ptr,type,member)

/* Iterate over the nodes of @head; the body must not unlink @pos. */
#define hlist_for_each(pos, head) \
	for (pos = (head)->first; pos ; pos = pos->next)

/* As hlist_for_each(), but @n caches pos->next before the body runs. */
#define hlist_for_each_safe(pos, n, head) \
	for (pos = (head)->first; pos && ({ n = pos->next; 1; }); \
	     pos = n)

/* hlist_entry() that yields NULL for a NULL @ptr; @ptr is evaluated once. */
#define hlist_entry_safe(ptr, type, member) \
	({ typeof(ptr) ____ptr = (ptr); \
	   ____ptr ? hlist_entry(____ptr, type, member) : NULL; \
	})

/**
 * hlist_for_each_entry	- iterate over list of given type
 * @pos:	the type * to use as a loop cursor.
 * @head:	the head for your list.
 * @member:	the name of the hlist_node within the struct.
 */
#define hlist_for_each_entry(pos, head, member)				\
	for (pos = hlist_entry_safe((head)->first, typeof(*(pos)), member);\
	     pos;							\
	     pos = hlist_entry_safe((pos)->member.next, typeof(*(pos)), member))

/**
 * hlist_for_each_entry_continue - iterate over a hlist continuing after current point
 * @pos:	the type * to use as a loop cursor.
 * @member:	the name of the hlist_node within the struct.
 */
#define hlist_for_each_entry_continue(pos, member)			\
	for (pos = hlist_entry_safe((pos)->member.next, typeof(*(pos)), member);\
	     pos;							\
	     pos = hlist_entry_safe((pos)->member.next, typeof(*(pos)), member))

/**
 * hlist_for_each_entry_from - iterate over a hlist continuing from current point
 * @pos:	the type * to use as a loop cursor.
 * @member:	the name of the hlist_node within the struct.
 */
#define hlist_for_each_entry_from(pos, member)				\
	for (; pos;							\
	     pos = hlist_entry_safe((pos)->member.next, typeof(*(pos)), member))

/**
 * hlist_for_each_entry_safe - iterate over list of given type safe against removal of list entry
 * @pos:	the type * to use as a loop cursor.
 * @n:		another &struct hlist_node to use as temporary storage
 * @head:	the head for your list.
 * @member:	the name of the hlist_node within the struct.
 *
 * @n is loaded with pos->member.next before the loop body runs.
 */
#define hlist_for_each_entry_safe(pos, n, head, member) 		\
	for (pos = hlist_entry_safe((head)->first, typeof(*pos), member);\
	     pos && ({ n = pos->member.next; 1; });			\
	     pos = hlist_entry_safe(n, typeof(*pos), member))

#endif
#endif
--------------------------------------------------------------------------------
/nuse-glue.c:
--------------------------------------------------------------------------------
/*
 * system calls glue code for NUSE
 * Copyright (c) 2015 Hajime Tazaki
 *
 * Author: Hajime Tazaki
 *
 * Note: some of the code is picked from rumpkernel, written by Antti Kantee.
8 | */ 9 | 10 | #include 11 | #include 12 | #include 13 | #include 14 | #define __USE_GNU 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | 27 | #include "sim-assert.h" 28 | #include "nuse-hostcalls.h" 29 | #include "nuse.h" 30 | #include "nuse-libc.h" 31 | #include "sim-init.h" 32 | #include "sim.h" 33 | 34 | extern struct SimExported *g_exported; 35 | struct SimSocket; 36 | 37 | #define weak_alias(name, aliasname) \ 38 | extern __typeof (name) aliasname __attribute__ ((weak, alias (# name))) 39 | #define MSEC_PER_SEC 1000L 40 | #define NSEC_PER_MSEC 1000000L 41 | #define NSEC_PER_SEC 1000000000L 42 | 43 | static inline __s64 timespec_to_ns(const struct timespec *ts) 44 | { 45 | return ((__s64) ts->tv_sec * NSEC_PER_SEC) + ts->tv_nsec; 46 | } 47 | 48 | /* FIXME: need to be configurable */ 49 | struct nuse_fd nuse_fd_table[1024]; 50 | 51 | /* epoll relates */ 52 | struct epoll_fd { 53 | struct epoll_fd *next; 54 | struct epoll_event *ev; 55 | int fd; 56 | }; 57 | 58 | 59 | int nuse_socket(int domain, int type, int protocol) 60 | { 61 | struct SimSocket *kernel_socket; 62 | int ret, real_fd; 63 | 64 | if (domain == AF_UNIX) { 65 | if (!host_socket) nuse_hostcall_init(); 66 | real_fd = host_socket (domain, type, protocol); 67 | if (real_fd == -1) 68 | return real_fd; 69 | nuse_fd_table[real_fd].real_fd = real_fd; 70 | nuse_fd_table[real_fd].nuse_sock = NULL; 71 | return real_fd; 72 | } 73 | 74 | ret = g_exported->sock_socket(domain, type, protocol, &kernel_socket); 75 | if (ret < 0) 76 | errno = -ret; 77 | real_fd = host_open("/", O_RDONLY, 0); 78 | nuse_fd_table[real_fd].nuse_sock = malloc(sizeof(struct nuse_socket)); 79 | memset(nuse_fd_table[real_fd].nuse_sock, 0, sizeof(struct nuse_socket)); 80 | 81 | nuse_fd_table[real_fd].nuse_sock->kern_sock = kernel_socket; 82 | nuse_fd_table[real_fd].nuse_sock->refcnt++; 83 | nuse_fd_table[real_fd].real_fd = real_fd; 
84 | 85 | return real_fd; 86 | } 87 | weak_alias(nuse_socket, socket); 88 | 89 | int nuse_close(int fd) 90 | { 91 | int ret = 0; 92 | 93 | if (!nuse_fd_table[fd].nuse_sock) { 94 | if (nuse_fd_table[fd].epoll_fd > 0) { 95 | free(nuse_fd_table[fd].epoll_fd); 96 | nuse_fd_table[fd].epoll_fd = NULL; 97 | return 0; 98 | } else if (nuse_fd_table[fd].real_fd > 0) { 99 | if (!host_close) nuse_hostcall_init(); 100 | return host_close(nuse_fd_table[fd].real_fd); 101 | } 102 | return EBADF; 103 | } 104 | 105 | if (--nuse_fd_table[fd].nuse_sock->refcnt > 0) { 106 | goto end; 107 | } 108 | 109 | ret = g_exported->sock_close(nuse_fd_table[fd].nuse_sock->kern_sock); 110 | if (ret < 0) 111 | errno = -ret; 112 | 113 | end: 114 | nuse_fd_table[fd].nuse_sock = 0; 115 | host_close(nuse_fd_table[fd].real_fd); 116 | return ret; 117 | } 118 | weak_alias(nuse_close, close); 119 | 120 | ssize_t nuse_recvmsg(int fd, struct msghdr *msghdr, int flags) 121 | { 122 | struct SimSocket *kernel_socket = nuse_fd_table[fd].nuse_sock->kern_sock; 123 | ssize_t ret; 124 | 125 | if (nuse_fd_table[fd].nuse_sock->flags & O_NONBLOCK) 126 | flags |= MSG_DONTWAIT; 127 | ret = g_exported->sock_recvmsg(kernel_socket, msghdr, flags); 128 | if (ret < 0) 129 | errno = -ret; 130 | return ret; 131 | } 132 | weak_alias(nuse_recvmsg, recvmsg); 133 | 134 | /* XXX: timeout is not implemented. 
*/ 135 | int nuse_recvmmsg(int fd, struct mmsghdr *msgvec, unsigned int vlen, 136 | int flags, const struct timespec *timeout) 137 | { 138 | int err, datagrams; 139 | struct mmsghdr *entry; 140 | 141 | datagrams = 0; 142 | entry = msgvec; 143 | err = 0; 144 | 145 | while (datagrams < vlen) { 146 | err = nuse_recvmsg(fd, 147 | (struct msghdr *)entry, 148 | flags); 149 | if (err < 0) 150 | break; 151 | entry->msg_len = err; 152 | ++entry; 153 | ++datagrams; 154 | } 155 | 156 | /* We only return an error if no datagrams were able to be recvmmsg */ 157 | if (datagrams != 0) 158 | return datagrams; 159 | 160 | return err; 161 | } 162 | /* 163 | * FIXME: recvmmsg has different prototypes in different libc(s) ? 164 | * such as recvmmsg(...., const struct timespec *) or 165 | * recvmmsg(...., struct timespec *) etc. 166 | * so disable weak alias for a while. 167 | * 168 | */ 169 | #if 0 170 | weak_alias(nuse_recvmmsg, recvmmsg); 171 | weak_alias(nuse_recvmmsg, __recvmmsg); 172 | #endif 173 | 174 | ssize_t nuse_sendmsg(int fd, const struct msghdr *msghdr, int flags) 175 | { 176 | struct SimSocket *kernel_socket = nuse_fd_table[fd].nuse_sock->kern_sock; 177 | ssize_t ret; 178 | 179 | if (nuse_fd_table[fd].nuse_sock->flags & O_NONBLOCK) 180 | flags |= MSG_DONTWAIT; 181 | ret = g_exported->sock_sendmsg(kernel_socket, msghdr, flags); 182 | if (ret < 0) 183 | errno = -ret; 184 | return ret; 185 | } 186 | weak_alias(nuse_sendmsg, sendmsg); 187 | 188 | int nuse_sendmmsg(int fd, struct mmsghdr *msgvec, unsigned int vlen, 189 | int flags) 190 | { 191 | int err, datagrams; 192 | struct mmsghdr *entry; 193 | 194 | datagrams = 0; 195 | entry = msgvec; 196 | err = 0; 197 | 198 | while (datagrams < vlen) { 199 | err = nuse_sendmsg(fd, 200 | (struct msghdr *)entry, 201 | flags); 202 | if (err < 0) 203 | break; 204 | entry->msg_len = err; 205 | ++entry; 206 | ++datagrams; 207 | } 208 | 209 | /* We only return an error if no datagrams were able to be sent */ 210 | if (datagrams != 0) 211 | 
return datagrams; 212 | 213 | return err; 214 | } 215 | weak_alias(nuse_sendmmsg, sendmmsg); 216 | weak_alias(nuse_sendmmsg, __sendmmsg); 217 | 218 | int nuse_getsockname(int fd, struct sockaddr *name, socklen_t *namelen) 219 | { 220 | struct SimSocket *kernel_socket = nuse_fd_table[fd].nuse_sock->kern_sock; 221 | int ret; 222 | 223 | ret = g_exported->sock_getsockname(kernel_socket, name, (int *)namelen); 224 | return ret; 225 | } 226 | weak_alias(nuse_getsockname, getsockname); 227 | 228 | int nuse_getpeername(int fd, struct sockaddr *name, socklen_t *namelen) 229 | { 230 | struct SimSocket *kernel_socket = nuse_fd_table[fd].nuse_sock->kern_sock; 231 | int ret; 232 | 233 | ret = g_exported->sock_getsockname(kernel_socket, name, (int *)namelen); 234 | return ret; 235 | } 236 | weak_alias(nuse_getpeername, getpeername); 237 | 238 | int nuse_bind(int fd, const struct sockaddr *name, socklen_t namelen) 239 | { 240 | struct SimSocket *kernel_socket = nuse_fd_table[fd].nuse_sock->kern_sock; 241 | int ret; 242 | 243 | ret = g_exported->sock_bind(kernel_socket, name, namelen); 244 | return ret; 245 | } 246 | weak_alias(nuse_bind, bind); 247 | 248 | int nuse_connect(int fd, const struct sockaddr *addr, socklen_t len) 249 | { 250 | struct SimSocket *kernel_socket = nuse_fd_table[fd].nuse_sock->kern_sock; 251 | int ret; 252 | 253 | ret = g_exported->sock_connect(kernel_socket, addr, len, 254 | nuse_fd_table[fd].nuse_sock->flags); 255 | return ret; 256 | } 257 | weak_alias(nuse_connect, connect); 258 | 259 | int nuse_listen(int fd, int v1) 260 | { 261 | struct SimSocket *kernel_socket = nuse_fd_table[fd].nuse_sock->kern_sock; 262 | int retval; 263 | 264 | retval = g_exported->sock_listen(kernel_socket, v1); 265 | return retval; 266 | } 267 | weak_alias(nuse_listen, listen); 268 | 269 | #if 0 270 | int lib_sock_shutdown(struct SimSocket *socket, int how) 271 | { 272 | struct socket *sock = (struct socket *)socket; 273 | int retval = sock->ops->shutdown(sock, how); 274 | 275 | 
return retval; 276 | } 277 | #endif 278 | 279 | int nuse_accept4(int fd, struct sockaddr *addr, int *addrlen, int flags) 280 | { 281 | struct SimSocket *kernel_socket = nuse_fd_table[fd].nuse_sock->kern_sock; 282 | struct SimSocket *new_socket = NULL; 283 | int retval, real_fd; 284 | 285 | retval = g_exported->sock_accept(kernel_socket, &new_socket, flags); 286 | if (retval < 0) { 287 | errno = -retval; 288 | free(new_socket); 289 | perror("accept err"); 290 | return -1; 291 | } 292 | if (addr != 0) { 293 | retval = g_exported->sock_getpeername(new_socket, addr, (int *)addrlen); 294 | if (retval < 0) { 295 | errno = -retval; 296 | g_exported->sock_close(new_socket); 297 | perror("getpeername err"); 298 | return -1; 299 | } 300 | } 301 | 302 | real_fd = host_open("/", O_RDONLY, 0); 303 | nuse_fd_table[real_fd].nuse_sock = malloc(sizeof(struct nuse_socket)); 304 | memset(nuse_fd_table[real_fd].nuse_sock, 0, sizeof(struct nuse_socket)); 305 | 306 | nuse_fd_table[real_fd].nuse_sock->kern_sock = new_socket; 307 | nuse_fd_table[real_fd].nuse_sock->refcnt++; 308 | nuse_fd_table[real_fd].real_fd = real_fd; 309 | return real_fd; 310 | } 311 | int nuse_accept(int fd, struct sockaddr *addr, socklen_t *addrlen) 312 | { 313 | return nuse_accept4(fd, addr, (int *)addrlen, 314 | nuse_fd_table[fd].nuse_sock->flags); 315 | } 316 | weak_alias(nuse_accept, accept); 317 | 318 | ssize_t nuse_write(int fd, const void *buf, size_t count) 319 | { 320 | if (!nuse_fd_table[fd].nuse_sock) { 321 | if (!host_write) nuse_hostcall_init(); 322 | return host_write(nuse_fd_table[fd].real_fd, buf, count); 323 | } 324 | 325 | struct msghdr msg; 326 | struct iovec iov; 327 | 328 | msg.msg_control = 0; 329 | msg.msg_controllen = 0; 330 | msg.msg_iovlen = 1; 331 | msg.msg_iov = &iov; 332 | iov.iov_len = count; 333 | iov.iov_base = (void *)buf; 334 | msg.msg_name = 0; 335 | msg.msg_namelen = 0; 336 | return nuse_sendmsg(fd, &msg, 0); 337 | } 338 | weak_alias(nuse_write, write); 339 | 340 | ssize_t 
nuse_writev(int fd, const struct iovec *iov, int count) 341 | { 342 | if (!nuse_fd_table[fd].nuse_sock) { 343 | if (!host_writev) nuse_hostcall_init(); 344 | return host_writev(nuse_fd_table[fd].real_fd, iov, count); 345 | } 346 | 347 | struct msghdr msg; 348 | 349 | msg.msg_control = 0; 350 | msg.msg_controllen = 0; 351 | msg.msg_iovlen = 1; 352 | msg.msg_iov = (struct iovec *)iov; 353 | msg.msg_name = 0; 354 | msg.msg_namelen = 0; 355 | return nuse_sendmsg(fd, &msg, 0); 356 | } 357 | weak_alias(nuse_writev, writev); 358 | 359 | ssize_t nuse_sendto(int fd, const void *buf, size_t len, int flags, 360 | const struct sockaddr *dest_addr, unsigned int addrlen) 361 | { 362 | struct msghdr msg; 363 | struct iovec iov; 364 | ssize_t retval; 365 | 366 | memset(&msg, 0, sizeof(struct msghdr)); 367 | msg.msg_control = 0; 368 | msg.msg_controllen = 0; 369 | msg.msg_iovlen = 1; 370 | msg.msg_iov = &iov; 371 | iov.iov_len = len; 372 | iov.iov_base = (void *)buf; 373 | msg.msg_name = (void *)dest_addr; 374 | msg.msg_namelen = addrlen; 375 | retval = nuse_sendmsg(fd, &msg, flags); 376 | return retval; 377 | } 378 | weak_alias(nuse_sendto, sendto); 379 | 380 | ssize_t nuse_send(int fd, const void *buf, size_t len, int flags) 381 | { 382 | return nuse_sendto(fd, buf, len, flags, 0, 0); 383 | } 384 | weak_alias(nuse_send, send); 385 | 386 | ssize_t nuse_read(int fd, void *buf, size_t count) 387 | { 388 | if (!nuse_fd_table[fd].nuse_sock) { 389 | if (!host_read) nuse_hostcall_init(); 390 | return host_read(nuse_fd_table[fd].real_fd, buf, count); 391 | } 392 | 393 | struct msghdr msg; 394 | struct iovec iov; 395 | ssize_t retval; 396 | 397 | msg.msg_control = 0; 398 | msg.msg_controllen = 0; 399 | msg.msg_iovlen = 1; 400 | msg.msg_iov = &iov; 401 | iov.iov_len = count; 402 | iov.iov_base = buf; 403 | msg.msg_name = 0; 404 | msg.msg_namelen = 0; 405 | retval = nuse_recvmsg(fd, &msg, 0); 406 | return retval; 407 | } 408 | weak_alias(nuse_read, read); 409 | 410 | #ifdef _K_SS_MAXSIZE 
411 | #define SOCK_MAX_ADDRESS_SIZE _K_SS_MAXSIZE 412 | #else 413 | #define SOCK_MAX_ADDRESS_SIZE 128 414 | #endif 415 | ssize_t nuse_recvfrom(int fd, void *buf, size_t len, int flags, 416 | struct sockaddr *from, socklen_t *fromlen) 417 | { 418 | uint8_t address[SOCK_MAX_ADDRESS_SIZE]; 419 | struct msghdr msg; 420 | struct iovec iov; 421 | ssize_t retval; 422 | 423 | msg.msg_control = 0; 424 | msg.msg_controllen = 0; 425 | msg.msg_iovlen = 1; 426 | msg.msg_iov = &iov; 427 | iov.iov_len = len; 428 | iov.iov_base = buf; 429 | msg.msg_name = address; 430 | msg.msg_namelen = SOCK_MAX_ADDRESS_SIZE; 431 | retval = nuse_recvmsg(fd, &msg, flags); 432 | if (retval != -1 && from != 0) { 433 | if (*fromlen < msg.msg_namelen) { 434 | errno = EINVAL; 435 | return -1; 436 | } else { 437 | *fromlen = msg.msg_namelen; 438 | memcpy(from, msg.msg_name, msg.msg_namelen); 439 | } 440 | } 441 | return retval; 442 | } 443 | weak_alias(nuse_recvfrom, recvfrom); 444 | 445 | ssize_t nuse_recv(int fd, void *buf, size_t count, int flags) 446 | { 447 | return nuse_recvfrom(fd, buf, count, flags, 0, 0); 448 | } 449 | weak_alias(nuse_recv, recv); 450 | 451 | int nuse_setsockopt(int fd, int level, int optname, 452 | const void *optval, socklen_t optlen) 453 | { 454 | struct SimSocket *kernel_socket = nuse_fd_table[fd].nuse_sock->kern_sock; 455 | int retval = g_exported->sock_setsockopt(kernel_socket, level, optname, optval, 456 | optlen); 457 | if (retval < 0) { 458 | errno = -retval; 459 | return -1; 460 | } 461 | return retval; 462 | } 463 | weak_alias(nuse_setsockopt, setsockopt); 464 | 465 | int nuse_getsockopt(int fd, int level, int optname, 466 | void *optval, socklen_t *optlen) 467 | { 468 | struct SimSocket *kernel_socket = nuse_fd_table[fd].nuse_sock->kern_sock; 469 | int retval = g_exported->sock_getsockopt(kernel_socket, level, optname, optval, 470 | (int *)optlen); 471 | if (retval < 0) { 472 | errno = -retval; 473 | return -1; 474 | } 475 | return retval; 476 | } 477 | 
weak_alias(nuse_getsockopt, getsockopt); 478 | 479 | int nuse_ioctl(int fd, int request, ...) 480 | { 481 | va_list vl; 482 | char *argp; 483 | 484 | va_start(vl, request); 485 | argp = va_arg(vl, char *); 486 | va_end(vl); 487 | 488 | if (!nuse_fd_table[fd].nuse_sock) { 489 | if (!host_ioctl) nuse_hostcall_init(); 490 | return host_ioctl(nuse_fd_table[fd].real_fd, request, argp); 491 | } 492 | 493 | return g_exported->sock_ioctl(nuse_fd_table[fd].nuse_sock->kern_sock, request, argp); 494 | } 495 | weak_alias(nuse_ioctl, ioctl); 496 | 497 | int fcntl(int fd, int cmd, ... /* arg */ ) 498 | { 499 | va_list vl; 500 | int *argp; 501 | long err = -EINVAL; 502 | 503 | va_start(vl, cmd); 504 | argp = va_arg(vl, int *); 505 | va_end(vl); 506 | 507 | if (!nuse_fd_table[fd].nuse_sock) { 508 | if (!host_fcntl) nuse_hostcall_init(); 509 | return host_fcntl(nuse_fd_table[fd].real_fd, cmd, argp); 510 | } 511 | 512 | /* nuse routine */ 513 | switch (cmd) { 514 | case F_DUPFD: 515 | err = host_fcntl(nuse_fd_table[fd].real_fd, cmd, argp); 516 | if (err == -1) { 517 | return err; 518 | } 519 | nuse_fd_table[err].real_fd = err; 520 | nuse_fd_table[err].nuse_sock = nuse_fd_table[fd].nuse_sock; 521 | nuse_fd_table[err].epoll_fd = nuse_fd_table[fd].epoll_fd; 522 | nuse_fd_table[err].nuse_sock->refcnt++; 523 | 524 | break; 525 | case F_GETFL: 526 | return nuse_fd_table[fd].nuse_sock->flags; 527 | break; 528 | case F_SETFL: 529 | nuse_fd_table[fd].nuse_sock->flags = (intptr_t)argp; 530 | return 0; 531 | break; 532 | default: 533 | break; 534 | } 535 | return err; 536 | } 537 | 538 | int open(const char *pathname, int flags, ...) 
539 | { 540 | va_list vl; 541 | 542 | va_start(vl, flags); 543 | 544 | if (!host_open) nuse_hostcall_init(); 545 | int real_fd = host_open(pathname, flags, va_arg(vl, mode_t)); 546 | va_end(vl); 547 | 548 | if (real_fd < 0) { 549 | perror("open"); 550 | return -1; 551 | } 552 | nuse_fd_table[real_fd].real_fd = real_fd; 553 | return real_fd; 554 | } 555 | 556 | int open64(const char *pathname, int flags, mode_t mode) 557 | { 558 | if (!host_open64) nuse_hostcall_init(); 559 | int real_fd = host_open64(pathname, flags, mode); 560 | 561 | /* printf ("%d, %llu %s %s\n", nuse_fd_table[curfd].real_fd, curfd, pathname, __FUNCTION__); */ 562 | if (real_fd < 0) { 563 | perror("open64"); 564 | return -1; 565 | } 566 | nuse_fd_table[real_fd].real_fd = real_fd; 567 | return real_fd; 568 | } 569 | 570 | int pipe(int pipefd[2]) 571 | { 572 | if (!host_pipe) nuse_hostcall_init(); 573 | int ret = host_pipe(pipefd); 574 | 575 | if (ret == -1) 576 | return ret; 577 | 578 | nuse_fd_table[pipefd[0]].real_fd = pipefd[0]; 579 | nuse_fd_table[pipefd[1]].real_fd = pipefd[1]; 580 | return ret; 581 | } 582 | 583 | 584 | /* From librumphijack/hijack.c */ 585 | struct pollarg { 586 | struct pollfd *pfds; 587 | nfds_t nfds; 588 | const struct timespec *ts; 589 | const sigset_t *sigmask; 590 | int pipefd; 591 | int errnum; 592 | }; 593 | 594 | static void * 595 | hostpoll(void *arg) 596 | { 597 | struct pollarg *parg = arg; 598 | intptr_t rv; 599 | int to; 600 | 601 | to = parg->ts ? timespec_to_ns(parg->ts) / NSEC_PER_MSEC : -1; 602 | if (!host_poll) nuse_hostcall_init(); 603 | rv = host_poll(parg->pfds, parg->nfds, to); 604 | if (rv == -1) 605 | parg->errnum = errno; 606 | lib_assert(write(parg->pipefd, &rv, sizeof(rv)) > 0); 607 | 608 | return (void *)rv; 609 | } 610 | 611 | /* 612 | * poll is easy as long as the call comes in the fds only in one 613 | * kernel. otherwise its quite tricky... 
614 | */ 615 | 616 | static int 617 | do_host_nuse_poll(struct pollfd *fds, nfds_t nfds, struct timespec *ts) 618 | { 619 | /* copied from librumphijack/hijack.c */ 620 | struct pollfd *pfd_host = NULL, *pfd_rump = NULL; 621 | int rpipe[2] = {-1,-1}, hpipe[2] = {-1,-1}; 622 | struct pollarg parg; 623 | void *trv_val; 624 | int sverrno = 0, rv_rump, rv_host, errno_rump, errno_host; 625 | pthread_t pt; 626 | nfds_t i; 627 | int rv; 628 | 629 | /* 630 | * ok, this is where it gets tricky. We must support 631 | * this since it's a very common operation in certain 632 | * types of software (telnet, netcat, etc). We allocate 633 | * two vectors and run two poll commands in separate 634 | * threads. Whichever returns first "wins" and the 635 | * other kernel's fds won't show activity. 636 | */ 637 | rv = -1; 638 | 639 | /* allocate full vector for O(n) joining after call */ 640 | pfd_host = malloc(sizeof(*pfd_host)*(nfds+1)); 641 | if (!pfd_host) 642 | goto out; 643 | pfd_rump = malloc(sizeof(*pfd_rump)*(nfds+1)); 644 | if (!pfd_rump) { 645 | goto out; 646 | } 647 | 648 | /* 649 | * then, open two pipes, one for notifications 650 | * to each kernel. 651 | * 652 | * At least the rump pipe should probably be 653 | * cached, along with the helper threads. This 654 | * should give a microbenchmark improvement (haven't 655 | * experienced a macro-level problem yet, though). 
656 | */ 657 | if ((rv = pipe(rpipe)) == -1) { 658 | sverrno = errno; 659 | } 660 | if (rv == 0 && (rv = pipe(hpipe)) == -1) { 661 | sverrno = errno; 662 | } 663 | 664 | /* split vectors (or signal errors) */ 665 | for (i = 0; i < nfds; i++) { 666 | int fd; 667 | 668 | fds[i].revents = 0; 669 | if (fds[i].fd == -1) { 670 | pfd_host[i].fd = -1; 671 | pfd_rump[i].fd = -1; 672 | } else if (nuse_fd_table[fds[i].fd].nuse_sock) { 673 | pfd_host[i].fd = -1; 674 | fd = fds[i].fd; 675 | if (fd == rpipe[0] || fd == rpipe[1]) { 676 | fds[i].revents = POLLNVAL; 677 | if (rv != -1) 678 | rv++; 679 | } 680 | pfd_rump[i].fd = fd; 681 | pfd_rump[i].events = fds[i].events; 682 | } else { 683 | pfd_rump[i].fd = -1; 684 | fd = fds[i].fd; 685 | if (fd == hpipe[0] || fd == hpipe[1]) { 686 | fds[i].revents = POLLNVAL; 687 | if (rv != -1) 688 | rv++; 689 | } 690 | pfd_host[i].fd = fd; 691 | pfd_host[i].events = fds[i].events; 692 | } 693 | pfd_rump[i].revents = pfd_host[i].revents = 0; 694 | } 695 | if (rv) { 696 | goto out; 697 | } 698 | 699 | pfd_host[nfds].fd = hpipe[0]; 700 | pfd_host[nfds].events = POLLIN; 701 | pfd_rump[nfds].fd = rpipe[0]; 702 | pfd_rump[nfds].events = POLLIN; 703 | 704 | /* 705 | * then, create a thread to do host part and meanwhile 706 | * do rump kernel part right here 707 | */ 708 | 709 | parg.pfds = pfd_host; 710 | parg.nfds = nfds+1; 711 | parg.ts = ts; 712 | /* parg.sigmask = sigmask; */ 713 | parg.pipefd = rpipe[1]; 714 | host_pthread_create(&pt, NULL, hostpoll, &parg); 715 | 716 | rv_rump = nuse_poll(pfd_rump, nfds+1, ts); 717 | errno_rump = errno; 718 | lib_assert(write(hpipe[1], &rv, sizeof(rv)) > 0); 719 | host_pthread_join(pt, &trv_val); 720 | rv_host = (int)(intptr_t)trv_val; 721 | errno_host = parg.errnum; 722 | 723 | /* strip cross-thread notification from real results */ 724 | if (rv_host > 0 && pfd_host[nfds].revents & POLLIN) { 725 | rv_host--; 726 | } 727 | if (rv_rump > 0 && pfd_rump[nfds].revents & POLLIN) { 728 | rv_rump--; 729 | } 730 | 731 
| /* then merge the results into what's reported to the caller */ 732 | if (rv_rump > 0 || rv_host > 0) { 733 | /* SUCCESS */ 734 | 735 | rv = 0; 736 | if (rv_rump > 0) { 737 | for (i = 0; i < nfds; i++) { 738 | if (pfd_rump[i].fd != -1) 739 | fds[i].revents 740 | = pfd_rump[i].revents; 741 | } 742 | rv += rv_rump; 743 | } 744 | if (rv_host > 0) { 745 | for (i = 0; i < nfds; i++) { 746 | if (pfd_host[i].fd != -1) 747 | fds[i].revents 748 | = pfd_host[i].revents; 749 | } 750 | rv += rv_host; 751 | } 752 | lib_assert(rv > 0); 753 | sverrno = 0; 754 | } else if (rv_rump == -1 || rv_host == -1) { 755 | /* ERROR */ 756 | 757 | /* just pick one kernel at "random" */ 758 | rv = -1; 759 | if (rv_host == -1) { 760 | sverrno = errno_host; 761 | } else if (rv_rump == -1) { 762 | sverrno = errno_rump; 763 | } 764 | } else { 765 | /* TIMEOUT */ 766 | 767 | rv = 0; 768 | lib_assert(rv_rump == 0 && rv_host == 0); 769 | } 770 | 771 | out: 772 | if (rpipe[0] != -1) 773 | host_close(rpipe[0]); 774 | if (rpipe[1] != -1) 775 | host_close(rpipe[1]); 776 | if (hpipe[0] != -1) 777 | host_close(hpipe[0]); 778 | if (hpipe[1] != -1) 779 | host_close(hpipe[1]); 780 | free(pfd_host); 781 | free(pfd_rump); 782 | errno = sverrno; 783 | 784 | return rv; 785 | } 786 | 787 | /* copied from librumphijack/hijack.c */ 788 | static void 789 | checkpoll(struct pollfd *fds, nfds_t nfds, int *hostcall, int *rumpcall) 790 | { 791 | nfds_t i; 792 | 793 | for (i = 0; i < nfds; i++) { 794 | if (fds[i].fd == -1) 795 | continue; 796 | 797 | if (nuse_fd_table[fds[i].fd].nuse_sock) 798 | (*rumpcall)++; 799 | else 800 | (*hostcall)++; 801 | } 802 | } 803 | 804 | struct poll_table_ref 805 | { 806 | int ret; 807 | void *opaque; 808 | }; 809 | 810 | int 811 | nuse_poll(struct pollfd *fds, unsigned int nfds, struct timespec *end_time) 812 | { 813 | struct poll_table_ref table, *current_table; 814 | struct poll_table_ref null_table = {0}; 815 | int count = 0, error; 816 | int i, mask, timed_out = 0; 817 | 
pthread_cond_t condvar; 818 | pthread_mutex_t cond_mutex; 819 | struct timespec ts; 820 | 821 | error = pthread_cond_init(&condvar, NULL); 822 | lib_assert(error == 0); 823 | error = pthread_mutex_init(&cond_mutex, NULL); 824 | lib_assert(error == 0); 825 | 826 | /* initialize all outgoing events. */ 827 | for (i = 0; i < nfds; ++i) 828 | fds[i].revents = 0; 829 | 830 | table.opaque = NULL; 831 | current_table = &table; 832 | if (end_time && end_time->tv_sec == 0 && 833 | end_time->tv_nsec == 0) { 834 | current_table = NULL; 835 | timed_out = 1; 836 | } 837 | 838 | /* call (sim) kernel side */ 839 | for (;;) { 840 | for (i = 0; i < nfds; ++i) { 841 | struct SimSocket *sock; 842 | 843 | /* host's fd */ 844 | if (fds[i].fd == -1) 845 | continue; 846 | if (!nuse_fd_table[fds[i].fd].nuse_sock) 847 | continue; 848 | /* nuse's fd */ 849 | sock = nuse_fd_table[fds[i].fd].nuse_sock->kern_sock; 850 | if (current_table) { 851 | current_table->ret = fds[i].events | POLLERR | POLLHUP; 852 | current_table->opaque = &condvar; 853 | } 854 | else 855 | current_table = &null_table; 856 | 857 | g_exported->sock_poll((struct SimSocket *)sock, current_table); 858 | 859 | mask = current_table->ret; 860 | mask &= (fds[i].events | POLLERR | POLLHUP); 861 | fds[i].revents = mask; 862 | if (mask) { 863 | count++; 864 | current_table = NULL; 865 | } 866 | } 867 | 868 | current_table = NULL; 869 | if (count || timed_out) { 870 | break; 871 | } 872 | 873 | if (timed_out == 0) { 874 | /* infinite wait */ 875 | if (end_time == NULL) { 876 | pthread_mutex_lock(&cond_mutex); 877 | pthread_cond_wait(&condvar, &cond_mutex); 878 | pthread_mutex_unlock(&cond_mutex); 879 | } 880 | /* sleeped wait */ 881 | else { 882 | clock_gettime(CLOCK_REALTIME, &ts); 883 | ts.tv_sec += end_time->tv_sec; 884 | ts.tv_nsec += end_time->tv_nsec; 885 | pthread_mutex_lock(&cond_mutex); 886 | error = pthread_cond_timedwait(&condvar, &cond_mutex, &ts); 887 | pthread_mutex_unlock(&cond_mutex); 888 | } 889 | timed_out = 1; 
890 | } 891 | } /* for loop */ 892 | 893 | pthread_mutex_destroy(&cond_mutex); 894 | pthread_cond_destroy(&condvar); 895 | g_exported->sock_pollfreewait(table.opaque); 896 | return count; 897 | } 898 | 899 | int 900 | poll(struct pollfd *fds, nfds_t nfds, int timeout) 901 | { 902 | struct timespec *to = NULL, end_time; 903 | int hostcall = 0, rumpcall = 0; 904 | int count; 905 | 906 | if (timeout >= 0) { 907 | end_time.tv_sec = timeout / MSEC_PER_SEC; 908 | end_time.tv_nsec = NSEC_PER_MSEC * (timeout % MSEC_PER_SEC); 909 | to = &end_time; 910 | } 911 | 912 | checkpoll(fds, nfds, &hostcall, &rumpcall); 913 | if (hostcall && rumpcall) { 914 | /* this is the case to write carefully between host and nuse */ 915 | /* see rump/hijack.c for more detail */ 916 | count = do_host_nuse_poll(fds, nfds, to); 917 | } 918 | else { 919 | if (hostcall) { 920 | count = host_poll(fds, nfds, timeout); 921 | } 922 | else { 923 | count = nuse_poll(fds, nfds, to); 924 | } 925 | } 926 | 927 | return count; 928 | } 929 | weak_alias (poll, __poll); 930 | 931 | int 932 | select(int nfds, fd_set *readfds, fd_set *writefds, 933 | fd_set *exceptfds, struct timeval *timeout) 934 | { 935 | struct pollfd pollFd[nfds]; 936 | int fd; 937 | 938 | memset(pollFd, 0, sizeof(struct pollfd) * nfds); 939 | 940 | if (nfds == -1) { 941 | errno = EINVAL; 942 | return -1; 943 | } 944 | if (readfds == 0 && writefds == 0 && exceptfds == 0) { 945 | errno = EINVAL; 946 | return -1; 947 | } 948 | if (timeout) { 949 | if (timeout->tv_sec < 0 || timeout->tv_usec < 0) { 950 | errno = EINVAL; 951 | return -1; 952 | } 953 | } 954 | 955 | for (fd = 0; fd < nfds; fd++) { 956 | int event = 0; 957 | 958 | if (readfds != 0 && FD_ISSET(fd, readfds)) 959 | event |= POLLIN; 960 | if (writefds != 0 && FD_ISSET(fd, writefds)) 961 | event |= POLLOUT; 962 | if (exceptfds != 0 && FD_ISSET(fd, exceptfds)) 963 | event |= POLLPRI; 964 | 965 | pollFd[fd].events = event; 966 | pollFd[fd].revents = 0; 967 | 968 | if (event) { 969 | if 
(!nuse_fd_table[fd].nuse_sock) { 970 | errno = EBADF; 971 | return -1; 972 | } 973 | pollFd[fd].fd = fd; 974 | } 975 | else { 976 | pollFd[fd].fd = -1; 977 | } 978 | } 979 | 980 | /* select(2): */ 981 | /* Some code calls select() with all three sets empty, nfds zero, 982 | and a non-NULL timeout as a fairly portable way to sleep with 983 | subsecond precision. */ 984 | /* 130825: this condition will be passed by dce_poll () */ 985 | 986 | int to_msec = -1; 987 | 988 | if (timeout) 989 | to_msec = timeout->tv_sec * 1000 + timeout->tv_usec / 1000; 990 | 991 | int ret = poll(pollFd, nfds, to_msec); 992 | 993 | if (readfds) 994 | FD_ZERO(readfds); 995 | if (writefds) 996 | FD_ZERO(writefds); 997 | if (exceptfds) 998 | FD_ZERO(exceptfds); 999 | 1000 | if (ret > 0) { 1001 | ret = 0; 1002 | for (fd = 0; fd < nfds; fd++) { 1003 | if (readfds && 1004 | ((POLLIN & pollFd[fd].revents) || 1005 | (POLLHUP & pollFd[fd].revents) 1006 | || (POLLERR & pollFd[fd].revents))) { 1007 | FD_SET(pollFd[fd].fd, readfds); 1008 | ret++; 1009 | } 1010 | if (writefds && (POLLOUT & pollFd[fd].revents)) { 1011 | FD_SET(pollFd[fd].fd, writefds); 1012 | ret++; 1013 | } 1014 | if (exceptfds && (POLLPRI & pollFd[fd].revents)) { 1015 | FD_SET(pollFd[fd].fd, exceptfds); 1016 | ret++; 1017 | } 1018 | } 1019 | } 1020 | return ret; 1021 | } 1022 | 1023 | int 1024 | epoll_create(int size) 1025 | { 1026 | struct epoll_fd *epfd = malloc(sizeof(struct epoll_fd)); 1027 | int real_fd; 1028 | 1029 | memset(epfd, 0, sizeof(struct epoll_fd)); 1030 | real_fd = host_open("/", O_RDONLY, 0); 1031 | nuse_fd_table[real_fd].real_fd = real_fd; 1032 | nuse_fd_table[real_fd].epoll_fd = epfd; 1033 | return real_fd; 1034 | } 1035 | 1036 | int 1037 | epoll_ctl(int epollfd, int op, int fd, struct epoll_event *event) 1038 | { 1039 | struct epoll_fd *prev = NULL, *epfd = nuse_fd_table[epollfd].epoll_fd; 1040 | 1041 | if (!epfd) 1042 | return EBADF; 1043 | 1044 | struct epoll_event *ev; 1045 | 1046 | switch (op) { 1047 | case 
EPOLL_CTL_ADD: 1048 | ev = (struct epoll_event *)malloc(sizeof(struct epoll_event)); 1049 | memset(ev, 0, sizeof(struct epoll_event)); 1050 | memcpy(ev, event, sizeof(struct epoll_event)); 1051 | 1052 | if (!epfd->ev) { 1053 | epfd->ev = ev; 1054 | epfd->fd = fd; 1055 | } else { 1056 | prev = epfd; 1057 | while (epfd->next) { 1058 | prev = epfd; 1059 | epfd = epfd->next; 1060 | } 1061 | 1062 | epfd->next = malloc(sizeof(struct epoll_fd)); 1063 | memset(epfd->next, 0, sizeof(struct epoll_fd)); 1064 | epfd->next->ev = ev; 1065 | epfd->next->fd = fd; 1066 | } 1067 | break; 1068 | case EPOLL_CTL_MOD: 1069 | while (epfd && epfd->fd != fd) 1070 | epfd = epfd->next; 1071 | ev = epfd->ev; 1072 | memcpy(ev, event, sizeof(struct epoll_event)); 1073 | epfd->fd = fd; 1074 | break; 1075 | case EPOLL_CTL_DEL: 1076 | while (epfd && epfd->fd != fd) { 1077 | prev = epfd; 1078 | epfd = epfd->next; 1079 | } 1080 | if (!epfd) { 1081 | printf("NUSE: no fd found for EPOLL_CTL_DEL (fd=%d)\n", fd); 1082 | errno = ENOENT; 1083 | return -1; 1084 | } 1085 | ev = epfd->ev; 1086 | epfd->fd = -1; 1087 | free(ev); 1088 | if (prev) { 1089 | prev->next = epfd->next; 1090 | epfd = prev; 1091 | } 1092 | 1093 | break; 1094 | default: 1095 | break; 1096 | } 1097 | 1098 | return 0; 1099 | } 1100 | 1101 | int 1102 | epoll_wait(int epollfd, struct epoll_event *events, 1103 | int maxevents, int timeout) 1104 | { 1105 | struct epoll_fd *cur, *epfd = nuse_fd_table[epollfd].epoll_fd; 1106 | 1107 | if (!epfd) 1108 | return EBADF; 1109 | 1110 | struct pollfd pollFd[maxevents]; 1111 | int j = 0; 1112 | 1113 | memset(pollFd, 0, sizeof(struct pollfd) * maxevents); 1114 | for (j = 0; j < maxevents; j++) { 1115 | pollFd[j].fd = -1; 1116 | } 1117 | j = 0; 1118 | 1119 | for (cur = epfd; cur && cur->ev; cur = cur->next) { 1120 | struct epoll_event *ev = cur->ev; 1121 | int pevent = 0; 1122 | if (ev->events & EPOLLIN) 1123 | pevent |= POLLIN; 1124 | if (ev->events & EPOLLOUT) 1125 | pevent |= POLLOUT; 1126 | 1127 | 
pollFd[j].events = pevent; 1128 | pollFd[j].fd = cur->fd; 1129 | pollFd[j++].revents = 0; 1130 | } 1131 | 1132 | int pollRet = poll(pollFd, maxevents, timeout); 1133 | if (pollRet > 0) { 1134 | pollRet = 0; 1135 | /* FIXME: c10k... far fast */ 1136 | for (j = 0; j < maxevents; j++) { 1137 | int fd = pollFd[j].fd; 1138 | struct epoll_event *rev = NULL; 1139 | 1140 | for (cur = epfd; cur && epfd->ev; cur = cur->next) { 1141 | rev = cur->ev; 1142 | if (cur->fd == fd) 1143 | break; 1144 | } 1145 | 1146 | if ((POLLIN & pollFd[j].revents) || 1147 | (POLLHUP & pollFd[j].revents) 1148 | || (POLLERR & pollFd[j].revents)) { 1149 | memcpy(events, rev, sizeof(struct epoll_event)); 1150 | events->events = pollFd[j].revents; 1151 | printf("epoll woke up for read with %d\n", fd); 1152 | pollRet++; 1153 | events++; 1154 | } 1155 | if (POLLOUT & pollFd[j].revents) { 1156 | memcpy(events, rev, sizeof(struct epoll_event)); 1157 | events->events = pollFd[j].revents; 1158 | /* *events = *epollFd->evs[fd]; */ 1159 | /* events->data.fd = fd; */ 1160 | printf("epoll woke up for write with %d\n", fd); 1161 | pollRet++; 1162 | events++; 1163 | } 1164 | if (POLLPRI & pollFd[j].revents) { 1165 | memcpy(events, rev, sizeof(struct epoll_event)); 1166 | events->events = pollFd[j].revents; 1167 | printf("epoll woke up for other with %d\n", fd); 1168 | /* *events = *epollFd->evs[fd]; */ 1169 | /* events->data.fd = fd; */ 1170 | pollRet++; 1171 | events++; 1172 | } 1173 | 1174 | } 1175 | } 1176 | 1177 | return pollRet; 1178 | } 1179 | 1180 | --------------------------------------------------------------------------------