├── scripts └── postinst ├── src ├── Kbuild.default ├── linux.mk ├── utils.c ├── CMakeLists.txt ├── udpd.c ├── ca_ctl.c ├── ca_proto.c ├── ca.h ├── ca_conn.c └── ca_core.c ├── .gitignore ├── LICENSE ├── CMakeLists.txt └── README.md /scripts/postinst: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # for post install 3 | depmod -a 4 | -------------------------------------------------------------------------------- /src/Kbuild.default: -------------------------------------------------------------------------------- 1 | #hostprogs-y := udpd 2 | #always := $(hostprogs-y) 3 | #HOSTCFLAGS_udpd.o += -I$(objtree)/usr/include 4 | 5 | ip_vs_ca-objs := ca_core.o ca_conn.o ca_ctl.o ca_proto.o utils.o 6 | obj-m += ip_vs_ca.o 7 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.swp 2 | *.cmd 3 | .tmp_versions/ 4 | *.symvers 5 | *.o 6 | *.order 7 | *.unsigned 8 | *.mod.c 9 | *.d 10 | *.ko 11 | G* 12 | *.sh 13 | toa/ 14 | linux/ 15 | *.py 16 | *.pyc 17 | tmp/ 18 | udpd 19 | kernel/ 20 | build/ 21 | *.deb 22 | *.rpm 23 | *.cmake 24 | _CPack_Packages/ 25 | CMakeFiles/ 26 | CMakeCache.txt 27 | install_manifest.txt 28 | Kbuild 29 | Kbuild.ccflags 30 | Makefile 31 | ChangeLog 32 | -------------------------------------------------------------------------------- /src/linux.mk: -------------------------------------------------------------------------------- 1 | KERNEL_DIR = /lib/modules/`uname -r`/build 2 | MODULEDIR := $(shell pwd) 3 | 4 | 5 | .PHONY: modules start stop restart 6 | default: modules 7 | 8 | modules: 9 | make -C $(KERNEL_DIR) M=$(MODULEDIR) modules 10 | 11 | clean distclean: 12 | rm -f *.o *.mod.c .*.*.cmd *.ko *.ko.unsigned 13 | rm -rf .tmp_versions 14 | rm -f udpd *.order *.symvers .*.cmd 15 | 16 | start: 17 | insmod ./ip_vs_ca.ko 18 | 19 | stop: 20 | rmmod ip_vs_ca 21 | 22 | restart: 23 | 
remod ip_vs_ca && insmod ./ip_vs_ca.ko 24 | -------------------------------------------------------------------------------- /src/utils.c: -------------------------------------------------------------------------------- 1 | /* 2 | * utils.c 3 | * Copyright (C) 2016 yubo@yubo.org 4 | * 2016-02-14 5 | */ 6 | 7 | #include 8 | #include 9 | #include "ca.h" 10 | 11 | #if LINUX_VERSION_CODE >= KERNEL_VERSION(4,17,0) 12 | #define sys_close ksys_close 13 | #endif 14 | 15 | unsigned long **find_sys_call_table(void) { 16 | 17 | unsigned long ptr; 18 | unsigned long *p; 19 | 20 | IP_VS_CA_DBG("Found the sys_call_table!!!\n"); 21 | for (ptr = (unsigned long)sys_close; 22 | ptr < (unsigned long)&loops_per_jiffy; 23 | ptr += sizeof(void *)) { 24 | 25 | p = (unsigned long *)ptr; 26 | 27 | if (p[__NR_close] == (unsigned long)sys_close) { 28 | IP_VS_CA_DBG("Found the sys_call_table!!! __NR_close[%d] sys_close[%lx]\n" 29 | " __NR_getpeername[%d] sct[__NR_getpeername][0x%lx]\n", 30 | __NR_close, 31 | (unsigned long)sys_close, 32 | __NR_getpeername, 33 | p[__NR_getpeername]); 34 | return (unsigned long **)p; 35 | } 36 | } 37 | 38 | return NULL; 39 | } 40 | 41 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2016, yubo 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | * Redistributions of source code must retain the above copyright 7 | notice, this list of conditions and the following disclaimer. 8 | * Redistributions in binary form must reproduce the above copyright 9 | notice, this list of conditions and the following disclaimer in the 10 | documentation and/or other materials provided with the distribution. 
11 | * Neither the name of the author nor the 12 | names of its contributors may be used to endorse or promote products 13 | derived from this software without specific prior written permission. 14 | 15 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 16 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 17 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 18 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY 19 | DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 20 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 21 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 22 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 23 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 24 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
25 | -------------------------------------------------------------------------------- /src/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 2.6) 2 | # DISABLE_ICMP and ENABLE_DEBUG are boolean switches: test their value, not mere definedness, so -DDISABLE_ICMP=0 keeps ICMP enabled and -DENABLE_DEBUG=OFF keeps debug output off. 3 | if(NOT DISABLE_ICMP) 4 | message(STATUS "ENABLE ICMP") 5 | set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DIP_VS_CA_ICMP") 6 | endif() 7 | 8 | if(ENABLE_DEBUG) 9 | message(STATUS "ENABLE_DEBUG: ${ENABLE_DEBUG}") 10 | set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DIP_VS_CA_DEBUG") 11 | endif() 12 | # Pass the package version to the sources as the string literal IP_VS_CA_VERSION. 13 | set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DIP_VS_CA_VERSION=\\\"${CPACK_PACKAGE_VERSION}\\\"") 14 | # Kbuild.ccflags hands the accumulated flags to the kernel build as ccflags-y. 15 | file(WRITE "${CMAKE_CURRENT_SOURCE_DIR}/Kbuild.ccflags" 16 | "\nccflags-y := ${CMAKE_C_FLAGS}\n" ) 17 | 18 | add_custom_target(KMODFILES ALL 19 | DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/ip_vs_ca.ko) 20 | # Kbuild = Kbuild.default followed by the generated Kbuild.ccflags. 21 | add_custom_command(OUTPUT "${CMAKE_CURRENT_SOURCE_DIR}/Kbuild" 22 | COMMAND cat 23 | "${CMAKE_CURRENT_SOURCE_DIR}/Kbuild.default" 24 | "${CMAKE_CURRENT_SOURCE_DIR}/Kbuild.ccflags" 25 | > "${CMAKE_CURRENT_SOURCE_DIR}/Kbuild" 26 | DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/Kbuild.ccflags" 27 | "${CMAKE_CURRENT_SOURCE_DIR}/Kbuild.default" 28 | COMMENT "Create Kbuild") 29 | 30 | # Build the module; dependencies are absolute so they resolve the same way for in-source and out-of-source builds, and ca.h is listed so header changes trigger a rebuild 31 | add_custom_command(OUTPUT ${CMAKE_CURRENT_SOURCE_DIR}/ip_vs_ca.ko 32 | COMMAND make -C ${KERNEL_SRCDIR} M=${CMAKE_CURRENT_SOURCE_DIR} modules 33 | DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/ca_core.c ${CMAKE_CURRENT_SOURCE_DIR}/ca_conn.c ${CMAKE_CURRENT_SOURCE_DIR}/ca_ctl.c ${CMAKE_CURRENT_SOURCE_DIR}/ca_proto.c ${CMAKE_CURRENT_SOURCE_DIR}/utils.c ${CMAKE_CURRENT_SOURCE_DIR}/ca.h ${CMAKE_CURRENT_SOURCE_DIR}/Kbuild 34 | COMMENT "Building ip_vs_ca.ko") 35 | 36 | 37 | # Installation of the module 38 | set(module_install_path /lib/modules/${KERNEL_RELEASE}/kernel/extra/ip_vs_ca) 39 | install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/ip_vs_ca.ko 40 | DESTINATION ${module_install_path} 41 | PERMISSIONS OWNER_READ OWNER_EXECUTE 42 | GROUP_READ GROUP_EXECUTE WORLD_READ WORLD_EXECUTE) 43 | 44 | 45 | 46 | 47 | -------------------------------------------------------------------------------- /CMakeLists.txt: 
-------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 2.6) 2 | 3 | project(ip_vs_ca C) 4 | # Kernel release of the build host, as reported by uname -r. 5 | execute_process(COMMAND uname -r 6 | OUTPUT_VARIABLE KERNEL_RELEASE 7 | OUTPUT_STRIP_TRAILING_WHITESPACE) 8 | # Locate the kernel build tree for that release by its top-level Makefile. 9 | find_path(KERNEL_SRCDIR Makefile PATHS 10 | /lib/modules/${KERNEL_RELEASE}/source 11 | /lib/modules/${KERNEL_RELEASE}/build) 12 | 13 | message(STATUS "Kernel release: ${KERNEL_RELEASE}") 14 | message(STATUS "Kernel source: ${KERNEL_SRCDIR}") 15 | # CPack metadata shared by the RPM and DEB generators. 16 | set(CPACK_GENERATOR "RPM;DEB") 17 | set(CPACK_PACKAGE_NAME "ip_vs_ca") 18 | set(CPACK_PACKAGE_CONTACT "yubo@yubo.org") 19 | set(CPACK_PACKAGE_VENDOR "yubo.org") 20 | set(CPACK_PACKAGE_DESCRIPTION_SUMMARY "ip vs ca - get ip vs(fullnat) client addr") 21 | set(CPACK_PACKAGE_VERSION "0.1.3") 22 | set(CPACK_PACKAGE_DESCRIPTION "get ip vs(fullnat) client addr") 23 | set(CPACK_OUTPUT_FILE_PREFIX package) 24 | set(CPACK_PACKAGE_FILE_NAME "${CPACK_PACKAGE_NAME}-${KERNEL_RELEASE}-${CPACK_PACKAGE_VERSION}.${CMAKE_SYSTEM_PROCESSOR}") 25 | set(CPACK_PACKAGE_RELOCATABLE false) 26 | set(CPACK_DEBIAN_PACKAGE_CONTROL_EXTRA "${CMAKE_CURRENT_SOURCE_DIR}/scripts/postinst") 27 | set(CPACK_RPM_POST_INSTALL_SCRIPT_FILE "${CMAKE_CURRENT_SOURCE_DIR}/scripts/postinst") 28 | set(CPACK_RPM_CHANGELOG_FILE "${CMAKE_CURRENT_BINARY_DIR}/ChangeLog") 29 | set(CPACK_RPM_PACKAGE_LICENSE "BSD") 30 | set(CPACK_RPM_PACKAGE_GROUP "System Environment/Kernel") 31 | set(CPACK_RPM_PACKAGE_DESCRIPTION ${CPACK_PACKAGE_DESCRIPTION}) 32 | set(CPACK_RPM_EXCLUDE_FROM_AUTO_FILELIST_ADDITION 33 | "/usr" 34 | "/lib" 35 | "/lib/modules" 36 | "/lib/modules/${KERNEL_RELEASE}" 37 | "/lib/modules/${KERNEL_RELEASE}/kernel" 38 | "/lib/modules/${KERNEL_RELEASE}/kernel/extra") 39 | include(CPack) 40 | # Export the git history as the RPM changelog: run git inside the source tree, and truncate with '>' so a re-run never appends duplicate entries. 41 | add_custom_command(OUTPUT ChangeLog 42 | COMMAND git log --format='* %cd %aN%n- (%h) %s%d%n' --date=local | sed -r 's/[0-9]+:[0-9]+:[0-9]+ //' > ${CMAKE_CURRENT_BINARY_DIR}/ChangeLog WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} 43 | COMMENT "export git
log to ChangeLog") 44 | 45 | add_custom_target(PKGFILES ALL DEPENDS ChangeLog) 46 | 47 | add_custom_target(deploy COMMAND ghr --delete ${CPACK_PACKAGE_VERSION} package) 48 | 49 | add_subdirectory(src) 50 | -------------------------------------------------------------------------------- /src/udpd.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | #define IPv4(a, b, c, d) ((uint32_t)(((a) & 0xff) << 24) | \ 12 | (((b) & 0xff) << 16) | \ 13 | (((c) & 0xff) << 8) | \ 14 | ((d) & 0xff)) 15 | 16 | #define ERR_EXIT(m) \ 17 | do { \ 18 | perror(m); \ 19 | exit(EXIT_FAILURE); \ 20 | } while (0) 21 | 22 | #define BIND_ADDR INADDR_ANY 23 | //#define BIND_ADDR IPv4(0,0,0,0) 24 | 25 | int main(int argc, char *argv[]) 26 | { 27 | int sock, port, n; 28 | struct sockaddr_in servaddr, peeraddr; 29 | char recvbuf[1024] = {0}; 30 | socklen_t peerlen; 31 | //int on; 32 | 33 | 34 | if (!(argc == 2 && (port = atoi(argv[1])))) { 35 | fprintf(stderr, "Usage\n\t%s \n", 36 | argv[0]); 37 | exit(EXIT_FAILURE); 38 | } 39 | 40 | if ((sock = socket(PF_INET, SOCK_DGRAM, 0)) < 0) 41 | ERR_EXIT("socket error"); 42 | 43 | //on = 1; 44 | //setsockopt( sock, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on) ); 45 | // 46 | 47 | printf("port %d\n", port); 48 | 49 | memset(&servaddr, 0, sizeof(servaddr)); 50 | servaddr.sin_family = AF_INET; 51 | servaddr.sin_port = htons(port); 52 | servaddr.sin_addr.s_addr = htonl(BIND_ADDR); 53 | 54 | printf("serv %s:%d \n", inet_ntoa(servaddr.sin_addr), 55 | ntohs(servaddr.sin_port)); 56 | 57 | if (bind(sock, (struct sockaddr *)&servaddr, sizeof(servaddr)) < 0) 58 | ERR_EXIT("bind error"); 59 | 60 | while (1){ 61 | peerlen = sizeof(peeraddr); 62 | memset(recvbuf, 0, sizeof(recvbuf)); 63 | n = recvfrom(sock, recvbuf, sizeof(recvbuf), 0, 64 | (struct sockaddr *)&peeraddr, &peerlen); 65 | if(n == -1) { 66 | if (errno == 
EINTR) 67 | continue; 68 | ERR_EXIT("recvfrom error"); 69 | }else if(n > 0) { 70 | printf("recv(%d) peer from %s:%d\n", 71 | n, inet_ntoa(peeraddr.sin_addr), 72 | ntohs(peeraddr.sin_port)); 73 | sendto(sock, recvbuf, n, 0, 74 | (struct sockaddr*)&peeraddr, 75 | sizeof(peeraddr)); 76 | fputs(recvbuf, stdout); 77 | } 78 | } 79 | close(sock); 80 | 81 | return 0; 82 | } 83 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # IPVS CA 2 | 3 | get ip vs(fullnat) client addr 4 | 5 | 由taobao/toa修改,可作为独立模块编译安装, 支持tcp/udp 6 | 7 | 支持 centos6.6(linux 2.6.32-220) / centos7.2(linux 3.10.0-237.4.5) / ubuntu14.04(linux 3.13.0-77-generic) / ubuntu16.04(linux 4.4.0-64-generic) / centos7.2(linux 4.9.2-1.el7) / ubuntu19.04 (5.0.0-25-generic) 8 | 9 | 对应内核在[github.com/yubo/LVS](https://github.com/yubo/LVS/tree/lvs_v2),兼容[taobao/LVS(lvs_v2)](https://github.com/alibaba/LVS/tree/lvs_v2) 10 | 11 | 支持taobao/lvs_v2版本的tcp opt报文格式,新加入了icmp echo报文(payload),实现了tcp/udp local - client 地址对应关系的通告 12 | 13 | [lvs官网](http://linuxvirtualserver.org/)在2012年8月放出了fullnat第一个版本,其中的 TCPOPT_ADDR 为 200,之后ali的github上放出的,改为了254,导致有些版本兼容的问题,可确认tcpopt的值后,修改 /proc/sys/net/ca/tcpopt_addr(默认为 200) 14 | 15 | - kernel include/net/ip_vs.h 16 | - ip_vs_ca src/ca.h 17 | 18 | ## Feature 19 | - [x] Build as a module 20 | - [x] Support TCP 21 | - [x] Support UDP 22 | - [x] Support centos 6.6 23 | - [x] Support centos 7.2 rpmbuild 24 | - [x] Support ubuntu 14.04(trusty) dpkg 25 | - [x] Support ubuntu 16.04.2(xenial) dpkg 26 | - [x] Support ubuntu 19.04(disco) dpkg (untested) 27 | - [x] Linux (2.6.32 - 5.0.0) 28 | 29 | ## Demo 30 | 31 | lvs(fullnat) client address TCP 32 | [![TCP](https://asciinema.org/a/7e1qyj3ovn8yfe6a3srfcj104.png)](https://asciinema.org/a/7e1qyj3ovn8yfe6a3srfcj104?autoplay=1) 33 | 34 | lvs(fullnat) client address UDP 35 | 
[![UDP](https://asciinema.org/a/c0q9u1jhr367qay237azaep5e.png)](https://asciinema.org/a/c0q9u1jhr367qay237azaep5e?autoplay=1) 36 | 37 | ## Install 38 | 39 | ### build kmod 40 | ```shell 41 | cd src 42 | make 43 | insmod ./ip_vs_ca.ko 44 | ``` 45 | 46 | ### build rpm/deb 47 | ```shell 48 | ## install cmake-3.2.1 49 | cmake . 50 | #cmake -DDISABLE_ICMP=1 -DENABLE_DEBUG=1 .. 51 | make package 52 | rpm -ivh package/ip_vs_ca-`uname -r`-0.1.3.x86_64.rpm 53 | #or 54 | dpkg -i package/ip_vs_ca-`uname -r`-0.1.3.x86_64.deb 55 | modprobe ip_vs_ca 56 | ``` 57 | 58 | ### proc sys ctl 59 | 60 | 可以通过修改以下文件来设置连接超时回收的时间 61 | 62 | - /proc/sys/net/ca/tcp_timeout (default 100s) 63 | - /proc/sys/net/ca/udp_timeout (default 310s) 64 | - /proc/sys/net/ca/tcpopt_addr (default 200) 65 | 66 | 查看计数器和版本信息 67 | 68 | - /proc/net/ip_vs_ca_stats 69 | 70 | ## syscall 71 | 72 | #### tx 73 | 74 | 修改了 tx 方向的相关系统调用的地址修改,当对 client ip:port 访问时,会转换成 lvs local ip:port 75 | 76 | - sendto() 77 | - connect() 78 | 79 | #### rx 80 | 81 | 修改了 rx 方向的系统调用函数,当访问lvs fnat方式转发的数据时,lvs local ip:port 会转换成 client ip:port 82 | 83 | - accept() 84 | - accept4() 85 | - recvfrom() 86 | - getpeername() 87 | 88 | 89 | #### other 90 | 91 | 在获取 remote addr时,以`recvfrom(sock, recvbuf, sizeof(recvbuf), 0, (struct sockaddr *)&addr, &len)`为例, 传入的地址类型和长度需要符合以下条件 92 | 93 | - `len == sizeof(struct sockaddr_in)` 94 | - `((struct sockaddr_in *)&addr)->sin_family == AF_INET` 95 | 96 | 97 | ## Udpd example 98 | 99 | [udpd.c](src/udpd.c) 100 | 101 | ```c 102 | char recvbuf[1024] = {0}; 103 | struct sockaddr_in peeraddr; 104 | socklen_t peerlen; 105 | int n; 106 | 107 | peerlen = sizeof(peeraddr); 108 | n = recvfrom(sock, recvbuf, sizeof(recvbuf), 0, 109 | (struct sockaddr *)&peeraddr, &peerlen); 110 | printf("recv %d %s:%d\n", peerlen, 111 | inet_ntoa(peeraddr.sin_addr), 112 | ntohs(peeraddr.sin_port)); 113 | 114 | ``` 115 | -------------------------------------------------------------------------------- /src/ca_ctl.c: 
-------------------------------------------------------------------------------- 1 | /* 2 | * TOA: Address is a new TCP Option 3 | * Address include ip+port, Now only support IPV4 4 | */ 5 | 6 | /* 7 | * ca_core.c 8 | * Copyright (C) 2016 yubo@yubo.org 9 | * 2016-02-14 10 | */ 11 | #include "ca.h" 12 | 13 | #if LINUX_VERSION_CODE >= KERNEL_VERSION(4,0,0) 14 | #include 15 | #define USE_PROC_CTREATE1 16 | #elif LINUX_VERSION_CODE >= KERNEL_VERSION(3,10,0) 17 | #define USE_PROC_CTREATE 18 | #endif 19 | 20 | int tcpopt_addr = 200; 21 | struct ip_vs_ca_stat_mib *ext_stats; 22 | 23 | #ifdef USE_PROC_CTREATE 24 | static struct proc_dir_entry *ca_stats; 25 | #endif 26 | static struct ctl_table_header *sysctl_header; 27 | extern int sysctl_ip_vs_ca_timeouts[IP_VS_CA_S_LAST + 1]; 28 | static int tcpopt_addr_min = 0; 29 | static int tcpopt_addr_max = 255; 30 | 31 | /* 32 | * IPVS sysctl table (under the /proc/sys/net/ca/) 33 | * Do not change order or insert new entries without 34 | * align with netns init in ip_vs_control_net_init() 35 | */ 36 | static struct ctl_table vs_vars[] = { 37 | { 38 | .procname = "tcp_timeout", 39 | .data = &sysctl_ip_vs_ca_timeouts[IP_VS_CA_S_TCP], 40 | .maxlen = sizeof(int), 41 | .mode = 0644, 42 | .proc_handler = proc_dointvec_jiffies, 43 | }, 44 | { 45 | .procname = "udp_timeout", 46 | .data = &sysctl_ip_vs_ca_timeouts[IP_VS_CA_S_UDP], 47 | .maxlen = sizeof(int), 48 | .mode = 0644, 49 | .proc_handler = proc_dointvec_jiffies, 50 | }, 51 | { 52 | .procname = "tcpopt_addr", 53 | .data = &tcpopt_addr, 54 | .maxlen = sizeof(int), 55 | .mode = 0644, 56 | .proc_handler = proc_dointvec_minmax, 57 | .extra1 = &tcpopt_addr_min, 58 | .extra2 = &tcpopt_addr_max, 59 | }, 60 | {.procname = 0} 61 | }; 62 | 63 | const struct ctl_path net_vs_ctl_path[] = { 64 | { 65 | .procname = "net", 66 | #if LINUX_VERSION_CODE < KERNEL_VERSION(3,10,0) 67 | .ctl_name = CTL_NET, 68 | #endif 69 | }, 70 | {.procname = "ca"}, 71 | {.procname = 0} 72 | }; 73 | 
EXPORT_SYMBOL_GPL(net_vs_ctl_path); 74 | 75 | 76 | struct ip_vs_ca_stats_entry ip_vs_ca_stats[] = { 77 | IP_VS_CA_STAT_ITEM("syn_recv_sock_ip_vs_ca", SYN_RECV_SOCK_IP_VS_CA_CNT), 78 | IP_VS_CA_STAT_ITEM("syn_recv_sock_no_ip_vs_ca", SYN_RECV_SOCK_NO_IP_VS_CA_CNT), 79 | IP_VS_CA_STAT_ITEM("conn_new", CONN_NEW_CNT), 80 | IP_VS_CA_STAT_ITEM("conn_del", CONN_DEL_CNT), 81 | IP_VS_CA_STAT_END 82 | }; 83 | 84 | 85 | /* 86 | * seq_file show handler for /proc/net/ip_vs_ca_stats: prints the module version, a header row with one CPUn column per online CPU, then one row per ip_vs_ca_stats[] entry holding that counter's per-CPU values. Always returns 0. 87 | */ 88 | static int ip_vs_ca_stats_show(struct seq_file *seq, void *v) 89 | { 90 | int i, j; 91 | char buff[16]; /* "CPU" + a decimal int + NUL */ 92 | 93 | /* print CPU first */ 94 | seq_printf(seq, "IP_VS_CA(%s)\n", IP_VS_CA_VERSION); 95 | seq_printf(seq, "%-25s ", ""); 96 | /* walk the online mask itself: CPU ids can be sparse, so counting 0..num_possible_cpus()-1 may skip online CPUs */ 97 | for_each_online_cpu(i) { 98 | snprintf(buff, sizeof(buff), "CPU%d", i); 99 | seq_printf(seq, " %10s", buff); 100 | } 101 | 102 | seq_putc(seq, '\n'); 103 | 104 | i = 0; 105 | while (NULL != ip_vs_ca_stats[i].name) { 106 | seq_printf(seq, "%-25s:", ip_vs_ca_stats[i].name); 107 | for_each_online_cpu(j) { 108 | /* the per-CPU mib is read as a flat unsigned long array indexed by the entry id */ 109 | seq_printf(seq, " %10lu", *( 110 | ((unsigned long *) per_cpu_ptr( 111 | ext_stats, j)) + ip_vs_ca_stats[i].entry 112 | )); 113 | } 114 | 115 | seq_putc(seq, '\n'); 116 | i++; 117 | } 118 | return 0; 119 | } 120 | 121 | static int ip_vs_ca_stats_seq_open(struct inode *inode, struct file *file) 122 | { 123 | return single_open(file, ip_vs_ca_stats_show, NULL); 124 | } 125 | 126 | 127 | static const struct file_operations ip_vs_ca_stats_fops = { 128 | .owner = THIS_MODULE, 129 | .open = ip_vs_ca_stats_seq_open, 130 | .read = seq_read, 131 | .llseek = seq_lseek, 132 | .release = single_release, 133 | }; 134 | 135 | int __init ip_vs_ca_control_init(void){ 136 | ext_stats = alloc_percpu(struct ip_vs_ca_stat_mib); 137 | if (NULL == ext_stats) 138 | return 1; 139 | 140 | 141 | #ifdef USE_PROC_CTREATE1 142 | proc_create("ip_vs_ca_stats", 0, init_net.proc_net, 
&ip_vs_ca_stats_fops); 143 | #elif defined USE_PROC_CTREATE 144 | ca_stats = proc_create("ip_vs_ca_stats", 0, init_net.proc_net, &ip_vs_ca_stats_fops); 145 | #else 146 | proc_net_fops_create(&init_net, "ip_vs_ca_stats", 0, &ip_vs_ca_stats_fops); 147 | #endif 148 | 149 | sysctl_header = register_sysctl_paths(net_vs_ctl_path, vs_vars); 150 | 151 | return 0; 152 | } 153 | 154 | void ip_vs_ca_control_cleanup(void) 155 | { 156 | synchronize_net(); 157 | unregister_sysctl_table(sysctl_header); 158 | #ifdef USE_PROC_CTREATE1 159 | remove_proc_entry("ip_vs_ca_stats", init_net.proc_net); 160 | #elif defined USE_PROC_CTREATE 161 | proc_remove(ca_stats); 162 | #else 163 | proc_net_remove(&init_net, "ip_vs_ca_stats"); 164 | #endif 165 | if (NULL != ext_stats) { 166 | free_percpu(ext_stats); 167 | ext_stats = NULL; 168 | } 169 | } 170 | -------------------------------------------------------------------------------- /src/ca_proto.c: -------------------------------------------------------------------------------- 1 | /* 2 | * ca_proto.c 3 | * Copyright (C) 2016 yubo@yubo.org 4 | * 2016-02-25 5 | */ 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include "ca.h" 18 | 19 | enum { 20 | IP_VS_CA_PROTO_TCP = 0, 21 | IP_VS_CA_PROTO_UDP, 22 | IP_VS_CA_PROTO_TAB_SIZE 23 | }; 24 | 25 | int sysctl_ip_vs_ca_timeouts[IP_VS_CA_S_LAST + 1] = { 26 | [IP_VS_CA_S_TCP] = 100 * HZ, 27 | [IP_VS_CA_S_UDP] = 310 * HZ, 28 | [IP_VS_CA_S_LAST] = 2 * HZ, 29 | }; 30 | 31 | static struct ip_vs_ca_protocol *ip_vs_ca_proto_table[IP_VS_CA_PROTO_TAB_SIZE]; 32 | 33 | static struct ip_vs_ca_conn *tcpudp_conn_get(int af, const struct sk_buff *skb, 34 | struct ip_vs_ca_protocol *pp, 35 | const struct ip_vs_ca_iphdr *iph, 36 | unsigned int proto_off) 37 | { 38 | __be16 _ports[2], *pptr; 39 | 40 | pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports); 41 | if (pptr == NULL) 42 | return NULL; 43 | 44 
| return ip_vs_ca_conn_get(af, iph->protocol, 45 | &iph->saddr, pptr[0], IP_VS_CA_IN); 46 | } 47 | 48 | static int tcpudp_icmp_process(int af, struct sk_buff *skb, 49 | struct ip_vs_ca_protocol *pp, 50 | const struct ip_vs_ca_iphdr *iph, 51 | struct icmphdr *icmph, struct ipvs_ca *ca, 52 | int *verdict, struct ip_vs_ca_conn **cpp) 53 | { 54 | 55 | IP_VS_CA_INC_STATS(ext_stats, SYN_RECV_SOCK_IP_VS_CA_CNT); 56 | //create cp 57 | *cpp = ip_vs_ca_conn_new(af, pp, 58 | iph->saddr.ip , ca->sport, 59 | iph->daddr.ip, ca->dport, 60 | ca->toa.addr, ca->toa.port, 61 | skb); 62 | if (*cpp == NULL){ 63 | goto out; 64 | } 65 | 66 | ip_vs_ca_conn_put(*cpp); 67 | *verdict = NF_ACCEPT; 68 | return 0; 69 | 70 | out: 71 | *cpp = NULL; 72 | *verdict = NF_ACCEPT; 73 | return 1; 74 | } 75 | /* 76 | * #################### tcp ################## 77 | */ 78 | 79 | /* Parse TCP options in skb, try to get client ip, port 80 | * @param skb [in] received skb, it should be a ack/get-ack packet. 81 | * @return NULL if we don't get client ip/port; 82 | * value of toa_data in ret_ptr if we get client ip/port. 
83 | */ 84 | static __u64 get_ip_vs_ca_data(const struct sk_buff *skb, unsigned int thoff, const struct tcphdr *th) 85 | { 86 | unsigned char _opts[60 - sizeof(struct tcphdr)]; /* doff is a 4-bit word count, so options never exceed 40 bytes */ 87 | const unsigned char *ptr; 88 | union ip_vs_ca_data tdata; 89 | int length; 90 | 91 | if (th == NULL) 92 | return 0; 93 | 94 | length = (th->doff * 4) - (int)sizeof(struct tcphdr); 95 | if (length <= 0) /* no options, or a bogus data offset */ 96 | return 0; 97 | 98 | /* th may be the caller's copy of the fixed 20-byte header only, so the 99 | * options are fetched from the skb (at thoff, the TCP header offset) instead of being read through th + 1 */ 100 | ptr = skb_header_pointer(skb, thoff + sizeof(struct tcphdr), length, _opts); 101 | if (ptr == NULL) 102 | return 0; 103 | 104 | while (length > 0) { 105 | int opcode = *ptr++; 106 | int opsize; 107 | switch (opcode) { 108 | case TCPOPT_EOL: 109 | return 0; 110 | case TCPOPT_NOP: /* Ref: RFC 793 section 3.1 */ 111 | length--; 112 | continue; 113 | default: 114 | if (length < 2) /* no byte left for the option size */ 115 | return 0; 116 | opsize = *ptr++; 117 | if (opsize < 2) /* "silly options" */ 118 | return 0; 119 | if (opsize > length) 120 | /* don't parse partial options */ 121 | return 0; 122 | if (tcpopt_addr == opcode && 123 | TCPOLEN_ADDR == opsize) { 124 | memcpy(&tdata.data, ptr - 2, sizeof(tdata.data)); /* copy the whole option, opcode and size bytes included */ 125 | return tdata.data; 126 | } 127 | ptr += opsize - 2; 128 | length -= opsize; 129 | } 130 | } 131 | return 0; 132 | } 133 | 134 | static int 135 | tcp_skb_process(int af, struct sk_buff *skb, struct ip_vs_ca_protocol *pp, 136 | const struct ip_vs_ca_iphdr *iph, 137 | int *verdict, struct ip_vs_ca_conn **cpp) 138 | { 139 | struct tcphdr _tcph, *th; 140 | union ip_vs_ca_data tdata = {.data = 0}; 141 | 142 | th = skb_header_pointer(skb, iph->len, sizeof(_tcph), &_tcph); 143 | if (th == NULL) { 144 | goto out; 145 | } 146 | 147 | if (!th->syn){ 148 | goto out; 149 | } 150 | 151 | if ((tdata.data = get_ip_vs_ca_data(skb, iph->len, th)) != 0){ 152 | IP_VS_CA_INC_STATS(ext_stats, SYN_RECV_SOCK_IP_VS_CA_CNT); 153 | //create cp 154 | *cpp = ip_vs_ca_conn_new(af, pp, 155 | iph->saddr.ip , th->source, 156 | iph->daddr.ip, th->dest, 157 | tdata.tcp.addr, tdata.tcp.port, 158 | skb); 159 | if (*cpp == NULL){ 160 | goto out; 161 | } 162 | 163 | ip_vs_ca_conn_put(*cpp); 164 | 
*verdict = NF_ACCEPT; 165 | return 0; 166 | } 167 | 168 | IP_VS_CA_INC_STATS(ext_stats, SYN_RECV_SOCK_NO_IP_VS_CA_CNT); 169 | goto out; 170 | 171 | out: 172 | *cpp = NULL; 173 | *verdict = NF_ACCEPT; 174 | return 1; 175 | } 176 | 177 | 178 | struct ip_vs_ca_protocol ip_vs_ca_protocol_tcp = { 179 | .name = "TCP", 180 | .protocol = IPPROTO_TCP, 181 | .skb_process = tcp_skb_process, 182 | .icmp_process = tcpudp_icmp_process, 183 | .conn_get = tcpudp_conn_get, 184 | .timeout = &sysctl_ip_vs_ca_timeouts[IP_VS_CA_S_TCP], 185 | }; 186 | 187 | /* 188 | * #################### udp ################## 189 | */ 190 | 191 | static int udp_skb_process(int af, struct sk_buff *skb, 192 | struct ip_vs_ca_protocol *pp, 193 | const struct ip_vs_ca_iphdr *iph, 194 | int *verdict, struct ip_vs_ca_conn **cpp) 195 | { 196 | if (false){ 197 | *cpp = NULL; 198 | *verdict = NF_ACCEPT; 199 | return 0; 200 | } 201 | return 1; 202 | } 203 | 204 | 205 | struct ip_vs_ca_protocol ip_vs_ca_protocol_udp = { 206 | .name = "UDP", 207 | .protocol = IPPROTO_UDP, 208 | .skb_process = udp_skb_process, 209 | .icmp_process = tcpudp_icmp_process, 210 | .conn_get = tcpudp_conn_get, 211 | .timeout = &sysctl_ip_vs_ca_timeouts[IP_VS_CA_S_UDP], 212 | }; 213 | 214 | 215 | /* 216 | * get ip_vs_ca_protocol object by its proto. 
217 | */ 218 | struct ip_vs_ca_protocol *ip_vs_ca_proto_get(unsigned short proto) 219 | { 220 | int i; 221 | 222 | for(i = 0; iprotocol == proto) 224 | return ip_vs_ca_proto_table[i]; 225 | } 226 | return NULL; 227 | } 228 | 229 | 230 | int __init ip_vs_ca_protocol_init(void) 231 | { 232 | ip_vs_ca_proto_table[IP_VS_CA_PROTO_TCP] = &ip_vs_ca_protocol_tcp; 233 | ip_vs_ca_proto_table[IP_VS_CA_PROTO_UDP] = &ip_vs_ca_protocol_udp; 234 | return 0; 235 | } 236 | 237 | void ip_vs_ca_protocol_cleanup(void) 238 | { 239 | 240 | } 241 | -------------------------------------------------------------------------------- /src/ca.h: -------------------------------------------------------------------------------- 1 | #ifndef __IP_VS_CA_H__ 2 | #define __IP_VS_CA_H__ 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include /* for struct ipv6hdr */ 9 | #include /* for ipv6_addr_copy */ 10 | #include /* for icmp_send */ 11 | 12 | #ifndef IP_VS_CA_VERSION 13 | #define IP_VS_CA_VERSION "0.1.*" 14 | #endif 15 | 16 | /*#define IP_VS_CA_DEBUG*/ 17 | 18 | #define IP_VS_CA_CONN_TAB_BITS 8 19 | #define IP_VS_CA_CONN_TAB_SIZE (1 << IP_VS_CA_CONN_TAB_BITS) 20 | #define IP_VS_CA_CONN_TAB_MASK (IP_VS_CA_CONN_TAB_SIZE - 1) 21 | 22 | #define IP_VS_CA_CONN_F_HASHED 0x0040 /* hashed entry */ 23 | #define IP_VS_CA_CONN_F_ONE_PACKET 0x2000 /* forward only one packet */ 24 | 25 | #define IP_VS_CA_ERR(msg...) \ 26 | do { \ 27 | printk(KERN_ERR "[ERR] IP_VS_CA: " msg); \ 28 | } while (0) 29 | 30 | #ifdef IP_VS_CA_DEBUG 31 | #define EnterFunction() \ 32 | do { \ 33 | printk(KERN_DEBUG \ 34 | pr_fmt("Enter: %s, %s line %i\n"), \ 35 | __func__, __FILE__, __LINE__); \ 36 | } while (0) 37 | #define LeaveFunction() \ 38 | do { \ 39 | printk(KERN_DEBUG \ 40 | pr_fmt("Leave: %s, %s line %i\n"), \ 41 | __func__, __FILE__, __LINE__); \ 42 | } while (0) 43 | #define IP_VS_CA_DBG(msg...) 
\ 44 | do { \ 45 | if (net_ratelimit()) \ 46 | printk(KERN_DEBUG "[DEBUG] IP_VS_CA: " msg); \ 47 | } while (0) 48 | 49 | #define IP_VS_CA_INFO(msg...) \ 50 | do { \ 51 | if (net_ratelimit()) \ 52 | printk(KERN_INFO "[INFO] IP_VS_CA: " msg); \ 53 | } while (0) 54 | #else 55 | 56 | #define EnterFunction() do {} while (0) 57 | #define LeaveFunction() do {} while (0) 58 | #define IP_VS_CA_DBG(msg...) do {} while (0) 59 | #define IP_VS_CA_INFO(msg...) do {} while (0) 60 | 61 | 62 | #endif 63 | 64 | /*#define TCPOPT_ADDR 254*/ 65 | extern int tcpopt_addr; 66 | /* MUST be 4n !!!! */ 67 | #define TCPOLEN_ADDR 8 /* |opcode|size|ip+port| = 1 + 1 + 6 */ 68 | 69 | struct ip_vs_ca_conn; 70 | struct ip_vs_ca_iphdr; 71 | struct ip_vs_ca_conn; 72 | 73 | /* 74 | static inline void ip_vs_ca_addr_copy(int af, union nf_inet_addr *dst, 75 | const union nf_inet_addr *src) 76 | { 77 | #ifdef CONFIG_IP_VS_CA_IPV6 78 | if (af == AF_INET6) 79 | ipv6_addr_copy(&dst->in6, &src->in6); 80 | else 81 | #endif 82 | dst->ip = src->ip; 83 | } 84 | */ 85 | 86 | 87 | static inline int ip_vs_ca_addr_equal(int af, const union nf_inet_addr *a, 88 | const union nf_inet_addr *b) 89 | { 90 | #ifdef CONFIG_IP_VS_CA_IPV6 91 | if (af == AF_INET6) 92 | return ipv6_addr_equal(&a->in6, &b->in6); 93 | #endif 94 | return a->ip == b->ip; 95 | } 96 | 97 | struct ip_vs_ca_iphdr { 98 | int len; 99 | __u8 protocol; 100 | union nf_inet_addr saddr; 101 | union nf_inet_addr daddr; 102 | }; 103 | 104 | static inline void 105 | ip_vs_ca_fill_iphdr(int af, const void *nh, struct ip_vs_ca_iphdr *iphdr) 106 | { 107 | const struct iphdr *iph = nh; 108 | iphdr->len = iph->ihl * 4; 109 | iphdr->protocol = iph->protocol; 110 | iphdr->saddr.ip = iph->saddr; 111 | iphdr->daddr.ip = iph->daddr; 112 | } 113 | 114 | 115 | /* 116 | * IP_VS_CA structure allocated for each connection 117 | */ 118 | struct ip_vs_ca_conn { 119 | struct list_head s_list; /* hashed list heads for s_addr(lvs local ip) */ 120 | struct list_head c_list; /* hashed 
list heads for client ip */ 121 | 122 | u16 af; /* address family */ 123 | __u8 protocol; /* Which protocol (TCP/UDP) */ 124 | union nf_inet_addr s_addr; /* source address */ 125 | union nf_inet_addr d_addr; /* destination address */ 126 | __be16 s_port; /* source port */ 127 | __be16 d_port; /* destination port */ 128 | 129 | union nf_inet_addr c_addr; /* origin address */ 130 | __be16 c_port; /* origin port */ 131 | 132 | atomic_t refcnt; /* reference count */ 133 | struct timer_list timer; /* Expiration timer */ 134 | volatile unsigned long timeout; /* timeout */ 135 | 136 | /* Flags and state transition */ 137 | spinlock_t lock; /* lock for state transition */ 138 | volatile __u16 flags; /* status flags */ 139 | volatile __u16 state; /* state info */ 140 | volatile __u16 old_state; 141 | }; 142 | 143 | /* MUST be 4 bytes alignment */ 144 | struct ip_vs_tcpo_addr { 145 | __u8 opcode; 146 | __u8 opsize; 147 | __u16 port; 148 | __u32 addr; 149 | } __attribute__((__packed__)); 150 | 151 | 152 | struct ipvs_ca { 153 | __u8 code; /* magic code */ 154 | __u8 protocol; /* Which protocol (TCP/UDP) */ 155 | __be16 sport; 156 | __be16 dport; 157 | struct ip_vs_tcpo_addr toa; 158 | } __attribute__((__packed__)); 159 | 160 | union ip_vs_ca_data { 161 | __u64 data; 162 | struct ip_vs_tcpo_addr tcp; 163 | }; 164 | 165 | /* statistics about toa in proc /proc/net/ip_vs_ca_stat */ 166 | enum { 167 | SYN_RECV_SOCK_IP_VS_CA_CNT = 1, 168 | SYN_RECV_SOCK_NO_IP_VS_CA_CNT, 169 | CONN_NEW_CNT, 170 | CONN_DEL_CNT, 171 | IP_VS_CA_STAT_LAST 172 | }; 173 | 174 | enum { 175 | IP_VS_CA_S_TCP = 0, 176 | IP_VS_CA_S_UDP, 177 | IP_VS_CA_S_LAST 178 | }; 179 | 180 | enum { 181 | IP_VS_CA_IN = 0, /* in s_addr -> c_addr */ 182 | IP_VS_CA_OUT /* out c_addr -> s_addr */ 183 | }; 184 | 185 | 186 | struct ip_vs_ca_stats_entry { 187 | char *name; 188 | int entry; 189 | }; 190 | 191 | #define IP_VS_CA_STAT_ITEM(_name, _entry) { \ 192 | .name = _name, \ 193 | .entry = _entry, \ 194 | } 195 | 196 | #define 
IP_VS_CA_STAT_END { \ 197 | NULL, \ 198 | 0, \ 199 | } 200 | 201 | struct ip_vs_ca_stat_mib { 202 | unsigned long mibs[IP_VS_CA_STAT_LAST]; 203 | }; 204 | 205 | #define IP_VS_CA_INC_STATS(mib, field) \ 206 | (per_cpu_ptr(mib, smp_processor_id())->mibs[field]++) 207 | 208 | struct syscall_links { 209 | asmlinkage long (*getpeername)(int, struct sockaddr __user *, int __user *); 210 | asmlinkage long (*accept4)(int, struct sockaddr __user *, int __user *, int); 211 | asmlinkage long (*recvfrom)(int, void __user *, size_t, unsigned, 212 | struct sockaddr __user *, int __user *); 213 | asmlinkage long (*connect)(int, struct sockaddr __user *, int); 214 | asmlinkage long (*accept)(int, struct sockaddr __user *, int __user *); 215 | asmlinkage long (*sendto)(int, void __user *, size_t, unsigned, 216 | struct sockaddr __user *, int); 217 | }; 218 | 219 | struct ip_vs_ca_protocol { 220 | struct ip_vs_ca_protocol *next; 221 | char *name; 222 | __u8 protocol; 223 | u16 num_states; 224 | int dont_defrag; 225 | atomic_t appcnt; /* counter of proto app incs */ 226 | int *timeout; /* protocol timeout table */ 227 | 228 | int (*skb_process) (int af, struct sk_buff * skb, 229 | struct ip_vs_ca_protocol * pp, 230 | const struct ip_vs_ca_iphdr * iph, 231 | int *verdict, struct ip_vs_ca_conn ** cpp); 232 | 233 | int (*icmp_process) (int af, struct sk_buff * skb, 234 | struct ip_vs_ca_protocol * pp, 235 | const struct ip_vs_ca_iphdr * iph, 236 | struct icmphdr *icmph, struct ipvs_ca *ca, 237 | int *verdict, struct ip_vs_ca_conn ** cpp); 238 | 239 | struct ip_vs_ca_conn * 240 | (*conn_get) (int af, const struct sk_buff * skb, 241 | struct ip_vs_ca_protocol * pp, 242 | const struct ip_vs_ca_iphdr * iph, 243 | unsigned int proto_off); 244 | 245 | }; 246 | 247 | static inline void __ip_vs_ca_conn_put(struct ip_vs_ca_conn *cp) 248 | { 249 | atomic_dec(&cp->refcnt); 250 | } 251 | 252 | extern int ip_vs_ca_conn_init(void); 253 | extern void ip_vs_ca_conn_cleanup(void); 254 | extern void 
ip_vs_ca_conn_put(struct ip_vs_ca_conn *cp); 255 | extern void ip_vs_ca_conn_cleanup(void); 256 | extern struct ip_vs_ca_conn *ip_vs_ca_conn_get(int af, __u8 protocol, 257 | const union nf_inet_addr *s_addr, __be16 s_port, int dir); 258 | struct ip_vs_ca_conn *ip_vs_ca_conn_new(int af, 259 | struct ip_vs_ca_protocol *pp, 260 | __be32 saddr, __be16 sport, 261 | __be32 daddr, __be16 dport, 262 | __be32 oaddr, __be16 oport, 263 | struct sk_buff *skb); 264 | 265 | extern int ip_vs_ca_protocol_init(void); 266 | extern void ip_vs_ca_protocol_cleanup(void); 267 | extern struct ip_vs_ca_protocol *ip_vs_ca_proto_get(unsigned short proto); 268 | 269 | extern int ip_vs_ca_control_init(void); 270 | extern void ip_vs_ca_control_cleanup(void); 271 | 272 | extern unsigned long **find_sys_call_table(void); 273 | const char *ip_vs_ca_proto_name(unsigned proto); 274 | 275 | 276 | extern struct ip_vs_ca_stat_mib *ext_stats; 277 | 278 | #endif 279 | -------------------------------------------------------------------------------- /src/ca_conn.c: -------------------------------------------------------------------------------- 1 | /* 2 | * ca_conn.c 3 | * Copyright (C) 2016 yubo@yubo.org 4 | * 2016-02-14 5 | */ 6 | #include 7 | #include 8 | #include 9 | #include /* for proc_net_* */ 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include "ca.h" 15 | 16 | static struct list_head *ip_vs_ca_conn_tab_s; 17 | static struct list_head *ip_vs_ca_conn_tab_c; 18 | 19 | /* SLAB cache for IP_VS_CA connections */ 20 | static struct kmem_cache *ip_vs_ca_conn_cachep __read_mostly; 21 | 22 | /* counter for current IP_VS_CA connections */ 23 | static atomic_t ip_vs_ca_conn_count = ATOMIC_INIT(0); 24 | 25 | /* random value for IP_VS_CA connection hash */ 26 | static unsigned int ip_vs_ca_conn_rnd; 27 | 28 | /* 29 | * Fine locking granularity for big connection hash table 30 | */ 31 | #define CT_LOCKARRAY_BITS 16 32 | #define CT_LOCKARRAY_SIZE (1<ip, (__force u32) port, proto, 60 | 
/*
 * Unlock two buckets of ip_vs_ca_conn_tab
 *
 * Counterpart of ct_lock2(): @shash and @chash are the same two bucket
 * hashes that were passed to ct_lock2(). Each hash is reduced to a lock
 * slot with CT_LOCKARRAY_MASK, and the slots are released in the reverse
 * of the order ct_lock2() acquired them.
 */
static inline void ct_unlock2(unsigned shash, unsigned chash)
{
	unsigned slock, clock;

	slock = shash & CT_LOCKARRAY_MASK;
	clock = chash & CT_LOCKARRAY_MASK;

	/* unlock the conntab bucket(s): higher slot first, then the lower one */
	if (slock < clock) {
		ct_unlock(chash);
		ct_unlock(shash);
	} else if (slock > clock) {
		ct_unlock(shash);
		ct_unlock(chash);
	} else {
		/* both hashes share one lock slot, so ct_lock2() took it only once */
		ct_unlock(shash);
	}
}
/*
 * Timer callback of a connection entry (installed by ip_vs_ca_conn_new()
 * through timer_setup()/setup_timer()).
 *
 * It tries to remove the entry from both hash tables and free it. If the
 * entry cannot be unhashed (ip_vs_ca_conn_unhash() only succeeds when the
 * entry is hashed and the reference count is exactly 2), or if somebody
 * else still holds a reference after unhashing, destruction is postponed:
 * the timer is re-armed through ip_vs_ca_conn_put() with the 60 second
 * timeout set at the top of this function.
 *
 * The prototype differs by kernel version: from 4.15 on the callback
 * receives the timer_list pointer, before that an unsigned long cookie
 * holding the entry pointer.
 */
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,15,0)
static void ip_vs_ca_conn_expire(struct timer_list *t)
{
	struct ip_vs_ca_conn *cp = from_timer(cp, t, timer);
#else
static void ip_vs_ca_conn_expire(unsigned long data)
{
	struct ip_vs_ca_conn *cp = (struct ip_vs_ca_conn *)data;
#endif

	/*
	 * Set proper timeout.
	 * (used by ip_vs_ca_conn_put() below if the expiry must be retried)
	 */
	cp->timeout = 60 * HZ;

	/*
	 * hey, I'm using it
	 * (temporary reference held for the duration of this handler)
	 */
	atomic_inc(&cp->refcnt);

	/*
	 * unhash it if it is hashed in the conn table
	 * (fails when the entry is not hashed or has other users)
	 */
	if (!ip_vs_ca_conn_unhash(cp))
		goto expire_later;

	/*
	 * refcnt==1 implies I'm the only one referrer
	 * (the hash-table reference was dropped by the unhash above)
	 */
	if (likely(atomic_read(&cp->refcnt) == 1)) {
		/* delete the timer if it is activated by other users */
		if (timer_pending(&cp->timer))
			del_timer(&cp->timer);

		atomic_dec(&ip_vs_ca_conn_count);
		IP_VS_CA_INC_STATS(ext_stats, CONN_DEL_CNT);

		IP_VS_CA_DBG("conn expire: %pI4:%d(%pI4:%d) -> %pI4:%d timer:%p\n",
				&cp->s_addr.ip, ntohs(cp->s_port),
				&cp->c_addr.ip, ntohs(cp->c_port),
				&cp->d_addr.ip, ntohs(cp->d_port),
				&cp->timer);
		kmem_cache_free(ip_vs_ca_conn_cachep, cp);
		return;
	}

	/* hash it back to the table */
	ip_vs_ca_conn_hash(cp);

expire_later:
	IP_VS_CA_DBG("delayed: conn->refcnt-1=%d\n",
			atomic_read(&cp->refcnt) - 1);

	/* re-arm the timer and drop the temporary reference taken above */
	ip_vs_ca_conn_put(cp);
}
saddr); 277 | cp->s_addr.ip = saddr; 278 | cp->s_port = sport; 279 | //ip_vs_ca_addr_copy(af, &cp->caddr, caddr); 280 | cp->c_addr.ip = caddr; 281 | cp->c_port = cport; 282 | //ip_vs_ca_addr_copy(proto == IPPROTO_IP ? AF_UNSPEC : af, &cp->daddr, daddr); 283 | cp->d_addr.ip = daddr; 284 | cp->d_port = dport; 285 | 286 | cp->flags = 0; 287 | 288 | spin_lock_init(&cp->lock); 289 | atomic_set(&cp->refcnt, 1); 290 | atomic_inc(&ip_vs_ca_conn_count); 291 | IP_VS_CA_INC_STATS(ext_stats, CONN_NEW_CNT); 292 | 293 | cp->state = 0; 294 | cp->timeout = *pp->timeout; 295 | 296 | ip_vs_ca_conn_hash(cp); 297 | 298 | IP_VS_CA_DBG("conn new: proto:%u, %pI4:%d(%pI4:%d) -> %pI4:%d\n", 299 | cp->protocol, 300 | &cp->s_addr.ip, ntohs(cp->s_port), 301 | &cp->c_addr.ip, ntohs(cp->c_port), 302 | &cp->d_addr.ip, ntohs(cp->d_port)); 303 | //LeaveFunction(); 304 | return cp; 305 | } 306 | 307 | /* 308 | * just ipv4 309 | */ 310 | struct ip_vs_ca_conn *ip_vs_ca_conn_get(int af, __u8 protocol, 311 | const union nf_inet_addr *addr, __be16 port, int dir) { 312 | unsigned hash; 313 | struct ip_vs_ca_conn *cp; 314 | 315 | hash = ip_vs_ca_conn_hashkey(af, protocol, addr, port); 316 | 317 | ct_lock(hash); 318 | 319 | if (dir == IP_VS_CA_IN) { 320 | list_for_each_entry(cp, &ip_vs_ca_conn_tab_s[hash], s_list) { 321 | if (cp->af == af && 322 | ip_vs_ca_addr_equal(af, addr, &cp->s_addr) && 323 | port == cp->s_port && 324 | protocol == cp->protocol) { 325 | /* HIT */ 326 | atomic_inc(&cp->refcnt); 327 | ct_unlock(hash); 328 | return cp; 329 | } 330 | } 331 | }else{ 332 | list_for_each_entry(cp, &ip_vs_ca_conn_tab_c[hash], c_list) { 333 | if (cp->af == af && 334 | ip_vs_ca_addr_equal(af, addr, &cp->c_addr) && 335 | port == cp->c_port && 336 | protocol == cp->protocol) { 337 | /* HIT */ 338 | atomic_inc(&cp->refcnt); 339 | ct_unlock(hash); 340 | return cp; 341 | } 342 | } 343 | } 344 | ct_unlock(hash); 345 | return NULL; 346 | } 347 | 348 | void ip_vs_ca_conn_put(struct ip_vs_ca_conn *cp) 349 | { 350 | /* 
reset it expire in its timeout */ 351 | /* IP_VS_CA_DBG("mod_timer %lu\n", cp->timeout / HZ); */ 352 | mod_timer(&cp->timer, jiffies + cp->timeout); 353 | __ip_vs_ca_conn_put(cp); 354 | } 355 | 356 | static void ip_vs_ca_conn_expire_now(struct ip_vs_ca_conn *cp) 357 | { 358 | IP_VS_CA_DBG("expire_now: timer(%p)\n", &cp->timer); 359 | if (del_timer(&cp->timer)) 360 | mod_timer(&cp->timer, jiffies); 361 | } 362 | 363 | /* 364 | * Flush all the connection entries in the ip_vs_ca_conn_tab 365 | */ 366 | static void ip_vs_ca_conn_flush(void) 367 | { 368 | int idx; 369 | struct ip_vs_ca_conn *cp; 370 | 371 | flush_again: 372 | for (idx = 0; idx < IP_VS_CA_CONN_TAB_SIZE; idx++) { 373 | /* 374 | * Lock is actually needed in this loop. 375 | */ 376 | ct_lock(idx); 377 | 378 | list_for_each_entry(cp, &ip_vs_ca_conn_tab_s[idx], s_list) { 379 | IP_VS_CA_DBG("del connection\n"); 380 | ip_vs_ca_conn_expire_now(cp); 381 | } 382 | ct_unlock(idx); 383 | } 384 | 385 | /* the counter may be not NULL, because maybe some conn entries 386 | are run by slow timer handler or unhashed but still referred */ 387 | if (atomic_read(&ip_vs_ca_conn_count) != 0) { 388 | schedule(); 389 | goto flush_again; 390 | } 391 | } 392 | 393 | int __init ip_vs_ca_conn_init(void){ 394 | int idx; 395 | 396 | ip_vs_ca_conn_tab_s = vmalloc(IP_VS_CA_CONN_TAB_SIZE * 397 | (sizeof(struct list_head))); 398 | if (!ip_vs_ca_conn_tab_s) 399 | return -ENOMEM; 400 | 401 | ip_vs_ca_conn_tab_c = vmalloc(IP_VS_CA_CONN_TAB_SIZE * 402 | (sizeof(struct list_head))); 403 | if (!ip_vs_ca_conn_tab_c) 404 | return -ENOMEM; 405 | 406 | /* Allocate ip_vs_ca_conn slab cache */ 407 | ip_vs_ca_conn_cachep = kmem_cache_create("ip_vs_ca_conn", 408 | sizeof(struct ip_vs_ca_conn), 409 | 0, SLAB_HWCACHE_ALIGN, NULL); 410 | if (!ip_vs_ca_conn_cachep) { 411 | vfree(ip_vs_ca_conn_tab_s); 412 | vfree(ip_vs_ca_conn_tab_c); 413 | return -ENOMEM; 414 | } 415 | 416 | IP_VS_CA_INFO("Connection hash table configured " 417 | "(size=%d, 
memory=%ldKbytes)\n", 418 | IP_VS_CA_CONN_TAB_SIZE, 419 | (long)(IP_VS_CA_CONN_TAB_SIZE * sizeof(struct list_head)) / 1024); 420 | 421 | IP_VS_CA_DBG("Each connection entry needs %Zd bytes at least\n", 422 | sizeof(struct ip_vs_ca_conn)); 423 | 424 | for (idx = 0; idx < IP_VS_CA_CONN_TAB_SIZE; idx++) { 425 | INIT_LIST_HEAD(&ip_vs_ca_conn_tab_s[idx]); 426 | INIT_LIST_HEAD(&ip_vs_ca_conn_tab_c[idx]); 427 | } 428 | 429 | for (idx = 0; idx < CT_LOCKARRAY_SIZE; idx++) { 430 | spin_lock_init(&__ip_vs_ca_conntbl_lock_array[idx].l); 431 | } 432 | 433 | /* calculate the random value for connection hash */ 434 | get_random_bytes(&ip_vs_ca_conn_rnd, sizeof(ip_vs_ca_conn_rnd)); 435 | 436 | return 0; 437 | } 438 | 439 | void ip_vs_ca_conn_cleanup(void) 440 | { 441 | ip_vs_ca_conn_flush(); 442 | kmem_cache_destroy(ip_vs_ca_conn_cachep); 443 | vfree(ip_vs_ca_conn_tab_s); 444 | vfree(ip_vs_ca_conn_tab_c); 445 | } 446 | -------------------------------------------------------------------------------- /src/ca_core.c: -------------------------------------------------------------------------------- 1 | /* 2 | * ca_core.c 3 | * Copyright (C) 2016 yubo@yubo.org 4 | * 2016-02-14 5 | */ 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include "ca.h" 14 | 15 | 16 | unsigned long **sys_call_table; 17 | unsigned long original_cr0; 18 | struct syscall_links sys; 19 | 20 | static int 21 | ca_use_count_inc(void) 22 | { 23 | return try_module_get(THIS_MODULE); 24 | } 25 | 26 | static void 27 | ca_use_count_dec(void) 28 | { 29 | module_put(THIS_MODULE); 30 | } 31 | 32 | 33 | static void 34 | ip_vs_ca_modify_uaddr(int fd, struct sockaddr *uaddr, int len, int dir) 35 | { 36 | int err, ret = 0; 37 | struct socket *sock = NULL; 38 | struct sockaddr_in sin; 39 | union nf_inet_addr addr; 40 | struct ip_vs_ca_conn *cp; 41 | 42 | if (len != sizeof(struct sockaddr_in)){ 43 | ret = -1; 44 | goto out; 45 | } 46 | 47 | err = copy_from_user(&sin, uaddr, len); 48 | if 
(err){ 49 | ret = -2; 50 | goto out; 51 | } 52 | 53 | if (sin.sin_family != AF_INET){ 54 | ret = -3; 55 | goto out; 56 | } 57 | 58 | sock = sockfd_lookup(fd, &err); 59 | if (!sock){ 60 | ret = -4; 61 | goto out; 62 | } 63 | 64 | IP_VS_CA_DBG("%s called, sin{.family:%d, .port:%d, addr:%pI4} sock.type:%d\n", 65 | __func__, sin.sin_family, ntohs(sin.sin_port), 66 | &sin.sin_addr.s_addr, sock->type); 67 | 68 | addr.ip = sin.sin_addr.s_addr; 69 | 70 | if (sock->type == SOCK_STREAM){ 71 | cp = ip_vs_ca_conn_get(sin.sin_family, IPPROTO_TCP, &addr, 72 | sin.sin_port, dir); 73 | }else if(sock->type == SOCK_DGRAM){ 74 | cp = ip_vs_ca_conn_get(sin.sin_family, IPPROTO_UDP, &addr, 75 | sin.sin_port, dir); 76 | }else{ 77 | ret = -5; 78 | goto out; 79 | } 80 | 81 | IP_VS_CA_DBG("lookup type:%d %pI4:%d %s\n", 82 | sock->type, 83 | &addr.ip, ntohs(sin.sin_port), 84 | cp ? "hit" : "not hit"); 85 | 86 | if (!cp){ 87 | ret = -6; 88 | goto out; 89 | } 90 | 91 | IP_VS_CA_DBG("%s called, %d %pI4:%d(%pI4:%d)->%pI4:%d\n", 92 | __func__, cp->protocol, 93 | &sin.sin_addr.s_addr, ntohs(sin.sin_port), 94 | &cp->c_addr.ip, ntohs(cp->c_port), 95 | &cp->d_addr.ip, ntohs(cp->d_port)); 96 | 97 | if (dir == IP_VS_CA_IN) { 98 | sin.sin_addr.s_addr = cp->c_addr.ip; 99 | sin.sin_port = cp->c_port; 100 | } else { 101 | sin.sin_addr.s_addr = cp->s_addr.ip; 102 | sin.sin_port = cp->s_port; 103 | } 104 | ip_vs_ca_conn_put(cp); 105 | if(copy_to_user(uaddr, &sin, len)) { 106 | ret = -7; 107 | goto out; 108 | } 109 | 110 | out: 111 | if (sock && sock->file) 112 | sockfd_put(sock); 113 | 114 | IP_VS_CA_DBG("ip_vs_ca_modify_uaddr err:%d\n", ret); 115 | 116 | return; 117 | } 118 | 119 | /* 120 | * ./net/socket.c:1624 121 | */ 122 | asmlinkage static long 123 | getpeername(int fd, struct sockaddr __user *usockaddr, int __user *usockaddr_len) 124 | { 125 | int ret, len; 126 | 127 | if (!ca_use_count_inc()) 128 | return -1; 129 | IP_VS_CA_DBG("getpeername called\n"); 130 | 131 | ret = sys.getpeername(fd, usockaddr, 
usockaddr_len); 132 | if (ret < 0) 133 | goto out; 134 | 135 | get_user(len, usockaddr_len); 136 | ip_vs_ca_modify_uaddr(fd, usockaddr, len, IP_VS_CA_IN); 137 | 138 | out: 139 | ca_use_count_dec(); 140 | return ret; 141 | } 142 | 143 | asmlinkage static long 144 | accept4(int fd, struct sockaddr __user *upeer_sockaddr, 145 | int __user *upeer_addrlen, int flags) 146 | { 147 | int ret, len; 148 | 149 | if (!ca_use_count_inc()) 150 | return -1; 151 | IP_VS_CA_DBG("accept4 called\n"); 152 | 153 | ret = sys.accept4(fd, upeer_sockaddr, upeer_addrlen, flags); 154 | if (ret < 0){ 155 | IP_VS_CA_DBG("accept4 (%d, %p, %d, %d) ret:%d\n", fd, upeer_sockaddr, *upeer_addrlen, flags, ret); 156 | goto out; 157 | } 158 | 159 | get_user(len, upeer_addrlen); 160 | ip_vs_ca_modify_uaddr(fd, upeer_sockaddr, len, IP_VS_CA_IN); 161 | 162 | out: 163 | ca_use_count_dec(); 164 | return ret; 165 | } 166 | 167 | asmlinkage static long 168 | accept(int fd, struct sockaddr __user *upeer_sockaddr, int __user *upeer_addrlen) 169 | { 170 | return accept4(fd, upeer_sockaddr, upeer_addrlen, 0); 171 | } 172 | 173 | asmlinkage static long 174 | recvfrom(int fd, void __user *ubuf, size_t size, unsigned flags, 175 | struct sockaddr __user *addr, int __user *addr_len) 176 | { 177 | int ret, len; 178 | 179 | if (!ca_use_count_inc()) 180 | return -1; 181 | 182 | if(addr == NULL || addr_len == NULL){ 183 | ret = sys.recvfrom(fd, ubuf, size, flags, addr, addr_len); 184 | goto out; 185 | } 186 | 187 | 188 | ret = sys.recvfrom(fd, ubuf, size, flags, addr, addr_len); 189 | if (ret < 0) 190 | goto out; 191 | 192 | get_user(len, addr_len); 193 | ip_vs_ca_modify_uaddr(fd, addr, len, IP_VS_CA_IN); 194 | 195 | out: 196 | ca_use_count_dec(); 197 | return ret; 198 | } 199 | 200 | asmlinkage static long 201 | connect(int fd, struct sockaddr __user *uservaddr, int addrlen) 202 | { 203 | int ret; 204 | 205 | if (!ca_use_count_inc()) 206 | return -1; 207 | 208 | ip_vs_ca_modify_uaddr(fd, uservaddr, addrlen, 
/*
 * Return a printable name for an IP protocol number.
 *
 * Known protocols map to string constants. Any other value is formatted
 * as "IP_<number>" into a function-local static buffer, so that result is
 * overwritten by the next call with an unknown protocol and the function
 * is not reentrant for such values.
 */
const char *ip_vs_ca_proto_name(unsigned proto)
{
	static char buf[20];

	switch (proto) {
	case IPPROTO_IP:
		return "IP";
	case IPPROTO_UDP:
		return "UDP";
	case IPPROTO_TCP:
		return "TCP";
	case IPPROTO_ICMP:
		return "ICMP";
#ifdef CONFIG_IP_VS_IPV6
	case IPPROTO_ICMPV6:
		return "ICMPv6";
#endif
	default:
		/* proto is unsigned: print it with %u and bound the write */
		snprintf(buf, sizeof(buf), "IP_%u", proto);
		return buf;
	}
}
sys_call_table[__NR_accept] = (void *)accept; 277 | sys_call_table[__NR_sendto] = (void *)sendto; 278 | 279 | write_cr0(original_cr0); 280 | 281 | return 0; 282 | } 283 | 284 | static void ip_vs_ca_syscall_cleanup(void) 285 | { 286 | if (!sys_call_table){ 287 | return; 288 | } 289 | 290 | write_cr0(original_cr0 & ~0x00010000); 291 | 292 | sys_call_table[__NR_getpeername] = (void *)sys.getpeername; 293 | sys_call_table[__NR_accept4] = (void *)sys.accept4; 294 | sys_call_table[__NR_recvfrom] = (void *)sys.recvfrom; 295 | sys_call_table[__NR_connect] = (void *)sys.connect; 296 | sys_call_table[__NR_accept] = (void *)sys.accept; 297 | sys_call_table[__NR_sendto] = (void *)sys.sendto; 298 | 299 | write_cr0(original_cr0); 300 | //msleep(100); 301 | sys_call_table = NULL; 302 | } 303 | 304 | static unsigned int _ip_vs_ca_in_hook(struct sk_buff *skb); 305 | 306 | 307 | #if LINUX_VERSION_CODE >= KERNEL_VERSION(4,12,0) 308 | static unsigned int 309 | ip_vs_ca_in_hook(void *priv, struct sk_buff *skb, 310 | const struct nf_hook_state *state) 311 | #elif LINUX_VERSION_CODE >= KERNEL_VERSION(3,10,0) 312 | static unsigned int 313 | ip_vs_ca_in_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, 314 | const struct net_device *in, 315 | const struct net_device *out, 316 | const void *ignore) 317 | #else 318 | ip_vs_ca_in_hook(unsigned int hooknum, struct sk_buff *skb, 319 | const struct net_device *in, const struct net_device *out, 320 | int (*okfn) (struct sk_buff *)) 321 | #endif 322 | { 323 | return _ip_vs_ca_in_hook(skb); 324 | } 325 | 326 | static unsigned int _ip_vs_ca_in_hook(struct sk_buff *skb) 327 | { 328 | struct ip_vs_ca_iphdr iph; 329 | struct ip_vs_ca_conn *cp; 330 | struct ip_vs_ca_protocol *pp; 331 | int af; 332 | 333 | //EnterFunction(); 334 | 335 | af = (skb->protocol == htons(ETH_P_IP)) ? 
AF_INET : AF_INET6; 336 | 337 | if (af != AF_INET) { 338 | goto out; 339 | } 340 | 341 | ip_vs_ca_fill_iphdr(af, skb_network_header(skb), &iph); 342 | 343 | 344 | /* 345 | * Big tappo: only PACKET_HOST, including loopback for local client 346 | * Don't handle local packets on IPv6 for now 347 | */ 348 | if (unlikely(skb->pkt_type != PACKET_HOST)) { 349 | /* 350 | IP_VS_CA_DBG("packet type=%d proto=%d daddr=%pI4 ignored\n", 351 | skb->pkt_type, 352 | iph.protocol, &iph.daddr.ip); 353 | */ 354 | goto out; 355 | } 356 | 357 | if (iph.protocol == IPPROTO_ICMP) { 358 | #ifndef IP_VS_CA_ICMP 359 | return NF_ACCEPT; 360 | #else 361 | struct iphdr *ih; 362 | struct icmphdr _icmph, *icmph; 363 | struct ipvs_ca _ca, *ca; 364 | 365 | IP_VS_CA_DBG("icmp packet recv\n"); 366 | 367 | ih = (struct iphdr *)skb_network_header(skb); 368 | 369 | icmph = skb_header_pointer(skb, iph.len, 370 | sizeof(_icmph), &_icmph); 371 | 372 | if (icmph == NULL){ 373 | IP_VS_CA_DBG("icmphdr NULL\n"); 374 | goto out; 375 | } 376 | 377 | if(ntohs(ih->tot_len) == sizeof(*ih)+sizeof(*icmph)+sizeof(*ca) 378 | && icmph->type == ICMP_ECHO 379 | && icmph->code == 0 380 | && icmph->un.echo.id == 0x1234 381 | && icmph->un.echo.sequence == 0){ 382 | ca = skb_header_pointer(skb, iph.len + sizeof(*icmph), 383 | sizeof(_ca), &_ca); 384 | 385 | if (ca == NULL){ 386 | IP_VS_CA_DBG("ca NULL\n"); 387 | goto out; 388 | } 389 | 390 | if(ca->code != 123 391 | || ca->toa.opcode != tcpopt_addr 392 | || ca->toa.opsize != TCPOLEN_ADDR){ 393 | IP_VS_CA_DBG("ca not hit. 
{.code:%d, .protocol:%d," 394 | " .toa.opcode:%d, .toa.opsize:%d}\n", 395 | ca->code, ca->protocol, ca->toa.opcode, ca->toa.opsize); 396 | goto out; 397 | } 398 | 399 | pp = ip_vs_ca_proto_get(ca->protocol); 400 | if (unlikely(!pp)) 401 | goto out; 402 | 403 | cp = pp->conn_get(af, skb, pp, &iph, iph.len); 404 | if(unlikely(cp)){ 405 | ip_vs_ca_conn_put(cp); 406 | goto out; 407 | }else{ 408 | int v; 409 | if(pp->icmp_process(af, skb, pp, &iph, icmph, ca, 410 | &v, &cp) == 0){ 411 | return v; 412 | }else{ 413 | goto out; 414 | } 415 | } 416 | }else{ 417 | IP_VS_CA_DBG("icmphdr not hit tot_len:%d, " 418 | "icmp{.type:%d, .code:%d .echo.id:0x%04x," 419 | " .echo.sequence:%d}\n" 420 | "want tot_len:%lu icmp.type:%d\n", 421 | ntohs(ih->tot_len), icmph->type, 422 | icmph->code, icmph->un.echo.id, 423 | icmph->un.echo.sequence, 424 | sizeof(*ih)+sizeof(*icmph)+sizeof(*ca), 425 | ICMP_ECHO); 426 | goto out; 427 | } 428 | #endif 429 | }else if (iph.protocol == IPPROTO_TCP) { 430 | /* Protocol supported? 
*/ 431 | pp = ip_vs_ca_proto_get(iph.protocol); 432 | if (unlikely(!pp)) 433 | goto out; 434 | 435 | /* 436 | * Check if the packet belongs to an existing connection entry 437 | */ 438 | cp = pp->conn_get(af, skb, pp, &iph, iph.len); 439 | 440 | if (likely(cp)) { 441 | ip_vs_ca_conn_put(cp); 442 | goto out; 443 | } else { 444 | int v; 445 | /* create a new connection */ 446 | if(pp->skb_process(af, skb, pp, &iph, &v, &cp) == 0){ 447 | //LeaveFunction(); 448 | return v; 449 | }else{ 450 | goto out; 451 | } 452 | } 453 | } 454 | 455 | out: 456 | //LeaveFunction(); 457 | return NF_ACCEPT; 458 | } 459 | 460 | static struct nf_hook_ops ip_vs_ca_ops[] __read_mostly = { 461 | { 462 | .hook = (nf_hookfn *)ip_vs_ca_in_hook, 463 | #if LINUX_VERSION_CODE < KERNEL_VERSION(4,0,0) 464 | .owner = THIS_MODULE, 465 | #endif 466 | .pf = NFPROTO_IPV4, 467 | .hooknum = NF_INET_LOCAL_IN, 468 | .priority = NF_IP_PRI_CONNTRACK_CONFIRM, 469 | }, 470 | }; 471 | 472 | static int __init ip_vs_ca_init(void) 473 | { 474 | int ret; 475 | 476 | ret = ip_vs_ca_syscall_init(); 477 | if (ret < 0){ 478 | IP_VS_CA_ERR("can't modify syscall table.\n"); 479 | goto out_err; 480 | } 481 | IP_VS_CA_DBG("modify syscall table done.\n"); 482 | 483 | ip_vs_ca_protocol_init(); 484 | IP_VS_CA_DBG("ip_vs_ca_protocol_init done.\n"); 485 | 486 | ret = ip_vs_ca_control_init(); 487 | if (ret < 0){ 488 | IP_VS_CA_ERR("can't modify syscall table.\n"); 489 | goto cleanup_syscall; 490 | } 491 | IP_VS_CA_DBG("ip_vs_ca_control_init done.\n"); 492 | 493 | ret = ip_vs_ca_conn_init(); 494 | if (ret < 0){ 495 | IP_VS_CA_ERR("can't setup connection table.\n"); 496 | goto cleanup_control; 497 | } 498 | IP_VS_CA_DBG("ip_vs_ca_conn_init done.\n"); 499 | 500 | #if LINUX_VERSION_CODE >= KERNEL_VERSION(4,12,0) 501 | ret = nf_register_net_hooks(NULL, ip_vs_ca_ops, ARRAY_SIZE(ip_vs_ca_ops)); 502 | #else 503 | ret = nf_register_hooks(ip_vs_ca_ops, ARRAY_SIZE(ip_vs_ca_ops)); 504 | #endif 505 | if (ret < 0){ 506 | IP_VS_CA_ERR("can't 
register hooks.\n"); 507 | goto cleanup_conn; 508 | } 509 | IP_VS_CA_DBG("nf_register_hooks done.\n"); 510 | 511 | IP_VS_CA_INFO("ip_vs_ca loaded."); 512 | return ret; 513 | 514 | cleanup_conn: 515 | ip_vs_ca_conn_cleanup(); 516 | cleanup_control: 517 | ip_vs_ca_control_cleanup(); 518 | cleanup_syscall: 519 | ip_vs_ca_syscall_cleanup(); 520 | out_err: 521 | return ret; 522 | } 523 | 524 | static void __exit ip_vs_ca_exit(void) 525 | { 526 | #if LINUX_VERSION_CODE >= KERNEL_VERSION(4,12,0) 527 | nf_unregister_net_hooks(NULL, ip_vs_ca_ops, ARRAY_SIZE(ip_vs_ca_ops)); 528 | #else 529 | nf_unregister_hooks(ip_vs_ca_ops, ARRAY_SIZE(ip_vs_ca_ops)); 530 | #endif 531 | ip_vs_ca_conn_cleanup(); 532 | ip_vs_ca_protocol_cleanup(); 533 | ip_vs_ca_control_cleanup(); 534 | ip_vs_ca_syscall_cleanup(); 535 | IP_VS_CA_INFO("ip_vs_ca unloaded."); 536 | } 537 | 538 | module_init(ip_vs_ca_init); 539 | module_exit(ip_vs_ca_exit); 540 | MODULE_LICENSE("GPL"); 541 | MODULE_AUTHOR("Yu Bo"); 542 | 543 | --------------------------------------------------------------------------------