├── LICENSE
├── README.md
├── config.sh
├── go.mod
├── kmod
│   ├── Makefile
│   └── tun.c.4.19
├── main.go
├── tun
│   └── tun.go
└── tunnel
    ├── define.go
    ├── receive.go
    ├── send.go
    ├── tunnel.go
    └── udp.go
/LICENSE: -------------------------------------------------------------------------------- 1 | GNU GENERAL PUBLIC LICENSE 2 | Version 3, 29 June 2007 3 | 4 | Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/> 5 | Everyone is permitted to copy and distribute verbatim copies 6 | of this license document, but changing it is not allowed. 7 | 8 | Preamble 9 | 10 | The GNU General Public License is a free, copyleft license for 11 | software and other kinds of works. 12 | 13 | The licenses for most software and other practical works are designed 14 | to take away your freedom to share and change the works. By contrast, 15 | the GNU General Public License is intended to guarantee your freedom to 16 | share and change all versions of a program--to make sure it remains free 17 | software for all its users. We, the Free Software Foundation, use the 18 | GNU General Public License for most of our software; it applies also to 19 | any other work released this way by its authors. You can apply it to 20 | your programs, too. 21 | 22 | When we speak of free software, we are referring to freedom, not 23 | price. Our General Public Licenses are designed to make sure that you 24 | have the freedom to distribute copies of free software (and charge for 25 | them if you wish), that you receive source code or can get it if you 26 | want it, that you can change the software or use pieces of it in new 27 | free programs, and that you know you can do these things. 28 | 29 | To protect your rights, we need to prevent others from denying you 30 | these rights or asking you to surrender the rights. Therefore, you have 31 | certain responsibilities if you distribute copies of the software, or if 32 | you modify it: responsibilities to respect the freedom of others. 33 | 34 | For example, if you distribute copies of such a program, whether 35 | gratis or for a fee, you must pass on to the recipients the same 36 | freedoms that you received. You must make sure that they, too, receive 37 | or can get the source code. And you must show them these terms so they 38 | know their rights. 39 | 40 | Developers that use the GNU GPL protect your rights with two steps: 41 | (1) assert copyright on the software, and (2) offer you this License 42 | giving you legal permission to copy, distribute and/or modify it. 43 | 44 | For the developers' and authors' protection, the GPL clearly explains 45 | that there is no warranty for this free software. For both users' and 46 | authors' sake, the GPL requires that modified versions be marked as 47 | changed, so that their problems will not be attributed erroneously to 48 | authors of previous versions. 49 | 50 | Some devices are designed to deny users access to install or run 51 | modified versions of the software inside them, although the manufacturer 52 | can do so. This is fundamentally incompatible with the aim of 53 | protecting users' freedom to change the software. The systematic 54 | pattern of such abuse occurs in the area of products for individuals to 55 | use, which is precisely where it is most unacceptable. Therefore, we 56 | have designed this version of the GPL to prohibit the practice for those 57 | products.
If such problems arise substantially in other domains, we 58 | stand ready to extend this provision to those domains in future versions 59 | of the GPL, as needed to protect the freedom of users. 60 | 61 | Finally, every program is threatened constantly by software patents. 62 | States should not allow patents to restrict development and use of 63 | software on general-purpose computers, but in those that do, we wish to 64 | avoid the special danger that patents applied to a free program could 65 | make it effectively proprietary. To prevent this, the GPL assures that 66 | patents cannot be used to render the program non-free. 67 | 68 | The precise terms and conditions for copying, distribution and 69 | modification follow. 70 | 71 | TERMS AND CONDITIONS 72 | 73 | 0. Definitions. 74 | 75 | "This License" refers to version 3 of the GNU General Public License. 76 | 77 | "Copyright" also means copyright-like laws that apply to other kinds of 78 | works, such as semiconductor masks. 79 | 80 | "The Program" refers to any copyrightable work licensed under this 81 | License. Each licensee is addressed as "you". "Licensees" and 82 | "recipients" may be individuals or organizations. 83 | 84 | To "modify" a work means to copy from or adapt all or part of the work 85 | in a fashion requiring copyright permission, other than the making of an 86 | exact copy. The resulting work is called a "modified version" of the 87 | earlier work or a work "based on" the earlier work. 88 | 89 | A "covered work" means either the unmodified Program or a work based 90 | on the Program. 91 | 92 | To "propagate" a work means to do anything with it that, without 93 | permission, would make you directly or secondarily liable for 94 | infringement under applicable copyright law, except executing it on a 95 | computer or modifying a private copy. Propagation includes copying, 96 | distribution (with or without modification), making available to the 97 | public, and in some countries other activities as well. 98 | 99 | To "convey" a work means any kind of propagation that enables other 100 | parties to make or receive copies. Mere interaction with a user through 101 | a computer network, with no transfer of a copy, is not conveying. 102 | 103 | An interactive user interface displays "Appropriate Legal Notices" 104 | to the extent that it includes a convenient and prominently visible 105 | feature that (1) displays an appropriate copyright notice, and (2) 106 | tells the user that there is no warranty for the work (except to the 107 | extent that warranties are provided), that licensees may convey the 108 | work under this License, and how to view a copy of this License. If 109 | the interface presents a list of user commands or options, such as a 110 | menu, a prominent item in the list meets this criterion. 111 | 112 | 1. Source Code. 113 | 114 | The "source code" for a work means the preferred form of the work 115 | for making modifications to it. "Object code" means any non-source 116 | form of a work. 117 | 118 | A "Standard Interface" means an interface that either is an official 119 | standard defined by a recognized standards body, or, in the case of 120 | interfaces specified for a particular programming language, one that 121 | is widely used among developers working in that language. 
122 | 123 | The "System Libraries" of an executable work include anything, other 124 | than the work as a whole, that (a) is included in the normal form of 125 | packaging a Major Component, but which is not part of that Major 126 | Component, and (b) serves only to enable use of the work with that 127 | Major Component, or to implement a Standard Interface for which an 128 | implementation is available to the public in source code form. A 129 | "Major Component", in this context, means a major essential component 130 | (kernel, window system, and so on) of the specific operating system 131 | (if any) on which the executable work runs, or a compiler used to 132 | produce the work, or an object code interpreter used to run it. 133 | 134 | The "Corresponding Source" for a work in object code form means all 135 | the source code needed to generate, install, and (for an executable 136 | work) run the object code and to modify the work, including scripts to 137 | control those activities. However, it does not include the work's 138 | System Libraries, or general-purpose tools or generally available free 139 | programs which are used unmodified in performing those activities but 140 | which are not part of the work. For example, Corresponding Source 141 | includes interface definition files associated with source files for 142 | the work, and the source code for shared libraries and dynamically 143 | linked subprograms that the work is specifically designed to require, 144 | such as by intimate data communication or control flow between those 145 | subprograms and other parts of the work. 146 | 147 | The Corresponding Source need not include anything that users 148 | can regenerate automatically from other parts of the Corresponding 149 | Source. 150 | 151 | The Corresponding Source for a work in source code form is that 152 | same work. 153 | 154 | 2. Basic Permissions. 155 | 156 | All rights granted under this License are granted for the term of 157 | copyright on the Program, and are irrevocable provided the stated 158 | conditions are met. This License explicitly affirms your unlimited 159 | permission to run the unmodified Program. The output from running a 160 | covered work is covered by this License only if the output, given its 161 | content, constitutes a covered work. This License acknowledges your 162 | rights of fair use or other equivalent, as provided by copyright law. 163 | 164 | You may make, run and propagate covered works that you do not 165 | convey, without conditions so long as your license otherwise remains 166 | in force. You may convey covered works to others for the sole purpose 167 | of having them make modifications exclusively for you, or provide you 168 | with facilities for running those works, provided that you comply with 169 | the terms of this License in conveying all material for which you do 170 | not control copyright. Those thus making or running the covered works 171 | for you must do so exclusively on your behalf, under your direction 172 | and control, on terms that prohibit them from making any copies of 173 | your copyrighted material outside their relationship with you. 174 | 175 | Conveying under any other circumstances is permitted solely under 176 | the conditions stated below. Sublicensing is not allowed; section 10 177 | makes it unnecessary. 178 | 179 | 3. Protecting Users' Legal Rights From Anti-Circumvention Law. 
180 | 181 | No covered work shall be deemed part of an effective technological 182 | measure under any applicable law fulfilling obligations under article 183 | 11 of the WIPO copyright treaty adopted on 20 December 1996, or 184 | similar laws prohibiting or restricting circumvention of such 185 | measures. 186 | 187 | When you convey a covered work, you waive any legal power to forbid 188 | circumvention of technological measures to the extent such circumvention 189 | is effected by exercising rights under this License with respect to 190 | the covered work, and you disclaim any intention to limit operation or 191 | modification of the work as a means of enforcing, against the work's 192 | users, your or third parties' legal rights to forbid circumvention of 193 | technological measures. 194 | 195 | 4. Conveying Verbatim Copies. 196 | 197 | You may convey verbatim copies of the Program's source code as you 198 | receive it, in any medium, provided that you conspicuously and 199 | appropriately publish on each copy an appropriate copyright notice; 200 | keep intact all notices stating that this License and any 201 | non-permissive terms added in accord with section 7 apply to the code; 202 | keep intact all notices of the absence of any warranty; and give all 203 | recipients a copy of this License along with the Program. 204 | 205 | You may charge any price or no price for each copy that you convey, 206 | and you may offer support or warranty protection for a fee. 207 | 208 | 5. Conveying Modified Source Versions. 209 | 210 | You may convey a work based on the Program, or the modifications to 211 | produce it from the Program, in the form of source code under the 212 | terms of section 4, provided that you also meet all of these conditions: 213 | 214 | a) The work must carry prominent notices stating that you modified 215 | it, and giving a relevant date. 216 | 217 | b) The work must carry prominent notices stating that it is 218 | released under this License and any conditions added under section 219 | 7. This requirement modifies the requirement in section 4 to 220 | "keep intact all notices". 221 | 222 | c) You must license the entire work, as a whole, under this 223 | License to anyone who comes into possession of a copy. This 224 | License will therefore apply, along with any applicable section 7 225 | additional terms, to the whole of the work, and all its parts, 226 | regardless of how they are packaged. This License gives no 227 | permission to license the work in any other way, but it does not 228 | invalidate such permission if you have separately received it. 229 | 230 | d) If the work has interactive user interfaces, each must display 231 | Appropriate Legal Notices; however, if the Program has interactive 232 | interfaces that do not display Appropriate Legal Notices, your 233 | work need not make them do so. 234 | 235 | A compilation of a covered work with other separate and independent 236 | works, which are not by their nature extensions of the covered work, 237 | and which are not combined with it such as to form a larger program, 238 | in or on a volume of a storage or distribution medium, is called an 239 | "aggregate" if the compilation and its resulting copyright are not 240 | used to limit the access or legal rights of the compilation's users 241 | beyond what the individual works permit. Inclusion of a covered work 242 | in an aggregate does not cause this License to apply to the other 243 | parts of the aggregate. 244 | 245 | 6. Conveying Non-Source Forms. 
246 | 247 | You may convey a covered work in object code form under the terms 248 | of sections 4 and 5, provided that you also convey the 249 | machine-readable Corresponding Source under the terms of this License, 250 | in one of these ways: 251 | 252 | a) Convey the object code in, or embodied in, a physical product 253 | (including a physical distribution medium), accompanied by the 254 | Corresponding Source fixed on a durable physical medium 255 | customarily used for software interchange. 256 | 257 | b) Convey the object code in, or embodied in, a physical product 258 | (including a physical distribution medium), accompanied by a 259 | written offer, valid for at least three years and valid for as 260 | long as you offer spare parts or customer support for that product 261 | model, to give anyone who possesses the object code either (1) a 262 | copy of the Corresponding Source for all the software in the 263 | product that is covered by this License, on a durable physical 264 | medium customarily used for software interchange, for a price no 265 | more than your reasonable cost of physically performing this 266 | conveying of source, or (2) access to copy the 267 | Corresponding Source from a network server at no charge. 268 | 269 | c) Convey individual copies of the object code with a copy of the 270 | written offer to provide the Corresponding Source. This 271 | alternative is allowed only occasionally and noncommercially, and 272 | only if you received the object code with such an offer, in accord 273 | with subsection 6b. 274 | 275 | d) Convey the object code by offering access from a designated 276 | place (gratis or for a charge), and offer equivalent access to the 277 | Corresponding Source in the same way through the same place at no 278 | further charge. You need not require recipients to copy the 279 | Corresponding Source along with the object code. If the place to 280 | copy the object code is a network server, the Corresponding Source 281 | may be on a different server (operated by you or a third party) 282 | that supports equivalent copying facilities, provided you maintain 283 | clear directions next to the object code saying where to find the 284 | Corresponding Source. Regardless of what server hosts the 285 | Corresponding Source, you remain obligated to ensure that it is 286 | available for as long as needed to satisfy these requirements. 287 | 288 | e) Convey the object code using peer-to-peer transmission, provided 289 | you inform other peers where the object code and Corresponding 290 | Source of the work are being offered to the general public at no 291 | charge under subsection 6d. 292 | 293 | A separable portion of the object code, whose source code is excluded 294 | from the Corresponding Source as a System Library, need not be 295 | included in conveying the object code work. 296 | 297 | A "User Product" is either (1) a "consumer product", which means any 298 | tangible personal property which is normally used for personal, family, 299 | or household purposes, or (2) anything designed or sold for incorporation 300 | into a dwelling. In determining whether a product is a consumer product, 301 | doubtful cases shall be resolved in favor of coverage. For a particular 302 | product received by a particular user, "normally used" refers to a 303 | typical or common use of that class of product, regardless of the status 304 | of the particular user or of the way in which the particular user 305 | actually uses, or expects or is expected to use, the product. 
A product 306 | is a consumer product regardless of whether the product has substantial 307 | commercial, industrial or non-consumer uses, unless such uses represent 308 | the only significant mode of use of the product. 309 | 310 | "Installation Information" for a User Product means any methods, 311 | procedures, authorization keys, or other information required to install 312 | and execute modified versions of a covered work in that User Product from 313 | a modified version of its Corresponding Source. The information must 314 | suffice to ensure that the continued functioning of the modified object 315 | code is in no case prevented or interfered with solely because 316 | modification has been made. 317 | 318 | If you convey an object code work under this section in, or with, or 319 | specifically for use in, a User Product, and the conveying occurs as 320 | part of a transaction in which the right of possession and use of the 321 | User Product is transferred to the recipient in perpetuity or for a 322 | fixed term (regardless of how the transaction is characterized), the 323 | Corresponding Source conveyed under this section must be accompanied 324 | by the Installation Information. But this requirement does not apply 325 | if neither you nor any third party retains the ability to install 326 | modified object code on the User Product (for example, the work has 327 | been installed in ROM). 328 | 329 | The requirement to provide Installation Information does not include a 330 | requirement to continue to provide support service, warranty, or updates 331 | for a work that has been modified or installed by the recipient, or for 332 | the User Product in which it has been modified or installed. Access to a 333 | network may be denied when the modification itself materially and 334 | adversely affects the operation of the network or violates the rules and 335 | protocols for communication across the network. 336 | 337 | Corresponding Source conveyed, and Installation Information provided, 338 | in accord with this section must be in a format that is publicly 339 | documented (and with an implementation available to the public in 340 | source code form), and must require no special password or key for 341 | unpacking, reading or copying. 342 | 343 | 7. Additional Terms. 344 | 345 | "Additional permissions" are terms that supplement the terms of this 346 | License by making exceptions from one or more of its conditions. 347 | Additional permissions that are applicable to the entire Program shall 348 | be treated as though they were included in this License, to the extent 349 | that they are valid under applicable law. If additional permissions 350 | apply only to part of the Program, that part may be used separately 351 | under those permissions, but the entire Program remains governed by 352 | this License without regard to the additional permissions. 353 | 354 | When you convey a copy of a covered work, you may at your option 355 | remove any additional permissions from that copy, or from any part of 356 | it. (Additional permissions may be written to require their own 357 | removal in certain cases when you modify the work.) You may place 358 | additional permissions on material, added by you to a covered work, 359 | for which you have or can give appropriate copyright permission. 
360 | 361 | Notwithstanding any other provision of this License, for material you 362 | add to a covered work, you may (if authorized by the copyright holders of 363 | that material) supplement the terms of this License with terms: 364 | 365 | a) Disclaiming warranty or limiting liability differently from the 366 | terms of sections 15 and 16 of this License; or 367 | 368 | b) Requiring preservation of specified reasonable legal notices or 369 | author attributions in that material or in the Appropriate Legal 370 | Notices displayed by works containing it; or 371 | 372 | c) Prohibiting misrepresentation of the origin of that material, or 373 | requiring that modified versions of such material be marked in 374 | reasonable ways as different from the original version; or 375 | 376 | d) Limiting the use for publicity purposes of names of licensors or 377 | authors of the material; or 378 | 379 | e) Declining to grant rights under trademark law for use of some 380 | trade names, trademarks, or service marks; or 381 | 382 | f) Requiring indemnification of licensors and authors of that 383 | material by anyone who conveys the material (or modified versions of 384 | it) with contractual assumptions of liability to the recipient, for 385 | any liability that these contractual assumptions directly impose on 386 | those licensors and authors. 387 | 388 | All other non-permissive additional terms are considered "further 389 | restrictions" within the meaning of section 10. If the Program as you 390 | received it, or any part of it, contains a notice stating that it is 391 | governed by this License along with a term that is a further 392 | restriction, you may remove that term. If a license document contains 393 | a further restriction but permits relicensing or conveying under this 394 | License, you may add to a covered work material governed by the terms 395 | of that license document, provided that the further restriction does 396 | not survive such relicensing or conveying. 397 | 398 | If you add terms to a covered work in accord with this section, you 399 | must place, in the relevant source files, a statement of the 400 | additional terms that apply to those files, or a notice indicating 401 | where to find the applicable terms. 402 | 403 | Additional terms, permissive or non-permissive, may be stated in the 404 | form of a separately written license, or stated as exceptions; 405 | the above requirements apply either way. 406 | 407 | 8. Termination. 408 | 409 | You may not propagate or modify a covered work except as expressly 410 | provided under this License. Any attempt otherwise to propagate or 411 | modify it is void, and will automatically terminate your rights under 412 | this License (including any patent licenses granted under the third 413 | paragraph of section 11). 414 | 415 | However, if you cease all violation of this License, then your 416 | license from a particular copyright holder is reinstated (a) 417 | provisionally, unless and until the copyright holder explicitly and 418 | finally terminates your license, and (b) permanently, if the copyright 419 | holder fails to notify you of the violation by some reasonable means 420 | prior to 60 days after the cessation. 
421 | 422 | Moreover, your license from a particular copyright holder is 423 | reinstated permanently if the copyright holder notifies you of the 424 | violation by some reasonable means, this is the first time you have 425 | received notice of violation of this License (for any work) from that 426 | copyright holder, and you cure the violation prior to 30 days after 427 | your receipt of the notice. 428 | 429 | Termination of your rights under this section does not terminate the 430 | licenses of parties who have received copies or rights from you under 431 | this License. If your rights have been terminated and not permanently 432 | reinstated, you do not qualify to receive new licenses for the same 433 | material under section 10. 434 | 435 | 9. Acceptance Not Required for Having Copies. 436 | 437 | You are not required to accept this License in order to receive or 438 | run a copy of the Program. Ancillary propagation of a covered work 439 | occurring solely as a consequence of using peer-to-peer transmission 440 | to receive a copy likewise does not require acceptance. However, 441 | nothing other than this License grants you permission to propagate or 442 | modify any covered work. These actions infringe copyright if you do 443 | not accept this License. Therefore, by modifying or propagating a 444 | covered work, you indicate your acceptance of this License to do so. 445 | 446 | 10. Automatic Licensing of Downstream Recipients. 447 | 448 | Each time you convey a covered work, the recipient automatically 449 | receives a license from the original licensors, to run, modify and 450 | propagate that work, subject to this License. You are not responsible 451 | for enforcing compliance by third parties with this License. 452 | 453 | An "entity transaction" is a transaction transferring control of an 454 | organization, or substantially all assets of one, or subdividing an 455 | organization, or merging organizations. If propagation of a covered 456 | work results from an entity transaction, each party to that 457 | transaction who receives a copy of the work also receives whatever 458 | licenses to the work the party's predecessor in interest had or could 459 | give under the previous paragraph, plus a right to possession of the 460 | Corresponding Source of the work from the predecessor in interest, if 461 | the predecessor has it or can get it with reasonable efforts. 462 | 463 | You may not impose any further restrictions on the exercise of the 464 | rights granted or affirmed under this License. For example, you may 465 | not impose a license fee, royalty, or other charge for exercise of 466 | rights granted under this License, and you may not initiate litigation 467 | (including a cross-claim or counterclaim in a lawsuit) alleging that 468 | any patent claim is infringed by making, using, selling, offering for 469 | sale, or importing the Program or any portion of it. 470 | 471 | 11. Patents. 472 | 473 | A "contributor" is a copyright holder who authorizes use under this 474 | License of the Program or a work on which the Program is based. The 475 | work thus licensed is called the contributor's "contributor version". 
476 | 477 | A contributor's "essential patent claims" are all patent claims 478 | owned or controlled by the contributor, whether already acquired or 479 | hereafter acquired, that would be infringed by some manner, permitted 480 | by this License, of making, using, or selling its contributor version, 481 | but do not include claims that would be infringed only as a 482 | consequence of further modification of the contributor version. For 483 | purposes of this definition, "control" includes the right to grant 484 | patent sublicenses in a manner consistent with the requirements of 485 | this License. 486 | 487 | Each contributor grants you a non-exclusive, worldwide, royalty-free 488 | patent license under the contributor's essential patent claims, to 489 | make, use, sell, offer for sale, import and otherwise run, modify and 490 | propagate the contents of its contributor version. 491 | 492 | In the following three paragraphs, a "patent license" is any express 493 | agreement or commitment, however denominated, not to enforce a patent 494 | (such as an express permission to practice a patent or covenant not to 495 | sue for patent infringement). To "grant" such a patent license to a 496 | party means to make such an agreement or commitment not to enforce a 497 | patent against the party. 498 | 499 | If you convey a covered work, knowingly relying on a patent license, 500 | and the Corresponding Source of the work is not available for anyone 501 | to copy, free of charge and under the terms of this License, through a 502 | publicly available network server or other readily accessible means, 503 | then you must either (1) cause the Corresponding Source to be so 504 | available, or (2) arrange to deprive yourself of the benefit of the 505 | patent license for this particular work, or (3) arrange, in a manner 506 | consistent with the requirements of this License, to extend the patent 507 | license to downstream recipients. "Knowingly relying" means you have 508 | actual knowledge that, but for the patent license, your conveying the 509 | covered work in a country, or your recipient's use of the covered work 510 | in a country, would infringe one or more identifiable patents in that 511 | country that you have reason to believe are valid. 512 | 513 | If, pursuant to or in connection with a single transaction or 514 | arrangement, you convey, or propagate by procuring conveyance of, a 515 | covered work, and grant a patent license to some of the parties 516 | receiving the covered work authorizing them to use, propagate, modify 517 | or convey a specific copy of the covered work, then the patent license 518 | you grant is automatically extended to all recipients of the covered 519 | work and works based on it. 520 | 521 | A patent license is "discriminatory" if it does not include within 522 | the scope of its coverage, prohibits the exercise of, or is 523 | conditioned on the non-exercise of one or more of the rights that are 524 | specifically granted under this License. 
You may not convey a covered 525 | work if you are a party to an arrangement with a third party that is 526 | in the business of distributing software, under which you make payment 527 | to the third party based on the extent of your activity of conveying 528 | the work, and under which the third party grants, to any of the 529 | parties who would receive the covered work from you, a discriminatory 530 | patent license (a) in connection with copies of the covered work 531 | conveyed by you (or copies made from those copies), or (b) primarily 532 | for and in connection with specific products or compilations that 533 | contain the covered work, unless you entered into that arrangement, 534 | or that patent license was granted, prior to 28 March 2007. 535 | 536 | Nothing in this License shall be construed as excluding or limiting 537 | any implied license or other defenses to infringement that may 538 | otherwise be available to you under applicable patent law. 539 | 540 | 12. No Surrender of Others' Freedom. 541 | 542 | If conditions are imposed on you (whether by court order, agreement or 543 | otherwise) that contradict the conditions of this License, they do not 544 | excuse you from the conditions of this License. If you cannot convey a 545 | covered work so as to satisfy simultaneously your obligations under this 546 | License and any other pertinent obligations, then as a consequence you may 547 | not convey it at all. For example, if you agree to terms that obligate you 548 | to collect a royalty for further conveying from those to whom you convey 549 | the Program, the only way you could satisfy both those terms and this 550 | License would be to refrain entirely from conveying the Program. 551 | 552 | 13. Use with the GNU Affero General Public License. 553 | 554 | Notwithstanding any other provision of this License, you have 555 | permission to link or combine any covered work with a work licensed 556 | under version 3 of the GNU Affero General Public License into a single 557 | combined work, and to convey the resulting work. The terms of this 558 | License will continue to apply to the part which is the covered work, 559 | but the special requirements of the GNU Affero General Public License, 560 | section 13, concerning interaction through a network will apply to the 561 | combination as such. 562 | 563 | 14. Revised Versions of this License. 564 | 565 | The Free Software Foundation may publish revised and/or new versions of 566 | the GNU General Public License from time to time. Such new versions will 567 | be similar in spirit to the present version, but may differ in detail to 568 | address new problems or concerns. 569 | 570 | Each version is given a distinguishing version number. If the 571 | Program specifies that a certain numbered version of the GNU General 572 | Public License "or any later version" applies to it, you have the 573 | option of following the terms and conditions either of that numbered 574 | version or of any later version published by the Free Software 575 | Foundation. If the Program does not specify a version number of the 576 | GNU General Public License, you may choose any version ever published 577 | by the Free Software Foundation. 578 | 579 | If the Program specifies that a proxy can decide which future 580 | versions of the GNU General Public License can be used, that proxy's 581 | public statement of acceptance of a version permanently authorizes you 582 | to choose that version for the Program. 
583 | 584 | Later license versions may give you additional or different 585 | permissions. However, no additional obligations are imposed on any 586 | author or copyright holder as a result of your choosing to follow a 587 | later version. 588 | 589 | 15. Disclaimer of Warranty. 590 | 591 | THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY 592 | APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT 593 | HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY 594 | OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, 595 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 596 | PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM 597 | IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF 598 | ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 599 | 600 | 16. Limitation of Liability. 601 | 602 | IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 603 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS 604 | THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY 605 | GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE 606 | USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF 607 | DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD 608 | PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), 609 | EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF 610 | SUCH DAMAGES. 611 | 612 | 17. Interpretation of Sections 15 and 16. 613 | 614 | If the disclaimer of warranty and limitation of liability provided 615 | above cannot be given local legal effect according to their terms, 616 | reviewing courts shall apply local law that most closely approximates 617 | an absolute waiver of all civil liability in connection with the 618 | Program, unless a warranty or assumption of liability accompanies a 619 | copy of the Program in return for a fee. 620 | 621 | END OF TERMS AND CONDITIONS 622 | 623 | How to Apply These Terms to Your New Programs 624 | 625 | If you develop a new program, and you want it to be of the greatest 626 | possible use to the public, the best way to achieve this is to make it 627 | free software which everyone can redistribute and change under these terms. 628 | 629 | To do so, attach the following notices to the program. It is safest 630 | to attach them to the start of each source file to most effectively 631 | state the exclusion of warranty; and each file should have at least 632 | the "copyright" line and a pointer to where the full notice is found. 633 | 634 | <one line to give the program's name and a brief idea of what it does.> 635 | Copyright (C) <year> <name of author> 636 | 637 | This program is free software: you can redistribute it and/or modify 638 | it under the terms of the GNU General Public License as published by 639 | the Free Software Foundation, either version 3 of the License, or 640 | (at your option) any later version. 641 | 642 | This program is distributed in the hope that it will be useful, 643 | but WITHOUT ANY WARRANTY; without even the implied warranty of 644 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 645 | GNU General Public License for more details. 646 | 647 | You should have received a copy of the GNU General Public License 648 | along with this program. If not, see <https://www.gnu.org/licenses/>. 649 | 650 | Also add information on how to contact you by electronic and paper mail.
651 | 652 | If the program does terminal interaction, make it output a short 653 | notice like this when it starts in an interactive mode: 654 | 655 | <program> Copyright (C) <year> <name of author> 656 | This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. 657 | This is free software, and you are welcome to redistribute it 658 | under certain conditions; type `show c' for details. 659 | 660 | The hypothetical commands `show w' and `show c' should show the appropriate 661 | parts of the General Public License. Of course, your program's commands 662 | might be different; for a GUI interface, you would use an "about box". 663 | 664 | You should also get your employer (if you work as a programmer) or school, 665 | if any, to sign a "copyright disclaimer" for the program, if necessary. 666 | For more information on this, and how to apply and follow the GNU GPL, see 667 | <https://www.gnu.org/licenses/>. 668 | 669 | The GNU General Public License does not permit incorporating your program 670 | into proprietary programs. If your program is a subroutine library, you 671 | may consider it more useful to permit linking proprietary applications with 672 | the library. If this is what you want to do, use the GNU Lesser General 673 | Public License instead of this License. But first, please read 674 | <https://www.gnu.org/licenses/why-not-lgpl.html>. 675 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # gotun-tunnel 2 | A high-throughput point-to-point tunnel. 3 | 4 | ## multiqueue-tun 5 | The TUN interface is opened multiple times with the IFF_MULTI_QUEUE flag, giving each worker its own queue file descriptor instead of funnelling every packet through a single one. 6 | 7 | ## multi-socket (with/without reuseport) 8 | The UDP transport side is likewise spread over several sockets, optionally sharing one local port via SO_REUSEPORT so the kernel balances flows among them. A minimal Go sketch of both patterns appears after the kernel module source at the end of this document. 9 | -------------------------------------------------------------------------------- /config.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # go build 4 | # usage: ./config.sh client|server 5 | 6 | if [ "$1" == 'client' ] 7 | then 8 | ./tuntap client 4 192 168 56 1 35 & 9 | ifconfig wg2 172.16.0.2/30 10 | else 11 | ./tuntap server 4 192 168 56 1 35 & 12 | ifconfig wg2 172.16.0.1/30 13 | fi 14 | ifconfig wg2 mtu 1460 txqueuelen 2000 15 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module tuntap 2 | 3 | go 1.13 4 | 5 | require ( 6 | golang.org/x/net v0.0.0-20210510120150-4163338589ed 7 | golang.org/x/sys v0.0.0-20210514084401-e8d321eab015 8 | ) 9 | -------------------------------------------------------------------------------- /kmod/Makefile: -------------------------------------------------------------------------------- 1 | obj-m += tun.o 2 | -------------------------------------------------------------------------------- /kmod/tun.c.4.19: -------------------------------------------------------------------------------- 1 | /* 2 | * TUN - Universal TUN/TAP device driver. 3 | * Copyright (C) 1999-2002 Maxim Krasnyansky <maxk@qualcomm.com> 4 | * 5 | * This program is free software; you can redistribute it and/or modify 6 | * it under the terms of the GNU General Public License as published by 7 | * the Free Software Foundation; either version 2 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU General Public License for more details.
14 | * 15 | * $Id: tun.c,v 1.15 2002/03/01 02:44:24 maxk Exp $ 16 | */ 17 | 18 | /* 19 | * Changes: 20 | * 21 | * Mike Kershaw <dragorn@kismetwireless.net> 2005/08/14 22 | * Add TUNSETLINK ioctl to set the link encapsulation 23 | * 24 | * Mark Smith <markzzzsmith@yahoo.com.au> 25 | * Use eth_random_addr() for tap MAC address. 26 | * 27 | * Harald Roelle <harald.roelle@ifi.lmu.de> 2004/04/20 28 | * Fixes in packet dropping, queue length setting and queue wakeup. 29 | * Increased default tx queue length. 30 | * Added ethtool API. 31 | * Minor cleanups 32 | * 33 | * Daniel Podlejski <underley@underley.eu.org> 34 | * Modifications for 2.3.99-pre5 kernel. 35 | */ 36 | 37 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 38 | 39 | #define DRV_NAME "tun" 40 | #define DRV_VERSION "1.6" 41 | #define DRV_DESCRIPTION "Universal TUN/TAP device driver" 42 | #define DRV_COPYRIGHT "(C) 1999-2004 Max Krasnyansky <maxk@qualcomm.com>" 43 | 44 | #include <linux/module.h> 45 | #include <linux/errno.h> 46 | #include <linux/kernel.h> 47 | #include <linux/sched/signal.h> 48 | #include <linux/major.h> 49 | #include <linux/slab.h> 50 | #include <linux/poll.h> 51 | #include <linux/fcntl.h> 52 | #include <linux/init.h> 53 | #include <linux/skbuff.h> 54 | #include <linux/netdevice.h> 55 | #include <linux/etherdevice.h> 56 | #include <linux/miscdevice.h> 57 | #include <linux/ethtool.h> 58 | #include <linux/rtnetlink.h> 59 | #include <linux/compat.h> 60 | #include <linux/if.h> 61 | #include <linux/if_arp.h> 62 | #include <linux/if_ether.h> 63 | #include <linux/if_tun.h> 64 | #include <linux/if_vlan.h> 65 | #include <linux/crc32.h> 66 | #include <linux/nsproxy.h> 67 | #include <linux/virtio_net.h> 68 | #include <linux/rcupdate.h> 69 | #include <net/net_namespace.h> 70 | #include <net/netns/generic.h> 71 | #include <net/rtnetlink.h> 72 | #include <net/sock.h> 73 | #include <net/xdp.h> 74 | #include <linux/seq_file.h> 75 | #include <linux/uio.h> 76 | #include <linux/skb_array.h> 77 | #include <linux/bpf.h> 78 | #include <linux/bpf_trace.h> 79 | #include <linux/mutex.h> 80 | 81 | #include <linux/uaccess.h> 82 | #include <linux/proc_fs.h> 83 | 84 | static int rw = 0; 85 | module_param(rw, int, 0); 86 | 87 | static void tun_default_link_ksettings(struct net_device *dev, 88 | struct ethtool_link_ksettings *cmd); 89 | 90 | /* Uncomment to enable debugging */ 91 | /* #define TUN_DEBUG 1 */ 92 | 93 | #ifdef TUN_DEBUG 94 | static int debug; 95 | 96 | #define tun_debug(level, tun, fmt, args...) \ 97 | do { \ 98 | if (tun->debug) \ 99 | netdev_printk(level, tun->dev, fmt, ##args); \ 100 | } while (0) 101 | #define DBG1(level, fmt, args...) \ 102 | do { \ 103 | if (debug == 2) \ 104 | printk(level fmt, ##args); \ 105 | } while (0) 106 | #else 107 | #define tun_debug(level, tun, fmt, args...) \ 108 | do { \ 109 | if (0) \ 110 | netdev_printk(level, tun->dev, fmt, ##args); \ 111 | } while (0) 112 | #define DBG1(level, fmt, args...) \ 113 | do { \ 114 | if (0) \ 115 | printk(level fmt, ##args); \ 116 | } while (0) 117 | #endif 118 | 119 | #define TUN_HEADROOM 256 120 | #define TUN_RX_PAD (NET_IP_ALIGN + NET_SKB_PAD) 121 | 122 | /* TUN device flags */ 123 | 124 | /* IFF_ATTACH_QUEUE is never stored in device flags, 125 | * overload it to mean fasync when stored there. 126 | */ 127 | #define TUN_FASYNC IFF_ATTACH_QUEUE 128 | /* High bits in flags field are unused. */ 129 | #define TUN_VNET_LE 0x80000000 130 | #define TUN_VNET_BE 0x40000000 131 | 132 | #define TUN_FEATURES (IFF_NO_PI | IFF_ONE_QUEUE | IFF_VNET_HDR | \ 133 | IFF_MULTI_QUEUE | IFF_NAPI | IFF_NAPI_FRAGS) 134 | 135 | #define GOODCOPY_LEN 128 136 | 137 | #define FLT_EXACT_COUNT 8 138 | struct tap_filter { 139 | unsigned int count; /* Number of addrs. Zero means disabled */ 140 | u32 mask[2]; /* Mask of the hashed addrs */ 141 | unsigned char addr[FLT_EXACT_COUNT][ETH_ALEN]; 142 | }; 143 | 144 | /* MAX_TAP_QUEUES 256 is chosen to allow rx/tx queues to be equal 145 | * to max number of VCPUs in guest.
*/ 146 | #define MAX_TAP_QUEUES 256 147 | #define MAX_TAP_FLOWS 4096 148 | 149 | #define TUN_FLOW_EXPIRE (3 * HZ) 150 | 151 | struct tun_pcpu_stats { 152 | u64 rx_packets; 153 | u64 rx_bytes; 154 | u64 tx_packets; 155 | u64 tx_bytes; 156 | struct u64_stats_sync syncp; 157 | u32 rx_dropped; 158 | u32 tx_dropped; 159 | u32 rx_frame_errors; 160 | }; 161 | 162 | /* A tun_file connects an open character device to a tuntap netdevice. It 163 | * also contains all socket related structures (except sock_fprog and tap_filter) 164 | * to serve as one transmit queue for tuntap device. The sock_fprog and 165 | * tap_filter were kept in tun_struct since they were used for filtering for the 166 | * netdevice not for a specific queue (at least I didn't see the requirement for 167 | * this). 168 | * 169 | * RCU usage: 170 | * The tun_file and tun_struct are loosely coupled, the pointer from one to the 171 | * other can only be read while rcu_read_lock or rtnl_lock is held. 172 | */ 173 | struct tun_file { 174 | struct sock sk; 175 | struct socket socket; 176 | struct socket_wq wq; 177 | struct tun_struct __rcu *tun; 178 | struct fasync_struct *fasync; 179 | /* only used for fasnyc */ 180 | unsigned int flags; 181 | union { 182 | u16 queue_index; 183 | unsigned int ifindex; 184 | }; 185 | struct napi_struct napi; 186 | bool napi_enabled; 187 | bool napi_frags_enabled; 188 | struct mutex napi_mutex; /* Protects access to the above napi */ 189 | struct list_head next; 190 | struct tun_struct *detached; 191 | struct ptr_ring tx_ring; 192 | struct xdp_rxq_info xdp_rxq; 193 | }; 194 | 195 | struct tun_flow_entry { 196 | struct hlist_node hash_link; 197 | struct rcu_head rcu; 198 | struct tun_struct *tun; 199 | 200 | u32 rxhash; 201 | u32 rps_rxhash; 202 | int queue_index; 203 | unsigned long updated; 204 | }; 205 | 206 | #define TUN_NUM_FLOW_ENTRIES 1024 207 | #define TUN_MASK_FLOW_ENTRIES (TUN_NUM_FLOW_ENTRIES - 1) 208 | 209 | struct tun_prog { 210 | struct rcu_head rcu; 211 | struct bpf_prog *prog; 212 | }; 213 | 214 | /* Since the socket were moved to tun_file, to preserve the behavior of persist 215 | * device, socket filter, sndbuf and vnet header size were restore when the 216 | * file were attached to a persist device. 
217 | */ 218 | struct tun_struct { 219 | struct tun_file __rcu *tfiles[MAX_TAP_QUEUES]; 220 | unsigned int numqueues; 221 | unsigned int flags; 222 | kuid_t owner; 223 | kgid_t group; 224 | 225 | struct net_device *dev; 226 | netdev_features_t set_features; 227 | #define TUN_USER_FEATURES (NETIF_F_HW_CSUM|NETIF_F_TSO_ECN|NETIF_F_TSO| \ 228 | NETIF_F_TSO6) 229 | 230 | int align; 231 | int vnet_hdr_sz; 232 | int sndbuf; 233 | struct tap_filter txflt; 234 | struct sock_fprog fprog; 235 | /* protected by rtnl lock */ 236 | bool filter_attached; 237 | #ifdef TUN_DEBUG 238 | int debug; 239 | #endif 240 | spinlock_t lock; 241 | struct hlist_head flows[TUN_NUM_FLOW_ENTRIES]; 242 | struct timer_list flow_gc_timer; 243 | unsigned long ageing_time; 244 | unsigned int numdisabled; 245 | struct list_head disabled; 246 | void *security; 247 | u32 flow_count; 248 | u32 rx_batched; 249 | struct tun_pcpu_stats __percpu *pcpu_stats; 250 | struct bpf_prog __rcu *xdp_prog; 251 | struct tun_prog __rcu *steering_prog; 252 | struct tun_prog __rcu *filter_prog; 253 | struct ethtool_link_ksettings link_ksettings; 254 | }; 255 | 256 | struct veth { 257 | __be16 h_vlan_proto; 258 | __be16 h_vlan_TCI; 259 | }; 260 | 261 | bool tun_is_xdp_frame(void *ptr) 262 | { 263 | return (unsigned long)ptr & TUN_XDP_FLAG; 264 | } 265 | EXPORT_SYMBOL(tun_is_xdp_frame); 266 | 267 | void *tun_xdp_to_ptr(void *ptr) 268 | { 269 | return (void *)((unsigned long)ptr | TUN_XDP_FLAG); 270 | } 271 | EXPORT_SYMBOL(tun_xdp_to_ptr); 272 | 273 | void *tun_ptr_to_xdp(void *ptr) 274 | { 275 | return (void *)((unsigned long)ptr & ~TUN_XDP_FLAG); 276 | } 277 | EXPORT_SYMBOL(tun_ptr_to_xdp); 278 | 279 | static int tun_napi_receive(struct napi_struct *napi, int budget) 280 | { 281 | struct tun_file *tfile = container_of(napi, struct tun_file, napi); 282 | struct sk_buff_head *queue = &tfile->sk.sk_write_queue; 283 | struct sk_buff_head process_queue; 284 | struct sk_buff *skb; 285 | int received = 0; 286 | 287 | __skb_queue_head_init(&process_queue); 288 | 289 | spin_lock(&queue->lock); 290 | skb_queue_splice_tail_init(queue, &process_queue); 291 | spin_unlock(&queue->lock); 292 | 293 | while (received < budget && (skb = __skb_dequeue(&process_queue))) { 294 | napi_gro_receive(napi, skb); 295 | ++received; 296 | } 297 | 298 | if (!skb_queue_empty(&process_queue)) { 299 | spin_lock(&queue->lock); 300 | skb_queue_splice(&process_queue, queue); 301 | spin_unlock(&queue->lock); 302 | } 303 | 304 | return received; 305 | } 306 | 307 | static int tun_napi_poll(struct napi_struct *napi, int budget) 308 | { 309 | unsigned int received; 310 | 311 | received = tun_napi_receive(napi, budget); 312 | 313 | if (received < budget) 314 | napi_complete_done(napi, received); 315 | 316 | return received; 317 | } 318 | 319 | static void tun_napi_init(struct tun_struct *tun, struct tun_file *tfile, 320 | bool napi_en, bool napi_frags) 321 | { 322 | tfile->napi_enabled = napi_en; 323 | tfile->napi_frags_enabled = napi_en && napi_frags; 324 | if (napi_en) { 325 | netif_tx_napi_add(tun->dev, &tfile->napi, tun_napi_poll, 326 | NAPI_POLL_WEIGHT); 327 | napi_enable(&tfile->napi); 328 | } 329 | } 330 | 331 | static void tun_napi_disable(struct tun_file *tfile) 332 | { 333 | if (tfile->napi_enabled) 334 | napi_disable(&tfile->napi); 335 | } 336 | 337 | static void tun_napi_del(struct tun_file *tfile) 338 | { 339 | if (tfile->napi_enabled) 340 | netif_napi_del(&tfile->napi); 341 | } 342 | 343 | static bool tun_napi_frags_enabled(const struct tun_file *tfile) 344 | { 345 | return 
tfile->napi_frags_enabled; 346 | } 347 | 348 | #ifdef CONFIG_TUN_VNET_CROSS_LE 349 | static inline bool tun_legacy_is_little_endian(struct tun_struct *tun) 350 | { 351 | return tun->flags & TUN_VNET_BE ? false : 352 | virtio_legacy_is_little_endian(); 353 | } 354 | 355 | static long tun_get_vnet_be(struct tun_struct *tun, int __user *argp) 356 | { 357 | int be = !!(tun->flags & TUN_VNET_BE); 358 | 359 | if (put_user(be, argp)) 360 | return -EFAULT; 361 | 362 | return 0; 363 | } 364 | 365 | static long tun_set_vnet_be(struct tun_struct *tun, int __user *argp) 366 | { 367 | int be; 368 | 369 | if (get_user(be, argp)) 370 | return -EFAULT; 371 | 372 | if (be) 373 | tun->flags |= TUN_VNET_BE; 374 | else 375 | tun->flags &= ~TUN_VNET_BE; 376 | 377 | return 0; 378 | } 379 | #else 380 | static inline bool tun_legacy_is_little_endian(struct tun_struct *tun) 381 | { 382 | return virtio_legacy_is_little_endian(); 383 | } 384 | 385 | static long tun_get_vnet_be(struct tun_struct *tun, int __user *argp) 386 | { 387 | return -EINVAL; 388 | } 389 | 390 | static long tun_set_vnet_be(struct tun_struct *tun, int __user *argp) 391 | { 392 | return -EINVAL; 393 | } 394 | #endif /* CONFIG_TUN_VNET_CROSS_LE */ 395 | 396 | static inline bool tun_is_little_endian(struct tun_struct *tun) 397 | { 398 | return tun->flags & TUN_VNET_LE || 399 | tun_legacy_is_little_endian(tun); 400 | } 401 | 402 | static inline u16 tun16_to_cpu(struct tun_struct *tun, __virtio16 val) 403 | { 404 | return __virtio16_to_cpu(tun_is_little_endian(tun), val); 405 | } 406 | 407 | static inline __virtio16 cpu_to_tun16(struct tun_struct *tun, u16 val) 408 | { 409 | return __cpu_to_virtio16(tun_is_little_endian(tun), val); 410 | } 411 | 412 | static inline u32 tun_hashfn(u32 rxhash) 413 | { 414 | return rxhash & TUN_MASK_FLOW_ENTRIES; 415 | } 416 | 417 | static struct tun_flow_entry *tun_flow_find(struct hlist_head *head, u32 rxhash) 418 | { 419 | struct tun_flow_entry *e; 420 | 421 | hlist_for_each_entry_rcu(e, head, hash_link) { 422 | if (e->rxhash == rxhash) 423 | return e; 424 | } 425 | return NULL; 426 | } 427 | 428 | static struct tun_flow_entry *tun_flow_create(struct tun_struct *tun, 429 | struct hlist_head *head, 430 | u32 rxhash, u16 queue_index) 431 | { 432 | struct tun_flow_entry *e = kmalloc(sizeof(*e), GFP_ATOMIC); 433 | 434 | if (e) { 435 | tun_debug(KERN_INFO, tun, "create flow: hash %u index %u\n", 436 | rxhash, queue_index); 437 | e->updated = jiffies; 438 | e->rxhash = rxhash; 439 | e->rps_rxhash = 0; 440 | e->queue_index = queue_index; 441 | e->tun = tun; 442 | hlist_add_head_rcu(&e->hash_link, head); 443 | ++tun->flow_count; 444 | } 445 | return e; 446 | } 447 | 448 | static void tun_flow_delete(struct tun_struct *tun, struct tun_flow_entry *e) 449 | { 450 | tun_debug(KERN_INFO, tun, "delete flow: hash %u index %u\n", 451 | e->rxhash, e->queue_index); 452 | hlist_del_rcu(&e->hash_link); 453 | kfree_rcu(e, rcu); 454 | --tun->flow_count; 455 | } 456 | 457 | static void tun_flow_flush(struct tun_struct *tun) 458 | { 459 | int i; 460 | 461 | spin_lock_bh(&tun->lock); 462 | for (i = 0; i < TUN_NUM_FLOW_ENTRIES; i++) { 463 | struct tun_flow_entry *e; 464 | struct hlist_node *n; 465 | 466 | hlist_for_each_entry_safe(e, n, &tun->flows[i], hash_link) 467 | tun_flow_delete(tun, e); 468 | } 469 | spin_unlock_bh(&tun->lock); 470 | } 471 | 472 | static void tun_flow_delete_by_queue(struct tun_struct *tun, u16 queue_index) 473 | { 474 | int i; 475 | 476 | spin_lock_bh(&tun->lock); 477 | for (i = 0; i < TUN_NUM_FLOW_ENTRIES; i++) { 478 
| struct tun_flow_entry *e; 479 | struct hlist_node *n; 480 | 481 | hlist_for_each_entry_safe(e, n, &tun->flows[i], hash_link) { 482 | if (e->queue_index == queue_index) 483 | tun_flow_delete(tun, e); 484 | } 485 | } 486 | spin_unlock_bh(&tun->lock); 487 | } 488 | 489 | static void tun_flow_cleanup(struct timer_list *t) 490 | { 491 | struct tun_struct *tun = from_timer(tun, t, flow_gc_timer); 492 | unsigned long delay = tun->ageing_time; 493 | unsigned long next_timer = jiffies + delay; 494 | unsigned long count = 0; 495 | int i; 496 | 497 | tun_debug(KERN_INFO, tun, "tun_flow_cleanup\n"); 498 | 499 | spin_lock(&tun->lock); 500 | for (i = 0; i < TUN_NUM_FLOW_ENTRIES; i++) { 501 | struct tun_flow_entry *e; 502 | struct hlist_node *n; 503 | 504 | hlist_for_each_entry_safe(e, n, &tun->flows[i], hash_link) { 505 | unsigned long this_timer; 506 | 507 | this_timer = e->updated + delay; 508 | if (time_before_eq(this_timer, jiffies)) { 509 | tun_flow_delete(tun, e); 510 | continue; 511 | } 512 | count++; 513 | if (time_before(this_timer, next_timer)) 514 | next_timer = this_timer; 515 | } 516 | } 517 | 518 | if (count) 519 | mod_timer(&tun->flow_gc_timer, round_jiffies_up(next_timer)); 520 | spin_unlock(&tun->lock); 521 | } 522 | 523 | static void tun_flow_update(struct tun_struct *tun, u32 rxhash, 524 | struct tun_file *tfile) 525 | { 526 | struct hlist_head *head; 527 | struct tun_flow_entry *e; 528 | unsigned long delay = tun->ageing_time; 529 | u16 queue_index = tfile->queue_index; 530 | 531 | if (!rxhash) 532 | return; 533 | else 534 | head = &tun->flows[tun_hashfn(rxhash)]; 535 | 536 | rcu_read_lock(); 537 | 538 | e = tun_flow_find(head, rxhash); 539 | if (likely(e)) { 540 | /* TODO: keep queueing to old queue until it's empty? */ 541 | e->queue_index = queue_index; 542 | e->updated = jiffies; 543 | sock_rps_record_flow_hash(e->rps_rxhash); 544 | } else { 545 | spin_lock_bh(&tun->lock); 546 | if (!tun_flow_find(head, rxhash) && 547 | tun->flow_count < MAX_TAP_FLOWS) 548 | tun_flow_create(tun, head, rxhash, queue_index); 549 | 550 | if (!timer_pending(&tun->flow_gc_timer)) 551 | mod_timer(&tun->flow_gc_timer, 552 | round_jiffies_up(jiffies + delay)); 553 | spin_unlock_bh(&tun->lock); 554 | } 555 | 556 | rcu_read_unlock(); 557 | } 558 | 559 | /** 560 | * Save the hash received in the stack receive path and update the 561 | * flow_hash table accordingly. 562 | */ 563 | static inline void tun_flow_save_rps_rxhash(struct tun_flow_entry *e, u32 hash) 564 | { 565 | if (unlikely(e->rps_rxhash != hash)) 566 | e->rps_rxhash = hash; 567 | } 568 | 569 | /* We try to identify a flow through its rxhash first. The reason that 570 | * we do not check rxq no. is because some cards(e.g 82599), chooses 571 | * the rxq based on the txq where the last packet of the flow comes. As 572 | * the userspace application move between processors, we may get a 573 | * different rxq no. here. If we could not get rxhash, then we would 574 | * hope the rxq no. may help here. 
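 * (Worked example of the multiply-and-shift used below: with numqueues = 4, a symmetric hash of 0xC0000000 yields ((u64)0xC0000000 * 4) >> 32 = 3; the 32-bit hash is scaled proportionally into [0, numqueues) with one multiply and one shift instead of a per-packet divide.)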
575 | */ 576 | static u16 tun_automq_select_queue(struct tun_struct *tun, struct sk_buff *skb) 577 | { 578 | struct tun_flow_entry *e; 579 | u32 txq = 0; 580 | u32 numqueues = 0; 581 | 582 | numqueues = READ_ONCE(tun->numqueues); 583 | 584 | txq = __skb_get_hash_symmetric(skb); 585 | if (txq) { 586 | e = tun_flow_find(&tun->flows[tun_hashfn(txq)], txq); 587 | if (e) { 588 | tun_flow_save_rps_rxhash(e, txq); 589 | txq = e->queue_index; 590 | } else 591 | /* use multiply and shift instead of expensive divide */ 592 | txq = ((u64)txq * numqueues) >> 32; 593 | } else if (likely(skb_rx_queue_recorded(skb))) { 594 | txq = skb_get_rx_queue(skb); 595 | while (unlikely(txq >= numqueues)) 596 | txq -= numqueues; 597 | } 598 | 599 | return txq; 600 | } 601 | 602 | static u16 tun_ebpf_select_queue(struct tun_struct *tun, struct sk_buff *skb) 603 | { 604 | struct tun_prog *prog; 605 | u32 numqueues; 606 | u16 ret = 0; 607 | 608 | numqueues = READ_ONCE(tun->numqueues); 609 | if (!numqueues) 610 | return 0; 611 | 612 | prog = rcu_dereference(tun->steering_prog); 613 | if (prog) 614 | ret = bpf_prog_run_clear_cb(prog->prog, skb); 615 | 616 | return ret % numqueues; 617 | } 618 | 619 | static u16 tun_select_queue(struct net_device *dev, struct sk_buff *skb, 620 | struct net_device *sb_dev, 621 | select_queue_fallback_t fallback) 622 | { 623 | struct tun_struct *tun = netdev_priv(dev); 624 | u16 ret; 625 | 626 | rcu_read_lock(); 627 | if (rcu_dereference(tun->steering_prog)) 628 | ret = tun_ebpf_select_queue(tun, skb); 629 | else 630 | ret = tun_automq_select_queue(tun, skb); 631 | rcu_read_unlock(); 632 | 633 | return ret; 634 | } 635 | 636 | static inline bool tun_not_capable(struct tun_struct *tun) 637 | { 638 | const struct cred *cred = current_cred(); 639 | struct net *net = dev_net(tun->dev); 640 | 641 | return ((uid_valid(tun->owner) && !uid_eq(cred->euid, tun->owner)) || 642 | (gid_valid(tun->group) && !in_egroup_p(tun->group))) && 643 | !ns_capable(net->user_ns, CAP_NET_ADMIN); 644 | } 645 | 646 | static void tun_set_real_num_queues(struct tun_struct *tun) 647 | { 648 | netif_set_real_num_tx_queues(tun->dev, tun->numqueues); 649 | netif_set_real_num_rx_queues(tun->dev, tun->numqueues); 650 | } 651 | 652 | static void tun_disable_queue(struct tun_struct *tun, struct tun_file *tfile) 653 | { 654 | tfile->detached = tun; 655 | list_add_tail(&tfile->next, &tun->disabled); 656 | ++tun->numdisabled; 657 | } 658 | 659 | static struct tun_struct *tun_enable_queue(struct tun_file *tfile) 660 | { 661 | struct tun_struct *tun = tfile->detached; 662 | 663 | tfile->detached = NULL; 664 | list_del_init(&tfile->next); 665 | --tun->numdisabled; 666 | return tun; 667 | } 668 | 669 | void tun_ptr_free(void *ptr) 670 | { 671 | if (!ptr) 672 | return; 673 | if (tun_is_xdp_frame(ptr)) { 674 | struct xdp_frame *xdpf = tun_ptr_to_xdp(ptr); 675 | 676 | xdp_return_frame(xdpf); 677 | } else { 678 | __skb_array_destroy_skb(ptr); 679 | } 680 | } 681 | EXPORT_SYMBOL_GPL(tun_ptr_free); 682 | 683 | static void tun_queue_purge(struct tun_file *tfile) 684 | { 685 | void *ptr; 686 | 687 | while ((ptr = ptr_ring_consume(&tfile->tx_ring)) != NULL) 688 | tun_ptr_free(ptr); 689 | 690 | skb_queue_purge(&tfile->sk.sk_write_queue); 691 | skb_queue_purge(&tfile->sk.sk_error_queue); 692 | } 693 | 694 | static void __tun_detach(struct tun_file *tfile, bool clean) 695 | { 696 | struct tun_file *ntfile; 697 | struct tun_struct *tun; 698 | 699 | tun = rtnl_dereference(tfile->tun); 700 | 701 | if (tun && clean) { 702 | tun_napi_disable(tfile); 
703 | tun_napi_del(tfile); 704 | } 705 | 706 | if (tun && !tfile->detached) { 707 | u16 index = tfile->queue_index; 708 | BUG_ON(index >= tun->numqueues); 709 | 710 | rcu_assign_pointer(tun->tfiles[index], 711 | tun->tfiles[tun->numqueues - 1]); 712 | ntfile = rtnl_dereference(tun->tfiles[index]); 713 | ntfile->queue_index = index; 714 | rcu_assign_pointer(tun->tfiles[tun->numqueues - 1], 715 | NULL); 716 | 717 | --tun->numqueues; 718 | if (clean) { 719 | RCU_INIT_POINTER(tfile->tun, NULL); 720 | sock_put(&tfile->sk); 721 | } else 722 | tun_disable_queue(tun, tfile); 723 | 724 | synchronize_net(); 725 | tun_flow_delete_by_queue(tun, tun->numqueues + 1); 726 | /* Drop read queue */ 727 | tun_queue_purge(tfile); 728 | tun_set_real_num_queues(tun); 729 | } else if (tfile->detached && clean) { 730 | tun = tun_enable_queue(tfile); 731 | sock_put(&tfile->sk); 732 | } 733 | 734 | if (clean) { 735 | if (tun && tun->numqueues == 0 && tun->numdisabled == 0) { 736 | netif_carrier_off(tun->dev); 737 | 738 | if (!(tun->flags & IFF_PERSIST) && 739 | tun->dev->reg_state == NETREG_REGISTERED) 740 | unregister_netdevice(tun->dev); 741 | } 742 | if (tun) 743 | xdp_rxq_info_unreg(&tfile->xdp_rxq); 744 | ptr_ring_cleanup(&tfile->tx_ring, tun_ptr_free); 745 | sock_put(&tfile->sk); 746 | } 747 | } 748 | 749 | static void tun_detach(struct tun_file *tfile, bool clean) 750 | { 751 | struct tun_struct *tun; 752 | struct net_device *dev; 753 | 754 | rtnl_lock(); 755 | tun = rtnl_dereference(tfile->tun); 756 | dev = tun ? tun->dev : NULL; 757 | __tun_detach(tfile, clean); 758 | if (dev) 759 | netdev_state_change(dev); 760 | rtnl_unlock(); 761 | } 762 | 763 | static void tun_detach_all(struct net_device *dev) 764 | { 765 | struct tun_struct *tun = netdev_priv(dev); 766 | struct tun_file *tfile, *tmp; 767 | int i, n = tun->numqueues; 768 | 769 | for (i = 0; i < n; i++) { 770 | tfile = rtnl_dereference(tun->tfiles[i]); 771 | BUG_ON(!tfile); 772 | tun_napi_disable(tfile); 773 | tfile->socket.sk->sk_shutdown = RCV_SHUTDOWN; 774 | tfile->socket.sk->sk_data_ready(tfile->socket.sk); 775 | RCU_INIT_POINTER(tfile->tun, NULL); 776 | --tun->numqueues; 777 | } 778 | list_for_each_entry(tfile, &tun->disabled, next) { 779 | tfile->socket.sk->sk_shutdown = RCV_SHUTDOWN; 780 | tfile->socket.sk->sk_data_ready(tfile->socket.sk); 781 | RCU_INIT_POINTER(tfile->tun, NULL); 782 | } 783 | BUG_ON(tun->numqueues != 0); 784 | 785 | synchronize_net(); 786 | for (i = 0; i < n; i++) { 787 | tfile = rtnl_dereference(tun->tfiles[i]); 788 | tun_napi_del(tfile); 789 | /* Drop read queue */ 790 | tun_queue_purge(tfile); 791 | xdp_rxq_info_unreg(&tfile->xdp_rxq); 792 | sock_put(&tfile->sk); 793 | } 794 | list_for_each_entry_safe(tfile, tmp, &tun->disabled, next) { 795 | tun_enable_queue(tfile); 796 | tun_queue_purge(tfile); 797 | xdp_rxq_info_unreg(&tfile->xdp_rxq); 798 | sock_put(&tfile->sk); 799 | } 800 | BUG_ON(tun->numdisabled != 0); 801 | 802 | if (tun->flags & IFF_PERSIST) 803 | module_put(THIS_MODULE); 804 | } 805 | 806 | static int tun_attach(struct tun_struct *tun, struct file *file, 807 | bool skip_filter, bool napi, bool napi_frags, 808 | bool publish_tun) 809 | { 810 | struct tun_file *tfile = file->private_data; 811 | struct net_device *dev = tun->dev; 812 | int err; 813 | 814 | err = security_tun_dev_attach(tfile->socket.sk, tun->security); 815 | if (err < 0) 816 | goto out; 817 | 818 | err = -EINVAL; 819 | if (rtnl_dereference(tfile->tun) && !tfile->detached) 820 | goto out; 821 | 822 | err = -EBUSY; 823 | if (!(tun->flags & 
IFF_MULTI_QUEUE) && tun->numqueues == 1) 824 | goto out; 825 | 826 | err = -E2BIG; 827 | if (!tfile->detached && 828 | tun->numqueues + tun->numdisabled == MAX_TAP_QUEUES) 829 | goto out; 830 | 831 | err = 0; 832 | 833 | /* Re-attach the filter to persist device */ 834 | if (!skip_filter && (tun->filter_attached == true)) { 835 | lock_sock(tfile->socket.sk); 836 | err = sk_attach_filter(&tun->fprog, tfile->socket.sk); 837 | release_sock(tfile->socket.sk); 838 | if (!err) 839 | goto out; 840 | } 841 | 842 | if (!tfile->detached && 843 | ptr_ring_resize(&tfile->tx_ring, dev->tx_queue_len, 844 | GFP_KERNEL, tun_ptr_free)) { 845 | err = -ENOMEM; 846 | goto out; 847 | } 848 | 849 | tfile->queue_index = tun->numqueues; 850 | tfile->socket.sk->sk_shutdown &= ~RCV_SHUTDOWN; 851 | 852 | if (tfile->detached) { 853 | /* Re-attach detached tfile, updating XDP queue_index */ 854 | WARN_ON(!xdp_rxq_info_is_reg(&tfile->xdp_rxq)); 855 | 856 | if (tfile->xdp_rxq.queue_index != tfile->queue_index) 857 | tfile->xdp_rxq.queue_index = tfile->queue_index; 858 | } else { 859 | /* Setup XDP RX-queue info, for new tfile getting attached */ 860 | err = xdp_rxq_info_reg(&tfile->xdp_rxq, 861 | tun->dev, tfile->queue_index); 862 | if (err < 0) 863 | goto out; 864 | err = xdp_rxq_info_reg_mem_model(&tfile->xdp_rxq, 865 | MEM_TYPE_PAGE_SHARED, NULL); 866 | if (err < 0) { 867 | xdp_rxq_info_unreg(&tfile->xdp_rxq); 868 | goto out; 869 | } 870 | err = 0; 871 | } 872 | 873 | if (tfile->detached) { 874 | tun_enable_queue(tfile); 875 | } else { 876 | sock_hold(&tfile->sk); 877 | tun_napi_init(tun, tfile, napi, napi_frags); 878 | } 879 | 880 | /* device is allowed to go away first, so no need to hold extra 881 | * refcnt. 882 | */ 883 | 884 | /* Publish tfile->tun and tun->tfiles only after we've fully 885 | * initialized tfile; otherwise we risk using half-initialized 886 | * object. 887 | */ 888 | if (publish_tun) 889 | rcu_assign_pointer(tfile->tun, tun); 890 | rcu_assign_pointer(tun->tfiles[tun->numqueues], tfile); 891 | tun->numqueues++; 892 | tun_set_real_num_queues(tun); 893 | out: 894 | return err; 895 | } 896 | 897 | static struct tun_struct *tun_get(struct tun_file *tfile) 898 | { 899 | struct tun_struct *tun; 900 | 901 | rcu_read_lock(); 902 | tun = rcu_dereference(tfile->tun); 903 | if (tun) 904 | dev_hold(tun->dev); 905 | rcu_read_unlock(); 906 | 907 | return tun; 908 | } 909 | 910 | static void tun_put(struct tun_struct *tun) 911 | { 912 | dev_put(tun->dev); 913 | } 914 | 915 | /* TAP filtering */ 916 | static void addr_hash_set(u32 *mask, const u8 *addr) 917 | { 918 | int n = ether_crc(ETH_ALEN, addr) >> 26; 919 | mask[n >> 5] |= (1 << (n & 31)); 920 | } 921 | 922 | static unsigned int addr_hash_test(const u32 *mask, const u8 *addr) 923 | { 924 | int n = ether_crc(ETH_ALEN, addr) >> 26; 925 | return mask[n >> 5] & (1 << (n & 31)); 926 | } 927 | 928 | static int update_filter(struct tap_filter *filter, void __user *arg) 929 | { 930 | struct { u8 u[ETH_ALEN]; } *addr; 931 | struct tun_filter uf; 932 | int err, alen, n, nexact; 933 | 934 | if (copy_from_user(&uf, arg, sizeof(uf))) 935 | return -EFAULT; 936 | 937 | if (!uf.count) { 938 | /* Disabled */ 939 | filter->count = 0; 940 | return 0; 941 | } 942 | 943 | alen = ETH_ALEN * uf.count; 944 | addr = memdup_user(arg + sizeof(uf), alen); 945 | if (IS_ERR(addr)) 946 | return PTR_ERR(addr); 947 | 948 | /* The filter is updated without holding any locks. Which is 949 | * perfectly safe. 
949 | * perfectly safe.
We disable it first and in the worst 950 | * case we'll accept a few undesired packets. */ 951 | filter->count = 0; 952 | wmb(); 953 | 954 | /* Use first set of addresses as an exact filter */ 955 | for (n = 0; n < uf.count && n < FLT_EXACT_COUNT; n++) 956 | memcpy(filter->addr[n], addr[n].u, ETH_ALEN); 957 | 958 | nexact = n; 959 | 960 | /* Remaining multicast addresses are hashed, 961 | * unicast will leave the filter disabled. */ 962 | memset(filter->mask, 0, sizeof(filter->mask)); 963 | for (; n < uf.count; n++) { 964 | if (!is_multicast_ether_addr(addr[n].u)) { 965 | err = 0; /* no filter */ 966 | goto free_addr; 967 | } 968 | addr_hash_set(filter->mask, addr[n].u); 969 | } 970 | 971 | /* For ALLMULTI just set the mask to all ones. 972 | * This overrides the mask populated above. */ 973 | if ((uf.flags & TUN_FLT_ALLMULTI)) 974 | memset(filter->mask, ~0, sizeof(filter->mask)); 975 | 976 | /* Now enable the filter */ 977 | wmb(); 978 | filter->count = nexact; 979 | 980 | /* Return the number of exact filters */ 981 | err = nexact; 982 | free_addr: 983 | kfree(addr); 984 | return err; 985 | } 986 | 987 | /* Returns: 0 - drop, !=0 - accept */ 988 | static int run_filter(struct tap_filter *filter, const struct sk_buff *skb) 989 | { 990 | /* Cannot use eth_hdr(skb) here because skb_mac_hdr() is incorrect 991 | * at this point. */ 992 | struct ethhdr *eh = (struct ethhdr *) skb->data; 993 | int i; 994 | 995 | /* Exact match */ 996 | for (i = 0; i < filter->count; i++) 997 | if (ether_addr_equal(eh->h_dest, filter->addr[i])) 998 | return 1; 999 | 1000 | /* Inexact match (multicast only) */ 1001 | if (is_multicast_ether_addr(eh->h_dest)) 1002 | return addr_hash_test(filter->mask, eh->h_dest); 1003 | 1004 | return 0; 1005 | } 1006 | 1007 | /* 1008 | * Checks whether the packet is accepted or not. 1009 | * Returns: 0 - drop, !=0 - accept 1010 | */ 1011 | static int check_filter(struct tap_filter *filter, const struct sk_buff *skb) 1012 | { 1013 | if (!filter->count) 1014 | return 1; 1015 | 1016 | return run_filter(filter, skb); 1017 | } 1018 | 1019 | /* Network device part of the driver */ 1020 | 1021 | static const struct ethtool_ops tun_ethtool_ops; 1022 | 1023 | /* Net device detach from fd. */ 1024 | static void tun_net_uninit(struct net_device *dev) 1025 | { 1026 | tun_detach_all(dev); 1027 | } 1028 | 1029 | /* Net device open. */ 1030 | static int tun_net_open(struct net_device *dev) 1031 | { 1032 | netif_tx_start_all_queues(dev); 1033 | 1034 | return 0; 1035 | } 1036 | 1037 | /* Net device close. */ 1038 | static int tun_net_close(struct net_device *dev) 1039 | { 1040 | netif_tx_stop_all_queues(dev); 1041 | return 0; 1042 | } 1043 | 1044 | /* Net device start xmit */ 1045 | static void tun_automq_xmit(struct tun_struct *tun, struct sk_buff *skb) 1046 | { 1047 | #ifdef CONFIG_RPS 1048 | if (tun->numqueues == 1 && static_key_false(&rps_needed)) { 1049 | /* Select queue was not called for the skbuff, so we extract the 1050 | * RPS hash and save it into the flow_table here. 
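*
* The hash saved here is consumed in tun_flow_update(), where
* sock_rps_record_flow_hash() records it so RFS can steer receive
* processing of the flow to the CPU its consumer runs on.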
1051 | */
1052 | __u32 rxhash;
1053 |
1054 | rxhash = __skb_get_hash_symmetric(skb);
1055 | if (rxhash) {
1056 | struct tun_flow_entry *e;
1057 | e = tun_flow_find(&tun->flows[tun_hashfn(rxhash)],
1058 | rxhash);
1059 | if (e)
1060 | tun_flow_save_rps_rxhash(e, rxhash);
1061 | }
1062 | }
1063 | #endif
1064 | }
1065 |
1066 | static unsigned int run_ebpf_filter(struct tun_struct *tun,
1067 | struct sk_buff *skb,
1068 | int len)
1069 | {
1070 | struct tun_prog *prog = rcu_dereference(tun->filter_prog);
1071 |
1072 | if (prog)
1073 | len = bpf_prog_run_clear_cb(prog->prog, skb);
1074 |
1075 | return len;
1076 | }
1077 |
1078 | /* Net device start xmit */
1079 | static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
1080 | {
1081 | struct tun_struct *tun = netdev_priv(dev);
1082 | int txq = skb->queue_mapping;
1083 | struct tun_file *tfile;
1084 | int len = skb->len;
1085 | int num = tun->numqueues / 2 ?: 1; /* queues split into read/write halves; guard the single-queue case */
1086 | int rw = skb->mark; /* assumption: skb->mark picks the half (1 = upper, other non-zero = lower, 0 = leave txq as-is) */
1087 | rcu_read_lock();
1088 | rw = rw ? (rw == 1 ? num : 0) : (txq - txq % num);
1089 | tfile = rcu_dereference(tun->tfiles[rw + txq % num]);
1090 |
1091 | /* Drop packet if interface is not attached */
1092 | if (!tfile)
1093 | goto drop;
1094 |
1095 | if (!rcu_dereference(tun->steering_prog))
1096 | tun_automq_xmit(tun, skb);
1097 |
1098 | tun_debug(KERN_INFO, tun, "tun_net_xmit %d\n", skb->len);
1099 |
1100 | BUG_ON(!tfile);
1101 |
1102 | /* Drop if the filter does not like it.
1103 | * This is a noop if the filter is disabled.
1104 | * Filter can be enabled only for TAP devices. */
1105 | if (!check_filter(&tun->txflt, skb))
1106 | goto drop;
1107 |
1108 | if (tfile->socket.sk->sk_filter &&
1109 | sk_filter(tfile->socket.sk, skb))
1110 | goto drop;
1111 |
1112 | len = run_ebpf_filter(tun, skb, len);
1113 | if (len == 0 || pskb_trim(skb, len))
1114 | goto drop;
1115 |
1116 | if (unlikely(skb_orphan_frags_rx(skb, GFP_ATOMIC)))
1117 | goto drop;
1118 |
1119 | skb_tx_timestamp(skb);
1120 |
1121 | /* Orphan the skb - required as we might hang on to it
1122 | * for an indefinite time.
1123 | */
1124 | skb_orphan(skb);
1125 |
1126 | nf_reset(skb);
1127 |
1128 | if (ptr_ring_produce(&tfile->tx_ring, skb))
1129 | goto drop;
1130 |
1131 | /* Notify and wake up reader process */
1132 | if (tfile->flags & TUN_FASYNC)
1133 | kill_fasync(&tfile->fasync, SIGIO, POLL_IN);
1134 | tfile->socket.sk->sk_data_ready(tfile->socket.sk);
1135 |
1136 | rcu_read_unlock();
1137 | return NETDEV_TX_OK;
1138 |
1139 | drop:
1140 | this_cpu_inc(tun->pcpu_stats->tx_dropped);
1141 | skb_tx_error(skb);
1142 | kfree_skb(skb);
1143 | rcu_read_unlock();
1144 | return NET_XMIT_DROP;
1145 | }
1146 |
1147 | static void tun_net_mclist(struct net_device *dev)
1148 | {
1149 | /*
1150 | * This callback is supposed to deal with mc filter in
1151 | * _rx_ path and has nothing to do with the _tx_ path.
1152 | * In rx path we always accept everything userspace gives us.
1153 | */ 1154 | } 1155 | 1156 | static netdev_features_t tun_net_fix_features(struct net_device *dev, 1157 | netdev_features_t features) 1158 | { 1159 | struct tun_struct *tun = netdev_priv(dev); 1160 | 1161 | return (features & tun->set_features) | (features & ~TUN_USER_FEATURES); 1162 | } 1163 | 1164 | static void tun_set_headroom(struct net_device *dev, int new_hr) 1165 | { 1166 | struct tun_struct *tun = netdev_priv(dev); 1167 | 1168 | if (new_hr < NET_SKB_PAD) 1169 | new_hr = NET_SKB_PAD; 1170 | 1171 | tun->align = new_hr; 1172 | } 1173 | 1174 | static void 1175 | tun_net_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats) 1176 | { 1177 | u32 rx_dropped = 0, tx_dropped = 0, rx_frame_errors = 0; 1178 | struct tun_struct *tun = netdev_priv(dev); 1179 | struct tun_pcpu_stats *p; 1180 | int i; 1181 | 1182 | for_each_possible_cpu(i) { 1183 | u64 rxpackets, rxbytes, txpackets, txbytes; 1184 | unsigned int start; 1185 | 1186 | p = per_cpu_ptr(tun->pcpu_stats, i); 1187 | do { 1188 | start = u64_stats_fetch_begin(&p->syncp); 1189 | rxpackets = p->rx_packets; 1190 | rxbytes = p->rx_bytes; 1191 | txpackets = p->tx_packets; 1192 | txbytes = p->tx_bytes; 1193 | } while (u64_stats_fetch_retry(&p->syncp, start)); 1194 | 1195 | stats->rx_packets += rxpackets; 1196 | stats->rx_bytes += rxbytes; 1197 | stats->tx_packets += txpackets; 1198 | stats->tx_bytes += txbytes; 1199 | 1200 | /* u32 counters */ 1201 | rx_dropped += p->rx_dropped; 1202 | rx_frame_errors += p->rx_frame_errors; 1203 | tx_dropped += p->tx_dropped; 1204 | } 1205 | stats->rx_dropped = rx_dropped; 1206 | stats->rx_frame_errors = rx_frame_errors; 1207 | stats->tx_dropped = tx_dropped; 1208 | } 1209 | 1210 | static int tun_xdp_set(struct net_device *dev, struct bpf_prog *prog, 1211 | struct netlink_ext_ack *extack) 1212 | { 1213 | struct tun_struct *tun = netdev_priv(dev); 1214 | struct bpf_prog *old_prog; 1215 | 1216 | old_prog = rtnl_dereference(tun->xdp_prog); 1217 | rcu_assign_pointer(tun->xdp_prog, prog); 1218 | if (old_prog) 1219 | bpf_prog_put(old_prog); 1220 | 1221 | return 0; 1222 | } 1223 | 1224 | static u32 tun_xdp_query(struct net_device *dev) 1225 | { 1226 | struct tun_struct *tun = netdev_priv(dev); 1227 | const struct bpf_prog *xdp_prog; 1228 | 1229 | xdp_prog = rtnl_dereference(tun->xdp_prog); 1230 | if (xdp_prog) 1231 | return xdp_prog->aux->id; 1232 | 1233 | return 0; 1234 | } 1235 | 1236 | static int tun_xdp(struct net_device *dev, struct netdev_bpf *xdp) 1237 | { 1238 | switch (xdp->command) { 1239 | case XDP_SETUP_PROG: 1240 | return tun_xdp_set(dev, xdp->prog, xdp->extack); 1241 | case XDP_QUERY_PROG: 1242 | xdp->prog_id = tun_xdp_query(dev); 1243 | return 0; 1244 | default: 1245 | return -EINVAL; 1246 | } 1247 | } 1248 | 1249 | static const struct net_device_ops tun_netdev_ops = { 1250 | .ndo_uninit = tun_net_uninit, 1251 | .ndo_open = tun_net_open, 1252 | .ndo_stop = tun_net_close, 1253 | .ndo_start_xmit = tun_net_xmit, 1254 | .ndo_fix_features = tun_net_fix_features, 1255 | .ndo_select_queue = tun_select_queue, 1256 | .ndo_set_rx_headroom = tun_set_headroom, 1257 | .ndo_get_stats64 = tun_net_get_stats64, 1258 | }; 1259 | 1260 | static void __tun_xdp_flush_tfile(struct tun_file *tfile) 1261 | { 1262 | /* Notify and wake up reader process */ 1263 | if (tfile->flags & TUN_FASYNC) 1264 | kill_fasync(&tfile->fasync, SIGIO, POLL_IN); 1265 | tfile->socket.sk->sk_data_ready(tfile->socket.sk); 1266 | } 1267 | 1268 | static int tun_xdp_xmit(struct net_device *dev, int n, 1269 | struct xdp_frame **frames, 
u32 flags) 1270 | { 1271 | struct tun_struct *tun = netdev_priv(dev); 1272 | struct tun_file *tfile; 1273 | u32 numqueues; 1274 | int drops = 0; 1275 | int cnt = n; 1276 | int i; 1277 | 1278 | if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) 1279 | return -EINVAL; 1280 | 1281 | rcu_read_lock(); 1282 | 1283 | resample: 1284 | numqueues = READ_ONCE(tun->numqueues); 1285 | if (!numqueues) { 1286 | rcu_read_unlock(); 1287 | return -ENXIO; /* Caller will free/return all frames */ 1288 | } 1289 | 1290 | tfile = rcu_dereference(tun->tfiles[smp_processor_id() % 1291 | numqueues]); 1292 | if (unlikely(!tfile)) 1293 | goto resample; 1294 | 1295 | spin_lock(&tfile->tx_ring.producer_lock); 1296 | for (i = 0; i < n; i++) { 1297 | struct xdp_frame *xdp = frames[i]; 1298 | /* Encode the XDP flag into lowest bit for consumer to differ 1299 | * XDP buffer from sk_buff. 1300 | */ 1301 | void *frame = tun_xdp_to_ptr(xdp); 1302 | 1303 | if (__ptr_ring_produce(&tfile->tx_ring, frame)) { 1304 | this_cpu_inc(tun->pcpu_stats->tx_dropped); 1305 | xdp_return_frame_rx_napi(xdp); 1306 | drops++; 1307 | } 1308 | } 1309 | spin_unlock(&tfile->tx_ring.producer_lock); 1310 | 1311 | if (flags & XDP_XMIT_FLUSH) 1312 | __tun_xdp_flush_tfile(tfile); 1313 | 1314 | rcu_read_unlock(); 1315 | return cnt - drops; 1316 | } 1317 | 1318 | static int tun_xdp_tx(struct net_device *dev, struct xdp_buff *xdp) 1319 | { 1320 | struct xdp_frame *frame = convert_to_xdp_frame(xdp); 1321 | 1322 | if (unlikely(!frame)) 1323 | return -EOVERFLOW; 1324 | 1325 | return tun_xdp_xmit(dev, 1, &frame, XDP_XMIT_FLUSH); 1326 | } 1327 | 1328 | static const struct net_device_ops tap_netdev_ops = { 1329 | .ndo_uninit = tun_net_uninit, 1330 | .ndo_open = tun_net_open, 1331 | .ndo_stop = tun_net_close, 1332 | .ndo_start_xmit = tun_net_xmit, 1333 | .ndo_fix_features = tun_net_fix_features, 1334 | .ndo_set_rx_mode = tun_net_mclist, 1335 | .ndo_set_mac_address = eth_mac_addr, 1336 | .ndo_validate_addr = eth_validate_addr, 1337 | .ndo_select_queue = tun_select_queue, 1338 | .ndo_features_check = passthru_features_check, 1339 | .ndo_set_rx_headroom = tun_set_headroom, 1340 | .ndo_get_stats64 = tun_net_get_stats64, 1341 | .ndo_bpf = tun_xdp, 1342 | .ndo_xdp_xmit = tun_xdp_xmit, 1343 | }; 1344 | 1345 | static void tun_flow_init(struct tun_struct *tun) 1346 | { 1347 | int i; 1348 | 1349 | for (i = 0; i < TUN_NUM_FLOW_ENTRIES; i++) 1350 | INIT_HLIST_HEAD(&tun->flows[i]); 1351 | 1352 | tun->ageing_time = TUN_FLOW_EXPIRE; 1353 | timer_setup(&tun->flow_gc_timer, tun_flow_cleanup, 0); 1354 | mod_timer(&tun->flow_gc_timer, 1355 | round_jiffies_up(jiffies + tun->ageing_time)); 1356 | } 1357 | 1358 | static void tun_flow_uninit(struct tun_struct *tun) 1359 | { 1360 | del_timer_sync(&tun->flow_gc_timer); 1361 | tun_flow_flush(tun); 1362 | } 1363 | 1364 | #define MIN_MTU 68 1365 | #define MAX_MTU 65535 1366 | 1367 | /* Initialize net device. 
*/ 1368 | static void tun_net_init(struct net_device *dev) 1369 | { 1370 | struct tun_struct *tun = netdev_priv(dev); 1371 | 1372 | switch (tun->flags & TUN_TYPE_MASK) { 1373 | case IFF_TUN: 1374 | dev->netdev_ops = &tun_netdev_ops; 1375 | 1376 | /* Point-to-Point TUN Device */ 1377 | dev->hard_header_len = 0; 1378 | dev->addr_len = 0; 1379 | dev->mtu = 1500; 1380 | 1381 | /* Zero header length */ 1382 | dev->type = ARPHRD_NONE; 1383 | dev->flags = IFF_POINTOPOINT | IFF_NOARP | IFF_MULTICAST; 1384 | break; 1385 | 1386 | case IFF_TAP: 1387 | dev->netdev_ops = &tap_netdev_ops; 1388 | /* Ethernet TAP Device */ 1389 | ether_setup(dev); 1390 | dev->priv_flags &= ~IFF_TX_SKB_SHARING; 1391 | dev->priv_flags |= IFF_LIVE_ADDR_CHANGE; 1392 | 1393 | eth_hw_addr_random(dev); 1394 | 1395 | break; 1396 | } 1397 | 1398 | dev->min_mtu = MIN_MTU; 1399 | dev->max_mtu = MAX_MTU - dev->hard_header_len; 1400 | } 1401 | 1402 | static bool tun_sock_writeable(struct tun_struct *tun, struct tun_file *tfile) 1403 | { 1404 | struct sock *sk = tfile->socket.sk; 1405 | 1406 | return (tun->dev->flags & IFF_UP) && sock_writeable(sk); 1407 | } 1408 | 1409 | /* Character device part */ 1410 | 1411 | /* Poll */ 1412 | static __poll_t tun_chr_poll(struct file *file, poll_table *wait) 1413 | { 1414 | struct tun_file *tfile = file->private_data; 1415 | struct tun_struct *tun = tun_get(tfile); 1416 | struct sock *sk; 1417 | __poll_t mask = 0; 1418 | 1419 | if (!tun) 1420 | return EPOLLERR; 1421 | 1422 | sk = tfile->socket.sk; 1423 | 1424 | tun_debug(KERN_INFO, tun, "tun_chr_poll\n"); 1425 | 1426 | poll_wait(file, sk_sleep(sk), wait); 1427 | 1428 | if (!ptr_ring_empty(&tfile->tx_ring)) 1429 | mask |= EPOLLIN | EPOLLRDNORM; 1430 | 1431 | /* Make sure SOCKWQ_ASYNC_NOSPACE is set if not writable to 1432 | * guarantee EPOLLOUT to be raised by either here or 1433 | * tun_sock_write_space(). Then process could get notification 1434 | * after it writes to a down device and meets -EIO. 
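*
* From userspace the contract is an ordinary pollable fd. A minimal
* sketch (error handling elided; tunfd is an attached tun fd and the
* helper name is illustrative):
*
*	#include <poll.h>
*	#include <unistd.h>
*
*	void pump_once(int tunfd, const char *pkt, size_t len)
*	{
*		struct pollfd pfd = { .fd = tunfd, .events = POLLIN | POLLOUT };
*
*		if (poll(&pfd, 1, -1) < 0)
*			return;
*		if (pfd.revents & POLLOUT)
*			write(tunfd, pkt, len);		// one packet per write()
*		if (pfd.revents & POLLIN) {
*			char buf[2048];
*			read(tunfd, buf, sizeof(buf));	// one packet per read()
*		}
*	}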
1435 | */
1436 | if (tun_sock_writeable(tun, tfile) ||
1437 | (!test_and_set_bit(SOCKWQ_ASYNC_NOSPACE, &sk->sk_socket->flags) &&
1438 | tun_sock_writeable(tun, tfile)))
1439 | mask |= EPOLLOUT | EPOLLWRNORM;
1440 |
1441 | if (tun->dev->reg_state != NETREG_REGISTERED)
1442 | mask = EPOLLERR;
1443 |
1444 | tun_put(tun);
1445 | return mask;
1446 | }
1447 |
1448 | static struct sk_buff *tun_napi_alloc_frags(struct tun_file *tfile,
1449 | size_t len,
1450 | const struct iov_iter *it)
1451 | {
1452 | struct sk_buff *skb;
1453 | size_t linear;
1454 | int err;
1455 | int i;
1456 |
1457 | if (it->nr_segs > MAX_SKB_FRAGS + 1)
1458 | return ERR_PTR(-ENOMEM);
1459 |
1460 | local_bh_disable();
1461 | skb = napi_get_frags(&tfile->napi);
1462 | local_bh_enable();
1463 | if (!skb)
1464 | return ERR_PTR(-ENOMEM);
1465 |
1466 | linear = iov_iter_single_seg_count(it);
1467 | err = __skb_grow(skb, linear);
1468 | if (err)
1469 | goto free;
1470 |
1471 | skb->len = len;
1472 | skb->data_len = len - linear;
1473 | skb->truesize += skb->data_len;
1474 |
1475 | for (i = 1; i < it->nr_segs; i++) {
1476 | struct page_frag *pfrag = &current->task_frag;
1477 | size_t fragsz = it->iov[i].iov_len;
1478 |
1479 | if (fragsz == 0 || fragsz > PAGE_SIZE) {
1480 | err = -EINVAL;
1481 | goto free;
1482 | }
1483 |
1484 | if (!skb_page_frag_refill(fragsz, pfrag, GFP_KERNEL)) {
1485 | err = -ENOMEM;
1486 | goto free;
1487 | }
1488 |
1489 | skb_fill_page_desc(skb, i - 1, pfrag->page,
1490 | pfrag->offset, fragsz);
1491 | page_ref_inc(pfrag->page);
1492 | pfrag->offset += fragsz;
1493 | }
1494 |
1495 | return skb;
1496 | free:
1497 | /* frees skb and all frags allocated with napi_alloc_frag() */
1498 | napi_free_frags(&tfile->napi);
1499 | return ERR_PTR(err);
1500 | }
1501 |
1502 | /* prepad is the amount to reserve at front. len is length after that.
1503 | * linear is a hint as to how much to copy (usually headers). */
1504 | static struct sk_buff *tun_alloc_skb(struct tun_file *tfile,
1505 | size_t prepad, size_t len,
1506 | size_t linear, int noblock)
1507 | {
1508 | struct sock *sk = tfile->socket.sk;
1509 | struct sk_buff *skb;
1510 | int err;
1511 |
1512 | /* Under a page? Don't bother with paged skb.
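*
* For example, with 4 KiB pages a 1500-byte datagram is allocated
* fully linear (linear = len), while a 64 KiB GSO packet keeps only
* the requested header bytes linear and leaves the remaining
* len - linear bytes in pages.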
*/
1513 | if (prepad + len < PAGE_SIZE || !linear)
1514 | linear = len;
1515 |
1516 | skb = sock_alloc_send_pskb(sk, prepad + linear, len - linear, noblock,
1517 | &err, 0);
1518 | if (!skb)
1519 | return ERR_PTR(err);
1520 |
1521 | skb_reserve(skb, prepad);
1522 | skb_put(skb, linear);
1523 | skb->data_len = len - linear;
1524 | skb->len += len - linear;
1525 |
1526 | return skb;
1527 | }
1528 |
1529 | static void tun_rx_batched(struct tun_struct *tun, struct tun_file *tfile,
1530 | struct sk_buff *skb, int more)
1531 | {
1532 | struct sk_buff_head *queue = &tfile->sk.sk_write_queue;
1533 | struct sk_buff_head process_queue;
1534 | u32 rx_batched = tun->rx_batched;
1535 | bool rcv = false;
1536 |
1537 | if (!rx_batched || (!more && skb_queue_empty(queue))) {
1538 | local_bh_disable();
1539 | skb_record_rx_queue(skb, tfile->queue_index);
1540 | netif_receive_skb(skb);
1541 | local_bh_enable();
1542 | return;
1543 | }
1544 |
1545 | spin_lock(&queue->lock);
1546 | if (!more || skb_queue_len(queue) == rx_batched) {
1547 | __skb_queue_head_init(&process_queue);
1548 | skb_queue_splice_tail_init(queue, &process_queue);
1549 | rcv = true;
1550 | } else {
1551 | __skb_queue_tail(queue, skb);
1552 | }
1553 | spin_unlock(&queue->lock);
1554 |
1555 | if (rcv) {
1556 | struct sk_buff *nskb;
1557 |
1558 | local_bh_disable();
1559 | while ((nskb = __skb_dequeue(&process_queue))) {
1560 | skb_record_rx_queue(nskb, tfile->queue_index);
1561 | netif_receive_skb(nskb);
1562 | }
1563 | skb_record_rx_queue(skb, tfile->queue_index);
1564 | netif_receive_skb(skb);
1565 | local_bh_enable();
1566 | }
1567 | }
1568 |
1569 | static bool tun_can_build_skb(struct tun_struct *tun, struct tun_file *tfile,
1570 | int len, int noblock, bool zerocopy)
1571 | {
1572 | if ((tun->flags & TUN_TYPE_MASK) != IFF_TAP)
1573 | return false;
1574 |
1575 | if (tfile->socket.sk->sk_sndbuf != INT_MAX)
1576 | return false;
1577 |
1578 | if (!noblock)
1579 | return false;
1580 |
1581 | if (zerocopy)
1582 | return false;
1583 |
1584 | if (SKB_DATA_ALIGN(len + TUN_RX_PAD) +
1585 | SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) > PAGE_SIZE)
1586 | return false;
1587 |
1588 | return true;
1589 | }
1590 |
1591 | static struct sk_buff *tun_build_skb(struct tun_struct *tun,
1592 | struct tun_file *tfile,
1593 | struct iov_iter *from,
1594 | struct virtio_net_hdr *hdr,
1595 | int len, int *skb_xdp)
1596 | {
1597 | struct page_frag *alloc_frag = &current->task_frag;
1598 | struct sk_buff *skb;
1599 | struct bpf_prog *xdp_prog;
1600 | int buflen = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
1601 | unsigned int delta = 0;
1602 | char *buf;
1603 | size_t copied;
1604 | int err, pad = TUN_RX_PAD;
1605 |
1606 | rcu_read_lock();
1607 | xdp_prog = rcu_dereference(tun->xdp_prog);
1608 | if (xdp_prog)
1609 | pad += TUN_HEADROOM;
1610 | buflen += SKB_DATA_ALIGN(len + pad);
1611 | rcu_read_unlock();
1612 |
1613 | alloc_frag->offset = ALIGN((u64)alloc_frag->offset, SMP_CACHE_BYTES);
1614 | if (unlikely(!skb_page_frag_refill(buflen, alloc_frag, GFP_KERNEL)))
1615 | return ERR_PTR(-ENOMEM);
1616 |
1617 | buf = (char *)page_address(alloc_frag->page) + alloc_frag->offset;
1618 | copied = copy_page_from_iter(alloc_frag->page,
1619 | alloc_frag->offset + pad,
1620 | len, from);
1621 | if (copied != len)
1622 | return ERR_PTR(-EFAULT);
1623 |
1624 | /* There is a small window in which an XDP program may be attached
1625 | * after the xdp_prog check above; this should be rare, so for
1626 | * simplicity we run XDP on the skb when the headroom is not enough.
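*
* skb_xdp == 1 therefore means "run the program later, via the
* generic skb-based XDP hook in tun_get_user()"; skb_xdp == 0 means
* it already ran here on the raw buffer. For reference, attaching a
* program to a tap device with iproute2 looks like this (a sketch,
* assuming prog.o has an "xdp" section):
*
*	ip link set dev tap0 xdp obj prog.o sec xdp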
1627 | */ 1628 | if (hdr->gso_type || !xdp_prog) 1629 | *skb_xdp = 1; 1630 | else 1631 | *skb_xdp = 0; 1632 | 1633 | local_bh_disable(); 1634 | rcu_read_lock(); 1635 | xdp_prog = rcu_dereference(tun->xdp_prog); 1636 | if (xdp_prog && !*skb_xdp) { 1637 | struct xdp_buff xdp; 1638 | void *orig_data; 1639 | u32 act; 1640 | 1641 | xdp.data_hard_start = buf; 1642 | xdp.data = buf + pad; 1643 | xdp_set_data_meta_invalid(&xdp); 1644 | xdp.data_end = xdp.data + len; 1645 | xdp.rxq = &tfile->xdp_rxq; 1646 | orig_data = xdp.data; 1647 | act = bpf_prog_run_xdp(xdp_prog, &xdp); 1648 | 1649 | switch (act) { 1650 | case XDP_REDIRECT: 1651 | get_page(alloc_frag->page); 1652 | alloc_frag->offset += buflen; 1653 | err = xdp_do_redirect(tun->dev, &xdp, xdp_prog); 1654 | xdp_do_flush_map(); 1655 | if (err) 1656 | goto err_redirect; 1657 | rcu_read_unlock(); 1658 | local_bh_enable(); 1659 | return NULL; 1660 | case XDP_TX: 1661 | get_page(alloc_frag->page); 1662 | alloc_frag->offset += buflen; 1663 | if (tun_xdp_tx(tun->dev, &xdp) < 0) 1664 | goto err_redirect; 1665 | rcu_read_unlock(); 1666 | local_bh_enable(); 1667 | return NULL; 1668 | case XDP_PASS: 1669 | delta = orig_data - xdp.data; 1670 | len = xdp.data_end - xdp.data; 1671 | break; 1672 | default: 1673 | bpf_warn_invalid_xdp_action(act); 1674 | /* fall through */ 1675 | case XDP_ABORTED: 1676 | trace_xdp_exception(tun->dev, xdp_prog, act); 1677 | /* fall through */ 1678 | case XDP_DROP: 1679 | goto err_xdp; 1680 | } 1681 | } 1682 | 1683 | skb = build_skb(buf, buflen); 1684 | if (!skb) { 1685 | rcu_read_unlock(); 1686 | local_bh_enable(); 1687 | return ERR_PTR(-ENOMEM); 1688 | } 1689 | 1690 | skb_reserve(skb, pad - delta); 1691 | skb_put(skb, len); 1692 | skb_set_owner_w(skb, tfile->socket.sk); 1693 | get_page(alloc_frag->page); 1694 | alloc_frag->offset += buflen; 1695 | 1696 | rcu_read_unlock(); 1697 | local_bh_enable(); 1698 | 1699 | return skb; 1700 | 1701 | err_redirect: 1702 | put_page(alloc_frag->page); 1703 | err_xdp: 1704 | rcu_read_unlock(); 1705 | local_bh_enable(); 1706 | this_cpu_inc(tun->pcpu_stats->rx_dropped); 1707 | return NULL; 1708 | } 1709 | 1710 | /* Get packet from user space buffer */ 1711 | static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, 1712 | void *msg_control, struct iov_iter *from, 1713 | int noblock, bool more) 1714 | { 1715 | struct tun_pi pi = { 0, cpu_to_be16(ETH_P_IP) }; 1716 | struct sk_buff *skb; 1717 | size_t total_len = iov_iter_count(from); 1718 | size_t len = total_len, align = tun->align, linear; 1719 | struct virtio_net_hdr gso = { 0 }; 1720 | struct tun_pcpu_stats *stats; 1721 | int good_linear; 1722 | int copylen; 1723 | bool zerocopy = false; 1724 | int err; 1725 | u32 rxhash = 0; 1726 | int skb_xdp = 1; 1727 | bool frags = tun_napi_frags_enabled(tfile); 1728 | 1729 | if (!(tun->flags & IFF_NO_PI)) { 1730 | if (len < sizeof(pi)) 1731 | return -EINVAL; 1732 | len -= sizeof(pi); 1733 | 1734 | if (!copy_from_iter_full(&pi, sizeof(pi), from)) 1735 | return -EFAULT; 1736 | } 1737 | 1738 | if (tun->flags & IFF_VNET_HDR) { 1739 | int vnet_hdr_sz = READ_ONCE(tun->vnet_hdr_sz); 1740 | 1741 | if (len < vnet_hdr_sz) 1742 | return -EINVAL; 1743 | len -= vnet_hdr_sz; 1744 | 1745 | if (!copy_from_iter_full(&gso, sizeof(gso), from)) 1746 | return -EFAULT; 1747 | 1748 | if ((gso.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && 1749 | tun16_to_cpu(tun, gso.csum_start) + tun16_to_cpu(tun, gso.csum_offset) + 2 > tun16_to_cpu(tun, gso.hdr_len)) 1750 | gso.hdr_len = cpu_to_tun16(tun, tun16_to_cpu(tun, 
gso.csum_start) + tun16_to_cpu(tun, gso.csum_offset) + 2); 1751 | 1752 | if (tun16_to_cpu(tun, gso.hdr_len) > len) 1753 | return -EINVAL; 1754 | iov_iter_advance(from, vnet_hdr_sz - sizeof(gso)); 1755 | } 1756 | 1757 | if ((tun->flags & TUN_TYPE_MASK) == IFF_TAP) { 1758 | align += NET_IP_ALIGN; 1759 | if (unlikely(len < ETH_HLEN || 1760 | (gso.hdr_len && tun16_to_cpu(tun, gso.hdr_len) < ETH_HLEN))) 1761 | return -EINVAL; 1762 | } 1763 | 1764 | good_linear = SKB_MAX_HEAD(align); 1765 | 1766 | if (msg_control) { 1767 | struct iov_iter i = *from; 1768 | 1769 | /* There are 256 bytes to be copied in skb, so there is 1770 | * enough room for skb expand head in case it is used. 1771 | * The rest of the buffer is mapped from userspace. 1772 | */ 1773 | copylen = gso.hdr_len ? tun16_to_cpu(tun, gso.hdr_len) : GOODCOPY_LEN; 1774 | if (copylen > good_linear) 1775 | copylen = good_linear; 1776 | linear = copylen; 1777 | iov_iter_advance(&i, copylen); 1778 | if (iov_iter_npages(&i, INT_MAX) <= MAX_SKB_FRAGS) 1779 | zerocopy = true; 1780 | } 1781 | 1782 | if (!frags && tun_can_build_skb(tun, tfile, len, noblock, zerocopy)) { 1783 | /* For the packet that is not easy to be processed 1784 | * (e.g gso or jumbo packet), we will do it at after 1785 | * skb was created with generic XDP routine. 1786 | */ 1787 | skb = tun_build_skb(tun, tfile, from, &gso, len, &skb_xdp); 1788 | if (IS_ERR(skb)) { 1789 | this_cpu_inc(tun->pcpu_stats->rx_dropped); 1790 | return PTR_ERR(skb); 1791 | } 1792 | if (!skb) 1793 | return total_len; 1794 | } else { 1795 | if (!zerocopy) { 1796 | copylen = len; 1797 | if (tun16_to_cpu(tun, gso.hdr_len) > good_linear) 1798 | linear = good_linear; 1799 | else 1800 | linear = tun16_to_cpu(tun, gso.hdr_len); 1801 | } 1802 | 1803 | if (frags) { 1804 | mutex_lock(&tfile->napi_mutex); 1805 | skb = tun_napi_alloc_frags(tfile, copylen, from); 1806 | /* tun_napi_alloc_frags() enforces a layout for the skb. 1807 | * If zerocopy is enabled, then this layout will be 1808 | * overwritten by zerocopy_sg_from_iter(). 1809 | */ 1810 | zerocopy = false; 1811 | } else { 1812 | skb = tun_alloc_skb(tfile, align, copylen, linear, 1813 | noblock); 1814 | } 1815 | 1816 | if (IS_ERR(skb)) { 1817 | if (PTR_ERR(skb) != -EAGAIN) 1818 | this_cpu_inc(tun->pcpu_stats->rx_dropped); 1819 | if (frags) 1820 | mutex_unlock(&tfile->napi_mutex); 1821 | return PTR_ERR(skb); 1822 | } 1823 | 1824 | if (zerocopy) 1825 | err = zerocopy_sg_from_iter(skb, from); 1826 | else 1827 | err = skb_copy_datagram_from_iter(skb, 0, from, len); 1828 | 1829 | if (err) { 1830 | err = -EFAULT; 1831 | drop: 1832 | this_cpu_inc(tun->pcpu_stats->rx_dropped); 1833 | kfree_skb(skb); 1834 | if (frags) { 1835 | tfile->napi.skb = NULL; 1836 | mutex_unlock(&tfile->napi_mutex); 1837 | } 1838 | 1839 | return err; 1840 | } 1841 | } 1842 | 1843 | if (virtio_net_hdr_to_skb(skb, &gso, tun_is_little_endian(tun))) { 1844 | this_cpu_inc(tun->pcpu_stats->rx_frame_errors); 1845 | kfree_skb(skb); 1846 | if (frags) { 1847 | tfile->napi.skb = NULL; 1848 | mutex_unlock(&tfile->napi_mutex); 1849 | } 1850 | 1851 | return -EINVAL; 1852 | } 1853 | 1854 | switch (tun->flags & TUN_TYPE_MASK) { 1855 | case IFF_TUN: 1856 | if (tun->flags & IFF_NO_PI) { 1857 | u8 ip_version = skb->len ? 
(skb->data[0] >> 4) : 0; 1858 | 1859 | switch (ip_version) { 1860 | case 4: 1861 | pi.proto = htons(ETH_P_IP); 1862 | break; 1863 | case 6: 1864 | pi.proto = htons(ETH_P_IPV6); 1865 | break; 1866 | default: 1867 | this_cpu_inc(tun->pcpu_stats->rx_dropped); 1868 | kfree_skb(skb); 1869 | return -EINVAL; 1870 | } 1871 | } 1872 | 1873 | skb_reset_mac_header(skb); 1874 | skb->protocol = pi.proto; 1875 | skb->dev = tun->dev; 1876 | break; 1877 | case IFF_TAP: 1878 | if (!frags) 1879 | skb->protocol = eth_type_trans(skb, tun->dev); 1880 | break; 1881 | } 1882 | 1883 | /* copy skb_ubuf_info for callback when skb has no error */ 1884 | if (zerocopy) { 1885 | skb_shinfo(skb)->destructor_arg = msg_control; 1886 | skb_shinfo(skb)->tx_flags |= SKBTX_DEV_ZEROCOPY; 1887 | skb_shinfo(skb)->tx_flags |= SKBTX_SHARED_FRAG; 1888 | } else if (msg_control) { 1889 | struct ubuf_info *uarg = msg_control; 1890 | uarg->callback(uarg, false); 1891 | } 1892 | 1893 | skb_reset_network_header(skb); 1894 | skb_probe_transport_header(skb, 0); 1895 | 1896 | if (skb_xdp) { 1897 | struct bpf_prog *xdp_prog; 1898 | int ret; 1899 | 1900 | local_bh_disable(); 1901 | rcu_read_lock(); 1902 | xdp_prog = rcu_dereference(tun->xdp_prog); 1903 | if (xdp_prog) { 1904 | ret = do_xdp_generic(xdp_prog, skb); 1905 | if (ret != XDP_PASS) { 1906 | rcu_read_unlock(); 1907 | local_bh_enable(); 1908 | if (frags) { 1909 | tfile->napi.skb = NULL; 1910 | mutex_unlock(&tfile->napi_mutex); 1911 | } 1912 | return total_len; 1913 | } 1914 | } 1915 | rcu_read_unlock(); 1916 | local_bh_enable(); 1917 | } 1918 | 1919 | /* Compute the costly rx hash only if needed for flow updates. 1920 | * We may get a very small possibility of OOO during switching, not 1921 | * worth to optimize. 1922 | */ 1923 | if (!rcu_access_pointer(tun->steering_prog) && tun->numqueues > 1 && 1924 | !tfile->detached) 1925 | rxhash = __skb_get_hash_symmetric(skb); 1926 | 1927 | rcu_read_lock(); 1928 | if (unlikely(!(tun->dev->flags & IFF_UP))) { 1929 | err = -EIO; 1930 | rcu_read_unlock(); 1931 | goto drop; 1932 | } 1933 | 1934 | if (frags) { 1935 | /* Exercise flow dissector code path. 
*/ 1936 | u32 headlen = eth_get_headlen(skb->data, skb_headlen(skb)); 1937 | 1938 | if (unlikely(headlen > skb_headlen(skb))) { 1939 | this_cpu_inc(tun->pcpu_stats->rx_dropped); 1940 | napi_free_frags(&tfile->napi); 1941 | rcu_read_unlock(); 1942 | mutex_unlock(&tfile->napi_mutex); 1943 | WARN_ON(1); 1944 | return -ENOMEM; 1945 | } 1946 | 1947 | local_bh_disable(); 1948 | napi_gro_frags(&tfile->napi); 1949 | local_bh_enable(); 1950 | mutex_unlock(&tfile->napi_mutex); 1951 | } else if (tfile->napi_enabled) { 1952 | struct sk_buff_head *queue = &tfile->sk.sk_write_queue; 1953 | int queue_len; 1954 | 1955 | spin_lock_bh(&queue->lock); 1956 | __skb_queue_tail(queue, skb); 1957 | queue_len = skb_queue_len(queue); 1958 | spin_unlock(&queue->lock); 1959 | 1960 | if (!more || queue_len > NAPI_POLL_WEIGHT) 1961 | napi_schedule(&tfile->napi); 1962 | 1963 | local_bh_enable(); 1964 | } else if (!IS_ENABLED(CONFIG_4KSTACKS)) { 1965 | tun_rx_batched(tun, tfile, skb, more); 1966 | } else { 1967 | netif_rx_ni(skb); 1968 | } 1969 | rcu_read_unlock(); 1970 | 1971 | stats = get_cpu_ptr(tun->pcpu_stats); 1972 | u64_stats_update_begin(&stats->syncp); 1973 | stats->rx_packets++; 1974 | stats->rx_bytes += len; 1975 | u64_stats_update_end(&stats->syncp); 1976 | put_cpu_ptr(stats); 1977 | 1978 | if (rxhash) 1979 | tun_flow_update(tun, rxhash, tfile); 1980 | 1981 | return total_len; 1982 | } 1983 | 1984 | static ssize_t tun_chr_write_iter(struct kiocb *iocb, struct iov_iter *from) 1985 | { 1986 | struct file *file = iocb->ki_filp; 1987 | struct tun_file *tfile = file->private_data; 1988 | struct tun_struct *tun = tun_get(tfile); 1989 | ssize_t result; 1990 | 1991 | if (!tun) 1992 | return -EBADFD; 1993 | 1994 | result = tun_get_user(tun, tfile, NULL, from, 1995 | file->f_flags & O_NONBLOCK, false); 1996 | 1997 | tun_put(tun); 1998 | return result; 1999 | } 2000 | 2001 | static ssize_t tun_put_user_xdp(struct tun_struct *tun, 2002 | struct tun_file *tfile, 2003 | struct xdp_frame *xdp_frame, 2004 | struct iov_iter *iter) 2005 | { 2006 | int vnet_hdr_sz = 0; 2007 | size_t size = xdp_frame->len; 2008 | struct tun_pcpu_stats *stats; 2009 | size_t ret; 2010 | 2011 | if (tun->flags & IFF_VNET_HDR) { 2012 | struct virtio_net_hdr gso = { 0 }; 2013 | 2014 | vnet_hdr_sz = READ_ONCE(tun->vnet_hdr_sz); 2015 | if (unlikely(iov_iter_count(iter) < vnet_hdr_sz)) 2016 | return -EINVAL; 2017 | if (unlikely(copy_to_iter(&gso, sizeof(gso), iter) != 2018 | sizeof(gso))) 2019 | return -EFAULT; 2020 | iov_iter_advance(iter, vnet_hdr_sz - sizeof(gso)); 2021 | } 2022 | 2023 | ret = copy_to_iter(xdp_frame->data, size, iter) + vnet_hdr_sz; 2024 | 2025 | stats = get_cpu_ptr(tun->pcpu_stats); 2026 | u64_stats_update_begin(&stats->syncp); 2027 | stats->tx_packets++; 2028 | stats->tx_bytes += ret; 2029 | u64_stats_update_end(&stats->syncp); 2030 | put_cpu_ptr(tun->pcpu_stats); 2031 | 2032 | return ret; 2033 | } 2034 | 2035 | /* Put packet to the user space buffer */ 2036 | static ssize_t tun_put_user(struct tun_struct *tun, 2037 | struct tun_file *tfile, 2038 | struct sk_buff *skb, 2039 | struct iov_iter *iter) 2040 | { 2041 | struct tun_pi pi = { 0, skb->protocol }; 2042 | struct tun_pcpu_stats *stats; 2043 | ssize_t total; 2044 | int vlan_offset = 0; 2045 | int vlan_hlen = 0; 2046 | int vnet_hdr_sz = 0; 2047 | 2048 | if (skb_vlan_tag_present(skb)) 2049 | vlan_hlen = VLAN_HLEN; 2050 | 2051 | if (tun->flags & IFF_VNET_HDR) 2052 | vnet_hdr_sz = READ_ONCE(tun->vnet_hdr_sz); 2053 | 2054 | total = skb->len + vlan_hlen + vnet_hdr_sz; 2055 | 2056 | if 
(!(tun->flags & IFF_NO_PI)) { 2057 | if (iov_iter_count(iter) < sizeof(pi)) 2058 | return -EINVAL; 2059 | 2060 | total += sizeof(pi); 2061 | if (iov_iter_count(iter) < total) { 2062 | /* Packet will be striped */ 2063 | pi.flags |= TUN_PKT_STRIP; 2064 | } 2065 | 2066 | if (copy_to_iter(&pi, sizeof(pi), iter) != sizeof(pi)) 2067 | return -EFAULT; 2068 | } 2069 | 2070 | if (vnet_hdr_sz) { 2071 | struct virtio_net_hdr gso; 2072 | 2073 | if (iov_iter_count(iter) < vnet_hdr_sz) 2074 | return -EINVAL; 2075 | 2076 | if (virtio_net_hdr_from_skb(skb, &gso, 2077 | tun_is_little_endian(tun), true, 2078 | vlan_hlen)) { 2079 | struct skb_shared_info *sinfo = skb_shinfo(skb); 2080 | pr_err("unexpected GSO type: " 2081 | "0x%x, gso_size %d, hdr_len %d\n", 2082 | sinfo->gso_type, tun16_to_cpu(tun, gso.gso_size), 2083 | tun16_to_cpu(tun, gso.hdr_len)); 2084 | print_hex_dump(KERN_ERR, "tun: ", 2085 | DUMP_PREFIX_NONE, 2086 | 16, 1, skb->head, 2087 | min((int)tun16_to_cpu(tun, gso.hdr_len), 64), true); 2088 | WARN_ON_ONCE(1); 2089 | return -EINVAL; 2090 | } 2091 | 2092 | if (copy_to_iter(&gso, sizeof(gso), iter) != sizeof(gso)) 2093 | return -EFAULT; 2094 | 2095 | iov_iter_advance(iter, vnet_hdr_sz - sizeof(gso)); 2096 | } 2097 | 2098 | if (vlan_hlen) { 2099 | int ret; 2100 | struct veth veth; 2101 | 2102 | veth.h_vlan_proto = skb->vlan_proto; 2103 | veth.h_vlan_TCI = htons(skb_vlan_tag_get(skb)); 2104 | 2105 | vlan_offset = offsetof(struct vlan_ethhdr, h_vlan_proto); 2106 | 2107 | ret = skb_copy_datagram_iter(skb, 0, iter, vlan_offset); 2108 | if (ret || !iov_iter_count(iter)) 2109 | goto done; 2110 | 2111 | ret = copy_to_iter(&veth, sizeof(veth), iter); 2112 | if (ret != sizeof(veth) || !iov_iter_count(iter)) 2113 | goto done; 2114 | } 2115 | 2116 | skb_copy_datagram_iter(skb, vlan_offset, iter, skb->len - vlan_offset); 2117 | 2118 | done: 2119 | /* caller is in process context, */ 2120 | stats = get_cpu_ptr(tun->pcpu_stats); 2121 | u64_stats_update_begin(&stats->syncp); 2122 | stats->tx_packets++; 2123 | stats->tx_bytes += skb->len + vlan_hlen; 2124 | u64_stats_update_end(&stats->syncp); 2125 | put_cpu_ptr(tun->pcpu_stats); 2126 | 2127 | return total; 2128 | } 2129 | 2130 | static void *tun_ring_recv(struct tun_file *tfile, int noblock, int *err) 2131 | { 2132 | DECLARE_WAITQUEUE(wait, current); 2133 | void *ptr = NULL; 2134 | int error = 0; 2135 | 2136 | ptr = ptr_ring_consume(&tfile->tx_ring); 2137 | if (ptr) 2138 | goto out; 2139 | if (noblock) { 2140 | error = -EAGAIN; 2141 | goto out; 2142 | } 2143 | 2144 | add_wait_queue(&tfile->wq.wait, &wait); 2145 | 2146 | while (1) { 2147 | set_current_state(TASK_INTERRUPTIBLE); 2148 | ptr = ptr_ring_consume(&tfile->tx_ring); 2149 | if (ptr) 2150 | break; 2151 | if (signal_pending(current)) { 2152 | error = -ERESTARTSYS; 2153 | break; 2154 | } 2155 | if (tfile->socket.sk->sk_shutdown & RCV_SHUTDOWN) { 2156 | error = -EFAULT; 2157 | break; 2158 | } 2159 | 2160 | schedule(); 2161 | } 2162 | 2163 | __set_current_state(TASK_RUNNING); 2164 | remove_wait_queue(&tfile->wq.wait, &wait); 2165 | 2166 | out: 2167 | *err = error; 2168 | return ptr; 2169 | } 2170 | 2171 | static ssize_t tun_do_read(struct tun_struct *tun, struct tun_file *tfile, 2172 | struct iov_iter *to, 2173 | int noblock, void *ptr) 2174 | { 2175 | ssize_t ret; 2176 | int err; 2177 | 2178 | tun_debug(KERN_INFO, tun, "tun_do_read\n"); 2179 | 2180 | if (!iov_iter_count(to)) { 2181 | tun_ptr_free(ptr); 2182 | return 0; 2183 | } 2184 | 2185 | if (!ptr) { 2186 | /* Read frames from ring */ 2187 | ptr = 
tun_ring_recv(tfile, noblock, &err); 2188 | if (!ptr) 2189 | return err; 2190 | } 2191 | 2192 | if (tun_is_xdp_frame(ptr)) { 2193 | struct xdp_frame *xdpf = tun_ptr_to_xdp(ptr); 2194 | 2195 | ret = tun_put_user_xdp(tun, tfile, xdpf, to); 2196 | xdp_return_frame(xdpf); 2197 | } else { 2198 | struct sk_buff *skb = ptr; 2199 | 2200 | ret = tun_put_user(tun, tfile, skb, to); 2201 | if (unlikely(ret < 0)) 2202 | kfree_skb(skb); 2203 | else 2204 | consume_skb(skb); 2205 | } 2206 | 2207 | return ret; 2208 | } 2209 | 2210 | static ssize_t tun_chr_read_iter(struct kiocb *iocb, struct iov_iter *to) 2211 | { 2212 | struct file *file = iocb->ki_filp; 2213 | struct tun_file *tfile = file->private_data; 2214 | struct tun_struct *tun = tun_get(tfile); 2215 | ssize_t len = iov_iter_count(to), ret; 2216 | 2217 | if (!tun) 2218 | return -EBADFD; 2219 | ret = tun_do_read(tun, tfile, to, file->f_flags & O_NONBLOCK, NULL); 2220 | ret = min_t(ssize_t, ret, len); 2221 | if (ret > 0) 2222 | iocb->ki_pos = ret; 2223 | tun_put(tun); 2224 | return ret; 2225 | } 2226 | 2227 | static void tun_prog_free(struct rcu_head *rcu) 2228 | { 2229 | struct tun_prog *prog = container_of(rcu, struct tun_prog, rcu); 2230 | 2231 | bpf_prog_destroy(prog->prog); 2232 | kfree(prog); 2233 | } 2234 | 2235 | static int __tun_set_ebpf(struct tun_struct *tun, 2236 | struct tun_prog __rcu **prog_p, 2237 | struct bpf_prog *prog) 2238 | { 2239 | struct tun_prog *old, *new = NULL; 2240 | 2241 | if (prog) { 2242 | new = kmalloc(sizeof(*new), GFP_KERNEL); 2243 | if (!new) 2244 | return -ENOMEM; 2245 | new->prog = prog; 2246 | } 2247 | 2248 | spin_lock_bh(&tun->lock); 2249 | old = rcu_dereference_protected(*prog_p, 2250 | lockdep_is_held(&tun->lock)); 2251 | rcu_assign_pointer(*prog_p, new); 2252 | spin_unlock_bh(&tun->lock); 2253 | 2254 | if (old) 2255 | call_rcu(&old->rcu, tun_prog_free); 2256 | 2257 | return 0; 2258 | } 2259 | 2260 | static void tun_free_netdev(struct net_device *dev) 2261 | { 2262 | struct tun_struct *tun = netdev_priv(dev); 2263 | 2264 | BUG_ON(!(list_empty(&tun->disabled))); 2265 | free_percpu(tun->pcpu_stats); 2266 | tun_flow_uninit(tun); 2267 | security_tun_dev_free_security(tun->security); 2268 | __tun_set_ebpf(tun, &tun->steering_prog, NULL); 2269 | __tun_set_ebpf(tun, &tun->filter_prog, NULL); 2270 | } 2271 | 2272 | static void tun_setup(struct net_device *dev) 2273 | { 2274 | struct tun_struct *tun = netdev_priv(dev); 2275 | 2276 | tun->owner = INVALID_UID; 2277 | tun->group = INVALID_GID; 2278 | tun_default_link_ksettings(dev, &tun->link_ksettings); 2279 | 2280 | dev->ethtool_ops = &tun_ethtool_ops; 2281 | dev->needs_free_netdev = true; 2282 | dev->priv_destructor = tun_free_netdev; 2283 | /* We prefer our own queue length */ 2284 | dev->tx_queue_len = TUN_READQ_SIZE; 2285 | } 2286 | 2287 | /* Trivial set of netlink ops to allow deleting tun or tap 2288 | * device with netlink. 
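*
* In practice: "ip link add mytun type tun" fails with EOPNOTSUPP in
* tun_validate() below, while "ip link del tun0" on an existing
* device works through these ops.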
2289 | */ 2290 | static int tun_validate(struct nlattr *tb[], struct nlattr *data[], 2291 | struct netlink_ext_ack *extack) 2292 | { 2293 | NL_SET_ERR_MSG(extack, 2294 | "tun/tap creation via rtnetlink is not supported."); 2295 | return -EOPNOTSUPP; 2296 | } 2297 | 2298 | static size_t tun_get_size(const struct net_device *dev) 2299 | { 2300 | BUILD_BUG_ON(sizeof(u32) != sizeof(uid_t)); 2301 | BUILD_BUG_ON(sizeof(u32) != sizeof(gid_t)); 2302 | 2303 | return nla_total_size(sizeof(uid_t)) + /* OWNER */ 2304 | nla_total_size(sizeof(gid_t)) + /* GROUP */ 2305 | nla_total_size(sizeof(u8)) + /* TYPE */ 2306 | nla_total_size(sizeof(u8)) + /* PI */ 2307 | nla_total_size(sizeof(u8)) + /* VNET_HDR */ 2308 | nla_total_size(sizeof(u8)) + /* PERSIST */ 2309 | nla_total_size(sizeof(u8)) + /* MULTI_QUEUE */ 2310 | nla_total_size(sizeof(u32)) + /* NUM_QUEUES */ 2311 | nla_total_size(sizeof(u32)) + /* NUM_DISABLED_QUEUES */ 2312 | 0; 2313 | } 2314 | 2315 | static int tun_fill_info(struct sk_buff *skb, const struct net_device *dev) 2316 | { 2317 | struct tun_struct *tun = netdev_priv(dev); 2318 | 2319 | if (nla_put_u8(skb, IFLA_TUN_TYPE, tun->flags & TUN_TYPE_MASK)) 2320 | goto nla_put_failure; 2321 | if (uid_valid(tun->owner) && 2322 | nla_put_u32(skb, IFLA_TUN_OWNER, 2323 | from_kuid_munged(current_user_ns(), tun->owner))) 2324 | goto nla_put_failure; 2325 | if (gid_valid(tun->group) && 2326 | nla_put_u32(skb, IFLA_TUN_GROUP, 2327 | from_kgid_munged(current_user_ns(), tun->group))) 2328 | goto nla_put_failure; 2329 | if (nla_put_u8(skb, IFLA_TUN_PI, !(tun->flags & IFF_NO_PI))) 2330 | goto nla_put_failure; 2331 | if (nla_put_u8(skb, IFLA_TUN_VNET_HDR, !!(tun->flags & IFF_VNET_HDR))) 2332 | goto nla_put_failure; 2333 | if (nla_put_u8(skb, IFLA_TUN_PERSIST, !!(tun->flags & IFF_PERSIST))) 2334 | goto nla_put_failure; 2335 | if (nla_put_u8(skb, IFLA_TUN_MULTI_QUEUE, 2336 | !!(tun->flags & IFF_MULTI_QUEUE))) 2337 | goto nla_put_failure; 2338 | if (tun->flags & IFF_MULTI_QUEUE) { 2339 | if (nla_put_u32(skb, IFLA_TUN_NUM_QUEUES, tun->numqueues)) 2340 | goto nla_put_failure; 2341 | if (nla_put_u32(skb, IFLA_TUN_NUM_DISABLED_QUEUES, 2342 | tun->numdisabled)) 2343 | goto nla_put_failure; 2344 | } 2345 | 2346 | return 0; 2347 | 2348 | nla_put_failure: 2349 | return -EMSGSIZE; 2350 | } 2351 | 2352 | static struct rtnl_link_ops tun_link_ops __read_mostly = { 2353 | .kind = DRV_NAME, 2354 | .priv_size = sizeof(struct tun_struct), 2355 | .setup = tun_setup, 2356 | .validate = tun_validate, 2357 | .get_size = tun_get_size, 2358 | .fill_info = tun_fill_info, 2359 | }; 2360 | 2361 | static void tun_sock_write_space(struct sock *sk) 2362 | { 2363 | struct tun_file *tfile; 2364 | wait_queue_head_t *wqueue; 2365 | 2366 | if (!sock_writeable(sk)) 2367 | return; 2368 | 2369 | if (!test_and_clear_bit(SOCKWQ_ASYNC_NOSPACE, &sk->sk_socket->flags)) 2370 | return; 2371 | 2372 | wqueue = sk_sleep(sk); 2373 | if (wqueue && waitqueue_active(wqueue)) 2374 | wake_up_interruptible_sync_poll(wqueue, EPOLLOUT | 2375 | EPOLLWRNORM | EPOLLWRBAND); 2376 | 2377 | tfile = container_of(sk, struct tun_file, sk); 2378 | kill_fasync(&tfile->fasync, SIGIO, POLL_OUT); 2379 | } 2380 | 2381 | static int tun_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len) 2382 | { 2383 | int ret; 2384 | struct tun_file *tfile = container_of(sock, struct tun_file, socket); 2385 | struct tun_struct *tun = tun_get(tfile); 2386 | 2387 | if (!tun) 2388 | return -EBADFD; 2389 | 2390 | ret = tun_get_user(tun, tfile, m->msg_control, &m->msg_iter, 2391 | 
m->msg_flags & MSG_DONTWAIT, 2392 | m->msg_flags & MSG_MORE); 2393 | tun_put(tun); 2394 | return ret; 2395 | } 2396 | 2397 | static int tun_recvmsg(struct socket *sock, struct msghdr *m, size_t total_len, 2398 | int flags) 2399 | { 2400 | struct tun_file *tfile = container_of(sock, struct tun_file, socket); 2401 | struct tun_struct *tun = tun_get(tfile); 2402 | void *ptr = m->msg_control; 2403 | int ret; 2404 | 2405 | if (!tun) { 2406 | ret = -EBADFD; 2407 | goto out_free; 2408 | } 2409 | 2410 | if (flags & ~(MSG_DONTWAIT|MSG_TRUNC|MSG_ERRQUEUE)) { 2411 | ret = -EINVAL; 2412 | goto out_put_tun; 2413 | } 2414 | if (flags & MSG_ERRQUEUE) { 2415 | ret = sock_recv_errqueue(sock->sk, m, total_len, 2416 | SOL_PACKET, TUN_TX_TIMESTAMP); 2417 | goto out; 2418 | } 2419 | ret = tun_do_read(tun, tfile, &m->msg_iter, flags & MSG_DONTWAIT, ptr); 2420 | if (ret > (ssize_t)total_len) { 2421 | m->msg_flags |= MSG_TRUNC; 2422 | ret = flags & MSG_TRUNC ? ret : total_len; 2423 | } 2424 | out: 2425 | tun_put(tun); 2426 | return ret; 2427 | 2428 | out_put_tun: 2429 | tun_put(tun); 2430 | out_free: 2431 | tun_ptr_free(ptr); 2432 | return ret; 2433 | } 2434 | 2435 | static int tun_ptr_peek_len(void *ptr) 2436 | { 2437 | if (likely(ptr)) { 2438 | if (tun_is_xdp_frame(ptr)) { 2439 | struct xdp_frame *xdpf = tun_ptr_to_xdp(ptr); 2440 | 2441 | return xdpf->len; 2442 | } 2443 | return __skb_array_len_with_tag(ptr); 2444 | } else { 2445 | return 0; 2446 | } 2447 | } 2448 | 2449 | static int tun_peek_len(struct socket *sock) 2450 | { 2451 | struct tun_file *tfile = container_of(sock, struct tun_file, socket); 2452 | struct tun_struct *tun; 2453 | int ret = 0; 2454 | 2455 | tun = tun_get(tfile); 2456 | if (!tun) 2457 | return 0; 2458 | 2459 | ret = PTR_RING_PEEK_CALL(&tfile->tx_ring, tun_ptr_peek_len); 2460 | tun_put(tun); 2461 | 2462 | return ret; 2463 | } 2464 | 2465 | /* Ops structure to mimic raw sockets with tun */ 2466 | static const struct proto_ops tun_socket_ops = { 2467 | .peek_len = tun_peek_len, 2468 | .sendmsg = tun_sendmsg, 2469 | .recvmsg = tun_recvmsg, 2470 | }; 2471 | 2472 | static struct proto tun_proto = { 2473 | .name = "tun", 2474 | .owner = THIS_MODULE, 2475 | .obj_size = sizeof(struct tun_file), 2476 | }; 2477 | 2478 | static int tun_flags(struct tun_struct *tun) 2479 | { 2480 | return tun->flags & (TUN_FEATURES | IFF_PERSIST | IFF_TUN | IFF_TAP); 2481 | } 2482 | 2483 | static ssize_t tun_show_flags(struct device *dev, struct device_attribute *attr, 2484 | char *buf) 2485 | { 2486 | struct tun_struct *tun = netdev_priv(to_net_dev(dev)); 2487 | return sprintf(buf, "0x%x\n", tun_flags(tun)); 2488 | } 2489 | 2490 | static ssize_t tun_show_owner(struct device *dev, struct device_attribute *attr, 2491 | char *buf) 2492 | { 2493 | struct tun_struct *tun = netdev_priv(to_net_dev(dev)); 2494 | return uid_valid(tun->owner)? 2495 | sprintf(buf, "%u\n", 2496 | from_kuid_munged(current_user_ns(), tun->owner)): 2497 | sprintf(buf, "-1\n"); 2498 | } 2499 | 2500 | static ssize_t tun_show_group(struct device *dev, struct device_attribute *attr, 2501 | char *buf) 2502 | { 2503 | struct tun_struct *tun = netdev_priv(to_net_dev(dev)); 2504 | return gid_valid(tun->group) ? 
2505 | sprintf(buf, "%u\n", 2506 | from_kgid_munged(current_user_ns(), tun->group)): 2507 | sprintf(buf, "-1\n"); 2508 | } 2509 | 2510 | static DEVICE_ATTR(tun_flags, 0444, tun_show_flags, NULL); 2511 | static DEVICE_ATTR(owner, 0444, tun_show_owner, NULL); 2512 | static DEVICE_ATTR(group, 0444, tun_show_group, NULL); 2513 | 2514 | static struct attribute *tun_dev_attrs[] = { 2515 | &dev_attr_tun_flags.attr, 2516 | &dev_attr_owner.attr, 2517 | &dev_attr_group.attr, 2518 | NULL 2519 | }; 2520 | 2521 | static const struct attribute_group tun_attr_group = { 2522 | .attrs = tun_dev_attrs 2523 | }; 2524 | 2525 | static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) 2526 | { 2527 | struct tun_struct *tun; 2528 | struct tun_file *tfile = file->private_data; 2529 | struct net_device *dev; 2530 | int err; 2531 | 2532 | if (tfile->detached) 2533 | return -EINVAL; 2534 | 2535 | if ((ifr->ifr_flags & IFF_NAPI_FRAGS)) { 2536 | if (!capable(CAP_NET_ADMIN)) 2537 | return -EPERM; 2538 | 2539 | if (!(ifr->ifr_flags & IFF_NAPI) || 2540 | (ifr->ifr_flags & TUN_TYPE_MASK) != IFF_TAP) 2541 | return -EINVAL; 2542 | } 2543 | 2544 | dev = __dev_get_by_name(net, ifr->ifr_name); 2545 | if (dev) { 2546 | if (ifr->ifr_flags & IFF_TUN_EXCL) 2547 | return -EBUSY; 2548 | if ((ifr->ifr_flags & IFF_TUN) && dev->netdev_ops == &tun_netdev_ops) 2549 | tun = netdev_priv(dev); 2550 | else if ((ifr->ifr_flags & IFF_TAP) && dev->netdev_ops == &tap_netdev_ops) 2551 | tun = netdev_priv(dev); 2552 | else 2553 | return -EINVAL; 2554 | 2555 | if (!!(ifr->ifr_flags & IFF_MULTI_QUEUE) != 2556 | !!(tun->flags & IFF_MULTI_QUEUE)) 2557 | return -EINVAL; 2558 | 2559 | if (tun_not_capable(tun)) 2560 | return -EPERM; 2561 | err = security_tun_dev_open(tun->security); 2562 | if (err < 0) 2563 | return err; 2564 | 2565 | err = tun_attach(tun, file, ifr->ifr_flags & IFF_NOFILTER, 2566 | ifr->ifr_flags & IFF_NAPI, 2567 | ifr->ifr_flags & IFF_NAPI_FRAGS, true); 2568 | if (err < 0) 2569 | return err; 2570 | 2571 | if (tun->flags & IFF_MULTI_QUEUE && 2572 | (tun->numqueues + tun->numdisabled > 1)) { 2573 | /* One or more queue has already been attached, no need 2574 | * to initialize the device again. 2575 | */ 2576 | netdev_state_change(dev); 2577 | return 0; 2578 | } 2579 | 2580 | tun->flags = (tun->flags & ~TUN_FEATURES) | 2581 | (ifr->ifr_flags & TUN_FEATURES); 2582 | 2583 | netdev_state_change(dev); 2584 | } else { 2585 | char *name; 2586 | unsigned long flags = 0; 2587 | int queues = ifr->ifr_flags & IFF_MULTI_QUEUE ? 
2588 | MAX_TAP_QUEUES : 1;
2589 |
2590 | if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
2591 | return -EPERM;
2592 | err = security_tun_dev_create();
2593 | if (err < 0)
2594 | return err;
2595 |
2596 | /* Set dev type */
2597 | if (ifr->ifr_flags & IFF_TUN) {
2598 | /* TUN device */
2599 | flags |= IFF_TUN;
2600 | name = "tun%d";
2601 | } else if (ifr->ifr_flags & IFF_TAP) {
2602 | /* TAP device */
2603 | flags |= IFF_TAP;
2604 | name = "tap%d";
2605 | } else
2606 | return -EINVAL;
2607 |
2608 | if (*ifr->ifr_name)
2609 | name = ifr->ifr_name;
2610 |
2611 | dev = alloc_netdev_mqs(sizeof(struct tun_struct), name,
2612 | NET_NAME_UNKNOWN, tun_setup, queues,
2613 | queues);
2614 |
2615 | if (!dev)
2616 | return -ENOMEM;
2617 | err = dev_get_valid_name(net, dev, name);
2618 | if (err < 0)
2619 | goto err_free_dev;
2620 |
2621 | dev_net_set(dev, net);
2622 | dev->rtnl_link_ops = &tun_link_ops;
2623 | dev->ifindex = tfile->ifindex;
2624 | dev->sysfs_groups[0] = &tun_attr_group;
2625 |
2626 | tun = netdev_priv(dev);
2627 | tun->dev = dev;
2628 | tun->flags = flags;
2629 | tun->txflt.count = 0;
2630 | tun->vnet_hdr_sz = sizeof(struct virtio_net_hdr);
2631 |
2632 | tun->align = NET_SKB_PAD;
2633 | tun->filter_attached = false;
2634 | tun->sndbuf = tfile->socket.sk->sk_sndbuf;
2635 | tun->rx_batched = 0;
2636 | RCU_INIT_POINTER(tun->steering_prog, NULL);
2637 |
2638 | tun->pcpu_stats = netdev_alloc_pcpu_stats(struct tun_pcpu_stats);
2639 | if (!tun->pcpu_stats) {
2640 | err = -ENOMEM;
2641 | goto err_free_dev;
2642 | }
2643 |
2644 | spin_lock_init(&tun->lock);
2645 |
2646 | err = security_tun_dev_alloc_security(&tun->security);
2647 | if (err < 0)
2648 | goto err_free_stat;
2649 |
2650 | tun_net_init(dev);
2651 | tun_flow_init(tun);
2652 |
2653 | dev->hw_features = NETIF_F_SG | NETIF_F_FRAGLIST |
2654 | TUN_USER_FEATURES | NETIF_F_HW_VLAN_CTAG_TX |
2655 | NETIF_F_HW_VLAN_STAG_TX;
2656 | dev->features = dev->hw_features | NETIF_F_LLTX;
2657 | dev->vlan_features = dev->features &
2658 | ~(NETIF_F_HW_VLAN_CTAG_TX |
2659 | NETIF_F_HW_VLAN_STAG_TX);
2660 |
2661 | tun->flags = (tun->flags & ~TUN_FEATURES) |
2662 | (ifr->ifr_flags & TUN_FEATURES);
2663 |
2664 | INIT_LIST_HEAD(&tun->disabled);
2665 | err = tun_attach(tun, file, false, ifr->ifr_flags & IFF_NAPI,
2666 | ifr->ifr_flags & IFF_NAPI_FRAGS, false);
2667 | if (err < 0)
2668 | goto err_free_flow;
2669 |
2670 | err = register_netdevice(tun->dev);
2671 | if (err < 0)
2672 | goto err_detach;
2673 | /* free_netdev() won't check the refcnt; to avoid a race
2674 | * with dev_put() we must publish tun only after registration.
2675 | */
2676 | rcu_assign_pointer(tfile->tun, tun);
2677 | }
2678 |
2679 | netif_carrier_on(tun->dev);
2680 |
2681 | tun_debug(KERN_INFO, tun, "tun_set_iff\n");
2682 |
2683 | /* Make sure persistent devices do not get stuck in
2684 | * xoff state.
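*
* For reference, the userspace side of creating a persistent
* multi-queue device is roughly (a sketch, error handling elided):
*
*	#include <fcntl.h>
*	#include <string.h>
*	#include <sys/ioctl.h>
*	#include <linux/if.h>
*	#include <linux/if_tun.h>
*
*	int open_queue(const char *name)
*	{
*		struct ifreq ifr;
*		int fd = open("/dev/net/tun", O_RDWR);
*
*		memset(&ifr, 0, sizeof(ifr));
*		ifr.ifr_flags = IFF_TUN | IFF_NO_PI | IFF_MULTI_QUEUE;
*		strncpy(ifr.ifr_name, name, IFNAMSIZ - 1);
*		ioctl(fd, TUNSETIFF, &ifr);	// attach this fd as one queue
*		ioctl(fd, TUNSETPERSIST, 1);	// keep device after fds close
*		return fd;
*	}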
2685 | */ 2686 | if (netif_running(tun->dev)) 2687 | netif_tx_wake_all_queues(tun->dev); 2688 | 2689 | strcpy(ifr->ifr_name, tun->dev->name); 2690 | return 0; 2691 | 2692 | err_detach: 2693 | tun_detach_all(dev); 2694 | /* register_netdevice() already called tun_free_netdev() */ 2695 | goto err_free_dev; 2696 | 2697 | err_free_flow: 2698 | tun_flow_uninit(tun); 2699 | security_tun_dev_free_security(tun->security); 2700 | err_free_stat: 2701 | free_percpu(tun->pcpu_stats); 2702 | err_free_dev: 2703 | free_netdev(dev); 2704 | return err; 2705 | } 2706 | 2707 | static void tun_get_iff(struct net *net, struct tun_struct *tun, 2708 | struct ifreq *ifr) 2709 | { 2710 | tun_debug(KERN_INFO, tun, "tun_get_iff\n"); 2711 | 2712 | strcpy(ifr->ifr_name, tun->dev->name); 2713 | 2714 | ifr->ifr_flags = tun_flags(tun); 2715 | 2716 | } 2717 | 2718 | /* This is like a cut-down ethtool ops, except done via tun fd so no 2719 | * privs required. */ 2720 | static int set_offload(struct tun_struct *tun, unsigned long arg) 2721 | { 2722 | netdev_features_t features = 0; 2723 | 2724 | if (arg & TUN_F_CSUM) { 2725 | features |= NETIF_F_HW_CSUM; 2726 | arg &= ~TUN_F_CSUM; 2727 | 2728 | if (arg & (TUN_F_TSO4|TUN_F_TSO6)) { 2729 | if (arg & TUN_F_TSO_ECN) { 2730 | features |= NETIF_F_TSO_ECN; 2731 | arg &= ~TUN_F_TSO_ECN; 2732 | } 2733 | if (arg & TUN_F_TSO4) 2734 | features |= NETIF_F_TSO; 2735 | if (arg & TUN_F_TSO6) 2736 | features |= NETIF_F_TSO6; 2737 | arg &= ~(TUN_F_TSO4|TUN_F_TSO6); 2738 | } 2739 | 2740 | arg &= ~TUN_F_UFO; 2741 | } 2742 | 2743 | /* This gives the user a way to test for new features in future by 2744 | * trying to set them. */ 2745 | if (arg) 2746 | return -EINVAL; 2747 | 2748 | tun->set_features = features; 2749 | tun->dev->wanted_features &= ~TUN_USER_FEATURES; 2750 | tun->dev->wanted_features |= features; 2751 | netdev_update_features(tun->dev); 2752 | 2753 | return 0; 2754 | } 2755 | 2756 | static void tun_detach_filter(struct tun_struct *tun, int n) 2757 | { 2758 | int i; 2759 | struct tun_file *tfile; 2760 | 2761 | for (i = 0; i < n; i++) { 2762 | tfile = rtnl_dereference(tun->tfiles[i]); 2763 | lock_sock(tfile->socket.sk); 2764 | sk_detach_filter(tfile->socket.sk); 2765 | release_sock(tfile->socket.sk); 2766 | } 2767 | 2768 | tun->filter_attached = false; 2769 | } 2770 | 2771 | static int tun_attach_filter(struct tun_struct *tun) 2772 | { 2773 | int i, ret = 0; 2774 | struct tun_file *tfile; 2775 | 2776 | for (i = 0; i < tun->numqueues; i++) { 2777 | tfile = rtnl_dereference(tun->tfiles[i]); 2778 | lock_sock(tfile->socket.sk); 2779 | ret = sk_attach_filter(&tun->fprog, tfile->socket.sk); 2780 | release_sock(tfile->socket.sk); 2781 | if (ret) { 2782 | tun_detach_filter(tun, i); 2783 | return ret; 2784 | } 2785 | } 2786 | 2787 | tun->filter_attached = true; 2788 | return ret; 2789 | } 2790 | 2791 | static void tun_set_sndbuf(struct tun_struct *tun) 2792 | { 2793 | struct tun_file *tfile; 2794 | int i; 2795 | 2796 | for (i = 0; i < tun->numqueues; i++) { 2797 | tfile = rtnl_dereference(tun->tfiles[i]); 2798 | tfile->socket.sk->sk_sndbuf = tun->sndbuf; 2799 | } 2800 | } 2801 | 2802 | static int tun_set_queue(struct file *file, struct ifreq *ifr) 2803 | { 2804 | struct tun_file *tfile = file->private_data; 2805 | struct tun_struct *tun; 2806 | int ret = 0; 2807 | 2808 | rtnl_lock(); 2809 | 2810 | if (ifr->ifr_flags & IFF_ATTACH_QUEUE) { 2811 | tun = tfile->detached; 2812 | if (!tun) { 2813 | ret = -EINVAL; 2814 | goto unlock; 2815 | } 2816 | ret = 
security_tun_dev_attach_queue(tun->security); 2817 | if (ret < 0) 2818 | goto unlock; 2819 | ret = tun_attach(tun, file, false, tun->flags & IFF_NAPI, 2820 | tun->flags & IFF_NAPI_FRAGS, true); 2821 | } else if (ifr->ifr_flags & IFF_DETACH_QUEUE) { 2822 | tun = rtnl_dereference(tfile->tun); 2823 | if (!tun || !(tun->flags & IFF_MULTI_QUEUE) || tfile->detached) 2824 | ret = -EINVAL; 2825 | else 2826 | __tun_detach(tfile, false); 2827 | } else 2828 | ret = -EINVAL; 2829 | 2830 | if (ret >= 0) 2831 | netdev_state_change(tun->dev); 2832 | 2833 | unlock: 2834 | rtnl_unlock(); 2835 | return ret; 2836 | } 2837 | 2838 | static int tun_set_ebpf(struct tun_struct *tun, struct tun_prog **prog_p, 2839 | void __user *data) 2840 | { 2841 | struct bpf_prog *prog; 2842 | int fd; 2843 | 2844 | if (copy_from_user(&fd, data, sizeof(fd))) 2845 | return -EFAULT; 2846 | 2847 | if (fd == -1) { 2848 | prog = NULL; 2849 | } else { 2850 | prog = bpf_prog_get_type(fd, BPF_PROG_TYPE_SOCKET_FILTER); 2851 | if (IS_ERR(prog)) 2852 | return PTR_ERR(prog); 2853 | } 2854 | 2855 | return __tun_set_ebpf(tun, prog_p, prog); 2856 | } 2857 | 2858 | static long __tun_chr_ioctl(struct file *file, unsigned int cmd, 2859 | unsigned long arg, int ifreq_len) 2860 | { 2861 | struct tun_file *tfile = file->private_data; 2862 | struct net *net = sock_net(&tfile->sk); 2863 | struct tun_struct *tun; 2864 | void __user* argp = (void __user*)arg; 2865 | struct ifreq ifr; 2866 | kuid_t owner; 2867 | kgid_t group; 2868 | int sndbuf; 2869 | int vnet_hdr_sz; 2870 | unsigned int ifindex; 2871 | int le; 2872 | int ret; 2873 | bool do_notify = false; 2874 | 2875 | if (cmd == TUNSETIFF || cmd == TUNSETQUEUE || 2876 | (_IOC_TYPE(cmd) == SOCK_IOC_TYPE && cmd != SIOCGSKNS)) { 2877 | if (copy_from_user(&ifr, argp, ifreq_len)) 2878 | return -EFAULT; 2879 | } else { 2880 | memset(&ifr, 0, sizeof(ifr)); 2881 | } 2882 | if (cmd == TUNGETFEATURES) { 2883 | /* Currently this just means: "what IFF flags are valid?". 2884 | * This is needed because we never checked for invalid flags on 2885 | * TUNSETIFF. 
2886 | */ 2887 | return put_user(IFF_TUN | IFF_TAP | TUN_FEATURES, 2888 | (unsigned int __user*)argp); 2889 | } else if (cmd == TUNSETQUEUE) { 2890 | return tun_set_queue(file, &ifr); 2891 | } else if (cmd == SIOCGSKNS) { 2892 | if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) 2893 | return -EPERM; 2894 | return open_related_ns(&net->ns, get_net_ns); 2895 | } 2896 | 2897 | ret = 0; 2898 | rtnl_lock(); 2899 | 2900 | tun = tun_get(tfile); 2901 | if (cmd == TUNSETIFF) { 2902 | ret = -EEXIST; 2903 | if (tun) 2904 | goto unlock; 2905 | 2906 | ifr.ifr_name[IFNAMSIZ-1] = '\0'; 2907 | 2908 | ret = tun_set_iff(net, file, &ifr); 2909 | 2910 | if (ret) 2911 | goto unlock; 2912 | 2913 | if (copy_to_user(argp, &ifr, ifreq_len)) 2914 | ret = -EFAULT; 2915 | goto unlock; 2916 | } 2917 | if (cmd == TUNSETIFINDEX) { 2918 | ret = -EPERM; 2919 | if (tun) 2920 | goto unlock; 2921 | 2922 | ret = -EFAULT; 2923 | if (copy_from_user(&ifindex, argp, sizeof(ifindex))) 2924 | goto unlock; 2925 | 2926 | ret = 0; 2927 | tfile->ifindex = ifindex; 2928 | goto unlock; 2929 | } 2930 | 2931 | ret = -EBADFD; 2932 | if (!tun) 2933 | goto unlock; 2934 | 2935 | tun_debug(KERN_INFO, tun, "tun_chr_ioctl cmd %u\n", cmd); 2936 | 2937 | ret = 0; 2938 | switch (cmd) { 2939 | case TUNGETIFF: 2940 | tun_get_iff(current->nsproxy->net_ns, tun, &ifr); 2941 | 2942 | if (tfile->detached) 2943 | ifr.ifr_flags |= IFF_DETACH_QUEUE; 2944 | if (!tfile->socket.sk->sk_filter) 2945 | ifr.ifr_flags |= IFF_NOFILTER; 2946 | 2947 | if (copy_to_user(argp, &ifr, ifreq_len)) 2948 | ret = -EFAULT; 2949 | break; 2950 | 2951 | case TUNSETNOCSUM: 2952 | /* Disable/Enable checksum */ 2953 | 2954 | /* [unimplemented] */ 2955 | tun_debug(KERN_INFO, tun, "ignored: set checksum %s\n", 2956 | arg ? "disabled" : "enabled"); 2957 | break; 2958 | 2959 | case TUNSETPERSIST: 2960 | /* Disable/Enable persist mode. Keep an extra reference to the 2961 | * module to prevent the module being unprobed. 2962 | */ 2963 | if (arg && !(tun->flags & IFF_PERSIST)) { 2964 | tun->flags |= IFF_PERSIST; 2965 | __module_get(THIS_MODULE); 2966 | do_notify = true; 2967 | } 2968 | if (!arg && (tun->flags & IFF_PERSIST)) { 2969 | tun->flags &= ~IFF_PERSIST; 2970 | module_put(THIS_MODULE); 2971 | do_notify = true; 2972 | } 2973 | 2974 | tun_debug(KERN_INFO, tun, "persist %s\n", 2975 | arg ? 
"enabled" : "disabled"); 2976 | break; 2977 | 2978 | case TUNSETOWNER: 2979 | /* Set owner of the device */ 2980 | owner = make_kuid(current_user_ns(), arg); 2981 | if (!uid_valid(owner)) { 2982 | ret = -EINVAL; 2983 | break; 2984 | } 2985 | tun->owner = owner; 2986 | do_notify = true; 2987 | tun_debug(KERN_INFO, tun, "owner set to %u\n", 2988 | from_kuid(&init_user_ns, tun->owner)); 2989 | break; 2990 | 2991 | case TUNSETGROUP: 2992 | /* Set group of the device */ 2993 | group = make_kgid(current_user_ns(), arg); 2994 | if (!gid_valid(group)) { 2995 | ret = -EINVAL; 2996 | break; 2997 | } 2998 | tun->group = group; 2999 | do_notify = true; 3000 | tun_debug(KERN_INFO, tun, "group set to %u\n", 3001 | from_kgid(&init_user_ns, tun->group)); 3002 | break; 3003 | 3004 | case TUNSETLINK: 3005 | /* Only allow setting the type when the interface is down */ 3006 | if (tun->dev->flags & IFF_UP) { 3007 | tun_debug(KERN_INFO, tun, 3008 | "Linktype set failed because interface is up\n"); 3009 | ret = -EBUSY; 3010 | } else { 3011 | tun->dev->type = (int) arg; 3012 | tun_debug(KERN_INFO, tun, "linktype set to %d\n", 3013 | tun->dev->type); 3014 | ret = 0; 3015 | } 3016 | break; 3017 | 3018 | #ifdef TUN_DEBUG 3019 | case TUNSETDEBUG: 3020 | tun->debug = arg; 3021 | break; 3022 | #endif 3023 | case TUNSETOFFLOAD: 3024 | ret = set_offload(tun, arg); 3025 | break; 3026 | 3027 | case TUNSETTXFILTER: 3028 | /* Can be set only for TAPs */ 3029 | ret = -EINVAL; 3030 | if ((tun->flags & TUN_TYPE_MASK) != IFF_TAP) 3031 | break; 3032 | ret = update_filter(&tun->txflt, (void __user *)arg); 3033 | break; 3034 | 3035 | case SIOCGIFHWADDR: 3036 | /* Get hw address */ 3037 | memcpy(ifr.ifr_hwaddr.sa_data, tun->dev->dev_addr, ETH_ALEN); 3038 | ifr.ifr_hwaddr.sa_family = tun->dev->type; 3039 | if (copy_to_user(argp, &ifr, ifreq_len)) 3040 | ret = -EFAULT; 3041 | break; 3042 | 3043 | case SIOCSIFHWADDR: 3044 | /* Set hw address */ 3045 | tun_debug(KERN_DEBUG, tun, "set hw address: %pM\n", 3046 | ifr.ifr_hwaddr.sa_data); 3047 | 3048 | ret = dev_set_mac_address(tun->dev, &ifr.ifr_hwaddr); 3049 | break; 3050 | 3051 | case TUNGETSNDBUF: 3052 | sndbuf = tfile->socket.sk->sk_sndbuf; 3053 | if (copy_to_user(argp, &sndbuf, sizeof(sndbuf))) 3054 | ret = -EFAULT; 3055 | break; 3056 | 3057 | case TUNSETSNDBUF: 3058 | if (copy_from_user(&sndbuf, argp, sizeof(sndbuf))) { 3059 | ret = -EFAULT; 3060 | break; 3061 | } 3062 | if (sndbuf <= 0) { 3063 | ret = -EINVAL; 3064 | break; 3065 | } 3066 | 3067 | tun->sndbuf = sndbuf; 3068 | tun_set_sndbuf(tun); 3069 | break; 3070 | 3071 | case TUNGETVNETHDRSZ: 3072 | vnet_hdr_sz = tun->vnet_hdr_sz; 3073 | if (copy_to_user(argp, &vnet_hdr_sz, sizeof(vnet_hdr_sz))) 3074 | ret = -EFAULT; 3075 | break; 3076 | 3077 | case TUNSETVNETHDRSZ: 3078 | if (copy_from_user(&vnet_hdr_sz, argp, sizeof(vnet_hdr_sz))) { 3079 | ret = -EFAULT; 3080 | break; 3081 | } 3082 | if (vnet_hdr_sz < (int)sizeof(struct virtio_net_hdr)) { 3083 | ret = -EINVAL; 3084 | break; 3085 | } 3086 | 3087 | tun->vnet_hdr_sz = vnet_hdr_sz; 3088 | break; 3089 | 3090 | case TUNGETVNETLE: 3091 | le = !!(tun->flags & TUN_VNET_LE); 3092 | if (put_user(le, (int __user *)argp)) 3093 | ret = -EFAULT; 3094 | break; 3095 | 3096 | case TUNSETVNETLE: 3097 | if (get_user(le, (int __user *)argp)) { 3098 | ret = -EFAULT; 3099 | break; 3100 | } 3101 | if (le) 3102 | tun->flags |= TUN_VNET_LE; 3103 | else 3104 | tun->flags &= ~TUN_VNET_LE; 3105 | break; 3106 | 3107 | case TUNGETVNETBE: 3108 | ret = tun_get_vnet_be(tun, argp); 3109 | break; 3110 | 3111 | 
case TUNSETVNETBE: 3112 | ret = tun_set_vnet_be(tun, argp); 3113 | break; 3114 | 3115 | case TUNATTACHFILTER: 3116 | /* Can be set only for TAPs */ 3117 | ret = -EINVAL; 3118 | if ((tun->flags & TUN_TYPE_MASK) != IFF_TAP) 3119 | break; 3120 | ret = -EFAULT; 3121 | if (copy_from_user(&tun->fprog, argp, sizeof(tun->fprog))) 3122 | break; 3123 | 3124 | ret = tun_attach_filter(tun); 3125 | break; 3126 | 3127 | case TUNDETACHFILTER: 3128 | /* Can be set only for TAPs */ 3129 | ret = -EINVAL; 3130 | if ((tun->flags & TUN_TYPE_MASK) != IFF_TAP) 3131 | break; 3132 | ret = 0; 3133 | tun_detach_filter(tun, tun->numqueues); 3134 | break; 3135 | 3136 | case TUNGETFILTER: 3137 | ret = -EINVAL; 3138 | if ((tun->flags & TUN_TYPE_MASK) != IFF_TAP) 3139 | break; 3140 | ret = -EFAULT; 3141 | if (copy_to_user(argp, &tun->fprog, sizeof(tun->fprog))) 3142 | break; 3143 | ret = 0; 3144 | break; 3145 | 3146 | case TUNSETSTEERINGEBPF: 3147 | ret = tun_set_ebpf(tun, &tun->steering_prog, argp); 3148 | break; 3149 | 3150 | case TUNSETFILTEREBPF: 3151 | ret = tun_set_ebpf(tun, &tun->filter_prog, argp); 3152 | break; 3153 | 3154 | default: 3155 | ret = -EINVAL; 3156 | break; 3157 | } 3158 | 3159 | if (do_notify) 3160 | netdev_state_change(tun->dev); 3161 | 3162 | unlock: 3163 | rtnl_unlock(); 3164 | if (tun) 3165 | tun_put(tun); 3166 | return ret; 3167 | } 3168 | 3169 | static long tun_chr_ioctl(struct file *file, 3170 | unsigned int cmd, unsigned long arg) 3171 | { 3172 | return __tun_chr_ioctl(file, cmd, arg, sizeof (struct ifreq)); 3173 | } 3174 | 3175 | #ifdef CONFIG_COMPAT 3176 | static long tun_chr_compat_ioctl(struct file *file, 3177 | unsigned int cmd, unsigned long arg) 3178 | { 3179 | switch (cmd) { 3180 | case TUNSETIFF: 3181 | case TUNGETIFF: 3182 | case TUNSETTXFILTER: 3183 | case TUNGETSNDBUF: 3184 | case TUNSETSNDBUF: 3185 | case SIOCGIFHWADDR: 3186 | case SIOCSIFHWADDR: 3187 | arg = (unsigned long)compat_ptr(arg); 3188 | break; 3189 | default: 3190 | arg = (compat_ulong_t)arg; 3191 | break; 3192 | } 3193 | 3194 | /* 3195 | * compat_ifreq is shorter than ifreq, so we must not access beyond 3196 | * the end of that structure. All fields that are used in this 3197 | * driver are compatible though, we don't need to convert the 3198 | * contents. 
3199 | */ 3200 | return __tun_chr_ioctl(file, cmd, arg, sizeof(struct compat_ifreq)); 3201 | } 3202 | #endif /* CONFIG_COMPAT */ 3203 | 3204 | static int tun_chr_fasync(int fd, struct file *file, int on) 3205 | { 3206 | struct tun_file *tfile = file->private_data; 3207 | int ret; 3208 | 3209 | if ((ret = fasync_helper(fd, file, on, &tfile->fasync)) < 0) 3210 | goto out; 3211 | 3212 | if (on) { 3213 | __f_setown(file, task_pid(current), PIDTYPE_TGID, 0); 3214 | tfile->flags |= TUN_FASYNC; 3215 | } else 3216 | tfile->flags &= ~TUN_FASYNC; 3217 | ret = 0; 3218 | out: 3219 | return ret; 3220 | } 3221 | 3222 | static int tun_chr_open(struct inode *inode, struct file * file) 3223 | { 3224 | struct net *net = current->nsproxy->net_ns; 3225 | struct tun_file *tfile; 3226 | 3227 | DBG1(KERN_INFO, "tunX: tun_chr_open\n"); 3228 | 3229 | tfile = (struct tun_file *)sk_alloc(net, AF_UNSPEC, GFP_KERNEL, 3230 | &tun_proto, 0); 3231 | if (!tfile) 3232 | return -ENOMEM; 3233 | if (ptr_ring_init(&tfile->tx_ring, 0, GFP_KERNEL)) { 3234 | sk_free(&tfile->sk); 3235 | return -ENOMEM; 3236 | } 3237 | 3238 | mutex_init(&tfile->napi_mutex); 3239 | RCU_INIT_POINTER(tfile->tun, NULL); 3240 | tfile->flags = 0; 3241 | tfile->ifindex = 0; 3242 | 3243 | init_waitqueue_head(&tfile->wq.wait); 3244 | RCU_INIT_POINTER(tfile->socket.wq, &tfile->wq); 3245 | 3246 | tfile->socket.file = file; 3247 | tfile->socket.ops = &tun_socket_ops; 3248 | 3249 | sock_init_data(&tfile->socket, &tfile->sk); 3250 | 3251 | tfile->sk.sk_write_space = tun_sock_write_space; 3252 | tfile->sk.sk_sndbuf = INT_MAX; 3253 | 3254 | file->private_data = tfile; 3255 | INIT_LIST_HEAD(&tfile->next); 3256 | 3257 | sock_set_flag(&tfile->sk, SOCK_ZEROCOPY); 3258 | 3259 | return 0; 3260 | } 3261 | 3262 | static int tun_chr_close(struct inode *inode, struct file *file) 3263 | { 3264 | struct tun_file *tfile = file->private_data; 3265 | 3266 | tun_detach(tfile, true); 3267 | 3268 | return 0; 3269 | } 3270 | 3271 | #ifdef CONFIG_PROC_FS 3272 | static void tun_chr_show_fdinfo(struct seq_file *m, struct file *file) 3273 | { 3274 | struct tun_file *tfile = file->private_data; 3275 | struct tun_struct *tun; 3276 | struct ifreq ifr; 3277 | 3278 | memset(&ifr, 0, sizeof(ifr)); 3279 | 3280 | rtnl_lock(); 3281 | tun = tun_get(tfile); 3282 | if (tun) 3283 | tun_get_iff(current->nsproxy->net_ns, tun, &ifr); 3284 | rtnl_unlock(); 3285 | 3286 | if (tun) 3287 | tun_put(tun); 3288 | 3289 | seq_printf(m, "iff:\t%s\n", ifr.ifr_name); 3290 | } 3291 | #endif 3292 | 3293 | static const struct file_operations tun_fops = { 3294 | .owner = THIS_MODULE, 3295 | .llseek = no_llseek, 3296 | .read_iter = tun_chr_read_iter, 3297 | .write_iter = tun_chr_write_iter, 3298 | .poll = tun_chr_poll, 3299 | .unlocked_ioctl = tun_chr_ioctl, 3300 | #ifdef CONFIG_COMPAT 3301 | .compat_ioctl = tun_chr_compat_ioctl, 3302 | #endif 3303 | .open = tun_chr_open, 3304 | .release = tun_chr_close, 3305 | .fasync = tun_chr_fasync, 3306 | #ifdef CONFIG_PROC_FS 3307 | .show_fdinfo = tun_chr_show_fdinfo, 3308 | #endif 3309 | }; 3310 | 3311 | static struct miscdevice tun_miscdev = { 3312 | .minor = TUN_MINOR, 3313 | .name = "tun", 3314 | .nodename = "net/tun", 3315 | .fops = &tun_fops, 3316 | }; 3317 | 3318 | /* ethtool interface */ 3319 | 3320 | static void tun_default_link_ksettings(struct net_device *dev, 3321 | struct ethtool_link_ksettings *cmd) 3322 | { 3323 | ethtool_link_ksettings_zero_link_mode(cmd, supported); 3324 | ethtool_link_ksettings_zero_link_mode(cmd, advertising); 3325 | cmd->base.speed = SPEED_10; 
3326 | cmd->base.duplex = DUPLEX_FULL; 3327 | cmd->base.port = PORT_TP; 3328 | cmd->base.phy_address = 0; 3329 | cmd->base.autoneg = AUTONEG_DISABLE; 3330 | } 3331 | 3332 | static int tun_get_link_ksettings(struct net_device *dev, 3333 | struct ethtool_link_ksettings *cmd) 3334 | { 3335 | struct tun_struct *tun = netdev_priv(dev); 3336 | 3337 | memcpy(cmd, &tun->link_ksettings, sizeof(*cmd)); 3338 | return 0; 3339 | } 3340 | 3341 | static int tun_set_link_ksettings(struct net_device *dev, 3342 | const struct ethtool_link_ksettings *cmd) 3343 | { 3344 | struct tun_struct *tun = netdev_priv(dev); 3345 | 3346 | memcpy(&tun->link_ksettings, cmd, sizeof(*cmd)); 3347 | return 0; 3348 | } 3349 | 3350 | static void tun_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info) 3351 | { 3352 | struct tun_struct *tun = netdev_priv(dev); 3353 | 3354 | strlcpy(info->driver, DRV_NAME, sizeof(info->driver)); 3355 | strlcpy(info->version, DRV_VERSION, sizeof(info->version)); 3356 | 3357 | switch (tun->flags & TUN_TYPE_MASK) { 3358 | case IFF_TUN: 3359 | strlcpy(info->bus_info, "tun", sizeof(info->bus_info)); 3360 | break; 3361 | case IFF_TAP: 3362 | strlcpy(info->bus_info, "tap", sizeof(info->bus_info)); 3363 | break; 3364 | } 3365 | } 3366 | 3367 | static u32 tun_get_msglevel(struct net_device *dev) 3368 | { 3369 | #ifdef TUN_DEBUG 3370 | struct tun_struct *tun = netdev_priv(dev); 3371 | return tun->debug; 3372 | #else 3373 | return -EOPNOTSUPP; 3374 | #endif 3375 | } 3376 | 3377 | static void tun_set_msglevel(struct net_device *dev, u32 value) 3378 | { 3379 | #ifdef TUN_DEBUG 3380 | struct tun_struct *tun = netdev_priv(dev); 3381 | tun->debug = value; 3382 | #endif 3383 | } 3384 | 3385 | static int tun_get_coalesce(struct net_device *dev, 3386 | struct ethtool_coalesce *ec) 3387 | { 3388 | struct tun_struct *tun = netdev_priv(dev); 3389 | 3390 | ec->rx_max_coalesced_frames = tun->rx_batched; 3391 | 3392 | return 0; 3393 | } 3394 | 3395 | static int tun_set_coalesce(struct net_device *dev, 3396 | struct ethtool_coalesce *ec) 3397 | { 3398 | struct tun_struct *tun = netdev_priv(dev); 3399 | 3400 | if (ec->rx_max_coalesced_frames > NAPI_POLL_WEIGHT) 3401 | tun->rx_batched = NAPI_POLL_WEIGHT; 3402 | else 3403 | tun->rx_batched = ec->rx_max_coalesced_frames; 3404 | 3405 | return 0; 3406 | } 3407 | 3408 | static const struct ethtool_ops tun_ethtool_ops = { 3409 | .get_drvinfo = tun_get_drvinfo, 3410 | .get_msglevel = tun_get_msglevel, 3411 | .set_msglevel = tun_set_msglevel, 3412 | .get_link = ethtool_op_get_link, 3413 | .get_ts_info = ethtool_op_get_ts_info, 3414 | .get_coalesce = tun_get_coalesce, 3415 | .set_coalesce = tun_set_coalesce, 3416 | .get_link_ksettings = tun_get_link_ksettings, 3417 | .set_link_ksettings = tun_set_link_ksettings, 3418 | }; 3419 | 3420 | static int tun_queue_resize(struct tun_struct *tun) 3421 | { 3422 | struct net_device *dev = tun->dev; 3423 | struct tun_file *tfile; 3424 | struct ptr_ring **rings; 3425 | int n = tun->numqueues + tun->numdisabled; 3426 | int ret, i; 3427 | 3428 | rings = kmalloc_array(n, sizeof(*rings), GFP_KERNEL); 3429 | if (!rings) 3430 | return -ENOMEM; 3431 | 3432 | for (i = 0; i < tun->numqueues; i++) { 3433 | tfile = rtnl_dereference(tun->tfiles[i]); 3434 | rings[i] = &tfile->tx_ring; 3435 | } 3436 | list_for_each_entry(tfile, &tun->disabled, next) 3437 | rings[i++] = &tfile->tx_ring; 3438 | 3439 | ret = ptr_ring_resize_multiple(rings, n, 3440 | dev->tx_queue_len, GFP_KERNEL, 3441 | tun_ptr_free); 3442 | 3443 | kfree(rings); 3444 | return ret; 
3445 | } 3446 | 3447 | static int tun_device_event(struct notifier_block *unused, 3448 | unsigned long event, void *ptr) 3449 | { 3450 | struct net_device *dev = netdev_notifier_info_to_dev(ptr); 3451 | struct tun_struct *tun = netdev_priv(dev); 3452 | int i; 3453 | 3454 | if (dev->rtnl_link_ops != &tun_link_ops) 3455 | return NOTIFY_DONE; 3456 | 3457 | switch (event) { 3458 | case NETDEV_CHANGE_TX_QUEUE_LEN: 3459 | if (tun_queue_resize(tun)) 3460 | return NOTIFY_BAD; 3461 | break; 3462 | case NETDEV_UP: 3463 | for (i = 0; i < tun->numqueues; i++) { 3464 | struct tun_file *tfile; 3465 | 3466 | tfile = rtnl_dereference(tun->tfiles[i]); 3467 | tfile->socket.sk->sk_write_space(tfile->socket.sk); 3468 | } 3469 | break; 3470 | default: 3471 | break; 3472 | } 3473 | 3474 | return NOTIFY_DONE; 3475 | } 3476 | 3477 | static struct notifier_block tun_notifier_block __read_mostly = { 3478 | .notifier_call = tun_device_event, 3479 | }; 3480 | 3481 | static int __init tun_init(void) 3482 | { 3483 | int ret = 0; 3484 | 3485 | pr_info("%s, %s\n", DRV_DESCRIPTION, DRV_VERSION); 3486 | 3487 | ret = rtnl_link_register(&tun_link_ops); 3488 | if (ret) { 3489 | pr_err("Can't register link_ops\n"); 3490 | goto err_linkops; 3491 | } 3492 | 3493 | ret = misc_register(&tun_miscdev); 3494 | if (ret) { 3495 | pr_err("Can't register misc device %d\n", TUN_MINOR); 3496 | goto err_misc; 3497 | } 3498 | 3499 | ret = register_netdevice_notifier(&tun_notifier_block); 3500 | if (ret) { 3501 | pr_err("Can't register netdevice notifier\n"); 3502 | goto err_notifier; 3503 | } 3504 | 3505 | return 0; 3506 | 3507 | err_notifier: 3508 | misc_deregister(&tun_miscdev); 3509 | err_misc: 3510 | rtnl_link_unregister(&tun_link_ops); 3511 | err_linkops: 3512 | return ret; 3513 | } 3514 | 3515 | static void tun_cleanup(void) 3516 | { 3517 | misc_deregister(&tun_miscdev); 3518 | rtnl_link_unregister(&tun_link_ops); 3519 | unregister_netdevice_notifier(&tun_notifier_block); 3520 | } 3521 | 3522 | /* Get an underlying socket object from tun file. Returns error unless file is 3523 | * attached to a device. The returned object works like a packet socket, it 3524 | * can be used for sock_sendmsg/sock_recvmsg. The caller is responsible for 3525 | * holding a reference to the file for as long as the socket is in use. 
*/ 3526 | struct socket *tun_get_socket(struct file *file) 3527 | { 3528 | struct tun_file *tfile; 3529 | if (file->f_op != &tun_fops) 3530 | return ERR_PTR(-EINVAL); 3531 | tfile = file->private_data; 3532 | if (!tfile) 3533 | return ERR_PTR(-EBADFD); 3534 | return &tfile->socket; 3535 | } 3536 | EXPORT_SYMBOL_GPL(tun_get_socket); 3537 | 3538 | struct ptr_ring *tun_get_tx_ring(struct file *file) 3539 | { 3540 | struct tun_file *tfile; 3541 | 3542 | if (file->f_op != &tun_fops) 3543 | return ERR_PTR(-EINVAL); 3544 | tfile = file->private_data; 3545 | if (!tfile) 3546 | return ERR_PTR(-EBADFD); 3547 | return &tfile->tx_ring; 3548 | } 3549 | EXPORT_SYMBOL_GPL(tun_get_tx_ring); 3550 | 3551 | module_init(tun_init); 3552 | module_exit(tun_cleanup); 3553 | MODULE_DESCRIPTION(DRV_DESCRIPTION); 3554 | MODULE_AUTHOR(DRV_COPYRIGHT); 3555 | MODULE_LICENSE("GPL"); 3556 | MODULE_ALIAS_MISCDEV(TUN_MINOR); 3557 | MODULE_ALIAS("devname:net/tun"); 3558 |
-------------------------------------------------------------------------------- /main.go: --------------------------------------------------------------------------------
package main

import (
	"fmt"
	"log"
	"net"
	"net/http"
	_ "net/http/pprof"
	"os"
	"strconv"

	"tuntap/tun"
	"tuntap/tunnel"
)

func main() {
	if len(os.Args) != 5 {
		fmt.Println("usage: ./tuntap server|client <queues> <IP address> <pre-shared key>")
		os.Exit(1)
	}

	mode := os.Args[1]
	if mode != "server" && mode != "client" {
		fmt.Println("mode must be \"server\" or \"client\"")
		os.Exit(1)
	}
	client := mode == "client"

	queues, err := strconv.Atoi(os.Args[2])
	if err != nil || queues < 1 {
		log.Fatalf("invalid queue count %q", os.Args[2])
	}

	ip := net.ParseIP(os.Args[3]).To4()
	if ip == nil {
		log.Fatalf("invalid IPv4 address %q", os.Args[3])
	}
	var addr [4]byte
	copy(addr[:], ip)

	key, err := strconv.Atoi(os.Args[4])
	if err != nil {
		log.Fatalf("invalid pre-shared key %q", os.Args[4])
	}

	// pprof endpoint for profiling the data path.
	go func() {
		log.Println(http.ListenAndServe("127.0.0.1:6061", nil))
	}()

	tunDev, err := tun.CreateTUN("wg2", 1500, queues)
	if err != nil {
		log.Fatal(err)
	}

	instance := tunnel.NewInstance(tunDev, key, addr, client, queues)
	instance.WG.Wait()
}
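Editor's note: the process only creates and services the wg2 device; addressing and link state are left to the operator (the repository ships a config.sh for this, whose contents are not reproduced above). A minimal Go sketch of that post-start configuration, assuming iproute2 is installed; the file name and the 10.0.0.1/24 address are hypothetical examples, not taken from the repository:

// bringup_sketch.go — editorial illustration, not part of the repository.
package main

import (
	"log"
	"os/exec"
)

func run(args ...string) {
	out, err := exec.Command(args[0], args[1:]...).CombinedOutput()
	if err != nil {
		log.Fatalf("%v: %v (%s)", args, err, out)
	}
}

func main() {
	// After ./tuntap has created wg2, give it an address and bring it up.
	run("ip", "addr", "add", "10.0.0.1/24", "dev", "wg2")
	run("ip", "link", "set", "wg2", "up")
}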
-------------------------------------------------------------------------------- /tun/tun.go: --------------------------------------------------------------------------------
package tun

import (
	"fmt"
	"os"
	"unsafe"

	"golang.org/x/sys/unix"
)

type Device interface {
	Read(int, []byte) (int, error)
	Write(int, []byte) (int, error)
}

const (
	cloneDevicePath = "/dev/net/tun"
	ifReqSize       = unix.IFNAMSIZ + 64
)

type NativeTun struct {
	rwFiles []*os.File
	queues  int
}

func (tun *NativeTun) Write(index int, buff []byte) (int, error) {
	return tun.rwFiles[index%tun.queues].Write(buff)
}

func (tun *NativeTun) Read(index int, buff []byte) (int, error) {
	return tun.rwFiles[index%tun.queues].Read(buff)
}

// CreateTUN opens one file descriptor per queue on the multi-queue TUN
// device name. The mtu argument is currently unused; the MTU is expected
// to be configured externally (e.g. with ip link).
func CreateTUN(name string, mtu int, queues int) (Device, error) {
	if len(name) >= unix.IFNAMSIZ {
		return nil, fmt.Errorf("interface name %q too long", name)
	}

	fds := make([]*os.File, queues)
	var ifr [ifReqSize]byte
	var flags uint16 = unix.IFF_TUN | unix.IFF_MULTI_QUEUE
	copy(ifr[:], name)
	*(*uint16)(unsafe.Pointer(&ifr[unix.IFNAMSIZ])) = flags

	for i := 0; i < len(fds); i++ {
		nfd, err := unix.Open(cloneDevicePath, os.O_RDWR, 0)
		if err != nil {
			return nil, fmt.Errorf("open %s: %w", cloneDevicePath, err)
		}
		_, _, errno := unix.Syscall(unix.SYS_IOCTL, uintptr(nfd),
			uintptr(unix.TUNSETIFF), uintptr(unsafe.Pointer(&ifr[0])))
		if errno != 0 {
			unix.Close(nfd)
			return nil, fmt.Errorf("TUNSETIFF %s: %w", name, errno)
		}
		unix.SetNonblock(nfd, false)

		fds[i] = os.NewFile(uintptr(nfd), cloneDevicePath)
	}
	return &NativeTun{rwFiles: fds, queues: queues}, nil
}
-------------------------------------------------------------------------------- /tunnel/define.go: --------------------------------------------------------------------------------
package tunnel

const (
	PortNum           = 12345 // base UDP port; queue i uses PortNum+i
	MinCryptoPoolSize = 4     // minimum crypto workers per queue
	IOBufferLen       = 15000 // packets per I/O pool and per inbound/outbound channel
	CryptionBufferLen = 8000  // capacity of each encryption/decryption channel
	MaxPacketSize     = 2000  // per-packet buffer size; must exceed the TUN MTU
)
-------------------------------------------------------------------------------- /tunnel/receive.go: --------------------------------------------------------------------------------
package tunnel

// Each packet is pushed to the in-order inbound queue and, in parallel, to
// one of the decryption worker queues.
func addToDecryptionBuffer(inboundQueue chan *Packet, decryptionQueue chan *Packet, pktent *Packet) {
	inboundQueue <- pktent
	decryptionQueue <- pktent
}

func (tunnel *Tunnel) RoutineReadFromUDP(queue int, max_enc int) {
	// Pre-allocated packet pool. Every packet starts out locked: its mutex
	// is a completion latch that the decryptor releases and the TUN writer
	// blocks on before using the payload.
	pool := make([]Packet, IOBufferLen)
	for i := 0; i < len(pool); i += 1 {
		pool[i].buffer = make([]byte, MaxPacketSize)
		pool[i].Lock()
	}
	var pos, enc int = 0, 0
	for {
		pkt := &pool[pos%len(pool)]
		size := tunnel.Receive(queue, pkt.buffer)
		if size <= 0 {
			continue
		}
		if pkt.buffer[0] == 'H' {
			// Crude heuristic: a one-byte 'H' probe is the client's
			// address-discovery handshake, not payload.
			continue
		}
		pkt.packet = pkt.buffer[:size]
		addToDecryptionBuffer(tunnel.queue.inbound[queue], tunnel.queue.decryption[queue][enc%max_enc], pkt)
		pos += 1
		enc += 1
	}
}

func (tunnel *Tunnel) RoutineDecryption(queue int, enc int) {
	key := byte(tunnel.key)
	for {
		pkt := <-tunnel.queue.decryption[queue][enc]
		// Decrypt: undo the per-byte additive cipher.
		for i := 0; i < len(pkt.packet); i += 1 {
			pkt.packet[i] -= key
		}
		pkt.Unlock()
	}
}

func (tunnel *Tunnel) RoutineWriteToTUN(index int) {
	for {
		pkt := <-tunnel.queue.inbound[index]
		pkt.Lock() // blocks until the decryptor has released this packet
		tunnel.tun.tunnel.Write(index, pkt.packet)
	}
}
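Editor's note: the receive path above preserves delivery order while decrypting in parallel. Packets enter the in-order inbound channel immediately, and each packet's embedded mutex acts as a completion latch that the consumer blocks on. A self-contained sketch of that pattern, with hypothetical names (item, work, inOrder) that are not in the repository:

package main

import (
	"fmt"
	"sync"
)

type item struct {
	sync.Mutex
	n int
}

func main() {
	const N = 8
	inOrder := make(chan *item, N) // preserves submission order
	work := make(chan *item, N)    // feeds the worker

	// Worker: completes items in its own time, releasing each latch.
	go func() {
		for it := range work {
			it.n *= 10 // stands in for the "decryption" step
			it.Unlock()
		}
	}()

	// Producer: every item starts locked and is sent to both channels.
	items := make([]item, N)
	for i := range items {
		items[i].n = i
		items[i].Lock()
		inOrder <- &items[i]
		work <- &items[i]
	}

	// Consumer: Lock() blocks until the worker released the item, so
	// results come out in submission order regardless of worker timing.
	for i := 0; i < N; i++ {
		it := <-inOrder
		it.Lock()
		fmt.Println(it.n)
	}
}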
-------------------------------------------------------------------------------- /tunnel/send.go: --------------------------------------------------------------------------------
package tunnel

// Each packet is pushed to the in-order outbound queue and, in parallel, to
// one of the encryption worker queues.
func addToEncryptionBuffer(outboundQueue chan *Packet, encryptionQueue chan *Packet, pktent *Packet) {
	outboundQueue <- pktent
	encryptionQueue <- pktent
}

func (tunnel *Tunnel) RoutineReadFromTUN(queue int, max_enc int) {
	// Same pool/latch scheme as the receive path; see receive.go.
	pool := make([]Packet, IOBufferLen)
	for i := 0; i < len(pool); i += 1 {
		pool[i].buffer = make([]byte, MaxPacketSize)
		pool[i].Lock()
	}
	var pos, enc int = 0, 0
	for {
		pkt := &pool[pos%len(pool)]
		size, err := tunnel.tun.tunnel.Read(queue, pkt.buffer)
		if err != nil || size == 0 {
			continue
		}
		pkt.packet = pkt.buffer[:size]
		addToEncryptionBuffer(tunnel.queue.outbound[queue], tunnel.queue.encryption[queue][enc%max_enc], pkt)
		pos += 1
		enc += 1
	}
}

func (tunnel *Tunnel) RoutineEncryption(queue int, enc int) {
	key := byte(tunnel.key)
	for {
		pkt := <-tunnel.queue.encryption[queue][enc]
		// Encrypt: per-byte additive cipher with the pre-shared key.
		for i := 0; i < len(pkt.packet); i += 1 {
			pkt.packet[i] += key
		}
		pkt.Unlock()
	}
}

func (tunnel *Tunnel) RoutineWriteToUDP(index int) {
	for {
		pkt := <-tunnel.queue.outbound[index]
		pkt.Lock() // blocks until the encryptor has released this packet
		tunnel.Send(index, pkt.packet)
	}
}
-------------------------------------------------------------------------------- /tunnel/tunnel.go: --------------------------------------------------------------------------------
package tunnel

import (
	"runtime"
	"sync"

	"tuntap/tun"
)

type Packet struct {
	sync.Mutex
	buffer []byte // backing storage
	packet []byte // buffer[:size] of the current packet
}

type Tunnel struct {
	client bool
	key    int
	WG     sync.WaitGroup
	net    struct {
		socket *UDPSocket
		port   int
		addr   [4]byte
	}
	queue struct {
		inbound    []chan *Packet
		outbound   []chan *Packet
		encryption [][]chan *Packet
		decryption [][]chan *Packet
	}
	tun struct {
		tunnel tun.Device
		queues int
	}
}

func NewInstance(tunTunnel tun.Device, key int, addr [4]byte, client bool, queues int) *Tunnel {
	tunnel := new(Tunnel)
	tunnel.client = client
	tunnel.key = key
	tunnel.tun.queues = queues
	tunnel.tun.tunnel = tunTunnel
	tunnel.net.port = PortNum
	tunnel.net.addr = addr

	tunnel.net.socket = CreateUDPSocket(tunnel.net.port, tunnel.net.addr, tunnel.tun.queues, tunnel.client)

	tunnel.queue.outbound = make([]chan *Packet, queues)
	tunnel.queue.inbound = make([]chan *Packet, queues)

	// Size the per-queue crypto worker pool from the CPU count, with a floor.
	enc := runtime.NumCPU() / queues
	if enc < MinCryptoPoolSize {
		enc = MinCryptoPoolSize
	}
	tunnel.queue.encryption = make([][]chan *Packet, queues)
	tunnel.queue.decryption = make([][]chan *Packet, queues)

	for i := 0; i < queues; i += 1 {
		tunnel.queue.outbound[i] = make(chan *Packet, IOBufferLen)
		tunnel.queue.inbound[i] = make(chan *Packet, IOBufferLen)
		tunnel.queue.encryption[i] = make([]chan *Packet, enc)
		tunnel.queue.decryption[i] = make([]chan *Packet, enc)
		for j := 0; j < enc; j += 1 {
			tunnel.queue.encryption[i][j] = make(chan *Packet, CryptionBufferLen)
			tunnel.queue.decryption[i][j] = make(chan *Packet, CryptionBufferLen)
			go tunnel.RoutineDecryption(i, j)
			go tunnel.RoutineEncryption(i, j)
		}
		go tunnel.RoutineReadFromUDP(i, enc)
		go tunnel.RoutineWriteToTUN(i)
		go tunnel.RoutineReadFromTUN(i, enc)
		go tunnel.RoutineWriteToUDP(i)
	}
	// Done() is never called: the caller's WG.Wait() blocks forever while
	// the I/O goroutines run.
	tunnel.WG.Add(1)

	return tunnel
}
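Editor's note: the tunnel's "encryption" is a per-byte additive cipher keyed by the low byte of the pre-shared key, so it is obfuscation rather than real cryptography. Byte arithmetic wraps mod 256, which makes subtraction an exact inverse of addition. A standalone round-trip check (illustration only; the sample bytes are arbitrary):

package main

import (
	"bytes"
	"fmt"
)

func main() {
	key := byte(42)
	packet := []byte{0x45, 0x00, 0x00, 0x54, 0xff} // e.g. the start of an IPv4 header
	orig := append([]byte(nil), packet...)

	for i := range packet {
		packet[i] += key // what RoutineEncryption does
	}
	for i := range packet {
		packet[i] -= key // what RoutineDecryption does
	}
	fmt.Println(bytes.Equal(packet, orig)) // true: addition mod 256 is invertible
}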
-------------------------------------------------------------------------------- /tunnel/udp.go: --------------------------------------------------------------------------------
package tunnel

import (
	"golang.org/x/sys/unix"
)

type End struct {
	end unix.Sockaddr
}

type UDPSocket struct {
	sock   []int
	end    []End
	queues int
}

func getSockaddr(port int, addr [4]byte) unix.Sockaddr {
	return &unix.SockaddrInet4{
		Port: port,
		Addr: addr,
	}
}

func CreateUDPSocket(port int, addr [4]byte, queues int, client bool) *UDPSocket {
	socket := new(UDPSocket)
	socket.sock = make([]int, queues)
	socket.end = make([]End, queues)
	initial := []byte{'H'} // one-byte handshake probe; see receive.go
	for i := 0; i < queues; i += 1 {
		tport := port + i // one UDP socket (and port) per queue
		socket.sock[i] = create()
		address := getSockaddr(tport, addr)
		if client {
			// The client knows the server address up front and announces
			// itself so the server learns the peer of this queue.
			socket.end[i].end = address
			unix.Connect(socket.sock[i], address)
			send(socket.sock[i], &socket.end[i], initial)
		} else {
			// The server learns each peer from the first Recvfrom.
			unix.Bind(socket.sock[i], address)
		}
	}
	socket.queues = queues
	return socket
}

func (tunnel *Tunnel) Receive(index int, buff []byte) int {
	socket := tunnel.net.socket
	return receive(socket.sock[index], buff, &socket.end[index])
}

func (tunnel *Tunnel) Send(index int, buff []byte) {
	socket := tunnel.net.socket
	send(socket.sock[index], &socket.end[index], buff)
}

func create() int {
	fd, _ := unix.Socket(unix.AF_INET, unix.SOCK_DGRAM, 0)
	unix.SetsockoptInt(fd, unix.SOL_SOCKET, unix.SO_REUSEADDR, 1)
	unix.SetsockoptInt(fd, unix.SOL_SOCKET, unix.SO_REUSEPORT, 1)
	return fd
}

func send(sock int, end *End, buff []byte) {
	if end.end != nil {
		unix.Sendto(sock, buff, 0, end.end)
	}
}

func receive(sock int, buff []byte, end *End) int {
	size, dst, _ := unix.Recvfrom(sock, buff, 0)
	if dst != nil {
		end.end = dst // remember the most recent peer address
	}
	return size
}
--------------------------------------------------------------------------------
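Editor's note: the 'H' handshake in CreateUDPSocket exists so that the server, which only binds, can capture each queue's peer address from its first Recvfrom before it ever transmits. A self-contained sketch of the same exchange using the standard net package instead of the raw sockets above (illustration only; addresses and payloads are arbitrary):

package main

import (
	"fmt"
	"log"
	"net"
)

func main() {
	// "Server" side: bind only; the peer is unknown until the first
	// datagram arrives (mirrors the unix.Bind branch above).
	server, err := net.ListenUDP("udp4", &net.UDPAddr{IP: net.IPv4(127, 0, 0, 1)})
	if err != nil {
		log.Fatal(err)
	}
	defer server.Close()

	// "Client" side: connect and announce with a single 'H' byte
	// (mirrors the unix.Connect branch and the initial probe).
	client, err := net.DialUDP("udp4", nil, server.LocalAddr().(*net.UDPAddr))
	if err != nil {
		log.Fatal(err)
	}
	defer client.Close()
	client.Write([]byte{'H'})

	buf := make([]byte, 2000)
	n, peer, _ := server.ReadFromUDP(buf) // the server captures the peer here
	fmt.Printf("handshake: %d byte(s) from %v\n", n, peer)

	server.WriteToUDP([]byte("pong"), peer) // now the server can reply
	n, _ = client.Read(buf)
	fmt.Println(string(buf[:n]))
}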