├── LICENSE
├── README.md
├── config.sh
├── go.mod
├── kmod
│   ├── Makefile
│   └── tun.c.4.19
├── main.go
├── tun
│   └── tun.go
└── tunnel
    ├── define.go
    ├── receive.go
    ├── send.go
    ├── tunnel.go
    └── udp.go
/LICENSE:
--------------------------------------------------------------------------------
1 | GNU GENERAL PUBLIC LICENSE
2 | Version 3, 29 June 2007
3 |
4 | Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/>
5 | Everyone is permitted to copy and distribute verbatim copies
6 | of this license document, but changing it is not allowed.
7 |
8 | Preamble
9 |
10 | The GNU General Public License is a free, copyleft license for
11 | software and other kinds of works.
12 |
13 | The licenses for most software and other practical works are designed
14 | to take away your freedom to share and change the works. By contrast,
15 | the GNU General Public License is intended to guarantee your freedom to
16 | share and change all versions of a program--to make sure it remains free
17 | software for all its users. We, the Free Software Foundation, use the
18 | GNU General Public License for most of our software; it applies also to
19 | any other work released this way by its authors. You can apply it to
20 | your programs, too.
21 |
22 | When we speak of free software, we are referring to freedom, not
23 | price. Our General Public Licenses are designed to make sure that you
24 | have the freedom to distribute copies of free software (and charge for
25 | them if you wish), that you receive source code or can get it if you
26 | want it, that you can change the software or use pieces of it in new
27 | free programs, and that you know you can do these things.
28 |
29 | To protect your rights, we need to prevent others from denying you
30 | these rights or asking you to surrender the rights. Therefore, you have
31 | certain responsibilities if you distribute copies of the software, or if
32 | you modify it: responsibilities to respect the freedom of others.
33 |
34 | For example, if you distribute copies of such a program, whether
35 | gratis or for a fee, you must pass on to the recipients the same
36 | freedoms that you received. You must make sure that they, too, receive
37 | or can get the source code. And you must show them these terms so they
38 | know their rights.
39 |
40 | Developers that use the GNU GPL protect your rights with two steps:
41 | (1) assert copyright on the software, and (2) offer you this License
42 | giving you legal permission to copy, distribute and/or modify it.
43 |
44 | For the developers' and authors' protection, the GPL clearly explains
45 | that there is no warranty for this free software. For both users' and
46 | authors' sake, the GPL requires that modified versions be marked as
47 | changed, so that their problems will not be attributed erroneously to
48 | authors of previous versions.
49 |
50 | Some devices are designed to deny users access to install or run
51 | modified versions of the software inside them, although the manufacturer
52 | can do so. This is fundamentally incompatible with the aim of
53 | protecting users' freedom to change the software. The systematic
54 | pattern of such abuse occurs in the area of products for individuals to
55 | use, which is precisely where it is most unacceptable. Therefore, we
56 | have designed this version of the GPL to prohibit the practice for those
57 | products. If such problems arise substantially in other domains, we
58 | stand ready to extend this provision to those domains in future versions
59 | of the GPL, as needed to protect the freedom of users.
60 |
61 | Finally, every program is threatened constantly by software patents.
62 | States should not allow patents to restrict development and use of
63 | software on general-purpose computers, but in those that do, we wish to
64 | avoid the special danger that patents applied to a free program could
65 | make it effectively proprietary. To prevent this, the GPL assures that
66 | patents cannot be used to render the program non-free.
67 |
68 | The precise terms and conditions for copying, distribution and
69 | modification follow.
70 |
71 | TERMS AND CONDITIONS
72 |
73 | 0. Definitions.
74 |
75 | "This License" refers to version 3 of the GNU General Public License.
76 |
77 | "Copyright" also means copyright-like laws that apply to other kinds of
78 | works, such as semiconductor masks.
79 |
80 | "The Program" refers to any copyrightable work licensed under this
81 | License. Each licensee is addressed as "you". "Licensees" and
82 | "recipients" may be individuals or organizations.
83 |
84 | To "modify" a work means to copy from or adapt all or part of the work
85 | in a fashion requiring copyright permission, other than the making of an
86 | exact copy. The resulting work is called a "modified version" of the
87 | earlier work or a work "based on" the earlier work.
88 |
89 | A "covered work" means either the unmodified Program or a work based
90 | on the Program.
91 |
92 | To "propagate" a work means to do anything with it that, without
93 | permission, would make you directly or secondarily liable for
94 | infringement under applicable copyright law, except executing it on a
95 | computer or modifying a private copy. Propagation includes copying,
96 | distribution (with or without modification), making available to the
97 | public, and in some countries other activities as well.
98 |
99 | To "convey" a work means any kind of propagation that enables other
100 | parties to make or receive copies. Mere interaction with a user through
101 | a computer network, with no transfer of a copy, is not conveying.
102 |
103 | An interactive user interface displays "Appropriate Legal Notices"
104 | to the extent that it includes a convenient and prominently visible
105 | feature that (1) displays an appropriate copyright notice, and (2)
106 | tells the user that there is no warranty for the work (except to the
107 | extent that warranties are provided), that licensees may convey the
108 | work under this License, and how to view a copy of this License. If
109 | the interface presents a list of user commands or options, such as a
110 | menu, a prominent item in the list meets this criterion.
111 |
112 | 1. Source Code.
113 |
114 | The "source code" for a work means the preferred form of the work
115 | for making modifications to it. "Object code" means any non-source
116 | form of a work.
117 |
118 | A "Standard Interface" means an interface that either is an official
119 | standard defined by a recognized standards body, or, in the case of
120 | interfaces specified for a particular programming language, one that
121 | is widely used among developers working in that language.
122 |
123 | The "System Libraries" of an executable work include anything, other
124 | than the work as a whole, that (a) is included in the normal form of
125 | packaging a Major Component, but which is not part of that Major
126 | Component, and (b) serves only to enable use of the work with that
127 | Major Component, or to implement a Standard Interface for which an
128 | implementation is available to the public in source code form. A
129 | "Major Component", in this context, means a major essential component
130 | (kernel, window system, and so on) of the specific operating system
131 | (if any) on which the executable work runs, or a compiler used to
132 | produce the work, or an object code interpreter used to run it.
133 |
134 | The "Corresponding Source" for a work in object code form means all
135 | the source code needed to generate, install, and (for an executable
136 | work) run the object code and to modify the work, including scripts to
137 | control those activities. However, it does not include the work's
138 | System Libraries, or general-purpose tools or generally available free
139 | programs which are used unmodified in performing those activities but
140 | which are not part of the work. For example, Corresponding Source
141 | includes interface definition files associated with source files for
142 | the work, and the source code for shared libraries and dynamically
143 | linked subprograms that the work is specifically designed to require,
144 | such as by intimate data communication or control flow between those
145 | subprograms and other parts of the work.
146 |
147 | The Corresponding Source need not include anything that users
148 | can regenerate automatically from other parts of the Corresponding
149 | Source.
150 |
151 | The Corresponding Source for a work in source code form is that
152 | same work.
153 |
154 | 2. Basic Permissions.
155 |
156 | All rights granted under this License are granted for the term of
157 | copyright on the Program, and are irrevocable provided the stated
158 | conditions are met. This License explicitly affirms your unlimited
159 | permission to run the unmodified Program. The output from running a
160 | covered work is covered by this License only if the output, given its
161 | content, constitutes a covered work. This License acknowledges your
162 | rights of fair use or other equivalent, as provided by copyright law.
163 |
164 | You may make, run and propagate covered works that you do not
165 | convey, without conditions so long as your license otherwise remains
166 | in force. You may convey covered works to others for the sole purpose
167 | of having them make modifications exclusively for you, or provide you
168 | with facilities for running those works, provided that you comply with
169 | the terms of this License in conveying all material for which you do
170 | not control copyright. Those thus making or running the covered works
171 | for you must do so exclusively on your behalf, under your direction
172 | and control, on terms that prohibit them from making any copies of
173 | your copyrighted material outside their relationship with you.
174 |
175 | Conveying under any other circumstances is permitted solely under
176 | the conditions stated below. Sublicensing is not allowed; section 10
177 | makes it unnecessary.
178 |
179 | 3. Protecting Users' Legal Rights From Anti-Circumvention Law.
180 |
181 | No covered work shall be deemed part of an effective technological
182 | measure under any applicable law fulfilling obligations under article
183 | 11 of the WIPO copyright treaty adopted on 20 December 1996, or
184 | similar laws prohibiting or restricting circumvention of such
185 | measures.
186 |
187 | When you convey a covered work, you waive any legal power to forbid
188 | circumvention of technological measures to the extent such circumvention
189 | is effected by exercising rights under this License with respect to
190 | the covered work, and you disclaim any intention to limit operation or
191 | modification of the work as a means of enforcing, against the work's
192 | users, your or third parties' legal rights to forbid circumvention of
193 | technological measures.
194 |
195 | 4. Conveying Verbatim Copies.
196 |
197 | You may convey verbatim copies of the Program's source code as you
198 | receive it, in any medium, provided that you conspicuously and
199 | appropriately publish on each copy an appropriate copyright notice;
200 | keep intact all notices stating that this License and any
201 | non-permissive terms added in accord with section 7 apply to the code;
202 | keep intact all notices of the absence of any warranty; and give all
203 | recipients a copy of this License along with the Program.
204 |
205 | You may charge any price or no price for each copy that you convey,
206 | and you may offer support or warranty protection for a fee.
207 |
208 | 5. Conveying Modified Source Versions.
209 |
210 | You may convey a work based on the Program, or the modifications to
211 | produce it from the Program, in the form of source code under the
212 | terms of section 4, provided that you also meet all of these conditions:
213 |
214 | a) The work must carry prominent notices stating that you modified
215 | it, and giving a relevant date.
216 |
217 | b) The work must carry prominent notices stating that it is
218 | released under this License and any conditions added under section
219 | 7. This requirement modifies the requirement in section 4 to
220 | "keep intact all notices".
221 |
222 | c) You must license the entire work, as a whole, under this
223 | License to anyone who comes into possession of a copy. This
224 | License will therefore apply, along with any applicable section 7
225 | additional terms, to the whole of the work, and all its parts,
226 | regardless of how they are packaged. This License gives no
227 | permission to license the work in any other way, but it does not
228 | invalidate such permission if you have separately received it.
229 |
230 | d) If the work has interactive user interfaces, each must display
231 | Appropriate Legal Notices; however, if the Program has interactive
232 | interfaces that do not display Appropriate Legal Notices, your
233 | work need not make them do so.
234 |
235 | A compilation of a covered work with other separate and independent
236 | works, which are not by their nature extensions of the covered work,
237 | and which are not combined with it such as to form a larger program,
238 | in or on a volume of a storage or distribution medium, is called an
239 | "aggregate" if the compilation and its resulting copyright are not
240 | used to limit the access or legal rights of the compilation's users
241 | beyond what the individual works permit. Inclusion of a covered work
242 | in an aggregate does not cause this License to apply to the other
243 | parts of the aggregate.
244 |
245 | 6. Conveying Non-Source Forms.
246 |
247 | You may convey a covered work in object code form under the terms
248 | of sections 4 and 5, provided that you also convey the
249 | machine-readable Corresponding Source under the terms of this License,
250 | in one of these ways:
251 |
252 | a) Convey the object code in, or embodied in, a physical product
253 | (including a physical distribution medium), accompanied by the
254 | Corresponding Source fixed on a durable physical medium
255 | customarily used for software interchange.
256 |
257 | b) Convey the object code in, or embodied in, a physical product
258 | (including a physical distribution medium), accompanied by a
259 | written offer, valid for at least three years and valid for as
260 | long as you offer spare parts or customer support for that product
261 | model, to give anyone who possesses the object code either (1) a
262 | copy of the Corresponding Source for all the software in the
263 | product that is covered by this License, on a durable physical
264 | medium customarily used for software interchange, for a price no
265 | more than your reasonable cost of physically performing this
266 | conveying of source, or (2) access to copy the
267 | Corresponding Source from a network server at no charge.
268 |
269 | c) Convey individual copies of the object code with a copy of the
270 | written offer to provide the Corresponding Source. This
271 | alternative is allowed only occasionally and noncommercially, and
272 | only if you received the object code with such an offer, in accord
273 | with subsection 6b.
274 |
275 | d) Convey the object code by offering access from a designated
276 | place (gratis or for a charge), and offer equivalent access to the
277 | Corresponding Source in the same way through the same place at no
278 | further charge. You need not require recipients to copy the
279 | Corresponding Source along with the object code. If the place to
280 | copy the object code is a network server, the Corresponding Source
281 | may be on a different server (operated by you or a third party)
282 | that supports equivalent copying facilities, provided you maintain
283 | clear directions next to the object code saying where to find the
284 | Corresponding Source. Regardless of what server hosts the
285 | Corresponding Source, you remain obligated to ensure that it is
286 | available for as long as needed to satisfy these requirements.
287 |
288 | e) Convey the object code using peer-to-peer transmission, provided
289 | you inform other peers where the object code and Corresponding
290 | Source of the work are being offered to the general public at no
291 | charge under subsection 6d.
292 |
293 | A separable portion of the object code, whose source code is excluded
294 | from the Corresponding Source as a System Library, need not be
295 | included in conveying the object code work.
296 |
297 | A "User Product" is either (1) a "consumer product", which means any
298 | tangible personal property which is normally used for personal, family,
299 | or household purposes, or (2) anything designed or sold for incorporation
300 | into a dwelling. In determining whether a product is a consumer product,
301 | doubtful cases shall be resolved in favor of coverage. For a particular
302 | product received by a particular user, "normally used" refers to a
303 | typical or common use of that class of product, regardless of the status
304 | of the particular user or of the way in which the particular user
305 | actually uses, or expects or is expected to use, the product. A product
306 | is a consumer product regardless of whether the product has substantial
307 | commercial, industrial or non-consumer uses, unless such uses represent
308 | the only significant mode of use of the product.
309 |
310 | "Installation Information" for a User Product means any methods,
311 | procedures, authorization keys, or other information required to install
312 | and execute modified versions of a covered work in that User Product from
313 | a modified version of its Corresponding Source. The information must
314 | suffice to ensure that the continued functioning of the modified object
315 | code is in no case prevented or interfered with solely because
316 | modification has been made.
317 |
318 | If you convey an object code work under this section in, or with, or
319 | specifically for use in, a User Product, and the conveying occurs as
320 | part of a transaction in which the right of possession and use of the
321 | User Product is transferred to the recipient in perpetuity or for a
322 | fixed term (regardless of how the transaction is characterized), the
323 | Corresponding Source conveyed under this section must be accompanied
324 | by the Installation Information. But this requirement does not apply
325 | if neither you nor any third party retains the ability to install
326 | modified object code on the User Product (for example, the work has
327 | been installed in ROM).
328 |
329 | The requirement to provide Installation Information does not include a
330 | requirement to continue to provide support service, warranty, or updates
331 | for a work that has been modified or installed by the recipient, or for
332 | the User Product in which it has been modified or installed. Access to a
333 | network may be denied when the modification itself materially and
334 | adversely affects the operation of the network or violates the rules and
335 | protocols for communication across the network.
336 |
337 | Corresponding Source conveyed, and Installation Information provided,
338 | in accord with this section must be in a format that is publicly
339 | documented (and with an implementation available to the public in
340 | source code form), and must require no special password or key for
341 | unpacking, reading or copying.
342 |
343 | 7. Additional Terms.
344 |
345 | "Additional permissions" are terms that supplement the terms of this
346 | License by making exceptions from one or more of its conditions.
347 | Additional permissions that are applicable to the entire Program shall
348 | be treated as though they were included in this License, to the extent
349 | that they are valid under applicable law. If additional permissions
350 | apply only to part of the Program, that part may be used separately
351 | under those permissions, but the entire Program remains governed by
352 | this License without regard to the additional permissions.
353 |
354 | When you convey a copy of a covered work, you may at your option
355 | remove any additional permissions from that copy, or from any part of
356 | it. (Additional permissions may be written to require their own
357 | removal in certain cases when you modify the work.) You may place
358 | additional permissions on material, added by you to a covered work,
359 | for which you have or can give appropriate copyright permission.
360 |
361 | Notwithstanding any other provision of this License, for material you
362 | add to a covered work, you may (if authorized by the copyright holders of
363 | that material) supplement the terms of this License with terms:
364 |
365 | a) Disclaiming warranty or limiting liability differently from the
366 | terms of sections 15 and 16 of this License; or
367 |
368 | b) Requiring preservation of specified reasonable legal notices or
369 | author attributions in that material or in the Appropriate Legal
370 | Notices displayed by works containing it; or
371 |
372 | c) Prohibiting misrepresentation of the origin of that material, or
373 | requiring that modified versions of such material be marked in
374 | reasonable ways as different from the original version; or
375 |
376 | d) Limiting the use for publicity purposes of names of licensors or
377 | authors of the material; or
378 |
379 | e) Declining to grant rights under trademark law for use of some
380 | trade names, trademarks, or service marks; or
381 |
382 | f) Requiring indemnification of licensors and authors of that
383 | material by anyone who conveys the material (or modified versions of
384 | it) with contractual assumptions of liability to the recipient, for
385 | any liability that these contractual assumptions directly impose on
386 | those licensors and authors.
387 |
388 | All other non-permissive additional terms are considered "further
389 | restrictions" within the meaning of section 10. If the Program as you
390 | received it, or any part of it, contains a notice stating that it is
391 | governed by this License along with a term that is a further
392 | restriction, you may remove that term. If a license document contains
393 | a further restriction but permits relicensing or conveying under this
394 | License, you may add to a covered work material governed by the terms
395 | of that license document, provided that the further restriction does
396 | not survive such relicensing or conveying.
397 |
398 | If you add terms to a covered work in accord with this section, you
399 | must place, in the relevant source files, a statement of the
400 | additional terms that apply to those files, or a notice indicating
401 | where to find the applicable terms.
402 |
403 | Additional terms, permissive or non-permissive, may be stated in the
404 | form of a separately written license, or stated as exceptions;
405 | the above requirements apply either way.
406 |
407 | 8. Termination.
408 |
409 | You may not propagate or modify a covered work except as expressly
410 | provided under this License. Any attempt otherwise to propagate or
411 | modify it is void, and will automatically terminate your rights under
412 | this License (including any patent licenses granted under the third
413 | paragraph of section 11).
414 |
415 | However, if you cease all violation of this License, then your
416 | license from a particular copyright holder is reinstated (a)
417 | provisionally, unless and until the copyright holder explicitly and
418 | finally terminates your license, and (b) permanently, if the copyright
419 | holder fails to notify you of the violation by some reasonable means
420 | prior to 60 days after the cessation.
421 |
422 | Moreover, your license from a particular copyright holder is
423 | reinstated permanently if the copyright holder notifies you of the
424 | violation by some reasonable means, this is the first time you have
425 | received notice of violation of this License (for any work) from that
426 | copyright holder, and you cure the violation prior to 30 days after
427 | your receipt of the notice.
428 |
429 | Termination of your rights under this section does not terminate the
430 | licenses of parties who have received copies or rights from you under
431 | this License. If your rights have been terminated and not permanently
432 | reinstated, you do not qualify to receive new licenses for the same
433 | material under section 10.
434 |
435 | 9. Acceptance Not Required for Having Copies.
436 |
437 | You are not required to accept this License in order to receive or
438 | run a copy of the Program. Ancillary propagation of a covered work
439 | occurring solely as a consequence of using peer-to-peer transmission
440 | to receive a copy likewise does not require acceptance. However,
441 | nothing other than this License grants you permission to propagate or
442 | modify any covered work. These actions infringe copyright if you do
443 | not accept this License. Therefore, by modifying or propagating a
444 | covered work, you indicate your acceptance of this License to do so.
445 |
446 | 10. Automatic Licensing of Downstream Recipients.
447 |
448 | Each time you convey a covered work, the recipient automatically
449 | receives a license from the original licensors, to run, modify and
450 | propagate that work, subject to this License. You are not responsible
451 | for enforcing compliance by third parties with this License.
452 |
453 | An "entity transaction" is a transaction transferring control of an
454 | organization, or substantially all assets of one, or subdividing an
455 | organization, or merging organizations. If propagation of a covered
456 | work results from an entity transaction, each party to that
457 | transaction who receives a copy of the work also receives whatever
458 | licenses to the work the party's predecessor in interest had or could
459 | give under the previous paragraph, plus a right to possession of the
460 | Corresponding Source of the work from the predecessor in interest, if
461 | the predecessor has it or can get it with reasonable efforts.
462 |
463 | You may not impose any further restrictions on the exercise of the
464 | rights granted or affirmed under this License. For example, you may
465 | not impose a license fee, royalty, or other charge for exercise of
466 | rights granted under this License, and you may not initiate litigation
467 | (including a cross-claim or counterclaim in a lawsuit) alleging that
468 | any patent claim is infringed by making, using, selling, offering for
469 | sale, or importing the Program or any portion of it.
470 |
471 | 11. Patents.
472 |
473 | A "contributor" is a copyright holder who authorizes use under this
474 | License of the Program or a work on which the Program is based. The
475 | work thus licensed is called the contributor's "contributor version".
476 |
477 | A contributor's "essential patent claims" are all patent claims
478 | owned or controlled by the contributor, whether already acquired or
479 | hereafter acquired, that would be infringed by some manner, permitted
480 | by this License, of making, using, or selling its contributor version,
481 | but do not include claims that would be infringed only as a
482 | consequence of further modification of the contributor version. For
483 | purposes of this definition, "control" includes the right to grant
484 | patent sublicenses in a manner consistent with the requirements of
485 | this License.
486 |
487 | Each contributor grants you a non-exclusive, worldwide, royalty-free
488 | patent license under the contributor's essential patent claims, to
489 | make, use, sell, offer for sale, import and otherwise run, modify and
490 | propagate the contents of its contributor version.
491 |
492 | In the following three paragraphs, a "patent license" is any express
493 | agreement or commitment, however denominated, not to enforce a patent
494 | (such as an express permission to practice a patent or covenant not to
495 | sue for patent infringement). To "grant" such a patent license to a
496 | party means to make such an agreement or commitment not to enforce a
497 | patent against the party.
498 |
499 | If you convey a covered work, knowingly relying on a patent license,
500 | and the Corresponding Source of the work is not available for anyone
501 | to copy, free of charge and under the terms of this License, through a
502 | publicly available network server or other readily accessible means,
503 | then you must either (1) cause the Corresponding Source to be so
504 | available, or (2) arrange to deprive yourself of the benefit of the
505 | patent license for this particular work, or (3) arrange, in a manner
506 | consistent with the requirements of this License, to extend the patent
507 | license to downstream recipients. "Knowingly relying" means you have
508 | actual knowledge that, but for the patent license, your conveying the
509 | covered work in a country, or your recipient's use of the covered work
510 | in a country, would infringe one or more identifiable patents in that
511 | country that you have reason to believe are valid.
512 |
513 | If, pursuant to or in connection with a single transaction or
514 | arrangement, you convey, or propagate by procuring conveyance of, a
515 | covered work, and grant a patent license to some of the parties
516 | receiving the covered work authorizing them to use, propagate, modify
517 | or convey a specific copy of the covered work, then the patent license
518 | you grant is automatically extended to all recipients of the covered
519 | work and works based on it.
520 |
521 | A patent license is "discriminatory" if it does not include within
522 | the scope of its coverage, prohibits the exercise of, or is
523 | conditioned on the non-exercise of one or more of the rights that are
524 | specifically granted under this License. You may not convey a covered
525 | work if you are a party to an arrangement with a third party that is
526 | in the business of distributing software, under which you make payment
527 | to the third party based on the extent of your activity of conveying
528 | the work, and under which the third party grants, to any of the
529 | parties who would receive the covered work from you, a discriminatory
530 | patent license (a) in connection with copies of the covered work
531 | conveyed by you (or copies made from those copies), or (b) primarily
532 | for and in connection with specific products or compilations that
533 | contain the covered work, unless you entered into that arrangement,
534 | or that patent license was granted, prior to 28 March 2007.
535 |
536 | Nothing in this License shall be construed as excluding or limiting
537 | any implied license or other defenses to infringement that may
538 | otherwise be available to you under applicable patent law.
539 |
540 | 12. No Surrender of Others' Freedom.
541 |
542 | If conditions are imposed on you (whether by court order, agreement or
543 | otherwise) that contradict the conditions of this License, they do not
544 | excuse you from the conditions of this License. If you cannot convey a
545 | covered work so as to satisfy simultaneously your obligations under this
546 | License and any other pertinent obligations, then as a consequence you may
547 | not convey it at all. For example, if you agree to terms that obligate you
548 | to collect a royalty for further conveying from those to whom you convey
549 | the Program, the only way you could satisfy both those terms and this
550 | License would be to refrain entirely from conveying the Program.
551 |
552 | 13. Use with the GNU Affero General Public License.
553 |
554 | Notwithstanding any other provision of this License, you have
555 | permission to link or combine any covered work with a work licensed
556 | under version 3 of the GNU Affero General Public License into a single
557 | combined work, and to convey the resulting work. The terms of this
558 | License will continue to apply to the part which is the covered work,
559 | but the special requirements of the GNU Affero General Public License,
560 | section 13, concerning interaction through a network will apply to the
561 | combination as such.
562 |
563 | 14. Revised Versions of this License.
564 |
565 | The Free Software Foundation may publish revised and/or new versions of
566 | the GNU General Public License from time to time. Such new versions will
567 | be similar in spirit to the present version, but may differ in detail to
568 | address new problems or concerns.
569 |
570 | Each version is given a distinguishing version number. If the
571 | Program specifies that a certain numbered version of the GNU General
572 | Public License "or any later version" applies to it, you have the
573 | option of following the terms and conditions either of that numbered
574 | version or of any later version published by the Free Software
575 | Foundation. If the Program does not specify a version number of the
576 | GNU General Public License, you may choose any version ever published
577 | by the Free Software Foundation.
578 |
579 | If the Program specifies that a proxy can decide which future
580 | versions of the GNU General Public License can be used, that proxy's
581 | public statement of acceptance of a version permanently authorizes you
582 | to choose that version for the Program.
583 |
584 | Later license versions may give you additional or different
585 | permissions. However, no additional obligations are imposed on any
586 | author or copyright holder as a result of your choosing to follow a
587 | later version.
588 |
589 | 15. Disclaimer of Warranty.
590 |
591 | THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
592 | APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
593 | HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
594 | OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
595 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
596 | PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
597 | IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
598 | ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
599 |
600 | 16. Limitation of Liability.
601 |
602 | IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
603 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
604 | THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
605 | GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
606 | USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
607 | DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
608 | PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
609 | EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
610 | SUCH DAMAGES.
611 |
612 | 17. Interpretation of Sections 15 and 16.
613 |
614 | If the disclaimer of warranty and limitation of liability provided
615 | above cannot be given local legal effect according to their terms,
616 | reviewing courts shall apply local law that most closely approximates
617 | an absolute waiver of all civil liability in connection with the
618 | Program, unless a warranty or assumption of liability accompanies a
619 | copy of the Program in return for a fee.
620 |
621 | END OF TERMS AND CONDITIONS
622 |
623 | How to Apply These Terms to Your New Programs
624 |
625 | If you develop a new program, and you want it to be of the greatest
626 | possible use to the public, the best way to achieve this is to make it
627 | free software which everyone can redistribute and change under these terms.
628 |
629 | To do so, attach the following notices to the program. It is safest
630 | to attach them to the start of each source file to most effectively
631 | state the exclusion of warranty; and each file should have at least
632 | the "copyright" line and a pointer to where the full notice is found.
633 |
634 | <one line to give the program's name and a brief idea of what it does.>
635 | Copyright (C) <year>  <name of author>
636 |
637 | This program is free software: you can redistribute it and/or modify
638 | it under the terms of the GNU General Public License as published by
639 | the Free Software Foundation, either version 3 of the License, or
640 | (at your option) any later version.
641 |
642 | This program is distributed in the hope that it will be useful,
643 | but WITHOUT ANY WARRANTY; without even the implied warranty of
644 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
645 | GNU General Public License for more details.
646 |
647 | You should have received a copy of the GNU General Public License
648 | along with this program. If not, see <https://www.gnu.org/licenses/>.
649 |
650 | Also add information on how to contact you by electronic and paper mail.
651 |
652 | If the program does terminal interaction, make it output a short
653 | notice like this when it starts in an interactive mode:
654 |
655 | <program>  Copyright (C) <year>  <name of author>
656 | This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
657 | This is free software, and you are welcome to redistribute it
658 | under certain conditions; type `show c' for details.
659 |
660 | The hypothetical commands `show w' and `show c' should show the appropriate
661 | parts of the General Public License. Of course, your program's commands
662 | might be different; for a GUI interface, you would use an "about box".
663 |
664 | You should also get your employer (if you work as a programmer) or school,
665 | if any, to sign a "copyright disclaimer" for the program, if necessary.
666 | For more information on this, and how to apply and follow the GNU GPL, see
667 | <https://www.gnu.org/licenses/>.
668 |
669 | The GNU General Public License does not permit incorporating your program
670 | into proprietary programs. If your program is a subroutine library, you
671 | may consider it more useful to permit linking proprietary applications with
672 | the library. If this is what you want to do, use the GNU Lesser General
673 | Public License instead of this License. But first, please read
674 | <https://www.gnu.org/licenses/why-not-lgpl.html>.
675 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # gotun-tunnel
2 | A high-throughput point-to-point tunnel
3 |
4 | ## multiqueue-tun
5 |
6 | ## multi-socket (with/without SO_REUSEPORT)
7 |
--------------------------------------------------------------------------------
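The two README headings above are the core of the design. "multiqueue-tun" refers to Linux's IFF_MULTI_QUEUE TUN flag: opening /dev/net/tun repeatedly with the same interface name attaches each new fd as another queue of the same device, so reads and writes can be spread over several goroutines. Below is a minimal sketch of that, assuming Linux and the golang.org/x/sys/unix module already listed in go.mod; the ifReq layout and the openQueue helper are illustrative names, not this repo's own API (the real code lives in tun/tun.go).

```go
package main

import (
	"os"
	"unsafe"

	"golang.org/x/sys/unix"
)

// ifReq mirrors the part of struct ifreq that TUNSETIFF reads,
// padded to the 40-byte size of struct ifreq on 64-bit Linux.
type ifReq struct {
	Name  [unix.IFNAMSIZ]byte
	Flags uint16
	_     [22]byte
}

// openQueue attaches one more queue to the TUN device `name`.
// Calling it N times with the same name yields N independent queues.
func openQueue(name string) (*os.File, error) {
	fd, err := unix.Open("/dev/net/tun", unix.O_RDWR, 0)
	if err != nil {
		return nil, err
	}
	var req ifReq
	copy(req.Name[:], name)
	// IFF_TUN: L3 packets; IFF_NO_PI: no 4-byte packet-info prefix;
	// IFF_MULTI_QUEUE: this fd becomes an extra queue of the device.
	req.Flags = unix.IFF_TUN | unix.IFF_NO_PI | unix.IFF_MULTI_QUEUE
	if _, _, errno := unix.Syscall(unix.SYS_IOCTL, uintptr(fd),
		unix.TUNSETIFF, uintptr(unsafe.Pointer(&req))); errno != 0 {
		unix.Close(fd)
		return nil, errno
	}
	return os.NewFile(uintptr(fd), name), nil
}

func main() {
	// Four queues of the same "wg2" device (the name config.sh configures);
	// each would normally be serviced by its own read/write goroutine pair.
	var queues []*os.File
	for i := 0; i < 4; i++ {
		q, err := openQueue("wg2")
		if err != nil {
			panic(err)
		}
		queues = append(queues, q)
	}
	_ = queues
}
```

The kernel side then steers flows across the attached queues; see tun_automq_select_queue() in kmod/tun.c.4.19 below.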
/config.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # Usage: ./config.sh client|server  (build the tuntap binary first: go build)
4 |
5 | if [ "$1" = 'client' ]
6 | then
7 | ./tuntap client 4 192 168 56 1 35 &
8 | ifconfig wg2 172.16.0.2/30
9 | else
10 | ./tuntap server 4 192 168 56 1 35 &
11 | ifconfig wg2 172.16.0.1/30
12 | fi
13 | ifconfig wg2 mtu 1460 txqueuelen 2000
14 |
15 |
--------------------------------------------------------------------------------
/go.mod:
--------------------------------------------------------------------------------
1 | module tuntap
2 |
3 | go 1.13
4 |
5 | require (
6 | golang.org/x/net v0.0.0-20210510120150-4163338589ed
7 | golang.org/x/sys v0.0.0-20210514084401-e8d321eab015
8 | )
9 |
--------------------------------------------------------------------------------
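go.mod pulls in golang.org/x/sys, which is all that is needed for the second README item, "multi-socket (with/without SO_REUSEPORT)": several UDP sockets bound to the same local port with SO_REUSEPORT set, letting the kernel load-balance inbound datagrams across them. A minimal sketch under those assumptions follows; listenReuse and the port number are illustrative, not taken from tunnel/udp.go.

```go
package main

import (
	"context"
	"net"
	"syscall"

	"golang.org/x/sys/unix"
)

// listenReuse binds a UDP socket with SO_REUSEPORT set, so several
// sockets can share the same local address and port.
func listenReuse(addr string) (net.PacketConn, error) {
	lc := net.ListenConfig{
		Control: func(network, address string, c syscall.RawConn) error {
			var serr error
			if err := c.Control(func(fd uintptr) {
				// With SO_REUSEPORT the kernel hashes each 4-tuple to
				// one of the sockets sharing the port.
				serr = unix.SetsockoptInt(int(fd), unix.SOL_SOCKET,
					unix.SO_REUSEPORT, 1)
			}); err != nil {
				return err
			}
			return serr
		},
	}
	return lc.ListenPacket(context.Background(), "udp", addr)
}

func main() {
	// Four sockets on one (arbitrary) port; pairing each with one TUN queue
	// gives every queue its own socket and goroutine, avoiding contention
	// on a single shared socket.
	var conns []net.PacketConn
	for i := 0; i < 4; i++ {
		pc, err := listenReuse(":5000")
		if err != nil {
			panic(err)
		}
		conns = append(conns, pc)
	}
	_ = conns
}
```

The "without reuseport" variant would presumably bind each socket to its own local port instead, trading kernel load-balancing for explicit per-port fan-out.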
/kmod/Makefile:
--------------------------------------------------------------------------------
1 | obj-m += tun.o
2 |
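3 | # Typical out-of-tree build (assumes the headers for the running kernel
4 | # are installed): make -C /lib/modules/$(uname -r)/build M=$(PWD) modules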
--------------------------------------------------------------------------------
/kmod/tun.c.4.19:
--------------------------------------------------------------------------------
1 | /*
2 | * TUN - Universal TUN/TAP device driver.
3 | * Copyright (C) 1999-2002 Maxim Krasnyansky <maxk@qualcomm.com>
4 | *
5 | * This program is free software; you can redistribute it and/or modify
6 | * it under the terms of the GNU General Public License as published by
7 | * the Free Software Foundation; either version 2 of the License, or
8 | * (at your option) any later version.
9 | *
10 | * This program is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | * GNU General Public License for more details.
14 | *
15 | * $Id: tun.c,v 1.15 2002/03/01 02:44:24 maxk Exp $
16 | */
17 |
18 | /*
19 | * Changes:
20 | *
21 | * Mike Kershaw <dragorn@kismetwireless.net> 2005/08/14
22 | * Add TUNSETLINK ioctl to set the link encapsulation
23 | *
24 | * Mark Smith <markzzzsmith@yahoo.com.au>
25 | * Use eth_random_addr() for tap MAC address.
26 | *
27 | * Harald Roelle <harald.roelle@ifi.lmu.de> 2004/04/20
28 | * Fixes in packet dropping, queue length setting and queue wakeup.
29 | * Increased default tx queue length.
30 | * Added ethtool API.
31 | * Minor cleanups
32 | *
33 | * Daniel Podlejski <underley@underley.eu.org>
34 | * Modifications for 2.3.99-pre5 kernel.
35 | */
36 |
37 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
38 |
39 | #define DRV_NAME "tun"
40 | #define DRV_VERSION "1.6"
41 | #define DRV_DESCRIPTION "Universal TUN/TAP device driver"
42 | #define DRV_COPYRIGHT "(C) 1999-2004 Max Krasnyansky <maxk@qualcomm.com>"
43 |
44 | #include <linux/module.h>
45 | #include <linux/errno.h>
46 | #include <linux/kernel.h>
47 | #include <linux/sched/signal.h>
48 | #include <linux/major.h>
49 | #include <linux/slab.h>
50 | #include <linux/poll.h>
51 | #include <linux/fcntl.h>
52 | #include <linux/init.h>
53 | #include <linux/skbuff.h>
54 | #include <linux/netdevice.h>
55 | #include <linux/etherdevice.h>
56 | #include <linux/miscdevice.h>
57 | #include <linux/ethtool.h>
58 | #include <linux/rtnetlink.h>
59 | #include <linux/compat.h>
60 | #include <linux/if.h>
61 | #include <linux/if_arp.h>
62 | #include <linux/if_ether.h>
63 | #include <linux/if_tun.h>
64 | #include <linux/if_vlan.h>
65 | #include <linux/crc32.h>
66 | #include <linux/nsproxy.h>
67 | #include <linux/virtio_net.h>
68 | #include <linux/rcupdate.h>
69 | #include <net/net_namespace.h>
70 | #include <net/netns/generic.h>
71 | #include <net/rtnetlink.h>
72 | #include <net/sock.h>
73 | #include <net/xdp.h>
74 | #include <linux/seq_file.h>
75 | #include <linux/uio.h>
76 | #include <linux/skb_array.h>
77 | #include <linux/bpf.h>
78 | #include <linux/bpf_trace.h>
79 | #include <linux/mutex.h>
80 |
81 | #include <linux/uaccess.h>
82 | #include <linux/proc_fs.h>
83 |
84 | static int rw = 0;
85 | module_param(rw, int, 0);
86 |
87 | static void tun_default_link_ksettings(struct net_device *dev,
88 | struct ethtool_link_ksettings *cmd);
89 |
90 | /* Uncomment to enable debugging */
91 | /* #define TUN_DEBUG 1 */
92 |
93 | #ifdef TUN_DEBUG
94 | static int debug;
95 |
96 | #define tun_debug(level, tun, fmt, args...) \
97 | do { \
98 | if (tun->debug) \
99 | netdev_printk(level, tun->dev, fmt, ##args); \
100 | } while (0)
101 | #define DBG1(level, fmt, args...) \
102 | do { \
103 | if (debug == 2) \
104 | printk(level fmt, ##args); \
105 | } while (0)
106 | #else
107 | #define tun_debug(level, tun, fmt, args...) \
108 | do { \
109 | if (0) \
110 | netdev_printk(level, tun->dev, fmt, ##args); \
111 | } while (0)
112 | #define DBG1(level, fmt, args...) \
113 | do { \
114 | if (0) \
115 | printk(level fmt, ##args); \
116 | } while (0)
117 | #endif
118 |
119 | #define TUN_HEADROOM 256
120 | #define TUN_RX_PAD (NET_IP_ALIGN + NET_SKB_PAD)
121 |
122 | /* TUN device flags */
123 |
124 | /* IFF_ATTACH_QUEUE is never stored in device flags,
125 | * overload it to mean fasync when stored there.
126 | */
127 | #define TUN_FASYNC IFF_ATTACH_QUEUE
128 | /* High bits in flags field are unused. */
129 | #define TUN_VNET_LE 0x80000000
130 | #define TUN_VNET_BE 0x40000000
131 |
132 | #define TUN_FEATURES (IFF_NO_PI | IFF_ONE_QUEUE | IFF_VNET_HDR | \
133 | IFF_MULTI_QUEUE | IFF_NAPI | IFF_NAPI_FRAGS)
134 |
135 | #define GOODCOPY_LEN 128
136 |
137 | #define FLT_EXACT_COUNT 8
138 | struct tap_filter {
139 | unsigned int count; /* Number of addrs. Zero means disabled */
140 | u32 mask[2]; /* Mask of the hashed addrs */
141 | unsigned char addr[FLT_EXACT_COUNT][ETH_ALEN];
142 | };
143 |
144 | /* MAX_TAP_QUEUES 256 is chosen to allow rx/tx queues to be equal
145 | * to the max number of VCPUs in a guest. */
146 | #define MAX_TAP_QUEUES 256
147 | #define MAX_TAP_FLOWS 4096
148 |
149 | #define TUN_FLOW_EXPIRE (3 * HZ)
150 |
151 | struct tun_pcpu_stats {
152 | u64 rx_packets;
153 | u64 rx_bytes;
154 | u64 tx_packets;
155 | u64 tx_bytes;
156 | struct u64_stats_sync syncp;
157 | u32 rx_dropped;
158 | u32 tx_dropped;
159 | u32 rx_frame_errors;
160 | };
161 |
162 | /* A tun_file connects an open character device to a tuntap netdevice. It
163 | * also contains all socket related structures (except sock_fprog and tap_filter)
164 | * to serve as one transmit queue for tuntap device. The sock_fprog and
165 | * tap_filter were kept in tun_struct since they were used for filtering for the
166 | * netdevice not for a specific queue (at least I didn't see the requirement for
167 | * this).
168 | *
169 | * RCU usage:
170 | * The tun_file and tun_struct are loosely coupled, the pointer from one to the
171 | * other can only be read while rcu_read_lock or rtnl_lock is held.
172 | */
173 | struct tun_file {
174 | struct sock sk;
175 | struct socket socket;
176 | struct socket_wq wq;
177 | struct tun_struct __rcu *tun;
178 | struct fasync_struct *fasync;
179 | /* only used for fasync */
180 | unsigned int flags;
181 | union {
182 | u16 queue_index;
183 | unsigned int ifindex;
184 | };
185 | struct napi_struct napi;
186 | bool napi_enabled;
187 | bool napi_frags_enabled;
188 | struct mutex napi_mutex; /* Protects access to the above napi */
189 | struct list_head next;
190 | struct tun_struct *detached;
191 | struct ptr_ring tx_ring;
192 | struct xdp_rxq_info xdp_rxq;
193 | };
194 |
195 | struct tun_flow_entry {
196 | struct hlist_node hash_link;
197 | struct rcu_head rcu;
198 | struct tun_struct *tun;
199 |
200 | u32 rxhash;
201 | u32 rps_rxhash;
202 | int queue_index;
203 | unsigned long updated;
204 | };
205 |
206 | #define TUN_NUM_FLOW_ENTRIES 1024
207 | #define TUN_MASK_FLOW_ENTRIES (TUN_NUM_FLOW_ENTRIES - 1)
208 |
209 | struct tun_prog {
210 | struct rcu_head rcu;
211 | struct bpf_prog *prog;
212 | };
213 |
214 | /* Since the socket was moved to tun_file, to preserve the behavior of a
215 | * persistent device, the socket filter, sndbuf and vnet header size are
216 | * restored when the file is attached to a persistent device.
217 | */
218 | struct tun_struct {
219 | struct tun_file __rcu *tfiles[MAX_TAP_QUEUES];
220 | unsigned int numqueues;
221 | unsigned int flags;
222 | kuid_t owner;
223 | kgid_t group;
224 |
225 | struct net_device *dev;
226 | netdev_features_t set_features;
227 | #define TUN_USER_FEATURES (NETIF_F_HW_CSUM|NETIF_F_TSO_ECN|NETIF_F_TSO| \
228 | NETIF_F_TSO6)
229 |
230 | int align;
231 | int vnet_hdr_sz;
232 | int sndbuf;
233 | struct tap_filter txflt;
234 | struct sock_fprog fprog;
235 | /* protected by rtnl lock */
236 | bool filter_attached;
237 | #ifdef TUN_DEBUG
238 | int debug;
239 | #endif
240 | spinlock_t lock;
241 | struct hlist_head flows[TUN_NUM_FLOW_ENTRIES];
242 | struct timer_list flow_gc_timer;
243 | unsigned long ageing_time;
244 | unsigned int numdisabled;
245 | struct list_head disabled;
246 | void *security;
247 | u32 flow_count;
248 | u32 rx_batched;
249 | struct tun_pcpu_stats __percpu *pcpu_stats;
250 | struct bpf_prog __rcu *xdp_prog;
251 | struct tun_prog __rcu *steering_prog;
252 | struct tun_prog __rcu *filter_prog;
253 | struct ethtool_link_ksettings link_ksettings;
254 | };
255 |
256 | struct veth {
257 | __be16 h_vlan_proto;
258 | __be16 h_vlan_TCI;
259 | };
260 |
261 | bool tun_is_xdp_frame(void *ptr)
262 | {
263 | return (unsigned long)ptr & TUN_XDP_FLAG;
264 | }
265 | EXPORT_SYMBOL(tun_is_xdp_frame);
266 |
267 | void *tun_xdp_to_ptr(void *ptr)
268 | {
269 | return (void *)((unsigned long)ptr | TUN_XDP_FLAG);
270 | }
271 | EXPORT_SYMBOL(tun_xdp_to_ptr);
272 |
273 | void *tun_ptr_to_xdp(void *ptr)
274 | {
275 | return (void *)((unsigned long)ptr & ~TUN_XDP_FLAG);
276 | }
277 | EXPORT_SYMBOL(tun_ptr_to_xdp);
278 |
279 | static int tun_napi_receive(struct napi_struct *napi, int budget)
280 | {
281 | struct tun_file *tfile = container_of(napi, struct tun_file, napi);
282 | struct sk_buff_head *queue = &tfile->sk.sk_write_queue;
283 | struct sk_buff_head process_queue;
284 | struct sk_buff *skb;
285 | int received = 0;
286 |
287 | __skb_queue_head_init(&process_queue);
288 |
289 | spin_lock(&queue->lock);
290 | skb_queue_splice_tail_init(queue, &process_queue);
291 | spin_unlock(&queue->lock);
292 |
293 | while (received < budget && (skb = __skb_dequeue(&process_queue))) {
294 | napi_gro_receive(napi, skb);
295 | ++received;
296 | }
297 |
298 | if (!skb_queue_empty(&process_queue)) {
299 | spin_lock(&queue->lock);
300 | skb_queue_splice(&process_queue, queue);
301 | spin_unlock(&queue->lock);
302 | }
303 |
304 | return received;
305 | }
306 |
307 | static int tun_napi_poll(struct napi_struct *napi, int budget)
308 | {
309 | unsigned int received;
310 |
311 | received = tun_napi_receive(napi, budget);
312 |
313 | if (received < budget)
314 | napi_complete_done(napi, received);
315 |
316 | return received;
317 | }
318 |
319 | static void tun_napi_init(struct tun_struct *tun, struct tun_file *tfile,
320 | bool napi_en, bool napi_frags)
321 | {
322 | tfile->napi_enabled = napi_en;
323 | tfile->napi_frags_enabled = napi_en && napi_frags;
324 | if (napi_en) {
325 | netif_tx_napi_add(tun->dev, &tfile->napi, tun_napi_poll,
326 | NAPI_POLL_WEIGHT);
327 | napi_enable(&tfile->napi);
328 | }
329 | }
330 |
331 | static void tun_napi_disable(struct tun_file *tfile)
332 | {
333 | if (tfile->napi_enabled)
334 | napi_disable(&tfile->napi);
335 | }
336 |
337 | static void tun_napi_del(struct tun_file *tfile)
338 | {
339 | if (tfile->napi_enabled)
340 | netif_napi_del(&tfile->napi);
341 | }
342 |
343 | static bool tun_napi_frags_enabled(const struct tun_file *tfile)
344 | {
345 | return tfile->napi_frags_enabled;
346 | }
347 |
348 | #ifdef CONFIG_TUN_VNET_CROSS_LE
349 | static inline bool tun_legacy_is_little_endian(struct tun_struct *tun)
350 | {
351 | return tun->flags & TUN_VNET_BE ? false :
352 | virtio_legacy_is_little_endian();
353 | }
354 |
355 | static long tun_get_vnet_be(struct tun_struct *tun, int __user *argp)
356 | {
357 | int be = !!(tun->flags & TUN_VNET_BE);
358 |
359 | if (put_user(be, argp))
360 | return -EFAULT;
361 |
362 | return 0;
363 | }
364 |
365 | static long tun_set_vnet_be(struct tun_struct *tun, int __user *argp)
366 | {
367 | int be;
368 |
369 | if (get_user(be, argp))
370 | return -EFAULT;
371 |
372 | if (be)
373 | tun->flags |= TUN_VNET_BE;
374 | else
375 | tun->flags &= ~TUN_VNET_BE;
376 |
377 | return 0;
378 | }
379 | #else
380 | static inline bool tun_legacy_is_little_endian(struct tun_struct *tun)
381 | {
382 | return virtio_legacy_is_little_endian();
383 | }
384 |
385 | static long tun_get_vnet_be(struct tun_struct *tun, int __user *argp)
386 | {
387 | return -EINVAL;
388 | }
389 |
390 | static long tun_set_vnet_be(struct tun_struct *tun, int __user *argp)
391 | {
392 | return -EINVAL;
393 | }
394 | #endif /* CONFIG_TUN_VNET_CROSS_LE */
395 |
396 | static inline bool tun_is_little_endian(struct tun_struct *tun)
397 | {
398 | return tun->flags & TUN_VNET_LE ||
399 | tun_legacy_is_little_endian(tun);
400 | }
401 |
402 | static inline u16 tun16_to_cpu(struct tun_struct *tun, __virtio16 val)
403 | {
404 | return __virtio16_to_cpu(tun_is_little_endian(tun), val);
405 | }
406 |
407 | static inline __virtio16 cpu_to_tun16(struct tun_struct *tun, u16 val)
408 | {
409 | return __cpu_to_virtio16(tun_is_little_endian(tun), val);
410 | }
411 |
412 | static inline u32 tun_hashfn(u32 rxhash)
413 | {
414 | return rxhash & TUN_MASK_FLOW_ENTRIES;
415 | }
416 |
417 | static struct tun_flow_entry *tun_flow_find(struct hlist_head *head, u32 rxhash)
418 | {
419 | struct tun_flow_entry *e;
420 |
421 | hlist_for_each_entry_rcu(e, head, hash_link) {
422 | if (e->rxhash == rxhash)
423 | return e;
424 | }
425 | return NULL;
426 | }
427 |
428 | static struct tun_flow_entry *tun_flow_create(struct tun_struct *tun,
429 | struct hlist_head *head,
430 | u32 rxhash, u16 queue_index)
431 | {
432 | struct tun_flow_entry *e = kmalloc(sizeof(*e), GFP_ATOMIC);
433 |
434 | if (e) {
435 | tun_debug(KERN_INFO, tun, "create flow: hash %u index %u\n",
436 | rxhash, queue_index);
437 | e->updated = jiffies;
438 | e->rxhash = rxhash;
439 | e->rps_rxhash = 0;
440 | e->queue_index = queue_index;
441 | e->tun = tun;
442 | hlist_add_head_rcu(&e->hash_link, head);
443 | ++tun->flow_count;
444 | }
445 | return e;
446 | }
447 |
448 | static void tun_flow_delete(struct tun_struct *tun, struct tun_flow_entry *e)
449 | {
450 | tun_debug(KERN_INFO, tun, "delete flow: hash %u index %u\n",
451 | e->rxhash, e->queue_index);
452 | hlist_del_rcu(&e->hash_link);
453 | kfree_rcu(e, rcu);
454 | --tun->flow_count;
455 | }
456 |
457 | static void tun_flow_flush(struct tun_struct *tun)
458 | {
459 | int i;
460 |
461 | spin_lock_bh(&tun->lock);
462 | for (i = 0; i < TUN_NUM_FLOW_ENTRIES; i++) {
463 | struct tun_flow_entry *e;
464 | struct hlist_node *n;
465 |
466 | hlist_for_each_entry_safe(e, n, &tun->flows[i], hash_link)
467 | tun_flow_delete(tun, e);
468 | }
469 | spin_unlock_bh(&tun->lock);
470 | }
471 |
472 | static void tun_flow_delete_by_queue(struct tun_struct *tun, u16 queue_index)
473 | {
474 | int i;
475 |
476 | spin_lock_bh(&tun->lock);
477 | for (i = 0; i < TUN_NUM_FLOW_ENTRIES; i++) {
478 | struct tun_flow_entry *e;
479 | struct hlist_node *n;
480 |
481 | hlist_for_each_entry_safe(e, n, &tun->flows[i], hash_link) {
482 | if (e->queue_index == queue_index)
483 | tun_flow_delete(tun, e);
484 | }
485 | }
486 | spin_unlock_bh(&tun->lock);
487 | }
488 |
489 | static void tun_flow_cleanup(struct timer_list *t)
490 | {
491 | struct tun_struct *tun = from_timer(tun, t, flow_gc_timer);
492 | unsigned long delay = tun->ageing_time;
493 | unsigned long next_timer = jiffies + delay;
494 | unsigned long count = 0;
495 | int i;
496 |
497 | tun_debug(KERN_INFO, tun, "tun_flow_cleanup\n");
498 |
499 | spin_lock(&tun->lock);
500 | for (i = 0; i < TUN_NUM_FLOW_ENTRIES; i++) {
501 | struct tun_flow_entry *e;
502 | struct hlist_node *n;
503 |
504 | hlist_for_each_entry_safe(e, n, &tun->flows[i], hash_link) {
505 | unsigned long this_timer;
506 |
507 | this_timer = e->updated + delay;
508 | if (time_before_eq(this_timer, jiffies)) {
509 | tun_flow_delete(tun, e);
510 | continue;
511 | }
512 | count++;
513 | if (time_before(this_timer, next_timer))
514 | next_timer = this_timer;
515 | }
516 | }
517 |
518 | if (count)
519 | mod_timer(&tun->flow_gc_timer, round_jiffies_up(next_timer));
520 | spin_unlock(&tun->lock);
521 | }
522 |
523 | static void tun_flow_update(struct tun_struct *tun, u32 rxhash,
524 | struct tun_file *tfile)
525 | {
526 | struct hlist_head *head;
527 | struct tun_flow_entry *e;
528 | unsigned long delay = tun->ageing_time;
529 | u16 queue_index = tfile->queue_index;
530 |
531 | if (!rxhash)
532 | return;
533 | else
534 | head = &tun->flows[tun_hashfn(rxhash)];
535 |
536 | rcu_read_lock();
537 |
538 | e = tun_flow_find(head, rxhash);
539 | if (likely(e)) {
540 | /* TODO: keep queueing to old queue until it's empty? */
541 | e->queue_index = queue_index;
542 | e->updated = jiffies;
543 | sock_rps_record_flow_hash(e->rps_rxhash);
544 | } else {
545 | spin_lock_bh(&tun->lock);
546 | if (!tun_flow_find(head, rxhash) &&
547 | tun->flow_count < MAX_TAP_FLOWS)
548 | tun_flow_create(tun, head, rxhash, queue_index);
549 |
550 | if (!timer_pending(&tun->flow_gc_timer))
551 | mod_timer(&tun->flow_gc_timer,
552 | round_jiffies_up(jiffies + delay));
553 | spin_unlock_bh(&tun->lock);
554 | }
555 |
556 | rcu_read_unlock();
557 | }
558 |
559 | /**
560 | * Save the hash received in the stack receive path and update the
561 | * flow_hash table accordingly.
562 | */
563 | static inline void tun_flow_save_rps_rxhash(struct tun_flow_entry *e, u32 hash)
564 | {
565 | if (unlikely(e->rps_rxhash != hash))
566 | e->rps_rxhash = hash;
567 | }
568 |
569 | /* We try to identify a flow through its rxhash first. The reason that
570 | * we do not check the rxq no. is that some cards (e.g. the 82599) choose
571 | * the rxq based on the txq on which the last packet of the flow was sent. As
572 | * the userspace application moves between processors, we may get a
573 | * different rxq no. here. If we could not get a rxhash, then we
574 | * hope the rxq no. may help here.
575 | */
576 | static u16 tun_automq_select_queue(struct tun_struct *tun, struct sk_buff *skb)
577 | {
578 | struct tun_flow_entry *e;
579 | u32 txq = 0;
580 | u32 numqueues = 0;
581 |
582 | numqueues = READ_ONCE(tun->numqueues);
583 |
584 | txq = __skb_get_hash_symmetric(skb);
585 | if (txq) {
586 | e = tun_flow_find(&tun->flows[tun_hashfn(txq)], txq);
587 | if (e) {
588 | tun_flow_save_rps_rxhash(e, txq);
589 | txq = e->queue_index;
590 | } else
591 | /* use multiply and shift instead of expensive divide */
592 | txq = ((u64)txq * numqueues) >> 32;
593 | } else if (likely(skb_rx_queue_recorded(skb))) {
594 | txq = skb_get_rx_queue(skb);
595 | while (unlikely(txq >= numqueues))
596 | txq -= numqueues;
597 | }
598 |
599 | return txq;
600 | }
601 |
602 | static u16 tun_ebpf_select_queue(struct tun_struct *tun, struct sk_buff *skb)
603 | {
604 | struct tun_prog *prog;
605 | u32 numqueues;
606 | u16 ret = 0;
607 |
608 | numqueues = READ_ONCE(tun->numqueues);
609 | if (!numqueues)
610 | return 0;
611 |
612 | prog = rcu_dereference(tun->steering_prog);
613 | if (prog)
614 | ret = bpf_prog_run_clear_cb(prog->prog, skb);
615 |
616 | return ret % numqueues;
617 | }
618 |
619 | static u16 tun_select_queue(struct net_device *dev, struct sk_buff *skb,
620 | struct net_device *sb_dev,
621 | select_queue_fallback_t fallback)
622 | {
623 | struct tun_struct *tun = netdev_priv(dev);
624 | u16 ret;
625 |
626 | rcu_read_lock();
627 | if (rcu_dereference(tun->steering_prog))
628 | ret = tun_ebpf_select_queue(tun, skb);
629 | else
630 | ret = tun_automq_select_queue(tun, skb);
631 | rcu_read_unlock();
632 |
633 | return ret;
634 | }
635 |
636 | static inline bool tun_not_capable(struct tun_struct *tun)
637 | {
638 | const struct cred *cred = current_cred();
639 | struct net *net = dev_net(tun->dev);
640 |
641 | return ((uid_valid(tun->owner) && !uid_eq(cred->euid, tun->owner)) ||
642 | (gid_valid(tun->group) && !in_egroup_p(tun->group))) &&
643 | !ns_capable(net->user_ns, CAP_NET_ADMIN);
644 | }
645 |
646 | static void tun_set_real_num_queues(struct tun_struct *tun)
647 | {
648 | netif_set_real_num_tx_queues(tun->dev, tun->numqueues);
649 | netif_set_real_num_rx_queues(tun->dev, tun->numqueues);
650 | }
651 |
652 | static void tun_disable_queue(struct tun_struct *tun, struct tun_file *tfile)
653 | {
654 | tfile->detached = tun;
655 | list_add_tail(&tfile->next, &tun->disabled);
656 | ++tun->numdisabled;
657 | }
658 |
659 | static struct tun_struct *tun_enable_queue(struct tun_file *tfile)
660 | {
661 | struct tun_struct *tun = tfile->detached;
662 |
663 | tfile->detached = NULL;
664 | list_del_init(&tfile->next);
665 | --tun->numdisabled;
666 | return tun;
667 | }
668 |
669 | void tun_ptr_free(void *ptr)
670 | {
671 | if (!ptr)
672 | return;
673 | if (tun_is_xdp_frame(ptr)) {
674 | struct xdp_frame *xdpf = tun_ptr_to_xdp(ptr);
675 |
676 | xdp_return_frame(xdpf);
677 | } else {
678 | __skb_array_destroy_skb(ptr);
679 | }
680 | }
681 | EXPORT_SYMBOL_GPL(tun_ptr_free);
682 |
683 | static void tun_queue_purge(struct tun_file *tfile)
684 | {
685 | void *ptr;
686 |
687 | while ((ptr = ptr_ring_consume(&tfile->tx_ring)) != NULL)
688 | tun_ptr_free(ptr);
689 |
690 | skb_queue_purge(&tfile->sk.sk_write_queue);
691 | skb_queue_purge(&tfile->sk.sk_error_queue);
692 | }
693 |
694 | static void __tun_detach(struct tun_file *tfile, bool clean)
695 | {
696 | struct tun_file *ntfile;
697 | struct tun_struct *tun;
698 |
699 | tun = rtnl_dereference(tfile->tun);
700 |
701 | if (tun && clean) {
702 | tun_napi_disable(tfile);
703 | tun_napi_del(tfile);
704 | }
705 |
706 | if (tun && !tfile->detached) {
707 | u16 index = tfile->queue_index;
708 | BUG_ON(index >= tun->numqueues);
709 |
710 | rcu_assign_pointer(tun->tfiles[index],
711 | tun->tfiles[tun->numqueues - 1]);
712 | ntfile = rtnl_dereference(tun->tfiles[index]);
713 | ntfile->queue_index = index;
714 | rcu_assign_pointer(tun->tfiles[tun->numqueues - 1],
715 | NULL);
716 |
717 | --tun->numqueues;
718 | if (clean) {
719 | RCU_INIT_POINTER(tfile->tun, NULL);
720 | sock_put(&tfile->sk);
721 | } else
722 | tun_disable_queue(tun, tfile);
723 |
724 | synchronize_net();
725 | tun_flow_delete_by_queue(tun, tun->numqueues + 1);
726 | /* Drop read queue */
727 | tun_queue_purge(tfile);
728 | tun_set_real_num_queues(tun);
729 | } else if (tfile->detached && clean) {
730 | tun = tun_enable_queue(tfile);
731 | sock_put(&tfile->sk);
732 | }
733 |
734 | if (clean) {
735 | if (tun && tun->numqueues == 0 && tun->numdisabled == 0) {
736 | netif_carrier_off(tun->dev);
737 |
738 | if (!(tun->flags & IFF_PERSIST) &&
739 | tun->dev->reg_state == NETREG_REGISTERED)
740 | unregister_netdevice(tun->dev);
741 | }
742 | if (tun)
743 | xdp_rxq_info_unreg(&tfile->xdp_rxq);
744 | ptr_ring_cleanup(&tfile->tx_ring, tun_ptr_free);
745 | sock_put(&tfile->sk);
746 | }
747 | }
748 |
749 | static void tun_detach(struct tun_file *tfile, bool clean)
750 | {
751 | struct tun_struct *tun;
752 | struct net_device *dev;
753 |
754 | rtnl_lock();
755 | tun = rtnl_dereference(tfile->tun);
756 | dev = tun ? tun->dev : NULL;
757 | __tun_detach(tfile, clean);
758 | if (dev)
759 | netdev_state_change(dev);
760 | rtnl_unlock();
761 | }
762 |
763 | static void tun_detach_all(struct net_device *dev)
764 | {
765 | struct tun_struct *tun = netdev_priv(dev);
766 | struct tun_file *tfile, *tmp;
767 | int i, n = tun->numqueues;
768 |
769 | for (i = 0; i < n; i++) {
770 | tfile = rtnl_dereference(tun->tfiles[i]);
771 | BUG_ON(!tfile);
772 | tun_napi_disable(tfile);
773 | tfile->socket.sk->sk_shutdown = RCV_SHUTDOWN;
774 | tfile->socket.sk->sk_data_ready(tfile->socket.sk);
775 | RCU_INIT_POINTER(tfile->tun, NULL);
776 | --tun->numqueues;
777 | }
778 | list_for_each_entry(tfile, &tun->disabled, next) {
779 | tfile->socket.sk->sk_shutdown = RCV_SHUTDOWN;
780 | tfile->socket.sk->sk_data_ready(tfile->socket.sk);
781 | RCU_INIT_POINTER(tfile->tun, NULL);
782 | }
783 | BUG_ON(tun->numqueues != 0);
784 |
785 | synchronize_net();
786 | for (i = 0; i < n; i++) {
787 | tfile = rtnl_dereference(tun->tfiles[i]);
788 | tun_napi_del(tfile);
789 | /* Drop read queue */
790 | tun_queue_purge(tfile);
791 | xdp_rxq_info_unreg(&tfile->xdp_rxq);
792 | sock_put(&tfile->sk);
793 | }
794 | list_for_each_entry_safe(tfile, tmp, &tun->disabled, next) {
795 | tun_enable_queue(tfile);
796 | tun_queue_purge(tfile);
797 | xdp_rxq_info_unreg(&tfile->xdp_rxq);
798 | sock_put(&tfile->sk);
799 | }
800 | BUG_ON(tun->numdisabled != 0);
801 |
802 | if (tun->flags & IFF_PERSIST)
803 | module_put(THIS_MODULE);
804 | }
805 |
806 | static int tun_attach(struct tun_struct *tun, struct file *file,
807 | bool skip_filter, bool napi, bool napi_frags,
808 | bool publish_tun)
809 | {
810 | struct tun_file *tfile = file->private_data;
811 | struct net_device *dev = tun->dev;
812 | int err;
813 |
814 | err = security_tun_dev_attach(tfile->socket.sk, tun->security);
815 | if (err < 0)
816 | goto out;
817 |
818 | err = -EINVAL;
819 | if (rtnl_dereference(tfile->tun) && !tfile->detached)
820 | goto out;
821 |
822 | err = -EBUSY;
823 | if (!(tun->flags & IFF_MULTI_QUEUE) && tun->numqueues == 1)
824 | goto out;
825 |
826 | err = -E2BIG;
827 | if (!tfile->detached &&
828 | tun->numqueues + tun->numdisabled == MAX_TAP_QUEUES)
829 | goto out;
830 |
831 | err = 0;
832 |
833 | /* Re-attach the filter to persist device */
834 | if (!skip_filter && (tun->filter_attached == true)) {
835 | lock_sock(tfile->socket.sk);
836 | err = sk_attach_filter(&tun->fprog, tfile->socket.sk);
837 | release_sock(tfile->socket.sk);
838 | if (err < 0)
839 | goto out;
840 | }
841 |
842 | if (!tfile->detached &&
843 | ptr_ring_resize(&tfile->tx_ring, dev->tx_queue_len,
844 | GFP_KERNEL, tun_ptr_free)) {
845 | err = -ENOMEM;
846 | goto out;
847 | }
848 |
849 | tfile->queue_index = tun->numqueues;
850 | tfile->socket.sk->sk_shutdown &= ~RCV_SHUTDOWN;
851 |
852 | if (tfile->detached) {
853 | /* Re-attach detached tfile, updating XDP queue_index */
854 | WARN_ON(!xdp_rxq_info_is_reg(&tfile->xdp_rxq));
855 |
856 | if (tfile->xdp_rxq.queue_index != tfile->queue_index)
857 | tfile->xdp_rxq.queue_index = tfile->queue_index;
858 | } else {
859 | /* Setup XDP RX-queue info, for new tfile getting attached */
860 | err = xdp_rxq_info_reg(&tfile->xdp_rxq,
861 | tun->dev, tfile->queue_index);
862 | if (err < 0)
863 | goto out;
864 | err = xdp_rxq_info_reg_mem_model(&tfile->xdp_rxq,
865 | MEM_TYPE_PAGE_SHARED, NULL);
866 | if (err < 0) {
867 | xdp_rxq_info_unreg(&tfile->xdp_rxq);
868 | goto out;
869 | }
870 | err = 0;
871 | }
872 |
873 | if (tfile->detached) {
874 | tun_enable_queue(tfile);
875 | } else {
876 | sock_hold(&tfile->sk);
877 | tun_napi_init(tun, tfile, napi, napi_frags);
878 | }
879 |
880 | /* device is allowed to go away first, so no need to hold extra
881 | * refcnt.
882 | */
883 |
884 | /* Publish tfile->tun and tun->tfiles only after we've fully
885 | * initialized tfile; otherwise we risk using half-initialized
886 | * object.
887 | */
888 | if (publish_tun)
889 | rcu_assign_pointer(tfile->tun, tun);
890 | rcu_assign_pointer(tun->tfiles[tun->numqueues], tfile);
891 | tun->numqueues++;
892 | tun_set_real_num_queues(tun);
893 | out:
894 | return err;
895 | }
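The publish-ordering comment in tun_attach() is the classic RCU init-then-publish rule. A hedged userspace analogue, with C11 atomics standing in for rcu_assign_pointer()/rcu_dereference() and an invented struct for illustration:

    /* Userspace analogue of the init-then-publish rule: a release store
     * plays the role of rcu_assign_pointer(), an acquire load the role
     * of rcu_dereference(). Names are invented for this sketch. */
    #include <stdatomic.h>
    #include <stddef.h>

    struct conn {
            int queue_index;
            int ready;
    };

    static _Atomic(struct conn *) published;

    void publish(struct conn *c)
    {
            c->queue_index = 0;     /* fully initialize first ... */
            c->ready = 1;
            /* ... then publish with release ordering, so readers never
             * observe a half-initialized object */
            atomic_store_explicit(&published, c, memory_order_release);
    }

    struct conn *lookup(void)
    {
            return atomic_load_explicit(&published, memory_order_acquire);
    }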
896 |
897 | static struct tun_struct *tun_get(struct tun_file *tfile)
898 | {
899 | struct tun_struct *tun;
900 |
901 | rcu_read_lock();
902 | tun = rcu_dereference(tfile->tun);
903 | if (tun)
904 | dev_hold(tun->dev);
905 | rcu_read_unlock();
906 |
907 | return tun;
908 | }
909 |
910 | static void tun_put(struct tun_struct *tun)
911 | {
912 | dev_put(tun->dev);
913 | }
914 |
915 | /* TAP filtering */
916 | static void addr_hash_set(u32 *mask, const u8 *addr)
917 | {
918 | int n = ether_crc(ETH_ALEN, addr) >> 26;
919 | mask[n >> 5] |= (1 << (n & 31));
920 | }
921 |
922 | static unsigned int addr_hash_test(const u32 *mask, const u8 *addr)
923 | {
924 | int n = ether_crc(ETH_ALEN, addr) >> 26;
925 | return mask[n >> 5] & (1 << (n & 31));
926 | }
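addr_hash_set()/addr_hash_test() index a 64-bit mask (two u32 words) by the top 6 bits of a big-endian CRC-32 of the MAC address. A standalone sketch of the bit computation, assuming ether_crc() is the usual bitwise MSB-first CRC-32 (polynomial 0x04C11DB7, initial value ~0):

    #include <stdint.h>
    #include <stdio.h>

    /* Assumed equivalent of the kernel's ether_crc() */
    static uint32_t ether_crc_be(int len, const uint8_t *p)
    {
            uint32_t crc = 0xffffffffu;

            while (len--) {
                    crc ^= (uint32_t)*p++ << 24;
                    for (int i = 0; i < 8; i++)
                            crc = (crc & 0x80000000u) ?
                                  (crc << 1) ^ 0x04c11db7u : crc << 1;
            }
            return crc;
    }

    int main(void)
    {
            const uint8_t mac[6] = { 0x01, 0x00, 0x5e, 0x00, 0x00, 0x01 };
            int n = ether_crc_be(6, mac) >> 26;     /* top 6 bits: 0..63 */

            printf("crc bit %d -> mask[%d], bit %d\n", n, n >> 5, n & 31);
            return 0;
    }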
927 |
928 | static int update_filter(struct tap_filter *filter, void __user *arg)
929 | {
930 | struct { u8 u[ETH_ALEN]; } *addr;
931 | struct tun_filter uf;
932 | int err, alen, n, nexact;
933 |
934 | if (copy_from_user(&uf, arg, sizeof(uf)))
935 | return -EFAULT;
936 |
937 | if (!uf.count) {
938 | /* Disabled */
939 | filter->count = 0;
940 | return 0;
941 | }
942 |
943 | alen = ETH_ALEN * uf.count;
944 | addr = memdup_user(arg + sizeof(uf), alen);
945 | if (IS_ERR(addr))
946 | return PTR_ERR(addr);
947 |
948 | /* The filter is updated without holding any locks, which is
949 |  * perfectly safe: we disable it first, and in the worst case
950 |  * we'll accept a few undesired packets. */
951 | filter->count = 0;
952 | wmb();
953 |
954 | /* Use first set of addresses as an exact filter */
955 | for (n = 0; n < uf.count && n < FLT_EXACT_COUNT; n++)
956 | memcpy(filter->addr[n], addr[n].u, ETH_ALEN);
957 |
958 | nexact = n;
959 |
960 | /* Remaining multicast addresses are hashed,
961 | * unicast will leave the filter disabled. */
962 | memset(filter->mask, 0, sizeof(filter->mask));
963 | for (; n < uf.count; n++) {
964 | if (!is_multicast_ether_addr(addr[n].u)) {
965 | err = 0; /* no filter */
966 | goto free_addr;
967 | }
968 | addr_hash_set(filter->mask, addr[n].u);
969 | }
970 |
971 | /* For ALLMULTI just set the mask to all ones.
972 | * This overrides the mask populated above. */
973 | if ((uf.flags & TUN_FLT_ALLMULTI))
974 | memset(filter->mask, ~0, sizeof(filter->mask));
975 |
976 | /* Now enable the filter */
977 | wmb();
978 | filter->count = nexact;
979 |
980 | /* Return the number of exact filters */
981 | err = nexact;
982 | free_addr:
983 | kfree(addr);
984 | return err;
985 | }
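update_filter() expects a struct tun_filter immediately followed in memory by `count` six-byte addresses. A hedged sketch of the userspace calling side (assumes a tap fd already opened via TUNSETIFF; error handling trimmed; the example MACs are arbitrary):

    #include <linux/if_ether.h>
    #include <linux/if_tun.h>
    #include <string.h>
    #include <sys/ioctl.h>

    static int set_tx_filter(int tap_fd)
    {
            /* struct tun_filter is followed in memory by `count` MACs */
            struct {
                    struct tun_filter hdr;
                    unsigned char addr[2][ETH_ALEN];
            } f;

            memset(&f, 0, sizeof(f));
            f.hdr.flags = 0;        /* or TUN_FLT_ALLMULTI */
            f.hdr.count = 2;
            /* one exact unicast entry, one hashed multicast entry */
            memcpy(f.addr[0], "\x02\x00\x00\x00\x00\x01", ETH_ALEN);
            memcpy(f.addr[1], "\x01\x00\x5e\x00\x00\x01", ETH_ALEN);

            return ioctl(tap_fd, TUNSETTXFILTER, &f);
    }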
986 |
987 | /* Returns: 0 - drop, !=0 - accept */
988 | static int run_filter(struct tap_filter *filter, const struct sk_buff *skb)
989 | {
990 | /* Cannot use eth_hdr(skb) here because skb_mac_header() is not
991 |  * set up correctly at this point. */
992 | struct ethhdr *eh = (struct ethhdr *) skb->data;
993 | int i;
994 |
995 | /* Exact match */
996 | for (i = 0; i < filter->count; i++)
997 | if (ether_addr_equal(eh->h_dest, filter->addr[i]))
998 | return 1;
999 |
1000 | /* Inexact match (multicast only) */
1001 | if (is_multicast_ether_addr(eh->h_dest))
1002 | return addr_hash_test(filter->mask, eh->h_dest);
1003 |
1004 | return 0;
1005 | }
1006 |
1007 | /*
1008 | * Checks whether the packet is accepted or not.
1009 | * Returns: 0 - drop, !=0 - accept
1010 | */
1011 | static int check_filter(struct tap_filter *filter, const struct sk_buff *skb)
1012 | {
1013 | if (!filter->count)
1014 | return 1;
1015 |
1016 | return run_filter(filter, skb);
1017 | }
1018 |
1019 | /* Network device part of the driver */
1020 |
1021 | static const struct ethtool_ops tun_ethtool_ops;
1022 |
1023 | /* Net device detach from fd. */
1024 | static void tun_net_uninit(struct net_device *dev)
1025 | {
1026 | tun_detach_all(dev);
1027 | }
1028 |
1029 | /* Net device open. */
1030 | static int tun_net_open(struct net_device *dev)
1031 | {
1032 | netif_tx_start_all_queues(dev);
1033 |
1034 | return 0;
1035 | }
1036 |
1037 | /* Net device close. */
1038 | static int tun_net_close(struct net_device *dev)
1039 | {
1040 | netif_tx_stop_all_queues(dev);
1041 | return 0;
1042 | }
1043 |
1044 | /* Net device start xmit */
1045 | static void tun_automq_xmit(struct tun_struct *tun, struct sk_buff *skb)
1046 | {
1047 | #ifdef CONFIG_RPS
1048 | if (tun->numqueues == 1 && static_key_false(&rps_needed)) {
1049 | /* Select queue was not called for the skbuff, so we extract the
1050 | * RPS hash and save it into the flow_table here.
1051 | */
1052 | __u32 rxhash;
1053 |
1054 | rxhash = __skb_get_hash_symmetric(skb);
1055 | if (rxhash) {
1056 | struct tun_flow_entry *e;
1057 | e = tun_flow_find(&tun->flows[tun_hashfn(rxhash)],
1058 | rxhash);
1059 | if (e)
1060 | tun_flow_save_rps_rxhash(e, rxhash);
1061 | }
1062 | }
1063 | #endif
1064 | }
1065 |
1066 | static unsigned int run_ebpf_filter(struct tun_struct *tun,
1067 | struct sk_buff *skb,
1068 | int len)
1069 | {
1070 | struct tun_prog *prog = rcu_dereference(tun->filter_prog);
1071 |
1072 | if (prog)
1073 | len = bpf_prog_run_clear_cb(prog->prog, skb);
1074 |
1075 | return len;
1076 | }
1077 |
1078 | /* Net device start xmit */
1079 | static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
1080 | {
1081 | struct tun_struct *tun = netdev_priv(dev);
1082 | int txq = skb->queue_mapping;
1083 | struct tun_file *tfile;
1084 | int len = skb->len;
1085 |
1086 |
1087 | rcu_read_lock();
1088 | /* skb->queue_mapping was filled in via .ndo_select_queue */
1089 | tfile = rcu_dereference(tun->tfiles[txq]);
1090 |
1091 | /* Drop packet if interface is not attached */
1092 | if (!tfile)
1093 | goto drop;
1094 |
1095 | if (!rcu_dereference(tun->steering_prog))
1096 | tun_automq_xmit(tun, skb);
1097 |
1098 | tun_debug(KERN_INFO, tun, "tun_net_xmit %d\n", skb->len);
1099 |
1100 | BUG_ON(!tfile);
1101 |
1102 | /* Drop if the filter does not like it.
1103 |  * This is a noop if the filter is disabled.
1104 |  * The filter can be enabled only for TAP devices. */
1105 | if (!check_filter(&tun->txflt, skb))
1106 | goto drop;
1107 |
1108 | if (tfile->socket.sk->sk_filter &&
1109 | sk_filter(tfile->socket.sk, skb))
1110 | goto drop;
1111 |
1112 | len = run_ebpf_filter(tun, skb, len);
1113 | if (len == 0 || pskb_trim(skb, len))
1114 | goto drop;
1115 |
1116 | if (unlikely(skb_orphan_frags_rx(skb, GFP_ATOMIC)))
1117 | goto drop;
1118 |
1119 | skb_tx_timestamp(skb);
1120 |
1121 | /* Orphan the skb - required as we might hang on to it
1122 |  * for an indefinite amount of time.
1123 |  */
1124 | skb_orphan(skb);
1125 |
1126 | nf_reset(skb);
1127 |
1128 | if (ptr_ring_produce(&tfile->tx_ring, skb))
1129 | goto drop;
1130 |
1131 | /* Notify and wake up reader process */
1132 | if (tfile->flags & TUN_FASYNC)
1133 | kill_fasync(&tfile->fasync, SIGIO, POLL_IN);
1134 | tfile->socket.sk->sk_data_ready(tfile->socket.sk);
1135 |
1136 | rcu_read_unlock();
1137 | return NETDEV_TX_OK;
1138 |
1139 | drop:
1140 | this_cpu_inc(tun->pcpu_stats->tx_dropped);
1141 | skb_tx_error(skb);
1142 | kfree_skb(skb);
1143 | rcu_read_unlock();
1144 | return NET_XMIT_DROP;
1145 | }
1146 |
1147 | static void tun_net_mclist(struct net_device *dev)
1148 | {
1149 | /*
1150 |  * This callback is supposed to deal with the mc filter in the
1151 |  * _rx_ path and has nothing to do with the _tx_ path.
1152 |  * In the rx path we always accept everything userspace gives us.
1153 |  */
1154 | }
1155 |
1156 | static netdev_features_t tun_net_fix_features(struct net_device *dev,
1157 | netdev_features_t features)
1158 | {
1159 | struct tun_struct *tun = netdev_priv(dev);
1160 |
1161 | return (features & tun->set_features) | (features & ~TUN_USER_FEATURES);
1162 | }
1163 |
1164 | static void tun_set_headroom(struct net_device *dev, int new_hr)
1165 | {
1166 | struct tun_struct *tun = netdev_priv(dev);
1167 |
1168 | if (new_hr < NET_SKB_PAD)
1169 | new_hr = NET_SKB_PAD;
1170 |
1171 | tun->align = new_hr;
1172 | }
1173 |
1174 | static void
1175 | tun_net_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats)
1176 | {
1177 | u32 rx_dropped = 0, tx_dropped = 0, rx_frame_errors = 0;
1178 | struct tun_struct *tun = netdev_priv(dev);
1179 | struct tun_pcpu_stats *p;
1180 | int i;
1181 |
1182 | for_each_possible_cpu(i) {
1183 | u64 rxpackets, rxbytes, txpackets, txbytes;
1184 | unsigned int start;
1185 |
1186 | p = per_cpu_ptr(tun->pcpu_stats, i);
1187 | do {
1188 | start = u64_stats_fetch_begin(&p->syncp);
1189 | rxpackets = p->rx_packets;
1190 | rxbytes = p->rx_bytes;
1191 | txpackets = p->tx_packets;
1192 | txbytes = p->tx_bytes;
1193 | } while (u64_stats_fetch_retry(&p->syncp, start));
1194 |
1195 | stats->rx_packets += rxpackets;
1196 | stats->rx_bytes += rxbytes;
1197 | stats->tx_packets += txpackets;
1198 | stats->tx_bytes += txbytes;
1199 |
1200 | /* u32 counters */
1201 | rx_dropped += p->rx_dropped;
1202 | rx_frame_errors += p->rx_frame_errors;
1203 | tx_dropped += p->tx_dropped;
1204 | }
1205 | stats->rx_dropped = rx_dropped;
1206 | stats->rx_frame_errors = rx_frame_errors;
1207 | stats->tx_dropped = tx_dropped;
1208 | }
1209 |
1210 | static int tun_xdp_set(struct net_device *dev, struct bpf_prog *prog,
1211 | struct netlink_ext_ack *extack)
1212 | {
1213 | struct tun_struct *tun = netdev_priv(dev);
1214 | struct bpf_prog *old_prog;
1215 |
1216 | old_prog = rtnl_dereference(tun->xdp_prog);
1217 | rcu_assign_pointer(tun->xdp_prog, prog);
1218 | if (old_prog)
1219 | bpf_prog_put(old_prog);
1220 |
1221 | return 0;
1222 | }
1223 |
1224 | static u32 tun_xdp_query(struct net_device *dev)
1225 | {
1226 | struct tun_struct *tun = netdev_priv(dev);
1227 | const struct bpf_prog *xdp_prog;
1228 |
1229 | xdp_prog = rtnl_dereference(tun->xdp_prog);
1230 | if (xdp_prog)
1231 | return xdp_prog->aux->id;
1232 |
1233 | return 0;
1234 | }
1235 |
1236 | static int tun_xdp(struct net_device *dev, struct netdev_bpf *xdp)
1237 | {
1238 | switch (xdp->command) {
1239 | case XDP_SETUP_PROG:
1240 | return tun_xdp_set(dev, xdp->prog, xdp->extack);
1241 | case XDP_QUERY_PROG:
1242 | xdp->prog_id = tun_xdp_query(dev);
1243 | return 0;
1244 | default:
1245 | return -EINVAL;
1246 | }
1247 | }
1248 |
1249 | static const struct net_device_ops tun_netdev_ops = {
1250 | .ndo_uninit = tun_net_uninit,
1251 | .ndo_open = tun_net_open,
1252 | .ndo_stop = tun_net_close,
1253 | .ndo_start_xmit = tun_net_xmit,
1254 | .ndo_fix_features = tun_net_fix_features,
1255 | .ndo_select_queue = tun_select_queue,
1256 | .ndo_set_rx_headroom = tun_set_headroom,
1257 | .ndo_get_stats64 = tun_net_get_stats64,
1258 | };
1259 |
1260 | static void __tun_xdp_flush_tfile(struct tun_file *tfile)
1261 | {
1262 | /* Notify and wake up reader process */
1263 | if (tfile->flags & TUN_FASYNC)
1264 | kill_fasync(&tfile->fasync, SIGIO, POLL_IN);
1265 | tfile->socket.sk->sk_data_ready(tfile->socket.sk);
1266 | }
1267 |
1268 | static int tun_xdp_xmit(struct net_device *dev, int n,
1269 | struct xdp_frame **frames, u32 flags)
1270 | {
1271 | struct tun_struct *tun = netdev_priv(dev);
1272 | struct tun_file *tfile;
1273 | u32 numqueues;
1274 | int drops = 0;
1275 | int cnt = n;
1276 | int i;
1277 |
1278 | if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
1279 | return -EINVAL;
1280 |
1281 | rcu_read_lock();
1282 |
1283 | resample:
1284 | numqueues = READ_ONCE(tun->numqueues);
1285 | if (!numqueues) {
1286 | rcu_read_unlock();
1287 | return -ENXIO; /* Caller will free/return all frames */
1288 | }
1289 |
1290 | tfile = rcu_dereference(tun->tfiles[smp_processor_id() %
1291 | numqueues]);
1292 | if (unlikely(!tfile))
1293 | goto resample;
1294 |
1295 | spin_lock(&tfile->tx_ring.producer_lock);
1296 | for (i = 0; i < n; i++) {
1297 | struct xdp_frame *xdp = frames[i];
1298 | /* Encode the XDP flag into the lowest bit so the consumer can
1299 |  * distinguish an XDP frame from an sk_buff.
1300 |  */
1301 | void *frame = tun_xdp_to_ptr(xdp);
1302 |
1303 | if (__ptr_ring_produce(&tfile->tx_ring, frame)) {
1304 | this_cpu_inc(tun->pcpu_stats->tx_dropped);
1305 | xdp_return_frame_rx_napi(xdp);
1306 | drops++;
1307 | }
1308 | }
1309 | spin_unlock(&tfile->tx_ring.producer_lock);
1310 |
1311 | if (flags & XDP_XMIT_FLUSH)
1312 | __tun_xdp_flush_tfile(tfile);
1313 |
1314 | rcu_read_unlock();
1315 | return cnt - drops;
1316 | }
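The ring mixes sk_buff and xdp_frame pointers, distinguished by the tag tun_xdp_to_ptr() sets in the lowest pointer bit; this works because the allocations are at least 2-byte aligned, so bit 0 is always free. A self-contained sketch of the same tagging scheme (helper names invented; the driver's real helpers live elsewhere in this file):

    #include <stdbool.h>
    #include <stdint.h>

    #define XDP_TAG 0x1UL

    /* tag an xdp_frame pointer before it enters the ring */
    static inline void *xdp_to_ptr(void *frame)
    {
            return (void *)((uintptr_t)frame | XDP_TAG);
    }

    /* consumer side: is this ring entry an XDP frame or an sk_buff? */
    static inline bool is_xdp_ptr(void *ptr)
    {
            return (uintptr_t)ptr & XDP_TAG;
    }

    /* strip the tag to recover the original pointer */
    static inline void *ptr_to_xdp(void *ptr)
    {
            return (void *)((uintptr_t)ptr & ~XDP_TAG);
    }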
1317 |
1318 | static int tun_xdp_tx(struct net_device *dev, struct xdp_buff *xdp)
1319 | {
1320 | struct xdp_frame *frame = convert_to_xdp_frame(xdp);
1321 |
1322 | if (unlikely(!frame))
1323 | return -EOVERFLOW;
1324 |
1325 | return tun_xdp_xmit(dev, 1, &frame, XDP_XMIT_FLUSH);
1326 | }
1327 |
1328 | static const struct net_device_ops tap_netdev_ops = {
1329 | .ndo_uninit = tun_net_uninit,
1330 | .ndo_open = tun_net_open,
1331 | .ndo_stop = tun_net_close,
1332 | .ndo_start_xmit = tun_net_xmit,
1333 | .ndo_fix_features = tun_net_fix_features,
1334 | .ndo_set_rx_mode = tun_net_mclist,
1335 | .ndo_set_mac_address = eth_mac_addr,
1336 | .ndo_validate_addr = eth_validate_addr,
1337 | .ndo_select_queue = tun_select_queue,
1338 | .ndo_features_check = passthru_features_check,
1339 | .ndo_set_rx_headroom = tun_set_headroom,
1340 | .ndo_get_stats64 = tun_net_get_stats64,
1341 | .ndo_bpf = tun_xdp,
1342 | .ndo_xdp_xmit = tun_xdp_xmit,
1343 | };
1344 |
1345 | static void tun_flow_init(struct tun_struct *tun)
1346 | {
1347 | int i;
1348 |
1349 | for (i = 0; i < TUN_NUM_FLOW_ENTRIES; i++)
1350 | INIT_HLIST_HEAD(&tun->flows[i]);
1351 |
1352 | tun->ageing_time = TUN_FLOW_EXPIRE;
1353 | timer_setup(&tun->flow_gc_timer, tun_flow_cleanup, 0);
1354 | mod_timer(&tun->flow_gc_timer,
1355 | round_jiffies_up(jiffies + tun->ageing_time));
1356 | }
1357 |
1358 | static void tun_flow_uninit(struct tun_struct *tun)
1359 | {
1360 | del_timer_sync(&tun->flow_gc_timer);
1361 | tun_flow_flush(tun);
1362 | }
1363 |
1364 | #define MIN_MTU 68
1365 | #define MAX_MTU 65535
1366 |
1367 | /* Initialize net device. */
1368 | static void tun_net_init(struct net_device *dev)
1369 | {
1370 | struct tun_struct *tun = netdev_priv(dev);
1371 |
1372 | switch (tun->flags & TUN_TYPE_MASK) {
1373 | case IFF_TUN:
1374 | dev->netdev_ops = &tun_netdev_ops;
1375 |
1376 | /* Point-to-Point TUN Device */
1377 | dev->hard_header_len = 0;
1378 | dev->addr_len = 0;
1379 | dev->mtu = 1500;
1380 |
1381 | /* Zero header length */
1382 | dev->type = ARPHRD_NONE;
1383 | dev->flags = IFF_POINTOPOINT | IFF_NOARP | IFF_MULTICAST;
1384 | break;
1385 |
1386 | case IFF_TAP:
1387 | dev->netdev_ops = &tap_netdev_ops;
1388 | /* Ethernet TAP Device */
1389 | ether_setup(dev);
1390 | dev->priv_flags &= ~IFF_TX_SKB_SHARING;
1391 | dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
1392 |
1393 | eth_hw_addr_random(dev);
1394 |
1395 | break;
1396 | }
1397 |
1398 | dev->min_mtu = MIN_MTU;
1399 | dev->max_mtu = MAX_MTU - dev->hard_header_len;
1400 | }
1401 |
1402 | static bool tun_sock_writeable(struct tun_struct *tun, struct tun_file *tfile)
1403 | {
1404 | struct sock *sk = tfile->socket.sk;
1405 |
1406 | return (tun->dev->flags & IFF_UP) && sock_writeable(sk);
1407 | }
1408 |
1409 | /* Character device part */
1410 |
1411 | /* Poll */
1412 | static __poll_t tun_chr_poll(struct file *file, poll_table *wait)
1413 | {
1414 | struct tun_file *tfile = file->private_data;
1415 | struct tun_struct *tun = tun_get(tfile);
1416 | struct sock *sk;
1417 | __poll_t mask = 0;
1418 |
1419 | if (!tun)
1420 | return EPOLLERR;
1421 |
1422 | sk = tfile->socket.sk;
1423 |
1424 | tun_debug(KERN_INFO, tun, "tun_chr_poll\n");
1425 |
1426 | poll_wait(file, sk_sleep(sk), wait);
1427 |
1428 | if (!ptr_ring_empty(&tfile->tx_ring))
1429 | mask |= EPOLLIN | EPOLLRDNORM;
1430 |
1431 | /* Make sure SOCKWQ_ASYNC_NOSPACE is set if not writable, to
1432 |  * guarantee that EPOLLOUT will be raised either here or in
1433 |  * tun_sock_write_space(). The process can then get a notification
1434 |  * after it writes to a down device and meets -EIO.
1435 |  */
1436 | if (tun_sock_writeable(tun, tfile) ||
1437 | (!test_and_set_bit(SOCKWQ_ASYNC_NOSPACE, &sk->sk_socket->flags) &&
1438 | tun_sock_writeable(tun, tfile)))
1439 | mask |= EPOLLOUT | EPOLLWRNORM;
1440 |
1441 | if (tun->dev->reg_state != NETREG_REGISTERED)
1442 | mask = EPOLLERR;
1443 |
1444 | tun_put(tun);
1445 | return mask;
1446 | }
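The writability test above is the standard "check, request notification, re-check" pattern: the NOSPACE bit must be set before the second sock_writeable(), so a buffer that drains between the two checks still produces EPOLLOUT. A simplified userspace sketch of the same idea (the `writable` callback is hypothetical):

    #include <stdatomic.h>
    #include <stdbool.h>

    static atomic_bool want_wakeup;

    bool poll_writable(bool (*writable)(void))
    {
            if (writable())
                    return true;                /* fast path */

            atomic_store(&want_wakeup, true);   /* ask to be notified ... */
            return writable();                  /* ... then re-check, so a
                                                 * wakeup between the two
                                                 * checks is never lost */
    }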
1447 |
1448 | static struct sk_buff *tun_napi_alloc_frags(struct tun_file *tfile,
1449 | size_t len,
1450 | const struct iov_iter *it)
1451 | {
1452 | struct sk_buff *skb;
1453 | size_t linear;
1454 | int err;
1455 | int i;
1456 |
1457 | if (it->nr_segs > MAX_SKB_FRAGS + 1)
1458 | return ERR_PTR(-ENOMEM);
1459 |
1460 | local_bh_disable();
1461 | skb = napi_get_frags(&tfile->napi);
1462 | local_bh_enable();
1463 | if (!skb)
1464 | return ERR_PTR(-ENOMEM);
1465 |
1466 | linear = iov_iter_single_seg_count(it);
1467 | err = __skb_grow(skb, linear);
1468 | if (err)
1469 | goto free;
1470 |
1471 | skb->len = len;
1472 | skb->data_len = len - linear;
1473 | skb->truesize += skb->data_len;
1474 |
1475 | for (i = 1; i < it->nr_segs; i++) {
1476 | struct page_frag *pfrag = &current->task_frag;
1477 | size_t fragsz = it->iov[i].iov_len;
1478 |
1479 | if (fragsz == 0 || fragsz > PAGE_SIZE) {
1480 | err = -EINVAL;
1481 | goto free;
1482 | }
1483 |
1484 | if (!skb_page_frag_refill(fragsz, pfrag, GFP_KERNEL)) {
1485 | err = -ENOMEM;
1486 | goto free;
1487 | }
1488 |
1489 | skb_fill_page_desc(skb, i - 1, pfrag->page,
1490 | pfrag->offset, fragsz);
1491 | page_ref_inc(pfrag->page);
1492 | pfrag->offset += fragsz;
1493 | }
1494 |
1495 | return skb;
1496 | free:
1497 | /* frees skb and all frags allocated with napi_alloc_frag() */
1498 | napi_free_frags(&tfile->napi);
1499 | return ERR_PTR(err);
1500 | }
1501 |
1502 | /* prepad is the amount to reserve at front. len is length after that.
1503 | * linear is a hint as to how much to copy (usually headers). */
1504 | static struct sk_buff *tun_alloc_skb(struct tun_file *tfile,
1505 | size_t prepad, size_t len,
1506 | size_t linear, int noblock)
1507 | {
1508 | struct sock *sk = tfile->socket.sk;
1509 | struct sk_buff *skb;
1510 | int err;
1511 |
1512 | /* Under a page? Don't bother with paged skb. */
1513 | if (prepad + len < PAGE_SIZE || !linear)
1514 | linear = len;
1515 |
1516 | skb = sock_alloc_send_pskb(sk, prepad + linear, len - linear, noblock,
1517 | &err, 0);
1518 | if (!skb)
1519 | return ERR_PTR(err);
1520 |
1521 | skb_reserve(skb, prepad);
1522 | skb_put(skb, linear);
1523 | skb->data_len = len - linear;
1524 | skb->len += len - linear;
1525 |
1526 | return skb;
1527 | }
1528 |
1529 | static void tun_rx_batched(struct tun_struct *tun, struct tun_file *tfile,
1530 | struct sk_buff *skb, int more)
1531 | {
1532 | struct sk_buff_head *queue = &tfile->sk.sk_write_queue;
1533 | struct sk_buff_head process_queue;
1534 | u32 rx_batched = tun->rx_batched;
1535 | bool rcv = false;
1536 |
1537 | if (!rx_batched || (!more && skb_queue_empty(queue))) {
1538 | local_bh_disable();
1539 | skb_record_rx_queue(skb, tfile->queue_index);
1540 | netif_receive_skb(skb);
1541 | local_bh_enable();
1542 | return;
1543 | }
1544 |
1545 | spin_lock(&queue->lock);
1546 | if (!more || skb_queue_len(queue) == rx_batched) {
1547 | __skb_queue_head_init(&process_queue);
1548 | skb_queue_splice_tail_init(queue, &process_queue);
1549 | rcv = true;
1550 | } else {
1551 | __skb_queue_tail(queue, skb);
1552 | }
1553 | spin_unlock(&queue->lock);
1554 |
1555 | if (rcv) {
1556 | struct sk_buff *nskb;
1557 |
1558 | local_bh_disable();
1559 | while ((nskb = __skb_dequeue(&process_queue))) {
1560 | skb_record_rx_queue(nskb, tfile->queue_index);
1561 | netif_receive_skb(nskb);
1562 | }
1563 | skb_record_rx_queue(skb, tfile->queue_index);
1564 | netif_receive_skb(skb);
1565 | local_bh_enable();
1566 | }
1567 | }
1568 |
1569 | static bool tun_can_build_skb(struct tun_struct *tun, struct tun_file *tfile,
1570 | int len, int noblock, bool zerocopy)
1571 | {
1572 | if ((tun->flags & TUN_TYPE_MASK) != IFF_TAP)
1573 | return false;
1574 |
1575 | if (tfile->socket.sk->sk_sndbuf != INT_MAX)
1576 | return false;
1577 |
1578 | if (!noblock)
1579 | return false;
1580 |
1581 | if (zerocopy)
1582 | return false;
1583 |
1584 | if (SKB_DATA_ALIGN(len + TUN_RX_PAD) +
1585 | SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) > PAGE_SIZE)
1586 | return false;
1587 |
1588 | return true;
1589 | }
1590 |
1591 | static struct sk_buff *tun_build_skb(struct tun_struct *tun,
1592 | struct tun_file *tfile,
1593 | struct iov_iter *from,
1594 | struct virtio_net_hdr *hdr,
1595 | int len, int *skb_xdp)
1596 | {
1597 | struct page_frag *alloc_frag = &current->task_frag;
1598 | struct sk_buff *skb;
1599 | struct bpf_prog *xdp_prog;
1600 | int buflen = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
1601 | unsigned int delta = 0;
1602 | char *buf;
1603 | size_t copied;
1604 | int err, pad = TUN_RX_PAD;
1605 |
1606 | rcu_read_lock();
1607 | xdp_prog = rcu_dereference(tun->xdp_prog);
1608 | if (xdp_prog)
1609 | pad += TUN_HEADROOM;
1610 | buflen += SKB_DATA_ALIGN(len + pad);
1611 | rcu_read_unlock();
1612 |
1613 | alloc_frag->offset = ALIGN((u64)alloc_frag->offset, SMP_CACHE_BYTES);
1614 | if (unlikely(!skb_page_frag_refill(buflen, alloc_frag, GFP_KERNEL)))
1615 | return ERR_PTR(-ENOMEM);
1616 |
1617 | buf = (char *)page_address(alloc_frag->page) + alloc_frag->offset;
1618 | copied = copy_page_from_iter(alloc_frag->page,
1619 | alloc_frag->offset + pad,
1620 | len, from);
1621 | if (copied != len)
1622 | return ERR_PTR(-EFAULT);
1623 |
1624 | /* There's a small window in which XDP may be attached after the
1625 |  * check of xdp_prog above; this should be rare, and for simplicity
1626 |  * we then do XDP on the skb in case the headroom is not enough.
1627 |  */
1628 | if (hdr->gso_type || !xdp_prog)
1629 | *skb_xdp = 1;
1630 | else
1631 | *skb_xdp = 0;
1632 |
1633 | local_bh_disable();
1634 | rcu_read_lock();
1635 | xdp_prog = rcu_dereference(tun->xdp_prog);
1636 | if (xdp_prog && !*skb_xdp) {
1637 | struct xdp_buff xdp;
1638 | void *orig_data;
1639 | u32 act;
1640 |
1641 | xdp.data_hard_start = buf;
1642 | xdp.data = buf + pad;
1643 | xdp_set_data_meta_invalid(&xdp);
1644 | xdp.data_end = xdp.data + len;
1645 | xdp.rxq = &tfile->xdp_rxq;
1646 | orig_data = xdp.data;
1647 | act = bpf_prog_run_xdp(xdp_prog, &xdp);
1648 |
1649 | switch (act) {
1650 | case XDP_REDIRECT:
1651 | get_page(alloc_frag->page);
1652 | alloc_frag->offset += buflen;
1653 | err = xdp_do_redirect(tun->dev, &xdp, xdp_prog);
1654 | xdp_do_flush_map();
1655 | if (err)
1656 | goto err_redirect;
1657 | rcu_read_unlock();
1658 | local_bh_enable();
1659 | return NULL;
1660 | case XDP_TX:
1661 | get_page(alloc_frag->page);
1662 | alloc_frag->offset += buflen;
1663 | if (tun_xdp_tx(tun->dev, &xdp) < 0)
1664 | goto err_redirect;
1665 | rcu_read_unlock();
1666 | local_bh_enable();
1667 | return NULL;
1668 | case XDP_PASS:
1669 | delta = orig_data - xdp.data;
1670 | len = xdp.data_end - xdp.data;
1671 | break;
1672 | default:
1673 | bpf_warn_invalid_xdp_action(act);
1674 | /* fall through */
1675 | case XDP_ABORTED:
1676 | trace_xdp_exception(tun->dev, xdp_prog, act);
1677 | /* fall through */
1678 | case XDP_DROP:
1679 | goto err_xdp;
1680 | }
1681 | }
1682 |
1683 | skb = build_skb(buf, buflen);
1684 | if (!skb) {
1685 | rcu_read_unlock();
1686 | local_bh_enable();
1687 | return ERR_PTR(-ENOMEM);
1688 | }
1689 |
1690 | skb_reserve(skb, pad - delta);
1691 | skb_put(skb, len);
1692 | skb_set_owner_w(skb, tfile->socket.sk);
1693 | get_page(alloc_frag->page);
1694 | alloc_frag->offset += buflen;
1695 |
1696 | rcu_read_unlock();
1697 | local_bh_enable();
1698 |
1699 | return skb;
1700 |
1701 | err_redirect:
1702 | put_page(alloc_frag->page);
1703 | err_xdp:
1704 | rcu_read_unlock();
1705 | local_bh_enable();
1706 | this_cpu_inc(tun->pcpu_stats->rx_dropped);
1707 | return NULL;
1708 | }
1709 |
1710 | /* Get packet from user space buffer */
1711 | static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
1712 | void *msg_control, struct iov_iter *from,
1713 | int noblock, bool more)
1714 | {
1715 | struct tun_pi pi = { 0, cpu_to_be16(ETH_P_IP) };
1716 | struct sk_buff *skb;
1717 | size_t total_len = iov_iter_count(from);
1718 | size_t len = total_len, align = tun->align, linear;
1719 | struct virtio_net_hdr gso = { 0 };
1720 | struct tun_pcpu_stats *stats;
1721 | int good_linear;
1722 | int copylen;
1723 | bool zerocopy = false;
1724 | int err;
1725 | u32 rxhash = 0;
1726 | int skb_xdp = 1;
1727 | bool frags = tun_napi_frags_enabled(tfile);
1728 |
1729 | if (!(tun->flags & IFF_NO_PI)) {
1730 | if (len < sizeof(pi))
1731 | return -EINVAL;
1732 | len -= sizeof(pi);
1733 |
1734 | if (!copy_from_iter_full(&pi, sizeof(pi), from))
1735 | return -EFAULT;
1736 | }
1737 |
1738 | if (tun->flags & IFF_VNET_HDR) {
1739 | int vnet_hdr_sz = READ_ONCE(tun->vnet_hdr_sz);
1740 |
1741 | if (len < vnet_hdr_sz)
1742 | return -EINVAL;
1743 | len -= vnet_hdr_sz;
1744 |
1745 | if (!copy_from_iter_full(&gso, sizeof(gso), from))
1746 | return -EFAULT;
1747 |
1748 | if ((gso.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) &&
1749 | tun16_to_cpu(tun, gso.csum_start) + tun16_to_cpu(tun, gso.csum_offset) + 2 > tun16_to_cpu(tun, gso.hdr_len))
1750 | gso.hdr_len = cpu_to_tun16(tun, tun16_to_cpu(tun, gso.csum_start) + tun16_to_cpu(tun, gso.csum_offset) + 2);
1751 |
1752 | if (tun16_to_cpu(tun, gso.hdr_len) > len)
1753 | return -EINVAL;
1754 | iov_iter_advance(from, vnet_hdr_sz - sizeof(gso));
1755 | }
1756 |
1757 | if ((tun->flags & TUN_TYPE_MASK) == IFF_TAP) {
1758 | align += NET_IP_ALIGN;
1759 | if (unlikely(len < ETH_HLEN ||
1760 | (gso.hdr_len && tun16_to_cpu(tun, gso.hdr_len) < ETH_HLEN)))
1761 | return -EINVAL;
1762 | }
1763 |
1764 | good_linear = SKB_MAX_HEAD(align);
1765 |
1766 | if (msg_control) {
1767 | struct iov_iter i = *from;
1768 |
1769 | /* There are 256 bytes to be copied into the skb, so there is
1770 |  * enough room to expand the skb head in case it is needed.
1771 |  * The rest of the buffer is mapped from userspace.
1772 |  */
1773 | copylen = gso.hdr_len ? tun16_to_cpu(tun, gso.hdr_len) : GOODCOPY_LEN;
1774 | if (copylen > good_linear)
1775 | copylen = good_linear;
1776 | linear = copylen;
1777 | iov_iter_advance(&i, copylen);
1778 | if (iov_iter_npages(&i, INT_MAX) <= MAX_SKB_FRAGS)
1779 | zerocopy = true;
1780 | }
1781 |
1782 | if (!frags && tun_can_build_skb(tun, tfile, len, noblock, zerocopy)) {
1783 | /* For packets that are not easy to process here
1784 |  * (e.g. GSO or jumbo packets), we run the generic
1785 |  * XDP routine after the skb has been created.
1786 |  */
1787 | skb = tun_build_skb(tun, tfile, from, &gso, len, &skb_xdp);
1788 | if (IS_ERR(skb)) {
1789 | this_cpu_inc(tun->pcpu_stats->rx_dropped);
1790 | return PTR_ERR(skb);
1791 | }
1792 | if (!skb)
1793 | return total_len;
1794 | } else {
1795 | if (!zerocopy) {
1796 | copylen = len;
1797 | if (tun16_to_cpu(tun, gso.hdr_len) > good_linear)
1798 | linear = good_linear;
1799 | else
1800 | linear = tun16_to_cpu(tun, gso.hdr_len);
1801 | }
1802 |
1803 | if (frags) {
1804 | mutex_lock(&tfile->napi_mutex);
1805 | skb = tun_napi_alloc_frags(tfile, copylen, from);
1806 | /* tun_napi_alloc_frags() enforces a layout for the skb.
1807 | * If zerocopy is enabled, then this layout will be
1808 | * overwritten by zerocopy_sg_from_iter().
1809 | */
1810 | zerocopy = false;
1811 | } else {
1812 | skb = tun_alloc_skb(tfile, align, copylen, linear,
1813 | noblock);
1814 | }
1815 |
1816 | if (IS_ERR(skb)) {
1817 | if (PTR_ERR(skb) != -EAGAIN)
1818 | this_cpu_inc(tun->pcpu_stats->rx_dropped);
1819 | if (frags)
1820 | mutex_unlock(&tfile->napi_mutex);
1821 | return PTR_ERR(skb);
1822 | }
1823 |
1824 | if (zerocopy)
1825 | err = zerocopy_sg_from_iter(skb, from);
1826 | else
1827 | err = skb_copy_datagram_from_iter(skb, 0, from, len);
1828 |
1829 | if (err) {
1830 | err = -EFAULT;
1831 | drop:
1832 | this_cpu_inc(tun->pcpu_stats->rx_dropped);
1833 | kfree_skb(skb);
1834 | if (frags) {
1835 | tfile->napi.skb = NULL;
1836 | mutex_unlock(&tfile->napi_mutex);
1837 | }
1838 |
1839 | return err;
1840 | }
1841 | }
1842 |
1843 | if (virtio_net_hdr_to_skb(skb, &gso, tun_is_little_endian(tun))) {
1844 | this_cpu_inc(tun->pcpu_stats->rx_frame_errors);
1845 | kfree_skb(skb);
1846 | if (frags) {
1847 | tfile->napi.skb = NULL;
1848 | mutex_unlock(&tfile->napi_mutex);
1849 | }
1850 |
1851 | return -EINVAL;
1852 | }
1853 |
1854 | switch (tun->flags & TUN_TYPE_MASK) {
1855 | case IFF_TUN:
1856 | if (tun->flags & IFF_NO_PI) {
1857 | u8 ip_version = skb->len ? (skb->data[0] >> 4) : 0;
1858 |
1859 | switch (ip_version) {
1860 | case 4:
1861 | pi.proto = htons(ETH_P_IP);
1862 | break;
1863 | case 6:
1864 | pi.proto = htons(ETH_P_IPV6);
1865 | break;
1866 | default:
1867 | this_cpu_inc(tun->pcpu_stats->rx_dropped);
1868 | kfree_skb(skb);
1869 | return -EINVAL;
1870 | }
1871 | }
1872 |
1873 | skb_reset_mac_header(skb);
1874 | skb->protocol = pi.proto;
1875 | skb->dev = tun->dev;
1876 | break;
1877 | case IFF_TAP:
1878 | if (!frags)
1879 | skb->protocol = eth_type_trans(skb, tun->dev);
1880 | break;
1881 | }
1882 |
1883 | /* copy skb_ubuf_info for callback when skb has no error */
1884 | if (zerocopy) {
1885 | skb_shinfo(skb)->destructor_arg = msg_control;
1886 | skb_shinfo(skb)->tx_flags |= SKBTX_DEV_ZEROCOPY;
1887 | skb_shinfo(skb)->tx_flags |= SKBTX_SHARED_FRAG;
1888 | } else if (msg_control) {
1889 | struct ubuf_info *uarg = msg_control;
1890 | uarg->callback(uarg, false);
1891 | }
1892 |
1893 | skb_reset_network_header(skb);
1894 | skb_probe_transport_header(skb, 0);
1895 |
1896 | if (skb_xdp) {
1897 | struct bpf_prog *xdp_prog;
1898 | int ret;
1899 |
1900 | local_bh_disable();
1901 | rcu_read_lock();
1902 | xdp_prog = rcu_dereference(tun->xdp_prog);
1903 | if (xdp_prog) {
1904 | ret = do_xdp_generic(xdp_prog, skb);
1905 | if (ret != XDP_PASS) {
1906 | rcu_read_unlock();
1907 | local_bh_enable();
1908 | if (frags) {
1909 | tfile->napi.skb = NULL;
1910 | mutex_unlock(&tfile->napi_mutex);
1911 | }
1912 | return total_len;
1913 | }
1914 | }
1915 | rcu_read_unlock();
1916 | local_bh_enable();
1917 | }
1918 |
1919 | /* Compute the costly rx hash only if needed for flow updates.
1920 |  * There is a very small possibility of out-of-order delivery
1921 |  * while switching queues; it is not worth optimizing for.
1922 |  */
1923 | if (!rcu_access_pointer(tun->steering_prog) && tun->numqueues > 1 &&
1924 | !tfile->detached)
1925 | rxhash = __skb_get_hash_symmetric(skb);
1926 |
1927 | rcu_read_lock();
1928 | if (unlikely(!(tun->dev->flags & IFF_UP))) {
1929 | err = -EIO;
1930 | rcu_read_unlock();
1931 | goto drop;
1932 | }
1933 |
1934 | if (frags) {
1935 | /* Exercise flow dissector code path. */
1936 | u32 headlen = eth_get_headlen(skb->data, skb_headlen(skb));
1937 |
1938 | if (unlikely(headlen > skb_headlen(skb))) {
1939 | this_cpu_inc(tun->pcpu_stats->rx_dropped);
1940 | napi_free_frags(&tfile->napi);
1941 | rcu_read_unlock();
1942 | mutex_unlock(&tfile->napi_mutex);
1943 | WARN_ON(1);
1944 | return -ENOMEM;
1945 | }
1946 |
1947 | local_bh_disable();
1948 | napi_gro_frags(&tfile->napi);
1949 | local_bh_enable();
1950 | mutex_unlock(&tfile->napi_mutex);
1951 | } else if (tfile->napi_enabled) {
1952 | struct sk_buff_head *queue = &tfile->sk.sk_write_queue;
1953 | int queue_len;
1954 |
1955 | spin_lock_bh(&queue->lock);
1956 | __skb_queue_tail(queue, skb);
1957 | queue_len = skb_queue_len(queue);
1958 | spin_unlock(&queue->lock);
1959 |
1960 | if (!more || queue_len > NAPI_POLL_WEIGHT)
1961 | napi_schedule(&tfile->napi);
1962 |
1963 | local_bh_enable();
1964 | } else if (!IS_ENABLED(CONFIG_4KSTACKS)) {
1965 | tun_rx_batched(tun, tfile, skb, more);
1966 | } else {
1967 | netif_rx_ni(skb);
1968 | }
1969 | rcu_read_unlock();
1970 |
1971 | stats = get_cpu_ptr(tun->pcpu_stats);
1972 | u64_stats_update_begin(&stats->syncp);
1973 | stats->rx_packets++;
1974 | stats->rx_bytes += len;
1975 | u64_stats_update_end(&stats->syncp);
1976 | put_cpu_ptr(stats);
1977 |
1978 | if (rxhash)
1979 | tun_flow_update(tun, rxhash, tfile);
1980 |
1981 | return total_len;
1982 | }
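From userspace, tun_get_user() is the receiving end of write() on the tun fd. A hedged sketch of the sending side for IFF_TUN | IFF_NO_PI, where the driver derives the protocol from the IP version nibble as in the switch above (error handling trimmed):

    #include <fcntl.h>
    #include <linux/if_tun.h>
    #include <net/if.h>
    #include <string.h>
    #include <sys/ioctl.h>
    #include <unistd.h>

    int open_tun(const char *name)
    {
            struct ifreq ifr;
            int fd = open("/dev/net/tun", O_RDWR);

            if (fd < 0)
                    return -1;
            memset(&ifr, 0, sizeof(ifr));
            ifr.ifr_flags = IFF_TUN | IFF_NO_PI;
            strncpy(ifr.ifr_name, name, IFNAMSIZ - 1);
            if (ioctl(fd, TUNSETIFF, &ifr) < 0) {
                    close(fd);
                    return -1;
            }
            return fd;
    }

    /* each write() carries one raw IP packet; pkt[0] >> 4 must be
     * 4 or 6, or the driver drops it with -EINVAL as shown above */
    ssize_t send_packet(int fd, const void *pkt, size_t len)
    {
            return write(fd, pkt, len);
    }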
1983 |
1984 | static ssize_t tun_chr_write_iter(struct kiocb *iocb, struct iov_iter *from)
1985 | {
1986 | struct file *file = iocb->ki_filp;
1987 | struct tun_file *tfile = file->private_data;
1988 | struct tun_struct *tun = tun_get(tfile);
1989 | ssize_t result;
1990 |
1991 | if (!tun)
1992 | return -EBADFD;
1993 |
1994 | result = tun_get_user(tun, tfile, NULL, from,
1995 | file->f_flags & O_NONBLOCK, false);
1996 |
1997 | tun_put(tun);
1998 | return result;
1999 | }
2000 |
2001 | static ssize_t tun_put_user_xdp(struct tun_struct *tun,
2002 | struct tun_file *tfile,
2003 | struct xdp_frame *xdp_frame,
2004 | struct iov_iter *iter)
2005 | {
2006 | int vnet_hdr_sz = 0;
2007 | size_t size = xdp_frame->len;
2008 | struct tun_pcpu_stats *stats;
2009 | size_t ret;
2010 |
2011 | if (tun->flags & IFF_VNET_HDR) {
2012 | struct virtio_net_hdr gso = { 0 };
2013 |
2014 | vnet_hdr_sz = READ_ONCE(tun->vnet_hdr_sz);
2015 | if (unlikely(iov_iter_count(iter) < vnet_hdr_sz))
2016 | return -EINVAL;
2017 | if (unlikely(copy_to_iter(&gso, sizeof(gso), iter) !=
2018 | sizeof(gso)))
2019 | return -EFAULT;
2020 | iov_iter_advance(iter, vnet_hdr_sz - sizeof(gso));
2021 | }
2022 |
2023 | ret = copy_to_iter(xdp_frame->data, size, iter) + vnet_hdr_sz;
2024 |
2025 | stats = get_cpu_ptr(tun->pcpu_stats);
2026 | u64_stats_update_begin(&stats->syncp);
2027 | stats->tx_packets++;
2028 | stats->tx_bytes += ret;
2029 | u64_stats_update_end(&stats->syncp);
2030 | put_cpu_ptr(tun->pcpu_stats);
2031 |
2032 | return ret;
2033 | }
2034 |
2035 | /* Put packet to the user space buffer */
2036 | static ssize_t tun_put_user(struct tun_struct *tun,
2037 | struct tun_file *tfile,
2038 | struct sk_buff *skb,
2039 | struct iov_iter *iter)
2040 | {
2041 | struct tun_pi pi = { 0, skb->protocol };
2042 | struct tun_pcpu_stats *stats;
2043 | ssize_t total;
2044 | int vlan_offset = 0;
2045 | int vlan_hlen = 0;
2046 | int vnet_hdr_sz = 0;
2047 |
2048 | if (skb_vlan_tag_present(skb))
2049 | vlan_hlen = VLAN_HLEN;
2050 |
2051 | if (tun->flags & IFF_VNET_HDR)
2052 | vnet_hdr_sz = READ_ONCE(tun->vnet_hdr_sz);
2053 |
2054 | total = skb->len + vlan_hlen + vnet_hdr_sz;
2055 |
2056 | if (!(tun->flags & IFF_NO_PI)) {
2057 | if (iov_iter_count(iter) < sizeof(pi))
2058 | return -EINVAL;
2059 |
2060 | total += sizeof(pi);
2061 | if (iov_iter_count(iter) < total) {
2062 | /* Packet will be stripped */
2063 | pi.flags |= TUN_PKT_STRIP;
2064 | }
2065 |
2066 | if (copy_to_iter(&pi, sizeof(pi), iter) != sizeof(pi))
2067 | return -EFAULT;
2068 | }
2069 |
2070 | if (vnet_hdr_sz) {
2071 | struct virtio_net_hdr gso;
2072 |
2073 | if (iov_iter_count(iter) < vnet_hdr_sz)
2074 | return -EINVAL;
2075 |
2076 | if (virtio_net_hdr_from_skb(skb, &gso,
2077 | tun_is_little_endian(tun), true,
2078 | vlan_hlen)) {
2079 | struct skb_shared_info *sinfo = skb_shinfo(skb);
2080 | pr_err("unexpected GSO type: "
2081 | "0x%x, gso_size %d, hdr_len %d\n",
2082 | sinfo->gso_type, tun16_to_cpu(tun, gso.gso_size),
2083 | tun16_to_cpu(tun, gso.hdr_len));
2084 | print_hex_dump(KERN_ERR, "tun: ",
2085 | DUMP_PREFIX_NONE,
2086 | 16, 1, skb->head,
2087 | min((int)tun16_to_cpu(tun, gso.hdr_len), 64), true);
2088 | WARN_ON_ONCE(1);
2089 | return -EINVAL;
2090 | }
2091 |
2092 | if (copy_to_iter(&gso, sizeof(gso), iter) != sizeof(gso))
2093 | return -EFAULT;
2094 |
2095 | iov_iter_advance(iter, vnet_hdr_sz - sizeof(gso));
2096 | }
2097 |
2098 | if (vlan_hlen) {
2099 | int ret;
2100 | struct veth veth;
2101 |
2102 | veth.h_vlan_proto = skb->vlan_proto;
2103 | veth.h_vlan_TCI = htons(skb_vlan_tag_get(skb));
2104 |
2105 | vlan_offset = offsetof(struct vlan_ethhdr, h_vlan_proto);
2106 |
2107 | ret = skb_copy_datagram_iter(skb, 0, iter, vlan_offset);
2108 | if (ret || !iov_iter_count(iter))
2109 | goto done;
2110 |
2111 | ret = copy_to_iter(&veth, sizeof(veth), iter);
2112 | if (ret != sizeof(veth) || !iov_iter_count(iter))
2113 | goto done;
2114 | }
2115 |
2116 | skb_copy_datagram_iter(skb, vlan_offset, iter, skb->len - vlan_offset);
2117 |
2118 | done:
2119 | /* caller is in process context */
2120 | stats = get_cpu_ptr(tun->pcpu_stats);
2121 | u64_stats_update_begin(&stats->syncp);
2122 | stats->tx_packets++;
2123 | stats->tx_bytes += skb->len + vlan_hlen;
2124 | u64_stats_update_end(&stats->syncp);
2125 | put_cpu_ptr(tun->pcpu_stats);
2126 |
2127 | return total;
2128 | }
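The mirror image for reads: when IFF_NO_PI is *not* set, every packet read from the fd is prefixed by struct tun_pi, and TUN_PKT_STRIP in pi.flags reports the truncation marked above. A hedged sketch of the reading side:

    #include <arpa/inet.h>
    #include <linux/if_tun.h>
    #include <stdio.h>
    #include <sys/uio.h>

    ssize_t recv_packet(int fd, unsigned char *buf, size_t len)
    {
            struct tun_pi pi;
            struct iovec iov[2] = {
                    { .iov_base = &pi, .iov_len = sizeof(pi) },
                    { .iov_base = buf, .iov_len = len },
            };
            ssize_t n = readv(fd, iov, 2);

            if (n < (ssize_t)sizeof(pi))
                    return -1;
            if (pi.flags & TUN_PKT_STRIP)
                    fprintf(stderr, "packet truncated\n");
            printf("proto 0x%04x, %zd payload bytes\n",
                   ntohs(pi.proto), n - (ssize_t)sizeof(pi));
            return n - (ssize_t)sizeof(pi);
    }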
2129 |
2130 | static void *tun_ring_recv(struct tun_file *tfile, int noblock, int *err)
2131 | {
2132 | DECLARE_WAITQUEUE(wait, current);
2133 | void *ptr = NULL;
2134 | int error = 0;
2135 |
2136 | ptr = ptr_ring_consume(&tfile->tx_ring);
2137 | if (ptr)
2138 | goto out;
2139 | if (noblock) {
2140 | error = -EAGAIN;
2141 | goto out;
2142 | }
2143 |
2144 | add_wait_queue(&tfile->wq.wait, &wait);
2145 |
2146 | while (1) {
2147 | set_current_state(TASK_INTERRUPTIBLE);
2148 | ptr = ptr_ring_consume(&tfile->tx_ring);
2149 | if (ptr)
2150 | break;
2151 | if (signal_pending(current)) {
2152 | error = -ERESTARTSYS;
2153 | break;
2154 | }
2155 | if (tfile->socket.sk->sk_shutdown & RCV_SHUTDOWN) {
2156 | error = -EFAULT;
2157 | break;
2158 | }
2159 |
2160 | schedule();
2161 | }
2162 |
2163 | __set_current_state(TASK_RUNNING);
2164 | remove_wait_queue(&tfile->wq.wait, &wait);
2165 |
2166 | out:
2167 | *err = error;
2168 | return ptr;
2169 | }
2170 |
2171 | static ssize_t tun_do_read(struct tun_struct *tun, struct tun_file *tfile,
2172 | struct iov_iter *to,
2173 | int noblock, void *ptr)
2174 | {
2175 | ssize_t ret;
2176 | int err;
2177 |
2178 | tun_debug(KERN_INFO, tun, "tun_do_read\n");
2179 |
2180 | if (!iov_iter_count(to)) {
2181 | tun_ptr_free(ptr);
2182 | return 0;
2183 | }
2184 |
2185 | if (!ptr) {
2186 | /* Read frames from ring */
2187 | ptr = tun_ring_recv(tfile, noblock, &err);
2188 | if (!ptr)
2189 | return err;
2190 | }
2191 |
2192 | if (tun_is_xdp_frame(ptr)) {
2193 | struct xdp_frame *xdpf = tun_ptr_to_xdp(ptr);
2194 |
2195 | ret = tun_put_user_xdp(tun, tfile, xdpf, to);
2196 | xdp_return_frame(xdpf);
2197 | } else {
2198 | struct sk_buff *skb = ptr;
2199 |
2200 | ret = tun_put_user(tun, tfile, skb, to);
2201 | if (unlikely(ret < 0))
2202 | kfree_skb(skb);
2203 | else
2204 | consume_skb(skb);
2205 | }
2206 |
2207 | return ret;
2208 | }
2209 |
2210 | static ssize_t tun_chr_read_iter(struct kiocb *iocb, struct iov_iter *to)
2211 | {
2212 | struct file *file = iocb->ki_filp;
2213 | struct tun_file *tfile = file->private_data;
2214 | struct tun_struct *tun = tun_get(tfile);
2215 | ssize_t len = iov_iter_count(to), ret;
2216 |
2217 | if (!tun)
2218 | return -EBADFD;
2219 | ret = tun_do_read(tun, tfile, to, file->f_flags & O_NONBLOCK, NULL);
2220 | ret = min_t(ssize_t, ret, len);
2221 | if (ret > 0)
2222 | iocb->ki_pos = ret;
2223 | tun_put(tun);
2224 | return ret;
2225 | }
2226 |
2227 | static void tun_prog_free(struct rcu_head *rcu)
2228 | {
2229 | struct tun_prog *prog = container_of(rcu, struct tun_prog, rcu);
2230 |
2231 | bpf_prog_destroy(prog->prog);
2232 | kfree(prog);
2233 | }
2234 |
2235 | static int __tun_set_ebpf(struct tun_struct *tun,
2236 | struct tun_prog __rcu **prog_p,
2237 | struct bpf_prog *prog)
2238 | {
2239 | struct tun_prog *old, *new = NULL;
2240 |
2241 | if (prog) {
2242 | new = kmalloc(sizeof(*new), GFP_KERNEL);
2243 | if (!new)
2244 | return -ENOMEM;
2245 | new->prog = prog;
2246 | }
2247 |
2248 | spin_lock_bh(&tun->lock);
2249 | old = rcu_dereference_protected(*prog_p,
2250 | lockdep_is_held(&tun->lock));
2251 | rcu_assign_pointer(*prog_p, new);
2252 | spin_unlock_bh(&tun->lock);
2253 |
2254 | if (old)
2255 | call_rcu(&old->rcu, tun_prog_free);
2256 |
2257 | return 0;
2258 | }
2259 |
2260 | static void tun_free_netdev(struct net_device *dev)
2261 | {
2262 | struct tun_struct *tun = netdev_priv(dev);
2263 |
2264 | BUG_ON(!(list_empty(&tun->disabled)));
2265 | free_percpu(tun->pcpu_stats);
2266 | tun_flow_uninit(tun);
2267 | security_tun_dev_free_security(tun->security);
2268 | __tun_set_ebpf(tun, &tun->steering_prog, NULL);
2269 | __tun_set_ebpf(tun, &tun->filter_prog, NULL);
2270 | }
2271 |
2272 | static void tun_setup(struct net_device *dev)
2273 | {
2274 | struct tun_struct *tun = netdev_priv(dev);
2275 |
2276 | tun->owner = INVALID_UID;
2277 | tun->group = INVALID_GID;
2278 | tun_default_link_ksettings(dev, &tun->link_ksettings);
2279 |
2280 | dev->ethtool_ops = &tun_ethtool_ops;
2281 | dev->needs_free_netdev = true;
2282 | dev->priv_destructor = tun_free_netdev;
2283 | /* We prefer our own queue length */
2284 | dev->tx_queue_len = TUN_READQ_SIZE;
2285 | }
2286 |
2287 | /* Trivial set of netlink ops to allow deleting tun or tap
2288 | * device with netlink.
2289 | */
2290 | static int tun_validate(struct nlattr *tb[], struct nlattr *data[],
2291 | struct netlink_ext_ack *extack)
2292 | {
2293 | NL_SET_ERR_MSG(extack,
2294 | "tun/tap creation via rtnetlink is not supported.");
2295 | return -EOPNOTSUPP;
2296 | }
2297 |
2298 | static size_t tun_get_size(const struct net_device *dev)
2299 | {
2300 | BUILD_BUG_ON(sizeof(u32) != sizeof(uid_t));
2301 | BUILD_BUG_ON(sizeof(u32) != sizeof(gid_t));
2302 |
2303 | return nla_total_size(sizeof(uid_t)) + /* OWNER */
2304 | nla_total_size(sizeof(gid_t)) + /* GROUP */
2305 | nla_total_size(sizeof(u8)) + /* TYPE */
2306 | nla_total_size(sizeof(u8)) + /* PI */
2307 | nla_total_size(sizeof(u8)) + /* VNET_HDR */
2308 | nla_total_size(sizeof(u8)) + /* PERSIST */
2309 | nla_total_size(sizeof(u8)) + /* MULTI_QUEUE */
2310 | nla_total_size(sizeof(u32)) + /* NUM_QUEUES */
2311 | nla_total_size(sizeof(u32)) + /* NUM_DISABLED_QUEUES */
2312 | 0;
2313 | }
2314 |
2315 | static int tun_fill_info(struct sk_buff *skb, const struct net_device *dev)
2316 | {
2317 | struct tun_struct *tun = netdev_priv(dev);
2318 |
2319 | if (nla_put_u8(skb, IFLA_TUN_TYPE, tun->flags & TUN_TYPE_MASK))
2320 | goto nla_put_failure;
2321 | if (uid_valid(tun->owner) &&
2322 | nla_put_u32(skb, IFLA_TUN_OWNER,
2323 | from_kuid_munged(current_user_ns(), tun->owner)))
2324 | goto nla_put_failure;
2325 | if (gid_valid(tun->group) &&
2326 | nla_put_u32(skb, IFLA_TUN_GROUP,
2327 | from_kgid_munged(current_user_ns(), tun->group)))
2328 | goto nla_put_failure;
2329 | if (nla_put_u8(skb, IFLA_TUN_PI, !(tun->flags & IFF_NO_PI)))
2330 | goto nla_put_failure;
2331 | if (nla_put_u8(skb, IFLA_TUN_VNET_HDR, !!(tun->flags & IFF_VNET_HDR)))
2332 | goto nla_put_failure;
2333 | if (nla_put_u8(skb, IFLA_TUN_PERSIST, !!(tun->flags & IFF_PERSIST)))
2334 | goto nla_put_failure;
2335 | if (nla_put_u8(skb, IFLA_TUN_MULTI_QUEUE,
2336 | !!(tun->flags & IFF_MULTI_QUEUE)))
2337 | goto nla_put_failure;
2338 | if (tun->flags & IFF_MULTI_QUEUE) {
2339 | if (nla_put_u32(skb, IFLA_TUN_NUM_QUEUES, tun->numqueues))
2340 | goto nla_put_failure;
2341 | if (nla_put_u32(skb, IFLA_TUN_NUM_DISABLED_QUEUES,
2342 | tun->numdisabled))
2343 | goto nla_put_failure;
2344 | }
2345 |
2346 | return 0;
2347 |
2348 | nla_put_failure:
2349 | return -EMSGSIZE;
2350 | }
2351 |
2352 | static struct rtnl_link_ops tun_link_ops __read_mostly = {
2353 | .kind = DRV_NAME,
2354 | .priv_size = sizeof(struct tun_struct),
2355 | .setup = tun_setup,
2356 | .validate = tun_validate,
2357 | .get_size = tun_get_size,
2358 | .fill_info = tun_fill_info,
2359 | };
2360 |
2361 | static void tun_sock_write_space(struct sock *sk)
2362 | {
2363 | struct tun_file *tfile;
2364 | wait_queue_head_t *wqueue;
2365 |
2366 | if (!sock_writeable(sk))
2367 | return;
2368 |
2369 | if (!test_and_clear_bit(SOCKWQ_ASYNC_NOSPACE, &sk->sk_socket->flags))
2370 | return;
2371 |
2372 | wqueue = sk_sleep(sk);
2373 | if (wqueue && waitqueue_active(wqueue))
2374 | wake_up_interruptible_sync_poll(wqueue, EPOLLOUT |
2375 | EPOLLWRNORM | EPOLLWRBAND);
2376 |
2377 | tfile = container_of(sk, struct tun_file, sk);
2378 | kill_fasync(&tfile->fasync, SIGIO, POLL_OUT);
2379 | }
2380 |
2381 | static int tun_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len)
2382 | {
2383 | int ret;
2384 | struct tun_file *tfile = container_of(sock, struct tun_file, socket);
2385 | struct tun_struct *tun = tun_get(tfile);
2386 |
2387 | if (!tun)
2388 | return -EBADFD;
2389 |
2390 | ret = tun_get_user(tun, tfile, m->msg_control, &m->msg_iter,
2391 | m->msg_flags & MSG_DONTWAIT,
2392 | m->msg_flags & MSG_MORE);
2393 | tun_put(tun);
2394 | return ret;
2395 | }
2396 |
2397 | static int tun_recvmsg(struct socket *sock, struct msghdr *m, size_t total_len,
2398 | int flags)
2399 | {
2400 | struct tun_file *tfile = container_of(sock, struct tun_file, socket);
2401 | struct tun_struct *tun = tun_get(tfile);
2402 | void *ptr = m->msg_control;
2403 | int ret;
2404 |
2405 | if (!tun) {
2406 | ret = -EBADFD;
2407 | goto out_free;
2408 | }
2409 |
2410 | if (flags & ~(MSG_DONTWAIT|MSG_TRUNC|MSG_ERRQUEUE)) {
2411 | ret = -EINVAL;
2412 | goto out_put_tun;
2413 | }
2414 | if (flags & MSG_ERRQUEUE) {
2415 | ret = sock_recv_errqueue(sock->sk, m, total_len,
2416 | SOL_PACKET, TUN_TX_TIMESTAMP);
2417 | goto out;
2418 | }
2419 | ret = tun_do_read(tun, tfile, &m->msg_iter, flags & MSG_DONTWAIT, ptr);
2420 | if (ret > (ssize_t)total_len) {
2421 | m->msg_flags |= MSG_TRUNC;
2422 | ret = flags & MSG_TRUNC ? ret : total_len;
2423 | }
2424 | out:
2425 | tun_put(tun);
2426 | return ret;
2427 |
2428 | out_put_tun:
2429 | tun_put(tun);
2430 | out_free:
2431 | tun_ptr_free(ptr);
2432 | return ret;
2433 | }
2434 |
2435 | static int tun_ptr_peek_len(void *ptr)
2436 | {
2437 | if (likely(ptr)) {
2438 | if (tun_is_xdp_frame(ptr)) {
2439 | struct xdp_frame *xdpf = tun_ptr_to_xdp(ptr);
2440 |
2441 | return xdpf->len;
2442 | }
2443 | return __skb_array_len_with_tag(ptr);
2444 | } else {
2445 | return 0;
2446 | }
2447 | }
2448 |
2449 | static int tun_peek_len(struct socket *sock)
2450 | {
2451 | struct tun_file *tfile = container_of(sock, struct tun_file, socket);
2452 | struct tun_struct *tun;
2453 | int ret = 0;
2454 |
2455 | tun = tun_get(tfile);
2456 | if (!tun)
2457 | return 0;
2458 |
2459 | ret = PTR_RING_PEEK_CALL(&tfile->tx_ring, tun_ptr_peek_len);
2460 | tun_put(tun);
2461 |
2462 | return ret;
2463 | }
2464 |
2465 | /* Ops structure to mimic raw sockets with tun */
2466 | static const struct proto_ops tun_socket_ops = {
2467 | .peek_len = tun_peek_len,
2468 | .sendmsg = tun_sendmsg,
2469 | .recvmsg = tun_recvmsg,
2470 | };
2471 |
2472 | static struct proto tun_proto = {
2473 | .name = "tun",
2474 | .owner = THIS_MODULE,
2475 | .obj_size = sizeof(struct tun_file),
2476 | };
2477 |
2478 | static int tun_flags(struct tun_struct *tun)
2479 | {
2480 | return tun->flags & (TUN_FEATURES | IFF_PERSIST | IFF_TUN | IFF_TAP);
2481 | }
2482 |
2483 | static ssize_t tun_show_flags(struct device *dev, struct device_attribute *attr,
2484 | char *buf)
2485 | {
2486 | struct tun_struct *tun = netdev_priv(to_net_dev(dev));
2487 | return sprintf(buf, "0x%x\n", tun_flags(tun));
2488 | }
2489 |
2490 | static ssize_t tun_show_owner(struct device *dev, struct device_attribute *attr,
2491 | char *buf)
2492 | {
2493 | struct tun_struct *tun = netdev_priv(to_net_dev(dev));
2494 | return uid_valid(tun->owner)?
2495 | sprintf(buf, "%u\n",
2496 | from_kuid_munged(current_user_ns(), tun->owner)):
2497 | sprintf(buf, "-1\n");
2498 | }
2499 |
2500 | static ssize_t tun_show_group(struct device *dev, struct device_attribute *attr,
2501 | char *buf)
2502 | {
2503 | struct tun_struct *tun = netdev_priv(to_net_dev(dev));
2504 | return gid_valid(tun->group) ?
2505 | sprintf(buf, "%u\n",
2506 | from_kgid_munged(current_user_ns(), tun->group)):
2507 | sprintf(buf, "-1\n");
2508 | }
2509 |
2510 | static DEVICE_ATTR(tun_flags, 0444, tun_show_flags, NULL);
2511 | static DEVICE_ATTR(owner, 0444, tun_show_owner, NULL);
2512 | static DEVICE_ATTR(group, 0444, tun_show_group, NULL);
2513 |
2514 | static struct attribute *tun_dev_attrs[] = {
2515 | &dev_attr_tun_flags.attr,
2516 | &dev_attr_owner.attr,
2517 | &dev_attr_group.attr,
2518 | NULL
2519 | };
2520 |
2521 | static const struct attribute_group tun_attr_group = {
2522 | .attrs = tun_dev_attrs
2523 | };
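These attributes surface under /sys/class/net/<ifname>/. A small sketch reading tun_flags back (0x1001, for example, would be IFF_TUN | IFF_NO_PI):

    #include <stdio.h>

    int read_tun_flags(const char *ifname)
    {
            char path[128];
            unsigned int flags = 0;
            FILE *f;

            snprintf(path, sizeof(path),
                     "/sys/class/net/%s/tun_flags", ifname);
            f = fopen(path, "r");
            if (!f)
                    return -1;
            /* tun_show_flags() prints one "0x%x" line */
            if (fscanf(f, "0x%x", &flags) != 1)
                    flags = 0;
            fclose(f);
            printf("%s: tun_flags=0x%x\n", ifname, flags);
            return 0;
    }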
2524 |
2525 | static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
2526 | {
2527 | struct tun_struct *tun;
2528 | struct tun_file *tfile = file->private_data;
2529 | struct net_device *dev;
2530 | int err;
2531 |
2532 | if (tfile->detached)
2533 | return -EINVAL;
2534 |
2535 | if ((ifr->ifr_flags & IFF_NAPI_FRAGS)) {
2536 | if (!capable(CAP_NET_ADMIN))
2537 | return -EPERM;
2538 |
2539 | if (!(ifr->ifr_flags & IFF_NAPI) ||
2540 | (ifr->ifr_flags & TUN_TYPE_MASK) != IFF_TAP)
2541 | return -EINVAL;
2542 | }
2543 |
2544 | dev = __dev_get_by_name(net, ifr->ifr_name);
2545 | if (dev) {
2546 | if (ifr->ifr_flags & IFF_TUN_EXCL)
2547 | return -EBUSY;
2548 | if ((ifr->ifr_flags & IFF_TUN) && dev->netdev_ops == &tun_netdev_ops)
2549 | tun = netdev_priv(dev);
2550 | else if ((ifr->ifr_flags & IFF_TAP) && dev->netdev_ops == &tap_netdev_ops)
2551 | tun = netdev_priv(dev);
2552 | else
2553 | return -EINVAL;
2554 |
2555 | if (!!(ifr->ifr_flags & IFF_MULTI_QUEUE) !=
2556 | !!(tun->flags & IFF_MULTI_QUEUE))
2557 | return -EINVAL;
2558 |
2559 | if (tun_not_capable(tun))
2560 | return -EPERM;
2561 | err = security_tun_dev_open(tun->security);
2562 | if (err < 0)
2563 | return err;
2564 |
2565 | err = tun_attach(tun, file, ifr->ifr_flags & IFF_NOFILTER,
2566 | ifr->ifr_flags & IFF_NAPI,
2567 | ifr->ifr_flags & IFF_NAPI_FRAGS, true);
2568 | if (err < 0)
2569 | return err;
2570 |
2571 | if (tun->flags & IFF_MULTI_QUEUE &&
2572 | (tun->numqueues + tun->numdisabled > 1)) {
2573 | /* One or more queues have already been attached, no need
2574 | * to initialize the device again.
2575 | */
2576 | netdev_state_change(dev);
2577 | return 0;
2578 | }
2579 |
2580 | tun->flags = (tun->flags & ~TUN_FEATURES) |
2581 | (ifr->ifr_flags & TUN_FEATURES);
2582 |
2583 | netdev_state_change(dev);
2584 | } else {
2585 | char *name;
2586 | unsigned long flags = 0;
2587 | int queues = ifr->ifr_flags & IFF_MULTI_QUEUE ?
2588 | MAX_TAP_QUEUES : 1;
2589 |
2590 | if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
2591 | return -EPERM;
2592 | err = security_tun_dev_create();
2593 | if (err < 0)
2594 | return err;
2595 |
2596 | /* Set dev type */
2597 | if (ifr->ifr_flags & IFF_TUN) {
2598 | /* TUN device */
2599 | flags |= IFF_TUN;
2600 | name = "tun%d";
2601 | } else if (ifr->ifr_flags & IFF_TAP) {
2602 | /* TAP device */
2603 | flags |= IFF_TAP;
2604 | name = "tap%d";
2605 | } else
2606 | return -EINVAL;
2607 |
2608 | if (*ifr->ifr_name)
2609 | name = ifr->ifr_name;
2610 |
2611 | dev = alloc_netdev_mqs(sizeof(struct tun_struct), name,
2612 | NET_NAME_UNKNOWN, tun_setup, queues,
2613 | queues);
2614 |
2615 | if (!dev)
2616 | return -ENOMEM;
2617 | err = dev_get_valid_name(net, dev, name);
2618 | if (err < 0)
2619 | goto err_free_dev;
2620 |
2621 | dev_net_set(dev, net);
2622 | dev->rtnl_link_ops = &tun_link_ops;
2623 | dev->ifindex = tfile->ifindex;
2624 | dev->sysfs_groups[0] = &tun_attr_group;
2625 |
2626 | tun = netdev_priv(dev);
2627 | tun->dev = dev;
2628 | tun->flags = flags;
2629 | tun->txflt.count = 0;
2630 | tun->vnet_hdr_sz = sizeof(struct virtio_net_hdr);
2631 |
2632 | tun->align = NET_SKB_PAD;
2633 | tun->filter_attached = false;
2634 | tun->sndbuf = tfile->socket.sk->sk_sndbuf;
2635 | tun->rx_batched = 0;
2636 | RCU_INIT_POINTER(tun->steering_prog, NULL);
2637 |
2638 | tun->pcpu_stats = netdev_alloc_pcpu_stats(struct tun_pcpu_stats);
2639 | if (!tun->pcpu_stats) {
2640 | err = -ENOMEM;
2641 | goto err_free_dev;
2642 | }
2643 |
2644 | spin_lock_init(&tun->lock);
2645 |
2646 | err = security_tun_dev_alloc_security(&tun->security);
2647 | if (err < 0)
2648 | goto err_free_stat;
2649 |
2650 | tun_net_init(dev);
2651 | tun_flow_init(tun);
2652 |
2653 | dev->hw_features = NETIF_F_SG | NETIF_F_FRAGLIST |
2654 | TUN_USER_FEATURES | NETIF_F_HW_VLAN_CTAG_TX |
2655 | NETIF_F_HW_VLAN_STAG_TX;
2656 | dev->features = dev->hw_features | NETIF_F_LLTX;
2657 | dev->vlan_features = dev->features &
2658 | ~(NETIF_F_HW_VLAN_CTAG_TX |
2659 | NETIF_F_HW_VLAN_STAG_TX);
2660 |
2661 | tun->flags = (tun->flags & ~TUN_FEATURES) |
2662 | (ifr->ifr_flags & TUN_FEATURES);
2663 |
2664 | INIT_LIST_HEAD(&tun->disabled);
2665 | err = tun_attach(tun, file, false, ifr->ifr_flags & IFF_NAPI,
2666 | ifr->ifr_flags & IFF_NAPI_FRAGS, false);
2667 | if (err < 0)
2668 | goto err_free_flow;
2669 |
2670 | err = register_netdevice(tun->dev);
2671 | if (err < 0)
2672 | goto err_detach;
2673 | /* free_netdev() won't check refcnt, so to avoid a race
2674 | * with dev_put() we need to publish tun after registration.
2675 | */
2676 | rcu_assign_pointer(tfile->tun, tun);
2677 | }
2678 |
2679 | netif_carrier_on(tun->dev);
2680 |
2681 | tun_debug(KERN_INFO, tun, "tun_set_iff\n");
2682 |
2683 | /* Make sure persistent devices do not get stuck in
2684 | * xoff state.
2685 | */
2686 | if (netif_running(tun->dev))
2687 | netif_tx_wake_all_queues(tun->dev);
2688 |
2689 | strcpy(ifr->ifr_name, tun->dev->name);
2690 | return 0;
2691 |
2692 | err_detach:
2693 | tun_detach_all(dev);
2694 | /* register_netdevice() already called tun_free_netdev() */
2695 | goto err_free_dev;
2696 |
2697 | err_free_flow:
2698 | tun_flow_uninit(tun);
2699 | security_tun_dev_free_security(tun->security);
2700 | err_free_stat:
2701 | free_percpu(tun->pcpu_stats);
2702 | err_free_dev:
2703 | free_netdev(dev);
2704 | return err;
2705 | }
2706 |
2707 | static void tun_get_iff(struct net *net, struct tun_struct *tun,
2708 | struct ifreq *ifr)
2709 | {
2710 | tun_debug(KERN_INFO, tun, "tun_get_iff\n");
2711 |
2712 | strcpy(ifr->ifr_name, tun->dev->name);
2713 |
2714 | ifr->ifr_flags = tun_flags(tun);
2715 |
2716 | }
2717 |
2718 | /* This is like a cut-down ethtool ops, except done via tun fd so no
2719 | * privs required. */
2720 | static int set_offload(struct tun_struct *tun, unsigned long arg)
2721 | {
2722 | netdev_features_t features = 0;
2723 |
2724 | if (arg & TUN_F_CSUM) {
2725 | features |= NETIF_F_HW_CSUM;
2726 | arg &= ~TUN_F_CSUM;
2727 |
2728 | if (arg & (TUN_F_TSO4|TUN_F_TSO6)) {
2729 | if (arg & TUN_F_TSO_ECN) {
2730 | features |= NETIF_F_TSO_ECN;
2731 | arg &= ~TUN_F_TSO_ECN;
2732 | }
2733 | if (arg & TUN_F_TSO4)
2734 | features |= NETIF_F_TSO;
2735 | if (arg & TUN_F_TSO6)
2736 | features |= NETIF_F_TSO6;
2737 | arg &= ~(TUN_F_TSO4|TUN_F_TSO6);
2738 | }
2739 |
2740 | arg &= ~TUN_F_UFO;
2741 | }
2742 |
2743 | /* This gives the user a way to test for new features in future by
2744 | * trying to set them. */
2745 | if (arg)
2746 | return -EINVAL;
2747 |
2748 | tun->set_features = features;
2749 | tun->dev->wanted_features &= ~TUN_USER_FEATURES;
2750 | tun->dev->wanted_features |= features;
2751 | netdev_update_features(tun->dev);
2752 |
2753 | return 0;
2754 | }
2755 |
2756 | static void tun_detach_filter(struct tun_struct *tun, int n)
2757 | {
2758 | int i;
2759 | struct tun_file *tfile;
2760 |
2761 | for (i = 0; i < n; i++) {
2762 | tfile = rtnl_dereference(tun->tfiles[i]);
2763 | lock_sock(tfile->socket.sk);
2764 | sk_detach_filter(tfile->socket.sk);
2765 | release_sock(tfile->socket.sk);
2766 | }
2767 |
2768 | tun->filter_attached = false;
2769 | }
2770 |
2771 | static int tun_attach_filter(struct tun_struct *tun)
2772 | {
2773 | int i, ret = 0;
2774 | struct tun_file *tfile;
2775 |
2776 | for (i = 0; i < tun->numqueues; i++) {
2777 | tfile = rtnl_dereference(tun->tfiles[i]);
2778 | lock_sock(tfile->socket.sk);
2779 | ret = sk_attach_filter(&tun->fprog, tfile->socket.sk);
2780 | release_sock(tfile->socket.sk);
2781 | if (ret) {
2782 | tun_detach_filter(tun, i);
2783 | return ret;
2784 | }
2785 | }
2786 |
2787 | tun->filter_attached = true;
2788 | return ret;
2789 | }
2790 |
2791 | static void tun_set_sndbuf(struct tun_struct *tun)
2792 | {
2793 | struct tun_file *tfile;
2794 | int i;
2795 |
2796 | for (i = 0; i < tun->numqueues; i++) {
2797 | tfile = rtnl_dereference(tun->tfiles[i]);
2798 | tfile->socket.sk->sk_sndbuf = tun->sndbuf;
2799 | }
2800 | }
2801 |
2802 | static int tun_set_queue(struct file *file, struct ifreq *ifr)
2803 | {
2804 | struct tun_file *tfile = file->private_data;
2805 | struct tun_struct *tun;
2806 | int ret = 0;
2807 |
2808 | rtnl_lock();
2809 |
2810 | if (ifr->ifr_flags & IFF_ATTACH_QUEUE) {
2811 | tun = tfile->detached;
2812 | if (!tun) {
2813 | ret = -EINVAL;
2814 | goto unlock;
2815 | }
2816 | ret = security_tun_dev_attach_queue(tun->security);
2817 | if (ret < 0)
2818 | goto unlock;
2819 | ret = tun_attach(tun, file, false, tun->flags & IFF_NAPI,
2820 | tun->flags & IFF_NAPI_FRAGS, true);
2821 | } else if (ifr->ifr_flags & IFF_DETACH_QUEUE) {
2822 | tun = rtnl_dereference(tfile->tun);
2823 | if (!tun || !(tun->flags & IFF_MULTI_QUEUE) || tfile->detached)
2824 | ret = -EINVAL;
2825 | else
2826 | __tun_detach(tfile, false);
2827 | } else
2828 | ret = -EINVAL;
2829 |
2830 | if (ret >= 0)
2831 | netdev_state_change(tun->dev);
2832 |
2833 | unlock:
2834 | rtnl_unlock();
2835 | return ret;
2836 | }
2837 |
2838 | static int tun_set_ebpf(struct tun_struct *tun, struct tun_prog **prog_p,
2839 | void __user *data)
2840 | {
2841 | struct bpf_prog *prog;
2842 | int fd;
2843 |
2844 | if (copy_from_user(&fd, data, sizeof(fd)))
2845 | return -EFAULT;
2846 |
2847 | if (fd == -1) {
2848 | prog = NULL;
2849 | } else {
2850 | prog = bpf_prog_get_type(fd, BPF_PROG_TYPE_SOCKET_FILTER);
2851 | if (IS_ERR(prog))
2852 | return PTR_ERR(prog);
2853 | }
2854 |
2855 | return __tun_set_ebpf(tun, prog_p, prog);
2856 | }
2857 |
2858 | static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
2859 | unsigned long arg, int ifreq_len)
2860 | {
2861 | struct tun_file *tfile = file->private_data;
2862 | struct net *net = sock_net(&tfile->sk);
2863 | struct tun_struct *tun;
2864 | void __user* argp = (void __user*)arg;
2865 | struct ifreq ifr;
2866 | kuid_t owner;
2867 | kgid_t group;
2868 | int sndbuf;
2869 | int vnet_hdr_sz;
2870 | unsigned int ifindex;
2871 | int le;
2872 | int ret;
2873 | bool do_notify = false;
2874 |
2875 | if (cmd == TUNSETIFF || cmd == TUNSETQUEUE ||
2876 | (_IOC_TYPE(cmd) == SOCK_IOC_TYPE && cmd != SIOCGSKNS)) {
2877 | if (copy_from_user(&ifr, argp, ifreq_len))
2878 | return -EFAULT;
2879 | } else {
2880 | memset(&ifr, 0, sizeof(ifr));
2881 | }
2882 | if (cmd == TUNGETFEATURES) {
2883 | /* Currently this just means: "what IFF flags are valid?".
2884 | * This is needed because we never checked for invalid flags on
2885 | * TUNSETIFF.
2886 | */
2887 | return put_user(IFF_TUN | IFF_TAP | TUN_FEATURES,
2888 | (unsigned int __user*)argp);
2889 | } else if (cmd == TUNSETQUEUE) {
2890 | return tun_set_queue(file, &ifr);
2891 | } else if (cmd == SIOCGSKNS) {
2892 | if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
2893 | return -EPERM;
2894 | return open_related_ns(&net->ns, get_net_ns);
2895 | }
2896 |
2897 | ret = 0;
2898 | rtnl_lock();
2899 |
2900 | tun = tun_get(tfile);
2901 | if (cmd == TUNSETIFF) {
2902 | ret = -EEXIST;
2903 | if (tun)
2904 | goto unlock;
2905 |
2906 | ifr.ifr_name[IFNAMSIZ-1] = '\0';
2907 |
2908 | ret = tun_set_iff(net, file, &ifr);
2909 |
2910 | if (ret)
2911 | goto unlock;
2912 |
2913 | if (copy_to_user(argp, &ifr, ifreq_len))
2914 | ret = -EFAULT;
2915 | goto unlock;
2916 | }
2917 | if (cmd == TUNSETIFINDEX) {
2918 | ret = -EPERM;
2919 | if (tun)
2920 | goto unlock;
2921 |
2922 | ret = -EFAULT;
2923 | if (copy_from_user(&ifindex, argp, sizeof(ifindex)))
2924 | goto unlock;
2925 |
2926 | ret = 0;
2927 | tfile->ifindex = ifindex;
2928 | goto unlock;
2929 | }
2930 |
2931 | ret = -EBADFD;
2932 | if (!tun)
2933 | goto unlock;
2934 |
2935 | tun_debug(KERN_INFO, tun, "tun_chr_ioctl cmd %u\n", cmd);
2936 |
2937 | ret = 0;
2938 | switch (cmd) {
2939 | case TUNGETIFF:
2940 | tun_get_iff(current->nsproxy->net_ns, tun, &ifr);
2941 |
2942 | if (tfile->detached)
2943 | ifr.ifr_flags |= IFF_DETACH_QUEUE;
2944 | if (!tfile->socket.sk->sk_filter)
2945 | ifr.ifr_flags |= IFF_NOFILTER;
2946 |
2947 | if (copy_to_user(argp, &ifr, ifreq_len))
2948 | ret = -EFAULT;
2949 | break;
2950 |
2951 | case TUNSETNOCSUM:
2952 | /* Disable/Enable checksum */
2953 |
2954 | /* [unimplemented] */
2955 | tun_debug(KERN_INFO, tun, "ignored: set checksum %s\n",
2956 | arg ? "disabled" : "enabled");
2957 | break;
2958 |
2959 | case TUNSETPERSIST:
2960 | /* Disable/Enable persist mode. Keep an extra reference to the
2961 | * module to prevent the module from being unloaded.
2962 | */
2963 | if (arg && !(tun->flags & IFF_PERSIST)) {
2964 | tun->flags |= IFF_PERSIST;
2965 | __module_get(THIS_MODULE);
2966 | do_notify = true;
2967 | }
2968 | if (!arg && (tun->flags & IFF_PERSIST)) {
2969 | tun->flags &= ~IFF_PERSIST;
2970 | module_put(THIS_MODULE);
2971 | do_notify = true;
2972 | }
2973 |
2974 | tun_debug(KERN_INFO, tun, "persist %s\n",
2975 | arg ? "enabled" : "disabled");
2976 | break;
2977 |
2978 | case TUNSETOWNER:
2979 | /* Set owner of the device */
2980 | owner = make_kuid(current_user_ns(), arg);
2981 | if (!uid_valid(owner)) {
2982 | ret = -EINVAL;
2983 | break;
2984 | }
2985 | tun->owner = owner;
2986 | do_notify = true;
2987 | tun_debug(KERN_INFO, tun, "owner set to %u\n",
2988 | from_kuid(&init_user_ns, tun->owner));
2989 | break;
2990 |
2991 | case TUNSETGROUP:
2992 | /* Set group of the device */
2993 | group = make_kgid(current_user_ns(), arg);
2994 | if (!gid_valid(group)) {
2995 | ret = -EINVAL;
2996 | break;
2997 | }
2998 | tun->group = group;
2999 | do_notify = true;
3000 | tun_debug(KERN_INFO, tun, "group set to %u\n",
3001 | from_kgid(&init_user_ns, tun->group));
3002 | break;
3003 |
3004 | case TUNSETLINK:
3005 | /* Only allow setting the type when the interface is down */
3006 | if (tun->dev->flags & IFF_UP) {
3007 | tun_debug(KERN_INFO, tun,
3008 | "Linktype set failed because interface is up\n");
3009 | ret = -EBUSY;
3010 | } else {
3011 | tun->dev->type = (int) arg;
3012 | tun_debug(KERN_INFO, tun, "linktype set to %d\n",
3013 | tun->dev->type);
3014 | ret = 0;
3015 | }
3016 | break;
3017 |
3018 | #ifdef TUN_DEBUG
3019 | case TUNSETDEBUG:
3020 | tun->debug = arg;
3021 | break;
3022 | #endif
3023 | case TUNSETOFFLOAD:
3024 | ret = set_offload(tun, arg);
3025 | break;
3026 |
3027 | case TUNSETTXFILTER:
3028 | /* Can be set only for TAPs */
3029 | ret = -EINVAL;
3030 | if ((tun->flags & TUN_TYPE_MASK) != IFF_TAP)
3031 | break;
3032 | ret = update_filter(&tun->txflt, (void __user *)arg);
3033 | break;
3034 |
3035 | case SIOCGIFHWADDR:
3036 | /* Get hw address */
3037 | memcpy(ifr.ifr_hwaddr.sa_data, tun->dev->dev_addr, ETH_ALEN);
3038 | ifr.ifr_hwaddr.sa_family = tun->dev->type;
3039 | if (copy_to_user(argp, &ifr, ifreq_len))
3040 | ret = -EFAULT;
3041 | break;
3042 |
3043 | case SIOCSIFHWADDR:
3044 | /* Set hw address */
3045 | tun_debug(KERN_DEBUG, tun, "set hw address: %pM\n",
3046 | ifr.ifr_hwaddr.sa_data);
3047 |
3048 | ret = dev_set_mac_address(tun->dev, &ifr.ifr_hwaddr);
3049 | break;
3050 |
3051 | case TUNGETSNDBUF:
3052 | sndbuf = tfile->socket.sk->sk_sndbuf;
3053 | if (copy_to_user(argp, &sndbuf, sizeof(sndbuf)))
3054 | ret = -EFAULT;
3055 | break;
3056 |
3057 | case TUNSETSNDBUF:
3058 | if (copy_from_user(&sndbuf, argp, sizeof(sndbuf))) {
3059 | ret = -EFAULT;
3060 | break;
3061 | }
3062 | if (sndbuf <= 0) {
3063 | ret = -EINVAL;
3064 | break;
3065 | }
3066 |
3067 | tun->sndbuf = sndbuf;
3068 | tun_set_sndbuf(tun);
3069 | break;
3070 |
3071 | case TUNGETVNETHDRSZ:
3072 | vnet_hdr_sz = tun->vnet_hdr_sz;
3073 | if (copy_to_user(argp, &vnet_hdr_sz, sizeof(vnet_hdr_sz)))
3074 | ret = -EFAULT;
3075 | break;
3076 |
3077 | case TUNSETVNETHDRSZ:
3078 | if (copy_from_user(&vnet_hdr_sz, argp, sizeof(vnet_hdr_sz))) {
3079 | ret = -EFAULT;
3080 | break;
3081 | }
3082 | if (vnet_hdr_sz < (int)sizeof(struct virtio_net_hdr)) {
3083 | ret = -EINVAL;
3084 | break;
3085 | }
3086 |
3087 | tun->vnet_hdr_sz = vnet_hdr_sz;
3088 | break;
3089 |
3090 | case TUNGETVNETLE:
3091 | le = !!(tun->flags & TUN_VNET_LE);
3092 | if (put_user(le, (int __user *)argp))
3093 | ret = -EFAULT;
3094 | break;
3095 |
3096 | case TUNSETVNETLE:
3097 | if (get_user(le, (int __user *)argp)) {
3098 | ret = -EFAULT;
3099 | break;
3100 | }
3101 | if (le)
3102 | tun->flags |= TUN_VNET_LE;
3103 | else
3104 | tun->flags &= ~TUN_VNET_LE;
3105 | break;
3106 |
3107 | case TUNGETVNETBE:
3108 | ret = tun_get_vnet_be(tun, argp);
3109 | break;
3110 |
3111 | case TUNSETVNETBE:
3112 | ret = tun_set_vnet_be(tun, argp);
3113 | break;
3114 |
3115 | case TUNATTACHFILTER:
3116 | /* Can be set only for TAPs */
3117 | ret = -EINVAL;
3118 | if ((tun->flags & TUN_TYPE_MASK) != IFF_TAP)
3119 | break;
3120 | ret = -EFAULT;
3121 | if (copy_from_user(&tun->fprog, argp, sizeof(tun->fprog)))
3122 | break;
3123 |
3124 | ret = tun_attach_filter(tun);
3125 | break;
3126 |
3127 | case TUNDETACHFILTER:
3128 | /* Can be set only for TAPs */
3129 | ret = -EINVAL;
3130 | if ((tun->flags & TUN_TYPE_MASK) != IFF_TAP)
3131 | break;
3132 | ret = 0;
3133 | tun_detach_filter(tun, tun->numqueues);
3134 | break;
3135 |
3136 | case TUNGETFILTER:
3137 | ret = -EINVAL;
3138 | if ((tun->flags & TUN_TYPE_MASK) != IFF_TAP)
3139 | break;
3140 | ret = -EFAULT;
3141 | if (copy_to_user(argp, &tun->fprog, sizeof(tun->fprog)))
3142 | break;
3143 | ret = 0;
3144 | break;
3145 |
3146 | case TUNSETSTEERINGEBPF:
3147 | ret = tun_set_ebpf(tun, &tun->steering_prog, argp);
3148 | break;
3149 |
3150 | case TUNSETFILTEREBPF:
3151 | ret = tun_set_ebpf(tun, &tun->filter_prog, argp);
3152 | break;
3153 |
3154 | default:
3155 | ret = -EINVAL;
3156 | break;
3157 | }
3158 |
3159 | if (do_notify)
3160 | netdev_state_change(tun->dev);
3161 |
3162 | unlock:
3163 | rtnl_unlock();
3164 | if (tun)
3165 | tun_put(tun);
3166 | return ret;
3167 | }
3168 |
3169 | static long tun_chr_ioctl(struct file *file,
3170 | unsigned int cmd, unsigned long arg)
3171 | {
3172 | return __tun_chr_ioctl(file, cmd, arg, sizeof (struct ifreq));
3173 | }
3174 |
3175 | #ifdef CONFIG_COMPAT
3176 | static long tun_chr_compat_ioctl(struct file *file,
3177 | unsigned int cmd, unsigned long arg)
3178 | {
3179 | switch (cmd) {
3180 | case TUNSETIFF:
3181 | case TUNGETIFF:
3182 | case TUNSETTXFILTER:
3183 | case TUNGETSNDBUF:
3184 | case TUNSETSNDBUF:
3185 | case SIOCGIFHWADDR:
3186 | case SIOCSIFHWADDR:
3187 | arg = (unsigned long)compat_ptr(arg);
3188 | break;
3189 | default:
3190 | arg = (compat_ulong_t)arg;
3191 | break;
3192 | }
3193 |
3194 | /*
3195 | * compat_ifreq is shorter than ifreq, so we must not access beyond
3196 | * the end of that structure. All fields that are used in this
3197 | * driver are compatible though, we don't need to convert the
3198 | * contents.
3199 | */
3200 | return __tun_chr_ioctl(file, cmd, arg, sizeof(struct compat_ifreq));
3201 | }
3202 | #endif /* CONFIG_COMPAT */
3203 |
3204 | static int tun_chr_fasync(int fd, struct file *file, int on)
3205 | {
3206 | struct tun_file *tfile = file->private_data;
3207 | int ret;
3208 |
3209 | if ((ret = fasync_helper(fd, file, on, &tfile->fasync)) < 0)
3210 | goto out;
3211 |
3212 | if (on) {
3213 | __f_setown(file, task_pid(current), PIDTYPE_TGID, 0);
3214 | tfile->flags |= TUN_FASYNC;
3215 | } else
3216 | tfile->flags &= ~TUN_FASYNC;
3217 | ret = 0;
3218 | out:
3219 | return ret;
3220 | }
3221 |
3222 | static int tun_chr_open(struct inode *inode, struct file * file)
3223 | {
3224 | struct net *net = current->nsproxy->net_ns;
3225 | struct tun_file *tfile;
3226 |
3227 | DBG1(KERN_INFO, "tunX: tun_chr_open\n");
3228 |
3229 | tfile = (struct tun_file *)sk_alloc(net, AF_UNSPEC, GFP_KERNEL,
3230 | &tun_proto, 0);
3231 | if (!tfile)
3232 | return -ENOMEM;
3233 | if (ptr_ring_init(&tfile->tx_ring, 0, GFP_KERNEL)) {
3234 | sk_free(&tfile->sk);
3235 | return -ENOMEM;
3236 | }
3237 |
3238 | mutex_init(&tfile->napi_mutex);
3239 | RCU_INIT_POINTER(tfile->tun, NULL);
3240 | tfile->flags = 0;
3241 | tfile->ifindex = 0;
3242 |
3243 | init_waitqueue_head(&tfile->wq.wait);
3244 | RCU_INIT_POINTER(tfile->socket.wq, &tfile->wq);
3245 |
3246 | tfile->socket.file = file;
3247 | tfile->socket.ops = &tun_socket_ops;
3248 |
3249 | sock_init_data(&tfile->socket, &tfile->sk);
3250 |
3251 | tfile->sk.sk_write_space = tun_sock_write_space;
3252 | tfile->sk.sk_sndbuf = INT_MAX;
3253 |
3254 | file->private_data = tfile;
3255 | INIT_LIST_HEAD(&tfile->next);
3256 |
3257 | sock_set_flag(&tfile->sk, SOCK_ZEROCOPY);
3258 |
3259 | return 0;
3260 | }
3261 |
3262 | static int tun_chr_close(struct inode *inode, struct file *file)
3263 | {
3264 | struct tun_file *tfile = file->private_data;
3265 |
3266 | tun_detach(tfile, true);
3267 |
3268 | return 0;
3269 | }
3270 |
3271 | #ifdef CONFIG_PROC_FS
3272 | static void tun_chr_show_fdinfo(struct seq_file *m, struct file *file)
3273 | {
3274 | struct tun_file *tfile = file->private_data;
3275 | struct tun_struct *tun;
3276 | struct ifreq ifr;
3277 |
3278 | memset(&ifr, 0, sizeof(ifr));
3279 |
3280 | rtnl_lock();
3281 | tun = tun_get(tfile);
3282 | if (tun)
3283 | tun_get_iff(current->nsproxy->net_ns, tun, &ifr);
3284 | rtnl_unlock();
3285 |
3286 | if (tun)
3287 | tun_put(tun);
3288 |
3289 | seq_printf(m, "iff:\t%s\n", ifr.ifr_name);
3290 | }
3291 | #endif
3292 |
3293 | static const struct file_operations tun_fops = {
3294 | .owner = THIS_MODULE,
3295 | .llseek = no_llseek,
3296 | .read_iter = tun_chr_read_iter,
3297 | .write_iter = tun_chr_write_iter,
3298 | .poll = tun_chr_poll,
3299 | .unlocked_ioctl = tun_chr_ioctl,
3300 | #ifdef CONFIG_COMPAT
3301 | .compat_ioctl = tun_chr_compat_ioctl,
3302 | #endif
3303 | .open = tun_chr_open,
3304 | .release = tun_chr_close,
3305 | .fasync = tun_chr_fasync,
3306 | #ifdef CONFIG_PROC_FS
3307 | .show_fdinfo = tun_chr_show_fdinfo,
3308 | #endif
3309 | };
3310 |
3311 | static struct miscdevice tun_miscdev = {
3312 | .minor = TUN_MINOR,
3313 | .name = "tun",
3314 | .nodename = "net/tun",
3315 | .fops = &tun_fops,
3316 | };
3317 |
3318 | /* ethtool interface */
3319 |
3320 | static void tun_default_link_ksettings(struct net_device *dev,
3321 | struct ethtool_link_ksettings *cmd)
3322 | {
3323 | ethtool_link_ksettings_zero_link_mode(cmd, supported);
3324 | ethtool_link_ksettings_zero_link_mode(cmd, advertising);
3325 | cmd->base.speed = SPEED_10;
3326 | cmd->base.duplex = DUPLEX_FULL;
3327 | cmd->base.port = PORT_TP;
3328 | cmd->base.phy_address = 0;
3329 | cmd->base.autoneg = AUTONEG_DISABLE;
3330 | }
3331 |
3332 | static int tun_get_link_ksettings(struct net_device *dev,
3333 | struct ethtool_link_ksettings *cmd)
3334 | {
3335 | struct tun_struct *tun = netdev_priv(dev);
3336 |
3337 | memcpy(cmd, &tun->link_ksettings, sizeof(*cmd));
3338 | return 0;
3339 | }
3340 |
3341 | static int tun_set_link_ksettings(struct net_device *dev,
3342 | const struct ethtool_link_ksettings *cmd)
3343 | {
3344 | struct tun_struct *tun = netdev_priv(dev);
3345 |
3346 | memcpy(&tun->link_ksettings, cmd, sizeof(*cmd));
3347 | return 0;
3348 | }
3349 |
3350 | static void tun_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info)
3351 | {
3352 | struct tun_struct *tun = netdev_priv(dev);
3353 |
3354 | strlcpy(info->driver, DRV_NAME, sizeof(info->driver));
3355 | strlcpy(info->version, DRV_VERSION, sizeof(info->version));
3356 |
3357 | switch (tun->flags & TUN_TYPE_MASK) {
3358 | case IFF_TUN:
3359 | strlcpy(info->bus_info, "tun", sizeof(info->bus_info));
3360 | break;
3361 | case IFF_TAP:
3362 | strlcpy(info->bus_info, "tap", sizeof(info->bus_info));
3363 | break;
3364 | }
3365 | }
3366 |
3367 | static u32 tun_get_msglevel(struct net_device *dev)
3368 | {
3369 | #ifdef TUN_DEBUG
3370 | struct tun_struct *tun = netdev_priv(dev);
3371 | return tun->debug;
3372 | #else
3373 | return -EOPNOTSUPP;
3374 | #endif
3375 | }
3376 |
3377 | static void tun_set_msglevel(struct net_device *dev, u32 value)
3378 | {
3379 | #ifdef TUN_DEBUG
3380 | struct tun_struct *tun = netdev_priv(dev);
3381 | tun->debug = value;
3382 | #endif
3383 | }
3384 |
3385 | static int tun_get_coalesce(struct net_device *dev,
3386 | struct ethtool_coalesce *ec)
3387 | {
3388 | struct tun_struct *tun = netdev_priv(dev);
3389 |
3390 | ec->rx_max_coalesced_frames = tun->rx_batched;
3391 |
3392 | return 0;
3393 | }
3394 |
3395 | static int tun_set_coalesce(struct net_device *dev,
3396 | struct ethtool_coalesce *ec)
3397 | {
3398 | struct tun_struct *tun = netdev_priv(dev);
3399 |
3400 | if (ec->rx_max_coalesced_frames > NAPI_POLL_WEIGHT)
3401 | tun->rx_batched = NAPI_POLL_WEIGHT;
3402 | else
3403 | tun->rx_batched = ec->rx_max_coalesced_frames;
3404 |
3405 | return 0;
3406 | }
3407 |
3408 | static const struct ethtool_ops tun_ethtool_ops = {
3409 | .get_drvinfo = tun_get_drvinfo,
3410 | .get_msglevel = tun_get_msglevel,
3411 | .set_msglevel = tun_set_msglevel,
3412 | .get_link = ethtool_op_get_link,
3413 | .get_ts_info = ethtool_op_get_ts_info,
3414 | .get_coalesce = tun_get_coalesce,
3415 | .set_coalesce = tun_set_coalesce,
3416 | .get_link_ksettings = tun_get_link_ksettings,
3417 | .set_link_ksettings = tun_set_link_ksettings,
3418 | };
3419 |
3420 | static int tun_queue_resize(struct tun_struct *tun)
3421 | {
3422 | struct net_device *dev = tun->dev;
3423 | struct tun_file *tfile;
3424 | struct ptr_ring **rings;
3425 | int n = tun->numqueues + tun->numdisabled;
3426 | int ret, i;
3427 |
3428 | rings = kmalloc_array(n, sizeof(*rings), GFP_KERNEL);
3429 | if (!rings)
3430 | return -ENOMEM;
3431 |
3432 | for (i = 0; i < tun->numqueues; i++) {
3433 | tfile = rtnl_dereference(tun->tfiles[i]);
3434 | rings[i] = &tfile->tx_ring;
3435 | }
3436 | list_for_each_entry(tfile, &tun->disabled, next)
3437 | rings[i++] = &tfile->tx_ring;
3438 |
3439 | ret = ptr_ring_resize_multiple(rings, n,
3440 | dev->tx_queue_len, GFP_KERNEL,
3441 | tun_ptr_free);
3442 |
3443 | kfree(rings);
3444 | return ret;
3445 | }
3446 |
3447 | static int tun_device_event(struct notifier_block *unused,
3448 | unsigned long event, void *ptr)
3449 | {
3450 | struct net_device *dev = netdev_notifier_info_to_dev(ptr);
3451 | struct tun_struct *tun = netdev_priv(dev);
3452 | int i;
3453 |
3454 | if (dev->rtnl_link_ops != &tun_link_ops)
3455 | return NOTIFY_DONE;
3456 |
3457 | switch (event) {
3458 | case NETDEV_CHANGE_TX_QUEUE_LEN:
3459 | if (tun_queue_resize(tun))
3460 | return NOTIFY_BAD;
3461 | break;
3462 | case NETDEV_UP:
3463 | for (i = 0; i < tun->numqueues; i++) {
3464 | struct tun_file *tfile;
3465 |
3466 | tfile = rtnl_dereference(tun->tfiles[i]);
3467 | tfile->socket.sk->sk_write_space(tfile->socket.sk);
3468 | }
3469 | break;
3470 | default:
3471 | break;
3472 | }
3473 |
3474 | return NOTIFY_DONE;
3475 | }
3476 |
3477 | static struct notifier_block tun_notifier_block __read_mostly = {
3478 | .notifier_call = tun_device_event,
3479 | };
3480 |
3481 | static int __init tun_init(void)
3482 | {
3483 | int ret = 0;
3484 |
3485 | pr_info("%s, %s\n", DRV_DESCRIPTION, DRV_VERSION);
3486 |
3487 | ret = rtnl_link_register(&tun_link_ops);
3488 | if (ret) {
3489 | pr_err("Can't register link_ops\n");
3490 | goto err_linkops;
3491 | }
3492 |
3493 | ret = misc_register(&tun_miscdev);
3494 | if (ret) {
3495 | pr_err("Can't register misc device %d\n", TUN_MINOR);
3496 | goto err_misc;
3497 | }
3498 |
3499 | ret = register_netdevice_notifier(&tun_notifier_block);
3500 | if (ret) {
3501 | pr_err("Can't register netdevice notifier\n");
3502 | goto err_notifier;
3503 | }
3504 |
3505 | return 0;
3506 |
3507 | err_notifier:
3508 | misc_deregister(&tun_miscdev);
3509 | err_misc:
3510 | rtnl_link_unregister(&tun_link_ops);
3511 | err_linkops:
3512 | return ret;
3513 | }
3514 |
3515 | static void tun_cleanup(void)
3516 | {
3517 | misc_deregister(&tun_miscdev);
3518 | rtnl_link_unregister(&tun_link_ops);
3519 | unregister_netdevice_notifier(&tun_notifier_block);
3520 | }
3521 |
3522 | /* Get an underlying socket object from tun file. Returns error unless file is
3523 | * attached to a device. The returned object works like a packet socket, it
3524 | * can be used for sock_sendmsg/sock_recvmsg. The caller is responsible for
3525 | * holding a reference to the file for as long as the socket is in use. */
3526 | struct socket *tun_get_socket(struct file *file)
3527 | {
3528 | struct tun_file *tfile;
3529 | if (file->f_op != &tun_fops)
3530 | return ERR_PTR(-EINVAL);
3531 | tfile = file->private_data;
3532 | if (!tfile)
3533 | return ERR_PTR(-EBADFD);
3534 | return &tfile->socket;
3535 | }
3536 | EXPORT_SYMBOL_GPL(tun_get_socket);
3537 |
3538 | struct ptr_ring *tun_get_tx_ring(struct file *file)
3539 | {
3540 | struct tun_file *tfile;
3541 |
3542 | if (file->f_op != &tun_fops)
3543 | return ERR_PTR(-EINVAL);
3544 | tfile = file->private_data;
3545 | if (!tfile)
3546 | return ERR_PTR(-EBADFD);
3547 | return &tfile->tx_ring;
3548 | }
3549 | EXPORT_SYMBOL_GPL(tun_get_tx_ring);
3550 |
3551 | module_init(tun_init);
3552 | module_exit(tun_cleanup);
3553 | MODULE_DESCRIPTION(DRV_DESCRIPTION);
3554 | MODULE_AUTHOR(DRV_COPYRIGHT);
3555 | MODULE_LICENSE("GPL");
3556 | MODULE_ALIAS_MISCDEV(TUN_MINOR);
3557 | MODULE_ALIAS("devname:net/tun");
3558 |
--------------------------------------------------------------------------------
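The tun_flags, owner and group attributes registered above through tun_dev_attrs are visible in sysfs for every tun/tap interface. A minimal Go sketch of reading them from userspace (assuming the interface "wg2" that main.go below creates already exists):

    package main

    import (
    	"fmt"
    	"os"
    	"strings"
    )

    func main() {
    	for _, attr := range []string{"tun_flags", "owner", "group"} {
    		b, err := os.ReadFile("/sys/class/net/wg2/" + attr)
    		if err != nil {
    			fmt.Println(attr, "unreadable:", err)
    			continue
    		}
    		fmt.Printf("%s = %s\n", attr, strings.TrimSpace(string(b)))
    	}
    }

tun_show_flags prints the flags in hex, so a device created with IFF_TUN|IFF_MULTI_QUEUE should report 0x101 here; owner and group print -1 unless TUNSETOWNER/TUNSETGROUP were used.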
/main.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | 	// "runtime"
5 | 	"fmt"
6 | 	"os"
7 | 	"log"
8 | 	"net"
9 | 	"net/http"
10 | 	_ "net/http/pprof"
11 | 	"strconv"
12 | 	"tuntap/tunnel"
13 | 	"tuntap/tun"
14 | )
15 |
16 | func init() {
17 | 	// runtime.LockOSThread()
18 | 	// runtime.GOMAXPROCS(48)
19 | }
20 |
21 | func main() {
22 | 	var client bool
23 | 	var queues int
24 | 	var addr [4]byte
25 | 	var key int
26 |
27 | 	if len(os.Args) == 5 {
28 | 		if os.Args[1] == "client" {
29 | 			client = true
30 | 		}
31 | 		var err error
32 | 		queues, err = strconv.Atoi(os.Args[2])
33 | 		if err != nil || queues < 1 {
34 | 			log.Fatalf("invalid queue count %q", os.Args[2])
35 | 		}
36 | 		ip := net.ParseIP(os.Args[3]).To4()
37 | 		if ip == nil {
38 | 			log.Fatalf("invalid IPv4 address %q", os.Args[3])
39 | 		}
40 | 		copy(addr[:], ip)
41 | 		key, err = strconv.Atoi(os.Args[4])
42 | 		if err != nil {
43 | 			log.Fatalf("invalid key %q", os.Args[4])
44 | 		}
45 | 	} else {
46 | 		fmt.Println("./tuntap server|client(mode) 10(queues) 192.168.56.1(IP address) key(pre-shared key)")
47 | 		os.Exit(1)
48 | 	}
49 |
50 | 	// pprof endpoint for profiling.
51 | 	go func() {
52 | 		log.Println(http.ListenAndServe("127.0.0.1:6061", nil))
53 | 	}()
54 |
55 | 	device := tun.CreateTUN("wg2", 1500, queues)
56 |
57 | 	instance := tunnel.NewInstance(device, key, addr, client, queues)
58 | 	instance.WG.Wait()
59 | }
60 |
--------------------------------------------------------------------------------
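For reference, a matching pair of invocations (address and key are arbitrary examples): the server binds UDP ports 12346 through 12346+queues-1 on the given address, and the client connects to the same ports.

    ./tuntap server 4 192.168.56.1 7     # on the listening peer
    ./tuntap client 4 192.168.56.1 7     # on the connecting peer

Both sides also expose pprof on 127.0.0.1:6061 for profiling.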
/tun/tun.go:
--------------------------------------------------------------------------------
1 | package tun
2 |
3 | import (
4 | 	"os"
5 | 	"unsafe"
6 |
7 | 	"golang.org/x/sys/unix"
8 | )
9 |
10 | type Device interface {
11 | 	Read(int, []byte) (int, error)
12 | 	Write(int, []byte) (int, error)
13 | }
14 |
15 | const (
16 | 	cloneDevicePath = "/dev/net/tun"
17 | 	ifReqSize       = unix.IFNAMSIZ + 64
18 | )
19 |
20 | type NativeTun struct {
21 | 	rwFiles []*os.File
22 | 	queues  int
23 | }
24 |
25 | func (tun *NativeTun) Write(index int, buff []byte) (int, error) {
26 | 	return tun.rwFiles[index%tun.queues].Write(buff)
27 | }
28 |
29 | func (tun *NativeTun) Read(index int, buff []byte) (int, error) {
30 | 	return tun.rwFiles[index%tun.queues].Read(buff)
31 | }
32 |
33 | // CreateTUN opens one fd per queue on the clone device and attaches them
34 | // all to the same interface with TUNSETIFF. The mtu argument is currently
35 | // unused; the interface MTU has to be configured externally (e.g. ip link).
36 | func CreateTUN(name string, mtu int, queues int) Device {
37 | 	fds := make([]*os.File, queues)
38 | 	var ifr [ifReqSize]byte
39 | 	var flags uint16 = unix.IFF_TUN | unix.IFF_MULTI_QUEUE
40 | 	copy(ifr[:unix.IFNAMSIZ-1], name)
41 | 	*(*uint16)(unsafe.Pointer(&ifr[unix.IFNAMSIZ])) = flags
42 |
43 | 	for i := 0; i < len(fds); i++ {
44 | 		nfd, err := unix.Open(cloneDevicePath, os.O_RDWR, 0)
45 | 		if err != nil {
46 | 			panic(err)
47 | 		}
48 | 		_, _, errno := unix.Syscall(unix.SYS_IOCTL, uintptr(nfd),
49 | 			uintptr(unix.TUNSETIFF), uintptr(unsafe.Pointer(&ifr[0])))
50 | 		if errno != 0 {
51 | 			panic(errno)
52 | 		}
53 | 		unix.SetNonblock(nfd, false)
54 |
55 | 		fds[i] = os.NewFile(uintptr(nfd), cloneDevicePath)
56 | 	}
57 | 	return &NativeTun{
58 | 		rwFiles: fds,
59 | 		queues:  queues,
60 | 	}
61 | }
62 |
--------------------------------------------------------------------------------
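One subtlety: CreateTUN does not set IFF_NO_PI, so each Read from a queue fd is prefixed with the kernel's 4-byte packet-information header (struct tun_pi: 2 bytes of flags, 2 bytes of EtherType), and Writes must carry one as well. A hypothetical standalone use of this package that peels the header off (requires root, like the package itself):

    package main

    import (
    	"encoding/binary"
    	"fmt"

    	"tuntap/tun"
    )

    func main() {
    	dev := tun.CreateTUN("wg2", 1500, 1)
    	buf := make([]byte, 2048)
    	n, err := dev.Read(0, buf) // blocks until the interface carries traffic
    	if err != nil || n < 4 {
    		return
    	}
    	proto := binary.BigEndian.Uint16(buf[2:4]) // EtherType, 0x0800 for IPv4
    	fmt.Printf("%d payload bytes, proto 0x%04x, IP version %d\n",
    		n-4, proto, buf[4]>>4)
    }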
/tunnel/define.go:
--------------------------------------------------------------------------------
1 | package tunnel
2 |
3 | const (
4 | PortNum = 12345
5 | MinCryptoPoolSize = 4
6 | IOBufferLen = 15000
7 | CryptionBufferLen = 8000
8 | MaxPacketSize = 2000
9 | )
10 |
--------------------------------------------------------------------------------
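These sizes presumably follow from the MTU chosen in main.go: a 1500-byte payload plus the 4-byte TUN packet-information header fits comfortably in MaxPacketSize (2000), and IOBufferLen equals the capacity of the inbound and outbound channels, so the packet pools in receive.go and send.go can wrap around without overwriting entries that are still in flight.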
/tunnel/receive.go:
--------------------------------------------------------------------------------
1 | package tunnel
2 |
3 | import (
4 | 	//"fmt"
5 | )
6 |
7 | func addToDecryptionBuffer(inboundQueue chan *Packet, decryptionQueue chan *Packet, pktent *Packet) {
8 | 	inboundQueue <- pktent
9 | 	decryptionQueue <- pktent
10 | }
11 |
12 | func (tunnel *Tunnel) RoutineReadFromUDP(queue int, max_enc int) {
13 | 	pool := make([]Packet, IOBufferLen)
14 | 	for i := 0; i < len(pool); i++ {
15 | 		pool[i].buffer = make([]byte, MaxPacketSize)
16 | 		// Every packet starts locked; RoutineDecryption unlocks it once
17 | 		// the payload has been decrypted.
18 | 		pool[i].Lock()
19 | 	}
20 | 	var pos, enc int
21 | 	for {
22 | 		// Take a pointer into the pool; copying the struct would copy
23 | 		// a locked sync.Mutex.
24 | 		pkt := &pool[pos%len(pool)]
25 | 		//fmt.Printf("####### Receive from UDP:%d\n", queue)
26 | 		size := tunnel.Receive(queue, pkt.buffer)
27 | 		if size <= 0 {
28 | 			continue
29 | 		}
30 | 		if pkt.buffer[0] == 'H' {
31 | 			// Handshake packet from a connecting client; nothing to forward.
32 | 			continue
33 | 		}
34 | 		pkt.packet = pkt.buffer[:size]
35 | 		addToDecryptionBuffer(tunnel.queue.inbound[queue], tunnel.queue.decryption[queue][enc%max_enc], pkt)
36 | 		pos++
37 | 		enc++
38 | 	}
39 | }
40 |
41 | func (tunnel *Tunnel) RoutineDecryption(queue int, enc int) {
42 | 	key := byte(tunnel.key)
43 | 	for {
44 | 		pkt := <-tunnel.queue.decryption[queue][enc]
45 | 		// Decrypt the packet: byte-wise subtraction of the shared key.
46 | 		for i := 0; i < len(pkt.packet); i++ {
47 | 			pkt.packet[i] -= key
48 | 		}
49 | 		pkt.Unlock()
50 | 	}
51 | }
52 |
53 | func (tunnel *Tunnel) RoutineWriteToTUN(index int) {
54 | 	for {
55 | 		pkt := <-tunnel.queue.inbound[index]
56 | 		// Block until the decryptor has unlocked this packet; this keeps
57 | 		// TUN writes in arrival order.
58 | 		pkt.Lock()
59 | 		//fmt.Printf("####### Write to TUN:%d\n", index)
60 | 		tunnel.tun.tunnel.Write(index, pkt.packet)
61 | 	}
62 | }
63 |
--------------------------------------------------------------------------------
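The pattern above is worth calling out: the inbound channel preserves arrival order while the per-packet mutex acts as a completion latch, so decryption can fan out across workers without reordering writes to the TUN device. A stripped-down, self-contained sketch of the same idea (hypothetical names, not part of this repo):

    package main

    import (
    	"fmt"
    	"sync"
    )

    type item struct {
    	sync.Mutex
    	val int
    }

    func main() {
    	ordered := make(chan *item, 8) // consumption order
    	workers := make(chan *item, 8) // parallel processing

    	// Worker: process, then release the latch.
    	go func() {
    		for it := range workers {
    			it.val *= 10
    			it.Unlock()
    		}
    	}()

    	done := make(chan struct{})
    	// Consumer: strict FIFO; Lock() blocks until the worker is done.
    	go func() {
    		for it := range ordered {
    			it.Lock()
    			fmt.Println(it.val) // 10, 20, 30, always in order
    		}
    		close(done)
    	}()

    	for i := 1; i <= 3; i++ {
    		it := &item{val: i}
    		it.Lock() // latch starts held
    		ordered <- it
    		workers <- it
    	}
    	close(ordered)
    	close(workers)
    	<-done
    }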
/tunnel/send.go:
--------------------------------------------------------------------------------
1 | package tunnel
2 |
3 | import (
4 | 	// "fmt"
5 | )
6 |
7 | func addToEncryptionBuffer(outboundQueue chan *Packet, encryptionQueue chan *Packet, pktent *Packet) {
8 | 	outboundQueue <- pktent
9 | 	encryptionQueue <- pktent
10 | }
11 |
12 | func (tunnel *Tunnel) RoutineReadFromTUN(queue int, max_enc int) {
13 | 	pool := make([]Packet, IOBufferLen)
14 | 	for i := 0; i < len(pool); i++ {
15 | 		pool[i].buffer = make([]byte, MaxPacketSize)
16 | 		// Packets start locked; RoutineEncryption unlocks them when done.
17 | 		pool[i].Lock()
18 | 	}
19 | 	var pos, enc int
20 | 	for {
21 | 		// Pointer into the pool; copying the struct would copy a locked mutex.
22 | 		pkt := &pool[pos%len(pool)]
23 | 		size, _ := tunnel.tun.tunnel.Read(queue, pkt.buffer)
24 | 		if size <= 0 {
25 | 			continue
26 | 		}
27 | 		pkt.packet = pkt.buffer[:size]
28 | 		//fmt.Printf("####### read from tun:%d\n", queue)
29 | 		addToEncryptionBuffer(tunnel.queue.outbound[queue], tunnel.queue.encryption[queue][enc%max_enc], pkt)
30 | 		pos++
31 | 		enc++
32 | 	}
33 | }
34 |
35 | func (tunnel *Tunnel) RoutineEncryption(queue int, enc int) {
36 | 	key := byte(tunnel.key)
37 | 	for {
38 | 		pkt := <-tunnel.queue.encryption[queue][enc]
39 | 		// Encrypt the packet: byte-wise addition of the shared key.
40 | 		for i := 0; i < len(pkt.packet); i++ {
41 | 			pkt.packet[i] += key
42 | 		}
43 | 		pkt.Unlock()
44 | 	}
45 | }
46 |
47 | func (tunnel *Tunnel) RoutineWriteToUDP(index int) {
48 | 	for {
49 | 		pkt := <-tunnel.queue.outbound[index]
50 | 		// Wait for the encryptor to release the packet; preserves ordering.
51 | 		pkt.Lock()
52 | 		//fmt.Printf("####### Write to UDP:%d\n", index)
53 | 		tunnel.Send(index, pkt.packet)
54 | 	}
55 | }
56 |
--------------------------------------------------------------------------------
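The "encryption" here is deliberately trivial: a byte-wise Caesar shift with the pre-shared key, where addition on the sender and subtraction on the receiver cancel modulo 256. Presumably it is a placeholder that keeps the pipeline's per-byte cost roughly where a real cipher would sit. A tiny demonstration of the round trip:

    package main

    import "fmt"

    func main() {
    	key := byte(200) // large on purpose: shows the mod-256 wraparound
    	msg := []byte{0x45, 0x00, 0xff}
    	for i := range msg {
    		msg[i] += key // RoutineEncryption
    	}
    	for i := range msg {
    		msg[i] -= key // RoutineDecryption
    	}
    	fmt.Printf("% x\n", msg) // 45 00 ff: the shift cancels exactly
    }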
/tunnel/tunnel.go:
--------------------------------------------------------------------------------
1 | package tunnel
2 |
3 | import (
4 | 	"runtime"
5 | 	"sync"
6 | 	"tuntap/tun"
7 | )
8 |
9 | type Packet struct {
10 | 	sync.Mutex
11 | 	buffer []byte
12 | 	packet []byte
13 | }
14 |
15 | type Tunnel struct {
16 | 	client bool
17 | 	key    int
18 | 	WG     sync.WaitGroup
19 | 	net    struct {
20 | 		socket *UDPSocket
21 | 		port   int
22 | 		addr   [4]byte
23 | 	}
24 | 	queue struct {
25 | 		inbound    []chan *Packet
26 | 		outbound   []chan *Packet
27 | 		encryption [][]chan *Packet
28 | 		decryption [][]chan *Packet
29 | 	}
30 | 	tun struct {
31 | 		tunnel tun.Device
32 | 		queues int
33 | 	}
34 | }
35 |
36 | func NewInstance(tunTunnel tun.Device, key int, addr [4]byte, client bool, queues int) *Tunnel {
37 | 	tunnel := new(Tunnel)
38 | 	tunnel.client = client
39 | 	tunnel.key = key
40 | 	tunnel.tun.queues = queues
41 | 	tunnel.tun.tunnel = tunTunnel
42 | 	tunnel.net.port = 12346
43 | 	tunnel.net.addr = addr
44 |
45 | 	if tunnel.client {
46 | 		tunnel.net.socket = CreateUDPSocket(tunnel.net.port, tunnel.net.addr, tunnel.tun.queues, 1)
47 | 	} else {
48 | 		tunnel.net.socket = CreateUDPSocket(tunnel.net.port, tunnel.net.addr, tunnel.tun.queues, 0)
49 | 	}
50 |
51 | 	tunnel.queue.outbound = make([]chan *Packet, queues)
52 | 	tunnel.queue.inbound = make([]chan *Packet, queues)
53 |
54 | 	// Size each queue's crypto worker pool from the CPU count, with a
55 | 	// floor of MinCryptoPoolSize workers per queue.
56 | 	enc := runtime.NumCPU() / queues
57 | 	if enc < MinCryptoPoolSize {
58 | 		enc = MinCryptoPoolSize
59 | 	}
60 | 	tunnel.queue.encryption = make([][]chan *Packet, queues)
61 | 	tunnel.queue.decryption = make([][]chan *Packet, queues)
62 |
63 | 	for i := 0; i < queues; i++ {
64 | 		tunnel.queue.outbound[i] = make(chan *Packet, IOBufferLen)
65 | 		tunnel.queue.inbound[i] = make(chan *Packet, IOBufferLen)
66 | 		tunnel.queue.encryption[i] = make([]chan *Packet, enc)
67 | 		tunnel.queue.decryption[i] = make([]chan *Packet, enc)
68 | 		for j := 0; j < enc; j++ {
69 | 			tunnel.queue.encryption[i][j] = make(chan *Packet, CryptionBufferLen)
70 | 			tunnel.queue.decryption[i][j] = make(chan *Packet, CryptionBufferLen)
71 | 			go tunnel.RoutineDecryption(i, j)
72 | 			go tunnel.RoutineEncryption(i, j)
73 | 		}
74 | 		go tunnel.RoutineReadFromUDP(i, enc)
75 | 		go tunnel.RoutineWriteToTUN(i)
76 | 		go tunnel.RoutineReadFromTUN(i, enc)
77 | 		go tunnel.RoutineWriteToUDP(i)
78 | 	}
79 | 	// Never Done()d: main blocks on WG.Wait() while the goroutines run.
80 | 	tunnel.WG.Add(1)
81 |
82 | 	return tunnel
83 | }
84 |
--------------------------------------------------------------------------------
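To make the sizing concrete: with 16 CPUs and queues = 4, runtime.NumCPU()/queues gives 4 encryption and 4 decryption goroutines per queue (32 crypto goroutines in total, plus 4 I/O goroutines per queue); with 4 CPUs the quotient is 1 and the MinCryptoPoolSize floor raises it back to 4.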
/tunnel/udp.go:
--------------------------------------------------------------------------------
1 | package tunnel
2 |
3 | import (
4 | 	// "fmt"
5 | 	"golang.org/x/sys/unix"
6 | )
7 |
8 | type End struct {
9 | 	end unix.Sockaddr
10 | }
11 |
12 | type UDPSocket struct {
13 | 	sock   []int
14 | 	end    []End
15 | 	queues int
16 | }
17 |
18 | func getSockaddr(port int, addr [4]byte) unix.Sockaddr {
19 | 	return &unix.SockaddrInet4{
20 | 		Port: port,
21 | 		Addr: addr,
22 | 	}
23 | }
24 |
25 | func CreateUDPSocket(port int, addr [4]byte, queues int, client int) *UDPSocket {
26 | 	socket := new(UDPSocket)
27 | 	socket.sock = make([]int, queues)
28 | 	socket.end = make([]End, queues)
29 | 	// One-byte handshake so the server learns each queue's peer address.
30 | 	initial := []byte{'H'}
31 | 	for i := 0; i < queues; i++ {
32 | 		tport := port + i
33 | 		socket.sock[i] = create()
34 | 		address := &unix.SockaddrInet4{
35 | 			Port: tport,
36 | 			Addr: addr,
37 | 		}
38 | 		if client == 1 {
39 | 			socket.end[i].end = getSockaddr(tport, addr)
40 | 			unix.Connect(socket.sock[i], address)
41 | 			send(socket.sock[i], &socket.end[i], initial)
42 | 		} else {
43 | 			unix.Bind(socket.sock[i], address)
44 | 		}
45 | 	}
46 | 	socket.queues = queues
47 | 	return socket
48 | }
49 |
50 | func (tunnel *Tunnel) Receive(index int, buff []byte) int {
51 | 	socket := tunnel.net.socket
52 | 	return receive(socket.sock[index], buff, &socket.end[index])
53 | }
54 |
55 | func (tunnel *Tunnel) Send(index int, buff []byte) {
56 | 	socket := tunnel.net.socket
57 | 	send(socket.sock[index], &socket.end[index], buff)
58 | }
59 |
60 | func create() int {
61 | 	fd, _ := unix.Socket(unix.AF_INET, unix.SOCK_DGRAM, 0)
62 | 	unix.SetsockoptInt(fd, unix.SOL_SOCKET, unix.SO_REUSEADDR, 1)
63 | 	unix.SetsockoptInt(fd, unix.SOL_SOCKET, unix.SO_REUSEPORT, 1)
64 | 	return fd
65 | }
66 |
67 | func send(sock int, end *End, buff []byte) {
68 | 	if end.end != nil {
69 | 		unix.Sendto(sock, buff, 0, end.end)
70 | 		// fmt.Printf("send internal ##### %d\n", sock)
71 | 	}
72 | }
73 |
74 | func receive(sock int, buff []byte, end *End) int {
75 | 	size, dst, _ := unix.Recvfrom(sock, buff, 0)
76 | 	// Remember the last peer so replies can be addressed to it.
77 | 	end.end = dst
78 | 	// fmt.Printf("receive internal ##### sock:%d\n", sock)
79 | 	return size
80 | }
81 |
--------------------------------------------------------------------------------
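The handshake relies on a property of connectionless sockets: the server never calls Connect; it learns each queue's peer from Recvfrom and replies with Sendto, exactly as receive() stores end.end above. A self-contained loopback sketch of that exchange (port 12399 is an arbitrary example; the same golang.org/x/sys/unix calls as above):

    package main

    import (
    	"fmt"

    	"golang.org/x/sys/unix"
    )

    func main() {
    	addr := &unix.SockaddrInet4{Port: 12399, Addr: [4]byte{127, 0, 0, 1}}

    	srv, _ := unix.Socket(unix.AF_INET, unix.SOCK_DGRAM, 0)
    	unix.Bind(srv, addr)

    	cli, _ := unix.Socket(unix.AF_INET, unix.SOCK_DGRAM, 0)
    	unix.Sendto(cli, []byte{'H'}, 0, addr) // the 'H' handshake

    	buf := make([]byte, 16)
    	n, peer, _ := unix.Recvfrom(srv, buf, 0) // server learns the peer here
    	fmt.Printf("server got %q\n", buf[:n])
    	unix.Sendto(srv, []byte("ack"), 0, peer)

    	n, _, _ = unix.Recvfrom(cli, buf, 0)
    	fmt.Printf("client got %q\n", buf[:n])
    }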