├── .gitattributes
├── tests
│   ├── config.nims
│   └── test1.nim
├── src
│   ├── wepoll.nim
│   └── wepoll
│       ├── LICENSE
│       ├── wepoll.nim
│       ├── wepoll.h
│       ├── README.md
│       └── wepoll.c
├── wepoll.nimble
├── LICENSE
└── README.md
/.gitattributes: -------------------------------------------------------------------------------- 1 | *.c linguist-language=nim -------------------------------------------------------------------------------- /tests/config.nims: -------------------------------------------------------------------------------- 1 | switch("path", "$projectDir/../src") -------------------------------------------------------------------------------- /src/wepoll.nim: -------------------------------------------------------------------------------- 1 | import wepoll/wepoll 2 | export wepoll 3 | -------------------------------------------------------------------------------- /tests/test1.nim: -------------------------------------------------------------------------------- 1 | import wepoll 2 | 3 | 4 | let x = epoll_create(12) 5 | echo x.repr 6 | -------------------------------------------------------------------------------- /wepoll.nimble: -------------------------------------------------------------------------------- 1 | # Package 2 | 3 | version = "0.1.2" 4 | author = "ringabout" 5 | description = "Windows epoll wrapper." 6 | license = "MIT" 7 | srcDir = "src" 8 | 9 | 10 | 11 | # Dependencies 12 | 13 | requires "nim >= 1.2.0" 14 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 flywind 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /src/wepoll/LICENSE: -------------------------------------------------------------------------------- 1 | wepoll - epoll for Windows 2 | https://github.com/piscisaureus/wepoll 3 | 4 | Copyright 2012-2020, Bert Belder 5 | All rights reserved. 6 | 7 | Redistribution and use in source and binary forms, with or without 8 | modification, are permitted provided that the following conditions are 9 | met: 10 | 11 | * Redistributions of source code must retain the above copyright 12 | notice, this list of conditions and the following disclaimer. 
13 | 14 | * Redistributions in binary form must reproduce the above copyright 15 | notice, this list of conditions and the following disclaimer in the 16 | documentation and/or other materials provided with the distribution. 17 | 18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 22 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 24 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 | -------------------------------------------------------------------------------- /src/wepoll/wepoll.nim: -------------------------------------------------------------------------------- 1 | ## 2 | ## wepoll - epoll for Windows 3 | ## https://github.com/piscisaureus/wepoll 4 | ## 5 | ## Copyright 2012-2020, Bert Belder 6 | ## All rights reserved. 7 | ## 8 | ## Redistribution and use in source and binary forms, with or without 9 | ## modification, are permitted provided that the following conditions are 10 | ## met: 11 | ## 12 | ## * Redistributions of source code must retain the above copyright 13 | ## notice, this list of conditions and the following disclaimer. 14 | ## 15 | ## * Redistributions in binary form must reproduce the above copyright 16 | ## notice, this list of conditions and the following disclaimer in the 17 | ## documentation and/or other materials provided with the distribution. 18 | ## 19 | ## THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 20 | ## "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 21 | ## LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 22 | ## A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 23 | ## OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 24 | ## SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 25 | ## LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 26 | ## DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 27 | ## THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 28 | ## (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | ## OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | ## 31 | 32 | 33 | import os 34 | 35 | 36 | {.compile: "wepoll.c".} 37 | 38 | const header_file = currentSourcePath().splitPath.head / "wepoll.h" 39 | 40 | {.pragma: wepoll, header: header_file.} 41 | {.passL: "-lws2_32".} 42 | 43 | 44 | type 45 | EPOLL_EVENTS* = enum 46 | EPOLLIN = (int)(1 shl 0), EPOLLPRI = (int)(1 shl 1), EPOLLOUT = (int)(1 shl 2), 47 | EPOLLERR = (int)(1 shl 3), EPOLLHUP = (int)(1 shl 4), EPOLLRDNORM = (int)(1 shl 6), 48 | EPOLLRDBAND = (int)(1 shl 7), EPOLLWRNORM = (int)(1 shl 8), 49 | EPOLLWRBAND = (int)(1 shl 9), EPOLLMSG = (int)(1 shl 10), ## Never reported. 
50 | EPOLLRDHUP = (int)(1 shl 13), EPOLLONESHOT = (int)(1 shl 31) 51 | 52 | 53 | const 54 | EPOLL_CTL_ADD* = 1 55 | EPOLL_CTL_MOD* = 2 56 | EPOLL_CTL_DEL* = 3 57 | 58 | 59 | type 60 | EpollHandle* = pointer 61 | 62 | EpollSocket* = culonglong 63 | 64 | EpollData* {.bycopy, union.} = object 65 | p*: pointer 66 | fd*: cint 67 | u32*: uint32 68 | u64*: uint64 69 | sock*: EpollSocket ## Windows specific 70 | hnd*: EpollHandle ## Windows specific 71 | 72 | EpollEvent* {.wepoll, importc:"struct epoll_event".} = object 73 | events*: uint32 ## Epoll events and flags 74 | data*: EpollData ## User data variable 75 | 76 | 77 | proc epoll_create*(size: cint): EpollHandle {.wepoll.} 78 | 79 | proc epoll_create1*(flags: cint): EpollHandle {.wepoll.} 80 | 81 | proc epoll_close*(ephnd: EpollHandle): cint {.wepoll.} 82 | 83 | proc epoll_ctl*(ephnd: EpollHandle, op: cint, 84 | sock: EpollSocket, event: ptr EpollEvent): cint {.wepoll.} 85 | 86 | proc epoll_wait*(ephnd: EpollHandle, events: ptr EpollEvent, 87 | maxevents: cint, timeout: cint): cint {.wepoll.} 88 | -------------------------------------------------------------------------------- /src/wepoll/wepoll.h: -------------------------------------------------------------------------------- 1 | /* 2 | * wepoll - epoll for Windows 3 | * https://github.com/piscisaureus/wepoll 4 | * 5 | * Copyright 2012-2020, Bert Belder 6 | * All rights reserved. 7 | * 8 | * Redistribution and use in source and binary forms, with or without 9 | * modification, are permitted provided that the following conditions are 10 | * met: 11 | * 12 | * * Redistributions of source code must retain the above copyright 13 | * notice, this list of conditions and the following disclaimer. 14 | * 15 | * * Redistributions in binary form must reproduce the above copyright 16 | * notice, this list of conditions and the following disclaimer in the 17 | * documentation and/or other materials provided with the distribution. 18 | * 19 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 20 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 21 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 22 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 23 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 24 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 25 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 26 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 27 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 28 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | */ 31 | 32 | #ifndef WEPOLL_H_ 33 | #define WEPOLL_H_ 34 | 35 | #ifndef WEPOLL_EXPORT 36 | #define WEPOLL_EXPORT 37 | #endif 38 | 39 | #include <stdint.h> 40 | 41 | enum EPOLL_EVENTS { 42 | EPOLLIN = (int) (1U << 0), 43 | EPOLLPRI = (int) (1U << 1), 44 | EPOLLOUT = (int) (1U << 2), 45 | EPOLLERR = (int) (1U << 3), 46 | EPOLLHUP = (int) (1U << 4), 47 | EPOLLRDNORM = (int) (1U << 6), 48 | EPOLLRDBAND = (int) (1U << 7), 49 | EPOLLWRNORM = (int) (1U << 8), 50 | EPOLLWRBAND = (int) (1U << 9), 51 | EPOLLMSG = (int) (1U << 10), /* Never reported. 
*/ 52 | EPOLLRDHUP = (int) (1U << 13), 53 | EPOLLONESHOT = (int) (1U << 31) 54 | }; 55 | 56 | #define EPOLLIN (1U << 0) 57 | #define EPOLLPRI (1U << 1) 58 | #define EPOLLOUT (1U << 2) 59 | #define EPOLLERR (1U << 3) 60 | #define EPOLLHUP (1U << 4) 61 | #define EPOLLRDNORM (1U << 6) 62 | #define EPOLLRDBAND (1U << 7) 63 | #define EPOLLWRNORM (1U << 8) 64 | #define EPOLLWRBAND (1U << 9) 65 | #define EPOLLMSG (1U << 10) 66 | #define EPOLLRDHUP (1U << 13) 67 | #define EPOLLONESHOT (1U << 31) 68 | 69 | #define EPOLL_CTL_ADD 1 70 | #define EPOLL_CTL_MOD 2 71 | #define EPOLL_CTL_DEL 3 72 | 73 | typedef void* HANDLE; 74 | typedef uintptr_t SOCKET; 75 | 76 | typedef union epoll_data { 77 | void* ptr; 78 | int fd; 79 | uint32_t u32; 80 | uint64_t u64; 81 | SOCKET sock; /* Windows specific */ 82 | HANDLE hnd; /* Windows specific */ 83 | } epoll_data_t; 84 | 85 | struct epoll_event { 86 | uint32_t events; /* Epoll events and flags */ 87 | epoll_data_t data; /* User data variable */ 88 | }; 89 | 90 | #ifdef __cplusplus 91 | extern "C" { 92 | #endif 93 | 94 | HANDLE epoll_create(int size); 95 | HANDLE epoll_create1(int flags); 96 | 97 | WEPOLL_EXPORT int epoll_close(HANDLE ephnd); 98 | 99 | WEPOLL_EXPORT int epoll_ctl(HANDLE ephnd, 100 | int op, 101 | SOCKET sock, 102 | struct epoll_event* event); 103 | 104 | WEPOLL_EXPORT int epoll_wait(HANDLE ephnd, 105 | struct epoll_event* events, 106 | int maxevents, 107 | int timeout); 108 | 109 | #ifdef __cplusplus 110 | } /* extern "C" */ 111 | #endif 112 | 113 | #endif /* WEPOLL_H_ */ 114 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # wepoll 2 | Windows epoll wrapper based on [wepoll](https://github.com/piscisaureus/wepoll) 3 | 4 | ## Installation 5 | 6 | ``` 7 | nimble install wepoll 8 | ``` 9 | 10 | 11 | ## API 12 | 13 | Docs from https://github.com/piscisaureus/wepoll 14 | 15 | ### General remarks 16 | 17 | * The epoll port is an `EpollHandle`, not a file descriptor. 18 | * All functions set both `errno` and `GetLastError()` on failure. 19 | * For more extensive documentation, see the [epoll(7) man page][man epoll], 20 | and the per-function man pages that are linked below. 21 | 22 | ### epoll_create/epoll_create1 23 | 24 | ```nim 25 | proc epoll_create*(size: cint): EpollHandle 26 | proc epoll_create1*(flags: cint): EpollHandle 27 | ``` 28 | 29 | * Create a new epoll instance (port). 30 | * `size` is ignored but must be greater than zero. 31 | * `flags` must be zero as there are no supported flags. 32 | * Returns `nil` on failure. 33 | * [Linux man page][man epoll_create] 34 | 35 | ### epoll_close 36 | 37 | ```nim 38 | proc epoll_close*(ephnd: EpollHandle): cint 39 | ``` 40 | 41 | * Close an epoll port. 42 | * Do not attempt to close the epoll port with `close()`, 43 | `CloseHandle()` or `closesocket()`. 44 | 45 | ### epoll_ctl 46 | 47 | ```nim 48 | proc epoll_ctl*(ephnd: EpollHandle, op: cint, 49 | sock: EpollSocket, event: ptr EpollEvent): cint {.wepoll.} 50 | ``` 51 | 52 | * Control which socket events are monitored by an epoll port. 53 | * `ephnd` must be an EpollHandle created by 54 | [`epoll_create()`](#epoll_createepoll_create1) or 55 | [`epoll_create1()`](#epoll_createepoll_create1). 56 | * `op` must be one of `EPOLL_CTL_ADD`, `EPOLL_CTL_MOD`, `EPOLL_CTL_DEL`. 57 | * `sock` must be a valid socket created by [`socket()`][msdn socket], 58 | [`WSASocket()`][msdn wsasocket], or [`accept()`][msdn accept]. 
59 | * `event` should be a pointer to an [`EpollEvent`](#object-EpollEvent).
60 | If `op` is `EPOLL_CTL_DEL` then the `event` parameter is ignored, and it 61 | may be `nil`. 62 | * Returns 0 on success, -1 on failure. 63 | * It is recommended to always explicitly remove a socket from its epoll 64 | set using `EPOLL_CTL_DEL` *before* closing it.
65 | As on Linux, closed sockets are automatically removed from the epoll set, but 66 | wepoll may not be able to detect that a socket was closed until the next call 67 | to [`epoll_wait()`](#epoll_wait). 68 | * [Linux man page][man epoll_ctl] 69 | 70 | ### epoll_wait 71 | 72 | ```nim 73 | proc epoll_wait*(ephnd: EpollHandle, events: ptr EpollEvent, 74 | maxevents: cint, timeout: cint): cint {.wepoll.} 75 | ``` 76 | 77 | * Receive socket events from an epoll port. 78 | * `events` should point to a caller-allocated array of 79 | [`EpollEvent`](#object-EpollEvent) objects, which will receive the 80 | reported events. 81 | * `maxevents` is the maximum number of events that will be written to the 82 | `events` array, and must be greater than zero. 83 | * `timeout` specifies whether to block when no events are immediately available. 84 | - `<0` block indefinitely 85 | - `0` report any events that are already waiting, but don't block 86 | - `≥1` block for at most N milliseconds 87 | * Return value: 88 | - `-1` an error occurred 89 | - `0` timed out without any events to report 90 | - `≥1` the number of events stored in the `events` buffer 91 | * [Linux man page][man epoll_wait] 92 | 93 | ### object EpollEvent 94 | 95 | ```nim 96 | type 97 | EpollHandle* = pointer 98 | 99 | EpollSocket* = culonglong 100 | ``` 101 | 102 | ```nim 103 | type 104 | EpollData* {.bycopy, union.} = object 105 | p*: pointer 106 | fd*: cint 107 | u32*: uint32 108 | u64*: uint64 109 | sock*: EpollSocket ## Windows specific 110 | hnd*: EpollHandle ## Windows specific 111 | ``` 112 | 113 | ```nim 114 | type 115 | EpollEvent* {.bycopy.} = object 116 | events*: uint32 ## Epoll events and flags 117 | data*: EpollData ## User data variable 118 | ``` 119 | 120 | * The `events` field is a bit mask containing the events being 121 | monitored/reported, and optional flags.
122 | Flags are accepted by [`epoll_ctl()`](#epoll_ctl), but they are not reported 123 | back by [`epoll_wait()`](#epoll_wait). 124 | * The `data` field can be used to associate application-specific information 125 | with a socket; its value will be returned unmodified by 126 | [`epoll_wait()`](#epoll_wait). 127 | * [Linux man page][man epoll_ctl] 128 | 129 | | Event | Description | 130 | |---------------|----------------------------------------------------------------------| 131 | | `EPOLLIN` | incoming data available, or incoming connection ready to be accepted | 132 | | `EPOLLOUT` | ready to send data, or outgoing connection successfully established | 133 | | `EPOLLRDHUP` | remote peer initiated graceful socket shutdown | 134 | | `EPOLLPRI` | out-of-band data available for reading | 135 | | `EPOLLERR` | socket error<sup>1</sup> | 136 | | `EPOLLHUP` | socket hang-up<sup>1</sup> | 137 | | `EPOLLRDNORM` | same as `EPOLLIN` | 138 | | `EPOLLRDBAND` | same as `EPOLLPRI` | 139 | | `EPOLLWRNORM` | same as `EPOLLOUT` | 140 | | `EPOLLWRBAND` | same as `EPOLLOUT` | 141 | | `EPOLLMSG` | never reported | 142 | 143 | | Flag | Description | 144 | |------------------|---------------------------| 145 | | `EPOLLONESHOT` | report event(s) only once | 146 | | `EPOLLET` | not supported by wepoll | 147 | | `EPOLLEXCLUSIVE` | not supported by wepoll | 148 | | `EPOLLWAKEUP` | not supported by wepoll | 149 | 150 | <sup>1</sup>: the `EPOLLERR` and `EPOLLHUP` events may always be reported by 151 | [`epoll_wait()`](#epoll_wait), regardless of the event mask that was passed to 152 | [`epoll_ctl()`](#epoll_ctl). 153 | 154 | 155 | [man epoll]: http://man7.org/linux/man-pages/man7/epoll.7.html 156 | [man epoll_create]: http://man7.org/linux/man-pages/man2/epoll_create.2.html 157 | [man epoll_ctl]: http://man7.org/linux/man-pages/man2/epoll_ctl.2.html 158 | [man epoll_wait]: http://man7.org/linux/man-pages/man2/epoll_wait.2.html 159 | [msdn accept]: https://msdn.microsoft.com/en-us/library/windows/desktop/ms737526(v=vs.85).aspx 160 | [msdn socket]: https://msdn.microsoft.com/en-us/library/windows/desktop/ms740506(v=vs.85).aspx 161 | [msdn wsasocket]: https://msdn.microsoft.com/en-us/library/windows/desktop/ms742212(v=vs.85).aspx 162 | [select scale]: https://daniel.haxx.se/docs/poll-vs-select.html 163 | [wsapoll broken]: https://daniel.haxx.se/blog/2012/10/10/wsapoll-is-broken/ 164 | [wepoll.c]: https://github.com/piscisaureus/wepoll/blob/dist/wepoll.c 165 | [wepoll.h]: https://github.com/piscisaureus/wepoll/blob/dist/wepoll.h 166 | -------------------------------------------------------------------------------- /src/wepoll/README.md: -------------------------------------------------------------------------------- 1 | # wepoll - epoll for windows 2 | 3 | [![][ci status badge]][ci status link] 4 | 5 | This library implements the [epoll][man epoll] API for Windows 6 | applications. It is fast and scalable, and it closely resembles the API 7 | and behavior of Linux' epoll. 8 | 9 | ## Rationale 10 | 11 | Unlike Linux, OS X, and many other operating systems, Windows doesn't 12 | have a good API for receiving socket state notifications. It only 13 | supports the `select` and `WSAPoll` APIs, but they 14 | [don't scale][select scale] and suffer from 15 | [other issues][wsapoll broken]. 16 | 17 | Using I/O completion ports isn't always practical when software is 18 | designed to be cross-platform. Wepoll offers an alternative that is 19 | much closer to a drop-in replacement for software that was designed 20 | to run on Linux. 
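Since this copy of wepoll is vendored into a Nim package, here is a minimal sketch (not part of the upstream docs) of that familiar epoll loop, written against the wrapper's exported procs. The `sock` parameter is assumed to be any valid winsock socket created elsewhere, and error handling is reduced to assertions:

```nim
# Minimal sketch, assuming `sock` is a valid winsock SOCKET obtained
# elsewhere (e.g. via std/nativesockets); real code should handle errors
# instead of asserting.
import wepoll

proc pollOnce*(sock: EpollSocket) =
  let ep = epoll_create1(0)
  doAssert ep != nil                  # nil signals failure

  var ev = EpollEvent(events: uint32(EPOLLIN))
  ev.data.sock = sock                 # user data, echoed back by epoll_wait()
  doAssert epoll_ctl(ep, EPOLL_CTL_ADD, sock, addr ev) == 0

  var ready: array[64, EpollEvent]    # caller-allocated result buffer
  let n = epoll_wait(ep, addr ready[0], cint(ready.len), 1000)
  doAssert n >= 0                     # -1 means error; 0 means timeout
  for i in 0 ..< n:
    if (ready[i].events and uint32(EPOLLIN)) != 0:
      echo "socket ", ready[i].data.sock, " is readable"

  doAssert epoll_close(ep) == 0       # never CloseHandle()/closesocket()
```

The create/ctl/wait/close shape is exactly the one documented in the API section below.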
21 | 22 | ## Features 23 | 24 | * Can poll 100000s of sockets efficiently. 25 | * Fully thread-safe. 26 | * Multiple threads can poll the same epoll port. 27 | * Sockets can be added to multiple epoll sets. 28 | * All epoll events (`EPOLLIN`, `EPOLLOUT`, `EPOLLPRI`, `EPOLLRDHUP`) 29 | are supported. 30 | * Level-triggered and one-shot (`EPOLLONESHOT`) modes are supported. 31 | * Trivial to embed: you need [only two files][dist]. 32 | 33 | ## Limitations 34 | 35 | * Only works with sockets. 36 | * Edge-triggered (`EPOLLET`) mode isn't supported. 37 | 38 | ## How to use 39 | 40 | The library is [distributed][dist] as a single source file 41 | ([wepoll.c][wepoll.c]) and a single header file ([wepoll.h][wepoll.h]).
42 | Compile the .c file as part of your project, and include the header wherever 43 | needed. 44 | 45 | ## Compatibility 46 | 47 | * Requires Windows Vista or higher. 48 | * Can be compiled with recent versions of MSVC, Clang, and GCC. 49 | 50 | ## API 51 | 52 | ### General remarks 53 | 54 | * The epoll port is a `HANDLE`, not a file descriptor. 55 | * All functions set both `errno` and `GetLastError()` on failure. 56 | * For more extensive documentation, see the [epoll(7) man page][man epoll], 57 | and the per-function man pages that are linked below. 58 | 59 | ### epoll_create/epoll_create1 60 | 61 | ```c 62 | HANDLE epoll_create(int size); 63 | HANDLE epoll_create1(int flags); 64 | ``` 65 | 66 | * Create a new epoll instance (port). 67 | * `size` is ignored but must be greater than zero. 68 | * `flags` must be zero as there are no supported flags. 69 | * Returns `NULL` on failure. 70 | * [Linux man page][man epoll_create] 71 | 72 | ### epoll_close 73 | 74 | ```c 75 | int epoll_close(HANDLE ephnd); 76 | ``` 77 | 78 | * Close an epoll port. 79 | * Do not attempt to close the epoll port with `close()`, 80 | `CloseHandle()` or `closesocket()`. 81 | 82 | ### epoll_ctl 83 | 84 | ```c 85 | int epoll_ctl(HANDLE ephnd, 86 | int op, 87 | SOCKET sock, 88 | struct epoll_event* event); 89 | ``` 90 | 91 | * Control which socket events are monitored by an epoll port. 92 | * `ephnd` must be a HANDLE created by 93 | [`epoll_create()`](#epoll_createepoll_create1) or 94 | [`epoll_create1()`](#epoll_createepoll_create1). 95 | * `op` must be one of `EPOLL_CTL_ADD`, `EPOLL_CTL_MOD`, `EPOLL_CTL_DEL`. 96 | * `sock` must be a valid socket created by [`socket()`][msdn socket], 97 | [`WSASocket()`][msdn wsasocket], or [`accept()`][msdn accept]. 98 | * `event` should be a pointer to a [`struct epoll_event`](#struct-epoll_event).
99 | If `op` is `EPOLL_CTL_DEL` then the `event` parameter is ignored, and it 100 | may be `NULL`. 101 | * Returns 0 on success, -1 on failure. 102 | * It is recommended to always explicitly remove a socket from its epoll 103 | set using `EPOLL_CTL_DEL` *before* closing it.
104 | As on Linux, closed sockets are automatically removed from the epoll set, but 105 | wepoll may not be able to detect that a socket was closed until the next call 106 | to [`epoll_wait()`](#epoll_wait). 107 | * [Linux man page][man epoll_ctl] 108 | 109 | ### epoll_wait 110 | 111 | ```c 112 | int epoll_wait(HANDLE ephnd, 113 | struct epoll_event* events, 114 | int maxevents, 115 | int timeout); 116 | ``` 117 | 118 | * Receive socket events from an epoll port. 119 | * `events` should point to a caller-allocated array of 120 | [`epoll_event`](#struct-epoll_event) structs, which will receive the 121 | reported events. 122 | * `maxevents` is the maximum number of events that will be written to the 123 | `events` array, and must be greater than zero. 124 | * `timeout` specifies whether to block when no events are immediately available. 125 | - `<0` block indefinitely 126 | - `0` report any events that are already waiting, but don't block 127 | - `≥1` block for at most N milliseconds 128 | * Return value: 129 | - `-1` an error occurred 130 | - `0` timed out without any events to report 131 | - `≥1` the number of events stored in the `events` buffer 132 | * [Linux man page][man epoll_wait] 133 | 134 | ### struct epoll_event 135 | 136 | ```c 137 | typedef union epoll_data { 138 | void* ptr; 139 | int fd; 140 | uint32_t u32; 141 | uint64_t u64; 142 | SOCKET sock; /* Windows specific */ 143 | HANDLE hnd; /* Windows specific */ 144 | } epoll_data_t; 145 | ``` 146 | 147 | ```c 148 | struct epoll_event { 149 | uint32_t events; /* Epoll events and flags */ 150 | epoll_data_t data; /* User data variable */ 151 | }; 152 | ``` 153 | 154 | * The `events` field is a bit mask containing the events being 155 | monitored/reported, and optional flags.
156 | Flags are accepted by [`epoll_ctl()`](#epoll_ctl), but they are not reported 157 | back by [`epoll_wait()`](#epoll_wait). 158 | * The `data` field can be used to associate application-specific information 159 | with a socket; its value will be returned unmodified by 160 | [`epoll_wait()`](#epoll_wait). 161 | * [Linux man page][man epoll_ctl] 162 | 163 | | Event | Description | 164 | |---------------|----------------------------------------------------------------------| 165 | | `EPOLLIN` | incoming data available, or incoming connection ready to be accepted | 166 | | `EPOLLOUT` | ready to send data, or outgoing connection successfully established | 167 | | `EPOLLRDHUP` | remote peer initiated graceful socket shutdown | 168 | | `EPOLLPRI` | out-of-band data available for reading | 169 | | `EPOLLERR` | socket error<sup>1</sup> | 170 | | `EPOLLHUP` | socket hang-up<sup>1</sup> | 171 | | `EPOLLRDNORM` | same as `EPOLLIN` | 172 | | `EPOLLRDBAND` | same as `EPOLLPRI` | 173 | | `EPOLLWRNORM` | same as `EPOLLOUT` | 174 | | `EPOLLWRBAND` | same as `EPOLLOUT` | 175 | | `EPOLLMSG` | never reported | 176 | 177 | | Flag | Description | 178 | |------------------|---------------------------| 179 | | `EPOLLONESHOT` | report event(s) only once | 180 | | `EPOLLET` | not supported by wepoll | 181 | | `EPOLLEXCLUSIVE` | not supported by wepoll | 182 | | `EPOLLWAKEUP` | not supported by wepoll | 183 | 184 | <sup>1</sup>: the `EPOLLERR` and `EPOLLHUP` events may always be reported by 185 | [`epoll_wait()`](#epoll_wait), regardless of the event mask that was passed to 186 | [`epoll_ctl()`](#epoll_ctl). 187 | 188 | 189 | [ci status badge]: https://ci.appveyor.com/api/projects/status/github/piscisaureus/wepoll?branch=master&svg=true 190 | [ci status link]: https://ci.appveyor.com/project/piscisaureus/wepoll/branch/master 191 | [dist]: https://github.com/piscisaureus/wepoll/tree/dist 192 | [man epoll]: http://man7.org/linux/man-pages/man7/epoll.7.html 193 | [man epoll_create]: http://man7.org/linux/man-pages/man2/epoll_create.2.html 194 | [man epoll_ctl]: http://man7.org/linux/man-pages/man2/epoll_ctl.2.html 195 | [man epoll_wait]: http://man7.org/linux/man-pages/man2/epoll_wait.2.html 196 | [msdn accept]: https://msdn.microsoft.com/en-us/library/windows/desktop/ms737526(v=vs.85).aspx 197 | [msdn socket]: https://msdn.microsoft.com/en-us/library/windows/desktop/ms740506(v=vs.85).aspx 198 | [msdn wsasocket]: https://msdn.microsoft.com/en-us/library/windows/desktop/ms742212(v=vs.85).aspx 199 | [select scale]: https://daniel.haxx.se/docs/poll-vs-select.html 200 | [wsapoll broken]: https://daniel.haxx.se/blog/2012/10/10/wsapoll-is-broken/ 201 | [wepoll.c]: https://github.com/piscisaureus/wepoll/blob/dist/wepoll.c 202 | [wepoll.h]: https://github.com/piscisaureus/wepoll/blob/dist/wepoll.h 203 | -------------------------------------------------------------------------------- /src/wepoll/wepoll.c: -------------------------------------------------------------------------------- 1 | /* 2 | * wepoll - epoll for Windows 3 | * https://github.com/piscisaureus/wepoll 4 | * 5 | * Copyright 2012-2020, Bert Belder 6 | * All rights reserved. 7 | * 8 | * Redistribution and use in source and binary forms, with or without 9 | * modification, are permitted provided that the following conditions are 10 | * met: 11 | * 12 | * * Redistributions of source code must retain the above copyright 13 | * notice, this list of conditions and the following disclaimer. 
14 | * 15 | * * Redistributions in binary form must reproduce the above copyright 16 | * notice, this list of conditions and the following disclaimer in the 17 | * documentation and/or other materials provided with the distribution. 18 | * 19 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 20 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 21 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 22 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 23 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 24 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 25 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 26 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 27 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 28 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | */ 31 | 32 | #ifndef WEPOLL_EXPORT 33 | #define WEPOLL_EXPORT 34 | #endif 35 | 36 | #include <stdint.h> 37 | 38 | enum EPOLL_EVENTS { 39 | EPOLLIN = (int) (1U << 0), 40 | EPOLLPRI = (int) (1U << 1), 41 | EPOLLOUT = (int) (1U << 2), 42 | EPOLLERR = (int) (1U << 3), 43 | EPOLLHUP = (int) (1U << 4), 44 | EPOLLRDNORM = (int) (1U << 6), 45 | EPOLLRDBAND = (int) (1U << 7), 46 | EPOLLWRNORM = (int) (1U << 8), 47 | EPOLLWRBAND = (int) (1U << 9), 48 | EPOLLMSG = (int) (1U << 10), /* Never reported. */ 49 | EPOLLRDHUP = (int) (1U << 13), 50 | EPOLLONESHOT = (int) (1U << 31) 51 | }; 52 | 53 | #define EPOLLIN (1U << 0) 54 | #define EPOLLPRI (1U << 1) 55 | #define EPOLLOUT (1U << 2) 56 | #define EPOLLERR (1U << 3) 57 | #define EPOLLHUP (1U << 4) 58 | #define EPOLLRDNORM (1U << 6) 59 | #define EPOLLRDBAND (1U << 7) 60 | #define EPOLLWRNORM (1U << 8) 61 | #define EPOLLWRBAND (1U << 9) 62 | #define EPOLLMSG (1U << 10) 63 | #define EPOLLRDHUP (1U << 13) 64 | #define EPOLLONESHOT (1U << 31) 65 | 66 | #define EPOLL_CTL_ADD 1 67 | #define EPOLL_CTL_MOD 2 68 | #define EPOLL_CTL_DEL 3 69 | 70 | typedef void* HANDLE; 71 | typedef uintptr_t SOCKET; 72 | 73 | typedef union epoll_data { 74 | void* ptr; 75 | int fd; 76 | uint32_t u32; 77 | uint64_t u64; 78 | SOCKET sock; /* Windows specific */ 79 | HANDLE hnd; /* Windows specific */ 80 | } epoll_data_t; 81 | 82 | struct epoll_event { 83 | uint32_t events; /* Epoll events and flags */ 84 | epoll_data_t data; /* User data variable */ 85 | }; 86 | 87 | #ifdef __cplusplus 88 | extern "C" { 89 | #endif 90 | 91 | WEPOLL_EXPORT HANDLE epoll_create(int size); 92 | WEPOLL_EXPORT HANDLE epoll_create1(int flags); 93 | 94 | WEPOLL_EXPORT int epoll_close(HANDLE ephnd); 95 | 96 | WEPOLL_EXPORT int epoll_ctl(HANDLE ephnd, 97 | int op, 98 | SOCKET sock, 99 | struct epoll_event* event); 100 | 101 | WEPOLL_EXPORT int epoll_wait(HANDLE ephnd, 102 | struct epoll_event* events, 103 | int maxevents, 104 | int timeout); 105 | 106 | #ifdef __cplusplus 107 | } /* extern "C" */ 108 | #endif 109 | 110 | #include <assert.h> 111 | 112 | #include <stdlib.h> 113 | 114 | #define WEPOLL_INTERNAL static 115 | #define WEPOLL_INTERNAL_EXTERN static 116 | 117 | #if defined(__clang__) 118 | #pragma clang diagnostic push 119 | #pragma clang diagnostic ignored "-Wnonportable-system-include-path" 120 | #pragma clang diagnostic ignored "-Wreserved-id-macro" 121 | #elif defined(_MSC_VER) 122 | #pragma warning(push, 1) 123 | #endif 124 | 125 | #undef WIN32_LEAN_AND_MEAN 126 | #define 
WIN32_LEAN_AND_MEAN 127 | 128 | #undef _WIN32_WINNT 129 | #define _WIN32_WINNT 0x0600 130 | 131 | #include 132 | #include 133 | #include 134 | 135 | #if defined(__clang__) 136 | #pragma clang diagnostic pop 137 | #elif defined(_MSC_VER) 138 | #pragma warning(pop) 139 | #endif 140 | 141 | WEPOLL_INTERNAL int nt_global_init(void); 142 | 143 | typedef LONG NTSTATUS; 144 | typedef NTSTATUS* PNTSTATUS; 145 | 146 | #ifndef NT_SUCCESS 147 | #define NT_SUCCESS(status) (((NTSTATUS)(status)) >= 0) 148 | #endif 149 | 150 | #ifndef STATUS_SUCCESS 151 | #define STATUS_SUCCESS ((NTSTATUS) 0x00000000L) 152 | #endif 153 | 154 | #ifndef STATUS_PENDING 155 | #define STATUS_PENDING ((NTSTATUS) 0x00000103L) 156 | #endif 157 | 158 | #ifndef STATUS_CANCELLED 159 | #define STATUS_CANCELLED ((NTSTATUS) 0xC0000120L) 160 | #endif 161 | 162 | #ifndef STATUS_NOT_FOUND 163 | #define STATUS_NOT_FOUND ((NTSTATUS) 0xC0000225L) 164 | #endif 165 | 166 | typedef struct _IO_STATUS_BLOCK { 167 | NTSTATUS Status; 168 | ULONG_PTR Information; 169 | } IO_STATUS_BLOCK, *PIO_STATUS_BLOCK; 170 | 171 | typedef VOID(NTAPI* PIO_APC_ROUTINE)(PVOID ApcContext, 172 | PIO_STATUS_BLOCK IoStatusBlock, 173 | ULONG Reserved); 174 | 175 | typedef struct _UNICODE_STRING { 176 | USHORT Length; 177 | USHORT MaximumLength; 178 | PWSTR Buffer; 179 | } UNICODE_STRING, *PUNICODE_STRING; 180 | 181 | #define RTL_CONSTANT_STRING(s) \ 182 | { sizeof(s) - sizeof((s)[0]), sizeof(s), s } 183 | 184 | typedef struct _OBJECT_ATTRIBUTES { 185 | ULONG Length; 186 | HANDLE RootDirectory; 187 | PUNICODE_STRING ObjectName; 188 | ULONG Attributes; 189 | PVOID SecurityDescriptor; 190 | PVOID SecurityQualityOfService; 191 | } OBJECT_ATTRIBUTES, *POBJECT_ATTRIBUTES; 192 | 193 | #define RTL_CONSTANT_OBJECT_ATTRIBUTES(ObjectName, Attributes) \ 194 | { sizeof(OBJECT_ATTRIBUTES), NULL, ObjectName, Attributes, NULL, NULL } 195 | 196 | #ifndef FILE_OPEN 197 | #define FILE_OPEN 0x00000001UL 198 | #endif 199 | 200 | #define KEYEDEVENT_WAIT 0x00000001UL 201 | #define KEYEDEVENT_WAKE 0x00000002UL 202 | #define KEYEDEVENT_ALL_ACCESS \ 203 | (STANDARD_RIGHTS_REQUIRED | KEYEDEVENT_WAIT | KEYEDEVENT_WAKE) 204 | 205 | #define NT_NTDLL_IMPORT_LIST(X) \ 206 | X(NTSTATUS, \ 207 | NTAPI, \ 208 | NtCancelIoFileEx, \ 209 | (HANDLE FileHandle, \ 210 | PIO_STATUS_BLOCK IoRequestToCancel, \ 211 | PIO_STATUS_BLOCK IoStatusBlock)) \ 212 | \ 213 | X(NTSTATUS, \ 214 | NTAPI, \ 215 | NtCreateFile, \ 216 | (PHANDLE FileHandle, \ 217 | ACCESS_MASK DesiredAccess, \ 218 | POBJECT_ATTRIBUTES ObjectAttributes, \ 219 | PIO_STATUS_BLOCK IoStatusBlock, \ 220 | PLARGE_INTEGER AllocationSize, \ 221 | ULONG FileAttributes, \ 222 | ULONG ShareAccess, \ 223 | ULONG CreateDisposition, \ 224 | ULONG CreateOptions, \ 225 | PVOID EaBuffer, \ 226 | ULONG EaLength)) \ 227 | \ 228 | X(NTSTATUS, \ 229 | NTAPI, \ 230 | NtCreateKeyedEvent, \ 231 | (PHANDLE KeyedEventHandle, \ 232 | ACCESS_MASK DesiredAccess, \ 233 | POBJECT_ATTRIBUTES ObjectAttributes, \ 234 | ULONG Flags)) \ 235 | \ 236 | X(NTSTATUS, \ 237 | NTAPI, \ 238 | NtDeviceIoControlFile, \ 239 | (HANDLE FileHandle, \ 240 | HANDLE Event, \ 241 | PIO_APC_ROUTINE ApcRoutine, \ 242 | PVOID ApcContext, \ 243 | PIO_STATUS_BLOCK IoStatusBlock, \ 244 | ULONG IoControlCode, \ 245 | PVOID InputBuffer, \ 246 | ULONG InputBufferLength, \ 247 | PVOID OutputBuffer, \ 248 | ULONG OutputBufferLength)) \ 249 | \ 250 | X(NTSTATUS, \ 251 | NTAPI, \ 252 | NtReleaseKeyedEvent, \ 253 | (HANDLE KeyedEventHandle, \ 254 | PVOID KeyValue, \ 255 | BOOLEAN Alertable, \ 256 | PLARGE_INTEGER 
Timeout)) \ 257 | \ 258 | X(NTSTATUS, \ 259 | NTAPI, \ 260 | NtWaitForKeyedEvent, \ 261 | (HANDLE KeyedEventHandle, \ 262 | PVOID KeyValue, \ 263 | BOOLEAN Alertable, \ 264 | PLARGE_INTEGER Timeout)) \ 265 | \ 266 | X(ULONG, WINAPI, RtlNtStatusToDosError, (NTSTATUS Status)) 267 | 268 | #define X(return_type, attributes, name, parameters) \ 269 | WEPOLL_INTERNAL_EXTERN return_type(attributes* name) parameters; 270 | NT_NTDLL_IMPORT_LIST(X) 271 | #undef X 272 | 273 | #define AFD_POLL_RECEIVE 0x0001 274 | #define AFD_POLL_RECEIVE_EXPEDITED 0x0002 275 | #define AFD_POLL_SEND 0x0004 276 | #define AFD_POLL_DISCONNECT 0x0008 277 | #define AFD_POLL_ABORT 0x0010 278 | #define AFD_POLL_LOCAL_CLOSE 0x0020 279 | #define AFD_POLL_ACCEPT 0x0080 280 | #define AFD_POLL_CONNECT_FAIL 0x0100 281 | 282 | typedef struct _AFD_POLL_HANDLE_INFO { 283 | HANDLE Handle; 284 | ULONG Events; 285 | NTSTATUS Status; 286 | } AFD_POLL_HANDLE_INFO, *PAFD_POLL_HANDLE_INFO; 287 | 288 | typedef struct _AFD_POLL_INFO { 289 | LARGE_INTEGER Timeout; 290 | ULONG NumberOfHandles; 291 | ULONG Exclusive; 292 | AFD_POLL_HANDLE_INFO Handles[1]; 293 | } AFD_POLL_INFO, *PAFD_POLL_INFO; 294 | 295 | WEPOLL_INTERNAL int afd_create_device_handle(HANDLE iocp_handle, 296 | HANDLE* afd_device_handle_out); 297 | 298 | WEPOLL_INTERNAL int afd_poll(HANDLE afd_device_handle, 299 | AFD_POLL_INFO* poll_info, 300 | IO_STATUS_BLOCK* io_status_block); 301 | WEPOLL_INTERNAL int afd_cancel_poll(HANDLE afd_device_handle, 302 | IO_STATUS_BLOCK* io_status_block); 303 | 304 | #define return_map_error(value) \ 305 | do { \ 306 | err_map_win_error(); \ 307 | return (value); \ 308 | } while (0) 309 | 310 | #define return_set_error(value, error) \ 311 | do { \ 312 | err_set_win_error(error); \ 313 | return (value); \ 314 | } while (0) 315 | 316 | WEPOLL_INTERNAL void err_map_win_error(void); 317 | WEPOLL_INTERNAL void err_set_win_error(DWORD error); 318 | WEPOLL_INTERNAL int err_check_handle(HANDLE handle); 319 | 320 | #define IOCTL_AFD_POLL 0x00012024 321 | 322 | static UNICODE_STRING afd__device_name = 323 | RTL_CONSTANT_STRING(L"\\Device\\Afd\\Wepoll"); 324 | 325 | static OBJECT_ATTRIBUTES afd__device_attributes = 326 | RTL_CONSTANT_OBJECT_ATTRIBUTES(&afd__device_name, 0); 327 | 328 | int afd_create_device_handle(HANDLE iocp_handle, 329 | HANDLE* afd_device_handle_out) { 330 | HANDLE afd_device_handle; 331 | IO_STATUS_BLOCK iosb; 332 | NTSTATUS status; 333 | 334 | /* By opening \Device\Afd without specifying any extended attributes, we'll 335 | * get a handle that lets us talk to the AFD driver, but that doesn't have an 336 | * associated endpoint (so it's not a socket). 
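 * The new handle is then attached to the port's I/O completion port (right
 * below), so completions of AFD poll operations are delivered through it.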
*/ 337 | status = NtCreateFile(&afd_device_handle, 338 | SYNCHRONIZE, 339 | &afd__device_attributes, 340 | &iosb, 341 | NULL, 342 | 0, 343 | FILE_SHARE_READ | FILE_SHARE_WRITE, 344 | FILE_OPEN, 345 | 0, 346 | NULL, 347 | 0); 348 | if (status != STATUS_SUCCESS) 349 | return_set_error(-1, RtlNtStatusToDosError(status)); 350 | 351 | if (CreateIoCompletionPort(afd_device_handle, iocp_handle, 0, 0) == NULL) 352 | goto error; 353 | 354 | if (!SetFileCompletionNotificationModes(afd_device_handle, 355 | FILE_SKIP_SET_EVENT_ON_HANDLE)) 356 | goto error; 357 | 358 | *afd_device_handle_out = afd_device_handle; 359 | return 0; 360 | 361 | error: 362 | CloseHandle(afd_device_handle); 363 | return_map_error(-1); 364 | } 365 | 366 | int afd_poll(HANDLE afd_device_handle, 367 | AFD_POLL_INFO* poll_info, 368 | IO_STATUS_BLOCK* io_status_block) { 369 | NTSTATUS status; 370 | 371 | /* Blocking operation is not supported. */ 372 | assert(io_status_block != NULL); 373 | 374 | io_status_block->Status = STATUS_PENDING; 375 | status = NtDeviceIoControlFile(afd_device_handle, 376 | NULL, 377 | NULL, 378 | io_status_block, 379 | io_status_block, 380 | IOCTL_AFD_POLL, 381 | poll_info, 382 | sizeof *poll_info, 383 | poll_info, 384 | sizeof *poll_info); 385 | 386 | if (status == STATUS_SUCCESS) 387 | return 0; 388 | else if (status == STATUS_PENDING) 389 | return_set_error(-1, ERROR_IO_PENDING); 390 | else 391 | return_set_error(-1, RtlNtStatusToDosError(status)); 392 | } 393 | 394 | int afd_cancel_poll(HANDLE afd_device_handle, 395 | IO_STATUS_BLOCK* io_status_block) { 396 | NTSTATUS cancel_status; 397 | IO_STATUS_BLOCK cancel_iosb; 398 | 399 | /* If the poll operation has already completed or has been cancelled earlier, 400 | * there's nothing left for us to do. */ 401 | if (io_status_block->Status != STATUS_PENDING) 402 | return 0; 403 | 404 | cancel_status = 405 | NtCancelIoFileEx(afd_device_handle, io_status_block, &cancel_iosb); 406 | 407 | /* NtCancelIoFileEx() may return STATUS_NOT_FOUND if the operation completed 408 | * just before calling NtCancelIoFileEx(). This is not an error. 
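 * Either way, the poll operation's completion is (or soon will be) queued
 * to the I/O completion port, where it is consumed like any other completion.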
*/ 409 | if (cancel_status == STATUS_SUCCESS || cancel_status == STATUS_NOT_FOUND) 410 | return 0; 411 | else 412 | return_set_error(-1, RtlNtStatusToDosError(cancel_status)); 413 | } 414 | 415 | WEPOLL_INTERNAL int epoll_global_init(void); 416 | 417 | WEPOLL_INTERNAL int init(void); 418 | 419 | typedef struct port_state port_state_t; 420 | typedef struct queue queue_t; 421 | typedef struct sock_state sock_state_t; 422 | typedef struct ts_tree_node ts_tree_node_t; 423 | 424 | WEPOLL_INTERNAL port_state_t* port_new(HANDLE* iocp_handle_out); 425 | WEPOLL_INTERNAL int port_close(port_state_t* port_state); 426 | WEPOLL_INTERNAL int port_delete(port_state_t* port_state); 427 | 428 | WEPOLL_INTERNAL int port_wait(port_state_t* port_state, 429 | struct epoll_event* events, 430 | int maxevents, 431 | int timeout); 432 | 433 | WEPOLL_INTERNAL int port_ctl(port_state_t* port_state, 434 | int op, 435 | SOCKET sock, 436 | struct epoll_event* ev); 437 | 438 | WEPOLL_INTERNAL int port_register_socket(port_state_t* port_state, 439 | sock_state_t* sock_state, 440 | SOCKET socket); 441 | WEPOLL_INTERNAL void port_unregister_socket(port_state_t* port_state, 442 | sock_state_t* sock_state); 443 | WEPOLL_INTERNAL sock_state_t* port_find_socket(port_state_t* port_state, 444 | SOCKET socket); 445 | 446 | WEPOLL_INTERNAL void port_request_socket_update(port_state_t* port_state, 447 | sock_state_t* sock_state); 448 | WEPOLL_INTERNAL void port_cancel_socket_update(port_state_t* port_state, 449 | sock_state_t* sock_state); 450 | 451 | WEPOLL_INTERNAL void port_add_deleted_socket(port_state_t* port_state, 452 | sock_state_t* sock_state); 453 | WEPOLL_INTERNAL void port_remove_deleted_socket(port_state_t* port_state, 454 | sock_state_t* sock_state); 455 | 456 | WEPOLL_INTERNAL HANDLE port_get_iocp_handle(port_state_t* port_state); 457 | WEPOLL_INTERNAL queue_t* port_get_poll_group_queue(port_state_t* port_state); 458 | 459 | WEPOLL_INTERNAL port_state_t* port_state_from_handle_tree_node( 460 | ts_tree_node_t* tree_node); 461 | WEPOLL_INTERNAL ts_tree_node_t* port_state_to_handle_tree_node( 462 | port_state_t* port_state); 463 | 464 | /* The reflock is a special kind of lock that normally prevents a chunk of 465 | * memory from being freed, but does allow the chunk of memory to eventually be 466 | * released in a coordinated fashion. 467 | * 468 | * Under normal operation, threads increase and decrease the reference count, 469 | * which are wait-free operations. 470 | * 471 | * Exactly once during the reflock's lifecycle, a thread holding a reference to 472 | * the lock may "destroy" the lock; this operation blocks until all other 473 | * threads holding a reference to the lock have dereferenced it. After 474 | * "destroy" returns, the calling thread may assume that no other threads have 475 | * a reference to the lock. 476 | * 477 | * Attempting to lock or destroy a lock after reflock_unref_and_destroy() has 478 | * been called is invalid and results in undefined behavior. Therefore the user 479 | * should use another lock to guarantee that this can't happen. 480 | */ 481 | 482 | typedef struct reflock { 483 | volatile long state; /* 32-bit Interlocked APIs operate on `long` values. 
*/ 484 | } reflock_t; 485 | 486 | WEPOLL_INTERNAL int reflock_global_init(void); 487 | 488 | WEPOLL_INTERNAL void reflock_init(reflock_t* reflock); 489 | WEPOLL_INTERNAL void reflock_ref(reflock_t* reflock); 490 | WEPOLL_INTERNAL void reflock_unref(reflock_t* reflock); 491 | WEPOLL_INTERNAL void reflock_unref_and_destroy(reflock_t* reflock); 492 | 493 | #include <stdbool.h> 494 | 495 | /* N.b.: the tree functions do not set errno or LastError when they fail. Each 496 | * of the API functions has at most one failure mode. It is up to the caller to 497 | * set an appropriate error code when necessary. */ 498 | 499 | typedef struct tree tree_t; 500 | typedef struct tree_node tree_node_t; 501 | 502 | typedef struct tree { 503 | tree_node_t* root; 504 | } tree_t; 505 | 506 | typedef struct tree_node { 507 | tree_node_t* left; 508 | tree_node_t* right; 509 | tree_node_t* parent; 510 | uintptr_t key; 511 | bool red; 512 | } tree_node_t; 513 | 514 | WEPOLL_INTERNAL void tree_init(tree_t* tree); 515 | WEPOLL_INTERNAL void tree_node_init(tree_node_t* node); 516 | 517 | WEPOLL_INTERNAL int tree_add(tree_t* tree, tree_node_t* node, uintptr_t key); 518 | WEPOLL_INTERNAL void tree_del(tree_t* tree, tree_node_t* node); 519 | 520 | WEPOLL_INTERNAL tree_node_t* tree_find(const tree_t* tree, uintptr_t key); 521 | WEPOLL_INTERNAL tree_node_t* tree_root(const tree_t* tree); 522 | 523 | typedef struct ts_tree { 524 | tree_t tree; 525 | SRWLOCK lock; 526 | } ts_tree_t; 527 | 528 | typedef struct ts_tree_node { 529 | tree_node_t tree_node; 530 | reflock_t reflock; 531 | } ts_tree_node_t; 532 | 533 | WEPOLL_INTERNAL void ts_tree_init(ts_tree_t* rtl); 534 | WEPOLL_INTERNAL void ts_tree_node_init(ts_tree_node_t* node); 535 | 536 | WEPOLL_INTERNAL int ts_tree_add(ts_tree_t* ts_tree, 537 | ts_tree_node_t* node, 538 | uintptr_t key); 539 | 540 | WEPOLL_INTERNAL ts_tree_node_t* ts_tree_del_and_ref(ts_tree_t* ts_tree, 541 | uintptr_t key); 542 | WEPOLL_INTERNAL ts_tree_node_t* ts_tree_find_and_ref(ts_tree_t* ts_tree, 543 | uintptr_t key); 544 | 545 | WEPOLL_INTERNAL void ts_tree_node_unref(ts_tree_node_t* node); 546 | WEPOLL_INTERNAL void ts_tree_node_unref_and_destroy(ts_tree_node_t* node); 547 | 548 | static ts_tree_t epoll__handle_tree; 549 | 550 | int epoll_global_init(void) { 551 | ts_tree_init(&epoll__handle_tree); 552 | return 0; 553 | } 554 | 555 | static HANDLE epoll__create(void) { 556 | port_state_t* port_state; 557 | HANDLE ephnd; 558 | ts_tree_node_t* tree_node; 559 | 560 | if (init() < 0) 561 | return NULL; 562 | 563 | port_state = port_new(&ephnd); 564 | if (port_state == NULL) 565 | return NULL; 566 | 567 | tree_node = port_state_to_handle_tree_node(port_state); 568 | if (ts_tree_add(&epoll__handle_tree, tree_node, (uintptr_t) ephnd) < 0) { 569 | /* This should never happen. 
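 * port_new() has just created ephnd, so a duplicate key in the handle tree
 * is not expected.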
*/ 570 | port_delete(port_state); 571 | return_set_error(NULL, ERROR_ALREADY_EXISTS); 572 | } 573 | 574 | return ephnd; 575 | } 576 | 577 | HANDLE epoll_create(int size) { 578 | if (size <= 0) 579 | return_set_error(NULL, ERROR_INVALID_PARAMETER); 580 | 581 | return epoll__create(); 582 | } 583 | 584 | HANDLE epoll_create1(int flags) { 585 | if (flags != 0) 586 | return_set_error(NULL, ERROR_INVALID_PARAMETER); 587 | 588 | return epoll__create(); 589 | } 590 | 591 | int epoll_close(HANDLE ephnd) { 592 | ts_tree_node_t* tree_node; 593 | port_state_t* port_state; 594 | 595 | if (init() < 0) 596 | return -1; 597 | 598 | tree_node = ts_tree_del_and_ref(&epoll__handle_tree, (uintptr_t) ephnd); 599 | if (tree_node == NULL) { 600 | err_set_win_error(ERROR_INVALID_PARAMETER); 601 | goto err; 602 | } 603 | 604 | port_state = port_state_from_handle_tree_node(tree_node); 605 | port_close(port_state); 606 | 607 | ts_tree_node_unref_and_destroy(tree_node); 608 | 609 | return port_delete(port_state); 610 | 611 | err: 612 | err_check_handle(ephnd); 613 | return -1; 614 | } 615 | 616 | int epoll_ctl(HANDLE ephnd, int op, SOCKET sock, struct epoll_event* ev) { 617 | ts_tree_node_t* tree_node; 618 | port_state_t* port_state; 619 | int r; 620 | 621 | if (init() < 0) 622 | return -1; 623 | 624 | tree_node = ts_tree_find_and_ref(&epoll__handle_tree, (uintptr_t) ephnd); 625 | if (tree_node == NULL) { 626 | err_set_win_error(ERROR_INVALID_PARAMETER); 627 | goto err; 628 | } 629 | 630 | port_state = port_state_from_handle_tree_node(tree_node); 631 | r = port_ctl(port_state, op, sock, ev); 632 | 633 | ts_tree_node_unref(tree_node); 634 | 635 | if (r < 0) 636 | goto err; 637 | 638 | return 0; 639 | 640 | err: 641 | /* On Linux, in the case of epoll_ctl(), EBADF takes priority over other 642 | * errors. Wepoll mimics this behavior. 
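 * err_check_handle() only overwrites the error when the handle it inspects
 * is actually invalid, so a bad ephnd (or sock) turns the reported error
 * into EBADF.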
*/ 643 | err_check_handle(ephnd); 644 | err_check_handle((HANDLE) sock); 645 | return -1; 646 | } 647 | 648 | int epoll_wait(HANDLE ephnd, 649 | struct epoll_event* events, 650 | int maxevents, 651 | int timeout) { 652 | ts_tree_node_t* tree_node; 653 | port_state_t* port_state; 654 | int num_events; 655 | 656 | if (maxevents <= 0) 657 | return_set_error(-1, ERROR_INVALID_PARAMETER); 658 | 659 | if (init() < 0) 660 | return -1; 661 | 662 | tree_node = ts_tree_find_and_ref(&epoll__handle_tree, (uintptr_t) ephnd); 663 | if (tree_node == NULL) { 664 | err_set_win_error(ERROR_INVALID_PARAMETER); 665 | goto err; 666 | } 667 | 668 | port_state = port_state_from_handle_tree_node(tree_node); 669 | num_events = port_wait(port_state, events, maxevents, timeout); 670 | 671 | ts_tree_node_unref(tree_node); 672 | 673 | if (num_events < 0) 674 | goto err; 675 | 676 | return num_events; 677 | 678 | err: 679 | err_check_handle(ephnd); 680 | return -1; 681 | } 682 | 683 | #include <errno.h> 684 | 685 | #define ERR__ERRNO_MAPPINGS(X) \ 686 | X(ERROR_ACCESS_DENIED, EACCES) \ 687 | X(ERROR_ALREADY_EXISTS, EEXIST) \ 688 | X(ERROR_BAD_COMMAND, EACCES) \ 689 | X(ERROR_BAD_EXE_FORMAT, ENOEXEC) \ 690 | X(ERROR_BAD_LENGTH, EACCES) \ 691 | X(ERROR_BAD_NETPATH, ENOENT) \ 692 | X(ERROR_BAD_NET_NAME, ENOENT) \ 693 | X(ERROR_BAD_NET_RESP, ENETDOWN) \ 694 | X(ERROR_BAD_PATHNAME, ENOENT) \ 695 | X(ERROR_BROKEN_PIPE, EPIPE) \ 696 | X(ERROR_CANNOT_MAKE, EACCES) \ 697 | X(ERROR_COMMITMENT_LIMIT, ENOMEM) \ 698 | X(ERROR_CONNECTION_ABORTED, ECONNABORTED) \ 699 | X(ERROR_CONNECTION_ACTIVE, EISCONN) \ 700 | X(ERROR_CONNECTION_REFUSED, ECONNREFUSED) \ 701 | X(ERROR_CRC, EACCES) \ 702 | X(ERROR_DIR_NOT_EMPTY, ENOTEMPTY) \ 703 | X(ERROR_DISK_FULL, ENOSPC) \ 704 | X(ERROR_DUP_NAME, EADDRINUSE) \ 705 | X(ERROR_FILENAME_EXCED_RANGE, ENOENT) \ 706 | X(ERROR_FILE_NOT_FOUND, ENOENT) \ 707 | X(ERROR_GEN_FAILURE, EACCES) \ 708 | X(ERROR_GRACEFUL_DISCONNECT, EPIPE) \ 709 | X(ERROR_HOST_DOWN, EHOSTUNREACH) \ 710 | X(ERROR_HOST_UNREACHABLE, EHOSTUNREACH) \ 711 | X(ERROR_INSUFFICIENT_BUFFER, EFAULT) \ 712 | X(ERROR_INVALID_ADDRESS, EADDRNOTAVAIL) \ 713 | X(ERROR_INVALID_FUNCTION, EINVAL) \ 714 | X(ERROR_INVALID_HANDLE, EBADF) \ 715 | X(ERROR_INVALID_NETNAME, EADDRNOTAVAIL) \ 716 | X(ERROR_INVALID_PARAMETER, EINVAL) \ 717 | X(ERROR_INVALID_USER_BUFFER, EMSGSIZE) \ 718 | X(ERROR_IO_PENDING, EINPROGRESS) \ 719 | X(ERROR_LOCK_VIOLATION, EACCES) \ 720 | X(ERROR_MORE_DATA, EMSGSIZE) \ 721 | X(ERROR_NETNAME_DELETED, ECONNABORTED) \ 722 | X(ERROR_NETWORK_ACCESS_DENIED, EACCES) \ 723 | X(ERROR_NETWORK_BUSY, ENETDOWN) \ 724 | X(ERROR_NETWORK_UNREACHABLE, ENETUNREACH) \ 725 | X(ERROR_NOACCESS, EFAULT) \ 726 | X(ERROR_NONPAGED_SYSTEM_RESOURCES, ENOMEM) \ 727 | X(ERROR_NOT_ENOUGH_MEMORY, ENOMEM) \ 728 | X(ERROR_NOT_ENOUGH_QUOTA, ENOMEM) \ 729 | X(ERROR_NOT_FOUND, ENOENT) \ 730 | X(ERROR_NOT_LOCKED, EACCES) \ 731 | X(ERROR_NOT_READY, EACCES) \ 732 | X(ERROR_NOT_SAME_DEVICE, EXDEV) \ 733 | X(ERROR_NOT_SUPPORTED, ENOTSUP) \ 734 | X(ERROR_NO_MORE_FILES, ENOENT) \ 735 | X(ERROR_NO_SYSTEM_RESOURCES, ENOMEM) \ 736 | X(ERROR_OPERATION_ABORTED, EINTR) \ 737 | X(ERROR_OUT_OF_PAPER, EACCES) \ 738 | X(ERROR_PAGED_SYSTEM_RESOURCES, ENOMEM) \ 739 | X(ERROR_PAGEFILE_QUOTA, ENOMEM) \ 740 | X(ERROR_PATH_NOT_FOUND, ENOENT) \ 741 | X(ERROR_PIPE_NOT_CONNECTED, EPIPE) \ 742 | X(ERROR_PORT_UNREACHABLE, ECONNRESET) \ 743 | X(ERROR_PROTOCOL_UNREACHABLE, ENETUNREACH) \ 744 | X(ERROR_REM_NOT_LIST, ECONNREFUSED) \ 745 | X(ERROR_REQUEST_ABORTED, EINTR) \ 746 | X(ERROR_REQ_NOT_ACCEP, 
EWOULDBLOCK) \ 747 | X(ERROR_SECTOR_NOT_FOUND, EACCES) \ 748 | X(ERROR_SEM_TIMEOUT, ETIMEDOUT) \ 749 | X(ERROR_SHARING_VIOLATION, EACCES) \ 750 | X(ERROR_TOO_MANY_NAMES, ENOMEM) \ 751 | X(ERROR_TOO_MANY_OPEN_FILES, EMFILE) \ 752 | X(ERROR_UNEXP_NET_ERR, ECONNABORTED) \ 753 | X(ERROR_WAIT_NO_CHILDREN, ECHILD) \ 754 | X(ERROR_WORKING_SET_QUOTA, ENOMEM) \ 755 | X(ERROR_WRITE_PROTECT, EACCES) \ 756 | X(ERROR_WRONG_DISK, EACCES) \ 757 | X(WSAEACCES, EACCES) \ 758 | X(WSAEADDRINUSE, EADDRINUSE) \ 759 | X(WSAEADDRNOTAVAIL, EADDRNOTAVAIL) \ 760 | X(WSAEAFNOSUPPORT, EAFNOSUPPORT) \ 761 | X(WSAECONNABORTED, ECONNABORTED) \ 762 | X(WSAECONNREFUSED, ECONNREFUSED) \ 763 | X(WSAECONNRESET, ECONNRESET) \ 764 | X(WSAEDISCON, EPIPE) \ 765 | X(WSAEFAULT, EFAULT) \ 766 | X(WSAEHOSTDOWN, EHOSTUNREACH) \ 767 | X(WSAEHOSTUNREACH, EHOSTUNREACH) \ 768 | X(WSAEINPROGRESS, EBUSY) \ 769 | X(WSAEINTR, EINTR) \ 770 | X(WSAEINVAL, EINVAL) \ 771 | X(WSAEISCONN, EISCONN) \ 772 | X(WSAEMSGSIZE, EMSGSIZE) \ 773 | X(WSAENETDOWN, ENETDOWN) \ 774 | X(WSAENETRESET, EHOSTUNREACH) \ 775 | X(WSAENETUNREACH, ENETUNREACH) \ 776 | X(WSAENOBUFS, ENOMEM) \ 777 | X(WSAENOTCONN, ENOTCONN) \ 778 | X(WSAENOTSOCK, ENOTSOCK) \ 779 | X(WSAEOPNOTSUPP, EOPNOTSUPP) \ 780 | X(WSAEPROCLIM, ENOMEM) \ 781 | X(WSAESHUTDOWN, EPIPE) \ 782 | X(WSAETIMEDOUT, ETIMEDOUT) \ 783 | X(WSAEWOULDBLOCK, EWOULDBLOCK) \ 784 | X(WSANOTINITIALISED, ENETDOWN) \ 785 | X(WSASYSNOTREADY, ENETDOWN) \ 786 | X(WSAVERNOTSUPPORTED, ENOSYS) 787 | 788 | static errno_t err__map_win_error_to_errno(DWORD error) { 789 | switch (error) { 790 | #define X(error_sym, errno_sym) \ 791 | case error_sym: \ 792 | return errno_sym; 793 | ERR__ERRNO_MAPPINGS(X) 794 | #undef X 795 | } 796 | return EINVAL; 797 | } 798 | 799 | void err_map_win_error(void) { 800 | errno = err__map_win_error_to_errno(GetLastError()); 801 | } 802 | 803 | void err_set_win_error(DWORD error) { 804 | SetLastError(error); 805 | errno = err__map_win_error_to_errno(error); 806 | } 807 | 808 | int err_check_handle(HANDLE handle) { 809 | DWORD flags; 810 | 811 | /* GetHandleInformation() succeeds when passed INVALID_HANDLE_VALUE, so check 812 | * for this condition explicitly. */ 813 | if (handle == INVALID_HANDLE_VALUE) 814 | return_set_error(-1, ERROR_INVALID_HANDLE); 815 | 816 | if (!GetHandleInformation(handle, &flags)) 817 | return_map_error(-1); 818 | 819 | return 0; 820 | } 821 | 822 | #include <stddef.h> 823 | 824 | #define array_count(a) (sizeof(a) / (sizeof((a)[0]))) 825 | 826 | #define container_of(ptr, type, member) \ 827 | ((type*) ((uintptr_t) (ptr) - offsetof(type, member))) 828 | 829 | #define unused_var(v) ((void) (v)) 830 | 831 | /* Polyfill `inline` for older versions of msvc (up to Visual Studio 2013) */ 832 | #if defined(_MSC_VER) && _MSC_VER < 1900 833 | #define inline __inline 834 | #endif 835 | 836 | WEPOLL_INTERNAL int ws_global_init(void); 837 | WEPOLL_INTERNAL SOCKET ws_get_base_socket(SOCKET socket); 838 | 839 | static bool init__done = false; 840 | static INIT_ONCE init__once = INIT_ONCE_STATIC_INIT; 841 | 842 | static BOOL CALLBACK init__once_callback(INIT_ONCE* once, 843 | void* parameter, 844 | void** context) { 845 | unused_var(once); 846 | unused_var(parameter); 847 | unused_var(context); 848 | 849 | /* N.b. that initialization order matters here. 
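 * For example, reflock_global_init() creates a keyed event through NT
 * functions that nt_global_init() must resolve from ntdll first.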
*/ 850 | if (ws_global_init() < 0 || nt_global_init() < 0 || 851 | reflock_global_init() < 0 || epoll_global_init() < 0) 852 | return FALSE; 853 | 854 | init__done = true; 855 | return TRUE; 856 | } 857 | 858 | int init(void) { 859 | if (!init__done && 860 | !InitOnceExecuteOnce(&init__once, init__once_callback, NULL, NULL)) 861 | /* `InitOnceExecuteOnce()` itself is infallible, and it doesn't set any 862 | * error code when the once-callback returns FALSE. We return -1 here to 863 | * indicate that global initialization failed; the failing init function is 864 | * responsible for setting `errno` and calling `SetLastError()`. */ 865 | return -1; 866 | 867 | return 0; 868 | } 869 | 870 | /* Set up a workaround for the following problem: 871 | * FARPROC addr = GetProcAddress(...); 872 | * MY_FUNC func = (MY_FUNC) addr; <-- GCC 8 warning/error. 873 | * MY_FUNC func = (MY_FUNC) (void*) addr; <-- MSVC warning/error. 874 | * To compile cleanly with either compiler, do casts with this "bridge" type: 875 | * MY_FUNC func = (MY_FUNC) (nt__fn_ptr_cast_t) addr; */ 876 | #ifdef __GNUC__ 877 | typedef void* nt__fn_ptr_cast_t; 878 | #else 879 | typedef FARPROC nt__fn_ptr_cast_t; 880 | #endif 881 | 882 | #define X(return_type, attributes, name, parameters) \ 883 | WEPOLL_INTERNAL return_type(attributes* name) parameters = NULL; 884 | NT_NTDLL_IMPORT_LIST(X) 885 | #undef X 886 | 887 | int nt_global_init(void) { 888 | HMODULE ntdll; 889 | FARPROC fn_ptr; 890 | 891 | ntdll = GetModuleHandleW(L"ntdll.dll"); 892 | if (ntdll == NULL) 893 | return -1; 894 | 895 | #define X(return_type, attributes, name, parameters) \ 896 | fn_ptr = GetProcAddress(ntdll, #name); \ 897 | if (fn_ptr == NULL) \ 898 | return -1; \ 899 | name = (return_type(attributes*) parameters)(nt__fn_ptr_cast_t) fn_ptr; 900 | NT_NTDLL_IMPORT_LIST(X) 901 | #undef X 902 | 903 | return 0; 904 | } 905 | 906 | #include <stdlib.h> 907 | 908 | typedef struct poll_group poll_group_t; 909 | 910 | typedef struct queue_node queue_node_t; 911 | 912 | WEPOLL_INTERNAL poll_group_t* poll_group_acquire(port_state_t* port); 913 | WEPOLL_INTERNAL void poll_group_release(poll_group_t* poll_group); 914 | 915 | WEPOLL_INTERNAL void poll_group_delete(poll_group_t* poll_group); 916 | 917 | WEPOLL_INTERNAL poll_group_t* poll_group_from_queue_node( 918 | queue_node_t* queue_node); 919 | WEPOLL_INTERNAL HANDLE 920 | poll_group_get_afd_device_handle(poll_group_t* poll_group); 921 | 922 | typedef struct queue_node { 923 | queue_node_t* prev; 924 | queue_node_t* next; 925 | } queue_node_t; 926 | 927 | typedef struct queue { 928 | queue_node_t head; 929 | } queue_t; 930 | 931 | WEPOLL_INTERNAL void queue_init(queue_t* queue); 932 | WEPOLL_INTERNAL void queue_node_init(queue_node_t* node); 933 | 934 | WEPOLL_INTERNAL queue_node_t* queue_first(const queue_t* queue); 935 | WEPOLL_INTERNAL queue_node_t* queue_last(const queue_t* queue); 936 | 937 | WEPOLL_INTERNAL void queue_prepend(queue_t* queue, queue_node_t* node); 938 | WEPOLL_INTERNAL void queue_append(queue_t* queue, queue_node_t* node); 939 | WEPOLL_INTERNAL void queue_move_to_start(queue_t* queue, queue_node_t* node); 940 | WEPOLL_INTERNAL void queue_move_to_end(queue_t* queue, queue_node_t* node); 941 | WEPOLL_INTERNAL void queue_remove(queue_node_t* node); 942 | 943 | WEPOLL_INTERNAL bool queue_is_empty(const queue_t* queue); 944 | WEPOLL_INTERNAL bool queue_is_enqueued(const queue_node_t* node); 945 | 946 | #define POLL_GROUP__MAX_GROUP_SIZE 32 947 | 948 | typedef struct poll_group { 949 | port_state_t* port_state; 950 | 
queue_node_t queue_node; 951 | HANDLE afd_device_handle; 952 | size_t group_size; 953 | } poll_group_t; 954 | 955 | static poll_group_t* poll_group__new(port_state_t* port_state) { 956 | HANDLE iocp_handle = port_get_iocp_handle(port_state); 957 | queue_t* poll_group_queue = port_get_poll_group_queue(port_state); 958 | 959 | poll_group_t* poll_group = malloc(sizeof *poll_group); 960 | if (poll_group == NULL) 961 | return_set_error(NULL, ERROR_NOT_ENOUGH_MEMORY); 962 | 963 | memset(poll_group, 0, sizeof *poll_group); 964 | 965 | queue_node_init(&poll_group->queue_node); 966 | poll_group->port_state = port_state; 967 | 968 | if (afd_create_device_handle(iocp_handle, &poll_group->afd_device_handle) < 969 | 0) { 970 | free(poll_group); 971 | return NULL; 972 | } 973 | 974 | queue_append(poll_group_queue, &poll_group->queue_node); 975 | 976 | return poll_group; 977 | } 978 | 979 | void poll_group_delete(poll_group_t* poll_group) { 980 | assert(poll_group->group_size == 0); 981 | CloseHandle(poll_group->afd_device_handle); 982 | queue_remove(&poll_group->queue_node); 983 | free(poll_group); 984 | } 985 | 986 | poll_group_t* poll_group_from_queue_node(queue_node_t* queue_node) { 987 | return container_of(queue_node, poll_group_t, queue_node); 988 | } 989 | 990 | HANDLE poll_group_get_afd_device_handle(poll_group_t* poll_group) { 991 | return poll_group->afd_device_handle; 992 | } 993 | 994 | poll_group_t* poll_group_acquire(port_state_t* port_state) { 995 | queue_t* poll_group_queue = port_get_poll_group_queue(port_state); 996 | poll_group_t* poll_group = 997 | !queue_is_empty(poll_group_queue) 998 | ? container_of( 999 | queue_last(poll_group_queue), poll_group_t, queue_node) 1000 | : NULL; 1001 | 1002 | if (poll_group == NULL || 1003 | poll_group->group_size >= POLL_GROUP__MAX_GROUP_SIZE) 1004 | poll_group = poll_group__new(port_state); 1005 | if (poll_group == NULL) 1006 | return NULL; 1007 | 1008 | if (++poll_group->group_size == POLL_GROUP__MAX_GROUP_SIZE) 1009 | queue_move_to_start(poll_group_queue, &poll_group->queue_node); 1010 | 1011 | return poll_group; 1012 | } 1013 | 1014 | void poll_group_release(poll_group_t* poll_group) { 1015 | port_state_t* port_state = poll_group->port_state; 1016 | queue_t* poll_group_queue = port_get_poll_group_queue(port_state); 1017 | 1018 | poll_group->group_size--; 1019 | assert(poll_group->group_size < POLL_GROUP__MAX_GROUP_SIZE); 1020 | 1021 | queue_move_to_end(poll_group_queue, &poll_group->queue_node); 1022 | 1023 | /* Poll groups are currently only freed when the epoll port is closed. 
*/ 1024 | } 1025 | 1026 | WEPOLL_INTERNAL sock_state_t* sock_new(port_state_t* port_state, 1027 | SOCKET socket); 1028 | WEPOLL_INTERNAL void sock_delete(port_state_t* port_state, 1029 | sock_state_t* sock_state); 1030 | WEPOLL_INTERNAL void sock_force_delete(port_state_t* port_state, 1031 | sock_state_t* sock_state); 1032 | 1033 | WEPOLL_INTERNAL int sock_set_event(port_state_t* port_state, 1034 | sock_state_t* sock_state, 1035 | const struct epoll_event* ev); 1036 | 1037 | WEPOLL_INTERNAL int sock_update(port_state_t* port_state, 1038 | sock_state_t* sock_state); 1039 | WEPOLL_INTERNAL int sock_feed_event(port_state_t* port_state, 1040 | IO_STATUS_BLOCK* io_status_block, 1041 | struct epoll_event* ev); 1042 | 1043 | WEPOLL_INTERNAL sock_state_t* sock_state_from_queue_node( 1044 | queue_node_t* queue_node); 1045 | WEPOLL_INTERNAL queue_node_t* sock_state_to_queue_node( 1046 | sock_state_t* sock_state); 1047 | WEPOLL_INTERNAL sock_state_t* sock_state_from_tree_node( 1048 | tree_node_t* tree_node); 1049 | WEPOLL_INTERNAL tree_node_t* sock_state_to_tree_node(sock_state_t* sock_state); 1050 | 1051 | #define PORT__MAX_ON_STACK_COMPLETIONS 256 1052 | 1053 | typedef struct port_state { 1054 | HANDLE iocp_handle; 1055 | tree_t sock_tree; 1056 | queue_t sock_update_queue; 1057 | queue_t sock_deleted_queue; 1058 | queue_t poll_group_queue; 1059 | ts_tree_node_t handle_tree_node; 1060 | CRITICAL_SECTION lock; 1061 | size_t active_poll_count; 1062 | } port_state_t; 1063 | 1064 | static inline port_state_t* port__alloc(void) { 1065 | port_state_t* port_state = malloc(sizeof *port_state); 1066 | if (port_state == NULL) 1067 | return_set_error(NULL, ERROR_NOT_ENOUGH_MEMORY); 1068 | 1069 | return port_state; 1070 | } 1071 | 1072 | static inline void port__free(port_state_t* port) { 1073 | assert(port != NULL); 1074 | free(port); 1075 | } 1076 | 1077 | static inline HANDLE port__create_iocp(void) { 1078 | HANDLE iocp_handle = 1079 | CreateIoCompletionPort(INVALID_HANDLE_VALUE, NULL, 0, 0); 1080 | if (iocp_handle == NULL) 1081 | return_map_error(NULL); 1082 | 1083 | return iocp_handle; 1084 | } 1085 | 1086 | port_state_t* port_new(HANDLE* iocp_handle_out) { 1087 | port_state_t* port_state; 1088 | HANDLE iocp_handle; 1089 | 1090 | port_state = port__alloc(); 1091 | if (port_state == NULL) 1092 | goto err1; 1093 | 1094 | iocp_handle = port__create_iocp(); 1095 | if (iocp_handle == NULL) 1096 | goto err2; 1097 | 1098 | memset(port_state, 0, sizeof *port_state); 1099 | 1100 | port_state->iocp_handle = iocp_handle; 1101 | tree_init(&port_state->sock_tree); 1102 | queue_init(&port_state->sock_update_queue); 1103 | queue_init(&port_state->sock_deleted_queue); 1104 | queue_init(&port_state->poll_group_queue); 1105 | ts_tree_node_init(&port_state->handle_tree_node); 1106 | InitializeCriticalSection(&port_state->lock); 1107 | 1108 | *iocp_handle_out = iocp_handle; 1109 | return port_state; 1110 | 1111 | err2: 1112 | port__free(port_state); 1113 | err1: 1114 | return NULL; 1115 | } 1116 | 1117 | static inline int port__close_iocp(port_state_t* port_state) { 1118 | HANDLE iocp_handle = port_state->iocp_handle; 1119 | port_state->iocp_handle = NULL; 1120 | 1121 | if (!CloseHandle(iocp_handle)) 1122 | return_map_error(-1); 1123 | 1124 | return 0; 1125 | } 1126 | 1127 | int port_close(port_state_t* port_state) { 1128 | int result; 1129 | 1130 | EnterCriticalSection(&port_state->lock); 1131 | result = port__close_iocp(port_state); 1132 | LeaveCriticalSection(&port_state->lock); 1133 | 1134 | return result; 1135 | } 1136 | 
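/* A minimal lifecycle sketch tying the functions above and below together
 * (illustrative only; `example_port_lifecycle` is not part of wepoll): the
 * IOCP handle is closed with port_close() first, because port_delete()
 * asserts that the handle is already gone before it frees the remaining
 * sockets and poll groups. */
static int example_port_lifecycle(void) {
  HANDLE iocp_handle;
  port_state_t* port_state = port_new(&iocp_handle);
  if (port_state == NULL)
    return -1;
  /* ... epoll_ctl()/epoll_wait() traffic would happen here ... */
  port_close(port_state);         /* Closes the IOCP handle under the lock. */
  return port_delete(port_state); /* Frees sockets, poll groups, the state. */
}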
1137 | int port_delete(port_state_t* port_state) { 1138 | tree_node_t* tree_node; 1139 | queue_node_t* queue_node; 1140 | 1141 | /* At this point the IOCP port should have been closed. */ 1142 | assert(port_state->iocp_handle == NULL); 1143 | 1144 | while ((tree_node = tree_root(&port_state->sock_tree)) != NULL) { 1145 | sock_state_t* sock_state = sock_state_from_tree_node(tree_node); 1146 | sock_force_delete(port_state, sock_state); 1147 | } 1148 | 1149 | while ((queue_node = queue_first(&port_state->sock_deleted_queue)) != NULL) { 1150 | sock_state_t* sock_state = sock_state_from_queue_node(queue_node); 1151 | sock_force_delete(port_state, sock_state); 1152 | } 1153 | 1154 | while ((queue_node = queue_first(&port_state->poll_group_queue)) != NULL) { 1155 | poll_group_t* poll_group = poll_group_from_queue_node(queue_node); 1156 | poll_group_delete(poll_group); 1157 | } 1158 | 1159 | assert(queue_is_empty(&port_state->sock_update_queue)); 1160 | 1161 | DeleteCriticalSection(&port_state->lock); 1162 | 1163 | port__free(port_state); 1164 | 1165 | return 0; 1166 | } 1167 | 1168 | static int port__update_events(port_state_t* port_state) { 1169 | queue_t* sock_update_queue = &port_state->sock_update_queue; 1170 | 1171 | /* Walk the queue, submitting new poll requests for every socket that needs 1172 | * it. */ 1173 | while (!queue_is_empty(sock_update_queue)) { 1174 | queue_node_t* queue_node = queue_first(sock_update_queue); 1175 | sock_state_t* sock_state = sock_state_from_queue_node(queue_node); 1176 | 1177 | if (sock_update(port_state, sock_state) < 0) 1178 | return -1; 1179 | 1180 | /* sock_update() removes the socket from the update queue. */ 1181 | } 1182 | 1183 | return 0; 1184 | } 1185 | 1186 | static inline void port__update_events_if_polling(port_state_t* port_state) { 1187 | if (port_state->active_poll_count > 0) 1188 | port__update_events(port_state); 1189 | } 1190 | 1191 | static inline int port__feed_events(port_state_t* port_state, 1192 | struct epoll_event* epoll_events, 1193 | OVERLAPPED_ENTRY* iocp_events, 1194 | DWORD iocp_event_count) { 1195 | int epoll_event_count = 0; 1196 | DWORD i; 1197 | 1198 | for (i = 0; i < iocp_event_count; i++) { 1199 | IO_STATUS_BLOCK* io_status_block = 1200 | (IO_STATUS_BLOCK*) iocp_events[i].lpOverlapped; 1201 | struct epoll_event* ev = &epoll_events[epoll_event_count]; 1202 | 1203 | epoll_event_count += sock_feed_event(port_state, io_status_block, ev); 1204 | } 1205 | 1206 | return epoll_event_count; 1207 | } 1208 | 1209 | static inline int port__poll(port_state_t* port_state, 1210 | struct epoll_event* epoll_events, 1211 | OVERLAPPED_ENTRY* iocp_events, 1212 | DWORD maxevents, 1213 | DWORD timeout) { 1214 | DWORD completion_count; 1215 | 1216 | if (port__update_events(port_state) < 0) 1217 | return -1; 1218 | 1219 | port_state->active_poll_count++; 1220 | 1221 | LeaveCriticalSection(&port_state->lock); 1222 | 1223 | BOOL r = GetQueuedCompletionStatusEx(port_state->iocp_handle, 1224 | iocp_events, 1225 | maxevents, 1226 | &completion_count, 1227 | timeout, 1228 | FALSE); 1229 | 1230 | EnterCriticalSection(&port_state->lock); 1231 | 1232 | port_state->active_poll_count--; 1233 | 1234 | if (!r) 1235 | return_map_error(-1); 1236 | 1237 | return port__feed_events( 1238 | port_state, epoll_events, iocp_events, completion_count); 1239 | } 1240 | 1241 | int port_wait(port_state_t* port_state, 1242 | struct epoll_event* events, 1243 | int maxevents, 1244 | int timeout) { 1245 | OVERLAPPED_ENTRY stack_iocp_events[PORT__MAX_ON_STACK_COMPLETIONS]; 1246 | 
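/* The stack buffer declared above holds up to PORT__MAX_ON_STACK_COMPLETIONS
 * completions; when the caller asks for more, the code below switches to a
 * heap allocation, and if that allocation fails it clamps `maxevents` to the
 * stack buffer's capacity instead of failing the call. */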
OVERLAPPED_ENTRY* iocp_events; 1247 | uint64_t due = 0; 1248 | DWORD gqcs_timeout; 1249 | int result; 1250 | 1251 | /* Check whether `maxevents` is in range. */ 1252 | if (maxevents <= 0) 1253 | return_set_error(-1, ERROR_INVALID_PARAMETER); 1254 | 1255 | /* Decide whether the IOCP completion list can live on the stack, or allocate 1256 | * memory for it on the heap. */ 1257 | if ((size_t) maxevents <= array_count(stack_iocp_events)) { 1258 | iocp_events = stack_iocp_events; 1259 | } else if ((iocp_events = 1260 | malloc((size_t) maxevents * sizeof *iocp_events)) == NULL) { 1261 | iocp_events = stack_iocp_events; 1262 | maxevents = array_count(stack_iocp_events); 1263 | } 1264 | 1265 | /* Compute the timeout for GetQueuedCompletionStatus, and the wait end 1266 | * time, if the user specified a timeout other than zero or infinite. */ 1267 | if (timeout > 0) { 1268 | due = GetTickCount64() + (uint64_t) timeout; 1269 | gqcs_timeout = (DWORD) timeout; 1270 | } else if (timeout == 0) { 1271 | gqcs_timeout = 0; 1272 | } else { 1273 | gqcs_timeout = INFINITE; 1274 | } 1275 | 1276 | EnterCriticalSection(&port_state->lock); 1277 | 1278 | /* Dequeue completion packets until either at least one interesting event 1279 | * has been discovered, or the timeout is reached. */ 1280 | for (;;) { 1281 | uint64_t now; 1282 | 1283 | result = port__poll( 1284 | port_state, events, iocp_events, (DWORD) maxevents, gqcs_timeout); 1285 | if (result < 0 || result > 0) 1286 | break; /* Result, error, or time-out. */ 1287 | 1288 | if (timeout < 0) 1289 | continue; /* When timeout is negative, never time out. */ 1290 | 1291 | /* Update time. */ 1292 | now = GetTickCount64(); 1293 | 1294 | /* Do not allow the due time to be in the past. */ 1295 | if (now >= due) { 1296 | SetLastError(WAIT_TIMEOUT); 1297 | break; 1298 | } 1299 | 1300 | /* Recompute time-out argument for GetQueuedCompletionStatus. 
*/ 1301 | gqcs_timeout = (DWORD)(due - now); 1302 | } 1303 | 1304 | port__update_events_if_polling(port_state); 1305 | 1306 | LeaveCriticalSection(&port_state->lock); 1307 | 1308 | if (iocp_events != stack_iocp_events) 1309 | free(iocp_events); 1310 | 1311 | if (result >= 0) 1312 | return result; 1313 | else if (GetLastError() == WAIT_TIMEOUT) 1314 | return 0; 1315 | else 1316 | return -1; 1317 | } 1318 | 1319 | static inline int port__ctl_add(port_state_t* port_state, 1320 | SOCKET sock, 1321 | struct epoll_event* ev) { 1322 | sock_state_t* sock_state = sock_new(port_state, sock); 1323 | if (sock_state == NULL) 1324 | return -1; 1325 | 1326 | if (sock_set_event(port_state, sock_state, ev) < 0) { 1327 | sock_delete(port_state, sock_state); 1328 | return -1; 1329 | } 1330 | 1331 | port__update_events_if_polling(port_state); 1332 | 1333 | return 0; 1334 | } 1335 | 1336 | static inline int port__ctl_mod(port_state_t* port_state, 1337 | SOCKET sock, 1338 | struct epoll_event* ev) { 1339 | sock_state_t* sock_state = port_find_socket(port_state, sock); 1340 | if (sock_state == NULL) 1341 | return -1; 1342 | 1343 | if (sock_set_event(port_state, sock_state, ev) < 0) 1344 | return -1; 1345 | 1346 | port__update_events_if_polling(port_state); 1347 | 1348 | return 0; 1349 | } 1350 | 1351 | static inline int port__ctl_del(port_state_t* port_state, SOCKET sock) { 1352 | sock_state_t* sock_state = port_find_socket(port_state, sock); 1353 | if (sock_state == NULL) 1354 | return -1; 1355 | 1356 | sock_delete(port_state, sock_state); 1357 | 1358 | return 0; 1359 | } 1360 | 1361 | static inline int port__ctl_op(port_state_t* port_state, 1362 | int op, 1363 | SOCKET sock, 1364 | struct epoll_event* ev) { 1365 | switch (op) { 1366 | case EPOLL_CTL_ADD: 1367 | return port__ctl_add(port_state, sock, ev); 1368 | case EPOLL_CTL_MOD: 1369 | return port__ctl_mod(port_state, sock, ev); 1370 | case EPOLL_CTL_DEL: 1371 | return port__ctl_del(port_state, sock); 1372 | default: 1373 | return_set_error(-1, ERROR_INVALID_PARAMETER); 1374 | } 1375 | } 1376 | 1377 | int port_ctl(port_state_t* port_state, 1378 | int op, 1379 | SOCKET sock, 1380 | struct epoll_event* ev) { 1381 | int result; 1382 | 1383 | EnterCriticalSection(&port_state->lock); 1384 | result = port__ctl_op(port_state, op, sock, ev); 1385 | LeaveCriticalSection(&port_state->lock); 1386 | 1387 | return result; 1388 | } 1389 | 1390 | int port_register_socket(port_state_t* port_state, 1391 | sock_state_t* sock_state, 1392 | SOCKET socket) { 1393 | if (tree_add(&port_state->sock_tree, 1394 | sock_state_to_tree_node(sock_state), 1395 | socket) < 0) 1396 | return_set_error(-1, ERROR_ALREADY_EXISTS); 1397 | return 0; 1398 | } 1399 | 1400 | void port_unregister_socket(port_state_t* port_state, 1401 | sock_state_t* sock_state) { 1402 | tree_del(&port_state->sock_tree, sock_state_to_tree_node(sock_state)); 1403 | } 1404 | 1405 | sock_state_t* port_find_socket(port_state_t* port_state, SOCKET socket) { 1406 | tree_node_t* tree_node = tree_find(&port_state->sock_tree, socket); 1407 | if (tree_node == NULL) 1408 | return_set_error(NULL, ERROR_NOT_FOUND); 1409 | return sock_state_from_tree_node(tree_node); 1410 | } 1411 | 1412 | void port_request_socket_update(port_state_t* port_state, 1413 | sock_state_t* sock_state) { 1414 | if (queue_is_enqueued(sock_state_to_queue_node(sock_state))) 1415 | return; 1416 | queue_append(&port_state->sock_update_queue, 1417 | sock_state_to_queue_node(sock_state)); 1418 | } 1419 | 1420 | void port_cancel_socket_update(port_state_t* 
port_state, 1421 | sock_state_t* sock_state) { 1422 | unused_var(port_state); 1423 | if (!queue_is_enqueued(sock_state_to_queue_node(sock_state))) 1424 | return; 1425 | queue_remove(sock_state_to_queue_node(sock_state)); 1426 | } 1427 | 1428 | void port_add_deleted_socket(port_state_t* port_state, 1429 | sock_state_t* sock_state) { 1430 | if (queue_is_enqueued(sock_state_to_queue_node(sock_state))) 1431 | return; 1432 | queue_append(&port_state->sock_deleted_queue, 1433 | sock_state_to_queue_node(sock_state)); 1434 | } 1435 | 1436 | void port_remove_deleted_socket(port_state_t* port_state, 1437 | sock_state_t* sock_state) { 1438 | unused_var(port_state); 1439 | if (!queue_is_enqueued(sock_state_to_queue_node(sock_state))) 1440 | return; 1441 | queue_remove(sock_state_to_queue_node(sock_state)); 1442 | } 1443 | 1444 | HANDLE port_get_iocp_handle(port_state_t* port_state) { 1445 | assert(port_state->iocp_handle != NULL); 1446 | return port_state->iocp_handle; 1447 | } 1448 | 1449 | queue_t* port_get_poll_group_queue(port_state_t* port_state) { 1450 | return &port_state->poll_group_queue; 1451 | } 1452 | 1453 | port_state_t* port_state_from_handle_tree_node(ts_tree_node_t* tree_node) { 1454 | return container_of(tree_node, port_state_t, handle_tree_node); 1455 | } 1456 | 1457 | ts_tree_node_t* port_state_to_handle_tree_node(port_state_t* port_state) { 1458 | return &port_state->handle_tree_node; 1459 | } 1460 | 1461 | void queue_init(queue_t* queue) { 1462 | queue_node_init(&queue->head); 1463 | } 1464 | 1465 | void queue_node_init(queue_node_t* node) { 1466 | node->prev = node; 1467 | node->next = node; 1468 | } 1469 | 1470 | static inline void queue__detach_node(queue_node_t* node) { 1471 | node->prev->next = node->next; 1472 | node->next->prev = node->prev; 1473 | } 1474 | 1475 | queue_node_t* queue_first(const queue_t* queue) { 1476 | return !queue_is_empty(queue) ? queue->head.next : NULL; 1477 | } 1478 | 1479 | queue_node_t* queue_last(const queue_t* queue) { 1480 | return !queue_is_empty(queue) ? 
queue->head.prev : NULL; 1481 | } 1482 | 1483 | void queue_prepend(queue_t* queue, queue_node_t* node) { 1484 | node->next = queue->head.next; 1485 | node->prev = &queue->head; 1486 | node->next->prev = node; 1487 | queue->head.next = node; 1488 | } 1489 | 1490 | void queue_append(queue_t* queue, queue_node_t* node) { 1491 | node->next = &queue->head; 1492 | node->prev = queue->head.prev; 1493 | node->prev->next = node; 1494 | queue->head.prev = node; 1495 | } 1496 | 1497 | void queue_move_to_start(queue_t* queue, queue_node_t* node) { 1498 | queue__detach_node(node); 1499 | queue_prepend(queue, node); 1500 | } 1501 | 1502 | void queue_move_to_end(queue_t* queue, queue_node_t* node) { 1503 | queue__detach_node(node); 1504 | queue_append(queue, node); 1505 | } 1506 | 1507 | void queue_remove(queue_node_t* node) { 1508 | queue__detach_node(node); 1509 | queue_node_init(node); 1510 | } 1511 | 1512 | bool queue_is_empty(const queue_t* queue) { 1513 | return !queue_is_enqueued(&queue->head); 1514 | } 1515 | 1516 | bool queue_is_enqueued(const queue_node_t* node) { 1517 | return node->prev != node; 1518 | } 1519 | 1520 | #define REFLOCK__REF ((long) 0x00000001UL) 1521 | #define REFLOCK__REF_MASK ((long) 0x0fffffffUL) 1522 | #define REFLOCK__DESTROY ((long) 0x10000000UL) 1523 | #define REFLOCK__DESTROY_MASK ((long) 0xf0000000UL) 1524 | #define REFLOCK__POISON ((long) 0x300dead0UL) 1525 | 1526 | static HANDLE reflock__keyed_event = NULL; 1527 | 1528 | int reflock_global_init(void) { 1529 | NTSTATUS status = NtCreateKeyedEvent( 1530 | &reflock__keyed_event, KEYEDEVENT_ALL_ACCESS, NULL, 0); 1531 | if (status != STATUS_SUCCESS) 1532 | return_set_error(-1, RtlNtStatusToDosError(status)); 1533 | return 0; 1534 | } 1535 | 1536 | void reflock_init(reflock_t* reflock) { 1537 | reflock->state = 0; 1538 | } 1539 | 1540 | static void reflock__signal_event(void* address) { 1541 | NTSTATUS status = 1542 | NtReleaseKeyedEvent(reflock__keyed_event, address, FALSE, NULL); 1543 | if (status != STATUS_SUCCESS) 1544 | abort(); 1545 | } 1546 | 1547 | static void reflock__await_event(void* address) { 1548 | NTSTATUS status = 1549 | NtWaitForKeyedEvent(reflock__keyed_event, address, FALSE, NULL); 1550 | if (status != STATUS_SUCCESS) 1551 | abort(); 1552 | } 1553 | 1554 | void reflock_ref(reflock_t* reflock) { 1555 | long state = InterlockedAdd(&reflock->state, REFLOCK__REF); 1556 | 1557 | /* Verify that the counter didn't overflow and the lock isn't destroyed. */ 1558 | assert((state & REFLOCK__DESTROY_MASK) == 0); 1559 | unused_var(state); 1560 | } 1561 | 1562 | void reflock_unref(reflock_t* reflock) { 1563 | long state = InterlockedAdd(&reflock->state, -REFLOCK__REF); 1564 | 1565 | /* Verify that the lock was referenced and not already destroyed. */ 1566 | assert((state & REFLOCK__DESTROY_MASK & ~REFLOCK__DESTROY) == 0); 1567 | 1568 | if (state == REFLOCK__DESTROY) 1569 | reflock__signal_event(reflock); 1570 | } 1571 | 1572 | void reflock_unref_and_destroy(reflock_t* reflock) { 1573 | long state = 1574 | InterlockedAdd(&reflock->state, REFLOCK__DESTROY - REFLOCK__REF); 1575 | long ref_count = state & REFLOCK__REF_MASK; 1576 | 1577 | /* Verify that the lock was referenced and not already destroyed. 
*/ 1578 | assert((state & REFLOCK__DESTROY_MASK) == REFLOCK__DESTROY); 1579 | 1580 | if (ref_count != 0) 1581 | reflock__await_event(reflock); 1582 | 1583 | state = InterlockedExchange(&reflock->state, REFLOCK__POISON); 1584 | assert(state == REFLOCK__DESTROY); 1585 | } 1586 | 1587 | #define SOCK__KNOWN_EPOLL_EVENTS \ 1588 | (EPOLLIN | EPOLLPRI | EPOLLOUT | EPOLLERR | EPOLLHUP | EPOLLRDNORM | \ 1589 | EPOLLRDBAND | EPOLLWRNORM | EPOLLWRBAND | EPOLLMSG | EPOLLRDHUP) 1590 | 1591 | typedef enum sock__poll_status { 1592 | SOCK__POLL_IDLE = 0, 1593 | SOCK__POLL_PENDING, 1594 | SOCK__POLL_CANCELLED 1595 | } sock__poll_status_t; 1596 | 1597 | typedef struct sock_state { 1598 | IO_STATUS_BLOCK io_status_block; 1599 | AFD_POLL_INFO poll_info; 1600 | queue_node_t queue_node; 1601 | tree_node_t tree_node; 1602 | poll_group_t* poll_group; 1603 | SOCKET base_socket; 1604 | epoll_data_t user_data; 1605 | uint32_t user_events; 1606 | uint32_t pending_events; 1607 | sock__poll_status_t poll_status; 1608 | bool delete_pending; 1609 | } sock_state_t; 1610 | 1611 | static inline sock_state_t* sock__alloc(void) { 1612 | sock_state_t* sock_state = malloc(sizeof *sock_state); 1613 | if (sock_state == NULL) 1614 | return_set_error(NULL, ERROR_NOT_ENOUGH_MEMORY); 1615 | return sock_state; 1616 | } 1617 | 1618 | static inline void sock__free(sock_state_t* sock_state) { 1619 | assert(sock_state != NULL); 1620 | free(sock_state); 1621 | } 1622 | 1623 | static inline int sock__cancel_poll(sock_state_t* sock_state) { 1624 | assert(sock_state->poll_status == SOCK__POLL_PENDING); 1625 | 1626 | if (afd_cancel_poll(poll_group_get_afd_device_handle(sock_state->poll_group), 1627 | &sock_state->io_status_block) < 0) 1628 | return -1; 1629 | 1630 | sock_state->poll_status = SOCK__POLL_CANCELLED; 1631 | sock_state->pending_events = 0; 1632 | return 0; 1633 | } 1634 | 1635 | sock_state_t* sock_new(port_state_t* port_state, SOCKET socket) { 1636 | SOCKET base_socket; 1637 | poll_group_t* poll_group; 1638 | sock_state_t* sock_state; 1639 | 1640 | if (socket == 0 || socket == INVALID_SOCKET) 1641 | return_set_error(NULL, ERROR_INVALID_HANDLE); 1642 | 1643 | base_socket = ws_get_base_socket(socket); 1644 | if (base_socket == INVALID_SOCKET) 1645 | return NULL; 1646 | 1647 | poll_group = poll_group_acquire(port_state); 1648 | if (poll_group == NULL) 1649 | return NULL; 1650 | 1651 | sock_state = sock__alloc(); 1652 | if (sock_state == NULL) 1653 | goto err1; 1654 | 1655 | memset(sock_state, 0, sizeof *sock_state); 1656 | 1657 | sock_state->base_socket = base_socket; 1658 | sock_state->poll_group = poll_group; 1659 | 1660 | tree_node_init(&sock_state->tree_node); 1661 | queue_node_init(&sock_state->queue_node); 1662 | 1663 | if (port_register_socket(port_state, sock_state, socket) < 0) 1664 | goto err2; 1665 | 1666 | return sock_state; 1667 | 1668 | err2: 1669 | sock__free(sock_state); 1670 | err1: 1671 | poll_group_release(poll_group); 1672 | 1673 | return NULL; 1674 | } 1675 | 1676 | static int sock__delete(port_state_t* port_state, 1677 | sock_state_t* sock_state, 1678 | bool force) { 1679 | if (!sock_state->delete_pending) { 1680 | if (sock_state->poll_status == SOCK__POLL_PENDING) 1681 | sock__cancel_poll(sock_state); 1682 | 1683 | port_cancel_socket_update(port_state, sock_state); 1684 | port_unregister_socket(port_state, sock_state); 1685 | 1686 | sock_state->delete_pending = true; 1687 | } 1688 | 1689 | /* If the poll request still needs to complete, the sock_state object can't 1690 | * be free()d yet. 
`sock_feed_event()` or `port_close()` will take care 1691 | * of this later. */ 1692 | if (force || sock_state->poll_status == SOCK__POLL_IDLE) { 1693 | /* Free the sock_state now. */ 1694 | port_remove_deleted_socket(port_state, sock_state); 1695 | poll_group_release(sock_state->poll_group); 1696 | sock__free(sock_state); 1697 | } else { 1698 | /* Free the socket later. */ 1699 | port_add_deleted_socket(port_state, sock_state); 1700 | } 1701 | 1702 | return 0; 1703 | } 1704 | 1705 | void sock_delete(port_state_t* port_state, sock_state_t* sock_state) { 1706 | sock__delete(port_state, sock_state, false); 1707 | } 1708 | 1709 | void sock_force_delete(port_state_t* port_state, sock_state_t* sock_state) { 1710 | sock__delete(port_state, sock_state, true); 1711 | } 1712 | 1713 | int sock_set_event(port_state_t* port_state, 1714 | sock_state_t* sock_state, 1715 | const struct epoll_event* ev) { 1716 | /* EPOLLERR and EPOLLHUP are always reported, even when not requested by the 1717 | * caller. However, they are disabled after an event has been reported for a 1718 | * socket for which the EPOLLONESHOT flag was set. */ 1719 | uint32_t events = ev->events | EPOLLERR | EPOLLHUP; 1720 | 1721 | sock_state->user_events = events; 1722 | sock_state->user_data = ev->data; 1723 | 1724 | if ((events & SOCK__KNOWN_EPOLL_EVENTS & ~sock_state->pending_events) != 0) 1725 | port_request_socket_update(port_state, sock_state); 1726 | 1727 | return 0; 1728 | } 1729 | 1730 | static inline DWORD sock__epoll_events_to_afd_events(uint32_t epoll_events) { 1731 | /* Always monitor for AFD_POLL_LOCAL_CLOSE, which is triggered when the 1732 | * socket is closed with closesocket() or CloseHandle(). */ 1733 | DWORD afd_events = AFD_POLL_LOCAL_CLOSE; 1734 | 1735 | if (epoll_events & (EPOLLIN | EPOLLRDNORM)) 1736 | afd_events |= AFD_POLL_RECEIVE | AFD_POLL_ACCEPT; 1737 | if (epoll_events & (EPOLLPRI | EPOLLRDBAND)) 1738 | afd_events |= AFD_POLL_RECEIVE_EXPEDITED; 1739 | if (epoll_events & (EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND)) 1740 | afd_events |= AFD_POLL_SEND; 1741 | if (epoll_events & (EPOLLIN | EPOLLRDNORM | EPOLLRDHUP)) 1742 | afd_events |= AFD_POLL_DISCONNECT; 1743 | if (epoll_events & EPOLLHUP) 1744 | afd_events |= AFD_POLL_ABORT; 1745 | if (epoll_events & EPOLLERR) 1746 | afd_events |= AFD_POLL_CONNECT_FAIL; 1747 | 1748 | return afd_events; 1749 | } 1750 | 1751 | static inline uint32_t sock__afd_events_to_epoll_events(DWORD afd_events) { 1752 | uint32_t epoll_events = 0; 1753 | 1754 | if (afd_events & (AFD_POLL_RECEIVE | AFD_POLL_ACCEPT)) 1755 | epoll_events |= EPOLLIN | EPOLLRDNORM; 1756 | if (afd_events & AFD_POLL_RECEIVE_EXPEDITED) 1757 | epoll_events |= EPOLLPRI | EPOLLRDBAND; 1758 | if (afd_events & AFD_POLL_SEND) 1759 | epoll_events |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND; 1760 | if (afd_events & AFD_POLL_DISCONNECT) 1761 | epoll_events |= EPOLLIN | EPOLLRDNORM | EPOLLRDHUP; 1762 | if (afd_events & AFD_POLL_ABORT) 1763 | epoll_events |= EPOLLHUP; 1764 | if (afd_events & AFD_POLL_CONNECT_FAIL) 1765 | /* Linux reports all these events after connect() has failed.
*/ 1766 | epoll_events |= 1767 | EPOLLIN | EPOLLOUT | EPOLLERR | EPOLLRDNORM | EPOLLWRNORM | EPOLLRDHUP; 1768 | 1769 | return epoll_events; 1770 | } 1771 | 1772 | int sock_update(port_state_t* port_state, sock_state_t* sock_state) { 1773 | assert(!sock_state->delete_pending); 1774 | 1775 | if ((sock_state->poll_status == SOCK__POLL_PENDING) && 1776 | (sock_state->user_events & SOCK__KNOWN_EPOLL_EVENTS & 1777 | ~sock_state->pending_events) == 0) { 1778 | /* All the events the user is interested in are already being monitored by 1779 | * the pending poll operation. It might spuriously complete because of an 1780 | * event that we're no longer interested in; when that happens we'll submit 1781 | * a new poll operation with the updated event mask. */ 1782 | 1783 | } else if (sock_state->poll_status == SOCK__POLL_PENDING) { 1784 | /* A poll operation is already pending, but it's not monitoring for all the 1785 | * events that the user is interested in. Therefore, cancel the pending 1786 | * poll operation; when we receive its completion packet, a new poll 1787 | * operation will be submitted with the correct event mask. */ 1788 | if (sock__cancel_poll(sock_state) < 0) 1789 | return -1; 1790 | 1791 | } else if (sock_state->poll_status == SOCK__POLL_CANCELLED) { 1792 | /* The poll operation has already been cancelled; we're still waiting for 1793 | * it to return. For now, there's nothing that needs to be done. */ 1794 | 1795 | } else if (sock_state->poll_status == SOCK__POLL_IDLE) { 1796 | /* No poll operation is pending; start one. */ 1797 | sock_state->poll_info.Exclusive = FALSE; 1798 | sock_state->poll_info.NumberOfHandles = 1; 1799 | sock_state->poll_info.Timeout.QuadPart = INT64_MAX; 1800 | sock_state->poll_info.Handles[0].Handle = (HANDLE) sock_state->base_socket; 1801 | sock_state->poll_info.Handles[0].Status = 0; 1802 | sock_state->poll_info.Handles[0].Events = 1803 | sock__epoll_events_to_afd_events(sock_state->user_events); 1804 | 1805 | if (afd_poll(poll_group_get_afd_device_handle(sock_state->poll_group), 1806 | &sock_state->poll_info, 1807 | &sock_state->io_status_block) < 0) { 1808 | switch (GetLastError()) { 1809 | case ERROR_IO_PENDING: 1810 | /* Overlapped poll operation in progress; this is expected. */ 1811 | break; 1812 | case ERROR_INVALID_HANDLE: 1813 | /* Socket closed; it'll be dropped from the epoll set. */ 1814 | return sock__delete(port_state, sock_state, false); 1815 | default: 1816 | /* Other errors are propagated to the caller. */ 1817 | return_map_error(-1); 1818 | } 1819 | } 1820 | 1821 | /* The poll request was successfully submitted. */ 1822 | sock_state->poll_status = SOCK__POLL_PENDING; 1823 | sock_state->pending_events = sock_state->user_events; 1824 | 1825 | } else { 1826 | /* Unreachable. */ 1827 | assert(false); 1828 | } 1829 | 1830 | port_cancel_socket_update(port_state, sock_state); 1831 | return 0; 1832 | } 1833 | 1834 | int sock_feed_event(port_state_t* port_state, 1835 | IO_STATUS_BLOCK* io_status_block, 1836 | struct epoll_event* ev) { 1837 | sock_state_t* sock_state = 1838 | container_of(io_status_block, sock_state_t, io_status_block); 1839 | AFD_POLL_INFO* poll_info = &sock_state->poll_info; 1840 | uint32_t epoll_events = 0; 1841 | 1842 | sock_state->poll_status = SOCK__POLL_IDLE; 1843 | sock_state->pending_events = 0; 1844 | 1845 | if (sock_state->delete_pending) { 1846 | /* Socket has been deleted earlier and can now be freed.
*/ 1847 | return sock__delete(port_state, sock_state, false); 1848 | 1849 | } else if (io_status_block->Status == STATUS_CANCELLED) { 1850 | /* The poll request was cancelled by CancelIoEx. */ 1851 | 1852 | } else if (!NT_SUCCESS(io_status_block->Status)) { 1853 | /* The overlapped request itself failed in an unexpected way. */ 1854 | epoll_events = EPOLLERR; 1855 | 1856 | } else if (poll_info->NumberOfHandles < 1) { 1857 | /* This poll operation succeeded but didn't report any socket events. */ 1858 | 1859 | } else if (poll_info->Handles[0].Events & AFD_POLL_LOCAL_CLOSE) { 1860 | /* The poll operation reported that the socket was closed. */ 1861 | return sock__delete(port_state, sock_state, false); 1862 | 1863 | } else { 1864 | /* Events related to our socket were reported. */ 1865 | epoll_events = 1866 | sock__afd_events_to_epoll_events(poll_info->Handles[0].Events); 1867 | } 1868 | 1869 | /* Requeue the socket so a new poll request will be submitted. */ 1870 | port_request_socket_update(port_state, sock_state); 1871 | 1872 | /* Filter out events that the user didn't ask for. */ 1873 | epoll_events &= sock_state->user_events; 1874 | 1875 | /* Return if there are no epoll events to report. */ 1876 | if (epoll_events == 0) 1877 | return 0; 1878 | 1879 | /* If the socket has the EPOLLONESHOT flag set, unmonitor all events, 1880 | * even EPOLLERR and EPOLLHUP. But always keep looking for closed sockets. */ 1881 | if (sock_state->user_events & EPOLLONESHOT) 1882 | sock_state->user_events = 0; 1883 | 1884 | ev->data = sock_state->user_data; 1885 | ev->events = epoll_events; 1886 | return 1; 1887 | } 1888 | 1889 | sock_state_t* sock_state_from_queue_node(queue_node_t* queue_node) { 1890 | return container_of(queue_node, sock_state_t, queue_node); 1891 | } 1892 | 1893 | queue_node_t* sock_state_to_queue_node(sock_state_t* sock_state) { 1894 | return &sock_state->queue_node; 1895 | } 1896 | 1897 | sock_state_t* sock_state_from_tree_node(tree_node_t* tree_node) { 1898 | return container_of(tree_node, sock_state_t, tree_node); 1899 | } 1900 | 1901 | tree_node_t* sock_state_to_tree_node(sock_state_t* sock_state) { 1902 | return &sock_state->tree_node; 1903 | } 1904 | 1905 | void ts_tree_init(ts_tree_t* ts_tree) { 1906 | tree_init(&ts_tree->tree); 1907 | InitializeSRWLock(&ts_tree->lock); 1908 | } 1909 | 1910 | void ts_tree_node_init(ts_tree_node_t* node) { 1911 | tree_node_init(&node->tree_node); 1912 | reflock_init(&node->reflock); 1913 | } 1914 | 1915 | int ts_tree_add(ts_tree_t* ts_tree, ts_tree_node_t* node, uintptr_t key) { 1916 | int r; 1917 | 1918 | AcquireSRWLockExclusive(&ts_tree->lock); 1919 | r = tree_add(&ts_tree->tree, &node->tree_node, key); 1920 | ReleaseSRWLockExclusive(&ts_tree->lock); 1921 | 1922 | return r; 1923 | } 1924 | 1925 | static inline ts_tree_node_t* ts_tree__find_node(ts_tree_t* ts_tree, 1926 | uintptr_t key) { 1927 | tree_node_t* tree_node = tree_find(&ts_tree->tree, key); 1928 | if (tree_node == NULL) 1929 | return NULL; 1930 | 1931 | return container_of(tree_node, ts_tree_node_t, tree_node); 1932 | } 1933 | 1934 | ts_tree_node_t* ts_tree_del_and_ref(ts_tree_t* ts_tree, uintptr_t key) { 1935 | ts_tree_node_t* ts_tree_node; 1936 | 1937 | AcquireSRWLockExclusive(&ts_tree->lock); 1938 | 1939 | ts_tree_node = ts_tree__find_node(ts_tree, key); 1940 | if (ts_tree_node != NULL) { 1941 | tree_del(&ts_tree->tree, &ts_tree_node->tree_node); 1942 | reflock_ref(&ts_tree_node->reflock); 1943 | } 1944 | 1945 | ReleaseSRWLockExclusive(&ts_tree->lock); 1946 | 1947 | return
ts_tree_node; 1948 | } 1949 | 1950 | ts_tree_node_t* ts_tree_find_and_ref(ts_tree_t* ts_tree, uintptr_t key) { 1951 | ts_tree_node_t* ts_tree_node; 1952 | 1953 | AcquireSRWLockShared(&ts_tree->lock); 1954 | 1955 | ts_tree_node = ts_tree__find_node(ts_tree, key); 1956 | if (ts_tree_node != NULL) 1957 | reflock_ref(&ts_tree_node->reflock); 1958 | 1959 | ReleaseSRWLockShared(&ts_tree->lock); 1960 | 1961 | return ts_tree_node; 1962 | } 1963 | 1964 | void ts_tree_node_unref(ts_tree_node_t* node) { 1965 | reflock_unref(&node->reflock); 1966 | } 1967 | 1968 | void ts_tree_node_unref_and_destroy(ts_tree_node_t* node) { 1969 | reflock_unref_and_destroy(&node->reflock); 1970 | } 1971 | 1972 | void tree_init(tree_t* tree) { 1973 | memset(tree, 0, sizeof *tree); 1974 | } 1975 | 1976 | void tree_node_init(tree_node_t* node) { 1977 | memset(node, 0, sizeof *node); 1978 | } 1979 | 1980 | #define TREE__ROTATE(cis, trans) \ 1981 | tree_node_t* p = node; \ 1982 | tree_node_t* q = node->trans; \ 1983 | tree_node_t* parent = p->parent; \ 1984 | \ 1985 | if (parent) { \ 1986 | if (parent->left == p) \ 1987 | parent->left = q; \ 1988 | else \ 1989 | parent->right = q; \ 1990 | } else { \ 1991 | tree->root = q; \ 1992 | } \ 1993 | \ 1994 | q->parent = parent; \ 1995 | p->parent = q; \ 1996 | p->trans = q->cis; \ 1997 | if (p->trans) \ 1998 | p->trans->parent = p; \ 1999 | q->cis = p; 2000 | 2001 | static inline void tree__rotate_left(tree_t* tree, tree_node_t* node) { 2002 | TREE__ROTATE(left, right) 2003 | } 2004 | 2005 | static inline void tree__rotate_right(tree_t* tree, tree_node_t* node) { 2006 | TREE__ROTATE(right, left) 2007 | } 2008 | 2009 | #define TREE__INSERT_OR_DESCEND(side) \ 2010 | if (parent->side) { \ 2011 | parent = parent->side; \ 2012 | } else { \ 2013 | parent->side = node; \ 2014 | break; \ 2015 | } 2016 | 2017 | #define TREE__REBALANCE_AFTER_INSERT(cis, trans) \ 2018 | tree_node_t* grandparent = parent->parent; \ 2019 | tree_node_t* uncle = grandparent->trans; \ 2020 | \ 2021 | if (uncle && uncle->red) { \ 2022 | parent->red = uncle->red = false; \ 2023 | grandparent->red = true; \ 2024 | node = grandparent; \ 2025 | } else { \ 2026 | if (node == parent->trans) { \ 2027 | tree__rotate_##cis(tree, parent); \ 2028 | node = parent; \ 2029 | parent = node->parent; \ 2030 | } \ 2031 | parent->red = false; \ 2032 | grandparent->red = true; \ 2033 | tree__rotate_##trans(tree, grandparent); \ 2034 | } 2035 | 2036 | int tree_add(tree_t* tree, tree_node_t* node, uintptr_t key) { 2037 | tree_node_t* parent; 2038 | 2039 | parent = tree->root; 2040 | if (parent) { 2041 | for (;;) { 2042 | if (key < parent->key) { 2043 | TREE__INSERT_OR_DESCEND(left) 2044 | } else if (key > parent->key) { 2045 | TREE__INSERT_OR_DESCEND(right) 2046 | } else { 2047 | return -1; 2048 | } 2049 | } 2050 | } else { 2051 | tree->root = node; 2052 | } 2053 | 2054 | node->key = key; 2055 | node->left = node->right = NULL; 2056 | node->parent = parent; 2057 | node->red = true; 2058 | 2059 | for (; parent && parent->red; parent = node->parent) { 2060 | if (parent == parent->parent->left) { 2061 | TREE__REBALANCE_AFTER_INSERT(left, right) 2062 | } else { 2063 | TREE__REBALANCE_AFTER_INSERT(right, left) 2064 | } 2065 | } 2066 | tree->root->red = false; 2067 | 2068 | return 0; 2069 | } 2070 | 2071 | #define TREE__REBALANCE_AFTER_REMOVE(cis, trans) \ 2072 | tree_node_t* sibling = parent->trans; \ 2073 | \ 2074 | if (sibling->red) { \ 2075 | sibling->red = false; \ 2076 | parent->red = true; \ 2077 | tree__rotate_##cis(tree, parent); \ 
2078 | sibling = parent->trans; \ 2079 | } \ 2080 | if ((sibling->left && sibling->left->red) || \ 2081 | (sibling->right && sibling->right->red)) { \ 2082 | if (!sibling->trans || !sibling->trans->red) { \ 2083 | sibling->cis->red = false; \ 2084 | sibling->red = true; \ 2085 | tree__rotate_##trans(tree, sibling); \ 2086 | sibling = parent->trans; \ 2087 | } \ 2088 | sibling->red = parent->red; \ 2089 | parent->red = sibling->trans->red = false; \ 2090 | tree__rotate_##cis(tree, parent); \ 2091 | node = tree->root; \ 2092 | break; \ 2093 | } \ 2094 | sibling->red = true; 2095 | 2096 | void tree_del(tree_t* tree, tree_node_t* node) { 2097 | tree_node_t* parent = node->parent; 2098 | tree_node_t* left = node->left; 2099 | tree_node_t* right = node->right; 2100 | tree_node_t* next; 2101 | bool red; 2102 | 2103 | if (!left) { 2104 | next = right; 2105 | } else if (!right) { 2106 | next = left; 2107 | } else { 2108 | next = right; 2109 | while (next->left) 2110 | next = next->left; 2111 | } 2112 | 2113 | if (parent) { 2114 | if (parent->left == node) 2115 | parent->left = next; 2116 | else 2117 | parent->right = next; 2118 | } else { 2119 | tree->root = next; 2120 | } 2121 | 2122 | if (left && right) { 2123 | red = next->red; 2124 | next->red = node->red; 2125 | next->left = left; 2126 | left->parent = next; 2127 | if (next != right) { 2128 | parent = next->parent; 2129 | next->parent = node->parent; 2130 | node = next->right; 2131 | parent->left = node; 2132 | next->right = right; 2133 | right->parent = next; 2134 | } else { 2135 | next->parent = parent; 2136 | parent = next; 2137 | node = next->right; 2138 | } 2139 | } else { 2140 | red = node->red; 2141 | node = next; 2142 | } 2143 | 2144 | if (node) 2145 | node->parent = parent; 2146 | if (red) 2147 | return; 2148 | if (node && node->red) { 2149 | node->red = false; 2150 | return; 2151 | } 2152 | 2153 | do { 2154 | if (node == tree->root) 2155 | break; 2156 | if (node == parent->left) { 2157 | TREE__REBALANCE_AFTER_REMOVE(left, right) 2158 | } else { 2159 | TREE__REBALANCE_AFTER_REMOVE(right, left) 2160 | } 2161 | node = parent; 2162 | parent = parent->parent; 2163 | } while (!node->red); 2164 | 2165 | if (node) 2166 | node->red = false; 2167 | } 2168 | 2169 | tree_node_t* tree_find(const tree_t* tree, uintptr_t key) { 2170 | tree_node_t* node = tree->root; 2171 | while (node) { 2172 | if (key < node->key) 2173 | node = node->left; 2174 | else if (key > node->key) 2175 | node = node->right; 2176 | else 2177 | return node; 2178 | } 2179 | return NULL; 2180 | } 2181 | 2182 | tree_node_t* tree_root(const tree_t* tree) { 2183 | return tree->root; 2184 | } 2185 | 2186 | #ifndef SIO_BSP_HANDLE_POLL 2187 | #define SIO_BSP_HANDLE_POLL 0x4800001D 2188 | #endif 2189 | 2190 | #ifndef SIO_BASE_HANDLE 2191 | #define SIO_BASE_HANDLE 0x48000022 2192 | #endif 2193 | 2194 | int ws_global_init(void) { 2195 | int r; 2196 | WSADATA wsa_data; 2197 | 2198 | r = WSAStartup(MAKEWORD(2, 2), &wsa_data); 2199 | if (r != 0) 2200 | return_set_error(-1, (DWORD) r); 2201 | 2202 | return 0; 2203 | } 2204 | 2205 | static inline SOCKET ws__ioctl_get_bsp_socket(SOCKET socket, DWORD ioctl) { 2206 | SOCKET bsp_socket; 2207 | DWORD bytes; 2208 | 2209 | if (WSAIoctl(socket, 2210 | ioctl, 2211 | NULL, 2212 | 0, 2213 | &bsp_socket, 2214 | sizeof bsp_socket, 2215 | &bytes, 2216 | NULL, 2217 | NULL) != SOCKET_ERROR) 2218 | return bsp_socket; 2219 | else 2220 | return INVALID_SOCKET; 2221 | } 2222 | 2223 | SOCKET ws_get_base_socket(SOCKET socket) { 2224 | SOCKET base_socket; 2225 | 
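/* The loop below unwraps layered service providers (LSPs): SIO_BASE_HANDLE
 * normally yields the base socket directly, but when an LSP intercepts that
 * ioctl, SIO_BSP_HANDLE_POLL is used to step down one protocol-chain entry
 * at a time until the true base socket is reached. */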
DWORD error; 2226 | 2227 | for (;;) { 2228 | base_socket = ws__ioctl_get_bsp_socket(socket, SIO_BASE_HANDLE); 2229 | if (base_socket != INVALID_SOCKET) 2230 | return base_socket; 2231 | 2232 | error = GetLastError(); 2233 | if (error == WSAENOTSOCK) 2234 | return_set_error(INVALID_SOCKET, error); 2235 | 2236 | /* Even though Microsoft documentation clearly states that LSPs should 2237 | * never intercept the `SIO_BASE_HANDLE` ioctl [1], Komodia based LSPs do 2238 | * so anyway, breaking it, with the apparent intention of preventing LSP 2239 | * bypass [2]. Fortunately they don't handle `SIO_BSP_HANDLE_POLL`, which 2240 | * will at least let us obtain the socket associated with the next winsock 2241 | * protocol chain entry. If this succeeds, loop around and call 2242 | * `SIO_BASE_HANDLE` again with the returned BSP socket, to make sure that 2243 | * we unwrap all layers and retrieve the actual base socket. 2244 | * [1] https://docs.microsoft.com/en-us/windows/win32/winsock/winsock-ioctls 2245 | * [2] https://www.komodia.com/newwiki/index.php?title=Komodia%27s_Redirector_bug_fixes#Version_2.2.2.6 2246 | */ 2247 | base_socket = ws__ioctl_get_bsp_socket(socket, SIO_BSP_HANDLE_POLL); 2248 | if (base_socket != INVALID_SOCKET && base_socket != socket) 2249 | socket = base_socket; 2250 | else 2251 | return_set_error(INVALID_SOCKET, error); 2252 | } 2253 | } 2254 | --------------------------------------------------------------------------------
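For reference, a minimal consumer of the public API implemented above might look like the following sketch (illustrative only and not part of this repository; it assumes a valid, connected SOCKET `sock` obtained through the usual Winsock calls):

#include "wepoll.h"
#include <stdio.h>

int poll_once(SOCKET sock) {
  struct epoll_event ev, out;
  int n;
  HANDLE ephnd = epoll_create1(0);      /* Create an epoll port. */
  if (ephnd == NULL)
    return -1;

  ev.events = EPOLLIN;                  /* Report readable data. */
  ev.data.sock = sock;
  if (epoll_ctl(ephnd, EPOLL_CTL_ADD, sock, &ev) < 0) {
    epoll_close(ephnd);
    return -1;
  }

  n = epoll_wait(ephnd, &out, 1, 1000); /* Wait up to one second. */
  if (n > 0)
    printf("events: 0x%x\n", (unsigned) out.events);

  epoll_close(ephnd);                   /* Also drops registered sockets. */
  return n;
}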
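As sock_feed_event() above shows, reporting an event for a socket registered with EPOLLONESHOT clears the socket's whole event mask, so the caller must re-arm it before further events are delivered. Continuing the hypothetical example above, the re-arm step would be:

ev.events = EPOLLIN | EPOLLONESHOT;   /* Request one more readiness report. */
if (epoll_ctl(ephnd, EPOLL_CTL_MOD, sock, &ev) < 0)
  return -1;                          /* Re-registration failed. */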