├── Changes ├── LICENSE ├── Makefile.am ├── autogen.sh ├── configure.ac ├── demo.c ├── ecb.h ├── eio.3 ├── eio.c ├── eio.h ├── eio.pod ├── libeio.m4 └── xthread.h /Changes: -------------------------------------------------------------------------------- 1 | Revision history for libeio 2 | 3 | TODO: maybe add mincore support? available on at least darwin, solaris, linux, freebsd 4 | TODO: openbsd requires stdint.h for intptr_t - why posix? 5 | 6 | TODO: make mtouch/readdir maybe others cancellable in-request 7 | TODO: fadvise request 8 | TODO: fdopendir/utimensat 9 | TODO: maybe work around 3.996gb barrier in pread/pwrite as well, maybe readahead etc.? 10 | 1.0 11 | - fix a deadlock where a wakeup signal could be missed when 12 | a timeout occurred at the same time. 13 | - use nonstandard but maybe-working-on-bsd fork technique. 14 | - use fewer time() syscalls when waiting for new requests. 15 | - fix a path-memory-leak in readdir when using the wrappers 16 | (reported by Thomas L. Shinnick). 17 | - support a max_idle value of 0. 18 | - support setting of idle timeout value (eio_set_idle_timeout). 19 | - readdir: correctly handle malloc failures. 20 | - readdir: new flags argument, can return inode 21 | and possibly filetype, can sort in various ways. 22 | - readdir: stop immediately when cancelled, do 23 | not continue reading the directory. 24 | - fix return value of eio_sendfile_sync. 25 | - include sys/mman.h for msync. 26 | - added EIO_STACKSIZE. 27 | - added msync, mtouch support (untested). 28 | - added sync_file_range (untested). 29 | - fixed custom support. 30 | - use a more robust feed-add detection method. 31 | - "outbundled" from IO::AIO. 32 | - eio_set_max_polltime did not properly convert time to ticks. 33 | - tentatively support darwin in sendfile. 34 | - fix freebsd/darwin sendfile. 35 | - also use sendfile emulation for ENOTSUP and EOPNOTSUPP 36 | error codes. 37 | - add OS-independent EIO_MT_* and EIO_MS_* flag enums.
38 | - add eio_statvfs/eio_fstatvfs. 39 | - add eio_mlock/eio_mlockall and OS-independent MCL_* flag enums. 40 | - no longer set errno to 0 before making syscalls, this only lures 41 | people into the trap of believing errno shows success or failure. 42 | - "fix" demo.c so that it works as non-root. 43 | - support utimes separately from futimes, as some systems have 44 | utimes but not futimes. 45 | - use _POSIX_MEMLOCK_RANGE for mlock. 46 | - do not (erroneously) overwrite CFLAGS in configure.ac. 47 | - mknod used int3 for dev_t (32 bit), not offs (64 bit). 48 | - fix memory corruption in eio_readdirx for the flags 49 | combination EIO_READDIR_STAT_ORDER | EIO_READDIR_DIRS_FIRST. 50 | - port to openbsd (another blatantly broken non-UNIX/POSIX platform). 51 | - fix eio_custom prototype. 52 | - work around a Linux (and likely FreeBSD and other kernels) bug 53 | where sendfile would not transfer all the requested bytes on 54 | large transfers, using a heuristic. 55 | - use libecb, and apply lots of minor space optimisations. 56 | - disable sendfile on darwin, broken as everything else. 57 | - add realpath request and implementation. 58 | - cancelled requests will still invoke their request callbacks. 59 | - add fallocate. 60 | - do not acquire any locks when forking. 61 | - incorporated some mingw32 changes by traviscline. 62 | - added syncfs support, using direct syscall. 63 | - set thread name on linux (ps -L/Hcx, top, gdb). 64 | - remove useless use of volatile variables. 65 | - fix memory leak when reaping threads. 66 | - utime now uses nanosecond resolution on posix 2008 systems. 67 | - allow taking advantage of posix 2008 xxxat functions and fdopendir 68 | by implementing a working directory abstraction. 69 | - make readahead emulation behave more like actual readahead by never failing. 70 | - added EIO_LSEEK (untested). 71 | - added EIO_FALLOC_FL_PUNCH_HOLE. 72 | - wtf. etp_proc returned 0, and no compiler ever complained.
73 | - remove pread/pwrite emulation, as the only system that lacked them 74 | (cygwin) provides them for a while now. 75 | - provide pread/pwrite implementations for win32. 76 | 77 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | All files in libeio are Copyright (C)2007,2008 Marc Alexander Lehmann. 2 | 3 | Redistribution and use in source and binary forms, with or without 4 | modification, are permitted provided that the following conditions are 5 | met: 6 | 7 | * Redistributions of source code must retain the above copyright 8 | notice, this list of conditions and the following disclaimer. 9 | 10 | * Redistributions in binary form must reproduce the above 11 | copyright notice, this list of conditions and the following 12 | disclaimer in the documentation and/or other materials provided 13 | with the distribution. 14 | 15 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 16 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 17 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 18 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 19 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 20 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 21 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 22 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 23 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
26 | 27 | Alternatively, the contents of this package may be used under the terms 28 | of the GNU General Public License ("GPL") version 2 or any later version, 29 | in which case the provisions of the GPL are applicable instead of the 30 | above. If you wish to allow the use of your version of this package only 31 | under the terms of the GPL and not to allow others to use your version of 32 | this file under the BSD license, indicate your decision by deleting the 33 | provisions above and replace them with the notice and other provisions 34 | required by the GPL in this and the other files of this package. If you do 35 | not delete the provisions above, a recipient may use your version of this 36 | file under either the BSD or the GPL. 37 | -------------------------------------------------------------------------------- /Makefile.am: -------------------------------------------------------------------------------- 1 | AUTOMAKE_OPTIONS = foreign no-dependencies 2 | 3 | VERSION_INFO = 1:0 4 | 5 | EXTRA_DIST = LICENSE Changes autogen.sh 6 | 7 | #man_MANS = ev.3 8 | 9 | include_HEADERS = eio.h 10 | 11 | lib_LTLIBRARIES = libeio.la 12 | 13 | libeio_la_SOURCES = eio.c ecb.h xthread.h config.h 14 | libeio_la_LDFLAGS = -version-info $(VERSION_INFO) 15 | 16 | -------------------------------------------------------------------------------- /autogen.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | autoreconf --install --symlink --force 4 | -------------------------------------------------------------------------------- /configure.ac: -------------------------------------------------------------------------------- 1 | AC_PREREQ(2.59) 2 | AC_INIT 3 | AC_CONFIG_SRCDIR([eio.h]) 4 | AC_CONFIG_HEADERS([config.h]) 5 | 6 | AM_INIT_AUTOMAKE(libeio,1.0) 7 | AM_MAINTAINER_MODE 8 | 9 | AC_GNU_SOURCE 10 | 11 | AC_PROG_LIBTOOL 12 | 13 | AC_PROG_CC 14 | 15 | if test "x$GCC" = xyes ; then 16 | CFLAGS="-O3 $CFLAGS" 17 | fi 18 | 19 | 
m4_include([libeio.m4]) 20 | 21 | AC_CONFIG_FILES([Makefile]) 22 | AC_OUTPUT 23 | -------------------------------------------------------------------------------- /demo.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | #include "eio.h" 12 | 13 | int respipe [2]; 14 | /* wakes the event loop: writes one (initialised) byte into the result pipe */ 15 | void 16 | want_poll (void) 17 | { 18 | char dummy = 0; 19 | printf ("want_poll ()\n"); 20 | write (respipe [1], &dummy, 1); 21 | } 22 | /* consumes one byte from the result pipe again */ 23 | void 24 | done_poll (void) 25 | { 26 | char dummy; 27 | printf ("done_poll ()\n"); 28 | read (respipe [0], &dummy, 1); 29 | } 30 | 31 | void 32 | event_loop (void) 33 | { 34 | // an event loop. yeah. 35 | struct pollfd pfd; 36 | pfd.fd = respipe [0]; 37 | pfd.events = POLLIN; 38 | 39 | printf ("\nentering event loop\n"); 40 | while (eio_nreqs ()) 41 | { 42 | poll (&pfd, 1, -1); 43 | printf ("eio_poll () = %d\n", eio_poll ()); 44 | } 45 | printf ("leaving event loop\n"); 46 | } 47 | /* generic callback: prints request type, data tag and result, aborts when the result is negative. EIO_RESULT's type is declared in eio.h (not visible here); the (long) casts keep %ld valid for any integer type up to long */ 48 | int 49 | res_cb (eio_req *req) 50 | { 51 | printf ("res_cb(%d|%s) = %ld\n", req->type, req->data ? 
(const char *)req->data : "?", (long)EIO_RESULT (req)); 52 | 53 | if (req->result < 0) 54 | abort (); 55 | 56 | return 0; 57 | } 58 | /* prints the result, then each NUL-terminated name stored back to back in the result buffer */ 59 | int 60 | readdir_cb (eio_req *req) 61 | { 62 | char *buf = (char *)EIO_BUF (req); 63 | 64 | printf ("readdir_cb = %ld\n", (long)EIO_RESULT (req)); 65 | 66 | if (EIO_RESULT (req) < 0) 67 | return 0; 68 | 69 | while (EIO_RESULT (req)--) 70 | { 71 | printf ("readdir = <%s>\n", buf); 72 | buf += strlen (buf) + 1; 73 | } 74 | 75 | return 0; 76 | } 77 | /* prints the result and, when it is zero, size and permission bits; st_size is an off_t and may be wider than int, so it is cast to long for %ld, and the mode bits are cast to unsigned for %o */ 78 | int 79 | stat_cb (eio_req *req) 80 | { 81 | struct stat *buf = EIO_STAT_BUF (req); 82 | 83 | if (req->type == EIO_FSTAT) 84 | printf ("fstat_cb = %ld\n", (long)EIO_RESULT (req)); 85 | else 86 | printf ("stat_cb(%s) = %ld\n", EIO_PATH (req), (long)EIO_RESULT (req)); 87 | 88 | if (!EIO_RESULT (req)) 89 | printf ("stat size %ld perm 0%o\n", (long)buf->st_size, (unsigned int)(buf->st_mode & 0777)); 90 | 91 | return 0; 92 | } 93 | /* prints the result and the first eight bytes of the request buffer */ 94 | int 95 | read_cb (eio_req *req) 96 | { 97 | unsigned char *buf = (unsigned char *)EIO_BUF (req); 98 | 99 | printf ("read_cb = %ld (%02x%02x%02x%02x %02x%02x%02x%02x)\n", 100 | (long)EIO_RESULT (req), 101 | buf [0], buf [1], buf [2], buf [3], 102 | buf [4], buf [5], buf [6], buf [7]); 103 | 104 | return 0; 105 | } 106 | 107 | int last_fd; 108 | /* prints the result and stores it in last_fd */ 109 | int 110 | open_cb (eio_req *req) 111 | { 112 | printf ("open_cb = %ld\n", (long)EIO_RESULT (req)); 113 | 114 | last_fd = (int)EIO_RESULT (req); 115 | 116 | return 0; 117 | } 118 | 119 | int 120 | main (void) 121 | { 122 | printf ("pipe ()\n"); 123 | if (pipe (respipe)) abort (); 124 | 125 | printf ("eio_init ()\n"); 126 | if (eio_init (want_poll, done_poll)) abort (); 127 | 128 | do 129 | { 130 | /* avoid relative paths yourself(!) 
*/ 131 | eio_mkdir ("eio-test-dir", 0777, 0, res_cb, "mkdir"); 132 | eio_nop (0, res_cb, "nop"); 133 | event_loop (); 134 | 135 | eio_stat ("eio-test-dir", 0, stat_cb, "stat"); 136 | eio_lstat ("eio-test-dir", 0, stat_cb, "stat"); 137 | eio_open ("eio-test-dir/eio-test-file", O_RDWR | O_CREAT, 0777, 0, open_cb, "open"); 138 | eio_symlink ("test", "eio-test-dir/eio-symlink", 0, res_cb, "symlink"); 139 | eio_mknod ("eio-test-dir/eio-fifo", S_IFIFO, 0, 0, res_cb, "mknod"); 140 | event_loop (); 141 | 142 | eio_utime ("eio-test-dir", 12345.678, 23456.789, 0, res_cb, "utime"); 143 | eio_futime (last_fd, 92345.678, 93456.789, 0, res_cb, "futime"); 144 | eio_chown ("eio-test-dir", getuid (), getgid (), 0, res_cb, "chown"); 145 | eio_fchown (last_fd, getuid (), getgid (), 0, res_cb, "fchown"); 146 | eio_fchmod (last_fd, 0723, 0, res_cb, "fchmod"); 147 | eio_readdir ("eio-test-dir", 0, 0, readdir_cb, "readdir"); 148 | eio_readdir ("/nonexistant", 0, 0, readdir_cb, "readdir"); 149 | eio_fstat (last_fd, 0, stat_cb, "stat"); 150 | eio_write (last_fd, "test\nfail\n", 10, 4, 0, res_cb, "write"); 151 | event_loop (); 152 | 153 | eio_read (last_fd, 0, 8, 0, EIO_PRI_DEFAULT, read_cb, "read"); 154 | eio_readlink ("eio-test-dir/eio-symlink", 0, res_cb, "readlink"); 155 | event_loop (); 156 | 157 | eio_dup2 (1, 2, EIO_PRI_DEFAULT, res_cb, "dup"); // dup stdout to stderr 158 | eio_chmod ("eio-test-dir", 0765, 0, res_cb, "chmod"); 159 | eio_ftruncate (last_fd, 9, 0, res_cb, "ftruncate"); 160 | eio_fdatasync (last_fd, 0, res_cb, "fdatasync"); 161 | eio_fsync (last_fd, 0, res_cb, "fsync"); 162 | eio_sync (0, res_cb, "sync"); 163 | eio_busy (0.5, 0, res_cb, "busy"); 164 | event_loop (); 165 | 166 | eio_sendfile (1, last_fd, 4, 5, 0, res_cb, "sendfile"); // write "test\n" to stdout 167 | eio_fstat (last_fd, 0, stat_cb, "stat"); 168 | event_loop (); 169 | 170 | eio_truncate ("eio-test-dir/eio-test-file", 6, 0, res_cb, "truncate"); 171 | eio_readahead (last_fd, 0, 64, 0, res_cb, "readahead"); 
172 | event_loop (); 173 | 174 | eio_close (last_fd, 0, res_cb, "close"); 175 | eio_link ("eio-test-dir/eio-test-file", "eio-test-dir/eio-test-file-2", 0, res_cb, "link"); 176 | event_loop (); 177 | 178 | eio_rename ("eio-test-dir/eio-test-file", "eio-test-dir/eio-test-file-renamed", 0, res_cb, "rename"); 179 | event_loop (); 180 | 181 | eio_unlink ("eio-test-dir/eio-fifo", 0, res_cb, "unlink"); 182 | eio_unlink ("eio-test-dir/eio-symlink", 0, res_cb, "unlink"); 183 | eio_unlink ("eio-test-dir/eio-test-file-2", 0, res_cb, "unlink"); 184 | eio_unlink ("eio-test-dir/eio-test-file-renamed", 0, res_cb, "unlink"); 185 | event_loop (); 186 | 187 | eio_rmdir ("eio-test-dir", 0, res_cb, "rmdir"); 188 | event_loop (); 189 | } 190 | while (0); 191 | 192 | return 0; 193 | } 194 | 195 | -------------------------------------------------------------------------------- /ecb.h: -------------------------------------------------------------------------------- 1 | /* 2 | * libecb - http://software.schmorp.de/pkg/libecb 3 | * 4 | * Copyright (©) 2009-2012 Marc Alexander Lehmann 5 | * Copyright (©) 2011 Emanuele Giaquinta 6 | * All rights reserved. 7 | * 8 | * Redistribution and use in source and binary forms, with or without modifica- 9 | * tion, are permitted provided that the following conditions are met: 10 | * 11 | * 1. Redistributions of source code must retain the above copyright notice, 12 | * this list of conditions and the following disclaimer. 13 | * 14 | * 2. Redistributions in binary form must reproduce the above copyright 15 | * notice, this list of conditions and the following disclaimer in the 16 | * documentation and/or other materials provided with the distribution. 17 | * 18 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED 19 | * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MER- 20 | * CHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO 21 | * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPE- 22 | * CIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 23 | * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; 24 | * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 25 | * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTH- 26 | * ERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED 27 | * OF THE POSSIBILITY OF SUCH DAMAGE. 28 | */ 29 | 30 | #ifndef ECB_H 31 | #define ECB_H 32 | 33 | /* 16 bits major, 16 bits minor */ 34 | #define ECB_VERSION 0x00010002 35 | 36 | #ifdef _WIN32 37 | typedef signed char int8_t; 38 | typedef unsigned char uint8_t; 39 | typedef signed short int16_t; 40 | typedef unsigned short uint16_t; 41 | typedef signed int int32_t; 42 | typedef unsigned int uint32_t; 43 | #if __GNUC__ 44 | typedef signed long long int64_t; 45 | typedef unsigned long long uint64_t; 46 | #else /* _MSC_VER || __BORLANDC__ */ 47 | typedef signed __int64 int64_t; 48 | typedef unsigned __int64 uint64_t; 49 | #endif 50 | #ifdef _WIN64 51 | #define ECB_PTRSIZE 8 52 | typedef uint64_t uintptr_t; 53 | typedef int64_t intptr_t; 54 | #else 55 | #define ECB_PTRSIZE 4 56 | typedef uint32_t uintptr_t; 57 | typedef int32_t intptr_t; 58 | #endif 59 | #else 60 | #include 61 | #if UINTPTR_MAX > 0xffffffffU /* test the pointer-sized type: uintmax_t is at least 64 bits wide, so UINTMAX_MAX selected 8 on 32 bit targets as well */ 62 | #define ECB_PTRSIZE 8 63 | #else 64 | #define ECB_PTRSIZE 4 65 | #endif 66 | #endif 67 | 68 | /* many compilers define _GNUC_ to some versions but then only implement 69 | * what their idiot authors think are the "more important" extensions, 70 | * causing enormous grief in return for some better fake benchmark numbers. 71 | * or so. 72 | * we try to detect these and simply assume they are not gcc - if they have 73 | * an issue with that they should have done it right in the first place. 
74 | */ 75 | #ifndef ECB_GCC_VERSION 76 | #if !defined __GNUC_MINOR__ || defined __INTEL_COMPILER || defined __SUNPRO_C || defined __SUNPRO_CC || defined __llvm__ || defined __clang__ 77 | #define ECB_GCC_VERSION(major,minor) 0 78 | #else 79 | #define ECB_GCC_VERSION(major,minor) (__GNUC__ > (major) || (__GNUC__ == (major) && __GNUC_MINOR__ >= (minor))) 80 | #endif 81 | #endif 82 | 83 | #define ECB_C (__STDC__+0) /* this assumes that __STDC__ is either empty or a number */ 84 | #define ECB_C99 (__STDC_VERSION__ >= 199901L) 85 | #define ECB_C11 (__STDC_VERSION__ >= 201112L) 86 | #define ECB_CPP (__cplusplus+0) 87 | #define ECB_CPP11 (__cplusplus >= 201103L) 88 | 89 | #if ECB_CPP 90 | #define ECB_EXTERN_C extern "C" 91 | #define ECB_EXTERN_C_BEG ECB_EXTERN_C { 92 | #define ECB_EXTERN_C_END } 93 | #else 94 | #define ECB_EXTERN_C extern 95 | #define ECB_EXTERN_C_BEG 96 | #define ECB_EXTERN_C_END 97 | #endif 98 | 99 | /*****************************************************************************/ 100 | 101 | /* ECB_NO_THREADS - ecb is not used by multiple threads, ever */ 102 | /* ECB_NO_SMP - ecb might be used in multiple threads, but only on a single cpu */ 103 | 104 | #if ECB_NO_THREADS 105 | #define ECB_NO_SMP 1 106 | #endif 107 | 108 | #if ECB_NO_SMP 109 | #define ECB_MEMORY_FENCE do { } while (0) 110 | #endif 111 | /* every asm fence below, the release fences included, names "memory" as clobbered: without that clobber the statement is no compiler barrier and the compiler may move memory accesses across the fence */ 112 | #ifndef ECB_MEMORY_FENCE 113 | #if ECB_GCC_VERSION(2,5) || defined __INTEL_COMPILER || (__llvm__ && __GNUC__) || __SUNPRO_C >= 0x5110 || __SUNPRO_CC >= 0x5110 114 | #if __i386 || __i386__ 115 | #define ECB_MEMORY_FENCE __asm__ __volatile__ ("lock; orb $0, -1(%%esp)" : : : "memory") 116 | #define ECB_MEMORY_FENCE_ACQUIRE __asm__ __volatile__ ("" : : : "memory") 117 | #define ECB_MEMORY_FENCE_RELEASE __asm__ __volatile__ ("" : : : "memory") 118 | #elif __amd64 || __amd64__ || __x86_64 || __x86_64__ 119 | #define ECB_MEMORY_FENCE __asm__ __volatile__ ("mfence" : : : "memory") 120 | #define ECB_MEMORY_FENCE_ACQUIRE __asm__ __volatile__ ("" : : : "memory") 121 | 
#define ECB_MEMORY_FENCE_RELEASE __asm__ __volatile__ ("" : : : "memory") 122 | #elif __powerpc__ || __ppc__ || __powerpc64__ || __ppc64__ 123 | #define ECB_MEMORY_FENCE __asm__ __volatile__ ("sync" : : : "memory") 124 | #elif defined __ARM_ARCH_6__ || defined __ARM_ARCH_6J__ \ 125 | || defined __ARM_ARCH_6K__ || defined __ARM_ARCH_6ZK__ 126 | #define ECB_MEMORY_FENCE __asm__ __volatile__ ("mcr p15,0,%0,c7,c10,5" : : "r" (0) : "memory") 127 | #elif defined __ARM_ARCH_7__ || defined __ARM_ARCH_7A__ \ 128 | || defined __ARM_ARCH_7M__ || defined __ARM_ARCH_7R__ 129 | #define ECB_MEMORY_FENCE __asm__ __volatile__ ("dmb" : : : "memory") 130 | #elif __sparc || __sparc__ 131 | #define ECB_MEMORY_FENCE __asm__ __volatile__ ("membar #LoadStore | #LoadLoad | #StoreStore | #StoreLoad" : : : "memory") 132 | #define ECB_MEMORY_FENCE_ACQUIRE __asm__ __volatile__ ("membar #LoadStore | #LoadLoad" : : : "memory") 133 | #define ECB_MEMORY_FENCE_RELEASE __asm__ __volatile__ ("membar #LoadStore | #StoreStore" : : : "memory") 134 | #elif defined __s390__ || defined __s390x__ 135 | #define ECB_MEMORY_FENCE __asm__ __volatile__ ("bcr 15,0" : : : "memory") 136 | #elif defined __mips__ 137 | #define ECB_MEMORY_FENCE __asm__ __volatile__ ("sync" : : : "memory") 138 | #elif defined __alpha__ 139 | #define ECB_MEMORY_FENCE __asm__ __volatile__ ("mb" : : : "memory") 140 | #elif defined __hppa__ 141 | #define ECB_MEMORY_FENCE __asm__ __volatile__ ("" : : : "memory") 142 | #define ECB_MEMORY_FENCE_RELEASE __asm__ __volatile__ ("" : : : "memory") 143 | #elif defined __ia64__ 144 | #define ECB_MEMORY_FENCE __asm__ __volatile__ ("mf" : : : "memory") 145 | #endif 146 | #endif 147 | #endif 148 | 149 | #ifndef ECB_MEMORY_FENCE 150 | #if ECB_GCC_VERSION(4,7) 151 | /* see comment below (stdatomic.h) about the C11 memory model. 
*/ 152 | #define ECB_MEMORY_FENCE __atomic_thread_fence (__ATOMIC_SEQ_CST) 153 | 154 | /* The __has_feature syntax from clang is so misdesigned that we cannot use it 155 | * without risking compile time errors with other compilers. We *could* 156 | * define our own ecb_clang_has_feature, but I just can't be bothered to work 157 | * around this shit time and again. 158 | * #elif defined __clang && __has_feature (cxx_atomic) 159 | * // see comment below (stdatomic.h) about the C11 memory model. 160 | * #define ECB_MEMORY_FENCE __c11_atomic_thread_fence (__ATOMIC_SEQ_CST) 161 | */ 162 | 163 | #elif ECB_GCC_VERSION(4,4) || defined __INTEL_COMPILER || defined __clang__ 164 | #define ECB_MEMORY_FENCE __sync_synchronize () 165 | #elif _MSC_VER >= 1400 /* VC++ 2005 */ 166 | #pragma intrinsic(_ReadBarrier,_WriteBarrier,_ReadWriteBarrier) 167 | #define ECB_MEMORY_FENCE _ReadWriteBarrier () 168 | #define ECB_MEMORY_FENCE_ACQUIRE _ReadWriteBarrier () /* according to msdn, _ReadBarrier is not a load fence */ 169 | #define ECB_MEMORY_FENCE_RELEASE _WriteBarrier () 170 | #elif defined _WIN32 171 | #include 172 | #define ECB_MEMORY_FENCE MemoryBarrier () /* actually just xchg on x86... scary */ 173 | #elif __SUNPRO_C >= 0x5110 || __SUNPRO_CC >= 0x5110 174 | #include 175 | #define ECB_MEMORY_FENCE __machine_rw_barrier () 176 | #define ECB_MEMORY_FENCE_ACQUIRE __machine_r_barrier () 177 | #define ECB_MEMORY_FENCE_RELEASE __machine_w_barrier () 178 | #elif __xlC__ 179 | #define ECB_MEMORY_FENCE __sync () 180 | #endif 181 | #endif 182 | 183 | #ifndef ECB_MEMORY_FENCE 184 | #if ECB_C11 && !defined __STDC_NO_ATOMICS__ 185 | /* we assume that these memory fences work on all variables/all memory accesses, */ 186 | /* not just C11 atomics and atomic accesses */ 187 | #include 188 | /* Unfortunately, neither gcc 4.7 nor clang 3.1 generate any instructions for */ 189 | /* any fence other than seq_cst, which isn't very efficient for us. 
*/ 190 | /* Why that is, we don't know - either the C11 memory model is quite useless */ 191 | /* for most usages, or gcc and clang have a bug */ 192 | /* I *currently* lean towards the latter, and inefficiently implement */ 193 | /* all three of ecb's fences as a seq_cst fence */ 194 | #define ECB_MEMORY_FENCE atomic_thread_fence (memory_order_seq_cst) 195 | #endif 196 | #endif 197 | 198 | #ifndef ECB_MEMORY_FENCE 199 | #if !ECB_AVOID_PTHREADS 200 | /* 201 | * if you get undefined symbol references to pthread_mutex_lock, 202 | * or failure to find pthread.h, then you should implement 203 | * the ECB_MEMORY_FENCE operations for your cpu/compiler 204 | * OR provide pthread.h and link against the posix thread library 205 | * of your system. 206 | */ 207 | #include 208 | #define ECB_NEEDS_PTHREADS 1 209 | #define ECB_MEMORY_FENCE_NEEDS_PTHREADS 1 210 | 211 | static pthread_mutex_t ecb_mf_lock = PTHREAD_MUTEX_INITIALIZER; 212 | #define ECB_MEMORY_FENCE do { pthread_mutex_lock (&ecb_mf_lock); pthread_mutex_unlock (&ecb_mf_lock); } while (0) 213 | #endif 214 | #endif 215 | 216 | #if !defined ECB_MEMORY_FENCE_ACQUIRE && defined ECB_MEMORY_FENCE 217 | #define ECB_MEMORY_FENCE_ACQUIRE ECB_MEMORY_FENCE 218 | #endif 219 | 220 | #if !defined ECB_MEMORY_FENCE_RELEASE && defined ECB_MEMORY_FENCE 221 | #define ECB_MEMORY_FENCE_RELEASE ECB_MEMORY_FENCE 222 | #endif 223 | 224 | /*****************************************************************************/ 225 | 226 | #if __cplusplus 227 | #define ecb_inline static inline 228 | #elif ECB_GCC_VERSION(2,5) 229 | #define ecb_inline static __inline__ 230 | #elif ECB_C99 231 | #define ecb_inline static inline 232 | #else 233 | #define ecb_inline static 234 | #endif 235 | 236 | #if ECB_GCC_VERSION(3,3) 237 | #define ecb_restrict __restrict__ 238 | #elif ECB_C99 239 | #define ecb_restrict restrict 240 | #else 241 | #define ecb_restrict 242 | #endif 243 | 244 | typedef int ecb_bool; 245 | 246 | #define ECB_CONCAT_(a, b) a ## b 247 | 
#define ECB_CONCAT(a, b) ECB_CONCAT_(a, b) 248 | #define ECB_STRINGIFY_(a) # a 249 | #define ECB_STRINGIFY(a) ECB_STRINGIFY_(a) 250 | 251 | #define ecb_function_ ecb_inline 252 | 253 | #if ECB_GCC_VERSION(3,1) 254 | #define ecb_attribute(attrlist) __attribute__(attrlist) 255 | #define ecb_is_constant(expr) __builtin_constant_p (expr) 256 | #define ecb_expect(expr,value) __builtin_expect ((expr),(value)) 257 | #define ecb_prefetch(addr,rw,locality) __builtin_prefetch (addr, rw, locality) 258 | #else 259 | #define ecb_attribute(attrlist) 260 | #define ecb_is_constant(expr) 0 261 | #define ecb_expect(expr,value) (expr) 262 | #define ecb_prefetch(addr,rw,locality) 263 | #endif 264 | 265 | /* no emulation for ecb_decltype */ 266 | #if ECB_GCC_VERSION(4,5) 267 | #define ecb_decltype(x) __decltype(x) 268 | #elif ECB_GCC_VERSION(3,0) 269 | #define ecb_decltype(x) __typeof(x) 270 | #endif 271 | 272 | #define ecb_noinline ecb_attribute ((__noinline__)) 273 | #define ecb_unused ecb_attribute ((__unused__)) 274 | #define ecb_const ecb_attribute ((__const__)) 275 | #define ecb_pure ecb_attribute ((__pure__)) 276 | 277 | #if ECB_C11 278 | #define ecb_noreturn _Noreturn 279 | #else 280 | #define ecb_noreturn ecb_attribute ((__noreturn__)) 281 | #endif 282 | 283 | #if ECB_GCC_VERSION(4,3) 284 | #define ecb_artificial ecb_attribute ((__artificial__)) 285 | #define ecb_hot ecb_attribute ((__hot__)) 286 | #define ecb_cold ecb_attribute ((__cold__)) 287 | #else 288 | #define ecb_artificial 289 | #define ecb_hot 290 | #define ecb_cold 291 | #endif 292 | 293 | /* put around conditional expressions if you are very sure that the */ 294 | /* expression is mostly true or mostly false. note that these return */ 295 | /* booleans, not the expression. 
*/ 296 | #define ecb_expect_false(expr) ecb_expect (!!(expr), 0) 297 | #define ecb_expect_true(expr) ecb_expect (!!(expr), 1) 298 | /* for compatibility to the rest of the world */ 299 | #define ecb_likely(expr) ecb_expect_true (expr) 300 | #define ecb_unlikely(expr) ecb_expect_false (expr) 301 | 302 | /* count trailing zero bits and count # of one bits */ 303 | #if ECB_GCC_VERSION(3,4) 304 | /* we assume int == 32 bit, long == 32 or 64 bit and long long == 64 bit */ 305 | #define ecb_ld32(x) (__builtin_clz (x) ^ 31) 306 | #define ecb_ld64(x) (__builtin_clzll (x) ^ 63) 307 | #define ecb_ctz32(x) __builtin_ctz (x) 308 | #define ecb_ctz64(x) __builtin_ctzll (x) 309 | #define ecb_popcount32(x) __builtin_popcount (x) 310 | /* no popcountll */ 311 | #else 312 | ecb_function_ int ecb_ctz32 (uint32_t x) ecb_const; 313 | ecb_function_ int 314 | ecb_ctz32 (uint32_t x) 315 | { 316 | int r = 0; 317 | 318 | x &= ~x + 1; /* this isolates the lowest bit */ 319 | 320 | #if ECB_branchless_on_i386 321 | r += !!(x & 0xaaaaaaaa) << 0; 322 | r += !!(x & 0xcccccccc) << 1; 323 | r += !!(x & 0xf0f0f0f0) << 2; 324 | r += !!(x & 0xff00ff00) << 3; 325 | r += !!(x & 0xffff0000) << 4; 326 | #else 327 | if (x & 0xaaaaaaaa) r += 1; 328 | if (x & 0xcccccccc) r += 2; 329 | if (x & 0xf0f0f0f0) r += 4; 330 | if (x & 0xff00ff00) r += 8; 331 | if (x & 0xffff0000) r += 16; 332 | #endif 333 | 334 | return r; 335 | } 336 | 337 | ecb_function_ int ecb_ctz64 (uint64_t x) ecb_const; 338 | ecb_function_ int 339 | ecb_ctz64 (uint64_t x) 340 | { 341 | int shift = x & 0xffffffffU ? 
0 : 32; 342 | return ecb_ctz32 (x >> shift) + shift; 343 | } 344 | 345 | ecb_function_ int ecb_popcount32 (uint32_t x) ecb_const; 346 | ecb_function_ int 347 | ecb_popcount32 (uint32_t x) 348 | { 349 | x -= (x >> 1) & 0x55555555; 350 | x = ((x >> 2) & 0x33333333) + (x & 0x33333333); 351 | x = ((x >> 4) + x) & 0x0f0f0f0f; 352 | x *= 0x01010101; 353 | 354 | return x >> 24; 355 | } 356 | 357 | ecb_function_ int ecb_ld32 (uint32_t x) ecb_const; 358 | ecb_function_ int ecb_ld32 (uint32_t x) 359 | { 360 | int r = 0; 361 | 362 | if (x >> 16) { x >>= 16; r += 16; } 363 | if (x >> 8) { x >>= 8; r += 8; } 364 | if (x >> 4) { x >>= 4; r += 4; } 365 | if (x >> 2) { x >>= 2; r += 2; } 366 | if (x >> 1) { r += 1; } 367 | 368 | return r; 369 | } 370 | 371 | ecb_function_ int ecb_ld64 (uint64_t x) ecb_const; 372 | ecb_function_ int ecb_ld64 (uint64_t x) 373 | { 374 | int r = 0; 375 | 376 | if (x >> 32) { x >>= 32; r += 32; } 377 | 378 | return r + ecb_ld32 (x); 379 | } 380 | #endif 381 | 382 | ecb_function_ ecb_bool ecb_is_pot32 (uint32_t x) ecb_const; 383 | ecb_function_ ecb_bool ecb_is_pot32 (uint32_t x) { return !(x & (x - 1)); } 384 | ecb_function_ ecb_bool ecb_is_pot64 (uint64_t x) ecb_const; 385 | ecb_function_ ecb_bool ecb_is_pot64 (uint64_t x) { return !(x & (x - 1)); } 386 | 387 | ecb_function_ uint8_t ecb_bitrev8 (uint8_t x) ecb_const; 388 | ecb_function_ uint8_t ecb_bitrev8 (uint8_t x) 389 | { 390 | return ( (x * 0x0802U & 0x22110U) 391 | | (x * 0x8020U & 0x88440U)) * 0x10101U >> 16; 392 | } 393 | 394 | ecb_function_ uint16_t ecb_bitrev16 (uint16_t x) ecb_const; 395 | ecb_function_ uint16_t ecb_bitrev16 (uint16_t x) 396 | { 397 | x = ((x >> 1) & 0x5555) | ((x & 0x5555) << 1); 398 | x = ((x >> 2) & 0x3333) | ((x & 0x3333) << 2); 399 | x = ((x >> 4) & 0x0f0f) | ((x & 0x0f0f) << 4); 400 | x = ( x >> 8 ) | ( x << 8); 401 | 402 | return x; 403 | } 404 | 405 | ecb_function_ uint32_t ecb_bitrev32 (uint32_t x) ecb_const; 406 | ecb_function_ uint32_t ecb_bitrev32 (uint32_t x) 
407 | { 408 | x = ((x >> 1) & 0x55555555) | ((x & 0x55555555) << 1); 409 | x = ((x >> 2) & 0x33333333) | ((x & 0x33333333) << 2); 410 | x = ((x >> 4) & 0x0f0f0f0f) | ((x & 0x0f0f0f0f) << 4); 411 | x = ((x >> 8) & 0x00ff00ff) | ((x & 0x00ff00ff) << 8); 412 | x = ( x >> 16 ) | ( x << 16); 413 | 414 | return x; 415 | } 416 | 417 | /* popcount64 is only available on 64 bit cpus as gcc builtin */ 418 | /* so for this version we are lazy */ 419 | ecb_function_ int ecb_popcount64 (uint64_t x) ecb_const; 420 | ecb_function_ int 421 | ecb_popcount64 (uint64_t x) 422 | { 423 | return ecb_popcount32 (x) + ecb_popcount32 (x >> 32); 424 | } 425 | 426 | ecb_inline uint8_t ecb_rotl8 (uint8_t x, unsigned int count) ecb_const; 427 | ecb_inline uint8_t ecb_rotr8 (uint8_t x, unsigned int count) ecb_const; 428 | ecb_inline uint16_t ecb_rotl16 (uint16_t x, unsigned int count) ecb_const; 429 | ecb_inline uint16_t ecb_rotr16 (uint16_t x, unsigned int count) ecb_const; 430 | ecb_inline uint32_t ecb_rotl32 (uint32_t x, unsigned int count) ecb_const; 431 | ecb_inline uint32_t ecb_rotr32 (uint32_t x, unsigned int count) ecb_const; 432 | ecb_inline uint64_t ecb_rotl64 (uint64_t x, unsigned int count) ecb_const; 433 | ecb_inline uint64_t ecb_rotr64 (uint64_t x, unsigned int count) ecb_const; 434 | 435 | ecb_inline uint8_t ecb_rotl8 (uint8_t x, unsigned int count) { return (x >> ( 8 - count)) | (x << count); } 436 | ecb_inline uint8_t ecb_rotr8 (uint8_t x, unsigned int count) { return (x << ( 8 - count)) | (x >> count); } 437 | ecb_inline uint16_t ecb_rotl16 (uint16_t x, unsigned int count) { return (x >> (16 - count)) | (x << count); } 438 | ecb_inline uint16_t ecb_rotr16 (uint16_t x, unsigned int count) { return (x << (16 - count)) | (x >> count); } 439 | ecb_inline uint32_t ecb_rotl32 (uint32_t x, unsigned int count) { return (x >> (32 - count)) | (x << count); } 440 | ecb_inline uint32_t ecb_rotr32 (uint32_t x, unsigned int count) { return (x << (32 - count)) | (x >> count); } 441 | 
ecb_inline uint64_t ecb_rotl64 (uint64_t x, unsigned int count) { return (x >> (64 - count)) | (x << count); } 442 | ecb_inline uint64_t ecb_rotr64 (uint64_t x, unsigned int count) { return (x << (64 - count)) | (x >> count); } 443 | 444 | #if ECB_GCC_VERSION(4,3) 445 | #define ecb_bswap16(x) (__builtin_bswap32 (x) >> 16) 446 | #define ecb_bswap32(x) __builtin_bswap32 (x) 447 | #define ecb_bswap64(x) __builtin_bswap64 (x) 448 | #else 449 | ecb_function_ uint16_t ecb_bswap16 (uint16_t x) ecb_const; 450 | ecb_function_ uint16_t 451 | ecb_bswap16 (uint16_t x) 452 | { 453 | return ecb_rotl16 (x, 8); 454 | } 455 | 456 | ecb_function_ uint32_t ecb_bswap32 (uint32_t x) ecb_const; 457 | ecb_function_ uint32_t 458 | ecb_bswap32 (uint32_t x) 459 | { 460 | return (((uint32_t)ecb_bswap16 (x)) << 16) | ecb_bswap16 (x >> 16); 461 | } 462 | 463 | ecb_function_ uint64_t ecb_bswap64 (uint64_t x) ecb_const; 464 | ecb_function_ uint64_t 465 | ecb_bswap64 (uint64_t x) 466 | { 467 | return (((uint64_t)ecb_bswap32 (x)) << 32) | ecb_bswap32 (x >> 32); 468 | } 469 | #endif 470 | 471 | #if ECB_GCC_VERSION(4,5) 472 | #define ecb_unreachable() __builtin_unreachable () 473 | #else 474 | /* this seems to work fine, but gcc always emits a warning for it :/ */ 475 | ecb_inline void ecb_unreachable (void) ecb_noreturn; 476 | ecb_inline void ecb_unreachable (void) { } 477 | #endif 478 | 479 | /* try to tell the compiler that some condition is definitely true */ 480 | #define ecb_assume(cond) if (!(cond)) ecb_unreachable (); else 0 481 | 482 | ecb_inline unsigned char ecb_byteorder_helper (void) ecb_const; 483 | ecb_inline unsigned char 484 | ecb_byteorder_helper (void) 485 | { 486 | /* the union code still generates code under pressure in gcc, */ 487 | /* but less than using pointers, and always seems to */ 488 | /* successfully return a constant. 
*/ 489 | /* the reason why we have this horrible preprocessor mess */ 490 | /* is to avoid it in all cases, at least on common architectures */ 491 | /* or when using a recent enough gcc version (>= 4.6) */ 492 | #if __i386 || __i386__ || _M_X86 || __amd64 || __amd64__ || _M_X64 493 | return 0x44; 494 | #elif __BYTE_ORDER__ && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ 495 | return 0x44; 496 | #elif __BYTE_ORDER__ && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ 497 | return 0x11; 498 | #else 499 | union 500 | { 501 | uint32_t i; 502 | uint8_t c; 503 | } u = { 0x11223344 }; 504 | return u.c; 505 | #endif 506 | } 507 | 508 | ecb_inline ecb_bool ecb_big_endian (void) ecb_const; 509 | ecb_inline ecb_bool ecb_big_endian (void) { return ecb_byteorder_helper () == 0x11; } 510 | ecb_inline ecb_bool ecb_little_endian (void) ecb_const; 511 | ecb_inline ecb_bool ecb_little_endian (void) { return ecb_byteorder_helper () == 0x44; } 512 | 513 | #if ECB_GCC_VERSION(3,0) || ECB_C99 514 | #define ecb_mod(m,n) ((m) % (n) + ((m) % (n) < 0 ? (n) : 0)) 515 | #else 516 | #define ecb_mod(m,n) ((m) < 0 ? ((n) - 1 - ((-1 - (m)) % (n))) : ((m) % (n))) 517 | #endif 518 | 519 | #if __cplusplus 520 | template 521 | static inline T ecb_div_rd (T val, T div) 522 | { 523 | return val < 0 ? - ((-val + div - 1) / div) : (val ) / div; 524 | } 525 | template 526 | static inline T ecb_div_ru (T val, T div) 527 | { 528 | return val < 0 ? - ((-val ) / div) : (val + div - 1) / div; 529 | } 530 | #else 531 | #define ecb_div_rd(val,div) ((val) < 0 ? - ((-(val) + (div) - 1) / (div)) : ((val) ) / (div)) 532 | #define ecb_div_ru(val,div) ((val) < 0 ? 
- ((-(val) ) / (div)) : ((val) + (div) - 1) / (div)) 533 | #endif 534 | 535 | #if ecb_cplusplus_does_not_suck 536 | /* does not work for local types (http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2008/n2657.htm) */ 537 | template 538 | static inline int ecb_array_length (const T (&arr)[N]) 539 | { 540 | return N; 541 | } 542 | #else 543 | #define ecb_array_length(name) (sizeof (name) / sizeof (name [0])) 544 | #endif 545 | 546 | /*******************************************************************************/ 547 | /* floating point stuff, can be disabled by defining ECB_NO_LIBM */ 548 | 549 | /* basically, everything uses "ieee pure-endian" floating point numbers */ 550 | /* the only noteworthy exception is ancient armle, which uses order 43218765 */ 551 | #if 0 \ 552 | || __i386 || __i386__ \ 553 | || __amd64 || __amd64__ || __x86_64 || __x86_64__ \ 554 | || __powerpc__ || __ppc__ || __powerpc64__ || __ppc64__ \ 555 | || defined __arm__ && defined __ARM_EABI__ \ 556 | || defined __s390__ || defined __s390x__ \ 557 | || defined __mips__ \ 558 | || defined __alpha__ \ 559 | || defined __hppa__ \ 560 | || defined __ia64__ \ 561 | || defined _M_IX86 || defined _M_AMD64 || defined _M_IA64 562 | #define ECB_STDFP 1 563 | #include /* for memcpy */ 564 | #else 565 | #define ECB_STDFP 0 566 | #include /* for frexp*, ldexp* */ 567 | #endif 568 | 569 | #ifndef ECB_NO_LIBM 570 | 571 | /* convert a float to ieee single/binary32 */ 572 | ecb_function_ uint32_t ecb_float_to_binary32 (float x) ecb_const; 573 | ecb_function_ uint32_t 574 | ecb_float_to_binary32 (float x) 575 | { 576 | uint32_t r; 577 | 578 | #if ECB_STDFP 579 | memcpy (&r, &x, 4); 580 | #else 581 | /* slow emulation, works for anything but -0 */ 582 | uint32_t m; 583 | int e; 584 | 585 | if (x == 0e0f ) return 0x00000000U; 586 | if (x > +3.40282346638528860e+38f) return 0x7f800000U; 587 | if (x < -3.40282346638528860e+38f) return 0xff800000U; 588 | if (x != x ) return 0x7fbfffffU; 589 | 590 | m = frexpf (x, 
&e) * 0x1000000U; 591 | 592 | r = m & 0x80000000U; 593 | 594 | if (r) 595 | m = -m; 596 | 597 | if (e <= -126) 598 | { 599 | m &= 0xffffffU; 600 | m >>= (-125 - e); 601 | e = -126; 602 | } 603 | 604 | r |= (e + 126) << 23; 605 | r |= m & 0x7fffffU; 606 | #endif 607 | 608 | return r; 609 | } 610 | 611 | /* converts an ieee single/binary32 to a float */ 612 | ecb_function_ float ecb_binary32_to_float (uint32_t x) ecb_const; 613 | ecb_function_ float 614 | ecb_binary32_to_float (uint32_t x) 615 | { 616 | float r; 617 | 618 | #if ECB_STDFP 619 | memcpy (&r, &x, 4); 620 | #else 621 | /* emulation, only works for normals and subnormals and +0 */ 622 | int neg = x >> 31; 623 | int e = (x >> 23) & 0xffU; 624 | 625 | x &= 0x7fffffU; 626 | 627 | if (e) 628 | x |= 0x800000U; 629 | else 630 | e = 1; 631 | 632 | /* we distrust ldexpf a bit and do the 2**-24 scaling by an extra multiply */ 633 | r = ldexpf (x * (0.5f / 0x800000U), e - 126); 634 | 635 | r = neg ? -r : r; 636 | #endif 637 | 638 | return r; 639 | } 640 | 641 | /* convert a double to ieee double/binary64 */ 642 | ecb_function_ uint64_t ecb_double_to_binary64 (double x) ecb_const; 643 | ecb_function_ uint64_t 644 | ecb_double_to_binary64 (double x) 645 | { 646 | uint64_t r; 647 | 648 | #if ECB_STDFP 649 | memcpy (&r, &x, 8); 650 | #else 651 | /* slow emulation, works for anything but -0 */ 652 | uint64_t m; 653 | int e; 654 | 655 | if (x == 0e0 ) return 0x0000000000000000U; 656 | if (x > +1.79769313486231470e+308) return 0x7ff0000000000000U; 657 | if (x < -1.79769313486231470e+308) return 0xfff0000000000000U; 658 | if (x != x ) return 0X7ff7ffffffffffffU; 659 | 660 | m = frexp (x, &e) * 0x20000000000000U; 661 | 662 | r = m & 0x8000000000000000;; 663 | 664 | if (r) 665 | m = -m; 666 | 667 | if (e <= -1022) 668 | { 669 | m &= 0x1fffffffffffffU; 670 | m >>= (-1021 - e); 671 | e = -1022; 672 | } 673 | 674 | r |= ((uint64_t)(e + 1022)) << 52; 675 | r |= m & 0xfffffffffffffU; 676 | #endif 677 | 678 | return r; 679 | } 680 
| 681 | /* converts an ieee double/binary64 to a double */ 682 | ecb_function_ double ecb_binary64_to_double (uint64_t x) ecb_const; 683 | ecb_function_ double 684 | ecb_binary64_to_double (uint64_t x) 685 | { 686 | double r; 687 | 688 | #if ECB_STDFP 689 | memcpy (&r, &x, 8); 690 | #else 691 | /* emulation, only works for normals and subnormals and +0 */ 692 | int neg = x >> 63; 693 | int e = (x >> 52) & 0x7ffU; 694 | 695 | x &= 0xfffffffffffffU; 696 | 697 | if (e) 698 | x |= 0x10000000000000U; 699 | else 700 | e = 1; 701 | 702 | /* we distrust ldexp a bit and do the 2**-53 scaling by an extra multiply */ 703 | r = ldexp (x * (0.5 / 0x10000000000000U), e - 1022); 704 | 705 | r = neg ? -r : r; 706 | #endif 707 | 708 | return r; 709 | } 710 | 711 | #endif 712 | 713 | #endif 714 | 715 | -------------------------------------------------------------------------------- /eio.c: -------------------------------------------------------------------------------- 1 | /* 2 | * libeio implementation 3 | * 4 | * Copyright (c) 2007,2008,2009,2010,2011,2012 Marc Alexander Lehmann 5 | * All rights reserved. 6 | * 7 | * Redistribution and use in source and binary forms, with or without modifica- 8 | * tion, are permitted provided that the following conditions are met: 9 | * 10 | * 1. Redistributions of source code must retain the above copyright notice, 11 | * this list of conditions and the following disclaimer. 12 | * 13 | * 2. Redistributions in binary form must reproduce the above copyright 14 | * notice, this list of conditions and the following disclaimer in the 15 | * documentation and/or other materials provided with the distribution. 16 | * 17 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED 18 | * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MER- 19 | * CHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO 20 | * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPE- 21 | * CIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 22 | * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; 23 | * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 24 | * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTH- 25 | * ERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED 26 | * OF THE POSSIBILITY OF SUCH DAMAGE. 27 | * 28 | * Alternatively, the contents of this file may be used under the terms of 29 | * the GNU General Public License ("GPL") version 2 or any later version, 30 | * in which case the provisions of the GPL are applicable instead of 31 | * the above. If you wish to allow the use of your version of this file 32 | * only under the terms of the GPL and not to allow others to use your 33 | * version of this file under the BSD license, indicate your decision 34 | * by deleting the provisions above and replace them with the notice 35 | * and other provisions required by the GPL. If you do not delete the 36 | * provisions above, a recipient may use your version of this file under 37 | * either the BSD or the GPL. 
38 | */ 39 | 40 | #ifndef _WIN32 41 | # include "config.h" 42 | #endif 43 | 44 | #include "eio.h" 45 | #include "ecb.h" 46 | 47 | #ifdef EIO_STACKSIZE 48 | # define X_STACKSIZE EIO_STACKSIZE 49 | #endif 50 | #include "xthread.h" 51 | 52 | #include 53 | #include 54 | #include 55 | #include 56 | #include 57 | #include 58 | #include 59 | #include 60 | #include 61 | #include 62 | 63 | /* intptr_t comes from unistd.h, says POSIX/UNIX/tradition */ 64 | /* intptr_t only comes from stdint.h, says idiot openbsd coder */ 65 | #if HAVE_STDINT_H 66 | # include 67 | #endif 68 | 69 | #ifndef ECANCELED 70 | # define ECANCELED EDOM 71 | #endif 72 | #ifndef ELOOP 73 | # define ELOOP EDOM 74 | #endif 75 | 76 | #if !defined(ENOTSOCK) && defined(WSAENOTSOCK) 77 | # define ENOTSOCK WSAENOTSOCK 78 | #endif 79 | 80 | static void eio_destroy (eio_req *req); 81 | 82 | #ifndef EIO_FINISH 83 | # define EIO_FINISH(req) ((req)->finish) && !EIO_CANCELLED (req) ? (req)->finish (req) : 0 84 | #endif 85 | 86 | #ifndef EIO_DESTROY 87 | # define EIO_DESTROY(req) do { if ((req)->destroy) (req)->destroy (req); } while (0) 88 | #endif 89 | 90 | #ifndef EIO_FEED 91 | # define EIO_FEED(req) do { if ((req)->feed ) (req)->feed (req); } while (0) 92 | #endif 93 | 94 | #ifndef EIO_FD_TO_WIN32_HANDLE 95 | # define EIO_FD_TO_WIN32_HANDLE(fd) _get_osfhandle (fd) 96 | #endif 97 | #ifndef EIO_WIN32_HANDLE_TO_FD 98 | # define EIO_WIN32_HANDLE_TO_FD(handle) _open_osfhandle (handle, 0) 99 | #endif 100 | 101 | #define EIO_ERRNO(errval,retval) ((errno = errval), retval) 102 | 103 | #define EIO_ENOSYS() EIO_ERRNO (ENOSYS, -1) 104 | 105 | #ifdef _WIN32 106 | 107 | #undef PAGESIZE 108 | #define PAGESIZE 4096 /* GetSystemInfo? 
*/ 109 | 110 | /* TODO: look at how perl does stat (non-sloppy), unlink (ro-files), utime, link */ 111 | 112 | #ifdef EIO_STRUCT_STATI64 113 | /* look at perl's non-sloppy stat */ 114 | #define stat(path,buf) _stati64 (path,buf) 115 | #define fstat(fd,buf) _fstati64 (fd,buf) 116 | #endif 117 | #define lstat(path,buf) stat (path,buf) 118 | #define fsync(fd) (FlushFileBuffers ((HANDLE)EIO_FD_TO_WIN32_HANDLE (fd)) ? 0 : EIO_ERRNO (EBADF, -1)) 119 | #define mkdir(path,mode) _mkdir (path) 120 | #define link(old,neu) (CreateHardLink (neu, old, 0) ? 0 : EIO_ERRNO (ENOENT, -1)) 121 | 122 | #define chmod(path,mode) _chmod (path, mode) 123 | #define dup(fd) _dup (fd) 124 | #define dup2(fd1,fd2) _dup2 (fd1, fd2) 125 | /* the following calls have no win32 replacement here and always fail with ENOSYS */ 126 | #define fchmod(fd,mode) EIO_ENOSYS () 127 | #define chown(path,uid,gid) EIO_ENOSYS () 128 | #define fchown(fd,uid,gid) EIO_ENOSYS () 129 | #define truncate(path,offs) EIO_ENOSYS () /* far-miss: SetEndOfFile */ 130 | #define ftruncate(fd,offs) EIO_ENOSYS () /* near-miss: SetEndOfFile */ 131 | #define mknod(path,mode,dev) EIO_ENOSYS () 132 | #define sync() EIO_ENOSYS () 133 | #define readlink(path,buf,s) EIO_ENOSYS () 134 | #define statvfs(path,buf) EIO_ENOSYS () 135 | #define fstatvfs(fd,buf) EIO_ENOSYS () 136 | 137 | #define pread(fd,buf,count,offset) eio__pread (fd, buf, count, offset) 138 | #define pwrite(fd,buf,count,offset) eio__pwrite (fd, buf, count, offset) 139 | 140 | #if __GNUC__ 141 | typedef long long eio_off_t; /* signed for compatibility to msvc */ 142 | #else 143 | typedef __int64 eio_off_t; /* unsigned not supported by msvc */ 144 | #endif 145 | /* pread replacement: reads count bytes at offset via ReadFile, passing the 64 bit offset split into the two halves of an OVERLAPPED; returns the number of bytes read, or -1 if ReadFile fails (errno is not set here) */ 146 | static eio_ssize_t 147 | eio__pread (int fd, void *buf, eio_ssize_t count, eio_off_t offset) 148 | { 149 | OVERLAPPED o = { 0 }; 150 | DWORD got; 151 | 152 | o.Offset = offset; 153 | o.OffsetHigh = offset >> 32; 154 | 155 | return ReadFile ((HANDLE)EIO_FD_TO_WIN32_HANDLE (fd), buf, count, &got, &o) 156 | ? 
got : -1; 157 | } 158 | /* pwrite replacement: same as eio__pread, but writes via WriteFile; returns the number of bytes written, or -1 if WriteFile fails (errno is not set here) */ 159 | static eio_ssize_t 160 | eio__pwrite (int fd, void *buf, eio_ssize_t count, eio_off_t offset) 161 | { 162 | OVERLAPPED o = { 0 }; 163 | DWORD got; 164 | 165 | o.Offset = offset; 166 | o.OffsetHigh = offset >> 32; 167 | 168 | return WriteFile ((HANDLE)EIO_FD_TO_WIN32_HANDLE (fd), buf, count, &got, &o) 169 | ? got : -1; 170 | } 171 | 172 | /* rename() uses MoveFile, which fails to overwrite */ 173 | #define rename(old,neu) eio__rename (old, neu) 174 | /* rename replacement: calls MoveFileEx with MOVEFILE_REPLACE_EXISTING; on failure returns -1 with errno set to ENOENT for the not-found style errors listed below and to EACCES for everything else */ 175 | static int 176 | eio__rename (const char *old, const char *neu) 177 | { 178 | if (MoveFileEx (old, neu, MOVEFILE_REPLACE_EXISTING)) 179 | return 0; 180 | 181 | /* should steal _dosmaperr */ 182 | switch (GetLastError ()) 183 | { 184 | case ERROR_FILE_NOT_FOUND: 185 | case ERROR_PATH_NOT_FOUND: 186 | case ERROR_INVALID_DRIVE: 187 | case ERROR_NO_MORE_FILES: 188 | case ERROR_BAD_NETPATH: 189 | case ERROR_BAD_NET_NAME: 190 | case ERROR_BAD_PATHNAME: 191 | case ERROR_FILENAME_EXCED_RANGE: 192 | errno = ENOENT; 193 | break; 194 | 195 | default: 196 | errno = EACCES; 197 | break; 198 | } 199 | 200 | return -1; 201 | } 202 | 203 | /* symlink replacement: when WINVER >= 0x0600 it tries CreateSymbolicLink with flag 1 and then with flag 0, returning 0 as soon as one succeeds; in every other case it fails with ENOENT. we could even stat and see if it exists */ 204 | static int 205 | symlink (const char *old, const char *neu) 206 | { 207 | #if WINVER >= 0x0600 208 | if (CreateSymbolicLink (neu, old, 1)) 209 | return 0; 210 | 211 | if (CreateSymbolicLink (neu, old, 0)) 212 | return 0; 213 | #endif 214 | 215 | return EIO_ERRNO (ENOENT, -1); 216 | } 217 | 218 | /* POSIX API only */ 219 | #define CreateHardLink(neu,old,flags) 0 220 | #define CreateSymbolicLink(neu,old,flags) 0 221 | /* placeholder type so that code mentioning struct statvfs compiles; statvfs and fstatvfs themselves are defined above to fail with ENOSYS */ 222 | struct statvfs 223 | { 224 | int dummy; 225 | }; 226 | 227 | #define DT_DIR EIO_DT_DIR 228 | #define DT_REG EIO_DT_REG 229 | #define D_NAME(entp) entp.cFileName 230 | #define D_TYPE(entp) (entp.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY ? 
DT_DIR : DT_REG) 231 | 232 | #else 233 | /* NOTE(review): the header names of the #include directives in this section are missing in this copy - they have to be restored before this can be built */ 234 | #include 235 | #include 236 | #include 237 | #include 238 | #include 239 | #include 240 | 241 | #if _POSIX_MEMLOCK || _POSIX_MEMLOCK_RANGE || _POSIX_MAPPED_FILES 242 | #include 243 | #endif 244 | 245 | #define D_NAME(entp) entp->d_name 246 | 247 | /* POSIX_SOURCE is useless on bsd's, and XOPEN_SOURCE is unreliable there, too */ 248 | #if __FreeBSD__ || __NetBSD__ || __OpenBSD__ 249 | #define _DIRENT_HAVE_D_TYPE /* sigh */ 250 | #define D_INO(de) (de)->d_fileno 251 | #define D_NAMLEN(de) (de)->d_namlen 252 | #elif __linux || defined d_ino || _XOPEN_SOURCE >= 600 253 | #define D_INO(de) (de)->d_ino 254 | #endif 255 | 256 | #ifdef _D_EXACT_NAMLEN 257 | #undef D_NAMLEN 258 | #define D_NAMLEN(de) _D_EXACT_NAMLEN (de) 259 | #endif 260 | 261 | #ifdef _DIRENT_HAVE_D_TYPE 262 | #define D_TYPE(de) (de)->d_type 263 | #endif 264 | 265 | #ifndef EIO_STRUCT_DIRENT 266 | #define EIO_STRUCT_DIRENT struct dirent 267 | #endif 268 | 269 | #endif 270 | 271 | #if HAVE_UTIMES 272 | # include 273 | #endif 274 | 275 | #if HAVE_SYS_SYSCALL_H 276 | # include 277 | #endif 278 | 279 | #if HAVE_SYS_PRCTL_H 280 | # include 281 | #endif 282 | 283 | #if HAVE_SENDFILE 284 | # if __linux 285 | # include 286 | # elif __FreeBSD__ || defined __APPLE__ 287 | # include 288 | # include 289 | # elif __hpux 290 | # include 291 | # elif __solaris 292 | # include 293 | # else 294 | # error sendfile support requested but not available 295 | # endif 296 | #endif 297 | /* fallbacks for the directory entry accessors that were not defined above */ 298 | #ifndef D_TYPE 299 | # define D_TYPE(de) 0 300 | #endif 301 | #ifndef D_INO 302 | # define D_INO(de) 0 303 | #endif 304 | #ifndef D_NAMLEN 305 | # define D_NAMLEN(entp) strlen (D_NAME (entp)) 306 | #endif 307 | 308 | /* used for struct dirent, AIX doesn't provide it */ 309 | #ifndef NAME_MAX 310 | # define NAME_MAX 4096 311 | #endif 312 | 313 | /* used for readlink etc. 
*/ 314 | #ifndef PATH_MAX 315 | # define PATH_MAX 4096 316 | #endif 317 | 318 | /* buffer size for various temporary buffers */ 319 | #define EIO_BUFSIZE 65536 320 | 321 | #define dBUF \ 322 | char *eio_buf = malloc (EIO_BUFSIZE); \ 323 | errno = ENOMEM; \ 324 | if (!eio_buf) \ 325 | return -1 326 | 327 | #define FUBd \ 328 | free (eio_buf) 329 | 330 | #define EIO_TICKS ((1000000 + 1023) >> 10) 331 | 332 | /*****************************************************************************/ 333 | 334 | struct tmpbuf 335 | { 336 | void *ptr; 337 | int len; 338 | }; 339 | 340 | static void * 341 | tmpbuf_get (struct tmpbuf *buf, int len) 342 | { 343 | if (buf->len < len) 344 | { 345 | free (buf->ptr); 346 | buf->ptr = malloc (buf->len = len); 347 | } 348 | 349 | return buf->ptr; 350 | } 351 | 352 | struct tmpbuf; 353 | 354 | #if _POSIX_VERSION >= 200809L 355 | #define HAVE_AT 1 356 | #define WD2FD(wd) ((wd) ? (wd)->fd : AT_FDCWD) 357 | #ifndef O_SEARCH 358 | #define O_SEARCH O_RDONLY 359 | #endif 360 | #else 361 | #define HAVE_AT 0 362 | static const char *wd_expand (struct tmpbuf *tmpbuf, eio_wd wd, const char *path); 363 | #endif 364 | 365 | struct eio_pwd 366 | { 367 | #if HAVE_AT 368 | int fd; 369 | #endif 370 | int len; 371 | char str[1]; /* actually, a 0-terminated canonical path */ 372 | }; 373 | 374 | /*****************************************************************************/ 375 | 376 | #define ETP_PRI_MIN EIO_PRI_MIN 377 | #define ETP_PRI_MAX EIO_PRI_MAX 378 | 379 | struct etp_worker; 380 | 381 | #define ETP_REQ eio_req 382 | #define ETP_DESTROY(req) eio_destroy (req) 383 | static int eio_finish (eio_req *req); 384 | #define ETP_FINISH(req) eio_finish (req) 385 | static void eio_execute (struct etp_worker *self, eio_req *req); 386 | #define ETP_EXECUTE(wrk,req) eio_execute (wrk,req) 387 | 388 | /*****************************************************************************/ 389 | 390 | #define ETP_NUM_PRI (ETP_PRI_MAX - ETP_PRI_MIN + 1) 391 | 392 | /* 
calculate time difference in ~1/EIO_TICKS of a second */ 393 | ecb_inline int 394 | tvdiff (struct timeval *tv1, struct timeval *tv2) 395 | { 396 | return (tv2->tv_sec - tv1->tv_sec ) * EIO_TICKS 397 | + ((tv2->tv_usec - tv1->tv_usec) >> 10); 398 | } 399 | 400 | static unsigned int started, idle, wanted = 4; 401 | 402 | static void (*want_poll_cb) (void); 403 | static void (*done_poll_cb) (void); 404 | 405 | static unsigned int max_poll_time; /* reslock */ 406 | static unsigned int max_poll_reqs; /* reslock */ 407 | 408 | static unsigned int nreqs; /* reqlock */ 409 | static unsigned int nready; /* reqlock */ 410 | static unsigned int npending; /* reqlock */ 411 | static unsigned int max_idle = 4; /* maximum number of threads that can idle indefinitely */ 412 | static unsigned int idle_timeout = 10; /* number of seconds after which an idle threads exit */ 413 | 414 | static xmutex_t wrklock; 415 | static xmutex_t reslock; 416 | static xmutex_t reqlock; 417 | static xcond_t reqwait; 418 | 419 | typedef struct etp_worker 420 | { 421 | struct tmpbuf tmpbuf; 422 | 423 | /* locked by wrklock */ 424 | struct etp_worker *prev, *next; 425 | 426 | xthread_t tid; 427 | 428 | #ifdef ETP_WORKER_COMMON 429 | ETP_WORKER_COMMON 430 | #endif 431 | } etp_worker; 432 | 433 | static etp_worker wrk_first; /* NOT etp */ 434 | 435 | #define ETP_WORKER_LOCK(wrk) X_LOCK (wrklock) 436 | #define ETP_WORKER_UNLOCK(wrk) X_UNLOCK (wrklock) 437 | 438 | /* worker threads management */ 439 | 440 | static void 441 | etp_worker_clear (etp_worker *wrk) 442 | { 443 | } 444 | 445 | static void ecb_cold 446 | etp_worker_free (etp_worker *wrk) 447 | { 448 | free (wrk->tmpbuf.ptr); 449 | 450 | wrk->next->prev = wrk->prev; 451 | wrk->prev->next = wrk->next; 452 | 453 | free (wrk); 454 | } 455 | 456 | static unsigned int 457 | etp_nreqs (void) 458 | { 459 | int retval; 460 | if (WORDACCESS_UNSAFE) X_LOCK (reqlock); 461 | retval = nreqs; 462 | if (WORDACCESS_UNSAFE) X_UNLOCK (reqlock); 463 | return retval; 
464 | } 465 | 466 | static unsigned int 467 | etp_nready (void) 468 | { 469 | unsigned int retval; 470 | 471 | if (WORDACCESS_UNSAFE) X_LOCK (reqlock); 472 | retval = nready; 473 | if (WORDACCESS_UNSAFE) X_UNLOCK (reqlock); 474 | 475 | return retval; 476 | } 477 | 478 | static unsigned int 479 | etp_npending (void) 480 | { 481 | unsigned int retval; 482 | 483 | if (WORDACCESS_UNSAFE) X_LOCK (reqlock); 484 | retval = npending; 485 | if (WORDACCESS_UNSAFE) X_UNLOCK (reqlock); 486 | 487 | return retval; 488 | } 489 | 490 | static unsigned int 491 | etp_nthreads (void) 492 | { 493 | unsigned int retval; 494 | 495 | if (WORDACCESS_UNSAFE) X_LOCK (reqlock); 496 | retval = started; 497 | if (WORDACCESS_UNSAFE) X_UNLOCK (reqlock); 498 | 499 | return retval; 500 | } 501 | 502 | /* 503 | * a somewhat faster data structure might be nice, but 504 | * with 8 priorities this actually needs <20 insns 505 | * per shift, the most expensive operation. 506 | */ 507 | typedef struct { 508 | ETP_REQ *qs[ETP_NUM_PRI], *qe[ETP_NUM_PRI]; /* qstart, qend */ 509 | int size; 510 | } etp_reqq; 511 | 512 | static etp_reqq req_queue; 513 | static etp_reqq res_queue; 514 | 515 | static void ecb_noinline ecb_cold 516 | reqq_init (etp_reqq *q) 517 | { 518 | int pri; 519 | 520 | for (pri = 0; pri < ETP_NUM_PRI; ++pri) 521 | q->qs[pri] = q->qe[pri] = 0; 522 | 523 | q->size = 0; 524 | } 525 | 526 | static int ecb_noinline 527 | reqq_push (etp_reqq *q, ETP_REQ *req) 528 | { 529 | int pri = req->pri; 530 | req->next = 0; 531 | 532 | if (q->qe[pri]) 533 | { 534 | q->qe[pri]->next = req; 535 | q->qe[pri] = req; 536 | } 537 | else 538 | q->qe[pri] = q->qs[pri] = req; 539 | 540 | return q->size++; 541 | } 542 | 543 | static ETP_REQ * ecb_noinline 544 | reqq_shift (etp_reqq *q) 545 | { 546 | int pri; 547 | 548 | if (!q->size) 549 | return 0; 550 | 551 | --q->size; 552 | 553 | for (pri = ETP_NUM_PRI; pri--; ) 554 | { 555 | eio_req *req = q->qs[pri]; 556 | 557 | if (req) 558 | { 559 | if (!(q->qs[pri] = 
(eio_req *)req->next)) 560 | q->qe[pri] = 0; 561 | 562 | return req; 563 | } 564 | } 565 | 566 | abort (); 567 | } 568 | 569 | static int ecb_cold 570 | etp_init (void (*want_poll)(void), void (*done_poll)(void)) 571 | { 572 | X_MUTEX_CREATE (wrklock); 573 | X_MUTEX_CREATE (reslock); 574 | X_MUTEX_CREATE (reqlock); 575 | X_COND_CREATE (reqwait); 576 | 577 | reqq_init (&req_queue); 578 | reqq_init (&res_queue); 579 | 580 | wrk_first.next = 581 | wrk_first.prev = &wrk_first; 582 | 583 | started = 0; 584 | idle = 0; 585 | nreqs = 0; 586 | nready = 0; 587 | npending = 0; 588 | 589 | want_poll_cb = want_poll; 590 | done_poll_cb = done_poll; 591 | 592 | return 0; 593 | } 594 | 595 | X_THREAD_PROC (etp_proc); 596 | 597 | static void ecb_cold 598 | etp_start_thread (void) 599 | { 600 | etp_worker *wrk = calloc (1, sizeof (etp_worker)); 601 | 602 | /*TODO*/ 603 | assert (("unable to allocate worker thread data", wrk)); 604 | 605 | X_LOCK (wrklock); 606 | 607 | if (xthread_create (&wrk->tid, etp_proc, (void *)wrk)) 608 | { 609 | wrk->prev = &wrk_first; 610 | wrk->next = wrk_first.next; 611 | wrk_first.next->prev = wrk; 612 | wrk_first.next = wrk; 613 | ++started; 614 | } 615 | else 616 | free (wrk); 617 | 618 | X_UNLOCK (wrklock); 619 | } 620 | 621 | static void 622 | etp_maybe_start_thread (void) 623 | { 624 | if (ecb_expect_true (etp_nthreads () >= wanted)) 625 | return; 626 | 627 | /* todo: maybe use idle here, but might be less exact */ 628 | if (ecb_expect_true (0 <= (int)etp_nthreads () + (int)etp_npending () - (int)etp_nreqs ())) 629 | return; 630 | 631 | etp_start_thread (); 632 | } 633 | 634 | static void ecb_cold 635 | etp_end_thread (void) 636 | { 637 | eio_req *req = calloc (1, sizeof (eio_req)); /* will be freed by worker */ 638 | 639 | req->type = -1; 640 | req->pri = ETP_PRI_MAX - ETP_PRI_MIN; 641 | 642 | X_LOCK (reqlock); 643 | reqq_push (&req_queue, req); 644 | X_COND_SIGNAL (reqwait); 645 | X_UNLOCK (reqlock); 646 | 647 | X_LOCK (wrklock); 648 | --started; 
649 | X_UNLOCK (wrklock); 650 | } 651 | 652 | static int 653 | etp_poll (void) 654 | { 655 | unsigned int maxreqs; 656 | unsigned int maxtime; 657 | struct timeval tv_start, tv_now; 658 | 659 | X_LOCK (reslock); 660 | maxreqs = max_poll_reqs; 661 | maxtime = max_poll_time; 662 | X_UNLOCK (reslock); 663 | 664 | if (maxtime) 665 | gettimeofday (&tv_start, 0); 666 | 667 | for (;;) 668 | { 669 | ETP_REQ *req; 670 | 671 | etp_maybe_start_thread (); 672 | 673 | X_LOCK (reslock); 674 | req = reqq_shift (&res_queue); 675 | 676 | if (req) 677 | { 678 | --npending; 679 | 680 | if (!res_queue.size && done_poll_cb) 681 | done_poll_cb (); 682 | } 683 | 684 | X_UNLOCK (reslock); 685 | 686 | if (!req) 687 | return 0; 688 | 689 | X_LOCK (reqlock); 690 | --nreqs; 691 | X_UNLOCK (reqlock); 692 | 693 | if (ecb_expect_false (req->type == EIO_GROUP && req->size)) 694 | { 695 | req->int1 = 1; /* mark request as delayed */ 696 | continue; 697 | } 698 | else 699 | { 700 | int res = ETP_FINISH (req); 701 | if (ecb_expect_false (res)) 702 | return res; 703 | } 704 | 705 | if (ecb_expect_false (maxreqs && !--maxreqs)) 706 | break; 707 | 708 | if (maxtime) 709 | { 710 | gettimeofday (&tv_now, 0); 711 | 712 | if (tvdiff (&tv_start, &tv_now) >= maxtime) 713 | break; 714 | } 715 | } 716 | 717 | errno = EAGAIN; 718 | return -1; 719 | } 720 | 721 | static void 722 | etp_cancel (ETP_REQ *req) 723 | { 724 | req->cancelled = 1; 725 | 726 | eio_grp_cancel (req); 727 | } 728 | 729 | static void 730 | etp_submit (ETP_REQ *req) 731 | { 732 | req->pri -= ETP_PRI_MIN; 733 | 734 | if (ecb_expect_false (req->pri < ETP_PRI_MIN - ETP_PRI_MIN)) req->pri = ETP_PRI_MIN - ETP_PRI_MIN; 735 | if (ecb_expect_false (req->pri > ETP_PRI_MAX - ETP_PRI_MIN)) req->pri = ETP_PRI_MAX - ETP_PRI_MIN; 736 | 737 | if (ecb_expect_false (req->type == EIO_GROUP)) 738 | { 739 | /* I hope this is worth it :/ */ 740 | X_LOCK (reqlock); 741 | ++nreqs; 742 | X_UNLOCK (reqlock); 743 | 744 | X_LOCK (reslock); 745 | 746 | ++npending; 747 | 
748 | if (!reqq_push (&res_queue, req) && want_poll_cb) 749 | want_poll_cb (); 750 | 751 | X_UNLOCK (reslock); 752 | } 753 | else 754 | { 755 | X_LOCK (reqlock); 756 | ++nreqs; 757 | ++nready; 758 | reqq_push (&req_queue, req); 759 | X_COND_SIGNAL (reqwait); 760 | X_UNLOCK (reqlock); 761 | 762 | etp_maybe_start_thread (); 763 | } 764 | } 765 | 766 | static void ecb_cold 767 | etp_set_max_poll_time (double nseconds) 768 | { 769 | if (WORDACCESS_UNSAFE) X_LOCK (reslock); 770 | max_poll_time = nseconds * EIO_TICKS; 771 | if (WORDACCESS_UNSAFE) X_UNLOCK (reslock); 772 | } 773 | 774 | static void ecb_cold 775 | etp_set_max_poll_reqs (unsigned int maxreqs) 776 | { 777 | if (WORDACCESS_UNSAFE) X_LOCK (reslock); 778 | max_poll_reqs = maxreqs; 779 | if (WORDACCESS_UNSAFE) X_UNLOCK (reslock); 780 | } 781 | 782 | static void ecb_cold 783 | etp_set_max_idle (unsigned int nthreads) 784 | { 785 | if (WORDACCESS_UNSAFE) X_LOCK (reqlock); 786 | max_idle = nthreads; 787 | if (WORDACCESS_UNSAFE) X_UNLOCK (reqlock); 788 | } 789 | 790 | static void ecb_cold 791 | etp_set_idle_timeout (unsigned int seconds) 792 | { 793 | if (WORDACCESS_UNSAFE) X_LOCK (reqlock); 794 | idle_timeout = seconds; 795 | if (WORDACCESS_UNSAFE) X_UNLOCK (reqlock); 796 | } 797 | 798 | static void ecb_cold 799 | etp_set_min_parallel (unsigned int nthreads) 800 | { 801 | if (wanted < nthreads) 802 | wanted = nthreads; 803 | } 804 | 805 | static void ecb_cold 806 | etp_set_max_parallel (unsigned int nthreads) 807 | { 808 | if (wanted > nthreads) 809 | wanted = nthreads; 810 | 811 | while (started > wanted) 812 | etp_end_thread (); 813 | } 814 | 815 | /*****************************************************************************/ 816 | 817 | static void 818 | grp_try_feed (eio_req *grp) 819 | { 820 | while (grp->size < grp->int2 && !EIO_CANCELLED (grp)) 821 | { 822 | grp->flags &= ~EIO_FLAG_GROUPADD; 823 | 824 | EIO_FEED (grp); 825 | 826 | /* stop if no progress has been made */ 827 | if (!(grp->flags & 
EIO_FLAG_GROUPADD)) 828 | { 829 | grp->feed = 0; 830 | break; 831 | } 832 | } 833 | } 834 | 835 | static int 836 | grp_dec (eio_req *grp) 837 | { 838 | --grp->size; 839 | 840 | /* call feeder, if applicable */ 841 | grp_try_feed (grp); 842 | 843 | /* finish, if done */ 844 | if (!grp->size && grp->int1) 845 | return eio_finish (grp); 846 | else 847 | return 0; 848 | } 849 | 850 | static void 851 | eio_destroy (eio_req *req) 852 | { 853 | if ((req)->flags & EIO_FLAG_PTR1_FREE) free (req->ptr1); 854 | if ((req)->flags & EIO_FLAG_PTR2_FREE) free (req->ptr2); 855 | 856 | EIO_DESTROY (req); 857 | } 858 | 859 | static int 860 | eio_finish (eio_req *req) 861 | { 862 | int res = EIO_FINISH (req); 863 | 864 | if (req->grp) 865 | { 866 | int res2; 867 | eio_req *grp = req->grp; 868 | 869 | /* unlink request */ 870 | if (req->grp_next) req->grp_next->grp_prev = req->grp_prev; 871 | if (req->grp_prev) req->grp_prev->grp_next = req->grp_next; 872 | 873 | if (grp->grp_first == req) 874 | grp->grp_first = req->grp_next; 875 | 876 | res2 = grp_dec (grp); 877 | 878 | if (!res) 879 | res = res2; 880 | } 881 | 882 | eio_destroy (req); 883 | 884 | return res; 885 | } 886 | 887 | void 888 | eio_grp_cancel (eio_req *grp) 889 | { 890 | for (grp = grp->grp_first; grp; grp = grp->grp_next) 891 | eio_cancel (grp); 892 | } 893 | 894 | void 895 | eio_cancel (eio_req *req) 896 | { 897 | etp_cancel (req); 898 | } 899 | 900 | void 901 | eio_submit (eio_req *req) 902 | { 903 | etp_submit (req); 904 | } 905 | 906 | unsigned int 907 | eio_nreqs (void) 908 | { 909 | return etp_nreqs (); 910 | } 911 | 912 | unsigned int 913 | eio_nready (void) 914 | { 915 | return etp_nready (); 916 | } 917 | 918 | unsigned int 919 | eio_npending (void) 920 | { 921 | return etp_npending (); 922 | } 923 | 924 | unsigned int ecb_cold 925 | eio_nthreads (void) 926 | { 927 | return etp_nthreads (); 928 | } 929 | 930 | void ecb_cold 931 | eio_set_max_poll_time (double nseconds) 932 | { 933 | etp_set_max_poll_time 
(nseconds); 934 | } 935 | 936 | void ecb_cold 937 | eio_set_max_poll_reqs (unsigned int maxreqs) 938 | { 939 | etp_set_max_poll_reqs (maxreqs); 940 | } 941 | 942 | void ecb_cold 943 | eio_set_max_idle (unsigned int nthreads) 944 | { 945 | etp_set_max_idle (nthreads); 946 | } 947 | 948 | void ecb_cold 949 | eio_set_idle_timeout (unsigned int seconds) 950 | { 951 | etp_set_idle_timeout (seconds); 952 | } 953 | 954 | void ecb_cold 955 | eio_set_min_parallel (unsigned int nthreads) 956 | { 957 | etp_set_min_parallel (nthreads); 958 | } 959 | 960 | void ecb_cold 961 | eio_set_max_parallel (unsigned int nthreads) 962 | { 963 | etp_set_max_parallel (nthreads); 964 | } 965 | 966 | int eio_poll (void) 967 | { 968 | return etp_poll (); 969 | } 970 | 971 | /*****************************************************************************/ 972 | /* work around various missing functions */ 973 | 974 | #ifndef HAVE_UTIMES 975 | 976 | # undef utimes 977 | # define utimes(path,times) eio__utimes (path, times) 978 | 979 | static int 980 | eio__utimes (const char *filename, const struct timeval times[2]) 981 | { 982 | if (times) 983 | { 984 | struct utimbuf buf; 985 | 986 | buf.actime = times[0].tv_sec; 987 | buf.modtime = times[1].tv_sec; 988 | 989 | return utime (filename, &buf); 990 | } 991 | else 992 | return utime (filename, 0); 993 | } 994 | 995 | #endif 996 | 997 | #ifndef HAVE_FUTIMES 998 | 999 | # undef futimes 1000 | # define futimes(fd,times) eio__futimes (fd, times) 1001 | 1002 | static int 1003 | eio__futimes (int fd, const struct timeval tv[2]) 1004 | { 1005 | errno = ENOSYS; 1006 | return -1; 1007 | } 1008 | 1009 | #endif 1010 | 1011 | #if !HAVE_FDATASYNC 1012 | # undef fdatasync 1013 | # define fdatasync(fd) fsync (fd) 1014 | #endif 1015 | 1016 | static int 1017 | eio__syncfs (int fd) 1018 | { 1019 | int res; 1020 | 1021 | #if HAVE_SYS_SYNCFS 1022 | res = (int)syscall (__NR_syncfs, (int)(fd)); 1023 | #else 1024 | res = EIO_ENOSYS (); 1025 | #endif 1026 | 1027 | if (res < 
0 && errno == ENOSYS && fd >= 0) 1028 | sync (); 1029 | /* res is returned unchanged, so the failure is still reported after the sync () substitute */ 1030 | return res; 1031 | } 1032 | 1033 | /* sync_file_range always needs emulation */ 1034 | static int 1035 | eio__sync_file_range (int fd, off_t offset, size_t nbytes, unsigned int flags) 1036 | { 1037 | #if HAVE_SYNC_FILE_RANGE 1038 | int res; 1039 | /* translate the EIO_SYNC_FILE_RANGE_* bits only when they differ numerically from the system's values */ 1040 | if (EIO_SYNC_FILE_RANGE_WAIT_BEFORE != SYNC_FILE_RANGE_WAIT_BEFORE 1041 | || EIO_SYNC_FILE_RANGE_WRITE != SYNC_FILE_RANGE_WRITE 1042 | || EIO_SYNC_FILE_RANGE_WAIT_AFTER != SYNC_FILE_RANGE_WAIT_AFTER) 1043 | { 1044 | flags = 0 1045 | | (flags & EIO_SYNC_FILE_RANGE_WAIT_BEFORE ? SYNC_FILE_RANGE_WAIT_BEFORE : 0) 1046 | | (flags & EIO_SYNC_FILE_RANGE_WRITE ? SYNC_FILE_RANGE_WRITE : 0) 1047 | | (flags & EIO_SYNC_FILE_RANGE_WAIT_AFTER ? SYNC_FILE_RANGE_WAIT_AFTER : 0); 1048 | } 1049 | 1050 | res = sync_file_range (fd, offset, nbytes, flags); 1051 | /* success and every failure other than ENOSYS are returned directly; only ENOSYS falls through to the emulation */ 1052 | if (!res || errno != ENOSYS) 1053 | return res; 1054 | #endif 1055 | 1056 | /* even though we could play tricks with the flags, it's better to always 1057 | * call fdatasync, as that matches the expectation of its users best */ 1058 | return fdatasync (fd); 1059 | } 1060 | /* fallocate when HAVE_LINUX_FALLOCATE is set, otherwise the result of EIO_ENOSYS () */ 1061 | static int 1062 | eio__fallocate (int fd, int mode, off_t offset, size_t len) 1063 | { 1064 | #if HAVE_LINUX_FALLOCATE 1065 | return fallocate (fd, mode, offset, len); 1066 | #else 1067 | return EIO_ENOSYS (); 1068 | #endif 1069 | } 1070 | 1071 | #if !HAVE_READAHEAD 1072 | # undef readahead 1073 | # define readahead(fd,offset,count) eio__readahead (fd, offset, count, self) 1074 | /* readahead emulation: preads the range in chunks of at most EIO_BUFSIZE into eio_buf (set up by dBUF; presumably a per-worker scratch buffer - confirm) */ 1075 | static eio_ssize_t 1076 | eio__readahead (int fd, off_t offset, size_t count, etp_worker *self) 1077 | { 1078 | size_t todo = count; 1079 | dBUF; 1080 | 1081 | while (todo > 0) 1082 | { 1083 | size_t len = todo < EIO_BUFSIZE ?
todo : EIO_BUFSIZE; 1084 | /* the pread result is deliberately ignored, see the comment before the return */ 1085 | pread (fd, eio_buf, len, offset); 1086 | offset += len; 1087 | todo -= len; 1088 | } 1089 | 1090 | FUBd; 1091 | 1092 | /* linux's readahead basically only fails for EBADF or EINVAL (not mmappable) */ 1093 | /* but not for e.g. EIO or eof, so we also never fail */ 1094 | return 0; 1095 | } 1096 | 1097 | #endif 1098 | /* copies up to count bytes from ifd, starting at offset, to ofd; tries the platform's sendfile first and falls back to a pread/write loop for a set of "not supported" errno values; returns the number of bytes transferred or a negative value */ 1099 | /* sendfile always needs emulation */ 1100 | static eio_ssize_t 1101 | eio__sendfile (int ofd, int ifd, off_t offset, size_t count) 1102 | { 1103 | eio_ssize_t written = 0; 1104 | eio_ssize_t res; 1105 | 1106 | if (!count) 1107 | return 0; 1108 | /* loops so that a transfer the kernel cut short after more than 128mb can be continued */ 1109 | for (;;) 1110 | { 1111 | #ifdef __APPLE__ 1112 | # undef HAVE_SENDFILE /* broken, as everything on os x */ 1113 | #endif 1114 | #if HAVE_SENDFILE 1115 | # if __linux 1116 | off_t soffset = offset; 1117 | res = sendfile (ofd, ifd, &soffset, count); 1118 | 1119 | # elif __FreeBSD__ 1120 | /* 1121 | * Of course, the freebsd sendfile is a dire hack with no thoughts 1122 | * wasted on making it similar to other I/O functions. 1123 | */ 1124 | off_t sbytes; 1125 | res = sendfile (ifd, ofd, offset, count, 0, &sbytes, 0); 1126 | 1127 | #if 0 /* according to the manpage, this is correct, but broken behaviour */ 1128 | /* freebsd's sendfile will return 0 on success */ 1129 | /* freebsd 8 documents it as only setting *sbytes on EINTR and EAGAIN, but */ 1130 | /* not on e.g.
EIO or EPIPE - sounds broken */ 1131 | if ((res < 0 && (errno == EAGAIN || errno == EINTR) && sbytes) || res == 0) 1132 | res = sbytes; 1133 | #endif 1134 | 1135 | /* according to source inspection, this is correct, and useful behaviour */ 1136 | if (sbytes) 1137 | res = sbytes; 1138 | 1139 | # elif defined __APPLE__ 1140 | off_t sbytes = count; 1141 | res = sendfile (ifd, ofd, offset, &sbytes, 0, 0); 1142 | 1143 | /* according to the manpage, sbytes is always valid */ 1144 | if (sbytes) 1145 | res = sbytes; 1146 | 1147 | # elif __hpux 1148 | res = sendfile (ofd, ifd, offset, count, 0, 0); 1149 | 1150 | # elif __solaris 1151 | struct sendfilevec vec; 1152 | size_t sbytes; 1153 | 1154 | vec.sfv_fd = ifd; 1155 | vec.sfv_flag = 0; 1156 | vec.sfv_off = offset; 1157 | vec.sfv_len = count; 1158 | 1159 | res = sendfilev (ofd, &vec, 1, &sbytes); 1160 | /* on failure, report the bytes that were sent anyway, if there were any */ 1161 | if (res < 0 && sbytes) 1162 | res = sbytes; 1163 | 1164 | # endif 1165 | 1166 | #elif defined (_WIN32) && 0 1167 | /* does not work, just for documentation of what would need to be done */ 1168 | /* actually, cannot be done like this, as TransmitFile changes the file offset, */ 1169 | /* libeio guarantees that the file offset does not change, and windows */ 1170 | /* has no way to get an independent handle to the same file description */ 1171 | HANDLE h = TO_SOCKET (ifd); 1172 | SetFilePointer (h, offset, 0, FILE_BEGIN); 1173 | res = TransmitFile (TO_SOCKET (ofd), h, count, 0, 0, 0, 0); 1174 | 1175 | #else 1176 | res = EIO_ENOSYS (); 1177 | #endif 1178 | 1179 | /* we assume sendfile can copy at least 128mb in one go */ 1180 | if (res <= 128 * 1024 * 1024) 1181 | { 1182 | if (res > 0) 1183 | written += res; 1184 | /* any progress so far is returned; with none, the loop is left and the emulation check after it decides */ 1185 | if (written) 1186 | return written; 1187 | 1188 | break; 1189 | } 1190 | else 1191 | { 1192 | /* if we requested more, then probably the kernel was lazy */ 1193 | written += res; 1194 | offset += res; 1195 | count -= res; 1196 | /* done once the whole request has been transferred, otherwise try again for the rest */ 1197 | if (!count) 1198 | return written; 1199 | } 1200 | } 1201 | 1202 |
if (res < 0 1203 | && (errno == ENOSYS || errno == EINVAL || errno == ENOTSOCK 1204 | /* BSDs */ 1205 | #ifdef ENOTSUP /* sigh, if the steenking pile called openbsd would only try to at least compile posix code... */ 1206 | || errno == ENOTSUP 1207 | #endif 1208 | #ifdef EOPNOTSUPP /* windows */ 1209 | || errno == EOPNOTSUPP /* BSDs */ 1210 | #endif 1211 | #if __solaris 1212 | || errno == EAFNOSUPPORT || errno == EPROTOTYPE 1213 | #endif 1214 | ) 1215 | ) 1216 | { 1217 | /* emulate sendfile. this is a major pain in the ass */ 1218 | dBUF; 1219 | 1220 | res = 0; 1221 | /* copy in chunks of at most EIO_BUFSIZE until count is used up, eof is hit or an error occurs */ 1222 | while (count) 1223 | { 1224 | eio_ssize_t cnt; 1225 | 1226 | cnt = pread (ifd, eio_buf, count > EIO_BUFSIZE ? EIO_BUFSIZE : count, offset); 1227 | /* a read error before anything was copied yields -1; eof or a later error keeps the byte count so far */ 1228 | if (cnt <= 0) 1229 | { 1230 | if (cnt && !res) res = -1; 1231 | break; 1232 | } 1233 | 1234 | cnt = write (ofd, eio_buf, cnt); 1235 | /* same rule for the write side */ 1236 | if (cnt <= 0) 1237 | { 1238 | if (cnt && !res) res = -1; 1239 | break; 1240 | } 1241 | 1242 | offset += cnt; 1243 | res += cnt; 1244 | count -= cnt; 1245 | } 1246 | 1247 | FUBd; 1248 | } 1249 | 1250 | return res; 1251 | } 1252 | /* page size: the PAGESIZE constant when it is defined, otherwise sysconf (_SC_PAGESIZE), cached in a static after the first call */ 1253 | #ifdef PAGESIZE 1254 | # define eio_pagesize() PAGESIZE 1255 | #else 1256 | static intptr_t 1257 | eio_pagesize (void) 1258 | { 1259 | static intptr_t page; 1260 | 1261 | if (!page) 1262 | page = sysconf (_SC_PAGESIZE); 1263 | 1264 | return page; 1265 | } 1266 | #endif 1267 | /* widens the region [*addr, *addr + *length) to page boundaries in place: *addr is rounded down, *length grows by the same amount and is then rounded up to a multiple of the page size */ 1268 | static void 1269 | eio_page_align (void **addr, size_t *length) 1270 | { 1271 | intptr_t mask = eio_pagesize () - 1; 1272 | 1273 | /* round down addr */ 1274 | intptr_t adj = mask & (intptr_t)*addr; 1275 | 1276 | *addr = (void *)((intptr_t)*addr - adj); 1277 | *length += adj; 1278 | 1279 | /* round up length */ 1280 | *length = (*length + mask) & ~mask; 1281 | } 1282 | 1283 | #if !_POSIX_MEMLOCK 1284 | # define eio__mlockall(a) EIO_ENOSYS () 1285 | #else 1286 | /* mlockall wrapper taking EIO_MCL_* flag bits */ 1287 | static int 1288 | eio__mlockall (int flags) 1289 | { 1290 | #if __GLIBC__ == 2 && __GLIBC_MINOR__ <= 7 1291 | extern int mallopt
(int, int); 1292 | mallopt (-6, 238); /* http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=473812 */ 1293 | #endif 1294 | /* translate the EIO_MCL_* bits only when they differ numerically from the system's MCL_* values */ 1295 | if (EIO_MCL_CURRENT != MCL_CURRENT 1296 | || EIO_MCL_FUTURE != MCL_FUTURE) 1297 | { 1298 | flags = 0 1299 | | (flags & EIO_MCL_CURRENT ? MCL_CURRENT : 0) 1300 | | (flags & EIO_MCL_FUTURE ? MCL_FUTURE : 0); 1301 | } 1302 | 1303 | return mlockall (flags); 1304 | } 1305 | #endif 1306 | 1307 | #if !_POSIX_MEMLOCK_RANGE 1308 | # define eio__mlock(a,b) EIO_ENOSYS () 1309 | #else 1310 | /* mlock on the page-aligned region that covers [addr, addr + length) */ 1311 | static int 1312 | eio__mlock (void *addr, size_t length) 1313 | { 1314 | eio_page_align (&addr, &length); 1315 | 1316 | return mlock (addr, length); 1317 | } 1318 | 1319 | #endif 1320 | 1321 | #if !(_POSIX_MAPPED_FILES && _POSIX_SYNCHRONIZED_IO) 1322 | # define eio__msync(a,b,c) EIO_ENOSYS () 1323 | #else 1324 | /* msync on the page-aligned region that covers [mem, mem + len); flags are EIO_MS_* bits, translated to the system's MS_* values when the two sets differ */ 1325 | static int 1326 | eio__msync (void *mem, size_t len, int flags) 1327 | { 1328 | eio_page_align (&mem, &len); 1329 | /* every EIO_MS_* constant is compared with its own MS_* counterpart, so the translation is skipped only when all three really match */ 1330 | if (EIO_MS_ASYNC != MS_ASYNC 1331 | || EIO_MS_INVALIDATE != MS_INVALIDATE 1332 | || EIO_MS_SYNC != MS_SYNC) 1333 | { 1334 | flags = 0 1335 | | (flags & EIO_MS_ASYNC ? MS_ASYNC : 0) 1336 | | (flags & EIO_MS_INVALIDATE ? MS_INVALIDATE : 0) 1337 | | (flags & EIO_MS_SYNC ?
MS_SYNC : 0); 1338 | } 1339 | 1340 | return msync (mem, len, flags); 1341 | } 1342 | 1343 | #endif 1344 | /* touches one word in every page of the page-aligned region that covers [req->ptr2, req->ptr2 + req->size): a plain read, or a read-modify-write when EIO_MT_MODIFY is set in req->int1; stops early once the request is cancelled; always returns 0 */ 1345 | static int 1346 | eio__mtouch (eio_req *req) 1347 | { 1348 | void *mem = req->ptr2; 1349 | size_t len = req->size; 1350 | int flags = req->int1; 1351 | 1352 | eio_page_align (&mem, &len); 1353 | 1354 | { 1355 | intptr_t addr = (intptr_t)mem; 1356 | intptr_t end = addr + len; 1357 | intptr_t page = eio_pagesize (); 1358 | /* the loops advance page by page until addr reaches end; the bound has to be the end address, not the byte count len */ 1359 | if (addr < end) 1360 | if (flags & EIO_MT_MODIFY) /* modify */ 1361 | do { *((volatile sig_atomic_t *)addr) |= 0; } while ((addr += page) < end && !EIO_CANCELLED (req)); 1362 | else 1363 | do { *((volatile sig_atomic_t *)addr) ; } while ((addr += page) < end && !EIO_CANCELLED (req)); 1364 | } 1365 | 1366 | return 0; 1367 | } 1368 | 1369 | /*****************************************************************************/ 1370 | /* requests implemented outside eio_execute, because they are so large */ 1371 | /* seek request: maps the EIO_SEEK_* value in req->int2 to the system's whence, seeks on fd req->int1 and stores the lseek result in req->offs */ 1372 | static void 1373 | eio__lseek (eio_req *req) 1374 | { 1375 | /* this usually gets optimised away completely, or your compiler sucks, */ 1376 | /* or the whence constants really are not 0, 1, 2 */ 1377 | int whence = req->int2 == EIO_SEEK_SET ? SEEK_SET 1378 | : req->int2 == EIO_SEEK_CUR ? SEEK_CUR 1379 | : req->int2 == EIO_SEEK_END ? SEEK_END 1380 | : req->int2; 1381 | 1382 | req->offs = lseek (req->int1, req->offs, whence); 1383 | req->result = req->offs == (off_t)-1 ?
-1 : 0; 1384 | } 1385 | /* resolves path, taken relative to wd unless it starts with a slash, component by component, following symlinks with readlink; returns the length of the result, which starts at tmpbuf->ptr, or -1 with errno set */ 1386 | /* result will always end up in tmpbuf, there is always space for adding a 0-byte */ 1387 | static int 1388 | eio__realpath (struct tmpbuf *tmpbuf, eio_wd wd, const char *path) 1389 | { 1390 | const char *rel = path; 1391 | char *res; 1392 | char *tmp1, *tmp2; 1393 | #if SYMLOOP_MAX > 32 1394 | int symlinks = SYMLOOP_MAX; 1395 | #else 1396 | int symlinks = 32; 1397 | #endif 1398 | 1399 | errno = EINVAL; 1400 | if (!rel) 1401 | return -1; 1402 | 1403 | errno = ENOENT; 1404 | if (!*rel) 1405 | return -1; 1406 | /* one allocation split into three PATH_MAX-sized areas: the result (res), the readlink target (tmp1) and the rewritten remaining path (tmp2) */ 1407 | res = tmpbuf_get (tmpbuf, PATH_MAX * 3); 1408 | tmp1 = res + PATH_MAX; 1409 | tmp2 = tmp1 + PATH_MAX; 1410 | 1411 | #if 0 /* disabled, the musl way to do things is just too racy */ 1412 | #if __linux && defined(O_NONBLOCK) && defined(O_NOATIME) 1413 | /* on linux we may be able to ask the kernel */ 1414 | { 1415 | int fd = open (rel, O_RDONLY | O_NONBLOCK | O_NOCTTY | O_NOATIME); 1416 | 1417 | if (fd >= 0) 1418 | { 1419 | sprintf (tmp1, "/proc/self/fd/%d", fd); 1420 | req->result = readlink (tmp1, res, PATH_MAX); 1421 | close (fd); 1422 | 1423 | /* here we should probably stat the open file and the disk file, to make sure they still match */ 1424 | 1425 | if (req->result > 0) 1426 | goto done; 1427 | } 1428 | else if (errno == ELOOP || errno == ENAMETOOLONG || errno == ENOENT || errno == ENOTDIR || errno == EIO) 1429 | return; 1430 | } 1431 | #endif 1432 | #endif 1433 | /* a relative path starts from the current directory or from the stored path of wd */ 1434 | if (*rel != '/') 1435 | { 1436 | int len; 1437 | 1438 | errno = ENOENT; 1439 | if (wd == EIO_INVALID_WD) 1440 | return -1; 1441 | 1442 | if (wd == EIO_CWD) 1443 | { 1444 | if (!getcwd (res, PATH_MAX)) 1445 | return -1; 1446 | 1447 | len = strlen (res); 1448 | } 1449 | else 1450 | memcpy (res, wd->str, len = wd->len); 1451 | /* len is tested instead of res [1]: when wd->len is 1 the memcpy above never wrote res [1], so its value would be whatever the buffer held before */ 1452 | if (len > 1) /* only use if not / */ 1453 | res += len; 1454 | } 1455 | 1456 | while (*rel) 1457 | { 1458 | eio_ssize_t len, linklen; 1459 | const char *beg = rel; 1460 | 1461 | while (*rel && *rel != '/') 1462 | ++rel;
1463 | /* the characters from beg up to rel are now one path component */ 1464 | len = rel - beg; 1465 | 1466 | if (!len) /* skip slashes */ 1467 | { 1468 | ++rel; 1469 | continue; 1470 | } 1471 | 1472 | if (beg [0] == '.') 1473 | { 1474 | if (len == 1) 1475 | continue; /* . - nop */ 1476 | 1477 | if (beg [1] == '.' && len == 2) 1478 | { 1479 | /* .. - back up one component, if possible */ 1480 | 1481 | while (res != tmpbuf->ptr) 1482 | if (*--res == '/') 1483 | break; 1484 | 1485 | continue; 1486 | } 1487 | } 1488 | /* the slash, the component and a 0-byte must still fit in front of tmp1 */ 1489 | errno = ENAMETOOLONG; 1490 | if (res + 1 + len + 1 >= tmp1) 1491 | return -1; 1492 | 1493 | /* copy one component */ 1494 | *res = '/'; 1495 | memcpy (res + 1, beg, len); 1496 | 1497 | /* zero-terminate, for readlink */ 1498 | res [len + 1] = 0; 1499 | 1500 | /* now check if it's a symlink */ 1501 | linklen = readlink (tmpbuf->ptr, tmp1, PATH_MAX); 1502 | /* EINVAL from readlink is taken to mean that the path exists but is not a symlink; every other error aborts */ 1503 | if (linklen < 0) 1504 | { 1505 | if (errno != EINVAL) 1506 | return -1; 1507 | 1508 | /* it's a normal directory. hopefully */ 1509 | res += len + 1; 1510 | } 1511 | else 1512 | { 1513 | /* yay, it was a symlink - build new path in tmp2 */ 1514 | int rellen = strlen (rel); 1515 | 1516 | errno = ENAMETOOLONG; 1517 | if (linklen + 1 + rellen >= PATH_MAX) 1518 | return -1; 1519 | /* symlinks is a countdown that bounds the number of links followed */ 1520 | errno = ELOOP; 1521 | if (!--symlinks) 1522 | return -1; 1523 | 1524 | if (*tmp1 == '/') 1525 | res = tmpbuf->ptr; /* symlink resolves to an absolute path */ 1526 | 1527 | /* we need to be careful, as rel might point into tmp2 already */ 1528 | memmove (tmp2 + linklen + 1, rel, rellen + 1); 1529 | tmp2 [linklen] = '/'; 1530 | memcpy (tmp2, tmp1, linklen); 1531 | 1532 | rel = tmp2; 1533 | } 1534 | } 1535 | 1536 | /* special case for the lone root path */ 1537 | if (res == tmpbuf->ptr) 1538 | *res++ = '/'; 1539 | 1540 | return res - (char *)tmpbuf->ptr; 1541 | } 1542 | /* three-way comparison of two dirents: by score first, then by inode number */ 1543 | static signed char 1544 | eio_dent_cmp (const eio_dirent *a, const eio_dirent *b) 1545 | { 1546 | return a->score - b->score ?
a->score - b->score /* works because our signed char is always 0..100 */ 1547 | : a->inode < b->inode ? -1 1548 | : a->inode > b->inode ? 1 1549 | : 0; 1550 | } 1551 | /* EIO_DENT_CMP (x, op, y) compares the result of eio_dent_cmp on the two lvalues x and y against 0 with operator op */ 1552 | #define EIO_DENT_CMP(i,op,j) eio_dent_cmp (&i, &j) op 0 1553 | 1554 | #define EIO_SORT_CUTOFF 30 /* quite high, but performs well on many filesystems */ 1555 | #define EIO_SORT_FAST 60 /* when to only use insertion sort */ 1556 | /* partial MSD radix pass for arrays of more than EIO_SORT_FAST entries: partitions dents bit by bit, score bits before inode bits, and stops refining partitions smaller than EIO_SORT_CUTOFF; the final order is left to the insertion sort */ 1557 | static void 1558 | eio_dent_radix_sort (eio_dirent *dents, int size, signed char score_bits, eio_ino_t inode_bits) 1559 | { 1560 | unsigned char bits [9 + sizeof (eio_ino_t) * 8]; 1561 | unsigned char *bit = bits; 1562 | 1563 | assert (CHAR_BIT == 8); 1564 | assert (sizeof (eio_dirent) * 8 < 256); 1565 | assert (offsetof (eio_dirent, inode)); /* we use bit #0 as sentinel */ 1566 | assert (offsetof (eio_dirent, score)); /* we use bit #0 as sentinel */ 1567 | 1568 | if (size <= EIO_SORT_FAST) 1569 | return; 1570 | 1571 | /* first prepare an array of bits to test in our radix sort */ 1572 | /* try to take endianness into account, as well as differences in eio_ino_t sizes */ 1573 | /* inode_bits must contain all inodes ORed together */ 1574 | /* which is used to skip bits that are 0 everywhere, which is very common */ 1575 | { 1576 | eio_ino_t endianness; 1577 | int i, j; 1578 | 1579 | /* we store the byte offset of byte n into byte n of "endianness" */ 1580 | for (i = 0; i < sizeof (eio_ino_t); ++i) 1581 | ((unsigned char *)&endianness)[i] = i; 1582 | /* bits [0] is the 0 sentinel that ends the backwards walk over the bit list in the sort loop */ 1583 | *bit++ = 0; 1584 | 1585 | for (i = 0; i < sizeof (eio_ino_t); ++i) 1586 | { 1587 | /* shifting off the byte offsets out of "endianness" */ 1588 | int offs = (offsetof (eio_dirent, inode) + (endianness & 0xff)) * 8; 1589 | endianness >>= 8; 1590 | 1591 | for (j = 0; j < 8; ++j) 1592 | if (inode_bits & (((eio_ino_t)1) << (i * 8 + j))) 1593 | *bit++ = offs + j; 1594 | } 1595 | /* score bits are appended last, so they are the first ones tested when the list is walked backwards */ 1596 | for (j = 0; j < 8; ++j) 1597 | if (score_bits & (1 << j)) 1598 | *bit++ = offsetof (eio_dirent, score) * 8 + j; 1599 | } 1600 |
1601 | /* now actually do the sorting (a variant of MSD radix sort) */ 1602 | { 1603 | eio_dirent *base_stk [9 + sizeof (eio_ino_t) * 8], *base; 1604 | eio_dirent *end_stk [9 + sizeof (eio_ino_t) * 8], *end; 1605 | unsigned char *bit_stk [9 + sizeof (eio_ino_t) * 8]; 1606 | int stk_idx = 0; 1607 | /* start with the whole array and the last entry of the bit list */ 1608 | base_stk [stk_idx] = dents; 1609 | end_stk [stk_idx] = dents + size; 1610 | bit_stk [stk_idx] = bit - 1; 1611 | 1612 | do 1613 | { 1614 | base = base_stk [stk_idx]; 1615 | end = end_stk [stk_idx]; 1616 | bit = bit_stk [stk_idx]; 1617 | 1618 | for (;;) 1619 | { 1620 | unsigned char O = *bit >> 3; 1621 | unsigned char M = 1 << (*bit & 7); 1622 | /* O is the byte offset inside an eio_dirent and M the mask of the bit currently tested */ 1623 | eio_dirent *a = base; 1624 | eio_dirent *b = end; 1625 | 1626 | if (b - a < EIO_SORT_CUTOFF) 1627 | break; 1628 | 1629 | /* now bit-partition the array on the bit */ 1630 | /* this ugly asymmetric loop seems to perform much better than typical */ 1631 | /* partition algos found in the literature */ 1632 | do 1633 | if (!(((unsigned char *)a)[O] & M)) 1634 | ++a; 1635 | else if (!(((unsigned char *)--b)[O] & M)) 1636 | { 1637 | eio_dirent tmp = *a; *a = *b; *b = tmp; 1638 | ++a; 1639 | } 1640 | while (b > a); 1641 | 1642 | /* next bit, or stop, if no bits left in this path */ 1643 | if (!*--bit) 1644 | break; 1645 | /* remember the upper partition, from a to end, on the explicit stack and continue with the lower one, from base to a */ 1646 | base_stk [stk_idx] = a; 1647 | end_stk [stk_idx] = end; 1648 | bit_stk [stk_idx] = bit; 1649 | ++stk_idx; 1650 | 1651 | end = a; 1652 | } 1653 | } 1654 | while (stk_idx--); 1655 | } 1656 | } 1657 | /* insertion sort over the whole array; the minimum is moved to dents [0] first so that the inner loop needs no bounds check */ 1658 | static void 1659 | eio_dent_insertion_sort (eio_dirent *dents, int size) 1660 | { 1661 | /* first move the smallest element to the front, to act as a sentinel */ 1662 | { 1663 | int i; 1664 | eio_dirent *min = dents; 1665 | 1666 | /* the radix pre-pass ensures that the minimum element is in the first EIO_SORT_CUTOFF + 1 elements */ 1667 | for (i = size > EIO_SORT_FAST ?
EIO_SORT_CUTOFF + 1 : size; --i; ) 1668 | if (EIO_DENT_CMP (dents [i], <, *min)) 1669 | min = &dents [i]; 1670 | 1671 | /* swap element 0 with the minimum found above */ 1672 | { 1673 | eio_dirent tmp = *dents; *dents = *min; *min = tmp; 1674 | } 1675 | } 1676 | 1677 | /* then do standard insertion sort, assuming that all elements are >= dents [0] */ 1678 | { 1679 | eio_dirent *i, *j; 1680 | 1681 | for (i = dents + 1; i < dents + size; ++i) 1682 | { 1683 | eio_dirent value = *i; 1684 | /* shift larger elements up by one; the sentinel at dents [0] stops the walk */ 1685 | for (j = i - 1; EIO_DENT_CMP (*j, >, value); --j) 1686 | j [1] = j [0]; 1687 | 1688 | j [1] = value; 1689 | } 1690 | } 1691 | } 1692 | /* sorts size dirents by score and inode: nothing to do for size <= 1, otherwise the radix pre-pass followed by the insertion sort */ 1693 | static void 1694 | eio_dent_sort (eio_dirent *dents, int size, signed char score_bits, eio_ino_t inode_bits) 1695 | { 1696 | if (size <= 1) 1697 | return; /* our insertion sort relies on size > 0 */ 1698 | 1699 | /* first we use a radix sort, but only for dirs >= EIO_SORT_FAST */ 1700 | /* and stop sorting when the partitions are <= EIO_SORT_CUTOFF */ 1701 | eio_dent_radix_sort (dents, size, score_bits, inode_bits); 1702 | 1703 | /* use an insertion sort at the end, or for small arrays, */ 1704 | /* as insertion sort is more efficient for small partitions */ 1705 | eio_dent_insertion_sort (dents, size); 1706 | } 1707 | /* on success req->result is the number of entries, req->ptr2 holds the 0-terminated names back to back and, when flags are set, req->ptr1 holds the eio_dirent array; on failure req->result stays -1 */ 1708 | /* read a full directory */ 1709 | static void 1710 | eio__scandir (eio_req *req, etp_worker *self) 1711 | { 1712 | char *name, *names; 1713 | int namesalloc = 4096 - sizeof (void *) * 4; 1714 | int namesoffs = 0; 1715 | int flags = req->int1; 1716 | eio_dirent *dents = 0; 1717 | int dentalloc = 128; 1718 | int dentoffs = 0; 1719 | eio_ino_t inode_bits = 0; 1720 | #ifdef _WIN32 1721 | HANDLE dirp; 1722 | WIN32_FIND_DATA entp; 1723 | #else 1724 | DIR *dirp; 1725 | EIO_STRUCT_DIRENT *entp; 1726 | #endif 1727 | 1728 | req->result = -1; 1729 | /* the two sort flags are dropped unless dirents were requested */ 1730 | if (!(flags & EIO_READDIR_DENTS)) 1731 | flags &= ~(EIO_READDIR_DIRS_FIRST | EIO_READDIR_STAT_ORDER); 1732 | 1733 | #ifdef _WIN32 1734 | { 1735 | int len = strlen ((const char
*)req->ptr1); 1736 | char *path = malloc (MAX_PATH); 1737 | const char *fmt; 1738 | const char *reqpath = wd_expand (&self->tmpbuf, req->wd, req->ptr1); 1739 | if (!path) return; /* out of memory: give up, req->result is already -1 */ 1740 | if (!len) 1741 | fmt = "./*"; 1742 | else if (reqpath[strlen (reqpath) - 1] == '/' || reqpath[strlen (reqpath) - 1] == '\\') /* test the last character of the expanded path, which can be longer than req->ptr1 */ 1743 | fmt = "%s*"; 1744 | else 1745 | fmt = "%s/*"; 1746 | 1747 | _snprintf (path, MAX_PATH, fmt, reqpath); 1748 | dirp = FindFirstFile (path, &entp); 1749 | free (path); 1750 | 1751 | if (dirp == INVALID_HANDLE_VALUE) 1752 | { 1753 | /* should steal _dosmaperr */ 1754 | switch (GetLastError ()) 1755 | { 1756 | case ERROR_FILE_NOT_FOUND: 1757 | req->result = 0; 1758 | break; 1759 | 1760 | case ERROR_INVALID_NAME: 1761 | case ERROR_PATH_NOT_FOUND: 1762 | case ERROR_NO_MORE_FILES: 1763 | errno = ENOENT; 1764 | break; 1765 | 1766 | case ERROR_NOT_ENOUGH_MEMORY: 1767 | errno = ENOMEM; 1768 | break; 1769 | 1770 | default: 1771 | errno = EINVAL; 1772 | break; 1773 | } 1774 | 1775 | return; 1776 | } 1777 | } 1778 | #else 1779 | #if HAVE_AT 1780 | if (req->wd) 1781 | { 1782 | int fd = openat (WD2FD (req->wd), req->ptr1, O_CLOEXEC | O_SEARCH | O_DIRECTORY); 1783 | 1784 | if (fd < 0) 1785 | return; 1786 | /* wrap the descriptor in a DIR; if that fails the descriptor is closed again */ 1787 | dirp = fdopendir (fd); 1788 | 1789 | if (!dirp) 1790 | close (fd); 1791 | } 1792 | else 1793 | dirp = opendir (req->ptr1); 1794 | #else 1795 | dirp = opendir (wd_expand (&self->tmpbuf, req->wd, req->ptr1)); 1796 | #endif 1797 | 1798 | if (!dirp) 1799 | return; 1800 | #endif 1801 | /* the path in ptr1 is no longer needed: from here on ptr1 and ptr2 carry the results and are marked to be freed with the request */ 1802 | if (req->flags & EIO_FLAG_PTR1_FREE) 1803 | free (req->ptr1); 1804 | 1805 | req->flags |= EIO_FLAG_PTR1_FREE | EIO_FLAG_PTR2_FREE; 1806 | req->ptr1 = dents = flags ?
malloc (dentalloc * sizeof (eio_dirent)) : 0; 1807 | req->ptr2 = names = malloc (namesalloc); 1808 | /* NOTE(review): this return leaves dirp open, there is no closedir or FindClose on this path */ 1809 | if (!names || (flags && !dents)) 1810 | return; 1811 | /* one iteration per directory entry */ 1812 | for (;;) 1813 | { 1814 | int done; 1815 | 1816 | #ifdef _WIN32 1817 | done = !dirp; 1818 | #else 1819 | errno = 0; 1820 | entp = readdir (dirp); 1821 | done = !entp; 1822 | #endif 1823 | 1824 | if (done) 1825 | { 1826 | #ifndef _WIN32 1827 | int old_errno = errno; 1828 | closedir (dirp); 1829 | errno = old_errno; 1830 | /* errno was cleared before readdir, so a non-zero value here means readdir failed; result then stays -1 */ 1831 | if (errno) 1832 | break; 1833 | #endif 1834 | 1835 | /* sort etc. */ 1836 | req->int1 = flags; 1837 | req->result = dentoffs; 1838 | /* 7 as score_bits covers the three low bits of the score, 0 sorts by inode only */ 1839 | if (flags & EIO_READDIR_STAT_ORDER) 1840 | eio_dent_sort (dents, dentoffs, flags & EIO_READDIR_DIRS_FIRST ? 7 : 0, inode_bits); 1841 | else if (flags & EIO_READDIR_DIRS_FIRST) 1842 | if (flags & EIO_READDIR_FOUND_UNKNOWN) 1843 | eio_dent_sort (dents, dentoffs, 7, inode_bits); /* sort by score and inode */ 1844 | else 1845 | { 1846 | /* in this case, all is known, and we just put dirs first and sort them */ 1847 | eio_dirent *oth = dents + dentoffs; 1848 | eio_dirent *dir = dents; 1849 | 1850 | /* now partition dirs to the front, and non-dirs to the back */ 1851 | /* by walking from both sides and swapping if necessary */ 1852 | while (oth > dir) 1853 | { 1854 | if (dir->type == EIO_DT_DIR) 1855 | ++dir; 1856 | else if ((--oth)->type == EIO_DT_DIR) 1857 | { 1858 | eio_dirent tmp = *dir; *dir = *oth; *oth = tmp; 1859 | 1860 | ++dir; 1861 | } 1862 | } 1863 | 1864 | /* now sort the dirs only (dirs all have the same score) */ 1865 | eio_dent_sort (dents, dir - dents, 0, inode_bits); 1866 | } 1867 | 1868 | break; 1869 | } 1870 | 1871 | /* now add the entry to our list(s) */ 1872 | name = D_NAME (entp); 1873 | 1874 | /* skip . and .. entries */ 1875 | if (name [0] != '.' || (name [1] && (name [1] != '.'
|| name [2]))) 1876 | { 1877 | int len = D_NAMLEN (entp) + 1; 1878 | /* grow the names buffer until the name and its 0-byte fit; namesalloc is only updated after a successful realloc */ 1879 | while (ecb_expect_false (namesoffs + len > namesalloc)) 1880 | { 1881 | char *grown = realloc (names, namesalloc * 2); 1882 | if (!grown) 1883 | break; /* the old buffer stays in req->ptr2, so it is still freed with the request */ 1884 | req->ptr2 = names = grown; 1885 | namesalloc *= 2; 1886 | } 1887 | if (ecb_expect_false (namesoffs + len > namesalloc)) break; /* realloc failed: stop with req->result still -1 instead of copying through a null pointer; NOTE(review): like the other early exits of this loop, this leaves dirp open */ 1888 | memcpy (names + namesoffs, name, len); 1889 | 1890 | if (dents) 1891 | { 1892 | struct eio_dirent *ent; 1893 | 1894 | if (ecb_expect_false (dentoffs == dentalloc)) 1895 | { 1896 | eio_dirent *grown = realloc (dents, dentalloc * 2 * sizeof (eio_dirent)); 1897 | if (!grown) 1898 | break; /* the old array stays in req->ptr1, so it is still freed with the request */ 1899 | req->ptr1 = dents = grown; 1900 | dentalloc *= 2; 1901 | } 1902 | 1903 | ent = dents + dentoffs; 1904 | 1905 | ent->nameofs = namesoffs; /* rather dirtily we store the offset in the pointer */ 1906 | ent->namelen = len - 1; 1907 | ent->inode = D_INO (entp); 1908 | /* the OR of all inode numbers is passed to the sort later, which uses it to skip bits that are 0 everywhere */ 1909 | inode_bits |= ent->inode; 1910 | 1911 | switch (D_TYPE (entp)) 1912 | { 1913 | default: 1914 | ent->type = EIO_DT_UNKNOWN; 1915 | flags |= EIO_READDIR_FOUND_UNKNOWN; 1916 | break; 1917 | 1918 | #ifdef DT_FIFO 1919 | case DT_FIFO: ent->type = EIO_DT_FIFO; break; 1920 | #endif 1921 | #ifdef DT_CHR 1922 | case DT_CHR: ent->type = EIO_DT_CHR; break; 1923 | #endif 1924 | #ifdef DT_MPC 1925 | case DT_MPC: ent->type = EIO_DT_MPC; break; 1926 | #endif 1927 | #ifdef DT_DIR 1928 | case DT_DIR: ent->type = EIO_DT_DIR; break; 1929 | #endif 1930 | #ifdef DT_NAM 1931 | case DT_NAM: ent->type = EIO_DT_NAM; break; 1932 | #endif 1933 | #ifdef DT_BLK 1934 | case DT_BLK: ent->type = EIO_DT_BLK; break; 1935 | #endif 1936 | #ifdef DT_MPB 1937 | case DT_MPB: ent->type = EIO_DT_MPB; break; 1938 | #endif 1939 | #ifdef DT_REG 1940 | case DT_REG: ent->type = EIO_DT_REG; break; 1941 | #endif 1942 | #ifdef DT_NWK 1943 | case DT_NWK: ent->type = EIO_DT_NWK; break; 1944 | #endif 1945 | #ifdef DT_CMP 1946 | case DT_CMP: ent->type = EIO_DT_CMP; break; 1947 | #endif 1948 | #ifdef DT_LNK 1949 | case DT_LNK: ent->type = EIO_DT_LNK; break; 1950 | #endif 1951 | #ifdef DT_SOCK 1952 | case DT_SOCK:
ent->type = EIO_DT_SOCK; break; 1953 | #endif 1954 | #ifdef DT_DOOR 1955 | case DT_DOOR: ent->type = EIO_DT_DOOR; break; 1956 | #endif 1957 | #ifdef DT_WHT 1958 | case DT_WHT: ent->type = EIO_DT_WHT; break; 1959 | #endif 1960 | } 1961 | /* 7 is the default score; smaller scores sort earlier (see eio_dent_cmp) */ 1962 | ent->score = 7; 1963 | 1964 | if (flags & EIO_READDIR_DIRS_FIRST) 1965 | { 1966 | if (ent->type == EIO_DT_UNKNOWN) 1967 | { 1968 | if (*name == '.') /* leading dots are likely directories, and, in any case, rare */ 1969 | ent->score = 1; 1970 | else if (!strchr (name, '.')) /* absence of dots indicate likely dirs */ 1971 | ent->score = len <= 2 ? 4 - len : len <= 4 ? 4 : len <= 7 ? 5 : 6; /* shorter == more likely dir, but avoid too many classes */ 1972 | } 1973 | else if (ent->type == EIO_DT_DIR) 1974 | ent->score = 0; 1975 | } 1976 | } 1977 | /* the entry is counted even when no dirents are collected, so dentoffs is always the number of names */ 1978 | namesoffs += len; 1979 | ++dentoffs; 1980 | } 1981 | /* NOTE(review): leaving the loop on cancellation does not close dirp */ 1982 | if (EIO_CANCELLED (req)) 1983 | { 1984 | errno = ECANCELED; 1985 | break; 1986 | } 1987 | 1988 | #ifdef _WIN32 1989 | if (!FindNextFile (dirp, &entp)) 1990 | { 1991 | FindClose (dirp); 1992 | dirp = 0; 1993 | } 1994 | #endif 1995 | } 1996 | } 1997 | 1998 | /*****************************************************************************/ 1999 | /* working directory stuff */ 2000 | /* various deficiencies in the posix 2008 api force us to */ 2001 | /* keep the absolute path in string form at all times */ 2002 | /* fuck yeah. */ 2003 | 2004 | #if !HAVE_AT 2005 | 2006 | /* a bit like realpath, but usually faster because it doesn't have to return */ 2007 | /* an absolute or canonical path */ 2008 | static const char * 2009 | wd_expand (struct tmpbuf *tmpbuf, eio_wd wd, const char *path) 2010 | { 2011 | if (!wd || *path == '/') 2012 | return path; 2013 | 2014 | if (path [0] == '.'
&& !path [1]) 2015 | return wd->str; 2016 | /* otherwise build the stored wd path, a slash and path in tmpbuf */ 2017 | { 2018 | int l1 = wd->len; 2019 | int l2 = strlen (path); 2020 | 2021 | char *res = tmpbuf_get (tmpbuf, l1 + l2 + 2); 2022 | 2023 | memcpy (res, wd->str, l1); 2024 | res [l1] = '/'; 2025 | memcpy (res + l1 + 1, path, l2 + 1); 2026 | 2027 | return res; 2028 | } 2029 | } 2030 | 2031 | #endif 2032 | /* creates a working directory object for path (relative to wd): resolves the path with eio__realpath, opens the directory when HAVE_AT is set and stores fd, length and 0-terminated path in a newly allocated object; returns EIO_INVALID_WD on any failure */ 2033 | static eio_wd 2034 | eio__wd_open_sync (struct tmpbuf *tmpbuf, eio_wd wd, const char *path) 2035 | { 2036 | int fd; 2037 | eio_wd res; 2038 | int len = eio__realpath (tmpbuf, wd, path); 2039 | 2040 | if (len < 0) 2041 | return EIO_INVALID_WD; 2042 | 2043 | #if HAVE_AT 2044 | fd = openat (WD2FD (wd), path, O_CLOEXEC | O_SEARCH | O_DIRECTORY); 2045 | 2046 | if (fd < 0) 2047 | return EIO_INVALID_WD; 2048 | #endif 2049 | 2050 | res = malloc (sizeof (*res) + len); /* one extra 0-byte */ 2051 | 2052 | #if HAVE_AT 2053 | if (res) res->fd = fd; else close (fd); /* do not leak the directory fd when the allocation failed */ 2054 | #endif 2055 | if (!res) { errno = ENOMEM; return EIO_INVALID_WD; } /* out of memory is reported like any other failure instead of dereferencing a null pointer */ 2056 | res->len = len; 2057 | memcpy (res->str, tmpbuf->ptr, len); 2058 | res->str [len] = 0; 2059 | 2060 | return res; 2061 | } 2062 | /* synchronous public variant: uses a temporary tmpbuf that is freed again before returning */ 2063 | eio_wd 2064 | eio_wd_open_sync (eio_wd wd, const char *path) 2065 | { 2066 | struct tmpbuf tmpbuf = { 0 }; 2067 | wd = eio__wd_open_sync (&tmpbuf, wd, path); 2068 | free (tmpbuf.ptr); 2069 | 2070 | return wd; 2071 | } 2072 | /* releases a working directory object; EIO_INVALID_WD and EIO_CWD are not real objects and are ignored */ 2073 | void 2074 | eio_wd_close_sync (eio_wd wd) 2075 | { 2076 | if (wd != EIO_INVALID_WD && wd != EIO_CWD) 2077 | { 2078 | #if HAVE_AT 2079 | close (wd->fd); 2080 | #endif 2081 | free (wd); 2082 | } 2083 | } 2084 | 2085 | #if HAVE_AT 2086 | 2087 | /* they forgot these */ 2088 | /* truncate relative to a directory fd: open for writing, ftruncate, close */ 2089 | static int 2090 | eio__truncateat (int dirfd, const char *path, off_t length) 2091 | { 2092 | int fd = openat (dirfd, path, O_WRONLY | O_CLOEXEC); 2093 | int res; 2094 | 2095 | if (fd < 0) 2096 | return fd; 2097 | 2098 | res = ftruncate (fd, length); 2099 | close (fd); 2100 | return res; 2101 | } 2102 | 2103 | static int 2104 | eio__statvfsat (int dirfd, const char *path, struct statvfs *buf) 2105 | { 2106 | int fd = openat
(dirfd, path, O_SEARCH | O_CLOEXEC); 2107 | int res; 2108 | /* a failed open is reported with the negative fd value as result */ 2109 | if (fd < 0) 2110 | return fd; 2111 | 2112 | res = fstatvfs (fd, buf); 2113 | close (fd); 2114 | return res; 2115 | 2116 | } 2117 | 2118 | #endif 2119 | 2120 | /*****************************************************************************/ 2121 | /* ALLOC (len): when req->ptr2 is not set, allocates len bytes for it and marks it to be freed with the request; on allocation failure it sets errno and req->result and executes break, so it has to be used where break leaves the request handling, as in the switch cases of eio_execute */ 2122 | #define ALLOC(len) \ 2123 | if (!req->ptr2) \ 2124 | { \ 2125 | X_LOCK (wrklock); \ 2126 | req->flags |= EIO_FLAG_PTR2_FREE; \ 2127 | X_UNLOCK (wrklock); \ 2128 | req->ptr2 = malloc (len); \ 2129 | if (!req->ptr2) \ 2130 | { \ 2131 | errno = ENOMEM; \ 2132 | req->result = -1; \ 2133 | break; \ 2134 | } \ 2135 | } 2136 | /* per-thread setup: when HAVE_PRCTL_SET_NAME is set, appends "/eio" to the current thread name, cutting the old name short if necessary so the suffix fits */ 2137 | static void ecb_noinline ecb_cold 2138 | etp_proc_init (void) 2139 | { 2140 | #if HAVE_PRCTL_SET_NAME 2141 | /* provide a more sensible "thread name" */ 2142 | char name[16 + 1]; 2143 | const int namelen = sizeof (name) - 1; 2144 | int len; 2145 | 2146 | prctl (PR_GET_NAME, (unsigned long)name, 0, 0, 0); 2147 | name [namelen] = 0; 2148 | len = strlen (name); 2149 | strcpy (name + (len <= namelen - 4 ?
len : namelen - 4), "/eio"); 2150 | prctl (PR_SET_NAME, (unsigned long)name, 0, 0, 0); 2151 | #endif 2152 | } 2153 | /* worker thread body: takes requests off req_queue, executes them and pushes them onto res_queue; leaves the loop after an idle timeout or when it receives a request with a negative type */ 2154 | X_THREAD_PROC (etp_proc) 2155 | { 2156 | ETP_REQ *req; 2157 | struct timespec ts; 2158 | etp_worker *self = (etp_worker *)thr_arg; 2159 | 2160 | etp_proc_init (); 2161 | 2162 | /* try to distribute timeouts somewhat evenly */ 2163 | ts.tv_nsec = ((unsigned long)self & 1023UL) * (1000000000UL / 1024UL); 2164 | 2165 | for (;;) 2166 | { 2167 | ts.tv_sec = 0; 2168 | /* ts.tv_sec doubles as state: 0 means no deadline computed yet, 1 means the timed wait ran out, any other value is the deadline */ 2169 | X_LOCK (reqlock); 2170 | 2171 | for (;;) 2172 | { 2173 | req = reqq_shift (&req_queue); 2174 | 2175 | if (req) 2176 | break; 2177 | 2178 | if (ts.tv_sec == 1) /* no request, but timeout detected, let's quit */ 2179 | { 2180 | X_UNLOCK (reqlock); 2181 | X_LOCK (wrklock); 2182 | --started; 2183 | X_UNLOCK (wrklock); 2184 | goto quit; 2185 | } 2186 | /* idle counts the waiting workers; it is decremented again right after the wait */ 2187 | ++idle; 2188 | 2189 | if (idle <= max_idle) 2190 | /* we are allowed to idle, so do so without any timeout */ 2191 | X_COND_WAIT (reqwait, reqlock); 2192 | else 2193 | { 2194 | /* initialise timeout once */ 2195 | if (!ts.tv_sec) 2196 | ts.tv_sec = time (0) + idle_timeout; 2197 | 2198 | if (X_COND_TIMEDWAIT (reqwait, reqlock, ts) == ETIMEDOUT) 2199 | ts.tv_sec = 1; /* assuming this is not a value computed above.,..
*/ 2200 | } 2201 | 2202 | --idle; 2203 | } 2204 | 2205 | --nready; 2206 | 2207 | X_UNLOCK (reqlock); 2208 | /* a request with a negative type tells the worker to terminate */ 2209 | if (req->type < 0) 2210 | goto quit; 2211 | 2212 | ETP_EXECUTE (self, req); 2213 | 2214 | X_LOCK (reslock); 2215 | 2216 | ++npending; 2217 | /* want_poll_cb is only invoked when reqq_push returns 0 and a callback is set */ 2218 | if (!reqq_push (&res_queue, req) && want_poll_cb) 2219 | want_poll_cb (); 2220 | 2221 | etp_worker_clear (self); 2222 | 2223 | X_UNLOCK (reslock); 2224 | } 2225 | /* req is null here after an idle timeout, which makes the free a no-op; otherwise it is the negative-type request */ 2226 | quit: 2227 | free (req); 2228 | 2229 | X_LOCK (wrklock); 2230 | etp_worker_free (self); 2231 | X_UNLOCK (wrklock); 2232 | 2233 | return 0; 2234 | } 2235 | 2236 | /*****************************************************************************/ 2237 | /* library initialisation: hands the two poll callbacks to etp_init and returns its result */ 2238 | int ecb_cold 2239 | eio_init (void (*want_poll)(void), void (*done_poll)(void)) 2240 | { 2241 | return etp_init (want_poll, done_poll); 2242 | } 2243 | /* destroy hook installed by REQ: frees the request structure itself */ 2244 | ecb_inline void 2245 | eio_api_destroy (eio_req *req) 2246 | { 2247 | free (req); 2248 | } 2249 | /* REQ (rtype): declares req, zero-allocates it (the enclosing function returns 0 on failure) and fills in the type plus pri, cb and data from the enclosing function */ 2250 | #define REQ(rtype) \ 2251 | eio_req *req; \ 2252 | \ 2253 | req = (eio_req *)calloc (1, sizeof *req); \ 2254 | if (!req) \ 2255 | return 0; \ 2256 | \ 2257 | req->type = rtype; \ 2258 | req->pri = pri; \ 2259 | req->finish = cb; \ 2260 | req->data = data; \ 2261 | req->destroy = eio_api_destroy; 2262 | /* SEND: submits req and returns it from the enclosing function */ 2263 | #define SEND eio_submit (req); return req 2264 | /* PATH: stores a strdup'ed copy of path in req->ptr1, marked to be freed with the request; when the copy fails req is destroyed and the enclosing function returns 0 */ 2265 | #define PATH \ 2266 | req->flags |= EIO_FLAG_PTR1_FREE; \ 2267 | req->ptr1 = strdup (path); \ 2268 | if (!req->ptr1) \ 2269 | { \ 2270 | eio_api_destroy (req); \ 2271 | return 0; \ 2272 | } 2273 | 2274 | static void 2275 | eio_execute (etp_worker *self, eio_req *req) 2276 | { 2277 | #if HAVE_AT 2278 | int dirfd; 2279 | #else 2280 | const char *path; 2281 | #endif 2282 | 2283 | if (ecb_expect_false (EIO_CANCELLED (req))) 2284 | { 2285 | req->result = -1; 2286 | req->errorno = ECANCELED; 2287 | return; 2288 | } 2289 | 2290 | if (ecb_expect_false (req->wd == EIO_INVALID_WD)) 2291 | { 2292 | req->result = -1; 2293 | req->errorno = ENOENT; 2294 | return; 2295 | } 2296 | 2297
| if (req->type >= EIO_OPEN) 2298 | { 2299 | #if HAVE_AT 2300 | dirfd = WD2FD (req->wd); 2301 | #else 2302 | path = wd_expand (&self->tmpbuf, req->wd, req->ptr1); 2303 | #endif 2304 | } 2305 | 2306 | switch (req->type) 2307 | { 2308 | case EIO_WD_OPEN: req->wd = eio__wd_open_sync (&self->tmpbuf, req->wd, req->ptr1); 2309 | req->result = req->wd == EIO_INVALID_WD ? -1 : 0; 2310 | break; 2311 | case EIO_WD_CLOSE: req->result = 0; 2312 | eio_wd_close_sync (req->wd); break; 2313 | 2314 | case EIO_SEEK: eio__lseek (req); break; 2315 | case EIO_READ: ALLOC (req->size); 2316 | req->result = req->offs >= 0 2317 | ? pread (req->int1, req->ptr2, req->size, req->offs) 2318 | : read (req->int1, req->ptr2, req->size); break; 2319 | case EIO_WRITE: req->result = req->offs >= 0 2320 | ? pwrite (req->int1, req->ptr2, req->size, req->offs) 2321 | : write (req->int1, req->ptr2, req->size); break; 2322 | 2323 | case EIO_READAHEAD: req->result = readahead (req->int1, req->offs, req->size); break; 2324 | case EIO_SENDFILE: req->result = eio__sendfile (req->int1, req->int2, req->offs, req->size); break; 2325 | 2326 | #if HAVE_AT 2327 | 2328 | case EIO_STAT: ALLOC (sizeof (EIO_STRUCT_STAT)); 2329 | req->result = fstatat (dirfd, req->ptr1, (EIO_STRUCT_STAT *)req->ptr2, 0); break; 2330 | case EIO_LSTAT: ALLOC (sizeof (EIO_STRUCT_STAT)); 2331 | req->result = fstatat (dirfd, req->ptr1, (EIO_STRUCT_STAT *)req->ptr2, AT_SYMLINK_NOFOLLOW); break; 2332 | case EIO_CHOWN: req->result = fchownat (dirfd, req->ptr1, req->int2, req->int3, 0); break; 2333 | case EIO_CHMOD: req->result = fchmodat (dirfd, req->ptr1, (mode_t)req->int2, 0); break; 2334 | case EIO_TRUNCATE: req->result = eio__truncateat (dirfd, req->ptr1, req->offs); break; 2335 | case EIO_OPEN: req->result = openat (dirfd, req->ptr1, req->int1, (mode_t)req->int2); break; 2336 | 2337 | case EIO_UNLINK: req->result = unlinkat (dirfd, req->ptr1, 0); break; 2338 | case EIO_RMDIR: req->result = unlinkat (dirfd, req->ptr1, AT_REMOVEDIR); break; 
2339 | case EIO_MKDIR: req->result = mkdirat (dirfd, req->ptr1, (mode_t)req->int2); break; 2340 | case EIO_RENAME: req->result = renameat (dirfd, req->ptr1, WD2FD ((eio_wd)req->int3), req->ptr2); break; 2341 | case EIO_LINK: req->result = linkat (dirfd, req->ptr1, WD2FD ((eio_wd)req->int3), req->ptr2, 0); break; 2342 | case EIO_SYMLINK: req->result = symlinkat (req->ptr1, dirfd, req->ptr2); break; 2343 | case EIO_MKNOD: req->result = mknodat (dirfd, req->ptr1, (mode_t)req->int2, (dev_t)req->offs); break; 2344 | case EIO_READLINK: ALLOC (PATH_MAX); 2345 | req->result = readlinkat (dirfd, req->ptr1, req->ptr2, PATH_MAX); break; 2346 | case EIO_STATVFS: ALLOC (sizeof (EIO_STRUCT_STATVFS)); 2347 | req->result = eio__statvfsat (dirfd, req->ptr1, (EIO_STRUCT_STATVFS *)req->ptr2); break; 2348 | case EIO_UTIME: 2349 | case EIO_FUTIME: 2350 | { 2351 | struct timespec ts[2]; 2352 | struct timespec *times; 2353 | 2354 | if (req->nv1 != -1. || req->nv2 != -1.) 2355 | { 2356 | ts[0].tv_sec = req->nv1; 2357 | ts[0].tv_nsec = (req->nv1 - ts[0].tv_sec) * 1e9; 2358 | ts[1].tv_sec = req->nv2; 2359 | ts[1].tv_nsec = (req->nv2 - ts[1].tv_sec) * 1e9; 2360 | 2361 | times = ts; 2362 | } 2363 | else 2364 | times = 0; 2365 | 2366 | req->result = req->type == EIO_FUTIME 2367 | ? 
futimens (req->int1, times) 2368 | : utimensat (dirfd, req->ptr1, times, 0); 2369 | } 2370 | break; 2371 | 2372 | #else 2373 | 2374 | case EIO_STAT: ALLOC (sizeof (EIO_STRUCT_STAT)); 2375 | req->result = stat (path , (EIO_STRUCT_STAT *)req->ptr2); break; 2376 | case EIO_LSTAT: ALLOC (sizeof (EIO_STRUCT_STAT)); 2377 | req->result = lstat (path , (EIO_STRUCT_STAT *)req->ptr2); break; 2378 | case EIO_CHOWN: req->result = chown (path , req->int2, req->int3); break; 2379 | case EIO_CHMOD: req->result = chmod (path , (mode_t)req->int2); break; 2380 | case EIO_TRUNCATE: req->result = truncate (path , req->offs); break; 2381 | case EIO_OPEN: req->result = open (path , req->int1, (mode_t)req->int2); break; 2382 | 2383 | case EIO_UNLINK: req->result = unlink (path ); break; 2384 | case EIO_RMDIR: req->result = rmdir (path ); break; 2385 | case EIO_MKDIR: req->result = mkdir (path , (mode_t)req->int2); break; 2386 | case EIO_RENAME: req->result = rename (path , req->ptr2); break; 2387 | case EIO_LINK: req->result = link (path , req->ptr2); break; 2388 | case EIO_SYMLINK: req->result = symlink (path , req->ptr2); break; 2389 | case EIO_MKNOD: req->result = mknod (path , (mode_t)req->int2, (dev_t)req->offs); break; 2390 | case EIO_READLINK: ALLOC (PATH_MAX); 2391 | req->result = readlink (path, req->ptr2, PATH_MAX); break; 2392 | case EIO_STATVFS: ALLOC (sizeof (EIO_STRUCT_STATVFS)); 2393 | req->result = statvfs (path , (EIO_STRUCT_STATVFS *)req->ptr2); break; 2394 | 2395 | case EIO_UTIME: 2396 | case EIO_FUTIME: 2397 | { 2398 | struct timeval tv[2]; 2399 | struct timeval *times; 2400 | 2401 | if (req->nv1 != -1. || req->nv2 != -1.) 2402 | { 2403 | tv[0].tv_sec = req->nv1; 2404 | tv[0].tv_usec = (req->nv1 - tv[0].tv_sec) * 1e6; 2405 | tv[1].tv_sec = req->nv2; 2406 | tv[1].tv_usec = (req->nv2 - tv[1].tv_sec) * 1e6; 2407 | 2408 | times = tv; 2409 | } 2410 | else 2411 | times = 0; 2412 | 2413 | req->result = req->type == EIO_FUTIME 2414 | ? 
futimes (req->int1, times) 2415 | : utimes (path, times); /* use the wd-expanded path like every other path request in this branch; req->ptr1 alone would ignore req->wd */ 2416 | } 2417 | break; 2418 | 2419 | #endif 2420 | 2421 | case EIO_REALPATH: if (0 <= (req->result = eio__realpath (&self->tmpbuf, req->wd, req->ptr1))) 2422 | { 2423 | ALLOC (req->result); 2424 | memcpy (req->ptr2, self->tmpbuf.ptr, req->result); 2425 | } 2426 | break; 2427 | 2428 | case EIO_FSTAT: ALLOC (sizeof (EIO_STRUCT_STAT)); 2429 | req->result = fstat (req->int1, (EIO_STRUCT_STAT *)req->ptr2); break; 2430 | 2431 | case EIO_FSTATVFS: ALLOC (sizeof (EIO_STRUCT_STATVFS)); 2432 | req->result = fstatvfs (req->int1, (EIO_STRUCT_STATVFS *)req->ptr2); break; 2433 | 2434 | case EIO_FCHOWN: req->result = fchown (req->int1, req->int2, req->int3); break; 2435 | case EIO_FCHMOD: req->result = fchmod (req->int1, (mode_t)req->int2); break; 2436 | case EIO_FTRUNCATE: req->result = ftruncate (req->int1, req->offs); break; 2437 | 2438 | case EIO_CLOSE: req->result = close (req->int1); break; 2439 | case EIO_DUP2: req->result = dup2 (req->int1, req->int2); break; 2440 | case EIO_SYNC: req->result = 0; sync (); break; 2441 | case EIO_FSYNC: req->result = fsync (req->int1); break; 2442 | case EIO_FDATASYNC: req->result = fdatasync (req->int1); break; 2443 | case EIO_SYNCFS: req->result = eio__syncfs (req->int1); break; 2444 | case EIO_SYNC_FILE_RANGE: req->result = eio__sync_file_range (req->int1, req->offs, req->size, req->int2); break; 2445 | case EIO_MSYNC: req->result = eio__msync (req->ptr2, req->size, req->int1); break; 2446 | case EIO_MTOUCH: req->result = eio__mtouch (req); break; 2447 | case EIO_MLOCK: req->result = eio__mlock (req->ptr2, req->size); break; 2448 | case EIO_MLOCKALL: req->result = eio__mlockall (req->int1); break; 2449 | case EIO_FALLOCATE: req->result = eio__fallocate (req->int1, req->int2, req->offs, req->size); break; 2450 | 2451 | case EIO_READDIR: eio__scandir (req, self); break; 2452 | 2453 | case EIO_BUSY: 2454 | #ifdef _WIN32 2455 | Sleep (req->nv1 * 1e3); 2456 | #else 2457 | { 2458 
| struct timeval tv; 2459 | 2460 | tv.tv_sec = req->nv1; 2461 | tv.tv_usec = (req->nv1 - tv.tv_sec) * 1e6; 2462 | 2463 | req->result = select (0, 0, 0, 0, &tv); 2464 | } 2465 | #endif 2466 | break; 2467 | 2468 | case EIO_GROUP: 2469 | abort (); /* handled in eio_request */ 2470 | 2471 | case EIO_NOP: 2472 | req->result = 0; 2473 | break; 2474 | 2475 | case EIO_CUSTOM: 2476 | req->feed (req); 2477 | break; 2478 | 2479 | default: 2480 | req->result = EIO_ENOSYS (); 2481 | break; 2482 | } 2483 | 2484 | req->errorno = errno; 2485 | } 2486 | 2487 | #ifndef EIO_NO_WRAPPERS 2488 | 2489 | eio_req *eio_wd_open (const char *path, int pri, eio_cb cb, void *data) 2490 | { 2491 | REQ (EIO_WD_OPEN); PATH; SEND; 2492 | } 2493 | 2494 | eio_req *eio_wd_close (eio_wd wd, int pri, eio_cb cb, void *data) 2495 | { 2496 | REQ (EIO_WD_CLOSE); req->wd = wd; SEND; 2497 | } 2498 | 2499 | eio_req *eio_nop (int pri, eio_cb cb, void *data) 2500 | { 2501 | REQ (EIO_NOP); SEND; 2502 | } 2503 | 2504 | eio_req *eio_busy (double delay, int pri, eio_cb cb, void *data) 2505 | { 2506 | REQ (EIO_BUSY); req->nv1 = delay; SEND; 2507 | } 2508 | 2509 | eio_req *eio_sync (int pri, eio_cb cb, void *data) 2510 | { 2511 | REQ (EIO_SYNC); SEND; 2512 | } 2513 | 2514 | eio_req *eio_fsync (int fd, int pri, eio_cb cb, void *data) 2515 | { 2516 | REQ (EIO_FSYNC); req->int1 = fd; SEND; 2517 | } 2518 | 2519 | eio_req *eio_msync (void *addr, size_t length, int flags, int pri, eio_cb cb, void *data) 2520 | { 2521 | REQ (EIO_MSYNC); req->ptr2 = addr; req->size = length; req->int1 = flags; SEND; 2522 | } 2523 | 2524 | eio_req *eio_fdatasync (int fd, int pri, eio_cb cb, void *data) 2525 | { 2526 | REQ (EIO_FDATASYNC); req->int1 = fd; SEND; 2527 | } 2528 | 2529 | eio_req *eio_syncfs (int fd, int pri, eio_cb cb, void *data) 2530 | { 2531 | REQ (EIO_SYNCFS); req->int1 = fd; SEND; 2532 | } 2533 | 2534 | eio_req *eio_sync_file_range (int fd, off_t offset, size_t nbytes, unsigned int flags, int pri, eio_cb cb, void *data) 2535 | 
{ 2536 | REQ (EIO_SYNC_FILE_RANGE); req->int1 = fd; req->offs = offset; req->size = nbytes; req->int2 = flags; SEND; 2537 | } 2538 | 2539 | eio_req *eio_mtouch (void *addr, size_t length, int flags, int pri, eio_cb cb, void *data) 2540 | { 2541 | REQ (EIO_MTOUCH); req->ptr2 = addr; req->size = length; req->int1 = flags; SEND; 2542 | } 2543 | 2544 | eio_req *eio_mlock (void *addr, size_t length, int pri, eio_cb cb, void *data) 2545 | { 2546 | REQ (EIO_MLOCK); req->ptr2 = addr; req->size = length; SEND; 2547 | } 2548 | 2549 | eio_req *eio_mlockall (int flags, int pri, eio_cb cb, void *data) 2550 | { 2551 | REQ (EIO_MLOCKALL); req->int1 = flags; SEND; 2552 | } 2553 | 2554 | eio_req *eio_fallocate (int fd, int mode, off_t offset, size_t len, int pri, eio_cb cb, void *data) 2555 | { 2556 | REQ (EIO_FALLOCATE); req->int1 = fd; req->int2 = mode; req->offs = offset; req->size = len; SEND; 2557 | } 2558 | 2559 | eio_req *eio_close (int fd, int pri, eio_cb cb, void *data) 2560 | { 2561 | REQ (EIO_CLOSE); req->int1 = fd; SEND; 2562 | } 2563 | 2564 | eio_req *eio_readahead (int fd, off_t offset, size_t length, int pri, eio_cb cb, void *data) 2565 | { 2566 | REQ (EIO_READAHEAD); req->int1 = fd; req->offs = offset; req->size = length; SEND; 2567 | } 2568 | 2569 | eio_req *eio_seek (int fd, off_t offset, int whence, int pri, eio_cb cb, void *data) 2570 | { 2571 | REQ (EIO_SEEK); req->int1 = fd; req->offs = offset; req->int2 = whence; SEND; 2572 | } 2573 | 2574 | eio_req *eio_read (int fd, void *buf, size_t length, off_t offset, int pri, eio_cb cb, void *data) 2575 | { 2576 | REQ (EIO_READ); req->int1 = fd; req->offs = offset; req->size = length; req->ptr2 = buf; SEND; 2577 | } 2578 | 2579 | eio_req *eio_write (int fd, void *buf, size_t length, off_t offset, int pri, eio_cb cb, void *data) 2580 | { 2581 | REQ (EIO_WRITE); req->int1 = fd; req->offs = offset; req->size = length; req->ptr2 = buf; SEND; 2582 | } 2583 | 2584 | eio_req *eio_fstat (int fd, int pri, eio_cb cb, void *data) 
2585 | { 2586 | REQ (EIO_FSTAT); req->int1 = fd; SEND; 2587 | } 2588 | 2589 | eio_req *eio_fstatvfs (int fd, int pri, eio_cb cb, void *data) 2590 | { 2591 | REQ (EIO_FSTATVFS); req->int1 = fd; SEND; 2592 | } 2593 | 2594 | eio_req *eio_futime (int fd, double atime, double mtime, int pri, eio_cb cb, void *data) 2595 | { 2596 | REQ (EIO_FUTIME); req->int1 = fd; req->nv1 = atime; req->nv2 = mtime; SEND; 2597 | } 2598 | 2599 | eio_req *eio_ftruncate (int fd, off_t offset, int pri, eio_cb cb, void *data) 2600 | { 2601 | REQ (EIO_FTRUNCATE); req->int1 = fd; req->offs = offset; SEND; 2602 | } 2603 | 2604 | eio_req *eio_fchmod (int fd, mode_t mode, int pri, eio_cb cb, void *data) 2605 | { 2606 | REQ (EIO_FCHMOD); req->int1 = fd; req->int2 = (long)mode; SEND; 2607 | } 2608 | 2609 | eio_req *eio_fchown (int fd, eio_uid_t uid, eio_gid_t gid, int pri, eio_cb cb, void *data) 2610 | { 2611 | REQ (EIO_FCHOWN); req->int1 = fd; req->int2 = (long)uid; req->int3 = (long)gid; SEND; 2612 | } 2613 | 2614 | eio_req *eio_dup2 (int fd, int fd2, int pri, eio_cb cb, void *data) 2615 | { 2616 | REQ (EIO_DUP2); req->int1 = fd; req->int2 = fd2; SEND; 2617 | } 2618 | 2619 | eio_req *eio_sendfile (int out_fd, int in_fd, off_t in_offset, size_t length, int pri, eio_cb cb, void *data) 2620 | { 2621 | REQ (EIO_SENDFILE); req->int1 = out_fd; req->int2 = in_fd; req->offs = in_offset; req->size = length; SEND; 2622 | } 2623 | 2624 | eio_req *eio_open (const char *path, int flags, mode_t mode, int pri, eio_cb cb, void *data) 2625 | { 2626 | REQ (EIO_OPEN); PATH; req->int1 = flags; req->int2 = (long)mode; SEND; 2627 | } 2628 | 2629 | eio_req *eio_utime (const char *path, double atime, double mtime, int pri, eio_cb cb, void *data) 2630 | { 2631 | REQ (EIO_UTIME); PATH; req->nv1 = atime; req->nv2 = mtime; SEND; 2632 | } 2633 | 2634 | eio_req *eio_truncate (const char *path, off_t offset, int pri, eio_cb cb, void *data) 2635 | { 2636 | REQ (EIO_TRUNCATE); PATH; req->offs = offset; SEND; 2637 | } 2638 | 2639 
| eio_req *eio_chown (const char *path, eio_uid_t uid, eio_gid_t gid, int pri, eio_cb cb, void *data) 2640 | { 2641 | REQ (EIO_CHOWN); PATH; req->int2 = (long)uid; req->int3 = (long)gid; SEND; 2642 | } 2643 | 2644 | eio_req *eio_chmod (const char *path, mode_t mode, int pri, eio_cb cb, void *data) 2645 | { 2646 | REQ (EIO_CHMOD); PATH; req->int2 = (long)mode; SEND; 2647 | } 2648 | 2649 | eio_req *eio_mkdir (const char *path, mode_t mode, int pri, eio_cb cb, void *data) 2650 | { 2651 | REQ (EIO_MKDIR); PATH; req->int2 = (long)mode; SEND; 2652 | } 2653 | 2654 | static eio_req * 2655 | eio__1path (int type, const char *path, int pri, eio_cb cb, void *data) 2656 | { 2657 | REQ (type); PATH; SEND; 2658 | } 2659 | 2660 | eio_req *eio_readlink (const char *path, int pri, eio_cb cb, void *data) 2661 | { 2662 | return eio__1path (EIO_READLINK, path, pri, cb, data); 2663 | } 2664 | 2665 | eio_req *eio_realpath (const char *path, int pri, eio_cb cb, void *data) 2666 | { 2667 | return eio__1path (EIO_REALPATH, path, pri, cb, data); 2668 | } 2669 | 2670 | eio_req *eio_stat (const char *path, int pri, eio_cb cb, void *data) 2671 | { 2672 | return eio__1path (EIO_STAT, path, pri, cb, data); 2673 | } 2674 | 2675 | eio_req *eio_lstat (const char *path, int pri, eio_cb cb, void *data) 2676 | { 2677 | return eio__1path (EIO_LSTAT, path, pri, cb, data); 2678 | } 2679 | 2680 | eio_req *eio_statvfs (const char *path, int pri, eio_cb cb, void *data) 2681 | { 2682 | return eio__1path (EIO_STATVFS, path, pri, cb, data); 2683 | } 2684 | 2685 | eio_req *eio_unlink (const char *path, int pri, eio_cb cb, void *data) 2686 | { 2687 | return eio__1path (EIO_UNLINK, path, pri, cb, data); 2688 | } 2689 | 2690 | eio_req *eio_rmdir (const char *path, int pri, eio_cb cb, void *data) 2691 | { 2692 | return eio__1path (EIO_RMDIR, path, pri, cb, data); 2693 | } 2694 | 2695 | eio_req *eio_readdir (const char *path, int flags, int pri, eio_cb cb, void *data) 2696 | { 2697 | REQ (EIO_READDIR); PATH; 
req->int1 = flags; SEND; 2698 | } 2699 | 2700 | eio_req *eio_mknod (const char *path, mode_t mode, dev_t dev, int pri, eio_cb cb, void *data) 2701 | { 2702 | REQ (EIO_MKNOD); PATH; req->int2 = (long)mode; req->offs = (off_t)dev; SEND; 2703 | } 2704 | /* shared body of the two-path wrappers (link, symlink, rename): duplicates path into ptr1 and new_path into ptr2, then submits the request; returns 0 if any allocation fails */ 2705 | static eio_req * 2706 | eio__2path (int type, const char *path, const char *new_path, int pri, eio_cb cb, void *data) 2707 | { 2708 | REQ (type); PATH; 2709 | 2710 | req->flags |= EIO_FLAG_PTR2_FREE; 2711 | req->ptr2 = strdup (new_path); 2712 | if (!req->ptr2) 2713 | { 2714 | free (req->ptr1); /* PATH already duplicated ptr1 and eio_api_destroy only frees req itself, so release it here to avoid a leak */ eio_api_destroy (req); 2715 | return 0; 2716 | } 2717 | 2718 | SEND; 2719 | } 2720 | 2721 | eio_req *eio_link (const char *path, const char *new_path, int pri, eio_cb cb, void *data) 2722 | { 2723 | return eio__2path (EIO_LINK, path, new_path, pri, cb, data); 2724 | } 2725 | 2726 | eio_req *eio_symlink (const char *path, const char *new_path, int pri, eio_cb cb, void *data) 2727 | { 2728 | return eio__2path (EIO_SYMLINK, path, new_path, pri, cb, data); 2729 | } 2730 | 2731 | eio_req *eio_rename (const char *path, const char *new_path, int pri, eio_cb cb, void *data) 2732 | { 2733 | return eio__2path (EIO_RENAME, path, new_path, pri, cb, data); 2734 | } 2735 | 2736 | eio_req *eio_custom (void (*execute)(eio_req *), int pri, eio_cb cb, void *data) 2737 | { 2738 | REQ (EIO_CUSTOM); req->feed = execute; SEND; 2739 | } 2740 | 2741 | #endif 2742 | 2743 | eio_req *eio_grp (eio_cb cb, void *data) 2744 | { 2745 | const int pri = EIO_PRI_MAX; 2746 | 2747 | REQ (EIO_GROUP); SEND; 2748 | } 2749 | 2750 | #undef REQ 2751 | #undef PATH 2752 | #undef SEND 2753 | 2754 | /*****************************************************************************/ 2755 | /* grp functions */ 2756 | 2757 | void 2758 | eio_grp_feed (eio_req *grp, void (*feed)(eio_req *req), int limit) 2759 | { 2760 | grp->int2 = limit; 2761 | grp->feed = feed; 2762 | 2763 | grp_try_feed (grp); 2764 | } 2765 | 2766 | void 2767 | eio_grp_limit (eio_req *grp, int limit) 2768 | { 2769 | grp->int2 
= limit; 2770 | 2771 | grp_try_feed (grp); 2772 | } 2773 | 2774 | void 2775 | eio_grp_add (eio_req *grp, eio_req *req) 2776 | { 2777 | assert (("cannot add requests to IO::AIO::GRP after the group finished", grp->int1 != 2)); 2778 | 2779 | grp->flags |= EIO_FLAG_GROUPADD; 2780 | 2781 | ++grp->size; 2782 | req->grp = grp; 2783 | 2784 | req->grp_prev = 0; 2785 | req->grp_next = grp->grp_first; 2786 | 2787 | if (grp->grp_first) 2788 | grp->grp_first->grp_prev = req; 2789 | 2790 | grp->grp_first = req; 2791 | } 2792 | 2793 | /*****************************************************************************/ 2794 | /* misc garbage */ 2795 | 2796 | eio_ssize_t 2797 | eio_sendfile_sync (int ofd, int ifd, off_t offset, size_t count) 2798 | { 2799 | return eio__sendfile (ofd, ifd, offset, count); 2800 | } 2801 | 2802 | -------------------------------------------------------------------------------- /eio.h: -------------------------------------------------------------------------------- 1 | /* 2 | * libeio API header 3 | * 4 | * Copyright (c) 2007,2008,2009,2010,2011,2012 Marc Alexander Lehmann 5 | * All rights reserved. 6 | * 7 | * Redistribution and use in source and binary forms, with or without modifica- 8 | * tion, are permitted provided that the following conditions are met: 9 | * 10 | * 1. Redistributions of source code must retain the above copyright notice, 11 | * this list of conditions and the following disclaimer. 12 | * 13 | * 2. Redistributions in binary form must reproduce the above copyright 14 | * notice, this list of conditions and the following disclaimer in the 15 | * documentation and/or other materials provided with the distribution. 16 | * 17 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED 18 | * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MER- 19 | * CHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO 20 | * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPE- 21 | * CIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 22 | * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; 23 | * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 24 | * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTH- 25 | * ERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED 26 | * OF THE POSSIBILITY OF SUCH DAMAGE. 27 | * 28 | * Alternatively, the contents of this file may be used under the terms of 29 | * the GNU General Public License ("GPL") version 2 or any later version, 30 | * in which case the provisions of the GPL are applicable instead of 31 | * the above. If you wish to allow the use of your version of this file 32 | * only under the terms of the GPL and not to allow others to use your 33 | * version of this file under the BSD license, indicate your decision 34 | * by deleting the provisions above and replace them with the notice 35 | * and other provisions required by the GPL. If you do not delete the 36 | * provisions above, a recipient may use your version of this file under 37 | * either the BSD or the GPL. 
38 | */ 39 | 40 | #ifndef EIO_H_ 41 | #define EIO_H_ 42 | 43 | #ifdef __cplusplus 44 | extern "C" { 45 | #endif 46 | 47 | #include <stddef.h> /* size_t */ 48 | #include <signal.h> /* sig_atomic_t */ 49 | #include <sys/types.h> /* off_t, ssize_t, uid_t, gid_t, ino_t, mode_t, dev_t */ 50 | 51 | typedef struct eio_req eio_req; 52 | typedef struct eio_dirent eio_dirent; 53 | 54 | typedef int (*eio_cb)(eio_req *req); 55 | 56 | #ifndef EIO_REQ_MEMBERS 57 | # define EIO_REQ_MEMBERS 58 | #endif 59 | 60 | #ifndef EIO_STRUCT_STAT 61 | # ifdef _WIN32 62 | # define EIO_STRUCT_STAT struct _stati64 63 | # define EIO_STRUCT_STATI64 64 | # else 65 | # define EIO_STRUCT_STAT struct stat 66 | # endif 67 | #endif 68 | 69 | #ifdef _WIN32 70 | typedef int eio_uid_t; 71 | typedef int eio_gid_t; 72 | #ifdef __MINGW32__ /* no intptr_t */ 73 | typedef ssize_t eio_ssize_t; 74 | #else 75 | typedef intptr_t eio_ssize_t; /* or SSIZE_T */ 76 | #endif 77 | #if __GNUC__ 78 | typedef long long eio_ino_t; /* signed for compatibility to msvc */ 79 | #else 80 | typedef __int64 eio_ino_t; /* unsigned not supported by msvc */ 81 | #endif 82 | #else 83 | typedef uid_t eio_uid_t; 84 | typedef gid_t eio_gid_t; 85 | typedef ssize_t eio_ssize_t; 86 | typedef ino_t eio_ino_t; 87 | #endif 88 | 89 | #ifndef EIO_STRUCT_STATVFS 90 | # define EIO_STRUCT_STATVFS struct statvfs 91 | #endif 92 | 93 | /* managing working directories */ 94 | 95 | typedef struct eio_pwd *eio_wd; 96 | 97 | #define EIO_CWD 0 /* the current working directory of the process, guaranteed to be a null pointer */ 98 | #define EIO_INVALID_WD ((eio_wd)(int)-1) /* failure return for eio_wd_open */ 99 | 100 | eio_wd eio_wd_open_sync (eio_wd wd, const char *path); 101 | void eio_wd_close_sync (eio_wd wd); 102 | 103 | /* for readdir */ 104 | 105 | /* eio_readdir flags */ 106 | enum 107 | { 108 | EIO_READDIR_DENTS = 0x01, /* ptr2 contains eio_dirents, not just the (unsorted) names */ 109 | EIO_READDIR_DIRS_FIRST = 0x02, /* dirents gets sorted into a good stat() ing order to find directories first */ 110 | EIO_READDIR_STAT_ORDER = 0x04, /* dirents gets sorted into a good 
stat() ing order to quickly stat all files */ 111 | EIO_READDIR_FOUND_UNKNOWN = 0x80, /* set by eio_readdir when *_ARRAY was set and any TYPE=UNKNOWN's were found */ 112 | 113 | EIO_READDIR_CUSTOM1 = 0x100, /* for use by apps */ 114 | EIO_READDIR_CUSTOM2 = 0x200 /* for use by apps */ 115 | }; 116 | 117 | /* using "typical" values in the hope that the compiler will do something sensible */ 118 | enum eio_dtype 119 | { 120 | EIO_DT_UNKNOWN = 0, 121 | EIO_DT_FIFO = 1, 122 | EIO_DT_CHR = 2, 123 | EIO_DT_MPC = 3, /* multiplexed char device (v7+coherent) */ 124 | EIO_DT_DIR = 4, 125 | EIO_DT_NAM = 5, /* xenix special named file */ 126 | EIO_DT_BLK = 6, 127 | EIO_DT_MPB = 7, /* multiplexed block device (v7+coherent) */ 128 | EIO_DT_REG = 8, 129 | EIO_DT_NWK = 9, /* HP-UX network special */ 130 | EIO_DT_CMP = 9, /* VxFS compressed */ 131 | EIO_DT_LNK = 10, 132 | /* DT_SHAD = 11,*/ 133 | EIO_DT_SOCK = 12, 134 | EIO_DT_DOOR = 13, /* solaris door */ 135 | EIO_DT_WHT = 14, 136 | EIO_DT_MAX = 15 /* highest DT_VALUE ever, hopefully */ 137 | }; 138 | 139 | struct eio_dirent 140 | { 141 | int nameofs; /* offset of null-terminated name string in (char *)req->ptr2 */ 142 | unsigned short namelen; /* size of filename without trailing 0 */ 143 | unsigned char type; /* one of EIO_DT_* */ 144 | signed char score; /* internal use */ 145 | eio_ino_t inode; /* the inode number, if available, otherwise unspecified */ 146 | }; 147 | 148 | /* eio_msync flags */ 149 | enum 150 | { 151 | EIO_MS_ASYNC = 1, 152 | EIO_MS_INVALIDATE = 2, 153 | EIO_MS_SYNC = 4 154 | }; 155 | 156 | /* eio_mtouch flags */ 157 | enum 158 | { 159 | EIO_MT_MODIFY = 1 160 | }; 161 | 162 | /* eio_sync_file_range flags */ 163 | enum 164 | { 165 | EIO_SYNC_FILE_RANGE_WAIT_BEFORE = 1, 166 | EIO_SYNC_FILE_RANGE_WRITE = 2, 167 | EIO_SYNC_FILE_RANGE_WAIT_AFTER = 4 168 | }; 169 | 170 | /* eio_fallocate flags */ 171 | enum 172 | { 173 | /* these MUST match the value in linux/falloc.h */ 174 | EIO_FALLOC_FL_KEEP_SIZE = 1, 175 | 
EIO_FALLOC_FL_PUNCH_HOLE = 2 176 | }; 177 | 178 | /* timestamps and differences - feel free to use double in your code directly */ 179 | typedef double eio_tstamp; 180 | 181 | /* eio request types (stored in eio_req.type); the order matters, see the grouping comments below */ 182 | enum 183 | { 184 | EIO_CUSTOM, 185 | EIO_WD_OPEN, EIO_WD_CLOSE, 186 | 187 | EIO_CLOSE, EIO_DUP2, 188 | EIO_SEEK, EIO_READ, EIO_WRITE, 189 | EIO_READAHEAD, EIO_SENDFILE, 190 | EIO_FSTAT, EIO_FSTATVFS, 191 | EIO_FTRUNCATE, EIO_FUTIME, EIO_FCHMOD, EIO_FCHOWN, 192 | EIO_SYNC, EIO_FSYNC, EIO_FDATASYNC, EIO_SYNCFS, 193 | EIO_MSYNC, EIO_MTOUCH, EIO_SYNC_FILE_RANGE, EIO_FALLOCATE, 194 | EIO_MLOCK, EIO_MLOCKALL, 195 | EIO_GROUP, EIO_NOP, 196 | EIO_BUSY, 197 | 198 | /* these use wd + ptr1, but are emulated */ 199 | EIO_REALPATH, 200 | EIO_READDIR, 201 | 202 | /* all the following requests use wd + ptr1 as path in xxxat functions; eio_execute resolves the working directory for every type >= EIO_OPEN */ 203 | EIO_OPEN, 204 | EIO_STAT, EIO_LSTAT, EIO_STATVFS, 205 | EIO_TRUNCATE, 206 | EIO_UTIME, 207 | EIO_CHMOD, 208 | EIO_CHOWN, 209 | EIO_UNLINK, EIO_RMDIR, EIO_MKDIR, EIO_RENAME, 210 | EIO_MKNOD, 211 | EIO_LINK, EIO_SYMLINK, EIO_READLINK, 212 | 213 | EIO_REQ_TYPE_NUM 214 | }; 215 | 216 | /* seek whence modes */ 217 | /* these are guaranteed to have the traditional 0, 1, 2 values, */ 218 | /* so you might as well use those */ 219 | enum 220 | { 221 | EIO_SEEK_SET = 0, 222 | EIO_SEEK_CUR = 1, 223 | EIO_SEEK_END = 2 224 | }; 225 | 226 | 227 | /* mlockall constants */ 228 | enum 229 | { 230 | EIO_MCL_CURRENT = 1, 231 | EIO_MCL_FUTURE = 2 232 | }; 233 | 234 | /* request priorities */ 235 | 236 | enum { 237 | EIO_PRI_MIN = -4, 238 | EIO_PRI_MAX = 4, 239 | EIO_PRI_DEFAULT = 0 240 | }; 241 | 242 | /* eio request structure */ 243 | /* this structure is mostly read-only */ 244 | /* when initialising it, all members must be zero-initialised */ 245 | struct eio_req 246 | { 247 | eio_req volatile *next; /* private ETP */ 248 | 249 | eio_wd wd; /* all applicable requests: working directory of pathname, old name; wd_open: return wd */ 250 | 251 | 
eio_ssize_t result; /* result of syscall, e.g. result = read (... */ 252 | off_t offs; /* read, write, truncate, readahead, sync_file_range, fallocate: file offset, mknod: dev_t */ 253 | size_t size; /* read, write, readahead, sendfile, msync, mlock, sync_file_range, fallocate: length */ 254 | void *ptr1; /* all applicable requests: pathname, old name; readdir: optional eio_dirents */ 255 | void *ptr2; /* all applicable requests: new name or memory buffer; readdir: name strings */ 256 | eio_tstamp nv1; /* utime, futime: atime; busy: sleep time */ 257 | eio_tstamp nv2; /* utime, futime: mtime */ 258 | 259 | int type; /* EIO_xxx constant ETP */ 260 | int int1; /* all applicable requests: file descriptor; sendfile: output fd; open, msync, mlockall, readdir: flags */ 261 | long int2; /* chown, fchown: uid; sendfile: input fd; open, chmod, mkdir, mknod: file mode, seek: whence, sync_file_range, fallocate: flags */ 262 | long int3; /* chown, fchown: gid; rename, link: working directory of new name */ 263 | int errorno; /* errno value on syscall return */ 264 | 265 | #if __i386 || __amd64 266 | unsigned char cancelled; 267 | #else 268 | sig_atomic_t cancelled; 269 | #endif 270 | 271 | unsigned char flags; /* private */ 272 | signed char pri; /* the priority */ 273 | 274 | void *data; 275 | eio_cb finish; 276 | void (*destroy)(eio_req *req); /* called when request no longer needed */ 277 | void (*feed)(eio_req *req); /* only used for group requests */ 278 | 279 | EIO_REQ_MEMBERS 280 | 281 | eio_req *grp, *grp_prev, *grp_next, *grp_first; /* private */ 282 | }; 283 | 284 | /* _private_ request flags */ 285 | enum { 286 | EIO_FLAG_PTR1_FREE = 0x01, /* need to free(ptr1) */ 287 | EIO_FLAG_PTR2_FREE = 0x02, /* need to free(ptr2) */ 288 | EIO_FLAG_GROUPADD = 0x04 /* some request was added to the group */ 289 | }; 290 | 291 | /* undocumented/unsupported/private helper */ 292 | /*void eio_page_align (void **addr, size_t *length);*/ 293 | 294 | /* returns < 0 on error, errno set 
295 | * need_poll, if non-zero, will be called when results are available 296 | * and eio_poll_cb needs to be invoked (it MUST NOT call eio_poll_cb itself). 297 | * done_poll is called when the need to poll is gone. 298 | */ 299 | int eio_init (void (*want_poll)(void), void (*done_poll)(void)); 300 | 301 | /* must be called regularly to handle pending requests */ 302 | /* returns 0 if all requests were handled, -1 if not, or the value of EIO_FINISH if != 0 */ 303 | int eio_poll (void); 304 | 305 | /* stop polling if poll took longer than nseconds seconds */ 306 | void eio_set_max_poll_time (eio_tstamp nseconds); 307 | /* do not handle more than nreqs requests in one call to eio_poll_cb */ 308 | void eio_set_max_poll_reqs (unsigned int nreqs); 309 | 310 | /* set minimum required number 311 | * maximum wanted number 312 | * or maximum idle number of threads */ 313 | void eio_set_min_parallel (unsigned int nthreads); 314 | void eio_set_max_parallel (unsigned int nthreads); 315 | void eio_set_max_idle (unsigned int nthreads); 316 | void eio_set_idle_timeout (unsigned int seconds); 317 | 318 | unsigned int eio_nreqs (void); /* number of requests in-flight */ 319 | unsigned int eio_nready (void); /* number of not-yet handled requests */ 320 | unsigned int eio_npending (void); /* number of finished but unhandled requests */ 321 | unsigned int eio_nthreads (void); /* number of worker threads in use currently */ 322 | 323 | /*****************************************************************************/ 324 | /* convenience wrappers */ 325 | 326 | #ifndef EIO_NO_WRAPPERS 327 | eio_req *eio_wd_open (const char *path, int pri, eio_cb cb, void *data); /* result=wd */ 328 | eio_req *eio_wd_close (eio_wd wd, int pri, eio_cb cb, void *data); 329 | eio_req *eio_nop (int pri, eio_cb cb, void *data); /* does nothing except go through the whole process */ 330 | eio_req *eio_busy (eio_tstamp delay, int pri, eio_cb cb, void *data); /* ties a thread for this long, simulating busyness */ 
331 | eio_req *eio_sync (int pri, eio_cb cb, void *data); 332 | eio_req *eio_fsync (int fd, int pri, eio_cb cb, void *data); 333 | eio_req *eio_fdatasync (int fd, int pri, eio_cb cb, void *data); 334 | eio_req *eio_syncfs (int fd, int pri, eio_cb cb, void *data); 335 | eio_req *eio_msync (void *addr, size_t length, int flags, int pri, eio_cb cb, void *data); 336 | eio_req *eio_mtouch (void *addr, size_t length, int flags, int pri, eio_cb cb, void *data); 337 | eio_req *eio_mlock (void *addr, size_t length, int pri, eio_cb cb, void *data); 338 | eio_req *eio_mlockall (int flags, int pri, eio_cb cb, void *data); 339 | eio_req *eio_sync_file_range (int fd, off_t offset, size_t nbytes, unsigned int flags, int pri, eio_cb cb, void *data); 340 | eio_req *eio_fallocate (int fd, int mode, off_t offset, size_t len, int pri, eio_cb cb, void *data); 341 | eio_req *eio_close (int fd, int pri, eio_cb cb, void *data); 342 | eio_req *eio_readahead (int fd, off_t offset, size_t length, int pri, eio_cb cb, void *data); 343 | eio_req *eio_seek (int fd, off_t offset, int whence, int pri, eio_cb cb, void *data); 344 | eio_req *eio_read (int fd, void *buf, size_t length, off_t offset, int pri, eio_cb cb, void *data); 345 | eio_req *eio_write (int fd, void *buf, size_t length, off_t offset, int pri, eio_cb cb, void *data); 346 | eio_req *eio_fstat (int fd, int pri, eio_cb cb, void *data); /* stat buffer=ptr2 allocated dynamically */ 347 | eio_req *eio_fstatvfs (int fd, int pri, eio_cb cb, void *data); /* stat buffer=ptr2 allocated dynamically */ 348 | eio_req *eio_futime (int fd, eio_tstamp atime, eio_tstamp mtime, int pri, eio_cb cb, void *data); 349 | eio_req *eio_ftruncate (int fd, off_t offset, int pri, eio_cb cb, void *data); 350 | eio_req *eio_fchmod (int fd, mode_t mode, int pri, eio_cb cb, void *data); 351 | eio_req *eio_fchown (int fd, eio_uid_t uid, eio_gid_t gid, int pri, eio_cb cb, void *data); 352 | eio_req *eio_dup2 (int fd, int fd2, int pri, eio_cb cb, void *data); 353 | 
eio_req *eio_sendfile (int out_fd, int in_fd, off_t in_offset, size_t length, int pri, eio_cb cb, void *data); 354 | eio_req *eio_open (const char *path, int flags, mode_t mode, int pri, eio_cb cb, void *data); 355 | eio_req *eio_utime (const char *path, eio_tstamp atime, eio_tstamp mtime, int pri, eio_cb cb, void *data); 356 | eio_req *eio_truncate (const char *path, off_t offset, int pri, eio_cb cb, void *data); 357 | eio_req *eio_chown (const char *path, eio_uid_t uid, eio_gid_t gid, int pri, eio_cb cb, void *data); 358 | eio_req *eio_chmod (const char *path, mode_t mode, int pri, eio_cb cb, void *data); 359 | eio_req *eio_mkdir (const char *path, mode_t mode, int pri, eio_cb cb, void *data); 360 | eio_req *eio_readdir (const char *path, int flags, int pri, eio_cb cb, void *data); /* result=ptr2 allocated dynamically */ 361 | eio_req *eio_rmdir (const char *path, int pri, eio_cb cb, void *data); 362 | eio_req *eio_unlink (const char *path, int pri, eio_cb cb, void *data); 363 | eio_req *eio_readlink (const char *path, int pri, eio_cb cb, void *data); /* result=ptr2 allocated dynamically */ 364 | eio_req *eio_realpath (const char *path, int pri, eio_cb cb, void *data); /* result=ptr2 allocated dynamically */ 365 | eio_req *eio_stat (const char *path, int pri, eio_cb cb, void *data); /* stat buffer=ptr2 allocated dynamically */ 366 | eio_req *eio_lstat (const char *path, int pri, eio_cb cb, void *data); /* stat buffer=ptr2 allocated dynamically */ 367 | eio_req *eio_statvfs (const char *path, int pri, eio_cb cb, void *data); /* stat buffer=ptr2 allocated dynamically */ 368 | eio_req *eio_mknod (const char *path, mode_t mode, dev_t dev, int pri, eio_cb cb, void *data); 369 | eio_req *eio_link (const char *path, const char *new_path, int pri, eio_cb cb, void *data); 370 | eio_req *eio_symlink (const char *path, const char *new_path, int pri, eio_cb cb, void *data); 371 | eio_req *eio_rename (const char *path, const char *new_path, int pri, eio_cb cb, void *data); 
372 | eio_req *eio_custom (void (*execute)(eio_req *), int pri, eio_cb cb, void *data); 373 | #endif 374 | 375 | /*****************************************************************************/ 376 | /* groups */ 377 | 378 | eio_req *eio_grp (eio_cb cb, void *data); 379 | void eio_grp_feed (eio_req *grp, void (*feed)(eio_req *req), int limit); 380 | void eio_grp_limit (eio_req *grp, int limit); 381 | void eio_grp_add (eio_req *grp, eio_req *req); 382 | void eio_grp_cancel (eio_req *grp); /* cancels all sub requests but not the group */ 383 | 384 | /*****************************************************************************/ 385 | /* request api */ 386 | 387 | /* true if the request was cancelled, useful in the invoke callback */ 388 | #define EIO_CANCELLED(req) ((req)->cancelled) 389 | 390 | #define EIO_RESULT(req) ((req)->result) 391 | /* returns a pointer to the result buffer allocated by eio */ 392 | #define EIO_BUF(req) ((req)->ptr2) 393 | #define EIO_STAT_BUF(req) ((EIO_STRUCT_STAT *)EIO_BUF(req)) 394 | #define EIO_STATVFS_BUF(req) ((EIO_STRUCT_STATVFS *)EIO_BUF(req)) 395 | #define EIO_PATH(req) ((char *)(req)->ptr1) 396 | 397 | /* submit a request for execution */ 398 | void eio_submit (eio_req *req); 399 | /* cancel a request as soon as possible, if possible */ 400 | void eio_cancel (eio_req *req); 401 | 402 | /*****************************************************************************/ 403 | /* convenience functions */ 404 | 405 | eio_ssize_t eio_sendfile_sync (int ofd, int ifd, off_t offset, size_t count); 406 | 407 | #ifdef __cplusplus 408 | } 409 | #endif 410 | 411 | #endif 412 | 413 | -------------------------------------------------------------------------------- /eio.pod: -------------------------------------------------------------------------------- 1 | =head1 NAME 2 | 3 | libeio - truly asynchronous POSIX I/O 4 | 5 | =head1 SYNOPSIS 6 | 7 | #include <eio.h> 8 | 9 | =head1 DESCRIPTION 10 | 11 | The newest version of this document is also available
as an html-formatted 12 | web page you might find easier to navigate when reading it for the first 13 | time: L. 14 | 15 | Note that this library is a by-product of the C perl 16 | module, and many of the subtler points regarding requests lifetime 17 | and so on are only documented in its documentation at the 18 | moment: L. 19 | 20 | =head2 FEATURES 21 | 22 | This library provides fully asynchronous versions of most POSIX functions 23 | dealing with I/O. Unlike most asynchronous libraries, this not only 24 | includes C and C, but also C, C, C and 25 | similar functions, as well as less commonly used ones such as C, C 26 | or C. 27 | 28 | It also offers wrappers around C (Solaris, Linux, HP-UX and 29 | FreeBSD, with emulation on other platforms) and C (Linux, with 30 | emulation elsewhere). 31 | 32 | The goal is to enable you to write fully non-blocking programs. For 33 | example, in a game server, you would not want to freeze for a few seconds 34 | just because the server is running a backup and you happen to call 35 | C. 36 | 37 | =head2 TIME REPRESENTATION 38 | 39 | Libeio represents time as a single floating point number, representing the 40 | (fractional) number of seconds since the (POSIX) epoch (somewhere near 41 | the beginning of 1970, details are complicated, don't ask). This type is 42 | called C, but it is guaranteed to be of type C (or 43 | better), so you can freely use C yourself. 44 | 45 | Unlike the name component C might indicate, it is also used for 46 | time differences throughout libeio. 47 | 48 | =head2 FORK SUPPORT 49 | 50 | Usage of pthreads in a program changes the semantics of fork 51 | considerably. Specifically, only async-safe functions can be called after 52 | fork. Libeio uses pthreads, so this applies, and makes using fork hard for 53 | anything but relatively simple fork + exec uses.
54 | 55 | This library only works in the process that initialised it: Forking is 56 | fully supported, but using libeio in any other process than the one that 57 | called C<eio_init> is not. 58 | 59 | You might get around by not I<using> libeio before (or after) forking in 60 | the parent, and using it in the child afterwards. You could also try to 61 | call the L</eio_init> function again in the child, which will brutally 62 | reinitialise all data structures, which isn't POSIX conformant, but 63 | typically works. 64 | 65 | Otherwise, the only recommendation you should follow is: treat fork code 66 | the same way you treat signal handlers, and only ever call C<eio_init> in 67 | the process that uses it, and only once ever. 68 | 69 | =head1 INITIALISATION/INTEGRATION 70 | 71 | Before you can call any eio functions you first have to initialise the 72 | library. The library integrates into any event loop, but can also be used 73 | without one, including in polling mode. 74 | 75 | You have to provide the necessary glue yourself, however. 76 | 77 | =over 4 78 | 79 | =item int eio_init (void (*want_poll)(void), void (*done_poll)(void)) 80 | 81 | This function initialises the library. On success it returns C<0>, on 82 | failure it returns C<-1> and sets C<errno> appropriately. 83 | 84 | It accepts two function pointers specifying callbacks as argument, both of 85 | which can be C<0>, in which case the callback isn't called. 86 | 87 | There is currently no way to change these callbacks later, or to 88 | "uninitialise" the library again. 89 | 90 | =item want_poll callback 91 | 92 | The C<want_poll> callback is invoked whenever libeio wants attention (i.e. 93 | it wants to be polled by calling C<eio_poll>). It is "edge-triggered", 94 | that is, it will only be called once when eio wants attention, until all 95 | pending requests have been handled. 96 | 97 | This callback is called while locks are being held, so I<you must not 98 | call any libeio functions inside this callback>. That includes 99 | C<eio_poll>. What you should do is notify some other thread, or wake up 100 | your event loop, and then call C<eio_poll>.
101 | 102 | =item done_poll callback 103 | 104 | This callback is invoked when libeio detects that all pending requests 105 | have been handled. It is "edge-triggered", that is, it will only be 106 | called once after C<want_poll>. To put it differently, C<want_poll> and 107 | C<done_poll> are invoked in pairs: after C<want_poll> you have to call 108 | C<eio_poll ()> until either C<eio_poll> indicates that everything has been 109 | handled or C<done_poll> has been called, which signals the same. 110 | 111 | Note that C<eio_poll> might return after C<done_poll> and C<want_poll> 112 | have been called again, so watch out for races in your code. 113 | 114 | As with C<want_poll>, this callback is called while locks are being held, 115 | so you I<must not call any libeio functions from within this callback>. 116 | 117 | =item int eio_poll () 118 | 119 | This function has to be called whenever there are pending requests that 120 | need finishing. You usually call this after C<want_poll> has indicated 121 | that you should do so, but you can also call this function regularly to 122 | poll for new results. 123 | 124 | If any request invocation returns a non-zero value, then C<eio_poll ()> 125 | immediately returns with that value as return value. 126 | 127 | Otherwise, if all requests could be handled, it returns C<0>. If for some 128 | reason not all requests have been handled, i.e. some are still pending, it 129 | returns C<-1>. 130 | 131 | =back 132 | 133 | For libev, you would typically use an C<ev_async> watcher: the 134 | C<want_poll> callback would invoke C<ev_async_send> to wake up the event 135 | loop. Inside the callback set for the watcher, one would call 136 | C<eio_poll ()>. 137 | 138 | If C<eio_poll ()> is configured to not handle all results in one go 139 | (i.e. it returns C<-1>) then you should start an idle watcher that calls 140 | C<eio_poll> until it returns something C<!= -1>.
141 | 142 | A full-featured connector between libeio and libev would look as follows 143 | (if C<eio_poll> is handling all requests, it can of course be simplified a 144 | lot by removing the idle watcher logic): 145 | 146 | static struct ev_loop *loop; 147 | static ev_idle repeat_watcher; 148 | static ev_async ready_watcher; 149 | 150 | /* idle watcher callback, only used when eio_poll */ 151 | /* didn't handle all results in one call */ 152 | static void 153 | repeat (EV_P_ ev_idle *w, int revents) 154 | { 155 | if (eio_poll () != -1) 156 | ev_idle_stop (EV_A_ w); 157 | } 158 | 159 | /* eio has some results, process them */ 160 | static void 161 | ready (EV_P_ ev_async *w, int revents) 162 | { 163 | if (eio_poll () == -1) 164 | ev_idle_start (EV_A_ &repeat_watcher); 165 | } 166 | 167 | /* wake up the event loop */ 168 | static void 169 | want_poll (void) 170 | { 171 | ev_async_send (loop, &ready_watcher); 172 | } 173 | 174 | void 175 | my_init_eio () 176 | { 177 | loop = EV_DEFAULT; 178 | 179 | ev_idle_init (&repeat_watcher, repeat); 180 | ev_async_init (&ready_watcher, ready); 181 | ev_async_start (loop, &ready_watcher); 182 | 183 | eio_init (want_poll, 0); 184 | } 185 | 186 | For most other event loops, you would typically use a pipe - the event 187 | loop should be told to wait for read readiness on the read end. In 188 | C<want_poll> you would write a single byte, in C<done_poll> you would try 189 | to read that byte, and in the callback for the read end, you would call 190 | C<eio_poll>. 191 | 192 | You don't have to take special care in the case C<eio_poll> doesn't handle 193 | all requests, as the done callback will not be invoked, so the event loop 194 | will still signal readiness for the pipe until I<all> results have been 195 | processed. 196 | 197 | 198 | =head1 HIGH LEVEL REQUEST API 199 | 200 | Libeio has both a high-level API, which consists of calling a request 201 | function with a callback to be called on completion, and a low-level API 202 | where you fill out request structures and submit them.
203 | 204 | This section describes the high-level API. 205 | 206 | =head2 REQUEST SUBMISSION AND RESULT PROCESSING 207 | 208 | You submit a request by calling the relevant C function with the 209 | required parameters, a callback of type C 210 | (called C below) and a freely usable C argument. 211 | 212 | The return value will either be 0, in case something went really wrong 213 | (which can basically only happen on very fatal errors, such as C 214 | returning 0, which is rather unlikely), or a pointer to the newly-created 215 | and submitted C. 216 | 217 | The callback will be called with an C which contains the 218 | results of the request. The members you can access inside that structure 219 | vary from request to request, except for: 220 | 221 | =over 4 222 | 223 | =item C 224 | 225 | This contains the result value from the call (usually the same as the 226 | syscall of the same name). 227 | 228 | =item C 229 | 230 | This contains the value of C after the call. 231 | 232 | =item C 233 | 234 | The C member simply stores the value of the C argument. 235 | 236 | =back 237 | 238 | Members not explicitly described as accessible must not be 239 | accessed. Specifically, there is no guarantee that any members will still 240 | have the value they had when the request was submitted. 241 | 242 | The return value of the callback is normally C<0>, which tells libeio to 243 | continue normally. If a callback returns a nonzero value, libeio will 244 | stop processing results (in C) and will return the value to its 245 | caller. 246 | 247 | Memory areas passed to libeio wrappers must stay valid as long as a 248 | request executes, with the exception of paths, which are being copied 249 | internally. Any memory libeio itself allocates will be freed after the 250 | finish callback has been called. If you want to manage all memory passed 251 | to libeio yourself you can use the low-level API. 
252 | 253 | For example, to open a file, you could do this: 254 | 255 | static int 256 | file_open_done (eio_req *req) 257 | { 258 | if (req->result < 0) 259 | { 260 | /* open() returned -1 */ 261 | errno = req->errorno; 262 | perror ("open"); 263 | } 264 | else 265 | { 266 | int fd = req->result; 267 | /* now we have the new fd in fd */ 268 | } 269 | 270 | return 0; 271 | } 272 | 273 | /* the first three arguments are passed to open(2) */ 274 | /* the remaining are priority, callback and data */ 275 | if (!eio_open ("/etc/passwd", O_RDONLY, 0, 0, file_open_done, 0)) 276 | abort (); /* something went wrong, we will all die!!! */ 277 | 278 | Note that you additionally need to call C when the C 279 | indicates that requests are ready to be processed. 280 | 281 | =head2 CANCELLING REQUESTS 282 | 283 | Sometimes the need for a request goes away before the request is 284 | finished. In that case, one can cancel the request by a call to 285 | C: 286 | 287 | =over 4 288 | 289 | =item eio_cancel (eio_req *req) 290 | 291 | Cancel the request (and all its subrequests). If the request is currently 292 | executing it might still continue to execute, and in other cases it might 293 | still take a while till the request is cancelled. 294 | 295 | Even if cancelled, the finish callback will still be invoked - the 296 | callbacks of all cancellable requests need to check whether the request 297 | has been cancelled by calling C: 298 | 299 | static int 300 | my_eio_cb (eio_req *req) 301 | { 302 | if (EIO_CANCELLED (req)) 303 | return 0; 304 | } 305 | 306 | In addition, cancelled requests will I have C<< req->result >> 307 | set to C<-1> and C to C, or I they were 308 | successfully executed, despite being cancelled (e.g. when they have 309 | already been executed at the time they were cancelled). 310 | 311 | C is still true for requests that have successfully 312 | executed, as long as C was called on them at some point. 
313 | 314 | =back 315 | 316 | =head2 AVAILABLE REQUESTS 317 | 318 | The following request functions are available. I of them return the 319 | C on success and C<0> on failure, and I of them have the 320 | same three trailing arguments: C, C and C. The C is 321 | mandatory, but in most cases, you pass in C<0> as C and C<0> or some 322 | custom data value as C. 323 | 324 | =head3 POSIX API WRAPPERS 325 | 326 | These requests simply wrap the POSIX call of the same name, with the same 327 | arguments. If a function is not implemented by the OS and cannot be emulated 328 | in some way, then all of these return C<-1> and set C to C. 329 | 330 | =over 4 331 | 332 | =item eio_open (const char *path, int flags, mode_t mode, int pri, eio_cb cb, void *data) 333 | 334 | =item eio_truncate (const char *path, off_t offset, int pri, eio_cb cb, void *data) 335 | 336 | =item eio_chown (const char *path, uid_t uid, gid_t gid, int pri, eio_cb cb, void *data) 337 | 338 | =item eio_chmod (const char *path, mode_t mode, int pri, eio_cb cb, void *data) 339 | 340 | =item eio_mkdir (const char *path, mode_t mode, int pri, eio_cb cb, void *data) 341 | 342 | =item eio_rmdir (const char *path, int pri, eio_cb cb, void *data) 343 | 344 | =item eio_unlink (const char *path, int pri, eio_cb cb, void *data) 345 | 346 | =item eio_utime (const char *path, eio_tstamp atime, eio_tstamp mtime, int pri, eio_cb cb, void *data) 347 | 348 | =item eio_mknod (const char *path, mode_t mode, dev_t dev, int pri, eio_cb cb, void *data) 349 | 350 | =item eio_link (const char *path, const char *new_path, int pri, eio_cb cb, void *data) 351 | 352 | =item eio_symlink (const char *path, const char *new_path, int pri, eio_cb cb, void *data) 353 | 354 | =item eio_rename (const char *path, const char *new_path, int pri, eio_cb cb, void *data) 355 | 356 | =item eio_mlock (void *addr, size_t length, int pri, eio_cb cb, void *data) 357 | 358 | =item eio_close (int fd, int pri, eio_cb cb, void *data) 359 | 360 | =item 
eio_sync (int pri, eio_cb cb, void *data) 361 | 362 | =item eio_fsync (int fd, int pri, eio_cb cb, void *data) 363 | 364 | =item eio_fdatasync (int fd, int pri, eio_cb cb, void *data) 365 | 366 | =item eio_futime (int fd, eio_tstamp atime, eio_tstamp mtime, int pri, eio_cb cb, void *data) 367 | 368 | =item eio_ftruncate (int fd, off_t offset, int pri, eio_cb cb, void *data) 369 | 370 | =item eio_fchmod (int fd, mode_t mode, int pri, eio_cb cb, void *data) 371 | 372 | =item eio_fchown (int fd, uid_t uid, gid_t gid, int pri, eio_cb cb, void *data) 373 | 374 | =item eio_dup2 (int fd, int fd2, int pri, eio_cb cb, void *data) 375 | 376 | These have the same semantics as the syscall of the same name, their 377 | return value is available as C<< req->result >> later. 378 | 379 | =item eio_read (int fd, void *buf, size_t length, off_t offset, int pri, eio_cb cb, void *data) 380 | 381 | =item eio_write (int fd, void *buf, size_t length, off_t offset, int pri, eio_cb cb, void *data) 382 | 383 | These two requests are called C and C, but actually wrap 384 | C and C. On systems that lack these calls (such as cygwin), 385 | libeio uses lseek/read_or_write/lseek and a mutex to serialise the 386 | requests, so all these requests run serially and do not disturb each 387 | other. However, they still disturb the file offset while they run, so it's 388 | not safe to call these functions concurrently with non-libeio functions on 389 | the same fd on these systems. 390 | 391 | Not surprisingly, pread and pwrite are not thread-safe on Darwin (OS/X), 392 | so it is advised not to submit multiple requests on the same fd on this 393 | horrible pile of garbage. 394 | 395 | =item eio_mlockall (int flags, int pri, eio_cb cb, void *data) 396 | 397 | Like C, but the flag value constants are called 398 | C and C. 399 | 400 | =item eio_msync (void *addr, size_t length, int flags, int pri, eio_cb cb, void *data) 401 | 402 | Just like msync, except that the flag values are called C, 403 | C and C. 
404 | 405 | =item eio_readlink (const char *path, int pri, eio_cb cb, void *data) 406 | 407 | If successful, the path read by C can be accessed via C<< 408 | req->ptr2 >> and is I null-terminated, with the length specified as 409 | C<< req->result >>. 410 | 411 | if (req->result >= 0) 412 | { 413 | char *target = strndup ((char *)req->ptr2, req->result); 414 | 415 | free (target); 416 | } 417 | 418 | =item eio_realpath (const char *path, int pri, eio_cb cb, void *data) 419 | 420 | Similar to the realpath libc function, but unlike that one, C<< 421 | req->result >> is C<-1> on failure. On success, the result is the length 422 | of the returned path in C (which is I 0-terminated) - this is 423 | similar to readlink. 424 | 425 | =item eio_stat (const char *path, int pri, eio_cb cb, void *data) 426 | 427 | =item eio_lstat (const char *path, int pri, eio_cb cb, void *data) 428 | 429 | =item eio_fstat (int fd, int pri, eio_cb cb, void *data) 430 | 431 | Stats a file - if C<< req->result >> indicates success, then you can 432 | access the C-like structure via C<< req->ptr2 >>: 433 | 434 | EIO_STRUCT_STAT *statdata = (EIO_STRUCT_STAT *)req->ptr2; 435 | 436 | =item eio_statvfs (const char *path, int pri, eio_cb cb, void *data) 437 | 438 | =item eio_fstatvfs (int fd, int pri, eio_cb cb, void *data) 439 | 440 | Stats a filesystem - if C<< req->result >> indicates success, then you can 441 | access the C-like structure via C<< req->ptr2 >>: 442 | 443 | EIO_STRUCT_STATVFS *statdata = (EIO_STRUCT_STATVFS *)req->ptr2; 444 | 445 | =back 446 | 447 | =head3 READING DIRECTORIES 448 | 449 | Reading directories sounds simple, but can be rather demanding, especially 450 | if you want to do stuff such as traversing a directory hierarchy or 451 | processing all files in a directory. Libeio can assist these complex tasks 452 | with it's C call. 
453 | 454 | =over 4 455 | 456 | =item eio_readdir (const char *path, int flags, int pri, eio_cb cb, void *data) 457 | 458 | This is a very complex call. It basically reads through a whole directory 459 | (via the C, C and C calls) and returns either 460 | the names or an array of C, depending on the C 461 | argument. 462 | 463 | The C<< req->result >> indicates either the number of files found, or 464 | C<-1> on error. On success, null-terminated names can be found as C<< req->ptr2 >>, 465 | and C, if requested by C, can be found via C<< 466 | req->ptr1 >>. 467 | 468 | Here is an example that prints all the names: 469 | 470 | int i; 471 | char *names = (char *)req->ptr2; 472 | 473 | for (i = 0; i < req->result; ++i) 474 | { 475 | printf ("name #%d: %s\n", i, names); 476 | 477 | /* move to next name */ 478 | names += strlen (names) + 1; 479 | } 480 | 481 | Pseudo-entries such as F<.> and F<..> are never returned by C. 482 | 483 | C can be any combination of: 484 | 485 | =over 4 486 | 487 | =item EIO_READDIR_DENTS 488 | 489 | If this flag is specified, then, in addition to the names in C, 490 | also an array of C is returned, in C. A C looks like this: 492 | 493 | struct eio_dirent 494 | { 495 | int nameofs; /* offset of null-terminated name string in (char *)req->ptr2 */ 496 | unsigned short namelen; /* size of filename without trailing 0 */ 497 | unsigned char type; /* one of EIO_DT_* */ 498 | signed char score; /* internal use */ 499 | ino_t inode; /* the inode number, if available, otherwise unspecified */ 500 | }; 501 | 502 | The only members you normally would access are C, which is the 503 | byte-offset from C to the start of the name, C and C. 
504 | 505 | C can be one of: 506 | 507 | C - if the type is not known (very common) and you have to C 508 | the name yourself if you need to know, 509 | one of the "standard" POSIX file types (C, C, C, 510 | C, C, C, C) 511 | or some OS-specific type (currently 512 | C - multiplexed char device (v7+coherent), 513 | C - xenix special named file, 514 | C - multiplexed block device (v7+coherent), 515 | C - HP-UX network special, 516 | C - VxFS compressed, 517 | C - solaris door, or 518 | C). 519 | 520 | This example prints all names and their type: 521 | 522 | int i; 523 | struct eio_dirent *ents = (struct eio_dirent *)req->ptr1; 524 | char *names = (char *)req->ptr2; 525 | 526 | for (i = 0; i < req->result; ++i) 527 | { 528 | struct eio_dirent *ent = ents + i; 529 | char *name = names + ent->nameofs; 530 | 531 | printf ("name #%d: %s (type %d)\n", i, name, ent->type); 532 | } 533 | 534 | =item EIO_READDIR_DIRS_FIRST 535 | 536 | When this flag is specified, then the names will be returned in an order 537 | where likely directories come first, in optimal C order. This is 538 | useful when you need to quickly find directories, or you want to find all 539 | directories while avoiding to stat() each entry. 540 | 541 | If the system returns type information in readdir, then this is used 542 | to find directories directly. Otherwise, likely directories are names 543 | beginning with ".", or otherwise names with no dots, of which names with 544 | short names are tried first. 545 | 546 | =item EIO_READDIR_STAT_ORDER 547 | 548 | When this flag is specified, then the names will be returned in an order 549 | suitable for stat()'ing each one. That is, when you plan to stat() 550 | all files in the given directory, then the returned order will likely 551 | be fastest. 552 | 553 | If both this flag and C are specified, then the 554 | likely directories come first, resulting in a less optimal stat order. 
555 | 556 | =item EIO_READDIR_FOUND_UNKNOWN 557 | 558 | This flag should not be specified when calling C. Instead, 559 | it is being set by C (you can access the C via C<< 560 | req->int1 >>, when any of the C's found were C. The 561 | absence of this flag therefore indicates that all C's are known, 562 | which can be used to speed up some algorithms. 563 | 564 | A typical use case would be to identify all subdirectories within a 565 | directory - you would ask C for C. If 566 | then this flag is I set, then all the entries at the beginning of the 567 | returned array of type C are the directories. Otherwise, you 568 | should start C'ing the entries starting at the beginning of the 569 | array, stopping as soon as you found all directories (the count can be 570 | deduced by the link count of the directory). 571 | 572 | =back 573 | 574 | =back 575 | 576 | =head3 OS-SPECIFIC CALL WRAPPERS 577 | 578 | These wrap OS-specific calls (usually Linux ones), and might or might not 579 | be emulated on other operating systems. Calls that are not emulated will 580 | return C<-1> and set C to C. 581 | 582 | =over 4 583 | 584 | =item eio_sendfile (int out_fd, int in_fd, off_t in_offset, size_t length, int pri, eio_cb cb, void *data) 585 | 586 | Wraps the C syscall. The arguments follow the Linux version, but 587 | libeio supports and will use similar calls on FreeBSD, HP/UX, Solaris and 588 | Darwin. 589 | 590 | If the OS doesn't support some sendfile-like call, or the call fails, 591 | indicating support for the given file descriptor type (for example, 592 | Linux's sendfile might not support file to file copies), then libeio will 593 | emulate the call in userspace, so there are almost no limitations on its 594 | use. 595 | 596 | =item eio_readahead (int fd, off_t offset, size_t length, int pri, eio_cb cb, void *data) 597 | 598 | Calls C. If the syscall is missing, then the call is 599 | emulated by simply reading the data (currently in 64kiB chunks). 
600 | 601 | =item eio_syncfs (int fd, int pri, eio_cb cb, void *data) 602 | 603 | Calls Linux' C syscall, if available. Returns C<-1> and sets 604 | C to C if the call is missing I, 605 | if the C is C<< >= 0 >>, so you can probe for the availability of the 606 | syscall with a negative C argument and checking for C<-1/ENOSYS>. 607 | 608 | =item eio_sync_file_range (int fd, off_t offset, size_t nbytes, unsigned int flags, int pri, eio_cb cb, void *data) 609 | 610 | Calls C. If the syscall is missing, then this is the same 611 | as calling C. 612 | 613 | Flags can be any combination of C, 614 | C and C. 615 | 616 | =item eio_fallocate (int fd, int mode, off_t offset, size_t len, int pri, eio_cb cb, void *data) 617 | 618 | Calls C (note: I C!). If the syscall is 619 | missing, then it returns failure and sets C to C. 620 | 621 | The C argument can be C<0> (for behaviour similar to 622 | C), or C, which keeps the size 623 | of the file unchanged (but still preallocates space beyond end of file). 624 | 625 | =back 626 | 627 | =head3 LIBEIO-SPECIFIC REQUESTS 628 | 629 | These requests are specific to libeio and do not correspond to any OS call. 630 | 631 | =over 4 632 | 633 | =item eio_mtouch (void *addr, size_t length, int flags, int pri, eio_cb cb, void *data) 634 | 635 | Reads (C) or modifies (C as parameter and is expected to read 648 | and modify any request-specific members. Specifically, it should set C<< 649 | req->result >> to the result value, just like other requests. 650 | 651 | Here is an example that simply calls C, like C, but it 652 | uses the C member as filename and uses a hardcoded C. If 653 | you want to pass more/other parameters, you either need to pass some 654 | struct or so via C or provide your own wrapper using the low-level 655 | API.
656 | 657 | static int 658 | my_open_done (eio_req *req) 659 | { 660 | int fd = req->result; 661 | 662 | return 0; 663 | } 664 | 665 | static void 666 | my_open (eio_req *req) 667 | { 668 | req->result = open (req->data, O_RDONLY); 669 | } 670 | 671 | eio_custom (my_open, 0, my_open_done, "/etc/passwd"); 672 | 673 | =item eio_busy (eio_tstamp delay, int pri, eio_cb cb, void *data) 674 | 675 | This is a request that takes C seconds to execute, but otherwise 676 | does nothing - it simply puts one of the worker threads to sleep for this 677 | long. 678 | 679 | This request can be used to artificially increase load, e.g. for debugging 680 | or benchmarking reasons. 681 | 682 | =item eio_nop (int pri, eio_cb cb, void *data) 683 | 684 | This request does nothing, except go through the whole request cycle. This 685 | can be used to measure latency or in some cases to simplify code, but is 686 | not really of much use. 687 | 688 | =back 689 | 690 | =head3 GROUPING AND LIMITING REQUESTS 691 | 692 | There is one more rather special request, C. It is a very special 693 | aio request: Instead of doing something, it is a container for other eio 694 | requests. 695 | 696 | There are two primary use cases for this: a) bundle many requests into a 697 | single, composite, request with a definite callback and the ability to 698 | cancel the whole request with its subrequests and b) limiting the number 699 | of "active" requests. 700 | 701 | Further below you will find more discussion of these topics - first 702 | follows the reference section detailing the request generator and other 703 | methods. 704 | 705 | =over 4 706 | 707 | =item eio_req *grp = eio_grp (eio_cb cb, void *data) 708 | 709 | Creates, submits and returns a group request. Note that it doesn't have a 710 | priority, unlike all other requests. 711 | 712 | =item eio_grp_add (eio_req *grp, eio_req *req) 713 | 714 | Adds a request to the request group. 
715 | 716 | =item eio_grp_cancel (eio_req *grp) 717 | 718 | Cancels all requests I<in> the group, but I<not> the group request 719 | itself. You can cancel the group request I<and> all subrequests via a 720 | normal C<eio_cancel> call. 721 | 722 | =back 723 | 724 | =head4 GROUP REQUEST LIFETIME 725 | 726 | Left alone, a group request will instantly move to the pending state and 727 | will be finished at the next call of C<eio_poll>. 728 | 729 | The usefulness stems from the fact that, if a subrequest is added to a 730 | group I<before> a call to C<eio_poll>, via C<eio_grp_add>, then the group 731 | will not finish until all the subrequests have finished. 732 | 733 | So the usage cycle of a group request is like this: after it is created, 734 | you normally instantly add a subrequest. If none is added, the group 735 | request will finish on its own. As long as subrequests are added before 736 | the group request is finished it will be kept from finishing, that is the 737 | callbacks of any subrequests can, in turn, add more requests to the group, 738 | and as long as any requests are active, the group request itself will not 739 | finish. 740 | 741 | =head4 CREATING COMPOSITE REQUESTS 742 | 743 | Imagine you wanted to create an C<eio_load> request that opens a file, 744 | reads it and closes it. This means it has to execute at least three eio 745 | requests, but for various reasons it might be nice if that request looked 746 | like any other eio request. 747 | 748 | This can be done with groups: 749 | 750 | =over 4 751 | 752 | =item 1) create the request object 753 | 754 | Create a group that contains all further requests. This is the request you 755 | can return as "the load request". 756 | 757 | =item 2) open the file, maybe 758 | 759 | Next, open the file with C<eio_open> and add the request to the group 760 | request and you are finished setting up the request. 761 | 762 | If, for some reason, you cannot C<eio_open> (path is a null ptr?) you 763 | can set C<< grp->result >> to C<-1> to signal an error and let the group 764 | request finish on its own.
765 | 766 | =item 3) open callback adds more requests 767 | 768 | In the open callback, if the open was not successful, copy C<< 769 | req->errorno >> to C<< grp->errorno >> and set C<< grp->result >> to 770 | C<-1> to signal an error. 771 | 772 | Otherwise, malloc some memory or so and issue a read request, adding the 773 | read request to the group. 774 | 775 | =item 4) continue issuing requests till finished 776 | 777 | In the read callback, check for errors and possibly continue with 778 | C<eio_read> or any other eio request in the same way. 779 | 780 | As soon as no new requests are added, the group request will finish. Make 781 | sure you I<always> set C<< grp->result >> to some sensible value. 782 | 783 | =back 784 | 785 | =head4 REQUEST LIMITING 786 | 787 | 788 | #TODO 789 | 790 | void eio_grp_limit (eio_req *grp, int limit); 791 | 792 | 793 | =back 794 | 795 | 796 | =head1 LOW LEVEL REQUEST API 797 | 798 | #TODO 799 | 800 | 801 | =head1 ANATOMY AND LIFETIME OF AN EIO REQUEST 802 | 803 | A request is represented by a structure of type C<eio_req>. To initialise 804 | it, clear it to all zero bytes: 805 | 806 | eio_req req; 807 | 808 | memset (&req, 0, sizeof (req)); 809 | 810 | A more common way to initialise a new C<eio_req> is to use C<calloc>: 811 | 812 | eio_req *req = calloc (1, sizeof (*req)); 813 | 814 | In either case, libeio neither allocates, initialises nor frees the 815 | C<eio_req> structure for you - it merely uses it. 816 | 817 | zero 818 | 819 | #TODO 820 | 821 | =head2 CONFIGURATION 822 | 823 | The functions in this section can sometimes be useful, but the default 824 | configuration will do in most cases, so you should skip this section on 825 | first reading. 826 | 827 | =over 4 828 | 829 | =item eio_set_max_poll_time (eio_tstamp nseconds) 830 | 831 | This causes C<eio_poll> to return after it has detected that it was 832 | running for C<nseconds> seconds or longer (this number can be fractional).
833 | 834 | This can be used to limit the amount of time spent handling eio requests, 835 | for example, in interactive programs, you might want to limit this time to 836 | C<0.01> seconds or so. 837 | 838 | Note that: 839 | 840 | =over 4 841 | 842 | =item a) libeio doesn't know how long your request callbacks take, so the 843 | time spent in C<eio_poll> is up to one callback invocation longer than 844 | this interval. 845 | 846 | =item b) this is implemented by calling C<gettimeofday> after each 847 | request, which can be costly. 848 | 849 | =item c) at least one request will be handled. 850 | 851 | =back 852 | 853 | =item eio_set_max_poll_reqs (unsigned int nreqs) 854 | 855 | When C<nreqs> is non-zero, then C<eio_poll> will not handle more than 856 | C<nreqs> requests per invocation. This is a less costly way to limit the 857 | amount of work done by C<eio_poll> than setting a time limit. 858 | 859 | If you know your callbacks are generally fast, you could use this to 860 | encourage interactiveness in your programs by setting it to C<10>, C<100> 861 | or even C<1000>. 862 | 863 | =item eio_set_min_parallel (unsigned int nthreads) 864 | 865 | Make sure libeio can handle at least this many requests in parallel. It 866 | might be able to handle more. 867 | 868 | =item eio_set_max_parallel (unsigned int nthreads) 869 | 870 | Set the maximum number of threads that libeio will spawn. 871 | 872 | =item eio_set_max_idle (unsigned int nthreads) 873 | 874 | Libeio uses threads internally to handle most requests, and will start and stop threads on demand. 875 | 876 | This call can be used to limit the number of idle threads (threads without 877 | work to do): libeio will keep some threads idle in preparation for more 878 | requests, but never more than C<nthreads> threads. 879 | 880 | In addition to this, libeio will also stop threads when they are idle for 881 | a few seconds, regardless of this setting. 882 | 883 | =item unsigned int eio_nthreads () 884 | 885 | Return the number of worker threads currently running.
886 | 887 | =item unsigned int eio_nreqs () 888 | 889 | Return the number of requests currently handled by libeio. This is the 890 | total number of requests that have been submitted to libeio, but not yet 891 | destroyed. 892 | 893 | =item unsigned int eio_nready () 894 | 895 | Returns the number of ready requests, i.e. requests that have been 896 | submitted but have not yet entered the execution phase. 897 | 898 | =item unsigned int eio_npending () 899 | 900 | Returns the number of pending requests, i.e. requests that have been 901 | executed and have results, but have not been finished yet by a call to 902 | C). 903 | 904 | =back 905 | 906 | =head1 EMBEDDING 907 | 908 | Libeio can be embedded directly into programs. This functionality is not 909 | documented and not (yet) officially supported. 910 | 911 | Note that, when including C, you are responsible for defining 912 | the compilation environment (C<_LARGEFILE_SOURCE>, C<_GNU_SOURCE> etc.). 913 | 914 | If you need to know how, check the C perl module, which does 915 | exactly that. 916 | 917 | 918 | =head1 COMPILETIME CONFIGURATION 919 | 920 | These symbols, if used, must be defined when compiling F. 921 | 922 | =over 4 923 | 924 | =item EIO_STACKSIZE 925 | 926 | This symbol governs the stack size for each eio thread. Libeio itself 927 | was written to use very little stackspace, but when using C 928 | requests, you might want to increase this. 929 | 930 | If this symbol is undefined (the default) then libeio will use its default 931 | stack size (C currently). If it is defined, but 932 | C<0>, then the default operating system stack size will be used. In all 933 | other cases, the value must be an expression that evaluates to the desired 934 | stack size. 
935 | 936 | =back 937 | 938 | 939 | =head1 PORTABILITY REQUIREMENTS 940 | 941 | In addition to a working ISO-C implementation, libeio relies on a few 942 | additional extensions: 943 | 944 | =over 4 945 | 946 | =item POSIX threads 947 | 948 | To be portable, this module uses threads, specifically, the POSIX threads 949 | library must be available (and working, which partially excludes many xBSD 950 | systems, where C is buggy). 951 | 952 | =item POSIX-compatible filesystem API 953 | 954 | This is actually a harder portability requirement: The libeio API is quite 955 | demanding regarding POSIX API calls (symlinks, user/group management 956 | etc.). 957 | 958 | =item C must hold a time value in seconds with enough accuracy 959 | 960 | The type C is used to represent timestamps. It is required to 961 | have at least 51 bits of mantissa (and 9 bits of exponent), which is good 962 | enough for at least into the year 4000. This requirement is fulfilled by 963 | implementations implementing IEEE 754 (basically all existing ones). 964 | 965 | =back 966 | 967 | If you know of other additional requirements drop me a note. 968 | 969 | 970 | =head1 AUTHOR 971 | 972 | Marc Lehmann . 973 | 974 | -------------------------------------------------------------------------------- /libeio.m4: -------------------------------------------------------------------------------- 1 | dnl openbsd in it's neverending brokenness requires stdint.h for intptr_t, 2 | dnl but that header isn't very portable... 
dnl Probe for optional headers; each one found defines HAVE_<HEADER>_H.
AC_CHECK_HEADERS([stdint.h sys/syscall.h sys/prctl.h])

dnl Find the library that provides pthread_create; configure aborts if none does.
AC_SEARCH_LIBS(
   pthread_create,
   [pthread pthreads pthreadVC2],
   ,
   [AC_MSG_ERROR(pthread functions not found)]
)

dnl utimes(2). The program is double-quoted because it contains brackets (tv[2]).
AC_CACHE_CHECK(for utimes, ac_cv_utimes, [AC_LINK_IFELSE([[
#include <sys/types.h>
#include <sys/time.h>
#include <utime.h>
struct timeval tv[2];
int res;
int main (void)
{
   res = utimes ("/", tv);
   return 0;
}
]],ac_cv_utimes=yes,ac_cv_utimes=no)])
test $ac_cv_utimes = yes && AC_DEFINE(HAVE_UTIMES, 1, utimes(2) is available)

dnl futimes(2). Double-quoted for the same reason as the utimes probe.
AC_CACHE_CHECK(for futimes, ac_cv_futimes, [AC_LINK_IFELSE([[
#include <sys/types.h>
#include <sys/time.h>
#include <utime.h>
struct timeval tv[2];
int res;
int fd;
int main (void)
{
   res = futimes (fd, tv);
   return 0;
}
]],ac_cv_futimes=yes,ac_cv_futimes=no)])
test $ac_cv_futimes = yes && AC_DEFINE(HAVE_FUTIMES, 1, futimes(2) is available)

dnl readahead(2) (linux).
AC_CACHE_CHECK(for readahead, ac_cv_readahead, [AC_LINK_IFELSE([
#include <fcntl.h>
int main (void)
{
   int fd = 0;
   size_t count = 2;
   ssize_t res;
   res = readahead (fd, 0, count);
   return 0;
}
],ac_cv_readahead=yes,ac_cv_readahead=no)])
test $ac_cv_readahead = yes && AC_DEFINE(HAVE_READAHEAD, 1, readahead(2) is available (linux))

dnl fdatasync(2).
AC_CACHE_CHECK(for fdatasync, ac_cv_fdatasync, [AC_LINK_IFELSE([
#include <unistd.h>
int main (void)
{
   int fd = 0;
   fdatasync (fd);
   return 0;
}
],ac_cv_fdatasync=yes,ac_cv_fdatasync=no)])
test $ac_cv_fdatasync = yes && AC_DEFINE(HAVE_FDATASYNC, 1, fdatasync(2) is available)

dnl sendfile(2): the prototype differs per platform, so every supported
dnl platform gets its own call. On linux the third argument is a pointer to
dnl the offset. For __APPLE__ only the headers are probed; no call is emitted.
AC_CACHE_CHECK(for sendfile, ac_cv_sendfile, [AC_LINK_IFELSE([
# include <sys/types.h>
#if __linux
# include <sys/sendfile.h>
#elif __FreeBSD__ || defined __APPLE__
# include <sys/socket.h>
# include <sys/uio.h>
#elif __hpux
# include <sys/socket.h>
#else
# error unsupported architecture
#endif
int main (void)
{
   int fd = 0;
   off_t offset = 1;
   size_t count = 2;
   ssize_t res;
#if __linux
   res = sendfile (fd, fd, &offset, count);
#elif __FreeBSD__
   res = sendfile (fd, fd, offset, count, 0, &offset, 0);
#elif __hpux
   res = sendfile (fd, fd, offset, count, 0, 0);
#endif
   return 0;
}
],ac_cv_sendfile=yes,ac_cv_sendfile=no)])
test $ac_cv_sendfile = yes && AC_DEFINE(HAVE_SENDFILE, 1, sendfile(2) is available and supported)

dnl sync_file_range(2).
AC_CACHE_CHECK(for sync_file_range, ac_cv_sync_file_range, [AC_LINK_IFELSE([
#include <fcntl.h>
int main (void)
{
   int fd = 0;
   off64_t offset = 1;
   off64_t nbytes = 1;
   unsigned int flags = SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE|SYNC_FILE_RANGE_WAIT_AFTER;
   ssize_t res;
   res = sync_file_range (fd, offset, nbytes, flags);
   return 0;
}
],ac_cv_sync_file_range=yes,ac_cv_sync_file_range=no)])
test $ac_cv_sync_file_range = yes && AC_DEFINE(HAVE_SYNC_FILE_RANGE, 1, sync_file_range(2) is available)

dnl fallocate(2) (linux).
AC_CACHE_CHECK(for fallocate, ac_cv_linux_fallocate, [AC_LINK_IFELSE([
#include <fcntl.h>
int main (void)
{
   int fd = 0;
   int mode = 0;
   off_t offset = 1;
   off_t len = 1;
   int res;
   res = fallocate (fd, mode, offset, len);
   return 0;
}
],ac_cv_linux_fallocate=yes,ac_cv_linux_fallocate=no)])
test $ac_cv_linux_fallocate = yes && AC_DEFINE(HAVE_LINUX_FALLOCATE, 1, fallocate(2) is available)

dnl syncfs via a direct syscall (__NR_syncfs).
AC_CACHE_CHECK(for sys_syncfs, ac_cv_sys_syncfs, [AC_LINK_IFELSE([
#include <unistd.h>
#include <sys/syscall.h>
int main (void)
{
   int res = syscall (__NR_syncfs, (int)0);
   return 0;
}
],ac_cv_sys_syncfs=yes,ac_cv_sys_syncfs=no)])
test $ac_cv_sys_syncfs = yes && AC_DEFINE(HAVE_SYS_SYNCFS, 1, syscall(__NR_syncfs) is available)

dnl prctl (PR_SET_NAME). Double-quoted like the utimes probes because the
dnl program contains brackets (name[]).
AC_CACHE_CHECK(for prctl_set_name, ac_cv_prctl_set_name, [AC_LINK_IFELSE([[
#include <sys/prctl.h>
int main (void)
{
   char name[] = "test123";
   int res = prctl (PR_SET_NAME, (unsigned long)name, 0, 0, 0);
   return 0;
}
]],ac_cv_prctl_set_name=yes,ac_cv_prctl_set_name=no)])
test $ac_cv_prctl_set_name = yes && AC_DEFINE(HAVE_PRCTL_SET_NAME, 1, prctl(PR_SET_NAME) is available)

dnl #############################################################################
dnl # these checks exist for the benefit of IO::AIO

dnl at least uclibc defines _POSIX_ADVISORY_INFO without *any* of the required
dnl functionality actually being present. ugh.
AC_CACHE_CHECK(for posix_madvise, ac_cv_posix_madvise, [AC_LINK_IFELSE([
#include <sys/mman.h>
int main (void)
{
   int res = posix_madvise ((void *)0, (size_t)0, POSIX_MADV_NORMAL);
   int a = POSIX_MADV_SEQUENTIAL;
   int b = POSIX_MADV_RANDOM;
   int c = POSIX_MADV_WILLNEED;
   int d = POSIX_MADV_DONTNEED;
   return 0;
}
],ac_cv_posix_madvise=yes,ac_cv_posix_madvise=no)])
test $ac_cv_posix_madvise = yes && AC_DEFINE(HAVE_POSIX_MADVISE, 1, posix_madvise(2) is available)

AC_CACHE_CHECK(for posix_fadvise, ac_cv_posix_fadvise, [AC_LINK_IFELSE([
#define _XOPEN_SOURCE 600
#include <fcntl.h>
int main (void)
{
   int res = posix_fadvise ((int)0, (off_t)0, (off_t)0, POSIX_FADV_NORMAL);
   int a = POSIX_FADV_SEQUENTIAL;
   int b = POSIX_FADV_NOREUSE;
   int c = POSIX_FADV_RANDOM;
   int d = POSIX_FADV_WILLNEED;
   int e = POSIX_FADV_DONTNEED;
   return 0;
}
],ac_cv_posix_fadvise=yes,ac_cv_posix_fadvise=no)])
test $ac_cv_posix_fadvise = yes && AC_DEFINE(HAVE_POSIX_FADVISE, 1, posix_fadvise(2) is available)

dnl lots of linux specifics
AC_CHECK_HEADERS([linux/fs.h linux/fiemap.h])

dnl splice/tee/vmsplice. Both offset arguments of splice are pointers.
AC_CACHE_CHECK([for splice, vmsplice and tee], ac_cv_linux_splice, [AC_LINK_IFELSE([
#include <fcntl.h>
int main (void)
{
   ssize_t res;
   res = splice ((int)0, (loff_t *)0, (int)0, (loff_t *)0, (size_t)0, SPLICE_F_MOVE | SPLICE_F_NONBLOCK | SPLICE_F_MORE);
   res = tee ((int)0, (int)0, (size_t)0, SPLICE_F_NONBLOCK);
   res = vmsplice ((int)0, (struct iovec *)0, 0, SPLICE_F_NONBLOCK | SPLICE_F_GIFT);
   return 0;
}
],ac_cv_linux_splice=yes,ac_cv_linux_splice=no)])
test $ac_cv_linux_splice = yes && AC_DEFINE(HAVE_LINUX_SPLICE, 1, splice/vmsplice/tee(2) are available)

--------------------------------------------------------------------------------
/xthread.h:
--------------------------------------------------------------------------------
#ifndef XTHREAD_H_
#define XTHREAD_H_

/* whether word reads are potentially non-atomic.
 * this is conservative, likely most arches this runs
 * on have atomic word read/writes.
 */
#ifndef WORDACCESS_UNSAFE
# if __i386 || __x86_64
#  define WORDACCESS_UNSAFE 0
# else
#  define WORDACCESS_UNSAFE 1
# endif
#endif

/////////////////////////////////////////////////////////////////////////////

#ifdef _WIN32

#define NTDDI_VERSION NTDDI_WIN2K // needed to get win2000 api calls
#define _WIN32_WINNT 0x400
#include <stdio.h>//D
#include <fcntl.h>
#include <io.h>
#include <time.h>
#include <winsock2.h>
#include <process.h>
#include <windows.h>
#include <pthread.h>
/* the signal mask API is stubbed out to nothing in this branch */
#define sigset_t int
#define sigfillset(a)
#define pthread_sigmask(a,b,c)
#define sigaddset(a,b)
#define sigemptyset(s)

typedef pthread_mutex_t xmutex_t;
#define X_MUTEX_INIT           PTHREAD_MUTEX_INITIALIZER
#define X_MUTEX_CREATE(mutex)  pthread_mutex_init (&(mutex), 0)
#define X_LOCK(mutex)          pthread_mutex_lock (&(mutex))
#define X_UNLOCK(mutex)        pthread_mutex_unlock (&(mutex))

typedef pthread_cond_t xcond_t;
#define X_COND_INIT                     PTHREAD_COND_INITIALIZER
#define X_COND_CREATE(cond)             pthread_cond_init (&(cond), 0)
#define X_COND_SIGNAL(cond)             pthread_cond_signal (&(cond))
#define X_COND_WAIT(cond,mutex)         pthread_cond_wait (&(cond), &(mutex))
#define X_COND_TIMEDWAIT(cond,mutex,to) pthread_cond_timedwait (&(cond), &(mutex), &(to)) 48 | 49 | typedef pthread_t xthread_t; 50 | #define X_THREAD_PROC(name) static void *name (void *thr_arg) 51 | #define X_THREAD_ATFORK(a,b,c) 52 | 53 | static int 54 | xthread_create (xthread_t *tid, void *(*proc)(void *), void *arg) 55 | { 56 | int retval; 57 | pthread_attr_t attr; 58 | 59 | pthread_attr_init (&attr); 60 | pthread_attr_setdetachstate (&attr, PTHREAD_CREATE_DETACHED); 61 | 62 | retval = pthread_create (tid, &attr, proc, arg) == 0; 63 | 64 | pthread_attr_destroy (&attr); 65 | 66 | return retval; 67 | } 68 | 69 | #define respipe_read(a,b,c) PerlSock_recv ((a), (b), (c), 0) 70 | #define respipe_write(a,b,c) send ((a), (b), (c), 0) 71 | #define respipe_close(a) PerlSock_closesocket ((a)) 72 | 73 | #else 74 | ///////////////////////////////////////////////////////////////////////////// 75 | 76 | #if __linux && !defined(_GNU_SOURCE) 77 | # define _GNU_SOURCE 78 | #endif 79 | 80 | /* just in case */ 81 | #define _REENTRANT 1 82 | 83 | #if __solaris 84 | # define _POSIX_PTHREAD_SEMANTICS 1 85 | /* try to bribe solaris headers into providing a current pthread API 86 | * despite environment being configured for an older version. 
87 | */ 88 | # define __EXTENSIONS__ 1 89 | #endif 90 | 91 | #include 92 | #include 93 | #include 94 | #include 95 | #include 96 | 97 | typedef pthread_mutex_t xmutex_t; 98 | #if __linux && defined (PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP) 99 | # define X_MUTEX_INIT PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP 100 | # define X_MUTEX_CREATE(mutex) \ 101 | do { \ 102 | pthread_mutexattr_t attr; \ 103 | pthread_mutexattr_init (&attr); \ 104 | pthread_mutexattr_settype (&attr, PTHREAD_MUTEX_ADAPTIVE_NP); \ 105 | pthread_mutex_init (&(mutex), &attr); \ 106 | } while (0) 107 | #else 108 | # define X_MUTEX_INIT PTHREAD_MUTEX_INITIALIZER 109 | # define X_MUTEX_CREATE(mutex) pthread_mutex_init (&(mutex), 0) 110 | #endif 111 | #define X_LOCK(mutex) pthread_mutex_lock (&(mutex)) 112 | #define X_UNLOCK(mutex) pthread_mutex_unlock (&(mutex)) 113 | 114 | typedef pthread_cond_t xcond_t; 115 | #define X_COND_INIT PTHREAD_COND_INITIALIZER 116 | #define X_COND_CREATE(cond) pthread_cond_init (&(cond), 0) 117 | #define X_COND_SIGNAL(cond) pthread_cond_signal (&(cond)) 118 | #define X_COND_WAIT(cond,mutex) pthread_cond_wait (&(cond), &(mutex)) 119 | #define X_COND_TIMEDWAIT(cond,mutex,to) pthread_cond_timedwait (&(cond), &(mutex), &(to)) 120 | 121 | typedef pthread_t xthread_t; 122 | #define X_THREAD_PROC(name) static void *name (void *thr_arg) 123 | #define X_THREAD_ATFORK(prepare,parent,child) pthread_atfork (prepare, parent, child) 124 | 125 | // the broken bsd's once more 126 | #ifndef PTHREAD_STACK_MIN 127 | # define PTHREAD_STACK_MIN 0 128 | #endif 129 | 130 | #ifndef X_STACKSIZE 131 | # define X_STACKSIZE sizeof (void *) * 4096 132 | #endif 133 | 134 | static int 135 | xthread_create (xthread_t *tid, void *(*proc)(void *), void *arg) 136 | { 137 | int retval; 138 | sigset_t fullsigset, oldsigset; 139 | pthread_attr_t attr; 140 | 141 | pthread_attr_init (&attr); 142 | pthread_attr_setdetachstate (&attr, PTHREAD_CREATE_DETACHED); 143 | pthread_attr_setstacksize (&attr, PTHREAD_STACK_MIN < 
X_STACKSIZE ? X_STACKSIZE : PTHREAD_STACK_MIN); 144 | #ifdef PTHREAD_SCOPE_PROCESS 145 | pthread_attr_setscope (&attr, PTHREAD_SCOPE_PROCESS); 146 | #endif 147 | 148 | sigfillset (&fullsigset); 149 | 150 | pthread_sigmask (SIG_SETMASK, &fullsigset, &oldsigset); 151 | retval = pthread_create (tid, &attr, proc, arg) == 0; 152 | pthread_sigmask (SIG_SETMASK, &oldsigset, 0); 153 | 154 | pthread_attr_destroy (&attr); 155 | 156 | return retval; 157 | } 158 | 159 | #define respipe_read(a,b,c) read ((a), (b), (c)) 160 | #define respipe_write(a,b,c) write ((a), (b), (c)) 161 | #define respipe_close(a) close ((a)) 162 | 163 | #endif 164 | 165 | #endif 166 | 167 | --------------------------------------------------------------------------------