├── .gitignore ├── CMakeLists.txt ├── LICENSE ├── README.md ├── build └── .ignore ├── build_vs2013 ├── tlc │ └── tlc.vcxproj ├── turbo_linecount.sln ├── turbo_linecount │ └── turbo_linecount.vcxproj └── turbo_linecount_static │ └── turbo_linecount_static.vcxproj ├── src ├── main.cpp ├── turbo_linecount.cpp └── turbo_linecount.h └── tests ├── compare_testfiles.sh └── create_testfiles.sh /.gitignore: -------------------------------------------------------------------------------- 1 | # Compiled Object files 2 | *.slo 3 | *.lo 4 | *.o 5 | *.obj 6 | 7 | # Precompiled Headers 8 | *.gch 9 | *.pch 10 | 11 | # Compiled Dynamic libraries 12 | *.so 13 | *.dylib 14 | *.dll 15 | 16 | # Fortran module files 17 | *.mod 18 | 19 | # Compiled Static libraries 20 | *.lai 21 | *.la 22 | *.a 23 | *.lib 24 | 25 | # Executables 26 | *.exe 27 | *.out 28 | *.app 29 | 30 | # Cmake build directory 31 | build 32 | 33 | # Visual Studio build directory 34 | build_vs2013 35 | 36 | # Test Files 37 | test*.txt 38 | -------------------------------------------------------------------------------- /CMakeLists.txt: --------------------------------------------------------------------------------
# Build script for turbo-linecount.
#
# Targets:
#   turbo_linecount         shared library
#   turbo_linecount_static  static library
#   tlc                     command line tool, linked against the static library
#
# The <min>...<max> form keeps CMake 3.0.0 as the minimum supported version
# while requesting newer policy defaults from newer CMake releases (CMake 4.x
# refuses a bare policy version below 3.5). The upper bound stays below 3.15
# on purpose: policy CMP0091 would make CMake add its own MSVC runtime flag,
# which would conflict with the /MD -> /MT handling done by hand below.
cmake_minimum_required(VERSION 3.0.0...3.14)
project(turbo_linecount)

if(MSVC)

  option(USE_STATIC_RUNTIME "Use Static Runtime" OFF)

  # FORCE is required here: the generator has already populated this cache
  # entry, so a non-forced set() would have no effect.
  set(CMAKE_CONFIGURATION_TYPES "Debug;Release" CACHE STRING "limited configs" FORCE)

  set(CMAKE_CXX_FLAGS_DEBUG "/Od /MDd")
  set(CMAKE_C_FLAGS_DEBUG "/Od /MDd")
  set(CMAKE_CXX_FLAGS_RELEASE "/Ox /Ob2 /MD")
  set(CMAKE_C_FLAGS_RELEASE "/Ox /Ob2 /MD")

  # Force static runtime libraries by rewriting /MD[d] to /MT[d] in every
  # per-configuration flag variable.
  if(USE_STATIC_RUNTIME)
    message("Using Static Runtime")
    foreach(flag
        CMAKE_C_FLAGS_RELEASE CMAKE_C_FLAGS_RELWITHDEBINFO
        CMAKE_C_FLAGS_DEBUG CMAKE_C_FLAGS_DEBUG_INIT
        CMAKE_CXX_FLAGS_RELEASE CMAKE_CXX_FLAGS_RELWITHDEBINFO
        CMAKE_CXX_FLAGS_DEBUG CMAKE_CXX_FLAGS_DEBUG_INIT)
      # ${flag} is the variable *name* (output); ${${flag}} is its current value (input).
      string(REPLACE "/MD" "/MT" ${flag} "${${flag}}")
    endforeach()
  endif()

else()

  find_package(Threads REQUIRED)
  set(CMAKE_CXX_FLAGS_RELEASE "-O3")
  set(CMAKE_C_FLAGS_RELEASE "-O3")

endif()

add_library(turbo_linecount SHARED src/turbo_linecount.cpp src/turbo_linecount.h)
add_library(turbo_linecount_static STATIC src/turbo_linecount.cpp src/turbo_linecount.h)
add_executable(tlc src/main.cpp)

# Per-target compile settings (previously directory-scoped ADD_DEFINITIONS,
# and a -pthread option that was only applied to tlc).
foreach(tgt turbo_linecount turbo_linecount_static tlc)
  if(NOT MSVC)
    # Large-file support macros for the non-MSVC toolchains.
    target_compile_definitions(${tgt} PRIVATE
      _LARGEFILE_SOURCE=1
      _LARGEFILE64_SOURCE=1
      _FILE_OFFSET_BITS=64)
  endif()
  if(THREADS_HAVE_PTHREAD_ARG)
    target_compile_options(${tgt} PRIVATE "-pthread")
  endif()
endforeach()

# The libraries contain the threading code, so they (not only the tlc
# executable) must carry the thread library as a link dependency. The
# variable is empty when no separate thread library is needed (and on MSVC,
# where find_package(Threads) is not run).
foreach(lib turbo_linecount turbo_linecount_static)
  if(CMAKE_THREAD_LIBS_INIT)
    target_link_libraries(${lib} PUBLIC ${CMAKE_THREAD_LIBS_INIT})
  endif()
endforeach()

target_link_libraries(tlc PRIVATE turbo_linecount_static ${CMAKE_THREAD_LIBS_INIT})

install(TARGETS tlc
  DESTINATION "bin"
)
install(TARGETS turbo_linecount turbo_linecount_static
  DESTINATION "lib"
)
install(FILES src/turbo_linecount.h
  DESTINATION "include"
)
-------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2015 Christien Rioux 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software.
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | 23 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # turbo-linecount 2 | turbo-linecount 1.0 Copyright 2015, Christien Rioux 3 | 4 | ### Super-Fast Multi-Threaded Line Counter 5 | 6 | *turbo-linecount* is a tool that simply counts the number of lines in a file, as fast as possible. It reads the file in large chunks into several threads and quickly scans the file for line endings. 7 | 8 | Many times, you have to count the number of lines in a text file on disk. The typical solution is to use `wc -l` on the command line. `wc -l` uses buffered streams to process the file, which has its advantages, but it is slower than direct memory mapped file access. You can't 'pipe' to *turbo-linecount*, however. This may change in a future release. 9 | 10 | How much faster is *turbo-linecount*? About 8 times faster than `wc -l` and 5 times faster than the naive Python implementation. 11 | 12 | To use *turbo-linecount*, just run the command line: 13 | 14 | ``` 15 | tlc <file> 16 | ``` 17 | 18 | where *\<file\>* is the path to the file whose lines you'd like to count. 
19 | 20 | ### Help 21 | To get help with *turbo-linecount*: 22 | 23 | ``` 24 | tlc -h 25 | usage: tlc [options] <file> 26 | -h --help print this usage and exit 27 | -b --buffersize size of buffer per-thread to use when reading (default is 1MB) 28 | -t --threadcount number of threads to use (defaults to number of cpu cores) 29 | -v --version print version information and exit 30 | ``` 31 | 32 | ### Building 33 | 34 | To build *turbo-linecount*, we use *cmake*. CMake 3.0.0 or higher is the preferred version as of this release. For simplified building on Windows, a Visual Studio 2013 solution file is also included. 35 | 36 | To build with *cmake*: 37 | ``` 38 | cd build 39 | cmake .. 40 | make 41 | make install 42 | ``` 43 | 44 | This will build and install the command line utility `tlc`, a shared library `libturbo_linecount`, a static library `libturbo_linecount_static`, and a header file `turbo_linecount.h`. 45 | 46 | Building *turbo-linecount* is known to be possible on 47 | 48 | ``` 49 | Windows 32/64 bit 50 | Mac OS X 51 | Linux 52 | Cygwin 53 | ``` 54 | 55 | ### Testing 56 | 57 | Testing *turbo-linecount* against `wc -l` and `python` can be done with the test scripts. To generate some random test files, run `create_testfiles.sh`; four test files (one 10MB, one 100MB, one 1GB, and one 10GB) will be created. Feel free to delete these when you're done testing to save space. 58 | 59 | To run the test, run `compare_testfiles.sh`. 
This will generate output as such: 60 | 61 | ``` 62 | Timing for tlc 63 | tlc: test_10MB.txt 0.006s 64 | tlc: test_100MB.txt 0.015s 65 | tlc: test_1GB.txt 0.127s 66 | tlc: test_10GB.txt 1.196s 67 | Timing for python 68 | python: test_10MB.txt 0.025s 69 | python: test_100MB.txt 0.084s 70 | python: test_1GB.txt 0.661s 71 | python: test_10GB.txt 6.165s 72 | Timing for wc 73 | wc: test_10MB.txt 0.012s 74 | wc: test_100MB.txt 0.100s 75 | wc: test_1GB.txt 0.933s 76 | wc: test_10GB.txt 9.857s 77 | ``` 78 | 79 | ### Performance 80 | 81 | Performance on Windows and Mac OS X is excellent for all file sizes. Performance on Linux and other operating systems is good, but can be better. Stay tuned. 82 | 83 | * Macbook Pro (Retina, 15-inch Mid 2014) 84 | * 2.8 GHz Intel Core i7 85 | * 1TB SSD hard drive 86 | * 16GB Memory 87 | 88 | ``` 89 | | File Size | `tlc` | `python` | `wc -l` | 90 | |-----------|--------|----------------|----------------| 91 | | 10MB | 0.006s | 0.025s (4.2x) | 0.012s (2.0x) | 92 | | 100MB | 0.015s | 0.084s (5.6x) | 0.100s (6.7x) | 93 | | 1GB | 0.127s | 0.661s (5.2x) | 0.933s (7.3x) | 94 | | 10GB | 1.196s | 6.165s (5.15x) | 9.857s (8.2x) | 95 | ``` 96 | -------------------------------------------------------------------------------- /build/.ignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crioux/turbo-linecount/9bf2210d40130be376b4e71c93e3d6b74a77b362/build/.ignore -------------------------------------------------------------------------------- /build_vs2013/tlc/tlc.vcxproj: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | Debug DLL 6 | Win32 7 | 8 | 9 | Debug DLL 10 | x64 11 | 12 | 13 | Debug 14 | Win32 15 | 16 | 17 | Debug 18 | x64 19 | 20 | 21 | Release DLL 22 | Win32 23 | 24 | 25 | Release DLL 26 | x64 27 | 28 | 29 | Release 30 | Win32 31 | 32 | 33 | Release 34 | x64 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 
{c6e64e54-0635-4a2b-b404-145ea82a761f} 43 | 44 | 45 | 46 | {E5D80D5A-FDE6-44FE-9E97-DF3D470DD5E9} 47 | Win32Proj 48 | linecount 49 | tlc 50 | 51 | 52 | 53 | Application 54 | true 55 | v120 56 | Unicode 57 | 58 | 59 | Application 60 | true 61 | v120 62 | Unicode 63 | 64 | 65 | Application 66 | true 67 | v120 68 | Unicode 69 | 70 | 71 | Application 72 | true 73 | v120 74 | Unicode 75 | 76 | 77 | Application 78 | false 79 | v120 80 | true 81 | Unicode 82 | 83 | 84 | Application 85 | false 86 | v120 87 | true 88 | Unicode 89 | 90 | 91 | Application 92 | false 93 | v120 94 | true 95 | Unicode 96 | 97 | 98 | Application 99 | false 100 | v120 101 | true 102 | Unicode 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | true 134 | 135 | 136 | true 137 | 138 | 139 | true 140 | 141 | 142 | true 143 | 144 | 145 | false 146 | 147 | 148 | false 149 | 150 | 151 | false 152 | 153 | 154 | false 155 | 156 | 157 | 158 | 159 | 160 | Level3 161 | Disabled 162 | WIN32;_DEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions) 163 | MultiThreadedDebug 164 | 165 | 166 | Console 167 | true 168 | 169 | 170 | 171 | 172 | 173 | 174 | Level3 175 | Disabled 176 | WIN32;_DEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions) 177 | MultiThreadedDebugDLL 178 | 179 | 180 | Console 181 | true 182 | 183 | 184 | 185 | 186 | 187 | 188 | Level3 189 | Disabled 190 | WIN32;_DEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions) 191 | MultiThreadedDebug 192 | 193 | 194 | Console 195 | true 196 | 197 | 198 | 199 | 200 | 201 | 202 | Level3 203 | Disabled 204 | WIN32;_DEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions) 205 | MultiThreadedDebugDLL 206 | 207 | 208 | Console 209 | true 210 | 211 | 212 | 213 | 214 | Level3 215 | 216 | 217 | Full 218 | true 219 | true 220 | WIN32;NDEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions) 221 | Speed 222 | AnySuitable 223 | true 224 | MultiThreaded 225 | 226 | 227 | Console 
228 | true 229 | true 230 | true 231 | 232 | 233 | 234 | 235 | Level3 236 | 237 | 238 | Full 239 | true 240 | true 241 | WIN32;NDEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions) 242 | Speed 243 | AnySuitable 244 | true 245 | MultiThreadedDLL 246 | 247 | 248 | Console 249 | true 250 | true 251 | true 252 | 253 | 254 | 255 | 256 | Level3 257 | 258 | 259 | Full 260 | true 261 | true 262 | WIN32;NDEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions) 263 | AnySuitable 264 | Speed 265 | true 266 | MultiThreaded 267 | 268 | 269 | Console 270 | true 271 | true 272 | true 273 | 274 | 275 | 276 | 277 | Level3 278 | 279 | 280 | Full 281 | true 282 | true 283 | WIN32;NDEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions) 284 | AnySuitable 285 | Speed 286 | true 287 | MultiThreadedDLL 288 | 289 | 290 | Console 291 | true 292 | true 293 | true 294 | 295 | 296 | 297 | 298 | 299 | -------------------------------------------------------------------------------- /build_vs2013/turbo_linecount.sln: -------------------------------------------------------------------------------- 1 |  2 | Microsoft Visual Studio Solution File, Format Version 12.00 3 | # Visual Studio 2013 4 | VisualStudioVersion = 12.0.31101.0 5 | MinimumVisualStudioVersion = 10.0.40219.1 6 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "tlc", "tlc\tlc.vcxproj", "{E5D80D5A-FDE6-44FE-9E97-DF3D470DD5E9}" 7 | EndProject 8 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "turbo_linecount", "turbo_linecount\turbo_linecount.vcxproj", "{76AA0AD7-A400-467D-8D9D-12D787043C0A}" 9 | EndProject 10 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "turbo_linecount_static", "turbo_linecount_static\turbo_linecount_static.vcxproj", "{C6E64E54-0635-4A2B-B404-145EA82A761F}" 11 | EndProject 12 | Global 13 | GlobalSection(SolutionConfigurationPlatforms) = preSolution 14 | Debug DLL|Win32 = Debug DLL|Win32 15 | Debug DLL|x64 = Debug DLL|x64 16 | Debug|Win32 = Debug|Win32 17 | Debug|x64 = Debug|x64 18 | Release DLL|Win32 = Release DLL|Win32 
19 | Release DLL|x64 = Release DLL|x64 20 | Release|Win32 = Release|Win32 21 | Release|x64 = Release|x64 22 | EndGlobalSection 23 | GlobalSection(ProjectConfigurationPlatforms) = postSolution 24 | {E5D80D5A-FDE6-44FE-9E97-DF3D470DD5E9}.Debug DLL|Win32.ActiveCfg = Debug DLL|Win32 25 | {E5D80D5A-FDE6-44FE-9E97-DF3D470DD5E9}.Debug DLL|Win32.Build.0 = Debug DLL|Win32 26 | {E5D80D5A-FDE6-44FE-9E97-DF3D470DD5E9}.Debug DLL|x64.ActiveCfg = Debug DLL|x64 27 | {E5D80D5A-FDE6-44FE-9E97-DF3D470DD5E9}.Debug DLL|x64.Build.0 = Debug DLL|x64 28 | {E5D80D5A-FDE6-44FE-9E97-DF3D470DD5E9}.Debug|Win32.ActiveCfg = Debug|Win32 29 | {E5D80D5A-FDE6-44FE-9E97-DF3D470DD5E9}.Debug|Win32.Build.0 = Debug|Win32 30 | {E5D80D5A-FDE6-44FE-9E97-DF3D470DD5E9}.Debug|x64.ActiveCfg = Debug|x64 31 | {E5D80D5A-FDE6-44FE-9E97-DF3D470DD5E9}.Debug|x64.Build.0 = Debug|x64 32 | {E5D80D5A-FDE6-44FE-9E97-DF3D470DD5E9}.Release DLL|Win32.ActiveCfg = Release DLL|Win32 33 | {E5D80D5A-FDE6-44FE-9E97-DF3D470DD5E9}.Release DLL|Win32.Build.0 = Release DLL|Win32 34 | {E5D80D5A-FDE6-44FE-9E97-DF3D470DD5E9}.Release DLL|x64.ActiveCfg = Release DLL|x64 35 | {E5D80D5A-FDE6-44FE-9E97-DF3D470DD5E9}.Release DLL|x64.Build.0 = Release DLL|x64 36 | {E5D80D5A-FDE6-44FE-9E97-DF3D470DD5E9}.Release|Win32.ActiveCfg = Release|Win32 37 | {E5D80D5A-FDE6-44FE-9E97-DF3D470DD5E9}.Release|Win32.Build.0 = Release|Win32 38 | {E5D80D5A-FDE6-44FE-9E97-DF3D470DD5E9}.Release|x64.ActiveCfg = Release|x64 39 | {E5D80D5A-FDE6-44FE-9E97-DF3D470DD5E9}.Release|x64.Build.0 = Release|x64 40 | {76AA0AD7-A400-467D-8D9D-12D787043C0A}.Debug DLL|Win32.ActiveCfg = Debug DLL|Win32 41 | {76AA0AD7-A400-467D-8D9D-12D787043C0A}.Debug DLL|Win32.Build.0 = Debug DLL|Win32 42 | {76AA0AD7-A400-467D-8D9D-12D787043C0A}.Debug DLL|x64.ActiveCfg = Debug DLL|x64 43 | {76AA0AD7-A400-467D-8D9D-12D787043C0A}.Debug DLL|x64.Build.0 = Debug DLL|x64 44 | {76AA0AD7-A400-467D-8D9D-12D787043C0A}.Debug|Win32.ActiveCfg = Debug|Win32 45 | 
{76AA0AD7-A400-467D-8D9D-12D787043C0A}.Debug|Win32.Build.0 = Debug|Win32 46 | {76AA0AD7-A400-467D-8D9D-12D787043C0A}.Debug|x64.ActiveCfg = Debug|x64 47 | {76AA0AD7-A400-467D-8D9D-12D787043C0A}.Debug|x64.Build.0 = Debug|x64 48 | {76AA0AD7-A400-467D-8D9D-12D787043C0A}.Release DLL|Win32.ActiveCfg = Release DLL|Win32 49 | {76AA0AD7-A400-467D-8D9D-12D787043C0A}.Release DLL|Win32.Build.0 = Release DLL|Win32 50 | {76AA0AD7-A400-467D-8D9D-12D787043C0A}.Release DLL|x64.ActiveCfg = Release DLL|x64 51 | {76AA0AD7-A400-467D-8D9D-12D787043C0A}.Release DLL|x64.Build.0 = Release DLL|x64 52 | {76AA0AD7-A400-467D-8D9D-12D787043C0A}.Release|Win32.ActiveCfg = Release|Win32 53 | {76AA0AD7-A400-467D-8D9D-12D787043C0A}.Release|Win32.Build.0 = Release|Win32 54 | {76AA0AD7-A400-467D-8D9D-12D787043C0A}.Release|x64.ActiveCfg = Release|x64 55 | {76AA0AD7-A400-467D-8D9D-12D787043C0A}.Release|x64.Build.0 = Release|x64 56 | {C6E64E54-0635-4A2B-B404-145EA82A761F}.Debug DLL|Win32.ActiveCfg = Debug DLL|Win32 57 | {C6E64E54-0635-4A2B-B404-145EA82A761F}.Debug DLL|Win32.Build.0 = Debug DLL|Win32 58 | {C6E64E54-0635-4A2B-B404-145EA82A761F}.Debug DLL|x64.ActiveCfg = Debug DLL|x64 59 | {C6E64E54-0635-4A2B-B404-145EA82A761F}.Debug DLL|x64.Build.0 = Debug DLL|x64 60 | {C6E64E54-0635-4A2B-B404-145EA82A761F}.Debug|Win32.ActiveCfg = Debug|Win32 61 | {C6E64E54-0635-4A2B-B404-145EA82A761F}.Debug|Win32.Build.0 = Debug|Win32 62 | {C6E64E54-0635-4A2B-B404-145EA82A761F}.Debug|x64.ActiveCfg = Debug|x64 63 | {C6E64E54-0635-4A2B-B404-145EA82A761F}.Debug|x64.Build.0 = Debug|x64 64 | {C6E64E54-0635-4A2B-B404-145EA82A761F}.Release DLL|Win32.ActiveCfg = Release DLL|Win32 65 | {C6E64E54-0635-4A2B-B404-145EA82A761F}.Release DLL|Win32.Build.0 = Release DLL|Win32 66 | {C6E64E54-0635-4A2B-B404-145EA82A761F}.Release DLL|x64.ActiveCfg = Release DLL|x64 67 | {C6E64E54-0635-4A2B-B404-145EA82A761F}.Release DLL|x64.Build.0 = Release DLL|x64 68 | {C6E64E54-0635-4A2B-B404-145EA82A761F}.Release|Win32.ActiveCfg = Release|Win32 69 | 
{C6E64E54-0635-4A2B-B404-145EA82A761F}.Release|Win32.Build.0 = Release|Win32 70 | {C6E64E54-0635-4A2B-B404-145EA82A761F}.Release|x64.ActiveCfg = Release|x64 71 | {C6E64E54-0635-4A2B-B404-145EA82A761F}.Release|x64.Build.0 = Release|x64 72 | EndGlobalSection 73 | GlobalSection(SolutionProperties) = preSolution 74 | HideSolutionNode = FALSE 75 | EndGlobalSection 76 | EndGlobal 77 | -------------------------------------------------------------------------------- /build_vs2013/turbo_linecount/turbo_linecount.vcxproj: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | Debug DLL 6 | Win32 7 | 8 | 9 | Debug DLL 10 | x64 11 | 12 | 13 | Debug 14 | Win32 15 | 16 | 17 | Debug 18 | x64 19 | 20 | 21 | Release DLL 22 | Win32 23 | 24 | 25 | Release DLL 26 | x64 27 | 28 | 29 | Release 30 | Win32 31 | 32 | 33 | Release 34 | x64 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | {76AA0AD7-A400-467D-8D9D-12D787043C0A} 45 | Win32Proj 46 | linecount 47 | 48 | 49 | 50 | DynamicLibrary 51 | true 52 | v120 53 | Unicode 54 | 55 | 56 | DynamicLibrary 57 | true 58 | v120 59 | Unicode 60 | 61 | 62 | DynamicLibrary 63 | true 64 | v120 65 | Unicode 66 | 67 | 68 | DynamicLibrary 69 | true 70 | v120 71 | Unicode 72 | 73 | 74 | DynamicLibrary 75 | false 76 | v120 77 | true 78 | Unicode 79 | 80 | 81 | DynamicLibrary 82 | false 83 | v120 84 | true 85 | Unicode 86 | 87 | 88 | DynamicLibrary 89 | false 90 | v120 91 | true 92 | Unicode 93 | 94 | 95 | DynamicLibrary 96 | false 97 | v120 98 | true 99 | Unicode 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | true 131 | 132 | 133 | true 134 | 135 | 136 | true 137 | 138 | 139 | true 140 | 141 | 142 | false 143 | 144 | 145 | false 146 | 147 | 148 | false 149 | 150 | 151 | false 152 | 153 | 154 | 155 | 156 | 157 | Level3 158 | Disabled 159 | 
WIN32;_DEBUG;_WINDOWS;_USRDLL;LINECOUNT_EXPORTS;%(PreprocessorDefinitions) 160 | MultiThreadedDebug 161 | 162 | 163 | Windows 164 | true 165 | 166 | 167 | 168 | 169 | 170 | 171 | Level3 172 | Disabled 173 | WIN32;_DEBUG;_WINDOWS;_USRDLL;LINECOUNT_EXPORTS;%(PreprocessorDefinitions) 174 | MultiThreadedDebugDLL 175 | 176 | 177 | Windows 178 | true 179 | 180 | 181 | 182 | 183 | 184 | 185 | Level3 186 | Disabled 187 | WIN32;_DEBUG;_WINDOWS;_USRDLL;LINECOUNT_EXPORTS;%(PreprocessorDefinitions) 188 | MultiThreadedDebug 189 | 190 | 191 | Windows 192 | true 193 | 194 | 195 | 196 | 197 | 198 | 199 | Level3 200 | Disabled 201 | WIN32;_DEBUG;_WINDOWS;_USRDLL;LINECOUNT_EXPORTS;%(PreprocessorDefinitions) 202 | MultiThreadedDebugDLL 203 | 204 | 205 | Windows 206 | true 207 | 208 | 209 | 210 | 211 | Level3 212 | 213 | 214 | MaxSpeed 215 | true 216 | true 217 | WIN32;NDEBUG;_WINDOWS;_USRDLL;LINECOUNT_EXPORTS;%(PreprocessorDefinitions) 218 | None 219 | MultiThreaded 220 | 221 | 222 | Windows 223 | true 224 | true 225 | 226 | 227 | 228 | 229 | Level3 230 | 231 | 232 | MaxSpeed 233 | true 234 | true 235 | WIN32;NDEBUG;_WINDOWS;_USRDLL;LINECOUNT_EXPORTS;%(PreprocessorDefinitions) 236 | None 237 | MultiThreadedDLL 238 | 239 | 240 | Windows 241 | true 242 | true 243 | 244 | 245 | 246 | 247 | Level3 248 | 249 | 250 | MaxSpeed 251 | true 252 | true 253 | WIN32;NDEBUG;_WINDOWS;_USRDLL;LINECOUNT_EXPORTS;%(PreprocessorDefinitions) 254 | None 255 | MultiThreaded 256 | 257 | 258 | Windows 259 | true 260 | true 261 | 262 | 263 | 264 | 265 | Level3 266 | 267 | 268 | MaxSpeed 269 | true 270 | true 271 | WIN32;NDEBUG;_WINDOWS;_USRDLL;LINECOUNT_EXPORTS;%(PreprocessorDefinitions) 272 | None 273 | MultiThreadedDLL 274 | 275 | 276 | Windows 277 | true 278 | true 279 | 280 | 281 | 282 | 283 | 284 | -------------------------------------------------------------------------------- /build_vs2013/turbo_linecount_static/turbo_linecount_static.vcxproj: 
-------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | Debug DLL 6 | Win32 7 | 8 | 9 | Debug DLL 10 | x64 11 | 12 | 13 | Debug 14 | Win32 15 | 16 | 17 | Debug 18 | x64 19 | 20 | 21 | Release DLL 22 | Win32 23 | 24 | 25 | Release DLL 26 | x64 27 | 28 | 29 | Release 30 | Win32 31 | 32 | 33 | Release 34 | x64 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | {C6E64E54-0635-4A2B-B404-145EA82A761F} 45 | Win32Proj 46 | linecount_static 47 | 48 | 49 | 50 | StaticLibrary 51 | true 52 | v120 53 | Unicode 54 | 55 | 56 | StaticLibrary 57 | true 58 | v120 59 | Unicode 60 | 61 | 62 | StaticLibrary 63 | true 64 | v120 65 | Unicode 66 | 67 | 68 | StaticLibrary 69 | true 70 | v120 71 | Unicode 72 | 73 | 74 | StaticLibrary 75 | false 76 | v120 77 | true 78 | Unicode 79 | 80 | 81 | StaticLibrary 82 | false 83 | v120 84 | true 85 | Unicode 86 | 87 | 88 | StaticLibrary 89 | false 90 | v120 91 | true 92 | Unicode 93 | 94 | 95 | StaticLibrary 96 | false 97 | v120 98 | true 99 | Unicode 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | Level3 135 | Disabled 136 | WIN32;_DEBUG;_LIB;%(PreprocessorDefinitions) 137 | MultiThreadedDebug 138 | 139 | 140 | Windows 141 | true 142 | 143 | 144 | 145 | 146 | 147 | 148 | Level3 149 | Disabled 150 | WIN32;_DEBUG;_LIB;%(PreprocessorDefinitions) 151 | MultiThreadedDebugDLL 152 | 153 | 154 | Windows 155 | true 156 | 157 | 158 | 159 | 160 | 161 | 162 | Level3 163 | Disabled 164 | WIN32;_DEBUG;_LIB;%(PreprocessorDefinitions) 165 | MultiThreadedDebug 166 | 167 | 168 | Windows 169 | true 170 | 171 | 172 | 173 | 174 | 175 | 176 | Level3 177 | Disabled 178 | WIN32;_DEBUG;_LIB;%(PreprocessorDefinitions) 179 | MultiThreadedDebugDLL 180 | 181 | 182 | Windows 183 | true 184 | 185 | 186 | 187 | 188 | Level3 189 | 190 | 191 | MaxSpeed 192 | true 193 | true 194 | 
WIN32;NDEBUG;_LIB;%(PreprocessorDefinitions) 195 | MultiThreaded 196 | 197 | 198 | Windows 199 | true 200 | true 201 | true 202 | 203 | 204 | 205 | 206 | Level3 207 | 208 | 209 | MaxSpeed 210 | true 211 | true 212 | WIN32;NDEBUG;_LIB;%(PreprocessorDefinitions) 213 | MultiThreadedDLL 214 | 215 | 216 | Windows 217 | true 218 | true 219 | true 220 | 221 | 222 | 223 | 224 | Level3 225 | 226 | 227 | MaxSpeed 228 | true 229 | true 230 | WIN32;NDEBUG;_LIB;%(PreprocessorDefinitions) 231 | MultiThreaded 232 | 233 | 234 | Windows 235 | true 236 | true 237 | true 238 | 239 | 240 | 241 | 242 | Level3 243 | 244 | 245 | MaxSpeed 246 | true 247 | true 248 | WIN32;NDEBUG;_LIB;%(PreprocessorDefinitions) 249 | MultiThreadedDLL 250 | 251 | 252 | Windows 253 | true 254 | true 255 | true 256 | 257 | 258 | 259 | 260 | 261 | -------------------------------------------------------------------------------- /src/main.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // Turbo Linecount 3 | // Copyright 2015, Christien Rioux 4 | // 5 | // MIT Licensed, see file 'LICENSE' for details 6 | // 7 | /////////////////////////////////////////////// 8 | 9 | #include"turbo_linecount.h" 10 | #include 11 | #include 12 | 13 | #ifdef _WIN32 14 | 15 | #include 16 | 17 | #elif defined(TLC_COMPATIBLE_UNIX) 18 | 19 | #include 20 | #define _tprintf printf 21 | #define _ftprintf fprintf 22 | #define _tcscmp strcmp 23 | #define _tcslen strlen 24 | #define _ttoi atoi 25 | #define _tcstoui64 strtoull 26 | #define _T(x) x 27 | #define TCHAR char 28 | 29 | #endif 30 | 31 | using namespace TURBOLINECOUNT; 32 | 33 | ////////////////////////////////////////////////////// 34 | 35 | 36 | void help(const TCHAR *argv0) 37 | { 38 | _ftprintf(stderr, _T("usage: %s [options] \n"), argv0); 39 | _ftprintf(stderr, _T(" -h --help print this usage and exit\n")); 40 | _ftprintf(stderr, _T(" -b --buffersize size of buffer per-thread to use when reading (default is 1MB)\n")); 41 | 
_ftprintf(stderr, _T(" -t --threadcount number of threads to use (defaults to number of cpu cores)\n")); 42 | _ftprintf(stderr, _T(" -v --version print version information and exit\n")); 43 | } 44 | 45 | 46 | void version(void) 47 | { 48 | _tprintf(_T("tlc (turbo-linecount) %d.%2.2d\nCopyright (c) 2015 Christien Rioux\n"), TURBOLINECOUNT_VERSION_MAJOR, TURBOLINECOUNT_VERSION_MINOR); 49 | } 50 | 51 | ////////////////////////////////////////////////////// 52 | 53 | 54 | #if defined(WIN32) && defined(_UNICODE) 55 | int wmain(int argc, TCHAR **argv) 56 | #else 57 | int main(int argc, char **argv) 58 | #endif 59 | { 60 | // Parse parameters 61 | int arg = 1; 62 | int posparam = 0; 63 | 64 | CLineCount::PARAMETERS params; 65 | params.buffersize = -1; 66 | params.threadcount = -1; 67 | 68 | TCHAR *filename = NULL; 69 | 70 | if(argc==1) 71 | { 72 | help(argv[0]); 73 | exit(0); 74 | } 75 | 76 | while (arg < argc) 77 | { 78 | if (_tcscmp(argv[arg], _T("-h")) == 0 || _tcscmp(argv[arg], _T("--help")) == 0) 79 | { 80 | help(argv[0]); 81 | exit(0); 82 | } 83 | else if (_tcscmp(argv[arg], _T("-v")) == 0 || _tcscmp(argv[arg], _T("--version")) == 0) 84 | { 85 | version(); 86 | exit(0); 87 | } 88 | else if (_tcscmp(argv[arg], _T("-b")) == 0 || _tcscmp(argv[arg], _T("--buffersize")) == 0) 89 | { 90 | arg++; 91 | if (arg == argc) 92 | { 93 | _ftprintf(stderr, _T("%s: missing argument to %s\n"), argv[0], argv[arg-1]); 94 | return 1; 95 | } 96 | 97 | TCHAR *wsstr = argv[arg]; 98 | 99 | // Check for size multipliers 100 | size_t multiplier = 1; 101 | TCHAR *lastchar = wsstr + (_tcslen(wsstr) - 1); 102 | if (*lastchar == _T('k') || *lastchar == _T('K')) 103 | { 104 | multiplier = 1024; 105 | lastchar = 0; 106 | } 107 | else if (*lastchar == _T('m') || *lastchar == _T('M')) 108 | { 109 | multiplier = 1024 * 1024; 110 | lastchar = 0; 111 | } 112 | else if (*lastchar == _T('g') || *lastchar == _T('G')) 113 | { 114 | multiplier = 1024 * 1024 * 1024; 115 | lastchar = 0; 116 | } 117 | 118 | 
TCHAR *endptr; 119 | params.buffersize = ((size_t)_tcstoui64(argv[arg], &endptr, 10)) * multiplier; 120 | 121 | } 122 | else if (_tcscmp(argv[arg], _T("-t")) == 0 || _tcscmp(argv[arg], _T("--threadcount")) == 0) 123 | { 124 | arg++; 125 | if (arg == argc) 126 | { 127 | _ftprintf(stderr, _T("%s: Missing argument to %s\n"), argv[0], argv[arg-1]); 128 | return 1; 129 | } 130 | 131 | params.threadcount = _ttoi(argv[arg]); 132 | if(params.threadcount<=0) 133 | { 134 | _ftprintf(stderr, _T("%s: Invalid thread count\n"), argv[0]); 135 | return 1; 136 | } 137 | } 138 | else 139 | { 140 | if (posparam == 0) 141 | { 142 | filename = argv[arg]; 143 | } 144 | else 145 | { 146 | _ftprintf(stderr, _T("%s: Too many arguments\n"), argv[0]); 147 | return 1; 148 | } 149 | posparam++; 150 | } 151 | 152 | arg++; 153 | } 154 | 155 | if (posparam != 1) 156 | { 157 | _ftprintf(stderr, _T("%s: Missing required argument\n"), argv[0]); 158 | return 1; 159 | } 160 | 161 | // Create line count class 162 | CLineCount lc(¶ms); 163 | 164 | if (!lc.open(filename)) 165 | { 166 | tlc_error_t err = lc.lastError(); 167 | tlc_string_t errstr = lc.lastErrorString(); 168 | 169 | _ftprintf(stderr, _T("%s: Error %d (%s)\n"), argv[0], err, errstr.c_str()); 170 | return err; 171 | } 172 | 173 | // Count lines 174 | tlc_linecount_t count; 175 | if (!lc.countLines(count)) 176 | { 177 | tlc_error_t err = lc.lastError(); 178 | tlc_string_t errstr = lc.lastErrorString(); 179 | 180 | _ftprintf(stderr, _T("%s: Error %d: (%s)\n"), argv[0], err, errstr.c_str()); 181 | return err; 182 | } 183 | 184 | // Display output 185 | _tprintf(_T(TLC_LINECOUNT_FMT) _T("\n"), count); 186 | 187 | return 0; 188 | } 189 | -------------------------------------------------------------------------------- /src/turbo_linecount.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // Turbo Linecount 3 | // Copyright 2015, Christien Rioux 4 | // 5 | // MIT Licensed, see file 'LICENSE' for details 
6 | // 7 | /////////////////////////////////////////////// 8 | 9 | #include"turbo_linecount.h" 10 | #include 11 | #include 12 | #include 13 | #include 14 | #ifdef min 15 | #undef min 16 | #endif 17 | 18 | ///////////////////////////// Platform specific 19 | #if defined(_WIN32) 20 | 21 | // Windows 22 | #define LCOPENFILE(name) CreateFile(name, GENERIC_READ, 0, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL) 23 | #define LCCLOSEFILE(handle) CloseHandle(handle) 24 | #define LCINVALIDHANDLE INVALID_HANDLE_VALUE 25 | #define LCSETREALLASTERROR(err, errstr) { setLastError((err), (errstr)); } 26 | #define MAP_FAILED NULL 27 | 28 | #elif defined(TLC_COMPATIBLE_UNIX) 29 | 30 | // Compatible Unix 31 | #include 32 | #include 33 | #include 34 | #include 35 | #if (defined (__APPLE__) && defined (__MACH__)) 36 | #include 37 | #define MMAP ::mmap 38 | #define FSTAT ::fstat 39 | #define STAT ::stat 40 | #elif (defined(__linux__) || defined(__cygwin__)) && defined(_LARGEFILE64_SOURCE) 41 | #define MMAP ::mmap64 42 | #define FSTAT ::fstat64 43 | #define STAT ::stat64 44 | #else 45 | #define MMAP ::mmap 46 | #define FSTAT ::fstat 47 | #define STAT ::stat 48 | #endif 49 | 50 | #define LCOPENFILE(name) ::open(name, O_RDONLY) 51 | #define LCCLOSEFILE(handle) (::close(handle) != -1) 52 | #define LCINVALIDHANDLE -1 53 | #define LCSETREALLASTERROR(err, errstr) { int __err = errno; setLastError(__err, std::strerror(__err)); } 54 | #define _tcsdup strdup 55 | 56 | #endif 57 | 58 | ///////////////////////////// Line Count Class 59 | 60 | BEGIN_TURBOLINECOUNT_NAMESPACE; 61 | 62 | struct LCTHREADCONTEXT 63 | { 64 | int thread_number; 65 | CLineCount *m_this; 66 | }; 67 | 68 | CLineCount::CLineCount(PARAMETERS *parameters) 69 | { 70 | 71 | // Set line count parameter defaults 72 | int cpucount; 73 | int allocationgranularity; 74 | #if defined(_WIN32) || defined(__cygwin__) || defined(__MIN) 75 | SYSTEM_INFO sysinfo; 76 | GetSystemInfo(&sysinfo); 77 | cpucount = sysinfo.dwNumberOfProcessors; 
78 | allocationgranularity = sysinfo.dwAllocationGranularity; 79 | //#elif defined(__linux__) 80 | #elif TLC_COMPATIBLE_UNIX 81 | cpucount = sysconf(_SC_NPROCESSORS_ONLN); 82 | allocationgranularity = sysconf(_SC_PAGESIZE); 83 | //#elif (defined (__APPLE__) && defined (__MACH__)) 84 | // mmsize_t count_len = sizeof(cpucount); 85 | // sysctlbyname("hw.logicalcpu", &cpucount, &count_len, NULL, 0); 86 | //#else 87 | // cpucount = 1; 88 | #endif 89 | m_parameters.threadcount = cpucount; 90 | m_parameters.buffersize = (1024 * 1024); 91 | 92 | // Override defaults if specified 93 | if (parameters) 94 | { 95 | if (parameters->buffersize != -1) 96 | { 97 | m_parameters.buffersize = parameters->buffersize; 98 | m_parameters.buffersize += (allocationgranularity - (m_parameters.buffersize % allocationgranularity)) % allocationgranularity; 99 | } 100 | if (parameters->threadcount != -1) 101 | { 102 | m_parameters.threadcount = parameters->threadcount; 103 | } 104 | } 105 | 106 | init(); 107 | } 108 | 109 | CLineCount::~CLineCount() 110 | { 111 | if (m_auto_close && m_opened) 112 | { 113 | LCCLOSEFILE(m_fh); 114 | } 115 | } 116 | 117 | void CLineCount::init(void) 118 | { 119 | m_lasterror = 0; 120 | m_lasterrorstring = _T(""); 121 | m_opened = false; 122 | m_auto_close = false; 123 | m_fh = LCINVALIDHANDLE; 124 | m_filesize = 0; 125 | m_actual_thread_count = 0; 126 | #ifdef _WIN32 127 | m_filemapping = NULL; 128 | #endif 129 | m_threads.clear(); 130 | m_threadlinecounts.clear(); 131 | } 132 | 133 | void CLineCount::setLastError(tlc_error_t lasterror, tlc_string_t lasterrorstring) 134 | { 135 | m_lasterror = lasterror; 136 | m_lasterrorstring = lasterrorstring; 137 | } 138 | 139 | tlc_error_t CLineCount::lastError() const 140 | { 141 | return m_lasterror; 142 | } 143 | 144 | tlc_string_t CLineCount::lastErrorString() const 145 | { 146 | return m_lasterrorstring; 147 | } 148 | 149 | bool CLineCount::isOpened() const 150 | { 151 | return m_opened; 152 | } 153 | 154 | bool 
CLineCount::open(tlc_filehandle_t fhandle, bool auto_close) 155 | { 156 | if (m_opened) 157 | { 158 | setLastError(EEXIST, _T("file already opened")); 159 | return false; 160 | } 161 | 162 | m_fh = fhandle; 163 | m_opened = true; 164 | m_auto_close = auto_close; 165 | 166 | return true; 167 | } 168 | 169 | bool CLineCount::open(const TCHAR *filename) 170 | { 171 | if (m_opened) 172 | { 173 | setLastError(EEXIST, _T("file already opened")); 174 | return false; 175 | } 176 | 177 | m_fh = LCOPENFILE(filename); 178 | if (m_fh == LCINVALIDHANDLE) 179 | { 180 | LCSETREALLASTERROR(ENOENT, _T("file could not be opened")); 181 | return false; 182 | } 183 | 184 | m_opened = true; 185 | m_auto_close = true; 186 | 187 | return true; 188 | } 189 | 190 | bool CLineCount::close() 191 | { 192 | if (!m_opened) 193 | { 194 | setLastError(EBADF, _T("file not opened")); 195 | return false; 196 | } 197 | 198 | bool ok = true; 199 | if (!LCCLOSEFILE(m_fh)) 200 | { 201 | LCSETREALLASTERROR(EBADF, _T("unable to close file")); 202 | ok = false; 203 | } 204 | 205 | init(); 206 | 207 | return ok; 208 | } 209 | 210 | 211 | #ifdef _WIN32 212 | DWORD WINAPI threadProc(LPVOID ctx) 213 | #else 214 | void *threadProc(void *ctx) 215 | #endif 216 | { 217 | LCTHREADCONTEXT *lctctx = (LCTHREADCONTEXT*)ctx; 218 | lctctx->m_this->countThread(lctctx->thread_number); 219 | return NULL; 220 | } 221 | 222 | unsigned int CLineCount::countThread(int thread_number) 223 | { 224 | tlc_fileoffset_t buffersize = (tlc_fileoffset_t)m_parameters.buffersize; 225 | tlc_fileoffset_t startoffset = buffersize * (tlc_fileoffset_t)thread_number; 226 | tlc_fileoffset_t stride = buffersize * m_actual_thread_count; 227 | tlc_fileoffset_t curoffset = startoffset; 228 | tlc_fileoffset_t lastmapsize = 0; 229 | tlc_linecount_t count = 0; 230 | void *mem = NULL; 231 | 232 | while (curoffset < m_filesize) 233 | { 234 | if (m_thread_fail) 235 | { 236 | return -1; 237 | } 238 | 239 | // Get best file mapping window size 240 | size_t 
mapsize = (size_t)std::min((m_filesize - curoffset), buffersize); 241 | 242 | // Map view of file 243 | #ifdef _WIN32 244 | 245 | if (mem) 246 | { 247 | if (!UnmapViewOfFile(mem)) 248 | { 249 | setLastError(EINVAL, _T("memory unmap failed")); 250 | m_thread_fail = true; 251 | return -1; 252 | } 253 | } 254 | mem = MapViewOfFile(m_filemapping, FILE_MAP_READ, (DWORD)(curoffset >> 32), (DWORD)curoffset, (SIZE_T)mapsize); 255 | #else 256 | if (mem) 257 | { 258 | if(munmap(mem, lastmapsize) !=0) 259 | { 260 | LCSETREALLASTERROR(EINVAL, _T("memory unmap failed")); 261 | m_thread_fail = true; 262 | return -1; 263 | } 264 | } 265 | mem = MMAP(NULL, mapsize, PROT_READ, MAP_FILE | MAP_SHARED, m_fh, curoffset); 266 | // printf("%p %lld %lld\n",mem, mapsize, curoffset); 267 | #endif 268 | if (mem == MAP_FAILED) 269 | { 270 | LCSETREALLASTERROR(EINVAL, _T("memory map failed")); 271 | m_thread_fail = true; 272 | return -1; 273 | } 274 | 275 | // Count newlines in buffer 276 | tlc_fileoffset_t windowoffset = 0; 277 | size_t windowleft = mapsize; 278 | char *ptr = (char *)mem; 279 | while (windowleft > 0) 280 | { 281 | char *ptrnext = (char *)memchr(ptr, '\n', windowleft); 282 | if (ptrnext) 283 | { 284 | ptrnext++; 285 | count++; 286 | windowleft -= (ptrnext - ptr); 287 | ptr = ptrnext; 288 | } 289 | else 290 | { 291 | windowleft = 0; 292 | } 293 | } 294 | 295 | // See if we need to account for end of file not ending with line terminator 296 | if ((curoffset + mapsize) == m_filesize) 297 | { 298 | if (*((char *)mem + (mapsize - 1)) != '\n') 299 | { 300 | count++; 301 | } 302 | } 303 | 304 | // Move to next buffer 305 | curoffset += stride; 306 | lastmapsize = mapsize; 307 | 308 | // printf("%lld\n", curoffset); 309 | } 310 | 311 | // Clean up memory map 312 | #ifdef _WIN32 313 | if (mem) 314 | { 315 | if (!UnmapViewOfFile(mem)) 316 | { 317 | setLastError(EINVAL, _T("memory unmap failed")); 318 | m_thread_fail = true; 319 | return -1; 320 | } 321 | } 322 | #else 323 | if (mem) 324 
| { 325 | if (munmap(mem, lastmapsize) != 0) 326 | { 327 | LCSETREALLASTERROR(EINVAL, _T("memory unmap failed")); 328 | m_thread_fail = true; 329 | return -1; 330 | } 331 | } 332 | #endif 333 | 334 | // Save count for this thread 335 | m_threadlinecounts[thread_number] = count; 336 | 337 | return 0; 338 | } 339 | 340 | bool CLineCount::createThread(int thread_number) 341 | { 342 | LCTHREADCONTEXT * ctx = new LCTHREADCONTEXT; 343 | ctx->m_this = this; 344 | ctx->thread_number = thread_number; 345 | #ifdef _WIN32 346 | HANDLE hThread = CreateThread(NULL, 0, threadProc, ctx, 0, NULL); 347 | if(!hThread) 348 | { 349 | return false; 350 | } 351 | #else 352 | pthread_t hThread; 353 | int ret = pthread_create(&hThread, NULL, threadProc, ctx); 354 | if (ret != 0) 355 | { 356 | return false; 357 | } 358 | #endif 359 | m_threads[thread_number] = hThread; 360 | return true; 361 | } 362 | 363 | bool CLineCount::countLines(tlc_linecount_t & linecount) 364 | { 365 | // Determine file size 366 | #ifdef _WIN32 367 | LARGE_INTEGER li; 368 | if (!GetFileSizeEx(m_fh, &li)) 369 | { 370 | LCSETREALLASTERROR(EBADF, _T("unable to get file size")); 371 | return false; 372 | } 373 | m_filesize = li.QuadPart; 374 | #else 375 | struct STAT statbuf; 376 | if(FSTAT(m_fh,&statbuf)!=0) 377 | { 378 | LCSETREALLASTERROR(EBADF, _T("unable to get file size")); 379 | return false; 380 | } 381 | m_filesize = statbuf.st_size; 382 | #endif 383 | 384 | // Exit now for empty files 385 | if (m_filesize == 0) 386 | { 387 | linecount = 0; 388 | return true; 389 | } 390 | 391 | // Figure out actual thread count 392 | tlc_fileoffset_t windowcount = (m_filesize + (m_parameters.buffersize - 1)) / m_parameters.buffersize; 393 | if (windowcount < (tlc_fileoffset_t) m_parameters.threadcount) 394 | { 395 | m_actual_thread_count = (int)windowcount; 396 | } 397 | else 398 | { 399 | m_actual_thread_count = m_parameters.threadcount; 400 | } 401 | 402 | // printf("act: %d\n",m_actual_thread_count); 403 | 404 | #ifdef 
_WIN32 405 | // Prepare file mapping 406 | m_filemapping = CreateFileMapping(m_fh, NULL, PAGE_READONLY, 0, 0, NULL); 407 | #endif 408 | 409 | // Spin up threads 410 | m_threads.resize(m_actual_thread_count); 411 | m_threadlinecounts.resize(m_actual_thread_count); 412 | m_thread_fail = false; 413 | for (int i = 0; i < m_actual_thread_count; i++) 414 | { 415 | if (!createThread(i)) 416 | { 417 | setLastError(ECHILD, _T("failed to create counting thread")); 418 | 419 | m_thread_fail = true; 420 | 421 | m_actual_thread_count = i; 422 | break; 423 | } 424 | } 425 | 426 | // Wait for threads to complete 427 | int complete = 0; 428 | int errors = 0; 429 | for (int i = 0; i < m_actual_thread_count; i++) 430 | { 431 | bool success = false; 432 | 433 | #ifdef _WIN32 434 | success = (WaitForSingleObject(m_threads[i], INFINITE) == WAIT_OBJECT_0); 435 | #else 436 | success = pthread_join(m_threads[i], NULL) == 0; 437 | #endif 438 | 439 | if (success) 440 | { 441 | complete++; 442 | } 443 | else 444 | { 445 | errors++; 446 | } 447 | } 448 | 449 | #ifdef _WIN32 450 | // Clean up file mapping 451 | CloseHandle(m_filemapping); 452 | #endif 453 | 454 | if (m_thread_fail) 455 | { 456 | return false; 457 | } 458 | 459 | if (complete != m_actual_thread_count) 460 | { 461 | setLastError(ECHILD, _T("thread join failed")); 462 | return false; 463 | } 464 | 465 | // Sum up thread line counts and return 466 | linecount = 0; 467 | for (int i = 0; i < m_actual_thread_count; i++) 468 | { 469 | linecount += m_threadlinecounts[i]; 470 | } 471 | 472 | return true; 473 | } 474 | 475 | // Static helpers 476 | tlc_linecount_t CLineCount::LineCount(tlc_filehandle_t fhandle, tlc_error_t * error, tlc_string_t *errorstring) 477 | { 478 | CLineCount lc; 479 | if (!lc.open(fhandle)) 480 | { 481 | if (error) 482 | { 483 | *error = lc.lastError(); 484 | } 485 | if (errorstring) 486 | { 487 | *errorstring = lc.lastErrorString(); 488 | } 489 | 490 | return -1; 491 | } 492 | 493 | tlc_linecount_t count; 494 | 
if (!lc.countLines(count)) 495 | { 496 | if (error) 497 | { 498 | *error = lc.lastError(); 499 | } 500 | if (errorstring) 501 | { 502 | *errorstring = lc.lastErrorString(); 503 | } 504 | return -1; 505 | } 506 | 507 | *error = 0; 508 | *errorstring = _T(""); 509 | 510 | return count; 511 | } 512 | 513 | tlc_linecount_t CLineCount::LineCount(const TCHAR *filename, tlc_error_t * error, tlc_string_t *errorstring) 514 | { 515 | CLineCount lc; 516 | if (!lc.open(filename)) 517 | { 518 | if (error) 519 | { 520 | *error = lc.lastError(); 521 | } 522 | if (errorstring) 523 | { 524 | *errorstring = lc.lastErrorString(); 525 | } 526 | 527 | return -1; 528 | } 529 | 530 | tlc_linecount_t count; 531 | if (!lc.countLines(count)) 532 | { 533 | if (error) 534 | { 535 | *error = lc.lastError(); 536 | } 537 | if (errorstring) 538 | { 539 | *errorstring = lc.lastErrorString(); 540 | } 541 | return -1; 542 | } 543 | 544 | *error = 0; 545 | *errorstring = _T(""); 546 | 547 | return count; 548 | } 549 | 550 | END_TURBOLINECOUNT_NAMESPACE; 551 | 552 | 553 | ///////////////////////////// C Linkage 554 | 555 | #ifndef _NO_TURBOLINECOUNT_C 556 | 557 | #ifdef _WIN32 558 | long long turbo_linecount_handle(HANDLE fhandle, tlc_error_t * error, TCHAR ** errorstring) 559 | #else 560 | long long turbo_linecount_handle(int fhandle, tlc_error_t * error, char ** errorstring) 561 | #endif 562 | { 563 | TURBOLINECOUNT::tlc_string_t errstr; 564 | 565 | long long linecount = TURBOLINECOUNT::CLineCount::LineCount(fhandle, error, &errstr); 566 | 567 | if (errorstring) 568 | { 569 | *errorstring = _tcsdup(errstr.c_str()); 570 | } 571 | 572 | return linecount; 573 | } 574 | 575 | #ifdef _WIN32 576 | long long turbo_linecount_file(const TCHAR *filename, tlc_error_t * error, TCHAR ** errorstring) 577 | #elif defined(TLC_COMPATIBLE_UNIX) 578 | long long turbo_linecount_file(const char *filename, tlc_error_t * error, char ** errorstring) 579 | #endif 580 | { 581 | TURBOLINECOUNT::tlc_string_t errstr; 582 | 583 
| long long linecount = TURBOLINECOUNT::CLineCount::LineCount(filename, error, &errstr); 584 | 585 | if (errorstring) 586 | { 587 | *errorstring = _tcsdup(errstr.c_str()); 588 | } 589 | 590 | return linecount; 591 | } 592 | 593 | #endif 594 | -------------------------------------------------------------------------------- /src/turbo_linecount.h: -------------------------------------------------------------------------------- 1 | // 2 | // Turbo Linecount 3 | // Copyright 2015, Christien Rioux 4 | // 5 | // MIT Licensed, see file 'LICENSE' for details 6 | // 7 | /////////////////////////////////////////////// 8 | 9 | #ifndef __INC_TURBO_LINECOUNT_H 10 | #define __INC_TURBO_LINECOUNT_H 11 | 12 | #define TURBOLINECOUNT_VERSION_MAJOR 1 13 | #define TURBOLINECOUNT_VERSION_MINOR 0 14 | 15 | #ifdef __cplusplus 16 | 17 | ///////////////////////////////////////////// Headers 18 | 19 | #include 20 | #include 21 | #include 22 | 23 | #if defined(__APPLE__) || defined(__linux__) || defined(__CYGWIN__) 24 | #define TLC_COMPATIBLE_UNIX 1 25 | #endif 26 | 27 | #ifdef _WIN32 // Windows 28 | 29 | #include 30 | #include 31 | 32 | typedef errno_t tlc_error_t; 33 | 34 | #elif defined(TLC_COMPATIBLE_UNIX) 35 | 36 | #include 37 | #include 38 | #define _T(x) x 39 | 40 | #ifdef _ERRNO_T 41 | typedef errno_t tlc_error_t; 42 | #elif defined(__error_t_defined) 43 | typedef error_t tlc_error_t; 44 | #else 45 | typedef int tlc_error_t; 46 | #endif 47 | 48 | #else 49 | #error Unsupported operating system. 
50 | #endif 51 | 52 | ///////////////////////////////////////////// Line Count Class 53 | 54 | #define BEGIN_TURBOLINECOUNT_NAMESPACE namespace TURBOLINECOUNT { 55 | #define END_TURBOLINECOUNT_NAMESPACE } 56 | 57 | BEGIN_TURBOLINECOUNT_NAMESPACE; 58 | 59 | ////////////// Platform specific 60 | #ifdef _WIN32 // Windows 61 | 62 | #ifdef _UNICODE 63 | typedef std::wstring tlc_string_t; 64 | #else 65 | typedef std::string tlc_string_t; 66 | #endif 67 | 68 | typedef HANDLE tlc_filehandle_t; 69 | typedef long long int tlc_fileoffset_t; 70 | typedef tlc_fileoffset_t tlc_linecount_t; 71 | #define TLC_LINECOUNT_FMT "%I64d" 72 | 73 | #elif defined(TLC_COMPATIBLE_UNIX) // Unix 74 | typedef char TCHAR; 75 | 76 | typedef std::string tlc_string_t; 77 | typedef int tlc_filehandle_t; 78 | 79 | #if (defined (__APPLE__) && defined (__MACH__)) 80 | typedef off_t tlc_fileoffset_t; 81 | #define TLC_LINECOUNT_FMT "%lld" 82 | #elif defined(_LARGEFILE64_SOURCE) 83 | #if defined(__CYGWIN__) 84 | typedef _off64_t tlc_fileoffset_t; 85 | #else 86 | typedef off64_t tlc_fileoffset_t; 87 | #endif 88 | #ifdef __LP64__ 89 | #define TLC_LINECOUNT_FMT "%ld" 90 | #else 91 | #define TLC_LINECOUNT_FMT "%lld" 92 | #endif 93 | #else 94 | typedef off_t tlc_fileoffset_t; 95 | #define TLC_LINECOUNT_FMT "%d" 96 | #endif 97 | 98 | typedef tlc_fileoffset_t tlc_linecount_t; 99 | 100 | #endif 101 | 102 | 103 | class CLineCount 104 | { 105 | public: 106 | 107 | struct PARAMETERS 108 | { 109 | size_t buffersize; 110 | int threadcount; 111 | }; 112 | 113 | private: 114 | 115 | bool m_opened; 116 | bool m_auto_close; 117 | tlc_filehandle_t m_fh; 118 | tlc_error_t m_lasterror; 119 | tlc_string_t m_lasterrorstring; 120 | tlc_fileoffset_t m_filesize; 121 | PARAMETERS m_parameters; 122 | int m_actual_thread_count; 123 | #ifdef _WIN32 124 | std::vector m_threads; 125 | HANDLE m_filemapping; 126 | #elif defined(TLC_COMPATIBLE_UNIX) 127 | std::vector m_threads; 128 | #endif 129 | std::vector m_threadlinecounts; 130 | bool 
m_thread_fail; 131 | 132 | private: 133 | 134 | void setLastError(tlc_error_t error, tlc_string_t lasterrorstring); 135 | void init(); 136 | bool createThread(int thread_number); 137 | #ifdef _WIN32 138 | friend DWORD WINAPI threadProc(LPVOID ctx); 139 | #elif defined(TLC_COMPATIBLE_UNIX) 140 | friend void *threadProc(void *ctx); 141 | #endif 142 | unsigned int countThread(int thread_number); 143 | 144 | public: 145 | 146 | CLineCount(PARAMETERS *parameters=NULL); 147 | ~CLineCount(); 148 | 149 | bool isOpened() const; 150 | tlc_error_t lastError() const; 151 | tlc_string_t lastErrorString() const; 152 | 153 | bool open(tlc_filehandle_t fhandle, bool auto_close = false); 154 | bool open(const TCHAR * filename); 155 | bool close(); 156 | 157 | bool countLines(tlc_linecount_t &linecount); 158 | 159 | public: 160 | 161 | // Static utility functions 162 | static tlc_linecount_t LineCount(tlc_filehandle_t fhandle, tlc_error_t * error = NULL, tlc_string_t * errorstring = NULL); 163 | static tlc_linecount_t LineCount(const TCHAR *filename, tlc_error_t * error = NULL, tlc_string_t * errorstring = NULL); 164 | }; 165 | 166 | END_TURBOLINECOUNT_NAMESPACE; 167 | 168 | #endif 169 | 170 | 171 | // C compatibility functions 172 | #ifndef _NO_TURBO_LINECOUNT_C 173 | 174 | #ifdef __cplusplus 175 | extern "C" 176 | { 177 | #endif 178 | 179 | #ifdef _WIN32 180 | long long int turbo_linecount_handle(HANDLE fhandle, errno_t * error = NULL, TCHAR ** errorstring = NULL); 181 | long long int turbo_linecount_file(const TCHAR *filename, errno_t * error = NULL, TCHAR ** errorstring = NULL); 182 | #elif defined(TLC_COMPATIBLE_UNIX) 183 | long long int turbo_linecount_handle(int fhandle, tlc_error_t * tlc_error = NULL, char ** errorstring = NULL); 184 | long long int turbo_linecount_file(const char *filename, tlc_error_t * error = NULL, char ** errorstring = NULL); 185 | #endif 186 | 187 | 188 | #ifdef __cplusplus 189 | } 190 | #endif 191 | 192 | #endif 193 | 194 | #endif 195 | 
--------------------------------------------------------------------------------
/tests/compare_testfiles.sh:
--------------------------------------------------------------------------------
#!/bin/bash
#
# Times 'tlc', Python and 'wc -l' on the files written by create_testfiles.sh.
#
# Usage: compare_testfiles.sh <path-to-tlc-binary>
#
# bash is required: each helper extracts the "real<TAB>0mS.SSSs" line printed by
# bash's 'time' keyword (field 2, with the leading "0m" stripped).

if [ "$1" = "" ]; then
	echo "specify path to tlc binary"
	exit 1
else
	TLC=$1
fi

# Time the tlc binary on file $1
tlctest()
{
	OUT=`(time "$TLC" "$1") 2>&1 | grep real | cut -f 2 | cut -c 3-`
	echo "tlc: $1 $OUT"
	return 0
}

# Time 'wc -l' on file $1
wctest()
{
	OUT=`(time wc -l "$1") 2>&1 | grep real | cut -f 2 | cut -c 3-`
	echo "wc: $1 $OUT"
	return 0
}

# Time a one-line Python counter on file $1.
# The snippet runs under Python 2 and 3; the file is opened in binary mode
# because the test files contain random bytes, not valid text.
pythontest()
{
	OUT=`(time echo "print(str(sum(1 for line in open('$1', 'rb'))) + ' $1')" | python) 2>&1 | grep real | cut -f 2 | cut -c 3-`
	echo "python: $1 $OUT"
	return 0
}

echo Timing for 'tlc'
tlctest test_10MB.txt
tlctest test_100MB.txt
tlctest test_1GB.txt
tlctest test_10GB.txt

echo Timing for 'python'
pythontest test_10MB.txt
pythontest test_100MB.txt
pythontest test_1GB.txt
pythontest test_10GB.txt

echo Timing for 'wc'
wctest test_10MB.txt
wctest test_100MB.txt
wctest test_1GB.txt
wctest test_10GB.txt
--------------------------------------------------------------------------------
/tests/create_testfiles.sh:
--------------------------------------------------------------------------------
#!/bin/bash
#
# Creates the random-content files used by compare_testfiles.sh in the current
# directory: test_10MB.txt, test_100MB.txt, test_1GB.txt and test_10GB.txt.

# Stop at the first failure (e.g. disk full) instead of building on a truncated file
set -e

dd if=/dev/urandom of=test_10MB.txt bs=1000000 count=10
dd if=/dev/urandom of=test_100MB.txt bs=1000000 count=100
dd if=/dev/urandom of=test_1GB.txt bs=1000000 count=1000

# The 10GB file is the 1GB file repeated ten times
for i in 1 2 3 4 5 6 7 8 9 10; do
	cat test_1GB.txt
done > test_10GB.txt
--------------------------------------------------------------------------------