├── .editorconfig ├── .gitignore ├── data ├── cube.obj ├── sponza.obj ├── suzanne.obj ├── teapot.obj └── triangle.obj ├── projects ├── VisualStudio │ ├── TrimeshTracer.sln │ ├── TrimeshTracer.vcxproj │ ├── TrimeshTracer.vcxproj.filters │ └── TrimeshTracer.vcxproj.user └── Xcode │ └── TrimeshTracer.xcodeproj │ ├── project.pbxproj │ └── xcshareddata │ └── xcschemes │ └── TrimeshTracer.xcscheme ├── readme.md ├── result1Triangle.png ├── result2Cube.png ├── result3Suzanne.png ├── result4Teapot.png ├── result5Sponza.png ├── runCases.cmd ├── runCases.sh └── source ├── external ├── enkits │ ├── Atomics.h │ ├── LockLessMultiReadPipe.h │ ├── TaskScheduler.cpp │ ├── TaskScheduler.h │ ├── TaskScheduler_c.cpp │ ├── TaskScheduler_c.h │ └── Threads.h ├── nanort.h ├── objparser-license.md ├── objparser.cpp ├── objparser.h ├── sokol_time.h └── stb_image_write.h ├── main.cpp ├── maths.cpp ├── maths.h ├── scene.cpp └── scene.h /.editorconfig: -------------------------------------------------------------------------------- 1 | root = true 2 | 3 | [*] 4 | end_of_line = lf 5 | charset = utf-8 6 | trim_trailing_whitespace = true 7 | insert_final_newline = true 8 | 9 | [*.{c,h,cpp,hpp,m,mm,cc,cs,hlsl,metal,ispc}] 10 | indent_style = space 11 | indent_size = 4 12 | 13 | [{Makefile,makefile}] 14 | indent_style = tab 15 | 16 | [*.{md,markdown}] 17 | trim_trailing_whitespace = false 18 | 19 | [*.{vcxproj,vcxproj.filters,csproj,props,targets}] 20 | indent_style = space 21 | indent_size = 2 22 | end_of_line = crlf 23 | charset = utf-8-bom 24 | trim_trailing_whitespace = true 25 | insert_final_newline = false 26 | [*.{sln,sln.template}] 27 | indent_style = tab 28 | indent_size = 4 29 | end_of_line = crlf 30 | trim_trailing_whitespace = true 31 | insert_final_newline = false 32 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | build/* 2 | output*.png 3 | .vs 4 | .idea 
5 | *.exe 6 | *.pdb 7 | project.xcworkspace 8 | xcschememanagement.plist 9 | xcuserdata 10 | -------------------------------------------------------------------------------- /data/cube.obj: -------------------------------------------------------------------------------- 1 | # Unit-volume cube with the same texture coordinates on each face. 2 | # 3 | # Created by Morgan McGuire and released into the Public Domain on 4 | # July 16, 2011. 5 | # 6 | # http://graphics.cs.williams.edu/data 7 | 8 | mtllib default.mtl 9 | 10 | v -0.5 0.5 -0.5 11 | v -0.5 0.5 0.5 12 | v 0.5 0.5 0.5 13 | v 0.5 0.5 -0.5 14 | v -0.5 -0.5 -0.5 15 | v -0.5 -0.5 0.5 16 | v 0.5 -0.5 0.5 17 | v 0.5 -0.5 -0.5 18 | 19 | vt 0 1 20 | vt 0 0 21 | vt 1 0 22 | vt 1 1 23 | 24 | vn 0 1 0 25 | vn -1 0 0 26 | vn 1 0 0 27 | vn 0 0 -1 28 | vn 0 0 1 29 | vn 0 -1 0 30 | 31 | g cube 32 | usemtl default 33 | 34 | f -8/-4/-6 -7/-3/-6 -6/-2/-6 35 | f -8/-4/-6 -6/-2/-6 -5/-1/-6 36 | f -8/-4/-5 -4/-3/-5 -3/-2/-5 37 | f -8/-4/-5 -3/-2/-5 -7/-1/-5 38 | f -6/-4/-4 -2/-3/-4 -1/-2/-4 39 | f -6/-4/-4 -1/-2/-4 -5/-1/-4 40 | f -5/-4/-3 -1/-3/-3 -4/-2/-3 41 | f -5/-4/-3 -4/-2/-3 -8/-1/-3 42 | f -7/-4/-2 -3/-3/-2 -2/-2/-2 43 | f -7/-4/-2 -2/-2/-2 -6/-1/-2 44 | f -3/-4/-1 -4/-3/-1 -1/-2/-1 45 | f -3/-4/-1 -1/-2/-1 -2/-1/-1 46 | -------------------------------------------------------------------------------- /data/triangle.obj: -------------------------------------------------------------------------------- 1 | v -0.5 0 -0.5 2 | v 0.5 0 0.5 3 | v 0 1 0 4 | 5 | vn 0 1 0 6 | 7 | f 1//1 2//1 3//1 8 | -------------------------------------------------------------------------------- /projects/VisualStudio/TrimeshTracer.sln: -------------------------------------------------------------------------------- 1 |  2 | Microsoft Visual Studio Solution File, Format Version 12.00 3 | # Visual Studio 15 4 | VisualStudioVersion = 15.0.28307.168 5 | MinimumVisualStudioVersion = 10.0.40219.1 6 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = 
"TrimeshTracer", "TrimeshTracer.vcxproj", "{6121F087-18BF-4606-8D98-86026EE99198}" 7 | EndProject 8 | Global 9 | GlobalSection(SolutionConfigurationPlatforms) = preSolution 10 | Debug|x64 = Debug|x64 11 | Release|x64 = Release|x64 12 | EndGlobalSection 13 | GlobalSection(ProjectConfigurationPlatforms) = postSolution 14 | {6121F087-18BF-4606-8D98-86026EE99198}.Debug|x64.ActiveCfg = Debug|x64 15 | {6121F087-18BF-4606-8D98-86026EE99198}.Debug|x64.Build.0 = Debug|x64 16 | {6121F087-18BF-4606-8D98-86026EE99198}.Release|x64.ActiveCfg = Release|x64 17 | {6121F087-18BF-4606-8D98-86026EE99198}.Release|x64.Build.0 = Release|x64 18 | EndGlobalSection 19 | GlobalSection(SolutionProperties) = preSolution 20 | HideSolutionNode = FALSE 21 | EndGlobalSection 22 | GlobalSection(ExtensibilityGlobals) = postSolution 23 | SolutionGuid = {8343A163-F549-4D47-9DE2-49DAEBEAACEC} 24 | EndGlobalSection 25 | EndGlobal 26 | -------------------------------------------------------------------------------- /projects/VisualStudio/TrimeshTracer.vcxproj: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Debug 6 | x64 7 | 8 | 9 | Release 10 | x64 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 15.0 35 | {6121F087-18BF-4606-8D98-86026EE99198} 36 | Win32Proj 37 | TrimeshTracer 38 | 8.1 39 | 40 | 41 | 42 | Application 43 | true 44 | v141 45 | Unicode 46 | 47 | 48 | Application 49 | false 50 | true 51 | Unicode 52 | v141 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | true 68 | $(SolutionDir)..\..\build\$(Platform)-$(Configuration)\ 69 | $(SolutionDir)..\..\build\$(Platform)-$(Configuration)\ 70 | 71 | 72 | false 73 | $(SolutionDir)..\..\build\$(Platform)-$(Configuration)\ 74 | $(SolutionDir)..\..\build\$(Platform)-$(Configuration)\ 75 | 76 | 77 | 78 | NotUsing 79 | Level4 80 | Disabled 81 | _DEBUG;_CONSOLE;_HAS_EXCEPTIONS=0;%(PreprocessorDefinitions) 82 | 
true 83 | 84 | 85 | false 86 | MultiThreadedDebug 87 | false 88 | false 89 | Fast 90 | 91 | 92 | Console 93 | true 94 | 95 | 96 | 97 | 98 | NotUsing 99 | Level4 100 | MaxSpeed 101 | true 102 | true 103 | NDEBUG;_CONSOLE;_HAS_EXCEPTIONS=0;%(PreprocessorDefinitions) 104 | true 105 | 106 | 107 | MultiThreaded 108 | false 109 | false 110 | Fast 111 | 112 | 113 | Console 114 | true 115 | true 116 | true 117 | 118 | 119 | 120 | 121 | 122 | -------------------------------------------------------------------------------- /projects/VisualStudio/TrimeshTracer.vcxproj.filters: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | 6 | 7 | 8 | external 9 | 10 | 11 | external\enkiTS 12 | 13 | 14 | external\enkiTS 15 | 16 | 17 | 18 | 19 | {d9924442-c84f-46f0-ba57-70ac8a1e6fc9} 20 | 21 | 22 | {d7a73072-b709-4163-8dcc-3f20912ce4f0} 23 | 24 | 25 | 26 | 27 | 28 | 29 | external 30 | 31 | 32 | external 33 | 34 | 35 | external 36 | 37 | 38 | external\enkiTS 39 | 40 | 41 | external\enkiTS 42 | 43 | 44 | external\enkiTS 45 | 46 | 47 | external\enkiTS 48 | 49 | 50 | external\enkiTS 51 | 52 | 53 | -------------------------------------------------------------------------------- /projects/VisualStudio/TrimeshTracer.vcxproj.user: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | $(ProjectDir)..\.. 5 | WindowsLocalDebugger 6 | 640 360 4 data/cube.obj 7 | 8 | 9 | $(ProjectDir)..\.. 10 | WindowsLocalDebugger 11 | 640 360 4 data/cube.obj 12 | 13 | -------------------------------------------------------------------------------- /projects/Xcode/TrimeshTracer.xcodeproj/project.pbxproj: -------------------------------------------------------------------------------- 1 | // !$*UTF8*$! 
2 | { 3 | archiveVersion = 1; 4 | classes = { 5 | }; 6 | objectVersion = 50; 7 | objects = { 8 | 9 | /* Begin PBXBuildFile section */ 10 | 2BBF7DCA22492D4500FCABB2 /* scene.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 2BBF7DC522492D4500FCABB2 /* scene.cpp */; }; 11 | 2BBF7DCB22492D4500FCABB2 /* main.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 2BBF7DC822492D4500FCABB2 /* main.cpp */; }; 12 | 2BBF7DCC22492D4500FCABB2 /* maths.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 2BBF7DC922492D4500FCABB2 /* maths.cpp */; }; 13 | 2BBF7DE5224A6E8D00FCABB2 /* objparser.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 2BBF7DE2224A6E8D00FCABB2 /* objparser.cpp */; }; 14 | 2BBF7DEF224BB32000FCABB2 /* TaskScheduler_c.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 2BBF7DE8224BB32000FCABB2 /* TaskScheduler_c.cpp */; }; 15 | 2BBF7DF0224BB32000FCABB2 /* TaskScheduler.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 2BBF7DEE224BB32000FCABB2 /* TaskScheduler.cpp */; }; 16 | /* End PBXBuildFile section */ 17 | 18 | /* Begin PBXCopyFilesBuildPhase section */ 19 | 2BBF7DB022492CB600FCABB2 /* CopyFiles */ = { 20 | isa = PBXCopyFilesBuildPhase; 21 | buildActionMask = 2147483647; 22 | dstPath = /usr/share/man/man1/; 23 | dstSubfolderSpec = 0; 24 | files = ( 25 | ); 26 | runOnlyForDeploymentPostprocessing = 1; 27 | }; 28 | /* End PBXCopyFilesBuildPhase section */ 29 | 30 | /* Begin PBXFileReference section */ 31 | 2BBF7DB222492CB600FCABB2 /* TrimeshTracer */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = TrimeshTracer; sourceTree = BUILT_PRODUCTS_DIR; }; 32 | 2BBF7DC522492D4500FCABB2 /* scene.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = scene.cpp; sourceTree = ""; }; 33 | 2BBF7DC622492D4500FCABB2 /* maths.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = maths.h; sourceTree = ""; }; 34 | 2BBF7DC722492D4500FCABB2 /* 
scene.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = scene.h; sourceTree = ""; }; 35 | 2BBF7DC822492D4500FCABB2 /* main.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = main.cpp; sourceTree = ""; }; 36 | 2BBF7DC922492D4500FCABB2 /* maths.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = maths.cpp; sourceTree = ""; }; 37 | 2BBF7DE1224A6E8D00FCABB2 /* objparser.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = objparser.h; sourceTree = ""; }; 38 | 2BBF7DE2224A6E8D00FCABB2 /* objparser.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = objparser.cpp; sourceTree = ""; }; 39 | 2BBF7DE3224A6E8D00FCABB2 /* sokol_time.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = sokol_time.h; sourceTree = ""; }; 40 | 2BBF7DE4224A6E8D00FCABB2 /* stb_image_write.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = stb_image_write.h; sourceTree = ""; }; 41 | 2BBF7DE8224BB32000FCABB2 /* TaskScheduler_c.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = TaskScheduler_c.cpp; sourceTree = ""; }; 42 | 2BBF7DE9224BB32000FCABB2 /* LockLessMultiReadPipe.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = LockLessMultiReadPipe.h; sourceTree = ""; }; 43 | 2BBF7DEA224BB32000FCABB2 /* TaskScheduler_c.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = TaskScheduler_c.h; sourceTree = ""; }; 44 | 2BBF7DEB224BB32000FCABB2 /* TaskScheduler.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = TaskScheduler.h; sourceTree = ""; }; 45 | 2BBF7DEC224BB32000FCABB2 /* Threads.h */ = {isa = PBXFileReference; fileEncoding = 4; 
lastKnownFileType = sourcecode.c.h; path = Threads.h; sourceTree = ""; }; 46 | 2BBF7DED224BB32000FCABB2 /* Atomics.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = Atomics.h; sourceTree = ""; }; 47 | 2BBF7DEE224BB32000FCABB2 /* TaskScheduler.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = TaskScheduler.cpp; sourceTree = ""; }; 48 | 2BBF7DF3224CA0FE00FCABB2 /* nanort.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = nanort.h; sourceTree = ""; }; 49 | /* End PBXFileReference section */ 50 | 51 | /* Begin PBXFrameworksBuildPhase section */ 52 | 2BBF7DAF22492CB600FCABB2 /* Frameworks */ = { 53 | isa = PBXFrameworksBuildPhase; 54 | buildActionMask = 2147483647; 55 | files = ( 56 | ); 57 | runOnlyForDeploymentPostprocessing = 0; 58 | }; 59 | /* End PBXFrameworksBuildPhase section */ 60 | 61 | /* Begin PBXGroup section */ 62 | 2BBF7DA922492CB600FCABB2 = { 63 | isa = PBXGroup; 64 | children = ( 65 | 2BBF7DB422492CB600FCABB2 /* TrimeshTracer */, 66 | 2BBF7DB322492CB600FCABB2 /* Products */, 67 | ); 68 | sourceTree = ""; 69 | }; 70 | 2BBF7DB322492CB600FCABB2 /* Products */ = { 71 | isa = PBXGroup; 72 | children = ( 73 | 2BBF7DB222492CB600FCABB2 /* TrimeshTracer */, 74 | ); 75 | name = Products; 76 | sourceTree = ""; 77 | }; 78 | 2BBF7DB422492CB600FCABB2 /* TrimeshTracer */ = { 79 | isa = PBXGroup; 80 | children = ( 81 | 2BBF7DCD22492D5700FCABB2 /* external */, 82 | 2BBF7DC822492D4500FCABB2 /* main.cpp */, 83 | 2BBF7DC922492D4500FCABB2 /* maths.cpp */, 84 | 2BBF7DC622492D4500FCABB2 /* maths.h */, 85 | 2BBF7DC522492D4500FCABB2 /* scene.cpp */, 86 | 2BBF7DC722492D4500FCABB2 /* scene.h */, 87 | ); 88 | name = TrimeshTracer; 89 | path = ../../source; 90 | sourceTree = SOURCE_ROOT; 91 | }; 92 | 2BBF7DCD22492D5700FCABB2 /* external */ = { 93 | isa = PBXGroup; 94 | children = ( 95 | 2BBF7DF3224CA0FE00FCABB2 /* nanort.h */, 96 | 
2BBF7DE7224BB32000FCABB2 /* enkits */, 97 | 2BBF7DE2224A6E8D00FCABB2 /* objparser.cpp */, 98 | 2BBF7DE1224A6E8D00FCABB2 /* objparser.h */, 99 | 2BBF7DE3224A6E8D00FCABB2 /* sokol_time.h */, 100 | 2BBF7DE4224A6E8D00FCABB2 /* stb_image_write.h */, 101 | ); 102 | path = external; 103 | sourceTree = ""; 104 | }; 105 | 2BBF7DE7224BB32000FCABB2 /* enkits */ = { 106 | isa = PBXGroup; 107 | children = ( 108 | 2BBF7DE8224BB32000FCABB2 /* TaskScheduler_c.cpp */, 109 | 2BBF7DE9224BB32000FCABB2 /* LockLessMultiReadPipe.h */, 110 | 2BBF7DEA224BB32000FCABB2 /* TaskScheduler_c.h */, 111 | 2BBF7DEB224BB32000FCABB2 /* TaskScheduler.h */, 112 | 2BBF7DEC224BB32000FCABB2 /* Threads.h */, 113 | 2BBF7DED224BB32000FCABB2 /* Atomics.h */, 114 | 2BBF7DEE224BB32000FCABB2 /* TaskScheduler.cpp */, 115 | ); 116 | path = enkits; 117 | sourceTree = ""; 118 | }; 119 | /* End PBXGroup section */ 120 | 121 | /* Begin PBXNativeTarget section */ 122 | 2BBF7DB122492CB600FCABB2 /* TrimeshTracer */ = { 123 | isa = PBXNativeTarget; 124 | buildConfigurationList = 2BBF7DB922492CB600FCABB2 /* Build configuration list for PBXNativeTarget "TrimeshTracer" */; 125 | buildPhases = ( 126 | 2BBF7DAE22492CB600FCABB2 /* Sources */, 127 | 2BBF7DAF22492CB600FCABB2 /* Frameworks */, 128 | 2BBF7DB022492CB600FCABB2 /* CopyFiles */, 129 | ); 130 | buildRules = ( 131 | ); 132 | dependencies = ( 133 | ); 134 | name = TrimeshTracer; 135 | productName = TrimeshTracer; 136 | productReference = 2BBF7DB222492CB600FCABB2 /* TrimeshTracer */; 137 | productType = "com.apple.product-type.tool"; 138 | }; 139 | /* End PBXNativeTarget section */ 140 | 141 | /* Begin PBXProject section */ 142 | 2BBF7DAA22492CB600FCABB2 /* Project object */ = { 143 | isa = PBXProject; 144 | attributes = { 145 | LastUpgradeCheck = 1010; 146 | ORGANIZATIONNAME = "Unity Technologies"; 147 | TargetAttributes = { 148 | 2BBF7DB122492CB600FCABB2 = { 149 | CreatedOnToolsVersion = 10.1; 150 | }; 151 | }; 152 | }; 153 | buildConfigurationList = 
2BBF7DAD22492CB600FCABB2 /* Build configuration list for PBXProject "TrimeshTracer" */; 154 | compatibilityVersion = "Xcode 9.3"; 155 | developmentRegion = en; 156 | hasScannedForEncodings = 0; 157 | knownRegions = ( 158 | en, 159 | ); 160 | mainGroup = 2BBF7DA922492CB600FCABB2; 161 | productRefGroup = 2BBF7DB322492CB600FCABB2 /* Products */; 162 | projectDirPath = ""; 163 | projectRoot = ""; 164 | targets = ( 165 | 2BBF7DB122492CB600FCABB2 /* TrimeshTracer */, 166 | ); 167 | }; 168 | /* End PBXProject section */ 169 | 170 | /* Begin PBXSourcesBuildPhase section */ 171 | 2BBF7DAE22492CB600FCABB2 /* Sources */ = { 172 | isa = PBXSourcesBuildPhase; 173 | buildActionMask = 2147483647; 174 | files = ( 175 | 2BBF7DE5224A6E8D00FCABB2 /* objparser.cpp in Sources */, 176 | 2BBF7DEF224BB32000FCABB2 /* TaskScheduler_c.cpp in Sources */, 177 | 2BBF7DF0224BB32000FCABB2 /* TaskScheduler.cpp in Sources */, 178 | 2BBF7DCC22492D4500FCABB2 /* maths.cpp in Sources */, 179 | 2BBF7DCB22492D4500FCABB2 /* main.cpp in Sources */, 180 | 2BBF7DCA22492D4500FCABB2 /* scene.cpp in Sources */, 181 | ); 182 | runOnlyForDeploymentPostprocessing = 0; 183 | }; 184 | /* End PBXSourcesBuildPhase section */ 185 | 186 | /* Begin XCBuildConfiguration section */ 187 | 2BBF7DB722492CB600FCABB2 /* Debug */ = { 188 | isa = XCBuildConfiguration; 189 | buildSettings = { 190 | ALWAYS_SEARCH_USER_PATHS = NO; 191 | CLANG_ANALYZER_NONNULL = YES; 192 | CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE; 193 | CLANG_CXX_LANGUAGE_STANDARD = "gnu++14"; 194 | CLANG_CXX_LIBRARY = "libc++"; 195 | CLANG_ENABLE_MODULES = YES; 196 | CLANG_ENABLE_OBJC_ARC = YES; 197 | CLANG_ENABLE_OBJC_WEAK = YES; 198 | CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES; 199 | CLANG_WARN_BOOL_CONVERSION = YES; 200 | CLANG_WARN_COMMA = YES; 201 | CLANG_WARN_CONSTANT_CONVERSION = YES; 202 | CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES; 203 | CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR; 204 | CLANG_WARN_DOCUMENTATION_COMMENTS = YES; 205 
| CLANG_WARN_EMPTY_BODY = YES; 206 | CLANG_WARN_ENUM_CONVERSION = YES; 207 | CLANG_WARN_INFINITE_RECURSION = YES; 208 | CLANG_WARN_INT_CONVERSION = YES; 209 | CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES; 210 | CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES; 211 | CLANG_WARN_OBJC_LITERAL_CONVERSION = YES; 212 | CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR; 213 | CLANG_WARN_RANGE_LOOP_ANALYSIS = YES; 214 | CLANG_WARN_STRICT_PROTOTYPES = YES; 215 | CLANG_WARN_SUSPICIOUS_MOVE = YES; 216 | CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE; 217 | CLANG_WARN_UNREACHABLE_CODE = YES; 218 | CLANG_WARN__DUPLICATE_METHOD_MATCH = YES; 219 | CODE_SIGN_IDENTITY = "-"; 220 | COPY_PHASE_STRIP = NO; 221 | DEBUG_INFORMATION_FORMAT = dwarf; 222 | ENABLE_STRICT_OBJC_MSGSEND = YES; 223 | ENABLE_TESTABILITY = YES; 224 | GCC_C_LANGUAGE_STANDARD = gnu11; 225 | GCC_DYNAMIC_NO_PIC = NO; 226 | GCC_NO_COMMON_BLOCKS = YES; 227 | GCC_OPTIMIZATION_LEVEL = 0; 228 | GCC_PREPROCESSOR_DEFINITIONS = ( 229 | "DEBUG=1", 230 | "$(inherited)", 231 | ); 232 | GCC_WARN_64_TO_32_BIT_CONVERSION = YES; 233 | GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR; 234 | GCC_WARN_UNDECLARED_SELECTOR = YES; 235 | GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE; 236 | GCC_WARN_UNUSED_FUNCTION = YES; 237 | GCC_WARN_UNUSED_VARIABLE = YES; 238 | MACOSX_DEPLOYMENT_TARGET = 10.12; 239 | MTL_ENABLE_DEBUG_INFO = INCLUDE_SOURCE; 240 | MTL_FAST_MATH = YES; 241 | ONLY_ACTIVE_ARCH = YES; 242 | SDKROOT = macosx; 243 | }; 244 | name = Debug; 245 | }; 246 | 2BBF7DB822492CB600FCABB2 /* Release */ = { 247 | isa = XCBuildConfiguration; 248 | buildSettings = { 249 | ALWAYS_SEARCH_USER_PATHS = NO; 250 | CLANG_ANALYZER_NONNULL = YES; 251 | CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE; 252 | CLANG_CXX_LANGUAGE_STANDARD = "gnu++14"; 253 | CLANG_CXX_LIBRARY = "libc++"; 254 | CLANG_ENABLE_MODULES = YES; 255 | CLANG_ENABLE_OBJC_ARC = YES; 256 | CLANG_ENABLE_OBJC_WEAK = YES; 257 | CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES; 258 | 
CLANG_WARN_BOOL_CONVERSION = YES; 259 | CLANG_WARN_COMMA = YES; 260 | CLANG_WARN_CONSTANT_CONVERSION = YES; 261 | CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES; 262 | CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR; 263 | CLANG_WARN_DOCUMENTATION_COMMENTS = YES; 264 | CLANG_WARN_EMPTY_BODY = YES; 265 | CLANG_WARN_ENUM_CONVERSION = YES; 266 | CLANG_WARN_INFINITE_RECURSION = YES; 267 | CLANG_WARN_INT_CONVERSION = YES; 268 | CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES; 269 | CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES; 270 | CLANG_WARN_OBJC_LITERAL_CONVERSION = YES; 271 | CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR; 272 | CLANG_WARN_RANGE_LOOP_ANALYSIS = YES; 273 | CLANG_WARN_STRICT_PROTOTYPES = YES; 274 | CLANG_WARN_SUSPICIOUS_MOVE = YES; 275 | CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE; 276 | CLANG_WARN_UNREACHABLE_CODE = YES; 277 | CLANG_WARN__DUPLICATE_METHOD_MATCH = YES; 278 | CODE_SIGN_IDENTITY = "-"; 279 | COPY_PHASE_STRIP = NO; 280 | DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym"; 281 | ENABLE_NS_ASSERTIONS = NO; 282 | ENABLE_STRICT_OBJC_MSGSEND = YES; 283 | GCC_C_LANGUAGE_STANDARD = gnu11; 284 | GCC_NO_COMMON_BLOCKS = YES; 285 | GCC_PREPROCESSOR_DEFINITIONS = "NDEBUG=1"; 286 | GCC_WARN_64_TO_32_BIT_CONVERSION = YES; 287 | GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR; 288 | GCC_WARN_UNDECLARED_SELECTOR = YES; 289 | GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE; 290 | GCC_WARN_UNUSED_FUNCTION = YES; 291 | GCC_WARN_UNUSED_VARIABLE = YES; 292 | MACOSX_DEPLOYMENT_TARGET = 10.12; 293 | MTL_ENABLE_DEBUG_INFO = NO; 294 | MTL_FAST_MATH = YES; 295 | SDKROOT = macosx; 296 | }; 297 | name = Release; 298 | }; 299 | 2BBF7DBA22492CB600FCABB2 /* Debug */ = { 300 | isa = XCBuildConfiguration; 301 | buildSettings = { 302 | CODE_SIGN_STYLE = Automatic; 303 | PRODUCT_NAME = "$(TARGET_NAME)"; 304 | }; 305 | name = Debug; 306 | }; 307 | 2BBF7DBB22492CB600FCABB2 /* Release */ = { 308 | isa = XCBuildConfiguration; 309 | buildSettings = { 310 | CODE_SIGN_STYLE = Automatic; 311 | 
PRODUCT_NAME = "$(TARGET_NAME)"; 312 | }; 313 | name = Release; 314 | }; 315 | /* End XCBuildConfiguration section */ 316 | 317 | /* Begin XCConfigurationList section */ 318 | 2BBF7DAD22492CB600FCABB2 /* Build configuration list for PBXProject "TrimeshTracer" */ = { 319 | isa = XCConfigurationList; 320 | buildConfigurations = ( 321 | 2BBF7DB722492CB600FCABB2 /* Debug */, 322 | 2BBF7DB822492CB600FCABB2 /* Release */, 323 | ); 324 | defaultConfigurationIsVisible = 0; 325 | defaultConfigurationName = Release; 326 | }; 327 | 2BBF7DB922492CB600FCABB2 /* Build configuration list for PBXNativeTarget "TrimeshTracer" */ = { 328 | isa = XCConfigurationList; 329 | buildConfigurations = ( 330 | 2BBF7DBA22492CB600FCABB2 /* Debug */, 331 | 2BBF7DBB22492CB600FCABB2 /* Release */, 332 | ); 333 | defaultConfigurationIsVisible = 0; 334 | defaultConfigurationName = Release; 335 | }; 336 | /* End XCConfigurationList section */ 337 | }; 338 | rootObject = 2BBF7DAA22492CB600FCABB2 /* Project object */; 339 | } 340 | -------------------------------------------------------------------------------- /projects/Xcode/TrimeshTracer.xcodeproj/xcshareddata/xcschemes/TrimeshTracer.xcscheme: -------------------------------------------------------------------------------- 1 | 2 | 5 | 8 | 9 | 15 | 21 | 22 | 23 | 24 | 25 | 30 | 31 | 32 | 33 | 39 | 40 | 41 | 42 | 43 | 44 | 55 | 57 | 63 | 64 | 65 | 66 | 69 | 70 | 73 | 74 | 77 | 78 | 81 | 82 | 85 | 86 | 87 | 88 | 89 | 90 | 96 | 98 | 104 | 105 | 106 | 107 | 109 | 110 | 113 | 114 | 115 | -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- 1 | # Toy Mesh Path Tracer for a job interview task 2 | 3 | I used the task below for some job interviews I did in 2019 Q2. The initial version was a super simple brute-force 4 | triangle mesh path tracer (no acceleration structures, no threading, etc.), and the task was to make it faster. 
5 | Of course I wanted to know how much faster it *can* get, so I did some simple speedups myself in parallel. 6 | 7 | The original non-optimized-at-all version is at 8 | [`01-initial-job-task`](https://github.com/aras-p/ToyMeshPathTracer/tree/01-initial-job-task) tag. 9 | I made other tags for later snapshots; here they are with performance numbers *(measured on 2018 MacBookPro i9)*, 10 | on the `teapot.obj` (15706 triangles) and `sponza.obj` (66452 triangles) scenes respectively: 11 | 12 | * `01-initial-job-task`, initial: 3.5 and LOLNOPE Krays/s 13 | * `02-multi-threaded`, multi-threading: 18.9 and LOLNOPE Krays/s 14 | * `03-bvh`, simple bounding volume hierarchy: 6978.2 and 1350.3 Krays/s 15 | * `04-simd`, simplistic naïve SIMD: 8919.0 and 1853.1 Krays/s 16 | * `05-change-ray-tri-algo`, better ray-triangle intersection test: 10251.1 and 1952.5 Krays/s 17 | * `06-shadow-rays`, faster code path for shadow rays: 10888.0 and 2568.3 Krays/s 18 | * `07-compare-with-embree`, compare with [Intel Embree](https://embree.github.io/) 3.5.2: 75965.0 and 40800.8 Krays/s *(yup, Embree is fast!)* 19 | * `08-compare-with-nanort`, compare with [NanoRT](https://github.com/lighttransport/nanort): 20638.2 and 4197.9 Krays/s 20 | 21 | And yeah, I know -- most obvious next steps would be to use better BVH building heuristics (like SAH), and 22 | doing SIMD properly instead of "let's make our vector/ray/color class use SIMD". I did not get to that (yet?), 23 | but in any case -- the code is already over 3000 *times* faster than the initial version. With the BVH being the major 24 | win, as one would expect. 25 | 26 | *Original description of the interview task is below:* 27 | 28 | # Interview task/assignment: speed up a simple path tracer 29 | 30 | This project contains a simple triangle mesh path tracer implemented in C++. 
31 | It's a command line application that takes screen size & input .OBJ data file parameters, 32 | renders it using "path tracing" algorithm and produces `output.png` result file. 33 | 34 | Here are some images that the program can produce with the data files present under `data/` directory: 35 | 36 | ![result1](/result3Suzanne.png?raw=true "Suzanne") 37 | ![result2](/result5Sponza.png?raw=true "Sponza") 38 | 39 | ## The Task 40 | 41 | Current program is slow. *Really slow*. Rendering that monkey head model ("Suzanne") at a lowly 640x360 resolution, 42 | 4 samples per pixel, takes **one minute** on my PC! Rendering the other image ("Sponza") at the same resolution takes **five hours**. 43 | 44 | Your task then is, of course, to make the program faster :) 45 | 46 | I know for sure that it is possible to make it faster by *more than a hundred times* -- e.g. I got Suzanne from a minute down 47 | to 0.2 seconds, and Sponza from five hours down to 8 seconds. It might be possible to make it even faster, but I 48 | did not quite go there. 49 | 50 | Now, **your task is to make it run as fast as you can**. I'm not asking for a "hundred times", but something like 51 | "**at least ten times faster**" is what you should aim for. 52 | 53 | It's entirely your choice how you will do it. Better algorithms? More efficient math? Better data layout? Multi-threading? SIMD? 54 | Rewrite in assembly? Rewrite as a compute shader / CUDA / OpenCL? Rewrite for NVIDIA RTX? All of these? You pick :) Some of what I listed 55 | here is "certainly overkill" and not needed; achieving a 10x faster performance is entirely possible using relatively simple means. 56 | 57 | Go! 58 | 59 | #### What I will be looking at 60 | 61 | * Overall I would recommend making a clone of this project on github and using "actual version control" workflow to make your changes. 62 | If you don't like git or version control, that's fine; I can accept submissions in zip or dropbox or google drive (or whatever) form. 
63 | As long as I can see the code and try it out. 64 | * Your optimized program should produce the same-looking images as the original one, just *do it faster*. 65 | * I'll be looking at "everything" that is important in a programming job: whether the code works correctly, is understandable, 66 | how it is structured, how it behaves performance wise (computing usage, memory usage etc.). 67 | * It is *very* likely that your first submission will not be quite good, so do not delay it until the last day! Usually it 68 | takes 2-4 iterations to arrive at a good solution. 69 | 70 | 71 | 72 | ## About the code 73 | 74 | I made it work on Windows (Visual Studio 2017) and Mac (Xcode 10); the project files for them are respectively in 75 | `projects/VisualStudio/TrimeshTracer.sln` and `projects/Xcode/TrimeshTracer.xcodeproj`. In Visual Studio project, you might need to update settings to whatever Windows SDK version you have, I picked the oldest I had on my machine. If you have any trouble building or running it, 76 | ask me! 77 | 78 | The application accepts four command line arguments as input: `<width> <height> <spp> <datafile>`: 79 | 80 | * `width` is image width in pixels, 81 | * `height` is image height in pixels, 82 | * `spp` is "samples per pixel"; kind of like "anti-aliasing level", 83 | * `datafile` is path to a mesh to render; in Wavefront .OBJ format. 84 | 85 | I added some example meshes under `data/` folder; initially I suggest starting with e.g. `data/cube.obj` which is just a simple 86 | cube. I do *not* recommend trying to run the non-optimized version of the program on for example the Sponza model - it contains 66k 87 | triangles and will run *very very slow*. 88 | 89 | The code itself is fairly simple C++ code, and I tried to comment it extensively. No previous experience with ray-tracers 90 | or path-tracers should be required. 91 | 92 | If you *do* want to read up on what this "path tracing" thing is *(and maybe even get some ideas how to make it faster? 
who knows)*, 93 | I can recommend "Ray Tracing in One Weekend", "Ray Tracing: The Next Week" and "Ray Tracing: The Rest of Your Life" minibook series, 94 | which have been recently [made free here](http://www.realtimerendering.com/raytracing/). The internet is full of other information about ray/path tracing as well. 95 | 96 | For reference, here are the performance numbers I get on my laptop (2019 MacBookPro i9 2.9GHz), rendering at 640x360, 4 samples per pixel, on this un-optimized implementation: 97 | 98 | * cube.obj (14 triangles): 3818.3 KRays/s (0.6 sec) 99 | * suzanne.obj (970 triangles): 48.3 KRays/s (53.0 sec) 100 | * teapot.obj (15706 triangles): 3.5 KRays/s (683.0 sec) 101 | * sponza.obj (66452 triangles): LOL nope, ain't nobody got time for that 102 | -------------------------------------------------------------------------------- /result1Triangle.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aras-p/ToyMeshPathTracer/e74b743256ad11c5eb4be92a9dfe2badd6cb3d7d/result1Triangle.png -------------------------------------------------------------------------------- /result2Cube.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aras-p/ToyMeshPathTracer/e74b743256ad11c5eb4be92a9dfe2badd6cb3d7d/result2Cube.png -------------------------------------------------------------------------------- /result3Suzanne.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aras-p/ToyMeshPathTracer/e74b743256ad11c5eb4be92a9dfe2badd6cb3d7d/result3Suzanne.png -------------------------------------------------------------------------------- /result4Teapot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aras-p/ToyMeshPathTracer/e74b743256ad11c5eb4be92a9dfe2badd6cb3d7d/result4Teapot.png 
-------------------------------------------------------------------------------- /result5Sponza.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aras-p/ToyMeshPathTracer/e74b743256ad11c5eb4be92a9dfe2badd6cb3d7d/result5Sponza.png -------------------------------------------------------------------------------- /runCases.cmd: -------------------------------------------------------------------------------- 1 | @echo off 2 | rem (build\x64-Release\TrimeshTracer.exe 640 360 4 data/triangle.obj) && (move /Y output.png output1Triangle.png) 3 | (build\x64-Release\TrimeshTracer.exe 640 360 4 data/cube.obj) && (move /Y output.png output2Cube.png) 4 | (build\x64-Release\TrimeshTracer.exe 640 360 4 data/suzanne.obj) && (move /Y output.png output3Suzanne.png) 5 | (build\x64-Release\TrimeshTracer.exe 640 360 4 data/teapot.obj) && (move /Y output.png output4Teapot.png) 6 | (build\x64-Release\TrimeshTracer.exe 640 360 4 data/sponza.obj) && (move /Y output.png output5Sponza.png) 7 | pause 8 | -------------------------------------------------------------------------------- /runCases.sh: -------------------------------------------------------------------------------- 1 | #(build/TrimeshTracer/Build/Products/Release/TrimeshTracer 640 360 4 data/triangle.obj) && (mv output.png output1Triangle.png) 2 | (build/TrimeshTracer/Build/Products/Release/TrimeshTracer 640 360 4 data/cube.obj) && (mv output.png output2Cube.png) 3 | (build/TrimeshTracer/Build/Products/Release/TrimeshTracer 640 360 4 data/suzanne.obj) && (mv output.png output3Suzanne.png) 4 | (build/TrimeshTracer/Build/Products/Release/TrimeshTracer 640 360 4 data/teapot.obj) && (mv output.png output4Teapot.png) 5 | (build/TrimeshTracer/Build/Products/Release/TrimeshTracer 640 360 4 data/sponza.obj) && (mv output.png output5Sponza.png) 6 | -------------------------------------------------------------------------------- /source/external/enkits/Atomics.h: 
-------------------------------------------------------------------------------- 1 | // Copyright (c) 2013 Doug Binks 2 | // 3 | // This software is provided 'as-is', without any express or implied 4 | // warranty. In no event will the authors be held liable for any damages 5 | // arising from the use of this software. 6 | // 7 | // Permission is granted to anyone to use this software for any purpose, 8 | // including commercial applications, and to alter it and redistribute it 9 | // freely, subject to the following restrictions: 10 | // 11 | // 1. The origin of this software must not be misrepresented; you must not 12 | // claim that you wrote the original software. If you use this software 13 | // in a product, an acknowledgement in the product documentation would be 14 | // appreciated but is not required. 15 | // 2. Altered source versions must be plainly marked as such, and must not be 16 | // misrepresented as being the original software. 17 | // 3. This notice may not be removed or altered from any source distribution. 
#pragma once

#include <stdint.h>

#ifdef _WIN32
    #define WIN32_LEAN_AND_MEAN
    #include <Windows.h>
    #undef GetObject
    #include <intrin.h>

    extern "C" void _ReadWriteBarrier();
    #pragma intrinsic(_ReadWriteBarrier)
    #pragma intrinsic(_InterlockedCompareExchange)
    #pragma intrinsic(_InterlockedExchangeAdd)

    // Barriers used around the lock-less pipe's index/flag accesses.
    // NOTE(review): _ReadWriteBarrier() only stops *compiler* re-ordering; it emits no
    // CPU fence. That matches x86/x64 ordering but should be confirmed for other targets.
    #define BASE_MEMORYBARRIER_ACQUIRE() _ReadWriteBarrier()
    #define BASE_MEMORYBARRIER_RELEASE() _ReadWriteBarrier()
    #define BASE_ALIGN(x) __declspec( align( x ) )

#else
    // NOTE(review): an empty asm statement with a "memory" clobber is likewise a
    // compiler-only barrier (no instruction is emitted).
    #define BASE_MEMORYBARRIER_ACQUIRE() __asm__ __volatile__("": : :"memory")
    #define BASE_MEMORYBARRIER_RELEASE() __asm__ __volatile__("": : :"memory")
    #define BASE_ALIGN(x) __attribute__ ((aligned( x )))
#endif

namespace enki
{
    // Atomically performs: if( *pDest == compareWith ) { *pDest = swapTo; }
    // Returns the old *pDest (so if successful, returns compareWith).
    // Note the argument order: the value to store comes BEFORE the value to compare against.
    inline uint32_t AtomicCompareAndSwap( volatile uint32_t* pDest, uint32_t swapTo, uint32_t compareWith )
    {
       #ifdef _WIN32
            // assumes two's complement - unsigned / signed conversion leads to same bit pattern
            return _InterlockedCompareExchange( (volatile long*)pDest, swapTo, compareWith );
        #else
            // the GCC builtin takes ( ptr, expected, desired ), hence the swapped arguments
            return __sync_val_compare_and_swap( pDest, compareWith, swapTo );
        #endif
    }

    // 64-bit overload of AtomicCompareAndSwap; same contract as the 32-bit version.
    inline uint64_t AtomicCompareAndSwap( volatile uint64_t* pDest, uint64_t swapTo, uint64_t compareWith )
    {
       #ifdef _WIN32
            // assumes two's complement - unsigned / signed conversion leads to same bit pattern
            return _InterlockedCompareExchange64( (__int64 volatile*)pDest, swapTo, compareWith );
        #else
            return __sync_val_compare_and_swap( pDest, compareWith, swapTo );
        #endif
    }

    // Atomically performs: tmp = *pDest; *pDest += value; return tmp;
    // i.e. returns the value held BEFORE the addition.
    inline int32_t AtomicAdd( volatile int32_t* pDest, int32_t value )
    {
       #ifdef _WIN32
            // keep the volatile qualifier in the cast, matching the compare-and-swap overloads
            return _InterlockedExchangeAdd( (volatile long*)pDest, value );
        #else
            return __sync_fetch_and_add( pDest, value );
        #endif
    }

}
// --------------------------------------------------------------------------------
// /source/external/enkits/LockLessMultiReadPipe.h:
// --------------------------------------------------------------------------------
// Copyright (c) 2013 Doug Binks
//
// This software is provided 'as-is', without any express or implied
// warranty. In no event will the authors be held liable for any damages
// arising from the use of this software.
//
// Permission is granted to anyone to use this software for any purpose,
// including commercial applications, and to alter it and redistribute it
// freely, subject to the following restrictions:
//
// 1. The origin of this software must not be misrepresented; you must not
//    claim that you wrote the original software. If you use this software
//    in a product, an acknowledgement in the product documentation would be
//    appreciated but is not required.
// 2. Altered source versions must be plainly marked as such, and must not be
//    misrepresented as being the original software.
// 3. This notice may not be removed or altered from any source distribution.
18 | 19 | #pragma once 20 | 21 | #include 22 | #include 23 | 24 | #include "Atomics.h" 25 | #include 26 | 27 | 28 | namespace enki 29 | { 30 | // LockLessMultiReadPipe - Single writer, multiple reader thread safe pipe using (semi) lockless programming 31 | // Readers can only read from the back of the pipe 32 | // The single writer can write to the front of the pipe, and read from both ends (a writer can be a reader) 33 | // for many of the principles used here, see http://msdn.microsoft.com/en-us/library/windows/desktop/ee418650(v=vs.85).aspx 34 | // Note: using log2 sizes so we do not need to clamp (multi-operation) 35 | // T is the contained type 36 | // Note this is not true lockless as the use of flags as a form of lock state. 37 | template class LockLessMultiReadPipe 38 | { 39 | public: 40 | LockLessMultiReadPipe(); 41 | ~LockLessMultiReadPipe() {} 42 | 43 | // ReaderTryReadBack returns false if we were unable to read 44 | // This is thread safe for both multiple readers and the writer 45 | bool ReaderTryReadBack( T* pOut ); 46 | 47 | // WriterTryReadFront returns false if we were unable to read 48 | // This is thread safe for the single writer, but should not be called by readers 49 | bool WriterTryReadFront( T* pOut ); 50 | 51 | // WriterTryWriteFront returns false if we were unable to write 52 | // This is thread safe for the single writer, but should not be called by readers 53 | bool WriterTryWriteFront( const T& in ); 54 | 55 | // IsPipeEmpty() is a utility function, not intended for general use 56 | // Should only be used very prudently. 
57 | bool IsPipeEmpty() const 58 | { 59 | return 0 == m_WriteIndex - m_ReadCount; 60 | } 61 | 62 | void Clear() 63 | { 64 | m_WriteIndex = 0; 65 | m_ReadIndex = 0; 66 | m_ReadCount = 0; 67 | memset( (void*)m_Flags, 0, sizeof( m_Flags ) ); 68 | } 69 | 70 | private: 71 | const static uint32_t ms_cSize = ( 1 << cSizeLog2 ); 72 | const static uint32_t ms_cIndexMask = ms_cSize - 1; 73 | const static uint32_t FLAG_INVALID = 0xFFFFFFFF; // 32bit for CAS 74 | const static uint32_t FLAG_CAN_WRITE = 0x00000000; // 32bit for CAS 75 | const static uint32_t FLAG_CAN_READ = 0x11111111; // 32bit for CAS 76 | 77 | T m_Buffer[ ms_cSize ]; 78 | 79 | // read and write indexes allow fast access to the pipe, but actual access 80 | // controlled by the access flags. 81 | volatile uint32_t BASE_ALIGN(4) m_WriteIndex; 82 | volatile uint32_t BASE_ALIGN(4) m_ReadCount; 83 | volatile uint32_t m_Flags[ ms_cSize ]; 84 | volatile uint32_t BASE_ALIGN(4) m_ReadIndex; 85 | }; 86 | 87 | template inline 88 | LockLessMultiReadPipe::LockLessMultiReadPipe() 89 | : m_WriteIndex(0) 90 | , m_ReadIndex(0) 91 | , m_ReadCount(0) 92 | { 93 | assert( cSizeLog2 < 32 ); 94 | memset( (void*)m_Flags, 0, sizeof( m_Flags ) ); 95 | } 96 | 97 | template inline 98 | bool LockLessMultiReadPipe::ReaderTryReadBack( T* pOut ) 99 | { 100 | 101 | uint32_t actualReadIndex; 102 | 103 | uint32_t readCount = m_ReadCount; 104 | 105 | // We get hold of read index for consistency, 106 | // and do first pass starting at read count 107 | uint32_t readIndexToUse = readCount; 108 | 109 | 110 | while(true) 111 | { 112 | 113 | uint32_t writeIndex = m_WriteIndex; 114 | // power of two sizes ensures we can use a simple calc without modulus 115 | uint32_t numInPipe = writeIndex - readCount; 116 | if( 0 == numInPipe ) 117 | { 118 | return false; 119 | } 120 | if( readIndexToUse >= writeIndex ) 121 | { 122 | // move back to start 123 | readIndexToUse = m_ReadIndex; 124 | } 125 | 126 | 127 | // power of two sizes ensures we can perform AND for 
a modulus 128 | actualReadIndex = readIndexToUse & ms_cIndexMask; 129 | 130 | // Multiple potential readers mean we should check if the data is valid, 131 | // using an atomic compare exchange 132 | uint32_t previous = AtomicCompareAndSwap( &m_Flags[ actualReadIndex ], FLAG_INVALID, FLAG_CAN_READ ); 133 | if( FLAG_CAN_READ == previous ) 134 | { 135 | break; 136 | } 137 | ++readIndexToUse; 138 | 139 | //update known readcount 140 | readCount = m_ReadCount; 141 | } 142 | 143 | // we update the read index using an atomic add, as we've only read one piece of data. 144 | // this ensure consistency of the read index, and the above loop ensures readers 145 | // only read from unread data 146 | AtomicAdd( (volatile int32_t*)&m_ReadCount, 1 ); 147 | 148 | BASE_MEMORYBARRIER_ACQUIRE(); 149 | // now read data, ensuring we do so after above reads & CAS 150 | *pOut = m_Buffer[ actualReadIndex ]; 151 | 152 | m_Flags[ actualReadIndex ] = FLAG_CAN_WRITE; 153 | 154 | return true; 155 | } 156 | 157 | template inline 158 | bool LockLessMultiReadPipe::WriterTryReadFront( T* pOut ) 159 | { 160 | uint32_t writeIndex = m_WriteIndex; 161 | uint32_t frontReadIndex = writeIndex; 162 | 163 | // Multiple potential readers mean we should check if the data is valid, 164 | // using an atomic compare exchange - which acts as a form of lock (so not quite lockless really). 165 | uint32_t previous = FLAG_INVALID; 166 | uint32_t actualReadIndex = 0; 167 | while( true ) 168 | { 169 | // power of two sizes ensures we can use a simple calc without modulus 170 | uint32_t readCount = m_ReadCount; 171 | uint32_t numInPipe = writeIndex - readCount; 172 | if( 0 == numInPipe || 0 == frontReadIndex ) 173 | { 174 | // frontReadIndex can get to 0 here if that item was just being read by another thread. 
175 | m_ReadIndex = readCount; 176 | return false; 177 | } 178 | --frontReadIndex; 179 | actualReadIndex = frontReadIndex & ms_cIndexMask; 180 | previous = AtomicCompareAndSwap( &m_Flags[ actualReadIndex ], FLAG_INVALID, FLAG_CAN_READ ); 181 | if( FLAG_CAN_READ == previous ) 182 | { 183 | break; 184 | } 185 | else if( m_ReadIndex >= frontReadIndex ) 186 | { 187 | return false; 188 | } 189 | } 190 | 191 | // now read data, ensuring we do so after above reads & CAS 192 | *pOut = m_Buffer[ actualReadIndex ]; 193 | 194 | m_Flags[ actualReadIndex ] = FLAG_CAN_WRITE; 195 | 196 | BASE_MEMORYBARRIER_RELEASE(); 197 | 198 | // 32-bit aligned stores are atomic, and writer owns the write index 199 | // we only move one back as this is as many as we have read, not where we have read from. 200 | --m_WriteIndex; 201 | return true; 202 | } 203 | 204 | 205 | template inline 206 | bool LockLessMultiReadPipe::WriterTryWriteFront( const T& in ) 207 | { 208 | // The writer 'owns' the write index, and readers can only reduce 209 | // the amount of data in the pipe. 210 | // We get hold of both values for consistency and to reduce false sharing 211 | // impacting more than one access 212 | uint32_t writeIndex = m_WriteIndex; 213 | 214 | 215 | // power of two sizes ensures we can perform AND for a modulus 216 | uint32_t actualWriteIndex = writeIndex & ms_cIndexMask; 217 | 218 | // a reader may still be reading this item, as there are multiple readers 219 | if( m_Flags[ actualWriteIndex ] != FLAG_CAN_WRITE ) 220 | { 221 | return false; // still being read, so have caught up with tail. 
222 | } 223 | 224 | 225 | // as we are the only writer we can update the data without atomics 226 | // whilst the write index has not been updated 227 | m_Buffer[ actualWriteIndex ] = in; 228 | m_Flags[ actualWriteIndex ] = FLAG_CAN_READ; 229 | 230 | // We need to ensure the above writes occur prior to updating the write index, 231 | // otherwise another thread might read before it's finished 232 | BASE_MEMORYBARRIER_RELEASE(); 233 | 234 | // 32-bit aligned stores are atomic, and the writer controls the write index 235 | ++writeIndex; 236 | m_WriteIndex = writeIndex; 237 | return true; 238 | } 239 | 240 | } 241 | -------------------------------------------------------------------------------- /source/external/enkits/TaskScheduler.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2013 Doug Binks 2 | // 3 | // This software is provided 'as-is', without any express or implied 4 | // warranty. In no event will the authors be held liable for any damages 5 | // arising from the use of this software. 6 | // 7 | // Permission is granted to anyone to use this software for any purpose, 8 | // including commercial applications, and to alter it and redistribute it 9 | // freely, subject to the following restrictions: 10 | // 11 | // 1. The origin of this software must not be misrepresented; you must not 12 | // claim that you wrote the original software. If you use this software 13 | // in a product, an acknowledgement in the product documentation would be 14 | // appreciated but is not required. 15 | // 2. Altered source versions must be plainly marked as such, and must not be 16 | // misrepresented as being the original software. 17 | // 3. This notice may not be removed or altered from any source distribution. 
18 | 19 | #include 20 | 21 | #include "TaskScheduler.h" 22 | #include "LockLessMultiReadPipe.h" 23 | 24 | 25 | 26 | using namespace enki; 27 | 28 | 29 | static const uint32_t PIPESIZE_LOG2 = 8; 30 | static const uint32_t SPIN_COUNT = 100; 31 | static const uint32_t SPIN_BACKOFF_MULTIPLIER = 10; 32 | static const uint32_t MAX_NUM_INITIAL_PARTITIONS = 8; 33 | 34 | // each software thread gets it's own copy of gtl_threadNum, so this is safe to use as a static variable 35 | static THREAD_LOCAL uint32_t gtl_threadNum = 0; 36 | 37 | namespace enki 38 | { 39 | struct SubTaskSet 40 | { 41 | ITaskSet* pTask; 42 | TaskSetPartition partition; 43 | }; 44 | 45 | // we derive class TaskPipe rather than typedef to get forward declaration working easily 46 | class TaskPipe : public LockLessMultiReadPipe {}; 47 | 48 | struct ThreadArgs 49 | { 50 | uint32_t threadNum; 51 | TaskScheduler* pTaskScheduler; 52 | }; 53 | } 54 | 55 | namespace 56 | { 57 | SubTaskSet SplitTask( SubTaskSet& subTask_, uint32_t rangeToSplit_ ) 58 | { 59 | SubTaskSet splitTask = subTask_; 60 | uint32_t rangeLeft = subTask_.partition.end - subTask_.partition.start; 61 | 62 | if( rangeToSplit_ > rangeLeft ) 63 | { 64 | rangeToSplit_ = rangeLeft; 65 | } 66 | splitTask.partition.end = subTask_.partition.start + rangeToSplit_; 67 | subTask_.partition.start = splitTask.partition.end; 68 | return splitTask; 69 | } 70 | 71 | #if defined _WIN32 72 | #if defined _M_IX86 || defined _M_X64 73 | #pragma intrinsic(_mm_pause) 74 | inline void Pause() { _mm_pause(); } 75 | #endif 76 | #elif defined __i386__ || defined __x86_64__ 77 | inline void Pause() { __asm__ __volatile__("pause;"); } 78 | #else 79 | inline void Pause() { ;} // may have NOP or yield equiv 80 | #endif 81 | } 82 | 83 | 84 | static void SafeCallback(ProfilerCallbackFunc func_, uint32_t threadnum_) 85 | { 86 | if( func_ ) 87 | { 88 | func_(threadnum_); 89 | } 90 | } 91 | 92 | ProfilerCallbacks* TaskScheduler::GetProfilerCallbacks() 93 | { 94 | return 
&m_ProfilerCallbacks; 95 | } 96 | 97 | THREADFUNC_DECL TaskScheduler::TaskingThreadFunction( void* pArgs ) 98 | { 99 | ThreadArgs args = *(ThreadArgs*)pArgs; 100 | uint32_t threadNum = args.threadNum; 101 | TaskScheduler* pTS = args.pTaskScheduler; 102 | gtl_threadNum = threadNum; 103 | 104 | SafeCallback( pTS->m_ProfilerCallbacks.threadStart, threadNum ); 105 | 106 | uint32_t spinCount = 0; 107 | uint32_t hintPipeToCheck_io = threadNum + 1; // does not need to be clamped. 108 | while( pTS->m_bRunning ) 109 | { 110 | if(!pTS->TryRunTask( threadNum, hintPipeToCheck_io ) ) 111 | { 112 | // no tasks, will spin then wait 113 | ++spinCount; 114 | if( spinCount > SPIN_COUNT ) 115 | { 116 | pTS->WaitForTasks( threadNum ); 117 | spinCount = 0; 118 | } 119 | else 120 | { 121 | uint32_t spinBackoffCount = spinCount * SPIN_BACKOFF_MULTIPLIER; 122 | while( spinBackoffCount ) 123 | { 124 | Pause(); 125 | --spinBackoffCount; 126 | } 127 | } 128 | } 129 | else 130 | { 131 | spinCount = 0; 132 | } 133 | } 134 | 135 | AtomicAdd( &pTS->m_NumThreadsRunning, -1 ); 136 | SafeCallback( pTS->m_ProfilerCallbacks.threadStop, threadNum ); 137 | 138 | return 0; 139 | } 140 | 141 | 142 | void TaskScheduler::StartThreads() 143 | { 144 | if( m_bHaveThreads ) 145 | { 146 | return; 147 | } 148 | m_bRunning = true; 149 | 150 | SemaphoreCreate( m_NewTaskSemaphore ); 151 | 152 | // we create one less thread than m_NumThreads as the main thread counts as one 153 | m_pThreadNumStore = new ThreadArgs[m_NumThreads]; 154 | m_pThreadIDs = new threadid_t[m_NumThreads]; 155 | m_pThreadNumStore[0].threadNum = 0; 156 | m_pThreadNumStore[0].pTaskScheduler = this; 157 | m_pThreadIDs[0] = 0; 158 | m_NumThreadsWaiting = 0; 159 | m_NumThreadsRunning = 1;// acount for main thread 160 | for( uint32_t thread = 1; thread < m_NumThreads; ++thread ) 161 | { 162 | m_pThreadNumStore[thread].threadNum = thread; 163 | m_pThreadNumStore[thread].pTaskScheduler = this; 164 | ThreadCreate( &m_pThreadIDs[thread], 
TaskingThreadFunction, &m_pThreadNumStore[thread] ); 165 | ++m_NumThreadsRunning; 166 | } 167 | 168 | // ensure we have sufficient tasks to equally fill either all threads including main 169 | // or just the threads we've launched, this is outside the firstinit as we want to be able 170 | // to runtime change it 171 | if( 1 == m_NumThreads ) 172 | { 173 | m_NumPartitions = 1; 174 | m_NumInitialPartitions = 1; 175 | } 176 | else 177 | { 178 | m_NumPartitions = m_NumThreads * (m_NumThreads - 1); 179 | m_NumInitialPartitions = m_NumThreads - 1; 180 | if( m_NumInitialPartitions > MAX_NUM_INITIAL_PARTITIONS ) 181 | { 182 | m_NumInitialPartitions = MAX_NUM_INITIAL_PARTITIONS; 183 | } 184 | } 185 | 186 | m_bHaveThreads = true; 187 | } 188 | 189 | void TaskScheduler::StopThreads( bool bWait_ ) 190 | { 191 | if( m_bHaveThreads ) 192 | { 193 | // wait for them threads quit before deleting data 194 | m_bRunning = false; 195 | while( bWait_ && m_NumThreadsRunning > 1 ) 196 | { 197 | // keep firing event to ensure all threads pick up state of m_bRunning 198 | SemaphoreSignal( m_NewTaskSemaphore, m_NumThreadsRunning ); 199 | } 200 | 201 | for( uint32_t thread = 1; thread < m_NumThreads; ++thread ) 202 | { 203 | ThreadTerminate( m_pThreadIDs[thread] ); 204 | } 205 | 206 | m_NumThreads = 0; 207 | delete[] m_pThreadNumStore; 208 | delete[] m_pThreadIDs; 209 | m_pThreadNumStore = 0; 210 | m_pThreadIDs = 0; 211 | SemaphoreClose( m_NewTaskSemaphore ); 212 | 213 | m_bHaveThreads = false; 214 | m_NumThreadsWaiting = 0; 215 | m_NumThreadsRunning = 0; 216 | } 217 | } 218 | 219 | bool TaskScheduler::TryRunTask( uint32_t threadNum, uint32_t& hintPipeToCheck_io_ ) 220 | { 221 | // check for tasks 222 | SubTaskSet subTask; 223 | bool bHaveTask = m_pPipesPerThread[ threadNum ].WriterTryReadFront( &subTask ); 224 | 225 | uint32_t threadToCheck = hintPipeToCheck_io_; 226 | uint32_t checkCount = 0; 227 | while( !bHaveTask && checkCount < m_NumThreads ) 228 | { 229 | threadToCheck = ( 
hintPipeToCheck_io_ + checkCount ) % m_NumThreads; 230 | if( threadToCheck != threadNum ) 231 | { 232 | bHaveTask = m_pPipesPerThread[ threadToCheck ].ReaderTryReadBack( &subTask ); 233 | } 234 | ++checkCount; 235 | } 236 | 237 | if( bHaveTask ) 238 | { 239 | // update hint, will preserve value unless actually got task from another thread. 240 | hintPipeToCheck_io_ = threadToCheck; 241 | 242 | uint32_t partitionSize = subTask.partition.end - subTask.partition.start; 243 | if( subTask.pTask->m_RangeToRun < partitionSize ) 244 | { 245 | SubTaskSet taskToRun = SplitTask( subTask, subTask.pTask->m_RangeToRun ); 246 | SplitAndAddTask( gtl_threadNum, subTask, subTask.pTask->m_RangeToRun, 0 ); 247 | taskToRun.pTask->ExecuteRange( taskToRun.partition, threadNum ); 248 | AtomicAdd( &taskToRun.pTask->m_RunningCount, -1 ); 249 | } 250 | else 251 | { 252 | 253 | // the task has already been divided up by AddTaskSetToPipe, so just run it 254 | subTask.pTask->ExecuteRange( subTask.partition, threadNum ); 255 | AtomicAdd( &subTask.pTask->m_RunningCount, -1 ); 256 | } 257 | } 258 | 259 | return bHaveTask; 260 | 261 | } 262 | 263 | void TaskScheduler::WaitForTasks( uint32_t threadNum ) 264 | { 265 | // We incrememt the number of threads waiting here in order 266 | // to ensure that the check for tasks occurs after the increment 267 | // to prevent a task being added after a check, then the thread waiting. 268 | // This will occasionally result in threads being mistakenly awoken, 269 | // but they will then go back to sleep. 
270 | AtomicAdd( &m_NumThreadsWaiting, 1 ); 271 | 272 | bool bHaveTasks = false; 273 | for( uint32_t thread = 0; thread < m_NumThreads; ++thread ) 274 | { 275 | if( !m_pPipesPerThread[ thread ].IsPipeEmpty() ) 276 | { 277 | bHaveTasks = true; 278 | break; 279 | } 280 | } 281 | if( !bHaveTasks ) 282 | { 283 | SafeCallback( m_ProfilerCallbacks.waitStart, threadNum ); 284 | SemaphoreWait( m_NewTaskSemaphore ); 285 | SafeCallback( m_ProfilerCallbacks.waitStop, threadNum ); 286 | } 287 | 288 | int32_t prev = AtomicAdd( &m_NumThreadsWaiting, -1 ); 289 | (void)prev; 290 | assert( prev != 0 ); 291 | } 292 | 293 | void TaskScheduler::WakeThreads() 294 | { 295 | SemaphoreSignal( m_NewTaskSemaphore, m_NumThreadsWaiting ); 296 | } 297 | 298 | void TaskScheduler::SplitAndAddTask( uint32_t threadNum_, SubTaskSet subTask_, 299 | uint32_t rangeToSplit_, int32_t runningCountOffset_ ) 300 | { 301 | int32_t numAdded = 0; 302 | while( subTask_.partition.start != subTask_.partition.end ) 303 | { 304 | SubTaskSet taskToAdd = SplitTask( subTask_, rangeToSplit_ ); 305 | 306 | // add the partition to the pipe 307 | ++numAdded; 308 | if( !m_pPipesPerThread[ gtl_threadNum ].WriterTryWriteFront( taskToAdd ) ) 309 | { 310 | if( numAdded > 1 ) 311 | { 312 | WakeThreads(); 313 | } 314 | // alter range to run the appropriate fraction 315 | if( taskToAdd.pTask->m_RangeToRun < rangeToSplit_ ) 316 | { 317 | taskToAdd.partition.end = taskToAdd.partition.start + taskToAdd.pTask->m_RangeToRun; 318 | subTask_.partition.start = taskToAdd.partition.end; 319 | } 320 | taskToAdd.pTask->ExecuteRange( taskToAdd.partition, threadNum_ ); 321 | --numAdded; 322 | } 323 | } 324 | 325 | // increment running count by number added 326 | AtomicAdd( &subTask_.pTask->m_RunningCount, numAdded + runningCountOffset_ ); 327 | 328 | WakeThreads(); 329 | } 330 | 331 | void TaskScheduler::AddTaskSetToPipe( ITaskSet* pTaskSet ) 332 | { 333 | // set running count to -1 to guarantee it won't be found complete until all subtasks 
added 334 | pTaskSet->m_RunningCount = -1; 335 | 336 | // divide task up and add to pipe 337 | pTaskSet->m_RangeToRun = pTaskSet->m_SetSize / m_NumPartitions; 338 | if( pTaskSet->m_RangeToRun < pTaskSet->m_MinRange ) { pTaskSet->m_RangeToRun = pTaskSet->m_MinRange; } 339 | 340 | uint32_t rangeToSplit = pTaskSet->m_SetSize / m_NumInitialPartitions; 341 | if( rangeToSplit < pTaskSet->m_MinRange ) { rangeToSplit = pTaskSet->m_MinRange; } 342 | 343 | SubTaskSet subTask; 344 | subTask.pTask = pTaskSet; 345 | subTask.partition.start = 0; 346 | subTask.partition.end = pTaskSet->m_SetSize; 347 | SplitAndAddTask( gtl_threadNum, subTask, rangeToSplit, 1 ); 348 | } 349 | 350 | void TaskScheduler::WaitforTaskSet( const ITaskSet* pTaskSet ) 351 | { 352 | uint32_t hintPipeToCheck_io = gtl_threadNum + 1; // does not need to be clamped. 353 | if( pTaskSet ) 354 | { 355 | while( pTaskSet->m_RunningCount ) 356 | { 357 | TryRunTask( gtl_threadNum, hintPipeToCheck_io ); 358 | // should add a spin then wait for task completion event. 359 | } 360 | } 361 | else 362 | { 363 | TryRunTask( gtl_threadNum, hintPipeToCheck_io ); 364 | } 365 | } 366 | 367 | void TaskScheduler::WaitforAll() 368 | { 369 | bool bHaveTasks = true; 370 | uint32_t hintPipeToCheck_io = gtl_threadNum + 1; // does not need to be clamped. 
371 | int32_t threadsRunning = m_NumThreadsRunning - 1; 372 | while( bHaveTasks || m_NumThreadsWaiting < threadsRunning ) 373 | { 374 | TryRunTask( gtl_threadNum, hintPipeToCheck_io ); 375 | bHaveTasks = false; 376 | for( uint32_t thread = 0; thread < m_NumThreads; ++thread ) 377 | { 378 | if( !m_pPipesPerThread[ thread ].IsPipeEmpty() ) 379 | { 380 | bHaveTasks = true; 381 | break; 382 | } 383 | } 384 | } 385 | } 386 | 387 | void TaskScheduler::WaitforAllAndShutdown() 388 | { 389 | WaitforAll(); 390 | StopThreads(true); 391 | delete[] m_pPipesPerThread; 392 | m_pPipesPerThread = 0; 393 | } 394 | 395 | uint32_t TaskScheduler::GetNumTaskThreads() const 396 | { 397 | return m_NumThreads; 398 | } 399 | 400 | TaskScheduler::TaskScheduler() 401 | : m_pPipesPerThread(NULL) 402 | , m_NumThreads(0) 403 | , m_pThreadNumStore(NULL) 404 | , m_pThreadIDs(NULL) 405 | , m_bRunning(false) 406 | , m_NumThreadsRunning(0) 407 | , m_NumThreadsWaiting(0) 408 | , m_NumPartitions(0) 409 | , m_bHaveThreads(false) 410 | { 411 | memset(&m_ProfilerCallbacks, 0, sizeof(m_ProfilerCallbacks)); 412 | } 413 | 414 | TaskScheduler::~TaskScheduler() 415 | { 416 | StopThreads( true ); // Stops threads, waiting for them. 417 | 418 | delete[] m_pPipesPerThread; 419 | m_pPipesPerThread = 0; 420 | } 421 | 422 | void TaskScheduler::Initialize( uint32_t numThreads_ ) 423 | { 424 | assert( numThreads_ ); 425 | StopThreads( true ); // Stops threads, waiting for them. 
426 | delete[] m_pPipesPerThread; 427 | 428 | m_NumThreads = numThreads_; 429 | 430 | m_pPipesPerThread = new TaskPipe[ m_NumThreads ]; 431 | 432 | StartThreads(); 433 | } 434 | 435 | void TaskScheduler::Initialize() 436 | { 437 | Initialize( GetNumHardwareThreads() ); 438 | } -------------------------------------------------------------------------------- /source/external/enkits/TaskScheduler.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2013 Doug Binks 2 | // 3 | // This software is provided 'as-is', without any express or implied 4 | // warranty. In no event will the authors be held liable for any damages 5 | // arising from the use of this software. 6 | // 7 | // Permission is granted to anyone to use this software for any purpose, 8 | // including commercial applications, and to alter it and redistribute it 9 | // freely, subject to the following restrictions: 10 | // 11 | // 1. The origin of this software must not be misrepresented; you must not 12 | // claim that you wrote the original software. If you use this software 13 | // in a product, an acknowledgement in the product documentation would be 14 | // appreciated but is not required. 15 | // 2. Altered source versions must be plainly marked as such, and must not be 16 | // misrepresented as being the original software. 17 | // 3. This notice may not be removed or altered from any source distribution. 18 | 19 | #pragma once 20 | 21 | #include 22 | #include "Threads.h" 23 | 24 | namespace enki 25 | { 26 | 27 | struct TaskSetPartition 28 | { 29 | uint32_t start; 30 | uint32_t end; 31 | }; 32 | 33 | class TaskScheduler; 34 | class TaskPipe; 35 | struct ThreadArgs; 36 | struct SubTaskSet; 37 | 38 | // Subclass ITaskSet to create tasks. 
39 | // TaskSets can be re-used, but check 40 | class ITaskSet 41 | { 42 | public: 43 | ITaskSet() 44 | : m_SetSize(1) 45 | , m_MinRange(1) 46 | , m_RunningCount(0) 47 | , m_RangeToRun(1) 48 | {} 49 | 50 | ITaskSet( uint32_t setSize_ ) 51 | : m_SetSize( setSize_ ) 52 | , m_MinRange(1) 53 | , m_RunningCount(0) 54 | , m_RangeToRun(1) 55 | {} 56 | 57 | ITaskSet( uint32_t setSize_, uint32_t minRange_ ) 58 | : m_SetSize( setSize_ ) 59 | , m_MinRange( minRange_ ) 60 | , m_RunningCount(0) 61 | , m_RangeToRun(minRange_) 62 | {} 63 | 64 | // Execute range should be overloaded to process tasks. It will be called with a 65 | // range_ where range.start >= 0; range.start < range.end; and range.end < m_SetSize; 66 | // The range values should be mapped so that linearly processing them in order is cache friendly 67 | // i.e. neighbouring values should be close together. 68 | // threadnum should not be used for changing processing of data, it's intended purpose 69 | // is to allow per-thread data buckets for output. 70 | virtual void ExecuteRange( TaskSetPartition range, uint32_t threadnum ) = 0; 71 | 72 | // Size of set - usually the number of data items to be processed, see ExecuteRange. Defaults to 1 73 | uint32_t m_SetSize; 74 | 75 | // Minimum size of of TaskSetPartition range when splitting a task set into partitions. 76 | // This should be set to a value which results in computation effort of at least 10k 77 | // clock cycles to minimize tast scheduler overhead. 78 | // NOTE: The last partition will be smaller than m_MinRange if m_SetSize is not a multiple 79 | // of m_MinRange. 80 | // Also known as grain size in literature. 
81 | uint32_t m_MinRange; 82 | 83 | bool GetIsComplete() 84 | { 85 | return 0 == m_RunningCount; 86 | } 87 | private: 88 | friend class TaskScheduler; 89 | volatile int32_t m_RunningCount; 90 | uint32_t m_RangeToRun; 91 | }; 92 | 93 | // TaskScheduler implements several callbacks intended for profilers 94 | typedef void (*ProfilerCallbackFunc)( uint32_t threadnum_ ); 95 | struct ProfilerCallbacks 96 | { 97 | ProfilerCallbackFunc threadStart; 98 | ProfilerCallbackFunc threadStop; 99 | ProfilerCallbackFunc waitStart; 100 | ProfilerCallbackFunc waitStop; 101 | }; 102 | 103 | class TaskScheduler 104 | { 105 | public: 106 | TaskScheduler(); 107 | ~TaskScheduler(); 108 | 109 | // Call either Initialize() or Initialize( numThreads_ ) before adding tasks. 110 | 111 | // Initialize() will create GetNumHardwareThreads()-1 threads, which is 112 | // sufficient to fill the system when including the main thread. 113 | // Initialize can be called multiple times - it will wait for completion 114 | // before re-initializing. 115 | void Initialize(); 116 | 117 | // Initialize( numThreads_ ) - numThreads_ (must be > 0) 118 | // will create numThreads_-1 threads, as thread 0 is 119 | // the thread on which the initialize was called. 120 | void Initialize( uint32_t numThreads_ ); 121 | 122 | 123 | // Adds the TaskSet to pipe and returns if the pipe is not full. 124 | // If the pipe is full, pTaskSet is run. 125 | // should only be called from main thread, or within a task 126 | void AddTaskSetToPipe( ITaskSet* pTaskSet ); 127 | 128 | // Runs the TaskSets in pipe until true == pTaskSet->GetIsComplete(); 129 | // should only be called from thread which created the taskscheduler , or within a task 130 | // if called with 0 it will try to run tasks, and return if none available. 
131 | void WaitforTaskSet( const ITaskSet* pTaskSet ); 132 | 133 | // Waits for all task sets to complete - not guaranteed to work unless we know we 134 | // are in a situation where tasks aren't being continuosly added. 135 | void WaitforAll(); 136 | 137 | // Waits for all task sets to complete and shutdown threads - not guaranteed to work unless we know we 138 | // are in a situation where tasks aren't being continuosly added. 139 | void WaitforAllAndShutdown(); 140 | 141 | // Returns the number of threads created for running tasks + 1 142 | // to account for the main thread. 143 | uint32_t GetNumTaskThreads() const; 144 | 145 | // Returns the ProfilerCallbacks structure so that it can be modified to 146 | // set the callbacks. 147 | ProfilerCallbacks* GetProfilerCallbacks(); 148 | 149 | private: 150 | static THREADFUNC_DECL TaskingThreadFunction( void* pArgs ); 151 | void WaitForTasks( uint32_t threadNum ); 152 | bool TryRunTask( uint32_t threadNum, uint32_t& hintPipeToCheck_io_ ); 153 | void StartThreads(); 154 | void StopThreads( bool bWait_ ); 155 | void SplitAndAddTask( uint32_t threadNum_, SubTaskSet subTask_, 156 | uint32_t rangeToSplit_, int32_t runningCountOffset_ ); 157 | void WakeThreads(); 158 | 159 | TaskPipe* m_pPipesPerThread; 160 | 161 | uint32_t m_NumThreads; 162 | ThreadArgs* m_pThreadNumStore; 163 | threadid_t* m_pThreadIDs; 164 | volatile bool m_bRunning; 165 | volatile int32_t m_NumThreadsRunning; 166 | volatile int32_t m_NumThreadsWaiting; 167 | uint32_t m_NumPartitions; 168 | uint32_t m_NumInitialPartitions; 169 | semaphoreid_t m_NewTaskSemaphore; 170 | bool m_bHaveThreads; 171 | ProfilerCallbacks m_ProfilerCallbacks; 172 | 173 | TaskScheduler( const TaskScheduler& nocopy ); 174 | TaskScheduler& operator=( const TaskScheduler& nocopy ); 175 | }; 176 | 177 | } -------------------------------------------------------------------------------- /source/external/enkits/TaskScheduler_c.cpp: 
-------------------------------------------------------------------------------- 1 | // Copyright (c) 2013 Doug Binks 2 | // 3 | // This software is provided 'as-is', without any express or implied 4 | // warranty. In no event will the authors be held liable for any damages 5 | // arising from the use of this software. 6 | // 7 | // Permission is granted to anyone to use this software for any purpose, 8 | // including commercial applications, and to alter it and redistribute it 9 | // freely, subject to the following restrictions: 10 | // 11 | // 1. The origin of this software must not be misrepresented; you must not 12 | // claim that you wrote the original software. If you use this software 13 | // in a product, an acknowledgement in the product documentation would be 14 | // appreciated but is not required. 15 | // 2. Altered source versions must be plainly marked as such, and must not be 16 | // misrepresented as being the original software. 17 | // 3. This notice may not be removed or altered from any source distribution. 
18 | 19 | #include "TaskScheduler_c.h" 20 | #include "TaskScheduler.h" 21 | 22 | #include 23 | 24 | using namespace enki; 25 | 26 | struct enkiTaskScheduler : TaskScheduler 27 | { 28 | }; 29 | 30 | struct enkiTaskSet : ITaskSet 31 | { 32 | enkiTaskSet( enkiTaskExecuteRange taskFun_ ) : taskFun(taskFun_), pArgs(NULL) {} 33 | 34 | virtual void ExecuteRange( TaskSetPartition range, uint32_t threadnum ) 35 | { 36 | taskFun( range.start, range.end, threadnum, pArgs ); 37 | } 38 | 39 | enkiTaskExecuteRange taskFun; 40 | void* pArgs; 41 | }; 42 | 43 | enkiTaskScheduler* enkiNewTaskScheduler() 44 | { 45 | enkiTaskScheduler* pETS = new enkiTaskScheduler(); 46 | return pETS; 47 | } 48 | 49 | void enkiInitTaskScheduler( enkiTaskScheduler* pETS_ ) 50 | { 51 | pETS_->Initialize(); 52 | } 53 | 54 | void enkiInitTaskSchedulerNumThreads( enkiTaskScheduler* pETS_, uint32_t numThreads_ ) 55 | { 56 | pETS_->Initialize( numThreads_ ); 57 | } 58 | 59 | void enkiDeleteTaskScheduler( enkiTaskScheduler* pETS_ ) 60 | { 61 | delete pETS_; 62 | } 63 | 64 | enkiTaskSet* enkiCreateTaskSet( enkiTaskScheduler* pETS_, enkiTaskExecuteRange taskFunc_ ) 65 | { 66 | (void)pETS_; 67 | return new enkiTaskSet( taskFunc_ ); 68 | } 69 | 70 | void enkiDeleteTaskSet( enkiTaskSet* pTaskSet_ ) 71 | { 72 | delete pTaskSet_; 73 | } 74 | 75 | void enkiAddTaskSetToPipe( enkiTaskScheduler* pETS_, enkiTaskSet* pTaskSet_, void* pArgs_, uint32_t setSize_ ) 76 | { 77 | assert( pTaskSet_ ); 78 | assert( pTaskSet_->taskFun ); 79 | 80 | pTaskSet_->m_SetSize = setSize_; 81 | pTaskSet_->pArgs = pArgs_; 82 | pETS_->AddTaskSetToPipe( pTaskSet_ ); 83 | } 84 | 85 | void enkiAddTaskSetToPipeMinRange(enkiTaskScheduler * pETS_, enkiTaskSet * pTaskSet_, void * pArgs_, uint32_t setSize_, uint32_t minRange_) 86 | { 87 | assert( pTaskSet_ ); 88 | assert( pTaskSet_->taskFun ); 89 | 90 | pTaskSet_->m_SetSize = setSize_; 91 | pTaskSet_->m_MinRange = minRange_; 92 | pTaskSet_->pArgs = pArgs_; 93 | pETS_->AddTaskSetToPipe( pTaskSet_ ); 94 
| } 95 | 96 | int enkiIsTaskSetComplete( enkiTaskScheduler* pETS_, enkiTaskSet* pTaskSet_ ) 97 | { 98 | (void)pETS_; 99 | assert( pTaskSet_ ); 100 | return ( pTaskSet_->GetIsComplete() ) ? 1 : 0; 101 | } 102 | 103 | void enkiWaitForTaskSet( enkiTaskScheduler* pETS_, enkiTaskSet* pTaskSet_ ) 104 | { 105 | pETS_->WaitforTaskSet( pTaskSet_ ); 106 | } 107 | 108 | void enkiWaitForAll( enkiTaskScheduler* pETS_ ) 109 | { 110 | pETS_->WaitforAll(); 111 | } 112 | 113 | 114 | uint32_t enkiGetNumTaskThreads( enkiTaskScheduler* pETS_ ) 115 | { 116 | return pETS_->GetNumTaskThreads(); 117 | } 118 | 119 | enkiProfilerCallbacks* enkiGetProfilerCallbacks( enkiTaskScheduler* pETS_ ) 120 | { 121 | assert( sizeof(enkiProfilerCallbacks) == sizeof(enki::ProfilerCallbacks) ); 122 | return (enkiProfilerCallbacks*)pETS_->GetProfilerCallbacks(); 123 | } 124 | 125 | -------------------------------------------------------------------------------- /source/external/enkits/TaskScheduler_c.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2013 Doug Binks 2 | // 3 | // This software is provided 'as-is', without any express or implied 4 | // warranty. In no event will the authors be held liable for any damages 5 | // arising from the use of this software. 6 | // 7 | // Permission is granted to anyone to use this software for any purpose, 8 | // including commercial applications, and to alter it and redistribute it 9 | // freely, subject to the following restrictions: 10 | // 11 | // 1. The origin of this software must not be misrepresented; you must not 12 | // claim that you wrote the original software. If you use this software 13 | // in a product, an acknowledgement in the product documentation would be 14 | // appreciated but is not required. 15 | // 2. Altered source versions must be plainly marked as such, and must not be 16 | // misrepresented as being the original software. 17 | // 3. 
//    This notice may not be removed or altered from any source distribution.

#pragma once

#ifdef __cplusplus
extern "C" {
#endif

#include <stdint.h>

// Opaque handles; their definitions are not part of this header.
typedef struct enkiTaskScheduler enkiTaskScheduler;
typedef struct enkiTaskSet       enkiTaskSet;

// Task callback: receives a range [start_, end), the number of the thread
// running it, and the pArgs_ pointer passed when the task set was scheduled.
typedef void (* enkiTaskExecuteRange)( uint32_t start_, uint32_t end, uint32_t threadnum_, void* pArgs_ );


// Create a new task scheduler
enkiTaskScheduler*  enkiNewTaskScheduler();

// Initialize task scheduler - will create GetNumHardwareThreads()-1 threads, which is
// sufficient to fill the system when including the main thread.
// Initialize can be called multiple times - it will wait for completion
// before re-initializing.
void                enkiInitTaskScheduler(  enkiTaskScheduler* pETS_ );

// Initialize a task scheduler with numThreads_ (must be > 0)
// will create numThreads_-1 threads, as thread 0 is
// the thread on which the initialize was called.
void                enkiInitTaskSchedulerNumThreads(  enkiTaskScheduler* pETS_, uint32_t numThreads_ );


// Delete a task scheduler
void                enkiDeleteTaskScheduler( enkiTaskScheduler* pETS_ );

// Create a task set.
enkiTaskSet*        enkiCreateTaskSet( enkiTaskScheduler* pETS_, enkiTaskExecuteRange taskFunc_  );

// Delete a task set.
void                enkiDeleteTaskSet( enkiTaskSet* pTaskSet_ );

// Schedule the task
void                enkiAddTaskSetToPipe( enkiTaskScheduler* pETS_, enkiTaskSet* pTaskSet_,
                                          void* pArgs_, uint32_t setSize_ );

// Schedule the task with a minimum range.
// This should be set to a value which results in computation effort of at least 10k
// clock cycles to minimize task scheduler overhead.
// NOTE: The last partition will be smaller than m_MinRange if m_SetSize is not a multiple
// of m_MinRange.
// Also known as grain size in literature.
void                enkiAddTaskSetToPipeMinRange( enkiTaskScheduler* pETS_, enkiTaskSet* pTaskSet_,
                                                  void* pArgs_, uint32_t setSize_, uint32_t minRange_ );


// Check if TaskSet is complete. Doesn't wait. Returns 1 if complete, 0 if not.
int                 enkiIsTaskSetComplete( enkiTaskScheduler* pETS_, enkiTaskSet* pTaskSet_ );


// Wait for a given task.
// should only be called from the thread which created the task scheduler, or within a task
// if called with 0 it will try to run tasks, and return if none available.
void                enkiWaitForTaskSet( enkiTaskScheduler* pETS_, enkiTaskSet* pTaskSet_ );


// Waits for all task sets to complete - not guaranteed to work unless we know we
// are in a situation where tasks aren't being continuously added.
void                enkiWaitForAll( enkiTaskScheduler* pETS_ );


// get number of threads
uint32_t            enkiGetNumTaskThreads( enkiTaskScheduler* pETS_ );

// TaskScheduler implements several callbacks intended for profilers
typedef void (*enkiProfilerCallbackFunc)( uint32_t threadnum_ );
struct enkiProfilerCallbacks
{
    enkiProfilerCallbackFunc threadStart;
    enkiProfilerCallbackFunc threadStop;
    enkiProfilerCallbackFunc waitStart;
    enkiProfilerCallbackFunc waitStop;
};

// Get the callback structure so it can be set
struct enkiProfilerCallbacks*    enkiGetProfilerCallbacks( enkiTaskScheduler* pETS_ );

#ifdef __cplusplus
}
#endif

// --------------------------------------------------------------------------------
// /source/external/enkits/Threads.h:
// --------------------------------------------------------------------------------
// Copyright (c) 2013 Doug Binks
//
// This software is provided 'as-is', without any express or implied
// warranty. In no event will the authors be held liable for any damages
// arising from the use of this software.
//
// Permission is granted to anyone to use this software for any purpose,
// including commercial applications, and to alter it and redistribute it
// freely, subject to the following restrictions:
//
// 1. The origin of this software must not be misrepresented; you must not
//    claim that you wrote the original software. If you use this software
//    in a product, an acknowledgement in the product documentation would be
//    appreciated but is not required.
// 2. Altered source versions must be plainly marked as such, and must not be
//    misrepresented as being the original software.
// 3. This notice may not be removed or altered from any source distribution.

// NOTE: altered from the original source - the Windows ThreadTerminate result,
// and the POSIX GetNumHardwareThreads / semaphore error handling, were changed.

#pragma once

#include <stdint.h>
#include <assert.h>

#ifdef _WIN32

    #include "Atomics.h"

    #define WIN32_LEAN_AND_MEAN
    #include <Windows.h>

    #define THREADFUNC_DECL DWORD WINAPI
    #define THREAD_LOCAL __declspec( thread )

namespace enki
{
    typedef HANDLE threadid_t;

    // declare the thread start function as:
    // THREADFUNC_DECL MyThreadStart( void* pArg );
    // Returns true if the thread was created.
    inline bool ThreadCreate( threadid_t* returnid,  DWORD ( WINAPI *StartFunc) (void* ), void* pArg )
    {
        // posix equiv pthread_create
        DWORD threadid;
        *returnid = CreateThread( 0, 0, StartFunc, pArg, 0, &threadid );
        return  *returnid != NULL;
    }

    // Returns true on success, matching the POSIX variant below.
    // CloseHandle returns non-zero on success, so the previous "== 0" test
    // reported success exactly when the call had failed.
    // NOTE: this only closes the handle; per the Win32 documentation closing a
    // thread handle does not itself stop the thread.
    inline bool ThreadTerminate( threadid_t threadid )
    {
        // posix equiv pthread_cancel
        return CloseHandle( threadid ) != 0;
    }

    // Number of processors reported by GetSystemInfo.
    inline uint32_t GetNumHardwareThreads()
    {
        SYSTEM_INFO sysInfo;
        GetSystemInfo(&sysInfo);
        return sysInfo.dwNumberOfProcessors;
    }
}

#else // posix

    #include <pthread.h>
    #include <unistd.h>
    #define THREADFUNC_DECL void*
    #define THREAD_LOCAL __thread

namespace enki
{
    typedef pthread_t threadid_t;

    // declare the thread start function as:
    // THREADFUNC_DECL MyThreadStart( void* pArg );
    // Returns true if the thread was created.
    inline bool ThreadCreate( threadid_t* returnid, void* ( *StartFunc) (void* ), void* pArg )
    {
        // posix equiv pthread_create
        int32_t retval = pthread_create( returnid, NULL, StartFunc, pArg );

        return  retval == 0;
    }

    // Returns true if the cancellation request was accepted.
    inline bool ThreadTerminate( threadid_t threadid )
    {
        // posix equiv pthread_cancel
        return pthread_cancel( threadid ) == 0;
    }

    // Number of processors currently online, never less than 1.
    // sysconf returns -1 on failure; casting that straight to uint32_t would
    // report 0xFFFFFFFF hardware threads.
    inline uint32_t GetNumHardwareThreads()
    {
        const long numProcessors = sysconf( _SC_NPROCESSORS_ONLN );
        return ( numProcessors > 0 ) ? (uint32_t)numProcessors : 1u;
    }
}

#endif // posix


// Semaphore implementation
#ifdef _WIN32

namespace enki
{
    struct semaphoreid_t
    {
        HANDLE      sem;
    };

    // Creates an unnamed semaphore with an initial count of 0.
    inline void SemaphoreCreate( semaphoreid_t& semaphoreid )
    {
        semaphoreid.sem = CreateSemaphore(NULL, 0, MAXLONG, NULL );
    }

    inline void SemaphoreClose( semaphoreid_t& semaphoreid )
    {
        CloseHandle( semaphoreid.sem );
    }

    // Blocks until the semaphore is signalled.
    inline void SemaphoreWait( semaphoreid_t& semaphoreid  )
    {
        DWORD retval = WaitForSingleObject( semaphoreid.sem, INFINITE );
        (void)retval;   // only read by the assert

        assert( retval != WAIT_FAILED );
    }

    // Releases countWaiting waiters in one call; does nothing for a count of 0.
    inline void SemaphoreSignal( semaphoreid_t& semaphoreid, int32_t countWaiting )
    {
        if( countWaiting )
        {
            ReleaseSemaphore( semaphoreid.sem, countWaiting, NULL );
        }
    }
}
#elif defined(__MACH__)

// OS X does not have POSIX semaphores
// see https://developer.apple.com/library/content/documentation/Darwin/Conceptual/KernelProgramming/synchronization/synchronization.html
#include <mach/mach.h>

namespace enki
{

    struct semaphoreid_t
    {
        semaphore_t   sem;
    };

    // Creates a semaphore with an initial count of 0.
    inline void SemaphoreCreate( semaphoreid_t& semaphoreid )
    {
        semaphore_create( mach_task_self(), &semaphoreid.sem, SYNC_POLICY_FIFO, 0 );
    }

    inline void SemaphoreClose( semaphoreid_t& semaphoreid )
    {
        semaphore_destroy( mach_task_self(), semaphoreid.sem );
    }

    inline void SemaphoreWait( semaphoreid_t& semaphoreid  )
    {
        semaphore_wait( semaphoreid.sem );
    }

    // Signals the semaphore once per waiter.
    inline void SemaphoreSignal( semaphoreid_t& semaphoreid, int32_t countWaiting )
    {
        while( countWaiting-- > 0 )
        {
            semaphore_signal( semaphoreid.sem );
        }
    }
}

#else // POSIX

#include <errno.h>
#include <semaphore.h>

namespace enki
{

    struct semaphoreid_t
    {
        sem_t   sem;
    };

    // Creates a process-private semaphore with an initial count of 0.
    inline void SemaphoreCreate( semaphoreid_t& semaphoreid )
    {
        int err = sem_init( &semaphoreid.sem, 0, 0 );
        (void)err;      // only read by the assert (unused when NDEBUG is defined)
        assert( err == 0 );
    }

    inline void SemaphoreClose( semaphoreid_t& semaphoreid )
    {
        sem_destroy( &semaphoreid.sem );
    }

    // Blocks until the semaphore is signalled.
    // sem_wait fails with EINTR when interrupted by a signal handler; retry so
    // that an interruption is not mistaken for a completed wait.
    inline void SemaphoreWait( semaphoreid_t& semaphoreid  )
    {
        int err;
        do
        {
            err = sem_wait( &semaphoreid.sem );
        } while( err == -1 && errno == EINTR );
        (void)err;      // only read by the assert (unused when NDEBUG is defined)
        assert( err == 0 );
    }

    // Posts the semaphore once per waiter.
    inline void SemaphoreSignal( semaphoreid_t& semaphoreid, int32_t countWaiting )
    {
        while( countWaiting-- > 0 )
        {
            sem_post( &semaphoreid.sem );
        }
    }
}
#endif

// --------------------------------------------------------------------------------
// /source/external/objparser-license.md:
// --------------------------------------------------------------------------------
// MIT License
//
// Copyright (c) 2016-2019 Arseny Kapoulkine
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so,
subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /source/external/objparser.cpp: -------------------------------------------------------------------------------- 1 | #ifndef _CRT_SECURE_NO_WARNINGS 2 | #define _CRT_SECURE_NO_WARNINGS 3 | #endif 4 | 5 | #include "objparser.h" 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | template 14 | static void growArray(T*& data, size_t& capacity) 15 | { 16 | size_t newcapacity = capacity == 0 ? 32 : capacity + capacity / 2; 17 | T* newdata = new T[newcapacity]; 18 | 19 | if (data) 20 | { 21 | memcpy(newdata, data, capacity * sizeof(T)); 22 | delete[] data; 23 | } 24 | 25 | data = newdata; 26 | capacity = newcapacity; 27 | } 28 | 29 | static int fixupIndex(int index, size_t size) 30 | { 31 | return (index >= 0) ? 
index - 1 : int(size) + index; 32 | } 33 | 34 | static int parseInt(const char* s, const char** end) 35 | { 36 | // skip whitespace 37 | while (*s == ' ' || *s == '\t') 38 | s++; 39 | 40 | // read sign bit 41 | int sign = (*s == '-'); 42 | s += (*s == '-' || *s == '+'); 43 | 44 | unsigned int result = 0; 45 | 46 | for (;;) 47 | { 48 | if (unsigned(*s - '0') < 10) 49 | result = result * 10 + (*s - '0'); 50 | else 51 | break; 52 | 53 | s++; 54 | } 55 | 56 | // return end-of-string 57 | *end = s; 58 | 59 | return sign ? -int(result) : int(result); 60 | } 61 | 62 | static float parseFloat(const char* s, const char** end) 63 | { 64 | static const double digits[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}; 65 | static const double powers[] = {1e0, 1e+1, 1e+2, 1e+3, 1e+4, 1e+5, 1e+6, 1e+7, 1e+8, 1e+9, 1e+10, 1e+11, 1e+12, 1e+13, 1e+14, 1e+15, 1e+16, 1e+17, 1e+18, 1e+19, 1e+20, 1e+21, 1e+22}; 66 | 67 | // skip whitespace 68 | while (*s == ' ' || *s == '\t') 69 | s++; 70 | 71 | // read sign 72 | double sign = (*s == '-') ? -1 : 1; 73 | s += (*s == '-' || *s == '+'); 74 | 75 | // read integer part 76 | double result = 0; 77 | int power = 0; 78 | 79 | while (unsigned(*s - '0') < 10) 80 | { 81 | result = result * 10 + digits[*s - '0']; 82 | s++; 83 | } 84 | 85 | // read fractional part 86 | if (*s == '.') 87 | { 88 | s++; 89 | 90 | while (unsigned(*s - '0') < 10) 91 | { 92 | result = result * 10 + digits[*s - '0']; 93 | s++; 94 | power--; 95 | } 96 | } 97 | 98 | // read exponent part 99 | if ((*s | ' ') == 'e') 100 | { 101 | s++; 102 | 103 | // read exponent sign 104 | int expsign = (*s == '-') ? -1 : 1; 105 | s += (*s == '-' || *s == '+'); 106 | 107 | // read exponent 108 | int exppower = 0; 109 | 110 | while (unsigned(*s - '0') < 10) 111 | { 112 | exppower = exppower * 10 + (*s - '0'); 113 | s++; 114 | } 115 | 116 | // done! 
117 | power += expsign * exppower; 118 | } 119 | 120 | // return end-of-string 121 | *end = s; 122 | 123 | // note: this is precise if result < 9e15 124 | // for longer inputs we lose a bit of precision here 125 | if (unsigned(-power) < sizeof(powers) / sizeof(powers[0])) 126 | return float(sign * result / powers[-power]); 127 | else if (unsigned(power) < sizeof(powers) / sizeof(powers[0])) 128 | return float(sign * result * powers[power]); 129 | else 130 | return float(sign * result * pow(10.0, power)); 131 | } 132 | 133 | static const char* parseFace(const char* s, int& vi, int& vti, int& vni) 134 | { 135 | while (*s == ' ' || *s == '\t') 136 | s++; 137 | 138 | vi = parseInt(s, &s); 139 | 140 | if (*s != '/') 141 | return s; 142 | s++; 143 | 144 | // handle vi//vni indices 145 | if (*s != '/') 146 | vti = parseInt(s, &s); 147 | 148 | if (*s != '/') 149 | return s; 150 | s++; 151 | 152 | vni = parseInt(s, &s); 153 | 154 | return s; 155 | } 156 | 157 | ObjFile::ObjFile() 158 | : v(0) 159 | , v_size(0) 160 | , v_cap(0) 161 | , vt(0) 162 | , vt_size(0) 163 | , vt_cap(0) 164 | , vn(0) 165 | , vn_size(0) 166 | , vn_cap(0) 167 | , f(0) 168 | , f_size(0) 169 | , f_cap(0) 170 | , g(0) 171 | , g_size(0) 172 | , g_cap(0) 173 | { 174 | } 175 | 176 | ObjFile::~ObjFile() 177 | { 178 | delete[] v; 179 | delete[] vt; 180 | delete[] vn; 181 | delete[] f; 182 | delete[] g; 183 | } 184 | 185 | void objParseLine(ObjFile& result, const char* line) 186 | { 187 | if (line[0] == 'v' && line[1] == ' ') 188 | { 189 | const char* s = line + 2; 190 | 191 | float x = parseFloat(s, &s); 192 | float y = parseFloat(s, &s); 193 | float z = parseFloat(s, &s); 194 | 195 | if (result.v_size + 3 > result.v_cap) 196 | growArray(result.v, result.v_cap); 197 | 198 | result.v[result.v_size++] = x; 199 | result.v[result.v_size++] = y; 200 | result.v[result.v_size++] = z; 201 | } 202 | else if (line[0] == 'v' && line[1] == 't' && line[2] == ' ') 203 | { 204 | const char* s = line + 3; 205 | 206 | float u 
= parseFloat(s, &s); 207 | float v = parseFloat(s, &s); 208 | float w = parseFloat(s, &s); 209 | 210 | if (result.vt_size + 3 > result.vt_cap) 211 | growArray(result.vt, result.vt_cap); 212 | 213 | result.vt[result.vt_size++] = u; 214 | result.vt[result.vt_size++] = v; 215 | result.vt[result.vt_size++] = w; 216 | } 217 | else if (line[0] == 'v' && line[1] == 'n' && line[2] == ' ') 218 | { 219 | const char* s = line + 3; 220 | 221 | float x = parseFloat(s, &s); 222 | float y = parseFloat(s, &s); 223 | float z = parseFloat(s, &s); 224 | 225 | if (result.vn_size + 3 > result.vn_cap) 226 | growArray(result.vn, result.vn_cap); 227 | 228 | result.vn[result.vn_size++] = x; 229 | result.vn[result.vn_size++] = y; 230 | result.vn[result.vn_size++] = z; 231 | } 232 | else if (line[0] == 'f' && line[1] == ' ') 233 | { 234 | const char* s = line + 2; 235 | 236 | if (!result.g) 237 | { 238 | growArray(result.g, result.g_cap); 239 | 240 | ObjGroup g = {}; 241 | result.g[result.g_size++] = g; 242 | } 243 | 244 | size_t v = result.v_size / 3; 245 | size_t vt = result.vt_size / 3; 246 | size_t vn = result.vn_size / 3; 247 | 248 | int fv = 0; 249 | int f[3][3] = {}; 250 | 251 | while (*s) 252 | { 253 | int vi = 0, vti = 0, vni = 0; 254 | s = parseFace(s, vi, vti, vni); 255 | 256 | if (vi == 0) 257 | break; 258 | 259 | f[fv][0] = fixupIndex(vi, v); 260 | f[fv][1] = fixupIndex(vti, vt); 261 | f[fv][2] = fixupIndex(vni, vn); 262 | 263 | if (fv == 2) 264 | { 265 | if (result.f_size + 9 > result.f_cap) 266 | growArray(result.f, result.f_cap); 267 | 268 | memcpy(&result.f[result.f_size], f, 9 * sizeof(int)); 269 | result.f_size += 9; 270 | 271 | result.g[result.g_size - 1].index_count += 3; 272 | 273 | f[1][0] = f[2][0]; 274 | f[1][1] = f[2][1]; 275 | f[1][2] = f[2][2]; 276 | } 277 | else 278 | { 279 | fv++; 280 | } 281 | } 282 | } 283 | else if (strncmp(line, "usemtl", 6) == 0) 284 | { 285 | const char* s = line + 6; 286 | 287 | // skip whitespace 288 | while (*s == ' ' || *s == '\t') 289 
| s++; 290 | 291 | if (result.g_size + 1 > result.g_cap) 292 | growArray(result.g, result.g_cap); 293 | 294 | ObjGroup g = {}; 295 | g.index_offset = result.f_size / 3; 296 | 297 | strncpy(g.material, s, sizeof(g.material)); 298 | g.material[sizeof(g.material) - 1] = 0; 299 | 300 | result.g[result.g_size++] = g; 301 | } 302 | } 303 | 304 | bool objParseFile(ObjFile& result, const char* path) 305 | { 306 | FILE* file = fopen(path, "rb"); 307 | if (!file) 308 | return false; 309 | 310 | char buffer[65536]; 311 | size_t size = 0; 312 | 313 | while (!feof(file)) 314 | { 315 | size += fread(buffer + size, 1, sizeof(buffer) - size, file); 316 | 317 | size_t line = 0; 318 | 319 | while (line < size) 320 | { 321 | // find the end of current line 322 | void* eol = memchr(buffer + line, '\n', size - line); 323 | if (!eol) 324 | break; 325 | 326 | // zero-terminate for objParseLine 327 | size_t next = static_cast(eol) - buffer; 328 | 329 | buffer[next] = 0; 330 | 331 | // process next line 332 | objParseLine(result, buffer + line); 333 | 334 | line = next + 1; 335 | } 336 | 337 | // move prefix of the last line in the buffer to the beginning of the buffer for next iteration 338 | assert(line <= size); 339 | 340 | memmove(buffer, buffer + line, size - line); 341 | size -= line; 342 | } 343 | 344 | if (size) 345 | { 346 | // process last line 347 | assert(size < sizeof(buffer)); 348 | buffer[size] = 0; 349 | 350 | objParseLine(result, buffer); 351 | } 352 | 353 | fclose(file); 354 | return true; 355 | } 356 | 357 | bool objValidate(const ObjFile& result) 358 | { 359 | size_t v = result.v_size / 3; 360 | size_t vt = result.vt_size / 3; 361 | size_t vn = result.vn_size / 3; 362 | 363 | for (size_t i = 0; i < result.f_size; i += 3) 364 | { 365 | int vi = result.f[i + 0]; 366 | int vti = result.f[i + 1]; 367 | int vni = result.f[i + 2]; 368 | 369 | if (vi < 0) 370 | return false; 371 | 372 | if (vi >= 0 && size_t(vi) >= v) 373 | return false; 374 | 375 | if (vti >= 0 && size_t(vti) 
>= vt) 376 | return false; 377 | 378 | if (vni >= 0 && size_t(vni) >= vn) 379 | return false; 380 | } 381 | 382 | return true; 383 | } 384 | -------------------------------------------------------------------------------- /source/external/objparser.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | struct ObjGroup 6 | { 7 | char material[256]; 8 | 9 | size_t index_offset; 10 | size_t index_count; 11 | }; 12 | 13 | class ObjFile 14 | { 15 | public: 16 | float* v; // positions; stride 3 (xyz) 17 | size_t v_size, v_cap; 18 | 19 | float* vt; // texture coordinates; stride 3 (uvw) 20 | size_t vt_size, vt_cap; 21 | 22 | float* vn; // vertex normals; stride 3 (xyz) 23 | size_t vn_size, vn_cap; 24 | 25 | int* f; // face elements; stride 9 (3 groups of indices into v/vt/vn) 26 | size_t f_size, f_cap; 27 | 28 | ObjGroup* g; 29 | size_t g_size, g_cap; 30 | 31 | ObjFile(); 32 | ~ObjFile(); 33 | 34 | private: 35 | ObjFile(const ObjFile&); 36 | ObjFile& operator=(const ObjFile&); 37 | }; 38 | 39 | void objParseLine(ObjFile& result, const char* line); 40 | bool objParseFile(ObjFile& result, const char* path); 41 | 42 | bool objValidate(const ObjFile& result); 43 | -------------------------------------------------------------------------------- /source/external/sokol_time.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | /* 3 | sokol_time.h -- simple cross-platform time measurement 4 | 5 | Do this: 6 | #define SOKOL_IMPL 7 | before you include this file in *one* C or C++ file to create the 8 | implementation. 
9 | 10 | Optionally provide the following defines with your own implementations: 11 | SOKOL_ASSERT(c) - your own assert macro (default: assert(c)) 12 | SOKOL_API_DECL - public function declaration prefix (default: extern) 13 | SOKOL_API_IMPL - public function implementation prefix (default: -) 14 | 15 | void stm_setup(); 16 | Call once before any other functions to initialize sokol_time 17 | (this calls for instance QueryPerformanceFrequency on Windows) 18 | 19 | uint64_t stm_now(); 20 | Get current point in time in unspecified 'ticks'. The value that 21 | is returned has no relation to the 'wall-clock' time and is 22 | not in a specific time unit, it is only useful to compute 23 | time differences. 24 | 25 | uint64_t stm_diff(uint64_t new, uint64_t old); 26 | Computes the time difference between new and old. This will always 27 | return a positive, non-zero value. 28 | 29 | uint64_t stm_since(uint64_t start); 30 | Takes the current time, and returns the elapsed time since start 31 | (this is a shortcut for "stm_diff(stm_now(), start)") 32 | 33 | uint64_t stm_laptime(uint64_t* last_time); 34 | This is useful for measuring frame time and other recurring 35 | events. It takes the current time, returns the time difference 36 | to the value in last_time, and stores the current time in 37 | last_time for the next call. If the value in last_time is 0, 38 | the return value will be zero (this usually happens on the 39 | very first call). 40 | 41 | Use the following functions to convert a duration in ticks into 42 | useful time units: 43 | 44 | double stm_sec(uint64_t ticks); 45 | double stm_ms(uint64_t ticks); 46 | double stm_us(uint64_t ticks); 47 | double stm_ns(uint64_t ticks); 48 | Converts a tick value into seconds, milliseconds, microseconds 49 | or nanoseconds. Note that not all platforms will have nanosecond 50 | or even microsecond precision. 
51 | 52 | Uses the following time measurement functions under the hood: 53 | 54 | Windows: QueryPerformanceFrequency() / QueryPerformanceCounter() 55 | MacOS/iOS: mach_absolute_time() 56 | emscripten: performance.now() 57 | Linux+others: clock_gettime(CLOCK_MONOTONIC) 58 | 59 | zlib/libpng license 60 | 61 | Copyright (c) 2018 Andre Weissflog 62 | 63 | This software is provided 'as-is', without any express or implied warranty. 64 | In no event will the authors be held liable for any damages arising from the 65 | use of this software. 66 | 67 | Permission is granted to anyone to use this software for any purpose, 68 | including commercial applications, and to alter it and redistribute it 69 | freely, subject to the following restrictions: 70 | 71 | 1. The origin of this software must not be misrepresented; you must not 72 | claim that you wrote the original software. If you use this software in a 73 | product, an acknowledgment in the product documentation would be 74 | appreciated but is not required. 75 | 76 | 2. Altered source versions must be plainly marked as such, and must not 77 | be misrepresented as being the original software. 78 | 79 | 3. This notice may not be removed or altered from any source 80 | distribution. 
81 | */ 82 | #define SOKOL_TIME_INCLUDED (1) 83 | #include 84 | 85 | #ifndef SOKOL_API_DECL 86 | #define SOKOL_API_DECL extern 87 | #endif 88 | 89 | #ifdef __cplusplus 90 | extern "C" { 91 | #endif 92 | 93 | SOKOL_API_DECL void stm_setup(void); 94 | SOKOL_API_DECL uint64_t stm_now(void); 95 | SOKOL_API_DECL uint64_t stm_diff(uint64_t new_ticks, uint64_t old_ticks); 96 | SOKOL_API_DECL uint64_t stm_since(uint64_t start_ticks); 97 | SOKOL_API_DECL uint64_t stm_laptime(uint64_t* last_time); 98 | SOKOL_API_DECL double stm_sec(uint64_t ticks); 99 | SOKOL_API_DECL double stm_ms(uint64_t ticks); 100 | SOKOL_API_DECL double stm_us(uint64_t ticks); 101 | SOKOL_API_DECL double stm_ns(uint64_t ticks); 102 | 103 | #ifdef __cplusplus 104 | } /* extern "C" */ 105 | #endif 106 | 107 | /*-- IMPLEMENTATION ----------------------------------------------------------*/ 108 | #ifdef SOKOL_IMPL 109 | #define SOKOL_TIME_IMPL_INCLUDED (1) 110 | #include /* memset */ 111 | 112 | #ifndef SOKOL_API_IMPL 113 | #define SOKOL_API_IMPL 114 | #endif 115 | #ifndef SOKOL_ASSERT 116 | #include 117 | #define SOKOL_ASSERT(c) assert(c) 118 | #endif 119 | #ifndef _SOKOL_PRIVATE 120 | #if defined(__GNUC__) 121 | #define _SOKOL_PRIVATE __attribute__((unused)) static 122 | #else 123 | #define _SOKOL_PRIVATE static 124 | #endif 125 | #endif 126 | 127 | #if defined(_WIN32) 128 | #ifndef WIN32_LEAN_AND_MEAN 129 | #define WIN32_LEAN_AND_MEAN 130 | #endif 131 | #include 132 | typedef struct { 133 | uint32_t initialized; 134 | LARGE_INTEGER freq; 135 | LARGE_INTEGER start; 136 | } _stm_state_t; 137 | #elif defined(__APPLE__) && defined(__MACH__) 138 | #include 139 | typedef struct { 140 | uint32_t initialized; 141 | mach_timebase_info_data_t timebase; 142 | uint64_t start; 143 | } _stm_state_t; 144 | #elif defined(__EMSCRIPTEN__) 145 | #include 146 | typedef struct { 147 | uint32_t initialized; 148 | double start; 149 | } _stm_state_t; 150 | #else /* anything else, this will need more care for non-Linux 
platforms */ 151 | #include 152 | typedef struct { 153 | uint32_t initialized; 154 | uint64_t start; 155 | } _stm_state_t; 156 | #endif 157 | static _stm_state_t _stm; 158 | 159 | /* prevent 64-bit overflow when computing relative timestamp 160 | see https://gist.github.com/jspohr/3dc4f00033d79ec5bdaf67bc46c813e3 161 | */ 162 | #if defined(_WIN32) || (defined(__APPLE__) && defined(__MACH__)) 163 | _SOKOL_PRIVATE int64_t int64_muldiv(int64_t value, int64_t numer, int64_t denom) { 164 | int64_t q = value / denom; 165 | int64_t r = value % denom; 166 | return q * numer + r * numer / denom; 167 | } 168 | #endif 169 | 170 | #if defined(__EMSCRIPTEN__) 171 | EM_JS(double, _stm_js_perfnow, (void), { 172 | return performance.now(); 173 | }); 174 | #endif 175 | 176 | SOKOL_API_IMPL void stm_setup(void) { 177 | memset(&_stm, 0, sizeof(_stm)); 178 | _stm.initialized = 0xABCDABCD; 179 | #if defined(_WIN32) 180 | QueryPerformanceFrequency(&_stm.freq); 181 | QueryPerformanceCounter(&_stm.start); 182 | #elif defined(__APPLE__) && defined(__MACH__) 183 | mach_timebase_info(&_stm.timebase); 184 | _stm.start = mach_absolute_time(); 185 | #elif defined(__EMSCRIPTEN__) 186 | _stm.start = _stm_js_perfnow(); 187 | #else 188 | struct timespec ts; 189 | clock_gettime(CLOCK_MONOTONIC, &ts); 190 | _stm.start = (uint64_t)ts.tv_sec*1000000000 + (uint64_t)ts.tv_nsec; 191 | #endif 192 | } 193 | 194 | SOKOL_API_IMPL uint64_t stm_now(void) { 195 | SOKOL_ASSERT(_stm.initialized == 0xABCDABCD); 196 | uint64_t now; 197 | #if defined(_WIN32) 198 | LARGE_INTEGER qpc_t; 199 | QueryPerformanceCounter(&qpc_t); 200 | now = int64_muldiv(qpc_t.QuadPart - _stm.start.QuadPart, 1000000000, _stm.freq.QuadPart); 201 | #elif defined(__APPLE__) && defined(__MACH__) 202 | const uint64_t mach_now = mach_absolute_time() - _stm.start; 203 | now = int64_muldiv(mach_now, _stm.timebase.numer, _stm.timebase.denom); 204 | #elif defined(__EMSCRIPTEN__) 205 | double js_now = _stm_js_perfnow() - _stm.start; 206 | 
SOKOL_ASSERT(js_now >= 0.0); 207 | now = (uint64_t) (js_now * 1000000.0); 208 | #else 209 | struct timespec ts; 210 | clock_gettime(CLOCK_MONOTONIC, &ts); 211 | now = ((uint64_t)ts.tv_sec*1000000000 + (uint64_t)ts.tv_nsec) - _stm.start; 212 | #endif 213 | return now; 214 | } 215 | 216 | SOKOL_API_IMPL uint64_t stm_diff(uint64_t new_ticks, uint64_t old_ticks) { 217 | if (new_ticks > old_ticks) { 218 | return new_ticks - old_ticks; 219 | } 220 | else { 221 | return 1; 222 | } 223 | } 224 | 225 | SOKOL_API_IMPL uint64_t stm_since(uint64_t start_ticks) { 226 | return stm_diff(stm_now(), start_ticks); 227 | } 228 | 229 | SOKOL_API_IMPL uint64_t stm_laptime(uint64_t* last_time) { 230 | SOKOL_ASSERT(last_time); 231 | uint64_t dt = 0; 232 | uint64_t now = stm_now(); 233 | if (0 != *last_time) { 234 | dt = stm_diff(now, *last_time); 235 | } 236 | *last_time = now; 237 | return dt; 238 | } 239 | 240 | SOKOL_API_IMPL double stm_sec(uint64_t ticks) { 241 | return (double)ticks / 1000000000.0; 242 | } 243 | 244 | SOKOL_API_IMPL double stm_ms(uint64_t ticks) { 245 | return (double)ticks / 1000000.0; 246 | } 247 | 248 | SOKOL_API_IMPL double stm_us(uint64_t ticks) { 249 | return (double)ticks / 1000.0; 250 | } 251 | 252 | SOKOL_API_IMPL double stm_ns(uint64_t ticks) { 253 | return (double)ticks; 254 | } 255 | #endif /* SOKOL_IMPL */ 256 | 257 | -------------------------------------------------------------------------------- /source/external/stb_image_write.h: -------------------------------------------------------------------------------- 1 | /* stb_image_write - v1.13 - public domain - http://nothings.org/stb/stb_image_write.h 2 | writes out PNG/BMP/TGA/JPEG/HDR images to C stdio - Sean Barrett 2010-2015 3 | no warranty implied; use at your own risk 4 | 5 | Before #including, 6 | 7 | #define STB_IMAGE_WRITE_IMPLEMENTATION 8 | 9 | in the file that you want to have the implementation. 10 | 11 | Will probably not work correctly with strict-aliasing optimizations. 
12 | 13 | If using a modern Microsoft Compiler, non-safe versions of CRT calls may cause 14 | compilation warnings or even errors. To avoid this, also before #including, 15 | 16 | #define STBI_MSC_SECURE_CRT 17 | 18 | ABOUT: 19 | 20 | This header file is a library for writing images to C stdio or a callback. 21 | 22 | The PNG output is not optimal; it is 20-50% larger than the file 23 | written by a decent optimizing implementation; though providing a custom 24 | zlib compress function (see STBIW_ZLIB_COMPRESS) can mitigate that. 25 | This library is designed for source code compactness and simplicity, 26 | not optimal image file size or run-time performance. 27 | 28 | BUILDING: 29 | 30 | You can #define STBIW_ASSERT(x) before the #include to avoid using assert.h. 31 | You can #define STBIW_MALLOC(), STBIW_REALLOC(), and STBIW_FREE() to replace 32 | malloc,realloc,free. 33 | You can #define STBIW_MEMMOVE() to replace memmove() 34 | You can #define STBIW_ZLIB_COMPRESS to use a custom zlib-style compress function 35 | for PNG compression (instead of the builtin one), it must have the following signature: 36 | unsigned char * my_compress(unsigned char *data, int data_len, int *out_len, int quality); 37 | The returned data will be freed with STBIW_FREE() (free() by default), 38 | so it must be heap allocated with STBIW_MALLOC() (malloc() by default). 39 | 40 | UNICODE: 41 | 42 | If compiling for Windows and you wish to use Unicode filenames, compile 43 | with 44 | #define STBI_WINDOWS_UTF8 45 | and pass utf8-encoded filenames. Call stbiw_convert_wchar_to_utf8 to convert 46 | Windows wchar_t filenames to utf8.
47 | 48 | USAGE: 49 | 50 | There are five functions, one for each image file format: 51 | 52 | int stbi_write_png(char const *filename, int w, int h, int comp, const void *data, int stride_in_bytes); 53 | int stbi_write_bmp(char const *filename, int w, int h, int comp, const void *data); 54 | int stbi_write_tga(char const *filename, int w, int h, int comp, const void *data); 55 | int stbi_write_jpg(char const *filename, int w, int h, int comp, const void *data, int quality); 56 | int stbi_write_hdr(char const *filename, int w, int h, int comp, const float *data); 57 | 58 | void stbi_flip_vertically_on_write(int flag); // flag is non-zero to flip data vertically 59 | 60 | There are also five equivalent functions that use an arbitrary write function. You are 61 | expected to open/close your file-equivalent before and after calling these: 62 | 63 | int stbi_write_png_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const void *data, int stride_in_bytes); 64 | int stbi_write_bmp_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const void *data); 65 | int stbi_write_tga_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const void *data); 66 | int stbi_write_hdr_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const float *data); 67 | int stbi_write_jpg_to_func(stbi_write_func *func, void *context, int x, int y, int comp, const void *data, int quality); 68 | 69 | where the callback is: 70 | void stbi_write_func(void *context, void *data, int size); 71 | 72 | You can configure it with these global variables: 73 | int stbi_write_tga_with_rle; // defaults to true; set to 0 to disable RLE 74 | int stbi_write_png_compression_level; // defaults to 8; set to higher for more compression 75 | int stbi_write_force_png_filter; // defaults to -1; set to 0..5 to force a filter mode 76 | 77 | 78 | You can define STBI_WRITE_NO_STDIO to disable the file variant of these 79 | functions, so the library will 
not use stdio.h at all. However, this will 80 | also disable HDR writing, because it requires stdio for formatted output. 81 | 82 | Each function returns 0 on failure and non-0 on success. 83 | 84 | The functions create an image file defined by the parameters. The image 85 | is a rectangle of pixels stored from left-to-right, top-to-bottom. 86 | Each pixel contains 'comp' channels of data stored interleaved with 8-bits 87 | per channel, in the following order: 1=Y, 2=YA, 3=RGB, 4=RGBA. (Y is 88 | monochrome color.) The rectangle is 'w' pixels wide and 'h' pixels tall. 89 | The *data pointer points to the first byte of the top-left-most pixel. 90 | For PNG, "stride_in_bytes" is the distance in bytes from the first byte of 91 | a row of pixels to the first byte of the next row of pixels. 92 | 93 | PNG creates output files with the same number of components as the input. 94 | The BMP format expands Y to RGB in the file format and does not 95 | output alpha. 96 | 97 | PNG supports writing rectangles of data even when the bytes storing rows of 98 | data are not consecutive in memory (e.g. sub-rectangles of a larger image), 99 | by supplying the stride between the beginning of adjacent rows. The other 100 | formats do not. (Thus you cannot write a native-format BMP through the BMP 101 | writer, both because it is in BGR order and because it may have padding 102 | at the end of the line.) 103 | 104 | PNG allows you to set the deflate compression level by setting the global 105 | variable 'stbi_write_png_compression_level' (it defaults to 8). 106 | 107 | HDR expects linear float data. Since the format is always 32-bit rgb(e) 108 | data, alpha (if provided) is discarded, and for monochrome data it is 109 | replicated across all three channels. 110 | 111 | TGA supports RLE or non-RLE compressed data. To use non-RLE-compressed 112 | data, set the global variable 'stbi_write_tga_with_rle' to 0. 
113 | 114 | JPEG does ignore alpha channels in input data; quality is between 1 and 100. 115 | Higher quality looks better but results in a bigger image. 116 | JPEG baseline (no JPEG progressive). 117 | 118 | CREDITS: 119 | 120 | 121 | Sean Barrett - PNG/BMP/TGA 122 | Baldur Karlsson - HDR 123 | Jean-Sebastien Guay - TGA monochrome 124 | Tim Kelsey - misc enhancements 125 | Alan Hickman - TGA RLE 126 | Emmanuel Julien - initial file IO callback implementation 127 | Jon Olick - original jo_jpeg.cpp code 128 | Daniel Gibson - integrate JPEG, allow external zlib 129 | Aarni Koskela - allow choosing PNG filter 130 | 131 | bugfixes: 132 | github:Chribba 133 | Guillaume Chereau 134 | github:jry2 135 | github:romigrou 136 | Sergio Gonzalez 137 | Jonas Karlsson 138 | Filip Wasil 139 | Thatcher Ulrich 140 | github:poppolopoppo 141 | Patrick Boettcher 142 | github:xeekworx 143 | Cap Petschulat 144 | Simon Rodriguez 145 | Ivan Tikhonov 146 | github:ignotion 147 | Adam Schackart 148 | 149 | LICENSE 150 | 151 | See end of file for license information. 
152 | 153 | */ 154 | 155 | #ifndef INCLUDE_STB_IMAGE_WRITE_H 156 | #define INCLUDE_STB_IMAGE_WRITE_H 157 | 158 | #include 159 | 160 | // if STB_IMAGE_WRITE_STATIC causes problems, try defining STBIWDEF to 'inline' or 'static inline' 161 | #ifndef STBIWDEF 162 | #ifdef STB_IMAGE_WRITE_STATIC 163 | #define STBIWDEF static 164 | #else 165 | #ifdef __cplusplus 166 | #define STBIWDEF extern "C" 167 | #else 168 | #define STBIWDEF extern 169 | #endif 170 | #endif 171 | #endif 172 | 173 | #ifndef STB_IMAGE_WRITE_STATIC // C++ forbids static forward declarations 174 | extern int stbi_write_tga_with_rle; 175 | extern int stbi_write_png_compression_level; 176 | extern int stbi_write_force_png_filter; 177 | #endif 178 | 179 | #ifndef STBI_WRITE_NO_STDIO 180 | STBIWDEF int stbi_write_png(char const *filename, int w, int h, int comp, const void *data, int stride_in_bytes); 181 | STBIWDEF int stbi_write_bmp(char const *filename, int w, int h, int comp, const void *data); 182 | STBIWDEF int stbi_write_tga(char const *filename, int w, int h, int comp, const void *data); 183 | STBIWDEF int stbi_write_hdr(char const *filename, int w, int h, int comp, const float *data); 184 | STBIWDEF int stbi_write_jpg(char const *filename, int x, int y, int comp, const void *data, int quality); 185 | 186 | #ifdef STBI_WINDOWS_UTF8 187 | STBIWDEF int stbiw_convert_wchar_to_utf8(char *buffer, size_t bufferlen, const wchar_t* input); 188 | #endif 189 | #endif 190 | 191 | typedef void stbi_write_func(void *context, void *data, int size); 192 | 193 | STBIWDEF int stbi_write_png_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const void *data, int stride_in_bytes); 194 | STBIWDEF int stbi_write_bmp_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const void *data); 195 | STBIWDEF int stbi_write_tga_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const void *data); 196 | STBIWDEF int stbi_write_hdr_to_func(stbi_write_func *func, void 
*context, int w, int h, int comp, const float *data); 197 | STBIWDEF int stbi_write_jpg_to_func(stbi_write_func *func, void *context, int x, int y, int comp, const void *data, int quality); 198 | 199 | STBIWDEF void stbi_flip_vertically_on_write(int flip_boolean); 200 | 201 | #endif//INCLUDE_STB_IMAGE_WRITE_H 202 | 203 | #ifdef STB_IMAGE_WRITE_IMPLEMENTATION 204 | 205 | #ifdef _WIN32 206 | #ifndef _CRT_SECURE_NO_WARNINGS 207 | #define _CRT_SECURE_NO_WARNINGS 208 | #endif 209 | #ifndef _CRT_NONSTDC_NO_DEPRECATE 210 | #define _CRT_NONSTDC_NO_DEPRECATE 211 | #endif 212 | #endif 213 | 214 | #ifndef STBI_WRITE_NO_STDIO 215 | #include 216 | #endif // STBI_WRITE_NO_STDIO 217 | 218 | #include 219 | #include 220 | #include 221 | #include 222 | 223 | #if defined(STBIW_MALLOC) && defined(STBIW_FREE) && (defined(STBIW_REALLOC) || defined(STBIW_REALLOC_SIZED)) 224 | // ok 225 | #elif !defined(STBIW_MALLOC) && !defined(STBIW_FREE) && !defined(STBIW_REALLOC) && !defined(STBIW_REALLOC_SIZED) 226 | // ok 227 | #else 228 | #error "Must define all or none of STBIW_MALLOC, STBIW_FREE, and STBIW_REALLOC (or STBIW_REALLOC_SIZED)." 
229 | #endif 230 | 231 | #ifndef STBIW_MALLOC 232 | #define STBIW_MALLOC(sz) malloc(sz) 233 | #define STBIW_REALLOC(p,newsz) realloc(p,newsz) 234 | #define STBIW_FREE(p) free(p) 235 | #endif 236 | 237 | #ifndef STBIW_REALLOC_SIZED 238 | #define STBIW_REALLOC_SIZED(p,oldsz,newsz) STBIW_REALLOC(p,newsz) 239 | #endif 240 | 241 | 242 | #ifndef STBIW_MEMMOVE 243 | #define STBIW_MEMMOVE(a,b,sz) memmove(a,b,sz) 244 | #endif 245 | 246 | 247 | #ifndef STBIW_ASSERT 248 | #include 249 | #define STBIW_ASSERT(x) assert(x) 250 | #endif 251 | 252 | #define STBIW_UCHAR(x) (unsigned char) ((x) & 0xff) 253 | 254 | #ifdef STB_IMAGE_WRITE_STATIC 255 | static int stbi__flip_vertically_on_write=0; 256 | static int stbi_write_png_compression_level = 8; 257 | static int stbi_write_tga_with_rle = 1; 258 | static int stbi_write_force_png_filter = -1; 259 | #else 260 | int stbi_write_png_compression_level = 8; 261 | int stbi__flip_vertically_on_write=0; 262 | int stbi_write_tga_with_rle = 1; 263 | int stbi_write_force_png_filter = -1; 264 | #endif 265 | 266 | STBIWDEF void stbi_flip_vertically_on_write(int flag) 267 | { 268 | stbi__flip_vertically_on_write = flag; 269 | } 270 | 271 | typedef struct 272 | { 273 | stbi_write_func *func; 274 | void *context; 275 | } stbi__write_context; 276 | 277 | // initialize a callback-based context 278 | static void stbi__start_write_callbacks(stbi__write_context *s, stbi_write_func *c, void *context) 279 | { 280 | s->func = c; 281 | s->context = context; 282 | } 283 | 284 | #ifndef STBI_WRITE_NO_STDIO 285 | 286 | static void stbi__stdio_write(void *context, void *data, int size) 287 | { 288 | fwrite(data,1,size,(FILE*) context); 289 | } 290 | 291 | #if defined(_MSC_VER) && defined(STBI_WINDOWS_UTF8) 292 | #ifdef __cplusplus 293 | #define STBIW_EXTERN extern "C" 294 | #else 295 | #define STBIW_EXTERN extern 296 | #endif 297 | STBIW_EXTERN __declspec(dllimport) int __stdcall MultiByteToWideChar(unsigned int cp, unsigned long flags, const char *str, int 
cbmb, wchar_t *widestr, int cchwide); 298 | STBIW_EXTERN __declspec(dllimport) int __stdcall WideCharToMultiByte(unsigned int cp, unsigned long flags, const wchar_t *widestr, int cchwide, char *str, int cbmb, const char *defchar, int *used_default); 299 | 300 | STBIWDEF int stbiw_convert_wchar_to_utf8(char *buffer, size_t bufferlen, const wchar_t* input) 301 | { 302 | return WideCharToMultiByte(65001 /* UTF8 */, 0, input, -1, buffer, (int) bufferlen, NULL, NULL); 303 | } 304 | #endif 305 | /* Opens 'filename' with the given 'mode' and returns the stream, or 0 on failure. Under MSVC with STBI_WINDOWS_UTF8 defined, both strings are converted from UTF-8 to wide characters and opened through the wide-character CRT; a failed conversion also returns 0. */ 306 | static FILE *stbiw__fopen(char const *filename, char const *mode) 307 | { 308 | FILE *f; 309 | #if defined(_MSC_VER) && defined(STBI_WINDOWS_UTF8) 310 | wchar_t wMode[64]; 311 | wchar_t wFilename[1024]; 312 | if (0 == MultiByteToWideChar(65001 /* UTF8 */, 0, filename, -1, wFilename, sizeof(wFilename)/sizeof(*wFilename))) /* capacity is a count of wchar_t elements, not bytes */ 313 | return 0; 314 | 315 | if (0 == MultiByteToWideChar(65001 /* UTF8 */, 0, mode, -1, wMode, sizeof(wMode)/sizeof(*wMode))) /* capacity is a count of wchar_t elements, not bytes */ 316 | return 0; 317 | 318 | #if _MSC_VER >= 1400 319 | if (0 != _wfopen_s(&f, wFilename, wMode)) 320 | f = 0; 321 | #else 322 | f = _wfopen(wFilename, wMode); 323 | #endif 324 | 325 | #elif defined(_MSC_VER) && _MSC_VER >= 1400 326 | if (0 != fopen_s(&f, filename, mode)) 327 | f=0; 328 | #else 329 | f = fopen(filename, mode); 330 | #endif 331 | return f; 332 | } 333 | 334 | static int stbi__start_write_file(stbi__write_context *s, const char *filename) 335 | { 336 | FILE *f = stbiw__fopen(filename, "wb"); 337 | stbi__start_write_callbacks(s, stbi__stdio_write, (void *) f); 338 | return f != NULL; 339 | } 340 | 341 | static void stbi__end_write_file(stbi__write_context *s) 342 | { 343 | fclose((FILE *)s->context); 344 | } 345 | 346 | #endif // !STBI_WRITE_NO_STDIO 347 | 348 | typedef unsigned int stbiw_uint32; 349 | typedef int stb_image_write_test[sizeof(stbiw_uint32)==4 ?
1 : -1]; 350 | 351 | static void stbiw__writefv(stbi__write_context *s, const char *fmt, va_list v) 352 | { 353 | while (*fmt) { 354 | switch (*fmt++) { 355 | case ' ': break; 356 | case '1': { unsigned char x = STBIW_UCHAR(va_arg(v, int)); 357 | s->func(s->context,&x,1); 358 | break; } 359 | case '2': { int x = va_arg(v,int); 360 | unsigned char b[2]; 361 | b[0] = STBIW_UCHAR(x); 362 | b[1] = STBIW_UCHAR(x>>8); 363 | s->func(s->context,b,2); 364 | break; } 365 | case '4': { stbiw_uint32 x = va_arg(v,int); 366 | unsigned char b[4]; 367 | b[0]=STBIW_UCHAR(x); 368 | b[1]=STBIW_UCHAR(x>>8); 369 | b[2]=STBIW_UCHAR(x>>16); 370 | b[3]=STBIW_UCHAR(x>>24); 371 | s->func(s->context,b,4); 372 | break; } 373 | default: 374 | STBIW_ASSERT(0); 375 | return; 376 | } 377 | } 378 | } 379 | 380 | static void stbiw__writef(stbi__write_context *s, const char *fmt, ...) 381 | { 382 | va_list v; 383 | va_start(v, fmt); 384 | stbiw__writefv(s, fmt, v); 385 | va_end(v); 386 | } 387 | 388 | static void stbiw__putc(stbi__write_context *s, unsigned char c) 389 | { 390 | s->func(s->context, &c, 1); 391 | } 392 | 393 | static void stbiw__write3(stbi__write_context *s, unsigned char a, unsigned char b, unsigned char c) 394 | { 395 | unsigned char arr[3]; 396 | arr[0] = a; arr[1] = b; arr[2] = c; 397 | s->func(s->context, arr, 3); 398 | } 399 | 400 | static void stbiw__write_pixel(stbi__write_context *s, int rgb_dir, int comp, int write_alpha, int expand_mono, unsigned char *d) 401 | { 402 | unsigned char bg[3] = { 255, 0, 255}, px[3]; 403 | int k; 404 | 405 | if (write_alpha < 0) 406 | s->func(s->context, &d[comp - 1], 1); 407 | 408 | switch (comp) { 409 | case 2: // 2 pixels = mono + alpha, alpha is written separately, so same as 1-channel case 410 | case 1: 411 | if (expand_mono) 412 | stbiw__write3(s, d[0], d[0], d[0]); // monochrome bmp 413 | else 414 | s->func(s->context, d, 1); // monochrome TGA 415 | break; 416 | case 4: 417 | if (!write_alpha) { 418 | // composite against pink 
background 419 | for (k = 0; k < 3; ++k) 420 | px[k] = bg[k] + ((d[k] - bg[k]) * d[3]) / 255; 421 | stbiw__write3(s, px[1 - rgb_dir], px[1], px[1 + rgb_dir]); 422 | break; 423 | } 424 | /* FALLTHROUGH */ 425 | case 3: 426 | stbiw__write3(s, d[1 - rgb_dir], d[1], d[1 + rgb_dir]); 427 | break; 428 | } 429 | if (write_alpha > 0) 430 | s->func(s->context, &d[comp - 1], 1); 431 | } 432 | 433 | static void stbiw__write_pixels(stbi__write_context *s, int rgb_dir, int vdir, int x, int y, int comp, void *data, int write_alpha, int scanline_pad, int expand_mono) 434 | { 435 | stbiw_uint32 zero = 0; 436 | int i,j, j_end; 437 | 438 | if (y <= 0) 439 | return; 440 | 441 | if (stbi__flip_vertically_on_write) 442 | vdir *= -1; 443 | 444 | if (vdir < 0) { 445 | j_end = -1; j = y-1; 446 | } else { 447 | j_end = y; j = 0; 448 | } 449 | 450 | for (; j != j_end; j += vdir) { 451 | for (i=0; i < x; ++i) { 452 | unsigned char *d = (unsigned char *) data + (j*x+i)*comp; 453 | stbiw__write_pixel(s, rgb_dir, comp, write_alpha, expand_mono, d); 454 | } 455 | s->func(s->context, &zero, scanline_pad); 456 | } 457 | } 458 | 459 | static int stbiw__outfile(stbi__write_context *s, int rgb_dir, int vdir, int x, int y, int comp, int expand_mono, void *data, int alpha, int pad, const char *fmt, ...) 
460 | { 461 | if (y < 0 || x < 0) { 462 | return 0; 463 | } else { 464 | va_list v; 465 | va_start(v, fmt); 466 | stbiw__writefv(s, fmt, v); 467 | va_end(v); 468 | stbiw__write_pixels(s,rgb_dir,vdir,x,y,comp,data,alpha,pad, expand_mono); 469 | return 1; 470 | } 471 | } 472 | 473 | static int stbi_write_bmp_core(stbi__write_context *s, int x, int y, int comp, const void *data) 474 | { 475 | int pad = (-x*3) & 3; 476 | return stbiw__outfile(s,-1,-1,x,y,comp,1,(void *) data,0,pad, 477 | "11 4 22 4" "4 44 22 444444", 478 | 'B', 'M', 14+40+(x*3+pad)*y, 0,0, 14+40, // file header 479 | 40, x,y, 1,24, 0,0,0,0,0,0); // bitmap header 480 | } 481 | 482 | STBIWDEF int stbi_write_bmp_to_func(stbi_write_func *func, void *context, int x, int y, int comp, const void *data) 483 | { 484 | stbi__write_context s; 485 | stbi__start_write_callbacks(&s, func, context); 486 | return stbi_write_bmp_core(&s, x, y, comp, data); 487 | } 488 | 489 | #ifndef STBI_WRITE_NO_STDIO 490 | STBIWDEF int stbi_write_bmp(char const *filename, int x, int y, int comp, const void *data) 491 | { 492 | stbi__write_context s; 493 | if (stbi__start_write_file(&s,filename)) { 494 | int r = stbi_write_bmp_core(&s, x, y, comp, data); 495 | stbi__end_write_file(&s); 496 | return r; 497 | } else 498 | return 0; 499 | } 500 | #endif //!STBI_WRITE_NO_STDIO 501 | 502 | static int stbi_write_tga_core(stbi__write_context *s, int x, int y, int comp, void *data) 503 | { 504 | int has_alpha = (comp == 2 || comp == 4); 505 | int colorbytes = has_alpha ? comp-1 : comp; 506 | int format = colorbytes < 2 ? 
3 : 2; // 3 color channels (RGB/RGBA) = 2, 1 color channel (Y/YA) = 3 507 | 508 | if (y < 0 || x < 0) 509 | return 0; 510 | 511 | if (!stbi_write_tga_with_rle) { 512 | return stbiw__outfile(s, -1, -1, x, y, comp, 0, (void *) data, has_alpha, 0, 513 | "111 221 2222 11", 0, 0, format, 0, 0, 0, 0, 0, x, y, (colorbytes + has_alpha) * 8, has_alpha * 8); 514 | } else { 515 | int i,j,k; 516 | int jend, jdir; 517 | 518 | stbiw__writef(s, "111 221 2222 11", 0,0,format+8, 0,0,0, 0,0,x,y, (colorbytes + has_alpha) * 8, has_alpha * 8); 519 | 520 | if (stbi__flip_vertically_on_write) { 521 | j = 0; 522 | jend = y; 523 | jdir = 1; 524 | } else { 525 | j = y-1; 526 | jend = -1; 527 | jdir = -1; 528 | } 529 | for (; j != jend; j += jdir) { 530 | unsigned char *row = (unsigned char *) data + j * x * comp; 531 | int len; 532 | 533 | for (i = 0; i < x; i += len) { 534 | unsigned char *begin = row + i * comp; 535 | int diff = 1; 536 | len = 1; 537 | 538 | if (i < x - 1) { 539 | ++len; 540 | diff = memcmp(begin, row + (i + 1) * comp, comp); 541 | if (diff) { 542 | const unsigned char *prev = begin; 543 | for (k = i + 2; k < x && len < 128; ++k) { 544 | if (memcmp(prev, row + k * comp, comp)) { 545 | prev += comp; 546 | ++len; 547 | } else { 548 | --len; 549 | break; 550 | } 551 | } 552 | } else { 553 | for (k = i + 2; k < x && len < 128; ++k) { 554 | if (!memcmp(begin, row + k * comp, comp)) { 555 | ++len; 556 | } else { 557 | break; 558 | } 559 | } 560 | } 561 | } 562 | 563 | if (diff) { 564 | unsigned char header = STBIW_UCHAR(len - 1); 565 | s->func(s->context, &header, 1); 566 | for (k = 0; k < len; ++k) { 567 | stbiw__write_pixel(s, -1, comp, has_alpha, 0, begin + k * comp); 568 | } 569 | } else { 570 | unsigned char header = STBIW_UCHAR(len - 129); 571 | s->func(s->context, &header, 1); 572 | stbiw__write_pixel(s, -1, comp, has_alpha, 0, begin); 573 | } 574 | } 575 | } 576 | } 577 | return 1; 578 | } 579 | 580 | STBIWDEF int stbi_write_tga_to_func(stbi_write_func *func, void 
*context, int x, int y, int comp, const void *data) 581 | { 582 | stbi__write_context s; 583 | stbi__start_write_callbacks(&s, func, context); 584 | return stbi_write_tga_core(&s, x, y, comp, (void *) data); 585 | } 586 | 587 | #ifndef STBI_WRITE_NO_STDIO 588 | STBIWDEF int stbi_write_tga(char const *filename, int x, int y, int comp, const void *data) 589 | { 590 | stbi__write_context s; 591 | if (stbi__start_write_file(&s,filename)) { 592 | int r = stbi_write_tga_core(&s, x, y, comp, (void *) data); 593 | stbi__end_write_file(&s); 594 | return r; 595 | } else 596 | return 0; 597 | } 598 | #endif 599 | 600 | // ************************************************************************************************* 601 | // Radiance RGBE HDR writer 602 | // by Baldur Karlsson 603 | 604 | #define stbiw__max(a, b) ((a) > (b) ? (a) : (b)) 605 | 606 | static void stbiw__linear_to_rgbe(unsigned char *rgbe, float *linear) 607 | { 608 | int exponent; 609 | float maxcomp = stbiw__max(linear[0], stbiw__max(linear[1], linear[2])); 610 | 611 | if (maxcomp < 1e-32f) { 612 | rgbe[0] = rgbe[1] = rgbe[2] = rgbe[3] = 0; 613 | } else { 614 | float normalize = (float) frexp(maxcomp, &exponent) * 256.0f/maxcomp; 615 | 616 | rgbe[0] = (unsigned char)(linear[0] * normalize); 617 | rgbe[1] = (unsigned char)(linear[1] * normalize); 618 | rgbe[2] = (unsigned char)(linear[2] * normalize); 619 | rgbe[3] = (unsigned char)(exponent + 128); 620 | } 621 | } 622 | 623 | static void stbiw__write_run_data(stbi__write_context *s, int length, unsigned char databyte) 624 | { 625 | unsigned char lengthbyte = STBIW_UCHAR(length+128); 626 | STBIW_ASSERT(length+128 <= 255); 627 | s->func(s->context, &lengthbyte, 1); 628 | s->func(s->context, &databyte, 1); 629 | } 630 | 631 | static void stbiw__write_dump_data(stbi__write_context *s, int length, unsigned char *data) 632 | { 633 | unsigned char lengthbyte = STBIW_UCHAR(length); 634 | STBIW_ASSERT(length <= 128); // inconsistent with spec but consistent with 
official code 635 | s->func(s->context, &lengthbyte, 1); 636 | s->func(s->context, data, length); 637 | } 638 | 639 | static void stbiw__write_hdr_scanline(stbi__write_context *s, int width, int ncomp, unsigned char *scratch, float *scanline) 640 | { 641 | unsigned char scanlineheader[4] = { 2, 2, 0, 0 }; 642 | unsigned char rgbe[4]; 643 | float linear[3]; 644 | int x; 645 | 646 | scanlineheader[2] = (width&0xff00)>>8; 647 | scanlineheader[3] = (width&0x00ff); 648 | 649 | /* skip RLE for images too small or large */ 650 | if (width < 8 || width >= 32768) { 651 | for (x=0; x < width; x++) { 652 | switch (ncomp) { 653 | case 4: /* fallthrough */ 654 | case 3: linear[2] = scanline[x*ncomp + 2]; 655 | linear[1] = scanline[x*ncomp + 1]; 656 | linear[0] = scanline[x*ncomp + 0]; 657 | break; 658 | default: 659 | linear[0] = linear[1] = linear[2] = scanline[x*ncomp + 0]; 660 | break; 661 | } 662 | stbiw__linear_to_rgbe(rgbe, linear); 663 | s->func(s->context, rgbe, 4); 664 | } 665 | } else { 666 | int c,r; 667 | /* encode into scratch buffer */ 668 | for (x=0; x < width; x++) { 669 | switch(ncomp) { 670 | case 4: /* fallthrough */ 671 | case 3: linear[2] = scanline[x*ncomp + 2]; 672 | linear[1] = scanline[x*ncomp + 1]; 673 | linear[0] = scanline[x*ncomp + 0]; 674 | break; 675 | default: 676 | linear[0] = linear[1] = linear[2] = scanline[x*ncomp + 0]; 677 | break; 678 | } 679 | stbiw__linear_to_rgbe(rgbe, linear); 680 | scratch[x + width*0] = rgbe[0]; 681 | scratch[x + width*1] = rgbe[1]; 682 | scratch[x + width*2] = rgbe[2]; 683 | scratch[x + width*3] = rgbe[3]; 684 | } 685 | 686 | s->func(s->context, scanlineheader, 4); 687 | 688 | /* RLE each component separately */ 689 | for (c=0; c < 4; c++) { 690 | unsigned char *comp = &scratch[width*c]; 691 | 692 | x = 0; 693 | while (x < width) { 694 | // find first run 695 | r = x; 696 | while (r+2 < width) { 697 | if (comp[r] == comp[r+1] && comp[r] == comp[r+2]) 698 | break; 699 | ++r; 700 | } 701 | if (r+2 >= width) 702 | r = 
width; 703 | // dump up to first run 704 | while (x < r) { 705 | int len = r-x; 706 | if (len > 128) len = 128; 707 | stbiw__write_dump_data(s, len, &comp[x]); 708 | x += len; 709 | } 710 | // if there's a run, output it 711 | if (r+2 < width) { // same test as what we break out of in search loop, so only true if we break'd 712 | // find next byte after run 713 | while (r < width && comp[r] == comp[x]) 714 | ++r; 715 | // output run up to r 716 | while (x < r) { 717 | int len = r-x; 718 | if (len > 127) len = 127; 719 | stbiw__write_run_data(s, len, comp[x]); 720 | x += len; 721 | } 722 | } 723 | } 724 | } 725 | } 726 | } 727 | /* Writes an x-by-y float image with 'comp' channels per pixel as a Radiance RGBE stream through 's': the text header first, then one encoded scanline per row (rows are taken bottom-up when stbi__flip_vertically_on_write is set). Returns 1 on success; returns 0 without writing anything when a dimension is not positive, 'data' is NULL, or the scanline scratch buffer cannot be allocated. */ 728 | static int stbi_write_hdr_core(stbi__write_context *s, int x, int y, int comp, float *data) 729 | { 730 | if (y <= 0 || x <= 0 || data == NULL) 731 | return 0; 732 | else { 733 | // Each component is stored separately. Allocate scratch space for full output scanline. 734 | unsigned char *scratch = (unsigned char *) STBIW_MALLOC((size_t) x * 4); /* size computed in size_t so a very wide image cannot overflow int */ 735 | int i, len; 736 | char buffer[128]; 737 | char header[] = "#?RADIANCE\n# Written by stb_image_write.h\nFORMAT=32-bit_rle_rgbe\n"; 738 | if (scratch == NULL) return 0; /* the scanline encoder writes through scratch, so fail before emitting any output */ s->func(s->context, header, sizeof(header)-1); 739 | 740 | #ifdef __STDC_WANT_SECURE_LIB__ 741 | len = sprintf_s(buffer, sizeof(buffer), "EXPOSURE= 1.0000000000000\n\n-Y %d +X %d\n", y, x); 742 | #else 743 | len = sprintf(buffer, "EXPOSURE= 1.0000000000000\n\n-Y %d +X %d\n", y, x); 744 | #endif 745 | s->func(s->context, buffer, len); 746 | 747 | for(i=0; i < y; i++) 748 | stbiw__write_hdr_scanline(s, x, comp, scratch, data + comp*x*(stbi__flip_vertically_on_write ?
y-1-i : i)); 749 | STBIW_FREE(scratch); 750 | return 1; 751 | } 752 | } 753 | 754 | STBIWDEF int stbi_write_hdr_to_func(stbi_write_func *func, void *context, int x, int y, int comp, const float *data) 755 | { 756 | stbi__write_context s; 757 | stbi__start_write_callbacks(&s, func, context); 758 | return stbi_write_hdr_core(&s, x, y, comp, (float *) data); 759 | } 760 | 761 | #ifndef STBI_WRITE_NO_STDIO 762 | STBIWDEF int stbi_write_hdr(char const *filename, int x, int y, int comp, const float *data) 763 | { 764 | stbi__write_context s; 765 | if (stbi__start_write_file(&s,filename)) { 766 | int r = stbi_write_hdr_core(&s, x, y, comp, (float *) data); 767 | stbi__end_write_file(&s); 768 | return r; 769 | } else 770 | return 0; 771 | } 772 | #endif // STBI_WRITE_NO_STDIO 773 | 774 | 775 | ////////////////////////////////////////////////////////////////////////////// 776 | // 777 | // PNG writer 778 | // 779 | 780 | #ifndef STBIW_ZLIB_COMPRESS 781 | // stretchy buffer; stbiw__sbpush() == vector<>::push_back() -- stbiw__sbcount() == vector<>::size() 782 | #define stbiw__sbraw(a) ((int *) (a) - 2) 783 | #define stbiw__sbm(a) stbiw__sbraw(a)[0] 784 | #define stbiw__sbn(a) stbiw__sbraw(a)[1] 785 | 786 | #define stbiw__sbneedgrow(a,n) ((a)==0 || stbiw__sbn(a)+n >= stbiw__sbm(a)) 787 | #define stbiw__sbmaybegrow(a,n) (stbiw__sbneedgrow(a,(n)) ? stbiw__sbgrow(a,n) : 0) 788 | #define stbiw__sbgrow(a,n) stbiw__sbgrowf((void **) &(a), (n), sizeof(*(a))) 789 | 790 | #define stbiw__sbpush(a, v) (stbiw__sbmaybegrow(a,1), (a)[stbiw__sbn(a)++] = (v)) 791 | #define stbiw__sbcount(a) ((a) ? stbiw__sbn(a) : 0) 792 | #define stbiw__sbfree(a) ((a) ? STBIW_FREE(stbiw__sbraw(a)),0 : 0) 793 | 794 | static void *stbiw__sbgrowf(void **arr, int increment, int itemsize) 795 | { 796 | int m = *arr ? 2*stbiw__sbm(*arr)+increment : increment+1; 797 | void *p = STBIW_REALLOC_SIZED(*arr ? stbiw__sbraw(*arr) : 0, *arr ? 
(stbiw__sbm(*arr)*itemsize + sizeof(int)*2) : 0, itemsize * m + sizeof(int)*2); 798 | STBIW_ASSERT(p); 799 | if (p) { 800 | if (!*arr) ((int *) p)[1] = 0; 801 | *arr = (void *) ((int *) p + 2); 802 | stbiw__sbm(*arr) = m; 803 | } 804 | return *arr; 805 | } 806 | 807 | static unsigned char *stbiw__zlib_flushf(unsigned char *data, unsigned int *bitbuffer, int *bitcount) 808 | { 809 | while (*bitcount >= 8) { 810 | stbiw__sbpush(data, STBIW_UCHAR(*bitbuffer)); 811 | *bitbuffer >>= 8; 812 | *bitcount -= 8; 813 | } 814 | return data; 815 | } 816 | 817 | static int stbiw__zlib_bitrev(int code, int codebits) 818 | { 819 | int res=0; 820 | while (codebits--) { 821 | res = (res << 1) | (code & 1); 822 | code >>= 1; 823 | } 824 | return res; 825 | } 826 | 827 | static unsigned int stbiw__zlib_countm(unsigned char *a, unsigned char *b, int limit) 828 | { 829 | int i; 830 | for (i=0; i < limit && i < 258; ++i) 831 | if (a[i] != b[i]) break; 832 | return i; 833 | } 834 | 835 | static unsigned int stbiw__zhash(unsigned char *data) 836 | { 837 | stbiw_uint32 hash = data[0] + (data[1] << 8) + (data[2] << 16); 838 | hash ^= hash << 3; 839 | hash += hash >> 5; 840 | hash ^= hash << 4; 841 | hash += hash >> 17; 842 | hash ^= hash << 25; 843 | hash += hash >> 6; 844 | return hash; 845 | } 846 | 847 | #define stbiw__zlib_flush() (out = stbiw__zlib_flushf(out, &bitbuf, &bitcount)) 848 | #define stbiw__zlib_add(code,codebits) \ 849 | (bitbuf |= (code) << bitcount, bitcount += (codebits), stbiw__zlib_flush()) 850 | #define stbiw__zlib_huffa(b,c) stbiw__zlib_add(stbiw__zlib_bitrev(b,c),c) 851 | // default huffman tables 852 | #define stbiw__zlib_huff1(n) stbiw__zlib_huffa(0x30 + (n), 8) 853 | #define stbiw__zlib_huff2(n) stbiw__zlib_huffa(0x190 + (n)-144, 9) 854 | #define stbiw__zlib_huff3(n) stbiw__zlib_huffa(0 + (n)-256,7) 855 | #define stbiw__zlib_huff4(n) stbiw__zlib_huffa(0xc0 + (n)-280,8) 856 | #define stbiw__zlib_huff(n) ((n) <= 143 ? stbiw__zlib_huff1(n) : (n) <= 255 ? 
stbiw__zlib_huff2(n) : (n) <= 279 ? stbiw__zlib_huff3(n) : stbiw__zlib_huff4(n)) 857 | #define stbiw__zlib_huffb(n) ((n) <= 143 ? stbiw__zlib_huff1(n) : stbiw__zlib_huff2(n)) 858 | 859 | #define stbiw__ZHASH 16384 860 | 861 | #endif // STBIW_ZLIB_COMPRESS 862 | 863 | STBIWDEF unsigned char * stbi_zlib_compress(unsigned char *data, int data_len, int *out_len, int quality) 864 | { 865 | #ifdef STBIW_ZLIB_COMPRESS 866 | // user provided a zlib compress implementation, use that 867 | return STBIW_ZLIB_COMPRESS(data, data_len, out_len, quality); 868 | #else // use builtin 869 | static unsigned short lengthc[] = { 3,4,5,6,7,8,9,10,11,13,15,17,19,23,27,31,35,43,51,59,67,83,99,115,131,163,195,227,258, 259 }; 870 | static unsigned char lengtheb[]= { 0,0,0,0,0,0,0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0 }; 871 | static unsigned short distc[] = { 1,2,3,4,5,7,9,13,17,25,33,49,65,97,129,193,257,385,513,769,1025,1537,2049,3073,4097,6145,8193,12289,16385,24577, 32768 }; 872 | static unsigned char disteb[] = { 0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13 }; 873 | unsigned int bitbuf=0; 874 | int i,j, bitcount=0; 875 | unsigned char *out = NULL; 876 | unsigned char ***hash_table = (unsigned char***) STBIW_MALLOC(stbiw__ZHASH * sizeof(char**)); 877 | if (hash_table == NULL) 878 | return NULL; 879 | if (quality < 5) quality = 5; 880 | 881 | stbiw__sbpush(out, 0x78); // DEFLATE 32K window 882 | stbiw__sbpush(out, 0x5e); // FLEVEL = 1 883 | stbiw__zlib_add(1,1); // BFINAL = 1 884 | stbiw__zlib_add(1,2); // BTYPE = 1 -- fixed huffman 885 | 886 | for (i=0; i < stbiw__ZHASH; ++i) 887 | hash_table[i] = NULL; 888 | 889 | i=0; 890 | while (i < data_len-3) { 891 | // hash next 3 bytes of data to be compressed 892 | int h = stbiw__zhash(data+i)&(stbiw__ZHASH-1), best=3; 893 | unsigned char *bestloc = 0; 894 | unsigned char **hlist = hash_table[h]; 895 | int n = stbiw__sbcount(hlist); 896 | for (j=0; j < n; ++j) { 897 | if (hlist[j]-data > 
i-32768) { // if entry lies within window 898 | int d = stbiw__zlib_countm(hlist[j], data+i, data_len-i); 899 | if (d >= best) { best=d; bestloc=hlist[j]; } 900 | } 901 | } 902 | // when hash table entry is too long, delete half the entries 903 | if (hash_table[h] && stbiw__sbn(hash_table[h]) == 2*quality) { 904 | STBIW_MEMMOVE(hash_table[h], hash_table[h]+quality, sizeof(hash_table[h][0])*quality); 905 | stbiw__sbn(hash_table[h]) = quality; 906 | } 907 | stbiw__sbpush(hash_table[h],data+i); 908 | 909 | if (bestloc) { 910 | // "lazy matching" - check match at *next* byte, and if it's better, do cur byte as literal 911 | h = stbiw__zhash(data+i+1)&(stbiw__ZHASH-1); 912 | hlist = hash_table[h]; 913 | n = stbiw__sbcount(hlist); 914 | for (j=0; j < n; ++j) { 915 | if (hlist[j]-data > i-32767) { 916 | int e = stbiw__zlib_countm(hlist[j], data+i+1, data_len-i-1); 917 | if (e > best) { // if next match is better, bail on current match 918 | bestloc = NULL; 919 | break; 920 | } 921 | } 922 | } 923 | } 924 | 925 | if (bestloc) { 926 | int d = (int) (data+i - bestloc); // distance back 927 | STBIW_ASSERT(d <= 32767 && best <= 258); 928 | for (j=0; best > lengthc[j+1]-1; ++j); 929 | stbiw__zlib_huff(j+257); 930 | if (lengtheb[j]) stbiw__zlib_add(best - lengthc[j], lengtheb[j]); 931 | for (j=0; d > distc[j+1]-1; ++j); 932 | stbiw__zlib_add(stbiw__zlib_bitrev(j,5),5); 933 | if (disteb[j]) stbiw__zlib_add(d - distc[j], disteb[j]); 934 | i += best; 935 | } else { 936 | stbiw__zlib_huffb(data[i]); 937 | ++i; 938 | } 939 | } 940 | // write out final bytes 941 | for (;i < data_len; ++i) 942 | stbiw__zlib_huffb(data[i]); 943 | stbiw__zlib_huff(256); // end of block 944 | // pad with 0 bits to byte boundary 945 | while (bitcount) 946 | stbiw__zlib_add(0,1); 947 | 948 | for (i=0; i < stbiw__ZHASH; ++i) 949 | (void) stbiw__sbfree(hash_table[i]); 950 | STBIW_FREE(hash_table); 951 | 952 | { 953 | // compute adler32 on input 954 | unsigned int s1=1, s2=0; 955 | int blocklen = (int) 
(data_len % 5552); 956 | j=0; 957 | while (j < data_len) { 958 | for (i=0; i < blocklen; ++i) { s1 += data[j+i]; s2 += s1; } 959 | s1 %= 65521; s2 %= 65521; 960 | j += blocklen; 961 | blocklen = 5552; 962 | } 963 | stbiw__sbpush(out, STBIW_UCHAR(s2 >> 8)); 964 | stbiw__sbpush(out, STBIW_UCHAR(s2)); 965 | stbiw__sbpush(out, STBIW_UCHAR(s1 >> 8)); 966 | stbiw__sbpush(out, STBIW_UCHAR(s1)); 967 | } 968 | *out_len = stbiw__sbn(out); 969 | // make returned pointer freeable 970 | STBIW_MEMMOVE(stbiw__sbraw(out), out, *out_len); 971 | return (unsigned char *) stbiw__sbraw(out); 972 | #endif // STBIW_ZLIB_COMPRESS 973 | } 974 | 975 | static unsigned int stbiw__crc32(unsigned char *buffer, int len) 976 | { 977 | #ifdef STBIW_CRC32 978 | return STBIW_CRC32(buffer, len); 979 | #else 980 | static unsigned int crc_table[256] = 981 | { 982 | 0x00000000, 0x77073096, 0xEE0E612C, 0x990951BA, 0x076DC419, 0x706AF48F, 0xE963A535, 0x9E6495A3, 983 | 0x0eDB8832, 0x79DCB8A4, 0xE0D5E91E, 0x97D2D988, 0x09B64C2B, 0x7EB17CBD, 0xE7B82D07, 0x90BF1D91, 984 | 0x1DB71064, 0x6AB020F2, 0xF3B97148, 0x84BE41DE, 0x1ADAD47D, 0x6DDDE4EB, 0xF4D4B551, 0x83D385C7, 985 | 0x136C9856, 0x646BA8C0, 0xFD62F97A, 0x8A65C9EC, 0x14015C4F, 0x63066CD9, 0xFA0F3D63, 0x8D080DF5, 986 | 0x3B6E20C8, 0x4C69105E, 0xD56041E4, 0xA2677172, 0x3C03E4D1, 0x4B04D447, 0xD20D85FD, 0xA50AB56B, 987 | 0x35B5A8FA, 0x42B2986C, 0xDBBBC9D6, 0xACBCF940, 0x32D86CE3, 0x45DF5C75, 0xDCD60DCF, 0xABD13D59, 988 | 0x26D930AC, 0x51DE003A, 0xC8D75180, 0xBFD06116, 0x21B4F4B5, 0x56B3C423, 0xCFBA9599, 0xB8BDA50F, 989 | 0x2802B89E, 0x5F058808, 0xC60CD9B2, 0xB10BE924, 0x2F6F7C87, 0x58684C11, 0xC1611DAB, 0xB6662D3D, 990 | 0x76DC4190, 0x01DB7106, 0x98D220BC, 0xEFD5102A, 0x71B18589, 0x06B6B51F, 0x9FBFE4A5, 0xE8B8D433, 991 | 0x7807C9A2, 0x0F00F934, 0x9609A88E, 0xE10E9818, 0x7F6A0DBB, 0x086D3D2D, 0x91646C97, 0xE6635C01, 992 | 0x6B6B51F4, 0x1C6C6162, 0x856530D8, 0xF262004E, 0x6C0695ED, 0x1B01A57B, 0x8208F4C1, 0xF50FC457, 993 | 0x65B0D9C6, 0x12B7E950, 0x8BBEB8EA, 
0xFCB9887C, 0x62DD1DDF, 0x15DA2D49, 0x8CD37CF3, 0xFBD44C65, 994 | 0x4DB26158, 0x3AB551CE, 0xA3BC0074, 0xD4BB30E2, 0x4ADFA541, 0x3DD895D7, 0xA4D1C46D, 0xD3D6F4FB, 995 | 0x4369E96A, 0x346ED9FC, 0xAD678846, 0xDA60B8D0, 0x44042D73, 0x33031DE5, 0xAA0A4C5F, 0xDD0D7CC9, 996 | 0x5005713C, 0x270241AA, 0xBE0B1010, 0xC90C2086, 0x5768B525, 0x206F85B3, 0xB966D409, 0xCE61E49F, 997 | 0x5EDEF90E, 0x29D9C998, 0xB0D09822, 0xC7D7A8B4, 0x59B33D17, 0x2EB40D81, 0xB7BD5C3B, 0xC0BA6CAD, 998 | 0xEDB88320, 0x9ABFB3B6, 0x03B6E20C, 0x74B1D29A, 0xEAD54739, 0x9DD277AF, 0x04DB2615, 0x73DC1683, 999 | 0xE3630B12, 0x94643B84, 0x0D6D6A3E, 0x7A6A5AA8, 0xE40ECF0B, 0x9309FF9D, 0x0A00AE27, 0x7D079EB1, 1000 | 0xF00F9344, 0x8708A3D2, 0x1E01F268, 0x6906C2FE, 0xF762575D, 0x806567CB, 0x196C3671, 0x6E6B06E7, 1001 | 0xFED41B76, 0x89D32BE0, 0x10DA7A5A, 0x67DD4ACC, 0xF9B9DF6F, 0x8EBEEFF9, 0x17B7BE43, 0x60B08ED5, 1002 | 0xD6D6A3E8, 0xA1D1937E, 0x38D8C2C4, 0x4FDFF252, 0xD1BB67F1, 0xA6BC5767, 0x3FB506DD, 0x48B2364B, 1003 | 0xD80D2BDA, 0xAF0A1B4C, 0x36034AF6, 0x41047A60, 0xDF60EFC3, 0xA867DF55, 0x316E8EEF, 0x4669BE79, 1004 | 0xCB61B38C, 0xBC66831A, 0x256FD2A0, 0x5268E236, 0xCC0C7795, 0xBB0B4703, 0x220216B9, 0x5505262F, 1005 | 0xC5BA3BBE, 0xB2BD0B28, 0x2BB45A92, 0x5CB36A04, 0xC2D7FFA7, 0xB5D0CF31, 0x2CD99E8B, 0x5BDEAE1D, 1006 | 0x9B64C2B0, 0xEC63F226, 0x756AA39C, 0x026D930A, 0x9C0906A9, 0xEB0E363F, 0x72076785, 0x05005713, 1007 | 0x95BF4A82, 0xE2B87A14, 0x7BB12BAE, 0x0CB61B38, 0x92D28E9B, 0xE5D5BE0D, 0x7CDCEFB7, 0x0BDBDF21, 1008 | 0x86D3D2D4, 0xF1D4E242, 0x68DDB3F8, 0x1FDA836E, 0x81BE16CD, 0xF6B9265B, 0x6FB077E1, 0x18B74777, 1009 | 0x88085AE6, 0xFF0F6A70, 0x66063BCA, 0x11010B5C, 0x8F659EFF, 0xF862AE69, 0x616BFFD3, 0x166CCF45, 1010 | 0xA00AE278, 0xD70DD2EE, 0x4E048354, 0x3903B3C2, 0xA7672661, 0xD06016F7, 0x4969474D, 0x3E6E77DB, 1011 | 0xAED16A4A, 0xD9D65ADC, 0x40DF0B66, 0x37D83BF0, 0xA9BCAE53, 0xDEBB9EC5, 0x47B2CF7F, 0x30B5FFE9, 1012 | 0xBDBDF21C, 0xCABAC28A, 0x53B39330, 0x24B4A3A6, 0xBAD03605, 0xCDD70693, 0x54DE5729, 
0x23D967BF, 1013 | 0xB3667A2E, 0xC4614AB8, 0x5D681B02, 0x2A6F2B94, 0xB40BBE37, 0xC30C8EA1, 0x5A05DF1B, 0x2D02EF8D 1014 | }; 1015 | 1016 | unsigned int crc = ~0u; 1017 | int i; 1018 | for (i=0; i < len; ++i) 1019 | crc = (crc >> 8) ^ crc_table[buffer[i] ^ (crc & 0xff)]; 1020 | return ~crc; 1021 | #endif 1022 | } 1023 | 1024 | #define stbiw__wpng4(o,a,b,c,d) ((o)[0]=STBIW_UCHAR(a),(o)[1]=STBIW_UCHAR(b),(o)[2]=STBIW_UCHAR(c),(o)[3]=STBIW_UCHAR(d),(o)+=4) 1025 | #define stbiw__wp32(data,v) stbiw__wpng4(data, (v)>>24,(v)>>16,(v)>>8,(v)); 1026 | #define stbiw__wptag(data,s) stbiw__wpng4(data, s[0],s[1],s[2],s[3]) 1027 | 1028 | static void stbiw__wpcrc(unsigned char **data, int len) 1029 | { 1030 | unsigned int crc = stbiw__crc32(*data - len - 4, len+4); 1031 | stbiw__wp32(*data, crc); 1032 | } 1033 | 1034 | static unsigned char stbiw__paeth(int a, int b, int c) 1035 | { 1036 | int p = a + b - c, pa = abs(p-a), pb = abs(p-b), pc = abs(p-c); 1037 | if (pa <= pb && pa <= pc) return STBIW_UCHAR(a); 1038 | if (pb <= pc) return STBIW_UCHAR(b); 1039 | return STBIW_UCHAR(c); 1040 | } 1041 | 1042 | // @OPTIMIZE: provide an option that always forces left-predict or paeth predict 1043 | static void stbiw__encode_png_line(unsigned char *pixels, int stride_bytes, int width, int height, int y, int n, int filter_type, signed char *line_buffer) 1044 | { 1045 | static int mapping[] = { 0,1,2,3,4 }; 1046 | static int firstmap[] = { 0,1,0,5,6 }; 1047 | int *mymap = (y != 0) ? mapping : firstmap; 1048 | int i; 1049 | int type = mymap[filter_type]; 1050 | unsigned char *z = pixels + stride_bytes * (stbi__flip_vertically_on_write ? height-1-y : y); 1051 | int signed_stride = stbi__flip_vertically_on_write ? 
-stride_bytes : stride_bytes; 1052 | 1053 | if (type==0) { 1054 | memcpy(line_buffer, z, width*n); 1055 | return; 1056 | } 1057 | 1058 | // first loop isn't optimized since it's just one pixel 1059 | for (i = 0; i < n; ++i) { 1060 | switch (type) { 1061 | case 1: line_buffer[i] = z[i]; break; 1062 | case 2: line_buffer[i] = z[i] - z[i-signed_stride]; break; 1063 | case 3: line_buffer[i] = z[i] - (z[i-signed_stride]>>1); break; 1064 | case 4: line_buffer[i] = (signed char) (z[i] - stbiw__paeth(0,z[i-signed_stride],0)); break; 1065 | case 5: line_buffer[i] = z[i]; break; 1066 | case 6: line_buffer[i] = z[i]; break; 1067 | } 1068 | } 1069 | switch (type) { 1070 | case 1: for (i=n; i < width*n; ++i) line_buffer[i] = z[i] - z[i-n]; break; 1071 | case 2: for (i=n; i < width*n; ++i) line_buffer[i] = z[i] - z[i-signed_stride]; break; 1072 | case 3: for (i=n; i < width*n; ++i) line_buffer[i] = z[i] - ((z[i-n] + z[i-signed_stride])>>1); break; 1073 | case 4: for (i=n; i < width*n; ++i) line_buffer[i] = z[i] - stbiw__paeth(z[i-n], z[i-signed_stride], z[i-signed_stride-n]); break; 1074 | case 5: for (i=n; i < width*n; ++i) line_buffer[i] = z[i] - (z[i-n]>>1); break; 1075 | case 6: for (i=n; i < width*n; ++i) line_buffer[i] = z[i] - stbiw__paeth(z[i-n], 0,0); break; 1076 | } 1077 | } 1078 | 1079 | STBIWDEF unsigned char *stbi_write_png_to_mem(const unsigned char *pixels, int stride_bytes, int x, int y, int n, int *out_len) 1080 | { 1081 | int force_filter = stbi_write_force_png_filter; 1082 | int ctype[5] = { -1, 0, 4, 2, 6 }; 1083 | unsigned char sig[8] = { 137,80,78,71,13,10,26,10 }; 1084 | unsigned char *out,*o, *filt, *zlib; 1085 | signed char *line_buffer; 1086 | int j,zlen; 1087 | 1088 | if (stride_bytes == 0) 1089 | stride_bytes = x * n; 1090 | 1091 | if (force_filter >= 5) { 1092 | force_filter = -1; 1093 | } 1094 | 1095 | filt = (unsigned char *) STBIW_MALLOC((x*n+1) * y); if (!filt) return 0; 1096 | line_buffer = (signed char *) STBIW_MALLOC(x * n); if (!line_buffer) 
{ STBIW_FREE(filt); return 0; } 1097 | for (j=0; j < y; ++j) { 1098 | int filter_type; 1099 | if (force_filter > -1) { 1100 | filter_type = force_filter; 1101 | stbiw__encode_png_line((unsigned char*)(pixels), stride_bytes, x, y, j, n, force_filter, line_buffer); 1102 | } else { // Estimate the best filter by running through all of them: 1103 | int best_filter = 0, best_filter_val = 0x7fffffff, est, i; 1104 | for (filter_type = 0; filter_type < 5; filter_type++) { 1105 | stbiw__encode_png_line((unsigned char*)(pixels), stride_bytes, x, y, j, n, filter_type, line_buffer); 1106 | 1107 | // Estimate the entropy of the line using this filter; the less, the better. 1108 | est = 0; 1109 | for (i = 0; i < x*n; ++i) { 1110 | est += abs((signed char) line_buffer[i]); 1111 | } 1112 | if (est < best_filter_val) { 1113 | best_filter_val = est; 1114 | best_filter = filter_type; 1115 | } 1116 | } 1117 | if (filter_type != best_filter) { // If the last iteration already got us the best filter, don't redo it 1118 | stbiw__encode_png_line((unsigned char*)(pixels), stride_bytes, x, y, j, n, best_filter, line_buffer); 1119 | filter_type = best_filter; 1120 | } 1121 | } 1122 | // when we get here, filter_type contains the filter type, and line_buffer contains the data 1123 | filt[j*(x*n+1)] = (unsigned char) filter_type; 1124 | STBIW_MEMMOVE(filt+j*(x*n+1)+1, line_buffer, x*n); 1125 | } 1126 | STBIW_FREE(line_buffer); 1127 | zlib = stbi_zlib_compress(filt, y*( x*n+1), &zlen, stbi_write_png_compression_level); 1128 | STBIW_FREE(filt); 1129 | if (!zlib) return 0; 1130 | 1131 | // each tag requires 12 bytes of overhead 1132 | out = (unsigned char *) STBIW_MALLOC(8 + 12+13 + 12+zlen + 12); 1133 | if (!out) return 0; 1134 | *out_len = 8 + 12+13 + 12+zlen + 12; 1135 | 1136 | o=out; 1137 | STBIW_MEMMOVE(o,sig,8); o+= 8; 1138 | stbiw__wp32(o, 13); // header length 1139 | stbiw__wptag(o, "IHDR"); 1140 | stbiw__wp32(o, x); 1141 | stbiw__wp32(o, y); 1142 | *o++ = 8; 1143 | *o++ = 
STBIW_UCHAR(ctype[n]); 1144 | *o++ = 0; 1145 | *o++ = 0; 1146 | *o++ = 0; 1147 | stbiw__wpcrc(&o,13); 1148 | 1149 | stbiw__wp32(o, zlen); 1150 | stbiw__wptag(o, "IDAT"); 1151 | STBIW_MEMMOVE(o, zlib, zlen); 1152 | o += zlen; 1153 | STBIW_FREE(zlib); 1154 | stbiw__wpcrc(&o, zlen); 1155 | 1156 | stbiw__wp32(o,0); 1157 | stbiw__wptag(o, "IEND"); 1158 | stbiw__wpcrc(&o,0); 1159 | 1160 | STBIW_ASSERT(o == out + *out_len); 1161 | 1162 | return out; 1163 | } 1164 | 1165 | #ifndef STBI_WRITE_NO_STDIO 1166 | STBIWDEF int stbi_write_png(char const *filename, int x, int y, int comp, const void *data, int stride_bytes) 1167 | { 1168 | FILE *f; 1169 | int len; 1170 | unsigned char *png = stbi_write_png_to_mem((const unsigned char *) data, stride_bytes, x, y, comp, &len); 1171 | if (png == NULL) return 0; 1172 | 1173 | f = stbiw__fopen(filename, "wb"); 1174 | if (!f) { STBIW_FREE(png); return 0; } 1175 | fwrite(png, 1, len, f); 1176 | fclose(f); 1177 | STBIW_FREE(png); 1178 | return 1; 1179 | } 1180 | #endif 1181 | 1182 | STBIWDEF int stbi_write_png_to_func(stbi_write_func *func, void *context, int x, int y, int comp, const void *data, int stride_bytes) 1183 | { 1184 | int len; 1185 | unsigned char *png = stbi_write_png_to_mem((const unsigned char *) data, stride_bytes, x, y, comp, &len); 1186 | if (png == NULL) return 0; 1187 | func(context, png, len); 1188 | STBIW_FREE(png); 1189 | return 1; 1190 | } 1191 | 1192 | 1193 | /* *************************************************************************** 1194 | * 1195 | * JPEG writer 1196 | * 1197 | * This is based on Jon Olick's jo_jpeg.cpp: 1198 | * public domain Simple, Minimalistic JPEG writer - http://www.jonolick.com/code.html 1199 | */ 1200 | 1201 | static const unsigned char stbiw__jpg_ZigZag[] = { 0,1,5,6,14,15,27,28,2,4,7,13,16,26,29,42,3,8,12,17,25,30,41,43,9,11,18, 1202 | 24,31,40,44,53,10,19,23,32,39,45,52,54,20,22,33,38,46,51,55,60,21,34,37,47,50,56,59,61,35,36,48,49,57,58,62,63 }; 1203 | 1204 | static void 
stbiw__jpg_writeBits(stbi__write_context *s, int *bitBufP, int *bitCntP, const unsigned short *bs) { 1205 | int bitBuf = *bitBufP, bitCnt = *bitCntP; 1206 | bitCnt += bs[1]; 1207 | bitBuf |= bs[0] << (24 - bitCnt); 1208 | while(bitCnt >= 8) { 1209 | unsigned char c = (bitBuf >> 16) & 255; 1210 | stbiw__putc(s, c); 1211 | if(c == 255) { 1212 | stbiw__putc(s, 0); 1213 | } 1214 | bitBuf <<= 8; 1215 | bitCnt -= 8; 1216 | } 1217 | *bitBufP = bitBuf; 1218 | *bitCntP = bitCnt; 1219 | } 1220 | 1221 | static void stbiw__jpg_DCT(float *d0p, float *d1p, float *d2p, float *d3p, float *d4p, float *d5p, float *d6p, float *d7p) { 1222 | float d0 = *d0p, d1 = *d1p, d2 = *d2p, d3 = *d3p, d4 = *d4p, d5 = *d5p, d6 = *d6p, d7 = *d7p; 1223 | float z1, z2, z3, z4, z5, z11, z13; 1224 | 1225 | float tmp0 = d0 + d7; 1226 | float tmp7 = d0 - d7; 1227 | float tmp1 = d1 + d6; 1228 | float tmp6 = d1 - d6; 1229 | float tmp2 = d2 + d5; 1230 | float tmp5 = d2 - d5; 1231 | float tmp3 = d3 + d4; 1232 | float tmp4 = d3 - d4; 1233 | 1234 | // Even part 1235 | float tmp10 = tmp0 + tmp3; // phase 2 1236 | float tmp13 = tmp0 - tmp3; 1237 | float tmp11 = tmp1 + tmp2; 1238 | float tmp12 = tmp1 - tmp2; 1239 | 1240 | d0 = tmp10 + tmp11; // phase 3 1241 | d4 = tmp10 - tmp11; 1242 | 1243 | z1 = (tmp12 + tmp13) * 0.707106781f; // c4 1244 | d2 = tmp13 + z1; // phase 5 1245 | d6 = tmp13 - z1; 1246 | 1247 | // Odd part 1248 | tmp10 = tmp4 + tmp5; // phase 2 1249 | tmp11 = tmp5 + tmp6; 1250 | tmp12 = tmp6 + tmp7; 1251 | 1252 | // The rotator is modified from fig 4-8 to avoid extra negations. 
1253 | z5 = (tmp10 - tmp12) * 0.382683433f; // c6 1254 | z2 = tmp10 * 0.541196100f + z5; // c2-c6 1255 | z4 = tmp12 * 1.306562965f + z5; // c2+c6 1256 | z3 = tmp11 * 0.707106781f; // c4 1257 | 1258 | z11 = tmp7 + z3; // phase 5 1259 | z13 = tmp7 - z3; 1260 | 1261 | *d5p = z13 + z2; // phase 6 1262 | *d3p = z13 - z2; 1263 | *d1p = z11 + z4; 1264 | *d7p = z11 - z4; 1265 | 1266 | *d0p = d0; *d2p = d2; *d4p = d4; *d6p = d6; 1267 | } 1268 | 1269 | static void stbiw__jpg_calcBits(int val, unsigned short bits[2]) { 1270 | int tmp1 = val < 0 ? -val : val; 1271 | val = val < 0 ? val-1 : val; 1272 | bits[1] = 1; 1273 | while(tmp1 >>= 1) { 1274 | ++bits[1]; 1275 | } 1276 | bits[0] = val & ((1<0)&&(DU[end0pos]==0); --end0pos) { 1314 | } 1315 | // end0pos = first element in reverse order !=0 1316 | if(end0pos == 0) { 1317 | stbiw__jpg_writeBits(s, bitBuf, bitCnt, EOB); 1318 | return DU[0]; 1319 | } 1320 | for(i = 1; i <= end0pos; ++i) { 1321 | int startpos = i; 1322 | int nrzeroes; 1323 | unsigned short bits[2]; 1324 | for (; DU[i]==0 && i<=end0pos; ++i) { 1325 | } 1326 | nrzeroes = i-startpos; 1327 | if ( nrzeroes >= 16 ) { 1328 | int lng = nrzeroes>>4; 1329 | int nrmarker; 1330 | for (nrmarker=1; nrmarker <= lng; ++nrmarker) 1331 | stbiw__jpg_writeBits(s, bitBuf, bitCnt, M16zeroes); 1332 | nrzeroes &= 15; 1333 | } 1334 | stbiw__jpg_calcBits(DU[i], bits); 1335 | stbiw__jpg_writeBits(s, bitBuf, bitCnt, HTAC[(nrzeroes<<4)+bits[1]]); 1336 | stbiw__jpg_writeBits(s, bitBuf, bitCnt, bits); 1337 | } 1338 | if(end0pos != 63) { 1339 | stbiw__jpg_writeBits(s, bitBuf, bitCnt, EOB); 1340 | } 1341 | return DU[0]; 1342 | } 1343 | 1344 | static int stbi_write_jpg_core(stbi__write_context *s, int width, int height, int comp, const void* data, int quality) { 1345 | // Constants that don't pollute global namespace 1346 | static const unsigned char std_dc_luminance_nrcodes[] = {0,0,1,5,1,1,1,1,1,1,0,0,0,0,0,0,0}; 1347 | static const unsigned char std_dc_luminance_values[] = 
{0,1,2,3,4,5,6,7,8,9,10,11}; 1348 | static const unsigned char std_ac_luminance_nrcodes[] = {0,0,2,1,3,3,2,4,3,5,5,4,4,0,0,1,0x7d}; 1349 | static const unsigned char std_ac_luminance_values[] = { 1350 | 0x01,0x02,0x03,0x00,0x04,0x11,0x05,0x12,0x21,0x31,0x41,0x06,0x13,0x51,0x61,0x07,0x22,0x71,0x14,0x32,0x81,0x91,0xa1,0x08, 1351 | 0x23,0x42,0xb1,0xc1,0x15,0x52,0xd1,0xf0,0x24,0x33,0x62,0x72,0x82,0x09,0x0a,0x16,0x17,0x18,0x19,0x1a,0x25,0x26,0x27,0x28, 1352 | 0x29,0x2a,0x34,0x35,0x36,0x37,0x38,0x39,0x3a,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4a,0x53,0x54,0x55,0x56,0x57,0x58,0x59, 1353 | 0x5a,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0x83,0x84,0x85,0x86,0x87,0x88,0x89, 1354 | 0x8a,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9a,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0xb2,0xb3,0xb4,0xb5,0xb6, 1355 | 0xb7,0xb8,0xb9,0xba,0xc2,0xc3,0xc4,0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xd2,0xd3,0xd4,0xd5,0xd6,0xd7,0xd8,0xd9,0xda,0xe1,0xe2, 1356 | 0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xea,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,0xf8,0xf9,0xfa 1357 | }; 1358 | static const unsigned char std_dc_chrominance_nrcodes[] = {0,0,3,1,1,1,1,1,1,1,1,1,0,0,0,0,0}; 1359 | static const unsigned char std_dc_chrominance_values[] = {0,1,2,3,4,5,6,7,8,9,10,11}; 1360 | static const unsigned char std_ac_chrominance_nrcodes[] = {0,0,2,1,2,4,4,3,4,7,5,4,4,0,1,2,0x77}; 1361 | static const unsigned char std_ac_chrominance_values[] = { 1362 | 0x00,0x01,0x02,0x03,0x11,0x04,0x05,0x21,0x31,0x06,0x12,0x41,0x51,0x07,0x61,0x71,0x13,0x22,0x32,0x81,0x08,0x14,0x42,0x91, 1363 | 0xa1,0xb1,0xc1,0x09,0x23,0x33,0x52,0xf0,0x15,0x62,0x72,0xd1,0x0a,0x16,0x24,0x34,0xe1,0x25,0xf1,0x17,0x18,0x19,0x1a,0x26, 1364 | 0x27,0x28,0x29,0x2a,0x35,0x36,0x37,0x38,0x39,0x3a,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4a,0x53,0x54,0x55,0x56,0x57,0x58, 1365 | 0x59,0x5a,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0x82,0x83,0x84,0x85,0x86,0x87, 1366 | 
0x88,0x89,0x8a,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9a,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0xb2,0xb3,0xb4, 1367 | 0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xc2,0xc3,0xc4,0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xd2,0xd3,0xd4,0xd5,0xd6,0xd7,0xd8,0xd9,0xda, 1368 | 0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xea,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,0xf8,0xf9,0xfa 1369 | }; 1370 | // Huffman tables 1371 | static const unsigned short YDC_HT[256][2] = { {0,2},{2,3},{3,3},{4,3},{5,3},{6,3},{14,4},{30,5},{62,6},{126,7},{254,8},{510,9}}; 1372 | static const unsigned short UVDC_HT[256][2] = { {0,2},{1,2},{2,2},{6,3},{14,4},{30,5},{62,6},{126,7},{254,8},{510,9},{1022,10},{2046,11}}; 1373 | static const unsigned short YAC_HT[256][2] = { 1374 | {10,4},{0,2},{1,2},{4,3},{11,4},{26,5},{120,7},{248,8},{1014,10},{65410,16},{65411,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, 1375 | {12,4},{27,5},{121,7},{502,9},{2038,11},{65412,16},{65413,16},{65414,16},{65415,16},{65416,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, 1376 | {28,5},{249,8},{1015,10},{4084,12},{65417,16},{65418,16},{65419,16},{65420,16},{65421,16},{65422,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, 1377 | {58,6},{503,9},{4085,12},{65423,16},{65424,16},{65425,16},{65426,16},{65427,16},{65428,16},{65429,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, 1378 | {59,6},{1016,10},{65430,16},{65431,16},{65432,16},{65433,16},{65434,16},{65435,16},{65436,16},{65437,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, 1379 | {122,7},{2039,11},{65438,16},{65439,16},{65440,16},{65441,16},{65442,16},{65443,16},{65444,16},{65445,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, 1380 | {123,7},{4086,12},{65446,16},{65447,16},{65448,16},{65449,16},{65450,16},{65451,16},{65452,16},{65453,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, 1381 | {250,8},{4087,12},{65454,16},{65455,16},{65456,16},{65457,16},{65458,16},{65459,16},{65460,16},{65461,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, 1382 | 
{504,9},{32704,15},{65462,16},{65463,16},{65464,16},{65465,16},{65466,16},{65467,16},{65468,16},{65469,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, 1383 | {505,9},{65470,16},{65471,16},{65472,16},{65473,16},{65474,16},{65475,16},{65476,16},{65477,16},{65478,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, 1384 | {506,9},{65479,16},{65480,16},{65481,16},{65482,16},{65483,16},{65484,16},{65485,16},{65486,16},{65487,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, 1385 | {1017,10},{65488,16},{65489,16},{65490,16},{65491,16},{65492,16},{65493,16},{65494,16},{65495,16},{65496,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, 1386 | {1018,10},{65497,16},{65498,16},{65499,16},{65500,16},{65501,16},{65502,16},{65503,16},{65504,16},{65505,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, 1387 | {2040,11},{65506,16},{65507,16},{65508,16},{65509,16},{65510,16},{65511,16},{65512,16},{65513,16},{65514,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, 1388 | {65515,16},{65516,16},{65517,16},{65518,16},{65519,16},{65520,16},{65521,16},{65522,16},{65523,16},{65524,16},{0,0},{0,0},{0,0},{0,0},{0,0}, 1389 | {2041,11},{65525,16},{65526,16},{65527,16},{65528,16},{65529,16},{65530,16},{65531,16},{65532,16},{65533,16},{65534,16},{0,0},{0,0},{0,0},{0,0},{0,0} 1390 | }; 1391 | static const unsigned short UVAC_HT[256][2] = { 1392 | {0,2},{1,2},{4,3},{10,4},{24,5},{25,5},{56,6},{120,7},{500,9},{1014,10},{4084,12},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, 1393 | {11,4},{57,6},{246,8},{501,9},{2038,11},{4085,12},{65416,16},{65417,16},{65418,16},{65419,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, 1394 | {26,5},{247,8},{1015,10},{4086,12},{32706,15},{65420,16},{65421,16},{65422,16},{65423,16},{65424,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, 1395 | {27,5},{248,8},{1016,10},{4087,12},{65425,16},{65426,16},{65427,16},{65428,16},{65429,16},{65430,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, 1396 | {58,6},{502,9},{65431,16},{65432,16},{65433,16},{65434,16},{65435,16},{65436,16},{65437,16},{65438,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, 1397 | 
{59,6},{1017,10},{65439,16},{65440,16},{65441,16},{65442,16},{65443,16},{65444,16},{65445,16},{65446,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, 1398 | {121,7},{2039,11},{65447,16},{65448,16},{65449,16},{65450,16},{65451,16},{65452,16},{65453,16},{65454,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, 1399 | {122,7},{2040,11},{65455,16},{65456,16},{65457,16},{65458,16},{65459,16},{65460,16},{65461,16},{65462,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, 1400 | {249,8},{65463,16},{65464,16},{65465,16},{65466,16},{65467,16},{65468,16},{65469,16},{65470,16},{65471,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, 1401 | {503,9},{65472,16},{65473,16},{65474,16},{65475,16},{65476,16},{65477,16},{65478,16},{65479,16},{65480,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, 1402 | {504,9},{65481,16},{65482,16},{65483,16},{65484,16},{65485,16},{65486,16},{65487,16},{65488,16},{65489,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, 1403 | {505,9},{65490,16},{65491,16},{65492,16},{65493,16},{65494,16},{65495,16},{65496,16},{65497,16},{65498,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, 1404 | {506,9},{65499,16},{65500,16},{65501,16},{65502,16},{65503,16},{65504,16},{65505,16},{65506,16},{65507,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, 1405 | {2041,11},{65508,16},{65509,16},{65510,16},{65511,16},{65512,16},{65513,16},{65514,16},{65515,16},{65516,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, 1406 | {16352,14},{65517,16},{65518,16},{65519,16},{65520,16},{65521,16},{65522,16},{65523,16},{65524,16},{65525,16},{0,0},{0,0},{0,0},{0,0},{0,0}, 1407 | {1018,10},{32707,15},{65526,16},{65527,16},{65528,16},{65529,16},{65530,16},{65531,16},{65532,16},{65533,16},{65534,16},{0,0},{0,0},{0,0},{0,0},{0,0} 1408 | }; 1409 | static const int YQT[] = {16,11,10,16,24,40,51,61,12,12,14,19,26,58,60,55,14,13,16,24,40,57,69,56,14,17,22,29,51,87,80,62,18,22, 1410 | 37,56,68,109,103,77,24,35,55,64,81,104,113,92,49,64,78,87,103,121,120,101,72,92,95,98,112,100,103,99}; 1411 | static const int UVQT[] = 
{17,18,24,47,99,99,99,99,18,21,26,66,99,99,99,99,24,26,56,99,99,99,99,99,47,66,99,99,99,99,99,99, 1412 | 99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99}; 1413 | static const float aasf[] = { 1.0f * 2.828427125f, 1.387039845f * 2.828427125f, 1.306562965f * 2.828427125f, 1.175875602f * 2.828427125f, 1414 | 1.0f * 2.828427125f, 0.785694958f * 2.828427125f, 0.541196100f * 2.828427125f, 0.275899379f * 2.828427125f }; 1415 | 1416 | int row, col, i, k; 1417 | float fdtbl_Y[64], fdtbl_UV[64]; 1418 | unsigned char YTable[64], UVTable[64]; 1419 | 1420 | if(!data || !width || !height || comp > 4 || comp < 1) { 1421 | return 0; 1422 | } 1423 | 1424 | quality = quality ? quality : 90; 1425 | quality = quality < 1 ? 1 : quality > 100 ? 100 : quality; 1426 | quality = quality < 50 ? 5000 / quality : 200 - quality * 2; 1427 | 1428 | for(i = 0; i < 64; ++i) { 1429 | int uvti, yti = (YQT[i]*quality+50)/100; 1430 | YTable[stbiw__jpg_ZigZag[i]] = (unsigned char) (yti < 1 ? 1 : yti > 255 ? 255 : yti); 1431 | uvti = (UVQT[i]*quality+50)/100; 1432 | UVTable[stbiw__jpg_ZigZag[i]] = (unsigned char) (uvti < 1 ? 1 : uvti > 255 ? 
255 : uvti); 1433 | } 1434 | 1435 | for(row = 0, k = 0; row < 8; ++row) { 1436 | for(col = 0; col < 8; ++col, ++k) { 1437 | fdtbl_Y[k] = 1 / (YTable [stbiw__jpg_ZigZag[k]] * aasf[row] * aasf[col]); 1438 | fdtbl_UV[k] = 1 / (UVTable[stbiw__jpg_ZigZag[k]] * aasf[row] * aasf[col]); 1439 | } 1440 | } 1441 | 1442 | // Write Headers 1443 | { 1444 | static const unsigned char head0[] = { 0xFF,0xD8,0xFF,0xE0,0,0x10,'J','F','I','F',0,1,1,0,0,1,0,1,0,0,0xFF,0xDB,0,0x84,0 }; 1445 | static const unsigned char head2[] = { 0xFF,0xDA,0,0xC,3,1,0,2,0x11,3,0x11,0,0x3F,0 }; 1446 | const unsigned char head1[] = { 0xFF,0xC0,0,0x11,8,(unsigned char)(height>>8),STBIW_UCHAR(height),(unsigned char)(width>>8),STBIW_UCHAR(width), 1447 | 3,1,0x11,0,2,0x11,1,3,0x11,1,0xFF,0xC4,0x01,0xA2,0 }; 1448 | s->func(s->context, (void*)head0, sizeof(head0)); 1449 | s->func(s->context, (void*)YTable, sizeof(YTable)); 1450 | stbiw__putc(s, 1); 1451 | s->func(s->context, UVTable, sizeof(UVTable)); 1452 | s->func(s->context, (void*)head1, sizeof(head1)); 1453 | s->func(s->context, (void*)(std_dc_luminance_nrcodes+1), sizeof(std_dc_luminance_nrcodes)-1); 1454 | s->func(s->context, (void*)std_dc_luminance_values, sizeof(std_dc_luminance_values)); 1455 | stbiw__putc(s, 0x10); // HTYACinfo 1456 | s->func(s->context, (void*)(std_ac_luminance_nrcodes+1), sizeof(std_ac_luminance_nrcodes)-1); 1457 | s->func(s->context, (void*)std_ac_luminance_values, sizeof(std_ac_luminance_values)); 1458 | stbiw__putc(s, 1); // HTUDCinfo 1459 | s->func(s->context, (void*)(std_dc_chrominance_nrcodes+1), sizeof(std_dc_chrominance_nrcodes)-1); 1460 | s->func(s->context, (void*)std_dc_chrominance_values, sizeof(std_dc_chrominance_values)); 1461 | stbiw__putc(s, 0x11); // HTUACinfo 1462 | s->func(s->context, (void*)(std_ac_chrominance_nrcodes+1), sizeof(std_ac_chrominance_nrcodes)-1); 1463 | s->func(s->context, (void*)std_ac_chrominance_values, sizeof(std_ac_chrominance_values)); 1464 | s->func(s->context, (void*)head2, sizeof(head2)); 
1465 | } 1466 | 1467 | // Encode 8x8 macroblocks 1468 | { 1469 | static const unsigned short fillBits[] = {0x7F, 7}; 1470 | const unsigned char *imageData = (const unsigned char *)data; 1471 | int DCY=0, DCU=0, DCV=0; 1472 | int bitBuf=0, bitCnt=0; 1473 | // comp == 2 is grey+alpha (alpha is ignored) 1474 | int ofsG = comp > 2 ? 1 : 0, ofsB = comp > 2 ? 2 : 0; 1475 | int x, y, pos; 1476 | for(y = 0; y < height; y += 8) { 1477 | for(x = 0; x < width; x += 8) { 1478 | float YDU[64], UDU[64], VDU[64]; 1479 | for(row = y, pos = 0; row < y+8; ++row) { 1480 | // row >= height => use last input row 1481 | int clamped_row = (row < height) ? row : height - 1; 1482 | int base_p = (stbi__flip_vertically_on_write ? (height-1-clamped_row) : clamped_row)*width*comp; 1483 | for(col = x; col < x+8; ++col, ++pos) { 1484 | float r, g, b; 1485 | // if col >= width => use pixel from last input column 1486 | int p = base_p + ((col < width) ? col : (width-1))*comp; 1487 | 1488 | r = imageData[p+0]; 1489 | g = imageData[p+ofsG]; 1490 | b = imageData[p+ofsB]; 1491 | YDU[pos]=+0.29900f*r+0.58700f*g+0.11400f*b-128; 1492 | UDU[pos]=-0.16874f*r-0.33126f*g+0.50000f*b; 1493 | VDU[pos]=+0.50000f*r-0.41869f*g-0.08131f*b; 1494 | } 1495 | } 1496 | 1497 | DCY = stbiw__jpg_processDU(s, &bitBuf, &bitCnt, YDU, fdtbl_Y, DCY, YDC_HT, YAC_HT); 1498 | DCU = stbiw__jpg_processDU(s, &bitBuf, &bitCnt, UDU, fdtbl_UV, DCU, UVDC_HT, UVAC_HT); 1499 | DCV = stbiw__jpg_processDU(s, &bitBuf, &bitCnt, VDU, fdtbl_UV, DCV, UVDC_HT, UVAC_HT); 1500 | } 1501 | } 1502 | 1503 | // Do the bit alignment of the EOI marker 1504 | stbiw__jpg_writeBits(s, &bitBuf, &bitCnt, fillBits); 1505 | } 1506 | 1507 | // EOI 1508 | stbiw__putc(s, 0xFF); 1509 | stbiw__putc(s, 0xD9); 1510 | 1511 | return 1; 1512 | } 1513 | 1514 | STBIWDEF int stbi_write_jpg_to_func(stbi_write_func *func, void *context, int x, int y, int comp, const void *data, int quality) 1515 | { 1516 | stbi__write_context s; 1517 | stbi__start_write_callbacks(&s, func, 
context); 1518 | return stbi_write_jpg_core(&s, x, y, comp, (void *) data, quality); 1519 | } 1520 | 1521 | 1522 | #ifndef STBI_WRITE_NO_STDIO 1523 | STBIWDEF int stbi_write_jpg(char const *filename, int x, int y, int comp, const void *data, int quality) 1524 | { 1525 | stbi__write_context s; 1526 | if (stbi__start_write_file(&s,filename)) { 1527 | int r = stbi_write_jpg_core(&s, x, y, comp, data, quality); 1528 | stbi__end_write_file(&s); 1529 | return r; 1530 | } else 1531 | return 0; 1532 | } 1533 | #endif 1534 | 1535 | #endif // STB_IMAGE_WRITE_IMPLEMENTATION 1536 | 1537 | /* Revision history 1538 | 1.10 (2019-02-07) 1539 | support utf8 filenames in Windows; fix warnings and platform ifdefs 1540 | 1.09 (2018-02-11) 1541 | fix typo in zlib quality API, improve STB_I_W_STATIC in C++ 1542 | 1.08 (2018-01-29) 1543 | add stbi__flip_vertically_on_write, external zlib, zlib quality, choose PNG filter 1544 | 1.07 (2017-07-24) 1545 | doc fix 1546 | 1.06 (2017-07-23) 1547 | writing JPEG (using Jon Olick's code) 1548 | 1.05 ??? 1549 | 1.04 (2017-03-03) 1550 | monochrome BMP expansion 1551 | 1.03 ??? 
1552 | 1.02 (2016-04-02) 1553 | avoid allocating large structures on the stack 1554 | 1.01 (2016-01-16) 1555 | STBIW_REALLOC_SIZED: support allocators with no realloc support 1556 | avoid race-condition in crc initialization 1557 | minor compile issues 1558 | 1.00 (2015-09-14) 1559 | installable file IO function 1560 | 0.99 (2015-09-13) 1561 | warning fixes; TGA rle support 1562 | 0.98 (2015-04-08) 1563 | added STBIW_MALLOC, STBIW_ASSERT etc 1564 | 0.97 (2015-01-18) 1565 | fixed HDR asserts, rewrote HDR rle logic 1566 | 0.96 (2015-01-17) 1567 | add HDR output 1568 | fix monochrome BMP 1569 | 0.95 (2014-08-17) 1570 | add monochrome TGA output 1571 | 0.94 (2014-05-31) 1572 | rename private functions to avoid conflicts with stb_image.h 1573 | 0.93 (2014-05-27) 1574 | warning fixes 1575 | 0.92 (2010-08-01) 1576 | casts to unsigned char to fix warnings 1577 | 0.91 (2010-07-17) 1578 | first public release 1579 | 0.90 first internal release 1580 | */ 1581 | 1582 | /* 1583 | ------------------------------------------------------------------------------ 1584 | This software is available under 2 licenses -- choose whichever you prefer. 1585 | ------------------------------------------------------------------------------ 1586 | ALTERNATIVE A - MIT License 1587 | Copyright (c) 2017 Sean Barrett 1588 | Permission is hereby granted, free of charge, to any person obtaining a copy of 1589 | this software and associated documentation files (the "Software"), to deal in 1590 | the Software without restriction, including without limitation the rights to 1591 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 1592 | of the Software, and to permit persons to whom the Software is furnished to do 1593 | so, subject to the following conditions: 1594 | The above copyright notice and this permission notice shall be included in all 1595 | copies or substantial portions of the Software. 
1596 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 1597 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 1598 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 1599 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 1600 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 1601 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 1602 | SOFTWARE. 1603 | ------------------------------------------------------------------------------ 1604 | ALTERNATIVE B - Public Domain (www.unlicense.org) 1605 | This is free and unencumbered software released into the public domain. 1606 | Anyone is free to copy, modify, publish, use, compile, sell, or distribute this 1607 | software, either in source code form or as a compiled binary, for any purpose, 1608 | commercial or non-commercial, and by any means. 1609 | In jurisdictions that recognize copyright laws, the author or authors of this 1610 | software dedicate any and all copyright interest in the software to the public 1611 | domain. We make this dedication for the benefit of the public at large and to 1612 | the detriment of our heirs and successors. We intend this dedication to be an 1613 | overt act of relinquishment in perpetuity of all present and future rights to 1614 | this software under copyright law. 1615 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 1616 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 1617 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 1618 | AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 1619 | ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 1620 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
1621 | ------------------------------------------------------------------------------ 1622 | */ 1623 | -------------------------------------------------------------------------------- /source/main.cpp: -------------------------------------------------------------------------------- 1 | // main program entry point and the actual raytracing bits 2 | 3 | #include "maths.h" 4 | #include "scene.h" 5 | #include 6 | 7 | // Include external libraries: 8 | // - PNG writing 9 | #define STBI_MSC_SECURE_CRT 10 | #define STB_IMAGE_WRITE_IMPLEMENTATION 11 | #include "external/stb_image_write.h" 12 | // - time measurement 13 | #define SOKOL_IMPL 14 | #include "external/sokol_time.h" 15 | // - OBJ file loading 16 | #include "external/objparser.h" 17 | // - Multi-threaded job system 18 | #include "external/enkits/TaskScheduler_c.h" 19 | 20 | 21 | // -------------------------------------------------------------------------- 22 | // "ray/path tracing" bits 23 | 24 | // general minimum/maximum distances for rays (from "very close to surface but not exactly on it" 25 | // to "ten million units") 26 | const float kMinT = 0.001f; 27 | const float kMaxT = 1.0e7f; 28 | // maximum raytracing recursion depth, i.e. 
number of light bounces 29 | const int kMaxDepth = 10; 30 | 31 | // we have one hardcoded directional light, with this direction and color 32 | static const float3 kLightDir = normalize(float3(-0.7f,1.0f,0.5f)); 33 | static const float3 kLightColor = float3(0.7f,0.6f,0.5f); 34 | 35 | 36 | // when a ray "r" has just hit a surface at point "hit", decide what to do about it: 37 | // in our very simple case, we assume the surface is perfectly diffuse, so we'll return: 38 | // - surface albedo ("color") in "attenuation" 39 | // - new random ray for the next light bounce in "scattered" 40 | // - illumination from the directional light in "outLightE" 41 | static bool Scatter(const Ray& r, const Hit& hit, float3& attenuation, Ray& scattered, float3& outLightE, uint32_t& rngState, int& inoutRayCount) 42 | { 43 | outLightE = float3(0,0,0); 44 | 45 | // model a perfectly diffuse material: 46 | 47 | // random point on unit sphere that is tangent to the hit point 48 | float3 target = hit.pos + hit.normal + RandomUnitVector(rngState); 49 | scattered = Ray(hit.pos, normalize(target - hit.pos)); 50 | 51 | // make color slightly based on surface normals 52 | float3 albedo = hit.normal * 0.0f + float3(0.7f,0.7f,0.7f); 53 | attenuation = albedo; 54 | 55 | // explicit directional light by shooting a shadow ray 56 | ++inoutRayCount; 57 | if (!HitSceneShadow(Ray(hit.pos, kLightDir), kMinT, kMaxT)) 58 | { 59 | // ray towards the light did not hit anything in the scene, so 60 | // that means we are not in shadow: compute illumination from it 61 | float3 rdir = r.dir; 62 | AssertUnit(rdir); 63 | float3 nl = dot(hit.normal, rdir) < 0 ? 
hit.normal : -hit.normal; 64 | outLightE += albedo * kLightColor * (fmax(0.0f, dot(kLightDir, nl))); 65 | } 66 | 67 | return true; 68 | } 69 | 70 | 71 | // trace a ray into the scene, and return the final color for it 72 | static float3 Trace(const Ray& r, int depth, uint32_t& rngState, int& inoutRayCount) 73 | { 74 | ++inoutRayCount; 75 | Hit hit; 76 | int id = HitScene(r, kMinT, kMaxT, hit); 77 | if (id != -1) 78 | { 79 | // ray hits something in the scene 80 | Ray scattered; 81 | float3 attenuation; 82 | float3 lightE; 83 | if (depth < kMaxDepth && Scatter(r, hit, attenuation, scattered, lightE, rngState, inoutRayCount)) 84 | { 85 | // we got a new ray bounced from the surface; recursively trace it 86 | return lightE + attenuation * Trace(scattered, depth+1, rngState, inoutRayCount); 87 | } 88 | else 89 | { 90 | // reached recursion limit, or surface fully absorbed the ray: return black 91 | return float3(0,0,0); 92 | } 93 | } 94 | else 95 | { 96 | // ray does not hit anything: return illumination from the sky (just a simple gradient really) 97 | float3 unitDir = r.dir; 98 | float t = 0.5f*(unitDir.getY() + 1.0f); 99 | return ((1.0f - t)*float3(1.0f, 1.0f, 1.0f) + t * float3(0.5f, 0.7f, 1.0f)) * 0.5f; 100 | } 101 | } 102 | 103 | 104 | // load scene from an .OBJ file 105 | static bool LoadScene(const char* dataFile, float3& outBoundsMin, float3& outBoundsMax) 106 | { 107 | ObjFile objFile; 108 | if (!objParseFile(objFile, dataFile)) 109 | { 110 | printf("ERROR: failed to load .obj file\n"); 111 | return false; 112 | } 113 | outBoundsMin = float3(+1.0e6f, +1.0e6f, +1.0e6f); 114 | outBoundsMax = float3(-1.0e6f, -1.0e6f, -1.0e6f); 115 | 116 | int objTriCount = int(objFile.f_size / 9); 117 | Triangle* tris = new Triangle[objTriCount + 2]; // will add two triangles for the "floor" 118 | for (int i = 0; i < objTriCount; ++i) 119 | { 120 | int idx0 = objFile.f[i * 9 + 0] * 3; 121 | int idx1 = objFile.f[i * 9 + 3] * 3; 122 | int idx2 = objFile.f[i * 9 + 6] * 3; 123 | 
float3 v0 = float3(objFile.v[idx0 + 0], objFile.v[idx0 + 1], objFile.v[idx0 + 2]); 124 | float3 v1 = float3(objFile.v[idx1 + 0], objFile.v[idx1 + 1], objFile.v[idx1 + 2]); 125 | float3 v2 = float3(objFile.v[idx2 + 0], objFile.v[idx2 + 1], objFile.v[idx2 + 2]); 126 | tris[i].v0 = v0; 127 | tris[i].v1 = v1; 128 | tris[i].v2 = v2; 129 | outBoundsMin = min(outBoundsMin, v0); outBoundsMax = max(outBoundsMax, v0); 130 | outBoundsMin = min(outBoundsMin, v1); outBoundsMax = max(outBoundsMax, v1); 131 | outBoundsMin = min(outBoundsMin, v2); outBoundsMax = max(outBoundsMax, v2); 132 | } 133 | 134 | // add two triangles that are right "under the scene" and covering larger area than the scene 135 | // itself, to serve as a "floor" 136 | float3 size = outBoundsMax - outBoundsMin; 137 | float3 extra = size * 0.7f; 138 | tris[objTriCount+0].v0 = float3(outBoundsMin.getX()-extra.getX(), outBoundsMin.getY(), outBoundsMin.getZ()-extra.getZ()); 139 | tris[objTriCount+0].v1 = float3(outBoundsMin.getX()-extra.getX(), outBoundsMin.getY(), outBoundsMax.getZ()+extra.getZ()); 140 | tris[objTriCount+0].v2 = float3(outBoundsMax.getX()+extra.getX(), outBoundsMin.getY(), outBoundsMin.getZ()-extra.getZ()); 141 | tris[objTriCount+1].v0 = float3(outBoundsMin.getX()-extra.getX(), outBoundsMin.getY(), outBoundsMax.getZ()+extra.getZ()); 142 | tris[objTriCount+1].v1 = float3(outBoundsMax.getX()+extra.getX(), outBoundsMin.getY(), outBoundsMax.getZ()+extra.getZ()); 143 | tris[objTriCount+1].v2 = float3(outBoundsMax.getX()+extra.getX(), outBoundsMin.getY(), outBoundsMin.getZ()-extra.getZ()); 144 | 145 | uint64_t t0 = stm_now(); 146 | InitializeScene(objTriCount + 2, tris); 147 | printf("Initialized scene '%s' (%i tris) in %.3fs\n", dataFile, objTriCount+2, stm_sec(stm_since(t0))); 148 | 149 | delete[] tris; 150 | return true; 151 | } 152 | 153 | struct TraceData 154 | { 155 | int screenWidth, screenHeight, samplesPerPixel; 156 | uint8_t* image; 157 | const Camera* camera; 158 | std::atomic rayCount; 159 
| }; 160 | 161 | static void TraceImageJob(uint32_t start, uint32_t end, uint32_t threadnum, void* data_) 162 | { 163 | (void)threadnum; 164 | TraceData& data = *(TraceData*)data_; 165 | uint8_t* image = data.image + start * data.screenWidth * 4; 166 | 167 | float invWidth = 1.0f / data.screenWidth; 168 | float invHeight = 1.0f / data.screenHeight; 169 | 170 | int rayCount = 0; 171 | // go over the image: each pixel row 172 | for (uint32_t y = start; y < end; ++y) 173 | { 174 | // go over the image: each pixel in the row 175 | uint32_t rngState = y * 9781 + 1; 176 | for (int x = 0; x < data.screenWidth; ++x) 177 | { 178 | float3 col(0, 0, 0); 179 | // we'll trace N slightly jittered rays for each pixel, to get anti-aliasing, loop over them here 180 | for (int s = 0; s < data.samplesPerPixel; s++) 181 | { 182 | // get a ray from camera, and trace it 183 | float u = float(x + RandomFloat01(rngState)) * invWidth; 184 | float v = float(y + RandomFloat01(rngState)) * invHeight; 185 | Ray r = data.camera->GetRay(u, v, rngState); 186 | col += Trace(r, 0, rngState, rayCount); 187 | } 188 | col *= 1.0f / float(data.samplesPerPixel); 189 | 190 | // simplistic "gamma correction" by just taking a square root of the final color 191 | col.setX(sqrtf(col.getX())); 192 | col.setY(sqrtf(col.getY())); 193 | col.setZ(sqrtf(col.getZ())); 194 | 195 | // our image is bytes in 0-255 range, turn our floats into them here and write into the image 196 | image[0] = uint8_t(saturate(col.getX()) * 255.0f); 197 | image[1] = uint8_t(saturate(col.getY()) * 255.0f); 198 | image[2] = uint8_t(saturate(col.getZ()) * 255.0f); 199 | image[3] = 255; 200 | image += 4; 201 | } 202 | } 203 | data.rayCount += rayCount; 204 | } 205 | 206 | 207 | int main(int argc, const char** argv) 208 | { 209 | // initialize timer 210 | stm_setup(); 211 | 212 | // parse screen size command line arguments 213 | int screenWidth, screenHeight, samplesPerPixel; 214 | if (argc < 5) 215 | { 216 | printf("Usage: TrimeshTracer.exe 
[width] [height] [samplesPerPixel] [objFile]\n"); 217 | return 1; 218 | } 219 | screenWidth = atoi(argv[1]); 220 | if (screenWidth < 1 || screenWidth > 10000) 221 | { 222 | printf("ERROR: invalid width argument '%s'\n", argv[1]); 223 | return 1; 224 | } 225 | screenHeight = atoi(argv[2]); 226 | if (screenHeight < 1 || screenHeight > 10000) 227 | { 228 | printf("ERROR: invalid height argument '%s'\n", argv[2]); 229 | return 1; 230 | } 231 | samplesPerPixel = atoi(argv[3]); 232 | if (samplesPerPixel < 1 || samplesPerPixel > 1024) 233 | { 234 | printf("ERROR: invalid samplesPerPixel argument '%s'\n", argv[3]); 235 | return 1; 236 | } 237 | 238 | // load model file and initialize the scene 239 | float3 sceneMin, sceneMax; 240 | if (!LoadScene(argv[4], sceneMin, sceneMax)) 241 | return 1; 242 | 243 | // place a camera: put it a bit outside scene bounds, looking at the center of it 244 | float3 sceneSize = sceneMax - sceneMin; 245 | float3 sceneCenter = (sceneMin + sceneMax) * 0.5f; 246 | float3 lookfrom = sceneCenter + sceneSize * float3(0.3f,0.6f,1.2f); 247 | if (strstr(argv[4], "sponza.obj") != nullptr) // sponza looks bad when viewed from outside; hardcode camera position 248 | lookfrom = float3(-5.96f, 4.08f, -1.22f); 249 | float3 lookat = sceneCenter + sceneSize * float3(0,-0.1f,0); 250 | float distToFocus = length(lookfrom - lookat); 251 | float aperture = 0.03f; 252 | auto camera = Camera(lookfrom, lookat, float3(0, 1, 0), 60, float(screenWidth) / float(screenHeight), aperture, distToFocus); 253 | 254 | // create RGBA image for the result 255 | uint8_t* image = new uint8_t[screenWidth * screenHeight * 4]; 256 | 257 | // initialize job system for threading 258 | enkiTaskScheduler* jobSystem = enkiNewTaskScheduler(); 259 | enkiInitTaskScheduler(jobSystem); 260 | enkiTaskSet* job = enkiCreateTaskSet(jobSystem, TraceImageJob); 261 | 262 | // generate the image - spawn TraceImageJob jobs to cover the whole image, and wait for all of them to complete 263 | uint64_t t0 
// Advance a 32-bit xorshift generator and return the new state.
//
// state - generator state, updated in place.
//
// An all-zero state is a fixed point of the xorshift steps (it would yield 0 forever, and
// rejection loops built on top of it, such as RandomInUnitDisk, would never terminate), so a
// zero state is replaced by a fixed non-zero seed before stepping. Non-zero states behave
// exactly as before.
static uint32_t XorShift32(uint32_t& state)
{
    uint32_t x = (state != 0) ? state : 0x9E3779B9u;
    x ^= x << 13;
    x ^= x >> 17;
    x ^= x << 15;
    state = x;
    return x;
}
RandomFloat01(state) * 2.0f - 1.0f; 33 | float a = RandomFloat01(state) * 2.0f * kPI; 34 | float r = sqrtf(1.0f - z * z); 35 | float x = r * cosf(a); 36 | float y = r * sinf(a); 37 | return float3(x, y, z); 38 | } 39 | -------------------------------------------------------------------------------- /source/maths.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | // -------------------------------------------------------------------------- 4 | // various math utilities 5 | 6 | #define NOMINMAX 7 | #include 8 | #include 9 | #include 10 | 11 | #define DO_FLOAT3_WITH_SIMD 1 12 | 13 | #define kPI 3.1415926f 14 | 15 | // -------------------------------------------------------------------------- 16 | // simple 3D vector with x,y,z components - both SIMD (SSE) and simple scalar C paths 17 | 18 | #if DO_FLOAT3_WITH_SIMD 19 | 20 | 21 | // ---- SSE implementation, largely based on http://www.codersnotes.com/notes/maths-lib-2016/ 22 | 23 | #include 24 | #include 25 | #include 26 | 27 | // SHUFFLE3(v, 0,1,2) leaves the vector unchanged (v.xyz). 28 | // SHUFFLE3(v, 0,0,0) splats the X (v.xxx). 
29 | #define SHUFFLE3(V, X,Y,Z) float3(_mm_shuffle_ps((V).m, (V).m, _MM_SHUFFLE(Z,Z,Y,X))) 30 | 31 | struct float3 32 | { 33 | inline float3() {} 34 | inline explicit float3(const float *p) { m = _mm_set_ps(p[2], p[2], p[1], p[0]); } 35 | inline explicit float3(float x, float y, float z) { m = _mm_set_ps(z, z, y, x); } 36 | inline explicit float3(float v) { m = _mm_set1_ps(v); } 37 | inline explicit float3(__m128 v) { m = v; } 38 | 39 | inline float getX() const { return _mm_cvtss_f32(m); } 40 | inline float getY() const { return _mm_cvtss_f32(_mm_shuffle_ps(m, m, _MM_SHUFFLE(1, 1, 1, 1))); } 41 | inline float getZ() const { return _mm_cvtss_f32(_mm_shuffle_ps(m, m, _MM_SHUFFLE(2, 2, 2, 2))); } 42 | 43 | inline float3 yzx() const { return SHUFFLE3(*this, 1, 2, 0); } 44 | inline float3 zxy() const { return SHUFFLE3(*this, 2, 0, 1); } 45 | 46 | inline void store(float *p) const { p[0] = getX(); p[1] = getY(); p[2] = getZ(); } 47 | 48 | void setX(float x) 49 | { 50 | m = _mm_move_ss(m, _mm_set_ss(x)); 51 | } 52 | void setY(float y) 53 | { 54 | __m128 t = _mm_move_ss(m, _mm_set_ss(y)); 55 | t = _mm_shuffle_ps(t, t, _MM_SHUFFLE(3, 2, 0, 0)); 56 | m = _mm_move_ss(t, m); 57 | } 58 | void setZ(float z) 59 | { 60 | __m128 t = _mm_move_ss(m, _mm_set_ss(z)); 61 | t = _mm_shuffle_ps(t, t, _MM_SHUFFLE(3, 0, 1, 0)); 62 | m = _mm_move_ss(t, m); 63 | } 64 | 65 | __m128 m; 66 | }; 67 | 68 | typedef float3 bool3; 69 | 70 | inline float3 operator+ (float3 a, float3 b) { a.m = _mm_add_ps(a.m, b.m); return a; } 71 | inline float3 operator- (float3 a, float3 b) { a.m = _mm_sub_ps(a.m, b.m); return a; } 72 | inline float3 operator* (float3 a, float3 b) { a.m = _mm_mul_ps(a.m, b.m); return a; } 73 | inline float3 operator/ (float3 a, float3 b) { a.m = _mm_div_ps(a.m, b.m); return a; } 74 | inline float3 operator* (float3 a, float b) { a.m = _mm_mul_ps(a.m, _mm_set1_ps(b)); return a; } 75 | inline float3 operator/ (float3 a, float b) { a.m = _mm_div_ps(a.m, _mm_set1_ps(b)); return a; } 76 
| inline float3 operator* (float a, float3 b) { b.m = _mm_mul_ps(_mm_set1_ps(a), b.m); return b; } 77 | inline float3 operator/ (float a, float3 b) { b.m = _mm_div_ps(_mm_set1_ps(a), b.m); return b; } 78 | inline float3& operator+= (float3 &a, float3 b) { a = a + b; return a; } 79 | inline float3& operator-= (float3 &a, float3 b) { a = a - b; return a; } 80 | inline float3& operator*= (float3 &a, float3 b) { a = a * b; return a; } 81 | inline float3& operator/= (float3 &a, float3 b) { a = a / b; return a; } 82 | inline float3& operator*= (float3 &a, float b) { a = a * b; return a; } 83 | inline float3& operator/= (float3 &a, float b) { a = a / b; return a; } 84 | inline bool3 operator==(float3 a, float3 b) { a.m = _mm_cmpeq_ps(a.m, b.m); return a; } 85 | inline bool3 operator!=(float3 a, float3 b) { a.m = _mm_cmpneq_ps(a.m, b.m); return a; } 86 | inline bool3 operator< (float3 a, float3 b) { a.m = _mm_cmplt_ps(a.m, b.m); return a; } 87 | inline bool3 operator> (float3 a, float3 b) { a.m = _mm_cmpgt_ps(a.m, b.m); return a; } 88 | inline bool3 operator<=(float3 a, float3 b) { a.m = _mm_cmple_ps(a.m, b.m); return a; } 89 | inline bool3 operator>=(float3 a, float3 b) { a.m = _mm_cmpge_ps(a.m, b.m); return a; } 90 | inline float3 min(float3 a, float3 b) { a.m = _mm_min_ps(a.m, b.m); return a; } 91 | inline float3 max(float3 a, float3 b) { a.m = _mm_max_ps(a.m, b.m); return a; } 92 | 93 | inline float3 operator- (float3 a) { return float3(_mm_setzero_ps()) - a; } 94 | 95 | inline float hmin(float3 v) 96 | { 97 | v = min(v, SHUFFLE3(v, 1, 0, 2)); 98 | return min(v, SHUFFLE3(v, 2, 0, 1)).getX(); 99 | } 100 | inline float hmax(float3 v) 101 | { 102 | v = max(v, SHUFFLE3(v, 1, 0, 2)); 103 | return max(v, SHUFFLE3(v, 2, 0, 1)).getX(); 104 | } 105 | 106 | inline float3 cross(float3 a, float3 b) 107 | { 108 | // x <- a.y*b.z - a.z*b.y 109 | // y <- a.z*b.x - a.x*b.z 110 | // z <- a.x*b.y - a.y*b.x 111 | // We can save a shuffle by grouping it in this wacky order: 112 | return 
(a.zxy()*b - a*b.zxy()).zxy(); 113 | } 114 | 115 | // Returns a 3-bit code where bit0..bit2 is X..Z 116 | inline unsigned mask(float3 v) { return _mm_movemask_ps(v.m) & 7; } 117 | // Once we have a comparison, we can branch based on its results: 118 | inline bool any(bool3 v) { return mask(v) != 0; } 119 | inline bool all(bool3 v) { return mask(v) == 7; } 120 | 121 | inline float3 clamp(float3 t, float3 a, float3 b) { return min(max(t, a), b); } 122 | inline float sum(float3 v) { return v.getX() + v.getY() + v.getZ(); } 123 | inline float dot(float3 a, float3 b) { return sum(a*b); } 124 | 125 | 126 | #else // #if DO_FLOAT3_WITH_SIMD 127 | 128 | // ---- Simple scalar C implementation 129 | 130 | 131 | struct float3 132 | { 133 | float3() : x(0), y(0), z(0) {} 134 | float3(float x_, float y_, float z_) : x(x_), y(y_), z(z_) {} 135 | 136 | float3 operator-() const { return float3(-x, -y, -z); } 137 | float3& operator+=(const float3& o) { x+=o.x; y+=o.y; z+=o.z; return *this; } 138 | float3& operator-=(const float3& o) { x-=o.x; y-=o.y; z-=o.z; return *this; } 139 | float3& operator*=(const float3& o) { x*=o.x; y*=o.y; z*=o.z; return *this; } 140 | float3& operator*=(float o) { x*=o; y*=o; z*=o; return *this; } 141 | 142 | inline float getX() const { return x; } 143 | inline float getY() const { return y; } 144 | inline float getZ() const { return z; } 145 | inline void setX(float x_) { x = x_; } 146 | inline void setY(float y_) { y = y_; } 147 | inline void setZ(float z_) { z = z_; } 148 | inline void store(float *p) const { p[0] = getX(); p[1] = getY(); p[2] = getZ(); } 149 | 150 | float x, y, z; 151 | }; 152 | 153 | inline float3 operator+(const float3& a, const float3& b) { return float3(a.x+b.x,a.y+b.y,a.z+b.z); } 154 | inline float3 operator-(const float3& a, const float3& b) { return float3(a.x-b.x,a.y-b.y,a.z-b.z); } 155 | inline float3 operator*(const float3& a, const float3& b) { return float3(a.x*b.x,a.y*b.y,a.z*b.z); } 156 | inline float3 operator*(const 
// Clamp v to the [0, 1] range.
// NaN is mapped to 0: callers scale the result and convert it to an 8-bit integer, and
// converting NaN to an integer type is undefined behaviour.
inline float saturate(float v)
{
    if (!(v > 0)) return 0; // negative, zero, or NaN (every ordered comparison with NaN is false)
    if (v > 1) return 1;
    return v;
}
192 | // direction is assumed to be normalized 193 | 194 | struct Ray 195 | { 196 | Ray() {} 197 | Ray(float3 orig_, float3 dir_) : orig(orig_), dir(dir_) { AssertUnit(dir); } 198 | 199 | float3 pointAt(float t) const { return orig + dir * t; } 200 | 201 | float3 orig; 202 | float3 dir; 203 | }; 204 | 205 | 206 | // -------------------------------------------------------------------------- 207 | // ray hit point information: position where it hit something; 208 | // normal of the surface that was hit, and "t" position along the ray 209 | 210 | struct Hit 211 | { 212 | float3 pos; 213 | float3 normal; 214 | float t; 215 | }; 216 | 217 | 218 | // -------------------------------------------------------------------------- 219 | // random number generator utilities 220 | 221 | float RandomFloat01(uint32_t& state); 222 | float3 RandomInUnitDisk(uint32_t& state); 223 | float3 RandomUnitVector(uint32_t& state); 224 | 225 | 226 | // -------------------------------------------------------------------------- 227 | // camera 228 | 229 | struct Camera 230 | { 231 | Camera() {} 232 | 233 | // vfov is top to bottom in degrees 234 | Camera(const float3& lookFrom, const float3& lookAt, const float3& vup, float vfov, float aspect, float aperture, float focusDist) 235 | { 236 | lensRadius = aperture / 2; 237 | float theta = vfov*kPI/180; 238 | float halfHeight = tanf(theta/2); 239 | float halfWidth = aspect * halfHeight; 240 | origin = lookFrom; 241 | w = normalize(lookFrom - lookAt); 242 | u = normalize(cross(vup, w)); 243 | v = cross(w, u); 244 | lowerLeftCorner = origin - halfWidth*focusDist*u - halfHeight*focusDist*v - focusDist*w; 245 | horizontal = 2*halfWidth*focusDist*u; 246 | vertical = 2*halfHeight*focusDist*v; 247 | } 248 | 249 | Ray GetRay(float s, float t, uint32_t& state) const 250 | { 251 | float3 rd = lensRadius * RandomInUnitDisk(state); 252 | float3 offset = u * rd.getX() + v * rd.getY(); 253 | return Ray(origin + offset, normalize(lowerLeftCorner + s*horizontal + 
t*vertical - origin - offset)); 254 | } 255 | 256 | float3 origin; 257 | float3 lowerLeftCorner; 258 | float3 horizontal; 259 | float3 vertical; 260 | float3 u, v, w; 261 | float lensRadius; 262 | }; 263 | 264 | -------------------------------------------------------------------------------- /source/scene.cpp: -------------------------------------------------------------------------------- 1 | #include "scene.h" 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | // Use our own simple BVH implementation to speed up ray queries? 8 | #define USE_BVH 1 9 | 10 | // Use Intel Embree for BVH and all ray queries? 11 | #define USE_EMBREE 0 12 | #if USE_EMBREE 13 | #include "external/embree3/rtcore.h" 14 | #endif 15 | 16 | // Use NanoRT for BVH and all ray queries? 17 | #define USE_NANORT 0 18 | #if USE_NANORT 19 | #include "external/nanort.h" 20 | #endif 21 | 22 | 23 | #if USE_EMBREE || USE_NANORT 24 | #undef USE_BVH 25 | #endif 26 | 27 | // -------------------------------------------------------------------------- 28 | // Axis-aligned bounding box and related functions 29 | 30 | #if USE_BVH 31 | struct AABB 32 | { 33 | float3 bmin; 34 | float3 bmax; 35 | }; 36 | 37 | // from "A Ray-Box Intersection Algorithm and Efficient Dynamic Voxel Rendering" 38 | // http://jcgt.org/published/0007/03/04/ 39 | // note: ray direction should be inverted, i.e 1.0/direction! 
40 | static bool HitAABB(const Ray& r, const AABB& box, float tMin, float tMax) 41 | { 42 | float3 t0 = (box.bmin - r.orig) * r.dir; 43 | float3 t1 = (box.bmax - r.orig) * r.dir; 44 | 45 | float3 tsmaller = min(t0, t1); 46 | float3 tbigger = max(t0, t1); 47 | 48 | tMin = std::max(tMin, hmax(tsmaller)); 49 | tMax = std::min(tMax, hmin(tbigger)); 50 | 51 | return tMin <= tMax; 52 | } 53 | 54 | static AABB AABBUnion(const AABB& a, const AABB& b) 55 | { 56 | AABB res; 57 | res.bmin = min(a.bmin, b.bmin); 58 | res.bmax = max(a.bmax, b.bmax); 59 | return res; 60 | } 61 | 62 | static AABB AABBEnclose(const AABB& a, const float3& p) 63 | { 64 | AABB res; 65 | res.bmin = min(a.bmin, p); 66 | res.bmax = max(a.bmax, p); 67 | return res; 68 | } 69 | 70 | static AABB AABBOfTriangle(const Triangle& tri) 71 | { 72 | AABB res; 73 | res.bmin = tri.v0; 74 | res.bmax = tri.v0; 75 | res = AABBEnclose(res, tri.v1); 76 | res = AABBEnclose(res, tri.v2); 77 | return res; 78 | } 79 | #endif // #if USE_BVH 80 | 81 | 82 | // -------------------------------------------------------------------------- 83 | // Checks if one triangle is hit by a ray segment. 
84 | // based on "The Graphics Codex" 85 | 86 | #if !USE_EMBREE && !USE_NANORT 87 | static bool HitTriangle(const Ray& r, const Triangle& tri, float tMin, float tMax, Hit& outHit) 88 | { 89 | float3 e1 = tri.v1 - tri.v0; 90 | float3 e2 = tri.v2 - tri.v0; 91 | float3 p = cross(r.dir, e2); 92 | float a = dot(e1, p); 93 | if (fabs(a) < 1e-5f) 94 | return false; // parallel to the plane 95 | 96 | float f = 1.0f / a; 97 | float3 s = r.orig - tri.v0; 98 | float u = f * dot(s, p); 99 | 100 | if (u < 0.0f || u > 1.0f) 101 | return false; // but outside the triangle 102 | 103 | float3 q = cross(s, e1); 104 | float v = f * dot(r.dir, q); 105 | 106 | if (v < 0.0f || (u + v) > 1.0f) 107 | return false; // but outside the triangle 108 | 109 | float t = f * dot(e2, q); 110 | 111 | if (t > tMin && t < tMax) 112 | { 113 | outHit.t = t; 114 | outHit.pos = r.pointAt(t); 115 | outHit.normal = normalize(cross(e1, e2)); 116 | return true; 117 | } 118 | return false; 119 | } 120 | 121 | static bool HitTriangleShadow(const Ray& r, const Triangle& tri, float tMin, float tMax) 122 | { 123 | float3 e1 = tri.v1 - tri.v0; 124 | float3 e2 = tri.v2 - tri.v0; 125 | float3 p = cross(r.dir, e2); 126 | float a = dot(e1, p); 127 | if (fabs(a) < 1e-5f) 128 | return false; // parallel to the plane 129 | 130 | float f = 1.0f / a; 131 | float3 s = r.orig - tri.v0; 132 | float u = f * dot(s, p); 133 | 134 | if (u < 0.0f || u > 1.0f) 135 | return false; // but outside the triangle 136 | 137 | float3 q = cross(s, e1); 138 | float v = f * dot(r.dir, q); 139 | 140 | if (v < 0.0f || (u + v) > 1.0f) 141 | return false; // but outside the triangle 142 | 143 | float t = f * dot(e2, q); 144 | 145 | if (t > tMin && t < tMax) 146 | return true; 147 | return false; 148 | } 149 | #endif // #if !USE_EMBREE && !USE_NANORT 150 | 151 | 152 | // -------------------------------------------------------------------------- 153 | // bounding volume hierarchy 154 | 155 | #if USE_BVH 156 | struct BVHNode 157 | { 158 | AABB box; 
159 | int data1; // node: left index; leaf: start triangle index 160 | int data2; // node: right index; leaf: triangle count 161 | bool leaf; 162 | }; 163 | #endif // #if USE_BVH 164 | 165 | // Scene information: a copy of the input triangles 166 | static int s_TriangleCount; 167 | static Triangle* s_Triangles; 168 | static int* s_TriIndices; 169 | #if USE_BVH 170 | static std::vector s_BVH; 171 | #endif 172 | 173 | #if USE_EMBREE 174 | static RTCDevice s_Device; 175 | static RTCScene s_Scene; 176 | #endif 177 | 178 | #if USE_NANORT 179 | static unsigned int* s_Indices; 180 | static nanort::BVHAccel s_BVH; 181 | static nanort::TriangleMesh* s_Mesh; 182 | #endif 183 | 184 | #if USE_BVH 185 | static uint32_t XorShift32(uint32_t& state) 186 | { 187 | uint32_t x = state; 188 | x ^= x << 13; 189 | x ^= x >> 17; 190 | x ^= x << 15; 191 | state = x; 192 | return x; 193 | } 194 | 195 | static int CreateBVH(int triStart, int triCount, uint32_t& rngState) 196 | { 197 | // sort input triangles by a randomly chosen axis 198 | int axis = XorShift32(rngState) % 3; 199 | if (axis == 0) 200 | std::sort(s_TriIndices+triStart, s_TriIndices+triStart + triCount, [](int a, int b) 201 | { 202 | assert(a >= 0 && a < s_TriangleCount); 203 | assert(b >= 0 && b < s_TriangleCount); 204 | AABB boxa = AABBOfTriangle(s_Triangles[a]); 205 | AABB boxb = AABBOfTriangle(s_Triangles[b]); 206 | return boxa.bmin.getX() < boxb.bmin.getX(); 207 | }); 208 | else if (axis == 1) 209 | std::sort(s_TriIndices+triStart, s_TriIndices+triStart + triCount, [](int a, int b) 210 | { 211 | assert(a >= 0 && a < s_TriangleCount); 212 | assert(b >= 0 && b < s_TriangleCount); 213 | AABB boxa = AABBOfTriangle(s_Triangles[a]); 214 | AABB boxb = AABBOfTriangle(s_Triangles[b]); 215 | return boxa.bmin.getY() < boxb.bmin.getY(); 216 | }); 217 | else if (axis == 2) 218 | std::sort(s_TriIndices+triStart, s_TriIndices+triStart + triCount, [](int a, int b) 219 | { 220 | assert(a >= 0 && a < s_TriangleCount); 221 | assert(b >= 0 
&& b < s_TriangleCount); 222 | AABB boxa = AABBOfTriangle(s_Triangles[a]); 223 | AABB boxb = AABBOfTriangle(s_Triangles[b]); 224 | return boxa.bmin.getZ() < boxb.bmin.getZ(); 225 | }); 226 | 227 | // create the node 228 | BVHNode node; 229 | int nodeIndex = (int)s_BVH.size(); 230 | s_BVH.push_back(node); 231 | 232 | // if we have less than N triangles, make this node a leaf that just has all of them 233 | if (triCount <= 4) 234 | { 235 | node.data1 = triStart; 236 | node.data2 = triCount; 237 | node.leaf = true; 238 | node.box = AABBOfTriangle(s_Triangles[s_TriIndices[triStart]]); 239 | for (int i = 1; i < triCount; ++i) 240 | { 241 | auto tribox = AABBOfTriangle(s_Triangles[s_TriIndices[triStart+i]]); 242 | node.box = AABBUnion(node.box, tribox); 243 | } 244 | } 245 | else 246 | { 247 | node.data1 = CreateBVH(triStart, triCount / 2, rngState); 248 | node.data2 = CreateBVH(triStart + triCount / 2, triCount - triCount / 2, rngState); 249 | node.leaf = false; 250 | assert(node.data1 >= 0 && node.data1 < s_BVH.size()); 251 | assert(node.data2 >= 0 && node.data2 < s_BVH.size()); 252 | node.box = AABBUnion(s_BVH[node.data1].box, s_BVH[node.data2].box); 253 | } 254 | s_BVH[nodeIndex] = node; 255 | return nodeIndex; 256 | } 257 | #endif // #if USE_BVH 258 | 259 | void InitializeScene(int triangleCount, const Triangle* triangles) 260 | { 261 | s_TriangleCount = triangleCount; 262 | s_Triangles = new Triangle[triangleCount]; 263 | memcpy(s_Triangles, triangles, triangleCount * sizeof(triangles[0])); 264 | 265 | #if USE_EMBREE 266 | s_Device = rtcNewDevice("threads=1"); 267 | s_Scene = rtcNewScene(s_Device); 268 | 269 | RTCGeometry mesh = rtcNewGeometry (s_Device, RTC_GEOMETRY_TYPE_TRIANGLE); 270 | float* dstVerts = (float*)rtcSetNewGeometryBuffer(mesh, RTC_BUFFER_TYPE_VERTEX, 0, RTC_FORMAT_FLOAT3, 12, triangleCount*3); 271 | int* indices = (int*)rtcSetNewGeometryBuffer(mesh, RTC_BUFFER_TYPE_INDEX, 0, RTC_FORMAT_UINT3, 12, triangleCount); 272 | for (int i = 0; i < 
triangleCount; ++i) 273 | { 274 | memcpy(dstVerts+i*9+0, &triangles[i].v0, 12); 275 | memcpy(dstVerts+i*9+3, &triangles[i].v1, 12); 276 | memcpy(dstVerts+i*9+6, &triangles[i].v2, 12); 277 | indices[i*3+0] = i*3+0; 278 | indices[i*3+1] = i*3+1; 279 | indices[i*3+2] = i*3+2; 280 | } 281 | rtcCommitGeometry(mesh); 282 | rtcAttachGeometry(s_Scene, mesh); 283 | rtcReleaseGeometry(mesh); 284 | 285 | rtcCommitScene(s_Scene); 286 | 287 | #elif USE_NANORT 288 | 289 | nanort::BVHBuildOptions buildOptions; 290 | buildOptions.cache_bbox = false; 291 | 292 | s_Indices = new unsigned int[triangleCount*3]; 293 | for (int i = 0; i < triangleCount*3; ++i) 294 | s_Indices[i] = i; 295 | s_Mesh = new nanort::TriangleMesh((const float*)s_Triangles, s_Indices, sizeof(float3)); 296 | nanort::TriangleSAHPred pred((const float*)s_Triangles, s_Indices, sizeof(float3)); 297 | s_BVH.Build(triangleCount, *s_Mesh, pred, buildOptions); 298 | 299 | #elif USE_BVH 300 | 301 | // build BVH 302 | s_TriIndices = new int[triangleCount]; 303 | for (int i = 0; i < triangleCount; ++i) 304 | s_TriIndices[i] = i; 305 | uint32_t rngState = 1; 306 | CreateBVH(0, triangleCount, rngState); 307 | #endif 308 | } 309 | 310 | void CleanupScene() 311 | { 312 | delete[] s_Triangles; 313 | #if USE_EMBREE 314 | rtcReleaseScene(s_Scene); 315 | rtcReleaseDevice(s_Device); 316 | #elif USE_NANORT 317 | delete s_Mesh; 318 | delete[] s_Indices; 319 | #elif USE_BVH 320 | s_BVH.clear(); 321 | delete[] s_TriIndices; 322 | #endif 323 | } 324 | 325 | #if USE_BVH 326 | static int HitBVH(int index, const Ray& r, const Ray& invR, float tMin, float tMax, Hit& outHit) 327 | { 328 | // check if ray hits us at all 329 | const BVHNode& node = s_BVH[index]; 330 | if (!HitAABB(invR, node.box, tMin, tMax)) 331 | return -1; 332 | 333 | // if leaf node, check against triangles 334 | if (node.leaf) 335 | { 336 | int hitID = -1; 337 | for (int i = 0; i < node.data2; ++i) 338 | { 339 | int triIndex = s_TriIndices[node.data1 + i]; 340 | 
assert(triIndex >= 0 && triIndex < s_TriangleCount); 341 | if (HitTriangle(r, s_Triangles[triIndex], tMin, tMax, outHit)) 342 | { 343 | hitID = triIndex; 344 | tMax = outHit.t; 345 | } 346 | } 347 | return hitID; 348 | } 349 | 350 | // not a leaf node, go into child nodes 351 | int leftId = HitBVH(node.data1, r, invR, tMin, tMax, outHit); 352 | if (leftId != -1) 353 | { 354 | // left was hit: only check right hit up until left hit distance 355 | int rightId = HitBVH(node.data2, r, invR, tMin, outHit.t, outHit); 356 | if (rightId != -1) 357 | return rightId; 358 | return leftId; 359 | } 360 | // left was not hit: check right 361 | int rightId = HitBVH(node.data2, r, invR, tMin, tMax, outHit); 362 | return rightId; 363 | } 364 | 365 | static bool HitShadowBVH(int index, const Ray& r, const Ray& invR, float tMin, float tMax) 366 | { 367 | // check if ray hits us at all 368 | const BVHNode& node = s_BVH[index]; 369 | if (!HitAABB(invR, node.box, tMin, tMax)) 370 | return false; 371 | 372 | // if leaf node, check against triangles 373 | if (node.leaf) 374 | { 375 | for (int i = 0; i < node.data2; ++i) 376 | { 377 | int triIndex = s_TriIndices[node.data1 + i]; 378 | assert(triIndex >= 0 && triIndex < s_TriangleCount); 379 | if (HitTriangleShadow(r, s_Triangles[triIndex], tMin, tMax)) 380 | return true; 381 | } 382 | return false; 383 | } 384 | 385 | if (HitShadowBVH(node.data1, r, invR, tMin, tMax)) 386 | return true; 387 | if (HitShadowBVH(node.data2, r, invR, tMin, tMax)) 388 | return true; 389 | return false; 390 | } 391 | #endif // #if USE_BVH 392 | 393 | 394 | // Check all the triangles in the scene for a hit, and return the closest one. 
395 | int HitScene(const Ray& r, float tMin, float tMax, Hit& outHit) 396 | { 397 | #if USE_EMBREE 398 | RTCIntersectContext ctx; 399 | rtcInitIntersectContext(&ctx); 400 | 401 | RTCRayHit rh; 402 | r.orig.store(&rh.ray.org_x); 403 | rh.ray.tnear = tMin; 404 | r.dir.store(&rh.ray.dir_x); 405 | rh.ray.time = 0; 406 | rh.ray.tfar = tMax; 407 | rh.ray.mask = 0; 408 | rh.ray.id = 0; 409 | rh.ray.flags = 0; 410 | rh.hit.geomID = RTC_INVALID_GEOMETRY_ID; 411 | rh.hit.primID = RTC_INVALID_GEOMETRY_ID; 412 | 413 | rtcIntersect1(s_Scene, &ctx, &rh); 414 | if (rh.hit.geomID == RTC_INVALID_GEOMETRY_ID) 415 | return -1; 416 | outHit.t = rh.ray.tfar; 417 | outHit.pos = r.pointAt(outHit.t); 418 | outHit.normal = normalize(float3(rh.hit.Ng_x, rh.hit.Ng_y, rh.hit.Ng_z)); 419 | return rh.hit.primID; 420 | 421 | #elif USE_NANORT 422 | nanort::Ray ray; 423 | ray.min_t = tMin; 424 | ray.max_t = tMax; 425 | r.orig.store(ray.org); 426 | r.dir.store(ray.dir); 427 | 428 | nanort::TriangleIntersector<> intersector((const float*)s_Triangles, s_Indices, sizeof(float3)); 429 | nanort::TriangleIntersection<> isect; 430 | bool hit = s_BVH.Traverse(ray, intersector, &isect); 431 | if (!hit) 432 | return -1; 433 | 434 | outHit.t = isect.t; 435 | outHit.pos = r.pointAt(isect.t); 436 | const Triangle& tri = s_Triangles[isect.prim_id]; 437 | 438 | float3 e1 = tri.v1 - tri.v0; 439 | float3 e2 = tri.v2 - tri.v0; 440 | float3 n = normalize(cross(e1,e2)); 441 | outHit.normal = n; 442 | return isect.prim_id; 443 | 444 | #elif USE_BVH 445 | 446 | if (s_BVH.empty()) 447 | return -1; 448 | 449 | Ray invR = r; 450 | invR.dir = float3(1.0f) / r.dir; 451 | return HitBVH(0, r, invR, tMin, tMax, outHit); 452 | 453 | #else 454 | 455 | float hitMinT = tMax; 456 | int hitID = -1; 457 | for (int i = 0; i < s_TriangleCount; ++i) 458 | { 459 | Hit hit; 460 | if (HitTriangle(r, s_Triangles[i], tMin, tMax, hit)) 461 | { 462 | if (hit.t < hitMinT) 463 | { 464 | hitMinT = hit.t; 465 | hitID = i; 466 | outHit = hit; 467 | 
} 468 | } 469 | } 470 | 471 | return hitID; 472 | #endif 473 | } 474 | 475 | bool HitSceneShadow(const Ray& r, float tMin, float tMax) 476 | { 477 | #if USE_EMBREE 478 | RTCIntersectContext ctx; 479 | rtcInitIntersectContext(&ctx); 480 | 481 | RTCRay rh; 482 | r.orig.store(&rh.org_x); 483 | rh.tnear = tMin; 484 | r.dir.store(&rh.dir_x); 485 | rh.time = 0; 486 | rh.tfar = tMax; 487 | rh.mask = 0; 488 | rh.id = 0; 489 | rh.flags = 0; 490 | 491 | rtcOccluded1(s_Scene, &ctx, &rh); 492 | return rh.tfar < 0; 493 | 494 | #elif USE_NANORT 495 | nanort::Ray ray; 496 | ray.min_t = tMin; 497 | ray.max_t = tMax; 498 | r.orig.store(ray.org); 499 | r.dir.store(ray.dir); 500 | 501 | nanort::TriangleIntersector<> intersector((const float*)s_Triangles, s_Indices, sizeof(float3)); 502 | nanort::TriangleIntersection<> isect; 503 | return s_BVH.Traverse(ray, intersector, &isect); 504 | 505 | #elif USE_BVH 506 | if (s_BVH.empty()) 507 | return false; 508 | 509 | Ray invR = r; 510 | invR.dir = float3(1.0f) / r.dir; 511 | return HitShadowBVH(0, r, invR, tMin, tMax); 512 | 513 | #else 514 | for (int i = 0; i < s_TriangleCount; ++i) 515 | { 516 | if (HitTriangleShadow(r, s_Triangles[i], tMin, tMax)) 517 | return true; 518 | } 519 | return false; 520 | 521 | #endif 522 | } 523 | -------------------------------------------------------------------------------- /source/scene.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | // Scene: this represents all the scene geometry that the ray tracer works on. 4 | 5 | #include "maths.h" 6 | 7 | 8 | // One triangle: just three vertex positions. 9 | struct Triangle 10 | { 11 | float3 v0, v1, v2; 12 | }; 13 | 14 | 15 | // Our scene structure is very simple: just a bunch of triangles and nothing else 16 | // (no "objects", "instances" or "materials"). 17 | void InitializeScene(int triangleCount, const Triangle* triangles); 18 | 19 | // Cleanup any data or memory that the scene might have allocated. 
20 | void CleanupScene(); 21 | 22 | // Checks if the ray segment hits a scene. If any triangle is hit by the ray, this 23 | // function should return information about the closest one. 24 | // 25 | // - r: the ray itself, 26 | // - tMin and tMax: segment of the ray that is checked, 27 | // - outHit: hit information, if any, 28 | // 29 | // Function returns the triangle index, or -1 if nothing is hit by the ray. 30 | int HitScene(const Ray& r, float tMin, float tMax, Hit& outHit); 31 | 32 | // Similar to HitScene, but only returns if the ray hit anything 33 | bool HitSceneShadow(const Ray& r, float tMin, float tMax); 34 | --------------------------------------------------------------------------------